ByteScout PDF Extractor SDK can be used to index all PDF files in a directory. The index can include various metadata for each file, such as File Name, Page Count, Author, Title, Producer, and others.
The sample source code below shows how to list all PDF files in a directory using C#, VB.NET, and VBScript.
Select your programming language:
using System;
using System.IO;
using Bytescout.PDFExtractor;
namespace IndexPDFFiles
{
    /// <summary>
    /// Console sample that prints basic metadata and the first two lines of text
    /// for every PDF file found in a directory.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Lists the "*.pdf" files in the directory four levels above the current
        /// directory and writes each file's name, page count, author, title,
        /// producer, subject, creation date and first two text lines to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The extractors are wrapped in using statements so that whatever they
            // hold for the last loaded document is released deterministically.
            // Create Bytescout.PDFExtractor.InfoExtractor instance
            using (InfoExtractor infoExtractor = new InfoExtractor())
            // Create Bytescout.PDFExtractor.TextExtractor instance
            using (TextExtractor textExtractor = new TextExtractor())
            {
                infoExtractor.RegistrationName = "demo";
                infoExtractor.RegistrationKey = "demo";

                textExtractor.RegistrationName = "demo";
                textExtractor.RegistrationKey = "demo";

                // List all PDF files in directory
                foreach (string file in Directory.GetFiles(@"..\..\..\..", "*.pdf"))
                {
                    // Document metadata
                    infoExtractor.LoadDocumentFromFile(file);

                    Console.WriteLine("File Name: " + Path.GetFileName(file));
                    Console.WriteLine("Page Count: " + infoExtractor.GetPageCount());
                    Console.WriteLine("Author: " + infoExtractor.Author);
                    Console.WriteLine("Title: " + infoExtractor.Title);
                    Console.WriteLine("Producer: " + infoExtractor.Producer);
                    Console.WriteLine("Subject: " + infoExtractor.Subject);
                    Console.WriteLine("CreationDate: " + infoExtractor.CreationDate);

                    // Short text preview taken from the first page (index 0)
                    Console.WriteLine("Text (2 lines): ");
                    textExtractor.LoadDocumentFromFile(file);

                    using (StringReader stringReader = new StringReader(textExtractor.GetTextFromPage(0)))
                    {
                        // ReadLine returns null once the text is exhausted; in that
                        // case Console.WriteLine just emits an empty line.
                        Console.WriteLine(stringReader.ReadLine());
                        Console.WriteLine(stringReader.ReadLine());
                    }

                    Console.WriteLine();
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            // Keeps the console window open; ReadLine waits for Enter.
            Console.ReadLine();
        }
    }
}
Imports System.IO
Imports Bytescout.PDFExtractor
''' <summary>
''' Console sample that prints basic metadata and the first two lines of text
''' for every PDF file found in a directory.
''' </summary>
Class Program

    ''' <summary>
    ''' Lists the "*.pdf" files in the directory four levels above the current
    ''' directory and writes each file's name, page count, author, title,
    ''' producer, subject, creation date and first two text lines to the console.
    ''' </summary>
    ''' <param name="args">Command-line arguments (not used).</param>
    Friend Shared Sub Main(ByVal args As String())

        ' The extractors are created in Using blocks so that whatever they hold
        ' for the last loaded document is released deterministically.
        ' Create Bytescout.PDFExtractor.InfoExtractor instance
        Using infoExtractor As New InfoExtractor()
            infoExtractor.RegistrationName = "demo"
            infoExtractor.RegistrationKey = "demo"

            ' Create Bytescout.PDFExtractor.TextExtractor instance
            Using textExtractor As New TextExtractor()
                textExtractor.RegistrationName = "demo"
                textExtractor.RegistrationKey = "demo"

                ' List all PDF files in directory
                For Each file As String In Directory.GetFiles("..\..\..\..", "*.pdf")

                    ' Document metadata
                    infoExtractor.LoadDocumentFromFile(file)

                    Console.WriteLine("File Name: " & Path.GetFileName(file))
                    Console.WriteLine("Page Count: " & infoExtractor.GetPageCount())
                    Console.WriteLine("Author: " & infoExtractor.Author)
                    Console.WriteLine("Title: " & infoExtractor.Title)
                    Console.WriteLine("Producer: " & infoExtractor.Producer)
                    Console.WriteLine("Subject: " & infoExtractor.Subject)
                    Console.WriteLine("CreationDate: " & infoExtractor.CreationDate)

                    ' Short text preview taken from the first page (index 0)
                    Console.WriteLine("Text (2 lines): ")
                    textExtractor.LoadDocumentFromFile(file)

                    Using stringReader As New StringReader(textExtractor.GetTextFromPage(0))
                        ' ReadLine returns Nothing once the text is exhausted; in that
                        ' case Console.WriteLine just emits an empty line.
                        Console.WriteLine(stringReader.ReadLine())
                        Console.WriteLine(stringReader.ReadLine())
                    End Using

                    Console.WriteLine()
                Next
            End Using
        End Using

        Console.WriteLine()
        Console.WriteLine("Press any key to continue...")
        ' Keeps the console window open; ReadLine waits for Enter.
        Console.ReadLine()

    End Sub

End Class
' Writes an index of the PDF files in the "..\.." folder to output.txt:
' metadata for each file followed by up to 200 characters of first-page text.

' Metadata extractor
Set pdfInfo = CreateObject("Bytescout.PDFExtractor.InfoExtractor")
With pdfInfo
    .RegistrationName = "demo"
    .RegistrationKey = "demo"
End With

' Text extractor
Set pdfText = CreateObject("Bytescout.PDFExtractor.TextExtractor")
With pdfText
    .RegistrationName = "demo"
    .RegistrationKey = "demo"
End With

' File system access, the folder to scan and its file collection
Set fileSystem = CreateObject("Scripting.FileSystemObject")
Set scanFolder = fileSystem.GetFolder("..\..")
Set folderFiles = scanFolder.Files

' Report file that receives the index
Set report = fileSystem.CreateTextFile("output.txt")

For Each entry In folderFiles

    ' Only files with a "pdf" extension (any letter case) are indexed
    If UCase(fileSystem.GetExtensionName(entry)) = "PDF" Then

        ' Metadata section
        pdfInfo.LoadDocumentFromFile(entry)

        report.WriteLine("File Name: " & fileSystem.GetFileName(entry))
        report.WriteLine("Page Count: " & pdfInfo.GetPageCount())
        report.WriteLine("Author: " & pdfInfo.Author)
        report.WriteLine("Title: " & pdfInfo.Title)
        report.WriteLine("Producer: " & pdfInfo.Producer)
        report.WriteLine("Subject: " & pdfInfo.Subject)
        report.WriteLine("CreationDate: " & pdfInfo.CreationDate)

        ' Text preview from the first page (index 0), skipped when it is empty
        pdfText.LoadDocumentFromFile(entry)
        pageText = pdfText.GetTextFromPage(0)

        If Len(pageText) > 0 Then
            report.WriteLine("Text (a bit): ")
            report.WriteLine(Left(pageText, 200))
        End If

        ' Two blank lines separate consecutive entries
        report.WriteBlankLines(2)

    End If

Next

report.Close

' Release the COM objects
Set pdfInfo = Nothing
Set pdfText = Nothing
Set fileSystem = Nothing