ByteScout PDF Extractor SDK can be used to index all PDF files in a directory. You can include various metadata in the index, such as the file name, page count, author, title, producer, and other document properties.
Use the sample source code below to list all PDF files in a directory, together with their metadata and a short preview of the text on the first page, in C#, VB.NET, or VBScript.
Select your programming language:
using System;
using System.IO;
using Bytescout.PDFExtractor;

namespace IndexPDFFiles
{
    /// <summary>
    /// Console sample that lists every PDF file in a directory and prints
    /// its document properties plus the first two lines of text from the
    /// first page.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Enumerates "*.pdf" files in the directory four
        /// levels above the working directory and writes one index entry
        /// per file to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Both extractors are wrapped in using statements so the resources
            // they hold are released even when a document fails to load.
            // Create Bytescout.PDFExtractor.InfoExtractor instance
            using (InfoExtractor infoExtractor = new InfoExtractor())
            // Create Bytescout.PDFExtractor.TextExtractor instance
            using (TextExtractor textExtractor = new TextExtractor())
            {
                infoExtractor.RegistrationName = "demo";
                infoExtractor.RegistrationKey = "demo";

                textExtractor.RegistrationName = "demo";
                textExtractor.RegistrationKey = "demo";

                // List all PDF files in directory
                foreach (string file in Directory.GetFiles(@"..\..\..\..", "*.pdf"))
                {
                    // Document properties
                    infoExtractor.LoadDocumentFromFile(file);

                    Console.WriteLine("File Name: " + Path.GetFileName(file));
                    Console.WriteLine("Page Count: " + infoExtractor.GetPageCount());
                    Console.WriteLine("Author: " + infoExtractor.Author);
                    Console.WriteLine("Title: " + infoExtractor.Title);
                    Console.WriteLine("Producer: " + infoExtractor.Producer);
                    Console.WriteLine("Subject: " + infoExtractor.Subject);
                    Console.WriteLine("CreationDate: " + infoExtractor.CreationDate);
                    Console.WriteLine("Text (2 lines): ");

                    // Text preview taken from the first page (index 0)
                    textExtractor.LoadDocumentFromFile(file);

                    // The reader is scoped to one file and disposed right after
                    // the preview is printed. ReadLine() returns null when the
                    // page has fewer than two lines, which prints as a blank line.
                    using (StringReader stringReader = new StringReader(textExtractor.GetTextFromPage(0)))
                    {
                        Console.WriteLine(stringReader.ReadLine());
                        Console.WriteLine(stringReader.ReadLine());
                    }

                    Console.WriteLine();
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
    }
}
Imports System.IO
Imports Bytescout.PDFExtractor

''' <summary>
''' Console sample that lists every PDF file in a directory and prints its
''' document properties plus the first two lines of text from the first page.
''' </summary>
Class Program

    ''' <summary>
    ''' Entry point. Enumerates "*.pdf" files in the directory four levels
    ''' above the working directory and writes one index entry per file to
    ''' the console.
    ''' </summary>
    ''' <param name="args">Command-line arguments (not used).</param>
    Friend Shared Sub Main(ByVal args As String())

        ' Both extractors live inside a Using block so the resources they hold
        ' are released even when a document fails to load.
        ' Create Bytescout.PDFExtractor.InfoExtractor and
        ' Bytescout.PDFExtractor.TextExtractor instances
        Using infoExtractor As New InfoExtractor(), textExtractor As New TextExtractor()

            infoExtractor.RegistrationName = "demo"
            infoExtractor.RegistrationKey = "demo"

            textExtractor.RegistrationName = "demo"
            textExtractor.RegistrationKey = "demo"

            ' List all PDF files in directory
            For Each file As String In Directory.GetFiles("..\..\..\..", "*.pdf")

                ' Document properties
                infoExtractor.LoadDocumentFromFile(file)

                Console.WriteLine("File Name: " & Path.GetFileName(file))
                Console.WriteLine("Page Count: " & infoExtractor.GetPageCount())
                Console.WriteLine("Author: " & infoExtractor.Author)
                Console.WriteLine("Title: " & infoExtractor.Title)
                Console.WriteLine("Producer: " & infoExtractor.Producer)
                Console.WriteLine("Subject: " & infoExtractor.Subject)
                Console.WriteLine("CreationDate: " & infoExtractor.CreationDate)
                Console.WriteLine("Text (2 lines): ")

                ' Text preview taken from the first page (index 0)
                textExtractor.LoadDocumentFromFile(file)

                ' The reader is scoped to one file and disposed right after the
                ' preview is printed. ReadLine() returns Nothing when the page
                ' has fewer than two lines, which prints as a blank line.
                Using stringReader As New StringReader(textExtractor.GetTextFromPage(0))
                    Console.WriteLine(stringReader.ReadLine())
                    Console.WriteLine(stringReader.ReadLine())
                End Using

                Console.WriteLine()
            Next

        End Using

        Console.WriteLine()
        Console.WriteLine("Press any key to continue...")
        Console.ReadLine()
    End Sub

End Class
' Writes an index of the PDF files found in the folder two levels above the
' working directory to "output.txt": document properties for each file,
' followed by up to the first 200 characters of text from the first page.

Dim pdfInfo, pdfText, fileSystem, sourceFolder, folderFiles, report
Dim candidate, pageText

' Document-properties reader
Set pdfInfo = CreateObject("Bytescout.PDFExtractor.InfoExtractor")
pdfInfo.RegistrationName = "demo"
pdfInfo.RegistrationKey = "demo"

' Page-text reader
Set pdfText = CreateObject("Bytescout.PDFExtractor.TextExtractor")
pdfText.RegistrationName = "demo"
pdfText.RegistrationKey = "demo"

' File system access: source folder, its files, and the report file
Set fileSystem = CreateObject("Scripting.FileSystemObject")
Set sourceFolder = fileSystem.GetFolder("..\..")
Set folderFiles = sourceFolder.Files
Set report = fileSystem.CreateTextFile("output.txt")

For Each candidate In folderFiles

    ' Only files with a "pdf" extension (any letter case) are indexed
    If UCase(fileSystem.GetExtensionName(candidate)) = "PDF" Then

        ' Document properties
        pdfInfo.LoadDocumentFromFile(candidate)

        report.WriteLine "File Name: " & fileSystem.GetFileName(candidate)
        report.WriteLine "Page Count: " & pdfInfo.GetPageCount()
        report.WriteLine "Author: " & pdfInfo.Author
        report.WriteLine "Title: " & pdfInfo.Title
        report.WriteLine "Producer: " & pdfInfo.Producer
        report.WriteLine "Subject: " & pdfInfo.Subject
        report.WriteLine "CreationDate: " & pdfInfo.CreationDate

        ' Text preview from the first page (index 0); skipped when empty
        pdfText.LoadDocumentFromFile(candidate)
        pageText = pdfText.GetTextFromPage(0)

        If Len(pageText) > 0 Then
            report.WriteLine "Text (a bit): "
            report.WriteLine Left(pageText, 200)
        End If

        ' Two blank lines separate consecutive entries
        report.WriteBlankLines 2

    End If

Next

report.Close

' Release the COM objects
Set pdfInfo = Nothing
Set pdfText = Nothing
Set fileSystem = Nothing