This Index PDF C# tutorial shows how to use the PDF Extractor SDK to extract data from PDF documents in C# for indexing PDF files.
Use the C# source code sample below to index PDF documents. You can get the full version of this sample in the Index PDF Files folder of your PDF Extractor SDK free trial.
C#
using System;
using System.IO;
using Bytescout.PDFExtractor;

namespace IndexPDFFiles
{
    /// <summary>
    /// Console sample that lists every PDF file in a directory and prints its
    /// document properties together with the first two text lines of page 0.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Scans a directory for "*.pdf" files and writes a short
        /// index entry for each of them to the console.
        /// </summary>
        /// <param name="args">
        /// Optional. When a first argument is supplied it is used as the directory
        /// to scan; otherwise the sample's default relative directory is used.
        /// </param>
        static void Main(string[] args)
        {
            // Default is two levels above the working directory; presumably this is
            // the project folder when the sample runs from bin\Debug (verify against
            // the sample project layout).
            string directory = args.Length > 0 ? args[0] : @"..\..\";

            // Both extractors are reused for every file and released when the scan
            // is finished, so the SDK's resources are not leaked.
            using (InfoExtractor infoExtractor = new InfoExtractor())
            using (TextExtractor textExtractor = new TextExtractor())
            {
                // Create Bytescout.PDFExtractor.InfoExtractor instance
                infoExtractor.RegistrationName = "demo";
                infoExtractor.RegistrationKey = "demo";

                textExtractor.RegistrationName = "demo";
                textExtractor.RegistrationKey = "demo";

                // List all PDF files in directory
                foreach (string file in Directory.GetFiles(directory, "*.pdf"))
                {
                    infoExtractor.LoadDocumentFromFile(file);

                    int pageCount = infoExtractor.GetPageCount();

                    Console.WriteLine("File Name: " + Path.GetFileName(file));
                    Console.WriteLine("Page Count: " + pageCount);
                    Console.WriteLine("Author: " + infoExtractor.Author);
                    Console.WriteLine("Title: " + infoExtractor.Title);
                    Console.WriteLine("Producer: " + infoExtractor.Producer);
                    Console.WriteLine("Subject: " + infoExtractor.Subject);
                    Console.WriteLine("CreationDate: " + infoExtractor.CreationDate);
                    Console.WriteLine("Text (2 lines): ");

                    // Only ask for page 0 when the document actually has a page.
                    if (pageCount > 0)
                    {
                        textExtractor.LoadDocumentFromFile(file);

                        using (StringReader stringReader = new StringReader(textExtractor.GetTextFromPage(0)))
                        {
                            // ReadLine returns null past the end of the text;
                            // Console.WriteLine then prints an empty line.
                            Console.WriteLine(stringReader.ReadLine());
                            Console.WriteLine(stringReader.ReadLine());
                        }
                    }

                    Console.WriteLine();
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
    }
}