This "Index PDF" C# tutorial shows how to use the PDF Extractor SDK to extract data from PDF documents in C# (C Sharp) so that the PDF files can be indexed.
Use the C# source code sample below to index PDF documents. The full version of this sample is included in your PDF Extractor SDK free trial, in the "Index PDF Files" folder.
C#
using System;
using System.IO;
using Bytescout.PDFExtractor;

namespace IndexPDFFiles
{
    /// <summary>
    /// Console sample that lists the PDF files in a directory and, for each one,
    /// prints its document information followed by the first two lines of text
    /// returned for page index 0.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Iterates over every "*.pdf" file in the sample directory,
        /// writes the file's metadata and a two-line text preview to the console,
        /// then waits for the user to press Enter.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The extractors are wrapped in using statements so they are released
            // even if loading or reading one of the documents throws.
            using (InfoExtractor infoExtractor = new InfoExtractor())
            using (TextExtractor textExtractor = new TextExtractor())
            {
                infoExtractor.RegistrationName = "demo";
                infoExtractor.RegistrationKey = "demo";

                textExtractor.RegistrationName = "demo";
                textExtractor.RegistrationKey = "demo";

                // List all PDF files in directory.
                // NOTE(review): the directory literal looks garbled (possibly a relative
                // path whose separators were lost) - confirm the intended folder.
                foreach (string file in Directory.GetFiles(@"........", "*.pdf"))
                {
                    // Document information (metadata).
                    infoExtractor.LoadDocumentFromFile(file);

                    Console.WriteLine("File Name: " + Path.GetFileName(file));
                    Console.WriteLine("Page Count: " + infoExtractor.GetPageCount());
                    Console.WriteLine("Author: " + infoExtractor.Author);
                    Console.WriteLine("Title: " + infoExtractor.Title);
                    Console.WriteLine("Producer: " + infoExtractor.Producer);
                    Console.WriteLine("Subject: " + infoExtractor.Subject);
                    Console.WriteLine("CreationDate: " + infoExtractor.CreationDate);
                    Console.WriteLine("Text (2 lines): ");

                    // Text preview: the first two lines of the text returned for page index 0.
                    textExtractor.LoadDocumentFromFile(file);

                    using (StringReader stringReader = new StringReader(textExtractor.GetTextFromPage(0)))
                    {
                        // ReadLine returns null once the text is exhausted; Console.WriteLine
                        // then prints an empty line, so short pages are handled gracefully.
                        Console.WriteLine(stringReader.ReadLine());
                        Console.WriteLine(stringReader.ReadLine());
                    }

                    Console.WriteLine();
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
    }
}