Index PDF – C# sample

The Index PDF C# tutorial shows how to use the PDF Extractor SDK to extract data from a PDF document in C# so that PDF files can be indexed.

Use the C# source code sample below to index PDF documents. The full version of this sample is included in your PDF Extractor SDK free trial, in the "Index PDF Files" folder.

C#

using System;
using System.IO;
using Bytescout.PDFExtractor;

namespace IndexPDFFiles
{
    /// <summary>
    /// Console sample that prints document metadata and the first two lines of
    /// text for every PDF file found in a directory.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Enumerates the "*.pdf" files in a directory and, for each one, writes
        /// its file name, page count, author, title, producer, subject, creation
        /// date and the first two text lines of its first page to the console.
        /// </summary>
        /// <param name="args">
        /// Optional command-line arguments. When at least one is supplied,
        /// <c>args[0]</c> is used as the directory to scan; otherwise the
        /// sample's built-in default path is used.
        /// </param>
        static void Main(string[] args)
        {
            // NOTE(review): the default path below is kept exactly as found, but it
            // looks garbled (possibly a relative path such as "..\..\" that lost its
            // backslashes) -- confirm against the original sample.
            string directory = args.Length > 0 ? args[0] : @"........";

            // Both extractors are created once and reused for every file. The using
            // statements guarantee they are disposed even if a file fails to load.
            using (InfoExtractor infoExtractor = new InfoExtractor())
            using (TextExtractor textExtractor = new TextExtractor())
            {
                infoExtractor.RegistrationName = "demo";
                infoExtractor.RegistrationKey = "demo";

                textExtractor.RegistrationName = "demo";
                textExtractor.RegistrationKey = "demo";

                // List all PDF files in directory
                foreach (string file in Directory.GetFiles(directory, "*.pdf"))
                {
                    infoExtractor.LoadDocumentFromFile(file);

                    Console.WriteLine("File Name:      " + Path.GetFileName(file));
                    Console.WriteLine("Page Count:     " + infoExtractor.GetPageCount());
                    Console.WriteLine("Author:         " + infoExtractor.Author);
                    Console.WriteLine("Title:          " + infoExtractor.Title);
                    Console.WriteLine("Producer:       " + infoExtractor.Producer);
                    Console.WriteLine("Subject:        " + infoExtractor.Subject);
                    Console.WriteLine("CreationDate:   " + infoExtractor.CreationDate);
                    Console.WriteLine("Text (2 lines): ");

                    textExtractor.LoadDocumentFromFile(file);

                    // StringReader rejects a null string, so fall back to an empty one
                    // in case no text comes back for the page.
                    string firstPageText = textExtractor.GetTextFromPage(0) ?? string.Empty;

                    using (StringReader stringReader = new StringReader(firstPageText))
                    {
                        // ReadLine returns null once the text is exhausted;
                        // Console.WriteLine then simply emits an empty line.
                        Console.WriteLine(stringReader.ReadLine());
                        Console.WriteLine(stringReader.ReadLine());
                    }

                    Console.WriteLine();
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            // Keeps the console window open; ReadLine waits for the Enter key.
            Console.ReadLine();
        }
    }
}
prev
next