This Index PDF C# tutorial shows how to use the PDF Extractor SDK to extract data from PDF documents in C# so that the PDF files can be indexed.
Use the C# source code sample below to index PDF documents. The full version of this sample is included in the PDF Extractor SDK free trial, in the "Index PDF Files" folder.
C#
using System;
using System.IO;
using Bytescout.PDFExtractor;
namespace IndexPDFFiles
{
    /// <summary>
    /// Console sample that lists the PDF files in a directory and, for each one,
    /// prints its document metadata followed by the first two lines of text
    /// extracted from the page at index 0.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Program entry point.
        /// </summary>
        /// <param name="args">
        /// Optional. When at least one argument is supplied, the first one is used as the
        /// directory to scan for "*.pdf" files; otherwise the built-in default path is used.
        /// </param>
        static void Main(string[] args)
        {
            // NOTE(review): the default literal below looks like a relative path whose
            // separators were lost (e.g. a "..\..\"-style path) - confirm against the
            // original sample. It is kept as-is; pass a directory as the first argument
            // to override it.
            string directory = (args != null && args.Length > 0) ? args[0] : @"........";

            // Both extractors are wrapped in using statements so that whatever resources
            // they hold for the loaded document are released even if a file fails to load.
            using (InfoExtractor infoExtractor = new InfoExtractor())
            using (TextExtractor textExtractor = new TextExtractor())
            {
                // Create Bytescout.PDFExtractor.InfoExtractor instance
                infoExtractor.RegistrationName = "demo";
                infoExtractor.RegistrationKey = "demo";

                textExtractor.RegistrationName = "demo";
                textExtractor.RegistrationKey = "demo";

                // List all PDF files in directory
                foreach (string file in Directory.GetFiles(directory, "*.pdf"))
                {
                    // Document-level metadata
                    infoExtractor.LoadDocumentFromFile(file);

                    Console.WriteLine("File Name: " + Path.GetFileName(file));
                    Console.WriteLine("Page Count: " + infoExtractor.GetPageCount());
                    Console.WriteLine("Author: " + infoExtractor.Author);
                    Console.WriteLine("Title: " + infoExtractor.Title);
                    Console.WriteLine("Producer: " + infoExtractor.Producer);
                    Console.WriteLine("Subject: " + infoExtractor.Subject);
                    Console.WriteLine("CreationDate: " + infoExtractor.CreationDate);

                    // Text preview: first two lines of the page at index 0
                    Console.WriteLine("Text (2 lines): ");
                    textExtractor.LoadDocumentFromFile(file);

                    using (StringReader stringReader = new StringReader(textExtractor.GetTextFromPage(0)))
                    {
                        Console.WriteLine(stringReader.ReadLine());
                        Console.WriteLine(stringReader.ReadLine());
                    }

                    Console.WriteLine();
                }
            }

            // Keep the console window open until the user presses Enter.
            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
    }
}