ByteScout PDF Extractor SDK – C# – Index PDF Files

  • Home
  • /
  • Articles
  • /
  • ByteScout PDF Extractor SDK – C# – Index PDF Files

ByteScout PDF Extractor SDK – C# – Index PDF Files

Program.cs

using System;
using System.IO;
using Bytescout.PDFExtractor;

namespace IndexPDFFiles
{
	/// <summary>
	/// Console sample that lists the PDF files in a directory and prints, for each
	/// one, its document metadata followed by the first two lines of text taken
	/// from page index 0.
	/// </summary>
	class Program
	{
		/// <summary>
		/// Entry point. Enumerates "*.pdf" files in the relative directory
		/// <c>..\..\..\..</c>, writes metadata and a short text preview for each
		/// file to the console, then waits for the user before exiting.
		/// </summary>
		/// <param name="args">Command-line arguments (not used).</param>
		static void Main(string[] args)
		{
			// Both extractors are wrapped in using statements so that they are
			// disposed even when loading or reading a document throws part-way
			// through the loop.
			using (InfoExtractor infoExtractor = new InfoExtractor())
			using (TextExtractor textExtractor = new TextExtractor())
			{
				infoExtractor.RegistrationName = "demo";
				infoExtractor.RegistrationKey = "demo";

				textExtractor.RegistrationName = "demo";
				textExtractor.RegistrationKey = "demo";

				// List all PDF files in directory
				foreach (string file in Directory.GetFiles(@"..\..\..\..", "*.pdf"))
				{
					infoExtractor.LoadDocumentFromFile(file);

					Console.WriteLine("File Name:      " + Path.GetFileName(file));
					Console.WriteLine("Page Count:     " + infoExtractor.GetPageCount());
					Console.WriteLine("Author:         " + infoExtractor.Author);
					Console.WriteLine("Title:          " + infoExtractor.Title);
					Console.WriteLine("Producer:       " + infoExtractor.Producer);
					Console.WriteLine("Subject:        " + infoExtractor.Subject);
					Console.WriteLine("CreationDate:   " + infoExtractor.CreationDate);
					Console.WriteLine("Text (first 2 lines): ");

					// Load a couple of lines from each document
					textExtractor.LoadDocumentFromFile(file);

					// StringReader rejects a null string, so fall back to an empty
					// one in case the extractor returns no text for the page.
					string firstPageText = textExtractor.GetTextFromPage(0) ?? string.Empty;
					using (StringReader stringReader = new StringReader(firstPageText))
					{
						// ReadLine returns null once the text is exhausted;
						// Console.WriteLine then emits just a line terminator.
						Console.WriteLine(stringReader.ReadLine());
						Console.WriteLine(stringReader.ReadLine());
					}
					Console.WriteLine();
				}
			}

			Console.WriteLine();
			Console.WriteLine("Press any key to continue...");
			// Waits for the Enter key (ReadLine), keeping the console window open.
			Console.ReadLine();
		}
	}
}


  Click here to get your Free Trial version of the SDK

Tutorials:

prev
next