ByteScout PDF Extractor SDK – C# – Extract Text From Page Area

  • Home
  • /
  • Articles
  • /
  • ByteScout PDF Extractor SDK – C# – Extract Text From Page Area

ByteScout PDF Extractor SDK – C# – Extract Text From Page Area

Program.cs

using System;
using Bytescout.PDFExtractor;
using System.Drawing;

namespace ExtractTextFromPageArea
{
    /// <summary>
    /// Console sample: prints the text found inside a fixed rectangular area
    /// of every page of <c>sample2.pdf</c>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads <c>.\sample2.pdf</c>, and for each page restricts extraction to a
        /// rectangle at (0, 0) with width 200 and height 200, writes the extracted
        /// text to the console, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            TextExtractor extractor = new TextExtractor("demo", "demo");

            try
            {
                // Load document
                extractor.LoadDocumentFromFile(@".\sample2.pdf");

                // Get page count
                int pageCount = extractor.GetPageCount();

                // Rectangle to extract from; it is the same for every page,
                // so it is built once outside the loop.
                RectangleF location = new RectangleF(0, 0, 200, 200);

                // Iterate through pages (the index passed to the extractor starts at 0)
                for (int i = 0; i < pageCount; i++)
                {
                    // Set extraction area
                    extractor.SetExtractionArea(location);

                    // Extract text from the extraction area
                    string text = extractor.GetTextFromPage(i);

                    Console.WriteLine("Extracted from page #" + i + ":");
                    Console.WriteLine();
                    Console.WriteLine(text);

                    // Reset the extraction area
                    extractor.ResetExtractionArea();

                    Console.WriteLine();
                }
            }
            finally
            {
                // Cleanup: release the extractor even if loading or extraction throws.
                extractor.Dispose();
            }

            Console.WriteLine("Press any key to exit...");
            Console.ReadKey();
        }
    }
}


  Click here to get your Free Trial version of the SDK

Tutorials:

prev
next