How to extract text from rectangle with X Y coordinates in PDF - ByteScout

How to extract text from rectangle with X Y coordinates in PDF

  • Home
  • /
  • Articles
  • /
  • How to extract text from rectangle with X Y coordinates in PDF

With PDF Extractor SDK, you can extract text from a specific part of a PDF document defined by X and Y coordinates. The sample code below shows how to extract text from a rectangular region of a PDF page, using a table whose cell positions are known in advance.

C#

using System;
using System.IO;
using System.Text;
using Bytescout.PDFExtractor;
using System.Drawing;
using System.Diagnostics;

namespace Example
{
    /// <summary>
    /// Console sample that reads a table from a PDF page cell by cell, by restricting
    /// the text extractor to one rectangular region at a time.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "sample3.pdf", extracts three columns from five table rows on page
        /// index 0, prints each row to the console, writes the rows to "output.txt",
        /// and then opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Table dimensions (measured in points by hand using the original 100% scaled PDF document)
            const int tableX = 207;
            const int tableY = 110;
            const int rowHeight = 24;
            const int col1width = 177;
            const int col2width = 76;
            const int col3width = 76;
            const int rowCount = 5;

            // Horizontal edges of the three columns, left to right.
            const int col1Left = tableX;
            const int col2Left = col1Left + col1width;
            const int col3Left = col2Left + col2width;
            const int col3Right = col3Left + col3width;

            StringBuilder stringBuilder = new StringBuilder();

            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // NOTE(review): assumes TextExtractor implements IDisposable (the SDK samples
            // call Dispose() on it) - the using block releases it once extraction is done.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf");

                // Parse text from table cells
                for (int row = 0; row < rowCount; row++)
                {
                    int top = tableY + row * rowHeight;
                    int bottom = top + rowHeight;

                    string cell1 = ExtractCellText(extractor, col1Left, top, col2Left, bottom);
                    string cell2 = ExtractCellText(extractor, col2Left, top, col3Left, bottom);
                    string cell3 = ExtractCellText(extractor, col3Left, top, col3Right, bottom);

                    Console.WriteLine("Line #{0}: {1}, {2}, {3}", row, cell1, cell2, cell3);

                    // Each record is followed by an empty line. The original literal "rnrn"
                    // was "\r\n\r\n" with its backslashes lost, which glued all records together.
                    stringBuilder.AppendFormat("Line #{0}: {1},{2},{3}\r\n\r\n", row, cell1, cell2, cell3);
                }
            }

            // Save text to file
            File.WriteAllText("output.txt", stringBuilder.ToString());

            Console.WriteLine();
            Console.WriteLine("Data has been extracted to 'output.txt' file.");
            Console.WriteLine();
            Console.WriteLine("Press any key to continue to open OUTPUT.TXT in Notepad...");
            Console.ReadKey();

            // Opening a document (rather than an executable) requires shell execution.
            // That is the default on .NET Framework but not on .NET Core / .NET 5+,
            // so request it explicitly.
            Process.Start(new ProcessStartInfo("output.txt") { UseShellExecute = true });
        }

        /// <summary>
        /// Restricts the extractor to the given rectangle and returns the trimmed text
        /// found there on page index 0.
        /// </summary>
        /// <param name="extractor">Extractor with a document already loaded.</param>
        /// <param name="left">Left edge of the cell.</param>
        /// <param name="top">Top edge of the cell.</param>
        /// <param name="right">Right edge of the cell.</param>
        /// <param name="bottom">Bottom edge of the cell.</param>
        /// <returns>Text extracted from the rectangle, with surrounding whitespace removed.</returns>
        private static string ExtractCellText(TextExtractor extractor, int left, int top, int right, int bottom)
        {
            extractor.SetExtractionArea(Rectangle.FromLTRB(left, top, right, bottom));
            return extractor.GetTextFromPage(0).Trim();
        }
    }
}

Tutorials:

prev
next