This tutorial shows how to extract text from a PDF document by X/Y coordinates in C#.
using System;
using System.IO;
using System.Text;
using Bytescout.PDFExtractor;
using System.Drawing;
using System.Diagnostics;
namespace Example
{
/// <summary>
/// Console sample that reads a three-column table out of "sample3.pdf" by
/// pointing the extractor at one hand-measured rectangle per cell, echoes the
/// cells to the console, and saves them to "output.txt".
/// </summary>
class Program
{
    /// <summary>
    /// Entry point: extracts the table cells, writes them to "output.txt",
    /// then waits for a key press and opens the file with its associated program.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // Table dimensions (measured in points by hand using the original 100% scaled PDF document)
        const int tableX = 207;
        const int tableY = 110;
        const int rowHeight = 24;
        const int col1width = 177;
        const int col2width = 76;
        const int col3width = 76;
        const int rowCount = 5;

        // Horizontal cell boundaries do not depend on the row, so compute them once.
        const int col1Left = tableX;
        const int col2Left = col1Left + col1width;
        const int col3Left = col2Left + col2width;
        const int tableRight = col3Left + col3width;

        StringBuilder stringBuilder = new StringBuilder();

        // NOTE(review): assumes TextExtractor implements IDisposable, as the
        // vendor samples call Dispose() on it; the using statement releases the
        // loaded document even if extraction throws.
        using (TextExtractor extractor = new TextExtractor())
        {
            extractor.RegistrationName = "demo";
            extractor.RegistrationKey = "demo";

            // Load sample PDF document
            extractor.LoadDocumentFromFile("sample3.pdf");

            // Parse text from table cells, one row at a time
            for (int row = 0; row < rowCount; row++)
            {
                int top = tableY + row * rowHeight;
                int bottom = top + rowHeight;

                string cell1 = ReadCell(extractor, col1Left, top, col2Left, bottom);
                string cell2 = ReadCell(extractor, col2Left, top, col3Left, bottom);
                string cell3 = ReadCell(extractor, col3Left, top, tableRight, bottom);

                Console.WriteLine("Line #{0}: {1}, {2}, {3}", row, cell1, cell2, cell3);

                // The original literal ended in the text "rnrn" (an escape
                // sequence that lost its backslashes); restore the line breaks
                // so each record is separated in the output file.
                stringBuilder.AppendFormat("Line #{0}: {1},{2},{3}\r\n\r\n", row, cell1, cell2, cell3);
            }
        }

        // Save text to file
        File.WriteAllText("output.txt", stringBuilder.ToString());

        Console.WriteLine();
        Console.WriteLine("Data has been extracted to 'output.txt' file.");
        Console.WriteLine();
        Console.WriteLine("Press any key to continue to open OUTPUT.TXT in Notepad...");
        Console.ReadKey();

        // Opening a document (rather than an executable) requires shell
        // execution. It is the default on .NET Framework but must be requested
        // explicitly on .NET Core / .NET 5+, where the default is false.
        Process.Start(new ProcessStartInfo("output.txt") { UseShellExecute = true });
    }

    /// <summary>
    /// Restricts extraction to the given rectangle and returns the trimmed text
    /// the extractor reports for page index 0 within it.
    /// </summary>
    /// <param name="extractor">Extractor with the document already loaded.</param>
    /// <param name="left">Left edge of the cell.</param>
    /// <param name="top">Top edge of the cell.</param>
    /// <param name="right">Right edge of the cell.</param>
    /// <param name="bottom">Bottom edge of the cell.</param>
    /// <returns>The cell text with leading and trailing whitespace removed.</returns>
    private static string ReadCell(TextExtractor extractor, int left, int top, int right, int bottom)
    {
        extractor.SetExtractionArea(Rectangle.FromLTRB(left, top, right, bottom));
        return extractor.GetTextFromPage(0).Trim();
    }
}
}