With PDF Extractor SDK, you can extract text from specific parts of a PDF document defined by X and Y coordinates. This sample code shows how to extract text from a specific rectangular region of a PDF page given by its X and Y coordinates.
using System;
using System.IO;
using System.Text;
using Bytescout.PDFExtractor;
using System.Drawing;
using System.Diagnostics;
namespace Example
{
    /// <summary>
    /// Console sample that reads a table out of a PDF page by extracting the text
    /// of one rectangular region per table cell.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Extracts a 5-row, 3-column table from "sample3.pdf", prints each row to the
        /// console, writes the rows to "output.txt" and finally opens that file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Table dimensions (measured in points by hand using the original 100% scaled PDF document)
            const int tableX = 207;
            const int tableY = 110;
            const int rowHeight = 24;
            const int col1width = 177;
            const int col2width = 76;
            const int col3width = 76;
            const int rowCount = 5;

            // Horizontal cell edges, left to right; each column starts where the previous one ends.
            const int col1Left = tableX;
            const int col2Left = col1Left + col1width;
            const int col3Left = col2Left + col2width;
            const int col3Right = col3Left + col3width;

            StringBuilder stringBuilder = new StringBuilder();

            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The using block disposes the extractor (and the document it loaded)
            // even if extraction throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf");

                // Parse text from table cells
                for (int row = 0; row < rowCount; row++)
                {
                    // Vertical extent of the current row; shared by all three cells.
                    int top = tableY + row * rowHeight;
                    int bottom = top + rowHeight;

                    string cell1 = ReadCell(extractor, col1Left, top, col2Left, bottom);
                    string cell2 = ReadCell(extractor, col2Left, top, col3Left, bottom);
                    string cell3 = ReadCell(extractor, col3Left, top, col3Right, bottom);

                    Console.WriteLine("Line #{0}: {1}, {2}, {3}", row, cell1, cell2, cell3);

                    // The escapes must be real CR/LF pairs; without the backslashes the
                    // literal text "rnrn" is appended and all rows end up on one line.
                    stringBuilder.AppendFormat("Line #{0}: {1},{2},{3}\r\n\r\n", row, cell1, cell2, cell3);
                }
            }

            // Save text to file
            File.WriteAllText("output.txt", stringBuilder.ToString());

            Console.WriteLine();
            Console.WriteLine("Data has been extracted to 'output.txt' file.");
            Console.WriteLine();
            Console.WriteLine("Press any key to continue to open OUTPUT.TXT in Notepad...");
            Console.ReadKey();

            // Opening a document (rather than an executable) needs shell execution.
            // It is the default on .NET Framework only, so request it explicitly.
            ProcessStartInfo openOutput = new ProcessStartInfo("output.txt");
            openOutput.UseShellExecute = true;
            Process.Start(openOutput);
        }

        /// <summary>
        /// Restricts extraction to the given rectangle and returns the trimmed text
        /// found there on the page with index 0.
        /// </summary>
        /// <param name="extractor">Extractor with a document already loaded.</param>
        /// <param name="left">Left edge of the cell.</param>
        /// <param name="top">Top edge of the cell.</param>
        /// <param name="right">Right edge of the cell.</param>
        /// <param name="bottom">Bottom edge of the cell.</param>
        /// <returns>The cell text with leading and trailing whitespace removed.</returns>
        private static string ReadCell(TextExtractor extractor, int left, int top, int right, int bottom)
        {
            extractor.SetExtractionArea(Rectangle.FromLTRB(left, top, right, bottom));
            return extractor.GetTextFromPage(0).Trim();
        }
    }
}
Tutorials: