The sample code below shows how to extract data from a PDF document into a CSV file for Excel in C# using Bytescout PDF Extractor SDK.
In these screenshots you can see the input PDF invoice and the Excel CSV file with the data extracted from the invoice.
Input PDF file | |
↓ | |
Output CSV file for Excel |
C#
using System;
using System.IO;
using System.Text;
using Bytescout.PDFExtractor;

namespace Example
{
    /// <summary>
    /// Reads a fixed three-column table from the first page of "sample3.pdf"
    /// and writes its cells to "output.csv", one CSV record per table row.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: extracts the table cells, echoes them to the console,
        /// saves them as CSV, and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Index of the page passed to GetTextFromPage, and the number of table rows to read.
            const int pageIndex = 0;
            const int rowCount = 5;

            // Table dimensions (in points)
            const int tableX = 207;
            const int tableY = 110;
            const int rowHeight = 24;
            const int col1width = 177;
            const int col2width = 76;
            const int col3width = 76;

            StringBuilder stringBuilder = new StringBuilder();

            // Create the extractor inside a using statement so it is disposed
            // even if loading or extraction throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf");

                // Parse text from table cells
                for (int row = 0; row < rowCount; row++)
                {
                    // Each cell rectangle is offset from the table origin by the
                    // row index and the widths of the preceding columns.
                    int cellY = tableY + row * rowHeight;

                    string cell1 = extractor.GetTextFromPage(pageIndex, tableX, cellY, col1width, rowHeight).Trim();
                    string cell2 = extractor.GetTextFromPage(pageIndex, tableX + col1width, cellY, col2width, rowHeight).Trim();
                    string cell3 = extractor.GetTextFromPage(pageIndex, tableX + col1width + col2width, cellY, col3width, rowHeight).Trim();

                    Console.WriteLine("Line #{0}: {1}, {2}, {3}", row, cell1, cell2, cell3);

                    // Terminate every record with CRLF. The original format string
                    // ended in the literal characters "rn", which glued all records
                    // onto a single line.
                    stringBuilder.AppendFormat(
                        "{0},{1},{2}\r\n",
                        EscapeCsvField(cell1),
                        EscapeCsvField(cell2),
                        EscapeCsvField(cell3));
                }
            }

            // Save text to file
            File.WriteAllText("output.csv", stringBuilder.ToString());

            Console.WriteLine();
            Console.WriteLine("Data has been extracted to 'output.csv' file.");
            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadKey();
        }

        /// <summary>
        /// Quotes a CSV field when it contains a comma, a double quote, or a line
        /// break, doubling any embedded double quotes; other values are returned as-is.
        /// </summary>
        /// <param name="field">Raw cell text.</param>
        /// <returns>The field text, safe to place between commas in a CSV record.</returns>
        private static string EscapeCsvField(string field)
        {
            if (field.IndexOfAny(new[] { ',', '"', '\r', '\n' }) < 0)
            {
                return field;
            }

            return "\"" + field.Replace("\"", "\"\"") + "\"";
        }
    }
}