The sample code below shows how to extract data from a PDF document into a CSV file (which can be opened in Excel) in C# using the Bytescout PDF Extractor SDK.

In these screenshots you can see the input PDF invoice and the Excel CSV file with the data extracted from the invoice.

Input PDF file: the PDF invoice to be converted to a CSV Excel file.
Output CSV file for Excel: the CSV file with data extracted from the PDF invoice.


using System;
using System.IO;
using System.Text;
using Bytescout.PDFExtractor;

namespace Example
{
	/// <summary>
	/// Console sample: reads a fixed grid of table cells (5 rows x 3 columns) from page index 0
	/// of a PDF document using Bytescout TextExtractor and writes them to "output.csv".
	/// </summary>
	class Program
	{
		/// <summary>
		/// Extracts the table cells, echoes each row to the console and saves the rows as CSV.
		/// </summary>
		/// <param name="args">
		/// Optional: args[0] is the path of the PDF to read. When omitted, a placeholder
		/// file name is used (see the note inside the method).
		/// </param>
		static void Main(string[] args)
		{
			// NOTE(review): the original listing had a "Load sample PDF document" comment with no
			// load call after it, so the input file name is unknown. "sample.pdf" is a placeholder -
			// replace it with the real sample document or pass the path as the first argument.
			string inputFile = args.Length > 0 ? args[0] : "sample.pdf";

			// Table dimensions (in points)
			const int tableX = 207;
			const int tableY = 110;
			const int rowHeight = 24;
			const int col1width = 177;
			const int col2width = 76;
			const int col3width = 76;
			const int rowCount = 5;

			StringBuilder stringBuilder = new StringBuilder();

			// Create Bytescout.PDFExtractor.TextExtractor instance; the using statement releases
			// the extractor even when loading or extraction throws.
			using (TextExtractor extractor = new TextExtractor())
			{
				extractor.RegistrationName = "demo";
				extractor.RegistrationKey = "demo";

				// Load sample PDF document
				extractor.LoadDocumentFromFile(inputFile);

				// Parse text from table cells
				for (int row = 0; row < rowCount; row++)
				{
					// Top edge of the current row; the three cells sit side by side on it.
					int rowTop = tableY + row * rowHeight;

					string cell1 = extractor.GetTextFromPage(0, tableX, rowTop, col1width, rowHeight).Trim();
					string cell2 = extractor.GetTextFromPage(0, tableX + col1width, rowTop, col2width, rowHeight).Trim();
					string cell3 = extractor.GetTextFromPage(0, tableX + col1width + col2width, rowTop, col3width, rowHeight).Trim();

					Console.WriteLine("Line #{0}: {1}, {2}, {3}", row, cell1, cell2, cell3);

					// Terminate each record with a real CR LF (the listing had the garbled text "rn")
					// and quote fields so that values such as "1,000.00" stay in one column.
					stringBuilder.AppendFormat("{0},{1},{2}\r\n", EscapeCsvField(cell1), EscapeCsvField(cell2), EscapeCsvField(cell3));
				}
			}

			// Save text to file
			File.WriteAllText("output.csv", stringBuilder.ToString());

			Console.WriteLine("Data has been extracted to 'output.csv' file.");
			Console.WriteLine("Press any key to continue...");
			// Actually wait for the key press that the prompt above asks for.
			Console.ReadKey();
		}

		/// <summary>
		/// Quotes a CSV field when it contains a comma, a double quote or a line break,
		/// doubling any embedded double quotes; other values are returned unchanged.
		/// </summary>
		/// <param name="field">Raw cell text.</param>
		/// <returns>The field text, safe to place between commas in a CSV record.</returns>
		private static string EscapeCsvField(string field)
		{
			if (field.IndexOfAny(new[] { ',', '"', '\r', '\n' }) < 0)
			{
				return field;
			}

			return "\"" + field.Replace("\"", "\"\"") + "\"";
		}
	}
}