How to Find and Extract PDF Table to CSV in C# and VBScript using PDF Extractor SDK - ByteScout

How to Find and Extract PDF Table to CSV in C# and VBScript using PDF Extractor SDK

  • Home
  • /
  • Articles
  • /
  • How to Find and Extract PDF Table to CSV in C# and VBScript using PDF Extractor SDK

Use the sample source code below to detect tables in PDF files and convert each detected PDF table to a CSV file in C# and VBScript using PDF Extractor SDK.

C# Source Code

See how you can detect tables in PDF files and convert those tables into a CSV file using the C# code snippet.

using System;
using Bytescout.PDFExtractor;

namespace ExtractTextByPages
{
    /// <summary>
    /// Sample console program: detects every table on every page of a PDF
    /// document and saves each detected table into its own CSV file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Loads the sample PDF into a table detector and a CSV
        /// extractor, then writes one "page-{page}-table-{n}.csv" file per
        /// detected table.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // NOTE(review): file name taken from the VBScript sample; adjust the
            // path to wherever the sample document actually lives.
            const string inputFile = "sample3.pdf";

            // Create Bytescout.PDFExtractor.CSVExtractor instance
            CSVExtractor extractor = new CSVExtractor();
            extractor.RegistrationName = "demo";
            extractor.RegistrationKey = "demo";

            // Create Bytescout.PDFExtractor.TableDetector instance
            TableDetector tdetector = new TableDetector();
            tdetector.RegistrationKey = "demo";
            tdetector.RegistrationName = "demo";

            // Load sample PDF document into both objects: the detector locates
            // the tables, the extractor converts the located areas to CSV.
            extractor.LoadDocumentFromFile(inputFile);
            tdetector.LoadDocumentFromFile(inputFile);

            // Get page count
            int pageCount = tdetector.GetPageCount();

            // Remember the first file written so it can be opened afterwards.
            string firstOutputFile = null;

            for (int i = 0; i < pageCount; i++)
            {
                // 1-based index of the table on the current page
                int j = 1;

                // find first table and continue if found
                if (tdetector.FindTable(i))
                {
                    do
                    {
                        // set extraction area for CSV extractor to rectangle given by table detector
                        // NOTE(review): accessor names mirror the ones used by the VBScript
                        // sample; confirm they match the installed SDK version.
                        extractor.SetExtractionArea(
                            tdetector.GetFoundTableRectangle_Left(),
                            tdetector.GetFoundTableRectangle_Top(),
                            tdetector.GetFoundTableRectangle_Width(),
                            tdetector.GetFoundTableRectangle_Height());

                        // and finally save the table into CSV file
                        string outputFile = "page-" + i + "-table-" + j + ".csv";
                        extractor.SavePageCSVToFile(i, outputFile);

                        if (firstOutputFile == null)
                        {
                            firstOutputFile = outputFile;
                        }

                        // advance the table counter so the next table on this
                        // page does not overwrite the file just written
                        j++;
                    } while (tdetector.FindNextTable()); // search next table
                }
            }

            // Open first output file in default associated application
            // (skipped when no table was found, because no file exists then).
            if (firstOutputFile != null)
            {
                System.Diagnostics.ProcessStartInfo startInfo =
                    new System.Diagnostics.ProcessStartInfo(firstOutputFile);
                // Required for opening a document (rather than an executable)
                // on runtimes where shell execution is not the default.
                startInfo.UseShellExecute = true;
                System.Diagnostics.Process.Start(startInfo);
            }
        }
    }
}

VBScript Source Code

See how you can search for tables in PDF files and extract those tables into a CSV file using the VBScript code snippet.

' Detects every table on every page of the sample PDF document and saves
' each detected table into its own CSV file, reporting progress via MsgBox.

' Create Bytescout.PDFExtractor.TableDetector object
Set tdetector = CreateObject("Bytescout.PDFExtractor.TableDetector")
tdetector.RegistrationName = "demo"
tdetector.RegistrationKey = "demo"

' Create Bytescout.PDFExtractor.CSVExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.CSVExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document into table detector
tdetector.LoadDocumentFromFile "..\..\sample3.pdf"

' Load sample PDF document into CSV extractor
extractor.LoadDocumentFromFile "..\..\sample3.pdf"

' Get page count
pageCount = tdetector.GetPageCount()

For i = 0 To pageCount - 1
	' parameter is the zero-based page index to search for a table
	If tdetector.FindTable(i) Then
		Do
			MsgBox "Found a table on page #" & CStr(i) & " at left=" & CStr(tdetector.GetFoundTableRectangle_Left) & "; top=" & CStr(tdetector.GetFoundTableRectangle_Top) & "; width=" & CStr(tdetector.GetFoundTableRectangle_Width) & "; height=" & CStr(tdetector.GetFoundTableRectangle_Height)

			' set extraction area to extract table data as CSV
			extractor.SetExtractionArea tdetector.GetFoundTableRectangle_Left, tdetector.GetFoundTableRectangle_Top, tdetector.GetFoundTableRectangle_Width, tdetector.GetFoundTableRectangle_Height

			' define filename to save CSV (the table's top coordinate keeps
			' the names of several tables on one page distinct)
			CSVFileName = "page-" & CStr(i) & "-table-at-" & CStr(tdetector.GetFoundTableRectangle_Top) & ".csv"

			' save CSV from this page (bounded by extraction area) into file
			extractor.SavePageCSVToFile i, CSVFileName

			MsgBox "Table saved into CSV as " & CSVFileName

			' reset extraction area on the page
			' NOTE(review): method name assumed from the SDK; confirm against the installed version
			extractor.ResetExtractionArea

		' search for the next table on the same page
		Loop While tdetector.FindNextTable
	End If
Next

MsgBox "Done"

' Release both COM objects
Set extractor = Nothing
Set tdetector = Nothing