How to Find and Extract PDF Table to CSV in C# and VBScript using PDF Extractor SDK - ByteScout
Announcement
Our ByteScout SDK products are sunsetting as we focus on expanding new solutions.
Learn More Open modal
Close modal
Announcement Important Update
ByteScout SDK Sunsetting Notice
Our ByteScout SDK products are sunsetting as we focus on our new & improved solutions. Thank you for being part of our journey, and we look forward to supporting you in this next chapter!

How to Find and Extract PDF Table to CSV in C# and VBScript using PDF Extractor SDK

  • Home
  • /
  • Articles
  • /
  • How to Find and Extract PDF Table to CSV in C# and VBScript using PDF Extractor SDK

Use the sample source code below to detect tables in PDF files and convert each PDF table to a CSV file in C# and VBScript using PDF Extractor SDK.

C# Source Code

See how you can detect tables in PDF files and convert each of those tables into its own CSV file using the C# code snippet.

using System;
using Bytescout.PDFExtractor;

namespace ExtractTextByPages
{
	/// <summary>
	/// Console sample: detects tables on every page of a PDF document and saves
	/// each detected table to its own CSV file.
	/// </summary>
	class Program
	{
		/// <summary>
		/// Loads "sample3.pdf" into a table detector and a CSV extractor, writes one
		/// "page-{page}-table-{n}.csv" file per detected table, and then opens the
		/// first file that was written in its associated application.
		/// </summary>
		/// <param name="args">Command-line arguments (not used).</param>
		static void Main(string[] args)
		{
			const string inputFile = "sample3.pdf";

			// Name of the first CSV file actually written; stays null when no table is found.
			string firstOutputFile = null;

			// Release both SDK objects (and the document they hold open) as soon as
			// extraction is finished, even if an exception is thrown.
			// NOTE(review): relies on CSVExtractor and TableDetector implementing IDisposable - confirm against the SDK version in use.
			using (CSVExtractor extractor = new CSVExtractor())
			using (TableDetector tdetector = new TableDetector())
			{
				extractor.RegistrationName = "demo";
				extractor.RegistrationKey = "demo";

				tdetector.RegistrationName = "demo";
				tdetector.RegistrationKey = "demo";

				// Load the same sample PDF document into both objects:
				// the detector locates tables, the extractor exports them.
				extractor.LoadDocumentFromFile(inputFile);
				tdetector.LoadDocumentFromFile(inputFile);

				// Get page count
				int pageCount = tdetector.GetPageCount();

				for (int i = 0; i < pageCount; i++)
				{
					// 1-based number of the table on the current page, used in the file name
					int j = 1;

					// Find the first table on this page; skip the page if there is none
					if (tdetector.FindTable(i))
					{
						do
						{
							// Restrict the CSV extractor to the rectangle reported by the table detector
							extractor.SetExtractionArea(
								tdetector.GetFoundTableRectangle_Left(),
								tdetector.GetFoundTableRectangle_Top(),
								tdetector.GetFoundTableRectangle_Width(),
								tdetector.GetFoundTableRectangle_Height()
							);

							// Save the table into its own CSV file
							string outputFile = "page-" + i + "-table-" + j + ".csv";
							extractor.SavePageCSVToFile(i, outputFile);

							if (firstOutputFile == null)
							{
								firstOutputFile = outputFile;
							}

							j++;
						} while (tdetector.FindNextTable()); // search next table on the same page
					}
				}
			}

			if (firstOutputFile != null)
			{
				// Open the first output file in the default associated application.
				// UseShellExecute must be true to launch a document rather than an executable;
				// it defaults to true on .NET Framework but to false on .NET Core / .NET 5+.
				System.Diagnostics.ProcessStartInfo startInfo = new System.Diagnostics.ProcessStartInfo(firstOutputFile);
				startInfo.UseShellExecute = true;
				System.Diagnostics.Process.Start(startInfo);
			}
			else
			{
				// Nothing was written, so there is no file to open.
				Console.WriteLine("No tables were found in " + inputFile + ".");
			}
		}
	}
}

VBScript Source Code

See how you can search for tables in PDF files and extract those tables into CSV files using the VBScript code snippet.

' Finds every table in a PDF document with TableDetector and saves each
' detected table to its own CSV file with CSVExtractor.

' Create Bytescout.PDFExtractor.TableDetector object
Set tdetector = CreateObject("Bytescout.PDFExtractor.TableDetector")
tdetector.RegistrationName = "demo"
tdetector.RegistrationKey = "demo"

' Create Bytescout.PDFExtractor.CSVExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.CSVExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document into table detector
' (path is relative to the current directory: two levels up)
tdetector.LoadDocumentFromFile "..\..\sample3.pdf"

' Load the same sample PDF document into CSV extractor
extractor.LoadDocumentFromFile "..\..\sample3.pdf"

' Get page count
pageCount = tdetector.GetPageCount()

' Page indexes are zero-based
For i = 0 To pageCount - 1

	' Look for the first table on this page (parameter: page index)
	If tdetector.FindTable(i) Then
		Do
			MsgBox "Found a table on page #" & CStr(i) & " at left=" & CStr(tdetector.GetFoundTableRectangle_Left) & "; top=" & CStr(tdetector.GetFoundTableRectangle_Top) & "; width=" & CStr(tdetector.GetFoundTableRectangle_Width) & "; height=" & CStr(tdetector.GetFoundTableRectangle_Height)

			' set extraction area to the found table rectangle to extract table data as CSV
			extractor.SetExtractionArea tdetector.GetFoundTableRectangle_Left, tdetector.GetFoundTableRectangle_Top, tdetector.GetFoundTableRectangle_Width, tdetector.GetFoundTableRectangle_Height

			' define filename to save CSV (page index plus the table's top coordinate)
			CSVFileName = "page-" & CStr(i) & "-table-at-" & CStr(tdetector.GetFoundTableRectangle_Top) & ".csv"

			' save CSV from this page (bounded by extraction area) into file
			extractor.SavePageCSVToFile i, CSVFileName

			MsgBox "Table saved into CSV as " & CSVFileName

			' reset extraction area on the page
			extractor.ResetExtractionArea

		' continue with the next table on the same page, if any
		Loop While tdetector.FindNextTable
	End If

Next

MsgBox "Done"

' Release both COM objects
Set extractor = Nothing
Set tdetector = Nothing

Tutorials:

prev
next