Use the sample source code below to detect tables in PDF files and convert each detected PDF table to a CSV file in C# and VBScript using PDF Extractor SDK.
See how you can detect tables in PDF files and convert those tables into a CSV file using the C# code snippet.
using System;
using Bytescout.PDFExtractor;
namespace ExtractTextByPages
{
    /// <summary>
    /// Console sample: finds every table in "sample3.pdf" with
    /// <c>TableDetector</c> and saves each one to its own CSV file with
    /// <c>CSVExtractor</c>. Output files are named
    /// "page-{pageIndex}-table-{tableNumber}.csv".
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Extracts all detected tables, then opens the first CSV
        /// file that was written (if any) in its associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Name of the first CSV file actually written; stays null when the
            // document contains no tables, so we never try to open a missing file.
            string firstOutputFile = null;

            // "using" releases both SDK objects even if extraction throws.
            using (CSVExtractor extractor = new CSVExtractor())
            using (TableDetector tdetector = new TableDetector())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                tdetector.RegistrationName = "demo";
                tdetector.RegistrationKey = "demo";

                // Load the same sample PDF document into both objects
                extractor.LoadDocumentFromFile("sample3.pdf");
                tdetector.LoadDocumentFromFile("sample3.pdf");

                // Get page count
                int pageCount = tdetector.GetPageCount();

                for (int i = 0; i < pageCount; i++)
                {
                    // 1-based number of the table on the current page (used in the file name)
                    int j = 1;

                    // Find the first table on this page; skip the page if there is none
                    if (!tdetector.FindTable(i))
                    {
                        continue;
                    }

                    do
                    {
                        // Restrict the CSV extractor to the rectangle reported by the table detector
                        extractor.SetExtractionArea(
                            tdetector.GetFoundTableRectangle_Left(),
                            tdetector.GetFoundTableRectangle_Top(),
                            tdetector.GetFoundTableRectangle_Width(),
                            tdetector.GetFoundTableRectangle_Height());

                        // Save the table into its own CSV file
                        string outputFile = "page-" + i + "-table-" + j + ".csv";
                        extractor.SavePageCSVToFile(i, outputFile);

                        if (firstOutputFile == null)
                        {
                            firstOutputFile = outputFile;
                        }

                        j++;
                    } while (tdetector.FindNextTable()); // search for the next table on the page
                }
            }

            if (firstOutputFile == null)
            {
                Console.WriteLine("No tables were found in sample3.pdf.");
                return;
            }

            // Open the first output file in the default associated application.
            // UseShellExecute is set explicitly because opening a document by file
            // name requires the shell, and the default differs between runtimes.
            System.Diagnostics.ProcessStartInfo startInfo =
                new System.Diagnostics.ProcessStartInfo(firstOutputFile);
            startInfo.UseShellExecute = true;
            System.Diagnostics.Process.Start(startInfo);
        }
    }
}
See how you can search for tables in PDF files and extract each of those tables into its own CSV file using the VBScript code snippet.
' Detects every table in sample3.pdf with TableDetector and saves each one
' to a separate CSV file with CSVExtractor, reporting progress via MsgBox.

' Create Bytescout.PDFExtractor.TableDetector object
Set tdetector = CreateObject("Bytescout.PDFExtractor.TableDetector")
tdetector.RegistrationName = "demo"
tdetector.RegistrationKey = "demo"

' Create Bytescout.PDFExtractor.CSVExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.CSVExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document into table detector
' (path is relative to the script's working directory - adjust it to where sample3.pdf is located)
tdetector.LoadDocumentFromFile "..\..\sample3.pdf"

' Load the same sample PDF document into CSV extractor
extractor.LoadDocumentFromFile "..\..\sample3.pdf"

' Get page count
pageCount = tdetector.GetPageCount()

For i = 0 To pageCount - 1

    ' Look for the first table on the page; the only parameter is the page index
    If tdetector.FindTable(i) Then

        Do
            MsgBox "Found a table on page #" & CStr(i) & " at left=" & CStr(tdetector.GetFoundTableRectangle_Left) & "; top=" & CStr(tdetector.GetFoundTableRectangle_Top) & "; width=" & CStr(tdetector.GetFoundTableRectangle_Width) & "; height=" & CStr(tdetector.GetFoundTableRectangle_Height)

            ' set extraction area to extract table data as CSV
            extractor.SetExtractionArea tdetector.GetFoundTableRectangle_Left, tdetector.GetFoundTableRectangle_Top, tdetector.GetFoundTableRectangle_Width, tdetector.GetFoundTableRectangle_Height

            ' define filename to save CSV (the table's top coordinate keeps names unique per page)
            CSVFileName = "page-" & CStr(i) & "-table-at-" & CStr(tdetector.GetFoundTableRectangle_Top) & ".csv"

            ' save CSV from this page (bounded by extraction area) into file
            extractor.SavePageCSVToFile i, CSVFileName

            MsgBox "Table saved into CSV as " & CSVFileName

            ' reset extraction area on the page
            extractor.ResetExtractionArea

        ' continue with the next table on the same page, if any
        Loop While tdetector.FindNextTable

    End If

Next

MsgBox "Done"

' Release both COM objects
Set extractor = Nothing
Set tdetector = Nothing