This sample source code shows how to extract table structure from a PDF in C# and VBScript using the ByteScout PDF Extractor SDK.
You may also find it useful to check how to extract data from PDF tables containing superscript values in C#.
Use this code snippet to extract table structure from a PDF in the C# programming language.
using Bytescout.PDFExtractor;
using System.Diagnostics;
using System;
namespace TableStructure
{
    /// <summary>
    /// Console sample that prints the value of every structured (table) cell
    /// found in a PDF document using the ByteScout PDF Extractor SDK.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "sample3.pdf", walks each page row by row and column by column,
        /// writes every cell value to the console, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.StructuredExtractor instance (former TableExtractor).
            // The using statement disposes the extractor even when extraction throws.
            using (StructuredExtractor extractor = new StructuredExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf");

                // The document is not modified inside the loop, so read the page count once.
                int pageCount = extractor.GetPageCount();

                for (int ipage = 0; ipage < pageCount; ipage++)
                {
                    Console.WriteLine("starting extraction from page #" + ipage);

                    // Prepare the structure of the current page before querying its rows.
                    extractor.PrepareStructure(ipage);

                    int rowCount = extractor.GetRowCount(ipage);
                    for (int row = 0; row < rowCount; row++)
                    {
                        // The column count is queried per row.
                        int columnCount = extractor.GetColumnCount(ipage, row);
                        for (int col = 0; col < columnCount; col++)
                        {
                            Console.WriteLine(extractor.GetCellValue(ipage, row, col));
                        }
                    }
                }
            }

            Console.WriteLine("Press any key..");
            Console.ReadKey();
        }
    }
}
Use this code sample to extract table structure from a PDF in the VBScript programming language.
' Shows the value of every structured (table) cell of sample3.pdf in a message box,
' walking the document page by page, row by row and column by column.

' Create Bytescout.PDFExtractor.StructuredExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.StructuredExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document
extractor.LoadDocumentFromFile "../../sample3.pdf"

For ipage = 0 To extractor.GetPageCount() - 1
    ' Prepare the structure of the current page before querying its rows
    extractor.PrepareStructure ipage
    rowCount = extractor.GetRowCount(ipage)

    For row = 0 To rowCount - 1
        ' The column count is queried per row
        columnCount = extractor.GetColumnCount(ipage, row)

        For col = 0 To columnCount - 1
            ' Use & throughout so concatenation never falls back to numeric addition
            MsgBox "Cell at page #" & CStr(ipage) & ", row=" & CStr(row) & ", column=" & CStr(col) & vbCrLf & extractor.GetCellValue(ipage, row, col)
        Next
    Next
Next

' Release the COM object once extraction is finished
Set extractor = Nothing