These source code samples show how to extract table structure from a PDF in C# and VBScript using ByteScout PDF Extractor SDK.
You may also find it useful to see how to extract data from PDF tables containing superscript values in C#.
Use this code snippet to extract table structure from a PDF in the C# programming language.
using Bytescout.PDFExtractor;
using System.Diagnostics;
using System;

namespace TableStructure
{
    /// <summary>
    /// Console sample that walks the table structure detected in a PDF document
    /// and prints the value of every cell, page by page and row by row.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "sample3.pdf", prepares the table structure of each page and writes
        /// every cell value to the console, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.StructuredExtractor instance (former TableExtractor).
            // The using block disposes the extractor even if extraction throws.
            using (StructuredExtractor extractor = new StructuredExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf");

                // Query the page count once instead of on every loop iteration.
                int pageCount = extractor.GetPageCount();

                for (int ipage = 0; ipage < pageCount; ipage++)
                {
                    Console.WriteLine("starting extraction from page #" + ipage);

                    // The structure is prepared for a page before its rows and columns are queried.
                    extractor.PrepareStructure(ipage);

                    int rowCount = extractor.GetRowCount(ipage);

                    for (int row = 0; row < rowCount; row++)
                    {
                        // The column count is queried per row, so rows may differ in width.
                        int columnCount = extractor.GetColumnCount(ipage, row);

                        for (int col = 0; col < columnCount; col++)
                        {
                            Console.WriteLine(extractor.GetCellValue(ipage, row, col));
                        }
                    }
                }
            }

            Console.WriteLine("Press any key..");
            Console.ReadKey();
        }
    }
}
Use this code sample to extract table structure from a PDF in the VBScript programming language.
' Walks the table structure detected in a PDF document and shows the value of
' every cell in a message box, page by page and row by row.

' Create Bytescout.PDFExtractor.StructuredExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.StructuredExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document
extractor.LoadDocumentFromFile "../../sample3.pdf"

For ipage = 0 To extractor.GetPageCount() - 1

    ' Start extraction from this page: the structure is prepared before
    ' its rows and columns are queried.
    extractor.PrepareStructure ipage

    rowCount = extractor.GetRowCount(ipage)

    For row = 0 To rowCount - 1

        ' The column count is queried per row, so rows may differ in width.
        columnCount = extractor.GetColumnCount(ipage, row)

        For col = 0 To columnCount - 1
            ' Use & throughout: it always concatenates, whereas + is the arithmetic operator.
            MsgBox "Cell at page #" & CStr(ipage) & ", row=" & CStr(row) & ", column=" & CStr(col) & vbCRLF & extractor.GetCellValue(ipage, row, col)
        Next

    Next

Next

' Release the COM object once extraction is finished.
Set extractor = Nothing