How to extract table structure from PDF in C# and VBScript using PDF Extractor SDK

  • Home
  • /
  • Articles
  • /
  • How to extract table structure from PDF in C# and VBScript using PDF Extractor SDK

The following sample source code shows how to extract table structure from a PDF in C# and VBScript using ByteScout PDF Extractor SDK.

You may also find it useful to check how to extract data from PDF tables containing superscript values in C#.

How to extract table structure from PDF in C#

using Bytescout.PDFExtractor;
using System.Diagnostics;
using System;

namespace TableStructure
{
	/// <summary>
	/// Console sample that prints every cell value reported by
	/// <c>StructuredExtractor</c> for each page of a PDF document.
	/// </summary>
	class Program
	{
		/// <summary>
		/// Loads "sample3.pdf", walks every page, row and column reported by the
		/// extractor and writes each cell value to the console, then waits for a key press.
		/// </summary>
		/// <param name="args">Command-line arguments (not used).</param>
		static void Main(string[] args)
		{
			// Create Bytescout.PDFExtractor.StructuredExtractor instance (former TableExtractor).
			// The using statement releases the extractor (and the loaded document) as soon as
			// extraction is finished, even if one of the SDK calls throws.
			using (StructuredExtractor extractor = new StructuredExtractor())
			{
				extractor.RegistrationName = "demo";
				extractor.RegistrationKey = "demo";

				// Load sample PDF document
				extractor.LoadDocumentFromFile("sample3.pdf");

				// The page count does not change while iterating, so query it once.
				int pageCount = extractor.GetPageCount();

				for (int ipage = 0; ipage < pageCount; ipage++)
				{
					Console.WriteLine("starting extraction from page #" + ipage);

					// The structure of a page must be prepared before its rows/columns are queried.
					extractor.PrepareStructure(ipage);

					int rowCount = extractor.GetRowCount(ipage);

					for (int row = 0; row < rowCount; row++)
					{
						// Column count is queried per row: rows may have different numbers of cells.
						int columnCount = extractor.GetColumnCount(ipage, row);

						for (int col = 0; col < columnCount; col++)
						{
							Console.WriteLine(extractor.GetCellValue(ipage, row, col));
						}
					}
				}
			}

			Console.WriteLine("Press any key..");
			Console.ReadKey();
		}
	}
}

How to extract table structure from PDF in VBScript (Visual Basic 6)


' Create Bytescout.PDFExtractor.StructuredExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.StructuredExtractor")

extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document
extractor.LoadDocumentFromFile "../../sample3.pdf"

' Walk every page, row and column and show each cell value in a message box
For ipage = 0 To extractor.GetPageCount() - 1

	' Prepare the structure of this page before querying its rows and columns
	extractor.PrepareStructure ipage

	rowCount = extractor.GetRowCount(ipage)

	For row = 0 To rowCount - 1
		' Column count is queried per row
		columnCount = extractor.GetColumnCount(ipage, row)

		For col = 0 To columnCount - 1
			MsgBox "Cell at page #" & CStr(ipage) & ", row=" & CStr(row) & ", column=" & CStr(col) & vbCRLF & extractor.GetCellValue(ipage, row, col)
		Next
	Next
Next

' Release the COM object
Set extractor = Nothing

prev
next