ByteScout PDF Extractor SDK – C++ – Find Table And Extract As CSV

  • Home
  • /
  • Articles
  • /
  • ByteScout PDF Extractor SDK – C++ – Find Table And Extract As CSV

ByteScout PDF Extractor SDK – C++ – Find Table And Extract As CSV

FindTableAndExtractAsCsv.cpp

#include "stdafx.h"
#include "comip.h"

#import "c:\\Program Files\\Bytescout PDF Extractor SDK\\net4.00\\Bytescout.PDFExtractor.tlb" raw_interfaces_only

using namespace Bytescout_PDFExtractor;

int _tmain(int argc, _TCHAR* argv[])
{
	// Initialize COM.
	HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);

	// Create CSVExtractor instance
	_CSVExtractorPtr pICSVExtractor(__uuidof(CSVExtractor));
	pICSVExtractor->put_RegistrationName(_bstr_t(L"DEMO"));
	pICSVExtractor->put_RegistrationKey(_bstr_t(L"DEMO"));

	// Create TableDetector instance
	_TableDetectorPtr pITableDetector(__uuidof(TableDetector));
	pITableDetector->put_RegistrationName(_bstr_t(L"DEMO"));
	pITableDetector->put_RegistrationKey(_bstr_t(L"DEMO"));

	// Set table detection mode to "bordered tables" - best for tables with closed solid borders.
	pITableDetector->put_ColumnDetectionMode(ColumnDetectionMode_BorderedTables);

	// We should define what kind of tables we should detect.
	// So we set min required number of columns to 2 ...
	pITableDetector->put_DetectionMinNumberOfColumns(2);
	// ... and we set min required number of rows to 2
	pITableDetector->put_DetectionMinNumberOfRows(2);

	// Load sample PDF document
	_bstr_t inputFile(L"..\\..\\sample3.pdf");
	pICSVExtractor->LoadDocumentFromFile(inputFile);
	pITableDetector->LoadDocumentFromFile(inputFile);

	// Get page count
	long pageCount;
	pITableDetector->GetPageCount(&pageCount);

	for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
	{
		int t = 1;
		VARIANT_BOOL vbResult;

		// Find first table and continue if found
		pITableDetector->FindTable(pageIndex, &vbResult);
		if (vbResult == VARIANT_TRUE)
		{
			do
			{
				float left, top, width, height;
				pITableDetector->GetFoundTableRectangle_Left(&left);
				pITableDetector->GetFoundTableRectangle_Top(&top);
				pITableDetector->GetFoundTableRectangle_Width(&width);
				pITableDetector->GetFoundTableRectangle_Height(&height);

				// Set extraction area for CSV extractor to rectangle received from the table detector
				pICSVExtractor->SetExtractionArea(left, top, width, height);
				// Export the table to CSV file
				CString fileName;
				fileName.Format(L"page-%d-table-%d.csv", pageIndex, t);
				pICSVExtractor->SavePageCSVToFile(pageIndex, _bstr_t(fileName));
				t++;

				pITableDetector->FindNextTable(&vbResult);
			} while (vbResult == VARIANT_TRUE);
		}
	}

	pICSVExtractor->Release();
	pITableDetector->Release();

	CoUninitialize();

	return 0;
}


stdafx.cpp

// stdafx.cpp : source file that includes just the standard includes
// CPPExample.pch will be the pre-compiled header
// stdafx.obj will contain the pre-compiled type information

#include "stdafx.h"

// TODO: reference any additional headers you need in STDAFX.H
// and not in this file


  Click here to get your Free Trial version of the SDK

prev
next