CPPExample.cpp
#include "stdafx.h"
#include "comip.h"
#import "c:\\Program Files\\Bytescout PDF Extractor SDK\\net4.00\\Bytescout.PDFExtractor.tlb" raw_interfaces_only
using namespace Bytescout_PDFExtractor;
int _tmain(int argc, _TCHAR* argv[])
{
// Initialize COM.
HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);
// Create the interface pointer.
_TextExtractorPtr pITextExtractor(__uuidof(TextExtractor));
// Set the registration name and key
// Note: You should use _bstr_t or BSTR to pass string to the library because of COM requirements
_bstr_t bstrRegName(L"DEMO");
pITextExtractor->put_RegistrationName(bstrRegName);
_bstr_t bstrRegKey(L"DEMO");
pITextExtractor->put_RegistrationKey(bstrRegKey);
// Load sample PDF document
_bstr_t bstrPath(L"..\\..\\sample_ocr.pdf");
pITextExtractor->LoadDocumentFromFile(bstrPath);
// Enable Optical Character Recognition (OCR)
// in .Auto mode (SDK automatically checks if needs to use OCR or not)
pITextExtractor->put_OCRMode(OCRMode_Auto);
// Set the location of "tessdata" folder containing language data files
_bstr_t bstrOCRLangDataPath(L"c:\\Program Files\\Bytescout PDF Extractor SDK\\net4.00\\tessdata");
pITextExtractor->put_OCRLanguageDataFolder(bstrOCRLangDataPath);
// Set OCR language
_bstr_t bstrOCRLanguage(L"eng");
pITextExtractor->put_OCRLanguage(bstrOCRLanguage);
// Set PDF document rendering resolution
pITextExtractor->put_OCRResolution(300);
// You can also apply various preprocessing filters
// to improve the recognition on low-quality scans.
_ImagePreprocessingFiltersCollection* pIImagePreprocessingFilters;
pITextExtractor->get_OCRImagePreprocessingFilters(&pIImagePreprocessingFilters);
// Automatically deskew skewed scans
//pIImagePreprocessingFilters->AddDeskew();
// Remove vertical or horizontal lines (sometimes helps to avoid OCR engine's page segmentation errors)
//pIImagePreprocessingFilters->AddVerticalLinesRemover();
//pIImagePreprocessingFilters->AddHorizontalLinesRemover();
// Repair broken letters
//pIImagePreprocessingFilters->AddDilate();
// Remove noise
//pIImagePreprocessingFilters->AddMedian();
// Apply Gamma Correction
//pIImagePreprocessingFilters->AddGammaCorrection();
// Add Contrast
//pIImagePreprocessingFilters->AddContrast(20);
// (!) You can use new OCRAnalyser class to find an optimal set of image preprocessing
// filters for your specific document.
// See "OCR Analyser" example.
// Save extracted text to file
_bstr_t bstrOutputFile(L"output.txt");
pITextExtractor->SaveTextToFile(bstrOutputFile);
pITextExtractor->Release();
CoUninitialize();
return 0;
}
stdafx.cpp
// stdafx.cpp : source file that includes just the standard includes // CPPExample.pch will be the pre-compiled header // stdafx.obj will contain the pre-compiled type information #include "stdafx.h" // TODO: reference any additional headers you need in STDAFX.H // and not in this file
Click here to get your Free Trial version of the SDK
also available as: