// Program.cs
using Bytescout.PDFExtractor;
using System;
namespace RepairText
{
    /// <summary>
    /// Console sample: extracts all text from a PDF document using the
    /// font-repairing OCR mode of <see cref="TextExtractor"/> and prints it.
    /// </summary>
    class Program
    {
        // Input document, resolved relative to the current working directory.
        private const string InputDocument = "sample.pdf";

        // Location of the "tessdata" folder containing the OCR language data files.
        private const string TessdataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\Redistributable\net2.00\tessdata\";

        // OCR language code: "eng" for English, "deu" for German, "fra" for French,
        // "spa" for Spanish, etc. - it must match the files present in the tessdata folder.
        // More language files: https://github.com/tesseract-ocr/tessdata/tree/3.04.00
        private const string LanguageCode = "eng";

        // Resolution used when rendering the PDF document for OCR.
        private const int RenderingResolution = 300;

        // Heading written in front of the extracted text.
        private const string OutputHeading = "Extracted Text: \n\n";

        /// <summary>
        /// Entry point. Runs the extraction, writes the message of any exception
        /// to the console, then waits for a line of input before returning.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            try
            {
                PrintDocumentText();
            }
            catch (Exception failure)
            {
                // Only the message is reported; the process continues to the prompt below.
                Console.WriteLine(failure.Message);
            }

            // Blocks until a line is read from standard input
            // (presumably so the console window stays open - confirm).
            Console.ReadLine();
        }

        /// <summary>
        /// Loads the input document, configures OCR with font repairing, and
        /// writes the whole extracted text to the console.
        /// </summary>
        private static void PrintDocumentText()
        {
            using (TextExtractor pdfReader = new TextExtractor())
            {
                // The document is loaded before any OCR option is assigned.
                pdfReader.LoadDocumentFromFile(InputDocument);

                // Select the font-repairing OCR mode.
                pdfReader.OCRMode = OCRMode.TextFromImagesAndVectorsAndRepairedFonts;
                pdfReader.OCRLanguageDataFolder = TessdataFolder;
                pdfReader.OCRLanguage = LanguageCode;
                pdfReader.OCRResolution = RenderingResolution;

                // Read everything and print it while the extractor is still alive.
                string documentText = pdfReader.GetText();
                string report = OutputHeading + documentText;
                Console.WriteLine(report);
            }
        }
    }
}
// Click here to get your free trial version of the SDK.
// Also available as: