Program.cs
using Bytescout.PDFExtractor;
using System;
namespace CheckIfOCRIsRequired
{
class Program
{
static void Main(string[] args)
{
try
{
// Loop through all files in directory and check whether OCR operation is required
foreach (string filePath in System.IO.Directory.GetFiles("InputFiles"))
{
_CheckOCRRequired(filePath);
}
}
catch (Exception ex)
{
Console.WriteLine("Error: " + ex.Message);
}
Console.WriteLine("Press enter key to exit...");
Console.ReadLine();
}
/// <summary>
/// Check whether OCR Operation is required
/// </summary>
/// <param name="filePath"></param>
private static void _CheckOCRRequired(string filePath)
{
//Read all file content...
using (TextExtractor extractor = new TextExtractor())
{
extractor.RegistrationKey = "demo";
extractor.RegistrationName = "demo";
// Load document
extractor.LoadDocumentFromFile(filePath);
Console.WriteLine("\n*******************\n\nFilePath: {0}", filePath);
int pageIndex = 0;
// Identify OCR operation is recommended for page
if (extractor.IsOCRRecommendedForPage(pageIndex))
{
Console.WriteLine("\nOCR Recommended: True");
// Enable Optical Character Recognition (OCR)
// in .Auto mode (SDK automatically checks if needs to use OCR or not)
extractor.OCRMode = OCRMode.Auto;
// Set the location of "tessdata" folder containing language data files
extractor.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\Redistributable\net2.00\tessdata\";
// Set OCR language
extractor.OCRLanguage = "eng"; // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish etc - according to files in /tessdata
// Find more language files at https://github.com/tesseract-ocr/tessdata/tree/3.04.00
// Set PDF document rendering resolution
extractor.OCRResolution = 300;
}
else
{
Console.WriteLine("\nOCR Recommended: False");
}
//Read all text
var allExtractedText = extractor.GetText();
Console.WriteLine("\nExtracted Text:\n{0}\n\n", allExtractedText);
}
}
}
}
Click here to get your Free Trial version of the SDK
also available as: