ByteScout PDF Extractor SDK – C# – Repair Text

  • Home
  • /
  • Articles
  • /
  • ByteScout PDF Extractor SDK – C# – Repair Text

ByteScout PDF Extractor SDK – C# – Repair Text

Program.cs

using Bytescout.PDFExtractor;
using System;

namespace RepairText
{
    /// <summary>
    /// Console sample: opens a PDF with <see cref="TextExtractor"/>, selects the
    /// font-repairing OCR mode, and prints the text returned by the extractor.
    /// </summary>
    class Program
    {
        // PDF document opened by the sample.
        private const string InputPdf = "sample.pdf";

        // Location of the "tessdata" folder containing the OCR language data files.
        private const string TessdataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\Redistributable\net2.00\tessdata\";

        // OCR language code: "eng" for English, "deu" for German, "fra" for French,
        // "spa" for Spanish etc. - according to the files present in /tessdata.
        // More language files: https://github.com/tesseract-ocr/tessdata/tree/3.04.00
        private const string Language = "eng";

        // PDF document rendering resolution handed to the extractor.
        private const int RenderResolution = 300;

        /// <summary>
        /// Entry point. Runs the extraction, writes the message of any exception
        /// to the console, then waits for a line of input before returning.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            try
            {
                PrintRepairedText();
            }
            catch (Exception failure)
            {
                Console.WriteLine(failure.Message);
            }

            Console.ReadLine();
        }

        /// <summary>
        /// Loads <see cref="InputPdf"/>, configures the OCR settings, and writes
        /// the extracted text to the console while the extractor is still open.
        /// </summary>
        private static void PrintRepairedText()
        {
            using (TextExtractor pdf = new TextExtractor())
            {
                // The document is loaded first; OCR settings are applied afterwards,
                // in the same order as the SDK sample.
                pdf.LoadDocumentFromFile(InputPdf);

                // Font-repairing OCR mode.
                pdf.OCRMode = OCRMode.TextFromImagesAndVectorsAndRepairedFonts;
                pdf.OCRLanguageDataFolder = TessdataFolder;
                pdf.OCRLanguage = Language;
                pdf.OCRResolution = RenderResolution;

                // Read all text from the document and print it.
                string extracted = pdf.GetText();
                Console.WriteLine("Extracted Text: \n\n" + extracted);
            }
        }
    }
}


  Click here to get your Free Trial version of the SDK

prev
next