Program.cs
using System;
using System.Diagnostics;
using System.Drawing;
using ByteScout.TextRecognition;

namespace TextRecognitionExample
{
    /// <summary>
    /// Console sample: runs OCR on two rectangular areas of a single page,
    /// prints the recognized objects to the console, saves the recognized
    /// text to a file and opens that file in its associated application.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Loads the input document, registers two recognition
        /// areas, prints and saves the OCR results, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string inputDocument = @".\areas-sample.pdf";
            int pageIndex = 0;
            string outputDocument = @".\result.txt";

            // Create and activate TextRecognizer instance
            using (TextRecognizer textRecognizer = new TextRecognizer("demo", "demo"))
            {
                try
                {
                    // Load document (image or PDF)
                    textRecognizer.LoadDocument(inputDocument);

                    // Set location of "tessdata" folder containing language data files
                    textRecognizer.OCRLanguageDataFolder = @"c:\Program Files\ByteScout Text Recognition SDK\tessdata\";

                    // Set OCR language.
                    // "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish etc - according to files in "tessdata" folder
                    // Find more language files at https://github.com/tesseract-ocr/tessdata/tree/3.04.00
                    textRecognizer.OCRLanguage = "eng";

                    // Get page size (in pixels). Size of PDF document is computed from PDF Points
                    // and the rendering resolution specified by `textRecognizer.PDFRenderingResolution` (default 300 DPI)
                    Size pageSize = textRecognizer.GetPageSize(pageIndex);

                    // Add area of interest as a rectangle at the top-right corner of the page
                    // NOTE(review): arguments look like (x, y, width, height) - confirm against the SDK reference.
                    textRecognizer.RecognitionAreas.Add(pageSize.Width / 2, 0, pageSize.Width / 2, 300);

                    // Add area of interest as a rectangle at the bottom-left corner of the page,
                    // and indicate it should be rotated at 90 deg
                    textRecognizer.RecognitionAreas.Add(0, pageSize.Height / 2, 300, pageSize.Height / 2, AreaRotation.Rotate90FlipNone);

                    // Now, you can get recognized text for further analysis as a list of objects
                    // containing coordinates, object kind, confidence.
                    OCRObjectList ocrObjectList = textRecognizer.GetOCRObjects(pageIndex);

                    foreach (OCRObject ocrObject in ocrObjectList)
                    {
                        Console.WriteLine(ocrObject.ToString());
                    }

                    // ... or you can save recognized text pieces to file
                    textRecognizer.KeepTextFormatting = false; // save without formatting
                    textRecognizer.SaveText(outputDocument, pageIndex, pageIndex);

                    // Open the result file in default associated application (for demo purposes).
                    // UseShellExecute is set explicitly: it defaults to false on .NET Core / .NET 5+,
                    // where starting a non-executable document directly fails; on .NET Framework
                    // true is already the default, so behavior there is unchanged.
                    ProcessStartInfo openResult = new ProcessStartInfo(outputDocument)
                    {
                        UseShellExecute = true
                    };
                    Process.Start(openResult);
                }
                catch (Exception exception)
                {
                    // Demo-level handling: report the failure and fall through to the key prompt.
                    Console.WriteLine(exception);
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }
    }
}
Click here to get your Free Trial version of the SDK
also available as: