Module1.vb
Imports System.Drawing
Imports Bytescout.TextRecognition

''' <summary>
''' Console sample: runs OCR on two rectangular areas of one page of a document,
''' prints the recognized objects and saves the recognized text to a file.
''' </summary>
Module Module1

    ''' <summary>
    ''' Loads ".\areas-sample.pdf", registers two recognition areas on page 0,
    ''' writes every recognized OCR object to the console, saves the text to
    ''' ".\result.txt" and opens that file. Any exception raised along the way
    ''' is written to the console instead of being propagated.
    ''' </summary>
    Sub Main()

        Dim inputDocument As String = ".\areas-sample.pdf"
        Dim pageIndex As Integer = 0
        Dim outputDocument As String = ".\result.txt"

        ' Create and activate TextRecognizer instance
        Using textRecognizer As TextRecognizer = New TextRecognizer("demo", "demo")

            Try

                ' Load document (image or PDF)
                textRecognizer.LoadDocument(inputDocument)

                ' Set location of "tessdata" folder containing language data files
                textRecognizer.OCRLanguageDataFolder = "c:\Program Files\ByteScout Text Recognition SDK\tessdata\"

                ' Set OCR language.
                ' "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish etc. -
                ' according to the files in the "tessdata" folder.
                ' Find more language files at https://github.com/tesseract-ocr/tessdata/tree/3.04.00
                textRecognizer.OCRLanguage = "eng"

                ' Get page size (in pixels). Size of a PDF page is computed from PDF Points
                ' and the rendering resolution specified by `textRecognizer.PDFRenderingResolution`
                ' (default 300 DPI).
                Dim pageSize As Size = textRecognizer.GetPageSize(pageIndex)

                ' Size.Width / Size.Height are Integers. Use integer division (`\`) rather than `/`:
                ' `/` produces a Double that has to be implicitly narrowed back (a compile error
                ' under Option Strict On), and rounding on odd page sizes can make
                ' x + width (or y + height) exceed the page by one pixel.
                ' NOTE(review): assumes RecognitionAreas.Add takes integer pixel coordinates - confirm against the SDK.
                Dim halfWidth As Integer = pageSize.Width \ 2
                Dim halfHeight As Integer = pageSize.Height \ 2

                ' Add area of interest as a rectangle at the top-right corner of the page
                textRecognizer.RecognitionAreas.Add(halfWidth, 0, halfWidth, 300)

                ' Add area of interest as a rectangle at the bottom-left corner of the page,
                ' and indicate it should be rotated at 90 deg
                textRecognizer.RecognitionAreas.Add(0, halfHeight, 300, halfHeight, AreaRotation.Rotate90FlipNone)

                ' Now you can get recognized text for further analysis as a list of objects
                ' containing coordinates, object kind, confidence.
                Dim ocrObjectList As OCRObjectList = textRecognizer.GetOCRObjects(pageIndex)

                For Each ocrObject As OCRObject In ocrObjectList
                    Console.WriteLine(ocrObject.ToString())
                Next

                ' ... or you can save recognized text pieces to file
                textRecognizer.KeepTextFormatting = False ' save without formatting
                textRecognizer.SaveText(outputDocument, pageIndex, pageIndex)

                ' Open the result file in default associated application (for demo purposes)
                Process.Start(outputDocument)

            Catch exception As Exception
                ' Report the failure on the console; the Using block still disposes the recognizer.
                Console.WriteLine(exception)
            End Try

        End Using

    End Sub

End Module