What is ByteScout Robotic Process Automation? It is tools and API for rapid manual data entry automation using Robotic Process Automation based on ByteScout data extraction tools.
On-demand (REST Web API) version:
Web API (on-demand version)
On-premise offline SDK for Windows:
60 Day Free Trial (on-premise)
Imports System.Drawing.Imaging Imports System.IO Imports Bytescout.PDF Imports Bytescout.PDFExtractor Module Program Sub Main() Try ' Files Dim fileName As String = "hindi_text_with_image.pdf" Dim destFileName As String = "output_hindi_text_with_image.pdf" Dim destFileName_serachable As String = "output_hindi_text_with_image_searchable.pdf" ' Read all text from pdf file Dim allTextExtracted As String = "" Using extractor As New TextExtractor ' Load PDF document extractor.LoadDocumentFromFile(fileName) ' Read all text to a variable allTextExtracted = extractor.GetText End Using ' Get image from pdf file Dim memoryStream As MemoryStream = New MemoryStream Using extractor As New ImageExtractor ' Load PDF document extractor.LoadDocumentFromFile(fileName) If extractor.GetFirstImage Then extractor.SaveCurrentImageToStream(memoryStream, ImageFormat.Png) End If End Using ' Load image from file to System.Drawing.Image object (we need it to get the image resolution) Using sysImage As System.Drawing.Image = System.Drawing.Image.FromStream(memoryStream) ' Compute image size in PDF units (Points) Dim widthInPoints As Single = (sysImage.Width / sysImage.HorizontalResolution * 72.0F) Dim heightInPoints As Single = (sysImage.Height / sysImage.VerticalResolution * 72.0F) ' Create new PDF document Dim outPdfDocument As Document = New Document outPdfDocument.RegistrationName = "demo" outPdfDocument.RegistrationKey = "demo" ' Create page of computed size Dim page As Page = New Page(widthInPoints, heightInPoints) ' Add page to the document outPdfDocument.Pages.Add(page) Dim canvas As Canvas = page.Canvas ' Create Bytescout.PDF.Image object from loaded image Dim pdfImage As Image = New Image(sysImage) ' Draw the image canvas.DrawImage(pdfImage, 0, 0, widthInPoints, heightInPoints) ' Dispose the System.Drawing.Image object to free resources sysImage.Dispose() ' Create brush Dim transparentBrush As SolidBrush = New SolidBrush(New ColorGray(0)) ' ... and make it transparent transparentBrush.Opacity = 0 ' Draw text with transparent brush ' Need to set Font which supports hindi characters. Dim font16 As Font = New Font("Arial Unicode MS", 16) canvas.DrawString(allTextExtracted, font16, transparentBrush, 40, 40) ' Save document to file outPdfDocument.Save(destFileName) End Using 'Make PDF file with hindi text searchable to OCR. Using searchablePDFMaker As New SearchablePDFMaker 'Load PDF document searchablePDFMaker.LoadDocumentFromFile(destFileName) ' Set the location of "tessdata" folder containing language data files ' It used following files for hindi language support. Need to put these files into "testdata" folder. Below location contains these files. ' https://github.com/tesseract-ocr/tessdata/tree/3.04.00 ' hin.traineddata ' hin.cube.bigrams ' hin.cube.lm ' hin.cube.nn ' hin.cube.params ' hin.cube.word-freq ' hin.tesseract_cube.nn ' Set the location of "tessdata" folder containing language data files searchablePDFMaker.OCRLanguageDataFolder = "c:\Program Files\Bytescout PDF Extractor SDK\net2.00\tessdata" ' Set OCR language searchablePDFMaker.OCRLanguage = "hin" ' Need to set Font which supports hindi characters searchablePDFMaker.LabelingFont = "Arial Unicode MS" ' Set PDF document rendering resolution searchablePDFMaker.OCRResolution = 300 ' Make PDF document searchable searchablePDFMaker.MakePDFSearchable(destFileName_serachable) End Using ' Open document in default PDF viewer app Process.Start(destFileName_serachable) Catch ex As Exception Console.WriteLine("ERROR:" + ex.Message) End Try Console.WriteLine() Console.WriteLine("Press any key to exit...") Console.ReadLine() End Sub End Module
Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 15 VisualStudioVersion = 15.0.27703.2026 MinimumVisualStudioVersion = 10.0.40219.1 Project("{F184B08F-C81C-45F6-A57F-5ABD9991F28F}") = "ReadPDFWithImageHindiText", "ReadPDFWithImageHindiText.vbproj", "{846F275E-BE99-4254-85ED-B8CBBB4546A9}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU Release|Any CPU = Release|Any CPU EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {846F275E-BE99-4254-85ED-B8CBBB4546A9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {846F275E-BE99-4254-85ED-B8CBBB4546A9}.Debug|Any CPU.Build.0 = Debug|Any CPU {846F275E-BE99-4254-85ED-B8CBBB4546A9}.Release|Any CPU.ActiveCfg = Release|Any CPU {846F275E-BE99-4254-85ED-B8CBBB4546A9}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {1F0B11D3-6F1D-4CCE-91F9-5566ABC60672} EndGlobalSection EndGlobal
<?xml version="1.0" encoding="utf-8"?> <Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" /> <PropertyGroup> <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> <ProjectGuid>{846F275E-BE99-4254-85ED-B8CBBB4546A9}</ProjectGuid> <OutputType>Exe</OutputType> <StartupObject>ReadPDFWithImageHindiText.Program</StartupObject> <RootNamespace>ReadPDFWithImageHindiText</RootNamespace> <AssemblyName>ReadPDFWithImageHindiText</AssemblyName> <FileAlignment>512</FileAlignment> <MyType>Console</MyType> <TargetFrameworkVersion>v2.0</TargetFrameworkVersion> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> <PlatformTarget>AnyCPU</PlatformTarget> <DebugSymbols>true</DebugSymbols> <DebugType>full</DebugType> <DefineDebug>true</DefineDebug> <DefineTrace>true</DefineTrace> <OutputPath>bin\Debug\</OutputPath> <DocumentationFile>ReadPDFWithImageHindiText.xml</DocumentationFile> <NoWarn>42016,41999,42017,42018,42019,42032,42036,42020,42021,42022</NoWarn> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> <PlatformTarget>AnyCPU</PlatformTarget> <DebugType>pdbonly</DebugType> <DefineDebug>false</DefineDebug> <DefineTrace>true</DefineTrace> <Optimize>true</Optimize> <OutputPath>bin\Release\</OutputPath> <DocumentationFile>ReadPDFWithImageHindiText.xml</DocumentationFile> <NoWarn>42016,41999,42017,42018,42019,42032,42036,42020,42021,42022</NoWarn> </PropertyGroup> <PropertyGroup> <OptionExplicit>On</OptionExplicit> </PropertyGroup> <PropertyGroup> <OptionCompare>Binary</OptionCompare> </PropertyGroup> <PropertyGroup> <OptionStrict>Off</OptionStrict> </PropertyGroup> <PropertyGroup> <OptionInfer>On</OptionInfer> </PropertyGroup> <ItemGroup> <Reference Include="Bytescout.PDF, Version=1.8.2.254, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL"> <SpecificVersion>False</SpecificVersion> <HintPath>c:\Program Files\Bytescout PDF SDK\net2.0\Bytescout.PDF.dll</HintPath> </Reference> <Reference Include="Bytescout.PDFExtractor, Version=9.1.0.3170, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL"> <SpecificVersion>False</SpecificVersion> <HintPath>c:\Program Files\Bytescout PDF Extractor SDK\net2.00\Bytescout.PDFExtractor.dll</HintPath> </Reference> <Reference Include="System" /> <Reference Include="System.Data" /> <Reference Include="System.Deployment" /> <Reference Include="System.Drawing" /> <Reference Include="System.Xml" /> </ItemGroup> <ItemGroup> <Import Include="Microsoft.VisualBasic" /> <Import Include="System" /> <Import Include="System.Collections" /> <Import Include="System.Collections.Generic" /> <Import Include="System.Data" /> <Import Include="System.Diagnostics" /> </ItemGroup> <ItemGroup> <Compile Include="Program.vb" /> </ItemGroup> <Import Project="$(MSBuildToolsPath)\Microsoft.VisualBasic.targets" /> </Project>
See also:
Get Your API Key
See also: