What is ByteScout Robotic Process Automation? It is a set of components and tools for quick RPA (Robotic Process Automation) implementation.
On-demand (REST Web API) version:
Web API (on-demand version)
On-premise offline SDK for Windows:
60 Day Free Trial (on-premise)
Imports System.IO
Imports System.Threading
Imports Bytescout.PDFExtractor

''' <summary>
''' Makes "sample.pdf" searchable: the document is split into fixed-size page ranges,
''' every range is OCR-processed on a thread-pool thread, and the processed pieces
''' are merged into "result.pdf".
''' </summary>
Module Module1

    ' Number of pages handled by a single work item.
    Private Const PagesPerPiece As Integer = 10

    ' Limit to 4 threads in queue.
    ' Set this value to number of your processor cores for max performance.
    Dim ThreadLimiter As Semaphore = New Semaphore(4, 4)

    ''' <summary>
    ''' Entry point: splits the input document into pieces, queues one work item per
    ''' piece (throttled by <c>ThreadLimiter</c>), waits for all of them, merges the
    ''' results and removes the temporary piece files.
    ''' </summary>
    <MTAThread>
    Sub Main()

        Const inputFile = "sample.pdf"
        Const resultFile = "result.pdf"

        Dim pageCount As Integer

        ' Get document page count
        Using infoExtractor = New InfoExtractor()
            infoExtractor.LoadDocumentFromFile(inputFile)
            pageCount = infoExtractor.GetPageCount()
        End Using

        ' Nothing to split, process or merge for an empty document.
        If pageCount <= 0 Then
            Console.WriteLine("The document contains no pages. Nothing to process.")
            Console.WriteLine()
            Console.WriteLine("Press any key to exit...")
            Console.ReadKey()
            Return
        End If

        ' Process the document by 10-page pieces (ceiling division gives the piece count)
        Dim numberOfThreads As Integer = (pageCount + PagesPerPiece - 1) \ PagesPerPiece

        Dim doneEvents(numberOfThreads - 1) As ManualResetEvent
        Dim pieces(numberOfThreads - 1) As String
        Dim stopwatch As Stopwatch = Stopwatch.StartNew()

        Try
            ' Run threads
            For i As Integer = 0 To numberOfThreads - 1

                ' Wait for the queue
                ThreadLimiter.WaitOne()

                doneEvents(i) = New ManualResetEvent(False)

                ' Zero-based, inclusive page range of this piece. The last piece is
                ' clamped to the last page of the document; a single-piece document
                ' uses the same rule, so its last page is no longer dropped.
                Dim startPage As Integer = i * PagesPerPiece
                Dim endPage As Integer = Math.Min(pageCount - 1, startPage + PagesPerPiece - 1)

                pieces(i) = String.Format("temp-{0}-{1}.pdf", startPage, endPage)

                ThreadPool.QueueUserWorkItem(New WaitCallback(AddressOf ThreadProc), New Object() {i, doneEvents(i), inputFile, pieces(i), startPage, endPage})
            Next

            ' Wait for all threads.
            ' Each event is awaited individually because WaitHandle.WaitAll accepts at
            ' most 64 handles, which would fail for documents longer than 640 pages.
            For Each doneEvent As ManualResetEvent In doneEvents
                doneEvent.WaitOne()
                doneEvent.Close()
            Next

            ' Merge pieces
            Using merger = New DocumentMerger
                merger.Merge(pieces, resultFile)
            End Using

        Finally
            ' Delete temp files (also when processing or merging failed).
            ' Entries that were never assigned are skipped.
            For Each tempFile As String In pieces
                If tempFile IsNot Nothing Then
                    File.Delete(tempFile)
                End If
            Next
        End Try

        Console.WriteLine("All done in {0}.", stopwatch.Elapsed)

        Console.WriteLine()
        Console.WriteLine("Press any key to exit...")
        Console.ReadKey()

    End Sub

    ''' <summary>
    ''' Work item: extracts one page range from the input document into a temporary
    ''' file, makes that piece searchable and writes it to the piece's output file.
    ''' </summary>
    ''' <param name="stateInfo">
    ''' An <c>Object()</c> array laid out as: thread index (Integer), completion event
    ''' (ManualResetEvent), input file (String), output file (String), zero-based start
    ''' page (Integer), zero-based inclusive end page (Integer).
    ''' </param>
    Sub ThreadProc(ByVal stateInfo As Object)

        ' Unpack the state array with explicit casts instead of late binding.
        Dim args As Object() = DirectCast(stateInfo, Object())
        Dim threadIndex As Integer = CInt(args(0))
        Dim waitEvent As ManualResetEvent = DirectCast(args(1), ManualResetEvent)
        Dim inputFile As String = CStr(args(2))
        Dim outputFile As String = CStr(args(3))
        Dim startPage As Integer = CInt(args(4))
        Dim endPage As Integer = CInt(args(5))

        Try
            Console.WriteLine("Thread #{0} started with the page range from {1} to {2}.", threadIndex, startPage, endPage)

            Dim stopwatch As Stopwatch = Stopwatch.StartNew()

            ' Extract a piece of document.
            ' The range is passed as startPage + 1 .. endPage + 1 (shifted from the
            ' zero-based values computed in Main).
            Dim chunk As String = String.Format("temp-{0}-{1}", startPage, endPage)
            Using splitter = New DocumentSplitter
                splitter.ExtractPageRange(inputFile, chunk, startPage + 1, endPage + 1)
            End Using

            ' Process the piece
            Using searchablePdfMaker As New SearchablePDFMaker("demo", "demo")

                searchablePdfMaker.OCRDetectPageRotation = True
                searchablePdfMaker.OCRLanguageDataFolder = "c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\"

                searchablePdfMaker.LoadDocumentFromFile(chunk)

                ' 300 DPI resolution is recommended.
                ' Using of higher values will slow down the processing but does not guarantee the higher quality.
                searchablePdfMaker.OCRResolution = 300

                searchablePdfMaker.MakePDFSearchable(outputFile)

            End Using

            File.Delete(chunk)

            Console.WriteLine("Thread #{0} finished in {1}.", threadIndex, stopwatch.Elapsed)

        Finally
            ' Signal the thread is finished
            waitEvent.Set()

            ' Release semaphore
            ThreadLimiter.Release()
        End Try

    End Sub

End Module
Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 15 VisualStudioVersion = 15.0.26730.8 MinimumVisualStudioVersion = 10.0.40219.1 Project("{F184B08F-C81C-45F6-A57F-5ABD9991F28F}") = "MultithreadProcessing", "MultithreadProcessing.vbproj", "{F18881BC-38FA-4D69-AD4E-4DC6CB637251}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU Release|Any CPU = Release|Any CPU EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {F18881BC-38FA-4D69-AD4E-4DC6CB637251}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {F18881BC-38FA-4D69-AD4E-4DC6CB637251}.Debug|Any CPU.Build.0 = Debug|Any CPU {F18881BC-38FA-4D69-AD4E-4DC6CB637251}.Release|Any CPU.ActiveCfg = Release|Any CPU {F18881BC-38FA-4D69-AD4E-4DC6CB637251}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {BADFE9BE-5CD2-45AC-9023-A1BB794B42E4} EndGlobalSection EndGlobal
<?xml version="1.0" encoding="utf-8"?> <Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- MSBuild project for the MultithreadProcessing VB.NET sample: builds an Exe targeting .NET Framework v4.0 with Module1 as the startup object. --> <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" /> <PropertyGroup> <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> <ProjectGuid>{F18881BC-38FA-4D69-AD4E-4DC6CB637251}</ProjectGuid> <OutputType>Exe</OutputType> <StartupObject>MultithreadProcessing.Module1</StartupObject> <RootNamespace>MultithreadProcessing</RootNamespace> <AssemblyName>MultithreadProcessing</AssemblyName> <FileAlignment>512</FileAlignment> <MyType>Console</MyType> <TargetFrameworkVersion>v4.0</TargetFrameworkVersion> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> <PlatformTarget>AnyCPU</PlatformTarget> <DebugSymbols>true</DebugSymbols> <DebugType>full</DebugType> <DefineDebug>true</DefineDebug> <DefineTrace>true</DefineTrace> <OutputPath>bin\Debug\</OutputPath> <DocumentationFile>MultithreadProcessing.xml</DocumentationFile> <NoWarn>42016,41999,42017,42018,42019,42032,42036,42020,42021,42022</NoWarn> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> <PlatformTarget>AnyCPU</PlatformTarget> <DebugType>pdbonly</DebugType> <DefineDebug>false</DefineDebug> <DefineTrace>true</DefineTrace> <Optimize>true</Optimize> <OutputPath>bin\Release\</OutputPath> <DocumentationFile>MultithreadProcessing.xml</DocumentationFile> <NoWarn>42016,41999,42017,42018,42019,42032,42036,42020,42021,42022</NoWarn> </PropertyGroup> <PropertyGroup> <OptionExplicit>On</OptionExplicit> </PropertyGroup> <PropertyGroup> <OptionCompare>Binary</OptionCompare> </PropertyGroup> <!-- Option Strict is Off for this project, so implicit conversions and late binding are permitted in the compiled sources. --> <PropertyGroup> <OptionStrict>Off</OptionStrict> </PropertyGroup> <PropertyGroup> 
<OptionInfer>On</OptionInfer> </PropertyGroup> <PropertyGroup> <ApplicationManifest>My Project\app.manifest</ApplicationManifest> </PropertyGroup> <ItemGroup> <!-- NOTE(review): both Bytescout HintPath values below are absolute paths on drive S: pointing at a Debug_FULL_4.0 build folder; presumably they must be changed to the local SDK install location before building elsewhere (to be confirmed). --> <Reference Include="Bytescout.PDFExtractor, Version=8.2.0.2710, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL"> <SpecificVersion>False</SpecificVersion> <HintPath>S:\Bytescout\PDF Extractor SDK\Bytescout.PDFExtractor\bin\Debug_FULL_4.0\Bytescout.PDFExtractor.dll</HintPath> </Reference> <Reference Include="Bytescout.PDFExtractor.OCRExtension, Version=8.2.0.2710, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL"> <SpecificVersion>False</SpecificVersion> <HintPath>S:\Bytescout\PDF Extractor SDK\Bytescout.PDFExtractor\bin\Debug_FULL_4.0\Bytescout.PDFExtractor.OCRExtension.dll</HintPath> </Reference> <Reference Include="System" /> <Reference Include="System.Data" /> <Reference Include="System.Deployment" /> <Reference Include="System.Xml" /> <Reference Include="System.Core" /> <Reference Include="System.Xml.Linq" /> <Reference Include="System.Data.DataSetExtensions" /> </ItemGroup> <!-- Project-level namespace imports. --> <ItemGroup> <Import Include="Microsoft.VisualBasic" /> <Import Include="System" /> <Import Include="System.Collections" /> <Import Include="System.Collections.Generic" /> <Import Include="System.Data" /> <Import Include="System.Diagnostics" /> <Import Include="System.Linq" /> <Import Include="System.Xml.Linq" /> </ItemGroup> <ItemGroup> <Compile Include="Module1.vb" /> <Compile Include="My Project\AssemblyInfo.vb" /> <Compile Include="My Project\Application.Designer.vb"> <AutoGen>True</AutoGen> <DependentUpon>Application.myapp</DependentUpon> </Compile> <Compile Include="My Project\Resources.Designer.vb"> <AutoGen>True</AutoGen> <DesignTime>True</DesignTime> <DependentUpon>Resources.resx</DependentUpon> </Compile> <Compile Include="My Project\Settings.Designer.vb"> <AutoGen>True</AutoGen> <DependentUpon>Settings.settings</DependentUpon> 
<DesignTimeSharedInput>True</DesignTimeSharedInput> </Compile> </ItemGroup> <ItemGroup> <EmbeddedResource Include="My Project\Resources.resx"> <Generator>VbMyResourcesResXFileCodeGenerator</Generator> <LastGenOutput>Resources.Designer.vb</LastGenOutput> <CustomToolNamespace>My.Resources</CustomToolNamespace> <SubType>Designer</SubType> </EmbeddedResource> </ItemGroup> <ItemGroup> <None Include="My Project\app.manifest" /> <None Include="My Project\Application.myapp"> <Generator>MyApplicationCodeGenerator</Generator> <LastGenOutput>Application.Designer.vb</LastGenOutput> </None> <None Include="My Project\Settings.settings"> <Generator>SettingsSingleFileGenerator</Generator> <CustomToolNamespace>My</CustomToolNamespace> <LastGenOutput>Settings.Designer.vb</LastGenOutput> </None> <!-- sample.pdf is copied to the output directory on every build (CopyToOutputDirectory = Always). --> <Content Include="sample.pdf"> <CopyToOutputDirectory>Always</CopyToOutputDirectory> </Content> </ItemGroup> <Import Project="$(MSBuildToolsPath)\Microsoft.VisualBasic.targets" /> <!-- To modify your build process, add your task inside one of the targets below and uncomment it. Other similar extension points exist, see Microsoft.Common.targets. <Target Name="BeforeBuild"> </Target> <Target Name="AfterBuild"> </Target> --> </Project>
See also:
Get Your API Key
See also: