What is ByteScout Robotic Process Automation? It is a set of integrated APIs for quickly replacing manual data processing with robotic process automation.
On-demand (REST Web API) version:
Web API (on-demand version)
On-premise offline SDK for Windows:
60 Day Free Trial (on-premise)
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- MSBuild project for the MultithreadProcessing console sample. -->
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<!-- Project-wide settings: a console executable (OutputType Exe) targeting .NET Framework v4.0.
     Configuration and Platform fall back to Debug and AnyCPU when the caller does not supply them. -->
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{0B102DA4-C143-481D-A076-1F56E3CB1CF5}</ProjectGuid>
<OutputType>Exe</OutputType>
<RootNamespace>MultithreadProcessing</RootNamespace>
<AssemblyName>MultithreadProcessing</AssemblyName>
<TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
</PropertyGroup>
<!-- Debug|AnyCPU: unoptimized build with full debug symbols, written to bin\Debug\. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<!-- Release|AnyCPU: optimized build with pdbonly debug info, written to bin\Release\. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<!-- Assembly references. The two Bytescout assemblies are resolved through absolute HintPath
     entries under C:\Program Files\Bytescout PDF Extractor SDK\net4.00, so the SDK has to be
     present at that location (or the paths adjusted) for the build to find them.
     SpecificVersion is False, so the version number in Include is not required to match exactly. -->
<ItemGroup>
<Reference Include="Bytescout.PDFExtractor, Version=8.6.0.2917, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>C:\Program Files\Bytescout PDF Extractor SDK\net4.00\Bytescout.PDFExtractor.dll</HintPath>
</Reference>
<Reference Include="Bytescout.PDFExtractor.OCRExtension, Version=8.6.0.2917, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>C:\Program Files\Bytescout PDF Extractor SDK\net4.00\Bytescout.PDFExtractor.OCRExtension.dll</HintPath>
</Reference>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<!-- C# sources compiled into the executable. -->
<ItemGroup>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<!-- sample.pdf is copied next to the executable on every build; Program.cs opens it by
     the relative name "sample.pdf". -->
<ItemGroup>
<Content Include="sample.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</Content>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
</Project>
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.26730.10
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MultithreadProcessing", "MultithreadProcessing.csproj", "{0B102DA4-C143-481D-A076-1F56E3CB1CF5}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{0B102DA4-C143-481D-A076-1F56E3CB1CF5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{0B102DA4-C143-481D-A076-1F56E3CB1CF5}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0B102DA4-C143-481D-A076-1F56E3CB1CF5}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0B102DA4-C143-481D-A076-1F56E3CB1CF5}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {50466307-7059-438B-8545-42FDA71BC1A6}
EndGlobalSection
EndGlobal
using System;
using System.Diagnostics;
using System.IO;
using System.Threading;
using Bytescout.PDFExtractor;
namespace MultithreadProcessing
{
/// <summary>
/// Console sample that makes "sample.pdf" searchable by splitting it into fixed-size
/// page ranges, running OCR on each range on a thread-pool thread, and merging the
/// processed pieces into "result.pdf".
/// </summary>
class Program
{
    // Number of pages handled by a single work item.
    private const int PagesPerPiece = 10;

    // Limit to 4 threads in queue.
    // Set this value to number of your processor cores for max performance.
    private static readonly Semaphore ThreadLimiter = new Semaphore(4, 4);

    /// <summary>
    /// Entry point: reads the page count of the input document, processes it piece by
    /// piece, then waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        const string inputFile = "sample.pdf";
        const string resultFile = "result.pdf";

        int pageCount;

        // Get document page count
        using (InfoExtractor infoExtractor = new InfoExtractor("demo", "demo"))
        {
            infoExtractor.LoadDocumentFromFile(inputFile);
            pageCount = infoExtractor.GetPageCount();
        }

        if (pageCount > 0)
        {
            ProcessDocument(inputFile, resultFile, pageCount);
        }
        else
        {
            // Nothing to split or merge; avoid waiting on / merging an empty set.
            Console.WriteLine("The document has no pages, nothing to process.");
        }

        Console.WriteLine();
        Console.WriteLine("Press any key to exit...");
        Console.ReadKey();
    }

    /// <summary>
    /// Splits the document into ranges of <see cref="PagesPerPiece"/> pages, queues one
    /// work item per range, waits for all of them and merges the results.
    /// </summary>
    /// <param name="inputFile">Path of the source PDF.</param>
    /// <param name="resultFile">Path of the merged output PDF.</param>
    /// <param name="pageCount">Number of pages in the source PDF; must be positive.</param>
    private static void ProcessDocument(string inputFile, string resultFile, int pageCount)
    {
        // Round up so that a trailing partial range gets its own work item.
        int numberOfThreads = (pageCount + PagesPerPiece - 1) / PagesPerPiece;
        string[] pieces = new string[numberOfThreads];

        Stopwatch stopwatch = Stopwatch.StartNew();

        // A single CountdownEvent replaces one wait handle per work item:
        // WaitHandle.WaitAll accepts at most 64 handles, which capped the
        // document size at 640 pages.
        using (CountdownEvent pending = new CountdownEvent(numberOfThreads))
        {
            for (int i = 0; i < numberOfThreads; i++)
            {
                // Wait for the queue
                ThreadLimiter.WaitOne();

                int startPage = i * PagesPerPiece;
                int endPage = Math.Min(pageCount - 1, startPage + PagesPerPiece - 1);
                pieces[i] = string.Format("temp-{0}-{1}.pdf", startPage, endPage);

                ThreadPool.QueueUserWorkItem(new WaitCallback(ThreadProc),
                    new object[] { i, pending, inputFile, pieces[i], startPage, endPage });
            }

            // Wait for all threads
            pending.Wait();
        }

        try
        {
            // Merge pieces
            using (DocumentMerger merger = new DocumentMerger("demo", "demo"))
            {
                merger.Merge(pieces, resultFile);
            }
        }
        finally
        {
            // Delete temp files even when the merge fails.
            // File.Delete does not throw for a file that does not exist.
            foreach (string tempFile in pieces)
            {
                File.Delete(tempFile);
            }
        }

        Console.WriteLine("All done in {0}.", stopwatch.Elapsed);
    }

    /// <summary>
    /// Thread-pool callback that extracts one page range from the input document and
    /// writes a searchable (OCR-processed) PDF for it.
    /// </summary>
    /// <param name="stateInfo">
    /// An <c>object[]</c> of: work item index (int), completion counter
    /// (<see cref="CountdownEvent"/>), input file path (string), output file path
    /// (string), first page index (int), last page index (int).
    /// </param>
    private static void ThreadProc(object stateInfo)
    {
        object[] state = (object[]) stateInfo;
        int threadIndex = (int) state[0];
        CountdownEvent pending = (CountdownEvent) state[1];
        string inputFile = (string) state[2];
        string outputFile = (string) state[3];
        int startPage = (int) state[4];
        int endPage = (int) state[5];

        // Intermediate file holding the raw extracted range; it has no extension,
        // so it never collides with the ".pdf" output piece of the same range.
        string chunk = string.Format("temp-{0}-{1}", startPage, endPage);

        try
        {
            Console.WriteLine("Thread #{0} started with the page range from {1} to {2}.", threadIndex, startPage, endPage);

            Stopwatch stopwatch = Stopwatch.StartNew();

            // Extract a piece of document.
            // Local indices are 0-based; they are shifted by one for this call
            // (presumably ExtractPageRange takes 1-based page numbers - confirm in SDK docs).
            using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
            {
                splitter.ExtractPageRange(inputFile, chunk, startPage + 1, endPage + 1);
            }

            // Process the piece
            using (SearchablePDFMaker searchablePdfMaker = new SearchablePDFMaker("demo", "demo"))
            {
                searchablePdfMaker.OCRDetectPageRotation = true;
                searchablePdfMaker.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\";
                searchablePdfMaker.LoadDocumentFromFile(chunk);

                // 300 DPI resolution is recommended.
                // Using of higher values will slow down the processing but does not guarantee the higher quality.
                searchablePdfMaker.OCRResolution = 300;

                searchablePdfMaker.MakePDFSearchable(outputFile);
            }

            Console.WriteLine("Thread #{0} finished in {1}.", threadIndex, stopwatch.Elapsed);
        }
        finally
        {
            try
            {
                // Remove the intermediate file on success and on failure alike.
                File.Delete(chunk);
            }
            finally
            {
                // Release semaphore
                ThreadLimiter.Release();

                // Signal the thread is finished
                pending.Signal();
            }
        }
    }
}
}
See also:
Get Your API Key
See also: