ByteScout PDF Extractor SDK is the Software Development Kit (SDK) that is designed to help developers with data extraction from unstructured documents like pdf, tiff, scans, images, scanned and electronic forms. The library is powered by OCR, computer vision and AI to provide unique functionality like table detection, automatic table structure extraction, data restoration, data restructuring and reconstruction. Supports PDF, TIFF, PNG, JPG images as input and can output CSV, XML, JSON formatted data. Includes full set of utilities like pdf splitter, pdf merger, searchable pdf maker.
On-demand (REST Web API) version:
Web API (on-demand version)
On-premise offline SDK for Windows:
60 Day Free Trial (on-premise)
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProductVersion>8.0.50727</ProductVersion>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{393E826B-A9B3-4E6A-AD48-6C790AFF8283}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>OCRModes</RootNamespace>
<AssemblyName>OCRModes</AssemblyName>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG,TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<Import Project="$(MSBuildBinPath)\Microsoft.VisualBasic.Targets" />
<ItemGroup>
<Import Include="Microsoft.VisualBasic" />
<Import Include="System" />
<Reference Include="Bytescout.PDFExtractor, Version=4.0.0.1542, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
</Reference>
<Reference Include="Bytescout.PDFExtractor.OCRExtension, Version=4.0.0.1542, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
</Reference>
<Reference Include="System" />
<Reference Include="System.Data" />
<Reference Include="System.Drawing" />
<Reference Include="System.Xml" />
<Compile Include="Program.vb" />
<Compile Include="Properties\AssemblyInfo.vb" />
<Content Include="..\..\SampleWith_Vector_Image_Font.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
<Link>SampleWith_Vector_Image_Font.pdf</Link>
</Content>
</ItemGroup>
</Project>
60 Day Free Trial or Visit ByteScout PDF Extractor SDK Home Page
Explore ByteScout PDF Extractor SDK Documentation
Explore Samples
Sign Up for ByteScout PDF Extractor SDK Online Training
Get Your API Key
Explore Web API Docs
Explore Web API Samples
<Project ToolsVersion="3.5" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProductVersion>8.0.50727</ProductVersion>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{393E826B-A9B3-4E6A-AD48-6C790AFF8283}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>OCRModes</RootNamespace>
<AssemblyName>OCRModes</AssemblyName>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG,TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<Import Project="$(MSBuildBinPath)\Microsoft.VisualBasic.Targets" />
<ItemGroup>
<Import Include="Microsoft.VisualBasic" />
<Import Include="System" />
<Reference Include="Bytescout.PDFExtractor, Version=4.0.0.1542, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
</Reference>
<Reference Include="Bytescout.PDFExtractor.OCRExtension, Version=4.0.0.1542, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
</Reference>
<Reference Include="System" />
<Reference Include="System.Data" />
<Reference Include="System.Drawing" />
<Reference Include="System.Xml" />
<Compile Include="Program.vb" />
<Compile Include="Properties\AssemblyInfo.vb" />
<Content Include="..\..\SampleWith_Vector_Image_Font.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
<Link>SampleWith_Vector_Image_Font.pdf</Link>
</Content>
</ItemGroup>
</Project>
60 Day Free Trial or Visit ByteScout PDF Extractor SDK Home Page
Explore ByteScout PDF Extractor SDK Documentation
Explore Samples
Sign Up for ByteScout PDF Extractor SDK Online Training
Get Your API Key
Explore Web API Docs
Explore Web API Samples
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{393E826B-A9B3-4E6A-AD48-6C790AFF8283}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>OCRModes</RootNamespace>
<AssemblyName>OCRModes</AssemblyName>
<TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG,TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.VisualBasic.Targets" />
<ItemGroup>
<Import Include="Microsoft.VisualBasic" />
<Import Include="System" />
<Reference Include="Bytescout.PDFExtractor, Version=4.0.0.1542, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
</Reference>
<Reference Include="Bytescout.PDFExtractor.OCRExtension, Version=4.0.0.1542, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
</Reference>
<Reference Include="System" />
<Reference Include="System.Data" />
<Reference Include="System.Drawing" />
<Reference Include="System.Xml" />
<Compile Include="Program.vb" />
<Compile Include="Properties\AssemblyInfo.vb" />
<Content Include="..\..\SampleWith_Vector_Image_Font.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
<Link>SampleWith_Vector_Image_Font.pdf</Link>
</Content>
</ItemGroup>
</Project>
60 Day Free Trial or Visit ByteScout PDF Extractor SDK Home Page
Explore ByteScout PDF Extractor SDK Documentation
Explore Samples
Sign Up for ByteScout PDF Extractor SDK Online Training
Get Your API Key
Explore Web API Docs
Explore Web API Samples
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{393E826B-A9B3-4E6A-AD48-6C790AFF8283}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>OCRModes</RootNamespace>
<AssemblyName>OCRModes</AssemblyName>
<TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG,TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.VisualBasic.Targets" />
<ItemGroup>
<Import Include="Microsoft.VisualBasic" />
<Import Include="System" />
<Reference Include="Bytescout.PDFExtractor, Version=4.0.0.1542, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
</Reference>
<Reference Include="Bytescout.PDFExtractor.OCRExtension, Version=4.0.0.1542, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
</Reference>
<Reference Include="System" />
<Reference Include="System.Data" />
<Reference Include="System.Drawing" />
<Reference Include="System.Xml" />
<Compile Include="Program.vb" />
<Compile Include="Properties\AssemblyInfo.vb" />
<Content Include="..\..\SampleWith_Vector_Image_Font.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
<Link>SampleWith_Vector_Image_Font.pdf</Link>
</Content>
</ItemGroup>
</Project>
60 Day Free Trial or Visit ByteScout PDF Extractor SDK Home Page
Explore ByteScout PDF Extractor SDK Documentation
Explore Samples
Sign Up for ByteScout PDF Extractor SDK Online Training
Get Your API Key
Explore Web API Docs
Explore Web API Samples
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{393E826B-A9B3-4E6A-AD48-6C790AFF8283}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>OCRModes</RootNamespace>
<AssemblyName>OCRModes</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG,TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.VisualBasic.Targets" />
<ItemGroup>
<Import Include="Microsoft.VisualBasic" />
<Import Include="System" />
<Reference Include="Bytescout.PDFExtractor, Version=4.0.0.1542, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
</Reference>
<Reference Include="Bytescout.PDFExtractor.OCRExtension, Version=4.0.0.1542, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
</Reference>
<Reference Include="System" />
<Reference Include="System.Data" />
<Reference Include="System.Drawing" />
<Reference Include="System.Xml" />
<Compile Include="Program.vb" />
<Compile Include="Properties\AssemblyInfo.vb" />
<Content Include="..\..\SampleWith_Vector_Image_Font.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
<Link>SampleWith_Vector_Image_Font.pdf</Link>
</Content>
</ItemGroup>
</Project>
60 Day Free Trial or Visit ByteScout PDF Extractor SDK Home Page
Explore ByteScout PDF Extractor SDK Documentation
Explore Samples
Sign Up for ByteScout PDF Extractor SDK Online Training
Get Your API Key
Explore Web API Docs
Explore Web API Samples
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{393E826B-A9B3-4E6A-AD48-6C790AFF8283}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>OCRModes</RootNamespace>
<AssemblyName>OCRModes</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG,TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.VisualBasic.Targets" />
<ItemGroup>
<Import Include="Microsoft.VisualBasic" />
<Import Include="System" />
<Reference Include="Bytescout.PDFExtractor, Version=4.0.0.1542, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
</Reference>
<Reference Include="Bytescout.PDFExtractor.OCRExtension, Version=4.0.0.1542, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
</Reference>
<Reference Include="System" />
<Reference Include="System.Data" />
<Reference Include="System.Drawing" />
<Reference Include="System.Xml" />
<Compile Include="Program.vb" />
<Compile Include="Properties\AssemblyInfo.vb" />
<Content Include="..\..\SampleWith_Vector_Image_Font.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
<Link>SampleWith_Vector_Image_Font.pdf</Link>
</Content>
</ItemGroup>
</Project>
60 Day Free Trial or Visit ByteScout PDF Extractor SDK Home Page
Explore ByteScout PDF Extractor SDK Documentation
Explore Samples
Sign Up for ByteScout PDF Extractor SDK Online Training
Get Your API Key
Explore Web API Docs
Explore Web API Samples
Imports Bytescout.PDFExtractor
' To make OCR work you should add the following references to your project:
' 'Bytescout.PDFExtractor.dll', 'Bytescout.PDFExtractor.OCRExtension.dll'.
Class Program
Friend Shared Sub Main(args As String())
' Input document
Dim inputDocument As String = ".\SampleWith_Vector_Image_Font.pdf"
' Extracting text with different OCRModes
' 1. TextFromImagesOnly (Plain Mode)
Console.WriteLine("---------------------------------" + Environment.NewLine + "Extraction Mode: TextFromImagesOnly " + Environment.NewLine + "---------------------------------")
Dim resultText As String = _ExtractTextWithSpecificOCRMode(inputDocument, OCRMode.TextFromImagesOnly)
Console.WriteLine(resultText)
' 2. TextFromVectorOnly (Plain Mode)
Console.WriteLine("---------------------------------" + Environment.NewLine + "Extraction Mode: TextFromVectorOnly " + Environment.NewLine + "---------------------------------")
resultText = _ExtractTextWithSpecificOCRMode(inputDocument, OCRMode.TextFromVectorsOnly)
Console.WriteLine(resultText)
' 3. TextFromImagesAndFonts (Combined Mode)
Console.WriteLine("---------------------------------" + Environment.NewLine + "Extraction Mode: TextFromImagesAndFonts " + Environment.NewLine + "---------------------------------")
resultText = _ExtractTextWithSpecificOCRMode(inputDocument, OCRMode.TextFromImagesAndFonts)
Console.WriteLine(resultText)
' 4. TextFromImagesAndVectorsAndFonts (Combined Mode)
Console.WriteLine("---------------------------------" + Environment.NewLine + "Extraction Mode: TextFromImagesAndVectorsAndFonts " + Environment.NewLine + "---------------------------------")
resultText = _ExtractTextWithSpecificOCRMode(inputDocument, OCRMode.TextFromImagesAndVectorsAndFonts)
Console.WriteLine(resultText)
Console.ReadLine()
End Sub
''' <summary>
''' Extract text from document with specific Ocr Mode
''' </summary>
Friend Shared Function _ExtractTextWithSpecificOCRMode(inputDocument As String, ocrMode As OCRMode) As String
' Location of OCR language data files
Dim ocrLanguageDataFolder As String = "c:\Program Files\Bytescout PDF Extractor SDK\ocrdata_best\"
' OCR language
Dim ocrLanguage As String = "eng" ' "eng" for english, "deu" for German, "fra" for French, "spa" for Spanish etc - according to files in "ocrdata" folder
' Find more language files at https://github.com/bytescout/ocrdata/tree/master/ocrdata_best
' Create TextExtractor instance
Using textExtractor As TextExtractor = New TextExtractor("demo", "demo")
' Load document to TextExtractor
textExtractor.LoadDocumentFromFile(inputDocument)
' Specify Ocr Mode
textExtractor.OCRMode = ocrMode
' Ocr language data folder path and language
textExtractor.OCRLanguageDataFolder = ocrLanguageDataFolder
textExtractor.OCRLanguage = ocrLanguage
' Return extracted text
Return textExtractor.GetText()
End Using
End Function
End Class
60 Day Free Trial or Visit ByteScout PDF Extractor SDK Home Page
Explore ByteScout PDF Extractor SDK Documentation
Explore Samples
Sign Up for ByteScout PDF Extractor SDK Online Training
Get Your API Key
Explore Web API Docs
Explore Web API Samples
60 Day Free Trial or Visit ByteScout PDF Extractor SDK Home Page
Explore ByteScout PDF Extractor SDK Documentation
Explore Samples
Sign Up for ByteScout PDF Extractor SDK Online Training
Get Your API Key
Explore Web API Docs
Explore Web API Samples