ByteScout PDF Extractor SDK – C# – Index PDF Files

  • Home
  • /
  • Articles
  • /
  • ByteScout PDF Extractor SDK – C# – Index PDF Files

How to index PDF files in C# using ByteScout PDF Extractor SDK

The tutorial shows how to index PDF files in C#

Learn how to index PDF files in C# with this source code sample. Want to index PDF files in your C# app? ByteScout PDF Extractor SDK is designed for it. ByteScout PDF Extractor SDK is the SDK that helps developers to extract data from unstructured documents, pdf, images, scanned and electronic forms. Includes AI functions like automatic table detection, automatic table extraction and restructuring, text recognition and text restoration from pdf and scanned documents. Includes PDF to CSV, PDF to XML, PDF to JSON, PDF to searchable PDF functions as well as methods for low level data extraction.

You will save a lot of time on writing and testing code as you may just take the C# code from ByteScout PDF Extractor SDK for index PDF files below and use it in your application. This C# sample code is all you need for your app. Just copy and paste the code, add references (if needs to) and you are all set! Enjoy writing a code with ready-to-use sample C# codes.

Free trial version of ByteScout PDF Extractor SDK is available on our website. Documentation and source code samples are included.

IndexPDFFiles.NETCore.csproj
      
<?xml version="1.0" encoding="utf-8"?> <Project Sdk="Microsoft.NET.Sdk"> <PropertyGroup> <OutputType>Exe</OutputType> <TargetFramework>netcoreapp2.0</TargetFramework> <EnableDefaultCompileItems>false</EnableDefaultCompileItems> <GenerateAssemblyCompanyAttribute>false</GenerateAssemblyCompanyAttribute> <GenerateAssemblyConfigurationAttribute>false</GenerateAssemblyConfigurationAttribute> <GenerateAssemblyFileVersionAttribute>false</GenerateAssemblyFileVersionAttribute> <GenerateAssemblyInformationalVersionAttribute>false</GenerateAssemblyInformationalVersionAttribute> <GenerateAssemblyProductAttribute>false</GenerateAssemblyProductAttribute> <GenerateAssemblyTitleAttribute>false</GenerateAssemblyTitleAttribute> <GenerateAssemblyVersionAttribute>false</GenerateAssemblyVersionAttribute> <GenerateAssemblyCopyrightAttribute>false</GenerateAssemblyCopyrightAttribute> <GenerateAssemblyTrademarkAttribute>false</GenerateAssemblyTrademarkAttribute> <GenerateAssemblyCultureAttribute>false</GenerateAssemblyCultureAttribute> <GenerateAssemblyDescriptionAttribute>false</GenerateAssemblyDescriptionAttribute> </PropertyGroup> <ItemGroup> <Compile Include="Program.cs" /> <Compile Include="Properties\AssemblyInfo.cs" /> </ItemGroup> <ItemGroup> <PackageReference Include="Microsoft.Windows.Compatibility" Version="2.0.0" /> </ItemGroup> <ItemGroup> <Reference Include="Bytescout.PDFExtractor"> <SpecificVersion>False</SpecificVersion> <HintPath>c:\Program Files\Bytescout PDF Extractor SDK\netcoreapp2.0\Bytescout.PDFExtractor.dll</HintPath> </Reference> <Reference Include="Bytescout.PDFExtractor.OCRExtension"> <SpecificVersion>False</SpecificVersion> <HintPath>c:\Program Files\Bytescout PDF Extractor SDK\netcoreapp2.0\Bytescout.PDFExtractor.OCRExtension.dll</HintPath> </Reference> </ItemGroup> </Project>

IndexPDFFiles.VS2005.csproj
      
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <PropertyGroup> <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> <ProductVersion>8.0.50727</ProductVersion> <SchemaVersion>2.0</SchemaVersion> <ProjectGuid>{F1199084-E703-4C28-85E0-1AA2E1C8991B}</ProjectGuid> <OutputType>Exe</OutputType> <AppDesignerFolder>Properties</AppDesignerFolder> <RootNamespace>IndexPDFFiles</RootNamespace> <AssemblyName>IndexPDFFiles</AssemblyName> <StartupObject> </StartupObject> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> <DebugSymbols>true</DebugSymbols> <DebugType>full</DebugType> <Optimize>false</Optimize> <OutputPath>bin\Debug\</OutputPath> <DefineConstants>DEBUG;TRACE</DefineConstants> <ErrorReport>prompt</ErrorReport> <WarningLevel>4</WarningLevel> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> <DebugType>pdbonly</DebugType> <Optimize>true</Optimize> <OutputPath>bin\Release\</OutputPath> <DefineConstants>TRACE</DefineConstants> <ErrorReport>prompt</ErrorReport> <WarningLevel>4</WarningLevel> </PropertyGroup> <ItemGroup> <Reference Include="Bytescout.PDFExtractor, Version=3.0.0.860, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL"> <SpecificVersion>False</SpecificVersion> </Reference> <Reference Include="System" /> <Reference Include="System.Data" /> <Reference Include="System.Deployment" /> <Reference Include="System.Drawing" /> <Reference Include="System.Windows.Forms" /> <Reference Include="System.Xml" /> </ItemGroup> <ItemGroup> <Compile Include="Program.cs" /> <Compile Include="Properties\AssemblyInfo.cs" /> </ItemGroup> <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> <!-- To modify your build process, add your task inside one of the targets below and uncomment it. Other similar extension points exist, see Microsoft.Common.targets. <Target Name="BeforeBuild"> </Target> <Target Name="AfterBuild"> </Target> --> </Project>

IndexPDFFiles.VS2008.csproj
      
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="3.5"> <PropertyGroup> <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> <ProductVersion>9.0.21022</ProductVersion> <SchemaVersion>2.0</SchemaVersion> <ProjectGuid>{F1199084-E703-4C28-85E0-1AA2E1C8991B}</ProjectGuid> <OutputType>Exe</OutputType> <AppDesignerFolder>Properties</AppDesignerFolder> <RootNamespace>IndexPDFFiles</RootNamespace> <AssemblyName>IndexPDFFiles</AssemblyName> <StartupObject> </StartupObject> <OldToolsVersion>2.0</OldToolsVersion> <TargetFrameworkVersion>v3.5</TargetFrameworkVersion> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> <DebugSymbols>true</DebugSymbols> <DebugType>full</DebugType> <Optimize>false</Optimize> <OutputPath>bin\Debug\</OutputPath> <DefineConstants>DEBUG;TRACE</DefineConstants> <ErrorReport>prompt</ErrorReport> <WarningLevel>4</WarningLevel> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> <DebugType>pdbonly</DebugType> <Optimize>true</Optimize> <OutputPath>bin\Release\</OutputPath> <DefineConstants>TRACE</DefineConstants> <ErrorReport>prompt</ErrorReport> <WarningLevel>4</WarningLevel> </PropertyGroup> <ItemGroup> <Reference Include="Bytescout.PDFExtractor, Version=3.0.0.860, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL"> <SpecificVersion>False</SpecificVersion> </Reference> <Reference Include="System" /> <Reference Include="System.Data" /> <Reference Include="System.Deployment" /> <Reference Include="System.Drawing" /> <Reference Include="System.Windows.Forms" /> <Reference Include="System.Xml" /> </ItemGroup> <ItemGroup> <Compile Include="Program.cs" /> <Compile Include="Properties\AssemblyInfo.cs" /> </ItemGroup> <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> <!-- To modify your build process, add your task inside one of the targets below and uncomment it. Other similar extension points exist, see Microsoft.Common.targets. <Target Name="BeforeBuild"> </Target> <Target Name="AfterBuild"> </Target> --> </Project>

IndexPDFFiles.VS2010.csproj
      
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="4.0"> <PropertyGroup> <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> <ProductVersion> </ProductVersion> <SchemaVersion>2.0</SchemaVersion> <ProjectGuid>{F1199084-E703-4C28-85E0-1AA2E1C8991B}</ProjectGuid> <OutputType>Exe</OutputType> <AppDesignerFolder>Properties</AppDesignerFolder> <RootNamespace>IndexPDFFiles</RootNamespace> <AssemblyName>IndexPDFFiles</AssemblyName> <StartupObject> </StartupObject> <OldToolsVersion>3.5</OldToolsVersion> <TargetFrameworkVersion>v4.0</TargetFrameworkVersion> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> <DebugSymbols>true</DebugSymbols> <DebugType>full</DebugType> <Optimize>false</Optimize> <OutputPath>bin\Debug\</OutputPath> <DefineConstants>DEBUG;TRACE</DefineConstants> <ErrorReport>prompt</ErrorReport> <WarningLevel>4</WarningLevel> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> <DebugType>pdbonly</DebugType> <Optimize>true</Optimize> <OutputPath>bin\Release\</OutputPath> <DefineConstants>TRACE</DefineConstants> <ErrorReport>prompt</ErrorReport> <WarningLevel>4</WarningLevel> </PropertyGroup> <ItemGroup> <Reference Include="Bytescout.PDFExtractor, Version=3.0.0.860, Culture=neutral, PublicKeyToken=f7dd1bd9d40a50eb, processorArchitecture=MSIL"> <SpecificVersion>False</SpecificVersion> </Reference> <Reference Include="System" /> <Reference Include="System.Data" /> <Reference Include="System.Deployment" /> <Reference Include="System.Drawing" /> <Reference Include="System.Windows.Forms" /> <Reference Include="System.Xml" /> </ItemGroup> <ItemGroup> <Compile Include="Program.cs" /> <Compile Include="Properties\AssemblyInfo.cs" /> </ItemGroup> <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> <!-- To modify your build process, add your task inside one of the targets below and uncomment it. Other similar extension points exist, see Microsoft.Common.targets. <Target Name="BeforeBuild"> </Target> <Target Name="AfterBuild"> </Target> --> </Project>

Program.cs
      
using System; using System.IO; using Bytescout.PDFExtractor; namespace IndexPDFFiles { class Program { static void Main(string[] args) { // Create Bytescout.PDFExtractor.InfoExtractor instance InfoExtractor infoExtractor = new InfoExtractor(); infoExtractor.RegistrationName = "demo"; infoExtractor.RegistrationKey = "demo"; TextExtractor textExtractor = new TextExtractor(); textExtractor.RegistrationName = "demo"; textExtractor.RegistrationKey = "demo"; // List all PDF files in directory foreach (string file in Directory.GetFiles(@"..\..\..\..", "*.pdf")) { infoExtractor.LoadDocumentFromFile(file); Console.WriteLine("File Name: " + Path.GetFileName(file)); Console.WriteLine("Page Count: " + infoExtractor.GetPageCount()); Console.WriteLine("Author: " + infoExtractor.Author); Console.WriteLine("Title: " + infoExtractor.Title); Console.WriteLine("Producer: " + infoExtractor.Producer); Console.WriteLine("Subject: " + infoExtractor.Subject); Console.WriteLine("CreationDate: " + infoExtractor.CreationDate); Console.WriteLine("Text (first 2 lines): "); // Load a couple of lines from each document textExtractor.LoadDocumentFromFile(file); using (StringReader stringReader = new StringReader(textExtractor.GetTextFromPage(0))) { Console.WriteLine(stringReader.ReadLine()); Console.WriteLine(stringReader.ReadLine()); } Console.WriteLine(); } // Cleanup infoExtractor.Dispose(); textExtractor.Dispose(); Console.WriteLine(); Console.WriteLine("Press any key to continue..."); Console.ReadLine(); } } }

READ MORE

Visit ByteScout PDF Extractor SDK page or

explore documentation

GET YOUR 90 DAY FREE

https://bytescout.com/download/web-installer

Tutorials:

prev
next