The sample source code below shows how to make a PDF document searchable in C# and VB.NET using ByteScout PDF Extractor SDK.
It is also possible to make an unsearchable PDF.
using Bytescout.PDFExtractor;
// For OCR to work, add references to both Bytescout.PDFExtractor.dll and Bytescout.PDFExtractor.OCRExtension.dll to your project.
namespace MakeSearchablePDF
{
    /// <summary>
    /// Console sample that runs OCR over "sample_ocr.pdf" and writes a searchable
    /// copy of it to "output.pdf".
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: configures a <c>SearchablePDFMaker</c>, produces the searchable
        /// PDF and then opens the result in the associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create the Bytescout.PDFExtractor.SearchablePDFMaker instance.
            // The using block releases the loaded document deterministically,
            // even if one of the calls below throws.
            using (SearchablePDFMaker searchablePDFMaker = new SearchablePDFMaker())
            {
                searchablePDFMaker.RegistrationName = "demo";
                searchablePDFMaker.RegistrationKey = "demo";

                // Load sample PDF document
                searchablePDFMaker.LoadDocumentFromFile("sample_ocr.pdf");

                // Set the location of "tessdata" folder containing language data files.
                // NOTE(review): this is the default SDK install path - adjust for your machine.
                searchablePDFMaker.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\Redistributable\net2.00\tessdata\";

                // Set OCR language: "eng" for English, "deu" for German, "fra" for French,
                // "spa" for Spanish etc - according to files in /tessdata
                searchablePDFMaker.OCRLanguage = "eng";

                // Set PDF document rendering resolution
                searchablePDFMaker.OCRResolution = 300;

                // Run OCR and save the searchable PDF to file
                searchablePDFMaker.MakePDFSearchable("output.pdf");
            }

            // Open output file in default associated application.
            // UseShellExecute is set explicitly because it defaults to false on
            // .NET Core / .NET 5+, where starting a document path would otherwise throw.
            System.Diagnostics.ProcessStartInfo startInfo = new System.Diagnostics.ProcessStartInfo("output.pdf");
            startInfo.UseShellExecute = true;
            System.Diagnostics.Process.Start(startInfo);
        }
    }
}
Imports Bytescout.PDFExtractor
''' <summary>
''' Console sample that runs OCR over "sample_ocr.pdf" and writes a searchable
''' copy of it to "output.pdf".
''' </summary>
Module Module1

    ''' <summary>
    ''' Entry point: configures a SearchablePDFMaker, produces the searchable PDF
    ''' and then opens the result in the associated application.
    ''' </summary>
    Sub Main()

        ' Create the Bytescout.PDFExtractor.SearchablePDFMaker instance.
        ' The Using block releases the loaded document deterministically,
        ' even if one of the calls below throws.
        Using searchablePdfMaker As SearchablePDFMaker = New SearchablePDFMaker()

            searchablePdfMaker.RegistrationName = "demo"
            searchablePdfMaker.RegistrationKey = "demo"

            ' Load sample PDF document
            searchablePdfMaker.LoadDocumentFromFile("sample_ocr.pdf")

            ' Set the location of "tessdata" folder containing language data files.
            ' NOTE(review): this is the default SDK install path - adjust for your machine.
            searchablePdfMaker.OCRLanguageDataFolder = "c:\Program Files\Bytescout PDF Extractor SDK\Redistributable\net2.00\tessdata\"

            ' Set OCR language: "eng" for English, "deu" for German, "fra" for French,
            ' "spa" for Spanish etc - according to files in /tessdata
            searchablePdfMaker.OCRLanguage = "eng"

            ' Set PDF document rendering resolution
            searchablePdfMaker.OCRResolution = 300

            ' Run OCR and save the searchable PDF to file
            searchablePdfMaker.MakePDFSearchable("output.pdf")

        End Using

        ' Open output file in default associated application.
        ' UseShellExecute is set explicitly because it defaults to False on
        ' .NET Core / .NET 5+, where starting a document path would otherwise throw.
        Dim startInfo As New System.Diagnostics.ProcessStartInfo("output.pdf")
        startInfo.UseShellExecute = True
        System.Diagnostics.Process.Start(startInfo)

    End Sub

End Module