The sample source code below shows how to make a PDF document searchable in C# and VB.NET using ByteScout PDF Extractor SDK.
The SDK can also do the reverse: make a searchable PDF document unsearchable.
using Bytescout.PDFExtractor;

// To make OCR work, add references to Bytescout.PDFExtractor.dll and
// Bytescout.PDFExtractor.OCRExtension.dll to your project.

namespace MakeSearchablePDF
{
    /// <summary>
    /// Console sample: loads "sample_ocr.pdf", configures OCR and asks the SDK to
    /// produce a searchable copy named "output.pdf", then opens that file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point of the sample.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create the SearchablePDFMaker instance. The using statement releases it
            // even when one of the calls below throws.
            // NOTE(review): relies on SearchablePDFMaker implementing IDisposable - confirm
            // against the SDK version you reference.
            using (SearchablePDFMaker searchablePDFMaker = new SearchablePDFMaker())
            {
                searchablePDFMaker.RegistrationName = "demo";
                searchablePDFMaker.RegistrationKey = "demo";

                // Load sample PDF document
                searchablePDFMaker.LoadDocumentFromFile("sample_ocr.pdf");

                // Set the location of "tessdata" folder containing language data files
                searchablePDFMaker.OCRLanguageDataFolder = @"c:\Program Files\Bytescout PDF Extractor SDK\Redistributable\net2.00\tessdata\";

                // Set OCR language:
                // "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish etc. -
                // according to files in /tessdata
                searchablePDFMaker.OCRLanguage = "eng";

                // Set PDF document rendering resolution
                searchablePDFMaker.OCRResolution = 300;

                // Make the document searchable and save the result to a new PDF file
                searchablePDFMaker.MakePDFSearchable("output.pdf");
            }

            // Open output file in default associated application.
            // UseShellExecute is set explicitly because its default is false on
            // .NET Core / .NET 5+, where starting a document path directly would fail.
            System.Diagnostics.ProcessStartInfo startInfo = new System.Diagnostics.ProcessStartInfo("output.pdf");
            startInfo.UseShellExecute = true;
            System.Diagnostics.Process.Start(startInfo);
        }
    }
}
Imports Bytescout.PDFExtractor

' Console sample: loads "sample_ocr.pdf", configures OCR and asks the SDK to
' produce a searchable copy named "output.pdf", then opens that file.
Module Module1

    Sub Main()

        ' Create the SearchablePDFMaker instance. The Using block releases it
        ' even when one of the calls below throws.
        ' NOTE(review): relies on SearchablePDFMaker implementing IDisposable - confirm
        ' against the SDK version you reference.
        Using searchablePdfMaker As New SearchablePDFMaker()

            searchablePdfMaker.RegistrationName = "demo"
            searchablePdfMaker.RegistrationKey = "demo"

            ' Load sample PDF document
            searchablePdfMaker.LoadDocumentFromFile("sample_ocr.pdf")

            ' Set the location of "tessdata" folder containing language data files
            searchablePdfMaker.OCRLanguageDataFolder = "c:\Program Files\Bytescout PDF Extractor SDK\Redistributable\net2.00\tessdata\"

            ' Set OCR language:
            ' "eng" for English, "deu" for German, "fra" for French, "spa" for Spanish etc. -
            ' according to files in /tessdata
            searchablePdfMaker.OCRLanguage = "eng"

            ' Set PDF document rendering resolution
            searchablePdfMaker.OCRResolution = 300

            ' Make the document searchable and save the result to a new PDF file
            searchablePdfMaker.MakePDFSearchable("output.pdf")

        End Using

        ' Open output file in default associated application.
        ' UseShellExecute is set explicitly because its default is False on
        ' .NET Core / .NET 5+, where starting a document path directly would fail.
        Dim startInfo As New System.Diagnostics.ProcessStartInfo("output.pdf")
        startInfo.UseShellExecute = True
        System.Diagnostics.Process.Start(startInfo)

    End Sub

End Module