ByteScout PDF Extractor SDK can be used to extract text from a PDF document by a specific keyword. Check the samples below to learn how to search each page of a PDF file for a keyword and extract text from the pages that contain it, in C# and VB.NET.
You may also find it useful to check how to extract text from a specific area by coordinates.
using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
    /// <summary>
    /// Console sample: searches every page of "sample2.pdf" for the keyword
    /// "References" and prints text taken from the top part of each page
    /// on which the keyword is found.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Loads the sample document, scans it page by page and writes
        /// the extracted text to the console, then waits for the user to press Enter.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The extractor holds the loaded document; the using block guarantees it is
            // disposed even when one of the SDK calls throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf");

                int pageCount = extractor.GetPageCount();

                // Search each page (zero-based index) for the keyword
                for (int i = 0; i < pageCount; i++)
                {
                    // NOTE(review): the third argument is presumably the case-sensitivity
                    // flag (false = ignore case) - confirm against the SDK reference.
                    if (extractor.Find(i, "References", false))
                    {
                        // The page contains the keyword. For demonstration, only the
                        // top part of the page is extracted.
                        extractor.SetExtractionArea(0, 0, 600, 200);
                        string text = extractor.GetTextFromPage(i);
                        Console.WriteLine(text);

                        // Restore the full-page area so the restriction above cannot
                        // narrow the keyword search on the following pages.
                        // NOTE(review): assumes Find honours the extraction area - confirm;
                        // the reset is harmless if it does not.
                        extractor.ResetExtractionArea();
                    }
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
    }
}
Imports System
Imports System.Drawing
Imports Bytescout.PDFExtractor

Namespace FindText

    ''' <summary>
    ''' Console sample: searches every page of "sample2.pdf" for the keyword
    ''' "References" and prints text taken from the top part of each page
    ''' on which the keyword is found.
    ''' </summary>
    Class Program

        ''' <summary>
        ''' Entry point. Loads the sample document, scans it page by page and writes
        ''' the extracted text to the console, then waits for the user to press Enter.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Friend Shared Sub Main(args As String())

            ' The extractor holds the loaded document; the Using block guarantees it is
            ' disposed even when one of the SDK calls throws.
            Using extractor As New TextExtractor()
                extractor.RegistrationName = "demo"
                extractor.RegistrationKey = "demo"

                ' Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf")

                Dim pageCount As Integer = extractor.GetPageCount()

                ' Search each page (zero-based index) for the keyword
                For i As Integer = 0 To pageCount - 1

                    ' NOTE(review): the third argument is presumably the case-sensitivity
                    ' flag (False = ignore case) - confirm against the SDK reference.
                    If extractor.Find(i, "References", False) Then

                        ' The page contains the keyword. For demonstration, only the
                        ' top part of the page is extracted.
                        extractor.SetExtractionArea(0, 0, 600, 200)
                        Dim text As String = extractor.GetTextFromPage(i)
                        Console.WriteLine(text)

                        ' Restore the full-page area so the restriction above cannot
                        ' narrow the keyword search on the following pages.
                        ' NOTE(review): assumes Find honours the extraction area - confirm;
                        ' the reset is harmless if it does not.
                        extractor.ResetExtractionArea()

                    End If
                Next
            End Using

            Console.WriteLine()
            Console.WriteLine("Press any key to continue...")
            Console.ReadLine()

        End Sub

    End Class

End Namespace