Extract data from PDF based on keyword – the source code samples below show how to find the pages of a PDF file that contain a given keyword and extract text from them, in C# or Visual Basic .NET, using the Bytescout PDF Extractor SDK.
C#
using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
    /// <summary>
    /// Console sample: scans every page of "sample2.pdf" for the keyword
    /// "References" and prints text from a fixed region of each page
    /// on which the keyword is found.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Loads the sample document, searches it page by page,
        /// writes the extracted text to the console and then waits for input.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The using statement disposes the extractor (and whatever it holds
            // for the loaded document) even if a call below throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf");

                int pageCount = extractor.GetPageCount();

                // Receives the position reported by Find; this sample does not use it.
                RectangleF location;

                // Search each page for some keyword
                for (int i = 0; i < pageCount; i++)
                {
                    // NOTE(review): the third argument is presumably the
                    // case-sensitivity flag - confirm against the SDK reference.
                    if (extractor.Find(i, "References", false, out location))
                    {
                        // If page contains the keyword, extract a text from it.
                        // For demonstration we'll extract the text from top part of the page only.
                        // NOTE(review): the extraction area stays set for later
                        // iterations; verify whether it also restricts Find on
                        // the following pages.
                        extractor.SetExtractionArea(0, 0, 600, 200);

                        string text = extractor.GetTextFromPage(i);
                        Console.WriteLine(text);
                    }
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            // ReadLine keeps the console window open until the user presses Enter.
            Console.ReadLine();
        }
    }
}
VB.NET
Imports System.Drawing
Imports Bytescout.PDFExtractor

Namespace FindText

    ''' <summary>
    ''' Console sample: scans every page of "sample2.pdf" for the keyword
    ''' "References" and prints text from a fixed region of each page
    ''' on which the keyword is found.
    ''' </summary>
    Class Program

        ''' <summary>
        ''' Entry point. Loads the sample document, searches it page by page,
        ''' writes the extracted text to the console and then waits for input.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Friend Shared Sub Main(args As String())

            ' Create Bytescout.PDFExtractor.TextExtractor instance.
            ' The Using block disposes the extractor (and whatever it holds
            ' for the loaded document) even if a call below throws.
            Using extractor As New TextExtractor()
                extractor.RegistrationName = "demo"
                extractor.RegistrationKey = "demo"

                ' Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf")

                Dim pageCount As Integer = extractor.GetPageCount()

                ' Receives the position reported by Find; this sample does not use it.
                Dim location As RectangleF

                ' Search each page for some keyword
                For i As Integer = 0 To pageCount - 1
                    ' NOTE(review): the third argument is presumably the
                    ' case-sensitivity flag - confirm against the SDK reference.
                    If extractor.Find(i, "References", False, location) Then
                        ' If page contains the keyword, extract a text from it.
                        ' For demonstration we'll extract the text from top part of the page only.
                        ' NOTE(review): the extraction area stays set for later
                        ' iterations; verify whether it also restricts Find on
                        ' the following pages.
                        extractor.SetExtractionArea(0, 0, 600, 200)

                        Dim text As String = extractor.GetTextFromPage(i)
                        Console.WriteLine(text)
                    End If
                Next
            End Using

            Console.WriteLine()
            Console.WriteLine("Press any key to continue...")
            ' ReadLine keeps the console window open until the user presses Enter.
            Console.ReadLine()
        End Sub

    End Class

End Namespace