Extract data from PDF based on keyword in C# and VB.NET

Home
/
Articles
/
Extract data from PDF based on keyword in C# and VB.NET

Extract data from PDF based on keyword – the source code samples below show how to find the pages of a PDF file that contain a keyword and extract text from those pages in C# or Visual Basic .NET using PDF Extractor SDK.

using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
    /// <summary>
    /// Console sample: searches every page of "sample2.pdf" for the keyword
    /// "References" and prints the text found in the top part of each page
    /// that contains it.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Loads the sample document, scans it page by page and
        /// writes the extracted text to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The using block disposes the extractor (and the document it
            // holds) even if loading or extraction throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf");

                int pageCount = extractor.GetPageCount();

                // Receives the rectangle of the match; required by Find but
                // not otherwise used in this sample.
                RectangleF location;

                // Search each page (indices start at 0) for the keyword
                for (int i = 0; i < pageCount; i++)
                {
                    if (extractor.Find(i, "References", false, out location))
                    {
                        // If page contains the keyword, extract a text from it.
                        // For demonstration we'll extract the text from top part of the page only
                        extractor.SetExtractionArea(0, 0, 600, 200);
                        string text = extractor.GetTextFromPage(i);
                        Console.WriteLine(text);

                        // Restore the full-page area so the restriction above
                        // does not carry over to the keyword search on the
                        // following pages.
                        extractor.ResetExtractionArea();
                    }
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
    }
}

VB.NET

Imports System.Drawing
Imports Bytescout.PDFExtractor

Namespace FindText
    ''' <summary>
    ''' Console sample: searches every page of "sample2.pdf" for the keyword
    ''' "References" and prints the text found in the top part of each page
    ''' that contains it.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Entry point. Loads the sample document, scans it page by page and
        ''' writes the extracted text to the console.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Friend Shared Sub Main(args As String())

            ' Create Bytescout.PDFExtractor.TextExtractor instance.
            ' The Using block disposes the extractor (and the document it
            ' holds) even if loading or extraction throws.
            Using extractor As New TextExtractor()
                extractor.RegistrationName = "demo"
                extractor.RegistrationKey = "demo"

                ' Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf")

                Dim pageCount As Integer = extractor.GetPageCount()

                ' Receives the rectangle of the match; required by Find but
                ' not otherwise used in this sample.
                Dim location As RectangleF

                ' Search each page (indices start at 0) for the keyword
                For i As Integer = 0 To pageCount - 1
                    If extractor.Find(i, "References", False, location) Then
                        ' If page contains the keyword, extract a text from it.
                        ' For demonstration we'll extract the text from top part of the page only
                        extractor.SetExtractionArea(0, 0, 600, 200)
                        Dim text As String = extractor.GetTextFromPage(i)
                        Console.WriteLine(text)

                        ' Restore the full-page area so the restriction above
                        ' does not carry over to the keyword search on the
                        ' following pages.
                        extractor.ResetExtractionArea()
                    End If
                Next
            End Using

            Console.WriteLine()
            Console.WriteLine("Press any key to continue...")
            Console.ReadLine()
        End Sub
    End Class
End Namespace

Tutorials:

Web API