Extract data from PDF based on keyword in C# and VB.NET - ByteScout

Extract data from PDF based on keyword in C# and VB.NET

  • Home
  • /
  • Articles
  • /
  • Extract data from PDF based on keyword in C# and VB.NET

The source code samples below show how to find the pages of a PDF file that contain a given keyword and extract text from those pages in C# or Visual Basic .NET using PDF Extractor SDK.

C#

[vb]
using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
    /// <summary>
    /// Console sample: scans every page of "sample2.pdf" for the keyword
    /// "References" and prints the text found in the top part of each page
    /// that contains it.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Loads the sample document, searches it page by page and
        /// writes the extracted text to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The using statement disposes the extractor (and the document it
            // has loaded) even if one of the calls below throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf");

                int pageCount = extractor.GetPageCount();
                RectangleF location;

                // Search each page for some keyword (page indices are zero-based here)
                for (int i = 0; i < pageCount; i++)
                {
                    if (extractor.Find(i, "References", false, out location))
                    {
                        // If page contains the keyword, extract a text from it.
                        // For demonstration we'll extract the text from top part of the page only
                        extractor.SetExtractionArea(0, 0, 600, 200);
                        string text = extractor.GetTextFromPage(i);
                        Console.WriteLine(text);

                        // The extraction area is state on the extractor, so clear it
                        // before moving on; otherwise it would still be in effect when
                        // the following pages are processed.
                        // NOTE(review): presumably Find honors the extraction area too - confirm against the SDK docs.
                        extractor.ResetExtractionArea();
                    }
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue…");
            Console.ReadLine();
        }
    }
}
[/vb]

VB.NET

[vbnet]
Imports System.Drawing
Imports Bytescout.PDFExtractor

Namespace FindText
    ''' <summary>
    ''' Console sample: scans every page of "sample2.pdf" for the keyword
    ''' "References" and prints the text found in the top part of each page
    ''' that contains it.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Entry point. Loads the sample document, searches it page by page and
        ''' writes the extracted text to the console.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Friend Shared Sub Main(args As String())

            ' Create Bytescout.PDFExtractor.TextExtractor instance.
            ' The Using block disposes the extractor (and the document it
            ' has loaded) even if one of the calls below throws.
            Using extractor As New TextExtractor()
                extractor.RegistrationName = "demo"
                extractor.RegistrationKey = "demo"

                ' Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf")

                Dim pageCount As Integer = extractor.GetPageCount()
                Dim location As RectangleF

                ' Search each page for some keyword (page indices are zero-based here)
                For i As Integer = 0 To pageCount - 1
                    If extractor.Find(i, "References", False, location) Then
                        ' If page contains the keyword, extract a text from it.
                        ' For demonstration we'll extract the text from top part of the page only
                        extractor.SetExtractionArea(0, 0, 600, 200)
                        Dim text As String = extractor.GetTextFromPage(i)
                        Console.WriteLine(text)

                        ' The extraction area is state on the extractor, so clear it
                        ' before moving on; otherwise it would still be in effect when
                        ' the following pages are processed.
                        ' NOTE(review): presumably Find honors the extraction area too - confirm against the SDK docs.
                        extractor.ResetExtractionArea()
                    End If
                Next
            End Using

            Console.WriteLine()
            Console.WriteLine("Press any key to continue…")
            Console.ReadLine()
        End Sub
    End Class
End Namespace
[/vbnet]

Tutorials:

prev
next