ByteScout PDF Extractor SDK can be used to extract text from PDF by a specific keyword. Check the samples below to learn how to search each page of a PDF file for a keyword and extract text from the pages containing the keyword in C# and VB.NET.
You may also find it useful to check how to extract text from a specific area by coordinates.
using System;
using System.Drawing;
using Bytescout.PDFExtractor;
namespace FindText
{
    /// <summary>
    /// Console sample: searches every page of "sample2.pdf" for the keyword
    /// "References" and prints the text found in the top part of each page
    /// that contains it.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Loads the sample document, scans it page by page and
        /// writes the extracted text to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The extractor keeps the loaded document open; the using block
            // guarantees it is released even if loading or extraction throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf");

                int pageCount = extractor.GetPageCount();

                // Search each page for some keyword (page indexes are zero-based here)
                for (int i = 0; i < pageCount; i++)
                {
                    if (extractor.Find(i, "References", false))
                    {
                        // If page contains the keyword, extract a text from it.
                        // For demonstration we'll extract the text from top part of the page only
                        extractor.SetExtractionArea(0, 0, 600, 200);
                        string text = extractor.GetTextFromPage(i);
                        Console.WriteLine(text);

                        // Drop the area restriction again so it cannot carry over
                        // into the keyword search on the following pages.
                        // NOTE(review): confirm against the SDK docs that Find honors
                        // the extraction area; the reset is harmless either way.
                        extractor.ResetExtractionArea();
                    }
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
    }
}
Imports System.Drawing
Imports Bytescout.PDFExtractor
Namespace FindText
    ''' <summary>
    ''' Console sample: searches every page of "sample2.pdf" for the keyword
    ''' "References" and prints the text found in the top part of each page
    ''' that contains it.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Entry point. Loads the sample document, scans it page by page and
        ''' writes the extracted text to the console.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Friend Shared Sub Main(args As String())
            ' The extractor keeps the loaded document open; the Using block
            ' guarantees it is released even if loading or extraction throws.
            Using extractor As New TextExtractor()
                extractor.RegistrationName = "demo"
                extractor.RegistrationKey = "demo"

                ' Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf")

                Dim pageCount As Integer = extractor.GetPageCount()

                ' Search each page for some keyword (page indexes are zero-based here)
                For i As Integer = 0 To pageCount - 1
                    If extractor.Find(i, "References", False) Then
                        ' If page contains the keyword, extract a text from it.
                        ' For demonstration we'll extract the text from top part of the page only
                        extractor.SetExtractionArea(0, 0, 600, 200)
                        Dim text As String = extractor.GetTextFromPage(i)
                        Console.WriteLine(text)

                        ' Drop the area restriction again so it cannot carry over
                        ' into the keyword search on the following pages.
                        ' NOTE(review): confirm against the SDK docs that Find honors
                        ' the extraction area; the reset is harmless either way.
                        extractor.ResetExtractionArea()
                    End If
                Next
            End Using

            Console.WriteLine()
            Console.WriteLine("Press any key to continue...")
            Console.ReadLine()
        End Sub
    End Class
End Namespace