Check the samples below to learn how to find specific text in a PDF document in ASP.NET, C#, C#-WPF, VB.NET and VBScript using ByteScout PDF Extractor SDK.
If you need to find text with hyphens in your PDF, check this tutorial.
Select your programming language:
using System;
using System.Drawing;
using Bytescout.PDFExtractor;
namespace FindText
{
/// <summary>
/// ASP.NET page that searches every page of "sample1.pdf" for the string "ipsum"
/// and writes each match, with its per-element details, to the response as HTML.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>
    /// Loads the sample PDF, runs the search and streams the results to the client.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        // This test file will be copied to the project directory on the pre-build event (see the project properties).
        String inputFile = Server.MapPath("sample1.pdf");

        // Create Bytescout.PDFExtractor.TextExtractor instance
        TextExtractor extractor = new TextExtractor();
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document
        extractor.LoadDocumentFromFile(inputFile);

        Response.Clear();
        Response.ContentType = "text/html";

        Response.Write("Searching for \"ipsum\" string:<br><br>");

        // Search each page for "ipsum" string. The page index is declared by the loop so that
        // it can be reported together with every match.
        int pageCount = extractor.GetPageCount();
        for (int i = 0; i < pageCount; i++)
        {
            // Third argument is the case-sensitivity flag.
            if (extractor.Find(i, "ipsum", false))
            {
                do
                {
                    Response.Write("<br/>");
                    Response.Write("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString() + "<br/>");
                    Response.Write("<br/>");

                    // iterate through each element in the found text
                    foreach (SearchResultElement element in extractor.FoundText.Elements)
                    {
                        Response.Write("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height + "<br/>");
                        // Text and font name come from the document, so encode them before
                        // embedding them into the HTML response.
                        Response.Write("Text: " + Server.HtmlEncode(element.Text) + "<br/>");
                        Response.Write("Font is bold: " + element.FontIsBold + "<br/>");
                        Response.Write("Font is italic:" + element.FontIsItalic + "<br/>");
                        Response.Write("Font name: " + Server.HtmlEncode(element.FontName) + "<br/>");
                        Response.Write("Font size:" + element.FontSize + "<br/>");
                        Response.Write("Font color:" + element.FontColor + "<br/>");
                    }
                }
                while (extractor.FindNext());
            }
        }

        Response.End();
    }
}
}
using System;
using System.Drawing;
using Bytescout.PDFExtractor;
namespace FindText
{
/// <summary>
/// Console sample: looks for "ipsum" on every page of "sample1.pdf" and prints
/// the location and font details of each match.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Loads the document, scans it page by page and waits for Enter before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        // Set up the extractor and open the sample document.
        TextExtractor pdf = new TextExtractor();
        pdf.RegistrationName = "demo";
        pdf.RegistrationKey = "demo";
        pdf.LoadDocumentFromFile("sample1.pdf");

        int totalPages = pdf.GetPageCount();
        for (int page = 0; page < totalPages; page++)
        {
            // Find() starts the search on the page; FindNext() advances to the following match.
            bool hasMatch = pdf.Find(page, "ipsum", false);
            while (hasMatch)
            {
                PrintMatch(pdf, page);
                hasMatch = pdf.FindNext();
            }
        }

        Console.WriteLine();
        Console.WriteLine("Press any key to continue...");
        Console.ReadLine();
    }

    /// <summary>
    /// Prints the extractor's current match, followed by one group of lines per result element.
    /// </summary>
    private static void PrintMatch(TextExtractor pdf, int page)
    {
        Console.WriteLine("");
        Console.WriteLine("Found on page " + page + " at location " + pdf.FoundText.Bounds.ToString());
        Console.WriteLine("");

        foreach (SearchResultElement part in pdf.FoundText.Elements)
        {
            Console.WriteLine("Element #" + part.Index + " at left=" + part.Left + "; top=" + part.Top + "; width=" + part.Width + "; height=" + part.Height);
            Console.WriteLine("Text: " + part.Text);
            Console.WriteLine("Font is bold: " + part.FontIsBold);
            Console.WriteLine("Font is italic:" + part.FontIsItalic);
            Console.WriteLine("Font name: " + part.FontName);
            Console.WriteLine("Font size:" + part.FontSize);
            Console.WriteLine("Font color:" + part.FontColor);
        }
    }
}
}
using System;
using System.Drawing;
using System.Text;
using System.Windows;
using Bytescout.PDFExtractor;
namespace WpfApplication1
{
/// <summary>
/// WPF window that loads a PDF, extracts the text of its first page and
/// searches the whole document for a user-supplied string.
/// </summary>
public partial class MainWindow : Window
{
    // Path of the currently loaded PDF; null until a document has been loaded successfully.
    private string _pdfFile;
    private TextExtractor extractor;

    /// <summary>
    /// Initializes the window and creates the extractor shared by all button handlers.
    /// </summary>
    public MainWindow()
    {
        InitializeComponent();
        extractor = new TextExtractor();
    }

    /// <summary>
    /// Lets the user pick a PDF file and loads it into the extractor.
    /// Any exception raised while loading is shown in a message box.
    /// </summary>
    private void Button_Load(object sender, RoutedEventArgs e)
    {
        Microsoft.Win32.OpenFileDialog dlg = new Microsoft.Win32.OpenFileDialog();
        dlg.DefaultExt = ".pdf";
        dlg.Filter = "PDF documents (.pdf)|*.pdf";

        bool? result = dlg.ShowDialog();
        if (result == true)
        {
            try
            {
                extractor.LoadDocumentFromFile(dlg.FileName);
                // Remember the file only after the load succeeded.
                _pdfFile = dlg.FileName;
                Title = _pdfFile;
            }
            catch (Exception exception)
            {
                MessageBox.Show(exception.ToString());
            }
        }
    }

    /// <summary>
    /// Shows the text of the first page of the loaded document. Does nothing if no document is loaded.
    /// </summary>
    private void Button_Extract(object sender, RoutedEventArgs e)
    {
        if (_pdfFile != null)
        {
            string text = extractor.GetText(0, 0); // extract from the first page only (for demonstration purposes)
            textBox1.Text = text;
        }
    }

    /// <summary>
    /// Searches every page of the loaded document for the text typed into the find box
    /// and shows a report of all matches. Does nothing if no document is loaded or the
    /// find box is empty.
    /// </summary>
    private void Button_Find(object sender, RoutedEventArgs e)
    {
        // Same precondition as Button_Extract: there is nothing to search without a loaded document.
        if (_pdfFile == null || textBoxFind.Text.Length == 0)
        {
            return;
        }

        string keyword = textBoxFind.Text;
        StringBuilder builder = new StringBuilder();
        builder.AppendLine("Searching for \"" + keyword + "\"");

        // Search each page; the loop variable is the page index reported with every match.
        int pageCount = extractor.GetPageCount();
        for (int i = 0; i < pageCount; i++)
        {
            // Third argument is the case-sensitivity flag.
            if (extractor.Find(i, keyword, false))
            {
                do
                {
                    builder.AppendLine("");
                    builder.AppendLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());
                    builder.AppendLine("");

                    // iterate through each element in the found text
                    foreach (SearchResultElement element in extractor.FoundText.Elements)
                    {
                        builder.AppendLine("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height);
                        builder.AppendLine("Text: " + element.Text);
                        builder.AppendLine("Font is bold: " + element.FontIsBold);
                        builder.AppendLine("Font is italic:" + element.FontIsItalic);
                        builder.AppendLine("Font name: " + element.FontName);
                        builder.AppendLine("Font size:" + element.FontSize);
                        builder.AppendLine("Font color:" + element.FontColor);
                    }
                }
                while (extractor.FindNext());
            }
        }

        builder.AppendLine("Finished.");
        textBox1.Text = builder.ToString();
    }
}
}
Imports System.Drawing
Imports Bytescout.PDFExtractor
''' <summary>
''' Console sample: looks for "ipsum" on every page of "sample1.pdf" and prints
''' the location and font details of each match.
''' </summary>
Class Program
    ''' <summary>
    ''' Entry point. Loads the document, scans it page by page and waits for Enter before exiting.
    ''' </summary>
    Friend Shared Sub Main(args As String())
        ' Set up the extractor and open the sample document.
        Dim pdf As New TextExtractor()
        pdf.RegistrationName = "demo"
        pdf.RegistrationKey = "demo"
        pdf.LoadDocumentFromFile("sample1.pdf")

        Dim lastPage As Integer = pdf.GetPageCount() - 1
        For pageIndex As Integer = 0 To lastPage
            ' Find() starts the search on the page; FindNext() advances to the following match.
            Dim hasMatch As Boolean = pdf.Find(pageIndex, "ipsum", False)
            While hasMatch
                Console.WriteLine("")
                Console.WriteLine("Found on page " & pageIndex & " at location " & pdf.FoundText.Bounds.ToString())
                Console.WriteLine("")

                ' One group of lines per element of the current match.
                For Each part As SearchResultElement In pdf.FoundText.Elements
                    Console.WriteLine("Element #" & part.Index.ToString() &
                                      " at left=" & part.Left.ToString() &
                                      "; top=" & part.Top.ToString() &
                                      "; width=" & part.Width.ToString() &
                                      "; height=" & part.Height.ToString())
                    Console.WriteLine("Text: " & part.Text)
                    Console.WriteLine("Font is bold: " & part.FontIsBold.ToString())
                    Console.WriteLine("Font is italic:" & part.FontIsItalic.ToString())
                    Console.WriteLine("Font name: " & part.FontName)
                    Console.WriteLine("Font size:" & part.FontSize.ToString())
                    Console.WriteLine("Font color:" & part.FontColor.ToString())
                Next

                hasMatch = pdf.FindNext()
            End While
        Next

        Console.WriteLine()
        Console.WriteLine("Press any key to continue...")
        Console.ReadLine()
    End Sub
End Class
' Searches every page of sample1.pdf for the word "ipsum" and shows one message box
' per match, listing the match bounds and the details of every element in the match.

' Create Bytescout.PDFExtractor.TextExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document
extractor.LoadDocumentFromFile("..\..\sample1.pdf")

' Get page count
pageCount = extractor.GetPageCount()

foundMessage = ""

For i = 0 To pageCount - 1
    If extractor.Find(i, "ipsum", false) Then ' parameters are: page index, string to find, case sensitivity
        Do
            foundMessage = "Found word 'ipsum' on page #" & CStr(i) & " at left=" & CStr(extractor.FoundText.Left) & "; top=" & CStr(extractor.FoundText.Top) & "; width=" & CStr(extractor.FoundText.Width) & "; height=" & CStr(extractor.FoundText.Height)

            ' Start from an empty report for every match so nothing leaks over from the previous one
            elementInfo = ""

            ' iterate through each element in the found text
            For j = 0 To extractor.FoundText.ElementCount - 1
                ' get the j-th search result element (not always the first one)
                Set element = extractor.FoundText.GetElement(j)

                ' append, so the report covers every element rather than only the last one
                elementInfo = elementInfo & "Element #" & CStr(j) & " at left=" & CStr(element.Left) & "; top=" & CStr(element.Top) & "; width=" & CStr(element.Width) & "; height=" & CStr(element.Height) & vbCRLF
                elementInfo = elementInfo & "Text: " & CStr(element.Text) & vbCRLF
                elementInfo = elementInfo & "Font is bold: " & CStr(element.FontIsBold) & vbCRLF
                elementInfo = elementInfo & "Font is italic:" & CStr(element.FontIsItalic) & vbCRLF
                elementInfo = elementInfo & "Font name: " & CStr(element.FontName) & vbCRLF
                elementInfo = elementInfo & "Font size:" & CStr(element.FontSize) & vbCRLF
                ' blank line separates consecutive elements in the message box
                elementInfo = elementInfo & "Font color (as Ole Color):" & CStr(element.FontColorAsOleColor) & vbCRLF & vbCRLF
            Next

            MsgBox foundMessage & vbCRLF & vbCRLF & elementInfo
        Loop While extractor.FindNext
    End If
Next

MsgBox "Done"

Set extractor = Nothing