Check the samples below to learn how to find specific text in a PDF document in ASP.NET, C#, C#-WPF, VB.NET and VBScript using ByteScout PDF Extractor SDK.
If you need to find text with hyphens in your PDF, check this tutorial.
Select your programming language:
using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
    // ASP.NET page that searches every page of sample1.pdf for the word "ipsum"
    // and writes each match, together with its text elements, to the HTTP response.
    public partial class _Default : System.Web.UI.Page
    {
        // Runs the search when the page loads and replaces the response with the report.
        protected void Page_Load(object sender, EventArgs e)
        {
            // This test file will be copied to the project directory on the pre-build event (see the project properties).
            String inputFile = Server.MapPath("sample1.pdf");

            // Create Bytescout.PDFExtractor.TextExtractor instance
            TextExtractor extractor = new TextExtractor();
            extractor.RegistrationName = "demo";
            extractor.RegistrationKey = "demo";

            // Load sample PDF document
            extractor.LoadDocumentFromFile(inputFile);

            Response.Clear();
            Response.ContentType = "text/html";

            Response.Write("Searching for \"ipsum\" string:<br><br>");

            // Search each page for "ipsum" string, so the reported page index is the real one
            int pageCount = extractor.GetPageCount();

            for (int i = 0; i < pageCount; i++)
            {
                if (!extractor.Find(i, "ipsum", false))
                {
                    continue;
                }

                do
                {
                    Response.Write("<br/>");
                    Response.Write("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString() + "<br/>");
                    Response.Write("<br/>");

                    // iterate through each element in the found text
                    foreach (SearchResultElement element in extractor.FoundText.Elements)
                    {
                        Response.Write("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height + "<br/>");
                        // Text and font names come from the PDF, so encode them before emitting HTML.
                        Response.Write("Text: " + Server.HtmlEncode(element.Text) + "<br/>");
                        Response.Write("Font is bold: " + element.FontIsBold + "<br/>");
                        Response.Write("Font is italic:" + element.FontIsItalic + "<br/>");
                        Response.Write("Font name: " + Server.HtmlEncode(element.FontName) + "<br/>");
                        Response.Write("Font size:" + element.FontSize + "<br/>");
                        Response.Write("Font color:" + element.FontColor + "<br/>");
                    }
                }
                while (extractor.FindNext());
            }

            Response.End();
        }
    }
}
using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
    // Console sample: reports every occurrence of "ipsum" in sample1.pdf, page by page.
    class Program
    {
        static void Main(string[] args)
        {
            // Set up the text extractor with the demo registration
            TextExtractor extractor = new TextExtractor();
            extractor.RegistrationName = "demo";
            extractor.RegistrationKey = "demo";

            // Open the sample document
            extractor.LoadDocumentFromFile("sample1.pdf");

            int totalPages = extractor.GetPageCount();

            for (int page = 0; page < totalPages; page++)
            {
                ReportMatchesOnPage(extractor, page);
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }

        // Prints every match of "ipsum" found on the given page.
        private static void ReportMatchesOnPage(TextExtractor extractor, int page)
        {
            bool hasMatch = extractor.Find(page, "ipsum", false);

            while (hasMatch)
            {
                Console.WriteLine("");
                Console.WriteLine("Found on page " + page + " at location " + extractor.FoundText.Bounds.ToString());
                Console.WriteLine("");

                // Walk over the individual elements that make up the match
                foreach (SearchResultElement part in extractor.FoundText.Elements)
                {
                    PrintElement(part);
                }

                hasMatch = extractor.FindNext();
            }
        }

        // Prints position, text and font details of a single search result element.
        private static void PrintElement(SearchResultElement part)
        {
            Console.WriteLine("Element #" + part.Index
                + " at left=" + part.Left
                + "; top=" + part.Top
                + "; width=" + part.Width
                + "; height=" + part.Height);
            Console.WriteLine("Text: " + part.Text);
            Console.WriteLine("Font is bold: " + part.FontIsBold);
            Console.WriteLine("Font is italic:" + part.FontIsItalic);
            Console.WriteLine("Font name: " + part.FontName);
            Console.WriteLine("Font size:" + part.FontSize);
            Console.WriteLine("Font color:" + part.FontColor);
        }
    }
}
using System;
using System.Drawing;
using System.Text;
using System.Windows;
using Bytescout.PDFExtractor;

namespace WpfApplication1
{
    // Main window of the WPF sample: loads a PDF, extracts text from its first page,
    // and searches the document for the text typed into textBoxFind.
    public partial class MainWindow : Window
    {
        // Path of the currently loaded PDF; null until a document has been loaded successfully.
        private string _pdfFile;
        private TextExtractor extractor;

        public MainWindow()
        {
            InitializeComponent();

            extractor = new TextExtractor();
        }

        // Lets the user pick a PDF file and loads it into the extractor.
        private void Button_Load(object sender, RoutedEventArgs e)
        {
            Microsoft.Win32.OpenFileDialog dlg = new Microsoft.Win32.OpenFileDialog();
            dlg.DefaultExt = ".pdf";
            dlg.Filter = "PDF documents (.pdf)|*.pdf";

            bool? result = dlg.ShowDialog();

            if (result == true)
            {
                try
                {
                    extractor.LoadDocumentFromFile(dlg.FileName);
                    // Remember the file only after it loaded without an exception.
                    _pdfFile = dlg.FileName;
                    Title = _pdfFile;
                }
                catch (Exception exception)
                {
                    MessageBox.Show(exception.ToString());
                }
            }
        }

        // Shows the text of the first page in textBox1.
        private void Button_Extract(object sender, RoutedEventArgs e)
        {
            if (_pdfFile != null)
            {
                string text = extractor.GetText(0, 0); // extract from the first page only (for demonstration purposes)
                textBox1.Text = text;
            }
        }

        // Searches every page of the loaded document for the text in textBoxFind
        // and shows a report of all matches in textBox1.
        private void Button_Find(object sender, RoutedEventArgs e)
        {
            // Nothing to search until a document has been loaded (same guard as Button_Extract).
            if (_pdfFile == null || textBoxFind.Text.Length == 0)
            {
                return;
            }

            StringBuilder builder = new StringBuilder();

            builder.AppendLine("Searching for \"" + textBoxFind.Text + "\"");

            int pageCount = extractor.GetPageCount();

            for (int i = 0; i < pageCount; i++)
            {
                if (!extractor.Find(i, textBoxFind.Text, false))
                {
                    continue;
                }

                do
                {
                    builder.AppendLine("");
                    builder.AppendLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());
                    builder.AppendLine("");

                    // iterate through each element in the found text
                    foreach (SearchResultElement element in extractor.FoundText.Elements)
                    {
                        builder.AppendLine("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height);
                        builder.AppendLine("Text: " + element.Text);
                        builder.AppendLine("Font is bold: " + element.FontIsBold);
                        builder.AppendLine("Font is italic:" + element.FontIsItalic);
                        builder.AppendLine("Font name: " + element.FontName);
                        builder.AppendLine("Font size:" + element.FontSize);
                        builder.AppendLine("Font color:" + element.FontColor);
                    }
                }
                while (extractor.FindNext());
            }

            builder.AppendLine("Finished.");

            textBox1.Text = builder.ToString();
        }
    }
}
Imports System.Drawing
Imports Bytescout.PDFExtractor

' Console sample: reports every occurrence of "ipsum" in sample1.pdf, page by page.
Class Program

    Friend Shared Sub Main(args As String())

        ' Set up the text extractor with the demo registration
        Dim extractor As New TextExtractor()
        extractor.RegistrationName = "demo"
        extractor.RegistrationKey = "demo"

        ' Open the sample document
        extractor.LoadDocumentFromFile("sample1.pdf")

        Dim lastPage As Integer = extractor.GetPageCount() - 1

        For page As Integer = 0 To lastPage
            ReportMatchesOnPage(extractor, page)
        Next

        Console.WriteLine()
        Console.WriteLine("Press any key to continue...")
        Console.ReadLine()

    End Sub

    ' Prints every match of "ipsum" found on the given page.
    Private Shared Sub ReportMatchesOnPage(extractor As TextExtractor, page As Integer)

        Dim hasMatch As Boolean = extractor.Find(page, "ipsum", False)

        While hasMatch
            Console.WriteLine("")
            Console.WriteLine("Found on page " & page & " at location " & extractor.FoundText.Bounds.ToString())
            Console.WriteLine("")

            ' Walk over the individual elements that make up the match
            For Each part As SearchResultElement In extractor.FoundText.Elements
                PrintElement(part)
            Next

            hasMatch = extractor.FindNext()
        End While

    End Sub

    ' Prints position, text and font details of a single search result element.
    Private Shared Sub PrintElement(part As SearchResultElement)

        Console.WriteLine("Element #" & part.Index.ToString() &
                          " at left=" & part.Left.ToString() &
                          "; top=" & part.Top.ToString() &
                          "; width=" & part.Width.ToString() &
                          "; height=" & part.Height.ToString())
        Console.WriteLine("Text: " & part.Text)
        Console.WriteLine("Font is bold: " & part.FontIsBold.ToString())
        Console.WriteLine("Font is italic:" & part.FontIsItalic.ToString())
        Console.WriteLine("Font name: " & part.FontName)
        Console.WriteLine("Font size:" & part.FontSize.ToString())
        Console.WriteLine("Font color:" & part.FontColor.ToString())

    End Sub

End Class
' Searches every page of sample1.pdf for the word "ipsum" and shows one message box
' per match, listing the match bounds and the details of each element of the match.

' Create Bytescout.PDFExtractor.TextExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document
extractor.LoadDocumentFromFile("..\..\sample1.pdf")

' Get page count
pageCount = extractor.GetPageCount()

For i = 0 To pageCount - 1

    ' parameters are: page index, string to find, case sensitivity
    If extractor.Find(i, "ipsum", false) Then

        Do
            foundMessage = "Found word 'ipsum' on page #" & CStr(i) & " at left=" & CStr(extractor.FoundText.Left) & "; top=" & CStr(extractor.FoundText.Top) & "; width=" & CStr(extractor.FoundText.Width) & "; height=" & CStr(extractor.FoundText.Height)

            ' Start from an empty report for each match so details of a previous match never leak in
            elementInfo = ""

            ' iterate through each element in the found text
            For j = 0 To extractor.FoundText.ElementCount - 1

                ' get the j-th search result element (not always the first one)
                Set element = extractor.FoundText.GetElement(j)

                ' separate consecutive elements with a blank line
                If j > 0 Then
                    elementInfo = elementInfo & vbCRLF & vbCRLF
                End If

                ' append, rather than overwrite, so every element is reported
                elementInfo = elementInfo & "Element #" & CStr(j) & " at left=" & CStr(element.Left) & "; top=" & CStr(element.Top) & "; width=" & CStr(element.Width) & "; height=" & CStr(element.Height) & vbCRLF
                elementInfo = elementInfo & "Text: " & CStr(element.Text) & vbCRLF
                elementInfo = elementInfo & "Font is bold: " & CStr(element.FontIsBold) & vbCRLF
                elementInfo = elementInfo & "Font is italic:" & CStr(element.FontIsItalic) & vbCRLF
                elementInfo = elementInfo & "Font name: " & CStr(element.FontName) & vbCRLF
                elementInfo = elementInfo & "Font size:" & CStr(element.FontSize) & vbCRLF
                elementInfo = elementInfo & "Font color (as Ole Color):" & CStr(element.FontColorAsOleColor)

            Next

            MsgBox foundMessage & vbCRLF & vbCRLF & elementInfo

        Loop While extractor.FindNext

    End If

Next

MsgBox "Done"

Set extractor = Nothing