How to find text in PDF using PDF Extractor SDK in ASP.NET, C#, C#-WPF, VB.NET and VBScript - ByteScout
Announcement
Our ByteScout SDK products are sunsetting as we focus on expanding new solutions.
Learn More Open modal
Close modal
Announcement Important Update
ByteScout SDK Sunsetting Notice
Our ByteScout SDK products are sunsetting as we focus on our new & improved solutions. Thank you for being part of our journey, and we look forward to supporting you in this next chapter!

How to find text in PDF using PDF Extractor SDK in ASP.NET, C#, C#-WPF, VB.NET and VBScript

  • Home
  • /
  • Articles
  • /
  • How to find text in PDF using PDF Extractor SDK in ASP.NET, C#, C#-WPF, VB.NET and VBScript

Check the samples below to learn how to find specific text in a PDF document in ASP.NET, C#, C#-WPF, VB.NET and VBScript using ByteScout PDF Extractor SDK.

If you need to find text with hyphens in your PDF, check this tutorial.

Select your programming language:

How to find text in PDF in ASP.NET

using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
	/// <summary>
	/// Demo page that searches every page of the sample PDF for the word "ipsum"
	/// and writes each match, with its per-element font details, to the HTTP response as HTML.
	/// </summary>
	public partial class _Default : System.Web.UI.Page
	{
		/// <summary>
		/// Loads "sample1.pdf", runs the search and streams the results to the response.
		/// </summary>
		/// <param name="sender">Event source (not used).</param>
		/// <param name="e">Event data (not used).</param>
		protected void Page_Load(object sender, EventArgs e)
		{
			// This test file will be copied to the project directory on the pre-build event (see the project properties).
			String inputFile = Server.MapPath("sample1.pdf");

			// Create Bytescout.PDFExtractor.TextExtractor instance
			TextExtractor extractor = new TextExtractor();
			extractor.RegistrationName = "demo";
			extractor.RegistrationKey = "demo";

			// Load sample PDF document
			extractor.LoadDocumentFromFile(inputFile);

			Response.Clear();
			Response.ContentType = "text/html";

			Response.Write("Searching for \"ipsum\" string:<br><br>");

			int pageCount = extractor.GetPageCount();

			// Search each page in turn so the reported page index is the page that was actually searched.
			// (The previous version referenced an undeclared variable here and only looked at page 0.)
			for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
			{
				// Third argument is the case-sensitivity flag, same as in the console samples.
				if (extractor.Find(pageIndex, "ipsum", false))
				{
					do
					{
						Response.Write("<br/>");
						Response.Write("Found on page " + pageIndex + " at location " + extractor.FoundText.Bounds.ToString() + "<br/>");
						Response.Write("<br/>");

						// iterate through each element in the found text
						foreach (SearchResultElement element in extractor.FoundText.Elements)
						{
							Response.Write("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height + "<br/>");
							// Text and font name come from the PDF, so encode them before emitting HTML.
							Response.Write("Text: " + Server.HtmlEncode(element.Text) + "<br/>");
							Response.Write("Font is bold: " + element.FontIsBold + "<br/>");
							Response.Write("Font is italic:" + element.FontIsItalic + "<br/>");
							Response.Write("Font name: " + Server.HtmlEncode(element.FontName) + "<br/>");
							Response.Write("Font size:" + element.FontSize + "<br/>");
							Response.Write("Font color:" + element.FontColor + "<br/>");
						}
					}
					while (extractor.FindNext());
				}
			}

			Response.End();
		}
	}
}

How to find text in PDF in C#

using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
	/// <summary>
	/// Console sample: looks for the word "ipsum" on every page of "sample1.pdf"
	/// and prints each match together with the details of its elements.
	/// </summary>
	class Program
	{
		/// <summary>
		/// Entry point. Loads the sample document, walks its pages and prints every match.
		/// </summary>
		/// <param name="args">Command-line arguments (not used).</param>
		static void Main(string[] args)
		{
			// Set up the extractor with the demo registration
			TextExtractor extractor = new TextExtractor();
			extractor.RegistrationName = "demo";
			extractor.RegistrationKey = "demo";

			// Open the sample PDF
			extractor.LoadDocumentFromFile("sample1.pdf");

			int totalPages = extractor.GetPageCount();

			for (int page = 0; page < totalPages; page++)
			{
				// Find() locates the first match on the page; FindNext() advances to the following ones.
				bool hasMatch = extractor.Find(page, "ipsum", false);

				while (hasMatch)
				{
					Console.WriteLine("");
					Console.WriteLine("Found on page " + page + " at location " + extractor.FoundText.Bounds.ToString());
					Console.WriteLine("");

					// Describe every element that makes up the current match
					foreach (SearchResultElement element in extractor.FoundText.Elements)
					{
						PrintElement(element);
					}

					hasMatch = extractor.FindNext();
				}
			}

			Console.WriteLine();
			Console.WriteLine("Press any key to continue...");
			Console.ReadLine();
		}

		/// <summary>
		/// Writes the position, text and font attributes of one search result element to the console.
		/// </summary>
		/// <param name="element">Element of the current match to describe.</param>
		private static void PrintElement(SearchResultElement element)
		{
			Console.WriteLine("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height);
			Console.WriteLine("Text: " + element.Text);
			Console.WriteLine("Font is bold: " + element.FontIsBold);
			Console.WriteLine("Font is italic:" + element.FontIsItalic);
			Console.WriteLine("Font name: " + element.FontName);
			Console.WriteLine("Font size:" + element.FontSize);
			Console.WriteLine("Font color:" + element.FontColor);
		}
	}
}

How to find text in PDF in C#-WPF

using System;
using System.Drawing;
using System.Text;
using System.Windows;
using Bytescout.PDFExtractor;

namespace WpfApplication1
{
	/// <summary>
	/// Main window of the WPF sample: lets the user open a PDF, show the text of its
	/// first page, and search the document for a string typed into the find box.
	/// </summary>
	public partial class MainWindow : Window
	{
		// Path of the currently loaded PDF; null until a document has been loaded successfully.
		private string _pdfFile;
		// Extractor shared by all button handlers.
		private TextExtractor extractor;

		/// <summary>
		/// Initializes the window and creates the text extractor.
		/// </summary>
		public MainWindow()
		{
			InitializeComponent();

			extractor = new TextExtractor();
		}

		/// <summary>
		/// Shows an open-file dialog and loads the chosen PDF into the extractor.
		/// On failure the exception is shown in a message box and the previous state is kept.
		/// </summary>
		private void Button_Load(object sender, RoutedEventArgs e)
		{
			Microsoft.Win32.OpenFileDialog dlg = new Microsoft.Win32.OpenFileDialog();
			dlg.DefaultExt = ".pdf";
			dlg.Filter = "PDF documents (.pdf)|*.pdf";

			bool? result = dlg.ShowDialog();

			if (result == true)
			{
				try
				{
					extractor.LoadDocumentFromFile(dlg.FileName);
					// Only remember the file once loading has succeeded.
					_pdfFile = dlg.FileName;
					Title = _pdfFile;
				}
				catch (Exception exception)
				{
					MessageBox.Show(exception.ToString());
				}
			}
		}

		/// <summary>
		/// Puts the text of the first page of the loaded document into the output box.
		/// Does nothing when no document is loaded.
		/// </summary>
		private void Button_Extract(object sender, RoutedEventArgs e)
		{
			if (_pdfFile != null)
			{
				string text = extractor.GetText(0, 0); // extract from the first page only (for demonstration purposes)

				textBox1.Text = text;
			}
		}

		/// <summary>
		/// Searches every page of the loaded document for the text in the find box and
		/// lists each match with its element details in the output box.
		/// Does nothing when no document is loaded or the find box is empty.
		/// </summary>
		private void Button_Find(object sender, RoutedEventArgs e)
		{
			// Same guard as Button_Extract: there is nothing to search before a document is loaded.
			if (_pdfFile == null || textBoxFind.Text.Length == 0)
			{
				return;
			}

			StringBuilder builder = new StringBuilder();

			builder.AppendLine("Searching for \"" + textBoxFind.Text + "\"");

			int pageCount = extractor.GetPageCount();

			// Walk all pages so the reported page index is the page actually searched.
			// (The previous version referenced an undeclared variable here and only looked at page 0.)
			for (int i = 0; i < pageCount; i++)
			{
				if (extractor.Find(i, textBoxFind.Text, false))
				{
					do
					{
						builder.AppendLine("");
						builder.AppendLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());
						builder.AppendLine("");

						// iterate through each element in the found text
						foreach (SearchResultElement element in extractor.FoundText.Elements)
						{
							builder.AppendLine("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height);
							builder.AppendLine("Text: " + element.Text);
							builder.AppendLine("Font is bold: " + element.FontIsBold);
							builder.AppendLine("Font is italic:" + element.FontIsItalic);
							builder.AppendLine("Font name: " + element.FontName);
							builder.AppendLine("Font size:" + element.FontSize);
							builder.AppendLine("Font color:" + element.FontColor);
						}
					}
					while (extractor.FindNext());
				}
			}

			builder.AppendLine("Finished.");

			textBox1.Text = builder.ToString();
		}
	}
}

How to find text in PDF in Visual Basic .NET


Imports System.Drawing
Imports Bytescout.PDFExtractor

' Console sample: looks for the word "ipsum" on every page of "sample1.pdf"
' and prints each match together with the details of its elements.
Class Program
	''' <summary>
	''' Entry point. Loads the sample document, walks its pages and prints every match.
	''' </summary>
	''' <param name="args">Command-line arguments (not used).</param>
	Friend Shared Sub Main(args As String())

		' Set up the extractor with the demo registration
		Dim extractor As New TextExtractor()
		extractor.RegistrationName = "demo"
		extractor.RegistrationKey = "demo"

		' Open the sample PDF
		extractor.LoadDocumentFromFile("sample1.pdf")

		Dim lastPage As Integer = extractor.GetPageCount() - 1

		For pageIndex As Integer = 0 To lastPage
			' Find() locates the first match on the page; FindNext() advances to the following ones.
			Dim hasMatch As Boolean = extractor.Find(pageIndex, "ipsum", False)

			While hasMatch
				Console.WriteLine("")
				Console.WriteLine("Found on page " & pageIndex & " at location " & extractor.FoundText.Bounds.ToString())
				Console.WriteLine("")

				' Describe every element that makes up the current match
				For Each element As SearchResultElement In extractor.FoundText.Elements
					PrintElement(element)
				Next

				hasMatch = extractor.FindNext()
			End While
		Next

		Console.WriteLine()
		Console.WriteLine("Press any key to continue...")
		Console.ReadLine()
	End Sub

	''' <summary>
	''' Writes the position, text and font attributes of one search result element to the console.
	''' </summary>
	''' <param name="element">Element of the current match to describe.</param>
	Private Shared Sub PrintElement(element As SearchResultElement)
		Console.WriteLine("Element #" & element.Index.ToString() & " at left=" & element.Left.ToString() & "; top=" & element.Top.ToString() & "; width=" & element.Width.ToString() & "; height=" & element.Height.ToString())
		Console.WriteLine("Text: " & element.Text)
		Console.WriteLine("Font is bold: " & element.FontIsBold.ToString())
		Console.WriteLine("Font is italic:" & element.FontIsItalic.ToString())
		Console.WriteLine("Font name: " & element.FontName)
		Console.WriteLine("Font size:" & element.FontSize.ToString())
		Console.WriteLine("Font color:" & element.FontColor.ToString())
	End Sub
End Class

How to find text in PDF in VBScript (Visual Basic 6)


' Searches every page of the sample PDF for the word "ipsum" and shows one
' message box per match, listing the match location and the details of every
' element that makes up the match.

' Create Bytescout.PDFExtractor.TextExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document
extractor.LoadDocumentFromFile("..\..\sample1.pdf")

' Get page count
pageCount = extractor.GetPageCount()

For i = 0 To pageCount - 1

	If extractor.Find(i, "ipsum", false) Then ' parameters are: page index, string to find, case sensitivity
		Do

			foundMessage = "Found word 'ipsum' on page #" & CStr(i) & " at left=" & CStr(extractor.FoundText.Left) & "; top=" & CStr(extractor.FoundText.Top) & "; width=" & CStr(extractor.FoundText.Width) & "; height=" & CStr(extractor.FoundText.Height)

			' Start a fresh element report for this match; the loop below appends to it
			' so that every element is shown, not only the last one.
			elementInfo = ""

			' iterate through each element in the found text
			For j = 0 To extractor.FoundText.ElementCount - 1

				' get search result element j (previously always fetched element 0)
				Set element = extractor.FoundText.GetElement(j)

				' blank line between consecutive elements
				If j > 0 Then elementInfo = elementInfo & vbCRLF & vbCRLF

				elementInfo = elementInfo & "Element #" & CStr(j) & " at left=" & CStr(element.Left) & "; top=" & CStr(element.Top) & "; width=" & CStr(element.Width) & "; height=" & CStr(element.Height) & vbCRLF
				elementInfo = elementInfo & "Text: " & CStr(element.Text) & vbCRLF
				elementInfo = elementInfo & "Font is bold: " & CStr(element.FontIsBold) & vbCRLF
				elementInfo = elementInfo & "Font is italic:" & CStr(element.FontIsItalic) & vbCRLF
				elementInfo = elementInfo & "Font name: " & CStr(element.FontName) & vbCRLF
				elementInfo = elementInfo & "Font size:" & CStr(element.FontSize) & vbCRLF
				elementInfo = elementInfo & "Font color (as Ole Color):" & CStr(element.FontColorAsOleColor)
			Next

			MsgBox foundMessage & vbCRLF & vbCRLF & elementInfo

		Loop While extractor.FindNext
	End If

Next

MsgBox "Done"

Set extractor = Nothing

Tutorials:

prev
next