This tutorial will demonstrate how to find text with hyphens in a PDF document in ASP.NET, C#, C#-WPF, VB.NET and VBScript using ByteScout PDF Extractor SDK.

You may also find this sample helpful: it shows how to find plain text in a PDF.

Select your programming language:

How to find text with hyphens in PDF in ASP.NET

using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
	/// <summary>
	/// Demo page that searches page index 0 of a sample PDF for the string "hyphen"
	/// and writes the bounds of every match to the HTTP response as HTML.
	/// </summary>
	public partial class _Default : System.Web.UI.Page
	{
		/// <summary>
		/// Loads the sample document, runs the search and replaces the response body with the results.
		/// </summary>
		/// <param name="sender">Event source (not used).</param>
		/// <param name="e">Event data (not used).</param>
		protected void Page_Load(object sender, EventArgs e)
		{
			// This test file will be copied to the project directory on the pre-build event (see the project properties).
			String inputFile = Server.MapPath("words-with-hyphens.pdf");

			// Create Bytescout.PDFExtractor.TextExtractor instance
			TextExtractor extractor = new TextExtractor();
			extractor.RegistrationName = "demo";
			extractor.RegistrationKey = "demo";

			// Load sample PDF document
			extractor.LoadDocumentFromFile(inputFile);

			Response.Clear();
			Response.ContentType = "text/html";

			Response.Write("Searching for \"hyphen\" string:<br><br>");

			// Search page index 0 for the "hyphen" string.
			// Arguments: page index, string to find, case sensitivity (same call shape as the other samples).
			if (extractor.Find(0, "hyphen", false))
			{
				do
				{
					Response.Write("Found at location " + extractor.FoundText.Bounds.ToString() + "<br>");
				}
				while (extractor.FindNext()); // a do-while statement must be terminated with ';'
			}

			Response.End();
		}
	}
}

How to find text with hyphens in PDF in C#

using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
	/// <summary>
	/// Console sample: prints the bounds of every occurrence of "hyphen"
	/// on each page of words-with-hyphens.pdf.
	/// </summary>
	class Program
	{
		/// <summary>
		/// Entry point. Walks all pages of the sample document and reports each match.
		/// </summary>
		/// <param name="args">Command-line arguments (not used).</param>
		static void Main(string[] args)
		{
			// Set up the extractor with the demo registration
			TextExtractor extractor = new TextExtractor
			{
				RegistrationName = "demo",
				RegistrationKey = "demo"
			};

			// Open the sample PDF
			extractor.LoadDocumentFromFile("words-with-hyphens.pdf");

			int totalPages = extractor.GetPageCount();
			int page = 0;

			while (page < totalPages)
			{
				// Start a search on this page, then keep advancing until no match is left
				bool matched = extractor.Find(page, "hyphen", false);

				while (matched)
				{
					string where = extractor.FoundText.Bounds.ToString();
					Console.WriteLine("Found on page " + page + " at location " + where);

					matched = extractor.FindNext();
				}

				page++;
			}

			// Keep the console window open until the user presses Enter
			Console.WriteLine();
			Console.WriteLine("Press any key to continue...");
			Console.ReadLine();
		}
	}
}

How to find text with hyphens in PDF in C#-WPF

using System;
using System.Drawing;
using System.Text;
using System.Windows;
using Bytescout.PDFExtractor;

namespace WpfApplication1
{
	/// <summary>
	/// Main window of the WPF sample: lets the user load a PDF, extract the text of
	/// page index 0, and search page index 0 for an arbitrary string.
	/// </summary>
	public partial class MainWindow : Window
	{
		// Path of the currently loaded PDF; null until a document has been loaded successfully.
		private string _pdfFile;
		private TextExtractor extractor;

		/// <summary>
		/// Initializes the window and creates the extractor with the demo registration.
		/// </summary>
		public MainWindow()
		{
			InitializeComponent();

			// Create Bytescout.PDFExtractor.TextExtractor instance
			// (registered the same way as in the other samples).
			extractor = new TextExtractor();
			extractor.RegistrationName = "demo";
			extractor.RegistrationKey = "demo";
		}

		/// <summary>
		/// Asks the user for a PDF file and loads it into the extractor.
		/// On failure the exception is shown in a message box and the previous state is kept.
		/// </summary>
		private void Button_Load(object sender, RoutedEventArgs e)
		{
			Microsoft.Win32.OpenFileDialog dlg = new Microsoft.Win32.OpenFileDialog();
			dlg.DefaultExt = ".pdf";
			dlg.Filter = "PDF documents (.pdf)|*.pdf";

			bool? result = dlg.ShowDialog();

			if (result == true)
			{
				try
				{
					extractor.LoadDocumentFromFile(dlg.FileName);

					// Only remember the file once loading has succeeded.
					_pdfFile = dlg.FileName;
					Title = _pdfFile;
				}
				catch (Exception exception)
				{
					MessageBox.Show(exception.ToString());
				}
			}
		}

		/// <summary>
		/// Shows the extracted text in the output box. Does nothing if no document is loaded.
		/// </summary>
		private void Button_Extract(object sender, RoutedEventArgs e)
		{
			if (_pdfFile != null)
			{
				string text = extractor.GetText(0, 0); // extract from the first page only (for demonstration purposes)

				textBox1.Text = text;
			}
		}

		/// <summary>
		/// Searches page index 0 for the text entered in the search box and lists the
		/// location of every match in the output box. Does nothing if no document is
		/// loaded or the search box is empty.
		/// </summary>
		private void Button_Find(object sender, RoutedEventArgs e)
		{
			// Same guard as Button_Extract: searching before a document is loaded makes no sense.
			if (_pdfFile != null && textBoxFind.Text.Length > 0)
			{
				StringBuilder builder = new StringBuilder();

				builder.AppendLine("Searching for \"" + textBoxFind.Text + "\"");

				// Arguments: page index, string to find, case sensitivity.
				if (extractor.Find(0, textBoxFind.Text, false))
				{
					do
					{
						builder.AppendLine("Found on page 0 at location " + extractor.FoundText.Location.ToString());
					}
					while (extractor.FindNext());
				}

				builder.AppendLine("Finished.");

				textBox1.Text = builder.ToString();
			}
		}
	}
}

How to find text with hyphens in PDF in Visual Basic .NET

Imports System.Drawing
Imports Bytescout.PDFExtractor

''' <summary>
''' Console sample: prints the bounds of every occurrence of "hyphen"
''' on each page of words-with-hyphens.pdf.
''' </summary>
Class Program
	''' <summary>
	''' Entry point. Walks all pages of the sample document and reports each match.
	''' </summary>
	''' <param name="args">Command-line arguments (not used).</param>
	Friend Shared Sub Main(args As String())

		' Set up the extractor with the demo registration
		Dim extractor As New TextExtractor()
		extractor.RegistrationName = "demo"
		extractor.RegistrationKey = "demo"

		' Open the sample PDF
		extractor.LoadDocumentFromFile("words-with-hyphens.pdf")

		Dim lastPage As Integer = extractor.GetPageCount() - 1

		For page As Integer = 0 To lastPage
			' Start a search on this page, then keep advancing until no match is left
			Dim hit As Boolean = extractor.Find(page, "hyphen", False)

			While hit
				Dim foundAt As String = extractor.FoundText.Bounds.ToString()
				Console.WriteLine("Found on page " & page.ToString() & " at location " & foundAt)

				hit = extractor.FindNext()
			End While
		Next

		' Keep the console window open until the user presses Enter
		Console.WriteLine()
		Console.WriteLine("Press any key to continue...")
		Console.ReadLine()
	End Sub
End Class

How to find text with hyphens in PDF in VBScript (Visual Basic 6)

' Searches every page of the sample PDF for the word "hyphen" and shows
' a message box with the position and size of each match.

Dim extractor, pageCount, i, found, message

' Create and register the Bytescout.PDFExtractor.TextExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Open the sample PDF document
extractor.LoadDocumentFromFile("..\..\words-with-hyphens.pdf")

' Number of pages to scan
pageCount = extractor.GetPageCount()

For i = 0 To pageCount - 1

	' Find parameters are: page index, string to find, case sensitivity
	found = extractor.Find(i, "hyphen", false)

	Do While found
		message = "Found word 'hyphen' on page #" & CStr(i) & _
			" at left=" & CStr(extractor.FoundText.Left) & _
			"; top=" & CStr(extractor.FoundText.Top) & _
			"; width=" & CStr(extractor.FoundText.Width) & _
			"; height=" & CStr(extractor.FoundText.Height)
		MsgBox message

		' Advance to the next match on the same page
		found = extractor.FindNext
	Loop

Next

MsgBox "Done"

' Release the COM object
Set extractor = Nothing