This tutorial will demonstrate how to find text with hyphens in a PDF document in ASP.NET, C#, C#-WPF, VB.NET and VBScript using ByteScout PDF Extractor SDK.
You may also find this sample helpful: it shows how to find plain text in a PDF.
Select your programming language:
using System;
using System.Drawing;
using Bytescout.PDFExtractor;
namespace FindText
{
    /// <summary>
    /// ASP.NET page that searches page 0 of a sample PDF for the string "hyphen"
    /// and writes the bounds of every match to the HTTP response as HTML.
    /// </summary>
    public partial class _Default : System.Web.UI.Page
    {
        /// <summary>
        /// Runs the search on page load and replaces the page output with the results.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // This test file will be copied to the project directory on the pre-build event (see the project properties).
            String inputFile = Server.MapPath("words-with-hyphens.pdf");

            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The using block releases the extractor (and the loaded document) even if the search throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                Response.Clear();
                Response.ContentType = "text/html";

                Response.Write("Searching for \"hyphen\" string:<br><br>");

                // Search for "hyphen" string on the first page (page index 0)
                if (extractor.Find(0, "hyphen"))
                {
                    do
                    {
                        Response.Write("Found at location " + extractor.FoundText.Bounds.ToString() + "<br>");
                    }
                    while (extractor.FindNext());
                }
            }

            // End the response only after the extractor has been disposed.
            Response.End();
        }
    }
}
using System;
using System.Drawing;
using Bytescout.PDFExtractor;
namespace FindText
{
    /// <summary>
    /// Console sample: searches every page of "words-with-hyphens.pdf" for the
    /// string "hyphen" and prints the bounds of each match.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Loads the sample document, reports all matches page by page,
        /// then waits for the user to press Enter.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The using block guarantees the extractor is released once searching is done.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("words-with-hyphens.pdf");

                int pageCount = extractor.GetPageCount();

                for (int i = 0; i < pageCount; i++)
                {
                    // Search each page for "hyphen" string (third argument: case sensitivity)
                    if (extractor.Find(i, "hyphen", false))
                    {
                        do
                        {
                            Console.WriteLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());
                        }
                        while (extractor.FindNext());
                    }
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
    }
}
using System;
using System.Drawing;
using System.Text;
using System.Windows;
using Bytescout.PDFExtractor;
namespace WpfApplication1
{
    /// <summary>
    /// Main window of the WPF sample: lets the user pick a PDF, extract text from
    /// its first page, and search the first page for a user-supplied string.
    /// </summary>
    public partial class MainWindow : Window
    {
        // Path of the currently loaded PDF; null until a document has been loaded successfully.
        private string _pdfFile;

        // Single extractor instance shared by all button handlers.
        private TextExtractor extractor;

        /// <summary>
        /// Initializes the window's components and creates the shared extractor.
        /// </summary>
        public MainWindow()
        {
            InitializeComponent();
            extractor = new TextExtractor();
        }

        /// <summary>
        /// Releases the extractor when the window is closed.
        /// </summary>
        /// <param name="e">Event data passed on to the base implementation.</param>
        protected override void OnClosed(EventArgs e)
        {
            extractor.Dispose();
            base.OnClosed(e);
        }

        /// <summary>
        /// Shows an open-file dialog and loads the chosen PDF into the extractor.
        /// Any exception raised while loading is shown in a message box.
        /// </summary>
        private void Button_Load(object sender, RoutedEventArgs e)
        {
            Microsoft.Win32.OpenFileDialog dlg = new Microsoft.Win32.OpenFileDialog();
            dlg.DefaultExt = ".pdf";
            dlg.Filter = "PDF documents (.pdf)|*.pdf";

            bool? result = dlg.ShowDialog();
            if (result != true)
            {
                return;
            }

            try
            {
                extractor.LoadDocumentFromFile(dlg.FileName);

                // Only remember the file once loading has succeeded.
                _pdfFile = dlg.FileName;
                Title = _pdfFile;
            }
            catch (Exception exception)
            {
                MessageBox.Show(exception.ToString());
            }
        }

        /// <summary>
        /// Puts the text of the first page into the output text box.
        /// Does nothing if no document has been loaded.
        /// </summary>
        private void Button_Extract(object sender, RoutedEventArgs e)
        {
            if (_pdfFile == null)
            {
                return;
            }

            string text = extractor.GetText(0, 0); // extract from the first page only (for demonstration purposes)
            textBox1.Text = text;
        }

        /// <summary>
        /// Searches page 0 for the text typed into the find box and lists the
        /// location of every match in the output text box.
        /// Does nothing if no document has been loaded or the find box is empty.
        /// </summary>
        private void Button_Find(object sender, RoutedEventArgs e)
        {
            // Same guard as Button_Extract: searching before a document is loaded is not meaningful.
            if (_pdfFile == null || textBoxFind.Text.Length == 0)
            {
                return;
            }

            StringBuilder builder = new StringBuilder();
            builder.AppendLine("Searching for \"" + textBoxFind.Text + "\"");

            // Third argument is case sensitivity.
            if (extractor.Find(0, textBoxFind.Text, false))
            {
                do
                {
                    builder.AppendLine("Found on page 0 at location " + extractor.FoundText.Location.ToString());
                }
                while (extractor.FindNext());
            }

            builder.AppendLine("Finished.");
            textBox1.Text = builder.ToString();
        }
    }
}
Imports System.Drawing
Imports Bytescout.PDFExtractor
''' <summary>
''' Console sample: searches every page of "words-with-hyphens.pdf" for the
''' string "hyphen" and prints the bounds of each match.
''' </summary>
Class Program
    ''' <summary>
    ''' Entry point. Loads the sample document, reports all matches page by page,
    ''' then waits for the user to press Enter.
    ''' </summary>
    ''' <param name="args">Command-line arguments (unused).</param>
    Friend Shared Sub Main(args As String())
        ' Create Bytescout.PDFExtractor.TextExtractor instance.
        ' The Using block guarantees the extractor is released once searching is done.
        Using extractor As New TextExtractor()
            extractor.RegistrationName = "demo"
            extractor.RegistrationKey = "demo"

            ' Load sample PDF document
            extractor.LoadDocumentFromFile("words-with-hyphens.pdf")

            Dim pageCount As Integer = extractor.GetPageCount()

            ' Search each page for "hyphen" string (third argument: case sensitivity)
            For i As Integer = 0 To pageCount - 1
                If extractor.Find(i, "hyphen", False) Then
                    Do
                        Console.WriteLine("Found on page " & i.ToString() & " at location " & extractor.FoundText.Bounds.ToString())
                    Loop While extractor.FindNext()
                End If
            Next
        End Using

        Console.WriteLine()
        Console.WriteLine("Press any key to continue...")
        Console.ReadLine()
    End Sub
End Class
' Report every occurrence of the word "hyphen" in the sample PDF, one message box per match.

' Instantiate the Bytescout.PDFExtractor.TextExtractor COM object and register it
Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
With extractor
    .RegistrationName = "demo"
    .RegistrationKey = "demo"
    ' Open the sample PDF document
    .LoadDocumentFromFile("..\..\words-with-hyphens.pdf")
End With

' Index of the last page (pages are numbered from 0)
lastPage = extractor.GetPageCount() - 1

For pageNo = 0 To lastPage
    ' Find arguments: page index, string to find, case sensitivity
    hasMatch = extractor.Find(pageNo, "hyphen", false)
    Do While hasMatch
        ' Describe the rectangle of the current match
        report = "Found word 'hyphen' on page #" & CStr(pageNo) & _
                 " at left=" & CStr(extractor.FoundText.Left) & _
                 "; top=" & CStr(extractor.FoundText.Top) & _
                 "; width=" & CStr(extractor.FoundText.Width) & _
                 "; height=" & CStr(extractor.FoundText.Height)
        MsgBox report
        ' Advance to the next match on the same page
        hasMatch = extractor.FindNext()
    Loop
Next

MsgBox "Done"

' Release the COM object
Set extractor = Nothing