This tutorial will demonstrate how to find text with hyphens in a PDF document in ASP.NET, C#, C#-WPF, VB.NET and VBScript using ByteScout PDF Extractor SDK.
You may also find this sample helpful: it shows how to find plain text in a PDF.
Select your programming language:
using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
    /// <summary>
    /// ASP.NET page that searches the sample PDF for the string "hyphen"
    /// and writes the bounds of every match to the HTTP response as HTML.
    /// </summary>
    public partial class _Default : System.Web.UI.Page
    {
        /// <summary>
        /// Loads "words-with-hyphens.pdf", searches page index 0 for "hyphen"
        /// and writes one "Found at location ..." line per match to the response.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // This test file will be copied to the project directory on the pre-build event (see the project properties).
            String inputFile = Server.MapPath("words-with-hyphens.pdf");

            // Create Bytescout.PDFExtractor.TextExtractor instance
            TextExtractor extractor = new TextExtractor();
            extractor.RegistrationName = "demo";
            extractor.RegistrationKey = "demo";

            // Load sample PDF document
            extractor.LoadDocumentFromFile(inputFile);

            Response.Clear();
            Response.ContentType = "text/html";

            Response.Write("Searching for \"hyphen\" string:<br><br>");

            // Search page index 0 for "hyphen" string
            if (extractor.Find(0, "hyphen"))
            {
                // Report the first match, then keep going while FindNext() reports another one.
                do
                {
                    Response.Write("Found at location " + extractor.FoundText.Bounds.ToString() + "<br>");
                }
                while (extractor.FindNext()); // a do-while statement must end with a semicolon
            }

            Response.End();
        }
    }
}
using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
    /// <summary>
    /// Console sample: prints the bounds of every occurrence of "hyphen"
    /// found on each page of "words-with-hyphens.pdf".
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Loads the sample document, scans every page and
        /// waits for the user to press Enter before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Set up the extractor with the demo registration
            TextExtractor extractor = new TextExtractor();
            extractor.RegistrationName = "demo";
            extractor.RegistrationKey = "demo";

            // Open the sample PDF
            extractor.LoadDocumentFromFile("words-with-hyphens.pdf");

            int totalPages = extractor.GetPageCount();

            for (int pageIndex = 0; pageIndex < totalPages; pageIndex++)
            {
                // Find() starts the search on this page; FindNext() advances to each further match.
                for (bool found = extractor.Find(pageIndex, "hyphen", false);
                     found;
                     found = extractor.FindNext())
                {
                    string bounds = extractor.FoundText.Bounds.ToString();
                    Console.WriteLine("Found on page {0} at location {1}", pageIndex, bounds);
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
    }
}
using System;
using System.Drawing;
using System.Text;
using System.Windows;
using Bytescout.PDFExtractor;

namespace WpfApplication1
{
    /// <summary>
    /// Main window of the WPF sample: lets the user load a PDF, extract the
    /// text of page index 0, and search page index 0 for a string.
    /// </summary>
    public partial class MainWindow : Window
    {
        // Path of the currently loaded PDF; null until a document has been loaded successfully.
        private string _pdfFile;

        // Extractor instance shared by all button handlers.
        private TextExtractor extractor;

        /// <summary>
        /// Initializes the window components and creates the extractor.
        /// </summary>
        public MainWindow()
        {
            InitializeComponent();
            extractor = new TextExtractor();
        }

        /// <summary>
        /// Shows an open-file dialog and loads the chosen PDF into the extractor.
        /// On success the file path is remembered and shown as the window title;
        /// on failure the exception text is shown in a message box.
        /// </summary>
        private void Button_Load(object sender, RoutedEventArgs e)
        {
            Microsoft.Win32.OpenFileDialog dlg = new Microsoft.Win32.OpenFileDialog();
            dlg.DefaultExt = ".pdf";
            dlg.Filter = "PDF documents (.pdf)|*.pdf";

            bool? result = dlg.ShowDialog();

            if (result == true)
            {
                try
                {
                    extractor.LoadDocumentFromFile(dlg.FileName);

                    // Only record the path after the load call has returned without throwing.
                    _pdfFile = dlg.FileName;
                    Title = _pdfFile;
                }
                catch (Exception exception)
                {
                    MessageBox.Show(exception.ToString());
                }
            }
        }

        /// <summary>
        /// Puts the text of page index 0 into the output text box.
        /// Does nothing when no document has been loaded.
        /// </summary>
        private void Button_Extract(object sender, RoutedEventArgs e)
        {
            if (_pdfFile != null)
            {
                string text = extractor.GetText(0, 0); // extract from the first page only (for demonstration purposes)
                textBox1.Text = text;
            }
        }

        /// <summary>
        /// Searches page index 0 for the text typed into the find box and lists
        /// the location of every match in the output text box.
        /// Does nothing when no document has been loaded or the find box is empty.
        /// </summary>
        private void Button_Find(object sender, RoutedEventArgs e)
        {
            // Same "document loaded" guard as Button_Extract: do not search before a PDF is loaded.
            if (_pdfFile != null && textBoxFind.Text.Length > 0)
            {
                StringBuilder builder = new StringBuilder();
                builder.AppendLine("Searching for \"" + textBoxFind.Text + "\"");

                if (extractor.Find(0, textBoxFind.Text, false))
                {
                    // Report the first match, then continue while FindNext() reports another one.
                    do
                    {
                        builder.AppendLine("Found on page 0 at location " + extractor.FoundText.Location.ToString());
                    }
                    while (extractor.FindNext());
                }

                builder.AppendLine("Finished.");
                textBox1.Text = builder.ToString();
            }
        }
    }
}
Imports System.Drawing
Imports Bytescout.PDFExtractor

''' <summary>
''' Console sample: prints the bounds of every occurrence of "hyphen"
''' found on each page of "words-with-hyphens.pdf".
''' </summary>
Class Program

    ''' <summary>
    ''' Entry point. Loads the sample document, scans every page and
    ''' waits for the user to press Enter before exiting.
    ''' </summary>
    Friend Shared Sub Main(args As String())

        ' Set up the extractor with the demo registration
        Dim extractor As New TextExtractor()
        extractor.RegistrationName = "demo"
        extractor.RegistrationKey = "demo"

        ' Open the sample PDF
        extractor.LoadDocumentFromFile("words-with-hyphens.pdf")

        Dim totalPages As Integer = extractor.GetPageCount()
        Dim pageIndex As Integer = 0

        ' Walk the pages one by one
        While pageIndex < totalPages

            ' Find() starts the search on this page; FindNext() advances to each further match.
            Dim found As Boolean = extractor.Find(pageIndex, "hyphen", False)

            While found
                Dim bounds As String = extractor.FoundText.Bounds.ToString()
                Console.WriteLine("Found on page " & pageIndex.ToString() & " at location " & bounds)
                found = extractor.FindNext()
            End While

            pageIndex += 1
        End While

        Console.WriteLine()
        Console.WriteLine("Press any key to continue...")
        Console.ReadLine()

    End Sub

End Class
' Instantiate the Bytescout.PDFExtractor.TextExtractor COM object
Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Open the sample PDF
extractor.LoadDocumentFromFile("..\..\words-with-hyphens.pdf")

' Index of the last page to scan
lastPage = extractor.GetPageCount() - 1

For pageIndex = 0 To lastPage

    ' Find arguments: page index, string to find, case sensitivity
    found = extractor.Find(pageIndex, "hyphen", false)

    ' Show one message box per match; FindNext advances to the next match
    Do While found
        message = "Found word 'hyphen' on page #" & CStr(pageIndex)
        message = message & " at left=" & CStr(extractor.FoundText.Left)
        message = message & "; top=" & CStr(extractor.FoundText.Top)
        message = message & "; width=" & CStr(extractor.FoundText.Width)
        message = message & "; height=" & CStr(extractor.FoundText.Height)
        MsgBox message

        found = extractor.FindNext
    Loop

Next

MsgBox "Done"

' Release the COM object
Set extractor = Nothing