How to find text in PDF using PDF Extractor SDK in ASP.NET, C#, C#-WPF, VB.NET and VBScript - ByteScout
Announcement
Our ByteScout SDK products are sunsetting as we focus on expanding new solutions.
Learn More Open modal
Close modal
Announcement Important Update
ByteScout SDK Sunsetting Notice
Our ByteScout SDK products are sunsetting as we focus on our new & improved solutions. Thank you for being part of our journey, and we look forward to supporting you in this next chapter!

How to find text in PDF using PDF Extractor SDK in ASP.NET, C#, C#-WPF, VB.NET and VBScript

  • Home
  • /
  • Articles
  • /
  • How to find text in PDF using PDF Extractor SDK in ASP.NET, C#, C#-WPF, VB.NET and VBScript

Check the samples below to learn how to find specific text in a PDF document in ASP.NET, C#, C#-WPF, VB.NET and VBScript using ByteScout PDF Extractor SDK.

If you need to find text with hyphens in your PDF, check this tutorial.

Select your programming language:

How to find text in PDF in ASP.NET

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
using System;
using System.Drawing;
using Bytescout.PDFExtractor;
 
namespace FindText
{
    /// <summary>
    /// Demo page that searches every page of a sample PDF for the word "ipsum"
    /// and writes each match, together with its text elements, to the HTTP
    /// response as HTML.
    /// </summary>
    public partial class _Default : System.Web.UI.Page
    {
        /// <summary>
        /// Loads the sample PDF, runs the search and writes the results to the response.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // This test file will be copied to the project directory on the pre-build event (see the project properties).
            String inputFile = Server.MapPath("sample1.pdf");

            // Create Bytescout.PDFExtractor.TextExtractor instance
            TextExtractor extractor = new TextExtractor();
            extractor.RegistrationName = "demo";
            extractor.RegistrationKey = "demo";

            // Load sample PDF document
            extractor.LoadDocumentFromFile(inputFile);

            Response.Clear();
            Response.ContentType = "text/html";

            Response.Write("Searching for \"ipsum\" string:<br><br>");

            int pageCount = extractor.GetPageCount();

            for (int i = 0; i < pageCount; i++)
            {
                // Search each page for "ipsum" string.
                // Parameters are: page index, string to find, case sensitivity.
                if (extractor.Find(i, "ipsum", false))
                {
                    do
                    {
                        Response.Write("<br/>");
                        Response.Write("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString() + "<br/>");
                        Response.Write("<br/>");

                        // iterate through each element in the found text
                        foreach (SearchResultElement element in extractor.FoundText.Elements)
                        {
                            Response.Write("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height + "<br/>");
                            // Text and font name come from the PDF itself, so encode them
                            // before placing them into the HTML output.
                            Response.Write("Text: " + Server.HtmlEncode(element.Text) + "<br/>");
                            Response.Write("Font is bold: " + element.FontIsBold + "<br/>");
                            Response.Write("Font is italic:" + element.FontIsItalic + "<br/>");
                            Response.Write("Font name: " + Server.HtmlEncode(element.FontName) + "<br/>");
                            Response.Write("Font size:" + element.FontSize + "<br/>");
                            Response.Write("Font color:" + element.FontColor + "<br/>");
                        }
                    }
                    while (extractor.FindNext());
                }
            }

            Response.End();
        }
    }
}

How to find text in PDF in C#

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
using System;
using System.Drawing;
using Bytescout.PDFExtractor;
 
namespace FindText
{
    /// <summary>
    /// Console sample: looks for the word "ipsum" on every page of sample1.pdf
    /// and prints each match together with its text elements.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Opens the sample document, reports all matches page by
        /// page and then waits for the user to press Enter.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Set up the extractor with the demo registration data
            TextExtractor pdf = new TextExtractor();
            pdf.RegistrationName = "demo";
            pdf.RegistrationKey = "demo";

            // Open the sample document
            pdf.LoadDocumentFromFile("sample1.pdf");

            int totalPages = pdf.GetPageCount();

            for (int page = 0; page < totalPages; page++)
            {
                // Visit every occurrence of "ipsum" on the current page
                bool hasMatch = pdf.Find(page, "ipsum", false);

                while (hasMatch)
                {
                    ReportMatch(pdf, page);
                    hasMatch = pdf.FindNext();
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }

        /// <summary>
        /// Prints the match the extractor is currently positioned on.
        /// </summary>
        /// <param name="pdf">Extractor whose FoundText describes the current match.</param>
        /// <param name="page">Index of the page the match was found on.</param>
        private static void ReportMatch(TextExtractor pdf, int page)
        {
            Console.WriteLine();
            Console.WriteLine("Found on page " + page + " at location " + pdf.FoundText.Bounds.ToString());
            Console.WriteLine();

            // One report per text element that makes up the match
            foreach (SearchResultElement part in pdf.FoundText.Elements)
            {
                Console.WriteLine("Element #" + part.Index + " at left=" + part.Left + "; top=" + part.Top + "; width=" + part.Width + "; height=" + part.Height);
                Console.WriteLine("Text: " + part.Text);
                Console.WriteLine("Font is bold: " + part.FontIsBold);
                Console.WriteLine("Font is italic:" + part.FontIsItalic);
                Console.WriteLine("Font name: " + part.FontName);
                Console.WriteLine("Font size:" + part.FontSize);
                Console.WriteLine("Font color:" + part.FontColor);
            }
        }
    }
}

How to find text in PDF in C#-WPF

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
using System;
using System.Drawing;
using System.Text;
using System.Windows;
using Bytescout.PDFExtractor;
 
namespace WpfApplication1
{
    /// <summary>
    /// Main window of the WPF sample: lets the user load a PDF, extract the
    /// text of its first page and search the document for a string.
    /// </summary>
    public partial class MainWindow : Window
    {
        // Path of the currently loaded PDF; null until a document has been loaded.
        private string _pdfFile;
        private TextExtractor extractor;

        /// <summary>
        /// Initializes the window and creates the extractor used by all handlers.
        /// </summary>
        public MainWindow()
        {
            InitializeComponent();

            extractor = new TextExtractor();
            // Same demo registration data as the other samples use.
            extractor.RegistrationName = "demo";
            extractor.RegistrationKey = "demo";
        }

        /// <summary>
        /// Asks the user for a PDF file and loads it into the extractor.
        /// Any load error is shown in a message box.
        /// </summary>
        private void Button_Load(object sender, RoutedEventArgs e)
        {
            Microsoft.Win32.OpenFileDialog dlg = new Microsoft.Win32.OpenFileDialog();
            dlg.DefaultExt = ".pdf";
            dlg.Filter = "PDF documents (.pdf)|*.pdf";

            bool? result = dlg.ShowDialog();

            if (result == true)
            {
                try
                {
                    extractor.LoadDocumentFromFile(dlg.FileName);
                    // Remember the file only after it has loaded successfully.
                    _pdfFile = dlg.FileName;
                    Title = _pdfFile;
                }
                catch (Exception exception)
                {
                    MessageBox.Show(exception.ToString());
                }
            }
        }

        /// <summary>
        /// Shows the text of the first page of the loaded document.
        /// Does nothing when no document has been loaded.
        /// </summary>
        private void Button_Extract(object sender, RoutedEventArgs e)
        {
            if (_pdfFile != null)
            {
                string text = extractor.GetText(0, 0); // extract from the first page only (for demonstration purposes)

                textBox1.Text = text;
            }
        }

        /// <summary>
        /// Searches every page of the loaded document for the text entered in
        /// textBoxFind and shows all matches in textBox1. Does nothing when no
        /// document has been loaded or the search box is empty.
        /// </summary>
        private void Button_Find(object sender, RoutedEventArgs e)
        {
            // Nothing to search in, or nothing to search for.
            if (_pdfFile == null || textBoxFind.Text.Length == 0)
            {
                return;
            }

            StringBuilder builder = new StringBuilder();

            builder.AppendLine("Searching for \"" + textBoxFind.Text + "\"");

            int pageCount = extractor.GetPageCount();

            for (int i = 0; i < pageCount; i++)
            {
                // Parameters are: page index, string to find, case sensitivity.
                if (extractor.Find(i, textBoxFind.Text, false))
                {
                    do
                    {
                        builder.AppendLine("");
                        builder.AppendLine("Found on page " + i + " at location " + extractor.FoundText.Bounds.ToString());
                        builder.AppendLine("");

                        // iterate through each element in the found text
                        foreach (SearchResultElement element in extractor.FoundText.Elements)
                        {
                            builder.AppendLine("Element #" + element.Index + " at left=" + element.Left + "; top=" + element.Top + "; width=" + element.Width + "; height=" + element.Height);
                            builder.AppendLine("Text: " + element.Text);
                            builder.AppendLine("Font is bold: " + element.FontIsBold);
                            builder.AppendLine("Font is italic:" + element.FontIsItalic);
                            builder.AppendLine("Font name: " + element.FontName);
                            builder.AppendLine("Font size:" + element.FontSize);
                            builder.AppendLine("Font color:" + element.FontColor);
                        }
                    }
                    while (extractor.FindNext());
                }
            }

            builder.AppendLine("Finished.");

            textBox1.Text = builder.ToString();
        }
    }
}

How to find text in PDF in Visual Basic .NET

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
Imports System.Drawing
Imports Bytescout.PDFExtractor
 
Class Program
    ''' <summary>
    ''' Console sample: looks for the word "ipsum" on every page of sample1.pdf
    ''' and prints each match together with its text elements.
    ''' </summary>
    ''' <param name="args">Command-line arguments (not used).</param>
    Friend Shared Sub Main(args As String())

        ' Set up the extractor with the demo registration data
        Dim pdf As New TextExtractor()
        pdf.RegistrationName = "demo"
        pdf.RegistrationKey = "demo"

        ' Open the sample document
        pdf.LoadDocumentFromFile("sample1.pdf")

        Dim totalPages As Integer = pdf.GetPageCount()

        For pageIndex As Integer = 0 To totalPages - 1
            ' Visit every occurrence of "ipsum" on the current page
            Dim hasMatch As Boolean = pdf.Find(pageIndex, "ipsum", False)

            While hasMatch
                Console.WriteLine()
                Console.WriteLine("Found on page " & pageIndex & " at location " & pdf.FoundText.Bounds.ToString())
                Console.WriteLine()

                ' One report per text element that makes up the match
                For Each part As SearchResultElement In pdf.FoundText.Elements
                    Console.WriteLine("Element #" & part.Index.ToString() & " at left=" & part.Left.ToString() & "; top=" & part.Top.ToString() & "; width=" & part.Width.ToString() & "; height=" & part.Height.ToString())
                    Console.WriteLine("Text: " & part.Text)
                    Console.WriteLine("Font is bold: " & part.FontIsBold.ToString())
                    Console.WriteLine("Font is italic:" & part.FontIsItalic.ToString())
                    Console.WriteLine("Font name: " & part.FontName)
                    Console.WriteLine("Font size:" & part.FontSize.ToString())
                    Console.WriteLine("Font color:" & part.FontColor.ToString())
                Next

                hasMatch = pdf.FindNext()
            End While
        Next

        Console.WriteLine()
        Console.WriteLine("Press any key to continue...")
        Console.ReadLine()
    End Sub
End Class

How to find text in PDF in VBScript (Visual Basic 6)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
' Searches every page of the sample PDF for the word "ipsum" and shows one
' message box per match, listing the match location and all of its elements.

' Create Bytescout.PDFExtractor.TextExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document
extractor.LoadDocumentFromFile("..\..\sample1.pdf")

' Get page count
pageCount = extractor.GetPageCount()

For i = 0 To pageCount - 1

    If extractor.Find(i, "ipsum", False) Then ' parameters are: page index, string to find, case sensitivity
        Do

            foundMessage = "Found word 'ipsum' on page #" & CStr(i) & " at left=" & CStr(extractor.FoundText.Left) & "; top=" & CStr(extractor.FoundText.Top) & "; width=" & CStr(extractor.FoundText.Width) & "; height=" & CStr(extractor.FoundText.Height)

            ' Start with an empty report so nothing carries over from the previous match
            elementInfo = ""

            ' iterate through each element in the found text
            For j = 0 To extractor.FoundText.ElementCount - 1

                ' get the j-th search result element (not always the first one)
                Set element = extractor.FoundText.GetElement(j)

                ' separate consecutive elements with a blank line
                If j > 0 Then elementInfo = elementInfo & vbCRLF & vbCRLF

                ' append, rather than overwrite, so every element is reported
                elementInfo = elementInfo & "Element #" & CStr(j) & " at left=" & CStr(element.Left) & "; top=" & CStr(element.Top) & "; width=" & CStr(element.Width) & "; height=" & CStr(element.Height) & vbCRLF
                elementInfo = elementInfo & "Text: " & CStr(element.Text) & vbCRLF
                elementInfo = elementInfo & "Font is bold: " & CStr(element.FontIsBold) & vbCRLF
                elementInfo = elementInfo & "Font is italic:" & CStr(element.FontIsItalic) & vbCRLF
                elementInfo = elementInfo & "Font name: " & CStr(element.FontName) & vbCRLF
                elementInfo = elementInfo & "Font size:" & CStr(element.FontSize) & vbCRLF
                elementInfo = elementInfo & "Font color (as Ole Color):" & CStr(element.FontColorAsOleColor)
            Next

            MsgBox foundMessage & vbCRLF & vbCRLF & elementInfo

        Loop While extractor.FindNext
    End If

Next

MsgBox "Done"

Set extractor = Nothing

Tutorials:

prev
next