How to find text in PDF file and get coordinates in ASP.NET, C#, VB.NET, VBScript using PDF Extractor SDK

  • Home
  • /
  • Articles
  • /
  • How to find text in PDF file and get coordinates in ASP.NET, C#, VB.NET, VBScript using PDF Extractor SDK

The following code samples show how to find text in a PDF file and get the coordinates of each match using Bytescout PDF Extractor SDK.

Select your programming language:

ASP.NET

using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
	/// <summary>
	/// Page that searches "sample1.pdf" for the string "ipsum" and writes every
	/// match (page index and bounding rectangle) to the HTTP response as HTML.
	/// </summary>
	public partial class _Default : System.Web.UI.Page
	{
		/// <summary>
		/// Loads the sample PDF, runs the search and streams the results to the client.
		/// </summary>
		/// <param name="sender">Event source (unused).</param>
		/// <param name="e">Event arguments (unused).</param>
		protected void Page_Load(object sender, EventArgs e)
		{
			// This test file will be copied to the project directory on the pre-build event (see the project properties).
			String inputFile = Server.MapPath("sample1.pdf");

			// Create the Bytescout.PDFExtractor.TextExtractor instance inside a using block
			// so the extractor is disposed even if the search or the response write fails.
			using (TextExtractor extractor = new TextExtractor())
			{
				extractor.RegistrationName = "demo";
				extractor.RegistrationKey = "demo";

				// Load sample PDF document
				extractor.LoadDocumentFromFile(inputFile);

				Response.Clear();
				Response.ContentType = "text/html";

				Rectangle location;
				int pageIndex;

				// The response is HTML, so line breaks must be emitted as <br> tags;
				// the inner quotes are escaped to keep the literal well-formed.
				Response.Write("Searching for \"ipsum\" string:<br><br>");

				// Search for "ipsum" string: Find reports the first match,
				// FindNext continues until no further match is found.
				if (extractor.Find("ipsum", out pageIndex, out location))
				{
					do
					{
						Response.Write("Found on page " + pageIndex + " at location " + location.ToString() + "<br>");

					} while (extractor.FindNext(out pageIndex, out location));
				}
			}

			Response.End();
		}
	}
}

C#

using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace FindText
{
	/// <summary>
	/// Console sample that searches every page of "sample1.pdf" for the string
	/// "ipsum" and prints the page index and bounding rectangle of each match.
	/// </summary>
	class Program
	{
		/// <summary>
		/// Program entry point.
		/// </summary>
		/// <param name="args">Command-line arguments (unused).</param>
		static void Main(string[] args)
		{
			// Create the Bytescout.PDFExtractor.TextExtractor instance inside a using block
			// so the extractor is disposed as soon as the search is finished, rather than
			// staying alive while the program waits for user input.
			using (TextExtractor extractor = new TextExtractor())
			{
				extractor.RegistrationName = "demo";
				extractor.RegistrationKey = "demo";

				// Load sample PDF document
				extractor.LoadDocumentFromFile("sample1.pdf");

				int pageCount = extractor.GetPageCount();
				RectangleF location;

				for (int i = 0; i < pageCount; i++)
				{
					// Search each page for "ipsum" string; the third argument (false)
					// is the case-sensitivity flag.
					if (extractor.Find(i, "ipsum", false, out location))
					{
						do
						{
							Console.WriteLine("Found on page " + i + " at location " + location.ToString());
						}
						while (extractor.FindNext(out location));
					}
				}
			}

			Console.WriteLine();
			Console.WriteLine("Press any key to continue...");
			// ReadLine waits for Enter, keeping the console window open.
			Console.ReadLine();
		}
	}
}

VB.NET

Imports System.Drawing
Imports Bytescout.PDFExtractor

''' <summary>
''' Console sample that searches "sample1.pdf" for the string "ipsum" and prints
''' the page index and bounding rectangle of each match.
''' </summary>
Class Program
	''' <summary>
	''' Program entry point.
	''' </summary>
	''' <param name="args">Command-line arguments (unused).</param>
	Friend Shared Sub Main(args As String())
		' Create the Bytescout.PDFExtractor.TextExtractor instance inside a Using block
		' so the extractor is disposed as soon as the search is finished.
		Using extractor As New TextExtractor()
			extractor.RegistrationName = "demo"
			extractor.RegistrationKey = "demo"

			' Load sample PDF document
			extractor.LoadDocumentFromFile("sample1.pdf")

			Dim location As Rectangle
			Dim pageIndex As Integer

			' Search for "ipsum" string: Find reports the first match,
			' FindNext continues until no further match is found.
			If extractor.Find("ipsum", pageIndex, location) Then
				Do
					Console.WriteLine("Found on page " & pageIndex & " at location " & location.ToString())
				Loop While extractor.FindNext(pageIndex, location)
			End If
		End Using

		Console.WriteLine()
		Console.WriteLine("Press any key to continue...")
		' ReadLine waits for Enter, keeping the console window open.
		Console.ReadLine()
	End Sub
End Class

VBScript

' Searches every page of the sample PDF for the word "ipsum" and shows a
' message box with the bounding rectangle of each match.

' Create Bytescout.PDFExtractor.TextExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document (relative path: two folders above the script folder)
extractor.LoadDocumentFromFile("..\..\sample1.pdf")

' Get page count
pageCount = extractor.GetPageCount()

' The loop runs over page indexes 0 .. pageCount-1
For i = 0 To pageCount - 1

	' Parameters are: page index, string to find, case sensitivity
	If extractor.Find(i, "ipsum", false) Then
		Do
			' The rectangle of the current match is read from the extractor object
			MsgBox "Found word 'ipsum' on page #" & CStr(i) & " at left=" & CStr(extractor.GetFoundTextRectangle_Left) & "; top=" & CStr(extractor.GetFoundTextRectangle_Top) & "; width=" & CStr(extractor.GetFoundTextRectangle_Width) & "; height=" & CStr(extractor.GetFoundTextRectangle_Height)
		Loop While extractor.FindNext
	End If

Next

MsgBox "Done"

' Release the COM object
Set extractor = Nothing


prev
next