 
         
         
             Important Update
                    
                With PDF Extractor SDK, you can extract text from a specific rectangular area of a PDF document defined by coordinates. Check the samples below to learn how to extract text by coordinates in C#, VB.NET, and VBScript using ByteScout PDF Extractor SDK.
Also, check this tutorial to learn how to extract text from PDF by keyword.
Select your programming language:
using System;
using System.IO;
using System.Text;
using Bytescout.PDFExtractor;
using System.Drawing;
using System.Diagnostics;
namespace Example
{
    /// <summary>
    /// Console sample that prints the text found inside a fixed rectangle
    /// on every page of a PDF document.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads <c>../../sample2.pdf</c>, restricts extraction to the rectangle
        /// at (0, 0) with width 200 and height 200, and writes the text extracted
        /// from each page to the console. Waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The extraction rectangle is identical for every page, so build it once
            // instead of re-creating it on each loop iteration.
            RectangleF location = new RectangleF(0, 0, 200, 200);

            // Dispose the extractor deterministically once extraction is finished,
            // rather than leaving whatever it holds to the finalizer.
            using (TextExtractor extractor = new TextExtractor("demo", "demo"))
            {
                // load the document
                extractor.LoadDocumentFromFile("../../sample2.pdf");

                // get page count
                int pageCount = extractor.GetPageCount();

                // iterate through pages (indexes start at 0)
                for (int i = 0; i < pageCount; i++)
                {
                    // set extraction area
                    extractor.SetExtractionArea(location);

                    // extract text bounded by the extraction area
                    string extractedString = extractor.GetTextFromPage(i);

                    Console.WriteLine("Extracted from page #" + i + ":\r\n" + extractedString);

                    // reset extraction area to full page (by default)
                    extractor.ResetExtractionArea();
                    Console.WriteLine("\r\n");
                }
            }

            Console.WriteLine("Press any key to exit...");
            Console.ReadKey();
        }
    }
}
Imports System.Drawing
Imports System.IO
Imports Bytescout.PDFExtractor
''' <summary>
''' Console sample that prints the text found inside a fixed rectangle
''' on the first page (index 0) of sample2.pdf.
''' </summary>
Class Program
	''' <summary>
	''' Loads sample2.pdf, restricts extraction to the rectangle at (0, 0) with
	''' width 200 and height 200, and writes the text extracted from page #0
	''' to the console.
	''' </summary>
	''' <param name="args">Command-line arguments (not used).</param>
	Friend Shared Sub Main(args As String())
		' Create Bytescout.PDFExtractor.TextExtractor instance; the Using block
		' disposes it deterministically when extraction is finished.
		Using extractor As New TextExtractor()
			extractor.RegistrationName = "demo"
			extractor.RegistrationKey = "demo"

			' Load sample PDF document
			extractor.LoadDocumentFromFile("sample2.pdf")

			' Define the rectangle to read from: located at 0,0 with width and height 200x200
			Dim location As RectangleF = New RectangleF(0, 0, 200, 200)

			' Limit the extractor to this rectangle
			extractor.SetExtractionArea(location)

			' Get the text inside the rectangle on page #0
			Dim extractedString As String = extractor.GetTextFromPage(0)

			' Write the extracted text to the console.
			' VB string literals have no backslash escapes, so "\r\n" would be printed
			' verbatim; Environment.NewLine produces the intended line break.
			Console.WriteLine("Extracted from page #0:" & Environment.NewLine & extractedString)
		End Using
	End Sub
End Class
' Searches every page of sample1.pdf for a fixed phrase. For each occurrence it
' reports the bounding rectangle, then re-extracts the text from that rectangle
' so the two results can be compared.

' Instantiate the extractor COM object and register it
Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Open the sample PDF document
extractor.LoadDocumentFromFile("..\..\sample1.pdf")

' Pages are addressed by index starting at 0, so the last index is count - 1
lastPageIndex = extractor.GetPageCount() - 1

For pageIndex = 0 To lastPageIndex
	' Start the search on this page; each FindNext call below advances to the next hit
	hasMatch = extractor.Find (pageIndex, "ALIQUIP EX EA COMMODO", false)

	Do While hasMatch
		' Read the rectangle of the current hit
		foundLeft = extractor.GetFoundTextRectangle_Left
		foundTop = extractor.GetFoundTextRectangle_Top
		foundWidth = extractor.GetFoundTextRectangle_Width
		foundHeight = extractor.GetFoundTextRectangle_Height

		report = "Found on page #" & CStr(pageIndex)
		report = report & " at left=" & CStr(foundLeft)
		report = report & "; top=" & CStr(foundTop)
		report = report & "; width=" & CStr(foundWidth)
		report = report & "; height=" & CStr(foundHeight)
		WScript.Echo report

		' Extract from the reported rectangle to check that the same text comes back
		extractor.SetExtractionArea foundLeft, foundTop, foundWidth, foundHeight
		WScript.Echo "Extracted text: " & extractor.GetTextFromPage(pageIndex)

		' Restore the extraction area before continuing the search
		extractor.ResetExtractionArea

		hasMatch = extractor.FindNext
	Loop
Next

' Release the COM object
Set extractor = Nothing
                                                                        
                    