With PDF Extractor SDK, you can extract text from individual columns of a PDF document by restricting extraction to a rectangular area whose position and width you define. The sample code below shows how to extract the text of three equal-width columns from a PDF page in ASP.NET, C#, VB.NET, and VBScript.

Select your programming language:

How to extract text from columns in PDF by coordinates in ASP.NET

using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.IO;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using Bytescout.PDFExtractor;

namespace ExtractAllText
{
	/// <summary>
	/// Sample page that extracts the text of the first page of "columns.pdf"
	/// one column at a time and writes it to the HTTP response.
	/// </summary>
	public partial class _Default : System.Web.UI.Page
	{
		/// <summary>
		/// Loads the sample PDF, splits its first page into equal-width columns
		/// and streams the text of each column to the response in left-to-right order.
		/// </summary>
		/// <param name="sender">Event source (not used).</param>
		/// <param name="e">Event data (not used).</param>
		protected void Page_Load(object sender, EventArgs e)
		{
			// Number of equally distributed columns the page is assumed to contain
			const int columnCount = 3;

			// This test file will be copied to the project directory on the pre-build event (see the project properties).
			String inputFile = Server.MapPath("columns.pdf");

			// Create Bytescout.PDFExtractor.TextExtractor instance
			// NOTE(review): the extractor is never released in this handler; if the SDK
			// version in use exposes Dispose(), call it before Response.End() - confirm
			// against the SDK reference.
			TextExtractor extractor = new TextExtractor();
			extractor.RegistrationName = "demo";
			extractor.RegistrationKey = "demo";

			// Load sample PDF document
			extractor.LoadDocumentFromFile(inputFile);

			// Read width and height of the very first page (zero index).
			// Kept as float so fractional page sizes are not truncated.
			float pageWidth = extractor.GetPageRect_Width(0);
			float pageHeight = extractor.GetPageRect_Height(0);

			// Width of one column: page width divided by the number of columns.
			// Floating-point division keeps the last column flush with the right
			// page edge; integer division would leave a sliver uncovered.
			float columnWidth = pageWidth / columnCount;

			Response.Clear();
			Response.ContentType = "text/html";

			// Iterate through the columns from left to right
			for (int i = 0; i < columnCount; i++)
			{
				// Set the extraction area to column #i (full page height)
				extractor.SetExtractionArea(i * columnWidth, 0, columnWidth, pageHeight);

				// Append the text found in that area to the output stream
				extractor.SavePageTextToStream(0, Response.OutputStream);
			}

			Response.End();
		}
	}
}

How to extract text from columns in PDF by coordinates in C#

using System;
using Bytescout.PDFExtractor;

namespace ExtractAllText
{
	/// <summary>
	/// Console sample: saves the text of each of the three equal-width columns
	/// on the first page of "columns.pdf" to its own text file and opens it.
	/// </summary>
	class Program
	{
		/// <summary>
		/// Entry point. Writes "columns-column0.txt" .. "columns-column2.txt"
		/// and launches each file with its associated application.
		/// </summary>
		/// <param name="args">Command-line arguments (not used).</param>
		static void Main(string[] args)
		{
			// Set up the extractor with the demo registration
			TextExtractor textExtractor = new TextExtractor();
			textExtractor.RegistrationName = "demo";
			textExtractor.RegistrationKey = "demo";

			// Open the sample document
			textExtractor.LoadDocumentFromFile("columns.pdf");

			// Dimensions of the first page (page index 0)
			float firstPageWidth = textExtractor.GetPageRect_Width(0);
			float firstPageHeight = textExtractor.GetPageRect_Height(0);

			// The page is assumed to hold 3 columns of equal width
			float widthPerColumn = firstPageWidth / 3f;

			// Walk the columns from left to right
			int column = 0;
			while (column < 3)
			{
				// Restrict extraction to the current column, full page height
				float left = column * widthPerColumn;
				textExtractor.SetExtractionArea(left, 0, widthPerColumn, firstPageHeight);

				// Write the column text to its own file
				string target = "columns-column" + column + ".txt";
				textExtractor.SavePageTextToFile(0, target);

				// Show the result in the default associated application
				System.Diagnostics.Process.Start(target);

				column++;
			}
		}
	}
}

How to extract text from columns in PDF by coordinates in Visual Basic .NET

Imports Bytescout.PDFExtractor
Imports System.Drawing

''' <summary>
''' Console sample: saves the text of each of the three equal-width columns
''' on the first page of "columns.pdf" to its own text file and opens it.
''' </summary>
Class Program
	''' <summary>
	''' Entry point. Writes "columns-column0.txt" .. "columns-column2.txt"
	''' and opens each file in the default associated application.
	''' </summary>
	''' <param name="args">Command-line arguments (not used).</param>
	Friend Shared Sub Main(args As String())

		' Create Bytescout.PDFExtractor.TextExtractor instance
		Dim extractor As New TextExtractor()
		extractor.RegistrationName = "demo"
		extractor.RegistrationKey = "demo"

		' Load sample PDF document
		extractor.LoadDocumentFromFile("columns.pdf")

		' Read width and height of the very first page (zero index).
		' Single keeps fractional page sizes instead of rounding them to Integer.
		Dim pageWidth As Single = extractor.GetPageRect_Width(0)
		Dim pageHeight As Single = extractor.GetPageRect_Height(0)

		' Now we are extracting content assuming we have 3 columns
		' equally distributed on the page.

		' First calculate the width of one column by dividing page width by number of columns (3)
		Dim columnWidth As Single = pageWidth / 3.0F

		' Iterate through 3 columns
		For i As Integer = 0 To 2

			' Set the extraction area to the #i column
			extractor.SetExtractionArea(i * columnWidth, 0, columnWidth, pageHeight)

			' Save column to file.
			' Use & for concatenation: "text" + Integer makes VB attempt a numeric
			' addition, which fails because the text is not a number.
			Dim outFileName As String = "columns-column" & i & ".txt"
			extractor.SavePageTextToFile(0, outFileName)

			' Open generated file in default text viewer
			System.Diagnostics.Process.Start(outFileName)

		Next

	End Sub
End Class

How to extract text from columns in PDF by coordinates in VBScript (Visual Basic 6)

' Extracts the text of three equal-width columns from the first page of
' columns.pdf, saves each column to its own text file and opens the file.

' Create Bytescout.PDFExtractor.TextExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document
extractor.LoadDocumentFromFile "..\..\columns.pdf"

' Read width and height of the very first page (zero index)
pageWidth = extractor.GetPageRect_Width(0)
pageHeight = extractor.GetPageRect_Height(0)

' Now we are extracting content assuming we have 3 columns
' equally distributed on the page.

' First calculate the width of one column by dividing page width by number of columns (3)
columnWidth = pageWidth / 3

' One shell object serves every column, so create it once instead of per iteration
Set shell = CreateObject("WScript.Shell")

' Iterate through 3 columns
For i = 0 To 2

	' Set the extraction area to the #i column
	extractor.SetExtractionArea i * columnWidth, 0, columnWidth, pageHeight

	' Save the column text to its own file
	outFileName = "columns-column" & CStr(i) & ".txt"
	extractor.SavePageTextToFile 0, outFileName

	' Open output file in default associated application
	' (window style 1, False = do not wait for the application to exit)
	shell.Run outFileName, 1, False

Next

Set shell = Nothing
Set extractor = Nothing

MsgBox "Done"