How to extract text from columns in PDF in ASP.NET, C#, VB.NET and VBScript using ByteScout PDF Extractor SDK

  • Home
  • /
  • Articles
  • /
  • How to extract text from columns in PDF in ASP.NET, C#, VB.NET and VBScript using ByteScout PDF Extractor SDK

This tutorial shows how to extract text column by column from a PDF file in ASP.NET, C#, VB.NET and VBScript using ByteScout PDF Extractor SDK.

Also, see the following tutorial: How to extract text from columns in PDF by coordinates.

Select your programming language:

How to extract text from columns in ASP.NET

using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.IO;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using Bytescout.PDFExtractor;

namespace ExtractAllText
{
	/// <summary>
	/// Page that extracts the text of "columns.pdf" column by column and
	/// writes it directly to the HTTP response.
	/// </summary>
	public partial class _Default : System.Web.UI.Page
	{
		/// <summary>
		/// Runs the extraction on page load and replaces the page output
		/// with the extracted text.
		/// </summary>
		/// <param name="sender">Event source (unused).</param>
		/// <param name="e">Event data (unused).</param>
		protected void Page_Load(object sender, EventArgs e)
		{
			// This test file will be copied to the project directory on the pre-build event (see the project properties).
			string inputFile = Server.MapPath("columns.pdf");

			// The extractor is wrapped in "using" so it is disposed as soon as
			// the text has been written, even if extraction throws.
			using (TextExtractor extractor = new TextExtractor())
			{
				extractor.RegistrationName = "demo";
				extractor.RegistrationKey = "demo";

				// Extract text column by column
				extractor.ExtractColumnByColumn = true;

				// Load sample PDF document
				extractor.LoadDocumentFromFile(inputFile);

				Response.Clear();

				// The output is plain text, not markup: serving it as text/plain keeps
				// the column whitespace intact and stops the browser from interpreting
				// characters such as '<' that may occur in the PDF text.
				Response.ContentType = "text/plain";

				// Save extracted text to output stream
				extractor.SaveTextToStream(Response.OutputStream);
			}

			// End the response after the extractor has been disposed.
			Response.End();
		}
	}
}

How to extract text from columns in C#

using System;
using Bytescout.PDFExtractor;

namespace ExtractAllText
{
	/// <summary>
	/// Console sample: extracts the text of "columns.pdf" column by column
	/// into "output.txt" and opens the result.
	/// </summary>
	class Program
	{
		/// <summary>
		/// Entry point. Reads "columns.pdf" from the working directory and
		/// writes the extracted text to "output.txt".
		/// </summary>
		/// <param name="args">Command-line arguments (unused).</param>
		static void Main(string[] args)
		{
			// The extractor is wrapped in "using" so it is disposed before the
			// output file is opened, even if extraction throws.
			using (TextExtractor extractor = new TextExtractor())
			{
				extractor.RegistrationName = "demo";
				extractor.RegistrationKey = "demo";

				// Extract text column by column (configured before the document is loaded)
				extractor.ExtractColumnByColumn = true;

				// Load sample PDF document
				extractor.LoadDocumentFromFile("columns.pdf");

				// Save extracted text to file
				extractor.SaveTextToFile("output.txt");
			}

			// Open output file in default associated application
			System.Diagnostics.Process.Start("output.txt");
		}
	}
}

How to extract text from columns in Visual Basic .NET

Imports Bytescout.PDFExtractor

''' <summary>
''' Console sample: extracts the text of "columns.pdf" column by column
''' into "output.txt" and opens the result.
''' </summary>
Class Program
	''' <summary>
	''' Entry point. Reads "columns.pdf" from the working directory and
	''' writes the extracted text to "output.txt".
	''' </summary>
	''' <param name="args">Command-line arguments (unused).</param>
	Friend Shared Sub Main(args As String())

		' The extractor is wrapped in "Using" so it is disposed before the
		' output file is opened, even if extraction throws.
		Using extractor As New TextExtractor()
			extractor.RegistrationName = "demo"
			extractor.RegistrationKey = "demo"

			' Load the multi-column sample PDF document
			extractor.LoadDocumentFromFile("columns.pdf")

			' Extract text column by column
			extractor.ExtractColumnByColumn = True

			' Save extracted text to file
			extractor.SaveTextToFile("output.txt")
		End Using

		' Open output file in default associated application
		System.Diagnostics.Process.Start("output.txt")
	End Sub
End Class

How to extract text from columns in VBScript (also works in Visual Basic 6)

' Extracts the text of a PDF column by column into a text file and then
' opens that file in its default associated application.

Const INPUT_PDF = "..\..\columns.pdf"
Const OUTPUT_TXT = "output.txt"
Const WINDOW_STYLE = 1	' value passed as the window-style argument of WScript.Shell.Run

Dim extractor, shell

' Instantiate the Bytescout.PDFExtractor.TextExtractor COM object and drive it
Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
With extractor
	.RegistrationName = "demo"
	.RegistrationKey = "demo"

	' Load the sample PDF document
	Call .LoadDocumentFromFile(INPUT_PDF)

	' Switch to column-by-column extraction
	.ExtractColumnByColumn = True

	' Write the extracted text to the output file
	Call .SaveTextToFile(OUTPUT_TXT)
End With

' Open the output file without waiting for the viewer to exit
Set shell = CreateObject("WScript.Shell")
shell.Run OUTPUT_TXT, WINDOW_STYLE, False

' Release the COM objects
Set shell = Nothing
Set extractor = Nothing
prev
next