This tutorial shows how to extract text column by column from a PDF file in ASP.NET, C#, VB.NET, and VBScript using PDF Extractor SDK.
Also, see the following tutorial: How to extract text from columns in PDF by coordinates.
Select your programming language:
using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.IO;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using Bytescout.PDFExtractor;
namespace ExtractAllText
{
    /// <summary>
    /// Page that extracts the text of "columns.pdf" column by column and
    /// writes it straight into the HTTP response.
    /// </summary>
    public partial class _Default : System.Web.UI.Page
    {
        /// <summary>
        /// Runs the extraction on page load and streams the result to the client.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // This test file will be copied to the project directory on the pre-build event (see the project properties).
            String inputFile = Server.MapPath("columns.pdf");

            Response.Clear();
            // NOTE(review): the extracted output is plain text but is served as text/html,
            // as in the original sample; confirm whether text/plain is intended.
            Response.ContentType = "text/html";

            // The extractor holds the loaded document; dispose it deterministically
            // so the handle is released on every request, even if extraction throws.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Set to extract text column by column
                extractor.ExtractColumnByColumn = true;

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                // Save extracted text to output stream
                extractor.SaveTextToStream(Response.OutputStream);
            }

            // End the response only after the extractor has been released.
            Response.End();
        }
    }
}
using System;
using Bytescout.PDFExtractor;
namespace ExtractAllText
{
    /// <summary>
    /// Console sample: extracts the text of "columns.pdf" column by column
    /// into "output.txt" and opens the result.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Command-line arguments are not used.
        /// </summary>
        /// <param name="args">Command-line arguments (ignored).</param>
        static void Main(string[] args)
        {
            // Dispose the extractor deterministically so the loaded document
            // is released before the output file is handed to another application.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("columns.pdf");

                // Set to extract text column by column
                extractor.ExtractColumnByColumn = true;

                // Save extracted text to file
                extractor.SaveTextToFile("output.txt");
            }

            // Open output file in default associated application
            System.Diagnostics.Process.Start("output.txt");
        }
    }
}
Imports Bytescout.PDFExtractor
''' <summary>
''' Console sample: extracts the text of "columns.pdf" column by column
''' into "output.txt" and opens the result.
''' </summary>
Class Program
    ''' <summary>
    ''' Entry point. Command-line arguments are not used.
    ''' </summary>
    ''' <param name="args">Command-line arguments (ignored).</param>
    Friend Shared Sub Main(args As String())
        ' Dispose the extractor deterministically so the loaded document
        ' is released before the output file is opened.
        Using extractor As New TextExtractor()
            extractor.RegistrationName = "demo"
            extractor.RegistrationKey = "demo"

            ' Load sample PDF document (same multi-column sample file the other
            ' language versions of this example use)
            extractor.LoadDocumentFromFile("columns.pdf")

            ' Set to extract text column by column
            extractor.ExtractColumnByColumn = True

            ' Save extracted text to file
            extractor.SaveTextToFile("output.txt")
        End Using

        ' Open output file in default associated application
        System.Diagnostics.Process.Start("output.txt")
    End Sub
End Class
' Extracts the text of a PDF column by column into output.txt,
' then opens the result with the application associated with .txt files.

' Instantiate the text extractor through its COM ProgID
Set pdfText = CreateObject("Bytescout.PDFExtractor.TextExtractor")

With pdfText
    ' Registration credentials
    .RegistrationName = "demo"
    .RegistrationKey = "demo"

    ' Open the sample PDF document
    .LoadDocumentFromFile "..\..\columns.pdf"

    ' Read the page one column at a time
    .ExtractColumnByColumn = True

    ' Write the extracted text to disk
    .SaveTextToFile "output.txt"
End With

' Launch the output file in a normal window without waiting for it to close
Set wshShell = CreateObject("WScript.Shell")
wshShell.Run "output.txt", 1, False

' Release the COM objects
Set wshShell = Nothing
Set pdfText = Nothing