This tutorial shows how to extract text from columns in a PDF file with the PDF Extractor SDK, with sample code for ASP.NET, C#, VB.NET, and VBScript.
Also, see the following tutorial: How to extract text from columns in PDF by coordinates.
Select your programming language:
using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.IO;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using Bytescout.PDFExtractor;

namespace ExtractAllText
{
    /// <summary>
    /// Web Forms page that extracts the text of "columns.pdf" column by column
    /// and writes it directly to the HTTP response.
    /// </summary>
    public partial class _Default : System.Web.UI.Page
    {
        /// <summary>
        /// Loads the sample PDF, extracts its text in column-by-column mode and
        /// streams the result to the client instead of rendering the page.
        /// </summary>
        /// <param name="sender">The object that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // This test file will be copied to the project directory on the
            // pre-build event (see the project properties).
            string inputFile = Server.MapPath("columns.pdf");

            // Dispose the extractor deterministically once the text has been written.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Set to extract text column by column.
                extractor.ExtractColumnByColumn = true;

                // Load sample PDF document.
                extractor.LoadDocumentFromFile(inputFile);

                Response.Clear();

                // The payload is plain extracted text, not markup: serving it as
                // text/plain keeps the column whitespace intact and prevents any
                // "<...>" sequences found in the PDF from being rendered as HTML.
                Response.ContentType = "text/plain";

                // Save extracted text to the output stream.
                extractor.SaveTextToStream(Response.OutputStream);
            }

            // End the response here so the page's own markup is not appended
            // after the extracted text.
            Response.End();
        }
    }
}
using System;
using Bytescout.PDFExtractor;

namespace ExtractAllText
{
    /// <summary>
    /// Console sample: extracts the text of "columns.pdf" column by column
    /// into "output.txt" and opens the result.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Runs the extraction and then opens the output file.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Dispose the extractor deterministically; it is no longer needed
            // once the output file has been written.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document.
                extractor.LoadDocumentFromFile("columns.pdf");

                // Set to extract text column by column.
                extractor.ExtractColumnByColumn = true;

                // Save extracted text to file.
                extractor.SaveTextToFile("output.txt");
            }

            // Open output file in default associated application.
            System.Diagnostics.Process.Start("output.txt");
        }
    }
}
Imports Bytescout.PDFExtractor

''' <summary>
''' Console sample: extracts the text of a PDF document column by column
''' into "output.txt" and opens the result.
''' </summary>
Class Program

    ''' <summary>
    ''' Entry point. Runs the extraction and then opens the output file.
    ''' </summary>
    ''' <param name="args">Command-line arguments (unused).</param>
    Friend Shared Sub Main(args As String())

        ' Dispose the extractor deterministically; it is no longer needed
        ' once the output file has been written.
        Using extractor As New TextExtractor()
            extractor.RegistrationName = "demo"
            extractor.RegistrationKey = "demo"

            ' Load sample PDF document.
            ' NOTE(review): the C# and VBScript samples load "columns.pdf" -
            ' confirm "sample2.pdf" is the intended input for this project.
            extractor.LoadDocumentFromFile("sample2.pdf")

            ' Set to extract text column by column.
            extractor.ExtractColumnByColumn = True

            ' Save extracted text to file.
            extractor.SaveTextToFile("output.txt")
        End Using

        ' Open output file in default associated application.
        System.Diagnostics.Process.Start("output.txt")

    End Sub

End Class
' Extracts the text of a PDF document column by column into a text file,
' then opens that file with its associated application.

Const INPUT_PDF = "..\..\columns.pdf"
Const OUTPUT_TXT = "output.txt"

Dim pdfText, wshShell

' Instantiate the Bytescout text extractor COM object and register it.
Set pdfText = CreateObject("Bytescout.PDFExtractor.TextExtractor")
pdfText.RegistrationName = "demo"
pdfText.RegistrationKey = "demo"

' Open the sample document.
pdfText.LoadDocumentFromFile(INPUT_PDF)

' Switch to column-by-column extraction.
pdfText.ExtractColumnByColumn = True

' Write the extracted text to disk.
pdfText.SaveTextToFile(OUTPUT_TXT)

' Launch the result in a normal window (style 1) without waiting for it to close.
Set wshShell = CreateObject("WScript.Shell")
wshShell.Run OUTPUT_TXT, 1, False

' Release the COM objects.
Set wshShell = Nothing
Set pdfText = Nothing