ByteScout PDF Extractor SDK – ASP.NET – Extract Text By Columns

Home
/
Articles
/
ByteScout PDF Extractor SDK – ASP.NET – Extract Text By Columns

ByteScout PDF Extractor SDK – ASP.NET – Extract Text By Columns

Default.aspx.cs

using System;
using Bytescout.PDFExtractor;

namespace ExtractTextByColumns
{
	/// <summary>
	/// Sample page that extracts the text of a PDF document column by column
	/// and returns it to the browser inside a <c>&lt;pre&gt;</c> element.
	/// </summary>
	public partial class _Default : System.Web.UI.Page
	{
		/// <summary>
		/// Loads <c>bin\columns.pdf</c>, extracts its text in column-by-column
		/// mode and writes the HTML-encoded result as the whole response.
		/// </summary>
		/// <param name="sender">The page raising the Load event.</param>
		/// <param name="e">Event data (unused).</param>
		protected void Page_Load(object sender, EventArgs e)
		{
			String inputFile = Server.MapPath(@".\bin\columns.pdf");
			String extractedText;

			// Create the extractor inside a using block so the loaded document is
			// released deterministically, before the response is ended below.
			using (TextExtractor extractor = new TextExtractor())
			{
				extractor.RegistrationName = "demo";
				extractor.RegistrationKey = "demo";

				// Extract text by columns (useful if PDF document is designed in column layout like a newspaper)
				extractor.ExtractColumnByColumn = true;

				// Load sample PDF document
				extractor.LoadDocumentFromFile(inputFile);

				// Pull the text into memory so it can be encoded before it is sent.
				extractedText = extractor.GetText();
			}

			Response.Clear();
			Response.ContentType = "text/html";

			Response.Write("<pre>");

			// The response is HTML, so characters such as '<' and '&' coming from the
			// PDF must be encoded; otherwise they are interpreted as markup.
			Response.Write(Server.HtmlEncode(extractedText));

			Response.Write("</pre>");

			// Ends the request; kept outside the using block so cleanup has already run.
			Response.End();
		}
	}
}

Web.config

<?xml version="1.0"?>

<configuration>
  
    <appSettings/>
    <connectionStrings/>
  
    <system.web>
        <!-- 
            Set compilation debug="true" to insert debugging 
            symbols into the compiled page. Because this 
            affects performance, set this value to true only 
            during development.
        -->
        <compilation debug="true" />
        <!--
            The <authentication> section enables configuration 
            of the security authentication mode used by 
            ASP.NET to identify an incoming user. 
        -->
        <authentication mode="Windows" />
        <!--
            The <customErrors> section enables configuration 
            of what to do if/when an unhandled error occurs 
            during the execution of a request. Specifically, 
            it enables developers to configure html error pages 
            to be displayed in place of an error stack trace.

        <customErrors mode="RemoteOnly" defaultRedirect="GenericErrorPage.htm">
            <error statusCode="403" redirect="NoAccess.htm" />
            <error statusCode="404" redirect="FileNotFound.htm" />
        </customErrors>
        -->
    </system.web>
</configuration>

Click here to get your Free Trial version of the SDK

ByteScout PDF Extractor SDK – ASP.NET – Extract Text By Columns

ByteScout PDF Extractor SDK – ASP.NET – Extract Text By Columns

Tutorials: