ByteScout PDF Extractor SDK – ASP.NET – Extract Text From Page Area

  • Home
  • /
  • Articles
  • /
  • ByteScout PDF Extractor SDK – ASP.NET – Extract Text From Page Area

ByteScout PDF Extractor SDK – ASP.NET – Extract Text From Page Area

Default.aspx.cs

using System;
using System.Drawing;
using Bytescout.PDFExtractor;

namespace ExtractTextFromPageArea
{
	/// <summary>
	/// Sample page that extracts the text found in a rectangular area of the
	/// first page of a PDF document and writes it to the HTTP response
	/// wrapped in a <c>&lt;pre&gt;</c> element.
	/// </summary>
	public partial class _Default : System.Web.UI.Page
	{
		/// <summary>
		/// Loads <c>bin\columns.pdf</c>, narrows the extraction area to one third
		/// of the first page's width, and streams the extracted text to the client.
		/// </summary>
		/// <param name="sender">Event source (unused).</param>
		/// <param name="e">Event data (unused).</param>
		protected void Page_Load(object sender, EventArgs e)
		{
			string inputFile = Server.MapPath(@".\bin\columns.pdf");

			// Create Bytescout.PDFExtractor.TextExtractor instance
			TextExtractor extractor = new TextExtractor();

			try
			{
				extractor.RegistrationName = "demo";
				extractor.RegistrationKey = "demo";

				// Load sample PDF document
				extractor.LoadDocumentFromFile(inputFile);

				// Get dimensions of the first document page (page indexes are zero-based)
				RectangleF rectangle = extractor.GetPageRectangle(0);

				// Keep the rectangle's origin and height but shrink its width to one
				// third, so only that slice of the page is extracted.
				rectangle.Width = rectangle.Width / 3f;

				Response.Clear();
				Response.ContentType = "text/html";

				extractor.SetExtractionArea(rectangle);

				Response.Write("<pre>");

				// Save extracted text to output stream.
				// NOTE(review): the text is written as-is into an HTML response; if the
				// PDF can contain markup characters, consider HTML-encoding it.
				extractor.SavePageTextToStream(0, Response.OutputStream);

				Response.Write("</pre>");

				// Response.End() terminates the request by raising ThreadAbortException,
				// so cleanup must live in the finally block below to be executed.
				Response.End();
			}
			finally
			{
				// Release the loaded document and any resources held by the extractor,
				// even when loading/extraction fails or the request is ended above.
				extractor.Dispose();
			}
		}
	}
}

Web.config

<?xml version="1.0"?>

<configuration>
  
    <appSettings/>
    <connectionStrings/>
  
    <system.web>
        <!-- 
            Set compilation debug="true" to insert debugging 
            symbols into the compiled page. Because this 
            affects performance, set this value to true only 
            during development.
        -->
        <compilation debug="true" />
        <!--
            The <authentication> section enables configuration 
            of the security authentication mode used by 
            ASP.NET to identify an incoming user. 
        -->
        <authentication mode="Windows" />
        <!--
            The <customErrors> section enables configuration 
            of what to do if/when an unhandled error occurs 
            during the execution of a request. Specifically, 
            it enables developers to configure html error pages 
            to be displayed in place of an error stack trace.

        <customErrors mode="RemoteOnly" defaultRedirect="GenericErrorPage.htm">
            <error statusCode="403" redirect="NoAccess.htm" />
            <error statusCode="404" redirect="FileNotFound.htm" />
        </customErrors>
        -->
    </system.web>
</configuration>


  Click here to get your Free Trial version of the SDK

prev
next