ByteScout PDF To HTML SDK – ASP.NET – PDF To HTML with Images

  • Home
  • /
  • Articles
  • /
  • ByteScout PDF To HTML SDK – ASP.NET – PDF To HTML with Images

ByteScout PDF To HTML SDK – ASP.NET – PDF To HTML with Images


using System;
using System.IO;
using System.Text;
using Bytescout.PDF2HTML;

namespace ExtractHTML
{
	/// <summary>
	/// Demo page that converts a sample PDF to HTML (with CSS and images) using
	/// ByteScout PDF To HTML SDK and streams the resulting HTML back to the browser.
	/// </summary>
	public partial class _Default : System.Web.UI.Page
	{
		/// <summary>
		/// Converts <c>bin\sample2.pdf</c> to <c>~/output/result.html</c>, rewrites the
		/// image paths so they resolve from the site root, and writes the HTML to the response.
		/// </summary>
		/// <param name="sender">The page raising the event.</param>
		/// <param name="e">Event data (unused).</param>
		protected void Page_Load(object sender, EventArgs e)
		{
			// This test PDF file is automatically copied to the output build directory.
			String inputFile = Server.MapPath("bin\\sample2.pdf");

			// Prepare output file path
			String outputFolder = Server.MapPath("~/output/");
			String outputFile = Path.Combine(outputFolder, "result.html");

			// Make sure the output folder exists before the converter writes into it
			// (no-op when the folder is already there).
			Directory.CreateDirectory(outputFolder);

			// Create Bytescout.PDF2HTML.HTMLExtractor instance
			HTMLExtractor extractor = new HTMLExtractor();
			extractor.RegistrationName = "demo";
			extractor.RegistrationKey = "demo";

			// Set HTML with CSS extraction mode
			extractor.ExtractionMode = HTMLExtractionMode.HTMLWithCSS;

			// Load sample PDF document
			extractor.LoadDocumentFromFile(inputFile);

			// Save extracted text to output file. Document images will be created in a subfolder.
			extractor.SaveHtmlToFile(outputFile);

			// The converter knows nothing about server's home directory, so it places images to a subfolder 
			// named "<filename>.images" near to the output file and generates image paths relative to the output folder.
			// We need to change those paths relative to server home directory.
			// NOTE(review): the folder name replaced below is derived from the input file name;
			// confirm it matches the folder the SDK actually creates next to result.html.

			string[] lines = File.ReadAllLines(outputFile);
			StringBuilder builder = new StringBuilder();

			foreach (string line in lines)
			{
				// replace image source paths
				builder.AppendLine(line.Replace("sample2.pdf.images", "output/sample2.pdf.images"));
			}

			// Write the modified content to Response:

			Response.Clear();
			Response.ContentType = "text/html";
			Response.Write(builder.ToString());
			Response.End();
		}
	}
}





<?xml version="1.0"?>
<configuration>
    <system.web>
        <!--
            Set compilation debug="true" to insert debugging 
            symbols into the compiled page. Because this 
            affects performance, set this value to true only 
            during development.
        -->
        <compilation debug="true" />
        <!--
            The <authentication> section enables configuration 
            of the security authentication mode used by 
            ASP.NET to identify an incoming user. 
        -->
        <authentication mode="Windows" />
        <!--
            The <customErrors> section enables configuration 
            of what to do if/when an unhandled error occurs 
            during the execution of a request. Specifically, 
            it enables developers to configure html error pages 
            to be displayed in place of an error stack trace.

        <customErrors mode="RemoteOnly" defaultRedirect="GenericErrorPage.htm">
            <error statusCode="403" redirect="NoAccess.htm" />
            <error statusCode="404" redirect="FileNotFound.htm" />
        </customErrors>
        -->
    </system.web>
</configuration>

  Click here to get your Free Trial version of the SDK