ByteScout PDF Extractor SDK – C# – PDF To XML With Images

  • Home
  • /
  • Articles
  • /
  • ByteScout PDF Extractor SDK – C# – PDF To XML With Images

ByteScout PDF Extractor SDK – C# – PDF To XML With Images

Program.cs

using System;
using Bytescout.PDFExtractor;

namespace PDF2XML
{
    /// <summary>
    /// Console sample that converts a PDF document to XML twice: once with
    /// images written to external files and once with images embedded in the XML.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "sample1.pdf" and writes "result_with_external_images.xml" and
        /// "result_with_embedded_images.xml".
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.XMLExtractor instance.
            // The using statement guarantees Dispose() runs even if loading the
            // document or saving the XML throws, so the extractor is never leaked.
            using (XMLExtractor extractor = new XMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample1.pdf");

                // Uncomment this line to get rid of empty nodes in XML
                //extractor.PreserveFormattingOnTextExtraction = false;

                // Set output image format
                extractor.ImageFormat = OutputImageFormat.PNG;

                // Save images to external files
                extractor.SaveImages = ImageHandling.OuterFile;
                extractor.ImageFolder = "images"; // Folder for external images
                extractor.SaveXMLToFile("result_with_external_images.xml");

                // Embed images into XML as Base64 encoded string
                extractor.SaveImages = ImageHandling.Embed;
                extractor.SaveXMLToFile("result_with_embedded_images.xml");
            }
        }
    }
}


  Click here to get your Free Trial version of the SDK

prev
next