Extract PDF to XML file in C# using PDF Extractor SDK

Home
/
Articles
/
Extract PDF to XML file in C# using PDF Extractor SDK

The sample below shows how to convert a PDF document to an XML (eXtensible Markup Language) file using the Bytescout PDF Extractor SDK. The same option is also available from the PDF Extractor SDK Dashboard and from Bytescout PDF Viewer (Data Extraction > Extract as XML):

using System;
using System.IO;
using System.Text;
using Bytescout.PDFExtractor;
using System.Xml;
using System.Drawing;
using System.Diagnostics;

namespace PDFtoXML
{
	class Program
	{
		static void Main(string[] args)
		{

            // Create Bytescout.PDFExtractor.XMLExtractor instance
            XMLExtractor extractor = new XMLExtractor();
            extractor.RegistrationName = "demo";
            extractor.RegistrationKey = "demo";

            // Load sample PDF document
            extractor.LoadDocumentFromFile("sample3.pdf");

            extractor.SaveXMLToFile("output.xml");            

			Console.WriteLine();
			Console.WriteLine("Data has been extracted to 'output.xml' file.");
			Console.WriteLine();
			Console.WriteLine("Press any key to continue and open OUTPUT.XML in default viewer...");
			Console.ReadKey();

            Process.Start("output.xml");
		}
	}
}

Tutorials:

Web API