This article shows how to convert a PDF document to XML in C#, VB.NET, and VBScript using ByteScout PDF Extractor SDK.
You may also check these tutorials to learn how to convert PDF to XLS or CSV.
Select your programming language:
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
using Bytescout.PDFExtractor;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that converts a PDF document to XML using
    /// Bytescout.PDFExtractor.XMLExtractor.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "sample3.pdf", saves the extracted data to "output.XML", waits for a
        /// key press, and then opens the XML file with its associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.XMLExtractor instance.
            // The using block releases the extractor (and the loaded document) as soon
            // as the XML has been written, even if loading or saving throws.
            using (XMLExtractor extractor = new XMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf");

                // Write the extracted data as XML
                extractor.SaveXMLToFile("output.XML");
            }

            Console.WriteLine();
            Console.WriteLine("Data has been extracted to 'output.XML' file.");
            Console.WriteLine();
            Console.WriteLine("Press any key to continue and open XML in default XML viewer...");
            Console.ReadKey();

            // Opening a document (rather than an executable) requires shell execution.
            // UseShellExecute defaults to true on .NET Framework but to false on
            // .NET Core / .NET 5+, so set it explicitly to behave the same on both.
            ProcessStartInfo startInfo = new ProcessStartInfo("output.XML");
            startInfo.UseShellExecute = true;
            Process.Start(startInfo);
        }
    }
}
Imports System
Imports System.Collections.Generic
Imports System.Diagnostics
Imports System.Text
Imports Bytescout.PDFExtractor

Namespace ConsoleApplication1

    ''' <summary>
    ''' Console sample that converts a PDF document to XML using
    ''' Bytescout.PDFExtractor.XMLExtractor.
    ''' </summary>
    Class Program

        ''' <summary>
        ''' Loads "sample3.pdf", saves the extracted data to "output.XML", waits for a
        ''' key press, and then opens the XML file with its associated application.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Shared Sub Main(ByVal args As String())

            ' Create Bytescout.PDFExtractor.XMLExtractor instance.
            ' The Using block releases the extractor (and the loaded document) as soon
            ' as the XML has been written, even if loading or saving throws.
            Using extractor As New XMLExtractor()
                extractor.RegistrationName = "demo"
                extractor.RegistrationKey = "demo"

                ' Load sample PDF document
                extractor.LoadDocumentFromFile("sample3.pdf")

                ' Write the extracted data as XML
                extractor.SaveXMLToFile("output.XML")
            End Using

            Console.WriteLine()
            Console.WriteLine("Data has been extracted to 'output.XML' file.")
            Console.WriteLine()
            Console.WriteLine("Press any key to continue and open XML in default XML viewer...")
            Console.ReadKey()

            ' Opening a document (rather than an executable) requires shell execution.
            ' UseShellExecute defaults to True on .NET Framework but to False on
            ' .NET Core / .NET 5+, so set it explicitly to behave the same on both.
            Dim startInfo As New ProcessStartInfo("output.XML")
            startInfo.UseShellExecute = True
            Process.Start(startInfo)

        End Sub

    End Class

End Namespace
' Converts a PDF document to XML through the
' Bytescout.PDFExtractor.XMLExtractor COM object.

' Input and output locations used by this script
Const SourcePdfPath = "../../sample3.pdf"
Const TargetXmlPath = "output.XML"

' Instantiate the extractor via its COM ProgID
Dim xmlExtractor
Set xmlExtractor = CreateObject("Bytescout.PDFExtractor.XMLExtractor")

' Registration name and key ("demo" values, as used throughout this sample)
xmlExtractor.RegistrationName = "demo"
xmlExtractor.RegistrationKey = "demo"

' Read the sample PDF, then write the extracted data out as XML
Call xmlExtractor.LoadDocumentFromFile(SourcePdfPath)
Call xmlExtractor.SaveXMLToFile(TargetXmlPath)

' Tell the user that extraction has finished
MsgBox "Data has been extracted to 'output.xml' file."