These samples show how to extract filled form data from a PDF file in C# and VB.NET using Bytescout PDF Extractor SDK.
Also, check this article to find out how to extract audio from PDF.
using System;
using System.Diagnostics;
using System.Xml;
using Bytescout.PDFExtractor;

namespace ExtractFilledFormData
{
    /// <summary>
    /// Sample program: converts "interactiveform.pdf" to XML with
    /// <c>XMLExtractor</c> and traces every filled edit box and every
    /// checked check box found among the "control" elements.
    /// </summary>
    static class Program
    {
        /// <summary>
        /// Entry point. Loads the sample PDF, extracts it as XML, and writes
        /// one trace line per filled edit box / checked check box.
        /// </summary>
        static void Main()
        {
            string xmlText;

            // The extractor is only needed to produce the XML text, so it is
            // released as soon as that text has been obtained.
            using (XMLExtractor extractor = new XMLExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("interactiveform.pdf");

                // Get PDF document text as XML
                xmlText = extractor.GetXML();
            }

            // Load XML
            XmlDocument xmlDocument = new XmlDocument();
            xmlDocument.LoadXml(xmlText);

            // Select all "control" nodes
            XmlNodeList formControls = xmlDocument.SelectNodes("//control");
            if (formControls == null)
            {
                return;
            }

            foreach (XmlNode formControl in formControls)
            {
                // Attributes are looked up through a null-safe helper: a
                // "control" element that lacks "type", "id" or "state" is
                // skipped or printed with an empty value instead of crashing.
                string controlType = GetAttributeValue(formControl, "type");
                string controlId = GetAttributeValue(formControl, "id");

                if (controlType == "editbox")
                {
                    // Trace filled textboxes
                    if (!String.IsNullOrEmpty(formControl.InnerText))
                    {
                        Trace.WriteLine("EDITBOX " + controlId + ": " + formControl.InnerText);
                    }
                }
                else if (controlType == "checkbox")
                {
                    // Trace checked checkboxes (state "1")
                    string state = GetAttributeValue(formControl, "state");
                    if (state == "1")
                    {
                        Trace.WriteLine("CHECKBOX " + controlId + ": " + state);
                    }
                }
            }
        }

        /// <summary>
        /// Returns the value of the attribute called <paramref name="name"/>
        /// on <paramref name="node"/>, or <c>null</c> when the node has no
        /// attribute collection or no attribute with that name.
        /// </summary>
        /// <param name="node">Node whose attribute is read.</param>
        /// <param name="name">Attribute name to look up.</param>
        /// <returns>The attribute value, or <c>null</c> if it is absent.</returns>
        private static string GetAttributeValue(XmlNode node, string name)
        {
            if (node.Attributes == null)
            {
                return null;
            }

            XmlAttribute attribute = node.Attributes[name];
            return attribute == null ? null : attribute.Value;
        }
    }
}
Imports Bytescout.PDFExtractor
Imports System.Diagnostics
Imports System.Xml

''' <summary>
''' Sample program: converts "interactiveform.pdf" to XML with XMLExtractor
''' and traces every filled edit box and every checked check box found among
''' the "control" elements.
''' </summary>
Module Module1

    ''' <summary>
    ''' Entry point. Loads the sample PDF, extracts it as XML, and writes one
    ''' trace line per filled edit box / checked check box.
    ''' </summary>
    Sub Main()

        Dim xmlText As String

        ' The extractor is only needed to produce the XML text, so it is
        ' released as soon as that text has been obtained.
        Using extractor As XMLExtractor = New XMLExtractor()
            extractor.RegistrationName = "demo"
            extractor.RegistrationKey = "demo"

            ' Load sample PDF document
            extractor.LoadDocumentFromFile("interactiveform.pdf")

            ' Get PDF document text as XML
            xmlText = extractor.GetXML()
        End Using

        ' Load XML
        Dim xmlDocument As XmlDocument = New XmlDocument()
        xmlDocument.LoadXml(xmlText)

        ' Select all "control" nodes
        Dim formControls As XmlNodeList = xmlDocument.SelectNodes("//control")
        If formControls Is Nothing Then
            Return
        End If

        For Each formControl As XmlNode In formControls

            ' Attributes are looked up through a null-safe helper: a "control"
            ' element that lacks "type", "id" or "state" is skipped or printed
            ' with an empty value instead of crashing.
            Dim controlType As String = GetAttributeValue(formControl, "type")
            Dim controlId As String = GetAttributeValue(formControl, "id")

            If controlType = "editbox" Then
                ' Trace filled textboxes
                If Not String.IsNullOrEmpty(formControl.InnerText) Then
                    Trace.WriteLine("EDITBOX " & controlId & ": " & formControl.InnerText)
                End If
            ElseIf controlType = "checkbox" Then
                ' Trace checked checkboxes (state "1")
                Dim state As String = GetAttributeValue(formControl, "state")
                If state = "1" Then
                    Trace.WriteLine("CHECKBOX " & controlId & ": " & state)
                End If
            End If

        Next formControl

    End Sub

    ''' <summary>
    ''' Returns the value of the attribute called <paramref name="name"/> on
    ''' <paramref name="node"/>, or Nothing when the node has no attribute
    ''' collection or no attribute with that name.
    ''' </summary>
    ''' <param name="node">Node whose attribute is read.</param>
    ''' <param name="name">Attribute name to look up.</param>
    ''' <returns>The attribute value, or Nothing if it is absent.</returns>
    Private Function GetAttributeValue(node As XmlNode, name As String) As String
        If node.Attributes Is Nothing Then
            Return Nothing
        End If

        Dim attr As XmlAttribute = node.Attributes(name)
        If attr Is Nothing Then
            Return Nothing
        End If

        Return attr.Value
    End Function

End Module