These samples show how to extract filled form data from a PDF file in C# and VB.NET using the Bytescout PDF Extractor SDK.
Also, check this article to find out how to extract audio from a PDF.
using System;
using System.Diagnostics;
using System.Xml;
using Bytescout.PDFExtractor;
namespace ExtractFilledFormData
{
    /// <summary>
    /// Sample that traces the filled-in form fields of a PDF document.
    /// </summary>
    static class Program
    {
        /// <summary>
        /// Converts "interactiveform.pdf" to XML with <c>XMLExtractor</c>, then traces
        /// every non-empty edit box and every checkbox whose state is "1" found among
        /// the "control" nodes of that XML.
        /// </summary>
        static void Main()
        {
            // Create XMLExtractor instance
            XMLExtractor extractor = new XMLExtractor();
            string xmlText;
            try
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile("interactiveform.pdf");

                // Get PDF document text as XML
                xmlText = extractor.GetXML();
            }
            finally
            {
                // Release the extractor even if loading or conversion fails
                extractor.Dispose();
            }

            // Load XML
            XmlDocument xmlDocument = new XmlDocument();
            xmlDocument.LoadXml(xmlText);

            // Select all "control" nodes
            XmlNodeList formControls = xmlDocument.SelectNodes("//control");
            if (formControls == null)
            {
                return;
            }

            foreach (XmlNode formControl in formControls)
            {
                // Work on the element so attributes can be read with GetAttribute,
                // which yields an empty string (not null) when an attribute is
                // missing; a malformed control is skipped instead of crashing.
                XmlElement control = formControl as XmlElement;
                if (control == null)
                {
                    continue;
                }

                string controlType = control.GetAttribute("type");
                string controlId = control.GetAttribute("id");

                if (controlType == "editbox")
                {
                    // Trace filled textboxes
                    if (!String.IsNullOrEmpty(control.InnerText))
                    {
                        Trace.WriteLine("EDITBOX " + controlId + ": " + control.InnerText);
                    }
                }
                else if (controlType == "checkbox")
                {
                    // Trace checked checkboxes
                    string state = control.GetAttribute("state");
                    if (state == "1")
                    {
                        Trace.WriteLine("CHECKBOX " + controlId + ": " + state);
                    }
                }
            }
        }
    }
}
Imports Bytescout.PDFExtractor
Imports System.Xml
''' <summary>
''' Sample that traces the filled-in form fields of a PDF document.
''' </summary>
Module Module1

    ''' <summary>
    ''' Converts "interactiveform.pdf" to XML with XMLExtractor, then traces every
    ''' non-empty edit box and every checkbox whose state is "1" found among the
    ''' "control" nodes of that XML.
    ''' </summary>
    Sub Main()

        ' Create XMLExtractor instance
        Dim extractor As XMLExtractor = New XMLExtractor()
        Dim xmlText As String
        Try
            extractor.RegistrationName = "demo"
            extractor.RegistrationKey = "demo"

            ' Load sample PDF document
            extractor.LoadDocumentFromFile("interactiveform.pdf")

            ' Get PDF document text as XML
            xmlText = extractor.GetXML()
        Finally
            ' Release the extractor even if loading or conversion fails
            extractor.Dispose()
        End Try

        ' Load XML
        Dim xmlDocument As XmlDocument = New XmlDocument()
        xmlDocument.LoadXml(xmlText)

        ' Select all "control" nodes
        Dim formControls As XmlNodeList = xmlDocument.SelectNodes("//control")
        If formControls Is Nothing Then
            Return
        End If

        For Each formControl As XmlNode In formControls

            ' Work on the element so attributes can be read with GetAttribute,
            ' which yields an empty string (not Nothing) when an attribute is
            ' missing; a malformed control is skipped instead of crashing.
            Dim control As XmlElement = TryCast(formControl, XmlElement)
            If control Is Nothing Then
                Continue For
            End If

            Dim controlType As String = control.GetAttribute("type")
            Dim controlId As String = control.GetAttribute("id")

            If controlType = "editbox" Then
                ' Trace filled textboxes
                If Not String.IsNullOrEmpty(control.InnerText) Then
                    Trace.WriteLine("EDITBOX " & controlId & ": " & control.InnerText)
                End If
            ElseIf controlType = "checkbox" Then
                ' Trace checked checkboxes
                Dim state As String = control.GetAttribute("state")
                If state = "1" Then
                    Trace.WriteLine("CHECKBOX " & controlId & ": " & state)
                End If
            End If

        Next formControl

    End Sub

End Module