How to extract filled form data from PDF files in C# and VB.NET using PDF Extractor SDK - ByteScout

How to extract filled form data from PDF files in C# and VB.NET using PDF Extractor SDK

  • Home
  • /
  • Articles
  • /
  • How to extract filled form data from PDF files in C# and VB.NET using PDF Extractor SDK

These samples show how to extract filled form data from a PDF file in C# and VB.NET using Bytescout PDF Extractor SDK.

Also, check this article to find out how to extract audio from PDF.

How to extract filled form data from a PDF file in C#

using System;
using System.Diagnostics;
using System.Xml;
using Bytescout.PDFExtractor;

namespace ExtractFilledFormData
{
	static class Program
	{
		static void Main()
		{
			// Create XMLExtractor instance
			XMLExtractor extractor = new XMLExtractor();
			extractor.RegistrationName = "demo";
			extractor.RegistrationKey = "demo";

			// Load sample PDF document
			extractor.LoadDocumentFromFile("interactiveform.pdf");

			// Get PDF document text as XML
			string xmlText = extractor.GetXML();

			// Load XML
			XmlDocument xmlDocument = new XmlDocument();
			xmlDocument.LoadXml(xmlText);

			// Select all "control" nodes
			XmlNodeList formControls = xmlDocument.SelectNodes("//control");
			if (formControls != null)
			{
				foreach (XmlNode formControl in formControls)
				{
					XmlAttribute typeAttribute = formControl.Attributes["type"];

					// Trace filled textboxes
					if (typeAttribute.Value == "editbox")
					{
						if (!String.IsNullOrEmpty(formControl.InnerText))
							Trace.WriteLine("EDITBOX " + formControl.Attributes["id"].Value + ": " + formControl.InnerText);
					}
					// Trace checked checkboxes
					else if (typeAttribute.Value == "checkbox")
					{
						if (formControl.Attributes["state"].Value == "1")
							Trace.WriteLine("CHECKBOX " + formControl.Attributes["id"].Value + ": " + formControl.Attributes["state"].Value);

					}
				}
			}
		}
	}
}

How to extract filled form data from a PDF file in Visual Basic .NET

Imports Bytescout.PDFExtractor
Imports System.Xml

Module Module1

    Sub Main()

        ' Create XMLExtractor instance
        Dim extractor As XMLExtractor = New XMLExtractor()
        extractor.RegistrationName = "demo"
        extractor.RegistrationKey = "demo"

        ' Load sample PDF document
        extractor.LoadDocumentFromFile("interactiveform.pdf")

        ' Get PDF document text as XML
        Dim xmlText As String = extractor.GetXML()

        ' Load XML
        Dim xmlDocument As XmlDocument = New XmlDocument()
        XmlDocument.LoadXml(XmlText)

        ' Select all "control" nodes
        Dim formControls As XmlNodeList = xmlDocument.SelectNodes("//control")
        If (formControls IsNot Nothing) Then

            For Each formControl As XmlNode In formControls

                Dim typeAttribute = formControl.Attributes("type")

                If (typeAttribute.Value = "editbox") Then
                    ' Trace filled textboxes
                    If (Not String.IsNullOrEmpty(formControl.InnerText)) Then
                        Trace.WriteLine("EDITBOX " + formControl.Attributes("id").Value + ": " + formControl.InnerText)
                    End If
                ElseIf (typeAttribute.Value = "checkbox") Then
                    ' Trace checked checkboxes
                    If (formControl.Attributes("state").Value = "1") Then
                        Trace.WriteLine("CHECKBOX " + formControl.Attributes("id").Value + ": " + formControl.Attributes("state").Value)
                    End If
                End If

            Next formControl

        End If

    End Sub

End Module

Tutorials:

prev
next