How to extract filled form data from PDF files in C# and VB.NET using PDF Extractor SDK - ByteScout
Announcement
Our ByteScout SDK products are sunsetting as we focus on expanding new solutions.
Learn More Open modal
Close modal
Announcement Important Update
ByteScout SDK Sunsetting Notice
Our ByteScout SDK products are sunsetting as we focus on our new & improved solutions. Thank you for being part of our journey, and we look forward to supporting you in this next chapter!

How to extract filled form data from PDF files in C# and VB.NET using PDF Extractor SDK

  • Home
  • /
  • Articles
  • /
  • How to extract filled form data from PDF files in C# and VB.NET using PDF Extractor SDK

These samples show how to extract filled form data from a PDF file in C# and VB.NET using Bytescout PDF Extractor SDK.

Also, check this article to find out how to extract audio from PDF.

How to extract filled form data from a PDF file in C#

using System;
using System.Diagnostics;
using System.Xml;
using Bytescout.PDFExtractor;

namespace ExtractFilledFormData
{
	static class Program
	{
		static void Main()
		{
			// Create XMLExtractor instance
			XMLExtractor extractor = new XMLExtractor();
			extractor.RegistrationName = "demo";
			extractor.RegistrationKey = "demo";

			// Load sample PDF document
			extractor.LoadDocumentFromFile("interactiveform.pdf");

			// Get PDF document text as XML
			string xmlText = extractor.GetXML();

			// Load XML
			XmlDocument xmlDocument = new XmlDocument();
			xmlDocument.LoadXml(xmlText);

			// Select all "control" nodes
			XmlNodeList formControls = xmlDocument.SelectNodes("//control");
			if (formControls != null)
			{
				foreach (XmlNode formControl in formControls)
				{
					XmlAttribute typeAttribute = formControl.Attributes["type"];

					// Trace filled textboxes
					if (typeAttribute.Value == "editbox")
					{
						if (!String.IsNullOrEmpty(formControl.InnerText))
							Trace.WriteLine("EDITBOX " + formControl.Attributes["id"].Value + ": " + formControl.InnerText);
					}
					// Trace checked checkboxes
					else if (typeAttribute.Value == "checkbox")
					{
						if (formControl.Attributes["state"].Value == "1")
							Trace.WriteLine("CHECKBOX " + formControl.Attributes["id"].Value + ": " + formControl.Attributes["state"].Value);

					}
				}
			}
		}
	}
}

How to extract filled form data from a PDF file in Visual Basic .NET

Imports Bytescout.PDFExtractor
Imports System.Xml

Module Module1

    Sub Main()

        ' Create XMLExtractor instance
        Dim extractor As XMLExtractor = New XMLExtractor()
        extractor.RegistrationName = "demo"
        extractor.RegistrationKey = "demo"

        ' Load sample PDF document
        extractor.LoadDocumentFromFile("interactiveform.pdf")

        ' Get PDF document text as XML
        Dim xmlText As String = extractor.GetXML()

        ' Load XML
        Dim xmlDocument As XmlDocument = New XmlDocument()
        XmlDocument.LoadXml(XmlText)

        ' Select all "control" nodes
        Dim formControls As XmlNodeList = xmlDocument.SelectNodes("//control")
        If (formControls IsNot Nothing) Then

            For Each formControl As XmlNode In formControls

                Dim typeAttribute = formControl.Attributes("type")

                If (typeAttribute.Value = "editbox") Then
                    ' Trace filled textboxes
                    If (Not String.IsNullOrEmpty(formControl.InnerText)) Then
                        Trace.WriteLine("EDITBOX " + formControl.Attributes("id").Value + ": " + formControl.InnerText)
                    End If
                ElseIf (typeAttribute.Value = "checkbox") Then
                    ' Trace checked checkboxes
                    If (formControl.Attributes("state").Value = "1") Then
                        Trace.WriteLine("CHECKBOX " + formControl.Attributes("id").Value + ": " + formControl.Attributes("state").Value)
                    End If
                End If

            Next formControl

        End If

    End Sub

End Module

Tutorials:

prev
next