ByteScout PDF Extractor SDK – C# – Find US Address in PDF with Regex

  • Home
  • /
  • Articles
  • /
  • ByteScout PDF Extractor SDK – C# – Find US Address in PDF with Regex

ByteScout PDF Extractor SDK – C# – Find US Address in PDF with Regex

Program.cs

using Bytescout.PDFExtractor;
using System;

namespace FindUsAddressRegex
{
    /// <summary>
    /// Console sample that scans every page of a PDF document for text matching
    /// a US-address-like regular expression and prints each match.
    /// </summary>
    class Program
    {
        // Pattern used to locate address-like text. It is the same for every page,
        // so it is declared once here instead of inside the page loop.
        // See the complete regular expressions reference at
        // https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
        private const string AddressPattern = @"((\w+[ ,])+ ){2}([a-zA-Z]){2}[ , ] (\d+)";

        /// <summary>
        /// Loads "samplePDF_Address.pdf", searches each page with
        /// <see cref="AddressPattern"/> and writes every found element to the console.
        /// Any exception is reported as "Error: ..." instead of terminating the program.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            try
            {
                // Create Bytescout.PDFExtractor.TextExtractor instance;
                // the using block disposes it when the search is finished.
                using (TextExtractor extractor = new TextExtractor())
                {
                    extractor.RegistrationName = "demo";
                    extractor.RegistrationKey = "demo";

                    // Load sample PDF document
                    extractor.LoadDocumentFromFile("samplePDF_Address.pdf");

                    // Enable the regular expressions so the search text is treated as a pattern
                    extractor.RegexSearch = true;

                    int pageCount = extractor.GetPageCount();

                    // Search through pages (page indexes start at 0)
                    for (int i = 0; i < pageCount; i++)
                    {
                        // Search each page for the pattern.
                        // NOTE(review): the third argument is presumably the case-sensitivity
                        // flag - confirm against the SDK documentation.
                        if (extractor.Find(i, AddressPattern, false))
                        {
                            do
                            {
                                // Iterate through each element in the found text
                                foreach (ISearchResultElement element in extractor.FoundText.Elements)
                                {
                                    Console.WriteLine("Found Address: " + element.Text);
                                }
                            }
                            while (extractor.FindNext()); // advance to the next match on this page
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Top-level handler for the sample: report the problem and fall through
                // to the prompt below so the console window stays open.
                Console.WriteLine("Error: " + ex.Message);
            }

            Console.WriteLine();
            Console.WriteLine("Press enter key to continue...");
            Console.ReadLine();
        }
    }
}


  Click here to get your Free Trial version of the SDK

Tutorials:

prev
next