ByteScout PDF Extractor SDK - C# - Find Email Addresses (with Regex) - ByteScout

ByteScout PDF Extractor SDK – C# – Find Email Addresses (with Regex)

  • Home
  • /
  • Articles
  • /
  • ByteScout PDF Extractor SDK – C# – Find Email Addresses (with Regex)

ByteScout PDF Extractor SDK – C# – Find Email Addresses (with Regex)

Program.cs

using Bytescout.PDFExtractor;
using System;

namespace FindEmailAddressesRegexp
{
    /// <summary>
    /// Console sample that scans every page of a PDF document for e-mail
    /// addresses using the regular-expression search mode of
    /// <c>Bytescout.PDFExtractor.TextExtractor</c> and prints each match.
    /// </summary>
    class Program
    {
        // Pattern used to recognise e-mail addresses.
        // See the complete regular expressions reference at
        // https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
        private const string EmailPattern = @"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b";

        /// <summary>
        /// Entry point: loads "samplePDF_EmailAddress.pdf", searches each page for
        /// <see cref="EmailPattern"/> and writes every found text element to the console.
        /// Any exception is reported as "Error: ..." instead of terminating the process,
        /// and the program then waits for Enter before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            try
            {
                // Create Bytescout.PDFExtractor.TextExtractor instance;
                // the using statement disposes it when the search is finished.
                using (TextExtractor extractor = new TextExtractor())
                {
                    extractor.RegistrationName = "demo";
                    extractor.RegistrationKey = "demo";

                    // Load sample PDF document
                    extractor.LoadDocumentFromFile("samplePDF_EmailAddress.pdf");

                    // Enable the regular expressions
                    extractor.RegexSearch = true;

                    int pageCount = extractor.GetPageCount();

                    // Search through pages
                    for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
                    {
                        // Search each page for the pattern.
                        // NOTE(review): the third argument is presumably the
                        // case-sensitivity flag - confirm against the SDK reference.
                        if (extractor.Find(pageIndex, EmailPattern, false))
                        {
                            do
                            {
                                // Iterate through each element in the found text
                                foreach (ISearchResultElement element in extractor.FoundText.Elements)
                                {
                                    Console.WriteLine("Found Email Addresses: " + element.Text);
                                }
                            }
                            while (extractor.FindNext());
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("Error: " + ex.Message);
            }

            // Keep the console window open until the user confirms.
            Console.WriteLine();
            Console.WriteLine("Press enter key to continue...");
            Console.ReadLine();
        }
    }
}


  Click here to get your Free Trial version of the SDK

Tutorials:

prev
next