Program.cs
[csharp]
using Bytescout.PDFExtractor;
using System;
namespace FindEmailAddressesRegexp
{
    /// <summary>
    /// Console sample that searches every page of a PDF document for e-mail
    /// addresses with a regular expression and prints what it finds.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "samplePDF_EmailAddress.pdf", runs a regex search on each page
        /// and writes the text of every found element to the console. Any
        /// exception is reported by its message instead of terminating the
        /// program; the method then waits for Enter before returning.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The pattern does not depend on the page, so it is declared once
            // instead of on every loop iteration.
            // See the complete regular expressions reference at https://msdn.microsoft.com/en-us/library/az24scfc(v=vs.110).aspx
            const string regexPattern = @"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b";

            try
            {
                // Create Bytescout.PDFExtractor.TextExtractor instance;
                // the using block disposes it when the search is finished.
                using (TextExtractor extractor = new TextExtractor())
                {
                    // String literals must use plain ASCII double quotes;
                    // typographic quotes do not compile.
                    extractor.RegistrationName = "demo";
                    extractor.RegistrationKey = "demo";

                    // Load sample PDF document
                    extractor.LoadDocumentFromFile("samplePDF_EmailAddress.pdf");

                    // Enable the regular expressions
                    extractor.RegexSearch = true;

                    int pageCount = extractor.GetPageCount();

                    // Search through pages (page indexes start at 0)
                    for (int i = 0; i < pageCount; i++)
                    {
                        // NOTE(review): the third argument is presumably the
                        // case-sensitivity flag - confirm against the SDK docs.
                        if (!extractor.Find(i, regexPattern, false))
                        {
                            continue; // nothing found on this page
                        }

                        do
                        {
                            // Iterate through each element in the found text
                            foreach (ISearchResultElement element in extractor.FoundText.Elements)
                            {
                                Console.WriteLine("Found Email Addresses: " + element.Text);
                            }
                        }
                        while (extractor.FindNext());
                    }
                }
            }
            catch (Exception ex)
            {
                // Top-level handler of a console sample: report and fall
                // through to the prompt so the window stays open.
                Console.WriteLine("Error: " + ex.Message);
            }

            Console.WriteLine();
            Console.WriteLine("Press enter key to continue...");
            Console.ReadLine();
        }
    }
}
[/csharp]
Click here to get your Free Trial version of the SDK
also available as: