How to extract a page from PDF by found keyword in C# and VB.NET using ByteScout PDF Extractor SDK

Home
/
Articles
/
How to extract a page from PDF by found keyword in C# and VB.NET using ByteScout PDF Extractor SDK

Check the samples below to learn how to extract a page from a PDF by a found keyword in C# and VB.NET using ByteScout PDF Extractor SDK.

How to extract a page from PDF by found keyword in C#

// This example demonstrates page extraction by a found keyword.

using System;
using Bytescout.PDFExtractor;

namespace SplittingExample
{
	/// <summary>
	/// Console sample: searches every page of a PDF document for a keyword and
	/// saves each page that contains it as a separate file named "page{N}.pdf".
	/// </summary>
	class Program
	{
		/// <summary>
		/// Entry point. Loads the input PDF, scans it page by page and extracts
		/// every page on which the keyword is found.
		/// </summary>
		/// <param name="args">Command-line arguments (not used).</param>
		static void Main(string[] args)
		{
			string inputFile = "sample.pdf";
			string keyword = "history";

			// Create Bytescout.PDFExtractor.TextExtractor instance.
			// The using block releases the loaded document as soon as the search is done.
			using (TextExtractor extractor = new TextExtractor())
			{
				extractor.RegistrationName = "demo";
				extractor.RegistrationKey = "demo";

				// Load the same file that is later handed to ExtractPage(),
				// so the searched and the split document can never diverge.
				extractor.LoadDocumentFromFile(inputFile);

				int pageCount = extractor.GetPageCount();

				// Search each page for the keyword (page index passed to Find() starts at 0).
				// NOTE(review): the third argument is presumably the case-sensitivity flag - confirm against the SDK docs.
				for (int i = 0; i < pageCount; i++)
				{
					if (extractor.Find(i, keyword, false))
					{
						// Extract the matching page into its own file
						using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
						{
							splitter.OptimizeSplittedDocuments = true;

							int pageNumber = i + 1;  // (!) page number in ExtractPage() is 1-based
							string outputFile = "page" + pageNumber.ToString() + ".pdf";
							splitter.ExtractPage(inputFile, outputFile, pageNumber);
							Console.WriteLine("Extracted page " + pageNumber.ToString() + " to file \"" + outputFile + "\"");
						}
					}
				}
			}

			Console.WriteLine();
			Console.WriteLine("Press any key...");
			Console.ReadKey();
		}
	}
}

How to extract a page from PDF by found keyword in Visual Basic .NET

' This example demonstrates page extraction by a found keyword.

Imports Bytescout.PDFExtractor

''' <summary>
''' Console sample: searches every page of a PDF document for a keyword and
''' saves each page that contains it as a separate file named "page{N}.pdf".
''' </summary>
Class Program
	''' <summary>
	''' Entry point. Loads the input PDF, scans it page by page and extracts
	''' every page on which the keyword is found.
	''' </summary>
	''' <param name="args">Command-line arguments (not used).</param>
	Friend Shared Sub Main(args As String())
		Dim inputFile As String = "sample.pdf"
		Dim keyword As String = "history"

		' Create Bytescout.PDFExtractor.TextExtractor instance.
		' The Using block releases the loaded document as soon as the search is done.
		Using extractor As New TextExtractor()
			extractor.RegistrationName = "demo"
			extractor.RegistrationKey = "demo"

			' Load the same file that is later handed to ExtractPage(),
			' so the searched and the split document can never diverge.
			extractor.LoadDocumentFromFile(inputFile)

			Dim pageCount As Integer = extractor.GetPageCount()

			' Search each page for the keyword (page index passed to Find() starts at 0).
			' NOTE(review): the third argument is presumably the case-sensitivity flag - confirm against the SDK docs.
			For i As Integer = 0 To pageCount - 1
				If extractor.Find(i, keyword, False) Then
					' Extract the matching page into its own file
					Using splitter As New DocumentSplitter("demo", "demo")
						splitter.OptimizeSplittedDocuments = True

						' (!) page number in ExtractPage() is 1-based
						Dim pageNumber As Integer = i + 1
						Dim outputFile As String = "page" & pageNumber.ToString() & ".pdf"
						splitter.ExtractPage(inputFile, outputFile, pageNumber)
						Console.WriteLine("Extracted page " & pageNumber.ToString() & " to file """ & outputFile & """")
					End Using
				End If
			Next
		End Using

		Console.WriteLine()
		Console.WriteLine("Press any key...")
		Console.ReadKey()
	End Sub
End Class

How to extract a page from PDF by found keyword in C#

How to extract a page from PDF by found keyword in Visual Basic .NET

Tutorials:

Web API