The samples below show how to find the pages of a PDF document that contain a keyword and extract each of them to a separate file, in C# and VB.NET, using ByteScout PDF Extractor SDK.
// This example demonstrates extracting the pages on which a keyword is found.
using System;
using Bytescout.PDFExtractor;
namespace SplittingExample
{
    /// <summary>
    /// Console sample: searches every page of a PDF document for a keyword and
    /// saves each page where the keyword is found to its own PDF file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "sample.pdf", searches each page for the word "history" and writes
        /// every matching page to "page&lt;N&gt;.pdf", where N is the 1-based page number.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Single source of truth for the document path: the same file must be
            // both searched and split, so it is named once and reused below.
            string inputFile = "sample.pdf";

            // Create Bytescout.PDFExtractor.TextExtractor instance.
            // The using statement releases the extractor (and the document it loaded)
            // as soon as the search is finished, instead of holding it until exit.
            using (TextExtractor extractor = new TextExtractor())
            {
                extractor.RegistrationName = "demo";
                extractor.RegistrationKey = "demo";

                // Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile);

                int pageCount = extractor.GetPageCount();

                // Search each page for a keyword; Find() is given a 0-based page index.
                for (int i = 0; i < pageCount; i++)
                {
                    // NOTE(review): the third argument is presumably the case-sensitivity
                    // flag (false = ignore case) - confirm against the SDK reference.
                    if (extractor.Find(i, "history", false))
                    {
                        // Extract the matching page into a separate document
                        using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
                        {
                            splitter.OptimizeSplittedDocuments = true;

                            int pageNumber = i + 1; // (!) page number in ExtractPage() is 1-based
                            string outputFile = "page" + pageNumber.ToString() + ".pdf";

                            splitter.ExtractPage(inputFile, outputFile, pageNumber);

                            Console.WriteLine("Extracted page " + pageNumber.ToString() + " to file \"" + outputFile + "\"");
                        }
                    }
                }
            }

            // Keep the console window open until the user has read the output
            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }
    }
}
' This example demonstrates extracting the pages on which a keyword is found.
Imports Bytescout.PDFExtractor
''' <summary>
''' Console sample: searches every page of a PDF document for a keyword and
''' saves each page where the keyword is found to its own PDF file.
''' </summary>
Class Program

    ''' <summary>
    ''' Loads "sample.pdf", searches each page for the word "history" and writes
    ''' every matching page to "page&lt;N&gt;.pdf", where N is the 1-based page number.
    ''' </summary>
    ''' <param name="args">Command-line arguments (not used).</param>
    Friend Shared Sub Main(args As String())

        ' Single source of truth for the document path: the same file must be
        ' both searched and split, so it is named once and reused below.
        Dim inputFile As String = "sample.pdf"

        ' Create Bytescout.PDFExtractor.TextExtractor instance.
        ' The Using block releases the extractor (and the document it loaded)
        ' as soon as the search is finished, instead of holding it until exit.
        Using extractor As New TextExtractor()
            extractor.RegistrationName = "demo"
            extractor.RegistrationKey = "demo"

            ' Load sample PDF document
            extractor.LoadDocumentFromFile(inputFile)

            Dim pageCount As Integer = extractor.GetPageCount()

            ' Search each page for a keyword; Find() is given a 0-based page index.
            For i As Integer = 0 To pageCount - 1
                ' NOTE(review): the third argument is presumably the case-sensitivity
                ' flag (False = ignore case) - confirm against the SDK reference.
                If extractor.Find(i, "history", False) Then

                    ' Extract the matching page into a separate document
                    Using splitter As New DocumentSplitter("demo", "demo")
                        splitter.OptimizeSplittedDocuments = True

                        ' (!) page number in ExtractPage() is 1-based
                        Dim pageNumber As Integer = i + 1
                        Dim outputFile As String = "page" & pageNumber.ToString() & ".pdf"

                        splitter.ExtractPage(inputFile, outputFile, pageNumber)

                        Console.WriteLine("Extracted page " & pageNumber.ToString() & " to file """ & outputFile & """")
                    End Using
                End If
            Next
        End Using

        ' Keep the console window open until the user has read the output
        Console.WriteLine()
        Console.WriteLine("Press any key...")
        Console.ReadKey()
    End Sub
End Class