Split PDF Based on Keyword - Tutorial with C# and VB.NET Samples - ByteScout
Announcement
Our ByteScout SDK products are sunsetting as we focus on expanding new solutions.
Learn More Open modal
Close modal
Announcement Important Update
ByteScout SDK Sunsetting Notice
Our ByteScout SDK products are sunsetting as we focus on our new & improved solutions. Thank you for being part of our journey, and we look forward to supporting you in this next chapter!

Split PDF Based on Keyword – Tutorial with C# and VB.NET Samples

  • Home
  • /
  • Articles
  • /
  • Split PDF Based on Keyword – Tutorial with C# and VB.NET Samples

This tutorial shows how to split a PDF file by keyword in C# or Visual Basic .NET using the PDF Extractor SDK. Use the sample source code below to extract the pages of a PDF document that contain a given keyword into separate files.

C# Code Snippet

using Bytescout.PDFExtractor;
using System.IO;

namespace FindAndExtractPageExample
{
    /// <summary>
    /// Console sample: scans every page of a PDF for a keyword and writes each
    /// matching page to its own single-page PDF file ("page" + index + ".pdf").
    /// </summary>
    class Program
    {
        // Names of the scratch files that receive the parts of the document we
        // do not want to keep. They are created in the current directory.
        private const string WasteFile = "waste";
        private const string PartFile = "part";

        /// <summary>
        /// Entry point. Loads "sample.pdf", searches each page for the keyword
        /// and extracts every page on which it is found.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string inputFile = "sample.pdf";
            string keyword = "demographic";

            // Dispose the extractor when done so the loaded document is released.
            using (TextExtractor extractor = new TextExtractor("demo", "demo"))
            {
                extractor.LoadDocumentFromFile(inputFile);

                // The page count does not change while we scan, so read it once.
                int pageCount = extractor.GetPageCount();

                // Search each page for the keyword (page indices here are zero-based).
                for (int i = 0; i < pageCount; i++)
                {
                    // NOTE(review): the third argument is presumably the
                    // case-sensitivity flag - confirm against the SDK docs.
                    if (extractor.Find(i, keyword, false))
                    {
                        // Extract the page containing the keyword.
                        ExtractPage(inputFile, i, "page" + i + ".pdf");
                    }
                }
            }
        }

        /// <summary>
        /// Writes the page at <paramref name="pageIndex"/> of
        /// <paramref name="inputFile"/> to <paramref name="outputFile"/> as a
        /// one-page document, using one or two split operations.
        /// </summary>
        /// <param name="inputFile">Path of the source PDF.</param>
        /// <param name="pageIndex">Zero-based index of the page to extract.</param>
        /// <param name="outputFile">Path of the one-page PDF to create.</param>
        private static void ExtractPage(string inputFile, int pageIndex, string outputFile)
        {
            using (DocumentSplitter splitter = new DocumentSplitter("demo", "demo"))
            {
                int pageCount = splitter.GetPageCount(inputFile);

                if (pageIndex == 0 && pageCount == 1)
                {
                    // The document consists of the sought-for page only, so no
                    // splitting is required. Overwrite an existing output file so
                    // that re-running the sample does not fail with IOException.
                    File.Copy(inputFile, outputFile, true);
                    return;
                }

                try
                {
                    if (pageIndex == 0)
                    {
                        // Split at the second page (page numbering starts from 1 in
                        // Split). The first part is our sought-for 1-page document.
                        splitter.Split(inputFile, outputFile, WasteFile, 2);
                    }
                    else if (pageIndex == pageCount - 1)
                    {
                        // Last page: split right on it. The second part is our
                        // sought-for 1-page document.
                        splitter.Split(inputFile, WasteFile, outputFile, pageIndex + 1);
                    }
                    else
                    {
                        // Page in the middle of the document: two splits are needed.
                        // First cut off everything before the page...
                        splitter.Split(inputFile, WasteFile, PartFile, pageIndex + 1);
                        File.Delete(WasteFile);
                        // ...then cut off everything after it.
                        splitter.Split(PartFile, outputFile, WasteFile, 2);
                    }
                }
                finally
                {
                    // Remove the scratch files even when a split fails.
                    // File.Delete does nothing if the file does not exist.
                    File.Delete(PartFile);
                    File.Delete(WasteFile);
                }
            }
        }
    }
}

VB.NET Code Snippet

Imports Bytescout.PDFExtractor
Imports System.IO

''' <summary>
''' Console sample: scans every page of a PDF for a keyword and writes each
''' matching page to its own single-page PDF file ("page" &amp; index &amp; ".pdf").
''' </summary>
Class Program
    ' Names of the scratch files that receive the parts of the document we
    ' do not want to keep. They are created in the current directory.
    Private Const WasteFile As String = "waste"
    Private Const PartFile As String = "part"

    ''' <summary>
    ''' Entry point. Loads "sample.pdf", searches each page for the keyword
    ''' and extracts every page on which it is found.
    ''' </summary>
    ''' <param name="args">Command-line arguments (unused).</param>
    Friend Shared Sub Main(args As String())
        Dim inputFile As String = "sample.pdf"
        Dim keyword As String = "demographic"

        ' Dispose the extractor when done so the loaded document is released.
        Using extractor As New TextExtractor("demo", "demo")
            extractor.LoadDocumentFromFile(inputFile)

            Dim pageCount As Integer = extractor.GetPageCount()

            ' Search each page for the keyword (page indices here are zero-based).
            For i As Integer = 0 To pageCount - 1
                ' NOTE(review): the third argument is presumably the
                ' case-sensitivity flag - confirm against the SDK docs.
                If extractor.Find(i, keyword, False) Then
                    ' Extract the page containing the keyword.
                    ExtractPage(inputFile, i, "page" & i & ".pdf")
                End If
            Next
        End Using
    End Sub

    ''' <summary>
    ''' Writes the page at <paramref name="pageIndex"/> of
    ''' <paramref name="inputFile"/> to <paramref name="outputFile"/> as a
    ''' one-page document, using one or two split operations.
    ''' </summary>
    ''' <param name="inputFile">Path of the source PDF.</param>
    ''' <param name="pageIndex">Zero-based index of the page to extract.</param>
    ''' <param name="outputFile">Path of the one-page PDF to create.</param>
    Private Shared Sub ExtractPage(inputFile As String, pageIndex As Integer, outputFile As String)
        Using splitter As New DocumentSplitter("demo", "demo")
            Dim pageCount As Integer = splitter.GetPageCount(inputFile)

            If pageIndex = 0 AndAlso pageCount = 1 Then
                ' The document consists of the sought-for page only, so no
                ' splitting is required. Overwrite an existing output file so
                ' that re-running the sample does not fail with IOException.
                File.Copy(inputFile, outputFile, True)
                Return
            End If

            Try
                If pageIndex = 0 Then
                    ' Split at the second page (page numbering starts from 1 in
                    ' Split). The first part is our sought-for 1-page document.
                    splitter.Split(inputFile, outputFile, WasteFile, 2)
                ElseIf pageIndex = pageCount - 1 Then
                    ' Last page: split right on it. The second part is our
                    ' sought-for 1-page document.
                    splitter.Split(inputFile, WasteFile, outputFile, pageIndex + 1)
                Else
                    ' Page in the middle of the document: two splits are needed.
                    ' First cut off everything before the page...
                    splitter.Split(inputFile, WasteFile, PartFile, pageIndex + 1)
                    File.Delete(WasteFile)
                    ' ...then cut off everything after it.
                    splitter.Split(PartFile, outputFile, WasteFile, 2)
                End If
            Finally
                ' Remove the scratch files even when a split fails.
                ' File.Delete does nothing if the file does not exist.
                File.Delete(PartFile)
                File.Delete(WasteFile)
            End Try
        End Using
    End Sub
End Class

Tutorials:

prev
next