How to extract pages from PDF in C#, C++, VB.NET and VBScript using ByteScout PDF Extractor SDK - ByteScout

How to extract pages from PDF in C#, C++, VB.NET and VBScript using ByteScout PDF Extractor SDK

  • Home
  • /
  • Articles
  • /
  • How to extract pages from PDF in C#, C++, VB.NET and VBScript using ByteScout PDF Extractor SDK

This tutorial shows how to extract individual pages from a PDF file and how to split a PDF file into parts by page ranges in C#, C++, VB.NET and VBScript using ByteScout PDF Extractor SDK.

Select your programming language:

How to extract pages from PDF in C#

// This example demonstrates various PDF document splitting scenarios:
// - extract a single page;
// - split in two parts;
// - split by ranges specified in text form: "1-5,6,7-10,11-".

using System;
using System.IO;
using Bytescout.PDFExtractor;

namespace SplittingExample
{
	/// <summary>
	/// Console sample that runs three PDF splitting scenarios against a single input document.
	/// </summary>
	class Program
	{
		/// <summary>
		/// Extracts one page, splits the document in two parts, then splits it by a textual
		/// range list, reporting each step on the console.
		/// </summary>
		/// <param name="args">Command-line arguments (not used).</param>
		static void Main(string[] args)
		{
			const string sourcePdf = "sample.pdf";

			using (DocumentSplitter pdfSplitter = new DocumentSplitter("demo", "demo"))
			{
				pdfSplitter.OptimizeSplittedDocuments = true;

				// Scenario 1: write a single page to its own file.
				// (!) The page number is 1-based.
				pdfSplitter.ExtractPage(sourcePdf, "page3.pdf", 3);
				Console.WriteLine(@"Extracted page 3 to file ""page3.pdf""");
				Console.WriteLine();

				// Scenario 2: split the document into two output files at a given page.
				// (!) The page number is 1-based.
				pdfSplitter.Split(sourcePdf, "part1.pdf", "part2.pdf", 3);
				Console.WriteLine(@"Splitted at page 3 to files ""part1.pdf"" and ""part2.pdf""");
				Console.WriteLine();

				// Scenario 3: split by ranges given as text.
				// (!) Page numbers are 1-based; a trailing "-" means "to the end".
				string[] rangeFiles = pdfSplitter.Split(sourcePdf, "1-3,4-6,7,8-");
				Console.WriteLine(@"Splitted by ranges: ");
				foreach (string rangeFile in rangeFiles)
				{
					// Only the file name is printed, not the full path returned by the splitter.
					string shortName = Path.GetFileName(rangeFile);
					Console.WriteLine("    " + shortName);
				}
			}

			// Keep the console window open until the user has seen the output.
			Console.WriteLine();
			Console.WriteLine("Press any key...");
			Console.ReadKey();
		}
	}
}

How to extract pages from PDF in C++


	#include "stdafx.h"
	#include "comip.h"

	// you may also refer to the tlb from \net4.00\ folder
	// you may also want to include the tlb file into the project so you could compile it and use intellisense for it
	#import "c:\\Program Files\\Bytescout PDF Extractor SDK\\net2.00\\Bytescout.PDFExtractor.tlb" raw_interfaces_only

	using namespace Bytescout_PDFExtractor;

	int _tmain(int argc, _TCHAR* argv[])
	{
		// Initialize COM.
		HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);

		// Create the interface pointer.
		_DocumentSplitterPtr pIDocumentSplitter(__uuidof(DocumentSplitter));

		// Set the registration name and key
		// Note: You should use _bstr_t or BSTR to pass string to the library because of COM requirements
		_bstr_t bstrRegName(L"DEMO"); 
		pIDocumentSplitter->put_RegistrationName(bstrRegName);
		
		_bstr_t bstrRegKey(L"DEMO");
		pIDocumentSplitter->put_RegistrationKey(bstrRegKey);

		// you may enable optimization for extracted pages from documents
		// pIDocumentSplitter->put_OptimizeSplittedDocuments = true;

		// Load sample PDF document
		HRESULT sRes = S_OK;
		//1. extract selected pages (!note: page numbers are 1-based)
		_bstr_t bstrPath(L"..\\..\\sample2.pdf");
		_bstr_t bstrParam(L"page2.pdf");
		sRes = pIDocumentSplitter->ExtractPage(bstrPath, bstrParam, 2);

		// 2. split the doc into 2 parts at page #2
		// (!) Note: page numbers are 1-based
		_bstr_t bstrPathInput(L"..\\..\\sample2.pdf");
		_bstr_t bstrParam1(L"part1.pdf");
		_bstr_t bstrParam2(L"part2.pdf");
		sRes = pIDocumentSplitter->Split(bstrPathInput, bstrParam1, bstrParam2, 2);

		// 3. merge page 2 extracted on step 1 along with base pdf
		// Create the interface pointer.
		_DocumentMergerPtr pIDocumentMerger(__uuidof(DocumentMerger));
		//_bstr_t bstrRegName(L"DEMO"); 
		pIDocumentMerger->put_RegistrationName(bstrRegName);		
		//_bstr_t bstrRegKey(L"DEMO");
		pIDocumentMerger->put_RegistrationKey(bstrRegKey);

		// merge 2 files into the 3rd one
		_bstr_t bstrParamMerge1(L"page2.pdf");
		_bstr_t bstrParamMerge2(L"..\\..\\sample2.pdf");
		_bstr_t bstrParamMergeOutput(L"merged.pdf");

		sRes = pIDocumentMerger->Merge2(bstrParamMerge1, bstrParamMerge2,bstrParamMergeOutput);

		// finally release both instances
		pIDocumentSplitter->Release();
		pIDocumentMerger->Release();

		// uninitialize ActiveX COM support
		CoUninitialize();

		return 0;
	}

How to extract pages from PDF in Visual Basic .NET

' This example demonstrates various PDF document splitting scenarios:
' - extract a single page;
' - split in two parts;
' - split by ranges specified in text form: "1-5,6,7-10,11-".

Imports System.IO
Imports Bytescout.PDFExtractor

''' <summary>
''' Console sample that runs three PDF splitting scenarios against a single input document.
''' </summary>
Class Program
	''' <summary>
	''' Extracts one page, splits the document in two parts, then splits it by a textual
	''' range list, reporting each step on the console.
	''' </summary>
	''' <param name="args">Command-line arguments (not used).</param>
	Friend Shared Sub Main(args As String())
		Const SourcePdf As String = "sample.pdf"

		Using pdfSplitter As New DocumentSplitter("demo", "demo")
			pdfSplitter.OptimizeSplittedDocuments = True

			' Scenario 1: write a single page to its own file.
			' (!) The page number is 1-based.
			pdfSplitter.ExtractPage(SourcePdf, "page3.pdf", 3)
			Console.WriteLine("Extracted page 3 to file ""page3.pdf""")
			Console.WriteLine()

			' Scenario 2: split the document into two output files at a given page.
			' (!) The page number is 1-based.
			pdfSplitter.Split(SourcePdf, "part1.pdf", "part2.pdf", 3)
			Console.WriteLine("Splitted at page 3 to files ""part1.pdf"" and ""part2.pdf""")
			Console.WriteLine()

			' Scenario 3: split by ranges given as text.
			' (!) Page numbers are 1-based; a trailing "-" means "to the end".
			Dim rangeFiles As String() = pdfSplitter.Split(SourcePdf, "1-3,4-6,7,8-")
			Console.WriteLine("Splitted by ranges: ")
			For Each rangeFile As String In rangeFiles
				' Only the file name is printed, not the full path returned by the splitter.
				Dim shortName As String = Path.GetFileName(rangeFile)
				Console.WriteLine("    " & shortName)
			Next
		End Using

		' Keep the console window open until the user has seen the output.
		Console.WriteLine()
		Console.WriteLine("Press any key...")
		Console.ReadKey()
	End Sub
End Class

How to extract pages from PDF in VBScript (Visual Basic 6)

' Runs three PDF splitting scenarios against "sample.pdf" and reports each one
' in a message box: single page extraction, split in two parts, split by ranges.

Dim pdfSplitter, sourcePdf, rangeFiles, rangeFile

' Create Bytescout.PDFExtractor.DocumentSplitter object and register it
Set pdfSplitter = CreateObject("Bytescout.PDFExtractor.DocumentSplitter")
With pdfSplitter
    .RegistrationName = "demo"
    .RegistrationKey = "demo"
End With

sourcePdf = "sample.pdf"

' To enable optimization for the output documents, uncomment:
' pdfSplitter.OptimizeSplittedDocuments = true

' Extracting specific page:
' =========================

' (!) Note: page number is 1-based.
Call pdfSplitter.ExtractPage(sourcePdf, "page3.pdf", 3)

MsgBox "Extracted page 3 to file page3.pdf"

' Split in two parts:
' ===================

' (!) Note: page number is 1-based.
Call pdfSplitter.Split(sourcePdf, "part1.pdf", "part2.pdf", 3)

MsgBox "Splitted at page 3 to files part1.pdf and part2.pdf"

' Split by ranges:
' ================

' SplitCOM() returns an array with the list of generated file names.
' (!) Note: page numbers are 1-based; ending "-" means "to the end".
rangeFiles = pdfSplitter.SplitCOM(sourcePdf, "1-3,4-6,7,8-")

MsgBox "Splitted by ranges sucessfully! Click OK to see filenames generated"

' Show each generated output file name in its own message box
For Each rangeFile In rangeFiles
    MsgBox rangeFile
Next

Tutorials:

prev
next