This tutorial shows how to extract individual pages from a PDF file and how to split a PDF file by page ranges in C#, C++, VB.NET and VBScript using the ByteScout PDF Extractor SDK.
Select your programming language:
// PDF splitting sample built on Bytescout.PDFExtractor.DocumentSplitter.
// Three scenarios are shown, in this order:
//   1. pull a single page out into its own file;
//   2. cut the document into two files at a given page;
//   3. split by a textual range list such as "1-3,4-6,7,8-".

using System;
using System.IO;
using Bytescout.PDFExtractor;

namespace SplittingExample
{
    class Program
    {
        /// <summary>
        /// Runs the three splitting scenarios against "sample.pdf" and then
        /// waits for a key press so the console output stays visible.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string sourcePdf = "sample.pdf";

            // The splitter is disposed before the final prompt is printed.
            using (DocumentSplitter pdfSplitter = new DocumentSplitter("demo", "demo"))
            {
                pdfSplitter.OptimizeSplittedDocuments = true;

                ExtractSinglePage(pdfSplitter, sourcePdf);
                SplitInTwoParts(pdfSplitter, sourcePdf);
                SplitByPageRanges(pdfSplitter, sourcePdf);
            }

            Console.WriteLine();
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }

        /// <summary>Writes page 3 of the input document to "page3.pdf".</summary>
        private static void ExtractSinglePage(DocumentSplitter pdfSplitter, string sourcePdf)
        {
            // Page numbers passed to the splitter are 1-based.
            pdfSplitter.ExtractPage(sourcePdf, "page3.pdf", 3);

            Console.WriteLine(@"Extracted page 3 to file ""page3.pdf""");
            Console.WriteLine();
        }

        /// <summary>Splits the input document at page 3 into "part1.pdf" and "part2.pdf".</summary>
        private static void SplitInTwoParts(DocumentSplitter pdfSplitter, string sourcePdf)
        {
            // Page numbers passed to the splitter are 1-based.
            pdfSplitter.Split(sourcePdf, "part1.pdf", "part2.pdf", 3);

            Console.WriteLine(@"Splitted at page 3 to files ""part1.pdf"" and ""part2.pdf""");
            Console.WriteLine();
        }

        /// <summary>
        /// Splits the input document by the range list "1-3,4-6,7,8-" and
        /// prints the file name of every part that was produced.
        /// </summary>
        private static void SplitByPageRanges(DocumentSplitter pdfSplitter, string sourcePdf)
        {
            // Ranges are 1-based; a trailing "-" means "up to the last page".
            string[] partPaths = pdfSplitter.Split(sourcePdf, "1-3,4-6,7,8-");

            Console.WriteLine(@"Splitted by ranges: ");

            for (int index = 0; index < partPaths.Length; index++)
            {
                Console.WriteLine(" " + Path.GetFileName(partPaths[index]));
            }
        }
    }
}
#include "stdafx.h"
#include "comip.h"

// You may also refer to the tlb from the \net4.00\ folder instead.
// You may also want to add the tlb file to the project so that it is compiled
// with it and IntelliSense is available for the imported types.
#import "c:\\Program Files\\Bytescout PDF Extractor SDK\\net2.00\\Bytescout.PDFExtractor.tlb" raw_interfaces_only

using namespace Bytescout_PDFExtractor;

// Sample entry point. It performs three steps through the SDK's COM interfaces:
//   1. extracts page 2 of ..\..\sample2.pdf into page2.pdf;
//   2. splits ..\..\sample2.pdf at page 2 into part1.pdf and part2.pdf;
//   3. merges page2.pdf and ..\..\sample2.pdf into merged.pdf.
// Returns 0 after running the steps, or 1 if COM could not be initialized.
int _tmain(int argc, _TCHAR* argv[])
{
	// Initialize COM. Nothing below can work without it, so stop on failure
	// (and do not call CoUninitialize for an initialization that did not succeed).
	HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);
	if (FAILED(hr))
	{
		return 1;
	}

	// Create the splitter interface pointer.
	_DocumentSplitterPtr pIDocumentSplitter(__uuidof(DocumentSplitter));

	// Set the registration name and key.
	// Note: strings must be passed as _bstr_t or BSTR because of COM requirements.
	_bstr_t bstrRegName(L"DEMO");
	pIDocumentSplitter->put_RegistrationName(bstrRegName);

	_bstr_t bstrRegKey(L"DEMO");
	pIDocumentSplitter->put_RegistrationKey(bstrRegKey);

	// You may enable optimization for pages extracted from documents:
	// pIDocumentSplitter->put_OptimizeSplittedDocuments = true;

	// Holds the HRESULT of the most recent SDK call; this sample does not inspect it.
	HRESULT sRes = S_OK;

	// 1. Extract the selected page (note: page numbers are 1-based).
	_bstr_t bstrPath(L"..\\..\\sample2.pdf");
	_bstr_t bstrParam(L"page2.pdf");
	sRes = pIDocumentSplitter->ExtractPage(bstrPath, bstrParam, 2);

	// 2. Split the document into 2 parts at page #2 (note: page numbers are 1-based).
	_bstr_t bstrPathInput(L"..\\..\\sample2.pdf");
	_bstr_t bstrParam1(L"part1.pdf");
	_bstr_t bstrParam2(L"part2.pdf");
	sRes = pIDocumentSplitter->Split(bstrPathInput, bstrParam1, bstrParam2, 2);

	// 3. Merge page 2 extracted in step 1 with the base PDF.
	// Create the merger interface pointer; it reuses the registration values from above.
	_DocumentMergerPtr pIDocumentMerger(__uuidof(DocumentMerger));
	pIDocumentMerger->put_RegistrationName(bstrRegName);
	pIDocumentMerger->put_RegistrationKey(bstrRegKey);

	// Merge 2 files into a 3rd one.
	_bstr_t bstrParamMerge1(L"page2.pdf");
	_bstr_t bstrParamMerge2(L"..\\..\\sample2.pdf");
	_bstr_t bstrParamMergeOutput(L"merged.pdf");
	sRes = pIDocumentMerger->Merge2(bstrParamMerge1, bstrParamMerge2, bstrParamMergeOutput);

	// Release both instances before COM is shut down.
	// Use the smart pointer's own Release() (dot, not arrow): it releases the
	// interface once and clears the wrapper. Calling ->Release() would release the
	// raw interface while the wrapper still holds it, so the wrapper's destructor
	// would release it a second time at the end of this function.
	pIDocumentSplitter.Release();
	pIDocumentMerger.Release();

	// Uninitialize COM support.
	CoUninitialize();

	return 0;
}
' PDF splitting sample built on Bytescout.PDFExtractor.DocumentSplitter.
' Three scenarios are shown, in this order:
'   1. pull a single page out into its own file;
'   2. cut the document into two files at a given page;
'   3. split by a textual range list such as "1-3,4-6,7,8-".

Imports System.IO
Imports Bytescout.PDFExtractor

Class Program

    ' Runs the three scenarios against "sample.pdf", then waits for a key press
    ' so the console output stays visible.
    Friend Shared Sub Main(args As String())

        Dim sourcePdf As String = "sample.pdf"

        ' The splitter is disposed at End Using, before the final prompt is printed.
        Using pdfSplitter As New DocumentSplitter("demo", "demo")

            pdfSplitter.OptimizeSplittedDocuments = True

            ' --- Scenario 1: extract one page ---
            ' Page numbers passed to the splitter are 1-based.
            pdfSplitter.ExtractPage(sourcePdf, "page3.pdf", 3)
            Console.WriteLine("Extracted page 3 to file ""page3.pdf""")
            Console.WriteLine()

            ' --- Scenario 2: split into two parts ---
            ' Page numbers passed to the splitter are 1-based.
            pdfSplitter.Split(sourcePdf, "part1.pdf", "part2.pdf", 3)
            Console.WriteLine("Splitted at page 3 to files ""part1.pdf"" and ""part2.pdf""")
            Console.WriteLine()

            ' --- Scenario 3: split by a list of ranges ---
            ' Ranges are 1-based; a trailing "-" means "up to the last page".
            Dim partPaths As String() = pdfSplitter.Split(sourcePdf, "1-3,4-6,7,8-")
            Console.WriteLine("Splitted by ranges: ")

            ' Print only the file name of each generated part.
            For index As Integer = 0 To partPaths.Length - 1
                Console.WriteLine(" " & Path.GetFileName(partPaths(index)))
            Next

        End Using

        Console.WriteLine()
        Console.WriteLine("Press any key...")
        Console.ReadKey()

    End Sub

End Class
' PDF splitting sample for the Bytescout.PDFExtractor.DocumentSplitter COM object.
' It extracts one page, splits the document in two, and then splits it by a
' list of page ranges, showing a message box after each step.

Dim pdfSplitter, sourcePdf
Dim generatedFiles   ' receives the list of output file names from SplitCOM()
Dim generatedFile    ' loop variable for walking generatedFiles

' Instantiate the splitter and register it
Set pdfSplitter = CreateObject("Bytescout.PDFExtractor.DocumentSplitter")
pdfSplitter.RegistrationName = "demo"
pdfSplitter.RegistrationKey = "demo"

sourcePdf = "sample.pdf"

' Uncomment to enable optimization of the produced documents:
' pdfSplitter.OptimizeSplittedDocuments = true

' --- Step 1: extract one page ---
' Page numbers passed to the splitter are 1-based.
pdfSplitter.ExtractPage sourcePdf, "page3.pdf", 3
MsgBox "Extracted page 3 to file page3.pdf"

' --- Step 2: split into two parts ---
' Page numbers passed to the splitter are 1-based.
pdfSplitter.Split sourcePdf, "part1.pdf", "part2.pdf", 3
MsgBox "Splitted at page 3 to files part1.pdf and part2.pdf"

' --- Step 3: split by a list of ranges ---
' Ranges are 1-based; a trailing "-" means "up to the last page".
' SplitCOM() hands back the names of the files it generated.
generatedFiles = pdfSplitter.SplitCOM(sourcePdf, "1-3,4-6,7,8-")
MsgBox "Splitted by ranges sucessfully! Click OK to see filenames generated"

' Show each generated file name in its own message box
For Each generatedFile In generatedFiles
    MsgBox generatedFile
Next