Check the samples below to learn how to extract text from a PDF page by page in C#, VB.NET, and VBScript using ByteScout PDF Extractor SDK.
With PDF Extractor SDK, you can also extract separate pages from PDF.
Select your programming language:
using System;
using Bytescout.PDFExtractor;

namespace ExtractTextByPages
{
    /// <summary>
    /// Sample console program that saves the text of every page of
    /// "sample2.pdf" to its own file ("page0.txt", "page1.txt", ...)
    /// and then opens the first of those files.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: extracts text page by page and opens the first output file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.TextExtractor instance
            TextExtractor extractor = new TextExtractor();
            extractor.RegistrationName = "demo";
            extractor.RegistrationKey = "demo";

            // Load sample PDF document
            extractor.LoadDocumentFromFile("sample2.pdf");

            // Get page count
            int pageCount = extractor.GetPageCount();

            // The loop index starts at 0, so output files are named
            // page0.txt .. page{pageCount - 1}.txt.
            for (int i = 0; i < pageCount; i++)
            {
                string fileName = "page" + i + ".txt";

                // Save extracted page text to file
                extractor.SavePageTextToFile(i, fileName);
            }

            // Open first output file in default associated application.
            // The first file written by the loop is page0.txt (not page1.txt,
            // which is the second page and is absent for one-page documents).
            // Skip opening when the document has no pages, since no file exists.
            if (pageCount > 0)
            {
                System.Diagnostics.Process.Start("page0.txt");
            }
        }
    }
}
Imports Bytescout.PDFExtractor

''' <summary>
''' Sample console program that saves the text of every page of
''' "sample2.pdf" to its own file ("page0.txt", "page1.txt", ...)
''' and then opens the first of those files.
''' </summary>
Class Program

    ''' <summary>
    ''' Entry point: extracts text page by page and opens the first output file.
    ''' </summary>
    ''' <param name="args">Command-line arguments (not used).</param>
    Friend Shared Sub Main(args As String())

        ' Create Bytescout.PDFExtractor.TextExtractor instance
        Dim extractor As New TextExtractor()
        extractor.RegistrationName = "demo"
        extractor.RegistrationKey = "demo"

        ' Load sample PDF document
        extractor.LoadDocumentFromFile("sample2.pdf")

        ' Get page count
        Dim pageCount As Integer = extractor.GetPageCount()

        ' The loop index starts at 0, so output files are named
        ' page0.txt .. page{pageCount - 1}.txt.
        For i As Integer = 0 To pageCount - 1
            Dim fileName As String = "page" & i & ".txt"

            ' Save extracted page text to file
            extractor.SavePageTextToFile(i, fileName)
        Next

        ' Open first output file in default associated application.
        ' The first file written by the loop is page0.txt (not page1.txt,
        ' which is the second page and is absent for one-page documents).
        ' Skip opening when the document has no pages, since no file exists.
        If pageCount > 0 Then
            System.Diagnostics.Process.Start("page0.txt")
        End If

    End Sub

End Class
' Extracts the text of each page of a sample PDF into its own file
' (page0.txt, page1.txt, ...) and then opens page0.txt.

' Instantiate the text extractor COM object and set the demo registration
Set pdfText = CreateObject("Bytescout.PDFExtractor.TextExtractor")
pdfText.RegistrationName = "demo"
pdfText.RegistrationKey = "demo"

' Open the sample document located two folders up
pdfText.LoadDocumentFromFile("..\..\sample2.pdf")

' Walk over every page index, starting at 0
totalPages = pdfText.GetPageCount()
pageIndex = 0
Do While pageIndex < totalPages
    ' Write this page's text to "page<index>.txt"
    outputName = "page" & pageIndex & ".txt"
    Call pdfText.SavePageTextToFile(pageIndex, outputName)
    pageIndex = pageIndex + 1
Loop

' Show the first output file in its associated application
' (window style 1, do not wait for the application to exit)
Set wshShell = CreateObject("WScript.Shell")
wshShell.Run "page0.txt", 1, false

' Release the COM objects
Set wshShell = Nothing
Set pdfText = Nothing