Check the samples below to learn how to extract text from a PDF page by page in C#, VB.NET, and VBScript using ByteScout PDF Extractor SDK.
With PDF Extractor SDK, you can also extract separate pages from a PDF.
Select your programming language:
using System;
using Bytescout.PDFExtractor;
namespace ExtractTextByPages
{
    /// <summary>
    /// Console sample that saves the text of each page of "sample2.pdf" to its own
    /// text file (page0.txt, page1.txt, ...) and then opens the first of those files.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Index passed for the first page. The extraction loop starts at this value,
        /// so the first output file is named after it as well.
        /// </summary>
        private const int FirstPageIndex = 0;

        /// <summary>
        /// Entry point: extracts the text page by page and opens the first output file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create Bytescout.PDFExtractor.TextExtractor instance
            TextExtractor extractor = new TextExtractor();
            extractor.RegistrationName = "demo";
            extractor.RegistrationKey = "demo";

            // Load sample PDF document
            extractor.LoadDocumentFromFile("sample2.pdf");

            // Get page count
            int pageCount = extractor.GetPageCount();

            for (int i = FirstPageIndex; i < pageCount; i++)
            {
                // Save extracted page text to file
                extractor.SavePageTextToFile(i, GetPageFileName(i));
            }

            // Open first output file in default associated application.
            // The name is built by the same helper the loop uses, so it always refers
            // to a file that was actually written (previously "page1.txt" was opened,
            // which is the second page and does not exist for a one-page document).
            System.Diagnostics.ProcessStartInfo startInfo =
                new System.Diagnostics.ProcessStartInfo(GetPageFileName(FirstPageIndex));
            // Opening a document (rather than an executable) requires the shell.
            // This is already the default on .NET Framework; setting it explicitly
            // keeps the sample working on runtimes where the default is false.
            startInfo.UseShellExecute = true;
            System.Diagnostics.Process.Start(startInfo);
        }

        /// <summary>
        /// Builds the output file name for the page with the given index.
        /// </summary>
        /// <param name="pageIndex">Page index as passed to SavePageTextToFile.</param>
        /// <returns>File name of the form "page{index}.txt".</returns>
        private static string GetPageFileName(int pageIndex)
        {
            return "page" + pageIndex + ".txt";
        }
    }
}
Imports Bytescout.PDFExtractor
''' <summary>
''' Console sample that saves the text of each page of "sample2.pdf" to its own
''' text file (page0.txt, page1.txt, ...) and then opens the first of those files.
''' </summary>
Class Program
    ''' <summary>
    ''' Index passed for the first page. The extraction loop starts at this value,
    ''' so the first output file is named after it as well.
    ''' </summary>
    Private Const FirstPageIndex As Integer = 0

    ''' <summary>
    ''' Entry point: extracts the text page by page and opens the first output file.
    ''' </summary>
    ''' <param name="args">Command-line arguments (not used).</param>
    Friend Shared Sub Main(args As String())
        ' Create Bytescout.PDFExtractor.TextExtractor instance
        Dim extractor As New TextExtractor()
        extractor.RegistrationName = "demo"
        extractor.RegistrationKey = "demo"

        ' Load sample PDF document
        extractor.LoadDocumentFromFile("sample2.pdf")

        ' Get page count
        Dim pageCount As Integer = extractor.GetPageCount()

        For i As Integer = FirstPageIndex To pageCount - 1
            ' Save extracted page text to file
            extractor.SavePageTextToFile(i, GetPageFileName(i))
        Next

        ' Open first output file in default associated application.
        ' The name is built by the same helper the loop uses, so it always refers
        ' to a file that was actually written (previously "page1.txt" was opened,
        ' which is the second page and does not exist for a one-page document).
        Dim startInfo As New System.Diagnostics.ProcessStartInfo(GetPageFileName(FirstPageIndex))
        ' Opening a document (rather than an executable) requires the shell.
        ' This is already the default on .NET Framework; setting it explicitly
        ' keeps the sample working on runtimes where the default is False.
        startInfo.UseShellExecute = True
        System.Diagnostics.Process.Start(startInfo)
    End Sub

    ''' <summary>
    ''' Builds the output file name for the page with the given index.
    ''' </summary>
    ''' <param name="pageIndex">Page index as passed to SavePageTextToFile.</param>
    ''' <returns>File name of the form "page{index}.txt".</returns>
    Private Shared Function GetPageFileName(pageIndex As Integer) As String
        Return "page" & pageIndex & ".txt"
    End Function
End Class
' Writes the text of every page of the sample PDF to page0.txt, page1.txt, ...
' and then opens page0.txt in its associated application.
Dim pdfText, totalPages, pageIndex, outputName, wshShell

' Instantiate the text extractor COM object and register it
Set pdfText = CreateObject("Bytescout.PDFExtractor.TextExtractor")
pdfText.RegistrationName = "demo"
pdfText.RegistrationKey = "demo"

' Open the sample document
pdfText.LoadDocumentFromFile "..\..\sample2.pdf"

' Walk over all pages, indices 0 .. totalPages - 1
totalPages = pdfText.GetPageCount()
pageIndex = 0
Do While pageIndex < totalPages
    ' One output text file per page
    outputName = "page" & pageIndex & ".txt"
    pdfText.SavePageTextToFile pageIndex, outputName
    pageIndex = pageIndex + 1
Loop

' Show the first output file (window style 1, do not wait for it to close)
Set wshShell = CreateObject("WScript.Shell")
wshShell.Run "page0.txt", 1, False

' Release the COM objects
Set wshShell = Nothing
Set pdfText = Nothing