When processing huge PDF documents with PDF Extractor SDK, you may run into an OutOfMemoryException. This tutorial demonstrates how to reduce memory usage by disabling page data caching in C#, VB.NET, and VBScript.
Also, check this tutorial: How to batch process PDF files with ByteScout PDF Extractor SDK.
Select your programming language:
using System;
using System.IO;
using Bytescout.PDFExtractor;
namespace ReduceMemoryUsage
{
    /// <summary>
    /// Sample program that extracts text from a PDF with page data caching disabled
    /// in order to reduce memory usage on huge documents.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Extracts the text of "sample2.pdf" into "output.txt" and, when extraction
        /// succeeded, opens the result in the default associated application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // When processing huge PDF documents you may run into OutOfMemoryException.
            // This example demonstrates a way to spare the memory by disabling page data caching.

            // Create Bytescout.PDFExtractor.TextExtractor instance
            using (TextExtractor extractor = new TextExtractor("demo", "demo"))
            {
                try
                {
                    // Load sample PDF document
                    extractor.LoadDocumentFromFile("sample2.pdf");

                    // Disable page data caching, so processed pages will be disposed automatically
                    extractor.PageDataCaching = PageDataCaching.None;

                    // Save extracted text to file
                    extractor.SaveTextToFile("output.txt");
                }
                catch (PDFExtractorException exception)
                {
                    Console.Write(exception.ToString());

                    // Extraction failed: "output.txt" may be missing or stale, so do not try to open it.
                    return;
                }
            }

            // Open the output file in default associated application.
            // Opening a document (rather than an executable) requires shell execution;
            // UseShellExecute defaults to false on .NET Core / .NET 5+, so set it explicitly.
            System.Diagnostics.ProcessStartInfo startInfo = new System.Diagnostics.ProcessStartInfo("output.txt");
            startInfo.UseShellExecute = true;
            System.Diagnostics.Process.Start(startInfo);
        }
    }
}
Imports Bytescout.PDFExtractor
Imports System.IO
''' <summary>
''' Sample program that extracts text from a PDF with page data caching disabled
''' in order to reduce memory usage on huge documents.
''' </summary>
Class Program

    ''' <summary>
    ''' Extracts the text of "sample2.pdf" into "output.txt" and, when extraction
    ''' succeeded, opens the result in the default associated application.
    ''' </summary>
    ''' <param name="args">Command-line arguments (not used).</param>
    Friend Shared Sub Main(args As String())

        ' When processing huge PDF documents you may run into OutOfMemoryException.
        ' This example demonstrates a way to spare the memory by disabling page data caching.

        ' Create Bytescout.PDFExtractor.TextExtractor instance
        Using extractor As New TextExtractor("demo", "demo")
            Try
                ' Load sample PDF document
                extractor.LoadDocumentFromFile("sample2.pdf")

                ' Disable page data caching, so processed pages will be disposed automatically
                extractor.PageDataCaching = PageDataCaching.None

                ' Save extracted text to file
                extractor.SaveTextToFile("output.txt")
            Catch exception As PDFExtractorException
                Console.Write(exception.ToString())

                ' Extraction failed: "output.txt" may be missing or stale, so do not try to open it.
                Return
            End Try
        End Using

        ' Open the output file in default associated application.
        ' Opening a document (rather than an executable) requires shell execution;
        ' UseShellExecute defaults to False on .NET Core / .NET 5+, so set it explicitly.
        Dim startInfo As New System.Diagnostics.ProcessStartInfo("output.txt")
        startInfo.UseShellExecute = True
        System.Diagnostics.Process.Start(startInfo)

    End Sub

End Class
' When processing huge PDF documents you may run into OutOfMemoryException.
' This example demonstrates a way to spare the memory by disabling page data caching.

' Create Bytescout.PDFExtractor.TextExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document
extractor.LoadDocumentFromFile("..\..\sample2.pdf")

' Disable page data caching, so processed pages will be disposed automatically.
' .NET enum names are not visible to VBScript through late-bound COM, so writing
' "PageDataCaching.None" fails at run time with "Object required". Pass the numeric value instead.
' NOTE(review): 0 is assumed to be the value of PageDataCaching.None - confirm against the SDK reference.
extractor.PageDataCaching = 0

' Save extracted text to file
extractor.SaveTextToFile("output.txt")

' Open the output file in default associated application
Set shell = CreateObject("WScript.Shell")
shell.Run "output.txt", 1, false
Set shell = Nothing

Set extractor = Nothing