How to reduce memory usage when processing PDF using ByteScout PDF Extractor SDK in C#, VB.NET and VBScript

  • Home
  • /
  • Articles
  • /
  • How to reduce memory usage when processing PDF using ByteScout PDF Extractor SDK in C#, VB.NET and VBScript

When processing huge PDF documents using PDF Extractor SDK, you may run into an OutOfMemoryException. This tutorial demonstrates how to reduce memory usage by disabling page data caching in C#, VB.NET, and VBScript.

Also, check this tutorial: How to batch process PDF files with ByteScout PDF Extractor SDK.

Select your programming language:

How to reduce memory usage in C#

using System;
using System.IO;
using Bytescout.PDFExtractor;

namespace ReduceMemoryUsage
{
    /// <summary>
    /// Console sample: extracts the text of a PDF document to a text file with
    /// page data caching disabled, then opens the result.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "sample2.pdf", writes its text to "output.txt" and opens that file
        /// in the default associated application. If the extractor reports an error,
        /// the error is printed and the output file is not opened.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // When processing huge PDF documents you may run into an OutOfMemoryException.
            // This example demonstrates a way to spare the memory by disabling page data caching.

            const string inputFile = "sample2.pdf";
            const string outputFile = "output.txt";

            // Create Bytescout.PDFExtractor.TextExtractor instance
            using (TextExtractor extractor = new TextExtractor("demo", "demo"))
            {
                try
                {
                    // Load sample PDF document
                    extractor.LoadDocumentFromFile(inputFile);

                    // Disable page data caching, so processed pages will be disposed automatically
                    extractor.PageDataCaching = PageDataCaching.None;

                    // Save extracted text to file
                    extractor.SaveTextToFile(outputFile);
                }
                catch (PDFExtractorException exception)
                {
                    Console.WriteLine(exception.ToString());

                    // Extraction failed: the output file may be missing or left over from
                    // an earlier run, so do not try to open it.
                    return;
                }
            }

            // Open the output file in default associated application.
            // UseShellExecute is set explicitly because it defaults to false on
            // .NET Core / .NET 5+, where starting a document by file name would fail.
            System.Diagnostics.ProcessStartInfo startInfo = new System.Diagnostics.ProcessStartInfo(outputFile)
            {
                UseShellExecute = true
            };
            System.Diagnostics.Process.Start(startInfo);
        }
    }
}

How to reduce memory usage in Visual Basic .NET

Imports Bytescout.PDFExtractor
Imports System.IO

''' <summary>
''' Console sample: extracts the text of a PDF document to a text file with
''' page data caching disabled, then opens the result.
''' </summary>
Class Program
    ''' <summary>
    ''' Loads "sample2.pdf", writes its text to "output.txt" and opens that file
    ''' in the default associated application. If the extractor reports an error,
    ''' the error is printed and the output file is not opened.
    ''' </summary>
    ''' <param name="args">Command-line arguments (not used).</param>
    Friend Shared Sub Main(args As String())

        ' When processing huge PDF documents you may run into an OutOfMemoryException.
        ' This example demonstrates a way to spare the memory by disabling page data caching.

        Const inputFile As String = "sample2.pdf"
        Const outputFile As String = "output.txt"

        ' Create Bytescout.PDFExtractor.TextExtractor instance
        Using extractor As New TextExtractor("demo", "demo")

            Try
                ' Load sample PDF document
                extractor.LoadDocumentFromFile(inputFile)

                ' Disable page data caching, so processed pages will be disposed automatically
                extractor.PageDataCaching = PageDataCaching.None

                ' Save extracted text to file
                extractor.SaveTextToFile(outputFile)

            Catch exception As PDFExtractorException
                Console.WriteLine(exception.ToString())

                ' Extraction failed: the output file may be missing or left over from
                ' an earlier run, so do not try to open it.
                Return
            End Try

        End Using

        ' Open the output file in default associated application.
        ' UseShellExecute is set explicitly because it defaults to False on
        ' .NET Core / .NET 5+, where starting a document by file name would fail.
        Dim startInfo As New System.Diagnostics.ProcessStartInfo(outputFile) With {.UseShellExecute = True}
        System.Diagnostics.Process.Start(startInfo)

    End Sub
End Class

How to reduce memory usage in VBScript (Visual Basic 6)

' When processing huge PDF documents you may run into an OutOfMemoryException.
' This example demonstrates a way to spare the memory by disabling page data caching.

' VBScript has no access to .NET enumeration types through late-bound COM, so
' writing PageDataCaching.None raises an "Object required" runtime error.
' The enumeration member is therefore passed by its numeric value.
' NOTE(review): 0 is assumed to be the value of PageDataCaching.None - confirm against the SDK reference.
Const PAGE_DATA_CACHING_NONE = 0

' Create Bytescout.PDFExtractor.TextExtractor object
Set extractor = CreateObject("Bytescout.PDFExtractor.TextExtractor")
extractor.RegistrationName = "demo"
extractor.RegistrationKey = "demo"

' Load sample PDF document
extractor.LoadDocumentFromFile "..\..\sample2.pdf"

' Disable page data caching, so processed pages will be disposed automatically
extractor.PageDataCaching = PAGE_DATA_CACHING_NONE

' Save extracted text to file
extractor.SaveTextToFile "output.txt"

' Open the output file in default associated application
' (window style 1 = normal window; False = do not wait for the application to exit)
Set shell = CreateObject("WScript.Shell")
shell.Run "output.txt", 1, False
Set shell = Nothing

Set extractor = Nothing
prev
next