Module1.vb
Imports System.IO
Imports System.Threading
Imports Bytescout.PDFExtractor
Module Module1
' Limit to 4 threads in queue.
' Set this value to number of your processor cores for max performance.
ReadOnly ThreadLimiter As Semaphore = New Semaphore(4, 4)
<MTAThread()>
Sub Main()
' Get all PDF files in a folder
Dim files = Directory.GetFiles("..\..\..\..\", "*.pdf")
' Array of events to wait
Dim doneEvents(files.Length - 1) As ManualResetEvent
For i As Integer = 0 To files.Length - 1
' Wait for the queue
ThreadLimiter.WaitOne()
' Start thread with filename and event in params
doneEvents(i) = New ManualResetEvent(False)
Dim threadData As Object = New Object() {files(i), doneEvents(i)}
ThreadPool.QueueUserWorkItem(AddressOf ConvertPdfToTxt, threadData)
Next
' Wait until all threads finish
WaitHandle.WaitAll(doneEvents)
Console.WriteLine()
Console.WriteLine("All is done.")
Console.WriteLine()
Console.WriteLine("Press any key to exit...")
Console.ReadKey()
End Sub
Private Sub ConvertPdfToTxt(ByVal state As Object)
' Get filename and event from params
Dim file As String = state(0)
Dim doneEvent As ManualResetEvent = state(1)
Dim resultFileName = Path.GetFileName(file) + ".txt"
Try
Console.WriteLine("Converting " + file)
Using extractor As New TextExtractor("demo", "demo")
extractor.LoadDocumentFromFile(file)
extractor.SaveTextToFile(resultFileName)
End Using
Console.WriteLine("Finished " + resultFileName)
Finally
' Signal the thread is finished
doneEvent.Set()
' Release semaphore
ThreadLimiter.Release()
End Try
End Sub
End Module
Click here to get your Free Trial version of the SDK
also available as: