FindTableAndExtractAsXML.vbs
' Detects tables on every page of a PDF document and saves each detected
' table region as a separate XML file named "page-<page>-table-<n>.xml".
'
' Two Bytescout.PDFExtractor COM objects are used:
'   - TableDetector : locates tables and reports their bounding rectangles
'   - XMLExtractor  : exports the content of a given rectangle as XML

Const INPUT_PDF = "..\..\sample3.pdf"

Dim tableDetector, xmlExtractor
Dim pageCount, i, t

' Create Bytescout.PDFExtractor.TableDetector object
Set tableDetector = CreateObject("Bytescout.PDFExtractor.TableDetector")
tableDetector.RegistrationName = "demo"
tableDetector.RegistrationKey = "demo"

' Create Bytescout.PDFExtractor.XMLExtractor object
Set xmlExtractor = CreateObject("Bytescout.PDFExtractor.XMLExtractor")
xmlExtractor.RegistrationName = "demo"
xmlExtractor.RegistrationKey = "demo"

' Define what kind of tables should be detected:
' require at least 3 columns ...
tableDetector.DetectionMinNumberOfColumns = 3
' ... and at least 3 rows
tableDetector.DetectionMinNumberOfRows = 3

' Load the same sample PDF document into both objects
tableDetector.LoadDocumentFromFile INPUT_PDF
xmlExtractor.LoadDocumentFromFile INPUT_PDF

' Get page count
pageCount = tableDetector.GetPageCount()

' Iterate through pages (indices run from 0 to pageCount - 1)
For i = 0 To pageCount - 1

    ' Per-page table counter, used in the output file name
    t = 0

    ' Find the first table on this page and continue only if one was found
    If tableDetector.FindTable(i) Then

        Do
            ' Restrict the XML extractor to the rectangle reported by the table detector
            xmlExtractor.SetExtractionArea _
                tableDetector.GetFoundTableRectangle_Left(), _
                tableDetector.GetFoundTableRectangle_Top(), _
                tableDetector.GetFoundTableRectangle_Width(), _
                tableDetector.GetFoundTableRectangle_Height()

            ' Export the table to an XML file
            xmlExtractor.SavePageXMLToFile i, "page-" & CStr(i) & "-table-" & CStr(t) & ".xml"

            t = t + 1

        ' Keep going while the detector reports another table
        Loop While tableDetector.FindNextTable()

    End If

Next

' Release the COM objects
Set xmlExtractor = Nothing
Set tableDetector = Nothing
Click here to get your Free Trial version of the SDK
also available as: