What is ByteScout Cloud API Server? It is a ready-to-deploy Web API server that can be installed in less than thirty minutes on your own in-house Windows server (no Internet connection is required to process data!) or on a private cloud server. It can store data in in-house local server storage or in an Amazon AWS S3 bucket. Data is processed solely on the server using the built-in ByteScout-powered engine; no cloud services are used to process your data!
On-demand (REST Web API) version:
Web API (on-demand version)
On-premise offline SDK for Windows:
60 Day Free Trial (on-premise)
---
# Template that demonstrates parsing of multi-page table using only
# regular expressions for the table start, end, and rows.
# If regular expression cannot be written for every table row (for example,
# if the table contains empty cells), try the second method demonstrated
# in `MultiPageTable-template2.yml` template.
# Template header: format version, priority, and an identifier for this template.
templateVersion: 3
templatePriority: 0
sourceId: Multipage Table Test
# NOTE(review): the keyword below is presumably what a document must contain
# for this template to be selected — confirm against the Document Parser docs.
detectionRules:
keywords:
- Sample document with multi-page table
# Standalone (non-table) fields extracted from the document.
fields:
total:
type: regex
# Literal "TOTAL" followed by the {{DECIMAL}} macro
# (presumably a built-in decimal-number pattern — TODO confirm).
expression: TOTAL {{DECIMAL}}
dataType: decimal
# Table definitions: each table is delimited by start/end expressions and
# split into rows by the row expression.
tables:
- name: table1
start:
# regular expression to find the table start in document
expression: Item\s+Description\s+Price\s+Qty\s+Extended Price
end:
# regular expression to find the table end in document
expression: TOTAL\s+\d+\.\d\d
row:
# regular expression to find table rows
# The named capture groups (itemNo, description, price, qty, extPrice)
# carry the same names as the columns declared below.
expression: '^\s*(?<itemNo>\d+)\s+(?<description>.+?)\s+(?<price>\d+\.\d\d)\s+(?<qty>\d+)\s+(?<extPrice>\d+\.\d\d)'
# Column names and the data type of each one.
columns:
- name: itemNo
type: integer
- name: description
type: string
- name: price
type: decimal
- name: qty
type: integer
- name: extPrice
type: decimal
# NOTE(review): presumably lets the table continue across page breaks,
# per the "multi-page table" description in the header — confirm.
multipage: true
/*jshint esversion: 6 */
// Please NOTE: In this sample we're assuming Cloud API Server is hosted at "https://localhost".
// If it's not, then please replace this with your hosting URL.
var https = require("https");
var path = require("path");
var fs = require("fs");
// `request` module is required for file upload.
// Use "npm install request" command to install.
var request = require("request");
// Source PDF file
const SourceFile = "./MultiPageTable.pdf";
// PDF document password. Leave empty for unprotected documents.
const Password = "";
// Destination JSON file name (receives the parsing result)
const DestinationFile = "./result.json";
// Sample workflow:
//   1. Ask the server for a presigned URL the source file can be uploaded to.
//   2. Upload the source file to that URL.
//   3. Run the document parser against the uploaded file and save the result.
// Any failure reported by a step is printed to the console.
getPresignedUrl(SourceFile)
    .then(([uploadUrl, uploadedFileUrl]) =>
        uploadFile(SourceFile, uploadUrl)
            .then(() => parsePdf(uploadedFileUrl, Password, DestinationFile))
    )
    .catch((failure) => {
        console.log(failure);
    });
/**
 * Requests a presigned upload URL for a local file from the API server.
 *
 * Only the base name of `localFile` is sent to the server; the file itself is
 * not read here.
 *
 * @param {string} localFile - Path of the file that is going to be uploaded.
 * @returns {Promise<string[]>} Resolves with `[presignedUrl, url]` as reported
 *   by the server. Rejects with an `Error` when the request fails, the
 *   response is not valid JSON, or the service reports an error.
 */
function getPresignedUrl(localFile) {
    return new Promise((resolve, reject) => {
        // Prepare request to `Get Presigned URL` API endpoint
        const queryPath = `/file/upload/get-presigned-url?contenttype=application/octet-stream&name=${path.basename(localFile)}`;
        const reqOptions = {
            host: "localhost",
            path: encodeURI(queryPath)
        };
        // Send request
        https.get(reqOptions, (response) => {
            // The body may arrive in several chunks, so collect all of them
            // and parse only once the response has ended.
            const chunks = [];
            response.on("data", (chunk) => {
                chunks.push(chunk);
            });
            response.on("error", (e) => {
                reject(new Error(`getPresignedUrl(): ${e}`));
            });
            response.on("end", () => {
                let data;
                try {
                    data = JSON.parse(Buffer.concat(chunks).toString("utf8"));
                }
                catch (e) {
                    reject(new Error(`getPresignedUrl(): invalid JSON response (${e.message})`));
                    return;
                }
                if (data && data.error === false) {
                    // Return presigned url we received
                    resolve([data.presignedUrl, data.url]);
                }
                else {
                    // Service reported error
                    reject(new Error(`getPresignedUrl(): ${data ? data.message : "empty response"}`));
                }
            });
        })
        .on("error", (e) => {
            // Request error
            reject(new Error(`getPresignedUrl(): ${e}`));
        });
    });
}
/**
 * Uploads a local file to a presigned URL with an HTTP PUT.
 *
 * @param {string} localFile - Path of the file to read and upload.
 * @param {string} uploadUrl - Presigned URL that accepts the PUT request.
 * @returns {Promise<void>} Resolves once the upload request completes.
 *   Rejects with an `Error` when the file cannot be read, the request fails,
 *   or the server answers with an HTTP status of 400 or above.
 */
function uploadFile(localFile, uploadUrl) {
    return new Promise((resolve, reject) => {
        fs.readFile(localFile, (readErr, data) => {
            if (readErr) {
                // Without this check an unreadable file would be "uploaded" as an empty body.
                reject(new Error(`uploadFile() read error: ${readErr.message}`));
                return;
            }
            request({
                method: "PUT",
                url: uploadUrl,
                body: data,
                headers: {
                    "Content-Type": "application/octet-stream"
                }
            }, (err, res) => {
                if (err) {
                    reject(new Error(`uploadFile() request error: ${err}`));
                    return;
                }
                if (res && res.statusCode >= 400) {
                    // The request reached the server but the upload was refused.
                    reject(new Error(`uploadFile() request error: HTTP ${res.statusCode}`));
                    return;
                }
                resolve();
            });
        });
    });
}
/**
 * Sends an uploaded PDF to the `Document Parser` endpoint together with the
 * YAML template read from "./MultiPageTable-template1.yml", then downloads
 * the generated result into `destinationFile`.
 *
 * Outcomes are reported on the console; nothing is returned.
 *
 * @param {string} uploadedFileUrl - URL of the previously uploaded PDF.
 * @param {string} password - PDF password; sent to the API only when non-empty.
 * @param {string} destinationFile - Local path the result is written to.
 */
function parsePdf(uploadedFileUrl, password, destinationFile) {
    // Template text. Use Document Parser SDK (https://bytescout.com/products/developer/documentparsersdk/index.html)
    // to create templates.
    // Read template from file:
    const templateText = fs.readFileSync("./MultiPageTable-template1.yml", "utf-8");
    // URL for `Document Parser` API call
    const query = "https://localhost/pdf/documentparser";
    const jsonRequestObject = {
        url: uploadedFileUrl,
        template: templateText
    };
    if (password) {
        // Only protected documents need a password; leave the request
        // unchanged for unprotected ones.
        jsonRequestObject.password = password;
    }
    request(
        {
            url: query,
            method: "POST",
            json: true,
            body: jsonRequestObject
        },
        (error, response, body) => {
            if (error) {
                return console.error("Error: ", error);
            }
            // With `json: true` the body has already been parsed by `request`.
            if (!body || body.error !== false) {
                // Service reported error
                console.log("Error: " + (body ? body.message : "empty response"));
                return;
            }
            // Download generated file
            https.get(body.url, (download) => {
                // Create the output file only once the download has started,
                // so a failed request does not leave an empty file behind.
                const file = fs.createWriteStream(destinationFile);
                file.on("error", (e) => {
                    console.error("Error: ", e);
                });
                download.pipe(file)
                    .on("close", () => {
                        console.log(`Generated result file saved as "${destinationFile}" file.`);
                    });
            })
            .on("error", (e) => {
                console.error("Error: ", e);
            });
        }
    );
}
{
"name": "test",
"version": "1.0.0",
"description": "PDF.co",
"main": "app.js",
"scripts": {
},
"keywords": [
"pdf.co",
"web",
"api",
"bytescout",
"api"
],
"author": "ByteScout & PDF.co",
"license": "ISC",
"dependencies": {
"request": "^2.88.2"
}
}
See also:
Get Your API Key
See also: