ByteScout Cloud API Server - Document Parser API - JavaScript - Parse Uploaded File (Node.js) - ByteScout

ByteScout Cloud API Server – Document Parser API – JavaScript – Parse Uploaded File (Node.js)

  • Home
  • /
  • Articles
  • /
  • ByteScout Cloud API Server – Document Parser API – JavaScript – Parse Uploaded File (Node.js)

How to parse an uploaded file (Node.js) with the Document Parser API in JavaScript and ByteScout Cloud API Server

What is ByteScout Cloud API Server? It is a ready-to-deploy Web API server that can be deployed in less than thirty minutes onto your own in-house Windows server (no Internet connection is required to process data!) or onto a private cloud server. It can store data in in-house, local server-based storage or in an Amazon AWS S3 bucket. Data is processed solely on the server using the built-in ByteScout-powered engine; no cloud services are used to process your data!

On-demand (REST Web API) version:
 Web API (on-demand version)

On-premise offline SDK for Windows:
 60 Day Free Trial (on-premise)

MultiPageTable-template1.yml

      
---
# Template that demonstrates parsing of multi-page table using only
# regular expressions for the table start, end, and rows.
# If regular expression cannot be written for every table row (for example,
# if the table contains empty cells), try the second method demonstrated
# in `MultiPageTable-template2.yml` template.
templateVersion: 3
templatePriority: 0
sourceId: Multipage Table Test
detectionRules:
  keywords:
  - Sample document with multi-page table
fields:
  total:
    type: regex
    expression: TOTAL {{DECIMAL}}
    dataType: decimal
tables:
- name: table1
  start:
    # regular expression to find the table start in document
    expression: Item\s+Description\s+Price\s+Qty\s+Extended Price
  end:
    # regular expression to find the table end in document
    expression: TOTAL\s+\d+\.\d\d
  row:
    # regular expression to find table rows
    expression: '^\s*(?<itemNo>\d+)\s+(?<description>.+?)\s+(?<price>\d+\.\d\d)\s+(?<qty>\d+)\s+(?<extPrice>\d+\.\d\d)'
  columns:
  - name: itemNo
    type: integer
  - name: description
    type: string
  - name: price
    type: decimal
  - name: qty
    type: integer
  - name: extPrice
    type: decimal
  multipage: true

app.js

      
/*jshint esversion: 6 */

// Please NOTE: In this sample we're assuming Cloud API Server is hosted at "https://localhost".
// If it's not, then please replace `ApiHost` below with your hosting host name.

const https = require("https");
const path = require("path");
const fs = require("fs");
// `request` module is required for file upload.
// Use "npm install request" command to install.
const request = require("request");

// Host name of the Cloud API Server instance.
const ApiHost = "localhost";
// Source PDF file
const SourceFile = "./MultiPageTable.pdf";
// PDF document password. Leave empty for unprotected documents.
const Password = "";
// Destination JSON file name (the parser output is saved here)
const DestinationFile = "./result.json";

// 1. RETRIEVE PRESIGNED URL TO UPLOAD FILE.
getPresignedUrl(SourceFile)
    // 2. UPLOAD THE FILE TO CLOUD.
    .then(([uploadUrl, uploadedFileUrl]) =>
        uploadFile(SourceFile, uploadUrl)
            // 3. PARSE UPLOADED PDF FILE.
            .then(() => parsePdf(uploadedFileUrl, Password, DestinationFile))
    )
    .catch((e) => {
        // Every step rejects on failure, so a single handler reports all errors.
        console.error(e);
    });

/**
 * Requests a presigned upload URL from the `Get Presigned URL` API endpoint.
 *
 * @param {string} localFile - Path of the local file; only its base name is sent to the service.
 * @returns {Promise<string[]>} Resolves with `[presignedUrl, url]` as reported by the service.
 *     Rejects with an Error on a request error, an unparsable response, or a service-reported error.
 */
function getPresignedUrl(localFile) {
    return new Promise((resolve, reject) => {
        // Prepare request to `Get Presigned URL` API endpoint.
        // The file name is encoded as a query component so characters such as `&` or `#` cannot break the query.
        const fileName = encodeURIComponent(path.basename(localFile));
        const queryPath = `/file/upload/get-presigned-url?contenttype=application/octet-stream&name=${fileName}`;
        const reqOptions = {
            host: ApiHost,
            path: queryPath
        };

        // Send request
        https.get(reqOptions, (response) => {
            // The body may arrive in several chunks; parse it only once it is complete.
            const chunks = [];
            response.on("data", (chunk) => chunks.push(chunk));
            response.on("error", (e) => reject(new Error(`getPresignedUrl(): ${e.message}`)));
            response.on("end", () => {
                let data;
                try {
                    data = JSON.parse(Buffer.concat(chunks).toString("utf8"));
                } catch (e) {
                    reject(new Error(`getPresignedUrl(): invalid JSON response: ${e.message}`));
                    return;
                }
                if (data && data.error === false) {
                    // Return presigned url we received
                    resolve([data.presignedUrl, data.url]);
                } else {
                    // Service reported error
                    reject(new Error(`getPresignedUrl(): ${data && data.message}`));
                }
            });
        }).on("error", (e) => {
            // Request error
            reject(new Error(`getPresignedUrl(): ${e.message}`));
        });
    });
}

/**
 * Uploads a local file to the given presigned URL with an HTTP PUT request.
 *
 * @param {string} localFile - Path of the file to upload.
 * @param {string} uploadUrl - Presigned URL to PUT the file content to.
 * @returns {Promise<void>} Resolves when the upload succeeded.
 *     Rejects with an Error if the file cannot be read, the request fails,
 *     or the server answers with a non-2xx status.
 */
function uploadFile(localFile, uploadUrl) {
    return new Promise((resolve, reject) => {
        fs.readFile(localFile, (readError, data) => {
            if (readError) {
                reject(new Error(`uploadFile() read error: ${readError.message}`));
                return;
            }
            request({
                method: "PUT",
                url: uploadUrl,
                body: data,
                headers: {
                    "Content-Type": "application/octet-stream"
                }
            }, (err, res) => {
                if (err) {
                    reject(new Error(`uploadFile() request error: ${err.message}`));
                    return;
                }
                if (res.statusCode < 200 || res.statusCode >= 300) {
                    reject(new Error(`uploadFile() request error: HTTP ${res.statusCode}`));
                    return;
                }
                resolve();
            });
        });
    });
}

/**
 * Calls the `Document Parser` API for an uploaded file using the template
 * stored in "./MultiPageTable-template1.yml" and downloads the generated result.
 *
 * @param {string} uploadedFileUrl - URL of the previously uploaded file.
 * @param {string} password - PDF document password.
 *     NOTE(review): this value is currently not forwarded to the API; confirm the
 *     expected request field before sending it.
 * @param {string} destinationFile - Path the downloaded result is written to.
 * @returns {Promise<void>} Resolves once the result file has been written.
 *     Rejects with an Error if the template cannot be read, the API call fails,
 *     the service reports an error, or the download fails.
 */
function parsePdf(uploadedFileUrl, password, destinationFile) {
    return new Promise((resolve, reject) => {
        // Template text. Use Document Parser SDK (https://bytescout.com/products/developer/documentparsersdk/index.html)
        // to create templates.
        // Read template from file (a read failure rejects the promise):
        const templateText = fs.readFileSync("./MultiPageTable-template1.yml", "utf-8");

        // URL for `Document Parser` API call
        const query = `https://${ApiHost}/pdf/documentparser`;
        const jsonRequestObject = {
            url: uploadedFileUrl,
            template: templateText
        };

        request({
            url: query,
            method: "POST",
            json: true,
            body: jsonRequestObject
        }, (error, response, body) => {
            if (error) {
                reject(new Error(`Error: ${error.message}`));
                return;
            }

            // With `json: true` the response body is already parsed.
            if (!body || body.error !== false) {
                // Service reported error
                reject(new Error(`Error: ${body && body.message}`));
                return;
            }

            // Download generated file
            const file = fs.createWriteStream(destinationFile);
            file.on("error", (e) => reject(new Error(`Error: ${e.message}`)));
            // "finish" fires only after all data was written, never after destroy().
            file.on("finish", () => {
                console.log(`Generated result file saved as "${destinationFile}" file.`);
                resolve();
            });

            https.get(body.url, (response2) => {
                response2.on("error", (e) => {
                    file.destroy();
                    reject(new Error(`Error: ${e.message}`));
                });
                response2.pipe(file);
            }).on("error", (e) => {
                file.destroy();
                reject(new Error(`Error: ${e.message}`));
            });
        });
    });
}

package.json

      
{ "name": "test", "version": "1.0.0", "description": "PDF.co", "main": "app.js", "scripts": { }, "keywords": [ "pdf.co", "web", "api", "bytescout", "api" ], "author": "ByteScout & PDF.co", "license": "ISC", "dependencies": { "request": "^2.88.2" } }

VIDEO

ON-PREMISE OFFLINE SDK

Get 60 Day Free Trial

See also:

ON-DEMAND REST WEB API

Get Your API Key

See also:

Tutorials:

prev
next