ByteScout Cloud API Server - PDF Text Search API - JavaScript - PDF Text Search from Uploaded File (Node js) - Async API - ByteScout

ByteScout Cloud API Server – PDF Text Search API – JavaScript – PDF Text Search from Uploaded File (Node js) – Async API

  • Home
  • /
  • Articles
  • /
  • ByteScout Cloud API Server – PDF Text Search API – JavaScript – PDF Text Search from Uploaded File (Node js) – Async API

How to use the PDF text search API in JavaScript with ByteScout Cloud API Server

What is ByteScout Cloud API Server? It is a ready-to-deploy Web API server that can be deployed in less than thirty minutes onto your own in-house Windows server (no Internet connection is required to process data!) or onto a private cloud server. It can store data in in-house, local server-based storage or in an Amazon AWS S3 bucket. Data is processed solely on the server using the built-in ByteScout-powered engine; no cloud services are used to process your data!

On-demand (REST Web API) version:
 Web API (on-demand version)

On-premise offline SDK for Windows:
 60 Day Free Trial (on-premise)

app.js

      
/*jshint esversion: 6 */

// Please NOTE: In this sample we're assuming Cloud API Server is hosted at "https://localhost".
// If it's not, then please replace this with your hosting URL.

const https = require("https");
const path = require("path");
const fs = require("fs");
// `request` module is required for file upload.
// Use "npm install request" command to install.
const request = require("request");

// Source file name
const SourceFile = "./sample.pdf";
// Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
const Pages = "";
// PDF document password. Leave empty for unprotected documents.
const Password = "";
// Search string.
// Regular expression to find numbers in format dd.dd and between 40.00 to 99.99.
// NOTE(review): the unescaped "." matches any character in common regex dialects;
// use "\\." if only a literal dot should match - confirm against the server's regex engine.
const SearchString = '[4-9][0-9].[0-9][0-9]';
// Enable regular expressions (Regex)
const RegexSearch = 'True';

// 1. RETRIEVE THE PRESIGNED URL TO UPLOAD THE FILE.
getPresignedUrl(SourceFile)
    .then(([uploadUrl, uploadedFileUrl]) => {
        // 2. UPLOAD THE FILE TO CLOUD.
        return uploadFile(SourceFile, uploadUrl)
            .then(() => {
                // 3. PDF Text Search FROM UPLOADED FILE
                pdfTextSearch(uploadedFileUrl);
            });
    })
    .catch((e) => {
        // Any failure from steps 1-3 ends up here because the helpers reject.
        console.log(e);
    });

/**
 * Collects the complete body of an HTTP response and parses it as JSON.
 *
 * A body may arrive in several "data" chunks, so parsing is deferred until
 * the "end" event instead of being attempted on each chunk.
 *
 * @param {object} response - Response object passed to the `https.get` callback.
 * @returns {Promise<*>} Resolves with the parsed JSON value; rejects on a
 *     stream error or when the body is not valid JSON.
 */
function readJsonBody(response) {
    return new Promise((resolve, reject) => {
        const chunks = [];
        response.on("data", (chunk) => {
            chunks.push(chunk);
        });
        response.on("error", reject);
        response.on("end", () => {
            try {
                resolve(JSON.parse(Buffer.concat(chunks).toString("utf8")));
            } catch (e) {
                reject(e);
            }
        });
    });
}

/**
 * Requests a presigned upload URL for the given local file.
 *
 * @param {string} localFile - Path of the file to upload; only its base name is sent.
 * @returns {Promise<string[]>} Resolves with `[presignedUrl, url]` taken from the
 *     service response; rejects when the request fails, the response is not
 *     valid JSON, or the service reports an error.
 */
function getPresignedUrl(localFile) {
    return new Promise((resolve, reject) => {
        // Prepare request to `Get Presigned URL` API endpoint.
        // The file name is component-encoded so characters such as "&" or "#"
        // cannot break the query string.
        const fileName = encodeURIComponent(path.basename(localFile));
        const queryPath = `/file/upload/get-presigned-url?contenttype=application/octet-stream&name=${fileName}`;
        const reqOptions = {
            host: "localhost",
            path: queryPath
        };

        // Send request
        https.get(reqOptions, (response) => {
            readJsonBody(response)
                .then((data) => {
                    if (data.error === false) {
                        // Return presigned url we received
                        resolve([data.presignedUrl, data.url]);
                    } else {
                        // Service reported error
                        reject(new Error(`getPresignedUrl(): ${data.message}`));
                    }
                })
                .catch((e) => {
                    reject(new Error(`getPresignedUrl(): ${e}`));
                });
        }).on("error", (e) => {
            // Request error
            reject(new Error(`getPresignedUrl(): ${e}`));
        });
    });
}

/**
 * Uploads a local file to the presigned URL with an HTTP PUT.
 *
 * @param {string} localFile - Path of the file to read and upload.
 * @param {string} uploadUrl - Presigned URL to PUT the file contents to.
 * @returns {Promise<void>} Resolves when the upload request completes with a
 *     2xx status; rejects when the file cannot be read, the request fails, or
 *     the server answers with a non-2xx status.
 */
function uploadFile(localFile, uploadUrl) {
    return new Promise((resolve, reject) => {
        fs.readFile(localFile, (readErr, data) => {
            if (readErr) {
                reject(new Error(`uploadFile() read error: ${readErr}`));
                return;
            }

            const reqOptions = {
                method: "PUT",
                url: uploadUrl,
                body: data,
                headers: {
                    "Content-Type": "application/octet-stream"
                }
            };

            request(reqOptions, (err, res) => {
                if (err) {
                    reject(new Error(`uploadFile() request error: ${err}`));
                    return;
                }
                if (res.statusCode < 200 || res.statusCode >= 300) {
                    reject(new Error(`uploadFile() request error: HTTP status ${res.statusCode}`));
                    return;
                }
                resolve();
            });
        });
    });
}

/**
 * Starts an asynchronous PDF text search job for the uploaded file and begins
 * polling for its completion.
 *
 * @param {string} uploadedFileUrl - URL of the previously uploaded PDF.
 */
function pdfTextSearch(uploadedFileUrl) {
    // Prepare URL for PDF text search API call.
    const query = `https://localhost/pdf/find`;
    const reqOptions = {
        uri: query,
        formData: {
            password: Password,
            pages: Pages,
            url: uploadedFileUrl,
            searchString: SearchString,
            regexSearch: RegexSearch,
            async: 'True'
        }
    };

    // Send request
    request.get(reqOptions, (error, response, body) => {
        if (error) {
            return console.error("Error: ", error);
        }

        // Parse JSON response
        let data;
        try {
            data = JSON.parse(body);
        } catch (e) {
            return console.error("Error: ", e);
        }

        // Without a job id there is nothing to poll for.
        if (!data.jobId) {
            return console.error(`pdfTextSearch(): no job id in response: ${body}`);
        }

        checkIfJobIsCompleted(data.jobId, data.url);
    });
}

/**
 * Polls the job status endpoint until the job leaves the "working" state.
 * On "success" the result file is downloaded and the matches are printed;
 * any other status is reported and polling stops.
 *
 * @param {string} jobId - Identifier of the background job to check.
 * @param {string} resultFileUrl - URL of the JSON result file to download on success.
 */
function checkIfJobIsCompleted(jobId, resultFileUrl) {
    const queryPath = `/job/check?jobid=${jobId}`;
    const reqOptions = {
        host: "localhost",
        path: encodeURI(queryPath),
        method: "GET"
    };

    https.get(reqOptions, (response) => {
        readJsonBody(response)
            .then((data) => {
                console.log(`Checking Job #${jobId}, Status: ${data.status}, Time: ${new Date().toLocaleString()}`);

                if (data.status === "working") {
                    // Check again after 3 seconds
                    setTimeout(() => {
                        checkIfJobIsCompleted(jobId, resultFileUrl);
                    }, 3000);
                } else if (data.status === "success") {
                    printSearchResults(resultFileUrl);
                } else {
                    console.log(`Operation ended with status: "${data.status}".`);
                }
            })
            .catch((e) => {
                console.error(`checkIfJobIsCompleted(): ${e}`);
            });
    }).on("error", (e) => {
        // Request error
        console.error(`checkIfJobIsCompleted(): ${e}`);
    });
}

/**
 * Downloads the JSON result file and prints every found match with its coordinates.
 *
 * @param {string} resultFileUrl - URL of the JSON result file.
 */
function printSearchResults(resultFileUrl) {
    request({ method: 'GET', uri: resultFileUrl, gzip: true }, (error, response, body) => {
        if (error) {
            return console.error("Error: ", error);
        }

        // Parse JSON response
        let matches;
        try {
            matches = JSON.parse(body);
        } catch (e) {
            return console.error("Error: ", e);
        }

        if (!Array.isArray(matches)) {
            return console.error(`Unexpected search result: ${body}`);
        }

        matches.forEach((element) => {
            console.log(`Found text ${element.text} at coordinates ${element.left}, ${element.top}`);
        });
    });
}

package.json

      
{ "name": "test", "version": "1.0.0", "description": "PDF.co", "main": "app.js", "scripts": { }, "keywords": [ "pdf.co", "web", "api", "bytescout", "api" ], "author": "ByteScout & PDF.co", "license": "ISC", "dependencies": { "request": "^2.88.2" } }

VIDEO

ON-PREMISE OFFLINE SDK

Get 60 Day Free Trial

See also:

ON-DEMAND REST WEB API

Get Your API Key

See also:

Tutorials:

prev
next