Cloud API - PDF To Text API - JavaScript - Convert PDF To Text From Uploaded File (Node.js) - Async API - ByteScout

Cloud API – PDF To Text API – JavaScript – Convert PDF To Text From Uploaded File (Node.js) – Async API

  • Home
  • /
  • Articles
  • /
  • Cloud API – PDF To Text API – JavaScript – Convert PDF To Text From Uploaded File (Node.js) – Async API

Cloud API – PDF To Text API – JavaScript – Convert PDF To Text From Uploaded File (Node.js) – Async API

ConvertPdfToTextFromUploadedFile.js

/*jshint esversion: 6 */

var https = require("https");
var path = require("path");
var fs = require("fs");

// `request` module is required for file upload.
// Use "npm install request" command to install.
var request = require("request");

// The authentication key (API Key); it is sent to the service in the `x-api-key` request header.
// Get your own by registering at https://app.pdf.co/documentation/api
const API_KEY = "*******************************";

// Source PDF file: local path of the document that is read and uploaded.
// Only its base name is sent to the service as the upload `name`.
const SourceFile = "./sample.pdf";
// Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
// Sent as the `pages` query parameter of the conversion request.
const Pages = "";
// PDF document password. Leave empty for unprotected documents.
// Sent as the `password` query parameter of the conversion request.
const Password = "";
// Destination TXT file name: local path the converted text is written to.
// Its base name is also sent as the `name` query parameter of the conversion request.
const DestinationFile = "./result.txt";


// Workflow entry point.
// Step 1: ask the service for a presigned URL the file can be uploaded to.
getPresignedUrl(API_KEY, SourceFile)
    .then(([uploadUrl, uploadedFileUrl]) => {
        // Step 3 (runs once the upload has completed): convert the uploaded PDF to text.
        const startConversion = () => {
            convertPdfToText(API_KEY, uploadedFileUrl, Password, Pages, DestinationFile);
        };
        // Failures of the upload/conversion steps are only reported to the console.
        const reportUploadFailure = (error) => {
            console.log(error);
        };

        // Step 2: upload the local file to the presigned URL.
        uploadFile(API_KEY, SourceFile, uploadUrl)
            .then(startConversion)
            .catch(reportUploadFailure);
    })
    .catch((error) => {
        // Failures of the presigned URL step are only reported to the console.
        console.log(error);
    });


/**
 * Requests a presigned upload URL from the `Get Presigned URL` API endpoint.
 *
 * @param {string} apiKey - API key sent in the `x-api-key` request header.
 * @param {string} localFile - Path of the local file; only its base name is sent
 *     to the service as the `name` query parameter.
 * @returns {Promise<string[]>} Resolves with `[presignedUrl, url]` taken from the
 *     service response. Rejects with an `Error` when the request fails, the
 *     response is not valid JSON, or the service reports an error.
 */
function getPresignedUrl(apiKey, localFile) {
    return new Promise((resolve, reject) => {
        // Prepare request to `Get Presigned URL` API endpoint.
        // The file name is encoded on its own so characters such as `&` or `#`
        // in it cannot break the query string.
        const queryPath = "/v1/file/upload/get-presigned-url" +
            "?contenttype=application/octet-stream" +
            `&name=${encodeURIComponent(path.basename(localFile))}`;
        const reqOptions = {
            host: "api.pdf.co",
            path: queryPath,
            headers: { "x-api-key": apiKey }
        };
        // Send request
        https.get(reqOptions, (response) => {
            // The body may arrive in several chunks, so collect all of them and
            // parse only when the response has ended.
            let body = "";
            response.setEncoding("utf8");
            response.on("data", (chunk) => {
                body += chunk;
            });
            response.on("error", (e) => {
                reject(new Error("getPresignedUrl(): " + e));
            });
            response.on("end", () => {
                let data;
                try {
                    data = JSON.parse(body);
                }
                catch (e) {
                    reject(new Error("getPresignedUrl(): invalid JSON response: " + e.message));
                    return;
                }
                if (data && data.error === false) {
                    // Return presigned url we received
                    resolve([data.presignedUrl, data.url]);
                }
                else {
                    // Service reported error
                    reject(new Error("getPresignedUrl(): " + (data ? data.message : "empty response")));
                }
            });
        })
            .on("error", (e) => {
                // Request error
                reject(new Error("getPresignedUrl(): " + e));
            });
    });
}

/**
 * Uploads a local file to a presigned URL with an HTTP PUT request.
 *
 * @param {string} apiKey - Accepted for signature compatibility; this function
 *     does not send it with the upload request.
 * @param {string} localFile - Path of the local file to read and upload.
 * @param {string} uploadUrl - Presigned URL the file content is PUT to.
 * @returns {Promise<void>} Resolves when the upload request completes. Rejects
 *     with the file system error when the file cannot be read, or with an
 *     `Error` when the upload request fails or returns an HTTP error status.
 */
function uploadFile(apiKey, localFile, uploadUrl) {
    return new Promise((resolve, reject) => {
        fs.readFile(localFile, (readErr, data) => {
            if (readErr) {
                // Do not attempt the upload when the source file cannot be read.
                reject(readErr);
                return;
            }
            request({
                method: "PUT",
                url: uploadUrl,
                body: data,
                headers: {
                    "Content-Type": "application/octet-stream"
                }
            }, (err, res) => {
                if (err) {
                    reject(new Error("uploadFile() request error: " + err));
                    return;
                }
                if (res && res.statusCode >= 400) {
                    // The request itself succeeded but the storage refused the upload.
                    reject(new Error("uploadFile() request error: HTTP " + res.statusCode));
                    return;
                }
                resolve();
            });
        });
    });
}


/**
 * Starts an asynchronous PDF-to-text conversion job through the `PDF To Text`
 * API endpoint and, when the job is accepted, hands it over to
 * `checkIfJobIsCompleted` for polling and download.
 *
 * Errors are reported with `console.log`; nothing is returned.
 *
 * @param {string} apiKey - API key sent in the `x-api-key` request header.
 * @param {string} uploadedFileUrl - URL of the uploaded PDF, sent as the `url` parameter.
 * @param {string} password - PDF password, sent as the `password` parameter (may be empty).
 * @param {string} pages - Page list, sent as the `pages` parameter (may be empty).
 * @param {string} destinationFile - Local output path; its base name is sent as
 *     the `name` parameter and the full path is passed on to `checkIfJobIsCompleted`.
 */
function convertPdfToText(apiKey, uploadedFileUrl, password, pages, destinationFile) {
    // Prepare request to `PDF To Text` API endpoint.
    // Every value is encoded individually so `&`, `?`, `=` or `#` inside the
    // uploaded file URL or the password cannot be mistaken for query syntax.
    const params = [
        ["name", path.basename(destinationFile)],
        ["password", password],
        ["pages", pages],
        ["url", uploadedFileUrl],
        ["async", "True"]
    ];
    const query = params
        .map(([key, value]) => `${key}=${encodeURIComponent(value)}`)
        .join("&");
    const reqOptions = {
        host: "api.pdf.co",
        path: `/v1/pdf/convert/to/text?${query}`,
        headers: { "x-api-key": apiKey }
    };
    // Send request
    https.get(reqOptions, (response) => {
        // Collect the whole body first: a response may arrive in several chunks.
        let body = "";
        response.setEncoding("utf8");
        response.on("data", (chunk) => {
            body += chunk;
        });
        response.on("end", () => {
            // Parse JSON response
            let data;
            try {
                data = JSON.parse(body);
            }
            catch (e) {
                console.log("convertPdfToText(): " + e);
                return;
            }
            if (data && data.error === false) {
                // Process returned job
                checkIfJobIsCompleted(data.jobId, data.url, destinationFile);
            }
            else {
                // Service reported error
                console.log("convertPdfToText(): " + (data ? data.message : "empty response"));
            }
        });
    })
        .on("error", (e) => {
            // Request error
            console.log("convertPdfToText(): " + e);
        });
}

/**
 * Polls the job status endpoint and, once the job has finished, downloads the
 * result into the destination file.
 *
 * While the reported status is "InProgress" the check is repeated every
 * 2 seconds. Any other status than "Finished" is logged and polling stops.
 * Errors are reported with `console.log`; nothing is returned.
 *
 * @param {string} jobId - Identifier of the job, sent as the `jobid` parameter.
 * @param {string} resultFileUrl - URL the finished result is downloaded from.
 * @param {string} destinationFile - Local path the downloaded result is written to.
 */
function checkIfJobIsCompleted(jobId, resultFileUrl, destinationFile) {
    const reqOptions = {
        host: "api.pdf.co",
        path: `/v1/job/check?jobid=${encodeURIComponent(jobId)}`,
        headers: { "x-api-key": API_KEY }
    };

    https.get(reqOptions, (response) => {
        // Collect the whole body first: a response may arrive in several chunks.
        let body = "";
        response.setEncoding("utf8");
        response.on("data", (chunk) => {
            body += chunk;
        });
        response.on("end", () => {
            // Parse JSON response
            let data;
            try {
                data = JSON.parse(body);
            }
            catch (e) {
                console.log("checkIfJobIsCompleted(): " + e);
                return;
            }
            const status = data ? data.Status : undefined;
            if (status === "InProgress") {
                // Check again after 2 seconds. The callback must not declare its
                // own parameters: setTimeout passes none here, so they would
                // shadow the real values with `undefined`.
                setTimeout(() => {
                    checkIfJobIsCompleted(jobId, resultFileUrl, destinationFile);
                }, 2000);
            }
            else if (status === "Finished") {
                // Download TXT file
                https.get(resultFileUrl, (response2) => {
                    // Create the output file only once the download has started,
                    // so a failed request does not leave an empty file behind.
                    const file = fs.createWriteStream(destinationFile);
                    file.on("error", (e) => {
                        console.log("checkIfJobIsCompleted(): " + e);
                    });
                    response2.pipe(file)
                        .on("close", () => {
                            console.log(`Generated TXT file saved as "${destinationFile}" file.`);
                        });
                })
                    .on("error", (e) => {
                        // Download request error
                        console.log("checkIfJobIsCompleted(): " + e);
                    });
            }
            else {
                console.log(`Operation ended with status: "${status}".`);
            }
        });
    })
        .on("error", (e) => {
            // Request error
            console.log("checkIfJobIsCompleted(): " + e);
        });
}


  Click here to get your Free Trial version of the SDK

Tutorials:

prev
next