Parsing XML to JSON in Amazon Lambda using external js libraries - javascript

I am trying to convert an XML String that I get from a server to JSON inside my Lambda function.
I have set up this rather simple example, using DynamoDB, to simulate the XML response that I get from the server. (Currently I'm just trying to get the conversion going.)
'use strict';
var AWS = require('aws-sdk');
var docClient = new AWS.DynamoDB.DocumentClient({region: 'eu-west-1'});
/**
 * Lambda entry point: reads one item from the DynamoDB table "dsbTable" by
 * its `bpNumber` key and passes the outcome to `callback`.
 *
 * @param {Object} e - Invocation event (not read by this handler).
 * @param {Object} ctx - Invocation context (not read by this handler).
 * @param {Function} callback - Called as callback(err, null) when the read
 *   fails, or callback(null, data) with the raw DynamoDB result on success.
 */
exports.handler = function (e, ctx, callback){
let table = "dsbTable";
let bpNumber = 1337;
// Declared but never used below.
var test;
// NOTE(review): X2JS is not required/imported anywhere in the visible code, so
// this line presumably throws a ReferenceError until the library is bundled
// with the function and required here — confirm.
var x2js = new X2JS();
let params = {
TableName: table,
Key:{
"bpNumber": bpNumber
},
};
docClient.get(params, function(err, data) {
if (err) {
console.error("Unable to read item. Error JSON:", JSON.stringify(err, null, 2));
callback(err, null);
} else {
console.log("GetItem succeeded:", JSON.stringify(data, null, 2));
// `getBp` is the item attribute that holds the XML string.
console.log('test' +data.Item.getBp);
// The XML -> JSON conversion itself is still disabled.
//var jsonObj = x2js.xml_str2json(data.Item.getBp);
//console.log(jsonObj);
callback(null, data);
}
});
} ;
getting the item works just fine and is returned like this
{
"Item": {
"getBp": "<message version=\"1.0\" system=\"AVS/3\"><header><client>553</client><avs3-sales-organization>7564</avs3-sales-organization><avs3-service-provider>DSD</avs3-service-provider></header><body><business-partner><salutation-code>01</salutation-code><titel-code-academic/><titel-academic/><titel-code-royal/><titel-royal/><job-titel/><last-name1>Pickle</last-name1><last-name2/><first-name>N</first-name><street/><street-suffix/><street-number/><street-number-suffix/><address-line-1>10 Waterside Way</address-line-1><address-line-2/><address-line-3/><zipcode>NN4 7XD</zipcode><country-code>GB</country-code><city>NORTHAMPTON</city><district/><region-code>NH</region-code><region-text>Northamptonshire</region-text><company1/><company2/><company3/><department/><po-box/><po-box-zipcode/><po-box-city/><po-box-country-code/><major-customer-zipcode/><address-source/><advertisement>Y</advertisement><category/><bp-number>1100000772</bp-number><bp-number-external/><bp-group>ABON</bp-group><eu-sales-tax-number/><bic-master-number/><sector/><communication><communication-type>WW</communication-type><communication-value>kate.southorn#dsbnet.co.uk</communication-value><communication-default>Y</communication-default></communication><attribute><attribute-type>ACC</attribute-type><attribute-value>Y</attribute-value></attribute><attribute><attribute-type>OIEMEX</attribute-type><attribute-value>N20121211</attribute-value></attribute><attribute><attribute-type>OINLIN</attribute-type><attribute-value>N20121211</attribute-value></attribute><attribute><attribute-type>OISMEX</attribute-type><attribute-value>N20121211</attribute-value></attribute><attribute><attribute-type>OISMIN</attribute-type><attribute-value>N20121211</attribute-value></attribute><attribute><attribute-type>OOEMIN</attribute-type><attribute-value>N20121211</attribute-value></attribute><attribute><attribute-type>OOFXEX</attribute-type><attribute-value>N20121211</attribute-value></attribute><attribute><attribute-type>OOFXIN</
attribute-type><attribute-value>N20121211</attribute-value></attribute><attribute><attribute-type>OOPTEX</attribute-type><attribute-value>N20121211</attribute-value></attribute><attribute><attribute-type>OOPTIN</attribute-type><attribute-value>N20121211</attribute-value></attribute><attribute><attribute-type>OOTEEX</attribute-type><attribute-value>N20121211</attribute-value></attribute><attribute><attribute-type>OOTEIN</attribute-type><attribute-value>N20121211</attribute-value></attribute><attribute><attribute-type>THEDSU</attribute-type><attribute-value/></attribute></business-partner></body></message>",
"bpNumber": 1337
}
}
My main issue now is that I can not figure out how i can import any XMLtoJSON library files like this one here
I hope my code in this case is not completely worthless and there is a rather simple solution.

You're going through the path that many new Lambda users have gone.
With Lambda, it is absolutely easy, you just write your code and validate that it works as expected - I mean on your computer.
Once you have validated it, do as follows:
Zip the entire folder's content, including node_modules directory and any dependency that you use.
Upload it to Lambda.
If you accidentally zipped the containing folder as well, that is fine, just make sure to update Lambda to run the script from: dir_name/file_name.function_name (don't forget to export function_name from your module).

The handler name is always <filename>.<handler function name>; if the filename is specified incorrectly, such an error is also thrown in the CloudWatch logs.

Related

Nodejs: wget, unzip and convert to js without writing to file

Well the title says it all, I'm trying to write a script (that runs in a nodejs/express server-side application) that leverages libraries request, unzip and xml2js to perform a task consisting of fetching a zip file from a given url, whose content is an xml file which I need to parse to a javascript object for some further processing.
So far I've managed to come up with:
var express = require("express");
var app = express();
/* some init code omitted */
var request = require("request");
var unzip = require("unzip");
var xml2js = require("xml2js");
var parser = new xml2js.Parser();
// GET /import: streams the remote zip through unzip.Parse() and, for every
// archive entry, writes the entry to disk and then tries to read it back so
// it can be parsed as XML.
// NOTE(review): `fs` and `util` are used below but are not required in the
// visible code — presumably part of the omitted init code; confirm.
app.get("/import", function(req, res) {
request("http://path.to/file.zip")
.pipe(unzip.Parse())
.on("entry", function(entry) {
//This is what I'm trying to avoid, which doesn't even work
// The pipe below only starts the write; nothing waits for it to finish
// before fs.readFile is called on the same path.
entry.pipe(fs.createWriteStream(entry.path));
fs.readFile(entry.path, function(err, data) {
if(err) {
return res.status(500).send(err);
}
// The `err` passed to this parse callback is never checked.
parser.parseString(data, function(err, obj) {
console.log(util.inspect(obj));
/* further processing of obj */
});
});
});
});
Albeit the fact the contents of the xml file are correctly written to disk, I'm looking for an alternative to this approach for two reasons:
to save disk space, since I don't really need to keep the xml file anyway once it has been converted to js
it doesn't even work: fs.readFile probably starts reading the file before fs.createWriteStream is done writing it, because the line console.log(util.inspect(obj)) logs null (whereas if I run only the innermost fs.readFile block and replace entry.path with the name of the previously written file, it produces the desired output)
I wish I could jot down a jsFiddle for this but I'm clueless as to how, when it comes to expressjs applications. Cheers.
EDITED
Piping is unnecessary, parse data directly from the entry stream:
/**
 * GET /import: downloads the zip archive, picks the `needed.xml` entry out of
 * the unzip stream and parses it in memory, without writing anything to disk.
 *
 * @param {Object} req - Express request (not read here).
 * @param {Object} res - Express response; receives a 500 if the XML cannot
 *   be parsed.
 */
app.get("/import", function(req, res) {
  request("http://link-top.zip")
    .pipe(unzip.Parse())
    .on("entry", function(entry) {
      if (entry.path !== 'needed.xml') {
        // Entries that are not consumed must be drained, otherwise the
        // unzip stream stalls waiting for a reader.
        entry.autodrain();
        return;
      }

      // Keep the raw chunks and decode once at the end: decoding each chunk
      // separately corrupts multi-byte characters split across two chunks.
      var chunks = [];
      entry.on('data', function(data) {
        chunks.push(Buffer.isBuffer(data) ? data : Buffer.from(data));
      });
      entry.on('end', function () {
        // Named `xml` rather than `res` so the HTTP response is not shadowed.
        var xml = Buffer.concat(chunks).toString();
        parser.parseString(xml, function(err, obj) {
          if (err) {
            return res.status(500).send(err);
          }
          console.log(util.inspect(obj));
          /* further processing of obj */
        });
      });
    });
});

local PDF file scraping in node.js

I have uploaded a pdf via a MEAN stack web application using fs. I want to extract certain fields from the pdf and display them on the web app. I have looked at a couple npm packages like pdf.js, pdf2json. I can't figure out the documentation and javascript callbacks used in the examples available. Please help!
I hope I can help answer your question. Using pdf2json can be used to parse a pdf and extract the text. There are a couple of steps that need to be taken to get it working. I have adapted the example from https://github.com/modesty/pdf2json.
The setup is to install pdf2json in the node app, and also underscore. The example page didn't explain the need to define your own callback functions. It also used self instead of this to register them. So, with the appropriate changes the code to extract all the text from the pdf will be something like this:
// Dependencies: both must already be installed into ./node_modules
// (run `npm install <dep>` in the root directory of your app).
var PDFParser = require('pdf2json');
var _ = require('underscore');

var pdfParser = new PDFParser();

// Data-ready handler: walks every page of the parsed pdf and prints each
// text block to the console. `console.log(JSON.stringify(pdf))` shows how
// the parsed pdf is composed, which helps when drilling down to the data
// you are actually after.
var _onPDFBinDataReady = function (pdf) {
  console.log('Loaded pdf:\n');
  var pages = pdf.data.Pages;
  for (var pageKey in pages) {
    var texts = pages[pageKey].Texts;
    for (var textKey in texts) {
      console.log(texts[textKey].R[0].T);
    }
  }
};

// Error handler: simply logs whatever the parser reports.
var _onPDFBinDataError = function (error) {
  console.log(error);
};

// Underscore binds both handlers to `this` before they are registered on the
// parser's events. Unlike the upstream example, `this` is used instead of
// `self`, because `self` has no meaning in this context.
var boundDataReady = _.bind(_onPDFBinDataReady, this);
var boundDataError = _.bind(_onPDFBinDataError, this);
pdfParser.on('pdfParser_dataReady', boundDataReady);
pdfParser.on('pdfParser_dataError', boundDataError);

// Path of the pdf to load; the data-ready handler fires once it is parsed.
var pdfFilePath = 'test3.pdf';
pdfParser.loadPDF(pdfFilePath);
I am running the code out of my server side controller.
module.exports = (function() {
return {
add: function(req, res) {
var tmp_path = req.files.pdf.path;
var target_path = './uploads/' + req.files.pdf.name;
fs.rename(tmp_path, target_path, function(err) {
if (err) throw err;
// delete the temporary file, so that the explicitly set temporary upload dir does not get filled with unwanted files
fs.unlink(tmp_path, function() {
if (err) throw err;
//edit here pdf parser
res.redirect('#/');
});
})
},
show: function(req, res) {
var pdfParser = new PDFParser();
var _onPDFBinDataReady = function (pdf) {
console.log('Loaded pdf:\n');
for (var i in pdf.data.Pages) {
var page = pdf.data.Pages[i];
// console.log(page.Texts);
for (var j in page.Texts) {
var text = page.Texts[j];
// console.log(text.R[0].T);
}
}
console.log(JSON.stringify(pdf));
};
// Create an error handling function
var _onPDFBinDataError = function (error) {
console.log(error);
};
pdfParser.on('pdfParser_dataReady', _.bind(_onPDFBinDataReady, this));
// Register error handling function
pdfParser.on('pdfParser_dataError', _.bind(_onPDFBinDataError, this));
// Construct the file path of the pdf
var pdfFilePath = './uploads/Invoice_template.pdf';
// Load the pdf. When it is loaded your data ready function will be called.
pdfParser.loadPDF(pdfFilePath);
},
//end controller
}

Better place to setup an initialisation in SailsJS

I'm new to Sails and don't know exactly where to put the initialisation of an object to be unique in all the app. After reading the docs I assumed that I can have it in the global sails object, but not sure if is the better place.
I'm using the new Appcelerator ArrowDB to store my users and objects. Docs talk about declare the appropriate vars and use it, with the APP_KEY.
var ArrowDB = require('arrowdb'),
arrowDBApp = new ArrowDB('<App Key>');
/**
 * Logs a user in through ArrowDB using the credentials in the request body
 * and writes the outcome to the console.
 *
 * @param {Object} req - Request whose `body` carries `username` and `password`.
 * @param {Object} res - Response object, forwarded to ArrowDB unchanged.
 */
function login(req, res) {
  var credentials = {
    login: req.body.username,
    password: req.body.password,
    // the req and res parameters are optional
    req: req,
    res: res
  };

  function onLoginResult(err, result) {
    if (err) {
      console.error("Login error:" + (err.message || result.reason));
      return;
    }
    console.log("Login successful!");
    var user = result.body.response.users[0];
    console.log("UserInfo: " + JSON.stringify(user));
  }

  arrowDBApp.usersLogin(credentials, onLoginResult);
}
But I will need to use constantly that arrowDBApp var to create, update, delete objects in the database, so I think the best way is to initialize it in the starting script app.js and share across the app.
I tried it, but I was not able to store it in the sails var, it seems that this var is not available (or lose its config) until sails.lift() is executed.
This code (app.js file) shows nothing in the console:
// Entry script: locates `sails` and `rc`, attaches an ArrowDB client to the
// sails object and then lifts the app with the rc-loaded configuration.

// Ensure we're in the project directory, so relative paths work as expected
// no matter where we actually lift from.
process.chdir(__dirname);
// Ensure a "sails" can be located:
(function() {
var sails;
try {
sails = require('sails');
} catch (e) {
console.error('To run an app using `node app.js`, you usually need to have a version of `sails` installed in the same directory as your app.');
console.error('To do that, run `npm install sails`');
console.error('');
console.error('Alternatively, if you have sails installed globally (i.e. you did `npm install -g sails`), you can use `sails lift`.');
console.error('When you run `sails lift`, your app will still use a local `./node_modules/sails` dependency if it exists,');
console.error('but if it doesn\'t, the app will run with the global sails instead!');
// Without sails there is nothing to lift, so stop here.
return;
}
// Try to get `rc` dependency
var rc;
try {
rc = require('rc');
} catch (e0) {
try {
// Second attempt: the copy of `rc` nested inside sails' own node_modules.
rc = require('sails/node_modules/rc');
} catch (e1) {
console.error('Could not find dependency: `rc`.');
console.error('Your `.sailsrc` file(s) will be ignored.');
console.error('To resolve this, run:');
console.error('npm install rc --save');
// Fallback stub so that rc('sails') below yields an empty config object.
rc = function () { return {}; };
}
}
// My own code
var APP_KEY = 'mykey';
var ArrowDB = require('arrowdb');
// Attach the client to the sails object before lifting.
sails.arrowDBApp = new ArrowDB(APP_KEY);
// NOTE(review): JSON.stringify throws on circular structures; if `sails`
// contains any, execution would stop here before lift() — confirm.
console.log("Hi" + JSON.stringify(sails));
// Start server
sails.lift(rc('sails'));
console.log("Finish");
})();
No "HI" and no "Finish" are printed. If I try to use sails.arrowDBApp in another controller, it is undefined.
Tips are welcome.
It's not advisable to modify app.js unless you really need to.
The usual space to save all configuration information (e.g. the APP_KEY) is in the config directory in your project root.
One-time initializations (e.g. ArrowDB initialization) can be added to config/bootstrap.js.
Update
In config/arrowdb.js (you need to create this file yourself):
// ArrowDB settings exposed as `sails.config.arrowdb`.
module.exports.arrowdb = {
// Application key handed to the ArrowDB constructor.
APP_KEY: 'yourappkey',
// Placeholder for the shared client instance; starts out as null.
ArrowDBApp: null
};
In config/bootstrap.js:
var ArrowDB = require('arrowdb');

// Bootstrap hook: builds the ArrowDB client from the configured APP_KEY,
// stores it on sails.config.arrowdb, then hands control back via `next`.
module.exports.bootstrap = function (next) {
  var arrowConfig = sails.config.arrowdb;
  arrowConfig.ArrowDBApp = new ArrowDB(arrowConfig.APP_KEY);
  next(); // Don't forget to add this
};
In your controller:
// Example controller action that uses the ArrowDB client kept on
// sails.config.arrowdb; the `...` stands for the real call arguments.
'task': function(req, res, next) {
sails.config.arrowdb['ArrowDBApp'].usersLogin(...);
// and so on.
// You could also add something like
// var ADB = sails.config.arrowdb['ArrowDBApp'];
// at the top in case you need to use it on and on.
}
Use config/bootstrap.js to initialize something before Sails is lifted. This approach is also good when you want to put something in a global variable, for example defining/overriding the native Promise with Bluebird's Promise.
Use api/services to put some method or other things that you will use regularly in your code (controllers, models, etc.), like Mail Service, that handle sending email within your application.
Use the config folder to predefine something at sails.config[something]. It can be an object, a function, or anything else that should be configurable, such as a Twitter API key for using the Twitter REST API.
To achieve what you wanted, I'll try to use service and bootstrap.js. Try this example.
Create service file at api/services/ArrowDBService.js
Put with this code:
var ArrowDB = require('arrowdb'),
arrowDBApp = new ArrowDB('<App Key>');
module.exports = {
arrowDBApp : arrowDBApp,
login : function (req, res) {
var data = {
login: req.body.username,
password: req.body.password,
// the req and res parameters are optional
req: req,
res: res
};
arrowDBApp.usersLogin(data, function(err, result) {
if (err) {
console.error("Login error:" + (err.message || result.reason));
} else {
console.log("Login successful!");
console.log("UserInfo: " + JSON.stringify(result.body.response.users[0]));
}
});
}
};
Now you can use it via sails.services.arrowdbservice.login(req,res) or simply ArrowDBService.login(req,res) (note that the names are case-sensitive). Since I don't know ArrowDB, you may need to explore for yourself how the login method from your example behaves.

replicate pouchDB document with couchDB

I have used pouchDB in one application and now I want to introduce couchDB to sync the document to remote server. Hence i followed this link http://pouchdb.com/getting-started.html i used the below code to replicate the data to couchDB
var db2 = new PouchDB('todos');
var remoteCouch = 'http://localhost:5984/_utils/database.html?couchdb_sample';
// Listen to the local database's live change feed, starting from now, and
// call showTodos on every change.
// NOTE(review): showTodos is not defined in the visible code.
db2.changes({
since: 'now',
live: true
}).on('change', showTodos);
// Start replication right away (sync is a function declaration further down).
sync();
/**
 * Replicates the local `db2` database to `remoteCouch` once.
 * Logs "done" when replication completes and logs the error object if it
 * fails.
 */
function sync() {
  //alert("sync");
  //syncDom.setAttribute('data-sync-state', 'syncing');
  //var opts = {live: true};
  db2.replicate.to(remoteCouch).on('complete', function () {
    console.log("done");
  }).on('error', function (err) {
    console.log(err);
  });
} // this closing brace was missing, which nested the next function inside sync
/**
 * Stores a new todo document in the local `db2` database. The document id is
 * the value of `#eid` followed by the value of `#version`; the name and
 * version are read from the `#nameid` and `#version` fields.
 *
 * @param {string} text - Value stored as the document's `title`.
 */
function addTodo(text) {
  var doc = {
    _id: $("#eid").val()+$("#version").val(),
    title: text,
    name: $("#nameid").val(),
    version: $("#version").val(),
    completed: false
  };

  db2.put(doc, function callback(putError, result) {
    if (putError) {
      console.log(putError);
      return;
    }
    console.log('Successfully posted a todo!');
  });
}
here the title has an xml string as value. But i am facing below error
SyntaxError: Unexpected token <
at Object.parse (native)
for this line db2.replicate.to(remoteCouch). I manually created a new document in couchDb database and entered the same data it gave no error but when i try replicating it shows syntax error. Can anyone please hint me where I have gone wrong
http://localhost:5984/_utils/database.html?couchdb_sample
Points to a HTML site (copied over from the browsers address bar, right?). Remove the middle part:
http://localhost:5984/couchdb_sample
It looks like you have not defined the remote database in the way PouchDB expects. You should use the "new PouchDB" call. The second line of your code is:
var remoteCouch = 'http://localhost:5984/_utils/database.html?couchdb_sample';
but I think it should be like this:
var remoteCouch = new PouchDB('http://localhost:5984/couchdb_sample');
I am not clear from your code what the name of the remote database is, but it would not normally end in ".html" as Ingo Radatz pointed out, so I have assumed it is couchdb_sample above. There is more information about replication on the PouchDb site.

Renaming files using node.js

I have a folder with 260 .png files with different country names: Afghanistan.png, Albania.png, Algeria.png, etc.
I have a .json file with a piece of code with all the ISO codes for each country like this:
{
"AF" : "Afghanistan",
"AL" : "Albania",
"DZ" : "Algeria",
...
}
I would like to rename the .png files to their ISO codes in lower case. That means I would like to end up with the following output in my folder with all the .png images:
af.png, al.png, dz.png, etc.
I was trying to research by myself how to do this with node.js, but I am a little lost here and I would appreciate some clues a lot.
You'll need to use fs for that: http://nodejs.org/api/fs.html
And in particular the fs.rename() function:
var fs = require('fs');
// Rename one flag image to its lower-case ISO code, as asked in the question;
// any failure is only logged.
fs.rename('/path/to/Afghanistan.png', '/path/to/af.png', function(err) {
if ( err ) console.log('ERROR: ' + err);
});
Put that in a loop over your freshly-read JSON object's keys and values, and you've got a batch renaming script.
/**
 * Builds the lower-case file name for an ISO country code.
 *
 * @param {string} code - ISO code as it appears in the JSON keys, e.g. "AF".
 * @returns {string} File name such as "af.png".
 */
function isoFileName(code) {
  return code.toLowerCase() + '.png';
}

// Read the ISO-code -> country-name map and rename every "<Country>.png" to
// its lower-case "<iso>.png". A failed rename is logged and does not stop the
// remaining ones.
fs.readFile('/path/to/countries.json', function(error, data) {
  if (error) {
    console.log(error);
    return;
  }
  var countries;
  try {
    countries = JSON.parse(data);
  } catch (parseError) {
    // A malformed JSON file should be reported, not thrown from the callback.
    console.log(parseError);
    return;
  }
  Object.keys(countries).forEach(function(code) {
    var from = '/path/to/' + countries[code] + '.png';
    var to = '/path/to/' + isoFileName(code);
    fs.rename(from, to, function(err) {
      if ( err ) console.log('ERROR: ' + err);
    });
  });
});
(This assumes here that your .json file is trustworthy and that it's safe to use its keys and values directly in filenames. If that's not the case, be sure to escape those properly!)
For synchronous renaming use fs.renameSync
// Synchronous variant; the target uses the lower-case ISO code, as asked in the question.
fs.renameSync('/path/to/Afghanistan.png', '/path/to/af.png');
fs.readdir(path, callback)
fs.rename(old,new,callback)
Go through http://nodejs.org/api/fs.html
One important thing - you can use sync functions also. (It will work like C program)
For linux/unix OS, you can use the shell syntax
// Rename by running `mv` directly. The paths are passed as separate
// arguments instead of being interpolated into a shell command string, so
// spaces or shell metacharacters in a path cannot break or alter the command.
const { execFileSync } = require('child_process');
const currentPath= `/path/to/name.png`;
const newPath= `/path/to/another_name.png`;
// `--` stops option parsing so a path starting with "-" is not read as a flag.
execFileSync('mv', ['--', currentPath, newPath]);
That's it!
Here's an updated version of the script that renames a file of any directory;
i.e => "C:\Users\user\Downloads"
const fs = require('fs');

// Where the file lives now, and where it should end up.
const sourcePath = 'C:\\Users\\user\\Downloads\\oldFileName.jpg';
const targetPath = 'C:\\Users\\user\\Downloads\\newFileName.jpg';

// Runs once the rename has finished: rethrows on failure, otherwise reports
// success on the console.
function onRenamed(err) {
  if (err) {
    throw err;
  }
  console.log('File Renamed!');
}

fs.rename(sourcePath, targetPath, onRenamed);
This script renames a file with a specific path and file name, in this case, "C:\Users\user\Downloads\oldFileName.jpg" to "C:\Users\user\Downloads\newFileName.jpg" using the "fs" module in Node.js. The "rename" function takes in the current file name, the new file name, and a callback function that will be called after the file has been renamed. If there is an error, it will throw an error. Otherwise, it will print "File Renamed!" to the console.

Categories