Save to database sequentially with NodeJS - javascript

I am trying to write a back-end in NodeJS for a newsreader app. The idea is to check a number of RSS feeds every few minutes, save new items to a database, and emit an event to connected users when a new item is added to the database.
I am having trouble writing a module which saves to the database. The desired behaviour is as follows:
Take the parsed RSS feeds, which are ordered in an array from newest to oldest, as input
For each item, starting with the newest, attempt to save it in the database
If the save was successful, log 'Save successful' to the console and attempt the next item
If the save fails, because the item already exists in the database, stop running the module.
The database model is already configured so that the database rejects an attempt to save an item which already exists. However, the code below never logs the successful saves, and if I drop the database, it only saves the first document and then gives me a duplicate key error.
Any idea what I am doing wrong?
// Module
var { mongoose } = require('../db/mongoose');
var { Item } = require('../models/item');
var { scrape } = require('./scrape')
var { parse } = require('./parse')

var updateNewsDatabase = function() {
    return new Promise((resolve, reject) => {
        console.log('Scraping now')
        scrape().then((rssFeeds) => {
            var output = parse(rssFeeds);
            saveEachNewsItem(output)
                .catch((e) => {
                    console.log('Error:', e);
                    resolve()
                })
        })
    });
}

async function saveEachNewsItem(newsItems) {
    for (let item of newsItems) {
        console.log('Attempting to save document')
        var itemToSave = new Item(item);
        await itemToSave.save()
            .then((err, docs) => {
                if (docs) {
                    console.log('Saved document to database')
                }
                if (err) {
                    throw new Error(err)
                }
            });
    }
}

module.exports = { updateNewsDatabase }

In this part of your code you are throwing an exception, and that is what stops the for loop. A promise's .then callback only receives the resolved value as its first argument, so err here is actually the saved document and docs is always undefined: the success message never logs, and throw new Error(err) fires after every successful save, which rejects saveEachNewsItem.
await itemToSave.save()
    .then((err, docs) => {
        if (docs) {
            console.log('Saved document to database')
        }
        if (err) {
            throw new Error(err)
        }
    });
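A minimal sketch of the loop with the .then removed, using try/catch around the await instead (this assumes the duplicate rejection surfaces as a MongoDB duplicate-key error with code 11000; adjust the check to whatever your model actually throws):

async function saveEachNewsItem(newsItems) {
    for (let item of newsItems) {
        console.log('Attempting to save document');
        try {
            await new Item(item).save();
            console.log('Saved document to database');
        } catch (e) {
            // A duplicate key means we have reached items that are already stored
            if (e.code === 11000) {
                console.log('Item already in database, stopping');
                return;
            }
            throw e; // anything else is a real error
        }
    }
}

With that shape, updateNewsDatabase can simply return the chain scrape().then(rssFeeds => saveEachNewsItem(parse(rssFeeds))) rather than wrapping everything in a new Promise that, as currently written, only resolves in the error path.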

Related

Reading Parquet objects in AWS S3 from node.js

I need to load and interpret Parquet files from an S3 bucket using node.js. I've already tried parquetjs-lite and other npm libraries I could find, but none of them seems to interpret date-time fields correctly. So I'm trying AWS's own SDK instead, in the belief that it should be able to deserialize its own Parquet format correctly -- the objects were originally written from SageMaker.
The way to go about it, apparently, is to use the JS version of
https://docs.aws.amazon.com/AmazonS3/latest/API/API_SelectObjectContent.html
but the documentation for that is horrifically out of date (it's referring to the 2006 API, https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#selectObjectContent-property). Likewise, the example they show in their blog post doesn't work either (data.Payload is neither a ReadableStream nor iterable).
I've already tried the responses in
Javascript - Read parquet data (with snappy compression) from AWS s3 bucket. Neither of them works: the first uses
node-parquet, which doesn't currently compile, and the second uses parquetjs-lite (which doesn't work, see above).
So my question is, how is SelectObjectContent supposed to work nowadays, i.e., using aws-sdk v3?
import { S3Client, ListBucketsCommand, GetObjectCommand,
         SelectObjectContentCommand } from "@aws-sdk/client-s3";

const REGION = "us-west-2";
const s3Client = new S3Client({ region: REGION });

const params = {
    Bucket: "my-bucket-name",
    Key: "mykey",
    ExpressionType: 'SQL',
    Expression: 'SELECT created_at FROM S3Object',
    InputSerialization: {
        Parquet: {}
    },
    OutputSerialization: {
        CSV: {}
    }
};

const run = async () => {
    try {
        const data = await s3Client.send(new SelectObjectContentCommand(params));
        console.log("Success", data);
        const events = data.Payload;
        const eventStream = data.Payload;
        // Read events as they are available
        eventStream.on('data', (event) => { // <--- This fails
            if (event.Records) {
                // event.Records.Payload is a buffer containing
                // a single record, partial records, or multiple records
                process.stdout.write(event.Records.Payload.toString());
            } else if (event.Stats) {
                console.log(`Processed ${event.Stats.Details.BytesProcessed} bytes`);
            } else if (event.End) {
                console.log('SelectObjectContent completed');
            }
        });
        // Handle errors encountered during the API call
        eventStream.on('error', (err) => {
            switch (err.name) {
                // Check against specific error codes that need custom handling
            }
        });
        eventStream.on('end', () => {
            // Finished receiving events from S3
        });
    } catch (err) {
        console.log("Error", err);
    }
};
run();
The console.log shows data.Payload as:
Payload: {
[Symbol(Symbol.asyncIterator)]: [AsyncGeneratorFunction: [Symbol.asyncIterator]]
}
What should I do with that?
I was stuck on this exact same issue for quite some time. It looks like the best option now is to call .promise() on the selectObjectContent request and iterate the resulting Payload.
So far, I've made progress using the following (sorry, this is incomplete, but it should at least enable you to read data):
try {
    const s3Data = await s3.selectObjectContent(params3).promise();
    // using 'any' here temporarily, but will need to address type issues
    const events: any = s3Data.Payload;
    for await (const event of events) {
        try {
            if (event?.Records) {
                if (event?.Records?.Payload) {
                    const record = decodeURIComponent(event.Records.Payload.toString().replace(/\+|\t/g, ' '));
                    records.push(record);
                } else {
                    console.log('skipped event, payload: ', event?.Records?.Payload);
                }
            } else if (event.Stats) {
                console.log(`Processed ${event.Stats.Details.BytesProcessed} bytes`);
            } else if (event.End) {
                console.log('SelectObjectContent completed');
            }
        } catch (err) {
            if (err instanceof TypeError) {
                console.log('error in events: ', err);
                throw err;
            }
        }
    }
} catch (err) {
    console.log('error fetching data: ', err);
    throw err;
}
console.log("final records: ", records);
return records;
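As a side note on the v3 client from the question: the Payload logged above exposes Symbol.asyncIterator, so a similar for await loop appears to work directly on the v3 response as well (a sketch, untested here; in v3 each Records.Payload is a Uint8Array rather than a Buffer):

const data = await s3Client.send(new SelectObjectContentCommand(params));
for await (const event of data.Payload) {
    if (event.Records && event.Records.Payload) {
        // Records.Payload is a Uint8Array chunk of the CSV output
        process.stdout.write(Buffer.from(event.Records.Payload).toString());
    } else if (event.Stats) {
        console.log(`Processed ${event.Stats.Details.BytesProcessed} bytes`);
    } else if (event.End) {
        console.log('SelectObjectContent completed');
    }
}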

Manipulate MySql timestamp value in JavaScript

I'm working with a MySql database and a web application; I use Firebase Functions (Google Cloud Functions) to retrieve data from tables and send it to the web app. When the record's create_dt and update_dt fields make it to the web application, they're an object that doesn't have any accessible properties.
I'm trying to display the create and update date values in my application, but whenever I try to display one of the values, the web application displays [object Object]. Looking at the object in the console, it looks like an empty object with nothing but prototype properties.
I've looked around here and other places on the Internet and found a bunch of articles that show how to manipulate a MySql Timestamp (as a time string) in JavaScript, but none that shows how to actually access the timestamp value.
My goal right now is just to display the time/date value in my app, but ultimately I want to get it as a JavaScript Date object so I can format the output the way I want in my app. Can someone please show me how to do this? I don't get why the timestamp shows up in the browser as an object with no accessible properties.
My function looks like this:
export const get = functions
    .runWith({
        vpcConnector: 'myapp-connector',
        vpcConnectorEgressSettings: 'PRIVATE_RANGES_ONLY'
    })
    .https.onCall((data, context) => {
        // Checking that the user is authenticated.
        if (!context.auth) {
            // Throwing an HttpsError so that the client gets the error details.
            throw new functions.https.HttpsError(AUTHCODE, AUTHMSG);
        }
        const idx = data.idx;
        if (idx) {
            let cmd = `SELECT * FROM companies WHERE id=${idx}`;
            return sqlStuff.executeQuery(cmd)
                .then(result => {
                    functions.logger.log('Query result', result);
                    return { result: result };
                }).catch(err => {
                    functions.logger.log('ERROR', err);
                    return { err: err };
                });
        } else {
            functions.logger.log('Missing index');
            return { result: {} };
        }
    });
The query code looks like this:
export async function executeQuery(cmd: string) {
    const mySQLConfig = {
        connectionLimit: 10,
        host: functions.config().sql.prodhost,
        user: functions.config().sql.produser,
        password: functions.config().sql.prodpswd,
        database: functions.config().sql.proddatabase,
    }
    var pool: any;
    if (!pool) {
        pool = mysql.createPool(mySQLConfig);
    }
    return new Promise(function (resolve, reject) {
        pool.query(cmd, function (error, results) {
            if (error) {
                return reject(error);
            }
            resolve(results);
        });
    });
}
On the client, I'm using AngularFireFunctions since this is an Angular (Ionic) app.
getCompany(idx: number) {
    const companyGet = this.fireFunc.httpsCallable('companyGet');
    companyGet({ idx }).subscribe(
        data => {
            if (data.result && data.result.length > 0) {
                this.company = Object.assign({}, data.result[0]);
            } else {
                this.alertController.create({
                    header: 'Company Lookup',
                    message: `The specified company record (${idx}) does not exist`,
                    buttons: ['OK']
                }).then(alert => {
                    alert.present();
                    this.router.navigate(['/companies']);
                });
            }
        },
        err => {
            this.alertController.create({
                header: 'Company Refresh',
                message: `The process reported the following error: ${err.message}`,
                buttons: ['OK']
            }).then(alert => alert.present());
        },
        () => {
            console.log('CompanyPage: Company request completed');
        }
    );
}

Making a distinction between file not present and access denied while accessing s3 object via Javascript

I have inherited the following code, which is part of a CI/CD pipeline. It tries to get an object called "changes" from a bucket and does something with it. If it is able to grab the object, it sends a success message back to the pipeline. If it fails to grab the file for whatever reason, it sends a failure message back to CodePipeline.
This "changes" file is made in a previous step of the CodePipeline. However, sometimes it is valid for this file NOT to exist (i.e. when there IS no change).
Currently, the code makes no distinction between the file simply not existing and the code failing to get it for some other reason (access denied, etc.).
Desired:
I would like to send a success message back to CodePipeline if the file is simply not there.
If there is an access issue, then the current outcome of "failure" would still be valid.
Any help is greatly appreciated. Unfortunately I am not good enough with Javascript to have any ideas to try.
RELEVANT PARTS OF THE CODE
const AWS = require("aws-sdk");
const s3 = new AWS.S3();
const lambda = new AWS.Lambda();
const codePipeline = new AWS.CodePipeline();

// GET THESE FROM ENV Variables
const {
    API_SOURCE_S3_BUCKET: s3Bucket,
    ENV: env
} = process.env;

const jobSuccess = (CodePipeline, params) => {
    return new Promise((resolve, reject) => {
        CodePipeline.putJobSuccessResult(params, (err, data) => {
            if (err) { reject(err); }
            else { resolve(data); }
        });
    });
};

const jobFailure = (CodePipeline, params) => {
    return new Promise((resolve, reject) => {
        CodePipeline.putJobFailureResult(params, (err, data) => {
            if (err) { reject(err); }
            else { resolve(data); }
        });
    });
};

// MAIN CALLER FUNCTION. STARTING POINT
exports.handler = async (event, context, callback) => {
    try {
        // WHAT IS IN changes file in S3
        let changesFile = await getObject(s3, s3Bucket, `lambda/${version}/changes`);
        let changes = changesFile.trim().split("\n");
        console.log("List of Changes");
        console.log(changes);
        let params = { jobId };
        let jobSuccessResponse = await jobSuccess(codePipeline, params);
        context.succeed("Job Success");
    }
    catch (exception) {
        let message = "Job Failure (General)";
        let failureParams = {
            jobId,
            failureDetails: {
                message: JSON.stringify(message),
                type: "JobFailed",
                externalExecutionId: context.invokeid
            }
        };
        let jobFailureResponse = await jobFailure(codePipeline, failureParams);
        console.log(message, exception);
        context.fail(`${message}: ${exception}`);
    }
};
S3 should return an error code in the exception. The ones you care about are:
AccessDenied - Access Denied
NoSuchKey - The specified key does not exist.
So in your catch block you should be able to check exception.code against these two.
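A sketch of how that could look in the existing catch block (this assumes the error thrown by your getObject helper keeps the SDK's code property intact; log the exception once to confirm the exact shape):

catch (exception) {
    // A missing changes file is an expected, valid case: report success.
    // Note: without s3:ListBucket permission, S3 reports AccessDenied even
    // for missing keys, so the distinction only works if listing is allowed.
    if (exception.code === 'NoSuchKey') {
        await jobSuccess(codePipeline, { jobId });
        context.succeed('Job Success (no changes file)');
        return;
    }
    // Any other error, including AccessDenied, remains a failure.
    let message = "Job Failure (General)";
    let failureParams = {
        jobId,
        failureDetails: {
            message: JSON.stringify(message),
            type: "JobFailed",
            externalExecutionId: context.invokeid
        }
    };
    await jobFailure(codePipeline, failureParams);
    console.log(message, exception);
    context.fail(`${message}: ${exception}`);
}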

Adding multiple BigQuery JSON credential files in Node project

I've been working on a Node project that involves fetching some data from BigQuery. Everything has been fine so far; I have my credential.json file (from BigQuery) and the project works as expected.
However, I want to implement a new feature in the project and this would involve fetching another set of data from BigQuery. I have an entirely different credential.json file for this new dataset. My project seems to recognize only the initial credential.json file I had (I named them differently though).
Here's a snippet of how I linked my first credential.json file:
function createCredentials() {
    try {
        const encodedCredentials = process.env.GOOGLE_AUTH_KEY;
        if (typeof encodedCredentials === 'string' && encodedCredentials.length > 0) {
            const google_auth = atob(encodedCredentials);
            if (!fs.existsSync('credentials.json')) {
                fs.writeFile("credentials.json", google_auth, function (err, google_auth) {
                    if (err) console.log(err);
                    console.log("Successfully Written to File.");
                });
            }
        }
    }
    catch (error) {
        logger.warn(`Ensure that the environment variable for GOOGLE_AUTH_KEY is set correctly: full error is given here: ${error.message}`)
        process.kill(process.pid, 'SIGTERM')
    }
}
Is there a way to fuse my two credential.json files together? If not, how can I declare separately which credential.json file to use?
If not, how can I separately declare which credential.json file to use?
What I would do is create a single function that acts as the exit point to BigQuery and pass it an identifier saying which credential to generate; that credential is then used when calling BigQuery.
The code below assumes you change this:
function createCredentials() {
    try {
        const encodedCredentials = process.env.GOOGLE_AUTH_KEY;
To this:
function createCredentials(auth) {
    try {
        const encodedCredentials = auth;
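One gap worth noting: the original function writes credentials.json but returns nothing, while the usage below passes its return value as keyFilename. A sketch of a version that writes one file per credential set and returns the path (the name parameter and the file-naming scheme are hypothetical, not from the original code):

function createCredentials(auth, name) {
    try {
        // auth is the base64-encoded service-account JSON for this dataset
        const decoded = Buffer.from(auth, 'base64').toString('utf8');
        const fileName = `credentials-${name}.json`; // hypothetical per-dataset file
        if (!fs.existsSync(fileName)) {
            fs.writeFileSync(fileName, decoded);
        }
        return fileName;
    } catch (error) {
        logger.warn(`Could not write credentials for ${name}: ${error.message}`);
        process.kill(process.pid, 'SIGTERM');
    }
}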
And you can use it like this
import BigQuery from '@google-cloud/bigquery';
import { GoogApi } from "../apiManager" // Private code to get Token from client DB

if (!global._babelPolyfill) {
    var a = require("babel-polyfill")
}

describe('Check routing', async () => {
    it('Test stack ', async (done, auth) => {
        // Fetch client Auth from local Database
        // Replace the 2 values below with real values
        const tableName = "myTest";
        const dataset = "myDataset";
        try {
            const bigquery = new BigQuery({
                projectId: `myProject`,
                keyFilename: this.createCredentials(auth)
            });
            await bigquery.createDataset(dataset)
                .then(args => {
                    console.log(`Create dataset, result is: ${args}`)
                })
                .catch(err => {
                    console.log(`Error in the process: ${err.message}`)
                })
        } catch (err) {
            console.log("err", err)
        }
    })
})

How do I copy a node(object) in Firebase to another node?

I have a node called Events in Firebase. It consists of child objects like: address, description, longitude, latitude. Before a user deletes an event node, I want to copy it within the same database to a node called eventsDeleted.
This is the code for deleting the node:
removeEvent(eventId, groupId) {
    return new Promise((resolve, reject) => {
        this.eventRef.child(groupId).child(eventId).remove();
        resolve();
    });
}
This is the code for creating the node:
addEvent(data: any) {
    console.log('Form data', data.group);
    let localEventRef = firebase.database().ref('events').child(data.group.split(',')[1]);
    let storageRef = firebase.storage().ref();
    let file = data.image;
    let uploadTask = storageRef.child('eventImages/' + UuidGenerator.generateUUID()).put(file);
    uploadTask.on('state_changed', function (snapshot) {
    }, function (error) {
        // Handle unsuccessful uploads
        console.error(error);
    }, function () {
        // Handle successful uploads on complete
        let downloadURL = uploadTask.snapshot.downloadURL;
        let keyOfNewEvent = localEventRef.push(
            new Event(
                null,
                firebase.app().auth().currentUser.uid,
                data.description,
                data.location.address,
                0
            )
        ).key;
        localEventRef.child(keyOfNewEvent).update({ eventId: keyOfNewEvent });
    });
}
Never mind the code for uploading an image. I just need a way to copy that entire node, if possible, and paste it somewhere else in the database. Thanks in advance.
When the user clicks delete, first get the object that is being deleted. If you need to query your database for it, you can use .once to retrieve the object; otherwise, if you already have it, you can jump to the removeEvent function directly.
localEventRef.child(keyOfEvent).once("value", function (snapshot) {
    // once data is returned, you can then call the removeEvent fn
    let eventToBeRemoved = snapshot.val();
    // assuming you have the eventId and groupId
    removeEvent(eventId, groupId, eventToBeRemoved);
});

removeEvent(eventId, groupId, eventObjectToBeRemoved) {
    // firebase 3.x comes with a promise API
    firebase.database().ref('eventsDeleted/' + groupId + '/' + eventId)
        .set({ ...eventObjectToBeRemoved })
        .then(function () {
            eventRef.child(groupId).child(eventId).remove(); // you can now remove the original
        })
        .catch(function (error) {
            console.error("error occurred trying to add to deletedEvents", error);
        });
}
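If you want the copy and the delete to happen atomically, a multi-location update can write the copy and remove the original in a single call (a sketch, assuming the live events sit under events/<groupId>/<eventId> as in addEvent above):

moveEventToDeleted(eventId, groupId, eventObject) {
    // Writing null deletes the original path; both writes succeed or fail together
    return firebase.database().ref().update({
        ['eventsDeleted/' + groupId + '/' + eventId]: eventObject,
        ['events/' + groupId + '/' + eventId]: null
    });
}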
