So I'm trying to update some ids in a categories tree using TreeModelJS.
After editing, I would like to dump the tree to a file in JSON format, but when I output it, the other keys from TreeModel get output as well.
How could I output the edited tree as JSON (model only)?
I managed to replace the other keys' values with null, and so far I have this:
const axios = require('axios')
const TreeModel = require('tree-model')
const fs = require('fs')

const url = 'https://my-api-uri-for-categories'
const dumpPath = `${process.cwd()}/data/test/categories.json`

const getCategories = async () => {
  try {
    const response = await axios.get(url)
    return response.data.categories
  } catch (error) {
    console.log('Error reading categories', error)
  }
}

const dumpJsonTofile = data => {
  try {
    console.log('Dumping to file')
    console.log(data)
    fs.writeFileSync(
      dumpPath,
      JSON.stringify(data, (k, v) => {
        if (k === 'parent' || k === 'config' || k === 'children') return null
        else return v
      }),
      'utf8'
    ) // write it back
  } catch (error) {
    console.log('Error dumping categories', error)
  }
}

const scraping = async category => {
  try {
    const response = await axios.get(category.url)
    const document = response.data
    const json = document.match(/{"searchTerm"(.*);/g)[0]
    const data = JSON.parse(json.replace(';', ''))
    return data
  } catch (error) {
    console.log(`Error while scraping category: ${category.name}`, error)
  }
}

async function run() {
  const categories = await getCategories()
  const categoriesTree = new TreeModel({
    childrenPropertyName: 'items',
  })
  const root = categoriesTree.parse({ id: 0, origin: {}, items: categories })
  root.walk(async node => {
    const category = node.model
    console.log(`scraping category: ${category.name}...`)
    if (!category.url) return console.log(`skipping (root?)...`)
    const data = await scraping(category)
    category.id = data.categoryId
  })
  dumpJsonTofile(root)
}

run()
But that still outputs a Node object like this:
{
  "config": null,
  "model": {},
  "children": null
}
I need to output the whole tree, showing only the model key's value for each item.
Try JSON.stringify(root.model).
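For instance (a minimal sketch based on the code above): tree-model keeps the object you pass to parse() as each node's model and mutates it in place, so after the walk, root.model is the edited tree without TreeModel's bookkeeping keys (config, children, parent), and no replacer is needed:

const dumpJsonTofile = data => {
  try {
    console.log('Dumping to file')
    // serialize only the parsed data, not the TreeModel Node wrapper
    fs.writeFileSync(dumpPath, JSON.stringify(data, null, 2), 'utf8')
  } catch (error) {
    console.log('Error dumping categories', error)
  }
}

dumpJsonTofile(root.model) // instead of dumpJsonTofile(root)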
I'm using Node.js; here's the code:
import fetch from 'node-fetch';
import { JSDOM } from 'jsdom';
import { Appartment } from './models/Appartment.mjs'

let applist = []
let multipleDivs = []

async function kijAppartments() {
  try {
    const kijCall = await fetch(`https://www.kijiji.ca/b-ville-de-montreal/appartement-4-1-2/k0l1700281?rb=true&dc=true`);
    if (!kijCall.ok) {
      throw new Error(
        `HTTP error: ${kijCall.status}`
      )
    }
    const response = await kijCall.text()
    const dom = new JSDOM(response)
    multipleDivs = dom.window.document.querySelectorAll(".info-container")
    // console.log(multipleDivs)
    return multipleDivs
  }
  catch (error) {
    console.log("Error Made")
    console.log(error)
  }
}

async function arrayOfApps() {
  await kijAppartments()
    .then(data => {
      data.forEach(div => {
        const newApp = new Appartment()
        newApp.price = div.childNodes[1].innerText
        newApp.title = div.childNodes[3].innerText
        newApp.description = div.childNodes[7].innerText
        console.log(newApp)
      })
    })
}

await arrayOfApps()
If you go to this link and try const aList = document.querySelectorAll(".info-container") in the browser console, you get access to all of the nodes; innerHTML and innerText both work and give you the actual values. But for some reason, when I run this code in the terminal, the value of all my objects' properties is undefined.
You should use textContent instead of innerText. jsdom doesn't do layout, and innerText depends on rendering, so jsdom leaves it unimplemented and it comes back undefined; textContent is part of the core DOM and works fine.
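A quick way to see the difference under jsdom (a sketch with a made-up element, not taken from the page above):

const { JSDOM } = require('jsdom')

const dom = new JSDOM('<div class="price">1200$</div>')
const div = dom.window.document.querySelector('.price')
console.log(div.innerText)   // undefined: jsdom leaves innerText unimplemented
console.log(div.textContent) // "1200$"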
Here's my solution:
const fetch = (...args) => import('node-fetch').then(({ default: fetch }) => fetch(...args));
const jsdom = require('jsdom');
const { JSDOM } = jsdom;

class Appartment {
  price
  title
  description
  location
}

let multipleDivs = []
const appartments = []

function trim(text) {
  return text.replace(/(\r\n|\n|\r)/gm, "").trim()
}

async function fetchKijijiAppartments() {
  const url = `https://www.kijiji.ca/b-ville-de-montreal/appartement-4-1-2/k0l1700281?rb=true&dc=true`
  try {
    const kijijiRes = await fetch(url);
    if (!kijijiRes.ok) {
      throw new Error(
        `HTTP error: ${kijijiRes.status}`
      )
    }
    const response = await kijijiRes.text()
    // console.log("DB: ", response)
    const dom = new JSDOM(response)
    multipleDivs = dom.window.document.querySelectorAll(".info-container")
    // console.log("DB: ", multipleDivs)
    return multipleDivs
  } catch (error) {
    console.log("Error Made")
    console.log(error)
  }
}

async function scrapeAppartments() {
  await fetchKijijiAppartments()
    .then(data => {
      data.forEach(div => {
        const appartement = new Appartment()
        appartement.price = trim(div.querySelector(".price").textContent)
        appartement.title = trim(div.querySelector(".title").textContent)
        appartement.description = trim(div.querySelector(".description").textContent)
        console.log("DB: ", appartement)
        appartments.push(appartement)
      })
    })
}

scrapeAppartments()
I am initializing a Node.js app with crucial data for the app to work, loaded from a database in index.ts.
index.ts
import { getInitialData } from 'initData.ts';

export let APP_DATA: AppData;

export const initializeAppData = async () => {
  try {
    APP_DATA = (await getInitialData()) as AppData;
    if (process.env.NODE_ENV !== 'test') {
      initializeMongoose();
      startServer();
    }
  } catch (error) {
    console.log(error);
  }
};
initData.ts
let dbName: string = 'initialData';
if (process.env.NODE_ENV === 'test') {
  dbName = 'testDb';
}

const uri = `${process.env.MONGODB_URI}/?maxPoolSize=20&w=majority`;

export async function getInitialData() {
  const client = new MongoClient(uri);
  try {
    await client.connect();
    const database = client.db(dbName);
    const configCursor = database
      .collection('config')
      .find({}, { projection: { _id: 0 } });
    const config = await configCursor.toArray();
    const aaoCursor = database
      .collection('aao')
      .find({}, { projection: { _id: 0 } });
    const aao = await aaoCursor.toArray();
    return { config, aao };
  } catch (err) {
    console.log(err);
  } finally {
    await client.close();
  }
}
I'm using this array in another file and import it there.
missionCreateHandler
import { APP_DATA } from '../index';

export const addMissionResources = (
  alarmKeyword: AlarmKeyword,
  newMission: MissionDocument
) => {
  const alarmKeywordObject = APP_DATA?.aao.find(
    (el) => Object.keys(el)[0] === alarmKeyword
  );
  const resourceCommand = Object.values(alarmKeywordObject!);

  resourceCommand.forEach((el) => {
    Object.entries(el).forEach(([key, value]) => {
      for (let ii = 1; ii <= value; ii++) {
        newMission.resources?.push({
          initialType: key,
          status: 'unarranged',
        });
      }
    });
  });
};
I'm setting up a mongodb-memory-server in globalSetup.ts for Jest and copying the relevant data into the database from JSON files.
globalSetup.ts
export = async function globalSetup() {
  const instance = await MongoMemoryServer.create({
    instance: { dbName: 'testDb' },
  });
  const uri = instance.getUri();
  (global as any).__MONGOINSTANCE = instance;
  process.env.MONGODB_URI = uri.slice(0, uri.lastIndexOf('/'));
  process.env.JWT_SECRET = 'testSECRET';

  const client = new MongoClient(
    `${process.env.MONGODB_URI}/?maxPoolSize=20&w=majority`
  );
  try {
    await client.connect();
    const database = client.db('testDb');
    await database.createCollection('aao');
    // @ts-ignore
    await database.collection('aao').insertMany(aao['default']);
  } catch (error) {
    console.log(error);
  } finally {
    await client.close();
  }
};
missionCreateHandler.test.ts
test('it adds the correct mission resources to the array', async () => {
const newMission = await Mission.create({
address: {
street: 'test',
houseNr: 23,
},
alarmKeyword: 'R1',
});
const expected = {
initialType: 'rtw',
status: 'unarranged',
};
addMissionResources('R1', newMission);
expect(newMission.resources[0].initialType).toEqual(expected.initialType);
expect(newMission.resources[0].status).toEqual(expected.status);
});
When running the test, I get a 'TypeError: Cannot convert undefined or null to object at Function.values()'. So it seems that the APP_DATA object is not set. I checked that the mongodb-memory-server is set up correctly and fed with the needed data.
When I hardcode the content of APP_DATA in index.ts, the test runs without problems.
So my questions are: what is the best practice for setting up initial data in a Node.js app, and where should it be stored (a global object, or a simple variable imported in the files where needed)? And how can the test run successfully, or is my code just untestable?
Thank you!
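A minimal sketch of one way the test could pass, assuming the root cause is that nothing awaits initializeAppData in the test environment, so APP_DATA is still undefined when addMissionResources runs. Awaiting the initializer in a beforeAll hook (after globalSetup.ts has pointed MONGODB_URI at the in-memory server) would populate APP_DATA before the assertions:

import { initializeAppData } from '../index';

beforeAll(async () => {
  // assumption: this pulls config/aao from the mongodb-memory-server
  // seeded in globalSetup.ts
  await initializeAppData();
});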
I am trying to save the values returned from the Indeed API to JSON. I use the indeed-scraper code from GitHub: https://github.com/rynobax/indeed-scraper
My code:
... required files ...
const parsedResults = []

indeed.query(queryOptions).then(response => {
  response.forEach((res, i) => {
    setTimeout(function () {
      let url = res.url
      let resultCount = 0
      console.log(`\n Scraping of ${url} initiated...\n`)
      const getWebsiteContent = async (url) => {
        try {
          const response = await axios.get(url)
          const $ = cheerio.load(response.data)
          ...get scraped data...
          parsedResults.push(metadata)
        } catch (error) {
          exportResults(parsedResults)
          console.error(error)
        }
      }
      getWebsiteContent(url)
    }, i * 3000);
  });
});

const outputFile = 'data.json'
const fs = require('fs');

const exportResults = (parsedResults) => {
  fs.writeFile(outputFile, JSON.stringify(parsedResults, null, 4), (err) => {
    if (err) {
      console.log(err)
    }
    console.log(`\n ${parsedResults.length} Results exported successfully to ${outputFile}\n`)
  })
}
parsedResults is not accessible in the last portion of the script, so I can't save it as a JSON file.
Any help appreciated!
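One possible sketch, assuming the real problem is timing rather than scope: parsedResults is module-level, but nothing calls exportResults after the delayed scrapes finish (it only runs in the catch block). Wrapping each setTimeout in a Promise and exporting once they have all resolved would do it, reusing the same axios/cheerio requires elided above:

indeed.query(queryOptions).then(response => {
  const jobs = response.map((res, i) =>
    new Promise(resolve => {
      setTimeout(async () => {
        try {
          const page = await axios.get(res.url)
          const $ = cheerio.load(page.data)
          // ...get scraped data into metadata...
          parsedResults.push(metadata)
        } catch (error) {
          console.error(error)
        }
        resolve()
      }, i * 3000)
    })
  )
  // export once, after every delayed scrape has settled
  Promise.all(jobs).then(() => exportResults(parsedResults))
})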
I am trying to create a function for my custom resolver that gets all documents in a collection and returns an amended payload with new data. Below is the code that I'm using to get one client and amend its data:
exports = (input) => {
  const clientId = input._id;
  const openStatusId = new BSON.ObjectId("898999");
  const mongodb = context.services.get("mongodb-atlas");
  const clientRecords = mongodb.db("db-name").collection("clients");
  const jobRecords = mongodb.db("db-name").collection("jobs");

  let client = clientRecords.findOne({ "_id": clientId });

  const query = { "client_id": clientId };
  let jobsForClient = jobRecords.count(query)
    .then(items => {
      console.log(`Successfully found ${items} documents.`)
      // items.forEach(console.log)
      return items
    })
    .catch(err => console.error(`Failed to find documents: ${err}`));

  let openJobs = jobRecords.count({ "client_id": clientId, "status": openStatusId })
    .then(numOfDocs => {
      console.log(`Found ${numOfDocs} open jobs.`)
      // items.forEach(console.log)
      return numOfDocs
    })
    .catch(err => console.error(`Failed to find documents: ${err}`));

  return Promise.all([client, jobsForClient, openJobs]).then(values => {
    return { ...values[0], "jobs": values[1], "openJobs": values[2] }
  })
};
How can I fix this function to get all clients and loop over them to add data to each client?
I understand that changing this:
let client = clientRecords.findOne({"_id": clientId});
to this
let clients = clientRecords.find();
will get all the documents from the clients collection. How would I loop over each client after that?
UPDATE:
I have updated the function to the below, and it works when running it in the Realm environment, but it gives me an error when running it as a GraphQL query.
Updated code:
exports = (input) => {
  const openStatusId = new BSON.ObjectId("999999");
  const mongodb = context.services.get("mongodb-atlas");
  const clientRecords = mongodb.db("db-name").collection("clients");
  const jobRecords = mongodb.db("db-name").collection("jobs");

  const clients = clientRecords.find();
  const formatted = clients.toArray().then(cs => {
    return cs.map((c, i) => {
      const clientId = c._id;
      const query = { "client_id": clientId };
      let jobsForClient = jobRecords.count(query)
        .then(items => {
          console.log(`Successfully found ${items} documents.`)
          // items.forEach(console.log)
          return items
        })
        .catch(err => console.error(`Failed to find documents: ${err}`));
      let openJobs = jobRecords.count({ "client_id": clientId, "status": openStatusId })
        .then(numOfDocs => {
          console.log(`Found ${numOfDocs} open jobs.`)
          // items.forEach(console.log)
          return numOfDocs
        })
        .catch(err => console.error(`Failed to find documents: ${err}`));
      return Promise.all([jobsForClient, openJobs]).then(values => {
        return { ...c, "jobs": values[0], "openJobs": values[1] };
      });
    })
  }).catch(err => console.error(`Failed: ${err}`));

  return Promise.all([clients, formatted]).then(values => {
    return values[1]
  }).catch(err => console.error(`Failed to find documents: ${err}`));
};
Error in GraphQL:
"message": "pending promise returned that will never resolve/reject",
It looks like you need to wait for the last promise in your function to resolve before the function returns. I would do something like this:
exports = async (input) => {
  ...
  let values = await Promise.all([jobsForClient, openJobs]);
  return { ...c, "jobs": values[0], "openJobs": values[1] };
}
Managed to solve it by using a MongoDB aggregation. Solution below:
exports = async function (input) {
  const openStatusId = new BSON.ObjectId("xxxxxx");
  const mongodb = context.services.get("mongodb-atlas");
  const clientRecords = mongodb.db("xxxxx").collection("xxxx");
  const jobRecords = mongodb.db("xxxxx").collection("xxxx");

  return clientRecords.aggregate([
    {
      $lookup: {
        from: "jobs",
        localField: "_id",
        foreignField: "client_id",
        as: "totalJobs"
      }
    },
    {
      $addFields: {
        jobs: { $size: "$totalJobs" },
        openJobs: {
          $size: {
            $filter: {
              input: "$totalJobs",
              as: "job",
              cond: { "$eq": ["$$job.status", openStatusId] },
            }
          }
        },
      }
    }
  ]);
};
This is the module that collects and exports async data: scraper.js
const express = require('express')
const cheerio = require('cheerio')
const request = require("tinyreq")
const fs = require('fs')
const _ = require('lodash')
const uuid = require('uuid/v4')
const async = require('async')

const mental_models = {
  url: 'https://www.farnamstreetblog.com/mental-models/',
  data: {}
}
const decision_making = {
  url: 'https://www.farnamstreetblog.com/smart-decisions/',
  data: {}
}
const cognitive_bias = {
  url: 'https://betterhumans.coach.me/cognitive-bias-cheat-sheet-55a472476b18',
  data: {}
}
const DATA_URLS = [mental_models, decision_making, cognitive_bias]

const filterScrape = async (source, params) => {
  let filtered_data = {
    topics: [],
    content: [],
    additional_content: []
  }
  let response = await scrape(source)
  try {
    let $ = cheerio.load(response)
    params.forEach((elem) => {
      let headers = ['h1', 'h2', 'h3']
      if ($(elem) && headers.includes(elem)) {
        let topic = {}
        let content = {}
        let id = uuid()
        topic.id = id
        topic.text = $(elem).text()
        if ($(elem).closest('p')) {
          content.text = $(elem).closest('p').text()
          content.id = id
        }
        filtered_data.topics.push(topic)
        filtered_data.content.push(content)
      } else if ($(elem) && !headers.includes(elem)) {
        let content = {}
        let id = uuid()
        content.text = $(elem).text()
        content.id = id
        filtered_data.additional_content.push(content)
      } else {
      }
    })
  }
  catch (err) {
    console.log(err)
  }
  return filtered_data
}

const scrape = (source) => {
  return new Promise((resolve, reject) => {
    request(source.url, function (err, body) {
      if (err) {
        reject(err)
        return
      }
      resolve(body)
    })
  })
}

const DATA = _.map(DATA_URLS, async (source) => {
  let params = ['h1', 'h2', 'h3', 'p']
  let new_data = await filterScrape(source, params)
  try {
    source.data = new_data
  }
  catch (err) {
    console.log(err)
  }
})

module.exports = DATA
This is the module that imports the data: neural.js
const brain = require('brain')
const neural_net = new brain.NeuralNetwork()
const DATA = require('./scraper')
console.log(DATA)
Obviously there's not much going on; I've removed the code since the variable doesn't resolve. When logged, it logs a promise, but the promise does not resolve. However, in the imported module, the promise is logged and then resolves. What gives? Should I import a function that resolves the data?
Of course it would be best to import that function; however, that won't change the issue in your code, which is here:
const DATA = _.map(DATA_URLS, async (source) => {
Lodash doesn't support async iteration, so you need some other method. One would be to use the newest Node.js version (10.x) and make use of async iteration, but that won't use the full power of asynchronous code.
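For example (a sketch of that idea, not part of the original module): since _.map with an async callback just returns an array of pending promises, you could export an async function that awaits them all with plain Promise.all:

module.exports = async () =>
  Promise.all(DATA_URLS.map(async (source) => {
    let params = ['h1', 'h2', 'h3', 'p']
    source.data = await filterScrape(source, params)
    return source
  }))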
You can also use scramjet, a framework my company supports. The code above would take the following form:
const { DataStream } = require("scramjet");

const DATA_URLS = [mental_models, decision_making, cognitive_bias];

module.exports = async () => DataStream.fromArray(DATA_URLS)
  .setOptions({ maxParallel: 2 }) // if you need to limit that at all.
  .map(async (source) => {
    let params = ['h1', 'h2', 'h3', 'p']
    let data = await filterScrape(source, params);
    return { url: source.url, data };
  })
  .toArray();
The other file would take the following form:
const brain = require('brain')
const neural_net = new brain.NeuralNetwork()
const scraper = require('./scraper');

(async () => {
  const DATA = await scraper();
  console.log(DATA); // or do whatever else you were expecting...
})();