The original code that scrapes the first page of data works, but then I created a loop that clicks a "load more" button and scrapes the data until there is no more "load more" button. At the end of my run, it is not exporting anything. Is my code for exporting to CSV incorrect? Where am I going wrong with this?
const puppeteer = require('puppeteer');
const jsonexport = require('jsonexport');
(async () => {
const browser = await puppeteer.launch({ headless: false }); // default is true
const page = await browser.newPage();
await page.goto('https://www.Website.com/exercises/finder', {
waitUntil: 'domcontentloaded',
});
//load more CSS to be targeted
const LoadMoreButton =
'#js-ex-content > #js-ex-category-body > .ExCategory-results > .ExLoadMore > .bb-flat-btn';
do {
// clicking load more button and waiting 1sec
await page.click(LoadMoreButton);
await page.waitFor(1000);
const loadMore = true;
const rowsCounts = await page.$eval(
'.ExCategory-results > .ExResult-row',
(rows) => rows.length
);
//scraping the data
const exerciseNames = [];
for (let i = 2; i < rowsCounts + 1; i++) {
const exerciseName = await page.$eval(
`.ExCategory-results > .ExResult-row:nth-child(${i}) > .ExResult-cell > .ExHeading > a`,
(el) => el.innerText
);
exerciseNames.push(exerciseName);
}
console.log({exerciseNames});
} while (10000);
const allData = [
{
exercise: exerciseNames,
},
];
// exporting data to CSV
const options = [exercise];
//json export error part
jsonexport(allData, options, function (err, csv) {
if (err) return console.error(err);
console.log(csv);
});
await browser.close();
})().catch((e) => {
console.error(e);
});
Edit:
This is what I have at the moment for exporting and writing to a CSV file. I'm getting 3 headers, but only the exercises are being written and then nothing else. console.log shows the exercises, muscle target groups, and equipment being exported, though. I'm trying to get it so there are 3 headers (name, equipment, and targeted muscle) and each row is filled in under them. E.g. squat, barbell, legs would be in one row, each in its own cell.
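In other words, the CSV I'm after looks roughly like this (illustrative values, not my actual data):

exercise,muscleGroup,equipment
Squat,Quadriceps,Barbell
Bench Press,Chest,Barbell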
Current export code:
const allData = [
{
exercise: exerciseNames,
muscleGroup: muscleTargets,
equipment: equipmentTypes,
},
];
var ws = fs.createWriteStream('test1.csv');
csv.write(allData, { headers: true, delimiter: ',' }).pipe(ws);
//json export error part
jsonexport(allData, function (err, csv) {
if (err) return console.error(err);
console.log(csv);
});
Edit 2
This is currently my entire code. It outputs the pre-filled allData info, but none of the newly scraped data.
const puppeteer = require('puppeteer');
const jsonexport = require('jsonexport');
const fs = require('fs');
(async () => {
const browser = await puppeteer.launch({ headless: false }); // default is true
const page = await browser.newPage();
await page.goto('https://www.website.com/exercises/finder', {
waitUntil: 'domcontentloaded',
});
const loadMore = true;
const rowsCounts = await page.$$eval(
'.ExCategory-results > .ExResult-row',
(rows) => rows.length
);
let allData = [];
for (let i = 2; i < rowsCounts + 1; i++) {
const exerciseName = await page.$eval(
`.ExCategory-results > .ExResult-row:nth-child(${i}) > .ExResult-cell > .ExHeading > a`,
(el) => el.innerText
);
const muscleGroupName = await page.$eval(
`.ExCategory-results > .ExResult-row:nth-child(${i}) > .ExResult-cell > .ExResult-muscleTargeted > a`,
(el) => el.innerHTML
);
const equipmentName = await page.$eval(
`.ExCategory-results > .ExResult-row:nth-child(${i}) > .ExResult-cell > .ExResult-equipmentType > a`,
(el) => el.innerHTML
);
let obj = {
exercise: exerciseName,
muscleGroup: muscleGroupName,
equipment: equipmentName,
};
allData.push(obj);
}
console.log(allData);
async function fn() {
const allData = [
{
exercise: 'Rickshaw Carry',
muscleGroup: 'Forearms',
equipment: 'Other',
},
{
exercise: 'Single-Leg Press',
muscleGroup: 'Quadriceps',
equipment: 'Machine',
},
{
exercise: 'Landmine twist',
muscleGroup: 'Forearms',
equipment: 'Other',
},
{
exercise: 'Weighted pull-up',
muscleGroup: 'Forearms',
equipment: 'Other',
},
];
// json export error part
jsonexport(allData, function (err, csv) {
if (err) return console.error(err);
console.log(csv);
fs.writeFileSync('output.csv', csv);
});
}
fn();
await browser.close();
})().catch((e) => {
console.error(e);
});
I see two issues here.
I.) One of them is with the options declaration:
const options = [exercise]; // ❌
You are trying to access the exercise property of the allData object without proper notation. If you really need to extract it into a new array, you can do it by going into the first element of the allData array using index [0], then using dot notation to access the exercise property.
const options = [allData[0].exercise]; // ✅
Note: I suggest leaving options as simply allData[0].exercise (without the wrapping array), as your allData object is already an array; I see no benefit in making the structure deeper.
II.) The second issue is with the usage of the jsonexport npm package. I suppose you left allData in this line accidentally:
jsonexport(allData, options, function (err, csv) // ❌
You only need options here (according to the docs, you can pass only one object as the input):
jsonexport(options, function (err, csv) // ✅
Edit
Based on your updated question, your problem can be solved if you restructure your allData object a bit, so that jsonexport recognizes each column and row correctly.
const jsonexport = require('jsonexport')
const fs = require('fs')
async function fn() {
const allData = [
{
exercise: 'Rickshaw Carry',
muscleGroup: 'Forearms',
equipment: 'Other'
},
{
exercise: 'Single-Leg Press',
muscleGroup: 'Quadriceps',
equipment: 'Machine'
},
{
exercise: 'Landmine twist',
muscleGroup: 'Forearms',
equipment: 'Other'
},
{
exercise: 'Weighted pull-up',
muscleGroup: 'Forearms',
equipment: 'Other'
}
]
// json export error part
jsonexport(allData, function (err, csv) {
if (err) return console.error(err)
console.log(csv)
fs.writeFileSync('output.csv', csv)
})
}
fn()
To achieve such a structure, you should extend allData in each iteration like this:
let allData = []
for (let i = 2; i < rowsCounts; i++) {
const exerciseName = await page.$eval(`...row:nth-child(${i})...`,
el => el.textContent.trim())
const muscleGroupName = await page.$eval(`...row:nth-child(${i})...`,
el => el.textContent.trim())
const equipmentName = await page.$eval(`...row:nth-child(${i})...`,
el => el.textContent.trim())
let obj = {
exercise: exerciseName,
muscleGroup: muscleGroupName,
equipment: equipmentName
}
allData.push(obj)
}
console.log(allData)
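As a side note (not part of your original code, just a sketch): if you would rather avoid three $eval calls per row, you could gather all three columns in a single page.$$eval pass, assuming the same selectors you already use:

// Runs in the page context: map every result row to one plain object.
const allData = await page.$$eval('.ExCategory-results > .ExResult-row', (rows) =>
  rows
    // Skip rows without an exercise link (e.g. header or ad rows).
    .filter((row) => row.querySelector('.ExHeading > a'))
    .map((row) => ({
      exercise: row.querySelector('.ExHeading > a').innerText.trim(),
      muscleGroup: row.querySelector('.ExResult-muscleTargeted > a').innerText.trim(),
      equipment: row.querySelector('.ExResult-equipmentType > a').innerText.trim(),
    }))
);

The resulting allData array is already in the row-per-object shape that jsonexport expects.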
Related
I have a function that receives data. I use an asynchronous promise to get a link to the document (item.poster = await Promise.all(promises)), but then the data does not have time to be added to the array and I get an empty array. If I remove the code where I get the link to the document, everything works fine. In debug mode I can see all the data, so why am I getting an empty array?
async FetchData({ state, commit }, to) {
try {
const q = query(collection(db, to));
await onSnapshot(q, (querySnapshot) => {
let data = [];
querySnapshot.forEach(async (doc) => {
let promises = [];
let item = {
id: doc.id,
name: doc.data().name,
slug: doc.data().slug,
country: doc.data().country,
duration: doc.data().duration,
year: doc.data().year,
video: doc.data().video,
genres: doc.data().genres,
actors: doc.data().actors,
};
if (to === "films") {
const starsRef = ref(storage, `images/${doc.id}/poster.png`);
promises.push(getDownloadURL(starsRef));
item.poster = await Promise.all(promises);
}
data.push(item);
});
commit("setData", { data, to });
});
} catch (err) {
console.error(err);
} }
forEach and async do not work nicely together; forEach is not awaitable. But the solution is simple: .map and Promise.all():
// async not really needed
async FetchData({ state, commit }, to) {
const q = query(collection(db, to));
// TODO: Call the unsubscribeFn method in order to stop
// onSnapshot callback executions (when needed)
const unsubscribeFn = onSnapshot(q, (querySnapshot) => {
const allPromises = querySnapshot.docs.map(async (doc) => {
const docData = doc.data();
let item = {
id: doc.id,
name: docData.name,
slug: docData.slug,
country: docData.country,
duration: docData.duration,
year: docData.year,
video: docData.video,
genres: docData.genres,
actors: docData.actors
};
// Or you can use
// let item = { id: doc.id, ...docData };
if (to === "films") {
const starsRef = ref(storage, `images/${doc.id}/poster.png`);
item.poster = await getDownloadURL(starsRef);
}
return item;
});
Promise.all(allPromises)
.then((data) => commit("setData", { data, to }))
.catch(console.error);
});
}
I get this error whenever I try to test my code with Hardhat:
Error: call revert exception; VM Exception while processing transaction: reverted with reason string "ERC721: invalid token ID" [ See: https://links.ethers.org/v5-errors-CALL_EXCEPTION ] (method="tokenURI(uint256)", data="0x08c379a0000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000000184552433732313a20696e76616c696420746f6b656e2049440000000000000000", errorArgs=["ERC721: invalid token ID"], errorName="Error", errorSignature="Error(string)", reason="ERC721: invalid token ID", code=CALL_EXCEPTION, version=abi/5.6.4)
Here is my script-test.js code.
Where could the error be coming from, and how can I solve it?
const { expect } = require("chai");
describe("NFTMarket", function (){
it("Should create and execute market sale", async function(){
const Market = await ethers.getContractFactory("NFTMarket")
const market = await Market.deploy();
await market.deployed()
const marketAddress = market.address
const NFT = await ethers.getContractFactory("NFT")
const nft = await NFT.deploy(marketAddress)
await nft.deployed()
const nftContractAddress = nft.address
let listingPrice = await market.getListingPrice()
listingPrice = listingPrice.toString()
const auctionPrice = ethers.utils.parseUnits('100', 'ether')
await nft.createToken("https://www.mytokenlocation.com")
await nft.createToken("https://www.mytokenlocation2.com")
await market.createMarketItem(nftContractAddress, 1, auctionPrice, { value:listingPrice })
await market.createMarketItem(nftContractAddress, 2, auctionPrice, { value:listingPrice })
const [_, buyerAddress] = await ethers.getSigners()
await market.connect(buyerAddress).createMarketSale(nftContractAddress, 1, {value: auctionPrice})
let items = await market.fetchMarketItems()
items = await Promise.all(items.map(async i => {
const tokenUri = await nft.tokenURI(i.tokenId)
let item = {
price: i.price.toString(),
tokenId: i.tokenId.toString(),
seller: i.seller,
owner: i.owner,
tokenUri
}
return item
}))
console.log('items: ', items)
});
});
It looks like you haven't minted the NFT yet. Try minting it first.
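A quick way to confirm this in the test itself (my own sketch, assuming your NFT contract exposes the standard ERC721 functions): right after the two createToken calls, check that the tokens exist before the market functions touch them.

// If createToken really minted token IDs 1 and 2, these calls succeed;
// if nothing was minted, they revert with "ERC721: invalid token ID".
console.log(await nft.ownerOf(1))
console.log(await nft.tokenURI(1))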
So I tried following the https://discordjs.guide/additional-info/rest-api.html guide before making my own. But I can't get either to work.
Firstly, with /cat it crashes and the console returns:
SyntaxError: Unexpected end of JSON input
at JSON.parse (<anonymous>)
at getJSONResponse (BOTLOCATION\index.js:77:14)
at processTicksAndRejections (node:internal/process/task_queues:96:5)
at async Client.<anonymous> (BOTLOCATION\index.js:90:20)
And /urban works, but no matter what term I enter it returns null.
Here is the code; it's nearly identical to the guide's apart from the added SlashCommandBuilder and REST.
const { request } = require('undici');
const clientId = 'CLIENTID_HERE';
const guildId = 'GUILDID_HERE';
const { SlashCommandBuilder } = require('#discordjs/builders');
const { REST } = require('#discordjs/rest');
const { Routes } = require('discord-api-types/v9');
const commands = [
new SlashCommandBuilder().setName('cat').setDescription('Cat thing idk'),
new SlashCommandBuilder().setName('urban').setDescription('Urban Dictionary Thing'),
]
.map(command => command.toJSON());
const rest = new REST({ version: '9' }).setToken("TOKEN_HERE");
rest.put(Routes.applicationGuildCommands(clientId, guildId), { body: commands })
//rest.put(Routes.applicationGuildCommands(clientId), { body: commands })
.then(() => console.log('Successfully registered application commands.'))
.catch(console.error);
const trim = (str, max) => (str.length > max ? `${str.slice(0, max - 3)}...` : str);
async function getJSONResponse(body) {
let fullBody = '';
for await (const data of body) {
fullBody += data.toString();
}
return JSON.parse(fullBody);
}
client.on('interactionCreate', async interaction => {
if (!interaction.isCommand()) return;
const { commandName } = interaction;
await interaction.deferReply();
if (commandName === 'cat') {
const catResult = await request('https://aws.random.cat/meow');
const { file } = await getJSONResponse(catResult.body);
interaction.reply({ files: [{ attachment: file, name: 'cat.png' }] });
} else if (commandName === 'urban') {
const term = interaction.options.getString('term');
const query = new URLSearchParams({ term });
const dictResult = await request(`https://api.urbandictionary.com/v0/define?${query}`);
const { list } = await getJSONResponse(dictResult.body);
if (!list.length) {
return interaction.editReply(`No results found for **${term}**.`);
}
const [answer] = list;
const embed = new MessageEmbed()
.setColor('#EFFF00')
.setTitle(answer.word)
.setURL(answer.permalink)
.addFields(
{ name: 'Definition', value: trim(answer.definition, 1024) },
{ name: 'Example', value: trim(answer.example, 1024) },
{
name: 'Rating',
value: `${answer.thumbs_up} thumbs up. ${answer.thumbs_down} thumbs down.`,
},
);
interaction.editReply({ embeds: [embed] });
}
});
For the cat command, since there is a deferReply first, we need to use editReply, because deferReply counts as the first/initial reply.
await interaction.deferReply();
const catResult = await request('https://aws.random.cat/meow').catch((err) => { console.log(err); });
const { file } = await getJSONResponse(catResult.body).catch((err) => { console.log(err); });
return await interaction.editReply({ files: [{ attachment: file, name: 'cat.png' }] });
I also added a .catch to the end of each await; this was just for testing, but I recommend keeping it.
Now for the urban command: the reason it returns null is that you aren't receiving the string option's text. We can check for it by adding an if statement.
await interaction.deferReply();
const term = interaction.options.getString('term');
if (!term) return await interaction.editReply('Please provide a term.'); // We need to add this check to see if the user provided the term option or not.
const query = new URLSearchParams({ term });
const dictResult = await request(`https://api.urbandictionary.com/v0/define?${query}`);
const { list } = await getJSONResponse(dictResult.body);
if (!list.length) {
return interaction.editReply(`No results found for **${term}**.`);
}
const [answer] = list;
const embed = new MessageEmbed()
.setColor('#EFFF00')
.setTitle(answer.word)
.setURL(answer.permalink)
.addFields(
{ name: 'Definition', value: trim(answer.definition, 1024) },
{ name: 'Example', value: trim(answer.example, 1024) },
{
name: 'Rating',
value: `${answer.thumbs_up} thumbs up. ${answer.thumbs_down} thumbs down.`,
},
);
return await interaction.editReply({ embeds: [embed] });
IMPORTANT: When you are building your slash command, you are not setting a string option. In the commands array, when creating the second slash command called urban, we will add support for the string option there. (For an example using the string option, see the discord.js guide's page on command options.)
This is how we can do this:
const commands = [
new SlashCommandBuilder().setName('cat')
.setDescription('Cat thing idk'),
new SlashCommandBuilder()
.setName('urban')
.setDescription('Urban Dictionary Thing')
.addStringOption((option) => option.setName('term').setDescription('term')) // We first add the string option then set the name to 'term' which is what the code calls for and then the description.
].map((command) => command.toJSON());
If you would like to make the term input required, add .setRequired(true), which will not allow the command to be run without entering the term to search.
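For example, the urban builder from the snippet above would then become (same builder, just with the option marked as required):

new SlashCommandBuilder()
  .setName('urban')
  .setDescription('Urban Dictionary Thing')
  // Identical 'term' option, but the command can no longer run without it.
  .addStringOption((option) =>
    option.setName('term').setDescription('term').setRequired(true))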
Once you do that, you should be all good! I tested the code, and it works once that's fixed.
How do we handle multiple query promises in Node.js and Mongo using an async function?
For example, I wanted to add a new query, shown below, and then handle the gallery result the same way the file result is being handled.
const gallery = await gfs.findManyAsync({ metadata: vehicle.VIN })
Here is my current code
async function myFunction() {
gfs.findOneAsync = promisify(gfs.findOne);
gfs.findManyAsync = promisify(gfs.files.find);
const totalCount = await Vehicle.model.count({})
const vehicles = await Vehicle.model
.find(query, {}, pagination)
.sort(sortOrd + sortType)
const totalPages = Math.ceil(totalCount / size)
const promises = vehicles.map(async vehicle => {
let ImageList = vehicle.ImageList.split(',')
let profile_name = ImageList[0].split('/').pop();
const file = await gfs.findOneAsync({
$and: [{
$or: [{
metadata: vehicle.VIN
}]
},
{
$or: [{
filename: profile_name
}]
}
]
})
const gallery = await gfs.findManyAsync({ metadata: vehicle.VIN })
// const gallery = await gfs.findManyAsync({ metadata: vehicle.VIN })
// console.log("Gallery :" , gallery)
if (file) {
return new Promise(resolve => {
const readstream = gfs.createReadStream(file.filename);
const url = `http://localhost:3002/api/vehicle/file/${file.filename}`
vehicle.FileData = url
resolve()
})
}
})
try { } catch { (err) }
await Promise.all(promises)
return res.status(200).send({
message: "success",
pageNo: pageNo,
totalRecords: vehicles.length,
data: vehicles,
totalPages: totalPages
})
}
This is the module that collects and exports async data: scraper.js
const express = require('express')
const cheerio = require('cheerio')
const request = require("tinyreq")
const fs = require('fs')
const _ = require('lodash')
const uuid = require('uuid/v4')
const async = require('async')
const mental_models = {
url: 'https://www.farnamstreetblog.com/mental-models/',
data: {}
}
const decision_making = {
url: 'https://www.farnamstreetblog.com/smart-decisions/',
data: {}
}
const cognitive_bias = {
url: 'https://betterhumans.coach.me/cognitive-bias-cheat-sheet-55a472476b18',
data: {}
}
const DATA_URLS = [mental_models, decision_making, cognitive_bias]
const filterScrape = async (source, params) => {
let filtered_data = {
topics: [],
content: [],
additional_content: []
}
let response = await scrape(source)
try {
let $ = cheerio.load(response)
params.forEach((elem) => {
let headers = ['h1', 'h2', 'h3']
if ($(elem) && headers.includes(elem)) {
let topic = {}
let content = {}
let id = uuid()
topic.id = id
topic.text = $(elem).text()
if ($(elem).closest('p')) {
content.text = $(elem).closest('p').text()
content.id = id
}
filtered_data.topics.push(topic)
filtered_data.content.push(content)
} else if ($(elem) && !headers.includes(elem)) {
let content = {}
let id = uuid()
content.text = $(elem).text()
content.id = id
filtered_data.additional_content.push(content)
} else {
}
})
}
catch (err) {
console.log(err)
}
return filtered_data
}
const scrape = (source) => {
return new Promise((resolve, reject) => {
request(source.url, function (err, body) {
if (err) {
reject(err)
return
}
resolve(body)
})
})
}
const DATA = _.map(DATA_URLS, async (source) => {
let params = ['h1', 'h2', 'h3', 'p']
let new_data = await filterScrape(source, params)
try {
source.data = new_data
}
catch (err) {
console.log(err)
}
})
module.exports = DATA
This is the module that imports the data: neural.js
const brain = require('brain')
const neural_net = new brain.NeuralNetwork()
const DATA = require('./scraper')
console.log(DATA)
Obviously there's not much going on; I've removed the code since the variable doesn't resolve. When logged, it logs a promise, but the promise does not resolve. However, in the imported module, the promise is logged and then resolves. What gives? Should I import a function that resolves the data?
Of course it would be best to import that function; however, it won't change the issue in your code, which is here:
const DATA = _.map(DATA_URLS, async (source) => {
Lodash doesn't support async iteration, so you need some other method. One option would be to use the newest Node.js version (10.x) and make use of async iteration, but that won't use the full power of asynchronous code.
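If you don't want to add another dependency, a minimal alternative (my sketch, reusing filterScrape and DATA_URLS from your question) is to map the URLs to promises and export an async function that awaits them all:

// scraper.js - plain Promise.all variant, no extra library needed
module.exports = async () =>
  Promise.all(
    DATA_URLS.map(async (source) => {
      const params = ['h1', 'h2', 'h3', 'p'];
      const data = await filterScrape(source, params);
      return { url: source.url, data };
    })
  );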
You can also use scramjet, a framework my company supports. The code above would take the following form:
const {DataStream} = require("scramjet");
const DATA_URLS = [mental_models, decision_making, cognitive_bias];
module.exports = async () => DataStream.fromArray(DATA_URLS)
.setOptions({maxParallel: 2}) // if you need to limit that at all.
.map(async (source) => {
    let params = ['h1', 'h2', 'h3', 'p']
    let data = await filterScrape(source, params);
    return { url: source.url, data };
})
.toArray();
The other file would take the following form:
const brain = require('brain')
const neural_net = new brain.NeuralNetwork()
const scraper = require('./scraper')
(async () => {
const DATA = await scraper();
console.log(DATA); // or do whatever else you were expecting...
})();