Webscraper function returns undefined - javascript

I'm making a web-scraping function that builds a JSON object from the scraped data. The scraping part works; the weird thing is that the function returns undefined.
getproduct.js
module.exports.getproduct = url => {
fetch(url)
.then(response => response.text())
.then(body => {
let product;
const $ = cheerio.load(body);
product = {
productName: $(".product-name").text()
};
console.log(product);
return product;
});
};
index.js
const {getproduct} = require('./webScraper/getproduct');
console.log(getproduct('https://www.americanas.com.br/produto/134118928'));
The console.log(product); works fine, but the console.log in index.js prints undefined. What am I missing?

Every return statement in JavaScript belongs only to its closest surrounding function. You have one return statement in your code, and it belongs to a different function than you might expect:
.then(body => {
...
return product;
})
So the return statement will only return a value to that function.
Your main function, getproduct, actually has no return statement in it, so it returns undefined. Adding a return in front of your fetch solves that, but we are not done yet:
return fetch(url)
That is because fetch and the .then-s that follow do not just return the value: they return a Promise. Promises are a hard concept, and something I will not be able to explain here, so I would suggest reading more about them if you are not sure about them yet :)
The main take-away is to get the value out of the promise, you have to use .then or await, more on await later, lets stay with .then first:
getproduct('https://www.americanas.com.br/produto/134118928')
.then(product => {
console.log('product:', product);
});
Now, people realized that writing all your code that does something with promises in chains of .then(...).then(...)-s would be a bit frustrating, so we (the javascript community) invented async/await. That way you can write your code like this:
/**
 * Fetches the page at `url` and extracts the product name from its HTML.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<{productName: string}>} Resolves to the scraped product object.
 */
module.exports.getproduct = async (url) => {
// Wait for the HTTP response, then for its body as text.
let response = await fetch(url);
let body = await response.text();
// Parse the HTML so it can be queried with CSS selectors.
let $ = cheerio.load(body);
let product = {
productName: $(".product-name").text()
};
console.log(product);
// This return belongs to getproduct itself, so it becomes the value of the promise the caller awaits.
return product;
};
Now it looks a lot nicer, and you can see that the return statement is in the right function again! Beware though: you must not forget to put await before calls that would normally need a .then at the end, but it is definitely easier.
Now your index.js is a little trickier, as you can only use await in a function that is marked with async, but we can do that:
const {getproduct} = require('./webScraper/getproduct');
let main = async () => {
let product = await getproduct('https://www.americanas.com.br/produto/134118928');
console.log('product:', product);
}
main();
I hope it is a little clearer how you can move forward from here :)

Should add async function wrapper and await promise
// getproduct.js
/**
 * Fetches the page at `url` and extracts the product name from its HTML.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<{productName: string}>} Resolves to the scraped product object.
 */
module.exports.getproduct = url => {
  // The chain must be returned: without this `return`, getproduct yields
  // undefined and the caller's `await` has nothing to wait for.
  return fetch(url)
    .then(response => response.text())
    .then(body => {
      const $ = cheerio.load(body);
      const product = {
        productName: $(".product-name").text()
      };
      console.log('getproduct.js > product', product);
      // Becomes the fulfilment value of the promise returned above.
      return product;
    });
};
//index.js updated file
const {getproduct} = require('./webScraper/getproduct');
(async () => {
const product = await getproduct('https://www.americanas.com.br/produto/134118928');
console.log('index.js > product', product);
})();

Related

Store fetch data in variable to access it later

I'm facing a probably super easy to solve problem regarding fetching.
I'd like to fetch some json datas and store it in a variable to access it later.
The problem is that I always ends up getting undefined in my variable. What's the way to do to deal with that kind of data storing ?
Here's my code.
const fetchCities = () => {
fetch('cities.json')
.then(response => response.json())
.then(data => {
return data;
});
}
let cities = fetchCities();
console.log(cities)
Already looked up for answers but couldn't find a way to do. Thanks !
You could do this very simply with async/await like this:
/**
 * Requests `cities.json` and parses the response body as JSON.
 *
 * @returns {Promise<*>} Resolves to the parsed contents of the file.
 */
const fetchCities = async () => (await fetch('cities.json')).json();
let cities = await fetchCities();
console.log(cities);
Sending a fetch request takes time, so the console.log works before the data arrives.
The best way to deal with fetch is using async functions and await like so:
const fetchCities = ()=>{
return fetch('cities.json');
}
async function main(){
try {
const res = await fetchCities();
const data = await res.json();
// handle the data here, this will work only after the data arrival
console.log(data);
} catch (err) {
console.log(err);
}
}
main();
Note: await can only be used in async functions, that's the main purpose of the main function.
Or if you want to use .then:
const fetchCities = ()=>{
return fetch('cities.json');
}
function main(){
fetchCities()
.then(res => res.json())
.then(data => {
// handle the data here, all your code should be here
})
.catch (err => console.log(err));
}
main();

Promise wont return valid value

I have this test I made just to check an API, but then I tried to add a URL from a second fetch, using as its parameter a value obtained in the first fetch, and then return a value to use in the first fetch. The idea is to add the image URL to the link. Thanks in advance.
function script() {
const url = 'https://pokeapi.co/api/v2/pokemon/?offset=20&limit=20'
const result = fetch(url)
.then( (res)=>{
if(res.ok) {
return res.json()
} else {
console.log("Error!!")
}
}).then( data => {
console.log(data)
const main = document.getElementById('main');
main.innerHTML=`<p><a href='${data.next}'>Next</a></p>`;
for(let i=0; i<data.results.length;i++){
main.innerHTML=main.innerHTML+`<p><a href=${getImageURL(data.results[i].url)}>${data.results[i].name}</a></p>`;
}
})
}
async function getImageURL(imgUrl) {
const resultImg = await fetch(imgUrl)
.then( (res)=> {
return res.json()
})
.then (data => {
console.log(data.sprites.other.dream_world.front_default);
})
return resultImg.sprites.other.dream_world.front_default;
}
In general, don't mix .then/.catch handlers with async/await. There's usually no need, and it can trip you up like this.
The problem is that your fulfillment handler (the .then callback) doesn't return anything, so the promise it creates is fulfilled with undefined.
You could return data, but really just don't use .then/.catch at all:
/**
 * Fetches the JSON document at `imgUrl` and returns the value stored at
 * `sprites.other.dream_world.front_default`.
 *
 * @param {string} imgUrl - Address of the JSON resource to fetch.
 * @returns {Promise<*>} Resolves to the `front_default` value read from the response body.
 * @throws {Error} When the response's `ok` flag is false; the message includes `res.status`.
 */
async function getImageURL(imgUrl) {
const res = await fetch(imgUrl);
// fetch fulfils its promise even for HTTP error statuses, so check explicitly.
if (!res.ok) {
throw new Error(`HTTP error ${res.status}`);
}
const resultImg = await res.json();
return resultImg.sprites.other.dream_world.front_default;
}
[Note I added a check of res.ok. This is (IMHO) a footgun in the fetch API, it doesn't reject its promise on HTTP errors (like 404 or 500), only on network errors. You have to check explicitly for HTTP errors. (I wrote it up on my anemic old blog here.)]
There's also a problem where you use getImageURL:
// Incorrect
for (let i = 0; i < data.results.length; i++) {
main.innerHTML=main.innerHTML+`<p><a href=${getImageURL(data.results[i].url)}>${data.results[i].name}</a></p>`;
}
The problem here is that getImageURL, like all async functions, returns a promise. You're trying to use it as though it returned the fulfillment value you're expecting, but it can't — it doesn't have that value yet.
Instead, you need to wait for the promise(s) you're creating in that loop to be fulfilled. Since that loop is in synchronous code (not an async function), we'd go back to .then/.catch, and since we want to wait for a group of things to finish that can be done in parallel, we'd do that with Promise.all:
// ...
const main = document.getElementById('main');
// Markup for the "Next" link; the per-item paragraphs are appended once resolved.
const html = `<p><a href='${data.next}'>Next</a></p>`;
// Resolve every image URL in parallel; each entry fulfils with one paragraph of markup.
Promise.all(data.results.map(async ({url, name}) => {
  const realUrl = await getImageURL(url);
  return `<p><a href=${realUrl}>${name}</a></p>`;
}))
  .then(paragraphs => {
    // `html` is a const, so build the final markup in one expression rather
    // than reassigning it with `+=` (which would throw a TypeError).
    main.innerHTML = html + paragraphs.join("");
  })
  .catch(error => {
    // ...handle/report error...
  });
For one, your
.then (data => {
console.log(//...
at the end of the promise chain returns undefined. Just remove it, and if you want to console.log it, do console.log(resultImg) in the next statement/next line, after await.
This is the final version that accomplishes my goal. I just want to leave it here in case someone finds it useful. Thanks to those who answered!
/**
 * Loads one page of the Pokémon list and renders it into the element with id
 * `main`: a "next page" link followed by one link per result, each pointing at
 * the URL resolved by `getImageURL`.
 *
 * @returns {Promise<void>} Settles once rendering has finished; failures are
 *   logged to the console rather than propagated.
 */
function script() {
  const url = 'https://pokeapi.co/api/v2/pokemon/?offset=20&limit=20';
  // Return the chain so callers can wait for rendering to complete.
  return fetch(url)
    .then((res) => {
      // fetch does not reject on HTTP error statuses; fail here so the next
      // step never runs with `data` undefined.
      if (!res.ok) {
        throw new Error(`HTTP Error ${res.status}`);
      }
      return res.json();
    })
    .then((data) => {
      console.log(data);
      const main = document.getElementById('main');
      main.innerHTML = `<p><a href='${data.next}'>Proxima Página</a></p>`;
      // Resolve all image URLs in parallel, then append the links in one go.
      return Promise.all(data.results.map(async ({url, name}) => {
        const realUrl = await getImageURL(url);
        return `<div><a href=${realUrl}>${name}</a></div>`;
      }))
        .then((paragraphs) => {
          // Join explicitly: concatenating the array itself would insert commas.
          main.innerHTML = main.innerHTML + paragraphs.join('');
        });
    })
    .catch((error) => {
      console.log(error);
    });
}
/**
 * Downloads the JSON document at `imgUrl` and picks out
 * `sprites.other.dream_world.front_default`.
 *
 * @param {string} imgUrl - Address of the JSON resource to fetch.
 * @returns {Promise<*>} Resolves to the `front_default` value from the response body.
 * @throws {Error} When the response's `ok` flag is false; the message includes the status.
 */
async function getImageURL(imgUrl) {
  const response = await fetch(imgUrl);
  if (response.ok) {
    const payload = await response.json();
    return payload.sprites.other.dream_world.front_default;
  }
  // fetch fulfils even on HTTP error statuses, so surface them explicitly.
  throw new Error(`HTTP Error ${response.status}`);
}

Axios console.log data but return Promise <pending>

I've trying to retrieve the data, but I can't return it, can only see it in the console,
it's a simple axios get function but for some reason, I keep getting Promise even after using async/await.
my goal is to save the data to the memory.
any help would really be appreciated
let fetchTodo = async () => {
await axios.get('https://jsonplaceholder.typicode.com/todos/1')
.then(res => console.log(res.data))
.then(res => { return res })
.catch(err => console.log(err))
};
console.log("TEST: ", fetchTodo())
console
An async function always returns a promise. To get data from the fetchTodo function, you need to create another async function that awaits the result returned by fetchTodo(). If you are using React, you can use state and update it inside the .then chain of the fetchTodo function.
An async function always returns a promise. To get or save the data, you need to take it from the .then() callback. Here is an example; I hope it helps.
/**
 * Requests the sample todo and resolves to the axios response.
 *
 * @returns {Promise<*>} Resolves to the response object (the todo is in its
 *   `data` property), or to undefined when the request fails; the error is
 *   logged by the `.catch` handler.
 */
let fetchTodo = async () => {
  // Return the chain; otherwise the function's promise fulfils with undefined.
  return axios.get('https://jsonplaceholder.typicode.com/todos/1')
    .then(res => {
      console.log(res.data);
      // Pass the response on: a handler that only logs would hand undefined
      // to the next .then.
      return res;
    })
    .then(res => {
      // here you can perform your task, save data, send
      // response or anything else
      return res;
    })
    .catch(err => console.log(err));
};
fetchTodo()
The async/await syntax means a function will return a Promise.
If you want to return the value, you could do something like this:
/**
 * Requests the sample todo with axios.
 *
 * @returns {Promise<*>} Resolves to the axios response, or to undefined when
 *   the request fails (the error is logged, not rethrown).
 */
let fetchTodo = async () => {
  const todoUrl = "https://jsonplaceholder.typicode.com/todos/1";
  try {
    return await axios.get(todoUrl);
  } catch (failure) {
    console.log(failure);
  }
};
// For the following code to work, it must be placed inside an async function as well
const res = await fetchTodo();
console.log(`Test: ${res.data}`);
// If it's a top-level call, use the following code
const res = fetchTodo().then( res => {
const data = res.data;
// The rest of your code goes here.
// ...
// ...
// ...
}).catch( error => {
console.log(error);
});
Some more information about it on: How can I use async/await at the top level?

How to access data in next .then() - fetch api

I wonder if there is a way to access data from then on the next one? Code looks like this:
fetch(`http://localhost:3003/users/${userObject.id}`)
.then(res => res.json())
.then(user => {
...
})
.then(???)
and in second then I have all the needed info about the user. When I put everything in second then it works, but code is very messy and repetitive and I would like to put some things into function. Unfortunately, in that case, I don't have access to some data...
Any ideas? Thanks!
You can pass an argument to the next promise by returning it from the previous one.
fetch(`http://localhost:3003/users/${userObject.id}`)
.then(res => res.json())
.then(user => {
return user.id;
})
.then(userId => {
console.log('user id is:', userId);
});
Also, you can accomplish the same result by using async/await, like below:
/**
 * Loads the user whose id is `userObject.id` from the local users endpoint.
 *
 * @returns {Promise<*>} Resolves to the parsed JSON body of the response.
 */
async function fetchSomeData() {
  const endpoint = `http://localhost:3003/users/${userObject.id}`;
  const response = await fetch(endpoint);
  return response.json();
}
fetchSomeData().then(user => {
console.log('user id is:', user.id)
});
but code is very messy and repetitive and I would like to put some
things into function
Just use async/await syntax, it's much more understandable.
(async() => {
const request = await fetch(`http://localhost:3003/users/${userObject.id}`);
const result = await request.json();
// proceed to do whatever you want with the object....
})();
This is a self-executing function, but you can also declare this in the following way:
const myFunc = async() => {
const request = await fetch(`http://localhost:3003/users/${userObject.id}`);
const result = await request.json();
// your logic here or you can return the result object instead;
};
or even without arrow functions:
const myFunc = async function() {
const request = await fetch(`http://localhost:3003/users/${userObject.id}`);
const result = await request.json();
console.log(result.id);
};

What's the promise chaining equivalent of awaiting multiple async functions?

I'm studying the usage of promises and async/await.
I've written the following code, which does the following:
It gets some database's data (using Knex.js),
Handles that data,
Assigns the handled data into a specified property.
These 3 steps are done multiple times (In the following code, it's done twice), and are always awaited:
async function run() {
return await getData();
}
async function getData() {
let handledData = {};
handledData.res1 = await knex.select('column1').from('table1').where('column1', '1')
.then(data => handleData(data))
.catch(handleError);
handledData.res2 = await knex.select('column1').from('table1').where('column1', '2')
.then(data => handleData(data, handledData))
.catch(handleError);
return handledData;
}
/**
 * Collects the `column1` value of every row.
 *
 * @param {Array<{column1: *}>} data - Rows to read from.
 * @returns {Promise<Array<*>>} Resolves to the `column1` values, in row order.
 */
async function handleData(data) {
  return data.map(({ column1 }) => column1);
}
function handleError (error) {
console.log(error);
}
Now, I'm trying to write the promise-chaining equivalent of getData, and this is what I came up with:
async function getData() {
let handledData = {};
let promise = new Promise(function(resolve, error){ resolve(); });
promise
.then(function () {
return knex.select('column1').from('table1').where('column1', '1')
.then(data => handleData(data))
.catch(handleError);
})
.then(function(handled){
handledData.res1 = handled;
return knex.select('column1').from('table1').where('column1', '2')
.then(data => handleData(data))
.catch(handleError);
})
.then(function(handled){
handledData.res2 = handled;
return handledData;
})
.catch(handleError);
return promise;
}
But this doesn't quite work. What happens is that after the first then returns, the await inside run ends its awaiting, which causes run to return - and only then the second then is executed.
How can I make the promise-chaining version work as the multiple-await version does?
(And please feel free to point out any misunderstandings I have about promises and async/await.)
If possible, I'd recommend using Promise.all instead, it'll make your script run faster in addition to making the logic clearer:
/**
 * Runs both queries in parallel and gathers their handled results.
 *
 * Defined as a function (not a bare promise) so that it can be invoked as
 * `getData()`, and so the queries start when it is called.
 *
 * @returns {Promise<{res1: *, res2: *}>} Resolves once both queries have been handled.
 */
const getData = () => Promise.all([
  knex.select('column1').from('table1').where('column1', '1')
    // Simply pass the function name as a parameter to the `.then`:
    .then(handleData)
    .catch(handleError),
  knex.select('column1').from('table1').where('column1', '2')
    .then(handleData)
    .catch(handleError)
])
  // Each result needs its own binding; repeating a name in the pattern is a SyntaxError.
  .then(([res1, res2]) => ({ res1, res2 }));
knex.select().then() returns a promise, so you don't need to wrap it in another promise you just need to set up the chain of then()s and return the whole thing. The result will be that getData returns the promise from the last then. You can return the value you want from that then() which will make it available to the caller. For example:
function run() {
getData()
.then(handledData => console.log(handledData) /* do something with data */)
}
/**
 * Runs the two queries one after the other and collects their handled results.
 *
 * @returns {Promise<{res1: *, res2: *}>} Resolves to the collected results.
 *   If a step fails, the rejection is passed to `handleError` and the promise
 *   resolves to whatever `handleError` returns.
 */
function getData() {
  let handledData = {};
  // need to return this promise so callers can access it
  return knex.select('column1').from('table1').where('column1', '1')
    // Let the chain unwrap handleData's result before storing it, so this
    // works whether handleData is synchronous or returns a promise.
    .then(data => handleData(data))
    .then(res1 => {
      handledData.res1 = res1;
      return knex.select('column1').from('table1').where('column1', '2');
    })
    .then(data => handleData(data))
    .then(res2 => {
      handledData.res2 = res2;
      return handledData;
    })
    .catch(handleError);
}
You could also set this up to pass the handledData object through the chain, but you don't need to in this case.
The function handleData() is synchronous, so you don't need to make it an async function.

Categories