I have an express server with a POST endpoint that starts a crawler. When the crawler finishes it shuts down the whole server. Am I doing something wrong? How can I prevent it from happening?
The project looks something like this:
// server.js
const express = require('express')
const bodyParser = require('body-parser')
const startSearch = require('./crawler.js')
const app = express()
app.use(bodyParser.json())
app.post('/crawl', async (req, res) => {
const { foo, bar } = req.body
startSearch({ foo, bar })
res.end()
})
app.listen(PORT, () => console.log(`listening on port ${PORT}`))
// crawler.js
const Apify = require('apify')
const startSearch = ({ foo, bar }) => {
Apify.main(async () => {
const sources = [{
url: 'https://path_to_website.com',
userData: { foo, bar }
}]
const requestList = await Apify.openRequestList(null, sources)
const crawler = new Apify.PuppeteerCrawler({
requestList,
handlePageFunction: async ({ request, page }) => {
// do things using puppeteer
}
}
})
await crawler.run()
})
}
Just avoid using Apify.main(). For details, see How to use Apify on Google Cloud Functions
(I thought I'm sending the answer, but it seems it was just a comment)
Related
My app is successfully deployed on Heroku- it works when I have VSCode open and do npm run start manually, however when I close VSCode it is no longer able to successfully call any APIs on the backend and the console shows me a bunch of errors like the one in the title.
my console (ONLY happens when I close VSCode):
my code in the backend:
const PORT = 8000
const express = require('express')
const cors = require('cors')
const {TwitterApi} = require('twitter-api-v2')
const axios = require('axios')
const cheerio = require('cheerio')
require('dotenv').config()
const snoowrap = require('snoowrap')
const linkPreviewGenerator = require('link-preview-generator')
const spotify = require('spotify-web-api-node')
const fetch = require('node-fetch')
var request = require('request')
const app = express()
app.use(cors())
app.get('/', async (req, res) => {
const client = new TwitterApi(process.env.twt_bearer_token)
const trendsInternational = await client.v1.trendsByPlace(1);
const trendList = []
for (const {trends} of trendsInternational) {
for (const trend of trends) {
trendList.push({
name: trend.name,
url: trend.url
})
}
}
res.json(trendList)
})
app.get('/reddit', async (req, res) => {
const r = new snoowrap({
userAgent: process.env.user_agent,
clientId: process.env.client_id,
clientSecret: process.env.client_secret,
refreshToken: process.env.refresh_token
})
topPosts = []
;(await r.getHot()).forEach(post => {
topPosts.push({
title: post.title,
url: post.url
})
})
res.json(topPosts);
})
app.get('/news', async (req, res) => {
const news_url = 'https://www.theguardian.com/international'
axios(news_url)
.then(response => {
const html = response.data;
const $ = cheerio.load(html);
const articles = [];
const values = new Set();
$('.fc-item__title', html).each(function () { //<-- cannot be a function expression
const title = $(this).text().trim();
const url = $(this).find('a').attr('href');
if (!values.has(url)) {
values.add(url);
articles.push({
title,
url
});
}
})
res.json(articles)
}).catch(err => console.log(err))
})
app.listen(PORT, () => console.log(`Server is running on port ${PORT}`))```
Heroku runs on its own port, try setting port like this
const PORT = Number(process.env["PORT"]) || 8000
I wrote the following code for parse some part of HTML for one URL. I means parse page const URL= 'https://www.example.com/1'
Now I want to parse the next page 'https://www.example.com/2' and so on. so I want to implement a For-Loop manner here.
what is the easiest way that I can use the iteration manner here to
change URL (cover page 1,2,3, ...) automatically and run this code in repeat to parse other pages? How I can use for-loop manner here?
const PORT = 8000
const axios = require('axios')
const cheerio = require('cheerio')
const express = require('express')
const app = express()
const cors = require('cors')
app.use(cors())
const url = 'https://www.example.com/1'
app.get('/', function (req, res) {
res.json('This is my parser')
})
app.get('/results', (req, res) => {
axios(url)
.then(response => {
const html = response.data
const $ = cheerio.load(html)
const articles = []
$('.fc-item__title', html).each(function () {
const title = $(this).text()
const url = $(this).find('a').attr('href')
articles.push({
title,
url
})
})
res.json(articles)
}).catch(err => console.log(err))
})
app.listen(PORT, () => console.log(`server running on PORT ${PORT}`))
Some considerations, if you added CORS to your app, so that you can GET the data, it's useless, you add CORS when you want to SEND data, when your app is going to receive requests, CORS enable other people to use your app, it's useless then trying to use other people's app. And CORS problems happen only in the browser, as node is on the server, it will never get CORS error.
The first problem with your code, is that https://www.example.com/1, even working on the browser, returns 404 Not Found Error to axios, because this page really doesn't exist, only https://www.example.com would work.
I added an example using the comic site https://xkcd.com/ that accepts pages.
I added each axios request to an array of promises, then used Promise.all to wait for all of them:
The code is to get the image link:
const PORT = 8000;
const axios = require("axios");
const cheerio = require("cheerio");
const express = require("express");
const app = express();
const url = "https://xkcd.com/";
app.get("/", function (req, res) {
res.json("This is my parser");
});
let pagesToScrap = 50;
app.get("/results", (req, res) => {
const promisesArray = [];
for (let pageNumber = 1; pageNumber <= pagesToScrap; pageNumber++) {
let promise = new Promise((resolve, reject) => {
axios(url + pageNumber)
.then((response) => {
const $ = cheerio.load(response.data);
let result = $("#transcript").prev().html();
resolve(result);
})
.catch((err) => reject(err));
});
promisesArray.push(promise);
}
Promise.all(promisesArray)
.then((result) => res.json(result))
.catch((err) => {
res.json(err);
});
});
app.listen(PORT, () => console.log(`server running on PORT ${PORT}`));
I want to fetch some data from a public API and then modify this data. For example, I will add a Point(long, lat) in geography in Postgis. However, I need to fetch the data from a public API before I get there. I have tried this so far, but it doesn't seem like the logic makes sense.
Inserting data into the database works fine, and I have set it up correctly. However, the problems happen when I try to do it in a JSON function.
require('dotenv').config()
const express = require('express')
const fetch = require('node-fetch');
const app = express();
const db = require("./db");
app.use(express.json());
async function fetchDummyJSON(){
fetch('https://jsonplaceholder.typicode.com/todos/1')
.then(res => res.json())
.then((json) => {
await db.query("INSERT INTO logictest(userId,id,title,completed) values($1,$2,$3,$4)",[json.userId+1,json.id,json.title,json.completed])
});
}
fetchDummyJSON()
app.get('/', (req, res) => {
res.send('Hello World!')
});
const port = process.env.PORT || 3001
app.listen(port, () => {
console.log(`Example app listening on port ${port}`)
})
I keep getting SyntaxError: await is only valid in async functions and the top-level bodies of modules; however, fetchDummyData() is an async function from what I can tell. Is there a way to make more sense or make this work? I am going for a PERN stack.
This is a function as well:
.then((json) => {
await db.query("INSERT INTO logictest(userId,id,title,completed) values($1,$2,$3,$4)",[json.userId+1,json.id,json.title,json.completed])
});
try
.then(async (json) => {
await db.query("INSERT INTO logictest(userId,id,title,completed) values($1,$2,$3,$4)",[json.userId+1,json.id,json.title,json.completed])
});
I was setting a weather app website that is connected to another site using a server, and asynchronous javascript was used, but after trying to run the code, an error reading "uncaught syntax error: unexpected end of input" in the last line in the app file...I don't understand what it means and therefore I don't know how to solve it
here's my app file code
/* Global Variables */
const apiKey = "726f360f99f8ed5ce834f19b2f632fd3"
// Create a new date instance dynamically with JS
let d = new Date();
let newDate = +d.getMonth()+1+'.'+ d.getDate()+'.'+ d.getFullYear();
const gen = document.querySelector("#generate");
gen.addEventListener("click", async() =>{
const Zcode = document.querySelector("#zip").value;
const feel = document.querySelector("#feelings").value;
try {
getTemp()
.then(temp =>{
const object = {
date: newDate,
temp: temp,
}
return DealingWithServer()
})
.then(data =>{
UpdateSite(data)
})
}catch(error){
console.log(error);
}
});
async function getTemp (){
const res = await fetch (`https://api.openweathermap.org/data/2.5/weather=?zip=${zipCode}&appid=${apiKey}&units=metric`);
const data= await res.json;
const temp = data.main.temp
return temp
}
async function DealingWithServer (){
await fetch('/recieve', {
method: "POST",
credentials: "same-origin",
headers: {"Content-Type": "application/json"},
body:JSON.stringfy({
date: newDate,
temp: temp,
feel: feel
})
});
const Sres = await fetch('/get', {credentials: "same-origin"});
const Sdata = await Sres.json()
return (Sdata);
}
function UpdateSite (data)
and my server file code
// Setup empty JS object to act as endpoint for all routes
projectData = {};
const port = 3000;
// Require Express to run server and routes
const express= require("express");
const bodyParser = require("body-parser");
const cors = require("cors");
// Start up an instance of app
const app=express()
/* Middleware*/
//Here we are configuring express to use body-parser as middle-ware.
app.use(bodyParser.urlencoded({ extended: false }));
app.use(bodyParser.json());
// Cors for cross origin allowance
app.use(cors())
// Initialize the main project folder
app.use(express.static('website'));
app.get("/get",(req, res) => {
res.send(projectData)
})
app.post("/recieve", (req, res) => {
projectData =req.body
res.status(200)
})
// Setup Server
app.listen(3000,() =>{
console.log("Server running");
})
It looks to me like you've not finished your UpdateSite function at the bottom. It should read something like this:
function UpdateSite (data) {
// Do things to update the site
}
Since there is no function definition, the input (JavaScript code in this case) has ended unexpectedly - i.e. the parser was not expecting the input to end with function UpdateSite (data)
I have two repos for the Front End and Back End portions of my project.
The Front End is a simple create-react-app project that hits my Express Back End and received responses from API calls.
I ran npm run build in my Front End project and moved that build folder to the root of my express backend repo.
However, when I try to reach the root page (i.e. localhost:3001), for some reason the response only returns the static html from index.html and doesn't actually render anything.
But if I go to something that has a path like localhost:3001/pokedex/1 then at least I see a correct response coming from the API.
I have a feeling that there is something wrong with the way I'm declaring my paths.
Here is the code on the Front End that is reaching out to the Back End:
import axios from 'axios'
const baseUrl = '/'
const getAll = () => {
const request = axios.get(baseUrl)
return request.then(response => response.data)
}
const getPkm = (id) => {
const request = axios.get(`${baseUrl}pokedex/${id}`)
return request.then(response => response.data)
}
export default { getAll, getPkm }
This is my Express Back End entry index.js:
const express = require('express')
const app = express()
const cors = require('cors')
const axios = require('axios')
//Middleware
app.use(cors())
app.use(express.json())
app.use(express.static('build'))
const unknownEndpoint = (request, response) => {
response.status(404).send({ error: 'unknown endpoint' })
}
let fullPkmList = require('./fullPkmList.json')
function ignoreFavicon(req, res, next) {
if (req.originalUrl.includes('favicon.ico')) {
res.status(204).end()
}
next();
}
app.get('/', (req, res) => {
axios.get(`https://pokeapi.co/api/v2/pokemon/?limit=100`)
.then((list) => res.json(list.data.results))
})
app.get('/pokedex/:id', (request, response) => {
const id = Number(request.params.id)
const pokemon = fullPkmList[id - 1]
if (pokemon) {
axios.all([
axios.get(`https://pokeapi.co/api/v2/pokemon/${id}`),
axios.get(`https://pokeapi.co/api/v2/pokemon-species/${id}`)
])
.then(axios.spread((pokemonResponse, speciesReponse) => {
let pkmResponse = pokemonResponse.data
let speciesResponse = speciesReponse.data
response.json({pkm: pkmResponse, species: speciesResponse })
}))
} else {
response.status(404).end()
}
})
app.use(unknownEndpoint)
const PORT = process.env.PORT || 3001
app.listen(PORT, () => {
console.log(`this is a test ${PORT}`)
})
Code for the Front End: https://github.com/rohithpalagiri/pocketdex
Code for the Back End: https://github.com/rohithpalagiri/pocketdex-backend
To see the issue, you only need to run the backend. I console log the response and in that, you will see the index.html file markup being returned. My goal is to have all of the paths relative so that the root url doesn't really matter. I think that is the part I'm getting stuck on.
I'd appreciate any help!