Web scraping with Node.js - javascript

I'm trying to finish a task on web scraping. On my web page, I take a URL and extract whatever is located between that page's <body> tags. Then I want to display the content that was found on my own web page. I learned that I can use the request module for this purpose.
The problem is that I cannot show the result in my page's HTML, because I could not save the result of the request call (in the POST handler).
Here is my code:
// Express app: POST / takes a URL from a form and fetches it with `request`;
// GET / is meant to show the fetched body afterwards.
var request = require("request");
const express = require('express');
const app = express();
const session = require('express-session');
const path = require('path');
const bodyParser = require('body-parser');
const router = express.Router();
app.use(session({secret: 'shhhhhhh', saveUninitialized: true, resave: true}));
app.use(bodyParser.urlencoded({extended: true}));
// Module-level variable that each handler reassigns to the current req.session.
var sess;
// GET / - shows the result page when the app-level 'done' flag is set, otherwise the plain form.
router.get('/', (req, res) => {
// index.html is sent here first; the res.end()/res.sendFile() calls below
// then operate on this same response a second time.
res.sendFile(path.join(__dirname + '/index.html'));
sess = req.session;
// 'done', 'info' and 'site' are app-level settings written by the POST handler below.
if (app.get('done') === true) {
console.log(app.get('info')); // prints "undefined"
app.set('done', false);
// The stored body is interpolated into the page as-is (no escaping).
res.end(`
<h1>Show other sites</h1>
<form action="/" method="POST">
<input type="text" name="site" id="site" placeholder="url"><br>
<button type="submit">go</button>
BACK
</form><br>
<hr>
<p>url: ${app.get('site')}</p>
<hr>
<div>
${app.get('info')}
</div>
`);
}
else
res.sendFile(path.join(__dirname + '/index.html'));
})
// POST / - remembers the submitted URL in the session and starts fetching it.
router.post('/', (req, res) => {
sess = req.session;
sess.site = req.body.site;
app.set('done', false);
if (sess.site) {
// 'done' is set before the request callback has stored anything.
app.set('done', true);
request({
uri: `${sess.site}`,}, function(error, response, body) {
app.set('info', body); // Here I'm trying to save the scraped result
app.set('site', sess.site);
}
);
}
// This redirect sits outside the request callback, so it does not wait for
// 'info' and 'site' to be stored.
res.redirect('/');
})
// GET /clear - destroys the session and redirects to the form.
router.get('/clear', (req, res) => {
req.session.destroy((err) => {
// On error the error is only logged and no response is sent.
if (err)
return console.log(err);
res.redirect('/');
})
})
app.use('/', router);
app.listen(3000);
console.log("Running at port 3000");
Please help me find out what I'm doing wrong and how to save the result of Request module for later use.

I have to say that, after following your logic, it would be better to rethink the overall design. Keep in mind that using a global variable is bad practice!
That being said,
you can make the current logic work with the following minor changes:
install node-fetch
npm i node-fetch
import it
const fetch = require('node-fetch');
then change the POST end point to
// POST / - store the submitted URL in the session, download that page, and
// only then redirect to GET / so the scraped body is already saved.
router.post('/', async (req, res) => {
  sess = req.session;
  sess.site = req.body.site;
  app.set('done', false);
  if (sess.site) {
    try {
      // Await the whole download before redirecting; otherwise GET / runs
      // before 'info' has been stored.
      const resp = await fetch(sess.site);
      const body = await resp.text();
      app.set('info', body); // save the scraped result for the next GET /
      app.set('site', sess.site);
      // Flag success only once there is something to show.
      app.set('done', true);
    } catch (err) {
      // Invalid URL or network failure: log it, leave 'done' false and still
      // redirect, instead of leaving the request hanging on a rejected promise.
      console.error(err);
    }
  }
  res.redirect('/');
});

Related

Only GET '/' route is working in my MERN project, testing with POSTMAN getting 404 responses

My app.js looks like this:
// Entry point: connects to the database, configures CORS and JSON body
// parsing, and mounts the fruit routes under /api/fruits.
const express = require('express');
const connectDB = require('./config/db');
const cors = require('cors');
const fruits = require('./routes/api/fruits');

const app = express();
connectDB();

// The cors option is spelled `credentials`; the misspelled key had no effect.
app.use(cors({ origin: true, credentials: true }));
// express.json() has no `extended` option (that one belongs to express.urlencoded()).
app.use(express.json());

app.get('/', (req, res) => res.send('Hello World'));
// Every route in routes/api/fruits.js is reachable under the /api/fruits prefix.
app.use('/api/fruits', fruits);

const port = process.env.PORT || 8082;
app.listen(port, () => console.log(`Server running on port ${port}`));
My fruit.js in routes/api/fruits.js
// CRUD routes for the Fruit model. Paths are relative to wherever the app
// mounts this router.
const express = require('express');
const router = express.Router();
const Fruit = require('../../models/Fruit');

// GET /test - simple check that the router is reachable.
router.get('/test', (req, res) => {
  res.send('fruit route testing!');
  console.log('Route found');
});

// GET / - list all fruits.
router.get('/', (req, res) => {
  Fruit.find()
    .then(fruits => res.json(fruits))
    .catch(err => res.status(404).json({nofruitsfound : 'No Fruit Found'}));
});

// GET /:id - fetch a single fruit by id.
router.get('/:id', (req, res) => {
  Fruit.findById(req.params.id)
    .then(fruit => {
      // findById resolves with null when nothing matches; report that as
      // "not found" instead of answering 200 with a null body.
      if (!fruit) {
        return res.status(404).json({nofruitsfound : 'No Fruit Found'});
      }
      res.json(fruit);
    })
    .catch(err => res.status(404).json({nofruitsfound : 'No Fruit Found'}));
});

// POST / - create a fruit from the request body.
router.post('/', (req, res) => {
  Fruit.create(req.body)
    .then(fruit => res.json({ mgs: 'Fruit added sucessfully'}))
    .catch(err => res.status(400).json({ error: 'Unable to add this fruit'}));
});

// PUT /:id - update a fruit with the request body.
// The route path must be '/:id' (not ':/id') for req.params.id to be populated.
router.put('/:id', (req, res) => {
  Fruit.findByIdAndUpdate(req.params.id, req.body)
    .then(fruit => res.json({ mgs: 'Updated successfully'}))
    .catch(err => res.status(400).json({ error: 'Unable to update the Database'}));
});

// DELETE /:id - remove a fruit by id.
// The second argument of findByIdAndDelete is an options object, so the
// request body must not be passed there.
router.delete('/:id', (req, res) => {
  Fruit.findByIdAndDelete(req.params.id)
    .then(fruit => res.json({ mgs: 'Fruit deleted sucessfully'}))
    .catch(err => res.status(404).json({ error: 'Unable to find fruit by Id and delete'}));
});

module.exports = router;
http://localhost:8082/ gets "Hello World"
http://localhost:8082/api/fruit/test gets:
<html lang="en">
<head>
<meta charset="utf-8">
<title>Error</title>
</head>
<body>
<pre>Cannot GET /test</pre>
</body>
</html>
I have also tried http://localhost:8082/api/fruits/test, which works, but a POST to http://localhost:8082/api/fruits/ returns {"error":"Unable to add this fruit"}
postman request for POST http://localhost:8082/api/fruits/
Based on this line app.use('/api/fruits.', fruits); you have defined a base path for the endpoints/routes in the fruits.js file. This means that, to be able to access the routes, you have to prefix the URL with the base path as follows:
http://localhost:8082/api/fruits/test - will get the /test route
http://localhost:8082/api/fruits - will get all fruits as defined in fruits.js
http://localhost:8082/api/fruits - used with a POST request you can create a new fruit.
Your base path plus host is then this: http://localhost:8082/api/fruits
Looks like you are new to Express.js so I am going to add this:
app.use(express.json({extended: false})); - This line tells Express to parse JSON request bodies only. So in Postman, or whichever HTTP client you are using, make sure the body of your POST requests is JSON (sent with a Content-Type: application/json header).
You have neither /test nor /api/test defined. /api/fruit/test would be the closest match among your endpoints, through the route with an :id parameter.
Be aware, though, that you have a . character where your Express app mounts the /api/fruit route: it says /api/fruit. (with a trailing dot).

why my form data doesn't stored in database?

My app.js entire codes:
// Express + Mongoose app that serves index.html and is meant to store the
// submitted first/last name through POST /addname.
var express = require("express");
var app = express();
var port = 3000;
// The server starts listening here, before any of the routes below are registered.
app.listen(port, () => {
console.log("Server listening on port " + port);
});
var mongoose = require("mongoose");
mongoose.Promise = global.Promise;
mongoose.connect("mongodb://localhost:27017/node-demo", { useNewUrlParser: true, useUnifiedTopology: true });
// Registered with app.use before every app.post below; the handler sends
// index.html and never calls next().
app.use("/", (req, res) => {
res.sendFile(__dirname + "/index.html");
});
var nameSchema = new mongoose.Schema({
firstName: String,
lastName: String
});
var User = mongoose.model("User", nameSchema);
// Empty handler: it never sends a response.
app.post("/addname", (req, res) => {
});
// The body parsers are registered only after the handlers above.
var bodyParser = require('body-parser');
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({ extended: true }));
// The outer handler does not save anything itself: each time it runs it only
// registers another POST /addname route (the inner app.post call).
app.post("/addname", (req, res) => {
app.post("/addname", (req, res) => {
// NOTE(review): the two names are passed as separate positional arguments;
// Mongoose model constructors normally take one object of fields - confirm.
var myData = new User(req.body.firstName, req.body.lastName);
myData.save()
.then(item => {
res.send("item saved to database");
})
.catch(err => {
res.status(400).send("unable to save to database");
});
})
});
This is my index.html file
<!DOCTYPE html>
<html>
  <head>
    <title>Intro to Node and MongoDB</title>
  </head>
  <body>
    <h1>Intro to Node and MongoDB</h1>
    <!-- Posts the firstName and lastName fields to /addname -->
    <form method="post" action="/addname">
      <label>Enter Your Name</label><br>
      <input type="text" name="firstName" placeholder="Enter first name..." required>
      <input type="text" name="lastName" placeholder="Enter last name..." required>
      <input type="submit" value="Add Name">
    </form>
  </body>
</html>
After writing my code, I start the mongod server, then start mongo in another terminal, and then run app.js. It works properly and creates the server at localhost:3000. Then I open that address, enter my name in the form and submit it, but nothing is added to my database.
Why is this not working?
My form data is not stored in my MongoDB database. Please help me!
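I think the body-parser library is no longer needed as of Express 4.16, which ships the same parsers built in.
Please use
app.use(express.json())
instead of body-parser.
There is no need to install any npm packages to parse the data from the form (for an HTML form post, also add app.use(express.urlencoded({ extended: true }))).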
Here is the issue:
var myData = new User(req.body.firstName, req.body.lastName);
you have to pass an object to new User
var myData = new User( { firstName:req.body.firstName, lastName:req.body.lastName } );
I think I know what is wrong.
Change app.use("/"... to app.get("/"...
because use handles all requests, including POST. So whatever request you send, it responds with the HTML file again and the request never reaches your other handlers.
app.get("/", (req, res) => {
res.sendFile(__dirname + "/index.html");
});
I think you are not passing data correctly in the user model.
// POST /addname - save the submitted first and last name as a new User document.
app.post("/addname", (req, res) => {
  // make sure your request object is getting the same properties
  // The model constructor takes ONE object whose keys match the schema
  // fields, not the values as separate positional arguments.
  const myData = new User({
    firstName: req.body.firstName,
    lastName: req.body.lastName,
  });
  myData.save()
    .then(() => {
      res.send("item saved to database");
    })
    .catch(() => {
      res.status(400).send("unable to save to database");
    });
});
As you updated the code for the form, lastName is now the field name, so you should make the matching change in the schema object too.
Also, please check the incoming request headers: if the Content-Type is multipart/form-data, bodyParser won't work and you should use multer instead.
Some useful body parsers that you might need to consider if needed.
form-data: multer
x-www-form-urlencoded: express.urlencoded()
raw: express.raw()
json: express.json()
text: express.text()
Updated app.js file:
// Express + Mongoose app: serves the form at GET / and stores the submitted
// names through POST /addname.
const express = require("express");
const mongoose = require("mongoose");
const bodyParser = require('body-parser');

const app = express();
const port = 3000;

mongoose.Promise = global.Promise;
mongoose.connect("mongodb://localhost:27017/node-demo", { useNewUrlParser: true, useUnifiedTopology: true });

const nameSchema = new mongoose.Schema({
  firstName: String,
  lastName: String
});
const User = mongoose.model("User", nameSchema);

// Body parsers must be registered before the routes that read req.body.
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({ extended: true }));

// app.get (not app.use): app.use("/") answers every method and path, so the
// POST to /addname would be served index.html and never reach its handler.
app.get("/", (req, res) => {
  res.sendFile(__dirname + "/index.html");
});

// POST /addname - save the submitted first and last name.
app.post("/addname", (req, res) => {
  // The model constructor takes a single object keyed by the schema fields.
  const myData = new User({
    firstName: req.body.firstName,
    lastName: req.body.lastName,
  });
  myData.save()
    .then(() => {
      res.send("item saved to database");
    })
    .catch(() => {
      res.status(400).send("unable to save to database");
    });
});

// Start listening once all middleware and routes are in place.
app.listen(port, () => {
  console.log("Server listening on port " + port);
});
Note: you had added two extra routes with the same path and handler; I have removed them. Update this file in your project and run it.

How to properly set up Express and mongoDB using mongoDB Node.js driver?

I am new to web development in general. I am trying to insert and find documents in a local mongoDB database through GET and POST requests. I am doing this with a Node.js/Express web server. I've managed to get something working, but I'm not sure if I'm doing the right thing.
First, I wrote client.html to send POST requests.
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Client</title>
</head>
<body>
<p>Insert something into the database:</p>
<form action="/items" method="POST">
<input type="text" name="item">
<button type="submit">Submit</button>
</form>
</body>
</html>
After reading the mongoDB documentation for Node.js, I decided to write my Node.js server as follows, putting all my routes under one connection (approach 1):
// Approach 1: connect to MongoDB once at startup and register all routes
// inside the connect callback so they share the same client and collection.
const express = require("express");
const app = express();
const port = 5000;
const MongoClient = require('mongodb').MongoClient;
const assert = require('assert');
const url = 'mongodb://localhost:27017';
const client = new MongoClient(url, { useUnifiedTopology: true });

app.use(express.urlencoded({ extended: true }));

client.connect(function(err) {
  // Fail fast at startup: without a database connection there is nothing to serve.
  assert.equal(null, err);
  console.log("Connected successfully to server");
  const db = client.db("my_db");
  const itemsCollection = db.collection("items");

  app.get("/", function(req, res) {
    res.sendFile(__dirname + "/client.html");
  });

  // GET request - find documents
  app.get("/items", function(req, res, next) {
    itemsCollection.find().toArray(function(err, docs) {
      // Hand errors to Express instead of asserting, which would crash the
      // whole server from inside a request callback.
      if (err) {
        return next(err);
      }
      console.log(docs);
      // Always answer the client; without a response the request hangs.
      res.json(docs);
    });
  });

  // POST request - insert document
  app.post("/items", function(req, res, next) {
    itemsCollection.insertOne(req.body, function(err, result) {
      if (err) {
        return next(err);
      }
      res.redirect("/");
    });
  });

  app.listen(port, function() {
    console.log(`Listening on port: ${port}`);
  });
});
However, I've also seen people create a new connection for every route, which would be something like this (approach 2):
// Approach 2: routes are registered up front and each handler calls
// client.connect itself before touching the database.
const express = require("express");
const app = express();
const port = 5000;
const MongoClient = require('mongodb').MongoClient;
const assert = require('assert');
const url = 'mongodb://localhost:27017';
const client = new MongoClient(url, { useUnifiedTopology: true });

app.use(express.urlencoded({ extended: true }));

app.get("/", function(req, res) {
  res.sendFile(__dirname + "/client.html");
});

// GET request - find documents
app.get("/items", function(req, res, next) {
  client.connect(function(err) {
    // Report failures through Express rather than asserting: an assertion
    // thrown inside this callback would take the whole server down.
    if (err) {
      return next(err);
    }
    console.log("Connected successfully to server");
    const db = client.db("my_db");
    const itemsCollection = db.collection("items");
    itemsCollection.find().toArray(function(err, docs) {
      if (err) {
        return next(err);
      }
      console.log(docs);
      // Always answer the client; without a response the request hangs.
      res.json(docs);
    });
  });
});

// POST request - insert document
app.post("/items", function(req, res, next) {
  client.connect(function(err) {
    if (err) {
      return next(err);
    }
    const db = client.db("my_db");
    const itemsCollection = db.collection("items");
    itemsCollection.insertOne(req.body, function(err, result) {
      if (err) {
        return next(err);
      }
      res.redirect("/");
    });
  });
});

app.listen(port, function() {
  console.log(`Listening on port: ${port}`);
});
Both approaches successfully complete the database operations. I would like to know:
Which of these approaches (if any) are recommended, and why?
Should I be calling client.close() anywhere in my server code?
Any help is appreciated, thanks!

Using Express JS to make API external CALL (avoiding cors problem)

I am trying to make API calls using express js to get some data and then use them for my school project!
I have heard that I can install an extension or something like that on my browser but that will only work on my pc.
So I am trying to create my own proxy using Express JS.
Do I need to write something else in app.get('/'), or is it okay with just a slash?
Thanks in advance!
// Minimal proxy: GET / forwards one fixed request to the SL real-time API and
// relays the JSON with a permissive CORS header so a browser page can read it.
const express = require('express');
const request = require('request');
const app = express();

// Allow any origin to read the responses of this proxy.
app.use((req, res, next) => {
  res.header('Access-Control-Allow-Origin', '*');
  next();
});

// NOTE(review): the API key is hard-coded; consider reading it from an environment variable.
const key1 = '8151a53b2c1d4c3db2df';
const url = 'http://api.sl.se/api2/realtimedeparturesv4.json?key=' + key1 + '&siteid=9192&timewindow=5';

app.get('/', (req, res) => {
  request({ url: url }, (error, response, body) => {
    // `error` is set on network failures (then `response` is undefined);
    // otherwise a non-200 status from the upstream API is treated as a failure.
    if (error || response.statusCode !== 200) {
      const message = error
        ? error.message
        : `Upstream responded with status ${response.statusCode}`;
      return res.status(500).json({ type: 'error', message });
    }

    let data;
    try {
      data = JSON.parse(body);
    } catch (parseError) {
      // The upstream answered 200 but not with valid JSON.
      return res.status(500).json({ type: 'error', message: parseError.message });
    }

    console.log(body);
    res.json(data);
  });
});

const PORT = process.env.PORT || 3000;
app.listen(PORT, () => console.log(`listening on ${PORT}`));
Use the cors package like this
// Register the cors middleware on the whole app.
const express = require('express');
const cors = require('cors');

const app = express();
app.use(cors());
if you want to enable it for a single route :
app.get('/', cors(), (req, res) => {
});

req.body returning undefined?

I have installed body-parser through npm, required it, and registered it with Express, but I am still getting req.body as undefined. If someone knows what's wrong, please let me know; I feel like it's something stupid I'm missing.
This is my app.js file
// Application entry point: mounts the index router, static assets, the
// body/cookie parsers, the EJS view engine and the 404/error handlers.
const express = require('express')
const index = require('./routes/index');
const bodyParser = require('body-parser');
const cookieParser = require('cookie-parser');
//Create the express server
const app = express();
// Use index routes file
// The router is mounted here, before the body/cookie parsers registered further down.
app.use('', index);
// Use the /public folder for our assets
app.use('/public', express.static('public'));
// Use body-parser and cookie-parser
app.use(bodyParser.urlencoded({extended: false}));
app.use(bodyParser.json());
// NOTE(review): cookieParser is passed without being called; the cookie-parser
// documentation registers it as app.use(cookieParser()) - confirm.
app.use(cookieParser);
// Set ejs as our templating engine
app.set('view engine', 'ejs');
// Catch 404 and forward to error handler
app.use(function(req, res, next) {
const err = new Error('Not Found');
err.status = 404;
next(err);
});
// Error handler
app.use((err, req, res, next) => {
res.locals.message = err.message;
// The status is taken from res.statusCode, not from the err.status set by the 404 handler above.
res.status(res.statusCode || 500);
res.render('error', {error: err.message});
console.log(err.message);
});
app.listen(process.env.PORT || 3000, () => {
console.log('Application running on localhost:3000');
});
This is my route index.js file that's making the post
// Index router: static pages, movie listing routes, and the search form handler.
const express = require('express');
const router = express.Router();
const fs = require('fs');
const movie_controller = require('../controllers/movieController');
const bodyParser = require('body-parser');
const request = require('request');

router.get('/about', (req, res) => {
  res.render('about');
});
router.get('/', movie_controller.get_index);
router.get('/currently_playing', movie_controller.get_currently_playing);
router.get('/top_rated', movie_controller.get_top_rated);
router.get('/upcoming', movie_controller.get_upcoming);
router.get('/movie_view/:id', movie_controller.get_movie);

// Post request for a search query
// Looks up movies matching the submitted text, then loads the genre list, and
// renders the 'results' view with both.
router.post('/search', (req, res, next) => {
  console.log('Query', req.body);
  // The form field is named "searchQuery"; send only that value (not the whole
  // body object) and encode it so it is safe inside the URL. req.body is
  // guarded because it is undefined when no body parser ran before this router.
  const query = encodeURIComponent((req.body && req.body.searchQuery) || '');

  // Make request for query
  // NOTE: api_key is blank in this snippet; a real key has to be supplied.
  request('https://api.themoviedb.org/3/search/movie?api_key=&language=en-US&query=' + query + '&page=1&include_adult=false', (error, response, body) => {
    //handle errors (return so that no second response is attempted)
    if (error) {
      return res.render('error', {error: error.message});
    }
    // Forward unexpected statuses to the error handler instead of leaving the request hanging.
    if (response.statusCode !== 200) {
      return next(new Error('Movie search failed with status ' + response.statusCode));
    }

    //place body data in a variable for later reference
    let movies;
    try {
      movies = JSON.parse(body).results;
    } catch (parseError) {
      return next(parseError);
    }

    // Make request for genres
    request('https://api.themoviedb.org/3/genre/movie/list?api_key=&language=en-US', (error, response, body) => {
      //handle errors
      if (error) {
        return res.render('error', {error: error.message});
      }
      if (response.statusCode !== 200) {
        return next(new Error('Genre lookup failed with status ' + response.statusCode));
      }

      //place body in a variable for later reference
      let genres;
      try {
        genres = JSON.parse(body).genres;
      } catch (parseError) {
        return next(parseError);
      }

      res.render('results', {movie: movies, genres: genres });
    });
  });
});

module.exports = router;
This is where the form is on a header.ejs partial
<form class="form-inline my-2 my-lg-0" action="/search" method="post">
<input class="form-control mr-sm-2" type="search" placeholder="Search" name="searchQuery" aria-label="Search">
<button class="btn btn-outline-success my-2 my-sm-0" type="submit">Search</button>
</form>
You should register the bodyParser middleware before app.use('', index), i.e. before your router. To avoid any problems, just place app.use('', index) last.
// Use body-parser and cookie-parser
app.use(bodyParser.urlencoded({extended: false}));
app.use(bodyParser.json());
// cookieParser is a factory: call it to obtain the actual middleware.
app.use(cookieParser());
// Use index routes file
// Mounted last so the parsers above have already run when a route executes.
app.use('', index);
Express middleware runs in the order you register them (routes included). You have registered the bodyParser and cookieParser middlewares after attaching your routes. Therefore, you will not have the parsed body or cookies by the time your route is encountered.
To fix this, make sure that any middlewares you want to run before your actual route are registered before:
// Application setup with the parsers registered before the routes that need them.
const express = require('express')
const index = require('./routes/index');
const bodyParser = require('body-parser');
const cookieParser = require('cookie-parser');
//Create the express server
const app = express();
// Use the /public folder for our assets
app.use('/public', express.static('public'));
// Use body-parser and cookie-parser
app.use(bodyParser.urlencoded({extended: false}));
app.use(bodyParser.json());
// cookieParser is a factory: call it to obtain the actual middleware.
app.use(cookieParser());
// Use index routes file
// Mounted after the parsers so req.body and req.cookies are populated in the routes.
app.use('', index);
...

Categories