NodeJS multi site web scrape - javascript

I am learning NodeJS and trying to scrape a fan wikia to get names of characters and store them in a json file. I have an array of character names and I want to loop through them and scrape each character name from each url in the array. The issue I am running into is:
throw new Error('Can\'t set headers after they are sent.');
Here is my source code at the moment:
var express = require('express');
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
var app = express();
app.get('/', function(req, res){
var bosses = ["Boss1","Boss2"];
for (boss in bosses) {
url = 'http://wikiasiteexample.com/' + bosses[boss];
request(url, function (error, response, html) {
if (!error) {
var $ = cheerio.load(html);
var title;
var json = { title: "" };
$('.page-header__title').filter(function () {
var data = $(this);
title = data.text();
json.title = title;
})
}
fs.writeFile('output.json', JSON.stringify(json, null, 4), {'flag':'a'}, function(err) {
if (err) {
return console.error(err);
}
});
res.send('Check your console!')
})
}
})
app.listen('8081')
console.log('Running on port 8081');
exports = module.exports = app;

You're calling res.send() for every request you make.
Your HTTP request can only have one response, so that gives an error.
You must call res.send() exactly once.
Promises (and Promise.all()) will help you do that.

Related

Postman GET url request test for simple server API

I'm learning server-side javascript and am trying to test a GET request using postman where the server (server.js) receives a request for products.html (products.js) and returns the products JSON.
My files are packaged via npm, products.js is held in node_modules, and when I run server.js in command and then open localhost:3000 in browser, I can see that it's connecting. But, the browser returns a 404 and command shows a 400.
I feel like this is likely a syntax or file path error (or possibly I just don't know how to use postman), but I've been running myself in circles trying to fix. Anything stand out as wrong / any advice on how to correct?
//server.js
var fs = require('fs');
var http = require('http');
var url = require('url');
var product_mgr = require('product_manager'),
path=require('path');
//create server that listens on port 3000
http.createServer(function(req, res) {
var urlObj = url.parse(req.url, true, false);
var filename = urlObj.pathname;
fs.readFile(filename, function (err, data) {
// if url not returned, show error code 404
if (err) {
res.writeHead(404, {'Content-Type': 'application/json'});
return res.end("404 Not Found");
} else {
// if url returned, show success code 200
res.writeHead(200, {'Content-Type': 'application/json'});
res.write(data);
return res.end();
}});
}).listen(3000, function() {
console.log('Listening on port 3000.');
});
//products.js
//create class that represents a product
//include name, price, description and qty
class Product {
constructor(name, price, description, qty) {
this.name = name;
this.price = price;
this.description = description;
this.qty = qty;
};
};
var product_1 = new Product('Yo Yo', 2.99, 'Spinning Toy', 40);
var product_2 = new Product('Hot Wheel', 1.99, 'Tiny Toy Car', 30);
var product_3 = new Product('Glove', 23.49, 'Baseball Glove', 12);
var productArray = [product_1, product_2, product_3];
//create function called products which returns JSON array of product info
function products() {
return JSON.stringify(productArray)
};
//export products function
exports.products = products;
var fs = require('fs');
var http = require('http');
var url = require('url');
var product_mgr = require('product_manager');
http.createServer(function(req, res) {
var urlObj = url.parse(req.url, true, false);
var filename = "." + urlObj.pathname;
if (req.method == "GET" && req.url == "/products.html") {
res.writeHead(200, {
'Content-Type': 'application/json'
});
res.end(JSON.stringify({
error: null
}));
} else {
res.writeHead(404, {
'Content-Type': 'application/json'
});
res.end(JSON.stringify({
error: "Invalid Request"
}));
}
}).listen(3000, function() {
console.log('Listening on port 3000.');
});

How to query from flutter to node js

Hello i have small problem here but i don't know how to solve it
I have a data from flutter "townid" what i want is to query using node.js from flutter's post request
//code from flutter app
Future getB(townId) async{
var data;
print(townId);
final response = await http.post("http://xxx.xxx.xx.xx:3000/selectB",body:{
"townid": townId.toString()
});
data = jsonDecode(response.body);
return data;
}
//my node js that receive my flutter request
const express = require('express');
const bodyParser = require("body-parser");
const mysql = require('mysql');
const router = express.Router();
const db = require('./db');
const app = express();
app.use(bodyParser);
router.post('/selectB', function (req, res) {
var townid = req.query; // i dont know if this is the correct way to do this
db.connect(function(err) {
//Select all customers and return the result object:
db.query("SELECT `town_id` as `tid` , `brgy_name` as `brg_name` FROM barangays WHERE `town_id` = $townid ", function (err, result, fields) { // i don't know if this is the correct way to do this
if (err) throw err;
res.send(result);
});
});
})
From the node side, you'll have to fetch the townid as :
var townid = req.body.townid
//node
var townid = req.body['townid']
try this maybe this might work
since the body of post request from your flutter app is
body:{"townid": townId.toString()});
you can parse the boy using body-parser and this is what it looks like
const townid = req.body.townid

Parse request in my simple Node Js server

I'm new to Node and am trying to build a simple server in Node using Express. The requests are in the form of say /input00001/1/output00001. What I need to do is to parse this request and if the flag is 1 (middle value), I need to replace the file \home\inputfiles\input00001.txt with file \home\outputfiles\output00001.txt. How is it possible to do that?
Here is my simple server so far. I'm OK with not using the Express and pure NodeJs if that makes things easier.
const express = require('express');
const app = express();
const port = 8000;
app.get('/', (request, response) => {
response.send('Hello from Express!');
request.param
});
app.get('/*', (request, response) => {
response.send('Start!');
var url = request.originalUrl;
});
app.listen(port, (err) => {
if (err) {
return console.log('something bad happened', err);
}
console.log(`server is listening on ${port} for incoming messages`);
});
You should set up a route that expects these items as url parameters and then use those parameters to do what you want. For example if you're url is /input00001/1/output00001 then you could set up a route like this:
app.get('/:input/:flag/:output', (req, res) => {
var params = req.params
var input = params.input //input0001
var flag = params.flag // 1
var output = params.output //output0001
// now do what you need to with input, flag, and output
if(typeof flag!=='undefined' && flag==1){
var file_name_string = '\home\inputfiles\input00001.txt';
var res = file_name_string.replace("input", "output");
}
console.log(input, flag, output)
res.send("done")
})

Testing a node.js server in the same process as the test

I am wondering if there is any disadvantage to starting a server in a process and then running tests against that server in the same process.
Obviously there are some performance concerns, but if we are testing accuracy instead of performance, are there any major concerns with code like the following?
var fs = require('fs');
var path = require('path');
var http = require('http');
var supertest = require('supertest');
var assert = require('assert');
describe('#Test - handleXml()*', function() {
var self = this;
var server;
var payload = ''; // stringified XML
var xmlPath = path.resolve('test', 'test_data', 'xml_payloads', 'IVR_OnDemandErrorCode.xml');
before(function(done) {
var config = self.config = require('univ-config')(module, this.test.parent.title, 'config/test-config');
server = createServer().on('listening', function() {
done(null);
});
});
beforeEach(function(done) {
fs.readFile(xmlPath, 'utf8', function(err, content) {
assert(err == null);
payload = content;
done();
});
});
it('should accept request Content-type "text/xml or application/xml"', function(done) {
supertest(server)
.post('/event')
.set('Content-Type', 'application/xml')
.send(payload)
.expect(200, done);
});
it('should transform XML payload into JSON object', function(done) {
supertest(server)
.post('/event')
.set('Content-type', 'application/xml')
.send(payload)
.expect(200)
.end(function(err, res) {
assert(err == null,'Error is not null');
var jsonifiedXml = JSON.parse(res.text);
assert(typeof jsonifiedXml === 'object','jsonifiedXml not an object');
done();
});
});
describe('JSONified XML', function() {
it('should have proper key casing', function(done) {
supertest(server)
.post('/event')
.set('Content-type', 'application/xml')
.send(payload)
.expect(200)
.end(function(err, res) {
assert(err == null);
var payload = JSON.parse(res.text);
payload = payload.events[0].data;
assert(payload.hasOwnProperty('ppv'),'Bad value for ppv');
assert(payload.hasOwnProperty('mac'),'Bad value for mac');
assert(payload.hasOwnProperty('appName'),'Bad value for appName');
assert(payload.hasOwnProperty('divisionId'),'Bad value for divisionId');
assert(payload.hasOwnProperty('callTime'),'Bad value for callTime');
assert(payload.hasOwnProperty('callDate'),'Bad value for callDate');
assert(payload.hasOwnProperty('ivrLOB'),'Bad value for ivrLOB');
done();
});
});
});
});
function createServer(opts) {
//Note: this is a good pattern, definitely
var handleXml = require(path.resolve('lib', 'handleXml'));
var server = http.createServer(function(req, res) {
handleXml(req, res, function(err) {
res.statusCode = err ? (err.status || 500) : 200;
res.end(err ? err.message : JSON.stringify(req.body));
});
});
server.listen(5999); //TODO: which port should this be listening on? a unused port, surely
return server;
}
That's the standard way of testing a the http endpoints in a node application. But you are not going to want to have a createServer() function in each test. You will have a common function that creates a server that you can use through out your application, including to start the production server.
You right in noticing the having the server listen to a port doesn't actually do anything for you.
For this reason, it's common to have what I call an application factory that starts everything about a server, but does not listen to a port. That way I can access the server from a test or a script. The production app gets booted from a minimal index file:
var createServer = require('./AppFactory');
var server = createServer();
server.listen(5999);

Cannot save a remote image with node.js and request

I want to save an image with node.js and the request library. So far I have this simple code:
var request = require('request');
var fs = require('fs');
request('http://upload.wikimedia.org/wikipedia/commons/8/8c/JPEG_example_JPG_RIP_025.jpg', function(error, response, body)
{
// further logic that decides
// whether or not the image will be saved
fs.writeFile('downloaded.jpg', body, function(){});
});
But it doesn't work. The image always arrives corrupt. I assume it's an encoding error but I cannot figure out how to fix this.
var request = require('request'),
fs = require('fs'),
url = 'http://upload.wikimedia.org/wikipedia/commons/8/8c/JPEG_example_JPG_RIP_025.jpg';
request(url, {encoding: 'binary'}, function(error, response, body) {
fs.writeFile('downloaded.jpg', body, 'binary', function (err) {});
});
var fs = require('fs'),
request = require('request'),
url='http://upload.wikimedia.org/wikipedia/commons/8/8c/JPEG_example_JPG_RIP_025.jpg';
request(url).pipe(fs.createWriteStream('downloaded.jpg'));
Here's how I did it using stream and pipe, (I was using express but you may not need that)
var express = require('express');
var app = express();
var filesystem = require('fs');
var https = require('https');
var download = function(url, dest, cb) {
var file = filesystem.createWriteStream(dest);
var request = https.get(url, function(httpResponse) {
httpResponse.pipe(file);
file.on('finish', function() {
console.log("piping to file finished")
file.close(cb); // close() is async, call cb after close completes.
});
}).on('error', function(err) { // Handle errors
filesystem.unlink(dest); // Delete the file async. (But we don't check the result)
if (cb) cb(err.message);
});
};
app.get('/image', (req, res) => {
download('https://lastfm-img2.akamaized.net/i/u/64s/15cc734fb0e045e3baac02674d2092d6.png',
'porcupine.png',
() => {console.log("downloaded to porcupine.png")})
})
When I run using node server.js and hit the url localhost:3000/image, it will download and save the file to porcupine.png in the base directory.

Categories