npm request does not return the HTML for a URL containing Thai characters (web scraping) - javascript

This code does not print the HTML to the console; it prints "Error" instead:
// Fetch a tag page whose URL contains raw (un-encoded) Thai characters.
var request = require('request');
var thai_url = "http://pantip.com/tag/Honda_(มอเตอร์ไซค์)";

request(thai_url, function (error, response, html) {
  // Anything other than an error-free 200 response is reported as a failure.
  var failed = error || response.statusCode !== 200;
  if (failed) {
    console.log("Error");
    return;
  }
  console.log(html);
});
But when I change the มอเตอร์ไซค์ part of thai_url to its percent-encoded form, %E0%B8%A1%E0%B8%AD%E0%B9%80%E0%B8%95%E0%B8%AD%E0%B8%A3%E0%B9%8C%E0%B9%84%E0%B8%8B%E0%B8%84%E0%B9%8C,
like this:
// Same request as before, but with the Thai segment already percent-encoded.
var request = require('request');
var thai_url = "http://pantip.com/tag/Honda_(%E0%B8%A1%E0%B8%AD%E0%B9%80%E0%B8%95%E0%B8%AD%E0%B8%A3%E0%B9%8C%E0%B9%84%E0%B8%8B%E0%B8%84%E0%B9%8C)";

request(thai_url, function (error, response, html) {
  // Print the page only for an error-free 200 response.
  var succeeded = !error && response.statusCode === 200;
  console.log(succeeded ? html : "Error");
});
The second snippet works correctly. Is there a way to convert มอเตอร์ไซค์ to %E0%B8%A1%E0%B8...(more) programmatically? Or, if it is possible, how can I make http://pantip.com/tag/Honda_(มอเตอร์ไซค์) work directly with npm request? Thank you for any help.

The first snippet works fine for me. Log the actual error object instead of the literal string "Error" so you can see what went wrong.
Make sure your IDE saves the file as UTF-8; otherwise it may alter the Thai characters when saving, which could be why you get the error.

Update: I have now solved the problem like this:
// encodeURI percent-encodes the Thai characters (as UTF-8) but leaves URL
// delimiters such as ":" and "/" untouched, so the whole URL can be passed in.
var thai_url = encodeURI("http://pantip.com/tag/Honda_(มอเตอร์ไซค์)");
Use encodeURI. http://www.javascripter.net/faq/escape.htm

Related

Node js get image from web and convert it into an array of byte values

What other libraries can I use together with request to convert the downloaded image into an array of bytes?
var request = require('request');
// Downloads the portrait image. The success branch is empty in the question:
// converting the response into an array of byte values is what is being asked.
request('https://randomuser.me/api/portraits/men/81.jpg', function (error, response, body) {
if (!error && response.statusCode == 200) {
// TODO: convert `body` into an array of byte values here.
}
})
ImageMagick, which has several third-party JavaScript SDKs, would work.

Scraper not returning any values with jquery using cheerio

I'm trying to scrape the front page of a website (www.ozbargain.com.au) and print the content of any a tag that mentions Xbox, but nothing is printed to the console. I believe the issue is with the if statement that uses :contains.
// Loads the ozbargain front page and tries to print the text of links that
// mention "Xbox".
var fs = require('fs'),
request = require('request'),
cheerio = require('cheerio');
// NOTE: the `var` statement above ended with ';', so `url` is assigned
// without a declaration (an implicit global).
url = 'http://www.ozbargain.com.au';
request(url, function(error, response, html) {
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
// Only checks whether at least one matching <a> exists; the matched set
// itself is not kept.
if($("a:contains('Xbox')").length) {
//console.log(this);
// NOTE(review): `this` here is the `this` of the request callback, not one
// of the matched elements, so $(this) does not wrap the selected links.
var el = $(this);
var log = el.text();
console.log(log);
} else {
console.log('hey');
}
}
});
This is the HTML block I'm after. In particular, I want the a tag:
<h2 class="title" id="title214252">Free on Xbox One, Xbox 360, PS3, PS4: Tales from the Borderlands (Episode 1)</h2>
The Cheerio syntax for :contains is slightly different from jQuery's. Omit the single quotes around the string you're searching for and it should work:
$("a:contains(Xbox)")
Assigned the selector to a variable then called the text method.
request(url, function (error, response, html) {
  // Nothing to do unless the page was fetched successfully.
  if (error || response.statusCode != 200) {
    return;
  }

  var $ = cheerio.load(html);
  // Keep the matched set in a variable so that both the length check and
  // text() operate on the same selection.
  var $matches = $("a:contains('Xbox')");
  console.log($matches.length ? $matches.text() : 'hey');
});

node.js + request => node.js + bluebird + request

I'm trying to understand how to write code with promises.
Please check my code. Is this right?
Node.js + request:
/**
 * Parses a response body that is either plain JSON or a JSONP wrapper such
 * as `cb({...})`.
 *
 * @param {string} text - Raw response body.
 * @returns {*} The parsed JSON value.
 * @throws {SyntaxError} If the text is neither valid JSON nor a JSONP wrapper
 *     around valid JSON.
 */
function parseJsonOrJsonp(text) {
  try {
    return JSON.parse(text);
  } catch (e) {
    var startPos = text.indexOf('({');
    // Use the LAST "})" so a "})" inside a string value does not truncate
    // the payload.
    var endPos = text.lastIndexOf('})');
    if (startPos === -1 || endPos <= startPos) {
      // Not a JSONP wrapper either: surface the original parse error.
      throw e;
    }
    return JSON.parse(text.substring(startPos + 1, endPos + 1));
  }
}

request(url, function (error, response, body) {
  if (error) {
    callback(error);
    return;
  }
  if (response.statusCode !== 200) {
    // Without this, a non-200 response with no transport error would call
    // callback(null), which looks like success with no data.
    callback(new Error('Unexpected status code: ' + response.statusCode));
    return;
  }

  var json;
  try {
    json = parseJsonOrJsonp(body);
  } catch (parseError) {
    // Report malformed bodies through the callback instead of throwing from
    // inside the asynchronous handler.
    callback(parseError);
    return;
  }
  // Called outside the try block so errors thrown by the callback itself are
  // not caught and do not trigger a second callback invocation.
  callback(null, json);
});
Node.js + bluebird + request:
// Promise-based variant: fetch, keep only the body, parse it, then log it.
request.getAsync(url)
  .spread(function (res, text) {
    return text;
  })
  .then(function (text) {
    return JSON.parse(text);
  })
  .then(function (parsed) {
    console.log(parsed);
  })
  .catch(function (err) {
    console.error(err);
  });
How do I check the response status? Should I use the if statement from the first example, or is there a more idiomatic way?
You can simply check if the response.statusCode is not 200 in the spread handler and throw an Error from that, so that the catch handler will take care of it. You can implement it like this
var Bluebird = require('bluebird');
// multiArgs: true is presumably what lets .spread() receive (response, body)
// separately; confirm against the Bluebird promisifyAll documentation.
var request = Bluebird.promisifyAll(require('request'), {multiArgs: true});

request.getAsync(url)
  .spread(function (response, body) {
    // Throwing here rejects the chain, so the catch handler deals with it.
    if (response.statusCode != 200) {
      throw new Error('Unsuccessful attempt. Code: ' + response.statusCode);
    }
    // JSON.parse is synchronous, so it needs no separate then step.
    return JSON.parse(body);
  })
  .then(console.log)
  .catch(console.error);
And if you notice, we return the parsed JSON from the spread handler, because JSON.parse is not an async function, so we don't have to do it in a separate then handler.
One way to check the status code:
.spread(function(response, body) {
// Throw on any non-200 status so the promise chain is rejected and a later
// catch handler can deal with it; otherwise pass the body along unchanged.
if (response.statusCode !== 200) {
throw new Error('Unexpected status code');
}
return body;
})

Node.js mikeal/request module - Garbled non-utf8 website (Shift_JIS)

I am trying to access a non utf-8 website using request module. Response is garbled for this request.
var request = require('request');

// Fetch the page and print the body exactly as request decoded it.
request('http://www.alc.co.jp/', function (error, response, body) {
  var succeeded = !error && response.statusCode == 200;
  if (succeeded) {
    console.log(body); // Print the web page.
  }
});
Even after setting the encoding option to Shift_JIS I am seeing garbled Japanese text.
You need to do the conversion yourself. The example code below uses node-iconv.
var Iconv = require('iconv').Iconv;
var request = require('request');

request({
  uri: 'http://www.jalan.net/',
  // Keep the body as a raw Buffer so it can be transcoded below.
  encoding: null
}, function (error, response, body) {
  if (error || response.statusCode != 200) {
    return;
  }
  // Transcode the Shift_JIS bytes to UTF-8, then turn them into a string.
  var converter = new Iconv('shift_jis', 'utf-8');
  var text = converter.convert(body).toString();
  console.log(text); // Print the web page.
});
The encoding: null parameter asks request not to convert the Buffer (a byte array) into String yet.
We pass this buffer to Iconv for converting into another Buffer of UTF-8 encoding.
Now this Buffer is good for being converted into a String.
(BTW, http://www.alc.co.jp has switched to UTF-8, so I substituted with another site.)

Node Convert Image URL to data:image using streams

I currently have an endpoint which is used to circumvent CORS that takes in any image URL and returns the contents in data-uri format. This works well, but I think it would be much more efficient to use a streaming solution where the response is built as a stream from the image request.
Here is the working, non-stream version:
/**
 * GET /api/image/convert?url=<image URL>
 *
 * Fetches the image at `url` and responds with its contents as a data URI
 * ("data:<content-type>;base64,<payload>"). On failure it responds with the
 * upstream status code (or 500) and an error message.
 */
app.get('/api/image/convert', function (req, res) {
  var imageUrl = req.query.url;
  if (!imageUrl) {
    res.status(400).send('Missing "url" query parameter');
    return;
  }

  // NOTE(review): this fetches an arbitrary caller-supplied URL from the
  // server (SSRF risk). Consider validating or allow-listing hosts.
  // TODO: use request piping for significantly more efficient throughput
  // encoding: null keeps the body as a raw Buffer; decoding it to a string
  // first would corrupt binary image data before base64 encoding.
  request.get({ url: imageUrl, encoding: null }, function (error, response, body) {
    var succeeded = !error && response && response.statusCode >= 200 && response.statusCode < 300;
    if (!succeeded) {
      var status = (response && response.statusCode) || 500;
      var message = error ? String(error.message || error) : 'Upstream request failed';
      // res.status(...).send(...) replaces the deprecated res.send(status, body).
      res.status(status).send(message);
      return;
    }

    var data = "data:" + response.headers["content-type"] + ";base64," + body.toString('base64');
    res.send(data);
  });
});
My question is would it be more efficient to pipe the request.get to the result and, if so, how would this look?
Thanks!
I tried this with a Google Maps static image as an example, and it works:
var request = require('request');

// Streams the upstream image straight through to the client without
// buffering it in memory.
app.get('/', function (req, res) {
  var url = "http://maps.googleapis.com/maps/api/staticmap?center=Brooklyn+Bridge,New+York,NY&zoom=13&size=600x300&maptype=roadmap" +
    "&markers=color:blue%7Clabel:S%7C40.702147,-74.015794&markers=color:green%7Clabel:G;%7C40.711614,-74.012318" +
    "&markers=color:red%7Ccolor:red%7Clabel:C%7C40.718217,-73.998284&sensor=false";

  request.get(url)
    .on('error', function (err) {
      // Without a listener, a failed upstream request emits an unhandled
      // 'error' event, which would crash the process.
      console.error(err);
      if (!res.headersSent) {
        res.statusCode = 502;
      }
      res.end();
    })
    .pipe(res);
});
github:request

Categories