I found this website http://www.mess.be/inickgenwuname.php
It allows you to type in a name and it will generate a random rapper name. I wanted to have a button on my website that just generates the name for you so I decided to write some javascript that will send a request to this website and parse the response to get the random name.
Here is the node.js code I wrote.
/**
 * Fetches a Wu-Tang style rapper name from mess.be and reports it.
 *
 * Backward-compatible generalization: the original hard-coded the name
 * "something" and always console.log'd the result; both are now
 * overridable parameters with those defaults.
 *
 * @param {string} [realname="something"] name to base the alias on
 * @param {function} [done] node-style callback (err, name); defaults to
 *     logging the name (or the error) to the console
 */
function getRandomName(realname, done) {
    var http = require('http');
    var querystring = require('querystring');

    realname = realname || 'something';
    done = done || function (err, name) {
        if (err) { console.error(err); } else { console.log(name); }
    };

    // The PHP endpoint expects a classic url-encoded form body with a
    // single "realname" field.
    var qs = querystring.stringify({ realname: realname });

    var options = {
        hostname: 'www.mess.be',
        path: '/inickgenwuname.php',
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
            // Buffer.byteLength, not qs.length: they differ as soon as
            // the name contains multi-byte (non-ASCII) characters.
            'Content-Length': Buffer.byteLength(qs)
        }
    };

    var req = http.request(options, function (res) {
        var str = '';
        res.setEncoding('utf8');
        res.on('data', function (chunk) {
            str += chunk;
        });
        res.on('end', function () {
            // The generated name sits between this marker sentence and
            // "-And you" in the returned HTML.
            var marker = 'From this day forward, I will be known as... ';
            var start = str.indexOf(marker);
            var end = str.indexOf('-And you');
            if (start === -1 || end === -1) {
                return done(new Error('Unexpected response format from inickgenwuname.php'));
            }
            var s = str.slice(start + marker.length, end);
            // Strip ALL embedded newlines (the original replace("\n","")
            // only removed the first one) plus surrounding whitespace.
            s = s.replace(/\n/g, '').trim();
            done(null, s);
        });
    });

    // The original had no 'error' listener; any network failure would
    // have crashed the process with an unhandled 'error' event.
    req.on('error', function (err) {
        done(err);
    });

    req.write(qs);
    req.end();
}
When I went to the website and pressed f12 on chrome and inspected the code, I found this little segment...
So this is what I used to formulate the request to the php. However, I only guessed through trial and error that the data that needed to be sent was a key-value pair object where the key is realname. My question is: how would I have known this otherwise? Is there no way to find out from the website where the data sent with the POST is being received?
Why by guessing? The form tells you everything that needs to be sent.
Also you could press F12 -> Network, and then send the request. After this you look at the sent requests and search for a POST request. When you click on the /inickgenwuname.php request you get more information about it. In there you can see Response Headers / Request Headers and as a last category "Form Data". There you can see all the data that is sent with this form.
I hope this is the answer you were looking for.
Stealing bandwidth without proper compensation (so called web-scraping) is quite commonly frowned upon. I couldn't find anything on that site that allows for it although I did not search thoroughly.
Why don't you roll your own? It's very simple, as can be seen in this Q&D hack:
/**
 * Derives a deterministic "adjective noun" alias from a "First Last"
 * style input string.
 *
 * @param {string} input space-separated name; needs at least two parts
 * @returns {string|null} the alias, or null when a part is missing
 */
function wu_names(input){
    // word pools to draw from; extend freely
    var adjectives = ["annoying", "crazy", "expert", "insane", "lucky", "sardonic", "pestering"];
    var nouns = ["assassin", "bastard", "conjurer", "destroyer", "ninja", "prophet", "wizard"];

    // classic Java-style string hash (h = h * 31 + c), clamped to the
    // unsigned 32-bit range via >>> 0 on each step
    var primitive_hash = function(s){
        var h = 0;
        for(var i = 0; i < s.length; i++){
            h = (((h << 5) - h) >>> 0) + s.charCodeAt(i);
        }
        return h;
    };

    var parts = input.split(" ");
    var given = parts[0];
    var family = parts[1];

    // bail out unless both words are present
    if(given === undefined || family === undefined){
        return null;
    }

    // each word independently picks one entry from its pool
    var first = adjectives[primitive_hash(given) % adjectives.length];
    var second = nouns[primitive_hash(family) % nouns.length];
    return first + " " + second;
}
The lists of adjectives and nouns is quite short, you might add to them, as the comments suggest.
Related
I am receiving a push from a websocket (Java server/servlet) every 5 seconds. This works fine, the json are transmitted and parsed and delivered as planned. However, there are two problems.
I need to clear the output between every push or else the result just grows with the same (JSON) dataset over and over again.
I need to stop the handleMessage() function from duplicating data.
This is the javascript code:
/**
 * Renders the delegate list pushed over the websocket every few seconds.
 *
 * Fixes vs. the original:
 *  - `Element.empty()` does not exist on plain DOM nodes (it is a jQuery
 *    method), so the old code threw a TypeError on every push.
 *  - The old code appended fresh clones of the template nodes to <body>
 *    on every iteration and then wrote the values into the ORIGINAL
 *    template nodes, which is why output kept growing and duplicating.
 *    Now previous clones are removed first and the values go into the
 *    clones themselves.
 *
 * @param {string} message JSON-encoded array of delegate records with
 *     delegate_number, name_first, name_last and reservation_type fields
 *     (inferred from the property accesses below).
 */
function handleMessage(message) {
    // Template nodes holding one delegate's fields.
    const nodeDelegateNumber = document.getElementById("delegate_number");
    const nodeDelegateName = document.getElementById("delegate_name");
    const nodeReservationType = document.getElementById("reservation_type");

    // Drop everything appended by the previous push so the list does not
    // grow with each websocket message.
    document.querySelectorAll(".delegate-clone").forEach(function (n) {
        n.remove();
    });

    const delegates = JSON.parse(message);
    for (let i = 0; i < delegates.length; i++) {
        const cloneDelegateNumber = nodeDelegateNumber.cloneNode(true);
        const cloneDelegateName = nodeDelegateName.cloneNode(true);
        const cloneReservationType = nodeReservationType.cloneNode(true);

        // Avoid duplicate ids in the document; tag the clones so the
        // next push can find and remove them.
        cloneDelegateNumber.removeAttribute("id");
        cloneDelegateName.removeAttribute("id");
        cloneReservationType.removeAttribute("id");
        cloneDelegateNumber.classList.add("delegate-clone");
        cloneDelegateName.classList.add("delegate-clone");
        cloneReservationType.classList.add("delegate-clone");

        // textContent (not innerHTML): the pushed values are data, not markup.
        cloneDelegateNumber.textContent = delegates[i].delegate_number;
        cloneDelegateName.textContent = delegates[i].name_last + ", " + delegates[i].name_first;
        cloneReservationType.textContent = delegates[i].reservation_type;

        document.body.appendChild(cloneDelegateNumber);
        document.body.appendChild(cloneDelegateName);
        document.body.appendChild(cloneReservationType);
    }
}
If my English is bad I apologize; it is not me being lazy.
Best regards.
Why output is different in following:
1st scenario: prints
https://appmagic.io/modern/1
https://appmagic.io/modern/1
// Minimal URL shortener: maps a base-36 timestamp key to the long URL.
let urlHash = {};
const rootURL = 'http://tinyurl.com/';
var encode = function(longUrl) {
// Date.now() has millisecond resolution: two encode() calls within the
// same millisecond produce the SAME key, so the second call silently
// overwrites the first entry -- which is why both decodes below print
// the last URL stored.
let hash = Date.now().toString(36);
urlHash[hash] = longUrl;
return `${rootURL}${hash}`
};
var decode = function(shortUrl) {
// Strip the root prefix to recover the key, then look it up.
return urlHash[shortUrl.substring(rootURL.length)]
};
let url1 = encode("https://appmagic.io/classic/1");
let url2 = encode("https://appmagic.io/modern/1");
console.log(decode(url1));
console.log(decode(url2));
2nd scenario: prints
https://appmagic.io/classic/1
https://appmagic.io/modern/1
// Same shortener, but with two console.log calls inside encode().
let urlHash = {};
const rootURL = 'http://tinyurl.com/';
var encode = function(longUrl) {
let hash = Date.now().toString(36);
// Presumably the synchronous console.log calls take long enough that
// the next encode() lands in a later millisecond, so each call gets a
// distinct key and nothing is overwritten -- hence both URLs decode
// correctly here. (Timing-dependent; not guaranteed on every run.)
console.log({hash}); // difference in code
console.log({hash}); // difference in code
urlHash[hash] = longUrl;
return `${rootURL}${hash}`
};
var decode = function(shortUrl) {
return urlHash[shortUrl.substring(rootURL.length)]
};
let url1 = encode("https://appmagic.io/classic/1");
let url2 = encode("https://appmagic.io/modern/1");
console.log(decode(url1));
console.log(decode(url2));
My guess is:
Since Date.now() gives values in milliseconds, without the console calls (I/O, i.e. time-consuming synchronous operations) the two encode() calls are evaluated within the same millisecond and the hash stays the same — hence the identical output in the 1st scenario.
But if we add the console calls (I/O, i.e. time-consuming synchronous operations), they delay execution by more than a millisecond and different output appears in the 2nd scenario.
I'm not sure if my perception is correct. Can any-one provide better/correct explanation.
If my guess is right, how can I create a fast, collision-free hash?
thinking to use window.performance.now() but it is also not available in all browsers
While the comments address your main concern that it's possible to generate the same hash due to the encode function running more than once in the same millisecond, I'd like to leave this here as an example of addressing that by deferring the hashing until a unique key is generated:
/**
 * Creates a tiny URL shortener bound to rootUrl.
 * encode() re-reads Date.now() until it yields a key that has not been
 * used yet, so at most one short URL can be minted per millisecond --
 * but collisions become impossible.
 */
function Coder(rootUrl) {
  const store = {}
  const keyOf = (shortUrl) => shortUrl.substring(rootUrl.length)
  return {
    encode(longUrl) {
      let key = Date.now().toString(36)
      // spin until the clock ticks over and frees a new key
      while (store[key]) {
        key = Date.now().toString(36)
      }
      store[key] = longUrl
      return `${rootUrl}${key}`
    },
    decode(shortUrl) {
      return store[keyOf(shortUrl)]
    },
  }
}
// usage example: one coder instance, two URLs shortened and recovered
const coder = Coder('http://tinyurl.com/')
const shortClassic = coder.encode('https://appmagic.io/classic/1')
const shortModern = coder.encode('https://appmagic.io/modern/1')
console.log('url1 encoded', shortClassic)
console.log('url2 encoded', shortModern)
console.log('url1 decoded', coder.decode(shortClassic))
console.log('url2 decoded', coder.decode(shortModern))
With this you'd only be able to generate one hash per millisecond, but I suppose that isn't a bad trade-off.
I've been working with the Microsoft Bot Framework to create a bot that can interface between MS Teams and AWS. I've been trying to write some JS functions but have been unsuccessful in getting them to operate how I want them to.
Here is what I am currently working on and am stuck on:
I am creating a 'ping' like functionality so a bot user can ping an instance in AWS and receive its status whether its running and has passed the system checks or not. My code is currently able to take the user request for the ping, retrieve the information from AWS, and can even print that info to the console. However, when I am trying to retrieve that information back out of the object that I set it to and print it to MS Teams, it says my variable is undefined.
Some code snippets are below:
/**
 * Thin wrapper around an AWS status API used by the Teams bot.
 *
 * Original bug: the request() callback was a regular `function`, so
 * `this` inside it was NOT the aws_Link instance and the assignments to
 * this.instance_state / this.instance_status were lost -- which is why
 * pingFunction() printed "undefined". An arrow function keeps the
 * lexical `this`.
 *
 * NOTE(review): the request is asynchronous, so pingFunction() still
 * reports stale values if called before the response arrives.
 * api_link() now returns a Promise (the original returned undefined,
 * so this is backward-compatible) so callers can wait:
 *     apiLink1.api_link().then(() => session.send(apiLink1.pingFunction()));
 */
class aws_Link {
    constructor (mT, ping_1, i_state, i_status) {
        this.myTag = mT;                 // instance tag to query
        this.ping = ping_1;              // "TRUE" when the user asked for /ping
        this.instance_state = i_state;   // placeholder; filled by api_link()
        this.instance_status = i_status; // placeholder; filled by api_link()
    }

    /**
     * Builds the API URL and, when ping was requested, fetches the
     * instance state/status and stores them on this instance.
     * @returns {Promise<void>} resolves once the fields are populated
     *     (immediately when ping was not requested).
     */
    api_link () {
        const mainLink = API_LINK_TAKEN_OUT_FOR_OBVIOUS_REASONS;
        // Assemble "<api>myTag=<tag>&ping=<ping>" and strip any spaces.
        const finalLink = String(mainLink + "myTag=" + this.myTag + "&" + "ping=" + this.ping)
            .split(' ').join('');

        // set up API-key authentication
        const options = {
            url: finalLink,
            headers: {
                'x-api-key': 'AWS-PRIVATE-TOKEN'
            }
        };

        if (this.ping == "TRUE") { // if the user wants to use /ping
            return new Promise((resolve, reject) => {
                // Arrow function: `this` stays bound to the aws_Link instance.
                request(options, (error, response, body) => {
                    if (error) {
                        return reject(error);
                    }
                    body = JSON.parse(body);
                    this.instance_state = body['instanceState'];
                    this.instance_status = body['instanceStatus'];
                    resolve();
                });
            });
        }
        return Promise.resolve();
    }

    /** @returns {string} human-readable state/status summary. */
    pingFunction () {
        return "Instance State: " + this.instance_state +
            " Instance Status: " + this.instance_status;
    }
}
And here is where I call the api_link() function and pingFunction():
var apiLink1 = new aws_Link("MY_TAG_VALUE", "TRUE", "NULL", "NULL");
// NOTE(review): api_link() kicks off an asynchronous HTTP request and
// returns before the response arrives, so the pingFunction() call below
// runs too early and reads fields that have not been filled in yet --
// that is why it prints "undefined".
var completeAPILink = apiLink1.api_link();
session.send('Request complete.');
session.send("PING: " + apiLink1.pingFunction());
So essentially the user enters in some info which gets passed to where I create the "new aws_Link", which then, to my understanding, creates an object called apiLink1. From there, it makes the request to AWS in my api_link() function, which retrieves the info I want. I thought I was then saving this info when I do the: this.instance_state = h_state; & this.instance_status = h_status;. So then when I call pingFunction() again on apiLink1, I thought I would be able to retrieve the information back out using this.instance_state and this.instance_status, but all it prints out is undefined. Any clarification on why my current code isn't working, and any changes or improvements I can make, would be greatly appreciated.
Thanks!
I have a problem, I know what information I want to scrape of a website and I also know where the information is at. I know in what class it's in and also the xpath.
The problem I'm having is that no matter what I try, it seems like I can't scrape the content.
This is my scrape function:
// Zotero translator scrape routine for IEEE Computer Society pages.
// Collects keywords, snapshot/full-text attachments and a BibTeX record,
// then hands the BibTeX to Zotero's generic BibTeX import translator to
// build the item. Runs inside the Zotero translator sandbox (Zotero, ZU
// and detectWeb are provided by that environment).
function scrape(doc, url) {
var itemType = detectWeb(doc, doc.location.href);
// Keywords: text of the "Index Terms:" block, lower-cased, comma-split.
var keywords = new Array();
var keywordText = doc.evaluate('//div[span="Index Terms:"]/div', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
if (keywordText) keywords = (Zotero.Utilities.trimInternal(keywordText.textContent.toLowerCase())).split(",");
var attachments = new Array();
var notes = new Array();
// Always keep a snapshot of the abstract page itself.
attachments.push({
document: doc,
mimeType: "text/html",
title: "IEEE Computer Snapshot"
});
// Anchor wrapping the full-text-HTML icon image.
// NOTE(review): '#src' looks like a mangled '@src' -- XPath attribute
// tests use '@'; as written this expression is invalid. Confirm against
// the original translator source.
var htmls = doc.evaluate('//img[#src="/plugins/images/digitalLibrary/dl_html_icon.gif"]/ancestor::a', doc, null, XPathResult.ANY_TYPE, null);
var htmlDoc;
//TESTING
//var affiliation = doc.getElementsByTagName('meta')[property='citation_author_institution'].content;
//var affiliations = [];
// NOTE(review): `abstracts` is declared but never assigned anywhere in
// this function, so the `if (abstracts)` branches below are dead and
// item.abstractNote is never set -- this is the reported bug.
var abstracts;
// Deliberate assignment-in-condition: take the first matching anchor.
if (htmlDoc = htmls.iterateNext()) {
//var urlField = htmlDoc.attributes.getNamedItem("onclick").value;
var urlField = htmlDoc.href;
// Extract the fragment between the first pair of '"' characters.
urlField = urlField.substr(urlField.indexOf('"') + 1);
urlField = urlField.substr(0, urlField.indexOf('"'));
// NOTE(review): `templte` is not defined in this chunk -- presumably a
// (typoed) template/query-string variable declared elsewhere in the file.
if (urlField.indexOf("?") > -1) {
urlField += '&' + templte;
} else {
urlField += '?' + templte;
}
urlField = "http://www2.computer.org" + urlField;
var mimeTypeField = "text/html";
var titleField = "IEEE Computer Full Text Snapshot";
var attachment = {
url: urlField,
mimeType: mimeTypeField,
title: titleField
};
attachments.push(attachment);
}
// Full-text PDF link ('#class' / '#href' again look like mangled '@').
var pdfurl = ZU.xpathText(doc, '//div[#class="abs-pdf"]/a/#href')
if (pdfurl) {
var mimeTypeField = "application/pdf";
var titleField = "IEEE Computer Full Text PDF";
var attachment = {
url: pdfurl,
mimeType: mimeTypeField,
title: titleField
};
attachments.push(attachment);
} else {
// No PDF: leave a note explaining the likely login requirement.
notes.push({
note: "Complete PDF document was either not available or accessible. Please make sure you're logged in to the digital library to retrieve the complete PDF document."
});
}
// BibTeX: either embedded in the page or behind a .bib download link.
var bibtex = doc.evaluate('//div[#id="bibText-content"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
var bibtexlink = ZU.xpathText(doc, '//li/a[contains(text(), "BibTex") and contains(#href, ".bib")]/#href')
if (bibtex) {
bibtex = bibtex.textContent;
//bibtex = bibtex.substring(bibtex.indexOf("document.write('")+16,bibtex.indexOf("');Popup.document.close();"));
//workaround as bibtex translator obviously needs a whitespace following the first curly brace
bibtex = Zotero.Utilities.cleanTags(bibtex);
bibtex = Zotero.Utilities.trimInternal(bibtex);
// Feed the BibTeX text to the generic BibTeX import translator
// (9cb70025-... is Zotero's well-known BibTeX translator id).
var translator = Zotero.loadTranslator("import");
translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4");
translator.setString(bibtex);
translator.setHandler("itemDone", function(obj, item) {
if (item.url) { // add http to url
item.url = "http://" + item.url;
}
if (itemType) item.itemType = itemType;
item.attachments = attachments;
if (keywords) item.tags = keywords;
if (notes) item.notes = notes;
// Normalise the DOI so it starts at "10."
if (item.DOI) item.DOI = item.DOI.replace(/^.*?10\./, "10.");
//Affiliations
/*if (affiliation)
{
for (i=0; i<affiliations.length; i++)
{
affiliation.push(affiliations[i].textContent)
}
item.extra = affiliation.join("; ");
}*/
// Dead branch: `abstracts` is never assigned (see note above).
if (abstracts) {
item.abstractNote = abstracts;
}
item.complete();
});
translator.translate();
} else if (bibtexlink) {
// Same item-building logic, but the BibTeX must be fetched first.
ZU.doGet(bibtexlink, function(text) {
var translator = Zotero.loadTranslator("import");
translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4");
translator.setString(text);
translator.setHandler("itemDone", function(obj, item) {
if (item.url) { // add http to url
item.url = "http://" + item.url;
}
if (itemType) item.itemType = itemType;
item.attachments = attachments;
if (keywords) item.tags = keywords;
if (notes) item.notes = notes;
if (item.DOI) item.DOI = item.DOI.replace(/^.*?10\./, "10.");
//Affiliations
/*if (affiliation)
{
for (i=0; i<affiliations.length; i++)
{
affiliation.push(affiliations[i].textContent)
}
item.extra = affiliation.join("; ");
}*/
//Abstract
// Dead branch: `abstracts` is never assigned (see note above).
if (abstracts) {
item.abstractNote = abstracts;
}
item.complete();
});
translator.translate();
})
} else {
throw "No BibTeX found!";
}
}
It's the variable called abstracts that I wanna fill with the abstract from this website.
ieee article
I used Firebug to locate where this information was stored and found it in the class="article" in the div="tabs-main".
It looks something like this:
<div id="tabs-main">
<!-- place holder -->
<div class="tab-content" id="articleDetails" role="main" data-section="articleDetails.ajax"
>
<div class="article-blk">
<div class="article">
(I want this)--> <p>Distributed database systems (DDBS) have received considerable attention in recent years. Being a relatively young research field, there are still many problems associated with DDB systems that need solution. Concurrency control is one of these problems and, probably, the most extensively studied. However, most of the work has concentrated on the development of alternative solutions and the field seems to be ready for some comparative analysis work. This paper reports the results of a performance evaluation study on distributed database concurrency control algorithms. The research has resulted in the development of a formalism, based on Petri nets, for modeling and analysis purposes. The formalism, called the Extended Place/Transition Nets (EPTN), is both descriptively powerful in that it can be used to model various algorithms precisely and succinctly and to communicate them in a clear manner, while at the same time lending itself to be used as a performance evaluation tool. An EPTN simulator is implemented and various algorithms are studied using this tool. This paper describes both the formalism and the performance results that have been obtained.</p>
</div>
And in Firebug I also get the XPath which is:
/html/body/div[2]/div[8]/div/div[2]/div/div[2]/div[1]/div/div[1]
But I don't know how I can get this content. I have tried with
var abstracts = doc.querySelector(".article").innerHTML;
I have tried with doc.getElementByClassName().
But I can never get the content, var is always null.
Someone out there have an idea?
I am currently trying to use the National Library of Australia's API (the Trove API) to find pictures for a specific search term. I have the following functions, which should send a query from an input form to the API and receive images back; however, I am not receiving the majority of the images. As a particular example, if I search for 'sydney' I am only receiving 3 images back when there are in fact many more. For instance, this is the JSON that is returned. I know that you will not be familiar with this API, but in my code below, is there anything that you can see that would be causing it not to return all the images? I have changed a few things around to try and find the problem, as well as put in a few console.log statements, but it is still not being kind to me.
// Registry of image providers keyed by provider id. Each entry tracks
// how many matching images were found and their URLs for the current
// search.
// NOTE(review): resetImageData() later adds a numeric `totalimages`
// property to this same object, so for-in loops over it must expect a
// non-object entry.
var availableImages = {
"nla": {
"numImages":0,
"url_pattern":"nla.gov.au",
"images":[]
},
};
// NOTE(review): declared but never read anywhere in this chunk.
var url_patterns = ["nla.gov.au"];
// Wires up the Trove picture search form.
// Fixes vs. the original:
//  - .submit() was triggered BEFORE the handler was bound, so the
//    initial trigger ran without the AJAX handler (and could cause a
//    native form submission / page reload).
//  - The handler never prevented the default submit, so the browser
//    also performed a native submission after the AJAX call.
//  - The search term was escaped by replacing only spaces with %20;
//    encodeURIComponent escapes all reserved characters.
//  - sortBy was read but never used; it now feeds the sortby parameter
//    (falling back to the old hard-coded "relevance").
$(document).ready(function(){
    $("form#searchTrove").submit(function(event) {
        // Stay on the page; the search runs via AJAX below.
        event.preventDefault();
        resetImageData();
        // get input values
        var searchTerm = $("#searchTerm").val().trim();
        var sortBy = $("#sortBy").val();
        // create search query
        var url = "http://api.trove.nla.gov.au/result?key="
            + apiKey + "&l-availability=y%2Ff&encoding=json&zone=picture"
            + "&sortby=" + encodeURIComponent(sortBy || "relevance")
            + "&n=100&q=" + encodeURIComponent(searchTerm) + "&callback=?";
        console.log(url);
        // get the JSON information we need to display the images
        $.getJSON(url, function(data) {
            $('#output').empty();
            $.each(data.response.zone[0].records.work, processImages);
            printImages();
        });
    });
    // Run the initial search now that the handler is attached.
    $("form#searchTrove").submit();
});
// Sorts one Trove record into the availableImages registry when its
// identifier URL matches a known provider's url_pattern.
// (jQuery.each callback signature: index first, item second.)
// Fixes vs. the original: the for-in loop also visited the numeric
// `totalimages` counter that resetImageData() adds to the same object,
// dereferencing .url_pattern on a number; leftover "av"+object debug
// logging removed.
function processImages(index, troveItem){
    for(var key in availableImages){
        var provider = availableImages[key];
        // Skip non-provider entries such as the numeric `totalimages`.
        if (!provider || typeof provider !== "object") continue;
        var idUrl = troveItem.identifier[0].value;
        if(idUrl.indexOf(provider.url_pattern) >= 0){
            console.log("Trove URL "+idUrl+" Pattern: "+provider["url_pattern"]);
            provider.numImages++;
            availableImages.totalimages++;
            provider["images"].push(idUrl);
        }
    }
}
// Renders the search-results section header, then delegates the
// per-provider output (currently only the NLA) when images were found.
function printImages(){
    $("#output").append("<h3>Image Search Results</h3>");
    for(var key in availableImages){
        var entry = availableImages[key];
        if(entry["url_pattern"]=="nla.gov.au" && entry["numImages"]>0){
            printNLAImages();
            console.log(availableImages);
        }
    }
}
// Writes the NLA header plus one <img> tag per found image; the "-v"
// suffix on the image URL is kept exactly as in the original request.
function printNLAImages(){
    var nla = availableImages["nla"];
    $("#output").append("<h3>National Library of Australia</h3><p>"
        + nla["numImages"] + " images found from <a href='http://"
        + nla["url_pattern"] + "'>"
        + nla["url_pattern"] + "</a></p>");
    for (var idx in nla["images"]){
        $("#output").append("<img src='" + nla["images"][idx] + "-v'>");
    }
    console.log(availableImages);
}
// Clears per-provider counters and URL lists, plus the global total,
// before a new search.
// Fix vs. the original: `totalimages` was assigned BEFORE the loop, so
// the for-in loop then tried to set .numImages on that number primitive
// (a silent no-op in sloppy mode, a TypeError in strict mode). Guard
// non-object entries and reset the total after the loop.
function resetImageData(){
    for (var key in availableImages){
        var entry = availableImages[key];
        if (!entry || typeof entry !== "object") continue;
        entry.numImages = 0;
        entry["images"] = [];
    }
    availableImages.totalimages = 0;
    console.log(availableImages);
}