Inside a web worker, how to find html attribute from string?

Inside a web worker, how to find html attribute from string? - javascript

Inside a web worker, I have an html string like:
"<div id='foo'> <img src='bar'></img> <ul id='baz'></ul> </div>"
Is there any library I can import to easily access id and src attributes of the different tags ? Is regex the only way inside a worker ?

There are two ways to solve this problem efficiently:
Regex
With the risk of getting false positives, you can use something like:
var pattern = /<img [^>]*?src=(["'])((?:[^"']+|(?!\1)["'])*)(\1)/i;
var match = string.match(pattern);
var src = match ? match[2] : '';
Built-in parser & messaging
If getting the HTML right is a critical requirement, just let the browser parse the HTML, by passing the string to the caller. Here's a full example:
Caller:
var worker = new Worker('worker.js');
worker.addEventListener('message', function(e) {
if (!e.data) return;
if (e.data.method === 'getsrc') {
// Unlike document.createElement, etc, the following method does not
// load the image when the HTML is parsed
var doc = document.implementation.createHTMLDocument('');
doc.body.innerHTML = e.data.data;
var images = doc.getElementsByTagName('img');
var result = [];
for (var i=0; i<images.length; i++) {
result.push(images[i].getAttribute('src'));
}
worker.postMessage({
messageID: e.data.messageID,
result: result
});
} else if (e.data.method === 'debug') {
console.log(e.data.data);
}
});
worker.js
// A simple generic messaging API
var callbacks = {};
var lastMessageID = 0;
addEventListener('message', function(e) {
if (callbacks[e.data.messageID]) {
callbacks[e.data.messageID](e.data.result);
}
});
function sendRequest(method, data, callback) {
var messageID = ++lastMessageID;
if (callback) callbacks[messageID] = callback;
postMessage({
method: method,
data: data,
messageID: messageID
});
}
// Example:
sendRequest('getsrc',
'<img src="foo.png">' +
"<img src='bar.png'>" +
'<textarea><img src="should.not.be.visible"></textarea>',
function(result) {
sendRequest('debug', 'Received: ' + result.join(', '));
}
);

Related

How can I get the value of a checkbox from a for loop

My receive() function parses through data from a backend server and I use that data to create a renderHTML() function which displays the parsed data as an HTML string. I get the data to display and can also attach checkboxes perfectly fine. I am trying to get the value of questionid so that when the user clicks on the checkbox, I can use Ajax to send the values of which question was selected, which can be done by questionid. I am not sure on how to get the value of the questionid, store it, and send it through Ajax.
function receive() {
var text = document.getElementById("text").value;
var data = {
'text': text
};
var xhr = new XMLHttpRequest();
xhr.onreadystatechange = function() {
if (xhr.readyState === 4) {
var ourData = xhr.responseText;
var parsedData = JSON.parse(ourData);
console.log(parsedData);
renderHTML(parsedData);
}
};
xhr.open("POST", "URL", true);
xhr.send(JSON.stringify(data));
}
var questionid;
function renderHTML(data) {
var htmlString = "";
for (i = 0; i < data.length; i++) {
htmlString += "<p><input type='checkbox' value='data[i].questionid'>" +
data[i].questionid + "." + "\n";
htmlString += '</p>';
}
response.insertAdjacentHTML('beforeend', htmlString);
var t = this;
var checkboxes = document.querySelectorAll('input[type="checkbox"]');
for (var i = 0; i < checkboxes.length; i++) {
checkboxes[i].onclick = function() {
if (this.checked) {
console.log(this.questionid.value);
}
var xhr = new XMLHttpRequest();
xhr.onreadystatechange = function() {
if (xhr.readyState === 4) {
var Data = xhr.responseText;
console.log(Data);
var parseData = JSON.parse(Data);
};
xhr.send(JSON.stringify(data));
}
}
}

There are many ways in which you can achieve what you are looking for. For that, you will need to understand one of two concepts:
bind
closures
None of them is easy to understand for beginners but will help you vastly improve your coding skills once you get them. You can read about them here and here respectively.
The problem with your code (amongst other details) is that the value of i is global, and so by the time the DOM is rendered and the user can click in one of the checkboxes, all the checkboxes have the same value of i (the last one).
bind helps you solve this by setting an argument to the function that will always remain the same.
closures help you solve this by storing the value of a variable declared in a scope that is accessible only from a function that stores a reference to that variable.
Here is some code I wrote that does what you want using a most modern syntax, which I would highly recommend.
Specially, I would recommend you to read about the fetch api; which is a much cleaner way to make http request.
This code does what you are looking for:
function receive() {
const text = document.getElementById('text').value;
const options = {
method: 'POST',
body: JSON.stringify({ text })
}
fetch("<the url here>", options)
.then( response => response.json())
.then( data => {
renderHTML(JSON.parse(data));
})
}
function renderHTML(data){
for (const x of data) {
const content = x.questionid;
// i'm asuming that the response html element already exists
response.insertAdjacentHTML('beforeend', `<p><input value="${content}" type="checkbox">${content}</p>`);
}
document.querySelectorAll('input[type="checkbox"]').forEach( input => {
input.addEventListener((e) => {
const options = {
method: 'POST',
body: JSON.stringify(e.target.value)
};
fetch('<the second url here>', options).then( response => response.json())
.then( data => {
// 'data' is the response you were looking for.
})
})
})
}

Save 1st page of a PDF file using JavaScript

I need to extract 1st page of an uploaded PDF file(in SharePoint Online) & save it as a separate PDF file using JavaScript.
After some searching I found this. But I'm not able to understand how it works.
Please help.

As requested in the comment in a previous answer I am posting sample code to just get the first page in its original format, so not as a bitmap.
This uses a third party REST service that can PDF Convert, Merge, Split, Watermark, Secure and OCR files. As it is REST based, it supports loads of languages, JavaScript being one of them.
What follows is a self-contained HTML page that does not require any additional server side logic on your part. It allows a PDF file to be uploaded, splits up the PDF into individual pages and discards them all except for the first one. There are other ways to achieve the same using this service, but this is the easiest one that came to mind.
You need to create an account to get the API key, which you then need to insert in the code.
Quite a bit of the code below deals with the UI and pushing the generated PDF to the browser. Naturally you can shorten it significantly by taking all that code out.
<!DOCTYPE html>
<html>
<head>
<title>Muhimbi API - Split action</title>
<script type="text/javascript" src="https://ajax.googleapis.com/ajax/libs/jquery/3.1.1/jquery.min.js"></script>
<script type="text/javascript">
// ** Specify the API key associated with your subscription.
var api_key = '';
// ** For IE compatibility*
// ** IE does not support 'readAsBinaryString' function for the FileReader object. Create a substitute function using 'readAsArrayBuffer' function.
if (FileReader.prototype.readAsBinaryString === undefined) {
FileReader.prototype.readAsBinaryString = function (file_content) {
var binary_string = "";
var thiswindow = this;
var reader = new FileReader();
reader.onload = function (e) {
var bytes = new Uint8Array(reader.result);
var length = bytes.byteLength;
for (var i = 0; i < length; i++) {
binary_string += String.fromCharCode(bytes[i]);
}
thiswindow.content = binary_string;
$(thiswindow).trigger('onload');
}
reader.readAsArrayBuffer(file_content);
}
}
// ** For IE compatibility*
// ** Create a Blob object from the base64 encoded string.
function CreateBlob(base64string)
{
var file_bytes = atob(base64string);
var byte_numbers = new Array(file_bytes.length);
for (var i = 0; i < file_bytes.length; i++) {
byte_numbers[i] = file_bytes.charCodeAt(i);
}
var byte_array = new Uint8Array(byte_numbers);
var file_blob = new Blob([byte_array], {type: "application/pdf"});
return file_blob;
}
// ** Execute code when DOM is loaded in the browser.
$(document).ready(function ()
{
//** Make sure an api key has been entered.
if(api_key=='')
{
alert('Please update the sample code and enter the API Key that came with your subscription.');
}
// ** Attach a click event to the Convert button.
$('#btnConvert').click(function ()
{
// ** Proceed only when API Key is provided.
if(api_key=='')
return;
try
{
// ** Get the file object from the File control.
var source_file = document.getElementById('file_to_split').files[0];
//** Was a file uploaded?
if (source_file)
{
// ** Get the file name from the uploaded file.
var source_file_name = source_file.name;
var reader = new FileReader();
//** Read the file into base64 encoded string using FileReader object.
reader.onload = function(reader_event)
{
var binary_string;
if (!reader_event) {
// ** For IE.
binary_string = reader.content;
}
else {
// ** For other browsers.
binary_string = reader_event.target.result;
}
// ** Convert binary to base64 encoded string.
var source_file_content = btoa(binary_string);
if(source_file_content)
{
// ** We need to fill out the data for the conversion operation
var input_data = "{";
input_data += '"use_async_pattern": false';
input_data += ', "fail_on_error": false';
input_data += ', "split_parameter": 1';
input_data += ', "file_split_type": "ByNumberOfPages"';
input_data += ', "source_file_name": "' + source_file_name + '"'; // ** Always pass the name of the input file with the correct file extension.
input_data += ', "source_file_content": "' + source_file_content + '"'; // ** Pass the content of the uploaded file, making sure it is base64 encoded.
input_data += '}',
// ** Allow cross domain request
jQuery.support.cors = true;
// ** Make API Call.
$.ajax(
{
type: 'POST',
// ** Set the request header with API key and content type
beforeSend: function(request)
{
request.setRequestHeader("Content-Type", 'application/json');
request.setRequestHeader("api_key", api_key);
},
url: 'https://api.muhimbi.com/api/v1/operations/split_pdf',
data: input_data,
dataType: 'json',
// ** Carry out the conversion
success: function (data)
{
var result_code = "";
var result_details = "";
var processed_file_contents = "";
var base_file_name = "";
// ** Read response values.
$.each(data, function (key, value)
{
if (key == 'result_code')
{
result_code = value;
}
else if (key == 'result_details')
{
result_details = value;
}
else if (key == 'processed_file_contents')
{
processed_file_contents = value;
}
else if (key == 'base_file_name')
{
base_file_name = value;
}
});
// ** Show result code and details.
$("#spnResultCode").text(result_code);
$("#spnResultDetails").text(result_details);
if(result_code=="Success")
{
// ** Get first item in the array. This is the first page in the PDF
var processed_file_content = processed_file_contents[0];
// ** Convert to Blob.
var file_blob = CreateBlob(processed_file_content)
// ** Prompt user to save or open the converted file
if (window.navigator.msSaveBlob) {
// ** For IE.
window.navigator.msSaveOrOpenBlob(file_blob, base_file_name + "." + output_format);
}
else {
// ** For other browsers.
// ** Create temporary hyperlink to download content.
var download_link = window.document.createElement("a");
download_link.href = window.URL.createObjectURL(file_blob, { type: "application/octet-stream" });
download_link.download = base_file_name + ".pdf";
document.body.appendChild(download_link);
download_link.click();
document.body.removeChild(download_link);
}
}
},
error: function (msg, url, line)
{
console.log('error msg = ' + msg + ', url = ' + url + ', line = ' + line);
// ** Show the error
$("#spnResultCode").text("API call error.");
$("#spnResultDetails").text('error msg = ' + msg + ', url = ' + url + ', line = ' + line);
}
});
}
else
{
// ** Show the error
$("#spnResultCode").text("File read error.");
$("#spnResultDetails").text('Could not read file.');
}
};
reader.readAsBinaryString(source_file);
}
else
{
alert('Select file to convert.');
}
}
catch(err)
{
console.log(err.message);
// ** Show exception
$("#spnResultCode").text("Exception occurred.");
$("#spnResultDetails").text(err.message);
}
});
});
</script>
</head>
<body>
<div>
<form id="convert_form">
Select file: <input type="file" id="file_to_split" />
<br /><br />
<button id="btnConvert" type="button">Split PDF</button>
<br /><br />
Result_Code: <span id="spnResultCode"></span>
<br />
Result_Details: <span id="spnResultDetails"></span>
</form>
</div>
</body>
</html>
Big fat disclaimer, I worked on this service, so consider me biased. Having said that, it works well and could potentially solve your problem.

Finally found a solution.
First converting the uploaded PDF to image using PDF.JS, done some customization in the sample code.
Then saved the 1st page image as PDF using jsPDF.
The customized download code,
$("#download-image").on('click', function() {
var imgData = __CANVAS.toDataURL();
var doc = new jsPDF();
doc.addImage(imgData, 0, 0, 210, 300);
doc.save('page1.pdf');
});

JSON encode array parse correctly via Javascript

EXPLANATION
So from PHP to JavaScript encoded JSON array is sent in following:
$qa = array('question' => $question, 'a1' => $answer1, 'a2' => $answer2, 'a3' => $answer3);
echo json_encode($qa, JSON_UNESCAPED_UNICODE);
In JavaScript when I'm using like this (take a look at drawOutput):
function getSuccessOutput() {
getRequest(
't.php', // demo-only URL
drawOutput,
drawError
);
return false;
}
function drawOutput(responseText) {
var container = document.getElementById('output');
container.innerHTML = responseText;
}
function getRequest(url, success, error) {
var req = false;
try{
// most browsers
req = new XMLHttpRequest();
} catch (e){
// IE
try{
req = new ActiveXObject("Msxml2.XMLHTTP");
} catch (e) {
// try an older version
try{
req = new ActiveXObject("Microsoft.XMLHTTP");
} catch (e){
return false;
}
}
}
if (!req) return false;
if (typeof success != 'function') success = function () {};
if (typeof error!= 'function') error = function () {};
req.onreadystatechange = function(){
if(req .readyState == 4){
return req.status === 200 ?
success(req.responseText) : error(req.status)
;
}
}
req.open("GET", url, true);
req.send(null);
return req;
}
I got following output:
{"question":"This is my question?","a1":"answer1","a2":"answer2","a3":"answer3"}
DESIRED RESULTS
I need extract each value from this array and assign to variables, example:
var myQuestion = "This is my question?";
var answ1 = "answer1";
var answ2 = "answer2";
var answ3 = "answer3";
WHAT I'VE TRIED
function drawOutput(responseText) {
var container = document.getElementById('output');
container.innerHTML = responseText;
// OUTPUT: {"question":"This is my question?","a1":"answer1","a2":"answer2","a3":"answer3"}
container.innerHTML = responseText[0];
// OUTPUT: <
container.innerHTML = responseText[a1];
// OUTPUT: nothing (blank)
}
Also I've tried:
function drawOutput(responseText) {
$.getJSON("t.php", function(responseText){
var container = document.getElementById('output');
var theObject = JSON.parse(responseText);
container.innerHTML = theObject[a1];
});
}
// OUTPUT: nothing (blank)
Also I've tried to use var theObject = JSON.parse(JSON.stringify(responseText)); instead of var theObject = JSON.parse(responseText); in this case output: undefined
If I'm using in following:
function drawOutput(responseText) {
var container = document.getElementById('output');
var theObject = JSON.parse(responseText);
container.innerHTML = theObject[a1];
}
// Output: nothing (blank)
But I got an error in browser's console: SCRIPT1014: Invalid character
I've read many similar issues, but nothing helped in my case. Have you any ideas, how could I solve It?
UPDATE
That's what I'm trying now and have nothing as output (blank):
function drawOutput(responseText) {
$.getJSON("t.php", function(responseText){
var question = theObject.question; // Get the question
var a1 = theObject.a1; // Get the answers
var a2 = theObject.a2;
var a3 = theObject.a3;
var container = document.getElementById('output');
container.innerHTML = a1;
alert(a1);
});
}

Parsing it is correct. If you get the text via XMLHttpRequest, you'll need to use JSON.parse on responseText. If you use jQuery's $.getJSON, it will do it for you and give you the parsed result.
The problem is how you're trying to access the values from the result. In theObject[a1], you're trying to use a variable called a1, whose value will then be used as the name of the property to get. Instead, use theObject.a1 or theObject["a1"].
So here, where you're using getJSON and so it's already been parsed for you:
function drawOutput() {
$.getJSON("t.php", function(theObject){ // Note: Not `responseText`, a parsed object
var question = theObject.question; // Get the question
var a1 = theObject.a1; // Get the answers
var a2 = theObject.a2;
var a3 = theObject.a3;
// ...use them...
});
}
Side note: Instead of separate a1,a2, anda3` properties on the object you're returning, you might consider using an array.
$qa = array(
'question' => $question,
'answers' => array($answer1, $answer2, $answer3)
);
echo json_encode($qa);
then
function drawOutput() {
$.getJSON("t.php", function(theObject){
var question = theObject.question; // Get the question
var answers = theObject.answers; // Get the answers
// ...use question, and `answers[0]` through `answers[2]`, where
// you can get the number of answers from `answers.length`...
});
}

You can access object values with one of these method:
var responseText = '{"question":"This is my question?","a1":"answer1","a2":"answer2","a3":"answer3"} ';
var theObject = JSON.parse(responseText);
console.log(theObject['a1']); // a1 in string
console.log(theObject.a1); // Gives the same result

In PHP you need to json_encode your response :
$ServerResponse= json_encode($MyJSON);
In Javascript you need to JSON.parse the response you got from PHP :
var myJSONObject = JSON.parse(ServerResponse);
After that , you have a normal Object in your hands , and that is myJsonObject. You can access its properties either with the use of "dot" (Eg. myJsonObject.PROPERTY_NAME) or with brackets (Eg myJsonObject["PROPERTY_NAME"]).

node.js + cheerio scrape: Passing an array of urls to download?

Firstly, here is my code as I've progressed so far:
var http = require("http");
// Utility function that downloads a URL and invokes
// callback with the data.
function download(url, callback) {
http.get(url, function(res) {
var data = "";
res.on('data', function (chunk) {
data += chunk;
});
res.on("end", function() {
callback(data);
});
}).on("error", function() {
callback(null);
});
}
var cheerio = require("cheerio");
var url = "http://www.bloglovin.com/en/blogs/1/2/all";
var myArray = [];
var a = 0;
var getLinks = function(){download(url, function(data) {
if (data) {
// console.log(data);
var $ = cheerio.load(data);
$(".content").each(function(i, e) {
var blogName = $(e).find(".blog-name").text();
var followLink = $(e).find("a").attr("href");
var blogSite = $(e).find(".description").text();
myArray[a] = [a];
myArray[a]["blogName"] = blogName;
myArray[a]["followLink"] = "http://www.bloglovin.com"+followLink;
myArray[a]["blogSite"] = blogSite;
a++;
console.log(myArray);
});
}
});
}
getLinks();
As you can see, followLinks is concatenated to followUrl, of which I'd like to pass through the 'url' download, so effectively I'll be scraping each of the pages using the same CSS rules, which will be added to the multidimensional array for the corresponding blogger.
How can I go about this?

I do something similar in one of my scraping jobs, but I use the async.js library to accomplish. Note that I'm also using the request module and cheerio.js in my scraping. I fetch and scrape rows of data from a single webpage, but suspect you could do something similar to fetch URLs and request / scrape them in the same manner.
I also admit this is quite basic coding, certainly could be optimized with a bit of refactoring. Hope it gives you some ideas at least...
First, I use request to fetch the page and call my parse function -
var url = 'http://www.target-website.com';
function(lastCallback) {
request(url, function(err, resp, body) {
if(!err) { parsePage(err, resp, body, lastCallback); }
else { console.log('web request error:' + resp.statusCode); }
}
}
Next, in my parsePage function, I load the website into Cheerio, fetch the HTML of each data row into an array, push my parseRow function and each HTML segment into another array, and use async.parallel to process each iteration -
var rows = [];
function parsePage(err, resp, body, callback1) {
var $ = cheerio.load(body);
$('div#targetTable tr').each(function(i, elem) {
rows.push($(this).html());
});
var scrRows = [];
rows.forEach(function(row) {
scrRows.push(function(callback2) {
parseRow(err, resp, row);
callback2();
});
async.parallel(scrRows, function() {
callback1();
});
}

Inside your loop, just create an object with the properties you scrape then push that object onto your array.
var blogInfo = {
blogName: blogName,
followLink: "http://www.bloglovin.com"+followLink;
blogSite: blogSite
};
myArray.push(blogInfo);

You have defined a = 0; So
myArray[a] = [a]; // => myArray[0] = [0]; myArray[0] becomes an array with 0 as only member in it
All these statements throw an error since Array can have only integer as keys.
myArray[a]["blogName"] = blogName;
myArray[a]["followLink"] = "http://www.bloglovin.com"+followLink;
myArray[a]["blogSite"] = blogSite;
Instead try this:
var obj = {
index: a,
blogName: blogName,
followLink: "http://www.bloglovin.com" + followLink,
blogSite: blogSite
}
myArray.push(obj);
console.log(myArray);

How to pass parameters in url callback function in javascript

I have a javascript file from a wordpress plugin that I'm trying to modify. It seems to be getting away with cross-domain requests with some sort of loophole. It's this function:
function urlCallback(url){
var req = url;
var head = document.getElementsByTagName("head").item(0);
var script = document.createElement("script");
script.setAttribute("type", "text/javascript");
script.setAttribute("src", req);
head.appendChild(script);
}
The url passed is something like 'http://url.com/page?callback=namespace.myFunction' where myFunction is a function defined elsewhere in the script.
From what I understand, this is inserting a source url at the top of my page, causing the browser to load that page. The callback attached to that url is then called, passing the result as a single parameter to the example function, myFunction.
My problem is that I need to call myFunction for a few different unique urls, but the results are tied to the calling url. myFunction seems to be called whenever the page finishes loading, so I can't simply keep count of which data set it's operating on. I need some way for myFunction to either be passed an additional variable encoded in the callback url, or for myFunction to know the url it was attached to.
Can anyone help me?
EDIT:
To elaborate further, here is a simplified version of the code I have:
var parseUrls = (function(){
function urlCallback(url){
var req = url;
// Here is where I need to save the url
var head = document.getElementsByTagName("head").item(0);
var script = document.createElement("script");
script.setAttribute("type", "text/javascript");
script.setAttribute("src", req);
head.appendChild(script);
}
return {
options: {},
parseNextUrl: function(result) {
if (!result || !result.data) { return; }
var data = result.data;
// Here is where I need the url
},
parseUrl: function(result) {
if (!result || !result.data) { return; }
var data = result.data;
for (var i = 0; i < data.length; i++) {
urlCallback( data[i].url + "/new_url/page?callback=parseUrls.parseNextUrl" );
}
},
showResult: function(options){
urlCallback( "http://start.url.com/page?callback=parseUrls.parseUrl" );
this.options = options;
}
};
})();
Just to be clear, parseNextUrl is called whenever the source request is finished. Which means all the urlCallback calls have already finished by then.

Here's the updated code based on the newly provided code.
var parseUrls = (function(){
function urlCallback(url){
// Create request
var head = document.getElementsByTagName("head").item(0);
var script = document.createElement("script");
script.setAttribute("type", "text/javascript");
script.setAttribute("src", req);
head.appendChild(script);
}
return {
parsers: [], //response handler array
options: {},
parseUrl: function(result) {
//parseUrls.parseUrl.url = request URL
if (!result || !result.data) { return; }
var data = result.data;
// Create requests
for (var i = 0; i < data.length; i++) {
// Create new response handler
var parseNextUrl = function(result) {
// parseNextUrl.url = request URL
if (!result || !result.data) { return; }
var data = result.data;
// Check the URL
console.log('Result URL = ' + parseNextUrl.url);
};
// Make callback names and URLs for each handler
var cbName = "parseUrls.parsers[" + this.parsers.length + "]";
var req = data[i].url + "/new_url/page?callback=" + encodeURI(cbName);
// Save the URL in the handler
parseNextUrl.url = req;
// Put handler into storage.
// Note: Don't delete/insert any of parsers array element
// until no more new requests and all responses are received.
this.parsers.push(parseNextUrl);
urlCallback(req);
}
},
showResult: function(options){
this.parseUrl.url = "http://start.url.com/page?callback=parseUrls.parseUrl";
urlCallback(this.parseUrl.url);
this.options = options;
}
};
})();

We Keep Coding

JavaScript is the programming language of the Web.

Inside a web worker, how to find html attribute from string? - javascript

Inside a web worker, I have an html string like: "<div id='foo'> <img src='bar'></img> <ul id='baz'></ul> </div>" Is there any library I can import to easily access id and src attributes of the different tags ? Is regex the only way inside a worker ?

Related

How can I get the value of a checkbox from a for loop

Save 1st page of a PDF file using JavaScript

JSON encode array parse correctly via Javascript

node.js + cheerio scrape: Passing an array of urls to download?

How to pass parameters in url callback function in javascript

Categories

Resources