page.open breaks when called multiple times [duplicate] - javascript

My goal is open many pages(with a short delay) and save my data to a file.
But my code does not work.
var gamesList = [url1,url2,url3];
//gamesList is getting from a file
var urls = [];
var useragent = [];
useragent.push('Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14');
useragent.push('Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50');
var page = require('webpage').create();
page.settings.userAgent = useragent[Math.floor(Math.random() * useragent.length)];
console.log('Loading a web page');
function handle_page(url){
page.open(url,function(){
//...
var html= page.evaluate(function(){
// ...do stuff...
page.injectJs('jquery.min.js');
return $('body').html();
});
//save to file
var file = fs.open('new_test.txt', "w");
file.write(html + '\n');
file.close();
console.log(html);
setTimeout(next_page,1000);
});
}
function next_page(urls){
var url=urls.shift();
if(!urls){
phantom.exit(0);
}
handle_page(url);
}
next_page(urls);
phantom.exit();
Does it matter where I am writing phantom.exit();? If I am writing it in the page.open() callback in the end then the 1st page opens well.

Your idea of opening multiple pages with recursion is correct, but you have some problems.
Exit
As you correctly noted, you have a problem with phantom.exit(). Since page.open() and setTimeout() are asynchronous, you only need to exit when you are done. When you call phantom.exit() at the end of the script, you're exiting before the first page is even loaded.
Simply remove that last phantom.exit(), because you already have another exit at the correct place.
Page context
page.evaluate() provides access to the DOM context (page context). The problem is that it is sandboxed. Inside of that callback you have no access to variables defined outside. You can explicitly pass variables in, but they have to be primitive objects which page is not. You simply have to access to page inside of page.evaluate(). You need to inject jQuery before calling page.evaluate().
Files
You're overwriting the file in every iteration by not changing the file name. Either you need to change the filename or use the appending mode 'a' instead of 'w'.
Then you don't need to open a stream when you simply want to write once. Change:
var file = fs.open('new_test.txt', "w");
file.write(html + '\n');
file.close();
to
fs.write('new_test.txt', html + '\n', 'a');
Recursive step
The recursive step with calling the next_page() function requires that you pass in the urls. Since urls is already a global variable and you change it in each iteration, you don't need to pass in the urls.
You also don't need to add a setTimeout(), because everything before inside of the page.open() callback was synchronous.
Fixed Script
//...
var urls = [/*....*/];
function handle_page(url){
page.open(url, function(){
//...
page.injectJs('jquery.min.js');
var html = page.evaluate(function(){
// ...do stuff...
return $('body').html();
});
//save to file
fs.write('new_test.txt', html + '\n', 'a');
console.log(html);
next_page();
});
}
function next_page(){
var url = urls.shift();
if(!url){
phantom.exit(0);
}
handle_page(url);
}
next_page();

Explanation above is very helpful to me. So, thanx a lot. Come to the point, sometimes js function renders even after the page has already loaded, in this scenario setTimeout() method, is very helpful. And,I faced problem like this when scraping many site....I used setTimeout() method in this way,
`
function handle_page(url){page.open(url, function() {setTimeout(function() {var html_text=page.evaluate(function(){
var is= document.querySelectorAll("li.bookinDetails_c1_180616")[0].textContent;
var isbn=is.trim();
//return s1;
var w,x,y,z,z1,w1,u,a;
a= document.querySelectorAll("li.selectableBook");
if(a.length==5){
w1=document.querySelectorAll("span.bookPrice")[0].textContent;
w=w1.trim();
x1=document.querySelectorAll("span.bookPrice")[1].textContent;
x=x1.trim();
y1=document.querySelectorAll("span.bookPrice")[2].textContent;
y=y1.trim();
z1=document.querySelectorAll("span.bookPrice")[3].textContent;
z=z1.trim();
u=isbn+"=>["+"RENT USED:-"+w+","+"RENT NEW:-"+x+","+"BUY USED:-"+y+","+"BUY NEW:-"+z+"]";
return u;
}else{
y1=document.querySelectorAll("span.bookPrice")[0].textContent;
y=y1.trim();
z1=document.querySelectorAll("span.bookPrice")[1].textContent;
z=z1.trim();
u=isbn+"=>["+"BUY USED:-"+y+","+"BUY NEW:-"+z+"]";
return u;
}
});
fs.write('html.txt',html_text+'\r\n','a');
next_page();
}, 400);
});
}
`

I ran into a similar problem recently. I found an answer here:https://stackoverflow.com/questions/34120421/scraping-multiple-urls-by-looping-in-phantomjs. But it did not work for me. I think recursion should be used instead of loop. But I don't know how to write the code. Fortunately, I found a solution here. These answers above are very helpful to me. I will post my code here, hoping to help others in the future.
var urls = new Array("1.html", "2.html", "3.html");
var page = new WebPage();
var fs = require('fs');
var count = 1;
function handle_page(url) {
page.open(url, function () {
setTimeout(function () {
var ct = page.evaluate(function () {
return document.getElementsByClassName('content');
});
var fn = '00' + count + '.html';
console.log(fn);
try {
fs.write(fn, ct[0].textContent, 'w');
} catch (e) {
console.log(e);
};
count += 1;
next_page();
}, 1000);
});
}
function next_page() {
var url = urls.shift();
if (!url) {
phantom.exit(0);
}
handle_page(url);
}
next_page();

Related

Using multiple page.open in one script

My goal is open many pages(with a short delay) and save my data to a file.
But my code does not work.
var gamesList = [url1,url2,url3];
//gamesList is getting from a file
var urls = [];
var useragent = [];
useragent.push('Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14');
useragent.push('Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50');
var page = require('webpage').create();
page.settings.userAgent = useragent[Math.floor(Math.random() * useragent.length)];
console.log('Loading a web page');
function handle_page(url){
page.open(url,function(){
//...
var html= page.evaluate(function(){
// ...do stuff...
page.injectJs('jquery.min.js');
return $('body').html();
});
//save to file
var file = fs.open('new_test.txt', "w");
file.write(html + '\n');
file.close();
console.log(html);
setTimeout(next_page,1000);
});
}
function next_page(urls){
var url=urls.shift();
if(!urls){
phantom.exit(0);
}
handle_page(url);
}
next_page(urls);
phantom.exit();
Does it matter where I am writing phantom.exit();? If I am writing it in the page.open() callback in the end then the 1st page opens well.
Your idea of opening multiple pages with recursion is correct, but you have some problems.
Exit
As you correctly noted, you have a problem with phantom.exit(). Since page.open() and setTimeout() are asynchronous, you only need to exit when you are done. When you call phantom.exit() at the end of the script, you're exiting before the first page is even loaded.
Simply remove that last phantom.exit(), because you already have another exit at the correct place.
Page context
page.evaluate() provides access to the DOM context (page context). The problem is that it is sandboxed. Inside of that callback you have no access to variables defined outside. You can explicitly pass variables in, but they have to be primitive objects which page is not. You simply have to access to page inside of page.evaluate(). You need to inject jQuery before calling page.evaluate().
Files
You're overwriting the file in every iteration by not changing the file name. Either you need to change the filename or use the appending mode 'a' instead of 'w'.
Then you don't need to open a stream when you simply want to write once. Change:
var file = fs.open('new_test.txt', "w");
file.write(html + '\n');
file.close();
to
fs.write('new_test.txt', html + '\n', 'a');
Recursive step
The recursive step with calling the next_page() function requires that you pass in the urls. Since urls is already a global variable and you change it in each iteration, you don't need to pass in the urls.
You also don't need to add a setTimeout(), because everything before inside of the page.open() callback was synchronous.
Fixed Script
//...
var urls = [/*....*/];
function handle_page(url){
page.open(url, function(){
//...
page.injectJs('jquery.min.js');
var html = page.evaluate(function(){
// ...do stuff...
return $('body').html();
});
//save to file
fs.write('new_test.txt', html + '\n', 'a');
console.log(html);
next_page();
});
}
function next_page(){
var url = urls.shift();
if(!url){
phantom.exit(0);
}
handle_page(url);
}
next_page();
Explanation above is very helpful to me. So, thanx a lot. Come to the point, sometimes js function renders even after the page has already loaded, in this scenario setTimeout() method, is very helpful. And,I faced problem like this when scraping many site....I used setTimeout() method in this way,
`
function handle_page(url){page.open(url, function() {setTimeout(function() {var html_text=page.evaluate(function(){
var is= document.querySelectorAll("li.bookinDetails_c1_180616")[0].textContent;
var isbn=is.trim();
//return s1;
var w,x,y,z,z1,w1,u,a;
a= document.querySelectorAll("li.selectableBook");
if(a.length==5){
w1=document.querySelectorAll("span.bookPrice")[0].textContent;
w=w1.trim();
x1=document.querySelectorAll("span.bookPrice")[1].textContent;
x=x1.trim();
y1=document.querySelectorAll("span.bookPrice")[2].textContent;
y=y1.trim();
z1=document.querySelectorAll("span.bookPrice")[3].textContent;
z=z1.trim();
u=isbn+"=>["+"RENT USED:-"+w+","+"RENT NEW:-"+x+","+"BUY USED:-"+y+","+"BUY NEW:-"+z+"]";
return u;
}else{
y1=document.querySelectorAll("span.bookPrice")[0].textContent;
y=y1.trim();
z1=document.querySelectorAll("span.bookPrice")[1].textContent;
z=z1.trim();
u=isbn+"=>["+"BUY USED:-"+y+","+"BUY NEW:-"+z+"]";
return u;
}
});
fs.write('html.txt',html_text+'\r\n','a');
next_page();
}, 400);
});
}
`
I ran into a similar problem recently. I found an answer here:https://stackoverflow.com/questions/34120421/scraping-multiple-urls-by-looping-in-phantomjs. But it did not work for me. I think recursion should be used instead of loop. But I don't know how to write the code. Fortunately, I found a solution here. These answers above are very helpful to me. I will post my code here, hoping to help others in the future.
var urls = new Array("1.html", "2.html", "3.html");
var page = new WebPage();
var fs = require('fs');
var count = 1;
function handle_page(url) {
page.open(url, function () {
setTimeout(function () {
var ct = page.evaluate(function () {
return document.getElementsByClassName('content');
});
var fn = '00' + count + '.html';
console.log(fn);
try {
fs.write(fn, ct[0].textContent, 'w');
} catch (e) {
console.log(e);
};
count += 1;
next_page();
}, 1000);
});
}
function next_page() {
var url = urls.shift();
if (!url) {
phantom.exit(0);
}
handle_page(url);
}
next_page();

.each in Phantom.js with Cheerio - any alternative?

I'm looking for some function that would allow me to reiterate through the div elements scraped by PhantomJs (which uses jQuery-like syntax), but one by one - not all at the same time like .each seems to be doing. So I guess I need it to run syncroniously.
At the moment my code looks something like this
page.open("https://www.google.com" + expandedurl, function (status) {
console.log("opened google knowledge graph ", status);
page.evaluate(function () { return document.body.innerHTML; }, function (result) {
var $ = cheerio.load(result);
$(".kltat").each(function() {
var link = $(this);
var text = link.text();
launch(text);
});
ph.exit();
// Move on to the next one
});
});
I need something that would not launch all of the each iterations at the same time. Maybe there's some way of reiterating I could use that would not work asynchroniously - that's what i need...
If launch is something asynchronous and is able to take a callback, then
Use async for this:
var async = require('async');
var $ = cheerio.load(result);
var callbacks = [];
$(".kltat").each(function() {
var link = $(this);
var text = link.text();
callbacks.push(function(cb){
launch(text, cb);
});
});
async.series(callbacks, function(){
ph.exit();
});
Otherwise, you can either use a static wait amount:
callbacks.push(function(cb){
launch(text);
setTimeout(function(){
cb(null);
});
});
or use something like waitFor to wait for an external condition triggered through launch.

Why is the JavaScript console saying my variable is undefined, even when I defined it two lines before?

I'm using WebSockets to connect to a remote host, and whenever I populate realData and pass it to grapher(), the JavaScript console keeps telling me realDatais undefined. I tried checking the type of the data in the array, but it seems to be fine. I've called grapher() before using an array with random data, and the call went through without any problems. With the data from the WebSocket, however, the call will always give me "error: realData is not defined". I'm not sure why this is happening. Here is the code I used:
current.html:
var command = "Hi Scott"
getData();
function getData()
{
console.log("getData is called");
if("WebSocket" in window)
{
var dataCollector = new WebSocket("ws://console.sb2.orbit-lab.org:6100",'binary');
dataCollector.binaryType = "arraybuffer";
console.log(dataCollector.readyState);
dataCollector.onopen = function()
{
//alert("The WebSocket is now open!");
console.log("Ready state in onopen is: " + dataCollector.readyState);
dataCollector.send(command);
console.log(command + " sent");
}
dataCollector.onmessage = function(evt)
{
console.log("onmessage is being called");
var realData = new Uint8Array(evt.data);
console.log(realData);
grapher(realData); //everything up to this point works perfectly.
}
dataCollector.onclose = function()
{
alert("Connection to Server has been closed");
}
return (dataCollector);
}
else
{
alert("Your browser does not support WebSockets!");
}
}
graphing.js:
function grapher(realData)
{
console.log("grapher is called");
setInterval('myGraph(realData);',1000); //This is where the error is. I always get "realData is not defined".
}
function myGraph(realData)
{
/*
for(var i = 0; i < SAarray.length; i++) // Loop which will load the channel data from the SA objects into the data array for graphing.
{
var data[i] = SAarray[i];
}
*/
console.log("myGraph is called");
var bar = new RGraph.Bar('channelStatus', realData);
bar.Set('labels', ['1','2','3','4','5','6','7','8']);
bar.Set('gutter.left', 50);
bar.Set('gutter.bottom', 40);
bar.Set('ymax',100);
bar.Set('ymin',0);
bar.Set('scale.decimals',1);
bar.Set('title','Channel Status');
bar.Set('title.yaxis','Status (1 is on, 0 is off)');
bar.Set('title.xaxis','Channel Number');
bar.Set('title.xaxis.pos',.1);
bar.Set('background.color','white');
bar.Set('colors', ['Gradient(#a33:red)']);
bar.Set('colors', ['red']);
bar.Set('key',['Occupied','Unoccupied']);
bar.getShapeByX(2).Set('colors',barColor(data[0]));
bar.Draw();
}
Because strings (as code) passed to setInterval execute in the global scope, therefore the realData parameter isn't available. There's rarely a good reason to pass a string to setInterval. Instead, use:
setInterval(function () {
myGraph(realData);
}, 1000);
Reference:
https://developer.mozilla.org/en-US/docs/Web/API/window.setInterval
Try it without it needing to evaluate a string:
setInterval(function() {
myGraph(realData);
},1000);
Any time you are using setTimeout or setInterval, you should opt for passing an actual function instead of a string.

Dynamically loading JavaScript synchronously

I'm using the module pattern, one of the things I want to do is dynamically include an external JavaScript file, execute the file, and then use the functions/variables in the file in the return { } of my module.
I can't figure out how to do this easily. Are there any standard ways of performing a pseudo synchronous external script load?
function myModule() {
var tag = document.createElement("script");
tag.type = "text/javascript";
tag.src = "http://some/script.js";
document.getElementsByTagName('head')[0].appendChild(tag);
//something should go here to ensure file is loaded before return is executed
return {
external: externalVariable
}
}
There is only one way to synchronously load and execute a script resource, and that is using a synchronous XHR
This is an example of how to do this
// get some kind of XMLHttpRequest
var xhrObj = createXMLHTTPObject();
// open and send a synchronous request
xhrObj.open('GET', "script.js", false);
xhrObj.send('');
// add the returned content to a newly created script tag
var se = document.createElement('script');
se.type = "text/javascript";
se.text = xhrObj.responseText;
document.getElementsByTagName('head')[0].appendChild(se);
But you shouldn't in general use synchronous requests as this will block everything else.
But that being said, there are of course scenarios where this is appropriate.
I would probably refactor the containing function into an asynchronous pattern though using an onload handler.
The accepted answer is NOT correct.
Loading a file synchronously is not the same as executing the file synchronously - which is what the OP requested.
The accepted answer loads the file sync, but does nothing more than append a script tag to the DOM. Just because appendChild() has returned does not in anyway guarantee that the script has finished executing and it's members are initialised for use.
The only (see caveat) way to achieve the OPs question is to sync load the script over XHR as stated, then read as text and pass into either eval() or a new Function() call and wait for that function to return. This is the only way to guarantee the script is loaded AND executed synchronously.
I make no comment as to whether this is a wise thing to do either from a UI or security perspective, but there are certainly use cases that justify a sync load & execute.
Caveat:
Unless you're using web workers in which case just call loadScripts();
This is the code that I'm using for multiple file load in my app.
Utilities.require = function (file, callback) {
callback = callback ||
function () {};
var filenode;
var jsfile_extension = /(.js)$/i;
var cssfile_extension = /(.css)$/i;
if (jsfile_extension.test(file)) {
filenode = document.createElement('script');
filenode.src = file;
// IE
filenode.onreadystatechange = function () {
if (filenode.readyState === 'loaded' || filenode.readyState === 'complete') {
filenode.onreadystatechange = null;
callback();
}
};
// others
filenode.onload = function () {
callback();
};
document.head.appendChild(filenode);
} else if (cssfile_extension.test(file)) {
filenode = document.createElement('link');
filenode.rel = 'stylesheet';
filenode.type = 'text/css';
filenode.href = file;
document.head.appendChild(filenode);
callback();
} else {
console.log("Unknown file type to load.")
}
};
Utilities.requireFiles = function () {
var index = 0;
return function (files, callback) {
index += 1;
Utilities.require(files[index - 1], callBackCounter);
function callBackCounter() {
if (index === files.length) {
index = 0;
callback();
} else {
Utilities.requireFiles(files, callback);
}
};
};
}();
And this utilities can be used by
Utilities.requireFiles(["url1", "url2",....], function(){
//Call the init function in the loaded file.
})
The most Node.js-like implementation I could come up with was able to load JS files synchonously, and use them as objects/modules
var scriptCache = [];
var paths = [];
function Import(path)
{
var index = 0;
if((index = paths.indexOf(path)) != -1) //If we already imported this module
{
return scriptCache [index];
}
var request, script, source;
var fullPath = window.location.protocol + '//' + window.location.host + '/' + path;
request = new XMLHttpRequest();
request.open('GET', fullPath, false);
request.send();
source = request.responseText;
var module = (function concealedEval() {
eval(source);
return exports;
})();
scriptCache.push(module);
paths.push(path);
return module;
}
An example source (addobjects.js):
function AddTwoObjects(a, b)
{
return a + b;
}
this.exports = AddTwoObjects;
And use it like this:
var AddTwoObjects = Import('addobjects.js');
alert(AddTwoObjects(3, 4)); //7
//or even like this:
alert(Import('addobjects.js')(3, 4)); //7
the accepted answer is not correct:
the script.async = false; directive only means that html parsing will be paused during script execution. this does not guarantee in which order javascript code will run. see https://developers.google.com/web/fundamentals/performance/optimizing-content-efficiency/loading-third-party-javascript/
the easiest and most elegant solution which was yet to be mentioned here is using promises, like so:
function loadScript(url) {
return new Promise((resolve, reject) => {
var script = document.createElement('script')
script.src = url
script.onload = () => {
resolve()
}
script.onerror = () => {
reject('cannot load script '+ url)
}
document.body.appendChild(script)
})
}
and then when you want to execute scripts in order:
loadScript('myfirstscript.js').then(() => {
console.log('first script ran');
loadScript('index.js').then(() => {
console.log('second script ran');
})
})
I had the following problem(s) with the existing answers to this question (and variations of this question on other stackoverflow threads):
None of the loaded code was debuggable
Many of the solutions required callbacks to know when loading was finished instead of truly blocking, meaning I would get execution errors from immediately calling loaded (ie loading) code.
Or, slightly more accurately:
None of the loaded code was debuggable (except from the HTML script tag block, if and only if the solution added a script elements to the dom, and never ever as individual viewable scripts.) => Given how many scripts I have to load (and debug), this was unacceptable.
Solutions using 'onreadystatechange' or 'onload' events failed to block, which was a big problem since the code originally loaded dynamic scripts synchronously using 'require([filename, 'dojo/domReady']);' and I was stripping out dojo.
My final solution, which loads the script before returning, AND has all scripts properly accessible in the debugger (for Chrome at least) is as follows:
WARNING: The following code should PROBABLY be used only in 'development' mode. (For 'release' mode I recommend prepackaging and minification WITHOUT dynamic script loading, or at least without eval).
//Code User TODO: you must create and set your own 'noEval' variable
require = function require(inFileName)
{
var aRequest
,aScript
,aScriptSource
;
//setup the full relative filename
inFileName =
window.location.protocol + '//'
+ window.location.host + '/'
+ inFileName;
//synchronously get the code
aRequest = new XMLHttpRequest();
aRequest.open('GET', inFileName, false);
aRequest.send();
//set the returned script text while adding special comment to auto include in debugger source listing:
aScriptSource = aRequest.responseText + '\n////# sourceURL=' + inFileName + '\n';
if(noEval)//<== **TODO: Provide + set condition variable yourself!!!!**
{
//create a dom element to hold the code
aScript = document.createElement('script');
aScript.type = 'text/javascript';
//set the script tag text, including the debugger id at the end!!
aScript.text = aScriptSource;
//append the code to the dom
document.getElementsByTagName('body')[0].appendChild(aScript);
}
else
{
eval(aScriptSource);
}
};
var xhrObj = new XMLHttpRequest();
xhrObj.open('GET', '/filename.js', false);
xhrObj.send(null);
eval(xhrObj.responseText);
If this is a cross-domain request, it will not work. In that case you have to upload the requested file to your server, or make a mirror php that outputs it, and require that php.
With jquery (works with cross-domain request too):
$.getScript('/filename.js',callbackFunction);
callbackFunction will be called synchronously.
For loading more scripts see this thread.
There actually is a way to load a list of scripts and execute them synchronously. You need to insert each script tag into the DOM, explicitly setting its async attribute to false:
script.async = false;
Scripts that have been injected into the DOM are executed asynchronously by default, so you have to set the async attribute to false manually to work around this.
Example
<script>
(function() {
var scriptNames = [
"https://code.jquery.com/jquery.min.js",
"example.js"
];
for (var i = 0; i < scriptNames.length; i++) {
var script = document.createElement('script');
script.src = scriptNames[i];
script.async = false; // This is required for synchronous execution
document.head.appendChild(script);
}
// jquery.min.js and example.js will be run in order and synchronously
})();
</script>
<!-- Gotcha: these two script tags may still be run before `jquery.min.js`
and `example.js` -->
<script src="example2.js"></script>
<script>/* ... */<script>
References
There is a great article by Jake Archibald of Google about this called Deep dive into the murky waters of script loading.
The WHATWG spec on the tag is a good and thorough description of how tags are loaded.
If you need to load an arbitrary number of scripts and only proceed when the last one is done, and you cannot use XHR (e.g. due to CORS limitations) you can do the following. It is not synchronous, but does allow a callback to occur exactly when the last file is done loading:
// Load <script> elements for all uris
// Invoke the whenDone callback function after the last URI has loaded
function loadScripts(uris,whenDone){
if (!uris.length) whenDone && whenDone();
else{
for (var wait=[],i=uris.length;i--;){
var tag = document.createElement('script');
tag.type = 'text/javascript';
tag.src = uris[i];
if (whenDone){
wait.push(tag)
tag.onload = maybeDone;
tag.onreadystatechange = maybeDone; // For IE8-
}
document.body.appendChild(tag);
}
}
function maybeDone(){
if (this.readyState===undefined || this.readyState==='complete'){
// Pull the tags out based on the actual element in case IE ever
// intermingles the onload and onreadystatechange handlers for the same
// script block before notifying for another one.
for (var i=wait.length;i--;) if (wait[i]==this) wait.splice(i,1);
if (!wait.length) whenDone();
}
}
}
Edit: Updated to work with IE7, IE8, and IE9 (in quirks mode). These IE versions do not fire an onload event, but do for onreadystatechange. IE9 in standards mode fires both (with onreadystatechange for all scripts firing before onload for any).
Based on this page there may be a small chance that old versions of IE will never send an onreadystatechange event with readyState=='complete'; if this is the case (I could not reproduce this problem) then the above script will fail and your callback will never be invoked.
You can't and shouldn't perform server operations synchronously for obvious reasons. What you can do, though, is to have an event handler telling you when the script is loaded:
tag.onreadystatechange = function() { if (this.readyState == 'complete' || this.readyState == 'loaded') this.onload({ target: this }); };
tag.onload = function(load) {/*init code here*/}
onreadystatechange delegation is, from memory, a workaround for IE, which has patchy support for onload.
same as Sean's answer, but instead of creating a script tag, just evaluate it. this ensures that the code is actually ready to use.
My strategy, classic example when load jQuery UI, i hope this can help you
( function( tools, libs ){
// Iterator
var require = function( scripts, onEnd ){
onEnd = onEnd || function(){};
if( !scripts || scripts.length < 1 )return onEnd();
var src = scripts.splice( 0, 1),
script = document.createElement( "script" );
script.setAttribute( "src", src );
tools.addEvent( "load", script, function(){
require( scripts, onEnd );
} );
document.getElementsByTagName( "head" )[ 0 ].appendChild( script );
};
// Install all scripts with a copy of scripts
require( libs.slice(), function(){
alert( "Enjoy :)" );
} );
// Timeout information
var ti = setTimeout( function(){
if( !window.jQuery || !window.jQuery.ui )alert( "Timeout !" );
clearTimeout( ti );
}, 5000 );
} )(
{ // Tools
addEvent : function( evnt, elem, func ){
try{
if( elem.addEventListener ){
elem.addEventListener( evnt, func, false );
}else if( elem.attachEvent ){
var r = elem.attachEvent( "on" + evnt, func );
}
return true;
}catch( e ){
return false;
}
}
},
[ // Scripts
"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.0.0-alpha1/jquery.min.js",
"https://cdnjs.cloudflare.com/ajax/libs/jqueryui/1.11.4/jquery-ui.min.js"
]
);
When using Angular you can take advantage of the fact that every Provider is instantiated before other services are instantiated. You can combine this fact with using xhr and the eval() as mentioned by #Neil. The code would be following:
app.provider('SomeScriptSyncLoader', function() {
var resourceUrl = 'http://some/script.js';
var dummy = {};
this.$get = function() {
var q = jQuery.ajax({
type: 'GET', url: resourceUrl, cache: false, async: false
});
if (q.status === 200) {
eval(q.responseText); // execute some script synchronously as inline script - eval forces sync processing
}
return dummy;
};
});
To force the Provider to be inialized you need to inject it in at least one other directive/service. Preferably this would be the service which takes advantage of the code loaded by script.
app.directive('myDirective', ['SomeScriptSyncLoader', function(someScriptSyncLoader) {
return {
restrict: 'E',
link: function(scope, element, attrs) {
// some ode
},
template: "this is my template"
};
}]);
I know this is an old question, but maybe someone else read this and find it useful !
Just created a new components uses ES6 to load scripts dynamically in synchronous way.
The Project details and source code are on GitHub https://github.com/amgadfahmi/scripty
I may be late to answering this question.
My current solution is to recursively add <script> tags such that the addition of the subsequent script is in the callback of its predecessor. It assumes that each function contains one function and that function is the same as the file name (minus the extension). This probably isn't the best way to do things, but it works ok.
Code to consider
Code directory structure:
- directory
---- index.html
---- bundle.js
---- test_module/
-------- a.js
-------- b.js
-------- log_num.js
-------- many_parameters.js
index.html
<head>
<script src="bundle.js"></script>
</head>
bundle.js
// Give JS arrays the .empty() function prototype
if (!Array.prototype.empty){
Array.prototype.empty = function(){
return this.length == 0;
};
};
function bundle(module_object, list_of_files, directory="") {
if (!list_of_files.empty()) {
var current_file = list_of_files.pop()
var [function_name, extension] = current_file.split(".")
var new_script = document.createElement("script")
document.head.appendChild(new_script)
new_script.src = directory + current_file
new_script.onload = function() {
module_object[function_name] = eval(function_name)
bundle(module_object, list_of_files, directory)
/*
nullify the function in the global namespace as - assumed - last
reference to this function garbage collection will remove it. Thus modules
assembled by this function - bundle(obj, files, dir) - must be called
FIRST, else one risks overwritting a funciton in the global namespace and
then deleting it
*/
eval(function_name + "= undefined")
}
}
}
var test_module = {}
bundle(test_module, ["a.js", "b.js", "log_num.js", "many_parameters.js"], "test_module/")
a.js
function a() {
console.log("a")
}
b.js
function b() {
console.log("b")
}
log_num.js
// it works with parameters too
function log_num(num) {
console.log(num)
}
many_parameters.js
function many_parameters(a, b, c) {
var calc = a - b * c
console.log(calc)
}
here is my code
var loaded_script = [];
function loadScript(urls, callback, sync) {
var len = urls.length, count = 0;
// check are all js loaded, then execute callback (if any)
var check = function() {
if (count == len) {
callback && typeof callback=="function" && callback();
}
};
for (var i = 0; i < len; i++) {
var url = urls[i];
// check if script not loaded (prevent load again)
if (loaded_script.indexOf(url) == -1) {
var script = document.createElement("script");
script.type = "text/javascript";
// set sync loading here (default is async)
if (sync) {
script.async = false;
}
// script onload event
if (script.readyState) { // IE
script.onreadystatechange = function() {
if (script.readyState=="loaded" || script.readyState=="complete") {
script.onreadystatechange = null;
count++, check();
}
};
} else { // Others
script.onload = function() {
count++, check();
};
}
// add script to head tag
script.src = url;
document.getElementsByTagName("head")[0].appendChild(script);
// mark this script has loaded
loaded_script.push(url);
} else {
count++, check();
}
}
}
I use this on pjax site.
loadScript(
[
"js/first.js",
"js/second.js",
],
function() {
alert("Scripts loaded.");
},
true
);
I've had a similar task a few days earlier, and here's how I did it.
This loader works both in file:// prefixes as well as in http:// and https://, and is cross-browser compatible.
It however, cannot load specific classes or functions as modules from scripts; it will load the whole script altogether and make it available to the DOM.
// Loads a script or an array of scripts (including stylesheets)
// in their respective index order, synchronously.
// By Sayanjyoti Das #https://stackoverflow.com/users/7189950/sayanjyoti-das
var Loader={
queue: [], // Scripts queued to be loaded synchronously
loadJsCss: function(src, onl) {
var ext=src.toLowerCase().substring(src.length-3, src.length);
if(ext=='.js') {
var scrNode=el('script', null, null, null);
scrNode.type='text/javascript';
scrNode.onload=function() {onl();};
scrNode.src=src;
document.body.appendChild(scrNode);
}else if(ext=='css') {
var cssNode=el('link', null, null, null);
cssNode.rel='stylesheet';
cssNode.type='text/css';
cssNode.href=src;
document.head.appendChild(cssNode);
onl();
}
},
add: function(data) {
var ltype=(typeof data.src).toLowerCase();
// Load a single script
if(ltype=='string') {
data.src=data.src;
Loader.queue.splice(0, 1, data, Loader.queue[0]);
Loader.next();
}
// Load an array of scripts
else if(ltype=='object') {
for(var i=data.src.length-1; i>=0; i--) {
Loader.queue.splice(0, 1, {
src: data.src[i],
onload: function() {
if(Loader.next()==false) {
data.onload();
return;
}
Loader.next();
}
}, Loader.queue[0]);
}
Loader.next();
}
},
next: function() {
if(Loader.queue.length!=0 && Loader.queue[0]) {
var scr=Loader.queue[0];
// Remove the script from the queue
if(Loader.queue.length>1)
Loader.queue.splice(0, 2, Loader.queue[1]);
else
Loader.queue=[];
// Load the script
Loader.loadJsCss(scr.src, scr.onload);
}else return false;
}
};
The above function is very powerful and elegant; it allows you to load a single script or an array of script synchronously (i.e, next script not loaded until previous script loading finished). Moreover, a loaded script may load more scripts, which defers the queue in the parent script.
BTW, a script here means a JavaScript file or a CSS stylesheet.
Here's how to use it:-
// Load a single script
Loader.add({
src: 'test.js',
onload: function() {
alert('yay!');
}
});
// Load multiple scripts
Loader.add({
src: ['test1.js', 'test2.js', 'mystyles.css', 'test3.js'],
onload: function() {
alert('all loaded!');
}
});
Note that, the onload function in the Loader arguments is called when all of the scripts have loaded, not when one or a single script is loaded.
You can also load more scripts in the scripts you loaded, such as in test.js, test1.js, etc. By doing this, you will defer the load of the next parent script and the queue in the child script will be prioritized.
Hope it helps :-)
I use jquery load method applied to div element. something like
<div id="js">
<!-- script will be inserted here -->
</div>
...
$("#js").load("path", function() { alert("callback!" });
You can load scripts several times and each time one script will completely replace the one loaded earlier

Get the url of currently executing js file when dynamically loaded

So I'm trying to load a script dynamically and figure out the URL path at which that script was loaded. So some guy gave me a pretty awesome solution to this problem if the scripts are statically loaded ( How to get the file-path of the currently executing javascript code ). But I need a dynamically loaded solution. For example:
$(function()
{ $.getScript("brilliant.js", function(data, textStatus)
{ // do nothing
});
});
where "brilliant.js" has:
var scripts = document.getElementsByTagName("script");
var src = scripts[scripts.length-1].src;
alert("THIS IS: "+src);
Ideally this should either print out "brilliant.js" or "〈hostname+basepath〉/brilliant.js"
Currently brilliant.js works for statically included scripts, but not for scripts included dynamically (like with $.getScript). Anyone have any ideas? Is there somewhere in the dom that stores all the scripts that have been loaded?
EDIT: Andras gave a pretty good solution, though it probably only works for jQuery. Since that's probably the most popular library, and definitely what I'm going to be using. It can probably be extended for other libraries as well. Here's my simplified version:
var scriptUri;
curScriptUrl(function(x)
{ scriptUri = x;
alert(scriptUri);
});
function curScriptUrl(callback)
{ var scripts = document.getElementsByTagName("script");
var scriptURI = scripts[scripts.length-1].src;
if(scriptURI != "") // static include
{ callback(scriptURI);
}else if($ != undefined) // jQuery ajax
{ $(document).ajaxSuccess(function(e, xhr, s)
{ callback(s.url);
});
}
}
When your script gets loaded with jQuery (and I guess other frameworks as well), your script will become indistinguishable from a script that was originally in the HTML document. jQuery makes a request reaching out for your script and puts back the reply as the text child of a <script> node. Your browser has no way of knowing where it originated from, whether it was modified before inserted, etc. It is just a script node as far as she is concerned.
There can be workarounds, however. In the case of jQuery, you can hook up to the ajax events and exploit the fact that they are called right after your script executes. Basically, this would yield "brilliant.js" in your example:
var handler = function (e, xhr, s) {
alert(s.url);
}
$(document).ajaxSuccess(handler);
A more elaborate one:
(function ($, undefined) {
/* Let's try to figure out if we are inlined.*/
var scripts = document.getElementsByTagName("script");
if (scripts[scripts.length - 1].src.length === 0) {
// Yes, we are inlined.
// See if we have jQuery loading us with AJAX here.
if ($ !== undefined) {
var initialized = false;
var ajaxHandler = function (e, xhr, s) {
if (!initialized) {
initialized = true;
alert("Inlined:" + s.url);
initmywholejsframework();
}
}
//If it is, our handler will be called right after this file gets loaded.
$(document).ajaxSuccess(ajaxHandler);
//Make sure to remove our handler if we ever yield back.
window.setTimeout(function () {
jQuery(document).unbind("ajaxSuccess", ajaxHandler);
if (!initialized) {
handleInlinedNonjQuery();
}
}, 0);
}
} else {
//We are included.
alert("Included:" + scripts[scripts.length - 1].src);
initmywholejsframework();
}
//Handle other JS frameworks etc. here, if you will.
function handleInlinedNonjQuery() {
alert("nonJQuery");
initmywholejsframework();
}
//Initialize your lib here
function initmywholejsframework() {
alert("loaded");
}
})(jQuery);
B T, sorry if this doesn't help, but I'm curious why you would need to do this? The reason I'm asking is I don't see why you can't just use the relative file paths to load these files? Finding out where you're located could be done with window.location, but why would you? And as for loading them, can't you make an ajax call to the file and then eval them?
This will work in every browser except IE and doesn't depend on assuming what the name of a file is:
var getErrorLocation = function (error) {
var loc, replacer = function (stack, matchedLoc) {
loc = matchedLoc;
};
if ("fileName" in error) {
loc = error.fileName;
} else if ("stacktrace" in error) { // Opera
error.stacktrace.replace(/Line \d+ of .+ script (.*)/gm, replacer);
} else if ("stack" in error) { // WebKit
error.stack.replace(/at (.*)/gm, replacer);
loc = loc.replace(/:\d+:\d+$/, ""); // remove line number
}
return loc;
};
try {
0();
} catch (e) {
var scriptURI = getErrorLocation(e);
}
alert("THIS IS: " + scriptURI);

Categories