Python lxml Cleaner module was not working as expected? - javascript

In my webscraper some of the content is coming with advertisements and some java script functions in it. Like the below;
(function() { var infAds = document.createElement('script'); infAds.async = true; infAds.type = 'text/javascript'; var useSSL = 'https:' == document.location.protocol; infAds.src = (useSSL ? 'https:' : 'http:') + '//d.infeed.id/widget-50716010/loader/all/'; var node = document.getElementById('cont-50716010-all'); node.parentNode.insertBefore(infAds, node); })();
I have used lxml.html.clean.Cleaner to remove the scripts and styles from the content. But it is not all removing what i expected. I tried like the below;
from lxml.html.clean import Cleaner
cleaner = Cleaner()
cleaner.javascript = True
cleaner.scripts = True
content = "Page content"
clean_content = cleaner.clean_html(content)
print(clean_content)
But if i use
__clean_content = lxml.html.toString(cleaner.clean_html(content))
__
i am getting the below type error;
TypeError: Type 'str' cannot be serialized.
Using regex also i have tried, it wasn't worked out too. Any suggestions or help would be greatly appreciable!
Thanks in advance.

Related

Integrate NotifyVisitors Javascript Code OPencart

I want to add the following javascript code into the footer of Opencart (2.0.3) to integrate notifyvisitors app for my website.
But I don't know how and where to add javascript code to enable notifyvisitors app.
<div id='notifyvisitorstag'></div>
<script>
var notify_visitors = window.notify_visitors || {};
(function() {
notify_visitors.auth = { bid_e : '7E8xxxxxxxxxxxx561', bid : '0000`enter code here`', t : '420'};
var script = document.createElement('script');script.async = true;
script.src = (document.location.protocol == 'https:' ? "//d2933uxo1uhve4.cloudfront.net" : "//cdn.notifyvisitors.com") + '/js/notify-visitors-1.0.js';
var entry = document.getElementsByTagName('script')[0];
entry.parentNode.insertBefore(script, entry);
})();
</script>
Please guide.
Thanks.
Notifyvisitors updated plugin is now available at official opencart extension store.

How to make goal completion work on hashchange?

I have an AngularJS webapp where there are no separate pages.
I have set up a goal completion so that there's a code that is triggered by a javascript event,
and it has the following structure:
function call_completion(){
window.google_conversion_id = 973404965;
window.google_conversion_language = "en";
window.google_conversion_format = "1";
window.google_conversion_color = "ffffff";
window.google_conversion_label = "doGHCLPK2wkQpfaT0AM";
window.google_conversion_value = 1.000000;
window.google_remarketing_only = false;
var s = document.createElement('script');
s.type = "text/javascript";
s.src = '//www.googleadservices.com/pagead/conversion.js';
document.body.appendChild(s);
}
Yet, I can't see any completions registered in Google Analytics. What could be the problem?
Is this the correct way to do it? Is there perhaps some nifty plugin that gets this problem right?
UPDATE:
For the new Universal Analytics, there is a plugin called angular-ga.
It eliminates the whole question.
You shouldn't append the script to your view, since it won't be executed.
You should use this:
$.getScript( "http://www.googleadservices.com/pagead/conversion.js" );
To get the script working on that page and execute it.
Hope this helps :)
The <noscript> part also has to be included.
Something like this:
$('body')
.append('\
<!-- Google Code for Subscriptions Conversion Page -->\n\
<script type="text/javascript">\n\
/* <![CDATA[ */\n\
var google_conversion_id = 973404965;\n\
var google_conversion_language = "en";\n\
var google_conversion_format = "1";\n\
var google_conversion_color = "ffffff";\n\
var google_conversion_label = "doGHCLPK2wkQpfaT0AM";\n\
var google_conversion_value = 1.000000;\n\
var google_remarketing_only = false;\n\
/* ]]> */\n\
</script>\
');
$('body')
.append('<script type="text/javascript" src="//www.googleadservices.com/pagead/conversion.js">')
.append($('<noscript>')
.append($('<div style="display:inline;">')
.append('<img height="1" width="1" style="border-style:none;" alt="" src="//www.googleadservices.com/pagead/conversion/973404965/?value=1.000000&label=doGHCLPK2wkQpfaT0AM&guid=ON&script=0"/>')
)
);

Convert html code into actionscript code

How can i convert this HTML code into Action script code, i am an android and actionscript developer, i dont know much about HTML and java script. i have tried different forums, i know it can be converted, can someone help me
<script type="text/javascript">
adroll_adv_id = "JD5ZGBNO4RBYVAMMMPX3J7";
adroll_pix_id = "CEJOJM5N5VAHBKCVMT2DML";
(function () {
var oldonload = window.onload;
window.onload = function(){
__adroll_loaded=true;
var scr = document.createElement("script");
var host = (("https:" == document.location.protocol) ? "https://s.adroll.com" : "http://a.adroll.com");
scr.setAttribute('async', 'true');
scr.type = "text/javascript";
scr.src = host + "/j/roundtrip.js";
((document.getElementsByTagName('head') || [null])[0] ||
document.getElementsByTagName('script')[0].parentNode).appendChild(scr);
if(oldonload){oldonload()}};
}());
</script>
Well the code you listed is actually javascript encased in a html tag. Javascript and actionscript are pretty similar, you should be able to use the same code with minimal changes. Just use the javascript as a reference to code it with actionscript

Cookiesdirective and Statcounter

I have implemented cookiesdirective.js which can be found here cookiesdirective.com and Statcounter code is not working. I am using the following code (DoYourOwnSite, standard):
var sc_project=9181072;
var sc_invisible=1;
var sc_security="328a83d2";
var scJsHost = (("https:" == document.location.protocol) ? "https://secure." : "http://www.");
document.write("<sc"+"ript type='text/javascript' src='" + scJsHost + "statcounter.com/counter/counter.js'></"+"script>");
Since cookiesdirective.js requires to remove the tags, I have removed them, but it is still not working. On the other hand, Google Analytics code is working without a problem. I am sure it is about changing the code a bit so it will work inside another javascript, but I do not now how to do it.
Thanks,
Goran
Solved. I had to change the statcounter code a bit:
var sc_project=xxxxxxxxxx;
var sc_invisible=1;
var sc_security="xxxxxxxxxxx";
var imported = document.createElement('script');
imported.type = 'text/javascript';
imported.async = true;
var scJsHost = (("https:" == document.location.protocol) ? "https://secure." : "http://www.");
imported.src = scJsHost + "statcounter.com/counter/counter_xhtml.js";
var s = document.getElementsByTagName('script')[0];
s.parentNode.insertBefore(imported, s);
Thanks anyway.

How can I specify as_sitesearch in v2 of Google Site Search / Custom Search?

I have a Google Custom Search (that will be a Site Search in the future) and I'd like to use the snippet that Google provides:
<script>
(function() {
var cx = '...';
var gcse = document.createElement('script'); gcse.type = 'text/javascript'; gcse.async = true;
gcse.src = (document.location.protocol == 'https:' ? 'https:' : 'http:') +
'//www.google.com/cse/cse.js?cx=' + cx;
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(gcse, s);
})();
</script>
<gcse:search></gcse:search>
I need to restrict it to certain urls. When using the xml api, you can use the parameter as_sitesearch to specify this filter.. is there a way to do this with the code above?
with this I can now access the google.search.cse object before the search is loaded.. but I still don't know parameter:
<script>
var myCallback = function() {
if (document.readyState == 'complete') {
// Document is ready when CSE element is initialized.
// Render an element with both search box and search results in div with id 'test'.
google.search.cse.element.render(
{
div: "test",
tag: 'search'
});
} else {
// Document is not ready yet, when CSE element is initialized.
google.setOnLoadCallback(function() {
// Render an element with both search box and search results in div with id 'test'.
google.search.cse.element.render(
{
div: "test",
tag: 'search'
});
}, true);
}
};
// Insert it before the CSE code snippet so that cse.js can take the script
// parameters, like parsetags, callbacks.
window.__gcse = {
parsetags: 'explicit',
callback: myCallback
};
(function() {
var cx = '007407192365638902354:eyxoavi7oa0';
var gcse = document.createElement('script'); gcse.type = 'text/javascript'; gcse.async = true;
gcse.src = (document.location.protocol == 'https:' ? 'https:' : 'http:') +
'//www.google.com/cse/cse.js?cx=' + cx;
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(gcse, s);
})();
</script>
<style>
.gsc-control-cse * {
box-sizing:content-box;
}
</style>
Yes you can.
Use a hidden field inside your search form:
<input type="hidden" name="as_sitesearch" value="your_url_here/directory">
I've got the same question. The only way I have been able to restrict the search to a particular folder in Google Site Search version 2 is to hard-code it into the element:
<gcse:search as_sitesearch="www.mydomain.com/site1"></gcse:search>
It would be great if there was a way to add as_sitesearch via JavaScript, like with version 1.
To restrict to certain urls:
<gcse:searchbox-only as_sitesearch="mydomain.com" resultsUrl="http://mydomain.com/search-results/" enableAutoComplete="false"></gcse:searchbox-only>
Be sure to replace mydomain.com with the appropriate domain that you want to restrict to.

Categories