Chrome extension: XHR request to website, get html content by class name - javascript

I am creating a chrome extension that will go to a specified website, and get parts of the site's HTML from the source code.
I want to get the html content contained within a div with class name 'span1 rating-num-span'.
I tried using .getElementsByClassName but it returned undefined, however when I use .getElementsByTagName on ('h2') it worked.
Here is the javascript function to make the request from my main.js
function getFlowSite(){
var request = new XMLHttpRequest();
request.onreadystatechange = function(){
if (request.readyState == 4){
if (request.status == 200){
var temp = document.createElement('div');
temp.innerHTML = request.responseText;
alert(temp.getElementsByTagName('h2')[0].innerText);
alert(temp.getElementsByClassName('span1 rating-num-span')[0].innerText);
}
else{
console.log("Messed up!!!");
}
}
};
request.open("GET", "http://uwflow.com/course/" + courseName, true);
request.send(null);
}
stumped...
Thanks for reading!
--------------------Update------------------------
Turns out the class isn't present initially on the page, and is loaded in dynamically with a script. How can I get the source code of the page after everything is loaded in?

That element is added by the page script dynamically.
It's not present initially on the page. You can check this by examining the first server response from the site when loading it with devtools Network panel open. Or, if you use the great uBlock (origin) extension, simply disable all javascript on the site temporarily and reload the page.
You have two options:
find out how that webpage's code fetches the data from the server/elsewhere and do it yourself, there's usually some kind of JSON API. For example in this case there's a huge config object right in the page:
<script>
window.pageData.courseObj = {"ratings": [{"count": 375, "rating": .............
Simply use XMLHttpRequest with .responseType = "document" mode and get that element, then use JSON.parse on it.
Or, actually, in this case a simple regexp + JSON.parse will do:
var match = request.responseText
.match(/window\.pageData\.courseObj\s*=\s*(\{.+?\});\s*[\r\n]/);
var config = JSON.parse(match[1]);
config.ratings.forEach(function(r) { console.log(r) });
Object {count: 375, rating: 0.6986666666666667, name: "usefulness"}
Object {count: 494, rating: 0.7449392712550608, name: "easiness"}
Object {count: 555, rating: 0.5621621621621622, name: "interest"}
The above code wasn't tested live and doesn't contain any error checks which must be implemented in the real code.
load the page as a normal browser tab without activating it, inject a content script, wait for the element to appear, extract the data, close the tab.
manifest.json:
"permissions": ["http://uwflow.com/*"] - permissions for executeScript on non-active tab
popup.js:
var globalTabId = 0;
function openTab(url) {
chrome.tabs.create({url: url, active: false}, function(tab) {
globalTabId = tab.id;
chrome.tabs.executeScript(tab.id, {file: "getData.js", runAt: "document_end"});
});
}
chrome.runtime.onMessage.addListener(function(msg, sender, response) {
if (msg.action == "data" && sender.tab && sender.tab.id == globalTabId) {
chrome.tabs.remove(globalTabId);
processData(msg.data);
}
});
getData.js, this is a content script but it doesn't need to be declared in manifest.json.
var interval = setInterval(function() {
var ratings = document.querySelector(".span1.rating-num-span");
if (!ratings) {
return;
}
clearInterval(interval);
chrome.runtime.sendMessage({action: "data", data: {ratings: ratings.textContent}});
}, 100);

Related

chrome.runtime.sendMessage not working on the 1st click when running normally. it works while debugging though

I have a function in the context.js which loads a panel and sends a message to panel.js at the last. The panel.js function updates the ui on receiving that msg. But it is not working for the first click i.e. it just loads normal ui, not the one that is expected that is updated one after the msg is received. while debugging it works fine.
manifest.json
"background": {
"scripts": ["background.js"],
"persistent": false
},
"content_scripts": [{
"all_frames": false,
"matches": ["<all_urls>"],
"js":["context.js"]
}],
"permissions": ["activeTab","<all_urls>", "storage","tabs"],
"web_accessible_resources":
"panel.html",
"panel.js"
]
context.js - code
fillUI (){
var iframeNode = document.createElement('iframe');
iframeNode.id = "panel"
iframeNode.style.height = "100%";
iframeNode.style.width = "400px";
iframeNode.style.position = "fixed";
iframeNode.style.top = "0px";
iframeNode.style.left = "0px";
iframeNode.style.zIndex = "9000000000000000000";
iframeNode.frameBorder = "none";
iframeNode.src = chrome.extension.getURL("panel.html")
document.body.appendChild(iframeNode);
var dataForUI = "some string data"
chrome.runtime.sendMessage({action: "update UI", results: dataForUI},
(response)=> {
console.log(response.message)
})
}
}
panel.js - code
var handleRequest = function(request, sender, cb) {
console.log(request.results)
if (request.action === 'update Not UI') {
//do something
} else if (request.action === 'update UI') {
document.getElementById("displayContent").value = request.results
}
};
chrome.runtime.onMessage.addListener(handleRequest);
background.js
chrome.runtime.onMessage.addListener((request,sender,sendResponse) => {
chrome.tabs.sendMessage(sender.tab.id,request,function(response){
console.log(response)`
});
});
panel.html
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="panel.css" />
</head>
<body>
<textarea id="displayContent" rows="10" cols="40"></textarea>
</body>
</html>
Any suggestions on what I am doing wrong or what can I do instead?
An iframe with a real URL loads asynchronously so its code runs after the embedding code finishes - hence, your message is sent too early and is lost. The URL in your case points to an extension resource so it's a real URL. For reference, a synchronously loading iframe would have a dummy URL e.g. no src at all (or an empty string) or it would be something like about:blank or javascript:/*some code here*/, possibly srcdoc as well.
Solution 1: send a message in iframe's onload event
Possible disadvantage: all extension frames in all tabs will receive it, including the background script and any other open extension pages such the popup, options, if they also have an onMessage listener.
iframeNode.onload = () => {
chrome.runtime.sendMessage('foo', res => { console.log(res); });
};
document.body.appendChild(iframeNode);
Solution 2: let iframe send a message to its embedder
Possible disadvantage: wrong data may be sent in case you add several such extension frames in one tab and for example the 2nd one loads earlier than the 1st one due to a bug or an optimization in the browser - in this case you may have to use direct DOM messaging (solution 3).
iframe script (panel.js):
chrome.tabs.getCurrent(ownTab => {
chrome.tabs.sendMessage(ownTab.id, 'getData', data => {
console.log('frame got data');
// process data here
});
});
content script (context.js):
document.body.appendChild(iframeNode);
chrome.runtime.onMessage.addListener(
function onMessage(msg, sender, sendResponse) {
if (msg === 'getData') {
chrome.runtime.onMessage.removeListener(onMessage)
sendResponse({ action: 'update UI', results: 'foo' });
}
});
Solution 3: direct messaging via postMessage
Use in case of multiple extension frames in one tab.
Disadvantage: no way to tell if the message was forged by the page or by another extension's content script.
The iframe script declares a one-time listener for message event:
window.addEventListener('message', function onMessage(e) {
if (typeof e.data === 'string' && e.data.startsWith(chrome.runtime.id)) {
window.removeEventListener('message', onMessage);
const data = JSON.parse(e.data.slice(chrome.runtime.id.length));
// process data here
}
});
Then, additionally, use one of the following:
if content script is the initiator
iframeNode.onload = () => {
iframeNode.contentWindow.postMessage(
chrome.runtime.id + JSON.stringify({foo: 'data'}), '*');
};
document.body.appendChild(iframeNode);
if iframe is the initiator
iframe script:
parent.postMessage('getData', '*');
content script:
document.body.appendChild(iframeNode);
window.addEventListener('message', function onMessage(e) {
if (e.source === iframeNode) {
window.removeEventListener('message', onMessage);
e.source.postMessage(chrome.runtime.id + JSON.stringify({foo: 'data'}), '*');
}
});
one possible way that worked for me is by using functionality in setTimeout() method.
in context.js
setTimeout(() => {
chrome.runtime.sendMessage({action: "update UI", results: dataForUI},
(response)=> {
console.log(response.message)
}
)
}, 100);
But I am not sure if this is the best way.

How to make url parameter in chrome.getCookies changed based on url of active tab

I am currently developing a chrome extension that can get cookies from a website, using chrome.getCookies.
However, the url parameter in
chrome.getCookies
must be a static url in order to work, like this:
chrome.cookies.get({url: 'https://example.org/#/', name:
'token'}, function(cookie) {
document.getElementById("token").innerHTML = cookie.value
});
But I want to make that url changed dynamically based on which tab I am using that extension. So I tried this:
var tabUrl;
chrome.tabs.getSelected(null, function(tab) {
tabUrl = tab.url
});
chrome.cookies.get({url: tabUrl, name:
'token'}, function(cookie) {
document.getElementById("token").innerHTML = cookie.value
});
And it didn't work. What should I to to achieve my goal?
Edit: If anyone ever reach this page, here the solution, you have to put subsequent code inside the callback, here's the correct one:
chrome.tabs.getSelected(null, function(tab) {
chrome.cookies.get({url: tab.url, name: 'expa_token'}, function(cookie) {
document.getElementById("token").innerHTML = cookie.value
});
});

Differentiate webRequest.onBeforeRequest original page vs. injected

I'm using webRequest.onBeforeRequest in the background script. Then, in the content(?) script, that is injected to the original page I make an AJAX request to the same URL to which the page makes the request I'm detecting, so naturally it starts to loop which hangs the browser pretty soon!
As a temporary measure, I put another parameter at the end of the URL in my AJAX call (&me=1) and made a urls filter that ends like the original URL ends, but that doesn't seem the best way, since the original URL might change in the future.
What would be better way of doing it? For example, I looked at the requestDetails that my listener returned. But, since the script is injected in the original page, I can't find any difference. Or, maybe I could make a urls filter which would only accept URLs that don't end with "&me=1"...?
(I can't (won't) use a flag variable, because the page changes dynamically and again, it doesn't seem the best way, even if I could make it work somehow (I haven't).)
Of course, alternatively, I could just use the data that the original request provides, but I could not find the event/object that would do it, but maybe I've missed it somehow, since I'm pretty much a beginner in making web extensions.
Edit:
manifest.json
{
"manifest_version": 2,
"name": "testtube",
"version": "1.0",
"permissions": [
"webRequest",
"https://*/*",
"activeTab"
],
"web_accessible_resources": [
"https://www.youtube.com/player_api",
"index.html",
"js/jquery-3.2.1.min.js",
"js/player.js",
"js/jquery-ui.min.js",
"css/jquery-ui.css", "css/style.css"
],
"content_scripts": [
{
"matches": [ "https://www.youtube.com/watch*"],
"css": [ "css/jquery.dataTables.min.css", "css/jquery-ui.css", "css/style.css" ],
"js": [ "js/jquery-3.2.1.min.js","js/jquery-ui.min.js", "js/main.js" ]
}
]
,"background": {
"scripts": ["js/background.js" ]
}
}
background.js
"use strict";
var lastRequestId = 0;
function logURL(requestDetails) {
console.log("requestDetails: ", requestDetails);
//this is actually a solution as per my answer below
if ((lastRequestId != requestDetails.requestId) && (requestDetails.url.indexOf("&me=1") == -1))
{
browser.tabs.sendMessage(requestDetails.tabId, { ccurl: requestDetails.url }).then(response => {
console.log("Message from the content script:");
console.log(response.response);
}).catch(onError);;
}
}
browser.webRequest.onBeforeRequest.removeListener(logURL);
browser.webRequest.onBeforeRequest.addListener(
logURL,
{ urls: ["*://www.youtube.com/api/somepattern*"] }
);
function onError(error) {
console.error(`Error: ${error}`);
}
main.js
"use strict";
var jq;
jq = document.createElement('script');
jq.onload = function () { };
jq.src = chrome.extension.getURL("/js/jquery-3.2.1.min.js");
document.querySelector('head').appendChild(jq);
jq = document.createElement('script');
jq.onload = function () { };
jq.src = chrome.extension.getURL("/js/jquery-ui.min.js");
document.querySelector('head').appendChild(jq);
/*... some other scripts... */
var s = document.createElement('script');
s.src = chrome.extension.getURL('/js/player.js');
s.onload = function () {
this.remove();
};
(document.head || document.documentElement).appendChild(s);
var _url = "";
function UURL(request, sender, sendResponse) {
_url = request.url;
var loadR = " loadRequest('" + _url.toString() + "'); ";
var script = document.createElement('script');
script.textContent = loadR;
document.querySelector('head').appendChild(script);
return Promise.resolve({ response: "Hi from content script" });
}
$(function () {
console.log("jquery loaded");
browser.runtime.onMessage.removeListener(UURL)
browser.runtime.onMessage.addListener(UURL);
/* irrelevant code here... */
});
player.js
var loadRequest = function loadRequest(_url) {
_url = _url + "&me=1";
$.ajax({
type: "get",
url: _url,
dataType: "xml",
success: function (data) {
/* irrelevant code here */
If you are attempting to differentiate between the main page load and your XMLHttpRequest1
There are multiple ways that you could differentiate between different types of requests. Without your actual code to try it out, we have to guess as to what you are actually doing.
The webRequest.onBeforeRequest for a normal page load will look like:
webRequest.onBeforeRequest -> arg[0]= {"frameId":0,"method":"GET","parentFrameId":-1,"requestId":"260870","tabId":411,"timeStamp":1500401223979.044,"type":"main_frame","url":"http://www.example.com/"}
-----------------------------------------------------------------------------------------------------------------------------------------------------^^^^^^^^^^^^^^^^^^^
As you can see, the details Object contains a type property, which is a webRequest.ResourceType. For the load of HTML for the main frame it will contain "main_frame" (the details Object will also have "frameId":0,"parentFrameId":-1).
For your AJAX request, the value of the type property should be xmlhttprequest. However, it's possible that by "ajax request" you meant something other than an XMLHttpRequest. In which case, the type property might have some other value, but it should not be "main_frame".
1. I read the original version of the question differently than okkko intended. As currently written, this does not cover the case which they are interested in. However, it might have some value to someone else reading this question/answer, so I'm leaving it up.
Ok, the improved version of "temporary measure" is that in the listener I add another condition against requested url
(requestDetails.url.indexOf("&me=1") == -1)
And then make my ajax call... and in the urls filter for the request I just leave asterix (*) at the end. In this way, it doesn't matter if the original url changes at the end, however technically additional request is caught.. good enough if nobody else replies.

Chrome Extension: WebQL database is not appearing in Inspector

I am trying to create database and tables in background.js file of Chrome but somehow it's not being created. When I see in Chrome Inspector > Resources > WebSQL, I find nothing. Code is given below:
function fetchData(){
var xhr = new XMLHttpRequest();
xhr.open("GET", "http://localhost/php/fetch.php", true);
xhr.onreadystatechange = function() {
if (xhr.readyState == 4) {
// JSON.parse does not evaluate the attacker's scripts.
var resp = xhr.responseText;
if(resp != null) {
var json = JSON.parse(resp)
console.log(resp);
var data = json['data'];
if(data != null) {
openDatabase('documents', '1.0', 'my storage', 5*1024*1024, function (db) {
alert('Called'); //This is not being called.
});
//var dbConnection = openDbConnect();
//createTable(dbConnection);
//Code below is called
for(var a=0;a <= data.length;a++) {
alert(data[a].title);
}
}
}
}
}
xhr.send();
}
Update
I guess it's being created: My Extension ID is bkjajbjnoadlhnhmfekipifehbhgidpg
In ~/Library/Application Support/Google/Chrome/Default/databases I find:
chrome-extension_bkjajbjnoadlhnhmfekipifehbhgidpg_0
But it's weird I can't see it in Inspector.
Update #2
Turns out that like pages, WebSQL is not visible across Chrome. It shows Db to the page which is visited. Now I am getting no idea how to excess chrome related Db in Viewer.
To access the inspector for the background page of your app, go to Menu>Settings>Extensions and make sure it's in Developer Mode. It should have a link to inspect the background page of your app in there. It will open up in a new window.

Load Javascript into ajax loaded content

I am new to working with AJAX and have some experience with Java/Jquery. I have been looking around for an solution to my problem but i cant seem to find any.
I am trying to build a function in a webshop where the product will appear in a popup window instead of loading a new page.
I got it working by using this code:
$(".product-slot a").live('click', function() {
var myUrl = $(this).attr("href") + " #product-content";
$("#product-overlay-inner").load(myUrl, function() {
});
$("#product-overlay").fadeIn();
return false;
});
product-slot a = Link to the product in the category page.
product-content = the div i want to insert in the popup from the product page.
product-overlay-inner = The popup window.
product-overlay = The popup wrapper.
The problem that i now have is that my Javascript/Jquery isnt working in the productpopup. For example the lightbox for the product image or the button to add product to shoppingcart doesnt work. Is there anyway to make the javascript work inside the loaded content or to load javascript into the popup?
I hope you can understand what my problem is!
Thank you in advance!
EDIT: The platform im using has jquery-ui-1.7.2
I know this is an old thread but I've been working on a similar process with the same script loading problem and thought I'd share my version as another option.
I have a basic route handler for when a user clicks an anchor/button etc that I use to swap out the main content area of the site, in this example it's the ".page" class.
I then use a function to make an ajax call to get the html content as a partial, at the moment they are php files and they do some preliminary rendering server side to build the html but this isn't necessary.
The callback handles placing the new html and as I know what script I need I just append it to the bottom in a script tag created on the fly. If I have an error at the server I pass this back as content which may be just a key word that I can use to trigger a custom js method to print something more meaningful to the page.
here's a basic implementation based on the register route handler:
var register = function(){
$(".page").html("");
// use the getText ajax function to get the page content:
getText('partials/register.php', function(content) {
$(".page").html(content);
var script = document.createElement('script');
script.src = "js/register.js";
$(".page").append(script);
});
};
/******************************************
* Ajax helpers
******************************************/
// Issue a Http GET request for the contents of the specified Url.
// when the response arrives successfully, verify it's plain text
// and if so, pass it to the specified callback function
function getText(url, callback) {
var request = new XMLHttpRequest();
request.open("GET", url);
request.onreadystatechange = function() {
// if the request is complete and was successful -
if (request.readyState === 4 && request.status === 200) {
// check the content type:
var type = request.getResponseHeader("Content-Type");
if (type.match(/^text/)) {
callback(request.responseText);
}
}
};
// send it:
request.send(null); // nothing to send on GET requests.
}
I find this a good way to 'module-ize' my code into partial views and separated JavaScript files that can be swapped in/out of the page easily.
I will be working on a way to make this more dynamic and even cache these 'modules' for repeated use in an SPA scenario.
I'm relatively new to web dev so if you can see any problems with this or a safer/better way to do it I'm all ears :)
Yes you can load Javascript from a dynamic page, but not with load() as load strips any Javascript and inserts the raw HTML.
Solution: pull down raw page with a get and reattach any Javascript blocks.
Apologies that this is in Typescript, but you should get the idea (if anything, strongly-typed TypeScript is easier to read than plain Javascript):
_loadIntoPanel(panel: JQuery, url: string, callback?: { (): void; })
{
// Regular expression to match <script>...</script> block
var re = /<script\b[^>]*>([\s\S]*?)<\/script>/gm;
var scripts: string = "";
var match;
// Do an async AJAX get
$.ajax({
url: url,
type: "get",
success: function (data: string, status: string, xhr)
{
while (match = re.exec(data))
{
if (match[1] != "")
{
// TODO: Any extra work here to eliminate existing scripts from being inserted
scripts += match[0];
}
}
// Replace the contents of the panel
//panel.html(data);
// If you only want part of the loaded view (assuming it is not a partial view)
// using something like
panel.html($(data).find('#product-content'));
// Add the scripts - will evaluate immediately - beware of any onload code
panel.append(scripts);
if (callback) { callback(); }
},
error: function (xhr, status, error)
{
alert(error);
}
});
}
Plain JQuery/Javascript version with hooks:
It will go something like:
var _loadFormIntoPanel = function (panel, url, callback) {
var that = this;
var re = /<script\b[^>]*>([\s\S]*?)<\/script>/gm;
var scripts = "";
var match;
$.ajax({
url: url,
type: "get",
success: function (data, status, xhr) {
while(match = re.exec(data)) {
if(match[1] != "") {
// TODO: Any extra work here to eliminate existing scripts from being inserted
scripts += match[0];
}
}
panel.html(data);
panel.append(scripts);
if(callback) {
callback();
}
},
error: function (xhr, status, error) {
alert(error);
}
});
};
$(".product-slot a").live('click', function() {
var myUrl = $(this).attr("href") + " #product-content";
_loadFormIntoPanel($("#product-overlay-inner"), myUrl, function() {
// Now do extra stuff to loaded panel here
});
$("#product-overlay").fadeIn();
return false;
});

Categories