I'm having a major brain fart. I have a Java application that adds items to carts, and I'm using jsoup to parse the HTML document, since jsoup can't run any JavaScript. I extracted the following out of the getElementsByTag().html() call. Now I need to find the value 1038, keeping in mind that the value is not always 1038. So I need it to look the value up by path: attributes -> 92 -> options -> id = 1038.
var spConfig = new Product.Config({
"attributes": {
"92": {
"id": "92",
"code": "color",
"label": "Color",
"options": [{
"id": "1038",
"label": "GREEN",
"price": "0",
"oldPrice": "0",
"products": ["94035", "94036", "94037", "94038", "94039", "94040", "94041", "94042", "94043", "94044"]
}]
},
"196": {
"id": "196",
"code": "size",
"label": "Size",
"options": [{
"id": "189",
"label": "8 ",
"price": "0",
"oldPrice": "0",
"products": ["94041"]
}, {
"id": "188",
"label": "8.5",
"price": "0",
"oldPrice": "0",
"products": ["94042"]
}, {
"id": "187",
"label": "9",
"price": "0",
"oldPrice": "0",
"products": ["94043"]
}, {
"id": "186",
"label": "9.5",
"price": "0",
"oldPrice": "0",
"products": ["94044"]
}, {
"id": "185",
"label": "10",
"price": "0",
"oldPrice": "0",
"products": ["94035"]
}, {
"id": "184",
"label": "10.5",
"price": "0",
"oldPrice": "0",
"products": ["94036"]
}, {
"id": "183",
"label": "11",
"price": "0",
"oldPrice": "0",
"products": ["94037"]
}, {
"id": "182",
"label": "11.5",
"price": "0",
"oldPrice": "0",
"products": ["94038"]
}, {
"id": "181",
"label": "12",
"price": "0",
"oldPrice": "0",
"products": ["94039"]
}, {
"id": "179",
"label": "13",
"price": "0",
"oldPrice": "0",
"products": ["94040"]
}]
}
},
"template": "$#{price}",
"basePrice": "129.98",
"oldPrice": "180",
"productId": "94013",
"chooseText": "Choose an Option...",
"taxConfig": {
"includeTax": false,
"showIncludeTax": false,
"showBothPrices": false,
"defaultTax": 0,
"currentTax": 0,
"inclTaxTitle": "Incl. Tax"
}
});
I need to receive the value "1038", but it can't just be a search function for integer 1038, because that value can possibly change in my script.
Related
So I am trying to figure out how I can possibly scrape a JavaScript tag using regex, which I believe might be the easiest way.
The tag looks like:
<script type="text/javascript">
var spConfig=newApex.Config({
"attributes": {
"199": {
"id": "199",
"code": "legend",
"label": "Weapons",
"options": [
{
"label": "10",
"priceInGame": "0",
"id": [
]
},
{
"label": "10.5",
"priceInGame": "0",
"id": [
]
},
{
"label": "11",
"priceInGame": "0",
"id": [
"66659"
]
},
{
"label": "11.5",
"priceInGame": "0",
"id": [
]
},
{
"label": "12",
"priceInGame": "0",
"id": [
]
},
{
"label": "12.5",
"priceInGame": "0",
"id": [
]
},
{
"label": "13",
"priceInGame": "0",
"id": [
]
},
{
"label": "4",
"priceInGame": "0",
"id": [
]
},
{
"label": "4.5",
"priceInGame": "0",
"id": [
]
},
{
"label": "5",
"priceInGame": "0",
"id": [
]
},
{
"label": "5.5",
"priceInGame": "0",
"id": [
]
},
{
"label": "6",
"priceInGame": "0",
"id": [
]
},
{
"label": "6.5",
"priceInGame": "0",
"id": [
]
},
{
"label": "7",
"priceInGame": "0",
"id": [
]
},
{
"label": "7.5",
"priceInGame": "0",
"id": [
]
},
{
"label": "8",
"priceInGame": "0",
"id": [
"66672"
]
},
{
"label": "8.5",
"priceInGame": "0",
"id": [
"66673"
]
},
{
"label": "9",
"priceInGame": "0",
"id": [
]
},
{
"label": "9.5",
"priceInGame": "0",
"id": [
"66675"
]
}
]
}
},
"weaponID": "66733",
"chooseText": "Apex Legends",
"Config": {
"includeCoins": false,
}
});
</script>
and I want to scrape all the labels.
What I tried to do is:
for nosto_sku_tag in bs4.find_all('script', {'type': 'text/javascript'}):
try:
test = re.findall('var spConfig = (\{.*}?);', nosto_sku_tag.text.strip())
print(test)
except: # noqa
continue
but it only returned an empty value of []
so I am here asking what can I do to be able to scrape the labels?
You need to specify the attribute using attr=value or attrs={'attr': 'value'} syntax.
https://www.crummy.com/software/BeautifulSoup/bs4/doc/#the-keyword-arguments
import json
import re
from bs4 import BeautifulSoup
if __name__ == '__main__':
html = '''
<script type="text/javascript">
var spConfig=newApex.Config({
"attributes": {
"199": {
"id": "199",
"code": "legend",
"label": "Weapons",
"options": [
{ "label": "10", "priceInGame": "0", "id": [] },
{ "label": "10.5", "priceInGame": "0", "id": [] },
{ "label": "11", "priceInGame": "0", "id": [ "66659" ] },
{ "label": "7.5", "priceInGame": "0", "id": [] },
{ "label": "8", "priceInGame": "0", "id": ["66672"] }
]
}
},
"weaponID": "66733",
"chooseText": "Apex Legends",
"taxConfig": {
"includeCoins": false,
}
});
</script>
'''
soup = BeautifulSoup(html, 'html.parser')
# this one works too
# script = soup.find('script', attrs={'type':'text/javascript'})
script = soup.find('script', type='text/javascript')
# Collapse the script body onto one line so `.` in the regex can span the whole object literal.
js: str = script.text.replace('\n', '')
# Raw string so `\(` is not an invalid string escape; the dot in `newApex.Config`
# is escaped so it only matches a literal dot.
raw_json = re.search(r'var spConfig=newApex\.Config\((\{.*\})\);', js).group(1)
# The captured text is a JavaScript object literal, not strict JSON: the sample
# contains a trailing comma (`"includeCoins": false,` right before `}`), which
# json.loads rejects. Drop any comma directly followed by a closing brace/bracket.
# NOTE: this is a textual fix-up; it would also rewrite a ",}" sequence that
# appears inside a string value.
raw_json = re.sub(r',\s*([}\]])', r'\1', raw_json)
data = json.loads(raw_json)
# Collect the label of every option of attribute "199".
labels = [opt['label'] for opt in data['attributes']['199']['options']]
print(labels)
output:
['10', '10.5', '11', '7.5', '8'] ... some removed for brevity
If you are just looking for the entire row field in the JSON object, use the following;
("label":) "([^"]+)",
Then if you want to return the actual value, just use
\2
to pull back the second group
Lets say my url is http://www.example.com/apptest/app/index.php/json/getSingleMenuItemReviews?id=94
and the json file is on this Url is:
{
"query": "SELECT `webapp_menuitem_reviews`.`id` AS `id` , `webapp_menuitem_reviews`.`order` AS `order` , `webapp_menuitem_reviews`.`status` AS `status` , `webapp_menuitem_reviews`.`reviewcomment` AS `reviewcomment` , `webapp_menuitem_reviews`.`json` AS `json` , `webapp_menuitem_reviews`.`menuitemid` AS `menuitemid` , `webapp_menuitem_reviews`.`menutypeid` AS `menutypeid` , `webapp_menuitem_reviews`.`user` AS `user` , `webapp_menuitems`.`name` AS `menuitemname` , `webapp_menuitems`.`image` AS `menuitemimage` , `webapp_menuitem_reviews`.`timestamp` AS `timestamp` , `webapp_menuitem_reviews`.`rate` AS `rate` , 1 \n\t\tFROM `webapp_menuitem_reviews` RIGHT OUTER JOIN `webapp_menuitems` ON `webapp_menuitems`.`id`=`webapp_menuitem_reviews`.`menuitemid` WHERE `webapp_menuitem_reviews`.`status`=1 AND `webapp_menuitems`.`status`=1 AND `webapp_menuitem_reviews`.`menuitemid`='94' AND ( 1 ) ORDER BY `order` DESC, 1 LIMIT 0,20",
"queryresult": [{
"id": "1",
"order": "0",
"status": "1",
"reviewcomment": "22222",
"json": "0",
"menuitemid": "94",
"menutypeid": "13",
"user": "jose",
"menuitemname": "test food",
"menuitemimage": "super-foods-400x400.jpg",
"timestamp": "2016-07-19 02:06:14",
"rate": "2"
}, {
"id": "6",
"order": "0",
"status": "1",
"reviewcomment": "Hhhgh",
"json": "0",
"menuitemid": "94",
"menutypeid": "13",
"user": "jose",
"menuitemname": "test food",
"menuitemimage": "super-foods-400x400.jpg",
"timestamp": "2016-07-19 03:27:09",
"rate": "4"
}, {
"id": "7",
"order": "0",
"status": "1",
"reviewcomment": "Hhhgh",
"json": "0",
"menuitemid": "94",
"menutypeid": "13",
"user": "jose",
"menuitemname": "test food",
"menuitemimage": "super-foods-400x400.jpg",
"timestamp": "2016-07-19 03:27:10",
"rate": "4"
}, {
"id": "8",
"order": "0",
"status": "1",
"reviewcomment": "mremajoel",
"json": "0",
"menuitemid": "94",
"menutypeid": "13",
"user": "jose",
"menuitemname": "test food",
"menuitemimage": "super-foods-400x400.jpg",
"timestamp": "2016-07-19 16:10:45",
"rate": "5"
}],
"totalvalues": 4,
"pageno": 1,
"lastpage": 1,
"elements": [{
"field": "`webapp_menuitem_reviews`.`id`",
"sort": "1",
"header": "ID",
"alias": "id"
}, {
"field": "`webapp_menuitem_reviews`.`order`",
"sort": "DESC",
"header": "Order",
"alias": "order"
}, {
"field": "`webapp_menuitem_reviews`.`status`",
"sort": "1",
"header": "Status",
"alias": "status"
}, {
"field": "`webapp_menuitem_reviews`.`reviewcomment`",
"sort": "1",
"header": "reviewcomment",
"alias": "reviewcomment"
}, {
"field": "`webapp_menuitem_reviews`.`json`",
"sort": "1",
"header": "Json",
"alias": "json"
}, {
"field": "`webapp_menuitem_reviews`.`menuitemid`",
"sort": "1",
"header": "menuitemid",
"alias": "menuitemid"
}, {
"field": "`webapp_menuitem_reviews`.`menutypeid`",
"sort": "1",
"header": "menutypeid",
"alias": "menutypeid"
}, {
"field": "`webapp_menuitem_reviews`.`user`",
"sort": "1",
"header": "user",
"alias": "user"
}, {
"field": "`webapp_menuitems`.`name`",
"sort": "1",
"header": "name",
"alias": "menuitemname"
}, {
"field": "`webapp_menuitems`.`image`",
"sort": "1",
"header": "image",
"alias": "menuitemimage"
}, {
"field": "`webapp_menuitem_reviews`.`timestamp`",
"sort": "1",
"header": "timestamp",
"alias": "timestamp"
}, {
"field": "`webapp_menuitem_reviews`.`rate`",
"sort": "1",
"header": "Rate",
"alias": "rate"
}],
"from": "\n\t\tFROM `webapp_menuitem_reviews` RIGHT OUTER JOIN `webapp_menuitems` ON `webapp_menuitems`.`id`=`webapp_menuitem_reviews`.`menuitemid`",
"where": "WHERE `webapp_menuitem_reviews`.`status`=1 AND `webapp_menuitems`.`status`=1 AND `webapp_menuitem_reviews`.`menuitemid`='94'",
"group": "",
"having": "",
"search": false,
"startingfrom": 0,
"maxlength": 20,
"options": []
}
how can I get all the data to display on the list item.
Thanks a lot.
You would generally use Angulars $http service https://docs.angularjs.org/api/ng/service/$http in a controller or factory and then bind the JSON data to $scope in your controller https://docs.angularjs.org/guide/scope.
In the html template you bind( present ) the JSON data using ng-bind https://docs.angularjs.org/api/ng/directive/ngBind.
I found http://www.thinkster.io/pick/GtaQ0oMGIl/ helpful to learn the basics of angular.
I am using fusion charts in my jsp to present data. Here is the code I am using.
FusionCharts.ready(function(){
var summaryChart = new FusionCharts(
{
type: "line",
renderAt: "chartContainer1",
width: "500",
height: "300",
dataFormat: "json",
"dataSource": {
"chart": {
"caption": "Total Revenues from 2008-2013",
"numberprefix": "$",
"bgcolor": "FFFFFF",
"showalternatehgridcolor": "0",
"plotbordercolor": "008ee4",
"plotborderthickness": "3",
"showvalues": "0",
"divlinecolor": "CCCCCC",
"showcanvasborder": "0",
"tooltipbgcolor": "00396d",
"tooltipcolor": "FFFFFF",
"tooltipbordercolor": "00396d",
"numdivlines": "2",
"yaxisvaluespadding": "20",
"anchorbgcolor": "008ee4",
"anchorborderthickness": "0",
"showshadow": "0",
"anchorradius": "4",
"chartrightmargin": "25",
"canvasborderalpha": "0",
"showborder": "0"
},
"data": [
{
"label": "2009",
"value": "4400000",
"color": "008ee4"
},
{
"label": "2010",
"value": "4800000",
"color": "008ee4"
},
{
"label": "2011",
"value": "5500000",
"color": "008ee4"
},
{
"label": "2012",
"value": "6700000",
"color": "008ee4",
"anchorradius": "7",
"tooltext": "Historical high"
},
{
"label": "2013",
"value": "4200000",
"color": "008ee4"
}
]
}
});
summaryChart.render();
});
I want to add some data that has been read from the database as data to this chart. I tried using arrays inside JavaScript and similar approaches, but I am still unable to do that.
Please help me.
yes, you can do it by using fusioncharts jsp wrapper
http://www.fusioncharts.com/jsp-charts/
More details about this wrapper
http://www.fusioncharts.com/dev/using-with-server-side-languages/java/introduction.html
I grab a list of data from the server and I have to convert it.
Part of this is turning it into a 3 dimensional array. After the "myArr[i].children.push(temp);" it leaves copies of the objects that were pushed in the root of the array. Can I either push without copying or how would I delete these? (I have underscore js included, I know they have good array functions :))
// For every element of myArr, build a `children` array holding reduced copies
// ({id, index, text}) of each element whose parentid loosely equals this element's id.
// Nothing is ever removed from myArr, so elements that were copied into a
// parent's `children` also remain at the top level of the array.
for (var i = 0; i < myArr.length; i++) {
// Start every element with an empty children list, including leaf elements.
myArr[i].children = [];
// Full scan of the array for each element to find its children.
for (var q = 0; q < myArr.length; q++) {
// Loose equality (==) is used for the id/parentid comparison.
if (myArr[i].id == myArr[q].parentid) {
// A new object is pushed, not myArr[q] itself; parentid is not copied.
var temp = {
id: myArr[q].id,
index: myArr[q].index,
text: myArr[q].text
}
myArr[i].children.push(temp);
};
};
};
The Data
[{
"id": "5",
"parentid": "0",
"text": "Device Guides",
"index": "0"
}, {
"id": "6",
"parentid": "0",
"text": "Pre-Sales Evaluation",
"index": "1"
}, {
"id": "7",
"parentid": "0",
"text": "Router Setup Guides",
"index": "2"
}, {
"id": "9",
"parentid": "7",
"text": "Sonicwall",
"index": "0"
}, {
"id": "10",
"parentid": "5",
"text": "Grandstream GXP-21XX",
"index": "1"
}, {
"id": "11",
"parentid": "5",
"text": "Polycom Soundstation\/Soundpoint",
"index": "2"
}, {
"id": "12",
"parentid": "7",
"text": "Cisco",
"index": "1"
}, {
"id": "15",
"parentid": "0",
"text": "Post-Sales Implementation Check List",
"index": "7"
}, {
"id": "16",
"parentid": "15",
"text": "Porting and New Number Details",
"index": "0"
}, {
"id": "18",
"parentid": "15",
"text": "Partner Setup",
"index": "1"
}, {
"id": "19",
"parentid": "15",
"text": "test",
"index": "2"
}, {
"id": "20",
"parentid": "0",
"text": "test",
"index": "11"
}, {
"id": "21",
"parentid": "15",
"text": "test",
"index": "3"
}, {
"id": "23",
"parentid": "5",
"text": "New Polycom",
"index": "0"
}, {
"id": "24",
"parentid": "0",
"text": "Test Markup",
"index": "14"
}, {
"id": "25",
"parentid": "0",
"text": "test",
"index": "15"
}]
After it is formatted:
{
"children": [{
"id": "5",
"parentid": "0",
"text": "Device Guides",
"index": "1",
"children": [{
"id": "10",
"index": "0",
"text": "Grandstream GXP-21XX"
}, {
"id": "11",
"index": "1",
"text": "Polycom Soundstation/Soundpoint"
}, {
"id": "23",
"index": "2",
"text": "New Polycom"
}]
}, {
"id": "6",
"parentid": "0",
"text": "Pre-Sales Evaluation",
"index": "0",
"children": []
}, {
"id": "7",
"parentid": "0",
"text": "Router Setup Guides",
"index": "2",
"children": [{
"id": "9",
"index": "0",
"text": "Sonicwall"
}, {
"id": "12",
"index": "1",
"text": "Cisco"
}]
}, {
"id": "9",
"parentid": "7",
"text": "Sonicwall",
"index": "0",
"children": []
}, {
"id": "10",
"parentid": "5",
"text": "Grandstream GXP-21XX",
"index": "0",
"children": []
}, {
"id": "11",
"parentid": "5",
"text": "Polycom Soundstation/Soundpoint",
"index": "1",
"children": []
}, {
"id": "12",
"parentid": "7",
"text": "Cisco",
"index": "1",
"children": []
}, {
"id": "15",
"parentid": "0",
"text": "Post-Sales Implementation Check List",
"index": "7",
"children": [{
"id": "16",
"index": "0",
"text": "Porting and New Number Details"
}, {
"id": "18",
"index": "1",
"text": "Partner Setup"
}, {
"id": "19",
"index": "2",
"text": "test"
}, {
"id": "21",
"index": "3",
"text": "test"
}]
}, {
"id": "16",
"parentid": "15",
"text": "Porting and New Number Details",
"index": "0",
"children": []
}, {
"id": "18",
"parentid": "15",
"text": "Partner Setup",
"index": "1",
"children": []
}, {
"id": "19",
"parentid": "15",
"text": "test",
"index": "2",
"children": []
}, {
"id": "20",
"parentid": "0",
"text": "test",
"index": "11",
"children": []
}, {
"id": "21",
"parentid": "15",
"text": "test",
"index": "3",
"children": []
}, {
"id": "23",
"parentid": "5",
"text": "New Polycom",
"index": "2",
"children": []
}, {
"id": "24",
"parentid": "0",
"text": "Test Markup",
"index": "14",
"children": []
}, {
"id": "25",
"parentid": "0",
"text": "test",
"index": "15",
"children": []
}]
}
Here you go
// Build a parent -> children tree from the flat `data` array in a single pass.
// `tree` maps an id to its node; key 0 is the virtual root whose children are
// the top-level items. Declared with `var` so the snippet does not rely on
// implicit globals (which throw a ReferenceError in strict mode / ES modules).
var tree = {0: {children: []}};
data.forEach(function (x) {
    // If children of x were seen before x itself, a placeholder already holds
    // them: adopt that array so no child is lost.
    x.children = tree[x.id] ? tree[x.id].children : [];
    tree[x.id] = x;
    // Parent not seen yet: create a placeholder that only collects children.
    if (!tree[x.parentid]) {
        tree[x.parentid] = {children: []};
    }
    tree[x.parentid].children.push(x);
});
// The nested result is the list of items hanging off the virtual root.
var result = tree[0].children;
This solution is linear (iterates over the array just once) and doesn't require any pre-sorting.
http://jsfiddle.net/U47WY/
and here's how to convert the tree back to the linear array:
/**
 * Converts a nested tree back into a flat array (depth-first, parent before
 * its children).
 *
 * Each node's `children` property is removed from the node itself, so the
 * input objects are modified and reused in the returned array.
 *
 * @param {Array<Object>} source - nodes, each optionally carrying a `children` array
 * @returns {Array<Object>} the same node objects in flat, depth-first order
 */
function flatten(source) {
    return source.reduce(function (a, x) {
        // Save the children BEFORE deleting the property; recursing on
        // x.children after the delete would pass `undefined`.
        // Leaf nodes without a `children` property are treated as childless.
        var children = x.children || [];
        delete x.children;
        return a.concat([x], flatten(children));
    }, []);
}
Following on from a friendly discussion in the comments :
// Nest the flat `myArr` list in place: items whose parentid is 0 are collected
// under zeroObj.children, every other item is pushed into the `children`
// array of the item whose id matches its parentid.
var zeroObj = {"children": []};
for (var i = 0; i < myArr.length; i++) {
    // The sample data stores parentid as the string "0"; a strict comparison
    // with the number 0 would never match and all root items would be lost.
    // Normalising to a string accepts both "0" and 0.
    if (String(myArr[i].parentid) === "0") {
        zeroObj.children.push(myArr[i]);
    } else {
        for (var q = 0; q < myArr.length; q++) {
            if (myArr[i].parentid == myArr[q].id) {
                // Create the children list lazily on first use.
                myArr[q].children = myArr[q].children || [];
                myArr[q].children.push(myArr[i]);
            }
        }
    }
}
I have to convert JSON to the format below, I'm having a problem converting it back.
Here is the current format
[{
"id": "5",
"parentid": "0",
"text": "Device Guides",
"index": 0,
"children": [{
"id": "10",
"text": "Grandstream GXP-21XX",
"index": 0
}, {
"id": "11",
"text": "Polycom Soundstation/Soundpoint",
"index": 1
}, {
"id": "23",
"text": "New Polycom",
"index": 2
}]
}, {
"id": "6",
"parentid": "0",
"text": "Pre-Sales Evaluation",
"index": 1,
"children": []
}, {
"id": "7",
"parentid": "0",
"text": "Router Setup Guides",
"index": 2,
"children": [{
"id": "9",
"text": "Sonicwall",
"index": 0
}, {
"id": "12",
"text": "Cisco",
"index": 1
}]
}, {
"id": "9",
"parentid": "7",
"text": "Sonicwall",
"index": 3,
"children": []
}, {
"id": "10",
"parentid": "5",
"text": "Grandstream GXP-21XX",
"index": 4,
"children": []
}, {
"id": "11",
"parentid": "5",
"text": "Polycom Soundstation/Soundpoint",
"index": 5,
"children": []
}, {
"id": "12",
"parentid": "7",
"text": "Cisco",
"index": 6,
"children": []
}]
Here is the format I need it in:
[{
"id": "5",
"parentid": "0",
"text": "Device Guides",
"index": "0"
}, {
"id": "6",
"parentid": "0",
"text": "Pre-Sales Evaluation",
"index": "0"
}, {
"id": "7",
"parentid": "0",
"text": "Router Setup Guides",
"index": "0"
}, {
"id": "9",
"parentid": "7",
"text": "Sonicwall",
"index": "0"
}, {
"id": "10",
"parentid": "5",
"text": "Grandstream GXP-21XX",
"index": "0"
}, {
"id": "11",
"parentid": "5",
"text": "Polycom Soundstation\/Soundpoint",
"index": "0"
}, {
"id": "12",
"parentid": "7",
"text": "Cisco",
"index": "0"
}]
Basically, I have to nest it for the script I'm using but the server is expecting to see it flattened, in the current format the 3rd object dimension starts with "children". I need to unnest children and keep the objects going like the format I need it in.
A first solution, granted you don't want the resulting array to be sorted based on the id:
/**
 * Flattens a nested node list into a flat array of shallow copies.
 *
 * Nodes are visited depth-first (a node, then its `children`), and each id is
 * emitted only once: the first occurrence wins. Copies contain only the
 * properties id, parentid, index and text.
 *
 * @param {Array<Object>} graph - top-level nodes, each optionally with `children`
 * @returns {Array<Object>} flat list of copied nodes without `children`
 */
function visitor(graph) {
    var i, l,
        props = ["id", "parentid", "index", "text"],
        nodes = [],
        visited = [];

    // Copies the whitelisted properties of n. When n carries no parentid of
    // its own (nested children in the input omit it), the id of the node it
    // was reached through is used instead.
    function clone(n, parentId) {
        var k, key,
            result = {};
        for (k = 0; k < props.length; k++) {
            key = props[k];
            // Presence check rather than truthiness, so valid falsy values
            // such as index 0 are kept.
            if (n[key] !== undefined && n[key] !== null) {
                result[key] = n[key];
            }
        }
        if (result.parentid === undefined && parentId !== undefined) {
            result.parentid = parentId;
        }
        return result;
    }

    // Records node (once per id) and then walks its children, passing the
    // node's own id down as their parent id.
    function helper(node, parentId) {
        var c, limit;
        if (visited.indexOf(node.id) !== -1) {
            return;
        }
        visited.push(node.id);
        nodes.push(clone(node, parentId));
        if (node.children) {
            for (c = 0, limit = node.children.length; c < limit; c++) {
                helper(node.children[c], node.id);
            }
        }
    }

    for (i = 0, l = graph.length; i < l; i++) {
        helper(graph[i]);
    }
    return nodes;
}
var graph = [{
"id": "5",
"parentid": "0",
"text": "Device Guides",
"index": 0,
"children": [{
"id": "10",
"text": "Grandstream GXP-21XX",
"index": 0
}, {
"id": "11",
"text": "Polycom Soundstation/Soundpoint",
"index": 1
}, {
"id": "23",
"text": "New Polycom",
"index": 2
}]
}, {
"id": "6",
"parentid": "0",
"text": "Pre-Sales Evaluation",
"index": 1,
"children": []
}, {
"id": "7",
"parentid": "0",
"text": "Router Setup Guides",
"index": 2,
"children": [{
"id": "9",
"text": "Sonicwall",
"index": 0
}, {
"id": "12",
"text": "Cisco",
"index": 1
}]
}, {
"id": "9",
"parentid": "7",
"text": "Sonicwall",
"index": 3,
"children": []
}, {
"id": "10",
"parentid": "5",
"text": "Grandstream GXP-21XX",
"index": 4,
"children": []
}, {
"id": "11",
"parentid": "5",
"text": "Polycom Soundstation/Soundpoint",
"index": 5,
"children": []
}, {
"id": "12",
"parentid": "7",
"text": "Cisco",
"index": 6,
"children": []
}];
nodes = visitor(graph);
And yes, I know, the helper function relies on side effects, but I've scoped them inside the visitor function to reduce harm. There is room for improvement (at least sorting the resulting array based on the id), but I will leave that to you.