How to scrape <script text/javascript> - javascript

so I am trying to figure out how I can possible scrape a javascript tag using regex which I believe might be the easiest way.
The tag looks like:
<script type="text/javascript">
var spConfig=newApex.Config({
"attributes": {
"199": {
"id": "199",
"code": "legend",
"label": "Weapons",
"options": [
{
"label": "10",
"priceInGame": "0",
"id": [
]
},
{
"label": "10.5",
"priceInGame": "0",
"id": [
]
},
{
"label": "11",
"priceInGame": "0",
"id": [
"66659"
]
},
{
"label": "11.5",
"priceInGame": "0",
"id": [
]
},
{
"label": "12",
"priceInGame": "0",
"id": [
]
},
{
"label": "12.5",
"priceInGame": "0",
"id": [
]
},
{
"label": "13",
"priceInGame": "0",
"id": [
]
},
{
"label": "4",
"priceInGame": "0",
"id": [
]
},
{
"label": "4.5",
"priceInGame": "0",
"id": [
]
},
{
"label": "5",
"priceInGame": "0",
"id": [
]
},
{
"label": "5.5",
"priceInGame": "0",
"id": [
]
},
{
"label": "6",
"priceInGame": "0",
"id": [
]
},
{
"label": "6.5",
"priceInGame": "0",
"id": [
]
},
{
"label": "7",
"priceInGame": "0",
"id": [
]
},
{
"label": "7.5",
"priceInGame": "0",
"id": [
]
},
{
"label": "8",
"priceInGame": "0",
"id": [
"66672"
]
},
{
"label": "8.5",
"priceInGame": "0",
"id": [
"66673"
]
},
{
"label": "9",
"priceInGame": "0",
"id": [
]
},
{
"label": "9.5",
"priceInGame": "0",
"id": [
"66675"
]
}
]
}
},
"weaponID": "66733",
"chooseText": "Apex Legends",
"Config": {
"includeCoins": false,
}
});
</script>
and I want to scrape all Label
Whaht I tried to do is:
for nosto_sku_tag in bs4.find_all('script', {'type': 'text/javascript'}):
try:
test = re.findall('var spConfig = (\{.*}?);', nosto_sku_tag.text.strip())
print(test)
except: # noqa
continue
but it only returned an empty value of []
so I am here asking what can I do to be able to scrape the labels?

You need to specify the attribute using attr=value or attrs={'attr': 'value'} syntax.
https://www.crummy.com/software/BeautifulSoup/bs4/doc/#the-keyword-arguments
import json
import re
from bs4 import BeautifulSoup
if __name__ == '__main__':
html = '''
<script type="text/javascript">
var spConfig=newApex.Config({
"attributes": {
"199": {
"id": "199",
"code": "legend",
"label": "Weapons",
"options": [
{ "label": "10", "priceInGame": "0", "id": [] },
{ "label": "10.5", "priceInGame": "0", "id": [] },
{ "label": "11", "priceInGame": "0", "id": [ "66659" ] },
{ "label": "7.5", "priceInGame": "0", "id": [] },
{ "label": "8", "priceInGame": "0", "id": ["66672"] }
]
}
},
"weaponID": "66733",
"chooseText": "Apex Legends",
"taxConfig": {
"includeCoins": false,
}
});
</script>
'''
soup = BeautifulSoup(html, 'html.parser')
# this one works too
# script = soup.find('script', attrs={'type':'text/javascript'})
script = soup.find('script', type='text/javascript')
js: str = script.text.replace('\n', '')
raw_json = re.search('var spConfig=newApex.Config\(({.*})\);', js, flags=re.MULTILINE).group(1)
data = json.loads(raw_json)
labels = [opt['label'] for opt in data['attributes']['199']['options']]
print(labels)
output:
['10', '10.5', '11', '7.5', '8'] ... some removed for brevity

If you are just looking for the entire row field in the JSON object, use the following;
("label":) "([^"]+)",
Then if you want to return the actual value, just use
\2
to pull back the second group

Related

sequelize aggregate function gives wrong result

I'm newbie and sequelize aggregate function makes me confused because it gave me different result than expected. I'm trying to get a total price with sequelize.fn('sum', ...), and somehow sequelize only gives me the the total of first data of the associated model instead of summing all the result. My code as follows:
Training.findAll({
where: {
owner_id: payload
},
attributes: ['id', [sequelize.fn('sum', sequelize.col('training_classes.price')), 'totalPrice']],
include: [{
model: TrainingClass,
as: 'training_classes',
attributes: ['id', 'price'],
}],
group: ['Training.id', 'training_classes.id']
})
And the result of the query :
[
{
"id": "45",
"totalPrice": "300000",
"training_classes": [
{
"id": "94",
"price": "300000"
}
]
},
{
"id": "8",
"totalPrice": "1000000",
"training_classes": [
{
"id": "14",
"price": "1000000"
},
{
"id": "15",
"price": "300000"
},
{
"id": "16",
"price": "200000"
}
]
},
{
"id": "47",
"totalPrice": "100000",
"training_classes": [
{
"id": "97",
"price": "100000"
}
]
},
{
"id": "39",
"totalPrice": "1000000",
"training_classes": [
{
"id": "81",
"price": "1000000"
},
{
"id": "82",
"price": "300000"
}
]
},
{
"id": "24",
"totalPrice": "300000",
"training_classes": [
{
"id": "46",
"price": "300000"
}
]
},
{
"id": "6",
"totalPrice": "200000",
"training_classes": [
{
"id": "11",
"price": "200000"
}
]
},
{
"id": "49",
"totalPrice": "100000",
"training_classes": [
{
"id": "100",
"price": "100000"
},
{
"id": "99",
"price": "1000000"
},
{
"id": "101",
"price": "100000"
}
]
},
{
"id": "20",
"totalPrice": "200000",
"training_classes": [
{
"id": "38",
"price": "200000"
},
{
"id": "35",
"price": "400000"
},
{
"id": "37",
"price": "500000"
},
{
"id": "36",
"price": "100000"
}
]
},
]
as you can see, the totalPrice only from the first element of training_classes, not the entire data of it. How can I resolve this? Thank you in advance
If you want to sum all records in TrainingClass that have the same Training.id then you need to indicate no attributes for TrainingClass and indicate only Training.id in group option:
Training.findAll({
where: {
owner_id: payload
},
attributes: ['id', [sequelize.fn('sum', sequelize.col('training_classes.price')), 'totalPrice']],
include: [{
model: TrainingClass,
as: 'training_classes',
attributes: [],
}],
group: ['Training.id']
})
If you group by TrainingClass.id then you'll get grouped results for each record that has unique TrainingClass.id value and literally every record of TrainingClass has unique id value.

Get deep object along with parent

Below is the sample data(Hierarchical data) I want to only that array of object which has IsChecked = true and also all its children with condition isChecked =true.
$scope.treedData = [{
"id": "1",
"text": "Women",
"parentId": null,
"IsChecked": true,
"children": [{
"id": "4",
"text": "Jeans",
"parentId": "1",
"IsChecked": true,
"children": [
{ "id": "5", "text": "Jeans child", "parentId": "4", "IsChecked": true, "children": [] },
{ "id": "6", "text": "Jeans child child", "parentId": "4", "IsChecked": false, "children": [] }
]
}]
},
{
"id": "2",
"text": "Men",
"parentId": null,
"IsChecked": false,
"children": [{ "id": "10", "text": "Sweatshirts", "parentId": "2", "IsChecked": false, "children": [] }]
},
{
"id": "3",
"text": "Kids",
"parentId": null,
"IsChecked": true,
"children": [{ "id": "12", "text": "Toys", "parentId": "3", "IsChecked": false, "children": [] }]
}
];
You can use reduce for that, and use recursion to apply the filter to the children hierarchy as well:
var treeData = [
{ "id": "1", "text": "Women", "parentId": null, "IsChecked": true,
"children": [
{ "id": "4", "text": "Jeans", "parentId": "1", "IsChecked": false, "children":[
{ "id": "5", "text": "Jeans child", "parentId": "4", "IsChecked": true, "children":[] },
{ "id": "6", "text": "Jeans child child", "parentId": "4", "IsChecked": false, "children":[] }
] }]
},
{ "id": "2", "text": "Men", "parentId": null, "IsChecked": false,
"children": [{ "id": "10", "text": "Sweatshirts", "parentId": "2", "IsChecked": false, "children":[]}]
},
{"id": "3", "text": "Kids", "parentId": null, "IsChecked": true,
"children": [{ "id": "12", "text": "Toys", "parentId": "3", "IsChecked": false, "children":[] }]
}
];
checkedTreeData = treeData.reduce(function checkedOnly (acc, obj) {
return obj.IsChecked
? acc.concat(Object.assign({}, obj, { children: obj.children.reduce(checkedOnly, []) }))
: acc;
}, []);
console.log(checkedTreeData);
.as-console-wrapper { max-height: 100% !important; top: 0; }
NB: In JavaScript there is an unwritten rule to not use an initial capital letter for property names, so IsChecked would be with a lower case i: isChecked. Initial capital letters are commonly used for constructors (classes).
Alternative with .filter()
function filterChecked(treeData) {
return treeData.filter(obj => obj.IsChecked)
.map(obj => Object.assign({}, obj, obj.children ?
{ children: filterChecked(obj.children) } : {}))
}
var treeData = [
{ "id": "1", "text": "Women", "parentId": null, "IsChecked": true,
"children": [
{ "id": "4", "text": "Jeans", "parentId": "1", "IsChecked": false, "children":[
{ "id": "5", "text": "Jeans child", "parentId": "4", "IsChecked": true, "children":[] },
{ "id": "6", "text": "Jeans child child", "parentId": "4", "IsChecked": false, "children":[] }
] }]
},
{ "id": "2", "text": "Men", "parentId": null, "IsChecked": false,
"children": [{ "id": "10", "text": "Sweatshirts", "parentId": "2", "IsChecked": false, "children":[]}]
},
{"id": "3", "text": "Kids", "parentId": null, "IsChecked": true,
"children": [{ "id": "12", "text": "Toys", "parentId": "3", "IsChecked": false, "children":[] }]
}
];
console.log(filterChecked(treeData));
.as-console-wrapper { max-height: 100% !important; top: 0; }
use Lodash _.filter and _.every
_.filter(treedData, function(item) {
return _.every([
item.IsChecked,
_.every(item.children, 'IsChecked')
]);
});
if you also want to check condition of children of children you can do it recursively like
_.filter(treedData, function check(item) {
return _.every([
item.IsChecked,
_.size(item.children) === 0 || _.every(item.children, check)
]);
});

Searching for a certain parameter in a given String

I'm having a major brain fart. I have a java application that adds items to carts, and I'm using JSOUP to parse the html document since it can't run any javascript scripts. So I extracted this out of the getElementsByTag().html(); function. And now I need to find the value 1038, but while keeping in mind the values is not always 1038. So I would need it to search attribute -> 92 -> options -> id = 1038.
var spConfig = new Product.Config({
"attributes": {
"92": {
"id": "92",
"code": "color",
"label": "Color",
"options": [{
"id": "1038",
"label": "GREEN",
"price": "0",
"oldPrice": "0",
"products": ["94035", "94036", "94037", "94038", "94039", "94040", "94041", "94042", "94043", "94044"]
}]
},
"196": {
"id": "196",
"code": "size",
"label": "Size",
"options": [{
"id": "189",
"label": "8 ",
"price": "0",
"oldPrice": "0",
"products": ["94041"]
}, {
"id": "188",
"label": "8.5",
"price": "0",
"oldPrice": "0",
"products": ["94042"]
}, {
"id": "187",
"label": "9",
"price": "0",
"oldPrice": "0",
"products": ["94043"]
}, {
"id": "186",
"label": "9.5",
"price": "0",
"oldPrice": "0",
"products": ["94044"]
}, {
"id": "185",
"label": "10",
"price": "0",
"oldPrice": "0",
"products": ["94035"]
}, {
"id": "184",
"label": "10.5",
"price": "0",
"oldPrice": "0",
"products": ["94036"]
}, {
"id": "183",
"label": "11",
"price": "0",
"oldPrice": "0",
"products": ["94037"]
}, {
"id": "182",
"label": "11.5",
"price": "0",
"oldPrice": "0",
"products": ["94038"]
}, {
"id": "181",
"label": "12",
"price": "0",
"oldPrice": "0",
"products": ["94039"]
}, {
"id": "179",
"label": "13",
"price": "0",
"oldPrice": "0",
"products": ["94040"]
}]
}
},
"template": "$#{price}",
"basePrice": "129.98",
"oldPrice": "180",
"productId": "94013",
"chooseText": "Choose an Option...",
"taxConfig": {
"includeTax": false,
"showIncludeTax": false,
"showBothPrices": false,
"defaultTax": 0,
"currentTax": 0,
"inclTaxTitle": "Incl. Tax"
}
});
I need to receive the value "1038", but it can't just be a search function for integer 1038, because that value can possibly change in my script.

How to read the JSON Array with Javascript to get the value if it exists

Here is my JSON code. I'm storing this json in an array.
{
"kind": "urlshortener#url",
"id": "http://goo.gl/2FIrtF",
"longUrl": "http://hike.com/?utm_source=facebook",
"status": "OK",
"created": "2015-09-22T13:45:53.645+00:00",
"analytics": {
"allTime": {
"shortUrlClicks": "1",
"longUrlClicks": "1",
"referrers": [
{
"count": "1",
"id": "unknown"
}
],
"countries": [
{
"count": "1",
"id": "IN"
}
],
"browsers": [
{
"count": "1",
"id": "Chrome"
}
],
"platforms": [
{
"count": "1",
"id": "Macintosh"
}
]
},
"month": {
"shortUrlClicks": "1",
"longUrlClicks": "1",
"referrers": [
{
"count": "1",
"id": "unknown"
}
],
"countries": [
{
"count": "1",
"id": "IN"
}
],
"browsers": [
{
"count": "1",
"id": "Chrome"
}
],
"platforms": [
{
"count": "1",
"id": "Macintosh"
}
]
},
"week": {
"shortUrlClicks": "1",
"longUrlClicks": "1",
"referrers": [
{
"count": "1",
"id": "unknown"
}
],
"countries": [
{
"count": "1",
"id": "IN"
}
],
"browsers": [
{
"count": "1",
"id": "Chrome"
}
],
"platforms": [
{
"count": "1",
"id": "Macintosh"
}
]
},
"day": {
"shortUrlClicks": "0",
"longUrlClicks": "0"
},
"twoHours": {
"shortUrlClicks": "0",
"longUrlClicks": "0"
}
},
"result": {
"kind": "urlshortener#url",
"id": "http://goo.gl/2FIuvF",
"longUrl": "http://hike.com/?utm_source=facebook",
"status": "OK",
"created": "2015-09-22T13:45:53.645+00:00",
"analytics": {
"allTime": {
"shortUrlClicks": "1",
"longUrlClicks": "1",
"referrers": [
{
"count": "1",
"id": "unknown"
}
],
"countries": [
{
"count": "1",
"id": "IN"
}
],
"browsers": [
{
"count": "1",
"id": "Chrome"
}
],
"platforms": [
{
"count": "1",
"id": "Macintosh"
}
]
},
"month": {
"shortUrlClicks": "1",
"longUrlClicks": "1",
"referrers": [
{
"count": "1",
"id": "unknown"
}
],
"countries": [
{
"count": "1",
"id": "IN"
}
],
"browsers": [
{
"count": "1",
"id": "Chrome"
}
],
"platforms": [
{
"count": "1",
"id": "Macintosh"
}
]
},
"week": {
"shortUrlClicks": "1",
"longUrlClicks": "1",
"referrers": [
{
"count": "1",
"id": "unknown"
}
],
"countries": [
{
"count": "1",
"id": "IN"
}
],
"browsers": [
{
"count": "1",
"id": "Chrome"
}
],
"platforms": [
{
"count": "1",
"id": "Macintosh"
}
]
},
"day": {
"shortUrlClicks": "0",
"longUrlClicks": "0"
},
"twoHours": {
"shortUrlClicks": "0",
"longUrlClicks": "0"
}
}
}
}
In the above JSON, how can we get the existence of analytics -> day -> countries?
I want to know whether the countries exists in day or not first, if it's not, show some value. If it is there, it will try to fetch the count of particualr country.
I'm trying this from last 5 hours without any luck.
if(arr.analytics.day.countries !== undefined) {
function thingscount(arr, platf) {
var x = arr.analytics.day.countries.map(function(el) {
return (platf.indexOf(el.id) != -1) ? parseInt(el.count) : 0; });
var count = 0;
for (var i = 0; i < x.length; i++) count += x[i];
return count;
}
var one = thingscount(arr, ["US"]);
}else{
var one = 0;
}
The above code is working fine if there is countries in day, but sometimes, in my JSON there will be no platforms part, in that case it's giving me
Uncaught TypeError: Cannot read property 'map' of undefined
I need a way to check if the platforms exist, if it's go for a count, if it's not give some other value to the variable.
UPDATE :
I'm using this below code to get the count of IN.
When it has IN key and value, it's giving me the result. But when it don't has the IN key, it's showing 'undefined count' error.
var month_inclicks = arr.analytics.month.countries.filter(function(el) { return el.id == "IN"; })[0].count;
How can we set a default value if the key we are looking for is not exists?
While that isn't JSON, I'm assuming it's a javascript object. That being said, you'll want to look into utilizing the hasOwnProperty method or the in keyword.
Example:
if (arr.total.limited.hasOwnProperty('platforms')) { //do stuff
or
if ('platforms' in arr.total.limited) { //do something
I have corrected your JSON. use hasOwnProperty as #CollinD suggested
var arr = {
total: {
limited: {
things: "451",
platforms: [{
count: "358",
id: "Windows"
}, {
count: "44",
id: "X11"
}, {
count: "42",
id: "Macintosh"
}, {
count: "2",
id: "Linux"
}, {
count: "1",
id: "iPhone"
}, {
count: "1",
id: "iPod"
}]
}
}
};
Object.prototype.hasOwnProperty()
console.log(arr.total.limited.hasOwnProperty('platforms'));
DEMO
For the record, you can roll the map and the count in your 'thingscount' function into one operation by using reduce:
var getCount = function getCount( ary, find ) {
return ary.reduce(function ( acc, record) {
if (find.indexOf(record.id) !== -1) acc += parseInt(record.count, 10);
return acc;
}, 0);
};
Usage inline:
if (arr.analytics.day.hasOwnProperty('countries')) {
var find = ['IN'],
count = arr.analytics.day.countries.reduce(function ( acc, record) {
if (find.indexOf(record.id) !== -1) acc += parseInt(record.count, 10);
return acc;
}, 0);
}
Or with the function:
if (arr.analytics.day.hasOwnProperty('countries')) {
var count = getCount(arr.analytics.day.countries, ['US','IN']);
}

JS Prevent duplicates when using .push()

I grab a list of data from the server and I have to convert it.
Part of this is turning it into a 3 dimensional array. After the "myArr[i].children.push(temp);" it leaves copies of the objects that were pushed in the root of the array. Can I either push without copying or how would I delete these? (I have underscore js included, I know they have good array functions :))
for (var i = 0; i < myArr.length; i++) {
myArr[i].children = [];
for (var q = 0; q < myArr.length; q++) {
if (myArr[i].id == myArr[q].parentid) {
var temp = {
id: myArr[q].id,
index: myArr[q].index,
text: myArr[q].text
}
myArr[i].children.push(temp);
};
};
};
The Data
[{
"id": "5",
"parentid": "0",
"text": "Device Guides",
"index": "0"
}, {
"id": "6",
"parentid": "0",
"text": "Pre-Sales Evaluation",
"index": "1"
}, {
"id": "7",
"parentid": "0",
"text": "Router Setup Guides",
"index": "2"
}, {
"id": "9",
"parentid": "7",
"text": "Sonicwall",
"index": "0"
}, {
"id": "10",
"parentid": "5",
"text": "Grandstream GXP-21XX",
"index": "1"
}, {
"id": "11",
"parentid": "5",
"text": "Polycom Soundstation\/Soundpoint",
"index": "2"
}, {
"id": "12",
"parentid": "7",
"text": "Cisco",
"index": "1"
}, {
"id": "15",
"parentid": "0",
"text": "Post-Sales Implementation Check List",
"index": "7"
}, {
"id": "16",
"parentid": "15",
"text": "Porting and New Number Details",
"index": "0"
}, {
"id": "18",
"parentid": "15",
"text": "Partner Setup",
"index": "1"
}, {
"id": "19",
"parentid": "15",
"text": "test",
"index": "2"
}, {
"id": "20",
"parentid": "0",
"text": "test",
"index": "11"
}, {
"id": "21",
"parentid": "15",
"text": "test",
"index": "3"
}, {
"id": "23",
"parentid": "5",
"text": "New Polycom",
"index": "0"
}, {
"id": "24",
"parentid": "0",
"text": "Test Markup",
"index": "14"
}, {
"id": "25",
"parentid": "0",
"text": "test",
"index": "15"
}]
After it is formated:
{
"children": [{
"id": "5",
"parentid": "0",
"text": "Device Guides",
"index": "1",
"children": [{
"id": "10",
"index": "0",
"text": "Grandstream GXP-21XX"
}, {
"id": "11",
"index": "1",
"text": "Polycom Soundstation/Soundpoint"
}, {
"id": "23",
"index": "2",
"text": "New Polycom"
}]
}, {
"id": "6",
"parentid": "0",
"text": "Pre-Sales Evaluation",
"index": "0",
"children": []
}, {
"id": "7",
"parentid": "0",
"text": "Router Setup Guides",
"index": "2",
"children": [{
"id": "9",
"index": "0",
"text": "Sonicwall"
}, {
"id": "12",
"index": "1",
"text": "Cisco"
}]
}, {
"id": "9",
"parentid": "7",
"text": "Sonicwall",
"index": "0",
"children": []
}, {
"id": "10",
"parentid": "5",
"text": "Grandstream GXP-21XX",
"index": "0",
"children": []
}, {
"id": "11",
"parentid": "5",
"text": "Polycom Soundstation/Soundpoint",
"index": "1",
"children": []
}, {
"id": "12",
"parentid": "7",
"text": "Cisco",
"index": "1",
"children": []
}, {
"id": "15",
"parentid": "0",
"text": "Post-Sales Implementation Check List",
"index": "7",
"children": [{
"id": "16",
"index": "0",
"text": "Porting and New Number Details"
}, {
"id": "18",
"index": "1",
"text": "Partner Setup"
}, {
"id": "19",
"index": "2",
"text": "test"
}, {
"id": "21",
"index": "3",
"text": "test"
}]
}, {
"id": "16",
"parentid": "15",
"text": "Porting and New Number Details",
"index": "0",
"children": []
}, {
"id": "18",
"parentid": "15",
"text": "Partner Setup",
"index": "1",
"children": []
}, {
"id": "19",
"parentid": "15",
"text": "test",
"index": "2",
"children": []
}, {
"id": "20",
"parentid": "0",
"text": "test",
"index": "11",
"children": []
}, {
"id": "21",
"parentid": "15",
"text": "test",
"index": "3",
"children": []
}, {
"id": "23",
"parentid": "5",
"text": "New Polycom",
"index": "2",
"children": []
}, {
"id": "24",
"parentid": "0",
"text": "Test Markup",
"index": "14",
"children": []
}, {
"id": "25",
"parentid": "0",
"text": "test",
"index": "15",
"children": []
}]
}
Here you go
tree = {0: {children: []}}
data.forEach(function(x) {
x.children = tree[x.id] ? tree[x.id].children : [];
tree[x.id] = x;
if(!tree[x.parentid])
tree[x.parentid] = {children: []}
tree[x.parentid].children.push(x)
})
result = tree[0].children
This solution is linear (iterates over the array just once) and doesn't require any pre-sorting.
http://jsfiddle.net/U47WY/
and here's how to convert the tree back to the linear array:
function flatten(source) {
return source.reduce(function(a, x) {
var children = x.children;
delete x.children;
return a.concat([x], flatten(x.children))
}, []);
}
Following on from a friendly discussion in the comments :
var zeroObj = {"children":[]};
for (var i = 0; i < myArr.length; i++) {
if(myArr[i].parentid === 0) {
zeroObj.children.push(myArr[i]);
} else {
for (var q = 0; q < myArr.length; q++) {
if (myArr[i].parentid == myArr[q].id) {
myArr[q].children = myArr[q].children || [];
myArr[q].children.push(myArr[i]);
};
};
}
};

Categories