scrape Shopee API v4 - javascript

For my final project I need to collect data by scraping Shopee through its hidden (internal) API. The request works when I send it from Insomnia, but when I run the same request from a script, either locally or in Google Colab, it returns an error.
How can I fix this?
import requests
import json
# Request headers copied from a logged-in browser session.
# NOTE(review): the cookie appears to embed session/CSRF tokens; confirm, and
# prefer loading it from an environment variable or a local file over
# hard-coding it in the script.
headers = {
    'sec-ch-ua-mobile': '?0',
    # The value is written as two adjacent literals so that it stays a single
    # string with no embedded newline; a quoted literal cannot span lines.
    'cookie': (
        'REC_T_ID=e67c02b5-ae54-11ec-b368-46ac8e8cc9d8; SPC_F=KrJ9Ck0EYC252EWJ3FSH5QFNzjmvng6O; SPC_IA=-1; _gcl_au=1.1.459910866.1654678938; _fbp=fb.2.1654678939550.956784750; G_ENABLED_IDPS=google; SPC_CLIENTID=S3JKOUNrMEVZQzI1jkqfwanvqrwehsep; _gcl_aw=GCL.1660529943.Cj0KCQjwuuKXBhCRARIsAC-gM0g5RPYu1Cfx0PZbXHrR5qqd7JqgFEy4XrCAxXEGFD4quU2tORTIR9caAsVdEALw_wcB; _gac_UA-61904553-8=1.1660529949.Cj0KCQjwuuKXBhCRARIsAC-gM0g5RPYu1Cfx0PZbXHrR5qqd7JqgFEy4XrCAxXEGFD4quU2tORTIR9caAsVdEALw_wcB; _med=refer; _gid=GA1.3.792417909.1660891119; csrftoken=Hk3UgpYhG30zu0CO9Vhk2OIKptWNBS0g; _QPWSDCXHZQA=9be12e07-9c49-426e-e0d8-01a11f73956b; AMP_TOKEN=%24NOT_FOUND; __LOCALE__null=ID; _dc_gtm_UA-61904553-8=1; SPC_T_ID="uXbSXytLbRMSr+KtQpRkW7f5FHiriPO+CdAryBv6THa5ljtJhfxKSiI5g2Ps2Fl4eILJBWgkAYeR+c0hO4843b12KCXHt56jNWASfgA5Uq8="; SPC_U=616200160; SPC_T_IV="eB95as87FjhL8HoasAA0kw=="; _ga_KK6LLGGZNQ=GS1.1.1660961596.8.0.1660961596.0.0.0; SPC_R_T_ID=uXbSXytLbRMSr+KtQpRkW7f5FHiriPO+CdAryBv6THa5ljtJhfxKSiI5g2Ps2Fl4eILJBWgkAYeR+c0hO4843b12KCXHt56jNWASfgA5Uq8=; SPC_R_T_IV=eB95as87FjhL8HoasAA0kw==; SPC_T_ID=uXbSXytLbRMSr+KtQpRkW7f5FHiriPO+CdAryBv6THa5ljtJhfxKSiI5g2Ps2Fl4eILJBWgkAYeR+c0hO4843b12KCXHt56jNWASfgA5Uq8=; SPC_T_IV=eB95as87FjhL8HoasAA0kw==; SPC_SI=id+yYgAAAABBUWdBaGJJRaEQWwAAAAAAbDFtUDZZZ2k=; SPC_ST=.aVZDcFoyVjBuUWIwUXVSUnkGCGuGI58EkFOzdykhsuSCGz0GrBWotkUiREvJO38YxTxyl3Pgbl73NUs1AmCexDhPneO/ABd8bgUkVqlhCvZTNPDPg8jv/9KaHwWagKm9FM55IY61eECu5twdRUQl9u3xgfshk26TRkvpli4dlCUZzIE0boMi5/5B/CcqUgoXsDH567+KunuKEe92wUSC1Q==; _ga=GA1.3.1352849021.1654678939; cto_bundle=ZLv7oF9EWUpOZWVHYUkyUHh0d2RBWDJvTWk5eXllWVpia1F1eXJ4RkdZcjhVZ1Q3NVRYYnE0c0hOWERsMm1tTjFER09MbmdMTW1VZG5VbkQ0MjByVnpxYlNRdk1MRk9TUGtNSzZpRzRnZFNXU1ZUVlElMkY1dXRpbGFUZm5vdjdvcklFQzk0YzBuVm1qMUJzZnRyb2xPMHpRMldVQSUzRCUzRA; _ga_SW6D8G0HXK=GS1.1.1660959836.35.1.1660961617.22.0.0; '
        'SPC_EC=U1A1Vk5JdzlVaFVYdjJRUk4fyVPKEHSso64GpvFSCO/oihfsUpaQrXO9e4XqPT/AjNQJP7hcW+o+A7chna6AIbCtFRsocFdW1x1oS3A8+pNHmK3oRTDCZe2BDyAP0cOp133wiyu0GTSCetXIhbIRwvkOTJYqOXYBGKuTW6tGY1o=; shopee_webUnique_ccd=veSMI3XpR84mDT6rWJgoWg%3D%3D%7C9xD6GCFDkurxx4Cxf%2F72oK7%2FP2ilXgSYBkzRAd4F%2BSkKrCsqCWGVzz0SHGMINBr5KgoTxt7LXhBKejCILMQlWRcetFY%3D%7ClXsfMcnYECC51PEy%7C05%7C3'
    ),
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
    'referer': 'https://shopee.co.id/Perawatan-Kecantikan-cat.11043145?page=0&ratingFilter=4',
}
# Zero-based index of the results page to request.
x = 0
# Offset sent as the `newest` query parameter: one page is 60 items,
# matching `limit=60` in the URL below.
number_page = 60 * x
url = (
    "https://shopee.co.id/api/v4/search/search_items"
    "?by=relevancy&limit=60&match_id=11043145"
    f"&newest={number_page}"
    "&order=desc&page_type=search&rating_filter=4&scenario=PAGE_CATEGORY&version=2"
)
# Send the GET request with the browser-like headers and parse the JSON body.
response = requests.get(url, headers=headers)
y = response.json()
# Bare expression so that a notebook cell displays the parsed payload.
y
Output:
{'tracking_id': '745a6f4b-0fc3-48af-b563-5a7ec483a601',
'action_type': 2,
'error': 90309999}

You can add a request header with the key af-ac-enc-dat and the value null; this works for me.
// Call the API with the two extra headers and parse the JSON response.
var request = WebRequest.Create(apiURL);
request.Headers["x-api-source"] = "pc";
request.Headers["af-ac-enc-dat"] = "null";
// Dispose the response and the reader deterministically so the underlying
// connection is released even if reading the body throws.
using (var response = request.GetResponse())
using (var reader = new StreamReader(response.GetResponseStream()))
{
    s_ResponseString = reader.ReadToEnd();
}
dynamic Prodata = Newtonsoft.Json.Linq.JObject.Parse(s_ResponseString);

Related

cmd doesn't recognize casper.start()

// Tell PhantomJS where CasperJS lives and load its bootstrap script.
// Backslashes are doubled: inside a JS string a lone "\U" or "\l" silently
// loses its backslash and "\b" becomes a backspace, which corrupts the path.
phantom.casperPath = 'C:\\Users\\lee0\\Desktop\\casperjs-1.1.4-1\\casperjs-1.1.4-1';
phantom.injectJs('C:\\Users\\lee0\\Desktop\\casperjs-1.1.4-1\\casperjs-1.1.4-1\\bin\\bootstrap.js');

var fs = require('fs');

// Options are passed straight to create(). Calling the object that create()
// returns, as in create()({...}), throws because it is not a function.
var casper = require('casper').create({
  pageSettings: {
    userAgent: 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'
  }
});

// Open the login page.
casper.start().thenOpen("https://dcid.dcinside.com/join/login.php?", function() {
  console.log("Facebook website opened");
});

// Log in: fill in the credentials inside the page context and click the submit control.
casper.then(function() {
  console.log("해당 id와 pw로 로그인 합니다.");
  this.evaluate(function() {
    document.getElementById("user_id").value = "chermy018";
    document.getElementById("password").value = "kl424321";
    document.getElementById("login_ok").children[1].click();
  });
});

// Fetch the log page separately and dump the returned HTML.
var request = require('request'),
    cheerio = require('cheerio');
var url = "http://gallog.dcinside.com/inc/_mylog.php?gid=chermy018&oneview=Y&cid=59&page=1";
request(url, function(err, res, html) {
  if (!err) {
    var $ = cheerio.load(html);
  }
  console.log(html);
});

casper.run();
TypeError: undefined is not a constructor evaluating casper.start().
I already posted this question once. In the comments I added that the problem seems to be with my CasperJS installation, because other, known-good scripts also fail with "undefined is not a constructor evaluating casper.start()". I am posting the question again in case someone can help.
Your script is written for CasperJS, but you've run it via PhantomJS, which is incorrect. Instead, you should run it this way (assuming CasperJS is in your PATH):
casperjs text2.js

No HTTP response from Chrome extension to my Java Server

This is my very first question I am posting on StackOverflow. I usually tend to find answers to any question I have for whatever project, but I am really stuck on this one. I'm writing a Chrome extension that attempts to send JSON-objects over a HTTP post request with certain data in it to a basic Java server that will -in a later stage- process some of the data and return a HTTP response with relevant (processed) information for the client. This is the relevant JavaScript snippet on client-side:
/**
 * Serialises `jsonObject` and POSTs it as plain text to the host named by the
 * global `link`, logging the response once the request has completed.
 *
 * @param {*} jsonObject - Value to serialise with JSON.stringify.
 */
function postRequest(jsonObject) {
  const payload = JSON.stringify(jsonObject);
  const xhr = new XMLHttpRequest();

  const logWhenDone = function () {
    // readyState 4 means the request has finished.
    if (xhr.readyState != 4) {
      return;
    }
    console.log(xhr.response);
  };

  xhr.onreadystatechange = logWhenDone;
  xhr.open("POST", "http://" + link);
  xhr.setRequestHeader("Content-type", "text/plain");
  xhr.send(payload);
  // The request is left in flight; abort() is deliberately not called here.
}
The input JSON-object is legit and the link-item is the predefined host IP of the server. The server-side code is Java-based and is the following:
/**
 * Minimal single-threaded HTTP endpoint: logs every incoming request (headers
 * and body) to stdout and answers each one with an empty "200 OK" response
 * that allows cross-origin callers.
 */
public class Server {
    /** TCP port the server listens on. */
    private static final int PORT = 1024;

    /**
     * Accepts connections forever, handling one request per connection.
     *
     * @param args unused
     * @throws Exception if the listening socket cannot be opened
     */
    public static void main(String args[]) throws Exception {
        // Bind the port once and reuse the listener for every request.
        try (ServerSocket ss = new ServerSocket(PORT)) {
            while (true) {
                try {
                    try (Socket s = ss.accept()) {
                        handle(s);
                    }
                    System.out.println("");
                    System.out.println("Socket Closed");
                    System.out.println("");
                } catch (Exception ex) {
                    // One broken connection must not stop the server, but it
                    // must not disappear silently either.
                    ex.printStackTrace();
                }
            }
        }
    }

    /**
     * Reads one HTTP request from the socket, prints it, and writes the response.
     *
     * @param s the accepted client connection; closed by the caller
     * @throws Exception on I/O failure or a malformed Content-Length header
     */
    private static void handle(Socket s) throws Exception {
        // ISO-8859-1 maps one byte to one char, so Content-Length (a byte
        // count) can be used directly as the number of chars to read.
        java.nio.charset.Charset latin1 = java.nio.charset.StandardCharsets.ISO_8859_1;
        BufferedReader br = new BufferedReader(new InputStreamReader(s.getInputStream(), latin1));
        PrintWriter pw = new PrintWriter(s.getOutputStream());

        System.out.println("Request: ");
        int contentLength = 0;
        String line;
        // The header section ends at the first empty line.
        while ((line = br.readLine()) != null && line.length() > 0) {
            System.out.print(line + "\r\n");
            int colon = line.indexOf(':');
            if (colon > 0 && line.substring(0, colon).trim().equalsIgnoreCase("Content-Length")) {
                contentLength = Math.max(0, Integer.parseInt(line.substring(colon + 1).trim()));
            }
        }

        // Read exactly Content-Length bytes of body. The body is not
        // newline-terminated and the client keeps the connection open, so
        // readLine() would block here forever.
        char[] body = new char[contentLength];
        int filled = 0;
        while (filled < contentLength) {
            int n = br.read(body, filled, contentLength - filled);
            if (n < 0) {
                break;
            }
            filled += n;
        }
        String bodyText = new String(
                new String(body, 0, filled).getBytes(latin1),
                java.nio.charset.StandardCharsets.UTF_8);
        System.out.println(bodyText);
        System.out.println("");

        System.out.println("Response:");
        String response = "HTTP/1.1 200 OK\r\n"
                + "Access-Control-Allow-Origin: *\r\n"
                + "Content-Length: 0\r\n"
                + "Connection: close\r\n"
                + "\r\n";
        System.out.println(response);
        pw.write(response);
        // PrintWriter buffers its output; without an explicit flush the
        // response never reaches the client.
        pw.flush();
    }
}
The server does receive the request and the JSON-object, but no response is received on client side (readyState is usually 4, but status code is 0 and responseText is empty). Additionally the connection is not closed when I don't use request.abort() or close the extension's window. It for sure has nothing to do with the permissions in the manifest file, so I didn't include it in this post. Can somebody see what my mistake is or anybody have had a similar experience?
EDIT:
The HTTP request is received on server side and looks like:
POST / HTTP/1.1
Host: [IP address of the server; I deleted the exact address for privacy]
Connection: keep-alive
Content-Length: 173
User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36
Origin: chrome-extension://nhpingbdjkkljheamibjobneeehiednk
Content-type: text/plain
Accept: */*
Accept-Encoding: gzip, deflate
Accept-Language: nl-NL,nl;q=0.8,en-US;q=0.6,en;q=0.4,fr;q=0.2
Since the request is received on server side, I figured that the actual sending of the response is currently unsuccessful. Might it have something to do with the origin domain of the client? When I copy-paste that domain in my browser, I get a ERR_FILE_NOT_FOUND.

Websocket handshake doesnt work

I have a C++ server using WinSock2 and I'm trying to connect to it from my JavaScript client, but it doesn't work; the Chrome console says "Error during WebSocket handshake: Incorrect 'Sec-WebSocket-Accept' header value".
I compared the output of my sha1 and base64 functions with online SHA-1 and Base64 tools, so the problem isn't there.
Chrome Response Header:
HTTP/1.1 101 Switching Protocols
Upgrade: websocket
Connection: Upgrade
Sec-Websocket-Accept: NzdkYjg1Y2I4MDRlNTk0OGNmNzI1NzdjZDgwOTEwZWZiYWI1NzQ3Yw==
Chrome Request Header:
GET ws://localhost:8820/ HTTP/1.1
Host: localhost:8820
Connection: Upgrade
Pragma: no-cache
Cache-Control: no-cache
Upgrade: websocket
Origin: file://
Sec-WebSocket-Version: 13
User-Agent: Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36
Accept-Encoding: gzip, deflate, sdch, br
Accept-Language: he-IL,he;q=0.8,en-US;q=0.6,en;q=0.4
Sec-WebSocket-Key: Y7a2ZKEz/VCM92Wya49iPA==
Sec-WebSocket-Extensions: permessage-deflate; client_max_window_bits
Server Code:
//key is already defined.
key += "258EAFA5-E914-47DA-95CA-C5AB0DC85B11";
key = sha1(key);
key = base64_encode(reinterpret_cast<const unsigned char*>(key.c_str()), key.length());
toClient = "HTTP/1.1 101 Switching Protocols\r\n";
toClient += "Upgrade: websocket\r\n";
toClient += "connection: Upgrade\r\n";
toClient += "Sec-Websocket-Accept: ";
toClient += key;
toClient += "\r\n\r\n";
sendData(sc, toClient);
Client Code:
<!DOCTYPE HTML>
<script type="text/javascript">
/**
 * Opens a WebSocket to ws://localhost:8820 and reports connection events
 * through alert(). When the browser has no WebSocket support it only shows
 * a notice.
 */
function WebSocketTest() {
  var supported = "WebSocket" in window;
  if (!supported) {
    // The browser doesn't support WebSocket
    alert("WebSocket NOT supported by your Browser!");
    return;
  }

  // Let us open a web socket
  var ws = new WebSocket("ws://localhost:8820");
  console.log("test");

  // Web Socket is connected, send data using send()
  function handleOpen() {
    alert("Connection.");
    ws.send("20304user04user04user");
  }

  // The payload is read but not used; only a notice is shown.
  function handleMessage(evt) {
    var received_msg = evt.data;
    alert("Hey");
  }

  // websocket is closed.
  function handleClose() {
    alert("Connection is closed...");
  }

  ws.onopen = handleOpen;
  ws.onmessage = handleMessage;
  ws.onclose = handleClose;
}
</script>
<div id="sse">
Run WebSocket
</div>
I tried manually to build the response:
Raw sha1 digest bytes of the concatenation are 77db85cb804e5948cf72577cd80910efbab5747c
Bytes to base 64: d9uFy4BOWUjPcld82AkQ77q1dHw=
This is different than what you have. You are transforming the hexadecimal string representation of those bytes to base 64 instead of raw bytes to base 64.

Want to display a PDF document returned by a GET request in IE11

This is something that I feel should be very simple, but haven't been able to solve so far.
I'm working with a vendor system which has implemented a very basic API for our use. We send a GET request with an authentication token in the header, the server then returns us a PDF document in the body of the response.
All I want to do is display the PDF in the browser, any method would be fine, in a new tab or iframe etc.
I have managed to get this working in both Firefox and Chrome using the answer at "Request a file with a custom header", but this does not work with IE 11, and IE 11 is the only supported browser within our organisation.
Request headers are:
Accept:*/*
Accept-Encoding:gzip, deflate, sdch
Accept-Language:en-US,en;q=0.8
Authorization:bearer ZYFXVEnE2Uf3kU0Ud8rFvq5pbV2IfrNzzJBAu8lI8p73reZ6-vdeChtXlGGN7NwUNvo2-5LKNd9FniZlcElplycBL4f2qu6EaHkO-Xb_-G5DR07p62UYq_DErl937Yc-mpLMphBBHC7-0uqYNfrivkbc3xeOvEBnRRtfagz7dYJ8EJive6QjwWjYgGj9HRQUAbcOggbJGxZDXmrlWTveUji-5CKb7w5guBUOhjyDkyB53r8rm2qptGfsp1NsKLU4h4kEDlNaxbbzB_oJQbuyIoG80BTnP-NB0bqOtPJ09FrM9AFVfrdJM0fRwS3BfxRgVNm01FgW-jQwp1GgzeAbS-3uRR1G92Y-rw6L8R17l31PPFlV_CNeK_oAG-AJldmn2lgv6a6l6Cj9s2OqOfXyX09iZIN6vIKXAqedSWb708GTNfJ7iLKjdGVYCYW1zZ9IZKXMyeMoK7nW_rDuMzmoXeLY3tGFeeOf27vuI4RdSaGVCD5kIynrYPe8fU1sp-KZE0i1aJ0qqQ9g7Nvg42ZsFIFHBqhRIY-k4Dxjm1jPloGNbFqhdc-GK5LYHcg_u3DwFbSUKWpXdzCPBn43qJ_yVfDqffQDsDafvGDKP0U4duq0eYNXYWKnB8VR7xytykWjXAj70a9SFPRocqhugiqJIwBMS6a5gfqlUssgEnCfhVGE_eGUSGrYdCvfHHKb-13O9m6dXomYFuK8Ql7H73MjDgzTihtYLULh3nAHrU-FehrBRsUeKpc32hKUVhVvTlw2lTOUcMhlC99EKPMT0hZhLy4t8e-icL2aqcKdN-S1rt-HU60cukw4SnLyM_Nfa-ytD8vtUwMLAV9K5h4DdK7H3LpfbQNbaHRfBjRk5aQ7Q2o
Content-type:application/json
Host: /vendor_URL/
Origin:null
Proxy-Connection:keep-alive
User-Agent:Mozilla/5.0 (Windows NT 6.3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36
Response Headers:
Accept-Ranges:none
Access-Control-Allow-Origin:*
Cache-Control:no-cache
Content-Disposition:inline; filename=GoToPlan.pdf
Content-Length:372483
Content-Type:application/pdf
Date:Wed, 26 Oct 2016 19:41:04 GMT
Expires:-1
Pragma:no-cache
Proxy-Connection:keep-alive
Server:Microsoft-IIS/7.5
Via:1.1 /proxy_URL/
X-AspNet-Version:4.0.30319
X-Powered-By:ASP.NET
Code that currently works in firefox and Chrome:
// Request the PDF with the bearer token and, on success, open it in a new
// window as a base64 data URI.
var document_http = new XMLHttpRequest();
var url = /document_URL/; // presumably a placeholder for the real document endpoint
document_http.open("GET", url, true);
// Ask for the body as opaque bytes: with the x-user-defined charset each
// response byte is kept in the low 8 bits of a character of responseText,
// which toBinaryString() then masks out.
document_http.overrideMimeType( "application/octet-stream; charset=x-user-defined;" );
// Send the proper header information along with the request
document_http.setRequestHeader("Content-type", "application/json");
document_http.setRequestHeader("Authorization", "bearer " + auth_response[3]);
// Called on every state change; acts only once the request has completed successfully.
document_http.onreadystatechange = function() {
  // Logical AND rather than bitwise `&`, so the check short-circuits and
  // reads as the boolean condition it is.
  if (document_http.readyState === 4 && document_http.status === 200) {
    var data = toBinaryString(this.responseText);
    data = "data:application/pdf;base64," + btoa(data);
    window.open(data);
  }
};
document_http.send(params);
/**
 * Reduces every UTF-16 code unit of `data` to its low 8 bits and returns the
 * resulting string, one character per input code unit.
 *
 * @param {string} data - Text whose code units carry one byte each in their low 8 bits.
 * @returns {string} String whose char codes are all in the range 0-255.
 */
function toBinaryString(data) {
  var out = '';
  var total = data.length;
  var index = 0;
  while (index < total) {
    // Mask off everything above the lowest byte.
    out += String.fromCharCode(data.charCodeAt(index) & 0xFF);
    index += 1;
  }
  return out;
}
Data URIs are subject to browsers' limitations on URLs. You can't reliably use them to display long content.
You can load the document as a blob, then use it in a iframe:
// Download the PDF as a Blob and display it in an iframe through an object URL.
var xhr = new XMLHttpRequest();
xhr.open("GET", "/path/to/file.pdf");
xhr.responseType = "blob";
xhr.onload = function() {
  // The load handler is passed a ProgressEvent, which has no `response`
  // property; the downloaded Blob lives on the request object itself.
  iframe.src = URL.createObjectURL(xhr.response);
};
xhr.send();
This should work in every major browser. URL.createObjectURL will create a string of the form blob:http://host/unique-id that is a reference to the blob object.
Remember to call URL.revokeObjectURL on the given string when you're done showing the PDF. This will discard the reference to the blob, which can be safely garbage collected.

Cookie not set with CORS Ajax request using XMLHttpRequest

I've been struggling with this for a while. We're calling our internal service via Ajax, and the service sets a cookie once the request is done. From what I've seen so far, this can be done via the withCredentials property. I've also checked the example from Mozilla:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS#Requests_with_credentials
Which points me to this. http://arunranga.com/examples/access-control/credentialedRequest.html
It looks like it's working; I just have to set up CORS correctly. However, when I copied the code and tried calling my service, I didn't see any cookie being set.
Here's the code.
// Shared request object: callOtherDomain() sends it and handler() inspects it.
var invocation = new XMLHttpRequest();
// Address of the cross-origin service to call (placeholder value here).
var url = 'MY SERVICE URL';
// Declared but neither assigned nor read in the code shown.
var invocationHistoryText;
/**
 * Sends an asynchronous, credentialed GET request to `url` using the shared
 * `invocation` object and routes state changes to `handler`.
 * Does nothing when `invocation` is falsy.
 */
function callOtherDomain() {
  if (!invocation) {
    return;
  }
  invocation.open('GET', url, true);
  // withCredentials is a boolean property. A string such as "true" only
  // works through truthiness coercion, and "false" would enable it as well.
  invocation.withCredentials = true;
  invocation.onreadystatechange = handler;
  invocation.send();
}
/**
 * readystatechange callback for the shared `invocation` request.
 * Logs the current state while the request is in progress, logs the response
 * text on a completed 200, and alerts on any other completed status.
 *
 * @param {Event} evtXHR - The readystatechange event (not used).
 */
function handler(evtXHR) {
  // Not finished yet: report progress only.
  if (invocation.readyState != 4) {
    console.log("currently the application is at" + invocation.readyState);
    return;
  }

  // Finished, but not with a 200.
  if (invocation.status != 200) {
    alert("Invocation Errors Occured" + invocation.readyState);
    return;
  }

  console.log(invocation.responseText);
}
This is the response I get
access-control-allow-credentials:true
access-control-allow-headers:Authorization, Content-Type, If-None-Match
access-control-allow-methods:GET, HEAD, POST, PUT, PATCH, DELETE, OPTIONS
access-control-allow-origin:http://localhost:8888
access-control-expose-headers:WWW-Authenticate, Server-Authorization
access-control-max-age:86400
cache-control:max-age=600
Connection:keep-alive
content-encoding:gzip
Content-Length:56
Content-Type:text/plain; charset=utf-8
Date:Thu, 07 Jul 2016 14:25:19 GMT
Expires:Thu, 07 Jul 2016 14:35:19 GMT
And this is the request header it makes
Request Headers
Accept:*/*
Accept-Encoding:gzip, deflate, sdch
Accept-Language:en-US,en;q=0.8,th;q=0.6,zh-CN;q=0.4,zh-TW;q=0.2
Cache-Control:no-cache
Connection:keep-alive
Cookie:name=Fe26.2**6cff63f4eb1d5d63c9ef3beb4618b19b4638f5a805030ac9e3e06f0f3561e6f1*gX0LowRnJ-Bc0YS5WAMFqQ*66rp6okC9kDZXmZ4rl8BVVlGOI_KcGlydCG-XnFmgUX4_e6CMeg6JCpjx12xEF5c**a454d2b7be5b7f32cc463aaf151f67fbeefedf4dbe0d389ba2fb19a1b6745ba3*XBh9bkxi6HLcB0zwm500nYjsiiUw7TRsCZB_p2xgln4
DNT:1
Host:MY INTERNAL SERVICE
Origin:http://localhost:8888
Pragma:no-cache
Referer:http://localhost:8888/
User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36
So, I'm not sure what's wrong here.

Categories