Extract data from a web page - javascript

I have a link to an app on Apple App Store. I need to use its data on my own webpage. To be more precise, I want to extract the app icon, its category and whether or not it is free and to add this data to my webpage. How can I do this using JavaScript?

To my knowledge, you can't do this with JavaScript alone, because the browser's same-origin policy blocks requests to another domain.
You would need a web server and a scraper (for example, written in PHP) to read that page, and then call your PHP script from your JavaScript.

Here is the PHP Script:
<?php
// Scrapes the icon URL and the category text out of an App Store page.
// The markers searched for below mirror the page markup this snippet was
// written against; they must be updated if the page layout changes.

// Page to scrape. This variable was used but never defined before.
$Url = "https://itunes.apple.com/us/app/google+/id447119634?mt=8";

/**
 * Returns the text that follows a chain of start markers, cut at an end marker.
 *
 * Each start marker is searched for after the previous one. Returns null
 * when any start marker is missing; when the end marker is missing, the
 * remainder of the text is returned.
 *
 * @param string $html         Text to search.
 * @param array  $startMarkers Markers to skip past, in order.
 * @param string $endMarker    Marker that terminates the wanted text.
 * @return string|null
 */
function extractBetween($html, array $startMarkers, $endMarker)
{
    $rest = (string) $html;
    foreach ($startMarkers as $marker) {
        $pos = strpos($rest, $marker);
        if ($pos === false) {
            return null;
        }
        $rest = (string) substr($rest, $pos + strlen($marker));
    }
    $end = strpos($rest, $endMarker);
    return $end === false ? $rest : (string) substr($rest, 0, $end);
}

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $Url);
curl_setopt($ch, CURLOPT_REFERER, "https://itunes.apple.com/us/app/google+/id447119634?mt=8");
curl_setopt($ch, CURLOPT_USERAGENT, "Mozzila/1.0");
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
$output = curl_exec($ch);
if ($output === false) {
    // curl_exec() returns false on failure; report it instead of parsing nothing.
    $error = curl_error($ch);
    curl_close($ch);
    die("Request failed: " . $error);
}
curl_close($ch);

// Icon: first src="..." value after the artwork container.
$logo = extractBetween($output, array('<div class="artwork">', 'src="'), '"');
// Category: text of the first element opened after the "Category:" label.
$category = extractBetween($output, array('<span class="label">Category:', '">'), '<');

echo $logo;
echo $category;
?>

Related

Extract a portion code from an external page

I am creating a personal website for vacation rentals (a joomla website).
The owner has created an ad here: https://www.armor-vacances.com/locat...tml#calendrier
Do you know if there is a way to extract the entire "calendar" portion to display on my website site?
I tried some scripts I found that use "file_get_html", for example, but I couldn't get them to do what I want.
Thanks for your help.
PHP can do it, as long as you're not violating the site's copyright or its robots.txt rules, and as long as you can rely on the page always containing the container you're after. It's not going to be easy, though.
It would be good if the site you're scraping has the information in machine ready format using meta tags that it knows programs will be looking for.
Here's a starting point for some scraping code for you (I've cached the page content to a local file so that you don't hit the website too many times each day):
<?php
// php7.0
// Downloads $src (cached in a local file, one copy per URL per day), parses
// the HTML, and lists every <div> with its attributes and text, flagging the
// one whose id attribute equals $findDivId.
$src ="https://stackoverflow.com/questions/52678213/extract-a-portion-code-from-an-external-page";
$findDivId="Place your ID here";

// Build the cache file name only after its parts exist. Previously $tmpfn
// was assembled first, while $srcX and $now were still undefined.
$now = date('Y-m-d', time());
$srcX = preg_replace("/[^a-zA-Z0-9]+/", "", $src);
$srcX = substr($srcX, 0, 155);
$tmpfn="C:/temp/temp.$srcX.$now.html";

if ( file_exists($tmpfn) ) {
    $html=file_get_contents($tmpfn);
}
else {
    $ch = curl_init($src);
    // HTTP_USER_AGENT is absent when the script is not run for a browser request.
    curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT'] ?? 'Mozilla/5.0');
    curl_setopt($ch, CURLOPT_AUTOREFERER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // NOTE(review): certificate checks are switched off here; re-enable them
    // unless the target really needs this.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
    // Keep the response headers out of $html so that only the document
    // itself is cached and parsed.
    curl_setopt($ch, CURLOPT_HEADER, false);
    $html = curl_exec($ch);
    if ( !$html ) {
        echo curl_error($ch);
        curl_close($ch);
        exit;
    }
    curl_close($ch);
    file_put_contents($tmpfn, $html);
}
echo "<LI>html size = ".strlen($html)." bytes";
if ( strcmp($html, "") != 0 ) {
    $dom = new DOMDocument;
    // The document must actually be loaded, otherwise no <div> is ever found.
    // Parser warnings are collected internally instead of being printed.
    libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    $divs = $dom->getElementsByTagName("div"); // or $dom->getElementById($id) for a single element
    if ( $divs ) {
        echo "<UL>";
        foreach ($divs as $div) {
            echo "<LI>Tag::".htmlspecialchars($div->nodeName);
            if ( $div->hasAttributes() ) {
                foreach ($div->attributes as $attr) {
                    // Scraped text is untrusted: escape it before echoing into this page.
                    echo "<BR>Attribute::".htmlspecialchars($attr->nodeName) . "=" . htmlspecialchars($attr->nodeValue) . " ";
                    if ( strcmp($attr->nodeName,'id')==0
                        and strcmp($attr->nodeValue,$findDivId)==0 ) {
                        echo "<LI>Found ".htmlspecialchars($findDivId)."!!";
                    }
                }
            }
            echo "<BR>Value::".htmlspecialchars($div->nodeValue)."<BR><BR>";
        }
        echo "</UL>";
    }
}
?>

External Portal In Unifi

I am creating an external portal for a UniFi AC controller, in which users are authenticated and, if they exist, are allowed to use the internet connection for some time. The problem is that after a user has been authenticated, the code I found that is supposed to grant internet access does not work for me.
So the user is authenticated in the external portal but is not allowed to start using the internet. The code that should allow a user to start using the internet is in the function sendAuthorization.
The login.php source codes.
<?php session_start();
require_once('dbconnect.php');

// Login handler for the guest portal: checks the posted credentials against
// the students table and, on success, asks the UniFi controller to authorize
// the device stored in $_SESSION['id'].
$username = isset($_POST['username']) ? trim($_POST['username']) : '';
// NOTE(review): unsalted md5 is a weak password hash; it is kept only because
// the stored passwords are compared against it.
$password = md5(isset($_POST['pass']) ? $_POST['pass'] : '');

// Escape the user-supplied value before placing it in the SQL text.
$query = "select * from students where email='".mysql_real_escape_string($username)
    ."' AND password='".mysql_real_escape_string($password)."'";
$result = mysql_query($query) or die (mysql_error());
$row = mysql_fetch_array($result);

if($row && $username === $row['email'] && $password === $row['password']){
    echo '<script language="javascript">';
    echo 'alert("Successfully login")';
    echo '</script>';
    //Minutes to authorize, change to suit your needs
    sendAuthorization($_SESSION['id'], (12*60));
    // No top-level "return" here: it would end the script before the
    // "Connecting to the network..." markup below is sent.
}
else{
    $_SESSION['error']=TRUE;
    header('location:http://b4w.uhuruone.co.tz/guest/s/default/');
    // Stop here so nothing else is sent after the redirect header.
    exit;
}
/**
 * Asks the UniFi controller to authorize a guest device for a limited time.
 *
 * Logs in to the controller, posts an "authorize-guest" command, then logs
 * out again. Failures are written to the PHP error log.
 *
 * @param string $id      Sent as the "mac" field of the command.
 * @param int    $minutes Sent as the "minutes" field of the command.
 */
function sendAuthorization($id, $minutes)
{
    // NOTE(review): controller address and credentials are hard-coded here.
    $unifiServer = "https://41.38.13.78:8443";
    $unifiUser = "bro4wote";
    $unifiPass = "rt#b45w";

    // Start Curl for login
    $ch = curl_init();
    // We are posting data
    curl_setopt($ch, CURLOPT_POST, TRUE);
    // Capture the controller's replies instead of printing them into the page
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
    // Set up cookies
    $cookie_file = "/tmp/unifi_cookie";
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
    // Allow Self Signed Certs
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
    // The protocol version is no longer forced to SSLv3: current TLS stacks
    // refuse it, so the request failed before reaching the controller.
    // libcurl now negotiates the version itself.

    // Login to the UniFi controller (values are form-encoded so characters
    // such as "#" or "&" in the credentials survive intact)
    curl_setopt($ch, CURLOPT_URL, "$unifiServer/login");
    curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query(array(
        'login' => 'login',
        'username' => $unifiUser,
        'password' => $unifiPass,
    )));
    // send login command
    if (curl_exec($ch) === false) {
        error_log('UniFi login failed: ' . curl_error($ch));
        curl_close($ch);
        return;
    }

    // Send user to authorize and the time allowed
    $data = json_encode(array(
        'cmd'=>'authorize-guest',
        'mac'=>$id,
        'minutes'=>$minutes));
    // Send the command to the API
    curl_setopt($ch, CURLOPT_URL, $unifiServer.'/api/cmd/stamgr');
    curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query(array('json' => $data)));
    if (curl_exec($ch) === false) {
        error_log('UniFi authorize-guest failed: ' . curl_error($ch));
    }

    // Logout of the UniFi Controller
    curl_setopt($ch, CURLOPT_URL, $unifiServer.'/logout');
    curl_exec($ch);
    curl_close($ch);
    unset($ch);
}
?>
<p>Connecting to the network...</p>
<script>
//allow time for the authorization to go through
setTimeout("location.href='http://www.Google.com'",6000);
</script>

Add alert box after procces is successful

I have this PHP cURL code. I know this is a simple question. I just want to have an alert box right after the process is done but I don't know where to put my code and its right format. Can someone help?
// Displays a browser alert box containing a success message.
function myFunction() {
alert("Successfull!");
}
PHP
<?php
// Sends a JSON POST request with HTTP Basic authentication via cURL and
// stores the response in $output.

// Read the two submitted form fields.
// NOTE(review): neither $account nor $sitename is used again in this snippet.
$account = $_POST["selectbasic-0"];
$sitename = $_POST["siteAlias"];
//Initiate cURL
$ch = curl_init();
//Set cURL parameters
// NOTE(review): the target URL and the "user:password" value are empty
// strings here and have to be filled in.
curl_setopt($ch, CURLOPT_URL, '');
// Return the response from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERPWD, "");
curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
// NOTE(review): $data is not defined anywhere in this snippet. Presumably it
// is a JSON string built from the fields above - confirm.
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
curl_setopt($ch, CURLOPT_POST,1);
// Peer certificate verification is turned off for this request.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_HTTPHEADER, array(
'Content-Type: application/json',
'Content-Length: ' . strlen($data))
);
// Because CURLOPT_RETURNTRANSFER is set, $output is the response body on
// success and false on failure.
$output = curl_exec($ch);
curl_close($ch);
?>
If your $output variable is truthy (that is, curl_exec() did not fail), then you can try this:
// Emit a small script that shows an alert when the request produced a
// truthy result. The quotes inside the string are escaped; the earlier
// form ("alert(""\Successfull!\");") was a PHP parse error.
if ($output) {
    echo "<script>";
    echo "alert(\"Successfull!\");";
    echo "</script>";
}
Your php is a server side script. You can't call javascript functions from server. You could
echo 'success';
or
echo '<script>alert("success");</script>';
after curl_exec function which returns true or false.

Replace whateverorigin.org call with local proxy in php

I'm currently using whateverorigin.org in some javascript to retrieve a URL as a JSON object because a 3rd party site hasn't made one of their functions available via their JSON API.
I'd like to remove this dependency from my website, because whateverorigin.org breaks the browser's HTTPS/SSL secure-content checks: it is called over plain HTTP.
Has anyone done this? I haven't found an example of it anywhere.
Thanks in advance for a response!
Ok, so since I first typed up this question, I've now already found some examples and cobbled together a working proxy function in php... Feel free to use it for your own purposes!
<?php
// Sourced from: http://stackoverflow.com/questions/2511410/curl-follow-location-error
/**
 * Executes a cURL handle and follows HTTP redirects.
 *
 * When neither open_basedir nor safe_mode is set (or $redirects is below 1),
 * libcurl's own CURLOPT_FOLLOWLOCATION is used. Otherwise redirects are
 * followed by hand: the response headers are read and the request is
 * re-issued against the Location/URI header value.
 *
 * @param resource $ch             cURL handle, passed by reference; its options are modified.
 * @param int      $redirects      Maximum number of redirects to follow.
 * @param bool     $curlopt_header Whether the returned data should include response headers.
 * @return mixed The result of curl_exec() on the native path; on the manual
 *               path, the last response, with headers stripped unless
 *               $curlopt_header is true.
 */
function curl_exec_follow(/*resource*/ &$ch, /*int*/ $redirects = 20, /*bool*/ $curlopt_header = false) {
// Native path: let libcurl follow redirects itself.
if ((!ini_get('open_basedir') && !ini_get('safe_mode')) || $redirects < 1) {
curl_setopt($ch, CURLOPT_HEADER, $curlopt_header);
// Following is only switched on when at least one redirect is allowed.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $redirects > 0);
curl_setopt($ch, CURLOPT_MAXREDIRS, $redirects);
// NOTE(review): CURLOPT_RETURNTRANSFER is not set on this path, so what is
// returned depends on how the caller configured the handle.
return curl_exec($ch);
} else {
// Manual path: headers are requested in the output so that the redirect
// target can be read from them.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FORBID_REUSE, false);
do {
$data = curl_exec($ch);
// Stop on a transport error.
if (curl_errno($ch))
break;
$code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
// Only 301 and 302 are treated as redirects; any other status ends the loop.
if ($code != 301 && $code != 302)
break;
// Take the header lines that follow the first line of the response, up to
// the blank line that ends the header section.
$header_start = strpos($data, "\r\n")+2;
$headers = substr($data, $header_start, strpos($data,"\r\n\r\n", $header_start)+2-$header_start);
// NOTE(review): the pattern requires a CRLF before the header name, while
// $headers starts right after the first CRLF - a Location header on the
// very first header line would apparently not match; confirm.
if (!preg_match("!\r\n(?:Location|URI): *(.*?) *\r\n!",$headers, $matches))
break;
// Point the handle at the redirect target for the next iteration.
curl_setopt($ch, CURLOPT_URL, $matches[1]);
} while (--$redirects);
// The counter reaches zero only when every allowed redirect was used up.
if (!$redirects)
trigger_error('Too many redirects. When following redirects, libcurl hit the maximum amount.', E_USER_WARNING);
// Drop everything up to and including the first blank line unless the
// caller asked for headers.
if (!$curlopt_header)
$data = substr($data, strpos($data, "\r\n\r\n")+4);
return $data;
}
}
// Proxy endpoint: fetches the page named by ?url= on the server and returns
// {"contents": ..., "status": ...}, wrapped in the ?callback= function name
// (JSONP) when one is supplied.

// $_GET values are already URL-decoded by PHP; decoding a second time would
// corrupt any literal "%" or "+" in the target URL.
$targeturl = isset($_GET['url']) ? $_GET['url'] : '';
// Only plain web URLs are proxied, so that other schemes supported by cURL
// (file://, gopher://, ...) cannot be requested through this script.
if (!preg_match('!^https?://!i', $targeturl)) {
    header('HTTP/1.1 400 Bad Request');
    die('Invalid url parameter');
}

// The callback name is echoed into executable script, so it must look like
// a JavaScript identifier (dots allowed for namespaced callbacks).
$callbackname = isset($_GET['callback']) ? $_GET['callback'] : '';
if ($callbackname !== '' && !preg_match('/^[A-Za-z_$][A-Za-z0-9_$.]*$/', $callbackname)) {
    header('HTTP/1.1 400 Bad Request');
    die('Invalid callback parameter');
}

// A JSONP response is JavaScript, not JSON; label it accordingly.
header($callbackname === '' ? 'Content-Type: application/json' : 'Content-Type: application/javascript');

$retrieveurl = curl_init($targeturl);
// curl_exec_follow() does not enable this on every code path. Without it the
// fetched page is printed directly and $htmldata is merely true.
curl_setopt($retrieveurl, CURLOPT_RETURNTRANSFER, true);
$htmldata = curl_exec_follow($retrieveurl);
if (curl_error($retrieveurl))
    die(curl_error($retrieveurl));
$status = curl_getinfo($retrieveurl, CURLINFO_HTTP_CODE);
curl_close($retrieveurl);

$data = array('contents' => $htmldata, 'status' => $status);
$jsonresult = json_encode($data);
echo $callbackname === '' ? $jsonresult : $callbackname . '(' . $jsonresult . ')';
?>
Hope this helps someone!

get dynamic generated content after using curl to login

I know that curl does not execute javascript, it only grabs static html, so this is why a simple curl will not work for me.
I do not know much about PHP, I'm new to this, but what I understand so far is that if I did not have to log in first to grab the content, I could simply use file_get_contents, which would first execute the dynamic content and then grab the HTML, which in turn would give me what I need. But I first have to log in and then get the page.
I tried to login using curl
// Logs in to the control panel with cURL (cookie-based session) and then
// requests a page that needs the logged-in session.
$user = "myuser";
$pass = "mypassword";
//create cookie file
$random = rand(0,9999999);
$cookie = $random."cookie.txt";
// The message now names $cookie; it used to interpolate $cookie_file_path,
// which is never defined.
$fp = fopen("$cookie","w") or die("<BR><B>Unable to open cookie file $cookie for write!<BR>");
fclose($fp);
//do login using curl
$LOGINURL = "https://controlpanel.example.com/index.html";
$agent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0";
$v2 = array( 'userName'=>$user, 'password'=>$pass);
$reffer = "https://www.google.com";
//this first call is to set the cookie
$ch = curl_init();
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
curl_setopt($ch, CURLOPT_URL,$LOGINURL);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_USERAGENT, $agent);
// CURLOPT_RETURNTRANSFER is not set on this handle, so the response would be
// printed; buffer it and throw it away.
ob_start(); // Prevent output
curl_exec ($ch);
ob_end_clean(); // End preventing output
// Closing the handle is what writes the received cookies to the jar file.
curl_close ($ch);
unset($ch);
//now that the cookie is set, do login
$ch = curl_init();
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS,$v2);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Read the cookies saved by the first request.
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);
curl_setopt($ch, CURLOPT_URL,$LOGINURL);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_REFERER, $reffer);
curl_setopt($ch, CURLOPT_USERAGENT, $agent);
$result = curl_exec($ch);
//now we are logged-in
//now grab the page you need
// The same handle is reused, switched from POST back to a plain request.
$profileurl = 'https://controlpanel.example.com/information.html';
curl_setopt($ch, CURLOPT_URL, $profileurl);
curl_setopt($ch, CURLOPT_POST, 0);
curl_setopt($ch, CURLOPT_TIMEOUT, 5);
$result = curl_exec ($ch);
But this will only get the static html, not the dynamic content too.
Let me explain better.
The code I get, at this point using above curl method, in $result is:
.....
<div id="DisplayAccountInfo"><span class="loading">Loading info</span></div>
.....
If I do this manually using firefox and inspect element with firebug the source is:
.....
<div id="DisplayAccountInfo">
<div class="formModule" id="formContainer">
......
<legend>Your code for this hour is 8T5D9LO</legend>
.....
</div>
</div>
.....
What I notice in firebug console is:
GET https://controlpanel.example.com/async/information.html
200 OK
669ms
jquery-....min.js (line 19)
What I, as a noob, understand from this is that the content is loaded dynamically using jQuery, and curl does not know how to do that.
I tried to put instead of
$profileurl = 'https://controlpanel.example.com/information.html';
curl_setopt($ch, CURLOPT_URL, $profileurl);
curl_setopt($ch, CURLOPT_POST, 0);
curl_setopt($ch, CURLOPT_TIMEOUT, 5);
$result = curl_exec ($ch);
//replaced the above with this
$result = file_get_contents($profileurl);
but I get the html from login page because I think it does not recognize anymore that I'm logged in.
So how can I solve this? Can you please help me?
I think I got what you're doing.
The key point here is, most website handle login with cookie. In https://controlpanel.example.com/information.html, if the website set a cookie after you login in your browser, then the good news is you can solve this problem.
The problem in your code is, PHP won't set cookie for you.
You need 2 steps:
Step 1. You need to obtain the cookie when your php curl the login
Here's how you get cookie header returned from the login page.
$ch = curl_init('https://controlpanel.example.com/index.html');
....
$result = curl_exec($ch);
// Pull the first Set-Cookie header out of the response.
// NOTE(review): this assumes the options elided above make curl_exec()
// return the response including its headers (CURLOPT_RETURNTRANSFER and
// CURLOPT_HEADER) - confirm.
if (preg_match('/^Set-Cookie:\s*([^;]*)/mi', $result, $m)) {
    // Turns "name=value" into array('name' => 'value').
    parse_str($m[1], $cookies);
    // $cookies is an array, so echo would only print "Array".
    print_r($cookies);//See if you've successfully obtained the return cookie
} else {
    echo 'No Set-Cookie header found in the response';
}
Step 2. Access https://controlpanel.example.com/information.html with the cookie you obtained in step 1 (as you already did in your own code).
haha, so easy it did not cross my mind.
For me it is simple, I did not have to call
https://controlpanel.example.com/information.html
but
https://controlpanel.example.com/async/information.html
to get the div I wanted :)
Lucky for me I noticed the get function in firebug :)
So the code now is:
// Log in to the control panel with a cookie-based session, then fetch the
// asynchronously loaded fragment that holds the wanted markup.

// Request settings.
$LOGINURL = "https://controlpanel.example.com/index.html";
$agent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0";
$reffer = "https://www.google.com";

// Login form fields.
$user = "myuser";
$pass = "mypassword";
$v2 = array( 'userName'=>$user, 'password'=>$pass);

// Create an empty cookie file with a random name.
$random = rand(0,9999999);
$cookie = $random."cookie.txt";
$fp = fopen("$cookie","w");
if (!$fp) {
    die("<BR><B>Unable to open cookie file $cookie for write!<BR>");
}
fclose($fp);

// Request 1: plain visit to the login page, only to collect its cookies.
$cookieRequest = array(
    CURLOPT_COOKIEJAR => $cookie,
    CURLOPT_URL => $LOGINURL,
    CURLOPT_SSL_VERIFYHOST => false,
    CURLOPT_SSL_VERIFYPEER => false,
    CURLOPT_USERAGENT => $agent,
);
$ch = curl_init();
foreach ($cookieRequest as $option => $value) {
    curl_setopt($ch, $option, $value);
}
// The response body is printed by curl_exec() here; buffer and discard it.
ob_start();
curl_exec($ch);
ob_end_clean();
curl_close($ch);
unset($ch);

// Request 2: post the credentials, sending the cookies saved above.
$loginRequest = array(
    CURLOPT_POST => true,
    CURLOPT_POSTFIELDS => $v2,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_COOKIEFILE => $cookie,
    CURLOPT_URL => $LOGINURL,
    CURLOPT_SSL_VERIFYHOST => false,
    CURLOPT_SSL_VERIFYPEER => false,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_REFERER => $reffer,
    CURLOPT_USERAGENT => $agent,
);
$ch = curl_init();
foreach ($loginRequest as $option => $value) {
    curl_setopt($ch, $option, $value);
}
$result = curl_exec($ch);

// Request 3: same handle, switched back from POST, aimed at the async URL.
$profileurl = 'https://controlpanel.example.com/async/information.html';
curl_setopt($ch, CURLOPT_URL, $profileurl);
curl_setopt($ch, CURLOPT_POST, 0);
$result = curl_exec($ch);

Categories