python selenium webdriver not showing all html - javascript
I am developing a web scraper in python.
This is my code:
# Options carries Chrome launch flags; webdriver starts and drives the browser.
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
# BeautifulSoup is only used to parse the HTML string handed back by Selenium.
from bs4 import BeautifulSoup
# Launch Chrome with the --headless flag (no visible browser window).
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(options=chrome_options)
driver.get("https://www.hapag-lloyd.com/en/home.html")
# page_source is read immediately after get() returns; there is no explicit
# wait for any script on the page to finish before the HTML is captured.
source = driver.page_source
# Parse with the stdlib-backed 'html.parser' and print the whole document.
soup = BeautifulSoup(source, 'html.parser')
print(soup)
# NOTE(review): this snippet never calls driver.quit(), so the Chrome
# process is presumably left running after the script ends - confirm.
But the HTML returned is different from what I see in the browser (please check the last few lines):
<html><head>
<meta content="no-cache" http-equiv="Pragma"/>
<meta content="-1" http-equiv="Expires"/>
<meta content="no-cache" http-equiv="CacheControl"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<link href="data:;base64,iVBORw0KGgo=" rel="shortcut icon"/>
<script>
(function(){
window["bobcmn"] = "111110101010102000000022000000052000000002a4b927ad200000096300000000300000000300000006/TSPD/300000008TSPD_101300000005https3000000b0081ecde62cab2000d65f90c7efd5185e314a8800e00a5aad11b1a439eb174c6c3f64d45284e14d9508dcf0830d0a2800346a2db5907272d4309ad725a7dc856ab98589c10724bd284477ca152744f4ac2102b44d72e2a1e9200000000200000000";
window.aIv=!!window.aIv;try{(function(){(function(){})();var sZ=78;try{var IZ,lZ,OZ=s(868)?0:1,zZ=s(999)?0:1,ss=s(445)?0:1,Ss=s(601)?0:1;for(var is=(s(421),0);is<lZ;++is)OZ+=s(211)?2:1,zZ+=s(768)?1:2,ss+=(s(54),2),Ss+=s(289)?2:3;IZ=OZ+zZ+ss+Ss;window.zz===IZ&&(window.zz=++IZ)}catch(Ls){window.zz=IZ}var Os=!0;function _(Z){var S=arguments.length,I=[],O=1;while(O<S)I[O-1]=arguments[O++]-Z;return String.fromCharCode.apply(String,I)}
function SS(Z){var S=30;!Z||document[J(S,148,135,145,135,128,135,138,135,146,151,113,146,127,146,131)]&&document[_(S,148,135,145,135,128,135,138,135,146,151,113,146,127,146,131)]!==l(68616527636,S)||(Os=!1);return Os}function l(Z,S){Z+=S;return Z.toString(36)}function J(Z){var S=arguments.length,I=[];for(var O=1;O<S;++O)I.push(arguments[O]-Z);return String.fromCharCode.apply(String,I)}function _S(){}SS(window[_S[_(sZ,188,175,187,179)]]===_S);SS(typeof ie9rgb4!==l(1242178186121,sZ));
SS(RegExp("\x3c")[l(1372127,sZ)](function(){return"\x3c"})&!RegExp(l(42811,sZ))[l(1372127,sZ)](function(){return"'x3'+'d';"}));
var IS=window[J(sZ,175,194,194,175,177,182,147,196,179,188,194)]||RegExp(J(sZ,187,189,176,183,202,175,188,178,192,189,183,178),l(-60,sZ))[l(1372127,sZ)](window["\x6e\x61vi\x67a\x74\x6f\x72"]["\x75\x73e\x72A\x67\x65\x6et"]),jS=+new Date+(s(267)?375283:6E5),JS,Z_,s_,S_=window[_(sZ,193,179,194,162,183,187,179,189,195,194)],__=IS?s(890)?18994:3E4:s(725)?3775:6E3;
document[J(sZ,175,178,178,147,196,179,188,194,154,183,193,194,179,188,179,192)]&&document[J(sZ,175,178,178,147,196,179,188,194,154,183,193,194,179,188,179,192)](J(sZ,196,183,193,183,176,183,186,183,194,199,177,182,175,188,181,179),function(Z){var S=88;document[J(S,206,193,203,193,186,193,196,193,204,209,171,204,185,204,189)]&&(document[_(S,206,193,203,193,186,193,196,193,204,209,171,204,185,204,189)]===_(S,192,193,188,188,189,198)&&Z[J(S,193,203,172,202,205,203,204,189,188)]?s_=!0:document[J(S,206,
193,203,193,186,193,196,193,204,209,171,204,185,204,189)]===l(68616527578,S)&&(JS=+new Date,s_=!1,i_()))});function i_(){if(!document[_(47,160,164,148,161,168,130,148,155,148,146,163,158,161)])return!0;var Z=+new Date;if(Z>jS&&(s(988)?840535:6E5)>Z-JS)return SS(!1);var S=SS(Z_&&!s_&&JS+__<Z);JS=Z;Z_||(Z_=!0,S_(function(){Z_=!1},s(891)?0:1));return S}i_();var I_=[s(915)?10661718:17795081,s(30)?27611931586:2147483647,s(748)?1636390818:1558153217];
function L_(Z){var S=43;Z=typeof Z===l(1743045633,S)?Z:Z[_(S,159,154,126,159,157,148,153,146)](s(837)?37:36);var I=window[Z];if(!I[_(S,159,154,126,159,157,148,153,146)])return;var O=""+I;window[Z]=function(Z,S){Z_=!1;return I(Z,S)};window[Z][J(S,159,154,126,159,157,148,153,146)]=function(){return O}}for(var O_=(s(493),0);O_<I_[l(1294399127,sZ)];++O_)L_(I_[O_]);SS(!1!==window[_(sZ,175,151,196)]);window.LZ={zs:"084e4452c4017800c5def6fe02b0086dc53ff9519b1bcb514d1f4dd874776393bcfec37f99ebfc4795da47aec5f492a8a4131f92a5e26fecd10807e6bd8ba79b77bb1692ddac2154a98808ca5559f35a278cf21dd71a1e61c4579303187e42dc179ae0846f6078a996bb6f824e2238fc7b431f54a421fcf7145bd4fcc3d9b982"};
function Zi(Z){var S=+new Date,I;!document[_(63,176,180,164,177,184,146,164,171,164,162,179,174,177,128,171,171)]||S>jS&&(s(968)?421041:6E5)>S-JS?I=SS(!1):(I=SS(Z_&&!s_&&JS+__<S),JS=S,Z_||(Z_=!0,S_(function(){Z_=!1},s(688)?0:1)));return!(arguments[Z]^I)}function s(Z){return 265>Z}
(function(){var Z=/(\A([0-9a-f]{1,4}:){1,6}(:[0-9a-f]{1,4}){1,1}\Z)|(\A(([0-9a-f]{1,4}:){1,7}|:):\Z)|(\A:(:[0-9a-f]{1,4}){1,7}\Z)/ig,S=document.getElementsByTagName("head")[0],I=[];S&&(S=S.innerHTML.slice(0,1E3));while(S=Z.exec(""))I.push(S)})();})();}catch(x){}finally{ie9rgb4=void(0);};function ie9rgb4(a,b){return a>>b>>0};
})();
</script>
<script src="/TSPD/081ecde62cab200082f75af3905bec19af31f4aaf7bd4079c3ac5a62a6fb4096cfcec166097ddde7?type=7" type="text/javascript"></script>
<noscript>Please enable JavaScript to view the page content.<br/>Your support ID is: 17324345507588527622.</noscript>
</head><body>
<form action="" enctype="multipart/form-data" method="post"><input name="_pd" type="hidden" value=""/></form></body></html>
It reports "Please enable JavaScript to view the page content. Your support ID is: 17324345507588527622.".
I checked a few similar questions asked by other people. According to them, this problem should have been solved by using Chrome.
I also tried to get the HTML with requests-html, but the program just keeps running and never returns anything.
It's a limitation of the page_source method. See this answer: https://stackoverflow.com/a/64897405/1387701
Also see the description in the source code:
Description copied from interface: WebDriver Get the source of the
last loaded page. If the page has been modified after loading (for
example, by Javascript) there is no guarantee that the returned text
is that of the modified page. Please consult the documentation of the
particular driver being used to determine whether the returned text
reflects the current state of the page or the text last sent by the
web server. The page source returned is a representation of the
underlying DOM: do not expect it to be formatted or escaped in the
same way as the response sent from the web server. Think of it as an
artist's impression.
Related
Using html code with a javascript code as a widget in flutter web
I am currently using flutter web and I already have an html button that I want to add inside my flutter app. This html contains a java script as its body. How to add the html with javascript as a widget inside my app? This is the html snippet: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta http-equiv="X-UA-Compatible" content="ie=edge" /> <title>Paytabs Express Checkout V4</title> </head> <body> <script src="https://paytabs.com/express/v4/paytabs-express-checkout.js" id="paytabs-express-checkout" data-secret-key="key" data-ui-type="button" data-merchant-id="mid" data-url-redirect="https://my09713z28.codesandbox.io/" data-amount="3.3" data-currency="SAR" data-title="John Doe" data-product-names="click" data-order-id="25" data-ui-show-header="true" data-customer-phone-number="5486253" data-customer-email-address="john.deo#paytabs.com" data-customer-country-code="973" data-ui-show-billing-address="false" data-billing-full-address="test test test" data-billing-city="test" data-billing-state="test" data-billing-country="BHR" data-billing-postal-code="123" ></script> <script> </script> </body> </html> Hope you provide me with some help.
You can do something like this. You should put your HTML-related code in an index.html file, and in src you need to put the path to your index.html, e.g. 'assets/index.html': import 'dart:html' as html; import 'dart:js' as js; import 'dart:ui' as ui; String viewID = "your-view-id"; @override Widget build(BuildContext context) { // ignore: undefined_prefixed_name ui.platformViewRegistry.registerViewFactory( viewID, (int id) => html.IFrameElement() ..width = MediaQuery.of(context).size.width.toString() ..height = MediaQuery.of(context).size.height.toString() ..src = 'path/to/your/index.html' ..style.border = 'none'); return SizedBox( height: 500, child: HtmlElementView( viewType: viewID, ), ); }
You can use HtmlElementView to add HTML elements inside a Flutter web app (https://api.flutter.dev/flutter/widgets/HtmlElementView-class.html). Beware that this only works in Flutter web, and that embedding HTML is an expensive operation and should be avoided when a Flutter equivalent is possible. You should add this HTML content inside the file web/main.html. I suggest you build the button with Flutter and call the JavaScript code from Dart, as in this example: calling javascript from Dart
If I understand correctly, your intention is to be able to render your html/javascript as a native widget in flutter. Unfortunately, I don't think this is technically possible due to the fact that flutter is rendering everything in its own light-weight rendering engine, rather than creating native code that your native runtime executes. The artifact(s) created (even in flutter web) after compilation is a combination of flutter runtime + your compiled code that executes on flutter runtime. Therefore this is not possible to add html/javascript to your flutter code as a widget and run it everywhere. The solution is to implement your widget in pure Dart code.
how to display a rally "custom board" dashboard in confluence wiki
I have been digging in the this site and others for several days and the answer to my problem still escapes me. I have read all of these pages: http://pastebin.com/cbagkw8h but none of them exactly answers this question: I am trying to get a Rally Dashboard (custom board) to appear in HTML/Javascript in a confluence wiki. I have gotten a simple Standard Report working using a read-only account and AppSDK1.32 with loginKey by embedding the following HTML/Javascript into the Confluence wiki page: {html} <meta name="Name" content="App Example: Rally Application" /> <meta name="Version" content="2011.04" /> <meta name="Vendor" content="Rally Software" /> <script type="text/javascript" src="https://rally1.rallydev.com/apps/1.32/sdk.js?loginKey=loginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkey"> </script> <script type="text/javascript"> function onLoad() { var rallyDataSource = new rally.sdk.data.RallyDataSource( '__WORKSPACE_OID__', '__PROJECT_OID__', '__PROJECT_SCOPING_UP__', '__PROJECT_SCOPING_DOWN__'); rally.sdk.ui.AppHeader.destroy(); var reportConfig = {report: rally.sdk.ui.StandardReport.IterationBurndown, width : 400, height: 300}; var report = new rally.sdk.ui.StandardReport(reportConfig); report.display("reportDiv"); } rally.addOnLoad(onLoad); </script> <div id="reportDiv" style="width: 400px; margin-left:20px"></div> <br/> {html} I am trying to expand this success to an entire dashboard with App SDK2.x using the new apiKey - by using the following code: {html} <meta name="Name" content="App Example: Rally Application" /> <meta name="Version" content="2015.04" /> <meta name="Vendor" content="eBay Enterprise" /> <script type="text/javascript" src="https://loginapirally1.rallydev.com/apps/1.32/sdk.js?loginKey=loginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkeyloginkey"> </script> <script type="text/javascript"> 
function onLoad() { rally.sdk.ui.AppHeader.destroy(); document.getElementById("iframeA").width = screen.width - 60 ; document.getElementById("iframeA").height = (screen.width - 60 ) * 3; } rally.addOnLoad(onLoad); </script> <iframe id="iframeA" src="https://loginapirally1.rallydev.com/#/111111111d/custom/222222222?expandApp=333333333&apiKey=_apikeyapikeyapikeyapikeyapikeyapikeyapikey" width="1024" height="1024"> </iframe> <br/> {html} I am noticing a few things: 1) it almost works - I get the dashboard/report title but not the cards 2) the apiKey seems to have no affect at all - I still get prompted for a login and password (which I could stand if I could see the cards). 3) it doesn't seem to matter if I put the apiKey before or after the hash symbol Citation A suggested using the "full screen" dashboard/report but didn't cover the apiKey. Citaton B says that the AppSDK2 uses the apiKey as of Apr 14 2014 but doesn't say how to use it exactly with AppSDK2. I have gotten the apiKey to work with the Ruby API but it is unclear how to access the dashboard/reports from there. Citation C says that the AppSDK1 is based on the Javascript dojo framework and the AppSDK2 is based on the Javascript Sencha's ExtJS but avoids giving any kind of rosetta stone from one to the other. The only other options I can think of is to 1) copy the entire HTML page-source from the "Custom Board" and then start debugging the Javascript with ExtJS (but I cannot find an example of where to put the apiKey for ExtJS) or 2) bypass all of the APIs and use Ruby Watir-Webdriver (which uses Selenium) and VNCServer to clip an image of the "Custom Board" page and show THAT in confluence. Citations: http://pastebin.com/YMUEPjSF
The issue seems to be specific to the canned Custom Board that cannot be loaded externally. It should work if you write a js source code to build a similar Board and compile that into a deployment html. That is similar to option (1) you mentioned in the end of your post if I understand it correctly. Here is my test where I compared Custom Board and Custom HTML side by side. Below is a screenshot from Rally. This custom dashboard has Custom Board on the left and Custom HTML on the right. The Custom HTML is using a code example of filterable board from AppSDK2 documentation. Next I use this code: <html> <head> <title>Custom Grid</title> <meta name="Name" content="App: Custom Dashboard"/> <meta name="Version" content="2011.08.31"/> <meta name="Vendor" content="Rally Labs, NickM"/> <script type="text/javascript" src="https://rally1.rallydev.com/apps/1.32/sdk.js?loginKey=c33e83...."></script> <script type="text/javascript"> rally.addOnLoad(function() { var iframe = document.createElement("iframe"); iframe.src = "https://loginapirally1.rallydev.com/#/14018981229/custom/34207853894" iframe.width = 2000; iframe.height = 1000; document.getElementById("storyboard").appendChild(iframe); }); </script> </head> <body> <div id="storyboard"></div> </body> </html> Here is the dashboard loaded externally on localhost:3000. The left column of the dashboard where Custom Board is expected to load is empty, but the right column with a custom html code of a board loads successfully: It looks like the canned Custom Board cannot be loaded externally but a custom html code written in AppSDK2 can be displayed externally. A couple of observations: There is really no upgrade path from AppSDK1 to AppSDK2. The underlying frameworks and the class structures are very different and the code cannot be simply refactored. There is no translation from one to the other. LoginKey is intended to work with AppSDK1. Both are legacy. Both predate AppSDK2 and ApiKey. 
To use ApiKey with custom html apps written with AppSDK2 see "Use API Key with AppSDK2" article. AppSDK2 does not support LoginKey usage for authentication. The reason it seems to work in this example is that we load the entire page. Loading the entire page using iframe's src property is possible with LoginKey. In this example there is no reason to use ApiKey and LoginKey together. You are right that ApiKey makes no difference in this use case. The way LoginKey works is that it "tricks" the browser into thinking that there is this different server, loginapirally1.rallydev.com. If you look in Network tab of your browser and see that request comes from there it means that LoginKey is working. ApiKey works differently. There is no equivalent to loginapirally1 server with ApiKey. If you are being prompted to supply login credentials when using LoginKey it means that LoginKey is not working. See "LoginKey Troublshooting" article. Embedding custom AppSDK2 apps in 3rd party portals (running custom apps externally) is possible with ApiKey, and the supported scenario described in this guide is similar to the option (1) you mentioned in the end of your post. Loading entire Rally page or entire Rallynavigation is not a supported use case even though it is possible with LoginKey.
I'm making a widget for adobe muse, and I need to know how to place a domain name in front of the content of a meta property tag
The widget I'm making uses Open Graph tags and I'm running into a complication when it comes to the "og:image" meta tag. I stripped out the rest of the irrelevant code, but this is what is causing the problem: <?xml version="1.0" encoding="UTF-8"?> <HTMLWidget> <parameters> <file name="ogImage" label="Image"/> <!-- This enables the user to upload an image--> </parameters> <headHTML> <meta property="og:image" content="{param_ogImage}" /> </headHTML> </HTMLWidget> The code that this will output is: <meta property="og:image" content="/assets/example-image.png"/> Facebook debugger will then say: Object at URL 'http://www.example.com/' of type 'website' is invalid because the given value '/assets/image.png' for property 'og:image:url' could not be parsed as type 'url'. So, my question is, is there a way to automatically grab and place the domain name in front of the {param_ogImage} content? Is it possible to use something like this to accomplish this task? <script> function myFunction() { var x = document.domain; document.getElementById("demo").innerHTML = x; } </script>
Not with current MuCows (version 3). This does appear to be something that is being added in a future version.
HTML button on client to run python script on server then send results to webpage on client
I have seen some previous questions, that were similar but I couldn't find anything like this. I have a webpage (on a server) and I would like the user to click a button which will execute a python script. I want this python script to run on the server and then send the results back to the webpage and display it. When the user clicks the button, the data that will be sent to the server would be an XML file. I just don't know where to start with all of this. What can I use to accomplish this? Thanks for your time. EDIT: I actually have the webpage all done and setup, and it produces the XML. I just need to run the python script when a user clicks on a button on the webpage. Not sure if that helps, but I'm posting it. Thanks I WOULD LIKE A HIGH-LEVEL EXPLANATION FOR THIS PLEASE AND THANK YOU, since I don't know about what has been suggested to me already.
There are a lot of web libraries for Python. You may try bottle (it works without installing: it is a single file, so just put the "bottle.py" file in your work folder). A simple example: from bottle import route, run, static_file, post, request @route('/js/<filename>') def js(filename): return static_file(filename, root='js') @route('/') def index():return static_file('tst.html', root='./') @post('/xml') def xml(): for x in request.forms: print(x) return {'return': 'accepted'} run(host='0.0.0.0', port=8000) And html: <!DOCTYPE html> <html lang="ro"> <head> <meta charset="utf-8"> <meta http-equiv="X-UA-Compatible" content="IE=edge"> <title>TTL</title> <meta name="viewport" content="width=device-width, initial-scale=1"> <script type="text/javascript" src="js/jquery.js"></script> </head> <body> <button onclick="test()">Test</button> <script type="text/javascript"> function test() { $.ajax({ url: 'xml', type: 'POST', data: '<my><xml>string</xml></my>', dataType: 'json', success: function (ret) { alert(ret['return']); } }); } </script> </body> </html> Sorry for the jQuery; I was too lazy to write a plain JS XHR. Bottle is well documented, but so are cherrypy, pyramid, django and tornado.
How to prevent caching of my Javascript file? [duplicate]
This question already has answers here: How to force browsers to reload cached CSS and JS files? (57 answers) Closed 9 years ago. I have a simple html: <html> <body> <head> <meta charset="utf-8"> <meta http-equiv='cache-control' content='no-cache'> <meta http-equiv='expires' content='0'> <meta http-equiv='pragma' content='no-cache'> <script src="test.js"></script> </body> </html> In test.js I changed a Javascript function, but my browser is caching this file. How to disable cache for script src?
Add a random query string to the src You could either do this manually by incrementing the querystring each time you make a change: <script src="test.js?version=1"></script> Or if you are using a server side language, you could automatically generate this: ASP.NET: <script src="test.js?rndstr=<%= getRandomStr() %>"></script> More info on cache-busting can be found here: https://www.curtiscode.dev/post/front-end-dev/what-is-cache-busting
<script src="test.js?random=<?php echo uniqid(); ?>"></script> EDIT: Or you could use the file modification time so that it's cached on the client. <script src="test.js?random=<?php echo filemtime('test.js'); ?>"></script>
Configure your webserver to send cache-control HTTP headers for the script. Fake headers in the HTML document (meta http-equiv tags): (1) aren't as well supported as real HTTP headers, and (2) apply to the HTML document itself, not to the resources that it links to.
You can append a query string to your src and change it only when you release an updated version: <script src="test.js?v=1"></script> In this way the browser will use the cached version until a new version is specified (v=2, v=3...)
You can add a random string (or a datetime string) as a query string to the URL that points to your script. Like so: <script type="text/javascript" src="test.js?q=123"></script> You need to make sure the value of 'q' changes every time the page is refreshed; otherwise the browser will keep serving the cached copy.