My code goes into a webpage, and identifies each block in the page.
Each block contains the same style format for information.
When trying to get the title however, I am not able to pull anything?
Ideally, I want the title, abstract, and author.
Here is my code so far in trying it for the title, using xpath.
from selenium import webdriver
from bs4 import BeautifulSoup
import time
# Open the ASCO meeting-library results page in Chrome.
driver = webdriver.Chrome()
driver.get('https://meetinglibrary.asco.org/results?filters=JTVCJTdCJTIyZmllbGQlMjIlM0ElMjJmY3RNZWV0aW5nTmFtZSUyMiUyQyUyMnZhbHVlJTIyJTNBJTIyQVNDTyUyMEFubnVhbCUyME1lZXRpbmclMjIlMkMlMjJxdWVyeVZhbHVlJTIyJTNBJTIyQVNDTyUyMEFubnVhbCUyME1lZXRpbmclMjIlMkMlMjJjaGlsZHJlbiUyMiUzQSU1QiU1RCUyQyUyMmluZGV4JTIyJTNBMCUyQyUyMm5lc3RlZFBhdGglMjIlM0ElMjIwJTIyJTdEJTJDJTdCJTIyZmllbGQlMjIlM0ElMjJZZWFyJTIyJTJDJTIydmFsdWUlMjIlM0ElMjIyMDIxJTIyJTJDJTIycXVlcnlWYWx1ZSUyMiUzQSUyMjIwMjElMjIlMkMlMjJjaGlsZHJlbiUyMiUzQSU1QiU1RCUyQyUyMmluZGV4JTIyJTNBMSUyQyUyMm5lc3RlZFBhdGglMjIlM0ElMjIxJTIyJTdEJTVE')
# Fixed pause to give the page time to render before grabbing its HTML.
time.sleep(4)
page_source = driver.page_source
soup=BeautifulSoup(page_source,'html.parser')
# Every <div class="ng-star-inserted"> is treated as one result block.
productlist=soup.find_all('div',class_='ng-star-inserted')
for item in productlist:
    # NOTE: `item` comes from BeautifulSoup's find_all(), yet
    # find_element_by_xpath is a Selenium method -- this mismatch is the
    # failure the question is asking about.
    title=item.find_element_by_xpath("//span[@class='ng-star-inserted']").text
    print(title)
Try the code below and let me know if you have any questions:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Start Chrome and create one explicit wait (up to 60 s) that is reused below.
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 60)
driver.get(
    'https://meetinglibrary.asco.org/results?filters=JTVCJTdCJTIyZmllbGQlMjIlM0ElMjJmY3RNZWV0aW5nTmFtZSUyMiUyQyUyMnZhbH'
    'VlJTIyJTNBJTIyQVNDTyUyMEFubnVhbCUyME1lZXRpbmclMjIlMkMlMjJxdWVyeVZhbHVlJTIyJTNBJTIyQVNDTyUyMEFubnVhbCUyME1lZXRpbmclM'
    'jIlMkMlMjJjaGlsZHJlbiUyMiUzQSU1QiU1RCUyQyUyMmluZGV4JTIyJTNBMCUyQyUyMm5lc3RlZFBhdGglMjIlM0ElMjIwJTIyJTdEJTJDJTdCJTIy'
    'ZmllbGQlMjIlM0ElMjJZZWFyJTIyJTJDJTIydmFsdWUlMjIlM0ElMjIyMDIxJTIyJTJDJTIycXVlcnlWYWx1ZSUyMiUzQSUyMjIwMjElMjIlMkMlMjJ'
    'jaGlsZHJlbiUyMiUzQSU1QiU1RCUyQyUyMmluZGV4JTIyJTNBMSUyQyUyMm5lc3RlZFBhdGglMjIlM0ElMjIxJTIyJTdEJTVE')

# Block until the <div class="record"> elements are present in the DOM.
all_records = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[@class=\"record\"]")))

for record in all_records:
    # Each XPath starts with "./" so the lookup is scoped to this record
    # instead of searching the whole document.
    print("Title :- " + record.find_element(
        By.XPATH, "./descendant::div[contains(@class,\"record__title\")]/span").text)
    print("Author :- " + record.find_element(
        By.XPATH, "./descendant::div[contains(text(),\"Author\")]/following-sibling::div").text)
    print("Abstract :- " + record.find_element(
        By.XPATH, "./descendant::span[contains(text(),\"Abstract\")]/parent::div/following-sibling::span").text)
    print("-------------------------------------------------")
The Output looks like -
if it resolves then please mark it as answer.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# `driver` is not created in this snippet; it must already exist.
wait=WebDriverWait(driver, 40)
driver.get('https://meetinglibrary.asco.org/results?filters=JTVCJTdCJTIyZmllbGQlMjIlM0ElMjJmY3RNZWV0aW5nTmFtZSUyMiUyQyUyMnZhbHVlJTIyJTNBJTIyQVNDTyUyMEFubnVhbCUyME1lZXRpbmclMjIlMkMlMjJxdWVyeVZhbHVlJTIyJTNBJTIyQVNDTyUyMEFubnVhbCUyME1lZXRpbmclMjIlMkMlMjJjaGlsZHJlbiUyMiUzQSU1QiU1RCUyQyUyMmluZGV4JTIyJTNBMCUyQyUyMm5lc3RlZFBhdGglMjIlM0ElMjIwJTIyJTdEJTJDJTdCJTIyZmllbGQlMjIlM0ElMjJZZWFyJTIyJTJDJTIydmFsdWUlMjIlM0ElMjIyMDIxJTIyJTJDJTIycXVlcnlWYWx1ZSUyMiUzQSUyMjIwMjElMjIlMkMlMjJjaGlsZHJlbiUyMiUzQSU1QiU1RCUyQyUyMmluZGV4JTIyJTNBMSUyQyUyMm5lc3RlZFBhdGglMjIlM0ElMjIxJTIyJTdEJTVE')
# Wait (up to 40 s) until the <div class="record"> elements are present.
productList=wait.until(EC.presence_of_all_elements_located((By.XPATH,"//div[@class='record']")))
for product in productList:
    # The leading ".//" keeps the search inside the current record.
    title=product.find_element(By.XPATH, ".//span[@class='ng-star-inserted']").text
    print(title)
Use .// and wait for the elements to be present. Also the div class you used was off.
Outputs
A post-COVID survey of current and future parents among faculty, trainees, and research staff at an...
Novel approach to improve the diagnosis of pediatric cancer in Kenya via telehealth education.
Sexual harassment of oncologists.
Overall survival with circulating tumor DNA-guided therapy in advanced non-small cell lung cancer.
The other two are
.//div[@class='record__ellipsis']
.//span[.=' Abstract ']/following::span
Related
I am trying to scrape the url given below with python selenium.
https://www.rtilinks.com/?5b5483ba2d=OUhWbXlXOGY4cEE0VEtsK1pWSU5CdEJob0hiR0xFNjN2M252ZXlOWnp0RC9yaFpvN3ZNeW9SazlONWJSTWpvNGNpR0FwWUZwQWduaXdFY202bkcrUHAybkVDc0hMMk9EWFdweitsS0xHa0U9
here is my code
from pprint import pprint
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from PIL import Image
import requests
from time import sleep
# Configure a headless Chrome session.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
wd = webdriver.Chrome('chromedriver',options=chrome_options)
# The URL must be a single string literal; a line break inside the quotes
# is a syntax error.
wd.get("https://www.rtilinks.com/?5b5483ba2d=OUhWbXlXOGY4cEE0VEtsK1pWSU5CdEJob0hiR0xFNjN2M252ZXlOWnp0RC9yaFpvN3ZNeW9SazlONWJSTWpvNGNpR0FwWUZwQWduaXdFY202bkcrUHAybkVDc0hMMk9EWFdweitsS0xHa0U9")
# Step 1: click the human-verification element once it is clickable.
WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.ID, "soralink-human-verif-main"))).click()
# Step 2: fixed pause while the next page loads.
sleep(10)
# Step 3: click the <img id="showlink"> that carries an x-onclick attribute.
WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.XPATH, "//img[@id='showlink' and @x-onclick]"))).click()
After running this code I should be redirected to https://rareapk.com/finance/?n1p0ei2ng5yd3gz, but it gets stuck on the same page.
The element, I am clicking is given below.
<img class="spoint" id="showlink" x-onclick="changeLink()" src="https://eductin.com/wp-content/uploads/2021/06/Download.png">
Element Image
What is my code doing?
First, it goes to this URL.
Then it clicks "I'M NOT A ROBOT".
After that, the next page loads and Selenium waits for 10 seconds.
Then an image (with the text DOWNLOAD RTI) is clicked, which should redirect to the REDIRECTED URL.
But at the last step it stays on the same URL; it does not redirect.
I have tried the following ways
# Attempt 1: explicit wait on the image by XPath, then click.
WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.XPATH, "//img[@id='showlink' and @x-onclick]"))).click()
# Attempt 2: direct lookup by id, then click.
wd.find_element(By.ID, "showlink").click()
I tested the code without headless mode and I see the browser open the expected page, but wd.current_url still shows the old URL (and wd.title also shows the old title).
The whole problem may be that the page opens the new URL in a new tab, so you need to use wd.switch_to.window(...) to access the other tab.
This code uses wd.switch_to.window(...) and it shows the correct URL (and title) in the other tab.
BTW: I had to add "Consent" because my browser sometimes show it.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from time import sleep
from webdriver_manager.chrome import ChromeDriverManager, ChromeType
#from webdriver_manager.firefox import GeckoDriverManager
# Configure a headless Chrome session.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')

#wd = webdriver.Chrome('chromedriver', options=chrome_options)
wd = webdriver.Chrome(service=Service(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install()), options=chrome_options)
#wd = webdriver.Firefox(service=Service(GeckoDriverManager().install()))

wd.get("https://www.rtilinks.com/?5b5483ba2d=OUhWbXlXOGY4cEE0VEtsK1pWSU5CdEJob0hiR0xFNjN2M252ZXlOWnp0RC9yaFpvN3ZNeW9SazlONWJSTWpvNGNpR0FwWUZwQWduaXdFY202bkcrUHAybkVDc0hMMk9EWFdweitsS0xHa0U9")

# Remember the handle of the tab we started in so other tabs can be told apart.
p = wd.current_window_handle
print('current_window_handle:', p)

# The "Consent" button does not always appear, so a failure here is only
# reported and the script carries on.
try:
    print('Waiting for: "Consent"')
    WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[@aria-label='Consent']"))).click()
except Exception as ex:
    print('Exception:', ex)

print('Waiting for: "I\'m not a robot"')
WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.ID, "soralink-human-verif-main"))).click()

print('Waiting for: "Download (RTI)"')
WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.XPATH, "//img[@id='showlink' and @x-onclick]"))).click()

print('--- active tab ---')
print('current_window_handle:', p)
print('current_url:', wd.current_url)
print('title:', wd.title)

print('--- other tabs ---')
chwd = wd.window_handles
for w in chwd:
    #switch focus to child window
    if w != p:
        wd.switch_to.window(w)
        print('current_window_handle:', w)
        print('current_url:', wd.current_url)
        print('title:', wd.title)
        print('---')

# quit() ends the whole session (all tabs and the driver process);
# close() would only close the tab that currently has focus.
wd.quit()
Result:
Waiting for: "Consent"
Waiting for: "I'm not a robot"
Waiting for: "Download (RTI)"
--- active tab ---
current_window_handle: CDwindow-31FDEC2C62AA0666A8F3A1DD2133D02C
current_url: https://eductin.com/how-to-fix-and-restore-deleted-mac-system-files/
title: How to fix and Restore deleted Mac system files. – Eductin
--- other tabs ---
current_window_handle: CDwindow-CB1EAE5B6DCD4ACF5D061ED4ECC314CD
current_url: https://sakarnewz.com/
title: SakarNewz – BOOST YOUR KNOWLEDGE WITH TECH NEWS AND UPDATES
---
EDIT:
Sometimes this code has problem to display information about other tabs because it seems tab runs all time JavaScript and probably Selenium can't access data.
I am building a scraper to get product information.
I would like to get the description text about delivery, as shown in this screenshot (sorry, my reputation is not high enough to add pictures yet). The text highlighted in blue is the information I want to obtain. The code I am using is
# NOTE(review): `wait` is not defined in this snippet; presumably it is an
# alias for selenium's WebDriverWait -- confirm against the full script.
reg = wait(driver, 2).until(EC.presence_of_element_located((By.TAG_NAME, "pns-product-pickup")))
# Look up the delivery-option blocks, searching from `reg` rather than the page.
methods = wait(reg, 1).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.delivery-options")))
delivery=''
for method in methods:
    # Text of the option itself.
    method1 = method.text
    # Text of the option's "div.option-tips" child.
    info = wait(method, 1).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.option-tips"))).text
    delivery=delivery+method1+info+'\n'
# Printed once, after all options have been collected.
print(delivery)
The result I get is
Home Delivery
Click & Collect
Which means all the 'info' I add to 'delivery' is empty text, only 'method1' did return something.
I was wondering if it is the case that, the text I want is not shown on web unless I hold the mouse on the icon next to 'home delivery' like I was doing in the screenshot. Yet it is just a random guess and probably make no sense. Any idea of what might be wrong? Thanks in advance!
You will be able to access the delivery info if you navigate to that element first (hover over it). The following will get the home delivery info:
### chromedriver setup for linux
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import time as t
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
# ActionChains queues mouse actions; nothing happens until perform() is called.
actions = ActionChains(browser)
url='https://parknshop.com/en/full-cream-evaporated-milk/p/BP_491777'
browser.get(url)
# Fixed pause before looking for the element.
t.sleep(5)
pns_more_info = WebDriverWait(browser, 20).until(EC.presence_of_element_located((By.TAG_NAME, "pns-more-information")))
# Hover over the element first, then read the tip text.
actions.move_to_element(pns_more_info)
actions.perform()
tips = WebDriverWait(browser, 20).until(EC.presence_of_element_located((By.XPATH, "//div[@class='option-tips']")))
print(tips.text)
This will print out:
Free delivery on orders above $500. For orders below $500, delivery fee of $50 shall apply
Do not forget to import ActionChains.
I am working with www.freightquote.com, and at some point I need to sign in; otherwise I am not allowed to get freight rates for more than 45 pairs.
I would like to enter the sign-in information for this website, but for some reason it is not working, and I cannot work out what the problem is.
You can directly use this website: https://account.chrobinson.com/
I have problem to enter the information that I am asked. Here is what I did:
from selenium import webdriver
from time import sleep
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.service import Service
# Start Chrome using an explicit chromedriver path.
PATH = r'C:\Users\b\Desktop\Webscraping\chromedriver.exe'
s= Service(PATH )
driver = webdriver.Chrome(service=s)
link = "https://www.freightquote.com/book/#/free-quote/pickup"
driver.get(link)
sleep(2)
driver.maximize_window()
sleep(2)
# Click the first <button type="button"> on the page.
driver.find_elements(by=By.XPATH, value = '//button[@type="button"]')[0].click()
sleep(3)
#Username:
driver.find_element(by=By.XPATH, value='//input[@type="email"]').send_keys('USERNAME')
driver.find_elements(by=By.XPATH, value = '//input[@class="button button-primary" and @type="submit"]')[0].click()
#password
driver.find_element(by=By.XPATH, value='//input[@type="password"]').send_keys('PASSWORD')
driver.find_elements(by=By.XPATH, value = '//input[@class="button button-primary" and @type="submit"]')[0].click()
sleep(2)
Your code and your technique have too many problems; you should learn how to write Selenium code properly before starting.
I modified your code up to the point of entering the email; please complete the rest accordingly.
driver = webdriver.Chrome()
link = "https://www.freightquote.com/book/#/free-quote/pickup"
driver.get(link)
driver.maximize_window()
# Wait up to 30 s for the first <button type="button">, then click it.
WebDriverWait(driver, 30).until(
    EC.presence_of_element_located((By.XPATH,
                                    '(//button[@type="button"])[1]'))).click()
# Wait up to 30 s for the e-mail input, then type the user name into it.
WebDriverWait(driver, 30).until(
    EC.presence_of_element_located((By.XPATH,
                                    '//input[@type="email"]'))).send_keys('USERNAME')
Also, you don't need to add the chromedriver path in your code. If you use Windows or Linux, you should add it to your virtualenv, in the /bin folder,
and if you use a Mac, you should add it to /usr/local/bin.
To enter sign in information for the website you need to induce WebDriverWait for the element_to_be_clickable() and you can use the following locator strategies:
Using CSS_SELECTOR:
# Open the sign-in page.
driver.get("https://account.chrobinson.com/")

# Wait up to 20 s for the username field to become clickable before typing.
username_field = WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']"))
)
username_field.send_keys("Ribella")

# The password field and the submit button are looked up directly.
password_field = driver.find_element(By.CSS_SELECTOR, "input[name='password']")
password_field.send_keys("Ribella")

sign_in_button = driver.find_element(By.CSS_SELECTOR, "input[value='Sign In']")
sign_in_button.click()
Note: You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Browser Snapshot:
My code accesses a page, and I am trying to click on the button that says "Physician Program" on the menu list. If you click on this on the browser, it directs you to a new webpage.
However, there is no href on the html of the page that would help me find this link via code (I am assuming because it is JavaScript?) Currently, I just used its Xpath.
My question is - If I am able to click on it in a browser, shouldnt I be able to click on it using Selenium? If so, how can this be done?
import time
from bs4 import BeautifulSoup
from selenium import webdriver
driver = webdriver.Chrome()
driver.get('https://www.kidney.org/spring-clinical/program')
# Fixed pause to let the page finish loading.
time.sleep(6)
page_source = driver.page_source
soup = BeautifulSoup(page_source, 'html.parser')
# Locate the menu entry by the id of its parent element, then click it.
element1 = driver.find_element_by_xpath('//*[@id="dx-c7ad8807-6124-b55e-d292-29a4389dee8e"]/div')
element1.click()
The element is inside an iframe, so you need to switch to the iframe first:
# Move the driver's context into the iframe before searching in it.
driver.switch_to.frame("SCM20 Advanced Practitioner Program")
# Find the menu entry by its visible text and click it in one step.
driver.find_element_by_xpath("//div[text()='Physician Program']").click()
Ideally, you should use WebDriverWait and wait for the frame to be available.
# Wait up to 10 s for the iframe to be available and switch into it.
WebDriverWait(driver,10).until(EC.frame_to_be_available_and_switch_to_it((By.NAME,"SCM20 Advanced Practitioner Program")))
# Inside the frame, wait for the menu entry to become clickable, then click it.
# The locator is a (By, value) tuple, so the comma after By.XPATH is required.
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//div[text()='Physician Program']"))).click()
You need to import below libraries
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import subprocess
#other imports
import time
from bs4 import BeautifulSoup
from selenium import webdriver
driver = webdriver.Chrome()
driver.get('https://www.kidney.org/spring-clinical/program')
time.sleep(6)

# Snapshot of the page HTML, parsed with BeautifulSoup.
page_source = driver.page_source
soup = BeautifulSoup(page_source, 'html.parser')

# Wait up to 10 s for the iframe element to be present, then switch into it.
frame_locator = (By.NAME, "SCM20 Advanced Practitioner Program")
program_frame = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located(frame_locator)
)
driver.switch_to.frame(program_frame)

# Wait for the entries of the track-selector popup to be visible.
option_locator = (By.CSS_SELECTOR, '[class="track-selector-popup"] [role="option"]')
track_options = WebDriverWait(driver, 10).until(
    EC.visibility_of_all_elements_located(option_locator)
)
# Click the first entry.
track_options[0].click()

# Block until Enter is pressed.
input()
The element is inside an iframe, so switch to it and also use waits. To switch back and interact with elements outside the frame, use:
# Leave the iframe and return to the top-level document.
driver.switch_to.default_content()
So the page in question is here, I want to navigate pagination having following markup:
<li class="btn-next">
Suivant</li>
If you notice, JS method is being called here. So far I have done this:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = None
driver = webdriver.Firefox()
# One explicit wait (up to 30 s) used by fetch().
wait = WebDriverWait(driver, 30)


def fetch(url):
    """Open the listing page and click its "Suivant" link.

    ``url`` is accepted but not used: the page address is hard-coded in
    the body.
    """
    driver.get(
        'http://www.leparking.fr/voiture-occasion/Porsche--targa-g.html#!/voiture-occasion/Porsche--targa-g.html%3Fslider_millesime%3D1940%7C1985')
    # Wait until the link with the text "Suivant" is clickable, then click it.
    elem_more = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, "Suivant")))
    elem_more.click()


fetch(None)
It does hover the element but does not navigate on click. What should I do?
Thanks
I sorted it out by using execute_script method:
# Wait until the "Suivant" link is clickable.
next_link = wait.until(
    EC.element_to_be_clickable((By.LINK_TEXT, "Suivant"))
)
# Trigger the click through JavaScript instead of WebElement.click().
driver.execute_script("arguments[0].click();", next_link)