I'm trying to scrape a webpage where I need to expand the item list by clicking an expand button several times.
While researching how to do this in a smart way, I have been trying to use an explicit wait with an expected condition (element_to_be_clickable).
Here is my test code:
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
from selenium.webdriver.support.ui import WebDriverWait
import time
# XPath of the "expand" button. Attribute tests in XPath are written with '@',
# so the id predicate must be @id (a '#' in that position is not valid XPath).
btn_xpath = '//*[@id="contents"]/div[1]/div[2]/div/div[1]'
# Start Chrome through the chromedriver binary stored in a local folder.
browser = webdriver.Chrome('/Users/dongpark/Downloads/chromedriver')
browser.get('http://cu.bgfretail.com/event/plus.do?category=event&depth2=1&sf=N')
# Explicit wait bound to this browser with a 20-second timeout.
wait = WebDriverWait(browser, 20)
# Fixed pause after navigation before any element lookup is attempted.
time.sleep(8)
def check_exists_by_xpath(xpath):
    """Report whether an element matching *xpath* is currently on the page.

    Args:
        xpath: XPath expression identifying the element.

    Returns:
        True when the lookup succeeds, False when it raises
        NoSuchElementException.
    """
    try:
        browser.find_element_by_xpath(xpath)
    except NoSuchElementException:
        found = False
    else:
        found = True
    return found
# Keep clicking the expand button until it no longer exists on the page.
while True:
    button = check_exists_by_xpath(btn_xpath)
    if not button:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("done")
        break
    print("more")
    # wait.until returns the element once it is clickable, so click that
    # element directly instead of looking it up a second time.
    wait.until(EC.element_to_be_clickable((By.XPATH, btn_xpath))).click()
check_exists_by_xpath just tests if the expand button is still available on the page.
When I run this, I get:
File "/Users/dongpark/Documents/kuk/firstSelenium/test.py", line 37, in <module> browser.find_element_by_xpath(btn_xpath).click()
selenium.common.exceptions.WebDriverException: Message: unknown error: Element is not clickable at point (418, 920). Other element would receive the click: <div class="ico"></div>
(Session info: chrome=54.0.2840.98)
(Driver info: chromedriver=2.25.426935 (820a95b0b81d33e42712f9198c215f703412e1a1),platform=Mac OS X 10.12.0 x86_64)
If I just sleep long enough before the click it works, but I want to make it more efficient.
Change your check_exists_by_xpath to wait for element presence:
def check_exists_by_xpath(xpath):
    """Wait for an element matching *xpath* to be present in the DOM.

    Uses the module-level ``wait`` object, so the call blocks until the
    element appears or that wait's timeout expires.

    Args:
        xpath: XPath expression identifying the element.

    Returns:
        True if the element became present in time, False otherwise.
    """
    # Imported locally so this function does not need an extra top-level import.
    from selenium.common.exceptions import TimeoutException

    try:
        wait.until(EC.presence_of_element_located((By.XPATH, xpath)))
    except TimeoutException:
        # WebDriverWait.until reports "never appeared" with TimeoutException,
        # not NoSuchElementException.
        return False
    return True
Related
I am trying to scrape the url given below with python selenium.
https://www.rtilinks.com/?5b5483ba2d=OUhWbXlXOGY4cEE0VEtsK1pWSU5CdEJob0hiR0xFNjN2M252ZXlOWnp0RC9yaFpvN3ZNeW9SazlONWJSTWpvNGNpR0FwWUZwQWduaXdFY202bkcrUHAybkVDc0hMMk9EWFdweitsS0xHa0U9
here is my code
from pprint import pprint
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from PIL import Image
import requests
from time import sleep
# Run Chrome headless, without the sandbox and without /dev/shm usage.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
wd = webdriver.Chrome('chromedriver', options=chrome_options)
# The URL has to be one unbroken string literal; a line break inside the
# quotes is a SyntaxError.
wd.get("https://www.rtilinks.com/?5b5483ba2d=OUhWbXlXOGY4cEE0VEtsK1pWSU5CdEJob0hiR0xFNjN2M252ZXlOWnp0RC9yaFpvN3ZNeW9SazlONWJSTWpvNGNpR0FwWUZwQWduaXdFY202bkcrUHAybkVDc0hMMk9EWFdweitsS0xHa0U9")
# Click the human-verification element as soon as it is clickable.
WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.ID, "soralink-human-verif-main"))).click()
# Fixed pause before looking for the download image.
sleep(10)
# Attribute tests in XPath are written with '@' (@id, @x-onclick).
WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.XPATH, "//img[@id='showlink' and @x-onclick]"))).click()
After running this code I should be redirected to https://rareapk.com/finance/?n1p0ei2ng5yd3gz, but it stays stuck on the same page.
The element, I am clicking is given below.
<img class="spoint" id="showlink" x-onclick="changeLink()" src="https://eductin.com/wp-content/uploads/2021/06/Download.png">
Element Image
What is my code doing?
First it goes to this URL.
Then it clicks I'M NOT A ROBOT.
After that the next page is loaded and Selenium waits for 10 seconds.
Then an image (with the text DOWNLOAD RTI) is clicked, which should redirect to the REDIRECTED URL.
But in the last step it stays stuck on the same URL; it does not redirect.
I have tried the following ways
# Attempt 1: wait until the image is clickable, then click it.
# XPath attribute tests are written with '@' (@id, @x-onclick).
WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.XPATH, "//img[@id='showlink' and @x-onclick]"))).click()
# Attempt 2: look the image up by its id and click it immediately.
wd.find_element(By.ID, "showlink").click()
I tested the code without headless mode and I see the browser open the expected page, but wd.current_url still shows the old URL (and wd.title also shows the old title).
The whole problem may be that the page opens the new URL in a new tab, so you need wd.switch_to.window(...) to access the other tab.
This code uses wd.switch_to.window(...) and it shows the correct URL (and title) in the other tab.
BTW: I had to add "Consent" because my browser sometimes shows it.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from time import sleep
from webdriver_manager.chrome import ChromeDriverManager, ChromeType
#from webdriver_manager.firefox import GeckoDriverManager
# Run Chrome headless, without the sandbox and without /dev/shm usage.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
#wd = webdriver.Chrome('chromedriver', options=chrome_options)
wd = webdriver.Chrome(service=Service(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install()), options=chrome_options)
#wd = webdriver.Firefox(service=Service(GeckoDriverManager().install()))
wd.get("https://www.rtilinks.com/?5b5483ba2d=OUhWbXlXOGY4cEE0VEtsK1pWSU5CdEJob0hiR0xFNjN2M252ZXlOWnp0RC9yaFpvN3ZNeW9SazlONWJSTWpvNGNpR0FwWUZwQWduaXdFY202bkcrUHAybkVDc0hMMk9EWFdweitsS0xHa0U9")
# Remember the handle of the original tab so other tabs can be told apart later.
p = wd.current_window_handle
print('current_window_handle:', p)
# The consent button is clicked on a best-effort basis: any failure is only
# reported and the script carries on.
try:
    print('Waiting for: "Consent"')
    # XPath attribute tests are written with '@' (@aria-label).
    WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[@aria-label='Consent']"))).click()
except Exception as ex:
    print('Exception:', ex)
print('Waiting for: "I\'m not a robot"')
WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.ID, "soralink-human-verif-main"))).click()
print('Waiting for: "Download (RTI)"')
WebDriverWait(wd, 20).until(EC.element_to_be_clickable((By.XPATH, "//img[@id='showlink' and @x-onclick]"))).click()
print('--- active tab ---')
print('current_window_handle:', p)
print('current_url:', wd.current_url)
print('title:', wd.title)
print('--- other tabs ---')
chwd = wd.window_handles
for w in chwd:
    # Switch focus to every window other than the original one and report it.
    if w != p:
        wd.switch_to.window(w)
        print('current_window_handle:', w)
        print('current_url:', wd.current_url)
        print('title:', wd.title)
        print('---')
# NOTE(review): close() only closes the currently focused window; use quit()
# if the whole browser session should end here.
wd.close()
Result:
Waiting for: "Consent"
Waiting for: "I'm not a robot"
Waiting for: "Download (RTI)"
--- active tab ---
current_window_handle: CDwindow-31FDEC2C62AA0666A8F3A1DD2133D02C
current_url: https://eductin.com/how-to-fix-and-restore-deleted-mac-system-files/
title: How to fix and Restore deleted Mac system files. – Eductin
--- other tabs ---
current_window_handle: CDwindow-CB1EAE5B6DCD4ACF5D061ED4ECC314CD
current_url: https://sakarnewz.com/
title: SakarNewz – BOOST YOUR KNOWLEDGE WITH TECH NEWS AND UPDATES
---
EDIT:
Sometimes this code has trouble displaying information about the other tabs; it seems the tab runs JavaScript all the time and Selenium probably can't access the data.
I am new to Selenium. I have read through many older posts and tried to retrieve the tooltip text, but failed...
The XPath (for "col_issued_shares") should be correct, as it was copied directly, as shown in the screenshot below:
Screenshot 1
The result is SyntaxError: invalid syntax. My expected output is Issued Shares 20,428,671,958 (as at 27 May 2022). But when I view the source, it shows the tooltip text.
Screenshot 2
Screenshot 3
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
s = Service(r'C:\Users\chromedriver.exe')
driver = webdriver.Chrome(service=s)
url = 'https://www.hkex.com.hk/Market-Data/Securities-Prices/Equities/Equities-Quote?sym=5&sc_lang=en'
driver.get(url)
# The XPath literal is delimited by double quotes, so the attribute value inside
# it uses single quotes; nesting double quotes ends the string early and raises
# SyntaxError. XPath attribute tests are written with '@' (@id).
element = driver.find_element(By.XPATH, "//*[@id='lhkexw-quoteequities']/div[2]/div[3]/div[2]/div/div[2]/div[1]/div[1]/span[2]/span")
Can someone help? Thanks a lot
Try the XPath //*[@id='lhkexw-quoteequities']/div[2]/div[3]/div[2]/div/div[2]/div[1]/div[1]/span[2]/span
You are enclosing the XPath in double quotes ("), so inside the XPath you should use single quotes (') for @id=''.
I'm trying to automate the bet365 casino, I know they have tools to block bots.
link :https://casino.bet365.com/Play/LiveRoulette
I can't handle anything that's inside the div class="app-container", at least by selenium. But I find these elements using JavaScript in the browser console.
import undetected_chromedriver as UChrome
from webdriver_manager.chrome import ChromeDriverManager
# install() receives the value returned by ChromeDriverManager().install() and
# is called before the selenium imports that follow.
# NOTE(review): presumably this ordering is required so the patch applies to
# selenium's webdriver - confirm against the undetected_chromedriver docs
# before reordering these lines.
UChrome.install(ChromeDriverManager().install())
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
# Start the browser through undetected_chromedriver and open the bet365 home page.
driver = UChrome.Chrome()
driver.get('https://www.bet365.com/#/HO/')
after login
driver.get('https://casino.bet365.com/Play/LiveRoulette')
# (strategy, expression) pair; XPath attribute tests are written with '@',
# hence @class.
locator = (By.XPATH, '//*[contains(@class, "second-dozen")]')
I try
probably the selectors should be a little different
# Ask the page, via JavaScript, for the bounding rectangle of the first element
# carrying these three roulette-table classes; the returned value is not stored.
driver.execute_script('return document.getElementsByClassName("roulette-table-cell roulette-table-cell_side-first-dozen roulette-table-cell_group-dozen")[0].getBoundingClientRect()')
Try
# Unpack the (strategy, expression) pair straight into find_element.
driver.find_element(*locator)
but I recive this: raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"(//*[contains(text(), "PAR")])[1]"}
(Session info: chrome=96.0.4664.110)
Stacktrace:
0 0x55f8fa1bcee3
1 0x55f8f9c8a608
2 0x55f8f9cc0aa1
You are probably missing a delay / wait.
Redirecting to the inner page with
driver.get('https://casino.bet365.com/Play/LiveRoulette')
It takes some time for all the elements to load there; you cannot access the elements immediately.
The recommended way to do that is to use Expected Conditions explicit waits, something like this:
import undetected_chromedriver as UChrome
from webdriver_manager.chrome import ChromeDriverManager
UChrome.install(ChromeDriverManager().install())
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
driver = UChrome.Chrome()
# Explicit wait bound to this driver with a 20-second timeout.
wait = WebDriverWait(driver, 20)
driver.get('https://www.bet365.com/#/HO/')
#perform the login here
driver.get('https://casino.bet365.com/Play/LiveRoulette')
# The expression is XPath, so it must be paired with By.XPATH (not
# By.CSS_SELECTOR), and its attribute test is written @class.
# wait.until returns the WebElement once it is visible.
locator = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[contains(@class, "second-dozen")]')))
I see you are also basically missing the driver.find_element method.
This:
(By.XPATH,'//*[contains(@class, "second-dozen")]')
will not return a web element.
Also make sure that element is not inside the iframe.
I was trying to make a simple Selenium program to play Cookie Clicker. From what I have seen, I can't figure out why it is not working. Here is my code:
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
# Raw string so the backslashes in the Windows path are never read as escapes.
PATH = r'C:\Program Files (x86)\chromedriver.exe'
driver = webdriver.Chrome(PATH)
driver.get('https://orteil.dashnet.org/cookieclicker/')
driver.implicitly_wait(5)
# ActionChains.click needs a single WebElement, so use find_element_by_id;
# find_elements_by_id returns a list, which click() rejects with
# "AttributeError: move_to requires a WebElement".
cookie = driver.find_element_by_id('bigCookie')
# NOTE(review): the two lookups below still use find_elements_* and therefore
# hold lists; switch them to find_element_* if single elements are intended.
cookie_count = driver.find_elements_by_id('cookies')
items = [driver.find_elements_by_id('productPrice' + str(i)) for i in range(1, -1, -1)]
# Queue one click on the cookie, then replay the queued action 5000 times.
actions = ActionChains(driver)
actions.click(cookie)
for i in range(5000):
    actions.perform()
and here is the error I was getting
Traceback (most recent call last):
File "c:/Users/ffl_s/Desktop/Botting/My Bot/cookie.py", line 15, in <module>
actions.click(cookie)
File "C:\Users\ffl_s\AppData\Local\Programs\Python\Python38-32\lib\site-packages\selenium\webdriver\common\action_chains.py", line 102, in click
self.move_to_element(on_element)
File "C:\Users\ffl_s\AppData\Local\Programs\Python\Python38-32\lib\site-packages\selenium\webdriver\common\action_chains.py", line 273, in move_to_element
self.w3c_actions.pointer_action.move_to(to_element)
File "C:\Users\ffl_s\AppData\Local\Programs\Python\Python38-32\lib\site-packages\selenium\webdriver\common\actions\pointer_actions.py", line 42, in move_to
raise AttributeError("move_to requires a WebElement")
AttributeError: move_to requires a WebElement
PS C:\Users\ffl_s\Desktop\Botting\My Bot> [21704:18120:0918/223803.402:ERROR:device_event_log_impl.cc(208)] [22:38:03.402] Bluetooth: bluetooth_adapter_winrt.cc:1074 Getting Default Adapter failed.
If you want to click 5000 times and display the cookies text you could do this.
Just pip install webdriver-manager to fix up your binaries as well.
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.action_chains import ActionChains
# Launch Chrome with the driver binary provided by webdriver-manager.
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get('https://orteil.dashnet.org/cookieclicker/')
# Click the big cookie 5000 times, looking the element up afresh on every pass.
for i in range(5000):
    big_cookie = driver.find_element_by_id('bigCookie')
    ActionChains(driver).move_to_element(big_cookie).click().perform()
# Afterwards, print the text of the cookie counter.
items = driver.find_element_by_id('cookies')
print(items.text)
To click on the cookie numerous times in order to play Cookie Clicker, you need to induce WebDriverWait for element_to_be_clickable(), and you can use either of the following Locator Strategies:
Using CSS_SELECTOR:
driver.get('https://orteil.dashnet.org/cookieclicker/')
for i in range(100):
    # Wait (up to 20 s) for the big cookie to be clickable on every pass.
    big_cookie = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#bigCookie")))
    # Fire the click through JavaScript rather than a native WebDriver click.
    driver.execute_script("arguments[0].click();", big_cookie)
# Print the cookie counter text once the loop has finished.
print(driver.find_element_by_css_selector("#cookies").text)
Console Output:
80 cookies
per second : 0
Note: You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Browser Snapshot:
I am using Selenium in Python 3.6 to simulate the search function on this page (http://www.bobaedream.co.kr/cyber/CyberCar.php?gubun=I). What I am trying to do is as follows:
click "상세검색열기" button click
click "판매중인차량" checkbox check
start the for loop that simulates "search function" including the manipulation of drop-down menu
When I tried steps 1~2 (the code between "soup" and "makers") and step 3 (the for loop) in separate scripts, they worked well. But the code combining steps 1~3 doesn't work.
Please help me work this out.
My code is as below:
#!/usr/bin/env python
#-*- coding: utf-8 -*-
import re
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from bs4 import BeautifulSoup
from time import sleep
link = 'http://www.bobaedream.co.kr/cyber/CyberCar.php?gubun=I'
driver = webdriver.PhantomJS()
driver.set_window_size(1920, 1080)
driver.get(link)
sleep(.75)
# page_source is already a str, so from_encoding is not passed: bs4 ignores it
# for Unicode markup and emits a UserWarning.
soup = BeautifulSoup(driver.page_source, "html.parser")
# XPath attribute tests are written with '@' (@title).
# Open the detailed-search panel.
driver.find_element_by_xpath('//img[@title="상세검색열기"]').click()
print("상세검색열기 버튼 클릭")
# Tick the "vehicles on sale" checkbox.
driver.find_element_by_xpath('//input[@title="판매중인 차량"]').click()
print("판매중인 차량 클릭")
makers = ['아우디', 'BMW', '벤츠']  # 아우디 = Audi, 벤츠 = Benz
for maker in makers:
    # open manufacturer layer
    next_elem = driver.find_element_by_xpath('//a[@title="제조사 선택"]')
    next_elem.click()
    # select manufacturer
    next_elem = driver.find_element_by_link_text(maker)
    next_elem.click()
    print(maker)
    print("====clicked maker")
    sleep(.75)
The Error message is like below:
/Library/Frameworks/Python.framework/Versions/3.6/bin/python3.6 /Users/chongwonshin/PycharmProjects/Crawler_test/temp_temp2.py
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/bs4/__init__.py:146: UserWarning: You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.
warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
상세검색열기 버튼 클릭
판매중인 차량 클릭
Traceback (most recent call last):
File "/Users/chongwonshin/PycharmProjects/Crawler_test/temp_temp2.py", line 33, in <module>
next_elem.click()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webelement.py", line 77, in click
self._execute(Command.CLICK_ELEMENT)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webelement.py", line 494, in _execute
return self._parent.execute(command, params)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 236, in execute
self.error_handler.check_response(response)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 192, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.ElementNotVisibleException: Message: {"errorMessage":"Element is not currently visible and may not be manipulated","request":{"headers":{"Accept":"application/json","Accept-Encoding":"identity","Connection":"close","Content-Length":"81","Content-Type":"application/json;charset=UTF-8","Host":"127.0.0.1:50709","User-Agent":"Python-urllib/3.6"},"httpVersion":"1.1","method":"POST","post":"{\"id\": \":wdc:1484839459174\", \"sessionId\": \"55be8bc0-de5b-11e6-980a-4135a6020523\"}","url":"/click","urlParsed":{"anchor":"","query":"","file":"click","directory":"/","path":"/click","relative":"/click","port":"","host":"","password":"","user":"","userInfo":"","authority":"","protocol":"","source":"/click","queryKey":{},"chunks":["click"]},"urlOriginal":"/session/55be8bc0-de5b-11e6-980a-4135a6020523/element/:wdc:1484839459174/click"}}
Screenshot: available via screen
Process finished with exit code 1
Focus on your error - ElementNotVisibleException. That means you can't click on the invisible element found with driver.find_element_by_xpath('//a[@title="제조사 선택"]').
After inspection,
please notice that you can open the manufacturer layer simply by copying and executing the JavaScript responsible for this - layerShow('layer_maker');$('#layer_maker .order a:first-child').focus().
Replace your loop's code with this
for maker in makers:
    # Show the manufacturer layer by running the page's own JavaScript.
    driver.execute_script("layerShow('layer_maker');$('#layer_maker .order a:first-child').focus()")
    # Click the link whose text is the manufacturer's name.
    driver.find_element_by_link_text(maker).click()
    print(maker)
    print("====clicked maker")
    sleep(.75)
and everything works.