Failing to scrape web data with Selenium - javascript

I'm trying to fetch data from the front page table on https://icostats.com/. But something just isn't clicking.
from selenium import webdriver

browser = webdriver.Chrome(executable_path=r'C:\Scrapers\chromedriver.exe')
browser.get("https://icostats.com")
browser.find_element_by_xpath("""//*[@id="app"]/div/div[2]/div[2]/div[2]/div[2]/div[8]/span/span""").s()
posts = browser.find_element_by_class_name("tdPrimary-0-75")
for post in posts:
    print(post.text)
The errors I'm getting:
C:\Python36\python.exe C:/.../PycharmProjects/PyQtPS/ICO_spyder.py
Traceback (most recent call last):
  File "C:/.../PycharmProjects/PyQtPS/ICO_spyder.py", line 5, in <module>
    browser.find_element_by_xpath("""//*[@id="app"]/div/div[2]/div[2]/div[2]/div[1]/div[2]""").click()
  File "C:\Python36\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 313, in find_element_by_xpath
    return self.find_element(by=By.XPATH, value=xpath)
  File "C:\Python36\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 791, in find_element
    'value': value})['value']
  File "C:\Python36\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 256, in execute
    self.error_handler.check_response(response)
  File "C:\Python36\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 194, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element:
{"method":"xpath","selector":"//*[@id="app"]/div/div[2]/div[2]/div[2]/div[1]/div[2]"}
(Session info: chrome=59.0.3071.115)
(Driver info: chromedriver=2.30.477700 (0057494ad8732195794a7b32078424f92a5fce41),platform=Windows NT 6.1.7600 x86_64)
EDIT
Finally got it working:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as wait
browser = webdriver.Chrome(executable_path=r'C:\Scrapers\chromedriver.exe')
browser.get("https://icostats.com")
wait(browser, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#app > div > div.container-0-16 > div.table-0-20 > div.tbody-0-21 > div:nth-child(2) > div:nth-child(8)")))
posts = browser.find_elements_by_class_name("thName-0-55")
for post in posts:
    print(post.text)

posts = browser.find_elements_by_class_name("tdName-0-73")
for post in posts:
    print(post.text)
Is there any way to iterate over every header/column and export it to a csv file without having to go through each class like this?
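One possible way (a rough, untested sketch; the attribute-prefix selectors below assume the auto-generated class names keep their thead/tbody/th prefixes, which may not hold on the live page) is to grab the row containers generically and write every cell's text out with the csv module:

import csv

# Sketch only: the selectors assume the auto-generated class names start with
# "thead"/"tbody"/"th"; adjust them to whatever the live markup actually uses.
headers = [h.text for h in browser.find_elements_by_css_selector("div[class^='thead'] div[class^='th']")]
rows = browser.find_elements_by_css_selector("div[class^='tbody'] > div")

with open("icostats.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(headers)
    for row in rows:
        # every direct child div of a row is treated as one cell
        writer.writerow([cell.text for cell in row.find_elements_by_xpath("./div")])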

The required data is generated dynamically by JavaScript, so you need to wait until it is present on the page:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as wait
browser = webdriver.Chrome(executable_path=r'C:\Scrapers\chromedriver.exe')
browser.get("https://icostats.com")
wait(browser, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div#app>div")))
posts = browser.find_elements_by_class_name("tdPrimary-0-75")
for post in posts:
    print(post.text)

It seems there is no s() method, as used in this line:
browser.find_element_by_xpath("""//*[@id="app"]/div/div[2]/div[2]/div[2]/div[2]/div[8]/span/span""").s()
so what you need is probably .text:
browser.find_element_by_xpath("""//*[@id="app"]/div/div[2]/div[2]/div[2]/div[2]/div[8]/span/span""").text
Since you want to iterate over the results, this line:
posts = browser.find_element_by_class_name("tdPrimary-0-75")
should be
posts = browser.find_elements_by_class_name("tdPrimary-0-75")

Related

Selenium does not find existing elements that can be found in the browser's Elements tab and by using JS in the console

I'm trying to automate the bet365 casino; I know they have tools to block bots.
Link: https://casino.bet365.com/Play/LiveRoulette
I can't interact with anything that's inside the div class="app-container", at least through Selenium, but I can find these elements using JavaScript in the browser console.
import undetected_chromedriver as UChrome
from webdriver_manager.chrome import ChromeDriverManager
UChrome.install(ChromeDriverManager().install())
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
driver = UChrome.Chrome()
driver.get('https://www.bet365.com/#/HO/')
After logging in:
driver.get('https://casino.bet365.com/Play/LiveRoulette')
locator = (By.XPATH, '//*[contains(@class, "second-dozen")]')
I tried (the selectors here are probably a little different):
driver.execute_script('return document.getElementsByClassName("roulette-table-cell roulette-table-cell_side-first-dozen roulette-table-cell_group-dozen")[0].getBoundingClientRect()')
and:
driver.find_element(locator[0], locator[1])
but I receive this:
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"(//*[contains(text(), "PAR")])[1]"}
(Session info: chrome=96.0.4664.110)
Stacktrace:
0 0x55f8fa1bcee3
1 0x55f8f9c8a608
2 0x55f8f9cc0aa1
You are probably missing a delay / wait.
Redirecting to the inner page with
driver.get('https://casino.bet365.com/Play/LiveRoulette')
takes some time to load all the elements there; you cannot access them immediately.
The recommended way to handle that is to use Expected Conditions explicit waits, something like this:
import undetected_chromedriver as UChrome
from webdriver_manager.chrome import ChromeDriverManager
UChrome.install(ChromeDriverManager().install())
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
driver = UChrome.Chrome()
wait = WebDriverWait(driver, 20)
driver.get('https://www.bet365.com/#/HO/')
#perform the login here
driver.get('https://casino.bet365.com/Play/LiveRoulette')
element = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[contains(@class, "second-dozen")]')))
You are also basically missing a driver.find_element call.
This:
(By.XPATH, '//*[contains(@class, "second-dozen")]')
will not return a web element on its own; you have to pass it to driver.find_element or to an expected condition.
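For illustration, with the locator tuple defined above:
# The (By, value) tuple only becomes a WebElement once it is passed to a lookup call:
element = driver.find_element(*locator)
# or, with an explicit wait:
element = wait.until(EC.visibility_of_element_located(locator))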
Also, make sure the element is not inside an iframe.
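If it is, you have to switch into that frame before locating the element; roughly like this (the frame locator below is only a placeholder, not taken from the actual page):
# Hypothetical sketch: switch into the hosting <iframe> first.
wait.until(EC.frame_to_be_available_and_switch_to_it((By.TAG_NAME, "iframe")))
element = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[contains(@class, "second-dozen")]')))
# ...interact with the element, then return to the main document:
driver.switch_to.default_content()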

Web scraping JavaScript-based websites using Selenium gives an error

I've been working on a project that scrapes a few numbers from a JavaScript-based website and sends them to a specific Discord server. I've gotten to the point where I only need to scrape the numbers, but I am having issues with it. When I try to get the numbers, this error pops up:
Traceback (most recent call last):
File "C:\Users\Administrator\Desktop\cukor4_dry.py", line 48, in <module>
element = wait.until(EC.visibility_of_element_located((By.ID, "mainbgsection")))
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38-32\lib\site-packages\selenium\webdriver\support\wait.py", line 80, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
The code I use:
#import libraries
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup
import time
from twill.commands import *
import pyautogui
import os
import subprocess
from dhooks import Webhook, File
import sys
#set settings
chrome_options = webdriver.ChromeOptions()
webdriver = webdriver.Chrome("chromedriver.exe", options=chrome_options)
hook = Webhook('webhook link')
time.sleep(4)
print('form')
showforms()
try:
    # try to log into the page
    webdriver.get('url')
    webdriver.find_element_by_id('username').send_keys('username')
    webdriver.find_element_by_id('password').send_keys('password')
    webdriver.find_element_by_name('actionButton').click()
    print('submit')
except:
    # already logged in
    pass
print('waited')
#try to scrape the website
url = "url"
webdriver.get(url)
wait = WebDriverWait(webdriver, 10)
element = wait.until(EC.visibility_of_element_located((By.ID, "mainbgsection")))

How to click on the Cookie numerous times in order to play Cookie Clicker within https://orteil.dashnet.org/cookieclicker/ using Selenium and Python

I was trying to make a simple Selenium program to play Cookie Clicker. From what I have seen, I can't figure out why it is not working. Here is my code:
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
PATH = 'C:\Program Files (x86)\chromedriver.exe'
driver = webdriver.Chrome(PATH)
driver.get('https://orteil.dashnet.org/cookieclicker/')
driver.implicitly_wait(5)
cookie = driver.find_elements_by_id('bigCookie')
cookie_count = driver.find_elements_by_id('cookies')
items = [driver.find_elements_by_id('productPrice' + str(i)) for i in range (1,-1,-1)]
actions = ActionChains(driver)
actions.click(cookie)
for i in range(5000):
    actions.perform()
and here is the error I was getting
Traceback (most recent call last):
File "c:/Users/ffl_s/Desktop/Botting/My Bot/cookie.py", line 15, in <module>
actions.click(cookie)
File "C:\Users\ffl_s\AppData\Local\Programs\Python\Python38-32\lib\site-packages\selenium\webdriver\common\action_chains.py", line 102, in click
self.move_to_element(on_element)
File "C:\Users\ffl_s\AppData\Local\Programs\Python\Python38-32\lib\site-packages\selenium\webdriver\common\action_chains.py", line 273, in move_to_element
self.w3c_actions.pointer_action.move_to(to_element)
File "C:\Users\ffl_s\AppData\Local\Programs\Python\Python38-32\lib\site-packages\selenium\webdriver\common\actions\pointer_actions.py", line 42, in move_to
raise AttributeError("move_to requires a WebElement")
AttributeError: move_to requires a WebElement
PS C:\Users\ffl_s\Desktop\Botting\My Bot> [21704:18120:0918/223803.402:ERROR:device_event_log_impl.cc(208)] [22:38:03.402] Bluetooth: bluetooth_adapter_winrt.cc:1074 Getting Default Adapter failed.
If you want to click 5000 times and display the cookies text, you could do this. Note that find_elements_by_id returns a list, while ActionChains.click() needs a single WebElement, which is why your call failed.
Just pip install webdriver-manager to fix up your binaries as well.
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from webdriver_manager.chrome import ChromeDriverManager

driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get('https://orteil.dashnet.org/cookieclicker/')

for i in range(5000):
    ActionChains(driver).move_to_element(driver.find_element_by_id('bigCookie')).click().perform()

items = driver.find_element_by_id('cookies')
print(items.text)
To click on the cookie numerous times in order to play Cookie Clicker, you need to induce WebDriverWait for element_to_be_clickable(), and you can use the following locator strategy:
Using CSS_SELECTOR:
driver.get('https://orteil.dashnet.org/cookieclicker/')
for i in range(100):
    driver.execute_script("arguments[0].click();", WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#bigCookie"))))
    print(driver.find_element_by_css_selector("#cookies").text)
Console Output:
80 cookies
per second : 0
Note: You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

Issue with AttributeError: 'WebDriver' object has no attribute 'manage'

My code:
commentr = driver.find_element_by_id("simplebox-placeholder")
commentr.click()
driver.execute_script("document.getElementById('simplebox-placeholder').value = 'your comment text here';")
driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
commentr.send_keys("HELO")
My error:
Traceback (most recent call last):
  File "C:\Users\weqwwg\Desktop\python\Game.py", line 77, in <module>
    driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
AttributeError: 'WebDriver' object has no attribute 'manage'
I'm trying to send keys to the comment box on YouTube. I removed some code; I am currently running this code:
commentr = driver.find_element_by_id("simplebox-placeholder")
commentr.click()
driver.implicitly_wait(10)
commentr.send_keys("HELO")
This is the error I'm getting:
Traceback (most recent call last):
  File "C:\Users\Brandsdo\Desktop\python\Game.py", line 76, in <module>
    commentr.send_keys("HELO")
  File "C:\Users\Braasdasndo\AppData\Local\Programs\Python\Python37-32\lib\site-packages\selenium\webdriver\remote\webelement.py", line 479, in send_keys
    'value': keys_to_typing(value)})
  File "C:\Users\Brsadasdando\AppData\Local\Programs\Python\Python37-32\lib\site-packages\selenium\webdriver\remote\webelement.py", line 633, in _execute
    return self._parent.execute(command, params)
  File "C:\Users\Braasdasndo\AppData\Local\Programs\Python\Python37-32\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "C:\Users\Braasdando\AppData\Local\Programs\Python\Python37-32\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.ElementNotInteractableException: Message: element not interactable
  (Session info: chrome=73.0.3683.103)
  (Driver info: chromedriver=73.0.3683.68 (47787ec04b6e38e22703e856e101e840b65afe72),platform=Windows NT 10.0.17763 x86_64)
UPDATED PART OF CODE
driver.find_element_by_id("simplebox-placeholder").click()
commentr = WebDriverWait(driver,10).until(EC.element_to_be_clickable( (By.ID, 'contenteditable-textarea') ))
commentr.click().send_keys("HELO")
driver.find_element_by_id("submit-button").click()
THIS IS THE ERROR
Traceback (most recent call last):
  File "C:\Users\Desktop\python\Game.py", line 74, in <module>
    commentr.click().send_keys("HELO")
AttributeError: 'NoneType' object has no attribute 'send_keys'
This is an answer to the original question:
To fix your immediate problem, use
driver.implicitly_wait(10)
The Selenium documentation describes implicit waits.
However, you are probably going in the wrong direction altogether.
Instead, try to use the WebDriverWait module.
from selenium.webdriver.support.ui import WebDriverWait
For example:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
#...
footer = WebDriverWait(driver, 10).until(EC.visibility_of_element_located(
    (By.CSS_SELECTOR, ".b-footer__divider"))
)
Update for the updated part of question:
I'm trying to send keys to the comment box on YouTube. I removed some code; I am currently running this code.
As I suspected, you don't need the implicitly_wait function at all there.
I have reviewed the YouTube page. Your first step is right: you are locating the "Add a public comment..." box and clicking on it.
I am skipping the implicitly_wait call; it doesn't affect anything there.
At the next step you are trying to send keystrokes into the same box you found and clicked. This is wrong. Though they look exactly the same, you were clicking on the element with id simplebox-placeholder, but once clicked that element becomes invisible, and the same-looking element with id contenteditable-textarea is ready to receive your input.
In a simple approach, you should locate this element and send keystrokes into it:
commentr = driver.find_element_by_id("contenteditable-textarea")
commentr.click()
commentr.send_keys("HELO")
But after you click simplebox-placeholder, it can take some time for the page to perform the necessary actions and make contenteditable-textarea visible and clickable. The approach below will let you avoid an exception if the element is not ready yet:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
commentr = WebDriverWait(driver,10).until(EC.element_to_be_clickable( (By.ID, 'contenteditable-textarea') ))
commentr.click()
commentr.send_keys("HELO")
Finally, locate the "Comment" button and click it to submit your comment. Here you can use the simplified approach, because the "Comment" button is already present:
driver.find_element_by_id("submit-button").click()
Overall, your code could look like the following. Note that click() returns None, which is why chaining commentr.click().send_keys("HELO") in your updated snippet raised the AttributeError; call them as separate statements:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver.find_element_by_id("simplebox-placeholder").click()
commentr = WebDriverWait(driver,10).until(EC.element_to_be_clickable( (By.ID, 'contenteditable-textarea') ))
commentr.click()
commentr.send_keys("HELO")
driver.find_element_by_id("submit-button").click()

Explicit wait in Selenium with Python

I'm trying to scrape a webpage where I need to expand the items list by clicking expand button several times.
As I researched how to do this in a smart way, I've been trying to use an explicit wait with an expected condition (element_to_be_clickable).
here is my test code:
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
from selenium.webdriver.support.ui import WebDriverWait
import time
btn_xpath = '//*[@id="contents"]/div[1]/div[2]/div/div[1]'
browser = webdriver.Chrome('/Users/dongpark/Downloads/chromedriver') # calling chrome driver from local folder
browser.get('http://cu.bgfretail.com/event/plus.do?category=event&depth2=1&sf=N')
wait = WebDriverWait(browser, 20)
time.sleep(8)
def check_exists_by_xpath(xpath):
    try:
        browser.find_element_by_xpath(xpath)
    except NoSuchElementException:
        return False
    return True

while True:
    button = check_exists_by_xpath(btn_xpath)
    if button is False:
        print("done")
        break
    else:
        print("more")
        wait.until(EC.element_to_be_clickable((By.XPATH, btn_xpath)))
        browser.find_element_by_xpath(btn_xpath).click()
check_exists_by_xpath just tests if the expand button is still available on the page.
When I run this, I get:
File "/Users/dongpark/Documents/kuk/firstSelenium/test.py", line 37, in <module> browser.find_element_by_xpath(btn_xpath).click()
selenium.common.exceptions.WebDriverException: Message: unknown error: Element is not clickable at point (418, 920). Other element would receive the click: <div class="ico"></div>
(Session info: chrome=54.0.2840.98)
(Driver info: chromedriver=2.25.426935 (820a95b0b81d33e42712f9198c215f703412e1a1),platform=Mac OS X 10.12.0 x86_64)
If I just add a long enough sleep before the click it works, but I want to make it more efficient.
Change your check_exists_by_xpath to wait for element presence:
from selenium.common.exceptions import TimeoutException

def check_exists_by_xpath(xpath):
    try:
        wait.until(EC.presence_of_element_located((By.XPATH, xpath)))
    except TimeoutException:  # a failed wait raises TimeoutException, not NoSuchElementException
        return False
    return True
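If the "Other element would receive the click" error still shows up after the wait, a workaround that is often suggested (not part of the answer above, so treat it only as a sketch) is to scroll the button into view before clicking, or to click it with JavaScript:
btn = browser.find_element_by_xpath(btn_xpath)
# scroll the button into the viewport so nothing overlaps it
browser.execute_script("arguments[0].scrollIntoView(true);", btn)
btn.click()
# ...or bypass the overlap check entirely with a JavaScript click:
browser.execute_script("arguments[0].click();", btn)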
