I have an application that is almost working as intended. The problem arises after it runs through the loop on the 5th instance. The search states there are two results which results in the same end result. When this occurs I'd like to select the first of the two.
The popup messages looks like the following:

I'm using the following code to create the list and then loop:
from selenium import webdriver
import pandas as pd
import random
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
#service = Service('C:\Program Files\Chrome Driver\chromedriver.exe')
URL = "https://mor.nlm.nih.gov/RxClass/search?query=ALIMENTARY TRACT AND METABOLISM"
driver = webdriver.Chrome('C:\Program Files\Chrome Driver\chromedriver.exe')
driver.get(URL)
category = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.drug_class img a")))]
classid = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.propText")))]
dfObj = pd.DataFrame(category)
dfObj.columns =['Category']
dfObj.dropna(inplace = True)
new = dfObj["Category"].str.split("(", n = 1, expand = True)
dfObj["New Category"]= new[0]
dfObj["Count"]= new[1]
dfObj.drop(columns =["Category"], inplace = True)
dfObj['Count'] = dfObj['Count'].str.rstrip(')')
dfObj['IsNumber'] = dfObj['Count'].str.isnumeric()
dfObj = dfObj[(dfObj['IsNumber'] == True)]
searchcat = dfObj['New Category'].tolist()
print(searchcat)
dfObj.to_csv('tabledf.csv',index=False)
time.sleep(8)
driver.quit()
for search in searchcat:
page = f"https://mor.nlm.nih.gov/RxClass/search?query={search}"
driver = webdriver.Chrome('C:\Program Files\Chrome Driver\chromedriver.exe')
driver.get(page)
time.sleep(4)
table = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'tr.dbsearch')))
time.sleep(4)
filename = search[0:30] 'table.csv'
pd.read_html(driver.page_source)[1].iloc[:,:-1].to_csv(filename,index=False)
time.sleep(4)
driver.quit()
The loop will continue to run if I manually click each search result. However, I would like for selenium to always select the first option. How would I go about this?
Updated Code:
from selenium import webdriver
import pandas as pd
import random
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait, TimeoutException
import time
with webdriver.Chrome('C:\Program Files\Chrome Driver\chromedriver.exe') as driver:
URL = "https://mor.nlm.nih.gov/RxClass/search?query=ALIMENTARY TRACT AND METABOLISM"
driver.get(URL)
category = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.drug_class img a")))]
dfObj = pd.DataFrame(category)
dfObj.columns =["Category"]
dfObj.dropna(inplace = True)
new = dfObj["Category"].str.split("(", n = 1, expand = True)
dfObj["New Category"]= new[0]
dfObj["Count"]= new[1]
dfObj.drop(columns =["Category"], inplace = True)
dfObj["Count"] = dfObj["Count"].str.rstrip(')')
dfObj["IsNumber"] = dfObj["Count"].str.isnumeric()
dfObj = dfObj[(dfObj["IsNumber"] == True)]
searchcat = dfObj["New Category"].tolist()
dfObj.to_csv('tabledf.csv',index=False)
time.sleep(3)
for search in searchcat:
page = f"https://mor.nlm.nih.gov/RxClass/search?query={search}"
driver = webdriver.Chrome('C:\Program Files\Chrome Driver\chromedriver.exe')
driver.get(page)
table = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'tr.dbsearch')))
modal_wait = WebDriverWait(driver, 1)
try:
modal_el = modal_wait.until(EC.visibility_of_element_located((By.ID, 'optionModal')))
modal_el.find_element(By.CSS_SELECTOR, '.uloption').click()
except TimeoutException:
pass
filename = search[0:30] 'table.csv'
classid = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.table-responsive div.propText strong:nth-child(2)")))]
classname = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.table-responsive div.propText strong:nth-child(1)")))]
classtype = [my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.table-responsive div.propText strong:nth-child(3)")))]
df = pd.read_html(driver.page_source)[1].iloc[:,:-1]
df["ClassID"] = pd.Series(classid)
df["ClassName"] = pd.Series(classname)
df["ClassType"] = pd.Series(classtype)
df.to_csv(filename,index=False)
time.sleep(4)
driver.quit()
CodePudding user response:
First of, I will suggest that you use the with context manager. It will handle opening/closing the driver (Chrome) by itself. This ensure if any exception is raised that it will still be closed.
To do so, use:
with webdriver.Chrome() as driver:
...
In your code I see you close/open a new browser for each URL. This is not needed and not doing so will speed up your script. Just use driver.get() to change the URL.
For your main issue, just add a portion of code that will detect the modal and chose the first option. Something along those lines
modal_wait = WebDriverWait(driver, 1)
try:
modal_el = modal_wait.until(EC.element_to_be_clickable((By.ID, 'optionModal')))
modal_el.find_element(By.CSS_SELECTOR, '.uloption').click()
except TimeoutException:
pass
You must include the following imports:
from selenium.webdriver.support.wait import WebDriverWait, TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
