I am using BeautifulSoup to scrape the information for all products, but my code only returns the first product's information. Also, when I add `.text` to the end of a `find(...)` call, I get a `NoneType` error.
# Asker's script: open the site in Chrome, walk 12 search-result pages, parse
# each page's HTML with BeautifulSoup and collect product fields into
# `data_adi`, then load them into a pandas DataFrame.
# NOTE(review): the original indentation was lost when this snippet was
# extracted, so loop/block nesting has to be reconstructed before it can run.
options = ChromeOptions()
options.add_argument("headless")
# NOTE(review): `options` is not passed to the driver constructor below, so the
# "headless" argument presumably has no effect — confirm.
driver=chrome(executable_path="/AppData/Local/Programs/Python/Driver/chromedriver_win32/chromedriver.exe")
driver.get("https://www.adiglobaldistribution.us/MyAccount/signin")# here change your link
driver.maximize_window()
time.sleep(5)
# NOTE(review): `wait` is not used again in this snippet; a fresh
# WebDriverWait is created inside the loop instead.
wait=WebDriverWait(driver,10)
data_adi = []
# `n` is immediately rebound by the `for` statement, so this assignment is redundant.
n=0
for n in range(12):
# NOTE(review): `{n 1}` is not a valid f-string expression; presumably it was
# `{n + 1}` (1-based page number) and the `+` was lost — confirm.
pages_url = f"https://www.adiglobaldistribution.us/search?page={n 1}&criteria=Tp-link Usa Corporation"
driver.get(pages_url)
# NOTE(review): '[]' looks like a redacted/placeholder selector; the real CSS
# selector for the element to wait for is not visible here.
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, '[]')))
time.sleep(5)
html = driver.page_source
soup = Soup(html)
# NOTE(review): `select()` takes a CSS selector string; `class_=` is a
# find()/find_all() filter. The class presumably belongs inside the selector
# (e.g. "div.some-class") or this should be find_all() — verify against bs4 docs.
for item in soup.select("div", class_='[]'):
data_adi.append({
# Each value below is the result of find(): a Tag object, or None when nothing
# matches. Nothing extracts the text here; calling `.text` on a None result is
# presumably the NoneType error described in the question.
'title' : item.find("span", class_="rd-item-name"),
'name' : item.find("span", class_='item-num-mfg'),
# NOTE(review): this asks for a <div> that has an href attribute; the link is
# presumably on an <a> element inside the div — confirm against the page HTML.
'link' : item.find("div", class_="rd-item-name", href=True),
'price' : item.find("div", class_="pdp-price-wrapper"),
'stock' : item.find("span", class_="availabilityMessage-rd")
})
df_adi = pd.DataFrame(data_adi)
# NOTE(review): drop_duplicates() returns a new DataFrame by default; the
# result is discarded here, so `df_adi` keeps its duplicates.
df_adi.drop_duplicates()
# Bare expression: only displays the frame in a notebook/REPL.
df_adi
Result
CodePudding user response:
I tried doing it entirely with Selenium, and it worked.
I did not include the last two items (price and stock) because they may require a login, although I am not sure. You can add them by mimicking the locators written here for the other fields (name, title, link): take each element's text, put it into the dictionary (`table`), and then append the dictionary to the list.
# Collect title, item number text and product link for every product listed on
# the search-result pages, using Selenium only.
# Expects `driver` (a Selenium WebDriver), `By`, `time` and `pd` (pandas) to be
# set up earlier in the script.
PAGE_COUNT = 12  # number of search-result pages to visit

data_adi = []
for i in range(PAGE_COUNT):
    # `range` is 0-based while the `page` query parameter is requested as i + 1.
    driver.get(f"https://www.adiglobaldistribution.us/search?page={i + 1}&criteria=tp-link usa corporation")# here change your link
    # Fixed pause before querying the page (presumably to let the listing
    # finish loading).
    time.sleep(5)
    # One element per product card; the per-field lookups below are scoped to
    # the card, so fields of different products cannot get mixed up.
    search_items = driver.find_elements(By.CSS_SELECTOR, "[class='rd-thumb-details-price']")
    print(len(search_items))
    for each_item in search_items:
        item_title = each_item.find_element(By.CSS_SELECTOR, "div[class='rd-item-name'] span").text
        item_name = each_item.find_element(By.CSS_SELECTOR, "span[class='item-num-mfg']").text
        item_link = each_item.find_element(By.CSS_SELECTOR, "div[class='rd-item-name'] a").get_attribute('href')
        table = {"title": item_title, "name": item_name, "link": item_link}
        data_adi.append(table)

# Build the frame once, after every page has been collected.
# drop_duplicates() returns a new DataFrame rather than modifying in place,
# so its result must be assigned for the de-duplication to take effect.
df_adi = pd.DataFrame(data_adi).drop_duplicates()
print(df_adi)
Output:
title ... link
0 TP-Link TL-SG1005P 5-Port Gigabit Desktop Swit... ... https://www.adiglobaldistribution.us/Product/F...
1 TP-Link TL-POE10R Gigabit POE Splitter ... https://www.adiglobaldistribution.us/Product/F...
2 TP-Link TL-POE160S PoE Injector ... https://www.adiglobaldistribution.us/Product/F...
3 TP-Link TL-SG101616-Port Gigabit Desktop/Rackm... ... https://www.adiglobaldistribution.us/Product/F...
4 TP-Link RE220 AC750 WiFi Range Extender ... https://www.adiglobaldistribution.us/Product/F...
.. ... ... ...
137 TP-Link TL-WA850RE 300Mbps Universal Wi-Fi Ran... ... https://www.adiglobaldistribution.us/Product/F...
138 TP-Link KL430E Kasa Smart Light Strip Extensio... ... https://www.adiglobaldistribution.us/Product/F...
139 TP-Link EAP245 V3 AC1750 Wireless Dual Band Gi... ... https://www.adiglobaldistribution.us/Product/F...
140 T-Link Archer T2E AC600 Wireless Dual Band PCI... ... https://www.adiglobaldistribution.us/Product/F...
141 TP-Link TL-WR940N 450Mbps Wireless N Router wi... ... https://www.adiglobaldistribution.us/Product/F...
[142 rows x 3 columns]

