I have a script that adds (scrapes) links to a dictionary, then scrapes for each of theses links individually:
def get_products():
links = []
for x in range(1,3):
url1 = f"https://www.kaufland.de/category/39251/p{x}/"
r = s.get(url1)
items = r.html.find('div.results')
for item in items:
rawlinks = str(item.find('a'))
list1 = re.findall(r"[0-9]{9}", rawlinks)
result = ['https://www.kaufland.de/product/' direction for direction in list1]
links.append(result)
for link in links:
wcapi = API(
url='https://s-qmzs9fc4fs3c.eu1.wpsandbox.org/',
consumer_key='ck_365febfbc43beca56bef990b20cb88db6022a5cd',
consumer_secret='cs_05c93973ffb6bc467483bf0105cd730a288dd405',
version="wc/v3"
)
r = requests.get(link)
html = r.text
soup = BeautifulSoup(html, 'html.parser')
title = soup.find('h1').text.strip()
price = soup.find('div', {'class':'rd-buybox__price'}).text.strip().replace(' €', '').replace(',', '.')
product_data = soup.find_all('script')[2] # 3rd script tag
product_data = str(product_data).partition('return {')[-1]
product_data = '{' product_data.split('}(')[0] '}'
saleprice = price
try:
ean = str(re.findall('[0-9]{13}' , product_data)[0])
except:
ean = ""
try:
brand = str(re.findall('id:[0-9]{8},name:"(. ?)"', product_data))
except:
brand = ""
try:
description = str(re.findall('descriptionHtml:"(. ?)"', product_data)).replace('\\u003E', '').replace('\\u003Cbr', '').replace('\\u003Cli', '').replace('\\u003C', '').replace('\\u002Fli', '').replace('\\u002F', '').replace('\\\u002Fb', '').replace('\b', '').replace('\p', '')
except:
description = ""
try:
imgs = str(re.findall('fallbackSrc:"(. ?)"' , product_data))
except:
imgs = "https://www.anchorpackaging.com/wp-content/uploads/2016/10/SampleKit.jpg"
id = ''.join([random.choice(string.digits) for n in range(2)])
sku = ''.join([random.choice(string.digits) for n in range(7)])
scraped = {
"name": title,
"type": "simple",
"regular_price": price,
"stock_quantity": id,
"short_description": "Hersteller: " brand.replace("']", "").replace("['", "") ", " "EAN: " ean,
"description": description,
"categories": [
{
"id": id
}
],
"images": [
{
"src": imgs.replace('\\\\', '\\').replace('\\u002F', '/').replace(" '", "").replace("'", "").split(",")[1],
"alt": "img"
},
{
"src": imgs.replace('\\\\', '\\').replace('\\u002F', '/').replace(" '", "").replace("'", "").split(",")[2],
"alt": "img"
},
{
"src": imgs.replace('\\\\', '\\').replace('\\u002F', '/').replace(" '", "").replace("'", "").split(",")[3],
"alt": "img"
}
],
}
wcapi.post('products', scraped)
print(scraped)
print(get_products())
It gives me the error: requests.exceptions.InvalidSchema: No connection adapters were found for "['link1, link2, link3 ']", whereby instead of link1 the urls are printed, i just dont write them here for readability. Why does my script fail to use each dict item individually and tries to connect to the whole dict instead?
CodePudding user response:
I think your issue is in the line
links.append(result)
You want
links.extend(result)
