I have a for loop that requests 6 URLs and, for each one, gets the text of the first element with the class "GARawf". The loop works; however, I've noticed that the page now takes about 9 seconds to load, compared to 1 second before. As I am new to Django and BeautifulSoup, I was wondering if there is a way to refactor the code so that the view loads faster.
views.py
# Destination cities to look up; prices_list ends up in this same order.
city_list = ["Toronto", "Montreal", "Calgary", "Edmonton", "Vancouver", "Quebec"]
# Collected price text, one entry per destination.
prices_list = []
# Departure city used in every search query.
origin = "Madrid"
# Send a desktop Chrome User-Agent with each request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36"
}
for destination in city_list:
    # Build the search query for this origin/destination pair.
    url = f"https://google.com/search?q={origin} to {destination} Google Flights"
    # Each request blocks until the response arrives, so the six lookups
    # run strictly one after another.
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'lxml')
    # Get the price element: the first <span> carrying the "GARawf" class.
    prices = soup.find("span", attrs={"class": "GARawf"})
    if prices is not None:
        prices_list.append(prices.text.strip())
    else:
        # Keep the list aligned with city_list when no price is found.
        prices_list.append("Not Available")
CodePudding user response:
I would use threading to call the function that makes the requests, so that the requests run concurrently. You will need to import concurrent.futures.
Then move the city_list list into the thread-pool function, as shown below. Read more about using Thread Pools.
def get_prices(city):
    """Look up the flight price text from Madrid to ``city``.

    Requests a Google search page for the route and reads the first
    ``<span>`` whose class is ``GARawf``.

    Args:
        city: Destination city name inserted into the search query.

    Returns:
        A one-element list holding the stripped text of the price element,
        or ``"Not Available"`` when the page has no such element.
    """
    origin = "Madrid"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36"
    }
    # Use the ``city`` argument here; no ``destination`` name exists in
    # this function's scope.
    url = f"https://google.com/search?q={origin} to {city} Google Flights"
    with requests.get(url, headers=headers) as response:
        soup = BeautifulSoup(response.text, 'lxml')
    # Get the price element.
    price = soup.find("span", attrs={"class": "GARawf"})
    if price is not None:
        return [price.text.strip()]
    return ["Not Available"]
def run_threadPool():
    """Fetch the price for every city concurrently and print the elapsed time.

    Each city is passed to ``get_prices`` on a worker thread, so the network
    waits overlap instead of running back to back.

    Returns:
        A flat list of the price strings returned by ``get_prices``, in the
        same order as ``city_list``.
    """
    # Imported locally so the function works even if the module does not
    # already import these standard-library modules.
    import concurrent.futures
    import time

    city_list = ["Toronto", "Montreal", "Calgary", "Edmonton", "Vancouver", "Quebec"]
    # Start the timer before any request is submitted.
    t1 = time.perf_counter()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map yields results in the order of city_list.
        results = executor.map(get_prices, city_list)
        # get_prices returns a list per city; flatten them into one list.
        prices = [price for result in results for price in result]
    t2 = time.perf_counter()
    print(f'Finished in {t2-t1} seconds')
    return prices
