I am using the following python code to scrape a website. The issue which I can't resolve is it scrapes multiple events successfully but only sends the result of the first event to excel. All help greatly appreciated
import requests
import json
import pandas as pd
import xlsxwriter
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0',
'Accept-Language' : 'en-US,en;q=0.5'}
for race in range(1, 3):
url = f"https://s3-ap-southeast-2.amazonaws.com/racevic.static/2019-01-01/flemington/sectionaltimes/race-{race}.json?callback=sectionaltimes_callback"
r = requests.get(url, headers=headers)
json_obj = json.loads(r.text.split('sectionaltimes_callback(')[1].rsplit(')', 1)[0])
print(json_obj)
#df = pd.DataFrame(json_obj['Horses'])
#writer = pd.ExcelWriter('horses.xlsx', engine='xlsxwriter')
#df.to_excel(writer, sheet_name='Sheet1')
#writer.save()
#print(df)
CodePudding user response:
First Create an empty df and use concat method to append DataFrame from loop df will have concated data from the looped and convert to excel
df=pd.DataFrame()
for race in range(1, 3):
url = f"https://s3-ap-southeast-2.amazonaws.com/racevic.static/2019-01-01/flemington/sectionaltimes/race-{race}.json?callback=sectionaltimes_callback"
r = requests.get(url, headers=headers)
json_obj = json.loads(r.text.split('sectionaltimes_callback(')[1].rsplit(')', 1)[0])
main_df=pd.DataFrame(json_obj['Horses'])
df=pd.concat([df,main_df])
df.reset_index(drop=True, inplace=True)
df.to_excel("horses.xlsx")
