I am web scraping NSE corporate announcements with the code below. The problem is that the URL I am using returns only 20 items per request, so most of the hundreds of announcements published each day are missed.
I want to solve this problem so that I get all of the announcements, the earlier ones as well as the most recent ones. Here is my code:
import logging
from datetime import date
from datetime import datetime

import pandas as pd
import requests
# Download the NSE corporate-announcements feed, tidy the columns and save
# the result to "<day>-<month>-CA.csv" in the current directory.
today = date.today()
logger = logging.getLogger(__name__)

# Browser-like headers sent with both requests below.
__request_headers = {
    'Host': 'www.nseindia.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:82.0) Gecko/20100101 Firefox/82.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    # "br" is deliberately not advertised: requests can only decode Brotli
    # bodies when an optional brotli package is installed.
    'Accept-Encoding': 'gzip, deflate',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
}

nse_url = 'https://www.nseindia.com/'
# NOTE(review): without a date range this endpoint reportedly returns only
# the latest 20 items - confirm against the API before relying on it.
url = 'https://www.nseindia.com/api/corporate-announcements?index=equities'

result = None
try:
    # The landing page is requested first only to collect the cookies that
    # are then sent along with the API call.
    resp = requests.get(url=nse_url, headers=__request_headers, timeout=30)
    resp.raise_for_status()
    req_cookies = {
        name: resp.cookies[name] for name in ('nsit', 'nseappid', 'ak_bmsc')
    }
    tresp = requests.get(
        url=url, headers=__request_headers, cookies=req_cookies, timeout=30
    )
    # Surface an HTTP error instead of trying to parse an error page as JSON.
    tresp.raise_for_status()
    payload = tresp.json()
except (OSError, KeyError, ValueError):
    # OSError covers every requests exception (connection, timeout, HTTP
    # status); KeyError is a missing cookie; ValueError is a non-JSON body.
    logger.exception('Unable to fetch data')
else:
    result = pd.DataFrame(payload)

if result is not None and not result.empty:
    # Discard the fields that are not wanted in the CSV.
    result = result.drop(
        columns=[
            'difference', 'dt', 'exchdisstime', 'csvName', 'old_new', 'orgid',
            'seq_id', 'sm_isin', 'bflag', 'symbol', 'sort_date',
        ]
    )
    result = result.rename(
        columns={
            'an_dt': 'DateandTime',
            'attchmntFile': 'Source',
            'attchmntText': 'Topic',
            'desc': 'Type',
            'smIndustry': 'Sector',
            'sm_name': 'Company Name',
        }
    )
    # Split the combined timestamp on whitespace into separate columns.
    result[['Date', 'Time']] = result['DateandTime'].str.split(expand=True)
    result.to_csv(f'{today.day}-{today.month}-CA.csv', index=True)
    print(result)
elif result is not None:
    logger.warning('No announcements returned')
CodePudding user response:
You can use the date-range form of the URL: add the `from_date` and `to_date` query parameters to your request and set both to today's date (or to whatever date range you want).
import requests
import pandas as pd
from datetime import datetime
# Fetch the NSE corporate announcements for today's date and save them to
# nseindia.csv in the current directory.
headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'}
url = 'https://www.nseindia.com/'

# Both ends of the date range are today, formatted as day-month-year.
today = datetime.now().strftime('%d-%m-%Y')
api_url = f'https://www.nseindia.com/api/corporate-announcements?index=equities&from_date={today}&to_date={today}'

# The context manager closes the session's connections when the block ends.
with requests.Session() as s:
    # Request the home page first; presumably this lets the session pick up
    # the cookies the API call needs - confirm against the site's behaviour.
    step = s.get(url, headers=headers, timeout=30)
    step.raise_for_status()

    api_resp = s.get(api_url, headers=headers, timeout=30)
    # Raise an HTTP error rather than a confusing JSON decode error when the
    # API refuses the request.
    api_resp.raise_for_status()
    resp = api_resp.json()

df = pd.DataFrame(resp)
df.to_csv('nseindia.csv', index=False)
print('Saved to nseindia.csv')
