I am working with this website (https://projects.fivethirtyeight.com/2020-nba-player-projections/).
CodePudding user response:
Why not just get the data directly from the site's JSON? That would be far more efficient than using Selenium.
import requests
import re
import json
import pandas as pd
from fuzzywuzzy import process
# Get Player Slugs from javascript
# Download the site's JS bundle, cut out the embedded player-search array,
# turn it into valid JSON and build a {player name: slug} lookup.
response = requests.get(
    'https://projects.fivethirtyeight.com/2020-nba-player-projections/js/bundle.js',
    timeout=30,  # never hang forever on a stalled connection
)
response.raise_for_status()  # fail loudly on 4xx/5xx instead of parsing an error page
js = response.text

# Raw string: `\.` and `\[` are regex escapes, not string escapes.
p = re.compile(r'{t\.exports=(\[.*}]},{}\])')
found = p.search(js)
if found is None:
    # Without this guard the failure would be an opaque AttributeError on None.
    raise RuntimeError('Could not locate the player list inside bundle.js')
# Drop the trailing "},{}..." remainder that the greedy pattern captures.
result = found.group(1).rsplit('},{}', 1)[0]

# The captured text uses bare JavaScript object keys; json.loads needs them
# quoted. NOTE(review): this is plain substring replacement, so it would also
# rewrite these words if they ever appear inside a value - confirm against the
# live bundle if parsing starts failing.
for key in ('team_abbr', 'name', 'full_team', 'short_team', 'slug'):
    result = result.replace(key, f'"{key}"')
# Strip the JS-only boolean field (!0 / !1 are not valid JSON).
result = result.replace(',randable:!1', '')
result = result.replace(',randable:!0', '')

playerSearch = json.loads(result)
df = pd.DataFrame(playerSearch)
players = list(df['name'])
slugs = list(df['slug'])
# Maps each player's display name to the slug used in the per-player JSON URL.
playerDict = dict(zip(players, slugs))
# Get the Player Data
# You could also make this as a user input
list_of_players = ['lebron james', 'Derik RoSe', 'LEamarkcus Allridge']
# Collects the downloaded JSON for each requested player, keyed by the
# matched (canonical) player name.
data = {}
# Build the candidate list once instead of on every loop iteration.
known_names = list(playerDict)
for player in list_of_players:
    # Fuzzy-match the free-form input against the known names; extractOne
    # returns a (best_match, score) tuple for a list of choices, so [0] is
    # the matched name.
    playerMatch = process.extractOne(player, known_names)[0]
    player_slug = playerDict[playerMatch]
    url = f'https://projects.fivethirtyeight.com/2020-nba-player-projections/{player_slug}.json'
    player_response = requests.get(url, timeout=30)
    # Surface HTTP errors directly rather than as a JSON decoding failure.
    player_response.raise_for_status()
    jsonData = player_response.json()
    data[playerMatch] = jsonData
    print(f'Returned: {playerMatch}')
Output:

