I have this kind of json I would transform it into a pandas dataframe, with specific columns names.
{
"data": [
{
"id": 1,
"name": "3Way Result",
"suspended": false,
"bookmaker": {
"data": [
{
"id": 27802,
"name": "Ladbrokes",
"odds": {
"data": [
{
"label": "1",
"value": "1.61",
"probability": "62.11%",
"dp3": "1.610",
"american": -164,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:41:27.000000",
"timezone_type": 3,
"timezone": "UTC"
}
},
{
"label": "X",
"value": "3.90",
"probability": "25.64%",
"dp3": "3.900",
"american": 290,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:41:27.000000",
"timezone_type": 3,
"timezone": "UTC"
}
},
{
"label": "2",
"value": "5.20",
"probability": "19.23%",
"dp3": "5.200",
"american": 420,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:41:27.000000",
"timezone_type": 3,
"timezone": "UTC"
}
}
]
}
},
{
"id": 70,
"name": "Pncl",
"odds": {
"data": [
{
"label": "1",
"value": "1.65",
"probability": "60.61%",
"dp3": "1.645",
"american": -154,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:59:18.000000",
"timezone_type": 3,
"timezone": "UTC"
}
},
{
"label": "X",
"value": "4.20",
"probability": "23.81%",
"dp3": "4.200",
"american": 320,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:59:18.000000",
"timezone_type": 3,
"timezone": "UTC"
}
},
{
"label": "2",
"value": "5.43",
"probability": "18.42%",
"dp3": "5.430",
"american": 443,
"factional": null,
"winning": null,
"handicap": null,
"total": null,
"bookmaker_event_id": null,
"last_update": {
"date": "2021-10-01 16:59:18.000000",
"timezone_type": 3,
"timezone": "UTC"
}
}
]
}
}
]
}
}
],
"meta": {
"plans": [
{
"name": "Football Free Plan",
"features": "Standard",
"request_limit": "180,60",
"sport": "Soccer"
}
],
"sports": [
{
"id": 1,
"name": "Soccer",
"current": true
}
]
}
}
All columns name contains the name of the bookmaker plus the label value.
I would take the value in label and use it as column name with the name of the bookmaker in name. Then the float in value use it as row of the dataframe
Here the Expected Output
1_LadBrokes X_LadBrokes 2_LadBrokes last_update_LadBrokes 1_Pncl X_Pncl 2_Pncl last_update_Pncl
0 1.61 3.9 5.2 2021-10-01 16:41:27.000000 1.65 4.2 5.43 2021-10-01 16:59:18.000000
CodePudding user response:
You can achieve it like so using json_normalize apply.
def set_values(x):
data = x["odds.data"]
label = data.get("label")
value = data.get("value")
last_update_date = data.get("last_update").get("date")
name = x["name"]
x[f"{label}_{name}"] = value
x[f"last_update_{name}"] = last_update_date
return x
df = (
pd.json_normalize(data["data"], record_path=["bookmaker", "data"])
.explode("odds.data")
.apply(lambda x: set_values(x), axis=1)
.drop(["odds.data", "id", "name"], axis=1)
.ffill()
.bfill()
.head(1)
)
In [39]: df
Out[39]:
1_Ladbrokes 1_Pncl 2_Ladbrokes 2_Pncl X_Ladbrokes X_Pncl last_update_Ladbrokes last_update_Pncl
0 1.61 1.65 5.20 5.43 3.90 4.20 2021-10-01 16:41:27.000000 2021-10-01 16:59:18.000000
CodePudding user response:
Use pd.json_normalize and create two subdataframes for value and last_update them join them.
out = pd.json_normalize(
data=data['data'],
record_path=['bookmaker', 'data', 'odds', 'data'],
meta=[['bookmaker', 'data', 'name']]
)[['label', 'value', 'last_update.date', 'bookmaker.data.name']]
df1 = out.set_index(out['label'] '_' out['bookmaker.data.name'])['value']
df2 = out.set_index('bookmaker.data.name')['last_update.date'] \
.add_prefix('last_update_').drop_duplicates()
df = pd.concat([df1, df2]).to_frame().T
Output:
>>> df
1_Ladbrokes_Ladbrokes X_Ladbrokes_Ladbrokes 2_Ladbrokes_Ladbrokes 1_Pncl_Pncl X_Pncl_Pncl 2_Pncl_Pncl last_update_Ladbrokes last_update_Pncl
0 1.61 3.90 5.20 1.65 4.20 5.43 2021-10-01 16:41:27.000000 2021-10-01 16:59:18.000000
