I have this json file:
{
"totalCount": 10,
"nextPageKey": null,
"result": [
{
"metricId": "builtin:host.cpu.system:",
"data": [
{
"dimensions": [
"host13"
],
"dimensionMap": {
"dt.entity.host": "host13"
},
"timestamps": [
1643131800000,
1643132400000,
1643133000000,
1643133600000,
1643134200000,
1643134800000,
1643135400000,
1643136000000,
1643136600000,
1643137200000,
1643137800000,
1643138400000,
1643139000000
],
"values": [
1.3207547664642334,
2.684309959411621,
2.9341392517089844,
2.646502733230591,
3.0115926265716553,
5.730949878692627,
2.8099799156188965,
1.348116397857666,
1.687232494354248,
2.947845697402954,
1.4364919662475586,
1.7499685287475586,
3.0505483150482178
]
},
{
"dimensions": [
"host234"
],
"dimensionMap": {
"dt.entity.host": "host234"
},
"timestamps": [
1643131800000,
1643132400000,
1643133000000,
1643133600000,
1643134200000,
1643134800000,
1643135400000,
1643136000000,
1643136600000,
1643137200000,
1643137800000,
1643138400000,
1643139000000
],
"values": [
1.3899515867233276,
3.487347364425659,
3.2599117755889893,
2.5915210247039795,
2.971543788909912,
5.280444622039795,
1.863745093345642,
3.30224347114563,
1.6360433101654053,
3.5056746006011963,
4.327470779418945,
2.026176691055298,
3.5184237957000732
]
},
{
"dimensions": [
"host9000"
],
"dimensionMap": {
"dt.entity.host": "host9000"
},
"timestamps": [
1643131800000,
1643132400000,
1643133000000,
1643133600000,
1643134200000,
1643134800000,
1643135400000,
1643136000000,
1643136600000,
1643137200000,
1643137800000,
1643138400000,
1643139000000
],
"values": [
1.6770082712173462,
3.380143642425537,
2.664911985397339,
3.472454071044922,
3.053309202194214,
4.846103668212891,
1.625740885734558,
3.096479892730713,
1.6604264974594116,
3.3423821926116943,
1.8394920825958252,
2.470339775085449,
2.6470096111297607
]
},
{
"dimensions": [
"host777"
],
"dimensionMap": {
"dt.entity.host": "host777"
},
"timestamps": [
1643131800000,
1643132400000,
1643133000000,
1643133600000,
1643134200000,
1643134800000,
1643135400000,
1643136000000,
1643136600000,
1643137200000,
1643137800000,
1643138400000,
1643139000000
],
"values": [
1.409336805343628,
2.9157116413116455,
3.176203727722168,
2.773225784301758,
2.760272264480591,
4.695191383361816,
2.698251724243164,
3.1021218299865723,
2.0791330337524414,
3.1175060272216797,
2.448374032974243,
2.5085086822509766,
3.1482181549072266
]
},
{
"dimensions": [
"host999"
],
"dimensionMap": {
"dt.entity.host": "host999"
},
"timestamps": [
1643131800000,
1643132400000,
1643133000000,
1643133600000,
1643134200000,
1643134800000,
1643135400000,
1643136000000,
1643136600000,
1643137200000,
1643137800000,
1643138400000,
1643139000000
],
"values": [
1.3565394878387451,
3.0833122730255127,
2.405238628387451,
2.5759658813476562,
2.4040277004241943,
4.598429203033447,
2.5394818782806396,
3.2278401851654053,
1.499437689781189,
3.1490111351013184,
3.756218910217285,
3.3754220008850098,
2.418745279312134
]
},
{
"dimensions": [
"Host111"
],
"dimensionMap": {
"dt.entity.host": "Host111"
},
"timestamps": [
1643131800000,
1643132400000,
1643133000000,
1643133600000,
1643134200000,
1643134800000,
1643135400000,
1643136000000,
1643136600000,
1643137200000,
1643137800000,
1643138400000,
1643139000000
],
"values": [
1.4735431671142578,
3.3221476078033447,
3.101944923400879,
2.8839704990386963,
4.312218189239502,
1.8795099258422852,
1.5133275985717773,
3.0394625663757324,
1.683100938796997,
2.910053014755249,
1.5296052694320679,
1.9281476736068726,
2.5355799198150635
]
}
]
}
]
}
I need to create a data frame out of this json file field "dimensions', 'timestamp' and 'values'
dimensions timestamps values
host13 1.64313E 12 1.320754766
host13 1.64313E 12 2.684309959
host13 1.64313E 12 2.934139252
host13 1.64313E 12 2.646502733
I tried this:
dataList = []
dataList.append([resp['data']['dimensions'], resp['data']['timestamp'],resp['data']['values']])
data frame is not working? any ideas?
CodePudding user response:
Use pd.json_normalize for better performance.
df = pd.json_normalize(data, record_path=['result', 'data'])[['dimensions', 'timestamps', 'values']]
# Then explode the list into rows
df = df.explode('dimensions').explode('timestamps').explode('values')
# If pandas version 1.3
df = df.explode(['dimensions', 'timestamps', 'values'])
CodePudding user response:
You need to iterate on each data, and for each on the values of timestamps and values
dataList = []
for item in data['result'][0]['data']:
for timestamp, value in zip(item["timestamps"], item["values"]):
dataList.append((item["dimensions"][0], timestamp, value))
df = pd.DataFrame(dataList, columns=["dimensions", "timestamps", "values"])
Using itertools.repeat you can shorten it in a nice list comprehension
from itertools import repeat
dataList = [zip(repeat(item["dimensions"][0]), item["timestamps"], item["values"])
for item in data['result'][0]['data']]
