I'm trying to generate a nested dictionary, but when the for loops finish, every key under the same column holds identical values.
def frequency(df, headers, numerical, target):
    """Build a nested dict of per-value target counts for each non-numerical column.

    Args:
        df: table indexed by column name; must support ``df[col].to_list()``
            and boolean-mask selection via ``df.loc[...]`` (e.g. a pandas
            DataFrame).
        headers: list of column names to analyse. Names that also appear in
            ``numerical`` are removed from this list in place.
        numerical: column names to skip. When empty, "No NM" is printed.
        target: name of the target (class) column.

    Returns:
        ``{column: {value: {target_value: matching_row_count + 1}}}``.
    """
    di = {}
    if len(numerical) == 0:
        print("No NM")
    else:
        # Drop numerical columns; this edits the caller's list in place.
        for i in numerical:
            if i in headers:
                headers.remove(i)
    for i in range(len(headers)):
        attdic = {}
        # NOTE: only one ``valdic`` exists per column ``i``. Every value ``j``
        # below writes into it and stores a reference to it, so all entries
        # of ``attdic`` are the same object and show the counts of the last
        # ``j`` processed.
        valdic = {}
        # OrderedDict.fromkeys de-duplicates while keeping first-seen order.
        aux_list = df[headers[i]].to_list()
        aux_list = list(OrderedDict.fromkeys(aux_list))
        tar_list = df[target].to_list()
        tar_list = list(OrderedDict.fromkeys(tar_list))
        for j in range(len(aux_list)):
            print(aux_list[j])
            for k in range(len(tar_list)):
                # Rows where the column equals value j AND the target equals class k.
                temp_df = df.loc[(df[headers[i]] == aux_list[j]) & (df[target] == tar_list[k])]
                # Row count plus one (presumably add-one smoothing -- confirm).
                tempvar = len(temp_df.index) + 1
                valdic[tar_list[k]] = tempvar
            attdic[aux_list[j]] = valdic
        di[headers[i]] = attdic
    return di
Output:
{'Outlook': {'overcast': {'No': 2, 'Yes': 5}, 'rainy': {'No': 2, 'Yes': 5}, 'sunny': {'No': 2, 'Yes': 5}}, 'Windy': {False: {'No': 3, 'Yes': 5}, True: {'No': 3, 'Yes': 5}}}
Expected output:
{'Outlook': {'overcast': {'No': 3, 'Yes': 3}, 'rainy': {'No': 3, 'Yes': 4}, 'sunny': {'No': 2, 'Yes': 5}}, 'Windy': {False: {'No': 4, 'Yes': 6}, True: {'No': 3, 'Yes': 5}}}
CodePudding user response:
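Notice that you create one `valdic` for each `i`, but you need a separate one for each `(i, j)` pair. Because every `attdic[aux_list[j]]` entry refers to that same dictionary object, all of them end up showing the counts written for the last `j`. Moving the creation of `valdic` inside the `j` loop should fix it: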
def frequency(df, headers, numerical, target):
    """Build a nested dict of per-value target counts for each non-numerical column.

    Args:
        df: table indexed by column name; must support ``df[col].to_list()``
            and boolean-mask selection via ``df.loc[...]`` (e.g. a pandas
            DataFrame).
        headers: list of column names to analyse. Names that also appear in
            ``numerical`` are removed from this list in place.
        numerical: column names to skip. When empty, "No NM" is printed.
        target: name of the target (class) column.

    Returns:
        ``{column: {value: {target_value: matching_row_count + 1}}}``, with
        an independent inner dict for every (column, value) pair.
    """
    di = {}
    if len(numerical) == 0:
        print("No NM")
    else:
        # Drop numerical columns; this edits the caller's list in place.
        for i in numerical:
            if i in headers:
                headers.remove(i)
    for i in range(len(headers)):
        attdic = {}
        # OrderedDict.fromkeys de-duplicates while keeping first-seen order.
        aux_list = df[headers[i]].to_list()
        aux_list = list(OrderedDict.fromkeys(aux_list))
        tar_list = df[target].to_list()
        tar_list = list(OrderedDict.fromkeys(tar_list))
        for j in range(len(aux_list)):
            valdic = {}  # create a new valdic for each j
            print(aux_list[j])
            for k in range(len(tar_list)):
                # Rows where the column equals value j AND the target equals class k.
                temp_df = df.loc[(df[headers[i]] == aux_list[j]) & (df[target] == tar_list[k])]
                # Row count plus one (presumably add-one smoothing -- confirm).
                tempvar = len(temp_df.index) + 1
                valdic[tar_list[k]] = tempvar
            attdic[aux_list[j]] = valdic
        di[headers[i]] = attdic
    return di
