I wrote a function to fill NaN with Values, but instead this function first fills the NaN with values and then deletes every value that was in the list before I did the function
def preprocessing(df):
median_male_3= df[(df["Sex"]=="male") & (df["Pclass"] ==3 )]["Age"].median()
median_male_2= df[(df["Sex"]=="male") & (df["Pclass"] ==2 )]["Age"].median()
median_male_1= df[(df["Sex"]=="male") & (df["Pclass"] ==1 )]["Age"].median()
median_female_3= df[(df["Sex"]=="female") & (df["Pclass"] ==3 )]["Age"].median()
median_female_2= df[(df["Sex"]=="female") & (df["Pclass"] ==2 )]["Age"].median()
median_female_1= df[(df["Sex"]=="female") & (df["Pclass"] ==1 )]["Age"].median()
def agemaking(para):
Age=para[0]
bookclass=para[1]
sex=para[2]
if pd.isnull(Age):
print(train_titanic["Age"]) #want to check what happens inside
if bookclass==3 and sex=="male":
return median_male_3
elif bookclass==2 and sex=="male":
return median_male_2
elif bookclass==1 and sex=="male":
return median_male_1
elif bookclass==3 and sex=="female":
return median_female_3
elif bookclass==2 and sex=="female":
return median_female_2
elif bookclass==1 and sex=="female":
return median_female_1
else:
return Age
train_titanic['Age']= train_titanic[['Age','Pclass','Sex']].apply(agemaking,axis=1)
thats my function
thats what i looked like before thats the suprising result
CodePudding user response:
Try this
def agemaking(para):
Age=para[0]
bookclass=para[1]
sex=para[2]
if pd.isnull(Age):
print(train_titanic["Age"])
if bookclass==3 and sex=="male":
return median_male_3
elif bookclass==2 and sex=="male":
return median_male_2
elif bookclass==1 and sex=="male":
return median_male_1
elif bookclass==3 and sex=="female":
return median_female_3
elif bookclass==2 and sex=="female":
return median_female_2
elif bookclass==1 and sex=="female":
return median_female_1
else:
return Age
else:
return age
CodePudding user response:
A shorter version of your code could be:
df['Age'] = df['Age'].fillna(df.groupby(['Sex', 'Pclass'])['Age'].transform('median'))
Compute the median Age per (Sex, Pclass) group and broadcast values to all rows with transform. Finally fill nan values with the computed value previously only and only if Age is null.
