Function unintentionally turns existing values into NaN

Question:

I wrote a function to fill NaN entries with values. It does fill the NaN entries, but it then also removes every value that was already in the column before I ran the function.

# Compute the median "Age" for each of the six (Sex, Pclass) combinations in df.
# These medians are the values agemaking (below) returns for rows with a null Age.
# NOTE(review): the assignments are not indented under the def as shown here --
# presumably whitespace was lost when the snippet was posted; confirm the
# intended nesting before running.
def preprocessing(df):
median_male_3= df[(df["Sex"]=="male") &  (df["Pclass"] ==3 )]["Age"].median()
median_male_2= df[(df["Sex"]=="male") &  (df["Pclass"] ==2 )]["Age"].median()
median_male_1= df[(df["Sex"]=="male") &  (df["Pclass"] ==1 )]["Age"].median()
median_female_3= df[(df["Sex"]=="female") &  (df["Pclass"] ==3 )]["Age"].median()
median_female_2= df[(df["Sex"]=="female") &  (df["Pclass"] ==2 )]["Age"].median()
median_female_1= df[(df["Sex"]=="female") &  (df["Pclass"] ==1 )]["Age"].median()

# Row-wise helper used with DataFrame.apply(axis=1): for a row whose Age is
# null it returns the median Age of the matching (Sex, Pclass) combination.
def agemaking(para):
    # para holds one row's values in the order Age, Pclass, Sex.
    Age=para[0]
    bookclass=para[1]
    sex=para[2]
    
    if pd.isnull(Age):
        print(train_titanic["Age"])          # debug output: want to check what happens inside
        if bookclass==3 and sex=="male":
            return median_male_3
        elif bookclass==2 and sex=="male":
            return median_male_2
        elif bookclass==1 and sex=="male":
            return median_male_1
        elif bookclass==3 and sex=="female":
            return median_female_3
        elif bookclass==2 and sex=="female":
            return median_female_2
        elif bookclass==1 and sex=="female":
            return median_female_1
        
        else:
            # No (Sex, Pclass) combination matched: return the still-null Age.
            return Age
    # NOTE(review): the `else` above belongs to the inner if/elif chain, not to
    # `if pd.isnull(Age)`. When Age is NOT null the function falls off the end
    # and implicitly returns None -- presumably why the existing ages vanish.
        
# Call agemaking once per row (axis=1) on the Age/Pclass/Sex columns and
# overwrite the Age column with whatever it returns.
train_titanic['Age']= train_titanic[['Age','Pclass','Sex']].apply(agemaking,axis=1)

That's my function.

That's what it looked like before.
That's the surprising result.

Asked By: Simon

||

Answers:

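Try this. Your outer `if pd.isnull(Age)` has no `else` branch, so for every row whose Age is not null the function returns nothing (None). The version below returns the existing Age in that case: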

def agemaking(para):
    """Return the Age for one row, imputing a missing Age from group medians.

    Intended for ``DataFrame.apply(agemaking, axis=1)`` over the columns
    ``['Age', 'Pclass', 'Sex']``.

    Args:
        para: One row's values in the order (Age, Pclass, Sex). Any
            three-item iterable works (a pandas Series row, list or tuple).

    Returns:
        The row's own Age when it is not null. Otherwise the median Age of
        the matching (Sex, Pclass) combination, taken from the
        ``median_<sex>_<class>`` variables defined outside this function.
        If no combination matches, the (null) Age is returned unchanged.
    """
    # Unpack by position rather than para[0]/para[1]/para[2]: integer keys on
    # a label-indexed Series are deprecated in recent pandas versions.
    Age, bookclass, sex = para

    # Rows that already have an age must be passed through untouched.
    # Returning nothing here is what wiped the existing values.
    if not pd.isnull(Age):
        return Age

    # Median lookup keyed by (Sex, Pclass); built only for rows that need it.
    medians = {
        ("male", 3): median_male_3,
        ("male", 2): median_male_2,
        ("male", 1): median_male_1,
        ("female", 3): median_female_3,
        ("female", 2): median_female_2,
        ("female", 1): median_female_1,
    }
    # Unknown Sex/Pclass combination: leave the value null.
    return medians.get((sex, bookclass), Age)
Answered By: jjislam

A shorter version of your code could be:

# Fill each missing Age with the median Age of that row's (Sex, Pclass) group.
df['Age'] = df['Age'].fillna(df.groupby(['Sex', 'Pclass'])['Age'].transform('median'))

Compute the median Age per (Sex, Pclass) group and broadcast the values to all rows with transform. Finally, fillna replaces a row's Age with that previously computed group median if, and only if, the Age is null; existing ages are left untouched.

Answered By: Corralien
Categories: questions Tags: , , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.