Sunday, February 27, 2022

Replace Missing Value in Data Frame with mean

In [1]:

import numpy as np
import pandas as pd

In [34]:

df=pd.DataFrame(np.array([np.random.rand(5),
                         np.random.rand(5)])).T
df.columns=['Var1','Var2']
df

Out[34]:

In [31]:

# Index position to introduce missing/NaN values
# Creating two rows indices for adding missing/NaN values
pos=np.random.randint(5, size=(2))
pos

Out[31]:

array([1, 3])

In [32]:

df.iloc[pos,]

Out[32]:

	Var1	Var2
1	NaN	NaN
3	0.833466	0.280094

In [35]:

# Adding missing/NaN values
df.iloc[pos,]=np.NaN
df

Out[35]:

In [48]:

s1=df['Var1']
s1

Out[48]:

0    0.976802
1         NaN
2    0.412303
3         NaN
4    0.091193
Name: Var1, dtype: float64

In [38]:

np.mean(s1)

Out[38]:

0.4934328696723167

In [51]:

s2=np.where(s1.isnull(),np.mean(s1),s1)
s2

Out[51]:

array([0.97680246, 0.49343287, 0.4123033 , 0.49343287, 0.09119284])

In [52]:

# Now creating the function
def Replace_NaN(x):
    y=np.where(x.isnull(),np.mean(x),x)
    return(y)

In [54]:

df.apply(Replace_NaN)

Out[54]:

Machine Learning Made Easy

Sunday, February 27, 2022