In [1]:
import numpy as np
import pandas as pd
In [37]:
# Toy long-format table: one row per (parameter, observed value) pair.
parm = np.repeat(['Blood_Gulucose', 'Blood Pressure', 'Sick'], [4, 3, 2])
# Integers and strings share one array, so NumPy stores every value as text.
val = np.array([1, 2, 3, 4, 'high', 'high', 'low', 'No', 'No'])
# Stack the two arrays as rows labelled with the final column names, then
# transpose so each array becomes a column.
df = pd.DataFrame([parm, val], index=['Parm', 'Value']).T
df
Out[37]:
Parm | Value | |
---|---|---|
0 | Blood_Gulucose | 1 |
1 | Blood_Gulucose | 2 |
2 | Blood_Gulucose | 3 |
3 | Blood_Gulucose | 4 |
4 | Blood Pressure | high |
5 | Blood Pressure | high |
6 | Blood Pressure | low |
7 | Sick | No |
8 | Sick | No |
In [38]:
# Inspect the dtype of each column. `Value` was built from one array mixing
# numbers and text labels, so both columns are reported as object rather than
# a numeric dtype.
df.dtypes
Out[38]:
Parm object Value object dtype: object
In [39]:
# Split the mixed `Value` column into two typed views:
#   Value_Numeric - numeric parse of each value; anything that cannot be
#                   parsed (e.g. 'high', 'No') becomes NaN via errors='coerce'
#   Value_Cat     - the raw values kept as objects, used later for the mode
# pd.to_numeric is applied once to the whole Series (vectorized) rather than
# once per element through .apply.
df['Value_Numeric'] = pd.to_numeric(df['Value'], errors='coerce')
df['Value_Cat'] = df['Value'].astype(object)
df
Out[39]:
Parm | Value | Value_Numeric | Value_Cat | |
---|---|---|---|---|
0 | Blood_Gulucose | 1 | 1.0 | 1 |
1 | Blood_Gulucose | 2 | 2.0 | 2 |
2 | Blood_Gulucose | 3 | 3.0 | 3 |
3 | Blood_Gulucose | 4 | 4.0 | 4 |
4 | Blood Pressure | high | NaN | high |
5 | Blood Pressure | high | NaN | high |
6 | Blood Pressure | low | NaN | low |
7 | Sick | No | NaN | No |
8 | Sick | No | NaN | No |
In [40]:
# Summarise each Parm with a single Metric:
#   - mean of Value_Numeric for numeric parameters
#   - mode of Value_Cat for categorical parameters
numeric_mean = df.groupby('Parm')['Value_Numeric'].mean().reset_index()
# pd.Series.mode returns every tied value, so a group without a unique mode
# (here Blood_Gulucose) ends up holding an array; that row takes the mean
# below, so the array never reaches Metric.
cat_mode = df.groupby('Parm')['Value_Cat'].agg(pd.Series.mode).reset_index()
# groupby sorts its keys; merging onto the unique Parm values restores the
# order in which the parameters first appear in df.
interim_df = pd.DataFrame({'Parm': df['Parm'].unique()})
final_df = interim_df.merge(numeric_mean, on='Parm').merge(cat_mode, on='Parm')
# Parameters whose values are labels rather than numbers; these report the mode.
categorical_parms = ['Blood Pressure', 'Sick']
final_df['Metric'] = np.where(final_df['Parm'].isin(categorical_parms),
                              final_df['Value_Cat'], final_df['Value_Numeric'])
final_df
Out[40]:
Parm | Value_Numeric | Value_Cat | Metric | |
---|---|---|---|---|
0 | Blood_Gulucose | 2.5 | [1, 2, 3, 4] | 2.5 |
1 | Blood Pressure | NaN | high | high |
2 | Sick | NaN | No | No |
In [45]:
# Keep only Parm and Metric. Rebinding the result (instead of inplace=True)
# together with errors='ignore' makes this cell safe to re-run; with the
# in-place form a second run raises KeyError because the helper columns are
# already gone.
final_df = final_df.drop(columns=['Value_Numeric', 'Value_Cat'], errors='ignore')
In [46]:
# Display the final summary: one row per Parm with its Metric.
final_df
Out[46]:
Parm | Metric | |
---|---|---|
0 | Blood_Gulucose | 2.5 |
1 | Blood Pressure | high |
2 | Sick | No |