In [18]:
import numpy as np
import pandas as pd
import datetime
from datetime import timedelta
from datetime import date
Creating Dummy data¶
In [7]:
date_val = pd.Series(['20/09/2020','22/09/2020','25/09/2020','28/09/2020','29/09/2020','30/09/2020'])
metric_val = pd.Series([1,2,3,4,5,6])
df=pd.DataFrame([date_val,metric_val]).T
df.columns=['Date_val','Mertic_Val']
df
Out[7]:
Date_val | Mertic_Val | |
---|---|---|
0 | 20/09/2020 | 1 |
1 | 22/09/2020 | 2 |
2 | 25/09/2020 | 3 |
3 | 28/09/2020 | 4 |
4 | 29/09/2020 | 5 |
5 | 30/09/2020 | 6 |
Adding reference date¶
In [13]:
df['Reference_Date']='15/09/2020'
df
Out[13]:
Date_val | Mertic_Val | Reference_Date | |
---|---|---|---|
0 | 20/09/2020 | 1 | 15/09/2020 |
1 | 22/09/2020 | 2 | 15/09/2020 |
2 | 25/09/2020 | 3 | 15/09/2020 |
3 | 28/09/2020 | 4 | 15/09/2020 |
4 | 29/09/2020 | 5 | 15/09/2020 |
5 | 30/09/2020 | 6 | 15/09/2020 |
In [14]:
# checking dtypes
df.dtypes
Out[14]:
Date_val object Mertic_Val object Reference_Date object dtype: object
Converting dates into proper date format¶
In [34]:
interim_df=df[['Date_val','Reference_Date']].apply(lambda x: pd.to_datetime(x))
interim_df['Metric_Val']=df['Mertic_Val']
interim_df.dtypes
Out[34]:
Date_val datetime64[ns] Reference_Date datetime64[ns] Metric_Val object dtype: object
In [35]:
interim_df
Out[35]:
Date_val | Reference_Date | Metric_Val | |
---|---|---|---|
0 | 2020-09-20 | 2020-09-15 | 1 |
1 | 2020-09-22 | 2020-09-15 | 2 |
2 | 2020-09-25 | 2020-09-15 | 3 |
3 | 2020-09-28 | 2020-09-15 | 4 |
4 | 2020-09-29 | 2020-09-15 | 5 |
5 | 2020-09-30 | 2020-09-15 | 6 |
Days range¶
In [36]:
dys=[4,8,10]
dys
Out[36]:
[4, 8, 10]
We have to check whether reference_date + days in the list of dys is greater than Date_val¶
In [37]:
current_dys=8
indx_dys=dys.index(current_dys)
if indx_dys == 0:
print("Dont compute filter in date range")
else:
print("Next")
lower_dys=dys[indx_dys-1]
upper_dys=current_dys
interim_df['Flag']=np.where((interim_df['Date_val'] >= interim_df['Reference_Date']
+ timedelta(days=lower_dys))
& (interim_df['Date_val'] <= interim_df['Reference_Date']
+ timedelta(days=upper_dys)),"Yes","No")
Next
In [38]:
# Filtering on Yes value in Flag
interim_df['Metric_Val2'] = np.where(interim_df['Flag']=="Yes",
interim_df['Metric_Val'],np.nan)
interim_df.drop('Metric_Val',axis=1,inplace=True)
interim_df
Out[38]:
Date_val | Reference_Date | Flag | Metric_Val2 | |
---|---|---|---|---|
0 | 2020-09-20 | 2020-09-15 | Yes | 1 |
1 | 2020-09-22 | 2020-09-15 | Yes | 2 |
2 | 2020-09-25 | 2020-09-15 | No | NaN |
3 | 2020-09-28 | 2020-09-15 | No | NaN |
4 | 2020-09-29 | 2020-09-15 | No | NaN |
5 | 2020-09-30 | 2020-09-15 | No | NaN |
In [44]:
# renaming column back to original name
interim_df.rename(columns={'Metric_Val2':'Metric_Val'},inplace=True)
In [45]:
interim_df
Out[45]:
Date_val | Reference_Date | Flag | Metric_Val | |
---|---|---|---|---|
0 | 2020-09-20 | 2020-09-15 | Yes | 1 |
1 | 2020-09-22 | 2020-09-15 | Yes | 2 |
2 | 2020-09-25 | 2020-09-15 | No | NaN |
3 | 2020-09-28 | 2020-09-15 | No | NaN |
4 | 2020-09-29 | 2020-09-15 | No | NaN |
5 | 2020-09-30 | 2020-09-15 | No | NaN |
No comments:
Post a Comment