In [1]:
import pandas as pd
import numpy as np
Step 1: Creating the data frame
In [2]:
# Build a small long-format sales table: one row per (Day, Product) pair.
day = ['Day1', 'Day1', 'Day1', 'Day1', 'Day2', 'Day2', 'Day2']
product = ['A', 'B', 'C', 'D', 'A', 'B', 'C']
# Unseeded random sales figures in [0, 10); the values differ on every run.
sales = np.random.rand(7) * 10
# Construct from a column mapping so each column keeps its own dtype.
# Stacking the three sequences as rows and transposing would force every
# column (including the numeric Sales) to object dtype.
df = pd.DataFrame({'Day': day, 'Product': product, 'Sales': sales})
df
Out[2]:
Day | Product | Sales | |
---|---|---|---|
0 | Day1 | A | 9.195704 |
1 | Day1 | B | 9.855288 |
2 | Day1 | C | 2.591246 |
3 | Day1 | D | 4.182998 |
4 | Day2 | A | 9.355021 |
5 | Day2 | B | 4.063917 |
6 | Day2 | C | 5.310852 |
Step 2: Pivoting the Product values into columns
In [5]:
# Reshape long -> wide: one row per Day, one column per Product,
# with the Sales value in each cell. reset_index() then turns Day
# back into an ordinary column instead of the index.
df_pivot = (
    df
    .pivot(index='Day', columns='Product', values='Sales')
    .reset_index()
)
df_pivot
Out[5]:
Product | Day | A | B | C | D |
---|---|---|---|---|---|
0 | Day1 | 9.195704 | 9.855288 | 2.591246 | 4.182998 |
1 | Day2 | 9.355021 | 4.063917 | 5.310852 | NaN |
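When an index and Product combination repeats in the data frame
`pivot` raises an error when the same (Day, Product) pair appears more than once, so use `pivot_table` with an aggregation function instead.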
In [6]:
# Same layout as before, but (Day1, A) now appears twice so the
# (Day, Product) pairs are no longer unique.
day = ['Day1', 'Day1', 'Day1', 'Day1', 'Day2', 'Day2', 'Day2']
product = ['A', 'A', 'C', 'D', 'A', 'B', 'C']
# Unseeded random sales figures in [0, 10); the values differ on every run.
sales = np.random.rand(7) * 10
# Construct from a column mapping so Sales stays a float column.
# Stacking the sequences as rows and transposing would leave it as
# object dtype, which is a poor input for numeric aggregation.
df = pd.DataFrame({'Day': day, 'Product': product, 'Sales': sales})
df
Out[6]:
Day | Product | Sales | |
---|---|---|---|
0 | Day1 | A | 5.007048 |
1 | Day1 | A | 2.731242 |
2 | Day1 | C | 8.732437 |
3 | Day1 | D | 1.735509 |
4 | Day2 | A | 9.161315 |
5 | Day2 | B | 9.35687 |
6 | Day2 | C | 4.397887 |
In [8]:
# Reshape long -> wide as before, but through pivot_table so that
# repeated (Day, Product) pairs are combined by summing their Sales.
# reset_index() turns Day back into an ordinary column.
df_pivot = (
    df
    .pivot_table(index='Day', columns='Product', values='Sales', aggfunc="sum")
    .reset_index()
)
df_pivot
Out[8]:
Product | Day | A | B | C | D |
---|---|---|---|---|---|
0 | Day1 | 7.738290 | NaN | 8.732437 | 1.735509 |
1 | Day2 | 9.161315 | 9.35687 | 4.397887 | NaN |
No comments:
Post a Comment