In [2]:
# Pivoting is used in a lot of cases where different summaries needs to be created for different Features
# Here we will see how to do standard pivoting operations using Pandas
import pandas as pd
import numpy as np
In [18]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important;margin-left: -5%; }</style>"))
In [4]:
# We will use OrderedDict function from collecctions library
# Normal dictionaries doesnt remember the order in which the key value pairs are inserted in a dictionary
# but OrderedDict remebers the order
from collections import OrderedDict
table = OrderedDict((
("Item", ['Item0', 'Item1', 'Item2', 'Item3']),
('CType',['Gold', 'Bronze', 'Gold', 'Silver']),
('USD', [5,7,2,7]),
('EU', [2,4,6,1])
))
df1 = pd.DataFrame(table)
df1
Out[4]:
In [6]:
# Creating the pivot
p = df1.pivot(index='Item', columns='CType', values='USD')
p
# We see the NaN where there is no value in df1 for Ctype columns
Out[6]:
In [8]:
type(p)
# p is a Data Frame object and hence standard data frame operations applies
Out[8]:
In [10]:
p.columns.values
# Getting the column names
Out[10]:
In [12]:
p.index.values
# Getting the iindex values
Out[12]:
In [14]:
print (p[p.index=='Item0'].Gold.values)
# Getting the values for item0 corresponding to Gold column
In [16]:
# Another subsetting operation
p[['Bronze','Gold']][0:1]
Out[16]:
In [18]:
p[['Bronze','Gold']][0:2]
Out[18]:
In [20]:
# Using iloc function for subsetting
p.iloc[0:2,0:2]
Out[20]:
In [22]:
# Creates Hierarchical Columns; Values argument is not given
# Here for USD as well as EU values , summary is created
a = df1.pivot(index='Item',columns='CType')
a
Out[22]:
In [24]:
# Accessing Hierarchical Dataframe
a['USD'] # Type is a Dataframe
Out[24]:
In [26]:
# Limitations with pivot method
table1 = OrderedDict((
("Item", ['Item0', 'Item0', 'Item0', 'Item1']),
('CType',['Gold', 'Bronze', 'Gold', 'Silver']),
('USD', [5,7,2,7]),
('EU', [2,4,6,1])
))
df2 = pd.DataFrame(table1)
df2
Out[26]:
In [27]:
# p1 = df2.pivot(index='Item', columns='CType', values='USD')
# Throws an error as item0 value repeats in 'item' column
In [29]:
# To handle this kind of situations we can use pivot_table function
p1 = df2.pivot_table(index='Item', columns='CType', values='USD', aggfunc=np.sum)
p1
# here we can specify an agggreagating function
Out[29]:
In [31]:
p2 = df2.pivot_table(index='Item', columns='CType', aggfunc=np.sum)
p2
Out[31]:
In [19]:
!jupyter nbconvert --to html PivotingusingPandas.ipynb
No comments:
Post a Comment