# Pivoting is used in many cases where different summaries need to be created for different features.
# Here we will see how to do standard pivoting operations using pandas.
import pandas as pd
import numpy as np

# Inject a CSS rule that stretches the notebook's `.container` element to full width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important;margin-left: -5%; }</style>"))

# We will use the OrderedDict class from the collections library.
# Before Python 3.7, normal dictionaries did not guarantee to remember the order
# in which key-value pairs were inserted (since 3.7 they do),
# but OrderedDict always remembers the insertion order.
from collections import OrderedDict
# Sample data: an ordered mapping of column name -> column values.
table = OrderedDict((
    ("Item", ['Item0', 'Item1', 'Item2', 'Item3']),
    ('CType', ['Gold', 'Bronze', 'Gold', 'Silver']),
    ('USD', [5, 7, 2, 7]),
    ('EU', [2, 4, 6, 1]),
))
df1 = pd.DataFrame(table)
df1

# Creating the pivot: one row per Item, one column per CType, cells hold USD.
p = df1.pivot(index='Item', columns='CType', values='USD')
p

# We see NaN wherever df1 has no row for that (Item, CType) combination.

type(p)
# p is a DataFrame object, hence standard DataFrame operations apply.
# Output: pandas.core.frame.DataFrame

p.columns.values
# Getting the column names
# Output: array(['Bronze', 'Gold', 'Silver'], dtype=object)

p.index.values
# Getting the index values
# Output: array(['Item0', 'Item1', 'Item2', 'Item3'], dtype=object)

print(p[p.index == 'Item0'].Gold.values)
# Getting the values for Item0 corresponding to the Gold column
# Output: [5.]

# Another subsetting operation: pick two columns, then slice rows by position.
p[['Bronze', 'Gold']][0:1]

p[['Bronze', 'Gold']][0:2]

# Using iloc for purely positional subsetting (first two rows, first two columns).
p.iloc[0:2, 0:2]

# Creates hierarchical columns; the values argument is not given,
# so a summary is created for both the USD and the EU values.
a = df1.pivot(index='Item', columns='CType')
a

# Accessing the hierarchical DataFrame by its top-level column label.
a['USD']  # The result is itself a DataFrame

# Limitations of the pivot method
table1 = OrderedDict((
    ("Item", ['Item0', 'Item0', 'Item0', 'Item1']),
    ('CType', ['Gold', 'Bronze', 'Gold', 'Silver']),
    ('USD', [5, 7, 2, 7]),
    ('EU', [2, 4, 6, 1]),
))
df2 = pd.DataFrame(table1)

df2

# Calling df2.pivot(index='Item', columns='CType', values='USD') here raises
# a ValueError: the (Item0, Gold) combination occurs twice, so pivot cannot
# place both USD values (5 and 2) into a single cell.

# To handle this kind of situation we can use the pivot_table function,
# which lets us specify an aggregating function for duplicated combinations.
# The aggregation is named by the string 'sum' rather than np.sum: the result
# is the same, and recent pandas versions emit a FutureWarning when a NumPy
# reduction callable is passed as aggfunc.
p1 = df2.pivot_table(index='Item', columns='CType', values='USD', aggfunc='sum')
p1

# Without the values argument, every remaining numeric column (USD and EU)
# is aggregated, producing hierarchical columns.
p2 = df2.pivot_table(index='Item', columns='CType', aggfunc='sum')
p2

!jupyter nbconvert --to html PivotingusingPandas.ipynb

[NbConvertApp] Converting notebook PivotingusingPandas.ipynb to html
[NbConvertApp] Writing 299655 bytes to PivotingusingPandas.html

Machine Learning Made Easy

Tuesday, October 6, 2020

Blog 6 Python Blog:Pivoting using Pandas

No comments:

Post a Comment

Price Elasticity Model in Python

CType	Bronze	Gold	Silver
Item
Item0	NaN	5.0	NaN
Item1	7.0	NaN	NaN
Item2	NaN	2.0	NaN
Item3	NaN	NaN	7.0

	USD			EU
CType	Bronze	Gold	Silver	Bronze	Gold	Silver
Item
Item0	NaN	5.0	NaN	NaN	2.0	NaN
Item1	7.0	NaN	NaN	4.0	NaN	NaN
Item2	NaN	2.0	NaN	NaN	6.0	NaN
Item3	NaN	NaN	7.0	NaN	NaN	1.0