In [176]:
# Adjusting the notebook cell width and margins
In [184]:
from IPython.display import display, HTML
# Inject a <style> block into the rendered page: widen the notebook container
# to 100%, shift it left, and resize the menu bar and toolbar containers.
# NOTE(review): the selectors look like classic Jupyter Notebook element ids;
# presumably they have no effect in other front ends — confirm.
display(HTML(data="""
<style>
div#notebook-container { width: 100%; }
div#notebook-container { margin-left: -2.8%; }
div#menubar-container { width: 65%; }
div#maintoolbar-container { width: 99%; }
</style>
"""))
In [148]:
# Importing the libraries
In [1]:
import pandas as pd
import numpy as np
In [ ]:
# Let's create a dummy data frame and
# review the functions
# associated with a data frame
In [76]:
# Build a small 2-row x 3-column demo frame from a NumPy array.
# The bare name on the last line makes the notebook render the frame.
df_m = pd.DataFrame(
    data=np.array([[1, 2, 3], [10, 11, 12]]),
    columns=['col1', 'col2', 'col3'],
)
df_m
Out[76]:
In [120]:
# Number of rows and columns. The output is a (rows, columns) tuple.
# When the cells are run top to bottom the frame has
# 2 rows and 3 columns here (the 'Name' column is only added in a later cell)
df_m.shape
Out[120]:
In [121]:
# Get the number of rows: the first element of the shape tuple
df_m.shape[0]
Out[121]:
In [122]:
# Get the number of columns: the second element of the shape tuple
df_m.shape[1]
Out[122]:
In [77]:
# Let's look at the top record: head(1) returns the first row as a DataFrame
df_m.head(1)
Out[77]:
In [78]:
# Let's look at the bottom record: tail(1) returns the last row as a DataFrame
df_m.tail(1)
Out[78]:
In [81]:
# Change all column names at once by assigning a list of labels to .columns
# (one label per existing column). This modifies df_m itself, so cells
# further down refer to 'Col3', 'Col4' and 'Col5'.
df_m.columns=['Col3','Col4','Col5']
df_m
Out[81]:
In [82]:
# Data type of each column of the data frame
df_m.dtypes
Out[82]:
In [ ]:
# Summarising the data using describe function
In [83]:
# Summary statistics for each column; every column is numeric at this point
df_m.describe()
Out[83]:
In [86]:
# Accessing the index (row labels) of the data frame as a plain Python list
list(df_m.index)
Out[86]:
In [87]:
# Changing the index of the data frame: assign one new label per row.
# This modifies df_m itself.
df_m.index = ['Ind1','Ind2']
In [18]:
# Display the frame; its row labels were reassigned in the previous cell
df_m
Out[18]:
In [88]:
# Accessing a single column by its label returns a Series.
# A Series is the building block of a DataFrame.
s1=df_m['Col3']
type(s1)
Out[88]:
In [89]:
# Adding a column to the data frame: assign a list with one value per row
# to a new column label
df_m['Name']=['Roger','Nadal']
df_m
Out[89]:
In [90]:
df_m.describe()
# By default describe() summarises only the numeric columns of a frame with
# mixed types, so 'Name' is left out; describe(include='all') covers it too
Out[90]:
In [91]:
# Accessing multiple columns: pass a list of column labels.
# The result is a DataFrame holding just those columns.
df_m2=df_m[['Col3','Name']]
df_m2
Out[91]:
In [92]:
# Indexing with a single label returns a Series
type(df_m['Name'])
Out[92]:
In [93]:
# Indexing with a list of labels (even a one-element list) returns a DataFrame
type(df_m[['Name']])
Out[93]:
In [94]:
# Selecting rows using a positional slice.
# The end of a slice is exclusive, so [:2] selects
# the rows at positions 0 and 1
df_m[:2]
Out[94]:
In [95]:
# The slice [:1] selects only
# the row at position 0
df_m[:1]
Out[95]:
In [97]:
# Filtering a dataset: build a list of booleans, one per row,
# that is True where Name equals "Roger"
pos1 = (df_m['Name'] == "Roger").tolist()
pos1
Out[97]:
In [98]:
# Indexing with a list of booleans keeps only the rows marked True
df_m[pos1]
Out[98]:
In [99]:
# Filtering a dataset based on multiple conditions.
# First condition: one boolean per row, True where Name equals "Roger"
pos1 = (df_m['Name'] == "Roger").tolist()
pos1
Out[99]:
In [100]:
# Second condition: one boolean per row, True where Col3 equals 1
pos2 = (df_m['Col3'] == 1).tolist()
pos2
Out[100]:
In [105]:
# Combine the two row masks element by element.
# Python's `and` applied to two lists does NOT compare them position by
# position: it returns the second list whenever the first one is non-empty,
# so `pos1 and pos2` would silently ignore pos1. zip() pairs up matching
# positions so a row is kept only when BOTH conditions are True for it.
# (With boolean Series instead of lists, the pandas way is `mask1 & mask2`.)
pos3 = [first and second for first, second in zip(pos1, pos2)]
pos3
Out[105]:
In [106]:
# Keep only the rows that pos3 marks True
df_m[pos3]
Out[106]:
In [ ]:
# Using iloc for position-based indexing
In [107]:
# Row at position 0 only (the slice end is exclusive)
df_m.iloc[0:1]
Out[107]:
In [108]:
# Rows at positions 0 and 1
df_m.iloc[0:2]
Out[108]:
In [114]:
# First two rows and first column
df_m.iloc[0:2,0:1]
Out[114]:
In [115]:
# First two rows and first two columns
df_m.iloc[0:2,0:2]
Out[115]:
In [116]:
# First two rows and the column at index
# position 1 (i.e. the second column)
df_m.iloc[0:2,1:2]
Out[116]:
In [118]:
# First two rows and all columns (a bare ":" selects the whole axis)
df_m.iloc[:2, :]
Out[118]:
In [123]:
# All rows and all columns: a bare ":" on each axis selects everything
df_m.iloc[:, :]
Out[123]: