Machine Learning Made Easy: Blog 34- Pie Chart using plotly in R

Pie Chart using plotly library

Introduction

In this blog, we will look at how to create a simple pie chart using the plotly library.

Installing libraries

Let's install plotly and the other libraries used to create the plot.

# Packages needed for this post: dplyr/tidyr for data wrangling, Ecdat for
# the example data set, plotly for the chart.
package.name <- c("dplyr", "tidyr", "Ecdat", "plotly")

# Install any package that is not yet available, then attach each one.
# requireNamespace() only checks availability (it does not attach), so the
# single library() call below is the one place a package gets attached and
# it fails loudly if installation did not succeed.
for (pkg in package.name) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}


# Load 'HI' ('Health Insurance and Hours Worked By Wives') from the Ecdat
# package, keep a working copy in df, and preview the first rows.
data(list = "HI")
df <- HI
head(df)

  whrswk hhi whi hhi2  education  race hispanic experience kidslt6 kids618
1      0  no  no   no 13-15years white       no       13.0       2       1
2     50  no yes   no 13-15years white       no       24.0       0       1
3     40 yes  no  yes    12years white       no       43.0       0       0
4     40  no yes  yes 13-15years white       no       17.0       0       1
5      0 yes  no  yes  9-11years white       no       44.5       0       0
6     40 yes yes  yes    12years white       no       32.0       0       0
   husby       region   wght
1 11.960 northcentral 214986
2  1.200 northcentral 210119
3 31.275 northcentral 219955
4  9.000 northcentral 210317
5  0.000 northcentral 219955
6 15.690 northcentral 208148

Step 1: Frequency Profile of the variables

Let's look at the count of records for the different levels of each categorical variable.

# Keep only the columns whose level counts are profiled in this step.
interim.df <- select(
  df,
  hhi, whi, hhi2, education, race, hispanic, kidslt6, kids618, region
)
  
# Build one frequency table per column of interim.df.
# Each list element has three columns:
#   Feature - the column name,
#   Level   - the distinct values of that column (as character),
#   Total   - the number of rows having that value.
l1 <- lapply(colnames(interim.df), function(x) {
  interim.df %>%
    # all_of() makes it explicit that x holds a column *name*; a bare
    # select(x) is ambiguous and would pick a column literally called "x".
    select(Level = all_of(x)) %>%
    mutate(Feature = x) %>%
    group_by(Feature, Level) %>%
    summarise(Total = n()) %>%
    # Drop the leftover grouping so later steps work on a plain table.
    ungroup() %>%
    # Convert only after counting so rows keep the original (factor-level or
    # numeric) ordering, while all features share one Level type for stacking.
    mutate(Level = as.character(Level))
})

# Stack the per-feature tables from l1 into one plain data frame,
# reset the row names, and preview the result.
df.final <- as.data.frame(do.call(rbind.data.frame, l1))
row.names(df.final) <- NULL
head(df.final)

  Feature Level Total
1     hhi    no 11219
2     hhi   yes 11053
3     whi    no 13961
4     whi   yes  8311
5    hhi2    no  8696
6    hhi2   yes 13576

Step 2: Dataset for the ‘education’ variable

# Rows for the 'education' feature only, with each level's share of the
# total record count expressed as a percentage rounded to two decimals.
df.interim <- df.final %>%
  filter(Feature == "education") %>%
  select(Level, Total) %>%
  mutate(Per_contr = round(100 * Total / sum(Total), digits = 2))

df.interim

       Level Total Per_contr
1    <9years  1122      5.04
2  9-11years  1771      7.95
3    12years  8677     38.96
4 13-15years  5790     26.00
5    16years  3472     15.59
6   >16years  1440      6.47

Step 3: Initialising the plotly object

# Font applied to all chart text through layout(font = ...)
t <- list(family = "Georgia", color = "black")

# Pie trace: one slice per education level, sized by its percentage share.
# df.interim is piped in as the data, so columns are referenced with
# formulas (~Level) instead of repeating df.interim$...
pie_chrt <- df.interim %>%
  plot_ly(
    labels = ~Level,
    values = ~Per_contr,
    type = "pie",
    # Show the level name inside each slice, in white
    textposition = "inside",
    text = ~Level,
    textinfo = "text",
    insidetextfont = list(color = "#FFFFFF"),
    # Hover shows the percentage; the empty <extra> tag hides the
    # secondary hover box
    hovertemplate = paste(df.interim$Per_contr, "%", "<extra></extra>"),
    # Thin white border between slices. No `colors` entry is given: the
    # script never defines a palette, and the previous `colors = colors`
    # resolved to the base R function grDevices::colors rather than a
    # vector of colours, so plotly's default colours are used instead.
    marker = list(line = list(color = "#FFFFFF", width = 1)),
    # The 'pull' attribute can also be used to create space between the sectors
    showlegend = TRUE,
    width = 500
  )

# Hide the cartesian axes (not meaningful for a pie) and set font and margins.
pie_chrt <- pie_chrt %>%
  layout(
    # title = 'Composition of Optimal Sequences',
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    font = t,
    margin = list(l = 50, r = 50, b = 50, t = 50, pad = 4)
  )

pie_chrt

Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
Please use `arrange()` instead.
See vignette('programming') for more help
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.