Complete guide to bar plot using ggplot2


Importing the ggplot2 package -

library(ggplot2)

Let’s use the built in Hair and Eye Color data set -

# Print the built-in HairEyeColor contingency table.
print(HairEyeColor)
, , Sex = Male

       Eye
Hair    Brown Blue Hazel Green
  Black    32   11    10     3
  Brown    53   50    25    15
  Red      10   10     7     7
  Blond     3   30     5     8

, , Sex = Female

       Eye
Hair    Brown Blue Hazel Green
  Black    36    9     5     2
  Brown    66   34    29    14
  Red      16    7     7     7
  Blond     4   64     5     8

In this form (a three-dimensional contingency table) the data set is not suitable for plotting with ggplot2, so we need to reshape it before moving on.

Let’s import some necessary packages -

library(dplyr)

The data set is then transformed into a form so that we can use it for plotting -

# Reshape the HairEyeColor contingency table into one row per student:
#   1. as_tibble() turns the table into a long tibble with a count column `n`;
#   2. uncount() repeats every row `n` times and drops `n`;
#   3. every remaining column (Hair, Eye, Sex) is converted to a factor.
# across(everything(), ...) replaces the superseded mutate_all().
df <- HairEyeColor %>%
  as_tibble() %>%
  tidyr::uncount(n) %>%
  mutate(across(everything(), as.factor))

More about uncount -

# Toy example: each row is repeated as many times as its value in column `a`.
tidyr::uncount(
  tibble(
    a = c(2, 1, 4),
    b = c("one", "two", "three")
  ),
  a
)
# A tibble: 7 x 1
  b    
  <chr>
1 one  
2 one  
3 two  
4 three
5 three
6 three
7 three

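uncount() does the opposite of count(): it repeats each row as many times as the value in the given weights column (here `a`) and then drops that column.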

Let’s see the new data frame now-

# Compact overview of the reshaped data (one row per student).
df %>%
  glimpse()
Rows: 592
Columns: 3
$ Hair <fct> Black, Black, Black, Black, Black, Black, Black, Black, Black, Bl~
$ Eye  <fct> Brown, Brown, Brown, Brown, Brown, Brown, Brown, Brown, Brown, Br~
$ Sex  <fct> Male, Male, Male, Male, Male, Male, Male, Male, Male, Male, Male,~

Now it can be used to create bar charts.


A Simple Barplot

# One bar per hair color; geom_bar() counts the rows in each category.
df %>%
  ggplot(aes(x = Hair)) +
  geom_bar()

The mapping can be done inside the ggplot() function -

# Same bar chart with black bars, a title, subtitle, caption and a y-axis label.
df %>%
  ggplot(aes(x = Hair)) +
  geom_bar(fill = "black") +
  labs(
    x = NULL,
    y = "Number of Students",
    title = "Hair Color",
    subtitle = "592 Statistics Students",
    caption = "(From R's built in HairEyeColor sample dataset)"
  )

Horizontal Bar Chart

Using coord_flip() -

# Horizontal bars: build the vertical chart, then swap the axes with coord_flip().
# The labels are still given in terms of the original (unflipped) x and y.
df %>%
  ggplot(aes(x = Hair)) +
  geom_bar(fill = "black") +
  coord_flip() +
  labs(
    x = NULL,
    y = "Number of Students",
    title = "Hair Color",
    subtitle = "592 Statistics Students",
    caption = "(From R's built in HairEyeColor sample dataset)"
  )

Assigning variable to the y axis -

# Horizontal bars without coord_flip(): map the category to y instead of x.
# The counts are then drawn along the x axis, so "Number of Students" must
# label x, and the self-explanatory category axis (y) is left unlabeled.
ggplot(data = df, mapping = aes(y = Hair)) +
  geom_bar(fill = "black") +
  labs(title = "Hair Color",
       subtitle = "592 Statistics Students",
       caption = "(From R's built in HairEyeColor sample dataset)",
       x = "Number of Students", y = NULL)

Using Colors

Set fill to the same variable as the x axis so that each category is shown in a different color -

# Color each bar by its own category; the legend would only repeat the
# x-axis labels, so it is hidden.
df %>%
  ggplot(aes(x = Hair, fill = Hair)) +
  geom_bar() +
  theme(legend.position = "none")

Using hue -

# Same chart, but with a muted default palette.
df %>%
  ggplot(aes(x = Hair, fill = Hair)) +
  geom_bar() +
  scale_fill_hue(c = 20) +  # c changes the intensity of the colors
  theme(legend.position = "none")  # hide the legend

Manually selecting colors

How to manually set colors in a bar chart?
Manually selecting colors -

# Set the bar colors directly through the fill parameter of geom_bar():
# one color per bar, given in the order of the bars. Bars get a black outline.
df %>%
  ggplot(aes(x = Hair, fill = Hair)) +
  geom_bar(
    colour = "black",
    fill = c("Black", "beige", "bisque3", "red")
  ) +
  theme(legend.position = "none")

Another way to do that -

# Set the bar colors through the fill scale instead of the geom parameter.
df %>%
  ggplot(aes(x = Hair, fill = Hair)) +
  geom_bar(colour = "black") +
  scale_fill_manual(values = c("Black", "beige", "bisque3", "red")) +
  theme(legend.position = "none")

Modifying Axis Tickmarks

# Custom y-axis tick marks every 50 units from 0 to 300, plus centered
# title and subtitle.
df %>%
  ggplot(aes(x = Hair)) +
  geom_bar() +
  scale_y_continuous(breaks = seq(from = 0, to = 300, by = 50)) +
  labs(
    title = "Bar Chart of Colors",
    subtitle = "An observational study",
    x = "Colors",
    y = "Frequency"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),    # hjust = 0.5 centers the text
    plot.subtitle = element_text(hjust = 0.5)
  )

Stacked Bar Chart

A stacked bar chart can be made by mapping a second variable to the fill aesthetic -

# Stacked bars: one bar per hair color, split by Sex through the fill aesthetic.
df %>%
  ggplot(aes(x = Hair, fill = Sex)) +
  geom_bar()

100% Stacked Bar Chart

Using position = "fill" inside geom_bar() -

# 100% stacked bars: position = "fill" rescales every bar to the same height,
# so the segments show proportions. Drawn horizontally via coord_flip().
df %>%
  ggplot(aes(x = Hair, fill = Sex)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(x = "Hair Color", y = NULL)

Changing Order of Bars

# Bars follow the order of the factor levels, so re-level Hair first.
# Note: this changes `df` itself, so later code sees the new level order.
df <- mutate(
  df,
  Hair = factor(Hair, levels = c("Red", "Black", "Blond", "Brown"))
)
df %>%
  ggplot(aes(y = Hair, fill = Sex)) +
  geom_bar(position = "fill") +
  labs(y = "Hair Color", x = NULL)

Another way to do this using scale_y_discrete()-

# Reorder the bars for this plot only, by giving the y scale explicit limits
# (the data frame itself is left unchanged).
df %>%
  ggplot(aes(y = Hair, fill = Sex)) +
  geom_bar(position = "fill") +
  scale_y_discrete(limits = c("Black", "Red", "Brown", "Blond")) +
  labs(y = "Hair Color", x = NULL)

Changing Order in Legend’s Labels

Using scale_fill_discrete() -

# Change the order of the legend entries through the breaks of the fill scale.
df %>%
  ggplot(aes(y = Hair, fill = Sex)) +
  geom_bar(position = "fill") +
  scale_fill_discrete(breaks = c("Male", "Female")) +
  scale_y_discrete(limits = c("Black", "Red", "Brown", "Blond")) +
  labs(y = "Hair Color", x = NULL)

Changing Order of Stacks

In the following side-by-side (dodged) barplot, within each hair color the left bar denotes female and the right bar denotes male -

# Side-by-side bars per hair color, with the hair colors in a custom order.
# Hair is on the x axis here, so "Hair Color" labels x; the count axis (y)
# is left unlabeled.
ggplot(df, aes(x = Hair, fill = Sex)) +
  geom_bar(position = "dodge") +
  labs(x = "Hair Color", y = NULL) +
  scale_x_discrete(limits = c("Black", "Red", "Brown", "Blond"))

If we check the order of levels of Sex we’ll see -

# Inspect the current level order of Sex.
levels(df[["Sex"]])
[1] "Female" "Male"  

Now if the order is changed, the bar will also change its order -

# Re-level Sex (for this plot only) so that the Male bar is drawn first
# within each hair color. Hair is on the x axis, so "Hair Color" labels x;
# the count axis (y) is left unlabeled.
df %>%
  mutate(Sex = factor(Sex, levels = c("Male", "Female"))) %>%
  ggplot(aes(x = Hair, fill = Sex)) +
  geom_bar(position = "dodge") +
  labs(x = "Hair Color", y = NULL) +
  scale_x_discrete(limits = c("Black", "Red", "Brown", "Blond"))

This is particularly useful when showing a 100% stacked barplot -

# 100% stacked bars per Sex, segmented by hair color.
# The Hair levels are reversed for this plot, while the fill scale's limits
# keep the legend in Black/Brown/Red/Blond order with matching colors.
df %>%
  mutate(
    Hair = factor(Hair, levels = rev(c("Black", "Brown", "Red", "Blond")))
  ) %>%
  ggplot(mapping = aes(y = Sex, fill = Hair)) +
  geom_bar(position = "fill") +
  scale_fill_manual(
    limits = c("Black", "Brown", "Red", "Blond"),
    values = c("black", "#8B4513", "#FF0000", "#faf0be")
  ) +
  labs(fill = "Hair Colors", x = NULL, y = NULL) +
  theme_bw() +
  theme(legend.position = "bottom")

Changing width of the bars

Width of the bars can be changed using the width argument from geom_bar(). It takes values from 0 to 1 -

# Narrower bars: width = 0.5 is passed to geom_bar().
df %>%
  ggplot(aes(x = Hair, fill = Sex)) +
  geom_bar(width = 0.5, position = "fill") +
  coord_flip() +
  labs(x = "Hair Color", y = NULL)

Side by Side Bar Chart

Using dodge -

# Side-by-side bars using position = "dodge".
df %>%
  ggplot(aes(x = Hair, fill = Sex)) +
  geom_bar(position = "dodge") +
  labs(y = NULL, x = "Hair Color")

Using dodge2 -

# Side-by-side bars using position = "dodge2".
df %>%
  ggplot(aes(x = Hair, fill = Sex)) +
  geom_bar(position = "dodge2") +
  labs(y = NULL, x = "Hair Color")

In the following case the rows for males with red hair are filtered out, so the Red group has no Male bar and the Female bar expands to fill the whole slot -

# Remove every red-haired male, then draw the dodged chart again.
df %>%
  filter(!(Sex == "Male" & Hair == "Red")) %>%
  ggplot(mapping = aes(x = Hair, fill = Sex)) +
  geom_bar(position = "dodge2") +
  labs(y = NULL, x = "Hair Color")

To prevent it from happening use position_dodge2(preserve = “single”) in position argument -

# Same data, with position_dodge2(preserve = "single").
dodge_single <- NULL  # placeholder removed below; see note
df %>%
  filter(!(Sex == "Male" & Hair == "Red")) %>%
  ggplot(mapping = aes(x = Hair, fill = Sex)) +
  geom_bar(position = position_dodge2(preserve = "single")) +
  labs(y = NULL, x = "Hair Color")

preserve = "total" (the default) lets the remaining bar fill the whole slot -

# Same data, with position_dodge2(preserve = "total").
df %>%
  filter(!(Sex == "Male" & Hair == "Red")) %>%
  ggplot(mapping = aes(x = Hair, fill = Sex)) +
  geom_bar(position = position_dodge2(preserve = "total")) +
  labs(y = NULL, x = "Hair Color")

Column Chart

The data for a column chart looks like this (after manipulation) -

# Pre-aggregated data: number of male students per hair color,
# stored in a column named `frequency`.
hairdf <- df %>%
  filter(Sex == "Male") %>%
  count(Hair, name = "frequency")
hairdf
# A tibble: 4 x 2
  Hair  frequency
  <fct>     <int>
1 Red          34
2 Black        56
3 Blond        46
4 Brown       143

This type of data frame, which already contains the counts, can be drawn as a column chart with geom_col() instead of geom_bar(): geom_bar() counts the rows itself, whereas geom_col() uses the y values as given -

# Column chart: bar heights come straight from the `frequency` column.
ggplot(hairdf, aes(x = Hair, y = frequency)) +
  geom_col(fill = c("Black", "beige", "bisque3", "coral2")) +
  labs(title = "Hair Color in Column Chart")

This kind of data can also be graphed by defining stat = "identity" in the geom_bar() function -

# Same chart with geom_bar(): stat = "identity" makes it use the supplied
# y values.
ggplot(hairdf, aes(x = Hair, y = frequency)) +
  geom_bar(stat = "identity")

Putting frequencies on each bar

# Column chart with the frequency written on each bar: geom_text() draws
# the value in white, and vjust = 1.2 shifts it down so it sits inside
# the top of the bar.
ggplot(hairdf, aes(x = Hair, y = frequency)) +
  geom_col() +
  geom_text(
    aes(label = frequency),
    colour = "white",
    size = 3,
    vjust = 1.2
  ) +
  scale_y_continuous(breaks = seq(from = 0, to = 150, by = 30)) +
  labs(
    title = "Bar Chart of Colors",
    subtitle = "An observational study",
    x = "Colors",
    y = "Frequency"
  )

To know more about ggplot2 visit here

To know more about colors visit here

Md Ahsanul Islam
Md Ahsanul Islam
Freelance Data Analysis and R Programmer

Statistics graduate student currently researching on econometrics