Complete guide to bar plot using ggplot2
Importing the ggplot2 package -
library(ggplot2)
Let’s use the built in Hair and Eye Color data set -
HairEyeColor
, , Sex = Male
Eye
Hair Brown Blue Hazel Green
Black 32 11 10 3
Brown 53 50 25 15
Red 10 10 7 7
Blond 3 30 5 8
, , Sex = Female
Eye
Hair Brown Blue Hazel Green
Black 36 9 5 2
Brown 66 34 29 14
Red 16 7 7 7
Blond 4 64 5 8
In this cross-tabulated form the data set is not well suited to plotting with ggplot2, so we need to reshape it before moving on.
Let’s import some necessary packages -
library(dplyr)
The data set is then transformed into a form so that we can use it for plotting -
# Expand the HairEyeColor contingency table into one row per student:
# as_tibble() yields the columns Hair, Eye, Sex and the count n, uncount()
# repeats each row n times (dropping n), and every remaining column is
# converted to a factor. across() replaces the superseded mutate_all().
df <- HairEyeColor %>%
  as_tibble() %>%
  tidyr::uncount(n) %>%
  mutate(across(everything(), as.factor))
More about uncount -
# Each row is repeated as many times as its value in `a`.
tidyr::uncount(
  tibble(
    a = c(2, 1, 4),
    b = c("one", "two", "three")
  ),
  a
)
# A tibble: 7 x 1
b
<chr>
1 one
2 one
3 two
4 three
5 three
6 three
7 three
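uncount() does the opposite of count(): it duplicates each row according to the value in the count column (here a) and then drops that column.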
Let’s see the new data frame now-
glimpse(df)
Rows: 592
Columns: 3
$ Hair <fct> Black, Black, Black, Black, Black, Black, Black, Black, Black, Bl~
$ Eye <fct> Brown, Brown, Brown, Brown, Brown, Brown, Brown, Brown, Brown, Br~
$ Sex <fct> Male, Male, Male, Male, Male, Male, Male, Male, Male, Male, Male,~
Now it can be used to create bar charts.
A Simple Barplot
# geom_bar() counts the rows of df per Hair level and draws one bar for each.
ggplot(df) +
  geom_bar(aes(x = Hair))
The mapping can be done inside the ggplot() function -
# The mapping is declared once in ggplot(); labs() adds the titles and
# removes the x-axis title (x = NULL).
ggplot(df, aes(x = Hair)) +
  geom_bar(fill = "black") +
  labs(
    x = NULL,
    y = "Number of Students",
    title = "Hair Color",
    subtitle = "592 Statistics Students",
    caption = "(From R's built in HairEyeColor sample dataset)"
  )
Horizontal Bar Chart
Using coord_flip() -
# Hair is still mapped to x and the counts to y; coord_flip() only swaps
# where the two axes are drawn, so the y label ends up on the horizontal axis.
ggplot(df, aes(x = Hair)) +
  geom_bar(fill = "black") +
  coord_flip() +
  labs(
    x = NULL,
    y = "Number of Students",
    title = "Hair Color",
    subtitle = "592 Statistics Students",
    caption = "(From R's built in HairEyeColor sample dataset)"
  )
Assigning variable to the y axis -
# Mapping Hair to y draws horizontal bars without coord_flip(). The counts
# are then on the x axis, so x carries the "Number of Students" label and
# the Hair axis (y) is left untitled.
ggplot(data = df, mapping = aes(y = Hair)) +
  geom_bar(fill = "black") +
  labs(title = "Hair Color",
       subtitle = "592 Statistics Students",
       caption = "(From R's built in HairEyeColor sample dataset)",
       x = "Number of Students", y = NULL)
Using Colors
Map fill to the same variable as the x axis so that each category is drawn in a different color -
# One fill colour per Hair level; the legend is hidden.
ggplot(df) +
  geom_bar(aes(x = Hair, fill = Hair)) +
  theme(legend.position = "none")
Using hue -
# Same chart with a custom hue scale; different values of c give
# different colour intensity.
ggplot(df) +
  geom_bar(aes(x = Hair, fill = Hair)) +
  scale_fill_hue(c = 20) +
  theme(legend.position = "none") # hide the legend
Manually selecting colors
How to manually set colors in a bar chart?
Manually selecting colors -
# A fill vector given outside aes() is a fixed setting: it takes precedence
# over the fill = Hair mapping and is applied to the bars in order, so it
# needs exactly one colour per bar.
ggplot(df) +
  geom_bar(
    aes(x = Hair, fill = Hair),
    fill = c("Black", "beige", "bisque3", "red"),
    colour = "black"
  ) +
  theme(legend.position = "none")
Another way to do that -
# Keep the fill = Hair mapping and choose the colours through the fill
# scale instead of a fixed setting; bars get a black outline.
ggplot(df) +
  geom_bar(aes(x = Hair, fill = Hair), colour = "black") +
  scale_fill_manual(values = c("Black", "beige", "bisque3", "red")) +
  theme(legend.position = "none")
Modifying Axis Tickmarks
# Tick marks every 50 units on the count axis; hjust = 0.5 centres the
# title and the subtitle.
ggplot(df, aes(x = Hair)) +
  geom_bar() +
  scale_y_continuous(breaks = seq(0, 300, by = 50)) +
  labs(
    title = "Bar Chart of Colors",
    subtitle = "An observational study",
    x = "Colors",
    y = "Frequency"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
Stacked Bar Chart
Mapping a second variable to the fill aesthetic produces a stacked bar chart -
# Each Hair bar is split into stacked segments, one per Sex.
ggplot(df) +
  geom_bar(aes(x = Hair, fill = Sex))
100% Stacked Bar Chart
Using position = “fill” inside geom_bar -
# position = "fill" stretches every bar to the same length, so the segments
# show proportions instead of counts; coord_flip() draws the bars horizontally.
ggplot(df, aes(x = Hair, fill = Sex)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(x = "Hair Color", y = NULL)
Changing Order of Bars
# Re-level Hair so the bars follow the new level order. Note that this
# overwrites df, so every later plot of df sees the new level order too.
df <- df %>%
  mutate(Hair = factor(Hair, levels = c("Red", "Black", "Blond", "Brown")))
ggplot(df, aes(y = Hair, fill = Sex)) +
  geom_bar(position = "fill") +
  labs(x = NULL, y = "Hair Color")
Another way to do this using scale_y_discrete()
-
# The bar order is set for this plot only, through the limits of the
# discrete y scale; df itself is not modified.
ggplot(df, aes(y = Hair, fill = Sex)) +
  geom_bar(position = "fill") +
  scale_y_discrete(limits = c("Black", "Red", "Brown", "Blond")) +
  labs(x = NULL, y = "Hair Color")
Changing Order in Legend’s Labels
Using scale_fill_discrete()
-
# breaks on the fill scale sets the order in which the legend entries are
# listed; limits on the y scale sets the order of the bars.
ggplot(df, aes(y = Hair, fill = Sex)) +
  geom_bar(position = "fill") +
  scale_fill_discrete(breaks = c("Male", "Female")) +
  scale_y_discrete(limits = c("Black", "Red", "Brown", "Blond")) +
  labs(x = NULL, y = "Hair Color")
Changing Order of Stacks
In the following side-by-side (dodged) barplot, the left bar of each pair denotes female and the right bar denotes male -
# Side-by-side bars per Sex within each Hair colour. Hair is on the x axis
# (left untitled) and the y axis shows counts, so y is labelled as a count
# rather than as "Hair Color".
ggplot(df, aes(x = Hair, fill = Sex)) +
  geom_bar(position = "dodge") +
  labs(x = NULL, y = "Number of Students") +
  scale_x_discrete(limits = c("Black", "Red", "Brown", "Blond"))
If we check the order of levels of Sex we’ll see -
levels(df$Sex)
[1] "Female" "Male"
Now if the order is changed, the bar will also change its order -
# Re-level Sex (for this plot only) so that the Male bar is drawn first in
# each pair. Hair is on the x axis (left untitled) and the y axis shows
# counts, so y is labelled as a count rather than as "Hair Color".
df %>%
  mutate(Sex = factor(Sex, levels = c("Male", "Female"))) %>%
  ggplot(aes(x = Hair, fill = Sex)) +
  geom_bar(position = "dodge") +
  labs(x = NULL, y = "Number of Students") +
  scale_x_discrete(limits = c("Black", "Red", "Brown", "Blond"))
This is particularly useful when showing a 100% stacked barplot -
# Hair is re-levelled in reverse order for this plot only, which controls
# the order of the segments inside each bar; limits on the fill scale
# pairs each colour with a Hair level and fixes the legend order.
df %>%
  mutate(
    Hair = factor(Hair, levels = rev(c("Black", "Brown", "Red", "Blond")))
  ) %>%
  ggplot(aes(y = Sex, fill = Hair)) +
  geom_bar(position = "fill") +
  scale_fill_manual(
    values = c("black", "#8B4513", "#FF0000", "#faf0be"),
    limits = c("Black", "Brown", "Red", "Blond")
  ) +
  labs(x = NULL, y = NULL, fill = "Hair Colors") +
  theme_bw() +
  theme(legend.position = "bottom")
Changing width of the bars
The width of the bars can be changed using the width argument of geom_bar(). It takes values from 0 to 1 -
# width = 0.5 makes the bars narrower than the default.
ggplot(df, aes(x = Hair, fill = Sex)) +
  geom_bar(width = 0.5, position = "fill") +
  coord_flip() +
  labs(x = "Hair Color", y = NULL)
Side by Side Bar Chart
Using dodge -
# position = "dodge" places the Sex bars next to each other instead of
# stacking them.
ggplot(df, aes(x = Hair, fill = Sex)) +
  geom_bar(position = "dodge") +
  labs(y = NULL, x = "Hair Color")
Using dodge2 -
# Same chart using the "dodge2" position adjustment.
ggplot(df, aes(x = Hair, fill = Sex)) +
  geom_bar(position = "dodge2") +
  labs(y = NULL, x = "Hair Color")
In the following case the rows for males with red hair are filtered out, so there is no Male bar for Red. With dodge2 the Female bar then expands to fill the whole slot -
# Drop every row that is both Male and Red, then plot with "dodge2".
df %>%
  filter(Sex != "Male" | Hair != "Red") %>%
  ggplot(aes(x = Hair, fill = Sex)) +
  geom_bar(position = "dodge2") +
  labs(y = NULL, x = "Hair Color")
To prevent it from happening use position_dodge2(preserve = “single”) in position argument -
# Same filtered data, plotted with position_dodge2(preserve = "single").
df %>%
  filter(Sex != "Male" | Hair != "Red") %>%
  ggplot(aes(x = Hair, fill = Sex)) +
  geom_bar(position = position_dodge2(preserve = "single")) +
  labs(y = NULL, x = "Hair Color")
preserve = “total” keeps the total width of each group fixed, so the remaining bar fills the whole slot -
# Same filtered data, plotted with position_dodge2(preserve = "total").
df %>%
  filter(Sex != "Male" | Hair != "Red") %>%
  ggplot(aes(x = Hair, fill = Sex)) +
  geom_bar(position = position_dodge2(preserve = "total")) +
  labs(y = NULL, x = "Hair Color")
Column Chart
The data for a column chart looks like this (after manipulation) -
# Pre-computed counts: one row per Hair level with the number of male
# students in the `frequency` column.
hairdf <- df %>%
  filter(Sex == "Male") %>%
  count(Hair, name = "frequency")
hairdf
# A tibble: 4 x 2
Hair frequency
<fct> <int>
1 Red 34
2 Black 56
3 Blond 46
4 Brown 143
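This type of data frame, where the counts are already computed, can be drawn as a column chart with geom_col() rather than geom_bar(). The difference is that geom_bar() counts the rows itself, whereas geom_col() uses the y values as given -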
# Column chart of the pre-computed frequencies. The colours are tied to the
# Hair levels by name rather than by position, so each bar keeps its
# intended colour whatever the order of the factor levels is (a positional
# fill vector would follow the row order of hairdf instead). The legend is
# suppressed because it would only repeat the axis labels.
hairdf %>%
  ggplot(aes(x = Hair, y = frequency, fill = Hair)) +
  geom_col() +
  scale_fill_manual(
    values = c(
      Black = "black",
      Blond = "beige",
      Brown = "bisque3",
      Red = "coral2"
    ),
    guide = "none"
  ) +
  labs(title = "Hair Color in Column Chart")
This kind of data can also be graphed by defining stat = "identity"
in the geom_bar()
function -
# stat = "identity" makes geom_bar() use the supplied y values instead of
# counting rows.
hairdf %>%
  ggplot() +
  geom_bar(
    aes(x = Hair, y = frequency),
    stat = "identity"
  )
Putting frequencies on each bar
# geom_text() prints each frequency in small white text on its bar; the
# vjust = 1.2 setting adjusts the vertical placement of the label.
hairdf %>%
  ggplot(aes(x = Hair, y = frequency)) +
  geom_col() +
  geom_text(
    aes(label = frequency),
    colour = "white",
    size = 3,
    vjust = 1.2
  ) +
  scale_y_continuous(breaks = seq(0, 150, by = 30)) +
  labs(
    title = "Bar Chart of Colors",
    subtitle = "An observational study",
    x = "Colors",
    y = "Frequency"
  )
To know more about ggplot2 visit here
To know more about colors visit here