The purpose of this document is to provide additional options for visualization of data. Questions about code can be directed to Alicia Vallorani (
When you have a large number of data points, many of them can land on the same location and hide one another (overplotting). You can add jitter to the points (i.e., slightly vary the location of the points) to handle this overplotting issue.
# Scatter plot with jitter
# ACT score against age from sat.act, colored by education level.
# position = "jitter" slightly shifts each point to reduce overplotting.
# The chain must end on its last layer: a trailing `+` would make R try to
# add the next expression in the file to this plot.
ggplot(sat.act, aes(age, ACT, color = education)) +
  geom_point(position = "jitter") +
  ylab("Score") +
  xlab("Age") +
  labs(color = "Education level") +
  ggtitle("ACT scores by age and education level")
# Scatter plot with jitter short-cut
# Same plot as a jittered geom_point(), written with geom_jitter() instead.
# The chain must end on its last layer: a trailing `+` would make R try to
# add the next expression in the file to this plot.
ggplot(sat.act, aes(age, ACT, color = education)) +
  geom_jitter() +
  ylab("Score") +
  xlab("Age") +
  labs(color = "Education level") +
  ggtitle("ACT scores by age and education level")
# Bar graph with additional element stacked
# Mean ACT score per education level, with the bars filled by sex and
# stacked (the default position for geom_bar()).
# `fun` replaces the deprecated `fun.y` argument of the summary stat.
# expand = c(0, 0) removes the padding around the y axis range.
# The chain must end on its last layer: a trailing `+` would make R try to
# add the next expression in the file to this plot.
ggplot(sat.act, aes(education, ACT, fill = sex)) +
  geom_bar(stat = "summary", fun = "mean") +
  scale_fill_grey() +
  ylab("Average score") +
  xlab("Education level") +
  labs(fill = "Sex") +
  ggtitle("Average ACT scores at each education level by sex") +
  scale_y_continuous(expand = c(0, 0))
# Bar graph with additional element dodged
# Mean ACT score per education level, with one bar per sex placed
# side-by-side (position = "dodge") instead of stacked.
# `fun` replaces the deprecated `fun.y` argument of the summary stat.
# expand = c(0, 0) removes the padding around the y axis range.
# The chain must end on its last layer: a trailing `+` leaves the
# expression incomplete.
ggplot(sat.act, aes(education, ACT, fill = sex)) +
  geom_bar(stat = "summary", fun = "mean", position = "dodge") +
  scale_fill_grey() +
  ylab("Average score") +
  xlab("Education level") +
  labs(fill = "Sex") +
  ggtitle("Average ACT scores at each education level by sex") +
  scale_y_continuous(expand = c(0, 0))
Instead of plotting the bars side-by-side, we can use facet_wrap() to create separate graphs for each sex. It’s also possible to set the limits on the axes manually, so the bars don’t run into the labels.
# Bar graph with additional element faceted
# Mean ACT score per education level, drawn as a separate panel for each
# sex via facet_wrap(~sex).
# `fun` replaces the deprecated `fun.y` argument of the summary stat.
# limits = c(0, 36) fixes the y axis range manually; expand = c(0, 0)
# removes the padding around that range.
# The chain must end on its last layer: a trailing `+` would make R try to
# add the next expression in the file to this plot.
ggplot(sat.act, aes(education, ACT, fill = sex)) +
  geom_bar(stat = "summary", fun = "mean") +
  facet_wrap(~sex) +
  scale_fill_grey() +
  ylab("Average score") +
  xlab("Education level") +
  labs(fill = "Sex") +
  ggtitle("Average ACT scores at each education level by sex") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 36))
# Box plot of SATV by education level, filled by sex, with the individual
# observations overlaid as semi-transparent points. position_jitterdodge()
# both jitters the points and dodges them so they line up with the boxes.
ggplot(
  data = sat.act,
  mapping = aes(x = as.factor(education), y = SATV, fill = sex)
) +
  geom_boxplot() +
  geom_jitter(alpha = 0.2, position = position_jitterdodge()) +
  xlab("Education") +
  labs(fill = "Sex")
# Mosaic plot of sex by education from sat.act, with shading and a legend
vcd::mosaic(
  ~ sex + education,
  data = sat.act,
  shade = TRUE,
  legend = TRUE
)