Chapter 12 Recreating Figures from Front. Genetics (2021)
This chapter refers to this paper: Seung Hyun Min, Jiawei Zhou. smplot: An R Package for Easy and Elegant Data Visualization (2021). Frontiers in Genetics, 12:802894.
Load these packages by running the code below.
library(tidyverse) # it has ggplot2 package
library(cowplot) # it allows you to save figures in .png file
library(smplot)
12.1 Figure 1 - Correlation Plot
# Figure 1 panels. Each scatter plot of mtcars (drat vs. mpg) is paired with a
# "text" panel: a plot whose points are drawn in white under theme_nothing(),
# so that only the annotate() text is meant to be visible while the text
# coordinates stay in the units of the data.

# Panel A: scatter plot with the ggplot2 default theme.
corr1 <- ggplot(data = mtcars, mapping = aes(x = drat, y = mpg)) +
  geom_point(shape = 21, fill = sm_color('green'), color = 'white',
             size = 3) + # ggplot2 default
  ggtitle('Correlation plot without smplot')

# Caption for panel A.
text1 <- ggplot(data = mtcars, mapping = aes(x = drat, y = mpg)) +
  geom_point(color = 'white', fill = 'white') + theme_nothing() +
  annotate('text',
           label = 'Base plot only using \nthe defaults of ggplot2',
           x = 3.9, y = 25)

# Panel B: same scatter plot with the smplot correlation theme added.
corr2 <- ggplot(data = mtcars, mapping = aes(x = drat, y = mpg)) +
  geom_point(shape = 21, fill = sm_color('green'), color = 'white',
             size = 3) +
  sm_corr_theme() + # smplot correlation theme
  ggtitle('Correlation plot with smplot')

# Caption for panel B. The middle annotation uses parse = TRUE so that
# bold(...) is rendered as a plotmath expression.
text2 <- ggplot(data = mtcars, mapping = aes(x = drat, y = mpg)) +
  geom_point(color = 'white', fill = 'white') + theme_nothing() +
  annotate('text', label = 'Base plot + sm_corr_theme()',
           x = 3.9, y = 29, fontface = 2) +
  annotate('text', x = 3.9, y = 23, parse = TRUE,
           label = 'bold(sm_corr_theme())~\nprovides~\na~theme~with') +
  annotate('text', x = 3.9, y = 20,
           label = 'minimalistic background\nlarger font\ncentered title')

# Panel C: smplot theme plus sm_statCorr() with a Spearman correlation.
corr3 <- ggplot(data = mtcars, mapping = aes(x = drat, y = mpg)) +
  geom_point(shape = 21, fill = sm_color('green'), color = 'white',
             size = 3) +
  sm_corr_theme() + # smplot corr theme + p and r values
  sm_statCorr(color = sm_color('green'), corr_method = 'spearman') +
  ggtitle('Statistics computed with smplot')

# Caption for panel C.
text3 <- ggplot(data = mtcars, mapping = aes(x = drat, y = mpg)) +
  geom_point(color = 'white', fill = 'white') + theme_nothing() +
  annotate('text', label = 'Base plot + sm_corr_theme() +\nsm_statCorr()',
           x = 3.9, y = 30, fontface = 2) +
  annotate('text', x = 3.9, y = 24, parse = TRUE,
           label = 'bold(sm_statCorr())~from~smplot~computes') +
  annotate('text', x = 3.9, y = 22.6,
           label = 'R and p values from a correlation test.') +
  annotate('text', x = 3.9, y = 18,
           label = '\nIt also prints the best-fit \nlinear regression line \nbased on the R value.')
# Arrange the six Figure 1 panels in a 3 x 2 grid (plot on the left, caption
# on the right); only the plot panels receive a letter label.
corr_all <- plot_grid(corr1, text1, corr2, text2,
                      corr3, text3, labels = c("A", "", "B", "", "C", ""),
                      label_size = 12,
                      ncol = 2, nrow = 3,
                      scale = 0.95)
print(corr_all)

# Write the assembled figure to a PNG file in the working directory.
save_plot("corr_plots.png", corr_all,
          ncol = 2, nrow = 3, base_asp = .95,
          dpi = 600)
12.2 Figure 2 - Bar Plot
# Simulated data for the bar plot: 20 subjects measured on two days, stored in
# long format with columns Subject, Value and Day.
set.seed(1) # generate random data
day1 <- rnorm(20, 0, 1)
day2 <- rnorm(20, 5, 1)
Subject <- rep(paste0('S', seq_len(20)), 2)
Data <- data.frame(Value = matrix(c(day1, day2), ncol = 1))
Day <- rep(c('Day 1', 'Day 2'), each = length(day1))
df <- cbind(Subject, Data, Day)
# Figure 2 panels. The "text" panels draw white points under theme_nothing()
# so that only the annotate() text is meant to be visible.

# Panel A: bar plot of the per-day mean using ggplot2's stat_summary().
bar1 <- ggplot(data = df, mapping = aes(x = Day, y = Value, fill = Day)) +
  stat_summary(fun = mean, geom = 'bar') +
  theme(legend.position = 'none') +
  ggtitle('Bar plot without smplot')

# Caption for panel A; parse = TRUE renders bold(...) as plotmath.
text1 <- ggplot(data = df, mapping = aes(x = Day, y = Value)) +
  geom_point(color = 'white', fill = 'white') + theme_nothing() +
  annotate('text', label = 'Bar~plot~using~bold(stat_summary())~from~ggplot2',
           x = 1.5, y = 3, parse = TRUE)

# Panel B: the same data drawn with smplot's sm_bar().
bar2 <- ggplot(data = df, mapping = aes(x = Day, y = Value, fill = Day)) +
  sm_bar(shape = 21, color = 'white', bar_fill_color = 'gray80') +
  scale_fill_manual(values = sm_color('blue','orange')) +
  ggtitle('Bar plot using smplot')

# Caption for panel B.
text2 <- ggplot(data = df, mapping = aes(x = Day, y = Value)) +
  geom_point(color = 'white', fill = 'white') + theme_nothing() +
  annotate('text', label = 'Bar~plot~using~bold(sm_bar())',
           x = 1.5, y = 4.5, parse = TRUE) +
  annotate('text', label = 'Individual points,\nlarger font,\nminimalistic theme\ncentered title\nnarrower bar width\nerror bar',
           x = 1.5, y = 1.5)
# Arrange the Figure 2 panels in a 2 x 2 grid; only the plot panels are
# lettered.
bar_all <- plot_grid(bar1, text1, bar2, text2,
                     labels = c("A", "", "B", ""),
                     label_size = 12,
                     ncol = 2, nrow = 2,
                     scale = 0.95)
print(bar_all)

# Write the assembled figure to a PNG file in the working directory.
save_plot("bars.png", bar_all,
          ncol = 2, nrow = 2, base_asp = .95,
          dpi = 600)
12.3 Figure 3 - Boxplot
# Download the example data set (requires network access).
df <- read_csv('https://www.smin95.com/data.csv')

# Fix the order of the Day levels so they are plotted as One, Two, Three
# instead of alphabetically.
df$Day <- factor(df$Day, levels = c('One','Two','Three'))
# Figure 3 panels. The "text" panels draw white points under theme_nothing()
# so that only the annotate() text is meant to be visible.

# Panel A: boxplot with ggplot2's geom_boxplot().
boxplot1 <- ggplot(data = df, mapping = aes(x = Day, y = Value, color = Day)) +
  geom_boxplot() +
  theme(legend.position = 'none') +
  ggtitle('Boxplot without smplot')

# Caption for panel A; parse = TRUE renders bold(...) as plotmath.
text1 <- ggplot(data = df, mapping = aes(x = Day, y = Value)) +
  geom_point(color = 'white', fill = 'white') + theme_nothing() +
  annotate('text', label = 'Boxplot~using~bold(geom_boxplot())\n~from~ggplot2',
           x = 2, y = 8, parse = TRUE)

# Panel B: the same data drawn with smplot's sm_boxplot().
boxplot2 <- ggplot(data = df, mapping = aes(x = Day, y = Value, color = Day)) +
  sm_boxplot(shape = 16, alpha = 0.4) +
  scale_color_manual(values = sm_palette(3)) +
  ggtitle('Boxplot with smplot') # smplot's default

# Caption for panel B.
text2 <- ggplot(data = df, mapping = aes(x = Day, y = Value)) +
  geom_point(color = 'white', fill = 'white') + theme_nothing() +
  annotate('text', label = 'Boxplot~using~bold(sm_boxplot())',
           x = 2, y = 11, parse = TRUE) +
  annotate('text', label = 'Individual points,\nlarger font,\nminimalistic theme\ncentered title',
           x = 2, y = 5)
# Arrange the Figure 3 panels in a 2 x 2 grid; only the plot panels are
# lettered.
boxplot_all <- plot_grid(boxplot1, text1, boxplot2, text2,
                         labels = c("A", "", "B", ""),
                         label_size = 12,
                         ncol = 2, nrow = 2,
                         scale = 0.95)
print(boxplot_all)

# Write the assembled figure to a PNG file in the working directory.
save_plot("boxplots.png", boxplot_all,
          ncol = 2, nrow = 2, base_asp = .95,
          dpi = 600)
12.4 Figure 4 - Violin Plot
# Figure 4 panels. NOTE(review): `df` is not redefined in this section; it is
# presumably the three-level Day/Value data loaded for the boxplot figure --
# confirm it is still in scope when running this section on its own.

# Panel A: violin plot with ggplot2's geom_violin().
violin1 <- ggplot(data = df, mapping = aes(x = Day, y = Value, color = Day)) +
  geom_violin() +
  theme(legend.position = 'none') +
  ggtitle('Violin plot without smplot') ## ggplot2 default

# Caption for panel A; parse = TRUE renders bold(...) as plotmath.
text1 <- ggplot(data = df, mapping = aes(x = Day, y = Value)) +
  geom_point(color = 'white', fill = 'white') + theme_nothing() +
  annotate('text', label = 'Violin~plot~using~bold(geom_violin())\n~from~ggplot2',
           x = 2, y = 8, parse = TRUE)

# Panel B: the same data drawn with smplot's sm_violin().
violin2 <- ggplot(data = df, mapping = aes(x = Day, y = Value, color = Day)) +
  sm_violin() +
  scale_color_manual(values = sm_palette(3)) +
  ggtitle('Violin plot with smplot') # smplot default

# Caption for panel B.
text2 <- ggplot(data = df, mapping = aes(x = Day, y = Value)) +
  geom_point(color = 'white', fill = 'white') + theme_nothing() +
  annotate('text', label = 'Violin~plot~using~bold(sm_violin())\nfrom~smplot',
           x = 2, y = 12, parse = TRUE) +
  annotate('text', label = 'Individual points,\nlarger font,\nminimalistic theme\ncentered title\nerror bars',
           x = 2, y = 4)
# Arrange the Figure 4 panels in a 2 x 2 grid; only the plot panels are
# lettered.
violin_all <- plot_grid(violin1, text1, violin2, text2,
                        labels = c("A", "", "B", ""),
                        label_size = 12,
                        ncol = 2, nrow = 2,
                        scale = 0.95)
print(violin_all)

# Write the assembled figure to a PNG file in the working directory.
save_plot("violins.png", violin_all,
          ncol = 2, nrow = 2, base_asp = .95,
          dpi = 600)
12.5 Figure 5 - Slope Chart
# Simulated two-day data set: 20 subjects x 2 days in long format with columns
# Subject, Value and Day.
set.seed(1) # generate random data
day1 <- rnorm(20, 0, 1)
day2 <- rnorm(20, 5, 1)
Subject <- rep(paste0('S', seq_len(20)), 2)
Data <- data.frame(Value = matrix(c(day1, day2), ncol = 1))
Day <- rep(c('Day 1', 'Day 2'), each = length(day1))
df1 <- cbind(Subject, Data, Day)
# Slope chart for the two-day data: group = Subject ties together the rows
# that belong to the same subject across the two days.
slope_n2 <- ggplot(data = df1, mapping = aes(x = Day, y = Value,
                                             group = Subject, fill = Day)) +
  sm_slope(labels = c('Day 1', 'Day 2'),
           shape = 21, color = 'white', line_alpha = 0.3) +
  scale_fill_manual(values = sm_color('blue','orange')) +
  ggtitle('A slope chart with 2 x-levels') +
  theme(plot.title = element_text(face="bold"))
# Simulated four-day data set: 20 subjects x 4 days in long format with
# columns Subject, Value and Day. Mean and spread increase from day to day.
set.seed(2) # generate random data
day1 <- rnorm(20, 0, 1)
day2 <- rnorm(20, 5, 1)
day3 <- rnorm(20, 6, 1.5)
day4 <- rnorm(20, 7, 2)
Subject <- rep(paste0('S', seq_len(20)), 4)
Data <- data.frame(Value = matrix(c(day1, day2, day3, day4), ncol = 1))
Day <- rep(c('Day 1', 'Day 2', 'Day 3', 'Day 4'), each = length(day1))
df2 <- cbind(Subject, Data, Day)
# Slope chart for the four-day data: group = Subject ties together the rows
# that belong to the same subject across days.
slope_n4 <- ggplot(data = df2, mapping = aes(x = Day, y = Value,
                                             group = Subject, fill = Day)) +
  sm_slope(labels = c('Day 1', 'Day 2', 'Day 3', 'Day 4'),
           shape = 21, color = 'white', line_alpha = 0.3) +
  scale_fill_manual(values = sm_palette(4)) +
  ggtitle('sm_slope() with 4 x-levels') +
  theme(plot.title = element_text(face="bold"))
# Place the two slope charts side by side; rel_widths gives the four-level
# chart more horizontal room than the two-level chart.
slopes <- plot_grid(slope_n2, slope_n4,
                    labels = c("A", "B"), rel_widths = c(1.5,2),
                    label_size = 12,
                    ncol = 2, nrow = 1,
                    scale = 0.95)
print(slopes)

# Write the assembled figure to a PNG file in the working directory.
save_plot("slopes.png", slopes,
          ncol = 2, nrow = 1, base_asp = .95,
          dpi = 600)
12.6 Figure 6 - Raincloud Plot
Due to the recent update of smplot (Feb. 21st, 2022), paired lines are no longer shown in this figure.
# Figure 6, panels A-C: raincloud plots of the two-day data (df1) with three
# combinations of sep_level and which_side, each paired with a caption panel.
# The caption panels draw white points under theme_nothing() so that only the
# annotate() text is meant to be visible.

# Panel A: sep_level = 0; which_side is not passed (the caption states that
# this corresponds to "right").
raincloud1 <- sm_raincloud(data = df1, x = Day, y = Value,
                           boxplot_alpha = 0.5, color = 'white', shape = 21,
                           sep_level = 0) +
  scale_x_continuous(limits = c(0.15,2.85),
                     labels = c('1', '2'),
                     breaks = c(1,2)) +
  scale_color_manual(values = rep('transparent',2)) +
  scale_fill_manual(values = sm_palette(2)) +
  ggtitle('A raincloud plot with 2 x-levels') +
  xlab('Day') +
  theme(plot.title = element_text(face="bold"))

# Caption for panel A.
text1 <- ggplot(data = df1, mapping = aes(x = Day, y = Value)) +
  geom_point(color = 'white', fill = 'white') + theme_nothing() +
  annotate('text', label = 'sep_level = 0\nwhich_side = "right"',
           x = 1.5, y = 5, fontface = 2) +
  annotate('text', x = 1.5, y = 2.5,
           label = 'Separation is minimum amongst\npoints, violin plot and boxplot.\nThis can be adjusted with') +
  annotate('text', x = 1.5, y = 1.6, label = '\n sep_level (0-4).', fontface = 2) +
  annotate('text', x = 1.5, y = -0.5,
           label = 'The violin plots face to the right.\nThis can be be changed with') +
  annotate('text', x = 1.5, y = -1.1, label = '\n which_side ("right", "left", "mixed").', fontface = 2)

# Panel B: sep_level = 2 with which_side = 'mixed'.
raincloud2 <- sm_raincloud(data = df1, x = Day, y = Value,
                           boxplot_alpha = 0.5, color = 'white', shape = 21,
                           sep_level = 2, which_side = 'mixed') +
  scale_x_continuous(limits = c(0.15,2.85),
                     labels = c('1', '2'),
                     breaks = c(1,2)) +
  scale_color_manual(values = rep('transparent',2)) +
  scale_fill_manual(values = sm_palette(2)) +
  ggtitle('sm_raincloud() with 2 x-levels') +
  xlab('Day') +
  theme(plot.title = element_text(face="bold"))

# Caption for panel B.
text2 <- ggplot(data = df1, mapping = aes(x = Day, y = Value)) +
  geom_point(color = 'white', fill = 'white') + theme_nothing() +
  annotate('text', label = 'sep_level = 2\nwhich_side = "mixed"',
           x = 1.5, y = 4.7, fontface = 2) +
  annotate('text', x = 1.5, y = 2.8,
           label = 'Default sep_level is set at 2\n but this can be changed (0-4).') +
  annotate('text', x = 1.5, y = 1,
           label = 'The violin plots face both to the\nleft (x=1) and right (x=2).') +
  annotate('text', x = 1.5, y = -0.6, label = '\nwhich_side = "mixed" only works when\nthere are 2 x discrete levels.', fontface = 2)

# Panel C: sep_level = 4 with which_side = 'left'.
raincloud3 <- sm_raincloud(data = df1, x = Day, y = Value,
                           boxplot_alpha = 0.5, color = 'white', shape = 21,
                           sep_level = 4, which_side = 'left') +
  scale_x_continuous(limits = c(0.15,2.85),
                     labels = c('1', '2'),
                     breaks = c(1,2)) +
  scale_color_manual(values = rep('transparent',2)) +
  scale_fill_manual(values = sm_palette(2)) +
  ggtitle('A raincloud with no overlap') +
  xlab('Day') +
  theme(plot.title = element_text(face="bold"))

# Caption for panel C.
text3 <- ggplot(data = df1, mapping = aes(x = Day, y = Value)) +
  geom_point(color = 'white', fill = 'white') + theme_nothing() +
  annotate('text', label = 'sep_level = 4\nwhich_side = "left"',
           x = 1.5, y = 3.5, fontface = 2) +
  annotate('text', x = 1.5, y = 1.5,
           label = 'The violin plots face to the left.')
# Arrange the three two-level raincloud plots and their captions in a 3 x 2
# grid; only the plot panels are lettered.
raincloud_2x_all <- plot_grid(raincloud1, text1,
                              raincloud2, text2,
                              raincloud3, text3, labels = c("A", "", "B", "", "C", ""),
                              label_size = 12,
                              ncol = 2, nrow = 3,
                              scale = 0.95)
# Panel D: raincloud plot of the four-day data (df2) with sep_level = 2;
# which_side is not passed (the title states that this corresponds to "right").
raincloud4 <- sm_raincloud(data = df2, x = Day, y = Value,
                           boxplot_alpha = 0.5, color = 'white', shape = 21,
                           sep_level = 2) +
  scale_x_continuous(limits = c(0.25,4.75),
                     labels = c('1', '2', '3', '4'),
                     breaks = c(1,2,3,4)) +
  scale_color_manual(values = rep('transparent',4)) +
  scale_fill_manual(values = sm_palette(4)) +
  ggtitle('Four discrete levels of x, sep_level = 2, which_side = "right"') +
  xlab('Day') +
  theme(plot.title = element_text(face="bold"))
# Stack the 3 x 2 grid of two-level panels above the four-level panel D;
# rel_heights gives the upper grid three quarters of the total height.
raincloud_all <- plot_grid(raincloud_2x_all,
                           raincloud4, labels = c("","D"),
                           label_size = 12,
                           ncol = 1, nrow = 2,
                           scale = 0.95,
                           rel_heights = c(7.5,2.5))
print(raincloud_all)

# Write the assembled figure to a PNG file in the working directory.
save_plot("rainclouds.png", raincloud_all,
          ncol = 2, nrow = 4, base_asp = .95,
          dpi = 600)
12.7 Figure 7 - Case Study
# Simulated case-study data: 20 subjects measured with two methods. Each
# method has 19 random draws plus a fixed 20th value of 2.5. The long-format
# result has columns Subject, Value and Method.
set.seed(11) # generate random data
method1 <- c(rnorm(19, 0, 1), 2.5)
method2 <- c(rnorm(19, 0, 1), 2.5)
Subject <- rep(paste0('S', seq_len(20)), 2)
Data <- data.frame(Value = matrix(c(method1, method2), ncol = 1))
Method <- rep(c('Method 1', 'Method 2'), each = length(method1))
df <- cbind(Subject, Data, Method)
# slope chart
# group = Subject ties together the two measurements of the same subject.
slope <- ggplot(data = df, mapping = aes(x = Method, y = Value,
                                         group = Subject, fill = Method)) +
  sm_slope(labels = c('Method 1', 'Method 2'),
           shape = 21, color = 'white', line_alpha = 0.3) +
  scale_fill_manual(values = sm_color('blue','orange')) +
  ggtitle('Slope chart') +
  theme(plot.title = element_text(face="bold")) +
  ylab('Gene expression level') +
  annotate('text', x = 1.5, y = 2, label = 'sm_slope()')
# correlation plot
# Wide-format copy of the two methods: one row per subject, one column per
# method.
df3 <- data.frame(first = method1, second = method2)

# Scatter plot of method 2 against method 1 with a Pearson correlation from
# sm_statCorr(); both axes are limited to [-2.5, 2.5].
corr_plot <- ggplot(data = df3, mapping = aes(x = first, y = second)) +
  geom_point(shape = 21, fill = sm_color('blue'), color = 'white',
             size = 3) + sm_corr_theme(borders = FALSE) +
  scale_y_continuous(limits = c(-2.5,2.5)) +
  scale_x_continuous(limits = c(-2.5,2.5)) +
  sm_statCorr(color = sm_color('blue'), corr_method = 'pearson',
              label_x = -2.2, label_y = 2.3) +
  ggtitle('Correlation plot') +
  xlab('Method 1') + ylab('Method 2') +
  annotate('text', x = 0, y = -2.3, label = 'sm_corr_theme() + sm_statCorr()')
# BA plot
# Bland-Altman statistics for the two methods; the mean_diff, upper_limit and
# lower_limit elements are used below to position and fill the text labels.
res <- sm_statBlandAlt(df3$first,df3$second)

# Bland-Altman plot. Each statistic gets a name label 0.4 above and its value
# (3 significant digits) 0.4 below the corresponding y position.
ba_plot <- sm_bland_altman(df3$first, df3$second, shape = 21,
                           color = 'white',
                           fill = sm_color('blue')) +
  scale_y_continuous(limits = c(-5,5)) +
  scale_x_continuous(limits = c(-2,5)) +
  annotate('text', label = 'Mean', x = 4.3, y = res$mean_diff + 0.4) +
  annotate('text', label = signif(res$mean_diff,3), x = 4.3, y = res$mean_diff - 0.4) +
  annotate('text', label = 'Upper limit', x = 4.3, y = res$upper_limit + 0.4) +
  annotate('text', label = signif(res$upper_limit,3), x = 4.3, y = res$upper_limit - 0.4) +
  annotate('text', label = 'Lower limit', x = 4.3, y = res$lower_limit + 0.4) +
  annotate('text', label = signif(res$lower_limit,3), x = 4.3, y = res$lower_limit - 0.4) +
  ggtitle('Bland-Altman plot') +
  annotate('text', x = 1.5, y = -4, label = 'sm_statBlandAlt() + \nsm_bland_altman()')
# Place the three case-study plots side by side in one row.
case_study <- plot_grid(slope, corr_plot, ba_plot,
                        labels = c("A", "B", "C"),
                        hjust = -4, # horizontal position of ABC labels
                        label_size = 12,
                        ncol = 3, nrow = 1,
                        scale = 0.95)
print(case_study)

# Write the assembled figure to a PNG file in the working directory.
save_plot("case_study.png", case_study,
          ncol = 3, nrow = 1, base_asp = .95,
          dpi = 600)