I am trying to draw an alluvial plot like the one below, where the x axis shows InEx and ReA SFMC (two categories of the variable "Group"), the y axis is the variable "n" (frequency), and the fill is the variable "CTaa_beta".
My data is as follows:
# Reproducible example data in dput() form: 50 rows, one CTaa_beta sequence
# per row, with its Group and its frequency n.
# Group is stored as integer codes into the seven levels listed below; only
# codes 4L ("InEx") and 7L ("ReA SFMC") occur in these rows.
# The class vector marks the object as a tibble (tbl_df).
a<- structure(list(CTaa_beta = c("CASSVAGPNTEAFF", "CASSEGTSGGASTQYF",
"CASSLRQGPSYEQYF", "CASSVGNRGGTDTQYF", "CASKPGTTSNQPQHF", "CSVAGTGVYNEQFF",
"CSVVPGGQGGYEQYF", "CASSLEGRERYEQFF", "CASSLLAGGNNEQFF", "CASTPAVRDGNYEQYF",
"CASSSGGLDEQYF", "CASSAGLAGGYEQYF", "CASSSPGTTNEKLFF", "CATSIGGPPYEQYF",
"CASSLSGSPAYGYTF", "CASSEGHRGGTDTQYF", "CASSLRDSHYEQYF", "CASSLGGSSYEQYF",
"CASSYPTSGANVLTF", "CASSRQAEAFF", "CASSLLQGPSSPLHF", "CASSLRDGHYGYTF",
"CASSQWMYSPNGYTF", "CASSQYRGTEAFF", "CAWSRPLGYTF", "CSASFGDGGEGETQYF",
"CSARVPTSGDYNEQFF", "CASRPEQGGPYEQYF", "CSARGGKENSPLHF", "CASSLVGAGANVLTF",
"CSVEDPSSGSYEQYF", "CASSGTGTGASGNEQFF", "CASSVGLFSTDTQYF", "CASSPLQGPSQPQHF",
"CASSFGTENTGELFF", "CASSEVGGSMETQYF", "CATSGRGDEVGELFF", "CASSSHYRGGTDTQYF",
"CASSPDRTGEANNEQFF", "CASSVGLYSTDTQYF", "CASGLVQQGGTEAFF", "CASGPGLQQTYGYTF",
"CASGGTGPYNEQFF", "CASSTDISSYNEQFF", "CASSLAKAGTGGEKLFF", "CASSQAKGGGETQYF",
"CASSLRGGPYNEQFF", "CASSLLPGLAGAGNEQFF", "CSARAGGWGTDTQYF", "CASSPGSSGSETQYF"
), Group = structure(c(4L, 7L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 7L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
7L, 7L, 7L, 4L, 4L, 4L, 7L, 7L, 7L, 4L, 7L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 7L, 7L, 4L, 4L, 4L), levels = c("HC PBMC", "axSpA PBMC",
"axSpA SFMC", "InEx", "PD-1+ TIGIT+", "ReA PBMC", "ReA SFMC"), class = "factor"),
n = c(303L, 292L, 200L, 163L, 125L, 99L, 96L, 94L, 89L, 85L,
80L, 67L, 66L, 62L, 59L, 57L, 54L, 53L, 53L, 52L, 52L, 51L,
50L, 49L, 48L, 48L, 48L, 46L, 45L, 44L, 43L, 41L, 41L, 40L,
36L, 35L, 35L, 34L, 33L, 32L, 32L, 31L, 31L, 30L, 29L, 29L,
29L, 28L, 27L, 27L)), row.names = c(NA, -50L), class = c("tbl_df",
"tbl", "data.frame"))
What I am hoping to get:
My code for plotting:
# Attempted alluvial plot in the wide (axis1/axis2) form: each axis is a
# logical test of Group, ribbons are weighted by n and filled per CTaa_beta.
ggplot(
  a,
  aes(axis1 = Group == "InEx", axis2 = Group == "ReA SFMC", y = n)
) +
  geom_alluvium(aes(fill = CTaa_beta)) +
  geom_stratum() +
  # Label every stratum box with the value it represents.
  geom_text(aes(label = after_stat(stratum)), stat = "stratum") +
  scale_fill_viridis_d() +
  theme_classic() +
  theme(legend.position = "none")
Output of my code:
Essentially, I am hoping to see which CTaa_beta sequences are shared between the InEx and ReA SFMC groups, and by how much (which would be indicated by the thickness of the flows in the alluvial plot). Any help would be great!
I am trying to draw an alluvial plot like the one below, where the x axis shows InEx and ReA SFMC (two categories of the variable "Group"), the y axis is the variable "n" (frequency), and the fill is the variable "CTaa_beta".
My data is as follows:
# Reproducible example data in dput() form: 50 rows, one CTaa_beta sequence
# per row, with its Group and its frequency n.
# Group is stored as integer codes into the seven levels listed below; only
# codes 4L ("InEx") and 7L ("ReA SFMC") occur in these rows.
# The class vector marks the object as a tibble (tbl_df).
a<- structure(list(CTaa_beta = c("CASSVAGPNTEAFF", "CASSEGTSGGASTQYF",
"CASSLRQGPSYEQYF", "CASSVGNRGGTDTQYF", "CASKPGTTSNQPQHF", "CSVAGTGVYNEQFF",
"CSVVPGGQGGYEQYF", "CASSLEGRERYEQFF", "CASSLLAGGNNEQFF", "CASTPAVRDGNYEQYF",
"CASSSGGLDEQYF", "CASSAGLAGGYEQYF", "CASSSPGTTNEKLFF", "CATSIGGPPYEQYF",
"CASSLSGSPAYGYTF", "CASSEGHRGGTDTQYF", "CASSLRDSHYEQYF", "CASSLGGSSYEQYF",
"CASSYPTSGANVLTF", "CASSRQAEAFF", "CASSLLQGPSSPLHF", "CASSLRDGHYGYTF",
"CASSQWMYSPNGYTF", "CASSQYRGTEAFF", "CAWSRPLGYTF", "CSASFGDGGEGETQYF",
"CSARVPTSGDYNEQFF", "CASRPEQGGPYEQYF", "CSARGGKENSPLHF", "CASSLVGAGANVLTF",
"CSVEDPSSGSYEQYF", "CASSGTGTGASGNEQFF", "CASSVGLFSTDTQYF", "CASSPLQGPSQPQHF",
"CASSFGTENTGELFF", "CASSEVGGSMETQYF", "CATSGRGDEVGELFF", "CASSSHYRGGTDTQYF",
"CASSPDRTGEANNEQFF", "CASSVGLYSTDTQYF", "CASGLVQQGGTEAFF", "CASGPGLQQTYGYTF",
"CASGGTGPYNEQFF", "CASSTDISSYNEQFF", "CASSLAKAGTGGEKLFF", "CASSQAKGGGETQYF",
"CASSLRGGPYNEQFF", "CASSLLPGLAGAGNEQFF", "CSARAGGWGTDTQYF", "CASSPGSSGSETQYF"
), Group = structure(c(4L, 7L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 7L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
7L, 7L, 7L, 4L, 4L, 4L, 7L, 7L, 7L, 4L, 7L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 7L, 7L, 4L, 4L, 4L), levels = c("HC PBMC", "axSpA PBMC",
"axSpA SFMC", "InEx", "PD-1+ TIGIT+", "ReA PBMC", "ReA SFMC"), class = "factor"),
n = c(303L, 292L, 200L, 163L, 125L, 99L, 96L, 94L, 89L, 85L,
80L, 67L, 66L, 62L, 59L, 57L, 54L, 53L, 53L, 52L, 52L, 51L,
50L, 49L, 48L, 48L, 48L, 46L, 45L, 44L, 43L, 41L, 41L, 40L,
36L, 35L, 35L, 34L, 33L, 32L, 32L, 31L, 31L, 30L, 29L, 29L,
29L, 28L, 27L, 27L)), row.names = c(NA, -50L), class = c("tbl_df",
"tbl", "data.frame"))
What I am hoping to get:
My code for plotting:
# Attempted alluvial plot in the wide (axis1/axis2) form: each axis is a
# logical test of Group, ribbons are weighted by n and filled per CTaa_beta.
ggplot(
  a,
  aes(axis1 = Group == "InEx", axis2 = Group == "ReA SFMC", y = n)
) +
  geom_alluvium(aes(fill = CTaa_beta)) +
  geom_stratum() +
  # Label every stratum box with the value it represents.
  geom_text(aes(label = after_stat(stratum)), stat = "stratum") +
  scale_fill_viridis_d() +
  theme_classic() +
  theme(legend.position = "none")
Output of my code:
Essentially, I am hoping to see which CTaa_beta sequences are shared between the InEx and ReA SFMC groups, and by how much (which would be indicated by the thickness of the flows in the alluvial plot). Any help would be great!
Share Improve this question edited Nov 19, 2024 at 2:39 Phil 8,1273 gold badges40 silver badges76 bronze badges asked Nov 18, 2024 at 23:54 Zoya QaiyumZoya Qaiyum 998 bronze badges1 Answer
Reset to default 2
Your data appears to be in a "long" format, so you can use the lodes (long) format in `ggalluvial`, with the `Group` variable as your key.
library(ggalluvial)

# Long-format alluvial plot of `a`: Group gives the x positions, n the
# heights, and each CTaa_beta sequence is both the stratum drawn at an axis
# and the alluvium traced between axes (and also sets the fill).
ggplot(
  a,
  aes(
    x = Group,
    y = n,
    alluvium = CTaa_beta,
    stratum = CTaa_beta,
    fill = CTaa_beta
  )
) +
  geom_flow() +
  geom_stratum() +
  # Label every stratum box with the sequence it represents.
  geom_text(aes(label = after_stat(stratum)), stat = "stratum") +
  scale_fill_viridis_d() +
  theme_classic() +
  theme(legend.position = "none")
The shared sample data doesn't have any amino acid sequences with data for more than 1 group, so no alluvials are drawn. I've made up some data that hopefully helps illustrate how to create this plot. I dropped the peptide labels from this example.
Generate some data
# Simulated example data: 10 sequences x 7 groups, one row per combination,
# with a random count n drawn from 0..100 for each row.
set.seed(123)  # fix the RNG so the sampled counts are reproducible

# Group labels in the same order as the factor levels in the question's data.
# Passing them as explicit levels keeps the x-axis order fixed; as.factor()
# would instead sort the labels using the current locale's collation, which
# gives a different order on different machines.
group_levels <- c("HC PBMC", "axSpA PBMC", "axSpA SFMC", "InEx",
                  "PD-1+ TIGIT+", "ReA PBMC", "ReA SFMC")

b <- data.frame(
  # Each sequence is repeated once per group ...
  CTaa_beta = rep(
    c("CASSVAGPNTEAFF", "CASSEGTSGGASTQYF", "CASSLRQGPSYEQYF",
      "CASSVGNRGGTDTQYF", "CASKPGTTSNQPQHF", "CSVAGTGVYNEQFF",
      "CSVVPGGQGGYEQYF", "CASSLEGRERYEQFF", "CASSLLAGGNNEQFF",
      "CASTPAVRDGNYEQYF"),
    each = length(group_levels)
  ),
  # ... and the full set of groups is cycled once per sequence.
  Group = factor(rep(group_levels, times = 10), levels = group_levels),
  n = sample(0:100, 70, replace = TRUE)
)
Draw the alluvial plot
# Long-format alluvial plot of the simulated data `b`: one stratum per
# sequence at every Group, with flows joining the same sequence across
# neighbouring groups. Stratum labels are omitted here.
ggplot(
  b,
  aes(
    x = Group,
    y = n,
    alluvium = CTaa_beta,
    stratum = CTaa_beta,
    fill = CTaa_beta
  )
) +
  geom_flow() +
  geom_stratum() +
  scale_fill_viridis_d() +
  theme_classic() +
  theme(legend.position = "none")