最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

r - Alluvial plots using ggplot2 - highlight certain pairings - Stack Overflow

programmeradmin3浏览0评论

I have plotted an alluvial plot using ggplot2, however I cannot seem to figure out how to colour only the most frequent pair "CAGGFNYQLIW" from the variable "CTaa_alpha" which is paired with "CASSVAGPNTEAFF" from the variable "CTaa_beta", while keeping everything else grey.

My code below:

# Example data: 50 paired CDR3 alpha/beta amino-acid sequences with the count
# (n) of each pairing, listed from most to least frequent.
# Built inside local() so the helper vectors do not leak into the workspace.
a <- local({
  alpha_seqs <- c(
    "CAGGFNYQLIW", "CVVNRDDKIIF", "CAVRGDSNYQLIW", "CVVNTRSNDYKLSF",
    "CAVQAAANAGKSTF", "CVVLNTGGFKTIF", "CAYVNNNDMRF", "CATDANTGFQKLVF",
    "CAVRAPDQTGANNLFF", "CALREYGNKLVF", "CATDRDDKIIF", "CAYRGGSNYKLTF",
    "CAMRELTSNTGKLIF", "CALISYNTDKLIF", "CALTPYGNNRLAF", "CAAVPNAGNMLTF",
    "CAWEYGNKLVF", "CVVSVDYGQNFVF", "CAFYGQNFVF", "CAPGMETSYDKVIF",
    "CAVTRNSDGQKLLF", "CAGASGGGSYIPTF", "CAVRDTHNTDKLIF", "CAVNIHSGYALNF",
    "CAGVDTNAGKSTF", "CAPRDSNYQLIW", "CVVNAPSGNTPLVF", "CALSELPYSSASKIIF",
    "CALGDGGATNKLIF", "CAVALSGYALNF", "CLVGDVTAGNKLTF", "CAGPFSGGYNKLIF",
    "CATAPNYGGATNKLIF", "CAGITGGGNKLTF", "CAVTGAAYNTDKLIF", "CALPPQKLVF",
    "CAVGDGQNFVF", "CILRIYQGGSEKLVF", "CAMREITGNTGKLIF", "CAVSSSSGSARQLTF",
    "CASRYNFNKFYF", "CATREAGNMLTF", "CAVRENQAGTALIF", "CAVTSPGANNLFF",
    "CAVSTPTGANSKLTF", "CAVSKSARQLTF", "CAVLSNDYKLSF", "CAVRDGDYKLSF",
    "CAARGVYGNKLVF", "CALSEAPYGGATNKLIF"
  )
  beta_seqs <- c(
    "CASSVAGPNTEAFF", "CASSVGNRGGTDTQYF", "CASSLRQGPSYEQYF", "CASKPGTTSNQPQHF",
    "CSVAGTGVYNEQFF", "CSVVPGGQGGYEQYF", "CASSSGGLDEQYF", "CATSIGGPPYEQYF",
    "CASSAGLAGGYEQYF", "CASSSPGTTNEKLFF", "CASSLLAGGNNEQFF", "CASSLLQGPSSPLHF",
    "CASSLGGSSYEQYF", "CASSLRDGHYGYTF", "CASSLRDSHYEQYF", "CASSQWMYSPNGYTF",
    "CSASFGDGGEGETQYF", "CASSEGHRGGTDTQYF", "CASSLSGSPAYGYTF",
    "CASSGTGTGASGNEQFF", "CAWSRPLGYTF", "CASSLVGAGANVLTF", "CASSRQAEAFF",
    "CASSLLAGGNNEQFF", "CASSSHYRGGTDTQYF", "CASSEVGGSMETQYF", "CASSTDISSYNEQFF",
    "CASGLVQQGGTEAFF", "CASSLLPGLAGAGNEQFF", "CASTPAVRDGNYEQYF",
    "CASGPGLQQTYGYTF", "CASSPDRTGEANNEQFF", "CASSLAKAGTGGEKLFF",
    "CASGGTGPYNEQFF", "CSVEDPSSGSYEQYF", "CASSQYRGTEAFF", "CASSPGSSGSETQYF",
    "CASSYSEVTEAFF", "CSARAGGWGTDTQYF", "CSATAYRTGAYEQYF", "CASRPERGHTDTQYF",
    "CASSFEGGGTEAFF", "CASSQYRGTEAFF", "CASSTQGQSYTEAFF", "CASSVGLYSTDTQYF",
    "CASSQDPTDQPQHF", "CASSSTEKDTQYF", "CSAFTGNTEAFF", "CASSYTGRPEQYF",
    "CASSPGQGLLSGELFF"
  )
  pair_counts <- c(
    268L, 145L, 142L, 109L, 95L, 84L, 60L, 60L, 56L, 55L,
    53L, 52L, 51L, 49L, 48L, 48L, 45L, 42L, 36L, 34L,
    33L, 32L, 32L, 32L, 31L, 31L, 28L, 28L, 27L, 27L,
    27L, 26L, 26L, 26L, 25L, 25L, 23L, 22L, 22L, 20L,
    20L, 20L, 20L, 19L, 19L, 19L, 18L, 18L, 17L, 17L
  )

  # Same attributes as the original dput() output: a tibble-classed data frame
  # with compact automatic row names.
  structure(
    list(CTaa_alpha = alpha_seqs, CTaa_beta = beta_seqs, n = pair_counts),
    row.names = c(NA, -50L),
    class = c("tbl_df", "tbl", "data.frame")
  )
})


# Alluvial plot of the alpha/beta pairings: one axis per chain, flow width
# taken from n.
ggplot(
  data = a,
  mapping = aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
) +
  # "green" sits inside aes(), so it is handed to the fill scale as a single
  # data value rather than used as a literal colour; every flow therefore
  # receives the same scale-assigned colour.
  geom_alluvium(mapping = aes(fill = "green")) +
  geom_stratum() +
  # Label each stratum box with its sequence.
  geom_text(
    mapping = aes(label = after_stat(stratum)),
    stat = "stratum"
  ) +
  scale_x_discrete(
    limits = c("CDR3_alpha", "CDR3_beta"),
    expand = c(0.15, 0.05)
  ) +
  scale_fill_viridis_d() +
  theme_classic() +
  theme(legend.position = "none")

The code above gives me the following plot:

As you can see, it is a bit "messy" and I would like to have the option for highlighting specific pairings (whether it is row 1 which is the most frequent pairing or row 10 which is the 10th most frequent pairing). Any insights would be welcome!

I have plotted an alluvial plot using ggplot2, however I cannot seem to figure out how to colour only the most frequent pair "CAGGFNYQLIW" from the variable "CTaa_alpha" which is paired with "CASSVAGPNTEAFF" from the variable "CTaa_beta", while keeping everything else grey.

My code below:

# Example data: 50 paired CDR3 alpha/beta amino-acid sequences with the count
# (n) of each pairing, listed from most to least frequent.
# Built inside local() so the helper vectors do not leak into the workspace.
a <- local({
  alpha_seqs <- c(
    "CAGGFNYQLIW", "CVVNRDDKIIF", "CAVRGDSNYQLIW", "CVVNTRSNDYKLSF",
    "CAVQAAANAGKSTF", "CVVLNTGGFKTIF", "CAYVNNNDMRF", "CATDANTGFQKLVF",
    "CAVRAPDQTGANNLFF", "CALREYGNKLVF", "CATDRDDKIIF", "CAYRGGSNYKLTF",
    "CAMRELTSNTGKLIF", "CALISYNTDKLIF", "CALTPYGNNRLAF", "CAAVPNAGNMLTF",
    "CAWEYGNKLVF", "CVVSVDYGQNFVF", "CAFYGQNFVF", "CAPGMETSYDKVIF",
    "CAVTRNSDGQKLLF", "CAGASGGGSYIPTF", "CAVRDTHNTDKLIF", "CAVNIHSGYALNF",
    "CAGVDTNAGKSTF", "CAPRDSNYQLIW", "CVVNAPSGNTPLVF", "CALSELPYSSASKIIF",
    "CALGDGGATNKLIF", "CAVALSGYALNF", "CLVGDVTAGNKLTF", "CAGPFSGGYNKLIF",
    "CATAPNYGGATNKLIF", "CAGITGGGNKLTF", "CAVTGAAYNTDKLIF", "CALPPQKLVF",
    "CAVGDGQNFVF", "CILRIYQGGSEKLVF", "CAMREITGNTGKLIF", "CAVSSSSGSARQLTF",
    "CASRYNFNKFYF", "CATREAGNMLTF", "CAVRENQAGTALIF", "CAVTSPGANNLFF",
    "CAVSTPTGANSKLTF", "CAVSKSARQLTF", "CAVLSNDYKLSF", "CAVRDGDYKLSF",
    "CAARGVYGNKLVF", "CALSEAPYGGATNKLIF"
  )
  beta_seqs <- c(
    "CASSVAGPNTEAFF", "CASSVGNRGGTDTQYF", "CASSLRQGPSYEQYF", "CASKPGTTSNQPQHF",
    "CSVAGTGVYNEQFF", "CSVVPGGQGGYEQYF", "CASSSGGLDEQYF", "CATSIGGPPYEQYF",
    "CASSAGLAGGYEQYF", "CASSSPGTTNEKLFF", "CASSLLAGGNNEQFF", "CASSLLQGPSSPLHF",
    "CASSLGGSSYEQYF", "CASSLRDGHYGYTF", "CASSLRDSHYEQYF", "CASSQWMYSPNGYTF",
    "CSASFGDGGEGETQYF", "CASSEGHRGGTDTQYF", "CASSLSGSPAYGYTF",
    "CASSGTGTGASGNEQFF", "CAWSRPLGYTF", "CASSLVGAGANVLTF", "CASSRQAEAFF",
    "CASSLLAGGNNEQFF", "CASSSHYRGGTDTQYF", "CASSEVGGSMETQYF", "CASSTDISSYNEQFF",
    "CASGLVQQGGTEAFF", "CASSLLPGLAGAGNEQFF", "CASTPAVRDGNYEQYF",
    "CASGPGLQQTYGYTF", "CASSPDRTGEANNEQFF", "CASSLAKAGTGGEKLFF",
    "CASGGTGPYNEQFF", "CSVEDPSSGSYEQYF", "CASSQYRGTEAFF", "CASSPGSSGSETQYF",
    "CASSYSEVTEAFF", "CSARAGGWGTDTQYF", "CSATAYRTGAYEQYF", "CASRPERGHTDTQYF",
    "CASSFEGGGTEAFF", "CASSQYRGTEAFF", "CASSTQGQSYTEAFF", "CASSVGLYSTDTQYF",
    "CASSQDPTDQPQHF", "CASSSTEKDTQYF", "CSAFTGNTEAFF", "CASSYTGRPEQYF",
    "CASSPGQGLLSGELFF"
  )
  pair_counts <- c(
    268L, 145L, 142L, 109L, 95L, 84L, 60L, 60L, 56L, 55L,
    53L, 52L, 51L, 49L, 48L, 48L, 45L, 42L, 36L, 34L,
    33L, 32L, 32L, 32L, 31L, 31L, 28L, 28L, 27L, 27L,
    27L, 26L, 26L, 26L, 25L, 25L, 23L, 22L, 22L, 20L,
    20L, 20L, 20L, 19L, 19L, 19L, 18L, 18L, 17L, 17L
  )

  # Same attributes as the original dput() output: a tibble-classed data frame
  # with compact automatic row names.
  structure(
    list(CTaa_alpha = alpha_seqs, CTaa_beta = beta_seqs, n = pair_counts),
    row.names = c(NA, -50L),
    class = c("tbl_df", "tbl", "data.frame")
  )
})


# Alluvial plot of the alpha/beta pairings: one axis per chain, flow width
# taken from n.
ggplot(
  data = a,
  mapping = aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
) +
  # "green" sits inside aes(), so it is handed to the fill scale as a single
  # data value rather than used as a literal colour; every flow therefore
  # receives the same scale-assigned colour.
  geom_alluvium(mapping = aes(fill = "green")) +
  geom_stratum() +
  # Label each stratum box with its sequence.
  geom_text(
    mapping = aes(label = after_stat(stratum)),
    stat = "stratum"
  ) +
  scale_x_discrete(
    limits = c("CDR3_alpha", "CDR3_beta"),
    expand = c(0.15, 0.05)
  ) +
  scale_fill_viridis_d() +
  theme_classic() +
  theme(legend.position = "none")

The code above gives me the following plot:

As you can see, it is a bit "messy" and I would like to have the option for highlighting specific pairings (whether it is row 1 which is the most frequent pairing or row 10 which is the 10th most frequent pairing). Any insights would be welcome!

Share Improve this question edited Nov 18, 2024 at 21:31 stefan 128k6 gold badges38 silver badges77 bronze badges asked Nov 18, 2024 at 21:20 Zoya QaiyumZoya Qaiyum 998 bronze badges
Add a comment  | 

1 Answer 1

Reset to default 5

To highlight some of the categories, you can map a condition to the fill aesthetic and then set your desired colors using scale_fill_manual; e.g., to highlight the top 3 categories you can do:

library(ggplot2)
library(ggalluvial)

# Highlight Top 3
# `.highlight` holds the CTaa_alpha values of the three rows with the largest
# n. To highlight specific rows instead, pick them directly, e.g.
# a$CTaa_alpha[c(1, 10)].
.highlight <- head(a$CTaa_alpha[order(a$n, decreasing = TRUE)], 3)

ggplot(
  data = a,
  aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
) +
  # The logical condition splits the flows into two fill groups: FALSE / TRUE.
  geom_alluvium(aes(fill = CTaa_alpha %in% .highlight)) +
  geom_stratum() +
  geom_text(
    stat = "stratum",
    aes(label = after_stat(stratum))
  ) +
  scale_x_discrete(
    limits = c("CDR3_alpha", "CDR3_beta"),
    expand = c(0.15, 0.05)
  ) +
  # Colours are named by level so each group keeps its intended colour even
  # when only one of TRUE / FALSE occurs (unnamed values are assigned by
  # position, which would give a lone TRUE group the grey).
  scale_fill_manual(
    values = c("FALSE" = "grey65", "TRUE" = "steelblue")
  ) +
  theme_classic() +
  theme(legend.position = "none")

发布评论

评论列表(0)

  1. 暂无评论