最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

ggplot2 - Sort of a likert plot based on the sorting of another likert plot in R - Stack Overflow

programmeradmin5浏览0评论

I have a data frame in R that I use to calculate percentages and present them in a likert plot plus a bar plot. In the middle I have a bar plot that has the percentages of the NA's in this data frame in each question within each grouping level. I want to match the questions of the middle likert plot with the one at left (i.e the left likert plot is my base and dependent on this plot to match the q1:q6 in the middle plot). How can I achieve this in R ?

Any help?


library(ggstats)
library(dplyr)
library(ggplot2)


likert_levels <- c(
  "Strongly disagree",
  "Disagree",
  "Neither agree nor disagree",
  "Agree",
  "Strongly agree"
)
set.seed(42)
df <-
  tibble(
    grouping = sample(c(LETTERS[1:9]), 150, replace = TRUE),
    q1 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q2 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q3 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q4 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q5 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q6 = sample(c(likert_levels, NA), 150, replace = TRUE)
  ) |>
  mutate(across(-grouping, ~ factor(.x, levels = likert_levels)))

filter_df = df %>%
  dplyr::select(grouping) %>%
  dplyr::group_by(grouping) %>%
  dplyr::summarise(n = n()) %>%
  dplyr::filter(n >= 18)%>%
  dplyr::arrange(desc(n))
parameters = as.vector(filter_df[[1]])

# Seed used to create the data
set.seed(42)

data_fun <- function(.data) {
  .data |>
    mutate(
      .question = interaction(grouping, .question),
      .question = reorder(
        .question,
        ave(as.numeric(.answer), .question, FUN = \(x) {
          sum(x %in% 4:5) / length(x[!is.na(x)])
        }),
        decreasing = TRUE
      )
    )
}

df = df%>%
  filter(grouping %in% parameters)

v1 <- gglikert(df, q1:q6,
               facet_rows = vars(grouping),
               add_totals = TRUE,
               data_fun = data_fun
) +
  scale_y_discrete(
    labels = ~ gsub("^.*\\.", "", .x)
  ) +
  labs(y = NULL) +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    legend.position = "bottom",
    strip.text = element_text(color = "black", face = "bold"),
    strip.placement = "outside"
  ) +
  theme(strip.text.y = element_text(angle = 0)) +
  facet_wrap(
    facets = vars(grouping),
    labeller = labeller(grouping = label_wrap_gen(width = 5)),
    ncol = 1, scales = "free_y",
    strip.position = "right"
  )

v2 <- filter_df %>%
  ggplot2::ggplot(aes(y = grouping, x = n)) +
  geom_bar(stat = "identity", fill = "lightgrey") +
  geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
  scale_y_discrete(
    limits = rev, expand = c(0, 0)
  ) +
  facet_wrap(
    facets = vars(grouping),
    labeller = labeller(grouping = label_wrap_gen(width = 10)),
    ncol = 1, scales = "free_y",
    strip.position = "left"
  ) +
  theme_light() +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    legend.position = "none",
    strip.text.y = element_blank()
  ) +
  labs(x = NULL, y = NULL)

availability_levels <- c(
  "available",
  "not_available"
)

df_ava = df%>%
  
  pivot_longer(!grouping, names_to = "question", values_to = "response")%>%
  mutate(count2 = case_when(is.na(response) ~ "not_available",
                            TRUE ~"available"))%>%
  select(-response)%>%
  group_by(grouping,question)%>%
  summarise(
    total = n(),
    available_percent = sum(count2 == "available") / total * 100,
    not_available_percent = round(sum(count2 == "not_available") / total * 100,0),
    .groups = 'drop'
  )%>%
  select(grouping,question,not_available_percent)

df_ava


v3 <- df_ava%>%
  ggplot2::ggplot(aes(y = question, x = not_available_percent)) +
  geom_bar(stat = "identity", fill = "lightgrey") +
  geom_text(aes(label = paste0(not_available_percent, "%")), 
            size = 2.5,
            position = position_stack(vjust = 0.5)) +
  scale_y_discrete(
    limits = rev, expand = c(0, 0)
  ) +
  facet_wrap(
    facets = vars(grouping),
    labeller = labeller(grouping = label_wrap_gen(width = 10)),
    ncol = 1, scales = "free_y",
    strip.position = "left"
  ) +
  theme_light() +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    legend.position = "bottom"#,
  #  strip.text.y = element_blank()
  ) +
  labs(x = NULL, y = NULL)

v1+v3+v2+ plot_layout(widths = c(3,1,.5)
) &
  theme(legend.position = "bottom")

I have a data frame in R that I use to calculate percentages and present them in a likert plot plus a bar plot. In the middle I have a bar plot that has the percentages of the NA's in this data frame in each question within each grouping level. I want to match the questions of the middle likert plot with the one at left (i.e the left likert plot is my base and dependent on this plot to match the q1:q6 in the middle plot). How can I achieve this in R ?

Any help?


library(ggstats)
library(dplyr)
library(ggplot2)


likert_levels <- c(
  "Strongly disagree",
  "Disagree",
  "Neither agree nor disagree",
  "Agree",
  "Strongly agree"
)
set.seed(42)
df <-
  tibble(
    grouping = sample(c(LETTERS[1:9]), 150, replace = TRUE),
    q1 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q2 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q3 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q4 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q5 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q6 = sample(c(likert_levels, NA), 150, replace = TRUE)
  ) |>
  mutate(across(-grouping, ~ factor(.x, levels = likert_levels)))

filter_df = df %>%
  dplyr::select(grouping) %>%
  dplyr::group_by(grouping) %>%
  dplyr::summarise(n = n()) %>%
  dplyr::filter(n >= 18)%>%
  dplyr::arrange(desc(n))
parameters = as.vector(filter_df[[1]])

# Seed used to create the data
set.seed(42)

data_fun <- function(.data) {
  .data |>
    mutate(
      .question = interaction(grouping, .question),
      .question = reorder(
        .question,
        ave(as.numeric(.answer), .question, FUN = \(x) {
          sum(x %in% 4:5) / length(x[!is.na(x)])
        }),
        decreasing = TRUE
      )
    )
}

df = df%>%
  filter(grouping %in% parameters)

v1 <- gglikert(df, q1:q6,
               facet_rows = vars(grouping),
               add_totals = TRUE,
               data_fun = data_fun
) +
  scale_y_discrete(
    labels = ~ gsub("^.*\\.", "", .x)
  ) +
  labs(y = NULL) +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    legend.position = "bottom",
    strip.text = element_text(color = "black", face = "bold"),
    strip.placement = "outside"
  ) +
  theme(strip.text.y = element_text(angle = 0)) +
  facet_wrap(
    facets = vars(grouping),
    labeller = labeller(grouping = label_wrap_gen(width = 5)),
    ncol = 1, scales = "free_y",
    strip.position = "right"
  )

v2 <- filter_df %>%
  ggplot2::ggplot(aes(y = grouping, x = n)) +
  geom_bar(stat = "identity", fill = "lightgrey") +
  geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
  scale_y_discrete(
    limits = rev, expand = c(0, 0)
  ) +
  facet_wrap(
    facets = vars(grouping),
    labeller = labeller(grouping = label_wrap_gen(width = 10)),
    ncol = 1, scales = "free_y",
    strip.position = "left"
  ) +
  theme_light() +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    legend.position = "none",
    strip.text.y = element_blank()
  ) +
  labs(x = NULL, y = NULL)

availability_levels <- c(
  "available",
  "not_available"
)

df_ava = df%>%
  
  pivot_longer(!grouping, names_to = "question", values_to = "response")%>%
  mutate(count2 = case_when(is.na(response) ~ "not_available",
                            TRUE ~"available"))%>%
  select(-response)%>%
  group_by(grouping,question)%>%
  summarise(
    total = n(),
    available_percent = sum(count2 == "available") / total * 100,
    not_available_percent = round(sum(count2 == "not_available") / total * 100,0),
    .groups = 'drop'
  )%>%
  select(grouping,question,not_available_percent)

df_ava


v3 <- df_ava%>%
  ggplot2::ggplot(aes(y = question, x = not_available_percent)) +
  geom_bar(stat = "identity", fill = "lightgrey") +
  geom_text(aes(label = paste0(not_available_percent, "%")), 
            size = 2.5,
            position = position_stack(vjust = 0.5)) +
  scale_y_discrete(
    limits = rev, expand = c(0, 0)
  ) +
  facet_wrap(
    facets = vars(grouping),
    labeller = labeller(grouping = label_wrap_gen(width = 10)),
    ncol = 1, scales = "free_y",
    strip.position = "left"
  ) +
  theme_light() +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    legend.position = "bottom"#,
  #  strip.text.y = element_blank()
  ) +
  labs(x = NULL, y = NULL)

v1+v3+v2+ plot_layout(widths = c(3,1,.5)
) &
  theme(legend.position = "bottom")

Share Improve this question edited yesterday Homer Jay Simpson asked yesterday Homer Jay SimpsonHomer Jay Simpson 1,2187 silver badges35 bronze badges
Add a comment  | 

1 Answer 1

Reset to default 1

Here is a quick and easy approach to achieve your desired result which uses gglikert_data to replicate the dataset used under the hood for v1 then uses the levels for the .question column of this data to set the order of the question column in the v3 data, where of course we have to first create the interaction with grouping and clean up the y axis labels as you did for v1:

library(ggstats)
library(dplyr)
library(ggplot2)

v1_data <- gglikert_data(df, q1:q6, data_fun = data_fun)

v3 <- df_ava %>%
  mutate(
    question = interaction(grouping, question),
    question = factor(question, levels = levels(v1_data$.question))
  ) |>
  ggplot2::ggplot(aes(y = question, x = not_available_percent)) +
  geom_bar(stat = "identity", fill = "lightgrey") +
  geom_text(aes(label = paste0(not_available_percent, "%")),
    size = 2.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_y_discrete(
    labels = ~ gsub("^.*\\.", "", .x),
    limits = rev, 
    expand = c(0, 0)
  ) +
  facet_wrap(
    facets = vars(grouping),
    labeller = labeller(grouping = label_wrap_gen(width = 10)),
    ncol = 1, scales = "free_y",
    strip.position = "left"
  ) +
  theme_light() +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    legend.position = "bottom"
  ) +
  labs(x = NULL, y = NULL)

v1 + v3 + v2 + plot_layout(widths = c(3, 1, .5)) &
  theme(legend.position = "bottom")

发布评论

评论列表(0)

  1. 暂无评论