最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

dataframe - Totals are not properly shown based on sorting of even number of likert levels in R using gglikert - Stack Overflow

programmeradmin1浏览0评论

I have a simulated data frame in R with 6 levels of likert.

When I have odd number of likert levels I can sort the plot based on the two upper or the two lower levels of likert and the totals that appear in both ends are properly shown. But when I have even number of levels first of all the sorting is not working and the totals are taking into account also the 4th level to the right (positive) direction and the 3rd level at the left in the negative direction.

I suspect that in the data_fun function the part sum(x %in% 4:5) / length(x[!is.na(x)]) might be changed.

How can I properly, first of all sort the plot (each facet) as was solved here and show the totals of the two upper levels (satisfied and very satisfied) only and excluding the 4th level ?

Any help ?


library(ggstats)
library(dplyr)
library(ggplot2)
library(patchwork)
likert_levels_na = c("Not \n Available",
                     "Very \n Dissatisfied",
                     "Dissatisfied",
                     "Neutral",
                     "Satisfied",
                     "Very \n Satisfied")


custom_colors_na = c("Not \n Available" = "black",
                     "Very \n Dissatisfied" = "#ed2e1c",
                     "Dissatisfied" = "#e09c95",
                     "Neutral" = "#85c1e9",
                     "Satisfied" = "#7FF98B",
                     "Very \n Satisfied" = "#04B431"
)
df <-
  tibble(
    grouping = sample(c(LETTERS[1:9]), 150, replace = TRUE),
    q1 = sample(c(0:5), 150, replace = TRUE),
    q2 = sample(c(0:5), 150, replace = TRUE),
    q3 = sample(c(0:5), 150, replace = TRUE),
    q4 = sample(c(0:5), 150, replace = TRUE),
    q5 = sample(c(0:5), 150, replace = TRUE),
    q6 = sample(c(0:5), 150, replace = TRUE)
  )
df


filter_df = df %>%
  dplyr::select(grouping) %>%
  dplyr::group_by(grouping) %>%
  dplyr::summarise(n = n()) %>%
  dplyr::filter(n >= 15)%>%
  dplyr::arrange(desc(n))
parameters = as.vector(filter_df[[1]])





data_fun <- function(.data) {
  .data |>
    mutate(
      .question = interaction(grouping, .question),
      .question = reorder(
        .question,
        ave(as.numeric(.answer), .question, FUN = \(x) {
          sum(x %in% 4:5) / length(x[!is.na(x)])
        }),
        decreasing = TRUE
      )
    )
}


df=df%>%
  filter(grouping %in% parameters)%>%
  dplyr::mutate(across(-grouping, ~ case_when(
    . == 0 ~ likert_levels_na[1],
    . == 1 ~ likert_levels_na[2],
    . == 2 ~ likert_levels_na[3],
    . == 3 ~ likert_levels_na[4],
    . == 4 ~ likert_levels_na[5],
    . == 5 ~ likert_levels_na[6]
  )))%>%
  dplyr::mutate(across(- grouping, ~ factor(.x, levels = likert_levels_na)))

v1 <- gglikert(df, q1:q6,
               facet_rows = vars(grouping),
               add_totals = TRUE,
               totals_include_center = FALSE,
               data_fun = data_fun
) +
  scale_y_discrete(
    labels = ~ gsub("^.*\\.", "", .x)
  ) +
  labs(y = NULL) +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    axis.text.y.right = element_text(color = "black"),  # Move Y-axis text to the right
    legend.position = "bottom",
    strip.text = element_text(color = "black", face = "bold"),
    strip.placement = "outside",
    strip.text.y.left = element_text(angle = 0)        # Facet text on the left
  ) +
  theme(strip.text.y = element_text(angle = 0)) +
  facet_wrap(
    facets = vars(grouping),
    labeller = labeller(grouping = label_wrap_gen(width = 10)),
    ncol = 1, scales = "free_y",
    strip.position = "left"
  ) +scale_y_discrete(position = "right", labels = function(x) sub(".*?\\.", "", x))+
  scale_fill_manual(values = custom_colors_na)




v2 <- filter_df %>%
  ggplot2::ggplot(aes(y = grouping, x = n)) +
  geom_bar(stat = "identity", fill = "lightgrey") +
  geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
  scale_y_discrete(
    limits = rev, expand = c(0, 0)
  ) +
  theme_light() +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    legend.position = "none"
  ) +
  labs(x = NULL, y = NULL)

v1 + v2 +
  plot_layout(widths = c(0.8, 0.2), guides = "collect") &
  theme(legend.position = "bottom")


I have a simulated data frame in R with 6 levels of likert.

When I have odd number of likert levels I can sort the plot based on the two upper or the two lower levels of likert and the totals that appear in both ends are properly shown. But when I have even number of levels first of all the sorting is not working and the totals are taking into account also the 4th level to the right (positive) direction and the 3rd level at the left in the negative direction.

I suspect that in the data_fun function the part sum(x %in% 4:5) / length(x[!is.na(x)]) might be changed.

How can I properly, first of all sort the plot (each facet) as was solved here and show the totals of the two upper levels (satisfied and very satisfied) only and excluding the 4th level ?

Any help ?


library(ggstats)
library(dplyr)
library(ggplot2)
library(patchwork)
likert_levels_na = c("Not \n Available",
                     "Very \n Dissatisfied",
                     "Dissatisfied",
                     "Neutral",
                     "Satisfied",
                     "Very \n Satisfied")


custom_colors_na = c("Not \n Available" = "black",
                     "Very \n Dissatisfied" = "#ed2e1c",
                     "Dissatisfied" = "#e09c95",
                     "Neutral" = "#85c1e9",
                     "Satisfied" = "#7FF98B",
                     "Very \n Satisfied" = "#04B431"
)
df <-
  tibble(
    grouping = sample(c(LETTERS[1:9]), 150, replace = TRUE),
    q1 = sample(c(0:5), 150, replace = TRUE),
    q2 = sample(c(0:5), 150, replace = TRUE),
    q3 = sample(c(0:5), 150, replace = TRUE),
    q4 = sample(c(0:5), 150, replace = TRUE),
    q5 = sample(c(0:5), 150, replace = TRUE),
    q6 = sample(c(0:5), 150, replace = TRUE)
  )
df


filter_df = df %>%
  dplyr::select(grouping) %>%
  dplyr::group_by(grouping) %>%
  dplyr::summarise(n = n()) %>%
  dplyr::filter(n >= 15)%>%
  dplyr::arrange(desc(n))
parameters = as.vector(filter_df[[1]])





data_fun <- function(.data) {
  .data |>
    mutate(
      .question = interaction(grouping, .question),
      .question = reorder(
        .question,
        ave(as.numeric(.answer), .question, FUN = \(x) {
          sum(x %in% 4:5) / length(x[!is.na(x)])
        }),
        decreasing = TRUE
      )
    )
}


df=df%>%
  filter(grouping %in% parameters)%>%
  dplyr::mutate(across(-grouping, ~ case_when(
    . == 0 ~ likert_levels_na[1],
    . == 1 ~ likert_levels_na[2],
    . == 2 ~ likert_levels_na[3],
    . == 3 ~ likert_levels_na[4],
    . == 4 ~ likert_levels_na[5],
    . == 5 ~ likert_levels_na[6]
  )))%>%
  dplyr::mutate(across(- grouping, ~ factor(.x, levels = likert_levels_na)))

v1 <- gglikert(df, q1:q6,
               facet_rows = vars(grouping),
               add_totals = TRUE,
               totals_include_center = FALSE,
               data_fun = data_fun
) +
  scale_y_discrete(
    labels = ~ gsub("^.*\\.", "", .x)
  ) +
  labs(y = NULL) +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    axis.text.y.right = element_text(color = "black"),  # Move Y-axis text to the right
    legend.position = "bottom",
    strip.text = element_text(color = "black", face = "bold"),
    strip.placement = "outside",
    strip.text.y.left = element_text(angle = 0)        # Facet text on the left
  ) +
  theme(strip.text.y = element_text(angle = 0)) +
  facet_wrap(
    facets = vars(grouping),
    labeller = labeller(grouping = label_wrap_gen(width = 10)),
    ncol = 1, scales = "free_y",
    strip.position = "left"
  ) +scale_y_discrete(position = "right", labels = function(x) sub(".*?\\.", "", x))+
  scale_fill_manual(values = custom_colors_na)




v2 <- filter_df %>%
  ggplot2::ggplot(aes(y = grouping, x = n)) +
  geom_bar(stat = "identity", fill = "lightgrey") +
  geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
  scale_y_discrete(
    limits = rev, expand = c(0, 0)
  ) +
  theme_light() +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    legend.position = "none"
  ) +
  labs(x = NULL, y = NULL)

v1 + v2 +
  plot_layout(widths = c(0.8, 0.2), guides = "collect") &
  theme(legend.position = "bottom")


Share Improve this question asked Feb 16 at 9:52 Homer Jay SimpsonHomer Jay Simpson 1,2187 silver badges35 bronze badges
Add a comment  | 

1 Answer 1

Reset to default 2

Concerning the first issue. As you now have 6 levels you have to use sum(x %in% 5:6) to sort by the top two. Concerning the labels, I don't see any option to include only the top labels, i.e. gglikert will compute the sum for the positive and the negative values and only allows to exclude a center category. Hence, to achieve your desired result you have to do the hard work yourself and add the labels via a geom_text as I already have done in some of your previous posts, e.g. the second option I offered here. Finally note, that I use facetting for the bar chart, too. IMHO this will in general give a nice alignment when combining the charts.

library(tidyverse)
library(ggstats)
library(patchwork)

set.seed(42)

dat <- df |>
  mutate(
    across(-c(grouping), ~ factor(.x, likert_levels_na))
  ) |>
  pivot_longer(-c(grouping), names_to = ".question") |>
  filter(!is.na(value)) |>
  count(.question, value, grouping) |>
  complete(.question, value, grouping, fill = list(n = 0)) |>
  mutate(
    prop = n / sum(n),
    prop_lower = sum(prop[value %in% likert_levels_na[2:3]]),
    prop_higher = sum(prop[value %in% likert_levels_na[5:6]]),
    .by = c(.question, grouping)
  ) |>
  arrange(grouping, prop_higher) |>
  mutate(
    .question = interaction(grouping, .question),
    .question = fct_inorder(.question)
  )

dat_tot <- dat |>
  distinct(grouping, .question, prop_lower, prop_higher) |>
  pivot_longer(-c(grouping, .question),
    names_to = c(".value", "name"),
    names_sep = "_"
  ) |>
  mutate(
    hjust_tot = ifelse(name == "lower", .5, .5),
    x_tot = ifelse(name == "lower", -1, 1)
  )

data_fun <- function(.data) {
  .data |>
    mutate(
      .question = interaction(grouping, .question),
      .question = reorder(
        .question,
        ave(as.numeric(.answer), .question, FUN = \(x) {
          sum(x %in% 5:6) / length(x[!is.na(x)])
        }),
        decreasing = TRUE
      )
    )
}

v1 <- gglikert(df, q1:q6,
  facet_rows = vars(grouping),
  add_totals = FALSE,
  totals_include_center = FALSE,
  data_fun = data_fun
) +
  geom_label(
    aes(
      x = x_tot,
      y = .question,
      label = label_percent_abs(accuracy = 1)(prop),
      hjust = hjust_tot,
      fill = NULL
    ),
    data = dat_tot,
    size = 8 / .pt,
    color = "black",
    fontface = "bold",
    label.size = 0,
    show.legend = FALSE,
    inherit.aes = FALSE
  ) +
  scale_y_discrete(
    labels = ~ gsub("^.*\\.", "", .x)
  ) +
  labs(y = NULL) +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    axis.text.y.right = element_text(color = "black"), # Move Y-axis text to the right
    legend.position = "bottom",
    strip.text = element_text(color = "black", face = "bold"),
    strip.placement = "outside",
    strip.text.y.left = element_text(angle = 0) # Facet text on the left
  ) +
  theme(strip.text.y = element_text(angle = 0)) +
  facet_wrap(
    facets = vars(grouping),
    labeller = labeller(grouping = label_wrap_gen(width = 10)),
    ncol = 1, scales = "free_y",
    strip.position = "left"
  ) +
  scale_y_discrete(position = "right", labels = function(x) sub(".*?\\.", "", x)) +
  scale_fill_manual(values = custom_colors_na)

v2 <- filter_df %>%
  ggplot2::ggplot(aes(y = grouping, x = n)) +
  geom_bar(stat = "identity", fill = "lightgrey") +
  geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
  scale_y_discrete(
    limits = rev, expand = c(0, 0)
  ) +
  facet_wrap(
    facets = vars(grouping),
    labeller = labeller(grouping = label_wrap_gen(width = 10)),
    ncol = 1, scales = "free_y",
    strip.position = "left"
  ) +
  theme_light() +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    legend.position = "none",
    strip.text.y = element_blank()
  ) +
  labs(x = NULL, y = NULL)

v1 + v2 +
  plot_layout(widths = c(0.8, 0.2), guides = "collect") &
  theme(legend.position = "bottom")

UPDATE Here is one possible option to remove the empty space which sets the position for the total labels based on the maximum value of the positive or negative bars.

dat <- df |>
  mutate(
    across(-c(grouping), ~ factor(.x, likert_levels_na))
  ) |>
  pivot_longer(-c(grouping), names_to = ".question") |>
  filter(!is.na(value)) |>
  count(.question, value, grouping) |>
  complete(.question, value, grouping, fill = list(n = 0)) |>
  mutate(
    prop = n / sum(n),
    prop_lower = sum(prop[value %in% likert_levels_na[2:3]]),
    prop_higher = sum(prop[value %in% likert_levels_na[5:6]]),
    prop_lower1 = sum(prop[value %in% likert_levels_na[1:3]]),
    prop_higher1 = sum(prop[value %in% likert_levels_na[4:6]]),
    .by = c(.question, grouping)
  ) |>
  arrange(grouping, prop_higher) |>
  mutate(
    .question = interaction(grouping, .question),
    .question = fct_inorder(.question)
  )

dat_tot <- dat |>
  distinct(
    grouping, .question,
    prop_lower, prop_higher,
    prop_lower1, prop_higher1
  ) |>
  mutate(
    x_tot_lower = -max(prop_lower1),
    x_tot_higher = max(prop_higher1)
  ) |>
  pivot_longer(-c(grouping, .question),
    names_to = c(".value", "name"),
    names_pattern = "^(.*)_(lower|higher)"
  ) |>
  mutate(
    hjust_tot = ifelse(name == "lower", .5, .5)
  )

与本文相关的文章

发布评论

评论列表(0)

  1. 暂无评论