I have a data frame in R that I use to calculate percentages and present them in a likert plot plus a bar plot. In the middle I have a bar plot that has the percentages of the NA's in this data frame in each question within each grouping level. I want to match the questions of the middle likert plot with the one at left (i.e the left likert plot is my base and dependent on this plot to match the q1:q6 in the middle plot). How can I achieve this in R ?
Any help?
library(ggstats)
library(dplyr)
library(ggplot2)
likert_levels <- c(
"Strongly disagree",
"Disagree",
"Neither agree nor disagree",
"Agree",
"Strongly agree"
)
set.seed(42)
df <-
tibble(
grouping = sample(c(LETTERS[1:9]), 150, replace = TRUE),
q1 = sample(c(likert_levels, NA), 150, replace = TRUE),
q2 = sample(c(likert_levels, NA), 150, replace = TRUE),
q3 = sample(c(likert_levels, NA), 150, replace = TRUE),
q4 = sample(c(likert_levels, NA), 150, replace = TRUE),
q5 = sample(c(likert_levels, NA), 150, replace = TRUE),
q6 = sample(c(likert_levels, NA), 150, replace = TRUE)
) |>
mutate(across(-grouping, ~ factor(.x, levels = likert_levels)))
filter_df = df %>%
dplyr::select(grouping) %>%
dplyr::group_by(grouping) %>%
dplyr::summarise(n = n()) %>%
dplyr::filter(n >= 18)%>%
dplyr::arrange(desc(n))
parameters = as.vector(filter_df[[1]])
# Seed used to create the data
set.seed(42)
data_fun <- function(.data) {
.data |>
mutate(
.question = interaction(grouping, .question),
.question = reorder(
.question,
ave(as.numeric(.answer), .question, FUN = \(x) {
sum(x %in% 4:5) / length(x[!is.na(x)])
}),
decreasing = TRUE
)
)
}
df = df%>%
filter(grouping %in% parameters)
v1 <- gglikert(df, q1:q6,
facet_rows = vars(grouping),
add_totals = TRUE,
data_fun = data_fun
) +
scale_y_discrete(
labels = ~ gsub("^.*\\.", "", .x)
) +
labs(y = NULL) +
theme(
panel.border = element_rect(color = "gray", fill = NA),
axis.text.x = element_blank(),
legend.position = "bottom",
strip.text = element_text(color = "black", face = "bold"),
strip.placement = "outside"
) +
theme(strip.text.y = element_text(angle = 0)) +
facet_wrap(
facets = vars(grouping),
labeller = labeller(grouping = label_wrap_gen(width = 5)),
ncol = 1, scales = "free_y",
strip.position = "right"
)
v2 <- filter_df %>%
ggplot2::ggplot(aes(y = grouping, x = n)) +
geom_bar(stat = "identity", fill = "lightgrey") +
geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
scale_y_discrete(
limits = rev, expand = c(0, 0)
) +
facet_wrap(
facets = vars(grouping),
labeller = labeller(grouping = label_wrap_gen(width = 10)),
ncol = 1, scales = "free_y",
strip.position = "left"
) +
theme_light() +
theme(
panel.border = element_rect(color = "gray", fill = NA),
axis.text.x = element_blank(),
legend.position = "none",
strip.text.y = element_blank()
) +
labs(x = NULL, y = NULL)
availability_levels <- c(
"available",
"not_available"
)
df_ava = df%>%
pivot_longer(!grouping, names_to = "question", values_to = "response")%>%
mutate(count2 = case_when(is.na(response) ~ "not_available",
TRUE ~"available"))%>%
select(-response)%>%
group_by(grouping,question)%>%
summarise(
total = n(),
available_percent = sum(count2 == "available") / total * 100,
not_available_percent = round(sum(count2 == "not_available") / total * 100,0),
.groups = 'drop'
)%>%
select(grouping,question,not_available_percent)
df_ava
v3 <- df_ava%>%
ggplot2::ggplot(aes(y = question, x = not_available_percent)) +
geom_bar(stat = "identity", fill = "lightgrey") +
geom_text(aes(label = paste0(not_available_percent, "%")),
size = 2.5,
position = position_stack(vjust = 0.5)) +
scale_y_discrete(
limits = rev, expand = c(0, 0)
) +
facet_wrap(
facets = vars(grouping),
labeller = labeller(grouping = label_wrap_gen(width = 10)),
ncol = 1, scales = "free_y",
strip.position = "left"
) +
theme_light() +
theme(
panel.border = element_rect(color = "gray", fill = NA),
axis.text.x = element_blank(),
legend.position = "bottom"#,
# strip.text.y = element_blank()
) +
labs(x = NULL, y = NULL)
v1+v3+v2+ plot_layout(widths = c(3,1,.5)
) &
theme(legend.position = "bottom")
I have a data frame in R that I use to calculate percentages and present them in a likert plot plus a bar plot. In the middle I have a bar plot that has the percentages of the NA's in this data frame in each question within each grouping level. I want to match the questions of the middle likert plot with the one at left (i.e the left likert plot is my base and dependent on this plot to match the q1:q6 in the middle plot). How can I achieve this in R ?
Any help?
library(ggstats)
library(dplyr)
library(ggplot2)
likert_levels <- c(
"Strongly disagree",
"Disagree",
"Neither agree nor disagree",
"Agree",
"Strongly agree"
)
set.seed(42)
df <-
tibble(
grouping = sample(c(LETTERS[1:9]), 150, replace = TRUE),
q1 = sample(c(likert_levels, NA), 150, replace = TRUE),
q2 = sample(c(likert_levels, NA), 150, replace = TRUE),
q3 = sample(c(likert_levels, NA), 150, replace = TRUE),
q4 = sample(c(likert_levels, NA), 150, replace = TRUE),
q5 = sample(c(likert_levels, NA), 150, replace = TRUE),
q6 = sample(c(likert_levels, NA), 150, replace = TRUE)
) |>
mutate(across(-grouping, ~ factor(.x, levels = likert_levels)))
filter_df = df %>%
dplyr::select(grouping) %>%
dplyr::group_by(grouping) %>%
dplyr::summarise(n = n()) %>%
dplyr::filter(n >= 18)%>%
dplyr::arrange(desc(n))
parameters = as.vector(filter_df[[1]])
# Seed used to create the data
set.seed(42)
data_fun <- function(.data) {
.data |>
mutate(
.question = interaction(grouping, .question),
.question = reorder(
.question,
ave(as.numeric(.answer), .question, FUN = \(x) {
sum(x %in% 4:5) / length(x[!is.na(x)])
}),
decreasing = TRUE
)
)
}
df = df%>%
filter(grouping %in% parameters)
v1 <- gglikert(df, q1:q6,
facet_rows = vars(grouping),
add_totals = TRUE,
data_fun = data_fun
) +
scale_y_discrete(
labels = ~ gsub("^.*\\.", "", .x)
) +
labs(y = NULL) +
theme(
panel.border = element_rect(color = "gray", fill = NA),
axis.text.x = element_blank(),
legend.position = "bottom",
strip.text = element_text(color = "black", face = "bold"),
strip.placement = "outside"
) +
theme(strip.text.y = element_text(angle = 0)) +
facet_wrap(
facets = vars(grouping),
labeller = labeller(grouping = label_wrap_gen(width = 5)),
ncol = 1, scales = "free_y",
strip.position = "right"
)
v2 <- filter_df %>%
ggplot2::ggplot(aes(y = grouping, x = n)) +
geom_bar(stat = "identity", fill = "lightgrey") +
geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
scale_y_discrete(
limits = rev, expand = c(0, 0)
) +
facet_wrap(
facets = vars(grouping),
labeller = labeller(grouping = label_wrap_gen(width = 10)),
ncol = 1, scales = "free_y",
strip.position = "left"
) +
theme_light() +
theme(
panel.border = element_rect(color = "gray", fill = NA),
axis.text.x = element_blank(),
legend.position = "none",
strip.text.y = element_blank()
) +
labs(x = NULL, y = NULL)
availability_levels <- c(
"available",
"not_available"
)
df_ava = df%>%
pivot_longer(!grouping, names_to = "question", values_to = "response")%>%
mutate(count2 = case_when(is.na(response) ~ "not_available",
TRUE ~"available"))%>%
select(-response)%>%
group_by(grouping,question)%>%
summarise(
total = n(),
available_percent = sum(count2 == "available") / total * 100,
not_available_percent = round(sum(count2 == "not_available") / total * 100,0),
.groups = 'drop'
)%>%
select(grouping,question,not_available_percent)
df_ava
v3 <- df_ava%>%
ggplot2::ggplot(aes(y = question, x = not_available_percent)) +
geom_bar(stat = "identity", fill = "lightgrey") +
geom_text(aes(label = paste0(not_available_percent, "%")),
size = 2.5,
position = position_stack(vjust = 0.5)) +
scale_y_discrete(
limits = rev, expand = c(0, 0)
) +
facet_wrap(
facets = vars(grouping),
labeller = labeller(grouping = label_wrap_gen(width = 10)),
ncol = 1, scales = "free_y",
strip.position = "left"
) +
theme_light() +
theme(
panel.border = element_rect(color = "gray", fill = NA),
axis.text.x = element_blank(),
legend.position = "bottom"#,
# strip.text.y = element_blank()
) +
labs(x = NULL, y = NULL)
v1+v3+v2+ plot_layout(widths = c(3,1,.5)
) &
theme(legend.position = "bottom")
Share
Improve this question
edited yesterday
Homer Jay Simpson
asked yesterday
Homer Jay SimpsonHomer Jay Simpson
1,2187 silver badges35 bronze badges
1 Answer
Reset to default 1Here is a quick and easy approach to achieve your desired result which uses gglikert_data
to replicate the dataset used under the hood for v1
then uses the levels
for the .question
column of this data to set the order of the question
column in the v3
data, where of course we have to first create the interaction
with grouping
and clean up the y axis labels as you did for v1
:
library(ggstats)
library(dplyr)
library(ggplot2)
v1_data <- gglikert_data(df, q1:q6, data_fun = data_fun)
v3 <- df_ava %>%
mutate(
question = interaction(grouping, question),
question = factor(question, levels = levels(v1_data$.question))
) |>
ggplot2::ggplot(aes(y = question, x = not_available_percent)) +
geom_bar(stat = "identity", fill = "lightgrey") +
geom_text(aes(label = paste0(not_available_percent, "%")),
size = 2.5,
position = position_stack(vjust = 0.5)
) +
scale_y_discrete(
labels = ~ gsub("^.*\\.", "", .x),
limits = rev,
expand = c(0, 0)
) +
facet_wrap(
facets = vars(grouping),
labeller = labeller(grouping = label_wrap_gen(width = 10)),
ncol = 1, scales = "free_y",
strip.position = "left"
) +
theme_light() +
theme(
panel.border = element_rect(color = "gray", fill = NA),
axis.text.x = element_blank(),
legend.position = "bottom"
) +
labs(x = NULL, y = NULL)
v1 + v3 + v2 + plot_layout(widths = c(3, 1, .5)) &
theme(legend.position = "bottom")