最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

r - Creating Summary table using GTSUMMARY - Stack Overflow

programmeradmin1浏览0评论

I am trying to create a table like the one below using gtsummary. Note that two levels of indentation are needed.

I have tried using tbl_hierarchical to achieve this as I can't find a way for tbl_summary to create multiple indentation layers. Below is my code:

# Packages used below; attaching them makes the script runnable on its own.
library(dplyr)     # select(), mutate(), case_when(), if_else(), bind_rows(), %>%
library(tidyr)     # pivot_longer()
library(labelled)  # set_variable_labels()
library(gtsummary) # tbl_hierarchical(), add_overall()

# Denominator data passed to tbl_hierarchical(denominator = ):
# subject id and actual treatment arm only.
adsl.d <- pharmaverseadam::adsl %>%
  select(c(USUBJID, TRT01A))

# Table body input data: derive constant label columns and flags from ADSL.
adsl1 <- pharmaverseadam::adsl %>%
  select(c(USUBJID, TRT01A, RFSTDTC, EOSSTT, DTHFL)) %>%
  mutate(
    # Constant text columns; their values become the row labels in the table.
    TOTPART = 'TOTAL NUMBER OF PARTICIPANTS',
    RANDFL = 'RANDOMIZED',
    ITTFL = 'INTENT-TO-TREAT',
    # 'SAFETY' only when a reference start date exists and no death flag is set.
    SAFFL = if_else(!is.na(RFSTDTC) & is.na(DTHFL), 'SAFETY', NA_character_),
    # Any other EOSSTT value falls through case_when() and becomes NA.
    COMPLFL = case_when(
      EOSSTT == 'COMPLETED' ~ 'Y',
      EOSSTT == 'DISCONTINUED' ~ 'N'),
    # Discontinuation reason; for discontinued subjects it is assigned from the
    # year of RFSTDTC. First matching condition wins.
    DSREAS = case_when(
      grepl('Screen', TRT01A) ~ TRT01A,
      DTHFL == 'Y' ~ 'Death',
      EOSSTT == 'DISCONTINUED' & substr(RFSTDTC, 1, 4) == '2012' ~ 'Adverse event',
      EOSSTT == 'DISCONTINUED' & substr(RFSTDTC, 1, 4) == '2013' ~ 'Withdrawal by subject',
      EOSSTT == 'DISCONTINUED' & substr(RFSTDTC, 1, 4) == '2014' ~ 'Lost to Follow-up')
  ) %>%
  set_variable_labels(DSREAS = "PRIMARY REASON FOR DISCONTINUATION")

# Manipulate data to create ITT & SAF counts.
# The ITTFL/SAFFL columns are stacked into long form: the former column names
# land in EOSSTT and the label text ('INTENT-TO-TREAT' / 'SAFETY' / NA) lands in
# RANDFL, so these rows sit at the same hierarchy level as 'RANDOMIZED'.
adsl2 <- adsl1 %>%
  select(c(USUBJID, TRT01A, TOTPART, ITTFL, SAFFL)) %>%
  pivot_longer(
    cols = c("ITTFL", "SAFFL"),
    names_to = "EOSSTT",
    values_to = "RANDFL")

# Stack both data sets, drop rows whose treatment contains 'Screen', and
# re-apply the DSREAS label after binding.
adsl <- bind_rows(adsl1, adsl2) %>%
  filter(!grepl('Screen', TRT01A)) %>%
  set_variable_labels(DSREAS = "PRIMARY REASON FOR DISCONTINUATION")

# Generate hierarchical summary: TOTPART > RANDFL > EOSSTT, by treatment arm,
# followed by an overall column placed last.
tbl.hrch <- adsl %>% tbl_hierarchical(
  variables = c(TOTPART, RANDFL, EOSSTT),
  id = USUBJID,
  denominator = adsl.d,
  by = TRT01A,
  include = c(TOTPART, RANDFL, EOSSTT),
  statistic = c(TOTPART, RANDFL, EOSSTT) ~ c("{n} ({p})"),
  digits = list(RANDFL = c(1, 2),
                EOSSTT = c(1, 1))
) %>% add_overall(
  last = TRUE,
  # No trailing comma here: it would pass an extra empty argument.
  col_label = "**Overall**  \nN = {style_number(N)}"
)

# Post-process the rendered table body:
#  - blank every stat_* cell on the TOTPART row so it acts as a pure header;
#  - drop the EOSSTT rows labelled with the pivoted column names ITTFL/SAFFL.
tbl.hrch$table_body <- tbl.hrch$table_body %>%
  mutate(across(starts_with('stat_'), ~ if_else(variable == 'TOTPART', NA, .))) %>%
  filter(!(variable == 'EOSSTT' & label %in% c('ITTFL', 'SAFFL')))

The tbl.hrch provides me with the intended table body. I have two questions.

  1. Is there a way to achieve this without manipulating the tbl.hrch$table_body metadata?
  2. Can we customize the sort order of the variables instead of sorting them alphabetically? I get the ITT row first, which is not what the table shell intends.

Thank you for any help!

I am trying to create a table like the one below using gtsummary. Note that two levels of indentation are needed.

I have tried using tbl_hierarchical to achieve this as I can't find a way for tbl_summary to create multiple indentation layers. Below is my code:

# Packages used below; attaching them makes the script runnable on its own.
library(dplyr)     # select(), mutate(), case_when(), if_else(), bind_rows(), %>%
library(tidyr)     # pivot_longer()
library(labelled)  # set_variable_labels()
library(gtsummary) # tbl_hierarchical(), add_overall()

# Denominator data passed to tbl_hierarchical(denominator = ):
# subject id and actual treatment arm only.
adsl.d <- pharmaverseadam::adsl %>%
  select(c(USUBJID, TRT01A))

# Table body input data: derive constant label columns and flags from ADSL.
adsl1 <- pharmaverseadam::adsl %>%
  select(c(USUBJID, TRT01A, RFSTDTC, EOSSTT, DTHFL)) %>%
  mutate(
    # Constant text columns; their values become the row labels in the table.
    TOTPART = 'TOTAL NUMBER OF PARTICIPANTS',
    RANDFL = 'RANDOMIZED',
    ITTFL = 'INTENT-TO-TREAT',
    # 'SAFETY' only when a reference start date exists and no death flag is set.
    SAFFL = if_else(!is.na(RFSTDTC) & is.na(DTHFL), 'SAFETY', NA_character_),
    # Any other EOSSTT value falls through case_when() and becomes NA.
    COMPLFL = case_when(
      EOSSTT == 'COMPLETED' ~ 'Y',
      EOSSTT == 'DISCONTINUED' ~ 'N'),
    # Discontinuation reason; for discontinued subjects it is assigned from the
    # year of RFSTDTC. First matching condition wins.
    DSREAS = case_when(
      grepl('Screen', TRT01A) ~ TRT01A,
      DTHFL == 'Y' ~ 'Death',
      EOSSTT == 'DISCONTINUED' & substr(RFSTDTC, 1, 4) == '2012' ~ 'Adverse event',
      EOSSTT == 'DISCONTINUED' & substr(RFSTDTC, 1, 4) == '2013' ~ 'Withdrawal by subject',
      EOSSTT == 'DISCONTINUED' & substr(RFSTDTC, 1, 4) == '2014' ~ 'Lost to Follow-up')
  ) %>%
  set_variable_labels(DSREAS = "PRIMARY REASON FOR DISCONTINUATION")

# Manipulate data to create ITT & SAF counts.
# The ITTFL/SAFFL columns are stacked into long form: the former column names
# land in EOSSTT and the label text ('INTENT-TO-TREAT' / 'SAFETY' / NA) lands in
# RANDFL, so these rows sit at the same hierarchy level as 'RANDOMIZED'.
adsl2 <- adsl1 %>%
  select(c(USUBJID, TRT01A, TOTPART, ITTFL, SAFFL)) %>%
  pivot_longer(
    cols = c("ITTFL", "SAFFL"),
    names_to = "EOSSTT",
    values_to = "RANDFL")

# Stack both data sets, drop rows whose treatment contains 'Screen', and
# re-apply the DSREAS label after binding.
adsl <- bind_rows(adsl1, adsl2) %>%
  filter(!grepl('Screen', TRT01A)) %>%
  set_variable_labels(DSREAS = "PRIMARY REASON FOR DISCONTINUATION")

# Generate hierarchical summary: TOTPART > RANDFL > EOSSTT, by treatment arm,
# followed by an overall column placed last.
tbl.hrch <- adsl %>% tbl_hierarchical(
  variables = c(TOTPART, RANDFL, EOSSTT),
  id = USUBJID,
  denominator = adsl.d,
  by = TRT01A,
  include = c(TOTPART, RANDFL, EOSSTT),
  statistic = c(TOTPART, RANDFL, EOSSTT) ~ c("{n} ({p})"),
  digits = list(RANDFL = c(1, 2),
                EOSSTT = c(1, 1))
) %>% add_overall(
  last = TRUE,
  # No trailing comma here: it would pass an extra empty argument.
  col_label = "**Overall**  \nN = {style_number(N)}"
)

# Post-process the rendered table body:
#  - blank every stat_* cell on the TOTPART row so it acts as a pure header;
#  - drop the EOSSTT rows labelled with the pivoted column names ITTFL/SAFFL.
tbl.hrch$table_body <- tbl.hrch$table_body %>%
  mutate(across(starts_with('stat_'), ~ if_else(variable == 'TOTPART', NA, .))) %>%
  filter(!(variable == 'EOSSTT' & label %in% c('ITTFL', 'SAFFL')))

The tbl.hrch provides me with the intended table body. I have two questions.

  1. Is there a way to achieve this without manipulating the tbl.hrch$table_body metadata?
  2. Can we customize the sort order of the variables instead of sorting them alphabetically? I get the ITT row first, which is not what the table shell intends.

Thank you for any help!

Share asked 11 hours ago BlueKnight2025BlueKnight2025 453 bronze badges
Add a comment  | 

1 Answer 1

Reset to default 0

I would use the basic tbl_summary() function for this table. In the example below, tbl_summary() gives us the basic structure of the table we need. We then call a few functions to style it like the spec, e.g. removing an unneeded row header, adding extra indentation, etc.

Happy Programming!

set.seed(123) # fix the RNG state so the simulated flags are reproducible
library(gtsummary)
library(dplyr)
packageVersion("gtsummary")
theme_gtsummary_compact()

# Draw `n` logical flags that are TRUE with probability 0.95.
# sample() treats `prob` as weights and rescales them, so the weights are
# written to sum to 1 and read as the probabilities actually used.
sample_flag <- function(n) {
  sample(c(TRUE, FALSE), size = n, replace = TRUE, prob = c(0.95, 0.05))
}

tbl <-
  pharmaverseadam::adsl |>
  # remove screen failures
  filter(TRT01A != "Screen Failure") |>
  # create fake data for table
  mutate(
    RANDFL = sample_flag(n()),
    # ITT and completion status are only defined for randomized subjects;
    # everyone else gets NA (and is dropped from the table by missing = "no").
    ITTFL = if_else(RANDFL, sample_flag(n()), NA),
    COMPLFL = factor(
      if_else(RANDFL, sample_flag(n()), NA),
      levels = c(TRUE, FALSE),
      labels = c("COMPLETED", "DISCONTINUED")
    )
  ) |>
  # create a basic summary table
  tbl_summary(
    by = TRT01A,
    include = c(RANDFL, COMPLFL, ITTFL, SAFFL),
    # SAFFL is a "Y"/other character flag: report the "Y" level on a single row
    value = list(SAFFL = "Y"),
    label = list(RANDFL = "RANDOMIZED",
                 ITTFL = "INTENT-TO-TREAT",
                 SAFFL = "SAFETY"),
    missing = "no"
  ) |>
  # add column for all subjects
  add_overall(last = TRUE) |>
  # update header to match spec
  modify_header(
    all_stat_cols() ~ "{level}  \n(N={n})  \n n (%)",
    label = ""
  ) |>
  # remove header row for COMPLFL so its levels sit directly under RANDFL
  remove_row_type(variables = "COMPLFL", type = "header") |>
  # group summaries together under one heading
  add_variable_group_header(
    header = "TOTAL NUMBER OF PARTICIPANTS",
    variables = everything()
  ) |>
  # indent the COMPLFL rows under RANDFL
  modify_column_indent(
    columns = "label",
    rows = variable == "COMPLFL",
    indent = 8
  ) |>
  # remove default footnote
  remove_footnote_header(columns = all_stat_cols())

Created on 2025-04-03 with reprex v2.1.1

发布评论

评论列表(0)

  1. 暂无评论