Skip to contents

Introduction

This article demonstrates how to use the ADNIMERGE2 R package to generate simple enrollment summaries.

Load Required R Packages

# Footnote text shared by the summary tables: abbreviation definitions
# followed by a note on the population behind the mPACCdigit score.
# All pieces are concatenated into one single-line string.
abbrev_list <- paste0(
  "CN: Cognitive Normal; MCI: Mild Cognitive Impairment; DEM: Dementia; ",
  "SD: Standard Deviation; Q1: the 25th percentile; Q3: the 75th percentile; ",
  "Baseline mPACCdigit score was based on subjects that were ",
  "enrolled only in ADNI1 study phase. "
)
# Row labels for the three statistics reported on continuous variables
conts_statistic_label <- c("Mean (SD)", "Median (Q1, Q3)", "Range")
# Prefix the existing BMI variable label with "Baseline "
var_label(ADSL$BMI) <- paste0(
  "Baseline ",
  get_variable_labels(ADSL)[["BMI"]]
)

ADNI Enrollment Summaries

Enrollment Over Time

# Monthly enrollment counts per original study phase, together with the
# running total of enrolled subjects in calendar order.
enroll_summary_data <- ADSL %>%
  filter(ENRLFL %in% "Y") %>%
  # Collapse each enrollment date to the first day of its month
  mutate(ENRLDT = floor_date(ENRLDT, unit = "month")) %>%
  group_by(ENRLDT, ORIGPROT) %>%
  # `.groups = "drop"` returns an ungrouped result directly, so no separate
  # `ungroup()` is needed and `summarise()` does not emit its grouping message
  summarise(num_enroll = n(), .groups = "drop") %>%
  # NOTE(review): `adni_phase()` presumably returns the phase names in
  # chronological order -- confirm against the package documentation
  mutate(ORIGPROT = factor(ORIGPROT, levels = adni_phase())) %>%
  arrange(ENRLDT, ORIGPROT) %>%
  # The data are ungrouped here, so the cumulative sum runs over every
  # month/phase row (overall enrollment), not within each phase
  mutate(cum_num_enroll = cumsum(num_enroll))

# Line plot of cumulative enrollment over calendar time, coloured by phase
enroll_summary_plot <- ggplot(
  data = enroll_summary_data,
  mapping = aes(x = ENRLDT, y = cum_num_enroll, color = ORIGPROT)
) +
  geom_line() +
  scale_x_date(
    date_minor_breaks = "2 years",
    limits = range(enroll_summary_data$ENRLDT)
  ) +
  labs(
    x = "Timeline Calendar",
    y = "Cumulative Enrollment Per Month",
    color = "ADNI Study Phase"
  ) +
  # Slightly rotate the date labels so neighbouring ticks do not overlap
  theme(axis.text.x = element_text(hjust = 0.3, vjust = 0, angle = 15))
enroll_summary_plot

Enrollment over time by study phase

Demographic Summaries: By Study Phase

# Baseline characteristics reported in the summary tables
include_vars <- c(
  # Demographics
  "AGE", "SEX", "EDUC", "RACE", "ETHNIC", "MARISTAT", "BMI",
  # Diagnosis and genotype
  "DX", "APOE",
  # Clinical and cognitive assessments
  "ADASTT13", "CDGLOBAL", "CDRSB", "MMSCORE", "FAQTOTAL",
  "MPACCTRAILSB", "MPACCDIGIT"
)

# Table 1: characteristics of enrolled subjects, one column per study phase
# plus a trailing "Overall" column. Percentages are computed column-wise.
tbl_summary(
  data = filter(ADSL, ENRLFL %in% "Y"),
  by = ORIGPROT,
  include = include_vars,
  # Multi-row layout for continuous variables (one row per statistic)
  type = list(all_continuous() ~ "continuous2"),
  statistic = list(
    all_continuous() ~ c(
      "{mean} ({sd})",
      "{median} ({p25}, {p75})",
      "{min}, {max}"
    ),
    all_categorical() ~ "{n} ({p}%)"
  ),
  digits = list(all_continuous() ~ 1),
  percent = "column",
  missing_text = "(Missing)"
) %>%
  add_overall(last = TRUE) %>%
  # Replace the default statistic row labels with the shared short labels
  add_stat_label(label = list(all_continuous2() ~ conts_statistic_label)) %>%
  modify_footnote_header(
    footnote = "Column-wise percentage; n (%)",
    columns = all_stat_cols(),
    replace = TRUE
  ) %>%
  modify_abbreviation(abbreviation = abbrev_list) %>%
  modify_caption(
    caption = "Table 1. ADNI - Subject Characteristics: By Study Phase"
  ) %>%
  bold_labels()
Table 1. ADNI - Subject Characteristics: By Study Phase
Characteristic ADNI1
N = 819
1
ADNIGO
N = 131
1
ADNI2
N = 790
1
ADNI3
N = 696
1
ADNI4
N = 590
1
Overall
N = 3,026
1
Age (in Years)





    Mean (SD) 75.2 (6.8) 71.6 (7.9) 72.7 (7.2) 70.7 (7.4) 69.1 (7.6) 72.2 (7.6)
    Median (Q1, Q3) 75.6 (71.2, 80.1) 71.1 (65.8, 77.4) 72.8 (67.8, 77.6) 70.0 (65.9, 75.8) 69.0 (63.2, 74.4) 72.2 (66.9, 77.6)
    Range 54.5, 90.9 55.6, 88.3 55.0, 91.4 50.5, 90.7 55.0, 90.8 50.5, 91.4
    (Missing) 0 0 0 0 1 1
Sex, n (%)





    Female 342 (42%) 60 (46%) 379 (48%) 381 (55%) 364 (62%) 1,526 (50%)
    Male 477 (58%) 71 (54%) 411 (52%) 315 (45%) 226 (38%) 1,500 (50%)
Education





    Mean (SD) 15.5 (3.0) 15.8 (2.7) 16.3 (2.6) 16.4 (2.3) 15.8 (2.8) 16.0 (2.8)
    Median (Q1, Q3) 16.0 (13.0, 18.0) 16.0 (14.0, 18.0) 16.0 (14.0, 18.0) 16.0 (15.0, 18.0) 16.0 (14.0, 18.0) 16.0 (14.0, 18.0)
    Range 4.0, 20.0 10.0, 20.0 8.0, 20.0 10.0, 20.0 7.0, 20.0 4.0, 20.0
    (Missing) 1 0 0 0 2 3
Race, n (%)





    American Indian or Alaskan Native 1 (0.1%) 1 (0.8%) 1 (0.1%) 2 (0.3%) 4 (0.7%) 9 (0.3%)
    Asian 14 (1.7%) 1 (0.8%) 14 (1.8%) 29 (4.2%) 49 (8.3%) 107 (3.5%)
    Black or African American 39 (4.8%) 4 (3.1%) 34 (4.3%) 105 (15%) 204 (35%) 386 (13%)
    Native Hawaiian or Other Pacific Islander 0 (0%) 0 (0%) 2 (0.3%) 1 (0.1%) 2 (0.3%) 5 (0.2%)
    Other Pacific Islander 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
    White 762 (93%) 118 (90%) 728 (92%) 537 (77%) 294 (50%) 2,439 (81%)
    More than one race 3 (0.4%) 5 (3.8%) 10 (1.3%) 13 (1.9%) 26 (4.4%) 57 (1.9%)
    Unknown 0 (0%) 2 (1.5%) 1 (0.1%) 9 (1.3%) 11 (1.9%) 23 (0.8%)
Ethnicity, n (%)





    Hispanic or Latino 19 (2.3%) 8 (6.1%) 31 (3.9%) 58 (8.3%) 77 (13%) 193 (6.4%)
    Not Hispanic or Latino 794 (97%) 122 (93%) 755 (96%) 637 (92%) 511 (87%) 2,819 (93%)
    Unknown 6 (0.7%) 1 (0.8%) 4 (0.5%) 1 (0.1%) 2 (0.3%) 14 (0.5%)
Marital Status, n (%)





    Divorced 52 (6.4%) 17 (13%) 82 (10%) 79 (11%) 107 (18%) 337 (11%)
    Domestic Partnership 0 (0%) 0 (0%) 0 (0%) 0 (0%) 13 (2.2%) 13 (0.4%)
    Married 629 (77%) 95 (73%) 586 (74%) 517 (74%) 344 (59%) 2,171 (72%)
    Never married 28 (3.4%) 3 (2.3%) 33 (4.2%) 35 (5.0%) 75 (13%) 174 (5.8%)
    Unknown 1 (0.1%) 4 (3.1%) 2 (0.3%) 2 (0.3%) 0 (0%) 9 (0.3%)
    Widowed 108 (13%) 12 (9.2%) 87 (11%) 62 (8.9%) 49 (8.3%) 318 (11%)
    (Missing) 1 0 0 1 2 4
Baseline Body Mass Index





    Mean (SD) NA (NA) 24.0 (NA) 30.8 (7.2) 30.0 (NA) 28.3 (6.1) 28.5 (6.1)
    Median (Q1, Q3) NA (NA, NA) 24.0 (24.0, 24.0) 28.7 (26.4, 31.0) 30.0 (30.0, 30.0) 26.8 (24.7, 31.0) 27.2 (24.8, 31.0)
    Range Inf, -Inf 24.0, 24.0 25.1, 44.7 30.0, 30.0 18.6, 47.9 18.6, 47.9
    (Missing) 819 130 784 695 544 2,972
Baseline Diagnostics Status, n (%)





    CN 229 (28%) 1 (0.8%) 295 (37%) 378 (54%) 306 (52%) 1,209 (40%)
    MCI 397 (48%) 128 (99%) 344 (44%) 244 (35%) 225 (38%) 1,338 (44%)
    DEM 193 (24%) 0 (0%) 151 (19%) 74 (11%) 59 (10%) 477 (16%)
    (Missing) 0 2 0 0 0 2
APOE Genotype, n (%)





    ε2/ε2 2 (0.2%) 0 (0%) 3 (0.4%) 1 (0.1%) 3 (1.2%) 9 (0.3%)
    ε2/ε3 53 (6.5%) 9 (7.0%) 66 (8.5%) 52 (7.7%) 18 (7.1%) 198 (7.5%)
    ε2/ε4 18 (2.2%) 2 (1.6%) 14 (1.8%) 17 (2.5%) 11 (4.3%) 62 (2.3%)
    ε3/ε3 363 (44%) 67 (52%) 352 (45%) 347 (51%) 114 (45%) 1,243 (47%)
    ε3/ε4 295 (36%) 42 (33%) 269 (35%) 205 (30%) 85 (33%) 896 (34%)
    ε4/ε4 88 (11%) 8 (6.3%) 75 (9.6%) 53 (7.9%) 24 (9.4%) 248 (9.3%)
    (Missing) 0 3 11 21 335 370
Baseline ADAS-Cog Item 13 Total Score





    Mean (SD) 18.4 (9.2) 12.4 (5.4) 16.1 (10.1) 13.1 (8.9) 14.1 (8.7) 15.5 (9.4)
    Median (Q1, Q3) 17.7 (11.0, 24.3) 11.2 (8.7, 15.3) 13.7 (8.3, 21.7) 11.0 (6.7, 16.7) 12.7 (7.7, 18.7) 13.5 (8.3, 21.0)
    Range 1.0, 54.7 2.3, 28.3 0.0, 52.3 0.0, 48.3 0.0, 51.0 0.0, 54.7
    (Missing) 8 1 7 10 30 56
Baseline CDR Global Score, n (%)





    0 229 (28%) 0 (0%) 296 (37%) 385 (55%) 311 (53%) 1,221 (40%)
    0.5 496 (61%) 131 (100%) 407 (52%) 269 (39%) 249 (42%) 1,552 (51%)
    1 93 (11%) 0 (0%) 86 (11%) 40 (5.7%) 28 (4.8%) 247 (8.2%)
    2 0 (0%) 0 (0%) 1 (0.1%) 2 (0.3%) 0 (0%) 3 (<0.1%)
    (Missing) 1 0 0 0 2 3
Baseline CDR Sum of Boxes Score





    Mean (SD) 1.8 (1.8) 1.2 (0.7) 1.5 (1.9) 1.0 (1.6) 1.1 (1.6) 1.4 (1.7)
    Median (Q1, Q3) 1.5 (0.0, 3.0) 1.0 (0.5, 1.5) 1.0 (0.0, 2.5) 0.0 (0.0, 1.5) 0.5 (0.0, 1.5) 1.0 (0.0, 2.0)
    Range 0.0, 9.0 0.5, 4.0 0.0, 10.0 0.0, 10.0 0.0, 15.0 0.0, 15.0
    (Missing) 1 0 0 0 2 3
Baseline MMSE Score





    Mean (SD) 26.7 (2.7) 28.3 (1.5) 27.4 (2.7) 28.0 (2.5) 27.6 (2.5) 27.4 (2.6)
    Median (Q1, Q3) 27.0 (25.0, 29.0) 28.0 (27.0, 30.0) 28.0 (26.0, 30.0) 29.0 (27.0, 30.0) 28.0 (26.0, 29.0) 28.0 (26.0, 29.0)
    Range 18.0, 30.0 23.0, 30.0 19.0, 30.0 16.0, 30.0 12.0, 30.0 12.0, 30.0
    (Missing) 1 0 0 0 4 5
Baseline FAQ Total Score





    Mean (SD) 5.0 (6.6) 1.9 (3.2) 3.9 (6.2) 2.5 (5.2) 2.6 (4.9) 3.6 (5.9)
    Median (Q1, Q3) 2.0 (0.0, 8.0) 1.0 (0.0, 2.0) 1.0 (0.0, 5.0) 0.0 (0.0, 2.0) 0.0 (0.0, 3.0) 0.0 (0.0, 5.0)
    Range 0.0, 30.0 0.0, 22.0 0.0, 28.0 0.0, 30.0 0.0, 25.0 0.0, 30.0
    (Missing) 3 2 6 20 36 67
Baseline mPACCtrialsB





    Mean (SD) -6.6 (5.6) -2.9 (2.8) -4.6 (5.8) -2.8 (5.5) -3.9 (5.3) -4.5 (5.7)
    Median (Q1, Q3) -6.8 (-10.9, -2.1) -2.4 (-4.9, -1.0) -3.1 (-8.4, -0.2) -1.3 (-5.5, 1.0) -2.9 (-6.8, 0.2) -3.3 (-8.3, -0.1)
    Range -20.3, 5.4 -10.7, 2.7 -21.3, 5.4 -22.6, 7.7 -25.5, 5.8 -25.5, 7.7
Baseline mPACCdigit





    Mean (SD) -6.4 (5.6) NA (NA) NA (NA) NA (NA) NA (NA) -6.4 (5.6)
    Median (Q1, Q3) -6.6 (-10.4, -1.6) NA (NA, NA) NA (NA, NA) NA (NA, NA) NA (NA, NA) -6.6 (-10.4, -1.6)
    Range -19.6, 6.3 Inf, -Inf Inf, -Inf Inf, -Inf Inf, -Inf -19.6, 6.3
    (Missing) 0 131 790 696 590 2,207
Abbreviation: CN: Cognitive Normal; MCI: Mild Cognitive Impairment; DEM: Dementia; SD: Standard Deviation; Q1: the 25th percentile; Q3: the 75th percentile; Baseline mPACCdigit score was based on subjects that were enrolled only in ADNI1 study phase.
1 Column-wise percentage; n (%)
# Variable labels of ADSL, looked up by column name for the axis titles
var_label_list <- get_variable_labels(ADSL)
# Numeric baseline characteristics to plot
cont_var_list <- c(
  "AGE", "EDUC", "BMI", "ADASTT13", "CDGLOBAL",
  "CDRSB", "MMSCORE", "FAQTOTAL", "MPACCTRAILSB", "MPACCDIGIT"
)

# One histogram per characteristic, pooled over all enrolled subjects.
# The object keeps its original name even though it holds histograms.
cont_bl_violin_plot <- lapply(cont_var_list, function(x) {
  # Give the plotted column a fixed name so the aesthetics can refer to it
  graph_data <- ADSL %>%
    filter(ENRLFL %in% "Y") %>%
    rename(yvalue = all_of(x))

  ggplot(graph_data, aes(x = yvalue)) +
    # 30 bins is the ggplot2 default; stating it avoids the bin-width message
    geom_histogram(bins = 30) +
    labs(
      x = var_label_list[[x]],
      y = "Count",
      # Number of subjects with a non-missing value for this characteristic
      title = paste0("n = ", sum(!is.na(graph_data$yvalue)))
    ) +
    theme(title = element_text(size = 11))
})

names(cont_bl_violin_plot) <- cont_var_list

# Arrange all histograms in a two-column grid. The plots are not split by
# study phase, so the title describes them as overall distributions.
plots(
  cont_bl_violin_plot,
  n_columns = 2,
  caption = paste0(
    "Baseline mPACCdigit score was based on subjects that were enrolled only ",
    "in ADNI1 study phase. \n The remaining summary plots were based on ",
    "subjects that enrolled in ADNI study."
  ),
  title = "ADNI - Plots of Numeric Characteristics: Overall"
)

Distribution of numeric characteristics: Overall

Demographic Summaries: By Baseline Diagnostics Status

# Table 2: characteristics of enrolled subjects, one column per baseline
# diagnosis group. Percentages are computed row-wise.
tbl_summary(
  data = filter(ADSL, ENRLFL %in% "Y"),
  by = DX,
  include = include_vars,
  # Multi-row layout for continuous variables (one row per statistic)
  type = list(all_continuous() ~ "continuous2"),
  statistic = list(
    all_continuous() ~ c(
      "{mean} ({sd})",
      "{median} ({p25}, {p75})",
      "{min}, {max}"
    ),
    all_categorical() ~ "{n} ({p}%)"
  ),
  digits = list(all_continuous() ~ 1),
  percent = "row",
  missing_text = "(Missing)"
) %>%
  # Replace the default statistic row labels with the shared short labels
  add_stat_label(label = list(all_continuous2() ~ conts_statistic_label)) %>%
  modify_caption(
    caption = paste0(
      "Table 2. ADNI - Subject Characteristics: ",
      "By Baseline Diagnosis Status"
    )
  ) %>%
  modify_footnote_header(
    footnote = "Row-wise percentage; n (%)",
    columns = all_stat_cols(),
    replace = TRUE
  ) %>%
  modify_abbreviation(abbreviation = abbrev_list) %>%
  bold_labels()
Table 2. ADNI - Subject Characteristics: By Baseline Diagnosis Status
Characteristic CN
N = 1,209
1
MCI
N = 1,338
1
DEM
N = 477
1
Age (in Years)


    Mean (SD) 71.0 (7.0) 72.4 (7.7) 74.6 (8.0)
    Median (Q1, Q3) 70.9 (66.3, 75.9) 72.7 (66.9, 77.9) 75.3 (69.8, 80.3)
    Range 50.5, 90.3 54.5, 91.4 55.1, 90.9
    (Missing) 0 1 0
Sex, n (%)


    Female 732 (48%) 582 (38%) 212 (14%)
    Male 477 (32%) 756 (50%) 265 (18%)
Education


    Mean (SD) 16.5 (2.6) 15.9 (2.8) 15.2 (2.9)
    Median (Q1, Q3) 16.0 (15.0, 18.0) 16.0 (14.0, 18.0) 16.0 (13.0, 18.0)
    Range 6.0, 20.0 4.0, 20.0 4.0, 20.0
    (Missing) 1 2 0
Race, n (%)


    American Indian or Alaskan Native 5 (56%) 4 (44%) 0 (0%)
    Asian 61 (57%) 35 (33%) 11 (10%)
    Black or African American 207 (54%) 139 (36%) 40 (10%)
    Native Hawaiian or Other Pacific Islander 2 (40%) 3 (60%) 0 (0%)
    Other Pacific Islander 0 (NA%) 0 (NA%) 0 (NA%)
    White 895 (37%) 1,125 (46%) 417 (17%)
    More than one race 30 (53%) 19 (33%) 8 (14%)
    Unknown 9 (39%) 13 (57%) 1 (4.3%)
Ethnicity, n (%)


    Hispanic or Latino 104 (54%) 68 (35%) 21 (11%)
    Not Hispanic or Latino 1,100 (39%) 1,264 (45%) 453 (16%)
    Unknown 5 (36%) 6 (43%) 3 (21%)
Marital Status, n (%)


    Divorced 173 (51%) 146 (43%) 18 (5.3%)
    Domestic Partnership 8 (62%) 4 (31%) 1 (7.7%)
    Married 798 (37%) 977 (45%) 395 (18%)
    Never married 89 (51%) 66 (38%) 19 (11%)
    Unknown 3 (33%) 6 (67%) 0 (0%)
    Widowed 138 (44%) 135 (43%) 44 (14%)
    (Missing) 0 4 0
Baseline Body Mass Index


    Mean (SD) 28.7 (5.6) 28.5 (7.1) 28.7 (1.9)
    Median (Q1, Q3) 28.8 (24.9, 31.9) 26.7 (24.0, 30.5) 29.7 (27.6, 29.9)
    Range 18.6, 39.3 18.9, 47.9 25.2, 30.1
    (Missing) 1,190 1,310 471
APOE Genotype, n (%)


    ε2/ε2 5 (56%) 3 (33%) 1 (11%)
    ε2/ε3 117 (59%) 67 (34%) 14 (7.1%)
    ε2/ε4 22 (35%) 29 (47%) 11 (18%)
    ε3/ε3 577 (46%) 537 (43%) 129 (10%)
    ε3/ε4 271 (30%) 430 (48%) 195 (22%)
    ε4/ε4 37 (15%) 124 (50%) 87 (35%)
    (Missing) 180 148 40
Baseline ADAS-Cog Item 13 Total Score


    Mean (SD) 8.9 (4.4) 16.4 (6.7) 29.9 (8.1)
    Median (Q1, Q3) 8.3 (5.3, 11.7) 16.0 (11.3, 21.0) 29.3 (24.3, 34.3)
    Range 0.0, 26.3 0.7, 39.7 9.3, 54.7
    (Missing) 14 29 12
Baseline CDR Global Score, n (%)


    0 1,199 (98%) 22 (1.8%) 0 (0%)
    0.5 9 (0.6%) 1,307 (84%) 234 (15%)
    1 0 (0%) 7 (2.8%) 240 (97%)
    2 0 (0%) 0 (0%) 3 (100%)
    (Missing) 1 2 0
Baseline CDR Sum of Boxes Score


    Mean (SD) 0.0 (0.2) 1.5 (1.0) 4.4 (1.7)
    Median (Q1, Q3) 0.0 (0.0, 0.0) 1.5 (1.0, 2.0) 4.5 (3.0, 5.0)
    Range 0.0, 2.0 0.0, 15.0 1.0, 10.0
    (Missing) 1 2 0
Baseline MMSE Score


    Mean (SD) 29.0 (1.2) 27.5 (1.9) 23.1 (2.3)
    Median (Q1, Q3) 29.0 (29.0, 30.0) 28.0 (26.0, 29.0) 23.0 (21.0, 25.0)
    Range 23.0, 30.0 19.0, 30.0 12.0, 30.0
    (Missing) 0 4 1
Baseline FAQ Total Score


    Mean (SD) 0.2 (0.9) 3.2 (4.1) 13.0 (6.9)
    Median (Q1, Q3) 0.0 (0.0, 0.0) 2.0 (0.0, 5.0) 13.0 (8.0, 18.0)
    Range 0.0, 13.0 0.0, 24.0 0.0, 30.0
    (Missing) 24 37 4
Baseline mPACCtrialsB


    Mean (SD) 0.0 (2.6) -5.4 (3.8) -13.6 (3.4)
    Median (Q1, Q3) 0.3 (-1.7, 1.8) -5.2 (-8.1, -2.4) -13.6 (-16.0, -11.3)
    Range -10.4, 7.7 -18.9, 5.1 -25.5, 0.9
Baseline mPACCdigit


    Mean (SD) 0.1 (2.4) -6.9 (3.1) -13.1 (2.9)
    Median (Q1, Q3) 0.2 (-1.3, 1.7) -7.1 (-9.3, -4.6) -13.0 (-15.2, -10.9)
    Range -6.0, 6.3 -14.9, 1.6 -19.6, -5.4
    (Missing) 980 941 284
Abbreviation: CN: Cognitive Normal; MCI: Mild Cognitive Impairment; DEM: Dementia; SD: Standard Deviation; Q1: the 25th percentile; Q3: the 75th percentile; Baseline mPACCdigit score was based on subjects that were enrolled only in ADNI1 study phase.
1 Row-wise percentage; n (%)
# Fill colours applied to the baseline diagnosis groups
dx_color_pal <- c("#73C186", "#F2B974", "#DF957C", "#999999")

# One density plot per numeric characteristic, split by baseline diagnosis.
# The object keeps its original name even though it holds density plots.
cont_violin_plot_bl_dx <- lapply(cont_var_list, function(x) {
  # Labels reused for the axis, legend and titles
  dx_label <- get_variable_labels(ADSL$DX)
  x_label <- var_label_list[[x]]

  # Enrolled subjects with a known baseline diagnosis; the plotted column is
  # given a fixed name so the aesthetics can refer to it
  graph_data <- ADSL %>%
    filter(ENRLFL %in% "Y", !is.na(DX)) %>%
    rename(yvalue = all_of(x))

  # Subjects with both the characteristic and the diagnosis available
  n_obs <- sum(!is.na(graph_data$yvalue) & !is.na(graph_data$DX))

  ggplot(graph_data, aes(x = yvalue, fill = DX)) +
    geom_density(alpha = 0.5) +
    labs(
      x = paste0(x_label),
      y = "Density",
      color = dx_label,
      title = x_label,
      subtitle = paste0(" By ", dx_label, ", n = ", n_obs),
      fill = dx_label
    ) +
    scale_fill_manual(values = dx_color_pal) +
    theme(
      legend.position = "bottom",
      title = element_text(size = 10.5)
    )
})

names(cont_violin_plot_bl_dx) <- cont_var_list

# Arrange the density plots in a two-column grid with one shared legend,
# placed at the bottom of the combined figure
plots(
  cont_violin_plot_bl_dx,
  n_columns = 2,
  guides = "collect",
  caption = paste0(
    "Based on subjects that had known diagnostics status at baseline visit. \n",
    "Baseline mPACCdigit score was based on subjects that were enrolled only ",
    "in ADNI1 study phase. \n The remaining summary plots were based on ",
    "subjects that enrolled in ADNI study."
  ),
  title = paste0(
    "ADNI - Plots of Numeric Characteristics: ",
    "By Baseline Diagnostics Status"
  )
) & theme(legend.position = "bottom")

Distribution of numeric characteristics: By baseline diagnostics status