Methodology

Marks were collected in individual files across four academic years spanning from 2017-2018 to 2020-2021, for the seven different pratical sessions, then anonymized. Marks range from 0 (worst) to 20 (best). Scores of 0, which where only given when a student was absent from the session without justification, were removed from the analysis but are present in the raw data.

The data was processed in R, by importing and collating the raw data, converting the dataframe to a long format, then characterizing and plotting distributions by year and/or class.

#file import

## file list

file.list <- list.files('notes_TP', full.names = TRUE)

## import fuction

file.importr <- function(file.input){
  readxl::read_excel(
    path = file.input,
    col_types = 'numeric'
  ) %>% 
    magrittr::set_colnames(c("id", "group", "1:sol", "2:sol", "3:UV", "4:HPLC", "5:GC", "6:LLE", "7:IT")) %>% 
    mutate(
      year = str_remove(tools::file_path_sans_ext(file.input), "notes_TP/"),
      group = as.factor(group)
      )
}

## file importing and preparation

marks.0 <- do.call(rbind, lapply(file.list, file.importr)) %>% 
  pivot_longer(
    cols = 4:ncol(.)-1,
    values_to = 'marks',
    names_to = 'session'
  )  

marks <- marks.0 %>% 
  filter(marks != 0)

Data

Raw data

DT::datatable(
  marks.0,
  callback = JS("return table;"),
  caption = "Anonimized raw data", 
  filter = c("top"), 
  escape = TRUE,
  style = "default", 
  class = 'table-hover',
  width = NULL, 
  height = NULL, 
  elementId = NULL,
  fillContainer = getOption("DT.fillContainer", NULL),
  autoHideNavigation = getOption("DT.autoHideNavigation", NULL),
  selection = c("multiple"), 
  extensions = 'Buttons', 
  options = list(
    dom = 'Bfrtip',
    buttons = c('copy','excel', 'pdf')
  ),
  # plugins = list("four_button"), 
  editable = FALSE
)

Processed data

Overall view

Below are visualisations of the mark distribution. The first two plots display the distributions of each class across, separated in different panel per academic year.

## custom theme with rescaling variable
rescale.theme <- function(rescale = 1){
  theme_pander() +
    theme(plot.margin=unit(c(0.5,0.5,0.5,0.5),"cm"),
          panel.border = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black", size = 0.75*rescale),
          axis.ticks.length=unit(0.05*rescale, "in"), 
          axis.ticks = element_line(size = 0.75*rescale, color = 'black'), 
          legend.position = 'bottom',
          legend.title = element_text(size=14*rescale,face="bold"),
          legend.key = element_rect(fill = "white"),
          legend.text = element_text(size=12*rescale, face="bold"),
          axis.title.x = element_text(size=16*rescale, face="bold"),
          axis.title.y = element_text(size=16*rescale, face="bold"),
          axis.text.x = element_text(color = "black", size = 14*rescale, angle = 0),
          axis.text.y = element_text(color = "black", size = 14*rescale, angle = 0),
          strip.text.x = element_text(face = "bold"),
          strip.text.y = element_text(face = 'bold'))
}

scale <- 0.7

###boxplots
marks %>% 
  ggplot(aes(y = marks, x = session, group = session)) +
  geom_boxplot(
    aes(fill = session), 
    color = 'black', alpha = 0.5,
    outlier.size = 0, outlier.alpha = 0,
    show.legend = FALSE
  ) +
  geom_jitter(
    aes(color = session), 
    alpha = 0.2, width = 0.25,
    show.legend = FALSE
  ) +
  geom_text(
    data = marks %>% 
      group_by(year, session) %>% 
      mutate(n=sum(!is.na(marks))) %>% 
      select(session, year, n) %>% 
      unique() %>% 
      mutate(label = paste0('n = ', n)),
    aes(label = label, y = 2),
    fontface = 'italic',
    size = 3*scale
  ) +
  scale_color_d3() +
  scale_fill_d3() +
  facet_wrap(~year) +
  rescale.theme(scale) +
  labs(x = '', y = 'mark')
Distributions of the marks per year per session (boxplots)

Distributions of the marks per year per session (boxplots)

marks %>% 
  ggplot(aes(y = marks, x = session, group = session)) +
  geom_violin(
    aes(fill = session), 
    color = 'black', alpha = 0.5,
    show.legend = FALSE
  ) +
  geom_text(
    data = marks %>% 
      group_by(year, session) %>% 
      mutate(n=sum(!is.na(marks))) %>% 
      select(session, year, n) %>% 
      unique() %>% 
      mutate(label = paste0('n = ', n)),
    aes(label = label, y = 2),
    fontface = 'italic',
    size = 3*scale
  ) +
  scale_color_d3() +
  scale_fill_d3() +
  facet_wrap(~year) +
  rescale.theme(scale) +
  labs(x = '', y = 'mark')
Distributions of the marks per year per session (violin plots)

Distributions of the marks per year per session (violin plots)

Influence of groups (or lack thereof)

Students are (mostly) assigned to groups in alphabetical order, and within these groups, pairs of students are also based on their surname. Although this is not true randomisation, the groups are clearly not designed according to grade level. Yet, over the years, we have observed qualitative differences in the level of different groups performing the same tasks.

We have therefore performed systematic pairwise t-tests to assess whether these differences do quantitatively exist, and whether they should be taken into account further into the analysis.

#p testing function
p.t.test <- function(x, dummy = NULL){
  pairwise.t.test(
    x = x$marks,
    g = x$group
  )$p.value %>% 
    as.data.frame() %>% 
    rownames_to_column(var = 'group')
}


#p testing
marks %>% 
  filter(year != "2017-2018") %>% 
  group_by(year) %>% 
  nest() %>% 
  mutate(
    pvalues = purrr::map(
      data,
      p.t.test
    )
  ) %>% 
  unnest(pvalues) %>% 
  select(-data) %>% 
  flextable() %>% 
  theme_vanilla() %>% 
  merge_v(j = 1) %>% 
  colformat_double(digits = 2)

Clearly, there are significant differences between a few of these groups (likely between the extrema for each year), which are highlighted in the figure below, but most are relatively similar to each other.

marks %>% 
  filter(year != "2017-2018") %>% 
  ggplot(aes(y = marks, x = group, group = group)) +
  geom_boxplot(
    aes(fill = group), 
    color = 'black', alpha = 0.5,
    outlier.size = 0, outlier.alpha = 0,
    show.legend = FALSE
  ) +
  geom_jitter(
    aes(color = group), 
    alpha = 0.2, width = 0.25,
    show.legend = FALSE
  ) +
  geom_signif(
    comparisons = list(c(8,1)),
    map_signif_level = TRUE, textsize = 4,
    y_position = 21,
    tip_length = c(0.1, 0.05),
    color = 'steelblue'
  ) +
  geom_signif(
    comparisons = list(c(8,7)),
    map_signif_level = TRUE, textsize = 4,
    y_position = 20,
    tip_length = 0.05,
    color = 'tomato'
  ) +
  geom_signif(
    comparisons = list(c(4,5)),
    map_signif_level = TRUE, textsize = 4,
    y_position = 20,
    tip_length = 0.05,
    color = 'tomato'
  ) +
  geom_signif(
    comparisons = list(c(4,8)),
    map_signif_level = TRUE, textsize = 4,
    y_position = 22,
    tip_length = c(0.1, 0.05),
    color = 'forestgreen'
  ) +
  geom_text(
    data = marks %>%
      filter(year != "2017-2018") %>% 
      group_by(year, group) %>%
      mutate(n=sum(!is.na(marks))) %>%
      select(group, year, n) %>%
      unique() %>%
      mutate(label = paste0('n = ', n)),
    aes(label = label, y = 2),
    fontface = 'italic',
    size = 3*scale
  ) +
scale_color_d3() +
  scale_fill_d3() +
  facet_wrap(~year) +
  rescale.theme(scale) +
  labs(x = 'group', y = 'mark')
Distribution of marks per group and per year.

Distribution of marks per group and per year.

This group effect is not constant from year to year (e.g., the difference between groups 7 and 8 is only visible in 2020-2021), so our pseudo-randomisation is sufficient to remove any potential systematic bias.

marks %>% 
  filter(year != "2017-2018") %>% 
  ggplot(aes(y = marks, x = group, group = group)) +
  geom_boxplot(
    aes(fill = group), 
    color = 'black', alpha = 0.5,
    outlier.size = 0, outlier.alpha = 0,
    show.legend = FALSE
  ) +
  geom_jitter(
    aes(color = group), 
    alpha = 0.2, width = 0.25,
    show.legend = FALSE
  ) +
  geom_signif(
    comparisons = list(c(8,1)),
    map_signif_level = TRUE, textsize = 4,
    y_position = 21,
    tip_length = c(0.1, 0.05),
    color = 'steelblue'
  ) +
  geom_signif(
    comparisons = list(c(8,7)),
    map_signif_level = TRUE, textsize = 4,
    y_position = 20,
    tip_length = 0.05,
    color = 'tomato'
  ) +
  geom_signif(
    comparisons = list(c(4,5)),
    map_signif_level = TRUE, textsize = 4,
    y_position = 20,
    tip_length = 0.05,
    color = 'tomato'
  ) +
  geom_signif(
    comparisons = list(c(4,8)),
    map_signif_level = TRUE, textsize = 4,
    y_position = 22,
    tip_length = c(0.1, 0.05),
    color = 'forestgreen'
  ) +
  geom_text(
    data = marks %>%
      filter(year != "2017-2018") %>% 
      group_by(group) %>%
      mutate(n=sum(!is.na(marks))) %>%
      select(group, n) %>%
      unique() %>%
      mutate(label = paste0('n = ', n)),
    aes(label = label, y = 2),
    fontface = 'italic',
    size = 3*scale
  ) +
scale_color_d3() +
  scale_fill_d3() +
  rescale.theme(scale) +
  labs(x = 'group', y = 'mark')
Distribution of marks per group across years.

Distribution of marks per group across years.

Analysis

The marks are remarkably similar the first three years. There is a drop of around 0.6 points in 2020-2021, but the median remains the same, suggesting that a few students scored particularly low.

marks %>% 
  group_by(year) %>% 
  summarise(
    median = sprintf("%.1f", median(marks, na.rm = TRUE)),
    mean = sprintf("%.1f", mean(marks, na.rm = TRUE)),
    sd = sprintf("%.1f", sd(marks, na.rm = TRUE))
  ) %>% 
  flextable() %>% 
  theme_vanilla() %>% 
  align(align = 'center', part = 'all') %>% 
  bold(j=1) %>% 
  bg(bg = 'white', part = 'all') %>% 
  set_caption("Mean and standard deviation of the marks per year")
marks %>% 
  ggplot(aes(y = marks, x = year, group = year)) +
  geom_boxplot(
    aes(fill = year), 
    color = 'black', alpha = 0.5,
    outlier.size = 0, outlier.alpha = 0,
    show.legend = FALSE
  ) +
  geom_jitter(
    aes(color = year), 
    alpha = 0.2, width = 0.25,
    show.legend = FALSE
  ) +
  geom_text(
    data = marks %>%
      group_by(year) %>%
      mutate(n=sum(!is.na(marks))) %>%
      select(year, n) %>%
      unique() %>%
      mutate(label = paste0('n = ', n)),
    aes(label = label, y = 2),
    fontface = 'italic'
  ) +
  scale_color_d3() +
  scale_fill_d3() +
  rescale.theme() +
  labs(x = '', y = 'mark')
Distribution of marks across all classes per year

Distribution of marks across all classes per year

Marks are fairly homogeneous across all classes, the first two being generally slightly more successful. Classes evaluated more quantitatively (sessions 1 and 2, which are largely based on quantitation results, and 7:IT, which was marked automatically starting from 2019) have the largest distribution widths. Sessions wherein a large portion of the mark depends on qualitative factors seem to be marked quite conservatively, and homogeneously across supervising professor, leading to narrower distributions.

marks %>% 
  group_by(session) %>% 
  summarise(
    mean = sprintf("%.1f", mean(marks, na.rm = TRUE)),
    sd = sprintf("%.1f", sd(marks, na.rm = TRUE))
  ) %>% 
  flextable() %>% 
  theme_vanilla() %>% 
  align(align = 'center', part = 'all') %>% 
  bold(j=1) %>% 
  bg(bg = 'white', part = 'all') %>% 
  set_caption("Mean and standard deviation of the marks per class")
###boxplots yearly average

marks %>% 
  ggplot(aes(y = marks, x = session, group = session)) +
  geom_boxplot(
    aes(fill = session), 
    color = 'black', alpha = 0.5,
    outlier.size = 0, outlier.alpha = 0,
    show.legend = FALSE
  ) +
  geom_jitter(
    aes(color = session), 
    alpha = 0.2, width = 0.25,
    show.legend = FALSE
  ) +
  geom_text(
    data = marks %>% 
      group_by(session) %>% 
      mutate(n=sum(!is.na(marks))) %>% 
      select(session, n) %>% 
      unique() %>% 
      mutate(label = paste0('n = ', n)),
    aes(label = label, y = 2),
    fontface = 'italic',
    scale = 6*scale
  ) +
  scale_color_d3() +
  scale_fill_d3() +
  rescale.theme() +
  labs(x = '', y = 'mark')
Distributions of the marks per year

Distributions of the marks per year

In 2019-2020, three classes were canceled due to covid, and the last session was conducted asynchronously, online, and only by a fraction of the students who volunteered. They also had to work alone (but were not forbidden to communicate with each others) rather than in pairs, which was the standard procedure pre-covid. They had at their disposal an assistance chat for technical issues, and a forum for questions related to the actual course content.

IT.covid <- marks %>% 
  filter(year == '2019-2020') %>% 
  select(id, marks) %>% 
  left_join(
    marks.0 %>% 
      filter(year == '2019-2020') %>% 
      group_by(id) %>% 
      #select students that volunteered for the IT session
      mutate(IT = if_else(is.na(marks[session == '7:IT']), 'no', 'yes')) ,
    by = c('id', 'marks')
  ) 

IT.covid %>% 
  filter(session != "7:IT") %>% 
  ggplot(aes(y = marks, x = IT, group = IT)) +
  geom_violin(
    aes(fill = IT), 
    color = 'black', alpha = 0.5,
    outlier.size = 0, outlier.alpha = 0,
    show.legend = FALSE
  ) +
  geom_text(
    data = marks.0 %>% 
      filter(year == '2019-2020') %>% 
      group_by(id) %>% 
      #select students that volunteered for the IT session
      mutate(IT = if_else(is.na(marks[session == '7:IT']), 'no', 'yes')) %>%
      filter(session == "7:IT") %>% 
      group_by(IT) %>%
      mutate(
        n=if_else(IT=='yes', sum(!is.na(marks)), sum(is.na(marks)))
      ) %>%
      select(IT, n) %>%
      unique() %>%
      mutate(label = paste0('n = ', n)),
    aes(label = label, y = 4.5),
    fontface = 'italic'
  ) +
  scale_y_continuous(limits = c(4, 22)) +
  scale_color_d3() +
  scale_fill_d3() +
  rescale.theme() +
  labs(x = '', y = 'mark') +
  geom_signif(
    comparisons = list(c("yes", "no")),
    map_signif_level = TRUE, textsize = 6
  )
Mark distributions for students who volunteered ("yes") or not ("no") to the online IT session in 2019-2020.

Mark distributions for students who volunteered (“yes”) or not (“no”) to the online IT session in 2019-2020.

IT.covid <- IT.covid %>% 
  #scores from IT excluded for fair comparison of samples
  filter(session != '7:IT')

#two-sample t-test
##sample 1: volunteers
##sample 2: not volunteers
ttest.no.IT <- t.test(
  x = IT.covid %>% filter(IT == 'yes') %>% ungroup() %>%  select(marks) %>% filter(!is.na(marks)),
  y = IT.covid %>% filter(IT == 'no') %>% ungroup() %>%  select(marks) %>% filter(!is.na(marks)),
  conf.level = 0.95
) 

IT.covid %>%
  group_by(IT) %>%
  summarise(
    median = sprintf("%.2f", median(marks, na.rm = TRUE)),
    mean = sprintf("%.2f", mean(marks, na.rm = TRUE)),
    sd = sprintf("%.2f", sd(marks, na.rm = TRUE))
  ) %>%
  mutate(
    t.statistic = sprintf("%.2f", ttest.no.IT$statistic),
    df = sprintf("%.2f", ttest.no.IT$parameter),
    p.value.no.IT = sprintf("%.3f", ttest.no.IT$p.value),
    mean.dif = sprintf("%.2f", ttest.no.IT$estimate[1]-ttest.no.IT$estimate[2]),
    conf = paste0(sprintf("%.1f", ttest.no.IT$conf.int[1]), '-', sprintf("%.2f", ttest.no.IT$conf.int[2]))
  ) %>%
  flextable() %>%
  merge_v(j = 5:9) %>%
  set_header_labels(
    'IT' ="Volunteers",
    "t.statistic" ="t-statistic",
    'df' ="degrees-of-freedom",
    "p.value.no.IT" ="p value",
    'mean.dif' ="mean difference",
    'conf' = "confidence interval (95%)"
  ) %>%
  theme_vanilla()%>%
  bg(bg = 'white', part = 'all') %>% 
  set_caption("Statistical significance of the mean score between students who volunteered, or not, for the online IT session during the 2019-2020 academic year")

Despite these more challenging conditions (also accounting for the various consequences of covid), students scored similarly to the previous years. Possible explanations for this include i) a slight adaptation of the marking in their favor and ii) volunteers having scored higher in previous classes, on average, than students that opted out. Although relatively limited in size (0.3-0.96 with a 95% confidence), this difference in score is statistically significant.

In 2020-2021, the ‘IT’ class went back to face-to-face but, due to covid restrictions, students worked alone rather than in pairs, which lead to some poor results for a number of them. It is possible that some of these students usually rely on their lab partners. As a result, the IT session of 2020-2021 records both the lowest average mark and largest standard deviation of the dataset.

marks %>% 
  group_by(session, year) %>% 
  summarise(
    mean = mean(marks, na.rm = TRUE),
    sd = sd(marks, na.rm = TRUE)
  ) %>% 
  mutate(
    mean = if_else(mean == 'NaN', 'cancelled', sprintf("%.1f", mean)),
    sd = if_else(is.na(sd), 'cancelled', sprintf("%.1f", sd))
  ) %>% 
  pivot_wider(
    id_cols = 1:4,
    names_from = c(session),
    values_from = c(mean, sd)
  ) %>% 
  flextable() %>% 
  set_header_labels(
    "mean_1:sol" = '1:sol',
    "mean_2:sol" = '2:sol',
    "mean_3:UV" = '3:UV',
    "mean_4:HPLC" = '4:HPLC',
    "mean_5:GC" = '5:GC',
    "mean_6:LLE" = '6:LLE',
    "mean_7:IT" = '7:IT',
    "sd_1:sol" = '1:sol',
    "sd_2:sol" = '2:sol',
    "sd_3:UV" = '3:UV',
    "sd_4:HPLC" = '4:HPLC',
    "sd_5:GC" = '5:GC',
    "sd_6:LLE" = '6:LLE',
    "sd_7:IT" = '7:IT'
  ) %>% 
  add_header_row(values = c('', 'Mean', 'Standard deviation'), colwidths = c(1, 7, 7)) %>% 
  theme_vanilla() %>% 
  bg(bg = 'white', part = 'all') %>% 
  vline(j = c(1, 8)) %>% 
  align(align = 'center', part = 'all') %>% 
  bold(part = "header") %>% 
  color(i = 4, j = c(8,15), color = 'tomato') %>% 
  bold(i = 4, j = c(8,15)) %>% 
  color(i = 3, j = c(8,15), color = 'steelblue') %>% 
  bold(i = 3, j = c(8,15)) %>% 
  italic(i = 3, j = c(5:7, 12:14)) %>% 
  merge_h(i = 3) %>% 
  set_caption("Mean and standard deviation of the marks per year and class")

Time evolution

The animation below highlights:

  • Globally homogeneous scores across classes and time,
  • More forgiving marking for sessions 3–6 that occurred the week before a hard lock-down was put into place (small sample size),
  • Slightly better marks for the IT session in 2019-2020, only performed by opt-in students,
  • Larger amounts of low scores in 2020-2021, in particular for (but not limited to) the IT session.
# animation
animate(
  marks %>%
    group_by(session, year) %>%
    mutate(rsd = sd(marks, na.rm = TRUE)/mean(marks, na.rm = TRUE)) %>%
    ungroup() %>%
    ggplot(aes(y = marks, x = session, group = session)) +
    geom_violin(
      aes(fill = rsd),
      color = 'black', alpha = 0.8,
      show.legend = FALSE
    ) +
    geom_text(
      data = data.frame(
        year = rep('2019-2020', 5),
        session = c('3:UV', '4:HPLC', '5:GC', '6:LLE', '7:IT'),
        label = c(rep("partially\ncanceled", 4), 'only\nvolunteers')
      ),
      aes(label = label, y = 21, x = session),
      fontface = 'italic',
      size = 3*scale
    ) +
    geom_text(
      data = marks %>%
        group_by(year, session) %>%
        mutate(n=sum(!is.na(marks))) %>%
        select(session, year, n) %>%
        unique() %>%
        mutate(label = paste0('n = ', n)),
      aes(label = label, x = session, y = 3),
      fontface = 'italic',
      size = 3*scale
    ) +
    scale_fill_viridis_c("RSD") +
    scale_y_continuous(limits = c(2, 22)) +
    rescale.theme() +
    labs(
      x = '', y = 'mark',
      title = 'Academic year: {closest_state}'
    ) +
    transition_states(
      year,
      transition_length = 2,
      state_length = 1
    ) +
    enter_fade() +
    exit_shrink() +
    ease_aes('sine-in-out'),
  res = 300,
  width = 1800,
  height = 1200
)
Evolution of the mark distribution per class across time, colored by relative standard deviation.

Evolution of the mark distribution per class across time, colored by relative standard deviation.