Workshop Description

The topics presented in this workshop will be

Pre-requisites

Recommended Background Reading Introduction to R for Biologists

Workshop Participation

R / Bioconductor packages used

  • tidyverse
  • tidybulk
  • tidyHeatmap
  • edgeR
  • ggrepel
  • airway

Time outline

Activity Time

Workshop goals and objectives

Learning goals

  • To …
  • To …

Learning objectives

Acknowledgements

This material is adapted from an R for RNA sequencing workshop first run here.

Introduction

Gate + gene enrichment

# Setup data frame
tt <-

  # Load dataset
  cellsig::counts %>%
  tidybulk(sample, symbol, count) %>%

  # Group by level because otherwise samples are duplicated
  nest(data = -level) %>%

  # Redefine factors inside each level
  mutate(data = future_map(data, ~ droplevels(.x))) %>%

  # Fill missing data. There are many genes that
  # are not shared by the majority of samples
  mutate(data = future_map(data, ~ fill_missing_abundance(.x, fill_with = 0))) %>%

  # Scale for future PCA plotting
  mutate(data = future_map(data, ~ .x %>% identify_abundant() %>% scale_abundance()))

get_constrasts_from_df = function(.data){
  .data %>%

    distinct(cell_type) %>%

    # Permute
    mutate(cell_type2 = cell_type) %>%
    expand(cell_type, cell_type2) %>%
    filter(cell_type != cell_type2) %>%

    # Create contrasts
    mutate(contrast = sprintf("cell_type%s - cell_type%s", cell_type, cell_type2)) %>%
    pull(contrast)

}

select_markers_for_each_contrast = function(.data){
  .data %>%

    # Group by contrast. Comparisons both ways.
    pivot_longer(
      cols = contains("___"),
      names_to = c("stats", "contrast"),
      values_to = ".value",
      names_sep="___"
    ) %>%

    # Markers selection
    nest(stat_df = -contrast) %>%

    # Reshape inside each contrast
    mutate(stat_df = map(stat_df, ~.x %>% pivot_wider(names_from = stats, values_from = .value))) %>%

    # Rank
    mutate(stat_df = map(stat_df, ~.x %>%
                           filter(FDR < 0.05 & abs(logFC) > 2) %>%
                           filter(logCPM > mean(logCPM)) %>%
                           arrange(logFC %>% desc()) %>%
                           slice(1:10)
    )) %>%
    unnest(stat_df)
}


all_contrasts <-
  tt %>%

  # Investigate one level
  filter(level==1) %>%

  # Differential transcription
  mutate(markers = map(
    data,
    ~ test_differential_abundance(.x,
        ~ 0 + cell_type,
        .contrasts = get_constrasts_from_df(.x),
        action="only"
      )

  )) %>%

  # Select rank from each contrast
  mutate(markers = map( markers, ~ .x %>% select_markers_for_each_contrast)) %>%

  # Add marker info to original data
  mutate(markers = map2(markers, data, ~ left_join(.x, .y))) %>%
  select(markers) %>%
  unnest(markers) %>%

  # make contrasts pretty
  mutate(contrast_pretty = str_replace(contrast, "cell_type", "") %>% str_replace("cell_type", ""))


# Plot Markers
all_contrasts %>%
  ggplot(aes(cell_type, count_scaled + 1, color=contrast_pretty)) +
  geom_point(size = 0.5) +
  facet_wrap(~contrast_pretty + symbol, scale="free_y") +
  scale_y_log10() +
  theme_bw() +
  theme(
    text = element_text(size=6),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)
  )

# Plot PCA
all_contrasts %>%
  distinct(sample, symbol, count_scaled,cell_type) %>%
  nanny::reduce_dimensions(sample, symbol, count_scaled,  method = "PCA", action="get", transform = log1p) %>%
  ggplot(aes(x = PC1, y = PC2, colour = cell_type)) +
  geom_point() +
  theme_bw()

Key Points

Exercises

Questions:

Contributing

If you want to suggest improvements for this workshop or ask questions, you can do so as described here.

Reproducibility

Record package and version information with sessionInfo

sessionInfo()
#> R version 4.0.2 Patched (2020-08-06 r78974)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 20.04 LTS
#> 
#> Matrix products: default
#> BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-openmp/libopenblasp-r0.3.8.so
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> loaded via a namespace (and not attached):
#>  [1] rprojroot_1.3-2  digest_0.6.25    crayon_1.3.4     assertthat_0.2.1
#>  [5] MASS_7.3-51.6    R6_2.4.1         backports_1.1.8  magrittr_1.5    
#>  [9] evaluate_0.14    stringi_1.4.6    rlang_0.4.7      fs_1.5.0        
#> [13] rmarkdown_2.3    pkgdown_1.5.1    desc_1.2.0       tools_4.0.2     
#> [17] stringr_1.4.0    yaml_2.2.1       xfun_0.16        compiler_4.0.2  
#> [21] memoise_1.1.0    htmltools_0.5.0  knitr_1.29

References