library(recount)
## Warning: package 'MatrixGenerics' was built under R version 4.1.1
## Warning: package 'matrixStats' was built under R version 4.1.3
## Warning: package 'S4Vectors' was built under R version 4.1.1
## Warning: package 'GenomeInfoDb' was built under R version 4.1.1
library(edgeR)
## Warning: package 'edgeR' was built under R version 4.1.1
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 4.1.3
library(tidyverse)
library(Homo.sapiens)
## Warning: package 'GenomicFeatures' was built under R version 4.1.1
### Download the RangedSummarizedExperiment object at the gene level for TCGA melanoma data.
url <-'https://wimr-genomics.vip.sydney.edu.au/AMED3002/data/rse_gene_skin2.txt'
### View the url for the file by printing the object url
# url
### Download the file
download.file(url = url, destfile = "rse_gene_skin.Rdata", method = "auto", mode = "wb")

### Load the data
load("rse_gene_skin.Rdata")


rse_gene
## class: RangedSummarizedExperiment 
## dim: 58037 473 
## metadata(0):
## assays(1): counts
## rownames(58037): ENSG00000000003.14 ENSG00000000005.5 ...
##   ENSG00000283698.1 ENSG00000283699.1
## rowData names(3): gene_id bp_length symbol
## colnames(473): 72189C87-53DD-427E-B163-91E404577A7A
##   27496AAB-AC06-4100-BB41-0029F63CC678 ...
##   D868AC1A-11DB-4A07-BF2F-1429FA197F7B
##   4AFBA9FB-3DBA-4271-B20D-40580B89D09E
## colData names(864): project sample ...
##   xml_primary_pathology_history_myasthenia_gravis
##   xml_primary_pathology_section_myasthenia_gravis
counts <- assay(rse_gene)
genes <- rowData(rse_gene)
samples <- as.data.frame(colData(rse_gene))
# Week 7
TCGA <- DGEList(counts = counts, samples = samples, genes = genes)
TCGA <- calcNormFactors(TCGA, method = "TMM")
keep <- filterByExpr(TCGA)
## Warning in filterByExpr.DGEList(TCGA): All samples appear to belong to the same
## group.
TCGA <- TCGA[keep,]
cpm <- cpm(TCGA)
lcpm <- cpm(TCGA, log=TRUE)
pca <- prcomp(t(lcpm), scale = TRUE)
autoplot(pca, data = samples, colour = "cgc_case_gender")

# Week 8
design <- model.matrix(~ cgc_case_gender, samples)
v <- voom(TCGA, design)
fit <- lmFit(v, design)
efit <- eBayes(fit)
dt_fdr <- decideTests(efit, adjust.method = "fdr", p.value = 0.05)
summary(dt_fdr)
##        (Intercept) cgc_case_genderMALE
## Down         15186                  59
## NotSig         819               33355
## Up           17438                  29
volcanoplot(efit, coef = "cgc_case_genderMALE", highlight = 10, names = efit$genes$symbol)

# Week 9
load(url("http://bioinf.wehi.edu.au/software/MSigDB/human_c2_v5p2.rdata"))
symbol <- unlist(lapply(v$genes$symbol,function(x)x[1])) #Unfortunately there are more than gene symbol per gene.
ENTREZID <- mapIds(Homo.sapiens, keys=symbol, column=c("ENTREZID"),keytype="SYMBOL", multiVals = "first")
## 'select()' returned 1:many mapping between keys and columns
v$genes$ENTREZID <-ENTREZID
idx <- ids2indices(Hs.c2,id=v$genes$ENTREZID)
cam<- camera(v,idx,design,contrast="cgc_case_genderMALE",trend.var = TRUE)
head(cam,4)
##                                           NGenes Direction       PValue
## RUNNE_GENDER_EFFECT_UP                         9        Up 0.000000e+00
## PYEON_CANCER_HEAD_AND_NECK_VS_CERVICAL_DN     26        Up 2.152953e-74
## DISTECHE_ESCAPED_FROM_X_INACTIVATION          13      Down 1.157596e-34
## ZHANG_TLX_TARGETS_36HR_DN                    185        Up 5.608022e-19
##                                                    FDR
## RUNNE_GENDER_EFFECT_UP                    0.000000e+00
## PYEON_CANCER_HEAD_AND_NECK_VS_CERVICAL_DN 5.090658e-71
## DISTECHE_ESCAPED_FROM_X_INACTIVATION      1.824758e-31
## ZHANG_TLX_TARGETS_36HR_DN                 6.630084e-16