3 Differential expression analysis
Having defined our statistical model, we will proceed with the differential expression analysis, using the limma package. The voom function transforms count data to log2-counts per million. From this, the mean-variance relationship is used to compute observation level weights.
library("limma")
vGene <- voom(dge, mod, plot = TRUE)
Our next step is calculating different statistics, to evaluate which genes are differentially expressed in different circumstances. For this, we will use the eBayes function. Looking back at what each coefficient represents:
colnames(mod)## [1] "(Intercept)" "age" "subtypeBasal" "subtypeHer2" "subtypeLumA" "subtypeLumB" "tumor_stage"
We will look at the genes differentially expressed with respect to coefficients 3 (Basal), 4 (Her2), 5 (LumA) and 6 (LumB), to look at the genes associated with the different subtypes. We can predict that these genes are among the ones used to distinguish the subtypes.
# Empirical Bayes Statistics for Differential Expression
eb_results <- eBayes(lmFit(vGene))3.1 Normal tissue
de_results_normal <- topTable(
eb_results,
coef = 1,
number = nrow(rse_BRCA),
sort.by = "none"
)
# Number of differentially expressed genes
table(de_results_normal$adj.P.Val < 0.05)##
## FALSE TRUE
## 1865 62972
When compared to cancer, most genes are differentially expressed.
3.2 Basal subtype
de_results_basal <- topTable(
eb_results,
coef = 3,
number = nrow(rse_BRCA),
sort.by = "none"
)
# Number of differentially expressed genes
table(de_results_basal$adj.P.Val < 0.05)##
## FALSE TRUE
## 22277 42560
65% of genes are differentially expressed.
# Visualizing as volcano plot
volcanoplot(eb_results, coef = 3, highlight = 3, names = de_results_basal$gene_name)
# Highlighted genes
de_results_basal$gene_name[rank(de_results_basal$adj.P.Val) < 4]## [1] "CDCA8" "CAVIN2" "HJURP"
3.3 Her2 subtype
de_results_her2 <- topTable(
eb_results,
coef = 4,
number = nrow(rse_BRCA),
sort.by = "none"
)
# Number of differentially expressed genes
table(de_results_her2$adj.P.Val < 0.05)##
## FALSE TRUE
## 25519 39318
60% of genes are differentially expressed.
# Visualizing as volcano plot
volcanoplot(eb_results, coef = 4, highlight = 3, names = de_results_her2$gene_name)
# Highlighted genes
de_results_her2$gene_name[rank(de_results_her2$adj.P.Val) < 4]## [1] "RRM2" "HJURP" "KIF4A"
3.4 LumA subtype
de_results_luma <- topTable(
eb_results,
coef = 5,
number = nrow(rse_BRCA),
sort.by = "none"
)
# Number of differentially expressed genes
table(de_results_luma$adj.P.Val < 0.05)##
## FALSE TRUE
## 34262 30575
47% of genes are differentially expressed.
# Visualizing as volcano plot
volcanoplot(eb_results, coef = 5, highlight = 3, names = de_results_luma$gene_name)
# Highlighted genes
de_results_luma$gene_name[rank(de_results_luma$adj.P.Val) < 4]## [1] "LYVE1" "VEGFD" "DMD"
3.5 LumB subtype
de_results_lumb <- topTable(
eb_results,
coef = 6,
number = nrow(rse_BRCA),
sort.by = "none"
)
# Number of differentially expressed genes
table(de_results_lumb$adj.P.Val < 0.05)##
## FALSE TRUE
## 23569 41268
63% of genes are differentially expressed.
# Visualizing as volcano plot
volcanoplot(eb_results, coef = 6, highlight = 3, names = de_results_lumb$gene_name)
# Highlighted genes
de_results_lumb$gene_name[rank(de_results_lumb$adj.P.Val) < 4]## [1] "PAMR1" "CAVIN2" "DMD"