-
Notifications
You must be signed in to change notification settings - Fork 0
/
EGSEAWorkflow.R
230 lines (188 loc) · 9.33 KB
/
EGSEAWorkflow.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
## ----setup, message=FALSE, echo = FALSE---------------------------------------
library(limma);library(edgeR);library(EGSEAdata);library(EGSEA)
#biocM(c("EGSEAdata","EGSEA"))
## ----collectionlist-----------------------------------------------------------
egsea.data("human")
info = egsea.data("human", returnInfo = TRUE)
names(info)
info$msigdb$info$collections
## ----indexing-----------------------------------------------------------------
gs.annots = buildIdx(entrezIDs=v$genes$ENTREZID, species="human",
msigdb.gsets=c("c2", "c5"), go.part = TRUE)
names(gs.annots)
url = "http://bioinf.wehi.edu.au/EGSEA/mam.rnaseq.rdata"
utils::download.file(url, destfile="mam.rnaseq.rdata", mode="wb")
load("mam.rnaseq.rdata")
names(mam.rnaseq.data)
dim(mam.rnaseq.data)
x = calcNormFactors(mam.rnaseq.data, method = "TMM")
design = model.matrix(~0+x$samples$group+x$samples$lane)
colnames(design) = gsub("x\\$samples\\$group", "", colnames(design))
colnames(design) = gsub("x\\$samples\\$lane", "", colnames(design))
head(design)
contr.matrix = makeContrasts(
BasalvsLP = Basal-LP,
BasalvsML = Basal - ML,
LPvsML = LP - ML,
levels = colnames(design))
head(contr.matrix)
## ----setup2, eval=TRUE--------------------------------------------------------
v = voom(x, design, plot=FALSE)
names(v)
## ----exploresets--------------------------------------------------------------
class(gs.annots$c2)
summary(gs.annots$c2)
show(gs.annots$c2)
s = getSetByName(gs.annots$c2, "SMID_BREAST_CANCER_LUMINAL_A_DN")
class(s)
names(s)
names(s$SMID_BREAST_CANCER_LUMINAL_A_DN)
## ----indexclass---------------------------------------------------------------
slotNames(gs.annots$c2)
## ----symbolmap----------------------------------------------------------------
colnames(v$genes)
symbolsMap = v$genes[, c(1, 2)]
colnames(symbolsMap) = c("FeatureID", "Symbols")
symbolsMap[, "Symbols"] = as.character(symbolsMap[, "Symbols"])
## ----base---------------------------------------------------------------------
egsea.base()
## ----selectbasemethods--------------------------------------------------------
baseMethods = egsea.base()[-2]
baseMethods
## ----combine------------------------------------------------------------------
egsea.combine()
## ----sort---------------------------------------------------------------------
egsea.sort()
## ----egseatest----------------------------------------------------------------
gsa = egsea(voom.results=v, contrasts=contr.matrix,
gs.annots=gs.annots, symbolsMap=symbolsMap,
baseGSEAs=baseMethods, sort.by="med.rank",
num.threads = 16, report = FALSE)
## ----showegsea----------------------------------------------------------------
show(gsa)
## ----summariseegsea-----------------------------------------------------------
summary(gsa)
## ----topsets------------------------------------------------------------------
topSets(gsa, gs.label="c2", contrast = "comparison", names.only=TRUE)
## ----topsetslim---------------------------------------------------------------
t = topSets(gsa, contrast = "comparison",
names.only=FALSE, number = Inf, verbose = FALSE)
t[grep("LIM_", rownames(t)), c("p.adj", "Rank", "med.rank", "vote.rank")]
## ----topsets2-----------------------------------------------------------------
topSets(gsa, gs.label="kegg", contrast="BasalvsLP", sort.by="med.rank")
topSets(gsa, gs.label="kegg", contrast="comparison", sort.by="med.rank")
## ----heatmaps-----------------------------------------------------------------
plotHeatmap(gsa, gene.set="LIM_MAMMARY_STEM_CELL_UP", gs.label="c2",
contrast = "comparison", file.name = "hm_cmp_LIM_MAMMARY_STEM_CELL_UP", format="png")
plotHeatmap(gsa, gene.set="LIM_MAMMARY_STEM_CELL_DN", gs.label="c2",
contrast = "comparison", file.name = "hm_cmp_LIM_MAMMARY_STEM_CELL_DN", format="png")
## ----pathwayplot1, eval=FALSE-------------------------------------------------
# plotPathway(gsa, gene.set = "Vascular smooth muscle contraction",
# contrast = "BasalvsLP", gs.label = "kegg",
# file.name = "Vascular_smooth_muscle_contraction")
## ----pathwayplot2, eval=FALSE-------------------------------------------------
# plotPathway(gsa, gene.set = "Vascular smooth muscle contraction",
# contrast = "comparison", gs.label = "kegg",
# file.name = "Vascular_smooth_muscle_contraction_cmp")
## ----mdsplot------------------------------------------------------------------
plotMethods(gsa, gs.label = "c2", contrast = "BasalvsLP",
file.name = "mds_c2_BasalvsLP", format="png")
plotMethods(gsa, gs.label = "c5BP", contrast = "BasalvsLP",
file.name = "mds_c5_BasalvsLP", format="png")
## ----keggsummaryplot1---------------------------------------------------------
plotSummary(gsa, gs.label = 3, contrast = 3,
file.name = "summary_kegg_LPvsML", format="png")
## ----c2summaryplot2-----------------------------------------------------------
plotSummary(gsa, gs.label = 1, contrast = 3,
file.name = "summary_c2_LPvsML",
x.axis = "med.rank", format="png")
## ----c2summaryplot3-----------------------------------------------------------
plotSummary(gsa, gs.label = 1, contrast = 3,
file.name = "summary_sig_c2_LPvsML",
x.axis = "med.rank", x.cutoff=300, format="png")
## ----summaryplotkegg1and2-----------------------------------------------------
plotSummary(gsa, gs.label = "kegg", contrast = c(1,2),
file.name = "summary_kegg_1vs2", format="png")
## ----gographs-----------------------------------------------------------------
plotGOGraph(gsa, gs.label="c5BP", contrast = 1, file.name="BasalvsLP-c5BP-top-", format="png")
plotGOGraph(gsa, gs.label="c5CC", contrast = 1, file.name="BasalvsLP-c5CC-top-", format="png")
## ----summarybarplot-----------------------------------------------------------
plotBars(gsa, gs.label = "c2", contrast="comparison", file.name="comparison-c2-bars", format="png")
## ----summaryheatmap-----------------------------------------------------------
plotSummaryHeatmap(gsa, gs.label="c2", hm.vals = "avg.logfc.dir",
file.name="summary_heatmaps_c2", format="png")
plotSummaryHeatmap(gsa, gs.label="kegg", hm.vals = "avg.logfc.dir",
file.name="summary_heatmaps_kegg", format="png")
## ----toptable-----------------------------------------------------------------
t = limmaTopTable(gsa, contrast=1)
head(t)
## ----htmlreport, warning=FALSE, eval=FALSE------------------------------------
# generateReport(gsa, number = 20, report.dir="./mam-rnaseq-egsea-report")
## ----mareadidats, eval=TRUE---------------------------------------------------
library(limma)
url = "http://bioinf.wehi.edu.au/EGSEA/arraydata.zip"
utils::download.file(url, destfile="arraydata.zip", mode="wb")
utils::unzip("arraydata.zip", exdir = ".")
targets = read.delim("targets.txt", header=TRUE, sep=" ")
data = read.idat(as.character(targets$File),
bgxfile="GPL6887_MouseWG-6_V2_0_R0_11278593_A.bgx",
annotation=c("Entrez_Gene_ID","Symbol", "Chromosome"))
data$other$Detection = detectionPValues(data)
data$targets = targets
colnames(data) = targets$Sample
## ----manormalize, eval=TRUE---------------------------------------------------
data = neqc(data)
## ----mafilter, eval=TRUE------------------------------------------------------
table(targets$Celltype)
keep.exprs = rowSums(data$other$Detection<0.05)>=5
table(keep.exprs)
data = data[keep.exprs,]
dim(data)
head(data$genes)
sum(is.na(data$genes$Entrez_Gene_ID))
data1 = data[!is.na(data$genes$Entrez_Gene_ID), ]
dim(data1)
ord = order(lmFit(data1)$Amean, decreasing=TRUE)
ids2keep = data1$genes$Array_Address_Id[ord][!duplicated(data1$genes$Entrez_Gene_ID[ord])]
data1 = data1[match(ids2keep, data1$genes$Array_Address_Id),]
dim(data1)
expr = data1$E
group = as.factor(data1$targets$Celltype)
probe.annot = data1$genes[, 2:4]
head(probe.annot)
## ----malinearmodel, eval=TRUE-------------------------------------------------
head(data1$targets)
experiment = as.character(data1$targets$Experiment)
design = model.matrix(~0 + group + experiment)
colnames(design) = gsub("group", "", colnames(design))
design
contr.matrix = makeContrasts(
BasalvsLP = Basal-LP,
BasalvsML = Basal-ML,
LPvsML = LP-ML,
levels = colnames(design))
contr.matrix
## ----maindex, eval=FALSE------------------------------------------------------
# library(EGSEA)
# library(EGSEAdata)
# gs.annots = buildIdx(entrezIDs=probe.annot[, 2],
# species="mouse",
# msigdb.gsets=c("c2", "c5"), go.part = TRUE)
# names(gs.annots)
## ----maegsea, eval=FALSE------------------------------------------------------
# baseMethods = egsea.base()[-2]
# baseMethods
#
# gsam = egsea.ma(expr=expr, group=group,
# probe.annot = probe.annot,
# design = design,
# contrasts=contr.matrix,
# gs.annots=gs.annots,
# baseGSEAs=baseMethods, sort.by="med.rank",
# num.threads = 8, report = FALSE)
## ----mareport, warning=FALSE, eval=FALSE--------------------------------------
# generateReport(gsam, number = 20, report.dir="./mam-ma-egsea-report")
## ----matopsets, eval=FALSE----------------------------------------------------
# topSets(gsam, gs.label="c2", contrast="comparison", names.only=TRUE, number=5)
## ----softwareinfo-------------------------------------------------------------
sessionInfo()