-
Notifications
You must be signed in to change notification settings - Fork 1
/
compute_normalization_stats.wdl
169 lines (137 loc) · 5.34 KB
/
compute_normalization_stats.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
version 1.0
import "./run_sims.wdl"
import "./tasks.wdl"
# * workflow compute_normalization_stats_wf
workflow compute_normalization_stats_wf {
  meta {
    description: "Computes stats (means and stds) needed to normalize each component score. For each component score, for each selected pop or combo of (selected pop, alternate pop), computes that component score for SNPs in neutral hapsets, then computes the mean and std of that score for SNPs within each frequency bin."
  }

  parameter_meta {
# ** inputs
    modelId: "(String) Id of the demographic model; used in naming output files."
    hapset_block_size: "(Int) Number of hapsets to process together when computing component scores"

# ** outputs
  }

  input {
    String modelId
    PopsInfo pops_info
    Array[File] neut_sim_region_haps_tar_gzs

    File compute_components_script = "./remodel_components.py"

    # Number of frequency bins used when normalizing each one-pop component score.
    Int n_bins_ihs = 20
    Int n_bins_nsl = 20
    Int n_bins_delihh = 20

    # Number of neutral hapsets handed to a single component-computation call.
    Int hapset_block_size = 2

    # NOTE(review): `threads`, `mem_base_gb` and `mem_per_thread_gb` are not referenced
    # anywhere in this workflow (the bin-stats calls below pass literal values instead).
    # They are kept so that existing input JSONs naming them remain valid.
    Int threads = 1
    Int mem_base_gb = 0
    Int mem_per_thread_gb = 1
    Int local_disk_gb = 50

    # Image is pinned by content digest; the digest algorithm prefix must be exactly `sha256:`.
    String docker = "quay.io/ilya_broad/cms@sha256:fc4825edda550ef203c917adb0b149cbcc82f0eeae34b516a02afaaab0eceac6"  # selscan=1.3.0a09
    Int preemptible

    ComputeResources compute_resources_for_compute_one_pop_cms2_components = object {
      mem_gb: 4,
      cpus: 1,
      local_storage_gb: 50
    }
    ComputeResources compute_resources_for_compute_two_pop_cms2_components = object {
      mem_gb: 4,
      cpus: 1,
      local_storage_gb: 50
    }
  }  # end: input

  # ihh12 and xpehh are normalized using a single bin.
  Int n_bins_ihh12 = 1
  Int n_bins_xpehh = 1

  Array[Pop] pops = pops_info.pops
  Int n_pops = length(pops)

# *** Split the neutral hapsets into blocks of (at most) hapset_block_size hapsets

  Int n_neut_hapsets = length(neut_sim_region_haps_tar_gzs)
  # Ceiling division: a trailing partial block is kept, so that no neutral hapset is
  # silently dropped when the hapset count is not a multiple of hapset_block_size.
  Int n_hapset_blocks = (n_neut_hapsets + hapset_block_size - 1) / hapset_block_size

  scatter(hapset_block_num in range(n_hapset_blocks)) {
    scatter(hapset_block_offset in range(hapset_block_size)) {
      Int idx = hapset_block_num * hapset_block_size + hapset_block_offset
      # Only the last block can be short; guard the index so it never runs past the end.
      if (idx < n_neut_hapsets) {
        File neut_hapset_haps_tar_gz_maybe = neut_sim_region_haps_tar_gzs[idx]
      }
    }
    # Inside this scatter: Array[File] (one block).  Outside it: Array[Array[File]] (all blocks).
    Array[File] neut_hapset_haps_tar_gzs_in_block = select_all(neut_hapset_haps_tar_gz_maybe)
  }

# *** Compute one-pop CMS2 components for neutral sims

  scatter(sel_pop in pops) {
    scatter(hapsets_block in neut_hapset_haps_tar_gzs_in_block) {
      call tasks.compute_one_pop_cms2_components as compute_one_pop_cms2_components_for_neutral {
        input:
        sel_pop=sel_pop,
        region_haps_tar_gzs=hapsets_block,

        script=compute_components_script,
        compute_resources=compute_resources_for_compute_one_pop_cms2_components,
        docker=docker,
        preemptible=preemptible
      }
    }

# **** Compute normalization stats for one-pop components for neutral sims

    # Per-block outputs are flattened so that the stats task receives, for each
    # component score, one flat list covering all blocks for this selected pop.
    call tasks.compute_one_pop_bin_stats_for_normalization {
      input:
      out_fnames_base = modelId + "__selpop_" + sel_pop.pop_id,
      sel_pop=sel_pop,

      ihs_out=flatten(compute_one_pop_cms2_components_for_neutral.ihs),
      nsl_out=flatten(compute_one_pop_cms2_components_for_neutral.nsl),
      ihh12_out=flatten(compute_one_pop_cms2_components_for_neutral.ihh12),
      delihh_out=flatten(compute_one_pop_cms2_components_for_neutral.delihh),

      n_bins_ihs=n_bins_ihs,
      n_bins_nsl=n_bins_nsl,
      n_bins_ihh12=n_bins_ihh12,
      n_bins_delihh=n_bins_delihh,

      threads=1,
      mem_base_gb=64,
      mem_per_thread_gb=0,
      local_disk_gb=local_disk_gb,
      docker=docker,
      preemptible=preemptible
    }  # end: call tasks.compute_one_pop_bin_stats_for_normalization
  }  # end: scatter(sel_pop in pops)

# **** Compute two-pop CMS2 components for neutral sims

  # Each unordered pair of pops is processed once, with sel_pop_idx < alt_pop_idx.
  # Results for the reverse ordering are taken from the task's `norm_bins_flip_pops_xpehh`
  # output further below.
  scatter(sel_pop_idx in range(n_pops)) {
    scatter(alt_pop_idx in range(n_pops)) {
      if (alt_pop_idx > sel_pop_idx) {
        scatter(hapsets_block in neut_hapset_haps_tar_gzs_in_block) {
          call tasks.compute_two_pop_cms2_components as compute_two_pop_cms2_components_for_neutral {
            input:
            sel_pop=pops[sel_pop_idx],
            alt_pop=pops[alt_pop_idx],
            region_haps_tar_gzs=hapsets_block,

            script=compute_components_script,
            compute_resources=compute_resources_for_compute_two_pop_cms2_components,
            docker=docker,
            preemptible=preemptible
          }
        }

        call tasks.compute_two_pop_bin_stats_for_normalization {
          input:
          out_fnames_base = modelId,
          sel_pop=pops[sel_pop_idx],
          alt_pop=pops[alt_pop_idx],

          xpehh_out=flatten(compute_two_pop_cms2_components_for_neutral.xpehh),
          n_bins_xpehh=n_bins_xpehh,

          threads=1,
          mem_base_gb=64,
          mem_per_thread_gb=0,
          local_disk_gb=local_disk_gb,
          docker=docker,
          preemptible=preemptible
        }
      }
    }
  }

# **** Assemble xpehh normalization stats for every ordered (sel_pop, alt_pop) pair

  scatter(sel_pop_idx in range(n_pops)) {
    scatter(alt_pop_idx in range(n_pops)) {
      if (alt_pop_idx != sel_pop_idx) {
        # Outside the scatters above, the task outputs are indexed [sel][alt] and are
        # defined only where alt > sel.  select_first therefore yields the direct result
        # when sel < alt, and otherwise the flipped-pops result of the swapped pair.
        File norm_bins_xpehh_maybe =
        select_first([
                     compute_two_pop_bin_stats_for_normalization.norm_bins_xpehh[sel_pop_idx][alt_pop_idx],
                     compute_two_pop_bin_stats_for_normalization.norm_bins_flip_pops_xpehh[alt_pop_idx][sel_pop_idx]
                     ])
      }
    }
    # Drops the undefined diagonal (sel == alt) entry, keeping the alt pops in index order.
    Array[File] norm_bins_xpehh_vals = select_all(norm_bins_xpehh_maybe)
  }  # end: scatter(sel_pop_idx in range(n_pops))

  output {
    # One file per selected pop, in the order of pops_info.pops.
    Array[File] norm_bins_ihs=compute_one_pop_bin_stats_for_normalization.norm_bins_ihs
    Array[File] norm_bins_nsl=compute_one_pop_bin_stats_for_normalization.norm_bins_nsl
    Array[File] norm_bins_ihh12=compute_one_pop_bin_stats_for_normalization.norm_bins_ihh12
    Array[File] norm_bins_delihh=compute_one_pop_bin_stats_for_normalization.norm_bins_delihh

    # norm_bins_xpehh[i] holds one file per alternate pop (all pops except pop i, in index order).
    Array[Array[File]] norm_bins_xpehh = norm_bins_xpehh_vals
  }
}