-
Notifications
You must be signed in to change notification settings - Fork 2
/
nextflow_schema.json
242 lines (242 loc) · 11.4 KB
/
nextflow_schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json",
"title": "treangenlab/wf-emu",
"workflow_title": "Emu workflow",
"description": "Taxonomic classification of single reads from 16S rDNA sequencing.",
"demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-16s/wf16s-demo.tar.gz",
"aws_demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-16s/wf-16s-demo/aws.nextflow.config",
"url": "https://gitlab.com/treangenlab/wf-emu",
"type": "object",
"definitions": {
"input_options": {
"title": "Input Options",
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"properties": {
"fastq": {
"type": "string",
"format": "path",
"title": "FASTQ",
"description": "FASTQ files to use in the analysis.",
"help_text": "This accepts one of three cases: (i) the path to a single FASTQ file; (ii) the path to a top-level directory containing FASTQ files; (iii) the path to a directory containing one level of sub-directories which in turn contain FASTQ files. In the first and second case, a sample name can be supplied with `--sample`. In the last case, the data is assumed to be multiplexed with the names of the sub-directories as barcodes. In this case, a sample sheet can be provided with `--sample_sheet`.",
"demo_data": "test_data"
},
"analyse_unclassified": {
"type": "boolean",
"default": false,
"title": "Analyse unclassified reads",
"description": "Analyse unclassified reads from input directory. By default the workflow will not process reads in the unclassified directory.",
"help_text": "If selected and if the input is a multiplex directory the workflow will also process the unclassified directory."
}
},
"required": [
"fastq"
]
},
"sample_options": {
"title": "Sample Options",
"type": "object",
"default": "",
"properties": {
"sample_sheet": {
"type": "string",
"format": "file-path",
"title": "Sample sheet",
"description": "A CSV file used to map barcodes to sample aliases. The sample sheet can be provided when the input data is a directory containing sub-directories with FASTQ files. Disabled in the real time pipeline.",
"help_text": "The sample sheet is a CSV file with, minimally, columns named `barcode` , `sample_id`, `alias`. Extra columns are allowed. A `type` column is required for certain workflows and should have the following values; `test_sample`, `positive_control`, `negative_control`, `no_template_control`."
},
"sample": {
"type": "string",
"title": "Sample name",
"description": "A single sample name for non-multiplexed data. Permissible if passing a single .fastq(.gz) file or directory of .fastq(.gz) files. Disabled in the real time pipeline."
}
},
"description": "Parameters that relate to samples such as sample sheets and sample names."
},
"output_options": {
"title": "Output Options",
"type": "object",
"description": "Parameters for saving and naming workflow outputs.",
"default": "",
"properties": {
"out_dir": {
"type": "string",
"format": "directory-path",
"default": "output",
"title": "Output folder name",
"description": "Directory for output of all user-facing files."
}
}
},
"emu_advanced_options": {
"title": "Advanced Emu Options",
"type": "object",
"description": "Advanced options for configuring emu.",
"default": "",
"properties": {
"database_set": {
"type": "string",
"default": "emu",
"title": "Database",
"description": "Set the reference database. Choices: ['emu', 'silva', 'rdp', 'unite-all', 'unite-fungi']",
"enum": [
"emu",
"silva",
"rdp",
"unite-all",
"unite-fungi"
]
},
"min_abundance": {
"type": "number",
"default": 0.0001,
"title": "Minimum species abundance",
"description": "Minimum species abundance in the results. Default: 0.0001."
},
"K": {
"type": "integer",
"default": 500000000,
"description": "minibatch size for minimap2 mapping."
},
"num_alignments": {
"type": "integer",
"default": 50,
"description": "minimap max number of secondary alignments per read.",
"hidden": true
},
"threads": {
"type": "integer",
"default": 2,
"description": "Number of CPU threads used for classifying reads.",
"help_text": "For the real-time kraken2 workflow, this is the number of CPU threads used by the kraken2 server (and the number of clients sending reads to the server). For the minimap2 workflow, it is the number of CPU threads used by minimap2. For the real-time kraken2 workflow the `threads` parameter should not be set to no more than 4 fewer than the executor CPU limit."
},
"rank": {
"type": "string",
"default": "species",
"description": "Combined table will only include all ranks above the specified rank according to this list: tax_id, species, genus, family, order, class, phylum, superkingdom."
},
"split_tables": {
"type": "boolean",
"default": false,
"description": "Output 2 tables: (1) abundances only at specified rank and (2) taxonomic lineages down to specified rank."
},
"counts": {
"type": "boolean",
"default": false,
"description": "Output estimated counts rather than relative abundance percentage in combined table. Only includes Emu relative abundance outputs that already have estimated counts."
}
}
},
"advanced_options": {
"title": "Advanced Options",
"type": "object",
"description": "Advanced options for configuring processes inside the workflow.",
"default": "",
"properties": {
"min_len": {
"type": "integer",
"default": 0,
"title": "Minimum read length",
"description": "Specify read length lower limit.",
"help_text": "Any reads shorter than this limit will not be included in the analysis."
},
"min_read_qual": {
"type": "number",
"title": "Minimum read quality",
"description": "Specify read quality lower limit.",
"help_text": "Any reads with a quality lower than this limit will not be included in the analysis."
},
"max_len": {
"type": "integer",
"title": "Maximum read length",
"description": "Specify read length upper limit",
"help_text": "Any reads longer than this limit will not be included in the analysis."
},
"threads": {
"type": "integer",
"default": 4,
"title": "Maximum number of CPU threads",
"description": "Maximum number of CPU threads to use per workflow task.",
"help_text": "Several tasks in this workflow benefit from using multiple CPU threads. This option sets the number of CPU threads for all such processes. The total CPU resource used by the workflow is constrained by the executor configuration. See server threads parameter for kraken specific threads in the real_time pipeline."
}
}
},
"misc": {
"title": "Miscellaneous Options",
"type": "object",
"description": "Everything else.",
"default": "",
"properties": {
"disable_ping": {
"type": "boolean",
"default": false,
"description": "Enable to prevent sending a workflow ping."
},
"help": {
"type": "boolean",
"description": "Display help text.",
"fa_icon": "fas fa-question-circle",
"hidden": true
},
"version": {
"type": "boolean",
"description": "Display version and exit.",
"fa_icon": "fas fa-question-circle",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input_options"
},
{
"$ref": "#/definitions/sample_options"
},
{
"$ref": "#/definitions/output_options"
},
{
"$ref": "#/definitions/emu_advanced_options"
},
{
"$ref": "#/definitions/advanced_options"
},
{
"$ref": "#/definitions/misc"
}
],
"properties": {
"process_label": {
"type": "string",
"description": "The main process label for emu processes to use by default",
"hidden": true,
"default": "wf-emu"
},
"aws_image_prefix": {
"type": "string",
"hidden": true
},
"aws_queue": {
"type": "string",
"hidden": true
},
"monochrome_logs": {
"type": "boolean"
},
"validate_params": {
"type": "boolean",
"default": true
},
"show_hidden_params": {
"type": "boolean"
}
},
"docs": {
"intro": "## Introduction\n\nThis section of documentation typically contains an overview of the workflow in terms of motivation\nand bioinformatics methods, listing any key tools or algorithms employed, whilst also describing its\nrange of use-cases and what a suitable input dataset should look like.\n\n",
"links": "## Useful links\n\n* [nextflow](https://www.nextflow.io/)\n* [docker](https://www.docker.com/products/docker-desktop)\n* [conda](https://docs.conda.io/en/latest/miniconda.html)\n"
}
}