nextflow_schema.json

{
    "$schema": "http://json-schema.org/draft-07/schema",
    "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json",
    "title": "treangenlab/wf-emu",
    "workflow_title": "Emu workflow",
    "description": "Taxonomic classification of single reads from 16S rDNA sequencing.",
    "demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-16s/wf16s-demo.tar.gz",
    "aws_demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-16s/wf-16s-demo/aws.nextflow.config",
    "url": "https://gitlab.com/treangenlab/wf-emu",
    "type": "object",
    "definitions": {
        "input_options": {
            "title": "Input Options",
            "type": "object",
            "fa_icon": "fas fa-terminal",
            "description": "Define where the pipeline should find input data and save output data.",
            "properties": {
                "fastq": {
                    "type": "string",
                    "format": "path",
                    "title": "FASTQ",
                    "description": "FASTQ files to use in the analysis.",
                    "help_text": "This accepts one of three cases: (i) the path to a single FASTQ file; (ii) the path to a top-level directory containing FASTQ files; (iii) the path to a directory containing one level of sub-directories which in turn contain FASTQ files. In the first and second case, a sample name can be supplied with `--sample`. In the last case, the data is assumed to be multiplexed with the names of the sub-directories as barcodes. In this case, a sample sheet can be provided with `--sample_sheet`.",
                    "demo_data": "test_data"
                },
                "analyse_unclassified": {
                    "type": "boolean",
                    "default": false,
                    "title": "Analyse unclassified reads",
                    "description": "Analyse unclassified reads from input directory. By default the workflow will not process reads in the unclassified directory.",
                    "help_text": "If selected and if the input is a multiplex directory the workflow will also process the unclassified directory."
                }
            },
            "required": [
                "fastq"
            ]
        },
        "sample_options": {
            "title": "Sample Options",
            "type": "object",
            "default": "",
            "properties": {
                "sample_sheet": {
                    "type": "string",
                    "format": "file-path",
                    "title": "Sample sheet",
                    "description": "A CSV file used to map barcodes to sample aliases. The sample sheet can be provided when the input data is a directory containing sub-directories with FASTQ files. Disabled in the real time pipeline.",
                    "help_text": "The sample sheet is a CSV file with, minimally, columns named `barcode` , `sample_id`,  `alias`. Extra columns are allowed. A `type` column is required for certain workflows and should have the following values; `test_sample`, `positive_control`, `negative_control`, `no_template_control`."
                },
                "sample": {
                    "type": "string",
                    "title": "Sample name",
                    "description": "A single sample name for non-multiplexed data. Permissible if passing a single .fastq(.gz) file or directory of .fastq(.gz) files. Disabled in the real time pipeline."
                }
            },
            "description": "Parameters that relate to samples such as sample sheets and sample names."
        },
        "output_options": {
            "title": "Output Options",
            "type": "object",
            "description": "Parameters for saving and naming workflow outputs.",
            "default": "",
            "properties": {
                "out_dir": {
                    "type": "string",
                    "format": "directory-path",
                    "default": "output",
                    "title": "Output folder name",
                    "description": "Directory for output of all user-facing files."
                }
            }
        },
        "emu_advanced_options": {
            "title": "Advanced Emu Options",
            "type": "object",
            "description": "Advanced options for configuring emu.",
            "default": "",
            "properties": {
                "database_set": {
                    "type": "string",
                    "default": "emu",
                    "title": "Database",
                    "description": "Set the reference database. Choices: ['emu', 'silva', 'rdp', 'unite-all', 'unite-fungi']",
                    "enum": [
                        "emu",
                        "silva",
                        "rdp",
                        "unite-all",
                        "unite-fungi"
                    ]
                },
                "min_abundance": {
                    "type": "number",
                    "default": 0.0001,
                    "title": "Minimum species abundance",
                    "description": "Minimum species abundance in the results. Default: 0.0001."
                },
                "K": {
                    "type": "integer",
                    "default": 500000000,
                    "description": "minibatch size for minimap2 mapping."
                },
                "num_alignments": {
                    "type": "integer",
                    "default": 50,
                    "description": "minimap max number of secondary alignments per read.",
                    "hidden": true
                },
                "threads": {
                    "type": "integer",
                    "default": 2,
                    "description": "Number of CPU threads used for classifying reads.",
                    "help_text": "For the real-time kraken2 workflow, this is the number of CPU threads used by the kraken2 server (and the number of clients sending reads to the server). For the minimap2 workflow, it is the number of CPU threads used by minimap2. For the real-time kraken2 workflow the `threads` parameter should not be set to no more than 4 fewer than the executor CPU limit."
                },
                "rank": {
                    "type": "string",
                    "default": "species",
                    "description": "Combined table will only include all ranks above the specified rank according to this list: tax_id, species, genus, family, order, class, phylum, superkingdom."
                },
                "split_tables": {
                    "type": "boolean",
                    "default": false,
                    "description": "Output 2 tables: (1) abundances only at specified rank and (2) taxonomic lineages down to specified rank."
                },
                "counts": {
                    "type": "boolean",
                    "default": false,
                    "description": "Output estimated counts rather than relative abundance percentage in combined table. Only includes Emu relative abundance outputs that already have estimated counts."
                }
            }
        },
        "advanced_options": {
            "title": "Advanced Options",
            "type": "object",
            "description": "Advanced options for configuring processes inside the workflow.",
            "default": "",
            "properties": {
                "min_len": {
                    "type": "integer",
                    "default": 0,
                    "title": "Minimum read length",
                    "description": "Specify read length lower limit.",
                    "help_text": "Any reads shorter than this limit will not be included in the analysis."
                },
                "min_read_qual": {
                    "type": "number",
                    "title": "Minimum read quality",
                    "description": "Specify read quality lower limit.",
                    "help_text": "Any reads with a quality lower than this limit will not be included in the analysis."
                },
                "max_len": {
                    "type": "integer",
                    "title": "Maximum read length",
                    "description": "Specify read length upper limit",
                    "help_text": "Any reads longer than this limit will not be included in the analysis."
                },
                "threads": {
                    "type": "integer",
                    "default": 4,
                    "title": "Maximum number of CPU threads",
                    "description": "Maximum number of CPU threads to use per workflow task.",
                    "help_text": "Several tasks in this workflow benefit from using multiple CPU threads. This option sets the number of CPU threads for all such processes. The total CPU resource used by the workflow is constrained by the executor configuration. See server threads parameter for kraken specific threads in the real_time pipeline."
                }
            }
        },
        "misc": {
            "title": "Miscellaneous Options",
            "type": "object",
            "description": "Everything else.",
            "default": "",
            "properties": {
                "disable_ping": {
                    "type": "boolean",
                    "default": false,
                    "description": "Enable to prevent sending a workflow ping."
                },
                "help": {
                    "type": "boolean",
                    "description": "Display help text.",
                    "fa_icon": "fas fa-question-circle",
                    "hidden": true
                },
                "version": {
                    "type": "boolean",
                    "description": "Display version and exit.",
                    "fa_icon": "fas fa-question-circle",
                    "hidden": true
                }
            }
        }
    },
    "allOf": [
        {
            "$ref": "#/definitions/input_options"
        },
        {
            "$ref": "#/definitions/sample_options"
        },
        {
            "$ref": "#/definitions/output_options"
        },
        {
            "$ref": "#/definitions/emu_advanced_options"
        },
        {
            "$ref": "#/definitions/advanced_options"
        },
        {
            "$ref": "#/definitions/misc"
        }
    ],
    "properties": {
        "process_label": {
            "type": "string",
            "description": "The main process label for emu processes to use by default",
            "hidden": true,
            "default": "wf-emu"
        },
        "aws_image_prefix": {
            "type": "string",
            "hidden": true
        },
        "aws_queue": {
            "type": "string",
            "hidden": true
        },
        "monochrome_logs": {
            "type": "boolean"
        },
        "validate_params": {
            "type": "boolean",
            "default": true
        },
        "show_hidden_params": {
            "type": "boolean"
        }
    },
    "docs": {
        "intro": "## Introduction\n\nThis section of documentation typically contains an overview of the workflow in terms of motivation\nand bioinformatics methods, listing any key tools or algorithms employed, whilst also describing its\nrange of use-cases and what a suitable input dataset should look like.\n\n",
        "links": "## Useful links\n\n* [nextflow](https://www.nextflow.io/)\n* [docker](https://www.docker.com/products/docker-desktop)\n* [conda](https://docs.conda.io/en/latest/miniconda.html)\n"
    }
}