diff --git a/.editorconfig b/.editorconfig index b6b31907..b78de6e6 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js}] +[*.{md,yml,yaml,html,css,scss,js,cff}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index d8c70ebe..a66a077d 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,8 +15,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/mhcq - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/mhcquant/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/mhcquant _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/mhcquant/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/mhcquant _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 778a5219..7d4ecaa8 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -25,6 +25,7 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/mhcquant/results-${{ github.sha }}" } profiles: test_full,aws_tower - nextflow_config: | - process.errorStrategy = 'retry' - process.maxRetries = 3 + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 29b1ee53..52b781f2 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -23,6 +23,7 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/mhcquant/results-test-${{ github.sha }}" } profiles: test,aws_tower - nextflow_config: | - process.errorStrategy = 'retry' - process.maxRetries = 3 + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 2759e501..c7b6a4cd 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,7 +13,7 @@ jobs: - name: Check PRs if: github.repository == 'nf-core/mhcquant' run: | - "{ [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/mhcquant ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]" + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/mhcquant ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets @@ -42,4 +42,3 @@ jobs: Thanks again for your contribution! 
repo-token: ${{ secrets.GITHUB_TOKEN }} allow-repeats: false -# diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9a4b30dd..80129b57 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,6 @@ on: env: NXF_ANSI_LOG: false - CAPSULE_LOG: none jobs: test: @@ -20,27 +19,17 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - # Nextflow versions - include: - # Test pipeline minimum Nextflow version - - NXF_VER: "21.10.3" - NXF_EDGE: "" - # Test latest edge release of Nextflow - - NXF_VER: "" - NXF_EDGE: "1" + NXF_VER: + - "21.10.3" + - "latest-everything" steps: - name: Check out pipeline code uses: actions/checkout@v2 - name: Install Nextflow - env: - NXF_VER: ${{ matrix.NXF_VER }} - # Uncomment only if the edge release is more recent than the latest stable release - # See https://github.com/nextflow-io/nextflow/issues/2467 - # NXF_EDGE: ${{ matrix.NXF_EDGE }} - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" - name: Run pipeline with test data run: | diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml new file mode 100644 index 00000000..629e8806 --- /dev/null +++ b/.github/workflows/fix-linting.yml @@ -0,0 +1,55 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + deploy: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'nf-core/mhcquant' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@v3 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + - uses: actions/setup-node@v2 + + - name: Install Prettier + run: npm install -g prettier @prettier/plugin-php + + # Check that we actually need to fix something + - name: Run 'prettier --check' + id: prettier_status + run: | + if prettier --check ${GITHUB_WORKSPACE}; then + echo "::set-output name=result::pass" + else + echo "::set-output name=result::fail" + fi + + - name: Run 'prettier --write' + if: steps.prettier_status.outputs.result == 'fail' + run: prettier --write ${GITHUB_WORKSPACE} + + - name: Commit & push changes + if: steps.prettier_status.outputs.result == 'fail' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . 
+ git status + git commit -m "[automated] Fix linting with Prettier" + git push diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index e9cf5de3..8a5ce69b 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -35,6 +35,36 @@ jobs: - name: Run Prettier --check run: prettier --check ${GITHUB_WORKSPACE} + PythonBlack: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Check code lints with Black + uses: psf/black@stable + + # If the above check failed, post a comment on the PR explaining the failure + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + ## Python linting (`black`) is failing + + To keep the code consistent with lots of contributors, we run automated code consistency checks. + To fix this CI test, please run: + + * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` + * Fix formatting errors in your pipeline: `black .` + + Once you push these changes the test should pass, and you can hide this comment :+1: + + We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false + nf-core: runs-on: ubuntu-latest steps: @@ -42,15 +72,11 @@ jobs: uses: actions/checkout@v2 - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v1 + - uses: actions/setup-python@v3 with: - python-version: "3.6" + python-version: "3.7" architecture: "x64" - name: Install dependencies @@ -78,5 +104,3 @@ jobs: lint_log.txt lint_results.md PR_number.txt - -# diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 91c487a1..04758f61 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -26,4 +26,3 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} path: linting-logs/lint_results.md -# diff --git a/.nf-core.yml b/.nf-core.yml index 7ca12279..3805dc81 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,5 +1 @@ repository_type: pipeline - -lint: - files_exist: - - conf/igenomes.config diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..eb74a574 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,10 @@ +email_template.html +adaptivecard.json +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc diff --git a/CHANGELOG.md b/CHANGELOG.md index 90aa5d30..820c9305 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,52 @@ # nf-core/mhcquant: Changelog +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## v2.4.0 nfcore/mhcquant "Maroon Gold Boxer" - 2022/12/02 + +Initial release of nf-core/mhcquant, created with the [nf-core](https://nf-co.re/) template. 
+ +### `Added` + +- Additional output from `CometAdapter` (generated with the parameter `--pin_out`) +- Folder structure within the `intermediate_results` folder to retrace the outcome files easier +- `OPENMS_FALSEDISCOVERYRATE` and `OPENMS_IDFILTER_FOR_ALIGNMENT` are now included in the first quantification step +- Altered the outcome content with the inclusion of the different folder structure +- Updated the mhcquant_web.png in the `assets` folder +- [#229](https://github.com/nf-core/mhcquant/pull/229) Add ion annotation feature requested in [#220](https://github.com/nf-core/mhcquant/issues/220) +- [#235](https://github.com/nf-core/mhcquant/issues/235) Add the `annotate_ions` parameter to enable/disable the ion annotation feature (default is false) + +### `Fixed` + +- Resolved issues with `SAMPLESHEET_CHECK` +- Fix for the `peakpickerhires`, mzml files generated from input raw files are now seen as input for this step as well +- `PRE_QUANTIFICATION` is renamed to `MAP_ALIGNMENT` to indicate that the alignment (and the complementing processes) of the different maps happens here +- `POST_QUANTIFICATION` is renamed to `PROCESS_FEATURE` since the feature identification and processing is done here +- Outcome of `OPENMS_FEATUREFINDERIDENTIFICATION` got lost during one of the previous updates, this is reintroduced +- `OPENMS_TEXTEXPORTER_UNQUANTIFIED` and `OPENMS_TEXTEXPORTER_QUANTIFIED` return only significant hits again +- [#226](https://github.com/nf-core/mhcquant/pull/226) - nf-core template update (version 2.6) +- [#230](https://github.com/nf-core/mhcquant/issues/230) - Issue with `OPENMS_MZTABEXPORTER_QUANT` +- [#236](https://github.com/nf-core/mhcquant/issues/236) - Resolved issue with `PYOPENMS_IONANNOTATOR` +- Fix for an inconsistent mzml channel issue +- [#241](https://github.com/nf-core/mhcquant/issues/241) - Fix of the HLA allele annotation in the help of the `allele_sheet` parameter + +### `Dependencies` + +- Updated the multiQC module + +| Dependency | Old version | New version | +| --------------------- | ----------- | ----------- | +| `MultiQC` | 1.11 | 1.12 | +| `OpenMS` | 2.6.0 | 2.8.0 | +| `OpenMS thirdparty` | 2.6.0 | 2.8.0 | +| `pyOpenMS` | - | 2.8 | +| `thermorawfileparser` | 1.3.4 | 1.4.0 | + +### `Deprecated` + +- `OPENMS_TEXTEXPORTER_PSMS` was removed due to the outcome of the comet adapter step + ## v2.3.1 nfcore/mhcquant "White Gold Swallow" - 2022/05/10 ### `Added` @@ -20,7 +67,7 @@ ### `Added` - [#206](https://github.com/nf-core/mhcquant/issues/206) Updated the workflow picture -- Adjustments of the `PRE_QUANTIFICATION` subworkflow: `OPENMS_FALSEDISCOVERYRATE`, `OPENMS_IDFILTER_FOR_ALIGNMENT`, and `OPENMS_TEXTEXPORTER_PSMS` +- Adjustments of the `PRE_QUANTIFICATION` subworkflow: `OPENMS_FALSEDISCOVERYRATE`, `OPENMS_IDFILTER_FOR_ALIGNMENT`, and `OPENMS_TEXTEXPORTER_SINGLE` - Included `OPENMS_TEXTEXPORTER_UNQUANTIFIED`to write a combined FDR filtered output file for unquantified data - Included `pipeline summary` and increment the `documentation` paragraph - [#195](https://github.com/nf-core/mhcquant/issues/195) Updated parameter documentation diff --git a/README.md b/README.md index 1146e702..01794454 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,14 @@ # ![nf-core/mhcquant](docs/images/nf-core-mhcquant_logo_light.png#gh-light-mode-only) ![nf-core/mhcquant](docs/images/nf-core-mhcquant_logo_dark.png#gh-dark-mode-only) -[![GitHub Actions CI 
Status](https://github.com/nf-core/mhcquant/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/mhcquant/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/nf-core/mhcquant/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/mhcquant/actions?query=workflow%3A%22nf-core+linting%22) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mhcquant/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.1569909-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.1569909) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mhcquant/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.1569909-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.1569909) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) +[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/mhcquant) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23mhcquant-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/mhcquant) -[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core) -[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23mhcquant-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/mhcquant)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction @@ -36,7 +32,7 @@ On release, automated continuous integration tests run the pipeline on a full-si 3. 
Download the pipeline and test it on a minimal dataset with a single command: - ```console + ```bash nextflow run nf-core/mhcquant -profile test,YOURPROFILE --outdir ``` @@ -86,7 +82,7 @@ Additional functionality contained by the pipeline currently includes: - Conversion of raw to mzML files (`ThermoRawFileParser`) - Executing the peak picking with high_res algorithm (`PeakPickerHiRes`) -#### Prequantification +#### Map alignment - Corrects retention time distortions between maps, using information from peptides identified in different maps (`MapAlignerIdentification`) - Applies retention time transformations to maps (`MapRTTransformer`) @@ -97,12 +93,13 @@ Additional functionality contained by the pipeline currently includes: - Predict psm results using mhcflurry to shrink search space (`mhcflurry`) - Facilitates the input to, the call of and output integration of Percolator (`PercolatorAdapter`) -#### Post-quantification +#### Process features - Detects features in MS1 data based on peptide identifications (`FeatureFinderIdentification`) - Group corresponding features across labelfree experiments (`FeatureLinkerUnlabeledKD`) - Resolves ambiguous annotations of features with peptide identifications (`IDConflictResolver`) - Converts XML format to text files (`TextExporter`) +- Annotates final list of peptides with their respective ions and charges (`IonAnnotator`) #### Prediction of HLA class 1 peptides @@ -145,6 +142,7 @@ Helpful contributors: - [Christian Fufezan](https://github.com/fu) - [Sven Fillinger](https://github.com/sven1103) - [Kevin Menden](https://github.com/KevinMenden) +- [Jonas Scheid](https://github.com/jonasscheid) ## Contributions and Support diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 00000000..27a8c8c4 --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/mhcquant v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. 
The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/email_template.html b/assets/email_template.html index 27035c21..55411550 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -1,111 +1,53 @@ - - - - + + + + - - - nf-core/mhcquant Pipeline Report - - -
- + + nf-core/mhcquant Pipeline Report + + +
-

nf-core/mhcquant v${version}

-

Run Name: $runName

+ - <% if (!success){ out << """ -
-

nf-core/mhcquant execution completed unsuccessfully!

+

nf-core/mhcquant v${version}

+

Run Name: $runName

+ +<% if (!success){ + out << """ +
+

nf-core/mhcquant execution completed unsuccessfully!

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

The full error message was:

-
${errorReport}
-
- """ } else { out << """ -
+
${errorReport}
+
+ """ +} else { + out << """ +
nf-core/mhcquant execution completed successfully! -
- """ } %> +
+ """ +} +%> -

The workflow was completed at $dateComplete (duration: $duration)

-

The command used to launch the workflow was as follows:

-
-$commandLine
+

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
$commandLine
-

Pipeline Configuration:

- - - <% out << summary.collect{ k,v -> " - - - - - " }.join("\n") %> - -
- $k - -
$v
-
+

Pipeline Configuration:

+ + + <% out << summary.collect{ k,v -> "" }.join("\n") %> + +
$k
$v
-

nf-core/mhcquant

-

https://github.com/nf-core/mhcquant

-
- +

nf-core/mhcquant

+

https://github.com/nf-core/mhcquant

+ +
+ + diff --git a/assets/email_template.txt b/assets/email_template.txt index 7dd1dd50..52b08b4e 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -6,7 +6,6 @@ `._,._,' nf-core/mhcquant v${version} ---------------------------------------------------- - Run Name: $runName <% if (success){ diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 00000000..eb886895 --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,33 @@ +id: "nf-core-mhcquant-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/mhcquant Methods Description" +section_href: "https://github.com/nf-core/mhcquant" +plot_type: "html" +## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object +data: | +

Methods

+

Data was processed using nf-core/mhcquant v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

References

+ +
+
Notes:
+
    + ${nodoi_text} +
  • The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
  • +
  • You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
  • +
+
diff --git a/assets/mhcquant_web.png b/assets/mhcquant_web.png index 05c49069..421a4099 100644 Binary files a/assets/mhcquant_web.png and b/assets/mhcquant_web.png differ diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index eaed6ec9..43bfdcf6 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -3,9 +3,11 @@ report_comment: > analysis pipeline. For information about how to interpret these results, please see the documentation. report_section_order: - software_versions: + "nf-core-mhcquant-methods-description": order: -1000 - "nf-core-mhcquant-summary": + software_versions: order: -1001 + "nf-core-mhcquant-summary": + order: -1002 export_plots: true diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 3bd02e19..3435eefc 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -1,10 +1,8 @@ #!/usr/bin/env python -# This script is based on the example at: https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - """Provide a command line tool to validate and transform tabular samplesheets.""" -import os.path +import os import argparse import csv import logging @@ -31,46 +29,35 @@ class RowChecker: ) def __init__( - self, - id_col="ID", - sample_col="Sample", - condition_col="Condition", - filename_col="ReplicateFileName", - ext_col="Extension", - **kwargs, + self, id_col="ID", sample_col="Sample", condition_col="Condition", filename_col="ReplicateFileName", **kwargs ): """ Initialize the row checker with the expected column names. - Args: - id_col (int) : An integer that acts as a unique identifier for the + id_col (int) : An integer that acts as a unique identifier for the unique samples. sample_col (str): The name of the column that contains the sample name (default "sample"). condition_col (str): This column consists of additional information about - the sample, could be anything from a "wildtype/disease" description + the sample, could be anything from a "wildtype/disease" description to a particular treatment (default ""). - filename_col (str): The name of the column that contains the path to the + filename_col (str): The name of the column that contains the path to the raw or mzMl files ). - """ super().__init__(**kwargs) self._id_col = id_col self._sample_col = sample_col self._condition_col = condition_col self._filename_col = filename_col - self._ext_col = ext_col self._seen = set() self.modified = [] def validate_and_transform(self, row): """ Perform all validations on the given row and insert the read pairing status. - Args: row (dict): A mapping from column headers (keys) to elements of that row (values). - """ self._validate_id(row) self._validate_sample(row) @@ -99,7 +86,6 @@ def _validate_filename(self, row): """Assert that the data entry has the right format if it exists.""" if len(row[self._filename_col]) > 0: self._validate_ms_format(row[self._filename_col]) - row[self._ext_col] = os.path.splitext(row[self._filename_col])[1][1:].lower() def _validate_ms_format(self, filename): """Assert that a given filename has one of the expected MS extensions.""" @@ -111,10 +97,8 @@ def _validate_ms_format(self, filename): def validate_unique_samples(self): """ Assert that the combination of sample name and filename is unique. - In addition to the validation, also rename the sample if more than one sample file combination exists. - """ assert len(self._seen) == len(self.modified), "The pair of sample name and file must be unique." 
if len({pair[0] for pair in self._seen}) < len(self._seen): @@ -127,6 +111,16 @@ def validate_unique_samples(self): row[self._sample_col] = f"{sample}" +def read_head(handle, num_lines=10): + """Read the specified number of lines from the current position in the file.""" + lines = [] + for idx, line in enumerate(handle): + if idx == num_lines: + break + lines.append(line) + return "".join(lines) + + def sniff_format(handle): """ Detect the tabular format. @@ -142,50 +136,47 @@ def sniff_format(handle): https://docs.python.org/3/glossary.html#term-text-file """ - peek = handle.read(2048) + peek = read_head(handle) + handle.seek(0) sniffer = csv.Sniffer() if not sniffer.has_header(peek): - logger.critical(f"The given sample sheet does not appear to contain a header.") + logger.critical("The given sample sheet does not appear to contain a header.") sys.exit(1) dialect = sniffer.sniff(peek) - handle.seek(0) return dialect def check_samplesheet(file_in, file_out): + """ Check that the tabular samplesheet has the structure expected by nf-core pipelines. - Validate the general shape of the table, expected columns, and each row. Also add an additional column which records whether one or two FASTQ reads were found. - Args: file_in (pathlib.Path): The given tabular samplesheet. The format can be either CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. file_out (pathlib.Path): Where the validated and transformed samplesheet should be created; always in CSV format. - Example: This function checks that the samplesheet follows the following structure, see also the `viral recon samplesheet`_:: - ID Sample Condition ReplicateFileName 1 WT A WT_A.raw 2 WT B WT_B.raw 3 KO A KO_A.raw 4 KO B KO_B.raw - .. _viral recon samplesheet: https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - """ required_columns = {"ID", "Sample", "Condition", "ReplicateFileName"} + # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. with file_in.open(newline="") as in_handle: reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) # Validate the existence of the expected header columns. if not required_columns.issubset(reader.fieldnames): - logger.critical(f"The sample sheet **must** contain the column headers: {', '.join(required_columns)}.") + req_cols = ", ".join(required_columns) + logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") sys.exit(1) # Validate each row. checker = RowChecker() @@ -200,16 +191,18 @@ def check_samplesheet(file_in, file_out): header.insert(4, "Extension") # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. 
with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") + writer = csv.DictWriter(out_handle, header, delimiter="\t") writer.writeheader() for row in checker.modified: + row["Extension"] = os.path.splitext(row["ReplicateFileName"])[1][1:].lower() writer.writerow(row) + def parse_args(argv=None): """Define and immediately parse command line arguments.""" parser = argparse.ArgumentParser( description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.tsv samplesheet.valid.tsv", + epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", ) parser.add_argument( "file_in", @@ -243,5 +236,6 @@ def main(argv=None): args.file_out.parent.mkdir(parents=True, exist_ok=True) check_samplesheet(args.file_in, args.file_out) + if __name__ == "__main__": sys.exit(main()) diff --git a/bin/get_ion_annotations.py b/bin/get_ion_annotations.py new file mode 100755 index 00000000..0449fd25 --- /dev/null +++ b/bin/get_ion_annotations.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python +__author__ = "Jonas Scheid" + +from typing import Tuple +from pyopenms import * +import pandas as pd +import numpy as np +import argparse +from pyopenms.Plotting import * + + +def parse_arguments() -> Tuple[argparse.ArgumentParser, argparse.Namespace]: + """ + Purpose: Parse command line arguments + Output: parser + args: Contains all parsed command line arguments + """ + parser = argparse.ArgumentParser( + description="""Ion annotator \n Returns two tsv files (all_ions, matching_ions) + that retrieve all ions with their respective m/z and intensities as well as the ion annotation of ions matching + to the peptides theoretical spectrum""" + ) + + parser.add_argument( + "-i", + "--input", + required=True, + nargs="+", + help="List of replicate mzML files of a sample", + ) + parser.add_argument( + "-idxml", + "--filtered_idXML", + required=True, + type=str, + help="FDR filtered idXML file that contains all peptides of the final results", + ) + parser.add_argument( + "-p", + "--prefix", + required=True, + type=str, + help="Prefix that will be added to the two output files", + ) + parser.add_argument( + "-pc", + "--precursor_charge", + type=str, + default="2:3", + help="Precursor charge range", + ) + parser.add_argument( + "-fmt", + "--fragment_mass_tolerance", + type=str, + default="0.02", + help="The fragment mass tolerance of theoretical and experimental spectrum matching", + ) + parser.add_argument( + "-a_ions", + "--use_a_ions", + action="store_true", + help="Add a-ions to the theoretical spectrum generation", + ) + parser.add_argument( + "-c_ions", + "--use_c_ions", + action="store_true", + help="Add c-ions to the theoretical spectrum generation", + ) + parser.add_argument( + "-x_ions", + "--use_x_ions", + action="store_true", + help="Add x-ions to the theoretical spectrum generation", + ) + parser.add_argument( + "-z_ions", + "--use_z_ions", + action="store_true", + help="Add z-ions to the theoretical spectrum generation", + ) + parser.add_argument( + "-rpp", + "--remove_precursor_peak", + type=bool, + default=False, + help="Do not consider precursor masses in the theoretical spectrum generation", + ) + + args = parser.parse_args() + + return parser, args + + +def generate_theoretical_spectrum(peptide: PeptideIdentification, args: argparse.Namespace) -> MSSpectrum: + """ + Purpose: Generate theoretical spectrum based on PeptideIdentification + Output: Theoretical spectrum of input peptide + """ 
+ sequence = peptide.getHits()[0].getSequence() + min_charge = int(args.precursor_charge.split(":")[0]) + # If precursor charge ranges between 2:5, the fragment charges range from 1:4. + # Get the precursor charge information from the idXML file + min_fragment_charge, max_fragment_charge = min_charge - 1, max(peptide.getHits()[0].getCharge() - 1, 1) + + # Define parameters for theoretical spectrum generation + tsg = TheoreticalSpectrumGenerator() + theo_spectrum = MSSpectrum() + p = tsg.getParameters() + p.setValue("add_y_ions", "true") + p.setValue("add_b_ions", "true") + if args.use_a_ions: + p.setValue("add_a_ions", "true") + if args.use_c_ions: + p.setValue("add_c_ions", "true") + if args.use_x_ions: + p.setValue("add_x_ions", "true") + if args.use_z_ions: + p.setValue("add_z_ions", "true") + p.setValue("add_metainfo", "true") + p.setValue("add_losses", "true") + p.setValue("add_precursor_peaks", "true") + p.setValue("add_first_prefix_ion", "true") + p.setValue("add_abundant_immonium_ions", "true") + if not args.remove_precursor_peak: + p.setValue("add_all_precursor_charges", "true") + + # Generate theoretical spectrum + tsg.setParameters(p) + tsg.getSpectrum(theo_spectrum, sequence, min_fragment_charge, max_fragment_charge) + + return theo_spectrum + + +def flatten(ls: list) -> list: + """ + Purpose: Flatten list of lists into a list + Output: List + """ + return [item for sublist in ls for item in sublist] + + +def __main__(): + parser, args = parse_arguments() + protein_ids = [] + peptide_ids = [] + # Each peptide ID should only contain one hit in the FDR_filtered idXML + IdXMLFile().load(args.filtered_idXML, protein_ids, peptide_ids) + # Get the list of mzML files that have been merged together by IDmerger previously + filenames = [filename.decode("utf-8") for filename in protein_ids[0].getMetaValue("spectra_data")] + # Define empty lists that collect all the necessary information, which is comprised in DataFrames + ions = [] + spectra_mz = [] + spectra_intensities = [] + spectra_peptides = [] + spectra_nativeIDs = [] + spectra_filename = [] + is_matching_ion = [] + # Define spectrum alignment class and set parameters for later use + spa = SpectrumAlignment() + # Specify parameters for the alignment. Pyopenms offers two parameters here + p = spa.getParameters() + # Since we look at the MS2 Spectrum we align Da tolerance + p.setValue("tolerance", float(args.fragment_mass_tolerance)) + p.setValue("is_relative_tolerance", "false") # false == Da, true == ppm + spa.setParameters(p) + + for file in args.input: + # Load the mzML files into the pyopenms structure + exp = MSExperiment() + MzMLFile().load(file, exp) + # Create lookup object for easy spectrum reference. 
+ spectrum_lookup = SpectrumLookup() + spectrum_lookup.readSpectra(exp, "scan=(?\\d+)") + # Iterate over the FDR filtered peptideIDs + for peptide_id in peptide_ids: + # Check if the PeptideHit originates from the current mzML file + if file.split("/")[-1] != filenames[peptide_id.getMetaValue("id_merge_index")]: + continue + # Access raw spectrum via the spectrum native ID + spectrum = exp.getSpectrum(spectrum_lookup.findByNativeID(peptide_id.getMetaValue("spectrum_reference"))) + # Save mz and intensities of all peaks to comprise them later in a DataFrame + spectrum_mz, spectrum_intensities = spectrum.get_peaks() + spectra_mz.append(spectrum_mz) + spectra_intensities.append(spectrum_intensities) + sequence = peptide_id.getHits()[0].getSequence() + spectra_peptides.append(np.repeat(sequence, len(spectrum_mz))) + is_matching_ion_peptide = np.repeat(False, len(spectrum_mz)) + spectra_nativeIDs.append(np.repeat(peptide_id.getMetaValue("spectrum_reference"), len(spectrum_mz))) + # Generate theoretical spectrum of peptide hit + theo_spectrum = generate_theoretical_spectrum(peptide_id, args) + # Align both spectra + alignment = [] + spa.getSpectrumAlignment(alignment, theo_spectrum, spectrum) + # Obtain ion annotations from theoretical spectrum if there are at least two matching ions + if len(alignment) > 1: + for theo_idx, obs_idx in alignment: + ion_name = theo_spectrum.getStringDataArrays()[0][theo_idx].decode() + ion_charge = theo_spectrum.getIntegerDataArrays()[0][theo_idx] + obs_mz = spectrum[obs_idx].getMZ() + obs_int = spectrum[obs_idx].getIntensity() + ions.append( + [ + sequence, + ion_name, + ion_charge, + theo_spectrum[theo_idx].getMZ(), + obs_mz, + obs_int, + ] + ) + is_matching_ion_peptide[obs_idx] = True + + is_matching_ion.append(is_matching_ion_peptide) + + spectra_filename.append(np.repeat(file, len(flatten(spectra_mz)) - len(flatten(spectra_filename)))) + + # Save information of matching ions + matching_ions = pd.DataFrame.from_records( + ions, + columns=[ + "Peptide", + "Ion_name", + "Ion_charge", + "Theoretical_mass", + "Experimental_mass", + "Intensity", + ], + ) + # Save information of all peaks (including non-matching peaks) + all_peaks_df = pd.DataFrame( + [ + flatten(spectra_peptides), + flatten(spectra_mz), + flatten(spectra_intensities), + flatten(is_matching_ion), + flatten(spectra_nativeIDs), + flatten(spectra_filename), + ], + index=[ + "Peptide", + "Experimental_mass", + "Intensity", + "Is_matching_ion", + "nativeID", + "filename", + ], + ).transpose() + all_peaks_df.index.name = "Ion_ID" + # Numpy magically converts booleans to floats of 1.0 and 0.0, revert that + all_peaks_df["Is_matching_ion"] = all_peaks_df["Is_matching_ion"].astype(bool) + # Add ion ID column to matching ions table for easy reference + matching_ions["Ion_ID"] = all_peaks_df.index[all_peaks_df["Is_matching_ion"]] + # Write matching ions table + matching_ions.to_csv(f"{args.prefix}_matching_ions.tsv", sep="\t", index=False) + all_peaks_df.to_csv(f"{args.prefix}_all_peaks.tsv", sep="\t") + + +if __name__ == "__main__": + __main__() diff --git a/bin/mhcflurry_neoepitope_binding_prediction.py b/bin/mhcflurry_neoepitope_binding_prediction.py index 1ad75a8a..27bf7dd8 100755 --- a/bin/mhcflurry_neoepitope_binding_prediction.py +++ b/bin/mhcflurry_neoepitope_binding_prediction.py @@ -8,20 +8,22 @@ # logging setup console = logging.StreamHandler(sys.stdout) -formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +formatter = logging.Formatter("%(asctime)s - %(name)s - 
%(levelname)s - %(message)s") console.setFormatter(formatter) LOG = logging.getLogger("Neoepitope Binding Prediction") LOG.addHandler(console) LOG.setLevel(logging.INFO) # List of alleles supported by mhcflurry -supported_alleles = "A*01:01,A*02:01,A*02:02,A*02:03,A*02:05,A*02:06,A*02:07,A*02:11,A*02:12,A*02:16,A*02:17,A*02:19," \ - "A*02:50,A*03:01,A*11:01,A*23:01,A*24:02,A*24:03,A*25:01,A*26:01,A*26:02,A*26:03,A*29:02,A*30:01," \ - "A*30:02,A*31:01,A*32:01,A*33:01,A*66:01,A*68:01,A*68:02,A*68:23,A*69:01,A*80:01,B*07:01,B*07:02," \ - "B*08:01,B*08:02,B*08:03,B*14:02,B*15:01,B*15:02,B*15:03,B*15:09,B*15:17,B*18:01,B*27:02,B*27:03," \ - "B*27:04,B*27:05,B*27:06,B*35:01,B*35:03,B*37:01,B*38:01,B*39:01,B*39:06,B*40:01,B*40:02,B*42:01," \ - "B*44:02,B*44:03,B*45:01,B*46:01,B*48:01,B*51:01,B*53:01,B*54:01,B*57:01,B*58:01,B*83:01,C*03:03," \ - "C*04:01,C*05:01,C*06:02,C*07:02,C*08:02,C*12:03,C*14:02,C*15:02".split(",") +supported_alleles = ( + "A*01:01,A*02:01,A*02:02,A*02:03,A*02:05,A*02:06,A*02:07,A*02:11,A*02:12,A*02:16,A*02:17,A*02:19," + "A*02:50,A*03:01,A*11:01,A*23:01,A*24:02,A*24:03,A*25:01,A*26:01,A*26:02,A*26:03,A*29:02,A*30:01," + "A*30:02,A*31:01,A*32:01,A*33:01,A*66:01,A*68:01,A*68:02,A*68:23,A*69:01,A*80:01,B*07:01,B*07:02," + "B*08:01,B*08:02,B*08:03,B*14:02,B*15:01,B*15:02,B*15:03,B*15:09,B*15:17,B*18:01,B*27:02,B*27:03," + "B*27:04,B*27:05,B*27:06,B*35:01,B*35:03,B*37:01,B*38:01,B*39:01,B*39:06,B*40:01,B*40:02,B*42:01," + "B*44:02,B*44:03,B*45:01,B*46:01,B*48:01,B*51:01,B*53:01,B*54:01,B*57:01,B*58:01,B*83:01,C*03:03," + "C*04:01,C*05:01,C*06:02,C*07:02,C*08:02,C*12:03,C*14:02,C*15:02".split(",") +) # read provided allotypes alleles = sys.argv[-3].split(";") @@ -38,7 +40,7 @@ flatten = lambda l: [item for sublist in l for item in sublist] # read identified peptides -neoepitopes = [line.rstrip('\n').strip().split(',') for line in open(sys.argv[-2])][1:] +neoepitopes = [line.rstrip("\n").strip().split(",") for line in open(sys.argv[-2])][1:] neoepitopes = flatten(neoepitopes) seqs_to_geneID = dict(zip(neoepitopes[::2], neoepitopes[1::2])) @@ -47,9 +49,8 @@ for allele in alleles: predictor = Class1AffinityPredictor.load() df_pred = predictor.predict_to_dataframe(allele=allele, peptides=seqs_to_geneID.keys()) - df_pred.insert(1, 'geneID', pd.Series(np.array(seqs_to_geneID.values()))) - df_pred.to_csv(allele + '_' + sys.argv[-1]) + df_pred.insert(1, "geneID", pd.Series(np.array(seqs_to_geneID.values()))) + df_pred.to_csv(allele + "_" + sys.argv[-1]) else: - op=open(sys.argv[-1],'w') + op = open(sys.argv[-1], "w") op.close() - diff --git a/bin/mhcflurry_predict_mztab.py b/bin/mhcflurry_predict_mztab.py index ccb1b992..57cc5a6b 100755 --- a/bin/mhcflurry_predict_mztab.py +++ b/bin/mhcflurry_predict_mztab.py @@ -8,14 +8,15 @@ from collections import defaultdict from mhcflurry import Class1AffinityPredictor -#logging setup +# logging setup console = logging.StreamHandler(sys.stdout) -formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") console.setFormatter(formatter) LOG = logging.getLogger("MHCFlurry Predict mztab") LOG.addHandler(console) LOG.setLevel(logging.INFO) + def parse_mztab(identified_peptides_file): """ parses an mztab file and returns all identified proteins @@ -29,29 +30,32 @@ def parse_mztab(identified_peptides_file): seq_geneIDs = defaultdict(str) for line in mztab_read: if line.startswith("PEP"): - content = line.split('\t') + content = line.split("\t") 
seq = content[1] geneID = content[2] - if not 'U' in seq and not 'X' in seq and not 'Z' in seq and not 'J' in seq and not 'B' in seq: + if not "U" in seq and not "X" in seq and not "Z" in seq and not "J" in seq and not "B" in seq: seq_geneIDs[seq] = geneID return seq_geneIDs -#List of alleles supported by mhcflurry -supported_alleles = "A*01:01,A*02:01,A*02:02,A*02:03,A*02:05,A*02:06,A*02:07,A*02:11,A*02:12,A*02:16,A*02:17,A*02:19," \ - "A*02:50,A*03:01,A*11:01,A*23:01,A*24:02,A*24:03,A*25:01,A*26:01,A*26:02,A*26:03,A*29:02,A*30:01," \ - "A*30:02,A*31:01,A*32:01,A*33:01,A*66:01,A*68:01,A*68:02,A*68:23,A*69:01,A*80:01,B*07:01,B*07:02," \ - "B*08:01,B*08:02,B*08:03,B*14:02,B*15:01,B*15:02,B*15:03,B*15:09,B*15:17,B*18:01,B*27:02,B*27:03," \ - "B*27:04,B*27:05,B*27:06,B*35:01,B*35:03,B*37:01,B*38:01,B*39:01,B*39:06,B*40:01,B*40:02,B*42:01," \ - "B*44:02,B*44:03,B*45:01,B*46:01,B*48:01,B*51:01,B*53:01,B*54:01,B*57:01,B*58:01,B*83:01,C*03:03," \ - "C*04:01,C*05:01,C*06:02,C*07:02,C*08:02,C*12:03,C*14:02,C*15:02".split(",") -#read provided allotypes -alleles=sys.argv[-3].split(";") +# List of alleles supported by mhcflurry +supported_alleles = ( + "A*01:01,A*02:01,A*02:02,A*02:03,A*02:05,A*02:06,A*02:07,A*02:11,A*02:12,A*02:16,A*02:17,A*02:19," + "A*02:50,A*03:01,A*11:01,A*23:01,A*24:02,A*24:03,A*25:01,A*26:01,A*26:02,A*26:03,A*29:02,A*30:01," + "A*30:02,A*31:01,A*32:01,A*33:01,A*66:01,A*68:01,A*68:02,A*68:23,A*69:01,A*80:01,B*07:01,B*07:02," + "B*08:01,B*08:02,B*08:03,B*14:02,B*15:01,B*15:02,B*15:03,B*15:09,B*15:17,B*18:01,B*27:02,B*27:03," + "B*27:04,B*27:05,B*27:06,B*35:01,B*35:03,B*37:01,B*38:01,B*39:01,B*39:06,B*40:01,B*40:02,B*42:01," + "B*44:02,B*44:03,B*45:01,B*46:01,B*48:01,B*51:01,B*53:01,B*54:01,B*57:01,B*58:01,B*83:01,C*03:03," + "C*04:01,C*05:01,C*06:02,C*07:02,C*08:02,C*12:03,C*14:02,C*15:02".split(",") +) + +# read provided allotypes +alleles = sys.argv[-3].split(";") -#extract and verify alleles -unsupported_alleles=[a for a in alleles if a not in supported_alleles] -alleles=[a for a in alleles if a in supported_alleles] +# extract and verify alleles +unsupported_alleles = [a for a in alleles if a not in supported_alleles] +alleles = [a for a in alleles if a in supported_alleles] if unsupported_alleles: for allele in unsupported_alleles: @@ -67,8 +71,8 @@ def parse_mztab(identified_peptides_file): for allele in alleles: predictor = Class1AffinityPredictor.load() df_pred = predictor.predict_to_dataframe(allele=allele, peptides=seqs_to_geneID.keys()) - df_pred.insert(1, 'geneID', pd.Series(np.array(seqs_to_geneID.values()))) - df_pred.to_csv(allele + '_' + sys.argv[-1]) + df_pred.insert(1, "geneID", pd.Series(np.array(seqs_to_geneID.values()))) + df_pred.to_csv(allele + "_" + sys.argv[-1]) else: - op=open(sys.argv[-1],'w') + op = open(sys.argv[-1], "w") op.close() diff --git a/bin/mhcflurry_predict_mztab_for_filtering.py b/bin/mhcflurry_predict_mztab_for_filtering.py index c12a90d3..fc4d3810 100755 --- a/bin/mhcflurry_predict_mztab_for_filtering.py +++ b/bin/mhcflurry_predict_mztab_for_filtering.py @@ -3,31 +3,33 @@ import logging import sys -#logging setup +# logging setup console = logging.StreamHandler(sys.stdout) -formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") console.setFormatter(formatter) LOG = logging.getLogger("MHCFlurry Predict mztab for filtering") LOG.addHandler(console) LOG.setLevel(logging.INFO) -#List of alleles supported by mhclurry 
-supported_alleles = "A*01:01,A*02:01,A*02:02,A*02:03,A*02:05,A*02:06,A*02:07,A*02:11,A*02:12,A*02:16,A*02:17,A*02:19," \ - "A*02:50,A*03:01,A*11:01,A*23:01,A*24:02,A*24:03,A*25:01,A*26:01,A*26:02,A*26:03,A*29:02,A*30:01," \ - "A*30:02,A*31:01,A*32:01,A*33:01,A*66:01,A*68:01,A*68:02,A*68:23,A*69:01,A*80:01,B*07:01,B*07:02," \ - "B*08:01,B*08:02,B*08:03,B*14:02,B*15:01,B*15:02,B*15:03,B*15:09,B*15:17,B*18:01,B*27:02,B*27:03," \ - "B*27:04,B*27:05,B*27:06,B*35:01,B*35:03,B*37:01,B*38:01,B*39:01,B*39:06,B*40:01,B*40:02,B*42:01," \ - "B*44:02,B*44:03,B*45:01,B*46:01,B*48:01,B*51:01,B*53:01,B*54:01,B*57:01,B*58:01,B*83:01,C*03:03," \ - "C*04:01,C*05:01,C*06:02,C*07:02,C*08:02,C*12:03,C*14:02,C*15:02".split(",") +# List of alleles supported by mhclurry +supported_alleles = ( + "A*01:01,A*02:01,A*02:02,A*02:03,A*02:05,A*02:06,A*02:07,A*02:11,A*02:12,A*02:16,A*02:17,A*02:19," + "A*02:50,A*03:01,A*11:01,A*23:01,A*24:02,A*24:03,A*25:01,A*26:01,A*26:02,A*26:03,A*29:02,A*30:01," + "A*30:02,A*31:01,A*32:01,A*33:01,A*66:01,A*68:01,A*68:02,A*68:23,A*69:01,A*80:01,B*07:01,B*07:02," + "B*08:01,B*08:02,B*08:03,B*14:02,B*15:01,B*15:02,B*15:03,B*15:09,B*15:17,B*18:01,B*27:02,B*27:03," + "B*27:04,B*27:05,B*27:06,B*35:01,B*35:03,B*37:01,B*38:01,B*39:01,B*39:06,B*40:01,B*40:02,B*42:01," + "B*44:02,B*44:03,B*45:01,B*46:01,B*48:01,B*51:01,B*53:01,B*54:01,B*57:01,B*58:01,B*83:01,C*03:03," + "C*04:01,C*05:01,C*06:02,C*07:02,C*08:02,C*12:03,C*14:02,C*15:02".split(",") +) -#read provided allotypes +# read provided allotypes print(sys.argv[-4]) -alleles=sys.argv[-4].split(";") +alleles = sys.argv[-4].split(";") print(alleles) -#extract and verify alleles -unsupported_alleles=[a for a in alleles if a not in supported_alleles] -alleles=[a for a in alleles if a in supported_alleles] +# extract and verify alleles +unsupported_alleles = [a for a in alleles if a not in supported_alleles] +alleles = [a for a in alleles if a in supported_alleles] if unsupported_alleles: for allele in unsupported_alleles: @@ -36,39 +38,43 @@ LOG.warning("Submitted alleles are not supported or formatting of input.tsv is not correct!") -#read identified peptides with q-value < threshold -mztab=open(sys.argv[-3]) -mztab_read=mztab.readlines() +# read identified peptides with q-value < threshold +mztab = open(sys.argv[-3]) +mztab_read = mztab.readlines() mztab.close() -seqs=[l.split()[1] for l in mztab_read if l.startswith("PSM")] -seqs_new_smaller_qval=list(set(seqs)) +seqs = [l.split()[1] for l in mztab_read if l.startswith("PSM")] +seqs_new_smaller_qval = list(set(seqs)) -#read all remaining peptides with q-value > threshold -mztab=open(sys.argv[-2]) -mztab_read=mztab.readlines() +# read all remaining peptides with q-value > threshold +mztab = open(sys.argv[-2]) +mztab_read = mztab.readlines() mztab.close() -seqs=[l.split()[1] for l in mztab_read if l.startswith("PSM") if l.split()[1] not in seqs_new_smaller_qval] -seqs_new_greater_qval=list(set(seqs)) -seqs_new_greater_qval=[s for s in seqs_new_greater_qval if 7' + '\n') +# write idXML for filtering +op = open(sys.argv[-1], "w") +op.write('' + "\n") for pep in seqs_new_all: - op.write('\t\t\t' + '' + '\n') - op.write('\t\t\t' + '' + '\n') -op.write('' + '\n') + op.write("\t\t\t" + '' + "\n") + op.write("\t\t\t" + "" + "\n") +op.write("" + "\n") op.close() diff --git a/bin/mhcnuggets_predict_peptides.py b/bin/mhcnuggets_predict_peptides.py index 875ba980..ab212e5a 100755 --- a/bin/mhcnuggets_predict_peptides.py +++ b/bin/mhcnuggets_predict_peptides.py @@ -4,97 +4,181 @@ import logging import sys 
-#logging setup +# logging setup console = logging.StreamHandler(sys.stdout) -formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") console.setFormatter(formatter) LOG = logging.getLogger("MHCNuggets Predict Peptides") LOG.addHandler(console) LOG.setLevel(logging.INFO) -supported_alleles_class_2 = ["HLA-DPA1*01:03-DPB1*02:01", "HLA-DPA1*01:03-DPB1*03:01", "HLA-DPA1*01:03-DPB1*04:01", - "HLA-DPA1*01:03-DPB1*04:02", "HLA-DPA1*02:01-DPB1*01:01", "HLA-DPA1*02:01-DPB1*05:01", - "HLA-DPA1*02:02-DPB1*05:01", "HLA-DPA1*03:01-DPB1*04:02", "HLA-DPB1*01:01", - "HLA-DPB1*02:01", "HLA-DPB1*03:01", "HLA-DPB1*04:01", "HLA-DPB1*04:02", "HLA-DPB1*05:01", - "HLA-DPB1*09:01", "HLA-DPB1*11:01", "HLA-DPB1*14:01", "HLA-DPB1*20:01", "HLA-DQA1*01:01", - "HLA-DQA1*01:01-DQB1*05:01", "HLA-DQA1*01:01-DQB1*05:03", "HLA-DQA1*01:02", - "HLA-DQA1*01:02-DQB1*05:01", "HLA-DQA1*01:02-DQB1*05:02", "HLA-DQA1*01:02-DQB1*06:02", - "HLA-DQA1*01:02-DQB1*06:04", "HLA-DQA1*01:03-DQB1*03:02", "HLA-DQA1*01:03-DQB1*06:01", - "HLA-DQA1*01:03-DQB1*06:03", "HLA-DQA1*01:04-DQB1*05:03", "HLA-DQA1*02:01-DQB1*02:01", - "HLA-DQA1*02:01-DQB1*02:02", "HLA-DQA1*02:01-DQB1*03:01", "HLA-DQA1*02:01-DQB1*03:03", - "HLA-DQA1*02:01-DQB1*04:02", "HLA-DQA1*03:01", "HLA-DQA1*03:01-DQB1*02:01", - "HLA-DQA1*03:01-DQB1*03:01", "HLA-DQA1*03:01-DQB1*03:02", "HLA-DQA1*03:01-DQB1*04:01", - "HLA-DQA1*03:02-DQB1*03:01", "HLA-DQA1*03:02-DQB1*03:03", "HLA-DQA1*03:02-DQB1*04:01", - "HLA-DQA1*03:03-DQB1*04:02", "HLA-DQA1*04:01-DQB1*04:02", "HLA-DQA1*05:01", - "HLA-DQA1*05:01-DQB1*02:01", "HLA-DQA1*05:01-DQB1*03:01", "HLA-DQA1*05:01-DQB1*03:02", - "HLA-DQA1*05:01-DQB1*03:03", "HLA-DQA1*05:01-DQB1*04:02", "HLA-DQA1*05:05-DQB1*03:01", - "HLA-DQA1*06:01-DQB1*04:02", "HLA-DQB1*02:01", "HLA-DQB1*02:02", "HLA-DQB1*03:01", - "HLA-DQB1*03:02", "HLA-DQB1*03:19", "HLA-DQB1*04:02", "HLA-DQB1*05:01", "HLA-DQB1*05:02", - "HLA-DQB1*05:03", "HLA-DQB1*06:02", "HLA-DQB1*06:03", "HLA-DQB1*06:04", - "HLA-DRA0*10:1-DRB1*01:01", "HLA-DRA0*10:1-DRB1*03:01", "HLA-DRA0*10:1-DRB1*04:01", - "HLA-DRA0*10:1-DRB1*04:04", "HLA-DRA0*10:1-DRB1*07:01", "HLA-DRA0*10:1-DRB1*08:01", - "HLA-DRA0*10:1-DRB1*09:01", "HLA-DRA0*10:1-DRB1*11:01", "HLA-DRA0*10:1-DRB1*13:01", - "HLA-DRA0*10:1-DRB1*14:54", "HLA-DRA0*10:1-DRB1*15:01", "HLA-DRA0*10:1-DRB3*01:01", - "HLA-DRA0*10:1-DRB3*02:02", "HLA-DRA0*10:1-DRB3*03:01", "HLA-DRA0*10:1-DRB4*01:03", - "HLA-DRA0*10:1-DRB5*01:01", "HLA-DRB1*01:01", "HLA-DRB1*01:02", "HLA-DRB1*01:03", - "HLA-DRB1*03:01", "HLA-DRB1*03:02", "HLA-DRB1*03:03", "HLA-DRB1*03:04", "HLA-DRB1*03:05", - "HLA-DRB1*04:01", "HLA-DRB1*04:02", "HLA-DRB1*04:03", "HLA-DRB1*04:04", "HLA-DRB1*04:05", - "HLA-DRB1*04:06", "HLA-DRB1*04:07", "HLA-DRB1*04:11", "HLA-DRB1*07:01", "HLA-DRB1*08:01", - "HLA-DRB1*08:02", "HLA-DRB1*08:03", "HLA-DRB1*08:04", "HLA-DRB1*09:01", "HLA-DRB1*10:01", - "HLA-DRB1*11:01", "HLA-DRB1*11:02", "HLA-DRB1*11:03", "HLA-DRB1*11:04", "HLA-DRB1*12:01", - "HLA-DRB1*12:02", "HLA-DRB1*13:01", "HLA-DRB1*13:02", "HLA-DRB1*13:03", "HLA-DRB1*13:04", - "HLA-DRB1*13:05", "HLA-DRB1*14:01", "HLA-DRB1*14:02", "HLA-DRB1*15:01", "HLA-DRB1*15:02", - "HLA-DRB1*15:03", "HLA-DRB1*16:01", "HLA-DRB1*16:02", "HLA-DRB3*01:01", "HLA-DRB3*02:02", - "HLA-DRB3*03:01", "HLA-DRB4*01:01", "HLA-DRB4*01:03", "HLA-DRB5*01:01", "HLA-DRB5*01:02"] +supported_alleles_class_2 = [ + "HLA-DPA1*01:03-DPB1*02:01", + "HLA-DPA1*01:03-DPB1*03:01", + "HLA-DPA1*01:03-DPB1*04:01", + "HLA-DPA1*01:03-DPB1*04:02", + 
"HLA-DPA1*02:01-DPB1*01:01", + "HLA-DPA1*02:01-DPB1*05:01", + "HLA-DPA1*02:02-DPB1*05:01", + "HLA-DPA1*03:01-DPB1*04:02", + "HLA-DPB1*01:01", + "HLA-DPB1*02:01", + "HLA-DPB1*03:01", + "HLA-DPB1*04:01", + "HLA-DPB1*04:02", + "HLA-DPB1*05:01", + "HLA-DPB1*09:01", + "HLA-DPB1*11:01", + "HLA-DPB1*14:01", + "HLA-DPB1*20:01", + "HLA-DQA1*01:01", + "HLA-DQA1*01:01-DQB1*05:01", + "HLA-DQA1*01:01-DQB1*05:03", + "HLA-DQA1*01:02", + "HLA-DQA1*01:02-DQB1*05:01", + "HLA-DQA1*01:02-DQB1*05:02", + "HLA-DQA1*01:02-DQB1*06:02", + "HLA-DQA1*01:02-DQB1*06:04", + "HLA-DQA1*01:03-DQB1*03:02", + "HLA-DQA1*01:03-DQB1*06:01", + "HLA-DQA1*01:03-DQB1*06:03", + "HLA-DQA1*01:04-DQB1*05:03", + "HLA-DQA1*02:01-DQB1*02:01", + "HLA-DQA1*02:01-DQB1*02:02", + "HLA-DQA1*02:01-DQB1*03:01", + "HLA-DQA1*02:01-DQB1*03:03", + "HLA-DQA1*02:01-DQB1*04:02", + "HLA-DQA1*03:01", + "HLA-DQA1*03:01-DQB1*02:01", + "HLA-DQA1*03:01-DQB1*03:01", + "HLA-DQA1*03:01-DQB1*03:02", + "HLA-DQA1*03:01-DQB1*04:01", + "HLA-DQA1*03:02-DQB1*03:01", + "HLA-DQA1*03:02-DQB1*03:03", + "HLA-DQA1*03:02-DQB1*04:01", + "HLA-DQA1*03:03-DQB1*04:02", + "HLA-DQA1*04:01-DQB1*04:02", + "HLA-DQA1*05:01", + "HLA-DQA1*05:01-DQB1*02:01", + "HLA-DQA1*05:01-DQB1*03:01", + "HLA-DQA1*05:01-DQB1*03:02", + "HLA-DQA1*05:01-DQB1*03:03", + "HLA-DQA1*05:01-DQB1*04:02", + "HLA-DQA1*05:05-DQB1*03:01", + "HLA-DQA1*06:01-DQB1*04:02", + "HLA-DQB1*02:01", + "HLA-DQB1*02:02", + "HLA-DQB1*03:01", + "HLA-DQB1*03:02", + "HLA-DQB1*03:19", + "HLA-DQB1*04:02", + "HLA-DQB1*05:01", + "HLA-DQB1*05:02", + "HLA-DQB1*05:03", + "HLA-DQB1*06:02", + "HLA-DQB1*06:03", + "HLA-DQB1*06:04", + "HLA-DRA0*10:1-DRB1*01:01", + "HLA-DRA0*10:1-DRB1*03:01", + "HLA-DRA0*10:1-DRB1*04:01", + "HLA-DRA0*10:1-DRB1*04:04", + "HLA-DRA0*10:1-DRB1*07:01", + "HLA-DRA0*10:1-DRB1*08:01", + "HLA-DRA0*10:1-DRB1*09:01", + "HLA-DRA0*10:1-DRB1*11:01", + "HLA-DRA0*10:1-DRB1*13:01", + "HLA-DRA0*10:1-DRB1*14:54", + "HLA-DRA0*10:1-DRB1*15:01", + "HLA-DRA0*10:1-DRB3*01:01", + "HLA-DRA0*10:1-DRB3*02:02", + "HLA-DRA0*10:1-DRB3*03:01", + "HLA-DRA0*10:1-DRB4*01:03", + "HLA-DRA0*10:1-DRB5*01:01", + "HLA-DRB1*01:01", + "HLA-DRB1*01:02", + "HLA-DRB1*01:03", + "HLA-DRB1*03:01", + "HLA-DRB1*03:02", + "HLA-DRB1*03:03", + "HLA-DRB1*03:04", + "HLA-DRB1*03:05", + "HLA-DRB1*04:01", + "HLA-DRB1*04:02", + "HLA-DRB1*04:03", + "HLA-DRB1*04:04", + "HLA-DRB1*04:05", + "HLA-DRB1*04:06", + "HLA-DRB1*04:07", + "HLA-DRB1*04:11", + "HLA-DRB1*07:01", + "HLA-DRB1*08:01", + "HLA-DRB1*08:02", + "HLA-DRB1*08:03", + "HLA-DRB1*08:04", + "HLA-DRB1*09:01", + "HLA-DRB1*10:01", + "HLA-DRB1*11:01", + "HLA-DRB1*11:02", + "HLA-DRB1*11:03", + "HLA-DRB1*11:04", + "HLA-DRB1*12:01", + "HLA-DRB1*12:02", + "HLA-DRB1*13:01", + "HLA-DRB1*13:02", + "HLA-DRB1*13:03", + "HLA-DRB1*13:04", + "HLA-DRB1*13:05", + "HLA-DRB1*14:01", + "HLA-DRB1*14:02", + "HLA-DRB1*15:01", + "HLA-DRB1*15:02", + "HLA-DRB1*15:03", + "HLA-DRB1*16:01", + "HLA-DRB1*16:02", + "HLA-DRB3*01:01", + "HLA-DRB3*02:02", + "HLA-DRB3*03:01", + "HLA-DRB4*01:01", + "HLA-DRB4*01:03", + "HLA-DRB5*01:01", + "HLA-DRB5*01:02", +] flatten = lambda l: [item for sublist in l for item in sublist] def convert_alleles_mhcnuggets_format(alleles): - return [allele.replace('*', '') for allele in alleles] + return [allele.replace("*", "") for allele in alleles] def parse_alleles(allele_input): - alleles = allele_input.split(';') - supp_alleles = convert_alleles_mhcnuggets_format(list(set(alleles).intersection(supported_alleles_class_2))) + alleles = allele_input.split(";") + supp_alleles = 
convert_alleles_mhcnuggets_format(list(set(alleles).intersection(supported_alleles_class_2))) - return supp_alleles + return supp_alleles def main(): - model = argparse.ArgumentParser(description='MHCNuggets binding prediction') - - model.add_argument( - '-p', '--peptides', - type=str, - help='mhcnuggets input' - ) - - model.add_argument( - '-a', '--alleles', - type=str, - help='class 2 alleles' - ) - - model.add_argument( - '-o', '--output', - type=str, - help='mhcnuggets output' - ) + model = argparse.ArgumentParser(description="MHCNuggets binding prediction") + + model.add_argument("-p", "--peptides", type=str, help="mhcnuggets input") + + model.add_argument("-a", "--alleles", type=str, help="class 2 alleles") + + model.add_argument("-o", "--output", type=str, help="mhcnuggets output") args = model.parse_args() - if open(args.peptides).readlines()!=[]: + if open(args.peptides).readlines() != []: supp_alleles = parse_alleles(args.alleles) for allele in supp_alleles: - predict(class_='II', - peptides_path=args.peptides, - mhc=allele, - output=allele + args.output) + predict(class_="II", peptides_path=args.peptides, mhc=allele, output=allele + args.output) else: - op=open('predicted_neoepitopes_class_2','w') + op = open("predicted_neoepitopes_class_2", "w") op.close() -if __name__ == '__main__': + + +if __name__ == "__main__": main() diff --git a/bin/postprocess_neoepitopes_mhcnuggets.py b/bin/postprocess_neoepitopes_mhcnuggets.py index 6baaea56..1982e515 100755 --- a/bin/postprocess_neoepitopes_mhcnuggets.py +++ b/bin/postprocess_neoepitopes_mhcnuggets.py @@ -4,58 +4,50 @@ import logging from collections import defaultdict -#logging setup +# logging setup console = logging.StreamHandler(sys.stdout) -formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") console.setFormatter(formatter) LOG = logging.getLogger("Postprocess Neoepitopes MHCNuggets") LOG.addHandler(console) LOG.setLevel(logging.INFO) + def main(): - model = argparse.ArgumentParser(description='Postprocess Neoepitopes predicted by MHCNuggets') + model = argparse.ArgumentParser(description="Postprocess Neoepitopes predicted by MHCNuggets") - model.add_argument( - '-i', '--input', - type=str, - nargs='*', - help='predicted class 2 neoepitopes' - ) + model.add_argument("-i", "--input", type=str, nargs="*", help="predicted class 2 neoepitopes") - model.add_argument( - '-n', '--neoepitopes', - type=str, - help='neoepitopes file' - ) + model.add_argument("-n", "--neoepitopes", type=str, help="neoepitopes file") args = model.parse_args() neoepitope_to_geneID = defaultdict() - with open(args.neoepitopes, 'r') as f: + with open(args.neoepitopes, "r") as f: # skip header f.readline() for line in f: - split = line.split(',') + split = line.split(",") neoepitope_to_geneID[split[0]] = split[1] for prediction in args.input: neoepitope_to_geneID_ic50 = defaultdict() - with open(prediction, 'r') as f: + with open(prediction, "r") as f: # skip header f.readline() for line in f: - split = line.split(',') + split = line.split(",") neoepitope = split[0].strip() geneID = neoepitope_to_geneID[neoepitope].strip() ic50 = split[1].strip() neoepitope_to_geneID_ic50[neoepitope] = (geneID, ic50) - with open(prediction + '.csv', 'w') as f: - f.write('peptide,geneID,ic50\n') + with open(prediction + ".csv", "w") as f: + f.write("peptide,geneID,ic50\n") for neoepitope, pair in neoepitope_to_geneID_ic50.items(): - f.write(neoepitope + ',' + 
pair[0] + ',' + pair[1] + '\n') + f.write(neoepitope + "," + pair[0] + "," + pair[1] + "\n") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bin/postprocess_peptides_mhcnuggets.py b/bin/postprocess_peptides_mhcnuggets.py index e0fbf9fd..2ab6ee53 100755 --- a/bin/postprocess_peptides_mhcnuggets.py +++ b/bin/postprocess_peptides_mhcnuggets.py @@ -4,9 +4,9 @@ import logging from collections import defaultdict -#logging setup +# logging setup console = logging.StreamHandler(sys.stdout) -formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") console.setFormatter(formatter) LOG = logging.getLogger("Postprocess Peptides MHCNuggets") LOG.addHandler(console) @@ -14,53 +14,40 @@ def main(): - model = argparse.ArgumentParser(description='Postprocess Neoepitopes predicted by MHCNuggets') + model = argparse.ArgumentParser(description="Postprocess Neoepitopes predicted by MHCNuggets") - model.add_argument( - '-i', '--input', - type=str, - nargs='*', - help='predicted class 2 peptides' - ) + model.add_argument("-i", "--input", type=str, nargs="*", help="predicted class 2 peptides") - model.add_argument( - '-p', '--peptides_seq_ID', - type=str, - help='peptides to seq_ID csv file' - ) + model.add_argument("-p", "--peptides_seq_ID", type=str, help="peptides to seq_ID csv file") - model.add_argument( - '-o', '--output', - type=str, - help='output file name' - ) + model.add_argument("-o", "--output", type=str, help="output file name") args = model.parse_args() peptide_to_geneID = defaultdict() - with open(args.peptides_seq_ID, 'r') as f: + with open(args.peptides_seq_ID, "r") as f: for line in f: - split = line.split(',') + split = line.split(",") peptide_to_geneID[split[0]] = split[1] for prediction in args.input: peptide_to_geneID_ic50 = defaultdict() - with open(prediction, 'r') as f: + with open(prediction, "r") as f: # skip header f.readline() for line in f: - split = line.split(',') + split = line.split(",") peptide = split[0].strip() geneID = peptide_to_geneID[peptide].strip() ic50 = split[1].strip() peptide_to_geneID_ic50[peptide] = (geneID, ic50) - with open(args.output, 'w') as f: - f.write('peptide,geneID,ic50\n') + with open(args.output, "w") as f: + f.write("peptide,geneID,ic50\n") for peptide, pair in peptide_to_geneID_ic50.items(): - f.write(peptide + ',' + pair[0] + ',' + pair[1] + '\n') + f.write(peptide + "," + pair[0] + "," + pair[1] + "\n") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bin/preprocess_neoepitopes_mhcnuggets.py b/bin/preprocess_neoepitopes_mhcnuggets.py index d29e8d88..d8229030 100755 --- a/bin/preprocess_neoepitopes_mhcnuggets.py +++ b/bin/preprocess_neoepitopes_mhcnuggets.py @@ -3,9 +3,9 @@ import logging import sys -#logging setup +# logging setup console = logging.StreamHandler(sys.stdout) -formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") console.setFormatter(formatter) LOG = logging.getLogger("Preprocess Neoepitopes MHCNuggets") LOG.addHandler(console) @@ -13,38 +13,32 @@ def parse_neoepitopes(filepath): - with open(filepath, 'r') as f: + with open(filepath, "r") as f: # parse and remove the header - lines = f.read().split('\n')[1:] + lines = f.read().split("\n")[1:] # filter any leftover empty lines lines = filter(lambda x: len(x) > 0, lines) - lines = [elem.split(',')[0] 
for elem in lines] + lines = [elem.split(",")[0] for elem in lines] return lines def write_neoepitopes(neoepitopes, filepath): - with open(filepath, 'w') as f: + with open(filepath, "w") as f: for neoepitope in neoepitopes: if neoepitope == neoepitopes[-1]: f.write(neoepitope) else: - f.write(neoepitope + '\n') + f.write(neoepitope + "\n") def main(): - model = argparse.ArgumentParser(description='Neoepitope preprocessing for mhcnuggets') + model = argparse.ArgumentParser(description="Neoepitope preprocessing for mhcnuggets") - model.add_argument( - '-n', '--neoepitopes', - type=str, - help='neoepitopes input file' - ) + model.add_argument("-n", "--neoepitopes", type=str, help="neoepitopes input file") model.add_argument( - '-o', '--output', - type=str, - help='preprocess neoepitope file for subsequent mhcnuggets prediction' + "-o", "--output", type=str, help="preprocess neoepitope file for subsequent mhcnuggets prediction" ) args = model.parse_args() @@ -53,5 +47,5 @@ def main(): write_neoepitopes(neoepitopes, args.output) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bin/preprocess_peptides_mhcnuggets.py b/bin/preprocess_peptides_mhcnuggets.py index 80dfecb6..c8192a44 100755 --- a/bin/preprocess_peptides_mhcnuggets.py +++ b/bin/preprocess_peptides_mhcnuggets.py @@ -5,14 +5,15 @@ import sys from collections import defaultdict -#logging setup +# logging setup console = logging.StreamHandler(sys.stdout) -formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") console.setFormatter(formatter) LOG = logging.getLogger("Preprocess Peptides MHCNuggets") LOG.addHandler(console) LOG.setLevel(logging.INFO) + def parse_mztab(identified_peptides_file): """ parses an mztab file and returns all identified proteins @@ -26,42 +27,36 @@ def parse_mztab(identified_peptides_file): seq_geneIDs = defaultdict(str) for line in mztab_read: if line.startswith("PEP"): - content = line.split('\t') + content = line.split("\t") seq = content[1] geneID = content[2] - if not 'U' in seq and not 'X' in seq and not 'Z' in seq and not 'J' in seq and not 'B' in seq: + if not "U" in seq and not "X" in seq and not "Z" in seq and not "J" in seq and not "B" in seq: seq_geneIDs[seq] = geneID return seq_geneIDs + def main(): - model = argparse.ArgumentParser(description='MHCNuggets binding prediction') + model = argparse.ArgumentParser(description="MHCNuggets binding prediction") - model.add_argument( - '-m', '--mztab', - type=str, - help='mztab file' - ) + model.add_argument("-m", "--mztab", type=str, help="mztab file") - model.add_argument( - '-o', '--output', - type=str, - help='preprocessed ' - ) + model.add_argument("-o", "--output", type=str, help="preprocessed ") args = model.parse_args() seq_to_geneIDs = parse_mztab(args.mztab) # write just peptides new line formatted - with open(args.output, 'w') as f: + with open(args.output, "w") as f: for protein in seq_to_geneIDs.keys(): - f.write(protein + '\n') + f.write(protein + "\n") # write seq to geneID in a suitable format (csv?) 
- with open('peptide_to_geneID', 'w') as f: + with open("peptide_to_geneID", "w") as f: for protein, geneID in seq_to_geneIDs.items(): - f.write(protein + ',' + geneID + '\n') + f.write(protein + "," + geneID + "\n") + -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bin/resolve_neoepitopes.py b/bin/resolve_neoepitopes.py index e11dee0d..deae2957 100755 --- a/bin/resolve_neoepitopes.py +++ b/bin/resolve_neoepitopes.py @@ -30,7 +30,7 @@ # logging setup console = logging.StreamHandler(sys.stdout) -formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") console.setFormatter(formatter) LOG = logging.getLogger("Resolve Neoepitopes") LOG.addHandler(console) @@ -50,10 +50,10 @@ def parse_mztab(identified_peptides_file): seq_geneIDs = defaultdict(str) for line in mztab_read: if line.startswith("PEP"): - content = line.split('\t') + content = line.split("\t") seq = content[1] geneID = content[2] - if not 'U' in seq and not 'X' in seq and not 'Z' in seq and not 'J' in seq and not 'B' in seq: + if not "U" in seq and not "X" in seq and not "Z" in seq and not "J" in seq and not "B" in seq: seq_geneIDs[seq] = geneID return seq_geneIDs @@ -71,14 +71,14 @@ def parse_vcf_neoepitopes(neoepitope_file, alleles): if not alleles: neoepitopes = [] with open(neoepitope_file) as tsvfile: - reader = csv.DictReader(tsvfile, dialect='excel-tab') + reader = csv.DictReader(tsvfile, dialect="excel-tab") for row in reader: - neoepitopes.append(row['Sequence']) + neoepitopes.append(row["Sequence"]) return neoepitopes with open(neoepitope_file) as tsvfile: - reader = csv.DictReader(tsvfile, dialect='excel-tab') + reader = csv.DictReader(tsvfile, dialect="excel-tab") reader, reader_iter_cp = tee(reader) # determine whether any alleles were used for the vcf_neoepitope_predictor script @@ -88,8 +88,11 @@ def parse_vcf_neoepitopes(neoepitope_file, alleles): try: content[allele] except KeyError: - LOG.warning("Allele " + str(allele) - + " was specified, but not found in the possible neoepitopes predicted from the vcf file.") + LOG.warning( + "Allele " + + str(allele) + + " was specified, but not found in the possible neoepitopes predicted from the vcf file." + ) alleles.remove(allele) # allele with highest affinity score wins @@ -113,16 +116,16 @@ def write_found_neoepitopes(filepath, found_neoepitopes, file_format="csv"): :param file_format: json or csv or raw """ # if only list (no bindings) - if file_format == 'pep': - with open(filepath + "." + file_format, 'w') as f: - f.write('Peptide sequence' + '\t' + 'geneID') - f.write('\n'.join(str(seq) + '\t' + str(geneID) for seq, geneID in found_neoepitopes.items())) + if file_format == "pep": + with open(filepath + "." + file_format, "w") as f: + f.write("Peptide sequence" + "\t" + "geneID") + f.write("\n".join(str(seq) + "\t" + str(geneID) for seq, geneID in found_neoepitopes.items())) elif file_format == "json": - json.dump(found_neoepitopes, open(filepath + '.' + file_format, 'w')) + json.dump(found_neoepitopes, open(filepath + "." + file_format, "w")) elif file_format == "csv": - with open(filepath + '.' + file_format, 'w') as csv_file: + with open(filepath + "." 
+ file_format, "w") as csv_file: writer = csv.writer(csv_file) - header = ['Peptide sequence', 'geneID'] + header = ["Peptide sequence", "geneID"] writer.writerow(header) for seq, geneID in found_neoepitopes.items(): writer.writerow([seq, geneID]) @@ -135,33 +138,17 @@ def write_found_neoepitopes(filepath, found_neoepitopes, file_format="csv"): def main(): - model = argparse.ArgumentParser(description='Neoepitope resolvement from mztab and possible vcf determined neoepitopes.') - - model.add_argument( - '-n', '--neoepitopes', - type=str, - help='All possible predicted neoepitopes' + model = argparse.ArgumentParser( + description="Neoepitope resolvement from mztab and possible vcf determined neoepitopes." ) - model.add_argument( - '-m', '--mztab', - type=str, - help='Path to mztab file' - ) + model.add_argument("-n", "--neoepitopes", type=str, help="All possible predicted neoepitopes") - model.add_argument( - '-f', '--file_format', - type=str, - default="csv", - help='File format for output file' - ) + model.add_argument("-m", "--mztab", type=str, help="Path to mztab file") - model.add_argument( - '-o', '--output', - type=str, - required=True, - help='Output file path' - ) + model.add_argument("-f", "--file_format", type=str, default="csv", help="File format for output file") + + model.add_argument("-o", "--output", type=str, required=True, help="Output file path") args = model.parse_args() diff --git a/bin/variants2fasta.py b/bin/variants2fasta.py index b1e68d3f..8ec07f6c 100755 --- a/bin/variants2fasta.py +++ b/bin/variants2fasta.py @@ -7,13 +7,21 @@ from Fred2.Core.Variant import VariationType from Fred2.IO import read_lines, MartsAdapter, read_annovar_exonic from Fred2.EpitopePrediction import EpitopePredictorFactory -from Fred2.Core import generate_peptides_from_proteins, generate_peptides_from_variants, generate_transcripts_from_variants, generate_proteins_from_transcripts +from Fred2.Core import ( + generate_peptides_from_proteins, + generate_peptides_from_variants, + generate_transcripts_from_variants, + generate_proteins_from_transcripts, +) from Fred2.IO.ADBAdapter import EIdentifierTypes, EAdapterFields from vcf_reader import read_vcf -MARTDBURL = {"GRCH37": "http://grch37.ensembl.org/biomart/martservice?query=", - "GRCH38": "http://www.ensembl.org/biomart/martservice?query="} # is corrently set to GRCh38 +MARTDBURL = { + "GRCH37": "http://grch37.ensembl.org/biomart/martservice?query=", + "GRCH38": "http://www.ensembl.org/biomart/martservice?query=", +} # is corrently set to GRCh38 + def read_variant_effect_predictor(file, gene_filter=None): """ @@ -23,137 +31,160 @@ def read_variant_effect_predictor(file, gene_filter=None): :return: list(Variant) - a list of Fred2.Core.Variant objects """ vars = [] + def get_type(ref, alt): """ - returns the variant type + returns the variant type """ - if len(ref)==1 and len(alt)==1: + if len(ref) == 1 and len(alt) == 1: return VariationType.SNP - if len(ref)>0 and len(alt)==0: - if len(ref)%3 == 0: + if len(ref) > 0 and len(alt) == 0: + if len(ref) % 3 == 0: return VariationType.DEL else: return VariationType.FSDEL - if len(ref) == 0 and len(alt)>0: - if len(alt)% 3 == 0: + if len(ref) == 0 and len(alt) > 0: + if len(alt) % 3 == 0: return VariationType.INS else: return VariationType.FSINS return VariationType.UNKNOWN - coding_types = set(["3_prime_UTR_variant", "5_prime_UTR_variant", "start_lost", "stop_gained", - "frameshift_variant", "start_lost", "inframe_insertion", "inframe_deletion", "missense_variant", - "protein_altering_variant", 
"splice_region_variant", "incomplete_terminal_codon_variant", "stop_retained_variant", - "synonymous_variant", "coding_sequence_variant"]) + coding_types = set( + [ + "3_prime_UTR_variant", + "5_prime_UTR_variant", + "start_lost", + "stop_gained", + "frameshift_variant", + "start_lost", + "inframe_insertion", + "inframe_deletion", + "missense_variant", + "protein_altering_variant", + "splice_region_variant", + "incomplete_terminal_codon_variant", + "stop_retained_variant", + "synonymous_variant", + "coding_sequence_variant", + ] + ) with open(file, "r") as f: - for i,l in enumerate(f): + for i, l in enumerate(f): - #skip comments + # skip comments if l.startswith("#") or l.strip() == "": continue - chrom, gene_pos,var_id,ref,alt,_,filter_flag,info= l.strip().split("\t")[:8] + chrom, gene_pos, var_id, ref, alt, _, filter_flag, info = l.strip().split("\t")[:8] coding = {} isSynonymous = False for co in info.split(","): - #Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|TSL|APPRIS|SIFT|PolyPhen|AF|AFR_AF|AMR_AF|EAS_AF|EUR_AF|SAS_AF|AA_AF|EA_AF|gnomAD_AF|gnomAD_AFR_AF|gnomAD_AMR_AF|gnomAD_ASJ_AF|gnomAD_EAS_AF|gnomAD_FIN_AF|gnomAD_NFE_AF|gnomAD_OTH_AF|gnomAD_SAS_AF|CLIN_SIG|SOMATIC|PHENO|PUBMED|MOTIF_NAME|MOTIF_POS|HIGH_INF_POS|MOTIF_SCORE_CHANGE"> - _,var_type,_,gene,_,transcript_type,transcript_id,_,_,_,_,_,transcript_pos,_,prot_pos,aa_mutation = co.strip().split("|")[:16] - HGNC_ID=co.strip().split("|")[22] - - #pass every other feature type except Transcript (RegulatoryFeature, MotifFeature.) - #pass genes that are uninterresting for us + # Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|TSL|APPRIS|SIFT|PolyPhen|AF|AFR_AF|AMR_AF|EAS_AF|EUR_AF|SAS_AF|AA_AF|EA_AF|gnomAD_AF|gnomAD_AFR_AF|gnomAD_AMR_AF|gnomAD_ASJ_AF|gnomAD_EAS_AF|gnomAD_FIN_AF|gnomAD_NFE_AF|gnomAD_OTH_AF|gnomAD_SAS_AF|CLIN_SIG|SOMATIC|PHENO|PUBMED|MOTIF_NAME|MOTIF_POS|HIGH_INF_POS|MOTIF_SCORE_CHANGE"> + ( + _, + var_type, + _, + gene, + _, + transcript_type, + transcript_id, + _, + _, + _, + _, + _, + transcript_pos, + _, + prot_pos, + aa_mutation, + ) = co.strip().split("|")[:16] + HGNC_ID = co.strip().split("|")[22] + + # pass every other feature type except Transcript (RegulatoryFeature, MotifFeature.) + # pass genes that are uninterresting for us if transcript_type != "Transcript" or (HGNC_ID not in gene_filter and gene_filter): continue - #pass all intronic and other mutations that do not directly influence the protein sequence + # pass all intronic and other mutations that do not directly influence the protein sequence if any(t in coding_types for t in var_type.split("&")): - #generate mutation syntax + # generate mutation syntax - #positioning in Fred2 is 0-based!!! + # positioning in Fred2 is 0-based!!! if transcript_pos != "": - coding[transcript_id] = MutationSyntax(transcript_id, int(transcript_pos.split('/')[0])-1, - -1 if prot_pos == "" else int(prot_pos)-1, co, "", geneID=HGNC_ID) - - #is variant synonymous? + coding[transcript_id] = MutationSyntax( + transcript_id, + int(transcript_pos.split("/")[0]) - 1, + -1 if prot_pos == "" else int(prot_pos) - 1, + co, + "", + geneID=HGNC_ID, + ) + + # is variant synonymous? 
isSynonymous = any(t == "synonymous_variant" for t in var_type.split("&")) if coding: - vars.append(Variant(var_id, get_type(ref, alt), chrom, int(gene_pos), ref.upper(), alt.upper(), coding, False, isSynonymous)) + vars.append( + Variant( + var_id, + get_type(ref, alt), + chrom, + int(gene_pos), + ref.upper(), + alt.upper(), + coding, + False, + isSynonymous, + ) + ) return vars def main(): - model = argparse.ArgumentParser(description='Neoepitope protein fasta generation from variant vcf') + model = argparse.ArgumentParser(description="Neoepitope protein fasta generation from variant vcf") - model.add_argument( - '-v', '--vcf', - type=str, - default=None, - help='Path to the vcf input file' - ) + model.add_argument("-v", "--vcf", type=str, default=None, help="Path to the vcf input file") model.add_argument( - '-t', '--type', + "-t", + "--type", type=str, choices=["VEP", "ANNOVAR", "SNPEFF"], default="VEP", - help='Type of annotation tool used (Variant Effect Predictor, ANNOVAR exonic gene annotation, SnpEff)' - ) + help="Type of annotation tool used (Variant Effect Predictor, ANNOVAR exonic gene annotation, SnpEff)", + ) - model.add_argument( - '-f', '--fasta_ref', - type=str, - default=None, - help='Path to the fasta input file' - ) + model.add_argument("-f", "--fasta_ref", type=str, default=None, help="Path to the fasta input file") model.add_argument( - '-p','--proteins', - type=str, - default=None, - help='Path to the protein ID input file (in HGNC-ID)' - ) + "-p", "--proteins", type=str, default=None, help="Path to the protein ID input file (in HGNC-ID)" + ) model.add_argument( - '-r' ,'--reference', + "-r", + "--reference", type=str, - default='GRCh38', - help='The reference genome used for varinat annotation and calling.' - ) + default="GRCh38", + help="The reference genome used for varinat annotation and calling.", + ) model.add_argument( - '-fINDEL' ,'--filterINDEL', - action="store_true", - help='Filter insertions and deletions (including frameshifts)' - ) + "-fINDEL", "--filterINDEL", action="store_true", help="Filter insertions and deletions (including frameshifts)" + ) - model.add_argument( - '-fFS' ,'--filterFSINDEL', - action="store_true", - help='Filter frameshift INDELs' - ) + model.add_argument("-fFS", "--filterFSINDEL", action="store_true", help="Filter frameshift INDELs") - model.add_argument( - '-fSNP' ,'--filterSNP', - action="store_true", - help='Filter SNPs' - ) - - model.add_argument( - '-o','--output', - type=str, - required=True, - help='Path to the output file' - ) + model.add_argument("-fSNP", "--filterSNP", action="store_true", help="Filter SNPs") + model.add_argument("-o", "--output", type=str, required=True, help="Path to the output file") args = model.parse_args() martDB = MartsAdapter(biomart=MARTDBURL[args.reference.upper()]) - if args.vcf is None: sys.stderr.write("At least a vcf file or a protein id file has to be provided.\n") return -1 @@ -177,12 +208,15 @@ def main(): else: variants = read_annovar_exonic(args.vcf, gene_filter=protein_ids) - if args.filterSNP: variants = filter(lambda x: x.type != VariationType.SNP, variants) if args.filterINDEL: - variants = filter(lambda x: x.type not in [VariationType.INS, VariationType.DEL, VariationType.FSDEL, VariationType.FSINS], variants) + variants = filter( + lambda x: x.type + not in [VariationType.INS, VariationType.DEL, VariationType.FSDEL, VariationType.FSINS], + variants, + ) if args.filterFSINDEL: variants = filter(lambda x: x.type not in [VariationType.FSDEL, VariationType.FSINS], variants) @@ 
-191,27 +225,26 @@ def main(): sys.stderr.write("No variants left after filtering. Please refine your filtering criteria.\n") return -1 - #generate transcripts + # generate transcripts transcripts = generate_transcripts_from_variants(variants, martDB, EIdentifierTypes.ENSEMBL) - #generate proteins + # generate proteins proteins = generate_proteins_from_transcripts(transcripts) - #write fasta file + # write fasta file with open(args.output, "w") as f: for p in proteins: - f.write('>' + str(p.transcript_id) + '|' + str(p.vars) + '_var_' + '\n') - f.write(str(p)+ '\n') - + f.write(">" + str(p.transcript_id) + "|" + str(p.vars) + "_var_" + "\n") + f.write(str(p) + "\n") - #concatenate fasta file with fasta reference + # concatenate fasta file with fasta reference with open(args.output) as op: opr1 = op.read() with open(args.fasta_ref) as op: opr2 = op.read() - concat = opr1+opr2 + concat = opr1 + opr2 with open(args.output, "w") as op: op.write(concat) @@ -223,7 +256,5 @@ def main(): return 0 - if __name__ == "__main__": sys.exit(main()) - diff --git a/bin/vcf_neoepitope_predictor.py b/bin/vcf_neoepitope_predictor.py index 6abedb47..e92ba895 100755 --- a/bin/vcf_neoepitope_predictor.py +++ b/bin/vcf_neoepitope_predictor.py @@ -54,20 +54,27 @@ from Fred2.Core.Variant import VariationType from Fred2.IO import read_lines, MartsAdapter, read_annovar_exonic from Fred2.EpitopePrediction import EpitopePredictorFactory -from Fred2.Core import generate_transcripts_from_variants, generate_proteins_from_transcripts, generate_peptides_from_proteins, generate_peptides_from_variants +from Fred2.Core import ( + generate_transcripts_from_variants, + generate_proteins_from_transcripts, + generate_peptides_from_proteins, + generate_peptides_from_variants, +) from Fred2.IO.ADBAdapter import EIdentifierTypes, EAdapterFields from vcf_reader import read_vcf # logging setup console = logging.StreamHandler(sys.stdout) -formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") console.setFormatter(formatter) LOG = logging.getLogger("VCF Neoepitope Predictor") LOG.addHandler(console) LOG.setLevel(logging.INFO) -MARTDBURL = {"GRCH37": "http://grch37.ensembl.org/biomart/martservice?query=", - "GRCH38": "http://www.ensembl.org/biomart/martservice?query="} # is correctly set to GRCh38 +MARTDBURL = { + "GRCH37": "http://grch37.ensembl.org/biomart/martservice?query=", + "GRCH38": "http://www.ensembl.org/biomart/martservice?query=", +} # is correctly set to GRCh38 def read_variant_effect_predictor(file, gene_filter=None): @@ -81,7 +88,7 @@ def read_variant_effect_predictor(file, gene_filter=None): def get_type(ref, alt): """ - returns the variant type + returns the variant type """ if len(ref) == 1 and len(alt) == 1: return VariationType.SNP @@ -97,10 +104,23 @@ def get_type(ref, alt): return VariationType.FSINS return VariationType.UNKNOWN - coding_types = {"3_prime_UTR_variant", "5_prime_UTR_variant", "start_lost", "stop_gained", "frameshift_variant", - "start_lost", "inframe_insertion", "inframe_deletion", "missense_variant", - "protein_altering_variant", "splice_region_variant", "incomplete_terminal_codon_variant", - "stop_retained_variant", "synonymous_variant", "coding_sequence_variant"} + coding_types = { + "3_prime_UTR_variant", + "5_prime_UTR_variant", + "start_lost", + "stop_gained", + "frameshift_variant", + "start_lost", + "inframe_insertion", + "inframe_deletion", + "missense_variant", + 
"protein_altering_variant", + "splice_region_variant", + "incomplete_terminal_codon_variant", + "stop_retained_variant", + "synonymous_variant", + "coding_sequence_variant", + } with open(file, "r") as f: for i, l in enumerate(f): @@ -117,8 +137,24 @@ def get_type(ref, alt): # skip additional info fields without annotation try: # Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|TSL|APPRIS|SIFT|PolyPhen|AF|AFR_AF|AMR_AF|EAS_AF|EUR_AF|SAS_AF|AA_AF|EA_AF|gnomAD_AF|gnomAD_AFR_AF|gnomAD_AMR_AF|gnomAD_ASJ_AF|gnomAD_EAS_AF|gnomAD_FIN_AF|gnomAD_NFE_AF|gnomAD_OTH_AF|gnomAD_SAS_AF|CLIN_SIG|SOMATIC|PHENO|PUBMED|MOTIF_NAME|MOTIF_POS|HIGH_INF_POS|MOTIF_SCORE_CHANGE"> - _, var_type, _, gene, _, transcript_type, transcript_id, _, _, _, _, _, _, transcript_pos, prot_pos, aa_mutation = co.strip().split( - "|")[:16] + ( + _, + var_type, + _, + gene, + _, + transcript_type, + transcript_id, + _, + _, + _, + _, + _, + _, + transcript_pos, + prot_pos, + aa_mutation, + ) = co.strip().split("|")[:16] except ValueError: LOG.warning("INFO field in different format in line: {}, skipping...".format(str(i))) continue @@ -133,115 +169,95 @@ def get_type(ref, alt): # generate mutation syntax # positioning in Fred2 is 0-based!!! - if transcript_pos != "" and '?' not in transcript_pos: - coding[transcript_id] = MutationSyntax(transcript_id, int(transcript_pos.split("-")[0]) - 1, -1 if prot_pos == "" else int(prot_pos.split("-")[0]) - 1, co, "", geneID=gene) + if transcript_pos != "" and "?" not in transcript_pos: + coding[transcript_id] = MutationSyntax( + transcript_id, + int(transcript_pos.split("-")[0]) - 1, + -1 if prot_pos == "" else int(prot_pos.split("-")[0]) - 1, + co, + "", + geneID=gene, + ) # is variant synonymous? 
is_synonymous = any(t == "synonymous_variant" for t in var_type.split("&")) if coding: vars.append( - Variant(var_id, get_type(ref, alt), chrom, int(gene_pos), ref.upper(), alt.upper(), coding, False, - is_synonymous)) + Variant( + var_id, + get_type(ref, alt), + chrom, + int(gene_pos), + ref.upper(), + alt.upper(), + coding, + False, + is_synonymous, + ) + ) return vars def main(): - model = argparse.ArgumentParser(description='Neoepitope prediction for TargetInspector.') + model = argparse.ArgumentParser(description="Neoepitope prediction for TargetInspector.") model.add_argument( - '-m', '--method', + "-m", + "--method", type=str, choices=EpitopePredictorFactory.available_methods().keys(), default="bimas", - help='The name of the prediction method' + help="The name of the prediction method", ) - model.add_argument( - '-v', '--vcf', - type=str, - default=None, - help='Path to the vcf input file' - ) + model.add_argument("-v", "--vcf", type=str, default=None, help="Path to the vcf input file") model.add_argument( - '-t', '--type', + "-t", + "--type", type=str, choices=["VEP", "ANNOVAR", "SNPEFF"], default="VEP", - help='Type of annotation tool used (Variant Effect Predictor, ANNOVAR exonic gene annotation, SnpEff)' + help="Type of annotation tool used (Variant Effect Predictor, ANNOVAR exonic gene annotation, SnpEff)", ) model.add_argument( - '-p', '--proteins', - type=str, - default=None, - help='Path to the protein ID input file (in HGNC-ID)' + "-p", "--proteins", type=str, default=None, help="Path to the protein ID input file (in HGNC-ID)" ) model.add_argument( - '-minl', '--peptide_min_length', - type=int, - default=8, - help='Minimum peptide length for epitope prediction' + "-minl", "--peptide_min_length", type=int, default=8, help="Minimum peptide length for epitope prediction" ) model.add_argument( - '-maxl', '--peptide_max_length', - type=int, - default=12, - help='Maximum peptide length for epitope prediction' + "-maxl", "--peptide_max_length", type=int, default=12, help="Maximum peptide length for epitope prediction" ) model.add_argument( - '-a', '--alleles', - type=str, - required=True, - help='Path to the allele file (one per line in new nomenclature)' + "-a", "--alleles", type=str, required=True, help="Path to the allele file (one per line in new nomenclature)" ) model.add_argument( - '-r', '--reference', + "-r", + "--reference", type=str, - default='GRCh38', - help='The reference genome used for variant annotation and calling.' 
+ default="GRCh38", + help="The reference genome used for variant annotation and calling.", ) model.add_argument( - '-fINDEL', '--filterINDEL', - action="store_true", - help='Filter insertions and deletions (including frameshifts)' + "-fINDEL", "--filterINDEL", action="store_true", help="Filter insertions and deletions (including frameshifts)" ) - model.add_argument( - '-fFS', '--filterFSINDEL', - action="store_true", - help='Filter frameshift INDELs' - ) + model.add_argument("-fFS", "--filterFSINDEL", action="store_true", help="Filter frameshift INDELs") - model.add_argument( - '-fSNP', '--filterSNP', - action="store_true", - help='Filter SNPs' - ) + model.add_argument("-fSNP", "--filterSNP", action="store_true", help="Filter SNPs") - model.add_argument( - '-etk', '--etk', - action="store_true", - help=argparse.SUPPRESS - ) + model.add_argument("-etk", "--etk", action="store_true", help=argparse.SUPPRESS) - model.add_argument( - '-bind', '--predict_bindings', - action="store_true", - help='Predict bindings' - ) + model.add_argument("-bind", "--predict_bindings", action="store_true", help="Predict bindings") - model.add_argument( - '-o', '--output', - type=str, - required=True, - help='Path to the output file' - ) + model.add_argument("-o", "--output", type=str, required=True, help="Path to the output file") args = model.parse_args() @@ -274,10 +290,11 @@ def main(): variants = filter(lambda x: x.type != VariationType.SNP, variants) if args.filterINDEL: - variants = filter(lambda x: x.type not in [VariationType.INS, - VariationType.DEL, - VariationType.FSDEL, - VariationType.FSINS], variants) + variants = filter( + lambda x: x.type + not in [VariationType.INS, VariationType.DEL, VariationType.FSDEL, VariationType.FSINS], + variants, + ) if args.filterFSINDEL: variants = filter(lambda x: x.type not in [VariationType.FSDEL, VariationType.FSINS], variants) @@ -287,10 +304,15 @@ def main(): return -1 epitopes = [] - minlength=args.peptide_min_length - maxlength=args.peptide_max_length - prots = [p for p in generate_proteins_from_transcripts(generate_transcripts_from_variants(variants, martDB, EIdentifierTypes.ENSEMBL))] - for peplen in range(minlength, maxlength+1): + minlength = args.peptide_min_length + maxlength = args.peptide_max_length + prots = [ + p + for p in generate_proteins_from_transcripts( + generate_transcripts_from_variants(variants, martDB, EIdentifierTypes.ENSEMBL) + ) + ] + for peplen in range(minlength, maxlength + 1): peptide_gen = generate_peptides_from_proteins(prots, peplen) peptides_var = [x for x in peptide_gen] @@ -304,7 +326,7 @@ def main(): if coding.geneID is not None: transcript_to_genes[trans_id] = coding.geneID else: - transcript_to_genes[trans_id] = 'None' + transcript_to_genes[trans_id] = "None" # else: generate protein sequences from given HGNC IDs and then epitopes else: @@ -316,7 +338,8 @@ def main(): if protein_seq is not None: transcript_to_genes[ensembl_ids[EAdapterFields.TRANSID]] = l.strip() proteins.append( - Protein(protein_seq, gene_id=l.strip(), transcript_id=ensembl_ids[EAdapterFields.TRANSID])) + Protein(protein_seq, gene_id=l.strip(), transcript_id=ensembl_ids[EAdapterFields.TRANSID]) + ) epitopes = [] for length in range(args.peptide_min_length, args.peptide_max_length): epitopes.extend(generate_peptides_from_proteins(proteins, length)) @@ -326,7 +349,7 @@ def main(): # predict bindings for all found neoepitopes if args.predict_bindings: - result = EpitopePredictorFactory(args.method).predict(epitopes, alleles=alleles.split(';')) + result = 
EpitopePredictorFactory(args.method).predict(epitopes, alleles=alleles.split(";")) with open(args.output, "w") as f: alleles = result.columns @@ -336,16 +359,32 @@ def main(): p = index[0] method = index[1] proteins = ",".join( - set([transcript_to_genes[prot.transcript_id.split(":FRED2")[0]] for prot in p.get_all_proteins()])) + set([transcript_to_genes[prot.transcript_id.split(":FRED2")[0]] for prot in p.get_all_proteins()]) + ) vars_str = "" if args.vcf is not None: - vars_str = "\t" + "|".join(set(prot_id.split(":FRED2")[0] + ":" + ",".join( repr(v) for v in set(p.get_variants_by_protein(prot_id)) ) - for prot_id in p.proteins.iterkeys() - if p.get_variants_by_protein(prot_id))) - - f.write(str(p) + "\t" + method + "\t" + "\t".join( - "%.3f" % row[a] for a in alleles) + "\t" + proteins + vars_str + "\n") + vars_str = "\t" + "|".join( + set( + prot_id.split(":FRED2")[0] + + ":" + + ",".join(repr(v) for v in set(p.get_variants_by_protein(prot_id))) + for prot_id in p.proteins.iterkeys() + if p.get_variants_by_protein(prot_id) + ) + ) + + f.write( + str(p) + + "\t" + + method + + "\t" + + "\t".join("%.3f" % row[a] for a in alleles) + + "\t" + + proteins + + vars_str + + "\n" + ) if args.etk: with open(args.output.rsplit(".", 1)[0] + "_etk.tsv", "w") as g: @@ -354,7 +393,13 @@ def main(): for index, row in result.iterrows(): p = index[0] proteins = " ".join( - set([transcript_to_genes[prot.transcript_id.split(":FRED2")[0]] for prot in p.get_all_proteins()])) + set( + [ + transcript_to_genes[prot.transcript_id.split(":FRED2")[0]] + for prot in p.get_all_proteins() + ] + ) + ) g.write(str(p) + "\t" + "\t".join("%.3f" % row[a] for a in alleles) + "\t" + proteins + "\n") # don't predict bindings! # different output format! @@ -366,17 +411,24 @@ def main(): for epitope in epitopes: p = epitope proteins = ",".join( - set([transcript_to_genes[prot.transcript_id.split(":FRED2")[0]] for prot in p.get_all_proteins()])) + set([transcript_to_genes[prot.transcript_id.split(":FRED2")[0]] for prot in p.get_all_proteins()]) + ) vars_str = "" if args.vcf is not None: - vars_str = "\t" + "|".join(set(prot_id.split(":FRED2")[0] + ":" + ",".join( repr(v) for v in set(p.get_variants_by_protein(prot_id)) ) - for prot_id in p.proteins.iterkeys() - if p.get_variants_by_protein(prot_id))) + vars_str = "\t" + "|".join( + set( + prot_id.split(":FRED2")[0] + + ":" + + ",".join(repr(v) for v in set(p.get_variants_by_protein(prot_id))) + for prot_id in p.proteins.iterkeys() + if p.get_variants_by_protein(prot_id) + ) + ) f.write(str(p) + "\t" + proteins + vars_str + "\n") - with open(args.output.replace('.csv','.txt'), "w") as f: + with open(args.output.replace(".csv", ".txt"), "w") as f: for epitope in epitopes: f.write(str(epitope) + "\n") @@ -385,4 +437,3 @@ def main(): if __name__ == "__main__": sys.exit(main()) - diff --git a/bin/vcf_reader.py b/bin/vcf_reader.py index f53aa770..d8bb095e 100755 --- a/bin/vcf_reader.py +++ b/bin/vcf_reader.py @@ -26,7 +26,7 @@ from datetime import datetime from string import Template -__author__ = 'mohr, walzer' +__author__ = "mohr, walzer" VERSION = "1.1" ID_SYSTEM_USED = EIdentifierTypes.ENSEMBL @@ -87,18 +87,18 @@ def get_fred2_annotation(vt, p, r, alt): return p, r, alt elif vt == VariationType.DEL or vt == VariationType.FSDEL: # more than one observed ? 
- if alt != '-': - alternative = '-' - reference = r[len(alt):] + if alt != "-": + alternative = "-" + reference = r[len(alt) :] position = p + len(alt) else: return p, r, alt elif vt == VariationType.INS or vt == VariationType.FSINS: - if r != '-': + if r != "-": position = p - reference = '-' - if alt != '-': - alt_new = alt[len(r):] + reference = "-" + if alt != "-": + alt_new = alt[len(r) :] alternative = alt_new else: alternative = str(alt) @@ -108,7 +108,6 @@ def get_fred2_annotation(vt, p, r, alt): return position, reference, alternative - def read_vcf(filename, pass_only=True): """ reads vcf files @@ -119,7 +118,7 @@ def read_vcf(filename, pass_only=True): global ID_SYSTEM_USED vl = list() - with open(filename, 'rb') as tsvfile: + with open(filename, "rb") as tsvfile: vcf_reader = vcf.Reader(tsvfile) vl = [r for r in vcf_reader] @@ -129,7 +128,7 @@ def read_vcf(filename, pass_only=True): genotye_dict = {"het": False, "hom": True, "ref": True} for num, record in enumerate(vl): - c = record.CHROM.strip('chr') + c = record.CHROM.strip("chr") p = record.POS - 1 variation_dbid = record.ID r = str(record.REF) @@ -156,14 +155,14 @@ def read_vcf(filename, pass_only=True): vt = VariationType.FSDEL else: vt = VariationType.FSINS - gene = '' + gene = "" for alt in v_list: isHomozygous = False - if 'HOM' in record.INFO: - isHomozygous = record.INFO['HOM'] == 1 - elif 'SGT' in record.INFO: - zygosity = record.INFO['SGT'].split("->")[1] + if "HOM" in record.INFO: + isHomozygous = record.INFO["HOM"] == 1 + elif "SGT" in record.INFO: + zygosity = record.INFO["SGT"].split("->")[1] if zygosity in genotye_dict: isHomozygous = genotye_dict[zygosity] else: @@ -173,42 +172,59 @@ def read_vcf(filename, pass_only=True): isHomozygous = False else: for sample in record.samples: - if 'GT' in sample.data: - isHomozygous = sample.data['GT'] == '1/1' + if "GT" in sample.data: + isHomozygous = sample.data["GT"] == "1/1" - if record.INFO['ANN']: + if record.INFO["ANN"]: isSynonymous = False coding = dict() types = [] - for annraw in record.INFO['ANN']: # for each ANN only add a new coding! see GSvar - annots = annraw.split('|') - obs, a_mut_type, impact, a_gene, a_gene_id, feature_type, transcript_id, exon, tot_exon, trans_coding, prot_coding, cdna, cds, aa, distance, warnings = annots + for annraw in record.INFO["ANN"]: # for each ANN only add a new coding! 
see GSvar + annots = annraw.split("|") + ( + obs, + a_mut_type, + impact, + a_gene, + a_gene_id, + feature_type, + transcript_id, + exon, + tot_exon, + trans_coding, + prot_coding, + cdna, + cds, + aa, + distance, + warnings, + ) = annots types.append(a_mut_type) tpos = 0 ppos = 0 - positions = '' + positions = "" # get cds/protein positions and convert mutation syntax to FRED2 format - if trans_coding != '': - positions = re.findall(r'\d+', trans_coding) + if trans_coding != "": + positions = re.findall(r"\d+", trans_coding) ppos = int(positions[0]) - 1 - if prot_coding != '': - positions = re.findall(r'\d+', prot_coding) + if prot_coding != "": + positions = re.findall(r"\d+", prot_coding) tpos = int(positions[0]) - 1 - isSynonymous = (a_mut_type == "synonymous_variant") + isSynonymous = a_mut_type == "synonymous_variant" gene = a_gene_id # there are no isoforms in biomart transcript_id = transcript_id.split(".")[0] - if 'NM' in transcript_id: + if "NM" in transcript_id: ID_SYSTEM_USED = EIdentifierTypes.REFSEQ - #take online coding variants into account, FRED2 cannot deal with stopgain variants right now - if not prot_coding or 'stop_gained' in a_mut_type: + # take online coding variants into account, FRED2 cannot deal with stopgain variants right now + if not prot_coding or "stop_gained" in a_mut_type: continue coding[transcript_id] = MutationSyntax(transcript_id, ppos, tpos, trans_coding, prot_coding) @@ -216,7 +232,9 @@ def read_vcf(filename, pass_only=True): if coding: pos, reference, alternative = get_fred2_annotation(vt, p, r, str(alt)) - var = Variant("line" + str(num), vt, c, pos, reference, alternative, coding, isHomozygous, isSynonymous) + var = Variant( + "line" + str(num), vt, c, pos, reference, alternative, coding, isHomozygous, isSynonymous + ) var.gene = gene var.log_metadata("vardbid", variation_dbid) dict_vars[var] = var @@ -239,5 +257,3 @@ def read_vcf(filename, pass_only=True): dict_vars[v] = vs_new return dict_vars.values(), transcript_ids - - diff --git a/conf/base.config b/conf/base.config index 2e9e9cfc..197923f3 100644 --- a/conf/base.config +++ b/conf/base.config @@ -20,6 +20,11 @@ process { // Process-specific resource requirements // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } diff --git a/conf/modules.config b/conf/modules.config index 9731f50d..3392f719 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -13,32 +13,41 @@ process { publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + path: {"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}"}, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: {filename -> filename.equals('versions.yml') ? null : filename}, + enabled: false ] - withName: SAMPLESHEET_CHECK { + withName: 'SAMPLESHEET_CHECK' { publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: {"${params.outdir}/pipeline_info"}, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: {filename -> filename.equals('versions.yml') ? 
null : filename} ] } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { + withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: {"${params.outdir}/pipeline_info"}, mode: params.publish_dir_mode, pattern: '*_versions.yml' ] } + withName: 'MULTIQC' { + publishDir = [ + path: {"${params.outdir}/multiqc"}, + mode: params.publish_dir_mode, + enabled: true + ] + } + } process { - withName: GENERATE_PROTEINS_FROM_VCF { + withName: 'GENERATE_PROTEINS_FROM_VCF' { ext.args = [ "-t ${params.variant_annotation_style}", "-r ${params.variant_reference}", @@ -47,21 +56,20 @@ process { params.variant_snp_filter ? "-fSNP" : "" ].join(' ').trim() publishDir = [ - path: { "${params.outdir}" }, + path: {"${params.outdir}"}, mode: params.publish_dir_mode, pattern: '*.fasta' ] } - withName: OPENMS_MAPALIGNERIDENTIFICATION { + withName: 'OPENMS_MAPALIGNERIDENTIFICATION' { ext.args = [ "-model:type linear", "-algorithm:max_rt_shift ${params.max_rt_alignment_shift}" ].join(' ').trim() - publishDir = [ enabled: false ] } - withName: OPENMS_COMETADAPTER { + withName: 'OPENMS_COMETADAPTER' { ext.args = [ "-precursor_mass_tolerance ${params.precursor_mass_tolerance}", "-fragment_mass_tolerance ${params.fragment_mass_tolerance}", @@ -72,14 +80,18 @@ process { "-missed_cleavages 0", "-precursor_charge ${params.prec_charge}", "-activation_method ${params.activation_method}", - "-variable_modifications ${params.variable_mods.tokenize(',').collect { "'${it}'" }.join(" ") }", + "-variable_modifications ${params.variable_mods.tokenize(',').collect {"'${it}'"}.join(" ")}", "-enzyme '${params.enzyme}'", "-spectrum_batch_size ${params.spectrum_batch_size}" ].join(' ').trim() - publishDir = [ enabled: false ] + publishDir = [ + path: {"${params.outdir}/intermediate_results/comet"}, + mode: params.publish_dir_mode, + pattern: '*.tsv' + ] } - withName: OPENMS_IDFILTER_FOR_ALIGNMENT { + withName: 'OPENMS_IDFILTER_FOR_ALIGNMENT' { ext.args = [ "-remove_decoys", "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", @@ -87,14 +99,14 @@ process { (params.fdr_threshold == '0.01') ? "-score:pep 0.05" : "-score:pep " + params.fdr_threshold ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/Intermediate_Results" }, + path: {"${params.outdir}/intermediate_results/alignment"}, mode: params.publish_dir_mode, pattern: '*.idXML' ] } - withName: OPENMS_IDFILTER_Q_VALUE { - ext.prefix = { "${meta.id}_filtered" } + withName: 'OPENMS_IDFILTER_Q_VALUE' { + ext.prefix = {"${meta.id}_fdr_filtered"} ext.args = [ "-remove_decoys", "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", @@ -102,14 +114,14 @@ process { (params.fdr_threshold == '0.01') ? "-score:pep 0.05" : "-score:pep " + params.fdr_threshold ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/Intermediate_Results" }, + path: {"${params.outdir}/intermediate_results/alignment"}, mode: params.publish_dir_mode, pattern: '*.idXML' ] } - withName: OPENMS_PERCOLATORADAPTER { - ext.prefix = { "${meta.id}_all_ids_merged_psm_perc" } + withName: 'OPENMS_PERCOLATORADAPTER' { + ext.prefix = {"${meta.id}_all_ids_merged_psm_perc"} ext.args = [ "-seed 4711", "-trainFDR 0.05", @@ -120,81 +132,95 @@ process { (params.fdr_level != 'psm-level-fdrs') ? 
"-" + params.fdr_level : "" ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/Intermediate_Results" }, + path: {"${params.outdir}/intermediate_results/percolator"}, mode: params.publish_dir_mode, pattern: '*.idXML' ] } - withName: OPENMS_PSMFEATUREEXTRACTOR { + withName: 'OPENMS_PSMFEATUREEXTRACTOR' { publishDir = [ - path: { "${params.outdir}/Intermediate_Results" }, + path: {"${params.outdir}/intermediate_results/features"}, mode: params.publish_dir_mode, pattern: '*.idXML' ] } - withName: 'OPENMS_DECOYDATABASE|OPENMS_THERMORAWFILEPARSER|OPENMS_PEAKPICKERHIRES|OPENMS_PEPTIDEINDEXER|OPENMS_FALSEDISCOVERYRATE|OPENMS_IDMERGER|OPENMS_MAPRTTRANSFORMER.*ML|OPENMS_FEATURE.*|OPENMS_IDCONFLICTRESOLVER' { - publishDir = [ enabled: false ] - } - withName: 'OPENMS_MZTABEXPORTER_QUANT' { publishDir = [ - path: { "${params.outdir}/Intermediate_Results" }, + path: {"${params.outdir}/intermediate_results/features"}, mode: params.publish_dir_mode, pattern: '*.mzTab' ] } + withName: 'OPENMS_FEATUREFINDERIDENTIFICATION' { + publishDir = [ + path: {"${params.outdir}/intermediate_results/features"}, + mode: params.publish_dir_mode, + pattern: '*.featureXML' + ] + } + withName: 'OPENMS_TEXTEXPORTER_UNQUANTIFIED|OPENMS_TEXTEXPORTER_QUANTIFIED' { - ext.args = [ - "-id:add_metavalues 0" - ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/" }, + path: {"${params.outdir}/"}, mode: params.publish_dir_mode, pattern: '*.tsv' ] } - withName: 'OPENMS_TEXTEXPORTER_PSMS' { + withName: 'OPENMS_TEXTEXPORTER_COMET' { + ext.prefix = {"${meta.sample}_${meta.condition}_${meta.id}"} publishDir = [ - path: { "${params.outdir}/PSMs" }, + path: {"${params.outdir}/intermediate_results/comet"}, mode: params.publish_dir_mode, pattern: '*.tsv' ] } + + withName: 'OPENMS_IDCONFLICTRESOLVER' { + publishDir = [ + path: {"${params.outdir}/intermediate_results/features"}, + mode: params.publish_dir_mode, + pattern: '*.consensusXML' + ] + } } // Refine on predicted subset process { - if ( params.refine_fdr_on_predicted_subset && params.predict_class_1 ) { - withName: OPENMS_MZTABEXPORTERPERC { - ext.prefix = { "${meta.sample}_${meta.condition}_all_ids_merged_psm_perc_filtered" } + if (params.refine_fdr_on_predicted_subset && params.predict_class_1) { + withName: 'OPENMS_MZTABEXPORTERPERC' { + ext.prefix = {"${meta.sample}_${meta.condition}_all_ids_merged_psm_perc_filtered"} publishDir = [ - path: { "${params.outdir}/Intermediate_Results" }, + path: {"${params.outdir}/intermediate_results/refined_fdr"}, mode: params.publish_dir_mode, pattern: '*.mzTab' ] } - withName: OPENMS_MZTABEXPORTERPSM { - ext.prefix = { "${meta.sample}_${meta.condition}_all_ids_merged" } + withName: 'OPENMS_MZTABEXPORTERPSM' { + ext.prefix = {"${meta.sample}_${meta.condition}_all_ids_merged"} publishDir = [ - path: { "${params.outdir}/Intermediate_Results" }, + path: {"${params.outdir}/intermediate_results/refined_fdr"}, mode: params.publish_dir_mode, pattern: '*.mzTab' ] } - withName: MHCFLURRY_PREDICTPSMS { - publishDir = [ enabled: false ] + withName: 'MHCFLURRY_PREDICTPSMS' { + publishDir = [ + path: {"${params.outdir}/intermediate_results/MHCFLURRY_PREDICTPSMS"}, + mode: params.publish_dir_mode, + pattern: '*.idXML' + ] } - withName: REFINE_FDR:OPENMS_PERCOLATORADAPTER { - ext.prefix = { "${meta.id}_perc_subset" } + withName: 'REFINE_FDR:OPENMS_PERCOLATORADAPTER' { + ext.prefix = {"${meta.id}_perc_subset"} ext.args = [ "-seed 4711", "-trainFDR 0.05", @@ -205,26 +231,26 @@ process { (params.fdr_level != 'psm-level-fdrs') ? 
"-" + params.fdr_level : "" ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/Intermediate_Results" }, + path: {"${params.outdir}/intermediate_results/refined_fdr"}, mode: params.publish_dir_mode, pattern: '*.idXML' ] } - withName: OPENMS_IDFILTER_PSMS { - ext.prefix = { "${meta.id}_pred_filtered" } + withName: 'OPENMS_IDFILTER_PSMS' { + ext.prefix = {"${meta.id}_pred_filtered"} ext.args = [ "-whitelist:ignore_modifications", "-whitelist:peptides" ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/Intermediate_Results" }, + path: {"${params.outdir}/intermediate_results/refined_fdr"}, mode: params.publish_dir_mode, pattern: '*.idXML' ] } - withName: OPENMS_IDFILTER_REFINED { + withName: 'OPENMS_IDFILTER_REFINED' { ext.args = [ "-remove_decoys", "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", @@ -232,7 +258,7 @@ process { (params.fdr_threshold == '0.01') ? "-score:pep 0.05" : "-score:pep " + params.fdr_threshold ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/Intermediate_Results" }, + path: {"${params.outdir}/intermediate_results/refined_fdr"}, mode: params.publish_dir_mode, pattern: '*.idXML' ] @@ -243,19 +269,19 @@ process { // Class I prediction process { - if ( params.predict_class_1 & !params.skip_quantification ) { - withName: MHCFLURRY_PREDICTPEPTIDESCLASS1 { + if (params.predict_class_1 & !params.skip_quantification) { + withName: 'MHCFLURRY_PREDICTPEPTIDESCLASS1' { publishDir = [ - path: { "${params.outdir}/class_1_bindings" }, + path: {"${params.outdir}/class_1_bindings"}, mode: params.publish_dir_mode, pattern: '*.fasta' ] } } - if ( params.predict_class_1 & !params.skip_quantification & params.include_proteins_from_vcf ) { - withName: PREDICT_POSSIBLE_CLASS1_NEOEPITOPES { - ext.prefix = { "${meta}_vcf_neoepitopes" } + if (params.predict_class_1 & !params.skip_quantification & params.include_proteins_from_vcf) { + withName: 'PREDICT_POSSIBLE_CLASS1_NEOEPITOPES' { + ext.prefix = {"${meta}_vcf_neoepitopes"} publishDir = [ path: {"${params.outdir}/class_1_bindings"}, mode: params.publish_dir_mode, @@ -263,8 +289,8 @@ process { ] } - withName: RESOLVE_FOUND_CLASS1_NEOEPITOPES { - ext.prefix = { "${meta.sample}_found_neoepitopes" } + withName: 'RESOLVE_FOUND_CLASS1_NEOEPITOPES' { + ext.prefix = {"${meta.sample}_found_neoepitopes"} publishDir = [ path: {"${params.outdir}/class_1_bindings"}, mode: params.publish_dir_mode, @@ -272,7 +298,7 @@ process { ] } - withName: MHCFLURRY_PREDICTNEOEPITOPESCLASS1 { + withName: 'MHCFLURRY_PREDICTNEOEPITOPESCLASS1' { publishDir = [ path: {"${params.outdir}/class_1_bindings"}, mode: params.publish_dir_mode, @@ -285,72 +311,72 @@ process { // Class II prediction process { - if ( params.predict_class_2 & !params.skip_quantification ) { + if (params.predict_class_2 & !params.skip_quantification) { - withName: MHCNUGGETS_PEPTIDESCLASS2PRE { + withName: 'MHCNUGGETS_PEPTIDESCLASS2PRE' { publishDir = [ - path: { "${params.outdir}/class_2_bindings" }, + path: {"${params.outdir}/class_2_bindings"}, mode: params.publish_dir_mode, pattern: '*_peptides' ] } - withName: MHCNUGGETS_PREDICTPEPTIDESCLASS2 { + withName: 'MHCNUGGETS_PREDICTPEPTIDESCLASS2' { publishDir = [ - path: { "${params.outdir}/class_2_bindings" }, + path: {"${params.outdir}/class_2_bindings"}, mode: params.publish_dir_mode, pattern: '*_class_2' ] } - withName: MHCNUGGETS_PEPTIDESCLASS2POST { + withName: 'MHCNUGGETS_PEPTIDESCLASS2POST' { publishDir = [ - path: { "${params.outdir}/class_2_bindings" }, + path: 
{"${params.outdir}/class_2_bindings"}, mode: params.publish_dir_mode, pattern: '*.csv' ] } } - if ( params.predict_class_2 & !params.skip_quantification & params.include_proteins_from_vcf ) { + if (params.predict_class_2 & !params.skip_quantification & params.include_proteins_from_vcf) { - withName: PREDICT_POSSIBLE_CLASS2_NEOEPITOPES { + withName: 'PREDICT_POSSIBLE_CLASS2_NEOEPITOPES' { publishDir = [ - path: { "${params.outdir}/class_2_bindings" }, + path: {"${params.outdir}/class_2_bindings"}, mode: params.publish_dir_mode, pattern: '*.csv' ] } - withName: RESOLVE_FOUND_CLASS2_NEOEPITOPES { + withName: 'RESOLVE_FOUND_CLASS2_NEOEPITOPES' { publishDir = [ - path: { "${params.outdir}/class_2_bindings" }, + path: {"${params.outdir}/class_2_bindings"}, mode: params.publish_dir_mode, pattern: '*.csv' ] } - withName: MHCNUGGETS_NEOEPITOPESCLASS2PRE { - ext.prefix = { "${meta}_mhcnuggets_preprocessed" } + withName: 'MHCNUGGETS_NEOEPITOPESCLASS2PRE' { + ext.prefix = {"${meta}_mhcnuggets_preprocessed"} publishDir = [ - path: { "${params.outdir}/class_2_bindings" }, + path: {"${params.outdir}/class_2_bindings"}, mode: params.publish_dir_mode, pattern: '*${ext.prefix}' ] } - withName: MHCNUGGETS_PREDICTNEOEPITOPESCLASS2 { - ext.prefix = { "${meta}_predicted_neoepitopes_class_2" } + withName: 'MHCNUGGETS_PREDICTNEOEPITOPESCLASS2' { + ext.prefix = {"${meta}_predicted_neoepitopes_class_2"} publishDir = [ - path: { "${params.outdir}/class_2_bindings" }, + path: {"${params.outdir}/class_2_bindings"}, mode: params.publish_dir_mode, pattern: '*${ext.prefix}' ] } - withName: MHCNUGGETS_NEOEPITOPESCLASS2POST { + withName: 'MHCNUGGETS_NEOEPITOPESCLASS2POST' { publishDir = [ - path: { "${params.outdir}/class_2_bindings" }, + path: {"${params.outdir}/class_2_bindings"}, mode: params.publish_dir_mode, pattern: '*.csv' ] @@ -360,28 +386,50 @@ process { process { - if ( params.predict_RT ) { - - withName: OPENMS_RTMODEL { - publishDir = [ enabled: false ] + if (params.predict_RT) { + withName: 'OPENMS_RTMODEL' { + publishDir = [ + path: {"${params.outdir}/RT_prediction"}, + mode: params.publish_dir_mode, + pattern: '*.txt|*.paramXML' + ] } - withName: OPENMS_RTPREDICT_FOUND_PEPTIDES { - ext.prefix = { "${meta.sample}_id_files_for_rt_prediction_RTpredicted" } + withName: 'OPENMS_RTPREDICT_FOUND_PEPTIDES' { + ext.prefix = {"${meta.sample}_id_files_for_rt_prediction_RTpredicted"} publishDir = [ - path: { "${params.outdir}/RT_prediction" }, + path: {"${params.outdir}/RT_prediction"}, mode: params.publish_dir_mode, pattern: '*.csv' ] } - withName: OPENMS_RTPREDICT_NEOEPITOPES { - ext.prefix = { "${meta.sample}_txt_file_for_rt_prediction_RTpredicted" } + withName: 'OPENMS_RTPREDICT_NEOEPITOPES' { + ext.prefix = {"${meta.sample}_txt_file_for_rt_prediction_RTpredicted"} publishDir = [ - path: { "${params.outdir}/RT_prediction" }, + path: {"${params.outdir}/RT_prediction"}, mode: params.publish_dir_mode, pattern: '*.csv' ] } } } + +process { + + if (params.annotate_ions) { + withName: 'PYOPENMS_IONANNOTATOR' { + ext.prefix = {"${meta.sample}"} + ext.args = [ + "--precursor_charge ${params.prec_charge}", + "--fragment_mass_tolerance ${params.fragment_mass_tolerance}", + "--remove_precursor_peak ${params.remove_precursor_peak}" + ].join(' ').trim() + publishDir = [ + path: {"${params.outdir}/intermediate_results/ion_annotations"}, + mode: params.publish_dir_mode, + pattern: '*.tsv' + ] + } + } +} diff --git a/docs/output.md b/docs/output.md index 3293647c..1e74ea39 100644 --- a/docs/output.md +++ b/docs/output.md @@ -60,14 
+60,28 @@ PEPTIDE contains information about peptide hits that were identified and corresp
Output files -- `Intermediate_Results/` - - `*merged_psm_perc_filtered.mzTab` : If `--refine_fdr_on_predicted_subset` is specified, consists of the hits (filtered by q-value) - - `*.mztab` : mztab file generated by the OpenMS MzTabExporter command, the community standard format for sharing mass spectrometry search results - - `*.featureXML` : If `--skip_quantification` is not specified, then this file is generated by the OpenMS FeatureFinderIdentification command - - `*fdr_filtered.idXML`: If `--skip_quantification` is not specified, then this file is generated by the OpenMS IDFilter command - - `*all_ids_merged_psm_perc*.idXML`: idXML files are generated when `--refine_fdr_on_predicted_subset` is specified - - `*peptide_filtered.idXML`: If `--refine_fdr_on_predicted_subset` is specified, then this file consists of the PSMs prediction outcome - - `*perc_subset.idXML`: If `--refine_fdr_on_predicted_subset` is specified, then this file is the outcome of the second percolator run, generated by the OpenMS PercolatorAdapter +- `intermediate_results/` + - `alignment` + - `*filtered.idXML` : If `--skip_quantification` is not specified, then this file is generated by the `OPENMS_IDFILTER_Q_VALUE` step + - `{ID}_-_{filename}_filtered` : An output file of `OPENMS_IDFILTER_FOR_ALIGNMENT`, only generated when `--skip_quantification` is not specified + - `comet` + - `{raw filename}.tsv` : The outcome of `CometAdapter`, containing more detailed information about all of the hits that have been found (no filtering has been applied) + - `{Sample}_{Condition}_{ID}.tsv` : Single files that hold information about the peptide sequences that have been identified (no filtering has been applied) + - `features` + - `*.mztab` : mzTab file generated by the OpenMS MzTabExporter command in the `PROCESS_FEATURE` step + - `*.idXML` : Outcome of `PSMFEATUREEXTRACTOR`, containing the extra features computed for each input PSM + - `*.featureXML` : These files are generated by the OpenMS `FeatureFinderIdentification` command + - `ion_annotations` + - `{Sample}_{Condition}_all_peaks.tsv`: Contains metadata of all measured ions of peptides reported after `OPENMS_IDFILTER_Q_VALUE`. + - `{Sample}_{Condition}_matching_ions.tsv`: Contains ion annotations and additional metadata of peptides reported after `OPENMS_IDFILTER_Q_VALUE`. + - `percolator` + - `*all_ids_merged_psm_perc.idXML` : idXML files generated with `OPENMS_PERCOLATORADAPTER` + - `refined_fdr` (only if `--refine_fdr_on_predicted_subset` is specified) + - `*merged_psm_perc_filtered.mzTab` : Exports the filtered Percolator results (by q-value) as mzTab + - `*_all_ids_merged.mzTab` : Exports all of the PSM results as mzTab + - `*perc_subset.idXML` : The outcome of a second OpenMS `PercolatorAdapter` run + - `*pred_filtered.idXML` : Contains PSMs filtered against the shrunken search space (the MHCFlurry prediction outcome). + - `{ID}_-_{filename}_filtered` : An output file of `OPENMS_IDFILTER_REFINED`
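For orientation, the reorganized layout described above can be verified from the command line. A minimal sketch, assuming the default `--outdir results` (folder names are taken from the list above; exact contents depend on sample IDs and enabled options):

```bash
# List the reorganized intermediate results (illustrative; assumes --outdir results)
ls results/intermediate_results/
# e.g. alignment  comet  features  ion_annotations  percolator
# (refined_fdr appears only with --refine_fdr_on_predicted_subset)
```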
@@ -98,15 +112,13 @@ The fasta database including mutated proteins used for the database search

 These CSV files list all of the theoretically possible neoepitope sequences from the variants specified in the vcf and neoepitopes that are found during the mass spectrometry search, independent of binding predictions, respectively

-#### found_neoepitopes
+#### Found neoepitopes
Output files - `class_1_bindings/` - - `*found_neoepitopes_class1.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_1` are specified - - `class_2_bindings/` - `*found_neoepitopes_class2.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_2` are specified @@ -125,9 +137,7 @@ peptide sequence geneID Output files - `class_1_bindings/` - - `*vcf_neoepitopes_class1.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_1` are specified - - `class_2_bindings/` - `*vcf_neoepitopes_class2.csv`: Generated when `--include_proteins_from_vcf` and `--predict_class_2` are specified @@ -148,9 +158,7 @@ Sequence Antigen ID Variants Output files - `class_1_bindings/` - - `*predicted_peptides_class_1.csv`: If `--predict_class_1` is specified, then this CSV is generated - - `class_2_bindings/` - `*predicted_peptides_class_2.csv`: If `--predict_class_2` is specified, then this CSV is generated @@ -196,8 +204,8 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ Output files - `pipeline_info/` - - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline. + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.html`. + - Reports generated by the pipeline: `software_versions.yml`. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
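As an aside on how the tab-separated samplesheet described in the usage docs below actually flows into the pipeline: the `INPUT_CHECK` subworkflow further down in this diff now splits the validated sheet on tabs and attaches the `ID` column to the meta map. A standalone sketch of that flow, assuming the column names from `docs/usage.md` (the file path here is illustrative):

```groovy
// Hypothetical standalone snippet mirroring subworkflows/local/input_check.nf
// after this diff: read the validated (tab-separated) samplesheet and build
// [ meta, file ] tuples for the downstream MS steps.
workflow {
    Channel
        .fromPath('samplesheet.valid.csv', checkIfExists: true)
        .splitCsv(header: true, sep: '\t')   // note: tab-separated despite the .csv suffix
        .map { row ->
            def meta = [id: row.ID, sample: row.Sample, condition: row.Condition]
            [ meta, file(row.ReplicateFileName) ]
        }
        .view()
}
```

The `meta.id` field added by this diff is what lets replicate files of the same sample/condition be grouped and later re-identified after map alignment.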
diff --git a/docs/usage.md b/docs/usage.md
index 11634c23..a4e89179 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -8,7 +8,7 @@
 You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a tab-separated file with 4 columns, and a header row as shown in the examples below.

-```console
+```bash
 --input '[path to samplesheet file]'
 ```

@@ -29,7 +29,7 @@ ID Sample Condition ReplicateFileName
 The pipeline will auto-detect whether a sample is an mzML or a raw file using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 4 columns to match those defined in the table below.

-A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice.
+A final samplesheet file may look something like the one below.

 ```tsv
 ID Sample Condition ReplicateFileName
@@ -69,9 +69,9 @@ This will launch the pipeline with the `docker` configuration profile. See below
 Note that the pipeline will create the following files in your working directory:

-```console
+```bash
 work # Directory containing the nextflow working files
- # Finished results in specified location (defined with --outdir)
+ # Finished results in specified location (defined with --outdir)
 .nextflow_log # Log file from Nextflow
 # Other nextflow hidden files, eg. history of pipeline runs and old logs.
 ```

@@ -80,7 +80,7 @@ work # Directory containing the nextflow working files
 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:

-```console
+```bash
 nextflow pull nf-core/mhcquant
 ```

@@ -240,6 +240,14 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config
 If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs).

+## Azure Resource Requests
+
+To be used with the `azurebatch` profile by specifying `-profile azurebatch`.
+We recommend setting `params.vm_type` to `Standard_D16_v3` VMs by default, but this can be changed if required.
+
+Note that the choice of VM size depends on your quota and the overall workload during the analysis.
+For a thorough list, please refer to the [Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes).
+
 ## Running in the background

 Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished.

@@ -254,6 +262,6 @@ Some HPC setups also allow you to run nextflow within a cluster job submitted yo
 In some cases, the Nextflow Java virtual machines can start to request a large amount of memory.
 We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`):

-```console
+```bash
 NXF_OPTS='-Xms1g -Xmx4g'
 ```
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy
index 2fc0a9b9..27feb009 100755
--- a/lib/NfcoreTemplate.groovy
+++ b/lib/NfcoreTemplate.groovy
@@ -145,6 +145,61 @@ class NfcoreTemplate {
         output_tf.withWriter { w -> w << email_txt }
     }

+    //
+    // Construct and send adaptive card
+    // https://adaptivecards.io
+    //
+    public static void adaptivecard(workflow, params, summary_params, projectDir, log) {
+        def hook_url = params.hook_url
+
+        def summary = [:]
+        for (group in summary_params.keySet()) {
+            summary << summary_params[group]
+        }
+
+        def misc_fields = [:]
+        misc_fields['start'] = workflow.start
+        misc_fields['complete'] = workflow.complete
+        misc_fields['scriptfile'] = workflow.scriptFile
+        misc_fields['scriptid'] = workflow.scriptId
+        if (workflow.repository) misc_fields['repository'] = workflow.repository
+        if (workflow.commitId) misc_fields['commitid'] = workflow.commitId
+        if (workflow.revision) misc_fields['revision'] = workflow.revision
+        misc_fields['nxf_version'] = workflow.nextflow.version
+        misc_fields['nxf_build'] = workflow.nextflow.build
+        misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp
+
+        def msg_fields = [:]
+        msg_fields['version'] = workflow.manifest.version
+        msg_fields['runName'] = workflow.runName
+        msg_fields['success'] = workflow.success
+        msg_fields['dateComplete'] = workflow.complete
+        msg_fields['duration'] = workflow.duration
+        msg_fields['exitStatus'] = workflow.exitStatus
+        msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
+        msg_fields['errorReport'] = (workflow.errorReport ?: 'None')
+        msg_fields['commandLine'] = workflow.commandLine
+        msg_fields['projectDir'] = workflow.projectDir
+        msg_fields['summary'] = summary << misc_fields
+
+        // Render the JSON template
+        def engine = new groovy.text.GStringTemplateEngine()
+        def hf = new File("$projectDir/assets/adaptivecard.json")
+        def json_template = engine.createTemplate(hf).make(msg_fields)
+        def json_message = json_template.toString()
+
+        // POST
+        def post = new URL(hook_url).openConnection();
+        post.setRequestMethod("POST")
+        post.setDoOutput(true)
+        post.setRequestProperty("Content-Type", "application/json")
+        post.getOutputStream().write(json_message.getBytes("UTF-8"));
+        def postRC = post.getResponseCode();
+        if (! postRC.equals(200)) {
+            log.warn(post.getErrorStream().getText());
+        }
+    }
+
     //
     // Print pipeline summary on completion
     //
diff --git a/lib/Utils.groovy b/lib/Utils.groovy
index 28567bd7..8d030f4e 100644
--- a/lib/Utils.groovy
+++ b/lib/Utils.groovy
@@ -21,19 +21,26 @@ class Utils {
        }

        // Check that all channels are present
-       def required_channels = ['conda-forge', 'bioconda', 'defaults']
-       def conda_check_failed = !required_channels.every { ch -> ch in channels }
+       // This channel list is ordered by required channel priority.
+       def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults']
+       def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean

        // Check that they are in the right order
-       conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda'))
-       conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults'))
+       def channel_priority_violation = false
+       def n = required_channels_in_order.size()
+       for (int i = 0; i < n - 1; i++) {
+           channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1]))
+       }

-       if (conda_check_failed) {
+       if (channels_missing | channel_priority_violation) {
            log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
                "  There is a problem with your Conda configuration!\n\n" +
                "  You will need to set-up the conda-forge and bioconda channels correctly.\n" +
-               "  Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" +
-               "  NB: The order of the channels matters!\n" +
+               "  Please refer to https://bioconda.github.io/\n" +
+               "  The observed channel order is \n" +
+               "  ${channels}\n" +
+               "  but the following channel order is required:\n" +
+               "  ${required_channels_in_order}\n" +
                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
        }
    }
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index c34a530f..9dba706b 100644
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -60,6 +60,7 @@ class WorkflowMain {
        }

        // Print parameter summary log to screen
+
        log.info paramsSummaryLog(workflow, params, log)

        // Check that a -profile or Nextflow config has been provided to run the pipeline
@@ -79,17 +80,4 @@ class WorkflowMain {
            System.exit(1)
        }
    }
-
-    //
-    // Get attribute from genome config file e.g. fasta
-    //
-    public static String getGenomeAttribute(params, attribute) {
-        def val = ''
-        if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
-            if (params.genomes[ params.genome ].containsKey(attribute)) {
-                val = params.genomes[ params.genome ][ attribute ]
-            }
-        }
-        return val
-    }
 }
diff --git a/lib/WorkflowMhcquant.groovy b/lib/WorkflowMhcquant.groovy
index a68f8f4b..05bc9acf 100644
--- a/lib/WorkflowMhcquant.groovy
+++ b/lib/WorkflowMhcquant.groovy
@@ -2,6 +2,8 @@
 // This file holds several functions specific to the workflow/mhcquant.nf in the nf-core/mhcquant pipeline
 //

+import groovy.text.SimpleTemplateEngine
+
 class WorkflowMhcquant {

     //
@@ -136,4 +138,20 @@ class WorkflowMhcquant {

         return yaml_file_text
     }
+
+    public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) {
+        // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file
+        def meta = [:]
+        meta.workflow = run_workflow.toMap()
+        meta["manifest_map"] = run_workflow.manifest.toMap()
+
+        meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
+        meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used.
  • " + + def methods_text = mqc_methods_yaml.text + + def engine = new SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html + } } diff --git a/main.nf b/main.nf index 228e81a3..9e273de6 100644 --- a/main.nf +++ b/main.nf @@ -4,6 +4,7 @@ nf-core/mhcquant ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/mhcquant + Website: https://nf-co.re/mhcquant Slack : https://nfcore.slack.com/channels/mhcquant ---------------------------------------------------------------------------------------- @@ -11,14 +12,6 @@ nextflow.enable.dsl = 2 -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE & PRINT PARAMETER SUMMARY diff --git a/modules.json b/modules.json index 03d43200..0d87f6cf 100644 --- a/modules.json +++ b/modules.json @@ -2,12 +2,18 @@ "name": "nf-core/mhcquant", "homePage": "https://github.com/nf-core/mhcquant", "repos": { - "nf-core/modules": { - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "multiqc": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "multiqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + } + } } } } diff --git a/modules/local/generate_proteins_from_vcf.nf b/modules/local/generate_proteins_from_vcf.nf index 2d86d99b..dc04f818 100644 --- a/modules/local/generate_proteins_from_vcf.nf +++ b/modules/local/generate_proteins_from_vcf.nf @@ -12,7 +12,10 @@ process GENERATE_PROTEINS_FROM_VCF { output: tuple val(meta), path("*.fasta"), emit: vcf_fasta - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: def prefix = task.ext.prefix ?: "${fasta.baseName}_added_vcf" diff --git a/modules/local/mhcflurry_predictneoepitopesclass1.nf b/modules/local/mhcflurry_predictneoepitopesclass1.nf index 08e64aba..071b0bba 100644 --- a/modules/local/mhcflurry_predictneoepitopesclass1.nf +++ b/modules/local/mhcflurry_predictneoepitopesclass1.nf @@ -14,6 +14,9 @@ process MHCFLURRY_PREDICTNEOEPITOPESCLASS1 { tuple val(meta), path("*.csv"), emit: csv path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.suffix ?: "${neoepitopes}_${meta}_predicted_neoepitopes_class_1" diff --git a/modules/local/mhcflurry_predictpeptidesclass1.nf b/modules/local/mhcflurry_predictpeptidesclass1.nf index 93e9ab79..efd670cf 100644 --- a/modules/local/mhcflurry_predictpeptidesclass1.nf +++ b/modules/local/mhcflurry_predictpeptidesclass1.nf @@ -14,6 +14,9 @@ process MHCFLURRY_PREDICTPEPTIDESCLASS1 { tuple val(meta), path("*.csv"), emit: csv path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.suffix ?: "${meta.id}_predicted_peptides_class_1" diff --git a/modules/local/mhcflurry_predictpsms.nf b/modules/local/mhcflurry_predictpsms.nf index 8065590f..b66d2889 100644 
--- a/modules/local/mhcflurry_predictpsms.nf +++ b/modules/local/mhcflurry_predictpsms.nf @@ -14,6 +14,9 @@ process MHCFLURRY_PREDICTPSMS { tuple val(meta), path("*.idXML"), emit: idxml path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.suffix ?: "${meta.id}_peptide_filter" diff --git a/modules/local/mhcnuggets_neoepitopesclass2post.nf b/modules/local/mhcnuggets_neoepitopesclass2post.nf index 92760a25..a97f6a2c 100644 --- a/modules/local/mhcnuggets_neoepitopesclass2post.nf +++ b/modules/local/mhcnuggets_neoepitopesclass2post.nf @@ -14,6 +14,9 @@ process MHCNUGGETS_NEOEPITOPESCLASS2POST { tuple val(meta), path("*.csv"), emit: csv path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: """ diff --git a/modules/local/mhcnuggets_neoepitopesclass2pre.nf b/modules/local/mhcnuggets_neoepitopesclass2pre.nf index 3f466f27..8d03799a 100644 --- a/modules/local/mhcnuggets_neoepitopesclass2pre.nf +++ b/modules/local/mhcnuggets_neoepitopesclass2pre.nf @@ -14,6 +14,9 @@ process MHCNUGGETS_NEOEPITOPESCLASS2PRE { tuple val(meta), path("*.csv") , emit: preprocessed path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta}_mhcnuggets_preprocessed" diff --git a/modules/local/mhcnuggets_peptidesclass2post.nf b/modules/local/mhcnuggets_peptidesclass2post.nf index 48f901b7..f35c7698 100644 --- a/modules/local/mhcnuggets_peptidesclass2post.nf +++ b/modules/local/mhcnuggets_peptidesclass2post.nf @@ -14,6 +14,9 @@ process MHCNUGGETS_PEPTIDESCLASS2POST { tuple val(meta), path('*.csv'), emit: csv path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.sample}_postprocessed" diff --git a/modules/local/mhcnuggets_peptidesclass2pre.nf b/modules/local/mhcnuggets_peptidesclass2pre.nf index d2ad8c60..56906804 100644 --- a/modules/local/mhcnuggets_peptidesclass2pre.nf +++ b/modules/local/mhcnuggets_peptidesclass2pre.nf @@ -15,6 +15,9 @@ process MHCNUGGETS_PEPTIDESCLASS2PRE { tuple val(meta), path('peptide_to_geneID'), emit: geneID path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.sample}_preprocessed_mhcnuggets_peptides" diff --git a/modules/local/mhcnuggets_predictneoepitopesclass2.nf b/modules/local/mhcnuggets_predictneoepitopesclass2.nf index 263fffa9..7f586643 100644 --- a/modules/local/mhcnuggets_predictneoepitopesclass2.nf +++ b/modules/local/mhcnuggets_predictneoepitopesclass2.nf @@ -14,6 +14,9 @@ process MHCNUGGETS_PREDICTNEOEPITOPESCLASS2 { tuple val(meta), path("*.csv"), emit: csv path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta}_predicted_neoepitopes_class_2" diff --git a/modules/local/mhcnuggets_predictpeptidesclass2.nf b/modules/local/mhcnuggets_predictpeptidesclass2.nf index 2481d379..5fb5fc5f 100644 --- a/modules/local/mhcnuggets_predictpeptidesclass2.nf +++ b/modules/local/mhcnuggets_predictpeptidesclass2.nf @@ -14,6 +14,9 @@ process MHCNUGGETS_PREDICTPEPTIDESCLASS2 { tuple val(meta), path("*_class_2"), emit: csv path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.sample}_predicted_peptides_class_2" diff --git a/modules/local/openms_cometadapter.nf b/modules/local/openms_cometadapter.nf index 
25819d78..e74dcb0f 100644 --- a/modules/local/openms_cometadapter.nf +++ b/modules/local/openms_cometadapter.nf @@ -2,18 +2,22 @@ process OPENMS_COMETADAPTER { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::openms-thirdparty=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms-thirdparty=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.6.0--0' : - 'quay.io/biocontainers/openms-thirdparty:2.6.0--0' }" + 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_2' : + 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_2' }" input: tuple val(meta), path(mzml), path(fasta) output: tuple val(meta), path("*.idXML"), emit: idxml + tuple val(meta), path("*.tsv") , emit: tsv path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${mzml.baseName}" def args = task.ext.args ?: '' @@ -31,6 +35,7 @@ process OPENMS_COMETADAPTER { -out ${prefix}.idXML \\ -database $fasta \\ -threads $task.cpus \\ + -pin_out ${prefix}.tsv \\ $args \\ $mods \\ $xions \\ diff --git a/modules/local/openms_decoydatabase.nf b/modules/local/openms_decoydatabase.nf index 7b74286c..f436a5c8 100644 --- a/modules/local/openms_decoydatabase.nf +++ b/modules/local/openms_decoydatabase.nf @@ -2,10 +2,10 @@ process OPENMS_DECOYDATABASE { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::openms=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.6.0--h4afb90d_0' : - 'quay.io/biocontainers/openms:2.6.0--h4afb90d_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" input: tuple val(meta), path(fasta) @@ -14,6 +14,9 @@ process OPENMS_DECOYDATABASE { tuple val(meta), path("*.fasta"), emit: decoy path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${fasta.baseName}_decoy" diff --git a/modules/local/openms_falsediscoveryrate.nf b/modules/local/openms_falsediscoveryrate.nf index 3e934bd0..84957efd 100644 --- a/modules/local/openms_falsediscoveryrate.nf +++ b/modules/local/openms_falsediscoveryrate.nf @@ -2,10 +2,10 @@ process OPENMS_FALSEDISCOVERYRATE { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::openms=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.6.0--h4afb90d_0' : - 'quay.io/biocontainers/openms:2.6.0--h4afb90d_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" input: tuple val(meta), path(idxml) @@ -14,6 +14,9 @@ process OPENMS_FALSEDISCOVERYRATE { tuple val(meta), path("*.idXML"), emit: idxml path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${idxml.baseName}_fdr" diff --git a/modules/local/openms_featurefinderidentification.nf b/modules/local/openms_featurefinderidentification.nf index 02211102..df4c7472 100644 --- a/modules/local/openms_featurefinderidentification.nf +++ b/modules/local/openms_featurefinderidentification.nf @@ -2,18 +2,21 @@ process OPENMS_FEATUREFINDERIDENTIFICATION { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::openms=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.6.0--h4afb90d_0' : - 'quay.io/biocontainers/openms:2.6.0--h4afb90d_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" input: - tuple val(meta), path(id_quant_int), path(mzml), path(id_quant) + tuple val(meta), path(id_quant_int), path(mzml), path(id_quant) output: tuple val(meta), path("*.featureXML"), emit: featurexml path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.sample}_${meta.id}" def arguments = params.quantification_fdr ? "-id $id_quant_int -id_ext $id_quant -svm:min_prob ${params.quantification_min_prob}" : "-id $id_quant" diff --git a/modules/local/openms_featurelinkerunlabeledkd.nf b/modules/local/openms_featurelinkerunlabeledkd.nf index 61276b64..15cb724a 100644 --- a/modules/local/openms_featurelinkerunlabeledkd.nf +++ b/modules/local/openms_featurelinkerunlabeledkd.nf @@ -2,10 +2,10 @@ process OPENMS_FEATURELINKERUNLABELEDKD { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::openms-thirdparty=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms-thirdparty=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.6.0--0' : - 'quay.io/biocontainers/openms-thirdparty:2.6.0--0' }" + 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_2' : + 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_2' }" input: tuple val(meta), path(features) @@ -14,6 +14,9 @@ process OPENMS_FEATURELINKERUNLABELEDKD { tuple val(meta), path("*.consensusXML"), emit: consensusxml path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.id}_all_features_merged" diff --git a/modules/local/openms_idconflictresolver.nf b/modules/local/openms_idconflictresolver.nf index 2bf8f16d..35fb067d 100644 --- a/modules/local/openms_idconflictresolver.nf +++ b/modules/local/openms_idconflictresolver.nf @@ -2,10 +2,10 @@ process OPENMS_IDCONFLICTRESOLVER { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::openms=2.6.0" : null) + conda (params.enable_conda ? 
"bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.6.0--h4afb90d_0' : - 'quay.io/biocontainers/openms:2.6.0--h4afb90d_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" input: tuple val(meta), path(consensus) @@ -14,6 +14,9 @@ process OPENMS_IDCONFLICTRESOLVER { tuple val(meta), path("*.consensusXML"), emit: consensusxml path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.id}_resolved" diff --git a/modules/local/openms_idfilter.nf b/modules/local/openms_idfilter.nf index 2908b7d7..34af2352 100644 --- a/modules/local/openms_idfilter.nf +++ b/modules/local/openms_idfilter.nf @@ -2,10 +2,10 @@ process OPENMS_IDFILTER { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::openms=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.6.0--h4afb90d_0' : - 'quay.io/biocontainers/openms:2.6.0--h4afb90d_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" input: tuple val(meta), path(idxml), file(peptide_filter) @@ -14,6 +14,9 @@ process OPENMS_IDFILTER { tuple val(meta), path("*.idXML"), emit: idxml path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def whitelist = "$peptide_filter" def prefix = task.ext.prefix ?: "${meta.id}_-_${idxml.baseName}_filtered" diff --git a/modules/local/openms_idmerger.nf b/modules/local/openms_idmerger.nf index 623b11a0..dc2ebf22 100644 --- a/modules/local/openms_idmerger.nf +++ b/modules/local/openms_idmerger.nf @@ -2,10 +2,10 @@ process OPENMS_IDMERGER { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::openms=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.6.0--h4afb90d_0' : - 'quay.io/biocontainers/openms:2.6.0--h4afb90d_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" input: tuple val(meta), path(aligned) @@ -14,6 +14,9 @@ process OPENMS_IDMERGER { tuple val(meta), path("*.idXML"), emit: idxml path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.sample}_${meta.condition}_all_ids_merged" @@ -21,7 +24,7 @@ process OPENMS_IDMERGER { IDMerger -in $aligned \\ -out ${prefix}.idXML \\ -threads $task.cpus \\ - -annotate_file_origin \\ + -annotate_file_origin true \\ -merge_proteins_add_PSMs cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/openms_mapaligneridentification.nf b/modules/local/openms_mapaligneridentification.nf index d9ece728..b8929028 100644 --- a/modules/local/openms_mapaligneridentification.nf +++ b/modules/local/openms_mapaligneridentification.nf @@ -2,10 +2,10 @@ process OPENMS_MAPALIGNERIDENTIFICATION { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? 
"bioconda::openms=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.6.0--h4afb90d_0' : - 'quay.io/biocontainers/openms:2.6.0--h4afb90d_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" input: tuple val(meta), path(idxml) @@ -14,6 +14,9 @@ process OPENMS_MAPALIGNERIDENTIFICATION { tuple val(meta), path("*.trafoXML"), emit: trafoxml path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def out_names = idxml.collect { it.baseName+'.trafoXML' }.join(' ') def args = task.ext.args ?: '' diff --git a/modules/local/openms_maprttransformer.nf b/modules/local/openms_maprttransformer.nf index 618e72b2..459e293c 100644 --- a/modules/local/openms_maprttransformer.nf +++ b/modules/local/openms_maprttransformer.nf @@ -2,10 +2,10 @@ process OPENMS_MAPRTTRANSFORMER { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::openms=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.6.0--h4afb90d_0' : - 'quay.io/biocontainers/openms:2.6.0--h4afb90d_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" input: tuple val(meta), path(alignment_file), path(trafoxml) @@ -14,6 +14,9 @@ process OPENMS_MAPRTTRANSFORMER { tuple val(meta), path("*_aligned.*"), emit: aligned path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.id}_aligned" def fileExt = alignment_file.collect { it.name.tokenize("\\.")[1] }.join(' ') diff --git a/modules/local/openms_mztabexporter.nf b/modules/local/openms_mztabexporter.nf index a24fd829..f45262bd 100644 --- a/modules/local/openms_mztabexporter.nf +++ b/modules/local/openms_mztabexporter.nf @@ -2,10 +2,10 @@ process OPENMS_MZTABEXPORTER { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::openms=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.6.0--h4afb90d_0' : - 'quay.io/biocontainers/openms:2.6.0--h4afb90d_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" input: tuple val(meta), path(mztab) @@ -14,6 +14,9 @@ process OPENMS_MZTABEXPORTER { tuple val(meta), path("*.mzTab"), emit: mztab path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.sample}_${meta.condition}" def args = task.ext.args ?: '' diff --git a/modules/local/openms_peakpickerhires.nf b/modules/local/openms_peakpickerhires.nf index 5441b2cd..a788149a 100644 --- a/modules/local/openms_peakpickerhires.nf +++ b/modules/local/openms_peakpickerhires.nf @@ -2,10 +2,10 @@ process OPENMS_PEAKPICKERHIRES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::openms=2.6.0" : null) + conda (params.enable_conda ? 
"bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.6.0--h4afb90d_0' : - 'quay.io/biocontainers/openms:2.6.0--h4afb90d_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" input: tuple val(meta), path(mzml) @@ -14,6 +14,9 @@ process OPENMS_PEAKPICKERHIRES { tuple val(meta), path("*.mzML"), emit: mzml path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${mzml.baseName}" diff --git a/modules/local/openms_peptideindexer.nf b/modules/local/openms_peptideindexer.nf index 7d850dca..2cd44915 100644 --- a/modules/local/openms_peptideindexer.nf +++ b/modules/local/openms_peptideindexer.nf @@ -2,10 +2,10 @@ process OPENMS_PEPTIDEINDEXER { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::openms=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.6.0--h4afb90d_0' : - 'quay.io/biocontainers/openms:2.6.0--h4afb90d_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" input: tuple val(meta), path(idxml), path(fasta) @@ -14,6 +14,9 @@ process OPENMS_PEPTIDEINDEXER { tuple val(meta), path("*.idXML"), emit: idxml path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${idxml.baseName}_-_idx" diff --git a/modules/local/openms_percolatoradapter.nf b/modules/local/openms_percolatoradapter.nf index 01538745..b477832e 100644 --- a/modules/local/openms_percolatoradapter.nf +++ b/modules/local/openms_percolatoradapter.nf @@ -2,10 +2,10 @@ process OPENMS_PERCOLATORADAPTER { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::openms-thirdparty=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms-thirdparty=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.6.0--0' : - 'quay.io/biocontainers/openms-thirdparty:2.6.0--0' }" + 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_2' : + 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_2' }" input: tuple val(meta), path(psm) @@ -14,6 +14,9 @@ process OPENMS_PERCOLATORADAPTER { tuple val(meta), path("*.idXML"), emit: idxml path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.id}" def args = task.ext.args ?: '' diff --git a/modules/local/openms_psmfeatureextractor.nf b/modules/local/openms_psmfeatureextractor.nf index e6272f70..052d76bc 100644 --- a/modules/local/openms_psmfeatureextractor.nf +++ b/modules/local/openms_psmfeatureextractor.nf @@ -2,10 +2,10 @@ process OPENMS_PSMFEATUREEXTRACTOR { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::openms=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:2.6.0--h4afb90d_0' : - 'quay.io/biocontainers/openms:2.6.0--h4afb90d_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" input: tuple val(meta), path(merged) @@ -14,6 +14,9 @@ process OPENMS_PSMFEATUREEXTRACTOR { tuple val(meta), path("*.idXML"), emit: idxml path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${merged.baseName}_psm" def args = task.ext.args ?: '' diff --git a/modules/local/openms_rtmodel.nf b/modules/local/openms_rtmodel.nf index c16c2f0f..dc224e7f 100644 --- a/modules/local/openms_rtmodel.nf +++ b/modules/local/openms_rtmodel.nf @@ -2,10 +2,10 @@ process OPENMS_RTMODEL { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::openms=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:2.6.0--h4afb90d_0' : - 'quay.io/biocontainers/openms:2.6.0--h4afb90d_0' }" + 'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' : + 'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }" input: tuple val(meta), path(rt_training) @@ -14,6 +14,9 @@ process OPENMS_RTMODEL { tuple val(meta), path("*_rt_training.txt"), path("*.paramXML"), path("*_trainset.txt"), emit: complete path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.sample}" diff --git a/modules/local/openms_rtpredict.nf b/modules/local/openms_rtpredict.nf index 481905b7..9e174670 100644 --- a/modules/local/openms_rtpredict.nf +++ b/modules/local/openms_rtpredict.nf @@ -2,10 +2,10 @@ process OPENMS_RTPREDICT { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::openms-thirdparty=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms-thirdparty=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.6.0--0' : - 'quay.io/biocontainers/openms-thirdparty:2.6.0--0' }" + 'https://depot.galaxyproject.org/singularity/openms-thirdparty:2.8.0--h9ee0642_2' : + 'quay.io/biocontainers/openms-thirdparty:2.8.0--h9ee0642_2' }" input: tuple val(meta), path(idxml), path(rt_model), path(rt_params), path(trainset) @@ -14,6 +14,9 @@ process OPENMS_RTPREDICT { tuple val(meta), path("*.csv"), emit: csv path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.sample}_RTpredicted" diff --git a/modules/local/openms_textexporter.nf b/modules/local/openms_textexporter.nf index 3d771255..c63d1977 100644 --- a/modules/local/openms_textexporter.nf +++ b/modules/local/openms_textexporter.nf @@ -2,10 +2,10 @@ process OPENMS_TEXTEXPORTER { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::openms=2.6.0" : null) + conda (params.enable_conda ? "bioconda::openms=2.8.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
-        'https://depot.galaxyproject.org/singularity/openms:2.6.0--h4afb90d_0' :
-        'quay.io/biocontainers/openms:2.6.0--h4afb90d_0' }"
+        'https://depot.galaxyproject.org/singularity/openms:2.8.0--h7ca0330_2' :
+        'quay.io/biocontainers/openms:2.8.0--h7ca0330_2' }"

    input:
    tuple val(meta), path(consensus_resolved)

@@ -14,6 +14,9 @@ process OPENMS_TEXTEXPORTER {
    tuple val(meta), path("*.tsv"), emit: tsv
    path "versions.yml" , emit: versions

+    when:
+    task.ext.when == null || task.ext.when
+
    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def args = task.ext.args ?: ''
diff --git a/modules/local/openms_thermorawfileparser.nf b/modules/local/openms_thermorawfileparser.nf
index b2541369..65c1e7eb 100644
--- a/modules/local/openms_thermorawfileparser.nf
+++ b/modules/local/openms_thermorawfileparser.nf
@@ -2,10 +2,10 @@ process OPENMS_THERMORAWFILEPARSER {
    tag "$meta.id"
    label 'process_medium'

-    conda (params.enable_conda ? "bioconda::thermorawfileparser::1.3.4" : null)
+    conda (params.enable_conda ? "bioconda::thermorawfileparser=1.4.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.3.4--ha8f3691_0' :
-        'quay.io/biocontainers/thermorawfileparser:1.3.4--ha8f3691_0' }"
+        'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.4.0--ha8f3691_0' :
+        'quay.io/biocontainers/thermorawfileparser:1.4.0--ha8f3691_0' }"

    input:
    tuple val(meta), path(rawfile)

@@ -14,9 +14,13 @@ process OPENMS_THERMORAWFILEPARSER {
    tuple val(meta), path("*.mzML"), emit: mzml
    path "versions.yml" , emit: versions

+    when:
+    task.ext.when == null || task.ext.when
+
    script:
    def prefix = task.ext.prefix ?: "${rawfile.baseName}"

+    // The ThermoRawFileParser expects an input file, which is converted to an indexed mzML (defined by '-f 2')
    """
    ThermoRawFileParser.sh \\
        -i $rawfile \\
diff --git a/modules/local/predict_possible_neoepitopes.nf b/modules/local/predict_possible_neoepitopes.nf
index c4192643..48b21cd3 100644
--- a/modules/local/predict_possible_neoepitopes.nf
+++ b/modules/local/predict_possible_neoepitopes.nf
@@ -15,6 +15,9 @@ process PREDICT_POSSIBLE_NEOEPITOPES {
    tuple val(meta), path("*.txt"), emit: txt
    path "versions.yml" , emit: versions

+    when:
+    task.ext.when == null || task.ext.when
+
    script:
    def prefix = task.ext.prefix ?: "${meta}_vcf_neoepitopes"
diff --git a/modules/local/pyopenms_ionannotator.nf b/modules/local/pyopenms_ionannotator.nf
new file mode 100644
index 00000000..1972e91c
--- /dev/null
+++ b/modules/local/pyopenms_ionannotator.nf
@@ -0,0 +1,46 @@
+process PYOPENMS_IONANNOTATOR {
+    tag "$sample"
+    label 'process_high'
+
+    conda (params.enable_conda ? "bioconda::pyopenms=2.8.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pyopenms:2.8.0--py310h3dc0cdb_1' :
+        'quay.io/biocontainers/pyopenms:2.8.0--py310h3dc0cdb_1' }"
+
+    input:
+    tuple val(sample), path(mzml), path(fdr_filtered_idxml)
+
+    output:
+    tuple val(sample), path("*.tsv"), path("*.tsv"), emit: tsv
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: "${mzml.baseName}"
+    def args = task.ext.args ?: ''
+
+    def xions = params.use_x_ions ? "-use_x_ions" : ""
+    def zions = params.use_z_ions ? "-use_z_ions" : ""
+    def aions = params.use_a_ions ? "-use_a_ions" : ""
+    def cions = params.use_c_ions ?
"-use_c_ions" : "" + + """ + get_ion_annotations.py \\ + --input $mzml \\ + -idxml $fdr_filtered_idxml \\ + --prefix $sample \\ + $args \\ + $xions \\ + $zions \\ + $aions \\ + $cions + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pyopenms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/resolve_found_neoepitopes.nf b/modules/local/resolve_found_neoepitopes.nf index ce5bb7f4..647552d5 100644 --- a/modules/local/resolve_found_neoepitopes.nf +++ b/modules/local/resolve_found_neoepitopes.nf @@ -14,6 +14,9 @@ process RESOLVE_FOUND_NEOEPITOPES { tuple val(meta), path("*.csv"), emit: csv path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta}_found_neoepitopes" diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 9fe13398..48374ce2 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -1,5 +1,6 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" + label 'process_low' conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -7,21 +8,24 @@ process SAMPLESHEET_CHECK { 'quay.io/biocontainers/python:3.8.3' }" input: - path samplesheet + path samplesheet output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions + path '*.csv' , emit: csv + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when script: // This script is bundled with the pipeline, in nf-core/mhcquant/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv + """ + check_samplesheet.py \\ + $samplesheet \\ + samplesheet.valid.csv - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf similarity index 79% rename from modules/nf-core/modules/custom/dumpsoftwareversions/main.nf rename to modules/nf-core/custom/dumpsoftwareversions/main.nf index 327d5100..cebb6e05 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -1,11 +1,11 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_low' + label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml rename to modules/nf-core/custom/dumpsoftwareversions/meta.yml diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py similarity index 88% rename from modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py rename to modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index d1390392..787bdb7b 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -1,9 +1,10 @@ #!/usr/bin/env python -import yaml import platform from textwrap import dedent +import yaml + def _make_versions_html(versions): html = [ @@ -58,11 +59,12 @@ def _make_versions_html(versions): for process, process_versions in versions_by_process.items(): module = process.split(":")[-1] try: - assert versions_by_module[module] == process_versions, ( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) except KeyError: versions_by_module[module] = process_versions diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/multiqc/main.nf similarity index 63% rename from modules/nf-core/modules/multiqc/main.nf rename to modules/nf-core/multiqc/main.nf index ae019dbf..a8159a57 100644 --- a/modules/nf-core/modules/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,13 +1,16 @@ process MULTIQC { - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" input: - path multiqc_files + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) output: path "*multiqc_report.html", emit: report @@ -20,8 +23,15 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' """ - multiqc -f $args . + multiqc \\ + --force \\ + $args \\ + $config \\ + $extra_config \\ + . 
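Because the updated MULTIQC module above now declares three additional `path` inputs, every call site has to supply them, even if only as empty channels. A sketch of a minimal invocation, loosely modelled on the channel wiring added to `workflows/mhcquant.nf` at the end of this diff (the workflow name and channel names here are illustrative):

```groovy
// Hypothetical call site for the updated MULTIQC module.
include { MULTIQC } from '../modules/nf-core/multiqc/main'

workflow QC_REPORT {
    take:
    ch_multiqc_files    // collected per-tool reports and version YAMLs

    main:
    def ch_config       = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
    def ch_extra_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty()
    def ch_logo         = params.multiqc_logo   ? Channel.fromPath(params.multiqc_logo,   checkIfExists: true) : Channel.empty()

    MULTIQC (
        ch_multiqc_files.collect(),   // staged as "?/*" to avoid file-name clashes
        ch_config.toList(),           // optional inputs passed as (possibly empty) lists
        ch_extra_config.toList(),
        ch_logo.toList()
    )
}
```

Passing the optional inputs as `.toList()` lets the process run whether or not a custom config or logo was supplied, which is the convention nf-core pipelines use for optional module inputs.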
cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml similarity index 73% rename from modules/nf-core/modules/multiqc/meta.yml rename to modules/nf-core/multiqc/meta.yml index 6fa891ef..ebc29b27 100644 --- a/modules/nf-core/modules/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -12,11 +12,25 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + input: - multiqc_files: type: file description: | List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. + pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + output: - report: type: file @@ -38,3 +52,4 @@ authors: - "@abhi18av" - "@bunop" - "@drpatelh" + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 436f107d..5ba91572 100644 --- a/nextflow.config +++ b/nextflow.config @@ -60,12 +60,15 @@ params { use_NL_ions = false variable_mods = 'Oxidation (M)' vcf_sheet = null + annotate_ions = false // MultiQC options skip_multiqc = false multiqc_config = null multiqc_title = null + multiqc_logo = null max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options tracedir = "${params.outdir}/pipeline_info" @@ -74,12 +77,14 @@ params { email_on_fail = null plaintext_email = false monochrome_logs = false + hook_url = null help = false validate_params = true show_hidden_params = false schema_ignore_params = 'genomes' enable_conda = false + // Config options custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" @@ -88,6 +93,7 @@ params { config_profile_url = null config_profile_name = null + // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' @@ -122,6 +128,15 @@ profiles { shifter.enabled = false charliecloud.enabled = false } + mamba { + params.enable_conda = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } docker { docker.enabled = true docker.userEmulation = true @@ -159,6 +174,11 @@ profiles { podman.enabled = false shifter.enabled = false } + gitpod { + executor.name = 'local' + executor.cpus = 16 + executor.memory = 60.GB + } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } } @@ -194,7 +214,7 @@ trace { } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" + file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -204,7 +224,8 @@ manifest { description = 'Identify and quantify peptides from mass spectrometry raw data' mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' - version = '2.3.1' + version = '2.3.2dev' + doi = '10.1021/acs.jproteome.9b00313' } // Function to ensure that resource requirements don't go beyond diff --git a/nextflow_schema.json b/nextflow_schema.json index a6f1e83a..2025acd1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -86,7 +86,7 @@ "type": "boolean", "fa_icon": "fas fa-border-center-v", "default": false, - 
"description": "Choose whether the specified ms_level in pick_ms_levels is centroided or not." + "description": "Include the flag when the specified ms level is not centroided (default=false). " } } }, @@ -231,6 +231,12 @@ "pattern": "^\\S+\\.tsv$", "description": "Specify a .tsv file containing the information about genomic variants (vcf files < v.4.2) for each sample.", "help_text": "| Sample | VCF_FileName |\n| -------------| :---------------------:|\n| MM15_Melanom | data/MM15_variants.vcf |\n| MM17_Melanom | data/MM17_variants.vcf |" + }, + "annotate_ions": { + "type": "boolean", + "default": "false", + "fa_icon": "fas fa-tags", + "description": "Set this option to create documents that are created to perform the ion annotation" } } }, @@ -321,7 +327,7 @@ "fa_icon": "fas fa-file-code", "pattern": "^\\S+\\.tsv$", "description": "Specify a .tsv file containing the MHC alleles of your probes as well as their metadata such as SampleID.", - "help_text": "| Sample | HLA_Alleles_Class_1 | HLA_Alleles_Class_2 |\n| -------------| :----------------------------------------------:| ------------------------------------------:|\n| MM15_Melanom | A* 03:01;A* 68:01;B* 27:05;B* 35:03;C* 02:02;C* 04:01 |HLA-DRB1* 01:01;HLA-DQB1* 03:19;HLA-DQA1* 05:01|\n| MM17_Melanom | A* 02:01;B* 07:01;B* 26:01;C* 11:01;C* 01:01 |HLA-DRB1* 01:02;HLA-DRB3* 02:02;HLA-DRB4* 01:03|\n" + "help_text": "| Sample | HLA_Alleles_Class_1 | HLA_Alleles_Class_2 |\n| -------------| :----------------------------------------------:| ------------------------------------------:|\n| MM15_Melanom | `A*03:01;A*68:01;B*27:05;B*35:03;C*02:02;C*04:01` | `HLA-DRB1*01:01;HLA-DQB1*03:19;HLA-DQA1*05:01` |\n| MM17_Melanom | `A*02:01;B*07:01;B*26:01;C*11:01;C*01:01` | `HLA-DRB1*01:02;HLA-DRB3*02:02;HLA-DRB4*01:03` |\n" }, "predict_class_1": { "type": "boolean", @@ -529,12 +535,30 @@ "fa_icon": "fas fa-palette", "hidden": true }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.", + "hidden": true + }, "multiqc_config": { "type": "string", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, + "multiqc_logo": { + "type": "string", + "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", + "hidden": true + }, + "multiqc_methods_description": { + "type": "string", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, "tracedir": { "type": "string", "description": "Directory to keep pipeline Nextflow logs and reports.", diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..0d62beb6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
+[tool.black] +line-length = 120 +target_version = ["py37", "py38", "py39", "py310"] + +[tool.isort] +profile = "black" +known_first_party = ["nf_core"] +multi_line_output = 3 diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index c9ead17e..2dfa1fd9 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -11,7 +11,7 @@ workflow INPUT_CHECK { main: SAMPLESHEET_CHECK ( samplesheet ) .csv - .splitCsv ( header:true, sep:',' ) + .splitCsv ( header:true, sep:'\t' ) .map { create_ms_channel(it) } .set { reads } @@ -23,6 +23,7 @@ workflow INPUT_CHECK { // Function to get list of [ meta, filenames ] def create_ms_channel(LinkedHashMap row) { def meta = [:] + meta.id = row.ID meta.sample = row.Sample meta.condition = row.Condition diff --git a/subworkflows/local/pre_quantification.nf b/subworkflows/local/map_alignment.nf similarity index 56% rename from subworkflows/local/pre_quantification.nf rename to subworkflows/local/map_alignment.nf index a23dc14c..e038f7e7 100644 --- a/subworkflows/local/pre_quantification.nf +++ b/subworkflows/local/map_alignment.nf @@ -2,22 +2,31 @@ * Perform the quantification of the samples when the parameter --skip_quantification is not provided */ +include { OPENMS_FALSEDISCOVERYRATE } from '../../modules/local/openms_falsediscoveryrate' +include { OPENMS_IDFILTER as OPENMS_IDFILTER_FOR_ALIGNMENT } from '../../modules/local/openms_idfilter' include { OPENMS_MAPALIGNERIDENTIFICATION } from '../../modules/local/openms_mapaligneridentification' include { OPENMS_MAPRTTRANSFORMER as OPENMS_MAPRTTRANSFORMERMZML OPENMS_MAPRTTRANSFORMER as OPENMS_MAPRTTRANSFORMERIDXML } from '../../modules/local/openms_maprttransformer' -workflow PRE_QUANTIFICATION { +workflow MAP_ALIGNMENT { take: - aligned_hits indexed_hits mzml_files main: ch_versions = Channel.empty() + // Calculate fdr for id based alignment + OPENMS_FALSEDISCOVERYRATE(indexed_hits) + ch_versions = ch_versions.mix(OPENMS_FALSEDISCOVERYRATE.out.versions.first().ifEmpty(null)) + // Filter fdr for id based alignment + OPENMS_IDFILTER_FOR_ALIGNMENT(OPENMS_FALSEDISCOVERYRATE.out.idxml + .flatMap { it -> [tuple(it[0], it[1], null)]}) + ch_versions = ch_versions.mix(OPENMS_IDFILTER_FOR_ALIGNMENT.out.versions.first().ifEmpty(null)) + // Group samples together if they are replicates - ch_grouped_fdr_filtered = aligned_hits + ch_grouped_fdr_filtered = OPENMS_IDFILTER_FOR_ALIGNMENT.out.idxml .map { meta, raw -> [[id:meta.sample + "_" + meta.condition, sample:meta.sample, condition:meta.condition, ext:meta.ext], raw] @@ -26,28 +35,18 @@ workflow PRE_QUANTIFICATION { // Compute alignment rt transformation OPENMS_MAPALIGNERIDENTIFICATION(ch_grouped_fdr_filtered) ch_versions = ch_versions.mix(OPENMS_MAPALIGNERIDENTIFICATION.out.versions.first().ifEmpty(null)) - // Intermediate step to join RT transformation files with mzml and idxml channels - mzml_files - .join( - OPENMS_MAPALIGNERIDENTIFICATION.out.trafoxml - .transpose() - .flatMap { - meta, trafoxml -> - ident = trafoxml.baseName.split('_-_')[0] - [[[id:ident, sample:meta.sample, condition:meta.condition, ext:meta.ext], trafoxml]] - }, by: [0] ) - .set { joined_trafos_mzmls } - - indexed_hits - .join( - OPENMS_MAPALIGNERIDENTIFICATION.out.trafoxml - .transpose() - .flatMap { - meta, trafoxml -> - ident = trafoxml.baseName.split('_-_')[0] - [[[id:ident, sample:meta.sample, condition:meta.condition, ext:meta.ext], trafoxml]] - }, by: [0] ) - .set { joined_trafos_ids } + // Obtain the unique files that were present for 
diff --git a/subworkflows/local/post_quantification.nf b/subworkflows/local/process_feature.nf
similarity index 96%
rename from subworkflows/local/post_quantification.nf
rename to subworkflows/local/process_feature.nf
index 9f7fd6d5..cc8e3dfd 100644
--- a/subworkflows/local/post_quantification.nf
+++ b/subworkflows/local/process_feature.nf
@@ -6,9 +6,9 @@ include { OPENMS_FEATUREFINDERIDENTIFICATION } from
 include { OPENMS_FEATURELINKERUNLABELEDKD } from '../../modules/local/openms_featurelinkerunlabeledkd'
 include { OPENMS_IDCONFLICTRESOLVER } from '../../modules/local/openms_idconflictresolver'
 include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_QUANTIFIED } from '../../modules/local/openms_textexporter'
-include { OPENMS_MZTABEXPORTER as OPENMS_MZTABEXPORTER_QUANT } from '../../modules/local/openms_mztabexporter'
+include { OPENMS_MZTABEXPORTER as OPENMS_MZTABEXPORTER_QUANT }    from '../../modules/local/openms_mztabexporter'
 
-workflow POST_QUANTIFICATION {
+workflow PROCESS_FEATURE {
     take:
         psms_outcome
         aligned_mzml
diff --git a/workflows/mhcquant.nf b/workflows/mhcquant.nf
index 197c5315..0fe12160 100644
--- a/workflows/mhcquant.nf
+++ b/workflows/mhcquant.nf
@@ -8,30 +8,29 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
 // Validate input parameters
 WorkflowMhcquant.initialise(params, log)
 
-// Input/outpout options
+// Input/output options
 if (params.input) { sample_sheet = file(params.input) }
 if (params.fasta) { params.fasta = params.fasta }
-if (params.outdir) { params.outdir = './results' }
 
 // MHC affinity prediction
-if (params.predict_class_1 || params.predict_class_2) {
-    Channel.from( file(params.allele_sheet, checkIfExists: true) )
-        .splitCsv( header: true, sep:'\t' )
+if (params.predict_class_1 || params.predict_class_2) {
+    Channel.from(file(params.allele_sheet, checkIfExists: true))
+        .splitCsv(header: true, sep:'\t')
         .multiMap { col ->
-            classI: ["${col.Sample}", "${col.HLA_Alleles_Class_1}"]
-            classII: ["${col.Sample}", "${col.HLA_Alleles_Class_2}"] }
+            classI : ["${col.Sample}", "${col.HLA_Alleles_Class_1}"]
+            classII: ["${col.Sample}", "${col.HLA_Alleles_Class_2}"] }
         .set { ch_alleles_from_sheet }
 
     // Allele class 1
-    if( params.predict_class_1){
+    if (params.predict_class_1) {
         ch_alleles_from_sheet.classI
             .ifEmpty { exit 1, "params.allele_sheet was empty - no allele input file supplied" }
-            .flatMap {it -> [tuple(it[0].toString(), it[1])] }
+            .flatMap { it -> [tuple(it[0].toString(), it[1])] }
             .set { peptides_class_1_alleles }
     }
     // Allele class 2
-    if( params.predict_class_2){
+    if (params.predict_class_2) {
         ch_alleles_from_sheet.classII
             .ifEmpty { exit 1, "params.allele_sheet was empty - no allele input file supplied" }
             .flatMap { it -> [tuple(it[0].toString(), it[1])] }
@@ -45,8 +44,10 @@ if (params.predict_class_1 || params.predict_class_2) {
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
-ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty()
+ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty()
+ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty()
+ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -63,26 +64,47 @@ include { OPENMS_PEAKPICKERHIRES } from
 include { OPENMS_COMETADAPTER } from '../modules/local/openms_cometadapter'
 include { OPENMS_PEPTIDEINDEXER } from '../modules/local/openms_peptideindexer'
-include { OPENMS_FALSEDISCOVERYRATE } from '../modules/local/openms_falsediscoveryrate'
-include { OPENMS_IDFILTER as OPENMS_IDFILTER_FOR_ALIGNMENT } from '../modules/local/openms_idfilter'
-include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_PSMS } from '../modules/local/openms_textexporter'
+include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_COMET } from '../modules/local/openms_textexporter'
 include { OPENMS_IDFILTER as OPENMS_IDFILTER_Q_VALUE } from '../modules/local/openms_idfilter'
 include { OPENMS_IDMERGER } from '../modules/local/openms_idmerger'
 include { OPENMS_PSMFEATUREEXTRACTOR } from '../modules/local/openms_psmfeatureextractor'
 include { OPENMS_PERCOLATORADAPTER } from '../modules/local/openms_percolatoradapter'
+include { PYOPENMS_IONANNOTATOR } from '../modules/local/pyopenms_ionannotator'
+
+include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_FDR } from '../modules/local/openms_textexporter'
 include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_UNQUANTIFIED } from '../modules/local/openms_textexporter'
-include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main'
-include { MULTIQC } from '../modules/nf-core/modules/multiqc/main'
 
 //
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
 //
-include { INPUT_CHECK } from '../subworkflows/local/input_check'
+include { INPUT_CHECK } from '../subworkflows/local/input_check'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT NF-CORE MODULES/SUBWORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+//
+// MODULE: Installed directly from nf-core/modules
+//
+include { MULTIQC } from '../modules/nf-core/multiqc/main'
+include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    RUN MAIN WORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// Info required for completion email and summary
+def multiqc_report = []
+
 include { INCLUDE_PROTEINS } from '../subworkflows/local/include_proteins'
-include { PRE_QUANTIFICATION } from '../subworkflows/local/pre_quantification'
+include { MAP_ALIGNMENT } from '../subworkflows/local/map_alignment'
 include { REFINE_FDR } from '../subworkflows/local/refine_fdr'
-include { POST_QUANTIFICATION } from '../subworkflows/local/post_quantification'
+include { PROCESS_FEATURE } from '../subworkflows/local/process_feature.nf'
 include { PREDICT_CLASS1 } from '../subworkflows/local/predict_class1'
 include { PREDICT_CLASS2 } from '../subworkflows/local/predict_class2'
 include { PREDICT_RT } from '../subworkflows/local/predict_rt'
@@ -97,24 +119,24 @@ workflow MHCQUANT {
     //
     // SUBWORKFLOW: Check the input file
     //
-    INPUT_CHECK( params.input )
-        .reads
-        .set { ch_samples_from_sheet }
+    INPUT_CHECK(params.input)
+        .reads
+        .set { ch_samples_from_sheet }
     ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
     ch_samples_from_sheet
-        .branch {
-            meta, filename ->
-                raw : meta.ext == 'raw'
-                    return [ meta, filename ]
-                mzml : meta.ext == 'mzml'
-                    return [ meta, filename ]
-                other : true }
-        .set { ms_files }
+        .branch {
+            meta, filename ->
+                raw : meta.ext == 'raw'
+                    return [ meta, filename ]
+                mzml : meta.ext == 'mzml'
+                    return [ meta, filename ]
+                other : true }
+        .set { ms_files }
 
     // Input fasta file
-    Channel.fromPath( params.fasta )
-        .combine( ch_samples_from_sheet )
+    Channel.fromPath(params.fasta)
+        .combine(ch_samples_from_sheet)
         .flatMap{ it -> [tuple(it[1],it[0])] }
         .ifEmpty { exit 1, "params.fasta was empty - no input file supplied" }
         .set { input_fasta }
@@ -122,9 +144,9 @@ workflow MHCQUANT {
     //
    // SUBWORKFLOW: Include protein information
    //
-    if ( params.include_proteins_from_vcf ) {
+    if (params.include_proteins_from_vcf) {
         // Include the proteins from the vcf file to the fasta file
-        INCLUDE_PROTEINS( input_fasta )
+        INCLUDE_PROTEINS(input_fasta)
         ch_versions = ch_versions.mix(INCLUDE_PROTEINS.out.versions.ifEmpty(null))
         ch_fasta_file = INCLUDE_PROTEINS.out.ch_fasta_file
         ch_vcf_from_sheet = INCLUDE_PROTEINS.out.ch_vcf_from_sheet
@@ -145,55 +167,38 @@ workflow MHCQUANT {
     // Raw file conversion
     OPENMS_THERMORAWFILEPARSER(ms_files.raw)
     ch_versions = ch_versions.mix(OPENMS_THERMORAWFILEPARSER.out.versions.ifEmpty(null))
-    if ( params.run_centroidisation ) {
+    // Define ch_ms_files by combining the converted raw files with the supplied mzML files
+    ch_ms_files = OPENMS_THERMORAWFILEPARSER.out.mzml.mix(ms_files.mzml.map{ it -> [it[0], it[1][0]] })
+
+    if (params.run_centroidisation) {
         // Optional: Run Peak Picking as Preprocessing
-        OPENMS_PEAKPICKERHIRES(ms_files.mzml)
+        OPENMS_PEAKPICKERHIRES(ch_ms_files)
         ch_versions = ch_versions.mix(OPENMS_PEAKPICKERHIRES.out.versions.ifEmpty(null))
         ch_mzml_file = OPENMS_PEAKPICKERHIRES.out.mzml
     } else {
-        ch_mzml_file = ms_files.mzml
+        ch_mzml_file = ch_ms_files
     }
+
     // Run comet database search
     OPENMS_COMETADAPTER(
-        OPENMS_THERMORAWFILEPARSER.out.mzml
-            .mix(ch_mzml_file)
-            .join(ch_decoy_db, remainder:true))
+        ch_mzml_file.join(ch_decoy_db, remainder:true))
+    // Write the Comet search results to a TSV file
+    OPENMS_TEXTEXPORTER_COMET(OPENMS_COMETADAPTER.out.idxml)
     ch_versions = ch_versions.mix(OPENMS_COMETADAPTER.out.versions.ifEmpty(null))
     // Index decoy and target hits
     OPENMS_PEPTIDEINDEXER(OPENMS_COMETADAPTER.out.idxml.join(ch_decoy_db))
     ch_versions = ch_versions.mix(OPENMS_PEPTIDEINDEXER.out.versions.ifEmpty(null))
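The `ch_ms_files` definition above merges two streams: Thermo raw files that first pass through conversion, and mzML inputs that are used as supplied. A self-contained toy sketch of this branch-and-mix pattern (invented sample names; the conversion step is faked with a `map`):

    workflow {
        Channel.of(
                [[id:'s1', ext:'raw'],  [file('s1.raw')]],
                [[id:'s2', ext:'mzml'], [file('s2.mzML')]]
            )
            .branch { meta, files ->
                raw  : meta.ext == 'raw'
                mzml : meta.ext == 'mzml'
            }
            .set { ms_files }

        // Stand-in for OPENMS_THERMORAWFILEPARSER: pretend each raw file was converted
        converted = ms_files.raw.map { meta, files -> [meta, file(files[0].baseName + '.mzML')] }

        // As in the pipeline, unwrap the single-element file list before mixing
        ch_ms_files = converted.mix(ms_files.mzml.map { meta, files -> [meta, files[0]] })
        ch_ms_files.view()
    }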
-    // Calculate fdr for id based alignment
-    OPENMS_FALSEDISCOVERYRATE(OPENMS_PEPTIDEINDEXER.out.idxml)
-    ch_versions = ch_versions.mix(OPENMS_FALSEDISCOVERYRATE.out.versions.first().ifEmpty(null))
-    // Filter fdr for id based alignment
-    OPENMS_IDFILTER_FOR_ALIGNMENT(OPENMS_FALSEDISCOVERYRATE.out.idxml
-        .flatMap { it -> [tuple(it[0], it[1], null)]})
-    ch_versions = ch_versions.mix(OPENMS_IDFILTER_FOR_ALIGNMENT.out.versions.first().ifEmpty(null))
-    // Write the content to a PSMs file
-    OPENMS_TEXTEXPORTER_PSMS(
-        OPENMS_IDFILTER_FOR_ALIGNMENT.out.idxml
-            .flatMap {
-                meta, idxml ->
-                    ident = idxml.baseName.split('_-_')[1]
-                    [[[id:ident, sample:meta.sample, condition:meta.condition, ext:meta.ext], idxml]]
-            }
-    )
-
     //
     // SUBWORKFLOW: Pre-process step for the quantification of the data
     //
-
-    if(!params.skip_quantification) {
-        PRE_QUANTIFICATION(
-            OPENMS_IDFILTER_FOR_ALIGNMENT.out.idxml,
+    if (!params.skip_quantification) {
+        MAP_ALIGNMENT(
             OPENMS_PEPTIDEINDEXER.out.idxml,
-            ms_files.mzml
-                .mix(OPENMS_THERMORAWFILEPARSER.out.mzml)
-                .mix(ch_mzml_file)
+            ch_mzml_file
         )
-        ch_proceeding_idx = PRE_QUANTIFICATION.out.ch_proceeding_idx
-        ch_versions = ch_versions.mix(PRE_QUANTIFICATION.out.versions.ifEmpty(null))
+        ch_proceeding_idx = MAP_ALIGNMENT.out.ch_proceeding_idx
+        ch_versions = ch_versions.mix(MAP_ALIGNMENT.out.versions.ifEmpty(null))
     } else {
         ch_proceeding_idx = OPENMS_PEPTIDEINDEXER.out.idxml
             .map {
@@ -212,15 +217,23 @@ workflow MHCQUANT {
     // Run Percolator
     OPENMS_PERCOLATORADAPTER(OPENMS_PSMFEATUREEXTRACTOR.out.idxml)
     ch_versions = ch_versions.mix(OPENMS_PERCOLATORADAPTER.out.versions.ifEmpty(null))
-    ch_percolator_adapter_outcome = OPENMS_PERCOLATORADAPTER.out.idxml
 
     // Filter by percolator q-value
-    OPENMS_IDFILTER_Q_VALUE(ch_percolator_adapter_outcome.flatMap { it -> [tuple(it[0], it[1], null)]})
+    OPENMS_IDFILTER_Q_VALUE(OPENMS_PERCOLATORADAPTER.out.idxml.flatMap { it -> [tuple(it[0], it[1], null)] })
     ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE.out.versions.ifEmpty(null))
 
+    // Export the filtered hits to a TSV file so that empty results can be detected
+    OPENMS_TEXTEXPORTER_FDR(OPENMS_IDFILTER_Q_VALUE.out.idxml)
+    // Raise an error when only the header is present in the exported file
+    OPENMS_TEXTEXPORTER_FDR.out.tsv.map {
+        meta, tsv -> if (tsv.size() < 130) {
+            log.error "It seems that no significant hits were found for one or more samples.\nPlease consider increasing the '--fdr_threshold' (after removing the work directory), or exclude the affected samples."
+            exit(0)
+        }
+    }
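The 130-byte cutoff above is a heuristic: an idXML with no surviving hits still exports a TSV containing only the column header, which stays below that size. A stand-alone sketch of the same check (hypothetical glob and threshold, not part of the pipeline):

    workflow {
        Channel.fromPath('results/*_fdr_filtered.tsv')    // hypothetical output location
            .map { tsv ->
                // a header-only export is assumed to be smaller than 130 bytes
                if (tsv.size() < 130) {
                    log.error "No hits survived FDR filtering in ${tsv.name}; consider raising --fdr_threshold."
                }
                tsv
            }
            .view()
    }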
 
     //
     // SUBWORKFLOW: Refine the FDR values on the predicted subset
     //
-    if ( params.refine_fdr_on_predicted_subset && params.predict_class_1 ) {
+    if (params.refine_fdr_on_predicted_subset && params.predict_class_1) {
         // Run the following subworkflow
         REFINE_FDR (
             OPENMS_IDFILTER_Q_VALUE.out.idxml,
@@ -232,32 +245,33 @@
         filter_q_value = REFINE_FDR.out.filter_refined_q_value.flatMap { it -> [ tuple(it[0].sample, it[0], it[1]) ] }
     } else {
         // Make sure that the columns that consist of the IDs, sample names and the idXML file names are returned
-        filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.idxml.map{ it -> [it[0].sample, it[0], it[1]] }
+        filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.idxml.map { it -> [it[0].sample, it[0], it[1]] }
     }
 
     //
-    // SUBWORKFLOW: Perform the post quantification step
+    // SUBWORKFLOW: Process the features and obtain the corresponding quantification information
     //
-    if ( !params.skip_quantification) {
-        POST_QUANTIFICATION (
-            OPENMS_IDFILTER_FOR_ALIGNMENT.out.idxml,
-            PRE_QUANTIFICATION.out.aligned_mzml,
+
+    if (!params.skip_quantification) {
+        PROCESS_FEATURE (
+            MAP_ALIGNMENT.out.aligned_idfilter,
+            MAP_ALIGNMENT.out.aligned_mzml,
             filter_q_value
-            )
-        ch_versions = ch_versions.mix(POST_QUANTIFICATION.out.versions.ifEmpty(null))
+        )
+        ch_versions = ch_versions.mix(PROCESS_FEATURE.out.versions.ifEmpty(null))
     } else {
-        OPENMS_TEXTEXPORTER_UNQUANTIFIED (filter_q_value.flatMap { ident, meta, idxml -> [[meta, idxml]] })
+        OPENMS_TEXTEXPORTER_UNQUANTIFIED(filter_q_value.flatMap { ident, meta, idxml -> [[meta, idxml]] })
     }
 
     //
     // SUBWORKFLOW: Predict class I (neoepitopes)
     //
-    if ( params.predict_class_1 & !params.skip_quantification ) {
+    if (params.predict_class_1 & !params.skip_quantification) {
         PREDICT_CLASS1 (
-            POST_QUANTIFICATION.out.mztab,
+            PROCESS_FEATURE.out.mztab,
             peptides_class_1_alleles,
             ch_vcf_from_sheet
-            )
+        )
         ch_versions = ch_versions.mix(PREDICT_CLASS1.out.versions.ifEmpty(null))
         ch_predicted_possible_neoepitopes = PREDICT_CLASS1.out.ch_predicted_possible_neoepitopes
     } else {
@@ -267,9 +281,9 @@ workflow MHCQUANT {
     //
     // SUBWORKFLOW: Predict class II (neoepitopes)
     //
-    if ( params.predict_class_2 & !params.skip_quantification ) {
+    if (params.predict_class_2 & !params.skip_quantification) {
         PREDICT_CLASS2 (
-            POST_QUANTIFICATION.out.mztab,
+            PROCESS_FEATURE.out.mztab,
             peptides_class_2_alleles,
             ch_vcf_from_sheet
         )
@@ -282,7 +296,7 @@ workflow MHCQUANT {
     //
     // SUBWORKFLOW: Predict retention time
     //
-    if ( params.predict_RT ) {
+    if (params.predict_RT) {
         PREDICT_RT (
             filter_q_value.map{ it -> [it[1], it[2]] },
             ch_predicted_possible_neoepitopes,
@@ -290,6 +304,18 @@
         )
     }
 
+    if (params.annotate_ions) {
+        // Re-key the filtered idXML files by their sample ID
+        ch_filtered_idxml = filter_q_value.map { ident, meta, idxml -> [meta.id, idxml] }
+        // Join the re-keyed idXML files with the grouped mzML files
+        ch_raw_spectra_data = ch_mzml_file.map { meta, mzml -> [meta.sample + '_' + meta.condition, mzml] }
+            .groupTuple()
+            .join(ch_filtered_idxml)
+        // Annotate spectra with ion fragmentation information
+        PYOPENMS_IONANNOTATOR(ch_raw_spectra_data)
+        ch_versions = ch_versions.mix(PYOPENMS_IONANNOTATOR.out.versions.ifEmpty(null))
+    }
+
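The ion-annotation join above works because both channels are keyed by the same `sample_condition` string. A toy sketch of the groupTuple-then-join keying (invented keys and file names, not pipeline output):

    workflow {
        // mzML files keyed by "sample_condition"; replicates share a key
        ch_mzml = Channel.of(
            ['s1_c1', file('s1_rep1.mzML')],
            ['s1_c1', file('s1_rep2.mzML')]
        )
        // one filtered idXML per sample/condition, using the same key
        ch_idxml = Channel.of(['s1_c1', file('s1_c1_filtered.idXML')])

        ch_mzml
            .groupTuple()      // -> ['s1_c1', [s1_rep1.mzML, s1_rep2.mzML]]
            .join(ch_idxml)    // -> ['s1_c1', [mzMLs], s1_c1_filtered.idXML]
            .view()
    }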
     //
     // MODULE: Pipeline reporting
     //
@@ -301,22 +327,27 @@ workflow MHCQUANT {
     // MODULE: MultiQC
     //
     if (!params.skip_multiqc) {
-        workflow_summary = WorkflowMhcquant.paramsSummaryMultiqc(workflow, summary_params)
+        workflow_summary    = WorkflowMhcquant.paramsSummaryMultiqc(workflow, summary_params)
         ch_workflow_summary = Channel.value(workflow_summary)
+        methods_description    = WorkflowMhcquant.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description)
+        ch_methods_description = Channel.value(methods_description)
+
         ch_multiqc_files = Channel.empty()
-        ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config))
-        ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([]))
         ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
+        ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
         ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
 
         MULTIQC (
-            ch_multiqc_files.collect()
+            ch_multiqc_files.collect(),
+            ch_multiqc_config.collect().ifEmpty([]),
+            ch_multiqc_custom_config.collect().ifEmpty([]),
+            ch_multiqc_logo.collect().ifEmpty([])
         )
-        multiqc_report = MULTIQC.out.report.toList()
         ch_versions = ch_versions.mix(MULTIQC.out.versions)
     }
+
 }
 
 /*
@@ -330,6 +361,9 @@ workflow.onComplete {
         NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
     }
     NfcoreTemplate.summary(workflow, params, log)
+    if (params.hook_url) {
+        NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log)
+    }
 }
 
 /*