From 0370b4c6b30d58f8508f4331501187ecb7778f9b Mon Sep 17 00:00:00 2001 From: emmarousseau Date: Thu, 23 May 2024 09:17:44 +0200 Subject: [PATCH] Samtools view (#48) * initial commit dedup * Revert "initial commit dedup" This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2. * initial version with a few tests, script, and config file * update changelog, add one test * add a 4th test, fix option names in the script * Fix name of component in config * remove option named with a number * add must_exist to input file argument * removed "default: null" from one of the arguments in config * remove utf8 characters from config * Update CHANGELOG.md --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 6 +- .../samtools_idxstats/config.vsh.yaml | 2 +- src/samtools/samtools_sort/config.vsh.yaml | 2 +- src/samtools/samtools_stats/config.vsh.yaml | 4 +- src/samtools/samtools_view/config.vsh.yaml | 351 ++++++++++++++++++ src/samtools/samtools_view/help.txt | 80 ++++ src/samtools/samtools_view/script.sh | 71 ++++ src/samtools/samtools_view/test.sh | 87 +++++ src/samtools/samtools_view/test_data/a.bam | Bin 0 -> 254 bytes src/samtools/samtools_view/test_data/a.count | 1 + src/samtools/samtools_view/test_data/a.cram | Bin 0 -> 692 bytes .../samtools_view/test_data/a.forward | 3 + src/samtools/samtools_view/test_data/a.sam | 7 + .../samtools_view/test_data/script.sh | 8 + 14 files changed, 616 insertions(+), 6 deletions(-) create mode 100644 src/samtools/samtools_view/config.vsh.yaml create mode 100644 src/samtools/samtools_view/help.txt create mode 100644 src/samtools/samtools_view/script.sh create mode 100644 src/samtools/samtools_view/test.sh create mode 100644 src/samtools/samtools_view/test_data/a.bam create mode 100644 src/samtools/samtools_view/test_data/a.count create mode 100644 src/samtools/samtools_view/test_data/a.cram create mode 100644 src/samtools/samtools_view/test_data/a.forward create mode 100644 src/samtools/samtools_view/test_data/a.sam create 
mode 100755 src/samtools/samtools_view/test_data/script.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index f52a6d02..b1d7a7af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,11 +43,13 @@ - `samtools/samtools_index`: Index SAM/BAM/CRAM files (PR #35). - `samtools/samtools_sort`: Sort SAM/BAM/CRAM files (PR #36). - `samtools/samtools_stats`: Reports alignment summary statistics for a BAM file (PR #39). - - `samtools/samtools_stats`: Indexes FASTA files to enable random access to fasta and fastq files (PR #41). - - `samtools_collate`: Shuffles and groups reads in SAM/BAM/CRAM files together by their names (PR #42). + - `samtools/samtools_faidx`: Indexes FASTA files to enable random access to fasta and fastq files (PR #41). + - `samtools/samtools_collate`: Shuffles and groups reads in SAM/BAM/CRAM files together by their names (PR #42). + - `samtools/samtools_view`: Views and converts SAM/BAM/CRAM files (PR #48). * `falco`: A C++ drop-in replacement of FastQC to assess the quality of sequence read data (PR #43). + ## MAJOR CHANGES ## MINOR CHANGES diff --git a/src/samtools/samtools_idxstats/config.vsh.yaml b/src/samtools/samtools_idxstats/config.vsh.yaml index d5e32077..30f21348 100644 --- a/src/samtools/samtools_idxstats/config.vsh.yaml +++ b/src/samtools/samtools_idxstats/config.vsh.yaml @@ -7,7 +7,7 @@ links: documentation: https://www.htslib.org/doc/samtools-idxstats.html repository: https://github.com/samtools/samtools references: - doi: 10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008 + doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] license: MIT/Expat argument_groups: diff --git a/src/samtools/samtools_sort/config.vsh.yaml b/src/samtools/samtools_sort/config.vsh.yaml index 7cd9ec48..a78800da 100644 --- a/src/samtools/samtools_sort/config.vsh.yaml +++ b/src/samtools/samtools_sort/config.vsh.yaml @@ -4,7 +4,7 @@ description: Sort SAM/BAM/CRAM file. 
keywords: [sort, bam, sam, cram] links: homepage: https://www.htslib.org/ - documentation: https://www.htslib.org/doc/samtools-idxstats.html + documentation: https://www.htslib.org/doc/samtools-sort.html repository: https://github.com/samtools/samtools references: doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] diff --git a/src/samtools/samtools_stats/config.vsh.yaml b/src/samtools/samtools_stats/config.vsh.yaml index 554a11e8..0d8f57a4 100644 --- a/src/samtools/samtools_stats/config.vsh.yaml +++ b/src/samtools/samtools_stats/config.vsh.yaml @@ -4,10 +4,10 @@ description: Reports alignment summary statistics for a BAM file. keywords: [statistics, counts, bam, sam, cram] links: homepage: https://www.htslib.org/ - documentation: https://www.htslib.org/doc/samtools-idxstats.html + documentation: https://www.htslib.org/doc/samtools-stats.html repository: https://github.com/samtools/samtools references: - doi: 10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008 + doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] license: MIT/Expat argument_groups: diff --git a/src/samtools/samtools_view/config.vsh.yaml b/src/samtools/samtools_view/config.vsh.yaml new file mode 100644 index 00000000..206b87ac --- /dev/null +++ b/src/samtools/samtools_view/config.vsh.yaml @@ -0,0 +1,351 @@ +name: samtools_view +namespace: samtools +description: Views and converts SAM/BAM/CRAM files. +keywords: [view, convert, bam, sam, cram] +links: + homepage: https://www.htslib.org/ + documentation: https://www.htslib.org/doc/samtools-view.html + repository: https://github.com/samtools/samtools +references: + doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] +license: MIT/Expat + +argument_groups: + - name: Inputs + arguments: + - name: --input + type: file + description: Input SAM, BAM, or CRAM file. + required: true + must_exist: true + - name: --fai_reference + alternatives: -t + type: file + description: | + A tab-delimited FILE. 
Each line must contain the reference name in the first column + and the length of the reference in the second column, with one line for each distinct + reference. Any additional fields beyond the second column are ignored. This file also + defines the order of the reference sequences in sorting. If you run: `samtools faidx <ref.fa>`, + the resulting index file <ref.fa>.fai can be used as this FILE. + - name: --reference + alternatives: -T + type: file + description: | + A FASTA format reference FILE, optionally compressed by bgzip and ideally indexed by samtools faidx. + If an index is not present one will be generated for you, if the reference file is local. + If the reference file is not local, but is accessed instead via an https://, s3:// or other URL, + the index file will need to be supplied by the server alongside the reference. It is possible to + have the reference and index files in different locations by supplying both to this option separated + by the string "##idx##", for example: + --reference ftp://x.com/ref.fa##idx##ftp://y.com/index.fa.fai + However, note that only the location of the reference will be stored in the output file header. + If this method is used to make CRAM files, the cram reader may not be able to find the index, + and may not be able to decode the file unless it can get the references it needs using a different + method. + - name: --target_file + alternatives: -L + type: file + description: | + Only output alignments overlapping the input BED FILE [null]. + - name: --region_file + type: file + description: | + Use an index and multi-region iterator to only output alignments overlapping the input BED FILE. + Equivalent to --use_index --target_file FILE. + - name: --qname_file + alternatives: -N + type: file + description: | + Output only alignments with read names listed in FILE. If FILE starts with ^ then the operation is + negated and only outputs alignments with read names not listed in FILE. 
It is not permissible to mix + both the filter-in and filter-out style syntax in the same command. + must_exist: true + - name: --read_group_file + alternatives: -R + type: file + description: | + Output alignments in read groups listed in FILE [null]. If FILE starts with ^ then the operation is + negated and only outputs alignments with read groups not listed in FILE. It is not permissible to mix + both the filter-in and filter-out style syntax in the same command. Note that records with no RG tag + will also be output when using this option. This behaviour may change in a future release. + must_exist: true + - name: --use_index + alternatives: -M + type: boolean_true + description: | + Use the multi-region iterator on the union of a BED file and command-line region arguments. + This avoids re-reading the same regions of files so can sometimes be much faster. Note this also + removes duplicate sequences. Without this a sequence that overlaps multiple regions specified on + the command line will be reported multiple times. The usage of a BED file is optional and its path + has to be preceded by --target_file option. + + - name: Outputs + arguments: + - name: --output + alternatives: -o + type: file + description: Output to FILE instead of [stdout]. + required: true + direction: output + example: output.bam + - name: --bam + alternatives: -b + type: boolean_true + description: Output in the BAM format. + - name: --cram + alternatives: -C + type: boolean_true + description: | + Output in the CRAM format (requires --reference). + - name: --fast + type: boolean_true + description: | + Enable fast compression. This also changes the default output format to BAM, + but this can be overridden by the explicit format options or using a filename + with a known suffix. + - name: --uncompressed + alternatives: -u + type: boolean_true + description: | + Output uncompressed data. 
This also changes the default output format to BAM, + but this can be overridden by the explicit format options or using a filename + with a known suffix. + This option saves time spent on compression/decompression and is thus preferred + when the output is piped to another samtools command. + - name: --with_header + type: boolean_true + description: | + Include the header in the output. + - name: --header_only + alternatives: -H + type: boolean_true + description: | + Output the header only. + - name: --no_header + type: boolean_true + description: | + When producing SAM format, output alignment records but not headers. + This is the default; the option can be used to reset the effect of + --with_header/--header_only. + - name: --count + alternatives: -c + type: boolean_true + description: | + Instead of printing the alignments, only count them and print the total number. + All filter options, such as --require_flags, --excl_flags, and --min_MQ, are taken + into account. The --unmap option is ignored in this mode. + - name: --output_unselected + alternatives: -U + type: file + description: | + Write alignments that are not selected by the various filter options to FILE. + When this option is used, all alignments (or all alignments intersecting the regions + specified) are written to either the output file or this file, but never both. + - name: --unmap + alternatives: -p + type: boolean_true + description: | + Set the UNMAP flag on alignments that are not selected by the filter options. + These alignments are then written to the normal output. This is not compatible + with --output_unselected. + - name: --read_group + alternatives: -r + type: string + description: | + Output alignments in read group STR [null]. Note that records with no RG tag will also be output + when using this option. This behaviour may change in a future release. 
+ - name: --tag + alternatives: -d + type: string + description: | + Only output alignments with tag STR1 and associated value STR2, which can be a string or an integer + [null]. + The value can be omitted, in which case only the tag is considered. + Note that this option does not specify a tag type. For example, use --tag XX:42 to select alignments + with an XX:i:42 field, not --tag XX:i:42. + - name: --tag_file + alternatives: -D + type: file + description: | + Only output alignments with tag STR and associated values listed in FILE. + must_exist: true + - name: --min_MQ + alternatives: -q + type: integer + description: | + Skip alignments with MAPQ smaller than INT. + default: 0 + - name: --library + alternatives: -l + type: string + description: | + Only output alignments in library STR. + - name: --min_qlen + alternatives: -m + type: integer + description: | + Only output alignments with number of CIGAR bases consuming query sequence >= INT. + default: 0 + - name: --expr + alternatives: -e + type: string + description: | + Only include alignments that match the filter expression STR. The syntax for these expressions is + described in the main samtools. + - name: --require_flags + alternatives: -f + type: string + description: | + Only output alignments with all bits set in FLAG present in the FLAG field. FLAG can be specified + in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/), in octal by beginning with `0' (i.e. /^0[0-7]+/), + as a decimal number not beginning with '0' or as a comma-separated list of flag names. + - name: --excl_flags + alternatives: -F + type: string + description: | + Do not output alignments with any bits set in FLAG present in the FLAG field. FLAG can be specified + in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/), in octal by beginning with `0' (i.e. /^0[0-7]+/), + as a decimal number not beginning with '0' or as a comma-separated list of flag names. 
+ - name: --excl_all_flags + alternatives: -G + type: string # FLAG may be hex, octal, decimal or a list of flag names, so it cannot be typed as integer + description: | + Do not output alignments with all bits set in FLAG present in the FLAG field. This is the opposite of + --require_flags such that --require_flags 12 --excl_all_flags 12 is the same as no filtering at all. + FLAG can be specified in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/), in octal by beginning with `0' + (i.e. /^0[0-7]+/), as a decimal number not beginning with '0' or as a comma-separated list of flag names. + - name: --incl_flags + alternatives: --rf + type: string + description: | + Only output alignments with any bit set in FLAG present in the FLAG field. FLAG can be specified in hex + by beginning with `0x' (i.e. /^0x[0-9A-F]+/), in octal by beginning with `0' (i.e. /^0[0-7]+/), as a decimal + number not beginning with '0' or as a comma-separated list of flag names. + - name: --remove_tag + alternatives: -x + type: string + description: | + Read tag(s) to exclude from output (repeatable) [null]. This can be a single tag or a comma separated list. + Alternatively the option itself can be repeated multiple times. + If the list starts with a `^' then it is negated and treated as a request to remove all tags except those in STR. + The list may be empty, so --remove_tag ^ will remove all tags. + Note that tags will only be removed from reads that pass filtering. + - name: --keep_tag + type: string + description: | + This keeps only tags listed in STR and is directly equivalent to --remove_tag ^STR. Specifying an empty list + will remove all tags. If both --keep_tag and --remove_tag are specified then --keep_tag has precedence. + Note that tags will only be removed from reads that pass filtering. + - name: --remove_B + alternatives: -B + type: boolean_true + description: | + Collapse the backward CIGAR operation. + - name: --add_flags + type: string + description: | + Adds flag(s) to read. FLAG can be specified in hex by beginning with `0x' (i.e. 
/^0x[0-9A-F]+/), in octal + by beginning with `0' (i.e. /^0[0-7]+/), as a decimal number not beginning with '0' or as a comma-separated + list of flag names. + - name: --remove_flags + type: string + description: | + Remove flag(s) from read. FLAG is specified in the same way as with the --add_flags option. + - name: --subsample + type: double + description: | + Output only a proportion of the input alignments, as specified by 0.0 <= FLOAT <= 1.0, which gives the fraction + of templates/pairs to be kept. This subsampling acts in the same way on all of the alignment records in the same + template or read pair, so it never keeps a read but not its mate. + - name: --subsample_seed + type: integer + description: | + Subsampling seed used to influence which subset of reads is kept. When subsampling data that has previously + been subsampled, be sure to use a different seed value from those used previously; otherwise more reads will + be retained than expected. + default: 0 + - name: --fetch_pairs + alternatives: -P + type: boolean_true + description: | + Retrieve pairs even when the mate is outside of the requested region. Enabling this option also turns on the + multi-region iterator (-M). A region to search must be specified, either on the command-line, or using the + --target_file option. The input file must be an indexed regular file. + This option first scans the requested region, using the RNEXT and PNEXT fields of the records that have the + PAIRED flag set and pass other filtering options to find where paired reads are located. These locations are + used to build an expanded region list, and a set of QNAMEs to allow from the new regions. It will then make + a second pass, collecting all reads from the originally-specified region list together with reads from additional + locations that match the allowed set of QNAMEs. Any other filtering options used will be applied to all reads + found during this second pass. 
+ As this option links reads using RNEXT and PNEXT, it is important that these fields are set accurately. Use + 'samtools fixmate' to correct them if necessary. + Note that this option does not work with the --count, --output-unselected or --unmap options. + - name: --customized_index + alternatives: -X + type: boolean_true + description: | + Include customized index file as a part of arguments. See EXAMPLES section for sample of usage. + - name: --sanitize + alternatives: -z + type: string + description: | + Perform some sanity checks on the state of SAM record fields, fixing up common mistakes made by aligners. + These include soft-clipping alignments when they extend beyond the end of the reference, marking records as + unmapped when they have reference * or position 0, and ensuring unmapped alignments have no CIGAR or mapping + quality for unmapped alignments and no MD, NM, CG or SM tags. + FLAGs is a comma-separated list of keywords chosen from the following list. + + unmap: The UNMAPPED BAM flag. This is set for reads with position <= 0, reference name "*" or reads starting + beyond the end of the reference. Note CIGAR "*" is permitted for mapped data so does not trigger this. + + pos: Position and reference name fields. These may be cleared when a sequence is unmapped due to the + coordinates being beyond the end of the reference. Selecting this may change the sort order of the file, + so it is not a part of the on compound argument. + mqual: Mapping quality. This is set to zero for unmapped reads. + cigar: Modifies CIGAR fields, either by adding soft-clips for reads that overlap the end of the reference or + by clearing it for unmapped reads. + aux: For unmapped data, some auxiliary fields are meaningless and will be removed. These include NM, MD, CG and SM. + off: Perform no sanity fixing. This is the default + on: Sanitize data in a way that guarantees the same sort order. This is everything except for pos. + all: All sanitizing options, including pos. 
+ - name: --no_PG + type: boolean_true + description: | + Do not add a @PG line to the header of the output file. + - name: --input_fmt_option + type: string + description: | + Specify a single input file format option in the form of OPTION or OPTION=VALUE. + - name: --output_fmt + alternatives: -O + type: string + description: | + Specify output format (SAM, BAM, CRAM). + - name: --output_fmt_option + type: string + description: | + Specify a single output file format option in the form of OPTION or OPTION=VALUE. + - name: --write_index + type: boolean_true + description: | + Automatically index the output files. + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1 + setup: + - type: docker + run: | + samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \ + sed 's#Using ##;s# \([0-9\.]*\)$#: \1#' > /var/software_versions.txt +runners: +- type: executable +- type: nextflow \ No newline at end of file diff --git a/src/samtools/samtools_view/help.txt b/src/samtools/samtools_view/help.txt new file mode 100644 index 00000000..753b1bc6 --- /dev/null +++ b/src/samtools/samtools_view/help.txt @@ -0,0 +1,80 @@ +``` +samtools view +``` + +Usage: samtools view [options] <in.bam>|<in.sam>|<in.cram> [region ...] 
+ +Output options: + -b, --bam Output BAM + -C, --cram Output CRAM (requires -T) + -1, --fast Use fast BAM compression (and default to --bam) + -u, --uncompressed Uncompressed BAM output (and default to --bam) + -h, --with-header Include header in SAM output + -H, --header-only Print SAM header only (no alignments) + --no-header Print SAM alignment records only [default] + -c, --count Print only the count of matching records + -o, --output FILE Write output to FILE [standard output] + -U, --unoutput FILE, --output-unselected FILE + Output reads not selected by filters to FILE + -p, --unmap Set flag to UNMAP on reads not selected + then write to output file. + -P, --fetch-pairs Retrieve complete pairs even when outside of region +Input options: + -t, --fai-reference FILE FILE listing reference names and lengths + -M, --use-index Use index and multi-region iterator for regions + --region[s]-file FILE Use index to include only reads overlapping FILE + -X, --customized-index Expect extra index file argument after <in.bam> + +Filtering options (Only include in output reads that...): + -L, --target[s]-file FILE ...overlap (BED) regions in FILE + -N, --qname-file [^]FILE ...whose read name is listed in FILE ("^" negates) + -r, --read-group STR ...are in read group STR + -R, --read-group-file [^]FILE + ...are in a read group listed in FILE + -d, --tag STR1[:STR2] ...have a tag STR1 (with associated value STR2) + -D, --tag-file STR:FILE ...have a tag STR whose value is listed in FILE + -q, --min-MQ INT ...have mapping quality >= INT + -l, --library STR ...are in library STR + -m, --min-qlen INT ...cover >= INT query bases (as measured via CIGAR) + -e, --expr STR ...match the filter expression STR + -f, --require-flags FLAG ...have all of the FLAGs present + -F, --excl[ude]-flags FLAG ...have none of the FLAGs present + --rf, --incl-flags, --include-flags FLAG + ...have some of the FLAGs present + -G FLAG EXCLUDE reads with all of the FLAGs present + --subsample FLOAT Keep only FLOAT 
fraction of templates/read pairs + --subsample-seed INT Influence WHICH reads are kept in subsampling [0] + -s INT.FRAC Same as --subsample 0.FRAC --subsample-seed INT + +Processing options: + --add-flags FLAG Add FLAGs to reads + --remove-flags FLAG Remove FLAGs from reads + -x, --remove-tag STR + Comma-separated read tags to strip (repeatable) [null] + --keep-tag STR + Comma-separated read tags to preserve (repeatable) [null]. + Equivalent to "-x ^STR" + -B, --remove-B Collapse the backward CIGAR operation + -z, --sanitize FLAGS Perform sanitity checking and fixing on records. + FLAGS is comma separated (see manual). [off] + +General options: + -?, --help Print long help, including note about region specification + -S Ignored (input format is auto-detected) + --no-PG Do not add a PG line + --input-fmt-option OPT[=VAL] + Specify a single input file format option in the form + of OPTION or OPTION=VALUE + -O, --output-fmt FORMAT[,OPT[=VAL]]... + Specify output format (SAM, BAM, CRAM) + --output-fmt-option OPT[=VAL] + Specify a single output file format option in the form + of OPTION or OPTION=VALUE + -T, --reference FILE + Reference sequence FASTA FILE [null] + -@, --threads INT + Number of additional threads to use [0] + --write-index + Automatically index the output files [off] + --verbosity INT + Set level of verbosity diff --git a/src/samtools/samtools_view/script.sh b/src/samtools/samtools_view/script.sh new file mode 100644 index 00000000..c3911b48 --- /dev/null +++ b/src/samtools/samtools_view/script.sh @@ -0,0 +1,71 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +set -e + +[[ "$par_bam" == "false" ]] && unset par_bam +[[ "$par_cram" == "false" ]] && unset par_cram +[[ "$par_fast" == "false" ]] && unset par_fast +[[ "$par_uncompressed" == "false" ]] && unset par_uncompressed +[[ "$par_with_header" == "false" ]] && unset par_with_header +[[ "$par_header_only" == "false" ]] && unset par_header_only +[[ "$par_no_header" == "false" ]] && unset par_no_header 
+[[ "$par_count" == "false" ]] && unset par_count +[[ "$par_unmap" == "false" ]] && unset par_unmap +[[ "$par_use_index" == "false" ]] && unset par_use_index +[[ "$par_fetch_pairs" == "false" ]] && unset par_fetch_pairs +[[ "$par_customized_index" == "false" ]] && unset par_customized_index +[[ "$par_no_PG" == "false" ]] && unset par_no_PG +[[ "$par_write_index" == "false" ]] && unset par_write_index +[[ "$par_remove_B" == "false" ]] && unset par_remove_B + +samtools view \ + ${par_bam:+-b} \ + ${par_cram:+-C} \ + ${par_fast:+--fast} \ + ${par_uncompressed:+-u} \ + ${par_with_header:+--with-header} \ + ${par_header_only:+-H} \ + ${par_no_header:+--no-header} \ + ${par_count:+-c} \ + ${par_output:+-o "$par_output"} \ + ${par_output_unselected:+-U "$par_output_unselected"} \ + ${par_unmap:+-p "$par_unmap"} \ + ${par_fetch_pairs:+-P "$par_fetch_pairs"} \ + ${par_fai_reference:+-t "$par_fai_reference"} \ + ${par_use_index:+-M "$par_use_index"} \ + ${par_region_file:+--region-file "$par_region_file"} \ + ${par_customized_index:+-X} \ + ${par_target_file:+-L "$par_target_file"} \ + ${par_qname_file:+-N "$par_qname_file"} \ + ${par_read_group:+-r "$par_read_group"} \ + ${par_read_group_file:+-R "$par_read_group_file"} \ + ${par_tag:+-d "$par_tag"} \ + ${par_tag_file:+-D "$par_tag_file"} \ + ${par_min_MQ:+-q "$par_min_MQ"} \ + ${par_library:+-l "$par_library"} \ + ${par_min_qlen:+-m "$par_min_qlen"} \ + ${par_expr:+-e "$par_expr"} \ + ${par_require_flags:+-f "$par_require_flags"} \ + ${par_excl_flags:+-F "$par_excl_flags"} \ + ${par_incl_flags:+--rf "$par_incl_flags"} \ + ${par_excl_all_flags:+-G "$par_excl_all_flags"} \ + ${par_subsample:+--subsample "$par_subsample"} \ + ${par_subsample_seed:+--subsample-seed "$par_subsample_seed"} \ + ${par_add_flags:+--add-flags "$par_add_flags"} \ + ${par_remove_flags:+--remove-flags "$par_remove_flags"} \ + ${par_remove_tag:+-x "$par_remove_tag"} \ + ${par_keep_tag:+--keep-tag "$par_keep_tag"} \ + ${par_remove_B:+-B} \ + 
${par_sanitize:+-z "$par_sanitize"} \ + ${par_input_fmt_option:+--input-fmt-option "$par_input_fmt_option"} \ + ${par_output_fmt:+-O "$par_output_fmt"} \ + ${par_output_fmt_option:+--output-fmt-option "$par_output_fmt_option"} \ + ${par_reference:+-T "$par_reference"} \ + ${par_write_index:+--write-index} \ + ${par_no_PG:+--no-PG} \ + "$par_input" + +exit 0 diff --git a/src/samtools/samtools_view/test.sh b/src/samtools/samtools_view/test.sh new file mode 100644 index 00000000..1de29a7c --- /dev/null +++ b/src/samtools/samtools_view/test.sh @@ -0,0 +1,87 @@ +#!/bin/bash + +test_dir="${meta_resources_dir}/test_data" +temp_dir="${meta_resources_dir}/out" + +############################################################################################ + +echo ">>> Test 1: Import SAM to BAM when @SQ lines are present in the header" +"$meta_executable" \ + --bam \ + --output "$temp_dir/a.bam" \ + --input "$test_dir/a.sam" + +echo ">>> Checking whether output exists" +[ ! -f "$temp_dir/a.bam" ] && echo "File 'a.bam' does not exist!" && exit 1 + +echo ">>> Checking whether output is non-empty" +[ ! -s "$temp_dir/a.bam" ] && echo "File 'a.bam' is empty!" && exit 1 + +echo ">>> Checking whether output is correct" +# compare output of "samtools view" for both files +diff <(samtools view "$temp_dir/a.bam") <(samtools view "$test_dir/a.bam") || \ + (echo "Output file a.bam does not match expected output" && exit 1) + +############################################################################################ + +echo ">>> Test 2: ${meta_functionality_name} with CRAM format output" + +"$meta_executable" \ + --cram \ + --output "$temp_dir/a.cram" \ + --input "$test_dir/a.sam" + +echo ">>> Checking whether output exists" +[ ! -f "$temp_dir/a.cram" ] && echo "File 'a.cram' does not exist!" && exit 1 + +echo ">>> Checking whether output is non-empty" +[ ! -s "$temp_dir/a.cram" ] && echo "File 'a.cram' is empty!" 
&& exit 1 + +echo ">>> Checking whether output is correct" +# compare output of "samtools view" for both files +diff <(samtools view "$temp_dir/a.cram") <(samtools view "$test_dir/a.cram") || \ + (echo "Output file a.cram does not match expected output" && exit 1) + +############################################################################################ + +echo ">>> Test 3: ${meta_functionality_name} with --count option" + +"$meta_executable" \ + --count \ + --output "$temp_dir/a.count" \ + --input "$test_dir/a.sam" + +echo ">>> Checking whether output exists" +[ ! -f "$temp_dir/a.count" ] && echo "File 'a.count' does not exist!" && exit 1 + +echo ">>> Checking whether output is non-empty" +[ ! -s "$temp_dir/a.count" ] && echo "File 'a.count' is empty!" && exit 1 + +echo ">>> Checking whether output is correct" +diff "$temp_dir/a.count" "$test_dir/a.count" || \ + (echo "Output file a.count does not match expected output" && exit 1) + +############################################################################################ + +echo ">>> Test 4: ${meta_functionality_name} including only the forward reads from read pairs" + +"$meta_executable" \ + --output "$temp_dir/a.forward" \ + --excl_flags "0x80" \ + --input "$test_dir/a.sam" + +echo ">>> Checking whether output exists" +[ ! -f "$temp_dir/a.forward" ] && echo "File 'a.forward' does not exist!" && exit 1 + +echo ">>> Checking whether output is non-empty" +[ ! -s "$temp_dir/a.forward" ] && echo "File 'a.forward' is empty!" 
&& exit 1 + +echo ">>> Checking whether output is correct" +diff "$temp_dir/a.forward" "$test_dir/a.forward" || \ + (echo "Output file a.forward does not match expected output" && exit 1) + +############################################################################################ + +echo ">>> All test passed successfully" +rm -rf "${temp_dir}" +exit 0 \ No newline at end of file diff --git a/src/samtools/samtools_view/test_data/a.bam b/src/samtools/samtools_view/test_data/a.bam new file mode 100644 index 0000000000000000000000000000000000000000..95b85b72e85bba4f2ecfed9f289bab13bcebfbf5 GIT binary patch literal 254 zcmb2|=3rp}f&Xj_PR>jW6%55iUs8FJ6A}tI_@3~5+q`PUgD)R98yP();wX6jrDUU) z-x=ML=g(_C}`M^ziWT zoB?7{Q642A#c*VXT}m#{K6x~YGZ>7M9T?Kw{1x5WQrPq~=65$XI<_=6DqftB$jHpx zyZ{Ik)MaJ}E;R1UnR~LLwCG@|U)$@HgoK2O&;S3|ugJEx0s@cAGhMcwOIc&Zc-(iZ QZVV%)MbZq+U=M=`0DTczqyPW_ literal 0 HcmV?d00001 diff --git a/src/samtools/samtools_view/test_data/a.count b/src/samtools/samtools_view/test_data/a.count new file mode 100644 index 00000000..1e8b3149 --- /dev/null +++ b/src/samtools/samtools_view/test_data/a.count @@ -0,0 +1 @@ +6 diff --git a/src/samtools/samtools_view/test_data/a.cram b/src/samtools/samtools_view/test_data/a.cram new file mode 100644 index 0000000000000000000000000000000000000000..57fb3269c06238f3f35201ce71f5a554098b5ebe GIT binary patch literal 692 zcmZ<`a`a_pNYqO%O3Y}Y6&fsz=PDjk6K_20>viTS6N9DK-?^KBwq36IoCl=r z?cr`Bk5T(Aa!wb}8w`vhYz#JBj19XvmaW~xz{t?Bp<#mrYlsU=vYRtQP!L0~uXMZj zWa%J3M#lighDJdrH%3N&XCNUEJ26lW9+a%bjX{0{{Dj0}DOKnX@hhLB*0 z*gqbim>W=x2`C2A1hmG@4?=tTu`)66IytehF*0f~GOGFpLi7fK#JK~585wzkJRwSh zoqhJ&4pn}=UO5G2F3`^&xDmgx&6?=E{# z5+vq)N2O`TUItd6zxCPwd}LtZ5)pBPgAh1)b|^=afrVR0DAAB1$&ex0kO9s(WfZxM zk%dRX1`+#@pQy+)Fubiw@2~+Hyf0=74+9H7CueeUa>Ha06fA39%fKSQ#>UFZ3Je$T zS9YrzSOnSFfQq>o7|#FuU&sLV1{)hQ7_8`bQ{o3of`jvctG^qN4Gc2Yy*K@V6dM~T M{1_SDB{O;h0B-B2a{vGU literal 0 HcmV?d00001 diff --git a/src/samtools/samtools_view/test_data/a.forward 
b/src/samtools/samtools_view/test_data/a.forward new file mode 100644 index 00000000..766d4f20 --- /dev/null +++ b/src/samtools/samtools_view/test_data/a.forward @@ -0,0 +1,3 @@ +a1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +b1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +c1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** diff --git a/src/samtools/samtools_view/test_data/a.sam b/src/samtools/samtools_view/test_data/a.sam new file mode 100644 index 00000000..aa8c77b3 --- /dev/null +++ b/src/samtools/samtools_view/test_data/a.sam @@ -0,0 +1,7 @@ +@SQ SN:xx LN:20 +a1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +b1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +c1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +a1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** +b1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** +c1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** diff --git a/src/samtools/samtools_view/test_data/script.sh b/src/samtools/samtools_view/test_data/script.sh new file mode 100755 index 00000000..90918e44 --- /dev/null +++ b/src/samtools/samtools_view/test_data/script.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# dowload test data from snakemake wrapper +if [ ! -d /tmp/view_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers.git /tmp/view_source +fi + +cp -r /tmp/idxstats_source/bio/samtools/view/test/*.sam src/samtools/samtools_view/test_data \ No newline at end of file