From 74ae973b5eba7a870c9fe128e422c97c8a778285 Mon Sep 17 00:00:00 2001 From: emmarousseau Date: Thu, 23 May 2024 16:21:34 +0200 Subject: [PATCH] Add third test and test data --- CHANGELOG.md | 2 +- src/umi_tools/umi_tools_dedup/config.vsh.yaml | 2 +- src/umi_tools/umi_tools_dedup/script.sh | 4 +-- src/umi_tools/umi_tools_dedup/test.sh | 23 ++++++++++++++ .../test_data/deduped_unique.sam | 31 +++++++++++++++++++ 5 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 src/umi_tools/umi_tools_dedup/test_data/deduped_unique.sam diff --git a/CHANGELOG.md b/CHANGELOG.md index ceb53ac5..bbb4ef4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,7 +43,7 @@ - `samtools/samtools_stats`: Reports alignment summary statistics for a BAM file (PR #39). * `umitools`: - - `umitools_dedup`: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read (PR #XXX). + - `umitools_dedup`: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read (PR #54). 
## MAJOR CHANGES diff --git a/src/umi_tools/umi_tools_dedup/config.vsh.yaml b/src/umi_tools/umi_tools_dedup/config.vsh.yaml index 0c34c20a..cc5113e8 100644 --- a/src/umi_tools/umi_tools_dedup/config.vsh.yaml +++ b/src/umi_tools/umi_tools_dedup/config.vsh.yaml @@ -35,7 +35,7 @@ argument_groups: - name: Outputs arguments: - name: --output - alternatives: -S + alternatives: --stdout type: file description: Deduplicated BAM file required: true diff --git a/src/umi_tools/umi_tools_dedup/script.sh b/src/umi_tools/umi_tools_dedup/script.sh index cb5b563a..1e227811 100644 --- a/src/umi_tools/umi_tools_dedup/script.sh +++ b/src/umi_tools/umi_tools_dedup/script.sh @@ -24,7 +24,7 @@ test_dir="${metal_executable}/test_data" umi_tools dedup \ --stdin "$par_input" \ ${par_in_sam:+--in-sam} \ - -S "$par_output" \ + --stdout "$par_output" \ ${par_out_sam:+--out-sam} \ ${par_paired:+--paired} \ ${par_output_stats:+--output-stats "$par_output_stats"} \ @@ -52,7 +52,7 @@ umi_tools dedup \ ${par_mapping_quality:+--mapping-quality "$par_mapping_quality"} \ ${par_unmapped_reads:+--unmapped-reads "$par_unmapped_reads"} \ ${par_chimeric_pairs:+--chimeric-pairs "$par_chimeric_pairs"} \ - ${par_unapired_reads:+--unapired-reads "$par_unapired_reads"} \ + ${par_unpaired_reads:+--unpaired-reads "$par_unpaired_reads"} \ ${par_ignore_umi:+--ignore-umi} \ ${par_subset:+--subset "$par_subset"} \ ${par_chrom:+--chrom "$par_chrom"} \ diff --git a/src/umi_tools/umi_tools_dedup/test.sh b/src/umi_tools/umi_tools_dedup/test.sh index db5563bc..adadb410 100644 --- a/src/umi_tools/umi_tools_dedup/test.sh +++ b/src/umi_tools/umi_tools_dedup/test.sh @@ -58,6 +58,29 @@ diff "$out_dir/deduped_fraction.sam" "$test_dir/deduped_fraction.sam" || \ ############################################################################################ +echo ">>> Test 3: $meta_functionality_name with --method unique" + +"$meta_executable" \ + --paired \ + --input "$test_dir/sample.bam" \ + --bai "$test_dir/sample.bam.bai" \ + 
--output "$out_dir/deduped_unique.sam" \ + --out_sam \ + --method "unique" \ + --random_seed 1 + +echo ">>> Checking whether output exists" +[ ! -f "$out_dir/deduped_unique.sam" ] && echo "File 'deduped_unique.sam' does not exist!" && exit 1 + +echo ">>> Checking whether output is non-empty" +[ ! -s "$out_dir/deduped_unique.sam" ] && echo "File 'deduped_unique.sam' is empty!" && exit 1 + +echo ">>> Checking whether output is correct" +diff "$out_dir/deduped_unique.sam" "$test_dir/deduped_unique.sam" || \ + (echo "Output file deduped_unique.sam does not match expected output" && exit 1) + +############################################################################################ + rm -rf "$out_dir" echo "All tests succeeded!" diff --git a/src/umi_tools/umi_tools_dedup/test_data/deduped_unique.sam b/src/umi_tools/umi_tools_dedup/test_data/deduped_unique.sam new file mode 100644 index 00000000..570ea153 --- /dev/null +++ b/src/umi_tools/umi_tools_dedup/test_data/deduped_unique.sam @@ -0,0 +1,31 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:197195432 +@SQ SN:chr10 LN:129993255 +@SQ SN:chr11 LN:121843856 +@SQ SN:chr12 LN:121257530 +@SQ SN:chr13 LN:120284312 +@SQ SN:chr14 LN:125194864 +@SQ SN:chr15 LN:103494974 +@SQ SN:chr16 LN:98319150 +@SQ SN:chr17 LN:95272651 +@SQ SN:chr18 LN:90772031 +@SQ SN:chr19 LN:61342430 +@SQ SN:chr2 LN:181748087 +@SQ SN:chr3 LN:159599783 +@SQ SN:chr4 LN:155630120 +@SQ SN:chr5 LN:152537259 +@SQ SN:chr6 LN:149517037 +@SQ SN:chr7 LN:152524553 +@SQ SN:chr8 LN:131738871 +@SQ SN:chr9 LN:124076172 +@SQ SN:chrM LN:16299 +@SQ SN:chrX LN:166650296 +@SQ SN:chrY LN:15902555 +@PG ID:Bowtie VN:1.1.2 CL:"bowtie --wrapper basic-0 --threads 4 -v 2 -m 10 -k 1 /ifs/mirror/genomes/bowtie/mm9 /dev/fd/63 --sam" +@PG ID:samtools PN:samtools PP:Bowtie VN:1.19.2 CL:samtools view -h example.bam +@PG ID:samtools.1 PN:samtools PP:samtools VN:1.19.2 CL:samtools view -bS - +SRR2057595.5052066_ACCGGTTTA 16 chr1 3812794 255 51M * 0 0 * * XA:i:2 MD:Z:42T2T5 NM:i:2 
+SRR2057595.13520751_CCAGGTTCT 16 chr1 3967622 255 20M * 0 0 * * XA:i:2 MD:Z:12A0C6 NM:i:2 +SRR2057595.8901432_AGCGGTTAC 0 chr1 4369756 255 20M * 0 0 * * XA:i:2 MD:Z:1T4A13 NM:i:2 +SRR2057595.1210348_ACTGGTTTC 0 chr1 4762503 255 45M * 0 0 * * XA:i:2 MD:Z:0C7A36 NM:i:2 +SRR2057595.1169423_TCTGGTTTC 0 chr1 4762503 255 45M * 0 0 * * XA:i:2 MD:Z:0C7A36 NM:i:2