Skip to content

Commit

Permalink
initial commit, complete config file, add test data
Browse files Browse the repository at this point in the history
  • Loading branch information
emmarousseau committed Aug 12, 2024
1 parent 1679c59 commit 924eeee
Show file tree
Hide file tree
Showing 8 changed files with 8,064 additions and 0 deletions.
101 changes: 101 additions & 0 deletions src/bbmap_bbsplit/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Viash component wrapping BBSplit (part of the BBMap suite).
# Every argument "--foo" below is exposed to script.sh as the variable
# "par_foo", so the names here must match the par_* variables used there.
namespace: "bbmap"
name: "bbmap_bbsplit"
description: |
  Split sequencing reads by mapping them to multiple references simultaneously.
links:
  homepage: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/
  documentation: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbmap-guide/
  repository: https://github.com/BioInfoTools/BBMap/blob/master/sh/bbsplit.sh

license: "BBTools Copyright (c) 2014"

argument_groups:
  - name: "Input"
    arguments:
      - name: "--id"
        type: string
        description: Sample ID
      - name: "--paired"
        type: boolean
        default: false
        description: "Paired fastq files or not?"
      - name: "--input"
        type: file
        multiple: true
        # script.sh splits this value on commas and test.sh passes a
        # comma-separated pair, so the separator is pinned explicitly.
        multiple_sep: ","
        description: 'Input fastq files, either one or two (paired), separated by ",".'
        example: sample.fastq
      - name: "--primary_ref"
        type: file
        description: Primary reference FASTA
      - name: "--bbsplit_fasta_list"
        # Former flag name, kept so existing invocations keep working.
        alternatives: ["--other_ref_names"]
        type: file
        description: >-
          Path to comma-separated file containing a list of reference genomes
          to filter reads against with BBSplit. Each line holds a short
          reference name and the path to its FASTA file.
      - name: "--built_bbsplit_index"
        type: file
        description: >-
          Directory with a previously built BBSplit index. When given, it is
          used instead of the primary and non-primary FASTA files.
      - name: "--only_build_index"
        type: boolean
        # Explicit default: script.sh treats an unset value as "true".
        default: false
        description: "true = only build index; false = mapping"

  - name: "Output"
    arguments:
      - name: "--fastq_1"
        type: file
        description: |
          Output file for read 1.
        direction: output
        example: read_1.fastq
      - name: "--fastq_2"
        type: file
        description: |
          Output file for read 2.
        direction: output
        example: read_2.fastq
      - name: "--primary_fastq"
        type: file
        description: |
          Output reads that map to the primary reference.
        direction: output
        example: primary.fastq.gz
      - name: "--all_fastq"
        type: file
        description: |
          Output reads that map to any of the references (primary and non-primary).
        direction: output
        example: all.fastq.gz
      - name: "--bbsplit_index"
        # Former flag name, kept so existing invocations keep working.
        alternatives: ["--index"]
        type: file
        description: |
          Directory with index files.
        direction: output
        example: bbsplit
      - name: "--stats"
        type: file
        description: |
          Tab-delimited text file containing mapping statistics.
        direction: output
        example: stats.txt

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: test_data

engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      # NOTE(review): --no-check-certificate disables TLS verification of the
      # BBMap download; consider verifying the archive with a checksum.
      - type: docker
        run: |
          apt-get update && \
          apt-get install -y build-essential openjdk-17-jdk wget tar && \
          wget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz && \
          tar xzf BBMap_39.01.tar.gz && \
          cp -r bbmap/* /usr/local/bin
      # Record the installed BBMap version inside the image.
      - type: docker
        run: |
          echo "${meta_functionality_name}: bbmap: $(bbversion.sh | grep -v "Duplicate cpuset")" > /var/software_versions.txt
runners:
  - type: executable
  - type: nextflow
62 changes: 62 additions & 0 deletions src/bbmap_bbsplit/script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/bash

# BBSplit wrapper.
#
# Two modes, selected by par_only_build_index:
#   true  - build a BBSplit index from the primary reference plus the
#           references listed in par_bbsplit_fasta_list, written to
#           par_bbsplit_index.
#   other - map par_input against either a pre-built index
#           (par_built_bbsplit_index) or the reference FASTA files, then copy
#           the reads binned to the primary reference to par_fastq_1
#           (and par_fastq_2 for paired input).
#
# Exits non-zero when the required references/index are missing.

set -eo pipefail

# Scratch directory for BBSplit's per-reference output; only created in
# mapping mode, so clean_up must cope with it being empty.
tmpdir=""

function clean_up {
  if [ -n "$tmpdir" ]; then
    rm -rf "$tmpdir"
  fi
}
trap clean_up EXIT

# Collect the non-primary references as "ref_<name>=<path>" arguments.
# Skipped when a pre-built index directory is supplied.
other_refs=()
if [ ! -d "$par_built_bbsplit_index" ] && [ -f "$par_bbsplit_fasta_list" ]; then
  # The "|| [ -n "$name" ]" keeps a last line that has no trailing newline.
  while IFS="," read -r name path || [ -n "$name" ]; do
    # Ignore blank lines instead of emitting an empty "ref_=" argument.
    if [ -n "$name" ]; then
      other_refs+=("ref_$name=$path")
    fi
  done < "$par_bbsplit_fasta_list"
fi

if [ "$par_only_build_index" == "true" ]; then
  # Index-building mode.
  if [ -f "$par_primary_ref" ] && [ ${#other_refs[@]} -gt 0 ]; then
    bbsplit.sh \
      ref_primary="$par_primary_ref" "${other_refs[@]}" \
      path="$par_bbsplit_index" \
      threads="${meta_cpus:-1}"
  else
    echo "ERROR: Please specify as input a primary fasta file along with names and paths to non-primary fasta files." >&2
    exit 1
  fi
else
  # Mapping mode. Accept both "," and ";" as separators between input files.
  IFS=",;" read -ra input <<< "$par_input"

  # Reference arguments are kept in an array so paths with spaces survive.
  index_args=()
  if [ -d "$par_built_bbsplit_index" ]; then
    index_args=("path=$par_built_bbsplit_index")
  elif [ -f "$par_primary_ref" ] && [ ${#other_refs[@]} -gt 0 ]; then
    index_args=("ref_primary=$par_primary_ref" "${other_refs[@]}")
  else
    echo "ERROR: Please either specify a BBSplit index as input or a primary fasta file along with names and paths to non-primary fasta files." >&2
    exit 1
  fi

  tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX")

  # Write the statistics to the requested output, falling back to the
  # previous fixed file name when no stats path was given.
  stats_file="${par_stats:-bbsplit_stats.txt}"

  if [ "$par_paired" == "true" ]; then
    if [ ${#input[@]} -lt 2 ]; then
      echo "ERROR: Paired mode requires two input fastq files." >&2
      exit 1
    fi
    # In basename, "%" is replaced by the reference name and "#" by the
    # read number, giving primary_1.fastq / primary_2.fastq.
    bbsplit.sh \
      "${index_args[@]}" \
      threads="${meta_cpus:-1}" \
      in="${input[0]}" \
      in2="${input[1]}" \
      basename="${tmpdir}/%_#.fastq" \
      refstats="$stats_file"
    read1=$(find "$tmpdir/" -iname 'primary_1*')
    read2=$(find "$tmpdir/" -iname 'primary_2*')
    cp "$read1" "$par_fastq_1"
    cp "$read2" "$par_fastq_2"
  else
    bbsplit.sh \
      "${index_args[@]}" \
      threads="${meta_cpus:-1}" \
      in="${input[0]}" \
      basename="${tmpdir}/%.fastq" \
      refstats="$stats_file"
    read1=$(find "$tmpdir/" -iname 'primary*')
    cp "$read1" "$par_fastq_1"
  fi
fi

exit 0
86 changes: 86 additions & 0 deletions src/bbmap_bbsplit/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/bin/bash

# Integration test for the component: builds a BBSplit index, then runs the
# mapping mode with single-end and paired-end reads, once from the reference
# FASTA files and once from the index built in the first step.

# Abort as soon as a component invocation fails instead of relying only on
# the output-file checks.
set -e

echo ">>> Test $meta_functionality_name"

# Fail the test unless the given file exists and is non-empty.
#   $1 - path of the file to check
#   $2 - label used at the start of the failure message
function assert_file_not_empty {
  local file="$1"
  local label="$2"
  if [ ! -f "$file" ]; then
    echo "$label does not exist!"
    exit 1
  fi
  if [ ! -s "$file" ]; then
    echo "$label is empty!"
    exit 1
  fi
}

# List of non-primary references: one "<name>,<fasta path>" pair per line.
cat > bbsplit_fasta_list.txt << HERE
sarscov2,${meta_resources_dir}/test_data/sarscov2.fa
human,${meta_resources_dir}/test_data/human.fa
HERE

echo ">>> Building BBSplit index"
"${meta_executable}" \
  --primary_ref "${meta_resources_dir}/test_data/genome.fasta" \
  --bbsplit_fasta_list "bbsplit_fasta_list.txt" \
  --only_build_index true \
  --bbsplit_index "BBSplit_index"

echo ">>> Check whether output exists"
if [ ! -d "BBSplit_index" ]; then
  echo "BBSplit index does not exist!"
  exit 1
fi
if [ -z "$(ls -A 'BBSplit_index')" ]; then
  echo "BBSplit index is empty!"
  exit 1
fi

echo ">>> Filtering ribosomal RNA reads"

echo ">>> Testing with single-end reads and primary/non-primary FASTA files"
"${meta_executable}" \
  --paired false \
  --input "${meta_resources_dir}/test_data/SRR6357070_1.fastq.gz" \
  --only_build_index false \
  --primary_ref "${meta_resources_dir}/test_data/genome.fasta" \
  --bbsplit_fasta_list "bbsplit_fasta_list.txt" \
  --fastq_1 "filtered_SRR6357070_1.fastq.gz"

echo ">>> Check whether output exists"
assert_file_not_empty "filtered_SRR6357070_1.fastq.gz" "Filtered reads file"

rm filtered_SRR6357070_1.fastq.gz

echo ">>> Testing with paired-end reads and primary/non-primary FASTA files"
"${meta_executable}" \
  --paired true \
  --input "${meta_resources_dir}/test_data/SRR6357070_1.fastq.gz,${meta_resources_dir}/test_data/SRR6357070_2.fastq.gz" \
  --only_build_index false \
  --primary_ref "${meta_resources_dir}/test_data/genome.fasta" \
  --bbsplit_fasta_list "bbsplit_fasta_list.txt" \
  --fastq_1 "filtered_SRR6357070_1.fastq.gz" \
  --fastq_2 "filtered_SRR6357070_2.fastq.gz"

echo ">>> Check whether output exists"
assert_file_not_empty "filtered_SRR6357070_1.fastq.gz" "Filtered read 1 file"
assert_file_not_empty "filtered_SRR6357070_2.fastq.gz" "Filtered read 2 file"

rm filtered_SRR6357070_1.fastq.gz filtered_SRR6357070_2.fastq.gz

echo ">>> Testing with single-end reads and BBSplit index"
"${meta_executable}" \
  --paired false \
  --input "${meta_resources_dir}/test_data/SRR6357070_1.fastq.gz" \
  --only_build_index false \
  --built_bbsplit_index "BBSplit_index" \
  --fastq_1 "filtered_SRR6357070_1.fastq.gz"

echo ">>> Check whether output exists"
assert_file_not_empty "filtered_SRR6357070_1.fastq.gz" "Filtered reads file"

# Remove the output so a stale file cannot satisfy the next test's checks.
rm filtered_SRR6357070_1.fastq.gz

echo ">>> Testing with paired-end reads and BBSplit index"
"${meta_executable}" \
  --paired true \
  --input "${meta_resources_dir}/test_data/SRR6357070_1.fastq.gz,${meta_resources_dir}/test_data/SRR6357070_2.fastq.gz" \
  --only_build_index false \
  --built_bbsplit_index "BBSplit_index" \
  --fastq_1 "filtered_SRR6357070_1.fastq.gz" \
  --fastq_2 "filtered_SRR6357070_2.fastq.gz"

echo ">>> Check whether output exists"
assert_file_not_empty "filtered_SRR6357070_1.fastq.gz" "Filtered read 1 file"
assert_file_not_empty "filtered_SRR6357070_2.fastq.gz" "Filtered read 2 file"

rm filtered_SRR6357070_1.fastq.gz filtered_SRR6357070_2.fastq.gz

echo "All tests succeeded!"
exit 0
Binary file added src/bbmap_bbsplit/test_data/SRR6357070_1.fastq.gz
Binary file not shown.
Binary file added src/bbmap_bbsplit/test_data/SRR6357070_2.fastq.gz
Binary file not shown.
Loading

0 comments on commit 924eeee

Please sign in to comment.