diff --git a/adapter_pipelines/Optimus/adapter.wdl b/adapter_pipelines/Optimus/adapter.wdl index 5cb984e9..87119d77 100644 --- a/adapter_pipelines/Optimus/adapter.wdl +++ b/adapter_pipelines/Optimus/adapter.wdl @@ -138,7 +138,7 @@ workflow AdapterOptimus { Boolean record_http = false Boolean add_md5s = false - String pipeline_tools_version = "v0.56.4" + String pipeline_tools_version = "v0.56.5" call GetInputs as prep { input: diff --git a/adapter_pipelines/cellranger/adapter.wdl b/adapter_pipelines/cellranger/adapter.wdl index 834a9f2d..ab350bb1 100644 --- a/adapter_pipelines/cellranger/adapter.wdl +++ b/adapter_pipelines/cellranger/adapter.wdl @@ -148,7 +148,7 @@ workflow Adapter10xCount { Boolean record_http = false Boolean add_md5s = false - String pipeline_tools_version = "v0.56.4" + String pipeline_tools_version = "v0.56.5" call GetInputs { input: diff --git a/adapter_pipelines/ss2_single_end/adapter.wdl b/adapter_pipelines/ss2_single_end/adapter.wdl index a9ace7ea..25b5e28b 100644 --- a/adapter_pipelines/ss2_single_end/adapter.wdl +++ b/adapter_pipelines/ss2_single_end/adapter.wdl @@ -69,7 +69,7 @@ workflow AdapterSmartSeq2SingleCellUnpaired { Boolean record_http = false Boolean add_md5s = false - String pipeline_tools_version = "v0.56.4" + String pipeline_tools_version = "v0.56.5" call GetInputs as prep { input: diff --git a/adapter_pipelines/ss2_single_sample/adapter.wdl b/adapter_pipelines/ss2_single_sample/adapter.wdl index db5a5b7e..aab41632 100644 --- a/adapter_pipelines/ss2_single_sample/adapter.wdl +++ b/adapter_pipelines/ss2_single_sample/adapter.wdl @@ -69,7 +69,7 @@ workflow AdapterSmartSeq2SingleCell{ Boolean record_http = false Boolean add_md5s = false - String pipeline_tools_version = "v0.56.4" + String pipeline_tools_version = "v0.56.5" call GetInputs as prep { input: diff --git a/pipeline_tools/shared/tenx_utils.py b/pipeline_tools/shared/tenx_utils.py index 2de21017..6ae5efd5 100644 --- a/pipeline_tools/shared/tenx_utils.py +++ b/pipeline_tools/shared/tenx_utils.py @@ -29,7 +29,15 @@ def create_fastq_dict(fastq_files): lane_to_fastqs = {} for file in fastq_files: lane = file.lane_index - if lane not in lane_to_fastqs: + if lane is None: + lane = 0 + if lane in lane_to_fastqs: + if file.read_index in lane_to_fastqs[lane]: + raise InsufficientLaneInfoError( + 'There are multiple sets of reads, but no lane index. ' + 'Cannot properly group reads for analysis.' + ) + else: lane_to_fastqs[lane] = {} lane_to_fastqs[lane][file.read_index] = file.manifest_entry @@ -132,3 +140,7 @@ def validate_lanes(lane_to_fastqs): class LaneMissingFileError(Exception): pass + + +class InsufficientLaneInfoError(Exception): + pass diff --git a/pipeline_tools/tests/shared/test_tenx_utils.py b/pipeline_tools/tests/shared/test_tenx_utils.py index 42dec936..ea9b6924 100644 --- a/pipeline_tools/tests/shared/test_tenx_utils.py +++ b/pipeline_tools/tests/shared/test_tenx_utils.py @@ -108,6 +108,49 @@ def invalid_files_missing_read2(): return files +@pytest.fixture +def missing_lane_index_one_set_of_reads(): + files = [ + BundleFile( + 'fastq.gz', None, 'read1', ManifestEntry('gs://somewhere/r1.fastq.gz') + ), + BundleFile( + 'fastq.gz', None, 'read2', ManifestEntry('gs://somewhere/r2.fastq.gz') + ), + BundleFile( + 'fastq.gz', None, 'index1', ManifestEntry('gs://somewhere/i1.fastq.gz') + ), + ] + r.shuffle(files) + return files + + +@pytest.fixture +def missing_lane_index_multiple_sets_of_reads(): + files = [ + BundleFile( + 'fastq.gz', None, 'read1', ManifestEntry('gs://somewhere/r1.fastq.gz') + ), + BundleFile( + 'fastq.gz', None, 'read2', ManifestEntry('gs://somewhere/r2.fastq.gz') + ), + BundleFile( + 'fastq.gz', None, 'index1', ManifestEntry('gs://somewhere/i1.fastq.gz') + ), + BundleFile( + 'fastq.gz', None, 'read1', ManifestEntry('gs://somewhereelse/r1.fastq.gz') + ), + BundleFile( + 'fastq.gz', None, 'read2', ManifestEntry('gs://somewhereelse/r2.fastq.gz') + ), + BundleFile( + 'fastq.gz', None, 'index1', ManifestEntry('gs://somewhereelse/i1.fastq.gz') + ), + ] + r.shuffle(files) + return files + + def test_create_fastq_dict( valid_files_with_index, valid_files_with_index_dict, @@ -151,6 +194,19 @@ def test_validate_lanes_accepts_lanes_when_none_indexed(valid_files_no_index): tenx_utils.validate_lanes(fastq_dict) +def test_create_fastq_dict_reindexes_with_zero_when( + missing_lane_index_one_set_of_reads +): + fastq_dict = tenx_utils.create_fastq_dict(missing_lane_index_one_set_of_reads) + assert 0 in fastq_dict + assert None not in fastq_dict + + +def test_create_fastq_dict_raises_error_when(missing_lane_index_multiple_sets_of_reads): + with pytest.raises(tenx_utils.InsufficientLaneInfoError): + tenx_utils.create_fastq_dict(missing_lane_index_multiple_sets_of_reads) + + def test_get_fastqs_for_read_index(valid_files_with_index): fastq_dict = tenx_utils.create_fastq_dict(valid_files_with_index) fastqs = tenx_utils.get_fastqs_for_read_index(fastq_dict, 'read1')