Skip to content

Commit

Permalink
Fix CRAM embed_ref=2 with seqs overlapping ref end.
Browse files Browse the repository at this point in the history
If the sequences align off the end of the reference and we are
creating consensus on the fly, then the consensus generated also steps
beyond the reference length.  Although this longer reference is
embedded, it is trimmed back by the CRAM decoder which validates
against the declared reference length in SQ LN, leading to Ns
appearing in the decoder.

Therefore we now validate in the encoder too, which also needed
refs_from_header updates to parse the LN tag so the encoder can trim.

An example of failure could be seen in:

    ./test/test_view -C -o embed_ref=2 test/realn01.sam | \
    ./test/test_view - | grep ST-E00128:308:HHVVLALXX:8:1217:16001:6565
  • Loading branch information
jkbonfield committed Oct 7, 2024
1 parent 2ff207b commit 8c8a612
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
4 changes: 3 additions & 1 deletion cram/cram_encode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1761,7 +1761,6 @@ static int cram_generate_reference(cram_container *c, cram_slice *s, int r1) {
c->ref_start = ref_start+1;
c->ref_end = ref_end+1;
c->ref_free = 1;

return 0;

err:
Expand Down Expand Up @@ -1997,6 +1996,9 @@ int cram_encode_container(cram_fd *fd, cram_container *c) {
fd->no_ref_counter -= (fd->no_ref_counter > 0);
pthread_mutex_unlock(&fd->ref_lock);
}

if (c->ref_end > fd->refs->ref_id[c->ref_id]->length)
c->ref_end = fd->refs->ref_id[c->ref_id]->length;
}

// Iterate through records creating the cram blocks for some
Expand Down
9 changes: 5 additions & 4 deletions cram/cram_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -2734,7 +2734,7 @@ int refs2id(refs_t *r, sam_hdr_t *hdr) {
* Returns 0 on success
* -1 on failure
*/
static int refs_from_header(cram_fd *fd) {
int refs_from_header(cram_fd *fd) {
if (!fd)
return -1;

Expand Down Expand Up @@ -2787,10 +2787,11 @@ static int refs_from_header(cram_fd *fd) {

/* Initialise likely filename if known */
if ((ty = sam_hrecs_find_type_id(h->hrecs, "SQ", "SN", h->hrecs->ref[i].name))) {
if ((tag = sam_hrecs_find_key(ty, "M5", NULL))) {
if ((tag = sam_hrecs_find_key(ty, "M5", NULL)))
r->ref_id[j]->fn = string_dup(r->pool, tag->str+3);
//fprintf(stderr, "Tagging @SQ %s / %s\n", r->ref_id[h]->name, r->ref_id[h]->fn);
}

if ((tag = sam_hrecs_find_key(ty, "LN", NULL)))
r->ref_id[j]->length = strtol(tag->str+3, NULL, 0);
}

k = kh_put(refs, r->h_meta, r->ref_id[j]->name, &n);
Expand Down

0 comments on commit 8c8a612

Please sign in to comment.