/*
 * Patch summary (free-text preamble placed ahead of the first file header;
 * the hunks that follow are unchanged).
 *
 * Purpose, as far as the visible hunks show: make aux/tag handling work on
 * big-endian hosts and avoid dereferencing wider types directly through
 * byte pointers unless ALLOW_UAC is defined (presumably "allow unaligned
 * access" -- TODO confirm where ALLOW_UAC is meant to be set).
 *
 * cram/cram_encode.c
 *   - Adds is_big_endian(): stores 0x01 in an int and returns non-zero when
 *     the first byte of that int is not 0x01.
 *   - cram_encode_aux, case 'B': the 32-bit element count is assembled from
 *     aux[4..7]; aux[7] is used as the lowest byte when is_big_endian() is
 *     non-zero, aux[4] otherwise.
 *
 * cram/os.h
 *   - Removes the unconditional "#define SP_LITTLE_ENDIAN" and instead
 *     compares __BYTE_ORDER with __BIG_ENDIAN / __LITTLE_ENDIAN when neither
 *     SP_BIG_ENDIAN nor SP_LITTLE_ENDIAN is already defined.
 *   - NOTE(review): identifiers that are not defined evaluate to 0 inside
 *     #if, so if neither __BYTE_ORDER nor __BIG_ENDIAN is defined at this
 *     point the first comparison is 0 == 0 and SP_BIG_ENDIAN gets defined.
 *     Confirm a header providing these macros is always included first.
 *
 * htslib/hts.h
 *   - Adds an include of "cram/os.h".
 *     NOTE(review): verify this path resolves for users of the header.
 *   - ed_swap_2p / ed_swap_4p / ed_swap_8p: without ALLOW_UAC the bytes are
 *     copied into a local uint8_t array, swapped through a pointer cast to
 *     the wider type, then copied back to x.
 *     NOTE(review): a uint8_t array carries no alignment guarantee for the
 *     wider type it is read through; a memcpy into a local of the real type
 *     would avoid depending on that -- confirm.
 *
 * sam.c
 *   - Adds an include of "cram/os.h".
 *   - swap_data() gains rw_mode. For 'B' arrays, SWAP_DATA_READ byte-swaps
 *     the count before it is copied into n; SWAP_DATA_WRITE byte-swaps the
 *     count (via s_tmp) only after the element loop has run.
 *   - bam_read1 passes SWAP_DATA_READ. bam_write1 passes SWAP_DATA_WRITE in
 *     both of its calls (before and after the bgzf_write calls).
 *     NOTE(review): the second call operates on data the first call already
 *     swapped; in WRITE mode n is read before the count is swapped back, so
 *     check whether SWAP_DATA_READ was intended for the restoring call.
 *   - sam_format1, bam_aux2i, bam_aux2f: without ALLOW_UAC, multi-byte
 *     values are copied byte-by-byte into tmpData and read through a typed
 *     pointer to it (same alignment caveat as above).
 *
 * bcf_unpack_info_core1 (file header not visible in this copy)
 *   - Without ALLOW_UAC, the INT32 / FLOAT / INT16 single-value cases copy
 *     bytes from ptr into tmpData before reading the typed value.
 *
 * NOTE(review): this copy looks truncated in places (the hts.h hunk shows
 * "#include #include" with no header names, and the bam_aux2f hunk runs
 * directly into "key = bcf_dec_typed_int1(...)"). Compare against the
 * original patch before applying.
 */
diff --git a/cram/cram_encode.c b/cram/cram_encode.c index 94c2cebec..5a154f3cc 100644 --- a/cram/cram_encode.c +++ b/cram/cram_encode.c @@ -1858,6 +1858,12 @@ static char *cram_encode_aux_1_0(cram_fd *fd, bam_seq_t *b, cram_container *c, return rg; } +static inline int is_big_endian(){ + int x = 0x01; + char *c = (char*)&x; + return (c[0] != 0x01); +} + /* * Encodes auxiliary data. Largely duplicated from above, but done so to * keep it simple and avoid a myriad of version ifs. @@ -1949,10 +1955,21 @@ static char *cram_encode_aux(cram_fd *fd, bam_seq_t *b, cram_container *c, case 'B': { int type = aux[3], blen; - uint32_t count = (uint32_t)((((unsigned char *)aux)[4]<< 0) + - (((unsigned char *)aux)[5]<< 8) + - (((unsigned char *)aux)[6]<<16) + - (((unsigned char *)aux)[7]<<24)); + uint32_t count; + if(is_big_endian()) + { + count = (uint32_t)((((unsigned char *)aux)[7]<< 0) + + (((unsigned char *)aux)[6]<< 8) + + (((unsigned char *)aux)[5]<<16) + + (((unsigned char *)aux)[4]<<24)); + } + else + { + count = (uint32_t)((((unsigned char *)aux)[4]<< 0) + + (((unsigned char *)aux)[5]<< 8) + + (((unsigned char *)aux)[6]<<16) + + (((unsigned char *)aux)[7]<<24)); + } // skip TN field aux+=3; diff --git a/cram/os.h b/cram/os.h index b2affe0b2..1dd253a59 100644 --- a/cram/os.h +++ b/cram/os.h @@ -88,8 +88,15 @@ extern "C" { * processor type too. */ -/* Set by autoconf */ -#define SP_LITTLE_ENDIAN +#if !defined(SP_BIG_ENDIAN) && !defined(SP_LITTLE_ENDIAN) + +# if __BYTE_ORDER == __BIG_ENDIAN +# define SP_BIG_ENDIAN +#elif __BYTE_ORDER == __LITTLE_ENDIAN +# define SP_LITTLE_ENDIAN +#endif + +#endif /* Mac FAT binaries or unknown. 
Auto detect based on CPU type */ #if !defined(SP_BIG_ENDIAN) && !defined(SP_LITTLE_ENDIAN) diff --git a/htslib/hts.h b/htslib/hts.h index dad085740..28303c118 100644 --- a/htslib/hts.h +++ b/htslib/hts.h @@ -4,6 +4,8 @@ #include #include +#include "cram/os.h" + #ifndef HTS_BGZF_TYPEDEF typedef struct BGZF BGZF; #define HTS_BGZF_TYPEDEF @@ -280,7 +282,17 @@ static inline uint16_t ed_swap_2(uint16_t v) } static inline void *ed_swap_2p(void *x) { +#ifdef ALLOW_UAC *(uint16_t*)x = ed_swap_2(*(uint16_t*)x); +#else + uint8_t tmpData[2]; + uint16_t *ptmpData = (uint16_t*)&tmpData; + uint8_t *px = (uint8_t*)x; + int j; + for(j=0;j<2;j++) tmpData[j] = px[j]; + *ptmpData = ed_swap_2(*ptmpData); + for(j=0;j<2;j++) px[j] = tmpData[j]; +#endif return x; } static inline uint32_t ed_swap_4(uint32_t v) @@ -290,7 +302,17 @@ static inline uint32_t ed_swap_4(uint32_t v) } static inline void *ed_swap_4p(void *x) { +#ifdef ALLOW_UAC *(uint32_t*)x = ed_swap_4(*(uint32_t*)x); +#else + uint8_t tmpData[4]; + uint32_t *ptmpData = (uint32_t*)&tmpData; + uint8_t *px = (uint8_t*)x; + int j; + for(j=0;j<4;j++) tmpData[j] = px[j]; + *ptmpData = ed_swap_4(*ptmpData); + for(j=0;j<4;j++) px[j] = tmpData[j]; +#endif return x; } static inline uint64_t ed_swap_8(uint64_t v) @@ -301,7 +323,17 @@ static inline uint64_t ed_swap_8(uint64_t v) } static inline void *ed_swap_8p(void *x) { +#ifdef ALLOW_UAC *(uint64_t*)x = ed_swap_8(*(uint64_t*)x); +#else + uint8_t tmpData[8]; + uint64_t *ptmpData = (uint64_t*)&tmpData; + uint8_t *px = (uint8_t*)x; + int j; + for(j=0;j<8;j++) tmpData[j] = px[j]; + *ptmpData = ed_swap_8(*ptmpData); + for(j=0;j<8;j++) px[j] = tmpData[j]; +#endif return x; } diff --git a/sam.c b/sam.c index 5a64255ac..50042815c 100644 --- a/sam.c +++ b/sam.c @@ -7,6 +7,7 @@ #include "htslib/sam.h" #include "htslib/bgzf.h" #include "cram/cram.h" +#include "cram/os.h" #include "htslib/hfile.h" #include "htslib/khash.h" @@ -259,9 +260,14 @@ static inline int aux_type2size(uint8_t type) } } -static 
void swap_data(const bam1_core_t *c, int l_data, uint8_t *data) +typedef enum swap_data_rw { + SWAP_DATA_READ, + SWAP_DATA_WRITE +}swap_data_rw_t; + +static void swap_data(const bam1_core_t *c, int l_data, uint8_t *data, swap_data_rw_t rw_mode) { - uint8_t *s; + uint8_t *s, *s_tmp; uint32_t *cigar = (uint32_t*)(data + c->l_qname); uint32_t i, n; s = data + c->n_cigar*4 + c->l_qname + c->l_qseq + (c->l_qseq + 1)/2; @@ -282,13 +288,22 @@ static void swap_data(const bam1_core_t *c, int l_data, uint8_t *data) break; case 'B': size = aux_type2size(*s); ++s; - ed_swap_4p(s); memcpy(&n, s, 4); s += 4; + if(SWAP_DATA_READ == rw_mode) + { + ed_swap_4p(s); + } + s_tmp = s; + memcpy(&n, s, 4); s += 4; switch (size) { case 1: s += n; break; case 2: for (i = 0; i < n; ++i, s += 2) ed_swap_2p(s); break; case 4: for (i = 0; i < n; ++i, s += 4) ed_swap_4p(s); break; case 8: for (i = 0; i < n; ++i, s += 8) ed_swap_8p(s); break; } + if(SWAP_DATA_WRITE == rw_mode) + { + ed_swap_4p(s_tmp); + } break; } } @@ -326,7 +341,7 @@ int bam_read1(BGZF *fp, bam1_t *b) } if (bgzf_read(fp, b->data, b->l_data) != b->l_data) return -4; //b->l_aux = b->l_data - c->n_cigar * 4 - c->l_qname - c->l_qseq - (c->l_qseq+1)/2; - if (fp->is_be) swap_data(c, b->l_data, b->data); + if (fp->is_be) swap_data(c, b->l_data, b->data, SWAP_DATA_READ); return 4 + block_len; } @@ -348,13 +363,13 @@ int bam_write1(BGZF *fp, const bam1_t *b) for (i = 0; i < 8; ++i) ed_swap_4p(x + i); y = block_len; if (ok) ok = (bgzf_write(fp, ed_swap_4p(&y), 4) >= 0); - swap_data(c, b->l_data, b->data); + swap_data(c, b->l_data, b->data, SWAP_DATA_WRITE); } else { if (ok) ok = (bgzf_write(fp, &block_len, 4) >= 0); } if (ok) ok = (bgzf_write(fp, x, 32) >= 0); if (ok) ok = (bgzf_write(fp, b->data, b->l_data) >= 0); - if (fp->is_be) swap_data(c, b->l_data, b->data); + if (fp->is_be) swap_data(c, b->l_data, b->data, SWAP_DATA_WRITE); return ok? 
4 + block_len : -1; } @@ -909,6 +924,10 @@ int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str) s = bam_get_aux(b); // aux while (s+4 <= b->data + b->l_data) { uint8_t type, key[2]; +#ifndef ALLOW_UAC + uint8_t tmpData[8]; + int j; +#endif key[0] = s[0]; key[1] = s[1]; s += 2; type = *s++; kputc('\t', str); kputsn((char*)key, 2, str); kputc(':', str); @@ -924,6 +943,7 @@ int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str) kputsn("i:", 2, str); kputw(*(int8_t*)s, str); ++s; +#ifdef ALLOW_UAC } else if (type == 'S') { if (s+2 <= b->data + b->l_data) { kputsn("i:", 2, str); @@ -959,6 +979,54 @@ int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str) ksprintf(str, "d:%g", *(double*)s); s += 8; } else return -1; +#else + } else if (type == 'S') { + if (s+2 <= b->data + b->l_data) { + uint16_t *ptmpData = (uint16_t*)tmpData; + for(j=0;j<2;j++) tmpData[j]=s[j]; + kputsn("i:", 2, str); + kputw(*ptmpData, str); + s += 2; + } else return -1; + } else if (type == 's') { + if (s+2 <= b->data + b->l_data) { + int16_t *ptmpData = (int16_t*)tmpData; + for(j=0;j<2;j++) tmpData[j]=s[j]; + kputsn("i:", 2, str); + kputw(*ptmpData, str); + s += 2; + } else return -1; + } else if (type == 'I') { + if (s+4 <= b->data + b->l_data) { + uint32_t *ptmpData = (uint32_t*)tmpData; + for(j=0;j<4;j++) tmpData[j]=s[j]; + kputsn("i:", 2, str); + kputuw(*ptmpData, str); + s += 4; + } else return -1; + } else if (type == 'i') { + if (s+4 <= b->data + b->l_data) { + int32_t *ptmpData = (int32_t*)tmpData; + for(j=0;j<4;j++) tmpData[j]=s[j]; + kputsn("i:", 2, str); + kputw(*ptmpData, str); + s += 4; + } else return -1; + } else if (type == 'f') { + if (s+4 <= b->data + b->l_data) { + float *ptmpData = (float*)tmpData; + for(j=0;j<4;j++) tmpData[j]=s[j]; + ksprintf(str, "f:%g", *ptmpData); + s += 4; + } else return -1; + } else if (type == 'd') { + if (s+8 <= b->data + b->l_data) { + double *ptmpData = (double*)tmpData; + for(j=0;j<8;j++) tmpData[j]=s[j]; + 
ksprintf(str, "d:%g", *ptmpData); + s += 8; + } else return -1; +#endif } else if (type == 'Z' || type == 'H') { kputc(type, str); kputc(':', str); while (s < b->data + b->l_data && *s) kputc(*s++, str); @@ -977,11 +1045,49 @@ int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str) kputc(',', str); if ('c' == sub_type) { kputw(*(int8_t*)s, str); ++s; } else if ('C' == sub_type) { kputw(*(uint8_t*)s, str); ++s; } +#ifdef ALLOW_UAC else if ('s' == sub_type) { kputw(*(int16_t*)s, str); s += 2; } else if ('S' == sub_type) { kputw(*(uint16_t*)s, str); s += 2; } else if ('i' == sub_type) { kputw(*(int32_t*)s, str); s += 4; } else if ('I' == sub_type) { kputuw(*(uint32_t*)s, str); s += 4; } else if ('f' == sub_type) { ksprintf(str, "%g", *(float*)s); s += 4; } +#else + else if ('s' == sub_type) + { + int16_t *ptmpData = (int16_t*)tmpData; + for(j=0;j<2;j++)tmpData[j]=s[j]; + kputw(*ptmpData, str); + s += 2; + } + else if ('S' == sub_type) + { + uint16_t *ptmpData = (uint16_t*)tmpData; + for(j=0;j<2;j++)tmpData[j]=s[j]; + kputw(*ptmpData, str); + s += 2; + } + else if ('i' == sub_type) + { + int32_t *ptmpData = (int32_t*)tmpData; + for(j=0;j<4;j++)tmpData[j]=s[j]; + kputw(*ptmpData, str); + s += 4; + } + else if ('I' == sub_type) + { + uint32_t *ptmpData = (uint32_t*)tmpData; + for(j=0;j<4;j++)tmpData[j]=s[j]; + kputuw(*ptmpData, str); + s += 4; + } + else if ('f' == sub_type) + { + float *ptmpData = (float*)tmpData; + for(j=0;j<4;j++)tmpData[j]=s[j]; + ksprintf(str, "%g", *ptmpData); + s += 4; + } +#endif } } } @@ -1071,11 +1177,22 @@ int32_t bam_aux2i(const uint8_t *s) { int type; type = *s++; +#ifdef ALLOW_UAC if (type == 'c') return (int32_t)*(int8_t*)s; else if (type == 'C') return (int32_t)*(uint8_t*)s; else if (type == 's') return (int32_t)*(int16_t*)s; else if (type == 'S') return (int32_t)*(uint16_t*)s; else if (type == 'i' || type == 'I') return *(int32_t*)s; +#else + uint8_t tmpData[4]; + int j; + if (type == 'c') return (int32_t)*(int8_t*)s; + else 
if (type == 'C') return (int32_t)*(uint8_t*)s; + else if (type == 's'){ int16_t *ptmpData = (int16_t*)tmpData; for(j=0;j<2;j++)tmpData[j]=s[j]; return (int32_t)(*ptmpData);} + else if (type == 'S'){ uint16_t *ptmpData = (uint16_t*)tmpData; for(j=0;j<2;j++)tmpData[j]=s[j]; return (int32_t)(*ptmpData);} + else if (type == 'i'){ int32_t *ptmpData = (int32_t*)tmpData; for(j=0;j<4;j++)tmpData[j]=s[j]; return *ptmpData;} + else if (type == 'I'){ uint32_t *ptmpData = (uint32_t*)tmpData; for(j=0;j<4;j++)tmpData[j]=s[j]; return *ptmpData;} +#endif else return 0; } @@ -1083,8 +1200,15 @@ double bam_aux2f(const uint8_t *s) { int type; type = *s++; +#ifdef ALLOW_UAC if (type == 'd') return *(double*)s; else if (type == 'f') return *(float*)s; +#else + uint8_t tmpData[8]; + int j; + if (type == 'd'){ double *ptmpData = (double*)tmpData; for(j=0;jkey = bcf_dec_typed_int1(ptr, &ptr); info->len = bcf_dec_size(ptr, &ptr, &info->type); info->vptr = ptr; @@ -1773,9 +1778,15 @@ static inline uint8_t *bcf_unpack_info_core1(uint8_t *ptr, bcf_info_t *info) info->v1.i = 0; if (info->len == 1) { if (info->type == BCF_BT_INT8 || info->type == BCF_BT_CHAR) info->v1.i = *(int8_t*)ptr; +#ifdef ALLOW_UAC else if (info->type == BCF_BT_INT32) info->v1.i = *(int32_t*)ptr; else if (info->type == BCF_BT_FLOAT) info->v1.f = *(float*)ptr; else if (info->type == BCF_BT_INT16) info->v1.i = *(int16_t*)ptr; +#else + else if (info->type == BCF_BT_INT32) { int32_t *ptmpData = (int32_t*)&tmpData; for(j=0;j<4;j++) tmpData[j]=ptr[j]; info->v1.i = *ptmpData;} + else if (info->type == BCF_BT_FLOAT) { float *ptmpData = (float*)&tmpData; for(j=0;j<4;j++) tmpData[j]=ptr[j]; info->v1.f = *ptmpData;} + else if (info->type == BCF_BT_INT16) { int16_t *ptmpData = (int16_t*)&tmpData; for(j=0;j<2;j++) tmpData[j]=ptr[j]; info->v1.i = *ptmpData;} +#endif } ptr += info->len << bcf_type_shift[info->type]; info->vptr_len = ptr - info->vptr;