From fb4c5601bc85e466ccd07d1982dd15da584a318a Mon Sep 17 00:00:00 2001 From: Souriya Trinh Date: Wed, 27 Sep 2023 23:55:40 +0200 Subject: [PATCH] Test: cnpy integration. --- modules/core/include/visp3/core/vpIoTools.h | 253 +++++++++++++++ modules/core/src/tools/file/vpIoTools.cpp | 329 ++++++++++++++++++++ 2 files changed, 582 insertions(+) diff --git a/modules/core/include/visp3/core/vpIoTools.h b/modules/core/include/visp3/core/vpIoTools.h index 6854533379..60796d1421 100644 --- a/modules/core/include/visp3/core/vpIoTools.h +++ b/modules/core/include/visp3/core/vpIoTools.h @@ -47,6 +47,8 @@ #include #include #include +#include +#include #include /*! @@ -256,4 +258,255 @@ class VISP_EXPORT vpIoTools static int mkdir_p(const std::string &path, int mode); #endif }; + +// TODO: +// Copyright (C) 2011 Carl Rogers +// Released under MIT License +// license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php +namespace visp +{ +namespace cnpy { + struct NpyArray { + NpyArray(const std::vector& _shape, size_t _word_size, bool _fortran_order) : + shape(_shape), word_size(_word_size), fortran_order(_fortran_order) + { + num_vals = 1; + for(size_t i = 0;i < shape.size();i++) num_vals *= shape[i]; + data_holder = std::shared_ptr>( + new std::vector(num_vals * word_size)); + } + + NpyArray() : shape(0), word_size(0), fortran_order(0), num_vals(0) { } + + template + T* data() { + return reinterpret_cast(&(*data_holder)[0]); + } + + template + const T* data() const { + return reinterpret_cast(&(*data_holder)[0]); + } + + template + std::vector as_vec() const { + const T* p = data(); + return std::vector(p, p+num_vals); + } + + size_t num_bytes() const { + return data_holder->size(); + } + + std::shared_ptr> data_holder; + std::vector shape; + size_t word_size; + bool fortran_order; + size_t num_vals; + }; + + using npz_t = std::map; + + char BigEndianTest(); + char map_type(const std::type_info& t); + template std::vector create_npy_header(const std::vector& shape); + void parse_npy_header(FILE* fp,size_t& word_size, std::vector& shape, bool& fortran_order); + void parse_npy_header(unsigned char* buffer,size_t& word_size, std::vector& shape, bool& fortran_order); + void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset); + npz_t npz_load(std::string fname); + NpyArray npz_load(std::string fname, std::string varname); + NpyArray npy_load(std::string fname); + + template std::vector& operator+=(std::vector& lhs, const T rhs) { + //write in little endian + for(size_t byte = 0; byte < sizeof(T); byte++) { + char val = *((char*)&rhs+byte); + lhs.push_back(val); + } + return lhs; + } + + template<> std::vector& operator+=(std::vector& lhs, const std::string rhs); + template<> std::vector& operator+=(std::vector& lhs, const char* rhs); + + + template void npy_save(std::string fname, const T* data, const std::vector shape, std::string mode = "w") { + FILE* fp = NULL; + std::vector true_data_shape; //if appending, the shape of existing + new data + + if(mode == "a") fp = fopen(fname.c_str(),"r+b"); + + if(fp) { + //file exists. we need to append to it. read the header, modify the array size + size_t word_size; + bool fortran_order; + parse_npy_header(fp,word_size,true_data_shape,fortran_order); + assert(!fortran_order); + + if(word_size != sizeof(T)) { + std::cout<<"libnpy error: "< header = create_npy_header(true_data_shape); + size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies()); + + fseek(fp,0,SEEK_SET); + fwrite(&header[0],sizeof(char),header.size(),fp); + fseek(fp,0,SEEK_END); + fwrite(data,sizeof(T),nels,fp); + fclose(fp); + } + + template void npz_save(std::string zipname, std::string fname, const T* data, const std::vector& shape, std::string mode = "w") + { + //first, append a .npy to the fname + fname += ".npy"; + + //now, on with the show + FILE* fp = NULL; + uint16_t nrecs = 0; + size_t global_header_offset = 0; + std::vector global_header; + + if(mode == "a") fp = fopen(zipname.c_str(),"r+b"); + + if(fp) { + //zip file exists. we need to add a new npy file to it. + //first read the footer. this gives us the offset and size of the global header + //then read and store the global header. + //below, we will write the the new data at the start of the global header then append the global header and footer below it + size_t global_header_size; + parse_zip_footer(fp,nrecs,global_header_size,global_header_offset); + fseek(fp,global_header_offset,SEEK_SET); + global_header.resize(global_header_size); + size_t res = fread(&global_header[0],sizeof(char),global_header_size,fp); + if(res != global_header_size){ + throw std::runtime_error("npz_save: header read error while adding to existing zip"); + } + fseek(fp,global_header_offset,SEEK_SET); + } + else { + fp = fopen(zipname.c_str(),"wb"); + } + + std::vector npy_header = create_npy_header(shape); + + size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies()); + size_t nbytes = nels*sizeof(T) + npy_header.size(); + + //get the CRC of the data to be added + uint32_t crc = crc32(0L,(uint8_t*)&npy_header[0],npy_header.size()); + crc = crc32(crc,(uint8_t*)data,nels*sizeof(T)); + + //build the local header + std::vector local_header; + local_header += "PK"; //first part of sig + local_header += (uint16_t) 0x0403; //second part of sig + local_header += (uint16_t) 20; //min version to extract + local_header += (uint16_t) 0; //general purpose bit flag + local_header += (uint16_t) 0; //compression method + local_header += (uint16_t) 0; //file last mod time + local_header += (uint16_t) 0; //file last mod date + local_header += (uint32_t) crc; //crc + local_header += (uint32_t) nbytes; //compressed size + local_header += (uint32_t) nbytes; //uncompressed size + local_header += (uint16_t) fname.size(); //fname length + local_header += (uint16_t) 0; //extra field length + local_header += fname; + + //build global header + global_header += "PK"; //first part of sig + global_header += (uint16_t) 0x0201; //second part of sig + global_header += (uint16_t) 20; //version made by + global_header.insert(global_header.end(),local_header.begin()+4,local_header.begin()+30); + global_header += (uint16_t) 0; //file comment length + global_header += (uint16_t) 0; //disk number where file starts + global_header += (uint16_t) 0; //internal file attributes + global_header += (uint32_t) 0; //external file attributes + global_header += (uint32_t) global_header_offset; //relative offset of local file header, since it begins where the global header used to begin + global_header += fname; + + //build footer + std::vector footer; + footer += "PK"; //first part of sig + footer += (uint16_t) 0x0605; //second part of sig + footer += (uint16_t) 0; //number of this disk + footer += (uint16_t) 0; //disk where footer starts + footer += (uint16_t) (nrecs+1); //number of records on this disk + footer += (uint16_t) (nrecs+1); //total number of records + footer += (uint32_t) global_header.size(); //nbytes of global headers + footer += (uint32_t) (global_header_offset + nbytes + local_header.size()); //offset of start of global headers, since global header now starts after newly written array + footer += (uint16_t) 0; //zip file comment length + + //write everything + fwrite(&local_header[0],sizeof(char),local_header.size(),fp); + fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp); + fwrite(data,sizeof(T),nels,fp); + fwrite(&global_header[0],sizeof(char),global_header.size(),fp); + fwrite(&footer[0],sizeof(char),footer.size(),fp); + fclose(fp); + } + + template void npy_save(std::string fname, const std::vector data, std::string mode = "w") { + std::vector shape; + shape.push_back(data.size()); + npy_save(fname, &data[0], shape, mode); + } + + template void npz_save(std::string zipname, std::string fname, const std::vector data, std::string mode = "w") { + std::vector shape; + shape.push_back(data.size()); + npz_save(zipname, fname, &data[0], shape, mode); + } + + template std::vector create_npy_header(const std::vector& shape) { + + std::vector dict; + dict += "{'descr': '"; + dict += BigEndianTest(); + dict += map_type(typeid(T)); + dict += std::to_string(sizeof(T)); + dict += "', 'fortran_order': False, 'shape': ("; + dict += std::to_string(shape[0]); + for(size_t i = 1;i < shape.size();i++) { + dict += ", "; + dict += std::to_string(shape[i]); + } + if(shape.size() == 1) dict += ","; + dict += "), }"; + //pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n + int remainder = 16 - (10 + dict.size()) % 16; + dict.insert(dict.end(),remainder,' '); + dict.back() = '\n'; + + std::vector header; + header += (char) 0x93; + header += "NUMPY"; + header += (char) 0x01; //major version of numpy format + header += (char) 0x00; //minor version of numpy format + header += (uint16_t) dict.size(); + header.insert(header.end(),dict.begin(),dict.end()); + + return header; + } +} +} #endif diff --git a/modules/core/src/tools/file/vpIoTools.cpp b/modules/core/src/tools/file/vpIoTools.cpp index 11910f1817..58fb54aeab 100644 --- a/modules/core/src/tools/file/vpIoTools.cpp +++ b/modules/core/src/tools/file/vpIoTools.cpp @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include @@ -2276,3 +2277,331 @@ bool vpIoTools::parseBoolean(std::string input) Remove leading and trailing whitespaces from a string. */ std::string vpIoTools::trim(std::string s) { return ltrim(rtrim(s)); } + +// TODO: +// Copyright (C) 2011 Carl Rogers +// Released under MIT License +// license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php +namespace visp +{ +char cnpy::BigEndianTest() { + int x = 1; + return (((char *)&x)[0]) ? '<' : '>'; +} + +char cnpy::map_type(const std::type_info& t) +{ + if(t == typeid(float) ) return 'f'; + if(t == typeid(double) ) return 'f'; + if(t == typeid(long double) ) return 'f'; + + if(t == typeid(int) ) return 'i'; + if(t == typeid(char) ) return 'i'; + if(t == typeid(short) ) return 'i'; + if(t == typeid(long) ) return 'i'; + if(t == typeid(long long) ) return 'i'; + + if(t == typeid(unsigned char) ) return 'u'; + if(t == typeid(unsigned short) ) return 'u'; + if(t == typeid(unsigned long) ) return 'u'; + if(t == typeid(unsigned long long) ) return 'u'; + if(t == typeid(unsigned int) ) return 'u'; + + if(t == typeid(bool) ) return 'b'; + + if(t == typeid(std::complex) ) return 'c'; + if(t == typeid(std::complex) ) return 'c'; + if(t == typeid(std::complex) ) return 'c'; + + else return '?'; +} + +template<> std::vector& cnpy::operator+=(std::vector& lhs, const std::string rhs) { + lhs.insert(lhs.end(),rhs.begin(),rhs.end()); + return lhs; +} + +template<> std::vector& cnpy::operator+=(std::vector& lhs, const char* rhs) { + //write in little endian + size_t len = strlen(rhs); + lhs.reserve(len); + for(size_t byte = 0; byte < len; byte++) { + lhs.push_back(rhs[byte]); + } + return lhs; +} + +void cnpy::parse_npy_header(unsigned char* buffer,size_t& word_size, std::vector& shape, bool& fortran_order) { + //std::string magic_string(buffer,6); + // uint8_t major_version = *reinterpret_cast(buffer+6); + // uint8_t minor_version = *reinterpret_cast(buffer+7); + uint16_t header_len = *reinterpret_cast(buffer+8); + std::string header(reinterpret_cast(buffer+9),header_len); + + //fortran order + size_t loc1 = header.find("fortran_order")+16; + fortran_order = (header.substr(loc1,4) == "True" ? true : false); + + //shape + loc1 = header.find("("); + size_t loc2 = header.find(")"); + + std::regex num_regex("[0-9][0-9]*"); + std::smatch sm; + shape.clear(); + + std::string str_shape = header.substr(loc1+1,loc2-loc1-1); + while(std::regex_search(str_shape, sm, num_regex)) { + shape.push_back(std::stoi(sm[0].str())); + str_shape = sm.suffix().str(); + } + + //endian, word size, data type + //byte order code | stands for not applicable. + //not sure when this applies except for byte array + loc1 = header.find("descr")+9; + bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); + assert(littleEndian); + + //char type = header[loc1+1]; + //assert(type == map_type(T)); + + std::string str_ws = header.substr(loc1+2); + loc2 = str_ws.find("'"); + word_size = atoi(str_ws.substr(0,loc2).c_str()); +} + +void cnpy::parse_npy_header(FILE* fp, size_t& word_size, std::vector& shape, bool& fortran_order) { + char buffer[256]; + size_t res = fread(buffer,sizeof(char),11,fp); + if(res != 11) + throw std::runtime_error("parse_npy_header: failed fread"); + std::string header = fgets(buffer,256,fp); + assert(header[header.size()-1] == '\n'); + + size_t loc1, loc2; + + //fortran order + loc1 = header.find("fortran_order"); + if (loc1 == std::string::npos) + throw std::runtime_error("parse_npy_header: failed to find header keyword: 'fortran_order'"); + loc1 += 16; + fortran_order = (header.substr(loc1,4) == "True" ? true : false); + + //shape + loc1 = header.find("("); + loc2 = header.find(")"); + if (loc1 == std::string::npos || loc2 == std::string::npos) + throw std::runtime_error("parse_npy_header: failed to find header keyword: '(' or ')'"); + + std::regex num_regex("[0-9][0-9]*"); + std::smatch sm; + shape.clear(); + + std::string str_shape = header.substr(loc1+1,loc2-loc1-1); + while(std::regex_search(str_shape, sm, num_regex)) { + shape.push_back(std::stoi(sm[0].str())); + str_shape = sm.suffix().str(); + } + + //endian, word size, data type + //byte order code | stands for not applicable. + //not sure when this applies except for byte array + loc1 = header.find("descr"); + if (loc1 == std::string::npos) + throw std::runtime_error("parse_npy_header: failed to find header keyword: 'descr'"); + loc1 += 9; + bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); + assert(littleEndian); + + //char type = header[loc1+1]; + //assert(type == map_type(T)); + + std::string str_ws = header.substr(loc1+2); + loc2 = str_ws.find("'"); + word_size = atoi(str_ws.substr(0,loc2).c_str()); +} + +void cnpy::parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset) +{ + std::vector footer(22); + fseek(fp,-22,SEEK_END); + size_t res = fread(&footer[0],sizeof(char),22,fp); + if(res != 22) + throw std::runtime_error("parse_zip_footer: failed fread"); + + uint16_t disk_no, disk_start, nrecs_on_disk, comment_len; + disk_no = *(uint16_t*) &footer[4]; + disk_start = *(uint16_t*) &footer[6]; + nrecs_on_disk = *(uint16_t*) &footer[8]; + nrecs = *(uint16_t*) &footer[10]; + global_header_size = *(uint32_t*) &footer[12]; + global_header_offset = *(uint32_t*) &footer[16]; + comment_len = *(uint16_t*) &footer[20]; + + assert(disk_no == 0); + assert(disk_start == 0); + assert(nrecs_on_disk == nrecs); + assert(comment_len == 0); +} + +cnpy::NpyArray load_the_npy_file(FILE* fp) { + std::vector shape; + size_t word_size; + bool fortran_order; + cnpy::parse_npy_header(fp,word_size,shape,fortran_order); + + cnpy::NpyArray arr(shape, word_size, fortran_order); + size_t nread = fread(arr.data(),1,arr.num_bytes(),fp); + if(nread != arr.num_bytes()) + throw std::runtime_error("load_the_npy_file: failed fread"); + return arr; +} + +cnpy::NpyArray load_the_npz_array(FILE* fp, uint32_t compr_bytes, uint32_t uncompr_bytes) { + + std::vector buffer_compr(compr_bytes); + std::vector buffer_uncompr(uncompr_bytes); + size_t nread = fread(&buffer_compr[0],1,compr_bytes,fp); + if(nread != compr_bytes) + throw std::runtime_error("load_the_npy_file: failed fread"); + + z_stream d_stream; + + d_stream.zalloc = Z_NULL; + d_stream.zfree = Z_NULL; + d_stream.opaque = Z_NULL; + d_stream.avail_in = 0; + d_stream.next_in = Z_NULL; + int err = inflateInit2(&d_stream, -MAX_WBITS); + + d_stream.avail_in = compr_bytes; + d_stream.next_in = &buffer_compr[0]; + d_stream.avail_out = uncompr_bytes; + d_stream.next_out = &buffer_uncompr[0]; + + err = inflate(&d_stream, Z_FINISH); + err = inflateEnd(&d_stream); + + std::vector shape; + size_t word_size; + bool fortran_order; + cnpy::parse_npy_header(&buffer_uncompr[0],word_size,shape,fortran_order); + + cnpy::NpyArray array(shape, word_size, fortran_order); + + size_t offset = uncompr_bytes - array.num_bytes(); + memcpy(array.data(),&buffer_uncompr[0]+offset,array.num_bytes()); + + return array; +} + +cnpy::npz_t cnpy::npz_load(std::string fname) { + FILE* fp = fopen(fname.c_str(),"rb"); + + if(!fp) { + throw std::runtime_error("npz_load: Error! Unable to open file "+fname+"!"); + } + + cnpy::npz_t arrays; + + while(1) { + std::vector local_header(30); + size_t headerres = fread(&local_header[0],sizeof(char),30,fp); + if(headerres != 30) + throw std::runtime_error("npz_load: failed fread"); + + //if we've reached the global header, stop reading + if(local_header[2] != 0x03 || local_header[3] != 0x04) break; + + //read in the variable name + uint16_t name_len = *(uint16_t*) &local_header[26]; + std::string varname(name_len,' '); + size_t vname_res = fread(&varname[0],sizeof(char),name_len,fp); + if(vname_res != name_len) + throw std::runtime_error("npz_load: failed fread"); + + //erase the lagging .npy + varname.erase(varname.end()-4,varname.end()); + + //read in the extra field + uint16_t extra_field_len = *(uint16_t*) &local_header[28]; + if(extra_field_len > 0) { + std::vector buff(extra_field_len); + size_t efield_res = fread(&buff[0],sizeof(char),extra_field_len,fp); + if(efield_res != extra_field_len) + throw std::runtime_error("npz_load: failed fread"); + } + + uint16_t compr_method = *reinterpret_cast(&local_header[0]+8); + uint32_t compr_bytes = *reinterpret_cast(&local_header[0]+18); + uint32_t uncompr_bytes = *reinterpret_cast(&local_header[0]+22); + + if(compr_method == 0) {arrays[varname] = load_the_npy_file(fp);} + else {arrays[varname] = load_the_npz_array(fp,compr_bytes,uncompr_bytes);} + } + + fclose(fp); + return arrays; +} + +cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) { + FILE* fp = fopen(fname.c_str(),"rb"); + + if(!fp) throw std::runtime_error("npz_load: Unable to open file "+fname); + + while(1) { + std::vector local_header(30); + size_t header_res = fread(&local_header[0],sizeof(char),30,fp); + if(header_res != 30) + throw std::runtime_error("npz_load: failed fread"); + + //if we've reached the global header, stop reading + if(local_header[2] != 0x03 || local_header[3] != 0x04) break; + + //read in the variable name + uint16_t name_len = *(uint16_t*) &local_header[26]; + std::string vname(name_len,' '); + size_t vname_res = fread(&vname[0],sizeof(char),name_len,fp); + if(vname_res != name_len) + throw std::runtime_error("npz_load: failed fread"); + vname.erase(vname.end()-4,vname.end()); //erase the lagging .npy + + //read in the extra field + uint16_t extra_field_len = *(uint16_t*) &local_header[28]; + fseek(fp,extra_field_len,SEEK_CUR); //skip past the extra field + + uint16_t compr_method = *reinterpret_cast(&local_header[0]+8); + uint32_t compr_bytes = *reinterpret_cast(&local_header[0]+18); + uint32_t uncompr_bytes = *reinterpret_cast(&local_header[0]+22); + + if(vname == varname) { + NpyArray array = (compr_method == 0) ? load_the_npy_file(fp) : load_the_npz_array(fp,compr_bytes,uncompr_bytes); + fclose(fp); + return array; + } + else { + //skip past the data + uint32_t size = *(uint32_t*) &local_header[22]; + fseek(fp,size,SEEK_CUR); + } + } + + fclose(fp); + + //if we get here, we haven't found the variable in the file + throw std::runtime_error("npz_load: Variable name "+varname+" not found in "+fname); +} + +cnpy::NpyArray cnpy::npy_load(std::string fname) { + + FILE* fp = fopen(fname.c_str(), "rb"); + + if(!fp) throw std::runtime_error("npy_load: Unable to open file "+fname); + + NpyArray arr = load_the_npy_file(fp); + + fclose(fp); + return arr; +} +}