From 43754677b7a1f1b7e96bf7caa3ec2fb251589710 Mon Sep 17 00:00:00 2001 From: MikeG Date: Fri, 11 Oct 2024 11:14:58 +0200 Subject: [PATCH] Efficient swc build (#476) * handles `locale` correctly, so decimal numbers using comma are allowed * ~800 line SWC parse time goes from 0.976 to 0.699 * Doesn't create changes to bifurcations at the root nodes of SWC files * More strict Neuromorpho 3 point soma checks disallow repeated Sample IDs * Follow https://swc-specification.readthedocs.io/en/latest/swc.html more closely * Allow only positive integer Sample IDs * Allow for the type of neurite to change at bifurcation points * Allow Window style `End of Lines` * allow trailing spaces on sample line --- .github/workflows/coverage_test.yaml | 9 +- .github/workflows/run-tests.yml | 3 +- CHANGELOG.md | 30 + binds/python/bind_misc.cpp | 1 - binds/python/bind_warnings_exceptions.cpp | 2 +- binds/python/generated/docstrings.h | 10 +- include/morphio/warning_handling.h | 4 +- src/CMakeLists.txt | 1 + src/enums.cpp | 1 + src/error_message_generation.cpp | 22 +- src/error_message_generation.h | 18 +- src/mut/morphology.cpp | 9 +- src/mut/writer_asc.cpp | 6 +- src/mut/writer_hdf5.cpp | 7 +- src/mut/writer_swc.cpp | 7 +- src/mut/writer_utils.cpp | 9 + src/mut/writer_utils.h | 1 + src/properties.cpp | 22 +- src/readers/morphologyASC.cpp | 19 +- src/readers/morphologySWC.cpp | 745 ++++++++++++---------- src/readers/utils.cpp | 81 +++ src/readers/utils.h | 26 + src/shared_utils.cpp | 4 +- src/vasc/properties.cpp | 9 +- src/warning_handling.cpp | 6 +- tests/CMakeLists.txt | 3 +- tests/data/disconnected_neurite.swc | 2 +- tests/data/neurite_wrong_root_point.swc | 14 +- tests/data/simple-trailing-space.swc | 5 + tests/data/simple-windows-eol.swc | 33 + tests/test_11_collection.py | 2 +- tests/test_1_swc.py | 179 ++++-- tests/test_6_writers.py | 32 +- tests/test_9_surfaces.py | 33 + tests/test_enums.cpp | 37 ++ tests/test_immutable_morphology.cpp | 2 +- tests/test_morphology_readers.cpp | 29 + tests/test_mutable_morphology.cpp | 4 +- tests/test_swc_reader.cpp | 79 ++- 39 files changed, 1009 insertions(+), 497 deletions(-) create mode 100644 src/readers/utils.cpp create mode 100644 src/readers/utils.h create mode 100644 tests/data/simple-trailing-space.swc create mode 100644 tests/data/simple-windows-eol.swc create mode 100644 tests/test_enums.cpp diff --git a/.github/workflows/coverage_test.yaml b/.github/workflows/coverage_test.yaml index 24ee8b4c6..2583f0f25 100644 --- a/.github/workflows/coverage_test.yaml +++ b/.github/workflows/coverage_test.yaml @@ -19,14 +19,19 @@ jobs: uses: actions/checkout@v4 - name: Fetch repository run: git fetch --prune --unshallow + - name: Get submodules run: git submodule update --init --force --recursive + - name: Install packages run: | sudo apt-get ${{env.apt_options}} update -y - sudo apt-get ${{env.apt_options}} install -y build-essential libhdf5-dev lcov + sudo apt-get ${{env.apt_options}} install -y build-essential libhdf5-dev lcov language-pack-de - name: Build and run unittests - run: ci/coverage_test.sh + run: | + locale -a + ci/coverage_test.sh + - name: Upload Coverage to Coveralls uses: coverallsapp/github-action@master with: diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 4930be2f3..91ad91cb8 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -37,7 +37,7 @@ jobs: if: runner.os == 'Linux' run: | sudo apt-get ${{env.apt_options}} update -y - sudo apt-get ${{env.apt_options}} install -y build-essential libhdf5-dev + sudo apt-get ${{env.apt_options}} install -y build-essential libhdf5-dev language-pack-de - name: Install packages on MacOS if: runner.os == 'macOS' @@ -55,6 +55,7 @@ jobs: CC: ${{ matrix.compiler.CC }} CXX: ${{ matrix.compiler.CXX }} run: | + locale -a ./ci/python_test.sh ./ci/cpp_test.sh ./ci/cpp_test.sh "-DMORPHIO_USE_DOUBLE=ON" diff --git a/CHANGELOG.md b/CHANGELOG.md index ea507c4f2..03161d265 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,33 @@ +v3.4.0 +====== +Improvements: +* New SWC parser: + * handles `locale` correctly, so decimal numbers using comma are allowed + * ~800 line SWC parse time goes from 0.976 to 0.699 + * Doesn't create changes to bifurcations at the root nodes of SWC files + * More strict Neuromorpho 3 point soma checks + * disallow repeated Sample IDs + * Follow https://swc-specification.readthedocs.io/en/latest/swc.html more closely + * Allow only positive integer Sample IDs + * Allow for the type of neurite to change at bifurcation points + * Allow Window style `End of Lines` + * allow trailing spaces on sample lines + +* Since this is a large change to the SWC parser, the minor release was increased + +v3.3.10 +======= +* This is primarily so that 3.13 wheels exist before the new SWC parser is put into production. +* try to work around removal of older MSVC tools +* Remove Errorlevel::Info; it's not an error if it's only informative +* Update upload/download actions to v4. +* update HDF5 url +* Update HighFive to 2.10.0 + +v3.3.9 +====== +* remove embarrassing debug print; Hat-tip SarahM + v3.3.8 ====== * Make ThreePointSoma use relative tolerance (494) diff --git a/binds/python/bind_misc.cpp b/binds/python/bind_misc.cpp index d442917fb..c273ba867 100644 --- a/binds/python/bind_misc.cpp +++ b/binds/python/bind_misc.cpp @@ -283,5 +283,4 @@ Note: This API is 'experimental', meaning it might change in the future. // Bind the lifetime of the `morphio::LoadUnordered` (1) to the // lifetime of the returned iterator (0). py::keep_alive<0, 1>()); - } diff --git a/binds/python/bind_warnings_exceptions.cpp b/binds/python/bind_warnings_exceptions.cpp index b0d91f6b5..ff220b42c 100644 --- a/binds/python/bind_warnings_exceptions.cpp +++ b/binds/python/bind_warnings_exceptions.cpp @@ -104,7 +104,7 @@ void bind_warnings_exceptions(py::module& m) { (void) C(WriteUndefinedSoma); (void) C(MitochondriaWriteNotSupported); (void) C(SomaNonContour); - (void) C(SomaNonCynlinderOrPoint); + (void) C(SomaNonCylinderOrPoint); #undef QUOTE #undef C } diff --git a/binds/python/generated/docstrings.h b/binds/python/generated/docstrings.h index bb036519b..a00e0c197 100644 --- a/binds/python/generated/docstrings.h +++ b/binds/python/generated/docstrings.h @@ -862,15 +862,15 @@ static const char *mkd_doc_morphio_SomaNonContour_msg = R"doc()doc"; static const char *mkd_doc_morphio_SomaNonContour_warning = R"doc()doc"; -static const char *mkd_doc_morphio_SomaNonCynlinderOrPoint = R"doc()doc"; +static const char *mkd_doc_morphio_SomaNonCylinderOrPoint = R"doc()doc"; -static const char *mkd_doc_morphio_SomaNonCynlinderOrPoint_SomaNonCynlinderOrPoint = R"doc()doc"; +static const char *mkd_doc_morphio_SomaNonCylinderOrPoint_SomaNonCylinderOrPoint = R"doc()doc"; -static const char *mkd_doc_morphio_SomaNonCynlinderOrPoint_errorLevel = R"doc()doc"; +static const char *mkd_doc_morphio_SomaNonCylinderOrPoint_errorLevel = R"doc()doc"; -static const char *mkd_doc_morphio_SomaNonCynlinderOrPoint_msg = R"doc()doc"; +static const char *mkd_doc_morphio_SomaNonCylinderOrPoint_msg = R"doc()doc"; -static const char *mkd_doc_morphio_SomaNonCynlinderOrPoint_warning = R"doc()doc"; +static const char *mkd_doc_morphio_SomaNonCylinderOrPoint_warning = R"doc()doc"; static const char *mkd_doc_morphio_Soma_Soma = R"doc()doc"; diff --git a/include/morphio/warning_handling.h b/include/morphio/warning_handling.h index e8bdbcd39..e0650b0cb 100644 --- a/include/morphio/warning_handling.h +++ b/include/morphio/warning_handling.h @@ -243,8 +243,8 @@ struct SomaNonContour: public WarningMessage { } }; -struct SomaNonCynlinderOrPoint: public WarningMessage { - SomaNonCynlinderOrPoint() +struct SomaNonCylinderOrPoint: public WarningMessage { + SomaNonCylinderOrPoint() : WarningMessage(std::string()) {} Warning warning() const final { return Warning::SOMA_NON_CYLINDER_OR_POINT; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5bfa8751d..8f2e1d40d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -28,6 +28,7 @@ set(MORPHIO_SOURCES readers/morphologyASC.cpp readers/morphologyHDF5.cpp readers/morphologySWC.cpp + readers/utils.cpp readers/vasculatureHDF5.cpp section.cpp shared_utils.cpp diff --git a/src/enums.cpp b/src/enums.cpp index 6f52aada4..f55cd54b7 100644 --- a/src/enums.cpp +++ b/src/enums.cpp @@ -8,6 +8,7 @@ namespace morphio { namespace enums { +/** Output stream formatter for SomaType */ std::ostream& operator<<(std::ostream& os, const SomaType v) { switch (v) { case SOMA_SINGLE_POINT: diff --git a/src/error_message_generation.cpp b/src/error_message_generation.cpp index a005896d5..972054b57 100644 --- a/src/error_message_generation.cpp +++ b/src/error_message_generation.cpp @@ -13,7 +13,7 @@ std::string errorMsg(const std::string& uri, const std::string& msg) { return "\n" + (uri.empty() ? "" : errorLink(uri, lineNumber, errorLevel) + "\n") + msg; } -} // namespace +} // namespace // LCOV_EXCL_START { all the error messages are excluded from coverage @@ -82,8 +82,14 @@ std::string ErrorMessages::ERROR_REPEATED_ID(const unsigned int originalId, newLineNumber, ErrorLevel::WARNING, "Repeated ID: " + std::to_string(originalId)) + - "\nID already appears here: \n" + - errorLink(_uri, originalLineNumber, ErrorLevel::WARNING); + "\nID already appears here: \n" + errorLink(_uri, originalLineNumber, ErrorLevel::WARNING); +} + +std::string ErrorMessages::EARLY_END_OF_FILE(long unsigned int lineNumber) const { + return errorMsg(_uri, + lineNumber, + ErrorLevel::ERROR, + "The end of the file was reached before parsing finshed"); } std::string ErrorMessages::ERROR_SELF_PARENT(const unsigned int lineNumber) const { @@ -100,6 +106,13 @@ std::string ErrorMessages::ERROR_MISSING_MITO_PARENT(int mitoParentId) const { std::to_string(mitoParentId) + " does not exist."; } +std::string ErrorMessages::ERROR_NEGATIVE_ID(long unsigned int lineNumber) const { + return errorMsg(_uri, + lineNumber, + ErrorLevel::WARNING, + "The ID assigned to this line is negative"); +} + //////////////////////////////////////////////////////////////////////////////// // NEUROLUCIDA //////////////////////////////////////////////////////////////////////////////// @@ -157,6 +170,9 @@ std::string ErrorMessages::ERROR_UNCOMPATIBLE_FLAGS(morphio::Option flag1, //////////////////////////////////////////////////////////////////////////////// // WRITERS //////////////////////////////////////////////////////////////////////////////// +std::string ErrorMessages::ERROR_EMPTY_MORPHOLOGY() const { + return errorMsg(_uri, 0, ErrorLevel::ERROR, "Morphology is empty."); +} std::string ErrorMessages::ERROR_UNSUPPORTED_SECTION_TYPE(const SectionType& type) const { return ("Attempted to write unsupported section type: " + std::to_string(type) + diff --git a/src/error_message_generation.h b/src/error_message_generation.h index 6779528d7..ca0645c20 100644 --- a/src/error_message_generation.h +++ b/src/error_message_generation.h @@ -52,9 +52,7 @@ class ErrorMessages std::string ERROR_MULTIPLE_SOMATA(const std::vector& lineNumbers) const; /** Missing section parent error message */ - std::string ERROR_MISSING_PARENT(unsigned int id, - int parentId, - unsigned int lineNumber) const; + std::string ERROR_MISSING_PARENT(unsigned int id, int parentId, unsigned int lineNumber) const; /** Bifurcating soma error message */ std::string ERROR_SOMA_BIFURCATION(unsigned int sampleLineNumber, @@ -71,12 +69,21 @@ class ErrorMessages /** Section self parent error message */ std::string ERROR_SELF_PARENT(unsigned int lineNumber) const; + /** The end of the file was reached before parsing finshed */ + std::string EARLY_END_OF_FILE(long unsigned int lineNumber) const; + /** Undefined soma error message */ std::string ERROR_NOT_IMPLEMENTED_UNDEFINED_SOMA(const std::string&) const; /** Missing mitochondria parent section error message */ std::string ERROR_MISSING_MITO_PARENT(int mitoParentId) const; + //////////////////////////////////////////////////////////////////////////////// + // SWC + //////////////////////////////////////////////////////////////////////////////// + /** A negative ID is used in SWC */ + std::string ERROR_NEGATIVE_ID(long unsigned int lineNumber) const; + //////////////////////////////////////////////////////////////////////////////// // NEUROLUCIDA //////////////////////////////////////////////////////////////////////////////// @@ -111,6 +118,9 @@ class ErrorMessages // WRITERS //////////////////////////////////////////////////////////////////////////////// + /** Morphology is empty */ + std::string ERROR_EMPTY_MORPHOLOGY() const; + /** Unsupported morphology section type error message */ std::string ERROR_UNSUPPORTED_SECTION_TYPE(const enums::SectionType& type) const; @@ -125,6 +135,7 @@ class ErrorMessages /** Can't write perimeter data to SWC, ASC error message */ std::string ERROR_PERIMETER_DATA_NOT_WRITABLE(); + /** Single section child SWC error message */ std::string ERROR_ONLY_CHILD_SWC_WRITER(unsigned int parentId) const; @@ -136,6 +147,7 @@ class ErrorMessages /** Contour soma must have at least 3 points. */ std::string ERROR_SOMA_INVALID_CONTOUR() const; + private: std::string _uri; }; diff --git a/src/mut/morphology.cpp b/src/mut/morphology.cpp index 7b60b5304..0e0dcd4ff 100644 --- a/src/mut/morphology.cpp +++ b/src/mut/morphology.cpp @@ -345,8 +345,7 @@ breadth_iterator Morphology::breadth_end() const { void Morphology::applyModifiers(unsigned int modifierFlags) { if (modifierFlags & NO_DUPLICATES & TWO_POINTS_SECTIONS) { const auto err = details::ErrorMessages(_uri); - throw SectionBuilderError( - err.ERROR_UNCOMPATIBLE_FLAGS(NO_DUPLICATES, TWO_POINTS_SECTIONS)); + throw SectionBuilderError(err.ERROR_UNCOMPATIBLE_FLAGS(NO_DUPLICATES, TWO_POINTS_SECTIONS)); } if (modifierFlags & SOMA_SPHERE) { @@ -389,12 +388,6 @@ std::unordered_map> Morphology::connectivity() { } void Morphology::write(const std::string& filename) const { - for (const auto& root : rootSections()) { - if (root->points().size() < 2) { - throw morphio::SectionBuilderError("Root sections must have at least 2 points"); - } - } - const size_t pos = filename.find_last_of('.'); if (pos == std::string::npos) { throw UnknownFileType("Missing file extension."); diff --git a/src/mut/writer_asc.cpp b/src/mut/writer_asc.cpp index 00e4551c6..a0c4a497a 100644 --- a/src/mut/writer_asc.cpp +++ b/src/mut/writer_asc.cpp @@ -54,13 +54,14 @@ void asc(const Morphology& morph, const std::string& filename, std::shared_ptr handler) { if (details::emptyMorphology(morph, handler)) { - return; + throw morphio::WriterError(morphio::details::ErrorMessages().ERROR_EMPTY_MORPHOLOGY()); } details::validateContourSoma(morph, handler); details::checkSomaHasSameNumberPointsDiameters(*morph.soma()); details::validateHasNoMitochondria(morph, handler); details::validateHasNoPerimeterData(morph); + details::validateRootPointsHaveTwoOrMorePoints(morph); std::ofstream myfile(filename); @@ -80,7 +81,8 @@ void asc(const Morphology& morph, } else if (type == SECTION_APICAL_DENDRITE) { myfile << "( (Color Red)\n (Apical)\n"; } else { - throw WriterError(morphio::details::ErrorMessages().ERROR_UNSUPPORTED_SECTION_TYPE(type)); + throw WriterError( + morphio::details::ErrorMessages().ERROR_UNSUPPORTED_SECTION_TYPE(type)); } write_asc_section(myfile, section, 2); myfile << ")\n\n"; diff --git a/src/mut/writer_hdf5.cpp b/src/mut/writer_hdf5.cpp index 9e561097e..47557c84e 100644 --- a/src/mut/writer_hdf5.cpp +++ b/src/mut/writer_hdf5.cpp @@ -13,8 +13,9 @@ #include #include -#include "../shared_utils.hpp" #include "../error_message_generation.h" +#include "../shared_utils.hpp" +#include "morphio/exceptions.h" #include "writer_utils.h" namespace { @@ -151,11 +152,12 @@ void h5(const Morphology& morph, const std::string& filename, std::shared_ptr handler) { if (details::emptyMorphology(morph, handler)) { - return; + throw morphio::WriterError(morphio::details::ErrorMessages().ERROR_EMPTY_MORPHOLOGY()); } details::validateContourSoma(morph, handler); details::checkSomaHasSameNumberPointsDiameters(*morph.soma()); + details::validateRootPointsHaveTwoOrMorePoints(morph); HighFive::File h5_file(filename, HighFive::File::ReadWrite | HighFive::File::Create | @@ -169,7 +171,6 @@ void h5(const Morphology& morph, const std::vector& somaPoints = morph.soma()->points(); const auto& somaDiameters = morph.soma()->diameters(); - for (unsigned int i = 0; i < somaPoints.size(); ++i) { raw_points.push_back( {somaPoints[i][0], somaPoints[i][1], somaPoints[i][2], somaDiameters[i]}); diff --git a/src/mut/writer_swc.cpp b/src/mut/writer_swc.cpp index d62b52c39..4373c5a1c 100644 --- a/src/mut/writer_swc.cpp +++ b/src/mut/writer_swc.cpp @@ -20,8 +20,8 @@ #include #include -#include "../shared_utils.hpp" #include "../error_message_generation.h" +#include "../shared_utils.hpp" #include "writer_utils.h" namespace { @@ -122,7 +122,7 @@ void validateSWCSoma(const morphio::mut::Morphology& morph, } else if (!(soma->type() == SomaType::SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS || soma->type() == SomaType::SOMA_CYLINDERS || soma->type() == SomaType::SOMA_SINGLE_POINT)) { - handler->emit(std::make_shared()); + handler->emit(std::make_shared()); } else if (soma->type() == SomaType::SOMA_SINGLE_POINT && soma_points.size() != 1) { throw WriterError(ErrorMessages().ERROR_SOMA_INVALID_SINGLE_POINT()); } else if (soma->type() == SomaType::SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS && @@ -142,7 +142,8 @@ void swc(const Morphology& morph, const std::string& filename, std::shared_ptr handler) { if (details::emptyMorphology(morph, handler)) { - return; + throw morphio::WriterError( + morphio::details::ErrorMessages(filename).ERROR_EMPTY_MORPHOLOGY()); } const std::shared_ptr& soma = morph.soma(); diff --git a/src/mut/writer_utils.cpp b/src/mut/writer_utils.cpp index 06282b5b1..e3fc3960d 100644 --- a/src/mut/writer_utils.cpp +++ b/src/mut/writer_utils.cpp @@ -71,6 +71,15 @@ void validateHasNoMitochondria(const morphio::mut::Morphology& morph, } } +void validateRootPointsHaveTwoOrMorePoints(const morphio::mut::Morphology& morph) { + for (const auto& root : morph.rootSections()) { + if (root->points().size() < 2) { + throw morphio::RawDataError("Root sections must have at least 2 points"); + } + } + +} + } // namespace details } // namespace writer } // namespace mut diff --git a/src/mut/writer_utils.h b/src/mut/writer_utils.h index eb475b39d..dca3e2d43 100644 --- a/src/mut/writer_utils.h +++ b/src/mut/writer_utils.h @@ -22,6 +22,7 @@ void validateContourSoma(const morphio::mut::Morphology&, void validateHasNoPerimeterData(const morphio::mut::Morphology&); void validateHasNoMitochondria(const morphio::mut::Morphology&, std::shared_ptr handler); +void validateRootPointsHaveTwoOrMorePoints(const morphio::mut::Morphology& morph); } // namespace details } // namespace writer diff --git a/src/properties.cpp b/src/properties.cpp index 4596cbada..2e0e755a7 100644 --- a/src/properties.cpp +++ b/src/properties.cpp @@ -74,10 +74,9 @@ PointLevel& PointLevel::operator=(const PointLevel& other) { } bool SectionLevel::diff(const SectionLevel& other) const { - return !(this == &other || - (compare_section_structure(_sections, other._sections) && - morphio::property::compare(_sectionTypes, other._sectionTypes) && - morphio::property::compare(_children, other._children))); + return !(this == &other || (compare_section_structure(_sections, other._sections) && + morphio::property::compare(_sectionTypes, other._sectionTypes) && + morphio::property::compare(_children, other._children))); } bool SectionLevel::operator==(const SectionLevel& other) const { @@ -136,9 +135,8 @@ MitochondriaPointLevel::MitochondriaPointLevel( } bool MitochondriaSectionLevel::diff(const MitochondriaSectionLevel& other) const { - return !(this == &other || - (compare_section_structure(this->_sections, other._sections) && - morphio::property::compare(this->_children, other._children))); + return !(this == &other || (compare_section_structure(this->_sections, other._sections) && + morphio::property::compare(this->_children, other._children))); } bool MitochondriaSectionLevel::operator==(const MitochondriaSectionLevel& other) const { @@ -151,13 +149,9 @@ bool MitochondriaSectionLevel::operator!=(const MitochondriaSectionLevel& other) bool MitochondriaPointLevel::diff(const MitochondriaPointLevel& other) const { return !(this == &other || - (morphio::property::compare( - this->_sectionIds, other._sectionIds) && - morphio::property::compare(this->_relativePathLengths, - other._relativePathLengths) - && - morphio::property::compare( - this->_diameters, other._diameters))); + (morphio::property::compare(this->_sectionIds, other._sectionIds) && + morphio::property::compare(this->_relativePathLengths, other._relativePathLengths) && + morphio::property::compare(this->_diameters, other._diameters))); } bool MitochondriaPointLevel::operator==(const MitochondriaPointLevel& other) const { diff --git a/src/readers/morphologyASC.cpp b/src/readers/morphologyASC.cpp index be0680cbf..b64024f92 100644 --- a/src/readers/morphologyASC.cpp +++ b/src/readers/morphologyASC.cpp @@ -4,6 +4,7 @@ */ #include "morphologyASC.h" +#include "utils.h" #include #include @@ -22,12 +23,9 @@ namespace { Contain header info about the root S-exps **/ struct Header { - Header() - : token(static_cast(+Token::STRING)) - , parent_id(-1) {} - Token token; + Token token = static_cast(+Token::STRING); std::string label; - int32_t parent_id; + int32_t parent_id = -1; }; bool is_eof(Token type) { @@ -84,15 +82,14 @@ class NeurolucidaParser std::tuple parse_point(NeurolucidaLexer& lex, bool is_marker) { lex.expect(Token::LPAREN, "Point should start in LPAREN"); std::array point{}; // X,Y,Z,D + const auto& stn = morphio::getStringToNumber(); + for (unsigned int i = 0; i < 4; i++) { + const std::string s = lex.consume()->str(); try { -#ifdef MORPHIO_USE_DOUBLE - point[i] = std::stod(lex.consume()->str()); -#else - point[i] = std::stof(lex.consume()->str()); -#endif + point[i] = std::get<0>(stn.toFloat(s, 0)); } catch (const std::invalid_argument&) { - throw RawDataError(err_.ERROR_PARSING_POINT(lex.line_num(), lex.current()->str())); + throw RawDataError(err_.ERROR_PARSING_POINT(lex.line_num(), s)); } // Markers can have an s-exp (X Y Z) without diameter diff --git a/src/readers/morphologySWC.cpp b/src/readers/morphologySWC.cpp index f5e1ebe20..8d7a7fdb3 100644 --- a/src/readers/morphologySWC.cpp +++ b/src/readers/morphologySWC.cpp @@ -5,17 +5,14 @@ #include "morphologySWC.h" -#include // std::transform +#include // isdigit #include // uint32_t -#include // std::back_inserter #include // std::shared_ptr -#include // std::stringstream #include // std::string #include // std::unordered_map #include #include // std::vector -#include #include #include #include @@ -24,27 +21,139 @@ #include "../error_message_generation.h" #include "../shared_utils.hpp" +#include "utils.h" -namespace { -// It's not clear if -1 is the only way of identifying the root section. +namespace morphio { +namespace details { + +// It's not clear if -1 is the only way of identifying a root section. const int SWC_UNDEFINED_PARENT = -1; const unsigned int SWC_ROOT = 0xFFFFFFFD; +/* simple stream parser for SWC file format which is a line oriented format + * + * This parser advances across comments and blank lines, and allows the caller + * to get integers and floats + */ +class SWCTokenizer +{ +public: + explicit SWCTokenizer(std::string contents, std::string path) + : contents_(std::move(contents)) + , path_(std::move(path)) { + // ensure null termination + (void) contents_.c_str(); + } + + bool done() const noexcept { + return pos_ >= contents_.size(); + } + + size_t lineNumber() const noexcept { + return line_; + } + + void skip_to(char value) { + std::size_t pos = contents_.find_first_of(value, pos_); + if (pos == std::string::npos) { + pos_ = contents_.size(); + } + pos_ = pos; + } + + void advance_to_non_whitespace() { + if (done()) { + return; + } + std::size_t pos = contents_.find_first_not_of(" \t\r", pos_); + if (pos == std::string::npos) { + pos_ = contents_.size(); + return; + } + pos_ = pos; + } + + void advance_to_number() { + advance_to_non_whitespace(); + + if (done()) { + details::ErrorMessages err(path_); + throw RawDataError(err.EARLY_END_OF_FILE(line_)); + } + + auto c = contents_.at(pos_); + if (std::isdigit(c) != 0 || c == '-' || c == '+' || c == '.') { + return; + } + + details::ErrorMessages err(path_); + throw RawDataError(err.ERROR_LINE_NON_PARSABLE(line_)); + } + + int64_t read_int() { + advance_to_number(); + try { + auto parsed = stn_.toInt(contents_, pos_); + pos_ = std::get<1>(parsed); + return std::get<0>(parsed); + } catch(std::invalid_argument&e) { + throw RawDataError(e.what()); + } + } + + floatType read_float() { + advance_to_number(); + auto parsed = stn_.toFloat(contents_, pos_); + pos_ = std::get<1>(parsed); + return std::get<0>(parsed); + } + + void skip_blank_lines_and_comments() { + advance_to_non_whitespace(); + + while (!done() && (contents_.at(pos_) == '#' || contents_.at(pos_) == '\n')) { + if (contents_.at(pos_) == '#') { + skip_to('\n'); + } + + if (!done() && contents_.at(pos_) == '\n') { + ++line_; + ++pos_; + } + advance_to_non_whitespace(); + } + } + + void finish_line() { + skip_to('\n'); + if (!done() && contents_.at(pos_) == '\n') { + ++line_; + ++pos_; + } + } + + +private: + size_t pos_ = 0; + size_t line_ = 1; + std::string contents_; + StringToNumber stn_; + std::string path_; +}; + struct SWCSample { enum : unsigned int { UNKNOWN_ID = 0xFFFFFFFE }; SWCSample() = default; // XXX - morphio::floatType diameter = -1.; - bool valid = false; - morphio::Point point{}; - morphio::SectionType type = morphio::SECTION_UNDEFINED; + floatType diameter = -1.; + Point point{}; + SectionType type = SECTION_UNDEFINED; unsigned int parentId = UNKNOWN_ID; unsigned int id = UNKNOWN_ID; unsigned int lineNumber = 0; - }; -std::vector gatherLineNumbers(const std::vector& samples) { +static std::vector gatherLineNumbers(const std::vector& samples) { std::vector lineNumbers; std::transform(samples.begin(), samples.cend(), @@ -53,397 +162,375 @@ std::vector gatherLineNumbers(const std::vector& sample return lineNumbers; } -bool _ignoreLine(const std::string& line) { - std::size_t pos = line.find_first_not_of("\n\r\t "); - return pos == std::string::npos || line[pos] == '#'; -} - -SWCSample readSample(const char* line, unsigned int lineNumber_) { -#ifdef MORPHIO_USE_DOUBLE - const char* const format = "%d%d%lg%lg%lg%lg%d"; -#else - const char* const format = "%d%d%f%f%f%f%d"; -#endif - - morphio::floatType radius = -1.; - int int_type = -1; - int parentId = -1; - int id = -1; +static std::vector readSamples(const std::string& contents, const std::string& path) { + std::vector samples; SWCSample sample; - sample.valid = sscanf(line, - format, - &id, - &int_type, - &sample.point[0], - &sample.point[1], - &sample.point[2], - &radius, - &parentId) == 7; - - if (id < 0) { - throw morphio::RawDataError("Negative IDs are not supported"); - } - sample.id = static_cast(id); - - if (parentId < -1) { - throw morphio::RawDataError("Negative ParentIDs are not supported"); - } else if (parentId == SWC_UNDEFINED_PARENT) { - sample.parentId = SWC_ROOT; - } else { - sample.parentId = static_cast(parentId); - } - sample.type = static_cast(int_type); - sample.diameter = radius * 2; // The point array stores diameters. - sample.lineNumber = lineNumber_; - return sample; -} + SWCTokenizer tokenizer{contents, path}; -} // unnamed namespace + tokenizer.skip_blank_lines_and_comments(); + while (!tokenizer.done()) { + sample.lineNumber = static_cast(tokenizer.lineNumber()); -namespace morphio { -namespace readers { -namespace swc { + int64_t id = tokenizer.read_int(); + if (id < 0) { + details::ErrorMessages err(path); + throw RawDataError(err.ERROR_NEGATIVE_ID(sample.lineNumber)); + }else if(id > std::numeric_limits::max()){ + throw RawDataError("SWC does not support ids larger than" + std::to_string(std::numeric_limits::max())); + } -/** - Parsing SWC according to this specification: - http://www.neuronland.org/NLMorphologyConverter/MorphologyFormats/SWC/Spec.html -**/ -class SWCBuilder -{ - public: - explicit SWCBuilder(std::string uri_) - : uri(std::move(uri_)) {} + sample.id = static_cast(id); - void readSamples(const std::string& contents) { - std::stringstream stream{contents}; - unsigned int lineNumber = 0; - std::string line; - while (!std::getline(stream, line).fail()) { - ++lineNumber; + sample.type = static_cast(tokenizer.read_int()); - if (line.empty() || _ignoreLine(line)) { - continue; - } + for (auto& point : sample.point) { + point = tokenizer.read_float(); + } - const auto sample = readSample(line.data(), lineNumber); + sample.diameter = 2 * tokenizer.read_float(); - if (!sample.valid) { - const auto err = details::ErrorMessages(uri); - throw RawDataError(err.ERROR_LINE_NON_PARSABLE(lineNumber)); - } + int64_t parentId = tokenizer.read_int(); + if (parentId < -1) { + details::ErrorMessages err(path); + throw RawDataError(err.ERROR_NEGATIVE_ID(sample.lineNumber)); + }else if(parentId > std::numeric_limits::max()){ + throw RawDataError("SWC does not support parent ids larger than" + std::to_string(std::numeric_limits::max())); + } else if (parentId == SWC_UNDEFINED_PARENT) { + sample.parentId = SWC_ROOT; + } else { + sample.parentId = static_cast(parentId); + } - if (sample.type >= SECTION_OUT_OF_RANGE_START || sample.type <= 0) { - const auto err = details::ErrorMessages(uri); - throw RawDataError(err.ERROR_UNSUPPORTED_SECTION_TYPE(lineNumber, sample.type)); - } + samples.push_back(sample); - if (samples.count(sample.id) > 0) { - const auto err = details::ErrorMessages(uri); - auto original = samples[sample.id]; - throw RawDataError( - err.ERROR_REPEATED_ID(original.id, original.lineNumber, sample.lineNumber)); - } + // Normally one would just `skip_blank_lines_and_comments` directly, + // but we support "extra columns" that are ignored; so since people can put anything + // in these "extra columns", we have to ignore everything until the end of the line + tokenizer.finish_line(); - samples[sample.id] = sample; - children[sample.parentId].push_back(sample.id); - - if (sample.type == SECTION_SOMA) { - lastSomaPoint = sample.id; - } - } + tokenizer.skip_blank_lines_and_comments(); } - /** - Are considered potential somata all sample - with parentId == SWC_ROOT and sample.type == SECTION_SOMA - **/ - std::vector _potentialSomata() { - std::vector somata; - for (auto id : children[SWC_ROOT]) { - if (samples[id].type == SECTION_SOMA) { - somata.push_back(samples[id]); - } - } - return somata; + if (!tokenizer.done()) { + details::ErrorMessages err(path); + throw RawDataError(err.EARLY_END_OF_FILE(0)); } - void raiseIfBrokenSoma(const SWCSample& sample) { - if (sample.type != SECTION_SOMA) { - return; - } + return samples; +} - if (sample.parentId != SWC_ROOT && !children[sample.id].empty()) { - std::vector soma_bifurcations; - for (unsigned int id : children[sample.id]) { - if (samples[id].type == SECTION_SOMA) { - soma_bifurcations.push_back(samples[id]); - } else { - neurite_wrong_root.push_back(samples[id]); - } - } +/** + Parsing SWC according to this specification: +http://www.neuronland.org/NLMorphologyConverter/MorphologyFormats/SWC/Spec.html + **/ - if (soma_bifurcations.size() > 1) { - const auto err = details::ErrorMessages(uri); - throw morphio::SomaError( - err.ERROR_SOMA_BIFURCATION(sample.lineNumber, - gatherLineNumbers(soma_bifurcations))); - } - } +enum class DeclaredID : unsigned int {}; - if (sample.parentId != SWC_ROOT && samples.count(sample.parentId) > 0 && - samples[sample.parentId].type != SECTION_SOMA) { - const auto err = details::ErrorMessages(uri); - throw morphio::SomaError(err.ERROR_SOMA_WITH_NEURITE_PARENT(sample.lineNumber)); - } - } +class SWCBuilder +{ + using MorphioID = uint32_t; + using PhysicalID = size_t; + using Samples = std::vector; - void raiseIfSelfParent(const SWCSample& sample) { - if (sample.parentId == sample.id) { - const auto err = details::ErrorMessages(uri); - throw morphio::RawDataError(err.ERROR_SELF_PARENT(sample.id)); - } + public: + SWCBuilder(std::string path, WarningHandler* warning_handler) + : path_(std::move(path)) + , warning_handler_(warning_handler) {} + + Property::Properties buildProperties(const std::string& contents, unsigned int options) { + const Samples samples = readSamples(contents, path_); + buildSWC(samples); + morph_.applyModifiers(options); + return morph_.buildReadOnly(); } - void warnIfDisconnectedNeurite(const SWCSample& sample, WarningHandler* h) { - if (sample.parentId == SWC_ROOT && sample.type != SECTION_SOMA) { - h->emit(std::make_shared(uri, sample.lineNumber)); - } - } + private: + void build_soma(const Samples& soma_samples) { + auto& soma = morph_.soma(); - void checkSoma(WarningHandler* h) { - auto somata = _potentialSomata(); + if (soma_samples.empty()) { + soma->type() = SOMA_UNDEFINED; + warning_handler_->emit(std::make_unique(path_)); + return; + } else if (soma_samples.size() == 1) { + SWCSample sample = soma_samples[0]; - if (somata.size() > 1) { - const auto err = details::ErrorMessages(uri); - const auto lineNumbers = gatherLineNumbers(somata); - throw morphio::SomaError(err.ERROR_MULTIPLE_SOMATA(lineNumbers)); - } + if (sample.parentId != SWC_ROOT && samples_.at(sample.parentId).type != SECTION_SOMA) { + details::ErrorMessages err_(path_); + throw SomaError(err_.ERROR_SOMA_WITH_NEURITE_PARENT(sample.lineNumber)); + } - if (somata.empty()) { - h->emit(std::make_shared(uri)); - } else { - for (const auto& sample_pair : samples) { - const auto& sample = sample_pair.second; - warnIfDisconnectedNeurite(sample, h); + soma->type() = SOMA_SINGLE_POINT; + soma->points() = {sample.point}; + soma->diameters() = {sample.diameter}; + return; + } else if (soma_samples.size() == 3 && + (soma_samples[0].id == 1 && soma_samples[0].parentId == SWC_ROOT && + soma_samples[1].id == 2 && soma_samples[1].parentId == 1 && + soma_samples[2].id == 3 && soma_samples[2].parentId == 1)) { + const std::array points = { + soma_samples[0].point, + soma_samples[1].point, + soma_samples[2].point, + }; + // All soma that bifurcate with the first parent having two children are considered + // SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS + const details::ThreePointSomaStatus status = + details::checkNeuroMorphoSoma(points, soma_samples[0].diameter / 2); + if (status != details::ThreePointSomaStatus::Conforms) { + std::stringstream stream; + stream << status; + warning_handler_->emit(std::make_unique(path_, stream.str())); } + soma->type() = SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS; + soma->points() = std::vector(points.begin(), points.end()); + soma->diameters() = { + soma_samples[0].diameter, + soma_samples[1].diameter, + soma_samples[2].diameter, + }; + return; } - } + // might also have 3 points at this point, as well + + // a "normal" SWC soma + soma->type() = SOMA_CYLINDERS; + auto& points = soma->points(); + auto& diameters = soma->diameters(); + points.reserve(soma_samples.size()); + diameters.reserve(soma_samples.size()); + + size_t parent_count = 0; + for (const auto& s : soma_samples) { + if (s.parentId == SWC_ROOT) { + parent_count++; + } else if (samples_.count(s.parentId) == 0) { + details::ErrorMessages err_(path_); + throw MissingParentError( + err_.ERROR_MISSING_PARENT(s.id, static_cast(s.parentId), s.lineNumber)); + } else if (samples_.at(s.parentId).type != SECTION_SOMA) { + details::ErrorMessages err_(path_); + throw SomaError(err_.ERROR_SOMA_WITH_NEURITE_PARENT(s.lineNumber)); + } - void raiseIfNoParent(const SWCSample& sample) { - if (sample.parentId != SWC_ROOT && samples.count(sample.parentId) == 0) { - const auto err = details::ErrorMessages(uri); - throw morphio::MissingParentError(err.ERROR_MISSING_PARENT( - sample.id, static_cast(sample.parentId), sample.lineNumber)); + if (children_.count(s.id) > 0 && children_.at(s.id).size() > 1) { + std::vector soma_bifurcations; + for (auto id : children_.at(s.id)) { + if (samples_[id].type == SECTION_SOMA && s.parentId != SWC_ROOT) { + soma_bifurcations.push_back(samples_[id]); + } + } + if (soma_bifurcations.size() > 1) { + details::ErrorMessages err_(path_); + throw SomaError( + err_.ERROR_SOMA_BIFURCATION(s.lineNumber, + gatherLineNumbers(soma_bifurcations))); + } + } + points.push_back(s.point); + diameters.push_back(s.diameter); } - } - void warnIfZeroDiameter(const SWCSample& sample, WarningHandler* h) { - if (sample.diameter < morphio::epsilon) { - h->emit(std::make_shared(uri, sample.lineNumber)); + if (parent_count > 1) { + details::ErrorMessages err_(path_); + throw SomaError(err_.ERROR_MULTIPLE_SOMATA(gatherLineNumbers(soma_samples))); } } - bool isRootPoint(const SWCSample& sample) { - bool isOrphanNeurite = sample.parentId == SWC_ROOT && sample.type != SECTION_SOMA; - return isOrphanNeurite || - (sample.type != SECTION_SOMA && - samples[sample.parentId].type == SECTION_SOMA); // Exclude soma bifurcations - } - - bool isSectionStart(const SWCSample& sample) { - return (isRootPoint(sample) || - (sample.parentId != SWC_ROOT && - isSectionEnd(samples[sample.parentId]))); // Standard section - } - - bool isSectionEnd(const SWCSample& sample) { - return sample.id == lastSomaPoint || // End of soma - children[sample.id].empty() || // Reached leaf - (children[sample.id].size() >= 2 && // Reached neurite bifurcation - sample.type != SECTION_SOMA); - } - - template - void appendSample(const std::shared_ptr& somaOrSection, const SWCSample& sample) { - somaOrSection->points().push_back(sample.point); - somaOrSection->diameters().push_back(sample.diameter); - } + void buildSWC(const Samples& samples) { + Samples soma_samples; + Samples root_samples; - std::vector constructDepthFirstSamples() { - std::vector ret; - ret.reserve(samples.size()); - const auto pushChildren = [&](const auto& f, unsigned int id) -> void { - for (unsigned int childId : children[id]) { - ret.push_back(childId); - f(f, childId); + for (const auto& sample: samples) { + // { checks + if (sample.diameter < morphio::epsilon) { + details::ErrorMessages err_(path_); + warning_handler_->emit(std::make_unique(path_, sample.lineNumber)); } - }; - pushChildren(pushChildren, SWC_ROOT); + if (sample.parentId == sample.id) { + details::ErrorMessages err_(path_); + throw RawDataError(err_.ERROR_SELF_PARENT(sample.id)); + } - return ret; - } + if (sample.type >= SECTION_OUT_OF_RANGE_START || sample.type <= 0) { + details::ErrorMessages err_(path_); + throw RawDataError( + err_.ERROR_UNSUPPORTED_SECTION_TYPE(sample.lineNumber, sample.type)); + } + if (sample.parentId == SWC_ROOT && sample.type != SECTION_SOMA) { + warning_handler_->emit( + std::make_unique(path_, sample.lineNumber)); + } + // } checks - SomaType somaType(mut::Morphology& morph, WarningHandler* h) { - switch (morph.soma()->points().size()) { - case 0: { - return SOMA_UNDEFINED; - } - case 1: { - return SOMA_SINGLE_POINT; - } - case 2: { - return SOMA_CYLINDERS; - } - // NeuroMorpho format is characterized by a 3 points soma - // with a bifurcation at soma root - case 3: { - uint32_t somaRootId = children[SWC_ROOT][0]; - const auto& somaChildren = children[somaRootId]; - - std::vector children_soma_points; - for (unsigned int child : somaChildren) { - if (this->samples[child].type == SECTION_SOMA) { - children_soma_points.push_back(this->samples[child]); - } + if (sample.type == SECTION_SOMA) { + soma_samples.push_back(sample); } - if (children_soma_points.size() == 2) { - if (!h->isIgnored(Warning::SOMA_NON_CONFORM)) { - const std::array points = { - samples[somaRootId].point, - children_soma_points[0].point, - children_soma_points[1].point, - }; - details::ThreePointSomaStatus status = - details::checkNeuroMorphoSoma(points, samples[somaRootId].diameter / 2); - - if (status != details::ThreePointSomaStatus::Conforms) { - std::stringstream stream; - stream << status; - h->emit(std::make_shared(uri, stream.str())); - } - } + if (sample.parentId == SWC_ROOT || sample.type == SECTION_SOMA) { + root_samples.push_back(sample); + } - return SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS; + if (!samples_.insert({sample.id, sample}).second) { + const auto& original = samples_[sample.id]; + details::ErrorMessages err_(path_); + throw RawDataError(err_.ERROR_REPEATED_ID(original.id, original.lineNumber, sample.id)); } - return SOMA_CYLINDERS; - } - default: - return SOMA_CYLINDERS; - } - } - Property::Properties buildProperties(const std::string& contents, - unsigned int options, - std::shared_ptr& h) { - mut::Morphology morph{h}; - readSamples(contents); - - for (const auto& sample_pair : samples) { - const auto& sample = sample_pair.second; - raiseIfSelfParent(sample); - raiseIfBrokenSoma(sample); - raiseIfNoParent(sample); - warnIfZeroDiameter(sample, h.get()); + children_[sample.parentId].push_back(sample.id); } - checkSoma(h.get()); + // can only check for missing parents once all samples are loaded + // since it's possible there may be forward references + for (const auto& sample: samples) { + if(sample.parentId != SWC_ROOT && samples_.count(sample.parentId) == 0){ + details::ErrorMessages err_(path_); + throw MissingParentError(err_.ERROR_MISSING_PARENT( + sample.id, static_cast(sample.parentId), sample.lineNumber)); + } + } - // The process might occasionally creates empty section before - // filling them so the warning is ignored - bool originalIsIgnored = h->isIgnored(morphio::Warning::APPENDING_EMPTY_SECTION); - h->setIgnoredWarning(morphio::Warning::APPENDING_EMPTY_SECTION, true); + build_soma(soma_samples); - std::vector depthFirstSamples = constructDepthFirstSamples(); - for (const auto id : depthFirstSamples) { - const SWCSample& sample = samples[id]; + std::unordered_map> declared_to_swc; + declared_to_swc.reserve(samples.size()); - // Bifurcation right at the start - if (isRootPoint(sample) && isSectionEnd(sample)) { + for (const SWCSample& root_sample : root_samples) { + if (children_.count(root_sample.id) == 0) { continue; } - if (isSectionStart(sample)) { - _processSectionStart(morph, sample); - } else if (sample.type != SECTION_SOMA) { - swcIdToSectionId[sample.id] = - swcIdToSectionId[static_cast(sample.parentId)]; + // https://neuromorpho.org/SomaFormat.html + // "The second and third soma points, as well as all starting points + // (roots) of dendritic and axonal arbors have this first point as + // the parent (parent ID 1)." + if (morph_.soma()->type() == SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS && + root_sample.type == SECTION_SOMA && root_sample.id != 1) { + warning_handler_->emit(std::make_unique( + path_, std::vector{root_sample.lineNumber})); } - if (sample.type == SECTION_SOMA) { - appendSample(morph.soma(), sample); - } else { - appendSample(morph.section(swcIdToSectionId.at(sample.id)), sample); + for (unsigned int child_id : children_.at(root_sample.id)) { + if (samples_.at(child_id).type == SECTION_SOMA) { + continue; + } + if (root_sample.type == SECTION_SOMA) { + assembleSections(child_id, + DeclaredID(root_sample.id), + declared_to_swc, + morph_.soma()->points()[0], + morph_.soma()->diameters()[0], + true); + } else { + // this is neurite as the start + assembleSections(root_sample.id, + DeclaredID(SWC_ROOT), + declared_to_swc, + root_sample.point, + root_sample.diameter, + true); + break; + } } } + } - if (morph.soma()->points().size() == 3 && !neurite_wrong_root.empty()) { - h->emit(std::make_shared(uri, gatherLineNumbers(neurite_wrong_root))); - } - - morph.applyModifiers(options); - - Property::Properties properties = morph.buildReadOnly(); - properties._cellLevel._somaType = somaType(morph, h.get()); + void assembleSections( + unsigned int id, + DeclaredID parent_id, + std::unordered_map>& declared_to_swc, + const Point& start_point, + floatType start_diameter, + bool is_root) + { + Property::PointLevel properties; + auto& points = properties._points; + auto& diameters = properties._diameters; - h->setIgnoredWarning(morphio::Warning::APPENDING_EMPTY_SECTION, originalIsIgnored); + auto appendSection = [&](DeclaredID section_id_, DeclaredID parent_id_, SectionType starting_section_type) { + std::shared_ptr new_section; + if (is_root) { + new_section = morph_.appendRootSection(properties, starting_section_type); + } else { + new_section = declared_to_swc.at(parent_id_) + ->appendSection(properties, starting_section_type); + } + declared_to_swc[section_id_] = new_section; + }; - return properties; - } + auto get_child_count = [&](unsigned int child_id) { + return children_.count(child_id) == 0 ? 0 : children_.at(child_id).size(); + }; - /* - append last point of previous section if current section is not a root section - and update the parent ID of the new section - */ - void _processSectionStart(mut::Morphology& morph, const SWCSample& sample) { - Property::PointLevel properties; + const SWCSample* sample = &samples_.at(id); - uint32_t id = 0; + // create duplicate point if needed + if (!is_root && sample->point != start_point) { + points.push_back(start_point); + diameters.push_back(start_diameter); + } - if (isRootPoint(sample)) { - id = morph.appendRootSection(properties, sample.type)->id(); - } else { - // Duplicating last point of previous section if there is not already a duplicate - auto parentId = static_cast(sample.parentId); - if (sample.point != samples[parentId].point) { - properties._points.push_back(samples[parentId].point); - properties._diameters.push_back(samples[parentId].diameter); + // try and combine as many single samples into a single section as possible + size_t children_count = get_child_count(id); + while (children_count == 1) { + sample = &samples_.at(id); + if(sample->type != samples_.at(children_.at(id)[0]).type){ + break; } + points.push_back(sample->point); + diameters.push_back(sample->diameter); + id = children_.at(id)[0]; + children_count = get_child_count(id); + } - // Handle the case, bifurcatation at root point - if (isRootPoint(samples[parentId])) { - id = morph.appendRootSection(properties, sample.type)->id(); - } else { - id = morph.section(swcIdToSectionId[parentId]) - ->appendSection(properties, sample.type) - ->id(); + sample = &samples_.at(id); + points.push_back(sample->point); + diameters.push_back(sample->diameter); + appendSection(DeclaredID(id), parent_id, sample->type); + + if (children_count == 0) { + // section was already appended above, nothing to do + } else if (children_count == 1) { + // section_type changed + size_t offset = properties._points.size() - 1; + const Point& new_start_point = properties._points[offset]; + floatType new_start_diameter = properties._diameters[offset]; + assembleSections(children_.at(id)[0], DeclaredID(id), declared_to_swc, new_start_point, new_start_diameter, false); + } else { + size_t offset = properties._points.size() - 1; + const Point& new_start_point = properties._points[offset]; + floatType new_start_diameter = properties._diameters[offset]; + for (unsigned int child_id : children_.at(id)) { + assembleSections(child_id, + DeclaredID(id), + declared_to_swc, + new_start_point, + new_start_diameter, + false); } } - - swcIdToSectionId[sample.id] = id; } - private: - // Dictionary: SWC Id of the last point of a section to morphio::mut::Section ID - std::unordered_map swcIdToSectionId; + std::unordered_map> children_; + std::unordered_map samples_; + mut::Morphology morph_; + std::string path_; + WarningHandler* warning_handler_; +}; - // Neurite that do not have parent ID = 1, allowed for soma contour, not - // 3-pts soma - std::vector neurite_wrong_root; - unsigned int lastSomaPoint = 0; - std::unordered_map> children; - std::unordered_map samples; - std::string uri; -}; +} // namespace details +namespace readers { +namespace swc { Property::Properties load(const std::string& path, const std::string& contents, unsigned int options, std::shared_ptr& warning_handler) { - auto properties = SWCBuilder(path).buildProperties(contents, options, warning_handler); + auto properties = + details::SWCBuilder(path, warning_handler.get()).buildProperties(contents, options); properties._cellLevel._cellFamily = NEURON; properties._cellLevel._version = {"swc", 1, 0}; diff --git a/src/readers/utils.cpp b/src/readers/utils.cpp new file mode 100644 index 000000000..179cb7a97 --- /dev/null +++ b/src/readers/utils.cpp @@ -0,0 +1,81 @@ +#include "./utils.h" + +namespace morphio { +#if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +#define freelocale _free_locale +#define strtol_l _strtol_l + +#ifdef MORPHIO_USE_DOUBLE +#define strto_float _strtod_l +#else +#define strto_float _strtof_l +#endif + +#else // not WIN32 + +#ifdef MORPHIO_USE_DOUBLE +#define strto_float strtod_l +#else +#define strto_float strtof_l +#endif + +#endif + +// Only create the `locale` to facilitate number handling once +StringToNumber::StringToNumber() +#if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) + // On windows, use their locale handling for their runtime + : locale(_create_locale(LC_ALL, "C")) { +} +#else + // On other platforms, use the POSIX version + : locale(newlocale(LC_NUMERIC_MASK, "POSIX", nullptr)) { +} +#endif + +StringToNumber::~StringToNumber() { + freelocale(locale); +} + +std::tuple StringToNumber::toInt(const std::string& s, size_t offset) const { + const size_t base = 10; + const char *pos = &s[offset]; + const char *endpos = &s[s.size()]; + int64_t ret = strtol_l(pos, const_cast(&endpos), base, locale); + + auto new_offset = static_cast(endpos - s.data()); + + if (ret == 0 && new_offset == 0) { + throw std::invalid_argument("could not parse integer"); + } + + return {ret, new_offset}; +} + +std::tuple StringToNumber::toFloat(const std::string& s, size_t offset) const { + const char *pos = &s[offset]; + const char *endpos = &s[s.size()]; + floatType ret = strto_float(pos, const_cast(&endpos), locale); + + auto new_offset = static_cast(endpos - s.data()); + + if (std::fabs(ret - 0) < morphio::epsilon && new_offset == 0) { + throw std::invalid_argument("could not parse float"); + } + + return {ret, new_offset}; +} + +StringToNumber& getStringToNumber() { + static StringToNumber stn; + return stn; +} + +#if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +#undef freelocale +#undef strtol_l +#endif + +#undef strto_float + +} // namespace morphio diff --git a/src/readers/utils.h b/src/readers/utils.h new file mode 100644 index 000000000..045e5aeaf --- /dev/null +++ b/src/readers/utils.h @@ -0,0 +1,26 @@ +#pragma once +#include // locale_t +#include +#include // std::tuple + +#include // floatType + +namespace morphio { + +struct StringToNumber { +#if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) + _locale_t locale; +#else + locale_t locale; +#endif + + StringToNumber(); + ~StringToNumber(); + + std::tuple toInt(const std::string&s, size_t offset) const; + std::tuple toFloat(const std::string&s, size_t offset) const; +}; + +StringToNumber& getStringToNumber(); + +} // namespace morphio diff --git a/src/shared_utils.cpp b/src/shared_utils.cpp index fb99ff65e..c0f6a6930 100644 --- a/src/shared_utils.cpp +++ b/src/shared_utils.cpp @@ -4,7 +4,7 @@ */ #include // std::max #include -#include // std::fabs +#include // std::fabs #include // std::numeric_limits #include "error_message_generation.h" @@ -153,6 +153,8 @@ std::ostream& operator<<(std::ostream& os, ThreePointSomaStatus s) { case Conforms: os << "Three Point Soma: conforms to specification"; break; + default: // LCOV_EXCL_LINE + break; // LCOV_EXCL_LINE } return os; } diff --git a/src/vasc/properties.cpp b/src/vasc/properties.cpp index 30be59ab8..320c16980 100644 --- a/src/vasc/properties.cpp +++ b/src/vasc/properties.cpp @@ -57,12 +57,9 @@ VascPointLevel::VascPointLevel(const VascPointLevel& data, SectionRange range) { bool VascSectionLevel::diff(const VascSectionLevel& other) const { return this == &other || (compare_section_structure(this->_sections, other._sections) && - morphio::property::compare( - this->_sectionTypes, other._sectionTypes) && - morphio::property::compare( - this->_predecessors, other._predecessors) && - morphio::property::compare( - this->_successors, other._successors)); + morphio::property::compare(this->_sectionTypes, other._sectionTypes) && + morphio::property::compare(this->_predecessors, other._predecessors) && + morphio::property::compare(this->_successors, other._successors)); } bool VascSectionLevel::operator==(const VascSectionLevel& other) const { diff --git a/src/warning_handling.cpp b/src/warning_handling.cpp index 958ba5572..43812c8b7 100644 --- a/src/warning_handling.cpp +++ b/src/warning_handling.cpp @@ -20,8 +20,9 @@ std::string errorLink(const std::string& uri, return "warning"; case ErrorLevel::ERROR: return "error"; + default: + throw std::runtime_error("Unknown ErrorLevel"); } - throw std::runtime_error("Unknown ErrorLevel"); }; auto COLOR = [](ErrorLevel el) { @@ -30,8 +31,9 @@ std::string errorLink(const std::string& uri, return "\033[1;33m"; case ErrorLevel::ERROR: return "\033[1;31m"; + default: + throw std::runtime_error("Unknown ErrorLevel"); } - throw std::runtime_error("Unknown ErrorLevel"); }; const std::string COLOR_END("\033[0m"); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 4058b597f..0759b4ed6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,6 +1,7 @@ set(TESTS_SRC main.cpp test_collection.cpp + test_enums.cpp test_immutable_morphology.cpp test_mitochondria.cpp test_morphology_readers.cpp @@ -42,7 +43,7 @@ if (MORPHIO_ENABLE_COVERAGE) include(CodeCoverage) SETUP_TARGET_FOR_COVERAGE_LCOV( NAME coverage - EXECUTABLE ctest + EXECUTABLE ctest --output-on-failure DEPENDENCIES unittests EXCLUDE "/usr/*" "${PROJECT_SOURCE_DIR}/include/*" "${PROJECT_SOURCE_DIR}/3rdparty/*" ) diff --git a/tests/data/disconnected_neurite.swc b/tests/data/disconnected_neurite.swc index 899cedc7b..85ad3488a 100644 --- a/tests/data/disconnected_neurite.swc +++ b/tests/data/disconnected_neurite.swc @@ -7,4 +7,4 @@ 7 2 0 -4 0 1. 6 8 2 6 -4 0 2. 7 9 2 -5 -4 0 2. 7 - 10 3 -9 -8 0 2. -1 + 10 3 -9 -8 0 2. -1 # <- this is connected to nothing diff --git a/tests/data/neurite_wrong_root_point.swc b/tests/data/neurite_wrong_root_point.swc index f920c5ee3..9b7d00975 100644 --- a/tests/data/neurite_wrong_root_point.swc +++ b/tests/data/neurite_wrong_root_point.swc @@ -1,7 +1,7 @@ - 1 1 9 29.66 -18.86 6.795 -1 - 2 1 7.53 31.08 -18.86 6.33 1 - 3 1 6.51 31.21 -18.86 5.77 2 - 4 3 0 0 0 10 3 # <-- should have parent ID: 1 - 5 3 0 0 1 10 4 - 6 3 0 0 1 10 3 # <-- should have parent ID: 1 - 7 3 0 0 2 10 6 \ No newline at end of file + 1 1 0 0 0 5 -1 + 2 1 0 -5 0 5 1 + 3 1 0 +5 0 5 1 + 4 3 0 0 0 10 2 # <-- should have parent ID: 1 + 5 3 0 0 1 10 4 + 6 3 0 0 1 10 3 # <-- should have parent ID: 1 + 7 3 0 0 2 10 6 diff --git a/tests/data/simple-trailing-space.swc b/tests/data/simple-trailing-space.swc new file mode 100644 index 000000000..a44f97443 --- /dev/null +++ b/tests/data/simple-trailing-space.swc @@ -0,0 +1,5 @@ +# note that this has a trailing spaces! +1 2 0 0 0 3.0 -1 +# ^ here +2 2 1 1 1 3.0 1 +3 2 2 2 2 3.0 2 diff --git a/tests/data/simple-windows-eol.swc b/tests/data/simple-windows-eol.swc new file mode 100644 index 000000000..7497ae7e2 --- /dev/null +++ b/tests/data/simple-windows-eol.swc @@ -0,0 +1,33 @@ +# SWC structure: +# index, type, x, y, z, radius, parent +# +# (0, 5) +# (-5, 5)----- ------ (6, 5) +# | +# | +# | +# | Type = 3 +# | +# o origin +# | +# | Type = 2 +# | +# | +#(-5, -4)----- ------ (6, -4) +# (0, -4) +# +# all radii are 1, except for end points, which are 0 +# +# This is the same file as simple.asc + + + 1 1 0 0 0 1. -1 + 2 3 0 0 0 1. 1 + 3 3 0 5 0 1. 2 + 4 3 -5 5 0 1.5 3 + 5 3 6 5 0 1.5 3 + 6 2 0 0 0 1. 1 + 7 2 0 -4 0 1. 6 + 8 2 6 -4 0 2. 7 + 9 2 -5 -4 0 2.0000000000000000000000000000000000000000000000000000000000000 7 +# long value for radius is to make sure SWC parser accepts long values diff --git a/tests/test_11_collection.py b/tests/test_11_collection.py index 100c90ee5..dd1445f73 100644 --- a/tests/test_11_collection.py +++ b/tests/test_11_collection.py @@ -85,4 +85,4 @@ def test_container_with_warning_handler(): with morphio.Collection(DATA_DIR) as collection: warning_handler = morphio.WarningHandlerCollector() collection.load('neurite_wrong_root_point', warning_handler=warning_handler) - assert len(warning_handler.get_all()) == 3 + assert len(warning_handler.get_all()) == 2 diff --git a/tests/test_1_swc.py b/tests/test_1_swc.py index f2934970d..669d36a6e 100644 --- a/tests/test_1_swc.py +++ b/tests/test_1_swc.py @@ -3,6 +3,7 @@ from pathlib import Path import numpy as np +import morphio import pytest from numpy.testing import assert_array_equal from utils import (assert_swc_exception, captured_output, ignored_warning, @@ -10,11 +11,17 @@ from morphio import (MorphioError, Morphology, RawDataError, SomaError, SomaType, Warning, ostream_redirect, set_raise_warnings) -import morphio DATA_DIR = Path(__file__).parent / "data" +def test_basic(): + contents =('''\ +1 1 0 4 0 3.0 -1 +2 3 0 0 2 0.5 1 +''') + Morphology(contents, "swc") + def test_build_from_string(): contents =('''1 1 0 4 0 3.0 -1 @@ -27,7 +34,6 @@ def test_build_from_string(): assert_array_equal(n.soma.diameters, [6.0]) assert len(n.root_sections) == 1 assert n.root_sections[0].id == 0 - assert len(n.root_sections) == 1 assert_array_equal(n.root_sections[0].points, np.array([[0, 0, 2], [0, 0, 3], @@ -49,7 +55,6 @@ def test_read_single_neurite(): assert_array_equal(n.soma.diameters, [6.0]) assert len(n.root_sections) == 1 assert n.root_sections[0].id == 0 - assert len(n.root_sections) == 1 assert_array_equal(n.root_sections[0].points, np.array([[0, 0, 2], [0, 0, 3], @@ -118,7 +123,7 @@ def test_repeated_id(): ''', RawDataError, 'Repeated ID: 4\nID already appears here:', - ':6:warning') + ':4:warning') def test_neurite_followed_by_soma(): @@ -253,14 +258,12 @@ def test_simple_reversed(): def test_soma_type_1_point(): - # 1 point soma content = '''1 1 0 0 0 3.0 -1''' assert (Morphology(content, extension='swc').soma_type == SomaType.SOMA_SINGLE_POINT) def test_soma_type_2_point(): - # 2 point soma content = ('''1 1 0 0 0 3.0 -1 2 1 0 0 0 3.0 1''') assert (Morphology(content, extension='swc').soma_type == @@ -268,7 +271,6 @@ def test_soma_type_2_point(): def test_soma_type_many_point(): - # > 3 points soma content = ('''1 1 0 0 0 3.0 -1 2 1 0 0 0 3.0 1 3 1 0 0 0 3.0 2 @@ -277,8 +279,7 @@ def test_soma_type_many_point(): assert (Morphology(content, extension='swc').soma_type == SomaType.SOMA_CYLINDERS) - -def test_soma_type_3_point(tmp_path): +def test_soma_type_3_point_neuromorpho(tmp_path): # 3 points soma can be of type SOMA_CYLINDERS or SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS # depending on the point layout @@ -301,7 +302,7 @@ def test_soma_type_3_point(tmp_path): with captured_output() as (_, err): with ostream_redirect(stdout=True, stderr=True): content = ('''1 1 0 0 0 3.0 -1 - 2 1 1 -3 0 3.0 1 + 2 1 1 -3 0 3.0 1 # <- note y is 1 instead of 0 3 1 0 0 0 3.0 1 # PID is 1''') assert (Morphology(content, extension='swc').soma_type == SomaType.SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS) @@ -313,7 +314,7 @@ def test_soma_type_3_point(tmp_path): with captured_output() as (_, err): with ostream_redirect(stdout=True, stderr=True): content = ('''1 1 0 0 0 3.0 -1 - 2 1 0 -3 0 3.0 1 + 2 1 0 -3 0 3.0 1 # <- note z is -3 instead of 0 3 1 0 0 0 3.0 1 # PID is 1''') assert (Morphology(content, extension='swc').soma_type == SomaType.SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS) @@ -322,6 +323,7 @@ def test_soma_type_3_point(tmp_path): "Three Point Soma: The non-constant columns is not offset by +/- the radius from the initial sample." ) +def test_soma_type_3_point(): # If this configuration is not respected -> SOMA_CYLINDERS content = ( '''1 1 0 0 0 3.0 -1 2 1 0 0 0 3.0 1 @@ -407,25 +409,19 @@ def test_disconnected_neurite(): (although this is normal if this neuron has no soma)''' == strip_color_codes(err.getvalue().strip())) -def test_neurite_wrong_root_point(): - '''Test that for 3 points soma, the neurites are attached to first soma point''' - - # Not 3-points soma --> OK - with captured_output() as (_, err): - with ostream_redirect(stdout=True, stderr=True): - n = Morphology(DATA_DIR / 'soma_cylinders.swc') - assert err.getvalue().strip() == '' - assert len(n.root_sections) == 1 +def test_neurite_wrong_root_point_neuromorpho_3_point_soma(): + '''Test that for 3 points soma, the neurites are attached to first soma point''' with captured_output() as (_, err): with ostream_redirect(stdout=True, stderr=True): path = DATA_DIR / 'neurite_wrong_root_point.swc' n = Morphology(path) - assert strip_color_codes(err.getvalue().strip()) == f'''\ + assert strip_color_codes(err.getvalue().strip()) == f'''\ Warning: with a 3 points soma, neurites must be connected to the first soma point: -{path}:4:warning +{path}:2:warning -{path}:6:warning''' +Warning: with a 3 points soma, neurites must be connected to the first soma point: +{path}:3:warning''' assert len(n.root_sections) == 2 assert_array_equal(n.root_sections[0].points, [[0,0,0], [0,0,1]]) @@ -494,44 +490,20 @@ def test_unsupported_section_type(): assert obj.match('Unsupported section type: 20') -def test_root_node_split(): - '''Test that a bifurcation at the root point produces - two root sections - ''' - content = ('''1 1 0 0 0 1 -1 - 2 3 1 0 0 1 1 - 3 3 1 1 0 1 2 - 4 3 1 0 1 1 2 - ''') - n = Morphology(content, extension='swc') - assert len(n.root_sections) == 2 - assert_array_equal(n.root_sections[0].points, - [[1, 0, 0], [1, 1, 0]]) - assert_array_equal(n.root_sections[1].points, - [[1, 0, 0], [1, 0, 1]]) - - # Normal bifurcation - content = ('''1 1 0 0 0 1 -1 - 2 3 1 0 0 1 1 - 3 3 2 1 0 1 2 - 4 3 1 1 0 1 3 - 5 3 1 0 1 1 3 - ''') - n = Morphology(content, extension='swc') +def test_three_point_soma_neuromorpho(): + n = Morphology(DATA_DIR / 'three_point_soma.swc') + assert n.soma_type == SomaType.SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS + + +def test_three_point_soma_stacked_cylinders(): + n = Morphology(DATA_DIR / 'soma_cylinders.swc') + assert n.soma_type == SomaType.SOMA_CYLINDERS assert len(n.root_sections) == 1 - root = n.root_sections[0] - assert_array_equal(root.points, - [[1, 0, 0], [2, 1, 0]]) - assert len(root.children) == 2 - assert_array_equal(root.children[0].points, - [[2, 1, 0], [1, 1, 0]]) - assert_array_equal(root.children[1].points, - [[2, 1, 0], [1, 0, 1]]) -def test_three_point_soma(): - n = Morphology(DATA_DIR / 'three_point_soma.swc') - assert n.soma_type == SomaType.SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS +def test_trailing_space(): + n = Morphology(DATA_DIR / 'simple-trailing-space.swc') + assert n.points.shape == (3, 3) def test_zero_diameter(): @@ -554,27 +526,104 @@ def test_version(): def test_no_soma(): + with captured_output() as (_, err): + with ostream_redirect(stdout=True, stderr=True): + Morphology("", extension='swc') + assert ('$STRING$:0:warning\nWarning: no soma found in file' == + strip_color_codes(err.getvalue().strip())) + content = '''1 2 0 0 0 3.0 -1 2 2 0 0 0 3.0 1 3 2 0 0 0 3.0 2''' with captured_output() as (_, err): with ostream_redirect(stdout=True, stderr=True): Morphology(content, extension='swc') - assert ('$STRING$:0:warning\nWarning: no soma found in file' == - strip_color_codes(err.getvalue().strip())) + assert ('''$STRING$:1:warning +Warning: found a disconnected neurite. +Neurites are not supposed to have parentId: -1 +(although this is normal if this neuron has no soma) + +$STRING$:0:warning +Warning: no soma found in file''' == + + strip_color_codes(err.getvalue().strip())) + +def test_throw_on_missing_data(): + content = ''' 3 # missing data ''' + with pytest.raises(RawDataError, match='Unable to parse this line'): + Morphology(content, extension='swc') + + content = ''' + # some pre data + 3 3 # missing data ''' + with pytest.raises(RawDataError, match='Unable to parse this line'): + Morphology(content, extension='swc') + + +def test_throw_on_negative_id(): + content = '''1 2 0 0 0 3.0 -1 + -2 2 0 0 0 3.0 1 + 3 2 0 0 0 3.0 2''' + with pytest.raises(RawDataError, match='The ID assigned to this line is negative'): + Morphology(content, extension='swc') + + +def test_multi_type_section(): + contents =('''1 1 0 4 0 3.0 -1 + 2 6 0 0 2 0.5 1 # <- type 6 + 3 7 0 0 3 0.5 2 # <- type 7 + 4 8 0 0 4 0.5 3 # <- type 8 + 5 9 0 0 5 0.5 4''') # <- type 9 + n = Morphology(contents, "swc") + assert_array_equal(n.soma.points, [[0, 4, 0]]) + assert_array_equal(n.soma.diameters, [6.0]) + assert len(n.root_sections) == 1 + assert n.root_sections[0].id == 0 + assert_array_equal(n.root_sections[0].points, + np.array([[0, 0, 2], ])) + assert len(n.sections) == 4 + assert_array_equal(n.section_offsets, [0, 1, 3, 5, 7]) + + +def test_missing_parent(): + contents =(''' +1 1 0 0 0 10 -1 +2 2 -2 -6 0 10 1 +3 2 2 6 0 10 2 +4 2 2 6 0 10 10 +''') + with pytest.raises(morphio.MissingParentError, match='Sample id: 4 refers to non-existant parent ID: 10'): + Morphology(contents, "swc") + + +def test_extra_column(): + # some SWC files include extra columns; this goes outside the spec, but we will allow it for + # backwards compatibility + contents =(''' +1 1 0 0 0 10 -1 3 +2 2 -2 -6 0 10 1 3 +''') + Morphology(contents, "swc") + +def test_read_simple_windows_eol(): + simple = Morphology(DATA_DIR / 'simple-windows-eol.swc') + assert len(simple.root_sections) == 2 + assert simple.root_sections[0].id == 0 + assert simple.root_sections[1].id == 3 def test_WarningHandlerCollector(): warnings = morphio.WarningHandlerCollector() Morphology(DATA_DIR / 'neurite_wrong_root_point.swc', warning_handler=warnings) - assert len(warnings.get_all()) == 3 - assert [True, True, False] == [e.was_marked_ignore for e in warnings.get_all()] - assert warnings.get_all()[2].warning.line_numbers[0] == 4 - assert warnings.get_all()[2].warning.line_numbers[1] == 6 + warnings = warnings.get_all() + assert len(warnings) == 2 + assert [False, False] == [e.was_marked_ignore for e in warnings] + assert warnings[0].warning.line_numbers[0] == 2 + assert warnings[1].warning.line_numbers[0] == 3 warnings0 = morphio.WarningHandlerCollector() warnings1 = morphio.WarningHandlerCollector() Morphology(DATA_DIR / 'neurite_wrong_root_point.swc', warning_handler=warnings0) Morphology("", extension="swc", warning_handler=warnings1) - assert len(warnings0.get_all()) == 3 + assert len(warnings0.get_all()) == 2 assert len(warnings1.get_all()) == 1 diff --git a/tests/test_6_writers.py b/tests/test_6_writers.py index efcb60df7..638970786 100644 --- a/tests/test_6_writers.py +++ b/tests/test_6_writers.py @@ -1,16 +1,22 @@ # Copyright (c) 2013-2023, EPFL/Blue Brain Project # SPDX-License-Identifier: Apache-2.0 +from pathlib import Path + import h5py import numpy as np + import morphio from morphio import MitochondriaPointLevel from morphio import Morphology as ImmutMorphology -from morphio import PointLevel, SectionBuilderError, SectionType, WriterError, ostream_redirect, SomaType +from morphio import PointLevel, RawDataError, SectionType, WriterError, ostream_redirect, SomaType from morphio.mut import Morphology + import pytest + from numpy.testing import assert_array_equal, assert_almost_equal from utils import captured_output +DATA_DIR = Path(__file__).parent / "data" def test_write_empty_file(tmp_path): '''Check that empty morphology are not written to disk''' @@ -18,8 +24,8 @@ def test_write_empty_file(tmp_path): with ostream_redirect(stdout=True, stderr=True): for ext in ['asc', 'swc', 'h5']: outname = tmp_path / f'empty.{ext}' - Morphology().write(outname) - assert not outname.exists() + with pytest.raises(WriterError, match='Morphology is empty.'): + Morphology().write(outname) def test_write_undefined_soma(tmp_path): @@ -319,20 +325,17 @@ def test_single_point_root_section(tmp_path): points = [] diameters = [] - # To hide warning: appending empty section - with captured_output(): - with ostream_redirect(stdout=True, stderr=True): - m.append_root_section(PointLevel(points, diameters), SectionType(2)) - with pytest.raises(SectionBuilderError): - m.write(tmp_path / "h5/empty_vasculature.h5") + m.append_root_section(PointLevel(points, diameters), SectionType(2)) + with pytest.raises(RawDataError): + m.write(tmp_path / "empty_vasculature.h5") m = Morphology() points = [[1., 1., 1.]] diameters = [2.] m.append_root_section(PointLevel(points, diameters), SectionType(2)) - with pytest.raises(SectionBuilderError): - m.write(tmp_path / "h5/empty_vasculature.h5") + with pytest.raises(RawDataError): + m.write(tmp_path / "empty_vasculature.h5") def test_write_custom_property__throws(tmp_path): @@ -397,3 +400,10 @@ def test_write_soma_invariants(tmp_path): with pytest.raises(WriterError): morph.write(tmp_path / "test_write.h5") + + +def test_diameter_mismatch(tmp_path): + morph = Morphology(DATA_DIR / 'simple.swc') + morph.soma.diameters = [] + with pytest.raises(WriterError, match='Vector length mismatch:'): + morph.write(tmp_path / "diameter-sample-mismatch.swc") diff --git a/tests/test_9_surfaces.py b/tests/test_9_surfaces.py index b92d07a51..7cd50379e 100644 --- a/tests/test_9_surfaces.py +++ b/tests/test_9_surfaces.py @@ -70,3 +70,36 @@ def test_contour(): # assert_almost_equal(Morphology(DATA_DIR / "simple.asc").soma.surface, # 4 * np.pi, decimal=2) + # single point SWC + assert_almost_equal(ImmutMorphology(DATA_DIR / "simple.swc").soma.surface, + 4 * np.pi, decimal=2) + assert_almost_equal(Morphology(DATA_DIR / "simple.swc").soma.surface, + 4 * np.pi, decimal=2) + + # SWC three points cylinder + assert_almost_equal(ImmutMorphology(DATA_DIR / "soma_three_points_cylinder.swc").soma.surface, + 4 * np.pi * 81, decimal=2) + assert_almost_equal(Morphology(DATA_DIR / "soma_three_points_cylinder.swc").soma.surface, + 4 * np.pi * 81, decimal=2) + + # SWC consecutive cylinders (3 cylinders along X) + assert_almost_equal(ImmutMorphology(DATA_DIR / "soma_cylinders.swc").soma.surface, + 2 * np.pi * 40 * 3, decimal=2) + assert_almost_equal(Morphology(DATA_DIR / "soma_cylinders.swc").soma.surface, + 2 * np.pi * 40 * 3, decimal=2) + + with pytest.raises(SomaError): + Morphology(DATA_DIR / "no_soma.swc").soma.surface + + assert_almost_equal(Morphology(DATA_DIR / "soma_single_frustum.swc").soma.surface, + 1201.428, decimal=3) + + # SWC multiple frustums + assert_almost_equal(ImmutMorphology(DATA_DIR / "soma_multiple_frustums.swc").soma.surface, + 4164.610254415956, decimal=3) + assert_almost_equal(Morphology(DATA_DIR / "soma_multiple_frustums.swc").soma.surface, + 4164.610254415956, decimal=3) + + # SWC complex + assert_almost_equal(ImmutMorphology(DATA_DIR / "complexe.swc").soma.surface, + 13.980, decimal=2) diff --git a/tests/test_enums.cpp b/tests/test_enums.cpp new file mode 100644 index 000000000..ac9beaa0a --- /dev/null +++ b/tests/test_enums.cpp @@ -0,0 +1,37 @@ +#include + +#include +#include + + +TEST_CASE("enums") { + SECTION("SOMA_SINGLE_POINT") { + std::stringstream ss; + ss << morphio::enums::SomaType::SOMA_SINGLE_POINT; + CHECK(ss.str() == "SOMA_SINGLE_POINT"); + } + + SECTION("SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS") { + std::stringstream ss; + ss << morphio::enums::SomaType::SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS; + CHECK(ss.str() == "SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS"); + } + + SECTION("SOMA_CYLINDERS") { + std::stringstream ss; + ss << morphio::enums::SomaType::SOMA_CYLINDERS; + CHECK(ss.str() == "SOMA_CYLINDERS"); + } + + SECTION("SOMA_SIMPLE_CONTOUR") { + std::stringstream ss; + ss << morphio::enums::SomaType::SOMA_SIMPLE_CONTOUR; + CHECK(ss.str() == "SOMA_SIMPLE_CONTOUR"); + } + + SECTION("SOMA_UNDEFINED") { + std::stringstream ss; + ss << morphio::enums::SomaType::SOMA_UNDEFINED; + CHECK(ss.str() == "SOMA_UNDEFINED"); + } +} diff --git a/tests/test_immutable_morphology.cpp b/tests/test_immutable_morphology.cpp index bf1cd32dd..ea0950d21 100644 --- a/tests/test_immutable_morphology.cpp +++ b/tests/test_immutable_morphology.cpp @@ -337,7 +337,7 @@ TEST_CASE("warnings-collection") { morphio::NO_MODIFIER, warningHandler); auto allErrors = warningHandler->getAll(); - REQUIRE(allErrors.size() == 3); + REQUIRE(allErrors.size() == 1); } { auto warningHandler = std::make_shared(); diff --git a/tests/test_morphology_readers.cpp b/tests/test_morphology_readers.cpp index 21a0c656b..3b42595bf 100644 --- a/tests/test_morphology_readers.cpp +++ b/tests/test_morphology_readers.cpp @@ -4,6 +4,7 @@ */ #include "../src/readers/morphologyHDF5.h" #include +#include #include #include @@ -193,6 +194,34 @@ TEST_CASE("LoadSWCMorphology", "[morphology]") { } } +class LocaleGuard { +public: + explicit LocaleGuard(const std::locale& newLocale) + : previousLocale(std::locale()) + { + previousLocale = std::locale::global(newLocale); + } + + ~LocaleGuard() { + std::locale::global(previousLocale); + } + +private: + std::locale previousLocale; +}; + +TEST_CASE("LoadSWCMorphologyLocale", "[morphology]") { + { +#ifdef __APPLE__ + const auto locale = LocaleGuard(std::locale("de_CH.UTF-8")); +#else + const auto locale = LocaleGuard(std::locale("de_CH.UTF8")); +#endif + const morphio::Morphology m("data/simple.swc"); + REQUIRE(m.diameters().size() == 12); + } +} + TEST_CASE("LoadNeurolucidaMorphology", "[morphology]") { const morphio::Morphology m("data/multiple_point_section.asc"); diff --git a/tests/test_mutable_morphology.cpp b/tests/test_mutable_morphology.cpp index 0123870f2..5a755e71b 100644 --- a/tests/test_mutable_morphology.cpp +++ b/tests/test_mutable_morphology.cpp @@ -131,9 +131,9 @@ TEST_CASE("writing", "[mutableMorphology]") { * morphio::WriterError); */ /* } */ - { // Can write empty morphologies + { // Can't write empty morphologies morphio::mut::Morphology morph; - morph.write(tmpDirectory / "empty.swc"); + CHECK_THROWS_AS(morph.write(tmpDirectory / "empty.swc"), morphio::WriterError); } { // Can write morph with no soma diff --git a/tests/test_swc_reader.cpp b/tests/test_swc_reader.cpp index c8f164bb4..75ae7dedd 100644 --- a/tests/test_swc_reader.cpp +++ b/tests/test_swc_reader.cpp @@ -4,43 +4,71 @@ */ #include #include +#include +#include #include using namespace morphio; -TEST_CASE("morphio::swc") { - /* +TEST_CASE("morphio::swc::errors") { + SECTION("super-early-file-end") { + const auto* contents = R"( +1 + )"; + CHECK_THROWS_AS(Morphology(contents, "swc"), RawDataError); + } + + SECTION("early-file-end") { + const auto* contents = R"( +-100 1 0 0 1 + )"; + CHECK_THROWS_AS(Morphology(contents, "swc"), RawDataError); + } + SECTION("negative_id") { - const auto* negative_id = R"( + const auto* contents = R"( -100 1 0 0 1 0.5 -1 )"; - CHECK_THROWS_AS(Morphology(negative_id, "swc"), RawDataError); + CHECK_THROWS_AS(Morphology(contents, "swc"), RawDataError); + } + + SECTION("negative_id_parent") { + const auto* contents = R"( +100 1 0 0 1 0.5 -10 + )"; + CHECK_THROWS_AS(Morphology(contents, "swc"), RawDataError); + } + + SECTION("unparseable") { + const auto* contents = R"( +100 1 0 0 1 0.5 -10 this is some random text that isn't commented + )"; + CHECK_THROWS_AS(Morphology(contents, "swc"), RawDataError); } - */ SECTION("repeated_id") { - const auto* repeated_id = R"( + const auto* contents = R"( 1 1 0 0 1 0.5 -1 2 3 0 0 2 0.5 1 2 3 0 0 2 0.5 1 # <-- repeated id )"; - CHECK_THROWS_AS(Morphology(repeated_id, "swc"), RawDataError); + CHECK_THROWS_AS(Morphology(contents, "swc"), RawDataError); } SECTION("unsupported_section_type") { - const auto* unsupported_section_type = R"( + const auto* contents = R"( 1 10000 0 0 1 0.5 -1 )"; - CHECK_THROWS_AS(Morphology(unsupported_section_type, "swc"), RawDataError); + CHECK_THROWS_AS(Morphology(contents, "swc"), RawDataError); } SECTION("non_parsable") { - const auto* non_parsable = R"( + const auto* contents = R"( 1 1 0 0 1 .5 "-1" )"; - CHECK_THROWS_AS(Morphology(non_parsable, "swc"), RawDataError); + CHECK_THROWS_AS(Morphology(contents, "swc"), RawDataError); } SECTION("soma_multi_bifurcation") { @@ -85,3 +113,32 @@ TEST_CASE("morphio::swc") { CHECK_THROWS_AS(Morphology(multiple_soma, "swc"), SomaError); } } + +TEST_CASE("morphio::swc::working") { + SECTION("no-soma") { + const auto* no_soma = R"( +1 2 0 0 1 .5 -1 +2 2 0 0 1 .5 1 +3 2 0 0 1 .5 -1 +4 2 0 0 1 .5 3 + )"; + const auto m = Morphology(no_soma, "swc"); + + REQUIRE(m.soma().points().empty()); + REQUIRE(m.somaType() == morphio::SomaType::SOMA_UNDEFINED); + REQUIRE(m.diameters().size() == 4); + } + + SECTION("chimera-axon-on-dendrite") { + const auto* no_soma = R"( +1 1 0 0 1 1 -1 +2 2 0 0 2 2 1 +3 2 0 0 3 3 2 +4 3 0 0 4 4 3 +5 3 0 0 5 5 4 + )"; + const auto m = Morphology(no_soma, "swc"); + REQUIRE(m.sections().size() == 2); + REQUIRE(m.diameters().size() == 5); + } +}