From 0d7208984fe4addf00d413a3d7f585d2baed3f3b Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Mon, 18 Nov 2024 14:33:11 -0500 Subject: [PATCH 1/6] Update to use new config parameter. --- test/src/unit-capi-config.cc | 3 +++ test/src/unit-cppapi-enumerations.cc | 4 ++-- tiledb/sm/array/array.cc | 19 +++++++++---------- tiledb/sm/array/array.h | 6 +++++- tiledb/sm/c_api/tiledb.cc | 2 +- tiledb/sm/config/config.cc | 5 +++++ tiledb/sm/config/config.h | 11 ++++++++++- tiledb/sm/cpp_api/config.h | 8 ++++++-- tiledb/sm/serialization/array.cc | 3 ++- tiledb/sm/serialization/array_schema.cc | 2 +- tiledb/sm/serialization/array_schema.h | 15 +++++++++++---- 11 files changed, 55 insertions(+), 23 deletions(-) diff --git a/test/src/unit-capi-config.cc b/test/src/unit-capi-config.cc index 36e52835416..37a731d0c70 100644 --- a/test/src/unit-capi-config.cc +++ b/test/src/unit-capi-config.cc @@ -233,6 +233,7 @@ void check_save_to_file() { ss << "rest.curl.verbose false\n"; ss << "rest.http_compressor any\n"; ss << "rest.load_enumerations_on_array_open false\n"; + ss << "rest.load_enumerations_on_array_open_all_schemas false\n"; ss << "rest.load_metadata_on_array_open true\n"; ss << "rest.load_non_empty_domain_on_array_open true\n"; ss << "rest.retry_count 25\n"; @@ -617,6 +618,8 @@ TEST_CASE("C API: Test config iter", "[capi][config]") { all_param_values["rest.load_metadata_on_array_open"] = "false"; all_param_values["rest.load_non_empty_domain_on_array_open"] = "false"; all_param_values["rest.load_enumerations_on_array_open"] = "false"; + all_param_values["rest.load_enumerations_on_array_open_all_schemas"] = + "false"; all_param_values["rest.use_refactored_array_open"] = "false"; all_param_values["rest.use_refactored_array_open_and_query_submit"] = "false"; all_param_values["rest.payer_namespace"] = ""; diff --git a/test/src/unit-cppapi-enumerations.cc b/test/src/unit-cppapi-enumerations.cc index 03222cc2378..1947c6dcadc 100644 --- a/test/src/unit-cppapi-enumerations.cc +++ b/test/src/unit-cppapi-enumerations.cc @@ -434,7 +434,7 @@ TEST_CASE_METHOD( // Test with `rest.load_enumerations_on_array_open` enabled and disabled. bool load_enmrs = GENERATE(true, false); auto config = ctx_.config(); - config["rest.load_enumerations_on_array_open"] = + config["rest.load_enumerations_on_array_open_all_schemas"] = load_enmrs ? "true" : "false"; vfs_test_setup_.update_config(config.ptr().get()); ctx_ = vfs_test_setup_.ctx(); @@ -639,7 +639,7 @@ TEST_CASE_METHOD( // Test with `rest.load_enumerations_on_array_open` enabled and disabled. bool load_enmrs = GENERATE(true, false); auto config = ctx_.config(); - config["rest.load_enumerations_on_array_open"] = + config["rest.load_enumerations_on_array_open_all_schemas"] = load_enmrs ? "true" : "false"; vfs_test_setup_.update_config(config.ptr().get()); ctx_ = vfs_test_setup_.ctx(); diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index c4e1de27ca2..7b93c116cfe 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -601,9 +601,10 @@ Status Array::open( // For fetching remote enumerations REST calls // tiledb_handle_load_enumerations_request which loads enumerations. For // local arrays we don't call this method. - if (config_.get( - "rest.load_enumerations_on_array_open", Config::must_find)) { - load_all_enumerations(use_refactored_array_open()); + if (serialize_enumerations()) { + load_all_enumerations(config_.get( + "rest.load_enumerations_on_array_open_all_schemas", + Config::must_find)); } } @@ -1615,13 +1616,11 @@ bool Array::serialize_non_empty_domain() const { } bool Array::serialize_enumerations() const { - auto serialize = config_.get("rest.load_enumerations_on_array_open"); - if (!serialize.has_value()) { - throw std::runtime_error( - "Cannot get rest.load_enumerations_on_array_open configuration option " - "from config"); - } - return serialize.value(); + return config_.get( + "rest.load_enumerations_on_array_open", Config::must_find) || + config_.get( + "rest.load_enumerations_on_array_open_all_schemas", + Config::must_find); } bool Array::serialize_metadata() const { diff --git a/tiledb/sm/array/array.h b/tiledb/sm/array/array.h index 2861cbd5435..f846848f7c5 100644 --- a/tiledb/sm/array/array.h +++ b/tiledb/sm/array/array.h @@ -985,7 +985,11 @@ class Array { bool serialize_non_empty_domain() const; /** - * Checks the config to se if enumerations should be serialized on array open. + * Checks the config to see if enumerations should be serialized on array + * open. + * + * @return True if either `rest.load_enumerations_on_array_open` or + * `rest.load_enumerations_on_array_open_all_schemas` is set, else False. */ bool serialize_enumerations() const; diff --git a/tiledb/sm/c_api/tiledb.cc b/tiledb/sm/c_api/tiledb.cc index ea967c4fd0a..fd929c9689b 100644 --- a/tiledb/sm/c_api/tiledb.cc +++ b/tiledb/sm/c_api/tiledb.cc @@ -2155,7 +2155,7 @@ capi_return_t tiledb_handle_load_array_schema_request( static_cast(serialization_type), request->buffer()); - if (load_schema_req.include_enumerations()) { + if (load_schema_req.include_enumerations_latest_schema()) { array->load_all_enumerations(); } diff --git a/tiledb/sm/config/config.cc b/tiledb/sm/config/config.cc index d879122e6c5..4e15c56702c 100644 --- a/tiledb/sm/config/config.cc +++ b/tiledb/sm/config/config.cc @@ -94,6 +94,8 @@ const std::string Config::REST_CURL_VERBOSE = "false"; const std::string Config::REST_CURL_TCP_KEEPALIVE = "true"; const std::string Config::REST_CURL_RETRY_ERRORS = "true"; const std::string Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN = "false"; +const std::string Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN_ALL_SCHEMAS = + "false"; const std::string Config::REST_LOAD_METADATA_ON_ARRAY_OPEN = "true"; const std::string Config::REST_LOAD_NON_EMPTY_DOMAIN_ON_ARRAY_OPEN = "true"; const std::string Config::REST_USE_REFACTORED_ARRAY_OPEN = "true"; @@ -264,6 +266,9 @@ const std::map default_config_values = { std::make_pair( "rest.load_enumerations_on_array_open", Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN), + std::make_pair( + "rest.load_enumerations_on_array_open_all_schemas", + Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN_ALL_SCHEMAS), std::make_pair( "rest.load_metadata_on_array_open", Config::REST_LOAD_METADATA_ON_ARRAY_OPEN), diff --git a/tiledb/sm/config/config.h b/tiledb/sm/config/config.h index 1f75e6f97a0..ef8dda564ca 100644 --- a/tiledb/sm/config/config.h +++ b/tiledb/sm/config/config.h @@ -116,9 +116,18 @@ class Config { /** If we should retry Curl errors in requests to REST. */ static const std::string REST_CURL_RETRY_ERRORS; - /** If the array enumerations should be loaded on array open */ + /** + * If the array enumerations should be loaded on array open for the latest + * array schema only. + */ static const std::string REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN; + /** + * If the array enumerations should be loaded on array open for all array + * schemas. + */ + static const std::string REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN_ALL_SCHEMAS; + /** If the array metadata should be loaded on array open */ static const std::string REST_LOAD_METADATA_ON_ARRAY_OPEN; diff --git a/tiledb/sm/cpp_api/config.h b/tiledb/sm/cpp_api/config.h index 0bccd534930..2ab91a6338a 100644 --- a/tiledb/sm/cpp_api/config.h +++ b/tiledb/sm/cpp_api/config.h @@ -928,8 +928,12 @@ class Config { * together with the open array
* **Default**: true * - `rest.load_enumerations_on_array_open`
- * If true, enumerations will be loaded and sent to server together with - * the open array. + * If true, enumerations will be loaded for the latest array schema on + * array open. + * **Default**: false + * - `rest.load_enumerations_on_array_open_all_schemas`
+ * If true, enumerations will be loaded for all array schemas on array + * open. * **Default**: false * - `rest.use_refactored_array_open`
* If true, the new REST routes and APIs for opening an array will be used diff --git a/tiledb/sm/serialization/array.cc b/tiledb/sm/serialization/array.cc index 1958b8406ce..4724cd7c36b 100644 --- a/tiledb/sm/serialization/array.cc +++ b/tiledb/sm/serialization/array.cc @@ -148,7 +148,8 @@ Status array_to_capnp( array_builder->setQueryType(query_type_str(array->get_query_type())); if (array->use_refactored_array_open() && array->serialize_enumerations()) { - array->load_all_enumerations(true); + array->load_all_enumerations(array->config().get( + "rest.load_enumerations_on_array_open_all_schemas", Config::must_find)); } const auto& array_schema_latest = array->array_schema_latest(); diff --git a/tiledb/sm/serialization/array_schema.cc b/tiledb/sm/serialization/array_schema.cc index 4496097ea8e..bc8baa186ba 100644 --- a/tiledb/sm/serialization/array_schema.cc +++ b/tiledb/sm/serialization/array_schema.cc @@ -1638,7 +1638,7 @@ void load_array_schema_request_to_capnp( throw_if_not_ok(config_to_capnp(config, &config_builder)); // This boolean is only serialized to support clients using TileDB < 2.26. // Future options should only be serialized within the Config object above. - builder.setIncludeEnumerations(req.include_enumerations()); + builder.setIncludeEnumerations(req.include_enumerations_latest_schema()); } void serialize_load_array_schema_request( diff --git a/tiledb/sm/serialization/array_schema.h b/tiledb/sm/serialization/array_schema.h index 54406042b70..aac214c57a9 100644 --- a/tiledb/sm/serialization/array_schema.h +++ b/tiledb/sm/serialization/array_schema.h @@ -60,16 +60,23 @@ namespace serialization { class LoadArraySchemaRequest { public: explicit LoadArraySchemaRequest(const Config& config) - : include_enumerations_(config.get( + : include_enumerations_latest_schema_(config.get( + "rest.load_enumerations_on_array_open", Config::must_find)) + , include_enumerations_all_schemas_(config.get( "rest.load_enumerations_on_array_open", Config::must_find)) { } - inline bool include_enumerations() const { - return include_enumerations_; + inline bool include_enumerations_latest_schema() const { + return include_enumerations_latest_schema_; + } + + inline bool include_enumerations_all_schemas() const { + return include_enumerations_all_schemas_; } private: - bool include_enumerations_; + bool include_enumerations_latest_schema_; + bool include_enumerations_all_schemas_; }; #ifdef TILEDB_SERIALIZATION From bb61037a0e6064bcd12b208887fd3d9272fcdc61 Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Mon, 18 Nov 2024 16:06:38 -0500 Subject: [PATCH 2/6] Add unit tests. --- test/src/unit-cppapi-enumerations.cc | 343 ++++++++++++++++++++++++++- tiledb/sm/array/array.cc | 14 +- 2 files changed, 349 insertions(+), 8 deletions(-) diff --git a/test/src/unit-cppapi-enumerations.cc b/test/src/unit-cppapi-enumerations.cc index 1947c6dcadc..1fa57d335ba 100644 --- a/test/src/unit-cppapi-enumerations.cc +++ b/test/src/unit-cppapi-enumerations.cc @@ -431,7 +431,8 @@ TEST_CASE_METHOD( "CPP API: Load All Enumerations - All Schemas", "[enumeration][array][load-all-enumerations][all-schemas][rest]") { create_array(); - // Test with `rest.load_enumerations_on_array_open` enabled and disabled. + // Test with `rest.load_enumerations_on_array_open_all_schemas` enabled and + // disabled. bool load_enmrs = GENERATE(true, false); auto config = ctx_.config(); config["rest.load_enumerations_on_array_open_all_schemas"] = @@ -636,7 +637,8 @@ TEST_CASE_METHOD( "[enumeration][array][schema-evolution][load-all-enumerations][all-schemas]" "[rest]") { create_array(); - // Test with `rest.load_enumerations_on_array_open` enabled and disabled. + // Test with `rest.load_enumerations_on_array_open_all_schemas` enabled and + // disabled. bool load_enmrs = GENERATE(true, false); auto config = ctx_.config(); config["rest.load_enumerations_on_array_open_all_schemas"] = @@ -662,9 +664,9 @@ TEST_CASE_METHOD( array.schema().ptr()->array_schema()->is_enumeration_loaded( "an_enumeration") == true); } - // If `rest.load_enumerations_on_array_open=false` do not load enmrs - // explicitly. Leave enumerations unloaded initially, evolve the array without - // reopening, and then attempt to load all enumerations. + // If `rest.load_enumerations_on_array_open_all_schemas=false` do not load + // enmrs explicitly. Leave enumerations unloaded initially, evolve the array + // without reopening, and then attempt to load all enumerations. // Evolve once to add an enumeration. ArraySchemaEvolution ase(ctx_); @@ -834,6 +836,337 @@ TEST_CASE_METHOD( "an_enumeration") == true); } +TEST_CASE_METHOD( + CPPEnumerationFx, + "CPP API: Load Enumerations - Latest Schema", + "[enumeration][array][load-enumerations][schema][rest]") { + create_array(); + // Test with `rest.load_enumerations_on_array_open` enabled and disabled. + bool load_enmrs = GENERATE(true, false); + auto config = ctx_.config(); + config["rest.load_enumerations_on_array_open"] = + load_enmrs ? "true" : "false"; + vfs_test_setup_.update_config(config.ptr().get()); + ctx_ = vfs_test_setup_.ctx(); + + auto array = tiledb::Array(ctx_, uri_, TILEDB_READ); + bool array_open_v2 = array.ptr()->array()->use_refactored_array_open(); + + // Helper function to reopen and conditionally call load_all_enumerations. + auto reopen_and_load_enmrs = [&]() { + // We always reopen because we are evolving the schema which requires + // reopening the array after applying the schema evolution. + CHECK_NOTHROW(array.reopen()); + + // If we are not loading enmrs on array open we must load them explicitly + // with a separate request. + if (!load_enmrs) { + // Load enumerations for all schemas if using array open v2. + CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); + } + }; + reopen_and_load_enmrs(); + + REQUIRE( + array.schema().ptr()->array_schema()->has_enumeration("an_enumeration") == + true); + REQUIRE( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "an_enumeration") == true); + std::string schema_name_1 = array.schema().ptr()->array_schema()->name(); + + // Evolve once to add an enumeration. + ArraySchemaEvolution ase(ctx_); + std::vector var_values{"one", "two", "three"}; + auto var_enmr = Enumeration::create(ctx_, "ase_var_enmr", var_values); + ase.add_enumeration(var_enmr); + auto attr4 = Attribute::create(ctx_, "attr4"); + AttributeExperimental::set_enumeration_name(ctx_, attr4, "ase_var_enmr"); + ase.add_attribute(attr4); + // Apply evolution to the array and reopen. + ase.array_evolve(uri_); + reopen_and_load_enmrs(); + + std::string schema_name_2 = array.schema().ptr()->array_schema()->name(); + if (array_open_v2) { + // Check all schemas if we are using array open v2. + auto all_schemas = array.ptr()->array()->array_schemas_all(); + // Previous schemas + CHECK( + all_schemas[schema_name_1]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_1]->is_enumeration_loaded("an_enumeration") == + false); + + // Latest schema + CHECK( + all_schemas[schema_name_2]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("an_enumeration") == + true); + CHECK(all_schemas[schema_name_2]->has_enumeration("ase_var_enmr") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") == + true); + } + // We can always validate the latest schema. + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("an_enumeration") == + true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "an_enumeration") == true); + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("ase_var_enmr") == + true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "ase_var_enmr") == true); + + // Evolve a second time to drop an enumeration. + ArraySchemaEvolution ase2(ctx_); + ase2.drop_enumeration("an_enumeration"); + ase2.drop_attribute("attr1"); + // Apply evolution to the array and reopen. + CHECK_NOTHROW(ase2.array_evolve(uri_)); + reopen_and_load_enmrs(); + + std::string schema_name_3 = array.schema().ptr()->array_schema()->name(); + if (array_open_v2) { + // Check all schemas if we are using array open v2. + auto all_schemas = array.ptr()->array()->array_schemas_all(); + // Previous schemas + CHECK( + all_schemas[schema_name_1]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_1]->is_enumeration_loaded("an_enumeration") == + false); + CHECK( + all_schemas[schema_name_2]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("an_enumeration") == + false); + CHECK(all_schemas[schema_name_2]->has_enumeration("ase_var_enmr") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") == + false); + + // Latest schema + CHECK( + all_schemas[schema_name_3]->has_enumeration("an_enumeration") == false); + CHECK(all_schemas[schema_name_3]->has_enumeration("ase_var_enmr") == true); + CHECK( + all_schemas[schema_name_3]->is_enumeration_loaded("ase_var_enmr") == + true); + } + // Always validate the latest schema. + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("an_enumeration") == + false); + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("ase_var_enmr") == + true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "ase_var_enmr") == true); + + // Evolve a third time to add an enumeration with a name equal to a previously + // dropped enumeration. + ArraySchemaEvolution ase3(ctx_); + auto old_enmr = Enumeration::create(ctx_, "an_enumeration", var_values); + ase3.add_enumeration(old_enmr); + auto attr5 = Attribute::create(ctx_, "attr5"); + AttributeExperimental::set_enumeration_name(ctx_, attr5, "an_enumeration"); + ase.add_attribute(attr5); + // Apply evolution to the array and reopen. + CHECK_NOTHROW(ase3.array_evolve(uri_)); + reopen_and_load_enmrs(); + + // Check all schemas. + if (array_open_v2) { + auto all_schemas = array.ptr()->array()->array_schemas_all(); + std::string schema_name_4 = array.schema().ptr()->array_schema()->name(); + // Previous schemas. + CHECK( + all_schemas[schema_name_1]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_1]->is_enumeration_loaded("an_enumeration") == + false); + CHECK( + all_schemas[schema_name_2]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("an_enumeration") == + false); + CHECK(all_schemas[schema_name_2]->has_enumeration("ase_var_enmr") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") == + false); + CHECK( + all_schemas[schema_name_3]->has_enumeration("an_enumeration") == false); + CHECK(all_schemas[schema_name_3]->has_enumeration("ase_var_enmr") == true); + CHECK( + all_schemas[schema_name_3]->is_enumeration_loaded("ase_var_enmr") == + false); + + // Latest schema. + CHECK( + all_schemas[schema_name_4]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_4]->is_enumeration_loaded("an_enumeration") == + true); + CHECK(all_schemas[schema_name_4]->has_enumeration("ase_var_enmr") == true); + CHECK( + all_schemas[schema_name_4]->is_enumeration_loaded("ase_var_enmr") == + true); + } + // Always validate the latest schema. + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("an_enumeration") == + true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "an_enumeration") == true); + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("ase_var_enmr") == + true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "ase_var_enmr") == true); +} + +TEST_CASE_METHOD( + CPPEnumerationFx, + "CPP API: Load Enumerations - Latest schema post evolution", + "[enumeration][array][schema-evolution][load-enumerations][schema][rest]") { + create_array(); + // Test with `rest.load_enumerations_on_array_open` enabled and disabled. + bool load_enmrs = GENERATE(true, false); + auto config = ctx_.config(); + config["rest.load_enumerations_on_array_open"] = + load_enmrs ? "true" : "false"; + vfs_test_setup_.update_config(config.ptr().get()); + ctx_ = vfs_test_setup_.ctx(); + + // Open the array with no explicit timestamps set. + auto array = tiledb::Array(ctx_, uri_, TILEDB_READ); + + // REST CI will test with both array open v1 and v2. + bool array_open_v2 = array.ptr()->array()->use_refactored_array_open(); + REQUIRE( + array.schema().ptr()->array_schema()->has_enumeration("an_enumeration") == + true); + if (load_enmrs) { + // Array open v1 does not support loading enumerations on array open so we + // must load them explicitly in this case. + if (!array_open_v2) { + ArrayExperimental::load_all_enumerations(ctx_, array); + } + REQUIRE( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "an_enumeration") == true); + } + // If `rest.load_enumerations_on_array_open=false` do not load enmrs + // explicitly. Leave enumerations unloaded initially, evolve the array without + // reopening, and then attempt to load all enumerations. + + // Evolve once to add an enumeration. + ArraySchemaEvolution ase(ctx_); + std::vector var_values{"one", "two", "three"}; + auto var_enmr = Enumeration::create(ctx_, "ase_var_enmr", var_values); + ase.add_enumeration(var_enmr); + auto attr4 = Attribute::create(ctx_, "attr4"); + AttributeExperimental::set_enumeration_name(ctx_, attr4, "ase_var_enmr"); + ase.add_attribute(attr4); + // Apply evolution to the array and intentionally skip reopening after + // evolution to test exceptions. + ase.array_evolve(uri_); + + // Store the original array schema name so we can validate all_schemas later. + std::string schema_name_1 = array.schema().ptr()->array_schema()->name(); + if (array_open_v2) { + if (load_enmrs) { + // If we load enmrs before reopen, we will not load the evolved schema + // that contains the enumeration added during evolution. Since we + // explicitly load only enumerations in the latest schema, we will not hit + // an exception when loading enumerations on an evolved schema prior to + // reopening. + CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); + auto all_schemas = array.ptr()->array_schemas_all(); + CHECK(all_schemas.size() == 1); + CHECK( + all_schemas[schema_name_1]->has_enumeration("an_enumeration") == + true); + CHECK( + all_schemas[schema_name_1]->is_enumeration_loaded("an_enumeration") == + true); + CHECK( + all_schemas[schema_name_1]->has_enumeration("ase_var_enmr") == false); + } + + // Reopen and load the evolved enumerations. + array.reopen(); + std::string schema_name_2 = array.schema().ptr()->array_schema()->name(); + if (!load_enmrs) { + CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); + } + // Validate all array schemas now contain the expected enumerations. + auto all_schemas = array.ptr()->array_schemas_all(); + CHECK( + all_schemas[schema_name_1]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_1]->is_enumeration_loaded("an_enumeration") == + false); + + // Latest schema. + CHECK( + all_schemas[schema_name_2]->has_enumeration("an_enumeration") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("an_enumeration") == + true); + CHECK(all_schemas[schema_name_2]->has_enumeration("ase_var_enmr") == true); + CHECK( + all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") == + true); + } else { + // If we load enmrs before reopen, we will not load the evolved schema that + // contains the enumeration added during evolution. + CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); + CHECK( + array.schema().ptr()->array_schema()->has_enumeration( + "an_enumeration") == true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "an_enumeration") == true); + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("ase_var_enmr") == + false); + CHECK_THROWS_WITH( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "ase_var_enmr"), + Catch::Matchers::ContainsSubstring("unknown enumeration")); + + // Reopen to apply the schema evolution and reload enumerations. + array.reopen(); + if (!load_enmrs) { + CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); + } + } + + // Check all original and evolved enumerations are in the latest schema. + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("an_enumeration") == + true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "an_enumeration") == true); + CHECK( + array.schema().ptr()->array_schema()->has_enumeration("ase_var_enmr") == + true); + CHECK( + array.schema().ptr()->array_schema()->is_enumeration_loaded( + "ase_var_enmr") == true); +} + TEST_CASE_METHOD( CPPEnumerationFx, "CPP: ArraySchemaEvolution - Add Enumeration", diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index 7b93c116cfe..b2129d40d80 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -991,8 +991,16 @@ std::vector> Array::get_enumerations( } // Store the loaded enumerations in the schema + std::string latest_schema_name = array_schema_latest().name(); for (auto& enmr : loaded) { opened_array_->array_schema_latest_ptr()->store_enumeration(enmr); + if (remote_ && use_refactored_array_open()) { + if (!array_schemas_all().contains(latest_schema_name)) { + throw ArrayException( + "No schema with name '" + latest_schema_name + "' was found."); + } + array_schemas_all().at(latest_schema_name)->store_enumeration(enmr); + } } } @@ -1148,9 +1156,9 @@ Status Array::reopen(uint64_t timestamp_start, uint64_t timestamp_end) { set_array_schemas_all(std::move(array_schemas_all)); set_fragment_metadata(std::move(fragment_metadata)); - if (config_.get( - "rest.load_enumerations_on_array_open", Config::must_find)) { - load_all_enumerations(use_refactored_array_open()); + if (use_refactored_array_open()) { + load_all_enumerations(config_.get( + "rest.load_enumerations_on_array_open_all_schemas", Config::must_find)); } return Status::Ok(); From 19a123525cd483d4d35235edbdb77902a5202d56 Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Tue, 19 Nov 2024 14:32:23 -0500 Subject: [PATCH 3/6] Fix CI. --- tiledb/sm/array/array.cc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index b2129d40d80..28f76d731ee 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -502,8 +502,7 @@ Status Array::open( rest_client->get_array_schema_from_rest(array_uri_); throw_if_not_ok(st); set_array_schema_latest(array_schema_latest.value()); - if (config_.get( - "rest.load_enumerations_on_array_open", Config::must_find)) { + if (serialize_enumerations()) { // The route for array open v1 does not currently support loading // enumerations. Once #5359 is merged and deployed to REST this will // not be the case. @@ -601,10 +600,12 @@ Status Array::open( // For fetching remote enumerations REST calls // tiledb_handle_load_enumerations_request which loads enumerations. For // local arrays we don't call this method. - if (serialize_enumerations()) { + if (serialize_enumerations() && use_refactored_array_open()) { load_all_enumerations(config_.get( "rest.load_enumerations_on_array_open_all_schemas", Config::must_find)); + } else if (serialize_enumerations()) { + load_all_enumerations(false); } } @@ -1156,9 +1157,11 @@ Status Array::reopen(uint64_t timestamp_start, uint64_t timestamp_end) { set_array_schemas_all(std::move(array_schemas_all)); set_fragment_metadata(std::move(fragment_metadata)); - if (use_refactored_array_open()) { + if (serialize_enumerations() && use_refactored_array_open()) { load_all_enumerations(config_.get( "rest.load_enumerations_on_array_open_all_schemas", Config::must_find)); + } else if (serialize_enumerations()) { + load_all_enumerations(false); } return Status::Ok(); From a32ee186fa619fd2f1a05e11238d2054749980e8 Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Tue, 19 Nov 2024 15:03:21 -0500 Subject: [PATCH 4/6] Update schemas shared pointer when latest is set. --- tiledb/sm/array/array.cc | 31 +++++++++---------------------- tiledb/sm/array/array.h | 2 ++ 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index 28f76d731ee..1f8e12a8994 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -906,32 +906,24 @@ Array::get_enumerations_all_schemas() { } // Store the loaded enumerations into the schemas. - auto latest_schema = opened_array_->array_schema_latest_ptr(); - for (const auto& schema_enmrs : ret) { + for (const auto& [schema_name, enmrs] : ret) { // This case will only be hit for remote arrays if the client evolves the // schema and does not reopen the array before loading all enumerations. - if (!array_schemas_all().contains(schema_enmrs.first)) { + if (!array_schemas_all().contains(schema_name)) { throw ArrayException( "Array opened using timestamp range (" + std::to_string(array_dir_timestamp_start_) + ", " + std::to_string(array_dir_timestamp_end_) + - ") has no loaded schema named '" + schema_enmrs.first + + ") has no loaded schema named '" + schema_name + "'; If the array was recently evolved be sure to reopen it after " "applying the evolution."); } - auto schema = array_schemas_all().at(schema_enmrs.first); - for (const auto& enmr : schema_enmrs.second) { + auto schema = array_schemas_all().at(schema_name); + for (const auto& enmr : enmrs) { if (!schema->is_enumeration_loaded(enmr->name())) { schema->store_enumeration(enmr); } - // Also store enumerations into the latest schema when we encounter - // it. - if (schema_enmrs.first == latest_schema->name()) { - if (!latest_schema->is_enumeration_loaded(enmr->name())) { - latest_schema->store_enumeration(enmr); - } - } } } @@ -991,16 +983,11 @@ std::vector> Array::get_enumerations( paths_to_load, *encryption_key(), memory_tracker_); } - // Store the loaded enumerations in the schema - std::string latest_schema_name = array_schema_latest().name(); + // Store the loaded enumerations in the latest schema + auto schema = opened_array_->array_schema_latest_ptr(); for (auto& enmr : loaded) { - opened_array_->array_schema_latest_ptr()->store_enumeration(enmr); - if (remote_ && use_refactored_array_open()) { - if (!array_schemas_all().contains(latest_schema_name)) { - throw ArrayException( - "No schema with name '" + latest_schema_name + "' was found."); - } - array_schemas_all().at(latest_schema_name)->store_enumeration(enmr); + if (!schema->is_enumeration_loaded(enmr->name())) { + schema->store_enumeration(enmr); } } } diff --git a/tiledb/sm/array/array.h b/tiledb/sm/array/array.h index f846848f7c5..71e9361cb55 100644 --- a/tiledb/sm/array/array.h +++ b/tiledb/sm/array/array.h @@ -142,6 +142,8 @@ class OpenedArray { inline void set_array_schema_latest( const shared_ptr& array_schema) { array_schema_latest_ = array_schema; + // Update the latest schema in the map of all array schemas. + array_schemas_all_[array_schema->name()] = array_schema; } /** Returns all array schemas. */ From ea2362b2eb6de2b01f0eca71dc63bf23788834df Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Tue, 19 Nov 2024 16:39:16 -0500 Subject: [PATCH 5/6] Only load enmrs on open for remote arrays. --- test/src/unit-cppapi-enumerations.cc | 16 +++++++++------- test/src/unit-rest-enumerations.cc | 21 +++++++++++++++++++++ tiledb/sm/array/array.cc | 21 --------------------- 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/test/src/unit-cppapi-enumerations.cc b/test/src/unit-cppapi-enumerations.cc index 1fa57d335ba..5952bdbd5e1 100644 --- a/test/src/unit-cppapi-enumerations.cc +++ b/test/src/unit-cppapi-enumerations.cc @@ -454,7 +454,7 @@ TEST_CASE_METHOD( if (array_open_v2) { // If we are not loading enmrs on array open we must load them explicitly // with a separate request. - if (!load_enmrs) { + if (!load_enmrs || !vfs_test_setup_.is_rest()) { // Load enumerations for all schemas if using array open v2. CHECK_NOTHROW( ArrayExperimental::load_enumerations_all_schemas(ctx_, array)); @@ -657,7 +657,7 @@ TEST_CASE_METHOD( if (load_enmrs) { // Array open v1 does not support loading enumerations on array open so we // must load them explicitly in this case. - if (!array_open_v2) { + if (!array_open_v2 || !vfs_test_setup_.is_rest()) { ArrayExperimental::load_all_enumerations(ctx_, array); } REQUIRE( @@ -736,7 +736,7 @@ TEST_CASE_METHOD( // Reopen and load the evolved enumerations. array.reopen(); std::string schema_name_2 = array.schema().ptr()->array_schema()->name(); - if (!load_enmrs) { + if (!load_enmrs || !vfs_test_setup_.is_rest()) { CHECK_NOTHROW( ArrayExperimental::load_enumerations_all_schemas(ctx_, array)); } @@ -776,7 +776,7 @@ TEST_CASE_METHOD( // Reopen to apply the schema evolution and reload enumerations. array.reopen(); - if (!load_enmrs) { + if (!load_enmrs || !vfs_test_setup_.is_rest()) { CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); } } @@ -860,7 +860,7 @@ TEST_CASE_METHOD( // If we are not loading enmrs on array open we must load them explicitly // with a separate request. - if (!load_enmrs) { + if (!load_enmrs || !vfs_test_setup_.is_rest()) { // Load enumerations for all schemas if using array open v2. CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); } @@ -1060,6 +1060,8 @@ TEST_CASE_METHOD( // must load them explicitly in this case. if (!array_open_v2) { ArrayExperimental::load_all_enumerations(ctx_, array); + } else if (!vfs_test_setup_.is_rest()) { + ArrayExperimental::load_enumerations_all_schemas(ctx_, array); } REQUIRE( array.schema().ptr()->array_schema()->is_enumeration_loaded( @@ -1106,7 +1108,7 @@ TEST_CASE_METHOD( // Reopen and load the evolved enumerations. array.reopen(); std::string schema_name_2 = array.schema().ptr()->array_schema()->name(); - if (!load_enmrs) { + if (!load_enmrs || !vfs_test_setup_.is_rest()) { CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); } // Validate all array schemas now contain the expected enumerations. @@ -1147,7 +1149,7 @@ TEST_CASE_METHOD( // Reopen to apply the schema evolution and reload enumerations. array.reopen(); - if (!load_enmrs) { + if (!load_enmrs || !vfs_test_setup_.is_rest()) { CHECK_NOTHROW(ArrayExperimental::load_all_enumerations(ctx_, array)); } } diff --git a/test/src/unit-rest-enumerations.cc b/test/src/unit-rest-enumerations.cc index 85efda75fcb..1a299043fcc 100644 --- a/test/src/unit-rest-enumerations.cc +++ b/test/src/unit-rest-enumerations.cc @@ -164,6 +164,15 @@ TEST_CASE_METHOD( create_array(uri_); Array opened_array(ctx_, uri_, TILEDB_READ); + if (!vfs_test_setup_.is_rest() && load_enmrs) { + if (opened_array.ptr()->array()->use_refactored_array_open()) { + CHECK_NOTHROW( + ArrayExperimental::load_enumerations_all_schemas(ctx_, opened_array)); + } else { + CHECK_NOTHROW( + ArrayExperimental::load_all_enumerations(ctx_, opened_array)); + } + } auto array = opened_array.ptr()->array(); auto schema = array->array_schema_latest_ptr(); REQUIRE(schema->is_enumeration_loaded("my_enum") == load_enmrs); @@ -218,6 +227,9 @@ TEST_CASE_METHOD( CHECK_NOTHROW(sm::Array::evolve_array_schema( ctx_.ptr()->resources(), uri, ase.get(), array->get_encryption_key())); CHECK(array->reopen().ok()); + if (load_enmrs && !vfs_test_setup_.is_rest()) { + array->load_all_enumerations(array->use_refactored_array_open()); + } schema = array->array_schema_latest_ptr(); std::string schema_name_2 = schema->name(); REQUIRE(schema->is_enumeration_loaded("my_enum") == load_enmrs); @@ -227,6 +239,9 @@ TEST_CASE_METHOD( expected_enmrs[schema_name_2] = {enmr1, enmr2, var_enmr.ptr()->enumeration()}; actual_enmrs = array->get_enumerations_all_schemas(); if (!load_enmrs) { + if (!vfs_test_setup_.is_rest()) { + array->load_all_enumerations(array->use_refactored_array_open()); + } REQUIRE(schema->is_enumeration_loaded("my_enum") == true); REQUIRE(schema->is_enumeration_loaded("fruit") == true); REQUIRE(schema->is_enumeration_loaded("ase_var_enmr") == true); @@ -242,6 +257,9 @@ TEST_CASE_METHOD( CHECK_NOTHROW(sm::Array::evolve_array_schema( ctx_.ptr()->resources(), uri, ase.get(), array->get_encryption_key())); CHECK(array->reopen().ok()); + if (load_enmrs && !vfs_test_setup_.is_rest()) { + array->load_all_enumerations(array->use_refactored_array_open()); + } schema = array->array_schema_latest_ptr(); std::string schema_name_3 = schema->name(); REQUIRE_THROWS_WITH( @@ -253,6 +271,9 @@ TEST_CASE_METHOD( expected_enmrs[schema_name_3] = {enmr2, var_enmr.ptr()->enumeration()}; actual_enmrs = array->get_enumerations_all_schemas(); if (!load_enmrs) { + if (!vfs_test_setup_.is_rest()) { + array->load_all_enumerations(array->use_refactored_array_open()); + } REQUIRE_THROWS_WITH( schema->is_enumeration_loaded("my_enum"), Catch::Matchers::ContainsSubstring("unknown enumeration")); diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index 1f8e12a8994..b277ff7a380 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -595,20 +595,6 @@ Status Array::open( return LOG_STATUS(Status_ArrayError(e.what())); } - // Handle any array open config options for local arrays. - if (!remote_) { - // For fetching remote enumerations REST calls - // tiledb_handle_load_enumerations_request which loads enumerations. For - // local arrays we don't call this method. - if (serialize_enumerations() && use_refactored_array_open()) { - load_all_enumerations(config_.get( - "rest.load_enumerations_on_array_open_all_schemas", - Config::must_find)); - } else if (serialize_enumerations()) { - load_all_enumerations(false); - } - } - is_opening_or_closing_ = false; return Status::Ok(); } @@ -1144,13 +1130,6 @@ Status Array::reopen(uint64_t timestamp_start, uint64_t timestamp_end) { set_array_schemas_all(std::move(array_schemas_all)); set_fragment_metadata(std::move(fragment_metadata)); - if (serialize_enumerations() && use_refactored_array_open()) { - load_all_enumerations(config_.get( - "rest.load_enumerations_on_array_open_all_schemas", Config::must_find)); - } else if (serialize_enumerations()) { - load_all_enumerations(false); - } - return Status::Ok(); } From 2992f6af6853017a3af60a1496657ec9ffae731f Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Tue, 19 Nov 2024 17:20:42 -0500 Subject: [PATCH 6/6] Fix test. --- test/src/unit-enumerations.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/src/unit-enumerations.cc b/test/src/unit-enumerations.cc index 2a5669fa0a8..a846d07a987 100644 --- a/test/src/unit-enumerations.cc +++ b/test/src/unit-enumerations.cc @@ -2488,7 +2488,7 @@ TEST_CASE_METHOD( REQUIRE(a1->serialize_enumerations() == (do_load == "true")); REQUIRE( a1->array_schema_latest_ptr()->get_loaded_enumeration_names().size() == - (do_load == "true" ? 2 : 0)); + 0); auto a2 = make_shared(HERE(), ctx.resources(), uri_);