Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add configuration parameter to load enumerations on all schemas. #5379

Merged
merged 7 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions test/src/unit-capi-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ void check_save_to_file() {
ss << "rest.curl.verbose false\n";
ss << "rest.http_compressor any\n";
ss << "rest.load_enumerations_on_array_open false\n";
ss << "rest.load_enumerations_on_array_open_all_schemas false\n";
ss << "rest.load_metadata_on_array_open true\n";
ss << "rest.load_non_empty_domain_on_array_open true\n";
ss << "rest.retry_count 25\n";
Expand Down Expand Up @@ -617,6 +618,8 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
all_param_values["rest.load_metadata_on_array_open"] = "false";
all_param_values["rest.load_non_empty_domain_on_array_open"] = "false";
all_param_values["rest.load_enumerations_on_array_open"] = "false";
all_param_values["rest.load_enumerations_on_array_open_all_schemas"] =
"false";
all_param_values["rest.use_refactored_array_open"] = "false";
all_param_values["rest.use_refactored_array_open_and_query_submit"] = "false";
all_param_values["rest.payer_namespace"] = "";
Expand Down
357 changes: 346 additions & 11 deletions test/src/unit-cppapi-enumerations.cc

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion test/src/unit-enumerations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2488,7 +2488,7 @@ TEST_CASE_METHOD(
REQUIRE(a1->serialize_enumerations() == (do_load == "true"));
REQUIRE(
a1->array_schema_latest_ptr()->get_loaded_enumeration_names().size() ==
(do_load == "true" ? 2 : 0));
0);

auto a2 = make_shared<Array>(HERE(), ctx.resources(), uri_);

Expand Down
21 changes: 21 additions & 0 deletions test/src/unit-rest-enumerations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,15 @@ TEST_CASE_METHOD(

create_array(uri_);
Array opened_array(ctx_, uri_, TILEDB_READ);
if (!vfs_test_setup_.is_rest() && load_enmrs) {
if (opened_array.ptr()->array()->use_refactored_array_open()) {
CHECK_NOTHROW(
ArrayExperimental::load_enumerations_all_schemas(ctx_, opened_array));
} else {
CHECK_NOTHROW(
ArrayExperimental::load_all_enumerations(ctx_, opened_array));
}
}
auto array = opened_array.ptr()->array();
auto schema = array->array_schema_latest_ptr();
REQUIRE(schema->is_enumeration_loaded("my_enum") == load_enmrs);
Expand Down Expand Up @@ -218,6 +227,9 @@ TEST_CASE_METHOD(
CHECK_NOTHROW(sm::Array::evolve_array_schema(
ctx_.ptr()->resources(), uri, ase.get(), array->get_encryption_key()));
CHECK(array->reopen().ok());
if (load_enmrs && !vfs_test_setup_.is_rest()) {
array->load_all_enumerations(array->use_refactored_array_open());
}
schema = array->array_schema_latest_ptr();
std::string schema_name_2 = schema->name();
REQUIRE(schema->is_enumeration_loaded("my_enum") == load_enmrs);
Expand All @@ -227,6 +239,9 @@ TEST_CASE_METHOD(
expected_enmrs[schema_name_2] = {enmr1, enmr2, var_enmr.ptr()->enumeration()};
actual_enmrs = array->get_enumerations_all_schemas();
if (!load_enmrs) {
if (!vfs_test_setup_.is_rest()) {
array->load_all_enumerations(array->use_refactored_array_open());
}
REQUIRE(schema->is_enumeration_loaded("my_enum") == true);
REQUIRE(schema->is_enumeration_loaded("fruit") == true);
REQUIRE(schema->is_enumeration_loaded("ase_var_enmr") == true);
Expand All @@ -242,6 +257,9 @@ TEST_CASE_METHOD(
CHECK_NOTHROW(sm::Array::evolve_array_schema(
ctx_.ptr()->resources(), uri, ase.get(), array->get_encryption_key()));
CHECK(array->reopen().ok());
if (load_enmrs && !vfs_test_setup_.is_rest()) {
array->load_all_enumerations(array->use_refactored_array_open());
}
schema = array->array_schema_latest_ptr();
std::string schema_name_3 = schema->name();
REQUIRE_THROWS_WITH(
Expand All @@ -253,6 +271,9 @@ TEST_CASE_METHOD(
expected_enmrs[schema_name_3] = {enmr2, var_enmr.ptr()->enumeration()};
actual_enmrs = array->get_enumerations_all_schemas();
if (!load_enmrs) {
if (!vfs_test_setup_.is_rest()) {
array->load_all_enumerations(array->use_refactored_array_open());
}
REQUIRE_THROWS_WITH(
schema->is_enumeration_loaded("my_enum"),
Catch::Matchers::ContainsSubstring("unknown enumeration"));
Expand Down
56 changes: 16 additions & 40 deletions tiledb/sm/array/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -502,8 +502,7 @@ Status Array::open(
rest_client->get_array_schema_from_rest(array_uri_);
throw_if_not_ok(st);
set_array_schema_latest(array_schema_latest.value());
if (config_.get<bool>(
"rest.load_enumerations_on_array_open", Config::must_find)) {
if (serialize_enumerations()) {
// The route for array open v1 does not currently support loading
// enumerations. Once #5359 is merged and deployed to REST this will
// not be the case.
Expand Down Expand Up @@ -596,17 +595,6 @@ Status Array::open(
return LOG_STATUS(Status_ArrayError(e.what()));
}

// Handle any array open config options for local arrays.
if (!remote_) {
// For fetching remote enumerations REST calls
// tiledb_handle_load_enumerations_request which loads enumerations. For
// local arrays we don't call this method.
if (config_.get<bool>(
"rest.load_enumerations_on_array_open", Config::must_find)) {
load_all_enumerations(use_refactored_array_open());
}
}

is_opening_or_closing_ = false;
return Status::Ok();
}
Expand Down Expand Up @@ -904,32 +892,24 @@ Array::get_enumerations_all_schemas() {
}

// Store the loaded enumerations into the schemas.
auto latest_schema = opened_array_->array_schema_latest_ptr();
for (const auto& schema_enmrs : ret) {
for (const auto& [schema_name, enmrs] : ret) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️

// This case will only be hit for remote arrays if the client evolves the
// schema and does not reopen the array before loading all enumerations.
if (!array_schemas_all().contains(schema_enmrs.first)) {
if (!array_schemas_all().contains(schema_name)) {
throw ArrayException(
"Array opened using timestamp range (" +
std::to_string(array_dir_timestamp_start_) + ", " +
std::to_string(array_dir_timestamp_end_) +
") has no loaded schema named '" + schema_enmrs.first +
") has no loaded schema named '" + schema_name +
"'; If the array was recently evolved be sure to reopen it after "
"applying the evolution.");
}

auto schema = array_schemas_all().at(schema_enmrs.first);
for (const auto& enmr : schema_enmrs.second) {
auto schema = array_schemas_all().at(schema_name);
for (const auto& enmr : enmrs) {
if (!schema->is_enumeration_loaded(enmr->name())) {
schema->store_enumeration(enmr);
}
// Also store enumerations into the latest schema when we encounter
// it.
if (schema_enmrs.first == latest_schema->name()) {
if (!latest_schema->is_enumeration_loaded(enmr->name())) {
latest_schema->store_enumeration(enmr);
}
}
}
}

Expand Down Expand Up @@ -989,9 +969,12 @@ std::vector<shared_ptr<const Enumeration>> Array::get_enumerations(
paths_to_load, *encryption_key(), memory_tracker_);
}

// Store the loaded enumerations in the schema
// Store the loaded enumerations in the latest schema
auto schema = opened_array_->array_schema_latest_ptr();
for (auto& enmr : loaded) {
opened_array_->array_schema_latest_ptr()->store_enumeration(enmr);
if (!schema->is_enumeration_loaded(enmr->name())) {
schema->store_enumeration(enmr);
}
}
}

Expand Down Expand Up @@ -1147,11 +1130,6 @@ Status Array::reopen(uint64_t timestamp_start, uint64_t timestamp_end) {
set_array_schemas_all(std::move(array_schemas_all));
set_fragment_metadata(std::move(fragment_metadata));

if (config_.get<bool>(
"rest.load_enumerations_on_array_open", Config::must_find)) {
load_all_enumerations(use_refactored_array_open());
}

return Status::Ok();
}

Expand Down Expand Up @@ -1615,13 +1593,11 @@ bool Array::serialize_non_empty_domain() const {
}

bool Array::serialize_enumerations() const {
auto serialize = config_.get<bool>("rest.load_enumerations_on_array_open");
if (!serialize.has_value()) {
throw std::runtime_error(
"Cannot get rest.load_enumerations_on_array_open configuration option "
"from config");
}
return serialize.value();
return config_.get<bool>(
"rest.load_enumerations_on_array_open", Config::must_find) ||
config_.get<bool>(
"rest.load_enumerations_on_array_open_all_schemas",
Config::must_find);
}

bool Array::serialize_metadata() const {
Expand Down
8 changes: 7 additions & 1 deletion tiledb/sm/array/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ class OpenedArray {
inline void set_array_schema_latest(
const shared_ptr<ArraySchema>& array_schema) {
array_schema_latest_ = array_schema;
// Update the latest schema in the map of all array schemas.
array_schemas_all_[array_schema->name()] = array_schema;
}

/** Returns all array schemas. */
Expand Down Expand Up @@ -985,7 +987,11 @@ class Array {
bool serialize_non_empty_domain() const;

/**
* Checks the config to se if enumerations should be serialized on array open.
* Checks the config to see if enumerations should be serialized on array
* open.
*
* @return True if either `rest.load_enumerations_on_array_open` or
* `rest.load_enumerations_on_array_open_all_schemas` is set, else False.
*/
bool serialize_enumerations() const;

Expand Down
2 changes: 1 addition & 1 deletion tiledb/sm/c_api/tiledb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2155,7 +2155,7 @@ capi_return_t tiledb_handle_load_array_schema_request(
static_cast<tiledb::sm::SerializationType>(serialization_type),
request->buffer());

if (load_schema_req.include_enumerations()) {
if (load_schema_req.include_enumerations_latest_schema()) {
array->load_all_enumerations();
}

Expand Down
5 changes: 5 additions & 0 deletions tiledb/sm/config/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ const std::string Config::REST_CURL_VERBOSE = "false";
const std::string Config::REST_CURL_TCP_KEEPALIVE = "true";
const std::string Config::REST_CURL_RETRY_ERRORS = "true";
const std::string Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN = "false";
const std::string Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN_ALL_SCHEMAS =
"false";
const std::string Config::REST_LOAD_METADATA_ON_ARRAY_OPEN = "true";
const std::string Config::REST_LOAD_NON_EMPTY_DOMAIN_ON_ARRAY_OPEN = "true";
const std::string Config::REST_USE_REFACTORED_ARRAY_OPEN = "true";
Expand Down Expand Up @@ -264,6 +266,9 @@ const std::map<std::string, std::string> default_config_values = {
std::make_pair(
"rest.load_enumerations_on_array_open",
Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN),
std::make_pair(
"rest.load_enumerations_on_array_open_all_schemas",
Config::REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN_ALL_SCHEMAS),
std::make_pair(
"rest.load_metadata_on_array_open",
Config::REST_LOAD_METADATA_ON_ARRAY_OPEN),
Expand Down
11 changes: 10 additions & 1 deletion tiledb/sm/config/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,18 @@ class Config {
/** If we should retry Curl errors in requests to REST. */
static const std::string REST_CURL_RETRY_ERRORS;

/** If the array enumerations should be loaded on array open */
/**
* If the array enumerations should be loaded on array open for the latest
* array schema only.
*/
static const std::string REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN;

/**
* If the array enumerations should be loaded on array open for all array
* schemas.
*/
static const std::string REST_LOAD_ENUMERATIONS_ON_ARRAY_OPEN_ALL_SCHEMAS;

/** If the array metadata should be loaded on array open */
static const std::string REST_LOAD_METADATA_ON_ARRAY_OPEN;

Expand Down
8 changes: 6 additions & 2 deletions tiledb/sm/cpp_api/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -928,8 +928,12 @@ class Config {
* together with the open array <br>
* **Default**: true
* - `rest.load_enumerations_on_array_open` <br>
* If true, enumerations will be loaded and sent to server together with
* the open array.
* If true, enumerations will be loaded for the latest array schema on
* array open.
* **Default**: false
* - `rest.load_enumerations_on_array_open_all_schemas` <br>
* If true, enumerations will be loaded for all array schemas on array
* open.
* **Default**: false
* - `rest.use_refactored_array_open` <br>
* If true, the new REST routes and APIs for opening an array will be used
Expand Down
3 changes: 2 additions & 1 deletion tiledb/sm/serialization/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ Status array_to_capnp(
array_builder->setQueryType(query_type_str(array->get_query_type()));

if (array->use_refactored_array_open() && array->serialize_enumerations()) {
array->load_all_enumerations(true);
array->load_all_enumerations(array->config().get<bool>(
"rest.load_enumerations_on_array_open_all_schemas", Config::must_find));
}

const auto& array_schema_latest = array->array_schema_latest();
Expand Down
2 changes: 1 addition & 1 deletion tiledb/sm/serialization/array_schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1638,7 +1638,7 @@ void load_array_schema_request_to_capnp(
throw_if_not_ok(config_to_capnp(config, &config_builder));
// This boolean is only serialized to support clients using TileDB < 2.26.
// Future options should only be serialized within the Config object above.
builder.setIncludeEnumerations(req.include_enumerations());
builder.setIncludeEnumerations(req.include_enumerations_latest_schema());
}

void serialize_load_array_schema_request(
Expand Down
15 changes: 11 additions & 4 deletions tiledb/sm/serialization/array_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,23 @@ namespace serialization {
class LoadArraySchemaRequest {
public:
explicit LoadArraySchemaRequest(const Config& config)
: include_enumerations_(config.get<bool>(
: include_enumerations_latest_schema_(config.get<bool>(
"rest.load_enumerations_on_array_open", Config::must_find))
, include_enumerations_all_schemas_(config.get<bool>(
"rest.load_enumerations_on_array_open", Config::must_find)) {
}

inline bool include_enumerations() const {
return include_enumerations_;
inline bool include_enumerations_latest_schema() const {
return include_enumerations_latest_schema_;
}

inline bool include_enumerations_all_schemas() const {
return include_enumerations_all_schemas_;
}

private:
bool include_enumerations_;
bool include_enumerations_latest_schema_;
bool include_enumerations_all_schemas_;
};

#ifdef TILEDB_SERIALIZATION
Expand Down
Loading