diff --git a/.gitignore b/.gitignore index d884f9aa5dc..44b5b01fa6c 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,7 @@ cmake-build-* third-party/folly/ .cache *.sublime-* + +.clang-format +.editorconfig +*.vim diff --git a/Makefile b/Makefile index 8a9c884cd50..59cd6db51c0 100644 --- a/Makefile +++ b/Makefile @@ -1886,6 +1886,12 @@ replication_test: cloud/replication_test.o $(TEST_LIBRARY) $(LIBRARY) cloud_file_system_test: cloud/cloud_file_system_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) +gcp_file_system_test: cloud/gcp/gcp_file_system_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +gcp_db_cloud_test: cloud/gcp/gcp_db_cloud_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + cloud_manifest_test: cloud/cloud_manifest_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) @@ -2618,4 +2624,4 @@ list_all_tests: ROCKS_DEP_RULES=$(filter-out clean format check-format check-buck-targets check-headers check-sources jclean jtest package analyze tags rocksdbjavastatic% unity.% unity_test checkout_folly, $(MAKECMDGOALS)) ifneq ("$(ROCKS_DEP_RULES)", "") -include $(DEPFILES) -endif +endif \ No newline at end of file diff --git a/TARGETS b/TARGETS index 77a7c16fd7e..d456e95034d 100644 --- a/TARGETS +++ b/TARGETS @@ -375,7 +375,6 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[ "//folly/synchronization:distributed_mutex", ], headers=None, link_whole=False, extra_test_libs=False) -<<<<<<< HEAD cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[ "cache/cache.cc", "cache/cache_entry_roles.cc", @@ -4983,6 +4982,15 @@ cpp_unittest_wrapper(name="cloud_file_system_test", deps=[":rocksdb_test_lib"], extra_compiler_flags=[]) +cpp_unittest_wrapper(name="gcp_file_system_test", + srcs=["cloud/gcp/gcp_file_system_test.cc"], + deps=[":rocksdb_test_lib"], + extra_compiler_flags=[]) + +cpp_unittest_wrapper(name="gcp_db_cloud_test", + srcs=["cloud/gcp/gcp_db_cloud_test.cc"], + deps=[":rocksdb_test_lib"], + extra_compiler_flags=[]) cpp_unittest_wrapper(name="cloud_manifest_test", srcs=["cloud/cloud_manifest_test.cc"], diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index b56e298554b..376abda8819 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -641,6 +641,16 @@ if [ "${USE_AWS}XXX" = "1XXX" ]; then COMMON_FLAGS="$COMMON_FLAGS $S3_CCFLAGS" PLATFORM_LDFLAGS="$S3_LDFLAGS $PLATFORM_LDFLAGS" fi + +if [ "${USE_GCP}XXX" = "1XXX" ]; then + GCP_SDK=/usr/local + GCI=${GCP_SDK}/include/ + GCS_CCFLAGS="$GCS_CCFLAGS -I$GCI -DUSE_GCP" + GCS_LDFLAGS="$GCS_LDFLAGS -lgoogle_cloud_cpp_common -lgoogle_cloud_cpp_storage -labsl_bad_variant_access -labsl_bad_optional_access" + COMMON_FLAGS="$COMMON_FLAGS $GCS_CCFLAGS" + PLATFORM_LDFLAGS="$GCS_LDFLAGS $PLATFORM_LDFLAGS" +fi + # # Support the Kafka WAL storing if the env variable named USE_KAFKA # is set to 1. 
Setting it to any other value or not setting it at all means
diff --git a/cloud/cloud_file_system.cc b/cloud/cloud_file_system.cc
index 4a660830df1..7004fcaa027 100644
--- a/cloud/cloud_file_system.cc
+++ b/cloud/cloud_file_system.cc
@@ -10,10 +10,12 @@
 #include
 #include "cloud/aws/aws_file_system.h"
+#include "rocksdb/cloud/cloud_file_system_impl.h"
 #include "cloud/cloud_log_controller_impl.h"
 #include "cloud/cloud_manifest.h"
 #include "cloud/db_cloud_impl.h"
 #include "cloud/filename.h"
+#include "cloud/gcp/gcp_file_system.h"
 #include "env/composite_env_wrapper.h"
 #include "options/configurable_helper.h"
 #include "options/options_helper.h"
@@ -448,7 +450,30 @@ Status CloudFileSystemEnv::NewAwsFileSystem(
   return NewAwsFileSystem(base_fs, options, logger, cfs);
 }

+Status CloudFileSystemEnv::NewGcpFileSystem(
+    const std::shared_ptr<FileSystem>& base_fs,
+    const std::string& src_cloud_bucket, const std::string& src_cloud_object,
+    const std::string& src_cloud_region, const std::string& dest_cloud_bucket,
+    const std::string& dest_cloud_object, const std::string& dest_cloud_region,
+    const CloudFileSystemOptions& cloud_options,
+    const std::shared_ptr<Logger>& logger, CloudFileSystem** cfs) {
+  CloudFileSystemOptions options = cloud_options;
+  if (!src_cloud_bucket.empty())
+    options.src_bucket.SetBucketName(src_cloud_bucket);
+  if (!src_cloud_object.empty())
+    options.src_bucket.SetObjectPath(src_cloud_object);
+  if (!src_cloud_region.empty()) options.src_bucket.SetRegion(src_cloud_region);
+  if (!dest_cloud_bucket.empty())
+    options.dest_bucket.SetBucketName(dest_cloud_bucket);
+  if (!dest_cloud_object.empty())
+    options.dest_bucket.SetObjectPath(dest_cloud_object);
+  if (!dest_cloud_region.empty())
+    options.dest_bucket.SetRegion(dest_cloud_region);
+  return NewGcpFileSystem(base_fs, options, logger, cfs);
+}
+
 int DoRegisterCloudObjects(ObjectLibrary& library, const std::string& arg) {
+  (void) arg;  // Suppress unused parameter warning
   int count = 0;
   // Register the FileSystem types
   library.AddFactory(
@@ -462,6 +487,11 @@ int DoRegisterCloudObjects(ObjectLibrary& library, const std::string& arg) {
       });
   count++;

+#ifdef USE_GCP
+  count += CloudFileSystemImpl::RegisterGcpObjects(library, arg);
+#endif
+
+#ifdef USE_AWS
   count += CloudFileSystemImpl::RegisterAwsObjects(library, arg);

   // Register the Cloud Log Controllers
@@ -477,6 +507,7 @@ int DoRegisterCloudObjects(ObjectLibrary& library, const std::string& arg) {
         return guard->get();
       });
   count++;
+#endif

   return count;
 }
@@ -638,6 +669,38 @@ Status CloudFileSystemEnv::NewAwsFileSystem(
 }
 #endif

+#ifndef USE_GCP
+Status CloudFileSystemEnv::NewGcpFileSystem(
+    const std::shared_ptr<FileSystem>& /*base_fs*/,
+    const CloudFileSystemOptions& /*options*/,
+    const std::shared_ptr<Logger>& /*logger*/, CloudFileSystem** /*cfs*/) {
+  return Status::NotSupported("RocksDB Cloud not compiled with GCP support");
+}
+#else
+Status CloudFileSystemEnv::NewGcpFileSystem(
+    const std::shared_ptr<FileSystem>& base_fs,
+    const CloudFileSystemOptions& options,
+    const std::shared_ptr<Logger>& logger, CloudFileSystem** cfs) {
+  CloudFileSystemEnv::RegisterCloudObjects();
+  // Dump out cloud fs options
+  options.Dump(logger.get());
+
+  Status st = GcpFileSystem::NewGcpFileSystem(base_fs, options, logger, cfs);
+  if (st.ok()) {
+    // store a copy to the logger
+    auto* cloud = static_cast<CloudFileSystemImpl*>(*cfs);
+    cloud->info_log_ = logger;
+
+    // start the purge thread only if there is a destination bucket
+    if (options.dest_bucket.IsValid() && options.run_purger) {
+      cloud->purge_thread_ = std::thread([cloud] { cloud->Purger(); });
+    }
+  }
+
+  return st;
+}
+#endif
+
 std::unique_ptr<Env> CloudFileSystemEnv::NewCompositeEnv(
     Env* env, const std::shared_ptr<FileSystem>& fs) {
   return std::make_unique<CompositeEnvWrapper>(env, fs);
diff --git a/cloud/cloud_file_system_impl.cc b/cloud/cloud_file_system_impl.cc
index 18a80669111..4817013ce91 100644
--- a/cloud/cloud_file_system_impl.cc
+++ b/cloud/cloud_file_system_impl.cc
@@ -2283,7 +2283,7 @@ IOStatus CloudFileSystemImpl::FindAllLiveFiles(
   // filename will be remapped correctly based on current_epoch of
   // cloud_manifest
   *manifest_file =
-      RemapFilename(ManifestFileWithEpoch("" /* dbname */, "" /* epoch */));
+      RemapFilename(ManifestFileWithEpoch("" /* epoch */));

   RemapFileNumbers(file_nums, live_sst_files);

diff --git a/cloud/cloud_file_system_test.cc b/cloud/cloud_file_system_test.cc
index d397b0670dd..cc902049bf5 100644
--- a/cloud/cloud_file_system_test.cc
+++ b/cloud/cloud_file_system_test.cc
@@ -1,4 +1,7 @@
 // Copyright (c) 2017 Rockset
+#ifndef ROCKSDB_LITE
+
+#ifdef USE_AWS

 #include "rocksdb/cloud/cloud_file_system.h"

@@ -11,6 +14,8 @@
 #include "test_util/testharness.h"
 #include "util/string_util.h"

+#include <aws/core/Aws.h>
+
 namespace ROCKSDB_NAMESPACE {

 TEST(CloudFileSystemTest, TestBucket) {

@@ -242,5 +247,28 @@ TEST(CloudFileSystemTest, ConfigureKafkaController) {
 int main(int argc, char** argv) {
   ::testing::InitGoogleTest(&argc, argv);
+  Aws::InitAPI(Aws::SDKOptions());
   return RUN_ALL_TESTS();
 }
+
+#else  // USE_AWS
+
+#include <stdio.h>
+
+int main(int, char**) {
+  fprintf(stderr,
+          "SKIPPED as DBCloud is supported only when USE_AWS is defined.\n");
+  return 0;
+}
+#endif  // USE_AWS
+
+#else  // ROCKSDB_LITE
+
+#include <stdio.h>
+
+int main(int, char**) {
+  fprintf(stderr, "SKIPPED as DBCloud is not supported in ROCKSDB_LITE\n");
+  return 0;
+}
+
+#endif  // ROCKSDB_LITE
\ No newline at end of file
diff --git a/cloud/db_cloud_test.cc b/cloud/db_cloud_test.cc
index 0cf5cbfeac9..4fea66af0e1 100644
--- a/cloud/db_cloud_test.cc
+++ b/cloud/db_cloud_test.cc
@@ -1766,8 +1766,13 @@ TEST_F(CloudTest, CheckpointToCloud) {
   auto checkpoint_bucket = cloud_fs_options_.dest_bucket;
+  std::string ckpt_from_object_path =
+      cloud_fs_options_.dest_bucket.GetObjectPath();
+  ckpt_from_object_path += "_from";
   cloud_fs_options_.src_bucket = BucketOptions();
+  cloud_fs_options_.src_bucket.SetObjectPath(ckpt_from_object_path);
   cloud_fs_options_.dest_bucket = BucketOptions();
+  cloud_fs_options_.dest_bucket.SetObjectPath(ckpt_from_object_path);

   // Create a DB with two files
   OpenDB();
@@ -1783,6 +1788,9 @@ TEST_F(CloudTest, CheckpointToCloud) {
   CloseDB();
   DestroyDir(dbname_);

+  GetCloudFileSystem()->GetStorageProvider()->EmptyBucket(
+      checkpoint_bucket.GetBucketName(),
+      cloud_fs_options_.dest_bucket.GetObjectPath());

   cloud_fs_options_.src_bucket = checkpoint_bucket;
@@ -3251,4 +3259,4 @@ int main(int, char**) {
   return 0;
 }

-#endif  // !ROCKSDB_LITE
+#endif  // !ROCKSDB_LITE
\ No newline at end of file
diff --git a/cloud/filename.h b/cloud/filename.h
index 887594b8a66..aa68423d883 100644
--- a/cloud/filename.h
+++ b/cloud/filename.h
@@ -198,6 +198,32 @@ inline bool IsCloudManifestFile(const std::string& pathname) {
   return false;
 }

+inline std::string ReduceSlashes(const std::string& pathname) {
+  std::string result;
+  const char slash = '/';
+
+  bool previous_was_slash = false;
+  for (char c : pathname) {
+    if (c == slash) {
+      if (!previous_was_slash) {
+        result += c;
+        previous_was_slash = true;
+      }
+    } else {
+      result += c;
+      previous_was_slash = false;
+    }
+  }
+
+  return result;
+}
+
 enum class RocksDBFileType {
   kSstFile,
   kLogFile,
@@ -228,4 +254,4 @@
 inline RocksDBFileType GetFileType(const std::string& fname_with_epoch) {
   return RocksDBFileType::kUnknown;
 }
-}  // namespace
+}  // namespace
\ No newline at end of file
diff --git a/cloud/gcp/gcp_cs.cc b/cloud/gcp/gcp_cs.cc
new file mode 100644
index 00000000000..72f67ebd71d
--- /dev/null
+++ b/cloud/gcp/gcp_cs.cc
@@ -0,0 +1,726 @@
+#ifndef ROCKSDB_LITE
+#ifdef USE_GCP
+#include "google/cloud/storage/bucket_metadata.h"
+#include "google/cloud/storage/client.h"
+
+namespace gcs = ::google::cloud::storage;
+namespace gcp = ::google::cloud;
+#endif
+
+#include "rocksdb/cloud/cloud_storage_provider_impl.h"
+#include "cloud/filename.h"
+#include "cloud/gcp/gcp_file_system.h"
+#include "rocksdb/cloud/cloud_file_system.h"
+#include "rocksdb/convenience.h"
+#include "port/port_posix.h"
+#include <chrono>
+#include <fstream>
+
+#ifdef _WIN32_WINNT
+#undef GetMessage
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+#ifdef USE_GCP
+
+static bool IsNotFound(gcp::Status const& status) {
+  return (status.code() == gcp::StatusCode::kNotFound);
+}
+
+// AWS handles successive slashes in a path as a single slash, but GCS does
+// not, so we make the two consistent by reducing multiple slashes to a
+// single slash.
+inline std::string normalize_object_path(std::string const& object_path) {
+  std::string path = ReduceSlashes(object_path);
+  return ltrim_if(path, '/');
+}
+
+class CloudRequestCallbackGuard {
+ public:
+  CloudRequestCallbackGuard(CloudRequestCallback* callback,
+                            CloudRequestOpType type, uint64_t size = 0)
+      : callback_(callback), type_(type), size_(size), start_(now()) {}
+
+  ~CloudRequestCallbackGuard() {
+    if (callback_) {
+      (*callback_)(type_, size_, now() - start_, success_);
+    }
+  }
+
+  void SetSize(uint64_t size) { size_ = size; }
+  void SetSuccess(bool success) { success_ = success; }
+
+ private:
+  uint64_t now() {
+    return std::chrono::duration_cast<std::chrono::milliseconds>(
+               std::chrono::system_clock::now() -
+               std::chrono::system_clock::from_time_t(0))
+        .count();
+  }
+  CloudRequestCallback* callback_;
+  CloudRequestOpType type_;
+  uint64_t size_;
+  bool success_{false};
+  uint64_t start_;
+};
+
+/******************** GCSClientWrapper ******************/
+
+class GCSClientWrapper {
+ public:
+  explicit GCSClientWrapper(CloudFileSystemOptions const& cloud_options,
+                            gcp::Options gcp_options)
+      : cloud_request_callback_(cloud_options.cloud_request_callback) {
+    if (cloud_options.gcs_client_factory) {
+      client_ = cloud_options.gcs_client_factory(gcp_options);
+    } else {
+      client_ = std::make_shared<gcs::Client>(gcp_options);
+    }
+  }
+
+  gcp::StatusOr<gcs::BucketMetadata> CreateBucket(
+      std::string bucket_name, gcs::BucketMetadata metadata) {
+    CloudRequestCallbackGuard t(cloud_request_callback_.get(),
+                                CloudRequestOpType::kCreateOp);
+    gcp::StatusOr<gcs::BucketMetadata> bucket_metadata =
+        client_->CreateBucket(bucket_name, metadata);
+    t.SetSuccess(bucket_metadata.ok());
+    return bucket_metadata;
+  }
+
+  gcp::StatusOr<gcs::ListObjectsReader> ListCloudObjects(
+      std::string bucket_name, std::string prefix, int /*maximum*/) {
+    CloudRequestCallbackGuard t(cloud_request_callback_.get(),
+                                CloudRequestOpType::kListOp);
+    gcp::StatusOr<gcs::ListObjectsReader> objects = client_->ListObjects(
+        bucket_name, gcs::Prefix(prefix) /*, gcs::MaxResults(maximum)*/);
+    t.SetSuccess(objects.ok());
+    return objects;
+  }
+
+  gcp::StatusOr<gcs::BucketMetadata> HeadBucket(std::string bucket_name) {
+    CloudRequestCallbackGuard t(cloud_request_callback_.get(),
+                                CloudRequestOpType::kInfoOp);
+    gcp::StatusOr<gcs::BucketMetadata> bucket_metadata =
+        client_->GetBucketMetadata(bucket_name);
+    t.SetSuccess(bucket_metadata.ok());
+    return bucket_metadata;
+  }
+
+  gcp::Status DeleteCloudObject(std::string
bucket_name, + std::string object_path) { + CloudRequestCallbackGuard t(cloud_request_callback_.get(), + CloudRequestOpType::kDeleteOp); + gcp::Status del = client_->DeleteObject(bucket_name, object_path); + t.SetSuccess(del.ok()); + return del; + } + + gcp::StatusOr CopyCloudObject( + std::string src_bucketname, std::string src_objectpath, + std::string dst_bucketname, std::string dst_objectpath) { + CloudRequestCallbackGuard t(cloud_request_callback_.get(), + CloudRequestOpType::kCopyOp); + gcp::StatusOr object_metadata = client_->CopyObject( + src_bucketname, src_objectpath, dst_bucketname, dst_objectpath); + t.SetSuccess(object_metadata.ok()); + return object_metadata; + } + + gcp::Status GetCloudObject(std::string bucket, std::string object, + int64_t start, size_t n, char* buf, + uint64_t* bytes_read) { + CloudRequestCallbackGuard t(cloud_request_callback_.get(), + CloudRequestOpType::kReadOp); + // create a range read request + // Ranges are inclusive, so we can't read 0 bytes; read 1 instead and + // drop it later + size_t rangeLen = (n != 0 ? n : 1); + uint64_t end = start + rangeLen; + *bytes_read = 0; + + gcs::ObjectReadStream obj = + client_->ReadObject(bucket, object, gcs::ReadRange(start, end)); + if (obj.bad()) { + return obj.status(); + } + + if (n != 0) { + obj.read(buf, n); + *bytes_read = obj.gcount(); + assert(*bytes_read <= n); + } + + t.SetSize(*bytes_read); + t.SetSuccess(true); + + return obj.status(); + } + + gcp::Status DownloadFile(std::string bucket_name, std::string object_path, + std::string dst_file, uint64_t* file_size) { + CloudRequestCallbackGuard guard(cloud_request_callback_.get(), + CloudRequestOpType::kReadOp); + + gcs::ObjectReadStream os = client_->ReadObject(bucket_name, object_path); + if (os.bad()) { + guard.SetSize(0); + guard.SetSuccess(false); + return os.status(); + } + + std::ofstream ofs(dst_file, std::ofstream::binary); + // if ofs is not open, return error with dst_file name in message + if (!ofs.is_open()) { + guard.SetSize(0); + guard.SetSuccess(false); + std::string errmsg("Unable to open dest file "); + errmsg.append(dst_file); + return gcp::Status(gcp::StatusCode::kInternal, errmsg); + } + + // Read stream for os and write to dst_file, then set the file size for + // guard + ofs << os.rdbuf(); + ofs.close(); + *file_size = os.size().value(); + guard.SetSize(*file_size); + guard.SetSuccess(true); + return gcp::Status(gcp::StatusCode::kOk, "OK"); + } + + // update object metadata + gcp::StatusOr PutCloudObject( + std::string bucket_name, std::string object_path, + std::unordered_map metadata, + uint64_t size_hint = 0) { + CloudRequestCallbackGuard t(cloud_request_callback_.get(), + CloudRequestOpType::kWriteOp, size_hint); + gcp::StatusOr object_meta = + client_->InsertObject(bucket_name, object_path, ""); + if (!object_meta.ok()) { + t.SetSuccess(false); + return object_meta; + } + gcs::ObjectMetadata new_object_meta = object_meta.value(); + for (auto kv : metadata) { + new_object_meta.mutable_metadata().emplace(kv.first, kv.second); + } + auto update_meta = + client_->UpdateObject(bucket_name, object_path, new_object_meta); + return update_meta; + } + + gcp::StatusOr UploadFile(std::string bucket_name, + std::string object_path, + std::string loc_file) { + CloudRequestCallbackGuard guard(cloud_request_callback_.get(), + CloudRequestOpType::kWriteOp); + + gcp::StatusOr object_meta = + client_->UploadFile(loc_file, bucket_name, object_path); + + if (!object_meta.ok()) { + guard.SetSize(0); + guard.SetSuccess(false); + return 
object_meta; + } + + guard.SetSize(object_meta.value().size()); + guard.SetSuccess(true); + + return object_meta; + } + + gcp::StatusOr HeadObject(std::string bucket_name, + std::string object_path) { + CloudRequestCallbackGuard t(cloud_request_callback_.get(), + CloudRequestOpType::kInfoOp); + gcp::StatusOr object_metadata = + client_->GetObjectMetadata(bucket_name, object_path); + t.SetSuccess(object_metadata.ok()); + return object_metadata; + } + + CloudRequestCallback* GetRequestCallback() { + return cloud_request_callback_.get(); + } + + private: + std::shared_ptr client_; + std::shared_ptr cloud_request_callback_; +}; + +/******************** GcsReadableFile ******************/ +class GcsReadableFile : public CloudStorageReadableFileImpl { + public: + GcsReadableFile(std::shared_ptr const& gcs_client, + Logger* info_log, std::string const& bucket, + std::string const& fname, uint64_t size, + std::string content_hash) + : CloudStorageReadableFileImpl(info_log, bucket, fname, size), + gcs_client_(gcs_client), + content_hash_(std::move(content_hash)) {} + + virtual char const* Type() const { return "gcs"; } + + size_t GetUniqueId(char* id, size_t max_size) const override { + if (content_hash_.empty()) { + return 0; + } + + max_size = std::min(content_hash_.size(), max_size); + memcpy(id, content_hash_.c_str(), max_size); + return max_size; + } + + // random access, read data from specified offset in file + IOStatus DoCloudRead(uint64_t offset, size_t n, IOOptions const& /*options*/, + char* scratch, uint64_t* bytes_read, + IODebugContext* /*dbg*/) const override { + // read the range + auto status = gcs_client_->GetCloudObject(bucket_, fname_, offset, n, + scratch, bytes_read); + if (!status.ok()) { + if (IsNotFound(status)) { + Log(InfoLogLevel::ERROR_LEVEL, info_log_, + "[gcs] GcsReadableFile ReadObject Not Found %s \n", fname_.c_str()); + return IOStatus::NotFound(); + } else { + Log(InfoLogLevel::ERROR_LEVEL, info_log_, + "[gcs] GcsReadableFile ReadObject error %s offset %" PRIu64 + " rangelen %" ROCKSDB_PRIszt ", message: %s\n", + fname_.c_str(), offset, n, status.message().c_str()); + return IOStatus::IOError(fname_.c_str(), status.message().c_str()); + } + } + + return IOStatus::OK(); + } + + private: + std::shared_ptr const& gcs_client_; + std::string content_hash_; +}; // End class GcsReadableFile + +/******************** Writablefile ******************/ + +class GcsWritableFile : public CloudStorageWritableFileImpl { + public: + GcsWritableFile(CloudFileSystem* fs, std::string const& local_fname, + std::string const& bucket, std::string const& cloud_fname, + FileOptions const& options) + : CloudStorageWritableFileImpl(fs, local_fname, bucket, cloud_fname, + options) {} + virtual char const* Name() const override { + return CloudStorageProviderImpl::kGcs(); + } +}; // End class GcsWritableFile + +/******************** GcsStorageProvider ******************/ +class GcsStorageProvider : public CloudStorageProviderImpl { + public: + ~GcsStorageProvider() override {} + virtual char const* Name() const override { return kGcs(); } + IOStatus CreateBucket(std::string const& bucket) override; + IOStatus ExistsBucket(std::string const& bucket) override; + IOStatus EmptyBucket(std::string const& bucket_name, + std::string const& object_path) override; + IOStatus DeleteCloudObject(std::string const& bucket_name, + std::string const& object_path) override; + IOStatus ListCloudObjects(std::string const& bucket_name, + std::string const& object_path, + std::vector* result) override; + 
IOStatus ExistsCloudObject(std::string const& bucket_name, + std::string const& object_path) override; + IOStatus GetCloudObjectSize(std::string const& bucket_name, + std::string const& object_path, + uint64_t* filesize) override; + IOStatus GetCloudObjectModificationTime(std::string const& bucket_name, + std::string const& object_path, + uint64_t* time) override; + IOStatus GetCloudObjectMetadata(std::string const& bucket_name, + std::string const& object_path, + CloudObjectInformation* info) override; + IOStatus PutCloudObjectMetadata( + std::string const& bucket_name, std::string const& object_path, + std::unordered_map const& metadata) override; + IOStatus CopyCloudObject(std::string const& bucket_name_src, + std::string const& object_path_src, + std::string const& bucket_name_dest, + std::string const& object_path_dest) override; + IOStatus DoNewCloudReadableFile( + std::string const& bucket, std::string const& fname, uint64_t fsize, + std::string const& content_hash, FileOptions const& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + IOStatus NewCloudWritableFile( + std::string const& local_path, std::string const& bucket_name, + std::string const& object_path, FileOptions const& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + Status PrepareOptions(ConfigOptions const& options) override; + + protected: + IOStatus DoGetCloudObject(std::string const& bucket_name, + std::string const& object_path, + std::string const& destination, + uint64_t* remote_size) override; + IOStatus DoPutCloudObject(std::string const& local_file, + std::string const& bucket_name, + std::string const& object_path, + uint64_t file_size) override; + + private: + struct HeadObjectResult { + // If any of the field is non-nullptr, returns requested data + std::unordered_map* metadata = nullptr; + uint64_t* size = nullptr; + uint64_t* modtime = nullptr; + std::string* etag = nullptr; + }; + + // Retrieves metadata from an object + IOStatus HeadObject(std::string const& bucket, std::string const& path, + HeadObjectResult* result); + + // The Gcs client + std::shared_ptr gcs_client_; +}; // End class GcsStorageProvider + +/******************** GcsFileSystem ******************/ +IOStatus GcsStorageProvider::CreateBucket(std::string const& bucket) { + std::string bucket_location = + cfs_->GetCloudFileSystemOptions().dest_bucket.GetRegion(); + // storage_class: https://cloud.google.com/storage/docs/storage-classes + // default storage_class = STANDARD + std::string sc("STANDARD"); + auto bucket_metadata = gcs_client_->CreateBucket( + bucket, gcs::BucketMetadata().set_storage_class(sc).set_location( + bucket_location)); + if (!bucket_metadata.ok()) { + // Bucket already exists is not an error + if (gcp::StatusCode::kAlreadyExists != bucket_metadata.status().code()) { + std::string errmsg(bucket_metadata.status().message()); + return IOStatus::IOError(bucket.c_str(), errmsg.c_str()); + } + } + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::ExistsBucket(std::string const& bucket) { + gcp::StatusOr bucketmetadata = + gcs_client_->HeadBucket(bucket); + if (IsNotFound(bucketmetadata.status())) { + return IOStatus::NotFound(); + } + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::EmptyBucket(std::string const& bucket_name, + std::string const& object_path) { + std::vector results; + auto st = ListCloudObjects(bucket_name, object_path, &results); + if (!st.ok()) { + Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(), + "[Gcs] EmptyBucket unable to find objects in 
bucket %s %s", + bucket_name.c_str(), st.ToString().c_str()); + return st; + } + Log(InfoLogLevel::DEBUG_LEVEL, cfs_->GetLogger(), + "[Gcs] EmptyBucket going to delete %" ROCKSDB_PRIszt + " objects in bucket %s", + results.size(), bucket_name.c_str()); + + // Delete all objects from bucket + for (auto const& path : results) { + st = DeleteCloudObject(bucket_name, object_path + "/" + path); + if (!st.ok()) { + Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(), + "[Gcs] EmptyBucket Unable to delete %s in bucket %s %s", path.c_str(), + bucket_name.c_str(), st.ToString().c_str()); + return st; + } + } + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::DeleteCloudObject(std::string const& bucket_name, + std::string const& object_path) { + auto normalized_path = normalzie_object_path(object_path); + auto st = gcs_client_->DeleteCloudObject(bucket_name, normalized_path); + if (!st.ok()) { + if (IsNotFound(st)) { + return IOStatus::NotFound(object_path, st.message().c_str()); + } else { + return IOStatus::IOError(object_path, st.message().c_str()); + } + } + Log(InfoLogLevel::INFO_LEVEL, cfs_->GetLogger(), + "[Gcs] DeleteFromGcs %s/%s, status %s", bucket_name.c_str(), + object_path.c_str(), st.message().c_str()); + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::ListCloudObjects( + std::string const& bucket_name, std::string const& object_path, + std::vector* result) { + // follow with aws_s3 + auto prefix = normalzie_object_path(object_path); + prefix = ensure_ends_with_pathsep(prefix); + // MaxResults is about page limits + // https://stackoverflow.com/questions/77069696/how-to-limit-number-of-objects-returned-from-listobjects + auto objects = gcs_client_->ListCloudObjects( + bucket_name, prefix, + cfs_->GetCloudFileSystemOptions().number_objects_listed_in_one_iteration); + if (!objects.ok()) { + std::string errmsg(objects.status().message()); + if (IsNotFound(objects.status())) { + Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(), + "[Gcs] GetChildren dir %s does not exist: %s", object_path.c_str(), + errmsg.c_str()); + return IOStatus::NotFound(object_path, errmsg.c_str()); + } + return IOStatus::IOError(object_path, errmsg.c_str()); + } + for (auto const& obj : objects.value()) { + // Our path should be a prefix of the fetched value + std::string name = obj.value().name(); + if (name.find(prefix) != 0) { // npos or other value + return IOStatus::IOError("Unexpected result from Gcs: " + name); + } + auto fname = name.substr(prefix.size()); + result->push_back(std::move(fname)); + } + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::ExistsCloudObject(std::string const& bucket_name, + std::string const& object_path) { + HeadObjectResult result; + return HeadObject(bucket_name, object_path, &result); +} + +IOStatus GcsStorageProvider::GetCloudObjectSize(std::string const& bucket_name, + std::string const& object_path, + uint64_t* filesize) { + HeadObjectResult result; + result.size = filesize; + return HeadObject(bucket_name, object_path, &result); +} + +IOStatus GcsStorageProvider::GetCloudObjectModificationTime( + std::string const& bucket_name, std::string const& object_path, + uint64_t* time) { + HeadObjectResult result; + result.modtime = time; + return HeadObject(bucket_name, object_path, &result); +} + +IOStatus GcsStorageProvider::GetCloudObjectMetadata( + std::string const& bucket_name, std::string const& object_path, + CloudObjectInformation* info) { + assert(info != nullptr); + HeadObjectResult result; + result.metadata = &info->metadata; + result.size = 
&info->size; + result.modtime = &info->modification_time; + result.etag = &info->content_hash; + return HeadObject(bucket_name, object_path, &result); +} + +IOStatus GcsStorageProvider::PutCloudObjectMetadata( + std::string const& bucket_name, std::string const& object_path, + std::unordered_map const& metadata) { + auto normalized_path = normalzie_object_path(object_path); + auto outcome = + gcs_client_->PutCloudObject(bucket_name, normalized_path, metadata); + if (!outcome.ok()) { + auto const& error = outcome.status().message(); + std::string errmsg(error.c_str(), error.size()); + Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(), + "[Gcs] Bucket %s error in saving metadata %s", bucket_name.c_str(), + errmsg.c_str()); + return IOStatus::IOError(object_path, errmsg.c_str()); + } + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::CopyCloudObject( + std::string const& bucket_name_src, std::string const& object_path_src, + std::string const& bucket_name_dest, std::string const& object_path_dest) { + std::string src_url = bucket_name_src + object_path_src; + auto normalized_src_path = normalzie_object_path(object_path_src); + auto normalized_dest_path = normalzie_object_path(object_path_dest); + auto copy = + gcs_client_->CopyCloudObject(bucket_name_src, normalized_src_path, + bucket_name_dest, normalized_dest_path); + if (!copy.ok()) { + auto const& error = copy.status().message(); + std::string errmsg(error.c_str(), error.size()); + Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(), + "[Gcs] GcsWritableFile src path %s error in copying to %s %s", + src_url.c_str(), object_path_dest.c_str(), errmsg.c_str()); + return IOStatus::IOError(object_path_dest.c_str(), errmsg.c_str()); + } + Log(InfoLogLevel::INFO_LEVEL, cfs_->GetLogger(), + "[Gcs] GcsWritableFile src path %s copied to %s OK", src_url.c_str(), + object_path_dest.c_str()); + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::DoNewCloudReadableFile( + std::string const& bucket, std::string const& fname, uint64_t fsize, + std::string const& content_hash, FileOptions const& /*options*/, + std::unique_ptr* result, + IODebugContext* /*dbg*/) { + auto normalized_path = normalzie_object_path(fname); + result->reset(new GcsReadableFile(gcs_client_, cfs_->GetLogger(), bucket, + normalized_path, fsize, content_hash)); + return IOStatus::OK(); +} + +IOStatus GcsStorageProvider::NewCloudWritableFile( + std::string const& local_path, std::string const& bucket_name, + std::string const& object_path, FileOptions const& file_opts, + std::unique_ptr* result, + IODebugContext* /*dbg*/) { + auto normalized_path = normalzie_object_path(object_path); + result->reset(new GcsWritableFile(cfs_, local_path, bucket_name, + normalized_path, file_opts)); + return (*result)->status(); +} + +Status GcsStorageProvider::PrepareOptions(ConfigOptions const& options) { + auto cfs = dynamic_cast(options.env->GetFileSystem().get()); + assert(cfs); + auto const& cloud_opts = cfs->GetCloudFileSystemOptions(); + if (std::string(cfs->Name()) != CloudFileSystemImpl::kGcp()) { + return Status::InvalidArgument("gcs Provider requires gcp Environment"); + } + // TODO: support buckets being in different regions + if (!cfs->SrcMatchesDest() && cfs->HasSrcBucket() && cfs->HasDestBucket()) { + if (cloud_opts.src_bucket.GetRegion() != + cloud_opts.dest_bucket.GetRegion()) { + Log(InfoLogLevel::ERROR_LEVEL, cfs->GetLogger(), + "[gcp] NewGcpFileSystem Buckets %s, %s in two different regions %s, " + "%s is not supported", + cloud_opts.src_bucket.GetBucketName().c_str(), + 
cloud_opts.dest_bucket.GetBucketName().c_str(),
+          cloud_opts.src_bucket.GetRegion().c_str(),
+          cloud_opts.dest_bucket.GetRegion().c_str());
+      return Status::InvalidArgument("Two different regions not supported");
+    }
+  }
+  // initialize the Gcs client
+  gcp::Options gcp_options;
+  Status status = GcpCloudOptions::GetClientConfiguration(
+      cfs, cloud_opts.src_bucket.GetRegion(), gcp_options);
+  if (status.ok()) {
+    gcs_client_ = std::make_shared<GCSClientWrapper>(cloud_opts, gcp_options);
+    return CloudStorageProviderImpl::PrepareOptions(options);
+  }
+  return status;
+}
+
+IOStatus GcsStorageProvider::DoGetCloudObject(std::string const& bucket_name,
+                                              std::string const& object_path,
+                                              std::string const& destination,
+                                              uint64_t* remote_size) {
+  auto normalized_path = normalize_object_path(object_path);
+  auto get = gcs_client_->DownloadFile(bucket_name, normalized_path,
+                                       destination, remote_size);
+  if (!get.ok()) {
+    std::string errmsg;
+    errmsg = get.message();
+    if (IsNotFound(get)) {
+      Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(),
+          "[gcs] GetObject %s/%s error %s.", bucket_name.c_str(),
+          object_path.c_str(), errmsg.c_str());
+      return IOStatus::NotFound(std::move(errmsg));
+    } else {
+      Log(InfoLogLevel::INFO_LEVEL, cfs_->GetLogger(),
+          "[gcs] GetObject %s/%s error %s.", bucket_name.c_str(),
+          object_path.c_str(), errmsg.c_str());
+      return IOStatus::IOError(std::move(errmsg));
+    }
+  }
+  return IOStatus::OK();
+}
+
+// Uploads local_file to GCS bucket_name/object_path
+IOStatus GcsStorageProvider::DoPutCloudObject(std::string const& local_file,
+                                              std::string const& bucket_name,
+                                              std::string const& object_path,
+                                              uint64_t file_size) {
+  auto normalized_path = normalize_object_path(object_path);
+  auto put = gcs_client_->UploadFile(bucket_name, normalized_path, local_file);
+  if (!put.ok()) {
+    auto const& error = put.status().message();
+    std::string errmsg(error.c_str(), error.size());
+    Log(InfoLogLevel::ERROR_LEVEL, cfs_->GetLogger(),
+        "[gcs] PutCloudObject %s/%s, size %" PRIu64 ", ERROR %s",
+        bucket_name.c_str(), object_path.c_str(), file_size, errmsg.c_str());
+    return IOStatus::IOError(local_file, errmsg);
+  }
+
+  Log(InfoLogLevel::INFO_LEVEL, cfs_->GetLogger(),
+      "[gcs] PutCloudObject %s/%s, size %" PRIu64 ", OK", bucket_name.c_str(),
+      object_path.c_str(), file_size);
+  return IOStatus::OK();
+}
+
+IOStatus GcsStorageProvider::HeadObject(std::string const& bucket,
+                                        std::string const& path,
+                                        HeadObjectResult* result) {
+  assert(result != nullptr);
+  auto object_path = normalize_object_path(path);
+  auto head = gcs_client_->HeadObject(bucket, object_path);
+  if (!head.ok()) {
+    auto const& errMessage = head.status().message();
+    Slice object_path_slice(object_path.data(), object_path.size());
+    if (IsNotFound(head.status())) {
+      return IOStatus::NotFound(object_path_slice, errMessage.c_str());
+    } else {
+      return IOStatus::IOError(object_path_slice, errMessage.c_str());
+    }
+  }
+
+  auto const& head_val = head.value();
+  if (result->metadata != nullptr) {
+    // GCS custom object metadata is a std::map<std::string, std::string>
+    for (auto const& m : head_val.metadata()) {
+      (*(result->metadata))[m.first.c_str()] = m.second.c_str();
+    }
+  }
+  if (result->size != nullptr) {
+    *(result->size) = head_val.size();
+  }
+  if ((result->modtime) != nullptr) {
+    int64_t modtime = std::chrono::duration_cast<std::chrono::milliseconds>(
+                          head_val.updated().time_since_epoch())
+                          .count();
+    *(result->modtime) = modtime;
+  }
+  if ((result->etag) != nullptr) {
+    *(result->etag) =
+        std::string(head_val.etag().data(), head_val.etag().length());
+  }
+  return IOStatus::OK();
+}
+
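A quick standalone sketch of the slash-normalization semantics that normalize_object_path() above relies on (collapse runs of '/', then drop the leading '/'), since that is the main S3-vs-GCS behavioral difference this file papers over. reduce_slashes/ltrim_slash below are local stand-ins for ReduceSlashes/ltrim_if from cloud/filename.h and are not part of this patch:

#include <cassert>
#include <string>

// Stand-in for ReduceSlashes(): collapse every run of '/' to a single '/'.
static std::string reduce_slashes(const std::string& in) {
  std::string out;
  for (char c : in) {
    if (c != '/' || out.empty() || out.back() != '/') out += c;
  }
  return out;
}

// Stand-in for ltrim_if(path, '/'): drop leading slashes so the result is a
// GCS object name without a leading '/'.
static std::string ltrim_slash(std::string s) {
  while (!s.empty() && s.front() == '/') s.erase(s.begin());
  return s;
}

int main() {
  // "//a///b" and "/a/b" must name the same GCS object: "a/b".
  assert(ltrim_slash(reduce_slashes("//a///b")) == "a/b");
  assert(ltrim_slash(reduce_slashes("/a/b")) == "a/b");
  return 0;
}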
+#endif  // USE_GCP
+
+Status CloudStorageProviderImpl::CreateGcsProvider(
+    std::unique_ptr<CloudStorageProvider>* provider) {
+#ifndef USE_GCP
+  provider->reset();
+  return Status::NotSupported(
+      "In order to use Google Cloud Storage, make sure you're compiling with "
+      "USE_GCP=1");
+#else
+  provider->reset(new GcsStorageProvider());
+  return Status::OK();
+#endif
+}
+}  // namespace ROCKSDB_NAMESPACE
+#endif  // ROCKSDB_LITE
diff --git a/cloud/gcp/gcp_db_cloud_test.cc b/cloud/gcp/gcp_db_cloud_test.cc
new file mode 100644
index 00000000000..eabc493ef07
--- /dev/null
+++ b/cloud/gcp/gcp_db_cloud_test.cc
@@ -0,0 +1,3149 @@
+// Copyright (c) 2017 Rockset
+
+#ifndef ROCKSDB_LITE
+
+#ifdef USE_GCP
+
+#include "rocksdb/cloud/db_cloud.h"
+
+#include <algorithm>
+#include <chrono>
+#include <cinttypes>
+#include <filesystem>
+#include <thread>
+
+#include "rocksdb/cloud/cloud_file_deletion_scheduler.h"
+#include "rocksdb/cloud/cloud_file_system_impl.h"
+#include "cloud/cloud_scheduler.h"
+#include "cloud/cloud_manifest.h"
+#include "rocksdb/cloud/cloud_storage_provider_impl.h"
+#include "cloud/db_cloud_impl.h"
+#include "cloud/filename.h"
+#include "cloud/manifest_reader.h"
+#include "db/db_impl/db_impl.h"
+#include "db/db_test_util.h"
+#include "file/filename.h"
+#include "logging/logging.h"
+#include "rocksdb/cloud/cloud_file_system.h"
+#include "rocksdb/options.h"
+#include "rocksdb/status.h"
+#include "rocksdb/table.h"
+#include "test_util/sync_point.h"
+#include "test_util/testharness.h"
+#include "test_util/testutil.h"
+#include "util/random.h"
+#include "util/string_util.h"
+#ifndef OS_WIN
+#include <unistd.h>
+#endif
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+const FileOptions kFileOptions;
+const IOOptions kIOOptions;
+IODebugContext* const kDbg = nullptr;
+}  // namespace
+
+class CloudTest : public testing::Test {
+ public:
+  CloudTest() {
+    Random64 rng(time(nullptr));
+    test_id_ = std::to_string(rng.Next());
+    fprintf(stderr, "Test ID: %s\n", test_id_.c_str());
+
+    base_env_ = Env::Default();
+    dbname_ = test::TmpDir() + "/db_cloud-" + test_id_;
+    clone_dir_ = test::TmpDir() + "/ctest-" + test_id_;
+    cloud_fs_options_.TEST_Initialize("dbcloudtest.", dbname_);
+    // To catch any possible file deletion bugs, cloud files are deleted
+    // right away
+    cloud_fs_options_.cloud_file_deletion_delay = std::chrono::seconds(0);
+
+    options_.create_if_missing = true;
+    options_.stats_dump_period_sec = 0;
+    options_.stats_persist_period_sec = 0;
+    persistent_cache_path_ = "";
+    persistent_cache_size_gb_ = 0;
+    db_ = nullptr;
+
+    DestroyDir(dbname_);
+    base_env_->CreateDirIfMissing(dbname_);
+    base_env_->NewLogger(test::TmpDir(base_env_) + "/rocksdb-cloud.log",
+                         &options_.info_log);
+    options_.info_log->SetInfoLogLevel(InfoLogLevel::DEBUG_LEVEL);
+
+    Cleanup();
+  }
+
+  void Cleanup() {
+    ASSERT_TRUE(!aenv_);
+
+    CloudFileSystem* afs;
+    // create a dummy Gcp env
+    ASSERT_OK(CloudFileSystemEnv::NewGcpFileSystem(base_env_->GetFileSystem(),
+                                                   cloud_fs_options_,
+                                                   options_.info_log, &afs));
+    ASSERT_NE(afs, nullptr);
+    // delete all pre-existing contents from the bucket
+    auto st = afs->GetStorageProvider()->EmptyBucket(afs->GetSrcBucketName(),
+                                                     dbname_);
+    delete afs;
+    ASSERT_TRUE(st.ok() || st.IsNotFound());
+
+    DestroyDir(clone_dir_);
+    ASSERT_OK(base_env_->CreateDir(clone_dir_));
+  }
+
+  std::set<std::string> GetSSTFiles(std::string name) {
+    std::vector<std::string> files;
+    GetCloudFileSystem()->GetBaseFileSystem()->GetChildren(name, kIOOptions,
+                                                           &files, kDbg);
+    std::set<std::string> sst_files;
+    for (auto& f : files) {
+      if (IsSstFile(RemoveEpoch(f))) {
+        sst_files.insert(f);
+      }
+    }
+    return sst_files;
+  }
+
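For reference while reading the fixture (this is not part of the patch): the smallest end-to-end use of the NewGcpFileSystem API that CloudTest exercises looks roughly like the sketch below. The bucket name and paths are hypothetical placeholders, the logger is omitted, and error handling is reduced to early returns:

#include "rocksdb/cloud/cloud_file_system.h"
#include "rocksdb/cloud/db_cloud.h"
#include "rocksdb/options.h"

using namespace ROCKSDB_NAMESPACE;

Status OpenGcpBackedDb(DBCloud** db) {
  CloudFileSystemOptions cloud_opts;
  cloud_opts.src_bucket.SetBucketName("my-gcs-bucket");  // placeholder
  cloud_opts.src_bucket.SetObjectPath("/test/db");       // placeholder
  cloud_opts.dest_bucket = cloud_opts.src_bucket;

  CloudFileSystem* cfs = nullptr;
  Status s = CloudFileSystemEnv::NewGcpFileSystem(
      Env::Default()->GetFileSystem(), cloud_opts, nullptr /* logger */, &cfs);
  if (!s.ok()) return s;

  // The composite Env must outlive the DB; it is deliberately leaked here
  // for brevity, exactly as a long-lived process would keep it around.
  Env* cloud_env = CloudFileSystemEnv::NewCompositeEnv(
                       Env::Default(), std::shared_ptr<FileSystem>(cfs))
                       .release();
  Options options;
  options.create_if_missing = true;
  options.env = cloud_env;
  return DBCloud::Open(options, "/tmp/gcp-db-local" /* placeholder */,
                       "" /* persistent cache path */,
                       0 /* persistent cache size GB */, db);
}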
+  // Return total size of all sst files available locally
+  void GetSSTFilesTotalSize(std::string name, uint64_t* total_size) {
+    std::vector<std::string> files;
+    GetCloudFileSystem()->GetBaseFileSystem()->GetChildren(name, kIOOptions,
+                                                           &files, kDbg);
+    std::set<std::string> sst_files;
+    uint64_t local_size = 0;
+    for (auto& f : files) {
+      if (IsSstFile(RemoveEpoch(f))) {
+        sst_files.insert(f);
+        std::string lpath = dbname_ + "/" + f;
+        ASSERT_OK(GetCloudFileSystem()->GetBaseFileSystem()->GetFileSize(
+            lpath, kIOOptions, &local_size, kDbg));
+        (*total_size) += local_size;
+      }
+    }
+  }
+
+  std::set<std::string> GetSSTFilesClone(std::string name) {
+    std::string cname = clone_dir_ + "/" + name;
+    return GetSSTFiles(cname);
+  }
+
+  void DestroyDir(const std::string& dir) {
+    std::string cmd = "rm -rf " + dir;
+    int rc = system(cmd.c_str());
+    ASSERT_EQ(rc, 0);
+  }
+
+  virtual ~CloudTest() {
+    // Cleanup the cloud bucket
+    if (!cloud_fs_options_.src_bucket.GetBucketName().empty()) {
+      CloudFileSystem* afs;
+      Status st = CloudFileSystemEnv::NewGcpFileSystem(
+          base_env_->GetFileSystem(), cloud_fs_options_, options_.info_log,
+          &afs);
+      if (st.ok()) {
+        afs->GetStorageProvider()->EmptyBucket(afs->GetSrcBucketName(),
+                                               dbname_);
+        delete afs;
+      }
+    }
+
+    CloseDB();
+  }
+
+  void CreateCloudEnv() {
+    CloudFileSystem* cfs;
+    ASSERT_OK(CloudFileSystemEnv::NewGcpFileSystem(base_env_->GetFileSystem(),
+                                                   cloud_fs_options_,
+                                                   options_.info_log, &cfs));
+    std::shared_ptr<FileSystem> fs(cfs);
+    aenv_ = CloudFileSystemEnv::NewCompositeEnv(base_env_, std::move(fs));
+  }
+
+  // Open database via the cloud interface
+  void OpenDB() {
+    std::vector<ColumnFamilyHandle*> handles;
+    OpenDB(&handles);
+    // Delete the handle for the default column family because the DBImpl
+    // always holds a reference to it.
+    ASSERT_TRUE(handles.size() > 0);
+    delete handles[0];
+  }
+
+  // Open database via the cloud interface
+  void OpenDB(std::vector<ColumnFamilyHandle*>* handles) {
+    // default column family
+    OpenWithColumnFamilies({kDefaultColumnFamilyName}, handles);
+  }
+
+  void OpenWithColumnFamilies(const std::vector<std::string>& cfs,
+                              std::vector<ColumnFamilyHandle*>* handles) {
+    // Create new Gcp env
+    CreateCloudEnv();
+    options_.env = aenv_.get();
+    // Sleep for a second because cloud storage is eventually consistent.
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+
+    ASSERT_TRUE(db_ == nullptr);
+    std::vector<ColumnFamilyDescriptor> column_families;
+    for (size_t i = 0; i < cfs.size(); ++i) {
+      column_families.emplace_back(cfs[i], options_);
+    }
+    ASSERT_OK(DBCloud::Open(options_, dbname_, column_families,
+                            persistent_cache_path_, persistent_cache_size_gb_,
+                            handles, &db_));
+    ASSERT_OK(db_->GetDbIdentity(dbid_));
+  }
+
+  // Try to open and return status
+  Status checkOpen() {
+    // Create new Gcp env
+    CreateCloudEnv();
+    options_.env = aenv_.get();
+    // Sleep for a second because cloud storage is eventually consistent.
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+
+    return DBCloud::Open(options_, dbname_, persistent_cache_path_,
+                         persistent_cache_size_gb_, &db_);
+  }
+
+  void CreateColumnFamilies(const std::vector<std::string>& cfs,
+                            std::vector<ColumnFamilyHandle*>* handles) {
+    ASSERT_NE(db_, nullptr);
+    size_t cfi = handles->size();
+    handles->resize(cfi + cfs.size());
+    for (auto cf : cfs) {
+      ASSERT_OK(db_->CreateColumnFamily(options_, cf, &handles->at(cfi++)));
+    }
+  }
+
+  // Creates and Opens a clone
+  Status CloneDB(const std::string& clone_name,
+                 const std::string& dest_bucket_name,
+                 const std::string& dest_object_path,
+                 std::unique_ptr<DBCloud>* cloud_db,
+                 std::unique_ptr<Env>* env,
+                 bool force_keep_local_on_invalid_dest_bucket = true) {
+    // The local directory where the clone resides
+    std::string cname = clone_dir_ + "/" + clone_name;
+
+    CloudFileSystem* cfs;
+    DBCloud* clone_db;
+
+    // If there is no destination bucket, then the clone needs to copy
+    // all sst files from source bucket to local dir
+    auto copt = cloud_fs_options_;
+    if (dest_bucket_name == copt.src_bucket.GetBucketName()) {
+      copt.dest_bucket = copt.src_bucket;
+    } else {
+      copt.dest_bucket.SetBucketName(dest_bucket_name);
+    }
+    copt.dest_bucket.SetObjectPath(dest_object_path);
+    if (!copt.dest_bucket.IsValid() &&
+        force_keep_local_on_invalid_dest_bucket) {
+      copt.keep_local_sst_files = true;
+    }
+    // Create new Gcp env
+    Status st = CloudFileSystemEnv::NewGcpFileSystem(
+        base_env_->GetFileSystem(), copt, options_.info_log, &cfs);
+    if (!st.ok()) {
+      return st;
+    }
+
+    // sets the env to be used by the env wrapper, and returns that env
+    env->reset(
+        new CompositeEnvWrapper(base_env_, std::shared_ptr<FileSystem>(cfs)));
+    options_.env = env->get();
+
+    // default column family
+    ColumnFamilyOptions cfopt = options_;
+
+    std::vector<ColumnFamilyDescriptor> column_families;
+    column_families.emplace_back(
+        ColumnFamilyDescriptor(kDefaultColumnFamilyName, cfopt));
+    std::vector<ColumnFamilyHandle*> handles;
+
+    st = DBCloud::Open(options_, cname, column_families,
+                       persistent_cache_path_, persistent_cache_size_gb_,
+                       &handles, &clone_db);
+    if (!st.ok()) {
+      return st;
+    }
+
+    cloud_db->reset(clone_db);
+
+    // Delete the handle for the default column family because the DBImpl
+    // always holds a reference to it.
+ assert(handles.size() > 0); + delete handles[0]; + + return st; + } + + void CloseDB(std::vector* handles) { + for (auto h : *handles) { + delete h; + } + handles->clear(); + CloseDB(); + } + + void CloseDB() { + if (db_) { + db_->Flush(FlushOptions()); // convert pending writes to sst files + delete db_; + db_ = nullptr; + } + } + + void SetPersistentCache(const std::string& path, uint64_t size_gb) { + persistent_cache_path_ = path; + persistent_cache_size_gb_ = size_gb; + } + + Status GetCloudLiveFilesSrc(std::set* list) { + auto* cfs = GetCloudFileSystem(); + std::unique_ptr manifest( + new ManifestReader(options_.info_log, cfs, cfs->GetSrcBucketName())); + return manifest->GetLiveFiles(cfs->GetSrcObjectPath(), list); + } + + // Verify that local files are the same as cloud files in src bucket path + void ValidateCloudLiveFilesSrcSize() { + // Loop though all the files in the cloud manifest + std::set cloud_files; + ASSERT_OK(GetCloudLiveFilesSrc(&cloud_files)); + for (uint64_t num : cloud_files) { + std::string pathname = MakeTableFileName(dbname_, num); + Log(options_.info_log, "cloud file list %s\n", pathname.c_str()); + } + + std::set localFiles = GetSSTFiles(dbname_); + uint64_t cloudSize = 0; + uint64_t localSize = 0; + + // loop through all the local files and validate + for (std::string path : localFiles) { + std::string cpath = GetCloudFileSystem()->GetSrcObjectPath() + "/" + path; + ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->GetCloudObjectSize( + GetCloudFileSystem()->GetSrcBucketName(), cpath, &cloudSize)); + + // find the size of the file on local storage + std::string lpath = dbname_ + "/" + path; + ASSERT_OK(GetCloudFileSystem()->GetBaseFileSystem()->GetFileSize( + lpath, kIOOptions, &localSize, kDbg)); + ASSERT_TRUE(localSize == cloudSize); + Log(options_.info_log, "local file %s size %" PRIu64 "\n", lpath.c_str(), + localSize); + Log(options_.info_log, "cloud file %s size %" PRIu64 "\n", cpath.c_str(), + cloudSize); + printf("local file %s size %" PRIu64 "\n", lpath.c_str(), localSize); + printf("cloud file %s size %" PRIu64 "\n", cpath.c_str(), cloudSize); + } + } + + CloudFileSystem* GetCloudFileSystem() const { + EXPECT_TRUE(aenv_); + return static_cast(aenv_->GetFileSystem().get()); + } + CloudFileSystemImpl* GetCloudFileSystemImpl() const { + EXPECT_TRUE(aenv_); + return static_cast(aenv_->GetFileSystem().get()); + } + + DBImpl* GetDBImpl() const { + return static_cast(db_->GetBaseDB()); + } + + Status SwitchToNewCookie(std::string new_cookie) { + CloudManifestDelta delta{ + db_->GetNextFileNumber(), + new_cookie + }; + return ApplyCMDeltaToCloudDB(delta); + } + + Status ApplyCMDeltaToCloudDB(const CloudManifestDelta& delta) { + auto st = GetCloudFileSystem()->RollNewCookie(dbname_, delta.epoch, delta); + if (!st.ok()) { + return st; + } + bool applied = false; + st = GetCloudFileSystem()->ApplyCloudManifestDelta(delta, &applied); + assert(applied); + if (!st.ok()) { + return st; + } + db_->NewManifestOnNextUpdate(); + return st; + } + + protected: + void WaitUntilNoScheduledJobs() { + while (true) { + auto num = GetCloudFileSystemImpl()->TEST_NumScheduledJobs(); + if (num > 0) { + usleep(100); + } else { + return; + } + } + } + + std::vector GetAllLocalFiles() { + std::vector local_files; + assert(base_env_->GetChildrenFileAttributes(dbname_, &local_files).ok()); + return local_files; + } + + // Generate a few obsolete sst files on an empty db + static void GenerateObsoleteFilesOnEmptyDB( + DBImpl* db, CloudFileSystem* cfs, + std::vector* 
obsolete_files) {
+    ASSERT_OK(db->Put({}, "k1", "v1"));
+    ASSERT_OK(db->Flush({}));
+
+    ASSERT_OK(db->Put({}, "k1", "v2"));
+    ASSERT_OK(db->Flush({}));
+
+    std::vector<LiveFileMetaData> sst_files;
+    db->GetLiveFilesMetaData(&sst_files);
+    ASSERT_EQ(sst_files.size(), 2);
+    for (auto& f : sst_files) {
+      obsolete_files->push_back(cfs->RemapFilename(f.relative_filename));
+    }
+
+    // trigger compaction, so previous 2 sst files will be obsolete
+    ASSERT_OK(
+        db->TEST_CompactRange(0, nullptr, nullptr, nullptr, true));
+    sst_files.clear();
+    db->GetLiveFilesMetaData(&sst_files);
+    ASSERT_EQ(sst_files.size(), 1);
+  }
+
+  // check that fname exists in the src bucket/object path
+  rocksdb::Status ExistsCloudObject(const std::string& filename) const {
+    return GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+        GetCloudFileSystem()->GetSrcBucketName(),
+        GetCloudFileSystem()->GetSrcObjectPath() + pathsep + filename);
+  }
+
+  std::string test_id_;
+  Env* base_env_;
+  Options options_;
+  std::string dbname_;
+  std::string clone_dir_;
+  CloudFileSystemOptions cloud_fs_options_;
+  std::string dbid_;
+  std::string persistent_cache_path_;
+  uint64_t persistent_cache_size_gb_;
+  DBCloud* db_;
+  std::unique_ptr<Env> aenv_;
+};
+
+//
+// Most basic test. Create DB, write one key, close it and then check to see
+// that the key exists.
+//
+TEST_F(CloudTest, BasicTest) {
+  // Put one key-value
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_TRUE(value.compare("World") == 0);
+  CloseDB();
+  value.clear();
+
+  // Reopen and validate
+  OpenDB();
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_EQ(value, "World");
+
+  std::set<uint64_t> live_files;
+  ASSERT_OK(GetCloudLiveFilesSrc(&live_files));
+  ASSERT_GT(live_files.size(), 0);
+  CloseDB();
+}
+
+TEST_F(CloudTest, FindAllLiveFilesTest) {
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  // wait until files are persisted into cloud storage
+  GetDBImpl()->TEST_WaitForBackgroundWork();
+
+  CloseDB();
+
+  std::vector<std::string> tablefiles;
+  std::string manifest;
+  // fetch latest manifest to local
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &tablefiles,
+                                                   &manifest));
+  EXPECT_EQ(tablefiles.size(), 1);
+
+  for (auto name : tablefiles) {
+    EXPECT_EQ(GetFileType(name), RocksDBFileType::kSstFile);
+    // verify that the sst file indeed exists in cloud
+    EXPECT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+        GetCloudFileSystem()->GetSrcBucketName(),
+        GetCloudFileSystem()->GetSrcObjectPath() + pathsep + name));
+  }
+
+  EXPECT_EQ(GetFileType(manifest), RocksDBFileType::kManifestFile);
+  // verify that manifest file indeed exists in cloud
+  auto storage_provider = GetCloudFileSystem()->GetStorageProvider();
+  auto bucket_name = GetCloudFileSystem()->GetSrcBucketName();
+  auto object_path =
+      GetCloudFileSystem()->GetSrcObjectPath() + pathsep + manifest;
+  EXPECT_OK(storage_provider->ExistsCloudObject(bucket_name, object_path));
+}
+
+// Files of dropped CF should not be included in live files
+TEST_F(CloudTest, LiveFilesOfDroppedCFTest) {
+  std::vector<ColumnFamilyHandle*> handles;
+  OpenDB(&handles);
+
+  std::vector<std::string> tablefiles;
+  std::string manifest;
+  ASSERT_OK(
+      GetCloudFileSystem()->FindAllLiveFiles(dbname_, &tablefiles, &manifest));
+
+  EXPECT_TRUE(tablefiles.empty());
+  CreateColumnFamilies({"cf1"}, &handles);
+
+  // write to CF
+  ASSERT_OK(db_->Put(WriteOptions(), handles[1], "hello", "world"));
+  // flush cf1
+  ASSERT_OK(db_->Flush({}, handles[1]));
+
+  tablefiles.clear();
+  ASSERT_OK(
+      GetCloudFileSystem()->FindAllLiveFiles(dbname_, &tablefiles, &manifest));
+  EXPECT_TRUE(tablefiles.size() == 1);
+
+  // Drop the CF
+  ASSERT_OK(db_->DropColumnFamily(handles[1]));
+  tablefiles.clear();
+  // make sure that files are not listed as live for dropped CF
+  ASSERT_OK(
+      GetCloudFileSystem()->FindAllLiveFiles(dbname_, &tablefiles, &manifest));
+  EXPECT_TRUE(tablefiles.empty());
+  CloseDB(&handles);
+}
+
+// Verifies that when we move files across levels, the files are still listed
+// as live files
+TEST_F(CloudTest, LiveFilesAfterChangingLevelTest) {
+  options_.num_levels = 3;
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "a", "1"));
+  ASSERT_OK(db_->Put(WriteOptions(), "b", "2"));
+  ASSERT_OK(db_->Flush({}));
+  auto db_impl = GetDBImpl();
+
+  std::vector<std::string> tablefiles_before_move;
+  std::string manifest;
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(
+      dbname_, &tablefiles_before_move, &manifest));
+  EXPECT_EQ(tablefiles_before_move.size(), 1);
+
+  CompactRangeOptions cro;
+  cro.change_level = true;
+  cro.target_level = 2;
+  // Move the sst files to another level by compacting entire range
+  ASSERT_OK(db_->CompactRange(cro, nullptr /* begin */, nullptr /* end */));
+
+  ASSERT_OK(db_impl->TEST_WaitForBackgroundWork());
+
+  std::vector<std::string> tablefiles_after_move;
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(
+      dbname_, &tablefiles_after_move, &manifest));
+  EXPECT_EQ(tablefiles_before_move, tablefiles_after_move);
+}
+
+TEST_F(CloudTest, GetChildrenTest) {
+  // Create some objects in the cloud bucket
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  CloseDB();
+  DestroyDir(dbname_);
+  OpenDB();
+
+  std::vector<std::string> children;
+  ASSERT_OK(aenv_->GetFileSystem()->GetChildren(dbname_, kIOOptions, &children,
+                                                kDbg));
+  int sst_files = 0;
+  for (auto c : children) {
+    if (IsSstFile(c)) {
+      sst_files++;
+    }
+  }
+  // This verifies that GetChildren() works against the cloud bucket. We
+  // deleted the file locally, so the only way to actually see it through
+  // GetChildren() is if listing the cloud bucket works.
+  EXPECT_EQ(sst_files, 1);
+}
+
+TEST_F(CloudTest, FindLiveFilesFromLocalManifestTest) {
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "Universe"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  // wait until files are persisted into cloud storage
+  GetDBImpl()->TEST_WaitForBackgroundWork();
+
+  CloseDB();
+
+  // determine the manifest name and store a copy in a different location
+  auto cfs = GetCloudFileSystem();
+  auto manifest_file = cfs->RemapFilename("MANIFEST");
+  auto manifest_path = std::filesystem::path(dbname_) / manifest_file;
+
+  auto alt_manifest_path =
+      std::filesystem::temp_directory_path() / ("ALT-" + manifest_file);
+  std::filesystem::copy_file(manifest_path, alt_manifest_path);
+
+  DestroyDir(dbname_);
+
+  std::vector<std::string> tablefiles;
+  // verify the copied manifest can be processed correctly
+  ASSERT_OK(GetCloudFileSystem()->FindLiveFilesFromLocalManifest(
+      alt_manifest_path, &tablefiles));
+
+  // verify the result
+  EXPECT_EQ(tablefiles.size(), 1);
+
+  for (auto name : tablefiles) {
+    EXPECT_EQ(GetFileType(name), RocksDBFileType::kSstFile);
+    // verify that the sst file indeed exists in cloud
+    EXPECT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+        GetCloudFileSystem()->GetSrcBucketName(),
+        GetCloudFileSystem()->GetSrcObjectPath() + pathsep + name));
+  }
+
+  // clean up
+  std::filesystem::remove(alt_manifest_path);
+}
+
+//
+// Create and read from a clone.
+//
+TEST_F(CloudTest, Newdb) {
+  std::string master_dbid;
+  std::string newdb1_dbid;
+  std::string newdb2_dbid;
+
+  // Put one key-value
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_TRUE(value.compare("World") == 0);
+  ASSERT_OK(db_->GetDbIdentity(master_dbid));
+  CloseDB();
+  value.clear();
+
+  {
+    // Create and Open a new ephemeral instance
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("newdb1", "", "", &cloud_db, &env);
+
+    // Retrieve the id of the first reopen
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb1_dbid));
+
+    // This is an ephemeral clone, so the master's dbid is a prefix of the
+    // clone's dbid.
+    ASSERT_NE(newdb1_dbid, master_dbid);
+    auto res = std::mismatch(master_dbid.begin(), master_dbid.end(),
+                             newdb1_dbid.begin());
+    ASSERT_TRUE(res.first == master_dbid.end());
+
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("World") == 0);
+
+    // Open master and write one more kv to it. This is written to
+    // src bucket as well.
+    OpenDB();
+    ASSERT_OK(db_->Put(WriteOptions(), "Dhruba", "Borthakur"));
+
+    // check that the newly written kv exists
+    value.clear();
+    ASSERT_OK(db_->Get(ReadOptions(), "Dhruba", &value));
+    ASSERT_TRUE(value.compare("Borthakur") == 0);
+
+    // check that the earlier kv exists too
+    value.clear();
+    ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("World") == 0);
+    CloseDB();
+
+    // Assert that newdb1 cannot see the second kv because the second kv
+    // was written to local dir only of the ephemeral clone.
+    ASSERT_TRUE(cloud_db->Get(ReadOptions(), "Dhruba", &value).IsNotFound());
+  }
+  {
+    // Create another ephemeral instance using a different local dir but the
+    // same two buckets as newdb1. This should be identical in contents to
+    // newdb1.
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("newdb2", "", "", &cloud_db, &env);
+
+    // Retrieve the id of the second clone db
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb2_dbid));
+
+    // Since we use two different local directories for the two ephemeral
+    // clones, their dbids should be different from one another
+    ASSERT_NE(newdb1_dbid, newdb2_dbid);
+
+    // check that both the kvs appear in the new ephemeral clone
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("World") == 0);
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Dhruba", &value));
+    ASSERT_TRUE(value.compare("Borthakur") == 0);
+  }
+
+  CloseDB();
+}
+
+TEST_F(CloudTest, ColumnFamilies) {
+  std::vector<ColumnFamilyHandle*> handles;
+  // Put one key-value
+  OpenDB(&handles);
+
+  CreateColumnFamilies({"cf1", "cf2"}, &handles);
+
+  ASSERT_OK(db_->Put(WriteOptions(), handles[0], "hello", "a"));
+  ASSERT_OK(db_->Put(WriteOptions(), handles[1], "hello", "b"));
+  ASSERT_OK(db_->Put(WriteOptions(), handles[2], "hello", "c"));
+
+  auto validate = [&]() {
+    std::string value;
+    ASSERT_OK(db_->Get(ReadOptions(), handles[0], "hello", &value));
+    ASSERT_EQ(value, "a");
+    ASSERT_OK(db_->Get(ReadOptions(), handles[1], "hello", &value));
+    ASSERT_EQ(value, "b");
+    ASSERT_OK(db_->Get(ReadOptions(), handles[2], "hello", &value));
+    ASSERT_EQ(value, "c");
+  };
+
+  validate();
+
+  CloseDB(&handles);
+  OpenWithColumnFamilies({kDefaultColumnFamilyName, "cf1", "cf2"}, &handles);
+
+  validate();
+  CloseDB(&handles);
+
+  // destroy local state
+  DestroyDir(dbname_);
+
+  // new cloud env
+  CreateCloudEnv();
+  options_.env = aenv_.get();
+
+  std::vector<std::string> families;
+  ASSERT_OK(DBCloud::ListColumnFamilies(options_, dbname_, &families));
+  std::sort(families.begin(), families.end());
+  ASSERT_TRUE(families == std::vector<std::string>(
+                              {"cf1", "cf2", kDefaultColumnFamilyName}));
+
+  OpenWithColumnFamilies({kDefaultColumnFamilyName, "cf1", "cf2"}, &handles);
+  validate();
+  CloseDB(&handles);
+}
+
+//
+// Create and read from a clone.
+//
+TEST_F(CloudTest, DISABLED_TrueClone) {
+  std::string master_dbid;
+  std::string newdb1_dbid;
+  std::string newdb2_dbid;
+  std::string newdb3_dbid;
+
+  // Put one key-value
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_TRUE(value.compare("World") == 0);
+  ASSERT_OK(db_->GetDbIdentity(master_dbid));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  CloseDB();
+  value.clear();
+  auto clone_path1 = "clone1_path-" + test_id_;
+  {
+    // Create a new instance with different src and destination paths.
+    // This is a true clone and should have all the contents of the masterdb
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("localpath1", cloud_fs_options_.src_bucket.GetBucketName(),
+            clone_path1, &cloud_db, &env);
+
+    // Retrieve the id of the clone db
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb1_dbid));
+
+    // Since we used different src and destination paths for both
+    // the master and clone1, the clone should have its own identity.
+    ASSERT_NE(master_dbid, newdb1_dbid);
+
+    // check that the original kv appears in the clone
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("World") == 0);
+
+    // write a new value to the clone
+    ASSERT_OK(cloud_db->Put(WriteOptions(), "Hello", "Clone1"));
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("Clone1") == 0);
+    ASSERT_OK(cloud_db->Flush(FlushOptions()));
+  }
+  {
+    // Reopen clone1 with a different local path
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("localpath2", cloud_fs_options_.src_bucket.GetBucketName(),
+            clone_path1, &cloud_db, &env);
+
+    // Retrieve the id of the clone db
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb2_dbid));
+    ASSERT_EQ(newdb1_dbid, newdb2_dbid);
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("Clone1") == 0);
+    ASSERT_OK(cloud_db->Flush(FlushOptions()));
+  }
+  {
+    // Reopen clone1 with the same local path as above.
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("localpath2", cloud_fs_options_.src_bucket.GetBucketName(),
+            clone_path1, &cloud_db, &env);
+
+    // Retrieve the id of the clone db
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb2_dbid));
+    ASSERT_EQ(newdb1_dbid, newdb2_dbid);
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("Clone1") == 0);
+    ASSERT_OK(cloud_db->Flush(FlushOptions()));
+  }
+  auto clone_path2 = "clone2_path-" + test_id_;
+  {
+    // Create clone2
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("localpath3",  // xxx try with localpath2
+            cloud_fs_options_.src_bucket.GetBucketName(), clone_path2,
+            &cloud_db, &env);
+
+    // Retrieve the id of the clone db
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb3_dbid));
+    ASSERT_NE(newdb2_dbid, newdb3_dbid);
+
+    // verify that data is still as it was in the original db.
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("World") == 0);
+
+    // Assert that there are no redundant sst files
+    auto* cimpl =
+        static_cast<CloudFileSystemImpl*>(env->GetFileSystem().get());
+    std::vector<std::string> to_be_deleted;
+    ASSERT_OK(
+        cimpl->FindObsoleteFiles(cimpl->GetSrcBucketName(), &to_be_deleted));
+    // TODO(igor): Re-enable once purger code is fixed
+    // ASSERT_EQ(to_be_deleted.size(), 0);
+
+    // Assert that there are no redundant dbids
+    ASSERT_OK(
+        cimpl->FindObsoleteDbid(cimpl->GetSrcBucketName(), &to_be_deleted));
+    // TODO(igor): Re-enable once purger code is fixed
+    // ASSERT_EQ(to_be_deleted.size(), 0);
+  }
+
+  GetCloudFileSystem()->GetStorageProvider()->EmptyBucket(
+      GetCloudFileSystem()->GetSrcBucketName(), clone_path1);
+  GetCloudFileSystem()->GetStorageProvider()->EmptyBucket(
+      GetCloudFileSystem()->GetSrcBucketName(), clone_path2);
+}
+
+//
+// verify that the dbid registry is appropriately handled
+//
+TEST_F(CloudTest, DbidRegistry) {
+  // Put one key-value
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_TRUE(value.compare("World") == 0);
+
+  // Assert that there is one db in the registry
+  DbidList dbs;
+  ASSERT_OK(GetCloudFileSystem()->GetDbidList(
+      GetCloudFileSystem()->GetSrcBucketName(), &dbs));
+  ASSERT_GE(dbs.size(), 1);
+
+  CloseDB();
+}
+
+TEST_F(CloudTest, KeepLocalFiles) {
+  cloud_fs_options_.keep_local_sst_files = true;
+  for (int iter = 0; iter < 4; ++iter) {
+    cloud_fs_options_.use_direct_io_for_cloud_download =
+        iter == 0 || iter == 1;
+    // Create two files
+    OpenDB();
+    std::string value;
+    ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+    ASSERT_OK(db_->Flush(FlushOptions()));
+    ASSERT_OK(db_->Put(WriteOptions(), "Hello2", "World2"));
+    ASSERT_OK(db_->Flush(FlushOptions()));
+
+    CloseDB();
+    DestroyDir(dbname_);
+    OpenDB();
+
+    std::vector<std::string> files;
+    ASSERT_OK(Env::Default()->GetChildren(dbname_, &files));
+    long sst_files =
+        std::count_if(files.begin(), files.end(), [](const std::string& file) {
+          return file.find("sst") != std::string::npos;
+        });
+    ASSERT_EQ(sst_files, 2);
+
+    ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+    ASSERT_EQ(value, "World");
+    ASSERT_OK(db_->Get(ReadOptions(), "Hello2", &value));
+    ASSERT_EQ(value, "World2");
+
+    CloseDB();
+    ValidateCloudLiveFilesSrcSize();
+    GetCloudFileSystem()->GetStorageProvider()->EmptyBucket(
+        GetCloudFileSystem()->GetSrcBucketName(), dbname_);
+    DestroyDir(dbname_);
+  }
+}
+
+TEST_F(CloudTest, CopyToFromGcs) {
+  std::string fname = dbname_ + "/100000.sst";
+
+  // iter 0 -- not using transfer manager
+  // iter 1 -- using transfer manager
+  for (int iter = 0; iter < 2; ++iter) {
+    // Create Gcp env
+    cloud_fs_options_.keep_local_sst_files = true;
+    CreateCloudEnv();
+    auto* cimpl = GetCloudFileSystemImpl();
+    cimpl->TEST_InitEmptyCloudManifest();
+    char buffer[1 * 1024 * 1024];
+
+    // create a 10 MB file and upload it to cloud
+    {
+      std::unique_ptr<FSWritableFile> writer;
+      ASSERT_OK(aenv_->GetFileSystem()->NewWritableFile(fname, kFileOptions,
+                                                        &writer, kDbg));
+
+      for (int i = 0; i < 10; i++) {
+        ASSERT_OK(
+            writer->Append(Slice(buffer, sizeof(buffer)), kIOOptions, kDbg));
+      }
+      // sync and close file
+    }
+
+    // delete the file manually.
+    ASSERT_OK(base_env_->DeleteFile(fname));
+
+    // reopen the file for reading. It should be refetched from cloud storage.
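+    // (Refetching is transparent here; for an explicit check that the object
+    // was uploaded, a hedged sketch using only helpers that appear elsewhere
+    // in this file -- the exact object key layout is an assumption:
+    //
+    //   auto* cfs = GetCloudFileSystem();
+    //   ASSERT_OK(cfs->GetStorageProvider()->ExistsCloudObject(
+    //       cfs->GetSrcBucketName(),
+    //       cfs->GetSrcObjectPath() + "/" + cfs->RemapFilename(fname)));
+    // )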
+    {
+      std::unique_ptr<FSRandomAccessFile> reader;
+      ASSERT_OK(aenv_->GetFileSystem()->NewRandomAccessFile(
+          fname, kFileOptions, &reader, kDbg));
+
+      uint64_t offset = 0;
+      for (int i = 0; i < 10; i++) {
+        Slice result;
+        char* scratch = &buffer[0];
+        ASSERT_OK(reader->Read(offset, sizeof(buffer), kIOOptions, &result,
+                               scratch, kDbg));
+        ASSERT_EQ(result.size(), sizeof(buffer));
+        offset += sizeof(buffer);
+      }
+    }
+  }
+}
+
+TEST_F(CloudTest, DelayFileDeletion) {
+  std::string fname = dbname_ + "/000010.sst";
+
+  // Create Gcp env
+  cloud_fs_options_.keep_local_sst_files = true;
+  cloud_fs_options_.cloud_file_deletion_delay = std::chrono::seconds(2);
+  CreateCloudEnv();
+  auto* cimpl = GetCloudFileSystemImpl();
+  cimpl->TEST_InitEmptyCloudManifest();
+
+  auto createFile = [&]() {
+    std::unique_ptr<FSWritableFile> writer;
+    ASSERT_OK(aenv_->GetFileSystem()->NewWritableFile(fname, kFileOptions,
+                                                      &writer, kDbg));
+
+    for (int i = 0; i < 10; i++) {
+      ASSERT_OK(writer->Append("igor", kIOOptions, kDbg));
+    }
+    // sync and close file
+  };
+
+  for (int iter = 0; iter <= 1; ++iter) {
+    createFile();
+    // delete the file
+    ASSERT_OK(aenv_->GetFileSystem()->DeleteFile(fname, kIOOptions, kDbg));
+    // file should still be there
+    ASSERT_OK(aenv_->GetFileSystem()->FileExists(fname, kIOOptions, kDbg));
+
+    if (iter == 1) {
+      // should prevent the deletion
+      createFile();
+    }
+
+    std::this_thread::sleep_for(std::chrono::seconds(3));
+    auto st = aenv_->GetFileSystem()->FileExists(fname, kIOOptions, kDbg);
+    if (iter == 0) {
+      // in iter==0 the file should be deleted after 2 seconds
+      ASSERT_TRUE(st.IsNotFound());
+    } else {
+      // in iter==1 the file should not be deleted because we wrote a new file
+      ASSERT_OK(st);
+    }
+  }
+}
+
+// Verify that a savepoint copies all src files to the destination
+TEST_F(CloudTest, Savepoint) {
+  // Put one key-value
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_TRUE(value.compare("World") == 0);
+  CloseDB();
+  value.clear();
+  std::string dest_path = "/clone2_path-" + test_id_;
+  {
+    // Create a new instance with different src and destination paths.
+    // This is a true clone and should have all the contents of the master db
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("localpath1", cloud_fs_options_.src_bucket.GetBucketName(),
+            dest_path, &cloud_db, &env);
+
+    // check that the original kv appears in the clone
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("World") == 0);
+
+    // there should be only one sst file
+    std::vector<LiveFileMetaData> flist;
+    cloud_db->GetLiveFilesMetaData(&flist);
+    ASSERT_TRUE(flist.size() == 1);
+
+    auto* cimpl =
+        static_cast<CloudFileSystemImpl*>(env->GetFileSystem().get());
+    auto remapped_fname = cimpl->RemapFilename(flist[0].name);
+    // source path
+    std::string spath = cimpl->GetSrcObjectPath() + "/" + remapped_fname;
+    ASSERT_OK(cimpl->GetStorageProvider()->ExistsCloudObject(
+        cimpl->GetSrcBucketName(), spath));
+
+    // Verify that the destination path does not have any sst files
+    std::string dpath = dest_path + "/" + remapped_fname;
+    ASSERT_TRUE(cimpl->GetStorageProvider()
+                    ->ExistsCloudObject(cimpl->GetSrcBucketName(), dpath)
+                    .IsNotFound());
+
+    // write a new value to the clone
+    ASSERT_OK(cloud_db->Put(WriteOptions(), "Hell", "Done"));
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hell", &value));
+    ASSERT_TRUE(value.compare("Done") == 0);
+
+    // Invoke savepoint to populate the destination path from the source path
+    ASSERT_OK(cloud_db->Savepoint());
+
+    // check that the sst file is copied to the dest path
+    ASSERT_OK(cimpl->GetStorageProvider()->ExistsCloudObject(
+        cimpl->GetSrcBucketName(), dpath));
+    ASSERT_OK(cloud_db->Flush(FlushOptions()));
+  }
+  {
+    // Reopen the clone
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("localpath2", cloud_fs_options_.src_bucket.GetBucketName(),
+            dest_path, &cloud_db, &env);
+
+    // check that both kvs appear in the clone
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello", &value));
+    ASSERT_TRUE(value.compare("World") == 0);
+    value.clear();
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hell", &value));
+    ASSERT_TRUE(value.compare("Done") == 0);
+  }
+  GetCloudFileSystem()->GetStorageProvider()->EmptyBucket(
+      GetCloudFileSystem()->GetSrcBucketName(), dest_path);
+}
+
+TEST_F(CloudTest, DirectReads) {
+  options_.use_direct_reads = true;
+  options_.use_direct_io_for_flush_and_compaction = true;
+  BlockBasedTableOptions bbto;
+  bbto.no_block_cache = true;
+  bbto.block_size = 1024;
+  options_.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+  OpenDB();
+
+  for (int i = 0; i < 50; ++i) {
+    ASSERT_OK(db_->Put(WriteOptions(), "Hello" + std::to_string(i), "World"));
+  }
+  // create a file
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  std::string value;
+  for (int i = 0; i < 50; ++i) {
+    ASSERT_OK(db_->Get(ReadOptions(), "Hello" + std::to_string(i), &value));
+    ASSERT_EQ(value, "World");
+  }
+  CloseDB();
+}
+
+// Test whether we are able to recover nicely from two different writers to
+// the same S3 bucket. (This is the feature that was enabled by CLOUDMANIFEST.)
+TEST_F(CloudTest, TwoDBsOneBucket) {
+  auto firstDB = dbname_;
+  auto secondDB = dbname_ + "-1";
+  cloud_fs_options_.keep_local_sst_files = true;
+  std::string value;
+
+  cloud_fs_options_.resync_on_open = true;
+  OpenDB();
+  auto* cimpl = GetCloudFileSystemImpl();
+  auto firstManifestFile =
+      cimpl->GetDestObjectPath() + "/" + cimpl->RemapFilename("MANIFEST-1");
+  EXPECT_OK(cimpl->GetStorageProvider()->ExistsCloudObject(
+      cimpl->GetDestBucketName(), firstManifestFile));
+  // Create two files
+  ASSERT_OK(db_->Put(WriteOptions(), "First", "File"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "Second", "File"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  auto files = GetSSTFiles(dbname_);
+  EXPECT_EQ(files.size(), 2);
+  CloseDB();
+
+  cloud_fs_options_.resync_on_open = false;
+  // Open again, with no destination bucket
+  cloud_fs_options_.dest_bucket.SetBucketName("");
+  cloud_fs_options_.dest_bucket.SetObjectPath("");
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "Third", "File"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  auto newFiles = GetSSTFiles(dbname_);
+  EXPECT_EQ(newFiles.size(), 3);
+  // Remember the third file we created
+  std::vector<std::string> diff;
+  std::set_difference(newFiles.begin(), newFiles.end(), files.begin(),
+                      files.end(), std::inserter(diff, diff.begin()));
+  ASSERT_EQ(diff.size(), 1);
+  auto thirdFile = diff[0];
+  CloseDB();
+
+  // Open in a different directory with the destination bucket set
+  dbname_ = secondDB;
+  cloud_fs_options_.dest_bucket = cloud_fs_options_.src_bucket;
+  cloud_fs_options_.resync_on_open = true;
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "Third", "DifferentFile"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  CloseDB();
+
+  // Open back in the first directory with no destination
+  dbname_ = firstDB;
+  cloud_fs_options_.dest_bucket.SetBucketName("");
+  cloud_fs_options_.dest_bucket.SetObjectPath("");
+  cloud_fs_options_.resync_on_open = false;
+  OpenDB();
+  // Changes to the cloud database should make no difference for us. This is
+  // an important check because we should not reinitialize from the cloud if
+  // we have a valid local directory!
+  ASSERT_OK(db_->Get(ReadOptions(), "Third", &value));
+  EXPECT_EQ(value, "File");
+  CloseDB();
+
+  // Reopen in the first directory, this time with a destination path
+  dbname_ = firstDB;
+  cloud_fs_options_.dest_bucket = cloud_fs_options_.src_bucket;
+  cloud_fs_options_.resync_on_open = true;
+  OpenDB();
+  // Changes to the cloud database should be pulled down now.
+  ASSERT_OK(db_->Get(ReadOptions(), "Third", &value));
+  EXPECT_EQ(value, "DifferentFile");
+  files = GetSSTFiles(dbname_);
+  // The third file should no longer be in my directory because it's not part
+  // of the new MANIFEST.
+  EXPECT_TRUE(files.find(thirdFile) == files.end());
+
+  // We need to sleep a bit because file deletion happens in a different
+  // thread, so the file might not be immediately deleted.
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  EXPECT_TRUE(GetCloudFileSystem()
+                  ->GetStorageProvider()
+                  ->ExistsCloudObject(GetCloudFileSystem()->GetDestBucketName(),
+                                      firstManifestFile)
+                  .IsNotFound());
+  CloseDB();
+}
+
+// This test is similar to TwoDBsOneBucket, but is much more chaotic and
+// illegal -- it runs two databases on the exact same S3 bucket. The work on
+// CLOUDMANIFEST enables us to run in that configuration for an extended
+// amount of time (1 hour by default) without any issues -- the last
+// CLOUDMANIFEST writer wins.
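+// ("Last writer wins" means that every open with a destination bucket rolls a
+// new epoch and rewrites CLOUDMANIFEST, so later readers resolve MANIFEST
+// files against whichever writer re-opened most recently.)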
+// This test only applies when the cookie is empty, so whenever the db is
+// reopened, it always fetches the latest CM/M files from s3
+TEST_F(CloudTest, TwoConcurrentWritersCookieEmpty) {
+  cloud_fs_options_.resync_on_open = true;
+  auto firstDB = dbname_;
+  auto secondDB = dbname_ + "-1";
+
+  DBCloud *db1, *db2;
+  Env *aenv1, *aenv2;
+
+  auto openDB1 = [&] {
+    dbname_ = firstDB;
+    OpenDB();
+    db1 = db_;
+    db_ = nullptr;
+    aenv1 = aenv_.release();
+  };
+  auto openDB2 = [&] {
+    dbname_ = secondDB;
+    OpenDB();
+    db2 = db_;
+    db_ = nullptr;
+    aenv2 = aenv_.release();
+  };
+  auto closeDB1 = [&] {
+    db_ = db1;
+    aenv_.reset(aenv1);
+    CloseDB();
+  };
+  auto closeDB2 = [&] {
+    db_ = db2;
+    aenv_.reset(aenv2);
+    CloseDB();
+  };
+
+  openDB1();
+  openDB2();
+
+  // Create a bunch of files, reopening the databases along the way
+  for (int i = 0; i < 5; ++i) {
+    closeDB1();
+    if (i == 2) {
+      DestroyDir(firstDB);
+    }
+    // opening the database makes me the master (i.e. CLOUDMANIFEST points to
+    // my manifest), so my writes are applied to the shared space!
+    openDB1();
+    for (int j = 0; j < 5; ++j) {
+      auto key = std::to_string(i) + std::to_string(j) + "1";
+      ASSERT_OK(db1->Put(WriteOptions(), key, "FirstDB"));
+      ASSERT_OK(db1->Flush(FlushOptions()));
+    }
+    closeDB2();
+    if (i == 2) {
+      DestroyDir(secondDB);
+    }
+    // opening the database makes me the master (i.e. CLOUDMANIFEST points to
+    // my manifest), so my writes are applied to the shared space!
+    openDB2();
+    for (int j = 0; j < 5; ++j) {
+      auto key = std::to_string(i) + std::to_string(j) + "2";
+      ASSERT_OK(db2->Put(WriteOptions(), key, "SecondDB"));
+      ASSERT_OK(db2->Flush(FlushOptions()));
+    }
+  }
+
+  dbname_ = firstDB;
+  // This write should not be applied, because DB2 is currently the owner of
+  // the S3 bucket
+  ASSERT_OK(db1->Put(WriteOptions(), "ShouldNotBeApplied", ""));
+  ASSERT_OK(db1->Flush(FlushOptions()));
+  closeDB1();
+  closeDB2();
+
+  openDB1();
+  for (int i = 0; i < 5; ++i) {
+    for (int j = 0; j < 5; ++j) {
+      std::string val;
+      auto key = std::to_string(i) + std::to_string(j);
+      ASSERT_OK(db1->Get(ReadOptions(), key + "1", &val));
+      EXPECT_EQ(val, "FirstDB");
+      ASSERT_OK(db1->Get(ReadOptions(), key + "2", &val));
+      EXPECT_EQ(val, "SecondDB");
+    }
+  }
+
+  std::string v;
+  ASSERT_TRUE(db1->Get(ReadOptions(), "ShouldNotBeApplied", &v).IsNotFound());
+  closeDB1();
+}
+
+// Creates a pure RocksDB database and makes sure we can migrate to RocksDB
+// Cloud
+TEST_F(CloudTest, MigrateFromPureRocksDB) {
+  {  // Create a local RocksDB
+    Options options;
+    options.create_if_missing = true;
+    DB* dbptr;
+    std::unique_ptr<DB> db;
+    ASSERT_OK(DB::Open(options, dbname_, &dbptr));
+    db.reset(dbptr);
+    // create 5 files
+    for (int i = 0; i < 5; ++i) {
+      auto key = "key" + std::to_string(i);
+      ASSERT_OK(db->Put(WriteOptions(), key, key));
+      ASSERT_OK(db->Flush(FlushOptions()));
+    }
+  }
+
+  CreateCloudEnv();
+  ASSERT_OK(GetCloudFileSystem()->MigrateFromPureRocksDB(dbname_));
+
+  // Now open RocksDB Cloud
+  // TODO(dhruba) Figure out how to make this work without skipping dbid
+  // verification
+  cloud_fs_options_.skip_dbid_verification = true;
+  cloud_fs_options_.keep_local_sst_files = true;
+  cloud_fs_options_.validate_filesize = false;
+  OpenDB();
+  for (int i = 5; i < 10; ++i) {
+    auto key = "key" + std::to_string(i);
+    ASSERT_OK(db_->Put(WriteOptions(), key, key));
+    ASSERT_OK(db_->Flush(FlushOptions()));
+  }
+
+  for (int i = 0; i < 10; ++i) {
+    auto key = "key" + std::to_string(i);
+    std::string value;
+    ASSERT_OK(db_->Get(ReadOptions(), key, &value));
+    ASSERT_EQ(value, key);
+  }
+  CloseDB();
+}
+
+// Tests that we can open a cloud DB without destination or source bucket set.
+// This is useful for tests.
+TEST_F(CloudTest, NoDestOrSrc) {
+  DestroyDir(dbname_);
+  cloud_fs_options_.keep_local_sst_files = true;
+  cloud_fs_options_.src_bucket.SetBucketName("");
+  cloud_fs_options_.src_bucket.SetObjectPath("");
+  cloud_fs_options_.dest_bucket.SetBucketName("");
+  cloud_fs_options_.dest_bucket.SetObjectPath("");
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "key", "value"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  std::string value;
+  ASSERT_OK(db_->Get(ReadOptions(), "key", &value));
+  ASSERT_EQ(value, "value");
+  CloseDB();
+  OpenDB();
+  ASSERT_OK(db_->Get(ReadOptions(), "key", &value));
+  ASSERT_EQ(value, "value");
+  CloseDB();
+}
+
+TEST_F(CloudTest, PreloadCloudManifest) {
+  DestroyDir(dbname_);
+  // Put one key-value
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_TRUE(value.compare("World") == 0);
+  CloseDB();
+  value.clear();
+
+  // Reopen and validate; preload the cloud manifest
+  GetCloudFileSystem()->PreloadCloudManifest(dbname_);
+
+  OpenDB();
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_EQ(value, "World");
+}
+
+//
+// Test Ephemeral mode. In this mode, the database is cloned
+// from a cloud bucket but new writes are not propagated
+// back to any cloud bucket. Once cloned, all updates are local.
+//
+TEST_F(CloudTest, Ephemeral) {
+  cloud_fs_options_.keep_local_sst_files = true;
+  options_.level0_file_num_compaction_trigger = 100;  // never compact
+
+  // Create a primary DB with two files
+  OpenDB();
+  std::string value;
+  std::string newdb1_dbid;
+  std::set<std::string> cloud_files;
+  ASSERT_OK(db_->Put(WriteOptions(), "Name", "dhruba"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello2", "borthakur"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  CloseDB();
+  ASSERT_EQ(2, GetSSTFiles(dbname_).size());
+
+  // Reopen the same database in ephemeral mode by cloning the original.
+  // Do not destroy the local dir. Writes to this db do not make it back
+  // to any cloud storage.
+  {
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("db_ephemeral", "", "", &cloud_db, &env);
+
+    // Retrieve the id of the first reopen
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb1_dbid));
+
+    // verify that we still have two sst files
+    ASSERT_EQ(2, GetSSTFilesClone("db_ephemeral").size());
+
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Name", &value));
+    ASSERT_EQ(value, "dhruba");
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello2", &value));
+    ASSERT_EQ(value, "borthakur");
+
+    // Write one more record.
+    // There should be 3 local sst files in the ephemeral db.
+    ASSERT_OK(cloud_db->Put(WriteOptions(), "zip", "94087"));
+    ASSERT_OK(cloud_db->Flush(FlushOptions()));
+    ASSERT_EQ(3, GetSSTFilesClone("db_ephemeral").size());
+
+    // check that the cloud files did not get updated
+    ASSERT_OK(GetCloudLiveFilesSrc(&cloud_files));
+    ASSERT_EQ(2, cloud_files.size());
+    cloud_files.clear();
+  }
+
+  // reopen the main db and write two more records to it
+  OpenDB();
+  ASSERT_EQ(2, GetSSTFiles(dbname_).size());
+
+  // write two more records to it.
+  ASSERT_OK(db_->Put(WriteOptions(), "Key1", "onlyInMainDB"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "Key2", "onlyInMainDB"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_EQ(4, GetSSTFiles(dbname_).size());
+  CloseDB();
+  ASSERT_OK(GetCloudLiveFilesSrc(&cloud_files));
+  ASSERT_EQ(4, cloud_files.size());
+  cloud_files.clear();
+
+  // At this point, the main db has 4 files while the ephemeral
+  // database has diverged earlier with 3 local files. If we try
+  // to reopen the ephemeral clone, it should not download new
+  // files from the cloud
+  {
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    std::string dbid;
+    options_.info_log = nullptr;
+    CreateLoggerFromOptions(clone_dir_ + "/db_ephemeral", options_,
+                            &options_.info_log);
+
+    CloneDB("db_ephemeral", "", "", &cloud_db, &env);
+
+    // Retrieve the id of this clone. It should be the same as before
+    ASSERT_OK(cloud_db->GetDbIdentity(dbid));
+    ASSERT_EQ(newdb1_dbid, dbid);
+
+    ASSERT_EQ(3, GetSSTFilesClone("db_ephemeral").size());
+
+    // verify that a key written to the ephemeral db still exists
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "zip", &value));
+    ASSERT_EQ(value, "94087");
+
+    // verify that keys written to the main db after the ephemeral clone
+    // was created do not appear in the ephemeral db.
+    ASSERT_NOK(cloud_db->Get(ReadOptions(), "Key1", &value));
+    ASSERT_NOK(cloud_db->Get(ReadOptions(), "Key2", &value));
+  }
+}
+
+// This test exercises a rare race condition where an ephemeral clone is
+// started after the durable clone uploads its CLOUDMANIFEST but before it
+// uploads one of the MANIFEST files. In this case, we want to verify that the
+// ephemeral clone is able to reinitialize instead of crash looping.
+TEST_F(CloudTest, EphemeralOnCorruptedDB) {
+  cloud_fs_options_.keep_local_sst_files = true;
+  cloud_fs_options_.resync_on_open = true;
+  options_.level0_file_num_compaction_trigger = 100;  // never compact
+
+  OpenDB();
+
+  std::vector<std::string> files;
+  base_env_->GetChildren(dbname_, &files);
+
+  // Get the MANIFEST file
+  std::string manifest_file_name;
+  for (const auto& file_name : files) {
+    if (file_name.rfind("MANIFEST", 0) == 0) {
+      manifest_file_name = file_name;
+      break;
+    }
+  }
+
+  ASSERT_FALSE(manifest_file_name.empty());
+
+  // Delete the MANIFEST file from the S3 bucket.
+  // This simulates the scenario where CLOUDMANIFEST is uploaded, but
+  // MANIFEST is not yet uploaded from the durable shard.
+  ASSERT_NE(aenv_.get(), nullptr);
+  GetCloudFileSystem()->GetStorageProvider()->DeleteCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(),
+      GetCloudFileSystem()->GetSrcObjectPath() + "/" + manifest_file_name);
+
+  // The ephemeral clone should fail.
+  std::unique_ptr<DBCloud> clone_db;
+  std::unique_ptr<Env> env;
+  Status st = CloneDB("clone1", "", "", &clone_db, &env);
+  ASSERT_TRUE(st.IsCorruption());
+
+  // Put the MANIFEST file back
+  GetCloudFileSystem()->GetStorageProvider()->PutCloudObject(
+      dbname_ + "/" + manifest_file_name,
+      GetCloudFileSystem()->GetSrcBucketName(),
+      GetCloudFileSystem()->GetSrcObjectPath() + "/" + manifest_file_name);
+
+  // Try one more time. This time it should succeed.
+  clone_db.reset();
+  env.reset();
+  st = CloneDB("clone1", "", "", &clone_db, &env);
+  ASSERT_OK(st);
+
+  clone_db->Close();
+  CloseDB();
+}
+
+//
+// Test Ephemeral clones with resyncOnOpen mode.
+// In this mode, every open of the ephemeral clone db causes its
+// data to be resynced with the master db.
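+// (The resync is driven by cloud_fs_options_.resync_on_open = true below: on
+// open, the clone re-initializes from the bucket's CLOUDMANIFEST/MANIFEST
+// instead of trusting its diverged local state.)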
+//
+TEST_F(CloudTest, EphemeralResync) {
+  cloud_fs_options_.keep_local_sst_files = true;
+  cloud_fs_options_.resync_on_open = true;
+  options_.level0_file_num_compaction_trigger = 100;  // never compact
+
+  // Create a primary DB with two files
+  OpenDB();
+  std::string value;
+  std::string newdb1_dbid;
+  std::set<std::string> cloud_files;
+  ASSERT_OK(db_->Put(WriteOptions(), "Name", "dhruba"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello2", "borthakur"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  CloseDB();
+  ASSERT_EQ(2, GetSSTFiles(dbname_).size());
+
+  // Reopen the same database in ephemeral mode by cloning the original.
+  // Do not destroy the local dir. Writes to this db do not make it back
+  // to any cloud storage.
+  {
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    CloneDB("db_ephemeral", "", "", &cloud_db, &env);
+
+    // Retrieve the id of the first reopen
+    ASSERT_OK(cloud_db->GetDbIdentity(newdb1_dbid));
+
+    // verify that we still have two sst files
+    ASSERT_EQ(2, GetSSTFilesClone("db_ephemeral").size());
+
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Name", &value));
+    ASSERT_EQ(value, "dhruba");
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Hello2", &value));
+    ASSERT_EQ(value, "borthakur");
+
+    // Write one more record.
+    // There should be 3 local sst files in the ephemeral db.
+    ASSERT_OK(cloud_db->Put(WriteOptions(), "zip", "94087"));
+    ASSERT_OK(cloud_db->Flush(FlushOptions()));
+    ASSERT_EQ(3, GetSSTFilesClone("db_ephemeral").size());
+
+    // check that the cloud files did not get updated
+    ASSERT_OK(GetCloudLiveFilesSrc(&cloud_files));
+    ASSERT_EQ(2, cloud_files.size());
+    cloud_files.clear();
+  }
+
+  // reopen the main db and write two more records to it
+  OpenDB();
+  ASSERT_EQ(2, GetSSTFiles(dbname_).size());
+
+  // write two more records to it.
+  ASSERT_OK(db_->Put(WriteOptions(), "Key1", "onlyInMainDB"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "Key2", "onlyInMainDB"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_EQ(4, GetSSTFiles(dbname_).size());
+  CloseDB();
+  ASSERT_OK(GetCloudLiveFilesSrc(&cloud_files));
+  ASSERT_EQ(4, cloud_files.size());
+  cloud_files.clear();
+
+  // At this point, the main db has 4 files while the ephemeral
+  // database has diverged earlier with 3 local files.
+  // Reopen the ephemeral db with the resync_on_open flag.
+  // This means that earlier updates to the ephemeral db are lost.
+  // It also means that the latest updates in the master db
+  // are reflected in the newly opened ephemeral database.
+  {
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    std::string dbid;
+    options_.info_log = nullptr;
+    CreateLoggerFromOptions(clone_dir_ + "/db_ephemeral", options_,
+                            &options_.info_log);
+
+    CloneDB("db_ephemeral", "", "", &cloud_db, &env);
+
+    // Retrieve the id of this clone. It should be the same as before
+    ASSERT_OK(cloud_db->GetDbIdentity(dbid));
+    ASSERT_EQ(newdb1_dbid, dbid);
+
+    // verify that a key written to the ephemeral db does not exist
+    ASSERT_NOK(cloud_db->Get(ReadOptions(), "zip", &value));
+
+    // verify that keys written to the main db after the ephemeral clone
+    // was created appear in the ephemeral db.
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Key1", &value));
+    ASSERT_EQ(value, "onlyInMainDB");
+    ASSERT_OK(cloud_db->Get(ReadOptions(), "Key2", &value));
+    ASSERT_EQ(value, "onlyInMainDB");
+  }
+}
+
+TEST_F(CloudTest, CheckpointToCloud) {
+  cloud_fs_options_.keep_local_sst_files = true;
+  options_.level0_file_num_compaction_trigger = 100;  // never compact
+
+  // Pre-create the bucket.
+  CreateCloudEnv();
+  aenv_.reset();
+
+  // S3 is eventually consistent.
+  std::this_thread::sleep_for(std::chrono::seconds(1));
+
+  auto checkpoint_bucket = cloud_fs_options_.dest_bucket;
+
+  std::string ckpt_from_object_path =
+      cloud_fs_options_.dest_bucket.GetObjectPath();
+  ckpt_from_object_path += "_from";
+  cloud_fs_options_.src_bucket = BucketOptions();
+  cloud_fs_options_.src_bucket.SetObjectPath(ckpt_from_object_path);
+  cloud_fs_options_.dest_bucket = BucketOptions();
+  cloud_fs_options_.dest_bucket.SetObjectPath(ckpt_from_object_path);
+
+  // Create a DB with two files
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "a", "b"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+  ASSERT_OK(db_->Put(WriteOptions(), "c", "d"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  ASSERT_OK(
+      db_->CheckpointToCloud(checkpoint_bucket, CheckpointToCloudOptions()));
+
+  ASSERT_EQ(2, GetSSTFiles(dbname_).size());
+  CloseDB();
+
+  DestroyDir(dbname_);
+  GetCloudFileSystem()->GetStorageProvider()->EmptyBucket(
+      checkpoint_bucket.GetBucketName(),
+      cloud_fs_options_.dest_bucket.GetObjectPath());
+
+  cloud_fs_options_.src_bucket = checkpoint_bucket;
+
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Get(ReadOptions(), "a", &value));
+  ASSERT_EQ(value, "b");
+  ASSERT_OK(db_->Get(ReadOptions(), "c", &value));
+  ASSERT_EQ(value, "d");
+  CloseDB();
+
+  GetCloudFileSystem()->GetStorageProvider()->EmptyBucket(
+      checkpoint_bucket.GetBucketName(), checkpoint_bucket.GetObjectPath());
+}
+
+// Basic test to copy an object within S3.
+TEST_F(CloudTest, CopyObjectTest) {
+  CreateCloudEnv();
+
+  // We need to open an empty DB in order for the epoch to work.
+  OpenDB();
+
+  std::string content = "This is a test file";
+  std::string fname = dbname_ + "/100000.sst";
+  std::string dst_fname = dbname_ + "/200000.sst";
+
+  {
+    std::unique_ptr<FSWritableFile> writableFile;
+    aenv_->GetFileSystem()->NewWritableFile(fname, kFileOptions, &writableFile,
+                                            kDbg);
+    writableFile->Append(content, kIOOptions, kDbg);
+    writableFile->Fsync(kIOOptions, kDbg);
+  }
+
+  auto st = GetCloudFileSystem()->GetStorageProvider()->CopyCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(),
+      GetCloudFileSystem()->RemapFilename(fname),
+      GetCloudFileSystem()->GetSrcBucketName(), dst_fname);
+  ASSERT_OK(st);
+
+  {
+    std::unique_ptr<CloudStorageReadableFile> readableFile;
+    st = GetCloudFileSystem()->GetStorageProvider()->NewCloudReadableFile(
+        GetCloudFileSystem()->GetSrcBucketName(), dst_fname, kFileOptions,
+        &readableFile, kDbg);
+    ASSERT_OK(st);
+
+    char scratch[100];
+    Slice result;
+    std::unique_ptr<FSSequentialFile> sequentialFile(readableFile.release());
+    st = sequentialFile->Read(100, kIOOptions, &result, scratch, kDbg);
+    ASSERT_OK(st);
+    ASSERT_EQ(19, result.size());
+    ASSERT_EQ(result, Slice(content));
+  }
+
+  CloseDB();
+}
+
+//
+// Verify that we can cache data from S3 in a persistent cache.
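+// (SetPersistentCache is a fixture helper not shown in this diff; a hedged
+// sketch of what such a helper typically does with the public RocksDB API --
+// the names below are illustrative, not this file's implementation:
+//
+//   std::shared_ptr<PersistentCache> pc;
+//   ASSERT_OK(NewPersistentCache(Env::Default(), pcache /* path */,
+//                                1ull << 30 /* 1 GB */, nullptr /* logger */,
+//                                true /* optimized_for_nvm */, &pc));
+//   BlockBasedTableOptions bbto;
+//   bbto.persistent_cache = pc;
+//   options_.table_factory.reset(NewBlockBasedTableFactory(bbto));
+// )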
+//
+TEST_F(CloudTest, PersistentCache) {
+  std::string pcache = test::TmpDir() + "/persistent_cache";
+  SetPersistentCache(pcache, 1);
+
+  // Put one key-value
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_TRUE(value.compare("World") == 0);
+  CloseDB();
+  value.clear();
+
+  // Reopen and validate
+  OpenDB();
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_EQ(value, "World");
+  CloseDB();
+}
+
+// This test creates 2 DBs that share a block cache. Ensure that reads from
+// one DB do not get the values from the other DB.
+TEST_F(CloudTest, SharedBlockCache) {
+  cloud_fs_options_.keep_local_sst_files = false;
+
+  // Share the block cache.
+  BlockBasedTableOptions bbto;
+  bbto.block_cache = NewLRUCache(10 * 1024 * 1024);
+  bbto.format_version = 4;
+  options_.table_factory.reset(NewBlockBasedTableFactory(bbto));
+
+  OpenDB();
+
+  std::unique_ptr<Env> clone_env;
+  std::unique_ptr<DBCloud> clone_db;
+  CloneDB("newdb1", cloud_fs_options_.src_bucket.GetBucketName(),
+          cloud_fs_options_.src_bucket.GetObjectPath() + "-clone", &clone_db,
+          &clone_env, false /* force_keep_local_on_invalid_dest_bucket */);
+
+  // Flush the first DB.
+  db_->Put(WriteOptions(), "db", "original");
+  db_->Flush(FlushOptions());
+
+  // Flush the second DB.
+  clone_db->Put(WriteOptions(), "db", "clone");
+  clone_db->Flush(FlushOptions());
+
+  std::vector<LiveFileMetaData> file_metadatas;
+  db_->GetLiveFilesMetaData(&file_metadatas);
+  ASSERT_EQ(1, file_metadatas.size());
+
+  file_metadatas.clear();
+  clone_db->GetLiveFilesMetaData(&file_metadatas);
+  ASSERT_EQ(1, file_metadatas.size());
+
+  std::string value;
+  clone_db->Get(ReadOptions(), "db", &value);
+  ASSERT_EQ("clone", value);
+
+  db_->Get(ReadOptions(), "db", &value);
+  ASSERT_EQ("original", value);
+
+  // Cleanup
+  clone_db->Close();
+  CloseDB();
+  auto* clone_cloud_fs =
+      dynamic_cast<CloudFileSystemImpl*>(clone_env->GetFileSystem().get());
+  clone_cloud_fs->GetStorageProvider()->EmptyBucket(
+      cloud_fs_options_.src_bucket.GetBucketName(),
+      cloud_fs_options_.src_bucket.GetObjectPath() + "-clone");
+}
+
+TEST_F(CloudTest, FindLiveFilesFetchManifestTest) {
+  OpenDB();
+  ASSERT_OK(db_->Put({}, "a", "1"));
+  ASSERT_OK(db_->Flush({}));
+  CloseDB();
+
+  DestroyDir(dbname_);
+
+  // recreate the cloud env, which points to the same bucket and object path
+  CreateCloudEnv();
+
+  std::vector<std::string> live_sst_files;
+  std::string manifest_file;
+
+  // fetch and load the CloudManifest
+  ASSERT_OK(GetCloudFileSystem()->PreloadCloudManifest(dbname_));
+
+  // the manifest file will be fetched to the local db
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &live_sst_files,
+                                                   &manifest_file));
+  EXPECT_EQ(live_sst_files.size(), 1);
+}
+
+TEST_F(CloudTest, FileModificationTimeTest) {
+  OpenDB();
+  ASSERT_OK(db_->Put({}, "a", "1"));
+  ASSERT_OK(db_->Flush({}));
+  std::vector<std::string> live_sst_files;
+  std::string manifest_file;
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &live_sst_files,
+                                                   &manifest_file));
+  uint64_t modtime1;
+  ASSERT_OK(aenv_->GetFileSystem()->GetFileModificationTime(
+      dbname_ + pathsep + manifest_file, kIOOptions, &modtime1, kDbg));
+  CloseDB();
+  DestroyDir(dbname_);
+  // don't roll the cloud manifest so that the manifest file epoch is not
+  // updated
+  cloud_fs_options_.roll_cloud_manifest_on_open = false;
+  OpenDB();
+  uint64_t modtime2;
+  ASSERT_OK(aenv_->GetFileSystem()->GetFileModificationTime(
+      dbname_ + pathsep + manifest_file, kIOOptions, &modtime2, kDbg));
+  // we read the local file modification time, so the second time we open
+  // the db, the modification time changes
+  EXPECT_GT(modtime2, modtime1);
+}
+
+TEST_F(CloudTest, EmptyCookieTest) {
+  // By default the cookie is empty
+  OpenDB();
+  auto* cfs_impl = GetCloudFileSystemImpl();
+  auto cloud_manifest_file = cfs_impl->CloudManifestFile(dbname_);
+  EXPECT_EQ(basename(cloud_manifest_file), "CLOUDMANIFEST");
+  CloseDB();
+}
+
+TEST_F(CloudTest, NonEmptyCookieTest) {
+  cloud_fs_options_.new_cookie_on_open = "000001";
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_EQ(value, "World");
+
+  auto cloud_manifest_file =
+      MakeCloudManifestFile(dbname_, cloud_fs_options_.new_cookie_on_open);
+  ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(), cloud_manifest_file));
+  EXPECT_EQ(basename(cloud_manifest_file), "CLOUDMANIFEST-000001");
+  CloseDB();
+  DestroyDir(dbname_);
+  cloud_fs_options_.cookie_on_open = "000001";
+  cloud_fs_options_.new_cookie_on_open = "000001";
+  OpenDB();
+
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello", &value));
+  ASSERT_EQ(value, "World");
+  ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(), cloud_manifest_file));
+  EXPECT_EQ(basename(cloud_manifest_file), "CLOUDMANIFEST-000001");
+  CloseDB();
+}
+
+// Verify that live sst files are the same after applying the cloud manifest
+// delta
+TEST_F(CloudTest, LiveFilesConsistentAfterApplyCloudManifestDeltaTest) {
+  cloud_fs_options_.cookie_on_open = "1";
+  cloud_fs_options_.new_cookie_on_open = "1";
+  OpenDB();
+
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello", "World"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  std::vector<std::string> live_sst_files1;
+  std::string manifest_file1;
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &live_sst_files1,
+                                                   &manifest_file1));
+
+  std::string new_cookie = "2";
+  std::string new_epoch = "dca7f3e19212c4b3";
+  auto delta = CloudManifestDelta{GetDBImpl()->GetNextFileNumber(), new_epoch};
+  ASSERT_OK(
+      GetCloudFileSystemImpl()->RollNewCookie(dbname_, new_cookie, delta));
+  bool applied = false;
+  ASSERT_OK(GetCloudFileSystemImpl()->ApplyCloudManifestDelta(delta, &applied));
+  ASSERT_TRUE(applied);
+
+  std::vector<std::string> live_sst_files2;
+  std::string manifest_file2;
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &live_sst_files2,
+                                                   &manifest_file2));
+
+  EXPECT_EQ(live_sst_files1, live_sst_files2);
+  EXPECT_NE(manifest_file1, manifest_file2);
+
+  CloseDB();
+}
+
+
+// After calling `ApplyCloudManifestDelta`, writes should be persisted in
+// sst files that are only visible in the new MANIFEST
+TEST_F(CloudTest, WriteAfterUpdateCloudManifestArePersistedInNewEpoch) {
+  cloud_fs_options_.cookie_on_open = "1";
+  cloud_fs_options_.new_cookie_on_open = "1";
+  OpenDB();
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello1", "world1"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
+  std::string new_cookie = "2";
+  std::string new_epoch = "dca7f3e19212c4b3";
+
+  auto delta = CloudManifestDelta{GetDBImpl()->GetNextFileNumber(), new_epoch};
+  ASSERT_OK(
+      GetCloudFileSystemImpl()->RollNewCookie(dbname_, new_cookie, delta));
+  bool applied = false;
+  ASSERT_OK(GetCloudFileSystemImpl()->ApplyCloudManifestDelta(delta, &applied));
+  ASSERT_TRUE(applied);
+  GetDBImpl()->NewManifestOnNextUpdate();
+
+  // the following writes are not visible to the old cookie
+  ASSERT_OK(db_->Put(WriteOptions(), "Hello2", "world2"));
+  ASSERT_OK(db_->Flush(FlushOptions()));
+
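+  // Illustrative extra check (an addition, not in the original test): after a
+  // successful RollNewCookie, the CLOUDMANIFEST for the new cookie is assumed
+  // to already exist in the bucket, mirroring the pattern used in
+  // NonEmptyCookieTest above.
+  ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(),
+      MakeCloudManifestFile(dbname_, new_cookie)));
+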
+  // reopen with cookie = 1; new updates after rolling are not visible
+  CloseDB();
+  cloud_fs_options_.cookie_on_open = "1";
+  cloud_fs_options_.new_cookie_on_open = "1";
+  cloud_fs_options_.dest_bucket.SetBucketName("");
+  cloud_fs_options_.dest_bucket.SetObjectPath("");
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello1", &value));
+  EXPECT_EQ(value, "world1");
+  EXPECT_NOK(db_->Get(ReadOptions(), "Hello2", &value));
+  CloseDB();
+
+  // reopen with cookie = 2; the new updates should still be visible
+  CloseDB();
+  cloud_fs_options_.cookie_on_open = "2";
+  cloud_fs_options_.new_cookie_on_open = "2";
+  OpenDB();
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello1", &value));
+  EXPECT_EQ(value, "world1");
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello2", &value));
+  EXPECT_EQ(value, "world2");
+  CloseDB();
+
+  // Make sure that the changes in the cloud are correct
+  DestroyDir(dbname_);
+  cloud_fs_options_.cookie_on_open = "2";
+  cloud_fs_options_.new_cookie_on_open = "2";
+  OpenDB();
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello1", &value));
+  EXPECT_EQ(value, "world1");
+  ASSERT_OK(db_->Get(ReadOptions(), "Hello2", &value));
+  EXPECT_EQ(value, "world2");
+  CloseDB();
+}
+
+// Test various cases of crashing in the middle of a CloudManifest switch
+TEST_F(CloudTest, CMSwitchCrashInMiddleTest) {
+  cloud_fs_options_.roll_cloud_manifest_on_open = false;
+  cloud_fs_options_.cookie_on_open = "1";
+
+  SyncPoint::GetInstance()->SetCallBack(
+      "CloudFileSystemImpl::RollNewCookie:AfterManifestCopy", [](void* arg) {
+        // Simulate the case of crashing in the middle of
+        // RollNewCookie
+        *reinterpret_cast<Status*>(arg) = Status::Aborted("Aborted");
+      });
+
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  // case 1: Crash in the middle of updating local manifest files.
+  // our guarantee: no CLOUDMANIFEST-<new_cookie> locally or remotely
+  OpenDB();
+
+  std::string new_cookie = "2";
+  std::string new_epoch = "dca7f3e19212c4b3";
+
+  ASSERT_NOK(GetCloudFileSystemImpl()->RollNewCookie(
+      dbname_, new_cookie,
+      CloudManifestDelta{GetDBImpl()->GetNextFileNumber(), new_epoch}));
+
+  CloseDB();
+
+  EXPECT_NOK(
+      base_env_->FileExists(MakeCloudManifestFile(dbname_, new_cookie)));
+
+  // case 2: Crash in the middle of uploading local manifest files.
+  // our guarantee: no CLOUDMANIFEST-<cookie> remotely
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+  SyncPoint::GetInstance()->SetCallBack(
+      "CloudFileSystemImpl::UploadManifest:AfterUploadManifest",
+      [](void* arg) {
+        // Simulate the case of crashing in the middle of
+        // UploadManifest
+        *reinterpret_cast<Status*>(arg) = Status::Aborted("Aborted");
+      });
+  SyncPoint::GetInstance()->EnableProcessing();
+  OpenDB();
+
+  auto delta = CloudManifestDelta{GetDBImpl()->GetNextFileNumber(), new_epoch};
+  ASSERT_NOK(
+      GetCloudFileSystemImpl()->RollNewCookie(dbname_, new_cookie, delta));
+
+  ASSERT_NOK(GetCloudFileSystemImpl()->GetStorageProvider()->ExistsCloudObject(
+      GetCloudFileSystemImpl()->GetDestBucketName(),
+      MakeCloudManifestFile(GetCloudFileSystemImpl()->GetDestObjectPath(),
+                            new_cookie)));
+
+  CloseDB();
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+}
+
+TEST_F(CloudTest, RollNewEpochTest) {
+  OpenDB();
+  auto epoch1 = GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch();
+  EXPECT_OK(GetCloudFileSystemImpl()->GetStorageProvider()->ExistsCloudObject(
+      GetCloudFileSystemImpl()->GetDestBucketName(),
+      ManifestFileWithEpoch(GetCloudFileSystemImpl()->GetDestObjectPath(),
+                            epoch1)));
+  CloseDB();
+  OpenDB();
+  auto epoch2 = GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch();
+  EXPECT_OK(GetCloudFileSystemImpl()->GetStorageProvider()->ExistsCloudObject(
+      GetCloudFileSystemImpl()->GetDestBucketName(),
+      ManifestFileWithEpoch(GetCloudFileSystemImpl()->GetDestObjectPath(),
+                            epoch2)));
+  CloseDB();
+  EXPECT_NE(epoch1, epoch2);
+}
+
+// Test that we can roll back to the empty cookie
+TEST_F(CloudTest, CookieBackwardsCompatibilityTest) {
+  cloud_fs_options_.resync_on_open = true;
+  cloud_fs_options_.roll_cloud_manifest_on_open = true;
+
+  cloud_fs_options_.cookie_on_open = "";
+  cloud_fs_options_.new_cookie_on_open = "1";
+  OpenDB();
+  ASSERT_OK(db_->Put({}, "k1", "v1"));
+  ASSERT_OK(db_->Flush({}));
+  CloseDB();
+
+  // switch cookie
+  cloud_fs_options_.cookie_on_open = "1";
+  cloud_fs_options_.new_cookie_on_open = "2";
+  OpenDB();
+  std::string value;
+  ASSERT_OK(db_->Get({}, "k1", &value));
+  EXPECT_EQ(value, "v1");
+
+  ASSERT_OK(db_->Put({}, "k2", "v2"));
+  ASSERT_OK(db_->Flush({}));
+  CloseDB();
+
+  // switch back to the empty cookie
+  cloud_fs_options_.cookie_on_open = "2";
+  cloud_fs_options_.new_cookie_on_open = "";
+  OpenDB();
+  ASSERT_OK(db_->Get({}, "k1", &value));
+  EXPECT_EQ(value, "v1");
+
+  ASSERT_OK(db_->Get({}, "k2", &value));
+  EXPECT_EQ(value, "v2");
+  CloseDB();
+
+  // open with both cookies empty
+  cloud_fs_options_.cookie_on_open = "";
+  cloud_fs_options_.new_cookie_on_open = "";
+  OpenDB();
+  ASSERT_OK(db_->Get({}, "k1", &value));
+  EXPECT_EQ(value, "v1");
+
+  ASSERT_OK(db_->Get({}, "k2", &value));
+  EXPECT_EQ(value, "v2");
+  CloseDB();
+}
+
+// Test that once we switch to a non-empty cookie, we can roll back to the
+// empty cookie immediately and files are not deleted by mistake
+TEST_F(CloudTest, CookieRollbackTest) {
+  cloud_fs_options_.resync_on_open = true;
+
+  // Create a CLOUDMANIFEST with the empty cookie
+  cloud_fs_options_.cookie_on_open = "";
+  cloud_fs_options_.new_cookie_on_open = "";
+
+  OpenDB();
+  ASSERT_OK(db_->Put({}, "k1", "v1"));
+  ASSERT_OK(db_->Flush({}));
+  CloseDB();
+
+  // Switch to cookie 1
+  cloud_fs_options_.cookie_on_open = "";
+  cloud_fs_options_.new_cookie_on_open = "1";
+  OpenDB();
+  CloseDB();
+
+  // roll back to the empty cookie
+  cloud_fs_options_.cookie_on_open = "1";
+  cloud_fs_options_.new_cookie_on_open = "";
+
+  // Set up a syncpoint so that file deletion jobs are executed after we open
+  // the db, but before we close it. This is to make sure that the file
+  // deletion job won't delete files that are created when we open the db
+  // (e.g., CLOUDMANIFEST files and MANIFEST files) and we can catch it in the
+  // test if something is messed up
+  SyncPoint::GetInstance()->LoadDependency({
+      {// only trigger the file deletion job after db open
+       "CloudTest::CookieRollbackTest:AfterOpenDB",
+       "CloudSchedulerImpl::DoWork:BeforeGetJob"},
+  });
+  SyncPoint::GetInstance()->EnableProcessing();
+  OpenDB();
+  TEST_SYNC_POINT("CloudTest::CookieRollbackTest:AfterOpenDB");
+  // File deletion jobs are only triggered after this point. Once triggered,
+  // the job deletion queue is not empty.
+
+  std::string v;
+  ASSERT_OK(db_->Get({}, "k1", &v));
+  EXPECT_EQ(v, "v1");
+
+  // wait until there are no scheduled jobs for the current local cloud env.
+  // After waiting, we know for sure that all the deletion jobs scheduled
+  // when opening the db have executed
+  WaitUntilNoScheduledJobs();
+  CloseDB();
+
+  SyncPoint::GetInstance()->DisableProcessing();
+
+  // reopen with the empty cookie
+  cloud_fs_options_.cookie_on_open = "";
+  cloud_fs_options_.new_cookie_on_open = "";
+  OpenDB();
+  ASSERT_OK(db_->Get({}, "k1", &v));
+  EXPECT_EQ(v, "v1");
+  CloseDB();
+}
+
+TEST_F(CloudTest, NewCookieOnOpenTest) {
+  cloud_fs_options_.cookie_on_open = "1";
+
+  // when opening a new db, only new_cookie_on_open is used as the
+  // CLOUDMANIFEST suffix
+  cloud_fs_options_.new_cookie_on_open = "2";
+  OpenDB();
+  ASSERT_OK(db_->Put({}, "k1", "v1"));
+  ASSERT_OK(db_->Flush({}));
+
+  ASSERT_NOK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(),
+      MakeCloudManifestFile(dbname_, "1")));
+  // CLOUDMANIFEST-2 should exist since this is a new db
+  ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(),
+      MakeCloudManifestFile(dbname_, "2")));
+  CloseDB();
+
+  // reopen and switch cookie
+  cloud_fs_options_.cookie_on_open = "2";
+  cloud_fs_options_.new_cookie_on_open = "3";
+  OpenDB();
+  // CLOUDMANIFEST-3 is the new cloud manifest
+  ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(),
+      MakeCloudManifestFile(dbname_, "3")));
+
+  std::string value;
+  ASSERT_OK(db_->Get({}, "k1", &value));
+  EXPECT_EQ(value, "v1");
+
+  ASSERT_OK(db_->Put({}, "k2", "v2"));
+  ASSERT_OK(db_->Flush({}));
+  CloseDB();
+
+  // reopen the DB, but don't switch the CLOUDMANIFEST
+  cloud_fs_options_.cookie_on_open = "3";
+  cloud_fs_options_.new_cookie_on_open = "3";
+  OpenDB();
+  ASSERT_OK(db_->Get({}, "k2", &value));
+  EXPECT_EQ(value, "v2");
+  CloseDB();
+}
+
+// Test invisible file deletion when the db is opened.
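+// Files that belong to a newer cookie are "invisible" to a db opened with an
+// older cookie: on open, their local copies are removed synchronously while
+// the cloud copies are removed through the async deletion scheduler (observed
+// below via sync points).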
+TEST_F(CloudTest, InvisibleFileDeletionOnDBOpenTest) {
+  std::string cookie1 = "", cookie2 = "-1-1";
+  cloud_fs_options_.keep_local_sst_files = true;
+
+  // opening with cookie1
+  OpenDB();
+  ASSERT_OK(db_->Put({}, "k1", "v1"));
+  ASSERT_OK(db_->Flush({}));
+  std::vector<std::string> cookie1_sst_files;
+  std::string cookie1_manifest_file;
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &cookie1_sst_files,
+                                                   &cookie1_manifest_file));
+  ASSERT_EQ(cookie1_sst_files.size(), 1);
+  CloseDB();
+
+  // MANIFEST file path of cookie1
+  auto cookie1_manifest_filepath = dbname_ + pathsep + cookie1_manifest_file;
+  // CLOUDMANIFEST file path of cookie1
+  auto cookie1_cm_filepath =
+      MakeCloudManifestFile(dbname_, cloud_fs_options_.cookie_on_open);
+  // sst file path of cookie1
+  auto cookie1_sst_filepath = dbname_ + pathsep + cookie1_sst_files[0];
+
+  // opening with cookie1 and switching to cookie2
+  cloud_fs_options_.cookie_on_open = cookie1;
+  cloud_fs_options_.new_cookie_on_open = cookie2;
+  OpenDB();
+  ASSERT_OK(db_->Put({}, "k2", "v2"));
+  ASSERT_OK(db_->Flush({}));
+  // CM/M/sst files of cookie1 won't be deleted
+  for (auto path : {cookie1_cm_filepath, cookie1_manifest_filepath,
+                    cookie1_sst_filepath}) {
+    EXPECT_OK(GetCloudFileSystem()->GetBaseFileSystem()->FileExists(
+        path, kIOOptions, kDbg));
+    EXPECT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+        GetCloudFileSystem()->GetSrcBucketName(), path));
+  }
+
+  std::vector<std::string> cookie2_sst_files;
+  std::string cookie2_manifest_file;
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &cookie2_sst_files,
+                                                   &cookie2_manifest_file));
+  ASSERT_EQ(cookie2_sst_files.size(), 2);
+  CloseDB();
+
+  // MANIFEST file path of cookie2
+  auto cookie2_manifest_filepath = dbname_ + pathsep + cookie2_manifest_file;
+  // CLOUDMANIFEST file path of cookie2
+  auto cookie2_cm_filepath =
+      MakeCloudManifestFile(dbname_, cloud_fs_options_.new_cookie_on_open);
+  // find the sst file path of cookie2
+  auto cookie2_sst_filepath = dbname_ + pathsep + cookie2_sst_files[0];
+  if (cookie2_sst_filepath == cookie1_sst_filepath) {
+    cookie2_sst_filepath = dbname_ + pathsep + cookie2_sst_files[1];
+  }
+
+  // Now we reopen the db with cookie1 to force deleting all files generated
+  // in cookie2
+
+  // number of file deletion jobs executed
+  std::atomic_int num_job_executed(0);
+
+  // Syncpoint callback so that we can check when the files are actually
+  // deleted (which is async)
+  SyncPoint::GetInstance()->SetCallBack(
+      "LocalCloudScheduler::ScheduleJob:AfterEraseJob", [&](void* /*arg*/) {
+        num_job_executed++;
+        if (num_job_executed == 3) {
+          // CM/M/SST files of cookie2 are deleted in s3
+          for (auto path : {cookie2_manifest_filepath, cookie2_cm_filepath,
+                            cookie2_sst_filepath}) {
+            EXPECT_NOK(
+                GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+                    GetCloudFileSystem()->GetSrcBucketName(), path));
+          }
+        }
+      });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  // reopening the db with cookie1 will force all files generated in cookie2
+  // to be deleted
+  cloud_fs_options_.cookie_on_open = cookie1;
+  cloud_fs_options_.new_cookie_on_open = cookie1;
+  OpenDB();
+  // local obsolete CM/M/SST files will be deleted immediately;
+  // files in the cloud will be deleted later (checked in the callback)
+  for (auto path : {cookie2_cm_filepath, cookie2_manifest_filepath,
+                    cookie2_sst_filepath}) {
+    EXPECT_NOK(GetCloudFileSystem()->GetBaseFileSystem()->FileExists(
+        path, kIOOptions, kDbg))
+        << path;
+  }
+  CloseDB();
+
+  SyncPoint::GetInstance()->DisableProcessing();
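+  // (DisableProcessing stops sync-point interception; ClearAllCallBacks on
+  // the next line drops the callback registered above so later tests start
+  // from a clean slate.)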
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+
+  WaitUntilNoScheduledJobs();
+  // Make sure that these files are indeed deleted
+  EXPECT_EQ(num_job_executed, 3);
+}
+
+// Verify that when opening with `delete_cloud_invisible_files_on_open`
+// disabled, local invisible files are still deleted while cloud files are
+// kept
+TEST_F(CloudTest, DisableInvisibleFileDeletionOnOpenTest) {
+  std::string cookie1 = "", cookie2 = "1";
+  cloud_fs_options_.keep_local_sst_files = true;
+  cloud_fs_options_.cookie_on_open = cookie1;
+  cloud_fs_options_.new_cookie_on_open = cookie1;
+
+  // opening with cookie1
+  OpenDB();
+  // generate an sst file with cookie1
+  ASSERT_OK(db_->Put({}, "k1", "v1"));
+  ASSERT_OK(db_->Flush({}));
+
+  std::vector<std::string> cookie1_sst_files;
+  std::string cookie1_manifest_file;
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &cookie1_sst_files,
+                                                   &cookie1_manifest_file));
+  ASSERT_EQ(cookie1_sst_files.size(), 1);
+
+  auto cookie1_manifest_filepath = dbname_ + pathsep + cookie1_manifest_file;
+  auto cookie1_cm_filepath =
+      MakeCloudManifestFile(dbname_, cloud_fs_options_.cookie_on_open);
+  auto cookie1_sst_filepath = dbname_ + pathsep + cookie1_sst_files[0];
+
+  ASSERT_OK(SwitchToNewCookie(cookie2));
+
+  // generate an sst file with cookie2
+  ASSERT_OK(db_->Put({}, "k2", "v2"));
+  ASSERT_OK(db_->Flush({}));
+
+  std::vector<std::string> cookie2_sst_files;
+  std::string cookie2_manifest_file;
+
+  ASSERT_OK(GetCloudFileSystem()->FindAllLiveFiles(dbname_, &cookie2_sst_files,
+                                                   &cookie2_manifest_file));
+  ASSERT_EQ(cookie2_sst_files.size(), 2);
+
+  // exclude cookie1_sst_files from cookie2_sst_files
+  std::sort(cookie2_sst_files.begin(), cookie2_sst_files.end());
+  std::set_difference(cookie2_sst_files.begin(), cookie2_sst_files.end(),
+                      cookie1_sst_files.begin(), cookie1_sst_files.end(),
+                      cookie2_sst_files.begin());
+  cookie2_sst_files.resize(1);
+
+  auto cookie2_manifest_filepath = dbname_ + pathsep + cookie2_manifest_file;
+  auto cookie2_cm_filepath = MakeCloudManifestFile(dbname_, cookie2);
+  auto cookie2_sst_filepath = dbname_ + pathsep + cookie2_sst_files[0];
+
+  CloseDB();
+
+  // reopen with cookie1 = ""; cookie2 sst files are not visible
+  cloud_fs_options_.delete_cloud_invisible_files_on_open = false;
+  OpenDB();
+  // files from cookie2 are deleted locally but still exist in s3
+  for (auto path : {cookie2_cm_filepath, cookie2_manifest_filepath,
+                    cookie2_sst_filepath}) {
+    EXPECT_NOK(GetCloudFileSystem()->GetBaseFileSystem()->FileExists(
+        path, kIOOptions, kDbg));
+    EXPECT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+        GetCloudFileSystem()->GetSrcBucketName(), path));
+  }
+  std::string value;
+  EXPECT_OK(db_->Get({}, "k1", &value));
+  EXPECT_NOK(db_->Get({}, "k2", &value));
+  CloseDB();
+
+  cloud_fs_options_.cookie_on_open = cookie2;
+  cloud_fs_options_.new_cookie_on_open = cookie2;
+  // reopening with cookie2 also works since it will fetch the files from s3
+  // directly
+  OpenDB();
+  EXPECT_OK(db_->Get({}, "k1", &value));
+  EXPECT_OK(db_->Get({}, "k2", &value));
+  CloseDB();
+}
+
+TEST_F(CloudTest, DisableObsoleteFileDeletionOnOpenTest) {
+  // Generate a few obsolete files first
+  options_.num_levels = 3;
+  options_.level0_file_num_compaction_trigger = 3;
+  options_.write_buffer_size = 110 << 10;  // 110KB
+  options_.arena_block_size = 4 << 10;
+  options_.keep_log_file_num = 1;
+  options_.use_options_file = false;
+  // put wal files into one directory so that we don't need to count the
+  // number of local wal files
+  options_.wal_dir = dbname_ + "/wal";
+  cloud_fs_options_.keep_local_sst_files = true;
+  // disable cm rolling so that no new manifest files are generated
+  cloud_fs_options_.roll_cloud_manifest_on_open = false;
+
+  WriteOptions wo;
+  wo.disableWAL = true;
+  OpenDB();
+  ASSERT_OK(SwitchToNewCookie(""));
+  db_->DisableFileDeletions();
+
+  std::vector<LiveFileMetaData> files;
+
+  ASSERT_OK(db_->Put(wo, "k1", "v1"));
+  ASSERT_OK(db_->Flush({}));
+  ASSERT_OK(db_->Put(wo, "k1", "v2"));
+  ASSERT_OK(db_->Flush({}));
+  db_->GetLiveFilesMetaData(&files);
+  ASSERT_EQ(files.size(), 2);
+
+  auto local_files = GetAllLocalFiles();
+  // CM, MANIFEST1, MANIFEST2, CURRENT, IDENTITY, 2 sst files, wal directory
+  EXPECT_EQ(local_files.size(), 8);
+
+  ASSERT_OK(GetDBImpl()->TEST_CompactRange(0, nullptr, nullptr, nullptr, true));
+
+  files.clear();
+  db_->GetLiveFilesMetaData(&files);
+  ASSERT_EQ(files.size(), 1);
+
+  local_files = GetAllLocalFiles();
+  // obsolete files are not deleted; also, one extra sst file is generated
+  // after compaction
+  EXPECT_EQ(local_files.size(), 9);
+
+  CloseDB();
+
+  options_.disable_delete_obsolete_files_on_open = true;
+  OpenDB();
+  // obsolete files are still not deleted
+  EXPECT_EQ(GetAllLocalFiles().size(), 8);
+  // once file deletions are re-enabled, the obsolete files are deleted!
+  db_->EnableFileDeletions();
+  EXPECT_EQ(GetAllLocalFiles().size(), 6);
+  CloseDB();
+}
+
+// Verify invisible CLOUDMANIFEST file deletion
+TEST_F(CloudTest, CloudManifestFileDeletionTest) {
+  // create the CLOUDMANIFEST file in s3
+  cloud_fs_options_.cookie_on_open = "";
+  cloud_fs_options_.new_cookie_on_open = "";
+  OpenDB();
+  CloseDB();
+
+  // create the CLOUDMANIFEST-1 file in s3
+  cloud_fs_options_.cookie_on_open = "";
+  cloud_fs_options_.new_cookie_on_open = "1";
+  OpenDB();
+  CloseDB();
+
+  auto checkCloudManifestFileExistence =
+      [&](std::vector<std::string> cookies) {
+        for (auto cookie : cookies) {
+          EXPECT_OK(
+              GetCloudFileSystemImpl()->GetStorageProvider()->ExistsCloudObject(
+                  GetCloudFileSystemImpl()->GetDestBucketName(),
+                  MakeCloudManifestFile(
+                      GetCloudFileSystemImpl()->GetDestObjectPath(), cookie)));
+        }
+      };
+
+  // double check that the CM files are indeed created
+  checkCloudManifestFileExistence({"", "1"});
+
+  // set a large file deletion delay so that files are not deleted immediately
+  cloud_fs_options_.cloud_file_deletion_delay = std::chrono::hours(1);
+  EXPECT_EQ(GetCloudFileSystemImpl()->TEST_NumScheduledJobs(), 0);
+
+  // now we reopen the db with an empty cookie_on_open and new_cookie_on_open
+  // = "1". Double check that CLOUDMANIFEST-1 is not deleted!
+  OpenDB();
+  checkCloudManifestFileExistence({"", "1"});
+  CloseDB();
+
+  // switch to a new cookie
+  cloud_fs_options_.cookie_on_open = "1";
+  cloud_fs_options_.new_cookie_on_open = "2";
+  OpenDB();
+  // double check that CLOUDMANIFEST is never deleted
+  checkCloudManifestFileExistence({"", "1", "2"});
+  CloseDB();
+}
+
+// verify that two writers with different cookies can write concurrently
+TEST_F(CloudTest, TwoConcurrentWritersCookieNotEmpty) {
+  auto firstDB = dbname_;
+  auto secondDB = dbname_ + "-1";
+
+  DBCloud *db1, *db2;
+  Env *aenv1, *aenv2;
+
+  auto openDB1 = [&] {
+    dbname_ = firstDB;
+    cloud_fs_options_.cookie_on_open = "1";
+    cloud_fs_options_.new_cookie_on_open = "2";
+    OpenDB();
+    db1 = db_;
+    db_ = nullptr;
+    aenv1 = aenv_.release();
+  };
+  auto openDB1NoCookieSwitch = [&](const std::string& cookie) {
+    dbname_ = firstDB;
+    // when reopening DB1, we should set cookie_on_open = 2 to make sure
+    // we are opening with the right CM/M files
+    cloud_fs_options_.cookie_on_open = cookie;
+    cloud_fs_options_.new_cookie_on_open = cookie;
+    OpenDB();
+    db1 = db_;
+    db_ = nullptr;
+    aenv1 = aenv_.release();
+  };
+  auto openDB2 = [&] {
+    dbname_ = secondDB;
+    cloud_fs_options_.cookie_on_open = "2";
+    cloud_fs_options_.new_cookie_on_open = "3";
+    OpenDB();
+    db2 = db_;
+    db_ = nullptr;
+    aenv2 = aenv_.release();
+  };
+  auto openDB2NoCookieSwitch = [&](const std::string& cookie) {
+    dbname_ = secondDB;
+    // when reopening DB2, we should set cookie_on_open = 3 to make sure
+    // we are opening with the right CM/M files
+    cloud_fs_options_.cookie_on_open = cookie;
+    cloud_fs_options_.new_cookie_on_open = cookie;
+    OpenDB();
+    db2 = db_;
+    db_ = nullptr;
+    aenv2 = aenv_.release();
+  };
+  auto closeDB1 = [&] {
+    db_ = db1;
+    aenv_.reset(aenv1);
+    CloseDB();
+  };
+  auto closeDB2 = [&] {
+    db_ = db2;
+    aenv_.reset(aenv2);
+    CloseDB();
+  };
+
+  openDB1();
+  db1->Put({}, "k1", "v1");
+  db1->Flush({});
+  closeDB1();
+
+  // clean up the memtable of db1 to make sure k1/v1 indeed exists in sst
+  // files
+  DestroyDir(firstDB);
+  openDB1NoCookieSwitch("2" /* cookie */);
+
+  // open DB2 and run concurrently
+  openDB2();
+
+  db1->Put({}, "k2", "v2");
+  db1->Flush({});
+
+  db2->Put({}, "k3", "v3");
+  db2->Flush({});
+
+  std::string v;
+  ASSERT_OK(db1->Get({}, "k1", &v));
+  EXPECT_EQ(v, "v1");
+  ASSERT_OK(db2->Get({}, "k1", &v));
+  EXPECT_EQ(v, "v1");
+
+  ASSERT_OK(db1->Get({}, "k2", &v));
+  EXPECT_EQ(v, "v2");
+  // k2 is written to db1 after db2 is opened, so it's not visible to db2
+  EXPECT_NOK(db2->Get({}, "k2", &v));
+
+  // k3 is written to db2 after db1 is opened, so it's not visible to db1
+  EXPECT_NOK(db1->Get({}, "k3", &v));
+  ASSERT_OK(db2->Get({}, "k3", &v));
+  EXPECT_EQ(v, "v3");
+
+  closeDB1();
+  closeDB2();
+
+  // clean up local state to make sure the writes indeed exist in sst files
+  DestroyDir(firstDB);
+  DestroyDir(secondDB);
+
+  // We can't reopen the db with cookie=2 anymore, since that would remove all
+  // the files for cookie=3. This is guaranteed since whenever we reopen the
+  // db, we always get the latest cookie from the metadata store.
+  openDB2NoCookieSwitch("3" /* cookie */);
+
+  ASSERT_OK(db2->Get({}, "k1", &v));
+  EXPECT_EQ(v, "v1");
+  EXPECT_NOK(db2->Get({}, "k2", &v));
+  ASSERT_OK(db2->Get({}, "k3", &v));
+  EXPECT_EQ(v, "v3");
+  closeDB2();
+}
+
+// if file deletion fails, the db should still be reopened
+TEST_F(CloudTest, FileDeletionFailureIgnoredTest) {
+  std::string manifest_file_path;
+  OpenDB();
+  auto epoch = GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch();
+  manifest_file_path = ManifestFileWithEpoch(dbname_, epoch);
+  ASSERT_OK(db_->Put({}, "k1", "v1"));
+  ASSERT_OK(db_->Flush({}));
+  CloseDB();
+
+  // bump the manifest epoch so that the old manifest file will be deleted on
+  // the next open
+  OpenDB();
+  CloseDB();
+
+  // return an error during file deletion
+  SyncPoint::GetInstance()->SetCallBack(
+      "CloudFileSystemImpl::DeleteLocalInvisibleFiles:AfterListLocalFiles",
+      [](void* arg) {
+        auto st = reinterpret_cast<Status*>(arg);
+        *st =
+            Status::Aborted("Manual abort to simulate file listing failure");
+      });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  OpenDB();
+  std::string v;
+  ASSERT_OK(db_->Get({}, "k1", &v));
+  EXPECT_EQ(v, "v1");
+  // Due to the Aborted error we generated, the manifest file which should
+  // have been deleted still exists.
+  EXPECT_OK(GetCloudFileSystem()->GetBaseFileSystem()->FileExists(
+      manifest_file_path, kIOOptions, kDbg));
+  CloseDB();
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+
+  // reopening the db should delete the obsolete manifest file now that the
+  // syncpoint has been cleaned up
+  OpenDB();
+  EXPECT_NOK(GetCloudFileSystem()->GetBaseFileSystem()->FileExists(
+      manifest_file_path, kIOOptions, kDbg));
+  CloseDB();
+}
+
+// verify that once the CloudFileSystem is destructed, the file deletion jobs
+// waiting in the queue are canceled
+TEST_F(CloudTest, FileDeletionJobsCanceledWhenCloudEnvDestructed) {
+  std::string manifest_file_path;
+  OpenDB();
+  auto epoch = GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch();
+  manifest_file_path = ManifestFileWithEpoch(dbname_, epoch);
+  CloseDB();
+
+  // bump the epoch of the manifest file so the next open will delete the
+  // previous manifest file
+  OpenDB();
+  CloseDB();
+
+  // Setup a syncpoint dependency to prevent the cloud scheduler from executing
+  // the file deletion job in the queue until the CloudFileSystem is destructed
+  SyncPoint::GetInstance()->LoadDependency(
+      {{"CloudTest::FileDeletionJobsCanceledWhenCloudEnvDestructed:"
+        "AfterCloudEnvDestruction",
+        "CloudSchedulerImpl::DoWork:BeforeGetJob"}});
+  SyncPoint::GetInstance()->EnableProcessing();
+  OpenDB();
+  CloseDB();
+
+  // deleting the CloudFileSystem will cancel all file deletion jobs in the queue
+  aenv_.reset();
+
+  // jobs won't be executed until after this point. But the file deletion job
+  // in the queue should have already been canceled
+  TEST_SYNC_POINT(
+      "CloudTest::FileDeletionJobsCanceledWhenCloudEnvDestructed:"
+      "AfterCloudEnvDestruction");
+
+  SyncPoint::GetInstance()->DisableProcessing();
+
+  // recreate the cloud env to check s3 file existence
+  CreateCloudEnv();
+
+  // wait for a while so that the remaining uncanceled jobs are indeed executed
+  // by the cloud scheduler.
+  std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+  // the old manifest file is still there!
+  EXPECT_OK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(), manifest_file_path));
+
+  // reopen the db to delete the old manifest file
+  OpenDB();
+  EXPECT_NOK(GetCloudFileSystem()->GetStorageProvider()->ExistsCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(), manifest_file_path));
+  CloseDB();
+}
+
+// The failure case of opening a corrupted db which doesn't have a MANIFEST file
+TEST_F(CloudTest, OpenWithManifestMissing) {
+  cloud_fs_options_.resync_on_open = true;
+  OpenDB();
+  auto epoch = GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch();
+  CloseDB();
+
+  // Remove the MANIFEST file from s3
+  ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->DeleteCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(),
+      ManifestFileWithEpoch(GetCloudFileSystem()->GetSrcObjectPath(), epoch)));
+  DestroyDir(dbname_);
+
+  EXPECT_TRUE(checkOpen().IsCorruption());
+}
+
+// verify that an ephemeral clone won't reference old sst files if it's
+// reopened after sst file deletion on the durable
+// Ordering of events:
+// - open durable (epoch = 1)
+// - open ephemeral (epoch = 1, new_epoch=?)
+// - durable deletes sst files
+// - reopen ephemeral (epoch = 1)
+TEST_F(CloudTest, ReopenEphemeralAfterFileDeletion) {
+  cloud_fs_options_.resync_on_open = true;
+  cloud_fs_options_.keep_local_sst_files = false;
+
+  auto durableDBName = dbname_;
+
+  DBCloud *durable, *ephemeral;
+  Env *durableEnv, *ephemeralEnv;
+  std::vector<ColumnFamilyHandle*> durableHandles;
+
+  auto openDurable = [&] {
+    dbname_ = durableDBName;
+
+    OpenDB(&durableHandles);
+    durable = db_;
+    db_ = nullptr;
+    durableEnv = aenv_.release();
+  };
+
+  auto openEphemeral = [&] {
+    std::unique_ptr<Env> env;
+    std::unique_ptr<DBCloud> cloud_db;
+    // open the ephemeral clone with
+    // force_keep_local_on_invalid_dest_bucket=false so that sst files are not
+    // kept locally
+    ASSERT_OK(CloneDB("ephemeral" /* clone_name */, "" /* dest_bucket_name */,
+                      "" /* dest_object_path */, &cloud_db, &env,
+                      false /* force_keep_local_on_invalid_dest_bucket */));
+    ephemeral = cloud_db.release();
+    ephemeralEnv = env.release();
+  };
+
+  auto closeDurable = [&] {
+    db_ = durable;
+    aenv_.reset(durableEnv);
+    CloseDB(&durableHandles);
+  };
+
+  auto closeEphemeral = [&] {
+    db_ = ephemeral;
+    aenv_.reset(ephemeralEnv);
+    CloseDB();
+  };
+
+  options_.disable_auto_compactions = true;
+  openDurable();
+
+  ASSERT_OK(durable->Put({}, "key1", "val1"));
+  ASSERT_OK(durable->Flush({}));
+
+  ASSERT_OK(durable->Put({}, "key1", "val2"));
+  ASSERT_OK(durable->Flush({}));
+
+  closeDurable();
+
+  openDurable();
+  openEphemeral();
+
+  std::vector<LiveFileMetaData> files;
+  durable->GetLiveFilesMetaData(&files);
+  ASSERT_EQ(files.size(), 2);
+  // trigger compaction on the durable with trivial file moves disabled, which
+  // will delete the previously generated sst files
+  ASSERT_OK(
+      static_cast<DBImpl*>(durable->GetBaseDB())
+          ->TEST_CompactRange(0, nullptr, nullptr, durableHandles[0], true));
+  files.clear();
+  durable->GetLiveFilesMetaData(&files);
+  ASSERT_EQ(files.size(), 1);
+
+  // reopen the ephemeral clone
+  closeEphemeral();
+  openEphemeral();
+
+  std::string val;
+  ASSERT_OK(ephemeral->Get({}, "key1", &val));
+  EXPECT_EQ(val, "val2");
+  closeEphemeral();
+  closeDurable();
+}
+
+TEST_F(CloudTest, SanitizeDirectoryTest) {
+  cloud_fs_options_.keep_local_sst_files = true;
+  OpenDB();
+  ASSERT_OK(db_->Put({}, "k1", "v1"));
+  ASSERT_OK(db_->Flush({}));
+  CloseDB();
+
+  auto local_files = GetAllLocalFiles();
+  // Files that exist locally: cm/m, sst, options-xxx, xxx.log, identity, current
+  EXPECT_EQ(local_files.size(), 7);
+
+  EXPECT_OK(
+      GetCloudFileSystemImpl()->SanitizeLocalDirectory(options_, dbname_, false));
+
+  // cleanup during sanitization is not triggered
+  EXPECT_EQ(local_files.size(), GetAllLocalFiles().size());
+
+  // Delete the local CLOUDMANIFEST file to force cleanup
+  ASSERT_OK(
+      base_env_->DeleteFile(MakeCloudManifestFile(dbname_, "" /* cookie */)));
+
+  EXPECT_OK(
+      GetCloudFileSystemImpl()->SanitizeLocalDirectory(options_, dbname_, false));
+
+  local_files = GetAllLocalFiles();
+  // The IDENTITY file is downloaded after cleanup, and it is the only file
+  // that exists locally
+  EXPECT_EQ(GetAllLocalFiles().size(), 1);
+
+  // reinitialize the local directory
+  OpenDB();
+  CloseDB();
+  local_files = GetAllLocalFiles();
+  // we have two local MANIFEST files after opening a second time.
+  EXPECT_EQ(local_files.size(), 8);
+
+  // create some random directory, which is expected not to be deleted
+  ASSERT_OK(base_env_->CreateDir(dbname_ + "/tmp_writes"));
+
+  // Delete the local CLOUDMANIFEST file to force cleanup
+  ASSERT_OK(
+      base_env_->DeleteFile(MakeCloudManifestFile(dbname_, "" /* cookie */)));
+
+  ASSERT_OK(
+      GetCloudFileSystemImpl()->SanitizeLocalDirectory(options_, dbname_, false));
+
+  // the IDENTITY file + the random directory we created
+  EXPECT_EQ(GetAllLocalFiles().size(), 2);
+
+  // reinitialize the local directory
+  OpenDB();
+  CloseDB();
+
+  // inject io errors during cleanup. The io errors should be ignored
+  SyncPoint::GetInstance()->SetCallBack(
+      "CloudFileSystemImpl::SanitizeDirectory:AfterDeleteFile", [](void* arg) {
+        auto st = reinterpret_cast<Status*>(arg);
+        *st = Status::IOError("Injected io error during cleanup");
+      });
+
+  SyncPoint::GetInstance()->EnableProcessing();
+  // Delete the local CLOUDMANIFEST file to force cleanup
+  ASSERT_OK(
+      base_env_->DeleteFile(MakeCloudManifestFile(dbname_, "" /* cookie */)));
+
+  ASSERT_OK(
+      GetCloudFileSystemImpl()->SanitizeLocalDirectory(options_, dbname_, false));
+  SyncPoint::GetInstance()->DisableProcessing();
+}
+
+TEST_F(CloudTest, CloudFileDeletionNotTriggeredIfDestBucketNotSet) {
+  std::vector<std::string> files_to_delete;
+
+  // generate an invisible MANIFEST file to delete
+  OpenDB();
+  std::string manifest_file = ManifestFileWithEpoch(
+      dbname_, GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch());
+  files_to_delete.push_back(basename(manifest_file));
+  CloseDB();
+
+  // generate obsolete sst files to delete
+  options_.disable_delete_obsolete_files_on_open = true;
+  cloud_fs_options_.delete_cloud_invisible_files_on_open = false;
+  OpenDB();
+  GenerateObsoleteFilesOnEmptyDB(GetDBImpl(), GetCloudFileSystem(),
+                                 &files_to_delete);
+  CloseDB();
+
+  options_.disable_delete_obsolete_files_on_open = false;
+  cloud_fs_options_.dest_bucket.SetBucketName("");
+  cloud_fs_options_.dest_bucket.SetObjectPath("");
+  cloud_fs_options_.delete_cloud_invisible_files_on_open = true;
+  OpenDB();
+  WaitUntilNoScheduledJobs();
+  for (auto& fname: files_to_delete) {
+    EXPECT_OK(ExistsCloudObject(fname));
+  }
+  CloseDB();
+
+  cloud_fs_options_.dest_bucket = cloud_fs_options_.src_bucket;
+  OpenDB();
+  WaitUntilNoScheduledJobs();
+  for (auto& fname: files_to_delete) {
+    EXPECT_NOK(ExistsCloudObject(fname));
+  }
+  CloseDB();
+}
+
+TEST_F(CloudTest, ScheduleFileDeletionTest) {
+  auto scheduler = CloudScheduler::Get();
+  auto deletion_scheduler =
+      CloudFileDeletionScheduler::Create(scheduler, std::chrono::seconds(0));
+
+  std::atomic_int counter{0};
+  int num_file_deletions = 10;
+  for (int i = 0; i < num_file_deletions; i++) {
+    ASSERT_OK(deletion_scheduler->ScheduleFileDeletion(
+        std::to_string(i) + ".sst", [&counter]() { counter++; }));
+  }
+
+  // wait until there are no scheduled jobs
+  while (scheduler->TEST_NumScheduledJobs() > 0) {
+    usleep(100);
+  }
+  EXPECT_EQ(counter, num_file_deletions);
+  EXPECT_EQ(deletion_scheduler->TEST_FilesToDelete().size(), 0);
+}
+
+TEST_F(CloudTest, SameFileDeletedMultipleTimesTest) {
+  auto scheduler = CloudScheduler::Get();
+  auto deletion_scheduler =
+      CloudFileDeletionScheduler::Create(scheduler, std::chrono::hours(1));
+  ASSERT_OK(deletion_scheduler->ScheduleFileDeletion("filename", []() {}));
+  ASSERT_OK(deletion_scheduler->ScheduleFileDeletion("filename", []() {}));
+  EXPECT_EQ(deletion_scheduler->TEST_FilesToDelete().size(), 1);
+}
+
+TEST_F(CloudTest, UnscheduleFileDeletionTest) {
+  auto scheduler = CloudScheduler::Get();
+  auto deletion_scheduler =
+      CloudFileDeletionScheduler::Create(scheduler, std::chrono::hours(1));
+
+  std::atomic_int counter{0};
+  int num_file_deletions = 10;
+  std::vector<std::string> files_to_delete;
+  for (int i = 0; i < num_file_deletions; i++) {
+    std::string filename = std::to_string(i) + ".sst";
+    files_to_delete.push_back(filename);
+    ASSERT_OK(
+        deletion_scheduler->ScheduleFileDeletion(filename, [&counter]() { counter++; }));
+  }
+  auto actual_files_to_delete = deletion_scheduler->TEST_FilesToDelete();
+  std::sort(actual_files_to_delete.begin(), actual_files_to_delete.end());
+  EXPECT_EQ(actual_files_to_delete, files_to_delete);
+
+  int num_scheduled_jobs = num_file_deletions;
+  for (auto& fname: files_to_delete) {
+    deletion_scheduler->UnscheduleFileDeletion(fname);
+    num_scheduled_jobs -= 1;
+    EXPECT_EQ(scheduler->TEST_NumScheduledJobs(), num_scheduled_jobs);
+  }
+}
+
+TEST_F(CloudTest, UnscheduleUnknownFileTest) {
+  auto scheduler = CloudScheduler::Get();
+  auto deletion_scheduler =
+      CloudFileDeletionScheduler::Create(scheduler, std::chrono::hours(1));
+  deletion_scheduler->UnscheduleFileDeletion("unknown file");
+}
+
+// Verifies that once the `CloudFileDeletionScheduler` is destructed, no file
+// deletion job will actually be scheduled
+// This is also a repro of SYS-3456, which is a race between CloudFileSystemImpl
+// destruction and cloud file deletion
+// TODO(SYS-3996) Re-enable
+TEST_F(
+    CloudTest,
+    DISABLED_FileDeletionNotScheduledOnceCloudFileDeletionSchedulerDestructed) {
+  // Generate some invisible files to delete.
+  // Disable file deletion to make sure these files are not deleted
+  // automatically
+  options_.disable_delete_obsolete_files_on_open = true;
+  cloud_fs_options_.delete_cloud_invisible_files_on_open = false;
+  OpenDB();
+  std::vector<std::string> obsolete_files;
+  GenerateObsoleteFilesOnEmptyDB(GetDBImpl(), GetCloudFileSystem(),
+                                 &obsolete_files);
+  CloseDB();
+
+  // Order of execution:
+  // - the scheduled file deletion job starts running (but the file is not
+  //   deleted yet)
+  // - destruct CloudFileDeletionScheduler
+  // - the file deletion job deletes the file
+  SyncPoint::GetInstance()->LoadDependency({
+      {
+          // `BeforeCancelJobs` happens-after `BeforeFileDeletion`
+          "CloudFileDeletionScheduler::ScheduleFileDeletion:BeforeFileDeletion",
+          "CloudFileDeletionScheduler::~CloudFileDeletionScheduler:BeforeCancelJobs",
+      },
+      {
+          "CloudFileDeletionScheduler::~CloudFileDeletionScheduler:BeforeCancelJobs",
+          "CloudFileDeletionScheduler::ScheduleFileDeletion:AfterFileDeletion"
+      }
+  });
+
+  std::atomic<int> num_jobs_finished{0};
+  SyncPoint::GetInstance()->SetCallBack(
+      "CloudFileDeletionScheduler::ScheduleFileDeletion:AfterFileDeletion",
+      [&](void* arg) {
+        ASSERT_NE(nullptr, arg);
+        auto file_deleted = *reinterpret_cast<bool*>(arg);
+        EXPECT_FALSE(file_deleted);
+        num_jobs_finished++;
+      });
+  SyncPoint::GetInstance()->EnableProcessing();
+  // the file is not deleted immediately but just scheduled for deletion
+  ASSERT_OK(aenv_->GetFileSystem()->DeleteFile(obsolete_files[0], kIOOptions, kDbg));
+  EXPECT_EQ(GetCloudFileSystemImpl()->TEST_NumScheduledJobs(), 1);
+  // destruct the `CloudFileSystem`, which will cause the
+  // `CloudFileDeletionScheduler` to be destructed
+  aenv_.reset();
+  // wait until the file deletion job is done
+  while (num_jobs_finished.load() != 1) {
+    usleep(100);
+  }
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+  SyncPoint::GetInstance()->DisableProcessing();
+}
+
+TEST_F(CloudTest, UniqueCurrentEpochAcrossDBRestart) {
+  constexpr int kNumRestarts = 3;
+  std::unordered_set<std::string> epochs;
+  for (int i = 0; i < kNumRestarts; i++) {
+    OpenDB();
+    auto [it, inserted] = epochs.emplace(
+        GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch());
+    EXPECT_TRUE(inserted);
+    CloseDB();
+  }
+}
+
+TEST_F(CloudTest, ReplayCloudManifestDeltaTest) {
+  OpenDB();
+  constexpr int kNumKeys = 3;
+  std::vector<CloudManifestDelta> deltas;
+  for (int i = 0; i < kNumKeys; i++) {
+    ASSERT_OK(db_->Put({}, "k" + std::to_string(i), "v" + std::to_string(i)));
+    ASSERT_OK(db_->Flush({}));
+
+    auto cookie1 = std::to_string(i) + "0";
+    auto filenum1 = db_->GetNextFileNumber();
+    deltas.push_back({filenum1, cookie1});
+    ASSERT_OK(SwitchToNewCookie(cookie1));
+
+    // apply again with the same file number but a different cookie
+    auto cookie2 = std::to_string(i) + "1";
+    auto filenum2 = db_->GetNextFileNumber();
+    EXPECT_EQ(filenum1, filenum2);
+    deltas.push_back({filenum2, cookie2});
+    ASSERT_OK(SwitchToNewCookie(cookie2));
+  }
+
+  auto currentEpoch =
+      GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch();
+
+  // replay the deltas one more time
+  for (const auto& delta : deltas) {
+    EXPECT_TRUE(GetCloudFileSystem()
+                    ->RollNewCookie(dbname_, delta.epoch, delta)
+                    .IsInvalidArgument());
+    bool applied = false;
+    ASSERT_OK(GetCloudFileSystem()->ApplyCloudManifestDelta(delta, &applied));
+    EXPECT_FALSE(applied);
+    // the current epoch is not changed
+    EXPECT_EQ(GetCloudFileSystemImpl()->GetCloudManifest()->GetCurrentEpoch(),
+              currentEpoch);
+  }
+
+  for (int i = 0; i < kNumKeys; i++) {
+    std::string v;
+    ASSERT_OK(db_->Get({}, "k" + std::to_string(i), &v));
+    EXPECT_EQ(v, "v" + std::to_string(i));
+  }
+  CloseDB();
+}
+
+TEST_F(CloudTest, CreateIfMissing) {
+  options_.create_if_missing = false;
+  ASSERT_TRUE(checkOpen().IsNotFound());
+  options_.create_if_missing = true;
+  OpenDB();
+  CloseDB();
+
+  // delete the local db directory, including the `CURRENT` file
+  DestroyDir(dbname_);
+  OpenDB();
+  CloseDB();
+
+  // Delete the `CLOUDMANIFEST` file in the cloud
+  auto cloudManifestFile =
+      MakeCloudManifestFile(dbname_, cloud_fs_options_.new_cookie_on_open);
+  ASSERT_OK(GetCloudFileSystem()->GetStorageProvider()->DeleteCloudObject(
+      GetCloudFileSystem()->GetSrcBucketName(), cloudManifestFile));
+
+  options_.create_if_missing = false;
+  ASSERT_TRUE(checkOpen().IsNotFound());
+}
+
+}  // namespace ROCKSDB_NAMESPACE
+
+// A black-box test for the cloud wrapper around rocksdb
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  auto r = RUN_ALL_TESTS();
+  return r;
+}
+
+#else  // USE_GCP
+
+#include <stdio.h>
+
+int main(int, char**) {
+  fprintf(stderr,
+          "SKIPPED as DBCloud is supported only when USE_GCP is defined.\n");
+  return 0;
+}
+#endif
+
+#else  // ROCKSDB_LITE
+
+#include <stdio.h>
+
+int main(int, char**) {
+  fprintf(stderr, "SKIPPED as DBCloud is not supported in ROCKSDB_LITE\n");
+  return 0;
+}
+
+#endif  // !ROCKSDB_LITE
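For orientation, here is a minimal sketch of the end-to-end flow the tests above exercise: opening a DBCloud instance backed by the GCP file system this diff introduces. The bucket, object path, region, and local path are hypothetical placeholders; the NewGcpFileSystem and NewCompositeEnv signatures are taken from the declarations in this diff, and anything beyond that (for example the DBCloud::Open persistent-cache arguments, passed empty/zero here) should be treated as an assumption rather than canonical usage.

#include "rocksdb/cloud/cloud_file_system.h"
#include "rocksdb/cloud/db_cloud.h"
#include "rocksdb/options.h"

using namespace ROCKSDB_NAMESPACE;

int main() {
  // Point src and dest at the same (hypothetical) bucket and object path.
  CloudFileSystemOptions cloud_options;
  CloudFileSystem* cfs = nullptr;
  Status s = CloudFileSystemEnv::NewGcpFileSystem(
      FileSystem::Default(), "example-bucket", "example/db", "asia-northeast1",
      "example-bucket", "example/db", "asia-northeast1", cloud_options,
      nullptr /* logger */, &cfs);
  if (!s.ok()) return 1;

  // Wrap the cloud file system in a composite Env and open the cloud DB.
  std::shared_ptr<FileSystem> cloud_fs(cfs);
  auto cloud_env = CloudFileSystemEnv::NewCompositeEnv(Env::Default(), cloud_fs);
  Options options;
  options.env = cloud_env.get();
  options.create_if_missing = true;
  DBCloud* db = nullptr;
  s = DBCloud::Open(options, "/tmp/rocksdb_gcp_example",
                    "" /* persistent_cache_path */,
                    0 /* persistent_cache_size_gb */, &db);
  if (!s.ok()) return 1;
  db->Put(WriteOptions(), "key", "value");
  delete db;  // close the DB before the Env goes away
  return 0;
}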
diff --git a/cloud/gcp/gcp_file_system.cc b/cloud/gcp/gcp_file_system.cc
new file mode 100644
index 00000000000..981510349a7
--- /dev/null
+++ b/cloud/gcp/gcp_file_system.cc
@@ -0,0 +1,111 @@
+#ifndef ROCKSDB_LITE
+
+#ifdef USE_GCP
+
+#include "cloud/gcp/gcp_file_system.h"
+
+#include <memory>
+
+#include "rocksdb/cloud/cloud_storage_provider_impl.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/utilities/object_registry.h"
+
+namespace ROCKSDB_NAMESPACE {
+GcpFileSystem::GcpFileSystem(std::shared_ptr<FileSystem> const& underlying_fs,
+                             CloudFileSystemOptions const& cloud_options,
+                             std::shared_ptr<Logger> const& info_log)
+    : CloudFileSystemImpl(cloud_options, underlying_fs, info_log) {}
+
+Status GcpFileSystem::NewGcpFileSystem(
+    std::shared_ptr<FileSystem> const& base_fs,
+    CloudFileSystemOptions const& cloud_options,
+    std::shared_ptr<Logger> const& info_log, CloudFileSystem** cfs) {
+  Status status;
+  *cfs = nullptr;
+  auto fs = base_fs;
+  if (!fs) {
+    fs = FileSystem::Default();
+  }
+  std::unique_ptr<GcpFileSystem> gfs(
+      new GcpFileSystem(fs, cloud_options, info_log));
+  auto env =
+      CloudFileSystemEnv::NewCompositeEnvFromFs(gfs.get(), Env::Default());
+  ConfigOptions config_options;
+  config_options.env = env.get();
+  status = gfs->PrepareOptions(config_options);
+  if (status.ok()) {
+    *cfs = gfs.release();
+  }
+  return status;
+}
+
+Status GcpFileSystem::NewGcpFileSystem(std::shared_ptr<FileSystem> const& fs,
+                                       std::unique_ptr<CloudFileSystem>* cfs) {
+  cfs->reset(new GcpFileSystem(fs, CloudFileSystemOptions()));
+  return Status::OK();
+}
+
+Status GcpFileSystem::PrepareOptions(ConfigOptions const& options) {
+  if (cloud_fs_options.src_bucket.GetRegion().empty() ||
+      cloud_fs_options.dest_bucket.GetRegion().empty()) {
+    std::string region;
+    if (!CloudFileSystemOptions::GetNameFromEnvironment(
+            "GCP_DEFAULT_REGION", "gcp_default_region", &region)) {
+      region = default_region;
+    }
+    if (cloud_fs_options.src_bucket.GetRegion().empty()) {
+      cloud_fs_options.src_bucket.SetRegion(region);
+    }
+    if (cloud_fs_options.dest_bucket.GetRegion().empty()) {
+      cloud_fs_options.dest_bucket.SetRegion(region);
+    }
+  }
+  if (cloud_fs_options.storage_provider == nullptr) {
+    // If the user has not specified a storage provider, then use the default
+    // provider for this CloudType
+    Status s = CloudStorageProvider::CreateFromString(
+        options, CloudStorageProviderImpl::kGcs(),
+        &cloud_fs_options.storage_provider);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+  return CloudFileSystemImpl::PrepareOptions(options);
+}
+
+int CloudFileSystemImpl::RegisterGcpObjects(ObjectLibrary& library,
+                                            std::string const& /*arg*/) {
+  int count = 0;
+  library.AddFactory<FileSystem>(
+      CloudFileSystemImpl::kGcp(),
+      [](std::string const& /*uri*/, std::unique_ptr<FileSystem>* guard,
+         std::string* errmsg) {
+        std::unique_ptr<CloudFileSystem> cguard;
+        Status s =
+            GcpFileSystem::NewGcpFileSystem(FileSystem::Default(), &cguard);
+        if (s.ok()) {
+          guard->reset(cguard.release());
+          return guard->get();
+        } else {
+          *errmsg = s.ToString();
+          return static_cast<FileSystem*>(nullptr);
+        }
+      });
+  count++;
+
+  library.AddFactory<CloudStorageProvider>(
+      CloudStorageProviderImpl::kGcs(),
+      [](std::string const& /*uri*/,
+         std::unique_ptr<CloudStorageProvider>* guard, std::string* errmsg) {
+        Status s = CloudStorageProviderImpl::CreateGcsProvider(guard);
+        if (!s.ok()) {
+          *errmsg = s.ToString();
+        }
+        return guard->get();
+      });
+  count++;
+  return count;
+}
+}  // namespace ROCKSDB_NAMESPACE
+#endif  // USE_GCP
+#endif  // ROCKSDB_LITE
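A small illustration of the region fallback in PrepareOptions above: when neither bucket carries a region, the GCP_DEFAULT_REGION environment variable (or the gcp_default_region property) takes precedence over the hard-coded default_region. This is a sketch only; the bucket name and region are hypothetical placeholders, and setenv assumes a POSIX platform.

#include <cstdlib>  // setenv (POSIX)

#include "rocksdb/cloud/cloud_file_system.h"

using namespace ROCKSDB_NAMESPACE;

void OpenWithRegionFromEnv() {
  // Hypothetical region override; PrepareOptions consults this variable when
  // the bucket options leave the region empty.
  setenv("GCP_DEFAULT_REGION", "us-central1", 1 /* overwrite */);

  CloudFileSystemOptions cloud_options;
  cloud_options.src_bucket.SetBucketName("example-bucket");
  cloud_options.src_bucket.SetObjectPath("example/db");
  // No SetRegion() call: the region is resolved during PrepareOptions.

  CloudFileSystem* cfs = nullptr;
  Status s = CloudFileSystemEnv::NewGcpFileSystem(
      FileSystem::Default(), cloud_options, nullptr /* logger */, &cfs);
  if (s.ok()) {
    delete cfs;  // for brevity; real code would keep the file system alive
  }
}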
diff --git a/cloud/gcp/gcp_file_system.h b/cloud/gcp/gcp_file_system.h
new file mode 100644
index 00000000000..056b5f19f9f
--- /dev/null
+++ b/cloud/gcp/gcp_file_system.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <memory>
+
+#include "rocksdb/cloud/cloud_file_system_impl.h"
+#include "cloud/filename.h"
+
+#ifdef USE_GCP
+
+namespace ROCKSDB_NAMESPACE {
+class GcpFileSystem : public CloudFileSystemImpl {
+ public:
+  static Status NewGcpFileSystem(const std::shared_ptr<FileSystem>& base_fs,
+                                 const CloudFileSystemOptions& cloud_options,
+                                 const std::shared_ptr<Logger>& info_log,
+                                 CloudFileSystem** cfs);
+  static Status NewGcpFileSystem(const std::shared_ptr<FileSystem>& fs,
+                                 std::unique_ptr<CloudFileSystem>* cfs);
+  virtual ~GcpFileSystem() {}
+
+  static char const* kName() { return kGcp(); }
+  const char* Name() const override { return kGcp(); }
+
+  Status PrepareOptions(const ConfigOptions& options) override;
+
+  static constexpr char const* default_region = "asia-northeast1";
+
+ private:
+  explicit GcpFileSystem(const std::shared_ptr<FileSystem>& underlying_fs,
+                         const CloudFileSystemOptions& cloud_options,
+                         const std::shared_ptr<Logger>& info_log = nullptr);
+};
+
+class GcpCloudOptions {
+ public:
+  static Status GetClientConfiguration(
+      CloudFileSystem* fs, std::string const& region,
+      google::cloud::Options& options);
+};
+}  // namespace ROCKSDB_NAMESPACE
+#endif
diff --git a/cloud/gcp/gcp_file_system_test.cc b/cloud/gcp/gcp_file_system_test.cc
new file mode 100644
index 00000000000..1e2c96691a3
--- /dev/null
+++ b/cloud/gcp/gcp_file_system_test.cc
@@ -0,0 +1,230 @@
+// Copyright (c) 2017 Rockset
+
+#ifndef ROCKSDB_LITE
+
+#include "cloud/gcp/gcp_file_system.h"
+
+#ifdef USE_GCP
+
+#include "cloud/cloud_log_controller_impl.h"
+#include "rocksdb/cloud/cloud_file_system.h"
+#include "rocksdb/cloud/cloud_log_controller.h"
+#include "rocksdb/cloud/cloud_storage_provider.h"
+#include "rocksdb/cloud/cloud_storage_provider_impl.h"
+#include "rocksdb/convenience.h"
+#include "rocksdb/env.h"
+#include "test_util/testharness.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+TEST(CloudFileSystemTest, TestBucket) {
+  CloudFileSystemOptions copts;
+  copts.src_bucket.SetRegion("North");
+  copts.src_bucket.SetBucketName("Input", "src.");
+  ASSERT_FALSE(copts.src_bucket.IsValid());
+  copts.src_bucket.SetObjectPath("Here");
+  ASSERT_TRUE(copts.src_bucket.IsValid());
+
+  copts.dest_bucket.SetRegion("South");
+  copts.dest_bucket.SetObjectPath("There");
+  ASSERT_FALSE(copts.dest_bucket.IsValid());
+  copts.dest_bucket.SetBucketName("Output", "dest.");
+  ASSERT_TRUE(copts.dest_bucket.IsValid());
+}
+
+TEST(CloudFileSystemTest, ConfigureOptions) {
+  ConfigOptions config_options;
+  CloudFileSystemOptions copts, copy;
+  copts.keep_local_sst_files = false;
+  copts.keep_local_log_files = false;
+  copts.create_bucket_if_missing = false;
+  copts.validate_filesize = false;
+  copts.skip_dbid_verification = false;
+  copts.resync_on_open = false;
+  copts.skip_cloud_files_in_getchildren = false;
+  copts.constant_sst_file_size_in_sst_file_manager = 100;
+  copts.run_purger = false;
+  copts.purger_periodicity_millis = 101;
+
+  std::string str;
+  ASSERT_OK(copts.Serialize(config_options, &str));
+  ASSERT_OK(copy.Configure(config_options, str));
+  ASSERT_FALSE(copy.keep_local_sst_files);
+  ASSERT_FALSE(copy.keep_local_log_files);
+  ASSERT_FALSE(copy.create_bucket_if_missing);
+  ASSERT_FALSE(copy.validate_filesize);
+  ASSERT_FALSE(copy.skip_dbid_verification);
+  ASSERT_FALSE(copy.resync_on_open);
+  ASSERT_FALSE(copy.skip_cloud_files_in_getchildren);
+  ASSERT_FALSE(copy.run_purger);
+  ASSERT_EQ(copy.constant_sst_file_size_in_sst_file_manager, 100);
+  ASSERT_EQ(copy.purger_periodicity_millis, 101);
+
+  // Now try different values
+  copts.keep_local_sst_files = true;
+  copts.keep_local_log_files = true;
+  copts.create_bucket_if_missing = true;
+  copts.validate_filesize = true;
+  copts.skip_dbid_verification = true;
+  copts.resync_on_open = true;
+  copts.skip_cloud_files_in_getchildren = true;
+  copts.constant_sst_file_size_in_sst_file_manager = 200;
+  copts.run_purger = true;
+  copts.purger_periodicity_millis = 201;
+
+  ASSERT_OK(copts.Serialize(config_options, &str));
+  ASSERT_OK(copy.Configure(config_options, str));
+  ASSERT_TRUE(copy.keep_local_sst_files);
+  ASSERT_TRUE(copy.keep_local_log_files);
+  ASSERT_TRUE(copy.create_bucket_if_missing);
+  ASSERT_TRUE(copy.validate_filesize);
+  ASSERT_TRUE(copy.skip_dbid_verification);
+  ASSERT_TRUE(copy.resync_on_open);
+  ASSERT_TRUE(copy.skip_cloud_files_in_getchildren);
+  ASSERT_TRUE(copy.run_purger);
+  ASSERT_EQ(copy.constant_sst_file_size_in_sst_file_manager, 200);
+  ASSERT_EQ(copy.purger_periodicity_millis, 201);
+}
+
+TEST(CloudFileSystemTest, ConfigureBucketOptions) {
+  ConfigOptions config_options;
+  CloudFileSystemOptions copts, copy;
+  std::string str;
+  copts.src_bucket.SetBucketName("source", "src.");
+  copts.src_bucket.SetObjectPath("foo");
+  copts.src_bucket.SetRegion("north");
+  copts.dest_bucket.SetBucketName("dest");
+  copts.dest_bucket.SetObjectPath("bar");
+  ASSERT_OK(copts.Serialize(config_options, &str));
+
+  ASSERT_OK(copy.Configure(config_options, str));
+  ASSERT_EQ(copts.src_bucket.GetBucketName(), copy.src_bucket.GetBucketName());
+  ASSERT_EQ(copts.src_bucket.GetObjectPath(), copy.src_bucket.GetObjectPath());
+  ASSERT_EQ(copts.src_bucket.GetRegion(), copy.src_bucket.GetRegion());
+
+  ASSERT_EQ(copts.dest_bucket.GetBucketName(),
+            copy.dest_bucket.GetBucketName());
+  ASSERT_EQ(copts.dest_bucket.GetObjectPath(),
+            copy.dest_bucket.GetObjectPath());
+  ASSERT_EQ(copts.dest_bucket.GetRegion(), copy.dest_bucket.GetRegion());
+}
+
+TEST(CloudFileSystemTest, ConfigureEnv) {
+  std::unique_ptr<CloudFileSystem> cfs;
+
+  ConfigOptions config_options;
+  config_options.invoke_prepare_options = false;
+  ASSERT_OK(CloudFileSystemEnv::CreateFromString(
+      config_options, "keep_local_sst_files=true", &cfs));
+  ASSERT_NE(cfs, nullptr);
+  ASSERT_STREQ(cfs->Name(), "cloud");
+  auto copts = cfs->GetOptions();
+  ASSERT_NE(copts, nullptr);
+  ASSERT_TRUE(copts->keep_local_sst_files);
+}
+
+TEST(CloudFileSystemTest, TestInitialize) {
+  std::unique_ptr<CloudFileSystem> cfs;
+  BucketOptions bucket;
+  ConfigOptions config_options;
+  config_options.invoke_prepare_options = false;
+  ASSERT_OK(CloudFileSystemEnv::CreateFromString(
+      config_options, "id=cloud; TEST=cloudenvtest:/test/path", &cfs));
+  ASSERT_NE(cfs, nullptr);
+  ASSERT_STREQ(cfs->Name(), "cloud");
+
+  ASSERT_TRUE(StartsWith(cfs->GetSrcBucketName(),
+                         bucket.GetBucketPrefix() + "cloudenvtest."));
+  ASSERT_EQ(cfs->GetSrcObjectPath(), "/test/path");
+  ASSERT_TRUE(cfs->SrcMatchesDest());
+
+  ASSERT_OK(CloudFileSystemEnv::CreateFromString(
+      config_options, "id=cloud; TEST=cloudenvtest2:/test/path2?here", &cfs));
+  ASSERT_NE(cfs, nullptr);
+  ASSERT_STREQ(cfs->Name(), "cloud");
+  ASSERT_TRUE(StartsWith(cfs->GetSrcBucketName(),
+                         bucket.GetBucketPrefix() + "cloudenvtest2."));
+  ASSERT_EQ(cfs->GetSrcObjectPath(), "/test/path2");
+  ASSERT_EQ(cfs->GetCloudFileSystemOptions().src_bucket.GetRegion(), "here");
+  ASSERT_TRUE(cfs->SrcMatchesDest());
+
+  ASSERT_OK(CloudFileSystemEnv::CreateFromString(
+      config_options,
+      "id=cloud; TEST=cloudenvtest3:/test/path3; "
+      "src.bucket=my_bucket; dest.object=/my_path",
+      &cfs));
+  ASSERT_NE(cfs, nullptr);
+  ASSERT_STREQ(cfs->Name(), "cloud");
+  ASSERT_EQ(cfs->GetSrcBucketName(), bucket.GetBucketPrefix() + "my_bucket");
+  ASSERT_EQ(cfs->GetSrcObjectPath(), "/test/path3");
+  ASSERT_TRUE(StartsWith(cfs->GetDestBucketName(),
+                         bucket.GetBucketPrefix() + "cloudenvtest3."));
+  ASSERT_EQ(cfs->GetDestObjectPath(), "/my_path");
+}
+
+TEST(CloudFileSystemTest, ConfigureGcpEnv) {
+  std::unique_ptr<CloudFileSystem> cfs;
+
+  ConfigOptions config_options;
+  Status s = CloudFileSystemEnv::CreateFromString(
+      config_options, "id=gcp; keep_local_sst_files=true", &cfs);
+#ifdef USE_GCP
+  ASSERT_OK(s);
+  ASSERT_NE(cfs, nullptr);
+  ASSERT_STREQ(cfs->Name(), "gcp");
+  auto copts = cfs->GetOptions();
+  ASSERT_NE(copts, nullptr);
+  ASSERT_TRUE(copts->keep_local_sst_files);
+  ASSERT_NE(cfs->GetStorageProvider(), nullptr);
+  ASSERT_STREQ(cfs->GetStorageProvider()->Name(),
+               CloudStorageProviderImpl::kGcs());
+#else
+  ASSERT_NOK(s);
+  ASSERT_EQ(cfs, nullptr);
+#endif
+}
+
+TEST(CloudFileSystemTest, ConfigureGcsProvider) {
+  std::unique_ptr<CloudFileSystem> cfs;
+
+  ConfigOptions config_options;
+  Status s = CloudFileSystemEnv::CreateFromString(config_options,
+                                                  "provider=gcs", &cfs);
+  ASSERT_NOK(s);
+  ASSERT_EQ(cfs, nullptr);
+
+#ifdef USE_GCP
+  ASSERT_OK(CloudFileSystemEnv::CreateFromString(config_options,
+                                                 "id=gcp; provider=gcs", &cfs));
+  ASSERT_STREQ(cfs->Name(), "gcp");
+  ASSERT_NE(cfs->GetStorageProvider(), nullptr);
+  ASSERT_STREQ(cfs->GetStorageProvider()->Name(),
+               CloudStorageProviderImpl::kGcs());
+#endif
+}
+}  // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
+
+#else  // USE_GCP
+#include <stdio.h>
+
+int main(int, char**) {
+  fprintf(stderr,
+          "SKIPPED as DBCloud is supported only when USE_GCP is defined.\n");
+  return 0;
+}
+#endif  // USE_GCP
+
+#else  // ROCKSDB_LITE
+#include <stdio.h>
+
+int main(int, char**) {
+  fprintf(stderr, "SKIPPED as DBCloud is not supported in ROCKSDB_LITE.\n");
+  return 0;
+}
+#endif  // ROCKSDB_LITE
diff --git a/cloud/gcp/gcp_retry.cc b/cloud/gcp/gcp_retry.cc
new file mode 100644
index 00000000000..6409ee370b7
--- /dev/null
+++ b/cloud/gcp/gcp_retry.cc
@@ -0,0 +1,126 @@
+#include <memory>
+
+#include "rocksdb/cloud/cloud_file_system.h"
+
+#ifdef USE_GCP
+#include "cloud/gcp/gcp_file_system.h"
+
+#include <chrono>
+#include <google/cloud/options.h>
+#include <google/cloud/status.h>
+#include <google/cloud/storage/client.h>
+#include <google/cloud/storage/idempotency_policy.h>
+#include <google/cloud/storage/retry_policy.h>
+#endif  // USE_GCP
+
+namespace ROCKSDB_NAMESPACE {
+#ifdef USE_GCP
+namespace gcp = ::google::cloud;
+namespace gcs = ::google::cloud::storage;
+
+// A retry policy that limits the total time spent and counts retries.
+class GcpRetryPolicy : public gcs::RetryPolicy {
+ public:
+  template <typename Rep, typename Period>
+  explicit GcpRetryPolicy(
+      CloudFileSystem* fs,
+      std::chrono::duration<Rep, Period> maximum_duration)
+      : cfs_(fs),
+        deadline_(std::chrono::system_clock::now() + maximum_duration),
+        time_based_policy_(maximum_duration) {}
+
+  std::chrono::milliseconds maximum_duration() const {
+    return time_based_policy_.maximum_duration();
+  }
+
+  bool OnFailure(gcp::Status const& s) override {
+    bool is_retryable = time_based_policy_.OnFailure(s);
+    ++failure_count_;
+    if (is_retryable) {
+      // transient failure and retry budget still available
+      if (failure_count_ <= maximum_failures_) {
+        Log(InfoLogLevel::INFO_LEVEL, cfs_->GetLogger(),
+            "[gcs] Encountered failure: %s; "
+            "retried %d / %d times. Retrying...",
+            s.message().c_str(), failure_count_, maximum_failures_);
+        return true;
+      } else {
+        Log(InfoLogLevel::INFO_LEVEL, cfs_->GetLogger(),
+            "[gcs] Encountered failure: %s; "
+            "retry attempt %d exceeds max retries %d. Aborting...",
+            s.message().c_str(), failure_count_, maximum_failures_);
+        // the retry count exceeds the maximum, even though the failure itself
+        // is retryable
+        return false;
+      }
+    } else {
+      // non-transient failure or retry budget exhausted
+      Log(InfoLogLevel::INFO_LEVEL, cfs_->GetLogger(),
+          "[gcs] Encountered permanent failure: %s; "
Aborting...", + s.message().c_str(), failure_count_, maximum_failures_); + return false; + } + } + + bool IsExhausted() const override { + return (time_based_policy_.IsExhausted() || + failure_count_ > maximum_failures_); + } + bool IsPermanentFailure(gcp::Status const& s) const override { + return gcs::internal::StatusTraits::IsPermanentFailure(s); + } + + std::unique_ptr clone() const override { + return std::make_unique( + cfs_, time_based_policy_.maximum_duration()); + } + + private: + // rocksdb retries, etc + int failure_count_ = 0; + int maximum_failures_ = 10; + CloudFileSystem* cfs_; + std::chrono::system_clock::time_point deadline_; + // non-permermanent status in gcs::internal::StatusTraits + gcp::internal::LimitedTimeRetryPolicy + time_based_policy_; +}; + +#endif /* USE_GCP */ + +#ifdef USE_GCP +Status GcpCloudOptions::GetClientConfiguration(CloudFileSystem* fs, + std::string const& /*region*/, + gcp::Options& options) { + // Default gcs operation timeout is 10 minutes after all retrys. + uint64_t timeout_ms = 600000; + // All storage operations are idempotent, so we can use always retry. + options.set( + gcs::AlwaysRetryIdempotencyPolicy().clone()); + + // Use exponential backoff with a 500ms initial delay, 1 minute maximum delay, + // GCS only allows one write per second per object + options.set( + gcs::ExponentialBackoffPolicy(std::chrono::milliseconds(500), + std::chrono::minutes(1), 2.0) + .clone()); + + // Use request_timeout_ms from CloudFileSystemOptions if set. + auto const& cloud_fs_options = fs->GetCloudFileSystemOptions(); + if (cloud_fs_options.request_timeout_ms != 0) { + timeout_ms = cloud_fs_options.request_timeout_ms; + } + // Use timed and max retry count based retry policy. + options.set( + GcpRetryPolicy(fs, std::chrono::milliseconds(timeout_ms)).clone()); + return Status::OK(); +} +//#else +//Status GcpCloudOptions::GetClientConfiguration(CloudFileSystem*, + //std::string const&, + //gcp::Options&) { + //return Status::NotSupported("Not configured for GCP support"); +//} +#endif /* USE_GCP */ + +} // namespace ROCKSDB_NAMESPACE \ No newline at end of file diff --git a/db/version_set.cc b/db/version_set.cc index f3aacbc518c..e6d066bfde0 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -5902,7 +5902,9 @@ Status VersionSet::ProcessManifestWrites( auto epoch = db_options_->replication_epoch_extractor ->EpochOfReplicationSequence( *pending_persist_replication_sequence); +#ifndef NDEBUG bool replication_epoch_set_empty = replication_epochs_.empty(); +#endif replication_epochs_.DeleteEpochsBefore(epoch); // If replication epoch set is not empty before pruning, then it won't // be empty after pruning diff --git a/include/rocksdb/cloud/cloud_file_system.h b/include/rocksdb/cloud/cloud_file_system.h index 31f8dc84804..f408335a528 100644 --- a/include/rocksdb/cloud/cloud_file_system.h +++ b/include/rocksdb/cloud/cloud_file_system.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "rocksdb/cache.h" @@ -25,6 +26,28 @@ class S3Client; } } // namespace Aws +#ifdef USE_GCP +#include + +namespace google { +namespace cloud { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +class Options; +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace cloud +} // namespace google + +namespace google { +namespace cloud { +namespace storage { +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN +class Client; +GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END +} // namespace storage +} // namespace cloud +} // namespace google +#endif + namespace ROCKSDB_NAMESPACE { class 
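For context, the google::cloud::Options assembled by GetClientConfiguration are meant to be handed to a GCS client. Constructing gcs::Client from an Options bag is the upstream google-cloud-cpp API; the surrounding wiring below is only a sketch, and `fs` is assumed to be an already-initialized CloudFileSystem*.

#include <cassert>
#include <google/cloud/storage/client.h>

#include "cloud/gcp/gcp_file_system.h"

namespace gcs = ::google::cloud::storage;
using namespace ROCKSDB_NAMESPACE;

// Sketch: build a GCS client that honors the retry/backoff policies above.
gcs::Client MakeConfiguredClient(CloudFileSystem* fs) {
  google::cloud::Options client_options;
  Status s = GcpCloudOptions::GetClientConfiguration(
      fs, "asia-northeast1" /* region, currently unused */, client_options);
  assert(s.ok());
  return gcs::Client(client_options);  // policies now apply to every call
}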
diff --git a/db/version_set.cc b/db/version_set.cc
index f3aacbc518c..e6d066bfde0 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -5902,7 +5902,9 @@ Status VersionSet::ProcessManifestWrites(
         auto epoch = db_options_->replication_epoch_extractor
                          ->EpochOfReplicationSequence(
                              *pending_persist_replication_sequence);
+#ifndef NDEBUG
         bool replication_epoch_set_empty = replication_epochs_.empty();
+#endif
         replication_epochs_.DeleteEpochsBefore(epoch);
         // If replication epoch set is not empty before pruning, then it won't
         // be empty after pruning
diff --git a/include/rocksdb/cloud/cloud_file_system.h b/include/rocksdb/cloud/cloud_file_system.h
index 31f8dc84804..f408335a528 100644
--- a/include/rocksdb/cloud/cloud_file_system.h
+++ b/include/rocksdb/cloud/cloud_file_system.h
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #include "rocksdb/cache.h"
@@ -25,6 +26,28 @@ class S3Client;
 }
 }  // namespace Aws
 
+#ifdef USE_GCP
+#include <google/cloud/version.h>
+
+namespace google {
+namespace cloud {
+GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN
+class Options;
+GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
+}  // namespace cloud
+}  // namespace google
+
+namespace google {
+namespace cloud {
+namespace storage {
+GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN
+class Client;
+GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
+}  // namespace storage
+}  // namespace cloud
+}  // namespace google
+#endif
+
 namespace ROCKSDB_NAMESPACE {
 class CloudFileSystem;
@@ -98,6 +121,12 @@
 using S3ClientFactory = std::function<std::shared_ptr<Aws::S3::S3Client>(
     const std::shared_ptr<Aws::Auth::AWSCredentialsProvider>&,
     const Aws::Client::ClientConfiguration&)>;
 
+#ifdef USE_GCP
+using GCSClientFactory =
+    std::function<std::shared_ptr<google::cloud::storage::Client>(
+        const google::cloud::Options&)>;
+#endif
+
 // Defines parameters required to connect to Kafka
 class KafkaLogOptions {
  public:
@@ -212,9 +241,20 @@ class CloudFileSystemOptions {
   // Access credentials
   AwsCloudAccessCredentials credentials;
 
+  // Access credentials for GCP.
+  // These are ADC (Application Default Credentials) based, so they are not
+  // manageable from user land.
+  // Reserved for future use.
+  // GcpCloudAccessCredentials gcp_credentials;
+
   // If present, s3_client_factory will be used to create S3Client instances
   S3ClientFactory s3_client_factory;
 
+  // If present, gcs_client_factory will be used to create
+  // GCSClient instances
+#ifdef USE_GCP
+  GCSClientFactory gcs_client_factory;
+#endif
+
   // Only used if keep_local_log_files is true and log_type is kKafka.
   KafkaLogOptions kafka_log_options;
@@ -480,6 +520,7 @@ class CloudFileSystem : public FileSystem {
  public:
   static const char* kCloud() { return "cloud"; }
   static const char* kAws() { return "aws"; }
+  static char const* kGcp() { return "gcp"; }
 
   // Returns the underlying file system
   virtual const std::shared_ptr<FileSystem>& GetBaseFileSystem() const = 0;
@@ -587,6 +628,7 @@ class CloudFileSystem : public FileSystem {
   // Delete both local and cloud invisble files
   virtual IOStatus DeleteCloudInvisibleFiles(
       const std::vector<std::string>& active_cookies) = 0;
+
   // Delete local invisible files. This could be helpful when there is one
   // single instance managing lifetime of files in cloud while the other
   // instances reference and download the files in cloud. The other instances
@@ -672,6 +714,21 @@ class CloudFileSystemEnv {
       const std::shared_ptr<Logger>& logger, CloudFileSystem** cfs);
 
+  static Status NewGcpFileSystem(const std::shared_ptr<FileSystem>& base_fs,
+                                 const std::string& src_bucket_name,
+                                 const std::string& src_object_prefix,
+                                 const std::string& src_bucket_region,
+                                 const std::string& dest_bucket_name,
+                                 const std::string& dest_bucket_prefix,
+                                 const std::string& dest_bucket_region,
+                                 const CloudFileSystemOptions& fs_options,
+                                 const std::shared_ptr<Logger>& logger,
+                                 CloudFileSystem** cfs);
+  static Status NewGcpFileSystem(const std::shared_ptr<FileSystem>& base_fs,
+                                 const CloudFileSystemOptions& fs_options,
+                                 const std::shared_ptr<Logger>& logger,
+                                 CloudFileSystem** cfs);
+
   // Creates a new Env that delegates all thread/time related
   // calls to env, and all file operations to fs
   static std::unique_ptr<Env> NewCompositeEnv(
@@ -683,5 +740,4 @@
       const std::string& cookie, std::unique_ptr<CloudManifest>* cloud_manifest);
 };
-
 }  // namespace ROCKSDB_NAMESPACE
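Where client construction needs customizing (endpoint overrides, instrumentation, test doubles), the new gcs_client_factory hook above can be supplied. The sketch below assumes the factory receives the google::cloud::Options prepared by GcpCloudOptions::GetClientConfiguration; the emulator endpoint override is shown commented out and is purely hypothetical.

#include <memory>
#include <google/cloud/storage/client.h>

#include "rocksdb/cloud/cloud_file_system.h"

namespace gcs = ::google::cloud::storage;

void ConfigureFactory(ROCKSDB_NAMESPACE::CloudFileSystemOptions& cloud_options) {
#ifdef USE_GCP
  cloud_options.gcs_client_factory =
      [](const google::cloud::Options& opts) -> std::shared_ptr<gcs::Client> {
    auto copy = opts;
    // e.g. point at a local storage emulator (hypothetical customization):
    // copy.set<gcs::RestEndpointOption>("http://localhost:9000");
    return std::make_shared<gcs::Client>(std::move(copy));
  };
#endif
}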
diff --git a/include/rocksdb/cloud/cloud_file_system_impl.h b/include/rocksdb/cloud/cloud_file_system_impl.h
index cb672b10e8a..6d7193501e7 100644
--- a/include/rocksdb/cloud/cloud_file_system_impl.h
+++ b/include/rocksdb/cloud/cloud_file_system_impl.h
@@ -29,6 +29,8 @@ class CloudFileSystemImpl : public CloudFileSystem {
   mutable std::shared_ptr<Logger> info_log_;  // informational messages
 
   static int RegisterAwsObjects(ObjectLibrary& library, const std::string& arg);
+  static int RegisterGcpObjects(ObjectLibrary& library, const std::string& arg);
+
   // Constructor
   CloudFileSystemImpl(const CloudFileSystemOptions& options,
                       const std::shared_ptr<FileSystem>& base_fs,
@@ -454,4 +456,4 @@ class CloudFileSystemImpl : public CloudFileSystem {
   std::shared_ptr<CloudFileDeletionScheduler> cloud_file_deletion_scheduler_;
 };
 
-}  // namespace ROCKSDB_NAMESPACE
+}  // namespace ROCKSDB_NAMESPACE
\ No newline at end of file
diff --git a/include/rocksdb/cloud/cloud_storage_provider_impl.h b/include/rocksdb/cloud/cloud_storage_provider_impl.h
index 22d8aa47dc9..cc4cb2f16fa 100644
--- a/include/rocksdb/cloud/cloud_storage_provider_impl.h
+++ b/include/rocksdb/cloud/cloud_storage_provider_impl.h
@@ -117,7 +117,10 @@ class Random64;
 class CloudStorageProviderImpl : public CloudStorageProvider {
  public:
   static Status CreateS3Provider(std::unique_ptr<CloudStorageProvider>* result);
+  static Status CreateGcsProvider(
+      std::unique_ptr<CloudStorageProvider>* result);
   static const char* kS3() { return "s3"; }
+  static const char* kGcs() { return "gcs"; }
 
   CloudStorageProviderImpl();
   virtual ~CloudStorageProviderImpl();
diff --git a/src.mk b/src.mk
index 238d57bf2d0..cb9767970b3 100644
--- a/src.mk
+++ b/src.mk
@@ -18,6 +18,9 @@ LIB_SOURCES = \
   cloud/aws/aws_kinesis.cc \
   cloud/aws/aws_retry.cc \
   cloud/aws/aws_s3.cc \
+  cloud/gcp/gcp_cs.cc \
+  cloud/gcp/gcp_file_system.cc \
+  cloud/gcp/gcp_retry.cc \
   cloud/db_cloud_impl.cc \
   cloud/cloud_file_system.cc \
   cloud/cloud_file_system_impl.cc \
@@ -455,6 +458,8 @@ TEST_MAIN_SOURCES = \
   cache/cache_reservation_manager_test.cc \
   cloud/db_cloud_test.cc \
   cloud/cloud_file_system_test.cc \
+  cloud/gcp/gcp_file_system_test.cc \
+  cloud/gcp/gcp_db_cloud_test.cc \
   cloud/cloud_manifest_test.cc \
   cloud/cloud_scheduler_test.cc \
   cloud/replication_test.cc \
@@ -750,4 +755,4 @@ JNI_NATIVE_SOURCES = \
   java/rocksjni/writebatchhandlerjnicallback.cc \
   java/rocksjni/write_batch_test.cc \
   java/rocksjni/write_batch_with_index.cc \
-  java/rocksjni/write_buffer_manager.cc
+  java/rocksjni/write_buffer_manager.cc
\ No newline at end of file