Skip to content

Commit

Permalink
DAOS-16752 build: Merge branch 'release/2.6' into soumagne/mercury_24…
Browse files Browse the repository at this point in the history
…0_final_26

PR-repos: mercury@PR-122:lastBuild

BuildPriority: 2

Required-githooks: true
  • Loading branch information
soumagne committed Nov 5, 2024
2 parents 84a56d1 + 9ab3200 commit 1528aae
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 20 deletions.
4 changes: 4 additions & 0 deletions src/common/checksum.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,10 @@ daos_csummer_compare_csum_info(struct daos_csummer *obj,
match = daos_csummer_csum_compare(obj, ci_idx2csum(a, i),
ci_idx2csum(b, i),
a->cs_len);
if (unlikely(!match))
D_ERROR("Checksum mismatch at index %d/%d "DF_CI_BUF" != "DF_CI_BUF"\n", i,
a->cs_nr, DP_CI_BUF(ci_idx2csum(a, i), a->cs_len),
DP_CI_BUF(ci_idx2csum(b, i), b->cs_len));
}

return match;
Expand Down
3 changes: 3 additions & 0 deletions src/object/cli_csum.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#include <daos/cont_props.h>
#include "obj_internal.h"

/**
 * Maximum number of times an UPDATE RPC is retried after a checksum error.
 * Once the per-request retry counter reaches this limit, the I/O is failed
 * instead of being retried again.
 */
#define MAX_CSUM_RETRY 10

int dc_obj_csum_update(struct daos_csummer *csummer, struct cont_props props, daos_obj_id_t param,
daos_key_t *dkey, daos_iod_t *iods, d_sg_list_t *sgls, const uint32_t iod_nr,
struct dcs_layout *layout, struct dcs_csum_info **dkey_csum,
Expand Down
38 changes: 30 additions & 8 deletions src/object/cli_obj.c
Original file line number Diff line number Diff line change
Expand Up @@ -4684,12 +4684,15 @@ obj_comp_cb(tse_task_t *task, void *data)
int rc;

obj_auxi = tse_task_stack_pop(task, sizeof(*obj_auxi));
obj_auxi->io_retry = 0;
obj_auxi->result = 0;
obj_auxi->csum_retry = 0;
obj_auxi->tx_uncertain = 0;
obj_auxi->nvme_io_err = 0;
obj = obj_auxi->obj;

/** Clear various bits for a new attempt */
obj_auxi->io_retry = 0;
obj_auxi->result = 0;
obj_auxi->csum_retry = 0;
obj_auxi->tx_uncertain = 0;
obj_auxi->nvme_io_err = 0;

rc = obj_comp_cb_internal(obj_auxi);
if (rc != 0 || obj_auxi->result) {
if (task->dt_result == 0)
Expand Down Expand Up @@ -4760,9 +4763,28 @@ obj_comp_cb(tse_task_t *task, void *data)
obj_auxi->tx_uncertain = 1;
else
obj_auxi->nvme_io_err = 1;
} else if (task->dt_result != -DER_NVME_IO) {
/* Don't retry update for CSUM & UNCERTAIN errors */
obj_auxi->io_retry = 0;
} else {
if (obj_auxi->opc == DAOS_OBJ_RPC_UPDATE &&
task->dt_result == -DER_CSUM) {
struct shard_rw_args *rw_arg = &obj_auxi->rw_args;

/** Retry a few times on checksum error on update */
if (rw_arg->csum_retry_cnt < MAX_CSUM_RETRY) {
obj_auxi->csum_retry = 1;
rw_arg->csum_retry_cnt++;
D_DEBUG(DB_IO, DF_OID" checksum error on "
"update, retrying\n",
DP_OID(obj->cob_md.omd_id));
} else {
D_ERROR(DF_OID" checksum error on update, "
"too many retries. Failing I/O\n",
DP_OID(obj->cob_md.omd_id));
obj_auxi->io_retry = 0;
}
} else if (task->dt_result != -DER_NVME_IO) {
/* Don't retry update for UNCERTAIN errors */
obj_auxi->io_retry = 0;
}
}
} else {
obj_auxi->io_retry = 0;
Expand Down
3 changes: 2 additions & 1 deletion src/object/obj_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ struct shard_rw_args {
struct dcs_csum_info *dkey_csum;
struct dcs_iod_csums *iod_csums;
struct obj_reasb_req *reasb_req;
uint16_t csum_retry_cnt;
};

struct coll_sparse_targets {
Expand Down Expand Up @@ -480,8 +481,8 @@ struct obj_auxi_args {
rebuilding:1,
for_migrate:1;
/* request flags. currently only: ORF_RESEND */
uint32_t flags;
uint32_t specified_shard;
uint32_t flags;
uint16_t retry_cnt;
uint16_t inprogress_cnt;
struct obj_req_tgts req_tgts;
Expand Down
31 changes: 22 additions & 9 deletions src/vos/vos_obj_index.c
Original file line number Diff line number Diff line change
Expand Up @@ -791,13 +791,14 @@ oi_iter_process(struct vos_iterator *iter, vos_iter_proc_op_t op, void *args)
int
oi_iter_check_punch(daos_handle_t ih)
{
struct vos_iterator *iter = vos_hdl2iter(ih);
struct vos_oi_iter *oiter = iter2oiter(iter);
struct vos_obj_df *obj;
struct oi_delete_arg del_arg;
daos_unit_oid_t oid;
d_iov_t rec_iov;
int rc;
struct vos_iterator *iter = vos_hdl2iter(ih);
struct vos_oi_iter *oiter = iter2oiter(iter);
struct vos_container *cont = oiter->oit_cont;
struct vos_obj_df *obj;
struct oi_delete_arg del_arg;
daos_unit_oid_t oid;
d_iov_t rec_iov;
int rc;

D_ASSERT(iter->it_type == VOS_ITER_OBJ);

Expand All @@ -811,10 +812,22 @@ oi_iter_check_punch(daos_handle_t ih)
obj = (struct vos_obj_df *)rec_iov.iov_buf;
oid = obj->vo_id;

if (!vos_ilog_is_punched(vos_cont2hdl(oiter->oit_cont), &obj->vo_ilog, &oiter->oit_epr,
NULL, &oiter->oit_ilog_info))
if (!vos_ilog_is_punched(vos_cont2hdl(cont), &obj->vo_ilog, &oiter->oit_epr, NULL,
&oiter->oit_ilog_info))
return 0;

rc = vos_obj_hold(vos_obj_cache_current(cont->vc_pool->vp_sysdb), cont, oid,
&oiter->oit_epr, iter->it_bound, VOS_OBJ_AGGREGATE | VOS_OBJ_NO_HOLD,
DAOS_INTENT_PURGE, NULL, NULL);
if (rc != 0) {
/** -DER_BUSY means the object is in-use already. We will retry after a yield in this
* case.
*/
D_CDEBUG(rc == -DER_BUSY, DB_EPC, DLOG_ERR, "Hold check failed for " DF_UOID "\n",
DP_UOID(oid));
return rc;
}

/** Ok, ilog is fully punched, so we can move it to gc heap */
rc = umem_tx_begin(vos_cont2umm(oiter->oit_cont), NULL);
if (rc != 0)
Expand Down
3 changes: 1 addition & 2 deletions utils/build.config
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pmdk=2.1.0
isal=v2.30.0
isal_crypto=v2.23.0
spdk=v22.01.2
ofi=v1.19.1
ofi=v1.22.0
mercury=v2.4.0
protobufc=v1.3.3
ucx=v1.14.1
Expand All @@ -27,7 +27,6 @@ ucx=https://github.com/openucx/ucx.git

[patch_versions]
spdk=https://github.com/spdk/spdk/commit/b0aba3fcd5aceceea530a702922153bc75664978.diff,https://github.com/spdk/spdk/commit/445a4c808badbad3942696ecf16fa60e8129a747.diff
ofi=https://github.com/ofiwg/libfabric/commit/d827c6484cc5bf67dfbe395890e258860c3f0979.diff
fuse=https://github.com/libfuse/libfuse/commit/c9905341ea34ff9acbc11b3c53ba8bcea35eeed8.diff
mercury=https://raw.githubusercontent.com/daos-stack/mercury/f3dc286fb40ec1a3a38a2e17c45497bc2aa6290d/na_ucx.patch
pmdk=https://github.com/pmem/pmdk/commit/2abe15ac0b4eed894b6768cd82a3b0a7c4336284.diff

0 comments on commit 1528aae

Please sign in to comment.