From 1144bda2d8d5c4eb8259915d78157bb2e6ca0aa5 Mon Sep 17 00:00:00 2001 From: David Teigland Date: May 14 2024 15:31:32 +0000 Subject: sanlock: skip delay when same host acquires delta lease When the previous delta lease owner matches our host name, and our host name includes the local product_uuid, then allow reacquiring the delta lease without a delay, even if it's not free (assumption is this host did not release it cleanly previously.) --- diff --git a/src/delta_lease.c b/src/delta_lease.c index 4ac63ef..55a5bba 100644 --- a/src/delta_lease.c +++ b/src/delta_lease.c @@ -340,7 +340,8 @@ int delta_lease_acquire(struct task *task, uint32_t checksum; int other_io_timeout, other_host_dead_seconds, other_id_renewal_seconds; int i, error, rv, delay, delta_large_delay; - int fast_free_delay = 0; + int is_free, is_same; + int no_delay = 0; log_space(sp, "delta_acquire begin %.48s:%llu", sp->space_name, (unsigned long long)host_id); @@ -353,37 +354,58 @@ int delta_lease_acquire(struct task *task, } other_io_timeout = leader.io_timeout; - - if (!other_io_timeout) { - log_erros(sp, "delta_acquire use own io_timeout %d", sp->io_timeout); + if (!other_io_timeout) other_io_timeout = sp->io_timeout; - } else if (other_io_timeout != sp->io_timeout) { - log_erros(sp, "delta_acquire other_io_timeout %u our %u", - leader.io_timeout, sp->io_timeout); - } /* - * This delay-free reacquire could probably be used whenever - * our_host_name matches the current leader.resource_name, - * regardless of wheter the lease is free (i.e. we cleanly - * released the lease last time we held it.) + * If the delta lease is free, and the prev owner matches our host + * name, then reacquire with no delay. + * + * If the delta lease is not free, and the prev owner matches our host + * name, and our host name is from product_uuid, then reacquire with no + * delay. Assumption here is that the delta lease is not free because + * this host did not release it cleanly last time. 
The non-free lease + * generally indicates it's being used by someone, and we should + * monitor it for a renewal period for updates. But, we skip this + * monitoring given the certainty that comes from the owner being + * the product_uuid, and the assumption that our product_uuid will not + * be used by another host. + * + * If the prev owner was not our name, we delay for a renewal period + * to monitor for current use from another host. + * + * If the lease is not free, we delay and monitor for a renewal period + * to monitor (except when owner is our product_uuid per above.) + * + * If the lease is not free, and the owner does not match our host + * name, then use a long delay to monitor for other hosts using it, + * or to ensure a prev host using this lease is dead. */ - if (!strncmp(leader.resource_name, our_host_name, NAME_ID_SIZE) && - (leader.timestamp == LEASE_FREE)) { - log_space(sp, "delta_acquire free fast reacquire"); - fast_free_delay = 1; - goto write_new; - } - if (leader.timestamp == LEASE_FREE) - goto write_new; + is_free = (leader.timestamp == LEASE_FREE); + is_same = !memcmp(leader.resource_name, our_host_name, NAME_ID_SIZE); + + if (!is_same) + log_debug("delta_acquire new owner %.48s old owner %.48s", our_host_name, leader.resource_name); + + if (is_same || is_free) { + if (is_same && is_free) + no_delay = 1; + if (is_same && our_host_name_matches_product_uuid) + no_delay = 1; + + log_space(sp, "delta_acquire %s owner, %s free, %s our_product_uuid, %s delay, other_io_timeout %d", + is_same ? "same" : "new", + is_free ? "is" : "not", + our_host_name_matches_product_uuid ? "is" : "not", + no_delay ? 
"no" : "short", + other_io_timeout); - if (!strncmp(leader.resource_name, our_host_name, NAME_ID_SIZE)) { - log_space(sp, "delta_acquire fast reacquire"); goto write_new; } - /* we need to ensure that a host_id cannot be acquired and released + /* + * we need to ensure that a host_id cannot be acquired and released * sooner than host_dead_seconds because the change in host_id * ownership affects the host_id "liveness" determination used by paxos * leases, and the ownership of paxos leases cannot change until after @@ -399,18 +421,15 @@ int delta_lease_acquire(struct task *task, * paxos leases cannot change ownership until a min of * host_dead_seconds to ensure the watchdog has fired. So, the timeout * we use here must be the max of the delta delay (D+6d) and - * host_dead_seconds */ - - /* - * delay = task->host_dead_seconds; - * delta_large_delay = task->id_renewal_seconds + (6 * task->io_timeout_seconds); - * if (delta_large_delay > delay) - * delay = delta_large_delay; + * host_dead_seconds. 
*/ other_host_dead_seconds = calc_host_dead_seconds(other_io_timeout); other_id_renewal_seconds = calc_id_renewal_seconds(other_io_timeout); + log_space(sp, "delta_acquire other_host %.48s, timestamp %llu, other_io_timeout %d", + leader.resource_name, (unsigned long long)leader.timestamp, other_io_timeout); + delay = other_host_dead_seconds; delta_large_delay = other_id_renewal_seconds + (6 * other_io_timeout); if (delta_large_delay > delay) @@ -458,11 +477,12 @@ int delta_lease_acquire(struct task *task, write_new: new_ts = monotime(); + leader.version = DELTA_DISK_VERSION_MAJOR | DELTA_DISK_VERSION_MINOR; leader.timestamp = new_ts; leader.io_timeout = (sp->io_timeout & 0x00FF); leader.owner_id = host_id; leader.owner_generation++; - snprintf(leader.resource_name, NAME_ID_SIZE, "%s", our_host_name); + memcpy(leader.resource_name, our_host_name, NAME_ID_SIZE); leader.checksum = 0; /* set below */ log_space(sp, "delta_acquire write %llu %llu %llu %.48s", @@ -489,10 +509,12 @@ int delta_lease_acquire(struct task *task, memcpy(&leader1, &leader, sizeof(struct leader_record)); - if (fast_free_delay) - delay = 1; - else - delay = 2 * other_io_timeout; + if (no_delay) { + usleep(10000); + goto reread; + } + + delay = 2 * other_io_timeout; log_space(sp, "delta_acquire delta_short_delay %d", delay); for (i = 0; i < delay; i++) { @@ -504,6 +526,7 @@ int delta_lease_acquire(struct task *task, sleep(1); } +reread: error = delta_lease_leader_read(task, sp->sector_size, sp->io_timeout, disk, space_name, host_id, &leader, "delta_acquire_check"); if (error < 0) { diff --git a/src/main.c b/src/main.c index eb906c9..5404554 100644 --- a/src/main.c +++ b/src/main.c @@ -1486,17 +1486,20 @@ int get_rand(int a, int b) return a + (int) (((float)(b - a + 1)) * rv / (RAND_MAX+1.0)); } -static void read_product_uuid(char *buf, size_t buf_size) +static void read_product_uuid(void) { + FILE *fp; char full[256] = { 0 }; int len; - FILE *fp; + int i, j; + + memset(our_product_uuid, 0, 
sizeof(our_product_uuid)); + memset(our_product_uuid_compact, 0, sizeof(our_product_uuid_compact)); if (!(fp = fopen("/sys/devices/virtual/dmi/id/product_uuid", "r"))) return; if (!fgets(full, sizeof(full), fp)) { - buf[0] = '\0'; fclose(fp); return; } @@ -1509,13 +1512,14 @@ static void read_product_uuid(char *buf, size_t buf_size) if (len && full[len - 1] == '\n') full[--len] = '\0'; + len = strlen(full); + /* * Randomly pick 16 as a minimum legitimate size for a product_uuid * (expected to be 36 for a proper uuid including dashes) */ if (len < 16) { log_debug("Ignore product_uuid that is too short %d (%s)", len, full); - buf[0] = '\0'; return; } @@ -1525,27 +1529,34 @@ static void read_product_uuid(char *buf, size_t buf_size) */ if (len > SANLK_NAME_LEN) { log_debug("Ignore product_uuid that is too long %d (%s)", len, full); - buf[0] = '\0'; return; } - /* - * buf_size is NAME_LEN+1 for easy printing (+1 is \0), - * the actual size used in leader_record is NAME_LEN - * with no required termination. 
- */ - memcpy(buf, full, SANLK_NAME_LEN); + memcpy(our_product_uuid, full, SANLK_NAME_LEN); + + if (strchr(our_product_uuid, '-')) { + for (i = 0, j = 0; i < len; i++) { + if (our_product_uuid[i] == '-') + continue; + our_product_uuid_compact[j++] = our_product_uuid[i]; + } + + if (strlen(our_product_uuid_compact) < 16) { + log_debug("Ignore compact product_uuid that is too short (%s)", our_product_uuid_compact); + memset(our_product_uuid_compact, 0, sizeof(our_product_uuid_compact)); + } + } } static void setup_host_name(void) { char our_host_name_long[1024] = { 0 }; /* temp buf for snprintf, then memcpy to _global */ - char product_uuid[SANLK_NAME_LEN+1] = { 0 }; char rand_uuid[37] = { 0 }; struct utsname name = { 0 }; uuid_t uu; - memset(&our_host_name_global, 0, sizeof(our_host_name_global)); + read_product_uuid(); + uname(&name); /* * Get host name value from: @@ -1557,20 +1568,42 @@ static void setup_host_name(void) if (com.our_host_name_opt[0]) { memcpy(our_host_name_global, com.our_host_name_opt, SANLK_NAME_LEN); + + /* + * user could configure our_host_name using product_uuid, + * in which case we can enable the no delay optimization + * when using product_uuid. 
+ */ + + if (our_product_uuid[0] && + !strncmp(our_host_name_global, our_product_uuid, strlen(our_product_uuid))) + our_host_name_matches_product_uuid = 1; + + if (our_product_uuid_compact[0] && + !strncmp(our_host_name_global, our_product_uuid_compact, strlen(our_product_uuid_compact))) + our_host_name_matches_product_uuid = 1; + if (strlen(com.our_host_name_opt) > SANLK_NAME_LEN) - log_warn("our_host_name shortened from config %s to: %s", - com.our_host_name_opt, our_host_name_global); + log_warn("our_host_name shortened from config %s len %ld to: %s", + com.our_host_name_opt, strlen(com.our_host_name_opt), our_host_name_global); else log_debug("our_host_name set from config: %s", our_host_name_global); + + if (our_host_name_matches_product_uuid) + log_debug("our_host_name uses product_uuid"); return; } - read_product_uuid(product_uuid, sizeof(product_uuid)); - uname(&name); + /* + * A typical 36 char uuid, plus a '.' separator, leaves space for the first + * 11 characters of the local nodename, which is included at the end mainly + * to help with readability/debugging. + */ - if (product_uuid[0]) { - snprintf(our_host_name_long, sizeof(our_host_name_long), "%s.%s", product_uuid, name.nodename); + if (our_product_uuid[0]) { + snprintf(our_host_name_long, sizeof(our_host_name_long), "%s.%s", our_product_uuid, name.nodename); memcpy(our_host_name_global, our_host_name_long, SANLK_NAME_LEN); + our_host_name_matches_product_uuid = 1; log_debug("our_host_name set from product_uuid: %s", our_host_name_global); } else { memset(rand_state, 0, sizeof(rand_state)); diff --git a/src/sanlock.8 b/src/sanlock.8 index cddf23f..9b71059 100644 --- a/src/sanlock.8 +++ b/src/sanlock.8 @@ -1350,6 +1350,7 @@ command line (-e), sanlock attempts to set our_host_name from /sys/devices/virtual/dmi/id/product_uuid. If that is not available, sanlock generates a random uuid to use as our_host_name. Using a fixed our_host_name value will reduce delays when using a lockspace. 
+Using product_uuid will reduce delays further. .IP \[bu] 2 renewal_read_extend_sec = diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h index 5de92b5..dda7d25 100644 --- a/src/sanlock_internal.h +++ b/src/sanlock_internal.h @@ -457,7 +457,10 @@ enum { }; EXTERN int external_shutdown; +EXTERN int our_host_name_matches_product_uuid; EXTERN char our_host_name_global[SANLK_NAME_LEN+1]; +EXTERN char our_product_uuid[SANLK_NAME_LEN+1]; +EXTERN char our_product_uuid_compact[SANLK_NAME_LEN+1]; /* dash chars omitted from uuid */ EXTERN int kill_count_max; EXTERN int is_helper;