From 00115728fbc5da88c6330ccac4ded2275bef69bc Mon Sep 17 00:00:00 2001 From: David Teigland Date: Aug 08 2012 21:57:22 +0000 Subject: sanlock/wdmd: remove global connection As long as the sanlock daemon was running, it kept a constant connection to wdmd, even when no lockspaces existed. This prevented sanlock and wdmd from being restarted independently, even when they were unused. Independent restarting is necessary for upgrades, so remove the global connection from sanlock to wdmd and leave only the per-lockspace connections. The lockspace connections now need to hold a refcount on wdmd which prevents wdmd restarts. Also in wdmd, if a client connection is closed, the refcount must not be cleared on it, otherwise wdmd could possibly be cleanly shutdown from sigterm while an expired connection was awaiting a watchdog reset. Also log at the error level when we kill a pid for recovery, when that pid exits, and when all pids are clear for recovery. This makes it much simpler to see exactly what led up to a watchdog reset after the fact. Signed-off-by: David Teigland --- diff --git a/src/cmd.c b/src/cmd.c index bb9b08c..bc7d7da 100644 --- a/src/cmd.c +++ b/src/cmd.c @@ -1422,6 +1422,7 @@ static int print_state_lockspace(struct space *sp, char *str, const char *list_n "list=%s " "space_id=%u " "host_generation=%llu " + "renew_fail=%d " "space_dead=%d " "killing_pids=%d " "corrupt_result=%d " @@ -1434,6 +1435,7 @@ static int print_state_lockspace(struct space *sp, char *str, const char *list_n list_name, sp->space_id, (unsigned long long)sp->host_generation, + sp->renew_fail, sp->space_dead, sp->killing_pids, sp->lease_status.corrupt_result, diff --git a/src/lockspace.c b/src/lockspace.c index 6618f16..3b4063c 100644 --- a/src/lockspace.c +++ b/src/lockspace.c @@ -450,7 +450,7 @@ static void *lockspace_thread(void *arg_in) rv = create_watchdog_file(sp, last_success); if (rv < 0) { log_erros(sp, "create_watchdog failed %d", rv); - acquire_result = SANLK_ERROR; + acquire_result = SANLK_WD_ERROR; } } @@ -704,7 +704,7 @@ int add_lockspace_wait(struct space *sp) /* the thread exits right away if acquire fails */ pthread_join(sp->thread, NULL); rv = result; - log_space(sp, "add_lockspace fail lease_status %d", result); + log_erros(sp, "add_lockspace fail result %d", result); goto fail_del; } diff --git a/src/main.c b/src/main.c index 247b3f4..e5f0885 100644 --- a/src/main.c +++ b/src/main.c @@ -109,7 +109,7 @@ static void close_helper(void) * msgs before getting EAGAIN. */ -static void send_helper_kill(struct client *cl, int sig) +static void send_helper_kill(struct space *sp, struct client *cl, int sig) { struct helper_msg hm; int rv; @@ -140,6 +140,8 @@ static void send_helper_kill(struct client *cl, int sig) hm.pid = cl->pid; } + log_erros(sp, "kill %d sig %d count %d", cl->pid, sig, cl->kill_count); + retry: rv = write(helper_kill_fd, &hm, sizeof(hm)); if (rv == -1 && errno == EINTR) @@ -148,21 +150,21 @@ static void send_helper_kill(struct client *cl, int sig) /* pipe is full, we'll try again in a second */ if (rv == -1 && errno == EAGAIN) { helper_full_count++; - log_debug("send_helper_kill pid %d sig %d full_count %u", + log_space(sp, "send_helper_kill pid %d sig %d full_count %u", cl->pid, sig, helper_full_count); return; } /* helper exited or closed fd, quit using helper */ if (rv == -1 && errno == EPIPE) { - log_error("send_helper_kill EPIPE"); + log_erros(sp, "send_helper_kill EPIPE"); close_helper(); return; } if (rv != sizeof(hm)) { /* this shouldn't happen */ - log_error("send_helper_kill pid %d error %d %d", + log_erros(sp, "send_helper_kill pid %d error %d %d", cl->pid, rv, errno); close_helper(); return; @@ -445,6 +447,9 @@ void client_pid_dead(int ci) log_debug("client_pid_dead %d,%d,%d cmd_active %d suspend %d", ci, cl->fd, cl->pid, cl->cmd_active, cl->suspend); + if (cl->kill_count) + log_error("dead %d ci %d count %d", cl->pid, ci, cl->kill_count); + cmd_active = cl->cmd_active; pid = cl->pid; cl->pid = -1; @@ -608,15 +613,7 @@ static void kill_pids(struct space *sp) if (!do_kill) continue; - if (cl->kill_count == kill_count_max) { - log_erros(sp, "kill %d,%d,%d sig %d count %d final attempt", - ci, fd, pid, sig, cl->kill_count); - } else { - log_space(sp, "kill %d,%d,%d sig %d count %d", - ci, fd, pid, sig, cl->kill_count); - } - - send_helper_kill(cl, sig); + send_helper_kill(sp, cl, sig); } } @@ -654,7 +651,11 @@ static int all_pids_dead(struct space *sp) if (stuck || check) return 0; - log_space(sp, "used by no pids"); + if (sp->renew_fail) + log_erros(sp, "all pids clear"); + else + log_space(sp, "all pids clear"); + return 1; } @@ -765,6 +766,8 @@ static int main_loop(void) check_all = 0; rv = check_our_lease(&main_task, sp, &check_all, check_buf); + if (rv) + sp->renew_fail = 1; if (rv || sp->external_remove || (external_shutdown > 1)) { log_space(sp, "set killing_pids check %d remove %d", @@ -1578,10 +1581,6 @@ static int do_daemon(void) if (rv < 0) goto out_logging; - rv = setup_watchdog(); - if (rv < 0) - goto out_threads; - rv = setup_listener(); if (rv < 0) goto out_threads; @@ -1594,8 +1593,6 @@ static int do_daemon(void) close_token_manager(); - close_watchdog(); - out_threads: thread_pool_free(); out_logging: diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h index a69b367..c8301eb 100644 --- a/src/sanlock_internal.h +++ b/src/sanlock_internal.h @@ -144,6 +144,7 @@ struct space { uint64_t host_generation; struct sync_disk host_id_disk; int align_size; + int renew_fail; int space_dead; int killing_pids; int external_remove; diff --git a/src/sanlock_rv.h b/src/sanlock_rv.h index 95234c7..686603a 100644 --- a/src/sanlock_rv.h +++ b/src/sanlock_rv.h @@ -14,6 +14,7 @@ #define SANLK_NONE 0 /* unused */ #define SANLK_ERROR -201 #define SANLK_AIO_TIMEOUT -202 +#define SANLK_WD_ERROR -203 /* run_ballot */ diff --git a/src/watchdog.c b/src/watchdog.c index 3387eb9..296d735 100644 --- a/src/watchdog.c +++ b/src/watchdog.c @@ -39,8 +39,6 @@ #include "../wdmd/wdmd.h" -static int daemon_wdmd_con; - void update_watchdog_file(struct space *sp, uint64_t timestamp) { int rv; @@ -50,12 +48,15 @@ void update_watchdog_file(struct space *sp, uint64_t timestamp) rv = wdmd_test_live(sp->wd_fd, timestamp, timestamp + main_task.id_renewal_fail_seconds); if (rv < 0) - log_erros(sp, "wdmd_test_live failed %d", rv); + log_erros(sp, "wdmd_test_live %llu failed %d", + (unsigned long long)timestamp, rv); } int create_watchdog_file(struct space *sp, uint64_t timestamp) { char name[WDMD_NAME_SIZE]; + int test_interval, fire_timeout; + uint64_t last_keepalive; int con, rv; if (!com.use_watchdog) @@ -63,30 +64,52 @@ int create_watchdog_file(struct space *sp, uint64_t timestamp) con = wdmd_connect(); if (con < 0) { - log_erros(sp, "wdmd connect failed %d", con); + log_erros(sp, "wdmd_connect failed %d", con); goto fail; } memset(name, 0, sizeof(name)); - snprintf(name, WDMD_NAME_SIZE - 1, "sanlock_%s_hostid%llu", + snprintf(name, WDMD_NAME_SIZE - 1, "sanlock_%s:%llu", sp->space_name, (unsigned long long)sp->host_id); rv = wdmd_register(con, name); if (rv < 0) { - log_erros(sp, "wdmd register failed %d", rv); + log_erros(sp, "wdmd_register failed %d", rv); goto fail_close; } - rv = wdmd_test_live(con, timestamp, timestamp + main_task.id_renewal_fail_seconds); + /* the refcount tells wdmd that it should not cleanly exit */ + + rv = wdmd_refcount_set(con); if (rv < 0) { - log_erros(sp, "wdmd_test_live failed %d", rv); + log_erros(sp, "wdmd_refcount_set failed %d", rv); goto fail_close; } + rv = wdmd_status(con, &test_interval, &fire_timeout, &last_keepalive); + if (rv < 0) { + log_erros(sp, "wdmd_status failed %d", rv); + goto fail_clear; + } + + if (fire_timeout != WATCHDOG_FIRE_TIMEOUT) { + log_erros(sp, "wdmd invalid fire_timeout %d vs %d", + fire_timeout, WATCHDOG_FIRE_TIMEOUT); + goto fail_clear; + } + + rv = wdmd_test_live(con, timestamp, timestamp + main_task.id_renewal_fail_seconds); + if (rv < 0) { + log_erros(sp, "wdmd_test_live in create failed %d", rv); + goto fail_clear; + } + sp->wd_fd = con; return 0; + fail_clear: + wdmd_refcount_clear(con); fail_close: close(con); fail: @@ -103,85 +126,27 @@ void unlink_watchdog_file(struct space *sp) log_space(sp, "wdmd_test_live 0 0 to disable"); rv = wdmd_test_live(sp->wd_fd, 0, 0); - if (rv < 0) - log_erros(sp, "wdmd_test_live failed %d", rv); -} + if (rv < 0) { + log_erros(sp, "wdmd_test_live in unlink failed %d", rv); -void close_watchdog_file(struct space *sp) -{ - if (!com.use_watchdog) - return; + /* We really want this to succeed to avoid a reset, so retry + after a short delay in case the problem was transient... */ - close(sp->wd_fd); -} + usleep(500000); -void close_watchdog(void) -{ - if (!com.use_watchdog) - return; + rv = wdmd_test_live(sp->wd_fd, 0, 0); + if (rv < 0) + log_erros(sp, "wdmd_test_live in unlink 2 failed %d", rv); + } - wdmd_refcount_clear(daemon_wdmd_con); - close(daemon_wdmd_con); + wdmd_refcount_clear(sp->wd_fd); } -/* TODO: add wdmd connection as client so poll detects if it fails? */ - -int setup_watchdog(void) +void close_watchdog_file(struct space *sp) { - char name[WDMD_NAME_SIZE]; - int test_interval, fire_timeout; - uint64_t last_keepalive; - int con, rv; - if (!com.use_watchdog) - return 0; - - memset(name, 0, sizeof(name)); - - snprintf(name, WDMD_NAME_SIZE - 1, "%s", "sanlock_daemon"); - - con = wdmd_connect(); - if (con < 0) { - log_error("wdmd connect failed for watchdog handling"); - goto fail; - } - - rv = wdmd_register(con, name); - if (rv < 0) { - log_error("wdmd register failed"); - goto fail_close; - } - - rv = wdmd_refcount_set(con); - if (rv < 0) { - log_error("wdmd refcount failed"); - goto fail_close; - } - - rv = wdmd_status(con, &test_interval, &fire_timeout, &last_keepalive); - if (rv < 0) { - log_error("wdmd status failed"); - goto fail_clear; - } - - log_debug("wdmd test_interval %d fire_timeout %d last_keepalive %llu", - test_interval, fire_timeout, - (unsigned long long)last_keepalive); - - if (fire_timeout != WATCHDOG_FIRE_TIMEOUT) { - log_error("invalid watchdog fire_timeout %d vs %d", - fire_timeout, WATCHDOG_FIRE_TIMEOUT); - goto fail_clear; - } - - daemon_wdmd_con = con; - return 0; + return; - fail_clear: - wdmd_refcount_clear(con); - fail_close: - close(con); - fail: - return -1; + close(sp->wd_fd); } diff --git a/src/watchdog.h b/src/watchdog.h index 06d3c67..ec3c853 100644 --- a/src/watchdog.h +++ b/src/watchdog.h @@ -14,7 +14,4 @@ int create_watchdog_file(struct space *sp, uint64_t timestamp); void unlink_watchdog_file(struct space *sp); void close_watchdog_file(struct space *sp); -int setup_watchdog(void); -void close_watchdog(void); - #endif diff --git a/wdmd/main.c b/wdmd/main.c index 17687be..5ed2cd6 100644 --- a/wdmd/main.c +++ b/wdmd/main.c @@ -182,6 +182,9 @@ static void client_pid_dead(int ci) close(client[ci].fd); + /* refcount automatically dropped if a client with + no expiration is closed */ + client[ci].used = 0; memset(&client[ci], 0, sizeof(struct client)); @@ -189,16 +192,32 @@ static void client_pid_dead(int ci) pollfd[ci].fd = -1; pollfd[ci].events = 0; } else { - /* test_clients() needs to continue watching this ci so - it can expire */ - - log_debug("client_pid_dead ci %d expire %llu", ci, - (unsigned long long)client[ci].expire); + /* + * Leave used and expire set so that test_clients will continue + * monitoring this client and expire if necessary. + * + * Leave refcount set so that the daemon will not cleanly shut + * down if it gets a sigterm. + * + * This case of a client con with an expire time being closed + * is a fatal condition; there's no way to clear or extend the + * expire time and no way to cleanly shut down the daemon. + * This should never happen. + * + * (We don't enforce that a client with an expire time also has refcount + * set, but I can't think of case where setting expire but not refcount + * would be useful.) + */ + + log_error("client dead ci %d fd %d pid %d renewal %llu expire %llu %s", + ci, client[ci].fd, client[ci].pid, + (unsigned long long)client[ci].renewal, + (unsigned long long)client[ci].expire, + client[ci].name); close(client[ci].fd); client[ci].pid_dead = 1; - client[ci].refcount = 0; client[ci].fd = -1; pollfd[ci].fd = -1; @@ -380,12 +399,13 @@ static int test_clients(void) continue; if (t >= client[i].expire) { - log_error("test failed pid %d now %llu keepalive %llu renewal %llu expire %llu", - client[i].pid, + log_error("test failed ci %d pid %d now %llu keepalive %llu renewal %llu expire %llu %s", + i, client[i].pid, (unsigned long long)t, (unsigned long long)last_keepalive, (unsigned long long)client[i].renewal, - (unsigned long long)client[i].expire); + (unsigned long long)client[i].expire, + client[i].name); fail_count++; } }