From 748e8325fd0b2e09469c76f584b8e08c1ef03ca6 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Nov 15 2022 16:11:30 +0000
Subject: watchdog timeout configuration


Make the watchdog timeout configurable.  The watchdog device must
support the configured value, and all hosts must use the same value.
The io_timeout should usually be scaled by a similar factor.

Set watchdog_fire_timeout and io_timeout in sanlock.conf, e.g.
watchdog_fire_timeout=30
io_timeout=5

The defaults remain watchdog_fire_timeout 60 and io_timeout 10.

wdmd --trytimeout <sec> can be used to test if the watchdog device
supports a certain timeout value.

---
diff --git a/src/cmd.c b/src/cmd.c
index 5ab0ae2..ae57bf3 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -1326,7 +1326,7 @@ static void cmd_add_lockspace(struct cmd_args *ca, uint32_t cmd)
 
 	io_timeout = ca->header.data;
 	if (!io_timeout)
-		io_timeout = DEFAULT_IO_TIMEOUT;
+		io_timeout = com.io_timeout;
 
 	rv = add_lockspace_start(&lockspace, io_timeout, &sp);
 	if (rv < 0) {
@@ -1577,7 +1577,7 @@ static void cmd_read_lockspace(struct task *task, struct cmd_args *ca, uint32_t 
 
 	if (!sector_size) {
 		/* reads the first leader record to get sector size */
-		result = delta_read_lockspace_sizes(task, &sd, DEFAULT_IO_TIMEOUT, &sector_size, &align_size);
+		result = delta_read_lockspace_sizes(task, &sd, com.io_timeout, &sector_size, &align_size);
 		if (result < 0)
 			goto out_close;
 		if ((sector_size != 512) && (sector_size != 4096)) {
@@ -1588,7 +1588,7 @@ static void cmd_read_lockspace(struct task *task, struct cmd_args *ca, uint32_t 
 
 	/* sets ls->name and io_timeout */
 	result = delta_read_lockspace(task, &sd, sector_size, align_size, host_id, &lockspace,
-				      DEFAULT_IO_TIMEOUT, &io_timeout);
+				      com.io_timeout, &io_timeout);
 	if (result == SANLK_OK)
 		result = 0;
 
@@ -1677,7 +1677,7 @@ static void cmd_read_resource(struct task *task, struct cmd_args *ca, uint32_t c
 		goto reply;
 	}
 
-	token->io_timeout = DEFAULT_IO_TIMEOUT;
+	token->io_timeout = com.io_timeout;
 
 	/*
 	 * These may be zero, in which case paxos_read_resource reads a 4K sector
@@ -1778,7 +1778,7 @@ static void cmd_read_resource_owners(struct task *task, struct cmd_args *ca, uin
 		goto reply;
 	}
 
-	token->io_timeout = DEFAULT_IO_TIMEOUT;
+	token->io_timeout = com.io_timeout;
 
 	/*
 	 * These may be zero, in which case paxos_read_resource reads a 4K sector
@@ -1820,7 +1820,7 @@ static void cmd_write_lockspace(struct task *task, struct cmd_args *ca, uint32_t
 	struct sanlk_lockspace lockspace;
 	struct sync_disk sd;
 	int fd, rv, result;
-	int io_timeout = DEFAULT_IO_TIMEOUT;
+	int io_timeout = com.io_timeout;
 
 	fd = client[ca->ci_in].fd;
 
@@ -1953,7 +1953,7 @@ static void cmd_write_resource(struct task *task, struct cmd_args *ca, uint32_t 
 		goto reply;
 	}
 
-	token->io_timeout = DEFAULT_IO_TIMEOUT;
+	token->io_timeout = com.io_timeout;
 
 	result = paxos_lease_init(task, token, num_hosts, write_clear);
 
@@ -2302,6 +2302,8 @@ static int print_state_daemon(char *str)
 		 "max_worker_threads=%d "
 		 "write_init_io_timeout=%u "
 		 "use_aio=%d "
+		 "io_timeout=%d "
+		 "watchdog_fire_timeout=%d "
 		 "kill_grace_seconds=%d "
 		 "helper_pid=%d "
 		 "helper_kill_fd=%d "
@@ -2330,7 +2332,9 @@ static int print_state_daemon(char *str)
 		 com.max_worker_threads,
 		 com.write_init_io_timeout,
 		 main_task.use_aio,
-		 kill_grace_seconds,
+		 com.io_timeout,
+		 com.watchdog_fire_timeout,
+		 com.kill_grace_seconds,
 		 helper_pid,
 		 helper_kill_fd,
 		 helper_full_count,
diff --git a/src/delta_lease.c b/src/delta_lease.c
index 9a8fc22..9d5aafd 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -849,7 +849,7 @@ int delta_lease_init(struct task *task,
 	uint32_t checksum;
 
 	if (!io_timeout)
-		io_timeout = DEFAULT_IO_TIMEOUT;
+		io_timeout = com.io_timeout;
 
 	rv = sizes_from_flags(ls->flags, &sector_size, &align_size, &max_hosts, "LSF");
 	if (rv)
diff --git a/src/direct.c b/src/direct.c
index 661c4e8..7e00c52 100644
--- a/src/direct.c
+++ b/src/direct.c
@@ -55,7 +55,7 @@ static int direct_read_leader_sizes(struct task *task, struct sync_disk *sd,
 
 	memset(data, 0, datalen);
 
-	rv = read_sectors(sd, 4096, 0, 1, data, datalen, task, DEFAULT_IO_TIMEOUT, "read_sector_size");
+	rv = read_sectors(sd, 4096, 0, 1, data, datalen, task, com.io_timeout, "read_sector_size");
 	if (rv < 0) {
 		free(data);
 		return rv;
@@ -134,7 +134,7 @@ static int do_paxos_action(int action, struct task *task, int io_timeout, struct
 	int j, rv = 0;
 
 	if (!io_timeout)
-		io_timeout = DEFAULT_IO_TIMEOUT;
+		io_timeout = com.io_timeout;
 
 	rv = sizes_from_flags(res->flags, &sector_size, &align_size, &max_hosts, "RES");
 	if (rv)
@@ -309,7 +309,7 @@ static int do_delta_action(int action,
 	memset(bitmap, 0, sizeof(bitmap));
 
 	if (!io_timeout)
-		io_timeout = DEFAULT_IO_TIMEOUT;
+		io_timeout = com.io_timeout;
 
 	rv = sizes_from_flags(ls->flags, &sector_size, &align_size, &max_hosts, "LSF");
 	if (rv)
@@ -706,7 +706,7 @@ int direct_dump(struct task *task, char *dump_path, int force_mode)
 		memset(data, 0, sector_size);
 
 		rv = read_sectors(&sd, sector_size, sector_nr, sector_count, data, datalen,
-				  task, DEFAULT_IO_TIMEOUT, "dump");
+				  task, com.io_timeout, "dump");
 
 		magic_in(data, &magic);
 
@@ -906,7 +906,7 @@ int direct_next_free(struct task *task, char *path)
 		memset(data, 0, sector_size);
 
 		rv = read_sectors(&sd, sector_size, sector_nr, 1, data, datalen,
-				  task, DEFAULT_IO_TIMEOUT, "next_free");
+				  task, com.io_timeout, "next_free");
 
 		lr_end = (struct leader_record *)data;
 
diff --git a/src/lockspace.c b/src/lockspace.c
index 3deb2b0..a874148 100644
--- a/src/lockspace.c
+++ b/src/lockspace.c
@@ -842,6 +842,27 @@ static void *lockspace_thread(void *arg_in)
 	}
 
 	/*
+	 * Tell wdmd to open the watchdog device, set the fire timeout and
+	 * begin the keepalive loop that regularly pets the watchdog.  This
+	 * only happens for the first client/lockspace.  This fails if the
+	 * watchdog device cannot be opened by wdmd or does not support the
+	 * requested fire timeout.
+	 *
+	 * For later clients/lockspaces, when wdmd already has the watchdog
+	 * open, this does nothing (just verifies that fire timeout matches
+	 * what's in use.)
+	 */
+	rv = open_watchdog(wd_con, com.watchdog_fire_timeout);
+	if (rv < 0) {
+		log_erros(sp, "open_watchdog with fire_timeout %d failed %d",
+			  com.watchdog_fire_timeout, wd_con);
+		acquire_result = SANLK_WD_ERROR;
+		delta_result = -1;
+		disconnect_watchdog(sp);
+		goto set_status;
+	}
+
+	/*
 	 * acquire the delta lease
 	 */
 
@@ -989,7 +1010,7 @@ static void *lockspace_thread(void *arg_in)
 	/* watchdog unlink was done in main_loop when thread_stop was set, to
 	   get it done as quickly as possible in case the wd is about to fire. */
 
-	close_watchdog(sp);
+	disconnect_watchdog(sp);
  out:
 	if (delta_result == SANLK_OK)
 		delta_lease_release(&task, sp, &sp->host_id_disk,
diff --git a/src/main.c b/src/main.c
index 4c022de..a9ceee5 100644
--- a/src/main.c
+++ b/src/main.c
@@ -658,11 +658,11 @@ static void kill_pids(struct space *sp)
 		 * kill_grace_seconds
 		 */
 
-		in_grace = now < (last_success + id_renewal_fail_seconds + kill_grace_seconds);
+		in_grace = now < (last_success + id_renewal_fail_seconds + com.kill_grace_seconds);
 
 		if (sp->external_remove || (external_shutdown > 1)) {
 			sig = SIGKILL;
-		} else if ((kill_grace_seconds > 0) && in_grace && cl->killpath[0]) {
+		} else if ((com.kill_grace_seconds > 0) && in_grace && cl->killpath[0]) {
 			sig = SIGRUNPATH;
 		} else if (in_grace) {
 			sig = SIGTERM;
@@ -1736,6 +1736,7 @@ static int do_daemon(void)
 	}
 
 	setup_limits();
+	setup_timeouts();
 	setup_helper();
 
 	/* main task never does disk io, so we don't really need to set
@@ -1780,7 +1781,12 @@ static int do_daemon(void)
 
 	uname(&nodename);
 
-	log_warn("sanlock daemon started %s host %s (%s)", VERSION, our_host_name_global, nodename.nodename);
+	if (com.io_timeout != DEFAULT_IO_TIMEOUT || com.watchdog_fire_timeout != DEFAULT_WATCHDOG_FIRE_TIMEOUT)
+		log_warn("sanlock daemon started %s host %s (%s) io_timeout %u watchdog_fire_timeout %u",
+			 VERSION, our_host_name_global, nodename.nodename, com.io_timeout, com.watchdog_fire_timeout);
+	else
+		log_warn("sanlock daemon started %s host %s (%s)",
+			 VERSION, our_host_name_global, nodename.nodename);
 
 	setup_priority();
 
@@ -2110,6 +2116,7 @@ static void print_usage(void)
 	printf("  -G <gid>      group id\n");
 	printf("  -t <num>      max worker threads (%d)\n", DEFAULT_MAX_WORKER_THREADS);
 	printf("  -g <sec>      seconds for graceful recovery (%d)\n", DEFAULT_GRACE_SEC);
+	printf("  -o <sec>      io timeout (%d)\n", DEFAULT_IO_TIMEOUT);
 	printf("  -w 0|1        use watchdog through wdmd (%d)\n", DEFAULT_USE_WATCHDOG);
 	printf("  -h 0|1        use high priority (RR) scheduling (%d)\n", DEFAULT_HIGH_PRIORITY);
 	printf("  -l <num>      use mlockall (0 none, 1 current, 2 current and future) (%d)\n", DEFAULT_MLOCK_LEVEL);
@@ -2189,7 +2196,7 @@ static int read_command_line(int argc, char *argv[])
 	char *p;
 	char *arg1 = argv[1];
 	char *act;
-	int i, j, len, sec, begin_command = 0;
+	int i, j, len, sec, val, begin_command = 0;
 
 	if (argc < 2 || !strcmp(arg1, "help") || !strcmp(arg1, "--help") ||
 	    !strcmp(arg1, "-h")) {
@@ -2432,9 +2439,9 @@ static int read_command_line(int argc, char *argv[])
 			if (com.action == ACT_STATUS) {
 				com.sort_arg = *optionarg;
 			} else {
-				com.io_timeout_arg = atoi(optionarg);
-				if (!com.io_timeout_arg)
-					com.io_timeout_arg = DEFAULT_IO_TIMEOUT;
+				val = atoi(optionarg);
+				if (val > 0)
+					com.io_timeout = val;
 			}
 			break;
 		case 'b':
@@ -2466,8 +2473,10 @@ static int read_command_line(int argc, char *argv[])
 		case 'g':
 			if (com.type == COM_DAEMON) {
 				sec = atoi(optionarg);
-				if (sec <= 60 && sec >= 0)
-					kill_grace_seconds = sec;
+				if (sec <= 60 && sec >= 0) {
+					com.kill_grace_seconds = sec;
+					com.kill_grace_set = 1;
+				}
 			} else {
 				com.host_generation = strtoull(optionarg, NULL, 0);
 			}
@@ -2806,6 +2815,23 @@ static void read_config_file(void)
 			get_val_int(line, &val);
 			com.use_watchdog = val;
 
+		} else if (!strcmp(str, "io_timeout")) {
+			get_val_int(line, &val);
+			if (val > 0)
+				com.io_timeout = val;
+
+		} else if (!strcmp(str, "watchdog_fire_timeout")) {
+			get_val_int(line, &val);
+			if (val > 0)
+				com.watchdog_fire_timeout = val;
+
+		} else if (!strcmp(str, "kill_grace_seconds")) {
+			get_val_int(line, &val);
+			if (val <= 60 && val >= 0) {
+				com.kill_grace_seconds = val;
+				com.kill_grace_set = 1;
+			}
+
 		} else if (!strcmp(str, "high_priority")) {
 			get_val_int(line, &val);
 			com.high_priority = val;
@@ -3215,10 +3241,10 @@ static int do_client(void)
 		break;
 
 	case ACT_ADD_LOCKSPACE:
-		if (com.io_timeout_arg != DEFAULT_IO_TIMEOUT) {
-			log_tool("add_lockspace_timeout %d", com.io_timeout_arg);
+		if (com.io_timeout != DEFAULT_IO_TIMEOUT) {
+			log_tool("add_lockspace_timeout %d", com.io_timeout);
 			rv = sanlock_add_lockspace_timeout(&com.lockspace, 0,
-							   com.io_timeout_arg);
+							   com.io_timeout);
 			log_tool("add_lockspace_timeout done %d", rv);
 		} else {
 			log_tool("add_lockspace");
@@ -3343,7 +3369,7 @@ static int do_client(void)
 
 			rv = sanlock_write_lockspace(&com.lockspace,
 						     com.max_hosts, 0,
-						     com.io_timeout_arg);
+						     com.io_timeout);
 		} else {
 			if (com.sector_size)
 				com.res_args[0]->flags |= sanlk_res_sector_size_to_flag(com.sector_size);
@@ -3595,7 +3621,7 @@ static int do_direct_read_leader(void)
 	struct leader_record leader;
 	int rv;
 
-	rv = direct_read_leader(&main_task, com.io_timeout_arg,
+	rv = direct_read_leader(&main_task, com.io_timeout,
 				&com.lockspace, com.res_args[0],
 				&leader);
 
@@ -3620,7 +3646,7 @@ static int do_direct_write_leader(void)
 
 	memset(&leader, 0, sizeof(leader));
 
-	direct_read_leader(&main_task, com.io_timeout_arg,
+	direct_read_leader(&main_task, com.io_timeout,
 			   &com.lockspace, com.res_args[0],
 			   &leader);
 
@@ -3643,7 +3669,7 @@ static int do_direct_write_leader(void)
 		syslog(LOG_WARNING, "write_leader resource %s", res_str);
 	}
 
-	rv = direct_write_leader(&main_task, com.io_timeout_arg,
+	rv = direct_write_leader(&main_task, com.io_timeout,
 				 &com.lockspace, com.res_args[0],
 				 &leader);
 out:
@@ -3676,8 +3702,7 @@ static int do_direct_init(void)
 		       (unsigned long long)com.lockspace.host_id_disk.offset,
 		       com.lockspace.flags);
 
-		rv = direct_write_lockspace(&main_task, &com.lockspace,
-					    com.io_timeout_arg);
+		rv = direct_write_lockspace(&main_task, &com.lockspace, com.io_timeout);
 	} else if (com.res_args[0]) {
 		if (com.sector_size)
 			com.res_args[0]->flags |= sanlk_res_sector_size_to_flag(com.sector_size);
@@ -3783,7 +3808,7 @@ static int do_direct(void)
 
 	case ACT_ACQUIRE:
 		syslog(LOG_WARNING, "acquire");
-		rv = direct_acquire(&main_task, com.io_timeout_arg,
+		rv = direct_acquire(&main_task, com.io_timeout,
 				    com.res_args[0], com.num_hosts,
 				    com.host_id, com.host_generation,
 				    &leader);
@@ -3792,7 +3817,7 @@ static int do_direct(void)
 
 	case ACT_RELEASE:
 		syslog(LOG_WARNING, "release");
-		rv = direct_release(&main_task, com.io_timeout_arg,
+		rv = direct_release(&main_task, com.io_timeout,
 				    com.res_args[0], &leader);
 		log_tool("release done %d", rv);
 		break;
@@ -3801,20 +3826,20 @@ static int do_direct(void)
 		syslog(LOG_WARNING, "acquire_id");
 		setup_host_name();
 
-		rv = direct_acquire_id(&main_task, com.io_timeout_arg,
+		rv = direct_acquire_id(&main_task, com.io_timeout,
 				       &com.lockspace, our_host_name_global);
 		log_tool("acquire_id done %d", rv);
 		break;
 
 	case ACT_RELEASE_ID:
 		syslog(LOG_WARNING, "release_id");
-		rv = direct_release_id(&main_task, com.io_timeout_arg, &com.lockspace);
+		rv = direct_release_id(&main_task, com.io_timeout, &com.lockspace);
 		log_tool("release_id done %d", rv);
 		break;
 
 	case ACT_RENEW_ID:
 		syslog(LOG_WARNING, "renew_id");
-		rv = direct_renew_id(&main_task, com.io_timeout_arg, &com.lockspace);
+		rv = direct_renew_id(&main_task, com.io_timeout, &com.lockspace);
 		log_tool("rewew_id done %d", rv);
 		break;
 
@@ -3877,7 +3902,6 @@ int main(int argc, char *argv[])
 	set_sanlock_version();
 
 	kill_count_max = 100;
-	kill_grace_seconds = DEFAULT_GRACE_SEC;
 	helper_ci = -1;
 	helper_pid = -1;
 	helper_kill_fd = -1;
@@ -3890,11 +3914,13 @@ int main(int argc, char *argv[])
 
 	memset(&com, 0, sizeof(com));
 	com.use_watchdog = DEFAULT_USE_WATCHDOG;
+	com.watchdog_fire_timeout = DEFAULT_WATCHDOG_FIRE_TIMEOUT;
+	com.kill_grace_seconds = DEFAULT_GRACE_SEC;
 	com.high_priority = DEFAULT_HIGH_PRIORITY;
 	com.mlock_level = DEFAULT_MLOCK_LEVEL;
 	com.names_log_priority = LOG_WARNING;
 	com.max_worker_threads = DEFAULT_MAX_WORKER_THREADS;
-	com.io_timeout_arg = DEFAULT_IO_TIMEOUT;
+	com.io_timeout = DEFAULT_IO_TIMEOUT;
 	com.write_init_io_timeout = DEFAULT_WRITE_INIT_IO_TIMEOUT;
 	com.aio_arg = DEFAULT_USE_AIO;
 	com.pid = -1;
diff --git a/src/rindex.c b/src/rindex.c
index 7ee4e54..9ef02da 100644
--- a/src/rindex.c
+++ b/src/rindex.c
@@ -64,7 +64,7 @@ static struct token *setup_rindex_token(struct rindex_info *rx,
 	strcpy(token->r.name, "rindex_lease");
 	token->sector_size = sector_size;
 	token->align_size = align_size;
-	token->io_timeout = spi ? spi->io_timeout : DEFAULT_IO_TIMEOUT;
+	token->io_timeout = spi ? spi->io_timeout : com.io_timeout;
 	token->r.num_disks = 1;
 	token->r.flags |= sanlk_res_sector_size_to_flag(sector_size);
 	token->r.flags |= sanlk_res_align_size_to_flag(align_size);
@@ -105,7 +105,7 @@ static struct token *setup_resource_token(struct rindex_info *rx,
 	memcpy(token->r.name, res_name, SANLK_NAME_LEN);
 	token->sector_size = sector_size;
 	token->align_size = align_size;
-	token->io_timeout = spi ? spi->io_timeout : DEFAULT_IO_TIMEOUT;
+	token->io_timeout = spi ? spi->io_timeout : com.io_timeout;
 	token->r.num_disks = 1;
 	token->r.flags |= sanlk_res_sector_size_to_flag(sector_size);
 	token->r.flags |= sanlk_res_align_size_to_flag(align_size);
@@ -291,7 +291,7 @@ static int read_rindex_header(struct task *task,
 	if (!sector_size)
 		sector_size = 4096;
 	if (!io_timeout) {
-		io_timeout = DEFAULT_IO_TIMEOUT;
+		io_timeout = com.io_timeout;
 		spi->io_timeout = io_timeout;
 	}
 
@@ -447,7 +447,7 @@ int rindex_format(struct task *task, struct sanlk_rindex *ri)
 	if (com.write_init_io_timeout)
 		write_io_timeout = com.write_init_io_timeout;
 	else
-		write_io_timeout = DEFAULT_IO_TIMEOUT;
+		write_io_timeout = com.io_timeout;
 
 	rv = write_iobuf(rx.disk->fd, rx.disk->offset, iobuf, iobuf_len, task, write_io_timeout, NULL);
 	if (rv < 0) {
diff --git a/src/sanlock.8 b/src/sanlock.8
index 2021b4a..0776b8f 100644
--- a/src/sanlock.8
+++ b/src/sanlock.8
@@ -599,6 +599,9 @@ seconds for graceful recovery
 .BR -w " 0|1"
 use watchdog through wdmd
 
+.BI -o " sec"
+io timeout
+
 .BR -h " 0|1"
 use high priority (RR) scheduling
 
@@ -1071,6 +1074,34 @@ the full history of renewals saved by sanlock, which by default
 is 180 records, about 1 hour of history when using a 20 second
 renewal interval for a 10 second io timeout.
 
+.P
+
+.SS Configurable watchdog timeout
+
+Watchdog devices usually have a 60 second timeout, but some devices
+have a configurable timeout.  To use a different watchdog timeout, set
+sanlock.conf watchdog_fire_timeout (in seconds) to a value supported by
+the device.  The same watchdog_fire_timeout must be configured on all
+hosts (so all hosts must have watchdog devices that support the same
+timeout).  Mismatched values will invalidate the lease protection provided
+by the watchdog.
+
+watchdog_fire_timeout and io_timeout should usually be configured
+together.  By default, sanlock uses watchdog_fire_timeout=60 with
+io_timeout=10.  Other combinations to consider are:
+.br
+watchdog_fire_timeout=30 with io_timeout=5
+.br
+watchdog_fire_timeout=10 with io_timeout=2
+
+Smaller values make it more likely that a host will be reset by the
+watchdog while waiting for slow io to complete or for temporary io
+failures to be resolved.  Spurious watchdog resets will also become
+more likely due to independent, overlapping lockspace outages, each
+of which would be inconsequential by itself.
+
+.P
+
 .SH INTERNALS
 
 .SS Disk Format
@@ -1377,6 +1408,23 @@ max_worker_threads = <num>
 .br
 See -t
 
+.IP \[bu] 2
+io_timeout = <seconds>
+.br
+The io timeout for disk operations, most notably delta lease renewals.
+This value is the basis for calculating most other timeout values.  (Some
+special cases may use a different io timeout.)  Tune this value with
+caution; it can substantially alter the overall sanlock behavior.
+
+.IP \[bu] 2
+watchdog_fire_timeout = <seconds>
+.br
+The watchdog device timeout.  The watchdog device must support the
+specified value.  It is critical that all hosts use the same value.
+Not doing so will invalidate the lease protection provided by sanlock.
+The io_timeout should usually be tuned along with this value, e.g.
+watchdog_fire_timeout = 30 with io_timeout = 5.
+
 .SH SEE ALSO
 .BR wdmd (8)
 
diff --git a/src/sanlock.conf b/src/sanlock.conf
index 2909a9c..89899f1 100644
--- a/src/sanlock.conf
+++ b/src/sanlock.conf
@@ -69,3 +69,9 @@
 #
 # max_worker_threads = 8
 # command line: -t 8
+#
+# io_timeout = 10
+# command line: -o <seconds>
+#
+# watchdog_fire_timeout = 60
+# command line: n/a
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 7fe0a5b..d8c027f 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -315,7 +315,7 @@ struct client {
 
 EXTERN struct client *client;
 
-#define WATCHDOG_FIRE_TIMEOUT 60
+#define DEFAULT_WATCHDOG_FIRE_TIMEOUT 60
 #define DEFAULT_USE_AIO 1
 #define DEFAULT_IO_TIMEOUT 10
 #define DEFAULT_GRACE_SEC 40
@@ -352,13 +352,16 @@ struct command_line {
 	int quiet_fail;
 	int wait;
 	int use_watchdog;
+	int watchdog_fire_timeout;
+	int io_timeout;			/* DEFAULT_IO_TIMEOUT or sanlock.conf io_timeout */
+	int kill_grace_seconds;		/* -g */
+	int kill_grace_set;
 	int high_priority;		/* -h */
 	int get_hosts;			/* -h */
 	int names_log_priority;
 	int mlock_level;
 	int max_worker_threads;
 	int aio_arg;
-	int io_timeout_arg;
 	int write_init_io_timeout;
 	int set_bitmap_seconds;
 	int persistent;
@@ -455,7 +458,6 @@ EXTERN int external_shutdown;
 EXTERN char our_host_name_global[SANLK_NAME_LEN+1];
 
 EXTERN int kill_count_max;
-EXTERN int kill_grace_seconds;
 EXTERN int is_helper;
 EXTERN int helper_ci;
 EXTERN int helper_pid;
diff --git a/src/task.c b/src/task.c
index f3c10f8..ad2e761 100644
--- a/src/task.c
+++ b/src/task.c
@@ -76,7 +76,7 @@ void close_task_aio(struct task *task)
 		goto skip_aio;
 
 	memset(&ts, 0, sizeof(struct timespec));
-	ts.tv_sec = DEFAULT_IO_TIMEOUT;
+	ts.tv_sec = com.io_timeout;
 
 	last_warn = time(NULL);
 	begin = last_warn;
@@ -87,7 +87,7 @@ void close_task_aio(struct task *task)
 	while (1) {
 		now = time(NULL);
 
-		if (now - last_warn >= (DEFAULT_IO_TIMEOUT * 6)) {
+		if (now - last_warn >= (com.io_timeout * 6)) {
 			last_warn = now;
 			lvl = LOG_ERR;
 		} else {
diff --git a/src/timeouts.c b/src/timeouts.c
index 6d78b55..8884518 100644
--- a/src/timeouts.c
+++ b/src/timeouts.c
@@ -27,10 +27,50 @@
 #include "task.h"
 #include "timeouts.h"
 
+void setup_timeouts(void)
+{
+	/*
+	 * graceful shutdown is client pids stopping their activity and
+	 * releasing their sanlock leases in response to a killpath program
+	 * they configured, or in response to sigterm from sanlock if they
+	 * did not set a killpath program.  It's an opportunity for the client
+	 * pid to exit more gracefully than getting sigkill.  If the client
+	 * pid does not release leases in response to the killpath/sigterm,
+	 * then eventually sanlock will escalate and send a sigkill.
+	 *
+	 * It's hard to know what portion of recovery time should be allocated
+	 * to graceful shutdown before escalating to sigkill.  The smaller the
+	 * watchdog timeout, the less time between entering recovery mode and
+	 * the watchdog potentially firing.  10 seconds before the watchdog
+	 * will fire, the idea is to give up on graceful shutdown and resort
+	 * to sending sigkill to any client pids that have not released their
+	 * leases.  This gives 10 sec for the pids to exit from sigkill,
+	 * sanlock to get the exit statuses, clear the expiring wdmd connection,
+	 * and hopefully have wdmd ping the watchdog again before it fires.
+	 * A graceful shutdown period of less than 10/15 sec seems pointless,
+	 * so if there is anything less than 10/15 sec available for a graceful
+	 * shutdown we don't bother and go directly to sigkill (this could
+	 * of course be changed if programs are indeed able to respond
+	 * quickly during graceful shutdown.)
+	 */
+	if (!com.kill_grace_set && (com.watchdog_fire_timeout < DEFAULT_WATCHDOG_FIRE_TIMEOUT)) {
+		if (com.watchdog_fire_timeout < 60 && com.watchdog_fire_timeout >= 30)
+			com.kill_grace_seconds = 15;
+		else if (com.watchdog_fire_timeout < 30)
+			com.kill_grace_seconds = 0;
+	}
+}
+
+/*
+ * Some of these timeouts depend on the io_timeout used by *another*
+ * host, passed as the arg, not the local io_timeout.
+ */
+
+/* All hosts are required to use the same watchdog_fire_timeout. */
 int calc_host_dead_seconds(int io_timeout)
 {
-	/* id_renewal_fail_seconds + WATCHDOG_FIRE_TIMEOUT */
-	return (8 * io_timeout) + WATCHDOG_FIRE_TIMEOUT;
+	/* id_renewal_fail_seconds + com.watchdog_fire_timeout */
+	return (8 * io_timeout) + com.watchdog_fire_timeout;
 }
 
 int calc_id_renewal_seconds(int io_timeout)
@@ -65,7 +105,7 @@ void log_timeouts(int io_timeout_arg)
 
 	/* those above are chosen by us, the rest are based on them */
 
-	int host_dead_seconds      = id_renewal_fail_seconds + WATCHDOG_FIRE_TIMEOUT;
+	int host_dead_seconds      = id_renewal_fail_seconds + com.watchdog_fire_timeout;
 	int delta_large_delay      = id_renewal_seconds + (6 * io_timeout_seconds);
 	int delta_short_delay      = 2 * io_timeout_seconds;
 
diff --git a/src/timeouts.h b/src/timeouts.h
index c6dde69..ac9842a 100644
--- a/src/timeouts.h
+++ b/src/timeouts.h
@@ -112,6 +112,244 @@
  * that sanlock cannot successfully kill the pids it is supervising that
  * depend on the given host_id.
  *
+ * This analyzes the sanlock and wdmd operations every 5 seconds, and
+ * assumes that the sanlock and wdmd daemons are both performing their
+ * steps right at each 5 second mark, but in reality they will likely be
+ * offset from each other.
+ *
+ *
+ * Using these values in the example
+ * wdmd test interval           = 5
+ * watchdog_fire_timeout        = 30
+ * io_timeout_seconds           = 5
+ * id_renewal_seconds           = 10
+ * id_renewal_fail_seconds      = 40
+ * host_dead_seconds            = 70
+ *
+ * wdmd_test_live(renewal_time [now],
+ *                expire_time  [now + id_renewal_fail_seconds])
+ *
+ *   T  time in seconds (now)
+ *
+ *   0: sanlock renews host_id on disk
+ *      sanlock calls wdmd_test_live(0, 40) [expire 40 from 0 + 40]
+ *      wdmd test_client sees now 0 < expire 40 ok -> keepalive
+ *
+ *   5: wdmd test_client sees now 5 < expire 40 ok -> keepalive
+ *
+ *  10: sanlock renews host_id on disk ok
+ *      sanlock calls wdmd_test_live(10, 50) [expire 50 from 10 + 40]
+ *      wdmd test_client sees now 10 < expire 50 or 40 ok -> keepalive
+ *      (50 if the wdmd check is right after this wdmd_test_live, or
+ *       40 if the wdmd check is right before this wdmd_test_live)
+ *
+ *  15: wdmd test_client sees now 15 < expire 50 ok -> keepalive
+ *
+ *  20: sanlock renews host_id on disk ok
+ *      sanlock calls wdmd_test_live(20, 60) [expire 60 from 20 + 40]
+ *      wdmd test_client sees now 20 < expire 60 or 50 ok -> keepalive
+ *
+ *  25: wdmd test_client sees now 25 < expire 60 ok -> keepalive
+ *
+ *  all normal until 29
+ *  ---------------------------------------------------------
+ *  problems begin at 30
+ *
+ *  30: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 30 < expire 60 ok -> keepalive
+ *
+ *  35: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 35 < expire 60 ok -> keepalive
+ *
+ *  40: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 40 < expire 60 ok -> keepalive
+ *
+ *  45: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 45 < expire 60 ok -> keepalive
+ *
+ *  50: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 50 < expire 60 ok -> keepalive
+ *      messages: check_our_lease warning (sanlock)
+ *
+ *  55: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 55 < expire 60 ok -> keepalive (from dev close)
+ *      (wdmd sees now >= expire - test_interval)
+ *      messages: watchdog closed unclean (wdmd), test warning (wdmd)
+ *
+ *  60: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      sanlock enters recovery mode and starts killing pids because we have reached
+ *      now (60) is id_renewal_fail_seconds (40) after last renewal (20)
+ *      wdmd test_client sees now 60 >= expire 60 fail -> no keepalive
+ *      messages: check_our_lease failed (sanlock), test failed (wdmd)
+ *
+ *      . /dev/watchdog will fire at last keepalive + watchdog_fire_timeout =
+ *        T55 + 30 = T85
+ *      . host_id will expire at
+ *        last disk renewal ok + id_renewal_fail_seconds + watchdog_fire_timeout
+ *        T20 + 40 + 30 = T90
+ *        (aka last disk renewal ok + host_dead_seconds, T20 + 70 = T90)
+ *      . the wdmd test at T55 could have been at T59, so wdmd would have
+ *        seen the client unexpired/ok and done keepalive at 59 just before the
+ *        expiry at 60, which would lead to /dev/watchdog firing at 59+30 = T89
+ *      . so, the watchdog could fire as early as T85 or as late as T89, but
+ *        the host_id will not expire until T90
+ *
+ *  65: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 65 > expire 60 fail -> no keepalive
+ *
+ *  70: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 70 > expire 60 fail -> no keepalive
+ *
+ *  75: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 75 > expire 60 fail -> no keepalive
+ *
+ *  80: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 80 > expire 60 fail -> no keepalive
+ *
+ *  85: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 85 > expire 60 fail -> no keepalive
+ *      /dev/watchdog fires because last keepalive was T55, 30 seconds ago
+ *      (earliest possible /dev/watchdog firing due to wdmd checking expiry just
+ *      after sanlock calls wdmd_test_live at T55 and just after the expiry at T60)
+ *
+ *  89: (latest possible /dev/watchdog firing due to wdmd checking expiry at
+ *       T59, just before the expiry at T60)
+ *
+ *  90: another host can acquire leases held by host_id.
+ *      This is host_dead_seconds (70) after the last successful renewal (T20)
+ * --
+ *
+ * entering recovery mode at 60 until watchdog firing at 85 is 25 seconds
+ * to hopefully clear client usage of leases and avert the watchdog firing.
+ * kill_grace_seconds is 15, leaving clients 15 seconds to do a graceful
+ * shutdown using their killpath or respond to sigterm.  If the client
+ * hasn't dropped its leases in these 15 seconds, sanlock escalates to
+ * using sigkill with 10 seconds remaining until the watchdog fires.
+ * 10 seconds is hopefully long enough for client pids to exit, sanlock
+ * to collect the exit status and clear the wdmd connection before the
+ * watchdog really fires.
+ */
+
+/*
+ * Example of watchdog behavior when host_id renewals fail, assuming
+ * that sanlock cannot successfully kill the pids it is supervising that
+ * depend on the given host_id.
+ *
+ * This analyzes the sanlock and wdmd operations every 2 seconds, and
+ * assumes that the sanlock and wdmd daemons are both performing their
+ * steps right at each 2 second mark, but in reality they will likely be
+ * offset from each other.
+ *
+ * Using these values in the example
+ * wdmd test interval           = 2
+ * watchdog_fire_timeout        = 10
+ * io_timeout_seconds           = 2
+ * id_renewal_seconds           = 4
+ * id_renewal_fail_seconds      = 16
+ * host_dead_seconds            = 26
+ *
+ * wdmd_test_live(renewal_time [now],
+ *                expire_time  [now + id_renewal_fail_seconds])
+ *
+ *   T  time in seconds (now)
+ *
+ *   0: sanlock renews host_id on disk
+ *      sanlock calls wdmd_test_live(0, 16) [expire 16 from 0 + 16]
+ *      wdmd test_client sees now 0 < expire 16 ok -> keepalive
+ *
+ *   2: wdmd test_client sees now 2 < expire 16 ok -> keepalive
+ *
+ *   4: sanlock renews host_id on disk ok
+ *      sanlock calls wdmd_test_live(4, 20) [expire 20 from 4 + 16]
+ *      wdmd test_client sees now 4 < expire 16 or 20 ok -> keepalive
+ *
+ *   6: wdmd test_client sees now 6 < expire 20 ok -> keepalive
+ *
+ *   8: sanlock renews host_id on disk ok
+ *      sanlock calls wdmd_test_live(8, 24) [expire 24 from 8 + 16]
+ *      wdmd test_client sees now 8 < expire 20 or 24 ok -> keepalive
+ *
+ *  10: wdmd test_client sees now 10 < expire 24 ok -> keepalive
+ *
+ *  all normal until 11
+ *  ---------------------------------------------------------
+ *  problems begin at 12
+ *
+ *  12: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 12 < expire 24 ok -> keepalive
+ *
+ *  14: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 14 < expire 24 ok -> keepalive
+ *
+ *  16: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 16 < expire 24 ok -> keepalive
+ *
+ *  18: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 18 < expire 24 ok -> keepalive
+ *
+ *  20: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 20 < expire 24 ok -> keepalive
+ *      messages: check_our_lease warning (sanlock)
+ *      (these warnings appear 6*io_timeout after last renewal)
+ *
+ *  22: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 22 < expire 24 ok -> keepalive (from dev close)
+ *      (wdmd sees now >= expire - test_interval)
+ *      messages: watchdog closed unclean (wdmd), test warning (wdmd)
+ *
+ *  24: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      sanlock enters recovery mode and starts killing pids because we have reached
+ *      now (24) is id_renewal_fail_seconds (16) after last renewal (8)
+ *      wdmd test_client sees now 24 >= expire 24 fail -> no keepalive
+ *      messages: check_our_lease failed (sanlock), test failed (wdmd)
+ *
+ *      . /dev/watchdog will fire at last keepalive + watchdog_fire_timeout =
+ *        T22 + 10 = T32
+ *      . host_id will expire at
+ *        last disk renewal ok + id_renewal_fail_seconds + watchdog_fire_timeout
+ *        T8 + 16 + 10 = T34
+ *        (aka last disk renewal ok + host_dead_seconds, T8 + 26 = T34)
+ *      . the wdmd test at T22 could have been at T23, so wdmd would have
+ *        seen the client unexpired/ok and done keepalive at 23 just before the
+ *        expiry at 24, which would lead to /dev/watchdog firing at 23+10 = T33
+ *      . so, the watchdog could fire as early as T32 or as late as T33, but
+ *        the host_id will not expire until T34
+ *
+ *  26: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 26 > expire 24 fail -> no keepalive
+ *
+ *  28: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 28 > expire 24 fail -> no keepalive
+ *
+ *  30: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 30 > expire 24 fail -> no keepalive
+ *
+ *  32: sanlock fails to renew host_id on disk -> no wdmd_test_live
+ *      wdmd test_client sees now 32 > expire 24 fail -> no keepalive
+ *      /dev/watchdog fires because last keepalive was T22, 10 seconds ago
+ *      (earliest possible /dev/watchdog firing due to wdmd checking expiry just
+ *      after sanlock calls wdmd_test_live at T22 and just after the expiry at T24)
+ *
+ *  33: (latest possible /dev/watchdog firing due to wdmd checking expiry at
+ *       T23, just before the expiry at T24)
+ *
+ *  34: another host can acquire leases held by host_id.
+ *      This is host_dead_seconds (26) after the last successful renewal (T8)
+ *
+ * --
+ *
+ * entering recovery mode at 24 until watchdog firing at 32 is only 8 seconds,
+ * so there is no time for graceful recovery, so kill_grace_seconds would be
+ * set to 0 here.  All 8 seconds would be used to hopefully complete sigkill,
+ * collect client exit statuses, and clear the expiring wdmd connection
+ * before the watchdog actually fires.
+ */
+
+/*
+ * Example of watchdog behavior when host_id renewals fail, assuming
+ * that sanlock cannot successfully kill the pids it is supervising that
+ * depend on the given host_id.
+ *
  *
  * Using these values in the example
  * wdmd test interval           = 10 (defined in wdmd/main.c)
@@ -213,6 +451,17 @@
  *
  * 300: another host can acquire leases held by host_id
  *      This is host_dead_seconds (220) after last successful renewal (T80)
+ * --
+ *
+ * entering recovery mode at 240 until watchdog firing at 290 is 50 seconds
+ * to hopefully clear client usage of leases and avert the watchdog firing.
+ * kill_grace_seconds is 40, leaving clients 40 seconds to do a graceful
+ * shutdown using their killpath or respond to sigterm.  If the client
+ * hasn't dropped its leases in these 40 seconds, sanlock escalates to
+ * using sigkill with 10 seconds remaining until the watchdog fires.
+ * 10 seconds is hopefully long enough for client pids to exit, sanlock
+ * to collect the exit status and clear the wdmd connection before the
+ * watchdog really fires.
  */
 
 
@@ -448,6 +697,7 @@
 #ifndef __TIMEOUTS_H__
 #define __TIMEOUTS_H__
 
+void setup_timeouts(void);
 int calc_host_dead_seconds(int io_timeout);
 int calc_id_renewal_seconds(int io_timeout);
 int calc_id_renewal_fail_seconds(int io_timeout);
diff --git a/src/watchdog.c b/src/watchdog.c
index 2c6c5b8..d0077dd 100644
--- a/src/watchdog.c
+++ b/src/watchdog.c
@@ -39,6 +39,24 @@
 
 #include "../wdmd/wdmd.h"
 
+/*
+ * Ask wdmd (over connection con) to open the watchdog device using
+ * fire_timeout.  Does nothing and returns 0 when com.use_watchdog is off;
+ * otherwise returns 0 on success, or -1 (after logging) on a wdmd error.
+ */
+int open_watchdog(int con, int fire_timeout)
+{
+	if (!com.use_watchdog)
+		return 0;
+
+	if (wdmd_open_watchdog(con, fire_timeout) >= 0)
+		return 0;
+
+	log_error("wdmd_open_watchdog fire_timeout %d error", fire_timeout);
+	return -1;
+}
+
+/* tell wdmd that this connection is still good and watchdog pings can continue for it */
 void update_watchdog(struct space *sp, uint64_t timestamp,
 		     int id_renewal_fail_seconds)
 {
@@ -53,6 +71,7 @@ void update_watchdog(struct space *sp, uint64_t timestamp,
 			  (unsigned long long)timestamp, rv);
 }
 
+/* connects to the wdmd daemon */
 int connect_watchdog(struct space *sp)
 {
 	int con;
@@ -69,6 +88,7 @@ int connect_watchdog(struct space *sp)
 	return con;
 }
 
+/* associate wdmd keepalives to the continued liveness of this lockspace */
 int activate_watchdog(struct space *sp, uint64_t timestamp,
 		      int id_renewal_fail_seconds, int con)
 {
@@ -105,9 +125,9 @@ int activate_watchdog(struct space *sp, uint64_t timestamp,
 		goto fail_clear;
 	}
 
-	if (fire_timeout != WATCHDOG_FIRE_TIMEOUT) {
+	if (fire_timeout != com.watchdog_fire_timeout) {
 		log_erros(sp, "wdmd invalid fire_timeout %d vs %d",
-			  fire_timeout, WATCHDOG_FIRE_TIMEOUT);
+			  fire_timeout, com.watchdog_fire_timeout);
 		goto fail_clear;
 	}
 
@@ -153,7 +173,7 @@ void deactivate_watchdog(struct space *sp)
 	wdmd_refcount_clear(sp->wd_fd);
 }
 
-void close_watchdog(struct space *sp)
+void disconnect_watchdog(struct space *sp)
 {
 	if (!com.use_watchdog)
 		return;
diff --git a/src/watchdog.h b/src/watchdog.h
index a462559..b872757 100644
--- a/src/watchdog.h
+++ b/src/watchdog.h
@@ -9,12 +9,20 @@
 #ifndef __WATCHDOG_H__
 #define __WATCHDOG_H__
 
-void update_watchdog(struct space *sp, uint64_t timestamp,
-		     int id_renewal_fail_seconds);
+/* open/close socket connection to wdmd daemon */
 int connect_watchdog(struct space *sp);
+void disconnect_watchdog(struct space *sp);
+
+/* tell wdmd to open the watchdog device which arms it
+   and wdmd begins keepalive loop, but the watchdog
+   keepalive is not yet influenced by lockspace renewals. */
+int open_watchdog(int con, int fire_timeout);
+
+/* associate per-lockspace renewals in sanlock with
+   watchdog petting in wdmd */
 int activate_watchdog(struct space *sp, uint64_t timestamp,
 		      int id_renewal_fail_seconds, int con);
 void deactivate_watchdog(struct space *sp);
-void close_watchdog(struct space *sp);
-
+void update_watchdog(struct space *sp, uint64_t timestamp,
+		     int id_renewal_fail_seconds);
 #endif
diff --git a/wdmd/client.c b/wdmd/client.c
index 87fcd40..be5278a 100644
--- a/wdmd/client.c
+++ b/wdmd/client.c
@@ -77,6 +77,29 @@ static int send_header(int con, int cmd)
 	return 0;
 }
 
+/*
+ * Send CMD_OPEN_WATCHDOG with the requested fire_timeout to wdmd and wait
+ * for the reply header.  Returns 0 when the reply carries the same
+ * fire_timeout, -errno when send/recv fails, and -1 on any other reply.
+ */
+int wdmd_open_watchdog(int con, int fire_timeout)
+{
+	struct wdmd_header req, reply;
+
+	memset(&req, 0, sizeof(req));
+	req.cmd = CMD_OPEN_WATCHDOG;
+	req.fire_timeout = fire_timeout;
+
+	if (send(con, (void *)&req, sizeof(struct wdmd_header), 0) < 0)
+		return -errno;
+
+	memset(&reply, 0, sizeof(reply));
+	if (recv(con, &reply, sizeof(reply), MSG_WAITALL) < 0)
+		return -errno;
+
+	return (reply.fire_timeout != fire_timeout) ? -1 : 0;
+}
+
 int wdmd_refcount_set(int con)
 {
 	return send_header(con, CMD_REFCOUNT_SET);
diff --git a/wdmd/main.c b/wdmd/main.c
index aebacbe..e89ab5e 100644
--- a/wdmd/main.c
+++ b/wdmd/main.c
@@ -56,16 +56,20 @@
 
 #define WDPATH_SIZE 64
 
-static int test_interval = DEFAULT_TEST_INTERVAL;
+static int standard_test_interval = DEFAULT_TEST_INTERVAL;
+static int test_interval= DEFAULT_TEST_INTERVAL;
 static int fire_timeout = DEFAULT_FIRE_TIMEOUT;
 static int high_priority = DEFAULT_HIGH_PRIORITY;
 static int daemon_quit;
 static int daemon_debug;
+static int try_timeout;
+static int forcefire;
 static int socket_gid;
 static char *socket_gname = (char *)SOCKET_GNAME;
 static time_t last_keepalive;
 static time_t last_closeunclean;
 static char lockfile_path[PATH_MAX];
+static int test_loop_enable;
 static int dev_fd = -1;
 static int shm_fd;
 
@@ -334,6 +338,155 @@ static void dump_debug(int fd)
 	send(fd, debug_buf, debug_len, MSG_NOSIGNAL);
 }
 
+/* Derive both test intervals from the current fire_timeout. */
+static void _init_test_interval(void)
+{
+	int interval;
+
+	if (fire_timeout >= 60)
+		interval = 10;
+	else if (fire_timeout >= 30)
+		interval = 5;
+	else if (fire_timeout >= 10)
+		interval = 2;
+	else
+		interval = 1;
+	standard_test_interval = interval;
+	test_interval = interval;
+}
+
+/* Open watchdog_path write-only and store the descriptor in dev_fd. */
+static int open_dev(void)
+{
+	int new_fd;
+
+	if (dev_fd != -1) {
+		log_error("watchdog already open fd %d", dev_fd);
+		return -1;
+	}
+
+	new_fd = open(watchdog_path, O_WRONLY | O_CLOEXEC);
+	if (new_fd >= 0) {
+		dev_fd = new_fd;
+		return 0;
+	}
+	log_error("open %s error %d", watchdog_path, errno);
+	return new_fd;
+}
+
+/* Disarm the watchdog by writing "V" to the device, then close it and
+   reset dev_fd to -1.  Only logs a debug message if it is not open. */
+static void close_watchdog(void)
+{
+	if (dev_fd == -1) {
+		log_debug("close_watchdog already closed");
+		return;
+	}
+
+	/* the outcome of the disarm write is logged; the fd is closed either way */
+	if (write(dev_fd, "V", 1) >= 0)
+		log_error("%s disarmed", watchdog_path);
+	else
+		log_error("%s disarm write error %d", watchdog_path, errno);
+
+	close(dev_fd);
+	dev_fd = -1;
+}
+
+/* Close the device without the disarm write and note when that happened. */
+static void close_watchdog_unclean(void)
+{
+	if (dev_fd != -1) {
+		log_error("%s closed unclean", watchdog_path);
+		close(dev_fd);
+		dev_fd = -1;
+
+		last_closeunclean = monotime();
+	}
+}
+
+/* Issue WDIOC_KEEPALIVE on dev_fd and timestamp it in last_keepalive. */
+static void pet_watchdog(void)
+{
+	int unused;
+	int ioctl_rv = ioctl(dev_fd, WDIOC_KEEPALIVE, &unused);
+
+	last_keepalive = monotime();
+	log_debug("keepalive %d", ioctl_rv);
+}
+
+/*
+ * Handle CMD_OPEN_WATCHDOG: open the watchdog device and make sure its
+ * timeout equals h->fire_timeout, changing the device timeout if needed.
+ * On success the global fire_timeout and the test intervals are updated,
+ * a first keepalive is sent and test_loop_enable is set.
+ * Returns 0 on success or when the test loop is already enabled, -1 when
+ * no timeout was requested or the device could not be set up.
+ */
+static int _open_watchdog(struct wdmd_header *h)
+{
+	int dev_timeout = 0;
+	int want_timeout;
+
+	/* Don't check dev_fd for -1 because dev_fd will be closed
+	   and set to -1 prior to timeout in close_watchdog_unclean().  */
+
+	if (test_loop_enable)
+		return 0;
+
+	if (!h->fire_timeout)
+		return -1;
+
+	if (open_dev() < 0)
+		return -1;
+
+	/* read the timeout the device currently reports */
+	if (ioctl(dev_fd, WDIOC_GETTIMEOUT, &dev_timeout) < 0) {
+		log_error("open_watchdog gettimeout error %d", errno);
+		goto fail;
+	}
+
+	/* the reported value already matches the request, nothing to set */
+	if (dev_timeout == h->fire_timeout)
+		goto armed;
+
+	want_timeout = h->fire_timeout;
+
+	if (ioctl(dev_fd, WDIOC_SETTIMEOUT, &want_timeout) < 0) {
+		log_error("open_watchdog settimeout %d error %d", want_timeout, errno);
+		goto fail;
+	}
+
+	/* read the timeout back to check that the new value took effect */
+	dev_timeout = 0;
+
+	if (ioctl(dev_fd, WDIOC_GETTIMEOUT, &dev_timeout) < 0) {
+		log_error("open_watchdog gettimeout check error %d", errno);
+		goto fail;
+	}
+
+	/* failed to set a custom timeout */
+	if (dev_timeout != want_timeout) {
+		log_error("open_watchdog gettimeout value %d set %d",
+			  dev_timeout, want_timeout);
+		goto fail;
+	}
+
+ armed:
+	/* success: adopt the timeout, send a keepalive, enable the test loop */
+	fire_timeout = dev_timeout;
+	_init_test_interval();
+	log_error("%s open with timeout %d", watchdog_path, dev_timeout);
+	pet_watchdog();
+	test_loop_enable = 1;
+	return 0;
+
+ fail:
+	/* close_watchdog() also closes the device opened above */
+	close_watchdog();
+	return -1;
+}
+
 static void process_connection(int ci)
 {
 	struct wdmd_header h;
@@ -378,6 +531,17 @@ static void process_connection(int ci)
 		client[ci].refcount = 0;
 		break;
 
+	case CMD_OPEN_WATCHDOG:
+		memcpy(&h_ret, &h, sizeof(h));
+		rv = _open_watchdog(&h);
+		if (rv < 0)
+			h_ret.fire_timeout = 0;
+		else
+			h_ret.fire_timeout = fire_timeout;
+		log_debug("open_watchdog fire_timeout %u result %u", h.fire_timeout, fire_timeout);
+		send(client[ci].fd, &h_ret, sizeof(h_ret), MSG_NOSIGNAL);
+		break;
+
 	case CMD_TEST_LIVE:
 		client[ci].renewal = h.renewal_time;
 		client[ci].expire = h.expire_time;
@@ -509,7 +673,7 @@ static int test_clients(void)
 
 		if (t >= client[i].expire) {
 			log_error("test failed rem %d now %llu ping %llu close %llu renewal %llu expire %llu client %d %s",
-				  DEFAULT_FIRE_TIMEOUT - (int)(t - last_ping),
+				  fire_timeout - (int)(t - last_ping),
 				  (unsigned long long)t,
 				  (unsigned long long)last_keepalive,
 				  (unsigned long long)last_closeunclean,
@@ -540,7 +704,7 @@ static int test_clients(void)
 		 * expiration time.
 		 */
 
-		if (t >= client[i].expire - DEFAULT_TEST_INTERVAL) {
+		if (t >= client[i].expire - standard_test_interval) {
 			log_error("test warning now %llu ping %llu close %llu renewal %llu expire %llu client %d %s",
 				  (unsigned long long)t,
 				  (unsigned long long)last_keepalive,
@@ -791,7 +955,7 @@ static int test_scripts(void)
 		 */
 
 		if (!scripts[i].last_result &&
-		    ((begin - scripts[i].start) < (DEFAULT_TEST_INTERVAL - 1)))
+		    ((begin - scripts[i].start) < (standard_test_interval - 1)))
 			continue;
 
 		pid = run_script(i);
@@ -806,7 +970,7 @@ static int test_scripts(void)
 		}
 	}
 
-	/* wait up to DEFAULT_TEST_INTERVAL-1 for the pids to finish */
+	/* wait up to standard_test_interval-1 for the pids to finish */
 
 	while (1) {
 		running = 0;
@@ -893,7 +1057,7 @@ static int test_scripts(void)
 		if (!running)
 			break;
 
-		if (monotime() - begin >= DEFAULT_TEST_INTERVAL - 1)
+		if (monotime() - begin >= standard_test_interval - 1)
 			break;
 
 		sleep(1);
@@ -927,58 +1091,6 @@ static int test_scripts(void)
 	return fail_count;
 }
 
-static int open_dev(void)
-{
-	int fd;
-
-	if (dev_fd != -1) {
-		log_error("watchdog already open fd %d", dev_fd);
-		return -1;
-	}
-
-	fd = open(watchdog_path, O_WRONLY | O_CLOEXEC);
-	if (fd < 0) {
-		log_error("open %s error %d", watchdog_path, errno);
-		return fd;
-	}
-
-	dev_fd = fd;
-	return 0;
-}
-
-static void close_watchdog_unclean(void)
-{
-	if (dev_fd == -1) {
-		log_debug("close_watchdog_unclean already closed");
-		return;
-	}
-
-	log_error("%s closed unclean", watchdog_path);
-	close(dev_fd);
-	dev_fd = -1;
-
-	last_closeunclean = monotime();
-}
-
-static void close_watchdog(void)
-{
-	int rv;
-
-	if (dev_fd == -1) {
-		log_error("close_watchdog already closed");
-		return;
-	}
-
-	rv = write(dev_fd, "V", 1);
-	if (rv < 0)
-		log_error("%s disarm write error %d", watchdog_path, errno);
-	else
-		log_error("%s disarmed", watchdog_path);
-
-	close(dev_fd);
-	dev_fd = -1;
-}
-
 static int _setup_watchdog(char *path)
 {
 	struct stat buf;
@@ -1004,25 +1116,9 @@ static int _setup_watchdog(char *path)
 		return -1;
 	}
 
-	if (timeout == fire_timeout)
-		goto out;
-
-	timeout = fire_timeout;
-
-	rv = ioctl(dev_fd, WDIOC_SETTIMEOUT, &timeout);
-	if (rv < 0) {
-		log_error("%s failed to set timeout", watchdog_path);
-		close_watchdog();
-		return -1;
-	}
+	log_debug("%s gettimeout reported %u", watchdog_path, timeout);
 
-	if (timeout != fire_timeout) {
-		log_error("%s failed to set new timeout", watchdog_path);
-		close_watchdog();
-		return -1;
-	}
- out:
-	log_error("%s armed with fire_timeout %d", watchdog_path, fire_timeout);
+	close_watchdog();
 
 	/* TODO: save watchdog_path in /run/wdmd/saved_path,
 	 * and in startup read that file, copying it to saved_path */
@@ -1091,7 +1187,103 @@ static int setup_watchdog(void)
 
 }
 
-static int probe_dev(const char *path)
+/*
+ * --trytimeout: check that the watchdog device at path accepts try_timeout.
+ * With forcefire, also wait for it to reset the machine.  0 means success.
+ */
+static int _try_timeout(const char *path)
+{
+	struct stat buf;
+	int get_timeout, set_timeout;
+	int unused, fd, err, saved_errno;
+	int rv = -1;	/* every "goto out" below reports failure */
+
+	if (stat(path, &buf) < 0) {
+		fprintf(stderr, "%s stat error %d\n", path, errno);
+		return -1;
+	}
+
+	fd = open(path, O_WRONLY | O_CLOEXEC);
+	if (fd < 0) {
+		fprintf(stderr, "%s open error %d\n", path, errno);
+		return fd;
+	}
+
+	get_timeout = 0;
+
+	if (ioctl(fd, WDIOC_GETTIMEOUT, &get_timeout) < 0) {
+		fprintf(stderr, "%s gettimeout error %d\n", path, errno);
+		goto out;
+	}
+
+	printf("%s gettimeout %d\n", path, get_timeout);
+
+	set_timeout = try_timeout;
+
+	if (ioctl(fd, WDIOC_SETTIMEOUT, &set_timeout) < 0) {
+		fprintf(stderr, "%s settimeout %d error %d\n", path, set_timeout, errno);
+		goto out;
+	}
+
+	printf("%s settimeout %d result %d\n", path, try_timeout, set_timeout);
+
+	if (set_timeout != try_timeout) {
+		fprintf(stderr, "%s settimeout %d failed\n", path, try_timeout);
+		goto out;
+	}
+
+	get_timeout = 0;
+
+	if (ioctl(fd, WDIOC_GETTIMEOUT, &get_timeout) < 0) {
+		fprintf(stderr, "%s gettimeout error %d\n", path, errno);
+		goto out;
+	}
+
+	printf("%s gettimeout %d\n", path, get_timeout);
+
+	/* wdmd's _open_watchdog() rejects a device whose timeout does not read
+	   back as requested, so report that case as a failure here too. */
+	if (get_timeout != try_timeout) {
+		fprintf(stderr, "%s gettimeout %d does not match %d\n", path, get_timeout, try_timeout);
+		goto out;
+	}
+
+	if (ioctl(fd, WDIOC_KEEPALIVE, &unused) < 0) {
+		fprintf(stderr, "%s keepalive error %d\n", path, errno);
+		goto out;
+	}
+
+	if (forcefire) {
+		int i;
+		setbuf(stdout, NULL);
+		printf("waiting for watchdog to reset machine:\n");
+		for (i = 1; i < get_timeout + 5; i++) {
+			sleep(1);
+			if (i <= get_timeout) {
+				printf("%d ", i);
+				continue;
+			}
+			if (i == get_timeout + 1)
+				printf("\n");
+			printf("%d %s failed to fire after timeout %d seconds\n", i, path, get_timeout);
+		}
+	}
+
+	rv = 0;
+ out:
+	err = write(fd, "V", 1);
+	if (err < 0) {
+		saved_errno = errno;	/* stdio and openlog() may overwrite errno */
+		fprintf(stderr, "trytimeout failed to disarm %s error %d %d\n", path, err, saved_errno);
+		openlog("wdmd", LOG_CONS | LOG_PID, LOG_DAEMON);
+		syslog(LOG_ERR, "trytimeout failed to disarm %s error %d %d\n", path, err, saved_errno);
+	}
+
+	close(fd);
+	return rv;
+}
+
+static int _probe_dev(const char *path)
 {
 	struct stat buf;
 	int fd, err, rv, timeout;
@@ -1153,6 +1345,14 @@ static int probe_dev(const char *path)
 	return rv;
 }
 
+/* Dispatch a device probe: --trytimeout test if requested, else plain probe. */
+static int probe_dev(const char *path)
+{
+	if (!try_timeout)
+		return _probe_dev(path);
+
+	return _try_timeout(path);
+}
+
 static int probe_watchdog(void)
 {
 	int rv;
@@ -1205,16 +1405,6 @@ static int probe_watchdog(void)
 
 }
 
-static void pet_watchdog(void)
-{
-	int rv, unused;
-
-	rv = ioctl(dev_fd, WDIOC_KEEPALIVE, &unused);
-
-	last_keepalive = monotime();
-	log_debug("keepalive %d", rv);
-}
-
 static void process_signals(int ci)
 {
 	struct signalfd_siginfo fdsi;
@@ -1305,8 +1495,6 @@ static int test_loop(void)
 	int fail_count;
 	int rv, i;
 
-	pet_watchdog();
-
 	test_time = 0;
 	poll_timeout = test_interval * 1000;
 
@@ -1335,6 +1523,12 @@ static int test_loop(void)
 		if (daemon_quit && !active_clients())
 			break;
 
+		/*
+		 * No client has called open_watchdog() so the wd device is not open yet.
+		 */
+		if (!test_loop_enable)
+			continue;
+
 		if (monotime() - test_time >= test_interval) {
 			test_time = monotime();
 			log_debug("test_time %llu",
@@ -1354,7 +1548,7 @@ static int test_loop(void)
 					pet_watchdog();
 				}
 
-				test_interval = DEFAULT_TEST_INTERVAL;
+				test_interval = standard_test_interval;
 			} else {
 				/* If we can patch the kernel so that close
 				   does not generate a ping, then we can skip
@@ -1513,19 +1707,21 @@ static void print_usage_and_exit(int status)
 {
 	printf("Usage:\n");
 	printf("wdmd [options]\n\n");
-	printf("--version, -V         print version\n");
-	printf("--help, -h            print usage\n");
-	printf("--dump, -d            print debug from daemon\n");
-	printf("--probe, -p           print path of functional watchdog device\n");
-	printf("-D                    debug: no fork and print all logging to stderr\n");
-	printf("-H 0|1                use high priority features (1 yes, 0 no, default %d)\n",
-				      DEFAULT_HIGH_PRIORITY);
-	printf("-G <name>             group ownership for the socket\n");
-	printf("-S 0|1                allow script tests (default %d)\n", allow_scripts);
-	printf("-s <path>             path to scripts dir (default %s)\n", scripts_dir);
-	printf("-k <num>              kill unfinished scripts after num seconds (default %d)\n",
-				      kill_script_sec);
-	printf("-w /dev/watchdog      path to the watchdog device to try first\n");
+	printf("--version, -V          print version\n");
+	printf("--help, -h             print usage\n");
+	printf("--dump, -d             print debug from daemon\n");
+	printf("--probe, -p            print path of functional watchdog device\n");
+	printf("--trytimeout, -t <sec> set the timeout value for watchdog device\n");
+	printf("--forcefire, -F        force watchdog to fire and reset machine, use with -t\n");
+	printf("-D                     debug: no fork and print all logging to stderr\n");
+	printf("-H 0|1                 use high priority features (1 yes, 0 no, default %d)\n",
+				       DEFAULT_HIGH_PRIORITY);
+	printf("-G <name>              group ownership for the socket\n");
+	printf("-S 0|1                 allow script tests (default %d)\n", allow_scripts);
+	printf("-s <path>              path to scripts dir (default %s)\n", scripts_dir);
+	printf("-k <num>               kill unfinished scripts after num seconds (default %d)\n",
+				       kill_script_sec);
+	printf("-w <path>              path to the watchdog device to try first\n");
 	exit(status);
 }
 
@@ -1553,14 +1749,16 @@ int main(int argc, char *argv[])
 	    int option_index = 0;
 
 	    static struct option long_options[] = {
-	        {"help",    no_argument, 0,  'h' },
-	        {"probe",   no_argument, 0,  'p' },
-	        {"dump",    no_argument, 0,  'd' },
-	        {"version", no_argument, 0,  'V' },
-	        {0,         0,           0,  0 }
+	        {"help",       no_argument,       0,  'h' },
+	        {"probe",      no_argument,       0,  'p' },
+	        {"dump",       no_argument,       0,  'd' },
+	        {"trytimeout", required_argument, 0,  't' },
+	        {"forcefire",  no_argument,       0,  'F' },
+	        {"version",    no_argument,       0,  'V' },
+	        {0,            0,                 0,  0 }
 	    };
 
-	    c = getopt_long(argc, argv, "hpdVDH:G:S:s:k:w:",
+	    c = getopt_long(argc, argv, "hpdVDH:G:S:s:k:w:t:F",
 	                    long_options, &option_index);
 	    if (c == -1)
 	         break;
@@ -1572,6 +1770,13 @@ int main(int argc, char *argv[])
 		case 'p':
 		    do_probe = 1;
 		    break;
+		case 't':
+		    do_probe = 1;
+		    try_timeout = atoi(optarg);
+		    break;
+		case 'F':
+		    forcefire = 1;
+		    break;
 		case 'd':
 		    print_debug_and_exit();
 		    break;
diff --git a/wdmd/wdmd.8 b/wdmd/wdmd.8
index cc03be7..6cbb813 100644
--- a/wdmd/wdmd.8
+++ b/wdmd/wdmd.8
@@ -10,9 +10,9 @@ wdmd \- watchdog multiplexing daemon
 .SH DESCRIPTION
 
 This daemon opens /dev/watchdog and allows multiple independent sources to
-detmermine whether each KEEPALIVE is done.  Every test interval (10
+determine whether each KEEPALIVE is done.  Every test interval (default 10
 seconds), the daemon tests each source.  If any test fails, the KEEPALIVE
-is not done.  In a standard configuration, the watchdog timer will reset
+is not done.  In the default configuration, the watchdog timer will reset
 the system if no KEEPALIVE is done for 60 seconds ("fire timeout").  This
 means that if a single test fails 5-6 times in row, the watchdog will fire
 and reset the system.  With multiple test sources, fewer separate failures
@@ -40,8 +40,8 @@ T60: watchdog fires, system resets
 T60, and the tests at T60 would not be run.)
 
 A crucial aspect to the design and function of wdmd is that if any single
-source does not pass tests for the fire timeout, the watchdog is
-guaranteed to fire, regardless of whether other sources on the system have
+source does not pass the test for the length of the fire timeout, the watchdog
+is guaranteed to fire, regardless of whether other sources on the system have
 passed or failed.  A spurious reset due to the combined effects of
 multiple failing tests as shown above, is an accepted side effect.
 
@@ -113,4 +113,13 @@ it is considered a failure.
 .BI \-w " path"
   The path to the watchdog device to try first.
 
+.TP
+.BI "\-\-trytimeout, \-t" " seconds"
+  Set the timeout for the watchdog device.  Use this to check for supported
+  timeout values.
+
+.TP
+.B \-\-forcefire, \-F
+  Force the watchdog to fire and reset the machine.  Use with \-t.
+
 
diff --git a/wdmd/wdmd.h b/wdmd/wdmd.h
index b32598f..01725a9 100644
--- a/wdmd/wdmd.h
+++ b/wdmd/wdmd.h
@@ -13,6 +13,7 @@
 #define WDMD_NAME_SIZE 128
 
 int wdmd_connect(void);
+int wdmd_open_watchdog(int con, int fire_timeout);
 int wdmd_register(int con, char *name);
 int wdmd_refcount_set(int con);
 int wdmd_refcount_clear(int con);
diff --git a/wdmd/wdmd_sock.h b/wdmd/wdmd_sock.h
index f5ed27e..b554631 100644
--- a/wdmd/wdmd_sock.h
+++ b/wdmd/wdmd_sock.h
@@ -20,6 +20,7 @@ enum {
 	CMD_TEST_LIVE,
 	CMD_STATUS,
 	CMD_DUMP_DEBUG,
+	CMD_OPEN_WATCHDOG,
 };
 
 struct wdmd_header {