From 34af016589c3cb733731ab33b9e249b4943fbcb9 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mar 28 2022 19:30:44 +0000 Subject: sanlock: use helper to set max_sectors_kb When the sanlock daemon is not run as the root user, it doesn't have permission to write to sysfs max_sectors_kb, so use the root helper process to do that. --- diff --git a/src/cmd.c b/src/cmd.c index 3067e3e..5ab0ae2 100644 --- a/src/cmd.c +++ b/src/cmd.c @@ -2397,6 +2397,7 @@ static int print_state_lockspace(struct space *sp, char *str, const char *list_n "external_used=%d " "used_by_orphans=%d " "renewal_read_extend_sec=%u " + "set_max_sectors_kb=%u " "corrupt_result=%d " "acquire_last_result=%d " "renewal_last_result=%d " @@ -2417,6 +2418,7 @@ static int print_state_lockspace(struct space *sp, char *str, const char *list_n (sp->flags & SP_EXTERNAL_USED) ? 1 : 0, (sp->flags & SP_USED_BY_ORPHANS) ? 1 : 0, sp->renewal_read_extend_sec, + sp->set_max_sectors_kb, sp->lease_status.corrupt_result, sp->lease_status.acquire_last_result, sp->lease_status.renewal_last_result, diff --git a/src/diskio.c b/src/diskio.c index 563647b..488427e 100644 --- a/src/diskio.c +++ b/src/diskio.c @@ -32,25 +32,13 @@ #include "direct.h" #include "log.h" -int read_sysfs_size(const char *disk_path, const char *name, unsigned int *val) +int read_sysfs_uint(char *path, unsigned int *val) { - char path[PATH_MAX]; - char buf[32]; - struct stat st; - int major, minor; - size_t len; + char buf[32] = { 0 }; + int len; int fd; int rv = -1; - rv = stat(disk_path, &st); - if (rv < 0) - return -1; - - major = (int)major(st.st_rdev); - minor = (int)minor(st.st_rdev); - - snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/queue/%s", major, minor, name); - fd = open(path, O_RDONLY, 0); if (fd < 0) return -1; @@ -73,38 +61,23 @@ int read_sysfs_size(const char *disk_path, const char *name, unsigned int *val) return rv; } -static int write_sysfs_size(const char *disk_path, const char *name, unsigned int val) +int write_sysfs_uint(char *path, unsigned int val) { - char path[PATH_MAX]; - char buf[32]; - struct stat st; - int major, minor; + char buf[32] = { 0 }; int fd; int rv; - rv = stat(disk_path, &st); - if (rv < 0) { - log_debug("write_sysfs_size stat error %d %s", errno, disk_path); - return -1; - } - - major = (int)major(st.st_rdev); - minor = (int)minor(st.st_rdev); - - snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/queue/%s", major, minor, name); - - memset(buf, 0, sizeof(buf)); - snprintf(buf, sizeof(buf), "%u", val); - fd = open(path, O_RDWR, 0); if (fd < 0) { - log_debug("write_sysfs_size open error %d %s", errno, path); + log_debug("write_sysfs_uint open error %d %s", errno, path); return -1; } + snprintf(buf, sizeof(buf), "%u", val); + rv = write(fd, buf, strlen(buf)); if (rv < 0) { - log_debug("write_sysfs_size write %s error %d %s", buf, errno, path); + log_debug("write_sysfs_uint write %s error %d %s", buf, errno, path); close(fd); return -1; } @@ -113,6 +86,52 @@ static int write_sysfs_size(const char *disk_path, const char *name, unsigned in return 0; } +int read_sysfs_size(const char *disk_path, const char *name, unsigned int *val) +{ + char sysfs_path[PATH_MAX] = { 0 }; + struct stat st; + int ma, mi; + int rv; + + rv = stat(disk_path, &st); + if (rv < 0) + return -1; + + ma = (int)major(st.st_rdev); + mi = (int)minor(st.st_rdev); + + snprintf(sysfs_path, sizeof(sysfs_path), "/sys/dev/block/%d:%d/queue/%s", ma, mi, name); + sysfs_path[PATH_MAX-1] = '\0'; + + rv = read_sysfs_uint(sysfs_path, val); + + return rv; +} + +static int write_sysfs_size(const char *disk_path, const char *name, unsigned int val) +{ + char sysfs_path[PATH_MAX] = { 0 }; + struct stat st; + int major, minor; + int rv; + + rv = stat(disk_path, &st); + if (rv < 0) { + log_debug("write_sysfs_size stat error %d %s", errno, disk_path); + return -1; + } + + major = (int)major(st.st_rdev); + minor = (int)minor(st.st_rdev); + + snprintf(sysfs_path, sizeof(sysfs_path), "/sys/dev/block/%d:%d/queue/%s", major, minor, name); + sysfs_path[PATH_MAX-1] = '\0'; + + rv = write_sysfs_uint(sysfs_path, val); + + return rv; +} + /* * The default max_sectors_kb is 512 (KB), so a 1MB read is split into two * 512KB reads. Adjust this to at least do 1MB io's. diff --git a/src/diskio.h b/src/diskio.h index 1ed65d2..db5fe3a 100644 --- a/src/diskio.h +++ b/src/diskio.h @@ -20,6 +20,8 @@ int majority_disks(int num_disks, int num); int read_sysfs_size(const char *path, const char *name, unsigned int *val); int set_max_sectors_kb(struct sync_disk *disk, uint32_t max_sectors_kb); int get_max_sectors_kb(struct sync_disk *disk, uint32_t *max_sectors_kb); +int read_sysfs_uint(char *path, unsigned int *val); +int write_sysfs_uint(char *path, unsigned int val); /* * iobuf functions require the caller to allocate iobuf using posix_memalign diff --git a/src/helper.c b/src/helper.c index 0022092..84b5263 100644 --- a/src/helper.c +++ b/src/helper.c @@ -31,6 +31,11 @@ #include "monotime.h" #include "helper.h" +/* only need sysfs functions */ +struct sync_disk; +struct task; +#include "diskio.h" + #define MAX_AV_COUNT 8 static void run_path(struct helper_msg *hm) @@ -139,7 +144,7 @@ static int send_status(int fd) #define log_debug(fmt, args...) \ do { \ if (log_stderr) \ - fprintf(stderr, "%ld " fmt "\n", time(NULL), ##args); \ + fprintf(stderr, "helper %ld " fmt "\n", time(NULL), ##args); \ } while (0) #define STANDARD_TIMEOUT_MS (HELPER_STATUS_INTERVAL*1000) @@ -147,6 +152,7 @@ do { \ int run_helper(int in_fd, int out_fd, int log_stderr) { + unsigned int sysfs_val; char name[16]; struct pollfd pollfd; struct helper_msg hm; @@ -215,6 +221,11 @@ int run_helper(int in_fd, int out_fd, int log_stderr) */ } else if (hm.type == HELPER_MSG_KILLPID) { kill(hm.pid, hm.sig); + + } else if (hm.type == HELPER_MSG_WRITE_SYSFS) { + sysfs_val = atoi(hm.args); + rv = write_sysfs_uint(hm.path, sysfs_val); + log_debug("write_sysfs %s %u rv %d", hm.path, sysfs_val, rv); } } diff --git a/src/helper.h b/src/helper.h index b87bc61..3368390 100644 --- a/src/helper.h +++ b/src/helper.h @@ -19,6 +19,7 @@ #define HELPER_MSG_RUNPATH 1 #define HELPER_MSG_KILLPID 2 +#define HELPER_MSG_WRITE_SYSFS 3 struct helper_msg { uint8_t type; diff --git a/src/lockspace.c b/src/lockspace.c index d9b79f6..3deb2b0 100644 --- a/src/lockspace.c +++ b/src/lockspace.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "sanlock_internal.h" #include "sanlock_admin.h" @@ -38,9 +39,85 @@ #include "task.h" #include "timeouts.h" #include "direct.h" +#include "helper.h" static uint32_t space_id_counter = 1; +/* + * When the sanlock daemon is not root, set_max_sectors_kb() needs to use the + * root helper process to write to sysfs. + */ + +static int helper_set_max_sectors_kb(struct sync_disk *disk, uint32_t set_kb) +{ + char sysfs_path[SANLK_HELPER_PATH_LEN] = { 0 }; + struct helper_msg hm; + struct stat st; + int ma, mi; + unsigned int max_kb = 0; + int rv; + + rv = stat(disk->path, &st); + if (rv < 0) { + log_debug("helper_set_max_sectors_kb stat error %d %s", errno, disk->path); + return -1; + } + ma = (int)major(st.st_rdev); + mi = (int)minor(st.st_rdev); + + snprintf(sysfs_path, sizeof(sysfs_path), "/sys/dev/block/%d:%d/queue/max_sectors_kb", ma, mi); + sysfs_path[SANLK_HELPER_PATH_LEN-1] = '\0'; + + rv = read_sysfs_uint(sysfs_path, &max_kb); + if (rv < 0) { + /* max_sectors_kb setting may not exist */ + return -1; + } + + if (max_kb == set_kb) + return 0; + + if (helper_kill_fd == -1) + return -1; + + memset(&hm, 0, sizeof(hm)); + hm.type = HELPER_MSG_WRITE_SYSFS; + memcpy(hm.path, sysfs_path, SANLK_HELPER_PATH_LEN); + snprintf(hm.args, sizeof(hm.args), "%u", set_kb); + + retry: + rv = write(helper_kill_fd, &hm, sizeof(hm)); + if (rv == -1 && errno == EINTR) + goto retry; + + /* pipe is full, we'll try again in a second */ + if (rv == -1 && errno == EAGAIN) { + log_error("helper_set_max_sectors_kb send EAGAIN"); + return -1; + } + + /* helper exited or closed fd, quit using helper */ + if (rv == -1 && errno == EPIPE) { + log_error("helper_set_max_sectors_kb send EPIPE"); + return -1; + } + + if (rv == -1) { + log_error("helper_set_max_sectors_kb send errno %d", errno); + return rv; + } + + /* We don't try to wait for the helper process to do the write, + although we could probably do something with the status msg. + It shouldn't matter when the sysfs field is written wrt + reading/writing the device. Add a slight delay here which + should usually let the sysfs update happen first. */ + + usleep(2000); + + return 0; +} + static struct space *_search_space(const char *name, struct sync_disk *disk, uint64_t host_id, @@ -634,7 +711,10 @@ static void set_lockspace_max_sectors_kb(struct space *sp, int sector_size, int log_space(sp, "set_lockspace_max_sectors_kb small hw_kb %u using 1024", hw_kb); - rv = set_max_sectors_kb(&sp->host_id_disk, set_kb); + if (!com.uid) + rv = set_max_sectors_kb(&sp->host_id_disk, set_kb); + else + rv = helper_set_max_sectors_kb(&sp->host_id_disk, set_kb); if (rv < 0) { log_space(sp, "set_lockspace_max_sectors_kb small hw_kb %u set 1024 error %d", hw_kb, rv); return; @@ -646,12 +726,16 @@ static void set_lockspace_max_sectors_kb(struct space *sp, int sector_size, int log_space(sp, "set_lockspace_max_sectors_kb hw_kb %u setting %u", hw_kb, set_kb); - rv = set_max_sectors_kb(&sp->host_id_disk, set_kb); + if (!com.uid) + rv = set_max_sectors_kb(&sp->host_id_disk, set_kb); + else + rv = helper_set_max_sectors_kb(&sp->host_id_disk, set_kb); if (rv < 0) { log_space(sp, "set_lockspace_max_sectors_kb hw_kb %u set %u error %d", hw_kb, set_kb, rv); return; } } + sp->set_max_sectors_kb = set_kb; } /* diff --git a/src/log.c b/src/log.c index 23fad06..129c68c 100644 --- a/src/log.c +++ b/src/log.c @@ -128,6 +128,9 @@ void log_level(uint32_t space_id, uint32_t res_id, char *name_in, int level, con struct tm time_info; pid_t tid; + if (is_helper) + return; + memset(name, 0, sizeof(name)); if (level == LOG_CLIENT) { diff --git a/src/main.c b/src/main.c index 5a0f9ba..4c022de 100644 --- a/src/main.c +++ b/src/main.c @@ -1649,6 +1649,7 @@ static int setup_helper(void) } else { close(pr_fd); close(pw_fd); + is_helper = 1; run_helper(cr_fd, cw_fd, (log_stderr_priority == LOG_DEBUG)); exit(0); } diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h index 4e04fa8..7fe0a5b 100644 --- a/src/sanlock_internal.h +++ b/src/sanlock_internal.h @@ -209,6 +209,7 @@ struct space { uint32_t used_retries; uint32_t renewal_read_extend_sec; /* defaults to io_timeout */ uint32_t rindex_op; + unsigned int set_max_sectors_kb; int sector_size; int align_size; int max_hosts; @@ -455,6 +456,7 @@ EXTERN char our_host_name_global[SANLK_NAME_LEN+1]; EXTERN int kill_count_max; EXTERN int kill_grace_seconds; +EXTERN int is_helper; EXTERN int helper_ci; EXTERN int helper_pid; EXTERN int helper_kill_fd;