From 75416303cf46553a02472df2b93af7c1741916b4 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Dec 21 2017 17:32:35 +0000 Subject: sanlock: reuse resource structs Keep unused resource structs on a list and reuse them if the same resource is requested again. This is meant to keep the resource id (rN) the same instead of increasing for every request. The limited number of unused saved structs means that the same resource can get a new id between requests. Internally, the token_id (unique for each token/request) is replaced by the res_id. --- diff --git a/src/cmd.c b/src/cmd.c index b32a91b..fa90482 100644 --- a/src/cmd.c +++ b/src/cmd.c @@ -367,6 +367,7 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca) } token->host_id = spi.host_id; token->host_generation = spi.host_generation; + token->space_id = spi.space_id; token->pid = cl_pid; token->io_timeout = spi.io_timeout; token->sector_size = spi.sector_size; @@ -376,16 +377,6 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca) if (cl->restricted & SANLK_RESTRICT_SIGTERM) token->flags |= T_RESTRICT_SIGTERM; - /* save a record of what this token_id is for later debugging */ - - log_level(spi.space_id, token->token_id, NULL, com.names_log_priority, - "resource %.48s:%.48s:%.256s:%llu%s for %d,%d,%d", - token->r.lockspace_name, - token->r.name, - token->r.disks[0].path, - (unsigned long long)token->r.disks[0].offset, - (token->acquire_flags & SANLK_RES_SHARED) ? ":SH" : "", - cl_ci, cl_fd, cl_pid); } for (i = 0; i < new_tokens_count; i++) { @@ -408,9 +399,16 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca) default: lvl = LOG_ERR; } - log_level(0, token->token_id, NULL, lvl, - "cmd_acquire %d,%d,%d acquire_token %d %s", - cl_ci, cl_fd, cl_pid, rv, acquire_error_str(rv)); + + if (token->res_id) + log_level(token->space_id, token->res_id, NULL, lvl, + "cmd_acquire %d,%d,%d acquire_token %d %s", + cl_ci, cl_fd, cl_pid, rv, acquire_error_str(rv)); + else + log_level(token->space_id, 0, NULL, lvl, + "cmd_acquire %d,%d,%d acquire_token %s %d %s", + cl_ci, cl_fd, cl_pid, + token->r.name, rv, acquire_error_str(rv)); result = rv; goto done; } @@ -2262,11 +2260,15 @@ static int print_state_resource(struct resource *r, char *str, const char *list_ "flags=%x " "sector_size=%d " "lver=%llu " + "reused=%u " + "res_id=%u " "token_id=%u", list_name, r->flags, r->sector_size, (unsigned long long)r->leader.lver, + r->reused, + r->res_id, token_id); return strlen(str) + 1; diff --git a/src/direct_lib.c b/src/direct_lib.c index a7f1967..b4065cc 100644 --- a/src/direct_lib.c +++ b/src/direct_lib.c @@ -23,11 +23,11 @@ #include "task.h" #include "timeouts.h" -void log_level(uint32_t space_id GNUC_UNUSED, uint32_t token_id GNUC_UNUSED, +void log_level(uint32_t space_id GNUC_UNUSED, uint32_t res_id GNUC_UNUSED, char *name GNUC_UNUSED, int level GNUC_UNUSED, const char *fmt GNUC_UNUSED, ...); -void log_level(uint32_t space_id GNUC_UNUSED, uint32_t token_id GNUC_UNUSED, +void log_level(uint32_t space_id GNUC_UNUSED, uint32_t res_id GNUC_UNUSED, char *name GNUC_UNUSED, int level GNUC_UNUSED, const char *fmt GNUC_UNUSED, ...) { diff --git a/src/list.h b/src/list.h index 27751a5..5518407 100644 --- a/src/list.h +++ b/src/list.h @@ -362,6 +362,17 @@ static inline void list_splice_tail_init(struct list_head *list, list_entry((ptr)->next, type, member) /** + * list_last_entry - get the last element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * Note, that list is expected to be not empty. + */ +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + +/** * list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. diff --git a/src/lockspace.c b/src/lockspace.c index 5ab2be4..a82c117 100644 --- a/src/lockspace.c +++ b/src/lockspace.c @@ -354,8 +354,7 @@ void check_other_leases(struct space *sp, char *buf) if (!hs->lease_bad && (strncmp(hs->owner_name, leader->resource_name, NAME_ID_SIZE) || (hs->owner_generation != leader->owner_generation))) { - log_level(sp->space_id, 0, NULL, LOG_WARNING, - "host %llu %llu %llu %.48s", + log_warns(sp, "host %llu %llu %llu %.48s", (unsigned long long)leader->owner_id, (unsigned long long)leader->owner_generation, (unsigned long long)leader->timestamp, @@ -825,6 +824,7 @@ static void *lockspace_thread(void *arg_in) */ purge_resource_orphans(sp->space_name); + purge_resource_free(sp->space_name); close_event_fds(sp); @@ -958,8 +958,7 @@ int add_lockspace_start(struct sanlk_lockspace *ls, uint32_t io_timeout, struct pthread_mutex_unlock(&spaces_mutex); /* save a record of what this space_id is for later debugging */ - log_level(sp->space_id, 0, NULL, LOG_WARNING, - "lockspace %.48s:%llu:%.256s:%llu", + log_warns(sp, "lockspace %.48s:%llu:%.256s:%llu", sp->space_name, (unsigned long long)sp->host_id, sp->host_id_disk.path, @@ -1586,8 +1585,7 @@ int lockspace_set_event(struct sanlk_lockspace *ls, struct sanlk_host_event *he, if ((now - sp->set_event_time < sp->set_bitmap_seconds) && sp->host_event.event && he->event && (sp->host_event.event != he->event)) { - log_level(sp->space_id, 0, NULL, LOG_WARNING, - "event %llu %llu %llu %llu replaced by %llu %llu %llu %llu t %llu", + log_warns(sp, "event %llu %llu %llu %llu replaced by %llu %llu %llu %llu t %llu", (unsigned long long)sp->host_event.host_id, (unsigned long long)sp->host_event.generation, (unsigned long long)sp->host_event.event, diff --git a/src/log.c b/src/log.c index df9d87a..ed04790 100644 --- a/src/log.c +++ b/src/log.c @@ -115,7 +115,7 @@ static void _log_save_ent(int level, int len) * logfile and/or syslog (so callers don't block writing messages to files) */ -void log_level(uint32_t space_id, uint32_t token_id, char *name_in, int level, const char *fmt, ...) +void log_level(uint32_t space_id, uint32_t res_id, char *name_in, int level, const char *fmt, ...) { va_list ap; char name[NAME_ID_SIZE + 1]; @@ -127,12 +127,12 @@ void log_level(uint32_t space_id, uint32_t token_id, char *name_in, int level, c memset(name, 0, sizeof(name)); - if (space_id && !token_id) + if (space_id && !res_id) snprintf(name, NAME_ID_SIZE, "s%u ", space_id); - else if (!space_id && token_id) - snprintf(name, NAME_ID_SIZE, "r%u ", token_id); - else if (space_id && token_id) - snprintf(name, NAME_ID_SIZE, "s%u:r%u ", space_id, token_id); + else if (!space_id && res_id) + snprintf(name, NAME_ID_SIZE, "r%u ", res_id); + else if (space_id && res_id) + snprintf(name, NAME_ID_SIZE, "s%u:r%u ", space_id, res_id); else if (name_in) snprintf(name, NAME_ID_SIZE, "%.8s ", name_in); diff --git a/src/log.h b/src/log.h index 10b8c36..ff11001 100644 --- a/src/log.h +++ b/src/log.h @@ -24,7 +24,7 @@ * it should be LOG_WARNING (goes only to sanlock.log) */ -void log_level(uint32_t space_id, uint32_t token_id, char *name_in, int level, const char *fmt, ...) +void log_level(uint32_t space_id, uint32_t res_id, char *name_in, int level, const char *fmt, ...) __attribute__((format(printf, 5, 6))); int setup_logging(void); @@ -33,13 +33,15 @@ void copy_log_dump(char *buf, int *len); #define log_debug(fmt, args...) log_level(0, 0, NULL, LOG_DEBUG, fmt, ##args) #define log_space(space, fmt, args...) log_level(space->space_id, 0, NULL, LOG_DEBUG, fmt, ##args) -#define log_token(token, fmt, args...) log_level(0, token->token_id, NULL, LOG_DEBUG, fmt, ##args) -#define log_spoke(space, token, fmt, args...) log_level(space->space_id, token->token_id, NULL, LOG_DEBUG, fmt, ##args) +#define log_token(token, fmt, args...) log_level(token->space_id, token->res_id, NULL, LOG_DEBUG, fmt, ##args) + +#define log_warn(fmt, args...) log_level(0, 0, NULL, LOG_WARNING, fmt, ##args) +#define log_warns(space, fmt, args...) log_level(space->space_id, 0, NULL, LOG_WARNING, fmt, ##args) +#define log_warnt(token, fmt, args...) log_level(token->space_id, token->res_id, NULL, LOG_WARNING, fmt, ##args) #define log_error(fmt, args...) log_level(0, 0, NULL, LOG_ERR, fmt, ##args) #define log_erros(space, fmt, args...) log_level(space->space_id, 0, NULL, LOG_ERR, fmt, ##args) -#define log_errot(token, fmt, args...) log_level(0, token->token_id, NULL, LOG_ERR, fmt, ##args) -#define log_errst(space, token, fmt, args...) log_level(space->space_id, token->token_id, NULL, LOG_ERR, fmt, ##args) +#define log_errot(token, fmt, args...) log_level(token->space_id, token->res_id, NULL, LOG_ERR, fmt, ##args) #define log_taske(task, fmt, args...) log_level(0, 0, task->name, LOG_ERR, fmt, ##args) #define log_taskw(task, fmt, args...) log_level(0, 0, task->name, LOG_WARNING, fmt, ##args) diff --git a/src/main.c b/src/main.c index 2bc4d1b..9fc033f 100644 --- a/src/main.c +++ b/src/main.c @@ -551,7 +551,7 @@ static int client_using_space(struct client *cl, struct space *sp) continue; if (!cl->kill_count) - log_spoke(sp, token, "client_using_space pid %d", cl->pid); + log_token(token, "client_using_space pid %d", cl->pid); if (sp->space_dead) token->space_dead = sp->space_dead; rv = 1; @@ -865,7 +865,7 @@ static int main_loop(void) } free_lockspaces(0); - free_resources(); + rem_resources(); gettimeofday(&now, NULL); ms = time_diff(&last_check, &now); @@ -1675,8 +1675,7 @@ static int do_daemon(void) setup_uid_gid(); - log_level(0, 0, NULL, LOG_WARNING, "sanlock daemon started %s host %s", - VERSION, our_host_name_global); + log_warn("sanlock daemon started %s host %s", VERSION, our_host_name_global); setup_priority(); diff --git a/src/paxos_lease.c b/src/paxos_lease.c index 09e4f59..2303873 100644 --- a/src/paxos_lease.c +++ b/src/paxos_lease.c @@ -591,8 +591,7 @@ static int run_ballot(struct task *task, struct token *token, uint32_t flags, continue; if (bk->lver > dblock.lver) { - log_level(0, token->token_id, NULL, LOG_WARNING, - "ballot %llu abort1 larger lver in bk[%d] %llu:%llu:%llu:%llu:%llu:%llu " + log_warnt(token, "ballot %llu abort1 larger lver in bk[%d] %llu:%llu:%llu:%llu:%llu:%llu " "our dblock %llu:%llu:%llu:%llu:%llu:%llu", (unsigned long long)next_lver, q, (unsigned long long)bk->mbal, @@ -618,8 +617,7 @@ static int run_ballot(struct task *task, struct token *token, uint32_t flags, /* see "It aborts the ballot" in comment above */ if (bk->mbal > dblock.mbal) { - log_level(0, token->token_id, NULL, LOG_WARNING, - "ballot %llu abort1 larger mbal in bk[%d] %llu:%llu:%llu:%llu:%llu:%llu " + log_warnt(token, "ballot %llu abort1 larger mbal in bk[%d] %llu:%llu:%llu:%llu:%llu:%llu " "our dblock %llu:%llu:%llu:%llu:%llu:%llu", (unsigned long long)next_lver, q, (unsigned long long)bk->mbal, @@ -812,8 +810,7 @@ static int run_ballot(struct task *task, struct token *token, uint32_t flags, * also be caught the the bk->mbal > dblock.mbal condition * below. */ - log_level(0, token->token_id, NULL, LOG_WARNING, - "ballot %llu abort2 larger lver in bk[%d] %llu:%llu:%llu:%llu:%llu:%llu " + log_warnt(token, "ballot %llu abort2 larger lver in bk[%d] %llu:%llu:%llu:%llu:%llu:%llu " "our dblock %llu:%llu:%llu:%llu:%llu:%llu", (unsigned long long)next_lver, q, (unsigned long long)bk->mbal, @@ -839,8 +836,7 @@ static int run_ballot(struct task *task, struct token *token, uint32_t flags, /* see "It aborts the ballot" in comment above */ if (bk->mbal > dblock.mbal) { - log_level(0, token->token_id, NULL, LOG_WARNING, - "ballot %llu abort2 larger mbal in bk[%d] %llu:%llu:%llu:%llu:%llu:%llu " + log_warnt(token, "ballot %llu abort2 larger mbal in bk[%d] %llu:%llu:%llu:%llu:%llu:%llu " "our dblock %llu:%llu:%llu:%llu:%llu:%llu", (unsigned long long)next_lver, q, (unsigned long long)bk->mbal, @@ -1835,8 +1831,7 @@ int paxos_lease_acquire(struct task *task, cur_leader.owner_id, &owner_dblock); if (!rv && (owner_dblock.flags & DBLOCK_FL_RELEASED)) { /* not an error, but interesting to see */ - log_level(0, token->token_id, NULL, LOG_WARNING, - "paxos_acquire owner %llu %llu %llu writer %llu owner dblock released", + log_warnt(token, "paxos_acquire owner %llu %llu %llu writer %llu owner dblock released", (unsigned long long)cur_leader.owner_id, (unsigned long long)cur_leader.owner_generation, (unsigned long long)cur_leader.timestamp, @@ -1967,8 +1962,7 @@ int paxos_lease_acquire(struct task *task, tmp_leader.owner_generation == token->host_generation) { /* not a problem, but interesting to see */ - log_level(0, token->token_id, NULL, LOG_WARNING, - "paxos_acquire %llu owner is our inp " + log_warnt(token, "paxos_acquire %llu owner is our inp " "%llu %llu %llu commited by %llu", (unsigned long long)next_lver, (unsigned long long)tmp_leader.owner_id, @@ -1981,8 +1975,7 @@ int paxos_lease_acquire(struct task *task, } else { /* not a problem, but interesting to see */ - log_level(0, token->token_id, NULL, LOG_WARNING, - "paxos_acquire %llu owner is %llu %llu %llu", + log_warnt(token, "paxos_acquire %llu owner is %llu %llu %llu", (unsigned long long)next_lver, (unsigned long long)tmp_leader.owner_id, (unsigned long long)tmp_leader.owner_generation, @@ -2098,8 +2091,7 @@ int paxos_lease_acquire(struct task *task, owner host_id is alive but with a newer generation, and we'd be able to get the lease by running the ballot again. */ - log_level(0, token->token_id, NULL, LOG_WARNING, - "ballot %llu commit other owner %llu %llu %llu", + log_warnt(token, "ballot %llu commit other owner %llu %llu %llu", (unsigned long long)new_leader.lver, (unsigned long long)new_leader.owner_id, (unsigned long long)new_leader.owner_generation, @@ -2212,8 +2204,7 @@ int paxos_lease_release(struct task *task, * new leader. */ if (leader.write_id != token->host_id) { - log_level(0, token->token_id, NULL, LOG_WARNING, - "paxos_release skip write " + log_warnt(token, "paxos_release skip write " "last lver %llu owner %llu %llu %llu writer %llu %llu %llu " "disk lver %llu owner %llu %llu %llu writer %llu %llu %llu", (unsigned long long)last->lver, diff --git a/src/resource.c b/src/resource.c index cd0cb1b..41ac7c4 100644 --- a/src/resource.c +++ b/src/resource.c @@ -44,6 +44,7 @@ static pthread_t resource_pt; static int resource_thread_stop; static int resource_thread_work; static int resource_thread_work_examine; +static struct list_head resources_free; static struct list_head resources_held; static struct list_head resources_add; static struct list_head resources_rem; @@ -51,13 +52,78 @@ static struct list_head resources_orphan; static pthread_mutex_t resource_mutex; static pthread_cond_t resource_cond; static struct list_head host_events; +static int resources_free_count; +static uint32_t resource_id_counter = 1; +#define FREE_RES_COUNT 128 + +/* + * There's not much advantage to saving resource structs and reusing them again + * when they are requested again. One advantage can be that the res_id remains + * unchanged for frequently requested resources, so a new resource description + * isn't logged each time it's requested. There may be some other + * optimizations that could be added. We may want per-lockspace lists of + * resources, or purge free resources when lockspaces are removed. + */ static void free_resource(struct resource *r) { + struct resource *rtmp = NULL; + struct resource *rmin = NULL; + if (r->lvb) free(r->lvb); - free(r); + + if (resources_free_count < FREE_RES_COUNT) { + resources_free_count++; + list_add(&r->list, &resources_free); + return; + } + + /* the max are being saved, free the least used before saving this one */ + + list_for_each_entry_reverse(rtmp, &resources_free, list) { + if (!rtmp->reused) { + list_del(&rtmp->list); + free(rtmp); + goto out; + } + + if (!rmin || (rtmp->reused < rmin->reused)) + rmin = rtmp; + } + + if (rmin) { + list_del(&rmin->list); + free(rmin); + } + out: + list_add(&r->list, &resources_free); +} + +static struct resource *get_free_resource(struct token *token, int *token_matches) +{ + struct resource *r; + + /* find a previous r that matches token */ + list_for_each_entry(r, &resources_free, list) { + if (strcmp(r->r.lockspace_name, token->r.lockspace_name)) + continue; + if (strcmp(r->r.name, token->r.name)) + continue; + if (r->r.num_disks != token->r.num_disks) + continue; + if (strcmp(r->r.disks[0].path, token->r.disks[0].path)) + continue; + + *token_matches = 1; + resources_free_count--; + list_del(&r->list); + r->reused++; + return r; + } + + return NULL; } /* N.B. the reporting function looks for the @@ -81,10 +147,10 @@ void send_state_resources(int fd) } list_for_each_entry(r, &resources_rem, list) - send_state_resource(fd, r, "rem", r->pid, r->release_token_id); + send_state_resource(fd, r, "rem", r->pid, 0); list_for_each_entry(r, &resources_orphan, list) - send_state_resource(fd, r, "orphan", r->pid, r->release_token_id); + send_state_resource(fd, r, "orphan", r->pid, 0); pthread_mutex_unlock(&resource_mutex); } @@ -929,8 +995,7 @@ static int _release_token(struct task *task, struct token *token, retry_async = 1; /* want to see this result in sanlock.log but not worry people with error */ - log_level(0, token->token_id, NULL, LOG_WARNING, - "release_token erase all leader lver %llu rv %d", + log_warnt(token, "release_token erase all leader lver %llu rv %d", (unsigned long long)lver, rv); } else if (r_flags & R_UNDO_SHARED) { @@ -1008,8 +1073,8 @@ static int _release_token(struct task *task, struct token *token, log_token(token, "release_token done r_flags %x", r_flags); pthread_mutex_lock(&resource_mutex); list_del(&r->list); - pthread_mutex_unlock(&resource_mutex); free_resource(r); + pthread_mutex_unlock(&resource_mutex); return ret; } @@ -1024,7 +1089,6 @@ static int _release_token(struct task *task, struct token *token, log_errot(token, "release_token timeout r_flags %x", r_flags); pthread_mutex_lock(&resource_mutex); r->flags |= R_THREAD_RELEASE; - r->release_token_id = token->token_id; pthread_mutex_unlock(&resource_mutex); return SANLK_AIO_TIMEOUT; } @@ -1063,11 +1127,9 @@ void release_token_async(struct token *token) list_del(&r->list); free_resource(r); } else if (token->acquire_flags & SANLK_RES_PERSISTENT) { - r->release_token_id = token->token_id; list_move(&r->list, &resources_orphan); } else { r->flags |= R_THREAD_RELEASE; - r->release_token_id = token->token_id; resource_thread_work = 1; list_move(&r->list, &resources_rem); pthread_cond_signal(&resource_cond); @@ -1159,21 +1221,41 @@ static void copy_disks(void *dst, void *src, int num_disks) } } -static struct resource *new_resource(struct token *token) +static struct resource *get_resource(struct token *token, int *new_id) { struct resource *r; + int token_matches = 0; + uint32_t res_id = 0; + uint32_t reused = 0; int disks_len, r_len; disks_len = token->r.num_disks * sizeof(struct sync_disk); r_len = sizeof(struct resource) + disks_len; - r = malloc(r_len); - if (!r) - return NULL; + r = get_free_resource(token, &token_matches); + + if (r && token_matches) { + res_id = r->res_id; + reused = r->reused; + *new_id = 0; + } else if (r) { + res_id = resource_id_counter++; + *new_id = 1; + } else { + r = malloc(r_len); + if (!r) + return NULL; + res_id = resource_id_counter++; + *new_id = 1; + } memset(r, 0, r_len); - memcpy(&r->r, &token->r, sizeof(struct sanlk_resource)); + /* preserved from one use to the next */ + r->res_id = res_id; + r->reused = reused; + + memcpy(&r->r, &token->r, sizeof(struct sanlk_resource)); r->io_timeout = token->io_timeout; /* disks copied after open_disks because open_disks sets sector_size @@ -1509,6 +1591,7 @@ int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags, int allow_orphan = 0; int only_orphan = 0; int owner_nowait = 0; + int new_id = 0; int rv; if (token->acquire_flags & SANLK_RES_LVER) @@ -1531,6 +1614,7 @@ int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags, r = find_resource(token, &resources_rem); if (r) { + token->res_id = r->res_id; if (!com.quiet_fail) log_errot(token, "acquire_token resource being removed"); pthread_mutex_unlock(&resource_mutex); @@ -1539,6 +1623,7 @@ int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags, r = find_resource(token, &resources_add); if (r) { + token->res_id = r->res_id; if (!com.quiet_fail) log_errot(token, "acquire_token resource being added"); pthread_mutex_unlock(&resource_mutex); @@ -1548,6 +1633,7 @@ int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags, r = find_resource(token, &resources_held); if (r && (token->acquire_flags & SANLK_RES_SHARED) && (r->flags & R_SHARED)) { /* multiple shared holders allowed */ + token->res_id = r->res_id; log_token(token, "acquire_token add shared"); copy_disks(&token->r.disks, &r->r.disks, token->r.num_disks); token->resource = r; @@ -1557,6 +1643,7 @@ int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags, } if (r) { + token->res_id = r->res_id; if (!com.quiet_fail) log_errot(token, "acquire_token resource exists"); pthread_mutex_unlock(&resource_mutex); @@ -1567,6 +1654,7 @@ int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags, r = find_resource(token, &resources_orphan); if (r && !allow_orphan) { + token->res_id = r->res_id; log_errot(token, "acquire_token found orphan"); pthread_mutex_unlock(&resource_mutex); return -EUCLEAN; @@ -1576,6 +1664,7 @@ int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags, if (r && allow_orphan && (r->flags & R_SHARED) && !(token->acquire_flags & SANLK_RES_SHARED)) { + token->res_id = r->res_id; log_errot(token, "acquire_token orphan is shared"); pthread_mutex_unlock(&resource_mutex); return -EUCLEAN; @@ -1585,6 +1674,7 @@ int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags, if (r && allow_orphan && !(r->flags & R_SHARED) && (token->acquire_flags & SANLK_RES_SHARED)) { + token->res_id = r->res_id; log_errot(token, "acquire_token orphan is exclusive"); pthread_mutex_unlock(&resource_mutex); return -EUCLEAN; @@ -1594,6 +1684,7 @@ int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags, if (r && allow_orphan && (r->flags & R_SHARED) && (token->acquire_flags & SANLK_RES_SHARED)) { + token->res_id = r->res_id; log_token(token, "acquire_token adopt shared orphan"); token->resource = r; list_add(&token->list, &r->tokens); @@ -1616,6 +1707,7 @@ int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags, if (r && allow_orphan && !(r->flags & R_SHARED) && !(token->acquire_flags & SANLK_RES_SHARED)) { + token->res_id = r->res_id; log_token(token, "acquire_token adopt orphan"); token->r.lver = r->leader.lver; r->pid = token->pid; @@ -1647,7 +1739,7 @@ int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags, * The resource does not exist, so create it. */ - r = new_resource(token); + r = get_resource(token, &new_id); if (!r) { pthread_mutex_unlock(&resource_mutex); return -ENOMEM; @@ -1657,9 +1749,19 @@ int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags, memcpy(r->killargs, killargs, SANLK_HELPER_ARGS_LEN); list_add(&token->list, &r->tokens); list_add(&r->list, &resources_add); + token->res_id = r->res_id; token->resource = r; pthread_mutex_unlock(&resource_mutex); + if (new_id) { + /* save a record of what this id is for later debugging */ + log_warnt(token, "resource %.48s:%.48s:%.256s:%llu", + token->r.lockspace_name, + token->r.name, + token->r.disks[0].path, + (unsigned long long)token->r.disks[0].offset); + } + rv = open_disks(token->disks, token->r.num_disks); if (rv < 0) { log_errot(token, "acquire_token open error %d", rv); @@ -2139,8 +2241,7 @@ static void resource_thread_release(struct task *task, struct resource *r, struc retry_async = 1; /* want to see this result in sanlock.log but not worry people with error */ - log_level(0, token->token_id, NULL, LOG_WARNING, - "release async erase all leader lver %llu rv %d", + log_warnt(token, "release async erase all leader lver %llu rv %d", (unsigned long long)r->leader.lver, rv); } else if (r_flags & R_UNDO_SHARED) { @@ -2199,8 +2300,8 @@ static void resource_thread_release(struct task *task, struct resource *r, struc log_token(token, "release async done r_flags %x", r_flags); pthread_mutex_lock(&resource_mutex); list_del(&r->list); - pthread_mutex_unlock(&resource_mutex); free_resource(r); + pthread_mutex_unlock(&resource_mutex); return; } @@ -2340,7 +2441,7 @@ static void *resource_thread(void *arg GNUC_UNUSED) copy_disks(&tt->r.disks, &r->r.disks, r->r.num_disks); tt->host_id = r->host_id; tt->host_generation = r->host_generation; - tt->token_id = r->release_token_id; + tt->res_id = r->res_id; tt->io_timeout = r->io_timeout; tt->sector_size = r->sector_size; tt->align_size = sector_size_to_align_size(r->sector_size); @@ -2430,21 +2531,32 @@ int release_orphan(struct sanlk_resource *res) return count; } -void purge_resource_orphans(char *space_name) +static void purge_resource_list(struct list_head *head, char *space_name, const char *list_name) { struct resource *r, *safe; pthread_mutex_lock(&resource_mutex); - list_for_each_entry_safe(r, safe, &resources_orphan, list) { + list_for_each_entry_safe(r, safe, head, list) { if (strncmp(r->r.lockspace_name, space_name, NAME_ID_SIZE)) continue; - log_debug("purge orphan %.48s:%.48s", r->r.lockspace_name, r->r.name); + if (list_name) + log_debug("purge %s %.48s:%.48s", list_name, r->r.lockspace_name, r->r.name); list_del(&r->list); - free_resource(r); + free(r); } pthread_mutex_unlock(&resource_mutex); } +void purge_resource_orphans(char *space_name) +{ + purge_resource_list(&resources_orphan, space_name, "orphan_list"); +} + +void purge_resource_free(char *space_name) +{ + purge_resource_list(&resources_free, space_name, "free_list"); +} + /* * This is called by the main_loop once a second during normal operation. * The resources_rem list should normally be empty, so this does nothing. @@ -2452,7 +2564,7 @@ void purge_resource_orphans(char *space_name) * that had timed out previously and need to be retried. */ -void free_resources(void) +void rem_resources(void) { pthread_mutex_lock(&resource_mutex); if (!list_empty(&resources_rem) && !resource_thread_work) { @@ -2471,6 +2583,7 @@ int setup_token_manager(void) INIT_LIST_HEAD(&resources_add); INIT_LIST_HEAD(&resources_rem); INIT_LIST_HEAD(&resources_held); + INIT_LIST_HEAD(&resources_free); INIT_LIST_HEAD(&resources_orphan); INIT_LIST_HEAD(&host_events); diff --git a/src/resource.h b/src/resource.h index 70cfd04..73c13cd 100644 --- a/src/resource.h +++ b/src/resource.h @@ -60,13 +60,14 @@ int read_resource_owners(struct task *task, struct token *token, char **send_buf, int *send_len, int *count); /* locks resource_mutex */ -void free_resources(void); +void rem_resources(void); /* locks resource_mutex */ int release_orphan(struct sanlk_resource *res); /* locks resource_mutex */ void purge_resource_orphans(char *space_name); +void purge_resource_free(char *space_name); /* locks resource_mutex */ void add_host_event(uint32_t space_id, struct sanlk_host_event *he, diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h index 1a865e7..e75979d 100644 --- a/src/sanlock_internal.h +++ b/src/sanlock_internal.h @@ -91,6 +91,7 @@ struct token { /* copied from the sp with r.lockspace_name */ uint64_t host_id; uint64_t host_generation; + uint32_t space_id; uint32_t io_timeout; /* internal */ @@ -98,7 +99,8 @@ struct token { struct resource *resource; int pid; uint32_t flags; /* be careful to avoid using this from different threads */ - uint32_t token_id; /* used to refer to this token instance in log messages */ + uint32_t token_id; + uint32_t res_id; int sector_size; int align_size; int space_dead; /* copied from sp->space_dead, set by main thread */ @@ -126,8 +128,9 @@ struct resource { uint32_t io_timeout; int pid; /* copied from token when ex */ int sector_size; + uint32_t res_id; + uint32_t reused; uint32_t flags; - uint32_t release_token_id; /* copy to temp token (tt) for log messages */ uint64_t thread_release_retry; char *lvb; char killpath[SANLK_HELPER_PATH_LEN]; /* copied from client */