From 92e3cc91d8f51ce64a8b7c696377180953dd316e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 9 Oct 2020 14:11:58 +0100 Subject: [PATCH 1/6] afs: Fix rapid cell addition/removal by not using RCU on cells tree There are a number of problems that are being seen by the rapidly mounting and unmounting an afs dynamic root with an explicit cell and volume specified (which should probably be rejected, but that's a separate issue): What the tests are doing is to look up/create a cell record for the name given and then tear it down again without actually using it to try to talk to a server. This is repeated endlessly, very fast, and the new cell collides with the old one if it's not quick enough to reuse it. It appears (as suggested by Hillf Danton) that the search through the RB tree under a read_seqbegin_or_lock() under RCU conditions isn't safe and that it's not blocking the write_seqlock(), despite taking two passes at it. He suggested that the code should take a ref on the cell it's attempting to look at - but this shouldn't be necessary until we've compared the cell names. It's possible that I'm missing a barrier somewhere. However, using an RCU search for this is overkill, really - we only need to access the cell name in a few places, and they're places where we're may end up sleeping anyway. Fix this by switching to an R/W semaphore instead. Additionally, draw the down_read() call inside the function (renamed to afs_find_cell()) since all the callers were taking the RCU read lock (or should've been[*]). [*] afs_probe_cell_name() should have been, but that doesn't appear to be involved in the bug reports. The symptoms of this look like: general protection fault, probably for non-canonical address 0xf27d208691691fdb: 0000 [#1] PREEMPT SMP KASAN KASAN: maybe wild-memory-access in range [0x93e924348b48fed8-0x93e924348b48fedf] ... RIP: 0010:strncasecmp lib/string.c:52 [inline] RIP: 0010:strncasecmp+0x5f/0x240 lib/string.c:43 afs_lookup_cell_rcu+0x313/0x720 fs/afs/cell.c:88 afs_lookup_cell+0x2ee/0x1440 fs/afs/cell.c:249 afs_parse_source fs/afs/super.c:290 [inline] ... Fixes: 989782dcdc91 ("afs: Overhaul cell database management") Reported-by: syzbot+459a5dce0b4cb70fd076@syzkaller.appspotmail.com Signed-off-by: David Howells cc: Hillf Danton cc: syzkaller-bugs@googlegroups.com --- fs/afs/cell.c | 127 ++++++++++++++++++++-------------------------- fs/afs/dynroot.c | 21 +++----- fs/afs/internal.h | 6 +-- fs/afs/main.c | 2 +- fs/afs/super.c | 4 +- 5 files changed, 69 insertions(+), 91 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 5b79cdceefa0..5da83e84952a 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -41,15 +41,15 @@ static void afs_set_cell_timer(struct afs_net *net, time64_t delay) } /* - * Look up and get an activation reference on a cell record under RCU - * conditions. The caller must hold the RCU read lock. + * Look up and get an activation reference on a cell record. The caller must + * hold net->cells_lock at least read-locked. */ -struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net, - const char *name, unsigned int namesz) +static struct afs_cell *afs_find_cell_locked(struct afs_net *net, + const char *name, unsigned int namesz) { struct afs_cell *cell = NULL; struct rb_node *p; - int n, seq = 0, ret = 0; + int n; _enter("%*.*s", namesz, namesz, name); @@ -58,61 +58,48 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net, if (namesz > AFS_MAXCELLNAME) return ERR_PTR(-ENAMETOOLONG); - do { - /* Unfortunately, rbtree walking doesn't give reliable results - * under just the RCU read lock, so we have to check for - * changes. - */ - if (cell) - afs_put_cell(net, cell); - cell = NULL; - ret = -ENOENT; + if (!name) { + cell = net->ws_cell; + if (!cell) + return ERR_PTR(-EDESTADDRREQ); + afs_get_cell(cell); + return cell; + } - read_seqbegin_or_lock(&net->cells_lock, &seq); + p = net->cells.rb_node; + while (p) { + cell = rb_entry(p, struct afs_cell, net_node); - if (!name) { - cell = rcu_dereference_raw(net->ws_cell); - if (cell) { - afs_get_cell(cell); - ret = 0; - break; - } - ret = -EDESTADDRREQ; - continue; - } + n = strncasecmp(cell->name, name, + min_t(size_t, cell->name_len, namesz)); + if (n == 0) + n = cell->name_len - namesz; + if (n < 0) + p = p->rb_left; + else if (n > 0) + p = p->rb_right; + else + goto found; + } - p = rcu_dereference_raw(net->cells.rb_node); - while (p) { - cell = rb_entry(p, struct afs_cell, net_node); + return ERR_PTR(-ENOENT); - n = strncasecmp(cell->name, name, - min_t(size_t, cell->name_len, namesz)); - if (n == 0) - n = cell->name_len - namesz; - if (n < 0) { - p = rcu_dereference_raw(p->rb_left); - } else if (n > 0) { - p = rcu_dereference_raw(p->rb_right); - } else { - if (atomic_inc_not_zero(&cell->usage)) { - ret = 0; - break; - } - /* We want to repeat the search, this time with - * the lock properly locked. - */ - } - cell = NULL; - } +found: + if (!atomic_inc_not_zero(&cell->usage)) + return ERR_PTR(-ENOENT); - } while (need_seqretry(&net->cells_lock, seq)); + return cell; +} - done_seqretry(&net->cells_lock, seq); +struct afs_cell *afs_find_cell(struct afs_net *net, + const char *name, unsigned int namesz) +{ + struct afs_cell *cell; - if (ret != 0 && cell) - afs_put_cell(net, cell); - - return ret == 0 ? cell : ERR_PTR(ret); + down_read(&net->cells_lock); + cell = afs_find_cell_locked(net, name, namesz); + up_read(&net->cells_lock); + return cell; } /* @@ -245,9 +232,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, _enter("%s,%s", name, vllist); if (!excl) { - rcu_read_lock(); - cell = afs_lookup_cell_rcu(net, name, namesz); - rcu_read_unlock(); + cell = afs_find_cell(net, name, namesz); if (!IS_ERR(cell)) goto wait_for_cell; } @@ -268,7 +253,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, /* Find the insertion point and check to see if someone else added a * cell whilst we were allocating. */ - write_seqlock(&net->cells_lock); + down_write(&net->cells_lock); pp = &net->cells.rb_node; parent = NULL; @@ -293,7 +278,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, rb_link_node_rcu(&cell->net_node, parent, pp); rb_insert_color(&cell->net_node, &net->cells); atomic_inc(&net->cells_outstanding); - write_sequnlock(&net->cells_lock); + up_write(&net->cells_lock); queue_work(afs_wq, &cell->manager); @@ -323,7 +308,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, afs_get_cell(cursor); ret = 0; } - write_sequnlock(&net->cells_lock); + up_write(&net->cells_lock); kfree(candidate); if (ret == 0) goto wait_for_cell; @@ -377,10 +362,10 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) afs_get_cell(new_root); /* install the new cell */ - write_seqlock(&net->cells_lock); - old_root = rcu_access_pointer(net->ws_cell); - rcu_assign_pointer(net->ws_cell, new_root); - write_sequnlock(&net->cells_lock); + down_write(&net->cells_lock); + old_root = net->ws_cell; + net->ws_cell = new_root; + up_write(&net->cells_lock); afs_put_cell(net, old_root); _leave(" = 0"); @@ -674,12 +659,12 @@ static void afs_manage_cell(struct work_struct *work) switch (cell->state) { case AFS_CELL_INACTIVE: case AFS_CELL_FAILED: - write_seqlock(&net->cells_lock); + down_write(&net->cells_lock); usage = 1; deleted = atomic_try_cmpxchg_relaxed(&cell->usage, &usage, 0); if (deleted) rb_erase(&cell->net_node, &net->cells); - write_sequnlock(&net->cells_lock); + up_write(&net->cells_lock); if (deleted) goto final_destruction; if (cell->state == AFS_CELL_FAILED) @@ -779,7 +764,7 @@ void afs_manage_cells(struct work_struct *work) * lack of use and cells whose DNS results have expired and dispatch * their managers. */ - read_seqlock_excl(&net->cells_lock); + down_read(&net->cells_lock); for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) { struct afs_cell *cell = @@ -824,7 +809,7 @@ void afs_manage_cells(struct work_struct *work) queue_work(afs_wq, &cell->manager); } - read_sequnlock_excl(&net->cells_lock); + up_read(&net->cells_lock); /* Update the timer on the way out. We have to pass an increment on * cells_outstanding in the namespace that we are in to the timer or @@ -854,10 +839,10 @@ void afs_cell_purge(struct afs_net *net) _enter(""); - write_seqlock(&net->cells_lock); - ws = rcu_access_pointer(net->ws_cell); - RCU_INIT_POINTER(net->ws_cell, NULL); - write_sequnlock(&net->cells_lock); + down_write(&net->cells_lock); + ws = net->ws_cell; + net->ws_cell = NULL; + up_write(&net->cells_lock); afs_put_cell(net, ws); _debug("del timer"); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 7b784af604fd..5b8de4fee6cd 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -123,7 +123,7 @@ static int afs_probe_cell_name(struct dentry *dentry) len--; } - cell = afs_lookup_cell_rcu(net, name, len); + cell = afs_find_cell(net, name, len); if (!IS_ERR(cell)) { afs_put_cell(net, cell); return 0; @@ -179,7 +179,6 @@ static struct dentry *afs_lookup_atcell(struct dentry *dentry) struct afs_cell *cell; struct afs_net *net = afs_d2net(dentry); struct dentry *ret; - unsigned int seq = 0; char *name; int len; @@ -191,17 +190,13 @@ static struct dentry *afs_lookup_atcell(struct dentry *dentry) if (!name) goto out_p; - rcu_read_lock(); - do { - read_seqbegin_or_lock(&net->cells_lock, &seq); - cell = rcu_dereference_raw(net->ws_cell); - if (cell) { - len = cell->name_len; - memcpy(name, cell->name, len + 1); - } - } while (need_seqretry(&net->cells_lock, seq)); - done_seqretry(&net->cells_lock, seq); - rcu_read_unlock(); + down_read(&net->cells_lock); + cell = net->ws_cell; + if (cell) { + len = cell->name_len; + memcpy(name, cell->name, len + 1); + } + up_read(&net->cells_lock); ret = ERR_PTR(-ENOENT); if (!cell) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e5f0446f27e5..257c0f07742f 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -263,11 +263,11 @@ struct afs_net { /* Cell database */ struct rb_root cells; - struct afs_cell __rcu *ws_cell; + struct afs_cell *ws_cell; struct work_struct cells_manager; struct timer_list cells_timer; atomic_t cells_outstanding; - seqlock_t cells_lock; + struct rw_semaphore cells_lock; struct mutex cells_alias_lock; struct mutex proc_cells_lock; @@ -917,7 +917,7 @@ static inline bool afs_cb_is_broken(unsigned int cb_break, * cell.c */ extern int afs_cell_init(struct afs_net *, const char *); -extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned); +extern struct afs_cell *afs_find_cell(struct afs_net *, const char *, unsigned); extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned, const char *, bool); extern struct afs_cell *afs_get_cell(struct afs_cell *); diff --git a/fs/afs/main.c b/fs/afs/main.c index 31b472f7c734..accdd8970e7c 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -78,7 +78,7 @@ static int __net_init afs_net_init(struct net *net_ns) mutex_init(&net->socket_mutex); net->cells = RB_ROOT; - seqlock_init(&net->cells_lock); + init_rwsem(&net->cells_lock); INIT_WORK(&net->cells_manager, afs_manage_cells); timer_setup(&net->cells_timer, afs_cells_timer, 0); diff --git a/fs/afs/super.c b/fs/afs/super.c index b552357b1d13..0be99016ecfb 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -634,9 +634,7 @@ static int afs_init_fs_context(struct fs_context *fc) ctx->net = afs_net(fc->net_ns); /* Default to the workstation cell. */ - rcu_read_lock(); - cell = afs_lookup_cell_rcu(ctx->net, NULL, 0); - rcu_read_unlock(); + cell = afs_find_cell(ctx->net, NULL, 0); if (IS_ERR(cell)) cell = NULL; ctx->cell = cell; From 88c853c3f5c0a07c5db61b494ee25152535cfeee Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Jul 2019 11:24:59 +0100 Subject: [PATCH 2/6] afs: Fix cell refcounting by splitting the usage counter Management of the lifetime of afs_cell struct has some problems due to the usage counter being used to determine whether objects of that type are in use in addition to whether anyone might be interested in the structure. This is made trickier by cell objects being cached for a period of time in case they're quickly reused as they hold the result of a setup process that may be slow (DNS lookups, AFS RPC ops). Problems include the cached root volume from alias resolution pinning its parent cell record, rmmod occasionally hanging and occasionally producing assertion failures. Fix this by splitting the count of active users from the struct reference count. Things then work as follows: (1) The cell cache keeps +1 on the cell's activity count and this has to be dropped before the cell can be removed. afs_manage_cell() tries to exchange the 1 to a 0 with the cells_lock write-locked, and if successful, the record is removed from the net->cells. (2) One struct ref is 'owned' by the activity count. That is put when the active count is reduced to 0 (final_destruction label). (3) A ref can be held on a cell whilst it is queued for management on a work queue without confusing the active count. afs_queue_cell() is added to wrap this. (4) The queue's ref is dropped at the end of the management. This is split out into a separate function, afs_manage_cell_work(). (5) The root volume record is put after a cell is removed (at the final_destruction label) rather then in the RCU destruction routine. (6) Volumes hold struct refs, but aren't active users. (7) Both counts are displayed in /proc/net/afs/cells. There are some management function changes: (*) afs_put_cell() now just decrements the refcount and triggers the RCU destruction if it becomes 0. It no longer sets a timer to have the manager do this. (*) afs_use_cell() and afs_unuse_cell() are added to increase and decrease the active count. afs_unuse_cell() sets the management timer. (*) afs_queue_cell() is added to queue a cell with approprate refs. There are also some other fixes: (*) Don't let /proc/net/afs/cells access a cell's vllist if it's NULL. (*) Make sure that candidate cells in lookups are properly destroyed rather than being simply kfree'd. This ensures the bits it points to are destroyed also. (*) afs_dec_cells_outstanding() is now called in cell destruction rather than at "final_destruction". This ensures that cell->net is still valid to the end of the destructor. (*) As a consequence of the previous two changes, move the increment of net->cells_outstanding that was at the point of insertion into the tree to the allocation routine to correctly balance things. Fixes: 989782dcdc91 ("afs: Overhaul cell database management") Signed-off-by: David Howells --- fs/afs/cell.c | 149 +++++++++++++++++++++++++++++++-------------- fs/afs/dynroot.c | 2 +- fs/afs/internal.h | 8 ++- fs/afs/mntpt.c | 4 +- fs/afs/proc.c | 23 ++++--- fs/afs/super.c | 12 ++-- fs/afs/vl_alias.c | 8 +-- fs/afs/vl_rotate.c | 2 +- fs/afs/volume.c | 4 +- 9 files changed, 136 insertions(+), 76 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 5da83e84952a..c906000b0ff8 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -19,7 +19,7 @@ static unsigned __read_mostly afs_cell_gc_delay = 10; static unsigned __read_mostly afs_cell_min_ttl = 10 * 60; static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60; -static void afs_manage_cell(struct work_struct *); +static void afs_manage_cell_work(struct work_struct *); static void afs_dec_cells_outstanding(struct afs_net *net) { @@ -62,8 +62,7 @@ static struct afs_cell *afs_find_cell_locked(struct afs_net *net, cell = net->ws_cell; if (!cell) return ERR_PTR(-EDESTADDRREQ); - afs_get_cell(cell); - return cell; + goto found; } p = net->cells.rb_node; @@ -85,12 +84,12 @@ static struct afs_cell *afs_find_cell_locked(struct afs_net *net, return ERR_PTR(-ENOENT); found: - if (!atomic_inc_not_zero(&cell->usage)) - return ERR_PTR(-ENOENT); - - return cell; + return afs_use_cell(cell); } +/* + * Look up and get an activation reference on a cell record. + */ struct afs_cell *afs_find_cell(struct afs_net *net, const char *name, unsigned int namesz) { @@ -153,8 +152,9 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, cell->name[i] = tolower(name[i]); cell->name[i] = 0; - atomic_set(&cell->usage, 2); - INIT_WORK(&cell->manager, afs_manage_cell); + atomic_set(&cell->ref, 1); + atomic_set(&cell->active, 0); + INIT_WORK(&cell->manager, afs_manage_cell_work); cell->volumes = RB_ROOT; INIT_HLIST_HEAD(&cell->proc_volumes); seqlock_init(&cell->volume_lock); @@ -193,6 +193,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, cell->dns_source = vllist->source; cell->dns_status = vllist->status; smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */ + atomic_inc(&net->cells_outstanding); _leave(" = %p", cell); return cell; @@ -275,12 +276,12 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, cell = candidate; candidate = NULL; + atomic_set(&cell->active, 2); rb_link_node_rcu(&cell->net_node, parent, pp); rb_insert_color(&cell->net_node, &net->cells); - atomic_inc(&net->cells_outstanding); up_write(&net->cells_lock); - queue_work(afs_wq, &cell->manager); + afs_queue_cell(cell); wait_for_cell: _debug("wait_for_cell"); @@ -305,16 +306,17 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, if (excl) { ret = -EEXIST; } else { - afs_get_cell(cursor); + afs_use_cell(cursor); ret = 0; } up_write(&net->cells_lock); - kfree(candidate); + if (candidate) + afs_put_cell(candidate); if (ret == 0) goto wait_for_cell; goto error_noput; error: - afs_put_cell(net, cell); + afs_unuse_cell(net, cell); error_noput: _leave(" = %d [error]", ret); return ERR_PTR(ret); @@ -359,7 +361,7 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) } if (!test_and_set_bit(AFS_CELL_FL_NO_GC, &new_root->flags)) - afs_get_cell(new_root); + afs_use_cell(new_root); /* install the new cell */ down_write(&net->cells_lock); @@ -367,7 +369,7 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) net->ws_cell = new_root; up_write(&net->cells_lock); - afs_put_cell(net, old_root); + afs_unuse_cell(net, old_root); _leave(" = 0"); return 0; } @@ -473,18 +475,21 @@ static int afs_update_cell(struct afs_cell *cell) static void afs_cell_destroy(struct rcu_head *rcu) { struct afs_cell *cell = container_of(rcu, struct afs_cell, rcu); + struct afs_net *net = cell->net; + int u; _enter("%p{%s}", cell, cell->name); - ASSERTCMP(atomic_read(&cell->usage), ==, 0); + u = atomic_read(&cell->ref); + ASSERTCMP(u, ==, 0); - afs_put_volume(cell->net, cell->root_volume, afs_volume_trace_put_cell_root); - afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers)); - afs_put_cell(cell->net, cell->alias_of); + afs_put_vlserverlist(net, rcu_access_pointer(cell->vl_servers)); + afs_unuse_cell(net, cell->alias_of); key_put(cell->anonymous_key); kfree(cell->name); kfree(cell); + afs_dec_cells_outstanding(net); _leave(" [destroyed]"); } @@ -519,16 +524,50 @@ void afs_cells_timer(struct timer_list *timer) */ struct afs_cell *afs_get_cell(struct afs_cell *cell) { - atomic_inc(&cell->usage); + if (atomic_read(&cell->ref) <= 0) + BUG(); + + atomic_inc(&cell->ref); return cell; } /* * Drop a reference on a cell record. */ -void afs_put_cell(struct afs_net *net, struct afs_cell *cell) +void afs_put_cell(struct afs_cell *cell) +{ + if (cell) { + unsigned int u, a; + + u = atomic_dec_return(&cell->ref); + if (u == 0) { + a = atomic_read(&cell->active); + WARN(a != 0, "Cell active count %u > 0\n", a); + call_rcu(&cell->rcu, afs_cell_destroy); + } + } +} + +/* + * Note a cell becoming more active. + */ +struct afs_cell *afs_use_cell(struct afs_cell *cell) +{ + if (atomic_read(&cell->ref) <= 0) + BUG(); + + atomic_inc(&cell->active); + return cell; +} + +/* + * Record a cell becoming less active. When the active counter reaches 1, it + * is scheduled for destruction, but may get reactivated. + */ +void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell) { time64_t now, expire_delay; + int a; if (!cell) return; @@ -541,11 +580,21 @@ void afs_put_cell(struct afs_net *net, struct afs_cell *cell) if (cell->vl_servers->nr_servers) expire_delay = afs_cell_gc_delay; - if (atomic_dec_return(&cell->usage) > 1) - return; + a = atomic_dec_return(&cell->active); + WARN_ON(a == 0); + if (a == 1) + /* 'cell' may now be garbage collected. */ + afs_set_cell_timer(net, expire_delay); +} - /* 'cell' may now be garbage collected. */ - afs_set_cell_timer(net, expire_delay); +/* + * Queue a cell for management, giving the workqueue a ref to hold. + */ +void afs_queue_cell(struct afs_cell *cell) +{ + afs_get_cell(cell); + if (!queue_work(afs_wq, &cell->manager)) + afs_put_cell(cell); } /* @@ -645,12 +694,11 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) * Manage a cell record, initialising and destroying it, maintaining its DNS * records. */ -static void afs_manage_cell(struct work_struct *work) +static void afs_manage_cell(struct afs_cell *cell) { - struct afs_cell *cell = container_of(work, struct afs_cell, manager); struct afs_net *net = cell->net; bool deleted; - int ret, usage; + int ret, active; _enter("%s", cell->name); @@ -660,10 +708,11 @@ static void afs_manage_cell(struct work_struct *work) case AFS_CELL_INACTIVE: case AFS_CELL_FAILED: down_write(&net->cells_lock); - usage = 1; - deleted = atomic_try_cmpxchg_relaxed(&cell->usage, &usage, 0); - if (deleted) + active = 1; + deleted = atomic_try_cmpxchg_relaxed(&cell->active, &active, 0); + if (deleted) { rb_erase(&cell->net_node, &net->cells); + } up_write(&net->cells_lock); if (deleted) goto final_destruction; @@ -688,7 +737,7 @@ static void afs_manage_cell(struct work_struct *work) goto again; case AFS_CELL_ACTIVE: - if (atomic_read(&cell->usage) > 1) { + if (atomic_read(&cell->active) > 1) { if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) { ret = afs_update_cell(cell); if (ret < 0) @@ -701,7 +750,7 @@ static void afs_manage_cell(struct work_struct *work) goto again; case AFS_CELL_DEACTIVATING: - if (atomic_read(&cell->usage) > 1) + if (atomic_read(&cell->active) > 1) goto reverse_deactivation; afs_deactivate_cell(net, cell); smp_store_release(&cell->state, AFS_CELL_INACTIVE); @@ -733,9 +782,18 @@ static void afs_manage_cell(struct work_struct *work) return; final_destruction: - call_rcu(&cell->rcu, afs_cell_destroy); - afs_dec_cells_outstanding(net); - _leave(" [destruct %d]", atomic_read(&net->cells_outstanding)); + /* The root volume is pinning the cell */ + afs_put_volume(cell->net, cell->root_volume, afs_volume_trace_put_cell_root); + cell->root_volume = NULL; + afs_put_cell(cell); +} + +static void afs_manage_cell_work(struct work_struct *work) +{ + struct afs_cell *cell = container_of(work, struct afs_cell, manager); + + afs_manage_cell(cell); + afs_put_cell(cell); } /* @@ -769,21 +827,20 @@ void afs_manage_cells(struct work_struct *work) for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) { struct afs_cell *cell = rb_entry(cursor, struct afs_cell, net_node); - unsigned usage; + unsigned active; bool sched_cell = false; - usage = atomic_read(&cell->usage); - _debug("manage %s %u", cell->name, usage); + active = atomic_read(&cell->active); + _debug("manage %s %u %u", cell->name, atomic_read(&cell->ref), active); - ASSERTCMP(usage, >=, 1); + ASSERTCMP(active, >=, 1); if (purging) { if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) - usage = atomic_dec_return(&cell->usage); - ASSERTCMP(usage, ==, 1); + atomic_dec(&cell->active); } - if (usage == 1) { + if (active == 1) { struct afs_vlserver_list *vllist; time64_t expire_at = cell->last_inactive; @@ -806,7 +863,7 @@ void afs_manage_cells(struct work_struct *work) } if (sched_cell) - queue_work(afs_wq, &cell->manager); + afs_queue_cell(cell); } up_read(&net->cells_lock); @@ -843,7 +900,7 @@ void afs_cell_purge(struct afs_net *net) ws = net->ws_cell; net->ws_cell = NULL; up_write(&net->cells_lock); - afs_put_cell(net, ws); + afs_unuse_cell(net, ws); _debug("del timer"); if (del_timer_sync(&net->cells_timer)) diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 5b8de4fee6cd..da32797dd425 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -125,7 +125,7 @@ static int afs_probe_cell_name(struct dentry *dentry) cell = afs_find_cell(net, name, len); if (!IS_ERR(cell)) { - afs_put_cell(net, cell); + afs_unuse_cell(net, cell); return 0; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 257c0f07742f..0363511290c8 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -363,7 +363,8 @@ struct afs_cell { #endif time64_t dns_expiry; /* Time AFSDB/SRV record expires */ time64_t last_inactive; /* Time of last drop of usage count */ - atomic_t usage; + atomic_t ref; /* Struct refcount */ + atomic_t active; /* Active usage counter */ unsigned long flags; #define AFS_CELL_FL_NO_GC 0 /* The cell was added manually, don't auto-gc */ #define AFS_CELL_FL_DO_LOOKUP 1 /* DNS lookup requested */ @@ -920,8 +921,11 @@ extern int afs_cell_init(struct afs_net *, const char *); extern struct afs_cell *afs_find_cell(struct afs_net *, const char *, unsigned); extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned, const char *, bool); +extern struct afs_cell *afs_use_cell(struct afs_cell *); +extern void afs_unuse_cell(struct afs_net *, struct afs_cell *); extern struct afs_cell *afs_get_cell(struct afs_cell *); -extern void afs_put_cell(struct afs_net *, struct afs_cell *); +extern void afs_put_cell(struct afs_cell *); +extern void afs_queue_cell(struct afs_cell *); extern void afs_manage_cells(struct work_struct *); extern void afs_cells_timer(struct timer_list *); extern void __net_exit afs_cell_purge(struct afs_net *); diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 79bc5f1338ed..c69a0282960c 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -88,7 +88,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) ctx->force = true; } if (ctx->cell) { - afs_put_cell(ctx->net, ctx->cell); + afs_unuse_cell(ctx->net, ctx->cell); ctx->cell = NULL; } if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) { @@ -124,7 +124,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) char *buf; if (src_as->cell) - ctx->cell = afs_get_cell(src_as->cell); + ctx->cell = afs_use_cell(src_as->cell); if (size < 2 || size > PAGE_SIZE - 1) return -EINVAL; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index e8babb62ed44..76fbe0560cfb 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -38,7 +38,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v) if (v == SEQ_START_TOKEN) { /* display header on line 1 */ - seq_puts(m, "USE TTL SV ST NAME\n"); + seq_puts(m, "USE ACT TTL SV ST NAME\n"); return 0; } @@ -46,10 +46,11 @@ static int afs_proc_cells_show(struct seq_file *m, void *v) vllist = rcu_dereference(cell->vl_servers); /* display one cell per line on subsequent lines */ - seq_printf(m, "%3u %6lld %2u %2u %s\n", - atomic_read(&cell->usage), + seq_printf(m, "%3u %3u %6lld %2u %2u %s\n", + atomic_read(&cell->ref), + atomic_read(&cell->active), cell->dns_expiry - ktime_get_real_seconds(), - vllist->nr_servers, + vllist ? vllist->nr_servers : 0, cell->state, cell->name); return 0; @@ -128,7 +129,7 @@ static int afs_proc_cells_write(struct file *file, char *buf, size_t size) } if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags)) - afs_put_cell(net, cell); + afs_unuse_cell(net, cell); } else { goto inval; } @@ -154,13 +155,11 @@ static int afs_proc_rootcell_show(struct seq_file *m, void *v) struct afs_net *net; net = afs_seq2net_single(m); - if (rcu_access_pointer(net->ws_cell)) { - rcu_read_lock(); - cell = rcu_dereference(net->ws_cell); - if (cell) - seq_printf(m, "%s\n", cell->name); - rcu_read_unlock(); - } + down_read(&net->cells_lock); + cell = net->ws_cell; + if (cell) + seq_printf(m, "%s\n", cell->name); + up_read(&net->cells_lock); return 0; } diff --git a/fs/afs/super.c b/fs/afs/super.c index 0be99016ecfb..e72c223f831d 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -294,7 +294,7 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) cellnamesz, cellnamesz, cellname ?: ""); return PTR_ERR(cell); } - afs_put_cell(ctx->net, ctx->cell); + afs_unuse_cell(ctx->net, ctx->cell); ctx->cell = cell; } @@ -389,8 +389,8 @@ static int afs_validate_fc(struct fs_context *fc) _debug("switch to alias"); key_put(ctx->key); ctx->key = NULL; - cell = afs_get_cell(ctx->cell->alias_of); - afs_put_cell(ctx->net, ctx->cell); + cell = afs_use_cell(ctx->cell->alias_of); + afs_unuse_cell(ctx->net, ctx->cell); ctx->cell = cell; goto reget_key; } @@ -508,7 +508,7 @@ static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc) if (ctx->dyn_root) { as->dyn_root = true; } else { - as->cell = afs_get_cell(ctx->cell); + as->cell = afs_use_cell(ctx->cell); as->volume = afs_get_volume(ctx->volume, afs_volume_trace_get_alloc_sbi); } @@ -521,7 +521,7 @@ static void afs_destroy_sbi(struct afs_super_info *as) if (as) { struct afs_net *net = afs_net(as->net_ns); afs_put_volume(net, as->volume, afs_volume_trace_put_destroy_sbi); - afs_put_cell(net, as->cell); + afs_unuse_cell(net, as->cell); put_net(as->net_ns); kfree(as); } @@ -607,7 +607,7 @@ static void afs_free_fc(struct fs_context *fc) afs_destroy_sbi(fc->s_fs_info); afs_put_volume(ctx->net, ctx->volume, afs_volume_trace_put_free_fc); - afs_put_cell(ctx->net, ctx->cell); + afs_unuse_cell(ctx->net, ctx->cell); key_put(ctx->key); kfree(ctx); } diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c index 5082ef04e99c..ddb4cb67d0fd 100644 --- a/fs/afs/vl_alias.c +++ b/fs/afs/vl_alias.c @@ -177,7 +177,7 @@ static int afs_compare_cell_roots(struct afs_cell *cell) is_alias: rcu_read_unlock(); - cell->alias_of = afs_get_cell(p); + cell->alias_of = afs_use_cell(p); return 1; } @@ -247,18 +247,18 @@ static int afs_query_for_alias(struct afs_cell *cell, struct key *key) continue; if (p->root_volume) continue; /* Ignore cells that have a root.cell volume. */ - afs_get_cell(p); + afs_use_cell(p); mutex_unlock(&cell->net->proc_cells_lock); if (afs_query_for_alias_one(cell, key, p) != 0) goto is_alias; if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) { - afs_put_cell(cell->net, p); + afs_unuse_cell(cell->net, p); return -ERESTARTSYS; } - afs_put_cell(cell->net, p); + afs_unuse_cell(cell->net, p); } mutex_unlock(&cell->net->proc_cells_lock); diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index c0458c903b31..da3b072d4d63 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -45,7 +45,7 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) cell->dns_expiry <= ktime_get_real_seconds()) { dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count); set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags); - queue_work(afs_wq, &cell->manager); + afs_queue_cell(cell); if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { if (wait_var_event_interruptible( diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 9bc0509e3634..a838030e9563 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -106,7 +106,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, return volume; error_1: - afs_put_cell(params->net, volume->cell); + afs_put_cell(volume->cell); kfree(volume); error_0: return ERR_PTR(ret); @@ -228,7 +228,7 @@ static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume) afs_remove_volume_from_cell(volume); afs_put_serverlist(net, rcu_access_pointer(volume->servers)); - afs_put_cell(net, volume->cell); + afs_put_cell(volume->cell); trace_afs_volume(volume->vid, atomic_read(&volume->usage), afs_volume_trace_free); kfree_rcu(volume, rcu); From 286377f6bdf71568a4cf07104fe44006ae0dba6d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 15 Oct 2020 11:05:01 +0100 Subject: [PATCH 3/6] afs: Fix cell purging with aliases When the afs module is removed, one of the things that has to be done is to purge the cell database. afs_cell_purge() cancels the management timer and then starts the cell manager work item to do the purging. This does a single run through and then assumes that all cells are now purged - but this is no longer the case. With the introduction of alias detection, a later cell in the database can now be holding an active count on an earlier cell (cell->alias_of). The purge scan passes by the earlier cell first, but this can't be got rid of until it has discarded the alias. Ordinarily, afs_unuse_cell() would handle this by setting the management timer to trigger another pass - but afs_set_cell_timer() doesn't do anything if the namespace is being removed (net->live == false). rmmod then hangs in the wait on cells_outstanding in afs_cell_purge(). Fix this by making afs_set_cell_timer() directly queue the cell manager if net->live is false. This causes additional management passes. Queueing the cell manager increments cells_outstanding to make sure the wait won't complete until all cells are destroyed. Fixes: 8a070a964877 ("afs: Detect cell aliases 1 - Cells with root volumes") Signed-off-by: David Howells --- fs/afs/cell.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index c906000b0ff8..1944be78e9b0 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -19,6 +19,7 @@ static unsigned __read_mostly afs_cell_gc_delay = 10; static unsigned __read_mostly afs_cell_min_ttl = 10 * 60; static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60; +static void afs_queue_cell_manager(struct afs_net *); static void afs_manage_cell_work(struct work_struct *); static void afs_dec_cells_outstanding(struct afs_net *net) @@ -37,6 +38,8 @@ static void afs_set_cell_timer(struct afs_net *net, time64_t delay) atomic_inc(&net->cells_outstanding); if (timer_reduce(&net->cells_timer, jiffies + delay * HZ)) afs_dec_cells_outstanding(net); + } else { + afs_queue_cell_manager(net); } } From 1d0e850a49a5b56f8f3cb51e74a11e2fedb96be6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 16 Oct 2020 13:21:14 +0100 Subject: [PATCH 4/6] afs: Fix cell removal Fix cell removal by inserting a more final state than AFS_CELL_FAILED that indicates that the cell has been unpublished in case the manager is already requeued and will go through again. The new AFS_CELL_REMOVED state will just immediately leave the manager function. Going through a second time in the AFS_CELL_FAILED state will cause it to try to remove the cell again, potentially leading to the proc list being removed. Fixes: 989782dcdc91 ("afs: Overhaul cell database management") Reported-by: syzbot+b994ecf2b023f14832c1@syzkaller.appspotmail.com Reported-by: syzbot+0e0db88e1eb44a91ae8d@syzkaller.appspotmail.com Reported-by: syzbot+2d0585e5efcd43d113c2@syzkaller.appspotmail.com Reported-by: syzbot+1ecc2f9d3387f1d79d42@syzkaller.appspotmail.com Reported-by: syzbot+18d51774588492bf3f69@syzkaller.appspotmail.com Reported-by: syzbot+a5e4946b04d6ca8fa5f3@syzkaller.appspotmail.com Suggested-by: Hillf Danton Signed-off-by: David Howells cc: Hillf Danton --- fs/afs/cell.c | 16 ++++++++++------ fs/afs/internal.h | 1 + 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 1944be78e9b0..bc7ed46aaca9 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -291,11 +291,11 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, wait_var_event(&cell->state, ({ state = smp_load_acquire(&cell->state); /* vs error */ - state == AFS_CELL_ACTIVE || state == AFS_CELL_FAILED; + state == AFS_CELL_ACTIVE || state == AFS_CELL_REMOVED; })); /* Check the state obtained from the wait check. */ - if (state == AFS_CELL_FAILED) { + if (state == AFS_CELL_REMOVED) { ret = cell->error; goto error; } @@ -700,7 +700,6 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) static void afs_manage_cell(struct afs_cell *cell) { struct afs_net *net = cell->net; - bool deleted; int ret, active; _enter("%s", cell->name); @@ -712,13 +711,15 @@ static void afs_manage_cell(struct afs_cell *cell) case AFS_CELL_FAILED: down_write(&net->cells_lock); active = 1; - deleted = atomic_try_cmpxchg_relaxed(&cell->active, &active, 0); - if (deleted) { + if (atomic_try_cmpxchg_relaxed(&cell->active, &active, 0)) { rb_erase(&cell->net_node, &net->cells); + smp_store_release(&cell->state, AFS_CELL_REMOVED); } up_write(&net->cells_lock); - if (deleted) + if (cell->state == AFS_CELL_REMOVED) { + wake_up_var(&cell->state); goto final_destruction; + } if (cell->state == AFS_CELL_FAILED) goto done; smp_store_release(&cell->state, AFS_CELL_UNSET); @@ -760,6 +761,9 @@ static void afs_manage_cell(struct afs_cell *cell) wake_up_var(&cell->state); goto again; + case AFS_CELL_REMOVED: + goto done; + default: break; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 0363511290c8..06e617ee4cd1 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -326,6 +326,7 @@ enum afs_cell_state { AFS_CELL_DEACTIVATING, AFS_CELL_INACTIVE, AFS_CELL_FAILED, + AFS_CELL_REMOVED, }; /* From dca54a7bbb8ca9148ae10d60c66c926e222a9c4b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Oct 2020 20:51:59 +0100 Subject: [PATCH 5/6] afs: Add tracing for cell refcount and active user count Add a tracepoint to log the cell refcount and active user count and pass in a reason code through various functions that manipulate these counters. Additionally, a helper function, afs_see_cell(), is provided to log interesting places that deal with a cell without actually doing any accounting directly. Signed-off-by: David Howells --- fs/afs/cell.c | 99 ++++++++++++++++++++++++---------- fs/afs/dynroot.c | 4 +- fs/afs/internal.h | 15 +++--- fs/afs/mntpt.c | 4 +- fs/afs/proc.c | 2 +- fs/afs/super.c | 16 +++--- fs/afs/vl_alias.c | 8 +-- fs/afs/vl_rotate.c | 2 +- fs/afs/volume.c | 6 +-- include/trace/events/afs.h | 107 +++++++++++++++++++++++++++++++++++++ 10 files changed, 208 insertions(+), 55 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index bc7ed46aaca9..52233fa6195f 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -18,6 +18,7 @@ static unsigned __read_mostly afs_cell_gc_delay = 10; static unsigned __read_mostly afs_cell_min_ttl = 10 * 60; static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60; +static atomic_t cell_debug_id; static void afs_queue_cell_manager(struct afs_net *); static void afs_manage_cell_work(struct work_struct *); @@ -48,7 +49,8 @@ static void afs_set_cell_timer(struct afs_net *net, time64_t delay) * hold net->cells_lock at least read-locked. */ static struct afs_cell *afs_find_cell_locked(struct afs_net *net, - const char *name, unsigned int namesz) + const char *name, unsigned int namesz, + enum afs_cell_trace reason) { struct afs_cell *cell = NULL; struct rb_node *p; @@ -87,19 +89,20 @@ static struct afs_cell *afs_find_cell_locked(struct afs_net *net, return ERR_PTR(-ENOENT); found: - return afs_use_cell(cell); + return afs_use_cell(cell, reason); } /* * Look up and get an activation reference on a cell record. */ struct afs_cell *afs_find_cell(struct afs_net *net, - const char *name, unsigned int namesz) + const char *name, unsigned int namesz, + enum afs_cell_trace reason) { struct afs_cell *cell; down_read(&net->cells_lock); - cell = afs_find_cell_locked(net, name, namesz); + cell = afs_find_cell_locked(net, name, namesz, reason); up_read(&net->cells_lock); return cell; } @@ -197,6 +200,8 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, cell->dns_status = vllist->status; smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */ atomic_inc(&net->cells_outstanding); + cell->debug_id = atomic_inc_return(&cell_debug_id); + trace_afs_cell(cell->debug_id, 1, 0, afs_cell_trace_alloc); _leave(" = %p", cell); return cell; @@ -236,7 +241,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, _enter("%s,%s", name, vllist); if (!excl) { - cell = afs_find_cell(net, name, namesz); + cell = afs_find_cell(net, name, namesz, afs_cell_trace_use_lookup); if (!IS_ERR(cell)) goto wait_for_cell; } @@ -280,13 +285,16 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, cell = candidate; candidate = NULL; atomic_set(&cell->active, 2); + trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), 2, afs_cell_trace_insert); rb_link_node_rcu(&cell->net_node, parent, pp); rb_insert_color(&cell->net_node, &net->cells); up_write(&net->cells_lock); - afs_queue_cell(cell); + afs_queue_cell(cell, afs_cell_trace_get_queue_new); wait_for_cell: + trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), atomic_read(&cell->active), + afs_cell_trace_wait); _debug("wait_for_cell"); wait_var_event(&cell->state, ({ @@ -309,17 +317,17 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, if (excl) { ret = -EEXIST; } else { - afs_use_cell(cursor); + afs_use_cell(cursor, afs_cell_trace_use_lookup); ret = 0; } up_write(&net->cells_lock); if (candidate) - afs_put_cell(candidate); + afs_put_cell(candidate, afs_cell_trace_put_candidate); if (ret == 0) goto wait_for_cell; goto error_noput; error: - afs_unuse_cell(net, cell); + afs_unuse_cell(net, cell, afs_cell_trace_unuse_lookup); error_noput: _leave(" = %d [error]", ret); return ERR_PTR(ret); @@ -364,15 +372,16 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) } if (!test_and_set_bit(AFS_CELL_FL_NO_GC, &new_root->flags)) - afs_use_cell(new_root); + afs_use_cell(new_root, afs_cell_trace_use_pin); /* install the new cell */ down_write(&net->cells_lock); + afs_see_cell(new_root, afs_cell_trace_see_ws); old_root = net->ws_cell; net->ws_cell = new_root; up_write(&net->cells_lock); - afs_unuse_cell(net, old_root); + afs_unuse_cell(net, old_root, afs_cell_trace_unuse_ws); _leave(" = 0"); return 0; } @@ -485,9 +494,10 @@ static void afs_cell_destroy(struct rcu_head *rcu) u = atomic_read(&cell->ref); ASSERTCMP(u, ==, 0); + trace_afs_cell(cell->debug_id, u, atomic_read(&cell->active), afs_cell_trace_free); afs_put_vlserverlist(net, rcu_access_pointer(cell->vl_servers)); - afs_unuse_cell(net, cell->alias_of); + afs_unuse_cell(net, cell->alias_of, afs_cell_trace_unuse_alias); key_put(cell->anonymous_key); kfree(cell->name); kfree(cell); @@ -525,24 +535,30 @@ void afs_cells_timer(struct timer_list *timer) /* * Get a reference on a cell record. */ -struct afs_cell *afs_get_cell(struct afs_cell *cell) +struct afs_cell *afs_get_cell(struct afs_cell *cell, enum afs_cell_trace reason) { + int u; + if (atomic_read(&cell->ref) <= 0) BUG(); - atomic_inc(&cell->ref); + u = atomic_inc_return(&cell->ref); + trace_afs_cell(cell->debug_id, u, atomic_read(&cell->active), reason); return cell; } /* * Drop a reference on a cell record. */ -void afs_put_cell(struct afs_cell *cell) +void afs_put_cell(struct afs_cell *cell, enum afs_cell_trace reason) { if (cell) { + unsigned int debug_id = cell->debug_id; unsigned int u, a; + a = atomic_read(&cell->active); u = atomic_dec_return(&cell->ref); + trace_afs_cell(debug_id, u, a, reason); if (u == 0) { a = atomic_read(&cell->active); WARN(a != 0, "Cell active count %u > 0\n", a); @@ -554,12 +570,16 @@ void afs_put_cell(struct afs_cell *cell) /* * Note a cell becoming more active. */ -struct afs_cell *afs_use_cell(struct afs_cell *cell) +struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason) { + int u, a; + if (atomic_read(&cell->ref) <= 0) BUG(); - atomic_inc(&cell->active); + u = atomic_read(&cell->ref); + a = atomic_inc_return(&cell->active); + trace_afs_cell(cell->debug_id, u, a, reason); return cell; } @@ -567,10 +587,11 @@ struct afs_cell *afs_use_cell(struct afs_cell *cell) * Record a cell becoming less active. When the active counter reaches 1, it * is scheduled for destruction, but may get reactivated. */ -void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell) +void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_trace reason) { + unsigned int debug_id = cell->debug_id; time64_t now, expire_delay; - int a; + int u, a; if (!cell) return; @@ -583,21 +604,35 @@ void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell) if (cell->vl_servers->nr_servers) expire_delay = afs_cell_gc_delay; + u = atomic_read(&cell->ref); a = atomic_dec_return(&cell->active); + trace_afs_cell(debug_id, u, a, reason); WARN_ON(a == 0); if (a == 1) /* 'cell' may now be garbage collected. */ afs_set_cell_timer(net, expire_delay); } +/* + * Note that a cell has been seen. + */ +void afs_see_cell(struct afs_cell *cell, enum afs_cell_trace reason) +{ + int u, a; + + u = atomic_read(&cell->ref); + a = atomic_read(&cell->active); + trace_afs_cell(cell->debug_id, u, a, reason); +} + /* * Queue a cell for management, giving the workqueue a ref to hold. */ -void afs_queue_cell(struct afs_cell *cell) +void afs_queue_cell(struct afs_cell *cell, enum afs_cell_trace reason) { - afs_get_cell(cell); + afs_get_cell(cell, reason); if (!queue_work(afs_wq, &cell->manager)) - afs_put_cell(cell); + afs_put_cell(cell, afs_cell_trace_put_queue_fail); } /* @@ -713,6 +748,8 @@ static void afs_manage_cell(struct afs_cell *cell) active = 1; if (atomic_try_cmpxchg_relaxed(&cell->active, &active, 0)) { rb_erase(&cell->net_node, &net->cells); + trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), 0, + afs_cell_trace_unuse_delete); smp_store_release(&cell->state, AFS_CELL_REMOVED); } up_write(&net->cells_lock); @@ -792,7 +829,7 @@ static void afs_manage_cell(struct afs_cell *cell) /* The root volume is pinning the cell */ afs_put_volume(cell->net, cell->root_volume, afs_volume_trace_put_cell_root); cell->root_volume = NULL; - afs_put_cell(cell); + afs_put_cell(cell, afs_cell_trace_put_destroy); } static void afs_manage_cell_work(struct work_struct *work) @@ -800,7 +837,7 @@ static void afs_manage_cell_work(struct work_struct *work) struct afs_cell *cell = container_of(work, struct afs_cell, manager); afs_manage_cell(cell); - afs_put_cell(cell); + afs_put_cell(cell, afs_cell_trace_put_queue_work); } /* @@ -838,13 +875,17 @@ void afs_manage_cells(struct work_struct *work) bool sched_cell = false; active = atomic_read(&cell->active); - _debug("manage %s %u %u", cell->name, atomic_read(&cell->ref), active); + trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), + active, afs_cell_trace_manage); ASSERTCMP(active, >=, 1); if (purging) { - if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) - atomic_dec(&cell->active); + if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) { + active = atomic_dec_return(&cell->active); + trace_afs_cell(cell->debug_id, atomic_read(&cell->ref), + active, afs_cell_trace_unuse_pin); + } } if (active == 1) { @@ -870,7 +911,7 @@ void afs_manage_cells(struct work_struct *work) } if (sched_cell) - afs_queue_cell(cell); + afs_queue_cell(cell, afs_cell_trace_get_queue_manage); } up_read(&net->cells_lock); @@ -907,7 +948,7 @@ void afs_cell_purge(struct afs_net *net) ws = net->ws_cell; net->ws_cell = NULL; up_write(&net->cells_lock); - afs_unuse_cell(net, ws); + afs_unuse_cell(net, ws, afs_cell_trace_unuse_ws); _debug("del timer"); if (del_timer_sync(&net->cells_timer)) diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index da32797dd425..db832cc931c8 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -123,9 +123,9 @@ static int afs_probe_cell_name(struct dentry *dentry) len--; } - cell = afs_find_cell(net, name, len); + cell = afs_find_cell(net, name, len, afs_cell_trace_use_probe); if (!IS_ERR(cell)) { - afs_unuse_cell(net, cell); + afs_unuse_cell(net, cell, afs_cell_trace_unuse_probe); return 0; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 06e617ee4cd1..81b0485fd22a 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -375,6 +375,7 @@ struct afs_cell { enum dns_record_source dns_source:8; /* Latest source of data from lookup */ enum dns_lookup_status dns_status:8; /* Latest status of data from lookup */ unsigned int dns_lookup_count; /* Counter of DNS lookups */ + unsigned int debug_id; /* The volumes belonging to this cell */ struct rb_root volumes; /* Tree of volumes on this server */ @@ -919,14 +920,16 @@ static inline bool afs_cb_is_broken(unsigned int cb_break, * cell.c */ extern int afs_cell_init(struct afs_net *, const char *); -extern struct afs_cell *afs_find_cell(struct afs_net *, const char *, unsigned); +extern struct afs_cell *afs_find_cell(struct afs_net *, const char *, unsigned, + enum afs_cell_trace); extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned, const char *, bool); -extern struct afs_cell *afs_use_cell(struct afs_cell *); -extern void afs_unuse_cell(struct afs_net *, struct afs_cell *); -extern struct afs_cell *afs_get_cell(struct afs_cell *); -extern void afs_put_cell(struct afs_cell *); -extern void afs_queue_cell(struct afs_cell *); +extern struct afs_cell *afs_use_cell(struct afs_cell *, enum afs_cell_trace); +extern void afs_unuse_cell(struct afs_net *, struct afs_cell *, enum afs_cell_trace); +extern struct afs_cell *afs_get_cell(struct afs_cell *, enum afs_cell_trace); +extern void afs_see_cell(struct afs_cell *, enum afs_cell_trace); +extern void afs_put_cell(struct afs_cell *, enum afs_cell_trace); +extern void afs_queue_cell(struct afs_cell *, enum afs_cell_trace); extern void afs_manage_cells(struct work_struct *); extern void afs_cells_timer(struct timer_list *); extern void __net_exit afs_cell_purge(struct afs_net *); diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index c69a0282960c..052dab2f5c03 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -88,7 +88,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) ctx->force = true; } if (ctx->cell) { - afs_unuse_cell(ctx->net, ctx->cell); + afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_mntpt); ctx->cell = NULL; } if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) { @@ -124,7 +124,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) char *buf; if (src_as->cell) - ctx->cell = afs_use_cell(src_as->cell); + ctx->cell = afs_use_cell(src_as->cell, afs_cell_trace_use_mntpt); if (size < 2 || size > PAGE_SIZE - 1) return -EINVAL; diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 76fbe0560cfb..065a28bfa3f1 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -129,7 +129,7 @@ static int afs_proc_cells_write(struct file *file, char *buf, size_t size) } if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags)) - afs_unuse_cell(net, cell); + afs_unuse_cell(net, cell, afs_cell_trace_unuse_no_pin); } else { goto inval; } diff --git a/fs/afs/super.c b/fs/afs/super.c index e72c223f831d..ac4e3ed4e9bd 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -294,7 +294,8 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) cellnamesz, cellnamesz, cellname ?: ""); return PTR_ERR(cell); } - afs_unuse_cell(ctx->net, ctx->cell); + afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_parse); + afs_see_cell(cell, afs_cell_trace_see_source); ctx->cell = cell; } @@ -389,8 +390,9 @@ static int afs_validate_fc(struct fs_context *fc) _debug("switch to alias"); key_put(ctx->key); ctx->key = NULL; - cell = afs_use_cell(ctx->cell->alias_of); - afs_unuse_cell(ctx->net, ctx->cell); + cell = afs_use_cell(ctx->cell->alias_of, + afs_cell_trace_use_fc_alias); + afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc); ctx->cell = cell; goto reget_key; } @@ -508,7 +510,7 @@ static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc) if (ctx->dyn_root) { as->dyn_root = true; } else { - as->cell = afs_use_cell(ctx->cell); + as->cell = afs_use_cell(ctx->cell, afs_cell_trace_use_sbi); as->volume = afs_get_volume(ctx->volume, afs_volume_trace_get_alloc_sbi); } @@ -521,7 +523,7 @@ static void afs_destroy_sbi(struct afs_super_info *as) if (as) { struct afs_net *net = afs_net(as->net_ns); afs_put_volume(net, as->volume, afs_volume_trace_put_destroy_sbi); - afs_unuse_cell(net, as->cell); + afs_unuse_cell(net, as->cell, afs_cell_trace_unuse_sbi); put_net(as->net_ns); kfree(as); } @@ -607,7 +609,7 @@ static void afs_free_fc(struct fs_context *fc) afs_destroy_sbi(fc->s_fs_info); afs_put_volume(ctx->net, ctx->volume, afs_volume_trace_put_free_fc); - afs_unuse_cell(ctx->net, ctx->cell); + afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc); key_put(ctx->key); kfree(ctx); } @@ -634,7 +636,7 @@ static int afs_init_fs_context(struct fs_context *fc) ctx->net = afs_net(fc->net_ns); /* Default to the workstation cell. */ - cell = afs_find_cell(ctx->net, NULL, 0); + cell = afs_find_cell(ctx->net, NULL, 0, afs_cell_trace_use_fc); if (IS_ERR(cell)) cell = NULL; ctx->cell = cell; diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c index ddb4cb67d0fd..f04a80e4f5c3 100644 --- a/fs/afs/vl_alias.c +++ b/fs/afs/vl_alias.c @@ -177,7 +177,7 @@ static int afs_compare_cell_roots(struct afs_cell *cell) is_alias: rcu_read_unlock(); - cell->alias_of = afs_use_cell(p); + cell->alias_of = afs_use_cell(p, afs_cell_trace_use_alias); return 1; } @@ -247,18 +247,18 @@ static int afs_query_for_alias(struct afs_cell *cell, struct key *key) continue; if (p->root_volume) continue; /* Ignore cells that have a root.cell volume. */ - afs_use_cell(p); + afs_use_cell(p, afs_cell_trace_use_check_alias); mutex_unlock(&cell->net->proc_cells_lock); if (afs_query_for_alias_one(cell, key, p) != 0) goto is_alias; if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) { - afs_unuse_cell(cell->net, p); + afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias); return -ERESTARTSYS; } - afs_unuse_cell(cell->net, p); + afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias); } mutex_unlock(&cell->net->proc_cells_lock); diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index da3b072d4d63..488e58490b16 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -45,7 +45,7 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) cell->dns_expiry <= ktime_get_real_seconds()) { dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count); set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags); - afs_queue_cell(cell); + afs_queue_cell(cell, afs_cell_trace_get_queue_dns); if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { if (wait_var_event_interruptible( diff --git a/fs/afs/volume.c b/fs/afs/volume.c index a838030e9563..f84194b791d3 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -83,7 +83,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, volume->vid = vldb->vid[params->type]; volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; - volume->cell = afs_get_cell(params->cell); + volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol); volume->type = params->type; volume->type_force = params->force; volume->name_len = vldb->name_len; @@ -106,7 +106,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, return volume; error_1: - afs_put_cell(volume->cell); + afs_put_cell(volume->cell, afs_cell_trace_put_vol); kfree(volume); error_0: return ERR_PTR(ret); @@ -228,7 +228,7 @@ static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume) afs_remove_volume_from_cell(volume); afs_put_serverlist(net, rcu_access_pointer(volume->servers)); - afs_put_cell(volume->cell); + afs_put_cell(volume->cell, afs_cell_trace_put_vol); trace_afs_volume(volume->vid, atomic_read(&volume->usage), afs_volume_trace_free); kfree_rcu(volume, rcu); diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 5f0c1cf1ea13..a79c5ca376b3 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -50,6 +50,7 @@ enum afs_server_trace { afs_server_trace_update, }; + enum afs_volume_trace { afs_volume_trace_alloc, afs_volume_trace_free, @@ -67,6 +68,46 @@ enum afs_volume_trace { afs_volume_trace_remove, }; +enum afs_cell_trace { + afs_cell_trace_alloc, + afs_cell_trace_free, + afs_cell_trace_get_queue_dns, + afs_cell_trace_get_queue_manage, + afs_cell_trace_get_queue_new, + afs_cell_trace_get_vol, + afs_cell_trace_insert, + afs_cell_trace_manage, + afs_cell_trace_put_candidate, + afs_cell_trace_put_destroy, + afs_cell_trace_put_queue_fail, + afs_cell_trace_put_queue_work, + afs_cell_trace_put_vol, + afs_cell_trace_see_source, + afs_cell_trace_see_ws, + afs_cell_trace_unuse_alias, + afs_cell_trace_unuse_check_alias, + afs_cell_trace_unuse_delete, + afs_cell_trace_unuse_fc, + afs_cell_trace_unuse_lookup, + afs_cell_trace_unuse_mntpt, + afs_cell_trace_unuse_no_pin, + afs_cell_trace_unuse_parse, + afs_cell_trace_unuse_pin, + afs_cell_trace_unuse_probe, + afs_cell_trace_unuse_sbi, + afs_cell_trace_unuse_ws, + afs_cell_trace_use_alias, + afs_cell_trace_use_check_alias, + afs_cell_trace_use_fc, + afs_cell_trace_use_fc_alias, + afs_cell_trace_use_lookup, + afs_cell_trace_use_mntpt, + afs_cell_trace_use_pin, + afs_cell_trace_use_probe, + afs_cell_trace_use_sbi, + afs_cell_trace_wait, +}; + enum afs_fs_operation { afs_FS_FetchData = 130, /* AFS Fetch file data */ afs_FS_FetchACL = 131, /* AFS Fetch file ACL */ @@ -295,6 +336,44 @@ enum afs_cb_break_reason { EM(afs_volume_trace_put_validate_fc, "PUT fc-validat") \ E_(afs_volume_trace_remove, "REMOVE ") +#define afs_cell_traces \ + EM(afs_cell_trace_alloc, "ALLOC ") \ + EM(afs_cell_trace_free, "FREE ") \ + EM(afs_cell_trace_get_queue_dns, "GET q-dns ") \ + EM(afs_cell_trace_get_queue_manage, "GET q-mng ") \ + EM(afs_cell_trace_get_queue_new, "GET q-new ") \ + EM(afs_cell_trace_get_vol, "GET vol ") \ + EM(afs_cell_trace_insert, "INSERT ") \ + EM(afs_cell_trace_manage, "MANAGE ") \ + EM(afs_cell_trace_put_candidate, "PUT candid") \ + EM(afs_cell_trace_put_destroy, "PUT destry") \ + EM(afs_cell_trace_put_queue_work, "PUT q-work") \ + EM(afs_cell_trace_put_queue_fail, "PUT q-fail") \ + EM(afs_cell_trace_put_vol, "PUT vol ") \ + EM(afs_cell_trace_see_source, "SEE source") \ + EM(afs_cell_trace_see_ws, "SEE ws ") \ + EM(afs_cell_trace_unuse_alias, "UNU alias ") \ + EM(afs_cell_trace_unuse_check_alias, "UNU chk-al") \ + EM(afs_cell_trace_unuse_delete, "UNU delete") \ + EM(afs_cell_trace_unuse_fc, "UNU fc ") \ + EM(afs_cell_trace_unuse_lookup, "UNU lookup") \ + EM(afs_cell_trace_unuse_mntpt, "UNU mntpt ") \ + EM(afs_cell_trace_unuse_parse, "UNU parse ") \ + EM(afs_cell_trace_unuse_pin, "UNU pin ") \ + EM(afs_cell_trace_unuse_probe, "UNU probe ") \ + EM(afs_cell_trace_unuse_sbi, "UNU sbi ") \ + EM(afs_cell_trace_unuse_ws, "UNU ws ") \ + EM(afs_cell_trace_use_alias, "USE alias ") \ + EM(afs_cell_trace_use_check_alias, "USE chk-al") \ + EM(afs_cell_trace_use_fc, "USE fc ") \ + EM(afs_cell_trace_use_fc_alias, "USE fc-al ") \ + EM(afs_cell_trace_use_lookup, "USE lookup") \ + EM(afs_cell_trace_use_mntpt, "USE mntpt ") \ + EM(afs_cell_trace_use_pin, "USE pin ") \ + EM(afs_cell_trace_use_probe, "USE probe ") \ + EM(afs_cell_trace_use_sbi, "USE sbi ") \ + E_(afs_cell_trace_wait, "WAIT ") + #define afs_fs_operations \ EM(afs_FS_FetchData, "FS.FetchData") \ EM(afs_FS_FetchStatus, "FS.FetchStatus") \ @@ -483,6 +562,7 @@ enum afs_cb_break_reason { afs_call_traces; afs_server_traces; +afs_cell_traces; afs_fs_operations; afs_vl_operations; afs_edit_dir_ops; @@ -1358,6 +1438,33 @@ TRACE_EVENT(afs_volume, __entry->ref) ); +TRACE_EVENT(afs_cell, + TP_PROTO(unsigned int cell_debug_id, int usage, int active, + enum afs_cell_trace reason), + + TP_ARGS(cell_debug_id, usage, active, reason), + + TP_STRUCT__entry( + __field(unsigned int, cell ) + __field(int, usage ) + __field(int, active ) + __field(int, reason ) + ), + + TP_fast_assign( + __entry->cell = cell_debug_id; + __entry->usage = usage; + __entry->active = active; + __entry->reason = reason; + ), + + TP_printk("L=%08x %s u=%d a=%d", + __entry->cell, + __print_symbolic(__entry->reason, afs_cell_traces), + __entry->usage, + __entry->active) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ From 7530d3eb3dcf1a30750e8e7f1f88b782b96b72b8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 15 Oct 2020 09:02:25 +0100 Subject: [PATCH 6/6] afs: Don't assert on unpurgeable server records Don't give an assertion failure on unpurgeable afs_server records - which kills the thread - but rather emit a trace line when we are purging a record (which only happens during network namespace removal or rmmod) and print a notice of the problem. Signed-off-by: David Howells --- fs/afs/server.c | 7 ++++++- include/trace/events/afs.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/afs/server.c b/fs/afs/server.c index e82e452e2612..684a2b02b9ff 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -550,7 +550,12 @@ void afs_manage_servers(struct work_struct *work) _debug("manage %pU %u", &server->uuid, active); - ASSERTIFCMP(purging, active, ==, 0); + if (purging) { + trace_afs_server(server, atomic_read(&server->ref), + active, afs_server_trace_purging); + if (active != 0) + pr_notice("Can't purge s=%08x\n", server->debug_id); + } if (active == 0) { time64_t expire_at = server->unuse_time; diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index a79c5ca376b3..8eb49231c6bb 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -40,6 +40,7 @@ enum afs_server_trace { afs_server_trace_get_new_cbi, afs_server_trace_get_probe, afs_server_trace_give_up_cb, + afs_server_trace_purging, afs_server_trace_put_call, afs_server_trace_put_cbi, afs_server_trace_put_find_rsq, @@ -311,6 +312,7 @@ enum afs_cb_break_reason { EM(afs_server_trace_get_new_cbi, "GET cbi ") \ EM(afs_server_trace_get_probe, "GET probe") \ EM(afs_server_trace_give_up_cb, "giveup-cb") \ + EM(afs_server_trace_purging, "PURGE ") \ EM(afs_server_trace_put_call, "PUT call ") \ EM(afs_server_trace_put_cbi, "PUT cbi ") \ EM(afs_server_trace_put_find_rsq, "PUT f-rsq") \