forked from luck/tmp_suning_uos_patched
rcu: Support kfree_bulk() interface in kfree_rcu()
The kfree_rcu() logic can be improved further by using the kfree_bulk() interface along with the "basic batching support" introduced earlier. There are at least two advantages of using the "bulk" interface: - in case of a large number of kfree_rcu() requests, kfree_bulk() reduces the per-object overhead caused by calling kfree() per-object. - it reduces the number of cache-misses due to "pointer chasing" between objects that can be spread far apart from each other. This approach defines a new kfree_rcu_bulk_data structure that stores pointers in an array with a specific size. The number of entries in that array depends on PAGE_SIZE, making the kfree_rcu_bulk_data structure exactly one page in size. Since it uses a "block-chain" technique, dynamic allocation is additionally needed whenever a new block is required. Memory is allocated with the GFP_NOWAIT | __GFP_NOWARN flags, i.e. the allocation skips direct reclaim under low-memory conditions to prevent stalling, and fails silently under high memory pressure. The "emergency path" is used when the system runs out of memory. In that case objects are linked into the regular list. The "rcuperf" test was run to analyze this change in terms of memory consumption and kfree_bulk() throughput.
1) Testing on the Intel(R) Xeon(R) W-2135 CPU @ 3.70GHz, 12xCPUs with the following parameters: kfree_loops=200000 kfree_alloc_num=1000 kfree_rcu_test=1 kfree_vary_obj_size=1 dev.2020.01.10a branch Default / CONFIG_SLAB 53607352517 ns, loops: 200000, batches: 1885, memory footprint: 1248MB 53529637912 ns, loops: 200000, batches: 1921, memory footprint: 1193MB 53570175705 ns, loops: 200000, batches: 1929, memory footprint: 1250MB Patch / CONFIG_SLAB 23981587315 ns, loops: 200000, batches: 810, memory footprint: 1219MB 23879375281 ns, loops: 200000, batches: 822, memory footprint: 1190MB 24086841707 ns, loops: 200000, batches: 794, memory footprint: 1380MB Default / CONFIG_SLUB 51291025022 ns, loops: 200000, batches: 1713, memory footprint: 741MB 51278911477 ns, loops: 200000, batches: 1671, memory footprint: 719MB 51256183045 ns, loops: 200000, batches: 1719, memory footprint: 647MB Patch / CONFIG_SLUB 50709919132 ns, loops: 200000, batches: 1618, memory footprint: 456MB 50736297452 ns, loops: 200000, batches: 1633, memory footprint: 507MB 50660403893 ns, loops: 200000, batches: 1628, memory footprint: 429MB In the case of CONFIG_SLAB there is a twofold increase in performance and slightly higher memory usage. As for CONFIG_SLUB, the performance figures are better, together with lower memory usage. 2) Testing on the HiKey-960, arm64, 8xCPUs with the parameters below: CONFIG_SLAB=y kfree_loops=200000 kfree_alloc_num=1000 kfree_rcu_test=1 102898760401 ns, loops: 200000, batches: 5822, memory footprint: 158MB 89947009882 ns, loops: 200000, batches: 6715, memory footprint: 115MB rcuperf shows approximately 12% better throughput when using the "bulk" interface. The "drain logic" or its RCU callback does the work faster, which leads to better throughput. Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com> Tested-by: Joel Fernandes (Google) <joel@joelfernandes.org> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
This commit is contained in:
parent
bb6d3fb354
commit
34c8817455
|
@ -2689,22 +2689,47 @@ EXPORT_SYMBOL_GPL(call_rcu);
|
||||||
#define KFREE_DRAIN_JIFFIES (HZ / 50)
|
#define KFREE_DRAIN_JIFFIES (HZ / 50)
|
||||||
#define KFREE_N_BATCHES 2
|
#define KFREE_N_BATCHES 2
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This macro defines how many entries the "records" array
|
||||||
|
* will contain. It is based on the fact that the size of
|
||||||
|
* kfree_rcu_bulk_data structure becomes exactly one page.
|
||||||
|
*/
|
||||||
|
#define KFREE_BULK_MAX_ENTR ((PAGE_SIZE / sizeof(void *)) - 3)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct kfree_rcu_bulk_data - single block to store kfree_rcu() pointers
|
||||||
|
* @nr_records: Number of active pointers in the array
|
||||||
|
* @records: Array of the kfree_rcu() pointers
|
||||||
|
* @next: Next bulk object in the block chain
|
||||||
|
* @head_free_debug: For debug, when CONFIG_DEBUG_OBJECTS_RCU_HEAD is set
|
||||||
|
*/
|
||||||
|
struct kfree_rcu_bulk_data {
|
||||||
|
unsigned long nr_records;
|
||||||
|
void *records[KFREE_BULK_MAX_ENTR];
|
||||||
|
struct kfree_rcu_bulk_data *next;
|
||||||
|
struct rcu_head *head_free_debug;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
|
* struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
|
||||||
* @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
|
* @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
|
||||||
* @head_free: List of kfree_rcu() objects waiting for a grace period
|
* @head_free: List of kfree_rcu() objects waiting for a grace period
|
||||||
|
* @bhead_free: Bulk-List of kfree_rcu() objects waiting for a grace period
|
||||||
* @krcp: Pointer to @kfree_rcu_cpu structure
|
* @krcp: Pointer to @kfree_rcu_cpu structure
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct kfree_rcu_cpu_work {
|
struct kfree_rcu_cpu_work {
|
||||||
struct rcu_work rcu_work;
|
struct rcu_work rcu_work;
|
||||||
struct rcu_head *head_free;
|
struct rcu_head *head_free;
|
||||||
|
struct kfree_rcu_bulk_data *bhead_free;
|
||||||
struct kfree_rcu_cpu *krcp;
|
struct kfree_rcu_cpu *krcp;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
|
* struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
|
||||||
* @head: List of kfree_rcu() objects not yet waiting for a grace period
|
* @head: List of kfree_rcu() objects not yet waiting for a grace period
|
||||||
|
* @bhead: Bulk-List of kfree_rcu() objects not yet waiting for a grace period
|
||||||
|
* @bcached: Keeps at most one object for later reuse when building chain blocks
|
||||||
* @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period
|
* @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period
|
||||||
* @lock: Synchronize access to this structure
|
* @lock: Synchronize access to this structure
|
||||||
* @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
|
* @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
|
||||||
|
@ -2718,6 +2743,8 @@ struct kfree_rcu_cpu_work {
|
||||||
*/
|
*/
|
||||||
struct kfree_rcu_cpu {
|
struct kfree_rcu_cpu {
|
||||||
struct rcu_head *head;
|
struct rcu_head *head;
|
||||||
|
struct kfree_rcu_bulk_data *bhead;
|
||||||
|
struct kfree_rcu_bulk_data *bcached;
|
||||||
struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];
|
struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];
|
||||||
spinlock_t lock;
|
spinlock_t lock;
|
||||||
struct delayed_work monitor_work;
|
struct delayed_work monitor_work;
|
||||||
|
@ -2727,14 +2754,24 @@ struct kfree_rcu_cpu {
|
||||||
|
|
||||||
static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
|
static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
|
||||||
|
|
||||||
|
static __always_inline void
|
||||||
|
debug_rcu_head_unqueue_bulk(struct rcu_head *head)
|
||||||
|
{
|
||||||
|
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
|
||||||
|
for (; head; head = head->next)
|
||||||
|
debug_rcu_head_unqueue(head);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function is invoked in workqueue context after a grace period.
|
* This function is invoked in workqueue context after a grace period.
|
||||||
* It frees all the objects queued on ->head_free.
|
* It frees all the objects queued on ->bhead_free or ->head_free.
|
||||||
*/
|
*/
|
||||||
static void kfree_rcu_work(struct work_struct *work)
|
static void kfree_rcu_work(struct work_struct *work)
|
||||||
{
|
{
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
struct rcu_head *head, *next;
|
struct rcu_head *head, *next;
|
||||||
|
struct kfree_rcu_bulk_data *bhead, *bnext;
|
||||||
struct kfree_rcu_cpu *krcp;
|
struct kfree_rcu_cpu *krcp;
|
||||||
struct kfree_rcu_cpu_work *krwp;
|
struct kfree_rcu_cpu_work *krwp;
|
||||||
|
|
||||||
|
@ -2744,22 +2781,41 @@ static void kfree_rcu_work(struct work_struct *work)
|
||||||
spin_lock_irqsave(&krcp->lock, flags);
|
spin_lock_irqsave(&krcp->lock, flags);
|
||||||
head = krwp->head_free;
|
head = krwp->head_free;
|
||||||
krwp->head_free = NULL;
|
krwp->head_free = NULL;
|
||||||
|
bhead = krwp->bhead_free;
|
||||||
|
krwp->bhead_free = NULL;
|
||||||
spin_unlock_irqrestore(&krcp->lock, flags);
|
spin_unlock_irqrestore(&krcp->lock, flags);
|
||||||
|
|
||||||
// List "head" is now private, so traverse locklessly.
|
/* "bhead" is now private, so traverse locklessly. */
|
||||||
|
for (; bhead; bhead = bnext) {
|
||||||
|
bnext = bhead->next;
|
||||||
|
|
||||||
|
debug_rcu_head_unqueue_bulk(bhead->head_free_debug);
|
||||||
|
|
||||||
|
rcu_lock_acquire(&rcu_callback_map);
|
||||||
|
kfree_bulk(bhead->nr_records, bhead->records);
|
||||||
|
rcu_lock_release(&rcu_callback_map);
|
||||||
|
|
||||||
|
if (cmpxchg(&krcp->bcached, NULL, bhead))
|
||||||
|
free_page((unsigned long) bhead);
|
||||||
|
|
||||||
|
cond_resched_tasks_rcu_qs();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Emergency case only. It can happen under low-memory
|
||||||
|
* conditions when an allocation fails, so the "bulk"
|
||||||
|
* path temporarily cannot be maintained.
|
||||||
|
*/
|
||||||
for (; head; head = next) {
|
for (; head; head = next) {
|
||||||
unsigned long offset = (unsigned long)head->func;
|
unsigned long offset = (unsigned long)head->func;
|
||||||
|
|
||||||
next = head->next;
|
next = head->next;
|
||||||
// Potentially optimize with kfree_bulk in future.
|
|
||||||
debug_rcu_head_unqueue(head);
|
debug_rcu_head_unqueue(head);
|
||||||
rcu_lock_acquire(&rcu_callback_map);
|
rcu_lock_acquire(&rcu_callback_map);
|
||||||
trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset);
|
trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset);
|
||||||
|
|
||||||
if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset))) {
|
if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset)))
|
||||||
/* Could be optimized with kfree_bulk() in future. */
|
|
||||||
kfree((void *)head - offset);
|
kfree((void *)head - offset);
|
||||||
}
|
|
||||||
|
|
||||||
rcu_lock_release(&rcu_callback_map);
|
rcu_lock_release(&rcu_callback_map);
|
||||||
cond_resched_tasks_rcu_qs();
|
cond_resched_tasks_rcu_qs();
|
||||||
|
@ -2774,26 +2830,48 @@ static void kfree_rcu_work(struct work_struct *work)
|
||||||
*/
|
*/
|
||||||
static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
|
static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
|
||||||
{
|
{
|
||||||
|
struct kfree_rcu_cpu_work *krwp;
|
||||||
|
bool queued = false;
|
||||||
int i;
|
int i;
|
||||||
struct kfree_rcu_cpu_work *krwp = NULL;
|
|
||||||
|
|
||||||
lockdep_assert_held(&krcp->lock);
|
lockdep_assert_held(&krcp->lock);
|
||||||
for (i = 0; i < KFREE_N_BATCHES; i++)
|
|
||||||
if (!krcp->krw_arr[i].head_free) {
|
for (i = 0; i < KFREE_N_BATCHES; i++) {
|
||||||
krwp = &(krcp->krw_arr[i]);
|
krwp = &(krcp->krw_arr[i]);
|
||||||
break;
|
|
||||||
|
/*
|
||||||
|
* Try to detach bhead or head and attach it to any
|
||||||
|
* available corresponding free channel. It can be that
|
||||||
|
* a previous RCU batch is in progress, which means that
|
||||||
|
* queuing another one immediately is not possible, so
|
||||||
|
* return false to tell the caller to retry.
|
||||||
|
*/
|
||||||
|
if ((krcp->bhead && !krwp->bhead_free) ||
|
||||||
|
(krcp->head && !krwp->head_free)) {
|
||||||
|
/* Channel 1. */
|
||||||
|
if (!krwp->bhead_free) {
|
||||||
|
krwp->bhead_free = krcp->bhead;
|
||||||
|
krcp->bhead = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Channel 2. */
|
||||||
|
if (!krwp->head_free) {
|
||||||
|
krwp->head_free = krcp->head;
|
||||||
|
krcp->head = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* One work is per one batch, so there are two "free channels",
|
||||||
|
* "bhead_free" and "head_free" the batch can handle. It can be
|
||||||
|
* that the work is in the pending state when two channels have
|
||||||
|
* been detached following each other, one by one.
|
||||||
|
*/
|
||||||
|
queue_rcu_work(system_wq, &krwp->rcu_work);
|
||||||
|
queued = true;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// If a previous RCU batch is in progress, we cannot immediately
|
return queued;
|
||||||
// queue another one, so return false to tell caller to retry.
|
|
||||||
if (!krwp)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
krwp->head_free = krcp->head;
|
|
||||||
krcp->head = NULL;
|
|
||||||
INIT_RCU_WORK(&krwp->rcu_work, kfree_rcu_work);
|
|
||||||
queue_rcu_work(system_wq, &krwp->rcu_work);
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void kfree_rcu_drain_unlock(struct kfree_rcu_cpu *krcp,
|
static inline void kfree_rcu_drain_unlock(struct kfree_rcu_cpu *krcp,
|
||||||
|
@ -2830,19 +2908,65 @@ static void kfree_rcu_monitor(struct work_struct *work)
|
||||||
spin_unlock_irqrestore(&krcp->lock, flags);
|
spin_unlock_irqrestore(&krcp->lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
kfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp,
|
||||||
|
struct rcu_head *head, rcu_callback_t func)
|
||||||
|
{
|
||||||
|
struct kfree_rcu_bulk_data *bnode;
|
||||||
|
|
||||||
|
if (unlikely(!krcp->initialized))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
lockdep_assert_held(&krcp->lock);
|
||||||
|
|
||||||
|
/* Check if a new block is required. */
|
||||||
|
if (!krcp->bhead ||
|
||||||
|
krcp->bhead->nr_records == KFREE_BULK_MAX_ENTR) {
|
||||||
|
bnode = xchg(&krcp->bcached, NULL);
|
||||||
|
if (!bnode) {
|
||||||
|
WARN_ON_ONCE(sizeof(struct kfree_rcu_bulk_data) > PAGE_SIZE);
|
||||||
|
|
||||||
|
bnode = (struct kfree_rcu_bulk_data *)
|
||||||
|
__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Switch to emergency path. */
|
||||||
|
if (unlikely(!bnode))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Initialize the new block. */
|
||||||
|
bnode->nr_records = 0;
|
||||||
|
bnode->next = krcp->bhead;
|
||||||
|
bnode->head_free_debug = NULL;
|
||||||
|
|
||||||
|
/* Attach it to the head. */
|
||||||
|
krcp->bhead = bnode;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
|
||||||
|
head->func = func;
|
||||||
|
head->next = krcp->bhead->head_free_debug;
|
||||||
|
krcp->bhead->head_free_debug = head;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Finally insert. */
|
||||||
|
krcp->bhead->records[krcp->bhead->nr_records++] =
|
||||||
|
(void *) head - (unsigned long) func;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Queue a request for lazy invocation of kfree() after a grace period.
|
* Queue a request for lazy invocation of kfree_bulk()/kfree() after a grace
|
||||||
|
* period. Please note that two paths are maintained: one is the main one
|
||||||
|
* that uses the kfree_bulk() interface, and the second is the emergency one,
|
||||||
|
* used only when the main path temporarily cannot be maintained due to memory
|
||||||
|
* pressure.
|
||||||
*
|
*
|
||||||
* Each kfree_call_rcu() request is added to a batch. The batch will be drained
|
* Each kfree_call_rcu() request is added to a batch. The batch will be drained
|
||||||
* every KFREE_DRAIN_JIFFIES number of jiffies. All the objects in the batch
|
* every KFREE_DRAIN_JIFFIES number of jiffies. All the objects in the batch will
|
||||||
* will be kfree'd in workqueue context. This allows us to:
|
* be free'd in workqueue context. This allows us to: batch requests together to
|
||||||
*
|
* reduce the number of grace periods during heavy kfree_rcu() load.
|
||||||
* 1. Batch requests together to reduce the number of grace periods during
|
|
||||||
* heavy kfree_rcu() load.
|
|
||||||
*
|
|
||||||
* 2. It makes it possible to use kfree_bulk() on a large number of
|
|
||||||
* kfree_rcu() requests thus reducing cache misses and the per-object
|
|
||||||
* overhead of kfree().
|
|
||||||
*/
|
*/
|
||||||
void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
|
void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
|
||||||
{
|
{
|
||||||
|
@ -2861,9 +2985,16 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
|
||||||
__func__, head);
|
__func__, head);
|
||||||
goto unlock_return;
|
goto unlock_return;
|
||||||
}
|
}
|
||||||
head->func = func;
|
|
||||||
head->next = krcp->head;
|
/*
|
||||||
krcp->head = head;
|
* Under high memory pressure GFP_NOWAIT can fail,
|
||||||
|
* in that case the emergency path is maintained.
|
||||||
|
*/
|
||||||
|
if (unlikely(!kfree_call_rcu_add_ptr_to_bulk(krcp, head, func))) {
|
||||||
|
head->func = func;
|
||||||
|
head->next = krcp->head;
|
||||||
|
krcp->head = head;
|
||||||
|
}
|
||||||
|
|
||||||
// Set timer to drain after KFREE_DRAIN_JIFFIES.
|
// Set timer to drain after KFREE_DRAIN_JIFFIES.
|
||||||
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
|
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
|
||||||
|
@ -3769,8 +3900,11 @@ static void __init kfree_rcu_batch_init(void)
|
||||||
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
|
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
|
||||||
|
|
||||||
spin_lock_init(&krcp->lock);
|
spin_lock_init(&krcp->lock);
|
||||||
for (i = 0; i < KFREE_N_BATCHES; i++)
|
for (i = 0; i < KFREE_N_BATCHES; i++) {
|
||||||
|
INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
|
||||||
krcp->krw_arr[i].krcp = krcp;
|
krcp->krw_arr[i].krcp = krcp;
|
||||||
|
}
|
||||||
|
|
||||||
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
|
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
|
||||||
krcp->initialized = true;
|
krcp->initialized = true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user