Merge branch 'page_pool-NUMA-node-handling-fixes'

Jesper Dangaard Brouer says:

====================
page_pool: NUMA node handling fixes

The NUMA changes recently added to page_pool (merged for v5.5) both
contain a bug in handling the NUMA_NO_NODE condition and add code to
the fast-path.

This patchset fixes the bug and moves code out of the fast-path. The first
patch contains a fix that should be considered for 5.5. The second
patch reduces code size and overhead when CONFIG_NUMA is disabled.

Currently the NUMA_NO_NODE setting bug only affects driver 'ti_cpsw'
(drivers/net/ethernet/ti/), but after this patchset, we plan to move
other drivers (netsec and mvneta) to use NUMA_NO_NODE setting.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2020-01-02 15:37:53 -08:00
commit c9a2069b1d

View File

@ -96,10 +96,65 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
}
EXPORT_SYMBOL(page_pool_create);
static void __page_pool_return_page(struct page_pool *pool, struct page *page);
/* Slow-path helper: pull pages out of the pool's ptr_ring into
 * pool->alloc.cache and hand one cached page back to the caller.
 *
 * @pool:   page pool whose ring is consumed
 * @refill: when true, keep consuming until PP_ALLOC_CACHE_REFILL pages
 *          are cached; when false, consume at most one page
 *
 * Only pages that sit on the preferred NUMA node are cached.  The first
 * page found on another node is handed to __page_pool_return_page() and
 * consuming stops.
 *
 * Returns the page on top of the alloc cache, or NULL if the ring is
 * empty or the cache is still empty once consuming has stopped.
 */
noinline
static struct page *page_pool_refill_alloc_cache(struct page_pool *pool,
						 bool refill)
{
	struct ptr_ring *ring = &pool->ring;
	struct page *page;
	int wanted_nid; /* NUMA node pages must belong to */

	/* Nothing queued: bail out before touching the consumer lock */
	if (__ptr_ring_empty(ring))
		return NULL;

	/* Running in softirq keeps the CPU, and therefore the NUMA node,
	 * stable.  This assumes the CPU refilling the driver RX-ring is
	 * also the one running RX-NAPI.
	 */
#ifdef CONFIG_NUMA
	wanted_nid = pool->p.nid;
	if (wanted_nid == NUMA_NO_NODE)
		wanted_nid = numa_mem_id();
#else
	/* pool->p.nid is ignored without CONFIG_NUMA, which helps the
	 * compiler; numa_mem_id() will be zero, like page_to_nid().
	 */
	wanted_nid = numa_mem_id();
#endif

	/* Slower-path: the ring is consumed under its consumer lock */
	spin_lock(&ring->consumer_lock);

	for (;;) {
		page = __ptr_ring_consume(ring);
		if (unlikely(!page))
			break;

		if (unlikely(page_to_nid(page) != wanted_nid)) {
			/* NUMA mismatch: release this single page and stop,
			 * so the caller falls through to a fresh allocation.
			 * Releasing one page per call limits the stress put
			 * on the page buddy allocator.
			 */
			__page_pool_return_page(pool, page);
			page = NULL;
			break;
		}

		pool->alloc.cache[pool->alloc.count++] = page;

		/* Stop after one page unless a refill was requested, and
		 * stop once the refill level has been reached.
		 */
		if (!refill || pool->alloc.count >= PP_ALLOC_CACHE_REFILL)
			break;
	}

	/* Hand out the most recently cached page, if any */
	if (likely(pool->alloc.count > 0)) {
		pool->alloc.count--;
		page = pool->alloc.cache[pool->alloc.count];
	}

	spin_unlock(&ring->consumer_lock);
	return page;
}
/* fast path */
static struct page *__page_pool_get_cached(struct page_pool *pool)
{
struct ptr_ring *r = &pool->ring;
bool refill = false;
struct page *page;
@ -113,20 +168,7 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
refill = true;
}
/* Quicker fallback, avoid locks when ring is empty */
if (__ptr_ring_empty(r))
return NULL;
/* Slow-path: Get page from locked ring queue,
* refill alloc array if requested.
*/
spin_lock(&r->consumer_lock);
page = __ptr_ring_consume(r);
if (refill)
pool->alloc.count = __ptr_ring_consume_batched(r,
pool->alloc.cache,
PP_ALLOC_CACHE_REFILL);
spin_unlock(&r->consumer_lock);
page = page_pool_refill_alloc_cache(pool, refill);
return page;
}
@ -163,7 +205,11 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
*/
/* Cache was empty, do real allocation */
#ifdef CONFIG_NUMA
page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
#else
page = alloc_pages(gfp, pool->p.order);
#endif
if (!page)
return NULL;
@ -311,13 +357,10 @@ static bool __page_pool_recycle_direct(struct page *page,
/* page is NOT reusable when:
* 1) allocated when system is under some pressure. (page_is_pfmemalloc)
* 2) belongs to a different NUMA node than pool->p.nid.
*
* To update pool->p.nid users must call page_pool_update_nid.
*/
static bool pool_page_reusable(struct page_pool *pool, struct page *page)
{
return !page_is_pfmemalloc(page) && page_to_nid(page) == pool->p.nid;
return !page_is_pfmemalloc(page);
}
void __page_pool_put_page(struct page_pool *pool, struct page *page,
@ -484,7 +527,15 @@ EXPORT_SYMBOL(page_pool_destroy);
/* Set the pool's NUMA node to @new_nid and empty the alloc cache.
 *
 * Caller must provide appropriate safe context, e.g. NAPI.
 */
void page_pool_update_nid(struct page_pool *pool, int new_nid)
{
	trace_page_pool_update_nid(pool, new_nid);
	pool->p.nid = new_nid;

	/* The refill path checks the NUMA node of every page it caches,
	 * so pages cached under the previous setting are flushed here.
	 */
	while (pool->alloc.count) {
		pool->alloc.count--;
		__page_pool_return_page(pool,
					pool->alloc.cache[pool->alloc.count]);
	}
}
EXPORT_SYMBOL(page_pool_update_nid);