PCI: pci-iommu-iotlb-flushing-speedup
The following patch is an update to use an array instead of a list of IOVAs in the implementation of deferred iotlb flushes. It takes inspiration from sba_iommu.c. I like this implementation better as it encapsulates the batch process within intel-iommu.c, and no longer touches iova.h (which is shared). Performance data: Netperf 32-byte UDP streaming. 2.6.25-rc3-mm1: IOMMU-strict : 58Mbps @ 62% cpu NO-IOMMU : 71Mbps @ 41% cpu List-based IOMMU-default-batched-IOTLB flush: 66Mbps @ 57% cpu. With this patch: IOMMU-strict : 73Mbps @ 75% cpu NO-IOMMU : 74Mbps @ 42% cpu Array-based IOMMU-default-batched-IOTLB flush: 72Mbps @ 62% cpu. Signed-off-by: <mgross@linux.intel.com> Cc: Grant Grundler <grundler@parisc-linux.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
This commit is contained in:
parent
a391f19717
commit
80b20dd853
@ -59,8 +59,17 @@ static void flush_unmaps_timeout(unsigned long data);
|
|||||||
DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
|
DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
|
||||||
|
|
||||||
static struct intel_iommu *g_iommus;
|
static struct intel_iommu *g_iommus;
|
||||||
|
|
||||||
|
#define HIGH_WATER_MARK 250
|
||||||
|
struct deferred_flush_tables {
|
||||||
|
int next;
|
||||||
|
struct iova *iova[HIGH_WATER_MARK];
|
||||||
|
struct dmar_domain *domain[HIGH_WATER_MARK];
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct deferred_flush_tables *deferred_flush;
|
||||||
|
|
||||||
/* bitmap for indexing intel_iommus */
|
/* bitmap for indexing intel_iommus */
|
||||||
static unsigned long *g_iommus_to_flush;
|
|
||||||
static int g_num_of_iommus;
|
static int g_num_of_iommus;
|
||||||
|
|
||||||
static DEFINE_SPINLOCK(async_umap_flush_lock);
|
static DEFINE_SPINLOCK(async_umap_flush_lock);
|
||||||
@ -68,10 +77,6 @@ static LIST_HEAD(unmaps_to_do);
|
|||||||
|
|
||||||
static int timer_on;
|
static int timer_on;
|
||||||
static long list_size;
|
static long list_size;
|
||||||
static int high_watermark;
|
|
||||||
|
|
||||||
static struct dentry *intel_iommu_debug, *debug;
|
|
||||||
|
|
||||||
|
|
||||||
static void domain_remove_dev_info(struct dmar_domain *domain);
|
static void domain_remove_dev_info(struct dmar_domain *domain);
|
||||||
|
|
||||||
@ -1692,7 +1697,7 @@ int __init init_dmars(void)
|
|||||||
struct dmar_rmrr_unit *rmrr;
|
struct dmar_rmrr_unit *rmrr;
|
||||||
struct pci_dev *pdev;
|
struct pci_dev *pdev;
|
||||||
struct intel_iommu *iommu;
|
struct intel_iommu *iommu;
|
||||||
int nlongs, i, ret, unit = 0;
|
int i, ret, unit = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* for each drhd
|
* for each drhd
|
||||||
@ -1711,17 +1716,16 @@ int __init init_dmars(void)
|
|||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
nlongs = BITS_TO_LONGS(g_num_of_iommus);
|
|
||||||
g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL);
|
|
||||||
if (!g_iommus_to_flush) {
|
|
||||||
printk(KERN_ERR "Intel-IOMMU: "
|
|
||||||
"Allocating bitmap array failed\n");
|
|
||||||
return -ENOMEM;
|
|
||||||
}
|
|
||||||
|
|
||||||
g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
|
g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
|
||||||
if (!g_iommus) {
|
if (!g_iommus) {
|
||||||
kfree(g_iommus_to_flush);
|
ret = -ENOMEM;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
deferred_flush = kzalloc(g_num_of_iommus *
|
||||||
|
sizeof(struct deferred_flush_tables), GFP_KERNEL);
|
||||||
|
if (!deferred_flush) {
|
||||||
|
kfree(g_iommus);
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
@ -1970,42 +1974,48 @@ static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
|
|||||||
|
|
||||||
static void flush_unmaps(void)
|
static void flush_unmaps(void)
|
||||||
{
|
{
|
||||||
struct iova *node, *n;
|
int i, j;
|
||||||
unsigned long flags;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&async_umap_flush_lock, flags);
|
|
||||||
timer_on = 0;
|
timer_on = 0;
|
||||||
|
|
||||||
/* just flush them all */
|
/* just flush them all */
|
||||||
for (i = 0; i < g_num_of_iommus; i++) {
|
for (i = 0; i < g_num_of_iommus; i++) {
|
||||||
if (test_and_clear_bit(i, g_iommus_to_flush))
|
if (deferred_flush[i].next) {
|
||||||
iommu_flush_iotlb_global(&g_iommus[i], 0);
|
iommu_flush_iotlb_global(&g_iommus[i], 0);
|
||||||
|
for (j = 0; j < deferred_flush[i].next; j++) {
|
||||||
|
__free_iova(&deferred_flush[i].domain[j]->iovad,
|
||||||
|
deferred_flush[i].iova[j]);
|
||||||
|
}
|
||||||
|
deferred_flush[i].next = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
list_for_each_entry_safe(node, n, &unmaps_to_do, list) {
|
|
||||||
/* free iova */
|
|
||||||
list_del(&node->list);
|
|
||||||
__free_iova(&((struct dmar_domain *)node->dmar)->iovad, node);
|
|
||||||
|
|
||||||
}
|
|
||||||
list_size = 0;
|
list_size = 0;
|
||||||
spin_unlock_irqrestore(&async_umap_flush_lock, flags);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void flush_unmaps_timeout(unsigned long data)
|
static void flush_unmaps_timeout(unsigned long data)
|
||||||
{
|
{
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&async_umap_flush_lock, flags);
|
||||||
flush_unmaps();
|
flush_unmaps();
|
||||||
|
spin_unlock_irqrestore(&async_umap_flush_lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void add_unmap(struct dmar_domain *dom, struct iova *iova)
|
static void add_unmap(struct dmar_domain *dom, struct iova *iova)
|
||||||
{
|
{
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
int next, iommu_id;
|
||||||
|
|
||||||
spin_lock_irqsave(&async_umap_flush_lock, flags);
|
spin_lock_irqsave(&async_umap_flush_lock, flags);
|
||||||
iova->dmar = dom;
|
if (list_size == HIGH_WATER_MARK)
|
||||||
list_add(&iova->list, &unmaps_to_do);
|
flush_unmaps();
|
||||||
set_bit((dom->iommu - g_iommus), g_iommus_to_flush);
|
|
||||||
|
iommu_id = dom->iommu - g_iommus;
|
||||||
|
next = deferred_flush[iommu_id].next;
|
||||||
|
deferred_flush[iommu_id].domain[next] = dom;
|
||||||
|
deferred_flush[iommu_id].iova[next] = iova;
|
||||||
|
deferred_flush[iommu_id].next++;
|
||||||
|
|
||||||
if (!timer_on) {
|
if (!timer_on) {
|
||||||
mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
|
mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
|
||||||
@ -2054,8 +2064,6 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
|
|||||||
* queue up the release of the unmap to save the 1/6th of the
|
* queue up the release of the unmap to save the 1/6th of the
|
||||||
* cpu used up by the iotlb flush operation...
|
* cpu used up by the iotlb flush operation...
|
||||||
*/
|
*/
|
||||||
if (list_size > high_watermark)
|
|
||||||
flush_unmaps();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2380,10 +2388,6 @@ int __init intel_iommu_init(void)
|
|||||||
if (dmar_table_init())
|
if (dmar_table_init())
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
|
|
||||||
high_watermark = 250;
|
|
||||||
intel_iommu_debug = debugfs_create_dir("intel_iommu", NULL);
|
|
||||||
debug = debugfs_create_u32("high_watermark", S_IWUGO | S_IRUGO,
|
|
||||||
intel_iommu_debug, &high_watermark);
|
|
||||||
iommu_init_mempool();
|
iommu_init_mempool();
|
||||||
dmar_init_reserved_ranges();
|
dmar_init_reserved_ranges();
|
||||||
|
|
||||||
|
@ -24,8 +24,6 @@ struct iova {
|
|||||||
struct rb_node node;
|
struct rb_node node;
|
||||||
unsigned long pfn_hi; /* IOMMU dish out addr hi */
|
unsigned long pfn_hi; /* IOMMU dish out addr hi */
|
||||||
unsigned long pfn_lo; /* IOMMU dish out addr lo */
|
unsigned long pfn_lo; /* IOMMU dish out addr lo */
|
||||||
struct list_head list;
|
|
||||||
void *dmar;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* holds all the iova translations for a domain */
|
/* holds all the iova translations for a domain */
|
||||||
|
Loading…
Reference in New Issue
Block a user