diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 9f5dac64aa8f..ed4350ced3d0 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -675,6 +675,9 @@ END(invalidate_interrupt\num)
 ENTRY(call_function_interrupt)
         apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
 END(call_function_interrupt)
+ENTRY(irq_move_cleanup_interrupt)
+        apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
+END(irq_move_cleanup_interrupt)
 #endif
 
 ENTRY(apic_timer_interrupt)
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index 45d85630196a..21d95b747437 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -450,6 +450,7 @@ void spurious_interrupt(void);
 void error_interrupt(void);
 void reschedule_interrupt(void);
 void call_function_interrupt(void);
+void irq_move_cleanup_interrupt(void);
 void invalidate_interrupt0(void);
 void invalidate_interrupt1(void);
 void invalidate_interrupt2(void);
@@ -537,7 +538,10 @@ void __init init_IRQ(void)
 
         /* IPI for generic function call */
         set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-#endif
+
+        /* Low priority IPI to cleanup after moving an irq */
+        set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+#endif
         set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
         set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 8dede0bd2267..48593f6b708f 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -36,6 +36,7 @@
 #include <acpi/acpi_bus.h>
 #endif
 
+#include <asm/idle.h>
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
@@ -49,7 +50,10 @@
 
 struct irq_cfg {
         cpumask_t domain;
+        cpumask_t old_domain;
+        unsigned move_cleanup_count;
         u8 vector;
+        u8 move_in_progress : 1;
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -652,7 +656,6 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
          * 0x80, because int 0x80 is hm, kind of importantish. ;)
          */
         static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
-        cpumask_t old_mask = CPU_MASK_NONE;
         unsigned int old_vector;
         int cpu;
         struct irq_cfg *cfg;
@@ -663,18 +666,20 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
         /* Only try and allocate irqs on cpus that are present */
         cpus_and(mask, mask, cpu_online_map);
 
+        if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+                return -EBUSY;
+
         old_vector = cfg->vector;
         if (old_vector) {
                 cpumask_t tmp;
                 cpus_and(tmp, cfg->domain, mask);
                 if (!cpus_empty(tmp))
                         return 0;
-                cpus_and(old_mask, cfg->domain, cpu_online_map);
         }
 
         for_each_cpu_mask(cpu, mask) {
                 cpumask_t domain, new_mask;
-                int new_cpu, old_cpu;
+                int new_cpu;
                 int vector, offset;
 
                 domain = vector_allocation_domain(cpu);
@@ -699,8 +704,10 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
                 /* Found one! */
                 current_vector = vector;
                 current_offset = offset;
-                for_each_cpu_mask(old_cpu, old_mask)
-                        per_cpu(vector_irq, old_cpu)[old_vector] = -1;
+                if (old_vector) {
+                        cfg->move_in_progress = 1;
+                        cfg->old_domain = cfg->domain;
+                }
                 for_each_cpu_mask(new_cpu, new_mask)
                         per_cpu(vector_irq, new_cpu)[vector] = irq;
                 cfg->vector = vector;
@@ -1360,8 +1367,68 @@ static int ioapic_retrigger_irq(unsigned int irq)
  * races.
  */
 
+#ifdef CONFIG_SMP
+asmlinkage void smp_irq_move_cleanup_interrupt(void)
+{
+        unsigned vector, me;
+        ack_APIC_irq();
+        exit_idle();
+        irq_enter();
+
+        me = smp_processor_id();
+        for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+                unsigned int irq;
+                struct irq_desc *desc;
+                struct irq_cfg *cfg;
+                irq = __get_cpu_var(vector_irq)[vector];
+                if (irq >= NR_IRQS)
+                        continue;
+
+                desc = irq_desc + irq;
+                cfg = irq_cfg + irq;
+                spin_lock(&desc->lock);
+                if (!cfg->move_cleanup_count)
+                        goto unlock;
+
+                if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+                        goto unlock;
+
+                __get_cpu_var(vector_irq)[vector] = -1;
+                cfg->move_cleanup_count--;
+unlock:
+                spin_unlock(&desc->lock);
+        }
+
+        irq_exit();
+}
+
+static void irq_complete_move(unsigned int irq)
+{
+        struct irq_cfg *cfg = irq_cfg + irq;
+        unsigned vector, me;
+
+        if (likely(!cfg->move_in_progress))
+                return;
+
+        vector = ~get_irq_regs()->orig_rax;
+        me = smp_processor_id();
+        if ((vector == cfg->vector) &&
+            cpu_isset(me, cfg->domain)) {
+                cpumask_t cleanup_mask;
+
+                cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+                cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+                send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+                cfg->move_in_progress = 0;
+        }
+}
+#else
+static inline void irq_complete_move(unsigned int irq) {}
+#endif
+
 static void ack_apic_edge(unsigned int irq)
 {
+        irq_complete_move(irq);
         move_native_irq(irq);
         ack_APIC_irq();
 }
@@ -1370,6 +1437,7 @@ static void ack_apic_level(unsigned int irq)
 {
         int do_unmask_irq = 0;
 
+        irq_complete_move(irq);
 #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
         /* If we are moving the irq we need to mask it */
         if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h
index dc395edc2f2a..2e4b7a5ed1c4 100644
--- a/include/asm-x86_64/hw_irq.h
+++ b/include/asm-x86_64/hw_irq.h
@@ -32,10 +32,15 @@
 
 #define IA32_SYSCALL_VECTOR 0x80
 
+/* Reserve the lowest usable priority level 0x20 - 0x2f for triggering
+ * cleanup after irq migration.
+ */
+#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
+
 /*
- * Vectors 0x20-0x2f are used for ISA interrupts.
+ * Vectors 0x30-0x3f are used for ISA interrupts.
  */
-#define IRQ0_VECTOR FIRST_EXTERNAL_VECTOR
+#define IRQ0_VECTOR FIRST_EXTERNAL_VECTOR + 0x10
 #define IRQ1_VECTOR IRQ0_VECTOR + 1
 #define IRQ2_VECTOR IRQ0_VECTOR + 2
 #define IRQ3_VECTOR IRQ0_VECTOR + 3
@@ -82,7 +87,7 @@
 
 /*
  * First APIC vector available to drivers: (vectors 0x30-0xee)
- * we start at 0x31 to spread out vectors evenly between priority
+ * we start at 0x41 to spread out vectors evenly between priority
  * levels. (0x80 is the syscall vector)
  */
 #define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2)
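
Reading aid, not part of the patch: the diff implements a two-phase migration protocol, and the ordering is easier to see in a compact model. The sketch below is a hypothetical user-space C program; assign_vector(), irq_arrived(), cleanup_ipi(), the bitmask "domains" and the single global cfg are invented stand-ins for __assign_irq_vector(), irq_complete_move(), smp_irq_move_cleanup_interrupt(), cpumask_t and the per-irq irq_cfg table, and the cleanup IPI is modeled as a direct function call.

/*
 * Hypothetical user-space model of the migration protocol above.
 * Not kernel code. Build with: cc -std=c99 -o irqmove irqmove.c
 */
#include <stdio.h>

#define NR_CPUS    4
#define NR_VECTORS 256
#define EBUSY      16          /* matches the Linux errno value */

/* Per-CPU vector table: -1 = free, otherwise the irq number. */
static int vector_irq[NR_CPUS][NR_VECTORS];

struct irq_cfg {
        unsigned long domain;           /* CPUs currently targeted (bitmask) */
        unsigned long old_domain;       /* CPUs still holding the old vector */
        unsigned move_cleanup_count;
        unsigned char vector;
        unsigned char move_in_progress:1;
};

static struct irq_cfg cfg;              /* one irq is enough for the demo */

/* Mirrors __assign_irq_vector(): claim the new vector, keep the old
 * one alive, and refuse another move until cleanup has finished. */
static int assign_vector(int irq, unsigned long dest, unsigned char vec)
{
        if (cfg.move_in_progress || cfg.move_cleanup_count)
                return -EBUSY;
        if (cfg.vector) {
                cfg.move_in_progress = 1;
                cfg.old_domain = cfg.domain;    /* old entries stay valid */
        }
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                if (dest & (1UL << cpu))
                        vector_irq[cpu][vec] = irq;
        cfg.domain = dest;
        cfg.vector = vec;
        return 0;
}

/* Mirrors smp_irq_move_cleanup_interrupt() on one old-domain CPU:
 * retire every vector that no longer matches the current assignment. */
static void cleanup_ipi(int cpu)
{
        for (int v = 0; v < NR_VECTORS; v++) {
                if (vector_irq[cpu][v] < 0 || !cfg.move_cleanup_count)
                        continue;
                if (v == cfg.vector && (cfg.domain & (1UL << cpu)))
                        continue;               /* live entry, keep it */
                vector_irq[cpu][v] = -1;        /* stale entry, free it */
                cfg.move_cleanup_count--;
        }
}

/* Mirrors irq_complete_move(): only once an interrupt has arrived
 * through the NEW vector on a NEW-domain CPU is it safe to tear the
 * old vectors down. */
static void irq_arrived(int cpu, int vec)
{
        if (!cfg.move_in_progress)
                return;
        if (vec != cfg.vector || !(cfg.domain & (1UL << cpu)))
                return;
        for (int c = 0; c < NR_CPUS; c++)       /* count the targets... */
                if (cfg.old_domain & (1UL << c))
                        cfg.move_cleanup_count++;
        for (int c = 0; c < NR_CPUS; c++)       /* ...then "send" the IPIs */
                if (cfg.old_domain & (1UL << c))
                        cleanup_ipi(c);
        cfg.move_in_progress = 0;
}

int main(void)
{
        for (int c = 0; c < NR_CPUS; c++)
                for (int v = 0; v < NR_VECTORS; v++)
                        vector_irq[c][v] = -1;

        assign_vector(1, 1UL << 0, 0x31);       /* irq 1 lives on cpu0 */
        assign_vector(1, 1UL << 1, 0x41);       /* migrate it to cpu1 */
        printf("second move during cleanup: %d\n",
               assign_vector(1, 1UL << 2, 0x51));       /* -EBUSY */
        irq_arrived(1, 0x41);                   /* first irq on the new CPU */
        printf("cpu0 old slot now %d, cleanup pending %u\n",
               vector_irq[0][0x31], cfg.move_cleanup_count);
        return 0;
}

The model makes the same ordering argument the patch relies on: the old_domain vectors are reclaimed only after an interrupt has actually arrived through the new vector on a CPU in the new domain, so nothing can still be routed at the old per-CPU vector_irq slots, and until then __assign_irq_vector() refuses further moves with -EBUSY. In the kernel the reclamation is additionally deferred to the lowest-priority IRQ_MOVE_CLEANUP_VECTOR IPI, so it runs only after any interrupts already pending at the old vector have been serviced.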