kernel_optimize_test/drivers/pci/pcie/aer/aerdrv.h
Sebastian Andrzej Siewior 4ae2182b1e PCI/AER: Flush workqueue on device remove to avoid use-after-free
A Root Port's AER structure (rpc) contains a queue of events.  aer_irq()
enqueues AER status information and schedules aer_isr() to dequeue and
process it.  When we remove a device, aer_remove() waits for the queue to
be empty, then frees the rpc struct.

But aer_isr() references the rpc struct after dequeueing and possibly
emptying the queue, which can cause a use-after-free error as in the
following scenario with two threads, aer_isr() on the left and a
concurrent aer_remove() on the right:

  Thread A                      Thread B
  --------                      --------
  aer_irq():
    rpc->prod_idx++
                                aer_remove():
                                  wait_event(rpc->prod_idx == rpc->cons_idx)
                                  # now blocked until queue becomes empty
  aer_isr():                      # ...
    rpc->cons_idx++               # unblocked because queue is now empty
    ...                           kfree(rpc)
    mutex_unlock(&rpc->rpc_mutex)

To prevent this problem, use flush_work() to wait until the last scheduled
instance of aer_isr() has completed before freeing the rpc struct in
aer_remove().

I reproduced this use-after-free by flashing a device FPGA and
re-enumerating the bus to find the new device.  With SLUB debug, this
crashes with 0x6b bytes (POISON_FREE, the use-after-free magic number) in
GPR25:

  pcieport 0000:00:00.0: AER: Multiple Corrected error received: id=0000
  Unable to handle kernel paging request for data at address 0x27ef9e3e
  Workqueue: events aer_isr
  GPR24: dd6aa000 6b6b6b6b 605f8378 605f8360 d99b12c0 604fc674 606b1704 d99b12c0
  NIP [602f5328] pci_walk_bus+0xd4/0x104

[bhelgaas: changelog, stable tag]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: stable@vger.kernel.org
2016-01-25 10:08:00 -06:00

132 lines
3.3 KiB
C

/*
* Copyright (C) 2006 Intel Corp.
* Tom Long Nguyen (tom.l.nguyen@intel.com)
* Zhang Yanmin (yanmin.zhang@intel.com)
*
*/
#ifndef _AERDRV_H_
#define _AERDRV_H_
#include <linux/workqueue.h>
#include <linux/pcieport_if.h>
#include <linux/aer.h>
#include <linux/interrupt.h>
#define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \
PCI_EXP_RTCTL_SENFEE| \
PCI_EXP_RTCTL_SEFEE)
#define ROOT_PORT_INTR_ON_MESG_MASK (PCI_ERR_ROOT_CMD_COR_EN| \
PCI_ERR_ROOT_CMD_NONFATAL_EN| \
PCI_ERR_ROOT_CMD_FATAL_EN)
#define ERR_COR_ID(d) (d & 0xffff)
#define ERR_UNCOR_ID(d) (d >> 16)
#define AER_ERROR_SOURCES_MAX 100
#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \
PCI_ERR_UNC_ECRC| \
PCI_ERR_UNC_UNSUP| \
PCI_ERR_UNC_COMP_ABORT| \
PCI_ERR_UNC_UNX_COMP| \
PCI_ERR_UNC_MALF_TLP)
#define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */
struct aer_err_info {
struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
int error_dev_num;
unsigned int id:16;
unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */
unsigned int __pad1:5;
unsigned int multi_error_valid:1;
unsigned int first_error:5;
unsigned int __pad2:2;
unsigned int tlp_header_valid:1;
unsigned int status; /* COR/UNCOR Error Status */
unsigned int mask; /* COR/UNCOR Error Mask */
struct aer_header_log_regs tlp; /* TLP Header */
};
struct aer_err_source {
unsigned int status;
unsigned int id;
};
struct aer_rpc {
struct pcie_device *rpd; /* Root Port device */
struct work_struct dpc_handler;
struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX];
unsigned short prod_idx; /* Error Producer Index */
unsigned short cons_idx; /* Error Consumer Index */
int isr;
spinlock_t e_lock; /*
* Lock access to Error Status/ID Regs
* and error producer/consumer index
*/
struct mutex rpc_mutex; /*
* only one thread could do
* recovery on the same
* root port hierarchy
*/
};
struct aer_broadcast_data {
enum pci_channel_state state;
enum pci_ers_result result;
};
static inline pci_ers_result_t merge_result(enum pci_ers_result orig,
enum pci_ers_result new)
{
if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
return PCI_ERS_RESULT_NO_AER_DRIVER;
if (new == PCI_ERS_RESULT_NONE)
return orig;
switch (orig) {
case PCI_ERS_RESULT_CAN_RECOVER:
case PCI_ERS_RESULT_RECOVERED:
orig = new;
break;
case PCI_ERS_RESULT_DISCONNECT:
if (new == PCI_ERS_RESULT_NEED_RESET)
orig = PCI_ERS_RESULT_NEED_RESET;
break;
default:
break;
}
return orig;
}
extern struct bus_type pcie_port_bus_type;
int aer_init(struct pcie_device *dev);
void aer_isr(struct work_struct *work);
void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info);
irqreturn_t aer_irq(int irq, void *context);
#ifdef CONFIG_ACPI_APEI
int pcie_aer_get_firmware_first(struct pci_dev *pci_dev);
#else
static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev)
{
if (pci_dev->__aer_firmware_first_valid)
return pci_dev->__aer_firmware_first;
return 0;
}
#endif
static inline void pcie_aer_force_firmware_first(struct pci_dev *pci_dev,
int enable)
{
pci_dev->__aer_firmware_first = !!enable;
pci_dev->__aer_firmware_first_valid = 1;
}
#endif /* _AERDRV_H_ */