mtip32xx: Add SRSI support

This patch adds support for SRSI (Surprise Removal / Surprise Insertion).

Approach:
---------
Surprise Removal:
-----------------
On surprise removal of the device, the gendisk, request queue, device index, sysfs
entries, etc. are retained as long as the device is in use: a mounted filesystem,
the device opened by an application, and so on. The service thread breaks out of its
main while loop, waits for the PCI remove() handler to exit, and then waits for the
device to become free. Once there are no more holders of the device, the service
thread cleans up the block- and device-related state and returns. A condensed sketch
of this handshake follows.
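For orientation, here is a condensed sketch of the surprise-removal handshake this
patch introduces between mtip_pci_remove() and the service thread. It is paraphrased
from the hunks below, not the literal driver code, and it omits the normal
service-loop work; the helper names mtip_service_thread_orphan_path() and
mtip_pci_remove_orphan_path() are illustrative only — in the patch this logic lives
inline in mtip_service_thread() and mtip_pci_remove().

	/* Service-thread side: runs after MTIP_PF_SR_CLEANUP_BIT breaks the main loop. */
	static int mtip_service_thread_orphan_path(struct driver_data *dd)
	{
		/* 1. Wait for mtip_pci_remove() to finish its teardown. */
		while (!test_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag)) {
			msleep_interruptible(1000);
			if (kthread_should_stop())
				return 0;
		}

		/* 2. Wait for the last holder to close the device, then free the
		 *    orphaned gendisk/queue/device index and the driver_data.  */
		while (mtip_free_orphan(dd) != 0) {
			msleep_interruptible(1000);
			if (kthread_should_stop())
				return 0;
		}

		/* dd has been freed; nothing may touch it past this point. */
		return 0;
	}

	/* PCI remove() side: driver_data must outlive remove() on surprise removal. */
	static void mtip_pci_remove_orphan_path(struct driver_data *dd)
	{
		set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
		mtip_check_surprise_removal(dd->pdev);	/* sets dd->sr, wakes the service thread */

		/* ... wait for IRQ completion workers, fail outstanding commands,
		 *     tear down everything that is safe to tear down ...          */

		if (!dd->sr)
			kfree(dd);	/* normal hot remove: free immediately */
		else
			set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag);	/* orphan: hand dd to the service thread */
	}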

Surprise Insertion:
-------------------
No change; this scenario follows the normal PCI probe() flow.

Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Author:    Asai Thambi S P, 2013-09-11 13:14:42 -06:00
Committer: Jens Axboe
commit 8f8b899563, parent 1b21f5b2ad
2 changed files with 291 additions and 184 deletions

drivers/block/mtip32xx/mtip32xx.c

@ -126,64 +126,30 @@ struct mtip_compat_ide_task_request_s {
static bool mtip_check_surprise_removal(struct pci_dev *pdev)
{
u16 vendor_id = 0;
struct driver_data *dd = pci_get_drvdata(pdev);
if (dd->sr)
return true;
/* Read the vendorID from the configuration space */
pci_read_config_word(pdev, 0x00, &vendor_id);
if (vendor_id == 0xFFFF)
if (vendor_id == 0xFFFF) {
dd->sr = true;
if (dd->queue)
set_bit(QUEUE_FLAG_DEAD, &dd->queue->queue_flags);
else
dev_warn(&dd->pdev->dev,
"%s: dd->queue is NULL\n", __func__);
if (dd->port) {
set_bit(MTIP_PF_SR_CLEANUP_BIT, &dd->port->flags);
wake_up_interruptible(&dd->port->svc_wait);
} else
dev_warn(&dd->pdev->dev,
"%s: dd->port is NULL\n", __func__);
return true; /* device removed */
return false; /* device present */
}
/*
* This function is called for clean the pending command in the
* command slot during the surprise removal of device and return
* error to the upper layer.
*
* @dd Pointer to the DRIVER_DATA structure.
*
* return value
* None
*/
static void mtip_command_cleanup(struct driver_data *dd)
{
int group = 0, commandslot = 0, commandindex = 0;
struct mtip_cmd *command;
struct mtip_port *port = dd->port;
static int in_progress;
if (in_progress)
return;
in_progress = 1;
for (group = 0; group < 4; group++) {
for (commandslot = 0; commandslot < 32; commandslot++) {
if (!(port->allocated[group] & (1 << commandslot)))
continue;
commandindex = group << 5 | commandslot;
command = &port->commands[commandindex];
if (atomic_read(&command->active)
&& (command->async_callback)) {
command->async_callback(command->async_data,
-ENODEV);
command->async_callback = NULL;
command->async_data = NULL;
}
dma_unmap_sg(&port->dd->pdev->dev,
command->sg,
command->scatter_ents,
command->direction);
}
}
up(&port->cmd_slot);
set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag);
in_progress = 0;
return false; /* device present */
}
/*
@ -222,10 +188,7 @@ static int get_slot(struct mtip_port *port)
}
dev_warn(&port->dd->pdev->dev, "Failed to get a tag.\n");
if (mtip_check_surprise_removal(port->dd->pdev)) {
/* Device not present, clean outstanding commands */
mtip_command_cleanup(port->dd);
}
mtip_check_surprise_removal(port->dd->pdev);
return -1;
}
@ -245,6 +208,107 @@ static inline void release_slot(struct mtip_port *port, int tag)
smp_mb__after_clear_bit();
}
/*
* IO completion function.
*
* This completion function is called by the driver ISR when a
* command that was issued by the kernel completes. It first calls the
* asynchronous completion function which normally calls back into the block
* layer passing the asynchronous callback data, then unmaps the
* scatter list associated with the completed command, and finally
* clears the allocated bit associated with the completed command.
*
* @port Pointer to the port data structure.
* @tag Tag of the command.
* @data Pointer to driver_data.
* @status Completion status.
*
* return value
* None
*/
static void mtip_async_complete(struct mtip_port *port,
int tag,
void *data,
int status)
{
struct mtip_cmd *command;
struct driver_data *dd = data;
int cb_status = status ? -EIO : 0;
if (unlikely(!dd) || unlikely(!port))
return;
command = &port->commands[tag];
if (unlikely(status == PORT_IRQ_TF_ERR)) {
dev_warn(&port->dd->pdev->dev,
"Command tag %d failed due to TFE\n", tag);
}
/* Upper layer callback */
if (likely(command->async_callback))
command->async_callback(command->async_data, cb_status);
command->async_callback = NULL;
command->comp_func = NULL;
/* Unmap the DMA scatter list entries */
dma_unmap_sg(&dd->pdev->dev,
command->sg,
command->scatter_ents,
command->direction);
/* Clear the allocated and active bits for the command */
atomic_set(&port->commands[tag].active, 0);
release_slot(port, tag);
up(&port->cmd_slot);
}
/*
* This function is called for clean the pending command in the
* command slot during the surprise removal of device and return
* error to the upper layer.
*
* @dd Pointer to the DRIVER_DATA structure.
*
* return value
* None
*/
static void mtip_command_cleanup(struct driver_data *dd)
{
int tag = 0;
struct mtip_cmd *cmd;
struct mtip_port *port = dd->port;
unsigned int num_cmd_slots = dd->slot_groups * 32;
if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
return;
if (!port)
return;
cmd = &port->commands[MTIP_TAG_INTERNAL];
if (atomic_read(&cmd->active))
if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) &
(1 << MTIP_TAG_INTERNAL))
if (cmd->comp_func)
cmd->comp_func(port, MTIP_TAG_INTERNAL,
cmd->comp_data, -ENODEV);
while (1) {
tag = find_next_bit(port->allocated, num_cmd_slots, tag);
if (tag >= num_cmd_slots)
break;
cmd = &port->commands[tag];
if (atomic_read(&cmd->active))
mtip_async_complete(port, tag, dd, -ENODEV);
}
set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag);
}
/*
* Reset the HBA (without sleeping)
*
@ -584,6 +648,9 @@ static void mtip_timeout_function(unsigned long int data)
if (unlikely(!port))
return;
if (unlikely(port->dd->sr))
return;
if (test_bit(MTIP_DDF_RESUME_BIT, &port->dd->dd_flag)) {
mod_timer(&port->cmd_timer,
jiffies + msecs_to_jiffies(30000));
@ -674,66 +741,6 @@ static void mtip_timeout_function(unsigned long int data)
jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
}
/*
* IO completion function.
*
* This completion function is called by the driver ISR when a
* command that was issued by the kernel completes. It first calls the
* asynchronous completion function which normally calls back into the block
* layer passing the asynchronous callback data, then unmaps the
* scatter list associated with the completed command, and finally
* clears the allocated bit associated with the completed command.
*
* @port Pointer to the port data structure.
* @tag Tag of the command.
* @data Pointer to driver_data.
* @status Completion status.
*
* return value
* None
*/
static void mtip_async_complete(struct mtip_port *port,
int tag,
void *data,
int status)
{
struct mtip_cmd *command;
struct driver_data *dd = data;
int cb_status = status ? -EIO : 0;
if (unlikely(!dd) || unlikely(!port))
return;
command = &port->commands[tag];
if (unlikely(status == PORT_IRQ_TF_ERR)) {
dev_warn(&port->dd->pdev->dev,
"Command tag %d failed due to TFE\n", tag);
}
/* Upper layer callback */
if (likely(command->async_callback))
command->async_callback(command->async_data, cb_status);
command->async_callback = NULL;
command->comp_func = NULL;
/* Unmap the DMA scatter list entries */
dma_unmap_sg(&dd->pdev->dev,
command->sg,
command->scatter_ents,
command->direction);
/* Clear the allocated and active bits for the command */
atomic_set(&port->commands[tag].active, 0);
release_slot(port, tag);
if (unlikely(command->unaligned))
up(&port->cmd_slot_unal);
else
up(&port->cmd_slot);
}
/*
* Internal command completion callback function.
*
@ -854,7 +861,6 @@ static void mtip_handle_tfe(struct driver_data *dd)
"Missing completion func for tag %d",
tag);
if (mtip_check_surprise_removal(dd->pdev)) {
mtip_command_cleanup(dd);
/* don't proceed further */
return;
}
@ -1018,14 +1024,12 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group,
command->comp_data,
0);
} else {
dev_warn(&dd->pdev->dev,
"Null completion "
"for tag %d",
dev_dbg(&dd->pdev->dev,
"Null completion for tag %d",
tag);
if (mtip_check_surprise_removal(
dd->pdev)) {
mtip_command_cleanup(dd);
return;
}
}
@ -1145,7 +1149,6 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
if (unlikely(port_stat & PORT_IRQ_ERR)) {
if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
mtip_command_cleanup(dd);
/* don't proceed further */
return IRQ_HANDLED;
}
@ -3006,6 +3009,46 @@ static void mtip_hw_debugfs_exit(struct driver_data *dd)
debugfs_remove_recursive(dd->dfs_node);
}
static int mtip_free_orphan(struct driver_data *dd)
{
struct kobject *kobj;
if (dd->bdev) {
if (dd->bdev->bd_holders >= 1)
return -2;
bdput(dd->bdev);
dd->bdev = NULL;
}
mtip_hw_debugfs_exit(dd);
spin_lock(&rssd_index_lock);
ida_remove(&rssd_index_ida, dd->index);
spin_unlock(&rssd_index_lock);
if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag) &&
test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) {
put_disk(dd->disk);
} else {
if (dd->disk) {
kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
if (kobj) {
mtip_hw_sysfs_exit(dd, kobj);
kobject_put(kobj);
}
del_gendisk(dd->disk);
dd->disk = NULL;
}
if (dd->queue) {
dd->queue->queuedata = NULL;
blk_cleanup_queue(dd->queue);
dd->queue = NULL;
}
}
kfree(dd);
return 0;
}
/*
* Perform any init/resume time hardware setup
@ -3154,6 +3197,7 @@ static int mtip_service_thread(void *data)
unsigned long slot, slot_start, slot_wrap;
unsigned int num_cmd_slots = dd->slot_groups * 32;
struct mtip_port *port = dd->port;
int ret;
while (1) {
/*
@ -3164,13 +3208,18 @@ static int mtip_service_thread(void *data)
!(port->flags & MTIP_PF_PAUSE_IO));
if (kthread_should_stop())
goto st_out;
set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
/* If I am an orphan, start self cleanup */
if (test_bit(MTIP_PF_SR_CLEANUP_BIT, &port->flags))
break;
if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
&dd->dd_flag)))
break;
goto st_out;
set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
slot = 1;
/* used to restrict the loop to one iteration */
@ -3201,7 +3250,7 @@ static int mtip_service_thread(void *data)
clear_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
} else if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) {
if (!mtip_ftl_rebuild_poll(dd))
if (mtip_ftl_rebuild_poll(dd) < 0)
set_bit(MTIP_DDF_REBUILD_FAILED_BIT,
&dd->dd_flag);
clear_bit(MTIP_PF_REBUILD_BIT, &port->flags);
@ -3209,8 +3258,30 @@ static int mtip_service_thread(void *data)
clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
if (test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
break;
goto st_out;
}
/* wait for pci remove to exit */
while (1) {
if (test_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag))
break;
msleep_interruptible(1000);
if (kthread_should_stop())
goto st_out;
}
while (1) {
ret = mtip_free_orphan(dd);
if (!ret) {
/* NOTE: All data structures are invalid, do not
* access any here */
return 0;
}
msleep_interruptible(1000);
if (kthread_should_stop())
goto st_out;
}
st_out:
return 0;
}
@ -3437,13 +3508,13 @@ static int mtip_hw_init(struct driver_data *dd)
rv = -EFAULT;
goto out3;
}
mtip_dump_identify(dd->port);
if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
MTIP_FTL_REBUILD_MAGIC) {
set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags);
return MTIP_FTL_REBUILD_MAGIC;
}
mtip_dump_identify(dd->port);
/* check write protect, over temp and rebuild statuses */
rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
@ -3467,8 +3538,8 @@ static int mtip_hw_init(struct driver_data *dd)
}
if (buf[288] == 0xBF) {
dev_info(&dd->pdev->dev,
"Drive indicates rebuild has failed.\n");
/* TODO */
"Drive is in security locked state.\n");
set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
}
}
@ -3523,9 +3594,8 @@ static int mtip_hw_exit(struct driver_data *dd)
* Send standby immediate (E0h) to the drive so that it
* saves its state.
*/
if (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) {
if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags))
if (!dd->sr) {
if (!test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))
if (mtip_standby_immediate(dd->port))
dev_warn(&dd->pdev->dev,
"STANDBY IMMEDIATE failed\n");
@ -3551,6 +3621,7 @@ static int mtip_hw_exit(struct driver_data *dd)
dd->port->command_list_dma);
/* Free the memory allocated for the for structure. */
kfree(dd->port);
dd->port = NULL;
return 0;
}
@ -3572,7 +3643,8 @@ static int mtip_hw_shutdown(struct driver_data *dd)
* Send standby immediate (E0h) to the drive so that it
* saves its state.
*/
mtip_standby_immediate(dd->port);
if (!dd->sr && dd->port)
mtip_standby_immediate(dd->port);
return 0;
}
@ -3887,6 +3959,10 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
bio_endio(bio, -ENODATA);
return;
}
if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) {
bio_endio(bio, -ENXIO);
return;
}
}
if (unlikely(bio->bi_rw & REQ_DISCARD)) {
@ -4010,6 +4086,8 @@ static int mtip_block_initialize(struct driver_data *dd)
dd->disk->private_data = dd;
dd->index = index;
mtip_hw_debugfs_init(dd);
/*
* if rebuild pending, start the service thread, and delay the block
* queue creation and add_disk()
@ -4068,6 +4146,7 @@ static int mtip_block_initialize(struct driver_data *dd)
/* Enable the block device and add it to /dev */
add_disk(dd->disk);
dd->bdev = bdget_disk(dd->disk, 0);
/*
* Now that the disk is active, initialize any sysfs attributes
* managed by the protocol layer.
@ -4077,7 +4156,6 @@ static int mtip_block_initialize(struct driver_data *dd)
mtip_hw_sysfs_init(dd, kobj);
kobject_put(kobj);
}
mtip_hw_debugfs_init(dd);
if (dd->mtip_svc_handler) {
set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
@ -4103,7 +4181,8 @@ static int mtip_block_initialize(struct driver_data *dd)
return rv;
kthread_run_error:
mtip_hw_debugfs_exit(dd);
bdput(dd->bdev);
dd->bdev = NULL;
/* Delete our gendisk. This also removes the device from /dev */
del_gendisk(dd->disk);
@ -4112,6 +4191,7 @@ static int mtip_block_initialize(struct driver_data *dd)
blk_cleanup_queue(dd->queue);
block_queue_alloc_init_error:
mtip_hw_debugfs_exit(dd);
disk_index_error:
spin_lock(&rssd_index_lock);
ida_remove(&rssd_index_ida, index);
@ -4141,40 +4221,48 @@ static int mtip_block_remove(struct driver_data *dd)
{
struct kobject *kobj;
if (dd->mtip_svc_handler) {
set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
wake_up_interruptible(&dd->port->svc_wait);
kthread_stop(dd->mtip_svc_handler);
}
if (!dd->sr) {
mtip_hw_debugfs_exit(dd);
/* Clean up the sysfs attributes, if created */
if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
if (kobj) {
mtip_hw_sysfs_exit(dd, kobj);
kobject_put(kobj);
if (dd->mtip_svc_handler) {
set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
wake_up_interruptible(&dd->port->svc_wait);
kthread_stop(dd->mtip_svc_handler);
}
/* Clean up the sysfs attributes, if created */
if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
if (kobj) {
mtip_hw_sysfs_exit(dd, kobj);
kobject_put(kobj);
}
}
/*
* Delete our gendisk structure. This also removes the device
* from /dev
*/
if (dd->bdev) {
bdput(dd->bdev);
dd->bdev = NULL;
}
if (dd->disk) {
if (dd->disk->queue) {
del_gendisk(dd->disk);
blk_cleanup_queue(dd->queue);
dd->queue = NULL;
} else
put_disk(dd->disk);
}
dd->disk = NULL;
spin_lock(&rssd_index_lock);
ida_remove(&rssd_index_ida, dd->index);
spin_unlock(&rssd_index_lock);
} else {
dev_info(&dd->pdev->dev, "device %s surprise removal\n",
dd->disk->disk_name);
}
mtip_hw_debugfs_exit(dd);
/*
* Delete our gendisk structure. This also removes the device
* from /dev
*/
if (dd->disk) {
if (dd->disk->queue)
del_gendisk(dd->disk);
else
put_disk(dd->disk);
}
spin_lock(&rssd_index_lock);
ida_remove(&rssd_index_ida, dd->index);
spin_unlock(&rssd_index_lock);
blk_cleanup_queue(dd->queue);
dd->disk = NULL;
dd->queue = NULL;
/* De-initialize the protocol layer. */
mtip_hw_exit(dd);
@ -4490,8 +4578,7 @@ static int mtip_pci_probe(struct pci_dev *pdev,
static void mtip_pci_remove(struct pci_dev *pdev)
{
struct driver_data *dd = pci_get_drvdata(pdev);
int counter = 0;
unsigned long flags;
unsigned long flags, to;
set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
@ -4500,17 +4587,22 @@ static void mtip_pci_remove(struct pci_dev *pdev)
list_add(&dd->remove_list, &removing_list);
spin_unlock_irqrestore(&dev_lock, flags);
if (mtip_check_surprise_removal(pdev)) {
while (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) {
counter++;
msleep(20);
if (counter == 10) {
/* Cleanup the outstanding commands */
mtip_command_cleanup(dd);
break;
}
}
mtip_check_surprise_removal(pdev);
synchronize_irq(dd->pdev->irq);
/* Spin until workers are done */
to = jiffies + msecs_to_jiffies(4000);
do {
msleep(20);
} while (atomic_read(&dd->irq_workers_active) != 0 &&
time_before(jiffies, to));
if (atomic_read(&dd->irq_workers_active) != 0) {
dev_warn(&dd->pdev->dev,
"Completion workers still active!\n");
}
/* Cleanup the outstanding commands */
mtip_command_cleanup(dd);
/* Clean up the block layer. */
mtip_block_remove(dd);
@ -4529,8 +4621,15 @@ static void mtip_pci_remove(struct pci_dev *pdev)
list_del_init(&dd->remove_list);
spin_unlock_irqrestore(&dev_lock, flags);
kfree(dd);
if (!dd->sr)
kfree(dd);
else
set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag);
pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
pci_set_drvdata(pdev, NULL);
pci_dev_put(pdev);
}
/*

drivers/block/mtip32xx/mtip32xx.h

@ -140,6 +140,7 @@ enum {
MTIP_PF_SVC_THD_ACTIVE_BIT = 4,
MTIP_PF_ISSUE_CMDS_BIT = 5,
MTIP_PF_REBUILD_BIT = 6,
MTIP_PF_SR_CLEANUP_BIT = 7,
MTIP_PF_SVC_THD_STOP_BIT = 8,
/* below are bit numbers in 'dd_flag' defined in driver_data */
@ -147,15 +148,18 @@ enum {
MTIP_DDF_REMOVE_PENDING_BIT = 1,
MTIP_DDF_OVER_TEMP_BIT = 2,
MTIP_DDF_WRITE_PROTECT_BIT = 3,
MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) |
(1 << MTIP_DDF_SEC_LOCK_BIT) |
(1 << MTIP_DDF_OVER_TEMP_BIT) |
(1 << MTIP_DDF_WRITE_PROTECT_BIT)),
MTIP_DDF_REMOVE_DONE_BIT = 4,
MTIP_DDF_CLEANUP_BIT = 5,
MTIP_DDF_RESUME_BIT = 6,
MTIP_DDF_INIT_DONE_BIT = 7,
MTIP_DDF_REBUILD_FAILED_BIT = 8,
MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) |
(1 << MTIP_DDF_SEC_LOCK_BIT) |
(1 << MTIP_DDF_OVER_TEMP_BIT) |
(1 << MTIP_DDF_WRITE_PROTECT_BIT) |
(1 << MTIP_DDF_REBUILD_FAILED_BIT)),
};
struct smart_attr {
@ -499,6 +503,8 @@ struct driver_data {
bool trim_supp; /* flag indicating trim support */
bool sr;
int numa_node; /* NUMA support */
char workq_name[32];
@ -511,6 +517,8 @@ struct driver_data {
int isr_binding;
struct block_device *bdev;
int unal_qdepth; /* qdepth of unaligned IO queue */
struct list_head online_list; /* linkage for online list */