1
0

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

* 'for-linus' of git://git.kernel.dk/linux-block: (23 commits)
  Revert "cfq: Remove special treatment for metadata rqs."
  block: fix flush machinery for stacking drivers with differring flush flags
  block: improve rq_affinity placement
  blktrace: add FLUSH/FUA support
  Move some REQ flags to the common bio/request area
  allow blk_flush_policy to return REQ_FSEQ_DATA independent of *FLUSH
  xen/blkback: Make description more obvious.
  cfq-iosched: Add documentation about idling
  block: Make rq_affinity = 1 work as expected
  block: swim3: fix unterminated of_device_id table
  block/genhd.c: remove useless cast in diskstats_show()
  drivers/cdrom/cdrom.c: relax check on dvd manufacturer value
  drivers/block/drbd/drbd_nl.c: use bitmap_parse instead of __bitmap_parse
  bsg-lib: add module.h include
  cfq-iosched: Reduce linked group count upon group destruction
  blk-throttle: correctly determine sync bio
  loop: fix deadlock when sysfs and LOOP_CLR_FD race against each other
  loop: add BLK_DEV_LOOP_MIN_COUNT=%i to allow distros 0 pre-allocated loop devices
  loop: add management interface for on-demand device allocation
  loop: replace linked list of allocated devices with an idr index
  ...
This commit is contained in:
Linus Torvalds
2011-08-19 10:47:07 -07:00
26 changed files with 801 additions and 136 deletions

View File

@@ -65,6 +65,16 @@ config BLK_DEV_BSG
If unsure, say Y.
config BLK_DEV_BSGLIB
bool "Block layer SG support v4 helper lib"
default n
select BLK_DEV_BSG
help
Subsystems will normally enable this if needed. Users will not
normally need to manually enable this.
If unsure, say N.
config BLK_DEV_INTEGRITY
bool "Block layer data integrity support"
---help---

View File

@@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o

View File

@@ -1702,6 +1702,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits);
int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
unsigned long flags;
int where = ELEVATOR_INSERT_BACK;
if (blk_rq_check_limits(q, rq))
return -EIO;
@@ -1718,7 +1719,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
*/
BUG_ON(blk_queued_rq(rq));
add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
where = ELEVATOR_INSERT_FLUSH;
add_acct_request(q, rq, where);
spin_unlock_irqrestore(q->queue_lock, flags);
return 0;
@@ -2275,7 +2279,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
* %false - we are done with this request
* %true - still buffers pending for this request
**/
static bool __blk_end_bidi_request(struct request *rq, int error,
bool __blk_end_bidi_request(struct request *rq, int error,
unsigned int nr_bytes, unsigned int bidi_bytes)
{
if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))

View File

@@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
{
unsigned int policy = 0;
if (blk_rq_sectors(rq))
policy |= REQ_FSEQ_DATA;
if (fflags & REQ_FLUSH) {
if (rq->cmd_flags & REQ_FLUSH)
policy |= REQ_FSEQ_PREFLUSH;
if (blk_rq_sectors(rq))
policy |= REQ_FSEQ_DATA;
if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
policy |= REQ_FSEQ_POSTFLUSH;
}
@@ -122,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq)
/* make @rq a normal request */
rq->cmd_flags &= ~REQ_FLUSH_SEQ;
rq->end_io = NULL;
rq->end_io = rq->flush.saved_end_io;
}
/**
@@ -300,9 +301,6 @@ void blk_insert_flush(struct request *rq)
unsigned int fflags = q->flush_flags; /* may change, cache */
unsigned int policy = blk_flush_policy(fflags, rq);
BUG_ON(rq->end_io);
BUG_ON(!rq->bio || rq->bio != rq->biotail);
/*
* @policy now records what operations need to be done. Adjust
* REQ_FLUSH and FUA for the driver.
@@ -311,6 +309,19 @@ void blk_insert_flush(struct request *rq)
if (!(fflags & REQ_FUA))
rq->cmd_flags &= ~REQ_FUA;
/*
* An empty flush handed down from a stacking driver may
* translate into nothing if the underlying device does not
* advertise a write-back cache. In this case, simply
* complete the request.
*/
if (!policy) {
__blk_end_bidi_request(rq, 0, 0, 0);
return;
}
BUG_ON(!rq->bio || rq->bio != rq->biotail);
/*
* If there's data but flush is not necessary, the request can be
* processed directly without going through flush machinery. Queue
@@ -319,6 +330,7 @@ void blk_insert_flush(struct request *rq)
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
list_add_tail(&rq->queuelist, &q->queue_head);
blk_run_queue_async(q);
return;
}
@@ -329,6 +341,7 @@ void blk_insert_flush(struct request *rq)
memset(&rq->flush, 0, sizeof(rq->flush));
INIT_LIST_HEAD(&rq->flush.list);
rq->cmd_flags |= REQ_FLUSH_SEQ;
rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
rq->end_io = flush_data_end_io;
blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);

View File

@@ -124,6 +124,14 @@ void __blk_complete_request(struct request *req)
} else
ccpu = cpu;
/*
* If current CPU and requested CPU are in the same group, running
* softirq in current CPU. One might concern this is just like
* QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
* running in interrupt handler, and currently I/O controller doesn't
* support multiple interrupts, so current CPU is unique actually. This
* avoids IPI sending from current CPU to the first CPU of a group.
*/
if (ccpu == cpu || ccpu == group_cpu) {
struct list_head *list;
do_local:

View File

@@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg,
static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
{
bool rw = bio_data_dir(bio);
bool sync = bio->bi_rw & REQ_SYNC;
bool sync = rw_is_sync(bio->bi_rw);
/* Charge the bio to the group */
tg->bytes_disp[rw] += bio->bi_size;
@@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
if (tg_no_rule_group(tg, rw)) {
blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
rw, bio->bi_rw & REQ_SYNC);
rw, rw_is_sync(bio->bi_rw));
rcu_read_unlock();
return 0;
}

View File

@@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
struct bio *bio);
void blk_dequeue_request(struct request *rq);
void __blk_queue_free_tags(struct request_queue *q);
bool __blk_end_bidi_request(struct request *rq, int error,
unsigned int nr_bytes, unsigned int bidi_bytes);
void blk_rq_timed_out_timer(unsigned long data);
void blk_delete_timer(struct request *);

298
block/bsg-lib.c Normal file
View File

@@ -0,0 +1,298 @@
/*
* BSG helper library
*
* Copyright (C) 2008 James Smart, Emulex Corporation
* Copyright (C) 2011 Red Hat, Inc. All rights reserved.
* Copyright (C) 2011 Mike Christie
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/delay.h>
#include <linux/scatterlist.h>
#include <linux/bsg-lib.h>
#include <linux/module.h>
#include <scsi/scsi_cmnd.h>
/**
* bsg_destroy_job - routine to teardown/delete a bsg job
* @job: bsg_job that is to be torn down
*/
static void bsg_destroy_job(struct bsg_job *job)
{
put_device(job->dev); /* release reference for the request */
kfree(job->request_payload.sg_list);
kfree(job->reply_payload.sg_list);
kfree(job);
}
/**
* bsg_job_done - completion routine for bsg requests
* @job: bsg_job that is complete
* @result: job reply result
* @reply_payload_rcv_len: length of payload recvd
*
* The LLD should call this when the bsg job has completed.
*/
void bsg_job_done(struct bsg_job *job, int result,
unsigned int reply_payload_rcv_len)
{
struct request *req = job->req;
struct request *rsp = req->next_rq;
int err;
err = job->req->errors = result;
if (err < 0)
/* we're only returning the result field in the reply */
job->req->sense_len = sizeof(u32);
else
job->req->sense_len = job->reply_len;
/* we assume all request payload was transferred, residual == 0 */
req->resid_len = 0;
if (rsp) {
WARN_ON(reply_payload_rcv_len > rsp->resid_len);
/* set reply (bidi) residual */
rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len);
}
blk_complete_request(req);
}
EXPORT_SYMBOL_GPL(bsg_job_done);
/**
* bsg_softirq_done - softirq done routine for destroying the bsg requests
* @rq: BSG request that holds the job to be destroyed
*/
static void bsg_softirq_done(struct request *rq)
{
struct bsg_job *job = rq->special;
blk_end_request_all(rq, rq->errors);
bsg_destroy_job(job);
}
static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
{
size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
BUG_ON(!req->nr_phys_segments);
buf->sg_list = kzalloc(sz, GFP_KERNEL);
if (!buf->sg_list)
return -ENOMEM;
sg_init_table(buf->sg_list, req->nr_phys_segments);
buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
buf->payload_len = blk_rq_bytes(req);
return 0;
}
/**
* bsg_create_job - create the bsg_job structure for the bsg request
* @dev: device that is being sent the bsg request
* @req: BSG request that needs a job structure
*/
static int bsg_create_job(struct device *dev, struct request *req)
{
struct request *rsp = req->next_rq;
struct request_queue *q = req->q;
struct bsg_job *job;
int ret;
BUG_ON(req->special);
job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
if (!job)
return -ENOMEM;
req->special = job;
job->req = req;
if (q->bsg_job_size)
job->dd_data = (void *)&job[1];
job->request = req->cmd;
job->request_len = req->cmd_len;
job->reply = req->sense;
job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
* allocated */
if (req->bio) {
ret = bsg_map_buffer(&job->request_payload, req);
if (ret)
goto failjob_rls_job;
}
if (rsp && rsp->bio) {
ret = bsg_map_buffer(&job->reply_payload, rsp);
if (ret)
goto failjob_rls_rqst_payload;
}
job->dev = dev;
/* take a reference for the request */
get_device(job->dev);
return 0;
failjob_rls_rqst_payload:
kfree(job->request_payload.sg_list);
failjob_rls_job:
kfree(job);
return -ENOMEM;
}
/*
* bsg_goose_queue - restart queue in case it was stopped
* @q: request q to be restarted
*/
void bsg_goose_queue(struct request_queue *q)
{
if (!q)
return;
blk_run_queue_async(q);
}
EXPORT_SYMBOL_GPL(bsg_goose_queue);
/**
* bsg_request_fn - generic handler for bsg requests
* @q: request queue to manage
*
* On error the create_bsg_job function should return a -Exyz error value
* that will be set to the req->errors.
*
* Drivers/subsys should pass this to the queue init function.
*/
void bsg_request_fn(struct request_queue *q)
{
struct device *dev = q->queuedata;
struct request *req;
struct bsg_job *job;
int ret;
if (!get_device(dev))
return;
while (1) {
req = blk_fetch_request(q);
if (!req)
break;
spin_unlock_irq(q->queue_lock);
ret = bsg_create_job(dev, req);
if (ret) {
req->errors = ret;
blk_end_request_all(req, ret);
spin_lock_irq(q->queue_lock);
continue;
}
job = req->special;
ret = q->bsg_job_fn(job);
spin_lock_irq(q->queue_lock);
if (ret)
break;
}
spin_unlock_irq(q->queue_lock);
put_device(dev);
spin_lock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(bsg_request_fn);
/**
* bsg_setup_queue - Create and add the bsg hooks so we can receive requests
* @dev: device to attach bsg device to
* @q: request queue setup by caller
* @name: device to give bsg device
* @job_fn: bsg job handler
* @dd_job_size: size of LLD data needed for each job
*
* The caller should have setup the reuqest queue with bsg_request_fn
* as the request_fn.
*/
int bsg_setup_queue(struct device *dev, struct request_queue *q,
char *name, bsg_job_fn *job_fn, int dd_job_size)
{
int ret;
q->queuedata = dev;
q->bsg_job_size = dd_job_size;
q->bsg_job_fn = job_fn;
queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
blk_queue_softirq_done(q, bsg_softirq_done);
blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
ret = bsg_register_queue(q, dev, name, NULL);
if (ret) {
printk(KERN_ERR "%s: bsg interface failed to "
"initialize - register queue\n", dev->kobj.name);
return ret;
}
return 0;
}
EXPORT_SYMBOL_GPL(bsg_setup_queue);
/**
* bsg_remove_queue - Deletes the bsg dev from the q
* @q: the request_queue that is to be torn down.
*
* Notes:
* Before unregistering the queue empty any requests that are blocked
*/
void bsg_remove_queue(struct request_queue *q)
{
struct request *req; /* block request */
int counts; /* totals for request_list count and starved */
if (!q)
return;
/* Stop taking in new requests */
spin_lock_irq(q->queue_lock);
blk_stop_queue(q);
/* drain all requests in the queue */
while (1) {
/* need the lock to fetch a request
* this may fetch the same reqeust as the previous pass
*/
req = blk_fetch_request(q);
/* save requests in use and starved */
counts = q->rq.count[0] + q->rq.count[1] +
q->rq.starved[0] + q->rq.starved[1];
spin_unlock_irq(q->queue_lock);
/* any requests still outstanding? */
if (counts == 0)
break;
/* This may be the same req as the previous iteration,
* always send the blk_end_request_all after a prefetch.
* It is not okay to not end the request because the
* prefetch started the request.
*/
if (req) {
/* return -ENXIO to indicate that this queue is
* going away
*/
req->errors = -ENXIO;
blk_end_request_all(req, -ENXIO);
}
msleep(200); /* allow bsg to possibly finish */
spin_lock_irq(q->queue_lock);
}
bsg_unregister_queue(q);
}
EXPORT_SYMBOL_GPL(bsg_remove_queue);

View File

@@ -130,6 +130,8 @@ struct cfq_queue {
unsigned long slice_end;
long slice_resid;
/* pending metadata requests */
int meta_pending;
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
@@ -682,6 +684,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
if (rq_is_sync(rq1) != rq_is_sync(rq2))
return rq_is_sync(rq1) ? rq1 : rq2;
if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
return rq1->cmd_flags & REQ_META ? rq1 : rq2;
s1 = blk_rq_pos(rq1);
s2 = blk_rq_pos(rq2);
@@ -1209,6 +1214,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
hlist_del_init(&cfqg->cfqd_node);
BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
cfqd->nr_blkcg_linked_grps--;
/*
* Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed.
@@ -1604,6 +1612,10 @@ static void cfq_remove_request(struct request *rq)
cfqq->cfqd->rq_queued--;
cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
rq_data_dir(rq), rq_is_sync(rq));
if (rq->cmd_flags & REQ_META) {
WARN_ON(!cfqq->meta_pending);
cfqq->meta_pending--;
}
}
static int cfq_merge(struct request_queue *q, struct request **req,
@@ -3356,6 +3368,13 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
RB_EMPTY_ROOT(&cfqq->sort_list))
return true;
/*
* So both queues are sync. Let the new request get disk time if
* it's a metadata request and the current queue is doing regular IO.
*/
if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
return true;
/*
* Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
*/
@@ -3420,6 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct cfq_io_context *cic = RQ_CIC(rq);
cfqd->rq_queued++;
if (rq->cmd_flags & REQ_META)
cfqq->meta_pending++;
cfq_update_io_thinktime(cfqd, cfqq, cic);
cfq_update_io_seektime(cfqd, cfqq, rq);

View File

@@ -1146,17 +1146,17 @@ static int diskstats_show(struct seq_file *seqf, void *v)
cpu = part_stat_lock();
part_round_stats(cpu, hd);
part_stat_unlock();
seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
"%u %lu %lu %llu %u %u %u %u\n",
seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
"%u %lu %lu %lu %u %u %u %u\n",
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
disk_name(gp, hd->partno, buf),
part_stat_read(hd, ios[READ]),
part_stat_read(hd, merges[READ]),
(unsigned long long)part_stat_read(hd, sectors[READ]),
part_stat_read(hd, sectors[READ]),
jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
part_stat_read(hd, ios[WRITE]),
part_stat_read(hd, merges[WRITE]),
(unsigned long long)part_stat_read(hd, sectors[WRITE]),
part_stat_read(hd, sectors[WRITE]),
jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
part_in_flight(hd),
jiffies_to_msecs(part_stat_read(hd, io_ticks)),