mirror of https://kernel.googlesource.com/pub/scm/linux/kernel/git/stable/linux-stable.git (synced 2025-09-13 11:07:46 +10:00)
lib/sbitmap: convert shallow_depth from one word to the whole sbitmap
[ Upstream commit 42e6c6ce03 ]

Currently, elevators record an internal 'async_depth' to throttle
asynchronous requests, and they calculate shallow_depth based on
sb->shift, on the assumption that sb->shift is the number of available
tags in one word.

However, sb->shift is not the number of available tags in the last
word, see __map_depth():

        if (index == sb->map_nr - 1)
                return sb->depth - (index << sb->shift);

As a consequence, if the last word is used, more tags can be obtained
than expected. For example, assume the tags were sized for
nr_requests=256, giving four words of 64 bits each; in the worst case,
if the user then sets nr_requests=32, the first word is also the last
word and holds only 32 tags, so calculating async_depth from the
64 bits per word is wrong.

On the other hand, due to cgroup QoS, bfq can allow only one request to
be allocated, yet setting shallow_depth=1 still allows one request per
word, i.e. up to map_nr requests, to be allocated.

Fix these problems by applying shallow_depth to the whole sbitmap
instead of to each word, and change kyber, mq-deadline and bfq to
follow this. A new helper, __map_depth_with_shallow(), is introduced to
calculate the available bits in each word.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20250807032413.1469456-2-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent 4191feb410
commit ed30c38d1e
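
To make the problem concrete before the diff below, here is a minimal userspace sketch (illustrative only, not kernel code; depth=256, shift=6 and shallow_depth=1 are example values mirroring the bfq cgroup-QoS case from the message). With the old semantics the cap applied to each word separately, so a map_nr-word bitmap could still hand out up to map_nr tags even with shallow_depth=1; with the new semantics the cap applies to the bitmap as a whole.

/*
 * Minimal userspace sketch (illustrative only, not kernel code).
 * depth = 256, shift = 6 and shallow_depth = 1 are example values that
 * mirror the bfq cgroup-QoS case from the commit message.
 */
#include <stdio.h>

int main(void)
{
        unsigned int depth = 256, shift = 6;    /* 4 words of 64 bits */
        unsigned int map_nr = (depth + (1U << shift) - 1) >> shift;
        unsigned int shallow_depth = 1;
        unsigned int old_total = 0;

        for (unsigned int i = 0; i < map_nr; i++) {
                /* __map_depth(): the last word may hold fewer bits. */
                unsigned int word_depth = (i == map_nr - 1) ?
                                depth - (i << shift) : (1U << shift);

                /* Old semantics: the cap applied to each word separately. */
                old_total += word_depth < shallow_depth ? word_depth : shallow_depth;
        }

        printf("old: up to %u tags despite shallow_depth=%u\n",
               old_total, shallow_depth);
        printf("new: up to %u tag(s) across the whole sbitmap\n", shallow_depth);
        return 0;
}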
block/bfq-iosched.c
@@ -701,17 +701,13 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
 {
         struct bfq_data *bfqd = data->q->elevator->elevator_data;
         struct bfq_io_cq *bic = bfq_bic_lookup(data->q);
-        int depth;
-        unsigned limit = data->q->nr_requests;
-        unsigned int act_idx;
+        unsigned int limit, act_idx;
 
         /* Sync reads have full depth available */
-        if (op_is_sync(opf) && !op_is_write(opf)) {
-                depth = 0;
-        } else {
-                depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)];
-                limit = (limit * depth) >> bfqd->full_depth_shift;
-        }
+        if (op_is_sync(opf) && !op_is_write(opf))
+                limit = data->q->nr_requests;
+        else
+                limit = bfqd->async_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)];
 
         for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) {
                 /* Fast path to check if bfqq is already allocated. */
@@ -725,14 +721,16 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
                  * available requests and thus starve other entities.
                  */
                 if (bfqq_request_over_limit(bfqd, bic, opf, act_idx, limit)) {
-                        depth = 1;
+                        limit = 1;
                         break;
                 }
         }
 
         bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u",
-                        __func__, bfqd->wr_busy_queues, op_is_sync(opf), depth);
-        if (depth)
-                data->shallow_depth = depth;
+                        __func__, bfqd->wr_busy_queues, op_is_sync(opf), limit);
+
+        if (limit < data->q->nr_requests)
+                data->shallow_depth = limit;
 }
 
 static struct bfq_queue *
@@ -7128,9 +7126,8 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
  */
 static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
 {
-        unsigned int depth = 1U << bt->sb.shift;
+        unsigned int nr_requests = bfqd->queue->nr_requests;
 
-        bfqd->full_depth_shift = bt->sb.shift;
         /*
          * In-word depths if no bfq_queue is being weight-raised:
          * leaving 25% of tags only for sync reads.
@@ -7142,13 +7139,13 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
          * limit 'something'.
          */
         /* no more than 50% of tags for async I/O */
-        bfqd->word_depths[0][0] = max(depth >> 1, 1U);
+        bfqd->async_depths[0][0] = max(nr_requests >> 1, 1U);
         /*
          * no more than 75% of tags for sync writes (25% extra tags
          * w.r.t. async I/O, to prevent async I/O from starving sync
          * writes)
          */
-        bfqd->word_depths[0][1] = max((depth * 3) >> 2, 1U);
+        bfqd->async_depths[0][1] = max((nr_requests * 3) >> 2, 1U);
 
         /*
          * In-word depths in case some bfq_queue is being weight-
@@ -7158,9 +7155,9 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
          * shortage.
          */
         /* no more than ~18% of tags for async I/O */
-        bfqd->word_depths[1][0] = max((depth * 3) >> 4, 1U);
+        bfqd->async_depths[1][0] = max((nr_requests * 3) >> 4, 1U);
         /* no more than ~37% of tags for sync writes (~20% extra tags) */
-        bfqd->word_depths[1][1] = max((depth * 6) >> 4, 1U);
+        bfqd->async_depths[1][1] = max((nr_requests * 6) >> 4, 1U);
 }
 
 static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx)
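
For reference, a small standalone sketch of the limits the reworked bfq_update_depths() computes above; nr_requests = 256 is an example value, not taken from the patch. The percentages match the comments in the hunk: 50%/75% without weight-raising, ~18%/~37% with it.

/*
 * Illustrative sketch of the async_depths table computed by the reworked
 * bfq_update_depths() above; nr_requests = 256 is an example value.
 */
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
        unsigned int nr_requests = 256;
        unsigned int async_depths[2][2];

        /* No queue is weight-raised: 50% async, 75% sync writes. */
        async_depths[0][0] = MAX(nr_requests >> 1, 1U);
        async_depths[0][1] = MAX((nr_requests * 3) >> 2, 1U);
        /* Weight-raising active: ~18% async, ~37% sync writes. */
        async_depths[1][0] = MAX((nr_requests * 3) >> 4, 1U);
        async_depths[1][1] = MAX((nr_requests * 6) >> 4, 1U);

        printf("no wr: async %u, sync writes %u (of %u)\n",
               async_depths[0][0], async_depths[0][1], nr_requests);
        printf("wr on: async %u, sync writes %u (of %u)\n",
               async_depths[1][0], async_depths[1][1], nr_requests);
        return 0;
}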
block/bfq-iosched.h
@@ -813,8 +813,7 @@ struct bfq_data {
          * Depth limits used in bfq_limit_depth (see comments on the
          * function)
          */
-        unsigned int word_depths[2][2];
-        unsigned int full_depth_shift;
+        unsigned int async_depths[2][2];
 
         /*
          * Number of independent actuators. This is equal to 1 in
block/kyber-iosched.c
@@ -157,10 +157,7 @@ struct kyber_queue_data {
          */
         struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];
 
-        /*
-         * Async request percentage, converted to per-word depth for
-         * sbitmap_get_shallow().
-         */
+        /* Number of allowed async requests. */
         unsigned int async_depth;
 
         struct kyber_cpu_latency __percpu *cpu_latency;
@@ -454,10 +451,8 @@ static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx)
 {
         struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
         struct blk_mq_tags *tags = hctx->sched_tags;
-        unsigned int shift = tags->bitmap_tags.sb.shift;
 
-        kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
-
+        kqd->async_depth = hctx->queue->nr_requests * KYBER_ASYNC_PERCENT / 100U;
         sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, kqd->async_depth);
 }
 
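
The kyber change has the same flavour: async_depth is now 75% of nr_requests rather than 75% of one word. A quick illustrative comparison (KYBER_ASYNC_PERCENT is 75 in kyber-iosched.c; shift = 6 and the nr_requests values are example numbers, not from the patch):

/*
 * Illustrative comparison for kyber: async_depth before and after the
 * change, for two example nr_requests values.
 */
#include <stdio.h>

#define KYBER_ASYNC_PERCENT 75

int main(void)
{
        unsigned int shift = 6;                 /* 64 bits per word */
        unsigned int nr_requests[] = { 256, 32 };

        for (int i = 0; i < 2; i++) {
                unsigned int old_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
                unsigned int new_limit = nr_requests[i] * KYBER_ASYNC_PERCENT / 100U;

                printf("nr_requests=%3u: old per-word depth %u, new whole-map limit %u\n",
                       nr_requests[i], old_depth, new_limit);
        }
        return 0;
}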
block/mq-deadline.c
@@ -487,20 +487,6 @@ unlock:
         return rq;
 }
 
-/*
- * 'depth' is a number in the range 1..INT_MAX representing a number of
- * requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since
- * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow().
- * Values larger than q->nr_requests have the same effect as q->nr_requests.
- */
-static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth)
-{
-        struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags;
-        const unsigned int nrr = hctx->queue->nr_requests;
-
-        return ((qdepth << bt->sb.shift) + nrr - 1) / nrr;
-}
-
 /*
  * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
  * function is used by __blk_mq_get_tag().
@@ -517,7 +503,7 @@ static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
          * Throttle asynchronous requests and writes such that these requests
          * do not block the allocation of synchronous requests.
          */
-        data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth);
+        data->shallow_depth = dd->async_depth;
 }
 
 /* Called by blk_mq_update_nr_requests(). */
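
With shallow_depth now expressed in requests for the whole sbitmap, the per-word conversion mq-deadline used to do becomes unnecessary. A sketch of what the deleted dd_to_word_depth() computed (shift = 6, nr_requests = 256 and async_depth = 192 are example values, not from the patch):

/*
 * Sketch of the conversion the deleted dd_to_word_depth() performed:
 * scaling a request-count limit into the per-word range expected by the
 * old sbitmap_get_shallow().
 */
#include <stdio.h>

static unsigned int dd_to_word_depth_model(unsigned int shift,
                                           unsigned int nr_requests,
                                           unsigned int qdepth)
{
        /* Round up so a non-zero request limit never becomes zero bits. */
        return ((qdepth << shift) + nr_requests - 1) / nr_requests;
}

int main(void)
{
        unsigned int shift = 6, nr_requests = 256, async_depth = 192;

        printf("old: %u requests -> %u bits per word\n", async_depth,
               dd_to_word_depth_model(shift, nr_requests, async_depth));
        printf("new: shallow_depth = %u requests, passed through unchanged\n",
               async_depth);
        return 0;
}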
include/linux/sbitmap.h
@@ -213,12 +213,12 @@ int sbitmap_get(struct sbitmap *sb);
  * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap,
  * limiting the depth used from each word.
  * @sb: Bitmap to allocate from.
- * @shallow_depth: The maximum number of bits to allocate from a single word.
+ * @shallow_depth: The maximum number of bits to allocate from the bitmap.
  *
  * This rather specific operation allows for having multiple users with
  * different allocation limits. E.g., there can be a high-priority class that
  * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow()
- * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority
+ * with a @shallow_depth of (sb->depth >> 1). Then, the low-priority
  * class can only allocate half of the total bits in the bitmap, preventing it
  * from starving out the high-priority class.
  *
@@ -478,7 +478,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
  * sbitmap_queue, limiting the depth used from each word, with preemption
  * already disabled.
  * @sbq: Bitmap queue to allocate from.
- * @shallow_depth: The maximum number of bits to allocate from a single word.
+ * @shallow_depth: The maximum number of bits to allocate from the queue.
  * See sbitmap_get_shallow().
  *
  * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after
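
The kernel-doc example changes notation accordingly: "half of the bitmap" is now written as sb->depth >> 1 rather than the per-word 1 << (shift - 1). An illustrative comparison for a bitmap whose last word is partial (depth = 96 and shift = 6 are example values, not from the patch):

/*
 * Illustrative take on the kernel-doc example: "half of the bitmap" in
 * old and new notation, for a bitmap with words of 64 and 32 bits.
 */
#include <stdio.h>

int main(void)
{
        unsigned int depth = 96, shift = 6;

        /* Old doc: 1 << (shift - 1) bits, applied to every word. */
        unsigned int old_shallow = 1U << (shift - 1);
        /* New doc: depth >> 1 bits, applied to the bitmap as a whole. */
        unsigned int new_shallow = depth >> 1;

        printf("old: %u bits per word, i.e. up to %u of %u bits in total\n",
               old_shallow, 2 * old_shallow, depth);
        printf("new: %u of %u bits in total\n", new_shallow, depth);
        return 0;
}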
lib/sbitmap.c
@@ -208,8 +208,28 @@ static int sbitmap_find_bit_in_word(struct sbitmap_word *map,
         return nr;
 }
 
+static unsigned int __map_depth_with_shallow(const struct sbitmap *sb,
+                                             int index,
+                                             unsigned int shallow_depth)
+{
+        u64 shallow_word_depth;
+        unsigned int word_depth, reminder;
+
+        word_depth = __map_depth(sb, index);
+        if (shallow_depth >= sb->depth)
+                return word_depth;
+
+        shallow_word_depth = word_depth * shallow_depth;
+        reminder = do_div(shallow_word_depth, sb->depth);
+
+        if (reminder >= (index + 1) * word_depth)
+                shallow_word_depth++;
+
+        return (unsigned int)shallow_word_depth;
+}
+
 static int sbitmap_find_bit(struct sbitmap *sb,
-                            unsigned int depth,
+                            unsigned int shallow_depth,
                             unsigned int index,
                             unsigned int alloc_hint,
                             bool wrap)
@@ -218,12 +238,12 @@ static int sbitmap_find_bit(struct sbitmap *sb,
         int nr = -1;
 
         for (i = 0; i < sb->map_nr; i++) {
-                nr = sbitmap_find_bit_in_word(&sb->map[index],
-                                              min_t(unsigned int,
-                                                    __map_depth(sb, index),
-                                                    depth),
-                                              alloc_hint, wrap);
+                unsigned int depth = __map_depth_with_shallow(sb, index,
+                                                              shallow_depth);
 
+                if (depth)
+                        nr = sbitmap_find_bit_in_word(&sb->map[index], depth,
+                                                      alloc_hint, wrap);
                 if (nr != -1) {
                         nr += index << sb->shift;
                         break;
@@ -406,27 +426,9 @@ EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
 static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
                                         unsigned int depth)
 {
-        unsigned int wake_batch;
-        unsigned int shallow_depth;
-
-        /*
-         * Each full word of the bitmap has bits_per_word bits, and there might
-         * be a partial word. There are depth / bits_per_word full words and
-         * depth % bits_per_word bits left over. In bitwise arithmetic:
-         *
-         * bits_per_word = 1 << shift
-         * depth / bits_per_word = depth >> shift
-         * depth % bits_per_word = depth & ((1 << shift) - 1)
-         *
-         * Each word can be limited to sbq->min_shallow_depth bits.
-         */
-        shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth);
-        depth = ((depth >> sbq->sb.shift) * shallow_depth +
-                 min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth));
-        wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1,
-                             SBQ_WAKE_BATCH);
-
-        return wake_batch;
+        return clamp_t(unsigned int,
+                       min(depth, sbq->min_shallow_depth) / SBQ_WAIT_QUEUES,
+                       1, SBQ_WAKE_BATCH);
 }
 
 int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
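
The new helper splits the whole-bitmap shallow_depth proportionally across the words, using the division remainder to hand a few words one extra bit; a word whose share is zero is then simply skipped by the if (depth) check in sbitmap_find_bit(). Below is a standalone userspace model of that arithmetic (do_div() is replaced by plain 64-bit division; depth = 200 with words of 64/64/64/8 and shallow_depth = 49 are example values, not from the patch). The simplified sbq_calc_wake_batch() reflects the same shift in meaning: min_shallow_depth is now a whole-bitmap quantity, so the batch is just min(depth, min_shallow_depth) / SBQ_WAIT_QUEUES clamped to [1, SBQ_WAKE_BATCH].

/*
 * Standalone userspace model of the new __map_depth_with_shallow():
 * distribute a whole-bitmap shallow_depth proportionally over the words,
 * handing the remainder out as single extra bits to the first words.
 */
#include <stdio.h>
#include <stdint.h>

struct sb_model {
        unsigned int depth;     /* total number of bits */
        unsigned int shift;     /* log2 of bits per word */
        unsigned int map_nr;    /* number of words */
};

static unsigned int map_depth(const struct sb_model *sb, unsigned int index)
{
        if (index == sb->map_nr - 1)
                return sb->depth - (index << sb->shift);
        return 1U << sb->shift;
}

static unsigned int map_depth_with_shallow(const struct sb_model *sb,
                                           unsigned int index,
                                           unsigned int shallow_depth)
{
        unsigned int word_depth = map_depth(sb, index);
        uint64_t shallow_word_depth;
        unsigned int reminder;

        if (shallow_depth >= sb->depth)
                return word_depth;

        shallow_word_depth = (uint64_t)word_depth * shallow_depth;
        reminder = (unsigned int)(shallow_word_depth % sb->depth);
        shallow_word_depth /= sb->depth;

        /* Give the first words one extra bit while the remainder covers them. */
        if (reminder >= (index + 1) * word_depth)
                shallow_word_depth++;

        return (unsigned int)shallow_word_depth;
}

int main(void)
{
        struct sb_model sb = { .depth = 200, .shift = 6, .map_nr = 4 };
        unsigned int shallow_depth = 49, total = 0;

        for (unsigned int i = 0; i < sb.map_nr; i++) {
                unsigned int d = map_depth_with_shallow(&sb, i, shallow_depth);

                printf("word %u: %2u of %2u bits usable\n", i, d, map_depth(&sb, i));
                total += d;
        }
        printf("total usable: %u (requested shallow_depth %u)\n",
               total, shallow_depth);
        return 0;
}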