From: Pierre Habouzit Date: Sat, 14 Jan 2012 20:36:45 +0000 (+0100) Subject: rework the quarantine: it doesn't really need to be accounted. X-Git-Url: http://git.madism.org/?p=~madcoder%2Fpwqr.git;a=commitdiff_plain;h=170387f1b2fdd96e6271d658161926086a9c2586;hp=b29ccbcc4ab80bc7e7b2131349459cc9e96d49ef rework the quarantine: it doesn't really need to be accounted. The notion of quarantine is purely virtual and we only care about it at WAKE time. It significantly simplifies the fastpath of our code, namely __pwqr_sb_update_state. Also allow the WAKE command to directly unpark threads if that fits with the concurrency level. I don't really expect userland to really use that, but it doesn't break anything and makes sense. Signed-off-by: Pierre Habouzit --- diff --git a/Documentation/pwqr.adoc b/Documentation/pwqr.adoc index 067b2ae..0c04746 100644 --- a/Documentation/pwqr.adoc +++ b/Documentation/pwqr.adoc @@ -30,7 +30,8 @@ quarantined:: + This state avoids waking a thread to force userland to "park" the thread, this is racy, make the scheduler work for nothing useful. Though if `PWQR_WAKE` is -called, quarantined threads are woken but with a `EDQUOT` errno set. +called, quarantined threads are woken but with a `EDQUOT` errno set, and only +one by one, no matter how wakes have been asked. + This state actually has only one impact: when `PWQR_WAKE` is called for more than one threads, for example 4, and that userland knows that there is 5 @@ -226,13 +227,27 @@ Valid values for the `op` argument are: `PWQR_WAKE`:: Tries to wake `val` threads from the pool. This is done according to - the current concurrency level not to overcommit. On success, the - number of woken threads is returned, it can be 0. + the current concurrency level not to overcommit. On success, a hint of + the number of woken threads is returned, it can be 0. ++ +This is only a hint of the number of threads woken up for two reasons. First, +the kernel could really have woken up a thread, but when it becomes scheduled, +it could *then* decide that it would overcommit (because some other thread +unblocked inbetween for example), and block it again. ++ +But it can also lie in the other direction: userland is supposed to account +for waiting threads. So when we're overcommiting and userland want a waiting +thread to be unblocked, we actually say we woke none, but still unblock one +(the famous quarantined threads we talk about above). This allow the userland +counter of waiting threads to decrease, but we know the thread won't be usable +so we return 0. `PWQR_WAKE_OC`:: Tries to wake `val` threads from the pool. This is done bypassing the current concurrency level (`OC` stands for `OVERCOMMIT`). On success, - the number of woken threads is returned, it can be 0. + the number of woken threads is returned, it can be 0, but it's the + real count that has been (or will soon be) woken up. If it's less than + required, it's because there aren't enough parked threads. `PWQR_WAIT`:: Puts the thread to wait for a future `PWQR_WAKE` command. If this diff --git a/kernel/pwqr.c b/kernel/pwqr.c index e4e3f58..8171596 100644 --- a/kernel/pwqr.c +++ b/kernel/pwqr.c @@ -60,7 +60,6 @@ struct pwqr_sb { unsigned running; unsigned waiting; - unsigned quarantined; unsigned parked; unsigned overcommit_wakes; @@ -96,36 +95,16 @@ static struct preempt_ops pwqr_preempt_noop_ops; static inline void __pwqr_sb_update_state(struct pwqr_sb *sb, int running_delta) { - int overcommit; - sb->running += running_delta; - overcommit = sb->running + sb->waiting - sb->concurrency; - if (overcommit == 0) { + if (sb->running > sb->concurrency) { + /* TODO see ../Documentation/pwqr.adoc */ + } else if (sb->running == sb->concurrency) { /* do nothing */ - } else if (overcommit > 0) { - if (overcommit > sb->waiting) { - sb->quarantined += sb->waiting; - sb->waiting = 0; - } else { - sb->quarantined += overcommit; - sb->waiting -= overcommit; - } - } else { - unsigned undercommit = -overcommit; - - if (undercommit < sb->quarantined) { - sb->waiting += undercommit; - sb->quarantined -= undercommit; - } else if (sb->quarantined) { - sb->waiting += sb->quarantined; - sb->quarantined = 0; - } else if (sb->waiting == 0 && sb->parked) { - if (!timer_pending(&sb->timer)) { - mod_timer(&sb->timer, jiffies + - PWQR_UNPARK_DELAY); - } - return; + } else if (sb->waiting == 0 && sb->parked) { + if (!timer_pending(&sb->timer)) { + mod_timer(&sb->timer, jiffies + PWQR_UNPARK_DELAY); } + return; } if (timer_pending(&sb->timer)) @@ -380,7 +359,7 @@ static int pwqr_release(struct inode *inode, struct file *filp) static long do_pwqr_wait(struct pwqr_sb *sb, struct pwqr_task *pwqt, - int in_pool, struct pwqr_ioc_wait __user *arg) + int is_wait, struct pwqr_ioc_wait __user *arg) { unsigned long flags; struct pwqr_ioc_wait wait; @@ -389,14 +368,14 @@ do_pwqr_wait(struct pwqr_sb *sb, struct pwqr_task *pwqt, preempt_notifier_unregister(&pwqt->notifier); - if (in_pool && copy_from_user(&wait, arg, sizeof(wait))) { + if (is_wait && copy_from_user(&wait, arg, sizeof(wait))) { rc = -EFAULT; goto out; } pwqr_sb_lock_irqsave(sb, flags); if (sb->running + sb->waiting <= sb->concurrency) { - if (in_pool) { + if (is_wait) { while (probe_kernel_address(wait.pwqr_uaddr, uval)) { pwqr_sb_unlock_irqrestore(sb, flags); rc = get_user(uval, (u32 *)wait.pwqr_uaddr); @@ -410,7 +389,6 @@ do_pwqr_wait(struct pwqr_sb *sb, struct pwqr_task *pwqt, goto out_unlock; } } else { - BUG_ON(sb->quarantined != 0); goto out_unlock; } } @@ -421,7 +399,7 @@ do_pwqr_wait(struct pwqr_sb *sb, struct pwqr_task *pwqt, __wait.flags |= WQ_FLAG_EXCLUSIVE; - if (in_pool) { + if (is_wait) { sb->waiting++; __add_wait_queue(&sb->wqh, &__wait); } else { @@ -441,7 +419,7 @@ do_pwqr_wait(struct pwqr_sb *sb, struct pwqr_task *pwqt, spin_unlock_irq(&sb->wqh.lock); schedule(); spin_lock_irq(&sb->wqh.lock); - if (in_pool && sb->waiting) + if (is_wait) break; if (sb->running + sb->waiting < sb->concurrency) break; @@ -450,13 +428,8 @@ do_pwqr_wait(struct pwqr_sb *sb, struct pwqr_task *pwqt, __remove_wait_queue(&sb->wqh, &__wait); __set_current_state(TASK_RUNNING); - if (in_pool) { - if (sb->waiting) { - sb->waiting--; - } else { - BUG_ON(!sb->quarantined); - sb->quarantined--; - } + if (is_wait) { + sb->waiting--; } else { sb->parked--; } @@ -515,7 +488,7 @@ static long do_pwqr_wake(struct pwqr_sb *sb, int oc, int count) pwqr_sb_lock_irqsave(sb, flags); if (oc) { - nwake = sb->waiting + sb->quarantined + sb->parked - sb->overcommit_wakes; + nwake = sb->waiting + sb->parked - sb->overcommit_wakes; if (count > nwake) { count = nwake; } else { @@ -524,6 +497,10 @@ static long do_pwqr_wake(struct pwqr_sb *sb, int oc, int count) sb->overcommit_wakes += count; } else if (sb->running + sb->overcommit_wakes < sb->concurrency) { nwake = sb->concurrency - sb->overcommit_wakes - sb->running; + if (nwake > sb->waiting + sb->parked - sb->overcommit_wakes) { + nwake = sb->waiting + sb->parked - + sb->overcommit_wakes; + } if (count > nwake) { count = nwake; } else { @@ -531,23 +508,25 @@ static long do_pwqr_wake(struct pwqr_sb *sb, int oc, int count) } } else { /* - * This codepath deserves an explanation: when the thread is - * quarantined, for us, really, it's already "parked". Though - * userland doesn't know about, so wake as many threads as - * userlands would have liked to, and let the wakeup tell - * userland those should be parked. + * This codepath deserves an explanation: waking the thread + * "for real" would overcommit, though userspace KNOWS there + * is at least one waiting thread. Such threads are threads + * that are "quarantined". + * + * Quarantined threads are woken up one by one, to allow a + * slow ramp down, trying to minimize "waiting" <-> "parked" + * flip-flops, no matter how many wakes have been asked. * - * That's why we lie about the number of woken threads, - * really, userlandwise we woke up a thread so that it could - * be parked for real and avoid spurious syscalls. So it's as - * if we woke up 0 threads. + * Since releasing one quarantined thread will wake up a + * thread that will (almost) straight go to parked mode, lie + * to userland about the fact that we unblocked that thread, + * and return 0. + * + * Though if we're already waking all waiting threads for + * overcommitting jobs, well, we don't need that. */ - nwake = sb->quarantined; - if (sb->waiting < sb->overcommit_wakes) - nwake -= sb->overcommit_wakes - sb->waiting; - if (nwake > count) - nwake = count; count = 0; + nwake = sb->waiting > sb->overcommit_wakes; } while (nwake-- > 0) wake_up_locked(&sb->wqh);