+
This state avoids waking a thread to force userland to "park" the thread, this
is racy, make the scheduler work for nothing useful. Though if `PWQR_WAKE` is
-called, quarantined threads are woken but with a `EDQUOT` errno set.
+called, quarantined threads are woken but with a `EDQUOT` errno set, and only
+one by one, no matter how wakes have been asked.
+
This state actually has only one impact: when `PWQR_WAKE` is called for more
than one threads, for example 4, and that userland knows that there is 5
`PWQR_WAKE`::
Tries to wake `val` threads from the pool. This is done according to
- the current concurrency level not to overcommit. On success, the
- number of woken threads is returned, it can be 0.
+ the current concurrency level not to overcommit. On success, a hint of
+ the number of woken threads is returned, it can be 0.
++
+This is only a hint of the number of threads woken up for two reasons. First,
+the kernel could really have woken up a thread, but when it becomes scheduled,
+it could *then* decide that it would overcommit (because some other thread
+unblocked inbetween for example), and block it again.
++
+But it can also lie in the other direction: userland is supposed to account
+for waiting threads. So when we're overcommiting and userland want a waiting
+thread to be unblocked, we actually say we woke none, but still unblock one
+(the famous quarantined threads we talk about above). This allow the userland
+counter of waiting threads to decrease, but we know the thread won't be usable
+so we return 0.
`PWQR_WAKE_OC`::
Tries to wake `val` threads from the pool. This is done bypassing the
current concurrency level (`OC` stands for `OVERCOMMIT`). On success,
- the number of woken threads is returned, it can be 0.
+ the number of woken threads is returned, it can be 0, but it's the
+ real count that has been (or will soon be) woken up. If it's less than
+ required, it's because there aren't enough parked threads.
`PWQR_WAIT`::
Puts the thread to wait for a future `PWQR_WAKE` command. If this
unsigned running;
unsigned waiting;
- unsigned quarantined;
unsigned parked;
unsigned overcommit_wakes;
static inline void __pwqr_sb_update_state(struct pwqr_sb *sb, int running_delta)
{
- int overcommit;
-
sb->running += running_delta;
- overcommit = sb->running + sb->waiting - sb->concurrency;
- if (overcommit == 0) {
+ if (sb->running > sb->concurrency) {
+ /* TODO see ../Documentation/pwqr.adoc */
+ } else if (sb->running == sb->concurrency) {
/* do nothing */
- } else if (overcommit > 0) {
- if (overcommit > sb->waiting) {
- sb->quarantined += sb->waiting;
- sb->waiting = 0;
- } else {
- sb->quarantined += overcommit;
- sb->waiting -= overcommit;
- }
- } else {
- unsigned undercommit = -overcommit;
-
- if (undercommit < sb->quarantined) {
- sb->waiting += undercommit;
- sb->quarantined -= undercommit;
- } else if (sb->quarantined) {
- sb->waiting += sb->quarantined;
- sb->quarantined = 0;
- } else if (sb->waiting == 0 && sb->parked) {
- if (!timer_pending(&sb->timer)) {
- mod_timer(&sb->timer, jiffies +
- PWQR_UNPARK_DELAY);
- }
- return;
+ } else if (sb->waiting == 0 && sb->parked) {
+ if (!timer_pending(&sb->timer)) {
+ mod_timer(&sb->timer, jiffies + PWQR_UNPARK_DELAY);
}
+ return;
}
if (timer_pending(&sb->timer))
static long
do_pwqr_wait(struct pwqr_sb *sb, struct pwqr_task *pwqt,
- int in_pool, struct pwqr_ioc_wait __user *arg)
+ int is_wait, struct pwqr_ioc_wait __user *arg)
{
unsigned long flags;
struct pwqr_ioc_wait wait;
preempt_notifier_unregister(&pwqt->notifier);
- if (in_pool && copy_from_user(&wait, arg, sizeof(wait))) {
+ if (is_wait && copy_from_user(&wait, arg, sizeof(wait))) {
rc = -EFAULT;
goto out;
}
pwqr_sb_lock_irqsave(sb, flags);
if (sb->running + sb->waiting <= sb->concurrency) {
- if (in_pool) {
+ if (is_wait) {
while (probe_kernel_address(wait.pwqr_uaddr, uval)) {
pwqr_sb_unlock_irqrestore(sb, flags);
rc = get_user(uval, (u32 *)wait.pwqr_uaddr);
goto out_unlock;
}
} else {
- BUG_ON(sb->quarantined != 0);
goto out_unlock;
}
}
__wait.flags |= WQ_FLAG_EXCLUSIVE;
- if (in_pool) {
+ if (is_wait) {
sb->waiting++;
__add_wait_queue(&sb->wqh, &__wait);
} else {
spin_unlock_irq(&sb->wqh.lock);
schedule();
spin_lock_irq(&sb->wqh.lock);
- if (in_pool && sb->waiting)
+ if (is_wait)
break;
if (sb->running + sb->waiting < sb->concurrency)
break;
__remove_wait_queue(&sb->wqh, &__wait);
__set_current_state(TASK_RUNNING);
- if (in_pool) {
- if (sb->waiting) {
- sb->waiting--;
- } else {
- BUG_ON(!sb->quarantined);
- sb->quarantined--;
- }
+ if (is_wait) {
+ sb->waiting--;
} else {
sb->parked--;
}
pwqr_sb_lock_irqsave(sb, flags);
if (oc) {
- nwake = sb->waiting + sb->quarantined + sb->parked - sb->overcommit_wakes;
+ nwake = sb->waiting + sb->parked - sb->overcommit_wakes;
if (count > nwake) {
count = nwake;
} else {
sb->overcommit_wakes += count;
} else if (sb->running + sb->overcommit_wakes < sb->concurrency) {
nwake = sb->concurrency - sb->overcommit_wakes - sb->running;
+ if (nwake > sb->waiting + sb->parked - sb->overcommit_wakes) {
+ nwake = sb->waiting + sb->parked -
+ sb->overcommit_wakes;
+ }
if (count > nwake) {
count = nwake;
} else {
}
} else {
/*
- * This codepath deserves an explanation: when the thread is
- * quarantined, for us, really, it's already "parked". Though
- * userland doesn't know about, so wake as many threads as
- * userlands would have liked to, and let the wakeup tell
- * userland those should be parked.
+ * This codepath deserves an explanation: waking the thread
+ * "for real" would overcommit, though userspace KNOWS there
+ * is at least one waiting thread. Such threads are threads
+ * that are "quarantined".
+ *
+ * Quarantined threads are woken up one by one, to allow a
+ * slow ramp down, trying to minimize "waiting" <-> "parked"
+ * flip-flops, no matter how many wakes have been asked.
*
- * That's why we lie about the number of woken threads,
- * really, userlandwise we woke up a thread so that it could
- * be parked for real and avoid spurious syscalls. So it's as
- * if we woke up 0 threads.
+ * Since releasing one quarantined thread will wake up a
+ * thread that will (almost) straight go to parked mode, lie
+ * to userland about the fact that we unblocked that thread,
+ * and return 0.
+ *
+ * Though if we're already waking all waiting threads for
+ * overcommitting jobs, well, we don't need that.
*/
- nwake = sb->quarantined;
- if (sb->waiting < sb->overcommit_wakes)
- nwake -= sb->overcommit_wakes - sb->waiting;
- if (nwake > count)
- nwake = count;
count = 0;
+ nwake = sb->waiting > sb->overcommit_wakes;
}
while (nwake-- > 0)
wake_up_locked(&sb->wqh);