#include <linux/timer.h>
#include <linux/uaccess.h>
#include <linux/wait.h>
+#include <linux/version.h>
+
+/*
+ * The pthread workqueue regulator code is for now written as a proof of
+ * concept module, meant to work with 2.6.23+ kernels or redhat5 ones.
+ *
+ * For now it uses a device /dev/pwq, which spawns magic file-descriptors
+ * supporting a few ioctl operations (see Documentation/pwqr.adoc shipped in
+ * the same git repository).
+ *
+ * This code is meant to be merged into mainline, but after the following
+ * changes, kept here as a "todolist":
+ *
+ * - get rid of the device stuff (which is 100% of the init code for 2.6.23
+ * kernels);
+ *
+ * - resubmit the patch that makes it possible to call
+ * preempt_notifier_unregister from sched_in/sched_out (just a matter of a
+ * hlist_for_each_safe instead of hlist_for_each), and fix
+ * pwqr_task_release to not require RCU anymore. It makes
+ * pwqr_preempt_noop_ops go away.
+ *
+ * - think about the possibility of adding a pwq_notifier pointer directly
+ *   into the task_struct; though it's not *that* necessary, it grows the
+ *   structure for a speed gain we don't really need (making pwqr_ctl
+ *   faster). I think it's okay to crawl the preempt_notifier list instead.
+ *   We may want to add nice "macros" for that though.
+ *
+ * - replace the ioctl with a pwqr_ctl syscall
+ *
+ * - create a pwqr_create() syscall to create a pwqr file-descriptor.
+ *
+ * Summary: most of the code should be untouched or almost not changed,
+ * pwqr_ioctl adapted to become a syscall, and the module boilerplate replaced
+ * with pwqr_create() and file-descriptor creation boilerplate instead. But
+ * looking at fs/eventfd.c this looks rather simple.
+ */
#ifndef CONFIG_PREEMPT_NOTIFIERS
# error PWQ module requires CONFIG_PREEMPT_NOTIFIERS
-#endif
+#else
#include "pwqr.h"
-#define PWQR_HASH_BITS 5
-#define PWQR_HASH_SIZE (1 << PWQR_HASH_BITS)
-
#define PWQR_UC_DELAY (HZ / 10)
#define PWQR_OC_DELAY (HZ / 20)
#define PWQR_STATE_OC 2
#define PWQR_STATE_DEAD (-1)
-struct pwqr_task_bucket {
- spinlock_t lock;
- struct hlist_head tasks;
-};
+/*
+ * 2.6.23 is the first kernel version that includes CONFIG_PREEMPT_NOTIFIERS.
+ *
+ * However, we also want this to work on older RedHat 5 kernels, which
+ * emulate the feature but implement it differently: instead of linking the
+ * preempt_notifiers from the task_struct directly, they keep them in a
+ * private h-table we have no access to, so we need an h-table of our own.
+ *
+ * For vanilla kernels we crawl through the task_struct::preempt_notifiers
+ * hlist until we find our entry. This list is usually very short, so it is
+ * no slower than the global h-table, which also crawls a list anyway.
+ */
+#define IS_PRE_2_6_23 (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23))
struct pwqr_sb {
struct kref kref;
struct pwqr_task {
struct preempt_notifier notifier;
- struct hlist_node link;
+ struct pwqr_sb *sb;
struct rcu_head rcu;
+#if IS_PRE_2_6_23
+ struct hlist_node link;
struct task_struct *task;
- struct pwqr_sb *sb;
+#endif
+};
+
+#if IS_PRE_2_6_23
+
+#define PWQR_HASH_BITS 5
+#define PWQR_HASH_SIZE (1 << PWQR_HASH_BITS)
+
+/*
+ * Private task_struct -> pwqr_task hash table. Only needed on pre-2.6.23
+ * (RedHat 5 style) kernels, where we cannot walk the task's own
+ * preempt_notifiers hlist to find our entry (see comment above), so we
+ * keep our own task -> pwqr_task mapping instead.
+ */
+struct pwqr_task_bucket {
+	spinlock_t lock;
+	struct hlist_head tasks;
+};
+static struct pwqr_task_bucket pwqr_tasks_hash[PWQR_HASH_SIZE];
+#endif
+
/*
* Global variables
*/
static struct class *pwqr_class;
static int pwqr_major;
-static struct pwqr_task_bucket pwqr_tasks_hash[PWQR_HASH_SIZE];
static struct preempt_ops pwqr_preempt_running_ops;
static struct preempt_ops pwqr_preempt_blocked_ops;
static struct preempt_ops pwqr_preempt_noop_ops;
/*****************************************************************************
* tasks
*/
+#if IS_PRE_2_6_23
static inline struct pwqr_task_bucket *task_hbucket(struct task_struct *task)
{
return &pwqr_tasks_hash[hash_ptr(task, PWQR_HASH_BITS)];
spin_unlock(&b->lock);
return pwqt;
}
+#else
+/*
+ * Find the pwqr_task registered for @task, or NULL if there is none.
+ *
+ * On >= 2.6.23 kernels there is no private h-table (see above): we walk
+ * task_struct::preempt_notifiers and recognize our entry by its ops
+ * pointer, which is always one of the three pwqr preempt_ops. The hlist
+ * is typically very short, so this is no slower than a hash lookup.
+ */
+static struct pwqr_task *pwqr_task_find(struct task_struct *task)
+{
+	struct hlist_node *node;
+	struct preempt_notifier *it;
+	struct pwqr_task *pwqt = NULL;
+
+	hlist_for_each_entry(it, node, &task->preempt_notifiers, link) {
+		if (it->ops == &pwqr_preempt_running_ops ||
+		    it->ops == &pwqr_preempt_blocked_ops ||
+		    it->ops == &pwqr_preempt_noop_ops)
+		{
+			pwqt = container_of(it, struct pwqr_task, notifier);
+			break;
+		}
+	}
+
+	return pwqt;
+}
+#endif
static struct pwqr_task *pwqr_task_create(struct task_struct *task)
{
- struct pwqr_task_bucket *b = task_hbucket(task);
struct pwqr_task *pwqt;
pwqt = kmalloc(sizeof(*pwqt), GFP_KERNEL);
preempt_notifier_init(&pwqt->notifier, &pwqr_preempt_running_ops);
preempt_notifier_register(&pwqt->notifier);
- pwqt->task = task;
-
- spin_lock(&b->lock);
- hlist_add_head(&pwqt->link, &b->tasks);
- spin_unlock(&b->lock);
-
+#if IS_PRE_2_6_23
+ {
+ struct pwqr_task_bucket *b = task_hbucket(task);
+
+ pwqt->task = task;
+ spin_lock(&b->lock);
+ hlist_add_head(&pwqt->link, &b->tasks);
+ spin_unlock(&b->lock);
+ }
+#endif
return pwqt;
}
__cold
static void pwqr_task_release(struct pwqr_task *pwqt, bool from_notifier)
{
+#if IS_PRE_2_6_23
struct pwqr_task_bucket *b = task_hbucket(pwqt->task);
spin_lock(&b->lock);
hlist_del(&pwqt->link);
spin_unlock(&b->lock);
+#endif
pwqt->notifier.ops = &pwqr_preempt_noop_ops;
if (from_notifier) {
* callbacks if we're not dying, it'll panic on the next
* sched_{in,out} call.
*/
- BUG_ON(!(pwqt->task->state & TASK_DEAD));
+ BUG_ON(!(current->state & TASK_DEAD));
kfree_rcu(pwqt, rcu);
} else {
preempt_notifier_unregister(&pwqt->notifier);
}
static void pwqr_task_sched_out(struct preempt_notifier *notifier,
- struct task_struct *next)
+ struct task_struct *next)
{
struct pwqr_task *pwqt = container_of(notifier, struct pwqr_task, notifier);
struct pwqr_sb *sb = pwqt->sb;
- struct task_struct *p = pwqt->task;
+ struct task_struct *p = current;
if (unlikely(p->state & TASK_DEAD) || unlikely(sb->state < 0)) {
pwqr_task_detach(pwqt, sb);
int rc = 0;
switch (command) {
- case PWQR_GET_CONC:
+ case PWQR_CTL_GET_CONC:
return sb->concurrency;
- case PWQR_SET_CONC:
+ case PWQR_CTL_SET_CONC:
return do_pwqr_set_conc(sb, (int)arg);
- case PWQR_WAKE:
- case PWQR_WAKE_OC:
- return do_pwqr_wake(sb, command == PWQR_WAKE_OC, (int)arg);
+ case PWQR_CTL_WAKE:
+ case PWQR_CTL_WAKE_OC:
+ return do_pwqr_wake(sb, command == PWQR_CTL_WAKE_OC, (int)arg);
- case PWQR_WAIT:
- case PWQR_PARK:
- case PWQR_REGISTER:
- case PWQR_UNREGISTER:
+ case PWQR_CTL_WAIT:
+ case PWQR_CTL_PARK:
+ case PWQR_CTL_REGISTER:
+ case PWQR_CTL_UNREGISTER:
break;
default:
return -EINVAL;
}
pwqt = pwqr_task_find(task);
- if (command == PWQR_UNREGISTER)
+ if (command == PWQR_CTL_UNREGISTER)
return do_pwqr_unregister(sb, pwqt);
if (pwqt == NULL) {
}
switch (command) {
- case PWQR_WAIT:
+ case PWQR_CTL_WAIT:
rc = do_pwqr_wait(sb, pwqt, true, (struct pwqr_ioc_wait __user *)arg);
break;
- case PWQR_PARK:
+ case PWQR_CTL_PARK:
rc = do_pwqr_wait(sb, pwqt, false, NULL);
break;
}
-
- if (unlikely(sb->state < 0)) {
- pwqr_task_detach(pwqt, pwqt->sb);
- return -EBADFD;
- }
return rc;
}
*/
static int __init pwqr_start(void)
{
+#if IS_PRE_2_6_23
int i;
for (i = 0; i < PWQR_HASH_SIZE; i++) {
spin_lock_init(&pwqr_tasks_hash[i].lock);
INIT_HLIST_HEAD(&pwqr_tasks_hash[i].tasks);
}
+#endif
/* Register as a character device */
pwqr_major = register_chrdev(0, "pwqr", &pwqr_dev_fops);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pierre Habouzit <pierre.habouzit@intersec.com>");
MODULE_DESCRIPTION("PThreads Work Queues Regulator");
+#endif
// vim:noet:sw=8:cinoptions+=\:0,L-1,=1s: