
Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
commit 1da177e4c3
Author: Linus Torvalds
Date:   2005-04-16 15:20:36 -07:00

17291 changed files with 6718755 additions and 0 deletions

kernel/Makefile (new file, 53 lines)

@@ -0,0 +1,53 @@
#
# Makefile for the linux kernel.
#
obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
exit.o itimer.o time.o softirq.o resource.o \
sysctl.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o intermodule.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += cpu.o spinlock.o
obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_PM) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_IKCONFIG) += configs.o
obj-$(CONFIG_IKCONFIG_PROC) += configs.o
obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
ifneq ($(CONFIG_IA64),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
# needed for x86 only. Why this used to be enabled for all architectures is beyond
# me. I suspect most platforms don't need this, but until we know that for sure
# I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k
# to get a correct value for the wait-channel (WCHAN in ps). --davidm
CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer
endif
$(obj)/configs.o: $(obj)/config_data.h
# config_data.h contains the same information as ikconfig.h but gzipped.
# Info from config_data can be extracted from /proc/config*
targets += config_data.gz
$(obj)/config_data.gz: .config FORCE
$(call if_changed,gzip)
quiet_cmd_ikconfiggz = IKCFG $@
cmd_ikconfiggz = (echo "static const char kernel_config_data[] = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") > $@
targets += config_data.h
$(obj)/config_data.h: $(obj)/config_data.gz FORCE
$(call if_changed,ikconfiggz)
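
The ikconfig rules above embed the gzipped .config into the kernel image as a C string bracketed by MAGIC_START and MAGIC_END, with scripts/bin2c doing the byte-to-string-literal conversion. A minimal sketch of such a filter (the real scripts/bin2c may differ in formatting details):

	#include <stdio.h>

	/* Read raw bytes on stdin and emit them as C string-literal lines;
	 * octal escapes avoid the "\x eats trailing hex digits" pitfall,
	 * and adjacent literals concatenate into one initializer. */
	int main(void)
	{
		int ch, col = 0;

		printf("\"");
		while ((ch = getchar()) != EOF) {
			printf("\\%03o", ch);
			if (++col == 16) {
				printf("\"\n\"");
				col = 0;
			}
		}
		printf("\"\n");
		return 0;
	}

Data embedded this way can then be recovered at runtime from /proc/config*, as the comment above notes.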

kernel/acct.c (new file, 561 lines)

@@ -0,0 +1,561 @@
/*
* linux/kernel/acct.c
*
* BSD Process Accounting for Linux
*
* Author: Marco van Wieringen <mvw@planets.elm.net>
*
* Some code based on ideas and code from:
* Thomas K. Dyas <tdyas@eden.rutgers.edu>
*
* This file implements BSD-style process accounting. Whenever any
* process exits, an accounting record of type "struct acct" is
* written to the file specified with the acct() system call. It is
* up to user-level programs to do useful things with the accounting
* log. The kernel just provides the raw accounting information.
*
* (C) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V.
*
* Plugged two leaks. 1) It didn't return acct_file into the free_filps if
* the file happened to be read-only. 2) If the accounting was suspended
* due to the lack of space it happily allowed it to be reopened and completely
* lost the old acct_file. 3/10/98, Al Viro.
*
* Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
* XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
*
* Fixed a nasty interaction with sys_umount(). If the accounting
* was suspended we failed to stop it on umount(). Messy.
* Another one: remount to readonly didn't stop accounting.
* Question: what should we do if we have CAP_SYS_ADMIN but not
* CAP_SYS_PACCT? Current code does the following: umount returns -EBUSY
* unless we are messing with the root. In that case we are getting a
* real mess with do_remount_sb(). 9/11/98, AV.
*
* Fixed a bunch of races (and pair of leaks). Probably not the best way,
* but this one obviously doesn't introduce deadlocks. Later. BTW, found
* one race (and leak) in BSD implementation.
* OK, that's better. ANOTHER race and leak in BSD variant. There always
* is one more bug... 10/11/98, AV.
*
* Oh, fsck... Oopsable SMP race in do_process_acct() - we must hold
* ->mmap_sem to walk the vma list of current->mm. Nasty, since it leaks
* a struct file opened for write. Fixed. 2/6/2000, AV.
*/
#include <linux/config.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/acct.h>
#include <linux/file.h>
#include <linux/tty.h>
#include <linux/security.h>
#include <linux/vfs.h>
#include <linux/jiffies.h>
#include <linux/times.h>
#include <linux/syscalls.h>
#include <asm/uaccess.h>
#include <asm/div64.h>
#include <linux/blkdev.h> /* sector_div */
/*
* These constants control the amount of free space that suspends and
* resumes the process accounting system, and the time delay between
* each check.
* Turned into sysctl-controllable parameters. AV, 12/11/98
*/
int acct_parm[3] = {4, 2, 30};
#define RESUME (acct_parm[0]) /* >foo% free space - resume */
#define SUSPEND (acct_parm[1]) /* <foo% free space - suspend */
#define ACCT_TIMEOUT (acct_parm[2]) /* foo second timeout between checks */
/*
* External references and all of the globals.
*/
static void do_acct_process(long, struct file *);
/*
* This structure is used so that all the data protected by lock
* can be placed in the same cache line as the lock. This primes
* the cache line to have the data after getting the lock.
*/
struct acct_glbs {
spinlock_t lock;
volatile int active;
volatile int needcheck;
struct file *file;
struct timer_list timer;
};
static struct acct_glbs acct_globals __cacheline_aligned = {SPIN_LOCK_UNLOCKED};
/*
* Called whenever the timer says to check the free space.
*/
static void acct_timeout(unsigned long unused)
{
acct_globals.needcheck = 1;
}
/*
* Check the amount of free space and suspend/resume accordingly.
*/
static int check_free_space(struct file *file)
{
struct kstatfs sbuf;
int res;
int act;
sector_t resume;
sector_t suspend;
spin_lock(&acct_globals.lock);
res = acct_globals.active;
if (!file || !acct_globals.needcheck)
goto out;
spin_unlock(&acct_globals.lock);
/* May block */
if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf))
return res;
suspend = sbuf.f_blocks * SUSPEND;
resume = sbuf.f_blocks * RESUME;
sector_div(suspend, 100);
sector_div(resume, 100);
if (sbuf.f_bavail <= suspend)
act = -1;
else if (sbuf.f_bavail >= resume)
act = 1;
else
act = 0;
/*
* If some joker switched acct_globals.file under us we'd better be
* silent and _not_ touch anything.
*/
spin_lock(&acct_globals.lock);
if (file != acct_globals.file) {
if (act)
res = act>0;
goto out;
}
if (acct_globals.active) {
if (act < 0) {
acct_globals.active = 0;
printk(KERN_INFO "Process accounting paused\n");
}
} else {
if (act > 0) {
acct_globals.active = 1;
printk(KERN_INFO "Process accounting resumed\n");
}
}
del_timer(&acct_globals.timer);
acct_globals.needcheck = 0;
acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
add_timer(&acct_globals.timer);
res = acct_globals.active;
out:
spin_unlock(&acct_globals.lock);
return res;
}
/*
* Close the old accounting file (if currently open) and then replace
* it with file (if non-NULL).
*
* NOTE: acct_globals.lock MUST be held on entry and exit.
*/
static void acct_file_reopen(struct file *file)
{
struct file *old_acct = NULL;
if (acct_globals.file) {
old_acct = acct_globals.file;
del_timer(&acct_globals.timer);
acct_globals.active = 0;
acct_globals.needcheck = 0;
acct_globals.file = NULL;
}
if (file) {
acct_globals.file = file;
acct_globals.needcheck = 0;
acct_globals.active = 1;
/* It's been deleted if it was used before so this is safe */
init_timer(&acct_globals.timer);
acct_globals.timer.function = acct_timeout;
acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
add_timer(&acct_globals.timer);
}
if (old_acct) {
spin_unlock(&acct_globals.lock);
do_acct_process(0, old_acct);
filp_close(old_acct, NULL);
spin_lock(&acct_globals.lock);
}
}
/*
* sys_acct() is the only system call needed to implement process
* accounting. It takes the name of the file where accounting records
* should be written. If the filename is NULL, accounting will be
* shutdown.
*/
asmlinkage long sys_acct(const char __user *name)
{
struct file *file = NULL;
char *tmp;
int error;
if (!capable(CAP_SYS_PACCT))
return -EPERM;
if (name) {
tmp = getname(name);
if (IS_ERR(tmp)) {
return (PTR_ERR(tmp));
}
/* Difference from BSD - they don't do O_APPEND */
file = filp_open(tmp, O_WRONLY|O_APPEND, 0);
putname(tmp);
if (IS_ERR(file)) {
return (PTR_ERR(file));
}
if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
filp_close(file, NULL);
return (-EACCES);
}
if (!file->f_op->write) {
filp_close(file, NULL);
return (-EIO);
}
}
error = security_acct(file);
if (error) {
if (file)
filp_close(file, NULL);
return error;
}
spin_lock(&acct_globals.lock);
acct_file_reopen(file);
spin_unlock(&acct_globals.lock);
return (0);
}
/*
* If accounting is turned on for a file in the filesystem pointed
* to by sb, turn accounting off.
*/
void acct_auto_close(struct super_block *sb)
{
spin_lock(&acct_globals.lock);
if (acct_globals.file &&
acct_globals.file->f_dentry->d_inode->i_sb == sb) {
acct_file_reopen((struct file *)NULL);
}
spin_unlock(&acct_globals.lock);
}
/*
* encode an unsigned long into a comp_t
*
* This routine has been adapted from the encode_comp_t() function in
* the kern_acct.c file of the FreeBSD operating system. The encoding
* is a 13-bit fraction with a 3-bit (base 8) exponent.
*/
#define MANTSIZE 13 /* 13 bit mantissa. */
#define EXPSIZE 3 /* Base 8 (3 bit) exponent. */
#define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */
static comp_t encode_comp_t(unsigned long value)
{
int exp, rnd;
exp = rnd = 0;
while (value > MAXFRACT) {
rnd = value & (1 << (EXPSIZE - 1)); /* Round up? */
value >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */
exp++;
}
/*
* If we need to round up, do it (and handle overflow correctly).
*/
if (rnd && (++value > MAXFRACT)) {
value >>= EXPSIZE;
exp++;
}
/*
* Clean it up and polish it off.
*/
exp <<= MANTSIZE; /* Shift the exponent into place */
exp += value; /* and add on the mantissa. */
return exp;
}
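/*
 * For reference, decoding reverses the steps above: the low MANTSIZE bits
 * hold the mantissa and the top EXPSIZE bits count how many base-8 shifts
 * were applied. A decoder (sketch only, nothing in this file needs it):
 *
 *	static unsigned long decode_comp_t(comp_t c)
 *	{
 *		return (unsigned long)(c & MAXFRACT) << ((c >> MANTSIZE) * EXPSIZE);
 *	}
 */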
#if ACCT_VERSION==1 || ACCT_VERSION==2
/*
* encode a u64 into a comp2_t (24 bits)
*
* Format: 5 bit base 2 exponent, 20 bits mantissa.
* The leading bit of the mantissa is not stored, but implied for
* non-zero exponents.
* Largest encodable value is 50 bits.
*/
#define MANTSIZE2 20 /* 20 bit mantissa. */
#define EXPSIZE2 5 /* 5 bit base 2 exponent. */
#define MAXFRACT2 ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */
#define MAXEXP2 ((1 <<EXPSIZE2) - 1) /* Maximum exponent. */
static comp2_t encode_comp2_t(u64 value)
{
int exp, rnd;
exp = (value > (MAXFRACT2>>1));
rnd = 0;
while (value > MAXFRACT2) {
rnd = value & 1;
value >>= 1;
exp++;
}
/*
* If we need to round up, do it (and handle overflow correctly).
*/
if (rnd && (++value > MAXFRACT2)) {
value >>= 1;
exp++;
}
if (exp > MAXEXP2) {
/* Overflow. Return largest representable number instead. */
return (1ul << (MANTSIZE2+EXPSIZE2-1)) - 1;
} else {
return (value & (MAXFRACT2>>1)) | (exp << (MANTSIZE2-1));
}
}
#endif
#if ACCT_VERSION==3
/*
* encode a u64 into a 32-bit IEEE float
*/
static u32 encode_float(u64 value)
{
unsigned exp = 190;
unsigned u;
if (value==0) return 0;
while ((s64)value > 0){
value <<= 1;
exp--;
}
u = (u32)(value >> 40) & 0x7fffffu;
return u | (exp << 23);
}
#endif
/*
* Write an accounting entry for an exiting process
*
* The acct_process() call is the workhorse of the process
* accounting system. The struct acct is built here and then written
* into the accounting file. This function should only be called from
* do_exit().
*/
/*
* do_acct_process does all the actual work. Caller holds the reference to file.
*/
static void do_acct_process(long exitcode, struct file *file)
{
acct_t ac;
mm_segment_t fs;
unsigned long vsize;
unsigned long flim;
u64 elapsed;
u64 run_time;
struct timespec uptime;
/*
* First check to see if there is enough free space to continue
* the process accounting system.
*/
if (!check_free_space(file))
return;
/*
* Fill the accounting struct with the needed info as recorded
* by the different kernel functions.
*/
memset((caddr_t)&ac, 0, sizeof(acct_t));
ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER;
strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm));
/* calculate run_time in nsec */
do_posix_clock_monotonic_gettime(&uptime);
run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec;
run_time -= (u64)current->start_time.tv_sec*NSEC_PER_SEC
+ current->start_time.tv_nsec;
/* convert nsec -> AHZ */
elapsed = nsec_to_AHZ(run_time);
#if ACCT_VERSION==3
ac.ac_etime = encode_float(elapsed);
#else
ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
(unsigned long) elapsed : (unsigned long) -1l);
#endif
#if ACCT_VERSION==1 || ACCT_VERSION==2
{
/* new enlarged etime field */
comp2_t etime = encode_comp2_t(elapsed);
ac.ac_etime_hi = etime >> 16;
ac.ac_etime_lo = (u16) etime;
}
#endif
do_div(elapsed, AHZ);
ac.ac_btime = xtime.tv_sec - elapsed;
ac.ac_utime = encode_comp_t(jiffies_to_AHZ(
current->signal->utime +
current->group_leader->utime));
ac.ac_stime = encode_comp_t(jiffies_to_AHZ(
current->signal->stime +
current->group_leader->stime));
/* we really need to bite the bullet and change layout */
ac.ac_uid = current->uid;
ac.ac_gid = current->gid;
#if ACCT_VERSION==2
ac.ac_ahz = AHZ;
#endif
#if ACCT_VERSION==1 || ACCT_VERSION==2
/* backward-compatible 16 bit fields */
ac.ac_uid16 = current->uid;
ac.ac_gid16 = current->gid;
#endif
#if ACCT_VERSION==3
ac.ac_pid = current->tgid;
ac.ac_ppid = current->parent->tgid;
#endif
read_lock(&tasklist_lock); /* pin current->signal */
ac.ac_tty = current->signal->tty ?
old_encode_dev(tty_devnum(current->signal->tty)) : 0;
read_unlock(&tasklist_lock);
ac.ac_flag = 0;
if (current->flags & PF_FORKNOEXEC)
ac.ac_flag |= AFORK;
if (current->flags & PF_SUPERPRIV)
ac.ac_flag |= ASU;
if (current->flags & PF_DUMPCORE)
ac.ac_flag |= ACORE;
if (current->flags & PF_SIGNALED)
ac.ac_flag |= AXSIG;
vsize = 0;
if (current->mm) {
struct vm_area_struct *vma;
down_read(&current->mm->mmap_sem);
vma = current->mm->mmap;
while (vma) {
vsize += vma->vm_end - vma->vm_start;
vma = vma->vm_next;
}
up_read(&current->mm->mmap_sem);
}
vsize = vsize / 1024;
ac.ac_mem = encode_comp_t(vsize);
ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */
ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
ac.ac_minflt = encode_comp_t(current->signal->min_flt +
current->group_leader->min_flt);
ac.ac_majflt = encode_comp_t(current->signal->maj_flt +
current->group_leader->maj_flt);
ac.ac_swaps = encode_comp_t(0);
ac.ac_exitcode = exitcode;
/*
* Kernel segment override to datasegment and write it
* to the accounting file.
*/
fs = get_fs();
set_fs(KERNEL_DS);
/*
* Accounting records are not subject to resource limits.
*/
flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
file->f_op->write(file, (char *)&ac,
sizeof(acct_t), &file->f_pos);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
set_fs(fs);
}
/*
* acct_process - now just a wrapper around do_acct_process
*/
void acct_process(long exitcode)
{
struct file *file = NULL;
/*
* accelerate the common fastpath:
*/
if (!acct_globals.file)
return;
spin_lock(&acct_globals.lock);
file = acct_globals.file;
if (unlikely(!file)) {
spin_unlock(&acct_globals.lock);
return;
}
get_file(file);
spin_unlock(&acct_globals.lock);
do_acct_process(exitcode, file);
fput(file);
}
/*
* acct_update_integrals
* - update mm integral fields in task_struct
*/
void acct_update_integrals(struct task_struct *tsk)
{
if (likely(tsk->mm)) {
long delta = tsk->stime - tsk->acct_stimexpd;
if (delta == 0)
return;
tsk->acct_stimexpd = tsk->stime;
tsk->acct_rss_mem1 += delta * get_mm_counter(tsk->mm, rss);
tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
}
}
/*
* acct_clear_integrals
* - clear the mm integral fields in task_struct
*/
void acct_clear_integrals(struct task_struct *tsk)
{
if (tsk) {
tsk->acct_stimexpd = 0;
tsk->acct_rss_mem1 = 0;
tsk->acct_vm_mem1 = 0;
}
}
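
From userspace, the whole interface is the single acct(2) call implemented above; a minimal usage sketch (assuming the glibc wrapper from <unistd.h>, an existing log file, and CAP_SYS_PACCT):

	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* Start appending a struct acct record to this (pre-existing,
		 * regular, writable) file each time a process exits. */
		if (acct("/var/log/pacct") != 0) {
			perror("acct on");
			return 1;
		}

		/* ... exiting processes are recorded here ... */

		/* A NULL filename shuts accounting down again. */
		if (acct(NULL) != 0) {
			perror("acct off");
			return 1;
		}
		return 0;
	}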

kernel/audit.c (new file, 839 lines)

@@ -0,0 +1,839 @@
/* audit.c -- Auditing support -*- linux-c -*-
* Gateway between the kernel (e.g., selinux) and the user-space audit daemon.
* System-call specific features have moved to auditsc.c
*
* Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Written by Rickard E. (Rik) Faith <faith@redhat.com>
*
* Goals: 1) Integrate fully with SELinux.
* 2) Minimal run-time overhead:
* a) Minimal when syscall auditing is disabled (audit_enable=0).
* b) Small when syscall auditing is enabled and no audit record
* is generated (defer as much work as possible to record
* generation time):
* i) context is allocated,
* ii) names from getname are stored without a copy, and
* iii) inode information stored from path_lookup.
* 3) Ability to disable syscall auditing at boot time (audit=0).
* 4) Usable by other parts of the kernel (if audit_log* is called,
* then a syscall record will be generated automatically for the
* current syscall).
* 5) Netlink interface to user-space.
* 6) Support low-overhead kernel-based filtering to minimize the
* information that must be passed to user-space.
*
* Example user-space utilities: http://people.redhat.com/faith/audit/
*/
#include <linux/init.h>
#include <asm/atomic.h>
#include <asm/types.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/audit.h>
#include <net/sock.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
/* No auditing will take place until audit_initialized != 0.
* (Initialization happens after skb_init is called.) */
static int audit_initialized;
/* No syscall auditing will take place unless audit_enabled != 0. */
int audit_enabled;
/* Default state when kernel boots without any parameters. */
static int audit_default;
/* If auditing cannot proceed, audit_failure selects what happens. */
static int audit_failure = AUDIT_FAIL_PRINTK;
/* If audit records are to be written to the netlink socket, audit_pid
* contains the (non-zero) pid. */
static int audit_pid;
/* If audit_rate_limit is non-zero, limit the rate of sending audit records
* to that number per second. This prevents DoS attacks, but results in
* audit records being dropped. */
static int audit_rate_limit;
/* Number of outstanding audit_buffers allowed. */
static int audit_backlog_limit = 64;
static atomic_t audit_backlog = ATOMIC_INIT(0);
/* Records can be lost in several ways:
0) [suppressed in audit_alloc]
1) out of memory in audit_log_start [kmalloc of struct audit_buffer]
2) out of memory in audit_log_move [alloc_skb]
3) suppressed due to audit_rate_limit
4) suppressed due to audit_backlog_limit
*/
static atomic_t audit_lost = ATOMIC_INIT(0);
/* The netlink socket. */
static struct sock *audit_sock;
/* There are two lists of audit buffers. The txlist contains audit
* buffers that cannot be sent immediately to the netlink device because
* we are in an irq context (these are sent later in a tasklet).
*
* The second list is a list of pre-allocated audit buffers (if more
* than AUDIT_MAXFREE are in use, the audit buffer is freed instead of
* being placed on the freelist). */
static DEFINE_SPINLOCK(audit_txlist_lock);
static DEFINE_SPINLOCK(audit_freelist_lock);
static int audit_freelist_count = 0;
static LIST_HEAD(audit_txlist);
static LIST_HEAD(audit_freelist);
/* There are three lists of rules -- one to search at task creation
* time, one to search at syscall entry time, and another to search at
* syscall exit time. */
static LIST_HEAD(audit_tsklist);
static LIST_HEAD(audit_entlist);
static LIST_HEAD(audit_extlist);
/* The netlink socket is only to be read by 1 CPU, which lets us assume
* that list additions and deletions never happen simultaneously in
* auditsc.c */
static DECLARE_MUTEX(audit_netlink_sem);
/* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting
* audit records. Since printk uses a 1024 byte buffer, this buffer
* should be at least that large. */
#define AUDIT_BUFSIZ 1024
/* AUDIT_MAXFREE is the number of empty audit_buffers we keep on the
* audit_freelist. Doing so eliminates many kmalloc/kfree calls. */
#define AUDIT_MAXFREE (2*NR_CPUS)
/* The audit_buffer is used when formatting an audit record. The caller
* locks briefly to get the record off the freelist or to allocate the
* buffer, and locks briefly to send the buffer to the netlink layer or
* to place it on a transmit queue. Multiple audit_buffers can be in
* use simultaneously. */
struct audit_buffer {
struct list_head list;
struct sk_buff_head sklist; /* formatted skbs ready to send */
struct audit_context *ctx; /* NULL or associated context */
int len; /* used area of tmp */
char tmp[AUDIT_BUFSIZ];
/* Pointer to header and contents */
struct nlmsghdr *nlh;
int total;
int type;
int pid;
int count; /* Times requeued */
};
void audit_set_type(struct audit_buffer *ab, int type)
{
ab->type = type;
}
struct audit_entry {
struct list_head list;
struct audit_rule rule;
};
static void audit_log_end_irq(struct audit_buffer *ab);
static void audit_log_end_fast(struct audit_buffer *ab);
static void audit_panic(const char *message)
{
switch (audit_failure)
{
case AUDIT_FAIL_SILENT:
break;
case AUDIT_FAIL_PRINTK:
printk(KERN_ERR "audit: %s\n", message);
break;
case AUDIT_FAIL_PANIC:
panic("audit: %s\n", message);
break;
}
}
static inline int audit_rate_check(void)
{
static unsigned long last_check = 0;
static int messages = 0;
static DEFINE_SPINLOCK(lock);
unsigned long flags;
unsigned long now;
unsigned long elapsed;
int retval = 0;
if (!audit_rate_limit) return 1;
spin_lock_irqsave(&lock, flags);
if (++messages < audit_rate_limit) {
retval = 1;
} else {
now = jiffies;
elapsed = now - last_check;
if (elapsed > HZ) {
last_check = now;
messages = 0;
retval = 1;
}
}
spin_unlock_irqrestore(&lock, flags);
return retval;
}
/* Emit at least 1 message per second, even if audit_rate_check is
* throttling. */
void audit_log_lost(const char *message)
{
static unsigned long last_msg = 0;
static DEFINE_SPINLOCK(lock);
unsigned long flags;
unsigned long now;
int print;
atomic_inc(&audit_lost);
print = (audit_failure == AUDIT_FAIL_PANIC || !audit_rate_limit);
if (!print) {
spin_lock_irqsave(&lock, flags);
now = jiffies;
if (now - last_msg > HZ) {
print = 1;
last_msg = now;
}
spin_unlock_irqrestore(&lock, flags);
}
if (print) {
printk(KERN_WARNING
"audit: audit_lost=%d audit_backlog=%d"
" audit_rate_limit=%d audit_backlog_limit=%d\n",
atomic_read(&audit_lost),
atomic_read(&audit_backlog),
audit_rate_limit,
audit_backlog_limit);
audit_panic(message);
}
}
static int audit_set_rate_limit(int limit)
{
int old = audit_rate_limit;
audit_rate_limit = limit;
audit_log(current->audit_context, "audit_rate_limit=%d old=%d",
audit_rate_limit, old);
return old;
}
static int audit_set_backlog_limit(int limit)
{
int old = audit_backlog_limit;
audit_backlog_limit = limit;
audit_log(current->audit_context, "audit_backlog_limit=%d old=%d",
audit_backlog_limit, old);
return old;
}
static int audit_set_enabled(int state)
{
int old = audit_enabled;
if (state != 0 && state != 1)
return -EINVAL;
audit_enabled = state;
audit_log(current->audit_context, "audit_enabled=%d old=%d",
audit_enabled, old);
return old;
}
static int audit_set_failure(int state)
{
int old = audit_failure;
if (state != AUDIT_FAIL_SILENT
&& state != AUDIT_FAIL_PRINTK
&& state != AUDIT_FAIL_PANIC)
return -EINVAL;
audit_failure = state;
audit_log(current->audit_context, "audit_failure=%d old=%d",
audit_failure, old);
return old;
}
#ifdef CONFIG_NET
void audit_send_reply(int pid, int seq, int type, int done, int multi,
void *payload, int size)
{
struct sk_buff *skb;
struct nlmsghdr *nlh;
int len = NLMSG_SPACE(size);
void *data;
int flags = multi ? NLM_F_MULTI : 0;
int t = done ? NLMSG_DONE : type;
skb = alloc_skb(len, GFP_KERNEL);
if (!skb)
goto nlmsg_failure;
nlh = NLMSG_PUT(skb, pid, seq, t, len - sizeof(*nlh));
nlh->nlmsg_flags = flags;
data = NLMSG_DATA(nlh);
memcpy(data, payload, size);
netlink_unicast(audit_sock, skb, pid, MSG_DONTWAIT);
return;
nlmsg_failure: /* Used by NLMSG_PUT */
if (skb)
kfree_skb(skb);
}
/*
* Check for appropriate CAP_AUDIT_ capabilities on incoming audit
* control messages.
*/
static int audit_netlink_ok(kernel_cap_t eff_cap, u16 msg_type)
{
int err = 0;
switch (msg_type) {
case AUDIT_GET:
case AUDIT_LIST:
case AUDIT_SET:
case AUDIT_ADD:
case AUDIT_DEL:
if (!cap_raised(eff_cap, CAP_AUDIT_CONTROL))
err = -EPERM;
break;
case AUDIT_USER:
if (!cap_raised(eff_cap, CAP_AUDIT_WRITE))
err = -EPERM;
break;
default: /* bad msg */
err = -EINVAL;
}
return err;
}
static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
u32 uid, pid, seq;
void *data;
struct audit_status *status_get, status_set;
int err;
struct audit_buffer *ab;
u16 msg_type = nlh->nlmsg_type;
err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type);
if (err)
return err;
pid = NETLINK_CREDS(skb)->pid;
uid = NETLINK_CREDS(skb)->uid;
seq = nlh->nlmsg_seq;
data = NLMSG_DATA(nlh);
switch (msg_type) {
case AUDIT_GET:
status_set.enabled = audit_enabled;
status_set.failure = audit_failure;
status_set.pid = audit_pid;
status_set.rate_limit = audit_rate_limit;
status_set.backlog_limit = audit_backlog_limit;
status_set.lost = atomic_read(&audit_lost);
status_set.backlog = atomic_read(&audit_backlog);
audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_GET, 0, 0,
&status_set, sizeof(status_set));
break;
case AUDIT_SET:
if (nlh->nlmsg_len < sizeof(struct audit_status))
return -EINVAL;
status_get = (struct audit_status *)data;
if (status_get->mask & AUDIT_STATUS_ENABLED) {
err = audit_set_enabled(status_get->enabled);
if (err < 0) return err;
}
if (status_get->mask & AUDIT_STATUS_FAILURE) {
err = audit_set_failure(status_get->failure);
if (err < 0) return err;
}
if (status_get->mask & AUDIT_STATUS_PID) {
int old = audit_pid;
audit_pid = status_get->pid;
audit_log(current->audit_context,
"audit_pid=%d old=%d", audit_pid, old);
}
if (status_get->mask & AUDIT_STATUS_RATE_LIMIT)
audit_set_rate_limit(status_get->rate_limit);
if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
audit_set_backlog_limit(status_get->backlog_limit);
break;
case AUDIT_USER:
ab = audit_log_start(NULL);
if (!ab)
break; /* audit_panic has been called */
audit_log_format(ab,
"user pid=%d uid=%d length=%d msg='%.1024s'",
pid, uid,
(int)(nlh->nlmsg_len
- ((char *)data - (char *)nlh)),
(char *)data);
ab->type = AUDIT_USER;
ab->pid = pid;
audit_log_end(ab);
break;
case AUDIT_ADD:
case AUDIT_DEL:
if (nlh->nlmsg_len < sizeof(struct audit_rule))
return -EINVAL;
/* fallthrough */
case AUDIT_LIST:
#ifdef CONFIG_AUDITSYSCALL
err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
uid, seq, data);
#else
err = -EOPNOTSUPP;
#endif
break;
default:
err = -EINVAL;
break;
}
return err < 0 ? err : 0;
}
/* Get message from skb (based on rtnetlink_rcv_skb). Each message is
* processed by audit_receive_msg. Malformed skbs with wrong length are
* discarded silently. */
static int audit_receive_skb(struct sk_buff *skb)
{
int err;
struct nlmsghdr *nlh;
u32 rlen;
while (skb->len >= NLMSG_SPACE(0)) {
nlh = (struct nlmsghdr *)skb->data;
if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
return 0;
rlen = NLMSG_ALIGN(nlh->nlmsg_len);
if (rlen > skb->len)
rlen = skb->len;
if ((err = audit_receive_msg(skb, nlh))) {
netlink_ack(skb, nlh, err);
} else if (nlh->nlmsg_flags & NLM_F_ACK)
netlink_ack(skb, nlh, 0);
skb_pull(skb, rlen);
}
return 0;
}
/* Receive messages from netlink socket. */
static void audit_receive(struct sock *sk, int length)
{
struct sk_buff *skb;
if (down_trylock(&audit_netlink_sem))
return;
/* FIXME: this must not cause starvation */
while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
if (audit_receive_skb(skb) && skb->len)
skb_queue_head(&sk->sk_receive_queue, skb);
else
kfree_skb(skb);
}
up(&audit_netlink_sem);
}
/* Move data from tmp buffer into an skb. This is an extra copy, and
* that is unfortunate. However, the copy will only occur when a record
* is being written to user space, which is already a high-overhead
* operation. (Elimination of the copy is possible, for example, by
* writing directly into a pre-allocated skb, at the cost of wasting
* memory.) */
static void audit_log_move(struct audit_buffer *ab)
{
struct sk_buff *skb;
char *start;
int extra = ab->nlh ? 0 : NLMSG_SPACE(0);
/* possible resubmission */
if (ab->len == 0)
return;
skb = skb_peek(&ab->sklist);
if (!skb || skb_tailroom(skb) <= ab->len + extra) {
skb = alloc_skb(2 * ab->len + extra, GFP_ATOMIC);
if (!skb) {
ab->len = 0; /* Lose information in ab->tmp */
audit_log_lost("out of memory in audit_log_move");
return;
}
__skb_queue_tail(&ab->sklist, skb);
if (!ab->nlh)
ab->nlh = (struct nlmsghdr *)skb_put(skb,
NLMSG_SPACE(0));
}
start = skb_put(skb, ab->len);
memcpy(start, ab->tmp, ab->len);
ab->len = 0;
}
/* Iterate over the skbuff in the audit_buffer, sending their contents
* to user space. */
static inline int audit_log_drain(struct audit_buffer *ab)
{
struct sk_buff *skb;
while ((skb = skb_dequeue(&ab->sklist))) {
int retval = 0;
if (audit_pid) {
if (ab->nlh) {
ab->nlh->nlmsg_len = ab->total;
ab->nlh->nlmsg_type = ab->type;
ab->nlh->nlmsg_flags = 0;
ab->nlh->nlmsg_seq = 0;
ab->nlh->nlmsg_pid = ab->pid;
}
skb_get(skb); /* because netlink_* frees */
retval = netlink_unicast(audit_sock, skb, audit_pid,
MSG_DONTWAIT);
}
if (retval == -EAGAIN && ab->count < 5) {
++ab->count;
skb_queue_tail(&ab->sklist, skb);
audit_log_end_irq(ab);
return 1;
}
if (retval < 0) {
if (retval == -ECONNREFUSED) {
printk(KERN_ERR
"audit: *NO* daemon at audit_pid=%d\n",
audit_pid);
audit_pid = 0;
} else
audit_log_lost("netlink socket too busy");
}
if (!audit_pid) { /* No daemon */
int offset = ab->nlh ? NLMSG_SPACE(0) : 0;
int len = skb->len - offset;
printk(KERN_ERR "%*.*s\n",
len, len, skb->data + offset);
}
kfree_skb(skb);
ab->nlh = NULL;
}
return 0;
}
/* Initialize audit support at boot time. */
static int __init audit_init(void)
{
printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
audit_default ? "enabled" : "disabled");
audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive);
if (!audit_sock)
audit_panic("cannot initialize netlink socket");
audit_initialized = 1;
audit_enabled = audit_default;
audit_log(NULL, "initialized");
return 0;
}
#else
/* Without CONFIG_NET, we have no skbuffs. For now, print what we have
* in the buffer. */
static void audit_log_move(struct audit_buffer *ab)
{
printk(KERN_ERR "%*.*s\n", ab->len, ab->len, ab->tmp);
ab->len = 0;
}
static inline int audit_log_drain(struct audit_buffer *ab)
{
return 0;
}
/* Initialize audit support at boot time. */
int __init audit_init(void)
{
printk(KERN_INFO "audit: initializing WITHOUT netlink support\n");
audit_sock = NULL;
audit_pid = 0;
audit_initialized = 1;
audit_enabled = audit_default;
audit_log(NULL, "initialized");
return 0;
}
#endif
__initcall(audit_init);
/* Process kernel command-line parameter at boot time. audit=0 or audit=1. */
static int __init audit_enable(char *str)
{
audit_default = !!simple_strtol(str, NULL, 0);
printk(KERN_INFO "audit: %s%s\n",
audit_default ? "enabled" : "disabled",
audit_initialized ? "" : " (after initialization)");
if (audit_initialized)
audit_enabled = audit_default;
return 0;
}
__setup("audit=", audit_enable);
/* Obtain an audit buffer. This routine does locking to obtain the
* audit buffer, but then no locking is required for calls to
* audit_log_*format. If ctx is the context of a task currently in a
* syscall, then the syscall is marked as auditable and an audit record
* will be written at syscall exit. If there is no associated task, ctx
* should be NULL. */
struct audit_buffer *audit_log_start(struct audit_context *ctx)
{
struct audit_buffer *ab = NULL;
unsigned long flags;
struct timespec t;
int serial = 0;
if (!audit_initialized)
return NULL;
if (audit_backlog_limit
&& atomic_read(&audit_backlog) > audit_backlog_limit) {
if (audit_rate_check())
printk(KERN_WARNING
"audit: audit_backlog=%d > "
"audit_backlog_limit=%d\n",
atomic_read(&audit_backlog),
audit_backlog_limit);
audit_log_lost("backlog limit exceeded");
return NULL;
}
spin_lock_irqsave(&audit_freelist_lock, flags);
if (!list_empty(&audit_freelist)) {
ab = list_entry(audit_freelist.next,
struct audit_buffer, list);
list_del(&ab->list);
--audit_freelist_count;
}
spin_unlock_irqrestore(&audit_freelist_lock, flags);
if (!ab)
ab = kmalloc(sizeof(*ab), GFP_ATOMIC);
if (!ab) {
audit_log_lost("out of memory in audit_log_start");
return NULL;
}
atomic_inc(&audit_backlog);
skb_queue_head_init(&ab->sklist);
ab->ctx = ctx;
ab->len = 0;
ab->nlh = NULL;
ab->total = 0;
ab->type = AUDIT_KERNEL;
ab->pid = 0;
ab->count = 0;
#ifdef CONFIG_AUDITSYSCALL
if (ab->ctx)
audit_get_stamp(ab->ctx, &t, &serial);
else
#endif
t = CURRENT_TIME;
audit_log_format(ab, "audit(%lu.%03lu:%u): ",
t.tv_sec, t.tv_nsec/1000000, serial);
return ab;
}
/* Format an audit message into the audit buffer. If there isn't enough
* room in the audit buffer, more room will be allocated and vsnprintf
* will be called a second time. Currently, we assume that a printk
* can't format a message larger than 1024 bytes, so we don't either. */
static void audit_log_vformat(struct audit_buffer *ab, const char *fmt,
va_list args)
{
int len, avail;
if (!ab)
return;
avail = sizeof(ab->tmp) - ab->len;
if (avail <= 0) {
audit_log_move(ab);
avail = sizeof(ab->tmp) - ab->len;
}
len = vsnprintf(ab->tmp + ab->len, avail, fmt, args);
if (len >= avail) {
/* The printk buffer is 1024 bytes long, so if we get
* here and AUDIT_BUFSIZ is at least 1024, then we can
* log everything that printk could have logged. */
audit_log_move(ab);
avail = sizeof(ab->tmp) - ab->len;
len = vsnprintf(ab->tmp + ab->len, avail, fmt, args);
}
ab->len += (len < avail) ? len : avail;
ab->total += (len < avail) ? len : avail;
}
/* Format a message into the audit buffer. All the work is done in
* audit_log_vformat. */
void audit_log_format(struct audit_buffer *ab, const char *fmt, ...)
{
va_list args;
if (!ab)
return;
va_start(args, fmt);
audit_log_vformat(ab, fmt, args);
va_end(args);
}
/* This is a helper function to print the d_path without using a static
* buffer or allocating another buffer in addition to the one in
* audit_buffer. */
void audit_log_d_path(struct audit_buffer *ab, const char *prefix,
struct dentry *dentry, struct vfsmount *vfsmnt)
{
char *p;
int len, avail;
if (prefix) audit_log_format(ab, " %s", prefix);
if (ab->len > 128)
audit_log_move(ab);
avail = sizeof(ab->tmp) - ab->len;
p = d_path(dentry, vfsmnt, ab->tmp + ab->len, avail);
if (IS_ERR(p)) {
/* FIXME: can we save some information here? */
audit_log_format(ab, "<toolong>");
} else {
/* path isn't at start of buffer */
len = (ab->tmp + sizeof(ab->tmp) - 1) - p;
memmove(ab->tmp + ab->len, p, len);
ab->len += len;
ab->total += len;
}
}
/* Remove queued messages from the audit_txlist and send them to userspace. */
static void audit_tasklet_handler(unsigned long arg)
{
LIST_HEAD(list);
struct audit_buffer *ab;
unsigned long flags;
spin_lock_irqsave(&audit_txlist_lock, flags);
list_splice_init(&audit_txlist, &list);
spin_unlock_irqrestore(&audit_txlist_lock, flags);
while (!list_empty(&list)) {
ab = list_entry(list.next, struct audit_buffer, list);
list_del(&ab->list);
audit_log_end_fast(ab);
}
}
static DECLARE_TASKLET(audit_tasklet, audit_tasklet_handler, 0);
/* The netlink_* functions cannot be called inside an irq context, so
* audit buffers are placed on a queue and a tasklet is scheduled to
* remove them from the queue outside the irq context. May be called in
* any context. */
static void audit_log_end_irq(struct audit_buffer *ab)
{
unsigned long flags;
if (!ab)
return;
spin_lock_irqsave(&audit_txlist_lock, flags);
list_add_tail(&ab->list, &audit_txlist);
spin_unlock_irqrestore(&audit_txlist_lock, flags);
tasklet_schedule(&audit_tasklet);
}
/* Send the message in the audit buffer directly to user space. May not
* be called in an irq context. */
static void audit_log_end_fast(struct audit_buffer *ab)
{
unsigned long flags;
BUG_ON(in_irq());
if (!ab)
return;
if (!audit_rate_check()) {
audit_log_lost("rate limit exceeded");
} else {
audit_log_move(ab);
if (audit_log_drain(ab))
return;
}
atomic_dec(&audit_backlog);
spin_lock_irqsave(&audit_freelist_lock, flags);
if (++audit_freelist_count > AUDIT_MAXFREE)
kfree(ab);
else
list_add(&ab->list, &audit_freelist);
spin_unlock_irqrestore(&audit_freelist_lock, flags);
}
/* Send or queue the message in the audit buffer, depending on the
* current context. (A convenience function that may be called in any
* context.) */
void audit_log_end(struct audit_buffer *ab)
{
if (in_irq())
audit_log_end_irq(ab);
else
audit_log_end_fast(ab);
}
/* Log an audit record. This is a convenience function that calls
* audit_log_start, audit_log_vformat, and audit_log_end. It may be
* called in any context. */
void audit_log(struct audit_context *ctx, const char *fmt, ...)
{
struct audit_buffer *ab;
va_list args;
ab = audit_log_start(ctx);
if (ab) {
va_start(args, fmt);
audit_log_vformat(ab, fmt, args);
va_end(args);
audit_log_end(ab);
}
}
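
Putting the pieces together, an in-kernel caller of this interface (the subsystem here is hypothetical; only the audit_log* calls and current->audit_context come from the code above) would look roughly like:

	struct audit_buffer *ab;

	/* Multi-part record: format incrementally, then send or queue. */
	ab = audit_log_start(current->audit_context);
	if (ab) {	/* NULL means lost to backlog or memory pressure */
		audit_log_format(ab, "example op=%s", "demo");
		audit_log_format(ab, " res=%d", 0);
		audit_log_end(ab);
	}

	/* Or the one-shot wrapper that performs the same three steps. */
	audit_log(current->audit_context, "example op=%s res=%d", "demo", 0);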

kernel/auditsc.c (new file, 1015 lines; diff suppressed because it is too large)

kernel/capability.c (new file, 220 lines)

@@ -0,0 +1,220 @@
/*
* linux/kernel/capability.c
*
* Copyright (C) 1997 Andrew Main <zefram@fysh.org>
*
* Integrated into 2.1.97+, Andrew G. Morgan <morgan@transmeta.com>
* 30 May 2002: Cleanup, Robert M. Love <rml@tech9.net>
*/
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <asm/uaccess.h>
unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
EXPORT_SYMBOL(securebits);
EXPORT_SYMBOL(cap_bset);
/*
* This lock protects task->cap_* for all tasks including current.
* Locking rule: acquire this prior to tasklist_lock.
*/
static DEFINE_SPINLOCK(task_capability_lock);
/*
* For sys_capget() and sys_capset(), any of the three
* capability set pointers may be NULL -- indicating that that set is
* uninteresting and/or not to be changed.
*/
/*
* sys_capget - get the capabilities of a given process.
*/
asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
{
int ret = 0;
pid_t pid;
__u32 version;
task_t *target;
struct __user_cap_data_struct data;
if (get_user(version, &header->version))
return -EFAULT;
if (version != _LINUX_CAPABILITY_VERSION) {
if (put_user(_LINUX_CAPABILITY_VERSION, &header->version))
return -EFAULT;
return -EINVAL;
}
if (get_user(pid, &header->pid))
return -EFAULT;
if (pid < 0)
return -EINVAL;
spin_lock(&task_capability_lock);
read_lock(&tasklist_lock);
if (pid && pid != current->pid) {
target = find_task_by_pid(pid);
if (!target) {
ret = -ESRCH;
goto out;
}
} else
target = current;
ret = security_capget(target, &data.effective, &data.inheritable, &data.permitted);
out:
read_unlock(&tasklist_lock);
spin_unlock(&task_capability_lock);
if (!ret && copy_to_user(dataptr, &data, sizeof data))
return -EFAULT;
return ret;
}
/*
* cap_set_pg - set capabilities for all processes in a given process
* group. We call this holding task_capability_lock and tasklist_lock.
*/
static inline int cap_set_pg(int pgrp, kernel_cap_t *effective,
kernel_cap_t *inheritable,
kernel_cap_t *permitted)
{
task_t *g, *target;
int ret = -EPERM;
int found = 0;
do_each_task_pid(pgrp, PIDTYPE_PGID, g) {
target = g;
while_each_thread(g, target) {
if (!security_capset_check(target, effective,
inheritable,
permitted)) {
security_capset_set(target, effective,
inheritable,
permitted);
ret = 0;
}
found = 1;
}
} while_each_task_pid(pgrp, PIDTYPE_PGID, g);
if (!found)
ret = 0;
return ret;
}
/*
* cap_set_all - set capabilities for all processes other than init
* and self. We call this holding task_capability_lock and tasklist_lock.
*/
static inline int cap_set_all(kernel_cap_t *effective,
kernel_cap_t *inheritable,
kernel_cap_t *permitted)
{
task_t *g, *target;
int ret = -EPERM;
int found = 0;
do_each_thread(g, target) {
if (target == current || target->pid == 1)
continue;
found = 1;
if (security_capset_check(target, effective, inheritable,
permitted))
continue;
ret = 0;
security_capset_set(target, effective, inheritable, permitted);
} while_each_thread(g, target);
if (!found)
ret = 0;
return ret;
}
/*
* sys_capset - set capabilities for a given process, all processes, or all
* processes in a given process group.
*
* The restrictions on setting capabilities are specified as:
*
* [pid is for the 'target' task. 'current' is the calling task.]
*
* I: any raised capabilities must be a subset of the (old current) permitted
* P: any raised capabilities must be a subset of the (old current) permitted
* E: must be set to a subset of (new target) permitted
*/
asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
{
kernel_cap_t inheritable, permitted, effective;
__u32 version;
task_t *target;
int ret;
pid_t pid;
if (get_user(version, &header->version))
return -EFAULT;
if (version != _LINUX_CAPABILITY_VERSION) {
if (put_user(_LINUX_CAPABILITY_VERSION, &header->version))
return -EFAULT;
return -EINVAL;
}
if (get_user(pid, &header->pid))
return -EFAULT;
if (pid && pid != current->pid && !capable(CAP_SETPCAP))
return -EPERM;
if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
copy_from_user(&inheritable, &data->inheritable, sizeof(inheritable)) ||
copy_from_user(&permitted, &data->permitted, sizeof(permitted)))
return -EFAULT;
spin_lock(&task_capability_lock);
read_lock(&tasklist_lock);
if (pid > 0 && pid != current->pid) {
target = find_task_by_pid(pid);
if (!target) {
ret = -ESRCH;
goto out;
}
} else
target = current;
ret = 0;
/* having verified that the proposed changes are legal,
we now put them into effect. */
if (pid < 0) {
if (pid == -1) /* all procs other than current and init */
ret = cap_set_all(&effective, &inheritable, &permitted);
else /* all procs in process group */
ret = cap_set_pg(-pid, &effective, &inheritable,
&permitted);
} else {
ret = security_capset_check(target, &effective, &inheritable,
&permitted);
if (!ret)
security_capset_set(target, &effective, &inheritable,
&permitted);
}
out:
read_unlock(&tasklist_lock);
spin_unlock(&task_capability_lock);
return ret;
}
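
From userspace, these calls are driven with a header/data pair; a minimal capget sketch using syscall(2) (struct and version names as used by the code above; error handling trimmed):

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/capability.h>

	int main(void)
	{
		struct __user_cap_header_struct hdr = {
			.version = _LINUX_CAPABILITY_VERSION,
			.pid = 0,	/* 0 queries the calling process */
		};
		struct __user_cap_data_struct data;

		if (syscall(SYS_capget, &hdr, &data) != 0) {
			perror("capget");
			return 1;
		}
		printf("effective=%#x permitted=%#x inheritable=%#x\n",
		       data.effective, data.permitted, data.inheritable);
		return 0;
	}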

kernel/compat.c (new file, 860 lines)

@@ -0,0 +1,860 @@
/*
* linux/kernel/compat.c
*
* Kernel compatibility routines for e.g. 32 bit syscall support
* on 64 bit kernels.
*
* Copyright (C) 2002-2003 Stephen Rothwell, IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/signal.h>
#include <linux/sched.h> /* for MAX_SCHEDULE_TIMEOUT */
#include <linux/futex.h> /* for FUTEX_WAIT */
#include <linux/syscalls.h>
#include <linux/unistd.h>
#include <linux/security.h>
#include <asm/uaccess.h>
#include <asm/bug.h>
int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
{
return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
__get_user(ts->tv_sec, &cts->tv_sec) ||
__get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
}
int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user *cts)
{
return (!access_ok(VERIFY_WRITE, cts, sizeof(*cts)) ||
__put_user(ts->tv_sec, &cts->tv_sec) ||
__put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
}
static long compat_nanosleep_restart(struct restart_block *restart)
{
unsigned long expire = restart->arg0, now = jiffies;
struct compat_timespec __user *rmtp;
/* Did it expire while we handled signals? */
if (!time_after(expire, now))
return 0;
current->state = TASK_INTERRUPTIBLE;
expire = schedule_timeout(expire - now);
if (expire == 0)
return 0;
rmtp = (struct compat_timespec __user *)restart->arg1;
if (rmtp) {
struct compat_timespec ct;
struct timespec t;
jiffies_to_timespec(expire, &t);
ct.tv_sec = t.tv_sec;
ct.tv_nsec = t.tv_nsec;
if (copy_to_user(rmtp, &ct, sizeof(ct)))
return -EFAULT;
}
/* The 'restart' block is already filled in */
return -ERESTART_RESTARTBLOCK;
}
asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
struct compat_timespec __user *rmtp)
{
struct timespec t;
struct restart_block *restart;
unsigned long expire;
if (get_compat_timespec(&t, rqtp))
return -EFAULT;
if ((t.tv_nsec >= 1000000000L) || (t.tv_nsec < 0) || (t.tv_sec < 0))
return -EINVAL;
expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
current->state = TASK_INTERRUPTIBLE;
expire = schedule_timeout(expire);
if (expire == 0)
return 0;
if (rmtp) {
jiffies_to_timespec(expire, &t);
if (put_compat_timespec(&t, rmtp))
return -EFAULT;
}
restart = &current_thread_info()->restart_block;
restart->fn = compat_nanosleep_restart;
restart->arg0 = jiffies + expire;
restart->arg1 = (unsigned long) rmtp;
return -ERESTART_RESTARTBLOCK;
}
static inline long get_compat_itimerval(struct itimerval *o,
struct compat_itimerval __user *i)
{
return (!access_ok(VERIFY_READ, i, sizeof(*i)) ||
(__get_user(o->it_interval.tv_sec, &i->it_interval.tv_sec) |
__get_user(o->it_interval.tv_usec, &i->it_interval.tv_usec) |
__get_user(o->it_value.tv_sec, &i->it_value.tv_sec) |
__get_user(o->it_value.tv_usec, &i->it_value.tv_usec)));
}
static inline long put_compat_itimerval(struct compat_itimerval __user *o,
struct itimerval *i)
{
return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
(__put_user(i->it_interval.tv_sec, &o->it_interval.tv_sec) |
__put_user(i->it_interval.tv_usec, &o->it_interval.tv_usec) |
__put_user(i->it_value.tv_sec, &o->it_value.tv_sec) |
__put_user(i->it_value.tv_usec, &o->it_value.tv_usec)));
}
asmlinkage long compat_sys_getitimer(int which,
struct compat_itimerval __user *it)
{
struct itimerval kit;
int error;
error = do_getitimer(which, &kit);
if (!error && put_compat_itimerval(it, &kit))
error = -EFAULT;
return error;
}
asmlinkage long compat_sys_setitimer(int which,
struct compat_itimerval __user *in,
struct compat_itimerval __user *out)
{
struct itimerval kin, kout;
int error;
if (in) {
if (get_compat_itimerval(&kin, in))
return -EFAULT;
} else
memset(&kin, 0, sizeof(kin));
error = do_setitimer(which, &kin, out ? &kout : NULL);
if (error || !out)
return error;
if (put_compat_itimerval(out, &kout))
return -EFAULT;
return 0;
}
asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
{
/*
* In the SMP world we might just be unlucky and have one of
* the times increment as we use it. Since the value is an
* atomically safe type this is just fine. Conceptually it's
* as if the syscall took an instant longer to occur.
*/
if (tbuf) {
struct compat_tms tmp;
struct task_struct *tsk = current;
struct task_struct *t;
cputime_t utime, stime, cutime, cstime;
read_lock(&tasklist_lock);
utime = tsk->signal->utime;
stime = tsk->signal->stime;
t = tsk;
do {
utime = cputime_add(utime, t->utime);
stime = cputime_add(stime, t->stime);
t = next_thread(t);
} while (t != tsk);
/*
* While we have tasklist_lock read-locked, no dying thread
* can be updating current->signal->[us]time. Instead,
* we got their counts included in the live thread loop.
* However, another thread can come in right now and
* do a wait call that updates current->signal->c[us]time.
* To make sure we always see that pair updated atomically,
* we take the siglock around fetching them.
*/
spin_lock_irq(&tsk->sighand->siglock);
cutime = tsk->signal->cutime;
cstime = tsk->signal->cstime;
spin_unlock_irq(&tsk->sighand->siglock);
read_unlock(&tasklist_lock);
tmp.tms_utime = compat_jiffies_to_clock_t(cputime_to_jiffies(utime));
tmp.tms_stime = compat_jiffies_to_clock_t(cputime_to_jiffies(stime));
tmp.tms_cutime = compat_jiffies_to_clock_t(cputime_to_jiffies(cutime));
tmp.tms_cstime = compat_jiffies_to_clock_t(cputime_to_jiffies(cstime));
if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
return -EFAULT;
}
return compat_jiffies_to_clock_t(jiffies);
}
/*
* Assumption: old_sigset_t and compat_old_sigset_t are both
* types that can be passed to put_user()/get_user().
*/
asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set)
{
old_sigset_t s;
long ret;
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
ret = sys_sigpending((old_sigset_t __user *) &s);
set_fs(old_fs);
if (ret == 0)
ret = put_user(s, set);
return ret;
}
asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set,
compat_old_sigset_t __user *oset)
{
old_sigset_t s;
long ret;
mm_segment_t old_fs;
if (set && get_user(s, set))
return -EFAULT;
old_fs = get_fs();
set_fs(KERNEL_DS);
ret = sys_sigprocmask(how,
set ? (old_sigset_t __user *) &s : NULL,
oset ? (old_sigset_t __user *) &s : NULL);
set_fs(old_fs);
if (ret == 0)
if (oset)
ret = put_user(s, oset);
return ret;
}
#ifdef CONFIG_FUTEX
asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, int val,
struct compat_timespec __user *utime, u32 __user *uaddr2,
int val3)
{
struct timespec t;
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
int val2 = 0;
if ((op == FUTEX_WAIT) && utime) {
if (get_compat_timespec(&t, utime))
return -EFAULT;
timeout = timespec_to_jiffies(&t) + 1;
}
if (op >= FUTEX_REQUEUE)
val2 = (int) (unsigned long) utime;
return do_futex((unsigned long)uaddr, op, val, timeout,
(unsigned long)uaddr2, val2, val3);
}
#endif
asmlinkage long compat_sys_setrlimit(unsigned int resource,
struct compat_rlimit __user *rlim)
{
struct rlimit r;
int ret;
mm_segment_t old_fs = get_fs ();
if (resource >= RLIM_NLIMITS)
return -EINVAL;
if (!access_ok(VERIFY_READ, rlim, sizeof(*rlim)) ||
__get_user(r.rlim_cur, &rlim->rlim_cur) ||
__get_user(r.rlim_max, &rlim->rlim_max))
return -EFAULT;
if (r.rlim_cur == COMPAT_RLIM_INFINITY)
r.rlim_cur = RLIM_INFINITY;
if (r.rlim_max == COMPAT_RLIM_INFINITY)
r.rlim_max = RLIM_INFINITY;
set_fs(KERNEL_DS);
ret = sys_setrlimit(resource, (struct rlimit __user *) &r);
set_fs(old_fs);
return ret;
}
#ifdef COMPAT_RLIM_OLD_INFINITY
asmlinkage long compat_sys_old_getrlimit(unsigned int resource,
struct compat_rlimit __user *rlim)
{
struct rlimit r;
int ret;
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
ret = sys_old_getrlimit(resource, &r);
set_fs(old_fs);
if (!ret) {
if (r.rlim_cur > COMPAT_RLIM_OLD_INFINITY)
r.rlim_cur = COMPAT_RLIM_INFINITY;
if (r.rlim_max > COMPAT_RLIM_OLD_INFINITY)
r.rlim_max = COMPAT_RLIM_INFINITY;
if (!access_ok(VERIFY_WRITE, rlim, sizeof(*rlim)) ||
__put_user(r.rlim_cur, &rlim->rlim_cur) ||
__put_user(r.rlim_max, &rlim->rlim_max))
return -EFAULT;
}
return ret;
}
#endif
asmlinkage long compat_sys_getrlimit (unsigned int resource,
struct compat_rlimit __user *rlim)
{
struct rlimit r;
int ret;
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
ret = sys_getrlimit(resource, (struct rlimit __user *) &r);
set_fs(old_fs);
if (!ret) {
if (r.rlim_cur > COMPAT_RLIM_INFINITY)
r.rlim_cur = COMPAT_RLIM_INFINITY;
if (r.rlim_max > COMPAT_RLIM_INFINITY)
r.rlim_max = COMPAT_RLIM_INFINITY;
if (!access_ok(VERIFY_WRITE, rlim, sizeof(*rlim)) ||
__put_user(r.rlim_cur, &rlim->rlim_cur) ||
__put_user(r.rlim_max, &rlim->rlim_max))
return -EFAULT;
}
return ret;
}
int put_compat_rusage(const struct rusage *r, struct compat_rusage __user *ru)
{
if (!access_ok(VERIFY_WRITE, ru, sizeof(*ru)) ||
__put_user(r->ru_utime.tv_sec, &ru->ru_utime.tv_sec) ||
__put_user(r->ru_utime.tv_usec, &ru->ru_utime.tv_usec) ||
__put_user(r->ru_stime.tv_sec, &ru->ru_stime.tv_sec) ||
__put_user(r->ru_stime.tv_usec, &ru->ru_stime.tv_usec) ||
__put_user(r->ru_maxrss, &ru->ru_maxrss) ||
__put_user(r->ru_ixrss, &ru->ru_ixrss) ||
__put_user(r->ru_idrss, &ru->ru_idrss) ||
__put_user(r->ru_isrss, &ru->ru_isrss) ||
__put_user(r->ru_minflt, &ru->ru_minflt) ||
__put_user(r->ru_majflt, &ru->ru_majflt) ||
__put_user(r->ru_nswap, &ru->ru_nswap) ||
__put_user(r->ru_inblock, &ru->ru_inblock) ||
__put_user(r->ru_oublock, &ru->ru_oublock) ||
__put_user(r->ru_msgsnd, &ru->ru_msgsnd) ||
__put_user(r->ru_msgrcv, &ru->ru_msgrcv) ||
__put_user(r->ru_nsignals, &ru->ru_nsignals) ||
__put_user(r->ru_nvcsw, &ru->ru_nvcsw) ||
__put_user(r->ru_nivcsw, &ru->ru_nivcsw))
return -EFAULT;
return 0;
}
asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru)
{
struct rusage r;
int ret;
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
ret = sys_getrusage(who, (struct rusage __user *) &r);
set_fs(old_fs);
if (ret)
return ret;
if (put_compat_rusage(&r, ru))
return -EFAULT;
return 0;
}
asmlinkage long
compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options,
struct compat_rusage __user *ru)
{
if (!ru) {
return sys_wait4(pid, stat_addr, options, NULL);
} else {
struct rusage r;
int ret;
unsigned int status;
mm_segment_t old_fs = get_fs();
set_fs (KERNEL_DS);
ret = sys_wait4(pid,
(stat_addr ?
(unsigned int __user *) &status : NULL),
options, (struct rusage __user *) &r);
set_fs (old_fs);
if (ret > 0) {
if (put_compat_rusage(&r, ru))
return -EFAULT;
if (stat_addr && put_user(status, stat_addr))
return -EFAULT;
}
return ret;
}
}
asmlinkage long compat_sys_waitid(int which, compat_pid_t pid,
struct compat_siginfo __user *uinfo, int options,
struct compat_rusage __user *uru)
{
siginfo_t info;
struct rusage ru;
long ret;
mm_segment_t old_fs = get_fs();
memset(&info, 0, sizeof(info));
set_fs(KERNEL_DS);
ret = sys_waitid(which, pid, (siginfo_t __user *)&info, options,
uru ? (struct rusage __user *)&ru : NULL);
set_fs(old_fs);
if ((ret < 0) || (info.si_signo == 0))
return ret;
if (uru) {
ret = put_compat_rusage(&ru, uru);
if (ret)
return ret;
}
BUG_ON(info.si_code & __SI_MASK);
info.si_code |= __SI_CHLD;
return copy_siginfo_to_user32(uinfo, &info);
}
static int compat_get_user_cpu_mask(compat_ulong_t __user *user_mask_ptr,
unsigned len, cpumask_t *new_mask)
{
unsigned long *k;
if (len < sizeof(cpumask_t))
memset(new_mask, 0, sizeof(cpumask_t));
else if (len > sizeof(cpumask_t))
len = sizeof(cpumask_t);
k = cpus_addr(*new_mask);
return compat_get_bitmap(k, user_mask_ptr, len * 8);
}
asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid,
unsigned int len,
compat_ulong_t __user *user_mask_ptr)
{
cpumask_t new_mask;
int retval;
retval = compat_get_user_cpu_mask(user_mask_ptr, len, &new_mask);
if (retval)
return retval;
return sched_setaffinity(pid, new_mask);
}
asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len,
compat_ulong_t __user *user_mask_ptr)
{
int ret;
cpumask_t mask;
unsigned long *k;
unsigned int min_length = sizeof(cpumask_t);
if (NR_CPUS <= BITS_PER_COMPAT_LONG)
min_length = sizeof(compat_ulong_t);
if (len < min_length)
return -EINVAL;
ret = sched_getaffinity(pid, &mask);
if (ret < 0)
return ret;
k = cpus_addr(mask);
ret = compat_put_bitmap(user_mask_ptr, k, min_length * 8);
if (ret)
return ret;
return min_length;
}
static int get_compat_itimerspec(struct itimerspec *dst,
struct compat_itimerspec __user *src)
{
if (get_compat_timespec(&dst->it_interval, &src->it_interval) ||
get_compat_timespec(&dst->it_value, &src->it_value))
return -EFAULT;
return 0;
}
static int put_compat_itimerspec(struct compat_itimerspec __user *dst,
struct itimerspec *src)
{
if (put_compat_timespec(&src->it_interval, &dst->it_interval) ||
put_compat_timespec(&src->it_value, &dst->it_value))
return -EFAULT;
return 0;
}
long compat_sys_timer_settime(timer_t timer_id, int flags,
struct compat_itimerspec __user *new,
struct compat_itimerspec __user *old)
{
long err;
mm_segment_t oldfs;
struct itimerspec newts, oldts;
if (!new)
return -EINVAL;
if (get_compat_itimerspec(&newts, new))
return -EFAULT;
oldfs = get_fs();
set_fs(KERNEL_DS);
err = sys_timer_settime(timer_id, flags,
(struct itimerspec __user *) &newts,
(struct itimerspec __user *) &oldts);
set_fs(oldfs);
if (!err && old && put_compat_itimerspec(old, &oldts))
return -EFAULT;
return err;
}
long compat_sys_timer_gettime(timer_t timer_id,
struct compat_itimerspec __user *setting)
{
long err;
mm_segment_t oldfs;
struct itimerspec ts;
oldfs = get_fs();
set_fs(KERNEL_DS);
err = sys_timer_gettime(timer_id,
(struct itimerspec __user *) &ts);
set_fs(oldfs);
if (!err && put_compat_itimerspec(setting, &ts))
return -EFAULT;
return err;
}
long compat_sys_clock_settime(clockid_t which_clock,
struct compat_timespec __user *tp)
{
long err;
mm_segment_t oldfs;
struct timespec ts;
if (get_compat_timespec(&ts, tp))
return -EFAULT;
oldfs = get_fs();
set_fs(KERNEL_DS);
err = sys_clock_settime(which_clock,
(struct timespec __user *) &ts);
set_fs(oldfs);
return err;
}
long compat_sys_clock_gettime(clockid_t which_clock,
struct compat_timespec __user *tp)
{
long err;
mm_segment_t oldfs;
struct timespec ts;
oldfs = get_fs();
set_fs(KERNEL_DS);
err = sys_clock_gettime(which_clock,
(struct timespec __user *) &ts);
set_fs(oldfs);
if (!err && put_compat_timespec(&ts, tp))
return -EFAULT;
return err;
}
long compat_sys_clock_getres(clockid_t which_clock,
struct compat_timespec __user *tp)
{
long err;
mm_segment_t oldfs;
struct timespec ts;
oldfs = get_fs();
set_fs(KERNEL_DS);
err = sys_clock_getres(which_clock,
(struct timespec __user *) &ts);
set_fs(oldfs);
if (!err && tp && put_compat_timespec(&ts, tp))
return -EFAULT;
return err;
}
long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
struct compat_timespec __user *rqtp,
struct compat_timespec __user *rmtp)
{
long err;
mm_segment_t oldfs;
struct timespec in, out;
if (get_compat_timespec(&in, rqtp))
return -EFAULT;
oldfs = get_fs();
set_fs(KERNEL_DS);
err = sys_clock_nanosleep(which_clock, flags,
(struct timespec __user *) &in,
(struct timespec __user *) &out);
set_fs(oldfs);
if ((err == -ERESTART_RESTARTBLOCK) && rmtp &&
put_compat_timespec(&out, rmtp))
return -EFAULT;
return err;
}
/*
* We currently only need the following fields from the sigevent
 * structure: sigev_value, sigev_signo, sigev_notify and (sometimes
* sigev_notify_thread_id). The others are handled in user mode.
* We also assume that copying sigev_value.sival_int is sufficient
* to keep all the bits of sigev_value.sival_ptr intact.
*/
int get_compat_sigevent(struct sigevent *event,
const struct compat_sigevent __user *u_event)
{
	memset(event, 0, sizeof(*event));
return (!access_ok(VERIFY_READ, u_event, sizeof(*u_event)) ||
__get_user(event->sigev_value.sival_int,
&u_event->sigev_value.sival_int) ||
__get_user(event->sigev_signo, &u_event->sigev_signo) ||
__get_user(event->sigev_notify, &u_event->sigev_notify) ||
__get_user(event->sigev_notify_thread_id,
&u_event->sigev_notify_thread_id))
? -EFAULT : 0;
}
/* timer_create is architecture specific because it needs sigevent conversion */
long compat_get_bitmap(unsigned long *mask, compat_ulong_t __user *umask,
unsigned long bitmap_size)
{
int i, j;
unsigned long m;
compat_ulong_t um;
unsigned long nr_compat_longs;
/* align bitmap up to nearest compat_long_t boundary */
bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
if (!access_ok(VERIFY_READ, umask, bitmap_size / 8))
return -EFAULT;
nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
for (i = 0; i < BITS_TO_LONGS(bitmap_size); i++) {
m = 0;
for (j = 0; j < sizeof(m)/sizeof(um); j++) {
/*
			 * We don't want to read past the end of the userspace
* bitmap. We must however ensure the end of the
* kernel bitmap is zeroed.
*/
if (nr_compat_longs-- > 0) {
if (__get_user(um, umask))
return -EFAULT;
} else {
um = 0;
}
umask++;
m |= (long)um << (j * BITS_PER_COMPAT_LONG);
}
*mask++ = m;
}
return 0;
}
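/*
 * Standalone illustration (not part of this file; assumes a 64-bit
 * kernel where BITS_PER_COMPAT_LONG == 32): the j-loop above packs two
 * consecutive 32-bit words, in userspace order, into one native long.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t um[2] = { 0xdeadbeef, 0x00c0ffee };	/* userspace order */
	uint64_t m = 0;
	int j;

	for (j = 0; j < 2; j++)
		m |= (uint64_t)um[j] << (j * 32);
	/* prints 0xc0ffeedeadbeef: um[0] is the low half, um[1] the high */
	printf("%#llx\n", (unsigned long long)m);
	return 0;
}
#endif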
long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
unsigned long bitmap_size)
{
int i, j;
unsigned long m;
compat_ulong_t um;
unsigned long nr_compat_longs;
/* align bitmap up to nearest compat_long_t boundary */
bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
if (!access_ok(VERIFY_WRITE, umask, bitmap_size / 8))
return -EFAULT;
nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
for (i = 0; i < BITS_TO_LONGS(bitmap_size); i++) {
m = *mask++;
for (j = 0; j < sizeof(m)/sizeof(um); j++) {
um = m;
/*
			 * We don't want to write past the end of the userspace
* bitmap.
*/
if (nr_compat_longs-- > 0) {
if (__put_user(um, umask))
return -EFAULT;
}
umask++;
			/*
			 * Shift in two halves: a single shift by the full
			 * width would be undefined when BITS_PER_COMPAT_LONG
			 * equals BITS_PER_LONG.
			 */
			m >>= 4*sizeof(um);
			m >>= 4*sizeof(um);
}
}
return 0;
}
void
sigset_from_compat (sigset_t *set, compat_sigset_t *compat)
{
switch (_NSIG_WORDS) {
#if defined (__COMPAT_ENDIAN_SWAP__)
case 4: set->sig[3] = compat->sig[7] | (((long)compat->sig[6]) << 32 );
case 3: set->sig[2] = compat->sig[5] | (((long)compat->sig[4]) << 32 );
case 2: set->sig[1] = compat->sig[3] | (((long)compat->sig[2]) << 32 );
case 1: set->sig[0] = compat->sig[1] | (((long)compat->sig[0]) << 32 );
#else
case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32 );
case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32 );
case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32 );
case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32 );
#endif
}
}
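/*
 * Standalone illustration (not part of this file): on a 64-bit kernel
 * each sigset word is rebuilt from two 32-bit compat words. Without
 * __COMPAT_ENDIAN_SWAP__ the first compat word is the low half; with
 * it, the halves arrive swapped and the second word is the low half.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t compat_sig[2] = { 0x11111111, 0x22222222 };
	uint64_t no_swap = compat_sig[0] | ((uint64_t)compat_sig[1] << 32);
	uint64_t swapped = compat_sig[1] | ((uint64_t)compat_sig[0] << 32);

	printf("no swap: %#llx\n", (unsigned long long)no_swap);	/* 0x2222222211111111 */
	printf("swapped: %#llx\n", (unsigned long long)swapped);	/* 0x1111111122222222 */
	return 0;
}
#endif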
asmlinkage long
compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
struct compat_siginfo __user *uinfo,
struct compat_timespec __user *uts, compat_size_t sigsetsize)
{
compat_sigset_t s32;
sigset_t s;
int sig;
struct timespec t;
siginfo_t info;
long ret, timeout = 0;
if (sigsetsize != sizeof(sigset_t))
return -EINVAL;
if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t)))
return -EFAULT;
sigset_from_compat(&s, &s32);
	sigdelsetmask(&s, sigmask(SIGKILL) | sigmask(SIGSTOP));
	signotset(&s);
	if (uts) {
		if (get_compat_timespec(&t, uts))
return -EFAULT;
if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0
|| t.tv_sec < 0)
return -EINVAL;
}
spin_lock_irq(&current->sighand->siglock);
sig = dequeue_signal(current, &s, &info);
if (!sig) {
timeout = MAX_SCHEDULE_TIMEOUT;
if (uts)
			timeout = timespec_to_jiffies(&t) +
				  (t.tv_sec || t.tv_nsec);
if (timeout) {
current->real_blocked = current->blocked;
sigandsets(&current->blocked, &current->blocked, &s);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
current->state = TASK_INTERRUPTIBLE;
timeout = schedule_timeout(timeout);
spin_lock_irq(&current->sighand->siglock);
sig = dequeue_signal(current, &s, &info);
current->blocked = current->real_blocked;
siginitset(&current->real_blocked, 0);
recalc_sigpending();
}
}
spin_unlock_irq(&current->sighand->siglock);
if (sig) {
ret = sig;
if (uinfo) {
if (copy_siginfo_to_user32(uinfo, &info))
ret = -EFAULT;
}
	} else {
		ret = timeout ? -EINTR : -EAGAIN;
}
return ret;
}
#ifdef __ARCH_WANT_COMPAT_SYS_TIME
/* compat_time_t is a 32 bit "long" and needs to get converted. */
asmlinkage long compat_sys_time(compat_time_t __user * tloc)
{
compat_time_t i;
struct timeval tv;
do_gettimeofday(&tv);
i = tv.tv_sec;
if (tloc) {
if (put_user(i,tloc))
i = -EFAULT;
}
return i;
}
asmlinkage long compat_sys_stime(compat_time_t __user *tptr)
{
struct timespec tv;
int err;
if (get_user(tv.tv_sec, tptr))
return -EFAULT;
tv.tv_nsec = 0;
err = security_settime(&tv, NULL);
if (err)
return err;
do_settimeofday(&tv);
return 0;
}
#endif /* __ARCH_WANT_COMPAT_SYS_TIME */

118
kernel/configs.c Normal file

@@ -0,0 +1,118 @@
/*
* kernel/configs.c
* Echo the kernel .config file used to build the kernel
*
* Copyright (C) 2002 Khalid Aziz <khalid_aziz@hp.com>
* Copyright (C) 2002 Randy Dunlap <rddunlap@osdl.org>
* Copyright (C) 2002 Al Stone <ahs3@fc.hp.com>
* Copyright (C) 2002 Hewlett-Packard Company
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <asm/uaccess.h>
/**************************************************/
/* the actual current config file */
/*
 * Define kernel_config_data and kernel_config_data_size, which together
 * hold the wrapped and compressed configuration file. The file is first
* with gzip and then bounded by two eight byte magic numbers to allow
* extraction from a binary kernel image:
*
* IKCFG_ST
* <image>
* IKCFG_ED
*/
#define MAGIC_START "IKCFG_ST"
#define MAGIC_END "IKCFG_ED"
#include "config_data.h"
#define MAGIC_SIZE (sizeof(MAGIC_START) - 1)
#define kernel_config_data_size \
(sizeof(kernel_config_data) - 1 - MAGIC_SIZE * 2)
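/*
 * Standalone illustration (not part of this file): the magic markers
 * make it possible to recover the gzipped .config from any kernel
 * image, much as the tree's scripts/extract-ikconfig helper does. A
 * simplified user-space sketch, assuming the whole image fits in
 * memory; pipe its output to gunzip.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static long find(const char *buf, long len, const char *pat)
{
	long n = strlen(pat), i;

	for (i = 0; i + n <= len; i++)
		if (memcmp(buf + i, pat, n) == 0)
			return i;
	return -1;
}

int main(int argc, char **argv)
{
	FILE *f;
	char *buf;
	long len, st, ed;

	if (argc != 2 || !(f = fopen(argv[1], "rb")))
		return 1;
	fseek(f, 0, SEEK_END);
	len = ftell(f);
	rewind(f);
	buf = malloc(len);
	if (!buf || fread(buf, 1, len, f) != (size_t)len)
		return 1;
	st = find(buf, len, "IKCFG_ST");
	ed = find(buf, len, "IKCFG_ED");
	if (st < 0 || ed < st)
		return 1;
	/* skip the 8-byte start marker, stop before the end marker */
	fwrite(buf + st + 8, 1, ed - st - 8, stdout);
	return 0;
}
#endif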
#ifdef CONFIG_IKCONFIG_PROC
/**************************************************/
/* globals and useful constants */
static ssize_t
ikconfig_read_current(struct file *file, char __user *buf,
size_t len, loff_t * offset)
{
loff_t pos = *offset;
ssize_t count;
if (pos >= kernel_config_data_size)
return 0;
count = min(len, (size_t)(kernel_config_data_size - pos));
if (copy_to_user(buf, kernel_config_data + MAGIC_SIZE + pos, count))
return -EFAULT;
*offset += count;
return count;
}
static struct file_operations ikconfig_file_ops = {
.owner = THIS_MODULE,
.read = ikconfig_read_current,
};
/***************************************************/
/* ikconfig_init: start up everything we need to */
static int __init ikconfig_init(void)
{
struct proc_dir_entry *entry;
/* create the current config file */
entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO,
&proc_root);
if (!entry)
return -ENOMEM;
entry->proc_fops = &ikconfig_file_ops;
entry->size = kernel_config_data_size;
return 0;
}
/***************************************************/
/* ikconfig_cleanup: clean up our mess */
static void __exit ikconfig_cleanup(void)
{
remove_proc_entry("config.gz", &proc_root);
}
module_init(ikconfig_init);
module_exit(ikconfig_cleanup);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Randy Dunlap");
MODULE_DESCRIPTION("Echo the kernel .config file used to build the kernel");
#endif /* CONFIG_IKCONFIG_PROC */

193
kernel/cpu.c Normal file

@@ -0,0 +1,193 @@
/* CPU control.
* (C) 2001, 2002, 2003, 2004 Rusty Russell
*
* This code is licenced under the GPL.
*/
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <asm/semaphore.h>
/* This protects CPUs going up and down... */
DECLARE_MUTEX(cpucontrol);
static struct notifier_block *cpu_chain;
/* Need to know about CPUs going up/down? */
int register_cpu_notifier(struct notifier_block *nb)
{
int ret;
if ((ret = down_interruptible(&cpucontrol)) != 0)
return ret;
ret = notifier_chain_register(&cpu_chain, nb);
up(&cpucontrol);
return ret;
}
EXPORT_SYMBOL(register_cpu_notifier);
void unregister_cpu_notifier(struct notifier_block *nb)
{
down(&cpucontrol);
notifier_chain_unregister(&cpu_chain, nb);
up(&cpucontrol);
}
EXPORT_SYMBOL(unregister_cpu_notifier);
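/*
 * Illustrative only: a minimal client of the notifier chain above. The
 * callback and notifier_block names are made up for the sketch; a real
 * user registers from its init path with register_cpu_notifier().
 */
#if 0
static int example_cpu_callback(struct notifier_block *nb,
				unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;

	switch (action) {
	case CPU_UP_PREPARE:
		/* allocate per-cpu state; NOTIFY_BAD would veto the bring-up */
		break;
	case CPU_ONLINE:
		printk(KERN_INFO "cpu %u is now online\n", cpu);
		break;
	case CPU_DEAD:
		/* cpu is gone, free its per-cpu state */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block example_cpu_nb = {
	.notifier_call = example_cpu_callback,
};
#endif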
#ifdef CONFIG_HOTPLUG_CPU
static inline void check_for_tasks(int cpu)
{
struct task_struct *p;
write_lock_irq(&tasklist_lock);
for_each_process(p) {
if (task_cpu(p) == cpu &&
(!cputime_eq(p->utime, cputime_zero) ||
!cputime_eq(p->stime, cputime_zero)))
printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
(state = %ld, flags = %lx) \n",
p->comm, p->pid, cpu, p->state, p->flags);
}
write_unlock_irq(&tasklist_lock);
}
/* Take this CPU down. */
static int take_cpu_down(void *unused)
{
int err;
/* Take offline: makes arch_cpu_down somewhat easier. */
cpu_clear(smp_processor_id(), cpu_online_map);
/* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable();
if (err < 0)
cpu_set(smp_processor_id(), cpu_online_map);
else
/* Force idle task to run as soon as we yield: it should
immediately notice cpu is offline and die quickly. */
sched_idle_next();
return err;
}
int cpu_down(unsigned int cpu)
{
int err;
struct task_struct *p;
cpumask_t old_allowed, tmp;
if ((err = lock_cpu_hotplug_interruptible()) != 0)
return err;
if (num_online_cpus() == 1) {
err = -EBUSY;
goto out;
}
if (!cpu_online(cpu)) {
err = -EINVAL;
goto out;
}
err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
(void *)(long)cpu);
if (err == NOTIFY_BAD) {
printk("%s: attempt to take down CPU %u failed\n",
__FUNCTION__, cpu);
err = -EINVAL;
goto out;
}
/* Ensure that we are not runnable on dying cpu */
old_allowed = current->cpus_allowed;
tmp = CPU_MASK_ALL;
cpu_clear(cpu, tmp);
set_cpus_allowed(current, tmp);
p = __stop_machine_run(take_cpu_down, NULL, cpu);
if (IS_ERR(p)) {
/* CPU didn't die: tell everyone. Can't complain. */
if (notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED,
(void *)(long)cpu) == NOTIFY_BAD)
BUG();
err = PTR_ERR(p);
goto out_allowed;
}
if (cpu_online(cpu))
goto out_thread;
/* Wait for it to sleep (leaving idle task). */
while (!idle_cpu(cpu))
yield();
/* This actually kills the CPU. */
__cpu_die(cpu);
/* Move it here so it can run. */
kthread_bind(p, get_cpu());
put_cpu();
/* CPU is completely dead: tell everyone. Too late to complain. */
if (notifier_call_chain(&cpu_chain, CPU_DEAD, (void *)(long)cpu)
== NOTIFY_BAD)
BUG();
check_for_tasks(cpu);
out_thread:
err = kthread_stop(p);
out_allowed:
set_cpus_allowed(current, old_allowed);
out:
unlock_cpu_hotplug();
return err;
}
#endif /* CONFIG_HOTPLUG_CPU */
int __devinit cpu_up(unsigned int cpu)
{
int ret;
void *hcpu = (void *)(long)cpu;
if ((ret = down_interruptible(&cpucontrol)) != 0)
return ret;
if (cpu_online(cpu) || !cpu_present(cpu)) {
ret = -EINVAL;
goto out;
}
ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
if (ret == NOTIFY_BAD) {
printk("%s: attempt to bring up CPU %u failed\n",
__FUNCTION__, cpu);
ret = -EINVAL;
goto out_notify;
}
/* Arch-specific enabling code. */
ret = __cpu_up(cpu);
if (ret != 0)
goto out_notify;
if (!cpu_online(cpu))
BUG();
/* Now call notifier in preparation. */
notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
out_notify:
if (ret != 0)
notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu);
out:
up(&cpucontrol);
return ret;
}

1564
kernel/cpuset.c Normal file

File diff suppressed because it is too large

158
kernel/dma.c Normal file

@@ -0,0 +1,158 @@
/* $Id: dma.c,v 1.7 1994/12/28 03:35:33 root Exp root $
* linux/kernel/dma.c: A DMA channel allocator. Inspired by linux/kernel/irq.c.
*
* Written by Hennus Bergman, 1992.
*
* 1994/12/26: Changes by Alex Nash to fix a minor bug in /proc/dma.
* In the previous version the reported device could end up being wrong,
* if a device requested a DMA channel that was already in use.
* [It also happened to remove the sizeof(char *) == sizeof(int)
* assumption introduced because of those /proc/dma patches. -- Hennus]
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <asm/dma.h>
#include <asm/system.h>
/* A note on resource allocation:
*
* All drivers needing DMA channels, should allocate and release them
* through the public routines `request_dma()' and `free_dma()'.
*
* In order to avoid problems, all processes should allocate resources in
* the same sequence and release them in the reverse order.
*
* So, when allocating DMAs and IRQs, first allocate the IRQ, then the DMA.
* When releasing them, first release the DMA, then release the IRQ.
* If you don't, you may cause allocation requests to fail unnecessarily.
* This doesn't really matter now, but it will once we get real semaphores
* in the kernel.
*/
DEFINE_SPINLOCK(dma_spin_lock);
/*
* If our port doesn't define this it has no PC like DMA
*/
#ifdef MAX_DMA_CHANNELS
/* Channel n is busy iff dma_chan_busy[n].lock != 0.
* DMA0 used to be reserved for DRAM refresh, but apparently not any more...
* DMA4 is reserved for cascading.
*/
struct dma_chan {
int lock;
const char *device_id;
};
static struct dma_chan dma_chan_busy[MAX_DMA_CHANNELS] = {
[4] = { 1, "cascade" },
};
int request_dma(unsigned int dmanr, const char * device_id)
{
if (dmanr >= MAX_DMA_CHANNELS)
return -EINVAL;
if (xchg(&dma_chan_busy[dmanr].lock, 1) != 0)
return -EBUSY;
dma_chan_busy[dmanr].device_id = device_id;
/* old flag was 0, now contains 1 to indicate busy */
return 0;
} /* request_dma */
void free_dma(unsigned int dmanr)
{
if (dmanr >= MAX_DMA_CHANNELS) {
printk(KERN_WARNING "Trying to free DMA%d\n", dmanr);
return;
}
if (xchg(&dma_chan_busy[dmanr].lock, 0) == 0) {
printk(KERN_WARNING "Trying to free free DMA%d\n", dmanr);
return;
}
} /* free_dma */
#else
int request_dma(unsigned int dmanr, const char *device_id)
{
return -EINVAL;
}
void free_dma(unsigned int dmanr)
{
}
#endif
#ifdef CONFIG_PROC_FS
#ifdef MAX_DMA_CHANNELS
static int proc_dma_show(struct seq_file *m, void *v)
{
int i;
for (i = 0 ; i < MAX_DMA_CHANNELS ; i++) {
if (dma_chan_busy[i].lock) {
seq_printf(m, "%2d: %s\n", i,
dma_chan_busy[i].device_id);
}
}
return 0;
}
#else
static int proc_dma_show(struct seq_file *m, void *v)
{
seq_puts(m, "No DMA\n");
return 0;
}
#endif /* MAX_DMA_CHANNELS */
static int proc_dma_open(struct inode *inode, struct file *file)
{
return single_open(file, proc_dma_show, NULL);
}
static struct file_operations proc_dma_operations = {
.open = proc_dma_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int __init proc_dma_init(void)
{
struct proc_dir_entry *e;
e = create_proc_entry("dma", 0, NULL);
if (e)
e->proc_fops = &proc_dma_operations;
return 0;
}
__initcall(proc_dma_init);
#endif
EXPORT_SYMBOL(request_dma);
EXPORT_SYMBOL(free_dma);
EXPORT_SYMBOL(dma_spin_lock);
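/*
 * Illustrative only: driver-side use of the allocator above, following
 * the IRQ-before-DMA ordering described in the comment at the top of
 * this file. The device name, IRQ and channel numbers are made up.
 */
#if 0
static irqreturn_t example_interrupt(int irq, void *dev_id,
				     struct pt_regs *regs)
{
	return IRQ_HANDLED;
}

static int example_attach(void)
{
	int err;

	err = request_irq(7, example_interrupt, 0, "example", NULL);
	if (err)
		return err;
	err = request_dma(1, "example");	/* -EBUSY if channel taken */
	if (err) {
		free_irq(7, NULL);
		return err;
	}
	return 0;
}

static void example_detach(void)
{
	free_dma(1);		/* release the DMA channel first... */
	free_irq(7, NULL);	/* ...then the IRQ, in reverse order */
}
#endif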

209
kernel/exec_domain.c Normal file

@@ -0,0 +1,209 @@
/*
* Handling of different ABIs (personalities).
*
* We group personalities into execution domains which have their
* own handlers for kernel entry points, signal mapping, etc...
*
* 2001-05-06 Complete rewrite, Christoph Hellwig (hch@infradead.org)
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/personality.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/sysctl.h>
#include <linux/types.h>
static void default_handler(int, struct pt_regs *);
static struct exec_domain *exec_domains = &default_exec_domain;
static DEFINE_RWLOCK(exec_domains_lock);
static u_long ident_map[32] = {
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31
};
struct exec_domain default_exec_domain = {
.name = "Linux", /* name */
.handler = default_handler, /* lcall7 causes a seg fault. */
.pers_low = 0, /* PER_LINUX personality. */
.pers_high = 0, /* PER_LINUX personality. */
.signal_map = ident_map, /* Identity map signals. */
.signal_invmap = ident_map, /* - both ways. */
};
static void
default_handler(int segment, struct pt_regs *regp)
{
set_personality(0);
if (current_thread_info()->exec_domain->handler != default_handler)
current_thread_info()->exec_domain->handler(segment, regp);
else
send_sig(SIGSEGV, current, 1);
}
static struct exec_domain *
lookup_exec_domain(u_long personality)
{
struct exec_domain * ep;
u_long pers = personality(personality);
read_lock(&exec_domains_lock);
for (ep = exec_domains; ep; ep = ep->next) {
if (pers >= ep->pers_low && pers <= ep->pers_high)
if (try_module_get(ep->module))
goto out;
}
#ifdef CONFIG_KMOD
read_unlock(&exec_domains_lock);
request_module("personality-%ld", pers);
read_lock(&exec_domains_lock);
for (ep = exec_domains; ep; ep = ep->next) {
if (pers >= ep->pers_low && pers <= ep->pers_high)
if (try_module_get(ep->module))
goto out;
}
#endif
ep = &default_exec_domain;
out:
read_unlock(&exec_domains_lock);
return (ep);
}
int
register_exec_domain(struct exec_domain *ep)
{
struct exec_domain *tmp;
int err = -EBUSY;
if (ep == NULL)
return -EINVAL;
if (ep->next != NULL)
return -EBUSY;
write_lock(&exec_domains_lock);
for (tmp = exec_domains; tmp; tmp = tmp->next) {
if (tmp == ep)
goto out;
}
ep->next = exec_domains;
exec_domains = ep;
err = 0;
out:
write_unlock(&exec_domains_lock);
return (err);
}
int
unregister_exec_domain(struct exec_domain *ep)
{
struct exec_domain **epp;
epp = &exec_domains;
write_lock(&exec_domains_lock);
for (epp = &exec_domains; *epp; epp = &(*epp)->next) {
if (ep == *epp)
goto unregister;
}
write_unlock(&exec_domains_lock);
return -EINVAL;
unregister:
*epp = ep->next;
ep->next = NULL;
write_unlock(&exec_domains_lock);
return 0;
}
int
__set_personality(u_long personality)
{
struct exec_domain *ep, *oep;
ep = lookup_exec_domain(personality);
if (ep == current_thread_info()->exec_domain) {
current->personality = personality;
return 0;
}
if (atomic_read(&current->fs->count) != 1) {
struct fs_struct *fsp, *ofsp;
fsp = copy_fs_struct(current->fs);
if (fsp == NULL) {
module_put(ep->module);
return -ENOMEM;
}
task_lock(current);
ofsp = current->fs;
current->fs = fsp;
task_unlock(current);
put_fs_struct(ofsp);
}
/*
* At that point we are guaranteed to be the sole owner of
* current->fs.
*/
current->personality = personality;
oep = current_thread_info()->exec_domain;
current_thread_info()->exec_domain = ep;
set_fs_altroot();
module_put(oep->module);
return 0;
}
int
get_exec_domain_list(char *page)
{
struct exec_domain *ep;
int len = 0;
read_lock(&exec_domains_lock);
for (ep = exec_domains; ep && len < PAGE_SIZE - 80; ep = ep->next)
len += sprintf(page + len, "%d-%d\t%-16s\t[%s]\n",
ep->pers_low, ep->pers_high, ep->name,
module_name(ep->module));
read_unlock(&exec_domains_lock);
return (len);
}
asmlinkage long
sys_personality(u_long personality)
{
u_long old = current->personality;
if (personality != 0xffffffff) {
set_personality(personality);
if (current->personality != personality)
return -EINVAL;
}
return (long)old;
}
EXPORT_SYMBOL(register_exec_domain);
EXPORT_SYMBOL(unregister_exec_domain);
EXPORT_SYMBOL(__set_personality);
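/*
 * Standalone illustration (not part of this file): the user-space view
 * of sys_personality() above. Passing 0xffffffff queries the current
 * personality word without changing it.
 */
#if 0
#include <stdio.h>
#include <sys/personality.h>

int main(void)
{
	long cur = personality(0xffffffff);	/* query only */

	printf("personality word: %#lx\n", cur);
	return 0;
}
#endif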

1527
kernel/exit.c Normal file

File diff suppressed because it is too large

67
kernel/extable.c Normal file

@@ -0,0 +1,67 @@
/* Rewritten by Rusty Russell, on the backs of many others...
Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/module.h>
#include <linux/init.h>
#include <asm/uaccess.h>
#include <asm/sections.h>
extern struct exception_table_entry __start___ex_table[];
extern struct exception_table_entry __stop___ex_table[];
/* Sort the kernel's built-in exception table */
void __init sort_main_extable(void)
{
sort_extable(__start___ex_table, __stop___ex_table);
}
/* Given an address, look for it in the exception tables. */
const struct exception_table_entry *search_exception_tables(unsigned long addr)
{
const struct exception_table_entry *e;
e = search_extable(__start___ex_table, __stop___ex_table-1, addr);
if (!e)
e = search_module_extables(addr);
return e;
}
static int core_kernel_text(unsigned long addr)
{
if (addr >= (unsigned long)_stext &&
addr <= (unsigned long)_etext)
return 1;
if (addr >= (unsigned long)_sinittext &&
addr <= (unsigned long)_einittext)
return 1;
return 0;
}
int __kernel_text_address(unsigned long addr)
{
if (core_kernel_text(addr))
return 1;
return __module_text_address(addr) != NULL;
}
int kernel_text_address(unsigned long addr)
{
if (core_kernel_text(addr))
return 1;
return module_text_address(addr) != NULL;
}

1274
kernel/fork.c Normal file

File diff suppressed because it is too large

798
kernel/futex.c Normal file

@@ -0,0 +1,798 @@
/*
* Fast Userspace Mutexes (which I call "Futexes!").
* (C) Rusty Russell, IBM 2002
*
* Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
* (C) Copyright 2003 Red Hat Inc, All Rights Reserved
*
* Removed page pinning, fix privately mapped COW pages and other cleanups
* (C) Copyright 2003, 2004 Jamie Lokier
*
* Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
* enough at me, Linus for the original (flawed) idea, Matthew
* Kirkwood for proof-of-concept implementation.
*
* "The futexes are also cursed."
* "But they come in a choice of three flavours!"
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/futex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
/*
* Futexes are matched on equal values of this key.
* The key type depends on whether it's a shared or private mapping.
* Don't rearrange members without looking at hash_futex().
*
* offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
* We set bit 0 to indicate if it's an inode-based key.
*/
union futex_key {
struct {
unsigned long pgoff;
struct inode *inode;
int offset;
} shared;
struct {
unsigned long uaddr;
struct mm_struct *mm;
int offset;
} private;
struct {
unsigned long word;
void *ptr;
int offset;
} both;
};
/*
* We use this hashed waitqueue instead of a normal wait_queue_t, so
* we can wake only the relevant ones (hashed queues may be shared).
*
* A futex_q has a woken state, just like tasks have TASK_RUNNING.
* It is considered woken when list_empty(&q->list) || q->lock_ptr == 0.
 * The order of wakeup is always to make the first condition true, then
* wake up q->waiters, then make the second condition true.
*/
struct futex_q {
struct list_head list;
wait_queue_head_t waiters;
/* Which hash list lock to use. */
spinlock_t *lock_ptr;
/* Key which the futex is hashed on. */
union futex_key key;
/* For fd, sigio sent using these. */
int fd;
struct file *filp;
};
/*
* Split the global futex_lock into every hash list lock.
*/
struct futex_hash_bucket {
spinlock_t lock;
struct list_head chain;
};
static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
/* Futex-fs vfsmount entry: */
static struct vfsmount *futex_mnt;
/*
* We hash on the keys returned from get_futex_key (see below).
*/
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
u32 hash = jhash2((u32*)&key->both.word,
(sizeof(key->both.word)+sizeof(key->both.ptr))/4,
key->both.offset);
return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
}
/*
* Return 1 if two futex_keys are equal, 0 otherwise.
*/
static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
return (key1->both.word == key2->both.word
&& key1->both.ptr == key2->both.ptr
&& key1->both.offset == key2->both.offset);
}
/*
* Get parameters which are the keys for a futex.
*
* For shared mappings, it's (page->index, vma->vm_file->f_dentry->d_inode,
* offset_within_page). For private mappings, it's (uaddr, current->mm).
* We can usually work out the index without swapping in the page.
*
* Returns: 0, or negative error code.
* The key words are stored in *key on success.
*
 * Should be called with &current->mm->mmap_sem held, but not any spinlocks.
*/
static int get_futex_key(unsigned long uaddr, union futex_key *key)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct page *page;
int err;
/*
* The futex address must be "naturally" aligned.
*/
key->both.offset = uaddr % PAGE_SIZE;
if (unlikely((key->both.offset % sizeof(u32)) != 0))
return -EINVAL;
uaddr -= key->both.offset;
/*
* The futex is hashed differently depending on whether
* it's in a shared or private mapping. So check vma first.
*/
vma = find_extend_vma(mm, uaddr);
if (unlikely(!vma))
return -EFAULT;
/*
* Permissions.
*/
if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
/*
* Private mappings are handled in a simple way.
*
* NOTE: When userspace waits on a MAP_SHARED mapping, even if
* it's a read-only handle, it's expected that futexes attach to
* the object not the particular process. Therefore we use
* VM_MAYSHARE here, not VM_SHARED which is restricted to shared
* mappings of _writable_ handles.
*/
if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
key->private.mm = mm;
key->private.uaddr = uaddr;
return 0;
}
/*
* Linear file mappings are also simple.
*/
key->shared.inode = vma->vm_file->f_dentry->d_inode;
key->both.offset++; /* Bit 0 of offset indicates inode-based key. */
if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
key->shared.pgoff = (((uaddr - vma->vm_start) >> PAGE_SHIFT)
+ vma->vm_pgoff);
return 0;
}
/*
* We could walk the page table to read the non-linear
* pte, and get the page index without fetching the page
* from swap. But that's a lot of code to duplicate here
* for a rare case, so we simply fetch the page.
*/
/*
* Do a quick atomic lookup first - this is the fastpath.
*/
spin_lock(&current->mm->page_table_lock);
page = follow_page(mm, uaddr, 0);
if (likely(page != NULL)) {
key->shared.pgoff =
page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
spin_unlock(&current->mm->page_table_lock);
return 0;
}
spin_unlock(&current->mm->page_table_lock);
/*
* Do it the general way.
*/
err = get_user_pages(current, mm, uaddr, 1, 0, 0, &page, NULL);
if (err >= 0) {
key->shared.pgoff =
page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
put_page(page);
return 0;
}
return err;
}
/*
* Take a reference to the resource addressed by a key.
* Can be called while holding spinlocks.
*
* NOTE: mmap_sem MUST be held between get_futex_key() and calling this
* function, if it is called at all. mmap_sem keeps key->shared.inode valid.
*/
static inline void get_key_refs(union futex_key *key)
{
if (key->both.ptr != 0) {
if (key->both.offset & 1)
atomic_inc(&key->shared.inode->i_count);
else
atomic_inc(&key->private.mm->mm_count);
}
}
/*
* Drop a reference to the resource addressed by a key.
* The hash bucket spinlock must not be held.
*/
static void drop_key_refs(union futex_key *key)
{
if (key->both.ptr != 0) {
if (key->both.offset & 1)
iput(key->shared.inode);
else
mmdrop(key->private.mm);
}
}
static inline int get_futex_value_locked(int *dest, int __user *from)
{
int ret;
inc_preempt_count();
ret = __copy_from_user_inatomic(dest, from, sizeof(int));
dec_preempt_count();
return ret ? -EFAULT : 0;
}
/*
* The hash bucket lock must be held when this is called.
* Afterwards, the futex_q must not be accessed.
*/
static void wake_futex(struct futex_q *q)
{
list_del_init(&q->list);
if (q->filp)
send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
/*
* The lock in wake_up_all() is a crucial memory barrier after the
* list_del_init() and also before assigning to q->lock_ptr.
*/
wake_up_all(&q->waiters);
/*
* The waiting task can free the futex_q as soon as this is written,
* without taking any locks. This must come last.
*/
q->lock_ptr = NULL;
}
/*
* Wake up all waiters hashed on the physical page that is mapped
* to this virtual address:
*/
static int futex_wake(unsigned long uaddr, int nr_wake)
{
union futex_key key;
struct futex_hash_bucket *bh;
struct list_head *head;
struct futex_q *this, *next;
int ret;
down_read(&current->mm->mmap_sem);
ret = get_futex_key(uaddr, &key);
if (unlikely(ret != 0))
goto out;
bh = hash_futex(&key);
spin_lock(&bh->lock);
head = &bh->chain;
list_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key)) {
wake_futex(this);
if (++ret >= nr_wake)
break;
}
}
spin_unlock(&bh->lock);
out:
up_read(&current->mm->mmap_sem);
return ret;
}
/*
* Requeue all waiters hashed on one physical page to another
* physical page.
*/
static int futex_requeue(unsigned long uaddr1, unsigned long uaddr2,
int nr_wake, int nr_requeue, int *valp)
{
union futex_key key1, key2;
struct futex_hash_bucket *bh1, *bh2;
struct list_head *head1;
struct futex_q *this, *next;
int ret, drop_count = 0;
retry:
down_read(&current->mm->mmap_sem);
ret = get_futex_key(uaddr1, &key1);
if (unlikely(ret != 0))
goto out;
ret = get_futex_key(uaddr2, &key2);
if (unlikely(ret != 0))
goto out;
bh1 = hash_futex(&key1);
bh2 = hash_futex(&key2);
	/*
	 * Lock both buckets in address order to avoid an AB-BA deadlock;
	 * when bh1 == bh2 the same lock is taken only once.
	 */
	if (bh1 < bh2)
		spin_lock(&bh1->lock);
	spin_lock(&bh2->lock);
	if (bh1 > bh2)
		spin_lock(&bh1->lock);
if (likely(valp != NULL)) {
int curval;
ret = get_futex_value_locked(&curval, (int __user *)uaddr1);
if (unlikely(ret)) {
spin_unlock(&bh1->lock);
if (bh1 != bh2)
spin_unlock(&bh2->lock);
/* If we would have faulted, release mmap_sem, fault
* it in and start all over again.
*/
up_read(&current->mm->mmap_sem);
ret = get_user(curval, (int __user *)uaddr1);
if (!ret)
goto retry;
return ret;
}
if (curval != *valp) {
ret = -EAGAIN;
goto out_unlock;
}
}
head1 = &bh1->chain;
list_for_each_entry_safe(this, next, head1, list) {
if (!match_futex (&this->key, &key1))
continue;
if (++ret <= nr_wake) {
wake_futex(this);
} else {
list_move_tail(&this->list, &bh2->chain);
this->lock_ptr = &bh2->lock;
this->key = key2;
get_key_refs(&key2);
drop_count++;
if (ret - nr_wake >= nr_requeue)
break;
/* Make sure to stop if key1 == key2 */
if (head1 == &bh2->chain && head1 != &next->list)
head1 = &this->list;
}
}
out_unlock:
spin_unlock(&bh1->lock);
if (bh1 != bh2)
spin_unlock(&bh2->lock);
/* drop_key_refs() must be called outside the spinlocks. */
while (--drop_count >= 0)
drop_key_refs(&key1);
out:
up_read(&current->mm->mmap_sem);
return ret;
}
/* The key must be already stored in q->key. */
static inline struct futex_hash_bucket *
queue_lock(struct futex_q *q, int fd, struct file *filp)
{
struct futex_hash_bucket *bh;
q->fd = fd;
q->filp = filp;
init_waitqueue_head(&q->waiters);
get_key_refs(&q->key);
bh = hash_futex(&q->key);
q->lock_ptr = &bh->lock;
spin_lock(&bh->lock);
return bh;
}
static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *bh)
{
list_add_tail(&q->list, &bh->chain);
spin_unlock(&bh->lock);
}
static inline void
queue_unlock(struct futex_q *q, struct futex_hash_bucket *bh)
{
spin_unlock(&bh->lock);
drop_key_refs(&q->key);
}
/*
* queue_me and unqueue_me must be called as a pair, each
* exactly once. They are called with the hashed spinlock held.
*/
/* The key must be already stored in q->key. */
static void queue_me(struct futex_q *q, int fd, struct file *filp)
{
struct futex_hash_bucket *bh;
bh = queue_lock(q, fd, filp);
__queue_me(q, bh);
}
/* Return 1 if we were still queued (ie. 0 means we were woken) */
static int unqueue_me(struct futex_q *q)
{
int ret = 0;
spinlock_t *lock_ptr;
/* In the common case we don't take the spinlock, which is nice. */
retry:
lock_ptr = q->lock_ptr;
if (lock_ptr != 0) {
spin_lock(lock_ptr);
/*
* q->lock_ptr can change between reading it and
* spin_lock(), causing us to take the wrong lock. This
* corrects the race condition.
*
* Reasoning goes like this: if we have the wrong lock,
* q->lock_ptr must have changed (maybe several times)
* between reading it and the spin_lock(). It can
* change again after the spin_lock() but only if it was
* already changed before the spin_lock(). It cannot,
* however, change back to the original value. Therefore
* we can detect whether we acquired the correct lock.
*/
if (unlikely(lock_ptr != q->lock_ptr)) {
spin_unlock(lock_ptr);
goto retry;
}
WARN_ON(list_empty(&q->list));
list_del(&q->list);
spin_unlock(lock_ptr);
ret = 1;
}
drop_key_refs(&q->key);
return ret;
}
static int futex_wait(unsigned long uaddr, int val, unsigned long time)
{
DECLARE_WAITQUEUE(wait, current);
int ret, curval;
struct futex_q q;
struct futex_hash_bucket *bh;
retry:
down_read(&current->mm->mmap_sem);
ret = get_futex_key(uaddr, &q.key);
if (unlikely(ret != 0))
goto out_release_sem;
bh = queue_lock(&q, -1, NULL);
/*
* Access the page AFTER the futex is queued.
* Order is important:
*
* Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
* Userspace waker: if (cond(var)) { var = new; futex_wake(&var); }
*
* The basic logical guarantee of a futex is that it blocks ONLY
* if cond(var) is known to be true at the time of blocking, for
* any cond. If we queued after testing *uaddr, that would open
* a race condition where we could block indefinitely with
* cond(var) false, which would violate the guarantee.
*
* A consequence is that futex_wait() can return zero and absorb
* a wakeup when *uaddr != val on entry to the syscall. This is
* rare, but normal.
*
* We hold the mmap semaphore, so the mapping cannot have changed
* since we looked it up in get_futex_key.
*/
ret = get_futex_value_locked(&curval, (int __user *)uaddr);
if (unlikely(ret)) {
queue_unlock(&q, bh);
/* If we would have faulted, release mmap_sem, fault it in and
* start all over again.
*/
up_read(&current->mm->mmap_sem);
ret = get_user(curval, (int __user *)uaddr);
if (!ret)
goto retry;
return ret;
}
if (curval != val) {
ret = -EWOULDBLOCK;
queue_unlock(&q, bh);
goto out_release_sem;
}
/* Only actually queue if *uaddr contained val. */
__queue_me(&q, bh);
/*
* Now the futex is queued and we have checked the data, we
* don't want to hold mmap_sem while we sleep.
*/
up_read(&current->mm->mmap_sem);
/*
* There might have been scheduling since the queue_me(), as we
* cannot hold a spinlock across the get_user() in case it
* faults, and we cannot just set TASK_INTERRUPTIBLE state when
* queueing ourselves into the futex hash. This code thus has to
* rely on the futex_wake() code removing us from hash when it
* wakes us up.
*/
/* add_wait_queue is the barrier after __set_current_state. */
__set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&q.waiters, &wait);
/*
* !list_empty() is safe here without any lock.
* q.lock_ptr != 0 is not safe, because of ordering against wakeup.
*/
if (likely(!list_empty(&q.list)))
time = schedule_timeout(time);
__set_current_state(TASK_RUNNING);
/*
* NOTE: we don't remove ourselves from the waitqueue because
* we are the only user of it.
*/
/* If we were woken (and unqueued), we succeeded, whatever. */
if (!unqueue_me(&q))
return 0;
if (time == 0)
return -ETIMEDOUT;
/* We expect signal_pending(current), but another thread may
* have handled it for us already. */
return -EINTR;
out_release_sem:
up_read(&current->mm->mmap_sem);
return ret;
}
static int futex_close(struct inode *inode, struct file *filp)
{
struct futex_q *q = filp->private_data;
unqueue_me(q);
kfree(q);
return 0;
}
/* This is one-shot: once it's gone off you need a new fd */
static unsigned int futex_poll(struct file *filp,
struct poll_table_struct *wait)
{
struct futex_q *q = filp->private_data;
int ret = 0;
poll_wait(filp, &q->waiters, wait);
/*
* list_empty() is safe here without any lock.
* q->lock_ptr != 0 is not safe, because of ordering against wakeup.
*/
if (list_empty(&q->list))
ret = POLLIN | POLLRDNORM;
return ret;
}
static struct file_operations futex_fops = {
.release = futex_close,
.poll = futex_poll,
};
/*
* Signal allows caller to avoid the race which would occur if they
* set the sigio stuff up afterwards.
*/
static int futex_fd(unsigned long uaddr, int signal)
{
struct futex_q *q;
struct file *filp;
int ret, err;
ret = -EINVAL;
if (signal < 0 || signal > _NSIG)
goto out;
ret = get_unused_fd();
if (ret < 0)
goto out;
filp = get_empty_filp();
if (!filp) {
put_unused_fd(ret);
ret = -ENFILE;
goto out;
}
filp->f_op = &futex_fops;
filp->f_vfsmnt = mntget(futex_mnt);
filp->f_dentry = dget(futex_mnt->mnt_root);
filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
if (signal) {
int err;
err = f_setown(filp, current->pid, 1);
if (err < 0) {
put_unused_fd(ret);
put_filp(filp);
ret = err;
goto out;
}
filp->f_owner.signum = signal;
}
q = kmalloc(sizeof(*q), GFP_KERNEL);
if (!q) {
put_unused_fd(ret);
put_filp(filp);
ret = -ENOMEM;
goto out;
}
down_read(&current->mm->mmap_sem);
err = get_futex_key(uaddr, &q->key);
if (unlikely(err != 0)) {
up_read(&current->mm->mmap_sem);
put_unused_fd(ret);
put_filp(filp);
kfree(q);
return err;
}
/*
* queue_me() must be called before releasing mmap_sem, because
* key->shared.inode needs to be referenced while holding it.
*/
filp->private_data = q;
queue_me(q, ret, filp);
up_read(&current->mm->mmap_sem);
/* Now we map fd to filp, so userspace can access it */
fd_install(ret, filp);
out:
return ret;
}
long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
unsigned long uaddr2, int val2, int val3)
{
int ret;
switch (op) {
case FUTEX_WAIT:
ret = futex_wait(uaddr, val, timeout);
break;
case FUTEX_WAKE:
ret = futex_wake(uaddr, val);
break;
case FUTEX_FD:
/* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
ret = futex_fd(uaddr, val);
break;
case FUTEX_REQUEUE:
ret = futex_requeue(uaddr, uaddr2, val, val2, NULL);
break;
case FUTEX_CMP_REQUEUE:
ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
break;
default:
ret = -ENOSYS;
}
return ret;
}
asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
struct timespec __user *utime, u32 __user *uaddr2,
int val3)
{
struct timespec t;
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
int val2 = 0;
if ((op == FUTEX_WAIT) && utime) {
if (copy_from_user(&t, utime, sizeof(t)) != 0)
return -EFAULT;
timeout = timespec_to_jiffies(&t) + 1;
}
/*
* requeue parameter in 'utime' if op == FUTEX_REQUEUE.
*/
if (op >= FUTEX_REQUEUE)
val2 = (int) (unsigned long) utime;
return do_futex((unsigned long)uaddr, op, val, timeout,
(unsigned long)uaddr2, val2, val3);
}
static struct super_block *
futexfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA);
}
static struct file_system_type futex_fs_type = {
.name = "futexfs",
.get_sb = futexfs_get_sb,
.kill_sb = kill_anon_super,
};
static int __init init(void)
{
unsigned int i;
register_filesystem(&futex_fs_type);
futex_mnt = kern_mount(&futex_fs_type);
for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
INIT_LIST_HEAD(&futex_queues[i].chain);
spin_lock_init(&futex_queues[i].lock);
}
return 0;
}
__initcall(init);
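/*
 * Standalone illustration (not part of this file): the user-space
 * wait/wake protocol described in the futex_wait() comment above. The
 * value passed to FUTEX_WAIT guards against a lost wakeup: the kernel
 * only blocks if the word still holds that value. A real implementation
 * would manipulate futex_word with atomic operations.
 */
#if 0
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

static int futex_word;

static void wait_until_woken(void)
{
	/* Loops in case of a spurious return; blocks only while 0. */
	while (futex_word == 0)
		syscall(SYS_futex, &futex_word, FUTEX_WAIT, 0, NULL);
}

static void wake_one(void)
{
	futex_word = 1;		/* make the condition true first... */
	syscall(SYS_futex, &futex_word, FUTEX_WAKE, 1);	/* ...then wake */
}
#endif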

182
kernel/intermodule.c Normal file

@@ -0,0 +1,182 @@
/* Deprecated, do not use. Moved from module.c to here. --RR */
/* Written by Keith Owens <kaos@ocs.com.au> Oct 2000 */
#include <linux/module.h>
#include <linux/kmod.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/slab.h>
/* inter_module functions are always available, even when the kernel is
* compiled without modules. Consumers of inter_module_xxx routines
 * will always work, even when both producer and consumer are built into
 * the kernel; this approach removes lots of #ifdefs in mainline code.
*/
static struct list_head ime_list = LIST_HEAD_INIT(ime_list);
static DEFINE_SPINLOCK(ime_lock);
static int kmalloc_failed;
struct inter_module_entry {
struct list_head list;
const char *im_name;
struct module *owner;
const void *userdata;
};
/**
* inter_module_register - register a new set of inter module data.
* @im_name: an arbitrary string to identify the data, must be unique
* @owner: module that is registering the data, always use THIS_MODULE
* @userdata: pointer to arbitrary userdata to be registered
*
* Description: Check that the im_name has not already been registered,
* complain if it has. For new data, add it to the inter_module_entry
* list.
*/
void inter_module_register(const char *im_name, struct module *owner, const void *userdata)
{
struct list_head *tmp;
struct inter_module_entry *ime, *ime_new;
if (!(ime_new = kmalloc(sizeof(*ime), GFP_KERNEL))) {
/* Overloaded kernel, not fatal */
printk(KERN_ERR
"Aiee, inter_module_register: cannot kmalloc entry for '%s'\n",
im_name);
kmalloc_failed = 1;
return;
}
memset(ime_new, 0, sizeof(*ime_new));
ime_new->im_name = im_name;
ime_new->owner = owner;
ime_new->userdata = userdata;
spin_lock(&ime_lock);
list_for_each(tmp, &ime_list) {
ime = list_entry(tmp, struct inter_module_entry, list);
if (strcmp(ime->im_name, im_name) == 0) {
spin_unlock(&ime_lock);
kfree(ime_new);
/* Program logic error, fatal */
printk(KERN_ERR "inter_module_register: duplicate im_name '%s'", im_name);
BUG();
}
}
list_add(&(ime_new->list), &ime_list);
spin_unlock(&ime_lock);
}
/**
* inter_module_unregister - unregister a set of inter module data.
* @im_name: an arbitrary string to identify the data, must be unique
*
* Description: Check that the im_name has been registered, complain if
* it has not. For existing data, remove it from the
* inter_module_entry list.
*/
void inter_module_unregister(const char *im_name)
{
struct list_head *tmp;
struct inter_module_entry *ime;
spin_lock(&ime_lock);
list_for_each(tmp, &ime_list) {
ime = list_entry(tmp, struct inter_module_entry, list);
if (strcmp(ime->im_name, im_name) == 0) {
list_del(&(ime->list));
spin_unlock(&ime_lock);
kfree(ime);
return;
}
}
spin_unlock(&ime_lock);
if (kmalloc_failed) {
printk(KERN_ERR
"inter_module_unregister: no entry for '%s', "
"probably caused by previous kmalloc failure\n",
im_name);
return;
}
else {
/* Program logic error, fatal */
printk(KERN_ERR "inter_module_unregister: no entry for '%s'", im_name);
BUG();
}
}
/**
* inter_module_get - return arbitrary userdata from another module.
* @im_name: an arbitrary string to identify the data, must be unique
*
* Description: If the im_name has not been registered, return NULL.
* Try to increment the use count on the owning module, if that fails
* then return NULL. Otherwise return the userdata.
*/
static const void *inter_module_get(const char *im_name)
{
struct list_head *tmp;
struct inter_module_entry *ime;
const void *result = NULL;
spin_lock(&ime_lock);
list_for_each(tmp, &ime_list) {
ime = list_entry(tmp, struct inter_module_entry, list);
if (strcmp(ime->im_name, im_name) == 0) {
if (try_module_get(ime->owner))
result = ime->userdata;
break;
}
}
spin_unlock(&ime_lock);
return(result);
}
/**
* inter_module_get_request - im get with automatic request_module.
* @im_name: an arbitrary string to identify the data, must be unique
* @modname: module that is expected to register im_name
*
* Description: If inter_module_get fails, do request_module then retry.
*/
const void *inter_module_get_request(const char *im_name, const char *modname)
{
const void *result = inter_module_get(im_name);
if (!result) {
request_module("%s", modname);
result = inter_module_get(im_name);
}
return(result);
}
/**
* inter_module_put - release use of data from another module.
* @im_name: an arbitrary string to identify the data, must be unique
*
* Description: If the im_name has not been registered, complain,
* otherwise decrement the use count on the owning module.
*/
void inter_module_put(const char *im_name)
{
struct list_head *tmp;
struct inter_module_entry *ime;
spin_lock(&ime_lock);
list_for_each(tmp, &ime_list) {
ime = list_entry(tmp, struct inter_module_entry, list);
if (strcmp(ime->im_name, im_name) == 0) {
if (ime->owner)
module_put(ime->owner);
spin_unlock(&ime_lock);
return;
}
}
spin_unlock(&ime_lock);
printk(KERN_ERR "inter_module_put: no entry for '%s'", im_name);
BUG();
}
EXPORT_SYMBOL(inter_module_register);
EXPORT_SYMBOL(inter_module_unregister);
EXPORT_SYMBOL(inter_module_get_request);
EXPORT_SYMBOL(inter_module_put);
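/*
 * Illustrative only: a producer/consumer pair for the registry above
 * (all names made up; note the interface is already marked deprecated
 * at the top of this file).
 */
#if 0
/* producer module */
static struct example_ops { int (*do_it)(void); } example_ops;

static int __init producer_init(void)
{
	inter_module_register("example_ops", THIS_MODULE, &example_ops);
	return 0;
}

static void __exit producer_exit(void)
{
	inter_module_unregister("example_ops");
}

/* consumer, possibly in another module */
static int consumer_use(void)
{
	const struct example_ops *ops =
		inter_module_get_request("example_ops", "producer");

	if (!ops)
		return -ENODEV;
	ops->do_it();
	inter_module_put("example_ops");	/* drops the module ref */
	return 0;
}
#endif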

5
kernel/irq/Makefile Normal file

@@ -0,0 +1,5 @@
obj-y := handle.o manage.o spurious.o
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
obj-$(CONFIG_PROC_FS) += proc.o

189
kernel/irq/autoprobe.c Normal file

@@ -0,0 +1,189 @@
/*
* linux/kernel/irq/autoprobe.c
*
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
*
* This file contains the interrupt probing code and driver APIs.
*/
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/interrupt.h>
/*
* Autodetection depends on the fact that any interrupt that
* comes in on to an unassigned handler will get stuck with
* "IRQ_WAITING" cleared and the interrupt disabled.
*/
static DECLARE_MUTEX(probe_sem);
/**
* probe_irq_on - begin an interrupt autodetect
*
* Commence probing for an interrupt. The interrupts are scanned
* and a mask of potential interrupt lines is returned.
*
*/
unsigned long probe_irq_on(void)
{
unsigned long val, delay;
irq_desc_t *desc;
unsigned int i;
down(&probe_sem);
/*
* something may have generated an irq long ago and we want to
* flush such a longstanding irq before considering it as spurious.
*/
for (i = NR_IRQS-1; i > 0; i--) {
desc = irq_desc + i;
spin_lock_irq(&desc->lock);
if (!irq_desc[i].action)
irq_desc[i].handler->startup(i);
spin_unlock_irq(&desc->lock);
}
/* Wait for longstanding interrupts to trigger. */
for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
/* about 20ms delay */ barrier();
/*
* enable any unassigned irqs
* (we must startup again here because if a longstanding irq
* happened in the previous stage, it may have masked itself)
*/
for (i = NR_IRQS-1; i > 0; i--) {
desc = irq_desc + i;
spin_lock_irq(&desc->lock);
if (!desc->action) {
desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
if (desc->handler->startup(i))
desc->status |= IRQ_PENDING;
}
spin_unlock_irq(&desc->lock);
}
/*
* Wait for spurious interrupts to trigger
*/
for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
/* about 100ms delay */ barrier();
/*
* Now filter out any obviously spurious interrupts
*/
val = 0;
for (i = 0; i < NR_IRQS; i++) {
irq_desc_t *desc = irq_desc + i;
unsigned int status;
spin_lock_irq(&desc->lock);
status = desc->status;
if (status & IRQ_AUTODETECT) {
/* It triggered already - consider it spurious. */
if (!(status & IRQ_WAITING)) {
desc->status = status & ~IRQ_AUTODETECT;
desc->handler->shutdown(i);
} else
if (i < 32)
val |= 1 << i;
}
spin_unlock_irq(&desc->lock);
}
return val;
}
EXPORT_SYMBOL(probe_irq_on);
/**
* probe_irq_mask - scan a bitmap of interrupt lines
* @val: mask of interrupts to consider
*
* Scan the interrupt lines and return a bitmap of active
* autodetect interrupts. The interrupt probe logic state
* is then returned to its previous value.
*
* Note: we need to scan all the irq's even though we will
* only return autodetect irq numbers - just so that we reset
* them all to a known state.
*/
unsigned int probe_irq_mask(unsigned long val)
{
unsigned int mask;
int i;
mask = 0;
for (i = 0; i < NR_IRQS; i++) {
irq_desc_t *desc = irq_desc + i;
unsigned int status;
spin_lock_irq(&desc->lock);
status = desc->status;
if (status & IRQ_AUTODETECT) {
if (i < 16 && !(status & IRQ_WAITING))
mask |= 1 << i;
desc->status = status & ~IRQ_AUTODETECT;
desc->handler->shutdown(i);
}
spin_unlock_irq(&desc->lock);
}
up(&probe_sem);
return mask & val;
}
EXPORT_SYMBOL(probe_irq_mask);
/**
* probe_irq_off - end an interrupt autodetect
* @val: mask of potential interrupts (unused)
*
* Scans the unused interrupt lines and returns the line which
 * appears to have triggered the interrupt. If no interrupt was
 * found then zero is returned. If more than one interrupt is
 * found then the negative of the first candidate is returned to
 * indicate that there is doubt.
*
* The interrupt probe logic state is returned to its previous
* value.
*
* BUGS: When used in a module (which arguably shouldn't happen)
* nothing prevents two IRQ probe callers from overlapping. The
* results of this are non-optimal.
*/
int probe_irq_off(unsigned long val)
{
int i, irq_found = 0, nr_irqs = 0;
for (i = 0; i < NR_IRQS; i++) {
irq_desc_t *desc = irq_desc + i;
unsigned int status;
spin_lock_irq(&desc->lock);
status = desc->status;
if (status & IRQ_AUTODETECT) {
if (!(status & IRQ_WAITING)) {
if (!nr_irqs)
irq_found = i;
nr_irqs++;
}
desc->status = status & ~IRQ_AUTODETECT;
desc->handler->shutdown(i);
}
spin_unlock_irq(&desc->lock);
}
up(&probe_sem);
if (nr_irqs > 1)
irq_found = -irq_found;
return irq_found;
}
EXPORT_SYMBOL(probe_irq_off);
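/*
 * Illustrative only: the classic driver-side autoprobe sequence built
 * from the calls above. The step that makes the device raise its
 * interrupt is hardware-specific and only sketched here.
 */
#if 0
static int example_find_irq(void)
{
	unsigned long mask;
	int irq;

	mask = probe_irq_on();
	/* ... program the device so that it asserts its interrupt ... */
	udelay(100);			/* give the line time to trigger */
	irq = probe_irq_off(mask);	/* 0: none seen, <0: more than one */

	return (irq > 0) ? irq : -ENODEV;
}
#endif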

193
kernel/irq/handle.c Normal file

@@ -0,0 +1,193 @@
/*
* linux/kernel/irq/handle.c
*
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
*
* This file contains the core interrupt handling code.
*/
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include "internals.h"
/*
* Linux has a controller-independent interrupt architecture.
* Every controller has a 'controller-template', that is used
* by the main code to do the right thing. Each driver-visible
 * interrupt source is transparently wired to the appropriate
* controller. Thus drivers need not be aware of the
* interrupt-controller.
*
* The code is designed to be easily extended with new/different
* interrupt controllers, without having to do assembly magic or
* having to touch the generic code.
*
* Controller mappings for all interrupt sources:
*/
irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
[0 ... NR_IRQS-1] = {
.handler = &no_irq_type,
.lock = SPIN_LOCK_UNLOCKED
}
};
/*
* Generic 'no controller' code
*/
static void end_none(unsigned int irq) { }
static void enable_none(unsigned int irq) { }
static void disable_none(unsigned int irq) { }
static void shutdown_none(unsigned int irq) { }
static unsigned int startup_none(unsigned int irq) { return 0; }
static void ack_none(unsigned int irq)
{
/*
	 * 'What should we do if we get a hw irq event on an illegal vector?'
	 * Each architecture has to answer this itself.
*/
ack_bad_irq(irq);
}
struct hw_interrupt_type no_irq_type = {
.typename = "none",
.startup = startup_none,
.shutdown = shutdown_none,
.enable = enable_none,
.disable = disable_none,
.ack = ack_none,
.end = end_none,
.set_affinity = NULL
};
/*
* Special, empty irq handler:
*/
irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
{
return IRQ_NONE;
}
/*
* Have got an event to handle:
*/
fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
struct irqaction *action)
{
int ret, retval = 0, status = 0;
if (!(action->flags & SA_INTERRUPT))
local_irq_enable();
do {
ret = action->handler(irq, action->dev_id, regs);
if (ret == IRQ_HANDLED)
status |= action->flags;
retval |= ret;
action = action->next;
} while (action);
if (status & SA_SAMPLE_RANDOM)
add_interrupt_randomness(irq);
local_irq_disable();
return retval;
}
/*
* do_IRQ handles all normal device IRQ's (the special
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
{
irq_desc_t *desc = irq_desc + irq;
struct irqaction * action;
unsigned int status;
kstat_this_cpu.irqs[irq]++;
if (desc->status & IRQ_PER_CPU) {
irqreturn_t action_ret;
/*
* No locking required for CPU-local interrupts:
*/
desc->handler->ack(irq);
action_ret = handle_IRQ_event(irq, regs, desc->action);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
desc->handler->end(irq);
return 1;
}
spin_lock(&desc->lock);
desc->handler->ack(irq);
/*
* REPLAY is when Linux resends an IRQ that was dropped earlier
* WAITING is used by probe to mark irqs that are being tested
*/
status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
status |= IRQ_PENDING; /* we _want_ to handle it */
/*
* If the IRQ is disabled for whatever reason, we cannot
* use the action we have.
*/
action = NULL;
if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
action = desc->action;
status &= ~IRQ_PENDING; /* we commit to handling */
status |= IRQ_INPROGRESS; /* we are handling it */
}
desc->status = status;
/*
* If there is no IRQ handler or it was disabled, exit early.
* Since we set PENDING, if another processor is handling
* a different instance of this same irq, the other processor
* will take care of it.
*/
if (unlikely(!action))
goto out;
/*
* Edge triggered interrupts need to remember
* pending events.
* This applies to any hw interrupts that allow a second
* instance of the same irq to arrive while we are in do_IRQ
* or in the handler. But the code here only handles the _second_
* instance of the irq, not the third or fourth. So it is mostly
* useful for irq hardware that does not mask cleanly in an
* SMP environment.
*/
for (;;) {
irqreturn_t action_ret;
spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, regs, action);
spin_lock(&desc->lock);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
if (likely(!(desc->status & IRQ_PENDING)))
break;
desc->status &= ~IRQ_PENDING;
}
desc->status &= ~IRQ_INPROGRESS;
out:
/*
* The ->end() handler has to deal with interrupts which got
* disabled while the handler was running.
*/
desc->handler->end(irq);
spin_unlock(&desc->lock);
return 1;
}
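
Usage sketch (illustrative, not part of the original commit): the IRQ_HANDLED/IRQ_NONE convention that handle_IRQ_event() folds together above is what every driver handler must follow. The device, register offsets and status bit below are assumptions.

#include <linux/interrupt.h>
#include <linux/types.h>
#include <asm/io.h>

struct mydev {
	void __iomem *regs;	/* hypothetical device registers */
};

static irqreturn_t mydev_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
	struct mydev *dev = dev_id;
	u32 status = readl(dev->regs + 0x00);	/* assumed status register */

	if (!(status & 0x1))		/* not ours: vital on shared lines */
		return IRQ_NONE;

	writel(0x1, dev->regs + 0x04);	/* assumed interrupt-ack register */
	return IRQ_HANDLED;	/* counted as progress by note_interrupt() */
}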

18
kernel/irq/internals.h Normal file
View File

@@ -0,0 +1,18 @@
/*
* IRQ subsystem internal functions and variables:
*/
extern int noirqdebug;
#ifdef CONFIG_PROC_FS
extern void register_irq_proc(unsigned int irq);
extern void register_handler_proc(unsigned int irq, struct irqaction *action);
extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
#else
static inline void register_irq_proc(unsigned int irq) { }
static inline void register_handler_proc(unsigned int irq,
struct irqaction *action) { }
static inline void unregister_handler_proc(unsigned int irq,
struct irqaction *action) { }
#endif

349
kernel/irq/manage.c Normal file
View File

@@ -0,0 +1,349 @@
/*
* linux/kernel/irq/manage.c
*
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
*
* This file contains driver APIs to the irq subsystem.
*/
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
#include "internals.h"
#ifdef CONFIG_SMP
cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
/**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
* @irq: interrupt number to wait for
*
* This function waits for any pending IRQ handlers for this interrupt
* to complete before returning. If you use this function while
* holding a resource the IRQ handler may need, you will deadlock.
*
* This function may be called - with care - from IRQ context.
*/
void synchronize_irq(unsigned int irq)
{
struct irq_desc *desc = irq_desc + irq;
while (desc->status & IRQ_INPROGRESS)
cpu_relax();
}
EXPORT_SYMBOL(synchronize_irq);
#endif
/**
* disable_irq_nosync - disable an irq without waiting
* @irq: Interrupt to disable
*
* Disable the selected interrupt line. Disables and Enables are
* nested.
* Unlike disable_irq(), this function does not ensure existing
* instances of the IRQ handler have completed before returning.
*
* This function may be called from IRQ context.
*/
void disable_irq_nosync(unsigned int irq)
{
irq_desc_t *desc = irq_desc + irq;
unsigned long flags;
spin_lock_irqsave(&desc->lock, flags);
if (!desc->depth++) {
desc->status |= IRQ_DISABLED;
desc->handler->disable(irq);
}
spin_unlock_irqrestore(&desc->lock, flags);
}
EXPORT_SYMBOL(disable_irq_nosync);
/**
* disable_irq - disable an irq and wait for completion
* @irq: Interrupt to disable
*
* Disable the selected interrupt line. Enables and Disables are
* nested.
* This function waits for any pending IRQ handlers for this interrupt
* to complete before returning. If you use this function while
* holding a resource the IRQ handler may need, you will deadlock.
*
* This function may be called - with care - from IRQ context.
*/
void disable_irq(unsigned int irq)
{
irq_desc_t *desc = irq_desc + irq;
disable_irq_nosync(irq);
if (desc->action)
synchronize_irq(irq);
}
EXPORT_SYMBOL(disable_irq);
/**
* enable_irq - enable handling of an irq
* @irq: Interrupt to enable
*
* Undoes the effect of one call to disable_irq(). If this
* matches the last disable, processing of interrupts on this
* IRQ line is re-enabled.
*
* This function may be called from IRQ context.
*/
void enable_irq(unsigned int irq)
{
irq_desc_t *desc = irq_desc + irq;
unsigned long flags;
spin_lock_irqsave(&desc->lock, flags);
switch (desc->depth) {
case 0:
WARN_ON(1);
break;
case 1: {
unsigned int status = desc->status & ~IRQ_DISABLED;
desc->status = status;
if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
desc->status = status | IRQ_REPLAY;
hw_resend_irq(desc->handler,irq);
}
desc->handler->enable(irq);
/* fall-through */
}
default:
desc->depth--;
}
spin_unlock_irqrestore(&desc->lock, flags);
}
EXPORT_SYMBOL(enable_irq);
/*
* Internal function that tells the architecture code whether a
* particular irq has been exclusively allocated or is available
* for driver use.
*/
int can_request_irq(unsigned int irq, unsigned long irqflags)
{
struct irqaction *action;
if (irq >= NR_IRQS)
return 0;
action = irq_desc[irq].action;
if (action)
if (irqflags & action->flags & SA_SHIRQ)
action = NULL;
return !action;
}
/*
* Internal function to register an irqaction - typically used to
* allocate special interrupts that are part of the architecture.
*/
int setup_irq(unsigned int irq, struct irqaction * new)
{
struct irq_desc *desc = irq_desc + irq;
struct irqaction *old, **p;
unsigned long flags;
int shared = 0;
if (desc->handler == &no_irq_type)
return -ENOSYS;
/*
* Some drivers like serial.c use request_irq() heavily,
* so we have to be careful not to interfere with a
* running system.
*/
if (new->flags & SA_SAMPLE_RANDOM) {
/*
* This function might sleep, so we want to call it first,
* outside of the atomic block.
* Yes, this might clear the entropy pool if the wrong
* driver is attempted to be loaded, without actually
* installing a new handler. But is that really a problem?
* Only the sysadmin is able to do this anyway.
*/
rand_initialize_irq(irq);
}
/*
* The following block of code has to be executed atomically
*/
spin_lock_irqsave(&desc->lock,flags);
p = &desc->action;
if ((old = *p) != NULL) {
/* Can't share interrupts unless both agree to */
if (!(old->flags & new->flags & SA_SHIRQ)) {
spin_unlock_irqrestore(&desc->lock,flags);
return -EBUSY;
}
/* add new interrupt at end of irq queue */
do {
p = &old->next;
old = *p;
} while (old);
shared = 1;
}
*p = new;
if (!shared) {
desc->depth = 0;
desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT |
IRQ_WAITING | IRQ_INPROGRESS);
if (desc->handler->startup)
desc->handler->startup(irq);
else
desc->handler->enable(irq);
}
spin_unlock_irqrestore(&desc->lock,flags);
new->irq = irq;
register_irq_proc(irq);
new->dir = NULL;
register_handler_proc(irq, new);
return 0;
}
/**
* free_irq - free an interrupt
* @irq: Interrupt line to free
* @dev_id: Device identity to free
*
* Remove an interrupt handler. The handler is removed and if the
* interrupt line is no longer in use by any driver it is disabled.
* On a shared IRQ the caller must ensure the interrupt is disabled
* on the card it drives before calling this function. The function
* does not return until any executing interrupts for this IRQ
* have completed.
*
* This function must not be called from interrupt context.
*/
void free_irq(unsigned int irq, void *dev_id)
{
struct irq_desc *desc;
struct irqaction **p;
unsigned long flags;
if (irq >= NR_IRQS)
return;
desc = irq_desc + irq;
spin_lock_irqsave(&desc->lock,flags);
p = &desc->action;
for (;;) {
struct irqaction * action = *p;
if (action) {
struct irqaction **pp = p;
p = &action->next;
if (action->dev_id != dev_id)
continue;
/* Found it - now remove it from the list of entries */
*pp = action->next;
if (!desc->action) {
desc->status |= IRQ_DISABLED;
if (desc->handler->shutdown)
desc->handler->shutdown(irq);
else
desc->handler->disable(irq);
}
spin_unlock_irqrestore(&desc->lock,flags);
unregister_handler_proc(irq, action);
/* Make sure it's not being used on another CPU */
synchronize_irq(irq);
kfree(action);
return;
}
printk(KERN_ERR "Trying to free free IRQ%d\n",irq);
spin_unlock_irqrestore(&desc->lock,flags);
return;
}
}
EXPORT_SYMBOL(free_irq);
/**
* request_irq - allocate an interrupt line
* @irq: Interrupt line to allocate
* @handler: Function to be called when the IRQ occurs
* @irqflags: Interrupt type flags
* @devname: An ascii name for the claiming device
* @dev_id: A cookie passed back to the handler function
*
* This call allocates interrupt resources and enables the
* interrupt line and IRQ handling. From the point this
* call is made your handler function may be invoked. Since
* your handler function must clear any interrupt the board
* raises, you must take care both to initialise your hardware
* and to set up the interrupt handler in the right order.
*
* Dev_id must be globally unique. Normally the address of the
* device data structure is used as the cookie. Since the handler
* receives this value it makes sense to use it.
*
* If your interrupt is shared you must pass a non NULL dev_id
* as this is required when freeing the interrupt.
*
* Flags:
*
* SA_SHIRQ Interrupt is shared
* SA_INTERRUPT Disable local interrupts while processing
* SA_SAMPLE_RANDOM The interrupt can be used for entropy
*
*/
int request_irq(unsigned int irq,
irqreturn_t (*handler)(int, void *, struct pt_regs *),
unsigned long irqflags, const char * devname, void *dev_id)
{
struct irqaction * action;
int retval;
/*
* Sanity-check: shared interrupts must pass in a real dev-ID,
* otherwise we'll have trouble later trying to figure out
* which interrupt is which (messes up the interrupt freeing
* logic etc).
*/
if ((irqflags & SA_SHIRQ) && !dev_id)
return -EINVAL;
if (irq >= NR_IRQS)
return -EINVAL;
if (!handler)
return -EINVAL;
action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
if (!action)
return -ENOMEM;
action->handler = handler;
action->flags = irqflags;
cpus_clear(action->mask);
action->name = devname;
action->next = NULL;
action->dev_id = dev_id;
retval = setup_irq(irq, action);
if (retval)
kfree(action);
return retval;
}
EXPORT_SYMBOL(request_irq);
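
Usage sketch (illustrative, not part of the original commit) of the request_irq()/free_irq() pairing documented above; the IRQ number, name and stub handler are assumptions.

#include <linux/interrupt.h>
#include <linux/errno.h>

#define MYDEV_IRQ 11		/* assumed; normally found by bus probing */

static int mydev_cookie;	/* unique dev_id, mandatory with SA_SHIRQ */

static irqreturn_t mydev_interrupt(int irq, void *dev_id,
				   struct pt_regs *regs)
{
	return IRQ_HANDLED;	/* stub for illustration */
}

static int mydev_attach(void)
{
	int err;

	err = request_irq(MYDEV_IRQ, mydev_interrupt, SA_SHIRQ,
			  "mydev", &mydev_cookie);
	if (err)
		return err;	/* e.g. -EBUSY if sharing was refused */
	/* ... only now program the hardware to raise interrupts ... */
	return 0;
}

static void mydev_detach(void)
{
	/* On a shared line, quiesce the card first; free_irq() waits for
	 * running handlers and disables the line if this was the last user. */
	free_irq(MYDEV_IRQ, &mydev_cookie);
}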

159
kernel/irq/proc.c Normal file
View File

@@ -0,0 +1,159 @@
/*
* linux/kernel/irq/proc.c
*
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
*
* This file contains the /proc/irq/ handling code.
*/
#include <linux/irq.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS];
#ifdef CONFIG_SMP
/*
* The /proc/irq/<irq>/smp_affinity values:
*/
static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
void __attribute__((weak))
proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
{
irq_affinity[irq] = mask_val;
irq_desc[irq].handler->set_affinity(irq, mask_val);
}
static int irq_affinity_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]);
if (count - len < 2)
return -EINVAL;
len += sprintf(page + len, "\n");
return len;
}
int no_irq_affinity;
static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
unsigned long count, void *data)
{
unsigned int irq = (int)(long)data, full_count = count, err;
cpumask_t new_value, tmp;
if (!irq_desc[irq].handler->set_affinity || no_irq_affinity)
return -EIO;
err = cpumask_parse(buffer, count, new_value);
if (err)
return err;
/*
* Do not allow disabling IRQs completely - it's too easy a
* way to make the system unusable accidentally :-) At least
* one online CPU still has to be targeted.
*/
cpus_and(tmp, new_value, cpu_online_map);
if (cpus_empty(tmp))
return -EINVAL;
proc_set_irq_affinity(irq, new_value);
return full_count;
}
#endif
#define MAX_NAMELEN 128
static int name_unique(unsigned int irq, struct irqaction *new_action)
{
struct irq_desc *desc = irq_desc + irq;
struct irqaction *action;
for (action = desc->action ; action; action = action->next)
if ((action != new_action) && action->name &&
!strcmp(new_action->name, action->name))
return 0;
return 1;
}
void register_handler_proc(unsigned int irq, struct irqaction *action)
{
char name [MAX_NAMELEN];
if (!irq_dir[irq] || action->dir || !action->name ||
!name_unique(irq, action))
return;
memset(name, 0, MAX_NAMELEN);
snprintf(name, MAX_NAMELEN, "%s", action->name);
/* create /proc/irq/1234/handler/ */
action->dir = proc_mkdir(name, irq_dir[irq]);
}
#undef MAX_NAMELEN
#define MAX_NAMELEN 10
void register_irq_proc(unsigned int irq)
{
char name [MAX_NAMELEN];
if (!root_irq_dir ||
(irq_desc[irq].handler == &no_irq_type) ||
irq_dir[irq])
return;
memset(name, 0, MAX_NAMELEN);
sprintf(name, "%d", irq);
/* create /proc/irq/1234 */
irq_dir[irq] = proc_mkdir(name, root_irq_dir);
#ifdef CONFIG_SMP
{
struct proc_dir_entry *entry;
/* create /proc/irq/<irq>/smp_affinity */
entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
if (entry) {
entry->nlink = 1;
entry->data = (void *)(long)irq;
entry->read_proc = irq_affinity_read_proc;
entry->write_proc = irq_affinity_write_proc;
}
smp_affinity_entry[irq] = entry;
}
#endif
}
#undef MAX_NAMELEN
void unregister_handler_proc(unsigned int irq, struct irqaction *action)
{
if (action->dir)
remove_proc_entry(action->dir->name, irq_dir[irq]);
}
void init_irq_proc(void)
{
int i;
/* create /proc/irq */
root_irq_dir = proc_mkdir("irq", NULL);
if (!root_irq_dir)
return;
/*
* Create entries for all existing IRQs.
*/
for (i = 0; i < NR_IRQS; i++)
register_irq_proc(i);
}
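
Userspace sketch (illustrative, not part of the original commit) of the interface this file creates: /proc/irq/<n>/smp_affinity takes a hex CPU mask, and irq_affinity_write_proc() above rejects masks with no online CPU. IRQ 19 is an arbitrary example.

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/irq/19/smp_affinity", "w");

	if (!f) {
		perror("open");
		return 1;
	}
	fprintf(f, "3\n");	/* restrict IRQ 19 to CPUs 0 and 1 */
	if (fclose(f) != 0) {
		perror("write");	/* -EIO if no set_affinity handler */
		return 1;
	}
	return 0;
}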

96
kernel/irq/spurious.c Normal file
View File

@@ -0,0 +1,96 @@
/*
* linux/kernel/irq/spurious.c
*
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
*
* This file contains spurious interrupt handling.
*/
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
/*
* If 99,900 of the previous 100,000 interrupts have not been handled
* then assume that the IRQ is stuck in some manner. Drop a diagnostic
* and try to turn the IRQ off.
*
* (The other 100 of 100,000 interrupts may have come from a correctly
* functioning device sharing an IRQ with the failing one.)
*
* Called under desc->lock
*/
static void
__report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
{
struct irqaction *action;
if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
printk(KERN_ERR "irq event %d: bogus return value %x\n",
irq, action_ret);
} else {
printk(KERN_ERR "irq %d: nobody cared!\n", irq);
}
dump_stack();
printk(KERN_ERR "handlers:\n");
action = desc->action;
while (action) {
printk(KERN_ERR "[<%p>]", action->handler);
print_symbol(" (%s)",
(unsigned long)action->handler);
printk("\n");
action = action->next;
}
}
void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
{
static int count = 100;
if (count > 0) {
count--;
__report_bad_irq(irq, desc, action_ret);
}
}
void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
{
if (action_ret != IRQ_HANDLED) {
desc->irqs_unhandled++;
if (action_ret != IRQ_NONE)
report_bad_irq(irq, desc, action_ret);
}
desc->irq_count++;
if (desc->irq_count < 100000)
return;
desc->irq_count = 0;
if (desc->irqs_unhandled > 99900) {
/*
* The interrupt is stuck
*/
__report_bad_irq(irq, desc, action_ret);
/*
* Now kill the IRQ
*/
printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
desc->status |= IRQ_DISABLED;
desc->handler->disable(irq);
}
desc->irqs_unhandled = 0;
}
int noirqdebug;
int __init noirqdebug_setup(char *str)
{
noirqdebug = 1;
printk(KERN_INFO "IRQ lockup detection disabled\n");
return 1;
}
__setup("noirqdebug", noirqdebug_setup);
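
Standalone sketch (illustrative, not kernel code) of the windowing arithmetic in note_interrupt() above: a line where only one interrupt in two thousand is handled leaves 99,950 of every 100,000 interrupts unhandled, crossing the "more than 99,900" threshold in each window.

#include <stdio.h>

int main(void)
{
	int irq_count = 0, irqs_unhandled = 0, window = 0, i;

	for (i = 0; i < 300000; i++) {
		int handled = (i % 2000) == 0;	/* 1 in 2000: a stuck line */

		if (!handled)
			irqs_unhandled++;
		if (++irq_count < 100000)
			continue;	/* window not yet full */
		irq_count = 0;
		window++;
		if (irqs_unhandled > 99900)
			printf("window %d: would disable the IRQ\n", window);
		irqs_unhandled = 0;
	}
	return 0;
}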

241
kernel/itimer.c Normal file
View File

@@ -0,0 +1,241 @@
/*
* linux/kernel/itimer.c
*
* Copyright (C) 1992 Darren Senn
*/
/* These are all the functions necessary to implement itimers */
#include <linux/mm.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/syscalls.h>
#include <linux/time.h>
#include <linux/posix-timers.h>
#include <asm/uaccess.h>
static unsigned long it_real_value(struct signal_struct *sig)
{
unsigned long val = 0;
if (timer_pending(&sig->real_timer)) {
val = sig->real_timer.expires - jiffies;
/* look out for negative/zero itimer.. */
if ((long) val <= 0)
val = 1;
}
return val;
}
int do_getitimer(int which, struct itimerval *value)
{
struct task_struct *tsk = current;
unsigned long interval, val;
cputime_t cinterval, cval;
switch (which) {
case ITIMER_REAL:
spin_lock_irq(&tsk->sighand->siglock);
interval = tsk->signal->it_real_incr;
val = it_real_value(tsk->signal);
spin_unlock_irq(&tsk->sighand->siglock);
jiffies_to_timeval(val, &value->it_value);
jiffies_to_timeval(interval, &value->it_interval);
break;
case ITIMER_VIRTUAL:
read_lock(&tasklist_lock);
spin_lock_irq(&tsk->sighand->siglock);
cval = tsk->signal->it_virt_expires;
cinterval = tsk->signal->it_virt_incr;
if (!cputime_eq(cval, cputime_zero)) {
struct task_struct *t = tsk;
cputime_t utime = tsk->signal->utime;
do {
utime = cputime_add(utime, t->utime);
t = next_thread(t);
} while (t != tsk);
if (cputime_le(cval, utime)) { /* about to fire */
cval = jiffies_to_cputime(1);
} else {
cval = cputime_sub(cval, utime);
}
}
spin_unlock_irq(&tsk->sighand->siglock);
read_unlock(&tasklist_lock);
cputime_to_timeval(cval, &value->it_value);
cputime_to_timeval(cinterval, &value->it_interval);
break;
case ITIMER_PROF:
read_lock(&tasklist_lock);
spin_lock_irq(&tsk->sighand->siglock);
cval = tsk->signal->it_prof_expires;
cinterval = tsk->signal->it_prof_incr;
if (!cputime_eq(cval, cputime_zero)) {
struct task_struct *t = tsk;
cputime_t ptime = cputime_add(tsk->signal->utime,
tsk->signal->stime);
do {
ptime = cputime_add(ptime,
cputime_add(t->utime,
t->stime));
t = next_thread(t);
} while (t != tsk);
if (cputime_le(cval, ptime)) { /* about to fire */
cval = jiffies_to_cputime(1);
} else {
cval = cputime_sub(cval, ptime);
}
}
spin_unlock_irq(&tsk->sighand->siglock);
read_unlock(&tasklist_lock);
cputime_to_timeval(cval, &value->it_value);
cputime_to_timeval(cinterval, &value->it_interval);
break;
default:
return(-EINVAL);
}
return 0;
}
asmlinkage long sys_getitimer(int which, struct itimerval __user *value)
{
int error = -EFAULT;
struct itimerval get_buffer;
if (value) {
error = do_getitimer(which, &get_buffer);
if (!error &&
copy_to_user(value, &get_buffer, sizeof(get_buffer)))
error = -EFAULT;
}
return error;
}
/*
* Called with P->sighand->siglock held and P->signal->real_timer inactive.
* If interval is nonzero, arm the timer for interval ticks from now.
*/
static inline void it_real_arm(struct task_struct *p, unsigned long interval)
{
p->signal->it_real_value = interval; /* XXX unnecessary field?? */
if (interval == 0)
return;
if (interval > (unsigned long) LONG_MAX)
interval = LONG_MAX;
p->signal->real_timer.expires = jiffies + interval;
add_timer(&p->signal->real_timer);
}
void it_real_fn(unsigned long __data)
{
struct task_struct * p = (struct task_struct *) __data;
send_group_sig_info(SIGALRM, SEND_SIG_PRIV, p);
/*
* Now restart the timer if necessary. We don't need any locking
* here because do_setitimer makes sure we have finished running
* before it touches anything.
*/
it_real_arm(p, p->signal->it_real_incr);
}
int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
{
struct task_struct *tsk = current;
unsigned long val, interval;
cputime_t cval, cinterval, nval, ninterval;
switch (which) {
case ITIMER_REAL:
spin_lock_irq(&tsk->sighand->siglock);
interval = tsk->signal->it_real_incr;
val = it_real_value(tsk->signal);
if (val)
del_timer_sync(&tsk->signal->real_timer);
tsk->signal->it_real_incr =
timeval_to_jiffies(&value->it_interval);
it_real_arm(tsk, timeval_to_jiffies(&value->it_value));
spin_unlock_irq(&tsk->sighand->siglock);
if (ovalue) {
jiffies_to_timeval(val, &ovalue->it_value);
jiffies_to_timeval(interval,
&ovalue->it_interval);
}
break;
case ITIMER_VIRTUAL:
nval = timeval_to_cputime(&value->it_value);
ninterval = timeval_to_cputime(&value->it_interval);
read_lock(&tasklist_lock);
spin_lock_irq(&tsk->sighand->siglock);
cval = tsk->signal->it_virt_expires;
cinterval = tsk->signal->it_virt_incr;
if (!cputime_eq(cval, cputime_zero) ||
!cputime_eq(nval, cputime_zero)) {
if (cputime_gt(nval, cputime_zero))
nval = cputime_add(nval,
jiffies_to_cputime(1));
set_process_cpu_timer(tsk, CPUCLOCK_VIRT,
&nval, &cval);
}
tsk->signal->it_virt_expires = nval;
tsk->signal->it_virt_incr = ninterval;
spin_unlock_irq(&tsk->sighand->siglock);
read_unlock(&tasklist_lock);
if (ovalue) {
cputime_to_timeval(cval, &ovalue->it_value);
cputime_to_timeval(cinterval, &ovalue->it_interval);
}
break;
case ITIMER_PROF:
nval = timeval_to_cputime(&value->it_value);
ninterval = timeval_to_cputime(&value->it_interval);
read_lock(&tasklist_lock);
spin_lock_irq(&tsk->sighand->siglock);
cval = tsk->signal->it_prof_expires;
cinterval = tsk->signal->it_prof_incr;
if (!cputime_eq(cval, cputime_zero) ||
!cputime_eq(nval, cputime_zero)) {
if (cputime_gt(nval, cputime_zero))
nval = cputime_add(nval,
jiffies_to_cputime(1));
set_process_cpu_timer(tsk, CPUCLOCK_PROF,
&nval, &cval);
}
tsk->signal->it_prof_expires = nval;
tsk->signal->it_prof_incr = ninterval;
spin_unlock_irq(&tsk->sighand->siglock);
read_unlock(&tasklist_lock);
if (ovalue) {
cputime_to_timeval(cval, &ovalue->it_value);
cputime_to_timeval(cinterval, &ovalue->it_interval);
}
break;
default:
return -EINVAL;
}
return 0;
}
asmlinkage long sys_setitimer(int which,
struct itimerval __user *value,
struct itimerval __user *ovalue)
{
struct itimerval set_buffer, get_buffer;
int error;
if (value) {
if(copy_from_user(&set_buffer, value, sizeof(set_buffer)))
return -EFAULT;
} else
memset((char *) &set_buffer, 0, sizeof(set_buffer));
error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL);
if (error || !ovalue)
return error;
if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer)))
return -EFAULT;
return 0;
}
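
Userspace view (illustrative, not part of the original commit) of the sys_setitimer()/sys_getitimer() pair above: a recurring one-second ITIMER_REAL delivering SIGALRM.

#include <stdio.h>
#include <signal.h>
#include <sys/time.h>
#include <unistd.h>

static void on_alarm(int sig)
{
	write(1, "tick\n", 5);		/* async-signal-safe */
}

int main(void)
{
	struct itimerval it = {
		.it_interval = { .tv_sec = 1, .tv_usec = 0 },	/* reload */
		.it_value    = { .tv_sec = 1, .tv_usec = 0 },	/* first fire */
	};

	signal(SIGALRM, on_alarm);
	if (setitimer(ITIMER_REAL, &it, NULL) < 0) {
		perror("setitimer");
		return 1;
	}
	for (;;)
		pause();		/* woken once per second by SIGALRM */
}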

411
kernel/kallsyms.c Normal file
View File

@@ -0,0 +1,411 @@
/*
* kallsyms.c: in-kernel printing of symbolic oopses and stack traces.
*
* Rewritten and vastly simplified by Rusty Russell for in-kernel
* module loader:
* Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
*
* ChangeLog:
*
* (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
* Changed the compression method from stem compression to "table lookup"
* compression (see scripts/kallsyms.c for a more complete description)
*/
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/err.h>
#include <linux/proc_fs.h>
#include <linux/mm.h>
#include <asm/sections.h>
#ifdef CONFIG_KALLSYMS_ALL
#define all_var 1
#else
#define all_var 0
#endif
/* These will be re-linked against their real values during the second link stage */
extern unsigned long kallsyms_addresses[] __attribute__((weak));
extern unsigned long kallsyms_num_syms __attribute__((weak,section("data")));
extern u8 kallsyms_names[] __attribute__((weak));
extern u8 kallsyms_token_table[] __attribute__((weak));
extern u16 kallsyms_token_index[] __attribute__((weak));
extern unsigned long kallsyms_markers[] __attribute__((weak));
static inline int is_kernel_inittext(unsigned long addr)
{
if (addr >= (unsigned long)_sinittext
&& addr <= (unsigned long)_einittext)
return 1;
return 0;
}
static inline int is_kernel_text(unsigned long addr)
{
if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext)
return 1;
return in_gate_area_no_task(addr);
}
static inline int is_kernel(unsigned long addr)
{
if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
return 1;
return in_gate_area_no_task(addr);
}
/* expand the compressed symbol data into the resulting uncompressed string,
given the offset to where the symbol is in the compressed stream */
static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
{
int len, skipped_first = 0;
u8 *tptr, *data;
/* get the compressed symbol length from the first symbol byte */
data = &kallsyms_names[off];
len = *data;
data++;
/* update the offset to return the offset for the next symbol on
* the compressed stream */
off += len + 1;
/* for every byte on the compressed symbol data, copy the table
entry for that byte */
while(len) {
tptr = &kallsyms_token_table[ kallsyms_token_index[*data] ];
data++;
len--;
while (*tptr) {
if(skipped_first) {
*result = *tptr;
result++;
} else
skipped_first = 1;
tptr++;
}
}
*result = '\0';
/* return the offset to the next symbol */
return off;
}
/* get symbol type information. This is encoded as a single char at the
* beginning of the symbol name */
static char kallsyms_get_symbol_type(unsigned int off)
{
/* get just the first code, look it up in the token table, and return the
* first char from this token */
return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ];
}
/* find the offset in the compressed stream given an index into the
* kallsyms array */
static unsigned int get_symbol_offset(unsigned long pos)
{
u8 *name;
int i;
/* use the closest marker we have. We have markers every 256 positions,
* so that should be close enough */
name = &kallsyms_names[ kallsyms_markers[pos>>8] ];
/* sequentially scan all the symbols up to the point we're searching for.
* Every symbol is stored in a [<len>][<len> bytes of data] format, so we
* just need to add the len to the current pointer for every symbol we
* wish to skip */
for(i = 0; i < (pos&0xFF); i++)
name = name + (*name) + 1;
return name - kallsyms_names;
}
/* Lookup the address for this symbol. Returns 0 if not found. */
unsigned long kallsyms_lookup_name(const char *name)
{
char namebuf[KSYM_NAME_LEN+1];
unsigned long i;
unsigned int off;
for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
off = kallsyms_expand_symbol(off, namebuf);
if (strcmp(namebuf, name) == 0)
return kallsyms_addresses[i];
}
return module_kallsyms_lookup_name(name);
}
EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
/*
* Lookup an address
* - modname is set to NULL if it's in the kernel
* - we guarantee that the returned name is valid until we reschedule even if
* it resides in a module
* - we also guarantee that modname will be valid until rescheduled
*/
const char *kallsyms_lookup(unsigned long addr,
unsigned long *symbolsize,
unsigned long *offset,
char **modname, char *namebuf)
{
unsigned long i, low, high, mid;
const char *msym;
/* This kernel should never have been booted. */
BUG_ON(!kallsyms_addresses);
namebuf[KSYM_NAME_LEN] = 0;
namebuf[0] = 0;
if ((all_var && is_kernel(addr)) ||
(!all_var && (is_kernel_text(addr) || is_kernel_inittext(addr)))) {
unsigned long symbol_end=0;
/* do a binary search on the sorted kallsyms_addresses array */
low = 0;
high = kallsyms_num_syms;
while (high-low > 1) {
mid = (low + high) / 2;
if (kallsyms_addresses[mid] <= addr) low = mid;
else high = mid;
}
/* search for the first aliased symbol. Aliased symbols are
symbols with the same address */
while (low && kallsyms_addresses[low - 1] == kallsyms_addresses[low])
--low;
/* Grab name */
kallsyms_expand_symbol(get_symbol_offset(low), namebuf);
/* Search for next non-aliased symbol */
for (i = low + 1; i < kallsyms_num_syms; i++) {
if (kallsyms_addresses[i] > kallsyms_addresses[low]) {
symbol_end = kallsyms_addresses[i];
break;
}
}
/* if we found no next symbol, we use the end of the section */
if (!symbol_end) {
if (is_kernel_inittext(addr))
symbol_end = (unsigned long)_einittext;
else
symbol_end = all_var ? (unsigned long)_end : (unsigned long)_etext;
}
*symbolsize = symbol_end - kallsyms_addresses[low];
*modname = NULL;
*offset = addr - kallsyms_addresses[low];
return namebuf;
}
/* see if it's in a module */
msym = module_address_lookup(addr, symbolsize, offset, modname);
if (msym)
return strncpy(namebuf, msym, KSYM_NAME_LEN);
return NULL;
}
/* Replace "%s" in fmt with the symbolic name for address and printk it. */
void __print_symbol(const char *fmt, unsigned long address)
{
char *modname;
const char *name;
unsigned long offset, size;
char namebuf[KSYM_NAME_LEN+1];
char buffer[sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN +
2*(BITS_PER_LONG*3/10) + MODULE_NAME_LEN + 1];
name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
if (!name)
sprintf(buffer, "0x%lx", address);
else {
if (modname)
sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
size, modname);
else
sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
}
printk(fmt, buffer);
}
/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
struct kallsym_iter
{
loff_t pos;
struct module *owner;
unsigned long value;
unsigned int nameoff; /* If iterating in core kernel symbols */
char type;
char name[KSYM_NAME_LEN+1];
};
/* Only label it "global" if it is exported. */
static void upcase_if_global(struct kallsym_iter *iter)
{
if (is_exported(iter->name, iter->owner))
iter->type += 'A' - 'a';
}
static int get_ksymbol_mod(struct kallsym_iter *iter)
{
iter->owner = module_get_kallsym(iter->pos - kallsyms_num_syms,
&iter->value,
&iter->type, iter->name);
if (iter->owner == NULL)
return 0;
upcase_if_global(iter);
return 1;
}
/* Returns space to next name. */
static unsigned long get_ksymbol_core(struct kallsym_iter *iter)
{
unsigned off = iter->nameoff;
iter->owner = NULL;
iter->value = kallsyms_addresses[iter->pos];
iter->type = kallsyms_get_symbol_type(off);
off = kallsyms_expand_symbol(off, iter->name);
return off - iter->nameoff;
}
static void reset_iter(struct kallsym_iter *iter, loff_t new_pos)
{
iter->name[0] = '\0';
iter->nameoff = get_symbol_offset(new_pos);
iter->pos = new_pos;
}
/* Returns false if pos at or past end of file. */
static int update_iter(struct kallsym_iter *iter, loff_t pos)
{
/* Module symbols can be accessed randomly. */
if (pos >= kallsyms_num_syms) {
iter->pos = pos;
return get_ksymbol_mod(iter);
}
/* If we're not on the desired position, reset to new position. */
if (pos != iter->pos)
reset_iter(iter, pos);
iter->nameoff += get_ksymbol_core(iter);
iter->pos++;
return 1;
}
static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
(*pos)++;
if (!update_iter(m->private, *pos))
return NULL;
return p;
}
static void *s_start(struct seq_file *m, loff_t *pos)
{
if (!update_iter(m->private, *pos))
return NULL;
return m->private;
}
static void s_stop(struct seq_file *m, void *p)
{
}
static int s_show(struct seq_file *m, void *p)
{
struct kallsym_iter *iter = m->private;
/* Some debugging symbols have no name. Ignore them. */
if (!iter->name[0])
return 0;
if (iter->owner)
seq_printf(m, "%0*lx %c %s\t[%s]\n",
(int)(2*sizeof(void*)),
iter->value, iter->type, iter->name,
module_name(iter->owner));
else
seq_printf(m, "%0*lx %c %s\n",
(int)(2*sizeof(void*)),
iter->value, iter->type, iter->name);
return 0;
}
static struct seq_operations kallsyms_op = {
.start = s_start,
.next = s_next,
.stop = s_stop,
.show = s_show
};
static int kallsyms_open(struct inode *inode, struct file *file)
{
/* We keep the iterator in m->private, since the normal case is to
* s_start from where we left off, so we avoid calling
* get_symbol_offset for every symbol */
struct kallsym_iter *iter;
int ret;
iter = kmalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
return -ENOMEM;
reset_iter(iter, 0);
ret = seq_open(file, &kallsyms_op);
if (ret == 0)
((struct seq_file *)file->private_data)->private = iter;
else
kfree(iter);
return ret;
}
static int kallsyms_release(struct inode *inode, struct file *file)
{
struct seq_file *m = (struct seq_file *)file->private_data;
kfree(m->private);
return seq_release(inode, file);
}
static struct file_operations kallsyms_operations = {
.open = kallsyms_open,
.read = seq_read,
.llseek = seq_lseek,
.release = kallsyms_release,
};
static int __init kallsyms_init(void)
{
struct proc_dir_entry *entry;
entry = create_proc_entry("kallsyms", 0444, NULL);
if (entry)
entry->proc_fops = &kallsyms_operations;
return 0;
}
__initcall(kallsyms_init);
EXPORT_SYMBOL(__print_symbol);
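
Userspace sketch (illustrative, not part of the original commit) that consumes the seq_file output produced by s_show() above, one "<address> <type> <name> [module]" line per symbol.

#include <stdio.h>
#include <string.h>

int main(int argc, char **argv)
{
	const char *wanted = argc > 1 ? argv[1] : "panic";
	char line[512], name[128], type;
	unsigned long addr;
	FILE *f = fopen("/proc/kallsyms", "r");

	if (!f) {
		perror("/proc/kallsyms");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		/* the optional [module] column is simply ignored here */
		if (sscanf(line, "%lx %c %127s", &addr, &type, name) == 3 &&
		    strcmp(name, wanted) == 0)
			printf("%s = %#lx (type %c)\n", name, addr, type);
	}
	fclose(f);
	return 0;
}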

168
kernel/kfifo.c Normal file
View File

@@ -0,0 +1,168 @@
/*
* A simple kernel FIFO implementation.
*
* Copyright (C) 2004 Stelian Pop <stelian@popies.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/kfifo.h>
/**
* kfifo_init - allocates a new FIFO using a preallocated buffer
* @buffer: the preallocated buffer to be used.
* @size: the size of the internal buffer; this has to be a power of 2.
* @gfp_mask: get_free_pages mask, passed to kmalloc()
* @lock: the lock to be used to protect the fifo buffer
*
* Do NOT pass the kfifo to kfifo_free() after use! Simply free the
* struct kfifo with kfree().
*/
struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
unsigned int __nocast gfp_mask, spinlock_t *lock)
{
struct kfifo *fifo;
/* size must be a power of 2 */
BUG_ON(size & (size - 1));
fifo = kmalloc(sizeof(struct kfifo), gfp_mask);
if (!fifo)
return ERR_PTR(-ENOMEM);
fifo->buffer = buffer;
fifo->size = size;
fifo->in = fifo->out = 0;
fifo->lock = lock;
return fifo;
}
EXPORT_SYMBOL(kfifo_init);
/**
* kfifo_alloc - allocates a new FIFO and its internal buffer
* @size: the size of the internal buffer to be allocated.
* @gfp_mask: get_free_pages mask, passed to kmalloc()
* @lock: the lock to be used to protect the fifo buffer
*
* The size will be rounded-up to a power of 2.
*/
struct kfifo *kfifo_alloc(unsigned int size, unsigned int __nocast gfp_mask, spinlock_t *lock)
{
unsigned char *buffer;
struct kfifo *ret;
/*
* round up to the next power of 2, since our 'let the indices
* wrap' technique works only in this case.
*/
if (size & (size - 1)) {
BUG_ON(size > 0x80000000);
size = roundup_pow_of_two(size);
}
buffer = kmalloc(size, gfp_mask);
if (!buffer)
return ERR_PTR(-ENOMEM);
ret = kfifo_init(buffer, size, gfp_mask, lock);
if (IS_ERR(ret))
kfree(buffer);
return ret;
}
EXPORT_SYMBOL(kfifo_alloc);
/**
* kfifo_free - frees the FIFO
* @fifo: the fifo to be freed.
*/
void kfifo_free(struct kfifo *fifo)
{
kfree(fifo->buffer);
kfree(fifo);
}
EXPORT_SYMBOL(kfifo_free);
/**
* __kfifo_put - puts some data into the FIFO, no locking version
* @fifo: the fifo to be used.
* @buffer: the data to be added.
* @len: the length of the data to be added.
*
* This function copies at most 'len' bytes from the 'buffer' into
* the FIFO depending on the free space, and returns the number of
* bytes copied.
*
* Note that with only one concurrent reader and one concurrent
* writer, you don't need extra locking to use these functions.
*/
unsigned int __kfifo_put(struct kfifo *fifo,
unsigned char *buffer, unsigned int len)
{
unsigned int l;
len = min(len, fifo->size - fifo->in + fifo->out);
/* first put the data starting from fifo->in to buffer end */
l = min(len, fifo->size - (fifo->in & (fifo->size - 1)));
memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l);
/* then put the rest (if any) at the beginning of the buffer */
memcpy(fifo->buffer, buffer + l, len - l);
fifo->in += len;
return len;
}
EXPORT_SYMBOL(__kfifo_put);
/**
* __kfifo_get - gets some data from the FIFO, no locking version
* @fifo: the fifo to be used.
* @buffer: where the data must be copied.
* @len: the size of the destination buffer.
*
* This function copies at most 'len' bytes from the FIFO into the
* 'buffer' and returns the number of copied bytes.
*
* Note that with only one concurrent reader and one concurrent
* writer, you don't need extra locking to use these functions.
*/
unsigned int __kfifo_get(struct kfifo *fifo,
unsigned char *buffer, unsigned int len)
{
unsigned int l;
len = min(len, fifo->in - fifo->out);
/* first get the data from fifo->out until the end of the buffer */
l = min(len, fifo->size - (fifo->out & (fifo->size - 1)));
memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l);
/* then get the rest (if any) from the beginning of the buffer */
memcpy(buffer + l, fifo->buffer, len - l);
fifo->out += len;
return len;
}
EXPORT_SYMBOL(__kfifo_get);
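
Usage sketch (illustrative, not part of the original commit): with a single producer and a single consumer, the __ variants above need no extra locking, as their comments note; the spinlock passed to kfifo_alloc() is only stored for the locked wrappers declared in <linux/kfifo.h>.

#include <linux/kfifo.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/err.h>

static spinlock_t my_lock = SPIN_LOCK_UNLOCKED;
static struct kfifo *my_fifo;

static int my_fifo_demo(void)
{
	unsigned char out[4];
	unsigned int n;

	my_fifo = kfifo_alloc(100, GFP_KERNEL, &my_lock); /* rounded to 128 */
	if (IS_ERR(my_fifo))
		return PTR_ERR(my_fifo);

	__kfifo_put(my_fifo, (unsigned char *)"abcd", 4);  /* producer side */
	n = __kfifo_get(my_fifo, out, sizeof(out));	   /* consumer side */
	/* n == 4 here; an emptier fifo would return fewer bytes */

	kfifo_free(my_fifo);
	return 0;
}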

256
kernel/kmod.c Normal file
View File

@@ -0,0 +1,256 @@
/*
kmod, the new module loader (replaces kerneld)
Kirk Petersen
Reorganized not to be a daemon by Adam Richter, with guidance
from Greg Zornetzer.
Modified to avoid chroot and file sharing problems.
Mikael Pettersson
Limit the concurrent number of kmod modprobes to catch loops from
"modprobe needs a service that is in a module".
Keith Owens <kaos@ocs.com.au> December 1999
Unblock all signals when we exec a usermode process.
Shuu Yamaguchi <shuu@wondernetworkresources.com> December 2000
call_usermodehelper wait flag, and remove exec_usermodehelper.
Rusty Russell <rusty@rustcorp.com.au> Jan 2003
*/
#define __KERNEL_SYSCALLS__
#include <linux/config.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/unistd.h>
#include <linux/kmod.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/namespace.h>
#include <linux/completion.h>
#include <linux/file.h>
#include <linux/workqueue.h>
#include <linux/security.h>
#include <linux/mount.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <asm/uaccess.h>
extern int max_threads;
static struct workqueue_struct *khelper_wq;
#ifdef CONFIG_KMOD
/*
modprobe_path is set via /proc/sys.
*/
char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
/**
* request_module - try to load a kernel module
* @fmt: printf style format string for the name of the module
* @varargs: arguments as specified in the format string
*
* Load a module using the user mode module loader. The function returns
* zero on success or a negative errno code on failure. Note that a
* successful module load does not mean the module did not then unload
* and exit on an error of its own. Callers must check that the service
* they requested is now available, not blindly invoke it.
*
* If module auto-loading support is disabled then this function
* becomes a no-operation.
*/
int request_module(const char *fmt, ...)
{
va_list args;
char module_name[MODULE_NAME_LEN];
unsigned int max_modprobes;
int ret;
char *argv[] = { modprobe_path, "-q", "--", module_name, NULL };
static char *envp[] = { "HOME=/",
"TERM=linux",
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
NULL };
static atomic_t kmod_concurrent = ATOMIC_INIT(0);
#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
static int kmod_loop_msg;
va_start(args, fmt);
ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
va_end(args);
if (ret >= MODULE_NAME_LEN)
return -ENAMETOOLONG;
/* If modprobe needs a service that is in a module, we get a recursive
* loop. Limit the number of running kmod threads to max_threads/2 or
* MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method
* would be to run the parents of this process, counting how many times
* kmod was invoked. That would mean accessing the internals of the
* process tables to get the command line, proc_pid_cmdline is static
* and it is not worth changing the proc code just to handle this case.
* KAO.
*
* "trace the ppid" is simple, but will fail if someone's
* parent exits. I think this is as good as it gets. --RR
*/
max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
atomic_inc(&kmod_concurrent);
if (atomic_read(&kmod_concurrent) > max_modprobes) {
/* We may be blaming an innocent here, but unlikely */
if (kmod_loop_msg++ < 5)
printk(KERN_ERR
"request_module: runaway loop modprobe %s\n",
module_name);
atomic_dec(&kmod_concurrent);
return -ENOMEM;
}
ret = call_usermodehelper(modprobe_path, argv, envp, 1);
atomic_dec(&kmod_concurrent);
return ret;
}
EXPORT_SYMBOL(request_module);
#endif /* CONFIG_KMOD */
struct subprocess_info {
struct completion *complete;
char *path;
char **argv;
char **envp;
int wait;
int retval;
};
/*
* This is the task which runs the usermode application
*/
static int ____call_usermodehelper(void *data)
{
struct subprocess_info *sub_info = data;
int retval;
/* Unblock all signals. */
flush_signals(current);
spin_lock_irq(&current->sighand->siglock);
flush_signal_handlers(current, 1);
sigemptyset(&current->blocked);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
/* We can run anywhere, unlike our parent keventd(). */
set_cpus_allowed(current, CPU_MASK_ALL);
retval = -EPERM;
if (current->fs->root)
retval = execve(sub_info->path, sub_info->argv,sub_info->envp);
/* Exec failed? */
sub_info->retval = retval;
do_exit(0);
}
/* Keventd can't block, but this (a child) can. */
static int wait_for_helper(void *data)
{
struct subprocess_info *sub_info = data;
pid_t pid;
struct k_sigaction sa;
/* Install a handler: if SIGCLD isn't handled sys_wait4 won't
* populate the status, but will return -ECHILD. */
sa.sa.sa_handler = SIG_IGN;
sa.sa.sa_flags = 0;
siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD));
do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0);
allow_signal(SIGCHLD);
pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD);
if (pid < 0) {
sub_info->retval = pid;
} else {
/*
* Normally it is bogus to call wait4() from in-kernel because
* wait4() wants to write the exit code to a userspace address.
* But wait_for_helper() always runs as keventd, and put_user()
* to a kernel address works OK for kernel threads, due to their
* having an mm_segment_t which spans the entire address space.
*
* Thus the __user pointer cast is valid here.
*/
sys_wait4(pid, (int __user *) &sub_info->retval, 0, NULL);
}
complete(sub_info->complete);
return 0;
}
/* This is run by khelper thread */
static void __call_usermodehelper(void *data)
{
struct subprocess_info *sub_info = data;
pid_t pid;
/* CLONE_VFORK: wait until the usermode helper has execve'd
* successfully. We need the data structures to stay around
* until that is done. */
if (sub_info->wait)
pid = kernel_thread(wait_for_helper, sub_info,
CLONE_FS | CLONE_FILES | SIGCHLD);
else
pid = kernel_thread(____call_usermodehelper, sub_info,
CLONE_VFORK | SIGCHLD);
if (pid < 0) {
sub_info->retval = pid;
complete(sub_info->complete);
} else if (!sub_info->wait)
complete(sub_info->complete);
}
/**
* call_usermodehelper - start a usermode application
* @path: pathname for the application
* @argv: null-terminated argument list
* @envp: null-terminated environment list
* @wait: wait for the application to finish and return status.
*
* Runs a user-space application. The application is started
* asynchronously if wait is not set, and runs as a child of keventd.
* (ie. it runs with full root capabilities).
*
* Must be called from process context. Returns 0 on success, or a
* negative error code if the program could not be exec'd.
*/
int call_usermodehelper(char *path, char **argv, char **envp, int wait)
{
DECLARE_COMPLETION(done);
struct subprocess_info sub_info = {
.complete = &done,
.path = path,
.argv = argv,
.envp = envp,
.wait = wait,
.retval = 0,
};
DECLARE_WORK(work, __call_usermodehelper, &sub_info);
if (!khelper_wq)
return -EBUSY;
if (path[0] == '\0')
return 0;
queue_work(khelper_wq, &work);
wait_for_completion(&done);
return sub_info.retval;
}
EXPORT_SYMBOL(call_usermodehelper);
void __init usermodehelper_init(void)
{
khelper_wq = create_singlethread_workqueue("khelper");
BUG_ON(!khelper_wq);
}
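
Usage sketch (illustrative, not part of the original commit) of call_usermodehelper() as documented above; the helper path and arguments are assumptions. Must be called from process context.

#include <linux/kmod.h>

static int run_helper(void)
{
	char *argv[] = { "/sbin/my-helper", "--event", "demo", NULL };
	static char *envp[] = {
		"HOME=/",
		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
		NULL
	};

	/* wait=1: block until the helper exits and return its status */
	return call_usermodehelper(argv[0], argv, envp, 1);
}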

157
kernel/kprobes.c Normal file
View File

@@ -0,0 +1,157 @@
/*
* Kernel Probes (KProbes)
* kernel/kprobes.c
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2002, 2004
*
* 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
* Probes initial implementation (includes suggestions from
* Rusty Russell).
* 2004-Aug Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
* hlists and exceptions notifier as suggested by Andi Kleen.
* 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
* interface to access function arguments.
* 2004-Sep Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
* exceptions notifier to be first on the priority list.
*/
#include <linux/kprobes.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <asm/kdebug.h>
#define KPROBE_HASH_BITS 6
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
unsigned int kprobe_cpu = NR_CPUS;
static DEFINE_SPINLOCK(kprobe_lock);
/* Locks kprobe: irqs must be disabled */
void lock_kprobes(void)
{
spin_lock(&kprobe_lock);
kprobe_cpu = smp_processor_id();
}
void unlock_kprobes(void)
{
kprobe_cpu = NR_CPUS;
spin_unlock(&kprobe_lock);
}
/* You have to be holding the kprobe_lock */
struct kprobe *get_kprobe(void *addr)
{
struct hlist_head *head;
struct hlist_node *node;
head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
hlist_for_each(node, head) {
struct kprobe *p = hlist_entry(node, struct kprobe, hlist);
if (p->addr == addr)
return p;
}
return NULL;
}
int register_kprobe(struct kprobe *p)
{
int ret = 0;
unsigned long flags = 0;
if ((ret = arch_prepare_kprobe(p)) != 0) {
goto rm_kprobe;
}
spin_lock_irqsave(&kprobe_lock, flags);
INIT_HLIST_NODE(&p->hlist);
if (get_kprobe(p->addr)) {
ret = -EEXIST;
goto out;
}
arch_copy_kprobe(p);
hlist_add_head(&p->hlist,
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
p->opcode = *p->addr;
*p->addr = BREAKPOINT_INSTRUCTION;
flush_icache_range((unsigned long) p->addr,
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
out:
spin_unlock_irqrestore(&kprobe_lock, flags);
rm_kprobe:
if (ret == -EEXIST)
arch_remove_kprobe(p);
return ret;
}
void unregister_kprobe(struct kprobe *p)
{
unsigned long flags;
arch_remove_kprobe(p);
spin_lock_irqsave(&kprobe_lock, flags);
*p->addr = p->opcode;
hlist_del(&p->hlist);
flush_icache_range((unsigned long) p->addr,
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
spin_unlock_irqrestore(&kprobe_lock, flags);
}
static struct notifier_block kprobe_exceptions_nb = {
.notifier_call = kprobe_exceptions_notify,
.priority = 0x7fffffff /* we need to be notified first */
};
int register_jprobe(struct jprobe *jp)
{
/* Todo: Verify probepoint is a function entry point */
jp->kp.pre_handler = setjmp_pre_handler;
jp->kp.break_handler = longjmp_break_handler;
return register_kprobe(&jp->kp);
}
void unregister_jprobe(struct jprobe *jp)
{
unregister_kprobe(&jp->kp);
}
static int __init init_kprobes(void)
{
int i, err = 0;
/* FIXME allocate the probe table, currently defined statically */
/* initialize all list heads */
for (i = 0; i < KPROBE_TABLE_SIZE; i++)
INIT_HLIST_HEAD(&kprobe_table[i]);
err = register_die_notifier(&kprobe_exceptions_nb);
return err;
}
__initcall(init_kprobes);
EXPORT_SYMBOL_GPL(register_kprobe);
EXPORT_SYMBOL_GPL(unregister_kprobe);
EXPORT_SYMBOL_GPL(register_jprobe);
EXPORT_SYMBOL_GPL(unregister_jprobe);
EXPORT_SYMBOL_GPL(jprobe_return);
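
Usage sketch (illustrative, not part of the original commit) of register_kprobe(); do_fork is an arbitrary probe point, resolved here via kallsyms_lookup_name() from kernel/kallsyms.c above.

#include <linux/kprobes.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/errno.h>

static int my_pre(struct kprobe *p, struct pt_regs *regs)
{
	printk(KERN_INFO "kprobe hit at %p\n", p->addr);
	return 0;			/* continue with the probed insn */
}

static struct kprobe my_kp = {
	.pre_handler = my_pre,
};

static int my_probe_init(void)
{
	my_kp.addr = (kprobe_opcode_t *)kallsyms_lookup_name("do_fork");
	if (!my_kp.addr)
		return -ENOENT;
	return register_kprobe(&my_kp);
}

static void my_probe_exit(void)
{
	unregister_kprobe(&my_kp);
}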

57
kernel/ksysfs.c Normal file
View File

@@ -0,0 +1,57 @@
/*
* kernel/ksysfs.c - sysfs attributes in /sys/kernel, which
* are not related to any other subsystem
*
* Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
*
* This file is released under the GPLv2
*
*/
#include <linux/config.h>
#include <linux/kobject.h>
#include <linux/string.h>
#include <linux/sysfs.h>
#include <linux/module.h>
#include <linux/init.h>
#define KERNEL_ATTR_RO(_name) \
static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
#define KERNEL_ATTR_RW(_name) \
static struct subsys_attribute _name##_attr = \
__ATTR(_name, 0644, _name##_show, _name##_store)
#ifdef CONFIG_HOTPLUG
static ssize_t hotplug_seqnum_show(struct subsystem *subsys, char *page)
{
return sprintf(page, "%llu\n", (unsigned long long)hotplug_seqnum);
}
KERNEL_ATTR_RO(hotplug_seqnum);
#endif
decl_subsys(kernel, NULL, NULL);
EXPORT_SYMBOL_GPL(kernel_subsys);
static struct attribute * kernel_attrs[] = {
#ifdef CONFIG_HOTPLUG
&hotplug_seqnum_attr.attr,
#endif
NULL
};
static struct attribute_group kernel_attr_group = {
.attrs = kernel_attrs,
};
static int __init ksysfs_init(void)
{
int error = subsystem_register(&kernel_subsys);
if (!error)
error = sysfs_create_group(&kernel_subsys.kset.kobj,
&kernel_attr_group);
return error;
}
core_initcall(ksysfs_init);
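
Sketch (illustrative, not part of the original commit) of what adding another attribute to this file would look like with the KERNEL_ATTR_RO() helper above; the "uptime_hint" name and value are made up.

#include <linux/jiffies.h>

static ssize_t uptime_hint_show(struct subsystem *subsys, char *page)
{
	return sprintf(page, "%lu\n", jiffies / HZ);	/* rough seconds up */
}
KERNEL_ATTR_RO(uptime_hint);

/* ...and &uptime_hint_attr.attr would be added to kernel_attrs[] so that
 * sysfs_create_group() exports it as /sys/kernel/uptime_hint. */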

202
kernel/kthread.c Normal file
View File

@@ -0,0 +1,202 @@
/* Kernel thread helper functions.
* Copyright (C) 2004 IBM Corporation, Rusty Russell.
*
* Creation is done via keventd, so that we get a clean environment
* even if we're invoked from userspace (think modprobe, hotplug cpu,
* etc.).
*/
#include <linux/sched.h>
#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/unistd.h>
#include <linux/file.h>
#include <linux/module.h>
#include <asm/semaphore.h>
/*
* We don't want to execute off keventd since it might
* hold a semaphore our callers hold too:
*/
static struct workqueue_struct *helper_wq;
struct kthread_create_info
{
/* Information passed to kthread() from keventd. */
int (*threadfn)(void *data);
void *data;
struct completion started;
/* Result passed back to kthread_create() from keventd. */
struct task_struct *result;
struct completion done;
};
struct kthread_stop_info
{
struct task_struct *k;
int err;
struct completion done;
};
/* Thread stopping is done by setting this var; the lock serializes
* multiple kthread_stop calls. */
static DECLARE_MUTEX(kthread_stop_lock);
static struct kthread_stop_info kthread_stop_info;
int kthread_should_stop(void)
{
return (kthread_stop_info.k == current);
}
EXPORT_SYMBOL(kthread_should_stop);
static void kthread_exit_files(void)
{
struct fs_struct *fs;
struct task_struct *tsk = current;
exit_fs(tsk); /* current->fs->count--; */
fs = init_task.fs;
tsk->fs = fs;
atomic_inc(&fs->count);
exit_files(tsk);
current->files = init_task.files;
atomic_inc(&tsk->files->count);
}
static int kthread(void *_create)
{
struct kthread_create_info *create = _create;
int (*threadfn)(void *data);
void *data;
sigset_t blocked;
int ret = -EINTR;
kthread_exit_files();
/* Copy data: it's on keventd's stack */
threadfn = create->threadfn;
data = create->data;
/* Block and flush all signals (in case we're not from keventd). */
sigfillset(&blocked);
sigprocmask(SIG_BLOCK, &blocked, NULL);
flush_signals(current);
/* By default we can run anywhere, unlike keventd. */
set_cpus_allowed(current, CPU_MASK_ALL);
/* OK, tell user we're spawned, wait for stop or wakeup */
__set_current_state(TASK_INTERRUPTIBLE);
complete(&create->started);
schedule();
if (!kthread_should_stop())
ret = threadfn(data);
/* It might have exited on its own, w/o kthread_stop. Check. */
if (kthread_should_stop()) {
kthread_stop_info.err = ret;
complete(&kthread_stop_info.done);
}
return 0;
}
/* We are keventd: create a thread. */
static void keventd_create_kthread(void *_create)
{
struct kthread_create_info *create = _create;
int pid;
/* We want our own signal handler (we take no signals by default). */
pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
if (pid < 0) {
create->result = ERR_PTR(pid);
} else {
wait_for_completion(&create->started);
create->result = find_task_by_pid(pid);
}
complete(&create->done);
}
struct task_struct *kthread_create(int (*threadfn)(void *data),
void *data,
const char namefmt[],
...)
{
struct kthread_create_info create;
DECLARE_WORK(work, keventd_create_kthread, &create);
create.threadfn = threadfn;
create.data = data;
init_completion(&create.started);
init_completion(&create.done);
/*
* The workqueue needs to start up first:
*/
if (!helper_wq)
work.func(work.data);
else {
queue_work(helper_wq, &work);
wait_for_completion(&create.done);
}
if (!IS_ERR(create.result)) {
va_list args;
va_start(args, namefmt);
vsnprintf(create.result->comm, sizeof(create.result->comm),
namefmt, args);
va_end(args);
}
return create.result;
}
EXPORT_SYMBOL(kthread_create);
void kthread_bind(struct task_struct *k, unsigned int cpu)
{
BUG_ON(k->state != TASK_INTERRUPTIBLE);
/* Must have done schedule() in kthread() before we set_task_cpu */
wait_task_inactive(k);
set_task_cpu(k, cpu);
k->cpus_allowed = cpumask_of_cpu(cpu);
}
EXPORT_SYMBOL(kthread_bind);
int kthread_stop(struct task_struct *k)
{
int ret;
down(&kthread_stop_lock);
/* It could exit after stop_info.k set, but before wake_up_process. */
get_task_struct(k);
/* Must init completion *before* thread sees kthread_stop_info.k */
init_completion(&kthread_stop_info.done);
wmb();
/* Now set kthread_should_stop() to true, and wake it up. */
kthread_stop_info.k = k;
wake_up_process(k);
put_task_struct(k);
/* Once it dies, reset stop ptr, gather result and we're done. */
wait_for_completion(&kthread_stop_info.done);
kthread_stop_info.k = NULL;
ret = kthread_stop_info.err;
up(&kthread_stop_lock);
return ret;
}
EXPORT_SYMBOL(kthread_stop);
static __init int helper_init(void)
{
helper_wq = create_singlethread_workqueue("kthread");
BUG_ON(!helper_wq);
return 0;
}
core_initcall(helper_init);
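
Usage sketch (illustrative, not part of the original commit) of the create/bind/stop lifecycle implemented above; the thread body and its pacing are assumptions.

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>

static struct task_struct *my_task;

static int my_thread_fn(void *data)
{
	while (!kthread_should_stop()) {
		/* ... do one unit of work ... */
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(HZ);	/* kthread_stop() wakes us early */
	}
	return 0;			/* becomes kthread_stop()'s result */
}

static int my_start(void)
{
	my_task = kthread_create(my_thread_fn, NULL, "mythread/%d", 0);
	if (IS_ERR(my_task))
		return PTR_ERR(my_task);
	kthread_bind(my_task, 0);	/* optional: pin to CPU 0 */
	wake_up_process(my_task);	/* kthread() parks until woken */
	return 0;
}

static void my_stop(void)
{
	kthread_stop(my_task);
}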

2108
kernel/module.c Normal file

File diff suppressed because it is too large

157
kernel/panic.c Normal file
View File

@@ -0,0 +1,157 @@
/*
* linux/kernel/panic.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
/*
* This function is used throughout the kernel (including mm and fs)
* to indicate a major problem.
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/sysrq.h>
#include <linux/interrupt.h>
#include <linux/nmi.h>
int panic_timeout;
int panic_on_oops;
int tainted;
EXPORT_SYMBOL(panic_timeout);
struct notifier_block *panic_notifier_list;
EXPORT_SYMBOL(panic_notifier_list);
static int __init panic_setup(char *str)
{
panic_timeout = simple_strtoul(str, NULL, 0);
return 1;
}
__setup("panic=", panic_setup);
static long no_blink(long time)
{
return 0;
}
/* Returns how long it waited in ms */
long (*panic_blink)(long time);
EXPORT_SYMBOL(panic_blink);
/**
* panic - halt the system
* @fmt: The text string to print
*
* Display a message, then perform cleanups.
*
* This function never returns.
*/
NORET_TYPE void panic(const char * fmt, ...)
{
long i;
static char buf[1024];
va_list args;
#if defined(CONFIG_ARCH_S390)
unsigned long caller = (unsigned long) __builtin_return_address(0);
#endif
bust_spinlocks(1);
va_start(args, fmt);
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
bust_spinlocks(0);
#ifdef CONFIG_SMP
smp_send_stop();
#endif
notifier_call_chain(&panic_notifier_list, 0, buf);
if (!panic_blink)
panic_blink = no_blink;
if (panic_timeout > 0)
{
/*
* Delay timeout seconds before rebooting the machine.
* We can't use the "normal" timers since we just panicked..
*/
printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
for (i = 0; i < panic_timeout*1000; ) {
touch_nmi_watchdog();
i += panic_blink(i);
mdelay(1);
i++;
}
/*
* Should we run the reboot notifiers? For the moment I'm
* choosing not to. They might crash, be corrupt or do
* more harm than good for other reasons.
*/
machine_restart(NULL);
}
#ifdef __sparc__
{
extern int stop_a_enabled;
/* Make sure the user can actually press L1-A */
stop_a_enabled = 1;
printk(KERN_EMERG "Press L1-A to return to the boot prom\n");
}
#endif
#if defined(CONFIG_ARCH_S390)
disabled_wait(caller);
#endif
local_irq_enable();
for (i = 0;;) {
i += panic_blink(i);
mdelay(1);
i++;
}
}
EXPORT_SYMBOL(panic);
/**
* print_tainted - return a string to represent the kernel taint state.
*
* 'P' - Proprietary module has been loaded.
* 'F' - Module has been forcibly loaded.
* 'S' - SMP with CPUs not designed for SMP.
* 'R' - User forced a module unload.
* 'M' - Machine had a machine check experience.
* 'B' - System has hit bad_page.
*
* The string is overwritten by the next call to print_tainted().
*/
const char *print_tainted(void)
{
static char buf[20];
if (tainted) {
snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c",
tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
tainted & TAINT_FORCED_RMMOD ? 'R' : ' ',
tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
tainted & TAINT_BAD_PAGE ? 'B' : ' ');
}
else
snprintf(buf, sizeof(buf), "Not tainted");
return(buf);
}
void add_taint(unsigned flag)
{
tainted |= flag;
}
EXPORT_SYMBOL(add_taint);
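
Usage sketch (illustrative, not part of the original commit) of hooking the panic_notifier_list that panic() walks above; the callback is hypothetical, and notifier_chain_register() is the registration call of this era (kernel/sys.c).

#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/init.h>

static int my_panic_event(struct notifier_block *nb, unsigned long event,
			  void *buf)
{
	/* buf is the formatted panic message; keep this minimal,
	 * the machine is going down */
	printk(KERN_EMERG "my_panic_event: %s\n", (char *)buf);
	return NOTIFY_DONE;
}

static struct notifier_block my_panic_nb = {
	.notifier_call = my_panic_event,
};

static int __init my_panic_init(void)
{
	notifier_chain_register(&panic_notifier_list, &my_panic_nb);
	return 0;
}
__initcall(my_panic_init);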

721
kernel/params.c Normal file
View File

@@ -0,0 +1,721 @@
/* Helpers for initial module or kernel cmdline parsing
Copyright (C) 2001 Rusty Russell.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/config.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/err.h>
#if 0
#define DEBUGP printk
#else
#define DEBUGP(fmt, a...)
#endif
static inline int dash2underscore(char c)
{
if (c == '-')
return '_';
return c;
}
static inline int parameq(const char *input, const char *paramname)
{
unsigned int i;
for (i = 0; dash2underscore(input[i]) == paramname[i]; i++)
if (input[i] == '\0')
return 1;
return 0;
}
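/*
* Illustrative example (not part of the original file; the names are
* made up): since a '-' in the input is treated as '_',
* parameq("foo-bar", "foo_bar") returns 1, while
* parameq("foo-bar", "foobar") returns 0.
*/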
static int parse_one(char *param,
char *val,
struct kernel_param *params,
unsigned num_params,
int (*handle_unknown)(char *param, char *val))
{
unsigned int i;
/* Find parameter */
for (i = 0; i < num_params; i++) {
if (parameq(param, params[i].name)) {
DEBUGP("They are equal! Calling %p\n",
params[i].set);
return params[i].set(val, &params[i]);
}
}
if (handle_unknown) {
DEBUGP("Unknown argument: calling %p\n", handle_unknown);
return handle_unknown(param, val);
}
DEBUGP("Unknown argument `%s'\n", param);
return -ENOENT;
}
/* You can use " around spaces, but can't escape ". */
/* Hyphens and underscores equivalent in parameter names. */
static char *next_arg(char *args, char **param, char **val)
{
unsigned int i, equals = 0;
int in_quote = 0, quoted = 0;
char *next;
/* Chew any extra spaces */
while (*args == ' ') args++;
if (*args == '"') {
args++;
in_quote = 1;
quoted = 1;
}
for (i = 0; args[i]; i++) {
if (args[i] == ' ' && !in_quote)
break;
if (equals == 0) {
if (args[i] == '=')
equals = i;
}
if (args[i] == '"')
in_quote = !in_quote;
}
*param = args;
if (!equals)
*val = NULL;
else {
args[equals] = '\0';
*val = args + equals + 1;
/* Don't include quotes in value. */
if (**val == '"') {
(*val)++;
if (args[i-1] == '"')
args[i-1] = '\0';
}
if (quoted && args[i-1] == '"')
args[i-1] = '\0';
}
if (args[i]) {
args[i] = '\0';
next = args + i + 1;
} else
next = args + i;
return next;
}
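/*
* Illustrative walk-through (not part of the original file): given
* args == "foo=\"a b\" baz", the first call sets *param to "foo" and
* *val to "a b" (the quotes are stripped in place) and returns a
* pointer to "baz"; the second call sets *param to "baz" and *val
* to NULL, since "baz" carries no equals sign.
*/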
/* Args looks like "foo=bar,bar2 baz=fuz wiz". */
int parse_args(const char *name,
char *args,
struct kernel_param *params,
unsigned num,
int (*unknown)(char *param, char *val))
{
char *param, *val;
DEBUGP("Parsing ARGS: %s\n", args);
while (*args) {
int ret;
args = next_arg(args, &param, &val);
ret = parse_one(param, val, params, num, unknown);
switch (ret) {
case -ENOENT:
printk(KERN_ERR "%s: Unknown parameter `%s'\n",
name, param);
return ret;
case -ENOSPC:
printk(KERN_ERR
"%s: `%s' too large for parameter `%s'\n",
name, val ?: "", param);
return ret;
case 0:
break;
default:
printk(KERN_ERR
"%s: `%s' invalid for parameter `%s'\n",
name, val ?: "", param);
return ret;
}
}
/* All parsed OK. */
return 0;
}
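/*
* Illustrative caller sketch (hypothetical, not part of the original
* file); "example" and "debug" are made-up names. A parameter table
* is wired up roughly like this:
*
* static int debug;
* static struct kernel_param example_params[] = {
*  { .name = "debug", .perm = 0644,
*    .set = param_set_int, .get = param_get_int, .arg = &debug },
* };
*
* parse_args("example", cmdline, example_params,
*     ARRAY_SIZE(example_params), NULL);
*
* With cmdline == "debug=1", param_set_int() stores 1 through &debug.
*/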
/* Lazy bastard, eh? */
#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn) \
int param_set_##name(const char *val, struct kernel_param *kp) \
{ \
char *endp; \
tmptype l; \
\
if (!val) return -EINVAL; \
l = strtolfn(val, &endp, 0); \
if (endp == val || ((type)l != l)) \
return -EINVAL; \
*((type *)kp->arg) = l; \
return 0; \
} \
int param_get_##name(char *buffer, struct kernel_param *kp) \
{ \
return sprintf(buffer, format, *((type *)kp->arg)); \
}
STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, simple_strtoul);
STANDARD_PARAM_DEF(short, short, "%hi", long, simple_strtol);
STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, simple_strtoul);
STANDARD_PARAM_DEF(int, int, "%i", long, simple_strtol);
STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, simple_strtoul);
STANDARD_PARAM_DEF(long, long, "%li", long, simple_strtol);
STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, simple_strtoul);
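/*
* Illustrative note (not part of the original file): each
* STANDARD_PARAM_DEF() line above expands into a param_set_TYPE() /
* param_get_TYPE() pair; param_set_int(), for instance, parses the
* string with simple_strtol(), rejects out-of-range values via the
* ((type)l != l) check, and stores the result through kp->arg. These
* are the handlers that module_param() wires into a parameter table.
*/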
int param_set_charp(const char *val, struct kernel_param *kp)
{
if (!val) {
printk(KERN_ERR "%s: string parameter expected\n",
kp->name);
return -EINVAL;
}
if (strlen(val) > 1024) {
printk(KERN_ERR "%s: string parameter too long\n",
kp->name);
return -ENOSPC;
}
*(char **)kp->arg = (char *)val;
return 0;
}
int param_get_charp(char *buffer, struct kernel_param *kp)
{
return sprintf(buffer, "%s", *((char **)kp->arg));
}
int param_set_bool(const char *val, struct kernel_param *kp)
{
/* No equals means "set"... */
if (!val) val = "1";
/* One of =[yYnN01] */
switch (val[0]) {
case 'y': case 'Y': case '1':
*(int *)kp->arg = 1;
return 0;
case 'n': case 'N': case '0':
*(int *)kp->arg = 0;
return 0;
}
return -EINVAL;
}
int param_get_bool(char *buffer, struct kernel_param *kp)
{
/* Y and N chosen as being relatively non-coder friendly */
return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'Y' : 'N');
}
int param_set_invbool(const char *val, struct kernel_param *kp)
{
int boolval, ret;
struct kernel_param dummy = { .arg = &boolval };
ret = param_set_bool(val, &dummy);
if (ret == 0)
*(int *)kp->arg = !boolval;
return ret;
}
int param_get_invbool(char *buffer, struct kernel_param *kp)
{
int val;
struct kernel_param dummy = { .arg = &val };
val = !*(int *)kp->arg;
return param_get_bool(buffer, &dummy);
}
/* We cheat here and temporarily mangle the string. */
int param_array(const char *name,
const char *val,
unsigned int min, unsigned int max,
void *elem, int elemsize,
int (*set)(const char *, struct kernel_param *kp),
int *num)
{
int ret;
struct kernel_param kp;
char save;
/* Get the name right for errors. */
kp.name = name;
kp.arg = elem;
/* No equals sign? */
if (!val) {
printk(KERN_ERR "%s: expects arguments\n", name);
return -EINVAL;
}
*num = 0;
/* We expect a comma-separated list of values. */
do {
int len;
if (*num == max) {
printk(KERN_ERR "%s: can only take %i arguments\n",
name, max);
return -EINVAL;
}
len = strcspn(val, ",");
/* nul-terminate and parse */
save = val[len];
((char *)val)[len] = '\0';
ret = set(val, &kp);
if (ret != 0)
return ret;
kp.arg += elemsize;
val += len+1;
(*num)++;
} while (save == ',');
if (*num < min) {
printk(KERN_ERR "%s: needs at least %i arguments\n",
name, min);
return -EINVAL;
}
return 0;
}
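/*
* Illustrative example (not part of the original file): for a value
* string "1,2,3" the loop above calls set() three times, once per
* comma-separated token; each token is nul-terminated in place, the
* overwritten character is kept in 'save' to decide whether another
* token follows, and kp.arg advances by elemsize between calls.
*/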
int param_array_set(const char *val, struct kernel_param *kp)
{
struct kparam_array *arr = kp->arg;
return param_array(kp->name, val, 1, arr->max, arr->elem,
arr->elemsize, arr->set, arr->num ?: &arr->max);
}
int param_array_get(char *buffer, struct kernel_param *kp)
{
int i, off, ret;
struct kparam_array *arr = kp->arg;
struct kernel_param p;
p = *kp;
for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) {
if (i)
buffer[off++] = ',';
p.arg = arr->elem + arr->elemsize * i;
ret = arr->get(buffer + off, &p);
if (ret < 0)
return ret;
off += ret;
}
buffer[off] = '\0';
return off;
}
int param_set_copystring(const char *val, struct kernel_param *kp)
{
struct kparam_string *kps = kp->arg;
if (strlen(val)+1 > kps->maxlen) {
printk(KERN_ERR "%s: string doesn't fit in %u chars.\n",
kp->name, kps->maxlen-1);
return -ENOSPC;
}
strcpy(kps->string, val);
return 0;
}
int param_get_string(char *buffer, struct kernel_param *kp)
{
struct kparam_string *kps = kp->arg;
return strlcpy(buffer, kps->string, kps->maxlen);
}
/* sysfs output in /sys/module/XYZ/parameters/ */
extern struct kernel_param __start___param[], __stop___param[];
#define MAX_KBUILD_MODNAME KOBJ_NAME_LEN
struct param_attribute
{
struct module_attribute mattr;
struct kernel_param *param;
};
struct module_param_attrs
{
struct attribute_group grp;
struct param_attribute attrs[0];
};
#define to_param_attr(n) container_of(n, struct param_attribute, mattr)
static ssize_t param_attr_show(struct module_attribute *mattr,
struct module *mod, char *buf)
{
int count;
struct param_attribute *attribute = to_param_attr(mattr);
if (!attribute->param->get)
return -EPERM;
count = attribute->param->get(buf, attribute->param);
if (count > 0) {
strcat(buf, "\n");
++count;
}
return count;
}
/* sysfs always hands a nul-terminated string in buf. We rely on that. */
static ssize_t param_attr_store(struct module_attribute *mattr,
struct module *owner,
const char *buf, size_t len)
{
int err;
struct param_attribute *attribute = to_param_attr(mattr);
if (!attribute->param->set)
return -EPERM;
err = attribute->param->set(buf, attribute->param);
if (!err)
return len;
return err;
}
#ifdef CONFIG_MODULES
#define __modinit
#else
#define __modinit __init
#endif
/*
* param_sysfs_setup - setup sysfs support for one module or KBUILD_MODNAME
* @mk: struct module_kobject (contains parent kobject)
* @kparam: array of struct kernel_param, the actual parameter definitions
* @num_params: number of entries in array
* @name_skip: offset where the parameter name starts in kparam[].name. Needed for built-in "modules"
*
* Create a kobject for a (per-module) group of parameters, and create files
* in sysfs. A pointer to the module_param_attrs is returned on success,
* NULL if there are no parameters to export, or ERR_PTR(err) on failure.
*/
static __modinit struct module_param_attrs *
param_sysfs_setup(struct module_kobject *mk,
struct kernel_param *kparam,
unsigned int num_params,
unsigned int name_skip)
{
struct module_param_attrs *mp;
unsigned int valid_attrs = 0;
unsigned int i, size[2];
struct param_attribute *pattr;
struct attribute **gattr;
int err;
for (i=0; i<num_params; i++) {
if (kparam[i].perm)
valid_attrs++;
}
if (!valid_attrs)
return NULL;
size[0] = ALIGN(sizeof(*mp) +
valid_attrs * sizeof(mp->attrs[0]),
sizeof(mp->grp.attrs[0]));
size[1] = (valid_attrs + 1) * sizeof(mp->grp.attrs[0]);
mp = kmalloc(size[0] + size[1], GFP_KERNEL);
if (!mp)
return ERR_PTR(-ENOMEM);
mp->grp.name = "parameters";
mp->grp.attrs = (void *)mp + size[0];
pattr = &mp->attrs[0];
gattr = &mp->grp.attrs[0];
for (i = 0; i < num_params; i++) {
struct kernel_param *kp = &kparam[i];
if (kp->perm) {
pattr->param = kp;
pattr->mattr.show = param_attr_show;
pattr->mattr.store = param_attr_store;
pattr->mattr.attr.name = (char *)&kp->name[name_skip];
pattr->mattr.attr.owner = mk->mod;
pattr->mattr.attr.mode = kp->perm;
*(gattr++) = &(pattr++)->mattr.attr;
}
}
*gattr = NULL;
if ((err = sysfs_create_group(&mk->kobj, &mp->grp))) {
kfree(mp);
return ERR_PTR(err);
}
return mp;
}
#ifdef CONFIG_MODULES
/*
* module_param_sysfs_setup - setup sysfs support for one module
* @mod: module
* @kparam: module parameters (array)
* @num_params: number of module parameters
*
* Adds sysfs entries for module parameters under
* /sys/module/[mod->name]/parameters/.
*/
int module_param_sysfs_setup(struct module *mod,
struct kernel_param *kparam,
unsigned int num_params)
{
struct module_param_attrs *mp;
mp = param_sysfs_setup(&mod->mkobj, kparam, num_params, 0);
if (IS_ERR(mp))
return PTR_ERR(mp);
mod->param_attrs = mp;
return 0;
}
/*
* module_param_sysfs_remove - remove sysfs support for one module
* @mod: module
*
* Remove sysfs entries for module parameters and the corresponding
* kobject.
*/
void module_param_sysfs_remove(struct module *mod)
{
if (mod->param_attrs) {
sysfs_remove_group(&mod->mkobj.kobj,
&mod->param_attrs->grp);
/* We are positive that no one is using any param
* attrs at this point. Deallocate immediately. */
kfree(mod->param_attrs);
mod->param_attrs = NULL;
}
}
#endif
/*
* kernel_param_sysfs_setup - wrapper for built-in params support
*/
static void __init kernel_param_sysfs_setup(const char *name,
struct kernel_param *kparam,
unsigned int num_params,
unsigned int name_skip)
{
struct module_kobject *mk;
mk = kmalloc(sizeof(struct module_kobject), GFP_KERNEL);
if (!mk)
return;
memset(mk, 0, sizeof(struct module_kobject));
mk->mod = THIS_MODULE;
kobj_set_kset_s(mk, module_subsys);
kobject_set_name(&mk->kobj, name);
kobject_register(&mk->kobj);
/* no need to keep the kobject if no parameter is exported */
if (!param_sysfs_setup(mk, kparam, num_params, name_skip)) {
kobject_unregister(&mk->kobj);
kfree(mk);
}
}
/*
* param_sysfs_builtin - add contents in /sys/module for built-in modules
*
* Add module_parameters to sysfs for "modules" built into the kernel.
*
* The "module" name (KBUILD_MODNAME) is stored before a dot, the
* "parameter" name is stored behind a dot in kernel_param->name. So,
* extract the "module" name for all built-in kernel_param-eters,
* and for all who have the same, call kernel_param_sysfs_setup.
*/
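/*
* Illustrative example (not part of the original file; the names are
* made up): a built-in parameter named "usbcore.blinkenlights" is
* split at the dot into the "module" name "usbcore" and the parameter
* name "blinkenlights", and is exported as
* /sys/module/usbcore/parameters/blinkenlights.
*/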
static void __init param_sysfs_builtin(void)
{
struct kernel_param *kp, *kp_begin = NULL;
unsigned int i, name_len, count = 0;
char modname[MAX_KBUILD_MODNAME + 1] = "";
for (i=0; i < __stop___param - __start___param; i++) {
char *dot;
kp = &__start___param[i];
/* We do not handle args without periods. */
dot = memchr(kp->name, '.', MAX_KBUILD_MODNAME);
if (!dot) {
DEBUGP("couldn't find period in %s\n", kp->name);
continue;
}
name_len = dot - kp->name;
/* new kbuild_modname? */
if (strlen(modname) != name_len
|| strncmp(modname, kp->name, name_len) != 0) {
/* add a new kobject for previous kernel_params. */
if (count)
kernel_param_sysfs_setup(modname,
kp_begin,
count,
strlen(modname)+1);
strncpy(modname, kp->name, name_len);
modname[name_len] = '\0';
count = 0;
kp_begin = kp;
}
count++;
}
/* last kernel_params need to be registered as well */
if (count)
kernel_param_sysfs_setup(modname, kp_begin, count,
strlen(modname)+1);
}
/* module-related sysfs stuff */
#ifdef CONFIG_MODULES
#define to_module_attr(n) container_of(n, struct module_attribute, attr)
#define to_module_kobject(n) container_of(n, struct module_kobject, kobj)
static ssize_t module_attr_show(struct kobject *kobj,
struct attribute *attr,
char *buf)
{
struct module_attribute *attribute;
struct module_kobject *mk;
int ret;
attribute = to_module_attr(attr);
mk = to_module_kobject(kobj);
if (!attribute->show)
return -EPERM;
if (!try_module_get(mk->mod))
return -ENODEV;
ret = attribute->show(attribute, mk->mod, buf);
module_put(mk->mod);
return ret;
}
static ssize_t module_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t len)
{
struct module_attribute *attribute;
struct module_kobject *mk;
int ret;
attribute = to_module_attr(attr);
mk = to_module_kobject(kobj);
if (!attribute->store)
return -EPERM;
if (!try_module_get(mk->mod))
return -ENODEV;
ret = attribute->store(attribute, mk->mod, buf, len);
module_put(mk->mod);
return ret;
}
static struct sysfs_ops module_sysfs_ops = {
.show = module_attr_show,
.store = module_attr_store,
};
#else
static struct sysfs_ops module_sysfs_ops = {
.show = NULL,
.store = NULL,
};
#endif
static struct kobj_type module_ktype = {
.sysfs_ops = &module_sysfs_ops,
};
decl_subsys(module, &module_ktype, NULL);
/*
* param_sysfs_init - wrapper for built-in params support
*/
static int __init param_sysfs_init(void)
{
subsystem_register(&module_subsys);
param_sysfs_builtin();
return 0;
}
__initcall(param_sysfs_init);
EXPORT_SYMBOL(param_set_byte);
EXPORT_SYMBOL(param_get_byte);
EXPORT_SYMBOL(param_set_short);
EXPORT_SYMBOL(param_get_short);
EXPORT_SYMBOL(param_set_ushort);
EXPORT_SYMBOL(param_get_ushort);
EXPORT_SYMBOL(param_set_int);
EXPORT_SYMBOL(param_get_int);
EXPORT_SYMBOL(param_set_uint);
EXPORT_SYMBOL(param_get_uint);
EXPORT_SYMBOL(param_set_long);
EXPORT_SYMBOL(param_get_long);
EXPORT_SYMBOL(param_set_ulong);
EXPORT_SYMBOL(param_get_ulong);
EXPORT_SYMBOL(param_set_charp);
EXPORT_SYMBOL(param_get_charp);
EXPORT_SYMBOL(param_set_bool);
EXPORT_SYMBOL(param_get_bool);
EXPORT_SYMBOL(param_set_invbool);
EXPORT_SYMBOL(param_get_invbool);
EXPORT_SYMBOL(param_array_set);
EXPORT_SYMBOL(param_array_get);
EXPORT_SYMBOL(param_set_copystring);
EXPORT_SYMBOL(param_get_string);

292
kernel/pid.c Normal file
View File

@@ -0,0 +1,292 @@
/*
* Generic pidhash and scalable, time-bounded PID allocator
*
* (C) 2002-2003 William Irwin, IBM
* (C) 2004 William Irwin, Oracle
* (C) 2002-2004 Ingo Molnar, Red Hat
*
* pid-structures are backing objects for tasks sharing a given ID to chain
* against. There is very little to them aside from hashing them and
* parking tasks using given IDs on a list.
*
* The hash is always changed with the tasklist_lock write-acquired,
* and the hash is only accessed with the tasklist_lock at least
* read-acquired, so there's no additional SMP locking needed here.
*
* We have a list of bitmap pages, whose bitmaps represent the PID space.
* Allocating and freeing PIDs is completely lockless. In the worst-case
* allocation scenario, when all but one of the 1 million possible PIDs
* are already allocated, we scan 32 list entries and at most PAGE_SIZE
* bytes. The typical fastpath is a single successful set_bit. Freeing is O(1).
*/
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/hash.h>
#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
static struct hlist_head *pid_hash[PIDTYPE_MAX];
static int pidhash_shift;
int pid_max = PID_MAX_DEFAULT;
int last_pid;
#define RESERVED_PIDS 300
int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;
#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
#define BITS_PER_PAGE (PAGE_SIZE*8)
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
#define mk_pid(map, off) (((map) - pidmap_array)*BITS_PER_PAGE + (off))
#define find_next_offset(map, off) \
find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
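/*
* Illustrative arithmetic (not part of the original file): with 4 KiB
* pages, BITS_PER_PAGE is 32768, so PID 40000 lives in pidmap_array[1]
* at bit offset 7232, and mk_pid(&pidmap_array[1], 7232) maps back
* to 40000.
*/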
/*
* PID-map pages start out as NULL, they get allocated upon
* first use and are never deallocated. This way a low pid_max
* value does not cause lots of bitmaps to be allocated, but
* the scheme scales up to 4 million PIDs at runtime.
*/
typedef struct pidmap {
atomic_t nr_free;
void *page;
} pidmap_t;
static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
{ [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
fastcall void free_pidmap(int pid)
{
pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
int offset = pid & BITS_PER_PAGE_MASK;
clear_bit(offset, map->page);
atomic_inc(&map->nr_free);
}
int alloc_pidmap(void)
{
int i, offset, max_scan, pid, last = last_pid;
pidmap_t *map;
pid = last + 1;
if (pid >= pid_max)
pid = RESERVED_PIDS;
offset = pid & BITS_PER_PAGE_MASK;
map = &pidmap_array[pid/BITS_PER_PAGE];
max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
for (i = 0; i <= max_scan; ++i) {
if (unlikely(!map->page)) {
unsigned long page = get_zeroed_page(GFP_KERNEL);
/*
* Free the page if someone raced with us
* installing it:
*/
spin_lock(&pidmap_lock);
if (map->page)
free_page(page);
else
map->page = (void *)page;
spin_unlock(&pidmap_lock);
if (unlikely(!map->page))
break;
}
if (likely(atomic_read(&map->nr_free))) {
do {
if (!test_and_set_bit(offset, map->page)) {
atomic_dec(&map->nr_free);
last_pid = pid;
return pid;
}
offset = find_next_offset(map, offset);
pid = mk_pid(map, offset);
/*
* find_next_offset() found a bit, the pid from it
* is in-bounds, and if we fell back to the last
* bitmap block and the final block was the same
* as the starting point, pid is before last_pid.
*/
} while (offset < BITS_PER_PAGE && pid < pid_max &&
(i != max_scan || pid < last ||
!((last+1) & BITS_PER_PAGE_MASK)));
}
if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) {
++map;
offset = 0;
} else {
map = &pidmap_array[0];
offset = RESERVED_PIDS;
if (unlikely(last == offset))
break;
}
pid = mk_pid(map, offset);
}
return -1;
}
struct pid * fastcall find_pid(enum pid_type type, int nr)
{
struct hlist_node *elem;
struct pid *pid;
hlist_for_each_entry(pid, elem,
&pid_hash[type][pid_hashfn(nr)], pid_chain) {
if (pid->nr == nr)
return pid;
}
return NULL;
}
int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
{
struct pid *pid, *task_pid;
task_pid = &task->pids[type];
pid = find_pid(type, nr);
if (pid == NULL) {
hlist_add_head(&task_pid->pid_chain,
&pid_hash[type][pid_hashfn(nr)]);
INIT_LIST_HEAD(&task_pid->pid_list);
} else {
INIT_HLIST_NODE(&task_pid->pid_chain);
list_add_tail(&task_pid->pid_list, &pid->pid_list);
}
task_pid->nr = nr;
return 0;
}
static fastcall int __detach_pid(task_t *task, enum pid_type type)
{
struct pid *pid, *pid_next;
int nr = 0;
pid = &task->pids[type];
if (!hlist_unhashed(&pid->pid_chain)) {
hlist_del(&pid->pid_chain);
if (list_empty(&pid->pid_list))
nr = pid->nr;
else {
pid_next = list_entry(pid->pid_list.next,
struct pid, pid_list);
/* insert next pid from pid_list to hash */
hlist_add_head(&pid_next->pid_chain,
&pid_hash[type][pid_hashfn(pid_next->nr)]);
}
}
list_del(&pid->pid_list);
pid->nr = 0;
return nr;
}
void fastcall detach_pid(task_t *task, enum pid_type type)
{
int tmp, nr;
nr = __detach_pid(task, type);
if (!nr)
return;
for (tmp = PIDTYPE_MAX; --tmp >= 0; )
if (tmp != type && find_pid(tmp, nr))
return;
free_pidmap(nr);
}
task_t *find_task_by_pid_type(int type, int nr)
{
struct pid *pid;
pid = find_pid(type, nr);
if (!pid)
return NULL;
return pid_task(&pid->pid_list, type);
}
EXPORT_SYMBOL(find_task_by_pid_type);
/*
* This function switches the PIDs if a non-leader thread calls
* sys_execve() - this must be done without releasing the PID
* (which a detach_pid() would eventually do).
*/
void switch_exec_pids(task_t *leader, task_t *thread)
{
__detach_pid(leader, PIDTYPE_PID);
__detach_pid(leader, PIDTYPE_TGID);
__detach_pid(leader, PIDTYPE_PGID);
__detach_pid(leader, PIDTYPE_SID);
__detach_pid(thread, PIDTYPE_PID);
__detach_pid(thread, PIDTYPE_TGID);
leader->pid = leader->tgid = thread->pid;
thread->pid = thread->tgid;
attach_pid(thread, PIDTYPE_PID, thread->pid);
attach_pid(thread, PIDTYPE_TGID, thread->tgid);
attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp);
attach_pid(thread, PIDTYPE_SID, thread->signal->session);
list_add_tail(&thread->tasks, &init_task.tasks);
attach_pid(leader, PIDTYPE_PID, leader->pid);
attach_pid(leader, PIDTYPE_TGID, leader->tgid);
attach_pid(leader, PIDTYPE_PGID, leader->signal->pgrp);
attach_pid(leader, PIDTYPE_SID, leader->signal->session);
}
/*
* The pid hash table is scaled according to the amount of memory in the
* machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
* more.
*/
void __init pidhash_init(void)
{
int i, j, pidhash_size;
unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);
pidhash_shift = max(4, fls(megabytes * 4));
pidhash_shift = min(12, pidhash_shift);
pidhash_size = 1 << pidhash_shift;
printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
pidhash_size, pidhash_shift,
PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head));
for (i = 0; i < PIDTYPE_MAX; i++) {
pid_hash[i] = alloc_bootmem(pidhash_size *
sizeof(*(pid_hash[i])));
if (!pid_hash[i])
panic("Could not alloc pidhash!\n");
for (j = 0; j < pidhash_size; j++)
INIT_HLIST_HEAD(&pid_hash[i][j]);
}
}
void __init pidmap_init(void)
{
int i;
pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
set_bit(0, pidmap_array->page);
atomic_dec(&pidmap_array->nr_free);
/*
* Allocate PID 0, and hash it via all PID types:
*/
for (i = 0; i < PIDTYPE_MAX; i++)
attach_pid(current, i, 0);
}

1559
kernel/posix-cpu-timers.c Normal file

File diff suppressed because it is too large

1584
kernel/posix-timers.c Normal file

File diff suppressed because it is too large

74
kernel/power/Kconfig Normal file
View File

@@ -0,0 +1,74 @@
config PM
bool "Power Management support"
---help---
"Power Management" means that parts of your computer are shut
off or put into a power conserving "sleep" mode if they are not
being used. There are two competing standards for doing this: APM
and ACPI. If you want to use either one, say Y here and then also
to the requisite support below.
Power Management is most important for battery powered laptop
computers; if you have a laptop, check out the Linux Laptop home
page on the WWW at <http://www.linux-on-laptops.com/> or
Tuxmobil - Linux on Mobile Computers at <http://www.tuxmobil.org/>
and the Battery Powered Linux mini-HOWTO, available from
<http://www.tldp.org/docs.html#howto>.
Note that, even if you say N here, Linux on the x86 architecture
will issue the hlt instruction if nothing is to be done, thereby
sending the processor to sleep and saving power.
config PM_DEBUG
bool "Power Management Debug Support"
depends on PM
---help---
This option enables verbose debugging support in the Power Management
code. This is helpful when debugging and reporting various PM bugs,
like suspend support.
config SOFTWARE_SUSPEND
bool "Software Suspend (EXPERIMENTAL)"
depends on EXPERIMENTAL && PM && SWAP
---help---
Enable the possibility of suspending the machine.
It doesn't need APM.
You may suspend your machine by 'swsusp' or 'shutdown -z <time>'
(patch for sysvinit needed).
It creates an image which is saved in your active swap. Upon next
boot, pass the 'resume=/dev/swappartition' argument to the kernel to
have it detect the saved image, restore memory state from it, and
continue to run as before. If you do not want the previous state to
be reloaded, then use the 'noresume' kernel argument. However, note
that your partitions will be fsck'd and you must re-mkswap your swap
partitions. It does not work with swap files.
Right now you may boot without resuming and then later resume but
in meantime you cannot use those swap partitions/files which were
involved in suspending. Also in this case there is a risk that buffers
on disk won't match with saved ones.
For more information take a look at <file:Documentation/power/swsusp.txt>.
config PM_STD_PARTITION
string "Default resume partition"
depends on SOFTWARE_SUSPEND
default ""
---help---
The default resume partition is the partition that the suspend-
to-disk implementation will look in for a suspended disk image.
The partition specified here will be different for almost every user.
It should be a valid swap partition (at least for now) that is turned
on before suspending.
The partition specified can be overridden by specifying:
resume=/dev/<other device>
which will set the resume partition to the device specified.
Note that there is currently no way to specify which device to save the
suspended image to. It will simply pick the first available swap
device.

11
kernel/power/Makefile Normal file
View File

@@ -0,0 +1,11 @@
ifeq ($(CONFIG_PM_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
endif
swsusp-smp-$(CONFIG_SMP) += smp.o
obj-y := main.o process.o console.o pm.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) disk.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o

58
kernel/power/console.c Normal file
View File

@@ -0,0 +1,58 @@
/*
* kernel/power/console.c - Functions for saving/restoring the console.
*
* Originally from swsusp.
*/
#include <linux/vt_kern.h>
#include <linux/kbd_kern.h>
#include <linux/console.h>
#include "power.h"
static int new_loglevel = 10;
static int orig_loglevel;
#ifdef SUSPEND_CONSOLE
static int orig_fgconsole, orig_kmsg;
#endif
int pm_prepare_console(void)
{
orig_loglevel = console_loglevel;
console_loglevel = new_loglevel;
#ifdef SUSPEND_CONSOLE
acquire_console_sem();
orig_fgconsole = fg_console;
if (vc_allocate(SUSPEND_CONSOLE)) {
/* we can't get a free VC right now. Too bad,
* we don't want to mess up the screen for now. */
release_console_sem();
return 1;
}
set_console(SUSPEND_CONSOLE);
release_console_sem();
if (vt_waitactive(SUSPEND_CONSOLE)) {
pr_debug("Suspend: Can't switch VCs.");
return 1;
}
orig_kmsg = kmsg_redirect;
kmsg_redirect = SUSPEND_CONSOLE;
#endif
return 0;
}
void pm_restore_console(void)
{
console_loglevel = orig_loglevel;
#ifdef SUSPEND_CONSOLE
acquire_console_sem();
set_console(orig_fgconsole);
release_console_sem();
kmsg_redirect = orig_kmsg;
#endif
return;
}

431
kernel/power/disk.c Normal file
View File

@@ -0,0 +1,431 @@
/*
* kernel/power/disk.c - Suspend-to-disk support.
*
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
* Copyright (c) 2004 Pavel Machek <pavel@suse.cz>
*
* This file is released under the GPLv2.
*
*/
#include <linux/suspend.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/string.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include "power.h"
extern suspend_disk_method_t pm_disk_mode;
extern struct pm_ops * pm_ops;
extern int swsusp_suspend(void);
extern int swsusp_write(void);
extern int swsusp_check(void);
extern int swsusp_read(void);
extern void swsusp_close(void);
extern int swsusp_resume(void);
extern int swsusp_free(void);
static int noresume = 0;
char resume_file[256] = CONFIG_PM_STD_PARTITION;
dev_t swsusp_resume_device;
/**
* power_down - Shut machine down for hibernate.
* @mode: Suspend-to-disk mode
*
* Use the platform driver, if so configured, and return gracefully if it
* fails.
* Otherwise, try to power off or reboot. If that fails, halt the machine;
* there ain't no turning back.
*/
static void power_down(suspend_disk_method_t mode)
{
unsigned long flags;
int error = 0;
local_irq_save(flags);
switch(mode) {
case PM_DISK_PLATFORM:
device_shutdown();
error = pm_ops->enter(PM_SUSPEND_DISK);
break;
case PM_DISK_SHUTDOWN:
printk("Powering off system\n");
device_shutdown();
machine_power_off();
break;
case PM_DISK_REBOOT:
device_shutdown();
machine_restart(NULL);
break;
}
machine_halt();
/* Valid image is on the disk, if we continue we risk serious data corruption
after resume. */
printk(KERN_CRIT "Please power me down manually\n");
while(1);
}
static int in_suspend __nosavedata = 0;
/**
* free_some_memory - Try to free as much memory as possible
*
* ... but do not OOM-kill anyone
*
* Notice: all userland should be stopped at this point, or
* livelock is possible.
*/
static void free_some_memory(void)
{
unsigned int i = 0;
unsigned int tmp;
unsigned long pages = 0;
char *p = "-\\|/";
printk("Freeing memory... ");
while ((tmp = shrink_all_memory(10000))) {
pages += tmp;
printk("\b%c", p[i]);
i++;
if (i > 3)
i = 0;
}
printk("\bdone (%li pages freed)\n", pages);
}
static inline void platform_finish(void)
{
if (pm_disk_mode == PM_DISK_PLATFORM) {
if (pm_ops && pm_ops->finish)
pm_ops->finish(PM_SUSPEND_DISK);
}
}
static void finish(void)
{
device_resume();
platform_finish();
enable_nonboot_cpus();
thaw_processes();
pm_restore_console();
}
static int prepare_processes(void)
{
int error;
pm_prepare_console();
sys_sync();
if (freeze_processes()) {
error = -EBUSY;
return error;
}
if (pm_disk_mode == PM_DISK_PLATFORM) {
if (pm_ops && pm_ops->prepare) {
if ((error = pm_ops->prepare(PM_SUSPEND_DISK)))
return error;
}
}
/* Free memory before shutting down devices. */
free_some_memory();
return 0;
}
static void unprepare_processes(void)
{
enable_nonboot_cpus();
thaw_processes();
pm_restore_console();
}
static int prepare_devices(void)
{
int error;
disable_nonboot_cpus();
if ((error = device_suspend(PMSG_FREEZE))) {
printk("Some devices failed to suspend\n");
platform_finish();
enable_nonboot_cpus();
return error;
}
return 0;
}
/**
* pm_suspend_disk - The grandpappy of power management.
*
* If we're going through the firmware, then get it over with quickly.
*
* If not, then call swsusp to do its thing, then figure out how
* to power down the system.
*/
int pm_suspend_disk(void)
{
int error;
error = prepare_processes();
if (!error) {
error = prepare_devices();
}
if (error) {
unprepare_processes();
return error;
}
pr_debug("PM: Attempting to suspend to disk.\n");
if (pm_disk_mode == PM_DISK_FIRMWARE)
return pm_ops->enter(PM_SUSPEND_DISK);
pr_debug("PM: snapshotting memory.\n");
in_suspend = 1;
if ((error = swsusp_suspend()))
goto Done;
if (in_suspend) {
pr_debug("PM: writing image.\n");
error = swsusp_write();
if (!error)
power_down(pm_disk_mode);
} else
pr_debug("PM: Image restored successfully.\n");
swsusp_free();
Done:
finish();
return error;
}
/**
* software_resume - Resume from a saved image.
*
* Called as a late_initcall (so all devices are discovered and
* initialized), we call swsusp to see if we have a saved image or not.
* If so, we quiesce devices, then restore the saved image. We will
* return above (in pm_suspend_disk()) if everything goes well.
* Otherwise, we fail gracefully and return to the normally
* scheduled program.
*
*/
static int software_resume(void)
{
int error;
if (noresume) {
/**
* FIXME: If noresume is specified, we need to find the partition
* and reset it back to normal swap space.
*/
return 0;
}
pr_debug("PM: Checking swsusp image.\n");
if ((error = swsusp_check()))
goto Done;
pr_debug("PM: Preparing processes for restore.\n");
if ((error = prepare_processes())) {
swsusp_close();
goto Cleanup;
}
pr_debug("PM: Reading swsusp image.\n");
if ((error = swsusp_read()))
goto Cleanup;
pr_debug("PM: Preparing devices for restore.\n");
if ((error = prepare_devices()))
goto Free;
mb();
pr_debug("PM: Restoring saved image.\n");
swsusp_resume();
pr_debug("PM: Restore failed, recovering.n");
finish();
Free:
swsusp_free();
Cleanup:
unprepare_processes();
Done:
pr_debug("PM: Resume from disk failed.\n");
return 0;
}
late_initcall(software_resume);
static char * pm_disk_modes[] = {
[PM_DISK_FIRMWARE] = "firmware",
[PM_DISK_PLATFORM] = "platform",
[PM_DISK_SHUTDOWN] = "shutdown",
[PM_DISK_REBOOT] = "reboot",
};
/**
* disk - Control suspend-to-disk mode
*
* Suspend-to-disk can be handled in several ways. The greatest
* distinction is who writes memory to disk - the firmware or the OS.
* If the firmware does it, we assume that it also handles suspending
* the system.
* If the OS does it, then we have three options for putting the system
* to sleep - using the platform driver (e.g. ACPI or other PM registers),
* powering off the system or rebooting the system (for testing).
*
* The system will support either 'firmware' or 'platform', and that is
* known a priori (and encoded in pm_ops). But, the user may choose
* 'shutdown' or 'reboot' as alternatives.
*
* show() will display what the mode is currently set to.
* store() will accept one of
*
* 'firmware'
* 'platform'
* 'shutdown'
* 'reboot'
*
* It will only change to 'firmware' or 'platform' if the system
* supports it (as determined from pm_ops->pm_disk_mode).
*/
static ssize_t disk_show(struct subsystem * subsys, char * buf)
{
return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]);
}
static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
{
int error = 0;
int i;
int len;
char *p;
suspend_disk_method_t mode = 0;
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
down(&pm_sem);
for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) {
if (!strncmp(buf, pm_disk_modes[i], len)) {
mode = i;
break;
}
}
if (mode) {
if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT)
pm_disk_mode = mode;
else {
if (pm_ops && pm_ops->enter &&
(mode == pm_ops->pm_disk_mode))
pm_disk_mode = mode;
else
error = -EINVAL;
}
} else
error = -EINVAL;
pr_debug("PM: suspend-to-disk mode set to '%s'\n",
pm_disk_modes[mode]);
up(&pm_sem);
return error ? error : n;
}
power_attr(disk);
static ssize_t resume_show(struct subsystem * subsys, char *buf)
{
return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device),
MINOR(swsusp_resume_device));
}
static ssize_t resume_store(struct subsystem * subsys, const char * buf, size_t n)
{
int len;
char *p;
unsigned int maj, min;
int error = -EINVAL;
dev_t res;
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
if (sscanf(buf, "%u:%u", &maj, &min) == 2) {
res = MKDEV(maj,min);
if (maj == MAJOR(res) && min == MINOR(res)) {
swsusp_resume_device = res;
printk("Attempting manual resume\n");
noresume = 0;
software_resume();
error = 0;
}
}
return error >= 0 ? n : error;
}
power_attr(resume);
static struct attribute * g[] = {
&disk_attr.attr,
&resume_attr.attr,
NULL,
};
static struct attribute_group attr_group = {
.attrs = g,
};
static int __init pm_disk_init(void)
{
return sysfs_create_group(&power_subsys.kset.kobj,&attr_group);
}
core_initcall(pm_disk_init);
static int __init resume_setup(char *str)
{
if (noresume)
return 1;
strncpy( resume_file, str, 255 );
return 1;
}
static int __init noresume_setup(char *str)
{
noresume = 1;
return 1;
}
__setup("noresume", noresume_setup);
__setup("resume=", resume_setup);

269
kernel/power/main.c Normal file
View File

@@ -0,0 +1,269 @@
/*
* kernel/power/main.c - PM subsystem core functionality.
*
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
*
* This file is released under the GPLv2
*
*/
#include <linux/suspend.h>
#include <linux/kobject.h>
#include <linux/string.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/pm.h>
#include "power.h"
DECLARE_MUTEX(pm_sem);
struct pm_ops * pm_ops = NULL;
suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN;
/**
* pm_set_ops - Set the global power method table.
* @ops: Pointer to ops structure.
*/
void pm_set_ops(struct pm_ops * ops)
{
down(&pm_sem);
pm_ops = ops;
up(&pm_sem);
}
/**
* suspend_prepare - Do prep work before entering low-power state.
* @state: State we're entering.
*
* This is common code that is called for each state that we're
* entering. Allocate a console, stop all processes, then make sure
* the platform can enter the requested state.
*/
static int suspend_prepare(suspend_state_t state)
{
int error = 0;
if (!pm_ops || !pm_ops->enter)
return -EPERM;
pm_prepare_console();
if (freeze_processes()) {
error = -EAGAIN;
goto Thaw;
}
if (pm_ops->prepare) {
if ((error = pm_ops->prepare(state)))
goto Thaw;
}
if ((error = device_suspend(PMSG_SUSPEND))) {
printk(KERN_ERR "Some devices failed to suspend\n");
goto Finish;
}
return 0;
Finish:
if (pm_ops->finish)
pm_ops->finish(state);
Thaw:
thaw_processes();
pm_restore_console();
return error;
}
static int suspend_enter(suspend_state_t state)
{
int error = 0;
unsigned long flags;
local_irq_save(flags);
if ((error = device_power_down(PMSG_SUSPEND))) {
printk(KERN_ERR "Some devices failed to power down\n");
goto Done;
}
error = pm_ops->enter(state);
device_power_up();
Done:
local_irq_restore(flags);
return error;
}
/**
* suspend_finish - Do final work before exiting suspend sequence.
* @state: State we're coming out of.
*
* Call platform code to clean up, restart processes, and free the
* console that we've allocated. This is not called for suspend-to-disk.
*/
static void suspend_finish(suspend_state_t state)
{
device_resume();
if (pm_ops && pm_ops->finish)
pm_ops->finish(state);
thaw_processes();
pm_restore_console();
}
static char * pm_states[] = {
[PM_SUSPEND_STANDBY] = "standby",
[PM_SUSPEND_MEM] = "mem",
[PM_SUSPEND_DISK] = "disk",
NULL,
};
/**
* enter_state - Do common work of entering low-power state.
* @state: pm_state structure for state we're entering.
*
* Make sure we're the only ones trying to enter a sleep state. Fail
* if someone has beaten us to it, since we don't want anything weird to
* happen when we wake up.
* Then, do the setup for suspend, enter the state, and clean up (after
* we've woken up).
*/
static int enter_state(suspend_state_t state)
{
int error;
if (down_trylock(&pm_sem))
return -EBUSY;
if (state == PM_SUSPEND_DISK) {
error = pm_suspend_disk();
goto Unlock;
}
/* Suspend is hard to get right on SMP. */
if (num_online_cpus() != 1) {
error = -EPERM;
goto Unlock;
}
pr_debug("PM: Preparing system for suspend\n");
if ((error = suspend_prepare(state)))
goto Unlock;
pr_debug("PM: Entering state.\n");
error = suspend_enter(state);
pr_debug("PM: Finishing up.\n");
suspend_finish(state);
Unlock:
up(&pm_sem);
return error;
}
/*
* This is the main interface to the outside world. It needs to be
* called from process context.
*/
int software_suspend(void)
{
return enter_state(PM_SUSPEND_DISK);
}
/**
* pm_suspend - Externally visible function for suspending system.
* @state: Enumerated value of the state to enter.
*
* Determine whether or not the value is within range, and if so
* enter the state (above).
*/
int pm_suspend(suspend_state_t state)
{
if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX)
return enter_state(state);
return -EINVAL;
}
decl_subsys(power,NULL,NULL);
/**
* state - control system power state.
*
* show() returns what states are supported, which is hard-coded to
* 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and
* 'disk' (Suspend-to-Disk).
*
* store() accepts one of those strings, translates it into the
* proper enumerated value, and initiates a suspend transition.
*/
static ssize_t state_show(struct subsystem * subsys, char * buf)
{
int i;
char * s = buf;
for (i = 0; i < PM_SUSPEND_MAX; i++) {
if (pm_states[i])
s += sprintf(s,"%s ",pm_states[i]);
}
s += sprintf(s,"\n");
return (s - buf);
}
static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n)
{
suspend_state_t state = PM_SUSPEND_STANDBY;
char ** s;
char *p;
int error;
int len;
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
if (*s && !strncmp(buf, *s, len))
break;
}
if (*s)
error = enter_state(state);
else
error = -EINVAL;
return error ? error : n;
}
power_attr(state);
static struct attribute * g[] = {
&state_attr.attr,
NULL,
};
static struct attribute_group attr_group = {
.attrs = g,
};
static int __init pm_init(void)
{
int error = subsystem_register(&power_subsys);
if (!error)
error = sysfs_create_group(&power_subsys.kset.kobj,&attr_group);
return error;
}
core_initcall(pm_init);

265
kernel/power/pm.c Normal file
View File

@@ -0,0 +1,265 @@
/*
* pm.c - Power management interface
*
* Copyright (C) 2000 Andrew Henroid
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pm.h>
#include <linux/interrupt.h>
int pm_active;
/*
* Locking notes:
* pm_devs_lock can be a semaphore provided that pm ops are not called
* from an interrupt handler (already a bad idea so no change here). Each
* change must be protected so that an unlink of an entry doesn't clash
* with a pm send - which is permitted to sleep in the current architecture
*
* Module unloads clashing with pm events now work out safely, the module
* unload path will block until the event has been sent. It may well block
* until a resume but that will be fine.
*/
static DECLARE_MUTEX(pm_devs_lock);
static LIST_HEAD(pm_devs);
/**
* pm_register - register a device with power management
* @type: device type
* @id: device ID
* @callback: callback function
*
* Add a device to the list of devices that wish to be notified about
* power management events. A &pm_dev structure is returned on success,
* on failure the return is %NULL.
*
* The callback function will be called in process context and
* it may sleep.
*/
struct pm_dev *pm_register(pm_dev_t type,
unsigned long id,
pm_callback callback)
{
struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL);
if (dev) {
memset(dev, 0, sizeof(*dev));
dev->type = type;
dev->id = id;
dev->callback = callback;
down(&pm_devs_lock);
list_add(&dev->entry, &pm_devs);
up(&pm_devs_lock);
}
return dev;
}
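/*
* Illustrative caller sketch (hypothetical, not part of the original
* file); "mydev_pm_event" is a made-up callback and PM_SYS_DEV is
* assumed to be a valid pm_dev_t value:
*
* static int mydev_pm_event(struct pm_dev *dev,
*                           pm_request_t rqst, void *data)
* {
*  return 0;
* }
*
* struct pm_dev *dev = pm_register(PM_SYS_DEV, 0, mydev_pm_event);
* ...
* pm_unregister(dev);
*
* Returning non-zero from the callback vetoes a suspend request.
*/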
/**
* pm_unregister - unregister a device with power management
* @dev: device to unregister
*
* Remove a device from the power management notification lists. The
* dev passed must be a handle previously returned by pm_register.
*/
void pm_unregister(struct pm_dev *dev)
{
if (dev) {
down(&pm_devs_lock);
list_del(&dev->entry);
up(&pm_devs_lock);
kfree(dev);
}
}
static void __pm_unregister(struct pm_dev *dev)
{
if (dev) {
list_del(&dev->entry);
kfree(dev);
}
}
/**
* pm_unregister_all - unregister all devices with matching callback
* @callback: callback function pointer
*
* Unregister every device that would call the callback passed. This
* is primarily meant as a helper function for loadable modules. It
* enables a module to give up all its managed devices without keeping
* its own private list.
*/
void pm_unregister_all(pm_callback callback)
{
struct list_head *entry;
if (!callback)
return;
down(&pm_devs_lock);
entry = pm_devs.next;
while (entry != &pm_devs) {
struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
entry = entry->next;
if (dev->callback == callback)
__pm_unregister(dev);
}
up(&pm_devs_lock);
}
/**
* pm_send - send request to a single device
* @dev: device to send to
* @rqst: power management request
* @data: data for the callback
*
* Issue a power management request to a given device. The
* %PM_SUSPEND and %PM_RESUME events are handled specially. The
* data field must hold the intended next state. No call is made
* if the state matches.
*
* BUGS: what stops two power management requests occurring in parallel
* and conflicting?
*
* WARNING: Calling pm_send directly is not generally recommended, in
* particular there is no locking against the pm_dev going away. The
* caller must maintain all needed locking or have 'inside knowledge'
* on the safety. Also remember that this function is not locked against
* pm_unregister. This means that you must handle SMP races on callback
* execution and unload yourself.
*/
static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data)
{
int status = 0;
unsigned long prev_state, next_state;
if (in_interrupt())
BUG();
switch (rqst) {
case PM_SUSPEND:
case PM_RESUME:
prev_state = dev->state;
next_state = (unsigned long) data;
if (prev_state != next_state) {
if (dev->callback)
status = (*dev->callback)(dev, rqst, data);
if (!status) {
dev->state = next_state;
dev->prev_state = prev_state;
}
}
else {
dev->prev_state = prev_state;
}
break;
default:
if (dev->callback)
status = (*dev->callback)(dev, rqst, data);
break;
}
return status;
}
/*
* Undo incomplete request
*/
static void pm_undo_all(struct pm_dev *last)
{
struct list_head *entry = last->entry.prev;
while (entry != &pm_devs) {
struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
if (dev->state != dev->prev_state) {
/* previous state was zero (running) resume or
* previous state was non-zero (suspended) suspend
*/
pm_request_t undo = (dev->prev_state
? PM_SUSPEND:PM_RESUME);
pm_send(dev, undo, (void*) dev->prev_state);
}
entry = entry->prev;
}
}
/**
* pm_send_all - send request to all managed devices
* @rqst: power management request
* @data: data for the callback
*
* Issue a power management request to all devices. The
* %PM_SUSPEND events are handled specially. Any device is
* permitted to fail a suspend by returning a non-zero (error)
* value from its callback function. If any device vetoes a
* suspend request then all other devices that have suspended
* during the processing of this request are restored to their
* previous state.
*
* WARNING: This function takes the pm_devs_lock. The lock is not dropped until
* the callbacks have completed. This prevents races against pm locking
* functions, races against module unload pm_unregister code. It does
* mean however that you must not issue pm_ functions within the callback
* or you will deadlock and users will hate you.
*
* Zero is returned on success. If a suspend fails then the status
* from the device that vetoes the suspend is returned.
*
* BUGS: what stops two power management requests occurring in parallel
* and conflicting?
*/
int pm_send_all(pm_request_t rqst, void *data)
{
struct list_head *entry;
down(&pm_devs_lock);
entry = pm_devs.next;
while (entry != &pm_devs) {
struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
if (dev->callback) {
int status = pm_send(dev, rqst, data);
if (status) {
/* return devices to previous state on
* failed suspend request
*/
if (rqst == PM_SUSPEND)
pm_undo_all(dev);
up(&pm_devs_lock);
return status;
}
}
entry = entry->next;
}
up(&pm_devs_lock);
return 0;
}
EXPORT_SYMBOL(pm_register);
EXPORT_SYMBOL(pm_unregister);
EXPORT_SYMBOL(pm_unregister_all);
EXPORT_SYMBOL(pm_send_all);
EXPORT_SYMBOL(pm_active);

52
kernel/power/power.h Normal file
View File

@@ -0,0 +1,52 @@
#include <linux/suspend.h>
#include <linux/utsname.h>
/* With SUSPEND_CONSOLE defined, suspend looks *really* cool, but
we probably do not take enough locks for switching consoles, etc.,
so bad things might happen.
*/
#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
#endif
struct swsusp_info {
struct new_utsname uts;
u32 version_code;
unsigned long num_physpages;
int cpus;
unsigned long image_pages;
unsigned long pagedir_pages;
suspend_pagedir_t * suspend_pagedir;
swp_entry_t pagedir[768];
} __attribute__((aligned(PAGE_SIZE)));
#ifdef CONFIG_SOFTWARE_SUSPEND
extern int pm_suspend_disk(void);
#else
static inline int pm_suspend_disk(void)
{
return -EPERM;
}
#endif
extern struct semaphore pm_sem;
#define power_attr(_name) \
static struct subsys_attribute _name##_attr = { \
.attr = { \
.name = __stringify(_name), \
.mode = 0644, \
}, \
.show = _name##_show, \
.store = _name##_store, \
}
extern struct subsystem power_subsys;
extern int freeze_processes(void);
extern void thaw_processes(void);
extern int pm_prepare_console(void);
extern void pm_restore_console(void);

45
kernel/power/poweroff.c Normal file
View File

@@ -0,0 +1,45 @@
/*
* poweroff.c - sysrq handler to gracefully power down machine.
*
* This file is released under the GPL v2
*/
#include <linux/kernel.h>
#include <linux/sysrq.h>
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/workqueue.h>
/*
* When the user hits Sysrq-o to power down the machine, this is the
* callback we use.
*/
static void do_poweroff(void *dummy)
{
if (pm_power_off)
pm_power_off();
}
static DECLARE_WORK(poweroff_work, do_poweroff, NULL);
static void handle_poweroff(int key, struct pt_regs *pt_regs,
struct tty_struct *tty)
{
schedule_work(&poweroff_work);
}
static struct sysrq_key_op sysrq_poweroff_op = {
.handler = handle_poweroff,
.help_msg = "powerOff",
.action_msg = "Power Off",
.enable_mask = SYSRQ_ENABLE_BOOT,
};
static int pm_sysrq_init(void)
{
register_sysrq_key('o', &sysrq_poweroff_op);
return 0;
}
subsys_initcall(pm_sysrq_init);

121
kernel/power/process.c Normal file
View File

@@ -0,0 +1,121 @@
/*
* kernel/power/process.c - Functions for starting/stopping processes on
* suspend transitions.
*
* Originally from swsusp.
*/
#undef DEBUG
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/suspend.h>
#include <linux/module.h>
/*
* Timeout for stopping processes
*/
#define TIMEOUT (6 * HZ)
static inline int freezeable(struct task_struct * p)
{
if ((p == current) ||
(p->flags & PF_NOFREEZE) ||
(p->exit_state == EXIT_ZOMBIE) ||
(p->exit_state == EXIT_DEAD) ||
(p->state == TASK_STOPPED) ||
(p->state == TASK_TRACED))
return 0;
return 1;
}
/* The refrigerator is the place where frozen processes are stored :-). */
void refrigerator(unsigned long flag)
{
/* Hmm, should we be allowed to suspend when there are realtime
processes around? */
long save;
save = current->state;
current->state = TASK_UNINTERRUPTIBLE;
pr_debug("%s entered refrigerator\n", current->comm);
printk("=");
current->flags &= ~PF_FREEZE;
spin_lock_irq(&current->sighand->siglock);
recalc_sigpending(); /* We sent fake signal, clean it up */
spin_unlock_irq(&current->sighand->siglock);
current->flags |= PF_FROZEN;
while (current->flags & PF_FROZEN)
schedule();
pr_debug("%s left refrigerator\n", current->comm);
current->state = save;
}
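/*
* Illustrative usage (not part of the original file): a freezeable
* kernel thread is expected to poll for the freeze request in its
* main loop, roughly:
*
* if (current->flags & PF_FREEZE)
*  refrigerator(PF_FREEZE);
*/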
/* 0 = success, else # of processes that we failed to stop */
int freeze_processes(void)
{
int todo;
unsigned long start_time;
struct task_struct *g, *p;
printk( "Stopping tasks: " );
start_time = jiffies;
do {
todo = 0;
read_lock(&tasklist_lock);
do_each_thread(g, p) {
unsigned long flags;
if (!freezeable(p))
continue;
if ((p->flags & PF_FROZEN) ||
(p->state == TASK_TRACED) ||
(p->state == TASK_STOPPED))
continue;
/* FIXME: smp problem here: we may not access other process' flags
without locking */
p->flags |= PF_FREEZE;
spin_lock_irqsave(&p->sighand->siglock, flags);
signal_wake_up(p, 0);
spin_unlock_irqrestore(&p->sighand->siglock, flags);
todo++;
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
yield(); /* Yield is okay here */
if (time_after(jiffies, start_time + TIMEOUT)) {
printk( "\n" );
printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo );
return todo;
}
} while(todo);
printk( "|\n" );
BUG_ON(in_atomic());
return 0;
}
void thaw_processes(void)
{
struct task_struct *g, *p;
printk( "Restarting tasks..." );
read_lock(&tasklist_lock);
do_each_thread(g, p) {
if (!freezeable(p))
continue;
if (p->flags & PF_FROZEN) {
p->flags &= ~PF_FROZEN;
wake_up_process(p);
} else
printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
schedule();
printk( " done\n" );
}
EXPORT_SYMBOL(refrigerator);

85
kernel/power/smp.c Normal file
View File

@@ -0,0 +1,85 @@
/*
* kernel/power/smp.c - Functions for stopping other CPUs.
*
* Copyright 2004 Pavel Machek <pavel@suse.cz>
* Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz>
*
* This file is released under the GPLv2.
*/
#undef DEBUG
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/suspend.h>
#include <linux/module.h>
#include <asm/atomic.h>
#include <asm/tlbflush.h>
static atomic_t cpu_counter, freeze;
static void smp_pause(void * data)
{
struct saved_context ctxt;
__save_processor_state(&ctxt);
printk("Sleeping in:\n");
dump_stack();
atomic_inc(&cpu_counter);
while (atomic_read(&freeze)) {
/* FIXME: restore takes place at a random point inside this loop.
This should probably be written in assembly, and should
preserve general-purpose registers, too.
What about the stack? We may need to move to a new stack here.
This had better be run with interrupts disabled.
*/
cpu_relax();
barrier();
}
atomic_dec(&cpu_counter);
__restore_processor_state(&ctxt);
}
static cpumask_t oldmask;
void disable_nonboot_cpus(void)
{
printk("Freezing CPUs (at %d)", smp_processor_id());
oldmask = current->cpus_allowed;
set_cpus_allowed(current, cpumask_of_cpu(0));
current->state = TASK_INTERRUPTIBLE;
schedule_timeout(HZ);
printk("...");
BUG_ON(smp_processor_id() != 0);
/* FIXME: for this to work, all the CPUs must be running
* "idle" thread (or we deadlock). Is that guaranteed? */
atomic_set(&cpu_counter, 0);
atomic_set(&freeze, 1);
smp_call_function(smp_pause, NULL, 0, 0);
while (atomic_read(&cpu_counter) < (num_online_cpus() - 1)) {
cpu_relax();
barrier();
}
printk("ok\n");
}
void enable_nonboot_cpus(void)
{
printk("Restarting CPUs");
atomic_set(&freeze, 0);
while (atomic_read(&cpu_counter)) {
cpu_relax();
barrier();
}
printk("...");
set_cpus_allowed(current, oldmask);
schedule();
printk("ok\n");
}

1433
kernel/power/swsusp.c Normal file

File diff suppressed because it is too large

996
kernel/printk.c Normal file
View File

@@ -0,0 +1,996 @@
/*
* linux/kernel/printk.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* Modified to make sys_syslog() more flexible: added commands to
* return the last 4k of kernel messages, regardless of whether
* they've been read or not. Added option to suppress kernel printk's
* to the console. Added hook for sending the console messages
* elsewhere, in preparation for a serial line console (someday).
* Ted Ts'o, 2/11/93.
* Modified for sysctl support, 1/8/97, Chris Horn.
* Fixed SMP synchronization, 08/08/99, Manfred Spraul
* manfreds@colorfullife.com
* Rewrote bits to get rid of console_lock
* 01Mar01 Andrew Morton <andrewm@uow.edu.au>
*/
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>
#include <linux/smp_lock.h>
#include <linux/console.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/interrupt.h> /* For in_interrupt() */
#include <linux/config.h>
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/security.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <asm/uaccess.h>
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
/* printk's without a loglevel use this.. */
#define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */
/* We show everything that is MORE important than this.. */
#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */
DECLARE_WAIT_QUEUE_HEAD(log_wait);
int console_printk[4] = {
DEFAULT_CONSOLE_LOGLEVEL, /* console_loglevel */
DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */
MINIMUM_CONSOLE_LOGLEVEL, /* minimum_console_loglevel */
DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */
};
EXPORT_SYMBOL(console_printk);
/*
* Low level drivers may need this to know if they can schedule in
* their unblank() callback or not. So let's export it.
*/
int oops_in_progress;
EXPORT_SYMBOL(oops_in_progress);
/*
* console_sem protects the console_drivers list, and also
* provides serialisation for access to the entire console
* driver system.
*/
static DECLARE_MUTEX(console_sem);
struct console *console_drivers;
/*
* This is used for debugging the mess that is the VT code by
* keeping track of whether we have the console semaphore held. It's
* definitely not the perfect debug tool (we don't know if _WE_
* hold it and are racing), but it helps tracking down those weird
* code paths in the console code where we end up in places I want
* locked without the console semaphore held.
*/
static int console_locked;
/*
* logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars.
* It is also used in interesting ways to provide interlocking in
* release_console_sem().
*/
static DEFINE_SPINLOCK(logbuf_lock);
static char __log_buf[__LOG_BUF_LEN];
static char *log_buf = __log_buf;
static int log_buf_len = __LOG_BUF_LEN;
#define LOG_BUF_MASK (log_buf_len-1)
#define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
/*
* The indices into log_buf are not constrained to log_buf_len - they
* must be masked before subscripting
*/
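/*
* Worked example (illustrative): with log_buf_len == 16, LOG_BUF_MASK
* is 15, so the ever-growing index 17 addresses LOG_BUF(17) ==
* log_buf[17 & 15] == log_buf[1]; the indices wrap around the ring
* transparently, which is why they are masked only at access time.
*/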
static unsigned long log_start; /* Index into log_buf: next char to be read by syslog() */
static unsigned long con_start; /* Index into log_buf: next char to be sent to consoles */
static unsigned long log_end; /* Index into log_buf: most-recently-written-char + 1 */
static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */
/*
* Array of consoles built from command line options (console=)
*/
struct console_cmdline
{
char name[8]; /* Name of the driver */
int index; /* Minor dev. to use */
char *options; /* Options for the driver */
};
#define MAX_CMDLINECONSOLES 8
static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
static int selected_console = -1;
static int preferred_console = -1;
/* Flag: console code may call schedule() */
static int console_may_schedule;
/*
* Set up a list of consoles. Called from init/main.c.
*/
static int __init console_setup(char *str)
{
char name[sizeof(console_cmdline[0].name)];
char *s, *options;
int idx;
/*
* Decode str into name, index, options.
*/
if (str[0] >= '0' && str[0] <= '9') {
strcpy(name, "ttyS");
strncpy(name + 4, str, sizeof(name) - 5);
} else
strncpy(name, str, sizeof(name) - 1);
name[sizeof(name) - 1] = 0;
if ((options = strchr(str, ',')) != NULL)
*(options++) = 0;
#ifdef __sparc__
if (!strcmp(str, "ttya"))
strcpy(name, "ttyS0");
if (!strcmp(str, "ttyb"))
strcpy(name, "ttyS1");
#endif
for (s = name; *s; s++)
if ((*s >= '0' && *s <= '9') || *s == ',')
break;
idx = simple_strtoul(s, NULL, 10);
*s = 0;
add_preferred_console(name, idx, options);
return 1;
}
__setup("console=", console_setup);
/**
* add_preferred_console - add a device to the list of preferred consoles.
*
* The last preferred console added will be used for kernel messages
* and stdin/out/err for init. Normally this is used by console_setup
* above to handle user-supplied console arguments; however it can also
* be used by arch-specific code either to override the user or more
* commonly to provide a default console (ie from PROM variables) when
* the user has not supplied one.
*/
int __init add_preferred_console(char *name, int idx, char *options)
{
struct console_cmdline *c;
int i;
/*
* See if this tty is not yet registered, and
* if we have a slot free.
*/
for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
if (strcmp(console_cmdline[i].name, name) == 0 &&
console_cmdline[i].index == idx) {
selected_console = i;
return 0;
}
if (i == MAX_CMDLINECONSOLES)
return -E2BIG;
selected_console = i;
c = &console_cmdline[i];
memcpy(c->name, name, sizeof(c->name));
c->name[sizeof(c->name) - 1] = 0;
c->options = options;
c->index = idx;
return 0;
}
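/*
* Example (illustrative): booting with "console=ttyS0,115200n8" makes
* console_setup() split off the options at the comma and end up calling
* add_preferred_console("ttyS", 0, "115200n8") - name "ttyS", index 0,
* and the remainder passed through as driver options.
*/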
static int __init log_buf_len_setup(char *str)
{
unsigned long size = memparse(str, &str);
unsigned long flags;
if (size)
size = roundup_pow_of_two(size);
if (size > log_buf_len) {
unsigned long start, dest_idx, offset;
char * new_log_buf;
new_log_buf = alloc_bootmem(size);
if (!new_log_buf) {
printk("log_buf_len: allocation failed\n");
goto out;
}
spin_lock_irqsave(&logbuf_lock, flags);
log_buf_len = size;
log_buf = new_log_buf;
offset = start = min(con_start, log_start);
dest_idx = 0;
while (start != log_end) {
log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)];
start++;
dest_idx++;
}
log_start -= offset;
con_start -= offset;
log_end -= offset;
spin_unlock_irqrestore(&logbuf_lock, flags);
printk("log_buf_len: %d\n", log_buf_len);
}
out:
return 1;
}
__setup("log_buf_len=", log_buf_len_setup);
/*
* Commands to do_syslog:
*
* 0 -- Close the log. Currently a NOP.
* 1 -- Open the log. Currently a NOP.
* 2 -- Read from the log.
* 3 -- Read all messages remaining in the ring buffer.
* 4 -- Read and clear all messages remaining in the ring buffer.
* 5 -- Clear ring buffer.
* 6 -- Disable printk's to console.
* 7 -- Enable printk's to console.
* 8 -- Set level of messages printed to console.
* 9 -- Return number of unread characters in the log buffer.
* 10 -- Return size of the log buffer.
*/
int do_syslog(int type, char __user * buf, int len)
{
unsigned long i, j, limit, count;
int do_clear = 0;
char c;
int error = 0;
error = security_syslog(type);
if (error)
return error;
switch (type) {
case 0: /* Close log */
break;
case 1: /* Open log */
break;
case 2: /* Read from log */
error = -EINVAL;
if (!buf || len < 0)
goto out;
error = 0;
if (!len)
goto out;
if (!access_ok(VERIFY_WRITE, buf, len)) {
error = -EFAULT;
goto out;
}
error = wait_event_interruptible(log_wait, (log_start - log_end));
if (error)
goto out;
i = 0;
spin_lock_irq(&logbuf_lock);
while (!error && (log_start != log_end) && i < len) {
c = LOG_BUF(log_start);
log_start++;
spin_unlock_irq(&logbuf_lock);
error = __put_user(c,buf);
buf++;
i++;
cond_resched();
spin_lock_irq(&logbuf_lock);
}
spin_unlock_irq(&logbuf_lock);
if (!error)
error = i;
break;
case 4: /* Read/clear last kernel messages */
do_clear = 1;
/* FALL THRU */
case 3: /* Read last kernel messages */
error = -EINVAL;
if (!buf || len < 0)
goto out;
error = 0;
if (!len)
goto out;
if (!access_ok(VERIFY_WRITE, buf, len)) {
error = -EFAULT;
goto out;
}
count = len;
if (count > log_buf_len)
count = log_buf_len;
spin_lock_irq(&logbuf_lock);
if (count > logged_chars)
count = logged_chars;
if (do_clear)
logged_chars = 0;
limit = log_end;
/*
* __put_user() could sleep, and while we sleep
* printk() could overwrite the messages
* we try to copy to user space. Therefore
* the messages are copied in reverse. <manfreds>
*/
for (i = 0; i < count && !error; i++) {
j = limit-1-i;
if (j + log_buf_len < log_end)
break;
c = LOG_BUF(j);
spin_unlock_irq(&logbuf_lock);
error = __put_user(c,&buf[count-1-i]);
cond_resched();
spin_lock_irq(&logbuf_lock);
}
spin_unlock_irq(&logbuf_lock);
if (error)
break;
error = i;
if (i != count) {
int offset = count - error;
/* buffer overflow during copy, correct user buffer. */
for (i = 0; i < error; i++) {
if (__get_user(c, &buf[i + offset]) ||
__put_user(c, &buf[i])) {
error = -EFAULT;
break;
}
cond_resched();
}
}
break;
case 5: /* Clear ring buffer */
logged_chars = 0;
break;
case 6: /* Disable logging to console */
console_loglevel = minimum_console_loglevel;
break;
case 7: /* Enable logging to console */
console_loglevel = default_console_loglevel;
break;
case 8: /* Set level of messages printed to console */
error = -EINVAL;
if (len < 1 || len > 8)
goto out;
if (len < minimum_console_loglevel)
len = minimum_console_loglevel;
console_loglevel = len;
error = 0;
break;
case 9: /* Number of chars in the log buffer */
error = log_end - log_start;
break;
case 10: /* Size of the log buffer */
error = log_buf_len;
break;
default:
error = -EINVAL;
break;
}
out:
return error;
}
asmlinkage long sys_syslog(int type, char __user * buf, int len)
{
return do_syslog(type, buf, len);
}
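/*
* Illustrative userspace sketch (assuming the glibc klogctl() wrapper
* for this syscall): command 10 queries the buffer size and command 3
* then reads the whole ring without consuming it.
*
*	int len = klogctl(10, NULL, 0);
*	char *buf = malloc(len);
*	if (buf)
*		len = klogctl(3, buf, len);
*/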
/*
* Call the console drivers on a range of log_buf
*/
static void __call_console_drivers(unsigned long start, unsigned long end)
{
struct console *con;
for (con = console_drivers; con; con = con->next) {
if ((con->flags & CON_ENABLED) && con->write)
con->write(con, &LOG_BUF(start), end - start);
}
}
/*
* Write out chars from start to end - 1 inclusive
*/
static void _call_console_drivers(unsigned long start,
unsigned long end, int msg_log_level)
{
if (msg_log_level < console_loglevel &&
console_drivers && start != end) {
if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) {
/* wrapped write */
__call_console_drivers(start & LOG_BUF_MASK,
log_buf_len);
__call_console_drivers(0, end & LOG_BUF_MASK);
} else {
__call_console_drivers(start, end);
}
}
}
/*
* Call the console drivers, asking them to write out
* log_buf[start] to log_buf[end - 1].
* The console_sem must be held.
*/
static void call_console_drivers(unsigned long start, unsigned long end)
{
unsigned long cur_index, start_print;
static int msg_level = -1;
if (((long)(start - end)) > 0)
BUG();
cur_index = start;
start_print = start;
while (cur_index != end) {
if ( msg_level < 0 &&
((end - cur_index) > 2) &&
LOG_BUF(cur_index + 0) == '<' &&
LOG_BUF(cur_index + 1) >= '0' &&
LOG_BUF(cur_index + 1) <= '7' &&
LOG_BUF(cur_index + 2) == '>')
{
msg_level = LOG_BUF(cur_index + 1) - '0';
cur_index += 3;
start_print = cur_index;
}
while (cur_index != end) {
char c = LOG_BUF(cur_index);
cur_index++;
if (c == '\n') {
if (msg_level < 0) {
/*
* printk() has already given us loglevel tags in
* the buffer. This code is here in case the
* log buffer has wrapped right round and scribbled
* on those tags
*/
msg_level = default_message_loglevel;
}
_call_console_drivers(start_print, cur_index, msg_level);
msg_level = -1;
start_print = cur_index;
break;
}
}
}
_call_console_drivers(start_print, end, msg_level);
}
static void emit_log_char(char c)
{
LOG_BUF(log_end) = c;
log_end++;
if (log_end - log_start > log_buf_len)
log_start = log_end - log_buf_len;
if (log_end - con_start > log_buf_len)
con_start = log_end - log_buf_len;
if (logged_chars < log_buf_len)
logged_chars++;
}
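/*
* Worked example (illustrative): with a 16-byte buffer, once log_end
* reaches 20 the oldest four characters have been overwritten, so
* log_start and con_start are dragged forward to 20 - 16 == 4; a
* reader can never trail the writer by more than log_buf_len chars.
*/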
/*
* Zap console related locks when oopsing. Only zap at most once
* every 30 seconds, to leave time for slow consoles to print a
* full oops.
*/
static void zap_locks(void)
{
static unsigned long oops_timestamp;
if (time_after_eq(jiffies, oops_timestamp) &&
!time_after(jiffies, oops_timestamp + 30*HZ))
return;
oops_timestamp = jiffies;
/* If a crash is occurring, make sure we can't deadlock */
spin_lock_init(&logbuf_lock);
/* And make sure that we print immediately */
init_MUTEX(&console_sem);
}
#if defined(CONFIG_PRINTK_TIME)
static int printk_time = 1;
#else
static int printk_time = 0;
#endif
static int __init printk_time_setup(char *str)
{
if (*str)
return 0;
printk_time = 1;
return 1;
}
__setup("time", printk_time_setup);
/*
* This is printk. It can be called from any context. We want it to work.
*
* We try to grab the console_sem. If we succeed, it's easy - we log the output and
* call the console drivers. If we fail to get the semaphore we place the output
* into the log buffer and return. The current holder of the console_sem will
* notice the new output in release_console_sem() and will send it to the
* consoles before releasing the semaphore.
*
* One effect of this deferred printing is that code which calls printk() and
* then changes console_loglevel may break. This is because console_loglevel
* is inspected when the actual printing occurs.
*/
asmlinkage int printk(const char *fmt, ...)
{
va_list args;
int r;
va_start(args, fmt);
r = vprintk(fmt, args);
va_end(args);
return r;
}
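/*
* Illustrative sketch of the pitfall described above: if another CPU
* holds console_sem, the message below is only buffered here, and the
* loglevel check happens later, against the updated console_loglevel.
*
*	printk(KERN_DEBUG "state dump\n");
*	console_loglevel = 15;	(may still affect the line above)
*/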
asmlinkage int vprintk(const char *fmt, va_list args)
{
unsigned long flags;
int printed_len;
char *p;
static char printk_buf[1024];
static int log_level_unknown = 1;
if (unlikely(oops_in_progress))
zap_locks();
/* This stops the holder of console_sem just where we want him */
spin_lock_irqsave(&logbuf_lock, flags);
/* Emit the output into the temporary buffer */
printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
/*
* Copy the output into log_buf. If the caller didn't provide
* appropriate log level tags, we insert them here
*/
for (p = printk_buf; *p; p++) {
if (log_level_unknown) {
/* log_level_unknown signals the start of a new line */
if (printk_time) {
int loglev_char;
char tbuf[50], *tp;
unsigned tlen;
unsigned long long t;
unsigned long nanosec_rem;
/*
* force the log level token to be
* before the time output.
*/
if (p[0] == '<' && p[1] >='0' &&
p[1] <= '7' && p[2] == '>') {
loglev_char = p[1];
p += 3;
printed_len += 3;
} else {
loglev_char = default_message_loglevel
+ '0';
}
t = sched_clock();
nanosec_rem = do_div(t, 1000000000);
tlen = sprintf(tbuf,
"<%c>[%5lu.%06lu] ",
loglev_char,
(unsigned long)t,
nanosec_rem/1000);
for (tp = tbuf; tp < tbuf + tlen; tp++)
emit_log_char(*tp);
printed_len += tlen - 3;
} else {
if (p[0] != '<' || p[1] < '0' ||
p[1] > '7' || p[2] != '>') {
emit_log_char('<');
emit_log_char(default_message_loglevel
+ '0');
emit_log_char('>');
}
printed_len += 3;
}
log_level_unknown = 0;
if (!*p)
break;
}
emit_log_char(*p);
if (*p == '\n')
log_level_unknown = 1;
}
if (!cpu_online(smp_processor_id()) &&
system_state != SYSTEM_RUNNING) {
/*
* Some console drivers may assume that per-cpu resources have
* been allocated. So don't allow them to be called by this
* CPU until it is officially up. We shouldn't be calling into
* random console drivers on a CPU which doesn't exist yet..
*/
spin_unlock_irqrestore(&logbuf_lock, flags);
goto out;
}
if (!down_trylock(&console_sem)) {
console_locked = 1;
/*
* We own the drivers. We can drop the spinlock and let
* release_console_sem() print the text
*/
spin_unlock_irqrestore(&logbuf_lock, flags);
console_may_schedule = 0;
release_console_sem();
} else {
/*
* Someone else owns the drivers. We drop the spinlock, which
* allows the semaphore holder to proceed and to call the
* console drivers with the output which we just produced.
*/
spin_unlock_irqrestore(&logbuf_lock, flags);
}
out:
return printed_len;
}
EXPORT_SYMBOL(printk);
EXPORT_SYMBOL(vprintk);
/**
* acquire_console_sem - lock the console system for exclusive use.
*
* Acquires a semaphore which guarantees that the caller has
* exclusive access to the console system and the console_drivers list.
*
* Can sleep, returns nothing.
*/
void acquire_console_sem(void)
{
if (in_interrupt())
BUG();
down(&console_sem);
console_locked = 1;
console_may_schedule = 1;
}
EXPORT_SYMBOL(acquire_console_sem);
int try_acquire_console_sem(void)
{
if (down_trylock(&console_sem))
return -1;
console_locked = 1;
console_may_schedule = 0;
return 0;
}
EXPORT_SYMBOL(try_acquire_console_sem);
int is_console_locked(void)
{
return console_locked;
}
EXPORT_SYMBOL(is_console_locked);
/**
* release_console_sem - unlock the console system
*
* Releases the semaphore which the caller holds on the console system
* and the console driver list.
*
* While the semaphore was held, console output may have been buffered
* by printk(). If this is the case, release_console_sem() emits
* the output prior to releasing the semaphore.
*
* If there is output waiting for klogd, we wake it up.
*
* release_console_sem() may be called from any context.
*/
void release_console_sem(void)
{
unsigned long flags;
unsigned long _con_start, _log_end;
unsigned long wake_klogd = 0;
for ( ; ; ) {
spin_lock_irqsave(&logbuf_lock, flags);
wake_klogd |= log_start - log_end;
if (con_start == log_end)
break; /* Nothing to print */
_con_start = con_start;
_log_end = log_end;
con_start = log_end; /* Flush */
spin_unlock(&logbuf_lock);
call_console_drivers(_con_start, _log_end);
local_irq_restore(flags);
}
console_locked = 0;
console_may_schedule = 0;
up(&console_sem);
spin_unlock_irqrestore(&logbuf_lock, flags);
if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
wake_up_interruptible(&log_wait);
}
EXPORT_SYMBOL(release_console_sem);
/**
* console_conditional_schedule - yield the CPU if required
*
* If the console code is currently allowed to sleep, and
* if this CPU should yield the CPU to another task, do
* so here.
*
* Must be called within acquire_console_sem().
*/
void __sched console_conditional_schedule(void)
{
if (console_may_schedule)
cond_resched();
}
EXPORT_SYMBOL(console_conditional_schedule);
void console_print(const char *s)
{
printk(KERN_EMERG "%s", s);
}
EXPORT_SYMBOL(console_print);
void console_unblank(void)
{
struct console *c;
/*
* console_unblank can no longer be called in interrupt context unless
* oops_in_progress is set to 1..
*/
if (oops_in_progress) {
if (down_trylock(&console_sem) != 0)
return;
} else
acquire_console_sem();
console_locked = 1;
console_may_schedule = 0;
for (c = console_drivers; c != NULL; c = c->next)
if ((c->flags & CON_ENABLED) && c->unblank)
c->unblank();
release_console_sem();
}
EXPORT_SYMBOL(console_unblank);
/*
* Return the console tty driver structure and its associated index
*/
struct tty_driver *console_device(int *index)
{
struct console *c;
struct tty_driver *driver = NULL;
acquire_console_sem();
for (c = console_drivers; c != NULL; c = c->next) {
if (!c->device)
continue;
driver = c->device(c, index);
if (driver)
break;
}
release_console_sem();
return driver;
}
/*
* Prevent further output on the passed console device so that (for example)
* serial drivers can disable console output before suspending a port, and can
* re-enable output afterwards.
*/
void console_stop(struct console *console)
{
acquire_console_sem();
console->flags &= ~CON_ENABLED;
release_console_sem();
}
EXPORT_SYMBOL(console_stop);
void console_start(struct console *console)
{
acquire_console_sem();
console->flags |= CON_ENABLED;
release_console_sem();
}
EXPORT_SYMBOL(console_start);
/*
* The console driver calls this routine during kernel initialization
* to register the console printing procedure with printk() and to
* print any messages that were printed by the kernel before the
* console driver was initialized.
*/
void register_console(struct console * console)
{
int i;
unsigned long flags;
if (preferred_console < 0)
preferred_console = selected_console;
/*
* See if we want to use this console driver. If we
* didn't select a console we take the first one
* that registers here.
*/
if (preferred_console < 0) {
if (console->index < 0)
console->index = 0;
if (console->setup == NULL ||
console->setup(console, NULL) == 0) {
console->flags |= CON_ENABLED | CON_CONSDEV;
preferred_console = 0;
}
}
/*
* See if this console matches one we selected on
* the command line.
*/
for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) {
if (strcmp(console_cmdline[i].name, console->name) != 0)
continue;
if (console->index >= 0 &&
console->index != console_cmdline[i].index)
continue;
if (console->index < 0)
console->index = console_cmdline[i].index;
if (console->setup &&
console->setup(console, console_cmdline[i].options) != 0)
break;
console->flags |= CON_ENABLED;
console->index = console_cmdline[i].index;
if (i == preferred_console)
console->flags |= CON_CONSDEV;
break;
}
if (!(console->flags & CON_ENABLED))
return;
if (console_drivers && (console_drivers->flags & CON_BOOT)) {
unregister_console(console_drivers);
console->flags &= ~CON_PRINTBUFFER;
}
/*
* Put this console in the list - keep the
* preferred driver at the head of the list.
*/
acquire_console_sem();
if ((console->flags & CON_CONSDEV) || console_drivers == NULL) {
console->next = console_drivers;
console_drivers = console;
} else {
console->next = console_drivers->next;
console_drivers->next = console;
}
if (console->flags & CON_PRINTBUFFER) {
/*
* release_console_sem() will print out the buffered messages
* for us.
*/
spin_lock_irqsave(&logbuf_lock, flags);
con_start = log_start;
spin_unlock_irqrestore(&logbuf_lock, flags);
}
release_console_sem();
}
EXPORT_SYMBOL(register_console);
int unregister_console(struct console * console)
{
struct console *a, *b;
int res = 1;
acquire_console_sem();
if (console_drivers == console) {
console_drivers = console->next;
res = 0;
} else {
for (a = console_drivers->next, b = console_drivers;
a; b = a, a = b->next) {
if (a == console) {
b->next = a->next;
res = 0;
break;
}
}
}
/* If last console is removed, we re-enable picking the first
* one that gets registered. Without that, pmac early boot console
* would prevent fbcon from taking over.
*/
if (console_drivers == NULL)
preferred_console = selected_console;
release_console_sem();
return res;
}
EXPORT_SYMBOL(unregister_console);
/**
* tty_write_message - write a message to a certain tty, not just the console.
*
* This is used for messages that need to be redirected to a specific tty.
* We don't put it into the syslog queue right now; maybe in the future,
* if really needed.
*/
void tty_write_message(struct tty_struct *tty, char *msg)
{
if (tty && tty->driver->write)
tty->driver->write(tty, msg, strlen(msg));
return;
}
/*
* printk rate limiting, lifted from the networking subsystem.
*
* This enforces a rate limit: not more than one kernel message
* every printk_ratelimit_jiffies to make a denial-of-service
* attack impossible.
*/
int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst)
{
static DEFINE_SPINLOCK(ratelimit_lock);
static unsigned long toks = 10*5*HZ;
static unsigned long last_msg;
static int missed;
unsigned long flags;
unsigned long now = jiffies;
spin_lock_irqsave(&ratelimit_lock, flags);
toks += now - last_msg;
last_msg = now;
if (toks > (ratelimit_burst * ratelimit_jiffies))
toks = ratelimit_burst * ratelimit_jiffies;
if (toks >= ratelimit_jiffies) {
int lost = missed;
missed = 0;
toks -= ratelimit_jiffies;
spin_unlock_irqrestore(&ratelimit_lock, flags);
if (lost)
printk(KERN_WARNING "printk: %d messages suppressed.\n", lost);
return 1;
}
missed++;
spin_unlock_irqrestore(&ratelimit_lock, flags);
return 0;
}
EXPORT_SYMBOL(__printk_ratelimit);
/* minimum time in jiffies between messages */
int printk_ratelimit_jiffies = 5*HZ;
/* number of messages we send before ratelimiting */
int printk_ratelimit_burst = 10;
int printk_ratelimit(void)
{
return __printk_ratelimit(printk_ratelimit_jiffies,
printk_ratelimit_burst);
}
EXPORT_SYMBOL(printk_ratelimit);
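/*
* Typical usage (illustrative; "dev" and the message are made up):
* guard diagnostics on potentially hot paths with the default-tuned
* helper so a message flood cannot livelock the console.
*
*	if (printk_ratelimit())
*		printk(KERN_WARNING "%s: bad packet dropped\n", dev->name);
*/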

563
kernel/profile.c Normal file

@@ -0,0 +1,563 @@
/*
* linux/kernel/profile.c
* Simple profiling. Manages a direct-mapped profile hit count buffer,
* with configurable resolution, support for restricting the cpus on
* which profiling is done, and switching between cpu time and
* schedule() calls via kernel command line parameters passed at boot.
*
* Scheduler profiling support, Arjan van de Ven and Ingo Molnar,
* Red Hat, July 2004
* Consolidation of architecture support code for profiling,
* William Irwin, Oracle, July 2004
* Amortized hit count accounting via per-cpu open-addressed hashtables
* to resolve timer interrupt livelocks, William Irwin, Oracle, 2004
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/profile.h>
#include <linux/bootmem.h>
#include <linux/notifier.h>
#include <linux/mm.h>
#include <linux/cpumask.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <asm/sections.h>
#include <asm/semaphore.h>
struct profile_hit {
u32 pc, hits;
};
#define PROFILE_GRPSHIFT 3
#define PROFILE_GRPSZ (1 << PROFILE_GRPSHIFT)
#define NR_PROFILE_HIT (PAGE_SIZE/sizeof(struct profile_hit))
#define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ)
/* Oprofile timer tick hook */
int (*timer_hook)(struct pt_regs *);
static atomic_t *prof_buffer;
static unsigned long prof_len, prof_shift;
static int prof_on;
static cpumask_t prof_cpu_mask = CPU_MASK_ALL;
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
static DEFINE_PER_CPU(int, cpu_profile_flip);
static DECLARE_MUTEX(profile_flip_mutex);
#endif /* CONFIG_SMP */
static int __init profile_setup(char * str)
{
int par;
if (!strncmp(str, "schedule", 8)) {
prof_on = SCHED_PROFILING;
printk(KERN_INFO "kernel schedule profiling enabled\n");
/* skip the "schedule," prefix before parsing the shift */
if (str[8] == ',')
str += 9;
if (get_option(&str, &par))
prof_shift = par;
} else if (get_option(&str, &par)) {
prof_shift = par;
prof_on = CPU_PROFILING;
printk(KERN_INFO "kernel profiling enabled (shift: %lu)\n",
prof_shift);
}
return 1;
}
__setup("profile=", profile_setup);
void __init profile_init(void)
{
if (!prof_on)
return;
/* only text is profiled */
prof_len = (_etext - _stext) >> prof_shift;
prof_buffer = alloc_bootmem(prof_len*sizeof(atomic_t));
}
/* Profile event notifications */
#ifdef CONFIG_PROFILING
static DECLARE_RWSEM(profile_rwsem);
static DEFINE_RWLOCK(handoff_lock);
static struct notifier_block * task_exit_notifier;
static struct notifier_block * task_free_notifier;
static struct notifier_block * munmap_notifier;
void profile_task_exit(struct task_struct * task)
{
down_read(&profile_rwsem);
notifier_call_chain(&task_exit_notifier, 0, task);
up_read(&profile_rwsem);
}
int profile_handoff_task(struct task_struct * task)
{
int ret;
read_lock(&handoff_lock);
ret = notifier_call_chain(&task_free_notifier, 0, task);
read_unlock(&handoff_lock);
return (ret == NOTIFY_OK) ? 1 : 0;
}
void profile_munmap(unsigned long addr)
{
down_read(&profile_rwsem);
notifier_call_chain(&munmap_notifier, 0, (void *)addr);
up_read(&profile_rwsem);
}
int task_handoff_register(struct notifier_block * n)
{
int err = -EINVAL;
write_lock(&handoff_lock);
err = notifier_chain_register(&task_free_notifier, n);
write_unlock(&handoff_lock);
return err;
}
int task_handoff_unregister(struct notifier_block * n)
{
int err = -EINVAL;
write_lock(&handoff_lock);
err = notifier_chain_unregister(&task_free_notifier, n);
write_unlock(&handoff_lock);
return err;
}
int profile_event_register(enum profile_type type, struct notifier_block * n)
{
int err = -EINVAL;
down_write(&profile_rwsem);
switch (type) {
case PROFILE_TASK_EXIT:
err = notifier_chain_register(&task_exit_notifier, n);
break;
case PROFILE_MUNMAP:
err = notifier_chain_register(&munmap_notifier, n);
break;
}
up_write(&profile_rwsem);
return err;
}
int profile_event_unregister(enum profile_type type, struct notifier_block * n)
{
int err = -EINVAL;
down_write(&profile_rwsem);
switch (type) {
case PROFILE_TASK_EXIT:
err = notifier_chain_unregister(&task_exit_notifier, n);
break;
case PROFILE_MUNMAP:
err = notifier_chain_unregister(&munmap_notifier, n);
break;
}
up_write(&profile_rwsem);
return err;
}
int register_timer_hook(int (*hook)(struct pt_regs *))
{
if (timer_hook)
return -EBUSY;
timer_hook = hook;
return 0;
}
void unregister_timer_hook(int (*hook)(struct pt_regs *))
{
WARN_ON(hook != timer_hook);
timer_hook = NULL;
/* make sure all CPUs see the NULL hook */
synchronize_kernel();
}
EXPORT_SYMBOL_GPL(register_timer_hook);
EXPORT_SYMBOL_GPL(unregister_timer_hook);
EXPORT_SYMBOL_GPL(task_handoff_register);
EXPORT_SYMBOL_GPL(task_handoff_unregister);
#endif /* CONFIG_PROFILING */
EXPORT_SYMBOL_GPL(profile_event_register);
EXPORT_SYMBOL_GPL(profile_event_unregister);
#ifdef CONFIG_SMP
/*
* Each cpu has a pair of open-addressed hashtables for pending
* profile hits. read_profile() IPIs all cpus to request that they
* flip buffers, and flushes their contents to prof_buffer itself.
* Flip requests are serialized by the profile_flip_mutex. The sole
* use of having a second hashtable is for avoiding cacheline
* contention that would otherwise happen during flushes of pending
* profile hits required for the accuracy of reported profile hits
* and so resurrect the interrupt livelock issue.
*
* The open-addressed hashtables are indexed by profile buffer slot
* and hold the number of pending hits to that profile buffer slot on
* a cpu in an entry. When the hashtable overflows, all pending hits
* are accounted to their corresponding profile buffer slots with
* atomic_add() and the hashtable emptied. As numerous pending hits
* may be accounted to a profile buffer slot in a hashtable entry,
* this amortizes a number of atomic profile buffer increments likely
* to be far larger than the number of entries in the hashtable,
* particularly given that the number of distinct profile buffer
* positions to which hits are accounted during short intervals (e.g.
* several seconds) is usually very small. Exclusion from buffer
* flipping is provided by interrupt disablement (note that for
* SCHED_PROFILING profile_hit() may be called from process context).
* The hash function is meant to be lightweight as opposed to strong,
* and was vaguely inspired by ppc64 firmware-supported inverted
* pagetable hash functions, but uses a full hashtable full of finite
* collision chains, not just pairs of them.
*
* -- wli
*/
static void __profile_flip_buffers(void *unused)
{
int cpu = smp_processor_id();
per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu);
}
static void profile_flip_buffers(void)
{
int i, j, cpu;
down(&profile_flip_mutex);
j = per_cpu(cpu_profile_flip, get_cpu());
put_cpu();
on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
for_each_online_cpu(cpu) {
struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
for (i = 0; i < NR_PROFILE_HIT; ++i) {
if (!hits[i].hits) {
if (hits[i].pc)
hits[i].pc = 0;
continue;
}
atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
hits[i].hits = hits[i].pc = 0;
}
}
up(&profile_flip_mutex);
}
static void profile_discard_flip_buffers(void)
{
int i, cpu;
down(&profile_flip_mutex);
i = per_cpu(cpu_profile_flip, get_cpu());
put_cpu();
on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
for_each_online_cpu(cpu) {
struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
}
up(&profile_flip_mutex);
}
void profile_hit(int type, void *__pc)
{
unsigned long primary, secondary, flags, pc = (unsigned long)__pc;
int i, j, cpu;
struct profile_hit *hits;
if (prof_on != type || !prof_buffer)
return;
pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1);
i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
cpu = get_cpu();
hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)];
if (!hits) {
put_cpu();
return;
}
local_irq_save(flags);
do {
for (j = 0; j < PROFILE_GRPSZ; ++j) {
if (hits[i + j].pc == pc) {
hits[i + j].hits++;
goto out;
} else if (!hits[i + j].hits) {
hits[i + j].pc = pc;
hits[i + j].hits = 1;
goto out;
}
}
i = (i + secondary) & (NR_PROFILE_HIT - 1);
} while (i != primary);
atomic_inc(&prof_buffer[pc]);
for (i = 0; i < NR_PROFILE_HIT; ++i) {
atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
hits[i].pc = hits[i].hits = 0;
}
out:
local_irq_restore(flags);
put_cpu();
}
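/*
* Worked example (illustrative): a hit on profile slot pc first probes
* the PROFILE_GRPSZ entries of its primary group i, then the groups at
* i + secondary, i + 2*secondary, ... (mod NR_PROFILE_HIT), stopping at
* a matching pc or a free entry; only when it wraps back to the primary
* group does it fall through to the atomic flush path above.
*/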
#ifdef CONFIG_HOTPLUG_CPU
static int __devinit profile_cpu_callback(struct notifier_block *info,
unsigned long action, void *__cpu)
{
int node, cpu = (unsigned long)__cpu;
struct page *page;
switch (action) {
case CPU_UP_PREPARE:
node = cpu_to_node(cpu);
per_cpu(cpu_profile_flip, cpu) = 0;
if (!per_cpu(cpu_profile_hits, cpu)[1]) {
page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
if (!page)
return NOTIFY_BAD;
per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
}
if (!per_cpu(cpu_profile_hits, cpu)[0]) {
page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
if (!page)
goto out_free;
per_cpu(cpu_profile_hits, cpu)[0] = page_address(page);
}
break;
out_free:
page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
per_cpu(cpu_profile_hits, cpu)[1] = NULL;
__free_page(page);
return NOTIFY_BAD;
case CPU_ONLINE:
cpu_set(cpu, prof_cpu_mask);
break;
case CPU_UP_CANCELED:
case CPU_DEAD:
cpu_clear(cpu, prof_cpu_mask);
if (per_cpu(cpu_profile_hits, cpu)[0]) {
page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
per_cpu(cpu_profile_hits, cpu)[0] = NULL;
__free_page(page);
}
if (per_cpu(cpu_profile_hits, cpu)[1]) {
page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
per_cpu(cpu_profile_hits, cpu)[1] = NULL;
__free_page(page);
}
break;
}
return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */
#else /* !CONFIG_SMP */
#define profile_flip_buffers() do { } while (0)
#define profile_discard_flip_buffers() do { } while (0)
void profile_hit(int type, void *__pc)
{
unsigned long pc;
if (prof_on != type || !prof_buffer)
return;
pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
atomic_inc(&prof_buffer[min(pc, prof_len - 1)]);
}
#endif /* !CONFIG_SMP */
void profile_tick(int type, struct pt_regs *regs)
{
if (type == CPU_PROFILING && timer_hook)
timer_hook(regs);
if (!user_mode(regs) && cpu_isset(smp_processor_id(), prof_cpu_mask))
profile_hit(type, (void *)profile_pc(regs));
}
#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <asm/uaccess.h>
#include <asm/ptrace.h>
static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
int count, int *eof, void *data)
{
int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
if (count - len < 2)
return -EINVAL;
len += sprintf(page + len, "\n");
return len;
}
static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer,
unsigned long count, void *data)
{
cpumask_t *mask = (cpumask_t *)data;
unsigned long full_count = count, err;
cpumask_t new_value;
err = cpumask_parse(buffer, count, new_value);
if (err)
return err;
*mask = new_value;
return full_count;
}
void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
{
struct proc_dir_entry *entry;
/* create /proc/irq/prof_cpu_mask */
if (!(entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir)))
return;
entry->nlink = 1;
entry->data = (void *)&prof_cpu_mask;
entry->read_proc = prof_cpu_mask_read_proc;
entry->write_proc = prof_cpu_mask_write_proc;
}
/*
* This function accesses profiling information. The returned data is
* binary: the sampling step and the actual contents of the profile
* buffer. Use of the program readprofile is recommended in order to
* get meaningful info out of these data.
*/
static ssize_t
read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
ssize_t read;
char * pnt;
unsigned int sample_step = 1 << prof_shift;
profile_flip_buffers();
if (p >= (prof_len+1)*sizeof(unsigned int))
return 0;
if (count > (prof_len+1)*sizeof(unsigned int) - p)
count = (prof_len+1)*sizeof(unsigned int) - p;
read = 0;
while (p < sizeof(unsigned int) && count > 0) {
put_user(*((char *)(&sample_step)+p),buf);
buf++; p++; count--; read++;
}
pnt = (char *)prof_buffer + p - sizeof(atomic_t);
if (copy_to_user(buf,(void *)pnt,count))
return -EFAULT;
read += count;
*ppos += read;
return read;
}
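/*
* Illustrative usage from userspace: the readprofile(1) utility decodes
* this layout, e.g. "readprofile -m /boot/System.map" (exact flags per
* the local man page).
*/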
/*
* Writing to /proc/profile resets the counters
*
* Writing a 'profiling multiplier' value into it also re-sets the profiling
* interrupt frequency, on architectures that support this.
*/
static ssize_t write_profile(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
#ifdef CONFIG_SMP
extern int setup_profiling_timer (unsigned int multiplier);
if (count == sizeof(int)) {
unsigned int multiplier;
if (copy_from_user(&multiplier, buf, sizeof(int)))
return -EFAULT;
if (setup_profiling_timer(multiplier))
return -EINVAL;
}
#endif
profile_discard_flip_buffers();
memset(prof_buffer, 0, prof_len * sizeof(atomic_t));
return count;
}
static struct file_operations proc_profile_operations = {
.read = read_profile,
.write = write_profile,
};
#ifdef CONFIG_SMP
static void __init profile_nop(void *unused)
{
}
static int __init create_hash_tables(void)
{
int cpu;
for_each_online_cpu(cpu) {
int node = cpu_to_node(cpu);
struct page *page;
page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
if (!page)
goto out_cleanup;
per_cpu(cpu_profile_hits, cpu)[1]
= (struct profile_hit *)page_address(page);
page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
if (!page)
goto out_cleanup;
per_cpu(cpu_profile_hits, cpu)[0]
= (struct profile_hit *)page_address(page);
}
return 0;
out_cleanup:
prof_on = 0;
mb();
on_each_cpu(profile_nop, NULL, 0, 1);
for_each_online_cpu(cpu) {
struct page *page;
if (per_cpu(cpu_profile_hits, cpu)[0]) {
page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
per_cpu(cpu_profile_hits, cpu)[0] = NULL;
__free_page(page);
}
if (per_cpu(cpu_profile_hits, cpu)[1]) {
page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
per_cpu(cpu_profile_hits, cpu)[1] = NULL;
__free_page(page);
}
}
return -1;
}
#else
#define create_hash_tables() ({ 0; })
#endif
static int __init create_proc_profile(void)
{
struct proc_dir_entry *entry;
if (!prof_on)
return 0;
if (create_hash_tables())
return -1;
if (!(entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL)))
return 0;
entry->proc_fops = &proc_profile_operations;
entry->size = (1+prof_len) * sizeof(atomic_t);
hotcpu_notifier(profile_cpu_callback, 0);
return 0;
}
module_init(create_proc_profile);
#endif /* CONFIG_PROC_FS */

389
kernel/ptrace.c Normal file

@@ -0,0 +1,389 @@
/*
* linux/kernel/ptrace.c
*
* (C) Copyright 1999 Linus Torvalds
*
* Common interfaces for "ptrace()" which we do not want
* to continually duplicate across every architecture.
*/
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/ptrace.h>
#include <linux/security.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
/*
* ptrace a task: make the debugger its new parent and
* move it to the ptrace list.
*
* Must be called with the tasklist lock write-held.
*/
void __ptrace_link(task_t *child, task_t *new_parent)
{
if (!list_empty(&child->ptrace_list))
BUG();
if (child->parent == new_parent)
return;
list_add(&child->ptrace_list, &child->parent->ptrace_children);
REMOVE_LINKS(child);
child->parent = new_parent;
SET_LINKS(child);
}
/*
* Turn a tracing stop into a normal stop now, since with no tracer there
* would be no way to wake it up with SIGCONT or SIGKILL. If there was a
* signal sent that would resume the child, but didn't because it was in
* TASK_TRACED, resume it now.
* Requires that irqs be disabled.
*/
void ptrace_untrace(task_t *child)
{
spin_lock(&child->sighand->siglock);
if (child->state == TASK_TRACED) {
if (child->signal->flags & SIGNAL_STOP_STOPPED) {
child->state = TASK_STOPPED;
} else {
signal_wake_up(child, 1);
}
}
spin_unlock(&child->sighand->siglock);
}
/*
* unptrace a task: move it back to its original parent and
* remove it from the ptrace list.
*
* Must be called with the tasklist lock write-held.
*/
void __ptrace_unlink(task_t *child)
{
if (!child->ptrace)
BUG();
child->ptrace = 0;
if (!list_empty(&child->ptrace_list)) {
list_del_init(&child->ptrace_list);
REMOVE_LINKS(child);
child->parent = child->real_parent;
SET_LINKS(child);
}
if (child->state == TASK_TRACED)
ptrace_untrace(child);
}
/*
* Check that we have indeed attached to the thing..
*/
int ptrace_check_attach(struct task_struct *child, int kill)
{
int ret = -ESRCH;
/*
* We take the read lock around doing both checks to close a
* possible race where someone else was tracing our child and
* detached between these two checks. After this locked check,
* we are sure that this is our traced child and that can only
* be changed by us so it's not changing right after this.
*/
read_lock(&tasklist_lock);
if ((child->ptrace & PT_PTRACED) && child->parent == current &&
(!(child->ptrace & PT_ATTACHED) || child->real_parent != current)
&& child->signal != NULL) {
ret = 0;
spin_lock_irq(&child->sighand->siglock);
if (child->state == TASK_STOPPED) {
child->state = TASK_TRACED;
} else if (child->state != TASK_TRACED && !kill) {
ret = -ESRCH;
}
spin_unlock_irq(&child->sighand->siglock);
}
read_unlock(&tasklist_lock);
if (!ret && !kill) {
wait_task_inactive(child);
}
/* All systems go.. */
return ret;
}
int ptrace_attach(struct task_struct *task)
{
int retval;
task_lock(task);
retval = -EPERM;
if (task->pid <= 1)
goto bad;
if (task == current)
goto bad;
if (!task->mm)
goto bad;
if (((current->uid != task->euid) ||
(current->uid != task->suid) ||
(current->uid != task->uid) ||
(current->gid != task->egid) ||
(current->gid != task->sgid) ||
(current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
goto bad;
rmb();
if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
goto bad;
/* the same process cannot be attached many times */
if (task->ptrace & PT_PTRACED)
goto bad;
retval = security_ptrace(current, task);
if (retval)
goto bad;
/* Go */
task->ptrace |= PT_PTRACED | ((task->real_parent != current)
? PT_ATTACHED : 0);
if (capable(CAP_SYS_PTRACE))
task->ptrace |= PT_PTRACE_CAP;
task_unlock(task);
write_lock_irq(&tasklist_lock);
__ptrace_link(task, current);
write_unlock_irq(&tasklist_lock);
force_sig_specific(SIGSTOP, task);
return 0;
bad:
task_unlock(task);
return retval;
}
int ptrace_detach(struct task_struct *child, unsigned int data)
{
if ((unsigned long) data > _NSIG)
return -EIO;
/* Architecture-specific hardware disable .. */
ptrace_disable(child);
/* .. re-parent .. */
child->exit_code = data;
write_lock_irq(&tasklist_lock);
__ptrace_unlink(child);
/* .. and wake it up. */
if (child->exit_state != EXIT_ZOMBIE)
wake_up_process(child);
write_unlock_irq(&tasklist_lock);
return 0;
}
/*
* Access another process' address space.
* Source/target buffer must be kernel space,
* Do not walk the page table directly, use get_user_pages
*/
int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
struct mm_struct *mm;
struct vm_area_struct *vma;
struct page *page;
void *old_buf = buf;
mm = get_task_mm(tsk);
if (!mm)
return 0;
down_read(&mm->mmap_sem);
/* ignore errors, just check how much was successfully transferred */
while (len) {
int bytes, ret, offset;
void *maddr;
ret = get_user_pages(tsk, mm, addr, 1,
write, 1, &page, &vma);
if (ret <= 0)
break;
bytes = len;
offset = addr & (PAGE_SIZE-1);
if (bytes > PAGE_SIZE-offset)
bytes = PAGE_SIZE-offset;
maddr = kmap(page);
if (write) {
copy_to_user_page(vma, page, addr,
maddr + offset, buf, bytes);
set_page_dirty_lock(page);
} else {
copy_from_user_page(vma, page, addr,
buf, maddr + offset, bytes);
}
kunmap(page);
page_cache_release(page);
len -= bytes;
buf += bytes;
addr += bytes;
}
up_read(&mm->mmap_sem);
mmput(mm);
return buf - old_buf;
}
int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len)
{
int copied = 0;
while (len > 0) {
char buf[128];
int this_len, retval;
this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
retval = access_process_vm(tsk, src, buf, this_len, 0);
if (!retval) {
if (copied)
break;
return -EIO;
}
if (copy_to_user(dst, buf, retval))
return -EFAULT;
copied += retval;
src += retval;
dst += retval;
len -= retval;
}
return copied;
}
int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len)
{
int copied = 0;
while (len > 0) {
char buf[128];
int this_len, retval;
this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
if (copy_from_user(buf, src, this_len))
return -EFAULT;
retval = access_process_vm(tsk, dst, buf, this_len, 1);
if (!retval) {
if (copied)
break;
return -EIO;
}
copied += retval;
src += retval;
dst += retval;
len -= retval;
}
return copied;
}
static int ptrace_setoptions(struct task_struct *child, long data)
{
child->ptrace &= ~PT_TRACE_MASK;
if (data & PTRACE_O_TRACESYSGOOD)
child->ptrace |= PT_TRACESYSGOOD;
if (data & PTRACE_O_TRACEFORK)
child->ptrace |= PT_TRACE_FORK;
if (data & PTRACE_O_TRACEVFORK)
child->ptrace |= PT_TRACE_VFORK;
if (data & PTRACE_O_TRACECLONE)
child->ptrace |= PT_TRACE_CLONE;
if (data & PTRACE_O_TRACEEXEC)
child->ptrace |= PT_TRACE_EXEC;
if (data & PTRACE_O_TRACEVFORKDONE)
child->ptrace |= PT_TRACE_VFORK_DONE;
if (data & PTRACE_O_TRACEEXIT)
child->ptrace |= PT_TRACE_EXIT;
return (data & ~PTRACE_O_MASK) ? -EINVAL : 0;
}
static int ptrace_getsiginfo(struct task_struct *child, siginfo_t __user * data)
{
siginfo_t lastinfo;
int error = -ESRCH;
read_lock(&tasklist_lock);
if (likely(child->sighand != NULL)) {
error = -EINVAL;
spin_lock_irq(&child->sighand->siglock);
if (likely(child->last_siginfo != NULL)) {
lastinfo = *child->last_siginfo;
error = 0;
}
spin_unlock_irq(&child->sighand->siglock);
}
read_unlock(&tasklist_lock);
if (!error)
return copy_siginfo_to_user(data, &lastinfo);
return error;
}
static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
{
siginfo_t newinfo;
int error = -ESRCH;
if (copy_from_user(&newinfo, data, sizeof (siginfo_t)))
return -EFAULT;
read_lock(&tasklist_lock);
if (likely(child->sighand != NULL)) {
error = -EINVAL;
spin_lock_irq(&child->sighand->siglock);
if (likely(child->last_siginfo != NULL)) {
*child->last_siginfo = newinfo;
error = 0;
}
spin_unlock_irq(&child->sighand->siglock);
}
read_unlock(&tasklist_lock);
return error;
}
int ptrace_request(struct task_struct *child, long request,
long addr, long data)
{
int ret = -EIO;
switch (request) {
#ifdef PTRACE_OLDSETOPTIONS
case PTRACE_OLDSETOPTIONS:
#endif
case PTRACE_SETOPTIONS:
ret = ptrace_setoptions(child, data);
break;
case PTRACE_GETEVENTMSG:
ret = put_user(child->ptrace_message, (unsigned long __user *) data);
break;
case PTRACE_GETSIGINFO:
ret = ptrace_getsiginfo(child, (siginfo_t __user *) data);
break;
case PTRACE_SETSIGINFO:
ret = ptrace_setsiginfo(child, (siginfo_t __user *) data);
break;
default:
break;
}
return ret;
}
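/*
* Illustrative userspace sketch: a tracer attached to "pid" can enable
* fork tracing and distinguishable syscall stops via the option bits
* handled above.
*
*	ptrace(PTRACE_SETOPTIONS, pid, NULL,
*	       PTRACE_O_TRACEFORK | PTRACE_O_TRACESYSGOOD);
*/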

470
kernel/rcupdate.c Normal file

@@ -0,0 +1,470 @@
/*
* Read-Copy Update mechanism for mutual exclusion
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2001
*
* Authors: Dipankar Sarma <dipankar@in.ibm.com>
* Manfred Spraul <manfred@colorfullife.com>
*
* Based on the original work by Paul McKenney <paulmck@us.ibm.com>
* and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
* Papers:
* http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
* http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
*
* For detailed explanation of Read-Copy Update mechanism see -
* http://lse.sourceforge.net/locking/rcupdate.html
*
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/cpu.h>
/* Definition for rcupdate control block. */
struct rcu_ctrlblk rcu_ctrlblk =
{ .cur = -300, .completed = -300 };
struct rcu_ctrlblk rcu_bh_ctrlblk =
{ .cur = -300, .completed = -300 };
/* Bookkeeping of the progress of the grace period */
struct rcu_state {
spinlock_t lock; /* Guard this struct and writes to rcu_ctrlblk */
cpumask_t cpumask; /* CPUs that need to switch in order */
/* for current batch to proceed. */
};
static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
{.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
{.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
/* Fake initialization required by compiler */
static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
static int maxbatch = 10;
/**
* call_rcu - Queue an RCU callback for invocation after a grace period.
* @head: structure to be used for queueing the RCU updates.
* @func: actual update function to be invoked after the grace period
*
* The update function will be invoked some time after a full grace
* period elapses, in other words after all currently executing RCU
* read-side critical sections have completed. RCU read-side critical
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
* and may be nested.
*/
void fastcall call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu))
{
unsigned long flags;
struct rcu_data *rdp;
head->func = func;
head->next = NULL;
local_irq_save(flags);
rdp = &__get_cpu_var(rcu_data);
*rdp->nxttail = head;
rdp->nxttail = &head->next;
local_irq_restore(flags);
}
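/*
* Typical usage (illustrative; struct foo and foo_free_rcu() are made
* up): embed a struct rcu_head in the protected object and free the
* object from the callback once a grace period has elapsed.
*
*	static void foo_free_rcu(struct rcu_head *head)
*	{
*		kfree(container_of(head, struct foo, rcu));
*	}
*
*	list_del_rcu(&f->list);
*	call_rcu(&f->rcu, foo_free_rcu);
*/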
/**
* call_rcu_bh - Queue an RCU callback for invocation after a quicker grace period.
* @head: structure to be used for queueing the RCU updates.
* @func: actual update function to be invoked after the grace period
*
* The update function will be invoked some time after a full grace
* period elapses, in other words after all currently executing RCU
* read-side critical sections have completed. call_rcu_bh() assumes
* that the read-side critical sections end on completion of a softirq
* handler. This means that read-side critical sections in process
* context must not be interrupted by softirqs. This interface is to be
* used when most of the read-side critical sections are in softirq context.
* RCU read-side critical sections are delimited by rcu_read_lock() and
* rcu_read_unlock() if in interrupt context, or by rcu_read_lock_bh()
* and rcu_read_unlock_bh() if in process context. These may be nested.
*/
void fastcall call_rcu_bh(struct rcu_head *head,
void (*func)(struct rcu_head *rcu))
{
unsigned long flags;
struct rcu_data *rdp;
head->func = func;
head->next = NULL;
local_irq_save(flags);
rdp = &__get_cpu_var(rcu_bh_data);
*rdp->nxttail = head;
rdp->nxttail = &head->next;
local_irq_restore(flags);
}
/*
* Invoke the completed RCU callbacks. They are expected to be in
* a per-cpu list.
*/
static void rcu_do_batch(struct rcu_data *rdp)
{
struct rcu_head *next, *list;
int count = 0;
list = rdp->donelist;
while (list) {
next = rdp->donelist = list->next;
list->func(list);
list = next;
if (++count >= maxbatch)
break;
}
if (!rdp->donelist)
rdp->donetail = &rdp->donelist;
else
tasklet_schedule(&per_cpu(rcu_tasklet, rdp->cpu));
}
/*
* Grace period handling:
* The grace period handling consists of two steps:
* - A new grace period is started.
* This is done by rcu_start_batch. The start is not broadcast to
* all cpus; they must pick it up by comparing rcp->cur with
* rdp->quiescbatch. All cpus are recorded in the
* rcu_state.cpumask bitmap.
* - All cpus must go through a quiescent state.
* Since the start of the grace period is not broadcast, at least two
* calls to rcu_check_quiescent_state are required:
* The first call just notices that a new grace period is running. The
* following calls check if there was a quiescent state since the beginning
* of the grace period. If so, it updates rcu_state.cpumask. If
* the bitmap is empty, then the grace period is completed.
* rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
* period (if necessary).
*/
/*
* Register a new batch of callbacks, and start it up if there is currently no
* active batch and the batch to be registered has not already occurred.
* Caller must hold rcu_state.lock.
*/
static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
int next_pending)
{
if (next_pending)
rcp->next_pending = 1;
if (rcp->next_pending &&
rcp->completed == rcp->cur) {
/* Can't change, since spin lock held. */
cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask);
rcp->next_pending = 0;
/* next_pending == 0 must be visible in __rcu_process_callbacks()
* before it can see new value of cur.
*/
smp_wmb();
rcp->cur++;
}
}
/*
* A cpu went through a quiescent state since the beginning of the grace
* period. Clear it from the cpu mask and complete the grace period if it
* was the last cpu. Start another grace period if someone has further
* entries pending.
*/
static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
{
cpu_clear(cpu, rsp->cpumask);
if (cpus_empty(rsp->cpumask)) {
/* batch completed ! */
rcp->completed = rcp->cur;
rcu_start_batch(rcp, rsp, 0);
}
}
/*
* Check if the cpu has gone through a quiescent state (say context
* switch). If so and if it already hasn't done so in this RCU
* quiescent cycle, then indicate that it has done so.
*/
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
struct rcu_state *rsp, struct rcu_data *rdp)
{
if (rdp->quiescbatch != rcp->cur) {
/* start new grace period: */
rdp->qs_pending = 1;
rdp->passed_quiesc = 0;
rdp->quiescbatch = rcp->cur;
return;
}
/* Grace period already completed for this cpu?
* qs_pending is checked instead of the actual bitmap to avoid
* cacheline thrashing.
*/
if (!rdp->qs_pending)
return;
/*
* Was there a quiescent state since the beginning of the grace
* period? If no, then exit and wait for the next call.
*/
if (!rdp->passed_quiesc)
return;
rdp->qs_pending = 0;
spin_lock(&rsp->lock);
/*
* rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
* during cpu startup. Ignore the quiescent state.
*/
if (likely(rdp->quiescbatch == rcp->cur))
cpu_quiet(rdp->cpu, rcp, rsp);
spin_unlock(&rsp->lock);
}
#ifdef CONFIG_HOTPLUG_CPU
/* Warning! This is a helper for rcu_offline_cpu; do not use it elsewhere
* without reviewing the locking requirements: the list it pulls from has
* to belong to a cpu which is dead and hence not processing interrupts.
*/
static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
struct rcu_head **tail)
{
local_irq_disable();
*this_rdp->nxttail = list;
if (list)
this_rdp->nxttail = tail;
local_irq_enable();
}
static void __rcu_offline_cpu(struct rcu_data *this_rdp,
struct rcu_ctrlblk *rcp, struct rcu_state *rsp, struct rcu_data *rdp)
{
/* if the cpu going offline owns the grace period
* we can block indefinitely waiting for it, so flush
* it here
*/
spin_lock_bh(&rsp->lock);
if (rcp->cur != rcp->completed)
cpu_quiet(rdp->cpu, rcp, rsp);
spin_unlock_bh(&rsp->lock);
rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
}
static void rcu_offline_cpu(int cpu)
{
struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
__rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &rcu_state,
&per_cpu(rcu_data, cpu));
__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &rcu_bh_state,
&per_cpu(rcu_bh_data, cpu));
put_cpu_var(rcu_data);
put_cpu_var(rcu_bh_data);
tasklet_kill_immediate(&per_cpu(rcu_tasklet, cpu), cpu);
}
#else
static void rcu_offline_cpu(int cpu)
{
}
#endif
/*
* This does the RCU processing work from tasklet context.
*/
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
struct rcu_state *rsp, struct rcu_data *rdp)
{
if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
*rdp->donetail = rdp->curlist;
rdp->donetail = rdp->curtail;
rdp->curlist = NULL;
rdp->curtail = &rdp->curlist;
}
local_irq_disable();
if (rdp->nxtlist && !rdp->curlist) {
rdp->curlist = rdp->nxtlist;
rdp->curtail = rdp->nxttail;
rdp->nxtlist = NULL;
rdp->nxttail = &rdp->nxtlist;
local_irq_enable();
/*
* start the next batch of callbacks
*/
/* determine batch number */
rdp->batch = rcp->cur + 1;
/* see the comment and corresponding wmb() in
* the rcu_start_batch()
*/
smp_rmb();
if (!rcp->next_pending) {
/* and start it/schedule start if it's a new batch */
spin_lock(&rsp->lock);
rcu_start_batch(rcp, rsp, 1);
spin_unlock(&rsp->lock);
}
} else {
local_irq_enable();
}
rcu_check_quiescent_state(rcp, rsp, rdp);
if (rdp->donelist)
rcu_do_batch(rdp);
}
static void rcu_process_callbacks(unsigned long unused)
{
__rcu_process_callbacks(&rcu_ctrlblk, &rcu_state,
&__get_cpu_var(rcu_data));
__rcu_process_callbacks(&rcu_bh_ctrlblk, &rcu_bh_state,
&__get_cpu_var(rcu_bh_data));
}
void rcu_check_callbacks(int cpu, int user)
{
if (user ||
(idle_cpu(cpu) && !in_softirq() &&
hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
rcu_qsctr_inc(cpu);
rcu_bh_qsctr_inc(cpu);
} else if (!in_softirq())
rcu_bh_qsctr_inc(cpu);
tasklet_schedule(&per_cpu(rcu_tasklet, cpu));
}
static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
struct rcu_data *rdp)
{
memset(rdp, 0, sizeof(*rdp));
rdp->curtail = &rdp->curlist;
rdp->nxttail = &rdp->nxtlist;
rdp->donetail = &rdp->donelist;
rdp->quiescbatch = rcp->completed;
rdp->qs_pending = 0;
rdp->cpu = cpu;
}
static void __devinit rcu_online_cpu(int cpu)
{
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);
rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
}
static int __devinit rcu_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
switch (action) {
case CPU_UP_PREPARE:
rcu_online_cpu(cpu);
break;
case CPU_DEAD:
rcu_offline_cpu(cpu);
break;
default:
break;
}
return NOTIFY_OK;
}
static struct notifier_block __devinitdata rcu_nb = {
.notifier_call = rcu_cpu_notify,
};
/*
 * Initializes the RCU mechanism. Assumed to be called early, i.e.
 * before the local timer (SMP) or the jiffies timer (uniprocessor) is set up.
* Note that rcu_qsctr and friends are implicitly
* initialized due to the choice of ``0'' for RCU_CTR_INVALID.
*/
void __init rcu_init(void)
{
rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
/* Register notifier for non-boot CPUs */
register_cpu_notifier(&rcu_nb);
}
struct rcu_synchronize {
struct rcu_head head;
struct completion completion;
};
/* Because of the FASTCALL declaration of complete(), we use this wrapper */
static void wakeme_after_rcu(struct rcu_head *head)
{
struct rcu_synchronize *rcu;
rcu = container_of(head, struct rcu_synchronize, head);
complete(&rcu->completion);
}
/**
* synchronize_kernel - wait until a grace period has elapsed.
*
* Control will return to the caller some time after a full grace
* period has elapsed, in other words after all currently executing RCU
* read-side critical sections have completed. RCU read-side critical
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
* and may be nested.
*/
void synchronize_kernel(void)
{
struct rcu_synchronize rcu;
init_completion(&rcu.completion);
/* Will wake me after RCU finished */
call_rcu(&rcu.head, wakeme_after_rcu);
/* Wait for it */
wait_for_completion(&rcu.completion);
}
module_param(maxbatch, int, 0);
EXPORT_SYMBOL_GPL(call_rcu);
EXPORT_SYMBOL_GPL(call_rcu_bh);
EXPORT_SYMBOL_GPL(synchronize_kernel);
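/*
 * Illustrative sketch (not part of the original file): a typical RCU
 * update-side pattern pairing list_del_rcu() with call_rcu().  The
 * struct and both functions below are hypothetical; assumes
 * <linux/list.h> and <linux/slab.h>.
 */
struct rcu_demo_node {
	struct list_head list;
	int data;
	struct rcu_head rcu;
};
static void rcu_demo_reclaim(struct rcu_head *head)
{
	/* runs after a grace period, when no reader can still see the node */
	kfree(container_of(head, struct rcu_demo_node, rcu));
}
static void rcu_demo_remove(struct rcu_demo_node *p)
{
	list_del_rcu(&p->list);			/* unlink under the update-side lock */
	call_rcu(&p->rcu, rcu_demo_reclaim);	/* defer the free */
}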

551
kernel/resource.c Normal file

@@ -0,0 +1,551 @@
/*
* linux/kernel/resource.c
*
* Copyright (C) 1999 Linus Torvalds
* Copyright (C) 1999 Martin Mares <mj@ucw.cz>
*
* Arbitrary resource management.
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <asm/io.h>
struct resource ioport_resource = {
.name = "PCI IO",
.start = 0x0000,
.end = IO_SPACE_LIMIT,
.flags = IORESOURCE_IO,
};
EXPORT_SYMBOL(ioport_resource);
struct resource iomem_resource = {
.name = "PCI mem",
.start = 0UL,
.end = ~0UL,
.flags = IORESOURCE_MEM,
};
EXPORT_SYMBOL(iomem_resource);
static DEFINE_RWLOCK(resource_lock);
#ifdef CONFIG_PROC_FS
enum { MAX_IORES_LEVEL = 5 };
static void *r_next(struct seq_file *m, void *v, loff_t *pos)
{
struct resource *p = v;
(*pos)++;
if (p->child)
return p->child;
while (!p->sibling && p->parent)
p = p->parent;
return p->sibling;
}
static void *r_start(struct seq_file *m, loff_t *pos)
__acquires(resource_lock)
{
struct resource *p = m->private;
loff_t l = 0;
read_lock(&resource_lock);
for (p = p->child; p && l < *pos; p = r_next(m, p, &l))
;
return p;
}
static void r_stop(struct seq_file *m, void *v)
__releases(resource_lock)
{
read_unlock(&resource_lock);
}
static int r_show(struct seq_file *m, void *v)
{
struct resource *root = m->private;
struct resource *r = v, *p;
int width = root->end < 0x10000 ? 4 : 8;
int depth;
for (depth = 0, p = r; depth < MAX_IORES_LEVEL; depth++, p = p->parent)
if (p->parent == root)
break;
seq_printf(m, "%*s%0*lx-%0*lx : %s\n",
depth * 2, "",
width, r->start,
width, r->end,
r->name ? r->name : "<BAD>");
return 0;
}
static struct seq_operations resource_op = {
.start = r_start,
.next = r_next,
.stop = r_stop,
.show = r_show,
};
static int ioports_open(struct inode *inode, struct file *file)
{
int res = seq_open(file, &resource_op);
if (!res) {
struct seq_file *m = file->private_data;
m->private = &ioport_resource;
}
return res;
}
static int iomem_open(struct inode *inode, struct file *file)
{
int res = seq_open(file, &resource_op);
if (!res) {
struct seq_file *m = file->private_data;
m->private = &iomem_resource;
}
return res;
}
static struct file_operations proc_ioports_operations = {
.open = ioports_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
static struct file_operations proc_iomem_operations = {
.open = iomem_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
static int __init ioresources_init(void)
{
struct proc_dir_entry *entry;
entry = create_proc_entry("ioports", 0, NULL);
if (entry)
entry->proc_fops = &proc_ioports_operations;
entry = create_proc_entry("iomem", 0, NULL);
if (entry)
entry->proc_fops = &proc_iomem_operations;
return 0;
}
__initcall(ioresources_init);
#endif /* CONFIG_PROC_FS */
/* Return the conflict entry if you can't request it */
static struct resource * __request_resource(struct resource *root, struct resource *new)
{
unsigned long start = new->start;
unsigned long end = new->end;
struct resource *tmp, **p;
if (end < start)
return root;
if (start < root->start)
return root;
if (end > root->end)
return root;
p = &root->child;
for (;;) {
tmp = *p;
if (!tmp || tmp->start > end) {
new->sibling = tmp;
*p = new;
new->parent = root;
return NULL;
}
p = &tmp->sibling;
if (tmp->end < start)
continue;
return tmp;
}
}
static int __release_resource(struct resource *old)
{
struct resource *tmp, **p;
p = &old->parent->child;
for (;;) {
tmp = *p;
if (!tmp)
break;
if (tmp == old) {
*p = tmp->sibling;
old->parent = NULL;
return 0;
}
p = &tmp->sibling;
}
return -EINVAL;
}
int request_resource(struct resource *root, struct resource *new)
{
struct resource *conflict;
write_lock(&resource_lock);
conflict = __request_resource(root, new);
write_unlock(&resource_lock);
return conflict ? -EBUSY : 0;
}
EXPORT_SYMBOL(request_resource);
struct resource *____request_resource(struct resource *root, struct resource *new)
{
struct resource *conflict;
write_lock(&resource_lock);
conflict = __request_resource(root, new);
write_unlock(&resource_lock);
return conflict;
}
EXPORT_SYMBOL(____request_resource);
int release_resource(struct resource *old)
{
int retval;
write_lock(&resource_lock);
retval = __release_resource(old);
write_unlock(&resource_lock);
return retval;
}
EXPORT_SYMBOL(release_resource);
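/*
 * Illustrative sketch (not part of the original file): claiming a fixed
 * MMIO window under iomem_resource and releasing it again.  The name,
 * address range and function below are hypothetical.
 */
static struct resource demo_res = {
	.name	= "demo-device",
	.start	= 0xfe000000,
	.end	= 0xfe000fff,
	.flags	= IORESOURCE_MEM,
};
static int demo_claim_window(void)
{
	if (request_resource(&iomem_resource, &demo_res))
		return -EBUSY;		/* range already owned by someone */
	/* ... program the device ... */
	release_resource(&demo_res);
	return 0;
}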
/*
* Find empty slot in the resource tree given range and alignment.
*/
static int find_resource(struct resource *root, struct resource *new,
unsigned long size,
unsigned long min, unsigned long max,
unsigned long align,
void (*alignf)(void *, struct resource *,
unsigned long, unsigned long),
void *alignf_data)
{
struct resource *this = root->child;
new->start = root->start;
/*
* Skip past an allocated resource that starts at 0, since the assignment
* of this->start - 1 to new->end below would cause an underflow.
*/
if (this && this->start == 0) {
new->start = this->end + 1;
this = this->sibling;
}
for(;;) {
if (this)
new->end = this->start - 1;
else
new->end = root->end;
if (new->start < min)
new->start = min;
if (new->end > max)
new->end = max;
new->start = (new->start + align - 1) & ~(align - 1);
if (alignf)
alignf(alignf_data, new, size, align);
if (new->start < new->end && new->end - new->start + 1 >= size) {
new->end = new->start + size - 1;
return 0;
}
if (!this)
break;
new->start = this->end + 1;
this = this->sibling;
}
return -EBUSY;
}
/*
* Allocate empty slot in the resource tree given range and alignment.
*/
int allocate_resource(struct resource *root, struct resource *new,
unsigned long size,
unsigned long min, unsigned long max,
unsigned long align,
void (*alignf)(void *, struct resource *,
unsigned long, unsigned long),
void *alignf_data)
{
int err;
write_lock(&resource_lock);
err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
if (err >= 0 && __request_resource(root, new))
err = -EBUSY;
write_unlock(&resource_lock);
return err;
}
EXPORT_SYMBOL(allocate_resource);
/**
* insert_resource - Inserts a resource in the resource tree
* @parent: parent of the new resource
* @new: new resource to insert
*
* Returns 0 on success, -EBUSY if the resource can't be inserted.
*
 * This function is equivalent to request_resource() when no conflict
 * happens. If a conflict happens, and the conflicting resources
 * entirely fit within the range of the new resource, then the new
 * resource is inserted and the conflicting resources become children of
 * the new resource. Otherwise the new resource becomes the child of
 * the conflicting resource.
*/
int insert_resource(struct resource *parent, struct resource *new)
{
int result;
struct resource *first, *next;
write_lock(&resource_lock);
begin:
result = 0;
first = __request_resource(parent, new);
if (!first)
goto out;
result = -EBUSY;
if (first == parent)
goto out;
/* Resource fully contained by the clashing resource? Recurse into it */
if (first->start <= new->start && first->end >= new->end) {
parent = first;
goto begin;
}
for (next = first; ; next = next->sibling) {
/* Partial overlap? Bad, and unfixable */
if (next->start < new->start || next->end > new->end)
goto out;
if (!next->sibling)
break;
if (next->sibling->start > new->end)
break;
}
result = 0;
new->parent = parent;
new->sibling = next->sibling;
new->child = first;
next->sibling = NULL;
for (next = first; next; next = next->sibling)
next->parent = new;
if (parent->child == first) {
parent->child = new;
} else {
next = parent->child;
while (next->sibling != first)
next = next->sibling;
next->sibling = new;
}
out:
write_unlock(&resource_lock);
return result;
}
EXPORT_SYMBOL(insert_resource);
/*
* Given an existing resource, change its start and size to match the
* arguments. Returns -EBUSY if it can't fit. Existing children of
* the resource are assumed to be immutable.
*/
int adjust_resource(struct resource *res, unsigned long start, unsigned long size)
{
struct resource *tmp, *parent = res->parent;
unsigned long end = start + size - 1;
int result = -EBUSY;
write_lock(&resource_lock);
if ((start < parent->start) || (end > parent->end))
goto out;
for (tmp = res->child; tmp; tmp = tmp->sibling) {
if ((tmp->start < start) || (tmp->end > end))
goto out;
}
if (res->sibling && (res->sibling->start <= end))
goto out;
tmp = parent->child;
if (tmp != res) {
while (tmp->sibling != res)
tmp = tmp->sibling;
if (start <= tmp->end)
goto out;
}
res->start = start;
res->end = end;
result = 0;
out:
write_unlock(&resource_lock);
return result;
}
EXPORT_SYMBOL(adjust_resource);
/*
* This is compatibility stuff for IO resources.
*
* Note how this, unlike the above, knows about
* the IO flag meanings (busy etc).
*
* Request-region creates a new busy region.
*
* Check-region returns non-zero if the area is already busy
*
* Release-region releases a matching busy region.
*/
struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name)
{
struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL);
if (res) {
memset(res, 0, sizeof(*res));
res->name = name;
res->start = start;
res->end = start + n - 1;
res->flags = IORESOURCE_BUSY;
write_lock(&resource_lock);
for (;;) {
struct resource *conflict;
conflict = __request_resource(parent, res);
if (!conflict)
break;
if (conflict != parent) {
parent = conflict;
if (!(conflict->flags & IORESOURCE_BUSY))
continue;
}
/* Uhhuh, that didn't work out.. */
kfree(res);
res = NULL;
break;
}
write_unlock(&resource_lock);
}
return res;
}
EXPORT_SYMBOL(__request_region);
int __deprecated __check_region(struct resource *parent, unsigned long start, unsigned long n)
{
struct resource * res;
res = __request_region(parent, start, n, "check-region");
if (!res)
return -EBUSY;
release_resource(res);
kfree(res);
return 0;
}
EXPORT_SYMBOL(__check_region);
void __release_region(struct resource *parent, unsigned long start, unsigned long n)
{
struct resource **p;
unsigned long end;
p = &parent->child;
end = start + n - 1;
write_lock(&resource_lock);
for (;;) {
struct resource *res = *p;
if (!res)
break;
if (res->start <= start && res->end >= end) {
if (!(res->flags & IORESOURCE_BUSY)) {
p = &res->child;
continue;
}
if (res->start != start || res->end != end)
break;
*p = res->sibling;
write_unlock(&resource_lock);
kfree(res);
return;
}
p = &res->sibling;
}
write_unlock(&resource_lock);
printk(KERN_WARNING "Trying to free nonexistent resource <%08lx-%08lx>\n", start, end);
}
EXPORT_SYMBOL(__release_region);
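/*
 * Illustrative sketch: drivers normally reach these helpers through the
 * request_region()/release_region() wrappers from <linux/ioport.h>.
 * The port base, length and function name below are hypothetical.
 */
static int demo_probe_ports(void)
{
	if (!request_region(0x300, 8, "demo"))
		return -EBUSY;		/* ports are busy, back off */
	/* ... talk to the hardware at 0x300-0x307 ... */
	release_region(0x300, 8);
	return 0;
}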
/*
* Called from init/main.c to reserve IO ports.
*/
#define MAXRESERVE 4
static int __init reserve_setup(char *str)
{
static int reserved;
static struct resource reserve[MAXRESERVE];
for (;;) {
int io_start, io_num;
int x = reserved;
if (get_option (&str, &io_start) != 2)
break;
if (get_option (&str, &io_num) == 0)
break;
if (x < MAXRESERVE) {
struct resource *res = reserve + x;
res->name = "reserved";
res->start = io_start;
res->end = io_start + io_num - 1;
res->flags = IORESOURCE_BUSY;
res->child = NULL;
if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0)
reserved = x+1;
}
}
return 1;
}
__setup("reserve=", reserve_setup);

5004
kernel/sched.c Normal file

File diff suppressed because it is too large.

56
kernel/seccomp.c Normal file

@@ -0,0 +1,56 @@
/*
* linux/kernel/seccomp.c
*
* Copyright 2004-2005 Andrea Arcangeli <andrea@cpushare.com>
*
* This defines a simple but solid secure-computing mode.
*/
#include <linux/seccomp.h>
#include <linux/sched.h>
/* #define SECCOMP_DEBUG 1 */
/*
* Secure computing mode 1 allows only read/write/exit/sigreturn.
* To be fully secure this must be combined with rlimit
* to limit the stack allocations too.
*/
static int mode1_syscalls[] = {
__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
0, /* null terminated */
};
#ifdef TIF_32BIT
static int mode1_syscalls_32[] = {
__NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
0, /* null terminated */
};
#endif
void __secure_computing(int this_syscall)
{
int mode = current->seccomp.mode;
int * syscall;
switch (mode) {
case 1:
syscall = mode1_syscalls;
#ifdef TIF_32BIT
if (test_thread_flag(TIF_32BIT))
syscall = mode1_syscalls_32;
#endif
do {
if (*syscall == this_syscall)
return;
} while (*++syscall);
break;
default:
BUG();
}
#ifdef SECCOMP_DEBUG
dump_stack();
#endif
do_exit(SIGKILL);
}
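/*
 * Illustrative sketch (not part of the original file): architectures
 * reach __secure_computing() from their syscall entry path through an
 * inline in <linux/seccomp.h>, along these (simplified) lines:
 *
 *	static inline void secure_computing(int this_syscall)
 *	{
 *		if (unlikely(test_thread_flag(TIF_SECCOMP)))
 *			__secure_computing(this_syscall);
 *	}
 */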

2662
kernel/signal.c Normal file

File diff suppressed because it is too large.

496
kernel/softirq.c Normal file

@@ -0,0 +1,496 @@
/*
* linux/kernel/softirq.c
*
* Copyright (C) 1992 Linus Torvalds
*
* Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
*/
#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <asm/irq.h>
/*
 - No shared variables; all the data are CPU-local.
 - If a softirq needs serialization, let it serialize itself
 by its own spinlocks.
 - Even if a softirq is serialized, only the local cpu is marked for
 execution. Hence, we get something of a weak cpu binding,
 though it is still not clear whether this results in better
 locality or not.
Examples:
- NET RX softirq. It is multithreaded and does not require
any global serialization.
- NET TX softirq. It kicks software netdevice queues, hence
it is logically serialized per device, but this serialization
is invisible to common code.
- Tasklets: serialized wrt itself.
*/
#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif
static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
/*
 * We cannot loop indefinitely here, to avoid userspace starvation,
 * but we also don't want to introduce a worst-case 1/HZ latency
 * for pending events, so we let the scheduler balance
 * the softirq load for us.
*/
static inline void wakeup_softirqd(void)
{
/* Interrupts are disabled: no need to stop preemption */
struct task_struct *tsk = __get_cpu_var(ksoftirqd);
if (tsk && tsk->state != TASK_RUNNING)
wake_up_process(tsk);
}
/*
* We restart softirq processing MAX_SOFTIRQ_RESTART times,
* and we fall back to softirqd after that.
*
* This number has been established via experimentation.
 * The two things to balance are latency and fairness -
* we want to handle softirqs as soon as possible, but they
* should not be able to lock up the box.
*/
#define MAX_SOFTIRQ_RESTART 10
asmlinkage void __do_softirq(void)
{
struct softirq_action *h;
__u32 pending;
int max_restart = MAX_SOFTIRQ_RESTART;
int cpu;
pending = local_softirq_pending();
local_bh_disable();
cpu = smp_processor_id();
restart:
/* Reset the pending bitmask before enabling irqs */
local_softirq_pending() = 0;
local_irq_enable();
h = softirq_vec;
do {
if (pending & 1) {
h->action(h);
rcu_bh_qsctr_inc(cpu);
}
h++;
pending >>= 1;
} while (pending);
local_irq_disable();
pending = local_softirq_pending();
if (pending && --max_restart)
goto restart;
if (pending)
wakeup_softirqd();
__local_bh_enable();
}
#ifndef __ARCH_HAS_DO_SOFTIRQ
asmlinkage void do_softirq(void)
{
__u32 pending;
unsigned long flags;
if (in_interrupt())
return;
local_irq_save(flags);
pending = local_softirq_pending();
if (pending)
__do_softirq();
local_irq_restore(flags);
}
EXPORT_SYMBOL(do_softirq);
#endif
void local_bh_enable(void)
{
WARN_ON(irqs_disabled());
/*
* Keep preemption disabled until we are done with
* softirq processing:
*/
sub_preempt_count(SOFTIRQ_OFFSET - 1);
if (unlikely(!in_interrupt() && local_softirq_pending()))
do_softirq();
dec_preempt_count();
preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable);
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq() __do_softirq()
#else
# define invoke_softirq() do_softirq()
#endif
/*
* Exit an interrupt context. Process softirqs if needed and possible:
*/
void irq_exit(void)
{
account_system_vtime(current);
sub_preempt_count(IRQ_EXIT_OFFSET);
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();
preempt_enable_no_resched();
}
/*
* This function must run with irqs disabled!
*/
inline fastcall void raise_softirq_irqoff(unsigned int nr)
{
__raise_softirq_irqoff(nr);
/*
* If we're in an interrupt or softirq, we're done
* (this also catches softirq-disabled code). We will
* actually run the softirq once we return from
* the irq or softirq.
*
* Otherwise we wake up ksoftirqd to make sure we
* schedule the softirq soon.
*/
if (!in_interrupt())
wakeup_softirqd();
}
EXPORT_SYMBOL(raise_softirq_irqoff);
void fastcall raise_softirq(unsigned int nr)
{
unsigned long flags;
local_irq_save(flags);
raise_softirq_irqoff(nr);
local_irq_restore(flags);
}
void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
{
softirq_vec[nr].data = data;
softirq_vec[nr].action = action;
}
EXPORT_SYMBOL(open_softirq);
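/*
 * Illustrative sketch: registering and raising a softirq.  DEMO_SOFTIRQ
 * is hypothetical; real slots are the fixed constants in
 * <linux/interrupt.h> (HI_SOFTIRQ, TASKLET_SOFTIRQ, ...).
 */
static void demo_softirq_action(struct softirq_action *a)
{
	/* runs with interrupts enabled but softirqs disabled */
}
/* at init time:      open_softirq(DEMO_SOFTIRQ, demo_softirq_action, NULL); */
/* from irq context:  raise_softirq(DEMO_SOFTIRQ); */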
/* Tasklets */
struct tasklet_head
{
struct tasklet_struct *list;
};
/* Some compilers disobey section attribute on statics when not
initialized -- RR */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
void fastcall __tasklet_schedule(struct tasklet_struct *t)
{
unsigned long flags;
local_irq_save(flags);
t->next = __get_cpu_var(tasklet_vec).list;
__get_cpu_var(tasklet_vec).list = t;
raise_softirq_irqoff(TASKLET_SOFTIRQ);
local_irq_restore(flags);
}
EXPORT_SYMBOL(__tasklet_schedule);
void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
{
unsigned long flags;
local_irq_save(flags);
t->next = __get_cpu_var(tasklet_hi_vec).list;
__get_cpu_var(tasklet_hi_vec).list = t;
raise_softirq_irqoff(HI_SOFTIRQ);
local_irq_restore(flags);
}
EXPORT_SYMBOL(__tasklet_hi_schedule);
static void tasklet_action(struct softirq_action *a)
{
struct tasklet_struct *list;
local_irq_disable();
list = __get_cpu_var(tasklet_vec).list;
__get_cpu_var(tasklet_vec).list = NULL;
local_irq_enable();
while (list) {
struct tasklet_struct *t = list;
list = list->next;
if (tasklet_trylock(t)) {
if (!atomic_read(&t->count)) {
if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
BUG();
t->func(t->data);
tasklet_unlock(t);
continue;
}
tasklet_unlock(t);
}
local_irq_disable();
t->next = __get_cpu_var(tasklet_vec).list;
__get_cpu_var(tasklet_vec).list = t;
__raise_softirq_irqoff(TASKLET_SOFTIRQ);
local_irq_enable();
}
}
static void tasklet_hi_action(struct softirq_action *a)
{
struct tasklet_struct *list;
local_irq_disable();
list = __get_cpu_var(tasklet_hi_vec).list;
__get_cpu_var(tasklet_hi_vec).list = NULL;
local_irq_enable();
while (list) {
struct tasklet_struct *t = list;
list = list->next;
if (tasklet_trylock(t)) {
if (!atomic_read(&t->count)) {
if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
BUG();
t->func(t->data);
tasklet_unlock(t);
continue;
}
tasklet_unlock(t);
}
local_irq_disable();
t->next = __get_cpu_var(tasklet_hi_vec).list;
__get_cpu_var(tasklet_hi_vec).list = t;
__raise_softirq_irqoff(HI_SOFTIRQ);
local_irq_enable();
}
}
void tasklet_init(struct tasklet_struct *t,
void (*func)(unsigned long), unsigned long data)
{
t->next = NULL;
t->state = 0;
atomic_set(&t->count, 0);
t->func = func;
t->data = data;
}
EXPORT_SYMBOL(tasklet_init);
void tasklet_kill(struct tasklet_struct *t)
{
if (in_interrupt())
printk("Attempt to kill tasklet from interrupt\n");
while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
do
yield();
while (test_bit(TASKLET_STATE_SCHED, &t->state));
}
tasklet_unlock_wait(t);
clear_bit(TASKLET_STATE_SCHED, &t->state);
}
EXPORT_SYMBOL(tasklet_kill);
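/*
 * Illustrative sketch: declaring, scheduling and tearing down a
 * tasklet.  The names are hypothetical.
 */
static void demo_tasklet_fn(unsigned long data)
{
	/* deferred work; runs in softirq context, never concurrently
	 * with itself */
}
static DECLARE_TASKLET(demo_tasklet, demo_tasklet_fn, 0);
/* from an interrupt handler:  tasklet_schedule(&demo_tasklet); */
/* on module/device teardown:  tasklet_kill(&demo_tasklet);     */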
void __init softirq_init(void)
{
open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
}
static int ksoftirqd(void * __bind_cpu)
{
set_user_nice(current, 19);
current->flags |= PF_NOFREEZE;
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
preempt_disable();
if (!local_softirq_pending()) {
preempt_enable_no_resched();
schedule();
preempt_disable();
}
__set_current_state(TASK_RUNNING);
while (local_softirq_pending()) {
/* Preempt disable stops cpu going offline.
If already offline, we'll be on wrong CPU:
don't process */
if (cpu_is_offline((long)__bind_cpu))
goto wait_to_die;
do_softirq();
preempt_enable_no_resched();
cond_resched();
preempt_disable();
}
preempt_enable();
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
return 0;
wait_to_die:
preempt_enable();
/* Wait for kthread_stop */
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
schedule();
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
/*
* tasklet_kill_immediate is called to remove a tasklet which can already be
* scheduled for execution on @cpu.
*
* Unlike tasklet_kill, this function removes the tasklet
* _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
*
* When this function is called, @cpu must be in the CPU_DEAD state.
*/
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
struct tasklet_struct **i;
BUG_ON(cpu_online(cpu));
BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
if (!test_bit(TASKLET_STATE_SCHED, &t->state))
return;
/* CPU is dead, so no lock needed. */
for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
if (*i == t) {
*i = t->next;
return;
}
}
BUG();
}
static void takeover_tasklets(unsigned int cpu)
{
struct tasklet_struct **i;
/* CPU is dead, so no lock needed. */
local_irq_disable();
/* Find end, append list for that CPU. */
for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
*i = per_cpu(tasklet_vec, cpu).list;
per_cpu(tasklet_vec, cpu).list = NULL;
raise_softirq_irqoff(TASKLET_SOFTIRQ);
for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
*i = per_cpu(tasklet_hi_vec, cpu).list;
per_cpu(tasklet_hi_vec, cpu).list = NULL;
raise_softirq_irqoff(HI_SOFTIRQ);
local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */
static int __devinit cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
int hotcpu = (unsigned long)hcpu;
struct task_struct *p;
switch (action) {
case CPU_UP_PREPARE:
BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
if (IS_ERR(p)) {
printk("ksoftirqd for %i failed\n", hotcpu);
return NOTIFY_BAD;
}
kthread_bind(p, hotcpu);
per_cpu(ksoftirqd, hotcpu) = p;
break;
case CPU_ONLINE:
wake_up_process(per_cpu(ksoftirqd, hotcpu));
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
/* Unbind so it can run. Fall thru. */
kthread_bind(per_cpu(ksoftirqd, hotcpu), smp_processor_id());
case CPU_DEAD:
p = per_cpu(ksoftirqd, hotcpu);
per_cpu(ksoftirqd, hotcpu) = NULL;
kthread_stop(p);
takeover_tasklets(hotcpu);
break;
#endif /* CONFIG_HOTPLUG_CPU */
}
return NOTIFY_OK;
}
static struct notifier_block __devinitdata cpu_nfb = {
.notifier_call = cpu_callback
};
__init int spawn_ksoftirqd(void)
{
void *cpu = (void *)(long)smp_processor_id();
cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
register_cpu_notifier(&cpu_nfb);
return 0;
}

371
kernel/spinlock.c Normal file

@@ -0,0 +1,371 @@
/*
* Copyright (2004) Linus Torvalds
*
* Author: Zwane Mwaikambo <zwane@fsmlabs.com>
*
* Copyright (2004) Ingo Molnar
*/
#include <linux/config.h>
#include <linux/linkage.h>
#include <linux/preempt.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/module.h>
/*
* Generic declaration of the raw read_trylock() function,
* architectures are supposed to optimize this:
*/
int __lockfunc generic_raw_read_trylock(rwlock_t *lock)
{
_raw_read_lock(lock);
return 1;
}
EXPORT_SYMBOL(generic_raw_read_trylock);
int __lockfunc _spin_trylock(spinlock_t *lock)
{
preempt_disable();
if (_raw_spin_trylock(lock))
return 1;
preempt_enable();
return 0;
}
EXPORT_SYMBOL(_spin_trylock);
int __lockfunc _read_trylock(rwlock_t *lock)
{
preempt_disable();
if (_raw_read_trylock(lock))
return 1;
preempt_enable();
return 0;
}
EXPORT_SYMBOL(_read_trylock);
int __lockfunc _write_trylock(rwlock_t *lock)
{
preempt_disable();
if (_raw_write_trylock(lock))
return 1;
preempt_enable();
return 0;
}
EXPORT_SYMBOL(_write_trylock);
#ifndef CONFIG_PREEMPT
void __lockfunc _read_lock(rwlock_t *lock)
{
preempt_disable();
_raw_read_lock(lock);
}
EXPORT_SYMBOL(_read_lock);
unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
{
unsigned long flags;
local_irq_save(flags);
preempt_disable();
_raw_spin_lock_flags(lock, flags);
return flags;
}
EXPORT_SYMBOL(_spin_lock_irqsave);
void __lockfunc _spin_lock_irq(spinlock_t *lock)
{
local_irq_disable();
preempt_disable();
_raw_spin_lock(lock);
}
EXPORT_SYMBOL(_spin_lock_irq);
void __lockfunc _spin_lock_bh(spinlock_t *lock)
{
local_bh_disable();
preempt_disable();
_raw_spin_lock(lock);
}
EXPORT_SYMBOL(_spin_lock_bh);
unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
{
unsigned long flags;
local_irq_save(flags);
preempt_disable();
_raw_read_lock(lock);
return flags;
}
EXPORT_SYMBOL(_read_lock_irqsave);
void __lockfunc _read_lock_irq(rwlock_t *lock)
{
local_irq_disable();
preempt_disable();
_raw_read_lock(lock);
}
EXPORT_SYMBOL(_read_lock_irq);
void __lockfunc _read_lock_bh(rwlock_t *lock)
{
local_bh_disable();
preempt_disable();
_raw_read_lock(lock);
}
EXPORT_SYMBOL(_read_lock_bh);
unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
{
unsigned long flags;
local_irq_save(flags);
preempt_disable();
_raw_write_lock(lock);
return flags;
}
EXPORT_SYMBOL(_write_lock_irqsave);
void __lockfunc _write_lock_irq(rwlock_t *lock)
{
local_irq_disable();
preempt_disable();
_raw_write_lock(lock);
}
EXPORT_SYMBOL(_write_lock_irq);
void __lockfunc _write_lock_bh(rwlock_t *lock)
{
local_bh_disable();
preempt_disable();
_raw_write_lock(lock);
}
EXPORT_SYMBOL(_write_lock_bh);
void __lockfunc _spin_lock(spinlock_t *lock)
{
preempt_disable();
_raw_spin_lock(lock);
}
EXPORT_SYMBOL(_spin_lock);
void __lockfunc _write_lock(rwlock_t *lock)
{
preempt_disable();
_raw_write_lock(lock);
}
EXPORT_SYMBOL(_write_lock);
#else /* CONFIG_PREEMPT: */
/*
* This could be a long-held lock. We both prepare to spin for a long
* time (making _this_ CPU preemptable if possible), and we also signal
* towards that other CPU that it should break the lock ASAP.
*
* (We do this in a function because inlining it would be excessive.)
*/
#define BUILD_LOCK_OPS(op, locktype) \
void __lockfunc _##op##_lock(locktype##_t *lock) \
{ \
preempt_disable(); \
for (;;) { \
if (likely(_raw_##op##_trylock(lock))) \
break; \
preempt_enable(); \
if (!(lock)->break_lock) \
(lock)->break_lock = 1; \
while (!op##_can_lock(lock) && (lock)->break_lock) \
cpu_relax(); \
preempt_disable(); \
} \
(lock)->break_lock = 0; \
} \
\
EXPORT_SYMBOL(_##op##_lock); \
\
unsigned long __lockfunc _##op##_lock_irqsave(locktype##_t *lock) \
{ \
unsigned long flags; \
\
preempt_disable(); \
for (;;) { \
local_irq_save(flags); \
if (likely(_raw_##op##_trylock(lock))) \
break; \
local_irq_restore(flags); \
\
preempt_enable(); \
if (!(lock)->break_lock) \
(lock)->break_lock = 1; \
while (!op##_can_lock(lock) && (lock)->break_lock) \
cpu_relax(); \
preempt_disable(); \
} \
(lock)->break_lock = 0; \
return flags; \
} \
\
EXPORT_SYMBOL(_##op##_lock_irqsave); \
\
void __lockfunc _##op##_lock_irq(locktype##_t *lock) \
{ \
_##op##_lock_irqsave(lock); \
} \
\
EXPORT_SYMBOL(_##op##_lock_irq); \
\
void __lockfunc _##op##_lock_bh(locktype##_t *lock) \
{ \
unsigned long flags; \
\
	/*							\
	 * Careful: we must exclude softirqs too, hence the	\
	 * irq-disabling. We use the generic preemption-aware	\
	 * function:						\
	 */							\
flags = _##op##_lock_irqsave(lock); \
local_bh_disable(); \
local_irq_restore(flags); \
} \
\
EXPORT_SYMBOL(_##op##_lock_bh)
/*
* Build preemption-friendly versions of the following
* lock-spinning functions:
*
* _[spin|read|write]_lock()
* _[spin|read|write]_lock_irq()
* _[spin|read|write]_lock_irqsave()
* _[spin|read|write]_lock_bh()
*/
BUILD_LOCK_OPS(spin, spinlock);
BUILD_LOCK_OPS(read, rwlock);
BUILD_LOCK_OPS(write, rwlock);
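/*
 * For illustration, BUILD_LOCK_OPS(spin, spinlock) expands _spin_lock()
 * to roughly the following (simplified sketch):
 *
 *	void __lockfunc _spin_lock(spinlock_t *lock)
 *	{
 *		preempt_disable();
 *		for (;;) {
 *			if (likely(_raw_spin_trylock(lock)))
 *				break;
 *			preempt_enable();
 *			if (!lock->break_lock)
 *				lock->break_lock = 1;
 *			while (!spin_can_lock(lock) && lock->break_lock)
 *				cpu_relax();
 *			preempt_disable();
 *		}
 *		lock->break_lock = 0;
 *	}
 */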
#endif /* CONFIG_PREEMPT */
void __lockfunc _spin_unlock(spinlock_t *lock)
{
_raw_spin_unlock(lock);
preempt_enable();
}
EXPORT_SYMBOL(_spin_unlock);
void __lockfunc _write_unlock(rwlock_t *lock)
{
_raw_write_unlock(lock);
preempt_enable();
}
EXPORT_SYMBOL(_write_unlock);
void __lockfunc _read_unlock(rwlock_t *lock)
{
_raw_read_unlock(lock);
preempt_enable();
}
EXPORT_SYMBOL(_read_unlock);
void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
_raw_spin_unlock(lock);
local_irq_restore(flags);
preempt_enable();
}
EXPORT_SYMBOL(_spin_unlock_irqrestore);
void __lockfunc _spin_unlock_irq(spinlock_t *lock)
{
_raw_spin_unlock(lock);
local_irq_enable();
preempt_enable();
}
EXPORT_SYMBOL(_spin_unlock_irq);
void __lockfunc _spin_unlock_bh(spinlock_t *lock)
{
_raw_spin_unlock(lock);
preempt_enable();
local_bh_enable();
}
EXPORT_SYMBOL(_spin_unlock_bh);
void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
_raw_read_unlock(lock);
local_irq_restore(flags);
preempt_enable();
}
EXPORT_SYMBOL(_read_unlock_irqrestore);
void __lockfunc _read_unlock_irq(rwlock_t *lock)
{
_raw_read_unlock(lock);
local_irq_enable();
preempt_enable();
}
EXPORT_SYMBOL(_read_unlock_irq);
void __lockfunc _read_unlock_bh(rwlock_t *lock)
{
_raw_read_unlock(lock);
preempt_enable();
local_bh_enable();
}
EXPORT_SYMBOL(_read_unlock_bh);
void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
_raw_write_unlock(lock);
local_irq_restore(flags);
preempt_enable();
}
EXPORT_SYMBOL(_write_unlock_irqrestore);
void __lockfunc _write_unlock_irq(rwlock_t *lock)
{
_raw_write_unlock(lock);
local_irq_enable();
preempt_enable();
}
EXPORT_SYMBOL(_write_unlock_irq);
void __lockfunc _write_unlock_bh(rwlock_t *lock)
{
_raw_write_unlock(lock);
preempt_enable();
local_bh_enable();
}
EXPORT_SYMBOL(_write_unlock_bh);
int __lockfunc _spin_trylock_bh(spinlock_t *lock)
{
local_bh_disable();
preempt_disable();
if (_raw_spin_trylock(lock))
return 1;
preempt_enable();
local_bh_enable();
return 0;
}
EXPORT_SYMBOL(_spin_trylock_bh);
int in_lock_functions(unsigned long addr)
{
/* Linker adds these: start and end of __lockfunc functions */
extern char __lock_text_start[], __lock_text_end[];
return addr >= (unsigned long)__lock_text_start
&& addr < (unsigned long)__lock_text_end;
}
EXPORT_SYMBOL(in_lock_functions);

212
kernel/stop_machine.c Normal file

@@ -0,0 +1,212 @@
#include <linux/stop_machine.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/syscalls.h>
#include <asm/atomic.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>
/* Since we affect priority and affinity (both of which are visible
 * to, and settable by, outside processes) we do indirection via a
 * kthread. */
/* Thread to stop each CPU in user context. */
enum stopmachine_state {
STOPMACHINE_WAIT,
STOPMACHINE_PREPARE,
STOPMACHINE_DISABLE_IRQ,
STOPMACHINE_EXIT,
};
static enum stopmachine_state stopmachine_state;
static unsigned int stopmachine_num_threads;
static atomic_t stopmachine_thread_ack;
static DECLARE_MUTEX(stopmachine_mutex);
static int stopmachine(void *cpu)
{
int irqs_disabled = 0;
int prepared = 0;
set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
/* Ack: we are alive */
mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
atomic_inc(&stopmachine_thread_ack);
/* Simple state machine */
while (stopmachine_state != STOPMACHINE_EXIT) {
if (stopmachine_state == STOPMACHINE_DISABLE_IRQ
&& !irqs_disabled) {
local_irq_disable();
irqs_disabled = 1;
/* Ack: irqs disabled. */
mb(); /* Must read state first. */
atomic_inc(&stopmachine_thread_ack);
} else if (stopmachine_state == STOPMACHINE_PREPARE
&& !prepared) {
/* Everyone is in place, hold CPU. */
preempt_disable();
prepared = 1;
mb(); /* Must read state first. */
atomic_inc(&stopmachine_thread_ack);
}
/* Yield in first stage: migration threads need to
* help our sisters onto their CPUs. */
if (!prepared && !irqs_disabled)
yield();
else
cpu_relax();
}
/* Ack: we are exiting. */
mb(); /* Must read state first. */
atomic_inc(&stopmachine_thread_ack);
if (irqs_disabled)
local_irq_enable();
if (prepared)
preempt_enable();
return 0;
}
/* Change the thread state */
static void stopmachine_set_state(enum stopmachine_state state)
{
atomic_set(&stopmachine_thread_ack, 0);
wmb();
stopmachine_state = state;
while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
cpu_relax();
}
static int stop_machine(void)
{
int i, ret = 0;
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
mm_segment_t old_fs = get_fs();
/* One high-prio thread per cpu. We'll do this one. */
set_fs(KERNEL_DS);
sys_sched_setscheduler(current->pid, SCHED_FIFO,
(struct sched_param __user *)&param);
set_fs(old_fs);
atomic_set(&stopmachine_thread_ack, 0);
stopmachine_num_threads = 0;
stopmachine_state = STOPMACHINE_WAIT;
for_each_online_cpu(i) {
if (i == _smp_processor_id())
continue;
ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
if (ret < 0)
break;
stopmachine_num_threads++;
}
/* Wait for them all to come to life. */
while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
yield();
/* If some failed, kill them all. */
if (ret < 0) {
stopmachine_set_state(STOPMACHINE_EXIT);
up(&stopmachine_mutex);
return ret;
}
/* Don't schedule us away at this point, please. */
local_irq_disable();
/* Now they are all started, make them hold the CPUs, ready. */
stopmachine_set_state(STOPMACHINE_PREPARE);
/* Make them disable irqs. */
stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
return 0;
}
static void restart_machine(void)
{
stopmachine_set_state(STOPMACHINE_EXIT);
local_irq_enable();
}
struct stop_machine_data
{
int (*fn)(void *);
void *data;
struct completion done;
};
static int do_stop(void *_smdata)
{
struct stop_machine_data *smdata = _smdata;
int ret;
ret = stop_machine();
if (ret == 0) {
ret = smdata->fn(smdata->data);
restart_machine();
}
/* We're done: you can kthread_stop us now */
complete(&smdata->done);
/* Wait for kthread_stop */
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
schedule();
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
return ret;
}
struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
unsigned int cpu)
{
struct stop_machine_data smdata;
struct task_struct *p;
smdata.fn = fn;
smdata.data = data;
init_completion(&smdata.done);
down(&stopmachine_mutex);
/* If they don't care which CPU fn runs on, bind to any online one. */
if (cpu == NR_CPUS)
cpu = _smp_processor_id();
p = kthread_create(do_stop, &smdata, "kstopmachine");
if (!IS_ERR(p)) {
kthread_bind(p, cpu);
wake_up_process(p);
wait_for_completion(&smdata.done);
}
up(&stopmachine_mutex);
return p;
}
int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
{
struct task_struct *p;
int ret;
/* No CPUs can come up or down during this. */
lock_cpu_hotplug();
p = __stop_machine_run(fn, data, cpu);
if (!IS_ERR(p))
ret = kthread_stop(p);
else
ret = PTR_ERR(p);
unlock_cpu_hotplug();
return ret;
}
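/*
 * Illustrative sketch (not part of the original file): freezing the
 * machine to run a callback.  demo_patch is hypothetical; passing
 * NR_CPUS as the cpu argument means "any online CPU".
 */
static int demo_patch(void *data)
{
	/* runs on one CPU with every other CPU spinning, irqs off */
	return 0;
}
/* caller:  int err = stop_machine_run(demo_patch, NULL, NR_CPUS); */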

1725
kernel/sys.c Normal file

File diff suppressed because it is too large.

86
kernel/sys_ni.c Normal file

@@ -0,0 +1,86 @@
#include <linux/linkage.h>
#include <linux/errno.h>
#include <asm/unistd.h>
/*
* Non-implemented system calls get redirected here.
*/
asmlinkage long sys_ni_syscall(void)
{
return -ENOSYS;
}
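/*
 * Illustrative sketch: cond_syscall() is defined per architecture
 * (typically in <asm/unistd.h>).  A common definition makes the symbol
 * a weak alias of sys_ni_syscall, roughly (i386-style, simplified):
 *
 *	#define cond_syscall(x) \
 *		asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
 *
 * so a syscall that is compiled out by Kconfig still resolves and
 * returns -ENOSYS.
 */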
cond_syscall(sys_nfsservctl);
cond_syscall(sys_quotactl);
cond_syscall(sys_acct);
cond_syscall(sys_lookup_dcookie);
cond_syscall(sys_swapon);
cond_syscall(sys_swapoff);
cond_syscall(sys_init_module);
cond_syscall(sys_delete_module);
cond_syscall(sys_socketpair);
cond_syscall(sys_bind);
cond_syscall(sys_listen);
cond_syscall(sys_accept);
cond_syscall(sys_connect);
cond_syscall(sys_getsockname);
cond_syscall(sys_getpeername);
cond_syscall(sys_sendto);
cond_syscall(sys_send);
cond_syscall(sys_recvfrom);
cond_syscall(sys_recv);
cond_syscall(sys_socket);
cond_syscall(sys_setsockopt);
cond_syscall(sys_getsockopt);
cond_syscall(sys_shutdown);
cond_syscall(sys_sendmsg);
cond_syscall(sys_recvmsg);
cond_syscall(sys_socketcall);
cond_syscall(sys_futex);
cond_syscall(compat_sys_futex);
cond_syscall(sys_epoll_create);
cond_syscall(sys_epoll_ctl);
cond_syscall(sys_epoll_wait);
cond_syscall(sys_semget);
cond_syscall(sys_semop);
cond_syscall(sys_semtimedop);
cond_syscall(sys_semctl);
cond_syscall(sys_msgget);
cond_syscall(sys_msgsnd);
cond_syscall(sys_msgrcv);
cond_syscall(sys_msgctl);
cond_syscall(sys_shmget);
cond_syscall(sys_shmdt);
cond_syscall(sys_shmctl);
cond_syscall(sys_mq_open);
cond_syscall(sys_mq_unlink);
cond_syscall(sys_mq_timedsend);
cond_syscall(sys_mq_timedreceive);
cond_syscall(sys_mq_notify);
cond_syscall(sys_mq_getsetattr);
cond_syscall(compat_sys_mq_open);
cond_syscall(compat_sys_mq_timedsend);
cond_syscall(compat_sys_mq_timedreceive);
cond_syscall(compat_sys_mq_notify);
cond_syscall(compat_sys_mq_getsetattr);
cond_syscall(sys_mbind);
cond_syscall(sys_get_mempolicy);
cond_syscall(sys_set_mempolicy);
cond_syscall(compat_sys_mbind);
cond_syscall(compat_sys_get_mempolicy);
cond_syscall(compat_sys_set_mempolicy);
cond_syscall(sys_add_key);
cond_syscall(sys_request_key);
cond_syscall(sys_keyctl);
cond_syscall(compat_sys_keyctl);
cond_syscall(compat_sys_socketcall);
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
cond_syscall(sys_pciconfig_write);
cond_syscall(sys_pciconfig_iobase);
cond_syscall(sys32_ipc);
cond_syscall(sys32_sysctl);
cond_syscall(ppc_rtas);

2337
kernel/sysctl.c Normal file

File diff suppressed because it is too large.

599
kernel/time.c Normal file

@@ -0,0 +1,599 @@
/*
* linux/kernel/time.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* This file contains the interface functions for the various
* time related system calls: time, stime, gettimeofday, settimeofday,
* adjtime
*/
/*
* Modification history kernel/time.c
*
* 1993-09-02 Philip Gladstone
* Created file with time related functions from sched.c and adjtimex()
* 1993-10-08 Torsten Duwe
* adjtime interface update and CMOS clock write code
* 1995-08-13 Torsten Duwe
* kernel PLL updated to 1994-12-13 specs (rfc-1589)
* 1999-01-16 Ulrich Windl
* Introduced error checking for many cases in adjtimex().
* Updated NTP code according to technical memorandum Jan '96
* "A Kernel Model for Precision Timekeeping" by Dave Mills
* Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10)
* (Even though the technical memorandum forbids it)
* 2004-07-14 Christoph Lameter
* Added getnstimeofday to allow the posix timer functions to return
* with nanosecond accuracy
*/
#include <linux/module.h>
#include <linux/timex.h>
#include <linux/errno.h>
#include <linux/smp_lock.h>
#include <linux/syscalls.h>
#include <linux/security.h>
#include <linux/fs.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
/*
* The timezone where the local system is located. Used as a default by some
 * programs that obtain this value by calling gettimeofday().
*/
struct timezone sys_tz;
EXPORT_SYMBOL(sys_tz);
#ifdef __ARCH_WANT_SYS_TIME
/*
* sys_time() can be implemented in user-level using
* sys_gettimeofday(). Is this for backwards compatibility? If so,
* why not move it into the appropriate arch directory (for those
* architectures that need it).
*/
asmlinkage long sys_time(time_t __user * tloc)
{
time_t i;
struct timeval tv;
do_gettimeofday(&tv);
i = tv.tv_sec;
if (tloc) {
if (put_user(i,tloc))
i = -EFAULT;
}
return i;
}
/*
* sys_stime() can be implemented in user-level using
* sys_settimeofday(). Is this for backwards compatibility? If so,
* why not move it into the appropriate arch directory (for those
* architectures that need it).
*/
asmlinkage long sys_stime(time_t __user *tptr)
{
struct timespec tv;
int err;
if (get_user(tv.tv_sec, tptr))
return -EFAULT;
tv.tv_nsec = 0;
err = security_settime(&tv, NULL);
if (err)
return err;
do_settimeofday(&tv);
return 0;
}
#endif /* __ARCH_WANT_SYS_TIME */
asmlinkage long sys_gettimeofday(struct timeval __user *tv, struct timezone __user *tz)
{
if (likely(tv != NULL)) {
struct timeval ktv;
do_gettimeofday(&ktv);
if (copy_to_user(tv, &ktv, sizeof(ktv)))
return -EFAULT;
}
if (unlikely(tz != NULL)) {
if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
return -EFAULT;
}
return 0;
}
/*
* Adjust the time obtained from the CMOS to be UTC time instead of
* local time.
*
* This is ugly, but preferable to the alternatives. Otherwise we
* would either need to write a program to do it in /etc/rc (and risk
* confusion if the program gets run more than once; it would also be
* hard to make the program warp the clock precisely n hours) or
* compile in the timezone information into the kernel. Bad, bad....
*
* - TYT, 1992-01-01
*
* The best thing to do is to keep the CMOS clock in universal time (UTC)
* as real UNIX machines always do it. This avoids all headaches about
* daylight saving times and warping kernel clocks.
*/
static inline void warp_clock(void)
{
write_seqlock_irq(&xtime_lock);
wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
xtime.tv_sec += sys_tz.tz_minuteswest * 60;
time_interpolator_reset();
write_sequnlock_irq(&xtime_lock);
clock_was_set();
}
/*
 * In case the CMOS clock has for some reason not been running in UTC
 * but in some local time: the first time we set the timezone,
 * we will warp the clock so that it is ticking UTC time instead of
* local time. Presumably, if someone is setting the timezone then we
* are running in an environment where the programs understand about
* timezones. This should be done at boot time in the /etc/rc script,
* as soon as possible, so that the clock can be set right. Otherwise,
* various programs will get confused when the clock gets warped.
*/
int do_sys_settimeofday(struct timespec *tv, struct timezone *tz)
{
static int firsttime = 1;
int error = 0;
error = security_settime(tv, tz);
if (error)
return error;
if (tz) {
/* SMP safe, global irq locking makes it work. */
sys_tz = *tz;
if (firsttime) {
firsttime = 0;
if (!tv)
warp_clock();
}
}
if (tv)
{
/* SMP safe, again the code in arch/foo/time.c should
* globally block out interrupts when it runs.
*/
return do_settimeofday(tv);
}
return 0;
}
asmlinkage long sys_settimeofday(struct timeval __user *tv,
struct timezone __user *tz)
{
struct timeval user_tv;
struct timespec new_ts;
struct timezone new_tz;
if (tv) {
if (copy_from_user(&user_tv, tv, sizeof(*tv)))
return -EFAULT;
new_ts.tv_sec = user_tv.tv_sec;
new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC;
}
if (tz) {
if (copy_from_user(&new_tz, tz, sizeof(*tz)))
return -EFAULT;
}
return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL);
}
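/*
 * Illustrative sketch (userspace side, hypothetical values): the first
 * settimeofday() call that supplies only a timezone triggers
 * warp_clock() above, e.g.
 *
 *	struct timezone tz = { .tz_minuteswest = 300, .tz_dsttime = 0 };
 *	settimeofday(NULL, &tz);	// warp a local-time CMOS clock to UTC
 */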
long pps_offset; /* pps time offset (us) */
long pps_jitter = MAXTIME; /* time dispersion (jitter) (us) */
long pps_freq; /* frequency offset (scaled ppm) */
long pps_stabil = MAXFREQ; /* frequency dispersion (scaled ppm) */
long pps_valid = PPS_VALID; /* pps signal watchdog counter */
int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */
long pps_jitcnt; /* jitter limit exceeded */
long pps_calcnt; /* calibration intervals */
long pps_errcnt; /* calibration errors */
long pps_stbcnt; /* stability limit exceeded */
/* hook for a loadable hardpps kernel module */
void (*hardpps_ptr)(struct timeval *);
/* we call this to notify the arch when the clock is being
* controlled. If no such arch routine, do nothing.
*/
void __attribute__ ((weak)) notify_arch_cmos_timer(void)
{
return;
}
/* adjtimex mainly allows reading (and writing, if superuser) of
 * kernel time-keeping variables. Used by xntpd.
*/
int do_adjtimex(struct timex *txc)
{
long ltemp, mtemp, save_adjust;
int result;
/* In order to modify anything, you gotta be super-user! */
if (txc->modes && !capable(CAP_SYS_TIME))
return -EPERM;
/* Now we validate the data before disabling interrupts */
if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
/* singleshot must not be used with any other mode bits */
if (txc->modes != ADJ_OFFSET_SINGLESHOT)
return -EINVAL;
if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET))
/* adjustment offset limited to +/- 0.512 seconds */
if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE )
return -EINVAL;
/* if the quartz is off by more than 10% something is VERY wrong ! */
if (txc->modes & ADJ_TICK)
if (txc->tick < 900000/USER_HZ ||
txc->tick > 1100000/USER_HZ)
return -EINVAL;
write_seqlock_irq(&xtime_lock);
result = time_state; /* mostly `TIME_OK' */
/* Save for later - semantics of adjtime is to return old value */
save_adjust = time_next_adjust ? time_next_adjust : time_adjust;
#if 0 /* STA_CLOCKERR is never set yet */
time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */
#endif
/* If there are input parameters, then process them */
if (txc->modes)
{
if (txc->modes & ADJ_STATUS) /* only set allowed bits */
time_status = (txc->status & ~STA_RONLY) |
(time_status & STA_RONLY);
if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */
if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) {
result = -EINVAL;
goto leave;
}
time_freq = txc->freq - pps_freq;
}
if (txc->modes & ADJ_MAXERROR) {
if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) {
result = -EINVAL;
goto leave;
}
time_maxerror = txc->maxerror;
}
if (txc->modes & ADJ_ESTERROR) {
if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) {
result = -EINVAL;
goto leave;
}
time_esterror = txc->esterror;
}
if (txc->modes & ADJ_TIMECONST) { /* p. 24 */
if (txc->constant < 0) { /* NTP v4 uses values > 6 */
result = -EINVAL;
goto leave;
}
time_constant = txc->constant;
}
if (txc->modes & ADJ_OFFSET) { /* values checked earlier */
if (txc->modes == ADJ_OFFSET_SINGLESHOT) {
/* adjtime() is independent from ntp_adjtime() */
if ((time_next_adjust = txc->offset) == 0)
time_adjust = 0;
}
else if ( time_status & (STA_PLL | STA_PPSTIME) ) {
ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) ==
(STA_PPSTIME | STA_PPSSIGNAL) ?
pps_offset : txc->offset;
/*
* Scale the phase adjustment and
* clamp to the operating range.
*/
if (ltemp > MAXPHASE)
time_offset = MAXPHASE << SHIFT_UPDATE;
else if (ltemp < -MAXPHASE)
time_offset = -(MAXPHASE << SHIFT_UPDATE);
else
time_offset = ltemp << SHIFT_UPDATE;
/*
* Select whether the frequency is to be controlled
* and in which mode (PLL or FLL). Clamp to the operating
* range. Ugly multiply/divide should be replaced someday.
*/
if (time_status & STA_FREQHOLD || time_reftime == 0)
time_reftime = xtime.tv_sec;
mtemp = xtime.tv_sec - time_reftime;
time_reftime = xtime.tv_sec;
if (time_status & STA_FLL) {
if (mtemp >= MINSEC) {
ltemp = (time_offset / mtemp) << (SHIFT_USEC -
SHIFT_UPDATE);
if (ltemp < 0)
time_freq -= -ltemp >> SHIFT_KH;
else
time_freq += ltemp >> SHIFT_KH;
} else /* calibration interval too short (p. 12) */
result = TIME_ERROR;
} else { /* PLL mode */
if (mtemp < MAXSEC) {
ltemp *= mtemp;
if (ltemp < 0)
time_freq -= -ltemp >> (time_constant +
time_constant +
SHIFT_KF - SHIFT_USEC);
else
time_freq += ltemp >> (time_constant +
time_constant +
SHIFT_KF - SHIFT_USEC);
} else /* calibration interval too long (p. 12) */
result = TIME_ERROR;
}
if (time_freq > time_tolerance)
time_freq = time_tolerance;
else if (time_freq < -time_tolerance)
time_freq = -time_tolerance;
} /* STA_PLL || STA_PPSTIME */
} /* txc->modes & ADJ_OFFSET */
if (txc->modes & ADJ_TICK) {
tick_usec = txc->tick;
tick_nsec = TICK_USEC_TO_NSEC(tick_usec);
}
} /* txc->modes */
leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
|| ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0
&& (time_status & STA_PPSSIGNAL) == 0)
/* p. 24, (b) */
|| ((time_status & (STA_PPSTIME|STA_PPSJITTER))
== (STA_PPSTIME|STA_PPSJITTER))
/* p. 24, (c) */
|| ((time_status & STA_PPSFREQ) != 0
&& (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0))
/* p. 24, (d) */
result = TIME_ERROR;
if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
txc->offset = save_adjust;
else {
if (time_offset < 0)
txc->offset = -(-time_offset >> SHIFT_UPDATE);
else
txc->offset = time_offset >> SHIFT_UPDATE;
}
txc->freq = time_freq + pps_freq;
txc->maxerror = time_maxerror;
txc->esterror = time_esterror;
txc->status = time_status;
txc->constant = time_constant;
txc->precision = time_precision;
txc->tolerance = time_tolerance;
txc->tick = tick_usec;
txc->ppsfreq = pps_freq;
txc->jitter = pps_jitter >> PPS_AVG;
txc->shift = pps_shift;
txc->stabil = pps_stabil;
txc->jitcnt = pps_jitcnt;
txc->calcnt = pps_calcnt;
txc->errcnt = pps_errcnt;
txc->stbcnt = pps_stbcnt;
write_sequnlock_irq(&xtime_lock);
do_gettimeofday(&txc->time);
notify_arch_cmos_timer();
return(result);
}
asmlinkage long sys_adjtimex(struct timex __user *txc_p)
{
struct timex txc; /* Local copy of parameter */
int ret;
/* Copy the user data space into the kernel copy
 * structure. But bear in mind that the structures
 * may change.
*/
if(copy_from_user(&txc, txc_p, sizeof(struct timex)))
return -EFAULT;
ret = do_adjtimex(&txc);
return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
}
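/*
 * Illustrative sketch (userspace side): querying the timekeeping state
 * without modifying it via adjtimex(2).
 *
 *	struct timex tx = { .modes = 0 };	// read-only query
 *	int state = adjtimex(&tx);		// TIME_OK, TIME_ERROR, ...
 */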
inline struct timespec current_kernel_time(void)
{
struct timespec now;
unsigned long seq;
do {
seq = read_seqbegin(&xtime_lock);
now = xtime;
} while (read_seqretry(&xtime_lock, seq));
return now;
}
EXPORT_SYMBOL(current_kernel_time);
/**
* current_fs_time - Return FS time
* @sb: Superblock.
*
 * Return the current time truncated to the time granularity supported by
* the fs.
*/
struct timespec current_fs_time(struct super_block *sb)
{
struct timespec now = current_kernel_time();
return timespec_trunc(now, sb->s_time_gran);
}
EXPORT_SYMBOL(current_fs_time);
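/*
 * Illustrative sketch: a filesystem updating an inode timestamp with
 * the granularity-aware helper.  demo_touch is hypothetical.
 */
static void demo_touch(struct inode *inode)
{
	inode->i_mtime = current_fs_time(inode->i_sb);
}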
/**
 * timespec_trunc - Truncate timespec to a granularity
 * @t: Timespec
 * @gran: Granularity in ns.
 *
 * Truncate a timespec to a granularity. gran must be smaller than a second.
 * Always rounds down.
 *
 * This function should only be used for timestamps returned by
 * current_kernel_time() or CURRENT_TIME, not with do_gettimeofday() because
 * it doesn't handle the better resolution of the latter.
*/
struct timespec timespec_trunc(struct timespec t, unsigned gran)
{
/*
* Division is pretty slow so avoid it for common cases.
* Currently current_kernel_time() never returns better than
* jiffies resolution. Exploit that.
*/
if (gran <= jiffies_to_usecs(1) * 1000) {
/* nothing */
} else if (gran == 1000000000) {
t.tv_nsec = 0;
} else {
t.tv_nsec -= t.tv_nsec % gran;
}
return t;
}
EXPORT_SYMBOL(timespec_trunc);
#ifdef CONFIG_TIME_INTERPOLATION
void getnstimeofday (struct timespec *tv)
{
unsigned long seq,sec,nsec;
do {
seq = read_seqbegin(&xtime_lock);
sec = xtime.tv_sec;
nsec = xtime.tv_nsec+time_interpolator_get_offset();
} while (unlikely(read_seqretry(&xtime_lock, seq)));
while (unlikely(nsec >= NSEC_PER_SEC)) {
nsec -= NSEC_PER_SEC;
++sec;
}
tv->tv_sec = sec;
tv->tv_nsec = nsec;
}
EXPORT_SYMBOL_GPL(getnstimeofday);
int do_settimeofday (struct timespec *tv)
{
time_t wtm_sec, sec = tv->tv_sec;
long wtm_nsec, nsec = tv->tv_nsec;
if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
write_seqlock_irq(&xtime_lock);
{
/*
* This is revolting. We need to set "xtime" correctly. However, the value
* in this location is the value at the most recent update of wall time.
* Discover what correction gettimeofday would have done, and then undo
* it!
*/
nsec -= time_interpolator_get_offset();
wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
set_normalized_timespec(&xtime, sec, nsec);
set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC;
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
time_interpolator_reset();
}
write_sequnlock_irq(&xtime_lock);
clock_was_set();
return 0;
}
void do_gettimeofday (struct timeval *tv)
{
unsigned long seq, nsec, usec, sec, offset;
do {
seq = read_seqbegin(&xtime_lock);
offset = time_interpolator_get_offset();
sec = xtime.tv_sec;
nsec = xtime.tv_nsec;
} while (unlikely(read_seqretry(&xtime_lock, seq)));
usec = (nsec + offset) / 1000;
while (unlikely(usec >= USEC_PER_SEC)) {
usec -= USEC_PER_SEC;
++sec;
}
tv->tv_sec = sec;
tv->tv_usec = usec;
}
EXPORT_SYMBOL(do_gettimeofday);
#else
/*
 * Simulate getnstimeofday() using do_gettimeofday(), which only allows a
 * timeval and therefore only yields usec accuracy.
*/
void getnstimeofday(struct timespec *tv)
{
struct timeval x;
do_gettimeofday(&x);
tv->tv_sec = x.tv_sec;
tv->tv_nsec = x.tv_usec * NSEC_PER_USEC;
}
#endif
#if (BITS_PER_LONG < 64)
u64 get_jiffies_64(void)
{
unsigned long seq;
u64 ret;
do {
seq = read_seqbegin(&xtime_lock);
ret = jiffies_64;
} while (read_seqretry(&xtime_lock, seq));
return ret;
}
EXPORT_SYMBOL(get_jiffies_64);
#endif
EXPORT_SYMBOL(jiffies);

1611
kernel/timer.c Normal file

File diff suppressed because it is too large.

196
kernel/uid16.c Normal file

@@ -0,0 +1,196 @@
/*
 * Wrapper functions for 16-bit uid backward compatibility. All nicely tied
 * together in the faint hope we can take them out in five years' time.
*/
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <asm/uaccess.h>
asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gid_t group)
{
return sys_chown(filename, low2highuid(user), low2highgid(group));
}
asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_gid_t group)
{
return sys_lchown(filename, low2highuid(user), low2highgid(group));
}
asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group)
{
return sys_fchown(fd, low2highuid(user), low2highgid(group));
}
asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid)
{
return sys_setregid(low2highgid(rgid), low2highgid(egid));
}
asmlinkage long sys_setgid16(old_gid_t gid)
{
return sys_setgid(low2highgid(gid));
}
asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid)
{
return sys_setreuid(low2highuid(ruid), low2highuid(euid));
}
asmlinkage long sys_setuid16(old_uid_t uid)
{
return sys_setuid(low2highuid(uid));
}
asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid)
{
return sys_setresuid(low2highuid(ruid), low2highuid(euid),
low2highuid(suid));
}
asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid)
{
int retval;
if (!(retval = put_user(high2lowuid(current->uid), ruid)) &&
!(retval = put_user(high2lowuid(current->euid), euid)))
retval = put_user(high2lowuid(current->suid), suid);
return retval;
}
asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid)
{
return sys_setresgid(low2highgid(rgid), low2highgid(egid),
low2highgid(sgid));
}
asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid)
{
int retval;
if (!(retval = put_user(high2lowgid(current->gid), rgid)) &&
!(retval = put_user(high2lowgid(current->egid), egid)))
retval = put_user(high2lowgid(current->sgid), sgid);
return retval;
}
asmlinkage long sys_setfsuid16(old_uid_t uid)
{
return sys_setfsuid(low2highuid(uid));
}
asmlinkage long sys_setfsgid16(old_gid_t gid)
{
return sys_setfsgid(low2highgid(gid));
}
static int groups16_to_user(old_gid_t __user *grouplist,
struct group_info *group_info)
{
int i;
old_gid_t group;
for (i = 0; i < group_info->ngroups; i++) {
group = high2lowgid(GROUP_AT(group_info, i));
if (put_user(group, grouplist+i))
return -EFAULT;
}
return 0;
}
static int groups16_from_user(struct group_info *group_info,
old_gid_t __user *grouplist)
{
int i;
old_gid_t group;
for (i = 0; i < group_info->ngroups; i++) {
if (get_user(group, grouplist+i))
return -EFAULT;
GROUP_AT(group_info, i) = low2highgid(group);
}
return 0;
}
asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist)
{
int i = 0;
if (gidsetsize < 0)
return -EINVAL;
get_group_info(current->group_info);
i = current->group_info->ngroups;
if (gidsetsize) {
if (i > gidsetsize) {
i = -EINVAL;
goto out;
}
if (groups16_to_user(grouplist, current->group_info)) {
i = -EFAULT;
goto out;
}
}
out:
put_group_info(current->group_info);
return i;
}
asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist)
{
struct group_info *group_info;
int retval;
if (!capable(CAP_SETGID))
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
group_info = groups_alloc(gidsetsize);
if (!group_info)
return -ENOMEM;
retval = groups16_from_user(group_info, grouplist);
if (retval) {
put_group_info(group_info);
return retval;
}
retval = set_current_groups(group_info);
put_group_info(group_info);
return retval;
}
asmlinkage long sys_getuid16(void)
{
return high2lowuid(current->uid);
}
asmlinkage long sys_geteuid16(void)
{
return high2lowuid(current->euid);
}
asmlinkage long sys_getgid16(void)
{
return high2lowgid(current->gid);
}
asmlinkage long sys_getegid16(void)
{
return high2lowgid(current->egid);
}

189
kernel/user.c Normal file
View File

@@ -0,0 +1,189 @@
/*
* The "user cache".
*
* (C) Copyright 1991-2000 Linus Torvalds
*
* We have a per-user structure to keep track of how many
* processes, files etc the user has claimed, in order to be
* able to have per-user limits for system resources.
*/
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/key.h>
/*
 * UID task count cache, to get fast user lookup in "alloc_uid"
 * when changing user IDs (i.e. setuid() and friends).
 */
#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
#define UIDHASH_SZ (1 << UIDHASH_BITS)
#define UIDHASH_MASK (UIDHASH_SZ - 1)
#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid)))
static kmem_cache_t *uid_cachep;
static struct list_head uidhash_table[UIDHASH_SZ];
static DEFINE_SPINLOCK(uidhash_lock);
struct user_struct root_user = {
.__count = ATOMIC_INIT(1),
.processes = ATOMIC_INIT(1),
.files = ATOMIC_INIT(0),
.sigpending = ATOMIC_INIT(0),
.mq_bytes = 0,
.locked_shm = 0,
#ifdef CONFIG_KEYS
.uid_keyring = &root_user_keyring,
.session_keyring = &root_session_keyring,
#endif
};
/*
* These routines must be called with the uidhash spinlock held!
*/
static inline void uid_hash_insert(struct user_struct *up, struct list_head *hashent)
{
list_add(&up->uidhash_list, hashent);
}
static inline void uid_hash_remove(struct user_struct *up)
{
list_del(&up->uidhash_list);
}
static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent)
{
struct list_head *up;
list_for_each(up, hashent) {
struct user_struct *user;
user = list_entry(up, struct user_struct, uidhash_list);
if (user->uid == uid) {
atomic_inc(&user->__count);
return user;
}
}
return NULL;
}
/*
* Locate the user_struct for the passed UID. If found, take a ref on it. The
* caller must undo that ref with free_uid().
*
* If the user_struct could not be found, return NULL.
*/
struct user_struct *find_user(uid_t uid)
{
struct user_struct *ret;
spin_lock(&uidhash_lock);
ret = uid_hash_find(uid, uidhashentry(uid));
spin_unlock(&uidhash_lock);
return ret;
}
void free_uid(struct user_struct *up)
{
if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
uid_hash_remove(up);
key_put(up->uid_keyring);
key_put(up->session_keyring);
kmem_cache_free(uid_cachep, up);
spin_unlock(&uidhash_lock);
}
}
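/*
 * A minimal sketch of the lookup/ref protocol described above; the
 * function and its name are hypothetical, not part of this file:
 * take a reference via find_user(), drop it with free_uid().
 */
static int my_user_nr_tasks(uid_t uid)
{
	struct user_struct *user = find_user(uid);
	int nr = 0;

	if (user) {
		nr = atomic_read(&user->processes);
		free_uid(user);		/* undo the ref find_user() took */
	}
	return nr;
}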
struct user_struct * alloc_uid(uid_t uid)
{
struct list_head *hashent = uidhashentry(uid);
struct user_struct *up;
spin_lock(&uidhash_lock);
up = uid_hash_find(uid, hashent);
spin_unlock(&uidhash_lock);
if (!up) {
struct user_struct *new;
new = kmem_cache_alloc(uid_cachep, SLAB_KERNEL);
if (!new)
return NULL;
new->uid = uid;
atomic_set(&new->__count, 1);
atomic_set(&new->processes, 0);
atomic_set(&new->files, 0);
atomic_set(&new->sigpending, 0);
new->mq_bytes = 0;
new->locked_shm = 0;
if (alloc_uid_keyring(new) < 0) {
kmem_cache_free(uid_cachep, new);
return NULL;
}
/*
* Before adding this, check whether we raced
* on adding the same user already..
*/
spin_lock(&uidhash_lock);
up = uid_hash_find(uid, hashent);
if (up) {
key_put(new->uid_keyring);
key_put(new->session_keyring);
kmem_cache_free(uid_cachep, new);
} else {
uid_hash_insert(new, hashent);
up = new;
}
spin_unlock(&uidhash_lock);
}
return up;
}
void switch_uid(struct user_struct *new_user)
{
struct user_struct *old_user;
/* What if a process setreuid()'s and this brings the
 * new uid over its NPROC rlimit? We can now check this
 * cheaply with the new uid cache, so if it matters
 * we should be checking for it. -DaveM
 */
old_user = current->user;
atomic_inc(&new_user->processes);
atomic_dec(&old_user->processes);
switch_uid_keyring(new_user);
current->user = new_user;
free_uid(old_user);
suid_keys(current);
}
static int __init uid_cache_init(void)
{
int n;
uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
for (n = 0; n < UIDHASH_SZ; ++n)
INIT_LIST_HEAD(uidhash_table + n);
/* Insert the root user immediately (init already runs as root) */
spin_lock(&uidhash_lock);
uid_hash_insert(&root_user, uidhashentry(0));
spin_unlock(&uidhash_lock);
return 0;
}
module_init(uid_cache_init);

246
kernel/wait.c Normal file
View File

@@ -0,0 +1,246 @@
/*
* Generic waiting primitives.
*
* (C) 2004 William Irwin, Oracle
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/wait.h>
#include <linux/hash.h>
void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
__add_wait_queue(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue);
void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
__add_wait_queue_tail(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);
void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
spin_lock_irqsave(&q->lock, flags);
__remove_wait_queue(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(remove_wait_queue);
/*
* Note: we use "set_current_state()" _after_ the wait-queue add,
* because we need a memory barrier there on SMP, so that any
* wake-function that tests for the wait-queue being active
* will be guaranteed to see waitqueue addition _or_ subsequent
* tests in this thread will see the wakeup having taken place.
*
* The spin_unlock() itself is semi-permeable and only protects
* one way (it only protects stuff inside the critical region and
* stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
*/
void fastcall
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
__add_wait_queue(q, wait);
/*
* don't alter the task state if this is just going to
* queue an async wait queue callback
*/
if (is_sync_wait(wait))
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);
void fastcall
prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
__add_wait_queue_tail(q, wait);
/*
* don't alter the task state if this is just going to
* queue an async wait queue callback
*/
if (is_sync_wait(wait))
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);
void fastcall finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
__set_current_state(TASK_RUNNING);
/*
* We can check for list emptiness outside the lock
* IFF:
 * - we use the "careful" check that verifies both
 * the next and prev pointers, so that there cannot
 * be any half-pending updates in progress on other
 * CPUs that we haven't seen yet (and that might
 * still change the stack area), and
 * - all other users take the lock (i.e. we can only
 * have _one_ other CPU that looks at or modifies
 * the list).
*/
if (!list_empty_careful(&wait->task_list)) {
spin_lock_irqsave(&q->lock, flags);
list_del_init(&wait->task_list);
spin_unlock_irqrestore(&q->lock, flags);
}
}
EXPORT_SYMBOL(finish_wait);
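/*
 * A minimal sketch of the open-coded wait loop these primitives are
 * designed for; my_wq and my_condition are hypothetical names, not
 * part of this file.
 */
static DECLARE_WAIT_QUEUE_HEAD(my_wq);
static int my_condition;

static int my_wait_for_condition(void)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&my_wq, &wait, TASK_INTERRUPTIBLE);
		if (my_condition)
			break;
		if (signal_pending(current))
			break;
		schedule();
	}
	finish_wait(&my_wq, &wait);
	return my_condition ? 0 : -ERESTARTSYS;
}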
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
int ret = default_wake_function(wait, mode, sync, key);
if (ret)
list_del_init(&wait->task_list);
return ret;
}
EXPORT_SYMBOL(autoremove_wake_function);
int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
{
struct wait_bit_key *key = arg;
struct wait_bit_queue *wait_bit
= container_of(wait, struct wait_bit_queue, wait);
if (wait_bit->key.flags != key->flags ||
wait_bit->key.bit_nr != key->bit_nr ||
test_bit(key->bit_nr, key->flags))
return 0;
else
return autoremove_wake_function(wait, mode, sync, key);
}
EXPORT_SYMBOL(wake_bit_function);
/*
 * To allow interruptible waiting and asynchronous (i.e. nonblocking)
 * waiting, the action functions passed to __wait_on_bit() and
 * __wait_on_bit_lock() may return nonzero codes; a nonzero return
 * halts the wait and is propagated back to the caller.
 */
int __sched fastcall
__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
int (*action)(void *), unsigned mode)
{
int ret = 0;
do {
prepare_to_wait(wq, &q->wait, mode);
if (test_bit(q->key.bit_nr, q->key.flags))
ret = (*action)(q->key.flags);
} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
finish_wait(wq, &q->wait);
return ret;
}
EXPORT_SYMBOL(__wait_on_bit);
int __sched fastcall out_of_line_wait_on_bit(void *word, int bit,
int (*action)(void *), unsigned mode)
{
wait_queue_head_t *wq = bit_waitqueue(word, bit);
DEFINE_WAIT_BIT(wait, word, bit);
return __wait_on_bit(wq, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_bit);
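/*
 * A sketch of the kind of "action" routine described above;
 * my_bit_wait() is a hypothetical example, not part of this file.
 * A typical call would be:
 *
 *	out_of_line_wait_on_bit(&flags, MY_BIT, my_bit_wait,
 *				TASK_INTERRUPTIBLE);
 */
static int my_bit_wait(void *word)
{
	if (signal_pending(current))
		return -EINTR;		/* nonzero halts the wait */
	schedule();
	return 0;			/* keep waiting */
}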
int __sched fastcall
__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
int (*action)(void *), unsigned mode)
{
int ret = 0;
do {
prepare_to_wait_exclusive(wq, &q->wait, mode);
if (test_bit(q->key.bit_nr, q->key.flags)) {
if ((ret = (*action)(q->key.flags)))
break;
}
} while (test_and_set_bit(q->key.bit_nr, q->key.flags));
finish_wait(wq, &q->wait);
return ret;
}
EXPORT_SYMBOL(__wait_on_bit_lock);
int __sched fastcall out_of_line_wait_on_bit_lock(void *word, int bit,
int (*action)(void *), unsigned mode)
{
wait_queue_head_t *wq = bit_waitqueue(word, bit);
DEFINE_WAIT_BIT(wait, word, bit);
return __wait_on_bit_lock(wq, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
void fastcall __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
{
struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
if (waitqueue_active(wq))
__wake_up(wq, TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE, 1, &key);
}
EXPORT_SYMBOL(__wake_up_bit);
/**
* wake_up_bit - wake up a waiter on a bit
* @word: the word being waited on, a kernel virtual address
* @bit: the bit of the word being waited on
*
* There is a standard hashed waitqueue table for generic use. This
* is the part of the hashtable's accessor API that wakes up waiters
* on a bit. For instance, if one were to have waiters on a bitflag,
* one would call wake_up_bit() after clearing the bit.
*
* In order for this to function properly, as it uses waitqueue_active()
* internally, some kind of memory barrier must be done prior to calling
* this. Typically, this will be smp_mb__after_clear_bit(), but in some
* cases where bitflags are manipulated non-atomically under a lock, one
 * may need to use a less regular barrier, such as fs/inode.c's smp_mb(),
* because spin_unlock() does not guarantee a memory barrier.
*/
void fastcall wake_up_bit(void *word, int bit)
{
__wake_up_bit(bit_waitqueue(word, bit), word, bit);
}
EXPORT_SYMBOL(wake_up_bit);
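/*
 * A sketch of the clear-then-wake pattern prescribed above; my_flags
 * and MY_LOCK_BIT are hypothetical names, not part of this file.
 */
static unsigned long my_flags;
#define MY_LOCK_BIT 0

static void my_unlock(void)
{
	clear_bit(MY_LOCK_BIT, &my_flags);
	/* order the clear before wake_up_bit()'s waitqueue_active() test */
	smp_mb__after_clear_bit();
	wake_up_bit(&my_flags, MY_LOCK_BIT);
}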
fastcall wait_queue_head_t *bit_waitqueue(void *word, int bit)
{
const int shift = BITS_PER_LONG == 32 ? 5 : 6;
const struct zone *zone = page_zone(virt_to_page(word));
unsigned long val = (unsigned long)word << shift | bit;
return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
}
EXPORT_SYMBOL(bit_waitqueue);

555
kernel/workqueue.c Normal file
View File

@@ -0,0 +1,555 @@
/*
* linux/kernel/workqueue.c
*
* Generic mechanism for defining kernel helper threads for running
* arbitrary tasks in process context.
*
* Started by Ingo Molnar, Copyright (C) 2002
*
* Derived from the taskqueue/keventd code by:
*
* David Woodhouse <dwmw2@infradead.org>
* Andrew Morton <andrewm@uow.edu.au>
* Kai Petzke <wpp@marie.physik.tu-berlin.de>
* Theodore Ts'o <tytso@mit.edu>
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
/*
* The per-CPU workqueue (if single thread, we always use cpu 0's).
*
* The sequence counters are for flush_scheduled_work(). It wants to wait
 * until all currently-scheduled works are completed, but it doesn't
* want to be livelocked by new, incoming ones. So it waits until
* remove_sequence is >= the insert_sequence which pertained when
* flush_scheduled_work() was called.
*/
struct cpu_workqueue_struct {
spinlock_t lock;
long remove_sequence; /* Least-recently added (next to run) */
long insert_sequence; /* Next to add */
struct list_head worklist;
wait_queue_head_t more_work;
wait_queue_head_t work_done;
struct workqueue_struct *wq;
task_t *thread;
int run_depth; /* Detect run_workqueue() recursion depth */
} ____cacheline_aligned;
/*
* The externally visible workqueue abstraction is an array of
* per-CPU workqueues:
*/
struct workqueue_struct {
struct cpu_workqueue_struct cpu_wq[NR_CPUS];
const char *name;
struct list_head list; /* Empty if single thread */
};
/* All the per-cpu workqueues on the system, so that cpu hotplug can
   add/remove threads to each one as cpus come and go. */
static DEFINE_SPINLOCK(workqueue_lock);
static LIST_HEAD(workqueues);
/* If it's single threaded, it isn't in the list of workqueues. */
static inline int is_single_threaded(struct workqueue_struct *wq)
{
return list_empty(&wq->list);
}
/* Preempt must be disabled. */
static void __queue_work(struct cpu_workqueue_struct *cwq,
struct work_struct *work)
{
unsigned long flags;
spin_lock_irqsave(&cwq->lock, flags);
work->wq_data = cwq;
list_add_tail(&work->entry, &cwq->worklist);
cwq->insert_sequence++;
wake_up(&cwq->more_work);
spin_unlock_irqrestore(&cwq->lock, flags);
}
/*
* Queue work on a workqueue. Return non-zero if it was successfully
* added.
*
 * We queue the work to the CPU on which it was submitted, but there is no
* guarantee that it will be processed by that CPU.
*/
int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
int ret = 0, cpu = get_cpu();
if (!test_and_set_bit(0, &work->pending)) {
if (unlikely(is_single_threaded(wq)))
cpu = 0;
BUG_ON(!list_empty(&work->entry));
__queue_work(wq->cpu_wq + cpu, work);
ret = 1;
}
put_cpu();
return ret;
}
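/*
 * A minimal usage sketch; my_handler, my_work and my_wq are
 * hypothetical names, not part of this file.
 */
static void my_handler(void *data)
{
	/* runs in a worker thread: process context, may sleep */
}

static DECLARE_WORK(my_work, my_handler, NULL);
static struct workqueue_struct *my_wq;

static int my_setup(void)
{
	my_wq = create_workqueue("mywq");	/* one worker thread per CPU */
	if (!my_wq)
		return -ENOMEM;
	queue_work(my_wq, &my_work);		/* returns 0 if already pending */
	return 0;
}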
static void delayed_work_timer_fn(unsigned long __data)
{
struct work_struct *work = (struct work_struct *)__data;
struct workqueue_struct *wq = work->wq_data;
int cpu = smp_processor_id();
if (unlikely(is_single_threaded(wq)))
cpu = 0;
__queue_work(wq->cpu_wq + cpu, work);
}
int fastcall queue_delayed_work(struct workqueue_struct *wq,
struct work_struct *work, unsigned long delay)
{
int ret = 0;
struct timer_list *timer = &work->timer;
if (!test_and_set_bit(0, &work->pending)) {
BUG_ON(timer_pending(timer));
BUG_ON(!list_empty(&work->entry));
/* This stores wq for the moment, for the timer_fn */
work->wq_data = wq;
timer->expires = jiffies + delay;
timer->data = (unsigned long)work;
timer->function = delayed_work_timer_fn;
add_timer(timer);
ret = 1;
}
return ret;
}
static inline void run_workqueue(struct cpu_workqueue_struct *cwq)
{
unsigned long flags;
/*
* Keep taking off work from the queue until
* done.
*/
spin_lock_irqsave(&cwq->lock, flags);
cwq->run_depth++;
if (cwq->run_depth > 3) {
/* morton gets to eat his hat */
printk("%s: recursion depth exceeded: %d\n",
__FUNCTION__, cwq->run_depth);
dump_stack();
}
while (!list_empty(&cwq->worklist)) {
struct work_struct *work = list_entry(cwq->worklist.next,
struct work_struct, entry);
void (*f) (void *) = work->func;
void *data = work->data;
list_del_init(cwq->worklist.next);
spin_unlock_irqrestore(&cwq->lock, flags);
BUG_ON(work->wq_data != cwq);
clear_bit(0, &work->pending);
f(data);
spin_lock_irqsave(&cwq->lock, flags);
cwq->remove_sequence++;
wake_up(&cwq->work_done);
}
cwq->run_depth--;
spin_unlock_irqrestore(&cwq->lock, flags);
}
static int worker_thread(void *__cwq)
{
struct cpu_workqueue_struct *cwq = __cwq;
DECLARE_WAITQUEUE(wait, current);
struct k_sigaction sa;
sigset_t blocked;
current->flags |= PF_NOFREEZE;
set_user_nice(current, -5);
/* Block and flush all signals */
sigfillset(&blocked);
sigprocmask(SIG_BLOCK, &blocked, NULL);
flush_signals(current);
/* SIG_IGN makes children autoreap: see do_notify_parent(). */
sa.sa.sa_handler = SIG_IGN;
sa.sa.sa_flags = 0;
siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD));
do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0);
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
add_wait_queue(&cwq->more_work, &wait);
if (list_empty(&cwq->worklist))
schedule();
else
__set_current_state(TASK_RUNNING);
remove_wait_queue(&cwq->more_work, &wait);
if (!list_empty(&cwq->worklist))
run_workqueue(cwq);
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
return 0;
}
static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
{
if (cwq->thread == current) {
/*
* Probably keventd trying to flush its own queue. So simply run
* it by hand rather than deadlocking.
*/
run_workqueue(cwq);
} else {
DEFINE_WAIT(wait);
long sequence_needed;
spin_lock_irq(&cwq->lock);
sequence_needed = cwq->insert_sequence;
while (sequence_needed - cwq->remove_sequence > 0) {
prepare_to_wait(&cwq->work_done, &wait,
TASK_UNINTERRUPTIBLE);
spin_unlock_irq(&cwq->lock);
schedule();
spin_lock_irq(&cwq->lock);
}
finish_wait(&cwq->work_done, &wait);
spin_unlock_irq(&cwq->lock);
}
}
/*
* flush_workqueue - ensure that any scheduled work has run to completion.
*
* Forces execution of the workqueue and blocks until its completion.
* This is typically used in driver shutdown handlers.
*
 * This function will sample each cpu workqueue's current insert_sequence
 * number and will sleep until its remove_sequence catches up to it. This
 * means that we sleep until all works which were queued on entry have been
 * handled, but we are not livelocked by new incoming ones.
*
* This function used to run the workqueues itself. Now we just wait for the
* helper threads to do it.
*/
void fastcall flush_workqueue(struct workqueue_struct *wq)
{
might_sleep();
if (is_single_threaded(wq)) {
/* Always use cpu 0's area. */
flush_cpu_workqueue(wq->cpu_wq + 0);
} else {
int cpu;
lock_cpu_hotplug();
for_each_online_cpu(cpu)
flush_cpu_workqueue(wq->cpu_wq + cpu);
unlock_cpu_hotplug();
}
}
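/*
 * A sketch of the shutdown-handler usage mentioned above; my_wq and
 * the surrounding driver are hypothetical.
 */
static void my_driver_shutdown(struct workqueue_struct *my_wq)
{
	/*
	 * Once this returns, every work queued before the call has
	 * finished, so data the handlers touch can be freed safely.
	 */
	flush_workqueue(my_wq);
}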
static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
int cpu)
{
struct cpu_workqueue_struct *cwq = wq->cpu_wq + cpu;
struct task_struct *p;
spin_lock_init(&cwq->lock);
cwq->wq = wq;
cwq->thread = NULL;
cwq->insert_sequence = 0;
cwq->remove_sequence = 0;
INIT_LIST_HEAD(&cwq->worklist);
init_waitqueue_head(&cwq->more_work);
init_waitqueue_head(&cwq->work_done);
if (is_single_threaded(wq))
p = kthread_create(worker_thread, cwq, "%s", wq->name);
else
p = kthread_create(worker_thread, cwq, "%s/%d", wq->name, cpu);
if (IS_ERR(p))
return NULL;
cwq->thread = p;
return p;
}
struct workqueue_struct *__create_workqueue(const char *name,
int singlethread)
{
int cpu, destroy = 0;
struct workqueue_struct *wq;
struct task_struct *p;
BUG_ON(strlen(name) > 10);
wq = kmalloc(sizeof(*wq), GFP_KERNEL);
if (!wq)
return NULL;
memset(wq, 0, sizeof(*wq));
wq->name = name;
/* We don't need the distraction of CPUs appearing and vanishing. */
lock_cpu_hotplug();
if (singlethread) {
INIT_LIST_HEAD(&wq->list);
p = create_workqueue_thread(wq, 0);
if (!p)
destroy = 1;
else
wake_up_process(p);
} else {
spin_lock(&workqueue_lock);
list_add(&wq->list, &workqueues);
spin_unlock(&workqueue_lock);
for_each_online_cpu(cpu) {
p = create_workqueue_thread(wq, cpu);
if (p) {
kthread_bind(p, cpu);
wake_up_process(p);
} else
destroy = 1;
}
}
unlock_cpu_hotplug();
/*
* Was there any error during startup? If yes then clean up:
*/
if (destroy) {
destroy_workqueue(wq);
wq = NULL;
}
return wq;
}
static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu)
{
struct cpu_workqueue_struct *cwq;
unsigned long flags;
struct task_struct *p;
cwq = wq->cpu_wq + cpu;
spin_lock_irqsave(&cwq->lock, flags);
p = cwq->thread;
cwq->thread = NULL;
spin_unlock_irqrestore(&cwq->lock, flags);
if (p)
kthread_stop(p);
}
void destroy_workqueue(struct workqueue_struct *wq)
{
int cpu;
flush_workqueue(wq);
/* We don't need the distraction of CPUs appearing and vanishing. */
lock_cpu_hotplug();
if (is_single_threaded(wq))
cleanup_workqueue_thread(wq, 0);
else {
for_each_online_cpu(cpu)
cleanup_workqueue_thread(wq, cpu);
spin_lock(&workqueue_lock);
list_del(&wq->list);
spin_unlock(&workqueue_lock);
}
unlock_cpu_hotplug();
kfree(wq);
}
static struct workqueue_struct *keventd_wq;
int fastcall schedule_work(struct work_struct *work)
{
return queue_work(keventd_wq, work);
}
int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay)
{
return queue_delayed_work(keventd_wq, work, delay);
}
int schedule_delayed_work_on(int cpu,
struct work_struct *work, unsigned long delay)
{
int ret = 0;
struct timer_list *timer = &work->timer;
if (!test_and_set_bit(0, &work->pending)) {
BUG_ON(timer_pending(timer));
BUG_ON(!list_empty(&work->entry));
/* This stores keventd_wq for the moment, for the timer_fn */
work->wq_data = keventd_wq;
timer->expires = jiffies + delay;
timer->data = (unsigned long)work;
timer->function = delayed_work_timer_fn;
add_timer_on(timer, cpu);
ret = 1;
}
return ret;
}
void flush_scheduled_work(void)
{
flush_workqueue(keventd_wq);
}
/**
* cancel_rearming_delayed_workqueue - reliably kill off a delayed
* work whose handler rearms the delayed work.
* @wq: the controlling workqueue structure
* @work: the delayed work struct
*/
static void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq,
struct work_struct *work)
{
while (!cancel_delayed_work(work))
flush_workqueue(wq);
}
/**
* cancel_rearming_delayed_work - reliably kill off a delayed keventd
* work whose handler rearms the delayed work.
* @work: the delayed work struct
*/
void cancel_rearming_delayed_work(struct work_struct *work)
{
cancel_rearming_delayed_workqueue(keventd_wq, work);
}
EXPORT_SYMBOL(cancel_rearming_delayed_work);
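/*
 * A sketch of the self-rearming pattern these helpers exist for;
 * my_poll and my_poll_work are hypothetical names, not part of this
 * file.
 */
static void my_poll(void *data);
static DECLARE_WORK(my_poll_work, my_poll, NULL);

static void my_poll(void *data)
{
	/* ... periodic work ... */
	schedule_delayed_work(&my_poll_work, HZ);	/* rearm, one second out */
}

static void my_poll_stop(void)
{
	/* keeps cancelling and flushing until the work can no longer rearm */
	cancel_rearming_delayed_work(&my_poll_work);
}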
int keventd_up(void)
{
return keventd_wq != NULL;
}
int current_is_keventd(void)
{
struct cpu_workqueue_struct *cwq;
int cpu = smp_processor_id(); /* preempt-safe: keventd is per-cpu */
int ret = 0;
BUG_ON(!keventd_wq);
cwq = keventd_wq->cpu_wq + cpu;
if (current == cwq->thread)
ret = 1;
return ret;
}
#ifdef CONFIG_HOTPLUG_CPU
/* Take the work from this (downed) CPU. */
static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
{
struct cpu_workqueue_struct *cwq = wq->cpu_wq + cpu;
LIST_HEAD(list);
struct work_struct *work;
spin_lock_irq(&cwq->lock);
list_splice_init(&cwq->worklist, &list);
while (!list_empty(&list)) {
printk("Taking work for %s\n", wq->name);
work = list_entry(list.next, struct work_struct, entry);
list_del(&work->entry);
__queue_work(wq->cpu_wq + smp_processor_id(), work);
}
spin_unlock_irq(&cwq->lock);
}
/* We're holding the cpucontrol mutex here */
static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
unsigned int hotcpu = (unsigned long)hcpu;
struct workqueue_struct *wq;
switch (action) {
case CPU_UP_PREPARE:
/* Create a new workqueue thread for it. */
list_for_each_entry(wq, &workqueues, list) {
if (create_workqueue_thread(wq, hotcpu) < 0) {
printk("workqueue for %i failed\n", hotcpu);
return NOTIFY_BAD;
}
}
break;
case CPU_ONLINE:
/* Kick off worker threads. */
list_for_each_entry(wq, &workqueues, list) {
kthread_bind(wq->cpu_wq[hotcpu].thread, hotcpu);
wake_up_process(wq->cpu_wq[hotcpu].thread);
}
break;
case CPU_UP_CANCELED:
list_for_each_entry(wq, &workqueues, list) {
/* Unbind so it can run. */
kthread_bind(wq->cpu_wq[hotcpu].thread,
smp_processor_id());
cleanup_workqueue_thread(wq, hotcpu);
}
break;
case CPU_DEAD:
list_for_each_entry(wq, &workqueues, list)
cleanup_workqueue_thread(wq, hotcpu);
list_for_each_entry(wq, &workqueues, list)
take_over_work(wq, hotcpu);
break;
}
return NOTIFY_OK;
}
#endif
void init_workqueues(void)
{
hotcpu_notifier(workqueue_cpu_callback, 0);
keventd_wq = create_workqueue("events");
BUG_ON(!keventd_wq);
}
EXPORT_SYMBOL_GPL(__create_workqueue);
EXPORT_SYMBOL_GPL(queue_work);
EXPORT_SYMBOL_GPL(queue_delayed_work);
EXPORT_SYMBOL_GPL(flush_workqueue);
EXPORT_SYMBOL_GPL(destroy_workqueue);
EXPORT_SYMBOL(schedule_work);
EXPORT_SYMBOL(schedule_delayed_work);
EXPORT_SYMBOL(schedule_delayed_work_on);
EXPORT_SYMBOL(flush_scheduled_work);