Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
This commit is contained in:
53
kernel/Makefile
Normal file
53
kernel/Makefile
Normal file
@@ -0,0 +1,53 @@
|
||||
#
# Makefile for the linux kernel.
#

# Objects always built into the kernel proper.
obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
	    exit.o itimer.o time.o softirq.o resource.o \
	    sysctl.o capability.o ptrace.o timer.o user.o \
	    signal.o sys.o kmod.o workqueue.o pid.o \
	    rcupdate.o intermodule.o extable.o params.o posix-timers.o \
	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o

# Conditionally built objects, selected by kernel configuration.
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += cpu.o spinlock.o
obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_PM) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_IKCONFIG) += configs.o
obj-$(CONFIG_IKCONFIG_PROC) += configs.o
obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o

ifneq ($(CONFIG_IA64),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
# needed for x86 only.  Why this used to be enabled for all architectures is beyond
# me.  I suspect most platforms don't need this, but until we know that for sure
# I turn this off for IA-64 only.  Andreas Schwab says it's also needed on m68k
# to get a correct value for the wait-channel (WCHAN in ps). --davidm
CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer
endif

# configs.o embeds the gzipped kernel configuration (see below).
$(obj)/configs.o: $(obj)/config_data.h

# config_data.h contains the same information as ikconfig.h but gzipped.
# Info from config_data can be extracted from /proc/config*
targets += config_data.gz
$(obj)/config_data.gz: .config FORCE
	$(call if_changed,gzip)

# Wrap the gzipped .config in a C array between MAGIC_START/MAGIC_END markers.
quiet_cmd_ikconfiggz = IKCFG $@
cmd_ikconfiggz = (echo "static const char kernel_config_data[] = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") > $@
targets += config_data.h
$(obj)/config_data.h: $(obj)/config_data.gz FORCE
	$(call if_changed,ikconfiggz)
|
||||
561
kernel/acct.c
Normal file
561
kernel/acct.c
Normal file
@@ -0,0 +1,561 @@
|
||||
/*
|
||||
* linux/kernel/acct.c
|
||||
*
|
||||
* BSD Process Accounting for Linux
|
||||
*
|
||||
* Author: Marco van Wieringen <mvw@planets.elm.net>
|
||||
*
|
||||
* Some code based on ideas and code from:
|
||||
* Thomas K. Dyas <tdyas@eden.rutgers.edu>
|
||||
*
|
||||
* This file implements BSD-style process accounting. Whenever any
|
||||
* process exits, an accounting record of type "struct acct" is
|
||||
* written to the file specified with the acct() system call. It is
|
||||
* up to user-level programs to do useful things with the accounting
|
||||
* log. The kernel just provides the raw accounting information.
|
||||
*
|
||||
* (C) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V.
|
||||
*
|
||||
* Plugged two leaks. 1) It didn't return acct_file into the free_filps if
|
||||
* the file happened to be read-only. 2) If the accounting was suspended
|
||||
* due to the lack of space it happily allowed to reopen it and completely
|
||||
* lost the old acct_file. 3/10/98, Al Viro.
|
||||
*
|
||||
* Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
|
||||
* XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
|
||||
*
|
||||
 * Fixed a nasty interaction with sys_umount(). If the accounting
 * was suspended we failed to stop it on umount(). Messy.
|
||||
* Another one: remount to readonly didn't stop accounting.
|
||||
* Question: what should we do if we have CAP_SYS_ADMIN but not
|
||||
* CAP_SYS_PACCT? Current code does the following: umount returns -EBUSY
|
||||
* unless we are messing with the root. In that case we are getting a
|
||||
* real mess with do_remount_sb(). 9/11/98, AV.
|
||||
*
|
||||
* Fixed a bunch of races (and pair of leaks). Probably not the best way,
|
||||
* but this one obviously doesn't introduce deadlocks. Later. BTW, found
|
||||
* one race (and leak) in BSD implementation.
|
||||
* OK, that's better. ANOTHER race and leak in BSD variant. There always
|
||||
* is one more bug... 10/11/98, AV.
|
||||
*
|
||||
* Oh, fsck... Oopsable SMP race in do_process_acct() - we must hold
|
||||
* ->mmap_sem to walk the vma list of current->mm. Nasty, since it leaks
|
||||
* a struct file opened for write. Fixed. 2/6/2000, AV.
|
||||
*/
|
||||
|
||||
#include <linux/config.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/acct.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/tty.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/vfs.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/times.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/div64.h>
|
||||
#include <linux/blkdev.h> /* sector_div */
|
||||
|
||||
/*
 * These constants control the amount of freespace that suspend and
 * resume the process accounting system, and the time delay between
 * each check.
 * Turned into sysctl-controllable parameters. AV, 12/11/98
 */

/* Percent-free-space thresholds and re-check period; exported as a
 * sysctl array (kernel.acct).  Defaults: resume at 4%, suspend at 2%,
 * check every 30 seconds. */
int acct_parm[3] = {4, 2, 30};
#define RESUME		(acct_parm[0])	/* >foo% free space - resume */
#define SUSPEND		(acct_parm[1])	/* <foo% free space - suspend */
#define ACCT_TIMEOUT	(acct_parm[2])	/* foo second timeout between checks */

/*
 * External references and all of the globals.
 */
static void do_acct_process(long, struct file *);
|
||||
|
||||
/*
 * This structure is used so that all the data protected by lock
 * can be placed in the same cache line as the lock.  This primes
 * the cache line to have the data after getting the lock.
 */
struct acct_glbs {
	spinlock_t		lock;
	volatile int		active;		/* nonzero while records are written */
	volatile int		needcheck;	/* set by the timer: re-check free space */
	struct file		*file;		/* accounting file, NULL when off */
	struct timer_list	timer;		/* triggers the periodic space check */
};

/* Single global accounting state; only the spinlock needs an initializer. */
static struct acct_glbs acct_globals __cacheline_aligned = {SPIN_LOCK_UNLOCKED};
|
||||
|
||||
/*
 * Called whenever the timer says to check the free space.
 */
static void acct_timeout(unsigned long unused)
{
	/* Only raise a flag here: the actual statfs runs later from
	 * check_free_space() in process context, since it may block. */
	acct_globals.needcheck = 1;
}
|
||||
|
||||
/*
 * Check the amount of free space and suspend/resume accordingly.
 *
 * Returns nonzero if accounting is (still) active for @file, zero if
 * records should not be written.  Drops and retakes acct_globals.lock
 * around the (possibly blocking) vfs_statfs() call.
 */
static int check_free_space(struct file *file)
{
	struct kstatfs sbuf;
	int res;
	int act;		/* -1: suspend, +1: resume, 0: no change */
	sector_t resume;
	sector_t suspend;

	spin_lock(&acct_globals.lock);
	res = acct_globals.active;
	/* Fast path: nothing to check until the timer fires again. */
	if (!file || !acct_globals.needcheck)
		goto out;
	spin_unlock(&acct_globals.lock);

	/* May block */
	if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf))
		return res;
	/* Convert the percentage thresholds into absolute block counts. */
	suspend = sbuf.f_blocks * SUSPEND;
	resume = sbuf.f_blocks * RESUME;

	sector_div(suspend, 100);
	sector_div(resume, 100);

	if (sbuf.f_bavail <= suspend)
		act = -1;
	else if (sbuf.f_bavail >= resume)
		act = 1;
	else
		act = 0;

	/*
	 * If some joker switched acct_globals.file under us we'ld better be
	 * silent and _not_ touch anything.
	 */
	spin_lock(&acct_globals.lock);
	if (file != acct_globals.file) {
		if (act)
			res = act>0;
		goto out;
	}

	if (acct_globals.active) {
		if (act < 0) {
			acct_globals.active = 0;
			printk(KERN_INFO "Process accounting paused\n");
		}
	} else {
		if (act > 0) {
			acct_globals.active = 1;
			printk(KERN_INFO "Process accounting resumed\n");
		}
	}

	/* Re-arm the timer for the next check. */
	del_timer(&acct_globals.timer);
	acct_globals.needcheck = 0;
	acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
	add_timer(&acct_globals.timer);
	res = acct_globals.active;
out:
	spin_unlock(&acct_globals.lock);
	return res;
}
|
||||
|
||||
/*
 * Close the old accounting file (if currently open) and then replace
 * it with file (if non-NULL).
 *
 * NOTE: acct_globals.lock MUST be held on entry and exit.
 */
static void acct_file_reopen(struct file *file)
{
	struct file *old_acct = NULL;

	/* Detach and disarm the old file first, while still locked. */
	if (acct_globals.file) {
		old_acct = acct_globals.file;
		del_timer(&acct_globals.timer);
		acct_globals.active = 0;
		acct_globals.needcheck = 0;
		acct_globals.file = NULL;
	}
	if (file) {
		acct_globals.file = file;
		acct_globals.needcheck = 0;
		acct_globals.active = 1;
		/* It's been deleted if it was used before so this is safe */
		init_timer(&acct_globals.timer);
		acct_globals.timer.function = acct_timeout;
		acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
		add_timer(&acct_globals.timer);
	}
	if (old_acct) {
		/* Writing the final record and closing may sleep, so the
		 * spinlock is dropped across these calls and retaken. */
		spin_unlock(&acct_globals.lock);
		do_acct_process(0, old_acct);
		filp_close(old_acct, NULL);
		spin_lock(&acct_globals.lock);
	}
}
|
||||
|
||||
/*
|
||||
* sys_acct() is the only system call needed to implement process
|
||||
* accounting. It takes the name of the file where accounting records
|
||||
* should be written. If the filename is NULL, accounting will be
|
||||
* shutdown.
|
||||
*/
|
||||
asmlinkage long sys_acct(const char __user *name)
|
||||
{
|
||||
struct file *file = NULL;
|
||||
char *tmp;
|
||||
int error;
|
||||
|
||||
if (!capable(CAP_SYS_PACCT))
|
||||
return -EPERM;
|
||||
|
||||
if (name) {
|
||||
tmp = getname(name);
|
||||
if (IS_ERR(tmp)) {
|
||||
return (PTR_ERR(tmp));
|
||||
}
|
||||
/* Difference from BSD - they don't do O_APPEND */
|
||||
file = filp_open(tmp, O_WRONLY|O_APPEND, 0);
|
||||
putname(tmp);
|
||||
if (IS_ERR(file)) {
|
||||
return (PTR_ERR(file));
|
||||
}
|
||||
if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
|
||||
filp_close(file, NULL);
|
||||
return (-EACCES);
|
||||
}
|
||||
|
||||
if (!file->f_op->write) {
|
||||
filp_close(file, NULL);
|
||||
return (-EIO);
|
||||
}
|
||||
}
|
||||
|
||||
error = security_acct(file);
|
||||
if (error) {
|
||||
if (file)
|
||||
filp_close(file, NULL);
|
||||
return error;
|
||||
}
|
||||
|
||||
spin_lock(&acct_globals.lock);
|
||||
acct_file_reopen(file);
|
||||
spin_unlock(&acct_globals.lock);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * If the accounting is turned on for a file in the filesystem pointed
 * to by sb, turn accounting off.  Called so the superblock is not kept
 * busy by the accounting file (e.g. across umount — see header notes).
 */
void acct_auto_close(struct super_block *sb)
{
	spin_lock(&acct_globals.lock);
	if (acct_globals.file &&
	    acct_globals.file->f_dentry->d_inode->i_sb == sb) {
		/* NULL means: close current file, don't open a new one. */
		acct_file_reopen((struct file *)NULL);
	}
	spin_unlock(&acct_globals.lock);
}
|
||||
|
||||
/*
|
||||
* encode an unsigned long into a comp_t
|
||||
*
|
||||
* This routine has been adopted from the encode_comp_t() function in
|
||||
* the kern_acct.c file of the FreeBSD operating system. The encoding
|
||||
* is a 13-bit fraction with a 3-bit (base 8) exponent.
|
||||
*/
|
||||
|
||||
#define MANTSIZE 13 /* 13 bit mantissa. */
|
||||
#define EXPSIZE 3 /* Base 8 (3 bit) exponent. */
|
||||
#define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */
|
||||
|
||||
static comp_t encode_comp_t(unsigned long value)
|
||||
{
|
||||
int exp, rnd;
|
||||
|
||||
exp = rnd = 0;
|
||||
while (value > MAXFRACT) {
|
||||
rnd = value & (1 << (EXPSIZE - 1)); /* Round up? */
|
||||
value >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */
|
||||
exp++;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we need to round up, do it (and handle overflow correctly).
|
||||
*/
|
||||
if (rnd && (++value > MAXFRACT)) {
|
||||
value >>= EXPSIZE;
|
||||
exp++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clean it up and polish it off.
|
||||
*/
|
||||
exp <<= MANTSIZE; /* Shift the exponent into place */
|
||||
exp += value; /* and add on the mantissa. */
|
||||
return exp;
|
||||
}
|
||||
|
||||
#if ACCT_VERSION==1 || ACCT_VERSION==2
|
||||
/*
|
||||
* encode an u64 into a comp2_t (24 bits)
|
||||
*
|
||||
* Format: 5 bit base 2 exponent, 20 bits mantissa.
|
||||
* The leading bit of the mantissa is not stored, but implied for
|
||||
* non-zero exponents.
|
||||
* Largest encodable value is 50 bits.
|
||||
*/
|
||||
|
||||
#define MANTSIZE2 20 /* 20 bit mantissa. */
|
||||
#define EXPSIZE2 5 /* 5 bit base 2 exponent. */
|
||||
#define MAXFRACT2 ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */
|
||||
#define MAXEXP2 ((1 <<EXPSIZE2) - 1) /* Maximum exponent. */
|
||||
|
||||
static comp2_t encode_comp2_t(u64 value)
|
||||
{
|
||||
int exp, rnd;
|
||||
|
||||
exp = (value > (MAXFRACT2>>1));
|
||||
rnd = 0;
|
||||
while (value > MAXFRACT2) {
|
||||
rnd = value & 1;
|
||||
value >>= 1;
|
||||
exp++;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we need to round up, do it (and handle overflow correctly).
|
||||
*/
|
||||
if (rnd && (++value > MAXFRACT2)) {
|
||||
value >>= 1;
|
||||
exp++;
|
||||
}
|
||||
|
||||
if (exp > MAXEXP2) {
|
||||
/* Overflow. Return largest representable number instead. */
|
||||
return (1ul << (MANTSIZE2+EXPSIZE2-1)) - 1;
|
||||
} else {
|
||||
return (value & (MAXFRACT2>>1)) | (exp << (MANTSIZE2-1));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ACCT_VERSION==3
|
||||
/*
|
||||
* encode an u64 into a 32 bit IEEE float
|
||||
*/
|
||||
static u32 encode_float(u64 value)
|
||||
{
|
||||
unsigned exp = 190;
|
||||
unsigned u;
|
||||
|
||||
if (value==0) return 0;
|
||||
while ((s64)value > 0){
|
||||
value <<= 1;
|
||||
exp--;
|
||||
}
|
||||
u = (u32)(value >> 40) & 0x7fffffu;
|
||||
return u | (exp << 23);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Write an accounting entry for an exiting process
 *
 * The acct_process() call is the workhorse of the process
 * accounting system. The struct acct is built here and then written
 * into the accounting file. This function should only be called from
 * do_exit().
 */

/*
 * do_acct_process does all actual work. Caller holds the reference to file.
 */
static void do_acct_process(long exitcode, struct file *file)
{
	acct_t ac;
	mm_segment_t fs;
	unsigned long vsize;
	unsigned long flim;	/* saved RLIMIT_FSIZE soft limit */
	u64 elapsed;
	u64 run_time;
	struct timespec uptime;

	/*
	 * First check to see if there is enough free_space to continue
	 * the process accounting system.
	 */
	if (!check_free_space(file))
		return;

	/*
	 * Fill the accounting struct with the needed info as recorded
	 * by the different kernel functions.
	 */
	memset((caddr_t)&ac, 0, sizeof(acct_t));

	ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER;
	strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm));

	/* calculate run_time in nsec*/
	do_posix_clock_monotonic_gettime(&uptime);
	run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec;
	run_time -= (u64)current->start_time.tv_sec*NSEC_PER_SEC
		       + current->start_time.tv_nsec;
	/* convert nsec -> AHZ */
	elapsed = nsec_to_AHZ(run_time);
#if ACCT_VERSION==3
	ac.ac_etime = encode_float(elapsed);
#else
	/* Clamp to what fits an unsigned long before comp_t encoding. */
	ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
	                       (unsigned long) elapsed : (unsigned long) -1l);
#endif
#if ACCT_VERSION==1 || ACCT_VERSION==2
	{
		/* new enlarged etime field */
		comp2_t etime = encode_comp2_t(elapsed);
		ac.ac_etime_hi = etime >> 16;
		ac.ac_etime_lo = (u16) etime;
	}
#endif
	/* Derive the begin time from wall clock minus elapsed seconds. */
	do_div(elapsed, AHZ);
	ac.ac_btime = xtime.tv_sec - elapsed;
	ac.ac_utime = encode_comp_t(jiffies_to_AHZ(
					    current->signal->utime +
					    current->group_leader->utime));
	ac.ac_stime = encode_comp_t(jiffies_to_AHZ(
					    current->signal->stime +
					    current->group_leader->stime));
	/* we really need to bite the bullet and change layout */
	ac.ac_uid = current->uid;
	ac.ac_gid = current->gid;
#if ACCT_VERSION==2
	ac.ac_ahz = AHZ;
#endif
#if ACCT_VERSION==1 || ACCT_VERSION==2
	/* backward-compatible 16 bit fields */
	ac.ac_uid16 = current->uid;
	ac.ac_gid16 = current->gid;
#endif
#if ACCT_VERSION==3
	ac.ac_pid = current->tgid;
	ac.ac_ppid = current->parent->tgid;
#endif

	read_lock(&tasklist_lock);	/* pin current->signal */
	ac.ac_tty = current->signal->tty ?
		old_encode_dev(tty_devnum(current->signal->tty)) : 0;
	read_unlock(&tasklist_lock);

	/* Translate task flags into the BSD accounting flag bits. */
	ac.ac_flag = 0;
	if (current->flags & PF_FORKNOEXEC)
		ac.ac_flag |= AFORK;
	if (current->flags & PF_SUPERPRIV)
		ac.ac_flag |= ASU;
	if (current->flags & PF_DUMPCORE)
		ac.ac_flag |= ACORE;
	if (current->flags & PF_SIGNALED)
		ac.ac_flag |= AXSIG;

	/* Sum the VMA sizes under mmap_sem (see header: walking the vma
	 * list without it was an oopsable SMP race). */
	vsize = 0;
	if (current->mm) {
		struct vm_area_struct *vma;
		down_read(&current->mm->mmap_sem);
		vma = current->mm->mmap;
		while (vma) {
			vsize += vma->vm_end - vma->vm_start;
			vma = vma->vm_next;
		}
		up_read(&current->mm->mmap_sem);
	}
	vsize = vsize / 1024;	/* bytes -> KB */
	ac.ac_mem = encode_comp_t(vsize);
	ac.ac_io = encode_comp_t(0 /* current->io_usage */);	/* %% */
	ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
	ac.ac_minflt = encode_comp_t(current->signal->min_flt +
				     current->group_leader->min_flt);
	ac.ac_majflt = encode_comp_t(current->signal->maj_flt +
				     current->group_leader->maj_flt);
	ac.ac_swaps = encode_comp_t(0);
	ac.ac_exitcode = exitcode;

	/*
	 * Kernel segment override to datasegment and write it
	 * to the accounting file.
	 */
	fs = get_fs();
	set_fs(KERNEL_DS);
	/*
	 * Accounting records are not subject to resource limits.
	 */
	flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
	file->f_op->write(file, (char *)&ac,
			  sizeof(acct_t), &file->f_pos);
	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
	set_fs(fs);
}
|
||||
|
||||
/*
 * acct_process - now just a wrapper around do_acct_process
 */
void acct_process(long exitcode)
{
	struct file *file = NULL;

	/*
	 * accelerate the common fastpath:
	 * (lockless peek; re-checked under the lock below)
	 */
	if (!acct_globals.file)
		return;

	spin_lock(&acct_globals.lock);
	file = acct_globals.file;
	if (unlikely(!file)) {
		spin_unlock(&acct_globals.lock);
		return;
	}
	/* Pin the file so a concurrent reopen cannot free it under us. */
	get_file(file);
	spin_unlock(&acct_globals.lock);

	do_acct_process(exitcode, file);
	fput(file);
}
|
||||
|
||||
|
||||
/*
|
||||
* acct_update_integrals
|
||||
* - update mm integral fields in task_struct
|
||||
*/
|
||||
void acct_update_integrals(struct task_struct *tsk)
|
||||
{
|
||||
if (likely(tsk->mm)) {
|
||||
long delta = tsk->stime - tsk->acct_stimexpd;
|
||||
|
||||
if (delta == 0)
|
||||
return;
|
||||
tsk->acct_stimexpd = tsk->stime;
|
||||
tsk->acct_rss_mem1 += delta * get_mm_counter(tsk->mm, rss);
|
||||
tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* acct_clear_integrals
|
||||
* - clear the mm integral fields in task_struct
|
||||
*/
|
||||
void acct_clear_integrals(struct task_struct *tsk)
|
||||
{
|
||||
if (tsk) {
|
||||
tsk->acct_stimexpd = 0;
|
||||
tsk->acct_rss_mem1 = 0;
|
||||
tsk->acct_vm_mem1 = 0;
|
||||
}
|
||||
}
|
||||
839
kernel/audit.c
Normal file
839
kernel/audit.c
Normal file
@@ -0,0 +1,839 @@
|
||||
/* audit.c -- Auditing support -*- linux-c -*-
|
||||
* Gateway between the kernel (e.g., selinux) and the user-space audit daemon.
|
||||
* System-call specific features have moved to auditsc.c
|
||||
*
|
||||
* Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
* Written by Rickard E. (Rik) Faith <faith@redhat.com>
|
||||
*
|
||||
* Goals: 1) Integrate fully with SELinux.
|
||||
* 2) Minimal run-time overhead:
|
||||
* a) Minimal when syscall auditing is disabled (audit_enable=0).
|
||||
* b) Small when syscall auditing is enabled and no audit record
|
||||
* is generated (defer as much work as possible to record
|
||||
* generation time):
|
||||
* i) context is allocated,
|
||||
* ii) names from getname are stored without a copy, and
|
||||
* iii) inode information stored from path_lookup.
|
||||
* 3) Ability to disable syscall auditing at boot time (audit=0).
|
||||
* 4) Usable by other parts of the kernel (if audit_log* is called,
|
||||
* then a syscall record will be generated automatically for the
|
||||
* current syscall).
|
||||
* 5) Netlink interface to user-space.
|
||||
* 6) Support low-overhead kernel-based filtering to minimize the
|
||||
* information that must be passed to user-space.
|
||||
*
|
||||
* Example user-space utilities: http://people.redhat.com/faith/audit/
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <linux/audit.h>
|
||||
|
||||
#include <net/sock.h>
|
||||
#include <linux/skbuff.h>
|
||||
#include <linux/netlink.h>
|
||||
|
||||
/* No auditing will take place until audit_initialized != 0.
 * (Initialization happens after skb_init is called.) */
static int audit_initialized;

/* No syscall auditing will take place unless audit_enabled != 0. */
int audit_enabled;

/* Default state when kernel boots without any parameters. */
static int audit_default;

/* If auditing cannot proceed, audit_failure selects what happens. */
static int audit_failure = AUDIT_FAIL_PRINTK;

/* If audit records are to be written to the netlink socket, audit_pid
 * contains the (non-zero) pid. */
static int audit_pid;

/* If audit_limit is non-zero, limit the rate of sending audit records
 * to that number per second.  This prevents DoS attacks, but results in
 * audit records being dropped. */
static int audit_rate_limit;

/* Number of outstanding audit_buffers allowed. */
static int audit_backlog_limit = 64;
static atomic_t audit_backlog = ATOMIC_INIT(0);

/* Records can be lost in several ways:
   0) [suppressed in audit_alloc]
   1) out of memory in audit_log_start [kmalloc of struct audit_buffer]
   2) out of memory in audit_log_move [alloc_skb]
   3) suppressed due to audit_rate_limit
   4) suppressed due to audit_backlog_limit
*/
static atomic_t audit_lost = ATOMIC_INIT(0);

/* The netlink socket. */
static struct sock *audit_sock;

/* There are two lists of audit buffers.  The txlist contains audit
 * buffers that cannot be sent immediately to the netlink device because
 * we are in an irq context (these are sent later in a tasklet).
 *
 * The second list is a list of pre-allocated audit buffers (if more
 * than AUDIT_MAXFREE are in use, the audit buffer is freed instead of
 * being placed on the freelist). */
static DEFINE_SPINLOCK(audit_txlist_lock);
static DEFINE_SPINLOCK(audit_freelist_lock);
static int	   audit_freelist_count = 0;
static LIST_HEAD(audit_txlist);
static LIST_HEAD(audit_freelist);

/* There are three lists of rules -- one to search at task creation
 * time, one to search at syscall entry time, and another to search at
 * syscall exit time. */
static LIST_HEAD(audit_tsklist);
static LIST_HEAD(audit_entlist);
static LIST_HEAD(audit_extlist);

/* The netlink socket is only to be read by 1 CPU, which lets us assume
 * that list additions and deletions never happen simultaneiously in
 * auditsc.c */
static DECLARE_MUTEX(audit_netlink_sem);

/* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting
 * audit records.  Since printk uses a 1024 byte buffer, this buffer
 * should be at least that large. */
#define AUDIT_BUFSIZ 1024

/* AUDIT_MAXFREE is the number of empty audit_buffers we keep on the
 * audit_freelist.  Doing so eliminates many kmalloc/kfree calls. */
#define AUDIT_MAXFREE (2*NR_CPUS)
|
||||
|
||||
/* The audit_buffer is used when formatting an audit record.  The caller
 * locks briefly to get the record off the freelist or to allocate the
 * buffer, and locks briefly to send the buffer to the netlink layer or
 * to place it on a transmit queue.  Multiple audit_buffers can be in
 * use simultaneously. */
struct audit_buffer {
	struct list_head	list;	/* freelist / txlist linkage */
	struct sk_buff_head	sklist;	/* formatted skbs ready to send */
	struct audit_context	*ctx;	/* NULL or associated context */
	int			len;	/* used area of tmp */
	char			tmp[AUDIT_BUFSIZ];	/* formatting scratch */

	/* Pointer to header and contents */
	struct nlmsghdr		*nlh;
	int			total;	/* running length of the record */
	int			type;	/* AUDIT_* message type */
	int			pid;	/* destination pid, 0 for printk path */
	int			count;	/* Times requeued */
};
|
||||
|
||||
/* Tag an in-flight audit record with its AUDIT_* message type. */
void audit_set_type(struct audit_buffer *ab, int type)
{
	ab->type = type;
}
|
||||
|
||||
/* One filtering rule, as linked on the tsk/ent/ext rule lists above. */
struct audit_entry {
	struct list_head	list;
	struct audit_rule	rule;
};

/* Forward declarations for the two audit_log_end flavours (irq-safe
 * deferred send vs. immediate send). */
static void audit_log_end_irq(struct audit_buffer *ab);
static void audit_log_end_fast(struct audit_buffer *ab);
|
||||
|
||||
static void audit_panic(const char *message)
|
||||
{
|
||||
switch (audit_failure)
|
||||
{
|
||||
case AUDIT_FAIL_SILENT:
|
||||
break;
|
||||
case AUDIT_FAIL_PRINTK:
|
||||
printk(KERN_ERR "audit: %s\n", message);
|
||||
break;
|
||||
case AUDIT_FAIL_PANIC:
|
||||
panic("audit: %s\n", message);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Rate-limiter for audit records.  Returns 1 if the caller may emit a
 * record, 0 if it should be suppressed.  Allows up to audit_rate_limit
 * messages per roughly one-second window; a zero limit disables
 * throttling.  The counters are function-static and protected by a
 * local irq-safe spinlock, so this is callable from any context.
 */
static inline int audit_rate_check(void)
{
	static unsigned long	last_check = 0;
	static int		messages   = 0;
	static DEFINE_SPINLOCK(lock);
	unsigned long		flags;
	unsigned long		now;
	unsigned long		elapsed;
	int			retval	   = 0;

	if (!audit_rate_limit) return 1;

	spin_lock_irqsave(&lock, flags);
	if (++messages < audit_rate_limit) {
		retval = 1;
	} else {
		/* Over the limit: allow one message and reset the window
		 * once at least HZ jiffies have passed. */
		now     = jiffies;
		elapsed = now - last_check;
		if (elapsed > HZ) {
			last_check = now;
			messages   = 0;
			retval     = 1;
		}
	}
	spin_unlock_irqrestore(&lock, flags);

	return retval;
}
|
||||
|
||||
/* Emit at least 1 message per second, even if audit_rate_check is
 * throttling.  Always counts the lost record in audit_lost; prints (and
 * possibly panics, per audit_failure) unconditionally when the failure
 * policy is PANIC or no rate limit is configured. */
void audit_log_lost(const char *message)
{
	static unsigned long	last_msg = 0;
	static DEFINE_SPINLOCK(lock);
	unsigned long		flags;
	unsigned long		now;
	int			print;

	atomic_inc(&audit_lost);

	print = (audit_failure == AUDIT_FAIL_PANIC || !audit_rate_limit);

	if (!print) {
		/* Throttled path: print at most once per HZ jiffies. */
		spin_lock_irqsave(&lock, flags);
		now = jiffies;
		if (now - last_msg > HZ) {
			print = 1;
			last_msg = now;
		}
		spin_unlock_irqrestore(&lock, flags);
	}

	if (print) {
		printk(KERN_WARNING
		       "audit: audit_lost=%d audit_backlog=%d"
		       " audit_rate_limit=%d audit_backlog_limit=%d\n",
		       atomic_read(&audit_lost),
		       atomic_read(&audit_backlog),
		       audit_rate_limit,
		       audit_backlog_limit);
		audit_panic(message);
	}

}
|
||||
|
||||
static int audit_set_rate_limit(int limit)
|
||||
{
|
||||
int old = audit_rate_limit;
|
||||
audit_rate_limit = limit;
|
||||
audit_log(current->audit_context, "audit_rate_limit=%d old=%d",
|
||||
audit_rate_limit, old);
|
||||
return old;
|
||||
}
|
||||
|
||||
static int audit_set_backlog_limit(int limit)
|
||||
{
|
||||
int old = audit_backlog_limit;
|
||||
audit_backlog_limit = limit;
|
||||
audit_log(current->audit_context, "audit_backlog_limit=%d old=%d",
|
||||
audit_backlog_limit, old);
|
||||
return old;
|
||||
}
|
||||
|
||||
static int audit_set_enabled(int state)
|
||||
{
|
||||
int old = audit_enabled;
|
||||
if (state != 0 && state != 1)
|
||||
return -EINVAL;
|
||||
audit_enabled = state;
|
||||
audit_log(current->audit_context, "audit_enabled=%d old=%d",
|
||||
audit_enabled, old);
|
||||
return old;
|
||||
}
|
||||
|
||||
static int audit_set_failure(int state)
|
||||
{
|
||||
int old = audit_failure;
|
||||
if (state != AUDIT_FAIL_SILENT
|
||||
&& state != AUDIT_FAIL_PRINTK
|
||||
&& state != AUDIT_FAIL_PANIC)
|
||||
return -EINVAL;
|
||||
audit_failure = state;
|
||||
audit_log(current->audit_context, "audit_failure=%d old=%d",
|
||||
audit_failure, old);
|
||||
return old;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NET
/*
 * Build a netlink message around 'payload' and unicast it to 'pid' on
 * the audit socket.  'done' forces NLMSG_DONE as the message type;
 * 'multi' sets NLM_F_MULTI.  Failures are silent (best-effort reply).
 */
void audit_send_reply(int pid, int seq, int type, int done, int multi,
		      void *payload, int size)
{
	struct sk_buff	*skb;
	struct nlmsghdr	*nlh;
	int		len = NLMSG_SPACE(size);
	void		*data;
	int		flags = multi ? NLM_F_MULTI : 0;
	int		t     = done  ? NLMSG_DONE  : type;

	skb = alloc_skb(len, GFP_KERNEL);
	if (!skb)
		goto nlmsg_failure;

	/* NLMSG_PUT jumps to nlmsg_failure itself if skb runs out of room. */
	nlh		 = NLMSG_PUT(skb, pid, seq, t, len - sizeof(*nlh));
	nlh->nlmsg_flags = flags;
	data		 = NLMSG_DATA(nlh);
	memcpy(data, payload, size);
	netlink_unicast(audit_sock, skb, pid, MSG_DONTWAIT);
	return;

nlmsg_failure:			/* Used by NLMSG_PUT */
	if (skb)
		kfree_skb(skb);
}
|
||||
|
||||
/*
 * Check for appropriate CAP_AUDIT_ capabilities on incoming audit
 * control messages.
 *
 * Returns 0 when permitted, -EPERM when the needed capability is
 * missing, -EINVAL for unknown message types.
 */
static int audit_netlink_ok(kernel_cap_t eff_cap, u16 msg_type)
{
	int err = 0;

	switch (msg_type) {
	case AUDIT_GET:
	case AUDIT_LIST:
	case AUDIT_SET:
	case AUDIT_ADD:
	case AUDIT_DEL:
		/* Control operations need CAP_AUDIT_CONTROL. */
		if (!cap_raised(eff_cap, CAP_AUDIT_CONTROL))
			err = -EPERM;
		break;
	case AUDIT_USER:
		/* Userspace-originated records need CAP_AUDIT_WRITE. */
		if (!cap_raised(eff_cap, CAP_AUDIT_WRITE))
			err = -EPERM;
		break;
	default:  /* bad msg */
		err = -EINVAL;
	}

	return err;
}
|
||||
|
||||
/* Dispatch a single audit netlink request.  The sender's capabilities
 * are vetted first by audit_netlink_ok(); AUDIT_GET replies go back
 * through audit_send_reply().  Returns 0 on success or a negative
 * errno (positive codes from handlers are reported as success). */
static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	u32 uid, pid, seq;
	void *data;
	struct audit_status *status_get, status_set;
	int err;
	struct audit_buffer *ab;
	u16 msg_type = nlh->nlmsg_type;

	err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type);
	if (err)
		return err;

	/* Credentials of the sending process, as recorded by netlink. */
	pid = NETLINK_CREDS(skb)->pid;
	uid = NETLINK_CREDS(skb)->uid;
	seq = nlh->nlmsg_seq;
	data = NLMSG_DATA(nlh);

	switch (msg_type) {
	case AUDIT_GET:
		/* Snapshot the current status and reply to the sender. */
		status_set.enabled = audit_enabled;
		status_set.failure = audit_failure;
		status_set.pid = audit_pid;
		status_set.rate_limit = audit_rate_limit;
		status_set.backlog_limit = audit_backlog_limit;
		status_set.lost = atomic_read(&audit_lost);
		status_set.backlog = atomic_read(&audit_backlog);
		audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_GET, 0, 0,
				 &status_set, sizeof(status_set));
		break;
	case AUDIT_SET:
		if (nlh->nlmsg_len < sizeof(struct audit_status))
			return -EINVAL;
		status_get = (struct audit_status *)data;
		/* Only the fields selected by ->mask are applied. */
		if (status_get->mask & AUDIT_STATUS_ENABLED) {
			err = audit_set_enabled(status_get->enabled);
			if (err < 0) return err;
		}
		if (status_get->mask & AUDIT_STATUS_FAILURE) {
			err = audit_set_failure(status_get->failure);
			if (err < 0) return err;
		}
		if (status_get->mask & AUDIT_STATUS_PID) {
			int old = audit_pid;
			/* New audit daemon pid; records are sent there. */
			audit_pid = status_get->pid;
			audit_log(current->audit_context,
				  "audit_pid=%d old=%d", audit_pid, old);
		}
		if (status_get->mask & AUDIT_STATUS_RATE_LIMIT)
			audit_set_rate_limit(status_get->rate_limit);
		if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
			audit_set_backlog_limit(status_get->backlog_limit);
		break;
	case AUDIT_USER:
		/* Userspace-originated record; at most 1024 bytes logged. */
		ab = audit_log_start(NULL);
		if (!ab)
			break;	/* audit_panic has been called */
		audit_log_format(ab,
				 "user pid=%d uid=%d length=%d msg='%.1024s'",
				 pid, uid,
				 (int)(nlh->nlmsg_len
				       - ((char *)data - (char *)nlh)),
				 (char *)data);
		ab->type = AUDIT_USER;
		ab->pid = pid;
		audit_log_end(ab);
		break;
	case AUDIT_ADD:
	case AUDIT_DEL:
		if (nlh->nlmsg_len < sizeof(struct audit_rule))
			return -EINVAL;
		/* fallthrough */
	case AUDIT_LIST:
#ifdef CONFIG_AUDITSYSCALL
		err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
					   uid, seq, data);
#else
		err = -EOPNOTSUPP;
#endif
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err < 0 ? err : 0;
}
|
||||
|
||||
/* Get message from skb (based on rtnetlink_rcv_skb).  Each message is
 * processed by audit_receive_msg.  Malformed skbs with wrong length are
 * discarded silently.  Always returns 0. */
static int audit_receive_skb(struct sk_buff *skb)
{
	int err;
	struct nlmsghdr *nlh;
	u32 rlen;

	while (skb->len >= NLMSG_SPACE(0)) {
		nlh = (struct nlmsghdr *)skb->data;
		/* Truncated or bogus header: stop quietly. */
		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
			return 0;
		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (rlen > skb->len)
			rlen = skb->len;
		/* Nack on error, ack only when the sender asked for one. */
		if ((err = audit_receive_msg(skb, nlh))) {
			netlink_ack(skb, nlh, err);
		} else if (nlh->nlmsg_flags & NLM_F_ACK)
			netlink_ack(skb, nlh, 0);
		skb_pull(skb, rlen);	/* advance to the next message */
	}
	return 0;
}
|
||||
|
||||
/* Receive messages from netlink socket: netlink data-ready callback.
 * Drains the socket's receive queue under audit_netlink_sem; if the
 * semaphore is already held, another invocation is processing the
 * queue, so we simply return. */
static void audit_receive(struct sock *sk, int length)
{
	struct sk_buff *skb;

	if (down_trylock(&audit_netlink_sem))
		return;

	/* FIXME: this must not cause starvation */
	while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
		/* Partially-consumed skb: requeue it for the next pass. */
		if (audit_receive_skb(skb) && skb->len)
			skb_queue_head(&sk->sk_receive_queue, skb);
		else
			kfree_skb(skb);
	}
	up(&audit_netlink_sem);
}
|
||||
|
||||
/* Move data from tmp buffer into an skb.  This is an extra copy, and
 * that is unfortunate.  However, the copy will only occur when a record
 * is being written to user space, which is already a high-overhead
 * operation.  (Elimination of the copy is possible, for example, by
 * writing directly into a pre-allocated skb, at the cost of wasting
 * memory.) */
static void audit_log_move(struct audit_buffer *ab)
{
	struct sk_buff *skb;
	char *start;
	/* Reserve netlink header space only once, in the first skb. */
	int extra = ab->nlh ? 0 : NLMSG_SPACE(0);

	/* possible resubmission */
	if (ab->len == 0)
		return;

	skb = skb_peek(&ab->sklist);
	if (!skb || skb_tailroom(skb) <= ab->len + extra) {
		/* Current skb full (or none yet): grow with headroom. */
		skb = alloc_skb(2 * ab->len + extra, GFP_ATOMIC);
		if (!skb) {
			ab->len = 0; /* Lose information in ab->tmp */
			audit_log_lost("out of memory in audit_log_move");
			return;
		}
		__skb_queue_tail(&ab->sklist, skb);
		if (!ab->nlh)
			ab->nlh = (struct nlmsghdr *)skb_put(skb,
							     NLMSG_SPACE(0));
	}
	start = skb_put(skb, ab->len);
	memcpy(start, ab->tmp, ab->len);
	ab->len = 0;	/* tmp buffer is now empty */
}
|
||||
|
||||
/* Iterate over the skbuff in the audit_buffer, sending their contents
 * to user space.  Returns 1 if delivery was deferred (record requeued
 * for retry via the tasklet), 0 when the buffer has been fully drained. */
static inline int audit_log_drain(struct audit_buffer *ab)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&ab->sklist))) {
		int retval = 0;

		if (audit_pid) {	/* daemon registered: unicast to it */
			if (ab->nlh) {
				/* Finalize the netlink header in place. */
				ab->nlh->nlmsg_len   = ab->total;
				ab->nlh->nlmsg_type  = ab->type;
				ab->nlh->nlmsg_flags = 0;
				ab->nlh->nlmsg_seq   = 0;
				ab->nlh->nlmsg_pid   = ab->pid;
			}
			skb_get(skb); /* because netlink_* frees */
			retval = netlink_unicast(audit_sock, skb, audit_pid,
						 MSG_DONTWAIT);
		}
		if (retval == -EAGAIN && ab->count < 5) {
			/* Socket busy: requeue and retry (max 5 attempts). */
			++ab->count;
			skb_queue_tail(&ab->sklist, skb);
			audit_log_end_irq(ab);
			return 1;
		}
		if (retval < 0) {
			if (retval == -ECONNREFUSED) {
				printk(KERN_ERR
				       "audit: *NO* daemon at audit_pid=%d\n",
				       audit_pid);
				/* Deregister the dead daemon. */
				audit_pid = 0;
			} else
				audit_log_lost("netlink socket too busy");
		}
		if (!audit_pid) { /* No daemon */
			/* Fall back to printk, skipping the netlink header. */
			int offset = ab->nlh ? NLMSG_SPACE(0) : 0;
			int len    = skb->len - offset;
			printk(KERN_ERR "%*.*s\n",
			       len, len, skb->data + offset);
		}
		kfree_skb(skb);
		ab->nlh = NULL;
	}
	return 0;
}
|
||||
|
||||
/* Initialize audit support at boot time: create the NETLINK_AUDIT
 * kernel socket, mark the subsystem initialized, and apply the
 * boot-time default enable state. */
static int __init audit_init(void)
{
	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
	       audit_default ? "enabled" : "disabled");
	audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive);
	if (!audit_sock)
		audit_panic("cannot initialize netlink socket");

	audit_initialized = 1;
	audit_enabled = audit_default;
	audit_log(NULL, "initialized");
	return 0;
}
|
||||
|
||||
#else
|
||||
/* Without CONFIG_NET, we have no skbuffs.  For now, print what we have
 * in the buffer. */
static void audit_log_move(struct audit_buffer *ab)
{
	printk(KERN_ERR "%*.*s\n", ab->len, ab->len, ab->tmp);
	ab->len = 0;	/* buffer consumed */
}
|
||||
|
||||
/* No netlink transport without CONFIG_NET: nothing to drain, and 0
 * tells the caller the buffer can be released immediately. */
static inline int audit_log_drain(struct audit_buffer *ab)
{
	return 0;
}
|
||||
|
||||
/* Initialize audit support at boot time (no netlink: records will be
 * printk'd by audit_log_move, never sent to a daemon). */
int __init audit_init(void)
{
	printk(KERN_INFO "audit: initializing WITHOUT netlink support\n");
	audit_sock = NULL;
	audit_pid = 0;	/* no daemon can ever register */

	audit_initialized = 1;
	audit_enabled = audit_default;
	audit_log(NULL, "initialized");
	return 0;
}
|
||||
#endif
|
||||
|
||||
__initcall(audit_init);
|
||||
|
||||
/* Process kernel command-line parameter at boot time.  audit=0 or audit=1.
 * Sets the default; if audit_init() already ran, applies it immediately. */
static int __init audit_enable(char *str)
{
	audit_default = !!simple_strtol(str, NULL, 0);
	printk(KERN_INFO "audit: %s%s\n",
	       audit_default ? "enabled" : "disabled",
	       audit_initialized ? "" : " (after initialization)");
	if (audit_initialized)
		audit_enabled = audit_default;
	return 0;
}
|
||||
|
||||
__setup("audit=", audit_enable);
|
||||
|
||||
|
||||
/* Obtain an audit buffer.  This routine does locking to obtain the
 * audit buffer, but then no locking is required for calls to
 * audit_log_*format.  If the tsk is a task that is currently in a
 * syscall, then the syscall is marked as auditable and an audit record
 * will be written at syscall exit.  If there is no associated task, tsk
 * should be NULL.
 *
 * Returns NULL if auditing is not initialized, the backlog limit is
 * exceeded, or no buffer can be allocated.  The returned buffer is
 * prefixed with an "audit(sec.msec:serial): " timestamp. */
struct audit_buffer *audit_log_start(struct audit_context *ctx)
{
	struct audit_buffer *ab = NULL;
	unsigned long flags;
	struct timespec t;
	int serial = 0;

	if (!audit_initialized)
		return NULL;

	/* Refuse new records once the backlog limit is exceeded. */
	if (audit_backlog_limit
	    && atomic_read(&audit_backlog) > audit_backlog_limit) {
		if (audit_rate_check())
			printk(KERN_WARNING
			       "audit: audit_backlog=%d > "
			       "audit_backlog_limit=%d\n",
			       atomic_read(&audit_backlog),
			       audit_backlog_limit);
		audit_log_lost("backlog limit exceeded");
		return NULL;
	}

	/* Prefer a recycled buffer from the freelist. */
	spin_lock_irqsave(&audit_freelist_lock, flags);
	if (!list_empty(&audit_freelist)) {
		ab = list_entry(audit_freelist.next,
				struct audit_buffer, list);
		list_del(&ab->list);
		--audit_freelist_count;
	}
	spin_unlock_irqrestore(&audit_freelist_lock, flags);

	if (!ab)
		ab = kmalloc(sizeof(*ab), GFP_ATOMIC);
	if (!ab) {
		audit_log_lost("out of memory in audit_log_start");
		return NULL;
	}

	atomic_inc(&audit_backlog);
	skb_queue_head_init(&ab->sklist);

	/* Fresh buffer state; delivery fields filled at log_end time. */
	ab->ctx   = ctx;
	ab->len   = 0;
	ab->nlh   = NULL;
	ab->total = 0;
	ab->type  = AUDIT_KERNEL;
	ab->pid   = 0;
	ab->count = 0;

#ifdef CONFIG_AUDITSYSCALL
	/* Use the syscall context's stamp so userspace can correlate. */
	if (ab->ctx)
		audit_get_stamp(ab->ctx, &t, &serial);
	else
#endif
		t = CURRENT_TIME;

	audit_log_format(ab, "audit(%lu.%03lu:%u): ",
			 t.tv_sec, t.tv_nsec/1000000, serial);
	return ab;
}
|
||||
|
||||
|
||||
/* Format an audit message into the audit buffer.  If there isn't enough
 * room in the audit buffer, more room will be allocated and vsnprint
 * will be called a second time.  Currently, we assume that a printk
 * can't format message larger than 1024 bytes, so we don't either.
 *
 * NOTE(review): args is used twice without va_copy when the first
 * vsnprintf truncates — presumably safe on the architectures this
 * targeted, but worth confirming. */
static void audit_log_vformat(struct audit_buffer *ab, const char *fmt,
			      va_list args)
{
	int len, avail;

	if (!ab)
		return;

	avail = sizeof(ab->tmp) - ab->len;
	if (avail <= 0) {
		/* tmp is full: flush it to the skb list first. */
		audit_log_move(ab);
		avail = sizeof(ab->tmp) - ab->len;
	}
	len   = vsnprintf(ab->tmp + ab->len, avail, fmt, args);
	if (len >= avail) {
		/* The printk buffer is 1024 bytes long, so if we get
		 * here and AUDIT_BUFSIZ is at least 1024, then we can
		 * log everything that printk could have logged. */
		audit_log_move(ab);
		avail = sizeof(ab->tmp) - ab->len;
		len   = vsnprintf(ab->tmp + ab->len, avail, fmt, args);
	}
	/* Account for what was actually stored (may be truncated). */
	ab->len   += (len < avail) ? len : avail;
	ab->total += (len < avail) ? len : avail;
}
|
||||
|
||||
/* Format a message into the audit buffer. All the work is done in
|
||||
* audit_log_vformat. */
|
||||
void audit_log_format(struct audit_buffer *ab, const char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
|
||||
if (!ab)
|
||||
return;
|
||||
va_start(args, fmt);
|
||||
audit_log_vformat(ab, fmt, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
/* This is a helper-function to print the d_path without using a static
 * buffer or allocating another buffer in addition to the one in
 * audit_buffer: d_path renders into the tail of ab->tmp, then the
 * result is shifted down to append at the current write position. */
void audit_log_d_path(struct audit_buffer *ab, const char *prefix,
		      struct dentry *dentry, struct vfsmount *vfsmnt)
{
	char *p;
	int  len, avail;

	if (prefix) audit_log_format(ab, " %s", prefix);

	/* Flush early so d_path gets a reasonably large scratch area. */
	if (ab->len > 128)
		audit_log_move(ab);
	avail = sizeof(ab->tmp) - ab->len;
	p = d_path(dentry, vfsmnt, ab->tmp + ab->len, avail);
	if (IS_ERR(p)) {
		/* FIXME: can we save some information here? */
		audit_log_format(ab, "<toolong>");
	} else {
		/* path isn't at start of buffer */
		len = (ab->tmp + sizeof(ab->tmp) - 1) - p;
		memmove(ab->tmp + ab->len, p, len);
		ab->len   += len;
		ab->total += len;
	}
}
|
||||
|
||||
/* Remove queued messages from the audit_txlist and send them to userspace.
 * Runs in tasklet (softirq) context; the list is spliced to a private
 * head under the lock so entries can be delivered without holding it. */
static void audit_tasklet_handler(unsigned long arg)
{
	LIST_HEAD(list);
	struct audit_buffer *ab;
	unsigned long flags;

	spin_lock_irqsave(&audit_txlist_lock, flags);
	list_splice_init(&audit_txlist, &list);
	spin_unlock_irqrestore(&audit_txlist_lock, flags);

	while (!list_empty(&list)) {
		ab = list_entry(list.next, struct audit_buffer, list);
		list_del(&ab->list);
		audit_log_end_fast(ab);
	}
}
|
||||
|
||||
static DECLARE_TASKLET(audit_tasklet, audit_tasklet_handler, 0);
|
||||
|
||||
/* The netlink_* functions cannot be called inside an irq context, so
 * the audit buffer is placed on a queue and a tasklet is scheduled to
 * remove them from the queue outside the irq context.  May be called in
 * any context. */
static void audit_log_end_irq(struct audit_buffer *ab)
{
	unsigned long flags;

	if (!ab)
		return;
	spin_lock_irqsave(&audit_txlist_lock, flags);
	list_add_tail(&ab->list, &audit_txlist);
	spin_unlock_irqrestore(&audit_txlist_lock, flags);

	tasklet_schedule(&audit_tasklet);
}
|
||||
|
||||
/* Send the message in the audit buffer directly to user space.  May not
 * be called in an irq context (BUG otherwise).  If delivery is deferred
 * by audit_log_drain (retry via tasklet) the buffer stays live;
 * otherwise it is recycled onto the freelist or freed. */
static void audit_log_end_fast(struct audit_buffer *ab)
{
	unsigned long flags;

	BUG_ON(in_irq());
	if (!ab)
		return;
	if (!audit_rate_check()) {
		audit_log_lost("rate limit exceeded");
	} else {
		audit_log_move(ab);
		if (audit_log_drain(ab))
			return;	/* requeued for retry; don't free */
	}

	atomic_dec(&audit_backlog);
	/* Recycle the buffer unless the freelist is already full. */
	spin_lock_irqsave(&audit_freelist_lock, flags);
	if (++audit_freelist_count > AUDIT_MAXFREE)
		kfree(ab);
	else
		list_add(&ab->list, &audit_freelist);
	spin_unlock_irqrestore(&audit_freelist_lock, flags);
}
|
||||
|
||||
/* Send or queue the message in the audit buffer, depending on the
 * current context.  (A convenience function that may be called in any
 * context.)  In hard-irq context delivery is deferred to a tasklet;
 * otherwise the record is sent immediately. */
void audit_log_end(struct audit_buffer *ab)
{
	if (!in_irq())
		audit_log_end_fast(ab);
	else
		audit_log_end_irq(ab);
}
|
||||
|
||||
/* Log an audit record. This is a convenience function that calls
|
||||
* audit_log_start, audit_log_vformat, and audit_log_end. It may be
|
||||
* called in any context. */
|
||||
void audit_log(struct audit_context *ctx, const char *fmt, ...)
|
||||
{
|
||||
struct audit_buffer *ab;
|
||||
va_list args;
|
||||
|
||||
ab = audit_log_start(ctx);
|
||||
if (ab) {
|
||||
va_start(args, fmt);
|
||||
audit_log_vformat(ab, fmt, args);
|
||||
va_end(args);
|
||||
audit_log_end(ab);
|
||||
}
|
||||
}
|
||||
1015
kernel/auditsc.c
Normal file
1015
kernel/auditsc.c
Normal file
File diff suppressed because it is too large
Load Diff
220
kernel/capability.c
Normal file
220
kernel/capability.c
Normal file
@@ -0,0 +1,220 @@
|
||||
/*
|
||||
* linux/kernel/capability.c
|
||||
*
|
||||
* Copyright (C) 1997 Andrew Main <zefram@fysh.org>
|
||||
*
|
||||
* Integrated into 2.1.97+, Andrew G. Morgan <morgan@transmeta.com>
|
||||
* 30 May 2002: Cleanup, Robert M. Love <rml@tech9.net>
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
|
||||
kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
|
||||
|
||||
EXPORT_SYMBOL(securebits);
|
||||
EXPORT_SYMBOL(cap_bset);
|
||||
|
||||
/*
|
||||
* This lock protects task->cap_* for all tasks including current.
|
||||
* Locking rule: acquire this prior to tasklist_lock.
|
||||
*/
|
||||
static DEFINE_SPINLOCK(task_capability_lock);
|
||||
|
||||
/*
|
||||
* For sys_getproccap() and sys_setproccap(), any of the three
|
||||
* capability set pointers may be NULL -- indicating that that set is
|
||||
* uninteresting and/or not to be changed.
|
||||
*/
|
||||
|
||||
/*
 * sys_capget - get the capabilities of a given process.
 *
 * @pid == 0 means the calling process.  The security module fills in
 * the effective/inheritable/permitted sets; on success they are copied
 * back to @dataptr.  Returns 0, or -EFAULT / -EINVAL / -ESRCH.
 */
asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
{
	int ret = 0;
	pid_t pid;
	__u32 version;
	task_t *target;
	struct __user_cap_data_struct data;

	if (get_user(version, &header->version))
		return -EFAULT;

	if (version != _LINUX_CAPABILITY_VERSION) {
		/* Tell userspace which version we do support. */
		if (put_user(_LINUX_CAPABILITY_VERSION, &header->version))
			return -EFAULT;
		return -EINVAL;
	}

	if (get_user(pid, &header->pid))
		return -EFAULT;

	if (pid < 0)
		return -EINVAL;

	/* Lock order: task_capability_lock before tasklist_lock. */
	spin_lock(&task_capability_lock);
	read_lock(&tasklist_lock);

	if (pid && pid != current->pid) {
		target = find_task_by_pid(pid);
		if (!target) {
			ret = -ESRCH;
			goto out;
		}
	} else
		target = current;

	ret = security_capget(target, &data.effective, &data.inheritable, &data.permitted);

out:
	read_unlock(&tasklist_lock);
	spin_unlock(&task_capability_lock);

	/* Copy out only after the locks are dropped (may fault). */
	if (!ret && copy_to_user(dataptr, &data, sizeof data))
		return -EFAULT;

	return ret;
}
|
||||
|
||||
/*
 * cap_set_pg - set capabilities for all processes in a given process
 * group.  We call this holding task_capability_lock and tasklist_lock.
 *
 * Returns 0 if the group was empty or at least one member was updated,
 * -EPERM if members were found but every security check refused.
 */
static inline int cap_set_pg(int pgrp, kernel_cap_t *effective,
			     kernel_cap_t *inheritable,
			     kernel_cap_t *permitted)
{
	task_t *g, *target;
	int ret = -EPERM;
	int found = 0;

	/* Walk every thread of every process in the group. */
	do_each_task_pid(pgrp, PIDTYPE_PGID, g) {
		target = g;
		while_each_thread(g, target) {
			if (!security_capset_check(target, effective,
						   inheritable,
						   permitted)) {
				security_capset_set(target, effective,
						    inheritable,
						    permitted);
				ret = 0;
			}
			found = 1;
		}
	} while_each_task_pid(pgrp, PIDTYPE_PGID, g);

	if (!found)
		ret = 0;	/* empty group is not an error */
	return ret;
}
|
||||
|
||||
/*
 * cap_set_all - set capabilities for all processes other than init
 * and self.  We call this holding task_capability_lock and tasklist_lock.
 *
 * Returns 0 if no eligible task exists or at least one was updated,
 * -EPERM if tasks were found but every security check refused.
 */
static inline int cap_set_all(kernel_cap_t *effective,
			      kernel_cap_t *inheritable,
			      kernel_cap_t *permitted)
{
	task_t *g, *target;
	int ret = -EPERM;
	int found = 0;

	do_each_thread(g, target) {
		/* Skip the caller and init (pid 1). */
		if (target == current || target->pid == 1)
			continue;
		found = 1;
		if (security_capset_check(target, effective, inheritable,
					  permitted))
			continue;
		ret = 0;
		security_capset_set(target, effective, inheritable, permitted);
	} while_each_thread(g, target);

	if (!found)
		ret = 0;
	return ret;
}
|
||||
|
||||
/*
 * sys_capset - set capabilities for a given process, all processes, or all
 * processes in a given process group.
 *
 * The restrictions on setting capabilities are specified as:
 *
 * [pid is for the 'target' task.  'current' is the calling task.]
 *
 * I: any raised capabilities must be a subset of the (old current) permitted
 * P: any raised capabilities must be a subset of the (old current) permitted
 * E: must be set to a subset of (new target) permitted
 *
 * pid > 0: that process; pid == 0: current; pid == -1: everyone except
 * current and init; pid < -1: process group -pid.
 */
asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
{
	kernel_cap_t inheritable, permitted, effective;
	__u32 version;
	task_t *target;
	int ret;
	pid_t pid;

	if (get_user(version, &header->version))
		return -EFAULT;

	if (version != _LINUX_CAPABILITY_VERSION) {
		/* Report the supported version back to the caller. */
		if (put_user(_LINUX_CAPABILITY_VERSION, &header->version))
			return -EFAULT;
		return -EINVAL;
	}

	if (get_user(pid, &header->pid))
		return -EFAULT;

	/* Changing another task's caps requires CAP_SETPCAP. */
	if (pid && pid != current->pid && !capable(CAP_SETPCAP))
		return -EPERM;

	if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
	    copy_from_user(&inheritable, &data->inheritable, sizeof(inheritable)) ||
	    copy_from_user(&permitted, &data->permitted, sizeof(permitted)))
		return -EFAULT;

	/* Lock order: task_capability_lock before tasklist_lock. */
	spin_lock(&task_capability_lock);
	read_lock(&tasklist_lock);

	if (pid > 0 && pid != current->pid) {
		target = find_task_by_pid(pid);
		if (!target) {
			ret = -ESRCH;
			goto out;
		}
	} else
		target = current;

	ret = 0;

	/* having verified that the proposed changes are legal,
	   we now put them into effect. */
	if (pid < 0) {
		if (pid == -1)	/* all procs other than current and init */
			ret = cap_set_all(&effective, &inheritable, &permitted);

		else		/* all procs in process group */
			ret = cap_set_pg(-pid, &effective, &inheritable,
					 &permitted);
	} else {
		ret = security_capset_check(target, &effective, &inheritable,
					    &permitted);
		if (!ret)
			security_capset_set(target, &effective, &inheritable,
					    &permitted);
	}

out:
	read_unlock(&tasklist_lock);
	spin_unlock(&task_capability_lock);

	return ret;
}
|
||||
860
kernel/compat.c
Normal file
860
kernel/compat.c
Normal file
@@ -0,0 +1,860 @@
|
||||
/*
|
||||
* linux/kernel/compat.c
|
||||
*
|
||||
* Kernel compatibililty routines for e.g. 32 bit syscall support
|
||||
* on 64 bit kernels.
|
||||
*
|
||||
* Copyright (C) 2002-2003 Stephen Rothwell, IBM Corporation
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/compat.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/signal.h>
|
||||
#include <linux/sched.h> /* for MAX_SCHEDULE_TIMEOUT */
|
||||
#include <linux/futex.h> /* for FUTEX_WAIT */
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/unistd.h>
|
||||
#include <linux/security.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/bug.h>
|
||||
|
||||
/* Copy a 32-bit timespec from user space into a native struct timespec.
 * Returns 0 on success, -EFAULT if the user buffer is not readable. */
int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
{
	if (!access_ok(VERIFY_READ, cts, sizeof(*cts)))
		return -EFAULT;
	if (__get_user(ts->tv_sec, &cts->tv_sec))
		return -EFAULT;
	if (__get_user(ts->tv_nsec, &cts->tv_nsec))
		return -EFAULT;
	return 0;
}
|
||||
|
||||
/* Copy a native struct timespec out to a 32-bit user-space timespec.
 * Returns 0 on success, -EFAULT if the user buffer is not writable. */
int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user *cts)
{
	if (!access_ok(VERIFY_WRITE, cts, sizeof(*cts)))
		return -EFAULT;
	if (__put_user(ts->tv_sec, &cts->tv_sec))
		return -EFAULT;
	if (__put_user(ts->tv_nsec, &cts->tv_nsec))
		return -EFAULT;
	return 0;
}
|
||||
|
||||
/* Restart handler for an interrupted compat nanosleep.  arg0 holds the
 * absolute expiry (in jiffies), arg1 the user's rmtp pointer.  Sleeps
 * for the remaining time; if interrupted again, writes the remaining
 * time to rmtp and asks to be restarted once more. */
static long compat_nanosleep_restart(struct restart_block *restart)
{
	unsigned long expire = restart->arg0, now = jiffies;
	struct compat_timespec __user *rmtp;

	/* Did it expire while we handled signals? */
	if (!time_after(expire, now))
		return 0;

	current->state = TASK_INTERRUPTIBLE;
	expire = schedule_timeout(expire - now);
	if (expire == 0)
		return 0;	/* slept the full remaining time */

	rmtp = (struct compat_timespec __user *)restart->arg1;
	if (rmtp) {
		struct compat_timespec ct;
		struct timespec t;

		/* Report the still-remaining time back to userspace. */
		jiffies_to_timespec(expire, &t);
		ct.tv_sec  = t.tv_sec;
		ct.tv_nsec = t.tv_nsec;
		if (copy_to_user(rmtp, &ct, sizeof(ct)))
			return -EFAULT;
	}
	/* The 'restart' block is already filled in */
	return -ERESTART_RESTARTBLOCK;
}
|
||||
|
||||
/* 32-bit-compat nanosleep(2).  Validates the request, sleeps, and on
 * interruption reports the remaining time via rmtp and arranges a
 * restart through compat_nanosleep_restart. */
asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
				     struct compat_timespec __user *rmtp)
{
	struct timespec t;
	struct restart_block *restart;
	unsigned long expire;

	if (get_compat_timespec(&t, rqtp))
		return -EFAULT;

	if ((t.tv_nsec >= 1000000000L) || (t.tv_nsec < 0) || (t.tv_sec < 0))
		return -EINVAL;

	/* +1 jiffy for any non-zero request so we never sleep short. */
	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
	current->state = TASK_INTERRUPTIBLE;
	expire = schedule_timeout(expire);
	if (expire == 0)
		return 0;

	if (rmtp) {
		/* Interrupted: report the remaining time. */
		jiffies_to_timespec(expire, &t);
		if (put_compat_timespec(&t, rmtp))
			return -EFAULT;
	}
	restart = &current_thread_info()->restart_block;
	restart->fn = compat_nanosleep_restart;
	restart->arg0 = jiffies + expire;	/* absolute expiry */
	restart->arg1 = (unsigned long) rmtp;
	return -ERESTART_RESTARTBLOCK;
}
|
||||
|
||||
/* Read a compat itimerval from user space into a native one.  The
 * bitwise | between __get_user calls is deliberate: all four fields
 * are fetched unconditionally.  Returns non-zero on fault. */
static inline long get_compat_itimerval(struct itimerval *o,
					struct compat_itimerval __user *i)
{
	return (!access_ok(VERIFY_READ, i, sizeof(*i)) ||
		(__get_user(o->it_interval.tv_sec, &i->it_interval.tv_sec) |
		 __get_user(o->it_interval.tv_usec, &i->it_interval.tv_usec) |
		 __get_user(o->it_value.tv_sec, &i->it_value.tv_sec) |
		 __get_user(o->it_value.tv_usec, &i->it_value.tv_usec)));
}
|
||||
|
||||
/* Write a native itimerval out to a compat one in user space.  As with
 * get_compat_itimerval, the bitwise | stores all four fields
 * unconditionally.  Returns non-zero on fault. */
static inline long put_compat_itimerval(struct compat_itimerval __user *o,
					struct itimerval *i)
{
	return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
		(__put_user(i->it_interval.tv_sec, &o->it_interval.tv_sec) |
		 __put_user(i->it_interval.tv_usec, &o->it_interval.tv_usec) |
		 __put_user(i->it_value.tv_sec, &o->it_value.tv_sec) |
		 __put_user(i->it_value.tv_usec, &o->it_value.tv_usec)));
}
|
||||
|
||||
asmlinkage long compat_sys_getitimer(int which,
|
||||
struct compat_itimerval __user *it)
|
||||
{
|
||||
struct itimerval kit;
|
||||
int error;
|
||||
|
||||
error = do_getitimer(which, &kit);
|
||||
if (!error && put_compat_itimerval(it, &kit))
|
||||
error = -EFAULT;
|
||||
return error;
|
||||
}
|
||||
|
||||
asmlinkage long compat_sys_setitimer(int which,
|
||||
struct compat_itimerval __user *in,
|
||||
struct compat_itimerval __user *out)
|
||||
{
|
||||
struct itimerval kin, kout;
|
||||
int error;
|
||||
|
||||
if (in) {
|
||||
if (get_compat_itimerval(&kin, in))
|
||||
return -EFAULT;
|
||||
} else
|
||||
memset(&kin, 0, sizeof(kin));
|
||||
|
||||
error = do_setitimer(which, &kin, out ? &kout : NULL);
|
||||
if (error || !out)
|
||||
return error;
|
||||
if (put_compat_itimerval(out, &kout))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* 32-bit-compat times(2): sum utime/stime over all threads of the
 * calling process, fetch child times under siglock, convert everything
 * to compat clock ticks, and return the current uptime in ticks. */
asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
{
	/*
	 * In the SMP world we might just be unlucky and have one of
	 * the times increment as we use it. Since the value is an
	 * atomically safe type this is just fine. Conceptually its
	 * as if the syscall took an instant longer to occur.
	 */
	if (tbuf) {
		struct compat_tms tmp;
		struct task_struct *tsk = current;
		struct task_struct *t;
		cputime_t utime, stime, cutime, cstime;

		read_lock(&tasklist_lock);
		/* Start from the accumulated totals of exited threads. */
		utime = tsk->signal->utime;
		stime = tsk->signal->stime;
		t = tsk;
		do {
			/* Add in every live thread's own times. */
			utime = cputime_add(utime, t->utime);
			stime = cputime_add(stime, t->stime);
			t = next_thread(t);
		} while (t != tsk);

		/*
		 * While we have tasklist_lock read-locked, no dying thread
		 * can be updating current->signal->[us]time.  Instead,
		 * we got their counts included in the live thread loop.
		 * However, another thread can come in right now and
		 * do a wait call that updates current->signal->c[us]time.
		 * To make sure we always see that pair updated atomically,
		 * we take the siglock around fetching them.
		 */
		spin_lock_irq(&tsk->sighand->siglock);
		cutime = tsk->signal->cutime;
		cstime = tsk->signal->cstime;
		spin_unlock_irq(&tsk->sighand->siglock);
		read_unlock(&tasklist_lock);

		/* Convert cputime -> jiffies -> compat clock ticks. */
		tmp.tms_utime = compat_jiffies_to_clock_t(cputime_to_jiffies(utime));
		tmp.tms_stime = compat_jiffies_to_clock_t(cputime_to_jiffies(stime));
		tmp.tms_cutime = compat_jiffies_to_clock_t(cputime_to_jiffies(cutime));
		tmp.tms_cstime = compat_jiffies_to_clock_t(cputime_to_jiffies(cstime));
		if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
			return -EFAULT;
	}
	return compat_jiffies_to_clock_t(jiffies);
}
|
||||
|
||||
/*
|
||||
* Assumption: old_sigset_t and compat_old_sigset_t are both
|
||||
* types that can be passed to put_user()/get_user().
|
||||
*/
|
||||
|
||||
/* 32-bit-compat sigpending(2): call the native syscall on a kernel
 * buffer (under KERNEL_DS so the __user check passes), then copy the
 * result out as a compat_old_sigset_t. */
asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set)
{
	old_sigset_t s;
	long ret;
	mm_segment_t old_fs = get_fs();

	set_fs(KERNEL_DS);
	ret = sys_sigpending((old_sigset_t __user *) &s);
	set_fs(old_fs);
	if (ret == 0)
		ret = put_user(s, set);
	return ret;
}
|
||||
|
||||
/* 32-bit-compat sigprocmask(2).  The same kernel variable 's' serves
 * as both the new mask (input) and the returned old mask (output),
 * passed to the native syscall under KERNEL_DS. */
asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set,
				       compat_old_sigset_t __user *oset)
{
	old_sigset_t s;
	long ret;
	mm_segment_t old_fs;

	if (set && get_user(s, set))
		return -EFAULT;
	old_fs = get_fs();
	set_fs(KERNEL_DS);
	ret = sys_sigprocmask(how,
			      set ? (old_sigset_t __user *) &s : NULL,
			      oset ? (old_sigset_t __user *) &s : NULL);
	set_fs(old_fs);
	if (ret == 0)
		if (oset)
			ret = put_user(s, oset);
	return ret;
}
|
||||
|
||||
#ifdef CONFIG_FUTEX
|
||||
/* 32-bit-compat futex(2).  For FUTEX_WAIT, @utime is a relative
 * timeout to convert; for FUTEX_REQUEUE and later ops the same
 * argument slot carries an integer (val2) instead of a pointer. */
asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, int val,
		struct compat_timespec __user *utime, u32 __user *uaddr2,
		int val3)
{
	struct timespec t;
	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
	int val2 = 0;

	if ((op == FUTEX_WAIT) && utime) {
		if (get_compat_timespec(&t, utime))
			return -EFAULT;
		/* +1 jiffy so we never time out early. */
		timeout = timespec_to_jiffies(&t) + 1;
	}
	if (op >= FUTEX_REQUEUE)
		val2 = (int) (unsigned long) utime;

	return do_futex((unsigned long)uaddr, op, val, timeout,
			(unsigned long)uaddr2, val2, val3);
}
|
||||
#endif
|
||||
|
||||
/*
 * 32-bit compat wrapper for sys_setrlimit(): widen the compat rlimit
 * fields, translating the 32-bit COMPAT_RLIM_INFINITY sentinel to the
 * native RLIM_INFINITY, then call the native syscall under KERNEL_DS.
 */
asmlinkage long compat_sys_setrlimit(unsigned int resource,
		struct compat_rlimit __user *rlim)
{
	struct rlimit r;
	int ret;
	mm_segment_t old_fs = get_fs ();

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	/* One access_ok() check, then unchecked __get_user for both fields. */
	if (!access_ok(VERIFY_READ, rlim, sizeof(*rlim)) ||
	    __get_user(r.rlim_cur, &rlim->rlim_cur) ||
	    __get_user(r.rlim_max, &rlim->rlim_max))
		return -EFAULT;

	if (r.rlim_cur == COMPAT_RLIM_INFINITY)
		r.rlim_cur = RLIM_INFINITY;
	if (r.rlim_max == COMPAT_RLIM_INFINITY)
		r.rlim_max = RLIM_INFINITY;
	set_fs(KERNEL_DS);
	ret = sys_setrlimit(resource, (struct rlimit __user *) &r);
	set_fs(old_fs);
	return ret;
}
|
||||
|
||||
#ifdef COMPAT_RLIM_OLD_INFINITY
|
||||
|
||||
asmlinkage long compat_sys_old_getrlimit(unsigned int resource,
|
||||
struct compat_rlimit __user *rlim)
|
||||
{
|
||||
struct rlimit r;
|
||||
int ret;
|
||||
mm_segment_t old_fs = get_fs();
|
||||
|
||||
set_fs(KERNEL_DS);
|
||||
ret = sys_old_getrlimit(resource, &r);
|
||||
set_fs(old_fs);
|
||||
|
||||
if (!ret) {
|
||||
if (r.rlim_cur > COMPAT_RLIM_OLD_INFINITY)
|
||||
r.rlim_cur = COMPAT_RLIM_INFINITY;
|
||||
if (r.rlim_max > COMPAT_RLIM_OLD_INFINITY)
|
||||
r.rlim_max = COMPAT_RLIM_INFINITY;
|
||||
|
||||
if (!access_ok(VERIFY_WRITE, rlim, sizeof(*rlim)) ||
|
||||
__put_user(r.rlim_cur, &rlim->rlim_cur) ||
|
||||
__put_user(r.rlim_max, &rlim->rlim_max))
|
||||
return -EFAULT;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * 32-bit compat wrapper for sys_getrlimit(): fetch the native limits
 * under KERNEL_DS, clamp values that don't fit the 32-bit ABI to
 * COMPAT_RLIM_INFINITY, and copy the narrowed rlimit out to userspace.
 */
asmlinkage long compat_sys_getrlimit (unsigned int resource,
		struct compat_rlimit __user *rlim)
{
	struct rlimit r;
	int ret;
	mm_segment_t old_fs = get_fs();

	set_fs(KERNEL_DS);
	ret = sys_getrlimit(resource, (struct rlimit __user *) &r);
	set_fs(old_fs);
	if (!ret) {
		/* Anything too large for compat_rlimit reads as infinity. */
		if (r.rlim_cur > COMPAT_RLIM_INFINITY)
			r.rlim_cur = COMPAT_RLIM_INFINITY;
		if (r.rlim_max > COMPAT_RLIM_INFINITY)
			r.rlim_max = COMPAT_RLIM_INFINITY;

		if (!access_ok(VERIFY_WRITE, rlim, sizeof(*rlim)) ||
		    __put_user(r.rlim_cur, &rlim->rlim_cur) ||
		    __put_user(r.rlim_max, &rlim->rlim_max))
			return -EFAULT;
	}
	return ret;
}
|
||||
|
||||
/*
 * Copy a native struct rusage out to a userspace compat_rusage,
 * narrowing each field individually. One access_ok() check up front
 * covers all the unchecked __put_user stores.
 * Returns 0 on success, -EFAULT on any fault.
 */
int put_compat_rusage(const struct rusage *r, struct compat_rusage __user *ru)
{
	if (!access_ok(VERIFY_WRITE, ru, sizeof(*ru)) ||
	    __put_user(r->ru_utime.tv_sec, &ru->ru_utime.tv_sec) ||
	    __put_user(r->ru_utime.tv_usec, &ru->ru_utime.tv_usec) ||
	    __put_user(r->ru_stime.tv_sec, &ru->ru_stime.tv_sec) ||
	    __put_user(r->ru_stime.tv_usec, &ru->ru_stime.tv_usec) ||
	    __put_user(r->ru_maxrss, &ru->ru_maxrss) ||
	    __put_user(r->ru_ixrss, &ru->ru_ixrss) ||
	    __put_user(r->ru_idrss, &ru->ru_idrss) ||
	    __put_user(r->ru_isrss, &ru->ru_isrss) ||
	    __put_user(r->ru_minflt, &ru->ru_minflt) ||
	    __put_user(r->ru_majflt, &ru->ru_majflt) ||
	    __put_user(r->ru_nswap, &ru->ru_nswap) ||
	    __put_user(r->ru_inblock, &ru->ru_inblock) ||
	    __put_user(r->ru_oublock, &ru->ru_oublock) ||
	    __put_user(r->ru_msgsnd, &ru->ru_msgsnd) ||
	    __put_user(r->ru_msgrcv, &ru->ru_msgrcv) ||
	    __put_user(r->ru_nsignals, &ru->ru_nsignals) ||
	    __put_user(r->ru_nvcsw, &ru->ru_nvcsw) ||
	    __put_user(r->ru_nivcsw, &ru->ru_nivcsw))
		return -EFAULT;
	return 0;
}
|
||||
|
||||
/*
 * 32-bit compat wrapper for sys_getrusage(): run the native syscall
 * against a kernel rusage under KERNEL_DS, then narrow it out to the
 * user's compat_rusage via put_compat_rusage().
 */
asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru)
{
	struct rusage r;
	int ret;
	mm_segment_t old_fs = get_fs();

	set_fs(KERNEL_DS);
	ret = sys_getrusage(who, (struct rusage __user *) &r);
	set_fs(old_fs);

	if (ret)
		return ret;

	if (put_compat_rusage(&r, ru))
		return -EFAULT;

	return 0;
}
|
||||
|
||||
/*
 * 32-bit compat wrapper for wait4(2). When no rusage is requested we
 * can pass straight through (status is already a 32-bit quantity);
 * otherwise collect status and rusage into kernel buffers under
 * KERNEL_DS and narrow them out afterwards.
 */
asmlinkage long
compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options,
	struct compat_rusage __user *ru)
{
	if (!ru) {
		/* No conversion needed: delegate directly. */
		return sys_wait4(pid, stat_addr, options, NULL);
	} else {
		struct rusage r;
		int ret;
		unsigned int status;
		mm_segment_t old_fs = get_fs();

		set_fs (KERNEL_DS);
		ret = sys_wait4(pid,
				(stat_addr ?
				 (unsigned int __user *) &status : NULL),
				options, (struct rusage __user *) &r);
		set_fs (old_fs);

		/* Only copy out when a child was actually reaped (pid > 0). */
		if (ret > 0) {
			if (put_compat_rusage(&r, ru))
				return -EFAULT;
			if (stat_addr && put_user(status, stat_addr))
				return -EFAULT;
		}
		return ret;
	}
}
|
||||
|
||||
/*
 * 32-bit compat wrapper for waitid(2): collect siginfo and (optional)
 * rusage into kernel buffers under KERNEL_DS, then convert both out.
 * The siginfo is tagged __SI_CHLD before conversion so that
 * copy_siginfo_to_user32() picks the child-status layout.
 */
asmlinkage long compat_sys_waitid(int which, compat_pid_t pid,
		struct compat_siginfo __user *uinfo, int options,
		struct compat_rusage __user *uru)
{
	siginfo_t info;
	struct rusage ru;
	long ret;
	mm_segment_t old_fs = get_fs();

	memset(&info, 0, sizeof(info));

	set_fs(KERNEL_DS);
	ret = sys_waitid(which, pid, (siginfo_t __user *)&info, options,
			 uru ? (struct rusage __user *)&ru : NULL);
	set_fs(old_fs);

	/* si_signo == 0 means nothing was reaped (e.g. WNOHANG): done. */
	if ((ret < 0) || (info.si_signo == 0))
		return ret;

	if (uru) {
		ret = put_compat_rusage(&ru, uru);
		if (ret)
			return ret;
	}

	/* sys_waitid must not have set an si_code class already. */
	BUG_ON(info.si_code & __SI_MASK);
	info.si_code |= __SI_CHLD;
	return copy_siginfo_to_user32(uinfo, &info);
}
|
||||
|
||||
/*
 * Read a cpu mask of 'len' bytes from a compat userspace bitmap into
 * a kernel cpumask_t. Short user masks leave the tail of *new_mask
 * zeroed; long ones are truncated to sizeof(cpumask_t).
 */
static int compat_get_user_cpu_mask(compat_ulong_t __user *user_mask_ptr,
		unsigned len, cpumask_t *new_mask)
{
	unsigned long *k;

	if (len < sizeof(cpumask_t))
		memset(new_mask, 0, sizeof(cpumask_t));
	else if (len > sizeof(cpumask_t))
		len = sizeof(cpumask_t);

	k = cpus_addr(*new_mask);
	/* len * 8: compat_get_bitmap takes a size in bits, not bytes. */
	return compat_get_bitmap(k, user_mask_ptr, len * 8);
}
|
||||
|
||||
/*
 * 32-bit compat entry for sched_setaffinity(2): convert the compat
 * bitmap into a cpumask_t, then delegate to the native helper.
 */
asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid,
					     unsigned int len,
					     compat_ulong_t __user *user_mask_ptr)
{
	cpumask_t new_mask;
	int retval;

	retval = compat_get_user_cpu_mask(user_mask_ptr, len, &new_mask);
	if (retval)
		return retval;

	return sched_setaffinity(pid, new_mask);
}
|
||||
|
||||
/*
 * 32-bit compat entry for sched_getaffinity(2): fetch the task's
 * cpumask and write it out as a compat bitmap. The caller's buffer
 * must hold at least min_length bytes; on success the number of bytes
 * written (min_length) is returned, matching the native syscall.
 */
asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len,
					     compat_ulong_t __user *user_mask_ptr)
{
	int ret;
	cpumask_t mask;
	unsigned long *k;
	unsigned int min_length = sizeof(cpumask_t);

	/* A single compat word suffices when NR_CPUS fits in one. */
	if (NR_CPUS <= BITS_PER_COMPAT_LONG)
		min_length = sizeof(compat_ulong_t);

	if (len < min_length)
		return -EINVAL;

	ret = sched_getaffinity(pid, &mask);
	if (ret < 0)
		return ret;

	k = cpus_addr(mask);
	/* min_length * 8: compat_put_bitmap takes a size in bits. */
	ret = compat_put_bitmap(user_mask_ptr, k, min_length * 8);
	if (ret)
		return ret;

	return min_length;
}
|
||||
|
||||
/*
 * Copy a userspace compat_itimerspec into a native itimerspec
 * (both the interval and the value members). 0 on success, -EFAULT
 * on any fault.
 */
static int get_compat_itimerspec(struct itimerspec *dst,
				 struct compat_itimerspec __user *src)
{
	if (get_compat_timespec(&dst->it_interval, &src->it_interval) ||
	    get_compat_timespec(&dst->it_value, &src->it_value))
		return -EFAULT;
	return 0;
}
|
||||
|
||||
/*
 * Copy a native itimerspec out to a userspace compat_itimerspec.
 * 0 on success, -EFAULT on any fault.
 */
static int put_compat_itimerspec(struct compat_itimerspec __user *dst,
				 struct itimerspec *src)
{
	if (put_compat_timespec(&src->it_interval, &dst->it_interval) ||
	    put_compat_timespec(&src->it_value, &dst->it_value))
		return -EFAULT;
	return 0;
}
|
||||
|
||||
/*
 * 32-bit compat wrapper for timer_settime(2): widen the new setting,
 * run the native syscall against kernel buffers under KERNEL_DS, and
 * narrow the previous setting back out if the caller asked for it.
 */
long compat_sys_timer_settime(timer_t timer_id, int flags,
			      struct compat_itimerspec __user *new,
			      struct compat_itimerspec __user *old)
{
	long err;
	mm_segment_t oldfs;
	struct itimerspec newts, oldts;

	if (!new)
		return -EINVAL;
	if (get_compat_itimerspec(&newts, new))
		return -EFAULT;
	oldfs = get_fs();
	set_fs(KERNEL_DS);
	err = sys_timer_settime(timer_id, flags,
				(struct itimerspec __user *) &newts,
				(struct itimerspec __user *) &oldts);
	set_fs(oldfs);
	if (!err && old && put_compat_itimerspec(old, &oldts))
		return -EFAULT;
	return err;
}
|
||||
|
||||
/*
 * 32-bit compat wrapper for timer_gettime(2): fetch into a kernel
 * itimerspec under KERNEL_DS, then narrow it out to userspace.
 */
long compat_sys_timer_gettime(timer_t timer_id,
		struct compat_itimerspec __user *setting)
{
	long err;
	mm_segment_t oldfs;
	struct itimerspec ts;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	err = sys_timer_gettime(timer_id,
				(struct itimerspec __user *) &ts);
	set_fs(oldfs);
	if (!err && put_compat_itimerspec(setting, &ts))
		return -EFAULT;
	return err;
}
|
||||
|
||||
/*
 * 32-bit compat wrapper for clock_settime(2): widen the compat
 * timespec, then call the native syscall under KERNEL_DS.
 */
long compat_sys_clock_settime(clockid_t which_clock,
		struct compat_timespec __user *tp)
{
	long err;
	mm_segment_t oldfs;
	struct timespec ts;

	if (get_compat_timespec(&ts, tp))
		return -EFAULT;
	oldfs = get_fs();
	set_fs(KERNEL_DS);
	err = sys_clock_settime(which_clock,
				(struct timespec __user *) &ts);
	set_fs(oldfs);
	return err;
}
|
||||
|
||||
/*
 * 32-bit compat wrapper for clock_gettime(2): fetch into a kernel
 * timespec under KERNEL_DS, then narrow it out to userspace.
 */
long compat_sys_clock_gettime(clockid_t which_clock,
		struct compat_timespec __user *tp)
{
	long err;
	mm_segment_t oldfs;
	struct timespec ts;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	err = sys_clock_gettime(which_clock,
				(struct timespec __user *) &ts);
	set_fs(oldfs);
	if (!err && put_compat_timespec(&ts, tp))
		return -EFAULT;
	return err;
}
|
||||
|
||||
/*
 * 32-bit compat wrapper for clock_getres(2). The result pointer is
 * optional (NULL tp just probes the clock id), hence the extra 'tp'
 * check before copying out.
 */
long compat_sys_clock_getres(clockid_t which_clock,
		struct compat_timespec __user *tp)
{
	long err;
	mm_segment_t oldfs;
	struct timespec ts;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	err = sys_clock_getres(which_clock,
			       (struct timespec __user *) &ts);
	set_fs(oldfs);
	if (!err && tp && put_compat_timespec(&ts, tp))
		return -EFAULT;
	return err;
}
|
||||
|
||||
/*
 * 32-bit compat wrapper for clock_nanosleep(2): widen the request,
 * sleep via the native syscall under KERNEL_DS, and on interruption
 * (-ERESTART_RESTARTBLOCK) report the remaining time back through the
 * compat rmtp pointer.
 */
long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
			    struct compat_timespec __user *rqtp,
			    struct compat_timespec __user *rmtp)
{
	long err;
	mm_segment_t oldfs;
	struct timespec in, out;

	if (get_compat_timespec(&in, rqtp))
		return -EFAULT;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	err = sys_clock_nanosleep(which_clock, flags,
				  (struct timespec __user *) &in,
				  (struct timespec __user *) &out);
	set_fs(oldfs);
	if ((err == -ERESTART_RESTARTBLOCK) && rmtp &&
	    put_compat_timespec(&out, rmtp))
		return -EFAULT;
	return err;
}
|
||||
|
||||
/*
|
||||
* We currently only need the following fields from the sigevent
|
||||
* structure: sigev_value, sigev_signo, sig_notify and (sometimes
|
||||
* sigev_notify_thread_id). The others are handled in user mode.
|
||||
* We also assume that copying sigev_value.sival_int is sufficient
|
||||
* to keep all the bits of sigev_value.sival_ptr intact.
|
||||
*/
|
||||
int get_compat_sigevent(struct sigevent *event,
|
||||
const struct compat_sigevent __user *u_event)
|
||||
{
|
||||
memset(&event, 0, sizeof(*event));
|
||||
return (!access_ok(VERIFY_READ, u_event, sizeof(*u_event)) ||
|
||||
__get_user(event->sigev_value.sival_int,
|
||||
&u_event->sigev_value.sival_int) ||
|
||||
__get_user(event->sigev_signo, &u_event->sigev_signo) ||
|
||||
__get_user(event->sigev_notify, &u_event->sigev_notify) ||
|
||||
__get_user(event->sigev_notify_thread_id,
|
||||
&u_event->sigev_notify_thread_id))
|
||||
? -EFAULT : 0;
|
||||
}
|
||||
|
||||
/* timer_create is architecture specific because it needs sigevent conversion */
|
||||
|
||||
/*
 * Read a bitmap of 'bitmap_size' bits from userspace, stored as an
 * array of compat_ulong_t words, into a kernel array of native
 * unsigned longs. On 64-bit kernels two compat words are packed into
 * each native word (low word first); any native-word tail beyond the
 * user bitmap is zero-filled. Returns 0 or -EFAULT.
 */
long compat_get_bitmap(unsigned long *mask, compat_ulong_t __user *umask,
		       unsigned long bitmap_size)
{
	int i, j;
	unsigned long m;
	compat_ulong_t um;
	unsigned long nr_compat_longs;

	/* align bitmap up to nearest compat_long_t boundary */
	bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);

	/* Check access to the whole user bitmap once up front. */
	if (!access_ok(VERIFY_READ, umask, bitmap_size / 8))
		return -EFAULT;

	nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);

	for (i = 0; i < BITS_TO_LONGS(bitmap_size); i++) {
		m = 0;

		for (j = 0; j < sizeof(m)/sizeof(um); j++) {
			/*
			 * We dont want to read past the end of the userspace
			 * bitmap. We must however ensure the end of the
			 * kernel bitmap is zeroed.
			 */
			if (nr_compat_longs-- > 0) {
				if (__get_user(um, umask))
					return -EFAULT;
			} else {
				um = 0;
			}

			umask++;
			/* Pack compat word j into its slot of the native word. */
			m |= (long)um << (j * BITS_PER_COMPAT_LONG);
		}
		*mask++ = m;
	}

	return 0;
}
|
||||
|
||||
/*
 * Write a kernel bitmap of 'bitmap_size' bits out to userspace as an
 * array of compat_ulong_t words (the inverse of compat_get_bitmap).
 * Native words beyond the user bitmap's length are silently dropped.
 * Returns 0 or -EFAULT.
 */
long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
		       unsigned long bitmap_size)
{
	int i, j;
	unsigned long m;
	compat_ulong_t um;
	unsigned long nr_compat_longs;

	/* align bitmap up to nearest compat_long_t boundary */
	bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);

	if (!access_ok(VERIFY_WRITE, umask, bitmap_size / 8))
		return -EFAULT;

	nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);

	for (i = 0; i < BITS_TO_LONGS(bitmap_size); i++) {
		m = *mask++;

		for (j = 0; j < sizeof(m)/sizeof(um); j++) {
			/* Truncation to compat_ulong_t keeps the low word. */
			um = m;

			/*
			 * We dont want to write past the end of the userspace
			 * bitmap.
			 */
			if (nr_compat_longs-- > 0) {
				if (__put_user(um, umask))
					return -EFAULT;
			}

			umask++;
			/*
			 * Shift the next compat word down in two half-width
			 * steps: a single shift by 8*sizeof(um) would be a
			 * full-width (undefined) shift when m and um are the
			 * same size, as on 32-bit kernels.
			 */
			m >>= 4*sizeof(um);
			m >>= 4*sizeof(um);
		}
	}

	return 0;
}
|
||||
|
||||
/*
 * Assemble a native sigset_t from a compat_sigset_t by joining pairs
 * of 32-bit compat words into 64-bit native words. The switch cases
 * deliberately fall through: starting at _NSIG_WORDS fills every
 * native word down to sig[0]. The __COMPAT_ENDIAN_SWAP__ variant
 * swaps which compat word supplies the high half.
 */
void
sigset_from_compat (sigset_t *set, compat_sigset_t *compat)
{
	switch (_NSIG_WORDS) {
#if defined (__COMPAT_ENDIAN_SWAP__)
	case 4: set->sig[3] = compat->sig[7] | (((long)compat->sig[6]) << 32 );
	case 3: set->sig[2] = compat->sig[5] | (((long)compat->sig[4]) << 32 );
	case 2: set->sig[1] = compat->sig[3] | (((long)compat->sig[2]) << 32 );
	case 1: set->sig[0] = compat->sig[1] | (((long)compat->sig[0]) << 32 );
#else
	case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32 );
	case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32 );
	case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32 );
	case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32 );
#endif
	}
}
|
||||
|
||||
asmlinkage long
|
||||
compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
|
||||
struct compat_siginfo __user *uinfo,
|
||||
struct compat_timespec __user *uts, compat_size_t sigsetsize)
|
||||
{
|
||||
compat_sigset_t s32;
|
||||
sigset_t s;
|
||||
int sig;
|
||||
struct timespec t;
|
||||
siginfo_t info;
|
||||
long ret, timeout = 0;
|
||||
|
||||
if (sigsetsize != sizeof(sigset_t))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t)))
|
||||
return -EFAULT;
|
||||
sigset_from_compat(&s, &s32);
|
||||
sigdelsetmask(&s,sigmask(SIGKILL)|sigmask(SIGSTOP));
|
||||
signotset(&s);
|
||||
|
||||
if (uts) {
|
||||
if (get_compat_timespec (&t, uts))
|
||||
return -EFAULT;
|
||||
if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0
|
||||
|| t.tv_sec < 0)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
spin_lock_irq(¤t->sighand->siglock);
|
||||
sig = dequeue_signal(current, &s, &info);
|
||||
if (!sig) {
|
||||
timeout = MAX_SCHEDULE_TIMEOUT;
|
||||
if (uts)
|
||||
timeout = timespec_to_jiffies(&t)
|
||||
+(t.tv_sec || t.tv_nsec);
|
||||
if (timeout) {
|
||||
current->real_blocked = current->blocked;
|
||||
sigandsets(¤t->blocked, ¤t->blocked, &s);
|
||||
|
||||
recalc_sigpending();
|
||||
spin_unlock_irq(¤t->sighand->siglock);
|
||||
|
||||
current->state = TASK_INTERRUPTIBLE;
|
||||
timeout = schedule_timeout(timeout);
|
||||
|
||||
spin_lock_irq(¤t->sighand->siglock);
|
||||
sig = dequeue_signal(current, &s, &info);
|
||||
current->blocked = current->real_blocked;
|
||||
siginitset(¤t->real_blocked, 0);
|
||||
recalc_sigpending();
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(¤t->sighand->siglock);
|
||||
|
||||
if (sig) {
|
||||
ret = sig;
|
||||
if (uinfo) {
|
||||
if (copy_siginfo_to_user32(uinfo, &info))
|
||||
ret = -EFAULT;
|
||||
}
|
||||
}else {
|
||||
ret = timeout?-EINTR:-EAGAIN;
|
||||
}
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
#ifdef __ARCH_WANT_COMPAT_SYS_TIME
|
||||
|
||||
/* compat_time_t is a 32 bit "long" and needs to get converted. */
|
||||
|
||||
/*
 * 32-bit compat time(2): return the current seconds-since-epoch as a
 * compat_time_t, optionally also storing it through *tloc. Matches
 * the odd native semantics of returning the time value itself (or
 * -EFAULT through the same return slot on a bad pointer).
 */
asmlinkage long compat_sys_time(compat_time_t __user * tloc)
{
	compat_time_t i;
	struct timeval tv;

	do_gettimeofday(&tv);
	i = tv.tv_sec;

	if (tloc) {
		if (put_user(i,tloc))
			i = -EFAULT;
	}
	return i;
}
|
||||
|
||||
/*
 * 32-bit compat stime(2): set the system time from a compat_time_t
 * seconds value (nanoseconds forced to zero), after passing the
 * security hook check.
 */
asmlinkage long compat_sys_stime(compat_time_t __user *tptr)
{
	struct timespec tv;
	int err;

	if (get_user(tv.tv_sec, tptr))
		return -EFAULT;

	tv.tv_nsec = 0;

	err = security_settime(&tv, NULL);
	if (err)
		return err;

	do_settimeofday(&tv);
	return 0;
}
|
||||
|
||||
#endif /* __ARCH_WANT_COMPAT_SYS_TIME */
|
||||
118
kernel/configs.c
Normal file
118
kernel/configs.c
Normal file
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
* kernel/configs.c
|
||||
* Echo the kernel .config file used to build the kernel
|
||||
*
|
||||
* Copyright (C) 2002 Khalid Aziz <khalid_aziz@hp.com>
|
||||
* Copyright (C) 2002 Randy Dunlap <rddunlap@osdl.org>
|
||||
* Copyright (C) 2002 Al Stone <ahs3@fc.hp.com>
|
||||
* Copyright (C) 2002 Hewlett-Packard Company
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or (at
|
||||
* your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
|
||||
* NON INFRINGEMENT. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
#include <linux/config.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/init.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
/**************************************************/
|
||||
/* the actual current config file */
|
||||
|
||||
/*
|
||||
* Define kernel_config_data and kernel_config_data_size, which contains the
|
||||
* wrapped and compressed configuration file. The file is first compressed
|
||||
* with gzip and then bounded by two eight byte magic numbers to allow
|
||||
* extraction from a binary kernel image:
|
||||
*
|
||||
* IKCFG_ST
|
||||
* <image>
|
||||
* IKCFG_ED
|
||||
*/
|
||||
#define MAGIC_START "IKCFG_ST"
|
||||
#define MAGIC_END "IKCFG_ED"
|
||||
#include "config_data.h"
|
||||
|
||||
|
||||
#define MAGIC_SIZE (sizeof(MAGIC_START) - 1)
|
||||
#define kernel_config_data_size \
|
||||
(sizeof(kernel_config_data) - 1 - MAGIC_SIZE * 2)
|
||||
|
||||
#ifdef CONFIG_IKCONFIG_PROC
|
||||
|
||||
/**************************************************/
|
||||
/* globals and useful constants */
|
||||
|
||||
/*
 * read() handler for /proc/config.gz: copy out a window of the
 * embedded gzipped kernel config, skipping the leading IKCFG_ST magic
 * (the trailing magic is already excluded from the size macro).
 */
static ssize_t
ikconfig_read_current(struct file *file, char __user *buf,
		      size_t len, loff_t * offset)
{
	loff_t pos = *offset;
	ssize_t count;

	/* EOF once the caller has read past the payload. */
	if (pos >= kernel_config_data_size)
		return 0;

	count = min(len, (size_t)(kernel_config_data_size - pos));
	if (copy_to_user(buf, kernel_config_data + MAGIC_SIZE + pos, count))
		return -EFAULT;

	*offset += count;
	return count;
}
|
||||
|
||||
/* File operations for /proc/config.gz: read-only, no seek override. */
static struct file_operations ikconfig_file_ops = {
	.owner = THIS_MODULE,
	.read = ikconfig_read_current,
};
|
||||
|
||||
/***************************************************/
|
||||
/* ikconfig_init: start up everything we need to */
|
||||
|
||||
/*
 * Module init: create /proc/config.gz and point it at the embedded
 * config payload. Returns -ENOMEM if the proc entry cannot be made.
 */
static int __init ikconfig_init(void)
{
	struct proc_dir_entry *entry;

	/* create the current config file */
	entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO,
				  &proc_root);
	if (!entry)
		return -ENOMEM;

	entry->proc_fops = &ikconfig_file_ops;
	/* Advertise the payload size so userspace sees a proper length. */
	entry->size = kernel_config_data_size;

	return 0;
}
|
||||
|
||||
/***************************************************/
|
||||
/* ikconfig_cleanup: clean up our mess */
|
||||
|
||||
/* Module exit: remove /proc/config.gz. */
static void __exit ikconfig_cleanup(void)
{
	remove_proc_entry("config.gz", &proc_root);
}
|
||||
|
||||
module_init(ikconfig_init);
|
||||
module_exit(ikconfig_cleanup);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Randy Dunlap");
|
||||
MODULE_DESCRIPTION("Echo the kernel .config file used to build the kernel");
|
||||
|
||||
#endif /* CONFIG_IKCONFIG_PROC */
|
||||
193
kernel/cpu.c
Normal file
193
kernel/cpu.c
Normal file
@@ -0,0 +1,193 @@
|
||||
/* CPU control.
|
||||
* (C) 2001, 2002, 2003, 2004 Rusty Russell
|
||||
*
|
||||
* This code is licenced under the GPL.
|
||||
*/
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/unistd.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/stop_machine.h>
|
||||
#include <asm/semaphore.h>
|
||||
|
||||
/* This protects CPUs going up and down... */
|
||||
DECLARE_MUTEX(cpucontrol);
|
||||
|
||||
static struct notifier_block *cpu_chain;
|
||||
|
||||
/* Need to know about CPUs going up/down? */
|
||||
/*
 * Register a callback for CPU hotplug events, serialized against
 * concurrent CPU up/down by the cpucontrol semaphore. Returns the
 * chain-registration result, or -EINTR if interrupted while waiting
 * for the semaphore.
 */
int register_cpu_notifier(struct notifier_block *nb)
{
	int ret;

	if ((ret = down_interruptible(&cpucontrol)) != 0)
		return ret;
	ret = notifier_chain_register(&cpu_chain, nb);
	up(&cpucontrol);
	return ret;
}
|
||||
EXPORT_SYMBOL(register_cpu_notifier);
|
||||
|
||||
/*
 * Unregister a CPU hotplug callback. Uses an uninterruptible down()
 * since unregistration must not fail.
 */
void unregister_cpu_notifier(struct notifier_block *nb)
{
	down(&cpucontrol);
	notifier_chain_unregister(&cpu_chain, nb);
	up(&cpucontrol);
}
|
||||
EXPORT_SYMBOL(unregister_cpu_notifier);
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
/*
 * After a CPU has been taken down, warn about any task still recorded
 * on it that has accumulated user or system time — such tasks should
 * have been migrated away. Diagnostic only; nothing is fixed up here.
 */
static inline void check_for_tasks(int cpu)
{
	struct task_struct *p;

	write_lock_irq(&tasklist_lock);
	for_each_process(p) {
		if (task_cpu(p) == cpu &&
		    (!cputime_eq(p->utime, cputime_zero) ||
		     !cputime_eq(p->stime, cputime_zero)))
			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
			(state = %ld, flags = %lx) \n",
				p->comm, p->pid, cpu, p->state, p->flags);
	}
	write_unlock_irq(&tasklist_lock);
}
|
||||
|
||||
/* Take this CPU down. */
|
||||
/*
 * Runs on the dying CPU (via stop_machine) to take it offline: clear
 * it from the online map, disable its interrupts, and on success hand
 * the CPU to the idle task so it can notice it is offline and die.
 * On failure the CPU is put back in the online map.
 */
static int take_cpu_down(void *unused)
{
	int err;

	/* Take offline: makes arch_cpu_down somewhat easier. */
	cpu_clear(smp_processor_id(), cpu_online_map);

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		cpu_set(smp_processor_id(), cpu_online_map);
	else
		/* Force idle task to run as soon as we yield: it should
		   immediately notice cpu is offline and die quickly. */
		sched_idle_next();

	return err;
}
|
||||
|
||||
/*
 * Take a CPU offline. Sequence (under the hotplug lock):
 *  1. refuse if it's the last online CPU or not online at all;
 *  2. give notifiers a chance to veto (CPU_DOWN_PREPARE);
 *  3. pin ourselves off the dying CPU;
 *  4. run take_cpu_down() on it via stop_machine;
 *  5. wait for the CPU to reach idle, then kill it (__cpu_die);
 *  6. announce CPU_DEAD and warn about stragglers.
 * Failures unwind via CPU_DOWN_FAILED / the goto cleanup labels.
 * Returns 0 on success or a negative errno.
 */
int cpu_down(unsigned int cpu)
{
	int err;
	struct task_struct *p;
	cpumask_t old_allowed, tmp;

	if ((err = lock_cpu_hotplug_interruptible()) != 0)
		return err;

	/* Never take down the last CPU. */
	if (num_online_cpus() == 1) {
		err = -EBUSY;
		goto out;
	}

	if (!cpu_online(cpu)) {
		err = -EINVAL;
		goto out;
	}

	err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
						(void *)(long)cpu);
	if (err == NOTIFY_BAD) {
		printk("%s: attempt to take down CPU %u failed\n",
				__FUNCTION__, cpu);
		err = -EINVAL;
		goto out;
	}

	/* Ensure that we are not runnable on dying cpu */
	old_allowed = current->cpus_allowed;
	tmp = CPU_MASK_ALL;
	cpu_clear(cpu, tmp);
	set_cpus_allowed(current, tmp);

	/* Stop the machine and run take_cpu_down() on the target CPU. */
	p = __stop_machine_run(take_cpu_down, NULL, cpu);
	if (IS_ERR(p)) {
		/* CPU didn't die: tell everyone.  Can't complain. */
		if (notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED,
				(void *)(long)cpu) == NOTIFY_BAD)
			BUG();

		err = PTR_ERR(p);
		goto out_allowed;
	}

	if (cpu_online(cpu))
		goto out_thread;

	/* Wait for it to sleep (leaving idle task). */
	while (!idle_cpu(cpu))
		yield();

	/* This actually kills the CPU. */
	__cpu_die(cpu);

	/* Move it here so it can run. */
	kthread_bind(p, get_cpu());
	put_cpu();

	/* CPU is completely dead: tell everyone.  Too late to complain. */
	if (notifier_call_chain(&cpu_chain, CPU_DEAD, (void *)(long)cpu)
	    == NOTIFY_BAD)
		BUG();

	check_for_tasks(cpu);

out_thread:
	err = kthread_stop(p);
out_allowed:
	set_cpus_allowed(current, old_allowed);
out:
	unlock_cpu_hotplug();
	return err;
}
|
||||
#endif /*CONFIG_HOTPLUG_CPU*/
|
||||
|
||||
/*
 * Bring a CPU online. Under the cpucontrol semaphore: check that the
 * CPU is present and not already online, let notifiers veto
 * (CPU_UP_PREPARE), run the arch bring-up (__cpu_up), and announce
 * CPU_ONLINE. Any failure after CPU_UP_PREPARE is rolled back via
 * CPU_UP_CANCELED. Returns 0 or a negative errno.
 */
int __devinit cpu_up(unsigned int cpu)
{
	int ret;
	void *hcpu = (void *)(long)cpu;

	if ((ret = down_interruptible(&cpucontrol)) != 0)
		return ret;

	if (cpu_online(cpu) || !cpu_present(cpu)) {
		ret = -EINVAL;
		goto out;
	}
	ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
	if (ret == NOTIFY_BAD) {
		printk("%s: attempt to bring up CPU %u failed\n",
				__FUNCTION__, cpu);
		ret = -EINVAL;
		goto out_notify;
	}

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu);
	if (ret != 0)
		goto out_notify;
	/* A successful __cpu_up must have put the CPU online. */
	if (!cpu_online(cpu))
		BUG();

	/* Now call notifier in preparation. */
	notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);

out_notify:
	if (ret != 0)
		notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu);
out:
	up(&cpucontrol);
	return ret;
}
|
||||
1564
kernel/cpuset.c
Normal file
1564
kernel/cpuset.c
Normal file
File diff suppressed because it is too large
Load Diff
158
kernel/dma.c
Normal file
158
kernel/dma.c
Normal file
@@ -0,0 +1,158 @@
|
||||
/* $Id: dma.c,v 1.7 1994/12/28 03:35:33 root Exp root $
|
||||
* linux/kernel/dma.c: A DMA channel allocator. Inspired by linux/kernel/irq.c.
|
||||
*
|
||||
* Written by Hennus Bergman, 1992.
|
||||
*
|
||||
* 1994/12/26: Changes by Alex Nash to fix a minor bug in /proc/dma.
|
||||
* In the previous version the reported device could end up being wrong,
|
||||
* if a device requested a DMA channel that was already in use.
|
||||
* [It also happened to remove the sizeof(char *) == sizeof(int)
|
||||
* assumption introduced because of those /proc/dma patches. -- Hennus]
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/init.h>
|
||||
#include <asm/dma.h>
|
||||
#include <asm/system.h>
|
||||
|
||||
|
||||
|
||||
/* A note on resource allocation:
|
||||
*
|
||||
* All drivers needing DMA channels, should allocate and release them
|
||||
* through the public routines `request_dma()' and `free_dma()'.
|
||||
*
|
||||
* In order to avoid problems, all processes should allocate resources in
|
||||
* the same sequence and release them in the reverse order.
|
||||
*
|
||||
* So, when allocating DMAs and IRQs, first allocate the IRQ, then the DMA.
|
||||
* When releasing them, first release the DMA, then release the IRQ.
|
||||
* If you don't, you may cause allocation requests to fail unnecessarily.
|
||||
* This doesn't really matter now, but it will once we get real semaphores
|
||||
* in the kernel.
|
||||
*/
|
||||
|
||||
|
||||
DEFINE_SPINLOCK(dma_spin_lock);
|
||||
|
||||
/*
|
||||
* If our port doesn't define this it has no PC like DMA
|
||||
*/
|
||||
|
||||
#ifdef MAX_DMA_CHANNELS
|
||||
|
||||
|
||||
/* Channel n is busy iff dma_chan_busy[n].lock != 0.
|
||||
* DMA0 used to be reserved for DRAM refresh, but apparently not any more...
|
||||
* DMA4 is reserved for cascading.
|
||||
*/
|
||||
|
||||
struct dma_chan {
|
||||
int lock;
|
||||
const char *device_id;
|
||||
};
|
||||
|
||||
static struct dma_chan dma_chan_busy[MAX_DMA_CHANNELS] = {
|
||||
[4] = { 1, "cascade" },
|
||||
};
|
||||
|
||||
|
||||
/*
 * Claim a DMA channel for 'device_id'. The atomic xchg on the lock
 * word is the whole allocation protocol: 0 -> 1 means we got it.
 * Returns 0 on success, -EINVAL for a bad channel, -EBUSY if taken.
 * Note: device_id is recorded after the lock is won, so a concurrent
 * /proc/dma read may briefly see the previous owner's name.
 */
int request_dma(unsigned int dmanr, const char * device_id)
{
	if (dmanr >= MAX_DMA_CHANNELS)
		return -EINVAL;

	if (xchg(&dma_chan_busy[dmanr].lock, 1) != 0)
		return -EBUSY;

	dma_chan_busy[dmanr].device_id = device_id;

	/* old flag was 0, now contains 1 to indicate busy */
	return 0;
} /* request_dma */
|
||||
|
||||
|
||||
/*
 * Release a DMA channel. Warns (but does nothing else) on a bad
 * channel number or on freeing a channel that was not allocated.
 */
void free_dma(unsigned int dmanr)
{
	if (dmanr >= MAX_DMA_CHANNELS) {
		printk(KERN_WARNING "Trying to free DMA%d\n", dmanr);
		return;
	}

	if (xchg(&dma_chan_busy[dmanr].lock, 0) == 0) {
		printk(KERN_WARNING "Trying to free free DMA%d\n", dmanr);
		return;
	}

} /* free_dma */
|
||||
|
||||
#else
|
||||
|
||||
/* Stub for ports without PC-style DMA: allocation always fails. */
int request_dma(unsigned int dmanr, const char *device_id)
{
	return -EINVAL;
}
|
||||
|
||||
/* Stub for ports without PC-style DMA: nothing to free. */
void free_dma(unsigned int dmanr)
{
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
|
||||
#ifdef MAX_DMA_CHANNELS
|
||||
/* seq_file show: list each busy DMA channel and its owner's name. */
static int proc_dma_show(struct seq_file *m, void *v)
{
	int i;

	for (i = 0 ; i < MAX_DMA_CHANNELS ; i++) {
		if (dma_chan_busy[i].lock) {
			seq_printf(m, "%2d: %s\n", i,
				   dma_chan_busy[i].device_id);
		}
	}
	return 0;
}
|
||||
#else
|
||||
/* seq_file show for ports without PC-style DMA channels. */
static int proc_dma_show(struct seq_file *m, void *v)
{
	seq_puts(m, "No DMA\n");
	return 0;
}
|
||||
#endif /* MAX_DMA_CHANNELS */
|
||||
|
||||
/* open() for /proc/dma: single-shot seq_file around proc_dma_show. */
static int proc_dma_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_dma_show, NULL);
}
|
||||
|
||||
/* Standard single_open seq_file operations for /proc/dma. */
static struct file_operations proc_dma_operations = {
	.open		= proc_dma_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
|
||||
|
||||
/*
 * Create /proc/dma at boot. A failed create_proc_entry() is silently
 * ignored — the proc file is informational only.
 */
static int __init proc_dma_init(void)
{
	struct proc_dir_entry *e;

	e = create_proc_entry("dma", 0, NULL);
	if (e)
		e->proc_fops = &proc_dma_operations;

	return 0;
}
|
||||
|
||||
__initcall(proc_dma_init);
|
||||
#endif
|
||||
|
||||
EXPORT_SYMBOL(request_dma);
|
||||
EXPORT_SYMBOL(free_dma);
|
||||
EXPORT_SYMBOL(dma_spin_lock);
|
||||
209
kernel/exec_domain.c
Normal file
209
kernel/exec_domain.c
Normal file
@@ -0,0 +1,209 @@
|
||||
/*
|
||||
* Handling of different ABIs (personalities).
|
||||
*
|
||||
* We group personalities into execution domains which have their
|
||||
* own handlers for kernel entry points, signal mapping, etc...
|
||||
*
|
||||
* 2001-05-06 Complete rewrite, Christoph Hellwig (hch@infradead.org)
|
||||
*/
|
||||
|
||||
#include <linux/config.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/personality.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
|
||||
/* Forward declaration: fallback handler for unregistered personalities. */
static void default_handler(int, struct pt_regs *);

/* Head of the singly-linked list of registered execution domains;
 * the built-in default domain is always reachable from it. */
static struct exec_domain *exec_domains = &default_exec_domain;
/* Protects the exec_domains list. */
static DEFINE_RWLOCK(exec_domains_lock);
|
||||
|
||||
|
||||
/* Identity signal map (signal N -> N), used by the default exec domain
 * for both signal_map and signal_invmap. */
static u_long ident_map[32] = {
	0,	1,	2,	3,	4,	5,	6,	7,
	8,	9,	10,	11,	12,	13,	14,	15,
	16,	17,	18,	19,	20,	21,	22,	23,
	24,	25,	26,	27,	28,	29,	30,	31
};
|
||||
|
||||
/* The native Linux execution domain: PER_LINUX only, identity signal
 * mapping, and an lcall7 handler that raises SIGSEGV. */
struct exec_domain default_exec_domain = {
	.name		= "Linux",		/* name */
	.handler	= default_handler,	/* lcall7 causes a seg fault. */
	.pers_low	= 0,			/* PER_LINUX personality. */
	.pers_high	= 0,			/* PER_LINUX personality. */
	.signal_map	= ident_map,		/* Identity map signals. */
	.signal_invmap	= ident_map,		/*  - both ways. */
};
|
||||
|
||||
|
||||
/*
 * Fallback lcall7/segment handler: reset to the native Linux
 * personality and, if that installed a different handler, delegate to
 * it; otherwise the offending task gets a SIGSEGV.
 */
static void
default_handler(int segment, struct pt_regs *regp)
{
	set_personality(0);

	if (current_thread_info()->exec_domain->handler != default_handler)
		current_thread_info()->exec_domain->handler(segment, regp);
	else
		send_sig(SIGSEGV, current, 1);
}
|
||||
|
||||
/*
 * Find the exec_domain handling @personality and take a module
 * reference on it.  If none is registered, optionally try loading one
 * via kmod and rescan, then fall back to the default domain.  The
 * caller must drop the reference with module_put() when done.
 */
static struct exec_domain *
lookup_exec_domain(u_long personality)
{
	struct exec_domain * ep;
	u_long pers = personality(personality);

	read_lock(&exec_domains_lock);
	for (ep = exec_domains; ep; ep = ep->next) {
		if (pers >= ep->pers_low && pers <= ep->pers_high)
			if (try_module_get(ep->module))
				goto out;
	}

#ifdef CONFIG_KMOD
	/* request_module() may sleep, so the lock is dropped around it
	 * and the list rescanned afterwards. */
	read_unlock(&exec_domains_lock);
	request_module("personality-%ld", pers);
	read_lock(&exec_domains_lock);

	for (ep = exec_domains; ep; ep = ep->next) {
		if (pers >= ep->pers_low && pers <= ep->pers_high)
			if (try_module_get(ep->module))
				goto out;
	}
#endif

	ep = &default_exec_domain;
out:
	read_unlock(&exec_domains_lock);
	return (ep);
}
|
||||
|
||||
/*
 * Add @ep to the global list of execution domains.
 *
 * Returns 0 on success, -EINVAL for a NULL argument, or -EBUSY if the
 * domain is already linked (non-NULL ep->next) or already on the list.
 */
int
register_exec_domain(struct exec_domain *ep)
{
	struct exec_domain *tmp;
	int err = -EBUSY;

	if (ep == NULL)
		return -EINVAL;

	/* A non-NULL next pointer means it is already on some list. */
	if (ep->next != NULL)
		return -EBUSY;

	write_lock(&exec_domains_lock);
	for (tmp = exec_domains; tmp; tmp = tmp->next) {
		if (tmp == ep)
			goto out;
	}

	/* Push onto the head of the list. */
	ep->next = exec_domains;
	exec_domains = ep;
	err = 0;

out:
	write_unlock(&exec_domains_lock);
	return (err);
}
|
||||
|
||||
int
|
||||
unregister_exec_domain(struct exec_domain *ep)
|
||||
{
|
||||
struct exec_domain **epp;
|
||||
|
||||
epp = &exec_domains;
|
||||
write_lock(&exec_domains_lock);
|
||||
for (epp = &exec_domains; *epp; epp = &(*epp)->next) {
|
||||
if (ep == *epp)
|
||||
goto unregister;
|
||||
}
|
||||
write_unlock(&exec_domains_lock);
|
||||
return -EINVAL;
|
||||
|
||||
unregister:
|
||||
*epp = ep->next;
|
||||
ep->next = NULL;
|
||||
write_unlock(&exec_domains_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Switch the current task to the exec_domain handling @personality.
 *
 * If current->fs is shared, a private copy is made first so that the
 * altroot change performed by set_fs_altroot() only affects this task.
 * Returns 0 on success or -ENOMEM if the fs_struct copy fails.
 */
int
__set_personality(u_long personality)
{
	struct exec_domain *ep, *oep;

	ep = lookup_exec_domain(personality);
	if (ep == current_thread_info()->exec_domain) {
		/* Same domain: just record the (possibly new) flags. */
		current->personality = personality;
		return 0;
	}

	if (atomic_read(&current->fs->count) != 1) {
		struct fs_struct *fsp, *ofsp;

		fsp = copy_fs_struct(current->fs);
		if (fsp == NULL) {
			/* Drop the ref taken by lookup_exec_domain(). */
			module_put(ep->module);
			return -ENOMEM;
		}

		task_lock(current);
		ofsp = current->fs;
		current->fs = fsp;
		task_unlock(current);

		put_fs_struct(ofsp);
	}

	/*
	 * At that point we are guaranteed to be the sole owner of
	 * current->fs.
	 */

	current->personality = personality;
	oep = current_thread_info()->exec_domain;
	current_thread_info()->exec_domain = ep;
	set_fs_altroot();

	/* Release the old domain's module reference. */
	module_put(oep->module);
	return 0;
}
|
||||
|
||||
/*
 * Format the registered exec domains into @page (for /proc).  Stops
 * ~80 bytes before the end of the page so a full line always fits.
 * Returns the number of bytes written.
 */
int
get_exec_domain_list(char *page)
{
	struct exec_domain *ep;
	int len = 0;

	read_lock(&exec_domains_lock);
	for (ep = exec_domains; ep && len < PAGE_SIZE - 80; ep = ep->next)
		len += sprintf(page + len, "%d-%d\t%-16s\t[%s]\n",
			       ep->pers_low, ep->pers_high, ep->name,
			       module_name(ep->module));
	read_unlock(&exec_domains_lock);
	return (len);
}
|
||||
|
||||
/*
 * personality(2): set the process personality and return the previous
 * one.  The magic value 0xffffffff only queries the current setting.
 */
asmlinkage long
sys_personality(u_long personality)
{
	u_long old = current->personality;

	if (personality != 0xffffffff) {
		set_personality(personality);
		/* set_personality() may have refused the change. */
		if (current->personality != personality)
			return -EINVAL;
	}

	return (long)old;
}
|
||||
|
||||
|
||||
EXPORT_SYMBOL(register_exec_domain);
|
||||
EXPORT_SYMBOL(unregister_exec_domain);
|
||||
EXPORT_SYMBOL(__set_personality);
|
||||
1527
kernel/exit.c
Normal file
1527
kernel/exit.c
Normal file
File diff suppressed because it is too large
Load Diff
67
kernel/extable.c
Normal file
67
kernel/extable.c
Normal file
@@ -0,0 +1,67 @@
|
||||
/* Rewritten by Rusty Russell, on the backs of many others...
|
||||
Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/sections.h>
|
||||
|
||||
extern struct exception_table_entry __start___ex_table[];
|
||||
extern struct exception_table_entry __stop___ex_table[];
|
||||
|
||||
/* Sort the kernel's built-in exception table */
|
||||
/* Sort the kernel's built-in exception table (linker section bounded by
 * __start___ex_table/__stop___ex_table) so it can be binary-searched. */
void __init sort_main_extable(void)
{
	sort_extable(__start___ex_table, __stop___ex_table);
}
|
||||
|
||||
/* Given an address, look for it in the exception tables. */
|
||||
/*
 * Given a faulting address, look for a fixup entry: first in the
 * built-in exception table, then in loaded modules' tables.
 * Returns the entry or NULL.
 */
const struct exception_table_entry *search_exception_tables(unsigned long addr)
{
	const struct exception_table_entry *e;

	e = search_extable(__start___ex_table, __stop___ex_table-1, addr);
	if (!e)
		e = search_module_extables(addr);
	return e;
}
|
||||
|
||||
/* Is @addr inside the core kernel's text (including init text), as
 * delimited by the linker symbols from <asm/sections.h>? */
static int core_kernel_text(unsigned long addr)
{
	if (addr >= (unsigned long)_stext &&
	    addr <= (unsigned long)_etext)
		return 1;

	if (addr >= (unsigned long)_sinittext &&
	    addr <= (unsigned long)_einittext)
		return 1;
	return 0;
}
|
||||
|
||||
/* True if @addr is kernel or module text.  Uses the __ (unlocked)
 * module lookup variant. */
int __kernel_text_address(unsigned long addr)
{
	if (core_kernel_text(addr))
		return 1;
	return __module_text_address(addr) != NULL;
}
|
||||
|
||||
/* True if @addr is kernel or module text (locking module lookup). */
int kernel_text_address(unsigned long addr)
{
	if (core_kernel_text(addr))
		return 1;
	return module_text_address(addr) != NULL;
}
|
||||
1274
kernel/fork.c
Normal file
1274
kernel/fork.c
Normal file
File diff suppressed because it is too large
Load Diff
798
kernel/futex.c
Normal file
798
kernel/futex.c
Normal file
@@ -0,0 +1,798 @@
|
||||
/*
|
||||
* Fast Userspace Mutexes (which I call "Futexes!").
|
||||
* (C) Rusty Russell, IBM 2002
|
||||
*
|
||||
* Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
|
||||
* (C) Copyright 2003 Red Hat Inc, All Rights Reserved
|
||||
*
|
||||
* Removed page pinning, fix privately mapped COW pages and other cleanups
|
||||
* (C) Copyright 2003, 2004 Jamie Lokier
|
||||
*
|
||||
* Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
|
||||
* enough at me, Linus for the original (flawed) idea, Matthew
|
||||
* Kirkwood for proof-of-concept implementation.
|
||||
*
|
||||
* "The futexes are also cursed."
|
||||
* "But they come in a choice of three flavours!"
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
#include <linux/slab.h>
|
||||
#include <linux/poll.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/jhash.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/futex.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
|
||||
|
||||
/*
|
||||
* Futexes are matched on equal values of this key.
|
||||
* The key type depends on whether it's a shared or private mapping.
|
||||
* Don't rearrange members without looking at hash_futex().
|
||||
*
|
||||
* offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
|
||||
* We set bit 0 to indicate if it's an inode-based key.
|
||||
*/
|
||||
/*
 * Futexes are matched on equal values of this key.  The 'shared' view
 * is used for inode-based (file-backed shared) mappings, 'private' for
 * anonymous/private mappings, and 'both' overlays the common words for
 * hashing and comparison.  Bit 0 of offset flags an inode-based key.
 */
union futex_key {
	struct {
		unsigned long pgoff;
		struct inode *inode;
		int offset;
	} shared;
	struct {
		unsigned long uaddr;
		struct mm_struct *mm;
		int offset;
	} private;
	struct {
		unsigned long word;
		void *ptr;
		int offset;
	} both;
};
|
||||
|
||||
/*
|
||||
* We use this hashed waitqueue instead of a normal wait_queue_t, so
|
||||
* we can wake only the relevant ones (hashed queues may be shared).
|
||||
*
|
||||
* A futex_q has a woken state, just like tasks have TASK_RUNNING.
|
||||
* It is considered woken when list_empty(&q->list) || q->lock_ptr == 0.
|
||||
* The order of wakup is always to make the first condition true, then
|
||||
* wake up q->waiters, then make the second condition true.
|
||||
*/
|
||||
/*
 * One queued waiter.  Considered woken when list_empty(&q->list) or
 * q->lock_ptr == 0; see the ordering comments in wake_futex().
 */
struct futex_q {
	struct list_head list;		/* link in the hash-bucket chain */
	wait_queue_head_t waiters;	/* tasks sleeping on this futex */

	/* Which hash list lock to use. */
	spinlock_t *lock_ptr;

	/* Key which the futex is hashed on. */
	union futex_key key;

	/* For fd, sigio sent using these. */
	int fd;
	struct file *filp;
};
|
||||
|
||||
/*
|
||||
* Split the global futex_lock into every hash list lock.
|
||||
*/
|
||||
/* One hash bucket: per-bucket lock plus the chain of queued waiters
 * (splits the old global futex lock). */
struct futex_hash_bucket {
	spinlock_t lock;
	struct list_head chain;
};
|
||||
|
||||
/* Global hash table of futex wait-queues; initialized in init(). */
static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];

/* Futex-fs vfsmount entry: backs the anonymous files handed out by
 * FUTEX_FD. */
static struct vfsmount *futex_mnt;
|
||||
|
||||
/*
|
||||
* We hash on the keys returned from get_futex_key (see below).
|
||||
*/
|
||||
/*
 * Map a futex key to its hash bucket: jhash2 over the word+ptr fields
 * with the offset as initval, masked to the table size.
 */
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
	u32 hash = jhash2((u32*)&key->both.word,
			  (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
			  key->both.offset);
	return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
}
|
||||
|
||||
/*
|
||||
* Return 1 if two futex_keys are equal, 0 otherwise.
|
||||
*/
|
||||
static inline int match_futex(union futex_key *key1, union futex_key *key2)
|
||||
{
|
||||
return (key1->both.word == key2->both.word
|
||||
&& key1->both.ptr == key2->both.ptr
|
||||
&& key1->both.offset == key2->both.offset);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get parameters which are the keys for a futex.
|
||||
*
|
||||
* For shared mappings, it's (page->index, vma->vm_file->f_dentry->d_inode,
|
||||
* offset_within_page). For private mappings, it's (uaddr, current->mm).
|
||||
* We can usually work out the index without swapping in the page.
|
||||
*
|
||||
* Returns: 0, or negative error code.
|
||||
* The key words are stored in *key on success.
|
||||
*
|
||||
* Should be called with ¤t->mm->mmap_sem but NOT any spinlocks.
|
||||
*/
|
||||
/*
 * Compute the hash key for the futex at @uaddr (see the block comment
 * above).  Must be called with current->mm->mmap_sem held and no
 * spinlocks.  Returns 0 or a negative error code.
 */
static int get_futex_key(unsigned long uaddr, union futex_key *key)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	struct page *page;
	int err;

	/*
	 * The futex address must be "naturally" aligned.
	 */
	key->both.offset = uaddr % PAGE_SIZE;
	if (unlikely((key->both.offset % sizeof(u32)) != 0))
		return -EINVAL;
	uaddr -= key->both.offset;

	/*
	 * The futex is hashed differently depending on whether
	 * it's in a shared or private mapping.  So check vma first.
	 */
	vma = find_extend_vma(mm, uaddr);
	if (unlikely(!vma))
		return -EFAULT;

	/*
	 * Permissions: the page must be readable and not I/O space.
	 */
	if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
		return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;

	/*
	 * Private mappings are handled in a simple way.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.  Therefore we use
	 * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
	 * mappings of _writable_ handles.
	 */
	if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
		key->private.mm = mm;
		key->private.uaddr = uaddr;
		return 0;
	}

	/*
	 * Linear file mappings are also simple.
	 */
	key->shared.inode = vma->vm_file->f_dentry->d_inode;
	key->both.offset++; /* Bit 0 of offset indicates inode-based key. */
	if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
		key->shared.pgoff = (((uaddr - vma->vm_start) >> PAGE_SHIFT)
				     + vma->vm_pgoff);
		return 0;
	}

	/*
	 * We could walk the page table to read the non-linear
	 * pte, and get the page index without fetching the page
	 * from swap.  But that's a lot of code to duplicate here
	 * for a rare case, so we simply fetch the page.
	 */

	/*
	 * Do a quick atomic lookup first - this is the fastpath.
	 */
	spin_lock(&current->mm->page_table_lock);
	page = follow_page(mm, uaddr, 0);
	if (likely(page != NULL)) {
		key->shared.pgoff =
			page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
		spin_unlock(&current->mm->page_table_lock);
		return 0;
	}
	spin_unlock(&current->mm->page_table_lock);

	/*
	 * Do it the general way: fault the page in via get_user_pages().
	 */
	err = get_user_pages(current, mm, uaddr, 1, 0, 0, &page, NULL);
	if (err >= 0) {
		key->shared.pgoff =
			page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
		put_page(page);
		return 0;
	}
	return err;
}
|
||||
|
||||
/*
|
||||
* Take a reference to the resource addressed by a key.
|
||||
* Can be called while holding spinlocks.
|
||||
*
|
||||
* NOTE: mmap_sem MUST be held between get_futex_key() and calling this
|
||||
* function, if it is called at all. mmap_sem keeps key->shared.inode valid.
|
||||
*/
|
||||
/*
 * Take a reference to the resource addressed by a key (inode for
 * shared keys — bit 0 of offset set — or mm otherwise).  Callable
 * under spinlocks; mmap_sem must be held since get_futex_key().
 */
static inline void get_key_refs(union futex_key *key)
{
	if (key->both.ptr != 0) {
		if (key->both.offset & 1)
			atomic_inc(&key->shared.inode->i_count);
		else
			atomic_inc(&key->private.mm->mm_count);
	}
}
|
||||
|
||||
/*
|
||||
* Drop a reference to the resource addressed by a key.
|
||||
* The hash bucket spinlock must not be held.
|
||||
*/
|
||||
/*
 * Drop a reference taken by get_key_refs().  iput()/mmdrop() may
 * sleep or take locks, so the hash-bucket spinlock must NOT be held.
 */
static void drop_key_refs(union futex_key *key)
{
	if (key->both.ptr != 0) {
		if (key->both.offset & 1)
			iput(key->shared.inode);
		else
			mmdrop(key->private.mm);
	}
}
|
||||
|
||||
/*
 * Read the user futex word while atomic (preemption disabled), so a
 * fault returns -EFAULT instead of sleeping.  The caller can then drop
 * its locks, fault the page in with get_user(), and retry.
 */
static inline int get_futex_value_locked(int *dest, int __user *from)
{
	int ret;

	inc_preempt_count();
	ret = __copy_from_user_inatomic(dest, from, sizeof(int));
	dec_preempt_count();

	return ret ? -EFAULT : 0;
}
|
||||
|
||||
/*
|
||||
* The hash bucket lock must be held when this is called.
|
||||
* Afterwards, the futex_q must not be accessed.
|
||||
*/
|
||||
/*
 * Wake one queued waiter.  The hash bucket lock must be held, and the
 * futex_q must not be accessed afterwards.  The statement order below
 * is the wakeup protocol documented at struct futex_q.
 */
static void wake_futex(struct futex_q *q)
{
	list_del_init(&q->list);
	if (q->filp)
		/* FUTEX_FD waiter: deliver the requested SIGIO. */
		send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
	/*
	 * The lock in wake_up_all() is a crucial memory barrier after the
	 * list_del_init() and also before assigning to q->lock_ptr.
	 */
	wake_up_all(&q->waiters);
	/*
	 * The waiting task can free the futex_q as soon as this is written,
	 * without taking any locks.  This must come last.
	 */
	q->lock_ptr = NULL;
}
|
||||
|
||||
/*
|
||||
* Wake up all waiters hashed on the physical page that is mapped
|
||||
* to this virtual address:
|
||||
*/
|
||||
/*
 * FUTEX_WAKE: wake up to @nr_wake waiters hashed on the physical page
 * mapped at @uaddr.  Returns the number woken or a negative error.
 */
static int futex_wake(unsigned long uaddr, int nr_wake)
{
	union futex_key key;
	struct futex_hash_bucket *bh;
	struct list_head *head;
	struct futex_q *this, *next;
	int ret;

	/* mmap_sem keeps the key's backing object stable. */
	down_read(&current->mm->mmap_sem);

	ret = get_futex_key(uaddr, &key);
	if (unlikely(ret != 0))
		goto out;

	bh = hash_futex(&key);
	spin_lock(&bh->lock);
	head = &bh->chain;

	list_for_each_entry_safe(this, next, head, list) {
		if (match_futex (&this->key, &key)) {
			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	spin_unlock(&bh->lock);
out:
	up_read(&current->mm->mmap_sem);
	return ret;
}
|
||||
|
||||
/*
|
||||
* Requeue all waiters hashed on one physical page to another
|
||||
* physical page.
|
||||
*/
|
||||
/*
 * FUTEX_REQUEUE/FUTEX_CMP_REQUEUE: wake up to @nr_wake waiters on
 * @uaddr1, requeue up to @nr_requeue more onto @uaddr2.  When @valp is
 * non-NULL (CMP_REQUEUE) the operation is only performed if *uaddr1
 * still equals *valp, else -EAGAIN.  Returns the number of woken plus
 * requeued tasks, or a negative error.
 */
static int futex_requeue(unsigned long uaddr1, unsigned long uaddr2,
			 int nr_wake, int nr_requeue, int *valp)
{
	union futex_key key1, key2;
	struct futex_hash_bucket *bh1, *bh2;
	struct list_head *head1;
	struct futex_q *this, *next;
	int ret, drop_count = 0;

 retry:
	down_read(&current->mm->mmap_sem);

	ret = get_futex_key(uaddr1, &key1);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, &key2);
	if (unlikely(ret != 0))
		goto out;

	bh1 = hash_futex(&key1);
	bh2 = hash_futex(&key2);

	/* Take both bucket locks in address order to avoid deadlock;
	 * when the buckets coincide, lock only once. */
	if (bh1 < bh2)
		spin_lock(&bh1->lock);
	spin_lock(&bh2->lock);
	if (bh1 > bh2)
		spin_lock(&bh1->lock);

	if (likely(valp != NULL)) {
		int curval;

		ret = get_futex_value_locked(&curval, (int __user *)uaddr1);

		if (unlikely(ret)) {
			spin_unlock(&bh1->lock);
			if (bh1 != bh2)
				spin_unlock(&bh2->lock);

			/* If we would have faulted, release mmap_sem, fault
			 * it in and start all over again.
			 */
			up_read(&current->mm->mmap_sem);

			ret = get_user(curval, (int __user *)uaddr1);

			if (!ret)
				goto retry;

			return ret;
		}
		if (curval != *valp) {
			ret = -EAGAIN;
			goto out_unlock;
		}
	}

	head1 = &bh1->chain;
	list_for_each_entry_safe(this, next, head1, list) {
		if (!match_futex (&this->key, &key1))
			continue;
		if (++ret <= nr_wake) {
			wake_futex(this);
		} else {
			/* Move the waiter to uaddr2's bucket and rekey it. */
			list_move_tail(&this->list, &bh2->chain);
			this->lock_ptr = &bh2->lock;
			this->key = key2;
			get_key_refs(&key2);
			drop_count++;

			if (ret - nr_wake >= nr_requeue)
				break;
			/* Make sure to stop if key1 == key2 */
			if (head1 == &bh2->chain && head1 != &next->list)
				head1 = &this->list;
		}
	}

out_unlock:
	spin_unlock(&bh1->lock);
	if (bh1 != bh2)
		spin_unlock(&bh2->lock);

	/* drop_key_refs() must be called outside the spinlocks. */
	while (--drop_count >= 0)
		drop_key_refs(&key1);

out:
	up_read(&current->mm->mmap_sem);
	return ret;
}
|
||||
|
||||
/* The key must be already stored in q->key. */
|
||||
/*
 * Prepare @q for queueing and lock its hash bucket.  The key must
 * already be stored in q->key.  Returns the locked bucket; pair with
 * __queue_me() or queue_unlock().
 */
static inline struct futex_hash_bucket *
queue_lock(struct futex_q *q, int fd, struct file *filp)
{
	struct futex_hash_bucket *bh;

	q->fd = fd;
	q->filp = filp;

	init_waitqueue_head(&q->waiters);

	/* Pin the key's backing object for the lifetime of the queue entry. */
	get_key_refs(&q->key);
	bh = hash_futex(&q->key);
	q->lock_ptr = &bh->lock;

	spin_lock(&bh->lock);
	return bh;
}
|
||||
|
||||
/* Second half of queue_me(): chain @q into the bucket locked by
 * queue_lock() and release the lock. */
static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *bh)
{
	list_add_tail(&q->list, &bh->chain);
	spin_unlock(&bh->lock);
}
|
||||
|
||||
/* Abort path for queue_lock(): drop the bucket lock and the key
 * references without ever queueing @q. */
static inline void
queue_unlock(struct futex_q *q, struct futex_hash_bucket *bh)
{
	spin_unlock(&bh->lock);
	drop_key_refs(&q->key);
}
|
||||
|
||||
/*
|
||||
* queue_me and unqueue_me must be called as a pair, each
|
||||
* exactly once. They are called with the hashed spinlock held.
|
||||
*/
|
||||
|
||||
/* The key must be already stored in q->key. */
|
||||
/* Queue @q unconditionally (key must already be in q->key).  Must be
 * paired with exactly one unqueue_me(). */
static void queue_me(struct futex_q *q, int fd, struct file *filp)
{
	struct futex_hash_bucket *bh;
	bh = queue_lock(q, fd, filp);
	__queue_me(q, bh);
}
|
||||
|
||||
/* Return 1 if we were still queued (ie. 0 means we were woken) */
|
||||
/*
 * Remove @q from its hash bucket if still queued.
 * Return 1 if we were still queued (ie. 0 means we were woken).
 */
static int unqueue_me(struct futex_q *q)
{
	int ret = 0;
	spinlock_t *lock_ptr;

	/* In the common case we don't take the spinlock, which is nice. */
 retry:
	lock_ptr = q->lock_ptr;
	if (lock_ptr != 0) {
		spin_lock(lock_ptr);
		/*
		 * q->lock_ptr can change between reading it and
		 * spin_lock(), causing us to take the wrong lock.  This
		 * corrects the race condition.
		 *
		 * Reasoning goes like this: if we have the wrong lock,
		 * q->lock_ptr must have changed (maybe several times)
		 * between reading it and the spin_lock().  It can
		 * change again after the spin_lock() but only if it was
		 * already changed before the spin_lock().  It cannot,
		 * however, change back to the original value.  Therefore
		 * we can detect whether we acquired the correct lock.
		 */
		if (unlikely(lock_ptr != q->lock_ptr)) {
			spin_unlock(lock_ptr);
			goto retry;
		}
		WARN_ON(list_empty(&q->list));
		list_del(&q->list);
		spin_unlock(lock_ptr);
		ret = 1;
	}

	/* Balance the get_key_refs() done in queue_lock(). */
	drop_key_refs(&q->key);
	return ret;
}
|
||||
|
||||
/*
 * FUTEX_WAIT: sleep until woken, for up to @time jiffies, but only if
 * *uaddr still equals @val at queueing time.  Returns 0 if woken,
 * -EWOULDBLOCK if the value mismatched, -ETIMEDOUT on timeout, -EINTR
 * on signal, or a key/fault error.
 */
static int futex_wait(unsigned long uaddr, int val, unsigned long time)
{
	DECLARE_WAITQUEUE(wait, current);
	int ret, curval;
	struct futex_q q;
	struct futex_hash_bucket *bh;

 retry:
	down_read(&current->mm->mmap_sem);

	ret = get_futex_key(uaddr, &q.key);
	if (unlikely(ret != 0))
		goto out_release_sem;

	bh = queue_lock(&q, -1, NULL);

	/*
	 * Access the page AFTER the futex is queued.
	 * Order is important:
	 *
	 *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
	 *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
	 *
	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
	 * any cond.  If we queued after testing *uaddr, that would open
	 * a race condition where we could block indefinitely with
	 * cond(var) false, which would violate the guarantee.
	 *
	 * A consequence is that futex_wait() can return zero and absorb
	 * a wakeup when *uaddr != val on entry to the syscall.  This is
	 * rare, but normal.
	 *
	 * We hold the mmap semaphore, so the mapping cannot have changed
	 * since we looked it up in get_futex_key.
	 */

	ret = get_futex_value_locked(&curval, (int __user *)uaddr);

	if (unlikely(ret)) {
		queue_unlock(&q, bh);

		/* If we would have faulted, release mmap_sem, fault it in and
		 * start all over again.
		 */
		up_read(&current->mm->mmap_sem);

		ret = get_user(curval, (int __user *)uaddr);

		if (!ret)
			goto retry;
		return ret;
	}
	if (curval != val) {
		ret = -EWOULDBLOCK;
		queue_unlock(&q, bh);
		goto out_release_sem;
	}

	/* Only actually queue if *uaddr contained val. */
	__queue_me(&q, bh);

	/*
	 * Now the futex is queued and we have checked the data, we
	 * don't want to hold mmap_sem while we sleep.
	 */
	up_read(&current->mm->mmap_sem);

	/*
	 * There might have been scheduling since the queue_me(), as we
	 * cannot hold a spinlock across the get_user() in case it
	 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
	 * queueing ourselves into the futex hash.  This code thus has to
	 * rely on the futex_wake() code removing us from hash when it
	 * wakes us up.
	 */

	/* add_wait_queue is the barrier after __set_current_state. */
	__set_current_state(TASK_INTERRUPTIBLE);
	add_wait_queue(&q.waiters, &wait);
	/*
	 * !list_empty() is safe here without any lock.
	 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
	 */
	if (likely(!list_empty(&q.list)))
		time = schedule_timeout(time);
	__set_current_state(TASK_RUNNING);

	/*
	 * NOTE: we don't remove ourselves from the waitqueue because
	 * we are the only user of it.
	 */

	/* If we were woken (and unqueued), we succeeded, whatever. */
	if (!unqueue_me(&q))
		return 0;
	if (time == 0)
		return -ETIMEDOUT;
	/* We expect signal_pending(current), but another thread may
	 * have handled it for us already. */
	return -EINTR;

 out_release_sem:
	up_read(&current->mm->mmap_sem);
	return ret;
}
|
||||
|
||||
/* release() for FUTEX_FD files: dequeue the waiter (if still queued)
 * and free it. */
static int futex_close(struct inode *inode, struct file *filp)
{
	struct futex_q *q = filp->private_data;

	unqueue_me(q);
	kfree(q);
	return 0;
}
|
||||
|
||||
/* This is one-shot: once it's gone off you need a new fd */
|
||||
/* poll() for FUTEX_FD files.  One-shot: once it has gone off you need
 * a new fd — the futex_q stays dequeued after a wake. */
static unsigned int futex_poll(struct file *filp,
			       struct poll_table_struct *wait)
{
	struct futex_q *q = filp->private_data;
	int ret = 0;

	poll_wait(filp, &q->waiters, wait);

	/*
	 * list_empty() is safe here without any lock.
	 * q->lock_ptr != 0 is not safe, because of ordering against wakeup.
	 */
	if (list_empty(&q->list))
		ret = POLLIN | POLLRDNORM;

	return ret;
}
|
||||
|
||||
/* File operations for the anonymous FUTEX_FD files. */
static struct file_operations futex_fops = {
	.release	= futex_close,
	.poll		= futex_poll,
};
|
||||
|
||||
/*
|
||||
* Signal allows caller to avoid the race which would occur if they
|
||||
* set the sigio stuff up afterwards.
|
||||
*/
|
||||
static int futex_fd(unsigned long uaddr, int signal)
|
||||
{
|
||||
struct futex_q *q;
|
||||
struct file *filp;
|
||||
int ret, err;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (signal < 0 || signal > _NSIG)
|
||||
goto out;
|
||||
|
||||
ret = get_unused_fd();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
filp = get_empty_filp();
|
||||
if (!filp) {
|
||||
put_unused_fd(ret);
|
||||
ret = -ENFILE;
|
||||
goto out;
|
||||
}
|
||||
filp->f_op = &futex_fops;
|
||||
filp->f_vfsmnt = mntget(futex_mnt);
|
||||
filp->f_dentry = dget(futex_mnt->mnt_root);
|
||||
filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
|
||||
|
||||
if (signal) {
|
||||
int err;
|
||||
err = f_setown(filp, current->pid, 1);
|
||||
if (err < 0) {
|
||||
put_unused_fd(ret);
|
||||
put_filp(filp);
|
||||
ret = err;
|
||||
goto out;
|
||||
}
|
||||
filp->f_owner.signum = signal;
|
||||
}
|
||||
|
||||
q = kmalloc(sizeof(*q), GFP_KERNEL);
|
||||
if (!q) {
|
||||
put_unused_fd(ret);
|
||||
put_filp(filp);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
err = get_futex_key(uaddr, &q->key);
|
||||
|
||||
if (unlikely(err != 0)) {
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
put_unused_fd(ret);
|
||||
put_filp(filp);
|
||||
kfree(q);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* queue_me() must be called before releasing mmap_sem, because
|
||||
* key->shared.inode needs to be referenced while holding it.
|
||||
*/
|
||||
filp->private_data = q;
|
||||
|
||||
queue_me(q, ret, filp);
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
|
||||
/* Now we map fd to filp, so userspace can access it */
|
||||
fd_install(ret, filp);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Multiplexer for the futex operations.  @uaddr2/@val2/@val3 are only
 * used by the requeue variants.  Returns an op-specific non-negative
 * count or a negative error code.
 */
long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
		unsigned long uaddr2, int val2, int val3)
{
	int ret;

	switch (op) {
	case FUTEX_WAIT:
		ret = futex_wait(uaddr, val, timeout);
		break;
	case FUTEX_WAKE:
		ret = futex_wake(uaddr, val);
		break;
	case FUTEX_FD:
		/* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
		ret = futex_fd(uaddr, val);
		break;
	case FUTEX_REQUEUE:
		ret = futex_requeue(uaddr, uaddr2, val, val2, NULL);
		break;
	case FUTEX_CMP_REQUEUE:
		ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
		break;
	default:
		ret = -ENOSYS;
	}
	return ret;
}
|
||||
|
||||
|
||||
/*
 * futex(2) entry point: converts the user timespec to a jiffies
 * timeout for FUTEX_WAIT (the +1 rounds up so we never sleep too
 * short), then dispatches via do_futex().
 */
asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
			  struct timespec __user *utime, u32 __user *uaddr2,
			  int val3)
{
	struct timespec t;
	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
	int val2 = 0;

	if ((op == FUTEX_WAIT) && utime) {
		if (copy_from_user(&t, utime, sizeof(t)) != 0)
			return -EFAULT;
		timeout = timespec_to_jiffies(&t) + 1;
	}
	/*
	 * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
	 */
	if (op >= FUTEX_REQUEUE)
		val2 = (int) (unsigned long) utime;

	return do_futex((unsigned long)uaddr, op, val, timeout,
			(unsigned long)uaddr2, val2, val3);
}
|
||||
|
||||
/* Superblock factory for the internal, never-user-mounted futexfs. */
static struct super_block *
futexfs_get_sb(struct file_system_type *fs_type,
	       int flags, const char *dev_name, void *data)
{
	return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA);
}
|
||||
|
||||
/* Internal pseudo filesystem backing FUTEX_FD file descriptors. */
static struct file_system_type futex_fs_type = {
	.name		= "futexfs",
	.get_sb		= futexfs_get_sb,
	.kill_sb	= kill_anon_super,	/* standard pseudo-fs teardown */
};
|
||||
|
||||
/*
 * Boot-time initialization for the futex subsystem: mount the futexfs
 * pseudo filesystem and initialize every hash bucket's wait chain and
 * spinlock.
 */
static int __init init(void)
{
	unsigned int i;

	/* NOTE(review): return values of register_filesystem() and
	 * kern_mount() are not checked - presumably assumed to always
	 * succeed this early at boot; confirm. */
	register_filesystem(&futex_fs_type);
	futex_mnt = kern_mount(&futex_fs_type);

	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
		INIT_LIST_HEAD(&futex_queues[i].chain);
		spin_lock_init(&futex_queues[i].lock);
	}
	return 0;
}
__initcall(init);
|
||||
182
kernel/intermodule.c
Normal file
182
kernel/intermodule.c
Normal file
@@ -0,0 +1,182 @@
|
||||
/* Deprecated, do not use. Moved from module.c to here. --RR */
|
||||
|
||||
/* Written by Keith Owens <kaos@ocs.com.au> Oct 2000 */
|
||||
#include <linux/module.h>
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
/* inter_module functions are always available, even when the kernel is
|
||||
* compiled without modules. Consumers of inter_module_xxx routines
|
||||
* will always work, even when both are built into the kernel, this
|
||||
* approach removes lots of #ifdefs in mainline code.
|
||||
*/
|
||||
|
||||
/* Global registry of inter_module entries, protected by ime_lock. */
static struct list_head ime_list = LIST_HEAD_INIT(ime_list);
static DEFINE_SPINLOCK(ime_lock);
/* Set when a registration failed for lack of memory, so a later failed
 * unregister is reported as a warning rather than a BUG(). */
static int kmalloc_failed;

/* One registered name -> data mapping on ime_list. */
struct inter_module_entry {
	struct list_head list;		/* link on ime_list */
	const char *im_name;		/* unique identifying string */
	struct module *owner;		/* module providing the data */
	const void *userdata;		/* the registered payload */
};
|
||||
|
||||
/**
 * inter_module_register - register a new set of inter module data.
 * @im_name: an arbitrary string to identify the data, must be unique
 * @owner: module that is registering the data, always use THIS_MODULE
 * @userdata: pointer to arbitrary userdata to be registered
 *
 * Description: Check that the im_name has not already been registered,
 * complain if it has.  For new data, add it to the inter_module_entry
 * list.  A duplicate name is a fatal programming error (BUG());
 * allocation failure merely records kmalloc_failed and returns.
 */
void inter_module_register(const char *im_name, struct module *owner, const void *userdata)
{
	struct list_head *tmp;
	struct inter_module_entry *ime, *ime_new;

	/* sizeof(*ime) == sizeof(*ime_new): both are inter_module_entry */
	if (!(ime_new = kmalloc(sizeof(*ime), GFP_KERNEL))) {
		/* Overloaded kernel, not fatal */
		printk(KERN_ERR
			"Aiee, inter_module_register: cannot kmalloc entry for '%s'\n",
			im_name);
		kmalloc_failed = 1;
		return;
	}
	memset(ime_new, 0, sizeof(*ime_new));
	ime_new->im_name = im_name;
	ime_new->owner = owner;
	ime_new->userdata = userdata;

	/* Duplicate check and insertion must be atomic wrt. the list. */
	spin_lock(&ime_lock);
	list_for_each(tmp, &ime_list) {
		ime = list_entry(tmp, struct inter_module_entry, list);
		if (strcmp(ime->im_name, im_name) == 0) {
			spin_unlock(&ime_lock);
			kfree(ime_new);
			/* Program logic error, fatal */
			printk(KERN_ERR "inter_module_register: duplicate im_name '%s'", im_name);
			BUG();
		}
	}
	list_add(&(ime_new->list), &ime_list);
	spin_unlock(&ime_lock);
}
|
||||
|
||||
/**
 * inter_module_unregister - unregister a set of inter module data.
 * @im_name: an arbitrary string to identify the data, must be unique
 *
 * Description: Check that the im_name has been registered, complain if
 * it has not.  For existing data, remove it from the
 * inter_module_entry list.  A missing entry is fatal (BUG()) unless a
 * previous registration failed on kmalloc, in which case it is only
 * logged.
 */
void inter_module_unregister(const char *im_name)
{
	struct list_head *tmp;
	struct inter_module_entry *ime;

	spin_lock(&ime_lock);
	list_for_each(tmp, &ime_list) {
		ime = list_entry(tmp, struct inter_module_entry, list);
		if (strcmp(ime->im_name, im_name) == 0) {
			list_del(&(ime->list));
			spin_unlock(&ime_lock);
			kfree(ime);
			return;
		}
	}
	spin_unlock(&ime_lock);
	if (kmalloc_failed) {
		/* Entry may never have been created - not a logic error. */
		printk(KERN_ERR
			"inter_module_unregister: no entry for '%s', "
			"probably caused by previous kmalloc failure\n",
			im_name);
		return;
	}
	else {
		/* Program logic error, fatal */
		printk(KERN_ERR "inter_module_unregister: no entry for '%s'", im_name);
		BUG();
	}
}
|
||||
|
||||
/**
 * inter_module_get - return arbitrary userdata from another module.
 * @im_name: an arbitrary string to identify the data, must be unique
 *
 * Description: If the im_name has not been registered, return NULL.
 * Try to increment the use count on the owning module, if that fails
 * then return NULL.  Otherwise return the userdata.  On success the
 * caller holds a module reference and must drop it via
 * inter_module_put().
 */
static const void *inter_module_get(const char *im_name)
{
	struct list_head *tmp;
	struct inter_module_entry *ime;
	const void *result = NULL;

	spin_lock(&ime_lock);
	list_for_each(tmp, &ime_list) {
		ime = list_entry(tmp, struct inter_module_entry, list);
		if (strcmp(ime->im_name, im_name) == 0) {
			/* Only hand out the data if the owner is pinned. */
			if (try_module_get(ime->owner))
				result = ime->userdata;
			break;
		}
	}
	spin_unlock(&ime_lock);
	return(result);
}
|
||||
|
||||
/**
 * inter_module_get_request - im get with automatic request_module.
 * @im_name: an arbitrary string to identify the data, must be unique
 * @modname: module that is expected to register im_name
 *
 * Description: If inter_module_get fails, do request_module then retry.
 */
const void *inter_module_get_request(const char *im_name, const char *modname)
{
	const void *data;

	data = inter_module_get(im_name);
	if (data)
		return data;

	/* Not registered yet - try loading the expected provider. */
	request_module("%s", modname);
	return inter_module_get(im_name);
}
|
||||
|
||||
/**
 * inter_module_put - release use of data from another module.
 * @im_name: an arbitrary string to identify the data, must be unique
 *
 * Description: If the im_name has not been registered, complain (fatal
 * via BUG()), otherwise decrement the use count on the owning module.
 */
void inter_module_put(const char *im_name)
{
	struct list_head *tmp;
	struct inter_module_entry *ime;

	spin_lock(&ime_lock);
	list_for_each(tmp, &ime_list) {
		ime = list_entry(tmp, struct inter_module_entry, list);
		if (strcmp(ime->im_name, im_name) == 0) {
			if (ime->owner)
				module_put(ime->owner);
			spin_unlock(&ime_lock);
			return;
		}
	}
	spin_unlock(&ime_lock);
	/* put without a matching get/register is a logic error */
	printk(KERN_ERR "inter_module_put: no entry for '%s'", im_name);
	BUG();
}
|
||||
|
||||
EXPORT_SYMBOL(inter_module_register);
|
||||
EXPORT_SYMBOL(inter_module_unregister);
|
||||
EXPORT_SYMBOL(inter_module_get_request);
|
||||
EXPORT_SYMBOL(inter_module_put);
|
||||
5
kernel/irq/Makefile
Normal file
5
kernel/irq/Makefile
Normal file
@@ -0,0 +1,5 @@
|
||||
|
||||
obj-y := handle.o manage.o spurious.o
|
||||
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
|
||||
obj-$(CONFIG_PROC_FS) += proc.o
|
||||
|
||||
189
kernel/irq/autoprobe.c
Normal file
189
kernel/irq/autoprobe.c
Normal file
@@ -0,0 +1,189 @@
|
||||
/*
|
||||
* linux/kernel/irq/autoprobe.c
|
||||
*
|
||||
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
|
||||
*
|
||||
* This file contains the interrupt probing code and driver APIs.
|
||||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
/*
 * Autodetection depends on the fact that any interrupt that
 * comes in on to an unassigned handler will get stuck with
 * "IRQ_WAITING" cleared and the interrupt disabled.
 */
/* Serializes a whole probe_irq_on() .. probe_irq_off()/mask() cycle. */
static DECLARE_MUTEX(probe_sem);
|
||||
|
||||
/**
 *	probe_irq_on	- begin an interrupt autodetect
 *
 *	Commence probing for an interrupt. The interrupts are scanned
 *	and a mask of potential interrupt lines is returned.
 *
 *	Takes probe_sem, which is only released by a subsequent
 *	probe_irq_off() or probe_irq_mask().
 */
unsigned long probe_irq_on(void)
{
	unsigned long val, delay;
	irq_desc_t *desc;
	unsigned int i;

	down(&probe_sem);
	/*
	 * something may have generated an irq long ago and we want to
	 * flush such a longstanding irq before considering it as spurious.
	 */
	for (i = NR_IRQS-1; i > 0; i--) {
		desc = irq_desc + i;

		spin_lock_irq(&desc->lock);
		if (!irq_desc[i].action)
			irq_desc[i].handler->startup(i);
		spin_unlock_irq(&desc->lock);
	}

	/* Wait for longstanding interrupts to trigger. */
	for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
		/* about 20ms delay */ barrier();

	/*
	 * enable any unassigned irqs
	 * (we must startup again here because if a longstanding irq
	 * happened in the previous stage, it may have masked itself)
	 */
	for (i = NR_IRQS-1; i > 0; i--) {
		desc = irq_desc + i;

		spin_lock_irq(&desc->lock);
		if (!desc->action) {
			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
			if (desc->handler->startup(i))
				desc->status |= IRQ_PENDING;
		}
		spin_unlock_irq(&desc->lock);
	}

	/*
	 * Wait for spurious interrupts to trigger
	 */
	for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
		/* about 100ms delay */ barrier();

	/*
	 * Now filter out any obviously spurious interrupts
	 */
	val = 0;
	for (i = 0; i < NR_IRQS; i++) {
		irq_desc_t *desc = irq_desc + i;
		unsigned int status;

		spin_lock_irq(&desc->lock);
		status = desc->status;

		if (status & IRQ_AUTODETECT) {
			/* It triggered already - consider it spurious. */
			if (!(status & IRQ_WAITING)) {
				desc->status = status & ~IRQ_AUTODETECT;
				desc->handler->shutdown(i);
			} else
				/* return value is a 32-bit mask: only the
				 * first 32 irq lines can be reported */
				if (i < 32)
					val |= 1 << i;
		}
		spin_unlock_irq(&desc->lock);
	}

	return val;
}
|
||||
|
||||
EXPORT_SYMBOL(probe_irq_on);
|
||||
|
||||
/**
 *	probe_irq_mask - scan a bitmap of interrupt lines
 *	@val:	mask of interrupts to consider
 *
 *	Scan the interrupt lines and return a bitmap of active
 *	autodetect interrupts. The interrupt probe logic state
 *	is then returned to its previous value.
 *
 *	Note: we need to scan all the irq's even though we will
 *	only return autodetect irq numbers - just so that we reset
 *	them all to a known state.
 *
 *	Releases probe_sem taken by probe_irq_on().
 */
unsigned int probe_irq_mask(unsigned long val)
{
	unsigned int mask;
	int i;

	mask = 0;
	for (i = 0; i < NR_IRQS; i++) {
		irq_desc_t *desc = irq_desc + i;
		unsigned int status;

		spin_lock_irq(&desc->lock);
		status = desc->status;

		if (status & IRQ_AUTODETECT) {
			/* triggered => candidate; only irqs 0..15 fit the mask */
			if (i < 16 && !(status & IRQ_WAITING))
				mask |= 1 << i;

			desc->status = status & ~IRQ_AUTODETECT;
			desc->handler->shutdown(i);
		}
		spin_unlock_irq(&desc->lock);
	}
	up(&probe_sem);

	return mask & val;
}
|
||||
EXPORT_SYMBOL(probe_irq_mask);
|
||||
|
||||
/**
 *	probe_irq_off	- end an interrupt autodetect
 *	@val: mask of potential interrupts (unused)
 *
 *	Scans the unused interrupt lines and returns the line which
 *	appears to have triggered the interrupt. If no interrupt was
 *	found then zero is returned. If more than one interrupt is
 *	found then minus the first candidate is returned to indicate
 *	there is doubt.
 *
 *	The interrupt probe logic state is returned to its previous
 *	value.
 *
 *	BUGS: When used in a module (which arguably shouldn't happen)
 *	nothing prevents two IRQ probe callers from overlapping. The
 *	results of this are non-optimal.
 */
int probe_irq_off(unsigned long val)
{
	int i, irq_found = 0, nr_irqs = 0;

	for (i = 0; i < NR_IRQS; i++) {
		irq_desc_t *desc = irq_desc + i;
		unsigned int status;

		spin_lock_irq(&desc->lock);
		status = desc->status;

		if (status & IRQ_AUTODETECT) {
			if (!(status & IRQ_WAITING)) {
				/* remember only the first triggered line */
				if (!nr_irqs)
					irq_found = i;
				nr_irqs++;
			}
			desc->status = status & ~IRQ_AUTODETECT;
			desc->handler->shutdown(i);
		}
		spin_unlock_irq(&desc->lock);
	}
	up(&probe_sem);

	/* multiple candidates: signal ambiguity with a negative result */
	if (nr_irqs > 1)
		irq_found = -irq_found;
	return irq_found;
}
|
||||
|
||||
EXPORT_SYMBOL(probe_irq_off);
|
||||
|
||||
193
kernel/irq/handle.c
Normal file
193
kernel/irq/handle.c
Normal file
@@ -0,0 +1,193 @@
|
||||
/*
|
||||
* linux/kernel/irq/handle.c
|
||||
*
|
||||
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
|
||||
*
|
||||
* This file contains the core interrupt handling code.
|
||||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
/*
|
||||
* Linux has a controller-independent interrupt architecture.
|
||||
* Every controller has a 'controller-template', that is used
|
||||
* by the main code to do the right thing. Each driver-visible
|
||||
* interrupt source is transparently wired to the apropriate
|
||||
* controller. Thus drivers need not be aware of the
|
||||
* interrupt-controller.
|
||||
*
|
||||
* The code is designed to be easily extended with new/different
|
||||
* interrupt controllers, without having to do assembly magic or
|
||||
* having to touch the generic code.
|
||||
*
|
||||
* Controller mappings for all interrupt sources:
|
||||
*/
|
||||
irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
	[0 ... NR_IRQS-1] = {
		.handler = &no_irq_type,	/* until arch/driver claims the line */
		.lock = SPIN_LOCK_UNLOCKED
	}
};
|
||||
|
||||
/*
 * Generic 'no controller' code: do-nothing callbacks installed in
 * no_irq_type for lines that have no real controller behind them.
 */
static void end_none(unsigned int irq) { }
static void enable_none(unsigned int irq) { }
static void disable_none(unsigned int irq) { }
static void shutdown_none(unsigned int irq) { }
static unsigned int startup_none(unsigned int irq) { return 0; }
|
||||
|
||||
/* ack callback for unclaimed vectors: delegate to the architecture. */
static void ack_none(unsigned int irq)
{
	/*
	 * 'what should we do if we get a hw irq event on an illegal vector'.
	 * each architecture has to answer this themself.
	 */
	ack_bad_irq(irq);
}
|
||||
|
||||
/* Controller template for lines with no real interrupt controller. */
struct hw_interrupt_type no_irq_type = {
	.typename =	"none",
	.startup =	startup_none,
	.shutdown =	shutdown_none,
	.enable =	enable_none,
	.disable =	disable_none,
	.ack =		ack_none,	/* only callback with any effect */
	.end =		end_none,
	.set_affinity =	NULL
};
|
||||
|
||||
/*
 * Special, empty irq handler: always reports the interrupt as
 * not handled (IRQ_NONE).
 */
irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
{
	return IRQ_NONE;
}
|
||||
|
||||
/*
 * Have got an event to handle: walk the chain of irqaction handlers
 * for this irq and invoke each one.  Returns the OR of the handlers'
 * irqreturn values.  Local interrupts are enabled during the calls
 * unless SA_INTERRUPT is set, and always disabled again on return.
 */
fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
				struct irqaction *action)
{
	int ret, retval = 0, status = 0;

	if (!(action->flags & SA_INTERRUPT))
		local_irq_enable();

	do {
		ret = action->handler(irq, action->dev_id, regs);
		if (ret == IRQ_HANDLED)
			/* collect flags of handlers that claimed the irq */
			status |= action->flags;
		retval |= ret;
		action = action->next;
	} while (action);

	if (status & SA_SAMPLE_RANDOM)
		add_interrupt_randomness(irq);
	local_irq_disable();

	return retval;
}
|
||||
|
||||
/*
 * do_IRQ handles all normal device IRQ's (the special
 * SMP cross-CPU interrupts have their own specific
 * handlers).
 */
fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
{
	irq_desc_t *desc = irq_desc + irq;
	struct irqaction * action;
	unsigned int status;

	kstat_this_cpu.irqs[irq]++;
	if (desc->status & IRQ_PER_CPU) {
		irqreturn_t action_ret;

		/*
		 * No locking required for CPU-local interrupts:
		 */
		desc->handler->ack(irq);
		action_ret = handle_IRQ_event(irq, regs, desc->action);
		if (!noirqdebug)
			note_interrupt(irq, desc, action_ret);
		desc->handler->end(irq);
		return 1;
	}

	spin_lock(&desc->lock);
	desc->handler->ack(irq);
	/*
	 * REPLAY is when Linux resends an IRQ that was dropped earlier
	 * WAITING is used by probe to mark irqs that are being tested
	 */
	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
	status |= IRQ_PENDING; /* we _want_ to handle it */

	/*
	 * If the IRQ is disabled for whatever reason, we cannot
	 * use the action we have.
	 */
	action = NULL;
	if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
		action = desc->action;
		status &= ~IRQ_PENDING; /* we commit to handling */
		status |= IRQ_INPROGRESS; /* we are handling it */
	}
	desc->status = status;

	/*
	 * If there is no IRQ handler or it was disabled, exit early.
	 * Since we set PENDING, if another processor is handling
	 * a different instance of this same irq, the other processor
	 * will take care of it.
	 */
	if (unlikely(!action))
		goto out;

	/*
	 * Edge triggered interrupts need to remember
	 * pending events.
	 * This applies to any hw interrupts that allow a second
	 * instance of the same irq to arrive while we are in do_IRQ
	 * or in the handler. But the code here only handles the _second_
	 * instance of the irq, not the third or fourth. So it is mostly
	 * useful for irq hardware that does not mask cleanly in an
	 * SMP environment.
	 */
	for (;;) {
		irqreturn_t action_ret;

		/* drop the lock while running the handlers */
		spin_unlock(&desc->lock);

		action_ret = handle_IRQ_event(irq, regs, action);

		spin_lock(&desc->lock);
		if (!noirqdebug)
			note_interrupt(irq, desc, action_ret);
		/* another instance arrived meanwhile? then loop again */
		if (likely(!(desc->status & IRQ_PENDING)))
			break;
		desc->status &= ~IRQ_PENDING;
	}
	desc->status &= ~IRQ_INPROGRESS;

out:
	/*
	 * The ->end() handler has to deal with interrupts which got
	 * disabled while the handler was running.
	 */
	desc->handler->end(irq);
	spin_unlock(&desc->lock);

	return 1;
}
|
||||
|
||||
18
kernel/irq/internals.h
Normal file
18
kernel/irq/internals.h
Normal file
@@ -0,0 +1,18 @@
|
||||
/*
 * IRQ subsystem internal functions and variables:
 */

extern int noirqdebug;		/* set to suppress spurious-irq debugging */

#ifdef CONFIG_PROC_FS
extern void register_irq_proc(unsigned int irq);
extern void register_handler_proc(unsigned int irq, struct irqaction *action);
extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
#else
/* without procfs the /proc/irq hooks compile away to no-ops */
static inline void register_irq_proc(unsigned int irq) { }
static inline void register_handler_proc(unsigned int irq,
					 struct irqaction *action) { }
static inline void unregister_handler_proc(unsigned int irq,
					   struct irqaction *action) { }
#endif
|
||||
|
||||
349
kernel/irq/manage.c
Normal file
349
kernel/irq/manage.c
Normal file
@@ -0,0 +1,349 @@
|
||||
/*
|
||||
* linux/kernel/irq/manage.c
|
||||
*
|
||||
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
|
||||
*
|
||||
* This file contains driver APIs to the irq subsystem.
|
||||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
|
||||
|
||||
/**
|
||||
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
|
||||
*
|
||||
* This function waits for any pending IRQ handlers for this interrupt
|
||||
* to complete before returning. If you use this function while
|
||||
* holding a resource the IRQ handler may need you will deadlock.
|
||||
*
|
||||
* This function may be called - with care - from IRQ context.
|
||||
*/
|
||||
void synchronize_irq(unsigned int irq)
|
||||
{
|
||||
struct irq_desc *desc = irq_desc + irq;
|
||||
|
||||
while (desc->status & IRQ_INPROGRESS)
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(synchronize_irq);
|
||||
|
||||
#endif
|
||||
|
||||
/**
 *	disable_irq_nosync - disable an irq without waiting
 *	@irq: Interrupt to disable
 *
 *	Disable the selected interrupt line.  Disables and Enables are
 *	nested.
 *	Unlike disable_irq(), this function does not ensure existing
 *	instances of the IRQ handler have completed before returning.
 *
 *	This function may be called from IRQ context.
 */
void disable_irq_nosync(unsigned int irq)
{
	irq_desc_t *desc = irq_desc + irq;
	unsigned long flags;

	spin_lock_irqsave(&desc->lock, flags);
	/* only the first (outermost) disable touches the hardware */
	if (!desc->depth++) {
		desc->status |= IRQ_DISABLED;
		desc->handler->disable(irq);
	}
	spin_unlock_irqrestore(&desc->lock, flags);
}
|
||||
|
||||
EXPORT_SYMBOL(disable_irq_nosync);
|
||||
|
||||
/**
 *	disable_irq - disable an irq and wait for completion
 *	@irq: Interrupt to disable
 *
 *	Disable the selected interrupt line.  Enables and Disables are
 *	nested.
 *	This function waits for any pending IRQ handlers for this interrupt
 *	to complete before returning. If you use this function while
 *	holding a resource the IRQ handler may need you will deadlock.
 *
 *	This function may be called - with care - from IRQ context.
 */
void disable_irq(unsigned int irq)
{
	irq_desc_t *desc = irq_desc + irq;

	disable_irq_nosync(irq);
	/* only wait if there is actually a handler installed */
	if (desc->action)
		synchronize_irq(irq);
}
|
||||
|
||||
EXPORT_SYMBOL(disable_irq);
|
||||
|
||||
/**
 *	enable_irq - enable handling of an irq
 *	@irq: Interrupt to enable
 *
 *	Undoes the effect of one call to disable_irq().  If this
 *	matches the last disable, processing of interrupts on this
 *	IRQ line is re-enabled.
 *
 *	This function may be called from IRQ context.
 */
void enable_irq(unsigned int irq)
{
	irq_desc_t *desc = irq_desc + irq;
	unsigned long flags;

	spin_lock_irqsave(&desc->lock, flags);
	switch (desc->depth) {
	case 0:
		/* enable without matching disable: complain */
		WARN_ON(1);
		break;
	case 1: {
		unsigned int status = desc->status & ~IRQ_DISABLED;

		desc->status = status;
		/* replay an irq that arrived while we were disabled */
		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
			desc->status = status | IRQ_REPLAY;
			hw_resend_irq(desc->handler,irq);
		}
		desc->handler->enable(irq);
		/* fall-through */
	}
	default:
		desc->depth--;
	}
	spin_unlock_irqrestore(&desc->lock, flags);
}
|
||||
|
||||
EXPORT_SYMBOL(enable_irq);
|
||||
|
||||
/*
 * Internal function that tells the architecture code whether a
 * particular irq has been exclusively allocated or is available
 * for driver use.  Returns non-zero if the line is free or sharable
 * with the given @irqflags, zero otherwise.
 */
int can_request_irq(unsigned int irq, unsigned long irqflags)
{
	struct irqaction *action;

	if (irq >= NR_IRQS)
		return 0;

	action = irq_desc[irq].action;
	if (action)
		/* an existing action only blocks us if not both SA_SHIRQ */
		if (irqflags & action->flags & SA_SHIRQ)
			action = NULL;

	return !action;
}
|
||||
|
||||
/*
 * Internal function to register an irqaction - typically used to
 * allocate special interrupts that are part of the architecture.
 * Returns 0 on success, -ENOSYS if the line has no controller,
 * -EBUSY if the line is in use and sharing is not agreed to.
 */
int setup_irq(unsigned int irq, struct irqaction * new)
{
	struct irq_desc *desc = irq_desc + irq;
	struct irqaction *old, **p;
	unsigned long flags;
	int shared = 0;

	if (desc->handler == &no_irq_type)
		return -ENOSYS;
	/*
	 * Some drivers like serial.c use request_irq() heavily,
	 * so we have to be careful not to interfere with a
	 * running system.
	 */
	if (new->flags & SA_SAMPLE_RANDOM) {
		/*
		 * This function might sleep, we want to call it first,
		 * outside of the atomic block.
		 * Yes, this might clear the entropy pool if the wrong
		 * driver is attempted to be loaded, without actually
		 * installing a new handler, but is this really a problem,
		 * only the sysadmin is able to do this.
		 */
		rand_initialize_irq(irq);
	}

	/*
	 * The following block of code has to be executed atomically
	 */
	spin_lock_irqsave(&desc->lock,flags);
	p = &desc->action;
	if ((old = *p) != NULL) {
		/* Can't share interrupts unless both agree to */
		if (!(old->flags & new->flags & SA_SHIRQ)) {
			spin_unlock_irqrestore(&desc->lock,flags);
			return -EBUSY;
		}

		/* add new interrupt at end of irq queue */
		do {
			p = &old->next;
			old = *p;
		} while (old);
		shared = 1;
	}

	*p = new;

	if (!shared) {
		/* first handler on this line: reset state and start it up */
		desc->depth = 0;
		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT |
				  IRQ_WAITING | IRQ_INPROGRESS);
		if (desc->handler->startup)
			desc->handler->startup(irq);
		else
			desc->handler->enable(irq);
	}
	spin_unlock_irqrestore(&desc->lock,flags);

	new->irq = irq;
	register_irq_proc(irq);
	new->dir = NULL;
	register_handler_proc(irq, new);

	return 0;
}
|
||||
|
||||
/**
 *	free_irq - free an interrupt
 *	@irq: Interrupt line to free
 *	@dev_id: Device identity to free
 *
 *	Remove an interrupt handler. The handler is removed and if the
 *	interrupt line is no longer in use by any driver it is disabled.
 *	On a shared IRQ the caller must ensure the interrupt is disabled
 *	on the card it drives before calling this function. The function
 *	does not return until any executing interrupts for this IRQ
 *	have completed.
 *
 *	This function must not be called from interrupt context.
 */
void free_irq(unsigned int irq, void *dev_id)
{
	struct irq_desc *desc;
	struct irqaction **p;
	unsigned long flags;

	if (irq >= NR_IRQS)
		return;

	desc = irq_desc + irq;
	spin_lock_irqsave(&desc->lock,flags);
	p = &desc->action;
	for (;;) {
		struct irqaction * action = *p;

		if (action) {
			struct irqaction **pp = p;

			p = &action->next;
			/* match handlers by the dev_id cookie */
			if (action->dev_id != dev_id)
				continue;

			/* Found it - now remove it from the list of entries */
			*pp = action->next;
			if (!desc->action) {
				/* last handler gone: shut the line down */
				desc->status |= IRQ_DISABLED;
				if (desc->handler->shutdown)
					desc->handler->shutdown(irq);
				else
					desc->handler->disable(irq);
			}
			spin_unlock_irqrestore(&desc->lock,flags);
			unregister_handler_proc(irq, action);

			/* Make sure it's not being used on another CPU */
			synchronize_irq(irq);
			kfree(action);
			return;
		}
		printk(KERN_ERR "Trying to free free IRQ%d\n",irq);
		spin_unlock_irqrestore(&desc->lock,flags);
		return;
	}
}
|
||||
|
||||
EXPORT_SYMBOL(free_irq);
|
||||
|
||||
/**
 *	request_irq - allocate an interrupt line
 *	@irq: Interrupt line to allocate
 *	@handler: Function to be called when the IRQ occurs
 *	@irqflags: Interrupt type flags
 *	@devname: An ascii name for the claiming device
 *	@dev_id: A cookie passed back to the handler function
 *
 *	This call allocates interrupt resources and enables the
 *	interrupt line and IRQ handling. From the point this
 *	call is made your handler function may be invoked. Since
 *	your handler function must clear any interrupt the board
 *	raises, you must take care both to initialise your hardware
 *	and to set up the interrupt handler in the right order.
 *
 *	Dev_id must be globally unique. Normally the address of the
 *	device data structure is used as the cookie. Since the handler
 *	receives this value it makes sense to use it.
 *
 *	If your interrupt is shared you must pass a non NULL dev_id
 *	as this is required when freeing the interrupt.
 *
 *	Flags:
 *
 *	SA_SHIRQ		Interrupt is shared
 *	SA_INTERRUPT		Disable local interrupts while processing
 *	SA_SAMPLE_RANDOM	The interrupt can be used for entropy
 *
 *	Returns 0 on success or a negative errno (-EINVAL, -ENOMEM,
 *	or whatever setup_irq() reports).
 */
int request_irq(unsigned int irq,
		irqreturn_t (*handler)(int, void *, struct pt_regs *),
		unsigned long irqflags, const char * devname, void *dev_id)
{
	struct irqaction * action;
	int retval;

	/*
	 * Sanity-check: shared interrupts must pass in a real dev-ID,
	 * otherwise we'll have trouble later trying to figure out
	 * which interrupt is which (messes up the interrupt freeing
	 * logic etc).
	 */
	if ((irqflags & SA_SHIRQ) && !dev_id)
		return -EINVAL;
	if (irq >= NR_IRQS)
		return -EINVAL;
	if (!handler)
		return -EINVAL;

	action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
	if (!action)
		return -ENOMEM;

	action->handler = handler;
	action->flags = irqflags;
	cpus_clear(action->mask);
	action->name = devname;
	action->next = NULL;
	action->dev_id = dev_id;

	retval = setup_irq(irq, action);
	if (retval)
		kfree(action);

	return retval;
}
|
||||
|
||||
EXPORT_SYMBOL(request_irq);
|
||||
|
||||
159
kernel/irq/proc.c
Normal file
159
kernel/irq/proc.c
Normal file
@@ -0,0 +1,159 @@
|
||||
/*
|
||||
* linux/kernel/irq/proc.c
|
||||
*
|
||||
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
|
||||
*
|
||||
* This file contains the /proc/irq/ handling code.
|
||||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS];
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
/*
|
||||
* The /proc/irq/<irq>/smp_affinity values:
|
||||
*/
|
||||
static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
|
||||
|
||||
/*
 * Default (weak) affinity setter used by the /proc write path:
 * records the new mask and pushes it to the controller.
 * Architectures may override this with their own definition.
 */
void __attribute__((weak))
proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
{
	irq_affinity[irq] = mask_val;
	irq_desc[irq].handler->set_affinity(irq, mask_val);
}
|
||||
|
||||
/* /proc/irq/<irq>/smp_affinity read handler: print the cpumask. */
static int irq_affinity_read_proc(char *page, char **start, off_t off,
				  int count, int *eof, void *data)
{
	int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]);

	/* need room for the trailing "\n" + NUL */
	if (count - len < 2)
		return -EINVAL;
	len += sprintf(page + len, "\n");
	return len;
}
|
||||
|
||||
/* Set to globally disable affinity changes via /proc. */
int no_irq_affinity;

/*
 * /proc/irq/<irq>/smp_affinity write handler: parse a cpumask from
 * userspace and apply it, refusing masks with no online CPU.
 */
static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
				   unsigned long count, void *data)
{
	unsigned int irq = (int)(long)data, full_count = count, err;
	cpumask_t new_value, tmp;

	if (!irq_desc[irq].handler->set_affinity || no_irq_affinity)
		return -EIO;

	err = cpumask_parse(buffer, count, new_value);
	if (err)
		return err;

	/*
	 * Do not allow disabling IRQs completely - it's a too easy
	 * way to make the system unusable accidentally :-) At least
	 * one online CPU still has to be targeted.
	 */
	cpus_and(tmp, new_value, cpu_online_map);
	if (cpus_empty(tmp))
		return -EINVAL;

	proc_set_irq_affinity(irq, new_value);

	return full_count;
}
|
||||
|
||||
#endif
|
||||
|
||||
#define MAX_NAMELEN 128
|
||||
|
||||
static int name_unique(unsigned int irq, struct irqaction *new_action)
|
||||
{
|
||||
struct irq_desc *desc = irq_desc + irq;
|
||||
struct irqaction *action;
|
||||
|
||||
for (action = desc->action ; action; action = action->next)
|
||||
if ((action != new_action) && action->name &&
|
||||
!strcmp(new_action->name, action->name))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Create /proc/irq/<irq>/<name>/ for one registered handler.
 * Skipped when the per-IRQ directory doesn't exist yet, the action
 * already has a directory, the action has no name, or another action
 * on the same IRQ already uses that name.
 */
void register_handler_proc(unsigned int irq, struct irqaction *action)
{
	char name [MAX_NAMELEN];

	if (!irq_dir[irq] || action->dir || !action->name ||
					!name_unique(irq, action))
		return;

	memset(name, 0, MAX_NAMELEN);
	snprintf(name, MAX_NAMELEN, "%s", action->name);

	/* create /proc/irq/1234/handler/ */
	action->dir = proc_mkdir(name, irq_dir[irq]);
}
|
||||
|
||||
#undef MAX_NAMELEN
|
||||
|
||||
#define MAX_NAMELEN 10
|
||||
|
||||
/*
 * Create the /proc/irq/<irq> directory and, on SMP, its smp_affinity
 * control file.  Does nothing if /proc/irq is missing, the IRQ has no
 * real controller, or the directory already exists.
 */
void register_irq_proc(unsigned int irq)
{
	char name [MAX_NAMELEN];

	if (!root_irq_dir ||
		(irq_desc[irq].handler == &no_irq_type) ||
			irq_dir[irq])
		return;

	memset(name, 0, MAX_NAMELEN);
	sprintf(name, "%d", irq);

	/* create /proc/irq/1234 */
	irq_dir[irq] = proc_mkdir(name, root_irq_dir);

#ifdef CONFIG_SMP
	{
		struct proc_dir_entry *entry;

		/* create /proc/irq/<irq>/smp_affinity */
		entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);

		if (entry) {
			entry->nlink = 1;
			/* the IRQ number is handed to the read/write handlers */
			entry->data = (void *)(long)irq;
			entry->read_proc = irq_affinity_read_proc;
			entry->write_proc = irq_affinity_write_proc;
		}
		smp_affinity_entry[irq] = entry;
	}
#endif
}
|
||||
|
||||
#undef MAX_NAMELEN
|
||||
|
||||
void unregister_handler_proc(unsigned int irq, struct irqaction *action)
|
||||
{
|
||||
if (action->dir)
|
||||
remove_proc_entry(action->dir->name, irq_dir[irq]);
|
||||
}
|
||||
|
||||
void init_irq_proc(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* create /proc/irq */
|
||||
root_irq_dir = proc_mkdir("irq", NULL);
|
||||
if (!root_irq_dir)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Create entries for all existing IRQs.
|
||||
*/
|
||||
for (i = 0; i < NR_IRQS; i++)
|
||||
register_irq_proc(i);
|
||||
}
|
||||
|
||||
96
kernel/irq/spurious.c
Normal file
96
kernel/irq/spurious.c
Normal file
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
* linux/kernel/irq/spurious.c
|
||||
*
|
||||
* Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
|
||||
*
|
||||
* This file contains spurious interrupt handling.
|
||||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
/*
|
||||
* If 99,900 of the previous 100,000 interrupts have not been handled
|
||||
* then assume that the IRQ is stuck in some manner. Drop a diagnostic
|
||||
* and try to turn the IRQ off.
|
||||
*
|
||||
* (The other 100-of-100,000 interrupts may have been a correctly
|
||||
* functioning device sharing an IRQ with the failing one)
|
||||
*
|
||||
* Called under desc->lock
|
||||
*/
|
||||
|
||||
/*
 * Print a diagnostic for an unhandled or bogus interrupt event,
 * including the chain of registered handlers.  Called under desc->lock.
 */
static void
__report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
{
	struct irqaction *action;

	if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
		/* a handler returned something other than HANDLED/NONE */
		printk(KERN_ERR "irq event %d: bogus return value %x\n",
				irq, action_ret);
	} else {
		printk(KERN_ERR "irq %d: nobody cared!\n", irq);
	}
	dump_stack();
	printk(KERN_ERR "handlers:\n");
	/* walk the (possibly shared) action chain, printing each handler */
	action = desc->action;
	while (action) {
		printk(KERN_ERR "[<%p>]", action->handler);
		print_symbol(" (%s)",
			(unsigned long)action->handler);
		printk("\n");
		action = action->next;
	}
}
|
||||
|
||||
/*
 * Rate-limited wrapper around __report_bad_irq(): report at most 100
 * bad interrupt events over the lifetime of the system.
 */
void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
{
	static int budget = 100;

	if (budget <= 0)
		return;
	budget--;
	__report_bad_irq(irq, desc, action_ret);
}
|
||||
|
||||
/*
 * Per-interrupt bookkeeping: count handled vs. unhandled events and
 * disable an IRQ that appears stuck (99,900+ unhandled out of the last
 * 100,000 events).  Called under desc->lock.
 */
void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
{
	if (action_ret != IRQ_HANDLED) {
		desc->irqs_unhandled++;
		/* bogus return values (neither HANDLED nor NONE) get reported */
		if (action_ret != IRQ_NONE)
			report_bad_irq(irq, desc, action_ret);
	}

	desc->irq_count++;
	if (desc->irq_count < 100000)
		return;

	/* re-evaluate the unhandled ratio every 100,000 interrupts */
	desc->irq_count = 0;
	if (desc->irqs_unhandled > 99900) {
		/*
		 * The interrupt is stuck
		 */
		__report_bad_irq(irq, desc, action_ret);
		/*
		 * Now kill the IRQ
		 */
		printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
		desc->status |= IRQ_DISABLED;
		desc->handler->disable(irq);
	}
	desc->irqs_unhandled = 0;
}
|
||||
|
||||
/* Set by the "noirqdebug" boot parameter to disable lockup detection. */
int noirqdebug;

/* Boot-parameter handler for "noirqdebug". */
int __init noirqdebug_setup(char *str)
{
	noirqdebug = 1;
	printk(KERN_INFO "IRQ lockup detection disabled\n");
	return 1;
}

__setup("noirqdebug", noirqdebug_setup);
|
||||
|
||||
241
kernel/itimer.c
Normal file
241
kernel/itimer.c
Normal file
@@ -0,0 +1,241 @@
|
||||
/*
|
||||
* linux/kernel/itimer.c
|
||||
*
|
||||
* Copyright (C) 1992 Darren Senn
|
||||
*/
|
||||
|
||||
/* These are all the functions necessary to implement itimers */
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/posix-timers.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
static unsigned long it_real_value(struct signal_struct *sig)
|
||||
{
|
||||
unsigned long val = 0;
|
||||
if (timer_pending(&sig->real_timer)) {
|
||||
val = sig->real_timer.expires - jiffies;
|
||||
|
||||
/* look out for negative/zero itimer.. */
|
||||
if ((long) val <= 0)
|
||||
val = 1;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
/*
 * Read the current state of one of the caller's interval timers.
 *
 * @which: ITIMER_REAL, ITIMER_VIRTUAL or ITIMER_PROF
 * @value: kernel-space result (it_value = time remaining,
 *         it_interval = reload period)
 *
 * Returns 0 on success, -EINVAL for an unknown @which.
 */
int do_getitimer(int which, struct itimerval *value)
{
	struct task_struct *tsk = current;
	unsigned long interval, val;
	cputime_t cinterval, cval;

	switch (which) {
	case ITIMER_REAL:
		/* the real timer is kept in jiffies on the signal struct */
		spin_lock_irq(&tsk->sighand->siglock);
		interval = tsk->signal->it_real_incr;
		val = it_real_value(tsk->signal);
		spin_unlock_irq(&tsk->sighand->siglock);
		jiffies_to_timeval(val, &value->it_value);
		jiffies_to_timeval(interval, &value->it_interval);
		break;
	case ITIMER_VIRTUAL:
		read_lock(&tasklist_lock);
		spin_lock_irq(&tsk->sighand->siglock);
		cval = tsk->signal->it_virt_expires;
		cinterval = tsk->signal->it_virt_incr;
		if (!cputime_eq(cval, cputime_zero)) {
			/*
			 * Expiry is an absolute process-wide user-time value;
			 * convert to "time left" by summing the user time of
			 * every thread in the group.
			 */
			struct task_struct *t = tsk;
			cputime_t utime = tsk->signal->utime;
			do {
				utime = cputime_add(utime, t->utime);
				t = next_thread(t);
			} while (t != tsk);
			if (cputime_le(cval, utime)) { /* about to fire */
				cval = jiffies_to_cputime(1);
			} else {
				cval = cputime_sub(cval, utime);
			}
		}
		spin_unlock_irq(&tsk->sighand->siglock);
		read_unlock(&tasklist_lock);
		cputime_to_timeval(cval, &value->it_value);
		cputime_to_timeval(cinterval, &value->it_interval);
		break;
	case ITIMER_PROF:
		read_lock(&tasklist_lock);
		spin_lock_irq(&tsk->sighand->siglock);
		cval = tsk->signal->it_prof_expires;
		cinterval = tsk->signal->it_prof_incr;
		if (!cputime_eq(cval, cputime_zero)) {
			/* as above, but user + system time both count */
			struct task_struct *t = tsk;
			cputime_t ptime = cputime_add(tsk->signal->utime,
						      tsk->signal->stime);
			do {
				ptime = cputime_add(ptime,
						    cputime_add(t->utime,
								t->stime));
				t = next_thread(t);
			} while (t != tsk);
			if (cputime_le(cval, ptime)) { /* about to fire */
				cval = jiffies_to_cputime(1);
			} else {
				cval = cputime_sub(cval, ptime);
			}
		}
		spin_unlock_irq(&tsk->sighand->siglock);
		read_unlock(&tasklist_lock);
		cputime_to_timeval(cval, &value->it_value);
		cputime_to_timeval(cinterval, &value->it_interval);
		break;
	default:
		return(-EINVAL);
	}
	return 0;
}
|
||||
|
||||
/*
 * getitimer(2): fetch an interval timer into a user-space buffer.
 * Returns -EFAULT for a NULL or unwritable @value.
 */
asmlinkage long sys_getitimer(int which, struct itimerval __user *value)
{
	struct itimerval kval;
	int err;

	if (!value)
		return -EFAULT;

	err = do_getitimer(which, &kval);
	if (err)
		return err;

	if (copy_to_user(value, &kval, sizeof(kval)))
		return -EFAULT;
	return 0;
}
|
||||
|
||||
/*
 * Called with P->sighand->siglock held and P->signal->real_timer inactive.
 * If interval is nonzero, arm the timer for interval ticks from now.
 */
static inline void it_real_arm(struct task_struct *p, unsigned long interval)
{
	p->signal->it_real_value = interval; /* XXX unnecessary field?? */
	if (interval == 0)
		return;
	/* expires is computed with signed jiffies math - clamp to LONG_MAX */
	if (interval > (unsigned long) LONG_MAX)
		interval = LONG_MAX;
	p->signal->real_timer.expires = jiffies + interval;
	add_timer(&p->signal->real_timer);
}
|
||||
|
||||
/*
 * Timer callback for ITIMER_REAL: deliver SIGALRM to the thread group,
 * then re-arm for the next period if an interval is set.
 * @__data: the owning task_struct, cast to unsigned long.
 */
void it_real_fn(unsigned long __data)
{
	struct task_struct * p = (struct task_struct *) __data;

	send_group_sig_info(SIGALRM, SEND_SIG_PRIV, p);

	/*
	 * Now restart the timer if necessary. We don't need any locking
	 * here because do_setitimer makes sure we have finished running
	 * before it touches anything.
	 */
	it_real_arm(p, p->signal->it_real_incr);
}
|
||||
|
||||
/*
 * Replace one of the caller's interval timers.
 *
 * @which:  ITIMER_REAL, ITIMER_VIRTUAL or ITIMER_PROF
 * @value:  new timer setting (kernel space)
 * @ovalue: if non-NULL, receives the previous setting
 *
 * Returns 0 on success, -EINVAL for an unknown @which.
 */
int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
{
	struct task_struct *tsk = current;
	unsigned long val, interval;
	cputime_t cval, cinterval, nval, ninterval;

	switch (which) {
	case ITIMER_REAL:
		spin_lock_irq(&tsk->sighand->siglock);
		/* snapshot the old state before tearing the timer down */
		interval = tsk->signal->it_real_incr;
		val = it_real_value(tsk->signal);
		if (val)
			del_timer_sync(&tsk->signal->real_timer);
		tsk->signal->it_real_incr =
			timeval_to_jiffies(&value->it_interval);
		it_real_arm(tsk, timeval_to_jiffies(&value->it_value));
		spin_unlock_irq(&tsk->sighand->siglock);
		if (ovalue) {
			jiffies_to_timeval(val, &ovalue->it_value);
			jiffies_to_timeval(interval,
					   &ovalue->it_interval);
		}
		break;
	case ITIMER_VIRTUAL:
		nval = timeval_to_cputime(&value->it_value);
		ninterval = timeval_to_cputime(&value->it_interval);
		read_lock(&tasklist_lock);
		spin_lock_irq(&tsk->sighand->siglock);
		cval = tsk->signal->it_virt_expires;
		cinterval = tsk->signal->it_virt_incr;
		if (!cputime_eq(cval, cputime_zero) ||
		    !cputime_eq(nval, cputime_zero)) {
			/* round a nonzero new value up by one tick */
			if (cputime_gt(nval, cputime_zero))
				nval = cputime_add(nval,
						   jiffies_to_cputime(1));
			set_process_cpu_timer(tsk, CPUCLOCK_VIRT,
					      &nval, &cval);
		}
		tsk->signal->it_virt_expires = nval;
		tsk->signal->it_virt_incr = ninterval;
		spin_unlock_irq(&tsk->sighand->siglock);
		read_unlock(&tasklist_lock);
		if (ovalue) {
			cputime_to_timeval(cval, &ovalue->it_value);
			cputime_to_timeval(cinterval, &ovalue->it_interval);
		}
		break;
	case ITIMER_PROF:
		nval = timeval_to_cputime(&value->it_value);
		ninterval = timeval_to_cputime(&value->it_interval);
		read_lock(&tasklist_lock);
		spin_lock_irq(&tsk->sighand->siglock);
		cval = tsk->signal->it_prof_expires;
		cinterval = tsk->signal->it_prof_incr;
		if (!cputime_eq(cval, cputime_zero) ||
		    !cputime_eq(nval, cputime_zero)) {
			/* round a nonzero new value up by one tick */
			if (cputime_gt(nval, cputime_zero))
				nval = cputime_add(nval,
						   jiffies_to_cputime(1));
			set_process_cpu_timer(tsk, CPUCLOCK_PROF,
					      &nval, &cval);
		}
		tsk->signal->it_prof_expires = nval;
		tsk->signal->it_prof_incr = ninterval;
		spin_unlock_irq(&tsk->sighand->siglock);
		read_unlock(&tasklist_lock);
		if (ovalue) {
			cputime_to_timeval(cval, &ovalue->it_value);
			cputime_to_timeval(cinterval, &ovalue->it_interval);
		}
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
|
||||
|
||||
asmlinkage long sys_setitimer(int which,
|
||||
struct itimerval __user *value,
|
||||
struct itimerval __user *ovalue)
|
||||
{
|
||||
struct itimerval set_buffer, get_buffer;
|
||||
int error;
|
||||
|
||||
if (value) {
|
||||
if(copy_from_user(&set_buffer, value, sizeof(set_buffer)))
|
||||
return -EFAULT;
|
||||
} else
|
||||
memset((char *) &set_buffer, 0, sizeof(set_buffer));
|
||||
|
||||
error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL);
|
||||
if (error || !ovalue)
|
||||
return error;
|
||||
|
||||
if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
411
kernel/kallsyms.c
Normal file
411
kernel/kallsyms.c
Normal file
@@ -0,0 +1,411 @@
|
||||
/*
|
||||
* kallsyms.c: in-kernel printing of symbolic oopses and stack traces.
|
||||
*
|
||||
* Rewritten and vastly simplified by Rusty Russell for in-kernel
|
||||
* module loader:
|
||||
* Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
|
||||
*
|
||||
* ChangeLog:
|
||||
*
|
||||
* (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
|
||||
* Changed the compression method from stem compression to "table lookup"
|
||||
* compression (see scripts/kallsyms.c for a more complete description)
|
||||
*/
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
#include <asm/sections.h>
|
||||
|
||||
#ifdef CONFIG_KALLSYMS_ALL
|
||||
#define all_var 1
|
||||
#else
|
||||
#define all_var 0
|
||||
#endif
|
||||
|
||||
/* These will be re-linked against their real values during the second link stage */
|
||||
extern unsigned long kallsyms_addresses[] __attribute__((weak));
|
||||
extern unsigned long kallsyms_num_syms __attribute__((weak,section("data")));
|
||||
extern u8 kallsyms_names[] __attribute__((weak));
|
||||
|
||||
extern u8 kallsyms_token_table[] __attribute__((weak));
|
||||
extern u16 kallsyms_token_index[] __attribute__((weak));
|
||||
|
||||
extern unsigned long kallsyms_markers[] __attribute__((weak));
|
||||
|
||||
static inline int is_kernel_inittext(unsigned long addr)
|
||||
{
|
||||
if (addr >= (unsigned long)_sinittext
|
||||
&& addr <= (unsigned long)_einittext)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int is_kernel_text(unsigned long addr)
|
||||
{
|
||||
if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext)
|
||||
return 1;
|
||||
return in_gate_area_no_task(addr);
|
||||
}
|
||||
|
||||
static inline int is_kernel(unsigned long addr)
|
||||
{
|
||||
if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
|
||||
return 1;
|
||||
return in_gate_area_no_task(addr);
|
||||
}
|
||||
|
||||
/* expand a compressed symbol data into the resulting uncompressed string,
   given the offset to where the symbol is in the compressed stream.
   Returns the offset of the next symbol. */
static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
{
	int len, skipped_first = 0;
	u8 *tptr, *data;

	/* get the compressed symbol length from the first symbol byte */
	data = &kallsyms_names[off];
	len = *data;
	data++;

	/* update the offset to return the offset for the next symbol on
	 * the compressed stream */
	off += len + 1;

	/* for every byte on the compressed symbol data, copy the table
	   entry for that byte */
	while(len) {
		tptr = &kallsyms_token_table[ kallsyms_token_index[*data] ];
		data++;
		len--;

		while (*tptr) {
			/* the very first expanded char is the symbol's type
			 * marker, not part of its name - skip it */
			if(skipped_first) {
				*result = *tptr;
				result++;
			} else
				skipped_first = 1;
			tptr++;
		}
	}

	*result = '\0';

	/* return to offset to the next symbol */
	return off;
}
|
||||
|
||||
/* get symbol type information. This is encoded as a single char at the
|
||||
* begining of the symbol name */
|
||||
static char kallsyms_get_symbol_type(unsigned int off)
|
||||
{
|
||||
/* get just the first code, look it up in the token table, and return the
|
||||
* first char from this token */
|
||||
return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ];
|
||||
}
|
||||
|
||||
|
||||
/* find the offset on the compressed stream given and index in the
 * kallsyms array */
static unsigned int get_symbol_offset(unsigned long pos)
{
	u8 *name;
	int i;

	/* use the closest marker we have. We have markers every 256 positions,
	 * so that should be close enough */
	name = &kallsyms_names[ kallsyms_markers[pos>>8] ];

	/* sequentially scan all the symbols up to the point we're searching for.
	 * Every symbol is stored in a [<len>][<len> bytes of data] format, so we
	 * just need to add the len to the current pointer for every symbol we
	 * wish to skip */
	for(i = 0; i < (pos&0xFF); i++)
		name = name + (*name) + 1;

	/* byte offset into kallsyms_names[] */
	return name - kallsyms_names;
}
|
||||
|
||||
/* Lookup the address for this symbol. Returns 0 if not found. */
|
||||
unsigned long kallsyms_lookup_name(const char *name)
|
||||
{
|
||||
char namebuf[KSYM_NAME_LEN+1];
|
||||
unsigned long i;
|
||||
unsigned int off;
|
||||
|
||||
for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
|
||||
off = kallsyms_expand_symbol(off, namebuf);
|
||||
|
||||
if (strcmp(namebuf, name) == 0)
|
||||
return kallsyms_addresses[i];
|
||||
}
|
||||
return module_kallsyms_lookup_name(name);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
|
||||
|
||||
/*
 * Lookup an address
 * - modname is set to NULL if it's in the kernel
 * - we guarantee that the returned name is valid until we reschedule even if
 *   it resides in a module
 * - we also guarantee that modname will be valid until rescheduled
 *
 * Returns the symbol name (written into @namebuf) or NULL if the
 * address matches neither the kernel image nor any module.
 */
const char *kallsyms_lookup(unsigned long addr,
			    unsigned long *symbolsize,
			    unsigned long *offset,
			    char **modname, char *namebuf)
{
	unsigned long i, low, high, mid;
	const char *msym;

	/* This kernel should never had been booted. */
	BUG_ON(!kallsyms_addresses);

	namebuf[KSYM_NAME_LEN] = 0;
	namebuf[0] = 0;

	if ((all_var && is_kernel(addr)) ||
	    (!all_var && (is_kernel_text(addr) || is_kernel_inittext(addr)))) {
		unsigned long symbol_end=0;

		/* do a binary search on the sorted kallsyms_addresses array */
		low = 0;
		high = kallsyms_num_syms;

		while (high-low > 1) {
			mid = (low + high) / 2;
			if (kallsyms_addresses[mid] <= addr) low = mid;
			else high = mid;
		}

		/* search for the first aliased symbol. Aliased symbols are
		   symbols with the same address */
		while (low && kallsyms_addresses[low - 1] == kallsyms_addresses[low])
			--low;

		/* Grab name */
		kallsyms_expand_symbol(get_symbol_offset(low), namebuf);

		/* Search for next non-aliased symbol */
		for (i = low + 1; i < kallsyms_num_syms; i++) {
			if (kallsyms_addresses[i] > kallsyms_addresses[low]) {
				symbol_end = kallsyms_addresses[i];
				break;
			}
		}

		/* if we found no next symbol, we use the end of the section */
		if (!symbol_end) {
			if (is_kernel_inittext(addr))
				symbol_end = (unsigned long)_einittext;
			else
				symbol_end = all_var ? (unsigned long)_end : (unsigned long)_etext;
		}

		*symbolsize = symbol_end - kallsyms_addresses[low];
		*modname = NULL;
		*offset = addr - kallsyms_addresses[low];
		return namebuf;
	}

	/* see if it's in a module */
	msym = module_address_lookup(addr, symbolsize, offset, modname);
	if (msym)
		return strncpy(namebuf, msym, KSYM_NAME_LEN);

	return NULL;
}
|
||||
|
||||
/*
 * Replace "%s" in @fmt with a "name+offset/size [module]" description of
 * @address (or the raw hex address if no symbol matches) and printk it.
 * (Note: returns nothing - it is a void function.)
 */
void __print_symbol(const char *fmt, unsigned long address)
{
	char *modname;
	const char *name;
	unsigned long offset, size;
	char namebuf[KSYM_NAME_LEN+1];
	/* worst-case: format skeleton + name + two hex numbers + module name */
	char buffer[sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN +
		    2*(BITS_PER_LONG*3/10) + MODULE_NAME_LEN + 1];

	name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);

	if (!name)
		sprintf(buffer, "0x%lx", address);
	else {
		if (modname)
			sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset,
				size, modname);
		else
			sprintf(buffer, "%s+%#lx/%#lx", name, offset, size);
	}
	printk(fmt, buffer);
}
|
||||
|
||||
/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
struct kallsym_iter
{
	loff_t pos;		/* index of the current symbol */
	struct module *owner;	/* NULL for core-kernel symbols */
	unsigned long value;	/* symbol address */
	unsigned int nameoff;	/* If iterating in core kernel symbols */
	char type;		/* nm-style type character */
	char name[KSYM_NAME_LEN+1];
};
|
||||
|
||||
/* Only label it "global" if it is exported: uppercase the type char. */
static void upcase_if_global(struct kallsym_iter *iter)
{
	if (is_exported(iter->name, iter->owner))
		iter->type += 'A' - 'a';
}
|
||||
|
||||
/*
 * Fill the iterator from module symbol space (positions past the core
 * kernel symbols).  Returns 0 when there are no more module symbols.
 */
static int get_ksymbol_mod(struct kallsym_iter *iter)
{
	iter->owner = module_get_kallsym(iter->pos - kallsyms_num_syms,
					 &iter->value,
					 &iter->type, iter->name);
	if (iter->owner == NULL)
		return 0;

	upcase_if_global(iter);
	return 1;
}
|
||||
|
||||
/* Returns space to next name (bytes consumed in the compressed stream). */
static unsigned long get_ksymbol_core(struct kallsym_iter *iter)
{
	unsigned off = iter->nameoff;

	/* core kernel symbol: no owning module */
	iter->owner = NULL;
	iter->value = kallsyms_addresses[iter->pos];

	iter->type = kallsyms_get_symbol_type(off);

	off = kallsyms_expand_symbol(off, iter->name);

	return off - iter->nameoff;
}
|
||||
|
||||
/* Re-seat the iterator at symbol index @new_pos. */
static void reset_iter(struct kallsym_iter *iter, loff_t new_pos)
{
	iter->pos = new_pos;
	iter->nameoff = get_symbol_offset(new_pos);
	iter->name[0] = '\0';
}
|
||||
|
||||
/* Returns false if pos at or past end of file. */
static int update_iter(struct kallsym_iter *iter, loff_t pos)
{
	/* Module symbols can be accessed randomly. */
	if (pos >= kallsyms_num_syms) {
		iter->pos = pos;
		return get_ksymbol_mod(iter);
	}

	/* If we're not on the desired position, reset to new position. */
	if (pos != iter->pos)
		reset_iter(iter, pos);

	/* advance the compressed-stream offset past this symbol */
	iter->nameoff += get_ksymbol_core(iter);
	iter->pos++;

	return 1;
}
|
||||
|
||||
/* seq_file .next: advance to the following symbol, NULL at end. */
static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	++*pos;
	return update_iter(m->private, *pos) ? p : NULL;
}
|
||||
|
||||
/* seq_file .start: position the iterator, NULL when past the end. */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	return update_iter(m->private, *pos) ? m->private : NULL;
}
|
||||
|
||||
/* seq_file .stop: nothing to release - the iterator lives in m->private. */
static void s_stop(struct seq_file *m, void *p)
{
}
|
||||
|
||||
/*
 * seq_file .show: emit one "address type name [module]" line, matching
 * nm(1)-style output.
 */
static int s_show(struct seq_file *m, void *p)
{
	struct kallsym_iter *iter = m->private;

	/* Some debugging symbols have no name.  Ignore them. */
	if (!iter->name[0])
		return 0;

	if (iter->owner)
		seq_printf(m, "%0*lx %c %s\t[%s]\n",
			   (int)(2*sizeof(void*)),
			   iter->value, iter->type, iter->name,
			   module_name(iter->owner));
	else
		seq_printf(m, "%0*lx %c %s\n",
			   (int)(2*sizeof(void*)),
			   iter->value, iter->type, iter->name);
	return 0;
}
|
||||
|
||||
/* seq_file iteration hooks for /proc/kallsyms */
static struct seq_operations kallsyms_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show
};
|
||||
|
||||
/*
 * Open handler for /proc/kallsyms: allocate a per-open iterator and
 * attach it to the seq_file.
 */
static int kallsyms_open(struct inode *inode, struct file *file)
{
	/* We keep iterator in m->private, since normal case is to
	 * s_start from where we left off, so we avoid doing
	 * using get_symbol_offset for every symbol */
	struct kallsym_iter *iter;
	int ret;

	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		return -ENOMEM;
	reset_iter(iter, 0);

	ret = seq_open(file, &kallsyms_op);
	if (ret == 0)
		((struct seq_file *)file->private_data)->private = iter;
	else
		kfree(iter);	/* seq_open failed - don't leak the iterator */
	return ret;
}
|
||||
|
||||
static int kallsyms_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct seq_file *m = (struct seq_file *)file->private_data;
|
||||
kfree(m->private);
|
||||
return seq_release(inode, file);
|
||||
}
|
||||
|
||||
/* file_operations for /proc/kallsyms - reads served via seq_file */
static struct file_operations kallsyms_operations = {
	.open = kallsyms_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = kallsyms_release,
};
|
||||
|
||||
/* Create the read-only /proc/kallsyms entry at boot. */
static int __init kallsyms_init(void)
{
	struct proc_dir_entry *entry;

	entry = create_proc_entry("kallsyms", 0444, NULL);
	if (entry)
		entry->proc_fops = &kallsyms_operations;
	/* a missing entry is not fatal - just no /proc/kallsyms */
	return 0;
}
__initcall(kallsyms_init);
|
||||
|
||||
EXPORT_SYMBOL(__print_symbol);
|
||||
168
kernel/kfifo.c
Normal file
168
kernel/kfifo.c
Normal file
@@ -0,0 +1,168 @@
|
||||
/*
|
||||
* A simple kernel FIFO implementation.
|
||||
*
|
||||
* Copyright (C) 2004 Stelian Pop <stelian@popies.net>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/kfifo.h>
|
||||
|
||||
/**
 * kfifo_init - allocates a new FIFO using a preallocated buffer
 * @buffer: the preallocated buffer to be used.
 * @size: the size of the internal buffer, this have to be a power of 2.
 * @gfp_mask: get_free_pages mask, passed to kmalloc()
 * @lock: the lock to be used to protect the fifo buffer
 *
 * Returns the new fifo, or ERR_PTR(-ENOMEM) on allocation failure.
 *
 * Do NOT pass the kfifo to kfifo_free() after use ! Simply free the
 * struct kfifo with kfree().
 */
struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
			 unsigned int __nocast gfp_mask, spinlock_t *lock)
{
	struct kfifo *fifo;

	/* size must be a power of 2 (the index-wrapping math relies on it) */
	BUG_ON(size & (size - 1));

	fifo = kmalloc(sizeof(struct kfifo), gfp_mask);
	if (!fifo)
		return ERR_PTR(-ENOMEM);

	fifo->buffer = buffer;
	fifo->size = size;
	fifo->in = fifo->out = 0;
	fifo->lock = lock;

	return fifo;
}
EXPORT_SYMBOL(kfifo_init);
|
||||
|
||||
/**
 * kfifo_alloc - allocates a new FIFO and its internal buffer
 * @size: the size of the internal buffer to be allocated.
 * @gfp_mask: get_free_pages mask, passed to kmalloc()
 * @lock: the lock to be used to protect the fifo buffer
 *
 * The size will be rounded-up to a power of 2.
 * Returns the new fifo, or ERR_PTR(-ENOMEM) on allocation failure.
 */
struct kfifo *kfifo_alloc(unsigned int size, unsigned int __nocast gfp_mask, spinlock_t *lock)
{
	unsigned char *buffer;
	struct kfifo *ret;

	/*
	 * round up to the next power of 2, since our 'let the indices
	 * wrap' tachnique works only in this case.
	 */
	if (size & (size - 1)) {
		/* rounding a size above 2^31 would overflow unsigned int */
		BUG_ON(size > 0x80000000);
		size = roundup_pow_of_two(size);
	}

	buffer = kmalloc(size, gfp_mask);
	if (!buffer)
		return ERR_PTR(-ENOMEM);

	ret = kfifo_init(buffer, size, gfp_mask, lock);

	/* kfifo_init failed - release the buffer we allocated for it */
	if (IS_ERR(ret))
		kfree(buffer);

	return ret;
}
EXPORT_SYMBOL(kfifo_alloc);
|
||||
|
||||
/**
|
||||
* kfifo_free - frees the FIFO
|
||||
* @fifo: the fifo to be freed.
|
||||
*/
|
||||
void kfifo_free(struct kfifo *fifo)
|
||||
{
|
||||
kfree(fifo->buffer);
|
||||
kfree(fifo);
|
||||
}
|
||||
EXPORT_SYMBOL(kfifo_free);
|
||||
|
||||
/**
 * __kfifo_put - puts some data into the FIFO, no locking version
 * @fifo: the fifo to be used.
 * @buffer: the data to be added.
 * @len: the length of the data to be added.
 *
 * This function copies at most 'len' bytes from the 'buffer' into
 * the FIFO depending on the free space, and returns the number of
 * bytes copied.
 *
 * Note that with only one concurrent reader and one concurrent
 * writer, you don't need extra locking to use these functions.
 */
unsigned int __kfifo_put(struct kfifo *fifo,
			 unsigned char *buffer, unsigned int len)
{
	unsigned int l;

	/* clamp to the free space; in/out are free-running and wrap mod 2^32 */
	len = min(len, fifo->size - fifo->in + fifo->out);

	/* first put the data starting from fifo->in to buffer end */
	l = min(len, fifo->size - (fifo->in & (fifo->size - 1)));
	memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l);

	/* then put the rest (if any) at the beginning of the buffer */
	memcpy(fifo->buffer, buffer + l, len - l);

	fifo->in += len;

	return len;
}
EXPORT_SYMBOL(__kfifo_put);
|
||||
|
||||
/**
 * __kfifo_get - gets some data from the FIFO, no locking version
 * @fifo: the fifo to be used.
 * @buffer: where the data must be copied.
 * @len: the size of the destination buffer.
 *
 * This function copies at most 'len' bytes from the FIFO into the
 * 'buffer' and returns the number of copied bytes.
 *
 * Note that with only one concurrent reader and one concurrent
 * writer, you don't need extra locking to use these functions.
 */
unsigned int __kfifo_get(struct kfifo *fifo,
			 unsigned char *buffer, unsigned int len)
{
	unsigned int l;

	/* clamp to the amount of data available */
	len = min(len, fifo->in - fifo->out);

	/* first get the data from fifo->out until the end of the buffer */
	l = min(len, fifo->size - (fifo->out & (fifo->size - 1)));
	memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l);

	/* then get the rest (if any) from the beginning of the buffer */
	memcpy(buffer + l, fifo->buffer, len - l);

	fifo->out += len;

	return len;
}
EXPORT_SYMBOL(__kfifo_get);
|
||||
256
kernel/kmod.c
Normal file
256
kernel/kmod.c
Normal file
@@ -0,0 +1,256 @@
|
||||
/*
|
||||
kmod, the new module loader (replaces kerneld)
|
||||
Kirk Petersen
|
||||
|
||||
Reorganized not to be a daemon by Adam Richter, with guidance
|
||||
from Greg Zornetzer.
|
||||
|
||||
Modified to avoid chroot and file sharing problems.
|
||||
Mikael Pettersson
|
||||
|
||||
Limit the concurrent number of kmod modprobes to catch loops from
|
||||
"modprobe needs a service that is in a module".
|
||||
Keith Owens <kaos@ocs.com.au> December 1999
|
||||
|
||||
Unblock all signals when we exec a usermode process.
|
||||
Shuu Yamaguchi <shuu@wondernetworkresources.com> December 2000
|
||||
|
||||
call_usermodehelper wait flag, and remove exec_usermodehelper.
|
||||
Rusty Russell <rusty@rustcorp.com.au> Jan 2003
|
||||
*/
|
||||
#define __KERNEL_SYSCALLS__
|
||||
|
||||
#include <linux/config.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/unistd.h>
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/namespace.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
extern int max_threads;
|
||||
|
||||
static struct workqueue_struct *khelper_wq;
|
||||
|
||||
#ifdef CONFIG_KMOD
|
||||
|
||||
/*
|
||||
modprobe_path is set via /proc/sys.
|
||||
*/
|
||||
char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
|
||||
|
||||
/**
 * request_module - try to load a kernel module
 * @fmt: printf style format string for the name of the module
 * @varargs: arguments as specified in the format string
 *
 * Load a module using the user mode module loader. The function returns
 * zero on success or a negative errno code on failure. Note that a
 * successful module load does not mean the module did not then unload
 * and exit on an error of its own. Callers must check that the service
 * they requested is now available and not blindly invoke it.
 *
 * If module auto-loading support is disabled then this function
 * becomes a no-operation.
 */
int request_module(const char *fmt, ...)
{
	va_list args;
	char module_name[MODULE_NAME_LEN];
	unsigned int max_modprobes;
	int ret;
	char *argv[] = { modprobe_path, "-q", "--", module_name, NULL };
	static char *envp[] = { "HOME=/",
				"TERM=linux",
				"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
				NULL };
	static atomic_t kmod_concurrent = ATOMIC_INIT(0);
#define MAX_KMOD_CONCURRENT 50	/* Completely arbitrary value - KAO */
	static int kmod_loop_msg;

	/* Build the module name from the caller's format string. */
	va_start(args, fmt);
	ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
	va_end(args);
	if (ret >= MODULE_NAME_LEN)
		return -ENAMETOOLONG;

	/* If modprobe needs a service that is in a module, we get a recursive
	 * loop.  Limit the number of running kmod threads to max_threads/2 or
	 * MAX_KMOD_CONCURRENT, whichever is the smaller.  A cleaner method
	 * would be to run the parents of this process, counting how many times
	 * kmod was invoked.  That would mean accessing the internals of the
	 * process tables to get the command line, proc_pid_cmdline is static
	 * and it is not worth changing the proc code just to handle this case.
	 * KAO.
	 *
	 * "trace the ppid" is simple, but will fail if someone's
	 * parent exits.  I think this is as good as it gets. --RR
	 */
	max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
	/* Increment first, then check: bounds the count even under races. */
	atomic_inc(&kmod_concurrent);
	if (atomic_read(&kmod_concurrent) > max_modprobes) {
		/* We may be blaming an innocent here, but unlikely */
		/* Rate-limit the warning to the first few occurrences. */
		if (kmod_loop_msg++ < 5)
			printk(KERN_ERR
			       "request_module: runaway loop modprobe %s\n",
			       module_name);
		atomic_dec(&kmod_concurrent);
		return -ENOMEM;
	}

	/* Run modprobe synchronously (wait flag set). */
	ret = call_usermodehelper(modprobe_path, argv, envp, 1);
	atomic_dec(&kmod_concurrent);
	return ret;
}
EXPORT_SYMBOL(request_module);
|
||||
#endif /* CONFIG_KMOD */
|
||||
|
||||
/*
 * Per-invocation state for call_usermodehelper(), handed from the caller
 * through the khelper workqueue to the child thread that does the exec.
 */
struct subprocess_info {
	struct completion *complete;	/* signalled when helper is launched/finished */
	char *path;			/* program to exec */
	char **argv;			/* NULL-terminated argument vector */
	char **envp;			/* NULL-terminated environment */
	int wait;			/* non-zero: wait for the program to exit */
	int retval;			/* exec/fork error or collected exit status */
};
|
||||
|
||||
/*
|
||||
* This is the task which runs the usermode application
|
||||
*/
|
||||
static int ____call_usermodehelper(void *data)
|
||||
{
|
||||
struct subprocess_info *sub_info = data;
|
||||
int retval;
|
||||
|
||||
/* Unblock all signals. */
|
||||
flush_signals(current);
|
||||
spin_lock_irq(¤t->sighand->siglock);
|
||||
flush_signal_handlers(current, 1);
|
||||
sigemptyset(¤t->blocked);
|
||||
recalc_sigpending();
|
||||
spin_unlock_irq(¤t->sighand->siglock);
|
||||
|
||||
/* We can run anywhere, unlike our parent keventd(). */
|
||||
set_cpus_allowed(current, CPU_MASK_ALL);
|
||||
|
||||
retval = -EPERM;
|
||||
if (current->fs->root)
|
||||
retval = execve(sub_info->path, sub_info->argv,sub_info->envp);
|
||||
|
||||
/* Exec failed? */
|
||||
sub_info->retval = retval;
|
||||
do_exit(0);
|
||||
}
|
||||
|
||||
/* Keventd can't block, but this (a child) can. */
/*
 * Spawn the helper in yet another child and reap it with sys_wait4(),
 * storing its exit status in sub_info->retval, then signal completion.
 */
static int wait_for_helper(void *data)
{
	struct subprocess_info *sub_info = data;
	pid_t pid;
	struct k_sigaction sa;

	/* Install a handler: if SIGCLD isn't handled sys_wait4 won't
	 * populate the status, but will return -ECHILD. */
	sa.sa.sa_handler = SIG_IGN;
	sa.sa.sa_flags = 0;
	siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD));
	do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0);
	allow_signal(SIGCHLD);

	pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD);
	if (pid < 0) {
		/* Fork failed: propagate the error code. */
		sub_info->retval = pid;
	} else {
		/*
		 * Normally it is bogus to call wait4() from in-kernel because
		 * wait4() wants to write the exit code to a userspace address.
		 * But wait_for_helper() always runs as keventd, and put_user()
		 * to a kernel address works OK for kernel threads, due to their
		 * having an mm_segment_t which spans the entire address space.
		 *
		 * Thus the __user pointer cast is valid here.
		 */
		sys_wait4(pid, (int __user *) &sub_info->retval, 0, NULL);
	}

	complete(sub_info->complete);
	return 0;
}
|
||||
|
||||
/* This is run by khelper thread  */
/*
 * Dispatch one usermode-helper request: fork either the waiting reaper
 * (wait_for_helper) or the exec thread directly, and complete early in
 * the non-waiting case (wait_for_helper completes it itself otherwise).
 */
static void __call_usermodehelper(void *data)
{
	struct subprocess_info *sub_info = data;
	pid_t pid;

	/* CLONE_VFORK: wait until the usermode helper has execve'd
	 * successfully We need the data structures to stay around
	 * until that is done.  */
	if (sub_info->wait)
		pid = kernel_thread(wait_for_helper, sub_info,
				    CLONE_FS | CLONE_FILES | SIGCHLD);
	else
		pid = kernel_thread(____call_usermodehelper, sub_info,
				    CLONE_VFORK | SIGCHLD);

	if (pid < 0) {
		/* Fork failed: report the error and unblock the caller. */
		sub_info->retval = pid;
		complete(sub_info->complete);
	} else if (!sub_info->wait)
		/* Fire-and-forget: the caller only waits for the launch. */
		complete(sub_info->complete);
}
|
||||
|
||||
/**
 * call_usermodehelper - start a usermode application
 * @path: pathname for the application
 * @argv: null-terminated argument list
 * @envp: null-terminated environment list
 * @wait: wait for the application to finish and return status.
 *
 * Runs a user-space application.  The application is started
 * asynchronously if wait is not set, and runs as a child of keventd.
 * (ie. it runs with full root capabilities).
 *
 * Must be called from process context.  Returns a negative error code
 * if program was not execed successfully, or 0.
 */
int call_usermodehelper(char *path, char **argv, char **envp, int wait)
{
	DECLARE_COMPLETION(done);
	/* Lives on our stack: the helper threads must finish touching it
	 * before wait_for_completion() returns. */
	struct subprocess_info sub_info = {
		.complete	= &done,
		.path		= path,
		.argv		= argv,
		.envp		= envp,
		.wait		= wait,
		.retval		= 0,
	};
	DECLARE_WORK(work, __call_usermodehelper, &sub_info);

	/* Too early in boot: the khelper workqueue is not up yet. */
	if (!khelper_wq)
		return -EBUSY;

	/* An empty path is a documented no-op. */
	if (path[0] == '\0')
		return 0;

	queue_work(khelper_wq, &work);
	wait_for_completion(&done);
	return sub_info.retval;
}
EXPORT_SYMBOL(call_usermodehelper);
|
||||
|
||||
/* Create the single-threaded workqueue all usermode helpers run from. */
void __init usermodehelper_init(void)
{
	khelper_wq = create_singlethread_workqueue("khelper");
	/* Boot-time allocation failure here is unrecoverable. */
	BUG_ON(!khelper_wq);
}
|
||||
157
kernel/kprobes.c
Normal file
157
kernel/kprobes.c
Normal file
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
* Kernel Probes (KProbes)
|
||||
* kernel/kprobes.c
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2002, 2004
|
||||
*
|
||||
* 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
|
||||
* Probes initial implementation (includes suggestions from
|
||||
* Rusty Russell).
|
||||
* 2004-Aug Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
|
||||
* hlists and exceptions notifier as suggested by Andi Kleen.
|
||||
* 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
|
||||
* interface to access function arguments.
|
||||
* 2004-Sep Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
|
||||
* exceptions notifier to be first on the priority list.
|
||||
*/
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/errno.h>
|
||||
#include <asm/kdebug.h>
|
||||
|
||||
#define KPROBE_HASH_BITS 6
|
||||
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
|
||||
|
||||
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
|
||||
|
||||
unsigned int kprobe_cpu = NR_CPUS;
|
||||
static DEFINE_SPINLOCK(kprobe_lock);
|
||||
|
||||
/* Locks kprobe: irqs must be disabled */
void lock_kprobes(void)
{
	spin_lock(&kprobe_lock);
	/* Record the owning CPU after taking the lock. */
	kprobe_cpu = smp_processor_id();
}
|
||||
|
||||
/* Release the kprobe lock; irqs must still be disabled by the caller. */
void unlock_kprobes(void)
{
	/* Clear the owner marker (NR_CPUS == "no owner") before unlocking. */
	kprobe_cpu = NR_CPUS;
	spin_unlock(&kprobe_lock);
}
|
||||
|
||||
/* You have to be holding the kprobe_lock */
/*
 * Look up the kprobe registered at @addr in the hash table.
 * Returns the kprobe, or NULL if none is registered there.
 */
struct kprobe *get_kprobe(void *addr)
{
	struct hlist_head *head;
	struct hlist_node *node;

	/* Probes are hashed by their target address. */
	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
	hlist_for_each(node, head) {
		struct kprobe *p = hlist_entry(node, struct kprobe, hlist);
		if (p->addr == addr)
			return p;
	}
	return NULL;
}
|
||||
|
||||
/*
 * Register a kprobe: prepare the arch-specific copy of the original
 * instruction, insert the probe into the hash table, and plant the
 * breakpoint at the target address.  Returns 0 or a negative errno
 * (-EEXIST if a probe is already registered at that address).
 */
int register_kprobe(struct kprobe *p)
{
	int ret = 0;
	unsigned long flags = 0;

	if ((ret = arch_prepare_kprobe(p)) != 0) {
		goto rm_kprobe;
	}
	spin_lock_irqsave(&kprobe_lock, flags);
	INIT_HLIST_NODE(&p->hlist);
	if (get_kprobe(p->addr)) {
		ret = -EEXIST;
		goto out;
	}
	arch_copy_kprobe(p);

	hlist_add_head(&p->hlist,
		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);

	/* Save the original opcode and replace it with a breakpoint. */
	p->opcode = *p->addr;
	*p->addr = BREAKPOINT_INSTRUCTION;
	flush_icache_range((unsigned long) p->addr,
			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
out:
	spin_unlock_irqrestore(&kprobe_lock, flags);
rm_kprobe:
	/* Only -EEXIST reaches here with an arch-prepared probe that
	 * must be torn down; an arch_prepare failure has nothing to undo. */
	if (ret == -EEXIST)
		arch_remove_kprobe(p);
	return ret;
}
|
||||
|
||||
/*
 * Remove a registered kprobe: restore the original instruction at the
 * probed address, unhash the probe, and flush the icache.
 */
void unregister_kprobe(struct kprobe *p)
{
	unsigned long flags;
	arch_remove_kprobe(p);
	spin_lock_irqsave(&kprobe_lock, flags);
	/* Put the saved opcode back in place of the breakpoint. */
	*p->addr = p->opcode;
	hlist_del(&p->hlist);
	flush_icache_range((unsigned long) p->addr,
			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
	spin_unlock_irqrestore(&kprobe_lock, flags);
}
|
||||
|
||||
/* Die-notifier hook through which kprobe breakpoints are handled. */
static struct notifier_block kprobe_exceptions_nb = {
	.notifier_call = kprobe_exceptions_notify,
	.priority = 0x7fffffff /* we need to be notified first */
};
|
||||
|
||||
/*
 * Register a jprobe: wire the jprobe's setjmp/longjmp handlers into the
 * embedded kprobe and register it.  Returns 0 or a negative errno.
 */
int register_jprobe(struct jprobe *jp)
{
	/* Todo: Verify probepoint is a function entry point */
	jp->kp.pre_handler = setjmp_pre_handler;
	jp->kp.break_handler = longjmp_break_handler;

	return register_kprobe(&jp->kp);
}
|
||||
|
||||
/* Unregister a jprobe by removing its embedded kprobe. */
void unregister_jprobe(struct jprobe *jp)
{
	unregister_kprobe(&jp->kp);
}
|
||||
|
||||
/* Initialise the kprobe hash table and hook into the die notifier chain. */
static int __init init_kprobes(void)
{
	int i, err = 0;

	/* FIXME allocate the probe table, currently defined statically */
	/* initialize all list heads */
	for (i = 0; i < KPROBE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&kprobe_table[i]);

	err = register_die_notifier(&kprobe_exceptions_nb);
	return err;
}

__initcall(init_kprobes);
|
||||
|
||||
EXPORT_SYMBOL_GPL(register_kprobe);
|
||||
EXPORT_SYMBOL_GPL(unregister_kprobe);
|
||||
EXPORT_SYMBOL_GPL(register_jprobe);
|
||||
EXPORT_SYMBOL_GPL(unregister_jprobe);
|
||||
EXPORT_SYMBOL_GPL(jprobe_return);
|
||||
57
kernel/ksysfs.c
Normal file
57
kernel/ksysfs.c
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* kernel/ksysfs.c - sysfs attributes in /sys/kernel, which
|
||||
* are not related to any other subsystem
|
||||
*
|
||||
* Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
|
||||
*
|
||||
 * This file is released under the GPLv2
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/config.h>
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
|
||||
#define KERNEL_ATTR_RO(_name) \
|
||||
static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
|
||||
|
||||
#define KERNEL_ATTR_RW(_name) \
|
||||
static struct subsys_attribute _name##_attr = \
|
||||
__ATTR(_name, 0644, _name##_show, _name##_store)
|
||||
|
||||
#ifdef CONFIG_HOTPLUG
|
||||
static ssize_t hotplug_seqnum_show(struct subsystem *subsys, char *page)
|
||||
{
|
||||
return sprintf(page, "%llu\n", (unsigned long long)hotplug_seqnum);
|
||||
}
|
||||
KERNEL_ATTR_RO(hotplug_seqnum);
|
||||
#endif
|
||||
|
||||
decl_subsys(kernel, NULL, NULL);
|
||||
EXPORT_SYMBOL_GPL(kernel_subsys);
|
||||
|
||||
static struct attribute * kernel_attrs[] = {
|
||||
#ifdef CONFIG_HOTPLUG
|
||||
&hotplug_seqnum_attr.attr,
|
||||
#endif
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute_group kernel_attr_group = {
|
||||
.attrs = kernel_attrs,
|
||||
};
|
||||
|
||||
/* Register the "kernel" subsystem and attach its attribute group. */
static int __init ksysfs_init(void)
{
	int error = subsystem_register(&kernel_subsys);
	if (!error)
		error = sysfs_create_group(&kernel_subsys.kset.kobj,
					   &kernel_attr_group);

	return error;
}

core_initcall(ksysfs_init);
|
||||
202
kernel/kthread.c
Normal file
202
kernel/kthread.c
Normal file
@@ -0,0 +1,202 @@
|
||||
/* Kernel thread helper functions.
|
||||
* Copyright (C) 2004 IBM Corporation, Rusty Russell.
|
||||
*
|
||||
* Creation is done via keventd, so that we get a clean environment
|
||||
* even if we're invoked from userspace (think modprobe, hotplug cpu,
|
||||
* etc.).
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/unistd.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/semaphore.h>
|
||||
|
||||
/*
|
||||
* We dont want to execute off keventd since it might
|
||||
* hold a semaphore our callers hold too:
|
||||
*/
|
||||
static struct workqueue_struct *helper_wq;
|
||||
|
||||
/*
 * Request/response record exchanged between kthread_create() and
 * keventd_create_kthread() over the helper workqueue.
 */
struct kthread_create_info
{
	/* Information passed to kthread() from keventd. */
	int (*threadfn)(void *data);	/* thread body to run */
	void *data;			/* argument for threadfn */
	struct completion started;	/* thread signals it has parked */

	/* Result passed back to kthread_create() from keventd. */
	struct task_struct *result;	/* new task, or ERR_PTR on failure */
	struct completion done;		/* keventd signals the result is ready */
};
|
||||
|
||||
/*
 * Shared channel through which kthread_stop() asks a thread to exit and
 * collects its return value.  Single instance, serialized by
 * kthread_stop_lock.
 */
struct kthread_stop_info
{
	struct task_struct *k;	/* thread being stopped (NULL: none) */
	int err;		/* threadfn's return value */
	struct completion done;	/* signalled when the thread exits */
};
|
||||
|
||||
/* Thread stopping is done by setthing this var: lock serializes
|
||||
* multiple kthread_stop calls. */
|
||||
static DECLARE_MUTEX(kthread_stop_lock);
|
||||
static struct kthread_stop_info kthread_stop_info;
|
||||
|
||||
/*
 * True when kthread_stop() has been called on the current thread;
 * polled by the thread function to know when to return.
 */
int kthread_should_stop(void)
{
	return (kthread_stop_info.k == current);
}
EXPORT_SYMBOL(kthread_should_stop);
|
||||
|
||||
/*
 * Drop the fs and files contexts inherited from keventd and take
 * references to init_task's instead, giving the new kthread a clean
 * environment (no stray chroot or open files from the creator).
 */
static void kthread_exit_files(void)
{
	struct fs_struct *fs;
	struct task_struct *tsk = current;

	exit_fs(tsk);		/* current->fs->count--; */
	fs = init_task.fs;
	tsk->fs = fs;
	atomic_inc(&fs->count);	/* we now hold a reference to init's fs */
	exit_files(tsk);
	current->files = init_task.files;
	atomic_inc(&tsk->files->count);
}
|
||||
|
||||
/*
 * Trampoline executed by every new kernel thread: detach from keventd's
 * context, park until woken, then run the requested thread function and
 * report its result if kthread_stop() was used.
 */
static int kthread(void *_create)
{
	struct kthread_create_info *create = _create;
	int (*threadfn)(void *data);
	void *data;
	sigset_t blocked;
	int ret = -EINTR;

	kthread_exit_files();

	/* Copy data: it's on keventd's stack */
	threadfn = create->threadfn;
	data = create->data;

	/* Block and flush all signals (in case we're not from keventd). */
	sigfillset(&blocked);
	sigprocmask(SIG_BLOCK, &blocked, NULL);
	flush_signals(current);

	/* By default we can run anywhere, unlike keventd. */
	set_cpus_allowed(current, CPU_MASK_ALL);

	/* OK, tell user we're spawned, wait for stop or wakeup */
	/* State must be set before completing, so a wake-up isn't lost. */
	__set_current_state(TASK_INTERRUPTIBLE);
	complete(&create->started);
	schedule();

	/* Stopped before ever being woken for work: skip threadfn. */
	if (!kthread_should_stop())
		ret = threadfn(data);

	/* It might have exited on its own, w/o kthread_stop.  Check. */
	if (kthread_should_stop()) {
		kthread_stop_info.err = ret;
		complete(&kthread_stop_info.done);
	}
	return 0;
}
|
||||
|
||||
/* We are keventd: create a thread. */
/*
 * Fork the kthread trampoline and wait for it to park, then hand the
 * resulting task_struct (or ERR_PTR) back through 'create'.
 */
static void keventd_create_kthread(void *_create)
{
	struct kthread_create_info *create = _create;
	int pid;

	/* We want our own signal handler (we take no signals by default). */
	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
	if (pid < 0) {
		create->result = ERR_PTR(pid);
	} else {
		/* Thread must have parked before we look up its task. */
		wait_for_completion(&create->started);
		create->result = find_task_by_pid(pid);
	}
	complete(&create->done);
}
|
||||
|
||||
/*
 * Create a named kernel thread (initially sleeping; caller wakes it or
 * binds it first).  Creation is delegated to the helper workqueue so the
 * thread inherits a clean environment.  Returns the task_struct or an
 * ERR_PTR on failure.
 */
struct task_struct *kthread_create(int (*threadfn)(void *data),
				   void *data,
				   const char namefmt[],
				   ...)
{
	struct kthread_create_info create;
	DECLARE_WORK(work, keventd_create_kthread, &create);

	create.threadfn = threadfn;
	create.data = data;
	init_completion(&create.started);
	init_completion(&create.done);

	/*
	 * The workqueue needs to start up first:
	 */
	if (!helper_wq)
		/* Too early in boot: run the creation inline instead. */
		work.func(work.data);
	else {
		queue_work(helper_wq, &work);
		wait_for_completion(&create.done);
	}
	if (!IS_ERR(create.result)) {
		va_list args;
		va_start(args, namefmt);
		/* Name the thread per the caller's format (shows in ps). */
		vsnprintf(create.result->comm, sizeof(create.result->comm),
			  namefmt, args);
		va_end(args);
	}

	return create.result;
}
EXPORT_SYMBOL(kthread_create);
|
||||
|
||||
/*
 * Bind a freshly-created (still sleeping) kthread to one CPU.  Must be
 * called before the thread is first woken.
 */
void kthread_bind(struct task_struct *k, unsigned int cpu)
{
	BUG_ON(k->state != TASK_INTERRUPTIBLE);
	/* Must have done schedule() in kthread() before we set_task_cpu */
	wait_task_inactive(k);
	set_task_cpu(k, cpu);
	k->cpus_allowed = cpumask_of_cpu(cpu);
}
EXPORT_SYMBOL(kthread_bind);
|
||||
|
||||
/*
 * Ask kthread @k to exit (its kthread_should_stop() becomes true), wait
 * for it to finish, and return its thread function's return value.
 */
int kthread_stop(struct task_struct *k)
{
	int ret;

	/* Serializes use of the single kthread_stop_info channel. */
	down(&kthread_stop_lock);

	/* It could exit after stop_info.k set, but before wake_up_process. */
	get_task_struct(k);

	/* Must init completion *before* thread sees kthread_stop_info.k */
	init_completion(&kthread_stop_info.done);
	wmb();

	/* Now set kthread_should_stop() to true, and wake it up. */
	kthread_stop_info.k = k;
	wake_up_process(k);
	put_task_struct(k);

	/* Once it dies, reset stop ptr, gather result and we're done. */
	wait_for_completion(&kthread_stop_info.done);
	kthread_stop_info.k = NULL;
	ret = kthread_stop_info.err;
	up(&kthread_stop_lock);

	return ret;
}
EXPORT_SYMBOL(kthread_stop);
|
||||
|
||||
/* Create the workqueue on which all kthread creation is performed. */
static __init int helper_init(void)
{
	helper_wq = create_singlethread_workqueue("kthread");
	/* Boot-time allocation failure here is unrecoverable. */
	BUG_ON(!helper_wq);

	return 0;
}
core_initcall(helper_init);
|
||||
|
||||
2108
kernel/module.c
Normal file
2108
kernel/module.c
Normal file
File diff suppressed because it is too large
Load Diff
157
kernel/panic.c
Normal file
157
kernel/panic.c
Normal file
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
* linux/kernel/panic.c
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
*/
|
||||
|
||||
/*
|
||||
* This function is used through-out the kernel (including mm and fs)
|
||||
* to indicate a major problem.
|
||||
*/
|
||||
#include <linux/config.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/sysrq.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/nmi.h>
|
||||
|
||||
int panic_timeout;
|
||||
int panic_on_oops;
|
||||
int tainted;
|
||||
|
||||
EXPORT_SYMBOL(panic_timeout);
|
||||
|
||||
struct notifier_block *panic_notifier_list;
|
||||
|
||||
EXPORT_SYMBOL(panic_notifier_list);
|
||||
|
||||
/* Parse the "panic=<seconds>" boot parameter into panic_timeout. */
static int __init panic_setup(char *str)
{
	panic_timeout = simple_strtoul(str, NULL, 0);
	return 1;
}
__setup("panic=", panic_setup);
|
||||
|
||||
/* Default panic_blink implementation: does nothing, waits 0 ms. */
static long no_blink(long time)
{
	return 0;
}
|
||||
|
||||
/* Returns how long it waited in ms */
|
||||
long (*panic_blink)(long time);
|
||||
EXPORT_SYMBOL(panic_blink);
|
||||
|
||||
/**
 *	panic - halt the system
 *	@fmt: The text string to print
 *
 *	Display a message, then perform cleanups.
 *
 *	This function never returns.
 */

NORET_TYPE void panic(const char * fmt, ...)
{
	long i;
	static char buf[1024];
	va_list args;
#if defined(CONFIG_ARCH_S390)
	/* Capture the caller before the stack is disturbed further. */
	unsigned long caller = (unsigned long) __builtin_return_address(0);
#endif

	/* Force console output through even if locks are held. */
	bust_spinlocks(1);
	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
	bust_spinlocks(0);

#ifdef CONFIG_SMP
	/* Halt the other CPUs so they stop touching state. */
	smp_send_stop();
#endif

	notifier_call_chain(&panic_notifier_list, 0, buf);

	if (!panic_blink)
		panic_blink = no_blink;

	if (panic_timeout > 0)
	{
		/*
		 * Delay timeout seconds before rebooting the machine.
		 * We can't use the "normal" timers since we just panicked..
		 */
		printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
		/* i counts elapsed ms; panic_blink() reports ms it spent. */
		for (i = 0; i < panic_timeout*1000; ) {
			touch_nmi_watchdog();
			i += panic_blink(i);
			mdelay(1);
			i++;
		}
		/*
		 * Should we run the reboot notifier?  For the moment I'm
		 * choosing not to.  It might crash, be corrupt, or do
		 * more harm than good for other reasons.
		 */
		machine_restart(NULL);
	}
#ifdef __sparc__
	{
		extern int stop_a_enabled;
		/* Make sure the user can actually press L1-A */
		stop_a_enabled = 1;
		printk(KERN_EMERG "Press L1-A to return to the boot prom\n");
	}
#endif
#if defined(CONFIG_ARCH_S390)
	disabled_wait(caller);
#endif
	local_irq_enable();
	/* Spin forever, blinking if a blink handler is installed. */
	for (i = 0;;) {
		i += panic_blink(i);
		mdelay(1);
		i++;
	}
}

EXPORT_SYMBOL(panic);
|
||||
|
||||
/**
 *	print_tainted - return a string to represent the kernel taint state.
 *
 *  'P' - Proprietary module has been loaded.
 *  'F' - Module has been forcibly loaded.
 *  'S' - SMP with CPUs not designed for SMP.
 *  'R' - User forced a module unload.
 *  'M' - Machine had a machine check experience.
 *  'B' - System has hit bad_page.
 *
 *	The string is overwritten by the next call to print_tainted().
 */

const char *print_tainted(void)
{
	/* Static: the returned pointer stays valid after we return. */
	static char buf[20];
	if (tainted) {
		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c",
			tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
			tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
			tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
			tainted & TAINT_FORCED_RMMOD ? 'R' : ' ',
			tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
			tainted & TAINT_BAD_PAGE ? 'B' : ' ');
	}
	else
		snprintf(buf, sizeof(buf), "Not tainted");
	return(buf);
}
|
||||
|
||||
/* OR a TAINT_* flag into the global kernel taint mask. */
void add_taint(unsigned flag)
{
	tainted |= flag;
}
EXPORT_SYMBOL(add_taint);
|
||||
721
kernel/params.c
Normal file
721
kernel/params.c
Normal file
@@ -0,0 +1,721 @@
|
||||
/* Helpers for initial module or kernel cmdline parsing
|
||||
Copyright (C) 2001 Rusty Russell.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
#include <linux/config.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/err.h>
|
||||
|
||||
#if 0
|
||||
#define DEBUGP printk
|
||||
#else
|
||||
#define DEBUGP(fmt, a...)
|
||||
#endif
|
||||
|
||||
/* Map '-' to '_'; every other character passes through unchanged. */
static inline int dash2underscore(char c)
{
	return (c == '-') ? '_' : c;
}

/*
 * Test whether a user-supplied parameter name matches a registered
 * parameter name, treating '-' in the input as equivalent to '_'.
 * Returns 1 on a full match, 0 otherwise.
 */
static inline int parameq(const char *input, const char *paramname)
{
	unsigned int i = 0;

	while (dash2underscore(input[i]) == paramname[i]) {
		if (input[i] == '\0')
			return 1;
		i++;
	}
	return 0;
}
|
||||
|
||||
static int parse_one(char *param,
|
||||
char *val,
|
||||
struct kernel_param *params,
|
||||
unsigned num_params,
|
||||
int (*handle_unknown)(char *param, char *val))
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
/* Find parameter */
|
||||
for (i = 0; i < num_params; i++) {
|
||||
if (parameq(param, params[i].name)) {
|
||||
DEBUGP("They are equal! Calling %p\n",
|
||||
params[i].set);
|
||||
return params[i].set(val, ¶ms[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (handle_unknown) {
|
||||
DEBUGP("Unknown argument: calling %p\n", handle_unknown);
|
||||
return handle_unknown(param, val);
|
||||
}
|
||||
|
||||
DEBUGP("Unknown argument `%s'\n", param);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
/* You can use " around spaces, but can't escape ". */
/* Hyphens and underscores equivalent in parameter names. */
/*
 * Split the next "param" or "param=value" token off the front of 'args'
 * (destructively, by writing NULs into it).  *param points at the name,
 * *val at the value or NULL if there was no '='.  Returns a pointer to
 * the remainder of the string.
 */
static char *next_arg(char *args, char **param, char **val)
{
	unsigned int i, equals = 0;
	int in_quote = 0, quoted = 0;
	char *next;

	/* Chew any extra spaces */
	while (*args == ' ') args++;
	/* A leading quote covers the whole token (name and value). */
	if (*args == '"') {
		args++;
		in_quote = 1;
		quoted = 1;
	}

	/* Scan to the end of the token, tracking the first '=' and
	 * toggling quote state so quoted spaces don't end the token. */
	for (i = 0; args[i]; i++) {
		if (args[i] == ' ' && !in_quote)
			break;
		if (equals == 0) {
			if (args[i] == '=')
				equals = i;
		}
		if (args[i] == '"')
			in_quote = !in_quote;
	}

	*param = args;
	if (!equals)
		*val = NULL;
	else {
		/* Terminate the name and point *val past the '='. */
		args[equals] = '\0';
		*val = args + equals + 1;

		/* Don't include quotes in value. */
		if (**val == '"') {
			(*val)++;
			if (args[i-1] == '"')
				args[i-1] = '\0';
		}
		if (quoted && args[i-1] == '"')
			args[i-1] = '\0';
	}

	/* Terminate this token and return the rest of the string. */
	if (args[i]) {
		args[i] = '\0';
		next = args + i + 1;
	} else
		next = args + i;
	return next;
}
|
||||
|
||||
/* Args looks like "foo=bar,bar2 baz=fuz wiz". */
|
||||
int parse_args(const char *name,
|
||||
char *args,
|
||||
struct kernel_param *params,
|
||||
unsigned num,
|
||||
int (*unknown)(char *param, char *val))
|
||||
{
|
||||
char *param, *val;
|
||||
|
||||
DEBUGP("Parsing ARGS: %s\n", args);
|
||||
|
||||
while (*args) {
|
||||
int ret;
|
||||
|
||||
args = next_arg(args, ¶m, &val);
|
||||
ret = parse_one(param, val, params, num, unknown);
|
||||
switch (ret) {
|
||||
case -ENOENT:
|
||||
printk(KERN_ERR "%s: Unknown parameter `%s'\n",
|
||||
name, param);
|
||||
return ret;
|
||||
case -ENOSPC:
|
||||
printk(KERN_ERR
|
||||
"%s: `%s' too large for parameter `%s'\n",
|
||||
name, val ?: "", param);
|
||||
return ret;
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
printk(KERN_ERR
|
||||
"%s: `%s' invalid for parameter `%s'\n",
|
||||
name, val ?: "", param);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/* All parsed OK. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Lazy bastard, eh? */
/*
 * Generates a param_set_##name()/param_get_##name() pair for a basic
 * integer type.  The setter parses with 'strtolfn' into the wider
 * 'tmptype' and rejects input that does not survive a round-trip
 * through 'type' (out of range); the getter prints with 'format'.
 */
#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn)      	\
	int param_set_##name(const char *val, struct kernel_param *kp)	\
	{								\
		char *endp;						\
		tmptype l;						\
									\
		if (!val) return -EINVAL;				\
		l = strtolfn(val, &endp, 0);				\
		if (endp == val || ((type)l != l))			\
			return -EINVAL;					\
		*((type *)kp->arg) = l;					\
		return 0;						\
	}								\
	int param_get_##name(char *buffer, struct kernel_param *kp)	\
	{								\
		return sprintf(buffer, format, *((type *)kp->arg));	\
	}

/* NOTE(review): "byte" is printed with %c — as a raw character, not a
 * number; presumably intentional here, but verify against consumers. */
STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, simple_strtoul);
STANDARD_PARAM_DEF(short, short, "%hi", long, simple_strtol);
STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, simple_strtoul);
STANDARD_PARAM_DEF(int, int, "%i", long, simple_strtol);
STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, simple_strtoul);
STANDARD_PARAM_DEF(long, long, "%li", long, simple_strtol);
STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, simple_strtoul);
|
||||
|
||||
int param_set_charp(const char *val, struct kernel_param *kp)
|
||||
{
|
||||
if (!val) {
|
||||
printk(KERN_ERR "%s: string parameter expected\n",
|
||||
kp->name);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (strlen(val) > 1024) {
|
||||
printk(KERN_ERR "%s: string parameter too long\n",
|
||||
kp->name);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
*(char **)kp->arg = (char *)val;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int param_get_charp(char *buffer, struct kernel_param *kp)
|
||||
{
|
||||
return sprintf(buffer, "%s", *((char **)kp->arg));
|
||||
}
|
||||
|
||||
int param_set_bool(const char *val, struct kernel_param *kp)
|
||||
{
|
||||
/* No equals means "set"... */
|
||||
if (!val) val = "1";
|
||||
|
||||
/* One of =[yYnN01] */
|
||||
switch (val[0]) {
|
||||
case 'y': case 'Y': case '1':
|
||||
*(int *)kp->arg = 1;
|
||||
return 0;
|
||||
case 'n': case 'N': case '0':
|
||||
*(int *)kp->arg = 0;
|
||||
return 0;
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int param_get_bool(char *buffer, struct kernel_param *kp)
|
||||
{
|
||||
/* Y and N chosen as being relatively non-coder friendly */
|
||||
return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'Y' : 'N');
|
||||
}
|
||||
|
||||
int param_set_invbool(const char *val, struct kernel_param *kp)
|
||||
{
|
||||
int boolval, ret;
|
||||
struct kernel_param dummy = { .arg = &boolval };
|
||||
|
||||
ret = param_set_bool(val, &dummy);
|
||||
if (ret == 0)
|
||||
*(int *)kp->arg = !boolval;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int param_get_invbool(char *buffer, struct kernel_param *kp)
|
||||
{
|
||||
int val;
|
||||
struct kernel_param dummy = { .arg = &val };
|
||||
|
||||
val = !*(int *)kp->arg;
|
||||
return param_get_bool(buffer, &dummy);
|
||||
}
|
||||
|
||||
/* We cheat here and temporarily mangle the string. */
/*
 * Parse the comma-separated list in @val, calling @set once per element
 * with kp.arg stepped through the destination array (stride @elemsize).
 * Enforces between @min and @max elements and stores the final count in
 * *num.  Each element is NUL-terminated in place before parsing (the
 * const on @val is cast away), so @val must point at writable memory.
 */
int param_array(const char *name,
		const char *val,
		unsigned int min, unsigned int max,
		void *elem, int elemsize,
		int (*set)(const char *, struct kernel_param *kp),
		int *num)
{
	int ret;
	struct kernel_param kp;
	char save;

	/* Get the name right for errors. */
	kp.name = name;
	kp.arg = elem;

	/* No equals sign? */
	if (!val) {
		printk(KERN_ERR "%s: expects arguments\n", name);
		return -EINVAL;
	}

	*num = 0;
	/* We expect a comma-separated list of values. */
	do {
		int len;

		if (*num == max) {
			printk(KERN_ERR "%s: can only take %i arguments\n",
			       name, max);
			return -EINVAL;
		}
		len = strcspn(val, ",");

		/* nul-terminate and parse */
		save = val[len];	/* either ',' or the final '\0' */
		((char *)val)[len] = '\0';
		ret = set(val, &kp);

		if (ret != 0)
			return ret;
		kp.arg += elemsize;	/* void* arithmetic: gcc extension */
		val += len+1;
		(*num)++;
	} while (save == ',');

	if (*num < min) {
		printk(KERN_ERR "%s: needs at least %i arguments\n",
		       name, min);
		return -EINVAL;
	}
	return 0;
}
|
||||
|
||||
int param_array_set(const char *val, struct kernel_param *kp)
|
||||
{
|
||||
struct kparam_array *arr = kp->arg;
|
||||
|
||||
return param_array(kp->name, val, 1, arr->max, arr->elem,
|
||||
arr->elemsize, arr->set, arr->num ?: &arr->max);
|
||||
}
|
||||
|
||||
/*
 * Format the current array contents into @buffer as a comma-separated
 * list via the per-element getter.  Returns the total length written
 * (excluding the trailing NUL), or a negative error from arr->get().
 */
int param_array_get(char *buffer, struct kernel_param *kp)
{
	int i, off, ret;
	struct kparam_array *arr = kp->arg;
	struct kernel_param p;

	p = *kp;	/* local copy so .arg can be re-pointed per element */
	for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) {
		if (i)
			buffer[off++] = ',';
		p.arg = arr->elem + arr->elemsize * i;
		ret = arr->get(buffer + off, &p);
		if (ret < 0)
			return ret;
		off += ret;
	}
	buffer[off] = '\0';
	return off;
}
|
||||
|
||||
int param_set_copystring(const char *val, struct kernel_param *kp)
|
||||
{
|
||||
struct kparam_string *kps = kp->arg;
|
||||
|
||||
if (strlen(val)+1 > kps->maxlen) {
|
||||
printk(KERN_ERR "%s: string doesn't fit in %u chars.\n",
|
||||
kp->name, kps->maxlen-1);
|
||||
return -ENOSPC;
|
||||
}
|
||||
strcpy(kps->string, val);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Copy the stored string out, bounded by the declared maxlen. */
int param_get_string(char *buffer, struct kernel_param *kp)
{
	struct kparam_string *kps = kp->arg;
	return strlcpy(buffer, kps->string, kps->maxlen);
}
|
||||
|
||||
/* sysfs output in /sys/modules/XYZ/parameters/ */

/* Linker-provided bounds of the built-in __param section. */
extern struct kernel_param __start___param[], __stop___param[];

#define MAX_KBUILD_MODNAME KOBJ_NAME_LEN

/* One sysfs file per exported parameter. */
struct param_attribute
{
	struct module_attribute mattr;
	struct kernel_param *param;	/* parameter backing this file */
};

/* Per-module "parameters" attribute group; attrs[] is sized at
 * allocation time to hold one entry per parameter with perm != 0. */
struct module_param_attrs
{
	struct attribute_group grp;
	struct param_attribute attrs[0];	/* trailing flexible array */
};
|
||||
|
||||
/* No trailing semicolon: the macro must expand cleanly inside
 * expressions and initializers, not just as a full statement. */
#define to_param_attr(n) container_of(n, struct param_attribute, mattr)
|
||||
|
||||
/*
 * sysfs read: format the parameter's current value via its get() hook
 * and append a newline.  -EPERM if the parameter has no getter.
 */
static ssize_t param_attr_show(struct module_attribute *mattr,
			       struct module *mod, char *buf)
{
	int count;
	struct param_attribute *attribute = to_param_attr(mattr);

	if (!attribute->param->get)
		return -EPERM;

	count = attribute->param->get(buf, attribute->param);
	if (count > 0) {
		strcat(buf, "\n");	/* getters don't add the newline */
		++count;
	}
	return count;
}
|
||||
|
||||
/* sysfs always hands a nul-terminated string in buf. We rely on that. */
/*
 * sysfs write: feed the buffer to the parameter's set() hook.
 * Returns @len on success (whole buffer consumed), -EPERM if the
 * parameter has no setter, or the setter's error code.
 */
static ssize_t param_attr_store(struct module_attribute *mattr,
				struct module *owner,
				const char *buf, size_t len)
{
 	int err;
	struct param_attribute *attribute = to_param_attr(mattr);

	if (!attribute->param->set)
		return -EPERM;

	err = attribute->param->set(buf, attribute->param);
	if (!err)
		return len;
	return err;
}
|
||||
|
||||
#ifdef CONFIG_MODULES
#define __modinit
#else
/* Without loadable-module support the sysfs setup code is only needed
 * at boot, so let it be discarded with the other __init sections. */
#define __modinit __init
#endif
|
||||
|
||||
/*
 * param_sysfs_setup - setup sysfs support for one module or KBUILD_MODNAME
 * @mk: struct module_kobject (contains parent kobject)
 * @kparam: array of struct kernel_param, the actual parameter definitions
 * @num_params: number of entries in array
 * @name_skip: offset where the parameter name start in kparam[].name. Needed for built-in "modules"
 *
 * Create a kobject for a (per-module) group of parameters, and create files
 * in sysfs. A pointer to the param_kobject is returned on success,
 * NULL if there's no parameter to export, or other ERR_PTR(err).
 */
static __modinit struct module_param_attrs *
param_sysfs_setup(struct module_kobject *mk,
		  struct kernel_param *kparam,
		  unsigned int num_params,
		  unsigned int name_skip)
{
	struct module_param_attrs *mp;
	unsigned int valid_attrs = 0;
	unsigned int i, size[2];
	struct param_attribute *pattr;
	struct attribute **gattr;
	int err;

	/* Only parameters with a non-zero perm get a sysfs file. */
	for (i=0; i<num_params; i++) {
		if (kparam[i].perm)
			valid_attrs++;
	}

	if (!valid_attrs)
		return NULL;

	/* Single allocation holding two regions:
	 *   size[0]: the module_param_attrs header plus its attrs[]
	 *            (aligned so the pointer array that follows is too)
	 *   size[1]: the NULL-terminated struct attribute * array that
	 *            grp.attrs points at. */
	size[0] = ALIGN(sizeof(*mp) +
			valid_attrs * sizeof(mp->attrs[0]),
			sizeof(mp->grp.attrs[0]));
	size[1] = (valid_attrs + 1) * sizeof(mp->grp.attrs[0]);

	mp = kmalloc(size[0] + size[1], GFP_KERNEL);
	if (!mp)
		return ERR_PTR(-ENOMEM);

	mp->grp.name = "parameters";
	mp->grp.attrs = (void *)mp + size[0];	/* second region */

	/* Fill both arrays in lockstep. */
	pattr = &mp->attrs[0];
	gattr = &mp->grp.attrs[0];
	for (i = 0; i < num_params; i++) {
		struct kernel_param *kp = &kparam[i];
		if (kp->perm) {
			pattr->param = kp;
			pattr->mattr.show = param_attr_show;
			pattr->mattr.store = param_attr_store;
			/* skip the "modname." prefix for built-ins */
			pattr->mattr.attr.name = (char *)&kp->name[name_skip];
			pattr->mattr.attr.owner = mk->mod;
			pattr->mattr.attr.mode = kp->perm;
			*(gattr++) = &(pattr++)->mattr.attr;
		}
	}
	*gattr = NULL;	/* sysfs expects a NULL terminator */

	if ((err = sysfs_create_group(&mk->kobj, &mp->grp))) {
		kfree(mp);
		return ERR_PTR(err);
	}
	return mp;
}
|
||||
|
||||
|
||||
#ifdef CONFIG_MODULES
|
||||
|
||||
/*
|
||||
* module_param_sysfs_setup - setup sysfs support for one module
|
||||
* @mod: module
|
||||
* @kparam: module parameters (array)
|
||||
* @num_params: number of module parameters
|
||||
*
|
||||
* Adds sysfs entries for module parameters, and creates a link from
|
||||
* /sys/module/[mod->name]/parameters to /sys/parameters/[mod->name]/
|
||||
*/
|
||||
int module_param_sysfs_setup(struct module *mod,
|
||||
struct kernel_param *kparam,
|
||||
unsigned int num_params)
|
||||
{
|
||||
struct module_param_attrs *mp;
|
||||
|
||||
mp = param_sysfs_setup(&mod->mkobj, kparam, num_params, 0);
|
||||
if (IS_ERR(mp))
|
||||
return PTR_ERR(mp);
|
||||
|
||||
mod->param_attrs = mp;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* module_param_sysfs_remove - remove sysfs support for one module
|
||||
* @mod: module
|
||||
*
|
||||
* Remove sysfs entries for module parameters and the corresponding
|
||||
* kobject.
|
||||
*/
|
||||
void module_param_sysfs_remove(struct module *mod)
|
||||
{
|
||||
if (mod->param_attrs) {
|
||||
sysfs_remove_group(&mod->mkobj.kobj,
|
||||
&mod->param_attrs->grp);
|
||||
/* We are positive that no one is using any param
|
||||
* attrs at this point. Deallocate immediately. */
|
||||
kfree(mod->param_attrs);
|
||||
mod->param_attrs = NULL;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* kernel_param_sysfs_setup - wrapper for built-in params support
|
||||
*/
|
||||
static void __init kernel_param_sysfs_setup(const char *name,
|
||||
struct kernel_param *kparam,
|
||||
unsigned int num_params,
|
||||
unsigned int name_skip)
|
||||
{
|
||||
struct module_kobject *mk;
|
||||
|
||||
mk = kmalloc(sizeof(struct module_kobject), GFP_KERNEL);
|
||||
memset(mk, 0, sizeof(struct module_kobject));
|
||||
|
||||
mk->mod = THIS_MODULE;
|
||||
kobj_set_kset_s(mk, module_subsys);
|
||||
kobject_set_name(&mk->kobj, name);
|
||||
kobject_register(&mk->kobj);
|
||||
|
||||
/* no need to keep the kobject if no parameter is exported */
|
||||
if (!param_sysfs_setup(mk, kparam, num_params, name_skip)) {
|
||||
kobject_unregister(&mk->kobj);
|
||||
kfree(mk);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * param_sysfs_builtin - add contents in /sys/parameters for built-in modules
 *
 * Add module_parameters to sysfs for "modules" built into the kernel.
 *
 * The "module" name (KBUILD_MODNAME) is stored before a dot, the
 * "parameter" name is stored behind a dot in kernel_param->name. So,
 * extract the "module" name for all built-in kernel_param-eters,
 * and for all who have the same, call kernel_param_sysfs_setup.
 *
 * Relies on the __param section being grouped so that all parameters
 * of one "module" are adjacent; each run of identical prefixes is
 * flushed as one kobject.
 */
static void __init param_sysfs_builtin(void)
{
	struct kernel_param *kp, *kp_begin = NULL;	/* first param of the current run */
	unsigned int i, name_len, count = 0;
	char modname[MAX_KBUILD_MODNAME + 1] = "";

	for (i=0; i < __stop___param - __start___param; i++) {
		char *dot;

		kp = &__start___param[i];

		/* We do not handle args without periods. */
		dot = memchr(kp->name, '.', MAX_KBUILD_MODNAME);
		if (!dot) {
			DEBUGP("couldn't find period in %s\n", kp->name);
			continue;
		}
		name_len = dot - kp->name;

		/* new kbuild_modname? */
		if (strlen(modname) != name_len
		    || strncmp(modname, kp->name, name_len) != 0) {
			/* add a new kobject for previous kernel_params. */
			if (count)
				kernel_param_sysfs_setup(modname,
							 kp_begin,
							 count,
							 strlen(modname)+1);

			strncpy(modname, kp->name, name_len);
			modname[name_len] = '\0';	/* strncpy didn't terminate */
			count = 0;
			kp_begin = kp;
		}
		count++;
	}

	/* last kernel_params need to be registered as well */
	if (count)
		kernel_param_sysfs_setup(modname, kp_begin, count,
					 strlen(modname)+1);
}
|
||||
|
||||
|
||||
/* module-related sysfs stuff */
|
||||
#ifdef CONFIG_MODULES
|
||||
|
||||
/* No trailing semicolons: these must expand cleanly inside
 * expressions and initializers, not just as full statements. */
#define to_module_attr(n) container_of(n, struct module_attribute, attr)
#define to_module_kobject(n) container_of(n, struct module_kobject, kobj)
|
||||
|
||||
/*
 * kobject-level show: dispatch to the module_attribute's show() hook,
 * pinning the owning module across the call so it cannot unload.
 */
static ssize_t module_attr_show(struct kobject *kobj,
				struct attribute *attr,
				char *buf)
{
	struct module_attribute *attribute;
	struct module_kobject *mk;
	int ret;

	attribute = to_module_attr(attr);
	mk = to_module_kobject(kobj);

	if (!attribute->show)
		return -EPERM;

	/* module is being (or has been) removed */
	if (!try_module_get(mk->mod))
		return -ENODEV;

	ret = attribute->show(attribute, mk->mod, buf);

	module_put(mk->mod);

	return ret;
}
|
||||
|
||||
/*
 * kobject-level store: dispatch to the module_attribute's store() hook,
 * pinning the owning module across the call so it cannot unload.
 */
static ssize_t module_attr_store(struct kobject *kobj,
				struct attribute *attr,
				const char *buf, size_t len)
{
	struct module_attribute *attribute;
	struct module_kobject *mk;
	int ret;

	attribute = to_module_attr(attr);
	mk = to_module_kobject(kobj);

	if (!attribute->store)
		return -EPERM;

	/* module is being (or has been) removed */
	if (!try_module_get(mk->mod))
		return -ENODEV;

	ret = attribute->store(attribute, mk->mod, buf, len);

	module_put(mk->mod);

	return ret;
}
|
||||
|
||||
/* Route generic sysfs show/store to the module_attribute hooks. */
static struct sysfs_ops module_sysfs_ops = {
	.show = module_attr_show,
	.store = module_attr_store,
};

#else
/* !CONFIG_MODULES: parameter files exist but cannot be accessed. */
static struct sysfs_ops module_sysfs_ops = {
	.show = NULL,
	.store = NULL,
};
#endif

static struct kobj_type module_ktype = {
	.sysfs_ops = &module_sysfs_ops,
};

/* root of /sys/module */
decl_subsys(module, &module_ktype, NULL);
|
||||
|
||||
/*
 * param_sysfs_init - wrapper for built-in params support
 */
/* Registers the /sys/module subsystem, then populates it with the
 * parameters of built-in "modules".  Runs once during boot.
 * NOTE(review): subsystem_register()'s return value is ignored. */
static int __init param_sysfs_init(void)
{
	subsystem_register(&module_subsys);

	param_sysfs_builtin();

	return 0;
}
__initcall(param_sysfs_init);
|
||||
|
||||
EXPORT_SYMBOL(param_set_byte);
|
||||
EXPORT_SYMBOL(param_get_byte);
|
||||
EXPORT_SYMBOL(param_set_short);
|
||||
EXPORT_SYMBOL(param_get_short);
|
||||
EXPORT_SYMBOL(param_set_ushort);
|
||||
EXPORT_SYMBOL(param_get_ushort);
|
||||
EXPORT_SYMBOL(param_set_int);
|
||||
EXPORT_SYMBOL(param_get_int);
|
||||
EXPORT_SYMBOL(param_set_uint);
|
||||
EXPORT_SYMBOL(param_get_uint);
|
||||
EXPORT_SYMBOL(param_set_long);
|
||||
EXPORT_SYMBOL(param_get_long);
|
||||
EXPORT_SYMBOL(param_set_ulong);
|
||||
EXPORT_SYMBOL(param_get_ulong);
|
||||
EXPORT_SYMBOL(param_set_charp);
|
||||
EXPORT_SYMBOL(param_get_charp);
|
||||
EXPORT_SYMBOL(param_set_bool);
|
||||
EXPORT_SYMBOL(param_get_bool);
|
||||
EXPORT_SYMBOL(param_set_invbool);
|
||||
EXPORT_SYMBOL(param_get_invbool);
|
||||
EXPORT_SYMBOL(param_array_set);
|
||||
EXPORT_SYMBOL(param_array_get);
|
||||
EXPORT_SYMBOL(param_set_copystring);
|
||||
EXPORT_SYMBOL(param_get_string);
|
||||
292
kernel/pid.c
Normal file
292
kernel/pid.c
Normal file
@@ -0,0 +1,292 @@
|
||||
/*
|
||||
* Generic pidhash and scalable, time-bounded PID allocator
|
||||
*
|
||||
* (C) 2002-2003 William Irwin, IBM
|
||||
* (C) 2004 William Irwin, Oracle
|
||||
* (C) 2002-2004 Ingo Molnar, Red Hat
|
||||
*
|
||||
* pid-structures are backing objects for tasks sharing a given ID to chain
|
||||
* against. There is very little to them aside from hashing them and
|
||||
* parking tasks using given ID's on a list.
|
||||
*
|
||||
* The hash is always changed with the tasklist_lock write-acquired,
|
||||
* and the hash is only accessed with the tasklist_lock at least
|
||||
* read-acquired, so there's no additional SMP locking needed here.
|
||||
*
|
||||
* We have a list of bitmap pages, which bitmaps represent the PID space.
|
||||
* Allocating and freeing PIDs is completely lockless. The worst-case
|
||||
* allocation scenario when all but one out of 1 million PIDs possible are
|
||||
* allocated already: the scanning of 32 list entries and at most PAGE_SIZE
|
||||
* bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/hash.h>
|
||||
|
||||
#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
static struct hlist_head *pid_hash[PIDTYPE_MAX];	/* one hash table per PID type */
static int pidhash_shift;	/* log2 of each table's slot count */

int pid_max = PID_MAX_DEFAULT;
int last_pid;	/* most recently allocated PID; scans resume after it */

#define RESERVED_PIDS 300	/* low PIDs are not recycled to wrapping allocators */

/* sysctl bounds for pid_max */
int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;

#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
#define BITS_PER_PAGE (PAGE_SIZE*8)
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
/* PID encoded by bit `off' of bitmap page `map' */
#define mk_pid(map, off) (((map) - pidmap_array)*BITS_PER_PAGE + (off))
#define find_next_offset(map, off) \
		find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
|
||||
|
||||
/*
 * PID-map pages start out as NULL, they get allocated upon
 * first use and are never deallocated. This way a low pid_max
 * value does not cause lots of bitmaps to be allocated, but
 * the scheme scales to up to 4 million PIDs, runtime.
 */
typedef struct pidmap {
	atomic_t nr_free;	/* free bits remaining on this page */
	void *page;		/* bitmap page; NULL until first use */
} pidmap_t;

/* every entry starts fully free, with its page not yet allocated */
static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };

/* serializes only the lazy installation of bitmap pages */
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
|
||||
|
||||
fastcall void free_pidmap(int pid)
|
||||
{
|
||||
pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
|
||||
int offset = pid & BITS_PER_PAGE_MASK;
|
||||
|
||||
clear_bit(offset, map->page);
|
||||
atomic_inc(&map->nr_free);
|
||||
}
|
||||
|
||||
/*
 * Allocate an unused PID, scanning the bitmap pages circularly starting
 * just after last_pid (wrapping to RESERVED_PIDS, never below).  Bitmap
 * pages are installed lazily under pidmap_lock; the allocation itself is
 * lockless via test_and_set_bit.  Returns the new PID, or -1 if the
 * whole space is in use (or a bitmap page could not be allocated).
 */
int alloc_pidmap(void)
{
	int i, offset, max_scan, pid, last = last_pid;
	pidmap_t *map;

	pid = last + 1;
	if (pid >= pid_max)
		pid = RESERVED_PIDS;	/* wrap, skipping low boot-time PIDs */
	offset = pid & BITS_PER_PAGE_MASK;
	map = &pidmap_array[pid/BITS_PER_PAGE];
	/* number of further pages to visit; the starting page counts
	 * twice when we begin mid-page (offset != 0) */
	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
	for (i = 0; i <= max_scan; ++i) {
		if (unlikely(!map->page)) {
			unsigned long page = get_zeroed_page(GFP_KERNEL);
			/*
			 * Free the page if someone raced with us
			 * installing it:
			 */
			spin_lock(&pidmap_lock);
			if (map->page)
				free_page(page);
			else
				map->page = (void *)page;
			spin_unlock(&pidmap_lock);
			if (unlikely(!map->page))
				break;	/* OOM: give up entirely */
		}
		if (likely(atomic_read(&map->nr_free))) {
			do {
				if (!test_and_set_bit(offset, map->page)) {
					atomic_dec(&map->nr_free);
					last_pid = pid;
					return pid;
				}
				offset = find_next_offset(map, offset);
				pid = mk_pid(map, offset);
			/*
			 * find_next_offset() found a bit, the pid from it
			 * is in-bounds, and if we fell back to the last
			 * bitmap block and the final block was the same
			 * as the starting point, pid is before last_pid.
			 */
			} while (offset < BITS_PER_PAGE && pid < pid_max &&
					(i != max_scan || pid < last ||
					    !((last+1) & BITS_PER_PAGE_MASK)));
		}
		/* advance to the next page, wrapping to the start */
		if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) {
			++map;
			offset = 0;
		} else {
			map = &pidmap_array[0];
			offset = RESERVED_PIDS;
			if (unlikely(last == offset))
				break;	/* full circle: nothing free */
		}
		pid = mk_pid(map, offset);
	}
	return -1;
}
|
||||
|
||||
/*
 * Look up the struct pid hashed under (@type, @nr); NULL if absent.
 * Per the file header, callers hold tasklist_lock at least for read.
 */
struct pid * fastcall find_pid(enum pid_type type, int nr)
{
	struct hlist_node *elem;
	struct pid *pid;

	hlist_for_each_entry(pid, elem,
			&pid_hash[type][pid_hashfn(nr)], pid_chain) {
		if (pid->nr == nr)
			return pid;
	}
	return NULL;
}
|
||||
|
||||
/*
 * Link @task into the PID structures for (@type, @nr).  The first task
 * with a given nr goes into the hash; later tasks sharing the ID are
 * chained onto the head's pid_list instead.  Always returns 0.
 */
int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
{
	struct pid *pid, *task_pid;

	task_pid = &task->pids[type];
	pid = find_pid(type, nr);
	if (pid == NULL) {
		/* first user of this nr: hash it */
		hlist_add_head(&task_pid->pid_chain,
				&pid_hash[type][pid_hashfn(nr)]);
		INIT_LIST_HEAD(&task_pid->pid_list);
	} else {
		/* sharer: stay out of the hash, join the sibling list */
		INIT_HLIST_NODE(&task_pid->pid_chain);
		list_add_tail(&task_pid->pid_list, &pid->pid_list);
	}
	task_pid->nr = nr;

	return 0;
}
|
||||
|
||||
/*
 * Unlink @task's (@type) pid from the hash and sibling list.  If the
 * task was the hashed head and others share the nr, promote the next
 * sharer into the hash.  Returns the pid number when no other task of
 * this type still uses it (caller may then recycle it), else 0.
 */
static fastcall int __detach_pid(task_t *task, enum pid_type type)
{
	struct pid *pid, *pid_next;
	int nr = 0;

	pid = &task->pids[type];
	if (!hlist_unhashed(&pid->pid_chain)) {
		hlist_del(&pid->pid_chain);

		if (list_empty(&pid->pid_list))
			nr = pid->nr;	/* last user of this nr */
		else {
			pid_next = list_entry(pid->pid_list.next,
						struct pid, pid_list);
			/* insert next pid from pid_list to hash */
			hlist_add_head(&pid_next->pid_chain,
					&pid_hash[type][pid_hashfn(pid_next->nr)]);
		}
	}

	list_del(&pid->pid_list);
	pid->nr = 0;

	return nr;
}
|
||||
|
||||
/*
 * Detach @task from (@type) and recycle the PID number once no other
 * PID type still references it.
 */
void fastcall detach_pid(task_t *task, enum pid_type type)
{
	int other, nr = __detach_pid(task, type);

	if (!nr)
		return;

	/* Another type still maps this number?  Then keep it alive. */
	for (other = PIDTYPE_MAX - 1; other >= 0; other--) {
		if (other != type && find_pid(other, nr))
			return;
	}

	free_pidmap(nr);
}
|
||||
|
||||
task_t *find_task_by_pid_type(int type, int nr)
|
||||
{
|
||||
struct pid *pid;
|
||||
|
||||
pid = find_pid(type, nr);
|
||||
if (!pid)
|
||||
return NULL;
|
||||
|
||||
return pid_task(&pid->pid_list, type);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(find_task_by_pid_type);
|
||||
|
||||
/*
 * This function switches the PIDs if a non-leader thread calls
 * sys_execve() - this must be done without releasing the PID.
 * (which a detach_pid() would eventually do.)
 */
void switch_exec_pids(task_t *leader, task_t *thread)
{
	/* Drop every PID link of both tasks via the internal helper,
	 * which never frees the underlying PID numbers. */
	__detach_pid(leader, PIDTYPE_PID);
	__detach_pid(leader, PIDTYPE_TGID);
	__detach_pid(leader, PIDTYPE_PGID);
	__detach_pid(leader, PIDTYPE_SID);

	__detach_pid(thread, PIDTYPE_PID);
	__detach_pid(thread, PIDTYPE_TGID);

	/* swap identities: the exec'ing thread takes over the group */
	leader->pid = leader->tgid = thread->pid;
	thread->pid = thread->tgid;

	/* re-attach under the exchanged numbers */
	attach_pid(thread, PIDTYPE_PID, thread->pid);
	attach_pid(thread, PIDTYPE_TGID, thread->tgid);
	attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp);
	attach_pid(thread, PIDTYPE_SID, thread->signal->session);
	list_add_tail(&thread->tasks, &init_task.tasks);

	attach_pid(leader, PIDTYPE_PID, leader->pid);
	attach_pid(leader, PIDTYPE_TGID, leader->tgid);
	attach_pid(leader, PIDTYPE_PGID, leader->signal->pgrp);
	attach_pid(leader, PIDTYPE_SID, leader->signal->session);
}
|
||||
|
||||
/*
 * The pid hash table is scaled according to the amount of memory in the
 * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
 * more.
 */
void __init pidhash_init(void)
{
	int i, j, pidhash_size;
	unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);

	/* clamp the table to 2^4..2^12 slots based on RAM size */
	pidhash_shift = max(4, fls(megabytes * 4));
	pidhash_shift = min(12, pidhash_shift);
	pidhash_size = 1 << pidhash_shift;

	printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
		pidhash_size, pidhash_shift,
		PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head));

	/* one table per PID type, each with pidhash_size empty chains */
	for (i = 0; i < PIDTYPE_MAX; i++) {
		pid_hash[i] = alloc_bootmem(pidhash_size *
					sizeof(*(pid_hash[i])));
		if (!pid_hash[i])
			panic("Could not alloc pidhash!\n");
		for (j = 0; j < pidhash_size; j++)
			INIT_HLIST_HEAD(&pid_hash[i][j]);
	}
}
|
||||
|
||||
/*
 * Install the first PID bitmap page and reserve PID 0 for the boot
 * task (current), hashing it under every PID type.
 * NOTE(review): get_zeroed_page() is not checked here -- presumably
 * allocation cannot fail this early in boot; confirm.
 */
void __init pidmap_init(void)
{
	int i;

	pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
	set_bit(0, pidmap_array->page);		/* PID 0 is taken */
	atomic_dec(&pidmap_array->nr_free);

	/*
	 * Allocate PID 0, and hash it via all PID types:
	 */

	for (i = 0; i < PIDTYPE_MAX; i++)
		attach_pid(current, i, 0);
}
|
||||
1559
kernel/posix-cpu-timers.c
Normal file
1559
kernel/posix-cpu-timers.c
Normal file
File diff suppressed because it is too large
Load Diff
1584
kernel/posix-timers.c
Normal file
1584
kernel/posix-timers.c
Normal file
File diff suppressed because it is too large
Load Diff
74
kernel/power/Kconfig
Normal file
74
kernel/power/Kconfig
Normal file
@@ -0,0 +1,74 @@
|
||||
config PM
|
||||
bool "Power Management support"
|
||||
---help---
|
||||
"Power Management" means that parts of your computer are shut
|
||||
off or put into a power conserving "sleep" mode if they are not
|
||||
being used. There are two competing standards for doing this: APM
|
||||
and ACPI. If you want to use either one, say Y here and then also
|
||||
to the requisite support below.
|
||||
|
||||
Power Management is most important for battery powered laptop
|
||||
computers; if you have a laptop, check out the Linux Laptop home
|
||||
page on the WWW at <http://www.linux-on-laptops.com/> or
|
||||
Tuxmobil - Linux on Mobile Computers at <http://www.tuxmobil.org/>
|
||||
and the Battery Powered Linux mini-HOWTO, available from
|
||||
<http://www.tldp.org/docs.html#howto>.
|
||||
|
||||
Note that, even if you say N here, Linux on the x86 architecture
|
||||
will issue the hlt instruction if nothing is to be done, thereby
|
||||
sending the processor to sleep and saving power.
|
||||
|
||||
config PM_DEBUG
|
||||
bool "Power Management Debug Support"
|
||||
depends on PM
|
||||
---help---
|
||||
This option enables verbose debugging support in the Power Management
|
||||
code. This is helpful when debugging and reporting various PM bugs,
|
||||
like suspend support.
|
||||
|
||||
config SOFTWARE_SUSPEND
|
||||
bool "Software Suspend (EXPERIMENTAL)"
|
||||
depends on EXPERIMENTAL && PM && SWAP
|
||||
---help---
|
||||
Enable the possibility of suspending the machine.
|
||||
It doesn't need APM.
|
||||
You may suspend your machine by 'swsusp' or 'shutdown -z <time>'
|
||||
(patch for sysvinit needed).
|
||||
|
||||
It creates an image which is saved in your active swap. Upon next
|
||||
boot, pass the 'resume=/dev/swappartition' argument to the kernel to
|
||||
have it detect the saved image, restore memory state from it, and
|
||||
continue to run as before. If you do not want the previous state to
|
||||
be reloaded, then use the 'noresume' kernel argument. However, note
|
||||
that your partitions will be fsck'd and you must re-mkswap your swap
|
||||
partitions. It does not work with swap files.
|
||||
|
||||
Right now you may boot without resuming and then later resume but
|
||||
in meantime you cannot use those swap partitions/files which were
|
||||
involved in suspending. Also in this case there is a risk that buffers
|
||||
on disk won't match with saved ones.
|
||||
|
||||
For more information take a look at <file:Documentation/power/swsusp.txt>.
|
||||
|
||||
config PM_STD_PARTITION
|
||||
string "Default resume partition"
|
||||
depends on SOFTWARE_SUSPEND
|
||||
default ""
|
||||
---help---
|
||||
The default resume partition is the partition that the suspend-
|
||||
to-disk implementation will look for a suspended disk image.
|
||||
|
||||
The partition specified here will be different for almost every user.
|
||||
It should be a valid swap partition (at least for now) that is turned
|
||||
on before suspending.
|
||||
|
||||
The partition specified can be overridden by specifying:
|
||||
|
||||
resume=/dev/<other device>
|
||||
|
||||
which will set the resume partition to the device specified.
|
||||
|
||||
Note there is currently not a way to specify which device to save the
|
||||
suspended image to. It will simply pick the first available swap
|
||||
device.
|
||||
|
||||
11
kernel/power/Makefile
Normal file
11
kernel/power/Makefile
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
ifeq ($(CONFIG_PM_DEBUG),y)
|
||||
EXTRA_CFLAGS += -DDEBUG
|
||||
endif
|
||||
|
||||
swsusp-smp-$(CONFIG_SMP) += smp.o
|
||||
|
||||
obj-y := main.o process.o console.o pm.o
|
||||
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) disk.o
|
||||
|
||||
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
|
||||
58
kernel/power/console.c
Normal file
58
kernel/power/console.c
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
 * kernel/power/console.c - Functions for saving/restoring console.
|
||||
*
|
||||
* Originally from swsusp.
|
||||
*/
|
||||
|
||||
#include <linux/vt_kern.h>
|
||||
#include <linux/kbd_kern.h>
|
||||
#include <linux/console.h>
|
||||
#include "power.h"
|
||||
|
||||
static int new_loglevel = 10;	/* verbose console loglevel used while suspended */
static int orig_loglevel;	/* saved loglevel, restored on resume */
#ifdef SUSPEND_CONSOLE
static int orig_fgconsole, orig_kmsg;	/* saved foreground VC and kmsg redirection */
#endif
|
||||
|
||||
/*
 * Raise the console loglevel and (when SUSPEND_CONSOLE is configured)
 * switch to a dedicated VC for the duration of a suspend.  Returns 0
 * on success, 1 if the VC could not be allocated or switched to (the
 * loglevel change is kept either way).
 */
int pm_prepare_console(void)
{
	orig_loglevel = console_loglevel;
	console_loglevel = new_loglevel;

#ifdef SUSPEND_CONSOLE
	acquire_console_sem();

	orig_fgconsole = fg_console;

	if (vc_allocate(SUSPEND_CONSOLE)) {
		/* we can't have a free VC for now. Too bad,
		 * we don't want to mess the screen for now. */
		release_console_sem();
		return 1;
	}

	set_console(SUSPEND_CONSOLE);
	release_console_sem();

	/* wait for the switch to actually complete */
	if (vt_waitactive(SUSPEND_CONSOLE)) {
		pr_debug("Suspend: Can't switch VCs.");
		return 1;
	}
	orig_kmsg = kmsg_redirect;
	kmsg_redirect = SUSPEND_CONSOLE;
#endif
	return 0;
}
|
||||
|
||||
void pm_restore_console(void)
|
||||
{
|
||||
console_loglevel = orig_loglevel;
|
||||
#ifdef SUSPEND_CONSOLE
|
||||
acquire_console_sem();
|
||||
set_console(orig_fgconsole);
|
||||
release_console_sem();
|
||||
kmsg_redirect = orig_kmsg;
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
431
kernel/power/disk.c
Normal file
431
kernel/power/disk.c
Normal file
@@ -0,0 +1,431 @@
|
||||
/*
|
||||
* kernel/power/disk.c - Suspend-to-disk support.
|
||||
*
|
||||
* Copyright (c) 2003 Patrick Mochel
|
||||
* Copyright (c) 2003 Open Source Development Lab
|
||||
* Copyright (c) 2004 Pavel Machek <pavel@suse.cz>
|
||||
*
|
||||
* This file is released under the GPLv2.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/fs.h>
|
||||
#include "power.h"
|
||||
|
||||
|
||||
extern suspend_disk_method_t pm_disk_mode;
|
||||
extern struct pm_ops * pm_ops;
|
||||
|
||||
extern int swsusp_suspend(void);
|
||||
extern int swsusp_write(void);
|
||||
extern int swsusp_check(void);
|
||||
extern int swsusp_read(void);
|
||||
extern void swsusp_close(void);
|
||||
extern int swsusp_resume(void);
|
||||
extern int swsusp_free(void);
|
||||
|
||||
|
||||
static int noresume = 0;
|
||||
char resume_file[256] = CONFIG_PM_STD_PARTITION;
|
||||
dev_t swsusp_resume_device;
|
||||
|
||||
/**
|
||||
* power_down - Shut machine down for hibernate.
|
||||
* @mode: Suspend-to-disk mode
|
||||
*
|
||||
* Use the platform driver, if configured so, and return gracefully if it
|
||||
* fails.
|
||||
* Otherwise, try to power off and reboot. If they fail, halt the machine,
|
||||
* there ain't no turning back.
|
||||
*/
|
||||
|
||||
/**
 *	power_down - Shut machine down for hibernate.
 *	@mode:	Suspend-to-disk mode
 *
 *	Use the platform driver, if configured so, and return gracefully if it
 *	fails. Otherwise, try to power off or reboot. If those fail, halt the
 *	machine: there is no turning back once the image is on disk.
 *	Never returns.
 */
static void power_down(suspend_disk_method_t mode)
{
	unsigned long flags;

	/* No interruptions past this point; the image is already written. */
	local_irq_save(flags);
	switch(mode) {
	case PM_DISK_PLATFORM:
		device_shutdown();
		/* Return value deliberately ignored: the old code stored it
		 * into a local that was never read, and there is nothing we
		 * could do on failure anyway except fall through below. */
		pm_ops->enter(PM_SUSPEND_DISK);
		break;
	case PM_DISK_SHUTDOWN:
		printk("Powering off system\n");
		device_shutdown();
		machine_power_off();
		break;
	case PM_DISK_REBOOT:
		device_shutdown();
		machine_restart(NULL);
		break;
	}
	machine_halt();
	/* Valid image is on the disk, if we continue we risk serious data corruption
	   after resume. */
	printk(KERN_CRIT "Please power me down manually\n");
	while(1);
}
|
||||
|
||||
|
||||
static int in_suspend __nosavedata = 0;
|
||||
|
||||
|
||||
/**
|
||||
* free_some_memory - Try to free as much memory as possible
|
||||
*
|
||||
* ... but do not OOM-kill anyone
|
||||
*
|
||||
* Notice: all userland should be stopped at this point, or
|
||||
* livelock is possible.
|
||||
*/
|
||||
|
||||
/**
 *	free_some_memory - Try to free as much memory as possible
 *
 *	... but do not OOM-kill anyone. Spins a little text spinner while
 *	repeatedly calling shrink_all_memory() until it frees nothing more.
 *
 *	Notice: all userland should be stopped at this point, or
 *	livelock is possible.
 */
static void free_some_memory(void)
{
	unsigned int spinner = 0;
	unsigned int freed;
	unsigned long pages = 0;
	const char *glyphs = "-\\|/";

	printk("Freeing memory... ");
	while ((freed = shrink_all_memory(10000))) {
		pages += freed;
		printk("\b%c", glyphs[spinner]);
		spinner = (spinner + 1) % 4;
	}
	printk("\bdone (%li pages freed)\n", pages);
}
|
||||
|
||||
|
||||
static inline void platform_finish(void)
|
||||
{
|
||||
if (pm_disk_mode == PM_DISK_PLATFORM) {
|
||||
if (pm_ops && pm_ops->finish)
|
||||
pm_ops->finish(PM_SUSPEND_DISK);
|
||||
}
|
||||
}
|
||||
|
||||
static void finish(void)
|
||||
{
|
||||
device_resume();
|
||||
platform_finish();
|
||||
enable_nonboot_cpus();
|
||||
thaw_processes();
|
||||
pm_restore_console();
|
||||
}
|
||||
|
||||
|
||||
static int prepare_processes(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
pm_prepare_console();
|
||||
|
||||
sys_sync();
|
||||
|
||||
if (freeze_processes()) {
|
||||
error = -EBUSY;
|
||||
return error;
|
||||
}
|
||||
|
||||
if (pm_disk_mode == PM_DISK_PLATFORM) {
|
||||
if (pm_ops && pm_ops->prepare) {
|
||||
if ((error = pm_ops->prepare(PM_SUSPEND_DISK)))
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
/* Free memory before shutting down devices. */
|
||||
free_some_memory();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void unprepare_processes(void)
|
||||
{
|
||||
enable_nonboot_cpus();
|
||||
thaw_processes();
|
||||
pm_restore_console();
|
||||
}
|
||||
|
||||
static int prepare_devices(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
disable_nonboot_cpus();
|
||||
if ((error = device_suspend(PMSG_FREEZE))) {
|
||||
printk("Some devices failed to suspend\n");
|
||||
platform_finish();
|
||||
enable_nonboot_cpus();
|
||||
return error;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* pm_suspend_disk - The granpappy of power management.
|
||||
*
|
||||
* If we're going through the firmware, then get it over with quickly.
|
||||
*
|
||||
* If not, then call swsusp to do its thing, then figure out how
|
||||
* to power down the system.
|
||||
*/
|
||||
|
||||
int pm_suspend_disk(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
error = prepare_processes();
|
||||
if (!error) {
|
||||
error = prepare_devices();
|
||||
}
|
||||
|
||||
if (error) {
|
||||
unprepare_processes();
|
||||
return error;
|
||||
}
|
||||
|
||||
pr_debug("PM: Attempting to suspend to disk.\n");
|
||||
if (pm_disk_mode == PM_DISK_FIRMWARE)
|
||||
return pm_ops->enter(PM_SUSPEND_DISK);
|
||||
|
||||
pr_debug("PM: snapshotting memory.\n");
|
||||
in_suspend = 1;
|
||||
if ((error = swsusp_suspend()))
|
||||
goto Done;
|
||||
|
||||
if (in_suspend) {
|
||||
pr_debug("PM: writing image.\n");
|
||||
error = swsusp_write();
|
||||
if (!error)
|
||||
power_down(pm_disk_mode);
|
||||
} else
|
||||
pr_debug("PM: Image restored successfully.\n");
|
||||
swsusp_free();
|
||||
Done:
|
||||
finish();
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* software_resume - Resume from a saved image.
|
||||
*
|
||||
* Called as a late_initcall (so all devices are discovered and
|
||||
* initialized), we call swsusp to see if we have a saved image or not.
|
||||
 * If so, we quiesce devices, then restore the saved image. We will
|
||||
* return above (in pm_suspend_disk() ) if everything goes well.
|
||||
* Otherwise, we fail gracefully and return to the normally
|
||||
* scheduled program.
|
||||
*
|
||||
*/
|
||||
|
||||
static int software_resume(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (noresume) {
|
||||
/**
|
||||
* FIXME: If noresume is specified, we need to find the partition
|
||||
* and reset it back to normal swap space.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
pr_debug("PM: Checking swsusp image.\n");
|
||||
|
||||
if ((error = swsusp_check()))
|
||||
goto Done;
|
||||
|
||||
pr_debug("PM: Preparing processes for restore.\n");
|
||||
|
||||
if ((error = prepare_processes())) {
|
||||
swsusp_close();
|
||||
goto Cleanup;
|
||||
}
|
||||
|
||||
pr_debug("PM: Reading swsusp image.\n");
|
||||
|
||||
if ((error = swsusp_read()))
|
||||
goto Cleanup;
|
||||
|
||||
pr_debug("PM: Preparing devices for restore.\n");
|
||||
|
||||
if ((error = prepare_devices()))
|
||||
goto Free;
|
||||
|
||||
mb();
|
||||
|
||||
pr_debug("PM: Restoring saved image.\n");
|
||||
swsusp_resume();
|
||||
pr_debug("PM: Restore failed, recovering.n");
|
||||
finish();
|
||||
Free:
|
||||
swsusp_free();
|
||||
Cleanup:
|
||||
unprepare_processes();
|
||||
Done:
|
||||
pr_debug("PM: Resume from disk failed.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(software_resume);
|
||||
|
||||
|
||||
static char * pm_disk_modes[] = {
|
||||
[PM_DISK_FIRMWARE] = "firmware",
|
||||
[PM_DISK_PLATFORM] = "platform",
|
||||
[PM_DISK_SHUTDOWN] = "shutdown",
|
||||
[PM_DISK_REBOOT] = "reboot",
|
||||
};
|
||||
|
||||
/**
|
||||
* disk - Control suspend-to-disk mode
|
||||
*
|
||||
* Suspend-to-disk can be handled in several ways. The greatest
|
||||
* distinction is who writes memory to disk - the firmware or the OS.
|
||||
* If the firmware does it, we assume that it also handles suspending
|
||||
* the system.
|
||||
* If the OS does it, then we have three options for putting the system
|
||||
* to sleep - using the platform driver (e.g. ACPI or other PM registers),
|
||||
* powering off the system or rebooting the system (for testing).
|
||||
*
|
||||
* The system will support either 'firmware' or 'platform', and that is
|
||||
* known a priori (and encoded in pm_ops). But, the user may choose
|
||||
* 'shutdown' or 'reboot' as alternatives.
|
||||
*
|
||||
* show() will display what the mode is currently set to.
|
||||
* store() will accept one of
|
||||
*
|
||||
* 'firmware'
|
||||
* 'platform'
|
||||
* 'shutdown'
|
||||
* 'reboot'
|
||||
*
|
||||
* It will only change to 'firmware' or 'platform' if the system
|
||||
* supports it (as determined from pm_ops->pm_disk_mode).
|
||||
*/
|
||||
|
||||
static ssize_t disk_show(struct subsystem * subsys, char * buf)
|
||||
{
|
||||
return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]);
|
||||
}
|
||||
|
||||
|
||||
/* sysfs 'disk' write: select a suspend-to-disk mode by name.
 * 'shutdown' and 'reboot' are always accepted; 'firmware'/'platform'
 * only when pm_ops advertises that mode. */
static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
{
	int error = 0;
	int i;
	int len;
	char *p;
	suspend_disk_method_t mode = 0;

	p = memchr(buf, '\n', n);
	len = p ? p - buf : n;

	down(&pm_sem);
	for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) {
		if (!strncmp(buf, pm_disk_modes[i], len)) {
			mode = i;
			break;
		}
	}
	if (!mode) {
		error = -EINVAL;
	} else if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT) {
		pm_disk_mode = mode;
	} else if (pm_ops && pm_ops->enter &&
		   (mode == pm_ops->pm_disk_mode)) {
		pm_disk_mode = mode;
	} else {
		error = -EINVAL;
	}

	/* NOTE(review): on unrecognized input mode stays 0, so this prints
	 * pm_disk_modes[0] -- confirm that slot is populated. */
	pr_debug("PM: suspend-to-disk mode set to '%s'\n",
		 pm_disk_modes[mode]);
	up(&pm_sem);
	return error ? error : n;
}
|
||||
|
||||
power_attr(disk);
|
||||
|
||||
static ssize_t resume_show(struct subsystem * subsys, char *buf)
|
||||
{
|
||||
return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device),
|
||||
MINOR(swsusp_resume_device));
|
||||
}
|
||||
|
||||
/* sysfs 'resume' write: accept "major:minor", set the resume device, and
 * kick off a manual resume attempt.
 *
 * Fixes two defects in the original:
 *  - 'error' was initialized to -EINVAL and never updated, so even a
 *    valid, accepted write returned -EINVAL to userspace;
 *  - the 'p'/'len' newline-trimming locals were computed but never used.
 */
static ssize_t resume_store(struct subsystem * subsys, const char * buf, size_t n)
{
	unsigned int maj, min;
	int error = -EINVAL;
	dev_t res;

	if (sscanf(buf, "%u:%u", &maj, &min) == 2) {
		res = MKDEV(maj,min);
		/* Round-trip check rejects values that do not fit dev_t. */
		if (maj == MAJOR(res) && min == MINOR(res)) {
			error = 0;
			swsusp_resume_device = res;
			printk("Attempting manual resume\n");
			noresume = 0;
			software_resume();
		}
	}

	return error >= 0 ? n : error;
}
|
||||
|
||||
power_attr(resume);
|
||||
|
||||
static struct attribute * g[] = {
|
||||
&disk_attr.attr,
|
||||
&resume_attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
||||
static struct attribute_group attr_group = {
|
||||
.attrs = g,
|
||||
};
|
||||
|
||||
|
||||
/* Register the 'disk' and 'resume' attributes under /sys/power. */
static int __init pm_disk_init(void)
{
	return sysfs_create_group(&power_subsys.kset.kobj, &attr_group);
}
|
||||
|
||||
core_initcall(pm_disk_init);
|
||||
|
||||
|
||||
static int __init resume_setup(char *str)
|
||||
{
|
||||
if (noresume)
|
||||
return 1;
|
||||
|
||||
strncpy( resume_file, str, 255 );
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int __init noresume_setup(char *str)
|
||||
{
|
||||
noresume = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("noresume", noresume_setup);
|
||||
__setup("resume=", resume_setup);
|
||||
269
kernel/power/main.c
Normal file
269
kernel/power/main.c
Normal file
@@ -0,0 +1,269 @@
|
||||
/*
|
||||
* kernel/power/main.c - PM subsystem core functionality.
|
||||
*
|
||||
* Copyright (c) 2003 Patrick Mochel
|
||||
* Copyright (c) 2003 Open Source Development Lab
|
||||
*
|
||||
* This file is released under the GPLv2
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/pm.h>
|
||||
|
||||
|
||||
#include "power.h"
|
||||
|
||||
DECLARE_MUTEX(pm_sem);
|
||||
|
||||
struct pm_ops * pm_ops = NULL;
|
||||
suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN;
|
||||
|
||||
/**
|
||||
* pm_set_ops - Set the global power method table.
|
||||
* @ops: Pointer to ops structure.
|
||||
*/
|
||||
|
||||
void pm_set_ops(struct pm_ops * ops)
|
||||
{
|
||||
down(&pm_sem);
|
||||
pm_ops = ops;
|
||||
up(&pm_sem);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* suspend_prepare - Do prep work before entering low-power state.
|
||||
* @state: State we're entering.
|
||||
*
|
||||
* This is common code that is called for each state that we're
|
||||
* entering. Allocate a console, stop all processes, then make sure
|
||||
* the platform can enter the requested state.
|
||||
*/
|
||||
|
||||
/* Common prep for every sleep state: claim the console, freeze tasks,
 * let the platform prepare, and suspend devices. On any failure the
 * partial work is unwound via the labels below. */
static int suspend_prepare(suspend_state_t state)
{
	int error = 0;

	if (!pm_ops || !pm_ops->enter)
		return -EPERM;

	pm_prepare_console();

	if (freeze_processes()) {
		error = -EAGAIN;
		goto Thaw;
	}

	if (pm_ops->prepare) {
		error = pm_ops->prepare(state);
		if (error)
			goto Thaw;
	}

	error = device_suspend(PMSG_SUSPEND);
	if (error) {
		printk(KERN_ERR "Some devices failed to suspend\n");
		goto Finish;
	}
	return 0;
 Finish:
	if (pm_ops->finish)
		pm_ops->finish(state);
 Thaw:
	thaw_processes();
	pm_restore_console();
	return error;
}
|
||||
|
||||
|
||||
/* Final descent: with interrupts off, power devices down, enter the
 * platform sleep state, and power devices back up on the way out. */
static int suspend_enter(suspend_state_t state)
{
	unsigned long flags;
	int error;

	local_irq_save(flags);

	error = device_power_down(PMSG_SUSPEND);
	if (error) {
		printk(KERN_ERR "Some devices failed to power down\n");
		goto Done;
	}
	error = pm_ops->enter(state);
	device_power_up();
 Done:
	local_irq_restore(flags);
	return error;
}
|
||||
|
||||
|
||||
/**
|
||||
* suspend_finish - Do final work before exiting suspend sequence.
|
||||
* @state: State we're coming out of.
|
||||
*
|
||||
* Call platform code to clean up, restart processes, and free the
|
||||
* console that we've allocated. This is not called for suspend-to-disk.
|
||||
*/
|
||||
|
||||
/**
 *	suspend_finish - Do final work before exiting suspend sequence.
 *	@state:		State we're coming out of.
 *
 *	Call platform code to clean up, restart processes, and free the
 *	console that we've allocated. Not called for suspend-to-disk.
 */
static void suspend_finish(suspend_state_t state)
{
	device_resume();
	if (pm_ops && pm_ops->finish)
		pm_ops->finish(state);
	thaw_processes();
	pm_restore_console();
}
|
||||
|
||||
|
||||
|
||||
|
||||
static char * pm_states[] = {
|
||||
[PM_SUSPEND_STANDBY] = "standby",
|
||||
[PM_SUSPEND_MEM] = "mem",
|
||||
[PM_SUSPEND_DISK] = "disk",
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* enter_state - Do common work of entering low-power state.
|
||||
* @state: pm_state structure for state we're entering.
|
||||
*
|
||||
* Make sure we're the only ones trying to enter a sleep state. Fail
|
||||
* if someone has beat us to it, since we don't want anything weird to
|
||||
* happen when we wake up.
|
||||
 * Then, do the setup for suspend, enter the state, and clean up (after
|
||||
* we've woken up).
|
||||
*/
|
||||
|
||||
/* Serialize all sleep attempts behind pm_sem (trylock: fail rather than
 * queue), dispatch suspend-to-disk separately, and run the
 * prepare/enter/finish sequence for the other states. */
static int enter_state(suspend_state_t state)
{
	int error;

	if (down_trylock(&pm_sem))
		return -EBUSY;

	if (state == PM_SUSPEND_DISK) {
		error = pm_suspend_disk();
		goto Unlock;
	}

	/* Suspend is hard to get right on SMP. */
	if (num_online_cpus() != 1) {
		error = -EPERM;
		goto Unlock;
	}

	pr_debug("PM: Preparing system for suspend\n");
	error = suspend_prepare(state);
	if (error)
		goto Unlock;

	pr_debug("PM: Entering state.\n");
	error = suspend_enter(state);

	pr_debug("PM: Finishing up.\n");
	suspend_finish(state);
 Unlock:
	up(&pm_sem);
	return error;
}
|
||||
|
||||
/*
|
||||
* This is main interface to the outside world. It needs to be
|
||||
* called from process context.
|
||||
*/
|
||||
int software_suspend(void)
|
||||
{
|
||||
return enter_state(PM_SUSPEND_DISK);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* pm_suspend - Externally visible function for suspending system.
|
||||
* @state: Enumarted value of state to enter.
|
||||
*
|
||||
* Determine whether or not value is within range, get state
|
||||
* structure, and enter (above).
|
||||
*/
|
||||
|
||||
int pm_suspend(suspend_state_t state)
|
||||
{
|
||||
if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX)
|
||||
return enter_state(state);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
decl_subsys(power,NULL,NULL);
|
||||
|
||||
|
||||
/**
|
||||
* state - control system power state.
|
||||
*
|
||||
* show() returns what states are supported, which is hard-coded to
|
||||
* 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and
|
||||
* 'disk' (Suspend-to-Disk).
|
||||
*
|
||||
* store() accepts one of those strings, translates it into the
|
||||
* proper enumerated value, and initiates a suspend transition.
|
||||
*/
|
||||
|
||||
static ssize_t state_show(struct subsystem * subsys, char * buf)
|
||||
{
|
||||
int i;
|
||||
char * s = buf;
|
||||
|
||||
for (i = 0; i < PM_SUSPEND_MAX; i++) {
|
||||
if (pm_states[i])
|
||||
s += sprintf(s,"%s ",pm_states[i]);
|
||||
}
|
||||
s += sprintf(s,"\n");
|
||||
return (s - buf);
|
||||
}
|
||||
|
||||
static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n)
|
||||
{
|
||||
suspend_state_t state = PM_SUSPEND_STANDBY;
|
||||
char ** s;
|
||||
char *p;
|
||||
int error;
|
||||
int len;
|
||||
|
||||
p = memchr(buf, '\n', n);
|
||||
len = p ? p - buf : n;
|
||||
|
||||
for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
|
||||
if (*s && !strncmp(buf, *s, len))
|
||||
break;
|
||||
}
|
||||
if (*s)
|
||||
error = enter_state(state);
|
||||
else
|
||||
error = -EINVAL;
|
||||
return error ? error : n;
|
||||
}
|
||||
|
||||
power_attr(state);
|
||||
|
||||
static struct attribute * g[] = {
|
||||
&state_attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group attr_group = {
|
||||
.attrs = g,
|
||||
};
|
||||
|
||||
|
||||
/* Register the power subsystem and its /sys/power/state attribute. */
static int __init pm_init(void)
{
	int error;

	error = subsystem_register(&power_subsys);
	if (error)
		return error;
	return sysfs_create_group(&power_subsys.kset.kobj,&attr_group);
}
|
||||
|
||||
core_initcall(pm_init);
|
||||
265
kernel/power/pm.c
Normal file
265
kernel/power/pm.c
Normal file
@@ -0,0 +1,265 @@
|
||||
/*
|
||||
* pm.c - Power management interface
|
||||
*
|
||||
* Copyright (C) 2000 Andrew Henroid
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/pm.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
int pm_active;
|
||||
|
||||
/*
|
||||
* Locking notes:
|
||||
* pm_devs_lock can be a semaphore providing pm ops are not called
|
||||
* from an interrupt handler (already a bad idea so no change here). Each
|
||||
* change must be protected so that an unlink of an entry doesn't clash
|
||||
* with a pm send - which is permitted to sleep in the current architecture
|
||||
*
|
||||
* Module unloads clashing with pm events now work out safely, the module
|
||||
* unload path will block until the event has been sent. It may well block
|
||||
* until a resume but that will be fine.
|
||||
*/
|
||||
|
||||
static DECLARE_MUTEX(pm_devs_lock);
|
||||
static LIST_HEAD(pm_devs);
|
||||
|
||||
/**
|
||||
* pm_register - register a device with power management
|
||||
* @type: device type
|
||||
* @id: device ID
|
||||
* @callback: callback function
|
||||
*
|
||||
* Add a device to the list of devices that wish to be notified about
|
||||
* power management events. A &pm_dev structure is returned on success,
|
||||
* on failure the return is %NULL.
|
||||
*
|
||||
* The callback function will be called in process context and
|
||||
* it may sleep.
|
||||
*/
|
||||
|
||||
struct pm_dev *pm_register(pm_dev_t type,
|
||||
unsigned long id,
|
||||
pm_callback callback)
|
||||
{
|
||||
struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL);
|
||||
if (dev) {
|
||||
memset(dev, 0, sizeof(*dev));
|
||||
dev->type = type;
|
||||
dev->id = id;
|
||||
dev->callback = callback;
|
||||
|
||||
down(&pm_devs_lock);
|
||||
list_add(&dev->entry, &pm_devs);
|
||||
up(&pm_devs_lock);
|
||||
}
|
||||
return dev;
|
||||
}
|
||||
|
||||
/**
|
||||
* pm_unregister - unregister a device with power management
|
||||
* @dev: device to unregister
|
||||
*
|
||||
* Remove a device from the power management notification lists. The
|
||||
* dev passed must be a handle previously returned by pm_register.
|
||||
*/
|
||||
|
||||
void pm_unregister(struct pm_dev *dev)
|
||||
{
|
||||
if (dev) {
|
||||
down(&pm_devs_lock);
|
||||
list_del(&dev->entry);
|
||||
up(&pm_devs_lock);
|
||||
|
||||
kfree(dev);
|
||||
}
|
||||
}
|
||||
|
||||
static void __pm_unregister(struct pm_dev *dev)
|
||||
{
|
||||
if (dev) {
|
||||
list_del(&dev->entry);
|
||||
kfree(dev);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* pm_unregister_all - unregister all devices with matching callback
|
||||
* @callback: callback function pointer
|
||||
*
|
||||
* Unregister every device that would call the callback passed. This
|
||||
* is primarily meant as a helper function for loadable modules. It
|
||||
* enables a module to give up all its managed devices without keeping
|
||||
* its own private list.
|
||||
*/
|
||||
|
||||
void pm_unregister_all(pm_callback callback)
{
	struct list_head *entry;
	struct list_head *next;

	if (!callback)
		return;

	down(&pm_devs_lock);
	entry = pm_devs.next;
	while (entry != &pm_devs) {
		struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);

		/* Grab the successor before the entry may be freed. */
		next = entry->next;
		if (dev->callback == callback)
			__pm_unregister(dev);
		entry = next;
	}
	up(&pm_devs_lock);
}
|
||||
|
||||
/**
|
||||
* pm_send - send request to a single device
|
||||
* @dev: device to send to
|
||||
* @rqst: power management request
|
||||
* @data: data for the callback
|
||||
*
|
||||
* Issue a power management request to a given device. The
|
||||
* %PM_SUSPEND and %PM_RESUME events are handled specially. The
|
||||
* data field must hold the intended next state. No call is made
|
||||
* if the state matches.
|
||||
*
|
||||
* BUGS: what stops two power management requests occurring in parallel
|
||||
* and conflicting.
|
||||
*
|
||||
* WARNING: Calling pm_send directly is not generally recommended, in
|
||||
* particular there is no locking against the pm_dev going away. The
|
||||
* caller must maintain all needed locking or have 'inside knowledge'
|
||||
* on the safety. Also remember that this function is not locked against
|
||||
* pm_unregister. This means that you must handle SMP races on callback
|
||||
* execution and unload yourself.
|
||||
*/
|
||||
|
||||
static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data)
{
	int status = 0;
	unsigned long prev_state, next_state;

	/* Callbacks may sleep; interrupt context is a caller bug. */
	if (in_interrupt())
		BUG();

	switch (rqst) {
	case PM_SUSPEND:
	case PM_RESUME:
		prev_state = dev->state;
		next_state = (unsigned long) data;
		if (prev_state == next_state) {
			/* Already in the requested state: just record it. */
			dev->prev_state = prev_state;
			break;
		}
		if (dev->callback)
			status = (*dev->callback)(dev, rqst, data);
		if (!status) {
			dev->state = next_state;
			dev->prev_state = prev_state;
		}
		break;
	default:
		if (dev->callback)
			status = (*dev->callback)(dev, rqst, data);
		break;
	}
	return status;
}
|
||||
|
||||
/*
|
||||
* Undo incomplete request
|
||||
*/
|
||||
static void pm_undo_all(struct pm_dev *last)
|
||||
{
|
||||
struct list_head *entry = last->entry.prev;
|
||||
while (entry != &pm_devs) {
|
||||
struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
|
||||
if (dev->state != dev->prev_state) {
|
||||
/* previous state was zero (running) resume or
|
||||
* previous state was non-zero (suspended) suspend
|
||||
*/
|
||||
pm_request_t undo = (dev->prev_state
|
||||
? PM_SUSPEND:PM_RESUME);
|
||||
pm_send(dev, undo, (void*) dev->prev_state);
|
||||
}
|
||||
entry = entry->prev;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* pm_send_all - send request to all managed devices
|
||||
* @rqst: power management request
|
||||
* @data: data for the callback
|
||||
*
|
||||
* Issue a power management request to a all devices. The
|
||||
* %PM_SUSPEND events are handled specially. Any device is
|
||||
* permitted to fail a suspend by returning a non zero (error)
|
||||
* value from its callback function. If any device vetoes a
|
||||
* suspend request then all other devices that have suspended
|
||||
* during the processing of this request are restored to their
|
||||
* previous state.
|
||||
*
|
||||
* WARNING: This function takes the pm_devs_lock. The lock is not dropped until
|
||||
* the callbacks have completed. This prevents races against pm locking
|
||||
* functions, races against module unload pm_unregister code. It does
|
||||
* mean however that you must not issue pm_ functions within the callback
|
||||
* or you will deadlock and users will hate you.
|
||||
*
|
||||
* Zero is returned on success. If a suspend fails then the status
|
||||
* from the device that vetoes the suspend is returned.
|
||||
*
|
||||
* BUGS: what stops two power management requests occurring in parallel
|
||||
* and conflicting.
|
||||
*/
|
||||
|
||||
int pm_send_all(pm_request_t rqst, void *data)
{
	struct list_head *entry;

	down(&pm_devs_lock);
	for (entry = pm_devs.next; entry != &pm_devs; entry = entry->next) {
		struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
		int status;

		if (!dev->callback)
			continue;

		status = pm_send(dev, rqst, data);
		if (status) {
			/* return devices to previous state on
			 * failed suspend request
			 */
			if (rqst == PM_SUSPEND)
				pm_undo_all(dev);
			up(&pm_devs_lock);
			return status;
		}
	}
	up(&pm_devs_lock);
	return 0;
}
|
||||
|
||||
EXPORT_SYMBOL(pm_register);
|
||||
EXPORT_SYMBOL(pm_unregister);
|
||||
EXPORT_SYMBOL(pm_unregister_all);
|
||||
EXPORT_SYMBOL(pm_send_all);
|
||||
EXPORT_SYMBOL(pm_active);
|
||||
|
||||
|
||||
52
kernel/power/power.h
Normal file
52
kernel/power/power.h
Normal file
@@ -0,0 +1,52 @@
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/utsname.h>
|
||||
|
||||
/* With SUSPEND_CONSOLE defined, suspend looks *really* cool, but
|
||||
we probably do not take enough locks for switching consoles, etc,
|
||||
so bad things might happen.
|
||||
*/
|
||||
#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
|
||||
#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
|
||||
#endif
|
||||
|
||||
|
||||
struct swsusp_info {
|
||||
struct new_utsname uts;
|
||||
u32 version_code;
|
||||
unsigned long num_physpages;
|
||||
int cpus;
|
||||
unsigned long image_pages;
|
||||
unsigned long pagedir_pages;
|
||||
suspend_pagedir_t * suspend_pagedir;
|
||||
swp_entry_t pagedir[768];
|
||||
} __attribute__((aligned(PAGE_SIZE)));
|
||||
|
||||
|
||||
|
||||
#ifdef CONFIG_SOFTWARE_SUSPEND
|
||||
extern int pm_suspend_disk(void);
|
||||
|
||||
#else
|
||||
static inline int pm_suspend_disk(void)
|
||||
{
|
||||
return -EPERM;
|
||||
}
|
||||
#endif
|
||||
extern struct semaphore pm_sem;
|
||||
#define power_attr(_name) \
|
||||
static struct subsys_attribute _name##_attr = { \
|
||||
.attr = { \
|
||||
.name = __stringify(_name), \
|
||||
.mode = 0644, \
|
||||
}, \
|
||||
.show = _name##_show, \
|
||||
.store = _name##_store, \
|
||||
}
|
||||
|
||||
extern struct subsystem power_subsys;
|
||||
|
||||
extern int freeze_processes(void);
|
||||
extern void thaw_processes(void);
|
||||
|
||||
extern int pm_prepare_console(void);
|
||||
extern void pm_restore_console(void);
|
||||
45
kernel/power/poweroff.c
Normal file
45
kernel/power/poweroff.c
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* poweroff.c - sysrq handler to gracefully power down machine.
|
||||
*
|
||||
* This file is released under the GPL v2
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sysrq.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/pm.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
||||
/*
|
||||
* When the user hits Sys-Rq o to power down the machine this is the
|
||||
* callback we use.
|
||||
*/
|
||||
|
||||
static void do_poweroff(void *dummy)
|
||||
{
|
||||
if (pm_power_off)
|
||||
pm_power_off();
|
||||
}
|
||||
|
||||
static DECLARE_WORK(poweroff_work, do_poweroff, NULL);
|
||||
|
||||
static void handle_poweroff(int key, struct pt_regs *pt_regs,
|
||||
struct tty_struct *tty)
|
||||
{
|
||||
schedule_work(&poweroff_work);
|
||||
}
|
||||
|
||||
static struct sysrq_key_op sysrq_poweroff_op = {
|
||||
.handler = handle_poweroff,
|
||||
.help_msg = "powerOff",
|
||||
.action_msg = "Power Off",
|
||||
.enable_mask = SYSRQ_ENABLE_BOOT,
|
||||
};
|
||||
|
||||
static int pm_sysrq_init(void)
|
||||
{
|
||||
register_sysrq_key('o', &sysrq_poweroff_op);
|
||||
return 0;
|
||||
}
|
||||
|
||||
subsys_initcall(pm_sysrq_init);
|
||||
121
kernel/power/process.c
Normal file
121
kernel/power/process.c
Normal file
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
 * kernel/power/process.c - Functions for starting/stopping processes on
|
||||
* suspend transitions.
|
||||
*
|
||||
* Originally from swsusp.
|
||||
*/
|
||||
|
||||
|
||||
#undef DEBUG
|
||||
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/*
|
||||
* Timeout for stopping processes
|
||||
*/
|
||||
#define TIMEOUT (6 * HZ)
|
||||
|
||||
|
||||
static inline int freezeable(struct task_struct * p)
|
||||
{
|
||||
if ((p == current) ||
|
||||
(p->flags & PF_NOFREEZE) ||
|
||||
(p->exit_state == EXIT_ZOMBIE) ||
|
||||
(p->exit_state == EXIT_DEAD) ||
|
||||
(p->state == TASK_STOPPED) ||
|
||||
(p->state == TASK_TRACED))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Refrigerator is place where frozen processes are stored :-). */
|
||||
void refrigerator(unsigned long flag)
|
||||
{
|
||||
/* Hmm, should we be allowed to suspend when there are realtime
|
||||
processes around? */
|
||||
long save;
|
||||
save = current->state;
|
||||
current->state = TASK_UNINTERRUPTIBLE;
|
||||
pr_debug("%s entered refrigerator\n", current->comm);
|
||||
printk("=");
|
||||
current->flags &= ~PF_FREEZE;
|
||||
|
||||
spin_lock_irq(¤t->sighand->siglock);
|
||||
recalc_sigpending(); /* We sent fake signal, clean it up */
|
||||
spin_unlock_irq(¤t->sighand->siglock);
|
||||
|
||||
current->flags |= PF_FROZEN;
|
||||
while (current->flags & PF_FROZEN)
|
||||
schedule();
|
||||
pr_debug("%s left refrigerator\n", current->comm);
|
||||
current->state = save;
|
||||
}
|
||||
|
||||
/* 0 = success, else # of processes that we failed to stop */
/*
 * Ask every freezeable task to enter the refrigerator by setting
 * PF_FREEZE and sending it a fake signal, then retry until no task is
 * left to signal or TIMEOUT expires.  Note: a task counted in `todo`
 * may already have frozen by the time we give up, so the returned
 * count is an upper bound on the stragglers.
 */
int freeze_processes(void)
{
	int todo;			/* tasks signalled in this sweep */
	unsigned long start_time;
	struct task_struct *g, *p;

	printk( "Stopping tasks: " );
	start_time = jiffies;
	do {
		todo = 0;
		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			unsigned long flags;
			if (!freezeable(p))
				continue;
			/* Skip tasks already frozen or otherwise not running. */
			if ((p->flags & PF_FROZEN) ||
			    (p->state == TASK_TRACED) ||
			    (p->state == TASK_STOPPED))
				continue;

			/* FIXME: smp problem here: we may not access other process' flags
			   without locking */
			p->flags |= PF_FREEZE;
			/* Fake a signal so the task leaves any interruptible sleep
			 * and notices PF_FREEZE on its way back to userspace. */
			spin_lock_irqsave(&p->sighand->siglock, flags);
			signal_wake_up(p, 0);
			spin_unlock_irqrestore(&p->sighand->siglock, flags);
			todo++;
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);
		yield();			/* Yield is okay here */
		if (time_after(jiffies, start_time + TIMEOUT)) {
			printk( "\n" );
			printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo );
			return todo;
		}
	} while(todo);

	printk( "|\n" );
	BUG_ON(in_atomic());
	return 0;
}
|
||||
|
||||
/*
 * Release every task parked in the refrigerator: clear PF_FROZEN and
 * wake it so it falls out of the schedule() loop in refrigerator().
 */
void thaw_processes(void)
{
	struct task_struct *g, *p;

	printk( "Restarting tasks..." );
	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		if (!freezeable(p))
			continue;
		if (p->flags & PF_FROZEN) {
			p->flags &= ~PF_FROZEN;
			wake_up_process(p);
		} else
			/* Freezeable but never froze — freeze_processes()
			 * either timed out or missed it. */
			printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
	} while_each_thread(g, p);

	read_unlock(&tasklist_lock);
	/* Give the thawed tasks a chance to run. */
	schedule();
	printk( " done\n" );
}

EXPORT_SYMBOL(refrigerator);
|
||||
85
kernel/power/smp.c
Normal file
85
kernel/power/smp.c
Normal file
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
* drivers/power/smp.c - Functions for stopping other CPUs.
|
||||
*
|
||||
* Copyright 2004 Pavel Machek <pavel@suse.cz>
|
||||
* Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz>
|
||||
*
|
||||
* This file is released under the GPLv2.
|
||||
*/
|
||||
|
||||
#undef DEBUG
|
||||
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
static atomic_t cpu_counter, freeze;
|
||||
|
||||
|
||||
/*
 * IPI handler run on each non-boot CPU by disable_nonboot_cpus():
 * saves processor state, signals arrival via cpu_counter, then spins
 * until the `freeze` flag is cleared by enable_nonboot_cpus().
 * @data is unused (smp_call_function passes NULL).
 */
static void smp_pause(void * data)
{
	struct saved_context ctxt;
	__save_processor_state(&ctxt);
	printk("Sleeping in:\n");
	dump_stack();
	/* Tell the boot CPU this CPU has parked. */
	atomic_inc(&cpu_counter);
	while (atomic_read(&freeze)) {
		/* FIXME: restore takes place at random piece inside this.
		   This should probably be written in assembly, and
		   preserve general-purpose registers, too

		   What about stack? We may need to move to new stack here.

		   This should better be ran with interrupts disabled.
		*/
		cpu_relax();
		barrier();
	}
	/* Released: announce departure and restore saved state. */
	atomic_dec(&cpu_counter);
	__restore_processor_state(&ctxt);
}
|
||||
|
||||
/* CPU affinity of the suspending task, restored by enable_nonboot_cpus(). */
static cpumask_t oldmask;

/*
 * Pin the current task to CPU 0, then park all other online CPUs in
 * smp_pause() via an IPI, waiting until each has checked in through
 * cpu_counter.
 */
void disable_nonboot_cpus(void)
{
	printk("Freezing CPUs (at %d)", smp_processor_id());
	oldmask = current->cpus_allowed;
	set_cpus_allowed(current, cpumask_of_cpu(0));
	/* Sleep ~1s to let the migration to CPU 0 actually happen. */
	current->state = TASK_INTERRUPTIBLE;
	schedule_timeout(HZ);
	printk("...");
	BUG_ON(smp_processor_id() != 0);

	/* FIXME: for this to work, all the CPUs must be running
	 * "idle" thread (or we deadlock). Is that guaranteed? */

	atomic_set(&cpu_counter, 0);
	atomic_set(&freeze, 1);
	smp_call_function(smp_pause, NULL, 0, 0);
	/* Wait for every other online CPU to enter smp_pause(). */
	while (atomic_read(&cpu_counter) < (num_online_cpus() - 1)) {
		cpu_relax();
		barrier();
	}
	printk("ok\n");
}
|
||||
|
||||
/*
 * Release the CPUs parked by disable_nonboot_cpus(): drop the `freeze`
 * flag, wait for every CPU to leave smp_pause(), and restore the
 * caller's original CPU affinity.
 */
void enable_nonboot_cpus(void)
{
	printk("Restarting CPUs");
	atomic_set(&freeze, 0);
	/* Wait until every paused CPU has decremented cpu_counter. */
	while (atomic_read(&cpu_counter)) {
		cpu_relax();
		barrier();
	}
	printk("...");
	set_cpus_allowed(current, oldmask);
	schedule();
	printk("ok\n");

}
|
||||
|
||||
|
||||
1433
kernel/power/swsusp.c
Normal file
1433
kernel/power/swsusp.c
Normal file
File diff suppressed because it is too large
Load Diff
996
kernel/printk.c
Normal file
996
kernel/printk.c
Normal file
@@ -0,0 +1,996 @@
|
||||
/*
|
||||
* linux/kernel/printk.c
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
*
|
||||
* Modified to make sys_syslog() more flexible: added commands to
|
||||
* return the last 4k of kernel messages, regardless of whether
|
||||
* they've been read or not. Added option to suppress kernel printk's
|
||||
* to the console. Added hook for sending the console messages
|
||||
* elsewhere, in preparation for a serial line console (someday).
|
||||
* Ted Ts'o, 2/11/93.
|
||||
* Modified for sysctl support, 1/8/97, Chris Horn.
|
||||
* Fixed SMP synchronization, 08/08/99, Manfred Spraul
|
||||
* manfreds@colorfullife.com
|
||||
* Rewrote bits to get rid of console_lock
|
||||
* 01Mar01 Andrew Morton <andrewm@uow.edu.au>
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/tty.h>
|
||||
#include <linux/tty_driver.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/console.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/interrupt.h> /* For in_interrupt() */
|
||||
#include <linux/config.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
|
||||
|
||||
/* printk's without a loglevel use this.. */
|
||||
#define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */
|
||||
|
||||
/* We show everything that is MORE important than this.. */
|
||||
#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
|
||||
#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */
|
||||
|
||||
DECLARE_WAIT_QUEUE_HEAD(log_wait);
|
||||
|
||||
int console_printk[4] = {
|
||||
DEFAULT_CONSOLE_LOGLEVEL, /* console_loglevel */
|
||||
DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */
|
||||
MINIMUM_CONSOLE_LOGLEVEL, /* minimum_console_loglevel */
|
||||
DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */
|
||||
};
|
||||
|
||||
EXPORT_SYMBOL(console_printk);
|
||||
|
||||
/*
|
||||
* Low lever drivers may need that to know if they can schedule in
|
||||
* their unblank() callback or not. So let's export it.
|
||||
*/
|
||||
int oops_in_progress;
|
||||
EXPORT_SYMBOL(oops_in_progress);
|
||||
|
||||
/*
|
||||
* console_sem protects the console_drivers list, and also
|
||||
* provides serialisation for access to the entire console
|
||||
* driver system.
|
||||
*/
|
||||
static DECLARE_MUTEX(console_sem);
|
||||
struct console *console_drivers;
|
||||
/*
|
||||
* This is used for debugging the mess that is the VT code by
|
||||
* keeping track if we have the console semaphore held. It's
|
||||
* definitely not the perfect debug tool (we don't know if _WE_
|
||||
* hold it are racing, but it helps tracking those weird code
|
||||
* path in the console code where we end up in places I want
|
||||
* locked without the console sempahore held
|
||||
*/
|
||||
static int console_locked;
|
||||
|
||||
/*
|
||||
* logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
|
||||
* It is also used in interesting ways to provide interlocking in
|
||||
* release_console_sem().
|
||||
*/
|
||||
static DEFINE_SPINLOCK(logbuf_lock);
|
||||
|
||||
static char __log_buf[__LOG_BUF_LEN];
|
||||
static char *log_buf = __log_buf;
|
||||
static int log_buf_len = __LOG_BUF_LEN;
|
||||
|
||||
#define LOG_BUF_MASK (log_buf_len-1)
|
||||
#define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
|
||||
|
||||
/*
|
||||
* The indices into log_buf are not constrained to log_buf_len - they
|
||||
* must be masked before subscripting
|
||||
*/
|
||||
static unsigned long log_start; /* Index into log_buf: next char to be read by syslog() */
|
||||
static unsigned long con_start; /* Index into log_buf: next char to be sent to consoles */
|
||||
static unsigned long log_end; /* Index into log_buf: most-recently-written-char + 1 */
|
||||
static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */
|
||||
|
||||
/*
|
||||
* Array of consoles built from command line options (console=)
|
||||
*/
|
||||
struct console_cmdline
|
||||
{
|
||||
char name[8]; /* Name of the driver */
|
||||
int index; /* Minor dev. to use */
|
||||
char *options; /* Options for the driver */
|
||||
};
|
||||
|
||||
#define MAX_CMDLINECONSOLES 8
|
||||
|
||||
static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
|
||||
static int selected_console = -1;
|
||||
static int preferred_console = -1;
|
||||
|
||||
/* Flag: console code may call schedule() */
|
||||
static int console_may_schedule;
|
||||
|
||||
/*
|
||||
* Setup a list of consoles. Called from init/main.c
|
||||
*/
|
||||
/*
 * Parse one "console=" kernel command line argument of the form
 * NAME[IDX][,OPTIONS] (a bare number is shorthand for ttyS<number>)
 * and register it via add_preferred_console().
 */
static int __init console_setup(char *str)
{
	char name[sizeof(console_cmdline[0].name)];
	char *s, *options;
	int idx;

	/*
	 * Decode str into name, index, options.
	 */
	if (str[0] >= '0' && str[0] <= '9') {
		/* "console=0" style: treat the number as a ttyS index. */
		strcpy(name, "ttyS");
		strncpy(name + 4, str, sizeof(name) - 5);
	} else
		strncpy(name, str, sizeof(name) - 1);
	name[sizeof(name) - 1] = 0;
	if ((options = strchr(str, ',')) != NULL)
		*(options++) = 0;
#ifdef __sparc__
	/* Historical SPARC PROM console names. */
	if (!strcmp(str, "ttya"))
		strcpy(name, "ttyS0");
	if (!strcmp(str, "ttyb"))
		strcpy(name, "ttyS1");
#endif
	/* Split the trailing index digits off the driver name. */
	for(s = name; *s; s++)
		if ((*s >= '0' && *s <= '9') || *s == ',')
			break;
	idx = simple_strtoul(s, NULL, 10);
	*s = 0;

	add_preferred_console(name, idx, options);
	return 1;
}

__setup("console=", console_setup);
|
||||
|
||||
/**
|
||||
* add_preferred_console - add a device to the list of preferred consoles.
|
||||
*
|
||||
* The last preferred console added will be used for kernel messages
|
||||
* and stdin/out/err for init. Normally this is used by console_setup
|
||||
* above to handle user-supplied console arguments; however it can also
|
||||
* be used by arch-specific code either to override the user or more
|
||||
* commonly to provide a default console (ie from PROM variables) when
|
||||
* the user has not supplied one.
|
||||
*/
|
||||
int __init add_preferred_console(char *name, int idx, char *options)
|
||||
{
|
||||
struct console_cmdline *c;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* See if this tty is not yet registered, and
|
||||
* if we have a slot free.
|
||||
*/
|
||||
for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
|
||||
if (strcmp(console_cmdline[i].name, name) == 0 &&
|
||||
console_cmdline[i].index == idx) {
|
||||
selected_console = i;
|
||||
return 0;
|
||||
}
|
||||
if (i == MAX_CMDLINECONSOLES)
|
||||
return -E2BIG;
|
||||
selected_console = i;
|
||||
c = &console_cmdline[i];
|
||||
memcpy(c->name, name, sizeof(c->name));
|
||||
c->name[sizeof(c->name) - 1] = 0;
|
||||
c->options = options;
|
||||
c->index = idx;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Handle the "log_buf_len=" boot parameter: grow the printk ring
 * buffer to the requested size (rounded up to a power of two),
 * migrating any messages already logged into the new buffer.
 * Shrinking is silently ignored.
 */
static int __init log_buf_len_setup(char *str)
{
	unsigned long size = memparse(str, &str);
	unsigned long flags;

	/* LOG_BUF_MASK arithmetic requires a power-of-two length. */
	if (size)
		size = roundup_pow_of_two(size);
	if (size > log_buf_len) {
		unsigned long start, dest_idx, offset;
		char * new_log_buf;

		new_log_buf = alloc_bootmem(size);
		if (!new_log_buf) {
			printk("log_buf_len: allocation failed\n");
			goto out;
		}

		spin_lock_irqsave(&logbuf_lock, flags);
		log_buf_len = size;
		log_buf = new_log_buf;

		/* Copy from the earliest still-needed character (the
		 * older of the console and syslog cursors) to the end. */
		offset = start = min(con_start, log_start);
		dest_idx = 0;
		while (start != log_end) {
			log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)];
			start++;
			dest_idx++;
		}
		/* Rebase all cursors so index 0 is the copied start. */
		log_start -= offset;
		con_start -= offset;
		log_end -= offset;
		spin_unlock_irqrestore(&logbuf_lock, flags);

		printk("log_buf_len: %d\n", log_buf_len);
	}
out:

	return 1;
}

__setup("log_buf_len=", log_buf_len_setup);
|
||||
|
||||
/*
 * Commands to do_syslog:
 *
 *	0 -- Close the log.  Currently a NOP.
 *	1 -- Open the log. Currently a NOP.
 *	2 -- Read from the log.
 *	3 -- Read all messages remaining in the ring buffer.
 *	4 -- Read and clear all messages remaining in the ring buffer
 *	5 -- Clear ring buffer.
 *	6 -- Disable printk's to console
 *	7 -- Enable printk's to console
 *	8 -- Set level of messages printed to console
 *	9 -- Return number of unread characters in the log buffer
 *	10 -- Return size of the log buffer
 */
/*
 * Backend for sys_syslog() and /proc/kmsg.  Returns a non-negative
 * result (bytes read, count, or buffer size depending on @type) or a
 * negative errno.
 */
int do_syslog(int type, char __user * buf, int len)
{
	unsigned long i, j, limit, count;
	int do_clear = 0;
	char c;
	int error = 0;

	error = security_syslog(type);
	if (error)
		return error;

	switch (type) {
	case 0:		/* Close log */
		break;
	case 1:		/* Open log */
		break;
	case 2:		/* Read from log */
		error = -EINVAL;
		if (!buf || len < 0)
			goto out;
		error = 0;
		if (!len)
			goto out;
		if (!access_ok(VERIFY_WRITE, buf, len)) {
			error = -EFAULT;
			goto out;
		}
		/* Block until at least one unread character exists. */
		error = wait_event_interruptible(log_wait, (log_start - log_end));
		if (error)
			goto out;
		i = 0;
		spin_lock_irq(&logbuf_lock);
		/*
		 * The lock is dropped around each __put_user() because the
		 * copy may fault and sleep; printk() from other contexts
		 * must not be blocked meanwhile.
		 */
		while (!error && (log_start != log_end) && i < len) {
			c = LOG_BUF(log_start);
			log_start++;
			spin_unlock_irq(&logbuf_lock);
			error = __put_user(c,buf);
			buf++;
			i++;
			cond_resched();
			spin_lock_irq(&logbuf_lock);
		}
		spin_unlock_irq(&logbuf_lock);
		if (!error)
			error = i;
		break;
	case 4:		/* Read/clear last kernel messages */
		do_clear = 1;
		/* FALL THRU */
	case 3:		/* Read last kernel messages */
		error = -EINVAL;
		if (!buf || len < 0)
			goto out;
		error = 0;
		if (!len)
			goto out;
		if (!access_ok(VERIFY_WRITE, buf, len)) {
			error = -EFAULT;
			goto out;
		}
		count = len;
		if (count > log_buf_len)
			count = log_buf_len;
		spin_lock_irq(&logbuf_lock);
		if (count > logged_chars)
			count = logged_chars;
		if (do_clear)
			logged_chars = 0;
		limit = log_end;
		/*
		 * __put_user() could sleep, and while we sleep
		 * printk() could overwrite the messages
		 * we try to copy to user space. Therefore
		 * the messages are copied in reverse. <manfreds>
		 */
		for(i = 0; i < count && !error; i++) {
			j = limit-1-i;
			/* Stop if printk() has lapped this index. */
			if (j + log_buf_len < log_end)
				break;
			c = LOG_BUF(j);
			spin_unlock_irq(&logbuf_lock);
			error = __put_user(c,&buf[count-1-i]);
			cond_resched();
			spin_lock_irq(&logbuf_lock);
		}
		spin_unlock_irq(&logbuf_lock);
		if (error)
			break;
		error = i;
		if(i != count) {
			int offset = count-error;
			/* buffer overflow during copy, correct user buffer. */
			for(i=0;i<error;i++) {
				if (__get_user(c,&buf[i+offset]) ||
				    __put_user(c,&buf[i])) {
					error = -EFAULT;
					break;
				}
				cond_resched();
			}
		}
		break;
	case 5:		/* Clear ring buffer */
		logged_chars = 0;
		break;
	case 6:		/* Disable logging to console */
		console_loglevel = minimum_console_loglevel;
		break;
	case 7:		/* Enable logging to console */
		console_loglevel = default_console_loglevel;
		break;
	case 8:		/* Set level of messages printed to console */
		error = -EINVAL;
		if (len < 1 || len > 8)
			goto out;
		/* Never allow muting below the configured minimum. */
		if (len < minimum_console_loglevel)
			len = minimum_console_loglevel;
		console_loglevel = len;
		error = 0;
		break;
	case 9:		/* Number of chars in the log buffer */
		error = log_end - log_start;
		break;
	case 10:	/* Size of the log buffer */
		error = log_buf_len;
		break;
	default:
		error = -EINVAL;
		break;
	}
out:
	return error;
}
|
||||
|
||||
/* Syscall entry point; all work is done by do_syslog(). */
asmlinkage long sys_syslog(int type, char __user * buf, int len)
{
	return do_syslog(type, buf, len);
}
|
||||
|
||||
/*
|
||||
* Call the console drivers on a range of log_buf
|
||||
*/
|
||||
static void __call_console_drivers(unsigned long start, unsigned long end)
|
||||
{
|
||||
struct console *con;
|
||||
|
||||
for (con = console_drivers; con; con = con->next) {
|
||||
if ((con->flags & CON_ENABLED) && con->write)
|
||||
con->write(con, &LOG_BUF(start), end - start);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Write out chars from start to end - 1 inclusive
|
||||
*/
|
||||
static void _call_console_drivers(unsigned long start,
|
||||
unsigned long end, int msg_log_level)
|
||||
{
|
||||
if (msg_log_level < console_loglevel &&
|
||||
console_drivers && start != end) {
|
||||
if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) {
|
||||
/* wrapped write */
|
||||
__call_console_drivers(start & LOG_BUF_MASK,
|
||||
log_buf_len);
|
||||
__call_console_drivers(0, end & LOG_BUF_MASK);
|
||||
} else {
|
||||
__call_console_drivers(start, end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Call the console drivers, asking them to write out
 * log_buf[start] to log_buf[end - 1].
 * The console_sem must be held.
 *
 * Splits the range into lines, extracting a "<N>" loglevel prefix at
 * the start of each line so every line is emitted at its own level.
 * msg_level is static: a line split across calls keeps its level.
 */
static void call_console_drivers(unsigned long start, unsigned long end)
{
	unsigned long cur_index, start_print;
	static int msg_level = -1;

	if (((long)(start - end)) > 0)
		BUG();

	cur_index = start;
	start_print = start;
	while (cur_index != end) {
		/* At the start of a line, look for a "<0>".."<7>" tag. */
		if (	msg_level < 0 &&
			((end - cur_index) > 2) &&
			LOG_BUF(cur_index + 0) == '<' &&
			LOG_BUF(cur_index + 1) >= '0' &&
			LOG_BUF(cur_index + 1) <= '7' &&
			LOG_BUF(cur_index + 2) == '>')
		{
			msg_level = LOG_BUF(cur_index + 1) - '0';
			cur_index += 3;
			start_print = cur_index;	/* don't print the tag */
		}
		/* Scan to the end of the current line and emit it. */
		while (cur_index != end) {
			char c = LOG_BUF(cur_index);
			cur_index++;

			if (c == '\n') {
				if (msg_level < 0) {
					/*
					 * printk() has already given us loglevel tags in
					 * the buffer. This code is here in case the
					 * log buffer has wrapped right round and scribbled
					 * on those tags
					 */
					msg_level = default_message_loglevel;
				}
				_call_console_drivers(start_print, cur_index, msg_level);
				msg_level = -1;
				start_print = cur_index;
				break;
			}
		}
	}
	/* Flush any trailing partial line at the current level. */
	_call_console_drivers(start_print, end, msg_level);
}
|
||||
|
||||
static void emit_log_char(char c)
|
||||
{
|
||||
LOG_BUF(log_end) = c;
|
||||
log_end++;
|
||||
if (log_end - log_start > log_buf_len)
|
||||
log_start = log_end - log_buf_len;
|
||||
if (log_end - con_start > log_buf_len)
|
||||
con_start = log_end - log_buf_len;
|
||||
if (logged_chars < log_buf_len)
|
||||
logged_chars++;
|
||||
}
|
||||
|
||||
/*
 * Zap console related locks when oopsing. Only zap at most once
 * every 30 seconds (the 30*HZ window below; an older comment said
 * 10 seconds), to leave time for slow consoles to print a full oops.
 */
static void zap_locks(void)
{
	static unsigned long oops_timestamp;

	/* Rate-limit: skip if we zapped within the last 30*HZ jiffies. */
	if (time_after_eq(jiffies, oops_timestamp) &&
			!time_after(jiffies, oops_timestamp + 30*HZ))
		return;

	oops_timestamp = jiffies;

	/* If a crash is occurring, make sure we can't deadlock */
	spin_lock_init(&logbuf_lock);
	/* And make sure that we print immediately */
	init_MUTEX(&console_sem);
}
|
||||
|
||||
/* Whether printk() prefixes each line with a timestamp; default set
 * by CONFIG_PRINTK_TIME, enabled at runtime by the bare "time" boot
 * parameter. */
#if defined(CONFIG_PRINTK_TIME)
static int printk_time = 1;
#else
static int printk_time = 0;
#endif

static int __init printk_time_setup(char *str)
{
	/* Only the bare "time" parameter is accepted — no "time=...". */
	if (*str)
		return 0;
	printk_time = 1;
	return 1;
}

__setup("time", printk_time_setup);
|
||||
|
||||
/*
|
||||
* This is printk. It can be called from any context. We want it to work.
|
||||
*
|
||||
* We try to grab the console_sem. If we succeed, it's easy - we log the output and
|
||||
* call the console drivers. If we fail to get the semaphore we place the output
|
||||
* into the log buffer and return. The current holder of the console_sem will
|
||||
* notice the new output in release_console_sem() and will send it to the
|
||||
* consoles before releasing the semaphore.
|
||||
*
|
||||
* One effect of this deferred printing is that code which calls printk() and
|
||||
* then changes console_loglevel may break. This is because console_loglevel
|
||||
* is inspected when the actual printing occurs.
|
||||
*/
|
||||
asmlinkage int printk(const char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
int r;
|
||||
|
||||
va_start(args, fmt);
|
||||
r = vprintk(fmt, args);
|
||||
va_end(args);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Core of printk(): format into a static buffer under logbuf_lock,
 * copy into the log ring buffer — inserting "<N>" loglevel tags and
 * (optionally) timestamps at the start of each line — then try to
 * push the output to the consoles.  Returns the number of characters
 * logged (including any inserted tag bytes).
 */
asmlinkage int vprintk(const char *fmt, va_list args)
{
	unsigned long flags;
	int printed_len;
	char *p;
	static char printk_buf[1024];
	/* Nonzero while the next emitted char starts a new line. */
	static int log_level_unknown = 1;

	if (unlikely(oops_in_progress))
		zap_locks();

	/* This stops the holder of console_sem just where we want him */
	spin_lock_irqsave(&logbuf_lock, flags);

	/* Emit the output into the temporary buffer */
	printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);

	/*
	 * Copy the output into log_buf.  If the caller didn't provide
	 * appropriate log level tags, we insert them here
	 */
	for (p = printk_buf; *p; p++) {
		if (log_level_unknown) {
			/* log_level_unknown signals the start of a new line */
			if (printk_time) {
				int loglev_char;
				char tbuf[50], *tp;
				unsigned tlen;
				unsigned long long t;
				unsigned long nanosec_rem;

				/*
				 * force the log level token to be
				 * before the time output.
				 */
				if (p[0] == '<' && p[1] >='0' &&
				   p[1] <= '7' && p[2] == '>') {
					loglev_char = p[1];
					p += 3;
					printed_len += 3;
				} else {
					loglev_char = default_message_loglevel
						+ '0';
				}
				t = sched_clock();
				nanosec_rem = do_div(t, 1000000000);
				tlen = sprintf(tbuf,
						"<%c>[%5lu.%06lu] ",
						loglev_char,
						(unsigned long)t,
						nanosec_rem/1000);

				for (tp = tbuf; tp < tbuf + tlen; tp++)
					emit_log_char(*tp);
				/* tbuf includes the 3 tag chars already
				 * counted above, hence the -3. */
				printed_len += tlen - 3;
			} else {
				if (p[0] != '<' || p[1] < '0' ||
				   p[1] > '7' || p[2] != '>') {
					emit_log_char('<');
					emit_log_char(default_message_loglevel
						+ '0');
					emit_log_char('>');
				}
				printed_len += 3;
			}
			log_level_unknown = 0;
			if (!*p)
				break;
		}
		emit_log_char(*p);
		if (*p == '\n')
			log_level_unknown = 1;
	}

	if (!cpu_online(smp_processor_id()) &&
	    system_state != SYSTEM_RUNNING) {
		/*
		 * Some console drivers may assume that per-cpu resources have
		 * been allocated.  So don't allow them to be called by this
		 * CPU until it is officially up.  We shouldn't be calling into
		 * random console drivers on a CPU which doesn't exist yet..
		 */
		spin_unlock_irqrestore(&logbuf_lock, flags);
		goto out;
	}
	if (!down_trylock(&console_sem)) {
		console_locked = 1;
		/*
		 * We own the drivers.  We can drop the spinlock and let
		 * release_console_sem() print the text
		 */
		spin_unlock_irqrestore(&logbuf_lock, flags);
		console_may_schedule = 0;
		release_console_sem();
	} else {
		/*
		 * Someone else owns the drivers.  We drop the spinlock, which
		 * allows the semaphore holder to proceed and to call the
		 * console drivers with the output which we just produced.
		 */
		spin_unlock_irqrestore(&logbuf_lock, flags);
	}
out:
	return printed_len;
}
EXPORT_SYMBOL(printk);
EXPORT_SYMBOL(vprintk);
|
||||
|
||||
/**
|
||||
* acquire_console_sem - lock the console system for exclusive use.
|
||||
*
|
||||
* Acquires a semaphore which guarantees that the caller has
|
||||
* exclusive access to the console system and the console_drivers list.
|
||||
*
|
||||
* Can sleep, returns nothing.
|
||||
*/
|
||||
void acquire_console_sem(void)
|
||||
{
|
||||
if (in_interrupt())
|
||||
BUG();
|
||||
down(&console_sem);
|
||||
console_locked = 1;
|
||||
console_may_schedule = 1;
|
||||
}
|
||||
EXPORT_SYMBOL(acquire_console_sem);
|
||||
|
||||
int try_acquire_console_sem(void)
|
||||
{
|
||||
if (down_trylock(&console_sem))
|
||||
return -1;
|
||||
console_locked = 1;
|
||||
console_may_schedule = 0;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(try_acquire_console_sem);
|
||||
|
||||
/* Debug helper: nonzero while the console semaphore is held. */
int is_console_locked(void)
{
	return console_locked;
}
EXPORT_SYMBOL(is_console_locked);
|
||||
|
||||
/**
 * release_console_sem - unlock the console system
 *
 * Releases the semaphore which the caller holds on the console system
 * and the console driver list.
 *
 * While the semaphore was held, console output may have been buffered
 * by printk().  If this is the case, release_console_sem() emits
 * the output prior to releasing the semaphore.
 *
 * If there is output waiting for klogd, we wake it up.
 *
 * release_console_sem() may be called from any context.
 */
void release_console_sem(void)
{
	unsigned long flags;
	unsigned long _con_start, _log_end;
	unsigned long wake_klogd = 0;

	for ( ; ; ) {
		spin_lock_irqsave(&logbuf_lock, flags);
		/* Nonzero whenever unread syslog data exists. */
		wake_klogd |= log_start - log_end;
		if (con_start == log_end)
			break;			/* Nothing to print */
		_con_start = con_start;
		_log_end = log_end;
		con_start = log_end;		/* Flush */
		/*
		 * Drop only the spinlock (interrupts stay off) so printk()
		 * on other CPUs can append while we write to the consoles;
		 * irqs are restored after the drivers return.
		 */
		spin_unlock(&logbuf_lock);
		call_console_drivers(_con_start, _log_end);
		local_irq_restore(flags);
	}
	console_locked = 0;
	console_may_schedule = 0;
	up(&console_sem);
	/* Loop exited via break with logbuf_lock still held. */
	spin_unlock_irqrestore(&logbuf_lock, flags);
	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
		wake_up_interruptible(&log_wait);
}
EXPORT_SYMBOL(release_console_sem);
|
||||
|
||||
/** console_conditional_schedule - yield the CPU if required
 *
 * If the console code is currently allowed to sleep, and
 * if this CPU should yield the CPU to another task, do
 * so here.
 *
 * Must be called within acquire_console_sem().
 */
void __sched console_conditional_schedule(void)
{
	/* console_may_schedule is only set by the blocking
	 * acquire_console_sem(), never the trylock variant. */
	if (console_may_schedule)
		cond_resched();
}
EXPORT_SYMBOL(console_conditional_schedule);
|
||||
|
||||
/* Legacy helper: print a raw string at KERN_EMERG priority. */
void console_print(const char *s)
{
	printk(KERN_EMERG "%s", s);
}
EXPORT_SYMBOL(console_print);
|
||||
|
||||
/*
 * Call the unblank() hook of every enabled console driver, taking the
 * console semaphore around the traversal.  During an oops the
 * semaphore is only try-locked so a crashed holder cannot deadlock us.
 */
void console_unblank(void)
{
	struct console *c;

	/*
	 * console_unblank can no longer be called in interrupt context unless
	 * oops_in_progress is set to 1..
	 */
	if (oops_in_progress) {
		/* Best effort: give up rather than deadlock while oopsing. */
		if (down_trylock(&console_sem) != 0)
			return;
	} else
		acquire_console_sem();

	/* Set by hand on the trylock path, where acquire_console_sem()
	 * didn't run. */
	console_locked = 1;
	console_may_schedule = 0;
	for (c = console_drivers; c != NULL; c = c->next)
		if ((c->flags & CON_ENABLED) && c->unblank)
			c->unblank();
	release_console_sem();
}
EXPORT_SYMBOL(console_unblank);
|
||||
|
||||
/*
|
||||
* Return the console tty driver structure and its associated index
|
||||
*/
|
||||
struct tty_driver *console_device(int *index)
|
||||
{
|
||||
struct console *c;
|
||||
struct tty_driver *driver = NULL;
|
||||
|
||||
acquire_console_sem();
|
||||
for (c = console_drivers; c != NULL; c = c->next) {
|
||||
if (!c->device)
|
||||
continue;
|
||||
driver = c->device(c, index);
|
||||
if (driver)
|
||||
break;
|
||||
}
|
||||
release_console_sem();
|
||||
return driver;
|
||||
}
|
||||
|
||||
/*
 * Prevent further output on the passed console device so that (for example)
 * serial drivers can disable console output before suspending a port, and can
 * re-enable output afterwards.
 */
void console_stop(struct console *console)
{
	/* console_sem serialises flag updates with console traversal. */
	acquire_console_sem();
	console->flags &= ~CON_ENABLED;
	release_console_sem();
}
EXPORT_SYMBOL(console_stop);
|
||||
|
||||
/* Re-enable output on a console previously stopped by console_stop(). */
void console_start(struct console *console)
{
	acquire_console_sem();
	console->flags |= CON_ENABLED;
	release_console_sem();
}
EXPORT_SYMBOL(console_start);
|
||||
|
||||
/*
 * The console driver calls this routine during kernel initialization
 * to register the console printing procedure with printk() and to
 * print any messages that were printed by the kernel before the
 * console driver was initialized.
 */
void register_console(struct console * console)
{
	int     i;
	unsigned long flags;

	if (preferred_console < 0)
		preferred_console = selected_console;

	/*
	 *	See if we want to use this console driver. If we
	 *	didn't select a console we take the first one
	 *	that registers here.
	 */
	if (preferred_console < 0) {
		if (console->index < 0)
			console->index = 0;
		if (console->setup == NULL ||
		    console->setup(console, NULL) == 0) {
			console->flags |= CON_ENABLED | CON_CONSDEV;
			preferred_console = 0;
		}
	}

	/*
	 *	See if this console matches one we selected on
	 *	the command line.
	 */
	for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) {
		if (strcmp(console_cmdline[i].name, console->name) != 0)
			continue;
		if (console->index >= 0 &&
		    console->index != console_cmdline[i].index)
			continue;
		if (console->index < 0)
			console->index = console_cmdline[i].index;
		/* A failing setup() vetoes this console entirely. */
		if (console->setup &&
		    console->setup(console, console_cmdline[i].options) != 0)
			break;
		console->flags |= CON_ENABLED;
		console->index = console_cmdline[i].index;
		if (i == preferred_console)
			console->flags |= CON_CONSDEV;
		break;
	}

	if (!(console->flags & CON_ENABLED))
		return;

	/* A real console replaces any boot console; the boot console
	 * already printed the backlog, so don't replay it. */
	if (console_drivers && (console_drivers->flags & CON_BOOT)) {
		unregister_console(console_drivers);
		console->flags &= ~CON_PRINTBUFFER;
	}

	/*
	 *	Put this console in the list - keep the
	 *	preferred driver at the head of the list.
	 */
	acquire_console_sem();
	if ((console->flags & CON_CONSDEV) || console_drivers == NULL) {
		console->next = console_drivers;
		console_drivers = console;
	} else {
		console->next = console_drivers->next;
		console_drivers->next = console;
	}
	if (console->flags & CON_PRINTBUFFER) {
		/*
		 * release_console_sem() will print out the buffered messages
		 * for us.
		 */
		spin_lock_irqsave(&logbuf_lock, flags);
		/* Rewind the console cursor to replay the whole backlog. */
		con_start = log_start;
		spin_unlock_irqrestore(&logbuf_lock, flags);
	}
	release_console_sem();
}
EXPORT_SYMBOL(register_console);
|
||||
|
||||
int unregister_console(struct console * console)
|
||||
{
|
||||
struct console *a,*b;
|
||||
int res = 1;
|
||||
|
||||
acquire_console_sem();
|
||||
if (console_drivers == console) {
|
||||
console_drivers=console->next;
|
||||
res = 0;
|
||||
} else {
|
||||
for (a=console_drivers->next, b=console_drivers ;
|
||||
a; b=a, a=b->next) {
|
||||
if (a == console) {
|
||||
b->next = a->next;
|
||||
res = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If last console is removed, we re-enable picking the first
|
||||
* one that gets registered. Without that, pmac early boot console
|
||||
* would prevent fbcon from taking over.
|
||||
*/
|
||||
if (console_drivers == NULL)
|
||||
preferred_console = selected_console;
|
||||
|
||||
|
||||
release_console_sem();
|
||||
return res;
|
||||
}
|
||||
EXPORT_SYMBOL(unregister_console);
|
||||
|
||||
/**
|
||||
* tty_write_message - write a message to a certain tty, not just the console.
|
||||
*
|
||||
* This is used for messages that need to be redirected to a specific tty.
|
||||
* We don't put it into the syslog queue right now maybe in the future if
|
||||
* really needed.
|
||||
*/
|
||||
void tty_write_message(struct tty_struct *tty, char *msg)
|
||||
{
|
||||
if (tty && tty->driver->write)
|
||||
tty->driver->write(tty, msg, strlen(msg));
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* printk rate limiting, lifted from the networking subsystem.
|
||||
*
|
||||
* This enforces a rate limit: not more than one kernel message
|
||||
* every printk_ratelimit_jiffies to make a denial-of-service
|
||||
* attack impossible.
|
||||
*/
|
||||
int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst)
|
||||
{
|
||||
static DEFINE_SPINLOCK(ratelimit_lock);
|
||||
static unsigned long toks = 10*5*HZ;
|
||||
static unsigned long last_msg;
|
||||
static int missed;
|
||||
unsigned long flags;
|
||||
unsigned long now = jiffies;
|
||||
|
||||
spin_lock_irqsave(&ratelimit_lock, flags);
|
||||
toks += now - last_msg;
|
||||
last_msg = now;
|
||||
if (toks > (ratelimit_burst * ratelimit_jiffies))
|
||||
toks = ratelimit_burst * ratelimit_jiffies;
|
||||
if (toks >= ratelimit_jiffies) {
|
||||
int lost = missed;
|
||||
missed = 0;
|
||||
toks -= ratelimit_jiffies;
|
||||
spin_unlock_irqrestore(&ratelimit_lock, flags);
|
||||
if (lost)
|
||||
printk(KERN_WARNING "printk: %d messages suppressed.\n", lost);
|
||||
return 1;
|
||||
}
|
||||
missed++;
|
||||
spin_unlock_irqrestore(&ratelimit_lock, flags);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(__printk_ratelimit);
|
||||
|
||||
/* minimum time in jiffies between messages */
|
||||
int printk_ratelimit_jiffies = 5*HZ;
|
||||
|
||||
/* number of messages we send before ratelimiting */
|
||||
int printk_ratelimit_burst = 10;
|
||||
|
||||
int printk_ratelimit(void)
|
||||
{
|
||||
return __printk_ratelimit(printk_ratelimit_jiffies,
|
||||
printk_ratelimit_burst);
|
||||
}
|
||||
EXPORT_SYMBOL(printk_ratelimit);
|
||||
563
kernel/profile.c
Normal file
563
kernel/profile.c
Normal file
@@ -0,0 +1,563 @@
|
||||
/*
|
||||
* linux/kernel/profile.c
|
||||
* Simple profiling. Manages a direct-mapped profile hit count buffer,
|
||||
* with configurable resolution, support for restricting the cpus on
|
||||
* which profiling is done, and switching between cpu time and
|
||||
* schedule() calls via kernel command line parameters passed at boot.
|
||||
*
|
||||
* Scheduler profiling support, Arjan van de Ven and Ingo Molnar,
|
||||
* Red Hat, July 2004
|
||||
* Consolidation of architecture support code for profiling,
|
||||
* William Irwin, Oracle, July 2004
|
||||
* Amortized hit count accounting via per-cpu open-addressed hashtables
|
||||
* to resolve timer interrupt livelocks, William Irwin, Oracle, 2004
|
||||
*/
|
||||
|
||||
#include <linux/config.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/profile.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/profile.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/semaphore.h>
|
||||
|
||||
struct profile_hit {
|
||||
u32 pc, hits;
|
||||
};
|
||||
#define PROFILE_GRPSHIFT 3
|
||||
#define PROFILE_GRPSZ (1 << PROFILE_GRPSHIFT)
|
||||
#define NR_PROFILE_HIT (PAGE_SIZE/sizeof(struct profile_hit))
|
||||
#define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ)
|
||||
|
||||
/* Oprofile timer tick hook */
|
||||
int (*timer_hook)(struct pt_regs *);
|
||||
|
||||
static atomic_t *prof_buffer;
|
||||
static unsigned long prof_len, prof_shift;
|
||||
static int prof_on;
|
||||
static cpumask_t prof_cpu_mask = CPU_MASK_ALL;
|
||||
#ifdef CONFIG_SMP
|
||||
static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
|
||||
static DEFINE_PER_CPU(int, cpu_profile_flip);
|
||||
static DECLARE_MUTEX(profile_flip_mutex);
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
static int __init profile_setup(char * str)
|
||||
{
|
||||
int par;
|
||||
|
||||
if (!strncmp(str, "schedule", 8)) {
|
||||
prof_on = SCHED_PROFILING;
|
||||
printk(KERN_INFO "kernel schedule profiling enabled\n");
|
||||
if (str[7] == ',')
|
||||
str += 8;
|
||||
}
|
||||
if (get_option(&str,&par)) {
|
||||
prof_shift = par;
|
||||
prof_on = CPU_PROFILING;
|
||||
printk(KERN_INFO "kernel profiling enabled (shift: %ld)\n",
|
||||
prof_shift);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
__setup("profile=", profile_setup);
|
||||
|
||||
|
||||
/*
 * Allocate the global profile buffer at boot, sized to cover the
 * kernel text at the configured resolution.  No-op when profiling
 * was not requested on the command line.
 */
void __init profile_init(void)
{
	if (!prof_on)
		return;

	/* only text is profiled */
	prof_len = (_etext - _stext) >> prof_shift;
	prof_buffer = alloc_bootmem(prof_len * sizeof(atomic_t));
}
|
||||
|
||||
/* Profile event notifications */
|
||||
|
||||
#ifdef CONFIG_PROFILING
|
||||
|
||||
static DECLARE_RWSEM(profile_rwsem);
|
||||
static DEFINE_RWLOCK(handoff_lock);
|
||||
static struct notifier_block * task_exit_notifier;
|
||||
static struct notifier_block * task_free_notifier;
|
||||
static struct notifier_block * munmap_notifier;
|
||||
|
||||
void profile_task_exit(struct task_struct * task)
|
||||
{
|
||||
down_read(&profile_rwsem);
|
||||
notifier_call_chain(&task_exit_notifier, 0, task);
|
||||
up_read(&profile_rwsem);
|
||||
}
|
||||
|
||||
int profile_handoff_task(struct task_struct * task)
|
||||
{
|
||||
int ret;
|
||||
read_lock(&handoff_lock);
|
||||
ret = notifier_call_chain(&task_free_notifier, 0, task);
|
||||
read_unlock(&handoff_lock);
|
||||
return (ret == NOTIFY_OK) ? 1 : 0;
|
||||
}
|
||||
|
||||
void profile_munmap(unsigned long addr)
|
||||
{
|
||||
down_read(&profile_rwsem);
|
||||
notifier_call_chain(&munmap_notifier, 0, (void *)addr);
|
||||
up_read(&profile_rwsem);
|
||||
}
|
||||
|
||||
int task_handoff_register(struct notifier_block * n)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
|
||||
write_lock(&handoff_lock);
|
||||
err = notifier_chain_register(&task_free_notifier, n);
|
||||
write_unlock(&handoff_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
int task_handoff_unregister(struct notifier_block * n)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
|
||||
write_lock(&handoff_lock);
|
||||
err = notifier_chain_unregister(&task_free_notifier, n);
|
||||
write_unlock(&handoff_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
int profile_event_register(enum profile_type type, struct notifier_block * n)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
|
||||
down_write(&profile_rwsem);
|
||||
|
||||
switch (type) {
|
||||
case PROFILE_TASK_EXIT:
|
||||
err = notifier_chain_register(&task_exit_notifier, n);
|
||||
break;
|
||||
case PROFILE_MUNMAP:
|
||||
err = notifier_chain_register(&munmap_notifier, n);
|
||||
break;
|
||||
}
|
||||
|
||||
up_write(&profile_rwsem);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int profile_event_unregister(enum profile_type type, struct notifier_block * n)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
|
||||
down_write(&profile_rwsem);
|
||||
|
||||
switch (type) {
|
||||
case PROFILE_TASK_EXIT:
|
||||
err = notifier_chain_unregister(&task_exit_notifier, n);
|
||||
break;
|
||||
case PROFILE_MUNMAP:
|
||||
err = notifier_chain_unregister(&munmap_notifier, n);
|
||||
break;
|
||||
}
|
||||
|
||||
up_write(&profile_rwsem);
|
||||
return err;
|
||||
}
|
||||
|
||||
int register_timer_hook(int (*hook)(struct pt_regs *))
|
||||
{
|
||||
if (timer_hook)
|
||||
return -EBUSY;
|
||||
timer_hook = hook;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void unregister_timer_hook(int (*hook)(struct pt_regs *))
|
||||
{
|
||||
WARN_ON(hook != timer_hook);
|
||||
timer_hook = NULL;
|
||||
/* make sure all CPUs see the NULL hook */
|
||||
synchronize_kernel();
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(register_timer_hook);
|
||||
EXPORT_SYMBOL_GPL(unregister_timer_hook);
|
||||
EXPORT_SYMBOL_GPL(task_handoff_register);
|
||||
EXPORT_SYMBOL_GPL(task_handoff_unregister);
|
||||
|
||||
#endif /* CONFIG_PROFILING */
|
||||
|
||||
EXPORT_SYMBOL_GPL(profile_event_register);
|
||||
EXPORT_SYMBOL_GPL(profile_event_unregister);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Each cpu has a pair of open-addressed hashtables for pending
|
||||
* profile hits. read_profile() IPI's all cpus to request them
|
||||
* to flip buffers and flushes their contents to prof_buffer itself.
|
||||
* Flip requests are serialized by the profile_flip_mutex. The sole
|
||||
* use of having a second hashtable is for avoiding cacheline
|
||||
* contention that would otherwise happen during flushes of pending
|
||||
* profile hits required for the accuracy of reported profile hits
|
||||
* and so resurrect the interrupt livelock issue.
|
||||
*
|
||||
* The open-addressed hashtables are indexed by profile buffer slot
|
||||
* and hold the number of pending hits to that profile buffer slot on
|
||||
* a cpu in an entry. When the hashtable overflows, all pending hits
|
||||
* are accounted to their corresponding profile buffer slots with
|
||||
* atomic_add() and the hashtable emptied. As numerous pending hits
|
||||
* may be accounted to a profile buffer slot in a hashtable entry,
|
||||
* this amortizes a number of atomic profile buffer increments likely
|
||||
* to be far larger than the number of entries in the hashtable,
|
||||
* particularly given that the number of distinct profile buffer
|
||||
* positions to which hits are accounted during short intervals (e.g.
|
||||
* several seconds) is usually very small. Exclusion from buffer
|
||||
* flipping is provided by interrupt disablement (note that for
|
||||
* SCHED_PROFILING profile_hit() may be called from process context).
|
||||
* The hash function is meant to be lightweight as opposed to strong,
|
||||
* and was vaguely inspired by ppc64 firmware-supported inverted
|
||||
* pagetable hash functions, but uses a full hashtable full of finite
|
||||
* collision chains, not just pairs of them.
|
||||
*
|
||||
* -- wli
|
||||
*/
|
||||
static void __profile_flip_buffers(void *unused)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu);
|
||||
}
|
||||
|
||||
/*
 * Drain every cpu's inactive hit-table into prof_buffer.
 *
 * The current flip index j is sampled locally, then every cpu is told
 * (via IPI) to toggle cpu_profile_flip, so new hits go to the other
 * table while the tables indexed by j are flushed below.  Serialized
 * against concurrent flips by profile_flip_mutex.
 */
static void profile_flip_buffers(void)
{
	int i, j, cpu;

	down(&profile_flip_mutex);
	j = per_cpu(cpu_profile_flip, get_cpu());
	put_cpu();
	on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
	for_each_online_cpu(cpu) {
		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
		for (i = 0; i < NR_PROFILE_HIT; ++i) {
			if (!hits[i].hits) {
				/* stale pc with no pending hits: clear it */
				if (hits[i].pc)
					hits[i].pc = 0;
				continue;
			}
			/* fold the amortized count into the global buffer */
			atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
			hits[i].hits = hits[i].pc = 0;
		}
	}
	up(&profile_flip_mutex);
}
|
||||
|
||||
/*
 * Throw away all pending per-cpu hits (used when the profile is reset
 * via a write to /proc/profile).  Same flip protocol as
 * profile_flip_buffers(), but the drained tables are zeroed instead of
 * being accounted to prof_buffer.
 */
static void profile_discard_flip_buffers(void)
{
	int i, cpu;

	down(&profile_flip_mutex);
	i = per_cpu(cpu_profile_flip, get_cpu());
	put_cpu();
	on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
	for_each_online_cpu(cpu) {
		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
		memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
	}
	up(&profile_flip_mutex);
}
|
||||
|
||||
/*
 * Record one profile hit at program counter @__pc (SMP variant).
 *
 * Hits are accumulated in a per-cpu open-addressed hashtable keyed by
 * profile-buffer slot; only when the table fills up are the counts
 * flushed to prof_buffer with atomic ops (see the big comment above).
 */
void profile_hit(int type, void *__pc)
{
	unsigned long primary, secondary, flags, pc = (unsigned long)__pc;
	int i, j, cpu;
	struct profile_hit *hits;

	if (prof_on != type || !prof_buffer)
		return;
	/* translate pc into a clamped profile-buffer slot index */
	pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1);
	/* primary group and secondary (double-hash) probe stride */
	i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
	secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
	cpu = get_cpu();
	hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)];
	if (!hits) {
		put_cpu();
		return;
	}
	/* irqs off excludes the buffer-flip IPI while we probe the table */
	local_irq_save(flags);
	do {
		for (j = 0; j < PROFILE_GRPSZ; ++j) {
			if (hits[i + j].pc == pc) {
				/* existing entry: bump the amortized count */
				hits[i + j].hits++;
				goto out;
			} else if (!hits[i + j].hits) {
				/* free slot: claim it for this pc */
				hits[i + j].pc = pc;
				hits[i + j].hits = 1;
				goto out;
			}
		}
		i = (i + secondary) & (NR_PROFILE_HIT - 1);
	} while (i != primary);
	/* table full: account this hit directly, then flush and empty it */
	atomic_inc(&prof_buffer[pc]);
	for (i = 0; i < NR_PROFILE_HIT; ++i) {
		atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
		hits[i].pc = hits[i].hits = 0;
	}
out:
	local_irq_restore(flags);
	put_cpu();
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
/*
 * CPU hotplug callback: allocate/free the per-cpu hit-tables and keep
 * prof_cpu_mask in sync with the set of online cpus.
 *
 * Returns NOTIFY_BAD from CPU_UP_PREPARE when a table page cannot be
 * allocated (vetoing the bring-up), NOTIFY_OK otherwise.
 */
static int __devinit profile_cpu_callback(struct notifier_block *info,
					unsigned long action, void *__cpu)
{
	int node, cpu = (unsigned long)__cpu;
	struct page *page;

	switch (action) {
	case CPU_UP_PREPARE:
		node = cpu_to_node(cpu);
		per_cpu(cpu_profile_flip, cpu) = 0;
		/* table [1] first; [0] second so we can roll back cleanly */
		if (!per_cpu(cpu_profile_hits, cpu)[1]) {
			page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
			if (!page)
				return NOTIFY_BAD;
			per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
		}
		if (!per_cpu(cpu_profile_hits, cpu)[0]) {
			page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
			if (!page)
				goto out_free;
			per_cpu(cpu_profile_hits, cpu)[0] = page_address(page);
		}
		break;
	out_free:
		/* second allocation failed: release the first table */
		page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
		per_cpu(cpu_profile_hits, cpu)[1] = NULL;
		__free_page(page);
		return NOTIFY_BAD;
	case CPU_ONLINE:
		cpu_set(cpu, prof_cpu_mask);
		break;
	case CPU_UP_CANCELED:
	case CPU_DEAD:
		/* stop profiling this cpu and free both tables */
		cpu_clear(cpu, prof_cpu_mask);
		if (per_cpu(cpu_profile_hits, cpu)[0]) {
			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
			per_cpu(cpu_profile_hits, cpu)[0] = NULL;
			__free_page(page);
		}
		if (per_cpu(cpu_profile_hits, cpu)[1]) {
			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
			per_cpu(cpu_profile_hits, cpu)[1] = NULL;
			__free_page(page);
		}
		break;
	}
	return NOTIFY_OK;
}
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
||||
#else /* !CONFIG_SMP */
|
||||
#define profile_flip_buffers() do { } while (0)
|
||||
#define profile_discard_flip_buffers() do { } while (0)
|
||||
|
||||
void profile_hit(int type, void *__pc)
|
||||
{
|
||||
unsigned long pc;
|
||||
|
||||
if (prof_on != type || !prof_buffer)
|
||||
return;
|
||||
pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
|
||||
atomic_inc(&prof_buffer[min(pc, prof_len - 1)]);
|
||||
}
|
||||
#endif /* !CONFIG_SMP */
|
||||
|
||||
void profile_tick(int type, struct pt_regs *regs)
|
||||
{
|
||||
if (type == CPU_PROFILING && timer_hook)
|
||||
timer_hook(regs);
|
||||
if (!user_mode(regs) && cpu_isset(smp_processor_id(), prof_cpu_mask))
|
||||
profile_hit(type, (void *)profile_pc(regs));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
#include <linux/proc_fs.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
/* /proc read handler: print prof_cpu_mask followed by a newline. */
static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
			int count, int *eof, void *data)
{
	int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);

	/* need room for the trailing "\n" and its NUL terminator */
	if (count - len < 2)
		return -EINVAL;
	page[len++] = '\n';
	page[len] = '\0';
	return len;
}
|
||||
|
||||
/* /proc write handler: parse a cpumask from userspace into prof_cpu_mask. */
static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer,
					unsigned long count, void *data)
{
	cpumask_t *mask = (cpumask_t *)data;
	unsigned long full_count = count, err;
	cpumask_t parsed;

	err = cpumask_parse(buffer, count, parsed);
	if (err)
		return err;

	*mask = parsed;
	return full_count;
}
|
||||
|
||||
void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
|
||||
{
|
||||
struct proc_dir_entry *entry;
|
||||
|
||||
/* create /proc/irq/prof_cpu_mask */
|
||||
if (!(entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir)))
|
||||
return;
|
||||
entry->nlink = 1;
|
||||
entry->data = (void *)&prof_cpu_mask;
|
||||
entry->read_proc = prof_cpu_mask_read_proc;
|
||||
entry->write_proc = prof_cpu_mask_write_proc;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function accesses profiling information. The returned data is
|
||||
* binary: the sampling step and the actual contents of the profile
|
||||
* buffer. Use of the program readprofile is recommended in order to
|
||||
* get meaningful info out of these data.
|
||||
*/
|
||||
/*
 * Read handler for /proc/profile.  The file format is: one unsigned int
 * holding the sample step (1 << prof_shift), followed by the raw
 * prof_buffer contents; reads may start at any byte offset.
 */
static ssize_t
read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t read;
	char * pnt;
	unsigned int sample_step = 1 << prof_shift;

	/* drain per-cpu pending hits so the snapshot below is current */
	profile_flip_buffers();
	if (p >= (prof_len+1)*sizeof(unsigned int))
		return 0;
	if (count > (prof_len+1)*sizeof(unsigned int) - p)
		count = (prof_len+1)*sizeof(unsigned int) - p;
	read = 0;

	/* first word of the file: the sample step, copied byte by byte
	 * so partial reads across it work */
	while (p < sizeof(unsigned int) && count > 0) {
		put_user(*((char *)(&sample_step)+p),buf);
		buf++; p++; count--; read++;
	}
	/* then the profile buffer itself, offset by the header word */
	pnt = (char *)prof_buffer + p - sizeof(atomic_t);
	if (copy_to_user(buf,(void *)pnt,count))
		return -EFAULT;
	read += count;
	*ppos += read;
	return read;
}
|
||||
|
||||
/*
|
||||
* Writing to /proc/profile resets the counters
|
||||
*
|
||||
* Writing a 'profiling multiplier' value into it also re-sets the profiling
|
||||
* interrupt frequency, on architectures that support this.
|
||||
*/
|
||||
static ssize_t write_profile(struct file *file, const char __user *buf,
			size_t count, loff_t *ppos)
{
#ifdef CONFIG_SMP
	extern int setup_profiling_timer (unsigned int multiplier);

	/* a write of exactly sizeof(int) sets the profiling multiplier
	 * on SMP architectures that support it */
	if (count == sizeof(int)) {
		unsigned int multiplier;

		if (copy_from_user(&multiplier, buf, sizeof(int)))
			return -EFAULT;

		if (setup_profiling_timer(multiplier))
			return -EINVAL;
	}
#endif
	/* any write resets the counters: discard per-cpu pending hits,
	 * then zero the global buffer */
	profile_discard_flip_buffers();
	memset(prof_buffer, 0, prof_len * sizeof(atomic_t));
	return count;
}
|
||||
|
||||
static struct file_operations proc_profile_operations = {
|
||||
.read = read_profile,
|
||||
.write = write_profile,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
 * Empty IPI target: running it on every cpu guarantees all cpus have
 * left any irq-off region that might touch the hit-tables before the
 * cleanup path frees them.
 */
static void __init profile_nop(void *unused)
{
}
|
||||
|
||||
/*
 * Allocate one page per cpu per flip-side for the hit hashtables.
 * On failure, profiling is disabled (prof_on = 0) and everything
 * already allocated is freed; returns 0 on success, -1 on failure.
 */
static int __init create_hash_tables(void)
{
	int cpu;

	for_each_online_cpu(cpu) {
		int node = cpu_to_node(cpu);
		struct page *page;

		page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
		if (!page)
			goto out_cleanup;
		per_cpu(cpu_profile_hits, cpu)[1]
				= (struct profile_hit *)page_address(page);
		page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
		if (!page)
			goto out_cleanup;
		per_cpu(cpu_profile_hits, cpu)[0]
				= (struct profile_hit *)page_address(page);
	}
	return 0;
out_cleanup:
	/* disable profiling before freeing, and make sure every cpu has
	 * seen that (and left profile_hit) via the nop IPI */
	prof_on = 0;
	mb();
	on_each_cpu(profile_nop, NULL, 0, 1);
	for_each_online_cpu(cpu) {
		struct page *page;

		if (per_cpu(cpu_profile_hits, cpu)[0]) {
			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
			per_cpu(cpu_profile_hits, cpu)[0] = NULL;
			__free_page(page);
		}
		if (per_cpu(cpu_profile_hits, cpu)[1]) {
			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
			per_cpu(cpu_profile_hits, cpu)[1] = NULL;
			__free_page(page);
		}
	}
	return -1;
}
|
||||
#else
|
||||
#define create_hash_tables() ({ 0; })
|
||||
#endif
|
||||
|
||||
/*
 * Late-init hook: set up the per-cpu hashtables, create /proc/profile,
 * and register the cpu-hotplug callback.  A failure to create the proc
 * entry is not treated as an error.
 */
static int __init create_proc_profile(void)
{
	struct proc_dir_entry *entry;

	if (!prof_on)
		return 0;
	if (create_hash_tables())
		return -1;
	if (!(entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL)))
		return 0;
	entry->proc_fops = &proc_profile_operations;
	/* file size: sample-step header word plus the buffer itself */
	entry->size = (1+prof_len) * sizeof(atomic_t);
	hotcpu_notifier(profile_cpu_callback, 0);
	return 0;
}
module_init(create_proc_profile);
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
389
kernel/ptrace.c
Normal file
389
kernel/ptrace.c
Normal file
@@ -0,0 +1,389 @@
|
||||
/*
|
||||
* linux/kernel/ptrace.c
|
||||
*
|
||||
* (C) Copyright 1999 Linus Torvalds
|
||||
*
|
||||
* Common interfaces for "ptrace()" which we do not want
|
||||
* to continually duplicate across every architecture.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/security.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
/*
|
||||
* ptrace a task: make the debugger its new parent and
|
||||
* move it to the ptrace list.
|
||||
*
|
||||
* Must be called with the tasklist lock write-held.
|
||||
*/
|
||||
/*
 * Re-parent @child under its tracer and put it on the tracer-side
 * ptrace list.  Caller holds the tasklist lock for writing.
 */
void __ptrace_link(task_t *child, task_t *new_parent)
{
	BUG_ON(!list_empty(&child->ptrace_list));

	if (child->parent == new_parent)
		return;

	list_add(&child->ptrace_list, &child->parent->ptrace_children);
	REMOVE_LINKS(child);
	child->parent = new_parent;
	SET_LINKS(child);
}
|
||||
|
||||
/*
|
||||
* Turn a tracing stop into a normal stop now, since with no tracer there
|
||||
* would be no way to wake it up with SIGCONT or SIGKILL. If there was a
|
||||
* signal sent that would resume the child, but didn't because it was in
|
||||
* TASK_TRACED, resume it now.
|
||||
* Requires that irqs be disabled.
|
||||
*/
|
||||
/*
 * Turn a tracing stop into a normal stop (or resume the child) now
 * that the tracer is gone; see comment above.  Irqs must be disabled.
 */
void ptrace_untrace(task_t *child)
{
	spin_lock(&child->sighand->siglock);
	if (child->state == TASK_TRACED) {
		if (child->signal->flags & SIGNAL_STOP_STOPPED)
			child->state = TASK_STOPPED;
		else
			signal_wake_up(child, 1);
	}
	spin_unlock(&child->sighand->siglock);
}
|
||||
|
||||
/*
|
||||
* unptrace a task: move it back to its original parent and
|
||||
* remove it from the ptrace list.
|
||||
*
|
||||
* Must be called with the tasklist lock write-held.
|
||||
*/
|
||||
/*
 * Detach @child from its tracer: restore the original parent and take
 * it off the ptrace list.  Caller holds the tasklist lock for writing.
 */
void __ptrace_unlink(task_t *child)
{
	BUG_ON(!child->ptrace);

	child->ptrace = 0;
	if (!list_empty(&child->ptrace_list)) {
		list_del_init(&child->ptrace_list);
		REMOVE_LINKS(child);
		child->parent = child->real_parent;
		SET_LINKS(child);
	}

	if (child->state == TASK_TRACED)
		ptrace_untrace(child);
}
|
||||
|
||||
/*
|
||||
* Check that we have indeed attached to the thing..
|
||||
*/
|
||||
/*
 * Check that we have indeed attached to the thing, and move a stopped
 * child into TASK_TRACED so it stays put for the ptrace operation.
 * Returns 0 on success, -ESRCH if the child is not our stopped tracee.
 * With @kill set, the stopped-state requirement is waived.
 */
int ptrace_check_attach(struct task_struct *child, int kill)
{
	int ret = -ESRCH;

	/*
	 * We take the read lock around doing both checks to close a
	 * possible race where someone else was tracing our child and
	 * detached between these two checks.  After this locked check,
	 * we are sure that this is our traced child and that can only
	 * be changed by us so it's not changing right after this.
	 */
	read_lock(&tasklist_lock);
	if ((child->ptrace & PT_PTRACED) && child->parent == current &&
	    (!(child->ptrace & PT_ATTACHED) || child->real_parent != current)
	    && child->signal != NULL) {
		ret = 0;
		spin_lock_irq(&child->sighand->siglock);
		if (child->state == TASK_STOPPED) {
			child->state = TASK_TRACED;
		} else if (child->state != TASK_TRACED && !kill) {
			ret = -ESRCH;
		}
		spin_unlock_irq(&child->sighand->siglock);
	}
	read_unlock(&tasklist_lock);

	if (!ret && !kill) {
		/* wait until the child is truly off-cpu before poking it */
		wait_task_inactive(child);
	}

	/* All systems go.. */
	return ret;
}
|
||||
|
||||
/*
 * Attach the current task as tracer of @task after permission checks.
 * Returns 0 on success and sends SIGSTOP to the new tracee, or a
 * negative errno (-EPERM, or a security-module error) on failure.
 */
int ptrace_attach(struct task_struct *task)
{
	int retval;
	task_lock(task);
	retval = -EPERM;
	/* never attach to init */
	if (task->pid <= 1)
		goto bad;
	/* never attach to ourselves */
	if (task == current)
		goto bad;
	/* no mm: kernel thread or exiting task */
	if (!task->mm)
		goto bad;
	/* all of the target's uids/gids must match ours, unless we have
	 * CAP_SYS_PTRACE */
	if(((current->uid != task->euid) ||
	    (current->uid != task->suid) ||
	    (current->uid != task->uid) ||
	    (current->gid != task->egid) ||
	    (current->gid != task->sgid) ||
	    (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
		goto bad;
	rmb();
	/* non-dumpable (e.g. setuid) targets need CAP_SYS_PTRACE too */
	if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
		goto bad;
	/* the same process cannot be attached many times */
	if (task->ptrace & PT_PTRACED)
		goto bad;
	retval = security_ptrace(current, task);
	if (retval)
		goto bad;

	/* Go */
	task->ptrace |= PT_PTRACED | ((task->real_parent != current)
				      ? PT_ATTACHED : 0);
	if (capable(CAP_SYS_PTRACE))
		task->ptrace |= PT_PTRACE_CAP;
	task_unlock(task);

	write_lock_irq(&tasklist_lock);
	__ptrace_link(task, current);
	write_unlock_irq(&tasklist_lock);

	/* stop the new tracee so the tracer can take control */
	force_sig_specific(SIGSTOP, task);
	return 0;

bad:
	task_unlock(task);
	return retval;
}
|
||||
|
||||
int ptrace_detach(struct task_struct *child, unsigned int data)
|
||||
{
|
||||
if ((unsigned long) data > _NSIG)
|
||||
return -EIO;
|
||||
|
||||
/* Architecture-specific hardware disable .. */
|
||||
ptrace_disable(child);
|
||||
|
||||
/* .. re-parent .. */
|
||||
child->exit_code = data;
|
||||
|
||||
write_lock_irq(&tasklist_lock);
|
||||
__ptrace_unlink(child);
|
||||
/* .. and wake it up. */
|
||||
if (child->exit_state != EXIT_ZOMBIE)
|
||||
wake_up_process(child);
|
||||
write_unlock_irq(&tasklist_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Access another process' address space.
|
||||
* Source/target buffer must be kernel space,
|
||||
* Do not walk the page table directly, use get_user_pages
|
||||
*/
|
||||
|
||||
/*
 * Access another process' address space.
 * Source/target buffer must be kernel space,
 * Do not walk the page table directly, use get_user_pages.
 * Returns the number of bytes successfully transferred (possibly 0).
 */
int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	struct page *page;
	void *old_buf = buf;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	down_read(&mm->mmap_sem);
	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;

		/* pin exactly one page of the target's address space */
		ret = get_user_pages(tsk, mm, addr, 1,
				write, 1, &page, &vma);
		if (ret <= 0)
			break;

		/* copy at most up to the end of this page */
		bytes = len;
		offset = addr & (PAGE_SIZE-1);
		if (bytes > PAGE_SIZE-offset)
			bytes = PAGE_SIZE-offset;

		maddr = kmap(page);
		if (write) {
			copy_to_user_page(vma, page, addr,
					  maddr + offset, buf, bytes);
			set_page_dirty_lock(page);
		} else {
			copy_from_user_page(vma, page, addr,
					    buf, maddr + offset, bytes);
		}
		kunmap(page);
		page_cache_release(page);
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}
	up_read(&mm->mmap_sem);
	mmput(mm);

	return buf - old_buf;
}
|
||||
|
||||
/*
 * Copy @len bytes from tracee @tsk's address @src to user buffer @dst,
 * in bounded chunks.  Returns bytes copied, -EIO if nothing could be
 * read at all, or -EFAULT on a bad user buffer.
 */
int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len)
{
	char chunk[128];
	int copied = 0;

	while (len > 0) {
		int want = (len > sizeof(chunk)) ? sizeof(chunk) : len;
		int got = access_process_vm(tsk, src, chunk, want, 0);

		if (!got)
			return copied ? copied : -EIO;
		if (copy_to_user(dst, chunk, got))
			return -EFAULT;
		copied += got;
		src += got;
		dst += got;
		len -= got;
	}
	return copied;
}
|
||||
|
||||
/*
 * Copy @len bytes from user buffer @src into tracee @tsk at address
 * @dst, in bounded chunks.  Returns bytes copied, -EIO if nothing could
 * be written at all, or -EFAULT on a bad user buffer.
 */
int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len)
{
	char chunk[128];
	int copied = 0;

	while (len > 0) {
		int want = (len > sizeof(chunk)) ? sizeof(chunk) : len;
		int put;

		if (copy_from_user(chunk, src, want))
			return -EFAULT;
		put = access_process_vm(tsk, dst, chunk, want, 1);
		if (!put)
			return copied ? copied : -EIO;
		copied += put;
		src += put;
		dst += put;
		len -= put;
	}
	return copied;
}
|
||||
|
||||
static int ptrace_setoptions(struct task_struct *child, long data)
|
||||
{
|
||||
child->ptrace &= ~PT_TRACE_MASK;
|
||||
|
||||
if (data & PTRACE_O_TRACESYSGOOD)
|
||||
child->ptrace |= PT_TRACESYSGOOD;
|
||||
|
||||
if (data & PTRACE_O_TRACEFORK)
|
||||
child->ptrace |= PT_TRACE_FORK;
|
||||
|
||||
if (data & PTRACE_O_TRACEVFORK)
|
||||
child->ptrace |= PT_TRACE_VFORK;
|
||||
|
||||
if (data & PTRACE_O_TRACECLONE)
|
||||
child->ptrace |= PT_TRACE_CLONE;
|
||||
|
||||
if (data & PTRACE_O_TRACEEXEC)
|
||||
child->ptrace |= PT_TRACE_EXEC;
|
||||
|
||||
if (data & PTRACE_O_TRACEVFORKDONE)
|
||||
child->ptrace |= PT_TRACE_VFORK_DONE;
|
||||
|
||||
if (data & PTRACE_O_TRACEEXIT)
|
||||
child->ptrace |= PT_TRACE_EXIT;
|
||||
|
||||
return (data & ~PTRACE_O_MASK) ? -EINVAL : 0;
|
||||
}
|
||||
|
||||
/*
 * Copy the child's pending last_siginfo to userspace.  Returns -ESRCH
 * if the child has no sighand, -EINVAL if there is no pending siginfo,
 * or the result of the user copy.
 */
static int ptrace_getsiginfo(struct task_struct *child, siginfo_t __user * data)
{
	siginfo_t info;
	int err = -ESRCH;

	read_lock(&tasklist_lock);
	if (likely(child->sighand != NULL)) {
		err = -EINVAL;
		/* snapshot under the siglock, copy out after dropping it */
		spin_lock_irq(&child->sighand->siglock);
		if (likely(child->last_siginfo != NULL)) {
			info = *child->last_siginfo;
			err = 0;
		}
		spin_unlock_irq(&child->sighand->siglock);
	}
	read_unlock(&tasklist_lock);

	return err ? err : copy_siginfo_to_user(data, &info);
}
|
||||
|
||||
/*
 * Overwrite the child's pending last_siginfo with data from userspace.
 * Returns -EFAULT on a bad user buffer, -ESRCH if the child has no
 * sighand, -EINVAL if there is no pending siginfo, 0 on success.
 */
static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
{
	siginfo_t info;
	int err = -ESRCH;

	/* copy in before taking any locks */
	if (copy_from_user(&info, data, sizeof (siginfo_t)))
		return -EFAULT;

	read_lock(&tasklist_lock);
	if (likely(child->sighand != NULL)) {
		err = -EINVAL;
		spin_lock_irq(&child->sighand->siglock);
		if (likely(child->last_siginfo != NULL)) {
			*child->last_siginfo = info;
			err = 0;
		}
		spin_unlock_irq(&child->sighand->siglock);
	}
	read_unlock(&tasklist_lock);
	return err;
}
|
||||
|
||||
/*
 * Handle the architecture-independent ptrace requests.  Architecture
 * code calls this for requests it does not service itself.
 *
 * Returns the per-request result, or -EIO for requests not handled
 * here (the conventional ptrace "unknown request" error).
 */
int ptrace_request(struct task_struct *child, long request,
		   long addr, long data)
{
	int ret = -EIO;

	switch (request) {
#ifdef PTRACE_OLDSETOPTIONS
	/* Some architectures keep a legacy request number for SETOPTIONS. */
	case PTRACE_OLDSETOPTIONS:
#endif
	case PTRACE_SETOPTIONS:
		ret = ptrace_setoptions(child, data);
		break;
	case PTRACE_GETEVENTMSG:
		/* @data is the user address to store the event message at. */
		ret = put_user(child->ptrace_message, (unsigned long __user *) data);
		break;
	case PTRACE_GETSIGINFO:
		ret = ptrace_getsiginfo(child, (siginfo_t __user *) data);
		break;
	case PTRACE_SETSIGINFO:
		ret = ptrace_setsiginfo(child, (siginfo_t __user *) data);
		break;
	default:
		break;
	}

	return ret;
}
|
||||
470
kernel/rcupdate.c
Normal file
470
kernel/rcupdate.c
Normal file
@@ -0,0 +1,470 @@
|
||||
/*
|
||||
* Read-Copy Update mechanism for mutual exclusion
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2001
|
||||
*
|
||||
* Authors: Dipankar Sarma <dipankar@in.ibm.com>
|
||||
* Manfred Spraul <manfred@colorfullife.com>
|
||||
*
|
||||
* Based on the original work by Paul McKenney <paulmck@us.ibm.com>
|
||||
* and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
|
||||
* Papers:
|
||||
* http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
|
||||
* http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
|
||||
*
|
||||
* For detailed explanation of Read-Copy Update mechanism see -
|
||||
* http://lse.sourceforge.net/locking/rcupdate.html
|
||||
*
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/cpu.h>
|
||||
|
||||
/* Definition for rcupdate control block. */
/*
 * Global batch counters for the two RCU flavours.  .cur is the batch
 * currently running, .completed the last batch known finished.  The
 * -300 start value is presumably chosen so that batch-number wraparound
 * is exercised soon after boot -- TODO confirm against RCU docs.
 */
struct rcu_ctrlblk rcu_ctrlblk =
	{ .cur = -300, .completed = -300 };
struct rcu_ctrlblk rcu_bh_ctrlblk =
	{ .cur = -300, .completed = -300 };

/* Bookkeeping of the progress of the grace period */
struct rcu_state {
	spinlock_t	lock;	/* Guard this struct and writes to rcu_ctrlblk */
	cpumask_t	cpumask; /* CPUs that need to switch in order	*/
				 /* for current batch to proceed.	*/
};

static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };

/* Per-CPU callback queues for each RCU flavour. */
DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };

/* Fake initialization required by compiler */
static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
/* Max callbacks invoked per rcu_do_batch() pass (module parameter). */
static int maxbatch = 10;
|
||||
|
||||
/**
 * call_rcu - Queue an RCU callback for invocation after a grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual update function to be invoked after the grace period
 *
 * The update function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed. RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
 */
void fastcall call_rcu(struct rcu_head *head,
				void (*func)(struct rcu_head *rcu))
{
	unsigned long flags;
	struct rcu_data *rdp;

	head->func = func;
	head->next = NULL;
	/*
	 * Interrupts off: the per-cpu "next" list is also appended to
	 * from irq context, and we must stay on this CPU while using
	 * its rcu_data.
	 */
	local_irq_save(flags);
	rdp = &__get_cpu_var(rcu_data);
	/* Tail-pointer append: nxttail always points at the last ->next. */
	*rdp->nxttail = head;
	rdp->nxttail = &head->next;
	local_irq_restore(flags);
}
|
||||
|
||||
/**
 * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual update function to be invoked after the grace period
 *
 * The update function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed. call_rcu_bh() assumes
 * that the read-side critical sections end on completion of a softirq
 * handler. This means that read-side critical sections in process
 * context must not be interrupted by softirqs. This interface is to be
 * used when most of the read-side critical sections are in softirq context.
 * RCU read-side critical sections are delimited by rcu_read_lock() and
 * rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh()
 * and rcu_read_unlock_bh(), if in process context. These may be nested.
 */
void fastcall call_rcu_bh(struct rcu_head *head,
				void (*func)(struct rcu_head *rcu))
{
	unsigned long flags;
	struct rcu_data *rdp;

	head->func = func;
	head->next = NULL;
	/* Same tail-pointer append as call_rcu(), on the _bh queues. */
	local_irq_save(flags);
	rdp = &__get_cpu_var(rcu_bh_data);
	*rdp->nxttail = head;
	rdp->nxttail = &head->next;
	local_irq_restore(flags);
}
|
||||
|
||||
/*
 * Invoke the completed RCU callbacks. They are expected to be in
 * a per-cpu list.
 *
 * At most 'maxbatch' callbacks are run per invocation; if any remain
 * on the donelist afterwards, the per-cpu tasklet is rescheduled to
 * finish them later (bounds latency from long callback lists).
 */
static void rcu_do_batch(struct rcu_data *rdp)
{
	struct rcu_head *next, *list;
	int count = 0;

	list = rdp->donelist;
	while (list) {
		/* Unlink before invoking: the callback may free 'list'. */
		next = rdp->donelist = list->next;
		list->func(list);
		list = next;
		if (++count >= maxbatch)
			break;
	}
	if (!rdp->donelist)
		/* List drained: reset tail pointer to the empty-list state. */
		rdp->donetail = &rdp->donelist;
	else
		tasklet_schedule(&per_cpu(rcu_tasklet, rdp->cpu));
}
|
||||
|
||||
/*
|
||||
* Grace period handling:
|
||||
* The grace period handling consists out of two steps:
|
||||
* - A new grace period is started.
|
||||
* This is done by rcu_start_batch. The start is not broadcasted to
|
||||
* all cpus, they must pick this up by comparing rcp->cur with
|
||||
* rdp->quiescbatch. All cpus are recorded in the
|
||||
* rcu_state.cpumask bitmap.
|
||||
* - All cpus must go through a quiescent state.
|
||||
* Since the start of the grace period is not broadcasted, at least two
|
||||
* calls to rcu_check_quiescent_state are required:
|
||||
* The first call just notices that a new grace period is running. The
|
||||
* following calls check if there was a quiescent state since the beginning
|
||||
* of the grace period. If so, it updates rcu_state.cpumask. If
|
||||
* the bitmap is empty, then the grace period is completed.
|
||||
* rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
|
||||
* period (if necessary).
|
||||
*/
|
||||
/*
 * Register a new batch of callbacks, and start it up if there is currently no
 * active batch and the batch to be registered has not already occurred.
 * Caller must hold rcu_state.lock.
 */
static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
				int next_pending)
{
	if (next_pending)
		rcp->next_pending = 1;

	/* Only start if a batch is requested and none is in flight. */
	if (rcp->next_pending &&
			rcp->completed == rcp->cur) {
		/* Can't change, since spin lock held. */
		/* Every online CPU except the nohz ones must pass through
		 * a quiescent state before this batch can complete. */
		cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask);

		rcp->next_pending = 0;
		/* next_pending == 0 must be visible in __rcu_process_callbacks()
		 * before it can see new value of cur.
		 */
		smp_wmb();
		rcp->cur++;
	}
}
|
||||
|
||||
/*
 * cpu went through a quiescent state since the beginning of the grace period.
 * Clear it from the cpu mask and complete the grace period if it was the last
 * cpu. Start another grace period if someone has further entries pending
 *
 * Caller must hold rsp->lock (same lock rcu_start_batch() requires).
 */
static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
{
	cpu_clear(cpu, rsp->cpumask);
	if (cpus_empty(rsp->cpumask)) {
		/* batch completed ! */
		rcp->completed = rcp->cur;
		/* Kick off the next batch if one was queued meanwhile. */
		rcu_start_batch(rcp, rsp, 0);
	}
}
|
||||
|
||||
/*
 * Check if the cpu has gone through a quiescent state (say context
 * switch). If so and if it already hasn't done so in this RCU
 * quiescent cycle, then indicate that it has done so.
 */
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
				struct rcu_state *rsp, struct rcu_data *rdp)
{
	if (rdp->quiescbatch != rcp->cur) {
		/* start new grace period: */
		rdp->qs_pending = 1;
		rdp->passed_quiesc = 0;
		rdp->quiescbatch = rcp->cur;
		return;
	}

	/* Grace period already completed for this cpu?
	 * qs_pending is checked instead of the actual bitmap to avoid
	 * cacheline trashing.
	 */
	if (!rdp->qs_pending)
		return;

	/*
	 * Was there a quiescent state since the beginning of the grace
	 * period? If no, then exit and wait for the next call.
	 */
	if (!rdp->passed_quiesc)
		return;
	rdp->qs_pending = 0;

	spin_lock(&rsp->lock);
	/*
	 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
	 * during cpu startup. Ignore the quiescent state.
	 */
	if (likely(rdp->quiescbatch == rcp->cur))
		cpu_quiet(rdp->cpu, rcp, rsp);

	spin_unlock(&rsp->lock);
}
|
||||
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU

/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing
 * locking requirements, the list it's pulling from has to belong to a cpu
 * which is dead and hence not processing interrupts.
 */
static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
				struct rcu_head **tail)
{
	/* irqs off so irq-context call_rcu() can't race the append. */
	local_irq_disable();
	*this_rdp->nxttail = list;
	if (list)
		this_rdp->nxttail = tail;
	local_irq_enable();
}

/*
 * Adopt the dead CPU's pending callbacks into this CPU's queues for
 * one RCU flavour, flushing the dead CPU out of any grace period it
 * still gates.
 */
static void __rcu_offline_cpu(struct rcu_data *this_rdp,
	struct rcu_ctrlblk *rcp, struct rcu_state *rsp, struct rcu_data *rdp)
{
	/* if the cpu going offline owns the grace period
	 * we can block indefinitely waiting for it, so flush
	 * it here
	 */
	spin_lock_bh(&rsp->lock);
	if (rcp->cur != rcp->completed)
		cpu_quiet(rdp->cpu, rcp, rsp);
	spin_unlock_bh(&rsp->lock);
	rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
	rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);

}
/*
 * Called when @cpu goes offline: move its queued callbacks (both
 * flavours) to the current CPU and kill its RCU tasklet.
 */
static void rcu_offline_cpu(int cpu)
{
	struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
	struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);

	__rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &rcu_state,
					&per_cpu(rcu_data, cpu));
	__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &rcu_bh_state,
					&per_cpu(rcu_bh_data, cpu));
	put_cpu_var(rcu_data);
	put_cpu_var(rcu_bh_data);
	tasklet_kill_immediate(&per_cpu(rcu_tasklet, cpu), cpu);
}

#else

/* Without CPU hotplug, CPUs never go away: nothing to do. */
static void rcu_offline_cpu(int cpu)
{
}

#endif
|
||||
|
||||
/*
 * This does the RCU processing work from tasklet context.
 *
 * Pipeline per CPU: nxtlist (newly queued) -> curlist (waiting for the
 * grace period numbered rdp->batch) -> donelist (safe to invoke).
 */
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
					struct rcu_state *rsp, struct rcu_data *rdp)
{
	/* Grace period for the current batch finished: retire it. */
	if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
		*rdp->donetail = rdp->curlist;
		rdp->donetail = rdp->curtail;
		rdp->curlist = NULL;
		rdp->curtail = &rdp->curlist;
	}

	/* irqs off: nxtlist/nxttail are appended to from irq context. */
	local_irq_disable();
	if (rdp->nxtlist && !rdp->curlist) {
		rdp->curlist = rdp->nxtlist;
		rdp->curtail = rdp->nxttail;
		rdp->nxtlist = NULL;
		rdp->nxttail = &rdp->nxtlist;
		local_irq_enable();

		/*
		 * start the next batch of callbacks
		 */

		/* determine batch number */
		rdp->batch = rcp->cur + 1;
		/* see the comment and corresponding wmb() in
		 * the rcu_start_batch()
		 */
		smp_rmb();

		if (!rcp->next_pending) {
			/* and start it/schedule start if it's a new batch */
			spin_lock(&rsp->lock);
			rcu_start_batch(rcp, rsp, 1);
			spin_unlock(&rsp->lock);
		}
	} else {
		local_irq_enable();
	}
	rcu_check_quiescent_state(rcp, rsp, rdp);
	if (rdp->donelist)
		rcu_do_batch(rdp);
}
|
||||
|
||||
static void rcu_process_callbacks(unsigned long unused)
|
||||
{
|
||||
__rcu_process_callbacks(&rcu_ctrlblk, &rcu_state,
|
||||
&__get_cpu_var(rcu_data));
|
||||
__rcu_process_callbacks(&rcu_bh_ctrlblk, &rcu_bh_state,
|
||||
&__get_cpu_var(rcu_bh_data));
|
||||
}
|
||||
|
||||
/*
 * Called from the timer interrupt to note quiescent states.  @user is
 * true when the interrupt arrived in user mode.  User mode, or idle
 * with no softirq and only this one hardirq level active, is a
 * quiescent state for both flavours; merely being outside softirq
 * counts for the _bh flavour only.  Always kicks the per-cpu tasklet
 * to advance callback processing.
 */
void rcu_check_callbacks(int cpu, int user)
{
	if (user ||
	    (idle_cpu(cpu) && !in_softirq() &&
				hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
		rcu_qsctr_inc(cpu);
		rcu_bh_qsctr_inc(cpu);
	} else if (!in_softirq())
		rcu_bh_qsctr_inc(cpu);
	tasklet_schedule(&per_cpu(rcu_tasklet, cpu));
}
|
||||
|
||||
/*
 * Reset one CPU's rcu_data to the empty state: all three lists empty
 * (tail pointers aimed back at their list heads) and the quiescent
 * batch synced to the last completed batch so no stale state is seen.
 */
static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
						struct rcu_data *rdp)
{
	memset(rdp, 0, sizeof(*rdp));
	rdp->curtail = &rdp->curlist;
	rdp->nxttail = &rdp->nxtlist;
	rdp->donetail = &rdp->donelist;
	rdp->quiescbatch = rcp->completed;
	rdp->qs_pending = 0;
	rdp->cpu = cpu;
}
|
||||
|
||||
/*
 * Bring a CPU into the RCU machinery: initialize its per-cpu data for
 * both flavours and set up its callback-processing tasklet.
 */
static void __devinit rcu_online_cpu(int cpu)
{
	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
	struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);

	rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
	rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
	tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
}
|
||||
|
||||
/*
 * CPU hotplug notifier: set up RCU state before a CPU comes up, and
 * reclaim its callbacks after it dies.  Other transitions are ignored.
 */
static int __devinit rcu_cpu_notify(struct notifier_block *self,
				unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	switch (action) {
	case CPU_UP_PREPARE:
		rcu_online_cpu(cpu);
		break;
	case CPU_DEAD:
		rcu_offline_cpu(cpu);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __devinitdata rcu_nb = {
	.notifier_call	= rcu_cpu_notify,
};
|
||||
|
||||
/*
 * Initializes rcu mechanism.  Assumed to be called early.
 * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
 * Note that rcu_qsctr and friends are implicitly
 * initialized due to the choice of ``0'' for RCU_CTR_INVALID.
 */
void __init rcu_init(void)
{
	/* Set up the boot CPU by hand (it is already running). */
	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
			(void *)(long)smp_processor_id());
	/* Register notifier for non-boot CPUs */
	register_cpu_notifier(&rcu_nb);
}
|
||||
|
||||
/* Pairs an RCU callback with a completion so a caller can sleep until
 * a grace period has elapsed (see synchronize_kernel()). */
struct rcu_synchronize {
	struct rcu_head head;
	struct completion completion;
};

/* Because of FASTCALL declaration of complete, we use this wrapper */
static void wakeme_after_rcu(struct rcu_head  *head)
{
	struct rcu_synchronize *rcu;

	/* Recover the containing rcu_synchronize from its embedded head. */
	rcu = container_of(head, struct rcu_synchronize, head);
	complete(&rcu->completion);
}
|
||||
|
||||
/**
 * synchronize_kernel - wait until a grace period has elapsed.
 *
 * Control will return to the caller some time after a full grace
 * period has elapsed, in other words after all currently executing RCU
 * read-side critical sections have completed. RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
 *
 * Must be called from process context: it blocks.
 */
void synchronize_kernel(void)
{
	/* On-stack is safe: we don't return until the callback has run. */
	struct rcu_synchronize rcu;

	init_completion(&rcu.completion);
	/* Will wake me after RCU finished */
	call_rcu(&rcu.head, wakeme_after_rcu);

	/* Wait for it */
	wait_for_completion(&rcu.completion);
}

module_param(maxbatch, int, 0);
EXPORT_SYMBOL_GPL(call_rcu);
EXPORT_SYMBOL_GPL(call_rcu_bh);
EXPORT_SYMBOL_GPL(synchronize_kernel);
|
||||
551
kernel/resource.c
Normal file
551
kernel/resource.c
Normal file
@@ -0,0 +1,551 @@
|
||||
/*
|
||||
* linux/kernel/resource.c
|
||||
*
|
||||
* Copyright (C) 1999 Linus Torvalds
|
||||
* Copyright (C) 1999 Martin Mares <mj@ucw.cz>
|
||||
*
|
||||
* Arbitrary resource management.
|
||||
*/
|
||||
|
||||
#include <linux/config.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <asm/io.h>
|
||||
|
||||
|
||||
/* Root of the I/O port resource tree. */
struct resource ioport_resource = {
	.name	= "PCI IO",
	.start	= 0x0000,
	.end	= IO_SPACE_LIMIT,
	.flags	= IORESOURCE_IO,
};

EXPORT_SYMBOL(ioport_resource);

/* Root of the memory-mapped I/O resource tree (whole address space). */
struct resource iomem_resource = {
	.name	= "PCI mem",
	.start	= 0UL,
	.end	= ~0UL,
	.flags	= IORESOURCE_MEM,
};

EXPORT_SYMBOL(iomem_resource);

/* Protects both resource trees: readers for traversal, writers for
 * any insertion, removal or adjustment. */
static DEFINE_RWLOCK(resource_lock);
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
|
||||
enum { MAX_IORES_LEVEL = 5 };
|
||||
|
||||
static void *r_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
{
|
||||
struct resource *p = v;
|
||||
(*pos)++;
|
||||
if (p->child)
|
||||
return p->child;
|
||||
while (!p->sibling && p->parent)
|
||||
p = p->parent;
|
||||
return p->sibling;
|
||||
}
|
||||
|
||||
static void *r_start(struct seq_file *m, loff_t *pos)
|
||||
__acquires(resource_lock)
|
||||
{
|
||||
struct resource *p = m->private;
|
||||
loff_t l = 0;
|
||||
read_lock(&resource_lock);
|
||||
for (p = p->child; p && l < *pos; p = r_next(m, p, &l))
|
||||
;
|
||||
return p;
|
||||
}
|
||||
|
||||
static void r_stop(struct seq_file *m, void *v)
|
||||
__releases(resource_lock)
|
||||
{
|
||||
read_unlock(&resource_lock);
|
||||
}
|
||||
|
||||
static int r_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct resource *root = m->private;
|
||||
struct resource *r = v, *p;
|
||||
int width = root->end < 0x10000 ? 4 : 8;
|
||||
int depth;
|
||||
|
||||
for (depth = 0, p = r; depth < MAX_IORES_LEVEL; depth++, p = p->parent)
|
||||
if (p->parent == root)
|
||||
break;
|
||||
seq_printf(m, "%*s%0*lx-%0*lx : %s\n",
|
||||
depth * 2, "",
|
||||
width, r->start,
|
||||
width, r->end,
|
||||
r->name ? r->name : "<BAD>");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct seq_operations resource_op = {
|
||||
.start = r_start,
|
||||
.next = r_next,
|
||||
.stop = r_stop,
|
||||
.show = r_show,
|
||||
};
|
||||
|
||||
static int ioports_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
int res = seq_open(file, &resource_op);
|
||||
if (!res) {
|
||||
struct seq_file *m = file->private_data;
|
||||
m->private = &ioport_resource;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static int iomem_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
int res = seq_open(file, &resource_op);
|
||||
if (!res) {
|
||||
struct seq_file *m = file->private_data;
|
||||
m->private = &iomem_resource;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static struct file_operations proc_ioports_operations = {
|
||||
.open = ioports_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
static struct file_operations proc_iomem_operations = {
|
||||
.open = iomem_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
static int __init ioresources_init(void)
|
||||
{
|
||||
struct proc_dir_entry *entry;
|
||||
|
||||
entry = create_proc_entry("ioports", 0, NULL);
|
||||
if (entry)
|
||||
entry->proc_fops = &proc_ioports_operations;
|
||||
entry = create_proc_entry("iomem", 0, NULL);
|
||||
if (entry)
|
||||
entry->proc_fops = &proc_iomem_operations;
|
||||
return 0;
|
||||
}
|
||||
__initcall(ioresources_init);
|
||||
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
|
||||
/* Return the conflict entry if you can't request it */
/*
 * Insert @new into @root's (start-sorted, non-overlapping) child list.
 * Returns NULL on success; on conflict returns the clashing resource,
 * or @root itself if @new doesn't fit inside @root at all.  Caller
 * must hold resource_lock for writing.
 */
static struct resource * __request_resource(struct resource *root, struct resource *new)
{
	unsigned long start = new->start;
	unsigned long end = new->end;
	struct resource *tmp, **p;

	if (end < start)
		return root;
	if (start < root->start)
		return root;
	if (end > root->end)
		return root;
	p = &root->child;
	for (;;) {
		tmp = *p;
		/* Past all overlapping entries: link @new in before tmp. */
		if (!tmp || tmp->start > end) {
			new->sibling = tmp;
			*p = new;
			new->parent = root;
			return NULL;
		}
		p = &tmp->sibling;
		if (tmp->end < start)
			continue;
		/* tmp overlaps [start,end]: report the conflict. */
		return tmp;
	}
}
|
||||
|
||||
/*
 * Unlink @old from its parent's child list.  Children of @old (if any)
 * are not touched.  Returns 0 on success, -EINVAL if @old is not found
 * under its recorded parent.  Caller must hold resource_lock for
 * writing.
 */
static int __release_resource(struct resource *old)
{
	struct resource *tmp, **p;

	p = &old->parent->child;
	for (;;) {
		tmp = *p;
		if (!tmp)
			break;
		if (tmp == old) {
			*p = tmp->sibling;
			old->parent = NULL;
			return 0;
		}
		p = &tmp->sibling;
	}
	return -EINVAL;
}
|
||||
|
||||
int request_resource(struct resource *root, struct resource *new)
|
||||
{
|
||||
struct resource *conflict;
|
||||
|
||||
write_lock(&resource_lock);
|
||||
conflict = __request_resource(root, new);
|
||||
write_unlock(&resource_lock);
|
||||
return conflict ? -EBUSY : 0;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(request_resource);
|
||||
|
||||
struct resource *____request_resource(struct resource *root, struct resource *new)
|
||||
{
|
||||
struct resource *conflict;
|
||||
|
||||
write_lock(&resource_lock);
|
||||
conflict = __request_resource(root, new);
|
||||
write_unlock(&resource_lock);
|
||||
return conflict;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(____request_resource);
|
||||
|
||||
int release_resource(struct resource *old)
|
||||
{
|
||||
int retval;
|
||||
|
||||
write_lock(&resource_lock);
|
||||
retval = __release_resource(old);
|
||||
write_unlock(&resource_lock);
|
||||
return retval;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(release_resource);
|
||||
|
||||
/*
 * Find empty slot in the resource tree given range and alignment.
 *
 * Scans the gaps between @root's existing children for a free span of
 * @size bytes within [@min, @max], aligned to @align, optionally
 * letting @alignf adjust the candidate.  On success the chosen range
 * is written into @new and 0 is returned; -EBUSY if nothing fits.
 * Caller must hold resource_lock.
 */
static int find_resource(struct resource *root, struct resource *new,
			 unsigned long size,
			 unsigned long min, unsigned long max,
			 unsigned long align,
			 void (*alignf)(void *, struct resource *,
					unsigned long, unsigned long),
			 void *alignf_data)
{
	struct resource *this = root->child;

	new->start = root->start;
	/*
	 * Skip past an allocated resource that starts at 0, since the assignment
	 * of this->start - 1 to new->end below would cause an underflow.
	 */
	if (this && this->start == 0) {
		new->start = this->end + 1;
		this = this->sibling;
	}
	for(;;) {
		/* Candidate gap ends just before the next child, or at
		 * the end of @root if no children remain. */
		if (this)
			new->end = this->start - 1;
		else
			new->end = root->end;
		if (new->start < min)
			new->start = min;
		if (new->end > max)
			new->end = max;
		/* Round start up to the requested alignment
		 * (assumes @align is a power of two -- TODO confirm). */
		new->start = (new->start + align - 1) & ~(align - 1);
		if (alignf)
			alignf(alignf_data, new, size, align);
		if (new->start < new->end && new->end - new->start + 1 >= size) {
			new->end = new->start + size - 1;
			return 0;
		}
		if (!this)
			break;
		/* Move on to the gap after this child. */
		new->start = this->end + 1;
		this = this->sibling;
	}
	return -EBUSY;
}
|
||||
|
||||
/*
 * Allocate empty slot in the resource tree given range and alignment.
 *
 * Combines find_resource() and __request_resource() under a single
 * write lock so the found gap cannot be stolen in between.  Returns 0
 * on success, -EBUSY if no suitable gap exists.
 */
int allocate_resource(struct resource *root, struct resource *new,
		      unsigned long size,
		      unsigned long min, unsigned long max,
		      unsigned long align,
		      void (*alignf)(void *, struct resource *,
				     unsigned long, unsigned long),
		      void *alignf_data)
{
	int err;

	write_lock(&resource_lock);
	err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
	if (err >= 0 && __request_resource(root, new))
		err = -EBUSY;
	write_unlock(&resource_lock);
	return err;
}

EXPORT_SYMBOL(allocate_resource);
|
||||
|
||||
/**
 * insert_resource - Inserts a resource in the resource tree
 * @parent: parent of the new resource
 * @new: new resource to insert
 *
 * Returns 0 on success, -EBUSY if the resource can't be inserted.
 *
 * This function is equivalent of request_resource when no conflict
 * happens. If a conflict happens, and the conflicting resources
 * entirely fit within the range of the new resource, then the new
 * resource is inserted and the conflicting resources become childs of
 * the new resource. Otherwise the new resource becomes the child of
 * the conflicting resource
 */
int insert_resource(struct resource *parent, struct resource *new)
{
	int result;
	struct resource *first, *next;

	write_lock(&resource_lock);
 begin:
	result = 0;
	first = __request_resource(parent, new);
	if (!first)
		goto out;

	result = -EBUSY;
	if (first == parent)
		goto out;

	/* Resource fully contained by the clashing resource? Recurse into it */
	if (first->start <= new->start && first->end >= new->end) {
		parent = first;
		goto begin;
	}

	/* Walk the run of siblings that @new would swallow; they must
	 * all lie entirely inside @new. */
	for (next = first; ; next = next->sibling) {
		/* Partial overlap? Bad, and unfixable */
		if (next->start < new->start || next->end > new->end)
			goto out;
		if (!next->sibling)
			break;
		if (next->sibling->start > new->end)
			break;
	}

	result = 0;

	/* Splice @new in place of the run [first..next] and re-parent
	 * that run as @new's children. */
	new->parent = parent;
	new->sibling = next->sibling;
	new->child = first;

	next->sibling = NULL;
	for (next = first; next; next = next->sibling)
		next->parent = new;

	if (parent->child == first) {
		parent->child = new;
	} else {
		next = parent->child;
		while (next->sibling != first)
			next = next->sibling;
		next->sibling = new;
	}

 out:
	write_unlock(&resource_lock);
	return result;
}

EXPORT_SYMBOL(insert_resource);
|
||||
|
||||
/*
 * Given an existing resource, change its start and size to match the
 * arguments.  Returns -EBUSY if it can't fit.  Existing children of
 * the resource are assumed to be immutable.
 *
 * The new range must stay inside the parent, keep covering all
 * children, and must not run into either neighbouring sibling.
 */
int adjust_resource(struct resource *res, unsigned long start, unsigned long size)
{
	struct resource *tmp, *parent = res->parent;
	unsigned long end = start + size - 1;
	int result = -EBUSY;

	write_lock(&resource_lock);

	/* Must remain within the parent. */
	if ((start < parent->start) || (end > parent->end))
		goto out;

	/* Must still cover every existing child. */
	for (tmp = res->child; tmp; tmp = tmp->sibling) {
		if ((tmp->start < start) || (tmp->end > end))
			goto out;
	}

	/* Must not overlap the following sibling... */
	if (res->sibling && (res->sibling->start <= end))
		goto out;

	/* ...nor the preceding one (found by walking from the parent). */
	tmp = parent->child;
	if (tmp != res) {
		while (tmp->sibling != res)
			tmp = tmp->sibling;
		if (start <= tmp->end)
			goto out;
	}

	res->start = start;
	res->end = end;
	result = 0;

 out:
	write_unlock(&resource_lock);
	return result;
}

EXPORT_SYMBOL(adjust_resource);
|
||||
|
||||
/*
 * This is compatibility stuff for IO resources.
 *
 * Note how this, unlike the above, knows about
 * the IO flag meanings (busy etc).
 *
 * Request-region creates a new busy region.
 *
 * Check-region returns non-zero if the area is already busy
 *
 * Release-region releases a matching busy region.
 */
/*
 * Allocate and insert a busy region [start, start+n-1] under @parent,
 * descending through non-busy conflicting resources.  Returns the new
 * resource (caller owns it) or NULL on allocation failure or if a busy
 * resource is in the way.
 */
struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name)
{
	struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL);

	if (res) {
		memset(res, 0, sizeof(*res));
		res->name = name;
		res->start = start;
		res->end = start + n - 1;
		res->flags = IORESOURCE_BUSY;

		write_lock(&resource_lock);

		for (;;) {
			struct resource *conflict;

			conflict = __request_resource(parent, res);
			if (!conflict)
				break;
			/* A non-busy conflict that contains us is fine:
			 * retry the insertion one level down, inside it. */
			if (conflict != parent) {
				parent = conflict;
				if (!(conflict->flags & IORESOURCE_BUSY))
					continue;
			}

			/* Uhhuh, that didn't work out.. */
			kfree(res);
			res = NULL;
			break;
		}
		write_unlock(&resource_lock);
	}
	return res;
}

EXPORT_SYMBOL(__request_region);
|
||||
|
||||
/*
 * Deprecated probe: returns 0 if [start, start+n-1] is free under
 * @parent, -EBUSY otherwise.  Inherently racy (the region may be taken
 * between the check and a later request), hence __deprecated.
 */
int __deprecated __check_region(struct resource *parent, unsigned long start, unsigned long n)
{
	struct resource * res;

	/* Try to grab the region, then immediately give it back. */
	res = __request_region(parent, start, n, "check-region");
	if (!res)
		return -EBUSY;

	release_resource(res);
	kfree(res);
	return 0;
}

EXPORT_SYMBOL(__check_region);
|
||||
|
||||
/*
 * Release the busy region [start, start+n-1] previously obtained via
 * __request_region(), descending through non-busy resources that
 * contain it.  The match must be exact; otherwise a warning is logged
 * and nothing is freed.
 */
void __release_region(struct resource *parent, unsigned long start, unsigned long n)
{
	struct resource **p;
	unsigned long end;

	p = &parent->child;
	end = start + n - 1;

	write_lock(&resource_lock);

	for (;;) {
		struct resource *res = *p;

		if (!res)
			break;
		if (res->start <= start && res->end >= end) {
			/* Non-busy container: descend into its children. */
			if (!(res->flags & IORESOURCE_BUSY)) {
				p = &res->child;
				continue;
			}
			if (res->start != start || res->end != end)
				break;
			/* Exact match: unlink and free it. */
			*p = res->sibling;
			write_unlock(&resource_lock);
			kfree(res);
			return;
		}
		p = &res->sibling;
	}

	write_unlock(&resource_lock);

	printk(KERN_WARNING "Trying to free nonexistent resource <%08lx-%08lx>\n", start, end);
}

EXPORT_SYMBOL(__release_region);
|
||||
|
||||
/*
 * Called from init/main.c to reserve IO ports.
 *
 * Parses "reserve=start,count[,start,count...]" from the kernel
 * command line and marks each range busy, in the iomem tree when
 * start >= 0x10000, otherwise in the ioport tree.  At most MAXRESERVE
 * ranges are honoured; the rest are silently dropped.
 */
#define MAXRESERVE 4
static int __init reserve_setup(char *str)
{
	static int reserved;
	static struct resource reserve[MAXRESERVE];

	for (;;) {
		int io_start, io_num;
		int x = reserved;

		/* get_option() returns 2 when a comma follows the value;
		 * a start without a following count ends the list. */
		if (get_option (&str, &io_start) != 2)
			break;
		if (get_option (&str, &io_num)   == 0)
			break;
		if (x < MAXRESERVE) {
			struct resource *res = reserve + x;
			res->name = "reserved";
			res->start = io_start;
			res->end = io_start + io_num - 1;
			res->flags = IORESOURCE_BUSY;
			res->child = NULL;
			/* Only count the slot as used if the request stuck. */
			if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0)
				reserved = x+1;
		}
	}
	return 1;
}

__setup("reserve=", reserve_setup);
|
||||
5004
kernel/sched.c
Normal file
5004
kernel/sched.c
Normal file
File diff suppressed because it is too large
Load Diff
56
kernel/seccomp.c
Normal file
56
kernel/seccomp.c
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* linux/kernel/seccomp.c
|
||||
*
|
||||
* Copyright 2004-2005 Andrea Arcangeli <andrea@cpushare.com>
|
||||
*
|
||||
* This defines a simple but solid secure-computing mode.
|
||||
*/
|
||||
|
||||
#include <linux/seccomp.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
/* #define SECCOMP_DEBUG 1 */
|
||||
|
||||
/*
 * Secure computing mode 1 allows only read/write/exit/sigreturn.
 * To be fully secure this must be combined with rlimit
 * to limit the stack allocations too.
 */
static int mode1_syscalls[] = {
	__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
	0, /* null terminated */
};

#ifdef TIF_32BIT
/* Same whitelist, but with the 32-bit compat syscall numbers used by
   32-bit tasks on a 64-bit kernel. */
static int mode1_syscalls_32[] = {
	__NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
	0, /* null terminated */
};
#endif
||||
/*
 * Called on syscall entry for a task in seccomp mode.  Returns only if
 * @this_syscall is on the whitelist for the current mode; otherwise the
 * task is killed with SIGKILL and this function does not return.
 */
void __secure_computing(int this_syscall)
{
	int mode = current->seccomp.mode;
	int * syscall;

	switch (mode) {
	case 1:
		syscall = mode1_syscalls;
#ifdef TIF_32BIT
		/* Compat tasks are matched against the 32-bit table. */
		if (test_thread_flag(TIF_32BIT))
			syscall = mode1_syscalls_32;
#endif
		/* Scan the zero-terminated whitelist. */
		do {
			if (*syscall == this_syscall)
				return;
		} while (*++syscall);
		break;
	default:
		/* Unknown seccomp mode: should be unreachable. */
		BUG();
	}

#ifdef SECCOMP_DEBUG
	dump_stack();
#endif
	do_exit(SIGKILL);
}
||||
2662
kernel/signal.c
Normal file
2662
kernel/signal.c
Normal file
File diff suppressed because it is too large
Load Diff
496
kernel/softirq.c
Normal file
496
kernel/softirq.c
Normal file
@@ -0,0 +1,496 @@
|
||||
/*
|
||||
* linux/kernel/softirq.c
|
||||
*
|
||||
* Copyright (C) 1992 Linus Torvalds
|
||||
*
|
||||
* Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/rcupdate.h>
|
||||
|
||||
#include <asm/irq.h>
|
||||
/*
|
||||
- No shared variables, all the data are CPU local.
|
||||
- If a softirq needs serialization, let it serialize itself
|
||||
by its own spinlocks.
|
||||
- Even if softirq is serialized, only local cpu is marked for
|
||||
execution. Hence, we get something sort of weak cpu binding.
|
||||
Though it is still not clear, will it result in better locality
|
||||
or will not.
|
||||
|
||||
Examples:
|
||||
- NET RX softirq. It is multithreaded and does not require
|
||||
any global serialization.
|
||||
- NET TX softirq. It kicks software netdevice queues, hence
|
||||
it is logically serialized per device, but this serialization
|
||||
is invisible to common code.
|
||||
- Tasklets: serialized wrt itself.
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_IRQ_STAT
/* Per-cpu interrupt statistics for arches without their own copy. */
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

/* The softirq vector: one action slot per softirq number. */
static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

/* Per-cpu ksoftirqd kernel thread (see ksoftirqd() below). */
static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
/*
|
||||
* we cannot loop indefinitely here to avoid userspace starvation,
|
||||
* but we also don't want to introduce a worst case 1/HZ latency
|
||||
* to the pending events, so lets the scheduler to balance
|
||||
* the softirq load for us.
|
||||
*/
|
||||
static inline void wakeup_softirqd(void)
|
||||
{
|
||||
/* Interrupts are disabled: no need to stop preemption */
|
||||
struct task_struct *tsk = __get_cpu_var(ksoftirqd);
|
||||
|
||||
if (tsk && tsk->state != TASK_RUNNING)
|
||||
wake_up_process(tsk);
|
||||
}
|
||||
|
||||
/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to softirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance is latency against fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

/* Core softirq loop; entered with irqs disabled. */
asmlinkage void __do_softirq(void)
{
	struct softirq_action *h;
	__u32 pending;
	int max_restart = MAX_SOFTIRQ_RESTART;
	int cpu;

	pending = local_softirq_pending();

	local_bh_disable();
	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
	/* (local_softirq_pending() is an lvalue macro here) */
	local_softirq_pending() = 0;

	local_irq_enable();

	h = softirq_vec;

	/* Walk the snapshot bitmask LSB-first, running each raised
	   action with irqs enabled. */
	do {
		if (pending & 1) {
			h->action(h);
			rcu_bh_qsctr_inc(cpu);
		}
		h++;
		pending >>= 1;
	} while (pending);

	local_irq_disable();

	/* New softirqs may have been raised while we ran with irqs on;
	   loop a bounded number of times, then punt to ksoftirqd. */
	pending = local_softirq_pending();
	if (pending && --max_restart)
		goto restart;

	if (pending)
		wakeup_softirqd();

	__local_bh_enable();
}
||||
|
||||
#ifndef __ARCH_HAS_DO_SOFTIRQ

/*
 * Generic softirq entry point, for arches without their own (usually
 * assembly) implementation.  Safe to call from process context; a
 * nested call from interrupt context is a no-op.
 */
asmlinkage void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	/* Nested hard/soft irq context: the outermost level handles it. */
	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		__do_softirq();

	local_irq_restore(flags);
}

EXPORT_SYMBOL(do_softirq);

#endif
|
||||
/*
 * Re-enable bottom halves, and run any softirqs raised while they were
 * off (unless we are nested inside irq/softirq context).
 */
void local_bh_enable(void)
{
	WARN_ON(irqs_disabled());
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	sub_preempt_count(SOFTIRQ_OFFSET - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	/* Drop the final preempt count we kept above. */
	dec_preempt_count();
	preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable);
||||
|
||||
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
/* irq_exit() runs with irqs off on this arch: call __do_softirq()
   directly and skip the redundant irq fiddling. */
# define invoke_softirq()	__do_softirq()
#else
# define invoke_softirq()	do_softirq()
#endif

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
	account_system_vtime(current);
	sub_preempt_count(IRQ_EXIT_OFFSET);
	/* Only the outermost irq level gets to run softirqs. */
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();
	preempt_enable_no_resched();
}
||||
|
||||
/*
 * Mark softirq @nr pending on this cpu.
 *
 * This function must run with irqs disabled!
 */
inline fastcall void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}

EXPORT_SYMBOL(raise_softirq_irqoff);
|
||||
/* Irq-safe wrapper: raise softirq @nr from any context. */
void fastcall raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}
||||
|
||||
/* Register @action (with cookie @data) as the handler for softirq
   number @nr.  Called during init; there is no unregister. */
void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
{
	softirq_vec[nr].data = data;
	softirq_vec[nr].action = action;
}

EXPORT_SYMBOL(open_softirq);
||||
|
||||
/* Tasklets */

/* Singly-linked per-cpu list of pending tasklets. */
struct tasklet_head
{
	struct tasklet_struct *list;
};

/* Some compilers disobey section attribute on statics when not
   initialized -- RR */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
||||
/* Push @t onto this cpu's normal tasklet list and raise
   TASKLET_SOFTIRQ so it runs on the next softirq pass. */
void fastcall __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = __get_cpu_var(tasklet_vec).list;
	__get_cpu_var(tasklet_vec).list = t;
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);
||||
|
||||
/* Same as __tasklet_schedule(), but on the high-priority list, run
   from HI_SOFTIRQ (which is serviced before TASKLET_SOFTIRQ). */
void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = __get_cpu_var(tasklet_hi_vec).list;
	__get_cpu_var(tasklet_hi_vec).list = t;
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);
||||
|
||||
/* TASKLET_SOFTIRQ handler: detach this cpu's pending list, run each
   tasklet once, and requeue any that are locked (running on another
   cpu) or disabled. */
static void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_vec).list;
	__get_cpu_var(tasklet_vec).list = NULL;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			/* count == 0 means enabled. */
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		/* Running elsewhere or disabled: put it back on the
		   list and re-raise so it is retried. */
		local_irq_disable();
		t->next = __get_cpu_var(tasklet_vec).list;
		__get_cpu_var(tasklet_vec).list = t;
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
		local_irq_enable();
	}
}
||||
|
||||
/* HI_SOFTIRQ handler: identical to tasklet_action() but serving the
   high-priority tasklet list. */
static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_hi_vec).list;
	__get_cpu_var(tasklet_hi_vec).list = NULL;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			/* count == 0 means enabled. */
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		/* Running elsewhere or disabled: requeue and re-raise. */
		local_irq_disable();
		t->next = __get_cpu_var(tasklet_hi_vec).list;
		__get_cpu_var(tasklet_hi_vec).list = t;
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}
||||
|
||||
|
||||
void tasklet_init(struct tasklet_struct *t,
|
||||
void (*func)(unsigned long), unsigned long data)
|
||||
{
|
||||
t->next = NULL;
|
||||
t->state = 0;
|
||||
atomic_set(&t->count, 0);
|
||||
t->func = func;
|
||||
t->data = data;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(tasklet_init);
|
||||
|
||||
/*
 * Wait until @t is neither scheduled nor running, and keep SCHED
 * cleared so it will not run again.  May sleep, so must not be called
 * from interrupt context (only warned, not enforced).
 */
void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		printk("Attempt to kill tasklet from interrupt\n");

	/* Wait for any pending schedule to be consumed ... */
	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do
			yield();
		while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	/* ... then for the handler itself to finish running. */
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
||||
|
||||
/* Boot-time registration of the two tasklet-running softirqs. */
void __init softirq_init(void)
{
	open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
}
||||
|
||||
/*
 * Per-cpu kernel thread that services softirqs left over when
 * __do_softirq() exhausts its restart budget.  Runs at nice 19 so it
 * never starves userspace.
 */
static int ksoftirqd(void * __bind_cpu)
{
	set_user_nice(current, 19);
	current->flags |= PF_NOFREEZE;

	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop()) {
		preempt_disable();
		/* Nothing pending: sleep until woken. */
		if (!local_softirq_pending()) {
			preempt_enable_no_resched();
			schedule();
			preempt_disable();
		}

		__set_current_state(TASK_RUNNING);

		while (local_softirq_pending()) {
			/* Preempt disable stops cpu going offline.
			   If already offline, we'll be on wrong CPU:
			   don't process */
			if (cpu_is_offline((long)__bind_cpu))
				goto wait_to_die;
			do_softirq();
			preempt_enable_no_resched();
			cond_resched();
			preempt_disable();
		}
		preempt_enable();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;

wait_to_die:
	preempt_enable();
	/* Wait for kthread_stop */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;
			return;
		}
	}
	/* SCHED was set but the tasklet is not on the dead cpu's list:
	   caller bug. */
	BUG();
}
||||
|
||||
/* Splice a dead cpu's queued tasklets onto this cpu's lists so they
   are not lost across a hot-unplug. */
static void takeover_tasklets(unsigned int cpu)
{
	struct tasklet_struct **i;

	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
	*i = per_cpu(tasklet_vec, cpu).list;
	per_cpu(tasklet_vec, cpu).list = NULL;
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	/* Same for the high-priority list. */
	for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
	*i = per_cpu(tasklet_hi_vec, cpu).list;
	per_cpu(tasklet_hi_vec, cpu).list = NULL;
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */
||||
|
||||
/*
 * CPU hotplug notifier: create, wake, or stop the per-cpu ksoftirqd
 * thread as cpus come and go.
 */
static int __devinit cpu_callback(struct notifier_block *nfb,
				  unsigned long action,
				  void *hcpu)
{
	int hotcpu = (unsigned long)hcpu;
	struct task_struct *p;

	switch (action) {
	case CPU_UP_PREPARE:
		/* A fresh cpu must not have leftover tasklets. */
		BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
		BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
		if (IS_ERR(p)) {
			printk("ksoftirqd for %i failed\n", hotcpu);
			return NOTIFY_BAD;
		}
		kthread_bind(p, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = p;
		break;
	case CPU_ONLINE:
		wake_up_process(per_cpu(ksoftirqd, hotcpu));
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
		/* Unbind so it can run. Fall thru. */
		kthread_bind(per_cpu(ksoftirqd, hotcpu), smp_processor_id());
	case CPU_DEAD:
		p = per_cpu(ksoftirqd, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = NULL;
		kthread_stop(p);
		takeover_tasklets(hotcpu);
		break;
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}
||||
|
||||
static struct notifier_block __devinitdata cpu_nfb = {
	.notifier_call = cpu_callback
};

/* Boot-time setup: spawn ksoftirqd for the boot cpu by hand, then
   register the hotplug notifier that handles all other cpus. */
__init int spawn_ksoftirqd(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
	register_cpu_notifier(&cpu_nfb);
	return 0;
}
||||
371
kernel/spinlock.c
Normal file
371
kernel/spinlock.c
Normal file
@@ -0,0 +1,371 @@
|
||||
/*
|
||||
* Copyright (2004) Linus Torvalds
|
||||
*
|
||||
* Author: Zwane Mwaikambo <zwane@fsmlabs.com>
|
||||
*
|
||||
* Copyright (2004) Ingo Molnar
|
||||
*/
|
||||
|
||||
#include <linux/config.h>
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/preempt.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/*
 * Generic declaration of the raw read_trylock() function,
 * architectures are supposed to optimize this:
 *
 * NOTE(review): this fallback simply takes the read lock (possibly
 * spinning) and reports success -- it never actually fails.
 */
int __lockfunc generic_raw_read_trylock(rwlock_t *lock)
{
	_raw_read_lock(lock);
	return 1;
}
EXPORT_SYMBOL(generic_raw_read_trylock);
||||
|
||||
/* Try to take @lock without spinning.  Returns 1 with preemption
   disabled on success, 0 with preempt state restored on failure. */
int __lockfunc _spin_trylock(spinlock_t *lock)
{
	preempt_disable();
	if (_raw_spin_trylock(lock))
		return 1;

	preempt_enable();
	return 0;
}
EXPORT_SYMBOL(_spin_trylock);
||||
|
||||
/* Read-lock trylock; same contract as _spin_trylock(). */
int __lockfunc _read_trylock(rwlock_t *lock)
{
	preempt_disable();
	if (_raw_read_trylock(lock))
		return 1;

	preempt_enable();
	return 0;
}
EXPORT_SYMBOL(_read_trylock);
||||
|
||||
/* Write-lock trylock; same contract as _spin_trylock(). */
int __lockfunc _write_trylock(rwlock_t *lock)
{
	preempt_disable();
	if (_raw_write_trylock(lock))
		return 1;

	preempt_enable();
	return 0;
}
EXPORT_SYMBOL(_write_trylock);
||||
|
||||
#ifndef CONFIG_PREEMPT

/*
 * Non-preempt kernels: locking is simply "disable whatever must be
 * disabled, then take the raw lock".  The preempt_disable() calls only
 * keep the preempt count symmetric with the unlock paths.
 */

void __lockfunc _read_lock(rwlock_t *lock)
{
	preempt_disable();
	_raw_read_lock(lock);
}
EXPORT_SYMBOL(_read_lock);

unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
{
	unsigned long flags;

	local_irq_save(flags);
	preempt_disable();
	_raw_spin_lock_flags(lock, flags);
	return flags;
}
EXPORT_SYMBOL(_spin_lock_irqsave);

void __lockfunc _spin_lock_irq(spinlock_t *lock)
{
	local_irq_disable();
	preempt_disable();
	_raw_spin_lock(lock);
}
EXPORT_SYMBOL(_spin_lock_irq);

void __lockfunc _spin_lock_bh(spinlock_t *lock)
{
	local_bh_disable();
	preempt_disable();
	_raw_spin_lock(lock);
}
EXPORT_SYMBOL(_spin_lock_bh);

unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
{
	unsigned long flags;

	local_irq_save(flags);
	preempt_disable();
	_raw_read_lock(lock);
	return flags;
}
EXPORT_SYMBOL(_read_lock_irqsave);

void __lockfunc _read_lock_irq(rwlock_t *lock)
{
	local_irq_disable();
	preempt_disable();
	_raw_read_lock(lock);
}
EXPORT_SYMBOL(_read_lock_irq);

void __lockfunc _read_lock_bh(rwlock_t *lock)
{
	local_bh_disable();
	preempt_disable();
	_raw_read_lock(lock);
}
EXPORT_SYMBOL(_read_lock_bh);

unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
{
	unsigned long flags;

	local_irq_save(flags);
	preempt_disable();
	_raw_write_lock(lock);
	return flags;
}
EXPORT_SYMBOL(_write_lock_irqsave);

void __lockfunc _write_lock_irq(rwlock_t *lock)
{
	local_irq_disable();
	preempt_disable();
	_raw_write_lock(lock);
}
EXPORT_SYMBOL(_write_lock_irq);

void __lockfunc _write_lock_bh(rwlock_t *lock)
{
	local_bh_disable();
	preempt_disable();
	_raw_write_lock(lock);
}
EXPORT_SYMBOL(_write_lock_bh);

void __lockfunc _spin_lock(spinlock_t *lock)
{
	preempt_disable();
	_raw_spin_lock(lock);
}

EXPORT_SYMBOL(_spin_lock);

void __lockfunc _write_lock(rwlock_t *lock)
{
	preempt_disable();
	_raw_write_lock(lock);
}

EXPORT_SYMBOL(_write_lock);
||||
|
||||
#else /* CONFIG_PREEMPT: */

/*
 * This could be a long-held lock. We both prepare to spin for a long
 * time (making _this_ CPU preemptable if possible), and we also signal
 * towards that other CPU that it should break the lock ASAP.
 *
 * (We do this in a function because inlining it would be excessive.)
 */

/* NOTE(review): the _irq variant below is built on _irqsave and simply
   discards the returned flags -- interrupts end up hard-disabled,
   matching the non-preempt _lock_irq behaviour. */
#define BUILD_LOCK_OPS(op, locktype)					\
void __lockfunc _##op##_lock(locktype##_t *lock)			\
{									\
	preempt_disable();						\
	for (;;) {							\
		if (likely(_raw_##op##_trylock(lock)))			\
			break;						\
		preempt_enable();					\
		if (!(lock)->break_lock)				\
			(lock)->break_lock = 1;				\
		while (!op##_can_lock(lock) && (lock)->break_lock)	\
			cpu_relax();					\
		preempt_disable();					\
	}								\
	(lock)->break_lock = 0;						\
}									\
									\
EXPORT_SYMBOL(_##op##_lock);						\
									\
unsigned long __lockfunc _##op##_lock_irqsave(locktype##_t *lock)	\
{									\
	unsigned long flags;						\
									\
	preempt_disable();						\
	for (;;) {							\
		local_irq_save(flags);					\
		if (likely(_raw_##op##_trylock(lock)))			\
			break;						\
		local_irq_restore(flags);				\
									\
		preempt_enable();					\
		if (!(lock)->break_lock)				\
			(lock)->break_lock = 1;				\
		while (!op##_can_lock(lock) && (lock)->break_lock)	\
			cpu_relax();					\
		preempt_disable();					\
	}								\
	(lock)->break_lock = 0;						\
	return flags;							\
}									\
									\
EXPORT_SYMBOL(_##op##_lock_irqsave);					\
									\
void __lockfunc _##op##_lock_irq(locktype##_t *lock)			\
{									\
	_##op##_lock_irqsave(lock);					\
}									\
									\
EXPORT_SYMBOL(_##op##_lock_irq);					\
									\
void __lockfunc _##op##_lock_bh(locktype##_t *lock)			\
{									\
	unsigned long flags;						\
									\
	/* */								\
	/* Careful: we must exclude softirqs too, hence the */		\
	/* irq-disabling. We use the generic preemption-aware */	\
	/* function: */							\
	/**/								\
	flags = _##op##_lock_irqsave(lock);				\
	local_bh_disable();						\
	local_irq_restore(flags);					\
}									\
									\
EXPORT_SYMBOL(_##op##_lock_bh)

/*
 * Build preemption-friendly versions of the following
 * lock-spinning functions:
 *
 *         _[spin|read|write]_lock()
 *         _[spin|read|write]_lock_irq()
 *         _[spin|read|write]_lock_irqsave()
 *         _[spin|read|write]_lock_bh()
 */
BUILD_LOCK_OPS(spin, spinlock);
BUILD_LOCK_OPS(read, rwlock);
BUILD_LOCK_OPS(write, rwlock);

#endif /* CONFIG_PREEMPT */
||||
|
||||
/*
 * Unlock variants (shared by preempt and non-preempt builds): release
 * the raw lock first, then undo irq/bh/preempt state in the reverse
 * order the matching lock function established it.
 */

void __lockfunc _spin_unlock(spinlock_t *lock)
{
	_raw_spin_unlock(lock);
	preempt_enable();
}
EXPORT_SYMBOL(_spin_unlock);

void __lockfunc _write_unlock(rwlock_t *lock)
{
	_raw_write_unlock(lock);
	preempt_enable();
}
EXPORT_SYMBOL(_write_unlock);

void __lockfunc _read_unlock(rwlock_t *lock)
{
	_raw_read_unlock(lock);
	preempt_enable();
}
EXPORT_SYMBOL(_read_unlock);

void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
	_raw_spin_unlock(lock);
	local_irq_restore(flags);
	preempt_enable();
}
EXPORT_SYMBOL(_spin_unlock_irqrestore);

void __lockfunc _spin_unlock_irq(spinlock_t *lock)
{
	_raw_spin_unlock(lock);
	local_irq_enable();
	preempt_enable();
}
EXPORT_SYMBOL(_spin_unlock_irq);

void __lockfunc _spin_unlock_bh(spinlock_t *lock)
{
	_raw_spin_unlock(lock);
	preempt_enable();
	local_bh_enable();
}
EXPORT_SYMBOL(_spin_unlock_bh);

void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
	_raw_read_unlock(lock);
	local_irq_restore(flags);
	preempt_enable();
}
EXPORT_SYMBOL(_read_unlock_irqrestore);

void __lockfunc _read_unlock_irq(rwlock_t *lock)
{
	_raw_read_unlock(lock);
	local_irq_enable();
	preempt_enable();
}
EXPORT_SYMBOL(_read_unlock_irq);

void __lockfunc _read_unlock_bh(rwlock_t *lock)
{
	_raw_read_unlock(lock);
	preempt_enable();
	local_bh_enable();
}
EXPORT_SYMBOL(_read_unlock_bh);

void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
	_raw_write_unlock(lock);
	local_irq_restore(flags);
	preempt_enable();
}
EXPORT_SYMBOL(_write_unlock_irqrestore);

void __lockfunc _write_unlock_irq(rwlock_t *lock)
{
	_raw_write_unlock(lock);
	local_irq_enable();
	preempt_enable();
}
EXPORT_SYMBOL(_write_unlock_irq);

void __lockfunc _write_unlock_bh(rwlock_t *lock)
{
	_raw_write_unlock(lock);
	preempt_enable();
	local_bh_enable();
}
EXPORT_SYMBOL(_write_unlock_bh);
||||
|
||||
/* Trylock that also disables bottom halves.  On success returns 1 with
   bh and preemption off; on failure returns 0 with both restored. */
int __lockfunc _spin_trylock_bh(spinlock_t *lock)
{
	local_bh_disable();
	preempt_disable();
	if (_raw_spin_trylock(lock))
		return 1;

	preempt_enable();
	local_bh_enable();
	return 0;
}
EXPORT_SYMBOL(_spin_trylock_bh);
||||
|
||||
/*
 * Return nonzero if @addr lies inside the __lockfunc text section,
 * i.e. within one of the lock helpers defined in this file.
 */
int in_lock_functions(unsigned long addr)
{
	/* Linker adds these: start and end of __lockfunc functions */
	extern char __lock_text_start[], __lock_text_end[];

	if (addr < (unsigned long)__lock_text_start)
		return 0;
	return addr < (unsigned long)__lock_text_end;
}
EXPORT_SYMBOL(in_lock_functions);
||||
212
kernel/stop_machine.c
Normal file
212
kernel/stop_machine.c
Normal file
@@ -0,0 +1,212 @@
|
||||
#include <linux/stop_machine.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/semaphore.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
/* Since we effect priority and affinity (both of which are visible
 * to, and settable by outside processes) we do indirection via a
 * kthread. */

/* Thread to stop each CPU in user context. */
enum stopmachine_state {
	STOPMACHINE_WAIT,
	STOPMACHINE_PREPARE,
	STOPMACHINE_DISABLE_IRQ,
	STOPMACHINE_EXIT,
};

/* Current phase, written by the controlling thread and polled by the
   per-cpu stopper threads. */
static enum stopmachine_state stopmachine_state;
/* Number of stopper threads expected to ack each phase change. */
static unsigned int stopmachine_num_threads;
/* Count of threads that have acked the current phase. */
static atomic_t stopmachine_thread_ack;
/* Serializes whole stop_machine operations. */
static DECLARE_MUTEX(stopmachine_mutex);
||||
/*
 * Per-cpu stopper thread: pins itself to its cpu, then follows the
 * stopmachine_state machine, acking each transition through
 * stopmachine_thread_ack.
 */
static int stopmachine(void *cpu)
{
	int irqs_disabled = 0;
	int prepared = 0;

	set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));

	/* Ack: we are alive */
	mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
	atomic_inc(&stopmachine_thread_ack);

	/* Simple state machine */
	while (stopmachine_state != STOPMACHINE_EXIT) {
		if (stopmachine_state == STOPMACHINE_DISABLE_IRQ
		    && !irqs_disabled) {
			local_irq_disable();
			irqs_disabled = 1;
			/* Ack: irqs disabled. */
			mb(); /* Must read state first. */
			atomic_inc(&stopmachine_thread_ack);
		} else if (stopmachine_state == STOPMACHINE_PREPARE
			   && !prepared) {
			/* Everyone is in place, hold CPU. */
			preempt_disable();
			prepared = 1;
			mb(); /* Must read state first. */
			atomic_inc(&stopmachine_thread_ack);
		}

		/* Yield in first stage: migration threads need to
		 * help our sisters onto their CPUs. */
		if (!prepared && !irqs_disabled)
			yield();
		else
			cpu_relax();
	}

	/* Ack: we are exiting. */
	mb(); /* Must read state first. */
	atomic_inc(&stopmachine_thread_ack);

	/* Undo whatever state we were holding when EXIT arrived. */
	if (irqs_disabled)
		local_irq_enable();
	if (prepared)
		preempt_enable();

	return 0;
}
||||
|
||||
/* Change the thread state */
static void stopmachine_set_state(enum stopmachine_state state)
{
	/* Reset the ack counter, publish the new state, then spin until
	   every stopper thread has acknowledged it. */
	atomic_set(&stopmachine_thread_ack, 0);
	wmb();
	stopmachine_state = state;
	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
		cpu_relax();
}
||||
/*
 * Bring every other online cpu to a controlled halt with irqs disabled.
 * Called from do_stop() with stopmachine_mutex held.  Returns 0 with
 * local irqs disabled on success, negative errno on failure.
 */
static int stop_machine(void)
{
	int i, ret = 0;
	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
	mm_segment_t old_fs = get_fs();

	/* One high-prio thread per cpu. We'll do this one. */
	set_fs(KERNEL_DS);
	sys_sched_setscheduler(current->pid, SCHED_FIFO,
				(struct sched_param __user *)&param);
	set_fs(old_fs);

	atomic_set(&stopmachine_thread_ack, 0);
	stopmachine_num_threads = 0;
	stopmachine_state = STOPMACHINE_WAIT;

	for_each_online_cpu(i) {
		if (i == _smp_processor_id())
			continue;
		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
		if (ret < 0)
			break;
		stopmachine_num_threads++;
	}

	/* Wait for them all to come to life. */
	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
		yield();

	/* If some failed, kill them all. */
	if (ret < 0) {
		stopmachine_set_state(STOPMACHINE_EXIT);
		/* NOTE(review): __stop_machine_run() also does up() after
		   the completion fires, so this early release looks like
		   a double up() of stopmachine_mutex -- verify. */
		up(&stopmachine_mutex);
		return ret;
	}

	/* Don't schedule us away at this point, please. */
	local_irq_disable();

	/* Now they are all started, make them hold the CPUs, ready. */
	stopmachine_set_state(STOPMACHINE_PREPARE);

	/* Make them disable irqs. */
	stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);

	return 0;
}
||||
|
||||
/* Release all stopped cpus, then re-enable local interrupts. */
static void restart_machine(void)
{
	stopmachine_set_state(STOPMACHINE_EXIT);
	local_irq_enable();
}
|
||||
/* Work handed to the kstopmachine thread. */
struct stop_machine_data
{
	int (*fn)(void *);	/* function to run with machine stopped */
	void *data;		/* its argument */
	struct completion done;	/* signalled after fn has run */
};
|
||||
/*
 * Body of the kstopmachine thread: freeze the machine, run the
 * caller's function, restart the machine, then park until
 * kthread_stop() collects the return value.
 */
static int do_stop(void *_smdata)
{
	struct stop_machine_data *smdata = _smdata;
	int ret;

	ret = stop_machine();
	if (ret == 0) {
		ret = smdata->fn(smdata->data);
		restart_machine();
	}

	/* We're done: you can kthread_stop us now */
	complete(&smdata->done);

	/* Wait for kthread_stop */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return ret;
}
||||
/*
 * Launch fn(data) on @cpu with the rest of the machine stopped.
 * @cpu == NR_CPUS means "any online cpu".  Returns the kstopmachine
 * task (caller must kthread_stop() it to collect fn's result) or an
 * ERR_PTR on thread-creation failure.
 */
struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
				       unsigned int cpu)
{
	struct stop_machine_data smdata;
	struct task_struct *p;

	smdata.fn = fn;
	smdata.data = data;
	init_completion(&smdata.done);

	down(&stopmachine_mutex);

	/* If they don't care which CPU fn runs on, bind to any online one. */
	if (cpu == NR_CPUS)
		cpu = _smp_processor_id();

	p = kthread_create(do_stop, &smdata, "kstopmachine");
	if (!IS_ERR(p)) {
		kthread_bind(p, cpu);
		wake_up_process(p);
		/* smdata lives on our stack: wait until fn has run. */
		wait_for_completion(&smdata.done);
	}
	up(&stopmachine_mutex);
	return p;
}
|
||||
/*
 * Convenience wrapper: run @fn on @cpu with the machine stopped and
 * return fn's result, or a -errno if the stopmachine thread could not
 * be created.  Holds the CPU hotplug lock for the duration.
 */
int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
{
	struct task_struct *p;
	int ret;

	/* No CPUs can come up or down during this. */
	lock_cpu_hotplug();
	p = __stop_machine_run(fn, data, cpu);
	if (!IS_ERR(p))
		ret = kthread_stop(p);	/* reap thread; yields fn's result */
	else
		ret = PTR_ERR(p);
	unlock_cpu_hotplug();

	return ret;
}
|
||||
1725
kernel/sys.c
Normal file
1725
kernel/sys.c
Normal file
File diff suppressed because it is too large
Load Diff
86
kernel/sys_ni.c
Normal file
86
kernel/sys_ni.c
Normal file
@@ -0,0 +1,86 @@
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/errno.h>
|
||||
|
||||
#include <asm/unistd.h>
|
||||
|
||||
/*
 * Non-implemented system calls get redirected here.
 */
asmlinkage long sys_ni_syscall(void)
{
	/* Uniform "not implemented" answer for unwired syscall slots. */
	return -ENOSYS;
}
|
||||
|
||||
cond_syscall(sys_nfsservctl);
|
||||
cond_syscall(sys_quotactl);
|
||||
cond_syscall(sys_acct);
|
||||
cond_syscall(sys_lookup_dcookie);
|
||||
cond_syscall(sys_swapon);
|
||||
cond_syscall(sys_swapoff);
|
||||
cond_syscall(sys_init_module);
|
||||
cond_syscall(sys_delete_module);
|
||||
cond_syscall(sys_socketpair);
|
||||
cond_syscall(sys_bind);
|
||||
cond_syscall(sys_listen);
|
||||
cond_syscall(sys_accept);
|
||||
cond_syscall(sys_connect);
|
||||
cond_syscall(sys_getsockname);
|
||||
cond_syscall(sys_getpeername);
|
||||
cond_syscall(sys_sendto);
|
||||
cond_syscall(sys_send);
|
||||
cond_syscall(sys_recvfrom);
|
||||
cond_syscall(sys_recv);
|
||||
cond_syscall(sys_socket);
|
||||
cond_syscall(sys_setsockopt);
|
||||
cond_syscall(sys_getsockopt);
|
||||
cond_syscall(sys_shutdown);
|
||||
cond_syscall(sys_sendmsg);
|
||||
cond_syscall(sys_recvmsg);
|
||||
cond_syscall(sys_socketcall);
|
||||
cond_syscall(sys_futex);
|
||||
cond_syscall(compat_sys_futex);
|
||||
cond_syscall(sys_epoll_create);
|
||||
cond_syscall(sys_epoll_ctl);
|
||||
cond_syscall(sys_epoll_wait);
|
||||
cond_syscall(sys_semget);
|
||||
cond_syscall(sys_semop);
|
||||
cond_syscall(sys_semtimedop);
|
||||
cond_syscall(sys_semctl);
|
||||
cond_syscall(sys_msgget);
|
||||
cond_syscall(sys_msgsnd);
|
||||
cond_syscall(sys_msgrcv);
|
||||
cond_syscall(sys_msgctl);
|
||||
cond_syscall(sys_shmget);
|
||||
cond_syscall(sys_shmdt);
|
||||
cond_syscall(sys_shmctl);
|
||||
cond_syscall(sys_mq_open);
|
||||
cond_syscall(sys_mq_unlink);
|
||||
cond_syscall(sys_mq_timedsend);
|
||||
cond_syscall(sys_mq_timedreceive);
|
||||
cond_syscall(sys_mq_notify);
|
||||
cond_syscall(sys_mq_getsetattr);
|
||||
cond_syscall(compat_sys_mq_open);
|
||||
cond_syscall(compat_sys_mq_timedsend);
|
||||
cond_syscall(compat_sys_mq_timedreceive);
|
||||
cond_syscall(compat_sys_mq_notify);
|
||||
cond_syscall(compat_sys_mq_getsetattr);
|
||||
cond_syscall(sys_mbind);
|
||||
cond_syscall(sys_get_mempolicy);
|
||||
cond_syscall(sys_set_mempolicy);
|
||||
cond_syscall(compat_sys_mbind);
|
||||
cond_syscall(compat_sys_get_mempolicy);
|
||||
cond_syscall(compat_sys_set_mempolicy);
|
||||
cond_syscall(sys_add_key);
|
||||
cond_syscall(sys_request_key);
|
||||
cond_syscall(sys_keyctl);
|
||||
cond_syscall(compat_sys_keyctl);
|
||||
cond_syscall(compat_sys_socketcall);
|
||||
|
||||
/* arch-specific weak syscall entries */
|
||||
cond_syscall(sys_pciconfig_read);
|
||||
cond_syscall(sys_pciconfig_write);
|
||||
cond_syscall(sys_pciconfig_iobase);
|
||||
cond_syscall(sys32_ipc);
|
||||
cond_syscall(sys32_sysctl);
|
||||
cond_syscall(ppc_rtas);
|
||||
2337
kernel/sysctl.c
Normal file
2337
kernel/sysctl.c
Normal file
File diff suppressed because it is too large
Load Diff
599
kernel/time.c
Normal file
599
kernel/time.c
Normal file
@@ -0,0 +1,599 @@
|
||||
/*
|
||||
* linux/kernel/time.c
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
*
|
||||
* This file contains the interface functions for the various
|
||||
* time related system calls: time, stime, gettimeofday, settimeofday,
|
||||
* adjtime
|
||||
*/
|
||||
/*
|
||||
* Modification history kernel/time.c
|
||||
*
|
||||
* 1993-09-02 Philip Gladstone
|
||||
* Created file with time related functions from sched.c and adjtimex()
|
||||
* 1993-10-08 Torsten Duwe
|
||||
* adjtime interface update and CMOS clock write code
|
||||
* 1995-08-13 Torsten Duwe
|
||||
* kernel PLL updated to 1994-12-13 specs (rfc-1589)
|
||||
* 1999-01-16 Ulrich Windl
|
||||
* Introduced error checking for many cases in adjtimex().
|
||||
* Updated NTP code according to technical memorandum Jan '96
|
||||
* "A Kernel Model for Precision Timekeeping" by Dave Mills
|
||||
* Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10)
|
||||
* (Even though the technical memorandum forbids it)
|
||||
* 2004-07-14 Christoph Lameter
|
||||
* Added getnstimeofday to allow the posix timer functions to return
|
||||
* with nanosecond accuracy
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/timex.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/unistd.h>
|
||||
|
||||
/*
|
||||
* The timezone where the local system is located. Used as a default by some
|
||||
* programs who obtain this value by using gettimeofday.
|
||||
*/
|
||||
struct timezone sys_tz;
|
||||
|
||||
EXPORT_SYMBOL(sys_tz);
|
||||
|
||||
#ifdef __ARCH_WANT_SYS_TIME
|
||||
|
||||
/*
|
||||
* sys_time() can be implemented in user-level using
|
||||
* sys_gettimeofday(). Is this for backwards compatibility? If so,
|
||||
* why not move it into the appropriate arch directory (for those
|
||||
* architectures that need it).
|
||||
*/
|
||||
/*
 * Return the current time in seconds; if @tloc is non-NULL also store
 * it there.  On a faulting store the return value is -EFAULT.
 */
asmlinkage long sys_time(time_t __user * tloc)
{
	struct timeval now;
	time_t result;

	do_gettimeofday(&now);
	result = now.tv_sec;

	if (tloc && put_user(result, tloc))
		result = -EFAULT;

	return result;
}
|
||||
|
||||
/*
|
||||
* sys_stime() can be implemented in user-level using
|
||||
* sys_settimeofday(). Is this for backwards compatibility? If so,
|
||||
* why not move it into the appropriate arch directory (for those
|
||||
* architectures that need it).
|
||||
*/
|
||||
|
||||
/*
 * Set the wall clock to @tptr seconds (nanoseconds forced to zero),
 * subject to the security module's approval.  Returns 0, -EFAULT on a
 * bad pointer, or the security hook's error.
 */
asmlinkage long sys_stime(time_t __user *tptr)
{
	struct timespec tv;
	int err;

	if (get_user(tv.tv_sec, tptr))
		return -EFAULT;

	tv.tv_nsec = 0;

	/* Let the security module veto the time change. */
	err = security_settime(&tv, NULL);
	if (err)
		return err;

	do_settimeofday(&tv);
	return 0;
}
|
||||
|
||||
#endif /* __ARCH_WANT_SYS_TIME */
|
||||
|
||||
/*
 * gettimeofday() syscall: copy the current time to @tv and/or the
 * system timezone to @tz; either pointer may be NULL.
 */
asmlinkage long sys_gettimeofday(struct timeval __user *tv, struct timezone __user *tz)
{
	if (likely(tv != NULL)) {
		struct timeval now;

		do_gettimeofday(&now);
		if (copy_to_user(tv, &now, sizeof(now)))
			return -EFAULT;
	}
	if (unlikely(tz != NULL) &&
	    copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
		return -EFAULT;
	return 0;
}
|
||||
|
||||
/*
|
||||
* Adjust the time obtained from the CMOS to be UTC time instead of
|
||||
* local time.
|
||||
*
|
||||
* This is ugly, but preferable to the alternatives. Otherwise we
|
||||
* would either need to write a program to do it in /etc/rc (and risk
|
||||
* confusion if the program gets run more than once; it would also be
|
||||
* hard to make the program warp the clock precisely n hours) or
|
||||
* compile in the timezone information into the kernel. Bad, bad....
|
||||
*
|
||||
* - TYT, 1992-01-01
|
||||
*
|
||||
* The best thing to do is to keep the CMOS clock in universal time (UTC)
|
||||
* as real UNIX machines always do it. This avoids all headaches about
|
||||
* daylight saving times and warping kernel clocks.
|
||||
*/
|
||||
inline static void warp_clock(void)
|
||||
{
|
||||
write_seqlock_irq(&xtime_lock);
|
||||
wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60;
|
||||
xtime.tv_sec += sys_tz.tz_minuteswest * 60;
|
||||
time_interpolator_reset();
|
||||
write_sequnlock_irq(&xtime_lock);
|
||||
clock_was_set();
|
||||
}
|
||||
|
||||
/*
|
||||
* In case for some reason the CMOS clock has not already been running
|
||||
* in UTC, but in some local time: The first time we set the timezone,
|
||||
* we will warp the clock so that it is ticking UTC time instead of
|
||||
* local time. Presumably, if someone is setting the timezone then we
|
||||
* are running in an environment where the programs understand about
|
||||
* timezones. This should be done at boot time in the /etc/rc script,
|
||||
* as soon as possible, so that the clock can be set right. Otherwise,
|
||||
* various programs will get confused when the clock gets warped.
|
||||
*/
|
||||
|
||||
/*
 * Common backend for settimeofday()-style calls: optionally install a
 * new system timezone and/or set the wall-clock time.  Either @tv or
 * @tz may be NULL.  Returns 0, the security hook's error, or
 * do_settimeofday()'s error.
 */
int do_sys_settimeofday(struct timespec *tv, struct timezone *tz)
{
	static int firsttime = 1;	/* first-ever tz set warps the clock */
	int error = 0;

	error = security_settime(tv, tz);
	if (error)
		return error;

	if (tz) {
		/* SMP safe, global irq locking makes it work. */
		sys_tz = *tz;
		if (firsttime) {
			firsttime = 0;
			/* First tz set with no explicit time: assume the
			 * CMOS clock was local time and warp it to UTC. */
			if (!tv)
				warp_clock();
		}
	}
	if (tv)
	{
		/* SMP safe, again the code in arch/foo/time.c should
		 * globally block out interrupts when it runs.
		 */
		return do_settimeofday(tv);
	}
	return 0;
}
|
||||
|
||||
/*
 * settimeofday() syscall: copy in the optional timeval/timezone from
 * userspace, convert microseconds to nanoseconds, and hand off to
 * do_sys_settimeofday().
 */
asmlinkage long sys_settimeofday(struct timeval __user *tv,
				struct timezone __user *tz)
{
	struct timeval user_tv;
	struct timespec	new_ts;
	struct timezone new_tz;

	if (tv) {
		if (copy_from_user(&user_tv, tv, sizeof(*tv)))
			return -EFAULT;
		new_ts.tv_sec = user_tv.tv_sec;
		new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC;
	}
	if (tz) {
		if (copy_from_user(&new_tz, tz, sizeof(*tz)))
			return -EFAULT;
	}

	/* Pass through only the pieces the caller actually supplied. */
	return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL);
}
|
||||
|
||||
/* State for the PPS (pulse-per-second) discipline of the NTP kernel
 * model; read and written by do_adjtimex() under xtime_lock. */
long pps_offset;		/* pps time offset (us) */
long pps_jitter = MAXTIME;	/* time dispersion (jitter) (us) */

long pps_freq;			/* frequency offset (scaled ppm) */
long pps_stabil = MAXFREQ;	/* frequency dispersion (scaled ppm) */

long pps_valid = PPS_VALID;	/* pps signal watchdog counter */

int pps_shift = PPS_SHIFT;	/* interval duration (s) (shift) */

long pps_jitcnt;		/* jitter limit exceeded */
long pps_calcnt;		/* calibration intervals */
long pps_errcnt;		/* calibration errors */
long pps_stbcnt;		/* stability limit exceeded */

/* hook for a loadable hardpps kernel module */
void (*hardpps_ptr)(struct timeval *);
|
||||
|
||||
/*
 * Architecture hook invoked after the clock has been adjusted, so the
 * arch can sync its persistent (CMOS) clock.  The weak default does
 * nothing; an architecture overrides it with a strong definition.
 */
void __attribute__ ((weak)) notify_arch_cmos_timer(void)
{
}
|
||||
|
||||
/* adjtimex mainly allows reading (and writing, if superuser) of
 * kernel time-keeping variables. used by xntpd.
 *
 * All NTP state (time_status, time_freq, time_offset, ...) is read
 * and written under write_seqlock_irq(&xtime_lock).  Writing any mode
 * bits requires CAP_SYS_TIME.  On success the current clock state
 * (e.g. TIME_OK or TIME_ERROR) is returned and *txc is refreshed with
 * the current values; on bad input a -errno is returned.
 */
int do_adjtimex(struct timex *txc)
{
	long ltemp, mtemp, save_adjust;
	int result;

	/* In order to modify anything, you gotta be super-user! */
	if (txc->modes && !capable(CAP_SYS_TIME))
		return -EPERM;

	/* Now we validate the data before disabling interrupts */

	if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
		/* singleshot must not be used with any other mode bits */
		if (txc->modes != ADJ_OFFSET_SINGLESHOT)
			return -EINVAL;

	if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET))
		/* adjustment Offset limited to +- .512 seconds */
		if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE )
			return -EINVAL;

	/* if the quartz is off by more than 10% something is VERY wrong ! */
	if (txc->modes & ADJ_TICK)
		if (txc->tick < 900000/USER_HZ ||
		    txc->tick > 1100000/USER_HZ)
			return -EINVAL;

	write_seqlock_irq(&xtime_lock);
	result = time_state;	/* mostly `TIME_OK' */

	/* Save for later - semantics of adjtime is to return old value */
	save_adjust = time_next_adjust ? time_next_adjust : time_adjust;

#if 0	/* STA_CLOCKERR is never set yet */
	time_status &= ~STA_CLOCKERR;	/* reset STA_CLOCKERR */
#endif
	/* If there are input parameters, then process them */
	if (txc->modes)
	{
		if (txc->modes & ADJ_STATUS)	/* only set allowed bits */
			time_status = (txc->status & ~STA_RONLY) |
				      (time_status & STA_RONLY);

		if (txc->modes & ADJ_FREQUENCY) {	/* p. 22 */
			if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) {
				result = -EINVAL;
				goto leave;
			}
			/* txc->freq includes the PPS contribution; keep
			 * time_freq as the non-PPS part. */
			time_freq = txc->freq - pps_freq;
		}

		if (txc->modes & ADJ_MAXERROR) {
			if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) {
				result = -EINVAL;
				goto leave;
			}
			time_maxerror = txc->maxerror;
		}

		if (txc->modes & ADJ_ESTERROR) {
			if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) {
				result = -EINVAL;
				goto leave;
			}
			time_esterror = txc->esterror;
		}

		if (txc->modes & ADJ_TIMECONST) {	/* p. 24 */
			if (txc->constant < 0) {	/* NTP v4 uses values > 6 */
				result = -EINVAL;
				goto leave;
			}
			time_constant = txc->constant;
		}

		if (txc->modes & ADJ_OFFSET) {	/* values checked earlier */
			if (txc->modes == ADJ_OFFSET_SINGLESHOT) {
				/* adjtime() is independent from ntp_adjtime() */
				if ((time_next_adjust = txc->offset) == 0)
					time_adjust = 0;
			}
			else if ( time_status & (STA_PLL | STA_PPSTIME) ) {
				/* Prefer the PPS-derived offset when a valid
				 * PPS signal is being used for time. */
				ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) ==
					(STA_PPSTIME | STA_PPSSIGNAL) ?
					pps_offset : txc->offset;

				/*
				 * Scale the phase adjustment and
				 * clamp to the operating range.
				 */
				if (ltemp > MAXPHASE)
					time_offset = MAXPHASE << SHIFT_UPDATE;
				else if (ltemp < -MAXPHASE)
					time_offset = -(MAXPHASE << SHIFT_UPDATE);
				else
					time_offset = ltemp << SHIFT_UPDATE;

				/*
				 * Select whether the frequency is to be controlled
				 * and in which mode (PLL or FLL). Clamp to the operating
				 * range. Ugly multiply/divide should be replaced someday.
				 */

				if (time_status & STA_FREQHOLD || time_reftime == 0)
					time_reftime = xtime.tv_sec;
				mtemp = xtime.tv_sec - time_reftime;
				time_reftime = xtime.tv_sec;
				if (time_status & STA_FLL) {
					if (mtemp >= MINSEC) {
						ltemp = (time_offset / mtemp) << (SHIFT_USEC -
										  SHIFT_UPDATE);
						if (ltemp < 0)
							time_freq -= -ltemp >> SHIFT_KH;
						else
							time_freq += ltemp >> SHIFT_KH;
					} else /* calibration interval too short (p. 12) */
						result = TIME_ERROR;
				} else {	/* PLL mode */
					if (mtemp < MAXSEC) {
						ltemp *= mtemp;
						if (ltemp < 0)
							time_freq -= -ltemp >> (time_constant +
										time_constant +
										SHIFT_KF - SHIFT_USEC);
						else
							time_freq += ltemp >> (time_constant +
									       time_constant +
									       SHIFT_KF - SHIFT_USEC);
					} else /* calibration interval too long (p. 12) */
						result = TIME_ERROR;
				}
				if (time_freq > time_tolerance)
					time_freq = time_tolerance;
				else if (time_freq < -time_tolerance)
					time_freq = -time_tolerance;
			} /* STA_PLL || STA_PPSTIME */
		} /* txc->modes & ADJ_OFFSET */
		if (txc->modes & ADJ_TICK) {
			tick_usec = txc->tick;
			tick_nsec = TICK_USEC_TO_NSEC(tick_usec);
		}
	} /* txc->modes */
	/* Re-derive the overall clock state from the status bits
	 * (unsynchronized clock, or problems with the PPS signal). */
leave:	if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
	    || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0
		&& (time_status & STA_PPSSIGNAL) == 0)
	    /* p. 24, (b) */
	    || ((time_status & (STA_PPSTIME|STA_PPSJITTER))
		== (STA_PPSTIME|STA_PPSJITTER))
	    /* p. 24, (c) */
	    || ((time_status & STA_PPSFREQ) != 0
		&& (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0))
	    /* p. 24, (d) */
		result = TIME_ERROR;

	/* Report current values back to the caller. */
	if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
		txc->offset = save_adjust;
	else {
		if (time_offset < 0)
			txc->offset = -(-time_offset >> SHIFT_UPDATE);
		else
			txc->offset = time_offset >> SHIFT_UPDATE;
	}
	txc->freq = time_freq + pps_freq;
	txc->maxerror = time_maxerror;
	txc->esterror = time_esterror;
	txc->status = time_status;
	txc->constant = time_constant;
	txc->precision = time_precision;
	txc->tolerance = time_tolerance;
	txc->tick = tick_usec;
	txc->ppsfreq = pps_freq;
	txc->jitter = pps_jitter >> PPS_AVG;
	txc->shift = pps_shift;
	txc->stabil = pps_stabil;
	txc->jitcnt = pps_jitcnt;
	txc->calcnt = pps_calcnt;
	txc->errcnt = pps_errcnt;
	txc->stbcnt = pps_stbcnt;
	write_sequnlock_irq(&xtime_lock);
	do_gettimeofday(&txc->time);
	notify_arch_cmos_timer();
	return(result);
}
|
||||
|
||||
/*
 * adjtimex() syscall: copy the struct timex in, apply/read the kernel
 * timekeeping variables via do_adjtimex(), and copy the updated
 * structure back out.  Returns the clock state or a -errno.
 */
asmlinkage long sys_adjtimex(struct timex __user *txc_p)
{
	struct timex txc;	/* Local copy of parameter */
	int ret;

	/* Copy the user data space into the kernel copy
	 * structure. But bear in mind that the structures
	 * may change
	 */
	if(copy_from_user(&txc, txc_p, sizeof(struct timex)))
		return -EFAULT;
	ret = do_adjtimex(&txc);
	/* A faulting copy-out trumps do_adjtimex()'s result. */
	return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
}
|
||||
|
||||
/*
 * Return the current wall-clock time (xtime), read consistently under
 * the xtime seqlock.
 */
inline struct timespec current_kernel_time(void)
{
	struct timespec now;
	unsigned long seq;

	/* Retry until the read was not torn by a concurrent writer. */
	do {
		seq = read_seqbegin(&xtime_lock);

		now = xtime;
	} while (read_seqretry(&xtime_lock, seq));

	return now;
}
|
||||
|
||||
EXPORT_SYMBOL(current_kernel_time);
|
||||
|
||||
/**
|
||||
* current_fs_time - Return FS time
|
||||
* @sb: Superblock.
|
||||
*
|
||||
* Return the current time truncated to the time granuality supported by
|
||||
* the fs.
|
||||
*/
|
||||
struct timespec current_fs_time(struct super_block *sb)
|
||||
{
|
||||
struct timespec now = current_kernel_time();
|
||||
return timespec_trunc(now, sb->s_time_gran);
|
||||
}
|
||||
EXPORT_SYMBOL(current_fs_time);
|
||||
|
||||
/**
|
||||
* timespec_trunc - Truncate timespec to a granuality
|
||||
* @t: Timespec
|
||||
* @gran: Granuality in ns.
|
||||
*
|
||||
* Truncate a timespec to a granuality. gran must be smaller than a second.
|
||||
* Always rounds down.
|
||||
*
|
||||
* This function should be only used for timestamps returned by
|
||||
* current_kernel_time() or CURRENT_TIME, not with do_gettimeofday() because
|
||||
* it doesn't handle the better resolution of the later.
|
||||
*/
|
||||
struct timespec timespec_trunc(struct timespec t, unsigned gran)
|
||||
{
|
||||
/*
|
||||
* Division is pretty slow so avoid it for common cases.
|
||||
* Currently current_kernel_time() never returns better than
|
||||
* jiffies resolution. Exploit that.
|
||||
*/
|
||||
if (gran <= jiffies_to_usecs(1) * 1000) {
|
||||
/* nothing */
|
||||
} else if (gran == 1000000000) {
|
||||
t.tv_nsec = 0;
|
||||
} else {
|
||||
t.tv_nsec -= t.tv_nsec % gran;
|
||||
}
|
||||
return t;
|
||||
}
|
||||
EXPORT_SYMBOL(timespec_trunc);
|
||||
|
||||
#ifdef CONFIG_TIME_INTERPOLATION
|
||||
/*
 * Current time at nanosecond resolution: a seqlock-consistent snapshot
 * of xtime plus the time interpolator's offset, normalized so that
 * tv_nsec < NSEC_PER_SEC.
 */
void getnstimeofday (struct timespec *tv)
{
	unsigned long seq,sec,nsec;

	do {
		seq = read_seqbegin(&xtime_lock);
		sec = xtime.tv_sec;
		nsec = xtime.tv_nsec+time_interpolator_get_offset();
	} while (unlikely(read_seqretry(&xtime_lock, seq)));

	/* Carry overflowed nanoseconds into the seconds field. */
	while (unlikely(nsec >= NSEC_PER_SEC)) {
		nsec -= NSEC_PER_SEC;
		++sec;
	}
	tv->tv_sec = sec;
	tv->tv_nsec = nsec;
}
|
||||
EXPORT_SYMBOL_GPL(getnstimeofday);
|
||||
|
||||
/*
 * Set the wall clock to @tv (time-interpolator variant).  Also adjusts
 * wall_to_monotonic by the opposite delta so monotonic time does not
 * jump, and marks the clock unsynchronized for NTP.  Returns 0 or
 * -EINVAL for an out-of-range tv_nsec.
 */
int do_settimeofday (struct timespec *tv)
{
	time_t wtm_sec, sec = tv->tv_sec;
	long wtm_nsec, nsec = tv->tv_nsec;

	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
		return -EINVAL;

	write_seqlock_irq(&xtime_lock);
	{
		/*
		 * This is revolting. We need to set "xtime" correctly. However, the value
		 * in this location is the value at the most recent update of wall time.
		 * Discover what correction gettimeofday would have done, and then undo
		 * it!
		 */
		nsec -= time_interpolator_get_offset();

		/* Shift wall_to_monotonic by the opposite of the wall-clock
		 * delta so xtime + wall_to_monotonic stays constant. */
		wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
		wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);

		set_normalized_timespec(&xtime, sec, nsec);
		set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);

		time_adjust = 0;	/* stop active adjtime() */
		time_status |= STA_UNSYNC;
		time_maxerror = NTP_PHASE_LIMIT;
		time_esterror = NTP_PHASE_LIMIT;
		time_interpolator_reset();
	}
	write_sequnlock_irq(&xtime_lock);
	clock_was_set();
	return 0;
}
|
||||
|
||||
/*
 * Fill @tv with the current wall time at microsecond resolution: a
 * seqlock-consistent snapshot of xtime plus the interpolator offset,
 * normalized so tv_usec < USEC_PER_SEC.
 */
void do_gettimeofday (struct timeval *tv)
{
	unsigned long seq, nsec, usec, sec, offset;
	do {
		seq = read_seqbegin(&xtime_lock);
		offset = time_interpolator_get_offset();
		sec = xtime.tv_sec;
		nsec = xtime.tv_nsec;
	} while (unlikely(read_seqretry(&xtime_lock, seq)));

	usec = (nsec + offset) / 1000;

	/* Carry overflowed microseconds into the seconds field. */
	while (unlikely(usec >= USEC_PER_SEC)) {
		usec -= USEC_PER_SEC;
		++sec;
	}

	tv->tv_sec = sec;
	tv->tv_usec = usec;
}
|
||||
|
||||
EXPORT_SYMBOL(do_gettimeofday);
|
||||
|
||||
|
||||
#else
|
||||
/*
|
||||
* Simulate gettimeofday using do_gettimeofday which only allows a timeval
|
||||
* and therefore only yields usec accuracy
|
||||
*/
|
||||
void getnstimeofday(struct timespec *tv)
|
||||
{
|
||||
struct timeval x;
|
||||
|
||||
do_gettimeofday(&x);
|
||||
tv->tv_sec = x.tv_sec;
|
||||
tv->tv_nsec = x.tv_usec * NSEC_PER_USEC;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (BITS_PER_LONG < 64)
|
||||
/*
 * 64-bit jiffies accessor for 32-bit architectures (this is compiled
 * only when BITS_PER_LONG < 64): jiffies_64 cannot be read in one
 * atomic load there, so read it under the xtime seqlock.
 */
u64 get_jiffies_64(void)
{
	unsigned long seq;
	u64 ret;

	do {
		seq = read_seqbegin(&xtime_lock);
		ret = jiffies_64;
	} while (read_seqretry(&xtime_lock, seq));
	return ret;
}
|
||||
|
||||
EXPORT_SYMBOL(get_jiffies_64);
|
||||
#endif
|
||||
|
||||
EXPORT_SYMBOL(jiffies);
|
||||
1611
kernel/timer.c
Normal file
1611
kernel/timer.c
Normal file
File diff suppressed because it is too large
Load Diff
196
kernel/uid16.c
Normal file
196
kernel/uid16.c
Normal file
@@ -0,0 +1,196 @@
|
||||
/*
|
||||
* Wrapper functions for 16bit uid back compatibility. All nicely tied
|
||||
* together in the faint hope we can take the out in five years time.
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <linux/prctl.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/highuid.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gid_t group)
|
||||
{
|
||||
return sys_chown(filename, low2highuid(user), low2highgid(group));
|
||||
}
|
||||
|
||||
asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_gid_t group)
|
||||
{
|
||||
return sys_lchown(filename, low2highuid(user), low2highgid(group));
|
||||
}
|
||||
|
||||
asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group)
|
||||
{
|
||||
return sys_fchown(fd, low2highuid(user), low2highgid(group));
|
||||
}
|
||||
|
||||
asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid)
|
||||
{
|
||||
return sys_setregid(low2highgid(rgid), low2highgid(egid));
|
||||
}
|
||||
|
||||
asmlinkage long sys_setgid16(old_gid_t gid)
|
||||
{
|
||||
return sys_setgid(low2highgid(gid));
|
||||
}
|
||||
|
||||
asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid)
|
||||
{
|
||||
return sys_setreuid(low2highuid(ruid), low2highuid(euid));
|
||||
}
|
||||
|
||||
asmlinkage long sys_setuid16(old_uid_t uid)
|
||||
{
|
||||
return sys_setuid(low2highuid(uid));
|
||||
}
|
||||
|
||||
asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid)
|
||||
{
|
||||
return sys_setresuid(low2highuid(ruid), low2highuid(euid),
|
||||
low2highuid(suid));
|
||||
}
|
||||
|
||||
asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid)
|
||||
{
|
||||
int retval;
|
||||
|
||||
if (!(retval = put_user(high2lowuid(current->uid), ruid)) &&
|
||||
!(retval = put_user(high2lowuid(current->euid), euid)))
|
||||
retval = put_user(high2lowuid(current->suid), suid);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid)
|
||||
{
|
||||
return sys_setresgid(low2highgid(rgid), low2highgid(egid),
|
||||
low2highgid(sgid));
|
||||
}
|
||||
|
||||
asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid)
|
||||
{
|
||||
int retval;
|
||||
|
||||
if (!(retval = put_user(high2lowgid(current->gid), rgid)) &&
|
||||
!(retval = put_user(high2lowgid(current->egid), egid)))
|
||||
retval = put_user(high2lowgid(current->sgid), sgid);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
asmlinkage long sys_setfsuid16(old_uid_t uid)
|
||||
{
|
||||
return sys_setfsuid(low2highuid(uid));
|
||||
}
|
||||
|
||||
asmlinkage long sys_setfsgid16(old_gid_t gid)
|
||||
{
|
||||
return sys_setfsgid(low2highgid(gid));
|
||||
}
|
||||
|
||||
/*
 * Copy every gid in @group_info out to the userspace array @grouplist,
 * narrowed to 16-bit old_gid_t values.  Returns 0 or -EFAULT.
 */
static int groups16_to_user(old_gid_t __user *grouplist,
    struct group_info *group_info)
{
	int i;
	old_gid_t group;

	for (i = 0; i < group_info->ngroups; i++) {
		group = high2lowgid(GROUP_AT(group_info, i));
		if (put_user(group, grouplist+i))
			return -EFAULT;
	}

	return 0;
}
|
||||
|
||||
/*
 * Fill @group_info from the userspace array @grouplist of 16-bit gids,
 * widening each to a full gid.  Returns 0 or -EFAULT.
 */
static int groups16_from_user(struct group_info *group_info,
    old_gid_t __user *grouplist)
{
	int i;
	old_gid_t group;

	for (i = 0; i < group_info->ngroups; i++) {
		if (get_user(group, grouplist+i))
			return -EFAULT;
		GROUP_AT(group_info, i) = low2highgid(group);
	}

	return 0;
}
|
||||
|
||||
/*
 * 16-bit getgroups(): with @gidsetsize == 0 just return the number of
 * supplementary groups; otherwise copy them to @grouplist and return
 * the count, or -EINVAL if the buffer is too small / -EFAULT on a bad
 * pointer.
 */
asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist)
{
	int i = 0;

	if (gidsetsize < 0)
		return -EINVAL;

	/* Pin the group list while we inspect and copy it. */
	get_group_info(current->group_info);
	i = current->group_info->ngroups;
	if (gidsetsize) {
		if (i > gidsetsize) {
			i = -EINVAL;
			goto out;
		}
		if (groups16_to_user(grouplist, current->group_info)) {
			i = -EFAULT;
			goto out;
		}
	}
out:
	put_group_info(current->group_info);
	return i;
}
|
||||
|
||||
/*
 * 16-bit setgroups(): replace the current supplementary group list
 * with @gidsetsize gids read from @grouplist.  Requires CAP_SETGID.
 * Returns 0 or a -errno (-EPERM, -EINVAL, -ENOMEM, -EFAULT).
 */
asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist)
{
	struct group_info *group_info;
	int retval;

	if (!capable(CAP_SETGID))
		return -EPERM;
	if ((unsigned)gidsetsize > NGROUPS_MAX)
		return -EINVAL;

	group_info = groups_alloc(gidsetsize);
	if (!group_info)
		return -ENOMEM;
	retval = groups16_from_user(group_info, grouplist);
	if (retval) {
		put_group_info(group_info);
		return retval;
	}

	retval = set_current_groups(group_info);
	/* set_current_groups() took its own reference; drop ours. */
	put_group_info(group_info);

	return retval;
}
|
||||
|
||||
asmlinkage long sys_getuid16(void)
|
||||
{
|
||||
return high2lowuid(current->uid);
|
||||
}
|
||||
|
||||
asmlinkage long sys_geteuid16(void)
|
||||
{
|
||||
return high2lowuid(current->euid);
|
||||
}
|
||||
|
||||
asmlinkage long sys_getgid16(void)
|
||||
{
|
||||
return high2lowgid(current->gid);
|
||||
}
|
||||
|
||||
asmlinkage long sys_getegid16(void)
|
||||
{
|
||||
return high2lowgid(current->egid);
|
||||
}
|
||||
189
kernel/user.c
Normal file
189
kernel/user.c
Normal file
@@ -0,0 +1,189 @@
|
||||
/*
|
||||
* The "user cache".
|
||||
*
|
||||
* (C) Copyright 1991-2000 Linus Torvalds
|
||||
*
|
||||
* We have a per-user structure to keep track of how many
|
||||
* processes, files etc the user has claimed, in order to be
|
||||
* able to have per-user limits for system resources.
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/key.h>
|
||||
|
||||
/*
|
||||
* UID task count cache, to get fast user lookup in "alloc_uid"
|
||||
* when changing user ID's (ie setuid() and friends).
|
||||
*/
|
||||
|
||||
#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
|
||||
#define UIDHASH_SZ (1 << UIDHASH_BITS)
|
||||
#define UIDHASH_MASK (UIDHASH_SZ - 1)
|
||||
#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
|
||||
#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid)))
|
||||
|
||||
static kmem_cache_t *uid_cachep;
|
||||
static struct list_head uidhash_table[UIDHASH_SZ];
|
||||
static DEFINE_SPINLOCK(uidhash_lock);
|
||||
|
||||
struct user_struct root_user = {
|
||||
.__count = ATOMIC_INIT(1),
|
||||
.processes = ATOMIC_INIT(1),
|
||||
.files = ATOMIC_INIT(0),
|
||||
.sigpending = ATOMIC_INIT(0),
|
||||
.mq_bytes = 0,
|
||||
.locked_shm = 0,
|
||||
#ifdef CONFIG_KEYS
|
||||
.uid_keyring = &root_user_keyring,
|
||||
.session_keyring = &root_session_keyring,
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* These routines must be called with the uidhash spinlock held!
|
||||
*/
|
||||
/* Link @up into its hash chain; caller holds uidhash_lock. */
static inline void uid_hash_insert(struct user_struct *up, struct list_head *hashent)
{
	list_add(&up->uidhash_list, hashent);
}
|
||||
|
||||
/* Unlink @up from its hash chain; caller holds uidhash_lock. */
static inline void uid_hash_remove(struct user_struct *up)
{
	list_del(&up->uidhash_list);
}
|
||||
|
||||
/*
 * Walk one hash chain looking for @uid.  On a hit, take a reference
 * and return the user_struct; otherwise NULL.  Caller holds
 * uidhash_lock.
 */
static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent)
{
	struct list_head *up;

	list_for_each(up, hashent) {
		struct user_struct *user;

		user = list_entry(up, struct user_struct, uidhash_list);

		if(user->uid == uid) {
			atomic_inc(&user->__count);
			return user;
		}
	}

	return NULL;
}
|
||||
|
||||
/*
|
||||
* Locate the user_struct for the passed UID. If found, take a ref on it. The
|
||||
* caller must undo that ref with free_uid().
|
||||
*
|
||||
* If the user_struct could not be found, return NULL.
|
||||
*/
|
||||
struct user_struct *find_user(uid_t uid)
{
	struct user_struct *ret;

	/* uid_hash_find() takes the reference for us under the lock. */
	spin_lock(&uidhash_lock);
	ret = uid_hash_find(uid, uidhashentry(uid));
	spin_unlock(&uidhash_lock);
	return ret;
}
|
||||
|
||||
/*
 * Drop a reference on @up (NULL is tolerated).  atomic_dec_and_lock()
 * only takes uidhash_lock when the count reaches zero, so the common
 * put stays lock-free while the final put unhashes and frees the
 * entry with the hash locked.
 */
void free_uid(struct user_struct *up)
{
	if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
		uid_hash_remove(up);
		key_put(up->uid_keyring);
		key_put(up->session_keyring);
		kmem_cache_free(uid_cachep, up);
		spin_unlock(&uidhash_lock);
	}
}
|
||||
|
||||
/*
 * Find the user_struct for @uid, creating it if necessary.  Returns a
 * referenced entry (undo with free_uid()) or NULL on allocation
 * failure.  The allocation is done outside the lock, so a racing
 * inserter is detected and the duplicate discarded afterwards.
 */
struct user_struct * alloc_uid(uid_t uid)
{
	struct list_head *hashent = uidhashentry(uid);
	struct user_struct *up;

	spin_lock(&uidhash_lock);
	up = uid_hash_find(uid, hashent);
	spin_unlock(&uidhash_lock);

	if (!up) {
		struct user_struct *new;

		/* may sleep, hence done with the hash unlocked */
		new = kmem_cache_alloc(uid_cachep, SLAB_KERNEL);
		if (!new)
			return NULL;
		new->uid = uid;
		atomic_set(&new->__count, 1);
		atomic_set(&new->processes, 0);
		atomic_set(&new->files, 0);
		atomic_set(&new->sigpending, 0);

		new->mq_bytes = 0;
		new->locked_shm = 0;

		if (alloc_uid_keyring(new) < 0) {
			kmem_cache_free(uid_cachep, new);
			return NULL;
		}

		/*
		 * Before adding this, check whether we raced
		 * on adding the same user already..
		 */
		spin_lock(&uidhash_lock);
		up = uid_hash_find(uid, hashent);
		if (up) {
			/* lost the race: drop our copy, use the winner's */
			key_put(new->uid_keyring);
			key_put(new->session_keyring);
			kmem_cache_free(uid_cachep, new);
		} else {
			uid_hash_insert(new, hashent);
			up = new;
		}
		spin_unlock(&uidhash_lock);

	}
	return up;
}
|
||||
|
||||
/*
 * Point current->user at @new_user, moving the process count from the
 * old user to the new one and dropping the old reference.  The
 * caller's reference on @new_user is handed over to current->user.
 */
void switch_uid(struct user_struct *new_user)
{
	struct user_struct *old_user;

	/* What if a process setreuid()'s and this brings the
	 * new uid over his NPROC rlimit?  We can check this now
	 * cheaply with the new uid cache, so if it matters
	 * we should be checking for it.  -DaveM
	 */
	old_user = current->user;
	atomic_inc(&new_user->processes);
	atomic_dec(&old_user->processes);
	switch_uid_keyring(new_user);
	current->user = new_user;
	free_uid(old_user);
	suid_keys(current);
}
|
||||
|
||||
|
||||
/*
 * Boot-time setup for the uid cache: create the slab, initialize the
 * hash buckets and hash in the statically allocated root user.
 */
static int __init uid_cache_init(void)
{
	int i;

	uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);

	for (i = 0; i < UIDHASH_SZ; i++)
		INIT_LIST_HEAD(&uidhash_table[i]);

	/* Insert the root user immediately (init already runs as root) */
	spin_lock(&uidhash_lock);
	uid_hash_insert(&root_user, uidhashentry(0));
	spin_unlock(&uidhash_lock);

	return 0;
}

module_init(uid_cache_init);
|
||||
246
kernel/wait.c
Normal file
246
kernel/wait.c
Normal file
@@ -0,0 +1,246 @@
|
||||
/*
|
||||
* Generic waiting primitives.
|
||||
*
|
||||
* (C) 2004 William Irwin, Oracle
|
||||
*/
|
||||
#include <linux/config.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/hash.h>
|
||||
|
||||
/*
 * Add a non-exclusive waiter to @q under the queue's irq-safe lock.
 * The exclusive flag is cleared so the waiter is woken by any wakeup.
 */
void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long irqflags;

	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, irqflags);
	__add_wait_queue(q, wait);
	spin_unlock_irqrestore(&q->lock, irqflags);
}
EXPORT_SYMBOL(add_wait_queue);
|
||||
|
||||
/*
 * Add an exclusive waiter to the tail of @q.  Exclusive waiters go at
 * the tail so wake-one semantics pick non-exclusive waiters first.
 */
void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long irqflags;

	wait->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, irqflags);
	__add_wait_queue_tail(q, wait);
	spin_unlock_irqrestore(&q->lock, irqflags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);
|
||||
|
||||
/*
 * Remove @wait from @q under the queue's irq-safe lock.
 */
void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long irqflags;

	spin_lock_irqsave(&q->lock, irqflags);
	__remove_wait_queue(q, wait);
	spin_unlock_irqrestore(&q->lock, irqflags);
}
EXPORT_SYMBOL(remove_wait_queue);
|
||||
|
||||
|
||||
/*
|
||||
* Note: we use "set_current_state()" _after_ the wait-queue add,
|
||||
* because we need a memory barrier there on SMP, so that any
|
||||
* wake-function that tests for the wait-queue being active
|
||||
* will be guaranteed to see waitqueue addition _or_ subsequent
|
||||
* tests in this thread will see the wakeup having taken place.
|
||||
*
|
||||
* The spin_unlock() itself is semi-permeable and only protects
|
||||
* one way (it only protects stuff inside the critical region and
|
||||
* stops them from bleeding out - it would still allow subsequent
|
||||
* loads to move into the critical region).
|
||||
*/
|
||||
/*
 * Add @wait to @q (non-exclusive) and move the task to @state in one
 * step.  Safe to call repeatedly from a wait loop: the entry is only
 * added if it is not already queued.  See the ordering comment above:
 * the state change must follow the queue add.
 */
void fastcall
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
	unsigned long flags;

	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	if (list_empty(&wait->task_list))
		__add_wait_queue(q, wait);
	/*
	 * don't alter the task state if this is just going to
	 * queue an async wait queue callback
	 */
	if (is_sync_wait(wait))
		set_current_state(state);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);
|
||||
|
||||
/*
 * Like prepare_to_wait(), but queue the entry as an exclusive waiter
 * at the tail of @q, so wake-one wakeups stop at this entry.
 */
void fastcall
prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
	unsigned long flags;

	wait->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&q->lock, flags);
	if (list_empty(&wait->task_list))
		__add_wait_queue_tail(q, wait);
	/*
	 * don't alter the task state if this is just going to
	 * queue an async wait queue callback
	 */
	if (is_sync_wait(wait))
		set_current_state(state);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);
|
||||
|
||||
/*
 * End a wait: mark the task runnable again and, if the entry is still
 * queued, take the lock and dequeue it.  The lockless emptiness test
 * below is only valid under the stated conditions.
 */
void fastcall finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	__set_current_state(TASK_RUNNING);
	/*
	 * We can check for list emptiness outside the lock
	 * IFF:
	 *  - we use the "careful" check that verifies both
	 *    the next and prev pointers, so that there cannot
	 *    be any half-pending updates in progress on other
	 *    CPU's that we haven't seen yet (and that might
	 *    still change the stack area.
	 * and
	 *  - all other users take the lock (ie we can only
	 *    have _one_ other CPU that looks at or modifies
	 *    the list).
	 */
	if (!list_empty_careful(&wait->task_list)) {
		spin_lock_irqsave(&q->lock, flags);
		list_del_init(&wait->task_list);
		spin_unlock_irqrestore(&q->lock, flags);
	}
}
EXPORT_SYMBOL(finish_wait);
|
||||
|
||||
/*
 * Wake function that dequeues the wait entry itself on a successful
 * wakeup, so the woken task need not call remove_wait_queue().
 */
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	int woken = default_wake_function(wait, mode, sync, key);

	if (woken)
		list_del_init(&wait->task_list);

	return woken;
}
EXPORT_SYMBOL(autoremove_wake_function);
|
||||
|
||||
/*
 * Wake function for bit waitqueues: only wake a waiter whose key
 * matches the (word, bit) being woken, and only once the bit has
 * actually been cleared.
 */
int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
{
	struct wait_bit_key *key = arg;
	struct wait_bit_queue *wait_bit
		= container_of(wait, struct wait_bit_queue, wait);

	if (wait_bit->key.flags == key->flags &&
			wait_bit->key.bit_nr == key->bit_nr &&
			!test_bit(key->bit_nr, key->flags))
		return autoremove_wake_function(wait, mode, sync, key);

	return 0;
}
EXPORT_SYMBOL(wake_bit_function);
|
||||
|
||||
/*
 * To allow interruptible waiting and asynchronous (i.e. nonblocking)
 * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
 * permitted return codes. Nonzero return codes halt waiting and return.
 */
/*
 * Sleep on @wq until the bit described by q->key clears or @action
 * returns nonzero.  Returns 0 once the bit is clear, otherwise the
 * nonzero value from @action.
 */
int __sched fastcall
__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
	      int (*action)(void *), unsigned mode)
{
	int ret = 0;

	do {
		prepare_to_wait(wq, &q->wait, mode);
		/* re-test after queueing to close the set-then-clear race */
		if (test_bit(q->key.bit_nr, q->key.flags))
			ret = (*action)(q->key.flags);
	} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
	finish_wait(wq, &q->wait);
	return ret;
}
EXPORT_SYMBOL(__wait_on_bit);
|
||||
|
||||
/*
 * Convenience wrapper: build the wait-bit descriptor for (@word, @bit),
 * find its hashed waitqueue and wait there via __wait_on_bit().
 */
int __sched fastcall out_of_line_wait_on_bit(void *word, int bit,
					int (*action)(void *), unsigned mode)
{
	DEFINE_WAIT_BIT(wait, word, bit);
	wait_queue_head_t *head = bit_waitqueue(word, bit);

	return __wait_on_bit(head, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_bit);
|
||||
|
||||
/*
 * Like __wait_on_bit(), but acquire the bit: loop until we are the one
 * who sets it (test_and_set_bit() returns 0).  Returns 0 on success,
 * or the nonzero value from @action if it aborted the wait (in which
 * case the bit is NOT held).
 */
int __sched fastcall
__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
			int (*action)(void *), unsigned mode)
{
	int ret = 0;

	do {
		/* exclusive: only one contender is woken per release */
		prepare_to_wait_exclusive(wq, &q->wait, mode);
		if (test_bit(q->key.bit_nr, q->key.flags)) {
			if ((ret = (*action)(q->key.flags)))
				break;
		}
	} while (test_and_set_bit(q->key.bit_nr, q->key.flags));
	finish_wait(wq, &q->wait);
	return ret;
}
EXPORT_SYMBOL(__wait_on_bit_lock);
|
||||
|
||||
/*
 * Convenience wrapper around __wait_on_bit_lock() using the hashed
 * waitqueue for (@word, @bit).
 */
int __sched fastcall out_of_line_wait_on_bit_lock(void *word, int bit,
					int (*action)(void *), unsigned mode)
{
	DEFINE_WAIT_BIT(wait, word, bit);
	wait_queue_head_t *head = bit_waitqueue(word, bit);

	return __wait_on_bit_lock(head, &wait, action, mode);
}
EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
|
||||
|
||||
/*
 * Wake one waiter on @wq that is waiting for @bit of @word; the key is
 * matched by wake_bit_function().  Both interruptible and
 * uninterruptible sleepers are eligible.
 */
void fastcall __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
{
	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
	if (waitqueue_active(wq))
		__wake_up(wq, TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE, 1, &key);
}
EXPORT_SYMBOL(__wake_up_bit);
|
||||
|
||||
/**
 * wake_up_bit - wake up a waiter on a bit
 * @word: the word being waited on, a kernel virtual address
 * @bit: the bit of the word being waited on
 *
 * There is a standard hashed waitqueue table for generic use. This
 * is the part of the hashtable's accessor API that wakes up waiters
 * on a bit. For instance, if one were to have waiters on a bitflag,
 * one would call wake_up_bit() after clearing the bit.
 *
 * In order for this to function properly, as it uses waitqueue_active()
 * internally, some kind of memory barrier must be done prior to calling
 * this. Typically, this will be smp_mb__after_clear_bit(), but in some
 * cases where bitflags are manipulated non-atomically under a lock, one
 * may need to use a less regular barrier, such fs/inode.c's smp_mb(),
 * because spin_unlock() does not guarantee a memory barrier.
 */
void fastcall wake_up_bit(void *word, int bit)
{
	__wake_up_bit(bit_waitqueue(word, bit), word, bit);
}
EXPORT_SYMBOL(wake_up_bit);
|
||||
|
||||
/*
 * Map (@word, @bit) onto one of the hashed wait queues of the memory
 * zone that @word lives in.  The word address is shifted by
 * log2(BITS_PER_LONG) before or-ing in the bit index, so different
 * bits of the same word hash to distinct values.
 */
fastcall wait_queue_head_t *bit_waitqueue(void *word, int bit)
{
	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
	const struct zone *zone = page_zone(virt_to_page(word));
	unsigned long val = (unsigned long)word << shift | bit;

	return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
}
EXPORT_SYMBOL(bit_waitqueue);
|
||||
555
kernel/workqueue.c
Normal file
555
kernel/workqueue.c
Normal file
@@ -0,0 +1,555 @@
|
||||
/*
|
||||
* linux/kernel/workqueue.c
|
||||
*
|
||||
* Generic mechanism for defining kernel helper threads for running
|
||||
* arbitrary tasks in process context.
|
||||
*
|
||||
* Started by Ingo Molnar, Copyright (C) 2002
|
||||
*
|
||||
* Derived from the taskqueue/keventd code by:
|
||||
*
|
||||
* David Woodhouse <dwmw2@infradead.org>
|
||||
* Andrew Morton <andrewm@uow.edu.au>
|
||||
* Kai Petzke <wpp@marie.physik.tu-berlin.de>
|
||||
* Theodore Ts'o <tytso@mit.edu>
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/signal.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/kthread.h>
|
||||
|
||||
/*
|
||||
* The per-CPU workqueue (if single thread, we always use cpu 0's).
|
||||
*
|
||||
* The sequence counters are for flush_scheduled_work(). It wants to wait
|
||||
* until all currently-scheduled works are completed, but it doesn't
|
||||
* want to be livelocked by new, incoming ones. So it waits until
|
||||
* remove_sequence is >= the insert_sequence which pertained when
|
||||
* flush_scheduled_work() was called.
|
||||
*/
|
||||
/*
 * Per-CPU work queue state; single-threaded workqueues only ever use
 * the CPU-0 slot.  See the flushing comment above for the role of the
 * two sequence counters.
 */
struct cpu_workqueue_struct {

	spinlock_t lock;		/* protects all fields below */

	long remove_sequence;	/* Least-recently added (next to run) */
	long insert_sequence;	/* Next to add */

	struct list_head worklist;	/* pending work_structs, FIFO */
	wait_queue_head_t more_work;	/* worker sleeps here when idle */
	wait_queue_head_t work_done;	/* flushers sleep here */

	struct workqueue_struct *wq;	/* owning workqueue */
	task_t *thread;			/* worker kthread; NULL when torn down */

	int run_depth;		/* Detect run_workqueue() recursion depth */
} ____cacheline_aligned;
|
||||
|
||||
/*
 * The externally visible workqueue abstraction is an array of
 * per-CPU workqueues:
 */
struct workqueue_struct {
	struct cpu_workqueue_struct cpu_wq[NR_CPUS];
	const char *name;		/* prefix for the worker thread names */
	struct list_head list; 	/* Empty if single thread */
};
|
||||
|
||||
/* All the per-cpu workqueues on the system, for hotplug cpu to add/remove
   threads to each one as cpus come/go. */
static DEFINE_SPINLOCK(workqueue_lock);	/* protects the workqueues list */
static LIST_HEAD(workqueues);
|
||||
|
||||
/* If it's single threaded, it isn't in the list of workqueues. */
/* (see __create_workqueue(): only multi-threaded queues are listed) */
static inline int is_single_threaded(struct workqueue_struct *wq)
{
	return list_empty(&wq->list);
}
|
||||
|
||||
/* Preempt must be disabled. */
/*
 * Append @work to @cwq, bump insert_sequence for the flush logic and
 * kick the worker thread.
 */
static void __queue_work(struct cpu_workqueue_struct *cwq,
			 struct work_struct *work)
{
	unsigned long flags;

	spin_lock_irqsave(&cwq->lock, flags);
	/* record the owning per-cpu queue for run_workqueue()'s sanity check */
	work->wq_data = cwq;
	list_add_tail(&work->entry, &cwq->worklist);
	cwq->insert_sequence++;
	wake_up(&cwq->more_work);
	spin_unlock_irqrestore(&cwq->lock, flags);
}
|
||||
|
||||
/*
 * Queue work on a workqueue. Return non-zero if it was successfully
 * added.
 *
 * We queue the work to the CPU it was submitted, but there is no
 * guarantee that it will be processed by that CPU.
 */
int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	int ret = 0, cpu = get_cpu();

	/* the pending bit serializes concurrent submitters of this work */
	if (!test_and_set_bit(0, &work->pending)) {
		if (unlikely(is_single_threaded(wq)))
			cpu = 0;	/* single-threaded queues live on cpu 0 */
		BUG_ON(!list_empty(&work->entry));
		__queue_work(wq->cpu_wq + cpu, work);
		ret = 1;
	}
	put_cpu();
	return ret;
}
|
||||
|
||||
/*
 * Timer callback for delayed work: the delay has elapsed, so move the
 * work onto this CPU's queue.  work->wq_data temporarily holds the
 * workqueue_struct (stored by queue_delayed_work()).
 */
static void delayed_work_timer_fn(unsigned long __data)
{
	struct work_struct *work = (struct work_struct *)__data;
	struct workqueue_struct *wq = work->wq_data;
	int cpu = smp_processor_id();

	if (unlikely(is_single_threaded(wq)))
		cpu = 0;

	__queue_work(wq->cpu_wq + cpu, work);
}
|
||||
|
||||
/*
 * Queue @work on @wq after @delay jiffies.  Returns non-zero if it was
 * newly armed, 0 if the work was already pending.
 */
int fastcall queue_delayed_work(struct workqueue_struct *wq,
			struct work_struct *work, unsigned long delay)
{
	int ret = 0;
	struct timer_list *timer = &work->timer;

	if (!test_and_set_bit(0, &work->pending)) {
		BUG_ON(timer_pending(timer));
		BUG_ON(!list_empty(&work->entry));

		/* This stores wq for the moment, for the timer_fn */
		work->wq_data = wq;
		timer->expires = jiffies + delay;
		timer->data = (unsigned long)work;
		timer->function = delayed_work_timer_fn;
		add_timer(timer);
		ret = 1;
	}
	return ret;
}
|
||||
|
||||
/*
 * Run every work currently on @cwq, in submission order.  The queue
 * lock is dropped around each callback (so handlers may queue or flush
 * work) and re-taken to advance remove_sequence, which
 * flush_cpu_workqueue() waits on.
 */
static inline void run_workqueue(struct cpu_workqueue_struct *cwq)
{
	unsigned long flags;

	/*
	 * Keep taking off work from the queue until
	 * done.
	 */
	spin_lock_irqsave(&cwq->lock, flags);
	cwq->run_depth++;
	if (cwq->run_depth > 3) {
		/* morton gets to eat his hat */
		printk("%s: recursion depth exceeded: %d\n",
			__FUNCTION__, cwq->run_depth);
		dump_stack();
	}
	while (!list_empty(&cwq->worklist)) {
		struct work_struct *work = list_entry(cwq->worklist.next,
						struct work_struct, entry);
		void (*f) (void *) = work->func;
		void *data = work->data;

		list_del_init(cwq->worklist.next);
		spin_unlock_irqrestore(&cwq->lock, flags);

		BUG_ON(work->wq_data != cwq);
		/* clear pending before the callback so it may requeue itself */
		clear_bit(0, &work->pending);
		f(data);

		spin_lock_irqsave(&cwq->lock, flags);
		cwq->remove_sequence++;
		wake_up(&cwq->work_done);
	}
	cwq->run_depth--;
	spin_unlock_irqrestore(&cwq->lock, flags);
}
|
||||
|
||||
/*
 * Main loop of a per-CPU worker kthread: block all signals, make
 * children autoreap, then sleep on more_work until work arrives or
 * kthread_stop() is called.
 */
static int worker_thread(void *__cwq)
{
	struct cpu_workqueue_struct *cwq = __cwq;
	DECLARE_WAITQUEUE(wait, current);
	struct k_sigaction sa;
	sigset_t blocked;

	current->flags |= PF_NOFREEZE;

	set_user_nice(current, -5);

	/* Block and flush all signals */
	sigfillset(&blocked);
	sigprocmask(SIG_BLOCK, &blocked, NULL);
	flush_signals(current);

	/* SIG_IGN makes children autoreap: see do_notify_parent(). */
	sa.sa.sa_handler = SIG_IGN;
	sa.sa.sa_flags = 0;
	siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD));
	do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0);

	/* set state before testing the list, to avoid missed wakeups */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		add_wait_queue(&cwq->more_work, &wait);
		if (list_empty(&cwq->worklist))
			schedule();
		else
			__set_current_state(TASK_RUNNING);
		remove_wait_queue(&cwq->more_work, &wait);

		if (!list_empty(&cwq->worklist))
			run_workqueue(cwq);
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}
|
||||
|
||||
/*
 * Wait until every work that was on @cwq at entry has run: sample
 * insert_sequence once and sleep until remove_sequence catches up, so
 * newly queued work cannot livelock the flusher.
 */
static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
{
	if (cwq->thread == current) {
		/*
		 * Probably keventd trying to flush its own queue. So simply run
		 * it by hand rather than deadlocking.
		 */
		run_workqueue(cwq);
	} else {
		DEFINE_WAIT(wait);
		long sequence_needed;

		spin_lock_irq(&cwq->lock);
		sequence_needed = cwq->insert_sequence;

		while (sequence_needed - cwq->remove_sequence > 0) {
			prepare_to_wait(&cwq->work_done, &wait,
					TASK_UNINTERRUPTIBLE);
			spin_unlock_irq(&cwq->lock);
			schedule();
			spin_lock_irq(&cwq->lock);
		}
		finish_wait(&cwq->work_done, &wait);
		spin_unlock_irq(&cwq->lock);
	}
}
|
||||
|
||||
/*
 * flush_workqueue - ensure that any scheduled work has run to completion.
 *
 * Forces execution of the workqueue and blocks until its completion.
 * This is typically used in driver shutdown handlers.
 *
 * This function will sample each workqueue's current insert_sequence number and
 * will sleep until the head sequence is greater than or equal to that.  This
 * means that we sleep until all works which were queued on entry have been
 * handled, but we are not livelocked by new incoming ones.
 *
 * This function used to run the workqueues itself.  Now we just wait for the
 * helper threads to do it.
 */
void fastcall flush_workqueue(struct workqueue_struct *wq)
{
	might_sleep();

	if (is_single_threaded(wq)) {
		/* Always use cpu 0's area. */
		flush_cpu_workqueue(wq->cpu_wq + 0);
	} else {
		int cpu;

		/* keep the online-cpu set stable while we walk it */
		lock_cpu_hotplug();
		for_each_online_cpu(cpu)
			flush_cpu_workqueue(wq->cpu_wq + cpu);
		unlock_cpu_hotplug();
	}
}
|
||||
|
||||
/*
 * Initialize @wq's per-cpu queue for @cpu and create (but do not wake)
 * its worker kthread.  Returns the task on success, NULL on failure.
 */
static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
						   int cpu)
{
	struct cpu_workqueue_struct *cwq = wq->cpu_wq + cpu;
	struct task_struct *p;

	spin_lock_init(&cwq->lock);
	cwq->wq = wq;
	cwq->thread = NULL;
	cwq->insert_sequence = 0;
	cwq->remove_sequence = 0;
	INIT_LIST_HEAD(&cwq->worklist);
	init_waitqueue_head(&cwq->more_work);
	init_waitqueue_head(&cwq->work_done);

	/* single-threaded queues get plain names, per-cpu ones "name/cpu" */
	if (is_single_threaded(wq))
		p = kthread_create(worker_thread, cwq, "%s", wq->name);
	else
		p = kthread_create(worker_thread, cwq, "%s/%d", wq->name, cpu);
	if (IS_ERR(p))
		return NULL;
	cwq->thread = p;
	return p;
}
|
||||
|
||||
/*
 * Create a workqueue.  @singlethread selects a single worker on cpu 0;
 * otherwise one worker per online CPU is started and the queue is put
 * on the global list so CPU hotplug can manage its threads.  Returns
 * the workqueue, or NULL if any thread failed to start (partially
 * created state is torn down via destroy_workqueue()).
 */
struct workqueue_struct *__create_workqueue(const char *name,
					    int singlethread)
{
	int cpu, destroy = 0;
	struct workqueue_struct *wq;
	struct task_struct *p;

	/* NOTE(review): presumably so "%s/%d" fits the comm field -- confirm */
	BUG_ON(strlen(name) > 10);

	wq = kmalloc(sizeof(*wq), GFP_KERNEL);
	if (!wq)
		return NULL;
	memset(wq, 0, sizeof(*wq));

	wq->name = name;
	/* We don't need the distraction of CPUs appearing and vanishing. */
	lock_cpu_hotplug();
	if (singlethread) {
		INIT_LIST_HEAD(&wq->list);
		p = create_workqueue_thread(wq, 0);
		if (!p)
			destroy = 1;
		else
			wake_up_process(p);
	} else {
		spin_lock(&workqueue_lock);
		list_add(&wq->list, &workqueues);
		spin_unlock(&workqueue_lock);
		for_each_online_cpu(cpu) {
			p = create_workqueue_thread(wq, cpu);
			if (p) {
				kthread_bind(p, cpu);
				wake_up_process(p);
			} else
				destroy = 1;
		}
	}
	unlock_cpu_hotplug();

	/*
	 * Was there any error during startup? If yes then clean up:
	 */
	if (destroy) {
		destroy_workqueue(wq);
		wq = NULL;
	}
	return wq;
}
|
||||
|
||||
/*
 * Detach and stop @wq's worker for @cpu.  The thread pointer is
 * cleared under the queue lock first, then the kthread is stopped
 * outside the lock (kthread_stop() sleeps).
 */
static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu)
{
	struct cpu_workqueue_struct *cwq;
	unsigned long flags;
	struct task_struct *p;

	cwq = wq->cpu_wq + cpu;
	spin_lock_irqsave(&cwq->lock, flags);
	p = cwq->thread;
	cwq->thread = NULL;
	spin_unlock_irqrestore(&cwq->lock, flags);
	if (p)
		kthread_stop(p);
}
|
||||
|
||||
/*
 * Flush remaining work, stop every worker thread and free @wq.
 * NOTE(review): assumes callers stop queueing new work first -- confirm.
 */
void destroy_workqueue(struct workqueue_struct *wq)
{
	int cpu;

	flush_workqueue(wq);

	/* We don't need the distraction of CPUs appearing and vanishing. */
	lock_cpu_hotplug();
	if (is_single_threaded(wq))
		cleanup_workqueue_thread(wq, 0);
	else {
		for_each_online_cpu(cpu)
			cleanup_workqueue_thread(wq, cpu);
		spin_lock(&workqueue_lock);
		list_del(&wq->list);
		spin_unlock(&workqueue_lock);
	}
	unlock_cpu_hotplug();
	kfree(wq);
}
|
||||
|
||||
/* The default (keventd) workqueue, created by init_workqueues(). */
static struct workqueue_struct *keventd_wq;

/* Queue @work on the default keventd workqueue. */
int fastcall schedule_work(struct work_struct *work)
{
	return queue_work(keventd_wq, work);
}
|
||||
|
||||
/* Queue @work on the default keventd workqueue after @delay jiffies. */
int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay)
{
	return queue_delayed_work(keventd_wq, work, delay);
}
|
||||
|
||||
/*
 * Like schedule_delayed_work(), but arm the timer on @cpu so the work
 * is queued there when it expires.
 */
int schedule_delayed_work_on(int cpu,
			struct work_struct *work, unsigned long delay)
{
	int ret = 0;
	struct timer_list *timer = &work->timer;

	if (!test_and_set_bit(0, &work->pending)) {
		BUG_ON(timer_pending(timer));
		BUG_ON(!list_empty(&work->entry));
		/* This stores keventd_wq for the moment, for the timer_fn */
		work->wq_data = keventd_wq;
		timer->expires = jiffies + delay;
		timer->data = (unsigned long)work;
		timer->function = delayed_work_timer_fn;
		add_timer_on(timer, cpu);
		ret = 1;
	}
	return ret;
}
|
||||
|
||||
/* Wait for all work queued on keventd at the time of the call. */
void flush_scheduled_work(void)
{
	flush_workqueue(keventd_wq);
}
|
||||
|
||||
/**
 * cancel_rearming_delayed_workqueue - reliably kill off a delayed
 *			work whose handler rearms the delayed work.
 * @wq:   the controlling workqueue structure
 * @work: the delayed work struct
 *
 * Flushes the queue repeatedly until a cancel attempt wins the race
 * against the handler re-arming the timer.
 */
static void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq,
				       struct work_struct *work)
{
	while (!cancel_delayed_work(work))
		flush_workqueue(wq);
}
|
||||
|
||||
/**
 * cancel_rearming_delayed_work - reliably kill off a delayed keventd
 *			work whose handler rearms the delayed work.
 * @work: the delayed work struct
 *
 * Convenience wrapper operating on the default keventd workqueue.
 */
void cancel_rearming_delayed_work(struct work_struct *work)
{
	cancel_rearming_delayed_workqueue(keventd_wq, work);
}
EXPORT_SYMBOL(cancel_rearming_delayed_work);
|
||||
|
||||
/* Non-zero once init_workqueues() has created the keventd workqueue. */
int keventd_up(void)
{
	return keventd_wq != NULL;
}
|
||||
|
||||
int current_is_keventd(void)
|
||||
{
|
||||
struct cpu_workqueue_struct *cwq;
|
||||
int cpu = smp_processor_id(); /* preempt-safe: keventd is per-cpu */
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(!keventd_wq);
|
||||
|
||||
cwq = keventd_wq->cpu_wq + cpu;
|
||||
if (current == cwq->thread)
|
||||
ret = 1;
|
||||
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
/* Take the work from this (downed) CPU. */
/* Moves every pending work of @wq's dead-CPU queue onto this CPU's queue. */
static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
{
	struct cpu_workqueue_struct *cwq = wq->cpu_wq + cpu;
	LIST_HEAD(list);
	struct work_struct *work;

	spin_lock_irq(&cwq->lock);
	/* detach the whole list at once, then requeue item by item */
	list_splice_init(&cwq->worklist, &list);

	while (!list_empty(&list)) {
		printk("Taking work for %s\n", wq->name);
		work = list_entry(list.next,struct work_struct,entry);
		list_del(&work->entry);
		__queue_work(wq->cpu_wq + smp_processor_id(), work);
	}
	spin_unlock_irq(&cwq->lock);
}
|
||||
|
||||
/* We're holding the cpucontrol mutex here */
|
||||
static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
|
||||
unsigned long action,
|
||||
void *hcpu)
|
||||
{
|
||||
unsigned int hotcpu = (unsigned long)hcpu;
|
||||
struct workqueue_struct *wq;
|
||||
|
||||
switch (action) {
|
||||
case CPU_UP_PREPARE:
|
||||
/* Create a new workqueue thread for it. */
|
||||
list_for_each_entry(wq, &workqueues, list) {
|
||||
if (create_workqueue_thread(wq, hotcpu) < 0) {
|
||||
printk("workqueue for %i failed\n", hotcpu);
|
||||
return NOTIFY_BAD;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case CPU_ONLINE:
|
||||
/* Kick off worker threads. */
|
||||
list_for_each_entry(wq, &workqueues, list) {
|
||||
kthread_bind(wq->cpu_wq[hotcpu].thread, hotcpu);
|
||||
wake_up_process(wq->cpu_wq[hotcpu].thread);
|
||||
}
|
||||
break;
|
||||
|
||||
case CPU_UP_CANCELED:
|
||||
list_for_each_entry(wq, &workqueues, list) {
|
||||
/* Unbind so it can run. */
|
||||
kthread_bind(wq->cpu_wq[hotcpu].thread,
|
||||
smp_processor_id());
|
||||
cleanup_workqueue_thread(wq, hotcpu);
|
||||
}
|
||||
break;
|
||||
|
||||
case CPU_DEAD:
|
||||
list_for_each_entry(wq, &workqueues, list)
|
||||
cleanup_workqueue_thread(wq, hotcpu);
|
||||
list_for_each_entry(wq, &workqueues, list)
|
||||
take_over_work(wq, hotcpu);
|
||||
break;
|
||||
}
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Boot-time setup: register the CPU-hotplug callback and create the
 * default "events" (keventd) workqueue.
 */
void init_workqueues(void)
{
	hotcpu_notifier(workqueue_cpu_callback, 0);
	keventd_wq = create_workqueue("events");
	BUG_ON(!keventd_wq);
}
|
||||
|
||||
/* Core workqueue API */
EXPORT_SYMBOL_GPL(__create_workqueue);
EXPORT_SYMBOL_GPL(queue_work);
EXPORT_SYMBOL_GPL(queue_delayed_work);
EXPORT_SYMBOL_GPL(flush_workqueue);
EXPORT_SYMBOL_GPL(destroy_workqueue);

/* keventd convenience wrappers */
EXPORT_SYMBOL(schedule_work);
EXPORT_SYMBOL(schedule_delayed_work);
EXPORT_SYMBOL(schedule_delayed_work_on);
EXPORT_SYMBOL(flush_scheduled_work);
|
||||
Reference in New Issue
Block a user