| author | Oliver Schinagl <oliver@schinagl.nl> | 2011-04-27 13:13:05 (GMT) |
|---|---|---|
| committer | Oliver Schinagl <oliver@schinagl.nl> | 2011-04-27 13:13:05 (GMT) |
| commit | cb589e64ddfbc502e8b1189ec7253c43b42cd183 (patch) | |
| tree | a45aa4df23db84c279f39bd2c894ecf6bada0289 /uClinux-2.4.31-uc0/kernel | |
| parent | d53ae4b2067e5e7c4f5a0b9a234a89e0582c2e84 (diff) | |
| download | openipcam-cb589e64ddfbc502e8b1189ec7253c43b42cd183.zip openipcam-cb589e64ddfbc502e8b1189ec7253c43b42cd183.tar.gz openipcam-cb589e64ddfbc502e8b1189ec7253c43b42cd183.tar.bz2 | |
linux-2.4.31 with uClinux uc0 pre-patched
Diffstat (limited to 'uClinux-2.4.31-uc0/kernel')
27 files changed, 14948 insertions, 0 deletions
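The body of the commit below is verbatim Linux 2.4 kernel source in unified-diff form. One detail in it benefits from a worked example: acct.c stores accounting times and counters as comp_t, a 16-bit format with a 13-bit mantissa and a 3-bit base-8 exponent, built by encode_comp_t(). The sketch below is an editorial illustration, not part of the commit; it ports that routine to user space and adds the obvious inverse, making the encoding's precision loss visible.

```c
#include <stdio.h>

#define MANTSIZE 13                      /* 13 bit mantissa. */
#define EXPSIZE  3                       /* Base 8 (3 bit) exponent. */
#define MAXFRACT ((1 << MANTSIZE) - 1)   /* Maximum fractional value. */

/* User-space port of encode_comp_t() from kernel/acct.c below. */
static unsigned short encode_comp_t(unsigned long value)
{
    int exp = 0, rnd = 0;

    while (value > MAXFRACT) {
        rnd = value & (1 << (EXPSIZE - 1)); /* remember the bit shifted out */
        value >>= EXPSIZE;                  /* base 8 exponent == 3 bit shift */
        exp++;
    }
    if (rnd && (++value > MAXFRACT)) {      /* round up, handling overflow */
        value >>= EXPSIZE;
        exp++;
    }
    return (exp << MANTSIZE) | value;
}

/* Inverse transform (not in the kernel): mantissa << (3 * exponent). */
static unsigned long decode_comp_t(unsigned short c)
{
    return (unsigned long)(c & MAXFRACT) << (EXPSIZE * (c >> MANTSIZE));
}

int main(void)
{
    unsigned long v = 100000;
    unsigned short c = encode_comp_t(v);

    printf("%lu -> %#x -> %lu\n", v, c, decode_comp_t(c));
    return 0;
}
```

Compiled and run, this prints `100000 -> 0x461b -> 100032`: a value survives the round trip only to within the 13-bit mantissa's resolution, which is why the accounting record can afford a single 16-bit field per counter.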
diff --git a/uClinux-2.4.31-uc0/kernel/Makefile b/uClinux-2.4.31-uc0/kernel/Makefile
new file mode 100644
index 0000000..1212f40
--- /dev/null
+++ b/uClinux-2.4.31-uc0/kernel/Makefile
@@ -0,0 +1,38 @@
+#
+# Makefile for the linux kernel.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+O_TARGET := kernel.o
+
+export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o dma.o
+
+obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
+        module.o exit.o itimer.o info.o time.o softirq.o resource.o \
+        sysctl.o acct.o capability.o ptrace.o timer.o user.o \
+        signal.o sys.o kmod.o context.o
+
+ifndef CONFIG_ARM
+obj-y += dma.o
+endif
+
+OX_OBJS += signal.o sys.o kmod.o context.o
+
+obj-$(CONFIG_UID16) += uid16.o
+obj-$(CONFIG_MODULES) += ksyms.o
+obj-$(CONFIG_PM) += pm.o
+
+ifneq ($(CONFIG_IA64),y)
+# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
+# needed for x86 only. Why this used to be enabled for all architectures is beyond
+# me. I suspect most platforms don't need this, but until we know that for sure
+# I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k
+# to get a correct value for the wait-channel (WCHAN in ps). --davidm
+CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer
+endif
+
+include $(TOPDIR)/Rules.make
diff --git a/uClinux-2.4.31-uc0/kernel/acct.c b/uClinux-2.4.31-uc0/kernel/acct.c
new file mode 100644
index 0000000..3e22b57
--- /dev/null
+++ b/uClinux-2.4.31-uc0/kernel/acct.c
@@ -0,0 +1,381 @@
+/*
+ * linux/kernel/acct.c
+ *
+ * BSD Process Accounting for Linux
+ *
+ * Author: Marco van Wieringen <mvw@planets.elm.net>
+ *
+ * Some code based on ideas and code from:
+ * Thomas K. Dyas <tdyas@eden.rutgers.edu>
+ *
+ * This file implements BSD-style process accounting. Whenever any
+ * process exits, an accounting record of type "struct acct" is
+ * written to the file specified with the acct() system call. It is
+ * up to user-level programs to do useful things with the accounting
+ * log. The kernel just provides the raw accounting information.
+ *
+ * (C) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V.
+ *
+ * Plugged two leaks. 1) It didn't return acct_file into the free_filps if
+ * the file happened to be read-only. 2) If the accounting was suspended
+ * due to the lack of space it happily allowed to reopen it and completely
+ * lost the old acct_file. 3/10/98, Al Viro.
+ *
+ * Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
+ * XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
+ *
+ * Fixed a nasty interaction with with sys_umount(). If the accointing
+ * was suspeneded we failed to stop it on umount(). Messy.
+ * Another one: remount to readonly didn't stop accounting.
+ * Question: what should we do if we have CAP_SYS_ADMIN but not
+ * CAP_SYS_PACCT? Current code does the following: umount returns -EBUSY
+ * unless we are messing with the root. In that case we are getting a
+ * real mess with do_remount_sb(). 9/11/98, AV.
+ *
+ * Fixed a bunch of races (and pair of leaks). Probably not the best way,
+ * but this one obviously doesn't introduce deadlocks. Later. BTW, found
+ * one race (and leak) in BSD implementation.
+ * OK, that's better. ANOTHER race and leak in BSD variant. There always
+ * is one more bug... 10/11/98, AV.
+ *
+ * Oh, fsck... Oopsable SMP race in do_process_acct() - we must hold
+ * ->mmap_sem to walk the vma list of current->mm. Nasty, since it leaks
+ * a struct file opened for write. Fixed. 2/6/2000, AV.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+
+#ifdef CONFIG_BSD_PROCESS_ACCT
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/acct.h>
+#include <linux/smp_lock.h>
+#include <linux/file.h>
+#include <linux/tty.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * These constants control the amount of freespace that suspend and
+ * resume the process accounting system, and the time delay between
+ * each check.
+ * Turned into sysctl-controllable parameters. AV, 12/11/98
+ */
+
+int acct_parm[3] = {4, 2, 30};
+#define RESUME          (acct_parm[0])  /* >foo% free space - resume */
+#define SUSPEND         (acct_parm[1])  /* <foo% free space - suspend */
+#define ACCT_TIMEOUT    (acct_parm[2])  /* foo second timeout between checks */
+
+/*
+ * External references and all of the globals.
+ */
+
+static volatile int acct_active;
+static volatile int acct_needcheck;
+static struct file *acct_file;
+static struct timer_list acct_timer;
+static void do_acct_process(long, struct file *);
+
+/*
+ * Called whenever the timer says to check the free space.
+ */
+static void acct_timeout(unsigned long unused)
+{
+        acct_needcheck = 1;
+}
+
+/*
+ * Check the amount of free space and suspend/resume accordingly.
+ */
+static int check_free_space(struct file *file)
+{
+        struct statfs sbuf;
+        int res;
+        int act;
+
+        lock_kernel();
+        res = acct_active;
+        if (!file || !acct_needcheck)
+                goto out;
+        unlock_kernel();
+
+        /* May block */
+        if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf))
+                return res;
+
+        if (sbuf.f_bavail <= SUSPEND * sbuf.f_blocks / 100)
+                act = -1;
+        else if (sbuf.f_bavail >= RESUME * sbuf.f_blocks / 100)
+                act = 1;
+        else
+                act = 0;
+
+        /*
+         * If some joker switched acct_file under us we'ld better be
+         * silent and _not_ touch anything.
+         */
+        lock_kernel();
+        if (file != acct_file) {
+                if (act)
+                        res = act>0;
+                goto out;
+        }
+
+        if (acct_active) {
+                if (act < 0) {
+                        acct_active = 0;
+                        printk(KERN_INFO "Process accounting paused\n");
+                }
+        } else {
+                if (act > 0) {
+                        acct_active = 1;
+                        printk(KERN_INFO "Process accounting resumed\n");
+                }
+        }
+
+        del_timer(&acct_timer);
+        acct_needcheck = 0;
+        acct_timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+        add_timer(&acct_timer);
+        res = acct_active;
+out:
+        unlock_kernel();
+        return res;
+}
+
+/*
+ * sys_acct() is the only system call needed to implement process
+ * accounting. It takes the name of the file where accounting records
+ * should be written. If the filename is NULL, accounting will be
+ * shutdown.
+ */
+asmlinkage long sys_acct(const char *name)
+{
+        struct file *file = NULL, *old_acct = NULL;
+        char *tmp;
+        int error;
+
+        if (!capable(CAP_SYS_PACCT))
+                return -EPERM;
+
+        if (name) {
+                tmp = getname(name);
+                error = PTR_ERR(tmp);
+                if (IS_ERR(tmp))
+                        goto out;
+                /* Difference from BSD - they don't do O_APPEND */
+                file = filp_open(tmp, O_WRONLY|O_APPEND, 0);
+                putname(tmp);
+                if (IS_ERR(file)) {
+                        error = PTR_ERR(file);
+                        goto out;
+                }
+                error = -EACCES;
+                if (!S_ISREG(file->f_dentry->d_inode->i_mode))
+                        goto out_err;
+
+                error = -EIO;
+                if (!file->f_op->write)
+                        goto out_err;
+        }
+
+        error = 0;
+        lock_kernel();
+        if (acct_file) {
+                old_acct = acct_file;
+                del_timer(&acct_timer);
+                acct_active = 0;
+                acct_needcheck = 0;
+                acct_file = NULL;
+        }
+        if (name) {
+                acct_file = file;
+                acct_needcheck = 0;
+                acct_active = 1;
+                /* It's been deleted if it was used before so this is safe */
+                init_timer(&acct_timer);
+                acct_timer.function = acct_timeout;
+                acct_timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+                add_timer(&acct_timer);
+        }
+        unlock_kernel();
+        if (old_acct) {
+                do_acct_process(0,old_acct);
+                filp_close(old_acct, NULL);
+        }
+out:
+        return error;
+out_err:
+        filp_close(file, NULL);
+        goto out;
+}
+
+void acct_auto_close(kdev_t dev)
+{
+        lock_kernel();
+        if (acct_file && acct_file->f_dentry->d_inode->i_dev == dev)
+                sys_acct(NULL);
+        unlock_kernel();
+}
+
+/*
+ * encode an unsigned long into a comp_t
+ *
+ * This routine has been adopted from the encode_comp_t() function in
+ * the kern_acct.c file of the FreeBSD operating system. The encoding
+ * is a 13-bit fraction with a 3-bit (base 8) exponent.
+ */
+
+#define MANTSIZE        13                      /* 13 bit mantissa. */
+#define EXPSIZE         3                       /* Base 8 (3 bit) exponent. */
+#define MAXFRACT        ((1 << MANTSIZE) - 1)   /* Maximum fractional value. */
+
+static comp_t encode_comp_t(unsigned long value)
+{
+        int exp, rnd;
+
+        exp = rnd = 0;
+        while (value > MAXFRACT) {
+                rnd = value & (1 << (EXPSIZE - 1));     /* Round up? */
+                value >>= EXPSIZE;      /* Base 8 exponent == 3 bit shift. */
+                exp++;
+        }
+
+        /*
+         * If we need to round up, do it (and handle overflow correctly).
+         */
+        if (rnd && (++value > MAXFRACT)) {
+                value >>= EXPSIZE;
+                exp++;
+        }
+
+        /*
+         * Clean it up and polish it off.
+         */
+        exp <<= MANTSIZE;               /* Shift the exponent into place */
+        exp += value;                   /* and add on the mantissa. */
+        return exp;
+}
+
+/*
+ * Write an accounting entry for an exiting process
+ *
+ * The acct_process() call is the workhorse of the process
+ * accounting system. The struct acct is built here and then written
+ * into the accounting file. This function should only be called from
+ * do_exit().
+ */
+
+/*
+ * do_acct_process does all actual work. Caller holds the reference to file.
+ */
+static void do_acct_process(long exitcode, struct file *file)
+{
+        struct acct ac;
+        mm_segment_t fs;
+        unsigned long vsize;
+        unsigned long flim;
+
+        /*
+         * First check to see if there is enough free_space to continue
+         * the process accounting system.
+         */
+        if (!check_free_space(file))
+                return;
+
+        /*
+         * Fill the accounting struct with the needed info as recorded
+         * by the different kernel functions.
+         */
+        memset((caddr_t)&ac, 0, sizeof(struct acct));
+
+        strncpy(ac.ac_comm, current->comm, ACCT_COMM);
+        ac.ac_comm[ACCT_COMM - 1] = '\0';
+
+        ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ));
+        ac.ac_etime = encode_comp_t(jiffies - current->start_time);
+        ac.ac_utime = encode_comp_t(current->times.tms_utime);
+        ac.ac_stime = encode_comp_t(current->times.tms_stime);
+        ac.ac_uid = current->uid;
+        ac.ac_gid = current->gid;
+        ac.ac_tty = (current->tty) ? kdev_t_to_nr(current->tty->device) : 0;
+
+        ac.ac_flag = 0;
+        if (current->flags & PF_FORKNOEXEC)
+                ac.ac_flag |= AFORK;
+        if (current->flags & PF_SUPERPRIV)
+                ac.ac_flag |= ASU;
+        if (current->flags & PF_DUMPCORE)
+                ac.ac_flag |= ACORE;
+        if (current->flags & PF_SIGNALED)
+                ac.ac_flag |= AXSIG;
+
+        vsize = 0;
+        if (current->mm) {
+                struct vm_area_struct *vma;
+                down_read(&current->mm->mmap_sem);
+                vma = current->mm->mmap;
+                while (vma) {
+                        vsize += vma->vm_end - vma->vm_start;
+                        vma = vma->vm_next;
+                }
+                up_read(&current->mm->mmap_sem);
+        }
+        vsize = vsize / 1024;
+        ac.ac_mem = encode_comp_t(vsize);
+        ac.ac_io = encode_comp_t(0 /* current->io_usage */);    /* %% */
+        ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
+        ac.ac_minflt = encode_comp_t(current->min_flt);
+        ac.ac_majflt = encode_comp_t(current->maj_flt);
+        ac.ac_swaps = encode_comp_t(current->nswap);
+        ac.ac_exitcode = exitcode;
+
+        /*
+         * Kernel segment override to datasegment and write it
+         * to the accounting file.
+         */
+        fs = get_fs();
+        set_fs(KERNEL_DS);
+        /*
+         * Accounting records are not subject to resource limits.
+         */
+        flim = current->rlim[RLIMIT_FSIZE].rlim_cur;
+        current->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+        file->f_op->write(file, (char *)&ac,
+                        sizeof(struct acct), &file->f_pos);
+        current->rlim[RLIMIT_FSIZE].rlim_cur = flim;
+        set_fs(fs);
+}
+
+/*
+ * acct_process - now just a wrapper around do_acct_process
+ */
+int acct_process(long exitcode)
+{
+        struct file *file = NULL;
+        lock_kernel();
+        if (acct_file) {
+                file = acct_file;
+                get_file(file);
+                unlock_kernel();
+                do_acct_process(exitcode, file);
+                fput(file);
+        } else
+                unlock_kernel();
+        return 0;
+}
+
+#else
+/*
+ * Dummy system call when BSD process accounting is not configured
+ * into the kernel.
+ */
+
+asmlinkage long sys_acct(const char * filename)
+{
+        return -ENOSYS;
+}
+#endif
diff --git a/uClinux-2.4.31-uc0/kernel/capability.c b/uClinux-2.4.31-uc0/kernel/capability.c
new file mode 100644
index 0000000..7aaf1a4
--- /dev/null
+++ b/uClinux-2.4.31-uc0/kernel/capability.c
@@ -0,0 +1,216 @@
+/*
+ * linux/kernel/capability.c
+ *
+ * Copyright (C) 1997 Andrew Main <zefram@fysh.org>
+ * Integrated into 2.1.97+, Andrew G. Morgan <morgan@transmeta.com>
+ */
+
+#include <linux/mm.h>
+#include <asm/uaccess.h>
+
+kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
+
+/* Note: never hold tasklist_lock while spinning for this one */
+spinlock_t task_capability_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * For sys_getproccap() and sys_setproccap(), any of the three
+ * capability set pointers may be NULL -- indicating that that set is
+ * uninteresting and/or not to be changed.
+ */ + +asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) +{ + int error, pid; + __u32 version; + struct task_struct *target; + struct __user_cap_data_struct data; + + if (get_user(version, &header->version)) + return -EFAULT; + + error = -EINVAL; + if (version != _LINUX_CAPABILITY_VERSION) { + version = _LINUX_CAPABILITY_VERSION; + if (put_user(version, &header->version)) + error = -EFAULT; + return error; + } + + if (get_user(pid, &header->pid)) + return -EFAULT; + + if (pid < 0) + return -EINVAL; + + error = 0; + + spin_lock(&task_capability_lock); + + if (pid && pid != current->pid) { + read_lock(&tasklist_lock); + target = find_task_by_pid(pid); /* identify target of query */ + if (!target) + error = -ESRCH; + } else { + target = current; + } + + if (!error) { + data.permitted = cap_t(target->cap_permitted); + data.inheritable = cap_t(target->cap_inheritable); + data.effective = cap_t(target->cap_effective); + } + + if (target != current) + read_unlock(&tasklist_lock); + spin_unlock(&task_capability_lock); + + if (!error) { + if (copy_to_user(dataptr, &data, sizeof data)) + return -EFAULT; + } + + return error; +} + +/* set capabilities for all processes in a given process group */ + +static void cap_set_pg(int pgrp, + kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t *permitted) +{ + struct task_struct *target; + + /* FIXME: do we need to have a write lock here..? */ + read_lock(&tasklist_lock); + for_each_task(target) { + if (target->pgrp != pgrp) + continue; + target->cap_effective = *effective; + target->cap_inheritable = *inheritable; + target->cap_permitted = *permitted; + } + read_unlock(&tasklist_lock); +} + +/* set capabilities for all processes other than 1 and self */ + +static void cap_set_all(kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t *permitted) +{ + struct task_struct *target; + + /* FIXME: do we need to have a write lock here..? */ + read_lock(&tasklist_lock); + /* ALL means everyone other than self or 'init' */ + for_each_task(target) { + if (target == current || target->pid == 1) + continue; + target->cap_effective = *effective; + target->cap_inheritable = *inheritable; + target->cap_permitted = *permitted; + } + read_unlock(&tasklist_lock); +} + +/* + * The restrictions on setting capabilities are specified as: + * + * [pid is for the 'target' task. 'current' is the calling task.] 
+ * + * I: any raised capabilities must be a subset of the (old current) Permitted + * P: any raised capabilities must be a subset of the (old current) permitted + * E: must be set to a subset of (new target) Permitted + */ + +asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) +{ + kernel_cap_t inheritable, permitted, effective; + __u32 version; + struct task_struct *target; + int error, pid; + + if (get_user(version, &header->version)) + return -EFAULT; + + if (version != _LINUX_CAPABILITY_VERSION) { + version = _LINUX_CAPABILITY_VERSION; + if (put_user(version, &header->version)) + return -EFAULT; + return -EINVAL; + } + + if (get_user(pid, &header->pid)) + return -EFAULT; + + if (pid && !capable(CAP_SETPCAP)) + return -EPERM; + + if (copy_from_user(&effective, &data->effective, sizeof(effective)) || + copy_from_user(&inheritable, &data->inheritable, sizeof(inheritable)) || + copy_from_user(&permitted, &data->permitted, sizeof(permitted))) + return -EFAULT; + + error = -EPERM; + spin_lock(&task_capability_lock); + + if (pid > 0 && pid != current->pid) { + read_lock(&tasklist_lock); + target = find_task_by_pid(pid); /* identify target of query */ + if (!target) { + error = -ESRCH; + goto out; + } + } else { + target = current; + } + + + /* verify restrictions on target's new Inheritable set */ + if (!cap_issubset(inheritable, + cap_combine(target->cap_inheritable, + current->cap_permitted))) { + goto out; + } + + /* verify restrictions on target's new Permitted set */ + if (!cap_issubset(permitted, + cap_combine(target->cap_permitted, + current->cap_permitted))) { + goto out; + } + + /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ + if (!cap_issubset(effective, permitted)) { + goto out; + } + + /* having verified that the proposed changes are legal, + we now put them into effect. */ + error = 0; + + if (pid < 0) { + if (pid == -1) /* all procs other than current and init */ + cap_set_all(&effective, &inheritable, &permitted); + + else /* all procs in process group */ + cap_set_pg(-pid, &effective, &inheritable, &permitted); + goto spin_out; + } else { + /* FIXME: do we need to have a write lock here..? */ + target->cap_effective = effective; + target->cap_inheritable = inheritable; + target->cap_permitted = permitted; + } + +out: + if (target != current) { + read_unlock(&tasklist_lock); + } +spin_out: + spin_unlock(&task_capability_lock); + return error; +} diff --git a/uClinux-2.4.31-uc0/kernel/context.c b/uClinux-2.4.31-uc0/kernel/context.c new file mode 100644 index 0000000..4fbd192 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/context.c @@ -0,0 +1,165 @@ +/* + * linux/kernel/context.c + * + * Mechanism for running arbitrary tasks in process context + * + * dwmw2@redhat.com: Genesis + * + * andrewm@uow.edu.au: 2.4.0-test12 + * - Child reaping + * - Support for tasks which re-add themselves + * - flush_scheduled_tasks. 
+ */ + +#define __KERNEL_SYSCALLS__ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/unistd.h> +#include <linux/signal.h> +#include <linux/completion.h> + +static DECLARE_TASK_QUEUE(tq_context); +static DECLARE_WAIT_QUEUE_HEAD(context_task_wq); +static DECLARE_WAIT_QUEUE_HEAD(context_task_done); +static int keventd_running; +static struct task_struct *keventd_task; + +static int need_keventd(const char *who) +{ + if (keventd_running == 0) + printk(KERN_ERR "%s(): keventd has not started\n", who); + return keventd_running; +} + +int current_is_keventd(void) +{ + int ret = 0; + if (need_keventd(__FUNCTION__)) + ret = (current == keventd_task); + return ret; +} + +/** + * schedule_task - schedule a function for subsequent execution in process context. + * @task: pointer to a &tq_struct which defines the function to be scheduled. + * + * May be called from interrupt context. The scheduled function is run at some + * time in the near future by the keventd kernel thread. If it can sleep, it + * should be designed to do so for the minimum possible time, as it will be + * stalling all other scheduled tasks. + * + * schedule_task() returns non-zero if the task was successfully scheduled. + * If @task is already residing on a task queue then schedule_task() fails + * to schedule your task and returns zero. + */ +int schedule_task(struct tq_struct *task) +{ + int ret; + need_keventd(__FUNCTION__); + ret = queue_task(task, &tq_context); + wake_up(&context_task_wq); + return ret; +} + +static int context_thread(void *startup) +{ + struct task_struct *curtask = current; + DECLARE_WAITQUEUE(wait, curtask); + struct k_sigaction sa; + + daemonize(); + strcpy(curtask->comm, "keventd"); + keventd_running = 1; + keventd_task = curtask; + + spin_lock_irq(&curtask->sigmask_lock); + siginitsetinv(&curtask->blocked, sigmask(SIGCHLD)); + recalc_sigpending(curtask); + spin_unlock_irq(&curtask->sigmask_lock); + + complete((struct completion *)startup); + + /* Install a handler so SIGCLD is delivered */ + sa.sa.sa_handler = SIG_IGN; + sa.sa.sa_flags = 0; + siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD)); + do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0); + + /* + * If one of the functions on a task queue re-adds itself + * to the task queue we call schedule() in state TASK_RUNNING + */ + for (;;) { + set_task_state(curtask, TASK_INTERRUPTIBLE); + add_wait_queue(&context_task_wq, &wait); + if (TQ_ACTIVE(tq_context)) + set_task_state(curtask, TASK_RUNNING); + schedule(); + remove_wait_queue(&context_task_wq, &wait); + run_task_queue(&tq_context); + wake_up(&context_task_done); + if (signal_pending(curtask)) { + while (waitpid(-1, (unsigned int *)0, __WALL|WNOHANG) > 0) + ; + spin_lock_irq(&curtask->sigmask_lock); + flush_signals(curtask); + recalc_sigpending(curtask); + spin_unlock_irq(&curtask->sigmask_lock); + } + } +} + +/** + * flush_scheduled_tasks - ensure that any scheduled tasks have run to completion. + * + * Forces execution of the schedule_task() queue and blocks until its completion. + * + * If a kernel subsystem uses schedule_task() and wishes to flush any pending + * tasks, it should use this function. This is typically used in driver shutdown + * handlers. + * + * The caller should hold no spinlocks and should hold no semaphores which could + * cause the scheduled tasks to block. 
+ */ +static struct tq_struct dummy_task; + +void flush_scheduled_tasks(void) +{ + int count; + DECLARE_WAITQUEUE(wait, current); + + /* + * Do it twice. It's possible, albeit highly unlikely, that + * the caller queued a task immediately before calling us, + * and that the eventd thread was already past the run_task_queue() + * but not yet into wake_up(), so it woke us up before completing + * the caller's queued task or our new dummy task. + */ + add_wait_queue(&context_task_done, &wait); + for (count = 0; count < 2; count++) { + set_current_state(TASK_UNINTERRUPTIBLE); + + /* Queue a dummy task to make sure we get kicked */ + schedule_task(&dummy_task); + + /* Wait for it to complete */ + schedule(); + } + remove_wait_queue(&context_task_done, &wait); +} + +int start_context_thread(void) +{ + static struct completion startup __initdata = COMPLETION_INITIALIZER(startup); + + kernel_thread(context_thread, &startup, CLONE_FS | CLONE_FILES); + wait_for_completion(&startup); + return 0; +} + +EXPORT_SYMBOL(schedule_task); +EXPORT_SYMBOL(flush_scheduled_tasks); + diff --git a/uClinux-2.4.31-uc0/kernel/dma.c b/uClinux-2.4.31-uc0/kernel/dma.c new file mode 100644 index 0000000..5032fe5 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/dma.c @@ -0,0 +1,132 @@ +/* $Id: dma.c,v 1.7 1994/12/28 03:35:33 root Exp root $ + * linux/kernel/dma.c: A DMA channel allocator. Inspired by linux/kernel/irq.c. + * + * Written by Hennus Bergman, 1992. + * + * 1994/12/26: Changes by Alex Nash to fix a minor bug in /proc/dma. + * In the previous version the reported device could end up being wrong, + * if a device requested a DMA channel that was already in use. + * [It also happened to remove the sizeof(char *) == sizeof(int) + * assumption introduced because of those /proc/dma patches. -- Hennus] + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <asm/dma.h> +#include <asm/system.h> + + + +/* A note on resource allocation: + * + * All drivers needing DMA channels, should allocate and release them + * through the public routines `request_dma()' and `free_dma()'. + * + * In order to avoid problems, all processes should allocate resources in + * the same sequence and release them in the reverse order. + * + * So, when allocating DMAs and IRQs, first allocate the IRQ, then the DMA. + * When releasing them, first release the DMA, then release the IRQ. + * If you don't, you may cause allocation requests to fail unnecessarily. + * This doesn't really matter now, but it will once we get real semaphores + * in the kernel. + */ + + +spinlock_t dma_spin_lock = SPIN_LOCK_UNLOCKED; + +/* + * If our port doesn't define this it has no PC like DMA + */ + +#ifdef MAX_DMA_CHANNELS + + +/* Channel n is busy iff dma_chan_busy[n].lock != 0. + * DMA0 used to be reserved for DRAM refresh, but apparently not any more... + * DMA4 is reserved for cascading. 
+ */ + +struct dma_chan { + int lock; + const char *device_id; +}; + +static struct dma_chan dma_chan_busy[MAX_DMA_CHANNELS] = { + { 0, 0 }, +#if defined(CONFIG_M5307) || defined(CONFIG_M5407) + { 0, 0 }, + { 0, 0 }, +#endif +#ifndef CONFIG_UCLINUX + { 0, 0 }, + { 1, "cascade" }, + { 0, 0 }, + { 0, 0 }, +#endif + { 0, 0 } +}; + +int get_dma_list(char *buf) +{ + int i, len = 0; + + for (i = 0 ; i < MAX_DMA_CHANNELS ; i++) { + if (dma_chan_busy[i].lock) { + len += sprintf(buf+len, "%2d: %s\n", + i, + dma_chan_busy[i].device_id); + } + } + return len; +} /* get_dma_list */ + + +int request_dma(unsigned int dmanr, const char * device_id) +{ + if (dmanr >= MAX_DMA_CHANNELS) + return -EINVAL; + + if (xchg(&dma_chan_busy[dmanr].lock, 1) != 0) + return -EBUSY; + + dma_chan_busy[dmanr].device_id = device_id; + + /* old flag was 0, now contains 1 to indicate busy */ + return 0; +} /* request_dma */ + + +void free_dma(unsigned int dmanr) +{ + if (dmanr >= MAX_DMA_CHANNELS) { + printk("Trying to free DMA%d\n", dmanr); + return; + } + + if (xchg(&dma_chan_busy[dmanr].lock, 0) == 0) { + printk("Trying to free free DMA%d\n", dmanr); + return; + } + +} /* free_dma */ + +#else + +int request_dma(unsigned int dmanr, const char *device_id) +{ + return -EINVAL; +} + +void free_dma(unsigned int dmanr) +{ +} + +int get_dma_list(char *buf) +{ + strcpy(buf, "No DMA\n"); + return 7; +} +#endif diff --git a/uClinux-2.4.31-uc0/kernel/exec_domain.c b/uClinux-2.4.31-uc0/kernel/exec_domain.c new file mode 100644 index 0000000..c1b7cc9 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/exec_domain.c @@ -0,0 +1,290 @@ +/* + * Handling of different ABIs (personalities). + * + * We group personalities into execution domains which have their + * own handlers for kernel entry points, signal mapping, etc... + * + * 2001-05-06 Complete rewrite, Christoph Hellwig (hch@infradead.org) + */ + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/kmod.h> +#include <linux/module.h> +#include <linux/personality.h> +#include <linux/sched.h> +#include <linux/sysctl.h> +#include <linux/types.h> + + +static void default_handler(int, struct pt_regs *); + +static struct exec_domain *exec_domains = &default_exec_domain; +static rwlock_t exec_domains_lock = RW_LOCK_UNLOCKED; + + +static u_long ident_map[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 +}; + +struct exec_domain default_exec_domain = { + "Linux", /* name */ + default_handler, /* lcall7 causes a seg fault. */ + 0, 0, /* PER_LINUX personality. */ + ident_map, /* Identity map signals. */ + ident_map, /* - both ways. */ +}; + + +static void +default_handler(int segment, struct pt_regs *regp) +{ + u_long pers = 0; + + /* + * This may have been a static linked SVr4 binary, so we would + * have the personality set incorrectly. Or it might have been + * a Solaris/x86 binary. We can tell which because the former + * uses lcall7, while the latter used lcall 0x27. + * Try to find or load the appropriate personality, and fall back + * to just forcing a SEGV. + * + * XXX: this is IA32-specific and should be moved to the MD-tree. 
+ */ + switch (segment) { +#ifdef __i386__ + case 0x07: + pers = abi_defhandler_lcall7; + break; + case 0x27: + pers = PER_SOLARIS; + break; +#endif + } + set_personality(pers); + + if (current->exec_domain->handler != default_handler) + current->exec_domain->handler(segment, regp); + else + send_sig(SIGSEGV, current, 1); +} + +static struct exec_domain * +lookup_exec_domain(u_long personality) +{ + struct exec_domain * ep; + u_long pers = personality(personality); + + read_lock(&exec_domains_lock); + for (ep = exec_domains; ep; ep = ep->next) { + if (pers >= ep->pers_low && pers <= ep->pers_high) + if (try_inc_mod_count(ep->module)) + goto out; + } + +#ifdef CONFIG_KMOD + read_unlock(&exec_domains_lock); + { + char buffer[30]; + sprintf(buffer, "personality-%ld", pers); + request_module(buffer); + } + read_lock(&exec_domains_lock); + + for (ep = exec_domains; ep; ep = ep->next) { + if (pers >= ep->pers_low && pers <= ep->pers_high) + if (try_inc_mod_count(ep->module)) + goto out; + } +#endif + + ep = &default_exec_domain; +out: + read_unlock(&exec_domains_lock); + return (ep); +} + +int +register_exec_domain(struct exec_domain *ep) +{ + struct exec_domain *tmp; + int err = -EBUSY; + + if (ep == NULL) + return -EINVAL; + + if (ep->next != NULL) + return -EBUSY; + + write_lock(&exec_domains_lock); + for (tmp = exec_domains; tmp; tmp = tmp->next) { + if (tmp == ep) + goto out; + } + + ep->next = exec_domains; + exec_domains = ep; + err = 0; + +out: + write_unlock(&exec_domains_lock); + return (err); +} + +int +unregister_exec_domain(struct exec_domain *ep) +{ + struct exec_domain **epp; + + epp = &exec_domains; + write_lock(&exec_domains_lock); + for (epp = &exec_domains; *epp; epp = &(*epp)->next) { + if (ep == *epp) + goto unregister; + } + write_unlock(&exec_domains_lock); + return -EINVAL; + +unregister: + *epp = ep->next; + ep->next = NULL; + write_unlock(&exec_domains_lock); + return 0; +} + +int +__set_personality(u_long personality) +{ + struct exec_domain *ep, *oep; + + ep = lookup_exec_domain(personality); + if (ep == current->exec_domain) { + current->personality = personality; + return 0; + } + + if (atomic_read(¤t->fs->count) != 1) { + struct fs_struct *fsp, *ofsp; + + fsp = copy_fs_struct(current->fs); + if (fsp == NULL) { + put_exec_domain(ep); + return -ENOMEM;; + } + + task_lock(current); + ofsp = current->fs; + current->fs = fsp; + task_unlock(current); + + put_fs_struct(ofsp); + } + + /* + * At that point we are guaranteed to be the sole owner of + * current->fs. + */ + + current->personality = personality; + oep = current->exec_domain; + current->exec_domain = ep; + set_fs_altroot(); + + put_exec_domain(oep); + + return 0; +} + +int +get_exec_domain_list(char *page) +{ + struct exec_domain *ep; + int len = 0; + + read_lock(&exec_domains_lock); + for (ep = exec_domains; ep && len < PAGE_SIZE - 80; ep = ep->next) + len += sprintf(page + len, "%d-%d\t%-16s\t[%s]\n", + ep->pers_low, ep->pers_high, ep->name, + ep->module ? 
ep->module->name : "kernel"); + read_unlock(&exec_domains_lock); + return (len); +} + +asmlinkage long +sys_personality(u_long personality) +{ + u_long old = current->personality;; + + if (personality != 0xffffffff) { + set_personality(personality); + if (current->personality != personality) + return -EINVAL; + } + + return (long)old; +} + + +EXPORT_SYMBOL(register_exec_domain); +EXPORT_SYMBOL(unregister_exec_domain); +EXPORT_SYMBOL(__set_personality); + +/* + * We have to have all sysctl handling for the Linux-ABI + * in one place as the dynamic registration of sysctls is + * horribly crufty in Linux <= 2.4. + * + * I hope the new sysctl schemes discussed for future versions + * will obsolete this. + * + * --hch + */ + +u_long abi_defhandler_coff = PER_SCOSVR3; +u_long abi_defhandler_elf = PER_LINUX; +u_long abi_defhandler_lcall7 = PER_SVR4; +u_long abi_defhandler_libcso = PER_SVR4; +u_int abi_traceflg; +int abi_fake_utsname; + +static struct ctl_table abi_table[] = { + {ABI_DEFHANDLER_COFF, "defhandler_coff", &abi_defhandler_coff, + sizeof(int), 0644, NULL, &proc_doulongvec_minmax}, + {ABI_DEFHANDLER_ELF, "defhandler_elf", &abi_defhandler_elf, + sizeof(int), 0644, NULL, &proc_doulongvec_minmax}, + {ABI_DEFHANDLER_LCALL7, "defhandler_lcall7", &abi_defhandler_lcall7, + sizeof(int), 0644, NULL, &proc_doulongvec_minmax}, + {ABI_DEFHANDLER_LIBCSO, "defhandler_libcso", &abi_defhandler_libcso, + sizeof(int), 0644, NULL, &proc_doulongvec_minmax}, + {ABI_TRACE, "trace", &abi_traceflg, + sizeof(u_int), 0644, NULL, &proc_dointvec}, + {ABI_FAKE_UTSNAME, "fake_utsname", &abi_fake_utsname, + sizeof(int), 0644, NULL, &proc_dointvec}, + {0} +}; + +static struct ctl_table abi_root_table[] = { + {CTL_ABI, "abi", NULL, 0, 0555, abi_table}, + {0} +}; + +static int __init +abi_register_sysctl(void) +{ + register_sysctl_table(abi_root_table, 1); + return 0; +} + +__initcall(abi_register_sysctl); + + +EXPORT_SYMBOL(abi_defhandler_coff); +EXPORT_SYMBOL(abi_defhandler_elf); +EXPORT_SYMBOL(abi_defhandler_lcall7); +EXPORT_SYMBOL(abi_defhandler_libcso); +EXPORT_SYMBOL(abi_traceflg); +EXPORT_SYMBOL(abi_fake_utsname); diff --git a/uClinux-2.4.31-uc0/kernel/exit.c b/uClinux-2.4.31-uc0/kernel/exit.c new file mode 100644 index 0000000..2747da7 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/exit.c @@ -0,0 +1,604 @@ +/* + * linux/kernel/exit.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/config.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/smp_lock.h> +#include <linux/module.h> +#include <linux/completion.h> +#include <linux/personality.h> +#include <linux/tty.h> +#include <linux/namespace.h> +#ifdef CONFIG_BSD_PROCESS_ACCT +#include <linux/acct.h> +#endif + +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/mmu_context.h> + +extern void sem_exit (void); +extern struct task_struct *child_reaper; + +int getrusage(struct task_struct *, int, struct rusage *); + +static void release_task(struct task_struct * p) +{ + if (p != current) { +#ifdef CONFIG_SMP + /* + * Wait to make sure the process isn't on the + * runqueue (active on some other CPU still) + */ + for (;;) { + task_lock(p); + if (!task_has_cpu(p)) + break; + task_unlock(p); + do { + cpu_relax(); + barrier(); + } while (task_has_cpu(p)); + } + task_unlock(p); +#endif + atomic_dec(&p->user->processes); + free_uid(p->user); + unhash_process(p); + + release_thread(p); + current->cmin_flt += p->min_flt + p->cmin_flt; + current->cmaj_flt += p->maj_flt + p->cmaj_flt; + current->cnswap += p->nswap + 
p->cnswap; + /* + * Potentially available timeslices are retrieved + * here - this way the parent does not get penalized + * for creating too many processes. + * + * (this cannot be used to artificially 'generate' + * timeslices, because any timeslice recovered here + * was given away by the parent in the first place.) + */ + current->counter += p->counter; + if (current->counter >= MAX_COUNTER) + current->counter = MAX_COUNTER; + p->pid = 0; + free_task_struct(p); + } else { + printk("task releasing itself\n"); + } +} + +/* + * This checks not only the pgrp, but falls back on the pid if no + * satisfactory pgrp is found. I dunno - gdb doesn't work correctly + * without this... + */ +int session_of_pgrp(int pgrp) +{ + struct task_struct *p; + int fallback; + + fallback = -1; + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->session <= 0) + continue; + if (p->pgrp == pgrp) { + fallback = p->session; + break; + } + if (p->pid == pgrp) + fallback = p->session; + } + read_unlock(&tasklist_lock); + return fallback; +} + +/* + * Determine if a process group is "orphaned", according to the POSIX + * definition in 2.2.2.52. Orphaned process groups are not to be affected + * by terminal-generated stop signals. Newly orphaned process groups are + * to receive a SIGHUP and a SIGCONT. + * + * "I ask you, have you ever known what it is to be an orphan?" + */ +static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task) +{ + struct task_struct *p; + + read_lock(&tasklist_lock); + for_each_task(p) { + if ((p == ignored_task) || (p->pgrp != pgrp) || + (p->state == TASK_ZOMBIE) || + (p->p_pptr->pid == 1)) + continue; + if ((p->p_pptr->pgrp != pgrp) && + (p->p_pptr->session == p->session)) { + read_unlock(&tasklist_lock); + return 0; + } + } + read_unlock(&tasklist_lock); + return 1; /* (sighing) "Often!" */ +} + +int is_orphaned_pgrp(int pgrp) +{ + return will_become_orphaned_pgrp(pgrp, 0); +} + +static inline int has_stopped_jobs(int pgrp) +{ + int retval = 0; + struct task_struct * p; + + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->pgrp != pgrp) + continue; + if (p->state != TASK_STOPPED) + continue; + retval = 1; + break; + } + read_unlock(&tasklist_lock); + return retval; +} + +/* + * When we die, we re-parent all our children. + * Try to give them to another thread in our thread + * group, and if no such member exists, give it to + * the global child reaper process (ie "init") + */ +static inline void forget_original_parent(struct task_struct * father) +{ + struct task_struct * p; + + read_lock(&tasklist_lock); + + for_each_task(p) { + if (p->p_opptr == father) { + /* We dont want people slaying init */ + p->exit_signal = SIGCHLD; + p->self_exec_id++; + + /* Make sure we're not reparenting to ourselves */ + p->p_opptr = child_reaper; + + if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0); + } + } + read_unlock(&tasklist_lock); +} + +static inline void close_files(struct files_struct * files) +{ + int i, j; + + j = 0; + for (;;) { + unsigned long set; + i = j * __NFDBITS; + if (i >= files->max_fdset || i >= files->max_fds) + break; + set = files->open_fds->fds_bits[j++]; + while (set) { + if (set & 1) { + struct file * file = xchg(&files->fd[i], NULL); + if (file) + filp_close(file, files); + } + i++; + set >>= 1; + } + } +} + +void put_files_struct(struct files_struct *files) +{ + if (atomic_dec_and_test(&files->count)) { + close_files(files); + /* + * Free the fd and fdset arrays if we expanded them. 
+ */ + if (files->fd != &files->fd_array[0]) + free_fd_array(files->fd, files->max_fds); + if (files->max_fdset > __FD_SETSIZE) { + free_fdset(files->open_fds, files->max_fdset); + free_fdset(files->close_on_exec, files->max_fdset); + } + kmem_cache_free(files_cachep, files); + } +} + +static inline void __exit_files(struct task_struct *tsk) +{ + struct files_struct * files = tsk->files; + + if (files) { + task_lock(tsk); + tsk->files = NULL; + task_unlock(tsk); + put_files_struct(files); + } +} + +void exit_files(struct task_struct *tsk) +{ + __exit_files(tsk); +} + +static inline void __put_fs_struct(struct fs_struct *fs) +{ + /* No need to hold fs->lock if we are killing it */ + if (atomic_dec_and_test(&fs->count)) { + dput(fs->root); + mntput(fs->rootmnt); + dput(fs->pwd); + mntput(fs->pwdmnt); + if (fs->altroot) { + dput(fs->altroot); + mntput(fs->altrootmnt); + } + kmem_cache_free(fs_cachep, fs); + } +} + +void put_fs_struct(struct fs_struct *fs) +{ + __put_fs_struct(fs); +} + +static inline void __exit_fs(struct task_struct *tsk) +{ + struct fs_struct * fs = tsk->fs; + + if (fs) { + task_lock(tsk); + tsk->fs = NULL; + task_unlock(tsk); + __put_fs_struct(fs); + } +} + +void exit_fs(struct task_struct *tsk) +{ + __exit_fs(tsk); +} + +/* + * We can use these to temporarily drop into + * "lazy TLB" mode and back. + */ +struct mm_struct * start_lazy_tlb(void) +{ + struct mm_struct *mm = current->mm; + current->mm = NULL; + /* active_mm is still 'mm' */ + atomic_inc(&mm->mm_count); + enter_lazy_tlb(mm, current, smp_processor_id()); + return mm; +} + +void end_lazy_tlb(struct mm_struct *mm) +{ + struct mm_struct *active_mm = current->active_mm; + + current->mm = mm; + if (mm != active_mm) { + current->active_mm = mm; + activate_mm(active_mm, mm); + } + mmdrop(active_mm); +} + +/* + * Turn us into a lazy TLB process if we + * aren't already.. + */ +static inline void __exit_mm(struct task_struct * tsk) +{ + struct mm_struct * mm = tsk->mm; + + mm_release(); + if (mm) { + atomic_inc(&mm->mm_count); + BUG_ON(mm != tsk->active_mm); + /* more a memory barrier than a real lock */ + task_lock(tsk); + tsk->mm = NULL; + task_unlock(tsk); + enter_lazy_tlb(mm, current, smp_processor_id()); + mmput(mm); + } +} + +void exit_mm(struct task_struct *tsk) +{ + __exit_mm(tsk); +} + +/* + * Send signals to all our closest relatives so that they know + * to properly mourn us.. + */ +static void exit_notify(void) +{ + struct task_struct * p, *t; + + forget_original_parent(current); + /* + * Check to see if any process groups have become orphaned + * as a result of our exiting, and if they have any stopped + * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) + * + * Case i: Our father is in a different pgrp than we are + * and we were the only connection outside, so our pgrp + * is about to become orphaned. + */ + + t = current->p_pptr; + + if ((t->pgrp != current->pgrp) && + (t->session == current->session) && + will_become_orphaned_pgrp(current->pgrp, current) && + has_stopped_jobs(current->pgrp)) { + kill_pg(current->pgrp,SIGHUP,1); + kill_pg(current->pgrp,SIGCONT,1); + } + + /* Let father know we died + * + * Thread signals are configurable, but you aren't going to use + * that to send signals to arbitary processes. + * That stops right now. + * + * If the parent exec id doesn't match the exec id we saved + * when we started then we know the parent has changed security + * domain. 
+ * + * If our self_exec id doesn't match our parent_exec_id then + * we have changed execution domain as these two values started + * the same after a fork. + * + */ + + if(current->exit_signal != SIGCHLD && + ( current->parent_exec_id != t->self_exec_id || + current->self_exec_id != current->parent_exec_id) + && !capable(CAP_KILL)) + current->exit_signal = SIGCHLD; + + + /* + * This loop does two things: + * + * A. Make init inherit all the child processes + * B. Check to see if any process groups have become orphaned + * as a result of our exiting, and if they have any stopped + * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) + */ + + write_lock_irq(&tasklist_lock); + current->state = TASK_ZOMBIE; + do_notify_parent(current, current->exit_signal); + while (current->p_cptr != NULL) { + p = current->p_cptr; + current->p_cptr = p->p_osptr; + p->p_ysptr = NULL; + p->ptrace = 0; + + p->p_pptr = p->p_opptr; + p->p_osptr = p->p_pptr->p_cptr; + if (p->p_osptr) + p->p_osptr->p_ysptr = p; + p->p_pptr->p_cptr = p; + if (p->state == TASK_ZOMBIE) + do_notify_parent(p, p->exit_signal); + /* + * process group orphan check + * Case ii: Our child is in a different pgrp + * than we are, and it was the only connection + * outside, so the child pgrp is now orphaned. + */ + if ((p->pgrp != current->pgrp) && + (p->session == current->session)) { + int pgrp = p->pgrp; + + write_unlock_irq(&tasklist_lock); + if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) { + kill_pg(pgrp,SIGHUP,1); + kill_pg(pgrp,SIGCONT,1); + } + write_lock_irq(&tasklist_lock); + } + } + write_unlock_irq(&tasklist_lock); +} + +NORET_TYPE void do_exit(long code) +{ + struct task_struct *tsk = current; + + if (in_interrupt()) + panic("Aiee, killing interrupt handler\n"); + if (!tsk->pid) + panic("Attempted to kill the idle task!"); + if (tsk->pid == 1) + panic("Attempted to kill init!"); + tsk->flags |= PF_EXITING; + del_timer_sync(&tsk->real_timer); + +fake_volatile: +#ifdef CONFIG_BSD_PROCESS_ACCT + acct_process(code); +#endif + __exit_mm(tsk); + + lock_kernel(); + sem_exit(); + __exit_files(tsk); + __exit_fs(tsk); + exit_namespace(tsk); + exit_sighand(tsk); + exit_thread(); + + if (current->leader) + disassociate_ctty(1); + + put_exec_domain(tsk->exec_domain); + if (tsk->binfmt && tsk->binfmt->module) + __MOD_DEC_USE_COUNT(tsk->binfmt->module); + + tsk->exit_code = code; + exit_notify(); +#ifdef CONFIG_SYSCALLTIMER + current->curr_syscall = 0; +#endif + schedule(); + BUG(); +/* + * In order to get rid of the "volatile function does return" message + * I did this little loop that confuses gcc to think do_exit really + * is volatile. In fact it's schedule() that is volatile in some + * circumstances: when current->state = ZOMBIE, schedule() never + * returns. + * + * In fact the natural way to do all this is to have the label and the + * goto right after each other, but I put the fake_volatile label at + * the start of the function just in case something /really/ bad + * happens, and the schedule returns. This way we can try again. I'm + * not paranoid: it's just that everybody is out to get me. 
+ */ + goto fake_volatile; +} + +NORET_TYPE void complete_and_exit(struct completion *comp, long code) +{ + if (comp) + complete(comp); + + do_exit(code); +} + +asmlinkage long sys_exit(int error_code) +{ + do_exit((error_code&0xff)<<8); +} + +asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru) +{ + int flag, retval; + DECLARE_WAITQUEUE(wait, current); + struct task_struct *tsk; + + if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL)) + return -EINVAL; + + add_wait_queue(¤t->wait_chldexit,&wait); +repeat: + flag = 0; + current->state = TASK_INTERRUPTIBLE; + read_lock(&tasklist_lock); + tsk = current; + do { + struct task_struct *p; + for (p = tsk->p_cptr ; p ; p = p->p_osptr) { + if (pid>0) { + if (p->pid != pid) + continue; + } else if (!pid) { + if (p->pgrp != current->pgrp) + continue; + } else if (pid != -1) { + if (p->pgrp != -pid) + continue; + } + /* Wait for all children (clone and not) if __WALL is set; + * otherwise, wait for clone children *only* if __WCLONE is + * set; otherwise, wait for non-clone children *only*. (Note: + * A "clone" child here is one that reports to its parent + * using a signal other than SIGCHLD.) */ + if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) + && !(options & __WALL)) + continue; + flag = 1; + switch (p->state) { + case TASK_STOPPED: + if (!p->exit_code) + continue; + if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED)) + continue; + read_unlock(&tasklist_lock); + retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; + if (!retval && stat_addr) + retval = put_user((p->exit_code << 8) | 0x7f, stat_addr); + if (!retval) { + p->exit_code = 0; + retval = p->pid; + } + goto end_wait4; + case TASK_ZOMBIE: + current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime; + current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime; + read_unlock(&tasklist_lock); + retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; + if (!retval && stat_addr) + retval = put_user(p->exit_code, stat_addr); + if (retval) + goto end_wait4; + retval = p->pid; + if (p->p_opptr != p->p_pptr) { + write_lock_irq(&tasklist_lock); + REMOVE_LINKS(p); + p->p_pptr = p->p_opptr; + SET_LINKS(p); + do_notify_parent(p, SIGCHLD); + write_unlock_irq(&tasklist_lock); + } else + release_task(p); + goto end_wait4; + default: + continue; + } + } + if (options & __WNOTHREAD) + break; + tsk = next_thread(tsk); + } while (tsk != current); + read_unlock(&tasklist_lock); + if (flag) { + retval = 0; + if (options & WNOHANG) + goto end_wait4; + retval = -ERESTARTSYS; + if (signal_pending(current)) + goto end_wait4; + schedule(); + goto repeat; + } + retval = -ECHILD; +end_wait4: + current->state = TASK_RUNNING; + remove_wait_queue(¤t->wait_chldexit,&wait); + return retval; +} + +#if !defined(__alpha__) && !defined(__ia64__) && !defined(__arm__) + +/* + * sys_waitpid() remains for compatibility. waitpid() should be + * implemented by calling sys_wait4() from libc.a. + */ +asmlinkage long sys_waitpid(pid_t pid,unsigned int * stat_addr, int options) +{ + return sys_wait4(pid, stat_addr, options, NULL); +} + +#endif diff --git a/uClinux-2.4.31-uc0/kernel/fork.c b/uClinux-2.4.31-uc0/kernel/fork.c new file mode 100644 index 0000000..b13b2f3 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/fork.c @@ -0,0 +1,1043 @@ +/* + * linux/kernel/fork.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * 'fork.c' contains the help-routines for the 'fork' system call + * (see also entry.S and others). 
+ * Fork is rather simple, once you get the hang of it, but the memory + * management can be a bitch. See 'mm/memory.c': 'copy_page_range()' + */ + +#include <linux/config.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/unistd.h> +#include <linux/smp_lock.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/completion.h> +#include <linux/namespace.h> +#include <linux/personality.h> +#include <linux/compiler.h> + +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/uaccess.h> +#include <asm/mmu_context.h> +#include <asm/processor.h> + +/* The idle threads do not count.. */ +int nr_threads; +int nr_running; + +int max_threads; +unsigned long total_forks; /* Handle normal Linux uptimes. */ +int last_pid; + +struct task_struct *pidhash[PIDHASH_SZ]; + +void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait) +{ + unsigned long flags; + + wait->flags &= ~WQ_FLAG_EXCLUSIVE; + wq_write_lock_irqsave(&q->lock, flags); + __add_wait_queue(q, wait); + wq_write_unlock_irqrestore(&q->lock, flags); +} + +void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait) +{ + unsigned long flags; + + wait->flags |= WQ_FLAG_EXCLUSIVE; + wq_write_lock_irqsave(&q->lock, flags); + __add_wait_queue_tail(q, wait); + wq_write_unlock_irqrestore(&q->lock, flags); +} + +void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait) +{ + unsigned long flags; + + wq_write_lock_irqsave(&q->lock, flags); + __remove_wait_queue(q, wait); + wq_write_unlock_irqrestore(&q->lock, flags); +} + +void __init fork_init(unsigned long mempages) +{ + /* + * The default maximum number of threads is set to a safe + * value: the thread structures can take up at most half + * of memory. + */ + max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); + + /* + * we need to allow at least 10 threads to boot a system + */ + init_task.rlim[RLIMIT_NPROC].rlim_cur = max(10, max_threads/2); + init_task.rlim[RLIMIT_NPROC].rlim_max = max(10, max_threads/2); +} + +/* Protects next_safe and last_pid. */ +spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED; + +static int get_pid(unsigned long flags) +{ + static int next_safe = PID_MAX; + struct task_struct *p; + int pid, beginpid; + + if (flags & CLONE_PID) + return current->pid; + + spin_lock(&lastpid_lock); + beginpid = last_pid; + if((++last_pid) & 0xffff8000) { + last_pid = 300; /* Skip daemons etc. 
*/ + goto inside; + } + if(last_pid >= next_safe) { +inside: + next_safe = PID_MAX; + read_lock(&tasklist_lock); + repeat: + for_each_task(p) { + if(p->pid == last_pid || + p->pgrp == last_pid || + p->tgid == last_pid || + p->session == last_pid) { + if(++last_pid >= next_safe) { + if(last_pid & 0xffff8000) + last_pid = 300; + next_safe = PID_MAX; + } + if(unlikely(last_pid == beginpid)) { + next_safe = 0; + goto nomorepids; + } + goto repeat; + } + if(p->pid > last_pid && next_safe > p->pid) + next_safe = p->pid; + if(p->pgrp > last_pid && next_safe > p->pgrp) + next_safe = p->pgrp; + if(p->tgid > last_pid && next_safe > p->tgid) + next_safe = p->tgid; + if(p->session > last_pid && next_safe > p->session) + next_safe = p->session; + } + read_unlock(&tasklist_lock); + } + pid = last_pid; + spin_unlock(&lastpid_lock); + + return pid; + +nomorepids: + read_unlock(&tasklist_lock); + spin_unlock(&lastpid_lock); + return 0; +} + +#ifndef CONFIG_UCLINUX + +static inline int dup_mmap(struct mm_struct * mm) +{ + struct vm_area_struct * mpnt, *tmp, **pprev; + int retval; + + flush_cache_mm(current->mm); + mm->locked_vm = 0; + mm->mmap = NULL; + mm->mmap_cache = NULL; + mm->map_count = 0; + mm->rss = 0; + mm->cpu_vm_mask = 0; + mm->swap_address = 0; + pprev = &mm->mmap; + + /* + * Add it to the mmlist after the parent. + * Doing it this way means that we can order the list, + * and fork() won't mess up the ordering significantly. + * Add it first so that swapoff can see any swap entries. + */ + spin_lock(&mmlist_lock); + list_add(&mm->mmlist, ¤t->mm->mmlist); + mmlist_nr++; + spin_unlock(&mmlist_lock); + + for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) { + struct file *file; + + retval = -ENOMEM; + if(mpnt->vm_flags & VM_DONTCOPY) + continue; + tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!tmp) + goto fail_nomem; + *tmp = *mpnt; + tmp->vm_flags &= ~VM_LOCKED; + tmp->vm_mm = mm; + tmp->vm_next = NULL; + tmp->vm_sharing_data = NULL; + file = tmp->vm_file; + if (file) { + struct inode *inode = file->f_dentry->d_inode; + get_file(file); + if (tmp->vm_flags & VM_DENYWRITE) + atomic_dec(&inode->i_writecount); + + /* insert tmp into the share list, just after mpnt */ + spin_lock(&inode->i_mapping->i_shared_lock); + if((tmp->vm_next_share = mpnt->vm_next_share) != NULL) + mpnt->vm_next_share->vm_pprev_share = + &tmp->vm_next_share; + mpnt->vm_next_share = tmp; + tmp->vm_pprev_share = &mpnt->vm_next_share; + spin_unlock(&inode->i_mapping->i_shared_lock); + } + + /* + * Link in the new vma and copy the page table entries: + * link in first so that swapoff can see swap entries. 
+ */ + spin_lock(&mm->page_table_lock); + *pprev = tmp; + pprev = &tmp->vm_next; + mm->map_count++; + retval = copy_page_range(mm, current->mm, tmp); + spin_unlock(&mm->page_table_lock); + + if (tmp->vm_ops && tmp->vm_ops->open) + tmp->vm_ops->open(tmp); + + if (retval) + goto fail_nomem; + } + retval = 0; + build_mmap_rb(mm); + +#ifdef CONFIG_ARM_FASS + arch_new_mm(current, mm); +#endif + +fail_nomem: + flush_tlb_mm(current->mm); + return retval; +} + +spinlock_t mmlist_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; +int mmlist_nr; + +#define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL)) +#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) + +static struct mm_struct * mm_init(struct mm_struct * mm) +{ + atomic_set(&mm->mm_users, 1); + atomic_set(&mm->mm_count, 1); + init_rwsem(&mm->mmap_sem); + mm->page_table_lock = SPIN_LOCK_UNLOCKED; + mm->pgd = pgd_alloc(mm); + mm->def_flags = 0; + if (mm->pgd) + return mm; + free_mm(mm); + return NULL; +} + + +/* + * Allocate and initialize an mm_struct. + */ +struct mm_struct * mm_alloc(void) +{ + struct mm_struct * mm; + + mm = allocate_mm(); + if (mm) { + memset(mm, 0, sizeof(*mm)); + return mm_init(mm); + } + return NULL; +} + +/* + * Called when the last reference to the mm + * is dropped: either by a lazy thread or by + * mmput. Free the page directory and the mm. + */ +void fastcall __mmdrop(struct mm_struct *mm) +{ + BUG_ON(mm == &init_mm); + pgd_free(mm->pgd); + check_pgt_cache(); + destroy_context(mm); + free_mm(mm); +} + +/* + * Decrement the use count and release all resources for an mm. + */ +void mmput(struct mm_struct *mm) +{ + if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) { + extern struct mm_struct *swap_mm; + if (swap_mm == mm) + swap_mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist); + list_del(&mm->mmlist); + mmlist_nr--; + spin_unlock(&mmlist_lock); + exit_mmap(mm); + mmdrop(mm); + } +} + +/* Please note the differences between mmput and mm_release. + * mmput is called whenever we stop holding onto a mm_struct, + * error success whatever. + * + * mm_release is called after a mm_struct has been removed + * from the current process. + * + * This difference is important for error handling, when we + * only half set up a mm_struct for a new process and need to restore + * the old one. Because we mmput the new mm_struct before + * restoring the old one. . . + * Eric Biederman 10 January 1998 + */ +void mm_release(void) +{ + struct task_struct *tsk = current; + struct completion *vfork_done = tsk->vfork_done; + + /* notify parent sleeping on vfork() */ + if (vfork_done) { + tsk->vfork_done = NULL; + complete(vfork_done); + } +} + +static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) +{ + struct mm_struct * mm, *oldmm; + int retval; + + tsk->min_flt = tsk->maj_flt = 0; + tsk->cmin_flt = tsk->cmaj_flt = 0; + tsk->nswap = tsk->cnswap = 0; + + tsk->mm = NULL; + tsk->active_mm = NULL; + + /* + * Are we cloning a kernel thread? + * + * We need to steal a active VM for that.. + */ + oldmm = current->mm; + if (!oldmm) + return 0; + + if (clone_flags & CLONE_VM) { + atomic_inc(&oldmm->mm_users); + mm = oldmm; + goto good_mm; + } + + retval = -ENOMEM; + mm = allocate_mm(); + if (!mm) + goto fail_nomem; + + /* Copy the current MM stuff.. 
*/ + memcpy(mm, oldmm, sizeof(*mm)); + if (!mm_init(mm)) + goto fail_nomem; + + if (init_new_context(tsk,mm)) + goto free_pt; + + down_write(&oldmm->mmap_sem); + retval = dup_mmap(mm); + up_write(&oldmm->mmap_sem); + + if (retval) + goto free_pt; + + /* + * child gets a private LDT (if there was an LDT in the parent) + */ + copy_segments(tsk, mm); + +good_mm: + tsk->mm = mm; + tsk->active_mm = mm; + return 0; + +free_pt: + mmput(mm); +fail_nomem: + return retval; +} + +#else /* !CONFIG_UCLINUX */ + +/* + * Allocate and initialize an mm_struct. + */ +struct mm_struct * mm_alloc(void) +{ + struct mm_struct * mm; + + mm = kmem_cache_alloc(mm_cachep, SLAB_KERNEL); + if (mm) { + memset(mm, 0, sizeof(*mm)); + atomic_set(&mm->mm_users, 1); + atomic_set(&mm->mm_count, 1); + init_rwsem(&mm->mmap_sem); + mm->page_table_lock = SPIN_LOCK_UNLOCKED; + return mm; + } + return NULL; +} + +/* + * Called when the last reference to the mm + * is dropped: either by a lazy thread or by + * mmput. Free the mm. + */ +void fastcall __mmdrop(struct mm_struct *mm) +{ + if (mm == &init_mm) BUG(); + kmem_cache_free(mm_cachep, mm); +} + +/* + * Decrement the use count and release all resources for an mm. + */ +void mmput(struct mm_struct *mm) +{ + if (atomic_dec_and_test(&mm->mm_users)) { + exit_mmap(mm); + mmdrop(mm); + } +} + +/* Please note the differences between mmput and mm_release. + * mmput is called whenever we stop holding onto a mm_struct, + * error success whatever. + * + * mm_release is called after a mm_struct has been removed + * from the current process. + * + * This difference is important for error handling, when we + * only half set up a mm_struct for a new process and need to restore + * the old one. Because we mmput the new mm_struct before + * restoring the old one. . . + * Eric Biederman 10 January 1998 + */ +void mm_release(void) +{ + struct task_struct *tsk = current; + struct completion *vfork_done = tsk->vfork_done; + + /* notify parent sleeping on vfork() */ + if (vfork_done) { + tsk->vfork_done = NULL; + complete(vfork_done); + } +} + +static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk) +{ + struct mm_struct * mm; + int retval; + + tsk->min_flt = tsk->maj_flt = 0; + tsk->cmin_flt = tsk->cmaj_flt = 0; + tsk->nswap = tsk->cnswap = 0; + + tsk->mm = NULL; + tsk->active_mm = NULL; + + /* + * Are we cloning a kernel thread? + * + * We need to steal a active VM for that.. 
+ */
+ mm = current->mm;
+ if (!mm)
+ return 0;
+
+ if (clone_flags & CLONE_VM) {
+ atomic_inc(&mm->mm_users);
+ goto good_mm;
+ }
+
+ retval = -ENOMEM;
+ mm = mm_alloc();
+ if (!mm)
+ goto fail_nomem;
+
+ tsk->mm = mm;
+ tsk->active_mm = mm;
+
+#if DAVIDM /* is this needed, I took it out as it didn't appear to be */
+ if (tsk->mm->executable)
+ atomic_inc(&tsk->mm->executable->i_count);
+#endif
+
+ /*
+ * child gets a private LDT (if there was an LDT in the parent)
+ */
+ copy_segments(tsk, mm);
+
+good_mm:
+ tsk->mm = mm;
+ tsk->active_mm = mm;
+ return 0;
+
+free_pt:
+ mmput(mm);
+fail_nomem:
+ return retval;
+}
+
+#endif /* !CONFIG_UCLINUX */
+
+static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
+{
+ struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
+ /* We don't need to lock fs - think why ;-) */
+ if (fs) {
+ atomic_set(&fs->count, 1);
+ fs->lock = RW_LOCK_UNLOCKED;
+ fs->umask = old->umask;
+ read_lock(&old->lock);
+ fs->rootmnt = mntget(old->rootmnt);
+ fs->root = dget(old->root);
+ fs->pwdmnt = mntget(old->pwdmnt);
+ fs->pwd = dget(old->pwd);
+ if (old->altroot) {
+ fs->altrootmnt = mntget(old->altrootmnt);
+ fs->altroot = dget(old->altroot);
+ } else {
+ fs->altrootmnt = NULL;
+ fs->altroot = NULL;
+ }
+ read_unlock(&old->lock);
+ }
+ return fs;
+}
+
+struct fs_struct *copy_fs_struct(struct fs_struct *old)
+{
+ return __copy_fs_struct(old);
+}
+
+static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
+{
+ if (clone_flags & CLONE_FS) {
+ atomic_inc(&current->fs->count);
+ return 0;
+ }
+ tsk->fs = __copy_fs_struct(current->fs);
+ if (!tsk->fs)
+ return -1;
+ return 0;
+}
+
+static int count_open_files(struct files_struct *files, int size)
+{
+ int i;
+
+ /* Find the last open fd */
+ for (i = size/(8*sizeof(long)); i > 0; ) {
+ if (files->open_fds->fds_bits[--i])
+ break;
+ }
+ i = (i+1) * 8 * sizeof(long);
+ return i;
+}
+
+static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
+{
+ struct files_struct *oldf, *newf;
+ struct file **old_fds, **new_fds;
+ int open_files, nfds, size, i, error = 0;
+
+ /*
+ * A background process may not have any files ...
+ */
+ oldf = current->files;
+ if (!oldf)
+ goto out;
+
+ if (clone_flags & CLONE_FILES) {
+ atomic_inc(&oldf->count);
+ goto out;
+ }
+
+ /*
+ * Note: we may be using current for both targets (See exec.c)
+ * This works because we cache current->files (old) as oldf. Don't
+ * break this.
+ */
+ tsk->files = NULL;
+ error = -ENOMEM;
+ newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
+ if (!newf)
+ goto out;
+
+ atomic_set(&newf->count, 1);
+
+ newf->file_lock = RW_LOCK_UNLOCKED;
+ newf->next_fd = 0;
+ newf->max_fds = NR_OPEN_DEFAULT;
+ newf->max_fdset = __FD_SETSIZE;
+ newf->close_on_exec = &newf->close_on_exec_init;
+ newf->open_fds = &newf->open_fds_init;
+ newf->fd = &newf->fd_array[0];
+
+ /* We don't yet have the oldf readlock, but even if the old
+ fdset gets grown now, we'll only copy up to "size" fds */
+ size = oldf->max_fdset;
+ if (size > __FD_SETSIZE) {
+ newf->max_fdset = 0;
+ write_lock(&newf->file_lock);
+ error = expand_fdset(newf, size-1);
+ write_unlock(&newf->file_lock);
+ if (error)
+ goto out_release;
+ }
+ read_lock(&oldf->file_lock);
+
+ open_files = count_open_files(oldf, size);
+
+ /*
+ * Check whether we need to allocate a larger fd array.
+ * Note: we're not a clone task, so the open count won't
+ * change.
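+ * (Illustrative example, not in the original source: if the parent
+ * has descriptors claimed beyond NR_OPEN_DEFAULT, open_files exceeds
+ * nfds and the expand_fd_array() call below grows the new table
+ * before any struct file pointers are copied.)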
+ */
+ nfds = NR_OPEN_DEFAULT;
+ if (open_files > nfds) {
+ read_unlock(&oldf->file_lock);
+ newf->max_fds = 0;
+ write_lock(&newf->file_lock);
+ error = expand_fd_array(newf, open_files-1);
+ write_unlock(&newf->file_lock);
+ if (error)
+ goto out_release;
+ nfds = newf->max_fds;
+ read_lock(&oldf->file_lock);
+ }
+
+ old_fds = oldf->fd;
+ new_fds = newf->fd;
+
+ memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
+ memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);
+
+ for (i = open_files; i != 0; i--) {
+ struct file *f = *old_fds++;
+ if (f) {
+ get_file(f);
+ } else {
+ /*
+ * The fd may be claimed in the fd bitmap but not yet
+ * instantiated in the files array if a sibling thread
+ * is partway through open(). So make sure that this
+ * fd is available to the new process.
+ */
+ FD_CLR(open_files - i, newf->open_fds);
+ }
+ *new_fds++ = f;
+ }
+ read_unlock(&oldf->file_lock);
+
+ /* compute the remainder to be cleared */
+ size = (newf->max_fds - open_files) * sizeof(struct file *);
+
+ /* This is long word aligned thus could use an optimized version */
+ memset(new_fds, 0, size);
+
+ if (newf->max_fdset > open_files) {
+ int left = (newf->max_fdset-open_files)/8;
+ int start = open_files / (8 * sizeof(unsigned long));
+
+ memset(&newf->open_fds->fds_bits[start], 0, left);
+ memset(&newf->close_on_exec->fds_bits[start], 0, left);
+ }
+
+ tsk->files = newf;
+ error = 0;
+out:
+ return error;
+
+out_release:
+ free_fdset (newf->close_on_exec, newf->max_fdset);
+ free_fdset (newf->open_fds, newf->max_fdset);
+ kmem_cache_free(files_cachep, newf);
+ goto out;
+}
+
+/*
+ * Helper to unshare the files of the current task.
+ * We don't want to expose copy_files internals to
+ * the exec layer of the kernel.
+ */
+
+int unshare_files(void)
+{
+ struct files_struct *files = current->files;
+ int rc;
+
+ if(!files)
+ BUG();
+
+ /* This can race but the race causes us to copy when we don't
+ need to and drop the copy */
+ if(atomic_read(&files->count) == 1)
+ {
+ atomic_inc(&files->count);
+ return 0;
+ }
+ rc = copy_files(0, current);
+ if(rc)
+ current->files = files;
+ return rc;
+}
+
+static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
+{
+ struct signal_struct *sig;
+
+ if (clone_flags & CLONE_SIGHAND) {
+ atomic_inc(&current->sig->count);
+ return 0;
+ }
+ sig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
+ tsk->sig = sig;
+ if (!sig)
+ return -1;
+ spin_lock_init(&sig->siglock);
+ atomic_set(&sig->count, 1);
+ memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
+ return 0;
+}
+
+static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
+{
+ unsigned long new_flags = p->flags;
+
+ new_flags &= ~(PF_SUPERPRIV | PF_USEDFPU);
+ new_flags |= PF_FORKNOEXEC;
+ if (!(clone_flags & CLONE_PTRACE))
+ p->ptrace = 0;
+ p->flags = new_flags;
+}
+
+long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+{
+ struct task_struct *task = current;
+ unsigned old_task_dumpable;
+ long ret;
+
+ /* lock out any potential ptracer */
+ task_lock(task);
+ if (task->ptrace) {
+ task_unlock(task);
+ return -EPERM;
+ }
+
+ old_task_dumpable = task->task_dumpable;
+ task->task_dumpable = 0;
+ task_unlock(task);
+
+ ret = arch_kernel_thread(fn, arg, flags);
+
+ /* never reached in child process, only in parent */
+ current->task_dumpable = old_task_dumpable;
+
+ return ret;
+}
+
+/*
+ * Ok, this is the main fork-routine.
It copies the system process + * information (task[nr]) and sets up the necessary registers. It also + * copies the data segment in its entirety. The "stack_start" and + * "stack_top" arguments are simply passed along to the platform + * specific copy_thread() routine. Most platforms ignore stack_top. + * For an example that's using stack_top, see + * arch/ia64/kernel/process.c. + */ +int do_fork(unsigned long clone_flags, unsigned long stack_start, + struct pt_regs *regs, unsigned long stack_size) +{ + int retval; + struct task_struct *p; + struct completion vfork; + + if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) + return -EINVAL; + + retval = -EPERM; + + /* + * CLONE_PID is only allowed for the initial SMP swapper + * calls + */ + if (clone_flags & CLONE_PID) { + if (current->pid) + goto fork_out; + } + + retval = -ENOMEM; + p = alloc_task_struct(); + if (!p) + goto fork_out; + + *p = *current; + +#ifdef CONFIG_SYSCALLTIMER + p->curr_syscall = 0; +#endif + + retval = -EAGAIN; + /* + * Check if we are over our maximum process limit, but be sure to + * exclude root. This is needed to make it possible for login and + * friends to set the per-user process limit to something lower + * than the amount of processes root is running. -- Rik + */ + if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur + && p->user != &root_user + && !capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE)) + goto bad_fork_free; + + atomic_inc(&p->user->__count); + atomic_inc(&p->user->processes); + + /* + * Counter increases are protected by + * the kernel lock so nr_threads can't + * increase under us (but it may decrease). + */ + if (nr_threads >= max_threads) + goto bad_fork_cleanup_count; + + get_exec_domain(p->exec_domain); + + if (p->binfmt && p->binfmt->module) + __MOD_INC_USE_COUNT(p->binfmt->module); + + p->did_exec = 0; + p->swappable = 0; + p->state = TASK_UNINTERRUPTIBLE; + + copy_flags(clone_flags, p); + p->pid = get_pid(clone_flags); + if (p->pid == 0 && current->pid != 0) + goto bad_fork_cleanup; + + p->run_list.next = NULL; + p->run_list.prev = NULL; + + p->p_cptr = NULL; + init_waitqueue_head(&p->wait_chldexit); + p->vfork_done = NULL; + if (clone_flags & CLONE_VFORK) { + p->vfork_done = &vfork; + init_completion(&vfork); + } + spin_lock_init(&p->alloc_lock); + + p->sigpending = 0; + init_sigpending(&p->pending); + + p->it_real_value = p->it_virt_value = p->it_prof_value = 0; + p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0; + init_timer(&p->real_timer); + p->real_timer.data = (unsigned long) p; + + p->leader = 0; /* session leadership doesn't inherit */ + p->tty_old_pgrp = 0; + p->times.tms_utime = p->times.tms_stime = 0; + p->times.tms_cutime = p->times.tms_cstime = 0; +#ifdef CONFIG_SMP + { + int i; + p->cpus_runnable = ~0UL; + p->processor = current->processor; + /* ?? should we just memset this ?? 
*/
+ for(i = 0; i < smp_num_cpus; i++)
+ p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
+ spin_lock_init(&p->sigmask_lock);
+ }
+#endif
+ p->lock_depth = -1; /* -1 = no lock */
+ p->start_time = jiffies;
+
+ INIT_LIST_HEAD(&p->local_pages);
+
+ retval = -ENOMEM;
+ /* copy all the process information */
+ if (copy_files(clone_flags, p))
+ goto bad_fork_cleanup;
+ if (copy_fs(clone_flags, p))
+ goto bad_fork_cleanup_files;
+ if (copy_sighand(clone_flags, p))
+ goto bad_fork_cleanup_fs;
+ if (copy_mm(clone_flags, p))
+ goto bad_fork_cleanup_sighand;
+ retval = copy_namespace(clone_flags, p);
+ if (retval)
+ goto bad_fork_cleanup_mm;
+ retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
+ if (retval)
+ goto bad_fork_cleanup_namespace;
+ p->semundo = NULL;
+
+ /* Our parent execution domain becomes current domain
+ These must match for thread signalling to apply */
+
+ p->parent_exec_id = p->self_exec_id;
+
+ /* ok, now we should be set up.. */
+ p->swappable = 1;
+ p->exit_signal = clone_flags & CSIGNAL;
+ p->pdeath_signal = 0;
+
+ /*
+ * "share" dynamic priority between parent and child, thus the
+ * total amount of dynamic priorities in the system doesn't change,
+ * more scheduling fairness. This is only important in the first
+ * timeslice, in the long run the scheduling behaviour is unchanged.
+ */
+ p->counter = (current->counter + 1) >> 1;
+ current->counter >>= 1;
+ if (!current->counter)
+ current->need_resched = 1;
+
+ /*
+ * Ok, add it to the run-queues and make it
+ * visible to the rest of the system.
+ *
+ * Let it rip!
+ */
+ retval = p->pid;
+ p->tgid = retval;
+ INIT_LIST_HEAD(&p->thread_group);
+
+ /* Need tasklist lock for parent etc handling! */
+ write_lock_irq(&tasklist_lock);
+
+ /* CLONE_PARENT re-uses the old parent */
+ p->p_opptr = current->p_opptr;
+ p->p_pptr = current->p_pptr;
+ if (!(clone_flags & CLONE_PARENT)) {
+ p->p_opptr = current;
+ if (!(p->ptrace & PT_PTRACED))
+ p->p_pptr = current;
+ }
+
+ if (clone_flags & CLONE_THREAD) {
+ p->tgid = current->tgid;
+ list_add(&p->thread_group, &current->thread_group);
+ }
+
+ SET_LINKS(p);
+ hash_pid(p);
+ nr_threads++;
+ write_unlock_irq(&tasklist_lock);
+
+ if (p->ptrace & PT_PTRACED)
+ send_sig(SIGSTOP, p, 1);
+
+ wake_up_process(p); /* do this last */
+ ++total_forks;
+ if (clone_flags & CLONE_VFORK)
+ wait_for_completion(&vfork);
+
+fork_out:
+ return retval;
+
+bad_fork_cleanup_namespace:
+ exit_namespace(p);
+bad_fork_cleanup_mm:
+ exit_mm(p);
+ if (p->active_mm)
+ mmdrop(p->active_mm);
+bad_fork_cleanup_sighand:
+ exit_sighand(p);
+bad_fork_cleanup_fs:
+ exit_fs(p); /* blocking */
+bad_fork_cleanup_files:
+ exit_files(p); /* blocking */
+bad_fork_cleanup:
+ put_exec_domain(p->exec_domain);
+ if (p->binfmt && p->binfmt->module)
+ __MOD_DEC_USE_COUNT(p->binfmt->module);
+bad_fork_cleanup_count:
+ atomic_dec(&p->user->processes);
+ free_uid(p->user);
+bad_fork_free:
+ free_task_struct(p);
+ goto fork_out;
+}
+
+/* SLAB cache for signal_struct structures (tsk->sig) */
+kmem_cache_t *sigact_cachep;
+
+/* SLAB cache for files_struct structures (tsk->files) */
+kmem_cache_t *files_cachep;
+
+/* SLAB cache for fs_struct structures (tsk->fs) */
+kmem_cache_t *fs_cachep;
+
+/* SLAB cache for vm_area_struct structures */
+kmem_cache_t *vm_area_cachep;
+
+/* SLAB cache for mm_struct structures (tsk->mm) */
+kmem_cache_t *mm_cachep;
+
+void __init proc_caches_init(void)
+{
+ sigact_cachep = kmem_cache_create("signal_act",
+ sizeof(struct signal_struct), 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if
(!sigact_cachep) + panic("Cannot create signal action SLAB cache"); + + files_cachep = kmem_cache_create("files_cache", + sizeof(struct files_struct), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!files_cachep) + panic("Cannot create files SLAB cache"); + + fs_cachep = kmem_cache_create("fs_cache", + sizeof(struct fs_struct), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!fs_cachep) + panic("Cannot create fs_struct SLAB cache"); + + vm_area_cachep = kmem_cache_create("vm_area_struct", + sizeof(struct vm_area_struct), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if(!vm_area_cachep) + panic("vma_init: Cannot alloc vm_area_struct SLAB cache"); + + mm_cachep = kmem_cache_create("mm_struct", + sizeof(struct mm_struct), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if(!mm_cachep) + panic("vma_init: Cannot alloc mm_struct SLAB cache"); +} diff --git a/uClinux-2.4.31-uc0/kernel/info.c b/uClinux-2.4.31-uc0/kernel/info.c new file mode 100644 index 0000000..6f30b3a --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/info.c @@ -0,0 +1,79 @@ +/* + * linux/kernel/info.c + * + * Copyright (C) 1992 Darren Senn + */ + +/* This implements the sysinfo() system call */ + +#include <linux/mm.h> +#include <linux/unistd.h> +#include <linux/swap.h> +#include <linux/smp_lock.h> + +#include <asm/uaccess.h> + +asmlinkage long sys_sysinfo(struct sysinfo *info) +{ + struct sysinfo val; + + memset((char *)&val, 0, sizeof(struct sysinfo)); + + cli(); + val.uptime = jiffies / HZ; + + val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); + val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); + val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); + + val.procs = nr_threads-1; + sti(); + + si_meminfo(&val); + si_swapinfo(&val); + + { + unsigned long mem_total, sav_total; + unsigned int mem_unit, bitcount; + + /* If the sum of all the available memory (i.e. ram + swap) + * is less than can be stored in a 32 bit unsigned long then + * we can be binary compatible with 2.2.x kernels. If not, + * well, in that case 2.2.x was broken anyways... + * + * -Erik Andersen <andersee@debian.org> */ + + mem_total = val.totalram + val.totalswap; + if (mem_total < val.totalram || mem_total < val.totalswap) + goto out; + bitcount = 0; + mem_unit = val.mem_unit; + while (mem_unit > 1) { + bitcount++; + mem_unit >>= 1; + sav_total = mem_total; + mem_total <<= 1; + if (mem_total < sav_total) + goto out; + } + + /* If mem_total did not overflow, multiply all memory values by + * val.mem_unit and set it to 1. This leaves things compatible + * with 2.2.x, and also retains compatibility with earlier 2.4.x + * kernels... */ + + val.mem_unit = 1; + val.totalram <<= bitcount; + val.freeram <<= bitcount; + val.sharedram <<= bitcount; + val.bufferram <<= bitcount; + val.totalswap <<= bitcount; + val.freeswap <<= bitcount; + val.totalhigh <<= bitcount; + val.freehigh <<= bitcount; + } +out: + if (copy_to_user(info, &val, sizeof(struct sysinfo))) + return -EFAULT; + return 0; +} diff --git a/uClinux-2.4.31-uc0/kernel/itimer.c b/uClinux-2.4.31-uc0/kernel/itimer.c new file mode 100644 index 0000000..79d5822 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/itimer.c @@ -0,0 +1,170 @@ +/* + * linux/kernel/itimer.c + * + * Copyright (C) 1992 Darren Senn + */ + +/* These are all the functions necessary to implement itimers */ + +#include <linux/mm.h> +#include <linux/smp_lock.h> +#include <linux/interrupt.h> + +#include <asm/uaccess.h> + +/* + * change timeval to jiffies, trying to avoid the + * most obvious overflows.. 
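+ * (Worked example, added for illustration: with HZ = 100 the code
+ * below rounds microseconds up to the next tick, so { 1s, 500000us }
+ * becomes 100*1 + 50 = 150 jiffies, while any tv_sec above
+ * ULONG_MAX/HZ saturates to ULONG_MAX.)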
+ *
+ * The tv_*sec values are signed, but nothing seems to
+ * indicate whether we really should use them as signed values
+ * when doing itimers. POSIX doesn't mention this (but if
+ * alarm() uses itimers without checking, we have to use unsigned
+ * arithmetic).
+ */
+static unsigned long tvtojiffies(struct timeval *value)
+{
+ unsigned long sec = (unsigned) value->tv_sec;
+ unsigned long usec = (unsigned) value->tv_usec;
+
+ if (sec > (ULONG_MAX / HZ))
+ return ULONG_MAX;
+ usec += 1000000 / HZ - 1;
+ usec /= 1000000 / HZ;
+ return HZ*sec+usec;
+}
+
+static void jiffiestotv(unsigned long jiffies, struct timeval *value)
+{
+ value->tv_usec = (jiffies % HZ) * (1000000 / HZ);
+ value->tv_sec = jiffies / HZ;
+}
+
+int do_getitimer(int which, struct itimerval *value)
+{
+ register unsigned long val, interval;
+
+ switch (which) {
+ case ITIMER_REAL:
+ interval = current->it_real_incr;
+ val = 0;
+ /*
+ * FIXME! This needs to be atomic, in case the kernel timer happens!
+ */
+ if (timer_pending(&current->real_timer)) {
+ val = current->real_timer.expires - jiffies;
+
+ /* look out for negative/zero itimer.. */
+ if ((long) val <= 0)
+ val = 1;
+ }
+ break;
+ case ITIMER_VIRTUAL:
+ val = current->it_virt_value;
+ interval = current->it_virt_incr;
+ break;
+ case ITIMER_PROF:
+ val = current->it_prof_value;
+ interval = current->it_prof_incr;
+ break;
+ default:
+ return(-EINVAL);
+ }
+ jiffiestotv(val, &value->it_value);
+ jiffiestotv(interval, &value->it_interval);
+ return 0;
+}
+
+/* SMP: Only we modify our itimer values. */
+asmlinkage long sys_getitimer(int which, struct itimerval *value)
+{
+ int error = -EFAULT;
+ struct itimerval get_buffer;
+
+ if (value) {
+ error = do_getitimer(which, &get_buffer);
+ if (!error &&
+ copy_to_user(value, &get_buffer, sizeof(get_buffer)))
+ error = -EFAULT;
+ }
+ return error;
+}
+
+void it_real_fn(unsigned long __data)
+{
+ struct task_struct * p = (struct task_struct *) __data;
+ unsigned long interval;
+
+ send_sig(SIGALRM, p, 1);
+ interval = p->it_real_incr;
+ if (interval) {
+ if (interval > (unsigned long) LONG_MAX)
+ interval = LONG_MAX;
+ p->real_timer.expires = jiffies + interval;
+ add_timer(&p->real_timer);
+ }
+}
+
+int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
+{
+ register unsigned long i, j;
+ int k;
+
+ i = tvtojiffies(&value->it_interval);
+ j = tvtojiffies(&value->it_value);
+ if (ovalue && (k = do_getitimer(which, ovalue)) < 0)
+ return k;
+ switch (which) {
+ case ITIMER_REAL:
+ del_timer_sync(&current->real_timer);
+ current->it_real_value = j;
+ current->it_real_incr = i;
+ if (!j)
+ break;
+ if (j > (unsigned long) LONG_MAX)
+ j = LONG_MAX;
+ i = j + jiffies;
+ current->real_timer.expires = i;
+ add_timer(&current->real_timer);
+ break;
+ case ITIMER_VIRTUAL:
+ if (j)
+ j++;
+ current->it_virt_value = j;
+ current->it_virt_incr = i;
+ break;
+ case ITIMER_PROF:
+ if (j)
+ j++;
+ current->it_prof_value = j;
+ current->it_prof_incr = i;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* SMP: Again, only we play with our itimers, and signals are SMP safe
+ * now so that is not an issue at all anymore.
+ */
+asmlinkage long sys_setitimer(int which, struct itimerval *value,
+ struct itimerval *ovalue)
+{
+ struct itimerval set_buffer, get_buffer;
+ int error;
+
+ if (value) {
+ if(copy_from_user(&set_buffer, value, sizeof(set_buffer)))
+ return -EFAULT;
+ } else
+ memset((char *) &set_buffer, 0, sizeof(set_buffer));
+
+ error = do_setitimer(which, &set_buffer, ovalue ?
&get_buffer : 0); + if (error || !ovalue) + return error; + + if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer))) + return -EFAULT; + return 0; +} diff --git a/uClinux-2.4.31-uc0/kernel/kmod.c b/uClinux-2.4.31-uc0/kernel/kmod.c new file mode 100644 index 0000000..d245302 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/kmod.c @@ -0,0 +1,376 @@ +/* + kmod, the new module loader (replaces kerneld) + Kirk Petersen + + Reorganized not to be a daemon by Adam Richter, with guidance + from Greg Zornetzer. + + Modified to avoid chroot and file sharing problems. + Mikael Pettersson + + Limit the concurrent number of kmod modprobes to catch loops from + "modprobe needs a service that is in a module". + Keith Owens <kaos@ocs.com.au> December 1999 + + Unblock all signals when we exec a usermode process. + Shuu Yamaguchi <shuu@wondernetworkresources.com> December 2000 +*/ + +#define __KERNEL_SYSCALLS__ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/unistd.h> +#include <linux/kmod.h> +#include <linux/smp_lock.h> +#include <linux/slab.h> +#include <linux/namespace.h> +#include <linux/completion.h> + +#include <asm/uaccess.h> + +extern int max_threads; + +static inline void +use_init_fs_context(void) +{ + struct fs_struct *our_fs, *init_fs; + struct dentry *root, *pwd; + struct vfsmount *rootmnt, *pwdmnt; + struct namespace *our_ns, *init_ns; + + /* + * Make modprobe's fs context be a copy of init's. + * + * We cannot use the user's fs context, because it + * may have a different root than init. + * Since init was created with CLONE_FS, we can grab + * its fs context from "init_task". + * + * The fs context has to be a copy. If it is shared + * with init, then any chdir() call in modprobe will + * also affect init and the other threads sharing + * init_task's fs context. + * + * We created the exec_modprobe thread without CLONE_FS, + * so we can update the fields in our fs context freely. + */ + + init_fs = init_task.fs; + init_ns = init_task.namespace; + get_namespace(init_ns); + our_ns = current->namespace; + current->namespace = init_ns; + put_namespace(our_ns); + read_lock(&init_fs->lock); + rootmnt = mntget(init_fs->rootmnt); + root = dget(init_fs->root); + pwdmnt = mntget(init_fs->pwdmnt); + pwd = dget(init_fs->pwd); + read_unlock(&init_fs->lock); + + /* FIXME - unsafe ->fs access */ + our_fs = current->fs; + our_fs->umask = init_fs->umask; + set_fs_root(our_fs, rootmnt, root); + set_fs_pwd(our_fs, pwdmnt, pwd); + write_lock(&our_fs->lock); + if (our_fs->altroot) { + struct vfsmount *mnt = our_fs->altrootmnt; + struct dentry *dentry = our_fs->altroot; + our_fs->altrootmnt = NULL; + our_fs->altroot = NULL; + write_unlock(&our_fs->lock); + dput(dentry); + mntput(mnt); + } else + write_unlock(&our_fs->lock); + dput(root); + mntput(rootmnt); + dput(pwd); + mntput(pwdmnt); +} + +int exec_usermodehelper(char *program_path, char *argv[], char *envp[]) +{ + int i; + struct task_struct *curtask = current; + + curtask->session = 1; + curtask->pgrp = 1; + + use_init_fs_context(); + + /* Prevent parent user process from sending signals to child. + Otherwise, if the modprobe program does not exist, it might + be possible to get a user defined signal handler to execute + as the super user right after the execve fails if you time + the signal just right. 
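+ To close that window, the code below empties the blocked set,
+ discards anything already pending and resets every handler to
+ its default before the execve is attempted. (Editorial summary,
+ not in the original source.)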
+ */ + spin_lock_irq(&curtask->sigmask_lock); + sigemptyset(&curtask->blocked); + flush_signals(curtask); + flush_signal_handlers(curtask); + recalc_sigpending(curtask); + spin_unlock_irq(&curtask->sigmask_lock); + + for (i = 0; i < curtask->files->max_fds; i++ ) { + if (curtask->files->fd[i]) close(i); + } + + switch_uid(INIT_USER); + + /* Give kmod all effective privileges.. */ + curtask->euid = curtask->uid = curtask->suid = curtask->fsuid = 0; + curtask->egid = curtask->gid = curtask->sgid = curtask->fsgid = 0; + + curtask->ngroups = 0; + + cap_set_full(curtask->cap_effective); + + /* Allow execve args to be in kernel space. */ + set_fs(KERNEL_DS); + + /* Go, go, go... */ + if (execve(program_path, argv, envp) < 0) + return -errno; + return 0; +} + +#ifdef CONFIG_KMOD + +/* + modprobe_path is set via /proc/sys. +*/ +char modprobe_path[256] = "/sbin/modprobe"; + +static int exec_modprobe(void * module_name) +{ + static char * envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; + char *argv[] = { modprobe_path, "-s", "-k", "--", (char*)module_name, NULL }; + int ret; + + ret = exec_usermodehelper(modprobe_path, argv, envp); + if (ret) { + printk(KERN_ERR + "kmod: failed to exec %s -s -k %s, errno = %d\n", + modprobe_path, (char*) module_name, errno); + } + return ret; +} + +/** + * request_module - try to load a kernel module + * @module_name: Name of module + * + * Load a module using the user mode module loader. The function returns + * zero on success or a negative errno code on failure. Note that a + * successful module load does not mean the module did not then unload + * and exit on an error of its own. Callers must check that the service + * they requested is now available not blindly invoke it. + * + * If module auto-loading support is disabled then this function + * becomes a no-operation. + */ +int request_module(const char * module_name) +{ + pid_t pid; + int waitpid_result; + sigset_t tmpsig; + int i; + static atomic_t kmod_concurrent = ATOMIC_INIT(0); +#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ + static int kmod_loop_msg; + + /* Don't allow request_module() before the root fs is mounted! */ + if ( ! current->fs->root ) { + printk(KERN_ERR "request_module[%s]: Root fs not mounted\n", + module_name); + return -EPERM; + } + + /* If modprobe needs a service that is in a module, we get a recursive + * loop. Limit the number of running kmod threads to max_threads/2 or + * MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method + * would be to run the parents of this process, counting how many times + * kmod was invoked. That would mean accessing the internals of the + * process tables to get the command line, proc_pid_cmdline is static + * and it is not worth changing the proc code just to handle this case. + * KAO. 
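+ * (Numerical illustration, added for clarity: with the default
+ * MAX_KMOD_CONCURRENT of 50, a machine whose max_threads is 60 is
+ * throttled at min(60/2, 50) = 30 concurrent modprobes; larger
+ * machines hit the flat cap of 50.)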
+ */
+ i = max_threads/2;
+ if (i > MAX_KMOD_CONCURRENT)
+ i = MAX_KMOD_CONCURRENT;
+ atomic_inc(&kmod_concurrent);
+ if (atomic_read(&kmod_concurrent) > i) {
+ if (kmod_loop_msg++ < 5)
+ printk(KERN_ERR
+ "kmod: runaway modprobe loop assumed and stopped\n");
+ atomic_dec(&kmod_concurrent);
+ return -ENOMEM;
+ }
+
+ pid = kernel_thread(exec_modprobe, (void*) module_name, 0);
+ if (pid < 0) {
+ printk(KERN_ERR "request_module[%s]: fork failed, errno %d\n", module_name, -pid);
+ atomic_dec(&kmod_concurrent);
+ return pid;
+ }
+
+ /* Block everything but SIGKILL/SIGSTOP */
+ spin_lock_irq(&current->sigmask_lock);
+ tmpsig = current->blocked;
+ siginitsetinv(&current->blocked, sigmask(SIGKILL) | sigmask(SIGSTOP));
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ waitpid_result = waitpid(pid, NULL, __WCLONE);
+ atomic_dec(&kmod_concurrent);
+
+ /* Allow signals again.. */
+ spin_lock_irq(&current->sigmask_lock);
+ current->blocked = tmpsig;
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ if (waitpid_result != pid) {
+ printk(KERN_ERR "request_module[%s]: waitpid(%d,...) failed, errno %d\n",
+ module_name, pid, -waitpid_result);
+ }
+ return 0;
+}
+#endif /* CONFIG_KMOD */
+
+
+#ifdef CONFIG_HOTPLUG
+/*
+ hotplug path is set via /proc/sys
+ invoked by hotplug-aware bus drivers,
+ with exec_usermodehelper and some thread-spawner
+
+ argv [0] = hotplug_path;
+ argv [1] = "usb", "scsi", "pci", "network", etc;
+ ... plus optional type-specific parameters
+ argv [n] = 0;
+
+ envp [*] = HOME, PATH; optional type-specific parameters
+
+ a hotplug bus should invoke this for device add/remove
+ events. the command is expected to load drivers when
+ necessary, and may perform additional system setup.
+*/
+char hotplug_path[256] = "/sbin/hotplug";
+
+EXPORT_SYMBOL(hotplug_path);
+
+#endif /* CONFIG_HOTPLUG */
+
+struct subprocess_info {
+ struct completion *complete;
+ char *path;
+ char **argv;
+ char **envp;
+ pid_t retval;
+};
+
+/*
+ * This is the task which runs the usermode application
+ */
+static int ____call_usermodehelper(void *data)
+{
+ struct subprocess_info *sub_info = data;
+ int retval;
+
+ retval = -EPERM;
+ if (current->fs->root)
+ retval = exec_usermodehelper(sub_info->path, sub_info->argv, sub_info->envp);
+
+ /* Exec failed? */
+ sub_info->retval = (pid_t)retval;
+ do_exit(0);
+}
+
+/*
+ * This is run by keventd.
+ */
+static void __call_usermodehelper(void *data)
+{
+ struct subprocess_info *sub_info = data;
+ pid_t pid;
+
+ /*
+ * CLONE_VFORK: wait until the usermode helper has execve'd successfully
+ * We need the data structures to stay around until that is done.
+ */
+ pid = kernel_thread(____call_usermodehelper, sub_info, CLONE_VFORK | SIGCHLD);
+ if (pid < 0)
+ sub_info->retval = pid;
+ complete(sub_info->complete);
+}
+
+/**
+ * call_usermodehelper - start a usermode application
+ * @path: pathname for the application
+ * @argv: null-terminated argument list
+ * @envp: null-terminated environment list
+ *
+ * Runs a user-space application. The application is started asynchronously. It
+ * runs as a child of keventd. It runs with full root capabilities. keventd silently
+ * reaps the child when it exits.
+ *
+ * Must be called from process context. Returns zero on success, else a negative
+ * error code.
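+ *
+ * Minimal usage sketch (hypothetical path and arguments, shown only
+ * for illustration; they are not part of this file):
+ *
+ *	char *argv[] = { "/sbin/hotplug", "usb", NULL };
+ *	char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };
+ *	int err = call_usermodehelper(argv[0], argv, envp);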
+ */ +int call_usermodehelper(char *path, char **argv, char **envp) +{ + DECLARE_COMPLETION(work); + struct subprocess_info sub_info = { + complete: &work, + path: path, + argv: argv, + envp: envp, + retval: 0, + }; + struct tq_struct tqs = { + routine: __call_usermodehelper, + data: &sub_info, + }; + + if (path[0] == '\0') + goto out; + + if (current_is_keventd()) { + /* We can't wait on keventd! */ + __call_usermodehelper(&sub_info); + } else { + schedule_task(&tqs); + wait_for_completion(&work); + } +out: + return sub_info.retval; +} + +/* + * This is for the serialisation of device probe() functions + * against device open() functions + */ +static DECLARE_MUTEX(dev_probe_sem); + +void dev_probe_lock(void) +{ + down(&dev_probe_sem); +} + +void dev_probe_unlock(void) +{ + up(&dev_probe_sem); +} + +EXPORT_SYMBOL(exec_usermodehelper); +EXPORT_SYMBOL(call_usermodehelper); + +#ifdef CONFIG_KMOD +EXPORT_SYMBOL(request_module); +#endif + diff --git a/uClinux-2.4.31-uc0/kernel/ksyms.c b/uClinux-2.4.31-uc0/kernel/ksyms.c new file mode 100644 index 0000000..14c5761 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/ksyms.c @@ -0,0 +1,651 @@ +/* $USAGI: ksyms.c,v 1.33 2003/11/12 05:11:57 yoshfuji Exp $ */ + +/* + * Herein lies all the functions/variables that are "exported" for linkage + * with dynamically loaded kernel modules. + * Jon. + * + * - Stacked module support and unified symbol table added (June 1994) + * - External symbol table support added (December 1994) + * - Versions on symbols added (December 1994) + * by Bjorn Ekwall <bj0rn@blox.se> + */ + +#include <linux/config.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/blkdev.h> +#include <linux/cdrom.h> +#include <linux/kernel_stat.h> +#include <linux/vmalloc.h> +#include <linux/sys.h> +#include <linux/utsname.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/serial.h> +#include <linux/locks.h> +#include <linux/delay.h> +#include <linux/random.h> +#include <linux/reboot.h> +#include <linux/pagemap.h> +#include <linux/sysctl.h> +#include <linux/hdreg.h> +#include <linux/skbuff.h> +#include <linux/genhd.h> +#include <linux/blkpg.h> +#include <linux/swap.h> +#include <linux/ctype.h> +#include <linux/file.h> +#include <linux/iobuf.h> +#include <linux/console.h> +#include <linux/poll.h> +#include <linux/mmzone.h> +#include <linux/mm.h> +#include <linux/capability.h> +#include <linux/highuid.h> +#include <linux/brlock.h> +#include <linux/fs.h> +#include <linux/tty.h> +#include <linux/in6.h> +#include <linux/completion.h> +#include <linux/seq_file.h> +#include <linux/dnotify.h> +#include <linux/crc32.h> +#include <linux/firmware.h> +#include <asm/checksum.h> + +#if defined(CONFIG_PROC_FS) +#include <linux/proc_fs.h> +#endif +#ifdef CONFIG_KMOD +#include <linux/kmod.h> +#endif + +extern void set_device_ro(kdev_t dev,int flag); + +extern void *sys_call_table; + +extern struct timezone sys_tz; +extern int request_dma(unsigned int dmanr, char * deviceID); +extern void free_dma(unsigned int dmanr); +extern spinlock_t dma_spin_lock; +extern int panic_timeout; + +#ifdef CONFIG_MODVERSIONS +const struct module_symbol __export_Using_Versions +__attribute__((section("__ksymtab"))) = { + 1 /* Version version */, "Using_Versions" +}; +#endif + + +EXPORT_SYMBOL(inter_module_register); +EXPORT_SYMBOL(inter_module_unregister); +EXPORT_SYMBOL(inter_module_get); +EXPORT_SYMBOL(inter_module_get_request); +EXPORT_SYMBOL(inter_module_put); +EXPORT_SYMBOL(try_inc_mod_count); + +/* process memory management */ 
+EXPORT_SYMBOL(do_mmap_pgoff); +EXPORT_SYMBOL(do_munmap); +#ifndef NO_MM +EXPORT_SYMBOL(do_brk); +#endif +EXPORT_SYMBOL(exit_mm); +EXPORT_SYMBOL(exit_files); +EXPORT_SYMBOL(exit_fs); +EXPORT_SYMBOL(exit_sighand); + +/* internal kernel memory management */ +EXPORT_SYMBOL(_alloc_pages); +EXPORT_SYMBOL(__alloc_pages); +EXPORT_SYMBOL(alloc_pages_node); +EXPORT_SYMBOL(__get_free_pages); +EXPORT_SYMBOL(get_zeroed_page); +EXPORT_SYMBOL(__free_pages); +EXPORT_SYMBOL(free_pages); +EXPORT_SYMBOL(num_physpages); +EXPORT_SYMBOL(kmem_find_general_cachep); +EXPORT_SYMBOL(kmem_cache_create); +EXPORT_SYMBOL(kmem_cache_destroy); +EXPORT_SYMBOL(kmem_cache_shrink); +EXPORT_SYMBOL(kmem_cache_alloc); +EXPORT_SYMBOL(kmem_cache_free); +EXPORT_SYMBOL(kmem_cache_size); +EXPORT_SYMBOL(kmalloc); +EXPORT_SYMBOL(kfree); +#ifdef NO_MM +EXPORT_SYMBOL(ksize); +#endif +EXPORT_SYMBOL(vfree); +EXPORT_SYMBOL(__vmalloc); +#ifndef NO_MM +EXPORT_SYMBOL(vmap); +#endif +EXPORT_SYMBOL(vmalloc_to_page); +EXPORT_SYMBOL(mem_map); +EXPORT_SYMBOL(remap_page_range); +EXPORT_SYMBOL(max_mapnr); +EXPORT_SYMBOL(high_memory); +EXPORT_SYMBOL(vmtruncate); +#ifndef NO_MM +EXPORT_SYMBOL(find_vma); +EXPORT_SYMBOL(get_unmapped_area); +#endif +EXPORT_SYMBOL(init_mm); +#ifdef CONFIG_HIGHMEM +EXPORT_SYMBOL(kmap_high); +EXPORT_SYMBOL(kunmap_high); +EXPORT_SYMBOL(highmem_start_page); +EXPORT_SYMBOL(create_bounce); +#ifndef kmap_prot +EXPORT_SYMBOL(kmap_prot); +#endif +#ifndef kmap_pte +EXPORT_SYMBOL(kmap_pte); +#endif +#endif + +/* filesystem internal functions */ +EXPORT_SYMBOL(def_blk_fops); +EXPORT_SYMBOL(update_atime); +EXPORT_SYMBOL(get_fs_type); +EXPORT_SYMBOL(get_super); +EXPORT_SYMBOL(drop_super); +EXPORT_SYMBOL(getname); +EXPORT_SYMBOL(names_cachep); +EXPORT_SYMBOL(fput); +EXPORT_SYMBOL(fget); +EXPORT_SYMBOL(igrab); +EXPORT_SYMBOL(iunique); +EXPORT_SYMBOL(ilookup); +EXPORT_SYMBOL(iget4_locked); +EXPORT_SYMBOL(unlock_new_inode); +EXPORT_SYMBOL(iput); +EXPORT_SYMBOL(inode_init_once); +EXPORT_SYMBOL(__inode_init_once); +EXPORT_SYMBOL(force_delete); +EXPORT_SYMBOL(follow_up); +EXPORT_SYMBOL(follow_down); +EXPORT_SYMBOL(lookup_mnt); +EXPORT_SYMBOL(path_init); +EXPORT_SYMBOL(path_walk); +EXPORT_SYMBOL(path_lookup); +EXPORT_SYMBOL(path_release); +EXPORT_SYMBOL(__user_walk); +EXPORT_SYMBOL(lookup_one_len); +EXPORT_SYMBOL(lookup_hash); +EXPORT_SYMBOL(sys_close); +EXPORT_SYMBOL(dcache_lock); +EXPORT_SYMBOL(d_alloc_root); +EXPORT_SYMBOL(d_delete); +EXPORT_SYMBOL(dget_locked); +EXPORT_SYMBOL(d_validate); +EXPORT_SYMBOL(d_rehash); +EXPORT_SYMBOL(d_invalidate); /* May be it will be better in dcache.h? 
*/ +EXPORT_SYMBOL(d_move); +EXPORT_SYMBOL(d_instantiate); +EXPORT_SYMBOL(d_alloc); +EXPORT_SYMBOL(d_lookup); +EXPORT_SYMBOL(__d_path); +EXPORT_SYMBOL(mark_buffer_dirty); +EXPORT_SYMBOL(set_buffer_async_io); /* for reiserfs_writepage */ +EXPORT_SYMBOL(end_buffer_io_async); +EXPORT_SYMBOL(__mark_buffer_dirty); +EXPORT_SYMBOL(__mark_inode_dirty); +EXPORT_SYMBOL(fd_install); +EXPORT_SYMBOL(get_empty_filp); +EXPORT_SYMBOL(init_private_file); +EXPORT_SYMBOL(filp_open); +EXPORT_SYMBOL(filp_close); +EXPORT_SYMBOL(put_filp); +EXPORT_SYMBOL(files_lock); +EXPORT_SYMBOL(check_disk_change); +EXPORT_SYMBOL(__invalidate_buffers); +EXPORT_SYMBOL(invalidate_bdev); +EXPORT_SYMBOL(invalidate_inodes); +EXPORT_SYMBOL(invalidate_device); +EXPORT_SYMBOL(invalidate_inode_pages); +EXPORT_SYMBOL(truncate_inode_pages); +EXPORT_SYMBOL(fsync_dev); +EXPORT_SYMBOL(fsync_no_super); +EXPORT_SYMBOL(permission); +EXPORT_SYMBOL(vfs_permission); +EXPORT_SYMBOL(inode_setattr); +EXPORT_SYMBOL(inode_change_ok); +EXPORT_SYMBOL(write_inode_now); +EXPORT_SYMBOL(notify_change); +EXPORT_SYMBOL(set_blocksize); +EXPORT_SYMBOL(sb_set_blocksize); +EXPORT_SYMBOL(sb_min_blocksize); +EXPORT_SYMBOL(getblk); +EXPORT_SYMBOL(cdget); +EXPORT_SYMBOL(cdput); +EXPORT_SYMBOL(bdget); +EXPORT_SYMBOL(bdput); +EXPORT_SYMBOL(bread); +EXPORT_SYMBOL(__brelse); +EXPORT_SYMBOL(__bforget); +EXPORT_SYMBOL(ll_rw_block); +EXPORT_SYMBOL(submit_bh); +EXPORT_SYMBOL(unlock_buffer); +EXPORT_SYMBOL(__wait_on_buffer); +EXPORT_SYMBOL(___wait_on_page); +EXPORT_SYMBOL(generic_direct_IO); +EXPORT_SYMBOL(discard_bh_page); +EXPORT_SYMBOL(block_write_full_page); +EXPORT_SYMBOL(block_read_full_page); +EXPORT_SYMBOL(block_prepare_write); +EXPORT_SYMBOL(block_sync_page); +EXPORT_SYMBOL(generic_cont_expand); +EXPORT_SYMBOL(cont_prepare_write); +EXPORT_SYMBOL(generic_commit_write); +EXPORT_SYMBOL(block_truncate_page); +EXPORT_SYMBOL(generic_block_bmap); +EXPORT_SYMBOL(generic_file_read); +EXPORT_SYMBOL(do_generic_file_read); +EXPORT_SYMBOL(do_generic_file_write); +EXPORT_SYMBOL(do_generic_direct_read); +EXPORT_SYMBOL(do_generic_direct_write); +EXPORT_SYMBOL(generic_file_write); +EXPORT_SYMBOL(generic_file_mmap); +EXPORT_SYMBOL(generic_ro_fops); +EXPORT_SYMBOL(generic_buffer_fdatasync); +EXPORT_SYMBOL(page_hash_bits); +EXPORT_SYMBOL(page_hash_table); +EXPORT_SYMBOL(file_lock_list); +EXPORT_SYMBOL(locks_init_lock); +EXPORT_SYMBOL(locks_copy_lock); +EXPORT_SYMBOL(posix_lock_file); +EXPORT_SYMBOL(posix_test_lock); +EXPORT_SYMBOL(posix_block_lock); +EXPORT_SYMBOL(posix_unblock_lock); +EXPORT_SYMBOL(posix_locks_deadlock); +EXPORT_SYMBOL(locks_mandatory_area); +EXPORT_SYMBOL(dput); +EXPORT_SYMBOL(have_submounts); +EXPORT_SYMBOL(d_find_alias); +EXPORT_SYMBOL(d_prune_aliases); +EXPORT_SYMBOL(prune_dcache); +EXPORT_SYMBOL(shrink_dcache_sb); +EXPORT_SYMBOL(shrink_dcache_parent); +EXPORT_SYMBOL(find_inode_number); +EXPORT_SYMBOL(is_subdir); +EXPORT_SYMBOL(get_unused_fd); +EXPORT_SYMBOL(put_unused_fd); +EXPORT_SYMBOL(vfs_create); +EXPORT_SYMBOL(vfs_mkdir); +EXPORT_SYMBOL(vfs_mknod); +EXPORT_SYMBOL(vfs_symlink); +EXPORT_SYMBOL(vfs_link); +EXPORT_SYMBOL(vfs_rmdir); +EXPORT_SYMBOL(vfs_unlink); +EXPORT_SYMBOL(vfs_rename); +EXPORT_SYMBOL(vfs_statfs); +EXPORT_SYMBOL(generic_read_dir); +EXPORT_SYMBOL(generic_file_llseek); +EXPORT_SYMBOL(no_llseek); +EXPORT_SYMBOL(__pollwait); +EXPORT_SYMBOL(poll_freewait); +EXPORT_SYMBOL(ROOT_DEV); +EXPORT_SYMBOL(__find_get_page); +EXPORT_SYMBOL(__find_lock_page); +EXPORT_SYMBOL(find_trylock_page); +EXPORT_SYMBOL(find_or_create_page); 
+EXPORT_SYMBOL(grab_cache_page_nowait); +EXPORT_SYMBOL(read_cache_page); +EXPORT_SYMBOL(set_page_dirty); +EXPORT_SYMBOL(mark_page_accessed); +EXPORT_SYMBOL(vfs_readlink); +EXPORT_SYMBOL(vfs_follow_link); +EXPORT_SYMBOL(page_readlink); +EXPORT_SYMBOL(page_follow_link); +EXPORT_SYMBOL(page_symlink_inode_operations); +EXPORT_SYMBOL(block_symlink); +EXPORT_SYMBOL(vfs_readdir); +EXPORT_SYMBOL(__get_lease); +EXPORT_SYMBOL(lease_get_mtime); +EXPORT_SYMBOL(lock_may_read); +EXPORT_SYMBOL(lock_may_write); +EXPORT_SYMBOL(dcache_dir_open); +EXPORT_SYMBOL(dcache_dir_close); +EXPORT_SYMBOL(dcache_dir_lseek); +EXPORT_SYMBOL(dcache_dir_fsync); +EXPORT_SYMBOL(dcache_readdir); +EXPORT_SYMBOL(dcache_dir_ops); + +/* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */ +EXPORT_SYMBOL(default_llseek); +EXPORT_SYMBOL(dentry_open); +#ifndef NO_MM +EXPORT_SYMBOL(filemap_nopage); +EXPORT_SYMBOL(filemap_sync); +EXPORT_SYMBOL(filemap_fdatawrite); +EXPORT_SYMBOL(filemap_fdatasync); +EXPORT_SYMBOL(filemap_fdatawait); +#endif +EXPORT_SYMBOL(lock_page); +EXPORT_SYMBOL(unlock_page); +EXPORT_SYMBOL(wakeup_page_waiters); + +/* device registration */ +EXPORT_SYMBOL(register_chrdev); +EXPORT_SYMBOL(unregister_chrdev); +EXPORT_SYMBOL(register_blkdev); +EXPORT_SYMBOL(unregister_blkdev); +EXPORT_SYMBOL(tty_register_driver); +EXPORT_SYMBOL(tty_unregister_driver); +EXPORT_SYMBOL(tty_std_termios); + +/* block device driver support */ +EXPORT_SYMBOL(blksize_size); +EXPORT_SYMBOL(hardsect_size); +EXPORT_SYMBOL(blk_size); +EXPORT_SYMBOL(blk_dev); +EXPORT_SYMBOL(is_read_only); +EXPORT_SYMBOL(set_device_ro); +EXPORT_SYMBOL(bmap); +EXPORT_SYMBOL(sync_dev); +EXPORT_SYMBOL(devfs_register_partitions); +EXPORT_SYMBOL(blkdev_open); +EXPORT_SYMBOL(blkdev_get); +EXPORT_SYMBOL(blkdev_put); +EXPORT_SYMBOL(ioctl_by_bdev); +EXPORT_SYMBOL(grok_partitions); +EXPORT_SYMBOL(register_disk); +EXPORT_SYMBOL(tq_disk); +EXPORT_SYMBOL(init_buffer); +EXPORT_SYMBOL(refile_buffer); +EXPORT_SYMBOL(max_sectors); +EXPORT_SYMBOL(max_readahead); + +/* tty routines */ +EXPORT_SYMBOL(tty_hangup); +EXPORT_SYMBOL(tty_wait_until_sent); +EXPORT_SYMBOL(tty_check_change); +EXPORT_SYMBOL(tty_hung_up_p); +EXPORT_SYMBOL(tty_flip_buffer_push); +EXPORT_SYMBOL(tty_get_baud_rate); +EXPORT_SYMBOL(do_SAK); + +/* filesystem registration */ +EXPORT_SYMBOL(register_filesystem); +EXPORT_SYMBOL(unregister_filesystem); +EXPORT_SYMBOL(kern_mount); +EXPORT_SYMBOL(__mntput); +EXPORT_SYMBOL(may_umount); + +/* executable format registration */ +EXPORT_SYMBOL(register_binfmt); +EXPORT_SYMBOL(unregister_binfmt); +EXPORT_SYMBOL(search_binary_handler); +EXPORT_SYMBOL(prepare_binprm); +EXPORT_SYMBOL(compute_creds); +#ifndef NO_MM +EXPORT_SYMBOL(remove_arg_zero); +#endif +EXPORT_SYMBOL(set_binfmt); + +/* sysctl table registration */ +EXPORT_SYMBOL(register_sysctl_table); +EXPORT_SYMBOL(unregister_sysctl_table); +EXPORT_SYMBOL(sysctl_string); +EXPORT_SYMBOL(sysctl_intvec); +EXPORT_SYMBOL(sysctl_jiffies); +EXPORT_SYMBOL(proc_dostring); +EXPORT_SYMBOL(proc_dointvec); +EXPORT_SYMBOL(proc_dointvec_jiffies); +EXPORT_SYMBOL(proc_dointvec_minmax); +EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); +EXPORT_SYMBOL(proc_doulongvec_minmax); + +/* interrupt handling */ +EXPORT_SYMBOL(add_timer); +EXPORT_SYMBOL(del_timer); +EXPORT_SYMBOL(request_irq); +EXPORT_SYMBOL(free_irq); +#if !defined(CONFIG_IA64) /* irq_stat is part of struct cpuinfo_ia64 */ +EXPORT_SYMBOL(irq_stat); +#endif + +/* waitqueue handling */ +EXPORT_SYMBOL(add_wait_queue); +EXPORT_SYMBOL(add_wait_queue_exclusive); 
+EXPORT_SYMBOL(remove_wait_queue); + +/* completion handling */ +EXPORT_SYMBOL(wait_for_completion); +EXPORT_SYMBOL(complete); + +/* The notion of irq probe/assignment is foreign to S/390 */ + +#if !defined(CONFIG_ARCH_S390) +EXPORT_SYMBOL(probe_irq_on); +EXPORT_SYMBOL(probe_irq_off); +#endif + +#ifdef CONFIG_SMP +EXPORT_SYMBOL(del_timer_sync); +#endif +EXPORT_SYMBOL(mod_timer); +EXPORT_SYMBOL(tq_timer); +EXPORT_SYMBOL(tq_immediate); + +#ifdef CONFIG_SMP +/* Various random spinlocks we want to export */ +EXPORT_SYMBOL(tqueue_lock); + +/* Big-Reader lock implementation */ +EXPORT_SYMBOL(__brlock_array); +#ifndef __BRLOCK_USE_ATOMICS +EXPORT_SYMBOL(__br_write_locks); +#endif +EXPORT_SYMBOL(__br_write_lock); +EXPORT_SYMBOL(__br_write_unlock); +#endif + +/* Kiobufs */ +EXPORT_SYMBOL(alloc_kiovec); +EXPORT_SYMBOL(free_kiovec); +EXPORT_SYMBOL(expand_kiobuf); + +EXPORT_SYMBOL(map_user_kiobuf); +EXPORT_SYMBOL(unmap_kiobuf); +EXPORT_SYMBOL(lock_kiovec); +EXPORT_SYMBOL(unlock_kiovec); +EXPORT_SYMBOL(brw_kiovec); +EXPORT_SYMBOL(kiobuf_wait_for_io); + +/* dma handling */ +#ifndef CONFIG_ARM +EXPORT_SYMBOL(request_dma); +EXPORT_SYMBOL(free_dma); +EXPORT_SYMBOL(dma_spin_lock); +#endif +#ifdef HAVE_DISABLE_HLT +EXPORT_SYMBOL(disable_hlt); +EXPORT_SYMBOL(enable_hlt); +#endif + +/* resource handling */ +EXPORT_SYMBOL(request_resource); +EXPORT_SYMBOL(release_resource); +EXPORT_SYMBOL(allocate_resource); +EXPORT_SYMBOL(check_resource); +EXPORT_SYMBOL(__request_region); +EXPORT_SYMBOL(__check_region); +EXPORT_SYMBOL(__release_region); +EXPORT_SYMBOL(ioport_resource); +EXPORT_SYMBOL(iomem_resource); + +/* process management */ +EXPORT_SYMBOL(complete_and_exit); +EXPORT_SYMBOL(__wake_up); +EXPORT_SYMBOL(__wake_up_sync); +EXPORT_SYMBOL(wake_up_process); +EXPORT_SYMBOL(sleep_on); +EXPORT_SYMBOL(sleep_on_timeout); +EXPORT_SYMBOL(interruptible_sleep_on); +EXPORT_SYMBOL(interruptible_sleep_on_timeout); +EXPORT_SYMBOL(schedule); +EXPORT_SYMBOL(schedule_timeout); +#if CONFIG_SMP +EXPORT_SYMBOL(set_cpus_allowed); +#endif +EXPORT_SYMBOL(yield); +EXPORT_SYMBOL(__cond_resched); +EXPORT_SYMBOL(jiffies); +EXPORT_SYMBOL(xtime); +EXPORT_SYMBOL(do_gettimeofday); +EXPORT_SYMBOL(do_settimeofday); + +#if !defined(__ia64__) +EXPORT_SYMBOL(loops_per_jiffy); +#endif + +EXPORT_SYMBOL(kstat); +EXPORT_SYMBOL(nr_running); + +/* misc */ +EXPORT_SYMBOL(panic); +EXPORT_SYMBOL(panic_notifier_list); +EXPORT_SYMBOL(panic_timeout); +EXPORT_SYMBOL(__out_of_line_bug); +EXPORT_SYMBOL(sprintf); +EXPORT_SYMBOL(snprintf); +EXPORT_SYMBOL(sscanf); +EXPORT_SYMBOL(vsprintf); +EXPORT_SYMBOL(vsnprintf); +EXPORT_SYMBOL(vsscanf); +EXPORT_SYMBOL(kdevname); +EXPORT_SYMBOL(bdevname); +EXPORT_SYMBOL(cdevname); +EXPORT_SYMBOL(simple_strtol); +EXPORT_SYMBOL(simple_strtoul); +EXPORT_SYMBOL(simple_strtoull); +EXPORT_SYMBOL(system_utsname); /* UTS data */ +EXPORT_SYMBOL(uts_sem); /* UTS semaphore */ +#ifdef CONFIG_IPV6_NODEINFO +EXPORT_SYMBOL(icmpv6_sethostname_hook); +EXPORT_SYMBOL(icmpv6_sethostname_hook_sem); +#endif +#ifndef __mips__ +EXPORT_SYMBOL(sys_call_table); +#endif +EXPORT_SYMBOL(machine_restart); +EXPORT_SYMBOL(machine_halt); +EXPORT_SYMBOL(machine_power_off); +EXPORT_SYMBOL(_ctype); +EXPORT_SYMBOL(secure_tcp_sequence_number); +EXPORT_SYMBOL(get_random_bytes); +EXPORT_SYMBOL(securebits); +EXPORT_SYMBOL(cap_bset); +EXPORT_SYMBOL(reparent_to_init); +EXPORT_SYMBOL(daemonize); +EXPORT_SYMBOL(csum_partial); /* for networking and md */ +EXPORT_SYMBOL(seq_escape); +EXPORT_SYMBOL(seq_printf); +EXPORT_SYMBOL(seq_open); +EXPORT_SYMBOL(seq_release); 
+EXPORT_SYMBOL(seq_read); +EXPORT_SYMBOL(seq_lseek); +EXPORT_SYMBOL(single_open); +EXPORT_SYMBOL(single_release); +EXPORT_SYMBOL(seq_release_private); + +/* Program loader interfaces */ +#ifndef NO_MM +EXPORT_SYMBOL(setup_arg_pages); +EXPORT_SYMBOL(copy_strings_kernel); +#endif +EXPORT_SYMBOL(do_execve); +EXPORT_SYMBOL(flush_old_exec); +EXPORT_SYMBOL(kernel_read); +EXPORT_SYMBOL(open_exec); + +/* Miscellaneous access points */ +EXPORT_SYMBOL(si_meminfo); + +/* Added to make file system as module */ +EXPORT_SYMBOL(sys_tz); +EXPORT_SYMBOL(file_fsync); +EXPORT_SYMBOL(fsync_buffers_list); +EXPORT_SYMBOL(clear_inode); +EXPORT_SYMBOL(___strtok); +EXPORT_SYMBOL(init_special_inode); +EXPORT_SYMBOL(read_ahead); +EXPORT_SYMBOL(get_hash_table); +EXPORT_SYMBOL(new_inode); +EXPORT_SYMBOL(insert_inode_hash); +EXPORT_SYMBOL(remove_inode_hash); +EXPORT_SYMBOL(buffer_insert_list); +EXPORT_SYMBOL(make_bad_inode); +EXPORT_SYMBOL(is_bad_inode); +EXPORT_SYMBOL(event); +EXPORT_SYMBOL(brw_page); +EXPORT_SYMBOL(__inode_dir_notify); + +#ifdef CONFIG_UID16 +EXPORT_SYMBOL(overflowuid); +EXPORT_SYMBOL(overflowgid); +#endif +EXPORT_SYMBOL(fs_overflowuid); +EXPORT_SYMBOL(fs_overflowgid); + +/* all busmice */ +EXPORT_SYMBOL(fasync_helper); +EXPORT_SYMBOL(kill_fasync); + +EXPORT_SYMBOL(disk_name); /* for md.c */ + +/* binfmt_aout */ +EXPORT_SYMBOL(get_write_access); + +/* library functions */ +EXPORT_SYMBOL(strnicmp); +EXPORT_SYMBOL(strspn); +EXPORT_SYMBOL(strsep); + +#ifdef CONFIG_CRC32 +EXPORT_SYMBOL(crc32_le); +EXPORT_SYMBOL(crc32_be); +EXPORT_SYMBOL(bitreverse); +#endif + +#ifdef CONFIG_FW_LOADER +EXPORT_SYMBOL(release_firmware); +EXPORT_SYMBOL(request_firmware); +EXPORT_SYMBOL(request_firmware_nowait); +EXPORT_SYMBOL(register_firmware); +#endif + +/* software interrupts */ +EXPORT_SYMBOL(tasklet_hi_vec); +EXPORT_SYMBOL(tasklet_vec); +EXPORT_SYMBOL(bh_task_vec); +EXPORT_SYMBOL(init_bh); +EXPORT_SYMBOL(remove_bh); +EXPORT_SYMBOL(tasklet_init); +EXPORT_SYMBOL(tasklet_kill); +EXPORT_SYMBOL(__run_task_queue); +EXPORT_SYMBOL(do_softirq); +EXPORT_SYMBOL(raise_softirq); +EXPORT_SYMBOL(cpu_raise_softirq); +EXPORT_SYMBOL(__tasklet_schedule); +EXPORT_SYMBOL(__tasklet_hi_schedule); + +/* init task, for moving kthread roots - ought to export a function ?? */ + +EXPORT_SYMBOL(init_task_union); + +EXPORT_SYMBOL(tasklist_lock); +EXPORT_SYMBOL(pidhash); +EXPORT_SYMBOL(unshare_files); + +/* debug */ +EXPORT_SYMBOL(dump_stack); + +/* To match ksyms with System.map */ +extern const char _end[]; +EXPORT_SYMBOL(_end); diff --git a/uClinux-2.4.31-uc0/kernel/module.c b/uClinux-2.4.31-uc0/kernel/module.c new file mode 100644 index 0000000..902261c --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/module.c @@ -0,0 +1,1298 @@ +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <asm/module.h> +#include <asm/uaccess.h> +#include <linux/vmalloc.h> +#include <linux/smp_lock.h> +#include <asm/pgalloc.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/kmod.h> +#include <linux/seq_file.h> + +/* + * Originally by Anonymous (as far as I know...) + * Linux version by Bas Laarhoven <bas@vimec.nl> + * 0.99.14 version by Jon Tombs <jon@gtex02.us.es>, + * Heavily modified by Bjorn Ekwall <bj0rn@blox.se> May 1994 (C) + * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996 + * Add MOD_INITIALIZING Keith Owens <kaos@ocs.com.au> Nov 1999 + * Add kallsyms support, Keith Owens <kaos@ocs.com.au> Apr 2000 + * Add asm/module support, IA64 has special requirements. 
Keith Owens <kaos@ocs.com.au> Sep 2000 + * Fix assorted bugs in module verification. Keith Owens <kaos@ocs.com.au> Sep 2000 + * Fix sys_init_module race, Andrew Morton <andrewm@uow.edu.au> Oct 2000 + * http://www.uwsg.iu.edu/hypermail/linux/kernel/0008.3/0379.html + * Replace xxx_module_symbol with inter_module_xxx. Keith Owens <kaos@ocs.com.au> Oct 2000 + * Add a module list lock for kernel fault race fixing. Alan Cox <alan@redhat.com> + * Fix to respect mod->can_unload(). YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> + * + * This source is covered by the GNU GPL, the same as all kernel sources. + */ + +#if defined(CONFIG_MODULES) || defined(CONFIG_KALLSYMS) + +extern struct module_symbol __start___ksymtab[]; +extern struct module_symbol __stop___ksymtab[]; + +extern const struct exception_table_entry __start___ex_table[]; +extern const struct exception_table_entry __stop___ex_table[]; + +extern const char __start___kallsyms[] __attribute__ ((weak)); +extern const char __stop___kallsyms[] __attribute__ ((weak)); + +struct module kernel_module = +{ + size_of_struct: sizeof(struct module), + name: "", + uc: {ATOMIC_INIT(1)}, + flags: MOD_RUNNING, + syms: __start___ksymtab, + ex_table_start: __start___ex_table, + ex_table_end: __stop___ex_table, + kallsyms_start: __start___kallsyms, + kallsyms_end: __stop___kallsyms, +}; + +struct module *module_list = &kernel_module; + +#endif /* defined(CONFIG_MODULES) || defined(CONFIG_KALLSYMS) */ + +/* inter_module functions are always available, even when the kernel is + * compiled without modules. Consumers of inter_module_xxx routines + * will always work, even when both are built into the kernel, this + * approach removes lots of #ifdefs in mainline code. + */ + +static struct list_head ime_list = LIST_HEAD_INIT(ime_list); +static spinlock_t ime_lock = SPIN_LOCK_UNLOCKED; +static int kmalloc_failed; + +/* + * This lock prevents modifications that might race the kernel fault + * fixups. It does not prevent reader walks that the modules code + * does. The kernel lock does that. + * + * Since vmalloc fault fixups occur in any context this lock is taken + * irqsave at all times. + */ + +spinlock_t modlist_lock = SPIN_LOCK_UNLOCKED; + +/** + * inter_module_register - register a new set of inter module data. + * @im_name: an arbitrary string to identify the data, must be unique + * @owner: module that is registering the data, always use THIS_MODULE + * @userdata: pointer to arbitrary userdata to be registered + * + * Description: Check that the im_name has not already been registered, + * complain if it has. For new data, add it to the inter_module_entry + * list. 
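+ *
+ * Usage sketch (the im_name string and ops structure are made-up
+ * examples, not part of this file):
+ *
+ *	static struct example_ops ops = { ... };
+ *	inter_module_register("example-ops", THIS_MODULE, &ops);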
+ */ +void inter_module_register(const char *im_name, struct module *owner, const void *userdata) +{ + struct list_head *tmp; + struct inter_module_entry *ime, *ime_new; + + if (!(ime_new = kmalloc(sizeof(*ime), GFP_KERNEL))) { + /* Overloaded kernel, not fatal */ + printk(KERN_ERR + "Aiee, inter_module_register: cannot kmalloc entry for '%s'\n", + im_name); + kmalloc_failed = 1; + return; + } + memset(ime_new, 0, sizeof(*ime_new)); + ime_new->im_name = im_name; + ime_new->owner = owner; + ime_new->userdata = userdata; + + spin_lock(&ime_lock); + list_for_each(tmp, &ime_list) { + ime = list_entry(tmp, struct inter_module_entry, list); + if (strcmp(ime->im_name, im_name) == 0) { + spin_unlock(&ime_lock); + kfree(ime_new); + /* Program logic error, fatal */ + printk(KERN_ERR "inter_module_register: duplicate im_name '%s'", im_name); + BUG(); + } + } + list_add(&(ime_new->list), &ime_list); + spin_unlock(&ime_lock); +} + +/** + * inter_module_unregister - unregister a set of inter module data. + * @im_name: an arbitrary string to identify the data, must be unique + * + * Description: Check that the im_name has been registered, complain if + * it has not. For existing data, remove it from the + * inter_module_entry list. + */ +void inter_module_unregister(const char *im_name) +{ + struct list_head *tmp; + struct inter_module_entry *ime; + + spin_lock(&ime_lock); + list_for_each(tmp, &ime_list) { + ime = list_entry(tmp, struct inter_module_entry, list); + if (strcmp(ime->im_name, im_name) == 0) { + list_del(&(ime->list)); + spin_unlock(&ime_lock); + kfree(ime); + return; + } + } + spin_unlock(&ime_lock); + if (kmalloc_failed) { + printk(KERN_ERR + "inter_module_unregister: no entry for '%s', " + "probably caused by previous kmalloc failure\n", + im_name); + return; + } + else { + /* Program logic error, fatal */ + printk(KERN_ERR "inter_module_unregister: no entry for '%s'", im_name); + BUG(); + } +} + +/** + * inter_module_get - return arbitrary userdata from another module. + * @im_name: an arbitrary string to identify the data, must be unique + * + * Description: If the im_name has not been registered, return NULL. + * Try to increment the use count on the owning module, if that fails + * then return NULL. Otherwise return the userdata. + */ +const void *inter_module_get(const char *im_name) +{ + struct list_head *tmp; + struct inter_module_entry *ime; + const void *result = NULL; + + spin_lock(&ime_lock); + list_for_each(tmp, &ime_list) { + ime = list_entry(tmp, struct inter_module_entry, list); + if (strcmp(ime->im_name, im_name) == 0) { + if (try_inc_mod_count(ime->owner)) + result = ime->userdata; + break; + } + } + spin_unlock(&ime_lock); + return(result); +} + +/** + * inter_module_get_request - im get with automatic request_module. + * @im_name: an arbitrary string to identify the data, must be unique + * @modname: module that is expected to register im_name + * + * Description: If inter_module_get fails, do request_module then retry. + */ +const void *inter_module_get_request(const char *im_name, const char *modname) +{ + const void *result = inter_module_get(im_name); + if (!result) { + request_module(modname); + result = inter_module_get(im_name); + } + return(result); +} + +/** + * inter_module_put - release use of data from another module. + * @im_name: an arbitrary string to identify the data, must be unique + * + * Description: If the im_name has not been registered, complain, + * otherwise decrement the use count on the owning module. 
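+ *
+ * Pairs with inter_module_get(); sketch only, reusing the made-up
+ * im_name from the example above:
+ *
+ *	const struct example_ops *ops = inter_module_get("example-ops");
+ *	if (ops) {
+ *		...use ops...
+ *		inter_module_put("example-ops");
+ *	}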
+ */ +void inter_module_put(const char *im_name) +{ + struct list_head *tmp; + struct inter_module_entry *ime; + + spin_lock(&ime_lock); + list_for_each(tmp, &ime_list) { + ime = list_entry(tmp, struct inter_module_entry, list); + if (strcmp(ime->im_name, im_name) == 0) { + if (ime->owner) + __MOD_DEC_USE_COUNT(ime->owner); + spin_unlock(&ime_lock); + return; + } + } + spin_unlock(&ime_lock); + printk(KERN_ERR "inter_module_put: no entry for '%s'", im_name); + BUG(); +} + + +#if defined(CONFIG_MODULES) /* The rest of the source */ + +static long get_mod_name(const char *user_name, char **buf); +static void put_mod_name(char *buf); +struct module *find_module(const char *name); +void free_module(struct module *, int tag_freed); + + +/* + * Called at boot time + */ + +void __init init_modules(void) +{ + kernel_module.nsyms = __stop___ksymtab - __start___ksymtab; + + arch_init_modules(&kernel_module); +} + +/* + * Copy the name of a module from user space. + */ + +static inline long +get_mod_name(const char *user_name, char **buf) +{ + unsigned long page; + long retval; + + page = __get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + retval = strncpy_from_user((char *)page, user_name, PAGE_SIZE); + if (retval > 0) { + if (retval < PAGE_SIZE) { + *buf = (char *)page; + return retval; + } + retval = -ENAMETOOLONG; + } else if (!retval) + retval = -EINVAL; + + free_page(page); + return retval; +} + +static inline void +put_mod_name(char *buf) +{ + free_page((unsigned long)buf); +} + +/* + * Allocate space for a module. + */ + +asmlinkage unsigned long +sys_create_module(const char *name_user, size_t size) +{ + char *name; + long namelen, error; + struct module *mod; + unsigned long flags; + + if (!capable(CAP_SYS_MODULE)) + return -EPERM; + lock_kernel(); + if ((namelen = get_mod_name(name_user, &name)) < 0) { + error = namelen; + goto err0; + } + if (size < sizeof(struct module)+namelen+1) { + error = -EINVAL; + goto err1; + } + if (find_module(name) != NULL) { + error = -EEXIST; + goto err1; + } + if ((mod = (struct module *)module_map(size)) == NULL) { + error = -ENOMEM; + goto err1; + } + + memset(mod, 0, sizeof(*mod)); + mod->size_of_struct = sizeof(*mod); + mod->name = (char *)(mod + 1); + mod->size = size; + memcpy((char*)(mod+1), name, namelen+1); + + put_mod_name(name); + + spin_lock_irqsave(&modlist_lock, flags); + mod->next = module_list; + module_list = mod; /* link it in */ + spin_unlock_irqrestore(&modlist_lock, flags); + + error = (long) mod; + goto err0; +err1: + put_mod_name(name); +err0: + unlock_kernel(); + return error; +} + +/* + * Initialize a module. + */ + +asmlinkage long +sys_init_module(const char *name_user, struct module *mod_user) +{ + struct module mod_tmp, *mod, *mod2 = NULL; + char *name, *n_name, *name_tmp = NULL; + long namelen, n_namelen, i, error; + unsigned long mod_user_size, flags; + struct module_ref *dep; + + if (!capable(CAP_SYS_MODULE)) + return -EPERM; + lock_kernel(); + if ((namelen = get_mod_name(name_user, &name)) < 0) { + error = namelen; + goto err0; + } + if ((mod = find_module(name)) == NULL) { + error = -ENOENT; + goto err1; + } + + /* Check module header size. We allow a bit of slop over the + size we are familiar with to cope with a version of insmod + for a newer kernel. But don't over do it. 
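+ (Concretely, the checks below accept any size_of_struct that is at
+ least the offset of the persist_start member and at most
+ sizeof(struct module) plus sixteen extra pointers.)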
*/ + if ((error = get_user(mod_user_size, &mod_user->size_of_struct)) != 0) + goto err1; + if (mod_user_size < (unsigned long)&((struct module *)0L)->persist_start + || mod_user_size > sizeof(struct module) + 16*sizeof(void*)) { + printk(KERN_ERR "init_module: Invalid module header size.\n" + KERN_ERR "A new version of the modutils is likely " + "needed.\n"); + error = -EINVAL; + goto err1; + } + + /* Hold the current contents while we play with the user's idea + of righteousness. */ + mod_tmp = *mod; + name_tmp = kmalloc(strlen(mod->name) + 1, GFP_KERNEL); /* Where's kstrdup()? */ + if (name_tmp == NULL) { + error = -ENOMEM; + goto err1; + } + strcpy(name_tmp, mod->name); + + /* Copying mod_user directly over mod breaks the module_list chain and + * races against search_exception_table. copy_from_user may sleep so it + * cannot be under modlist_lock, do the copy in two stages. + */ + if (!(mod2 = vmalloc(mod_user_size))) { + error = -ENOMEM; + goto err2; + } + error = copy_from_user(mod2, mod_user, mod_user_size); + if (error) { + error = -EFAULT; + goto err2; + } + spin_lock_irqsave(&modlist_lock, flags); + memcpy(mod, mod2, mod_user_size); + mod->next = mod_tmp.next; + spin_unlock_irqrestore(&modlist_lock, flags); + + /* Sanity check the size of the module. */ + error = -EINVAL; + + if (mod->size > mod_tmp.size) { + printk(KERN_ERR "init_module: Size of initialized module " + "exceeds size of created module.\n"); + goto err2; + } + + /* Make sure all interesting pointers are sane. */ + + if (!mod_bound(mod->name, namelen, mod)) { + printk(KERN_ERR "init_module: mod->name out of bounds.\n"); + goto err2; + } + if (mod->nsyms && !mod_bound(mod->syms, mod->nsyms, mod)) { + printk(KERN_ERR "init_module: mod->syms out of bounds.\n"); + goto err2; + } + if (mod->ndeps && !mod_bound(mod->deps, mod->ndeps, mod)) { + printk(KERN_ERR "init_module: mod->deps out of bounds.\n"); + goto err2; + } + if (mod->init && !mod_bound(mod->init, 0, mod)) { + printk(KERN_ERR "init_module: mod->init out of bounds.\n"); + goto err2; + } + if (mod->cleanup && !mod_bound(mod->cleanup, 0, mod)) { + printk(KERN_ERR "init_module: mod->cleanup out of bounds.\n"); + goto err2; + } + if (mod->ex_table_start > mod->ex_table_end + || (mod->ex_table_start && + !((unsigned long)mod->ex_table_start >= ((unsigned long)mod + mod->size_of_struct) + && ((unsigned long)mod->ex_table_end + < (unsigned long)mod + mod->size))) + || (((unsigned long)mod->ex_table_start + - (unsigned long)mod->ex_table_end) + % sizeof(struct exception_table_entry))) { + printk(KERN_ERR "init_module: mod->ex_table_* invalid.\n"); + goto err2; + } + if (mod->flags & ~MOD_AUTOCLEAN) { + printk(KERN_ERR "init_module: mod->flags invalid.\n"); + goto err2; + } + if (mod_member_present(mod, can_unload) + && mod->can_unload && !mod_bound(mod->can_unload, 0, mod)) { + printk(KERN_ERR "init_module: mod->can_unload out of bounds.\n"); + goto err2; + } + if (mod_member_present(mod, kallsyms_end)) { + if (mod->kallsyms_end && + (!mod_bound(mod->kallsyms_start, 0, mod) || + !mod_bound(mod->kallsyms_end, 0, mod))) { + printk(KERN_ERR "init_module: mod->kallsyms out of bounds.\n"); + goto err2; + } + if (mod->kallsyms_start > mod->kallsyms_end) { + printk(KERN_ERR "init_module: mod->kallsyms invalid.\n"); + goto err2; + } + } + if (mod_member_present(mod, archdata_end)) { + if (mod->archdata_end && + (!mod_bound(mod->archdata_start, 0, mod) || + !mod_bound(mod->archdata_end, 0, mod))) { + printk(KERN_ERR "init_module: mod->archdata out of bounds.\n"); + goto err2; + 
} + if (mod->archdata_start > mod->archdata_end) { + printk(KERN_ERR "init_module: mod->archdata invalid.\n"); + goto err2; + } + } + if (mod_member_present(mod, kernel_data) && mod->kernel_data) { + printk(KERN_ERR "init_module: mod->kernel_data must be zero.\n"); + goto err2; + } + + /* Check that the user isn't doing something silly with the name. */ + + if ((n_namelen = get_mod_name(mod->name - (unsigned long)mod + + (unsigned long)mod_user, + &n_name)) < 0) { + printk(KERN_ERR "init_module: get_mod_name failure.\n"); + error = n_namelen; + goto err2; + } + if (namelen != n_namelen || strcmp(n_name, name_tmp) != 0) { + printk(KERN_ERR "init_module: changed module name to " + "`%s' from `%s'\n", + n_name, name_tmp); + goto err3; + } + + /* Ok, that's about all the sanity we can stomach; copy the rest. */ + + if (copy_from_user((char *)mod+mod_user_size, + (char *)mod_user+mod_user_size, + mod->size-mod_user_size)) { + error = -EFAULT; + goto err3; + } + + if (module_arch_init(mod)) + goto err3; + + /* On some machines it is necessary to do something here + to make the I and D caches consistent. */ + flush_icache_range((unsigned long)mod, (unsigned long)mod + mod->size); + + mod->refs = NULL; + + /* Sanity check the module's dependents */ + for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) { + struct module *o, *d = dep->dep; + + /* Make sure the indicated dependencies are really modules. */ + if (d == mod) { + printk(KERN_ERR "init_module: self-referential " + "dependency in mod->deps.\n"); + goto err3; + } + + /* Scan the current modules for this dependency */ + for (o = module_list; o != &kernel_module && o != d; o = o->next) + ; + + if (o != d) { + printk(KERN_ERR "init_module: found dependency that is " + "(no longer?) a module.\n"); + goto err3; + } + } + + /* Update module references. */ + for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) { + struct module *d = dep->dep; + + dep->ref = mod; + dep->next_ref = d->refs; + d->refs = dep; + /* Being referenced by a dependent module counts as a + use as far as kmod is concerned. */ + d->flags |= MOD_USED_ONCE; + } + + /* Free our temporary memory. */ + put_mod_name(n_name); + put_mod_name(name); + + /* Initialize the module. */ + atomic_set(&mod->uc.usecount,1); + mod->flags |= MOD_INITIALIZING; + if (mod->init && (error = mod->init()) != 0) { + atomic_set(&mod->uc.usecount,0); + mod->flags &= ~MOD_INITIALIZING; + if (error > 0) /* Buggy module */ + error = -EBUSY; + goto err0; + } + atomic_dec(&mod->uc.usecount); + + /* And set it running. 
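+	   The temporary reference taken around init() has already been
+	   dropped above, so the module can be unloaded again from this
+	   point on.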
*/ + mod->flags = (mod->flags | MOD_RUNNING) & ~MOD_INITIALIZING; + error = 0; + goto err0; + +err3: + put_mod_name(n_name); +err2: + *mod = mod_tmp; + strcpy((char *)mod->name, name_tmp); /* We know there is room for this */ +err1: + put_mod_name(name); +err0: + if (mod2) + vfree(mod2); + unlock_kernel(); + kfree(name_tmp); + return error; +} + +static spinlock_t unload_lock = SPIN_LOCK_UNLOCKED; +int try_inc_mod_count(struct module *mod) +{ + int res = 1; + if (mod) { + spin_lock(&unload_lock); + if (mod->flags & MOD_DELETED) + res = 0; + else + __MOD_INC_USE_COUNT(mod); + spin_unlock(&unload_lock); + } + return res; +} + +asmlinkage long +sys_delete_module(const char *name_user) +{ + struct module *mod, *next; + char *name; + long error; + int something_changed; + + if (!capable(CAP_SYS_MODULE)) + return -EPERM; + + lock_kernel(); + if (name_user) { + if ((error = get_mod_name(name_user, &name)) < 0) + goto out; + error = -ENOENT; + if ((mod = find_module(name)) == NULL) { + put_mod_name(name); + goto out; + } + put_mod_name(name); + error = -EBUSY; + if (mod->refs != NULL) + goto out; + + spin_lock(&unload_lock); + if (!__MOD_IN_USE(mod)) { + mod->flags |= MOD_DELETED; + spin_unlock(&unload_lock); + free_module(mod, 0); + error = 0; + } else { + spin_unlock(&unload_lock); + } + goto out; + } + + /* Do automatic reaping */ +restart: + something_changed = 0; + + for (mod = module_list; mod != &kernel_module; mod = next) { + next = mod->next; + spin_lock(&unload_lock); + if (mod->refs == NULL + && (mod->flags & MOD_AUTOCLEAN) + && (mod->flags & MOD_RUNNING) + && !(mod->flags & MOD_DELETED) + && (mod->flags & MOD_USED_ONCE) + && !__MOD_IN_USE(mod)) { + if ((mod->flags & MOD_VISITED) + && !(mod->flags & MOD_JUST_FREED)) { + spin_unlock(&unload_lock); + mod->flags &= ~MOD_VISITED; + } else { + mod->flags |= MOD_DELETED; + spin_unlock(&unload_lock); + free_module(mod, 1); + something_changed = 1; + } + } else { + spin_unlock(&unload_lock); + } + } + + if (something_changed) + goto restart; + + for (mod = module_list; mod != &kernel_module; mod = mod->next) + mod->flags &= ~MOD_JUST_FREED; + + error = 0; +out: + unlock_kernel(); + return error; +} + +/* Query various bits about modules. 
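+ *
+ * Illustrative user-space usage (QM_* constants from <linux/module.h>,
+ * buffer size arbitrary):
+ *
+ *	size_t n;
+ *	char buf[4096];
+ *	if (query_module(NULL, QM_MODULES, buf, sizeof(buf), &n) == 0)
+ *		... buf now holds n NUL-terminated module names ...
+ *
+ * On -ENOSPC each helper stores the buffer size actually needed in *ret.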
*/ + +static int +qm_modules(char *buf, size_t bufsize, size_t *ret) +{ + struct module *mod; + size_t nmod, space, len; + + nmod = space = 0; + + for (mod=module_list; mod != &kernel_module; mod=mod->next, ++nmod) { + len = strlen(mod->name)+1; + if (len > bufsize) + goto calc_space_needed; + if (copy_to_user(buf, mod->name, len)) + return -EFAULT; + buf += len; + bufsize -= len; + space += len; + } + + if (put_user(nmod, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + space += len; + while ((mod = mod->next) != &kernel_module) + space += strlen(mod->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; +} + +static int +qm_deps(struct module *mod, char *buf, size_t bufsize, size_t *ret) +{ + size_t i, space, len; + + if (mod == &kernel_module) + return -EINVAL; + if (!MOD_CAN_QUERY(mod)) + if (put_user(0, ret)) + return -EFAULT; + else + return 0; + + space = 0; + for (i = 0; i < mod->ndeps; ++i) { + const char *dep_name = mod->deps[i].dep->name; + + len = strlen(dep_name)+1; + if (len > bufsize) + goto calc_space_needed; + if (copy_to_user(buf, dep_name, len)) + return -EFAULT; + buf += len; + bufsize -= len; + space += len; + } + + if (put_user(i, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + space += len; + while (++i < mod->ndeps) + space += strlen(mod->deps[i].dep->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; +} + +static int +qm_refs(struct module *mod, char *buf, size_t bufsize, size_t *ret) +{ + size_t nrefs, space, len; + struct module_ref *ref; + + if (mod == &kernel_module) + return -EINVAL; + if (!MOD_CAN_QUERY(mod)) + if (put_user(0, ret)) + return -EFAULT; + else + return 0; + + space = 0; + for (nrefs = 0, ref = mod->refs; ref ; ++nrefs, ref = ref->next_ref) { + const char *ref_name = ref->ref->name; + + len = strlen(ref_name)+1; + if (len > bufsize) + goto calc_space_needed; + if (copy_to_user(buf, ref_name, len)) + return -EFAULT; + buf += len; + bufsize -= len; + space += len; + } + + if (put_user(nrefs, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + space += len; + while ((ref = ref->next_ref) != NULL) + space += strlen(ref->ref->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; +} + +static int +qm_symbols(struct module *mod, char *buf, size_t bufsize, size_t *ret) +{ + size_t i, space, len; + struct module_symbol *s; + char *strings; + unsigned long *vals; + + if (!MOD_CAN_QUERY(mod)) + if (put_user(0, ret)) + return -EFAULT; + else + return 0; + + space = mod->nsyms * 2*sizeof(void *); + + i = len = 0; + s = mod->syms; + + if (space > bufsize) + goto calc_space_needed; + + if (!access_ok(VERIFY_WRITE, buf, space)) + return -EFAULT; + + bufsize -= space; + vals = (unsigned long *)buf; + strings = buf+space; + + for (; i < mod->nsyms ; ++i, ++s, vals += 2) { + len = strlen(s->name)+1; + if (len > bufsize) + goto calc_space_needed; + + if (copy_to_user(strings, s->name, len) + || __put_user(s->value, vals+0) + || __put_user(space, vals+1)) + return -EFAULT; + + strings += len; + bufsize -= len; + space += len; + } + if (put_user(i, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + for (; i < mod->nsyms; ++i, ++s) + space += strlen(s->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; +} + +static int +qm_info(struct module *mod, char *buf, size_t bufsize, size_t *ret) +{ + int error = 0; + + if (mod == &kernel_module) + return -EINVAL; + + if (sizeof(struct 
module_info) <= bufsize) { + struct module_info info; + info.addr = (unsigned long)mod; + info.size = mod->size; + info.flags = mod->flags; + + /* usecount is one too high here - report appropriately to + compensate for locking */ + info.usecount = (mod_member_present(mod, can_unload) + && mod->can_unload && mod->can_unload() + ? -1 : atomic_read(&mod->uc.usecount)-1); + + if (copy_to_user(buf, &info, sizeof(struct module_info))) + return -EFAULT; + } else + error = -ENOSPC; + + if (put_user(sizeof(struct module_info), ret)) + return -EFAULT; + + return error; +} + +asmlinkage long +sys_query_module(const char *name_user, int which, char *buf, size_t bufsize, + size_t *ret) +{ + struct module *mod; + int err; + + lock_kernel(); + if (name_user == NULL) + mod = &kernel_module; + else { + long namelen; + char *name; + + if ((namelen = get_mod_name(name_user, &name)) < 0) { + err = namelen; + goto out; + } + err = -ENOENT; + if ((mod = find_module(name)) == NULL) { + put_mod_name(name); + goto out; + } + put_mod_name(name); + } + + /* __MOD_ touches the flags. We must avoid that */ + + atomic_inc(&mod->uc.usecount); + + switch (which) + { + case 0: + err = 0; + break; + case QM_MODULES: + err = qm_modules(buf, bufsize, ret); + break; + case QM_DEPS: + err = qm_deps(mod, buf, bufsize, ret); + break; + case QM_REFS: + err = qm_refs(mod, buf, bufsize, ret); + break; + case QM_SYMBOLS: + err = qm_symbols(mod, buf, bufsize, ret); + break; + case QM_INFO: + err = qm_info(mod, buf, bufsize, ret); + break; + default: + err = -EINVAL; + break; + } + atomic_dec(&mod->uc.usecount); + +out: + unlock_kernel(); + return err; +} + +/* + * Copy the kernel symbol table to user space. If the argument is + * NULL, just return the size of the table. + * + * This call is obsolete. New programs should use query_module+QM_SYMBOLS + * which does not arbitrarily limit the length of symbols. + */ + +asmlinkage long +sys_get_kernel_syms(struct kernel_sym *table) +{ + struct module *mod; + int i; + struct kernel_sym ksym; + + lock_kernel(); + for (mod = module_list, i = 0; mod; mod = mod->next) { + /* include the count for the module name! */ + i += mod->nsyms + 1; + } + + if (table == NULL) + goto out; + + /* So that we don't give the user our stack content */ + memset (&ksym, 0, sizeof (ksym)); + + for (mod = module_list, i = 0; mod; mod = mod->next) { + struct module_symbol *msym; + unsigned int j; + + if (!MOD_CAN_QUERY(mod)) + continue; + + /* magic: write module info as a pseudo symbol */ + ksym.value = (unsigned long)mod; + ksym.name[0] = '#'; + strncpy(ksym.name+1, mod->name, sizeof(ksym.name)-1); + ksym.name[sizeof(ksym.name)-1] = '\0'; + + if (copy_to_user(table, &ksym, sizeof(ksym)) != 0) + goto out; + ++i, ++table; + + if (mod->nsyms == 0) + continue; + + for (j = 0, msym = mod->syms; j < mod->nsyms; ++j, ++msym) { + ksym.value = msym->value; + strncpy(ksym.name, msym->name, sizeof(ksym.name)); + ksym.name[sizeof(ksym.name)-1] = '\0'; + + if (copy_to_user(table, &ksym, sizeof(ksym)) != 0) + goto out; + ++i, ++table; + } + } +out: + unlock_kernel(); + return i; +} + +/* + * Look for a module by name, ignoring modules marked for deletion. + */ + +struct module * +find_module(const char *name) +{ + struct module *mod; + + for (mod = module_list; mod ; mod = mod->next) { + if (mod->flags & MOD_DELETED) + continue; + if (!strcmp(mod->name, name)) + break; + } + + return mod; +} + +/* + * Free the given module. 
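+ *
+ * When tag_freed is set, each dependency whose reference list becomes
+ * empty here is flagged MOD_JUST_FREED, so the autoclean sweep in
+ * sys_delete_module() may reap it in the same pass instead of sparing
+ * it for one more round.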
+ */ + +void +free_module(struct module *mod, int tag_freed) +{ + struct module_ref *dep; + unsigned i; + unsigned long flags; + + /* Let the module clean up. */ + + if (mod->flags & MOD_RUNNING) + { + if(mod->cleanup) + mod->cleanup(); + mod->flags &= ~MOD_RUNNING; + } + + /* Remove the module from the dependency lists. */ + + for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) { + struct module_ref **pp; + for (pp = &dep->dep->refs; *pp != dep; pp = &(*pp)->next_ref) + continue; + *pp = dep->next_ref; + if (tag_freed && dep->dep->refs == NULL) + dep->dep->flags |= MOD_JUST_FREED; + } + + /* And from the main module list. */ + + spin_lock_irqsave(&modlist_lock, flags); + if (mod == module_list) { + module_list = mod->next; + } else { + struct module *p; + for (p = module_list; p->next != mod; p = p->next) + continue; + p->next = mod->next; + } + spin_unlock_irqrestore(&modlist_lock, flags); + + /* And free the memory. */ + + module_unmap(mod); +} + +/* + * Called by the /proc file system to return a current list of modules. + */ + +int get_module_list(char *p) +{ + size_t left = PAGE_SIZE; + struct module *mod; + char tmpstr[64]; + struct module_ref *ref; + + for (mod = module_list; mod != &kernel_module; mod = mod->next) { + long len; + const char *q; + +#define safe_copy_str(str, len) \ + do { \ + if (left < len) \ + goto fini; \ + memcpy(p, str, len); p += len, left -= len; \ + } while (0) +#define safe_copy_cstr(str) safe_copy_str(str, sizeof(str)-1) + + len = strlen(mod->name); + safe_copy_str(mod->name, len); + + if ((len = 20 - len) > 0) { + if (left < len) + goto fini; + memset(p, ' ', len); + p += len; + left -= len; + } + + len = sprintf(tmpstr, "%8lu", mod->size); + safe_copy_str(tmpstr, len); + + if (mod->flags & MOD_RUNNING) { + len = sprintf(tmpstr, "%4ld", + (mod_member_present(mod, can_unload) + && mod->can_unload && mod->can_unload() + ? -1L : (long)atomic_read(&mod->uc.usecount))); + safe_copy_str(tmpstr, len); + } + + if (mod->flags & MOD_DELETED) + safe_copy_cstr(" (deleted)"); + else if (mod->flags & MOD_RUNNING) { + if (mod->flags & MOD_AUTOCLEAN) + safe_copy_cstr(" (autoclean)"); + if (!(mod->flags & MOD_USED_ONCE)) + safe_copy_cstr(" (unused)"); + } + else if (mod->flags & MOD_INITIALIZING) + safe_copy_cstr(" (initializing)"); + else + safe_copy_cstr(" (uninitialized)"); + + if ((ref = mod->refs) != NULL) { + safe_copy_cstr(" ["); + while (1) { + q = ref->ref->name; + len = strlen(q); + safe_copy_str(q, len); + + if ((ref = ref->next_ref) != NULL) + safe_copy_cstr(" "); + else + break; + } + safe_copy_cstr("]"); + } + safe_copy_cstr("\n"); + +#undef safe_copy_str +#undef safe_copy_cstr + } + +fini: + return PAGE_SIZE - left; +} + +/* + * Called by the /proc file system to return a current list of ksyms. 
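+ *
+ * Each line emitted by s_show() below looks like (address and names
+ * illustrative):
+ *
+ *	c0123456 some_exported_symbol	[somemodule]
+ *
+ * with the bracketed module name omitted for the kernel's own symbols.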
+ */ + +struct mod_sym { + struct module *mod; + int index; +}; + +/* iterator */ + +static void *s_start(struct seq_file *m, loff_t *pos) +{ + struct mod_sym *p = kmalloc(sizeof(*p), GFP_KERNEL); + struct module *v; + loff_t n = *pos; + + if (!p) + return ERR_PTR(-ENOMEM); + lock_kernel(); + for (v = module_list, n = *pos; v; n -= v->nsyms, v = v->next) { + if (n < v->nsyms) { + p->mod = v; + p->index = n; + return p; + } + } + unlock_kernel(); + kfree(p); + return NULL; +} + +static void *s_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct mod_sym *v = p; + (*pos)++; + if (++v->index >= v->mod->nsyms) { + do { + v->mod = v->mod->next; + if (!v->mod) { + unlock_kernel(); + kfree(p); + return NULL; + } + } while (!v->mod->nsyms); + v->index = 0; + } + return p; +} + +static void s_stop(struct seq_file *m, void *p) +{ + if (p && !IS_ERR(p)) { + unlock_kernel(); + kfree(p); + } +} + +static int s_show(struct seq_file *m, void *p) +{ + struct mod_sym *v = p; + struct module_symbol *sym; + + if (!MOD_CAN_QUERY(v->mod)) + return 0; + sym = &v->mod->syms[v->index]; + if (*v->mod->name) + seq_printf(m, "%0*lx %s\t[%s]\n", (int)(2*sizeof(void*)), + sym->value, sym->name, v->mod->name); + else + seq_printf(m, "%0*lx %s\n", (int)(2*sizeof(void*)), + sym->value, sym->name); + return 0; +} + +struct seq_operations ksyms_op = { + start: s_start, + next: s_next, + stop: s_stop, + show: s_show +}; + +#else /* CONFIG_MODULES */ + +/* Dummy syscalls for people who don't want modules */ + +asmlinkage unsigned long +sys_create_module(const char *name_user, size_t size) +{ + return -ENOSYS; +} + +asmlinkage long +sys_init_module(const char *name_user, struct module *mod_user) +{ + return -ENOSYS; +} + +asmlinkage long +sys_delete_module(const char *name_user) +{ + return -ENOSYS; +} + +asmlinkage long +sys_query_module(const char *name_user, int which, char *buf, size_t bufsize, + size_t *ret) +{ + /* Let the program know about the new interface. Not that + it'll do them much good. */ + if (which == 0) + return 0; + + return -ENOSYS; +} + +asmlinkage long +sys_get_kernel_syms(struct kernel_sym *table) +{ + return -ENOSYS; +} + +int try_inc_mod_count(struct module *mod) +{ + return 1; +} + +#endif /* CONFIG_MODULES */ diff --git a/uClinux-2.4.31-uc0/kernel/panic.c b/uClinux-2.4.31-uc0/kernel/panic.c new file mode 100644 index 0000000..319a062 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/panic.c @@ -0,0 +1,155 @@ +/* + * linux/kernel/panic.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * This function is used through-out the kernel (including mm and fs) + * to indicate a major problem. + */ +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/sysrq.h> +#include <linux/interrupt.h> +#include <linux/console.h> + +asmlinkage void sys_sync(void); /* it's really int */ + +int panic_timeout; + +struct notifier_block *panic_notifier_list; + +static int __init panic_setup(char *str) +{ + panic_timeout = simple_strtoul(str, NULL, 0); + return 1; +} + +__setup("panic=", panic_setup); + +int machine_paniced; + +/** + * panic - halt the system + * @fmt: The text string to print + * + * Display a message, then perform cleanups. Functions in the panic + * notifier list are called after the filesystem cache is flushed (when possible). + * + * This function never returns. + */ + +NORET_TYPE void panic(const char * fmt, ...) 
+{ + static char buf[1024]; + va_list args; +#if defined(CONFIG_ARCH_S390) + unsigned long caller = (unsigned long) __builtin_return_address(0); +#endif + +#ifdef CONFIG_VT + disable_console_blank(); +#endif + machine_paniced = 1; + + bust_spinlocks(1); + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + printk(KERN_EMERG "Kernel panic: %s\n",buf); + if (in_interrupt()) + printk(KERN_EMERG "In interrupt handler - not syncing\n"); + else if (!current->pid) + printk(KERN_EMERG "In idle task - not syncing\n"); + else + sys_sync(); + bust_spinlocks(0); + +#ifdef CONFIG_SMP + smp_send_stop(); +#endif + + notifier_call_chain(&panic_notifier_list, 0, NULL); + + if (panic_timeout < 0) { + machine_halt(); + } else if (panic_timeout > 0) { + /* + * Delay timeout seconds before rebooting the machine. + * We can't use the "normal" timers since we just panicked.. + */ + printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout); + mdelay(panic_timeout*1000); + /* + * Should we run the reboot notifier. For the moment Im + * choosing not too. It might crash, be corrupt or do + * more harm than good for other reasons. + */ + machine_restart(NULL); + } +#ifdef __sparc__ + { + extern int stop_a_enabled; + /* Make sure the user can actually press L1-A */ + stop_a_enabled = 1; + printk("Press L1-A to return to the boot prom\n"); + } +#endif +#if defined(CONFIG_ARCH_S390) + disabled_wait(caller); +#endif + sti(); + for(;;) { +#if defined(CONFIG_X86) && defined(CONFIG_VT) + extern void panic_blink(void); + panic_blink(); +#endif + CHECK_EMERGENCY_SYNC + } +} + +/** + * print_tainted - return a string to represent the kernel taint state. + * + * The string is overwritten by the next call to print_taint(). + */ + +const char *print_tainted() +{ + static char buf[20]; + if (tainted) { + snprintf(buf, sizeof(buf), "Tainted: %c%c", + tainted & 1 ? 'P' : 'G', + tainted & 2 ? 'F' : ' '); + } + else + snprintf(buf, sizeof(buf), "Not tainted"); + return(buf); +} + +int tainted = 0; + +/* + * A BUG() call in an inline function in a header should be avoided, + * because it can seriously bloat the kernel. So here we have + * helper functions. + * We lose the BUG()-time file-and-line info this way, but it's + * usually not very useful from an inline anyway. The backtrace + * tells us what we want to know. + */ + +void __out_of_line_bug(int line) +{ + printk("kernel BUG in header file at line %d\n", line); + + BUG(); + + /* Satisfy __attribute__((noreturn)) */ + for ( ; ; ) + ; +} diff --git a/uClinux-2.4.31-uc0/kernel/pm.c b/uClinux-2.4.31-uc0/kernel/pm.c new file mode 100644 index 0000000..11bdc2a --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/pm.c @@ -0,0 +1,293 @@ +/* + * pm.c - Power management interface + * + * Copyright (C) 2000 Andrew Henroid + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/pm.h> +#include <linux/interrupt.h> + +int pm_active; + +/* + * Locking notes: + * pm_devs_lock can be a semaphore providing pm ops are not called + * from an interrupt handler (already a bad idea so no change here). Each + * change must be protected so that an unlink of an entry doesn't clash + * with a pm send - which is permitted to sleep in the current architecture + * + * Module unloads clashing with pm events now work out safely, the module + * unload path will block until the event has been sent. It may well block + * until a resume but that will be fine. + */ + +static DECLARE_MUTEX(pm_devs_lock); +static LIST_HEAD(pm_devs); + +/** + * pm_register - register a device with power management + * @type: device type + * @id: device ID + * @callback: callback function + * + * Add a device to the list of devices that wish to be notified about + * power management events. A &pm_dev structure is returned on success, + * on failure the return is %NULL. + * + * The callback function will be called in process context and + * it may sleep. + */ + +struct pm_dev *pm_register(pm_dev_t type, + unsigned long id, + pm_callback callback) +{ + struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL); + if (dev) { + memset(dev, 0, sizeof(*dev)); + dev->type = type; + dev->id = id; + dev->callback = callback; + + down(&pm_devs_lock); + list_add(&dev->entry, &pm_devs); + up(&pm_devs_lock); + } + return dev; +} + +/** + * pm_unregister - unregister a device with power management + * @dev: device to unregister + * + * Remove a device from the power management notification lists. The + * dev passed must be a handle previously returned by pm_register. + */ + +void pm_unregister(struct pm_dev *dev) +{ + if (dev) { + down(&pm_devs_lock); + list_del(&dev->entry); + up(&pm_devs_lock); + + kfree(dev); + } +} + +static void __pm_unregister(struct pm_dev *dev) +{ + if (dev) { + list_del(&dev->entry); + kfree(dev); + } +} + +/** + * pm_unregister_all - unregister all devices with matching callback + * @callback: callback function pointer + * + * Unregister every device that would call the callback passed. This + * is primarily meant as a helper function for loadable modules. It + * enables a module to give up all its managed devices without keeping + * its own private list. + */ + +void pm_unregister_all(pm_callback callback) +{ + struct list_head *entry; + + if (!callback) + return; + + down(&pm_devs_lock); + entry = pm_devs.next; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + entry = entry->next; + if (dev->callback == callback) + __pm_unregister(dev); + } + up(&pm_devs_lock); +} + +/** + * pm_send - send request to a single device + * @dev: device to send to + * @rqst: power management request + * @data: data for the callback + * + * Issue a power management request to a given device. The + * %PM_SUSPEND and %PM_RESUME events are handled specially. The + * data field must hold the intended next state. No call is made + * if the state matches. + * + * BUGS: what stops two power management requests occuring in parallel + * and conflicting. 
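+ *
+ * Illustrative call, requesting a suspend to state 3 (state values are
+ * a platform convention; 0 conventionally means running):
+ *
+ *	if (pm_send(dev, PM_SUSPEND, (void *)3) == 0)
+ *		... dev->state is now 3 ...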
+ * + * WARNING: Calling pm_send directly is not generally recommended, in + * paticular there is no locking against the pm_dev going away. The + * caller must maintain all needed locking or have 'inside knowledge' + * on the safety. Also remember that this function is not locked against + * pm_unregister. This means that you must handle SMP races on callback + * execution and unload yourself. + */ + +int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data) +{ + int status = 0; + int prev_state, next_state; + + if (in_interrupt()) + BUG(); + + switch (rqst) { + case PM_SUSPEND: + case PM_RESUME: + prev_state = dev->state; + next_state = (unsigned long) data; + if (prev_state != next_state) { + if (dev->callback) + status = (*dev->callback)(dev, rqst, data); + if (!status) { + dev->state = next_state; + dev->prev_state = prev_state; + } + } + else { + dev->prev_state = prev_state; + } + break; + default: + if (dev->callback) + status = (*dev->callback)(dev, rqst, data); + break; + } + return status; +} + +/* + * Undo incomplete request + */ +static void pm_undo_all(struct pm_dev *last) +{ + struct list_head *entry = last->entry.prev; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + if (dev->state != dev->prev_state) { + /* previous state was zero (running) resume or + * previous state was non-zero (suspended) suspend + */ + pm_request_t undo = (dev->prev_state + ? PM_SUSPEND:PM_RESUME); + pm_send(dev, undo, (void*) dev->prev_state); + } + entry = entry->prev; + } +} + +/** + * pm_send_all - send request to all managed devices + * @rqst: power management request + * @data: data for the callback + * + * Issue a power management request to a all devices. The + * %PM_SUSPEND events are handled specially. Any device is + * permitted to fail a suspend by returning a non zero (error) + * value from its callback function. If any device vetoes a + * suspend request then all other devices that have suspended + * during the processing of this request are restored to their + * previous state. + * + * WARNING: This function takes the pm_devs_lock. The lock is not dropped until + * the callbacks have completed. This prevents races against pm locking + * functions, races against module unload pm_unregister code. It does + * mean however that you must not issue pm_ functions within the callback + * or you will deadlock and users will hate you. + * + * Zero is returned on success. If a suspend fails then the status + * from the device that vetoes the suspend is returned. + * + * BUGS: what stops two power management requests occuring in parallel + * and conflicting. + */ + +int pm_send_all(pm_request_t rqst, void *data) +{ + struct list_head *entry; + + down(&pm_devs_lock); + entry = pm_devs.next; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + if (dev->callback) { + int status = pm_send(dev, rqst, data); + if (status) { + /* return devices to previous state on + * failed suspend request + */ + if (rqst == PM_SUSPEND) + pm_undo_all(dev); + up(&pm_devs_lock); + return status; + } + } + entry = entry->next; + } + up(&pm_devs_lock); + return 0; +} + +/** + * pm_find - find a device + * @type: type of device + * @from: where to start looking + * + * Scan the power management list for devices of a specific type. The + * return value for a matching device may be passed to further calls + * to this function to find further matches. A %NULL indicates the end + * of the list. 
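+ *
+ * A typical scan over every managed device (illustrative; the
+ * %PM_UNKNOWN_DEV type matches a device of any type here):
+ *
+ *	struct pm_dev *dev = NULL;
+ *	while ((dev = pm_find(PM_UNKNOWN_DEV, dev)) != NULL)
+ *		... inspect dev ...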
+ * + * To search from the beginning pass %NULL as the @from value. + * + * The caller MUST hold the pm_devs_lock lock when calling this + * function. The instant that the lock is dropped all pointers returned + * may become invalid. + */ + +struct pm_dev *pm_find(pm_dev_t type, struct pm_dev *from) +{ + struct list_head *entry = from ? from->entry.next:pm_devs.next; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + if (type == PM_UNKNOWN_DEV || dev->type == type) + return dev; + entry = entry->next; + } + return 0; +} + +EXPORT_SYMBOL(pm_register); +EXPORT_SYMBOL(pm_unregister); +EXPORT_SYMBOL(pm_unregister_all); +EXPORT_SYMBOL(pm_send); +EXPORT_SYMBOL(pm_send_all); +EXPORT_SYMBOL(pm_find); +EXPORT_SYMBOL(pm_active); diff --git a/uClinux-2.4.31-uc0/kernel/printk.c b/uClinux-2.4.31-uc0/kernel/printk.c new file mode 100644 index 0000000..3952dbe --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/printk.c @@ -0,0 +1,721 @@ +/* + * linux/kernel/printk.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Modified to make sys_syslog() more flexible: added commands to + * return the last 4k of kernel messages, regardless of whether + * they've been read or not. Added option to suppress kernel printk's + * to the console. Added hook for sending the console messages + * elsewhere, in preparation for a serial line console (someday). + * Ted Ts'o, 2/11/93. + * Modified for sysctl support, 1/8/97, Chris Horn. + * Fixed SMP synchronization, 08/08/99, Manfred Spraul + * manfreds@colorfullife.com + * Rewrote bits to get rid of console_lock + * 01Mar01 Andrew Morton <andrewm@uow.edu.au> + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/tty.h> +#include <linux/tty_driver.h> +#include <linux/smp_lock.h> +#include <linux/console.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/interrupt.h> /* For in_interrupt() */ +#include <linux/config.h> + +#include <asm/uaccess.h> + +#if !defined(CONFIG_LOG_BUF_SHIFT) || (CONFIG_LOG_BUF_SHIFT == 0) +#if defined(CONFIG_MULTIQUAD) || defined(CONFIG_IA64) +#define LOG_BUF_LEN (65536) +#elif defined(CONFIG_ARCH_S390) +#define LOG_BUF_LEN (131072) +#elif defined(CONFIG_SMP) +#define LOG_BUF_LEN (32768) +#else +#define LOG_BUF_LEN (16384) /* This must be a power of two */ +#endif +#else /* CONFIG_LOG_BUF_SHIFT */ +#define LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) +#endif + +#define LOG_BUF_MASK (LOG_BUF_LEN-1) + +#ifndef arch_consoles_callable +#define arch_consoles_callable() (1) +#endif + +/* printk's without a loglevel use this.. */ +#define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */ + +/* We show everything that is MORE important than this.. */ +#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ +#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */ + +DECLARE_WAIT_QUEUE_HEAD(log_wait); + +int console_printk[4] = { + DEFAULT_CONSOLE_LOGLEVEL, /* console_loglevel */ + DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */ + MINIMUM_CONSOLE_LOGLEVEL, /* minimum_console_loglevel */ + DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ +}; + +int oops_in_progress; + +/* + * console_sem protects the console_drivers list, and also + * provides serialisation for access to the entire console + * driver system. 
+ */ +static DECLARE_MUTEX(console_sem); +struct console *console_drivers; + +/* + * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars + * It is also used in interesting ways to provide interlocking in + * release_console_sem(). + */ +static spinlock_t logbuf_lock = SPIN_LOCK_UNLOCKED; + +static char log_buf[LOG_BUF_LEN]; +#define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) + +/* + * The indices into log_buf are not constrained to LOG_BUF_LEN - they + * must be masked before subscripting + */ +static unsigned long log_start; /* Index into log_buf: next char to be read by syslog() */ +static unsigned long con_start; /* Index into log_buf: next char to be sent to consoles */ +static unsigned long log_end; /* Index into log_buf: most-recently-written-char + 1 */ +static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */ + +struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; +static int selected_console = -1; +static int preferred_console = -1; + +/* Flag: console code may call schedule() */ +static int console_may_schedule; + +/* + * Setup a list of consoles. Called from init/main.c + */ +static int __init console_setup(char *str) +{ + struct console_cmdline *c; + char name[sizeof(c->name)]; + char *s, *options; + int i, idx; + + /* + * Decode str into name, index, options. + */ + if (str[0] >= '0' && str[0] <= '9') { + strcpy(name, "ttyS"); + strncpy(name + 4, str, sizeof(name) - 5); + } else + strncpy(name, str, sizeof(name) - 1); + name[sizeof(name) - 1] = 0; + if ((options = strchr(str, ',')) != NULL) + *(options++) = 0; +#ifdef __sparc__ + if (!strcmp(str, "ttya")) + strcpy(name, "ttyS0"); + if (!strcmp(str, "ttyb")) + strcpy(name, "ttyS1"); +#endif + for(s = name; *s; s++) + if (*s >= '0' && *s <= '9') + break; + idx = simple_strtoul(s, NULL, 10); + *s = 0; + + /* + * See if this tty is not yet registered, and + * if we have a slot free. + */ + for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) + if (strcmp(console_cmdline[i].name, name) == 0 && + console_cmdline[i].index == idx) { + selected_console = i; + return 1; + } + if (i == MAX_CMDLINECONSOLES) + return 1; + selected_console = i; + c = &console_cmdline[i]; + memcpy(c->name, name, sizeof(c->name)); + c->options = options; + c->index = idx; + return 1; +} + +#ifdef CONFIG_UCLINUX +/* + * DAVIDM - put this in so 2.0 and 2.4 NETtel images work with the + * same boot args. + */ + +static int __init CONSOLE_setup(char *str) +{ + /* + * 2.4 does not want the /dev/ options on the front + */ + if (strncmp(str, "/dev/", 5) == 0) + return(console_setup(str + 5)); + return(console_setup(str)); +} + +__setup("CONSOLE=", CONSOLE_setup); +#endif + +__setup("console=", console_setup); + +/* + * Commands to do_syslog: + * + * 0 -- Close the log. Currently a NOP. + * 1 -- Open the log. Currently a NOP. + * 2 -- Read from the log. + * 3 -- Read all messages remaining in the ring buffer. + * 4 -- Read and clear all messages remaining in the ring buffer + * 5 -- Clear ring buffer. 
+ * 6 -- Disable printk's to console + * 7 -- Enable printk's to console + * 8 -- Set level of messages printed to console + * 9 -- Return number of unread characters in the log buffer + */ +int do_syslog(int type, char * buf, int len) +{ + unsigned long i, j, limit, count; + int do_clear = 0; + char c; + int error = 0; + + switch (type) { + case 0: /* Close log */ + break; + case 1: /* Open log */ + break; + case 2: /* Read from log */ + error = -EINVAL; + if (!buf || len < 0) + goto out; + error = 0; + if (!len) + goto out; + error = verify_area(VERIFY_WRITE,buf,len); + if (error) + goto out; + error = wait_event_interruptible(log_wait, (log_start - log_end)); + if (error) + goto out; + i = 0; + spin_lock_irq(&logbuf_lock); + while ((log_start != log_end) && i < len) { + c = LOG_BUF(log_start); + log_start++; + spin_unlock_irq(&logbuf_lock); + __put_user(c,buf); + buf++; + i++; + spin_lock_irq(&logbuf_lock); + } + spin_unlock_irq(&logbuf_lock); + error = i; + break; + case 4: /* Read/clear last kernel messages */ + do_clear = 1; + /* FALL THRU */ + case 3: /* Read last kernel messages */ + error = -EINVAL; + if (!buf || len < 0) + goto out; + error = 0; + if (!len) + goto out; + error = verify_area(VERIFY_WRITE,buf,len); + if (error) + goto out; + count = len; + if (count > LOG_BUF_LEN) + count = LOG_BUF_LEN; + spin_lock_irq(&logbuf_lock); + if (count > logged_chars) + count = logged_chars; + if (do_clear) + logged_chars = 0; + limit = log_end; + /* + * __put_user() could sleep, and while we sleep + * printk() could overwrite the messages + * we try to copy to user space. Therefore + * the messages are copied in reverse. <manfreds> + */ + for(i=0;i < count;i++) { + j = limit-1-i; + if (j+LOG_BUF_LEN < log_end) + break; + c = LOG_BUF(j); + spin_unlock_irq(&logbuf_lock); + __put_user(c,&buf[count-1-i]); + spin_lock_irq(&logbuf_lock); + } + spin_unlock_irq(&logbuf_lock); + error = i; + if(i != count) { + int offset = count-error; + /* buffer overflow during copy, correct user buffer. 
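+			 * The bytes that did get copied sit at the tail of
+			 * the user buffer; slide them down to the front.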
*/ + for(i=0;i<error;i++) { + __get_user(c,&buf[i+offset]); + __put_user(c,&buf[i]); + } + } + + break; + case 5: /* Clear ring buffer */ + spin_lock_irq(&logbuf_lock); + logged_chars = 0; + spin_unlock_irq(&logbuf_lock); + break; + case 6: /* Disable logging to console */ + spin_lock_irq(&logbuf_lock); + console_loglevel = minimum_console_loglevel; + spin_unlock_irq(&logbuf_lock); + break; + case 7: /* Enable logging to console */ + spin_lock_irq(&logbuf_lock); + console_loglevel = default_console_loglevel; + spin_unlock_irq(&logbuf_lock); + break; + case 8: /* Set level of messages printed to console */ + error = -EINVAL; + if (len < 1 || len > 8) + goto out; + if (len < minimum_console_loglevel) + len = minimum_console_loglevel; + spin_lock_irq(&logbuf_lock); + console_loglevel = len; + spin_unlock_irq(&logbuf_lock); + error = 0; + break; + case 9: /* Number of chars in the log buffer */ + spin_lock_irq(&logbuf_lock); + error = log_end - log_start; + spin_unlock_irq(&logbuf_lock); + break; + default: + error = -EINVAL; + break; + } +out: + return error; +} + +asmlinkage long sys_syslog(int type, char * buf, int len) +{ + if ((type != 3) && !capable(CAP_SYS_ADMIN)) + return -EPERM; + return do_syslog(type, buf, len); +} + +/* + * Call the console drivers on a range of log_buf + */ +static void __call_console_drivers(unsigned long start, unsigned long end) +{ + struct console *con; + + for (con = console_drivers; con; con = con->next) { + if ((con->flags & CON_ENABLED) && con->write) + con->write(con, &LOG_BUF(start), end - start); + } +} + +/* + * Write out chars from start to end - 1 inclusive + */ +static void _call_console_drivers(unsigned long start, unsigned long end, int msg_log_level) +{ + if (msg_log_level < console_loglevel && console_drivers && start != end) { + if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { + /* wrapped write */ + __call_console_drivers(start & LOG_BUF_MASK, LOG_BUF_LEN); + __call_console_drivers(0, end & LOG_BUF_MASK); + } else { + __call_console_drivers(start, end); + } + } +} + +/* + * Call the console drivers, asking them to write out + * log_buf[start] to log_buf[end - 1]. + * The console_sem must be held. + */ +static void call_console_drivers(unsigned long start, unsigned long end) +{ + unsigned long cur_index, start_print; + static int msg_level = -1; + + if (((long)(start - end)) > 0) + BUG(); + + cur_index = start; + start_print = start; + while (cur_index != end) { + if ( msg_level < 0 && + ((end - cur_index) > 2) && + LOG_BUF(cur_index + 0) == '<' && + LOG_BUF(cur_index + 1) >= '0' && + LOG_BUF(cur_index + 1) <= '7' && + LOG_BUF(cur_index + 2) == '>') + { + msg_level = LOG_BUF(cur_index + 1) - '0'; + cur_index += 3; + start_print = cur_index; + } + while (cur_index != end) { + char c = LOG_BUF(cur_index); + cur_index++; + + if (c == '\n') { + if (msg_level < 0) { + /* + * printk() has already given us loglevel tags in + * the buffer. 
This code is here in case the + * log buffer has wrapped right round and scribbled + * on those tags + */ + msg_level = default_message_loglevel; + } + _call_console_drivers(start_print, cur_index, msg_level); + msg_level = -1; + start_print = cur_index; + break; + } + } + } + _call_console_drivers(start_print, end, msg_level); +} + +static void emit_log_char(char c) +{ + LOG_BUF(log_end) = c; + log_end++; + if (log_end - log_start > LOG_BUF_LEN) + log_start = log_end - LOG_BUF_LEN; + if (log_end - con_start > LOG_BUF_LEN) + con_start = log_end - LOG_BUF_LEN; + if (logged_chars < LOG_BUF_LEN) + logged_chars++; +} + +/* + * This is printk. It can be called from any context. We want it to work. + * + * We try to grab the console_sem. If we succeed, it's easy - we log the output and + * call the console drivers. If we fail to get the semaphore we place the output + * into the log buffer and return. The current holder of the console_sem will + * notice the new output in release_console_sem() and will send it to the + * consoles before releasing the semaphore. + * + * One effect of this deferred printing is that code which calls printk() and + * then changes console_loglevel may break. This is because console_loglevel + * is inspected when the actual printing occurs. + */ +asmlinkage int printk(const char *fmt, ...) +{ + va_list args; + unsigned long flags; + int printed_len; + char *p; + static char printk_buf[1024]; + static int log_level_unknown = 1; + + if (oops_in_progress) { + /* If a crash is occurring, make sure we can't deadlock */ + spin_lock_init(&logbuf_lock); + /* And make sure that we print immediately */ + init_MUTEX(&console_sem); + } + + /* This stops the holder of console_sem just where we want him */ + spin_lock_irqsave(&logbuf_lock, flags); + + /* Emit the output into the temporary buffer */ + va_start(args, fmt); + printed_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args); + va_end(args); + + /* + * Copy the output into log_buf. If the caller didn't provide + * appropriate log level tags, we insert them here + */ + for (p = printk_buf; *p; p++) { + if (log_level_unknown) { + if (p[0] != '<' || p[1] < '0' || p[1] > '7' || p[2] != '>') { + emit_log_char('<'); + emit_log_char(default_message_loglevel + '0'); + emit_log_char('>'); + } + log_level_unknown = 0; + } + emit_log_char(*p); + if (*p == '\n') + log_level_unknown = 1; + } + + if (!arch_consoles_callable()) { + /* + * On some architectures, the consoles are not usable + * on secondary CPUs early in the boot process. + */ + spin_unlock_irqrestore(&logbuf_lock, flags); + goto out; + } + if (!down_trylock(&console_sem)) { + /* + * We own the drivers. We can drop the spinlock and let + * release_console_sem() print the text + */ + spin_unlock_irqrestore(&logbuf_lock, flags); + console_may_schedule = 0; + release_console_sem(); + } else { + /* + * Someone else owns the drivers. We drop the spinlock, which + * allows the semaphore holder to proceed and to call the + * console drivers with the output which we just produced. + */ + spin_unlock_irqrestore(&logbuf_lock, flags); + } +out: + return printed_len; +} +EXPORT_SYMBOL(printk); + +/** + * acquire_console_sem - lock the console system for exclusive use. + * + * Acquires a semaphore which guarantees that the caller has + * exclusive access to the console system and the console_drivers list. + * + * Can sleep, returns nothing. 
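+ *
+ * Typical pattern (illustrative):
+ *
+ *	acquire_console_sem();
+ *	... walk or modify console_drivers ...
+ *	release_console_sem();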
+ */ +void acquire_console_sem(void) +{ + if (in_interrupt()) + BUG(); + down(&console_sem); + console_may_schedule = 1; +} +EXPORT_SYMBOL(acquire_console_sem); + +/** + * release_console_sem - unlock the console system + * + * Releases the semaphore which the caller holds on the console system + * and the console driver list. + * + * While the semaphore was held, console output may have been buffered + * by printk(). If this is the case, release_console_sem() emits + * the output prior to releasing the semaphore. + * + * If there is output waiting for klogd, we wake it up. + * + * release_console_sem() may be called from any context. + */ +void release_console_sem(void) +{ + unsigned long flags; + unsigned long _con_start, _log_end; + unsigned long must_wake_klogd = 0; + + for ( ; ; ) { + spin_lock_irqsave(&logbuf_lock, flags); + must_wake_klogd |= log_start - log_end; + if (con_start == log_end) + break; /* Nothing to print */ + _con_start = con_start; + _log_end = log_end; + con_start = log_end; /* Flush */ + spin_unlock_irqrestore(&logbuf_lock, flags); + call_console_drivers(_con_start, _log_end); + } + console_may_schedule = 0; + up(&console_sem); + spin_unlock_irqrestore(&logbuf_lock, flags); + if (must_wake_klogd && !oops_in_progress) + wake_up_interruptible(&log_wait); +} + +/** console_conditional_schedule - yield the CPU if required + * + * If the console code is currently allowed to sleep, and + * if this CPU should yield the CPU to another task, do + * so here. + * + * Must be called within acquire_console_sem(). + */ +void console_conditional_schedule(void) +{ + if (console_may_schedule && current->need_resched) { + set_current_state(TASK_RUNNING); + schedule(); + } +} + +void console_print(const char *s) +{ + printk(KERN_EMERG "%s", s); +} +EXPORT_SYMBOL(console_print); + +void console_unblank(void) +{ + struct console *c; + + /* + * Try to get the console semaphore. If someone else owns it + * we have to return without unblanking because console_unblank + * may be called in interrupt context. + */ + if (down_trylock(&console_sem) != 0) + return; + console_may_schedule = 0; + for (c = console_drivers; c != NULL; c = c->next) + if ((c->flags & CON_ENABLED) && c->unblank) + c->unblank(); + release_console_sem(); +} +EXPORT_SYMBOL(console_unblank); + +/* + * The console driver calls this routine during kernel initialization + * to register the console printing procedure with printk() and to + * print any messages that were printed by the kernel before the + * console driver was initialized. + */ +void register_console(struct console * console) +{ + int i; + unsigned long flags; + + if (preferred_console < 0) + preferred_console = selected_console; + + /* + * See if we want to use this console driver. If we + * didn't select a console we take the first one + * that registers here. + */ + if (preferred_console < 0) { + if (console->index < 0) + console->index = 0; + if (console->setup == NULL || + console->setup(console, NULL) == 0) { + console->flags |= CON_ENABLED | CON_CONSDEV; + preferred_console = 0; + } + } + + /* + * See if this console matches one we selected on + * the command line. 
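+	 * (e.g. a boot argument of console=ttyS0,9600 was parsed by
+	 * console_setup() above into name "ttyS", index 0, options "9600").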
+ */ + for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) { + if (strcmp(console_cmdline[i].name, console->name) != 0) + continue; + if (console->index >= 0 && + console->index != console_cmdline[i].index) + continue; + if (console->index < 0) + console->index = console_cmdline[i].index; + if (console->setup && + console->setup(console, console_cmdline[i].options) != 0) + break; + console->flags |= CON_ENABLED; + console->index = console_cmdline[i].index; + if (i == preferred_console) + console->flags |= CON_CONSDEV; + break; + } + + if (!(console->flags & CON_ENABLED)) + return; + + /* + * Put this console in the list - keep the + * preferred driver at the head of the list. + */ + acquire_console_sem(); + if ((console->flags & CON_CONSDEV) || console_drivers == NULL) { + console->next = console_drivers; + console_drivers = console; + } else { + console->next = console_drivers->next; + console_drivers->next = console; + } + if (console->flags & CON_PRINTBUFFER) { + /* + * release_console_sem() will print out the buffered messages for us. + */ + spin_lock_irqsave(&logbuf_lock, flags); + con_start = log_start; + spin_unlock_irqrestore(&logbuf_lock, flags); + } + release_console_sem(); +} +EXPORT_SYMBOL(register_console); + +int unregister_console(struct console * console) +{ + struct console *a,*b; + int res = 1; + + acquire_console_sem(); + if (console_drivers == console) { + console_drivers=console->next; + res = 0; + } else { + for (a=console_drivers->next, b=console_drivers ; + a; b=a, a=b->next) { + if (a == console) { + b->next = a->next; + res = 0; + break; + } + } + } + + /* If last console is removed, we re-enable picking the first + * one that gets registered. Without that, pmac early boot console + * would prevent fbcon from taking over. + */ + if (console_drivers == NULL) + preferred_console = selected_console; + + + release_console_sem(); + return res; +} +EXPORT_SYMBOL(unregister_console); + +/** + * tty_write_message - write a message to a certain tty, not just the console. + * + * This is used for messages that need to be redirected to a specific tty. + * We don't put it into the syslog queue right now maybe in the future if + * really needed. + */ +void tty_write_message(struct tty_struct *tty, char *msg) +{ + if (tty && tty->driver.write) + tty->driver.write(tty, 0, msg, strlen(msg)); + return; +} diff --git a/uClinux-2.4.31-uc0/kernel/ptrace.c b/uClinux-2.4.31-uc0/kernel/ptrace.c new file mode 100644 index 0000000..20a83a5 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/ptrace.c @@ -0,0 +1,235 @@ +/* + * linux/kernel/ptrace.c + * + * (C) Copyright 1999 Linus Torvalds + * + * Common interfaces for "ptrace()" which we do not want + * to continually duplicate across every architecture. + */ + +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/smp_lock.h> + +#include <asm/pgtable.h> +#include <asm/uaccess.h> + +/* + * Check that we have indeed attached to the thing.. + */ +int ptrace_check_attach(struct task_struct *child, int kill) +{ + + if (!(child->ptrace & PT_PTRACED)) + return -ESRCH; + + if (child->p_pptr != current) + return -ESRCH; + + if (!kill) { + if (child->state != TASK_STOPPED) + return -ESRCH; +#ifdef CONFIG_SMP + /* Make sure the child gets off its CPU.. 
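+		   (spin until the scheduler has switched it away, checking
+		   each time round that the child is still stopped).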
*/ + for (;;) { + task_lock(child); + if (!task_has_cpu(child)) + break; + task_unlock(child); + do { + if (child->state != TASK_STOPPED) + return -ESRCH; + barrier(); + cpu_relax(); + } while (task_has_cpu(child)); + } + task_unlock(child); +#endif + } + + /* All systems go.. */ + return 0; +} + +int ptrace_attach(struct task_struct *task) +{ + task_lock(task); + if (task->pid <= 1) + goto bad; + if (task == current) + goto bad; + if (!task->mm) + goto bad; + if(((current->uid != task->euid) || + (current->uid != task->suid) || + (current->uid != task->uid) || + (current->gid != task->egid) || + (current->gid != task->sgid) || + (!cap_issubset(task->cap_permitted, current->cap_permitted)) || + (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) + goto bad; + rmb(); + if (!is_dumpable(task) && !capable(CAP_SYS_PTRACE)) + goto bad; + /* the same process cannot be attached many times */ + if (task->ptrace & PT_PTRACED) + goto bad; + + /* Go */ + task->ptrace |= PT_PTRACED; + if (capable(CAP_SYS_PTRACE)) + task->ptrace |= PT_PTRACE_CAP; + task_unlock(task); + + write_lock_irq(&tasklist_lock); + if (task->p_pptr != current) { + REMOVE_LINKS(task); + task->p_pptr = current; + SET_LINKS(task); + } + write_unlock_irq(&tasklist_lock); + + send_sig(SIGSTOP, task, 1); + return 0; + +bad: + task_unlock(task); + return -EPERM; +} + +int ptrace_detach(struct task_struct *child, unsigned int data) +{ + if ((unsigned long) data > _NSIG) + return -EIO; + + /* Architecture-specific hardware disable .. */ + ptrace_disable(child); + + /* .. re-parent .. */ + child->ptrace = 0; + child->exit_code = data; + write_lock_irq(&tasklist_lock); + REMOVE_LINKS(child); + child->p_pptr = child->p_opptr; + SET_LINKS(child); + write_unlock_irq(&tasklist_lock); + + /* .. and wake it up. */ + wake_up_process(child); + return 0; +} + +/* + * Access another process' address space. + * Source/target buffer must be kernel space, + * Do not walk the page table directly, use get_user_pages + */ + +int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) +{ + struct mm_struct *mm; + struct vm_area_struct *vma; + struct page *page; + void *old_buf = buf; + + /* Worry about races with exit() */ + task_lock(tsk); + mm = tsk->mm; + if (mm) + atomic_inc(&mm->mm_users); + task_unlock(tsk); + if (!mm) + return 0; + + down_read(&mm->mmap_sem); + /* ignore errors, just check how much was sucessfully transfered */ + while (len) { + int bytes, ret, offset; + void *maddr; + + ret = get_user_pages(current, mm, addr, 1, + write, 1, &page, &vma); + if (ret <= 0) + break; + + bytes = len; + offset = addr & (PAGE_SIZE-1); + if (bytes > PAGE_SIZE-offset) + bytes = PAGE_SIZE-offset; + + flush_cache_page(vma, addr); + + maddr = kmap(page); + if (write) { + memcpy(maddr + offset, buf, bytes); + flush_page_to_ram(page); + flush_icache_user_range(vma, page, addr, len); + set_page_dirty(page); + } else { + memcpy(buf, maddr + offset, bytes); + flush_page_to_ram(page); + } + kunmap(page); + put_page(page); + len -= bytes; + buf += bytes; + addr += bytes; + } + up_read(&mm->mmap_sem); + mmput(mm); + + return buf - old_buf; +} + +int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len) +{ + int copied = 0; + + while (len > 0) { + char buf[128]; + int this_len, retval; + + this_len = (len > sizeof(buf)) ? 
sizeof(buf) : len; + retval = access_process_vm(tsk, src, buf, this_len, 0); + if (!retval) { + if (copied) + break; + return -EIO; + } + if (copy_to_user(dst, buf, retval)) + return -EFAULT; + copied += retval; + src += retval; + dst += retval; + len -= retval; + } + return copied; +} + +int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len) +{ + int copied = 0; + + while (len > 0) { + char buf[128]; + int this_len, retval; + + this_len = (len > sizeof(buf)) ? sizeof(buf) : len; + if (copy_from_user(buf, src, this_len)) + return -EFAULT; + retval = access_process_vm(tsk, dst, buf, this_len, 1); + if (!retval) { + if (copied) + break; + return -EIO; + } + copied += retval; + src += retval; + dst += retval; + len -= retval; + } + return copied; +} + diff --git a/uClinux-2.4.31-uc0/kernel/resource.c b/uClinux-2.4.31-uc0/kernel/resource.c new file mode 100644 index 0000000..cd14435 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/resource.c @@ -0,0 +1,372 @@ +/* + * linux/kernel/resource.c + * + * Copyright (C) 1999 Linus Torvalds + * Copyright (C) 1999 Martin Mares <mj@ucw.cz> + * + * Arbitrary resource management. + */ + +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/ioport.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/seq_file.h> +#include <asm/io.h> + +struct resource ioport_resource = { "PCI IO", 0x0000, IO_SPACE_LIMIT, IORESOURCE_IO }; +struct resource iomem_resource = { "PCI mem", 0x00000000, 0xffffffff, IORESOURCE_MEM }; + +static rwlock_t resource_lock = RW_LOCK_UNLOCKED; + +enum { MAX_IORES_LEVEL = 5 }; + +static void *r_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct resource *p = v; + (*pos)++; + if (p->child) + return p->child; + while (!p->sibling && p->parent) + p = p->parent; + return p->sibling; +} + +static void *r_start(struct seq_file *m, loff_t *pos) +{ + struct resource *p = m->private; + loff_t l = 0; + read_lock(&resource_lock); + for (p = p->child; p && l < *pos; p = r_next(m, p, &l)) + ; + return p; +} + +static void r_stop(struct seq_file *m, void *v) +{ + read_unlock(&resource_lock); +} + +static int r_show(struct seq_file *m, void *v) +{ + struct resource *root = m->private; + struct resource *r = v, *p; + int width = root->end < 0x10000 ? 4 : 8; + int depth; + + for (depth = 0, p = r; depth < MAX_IORES_LEVEL; depth++, p = p->parent) + if (p->parent == root) + break; + seq_printf(m, "%*s%0*lx-%0*lx : %s\n", + depth * 2, "", + width, r->start, + width, r->end, + r->name ? 
r->name : "<BAD>"); + return 0; +} + +static struct seq_operations resource_op = { + .start = r_start, + .next = r_next, + .stop = r_stop, + .show = r_show, +}; + +static int ioports_open(struct inode *inode, struct file *file) +{ + int res = seq_open(file, &resource_op); + if (!res) { + struct seq_file *m = file->private_data; + m->private = &ioport_resource; + } + return res; +} + +static int iomem_open(struct inode *inode, struct file *file) +{ + int res = seq_open(file, &resource_op); + if (!res) { + struct seq_file *m = file->private_data; + m->private = &iomem_resource; + } + return res; +} + +struct file_operations proc_ioports_operations = { + .open = ioports_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +struct file_operations proc_iomem_operations = { + .open = iomem_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* Return the conflict entry if you can't request it */ +static struct resource * __request_resource(struct resource *root, struct resource *new) +{ + unsigned long start = new->start; + unsigned long end = new->end; + struct resource *tmp, **p; + + if (end < start) + return root; + if (start < root->start) + return root; + if (end > root->end) + return root; + p = &root->child; + for (;;) { + tmp = *p; + if (!tmp || tmp->start > end) { + new->sibling = tmp; + *p = new; + new->parent = root; + return NULL; + } + p = &tmp->sibling; + if (tmp->end < start) + continue; + return tmp; + } +} + +static int __release_resource(struct resource *old) +{ + struct resource *tmp, **p; + + p = &old->parent->child; + for (;;) { + tmp = *p; + if (!tmp) + break; + if (tmp == old) { + *p = tmp->sibling; + old->parent = NULL; + return 0; + } + p = &tmp->sibling; + } + return -EINVAL; +} + +int request_resource(struct resource *root, struct resource *new) +{ + struct resource *conflict; + + write_lock(&resource_lock); + conflict = __request_resource(root, new); + write_unlock(&resource_lock); + return conflict ? -EBUSY : 0; +} + +int release_resource(struct resource *old) +{ + int retval; + + write_lock(&resource_lock); + retval = __release_resource(old); + write_unlock(&resource_lock); + return retval; +} + +int check_resource(struct resource *root, unsigned long start, unsigned long len) +{ + struct resource *conflict, tmp; + + tmp.start = start; + tmp.end = start + len - 1; + write_lock(&resource_lock); + conflict = __request_resource(root, &tmp); + if (!conflict) + __release_resource(&tmp); + write_unlock(&resource_lock); + return conflict ? -EBUSY : 0; +} + +/* + * Find empty slot in the resource tree given range and alignment. + */ +static int find_resource(struct resource *root, struct resource *new, + unsigned long size, + unsigned long min, unsigned long max, + unsigned long align, + void (*alignf)(void *, struct resource *, + unsigned long, unsigned long), + void *alignf_data) +{ + struct resource *this = root->child; + + new->start = root->start; + for(;;) { + if (this) + new->end = this->start; + else + new->end = root->end; + if (new->start < min) + new->start = min; + if (new->end > max) + new->end = max; + new->start = (new->start + align - 1) & ~(align - 1); + if (alignf) + alignf(alignf_data, new, size, align); + if (new->start < new->end && new->end - new->start + 1 >= size) { + new->end = new->start + size - 1; + return 0; + } + if (!this) + break; + new->start = this->end + 1; + this = this->sibling; + } + return -EBUSY; +} + +/* + * Allocate empty slot in the resource tree given range and alignment. 
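The resource API above is typically consumed by drivers that carve a fixed range out of ioport_resource or iomem_resource. A minimal sketch of that usage, assuming a 2.4 build environment; the device name "mydev" and the port range 0x300-0x31f are hypothetical:

#include <linux/ioport.h>

/* Hypothetical device occupying I/O ports 0x300-0x31f. */
static struct resource mydev_ports = { "mydev", 0x300, 0x31f, IORESOURCE_IO };

int mydev_claim_ports(void)
{
        /* __request_resource() links the new node in as a child of the
           root, keeping siblings sorted by start; any overlap comes back
           as a non-NULL conflict pointer, reported here as -EBUSY. */
        if (request_resource(&ioport_resource, &mydev_ports))
                return -EBUSY;
        return 0;
}

void mydev_release_ports(void)
{
        release_resource(&mydev_ports); /* unlinks it again */
}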
+ */ +int allocate_resource(struct resource *root, struct resource *new, + unsigned long size, + unsigned long min, unsigned long max, + unsigned long align, + void (*alignf)(void *, struct resource *, + unsigned long, unsigned long), + void *alignf_data) +{ + int err; + + write_lock(&resource_lock); + err = find_resource(root, new, size, min, max, align, alignf, alignf_data); + if (err >= 0 && __request_resource(root, new)) + err = -EBUSY; + write_unlock(&resource_lock); + return err; +} + +/* + * This is compatibility stuff for IO resources. + * + * Note how this, unlike the above, knows about + * the IO flag meanings (busy etc). + * + * Request-region creates a new busy region. + * + * Check-region returns non-zero if the area is already busy + * + * Release-region releases a matching busy region. + */ +struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name) +{ + struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL); + + if (res) { + memset(res, 0, sizeof(*res)); + res->name = name; + res->start = start; + res->end = start + n - 1; + res->flags = IORESOURCE_BUSY; + + write_lock(&resource_lock); + + for (;;) { + struct resource *conflict; + + conflict = __request_resource(parent, res); + if (!conflict) + break; + if (conflict != parent) { + parent = conflict; + if (!(conflict->flags & IORESOURCE_BUSY)) + continue; + } + + /* Uhhuh, that didn't work out.. */ + kfree(res); + res = NULL; + break; + } + write_unlock(&resource_lock); + } + return res; +} + +int __check_region(struct resource *parent, unsigned long start, unsigned long n) +{ + struct resource * res; + + res = __request_region(parent, start, n, "check-region"); + if (!res) + return -EBUSY; + + release_resource(res); + kfree(res); + return 0; +} + +void __release_region(struct resource *parent, unsigned long start, unsigned long n) +{ + struct resource **p; + unsigned long end; + + p = &parent->child; + end = start + n - 1; + + for (;;) { + struct resource *res = *p; + + if (!res) + break; + if (res->start <= start && res->end >= end) { + if (!(res->flags & IORESOURCE_BUSY)) { + p = &res->child; + continue; + } + if (res->start != start || res->end != end) + break; + *p = res->sibling; + kfree(res); + return; + } + p = &res->sibling; + } + printk("Trying to free nonexistent resource <%08lx-%08lx>\n", start, end); +} + +/* + * Called from init/main.c to reserve IO ports. + */ +#define MAXRESERVE 4 +static int __init reserve_setup(char *str) +{ + static int reserved = 0; + static struct resource reserve[MAXRESERVE]; + + for (;;) { + int io_start, io_num; + int x = reserved; + + if (get_option (&str, &io_start) != 2) + break; + if (get_option (&str, &io_num) == 0) + break; + if (x < MAXRESERVE) { + struct resource *res = reserve + x; + res->name = "reserved"; + res->start = io_start; + res->end = io_start + io_num - 1; + res->flags = IORESOURCE_BUSY; + res->child = NULL; + if (request_resource(res->start >= 0x10000 ? 
&iomem_resource : &ioport_resource, res) == 0) + reserved = x+1; + } + } + return 1; +} + +__setup("reserve=", reserve_setup); diff --git a/uClinux-2.4.31-uc0/kernel/sched.c b/uClinux-2.4.31-uc0/kernel/sched.c new file mode 100644 index 0000000..be77326 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/sched.c @@ -0,0 +1,1409 @@ +/* + * linux/kernel/sched.c + * + * Kernel scheduler and related syscalls + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and + * make semaphores SMP safe + * 1998-11-19 Implemented schedule_timeout() and related stuff + * by Andrea Arcangeli + * 1998-12-28 Implemented better SMP scheduling by Ingo Molnar + */ + +/* + * 'sched.c' is the main kernel file. It contains scheduling primitives + * (sleep_on, wakeup, schedule etc) as well as a number of simple system + * call functions (type getpid()), which just extract a field from + * current-task + */ + +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/smp_lock.h> +#include <linux/nmi.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/completion.h> +#include <linux/prefetch.h> +#include <linux/compiler.h> + +#include <asm/uaccess.h> +#include <asm/mmu_context.h> + +extern void timer_bh(void); +extern void tqueue_bh(void); +extern void immediate_bh(void); + +/* + * scheduler variables + */ + +unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ + +extern void mem_use(void); + +/* + * Scheduling quanta. + * + * NOTE! The unix "nice" value influences how long a process + * gets. The nice value ranges from -20 to +19, where a -20 + * is a "high-priority" task, and a "+10" is a low-priority + * task. + * + * We want the time-slice to be around 50ms or so, so this + * calculation depends on the value of HZ. + */ +#if HZ < 200 +#define TICK_SCALE(x) ((x) >> 2) +#elif HZ < 400 +#define TICK_SCALE(x) ((x) >> 1) +#elif HZ < 800 +#define TICK_SCALE(x) (x) +#elif HZ < 1600 +#define TICK_SCALE(x) ((x) << 1) +#else +#define TICK_SCALE(x) ((x) << 2) +#endif + +#define NICE_TO_TICKS(nice) (TICK_SCALE(20-(nice))+1) + + +/* + * Init task must be ok at boot for the ix86 as we will check its signals + * via the SMP irq return path. + */ + +struct task_struct * init_tasks[NR_CPUS] = {&init_task, }; + +/* + * The tasklist_lock protects the linked list of processes. + * + * The runqueue_lock locks the parts that actually access + * and change the run-queues, and have to be interrupt-safe. + * + * If both locks are to be concurrently held, the runqueue_lock + * nests inside the tasklist_lock. + * + * task->alloc_lock nests inside tasklist_lock. + */ +spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* inner */ +rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */ + +static LIST_HEAD(runqueue_head); + +/* + * We align per-CPU scheduling data on cacheline boundaries, + * to prevent cacheline ping-pong. 
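To make the TICK_SCALE()/NICE_TO_TICKS() arithmetic above concrete, here is a standalone check, assuming HZ=100 (the common x86 value of the era, where TICK_SCALE(x) is x>>2 and one tick is 10 ms):

#include <stdio.h>

#define HZ 100
#define TICK_SCALE(x) ((x) >> 2)
#define NICE_TO_TICKS(nice) (TICK_SCALE(20-(nice))+1)

int main(void)
{
        int nice;

        /* nice -20 -> 11 ticks (110 ms), nice 0 -> 6 ticks (60 ms),
           nice +19 -> 1 tick (10 ms): roughly the 50 ms target. */
        for (nice = -20; nice <= 19; nice++)
                printf("nice %3d -> %2d ticks (%3d ms)\n", nice,
                       NICE_TO_TICKS(nice), NICE_TO_TICKS(nice) * 1000 / HZ);
        return 0;
}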
+ */ +static union { + struct schedule_data { + struct task_struct * curr; + cycles_t last_schedule; + } schedule_data; + char __pad [SMP_CACHE_BYTES]; +} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}}; + +#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr +#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule + +struct kernel_stat kstat; +extern struct task_struct *child_reaper; + +#ifdef CONFIG_SMP + +#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)]) +#define can_schedule(p,cpu) \ + ((p)->cpus_runnable & (p)->cpus_allowed & (1UL << cpu)) + +#else + +#define idle_task(cpu) (&init_task) +#define can_schedule(p,cpu) (1) + +#endif + +void scheduling_functions_start_here(void) { } + +/* + * This is the function that decides how desirable a process is.. + * You can weigh different processes against each other depending + * on what CPU they've run on lately etc to try to handle cache + * and TLB miss penalties. + * + * Return values: + * -1000: never select this + * 0: out of time, recalculate counters (but it might still be + * selected) + * +ve: "goodness" value (the larger, the better) + * +1000: realtime process, select this. + */ + +static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm) +{ + int weight; + + /* + * select the current process after every other + * runnable process, but before the idle thread. + * Also, dont trigger a counter recalculation. + */ + weight = -1; + if (p->policy & SCHED_YIELD) + goto out; + + /* + * Non-RT process - normal case first. + */ + if (p->policy == SCHED_OTHER) { + /* + * Give the process a first-approximation goodness value + * according to the number of clock-ticks it has left. + * + * Don't do any other calculations if the time slice is + * over.. + */ + weight = p->counter; + if (!weight) + goto out; + +#ifdef CONFIG_SMP + /* Give a largish advantage to the same processor... */ + /* (this is equivalent to penalizing other processors) */ + if (p->processor == this_cpu) + weight += PROC_CHANGE_PENALTY; +#endif + + /* .. and a slight advantage to the current MM */ + if (p->mm == this_mm || !p->mm) + weight += 1; + weight += 20 - p->nice; + goto out; + } + + /* + * Realtime process, select the first one on the + * runqueue (taking priorities within processes + * into account). + */ + weight = 1000 + p->rt_priority; +out: + return weight; +} + +/* + * the 'goodness value' of replacing a process on a given CPU. + * positive value means 'replace', zero or negative means 'dont'. + */ +static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu) +{ + return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm); +} + +/* + * This is ugly, but reschedule_idle() is very timing-critical. + * We are called with the runqueue spinlock held and we must + * not claim the tasklist_lock. + */ +static FASTCALL(void reschedule_idle(struct task_struct * p)); + +static void fastcall reschedule_idle(struct task_struct * p) +{ +#ifdef CONFIG_SMP + int this_cpu = smp_processor_id(); + struct task_struct *tsk, *target_tsk; + int cpu, best_cpu, i, max_prio; + cycles_t oldest_idle; + + /* + * shortcut if the woken up task's last CPU is + * idle now. 
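The SCHED_OTHER arm of goodness() above reduces to a few additions. A self-contained replica, with PROC_CHANGE_PENALTY assumed to be 15 (its usual x86 value) and the sample numbers invented:

#include <stdio.h>

#define PROC_CHANGE_PENALTY 15  /* assumption: arch-specific, 15 on most x86 */

static int goodness_other(int counter, int nice, int same_cpu, int same_mm)
{
        int weight = counter;           /* ticks left in the slice */

        if (!weight)
                return 0;               /* slice exhausted: force recalculation */
        if (same_cpu)
                weight += PROC_CHANGE_PENALTY; /* cache/TLB affinity bonus */
        if (same_mm)
                weight += 1;            /* avoids a full mm switch */
        return weight + 20 - nice;      /* static priority component */
}

int main(void)
{
        /* Affinity wins: 6 ticks on the same CPU beats 10 ticks elsewhere. */
        printf("same cpu:  %d\n", goodness_other(6, 0, 1, 1));  /* 42 */
        printf("other cpu: %d\n", goodness_other(10, 0, 0, 0)); /* 30 */
        return 0;
}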
+ */ + best_cpu = p->processor; + if (can_schedule(p, best_cpu)) { + tsk = idle_task(best_cpu); + if (cpu_curr(best_cpu) == tsk) { + int need_resched; +send_now_idle: + /* + * If need_resched == -1 then we can skip sending + * the IPI altogether, tsk->need_resched is + * actively watched by the idle thread. + */ + need_resched = tsk->need_resched; + tsk->need_resched = 1; + if ((best_cpu != this_cpu) && !need_resched) + smp_send_reschedule(best_cpu); + return; + } + } + + /* + * We know that the preferred CPU has a cache-affine current + * process, lets try to find a new idle CPU for the woken-up + * process. Select the least recently active idle CPU. (that + * one will have the least active cache context.) Also find + * the executing process which has the least priority. + */ + oldest_idle = (cycles_t) -1; + target_tsk = NULL; + max_prio = 0; + + for (i = 0; i < smp_num_cpus; i++) { + cpu = cpu_logical_map(i); + if (!can_schedule(p, cpu)) + continue; + tsk = cpu_curr(cpu); + /* + * We use the first available idle CPU. This creates + * a priority list between idle CPUs, but this is not + * a problem. + */ + if (tsk == idle_task(cpu)) { +#if defined(__i386__) && defined(CONFIG_SMP) + /* + * Check if two siblings are idle in the same + * physical package. Use them if found. + */ + if (smp_num_siblings == 2) { + if (cpu_curr(cpu_sibling_map[cpu]) == + idle_task(cpu_sibling_map[cpu])) { + oldest_idle = last_schedule(cpu); + target_tsk = tsk; + break; + } + + } +#endif + if (last_schedule(cpu) < oldest_idle) { + oldest_idle = last_schedule(cpu); + target_tsk = tsk; + } + } else { + if (oldest_idle == (cycles_t)-1) { + int prio = preemption_goodness(tsk, p, cpu); + + if (prio > max_prio) { + max_prio = prio; + target_tsk = tsk; + } + } + } + } + tsk = target_tsk; + if (tsk) { + if (oldest_idle != (cycles_t)-1) { + best_cpu = tsk->processor; + goto send_now_idle; + } + tsk->need_resched = 1; + if (tsk->processor != this_cpu) + smp_send_reschedule(tsk->processor); + } + return; + + +#else /* UP */ + int this_cpu = smp_processor_id(); + struct task_struct *tsk; + + tsk = cpu_curr(this_cpu); + if (preemption_goodness(tsk, p, this_cpu) > 0) + tsk->need_resched = 1; +#endif +} + +/* + * Careful! + * + * This has to add the process to the _end_ of the + * run-queue, not the beginning. The goodness value will + * determine whether this process will run next. This is + * important to get SCHED_FIFO and SCHED_RR right, where + * a process that is either pre-empted or its time slice + * has expired, should be moved to the tail of the run + * queue for its priority - Bhavesh Davda + */ +static inline void add_to_runqueue(struct task_struct * p) +{ + list_add_tail(&p->run_list, &runqueue_head); + nr_running++; +} + +static inline void move_last_runqueue(struct task_struct * p) +{ + list_del(&p->run_list); + list_add_tail(&p->run_list, &runqueue_head); +} + +/* + * Wake up a process. Put it on the run-queue if it's not + * already there. The "current" process is always on the + * run-queue (except when the actual re-schedule is in + * progress), and as such you're allowed to do the simpler + * "current->state = TASK_RUNNING" to mark yourself runnable + * without the overhead of this. + */ +static inline int try_to_wake_up(struct task_struct * p, int synchronous) +{ + unsigned long flags; + int success = 0; + + /* + * We want the common case fall through straight, thus the goto. 
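try_to_wake_up() and reschedule_idle() are what a driver exercises indirectly through wake_up_process(). A sketch of the usual 2.4 pairing of a sleeping worker with a waker; mydev_task, mydev_work and the surrounding module are hypothetical, and the state is set before the condition is re-checked to avoid a lost wakeup:

static struct task_struct *mydev_task;
static volatile int mydev_work;

static int mydev_thread(void *unused)
{
        mydev_task = current;
        for (;;) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (!mydev_work)
                        schedule();     /* off the runqueue until woken */
                set_current_state(TASK_RUNNING);
                mydev_work = 0;
                /* ... process one request ... */
        }
        return 0;
}

static void mydev_kick(void)    /* e.g. from an interrupt handler */
{
        mydev_work = 1;
        wake_up_process(mydev_task);    /* try_to_wake_up(p, 0) */
}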
+ */ + spin_lock_irqsave(&runqueue_lock, flags); + p->state = TASK_RUNNING; + if (task_on_runqueue(p)) + goto out; + add_to_runqueue(p); + if (!synchronous || !(p->cpus_allowed & (1UL << smp_processor_id()))) + reschedule_idle(p); + success = 1; +out: + spin_unlock_irqrestore(&runqueue_lock, flags); + return success; +} + +inline int fastcall wake_up_process(struct task_struct * p) +{ + return try_to_wake_up(p, 0); +} + +static void process_timeout(unsigned long __data) +{ + struct task_struct * p = (struct task_struct *) __data; + + wake_up_process(p); +} + +/** + * schedule_timeout - sleep until timeout + * @timeout: timeout value in jiffies + * + * Make the current task sleep until @timeout jiffies have + * elapsed. The routine will return immediately unless + * the current task state has been set (see set_current_state()). + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to + * pass before the routine returns. The routine will return 0 + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. In this case the remaining time + * in jiffies will be returned, or 0 if the timer expired in time + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. + * + * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule + * the CPU away without a bound on the timeout. In this case the return + * value will be %MAX_SCHEDULE_TIMEOUT. + * + * In all cases the return value is guaranteed to be non-negative. + */ +signed long fastcall schedule_timeout(signed long timeout) +{ + struct timer_list timer; + unsigned long expire; + + switch (timeout) + { + case MAX_SCHEDULE_TIMEOUT: + /* + * These two special cases are useful to be comfortable + * in the caller. Nothing more. We could take + * MAX_SCHEDULE_TIMEOUT from one of the negative value + * but I' d like to return a valid offset (>=0) to allow + * the caller to do everything it want with the retval. + */ + schedule(); + goto out; + default: + /* + * Another bit of PARANOID. Note that the retval will be + * 0 since no piece of kernel is supposed to do a check + * for a negative retval of schedule_timeout() (since it + * should never happens anyway). You just have the printk() + * that will tell you if something is gone wrong and where. + */ + if (timeout < 0) + { + printk(KERN_ERR "schedule_timeout: wrong timeout " + "value %lx from %p\n", timeout, + __builtin_return_address(0)); + current->state = TASK_RUNNING; + goto out; + } + } + + expire = timeout + jiffies; + + init_timer(&timer); + timer.expires = expire; + timer.data = (unsigned long) current; + timer.function = process_timeout; + + add_timer(&timer); + schedule(); + del_timer_sync(&timer); + + timeout = expire - jiffies; + + out: + return timeout < 0 ? 0 : timeout; +} + +/* + * schedule_tail() is getting called from the fork return path. This + * cleans up all remaining scheduler things, without impacting the + * common case. + */ +static inline void __schedule_tail(struct task_struct *prev) +{ +#ifdef CONFIG_SMP + int policy; + + /* + * prev->policy can be written from here only before `prev' + * can be scheduled (before setting prev->cpus_runnable to ~0UL). + * Of course it must also be read before allowing prev + * to be rescheduled, but since the write depends on the read + * to complete, wmb() is enough. 
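The kernel-doc for schedule_timeout() above translates into a small, very common idiom. A sketch, assuming a driver that wants to nap for about 100 ms but react to signals:

/* Hypothetical helper: returns 0 on timeout, or the jiffies left
   if a signal woke us early. */
static signed long mydev_nap(void)
{
        set_current_state(TASK_INTERRUPTIBLE);
        return schedule_timeout(HZ / 10);       /* ~100 ms at any HZ */
}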
(the spin_lock() acquired + * before setting cpus_runnable is not enough because the spin_lock() + * common code semantics allows code outside the critical section + * to enter inside the critical section) + */ + policy = prev->policy; + prev->policy = policy & ~SCHED_YIELD; + wmb(); + + /* + * fast path falls through. We have to clear cpus_runnable before + * checking prev->state to avoid a wakeup race. Protect against + * the task exiting early. + */ + task_lock(prev); + task_release_cpu(prev); + mb(); + if (prev->state == TASK_RUNNING) + goto needs_resched; + +out_unlock: + task_unlock(prev); /* Synchronise here with release_task() if prev is TASK_ZOMBIE */ + return; + + /* + * Slow path - we 'push' the previous process and + * reschedule_idle() will attempt to find a new + * processor for it. (but it might preempt the + * current process as well.) We must take the runqueue + * lock and re-check prev->state to be correct. It might + * still happen that this process has a preemption + * 'in progress' already - but this is not a problem and + * might happen in other circumstances as well. + */ +needs_resched: + { + unsigned long flags; + + /* + * Avoid taking the runqueue lock in cases where + * no preemption-check is necessery: + */ + if ((prev == idle_task(smp_processor_id())) || + (policy & SCHED_YIELD)) + goto out_unlock; + + spin_lock_irqsave(&runqueue_lock, flags); + if ((prev->state == TASK_RUNNING) && !task_has_cpu(prev)) + reschedule_idle(prev); + spin_unlock_irqrestore(&runqueue_lock, flags); + goto out_unlock; + } +#else + prev->policy &= ~SCHED_YIELD; +#endif /* CONFIG_SMP */ +} + +asmlinkage void schedule_tail(struct task_struct *prev) +{ + __schedule_tail(prev); +} + +#ifdef CONFIG_SYSCALLTIMER +extern void timepeg_schedule_switchout(void); +extern void timepeg_schedule_switchin(void); +#endif + +/* + * 'schedule()' is the scheduler function. It's a very simple and nice + * scheduler: it's not perfect, but certainly works for most things. + * + * The goto is "interesting". + * + * NOTE!! Task 0 is the 'idle' task, which gets called when no other + * tasks can run. It can not be killed, and it cannot sleep. The 'state' + * information in task[0] is never used. + */ +asmlinkage void schedule(void) +{ + struct schedule_data * sched_data; + struct task_struct *prev, *next, *p; + struct list_head *tmp; + int this_cpu, c; + + + spin_lock_prefetch(&runqueue_lock); + + BUG_ON(!current->active_mm); +need_resched_back: + prev = current; + this_cpu = prev->processor; + + if (unlikely(in_interrupt())) { + printk("Scheduling in interrupt\n"); + BUG(); + } + + release_kernel_lock(prev, this_cpu); + + /* + * 'sched_data' is protected by the fact that we can run + * only one process per CPU. + */ + sched_data = & aligned_data[this_cpu].schedule_data; + + spin_lock_irq(&runqueue_lock); + + /* move an exhausted RR process to be last.. */ + if (unlikely(prev->policy == SCHED_RR)) + if (!prev->counter) { + prev->counter = NICE_TO_TICKS(prev->nice); + move_last_runqueue(prev); + } + + switch (prev->state) { + case TASK_INTERRUPTIBLE: + if (signal_pending(prev)) { + prev->state = TASK_RUNNING; + break; + } + default: + del_from_runqueue(prev); + case TASK_RUNNING:; + } + prev->need_resched = 0; + + /* + * this is the scheduler proper: + */ + +repeat_schedule: + /* + * Default process to select.. 
+ */ + next = idle_task(this_cpu); + c = -1000; + list_for_each(tmp, &runqueue_head) { + p = list_entry(tmp, struct task_struct, run_list); + if (can_schedule(p, this_cpu)) { + int weight = goodness(p, this_cpu, prev->active_mm); + if (weight > c) + c = weight, next = p; + } + } + + /* Do we need to re-calculate counters? */ + if (unlikely(!c)) { + struct task_struct *p; + + spin_unlock_irq(&runqueue_lock); + read_lock(&tasklist_lock); + for_each_task(p) + p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice); + read_unlock(&tasklist_lock); + spin_lock_irq(&runqueue_lock); + goto repeat_schedule; + } + + /* + * from this point on nothing can prevent us from + * switching to the next task, save this fact in + * sched_data. + */ + sched_data->curr = next; + task_set_cpu(next, this_cpu); + spin_unlock_irq(&runqueue_lock); + + if (unlikely(prev == next)) { + /* We won't go through the normal tail, so do this by hand */ + prev->policy &= ~SCHED_YIELD; + goto same_process; + } + +#ifdef CONFIG_SMP + /* + * maintain the per-process 'last schedule' value. + * (this has to be recalculated even if we reschedule to + * the same process) Currently this is only used on SMP, + * and it's approximate, so we do not have to maintain + * it while holding the runqueue spinlock. + */ + sched_data->last_schedule = get_cycles(); + + /* + * We drop the scheduler lock early (it's a global spinlock), + * thus we have to lock the previous process from getting + * rescheduled during switch_to(). + */ + +#endif /* CONFIG_SMP */ + + kstat.context_swtch++; + /* + * there are 3 processes which are affected by a context switch: + * + * prev == .... ==> (last => next) + * + * It's the 'much more previous' 'prev' that is on next's stack, + * but prev is set to (the just run) 'last' process by switch_to(). + * This might sound slightly confusing but makes tons of sense. + */ + prepare_to_switch(); + { + struct mm_struct *mm = next->mm; + struct mm_struct *oldmm = prev->active_mm; + if (!mm) { + BUG_ON(next->active_mm); + BUG_ON(!oldmm); + next->active_mm = oldmm; + atomic_inc(&oldmm->mm_count); + enter_lazy_tlb(oldmm, next, this_cpu); + } else { + BUG_ON(next->active_mm != mm); + switch_mm(oldmm, mm, next, this_cpu); + } + + if (!prev->mm) { + prev->active_mm = NULL; + mmdrop(oldmm); + } + } + + /* + * This just switches the register state and the + * stack. + */ +#ifdef CONFIG_SYSCALLTIMER + timepeg_schedule_switchout(); +#endif + switch_to(prev, next, prev); +#ifdef CONFIG_SYSCALLTIMER + timepeg_schedule_switchin(); +#endif + __schedule_tail(prev); + +same_process: + reacquire_kernel_lock(current); + if (current->need_resched) + goto need_resched_back; + return; +} + +/* + * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just wake everything + * up. If it's an exclusive wakeup (nr_exclusive == small +ve number) then we wake all the + * non-exclusive tasks and one exclusive task. + * + * There are circumstances in which we can try to wake a task which has already + * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns zero + * in this (rare) case, and we handle it by contonuing to scan the queue. 
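__wake_up_common() below walks waiters that were queued with the open-coded sleep idiom of the day. A sketch of both sides, assuming a 2.4 build; mydev_wq and ready are hypothetical:

static DECLARE_WAIT_QUEUE_HEAD(mydev_wq);
static volatile int ready;

static void mydev_wait_for_ready(void)
{
        DECLARE_WAITQUEUE(wait, current);

        add_wait_queue(&mydev_wq, &wait);  /* an exclusive variant also exists */
        for (;;) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (ready)
                        break;
                schedule();
        }
        set_current_state(TASK_RUNNING);
        remove_wait_queue(&mydev_wq, &wait);
}

static void mydev_mark_ready(void)
{
        ready = 1;
        wake_up(&mydev_wq);     /* __wake_up(q, TASK_INTERRUPTIBLE |
                                   TASK_UNINTERRUPTIBLE, 1) */
}

Waiters added with add_wait_queue_exclusive() carry WQ_FLAG_EXCLUSIVE, so a wake_up() stops after the first of them; that is the nr_exclusive accounting visible in __wake_up_common().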
+ */ +static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode, + int nr_exclusive, const int sync) +{ + struct list_head *tmp; + struct task_struct *p; + + CHECK_MAGIC_WQHEAD(q); + WQ_CHECK_LIST_HEAD(&q->task_list); + + list_for_each(tmp,&q->task_list) { + unsigned int state; + wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); + + CHECK_MAGIC(curr->__magic); + p = curr->task; + state = p->state; + if (state & mode) { + WQ_NOTE_WAKER(curr); + if (try_to_wake_up(p, sync) && (curr->flags&WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) + break; + } + } +} + +void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, int nr) +{ + if (q) { + unsigned long flags; + wq_read_lock_irqsave(&q->lock, flags); + __wake_up_common(q, mode, nr, 0); + wq_read_unlock_irqrestore(&q->lock, flags); + } +} + +void fastcall __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr) +{ + if (q) { + unsigned long flags; + wq_read_lock_irqsave(&q->lock, flags); + __wake_up_common(q, mode, nr, 1); + wq_read_unlock_irqrestore(&q->lock, flags); + } +} + +void fastcall complete(struct completion *x) +{ + unsigned long flags; + + spin_lock_irqsave(&x->wait.lock, flags); + x->done++; + __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0); + spin_unlock_irqrestore(&x->wait.lock, flags); +} + +void fastcall wait_for_completion(struct completion *x) +{ + spin_lock_irq(&x->wait.lock); + if (!x->done) { + DECLARE_WAITQUEUE(wait, current); + + wait.flags |= WQ_FLAG_EXCLUSIVE; + __add_wait_queue_tail(&x->wait, &wait); + do { + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&x->wait.lock); + schedule(); + spin_lock_irq(&x->wait.lock); + } while (!x->done); + __remove_wait_queue(&x->wait, &wait); + } + x->done--; + spin_unlock_irq(&x->wait.lock); +} + +#define SLEEP_ON_VAR \ + unsigned long flags; \ + wait_queue_t wait; \ + init_waitqueue_entry(&wait, current); + +#define SLEEP_ON_HEAD \ + wq_write_lock_irqsave(&q->lock,flags); \ + __add_wait_queue(q, &wait); \ + wq_write_unlock(&q->lock); + +#define SLEEP_ON_TAIL \ + wq_write_lock_irq(&q->lock); \ + __remove_wait_queue(q, &wait); \ + wq_write_unlock_irqrestore(&q->lock,flags); + +void fastcall interruptible_sleep_on(wait_queue_head_t *q) +{ + SLEEP_ON_VAR + + current->state = TASK_INTERRUPTIBLE; + + SLEEP_ON_HEAD + schedule(); + SLEEP_ON_TAIL +} + +long fastcall interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) +{ + SLEEP_ON_VAR + + current->state = TASK_INTERRUPTIBLE; + + SLEEP_ON_HEAD + timeout = schedule_timeout(timeout); + SLEEP_ON_TAIL + + return timeout; +} + +void fastcall sleep_on(wait_queue_head_t *q) +{ + SLEEP_ON_VAR + + current->state = TASK_UNINTERRUPTIBLE; + + SLEEP_ON_HEAD + schedule(); + SLEEP_ON_TAIL +} + +long fastcall sleep_on_timeout(wait_queue_head_t *q, long timeout) +{ + SLEEP_ON_VAR + + current->state = TASK_UNINTERRUPTIBLE; + + SLEEP_ON_HEAD + timeout = schedule_timeout(timeout); + SLEEP_ON_TAIL + + return timeout; +} + +void scheduling_functions_end_here(void) { } + +#if CONFIG_SMP +/** + * set_cpus_allowed() - change a given task's processor affinity + * @p: task to bind + * @new_mask: bitmask of allowed processors + * + * Upon return, the task is running on a legal processor. Note the caller + * must have a valid reference to the task: it must not exit() prematurely. + * This call can sleep; do not hold locks on call. 
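complete()/wait_for_completion() above are the race-free alternative to the sleep_on() family that follows. A minimal sketch, assuming a 2.4 build; the names are hypothetical:

#include <linux/completion.h>

static DECLARE_COMPLETION(mydev_done);

static void mydev_worker_side(void)
{
        /* ... finish the work ... */
        complete(&mydev_done);          /* done++ and wake one exclusive waiter */
}

static void mydev_waiting_side(void)
{
        wait_for_completion(&mydev_done);       /* sleeps uninterruptibly,
                                                   then consumes one done count */
}

Unlike sleep_on(), there is no window between testing the condition and sleeping: the done counter is checked and decremented under x->wait.lock.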
+ */ +void set_cpus_allowed(struct task_struct *p, unsigned long new_mask) +{ + new_mask &= cpu_online_map; + BUG_ON(!new_mask); + + p->cpus_allowed = new_mask; + + /* + * If the task is on a no-longer-allowed processor, we need to move + * it. If the task is not current, then set need_resched and send + * its processor an IPI to reschedule. + */ + if (!(p->cpus_runnable & p->cpus_allowed)) { + if (p != current) { + p->need_resched = 1; + smp_send_reschedule(p->processor); + } + /* + * Wait until we are on a legal processor. If the task is + * current, then we should be on a legal processor the next + * time we reschedule. Otherwise, we need to wait for the IPI. + */ + while (!(p->cpus_runnable & p->cpus_allowed)) + schedule(); + } +} +#endif /* CONFIG_SMP */ + +#ifndef __alpha__ + +/* + * This has been replaced by sys_setpriority. Maybe it should be + * moved into the arch dependent tree for those ports that require + * it for backward compatibility? + */ + +asmlinkage long sys_nice(int increment) +{ + long newprio; + + /* + * Setpriority might change our priority at the same moment. + * We don't have to worry. Conceptually one call occurs first + * and we have a single winner. + */ + if (increment < 0) { + if (!capable(CAP_SYS_NICE)) + return -EPERM; + if (increment < -40) + increment = -40; + } + if (increment > 40) + increment = 40; + + newprio = current->nice + increment; + if (newprio < -20) + newprio = -20; + if (newprio > 19) + newprio = 19; + current->nice = newprio; + return 0; +} + +#endif + +static inline struct task_struct *find_process_by_pid(pid_t pid) +{ + struct task_struct *tsk = current; + + if (pid) + tsk = find_task_by_pid(pid); + return tsk; +} + +static int setscheduler(pid_t pid, int policy, + struct sched_param *param) +{ + struct sched_param lp; + struct task_struct *p; + int retval; + + retval = -EINVAL; + if (!param || pid < 0) + goto out_nounlock; + + retval = -EFAULT; + if (copy_from_user(&lp, param, sizeof(struct sched_param))) + goto out_nounlock; + + /* + * We play safe to avoid deadlocks. + */ + read_lock_irq(&tasklist_lock); + spin_lock(&runqueue_lock); + + p = find_process_by_pid(pid); + + retval = -ESRCH; + if (!p) + goto out_unlock; + + if (policy < 0) + policy = p->policy; + else { + retval = -EINVAL; + if (policy != SCHED_FIFO && policy != SCHED_RR && + policy != SCHED_OTHER) + goto out_unlock; + } + + /* + * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid + * priority for SCHED_OTHER is 0. 
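From user space the setscheduler() path above is reached through sched_setscheduler(2). A minimal example that flips the calling process to SCHED_FIFO priority 50 (this needs CAP_SYS_NICE, i.e. root, per the checks that follow):

#include <sched.h>
#include <stdio.h>

int main(void)
{
        struct sched_param sp;

        sp.sched_priority = 50;         /* 1..99 for SCHED_FIFO/SCHED_RR */
        if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
                perror("sched_setscheduler");   /* EPERM without CAP_SYS_NICE */
                return 1;
        }
        printf("policy is now %d\n", sched_getscheduler(0));
        return 0;
}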
+ */ + retval = -EINVAL; + if (lp.sched_priority < 0 || lp.sched_priority > 99) + goto out_unlock; + if ((policy == SCHED_OTHER) != (lp.sched_priority == 0)) + goto out_unlock; + + retval = -EPERM; + if ((policy == SCHED_FIFO || policy == SCHED_RR) && + !capable(CAP_SYS_NICE)) + goto out_unlock; + if ((current->euid != p->euid) && (current->euid != p->uid) && + !capable(CAP_SYS_NICE)) + goto out_unlock; + + retval = 0; + p->policy = policy; + p->rt_priority = lp.sched_priority; + + current->need_resched = 1; + +out_unlock: + spin_unlock(&runqueue_lock); + read_unlock_irq(&tasklist_lock); + +out_nounlock: + return retval; +} + +asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, + struct sched_param *param) +{ + return setscheduler(pid, policy, param); +} + +asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param *param) +{ + return setscheduler(pid, -1, param); +} + +asmlinkage long sys_sched_getscheduler(pid_t pid) +{ + struct task_struct *p; + int retval; + + retval = -EINVAL; + if (pid < 0) + goto out_nounlock; + + retval = -ESRCH; + read_lock(&tasklist_lock); + p = find_process_by_pid(pid); + if (p) + retval = p->policy & ~SCHED_YIELD; + read_unlock(&tasklist_lock); + +out_nounlock: + return retval; +} + +asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param) +{ + struct task_struct *p; + struct sched_param lp; + int retval; + + retval = -EINVAL; + if (!param || pid < 0) + goto out_nounlock; + + read_lock(&tasklist_lock); + p = find_process_by_pid(pid); + retval = -ESRCH; + if (!p) + goto out_unlock; + lp.sched_priority = p->rt_priority; + read_unlock(&tasklist_lock); + + /* + * This one might sleep, we cannot do it with a spinlock held ... + */ + retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; + +out_nounlock: + return retval; + +out_unlock: + read_unlock(&tasklist_lock); + return retval; +} + +asmlinkage long sys_sched_yield(void) +{ + /* + * Trick. sched_yield() first counts the number of truly + * 'pending' runnable processes, then returns if it's + * only the current processes. (This test does not have + * to be atomic.) In threaded applications this optimization + * gets triggered quite often. + */ + + int nr_pending = nr_running; + +#if CONFIG_SMP + int i; + + // Subtract non-idle processes running on other CPUs. + for (i = 0; i < smp_num_cpus; i++) { + int cpu = cpu_logical_map(i); + if (aligned_data[cpu].schedule_data.curr != idle_task(cpu)) + nr_pending--; + } +#else + // on UP this process is on the runqueue as well + nr_pending--; +#endif + if (nr_pending) { + /* + * This process can only be rescheduled by us, + * so this is safe without any locking. + */ + if (current->policy == SCHED_OTHER) + current->policy |= SCHED_YIELD; + current->need_resched = 1; + + spin_lock_irq(&runqueue_lock); + move_last_runqueue(current); + spin_unlock_irq(&runqueue_lock); + } + return 0; +} + +/** + * yield - yield the current processor to other threads. + * + * this is a shortcut for kernel-space yielding - it marks the + * thread runnable and calls sys_sched_yield(). 
+ */ +void yield(void) +{ + set_current_state(TASK_RUNNING); + sys_sched_yield(); + schedule(); +} + +void __cond_resched(void) +{ + set_current_state(TASK_RUNNING); + schedule(); +} + +asmlinkage long sys_sched_get_priority_max(int policy) +{ + int ret = -EINVAL; + + switch (policy) { + case SCHED_FIFO: + case SCHED_RR: + ret = 99; + break; + case SCHED_OTHER: + ret = 0; + break; + } + return ret; +} + +asmlinkage long sys_sched_get_priority_min(int policy) +{ + int ret = -EINVAL; + + switch (policy) { + case SCHED_FIFO: + case SCHED_RR: + ret = 1; + break; + case SCHED_OTHER: + ret = 0; + } + return ret; +} + +asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval) +{ + struct timespec t; + struct task_struct *p; + int retval = -EINVAL; + + if (pid < 0) + goto out_nounlock; + + retval = -ESRCH; + read_lock(&tasklist_lock); + p = find_process_by_pid(pid); + if (p) + jiffies_to_timespec(p->policy & SCHED_FIFO ? 0 : NICE_TO_TICKS(p->nice), + &t); + read_unlock(&tasklist_lock); + if (p) + retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; +out_nounlock: + return retval; +} + +static void show_task(struct task_struct * p) +{ + unsigned long free = 0; + int state; + static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" }; + + printk("%-13.13s ", p->comm); + state = p->state ? ffz(~p->state) + 1 : 0; + if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *)) + printk(stat_nam[state]); + else + printk(" "); +#if (BITS_PER_LONG == 32) + if (p == current) + printk(" current "); + else + printk(" %08lX ", thread_saved_pc(&p->thread)); +#else + if (p == current) + printk(" current task "); + else + printk(" %016lx ", thread_saved_pc(&p->thread)); +#endif + { + unsigned long * n = (unsigned long *) (p+1); + while (!*n) + n++; + free = (unsigned long) n - (unsigned long)(p+1); + } + printk("%5lu %5d %6d ", free, p->pid, p->p_pptr->pid); + if (p->p_cptr) + printk("%5d ", p->p_cptr->pid); + else + printk(" "); + if (p->p_ysptr) + printk("%7d", p->p_ysptr->pid); + else + printk(" "); + if (p->p_osptr) + printk(" %5d", p->p_osptr->pid); + else + printk(" "); + if (!p->mm) + printk(" (L-TLB)\n"); + else + printk(" (NOTLB)\n"); + + { + extern void show_trace_task(struct task_struct *tsk); + show_trace_task(p); + } +} + +char * render_sigset_t(sigset_t *set, char *buffer) +{ + int i = _NSIG, x; + do { + i -= 4, x = 0; + if (sigismember(set, i+1)) x |= 1; + if (sigismember(set, i+2)) x |= 2; + if (sigismember(set, i+3)) x |= 4; + if (sigismember(set, i+4)) x |= 8; + *buffer++ = (x < 10 ? '0' : 'a' - 10) + x; + } while (i >= 4); + *buffer = 0; + return buffer; +} + +void show_state(void) +{ + struct task_struct *p; + +#if (BITS_PER_LONG == 32) + printk("\n" + " free sibling\n"); + printk(" task PC stack pid father child younger older\n"); +#else + printk("\n" + " free sibling\n"); + printk(" task PC stack pid father child younger older\n"); +#endif + read_lock(&tasklist_lock); + for_each_task(p) { + /* + * reset the NMI-timeout, listing all files on a slow + * console might take alot of time: + */ + touch_nmi_watchdog(); + show_task(p); + } + read_unlock(&tasklist_lock); +} + +/** + * reparent_to_init() - Reparent the calling kernel thread to the init task. + * + * If a kernel thread is launched as a result of a system call, or if + * it ever exits, it should generally reparent itself to init so that + * it is correctly cleaned up on exit. 
+ * The various task state such as scheduling policy and priority may have + * been inherited from a user process, so we reset them to sane values here. + * + * NOTE that reparent_to_init() gives the caller full capabilities. + */ +void reparent_to_init(void) +{ + struct task_struct *this_task = current; + + write_lock_irq(&tasklist_lock); + + /* Reparent to init */ + REMOVE_LINKS(this_task); + this_task->p_pptr = child_reaper; + this_task->p_opptr = child_reaper; + SET_LINKS(this_task); + + /* Set the exit signal to SIGCHLD so we signal init on exit */ + this_task->exit_signal = SIGCHLD; + + /* We also take the runqueue_lock while altering task fields + * which affect scheduling decisions */ + spin_lock(&runqueue_lock); + + this_task->ptrace = 0; + this_task->nice = DEF_NICE; + this_task->policy = SCHED_OTHER; + /* cpus_allowed? */ + /* rt_priority? */ + /* signals? */ + this_task->cap_effective = CAP_INIT_EFF_SET; + this_task->cap_inheritable = CAP_INIT_INH_SET; + this_task->cap_permitted = CAP_FULL_SET; + this_task->keep_capabilities = 0; + memcpy(this_task->rlim, init_task.rlim, sizeof(*(this_task->rlim))); + switch_uid(INIT_USER); + + spin_unlock(&runqueue_lock); + write_unlock_irq(&tasklist_lock); +} + +/* + * Put all the gunge required to become a kernel thread without + * attached user resources in one place where it belongs. + */ + +void daemonize(void) +{ + struct fs_struct *fs; + + + /* + * If we were started as result of loading a module, close all of the + * user space pages. We don't need them, and if we didn't close them + * they would be locked into memory. + */ + exit_mm(current); + + current->session = 1; + current->pgrp = 1; + current->tty = NULL; + + /* Become as one with the init task */ + + exit_fs(current); /* current->fs->count--; */ + fs = init_task.fs; + current->fs = fs; + atomic_inc(&fs->count); + exit_files(current); + current->files = init_task.files; + atomic_inc(&current->files->count); +} + +extern unsigned long wait_init_idle; + +void __init init_idle(void) +{ + struct schedule_data * sched_data; + sched_data = &aligned_data[smp_processor_id()].schedule_data; + + if (current != &init_task && task_on_runqueue(current)) { + printk("UGH! (%d:%d) was on the runqueue, removing.\n", + smp_processor_id(), current->pid); + del_from_runqueue(current); + } + sched_data->curr = current; + sched_data->last_schedule = get_cycles(); + clear_bit(current->processor, &wait_init_idle); +} + +extern void init_timervecs (void); + +void __init sched_init(void) +{ + /* + * We have to do a little magic to get the first + * process right in SMP mode.
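daemonize() above is what 2.4 drivers called at the top of a kernel thread to shed the user-space context inherited from the spawning process. A sketch of the customary pattern; mydev_thread and the thread's job are hypothetical:

static int mydev_thread(void *unused)
{
        daemonize();                    /* drop mm, detach fs/files as above */
        sprintf(current->comm, "mydevd");       /* name shown by ps */

        for (;;) {
                /* ... service the device ... */
                set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(HZ);   /* poll once a second */
        }
        return 0;
}

static void mydev_start(void)
{
        kernel_thread(mydev_thread, NULL, CLONE_FS | CLONE_FILES);
}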
+ */ + int cpu = smp_processor_id(); + int nr; + + init_task.processor = cpu; + + for(nr = 0; nr < PIDHASH_SZ; nr++) + pidhash[nr] = NULL; + + init_timervecs(); + + init_bh(TIMER_BH, timer_bh); + init_bh(TQUEUE_BH, tqueue_bh); + init_bh(IMMEDIATE_BH, immediate_bh); + + /* + * The boot idle thread does lazy MMU switching as well: + */ + atomic_inc(&init_mm.mm_count); + enter_lazy_tlb(&init_mm, current, cpu); +} diff --git a/uClinux-2.4.31-uc0/kernel/signal.c b/uClinux-2.4.31-uc0/kernel/signal.c new file mode 100644 index 0000000..d40a085 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/signal.c @@ -0,0 +1,1326 @@ +/* + * linux/kernel/signal.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson + */ + +#include <linux/config.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/unistd.h> +#include <linux/smp_lock.h> +#include <linux/init.h> +#include <linux/sched.h> + +#include <asm/uaccess.h> + +/* + * SLAB caches for signal bits. + */ + +#define DEBUG_SIG 0 + +#if DEBUG_SIG +#define SIG_SLAB_DEBUG (SLAB_DEBUG_FREE | SLAB_RED_ZONE /* | SLAB_POISON */) +#else +#define SIG_SLAB_DEBUG 0 +#endif + +static kmem_cache_t *sigqueue_cachep; + +atomic_t nr_queued_signals; +int max_queued_signals = 1024; + +void __init signals_init(void) +{ + sigqueue_cachep = + kmem_cache_create("sigqueue", + sizeof(struct sigqueue), + __alignof__(struct sigqueue), + SIG_SLAB_DEBUG, NULL, NULL); + if (!sigqueue_cachep) + panic("signals_init(): cannot create sigqueue SLAB cache"); +} + + +/* Given the mask, find the first available signal that should be serviced. */ + +static int +next_signal(struct task_struct *tsk, sigset_t *mask) +{ + unsigned long i, *s, *m, x; + int sig = 0; + + s = tsk->pending.signal.sig; + m = mask->sig; + switch (_NSIG_WORDS) { + default: + for (i = 0; i < _NSIG_WORDS; ++i, ++s, ++m) + if ((x = *s &~ *m) != 0) { + sig = ffz(~x) + i*_NSIG_BPW + 1; + break; + } + break; + + case 2: if ((x = s[0] &~ m[0]) != 0) + sig = 1; + else if ((x = s[1] &~ m[1]) != 0) + sig = _NSIG_BPW + 1; + else + break; + sig += ffz(~x); + break; + + case 1: if ((x = *s &~ *m) != 0) + sig = ffz(~x) + 1; + break; + } + + return sig; +} + +static void flush_sigqueue(struct sigpending *queue) +{ + struct sigqueue *q, *n; + + sigemptyset(&queue->signal); + q = queue->head; + queue->head = NULL; + queue->tail = &queue->head; + + while (q) { + n = q->next; + kmem_cache_free(sigqueue_cachep, q); + atomic_dec(&nr_queued_signals); + q = n; + } +} + +/* + * Flush all pending signals for a task. + */ + +void +flush_signals(struct task_struct *t) +{ + t->sigpending = 0; + flush_sigqueue(&t->pending); +} + +void exit_sighand(struct task_struct *tsk) +{ + struct signal_struct * sig = tsk->sig; + + spin_lock_irq(&tsk->sigmask_lock); + if (sig) { + tsk->sig = NULL; + if (atomic_dec_and_test(&sig->count)) + kmem_cache_free(sigact_cachep, sig); + } + tsk->sigpending = 0; + flush_sigqueue(&tsk->pending); + spin_unlock_irq(&tsk->sigmask_lock); +} + +/* + * Flush all handlers for a task. + */ + +void +flush_signal_handlers(struct task_struct *t) +{ + int i; + struct k_sigaction *ka = &t->sig->action[0]; + for (i = _NSIG ; i != 0 ; i--) { + if (ka->sa.sa_handler != SIG_IGN) + ka->sa.sa_handler = SIG_DFL; + ka->sa.sa_flags = 0; + sigemptyset(&ka->sa.sa_mask); + ka++; + } +} + +/* + * sig_exit - cause the current task to exit due to a signal. 
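The scan in next_signal() above leans on ffz(~x), which yields the index of the lowest set bit, so the lowest-numbered pending, non-blocked signal is delivered first. A standalone illustration with a hypothetical 32-bit mask (a portable loop stands in for ffz):

#include <stdio.h>

static int lowest_set_bit_plus_one(unsigned long x)     /* ffz(~x) + 1 */
{
        int sig = 1;

        while (!(x & 1)) {
                x >>= 1;
                sig++;
        }
        return sig;
}

int main(void)
{
        unsigned long pending = (1UL << (2 - 1)) | (1UL << (15 - 1)); /* SIGINT, SIGTERM */
        unsigned long blocked = (1UL << (2 - 1));                     /* SIGINT blocked */
        unsigned long x = pending & ~blocked;   /* the *s &~ *m of next_signal() */

        if (x)
                printf("deliver signal %d\n", lowest_set_bit_plus_one(x)); /* 15 */
        return 0;
}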
*/ + +void +sig_exit(int sig, int exit_code, struct siginfo *info) +{ + struct task_struct *t; + + sigaddset(&current->pending.signal, sig); + recalc_sigpending(current); + current->flags |= PF_SIGNALED; + + /* Propagate the signal to all the tasks in + * our thread group + */ + if (info && (unsigned long)info != 1 + && info->si_code != SI_TKILL) { + read_lock(&tasklist_lock); + for_each_thread(t) { + force_sig_info(sig, info, t); + } + read_unlock(&tasklist_lock); + } + + do_exit(exit_code); + /* NOTREACHED */ +} + +/* Notify the system that a driver wants to block all signals for this + * process, and wants to be notified if any signals at all were to be + * sent/acted upon. If the notifier routine returns non-zero, then the + * signal will be acted upon after all. If the notifier routine returns 0, + * then the signal will be blocked. Only one block per process is + * allowed. priv is a pointer to private data that the notifier routine + * can use to determine if the signal should be blocked or not. */ + +void +block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask) +{ + unsigned long flags; + + spin_lock_irqsave(&current->sigmask_lock, flags); + current->notifier_mask = mask; + current->notifier_data = priv; + current->notifier = notifier; + spin_unlock_irqrestore(&current->sigmask_lock, flags); +} + +/* Notify the system that blocking has ended. */ + +void +unblock_all_signals(void) +{ + unsigned long flags; + + spin_lock_irqsave(&current->sigmask_lock, flags); + current->notifier = NULL; + current->notifier_data = NULL; + recalc_sigpending(current); + spin_unlock_irqrestore(&current->sigmask_lock, flags); +} + +static int collect_signal(int sig, struct sigpending *list, siginfo_t *info) +{ + if (sigismember(&list->signal, sig)) { + /* Collect the siginfo appropriate to this signal. */ + struct sigqueue *q, **pp; + pp = &list->head; + while ((q = *pp) != NULL) { + if (q->info.si_signo == sig) + goto found_it; + pp = &q->next; + } + + /* Ok, it wasn't in the queue. We must have + been out of queue space. So zero out the + info. */ + sigdelset(&list->signal, sig); + info->si_signo = sig; + info->si_errno = 0; + info->si_code = 0; + info->si_pid = 0; + info->si_uid = 0; + return 1; + +found_it: + if ((*pp = q->next) == NULL) + list->tail = pp; + + /* Copy the sigqueue information and free the queue entry */ + copy_siginfo(info, &q->info); + kmem_cache_free(sigqueue_cachep,q); + atomic_dec(&nr_queued_signals); + + /* Non-RT signals can exist multiple times.. */ + if (sig >= SIGRTMIN) { + while ((q = *pp) != NULL) { + if (q->info.si_signo == sig) + goto found_another; + pp = &q->next; + } + } + + sigdelset(&list->signal, sig); +found_another: + return 1; + } + return 0; +} + +/* + * Dequeue a signal and return the element to the caller, which is + * expected to free it. + * + * All callers must be holding current->sigmask_lock. + */ + +int +dequeue_signal(sigset_t *mask, siginfo_t *info) +{ + int sig = 0; + +#if DEBUG_SIG +printk("SIG dequeue (%s:%d): %d ", current->comm, current->pid, + signal_pending(current)); +#endif + + sig = next_signal(current, mask); + if (sig) { + if (current->notifier) { + if (sigismember(current->notifier_mask, sig)) { + if (!(current->notifier)(current->notifier_data)) { + current->sigpending = 0; + return 0; + } + } + } + + if (!collect_signal(sig, &current->pending, info)) + sig = 0; + + /* XXX: Once POSIX.1b timers are in, if si_code == SI_TIMER, + we need to xchg out the timer overrun values.
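block_all_signals() above installs a per-process notifier that dequeue_signal() consults; the X server's DRM path was its classic user. A hedged sketch of the shape of such a hook; mydev_busy and the callers are hypothetical, and the sigset_t passed in must stay valid while the block is armed:

static int mydev_busy;

static int mydev_signal_notifier(void *priv)
{
        /* Return non-zero to let the signal through now,
           0 to leave it pending until later. */
        return !mydev_busy;
}

static void mydev_enter_critical(sigset_t *mask)
{
        mydev_busy = 1;
        block_all_signals(mydev_signal_notifier, NULL, mask);
}

static void mydev_leave_critical(void)
{
        mydev_busy = 0;
        unblock_all_signals();  /* also re-runs recalc_sigpending() */
}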
*/ + } + recalc_sigpending(current); + +#if DEBUG_SIG +printk(" %d -> %d\n", signal_pending(current), sig); +#endif + + return sig; +} + +static int rm_from_queue(int sig, struct sigpending *s) +{ + struct sigqueue *q, **pp; + + if (!sigismember(&s->signal, sig)) + return 0; + + sigdelset(&s->signal, sig); + + pp = &s->head; + + while ((q = *pp) != NULL) { + if (q->info.si_signo == sig) { + if ((*pp = q->next) == NULL) + s->tail = pp; + kmem_cache_free(sigqueue_cachep,q); + atomic_dec(&nr_queued_signals); + continue; + } + pp = &q->next; + } + return 1; +} + +/* + * Remove signal sig from t->pending. + * Returns 1 if sig was found. + * + * All callers must be holding t->sigmask_lock. + */ +static int rm_sig_from_queue(int sig, struct task_struct *t) +{ + return rm_from_queue(sig, &t->pending); +} + +/* + * Bad permissions for sending the signal + */ +int bad_signal(int sig, struct siginfo *info, struct task_struct *t) +{ + return (!info || ((unsigned long)info != 1 && SI_FROMUSER(info))) + && ((sig != SIGCONT) || (current->session != t->session)) + && (current->euid ^ t->suid) && (current->euid ^ t->uid) + && (current->uid ^ t->suid) && (current->uid ^ t->uid) + && !capable(CAP_KILL); +} + +/* + * Signal type: + * < 0 : global action (kill - spread to all non-blocked threads) + * = 0 : ignored + * > 0 : wake up. + */ +static int signal_type(int sig, struct signal_struct *signals) +{ + unsigned long handler; + + if (!signals) + return 0; + + handler = (unsigned long) signals->action[sig-1].sa.sa_handler; + if (handler > 1) + return 1; + + /* "Ignore" handler.. Illogical, but that has an implicit handler for SIGCHLD */ + if (handler == 1) + return sig == SIGCHLD; + + /* Default handler. Normally lethal, but.. */ + switch (sig) { + + /* Ignored */ + case SIGCONT: case SIGWINCH: + case SIGCHLD: case SIGURG: + return 0; + + /* Implicit behaviour */ + case SIGTSTP: case SIGTTIN: case SIGTTOU: + return 1; + + /* Implicit actions (kill or do special stuff) */ + default: + return -1; + } +} + + +/* + * Determine whether a signal should be posted or not. + * + * Signals with SIG_IGN can be ignored, except for the + * special case of a SIGCHLD. + * + * Some signals with SIG_DFL default to a non-action. + */ +static int ignored_signal(int sig, struct task_struct *t) +{ + /* Don't ignore traced or blocked signals */ + if ((t->ptrace & PT_PTRACED) || sigismember(&t->blocked, sig)) + return 0; + + return signal_type(sig, t->sig) == 0; +} + +/* + * Handle TASK_STOPPED cases etc implicit behaviour + * of certain magical signals. + * + * SIGKILL gets spread out to every thread. + */ +static void handle_stop_signal(int sig, struct task_struct *t) +{ + switch (sig) { + case SIGKILL: case SIGCONT: + /* Wake up the process if stopped. */ + if (t->state == TASK_STOPPED) + wake_up_process(t); + t->exit_code = 0; + rm_sig_from_queue(SIGSTOP, t); + rm_sig_from_queue(SIGTSTP, t); + rm_sig_from_queue(SIGTTOU, t); + rm_sig_from_queue(SIGTTIN, t); + break; + + case SIGSTOP: case SIGTSTP: + case SIGTTIN: case SIGTTOU: + /* If we're stopping again, cancel SIGCONT */ + rm_sig_from_queue(SIGCONT, t); + break; + } +} + +static int send_signal(int sig, struct siginfo *info, struct sigpending *signals) +{ + struct sigqueue * q = NULL; + + /* Real-time signals must be queued if sent by sigqueue, or + some other real-time mechanism. It is implementation + defined whether kill() does so. 
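The XOR chains in bad_signal() above are just compact inequality tests: a ^ b is zero exactly when a == b, so ANDing four XORs denies the signal only when none of the uid pairs match (absent CAP_KILL). A standalone restatement:

#include <stdio.h>

static int uid_pair_matches(unsigned euid, unsigned uid,
                            unsigned t_suid, unsigned t_uid)
{
        /* mirrors !((euid^t_suid) && (euid^t_uid) &&
                     (uid^t_suid) && (uid^t_uid)) in bad_signal() */
        return !((euid ^ t_suid) && (euid ^ t_uid) &&
                 (uid ^ t_suid) && (uid ^ t_uid));
}

int main(void)
{
        printf("%d\n", uid_pair_matches(1000, 1000, 1000, 1000)); /* 1: allowed */
        printf("%d\n", uid_pair_matches(1000, 1000, 0, 0));       /* 0: denied */
        return 0;
}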
We attempt to do so, on + the principle of least surprise, but since kill is not + allowed to fail with EAGAIN when low on memory we just + make sure at least one signal gets delivered and don't + pass on the info struct. */ + + if (atomic_read(&nr_queued_signals) < max_queued_signals) { + q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC); + } + + if (q) { + atomic_inc(&nr_queued_signals); + q->next = NULL; + *signals->tail = q; + signals->tail = &q->next; + switch ((unsigned long) info) { + case 0: + q->info.si_signo = sig; + q->info.si_errno = 0; + q->info.si_code = SI_USER; + q->info.si_pid = current->pid; + q->info.si_uid = current->uid; + break; + case 1: + q->info.si_signo = sig; + q->info.si_errno = 0; + q->info.si_code = SI_KERNEL; + q->info.si_pid = 0; + q->info.si_uid = 0; + break; + default: + copy_siginfo(&q->info, info); + break; + } + } else if (sig >= SIGRTMIN && info && (unsigned long)info != 1 + && info->si_code != SI_USER) { + /* + * Queue overflow, abort. We may abort if the signal was rt + * and sent by user using something other than kill(). + */ + return -EAGAIN; + } + + sigaddset(&signals->signal, sig); + return 0; +} + +/* + * Tell a process that it has a new active signal.. + * + * NOTE! we rely on the previous spin_lock to + * lock interrupts for us! We can only be called with + * "sigmask_lock" held, and the local interrupt must + * have been disabled when that got acquired! + * + * No need to set need_resched since signal event passing + * goes through ->blocked + */ +static inline void signal_wake_up(struct task_struct *t) +{ + t->sigpending = 1; + +#ifdef CONFIG_SMP + /* + * If the task is running on a different CPU + * force a reschedule on the other CPU to make + * it notice the new signal quickly. + * + * The code below is a tad loose and might occasionally + * kick the wrong CPU if we catch the process in the + * process of changing - but no harm is done by that + * other than doing an extra (lightweight) IPI interrupt. + */ + spin_lock(&runqueue_lock); + if (task_has_cpu(t) && t->processor != smp_processor_id()) + smp_send_reschedule(t->processor); + spin_unlock(&runqueue_lock); +#endif /* CONFIG_SMP */ + + if (t->state & TASK_INTERRUPTIBLE) { + wake_up_process(t); + return; + } +} + +static int deliver_signal(int sig, struct siginfo *info, struct task_struct *t) +{ + int retval = send_signal(sig, info, &t->pending); + + if (!retval && !sigismember(&t->blocked, sig)) + signal_wake_up(t); + + return retval; +} + +int +send_sig_info(int sig, struct siginfo *info, struct task_struct *t) +{ + unsigned long flags; + int ret; + + +#if DEBUG_SIG +printk("SIG queue (%s:%d): %d ", t->comm, t->pid, sig); +#endif + + ret = -EINVAL; + if (sig < 0 || sig > _NSIG) + goto out_nolock; + /* The somewhat baroque permissions check... */ + ret = -EPERM; + if (bad_signal(sig, info, t)) + goto out_nolock; + + /* The null signal is a permissions and process existence probe. + No signal is actually delivered. Same goes for zombies. */ + ret = 0; + if (!sig || !t->sig) + goto out_nolock; + + spin_lock_irqsave(&t->sigmask_lock, flags); + handle_stop_signal(sig, t); + + /* Optimize away the signal, if it's a signal that can be + handled immediately (ie non-blocked and untraced) and + that is ignored (either explicitly or by default). */ + + if (ignored_signal(sig, t)) + goto out; + + /* Support queueing exactly one non-rt signal, so that we + can get more detailed information about the cause of + the signal. 
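What this queueing buys user space is that real-time signals carry a payload and do not collapse into a single pending bit. A minimal POSIX demonstration (printf inside a handler is for illustration only; it is not async-signal-safe):

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void handler(int sig, siginfo_t *si, void *ctx)
{
        /* si->si_value arrives via the copy_siginfo() path above */
        printf("signal %d, payload %d\n", sig, si->si_value.sival_int);
}

int main(void)
{
        struct sigaction sa;
        union sigval v;

        sa.sa_sigaction = handler;
        sa.sa_flags = SA_SIGINFO;
        sigemptyset(&sa.sa_mask);
        sigaction(SIGRTMIN, &sa, NULL);

        v.sival_int = 42;
        sigqueue(getpid(), SIGRTMIN, v); /* queued even if one is already pending */
        return 0;
}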
*/ + if (sig < SIGRTMIN && sigismember(&t->pending.signal, sig)) + goto out; + + ret = deliver_signal(sig, info, t); +out: + spin_unlock_irqrestore(&t->sigmask_lock, flags); +out_nolock: +#if DEBUG_SIG +printk(" %d -> %d\n", signal_pending(t), ret); +#endif + + return ret; +} + +/* + * Force a signal that the process can't ignore: if necessary + * we unblock the signal and change any SIG_IGN to SIG_DFL. + */ + +int +force_sig_info(int sig, struct siginfo *info, struct task_struct *t) +{ + unsigned long int flags; + + spin_lock_irqsave(&t->sigmask_lock, flags); + if (t->sig == NULL) { + spin_unlock_irqrestore(&t->sigmask_lock, flags); + return -ESRCH; + } + + if (t->sig->action[sig-1].sa.sa_handler == SIG_IGN) + t->sig->action[sig-1].sa.sa_handler = SIG_DFL; + sigdelset(&t->blocked, sig); + recalc_sigpending(t); + spin_unlock_irqrestore(&t->sigmask_lock, flags); + + return send_sig_info(sig, info, t); +} + +/* + * kill_pg_info() sends a signal to a process group: this is what the tty + * control characters do (^C, ^Z etc) + */ + +int +kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) +{ + int retval = -EINVAL; + if (pgrp > 0) { + struct task_struct *p; + + retval = -ESRCH; + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->pgrp == pgrp && thread_group_leader(p)) { + int err = send_sig_info(sig, info, p); + if (retval) + retval = err; + } + } + read_unlock(&tasklist_lock); + } + return retval; +} + +/* + * kill_sl_info() sends a signal to the session leader: this is used + * to send SIGHUP to the controlling process of a terminal when + * the connection is lost. + */ + +int +kill_sl_info(int sig, struct siginfo *info, pid_t sess) +{ + int retval = -EINVAL; + if (sess > 0) { + struct task_struct *p; + + retval = -ESRCH; + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->leader && p->session == sess) { + int err = send_sig_info(sig, info, p); + if (retval) + retval = err; + } + } + read_unlock(&tasklist_lock); + } + return retval; +} + +inline int +kill_proc_info(int sig, struct siginfo *info, pid_t pid) +{ + int error; + struct task_struct *p; + + read_lock(&tasklist_lock); + p = find_task_by_pid(pid); + error = -ESRCH; + if (p) { + if (!thread_group_leader(p)) { + struct task_struct *tg; + tg = find_task_by_pid(p->tgid); + if (tg) + p = tg; + } + error = send_sig_info(sig, info, p); + } + read_unlock(&tasklist_lock); + return error; +} + + +/* + * kill_something_info() interprets pid in interesting ways just like kill(2). + * + * POSIX specifies that kill(-1,sig) is unspecified, but what we have + * is probably wrong. Should make it like BSD or SYSV. + */ + +static int kill_something_info(int sig, struct siginfo *info, int pid) +{ + if (!pid) { + return kill_pg_info(sig, info, current->pgrp); + } else if (pid == -1) { + int retval = 0, count = 0; + struct task_struct * p; + + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->pid > 1 && p != current && thread_group_leader(p)) { + int err = send_sig_info(sig, info, p); + ++count; + if (err != -EPERM) + retval = err; + } + } + read_unlock(&tasklist_lock); + return count ? retval : -ESRCH; + } else if (pid < 0) { + return kill_pg_info(sig, info, -pid); + } else { + return kill_proc_info(sig, info, pid); + } +} + +/* + * These are for backward compatibility with the rest of the kernel source. 
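kill_something_info() above implements the pid conventions of kill(2). From the user side, including the null-signal probe that send_sig_info() documents:

#include <signal.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
        /* Signal 0: permission and existence check only, nothing delivered. */
        if (kill(1234, 0) == 0)                 /* 1234 is a hypothetical pid */
                printf("pid 1234 exists and we may signal it\n");
        else if (errno == ESRCH)
                printf("no such process\n");
        else if (errno == EPERM)
                printf("exists, but bad_signal() said no\n");

        /* pid == 0  -> caller's process group (kill_pg_info)
           pid <  -1 -> process group -pid
           pid == -1 -> everything except init and self */
        return 0;
}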
+ */ + +int +send_sig(int sig, struct task_struct *p, int priv) +{ + return send_sig_info(sig, (void*)(long)(priv != 0), p); +} + +void +force_sig(int sig, struct task_struct *p) +{ + force_sig_info(sig, (void*)1L, p); +} + +int +kill_pg(pid_t pgrp, int sig, int priv) +{ + return kill_pg_info(sig, (void *)(long)(priv != 0), pgrp); +} + +int +kill_sl(pid_t sess, int sig, int priv) +{ + return kill_sl_info(sig, (void *)(long)(priv != 0), sess); +} + +int +kill_proc(pid_t pid, int sig, int priv) +{ + return kill_proc_info(sig, (void *)(long)(priv != 0), pid); +} + +/* + * Joy. Or not. Pthread wants us to wake up every thread + * in our parent group. + */ +static void wake_up_parent(struct task_struct *parent) +{ + struct task_struct *tsk = parent; + + do { + wake_up_interruptible(&tsk->wait_chldexit); + tsk = next_thread(tsk); + } while (tsk != parent); +} + +/* + * Let a parent know about a status change of a child. + */ + +void do_notify_parent(struct task_struct *tsk, int sig) +{ + struct siginfo info; + int why, status; + + info.si_signo = sig; + info.si_errno = 0; + info.si_pid = tsk->pid; + info.si_uid = tsk->uid; + + /* FIXME: find out whether or not this is supposed to be c*time. */ + info.si_utime = tsk->times.tms_utime; + info.si_stime = tsk->times.tms_stime; + + status = tsk->exit_code & 0x7f; + why = SI_KERNEL; /* shouldn't happen */ + switch (tsk->state) { + case TASK_STOPPED: + /* FIXME -- can we deduce CLD_TRAPPED or CLD_CONTINUED? */ + if (tsk->ptrace & PT_PTRACED) + why = CLD_TRAPPED; + else + why = CLD_STOPPED; + break; + + default: + if (tsk->exit_code & 0x80) + why = CLD_DUMPED; + else if (tsk->exit_code & 0x7f) + why = CLD_KILLED; + else { + why = CLD_EXITED; + status = tsk->exit_code >> 8; + } + break; + } + info.si_code = why; + info.si_status = status; + + send_sig_info(sig, &info, tsk->p_pptr); + wake_up_parent(tsk->p_pptr); +} + + +/* + * We need the tasklist lock because it's the only + * thing that protects out "parent" pointer. + * + * exit.c calls "do_notify_parent()" directly, because + * it already has the tasklist lock. + */ +void +notify_parent(struct task_struct *tsk, int sig) +{ + read_lock(&tasklist_lock); + do_notify_parent(tsk, sig); + read_unlock(&tasklist_lock); +} + +EXPORT_SYMBOL(dequeue_signal); +EXPORT_SYMBOL(flush_signals); +EXPORT_SYMBOL(force_sig); +EXPORT_SYMBOL(force_sig_info); +EXPORT_SYMBOL(kill_pg); +EXPORT_SYMBOL(kill_pg_info); +EXPORT_SYMBOL(kill_proc); +EXPORT_SYMBOL(kill_proc_info); +EXPORT_SYMBOL(kill_sl); +EXPORT_SYMBOL(kill_sl_info); +EXPORT_SYMBOL(notify_parent); +EXPORT_SYMBOL(recalc_sigpending); +EXPORT_SYMBOL(send_sig); +EXPORT_SYMBOL(send_sig_info); +EXPORT_SYMBOL(block_all_signals); +EXPORT_SYMBOL(unblock_all_signals); + + +/* + * System call entry points. + */ + +/* + * We don't need to get the kernel lock - this is all local to this + * particular thread.. (and that's good, because this is _heavily_ + * used by various programs) + */ + +asmlinkage long +sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, size_t sigsetsize) +{ + int error = -EINVAL; + sigset_t old_set, new_set; + + /* XXX: Don't preclude handling different sized sigset_t's. 
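+	   (Illustration, not in the original comment -- the switch below
+	   is plain set algebra on the blocked mask, after SIGKILL and
+	   SIGSTOP have been stripped from the request:
+
+		SIG_BLOCK:	blocked |= new_set;
+		SIG_UNBLOCK:	blocked &= ~new_set;
+		SIG_SETMASK:	blocked  = new_set;
+	   )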
+	 */
+	if (sigsetsize != sizeof(sigset_t))
+		goto out;
+
+	if (set) {
+		error = -EFAULT;
+		if (copy_from_user(&new_set, set, sizeof(*set)))
+			goto out;
+		sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
+
+		spin_lock_irq(&current->sigmask_lock);
+		old_set = current->blocked;
+
+		error = 0;
+		switch (how) {
+		default:
+			error = -EINVAL;
+			break;
+		case SIG_BLOCK:
+			sigorsets(&current->blocked, &old_set, &new_set);
+			break;
+		case SIG_UNBLOCK:
+			signandsets(&current->blocked, &old_set, &new_set);
+			break;
+		case SIG_SETMASK:
+			current->blocked = new_set;
+			break;
+		}
+
+		recalc_sigpending(current);
+		spin_unlock_irq(&current->sigmask_lock);
+		if (error)
+			goto out;
+		if (oset)
+			goto set_old;
+	} else if (oset) {
+		spin_lock_irq(&current->sigmask_lock);
+		old_set = current->blocked;
+		spin_unlock_irq(&current->sigmask_lock);
+
+	set_old:
+		error = -EFAULT;
+		if (copy_to_user(oset, &old_set, sizeof(*oset)))
+			goto out;
+	}
+	error = 0;
+out:
+	return error;
+}
+
+long do_sigpending(void *set, unsigned long sigsetsize)
+{
+	long error = -EINVAL;
+	sigset_t pending;
+
+	if (sigsetsize > sizeof(sigset_t))
+		goto out;
+
+	spin_lock_irq(&current->sigmask_lock);
+	sigandsets(&pending, &current->blocked, &current->pending.signal);
+	spin_unlock_irq(&current->sigmask_lock);
+
+	error = -EFAULT;
+	if (!copy_to_user(set, &pending, sigsetsize))
+		error = 0;
+out:
+	return error;
+}
+
+asmlinkage long
+sys_rt_sigpending(sigset_t *set, size_t sigsetsize)
+{
+	return do_sigpending(set, sigsetsize);
+}
+
+asmlinkage long
+sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo,
+		    const struct timespec *uts, size_t sigsetsize)
+{
+	int ret, sig;
+	sigset_t these;
+	struct timespec ts;
+	siginfo_t info;
+	long timeout = 0;
+
+	/* XXX: Don't preclude handling different sized sigset_t's. */
+	if (sigsetsize != sizeof(sigset_t))
+		return -EINVAL;
+
+	if (copy_from_user(&these, uthese, sizeof(these)))
+		return -EFAULT;
+
+	/*
+	 * Invert the set of allowed signals to get those we
+	 * want to block.
+	 */
+	sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP));
+	signotset(&these);
+
+	if (uts) {
+		if (copy_from_user(&ts, uts, sizeof(ts)))
+			return -EFAULT;
+		if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0
+		    || ts.tv_sec < 0)
+			return -EINVAL;
+	}
+
+	spin_lock_irq(&current->sigmask_lock);
+	sig = dequeue_signal(&these, &info);
+	if (!sig) {
+		timeout = MAX_SCHEDULE_TIMEOUT;
+		if (uts)
+			timeout = (timespec_to_jiffies(&ts)
+				   + (ts.tv_sec || ts.tv_nsec));
+
+		if (timeout) {
+			/* None ready -- temporarily unblock the signals
+			 * we're interested in while we sleep, so that
+			 * we'll be awakened when they arrive. */
+			sigset_t oldblocked = current->blocked;
+			sigandsets(&current->blocked, &current->blocked, &these);
+			recalc_sigpending(current);
+			spin_unlock_irq(&current->sigmask_lock);
+
+			current->state = TASK_INTERRUPTIBLE;
+			timeout = schedule_timeout(timeout);
+
+			spin_lock_irq(&current->sigmask_lock);
+			sig = dequeue_signal(&these, &info);
+			current->blocked = oldblocked;
+			recalc_sigpending(current);
+		}
+	}
+	spin_unlock_irq(&current->sigmask_lock);
+
+	if (sig) {
+		ret = sig;
+		if (uinfo) {
+			if (copy_siginfo_to_user(uinfo, &info))
+				ret = -EFAULT;
+		}
+	} else {
+		ret = -EAGAIN;
+		if (timeout)
+			ret = -EINTR;
+	}
+
+	return ret;
+}
+
+asmlinkage long
+sys_kill(int pid, int sig)
+{
+	struct siginfo info;
+
+	info.si_signo = sig;
+	info.si_errno = 0;
+	info.si_code = SI_USER;
+	info.si_pid = current->pid;
+	info.si_uid = current->uid;
+
+	return kill_something_info(sig, &info, pid);
+}
+
+/*
+ * Kill only one task, even if it's a CLONE_THREAD task.
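+ *
+ * (Illustration, not in the original comment: sys_kill() reroutes a
+ * positive pid to the thread group leader via kill_proc_info()
+ * above; tkill() signals exactly the task with that id, so threaded
+ * code can target one thread:
+ *
+ *	syscall(__NR_tkill, tid, SIGUSR1);	// that thread only
+ *	kill(pid, SIGUSR1);			// whole-process semantics
+ * )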
+ */ +asmlinkage long +sys_tkill(int pid, int sig) +{ + struct siginfo info; + int error; + struct task_struct *p; + + /* This is only valid for single tasks */ + if (pid <= 0) + return -EINVAL; + + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_TKILL; + info.si_pid = current->pid; + info.si_uid = current->uid; + + read_lock(&tasklist_lock); + p = find_task_by_pid(pid); + error = -ESRCH; + if (p) { + error = send_sig_info(sig, &info, p); + } + read_unlock(&tasklist_lock); + return error; +} + +asmlinkage long +sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo) +{ + siginfo_t info; + + if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) + return -EFAULT; + + /* Not even root can pretend to send signals from the kernel. + Nor can they impersonate a kill(), which adds source info. */ + if (info.si_code >= 0) + return -EPERM; + info.si_signo = sig; + + /* POSIX.1b doesn't mention process groups. */ + return kill_proc_info(sig, &info, pid); +} + +int +do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) +{ + struct k_sigaction *k; + + if (sig < 1 || sig > _NSIG || + (act && (sig == SIGKILL || sig == SIGSTOP))) + return -EINVAL; + + k = ¤t->sig->action[sig-1]; + + spin_lock(¤t->sig->siglock); + + if (oact) + *oact = *k; + + if (act) { + *k = *act; + sigdelsetmask(&k->sa.sa_mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + + /* + * POSIX 3.3.1.3: + * "Setting a signal action to SIG_IGN for a signal that is + * pending shall cause the pending signal to be discarded, + * whether or not it is blocked." + * + * "Setting a signal action to SIG_DFL for a signal that is + * pending and whose default action is to ignore the signal + * (for example, SIGCHLD), shall cause the pending signal to + * be discarded, whether or not it is blocked" + * + * Note the silly behaviour of SIGCHLD: SIG_IGN means that the + * signal isn't actually ignored, but does automatic child + * reaping, while SIG_DFL is explicitly said by POSIX to force + * the signal to be ignored. 
+ */ + + if (k->sa.sa_handler == SIG_IGN + || (k->sa.sa_handler == SIG_DFL + && (sig == SIGCONT || + sig == SIGCHLD || + sig == SIGURG || + sig == SIGWINCH))) { + spin_lock_irq(¤t->sigmask_lock); + if (rm_sig_from_queue(sig, current)) + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + } + } + + spin_unlock(¤t->sig->siglock); + return 0; +} + +int +do_sigaltstack (const stack_t *uss, stack_t *uoss, unsigned long sp) +{ + stack_t oss; + int error; + + if (uoss) { + oss.ss_sp = (void *) current->sas_ss_sp; + oss.ss_size = current->sas_ss_size; + oss.ss_flags = sas_ss_flags(sp); + } + + if (uss) { + void *ss_sp; + size_t ss_size; + int ss_flags; + + error = -EFAULT; + if (verify_area(VERIFY_READ, uss, sizeof(*uss)) + || __get_user(ss_sp, &uss->ss_sp) + || __get_user(ss_flags, &uss->ss_flags) + || __get_user(ss_size, &uss->ss_size)) + goto out; + + error = -EPERM; + if (on_sig_stack (sp)) + goto out; + + error = -EINVAL; + /* + * + * Note - this code used to test ss_flags incorrectly + * old code may have been written using ss_flags==0 + * to mean ss_flags==SS_ONSTACK (as this was the only + * way that worked) - this fix preserves that older + * mechanism + */ + if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0) + goto out; + + if (ss_flags == SS_DISABLE) { + ss_size = 0; + ss_sp = NULL; + } else { + error = -ENOMEM; + if (ss_size < MINSIGSTKSZ) + goto out; + } + + current->sas_ss_sp = (unsigned long) ss_sp; + current->sas_ss_size = ss_size; + } + + if (uoss) { + error = -EFAULT; + if (copy_to_user(uoss, &oss, sizeof(oss))) + goto out; + } + + error = 0; +out: + return error; +} + +asmlinkage long +sys_sigpending(old_sigset_t *set) +{ + return do_sigpending(set, sizeof(*set)); +} + +#if !defined(__alpha__) +/* Alpha has its own versions with special arguments. */ + +asmlinkage long +sys_sigprocmask(int how, old_sigset_t *set, old_sigset_t *oset) +{ + int error; + old_sigset_t old_set, new_set; + + if (set) { + error = -EFAULT; + if (copy_from_user(&new_set, set, sizeof(*set))) + goto out; + new_set &= ~(sigmask(SIGKILL)|sigmask(SIGSTOP)); + + spin_lock_irq(¤t->sigmask_lock); + old_set = current->blocked.sig[0]; + + error = 0; + switch (how) { + default: + error = -EINVAL; + break; + case SIG_BLOCK: + sigaddsetmask(¤t->blocked, new_set); + break; + case SIG_UNBLOCK: + sigdelsetmask(¤t->blocked, new_set); + break; + case SIG_SETMASK: + current->blocked.sig[0] = new_set; + break; + } + + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + if (error) + goto out; + if (oset) + goto set_old; + } else if (oset) { + old_set = current->blocked.sig[0]; + set_old: + error = -EFAULT; + if (copy_to_user(oset, &old_set, sizeof(*oset))) + goto out; + } + error = 0; +out: + return error; +} + +#ifndef __sparc__ +asmlinkage long +sys_rt_sigaction(int sig, const struct sigaction *act, struct sigaction *oact, + size_t sigsetsize) +{ + struct k_sigaction new_sa, old_sa; + int ret = -EINVAL; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + goto out; + + if (act) { + if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa))) + return -EFAULT; + } + + ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL); + + if (!ret && oact) { + if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa))) + return -EFAULT; + } +out: + return ret; +} +#endif /* __sparc__ */ +#endif + +#if !defined(__alpha__) && !defined(__ia64__) && !defined(__arm__) +/* + * For backwards compatibility. 
+ * Functionality superseded by sigprocmask.
+ */
+asmlinkage long
+sys_sgetmask(void)
+{
+	/* SMP safe */
+	return current->blocked.sig[0];
+}
+
+asmlinkage long
+sys_ssetmask(int newmask)
+{
+	int old;
+
+	spin_lock_irq(&current->sigmask_lock);
+	old = current->blocked.sig[0];
+
+	siginitset(&current->blocked, newmask & ~(sigmask(SIGKILL)|
+						  sigmask(SIGSTOP)));
+	recalc_sigpending(current);
+	spin_unlock_irq(&current->sigmask_lock);
+
+	return old;
+}
+#endif /* !defined(__alpha__) */
+
+#if !defined(__alpha__) && !defined(__ia64__) && !defined(__mips__) && \
+    !defined(__arm__)
+/*
+ * For backwards compatibility. Functionality superseded by sigaction.
+ */
+asmlinkage unsigned long
+sys_signal(int sig, __sighandler_t handler)
+{
+	struct k_sigaction new_sa, old_sa;
+	int ret;
+
+	new_sa.sa.sa_handler = handler;
+	new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
+
+	ret = do_sigaction(sig, &new_sa, &old_sa);
+
+	return ret ? ret : (unsigned long)old_sa.sa.sa_handler;
+}
+#endif /* !alpha && !__ia64__ && !defined(__mips__) && !defined(__arm__) */
diff --git a/uClinux-2.4.31-uc0/kernel/softirq.c b/uClinux-2.4.31-uc0/kernel/softirq.c
new file mode 100644
index 0000000..efd9b55
--- /dev/null
+++ b/uClinux-2.4.31-uc0/kernel/softirq.c
@@ -0,0 +1,453 @@
+/*
+ *	linux/kernel/softirq.c
+ *
+ *	Copyright (C) 1992 Linus Torvalds
+ *
+ * Fixed a disable_bh()/enable_bh() race (was causing a console lockup)
+ * due to bh_mask_count not being handled atomically. Copyright (C) 1998 Andrea Arcangeli
+ *
+ * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/tqueue.h>
+#include <linux/compiler.h>
+
+/*
+   - No shared variables, all the data are CPU local.
+   - If a softirq needs serialization, let it serialize itself
+     by its own spinlocks.
+   - Even if softirq is serialized, only local cpu is marked for
+     execution. Hence, we get something sort of weak cpu binding.
+     Though it is still not clear whether it will result in better
+     locality or not.
+   - These softirqs are not masked by global cli() and start_bh_atomic()
+     (for clear reasons). Hence, old parts of code still using global locks
+     MUST NOT use softirqs, but insert interfacing routines acquiring
+     global locks. F.e. look at BHs implementation.
+
+   Examples:
+   - NET RX softirq. It is multithreaded and does not require
+     any global serialization.
+   - NET TX softirq. It kicks software netdevice queues, hence
+     it is logically serialized per device, but this serialization
+     is invisible to common code.
+   - Tasklets: serialized wrt itself.
+   - Bottom halves: globally serialized, grr...
+ */
+
+irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
+
+static struct softirq_action softirq_vec[32] __cacheline_aligned;
+
+/*
+ * We cannot loop indefinitely here to avoid userspace starvation,
+ * but we also don't want to introduce a worst case 1/HZ latency
+ * to the pending events, so let the scheduler balance
+ * the softirq load for us.
+ */ +static inline void wakeup_softirqd(unsigned cpu) +{ + struct task_struct * tsk = ksoftirqd_task(cpu); + + if (tsk && tsk->state != TASK_RUNNING) + wake_up_process(tsk); +} + +static inline int softirqd_is_waken(unsigned cpu) +{ + struct task_struct * tsk = ksoftirqd_task(cpu); + + return tsk && tsk->state == TASK_RUNNING; +} + +/* + * the higher this number the less likely ksoftirqd will be waken by + * a short irq flood peak, but the higher unfariness the softirq load + * will generate against the regular scheduler tasks. + * Each loop will allow one more block to pass through to the + * higher layer. If further blocks keeps arriving we giveup and we + * offload the work in a scheduler friendly way. After ksoftirqd + * is started we will stop wasting time here, so under attack + * we're still competely fair. + */ +#define MAX_SOFTIRQ_LOOPS 8 + +static void __do_softirq(int ksoftirqd) +{ + int cpu = smp_processor_id(); + __u32 pending; + unsigned long flags; + __u32 mask; + int loops; + + if (in_interrupt()) + return; + + local_irq_save(flags); + + if (!ksoftirqd && softirqd_is_waken(cpu)) + pending = 0; + else + pending = softirq_pending(cpu); + + loops = 0; + if (pending) { + struct softirq_action *h; + + mask = ~pending; + local_bh_disable(); +restart: + /* Reset the pending bitmask before enabling irqs */ + softirq_pending(cpu) = 0; + + local_irq_enable(); + + h = softirq_vec; + + do { + if (pending & 1) + h->action(h); + h++; + pending >>= 1; + } while (pending); + + local_irq_disable(); + + pending = softirq_pending(cpu); + if (pending & mask) { + mask &= ~pending; + goto restart; + } + __local_bh_enable(); + + if (!softirqd_is_waken(cpu)) { + if (unlikely(++loops >= MAX_SOFTIRQ_LOOPS)) { + if (pending) + wakeup_softirqd(cpu); + } else { + mask = ~pending; + local_bh_disable(); + goto restart; + } + } + } + + local_irq_restore(flags); +} + +asmlinkage void do_softirq() +{ + __do_softirq(0); +} + +/* + * This function must run with irq disabled! + */ +inline fastcall void cpu_raise_softirq(unsigned int cpu, unsigned int nr) +{ + __cpu_raise_softirq(cpu, nr); + + /* + * If we're in an interrupt or bh, we're done + * (this also catches bh-disabled code). We will + * actually run the softirq once we return from + * the irq or bh. + * + * Otherwise we wake up ksoftirqd to make sure we + * schedule the softirq soon. 
+ */ + if (!(local_irq_count(cpu) | local_bh_count(cpu))) + wakeup_softirqd(cpu); +} + +void fastcall raise_softirq(unsigned int nr) +{ + unsigned long flags; + + local_irq_save(flags); + cpu_raise_softirq(smp_processor_id(), nr); + local_irq_restore(flags); +} + +void open_softirq(int nr, void (*action)(struct softirq_action*), void *data) +{ + softirq_vec[nr].data = data; + softirq_vec[nr].action = action; +} + + +/* Tasklets */ + +struct tasklet_head tasklet_vec[NR_CPUS] __cacheline_aligned; +struct tasklet_head tasklet_hi_vec[NR_CPUS] __cacheline_aligned; + +void fastcall __tasklet_schedule(struct tasklet_struct *t) +{ + int cpu = smp_processor_id(); + unsigned long flags; + + local_irq_save(flags); + t->next = tasklet_vec[cpu].list; + tasklet_vec[cpu].list = t; + cpu_raise_softirq(cpu, TASKLET_SOFTIRQ); + local_irq_restore(flags); +} + +void fastcall __tasklet_hi_schedule(struct tasklet_struct *t) +{ + int cpu = smp_processor_id(); + unsigned long flags; + + local_irq_save(flags); + t->next = tasklet_hi_vec[cpu].list; + tasklet_hi_vec[cpu].list = t; + cpu_raise_softirq(cpu, HI_SOFTIRQ); + local_irq_restore(flags); +} + +static void tasklet_action(struct softirq_action *a) +{ + int cpu = smp_processor_id(); + struct tasklet_struct *list; + + local_irq_disable(); + list = tasklet_vec[cpu].list; + tasklet_vec[cpu].list = NULL; + local_irq_enable(); + + while (list) { + struct tasklet_struct *t = list; + + list = list->next; + + if (tasklet_trylock(t)) { + if (!atomic_read(&t->count)) { + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) + BUG(); + t->func(t->data); + tasklet_unlock(t); + continue; + } + tasklet_unlock(t); + } + + local_irq_disable(); + t->next = tasklet_vec[cpu].list; + tasklet_vec[cpu].list = t; + __cpu_raise_softirq(cpu, TASKLET_SOFTIRQ); + local_irq_enable(); + } +} + +static void tasklet_hi_action(struct softirq_action *a) +{ + int cpu = smp_processor_id(); + struct tasklet_struct *list; + + local_irq_disable(); + list = tasklet_hi_vec[cpu].list; + tasklet_hi_vec[cpu].list = NULL; + local_irq_enable(); + + while (list) { + struct tasklet_struct *t = list; + + list = list->next; + + if (tasklet_trylock(t)) { + if (!atomic_read(&t->count)) { + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) + BUG(); + t->func(t->data); + tasklet_unlock(t); + continue; + } + tasklet_unlock(t); + } + + local_irq_disable(); + t->next = tasklet_hi_vec[cpu].list; + tasklet_hi_vec[cpu].list = t; + __cpu_raise_softirq(cpu, HI_SOFTIRQ); + local_irq_enable(); + } +} + + +void tasklet_init(struct tasklet_struct *t, + void (*func)(unsigned long), unsigned long data) +{ + t->next = NULL; + t->state = 0; + atomic_set(&t->count, 0); + t->func = func; + t->data = data; +} + +void tasklet_kill(struct tasklet_struct *t) +{ + if (in_interrupt()) + printk("Attempt to kill tasklet from interrupt\n"); + + while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { + current->state = TASK_RUNNING; + do { + yield(); + } while (test_bit(TASKLET_STATE_SCHED, &t->state)); + } + tasklet_unlock_wait(t); + clear_bit(TASKLET_STATE_SCHED, &t->state); +} + + + +/* Old style BHs */ + +static void (*bh_base[32])(void); +struct tasklet_struct bh_task_vec[32]; + +/* BHs are serialized by spinlock global_bh_lock. + + It is still possible to make synchronize_bh() as + spin_unlock_wait(&global_bh_lock). This operation is not used + by kernel now, so that this lock is not made private only + due to wait_on_irq(). + + It can be removed only after auditing all the BHs. 
+ */ +spinlock_t global_bh_lock = SPIN_LOCK_UNLOCKED; + +static void bh_action(unsigned long nr) +{ + int cpu = smp_processor_id(); + + if (!spin_trylock(&global_bh_lock)) + goto resched; + + if (!hardirq_trylock(cpu)) + goto resched_unlock; + + if (bh_base[nr]) + bh_base[nr](); + + hardirq_endlock(cpu); + spin_unlock(&global_bh_lock); + return; + +resched_unlock: + spin_unlock(&global_bh_lock); +resched: + mark_bh(nr); +} + +void init_bh(int nr, void (*routine)(void)) +{ + bh_base[nr] = routine; + mb(); +} + +void remove_bh(int nr) +{ + tasklet_kill(bh_task_vec+nr); + bh_base[nr] = NULL; +} + +void __init softirq_init() +{ + int i; + + for (i=0; i<32; i++) + tasklet_init(bh_task_vec+i, bh_action, i); + + open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL); + open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); +} + +void __run_task_queue(task_queue *list) +{ + struct list_head head, *next; + unsigned long flags; + + spin_lock_irqsave(&tqueue_lock, flags); + list_add(&head, list); + list_del_init(list); + spin_unlock_irqrestore(&tqueue_lock, flags); + + next = head.next; + while (next != &head) { + void (*f) (void *); + struct tq_struct *p; + void *data; + + p = list_entry(next, struct tq_struct, list); + next = next->next; + f = p->routine; + data = p->data; + wmb(); + p->sync = 0; + if (f) + f(data); + } +} + +static int ksoftirqd(void * __bind_cpu) +{ + int bind_cpu = (int) (long) __bind_cpu; + int cpu = cpu_logical_map(bind_cpu); + + daemonize(); + current->nice = 19; + sigfillset(¤t->blocked); + + /* Migrate to the right CPU */ + current->cpus_allowed = 1UL << cpu; + while (smp_processor_id() != cpu) + schedule(); + + sprintf(current->comm, "ksoftirqd_CPU%d", bind_cpu); + + __set_current_state(TASK_INTERRUPTIBLE); + mb(); + + ksoftirqd_task(cpu) = current; + + for (;;) { + if (!softirq_pending(cpu)) + schedule(); + + __set_current_state(TASK_RUNNING); + + while (softirq_pending(cpu)) { + __do_softirq(1); + if (current->need_resched) + schedule(); + } + + __set_current_state(TASK_INTERRUPTIBLE); + } +} + +static __init int spawn_ksoftirqd(void) +{ + int cpu; + + for (cpu = 0; cpu < smp_num_cpus; cpu++) { + if (kernel_thread(ksoftirqd, (void *) (long) cpu, + CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0) + printk("spawn_ksoftirqd() failed for cpu %d\n", cpu); + else { + while (!ksoftirqd_task(cpu_logical_map(cpu))) + yield(); + } + } + + return 0; +} + +__initcall(spawn_ksoftirqd); diff --git a/uClinux-2.4.31-uc0/kernel/sys.c b/uClinux-2.4.31-uc0/kernel/sys.c new file mode 100644 index 0000000..e156866 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/sys.c @@ -0,0 +1,1305 @@ +/* $USAGI: sys.c,v 1.16 2003/11/12 05:11:57 yoshfuji Exp $ */ + +/* + * linux/kernel/sys.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/utsname.h> +#include <linux/mman.h> +#include <linux/smp_lock.h> +#include <linux/notifier.h> +#include <linux/reboot.h> +#include <linux/prctl.h> +#include <linux/init.h> +#include <linux/highuid.h> + +#include <asm/uaccess.h> +#include <asm/io.h> + +#ifndef SET_UNALIGN_CTL +# define SET_UNALIGN_CTL(a,b) (-EINVAL) +#endif +#ifndef GET_UNALIGN_CTL +# define GET_UNALIGN_CTL(a,b) (-EINVAL) +#endif +#ifndef SET_FPEMU_CTL +# define SET_FPEMU_CTL(a,b) (-EINVAL) +#endif +#ifndef GET_FPEMU_CTL +# define GET_FPEMU_CTL(a,b) (-EINVAL) +#endif +#ifndef SET_FPEXC_CTL +# define SET_FPEXC_CTL(a,b) (-EINVAL) +#endif +#ifndef GET_FPEXC_CTL +# define GET_FPEXC_CTL(a,b) (-EINVAL) +#endif + +/* + * this is where the system-wide 
+ * overflow UID and GID are defined, for
+ * architectures that now have 32-bit UID/GID but didn't in the past
+ */
+
+int overflowuid = DEFAULT_OVERFLOWUID;
+int overflowgid = DEFAULT_OVERFLOWGID;
+
+/*
+ * the same as above, but for filesystems which can only store a 16-bit
+ * UID and GID. as such, this is needed on all architectures
+ */
+
+int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
+int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
+
+/*
+ * this indicates whether you can reboot with ctrl-alt-del: the default is yes
+ */
+
+int C_A_D = 1;
+int cad_pid = 1;
+
+
+/*
+ *	Notifier list for kernel code which wants to be called
+ *	at shutdown. This is used to stop any idling DMA operations
+ *	and the like.
+ */
+
+static struct notifier_block *reboot_notifier_list;
+rwlock_t notifier_lock = RW_LOCK_UNLOCKED;
+
+/**
+ *	notifier_chain_register	- Add notifier to a notifier chain
+ *	@list: Pointer to root list pointer
+ *	@n: New entry in notifier chain
+ *
+ *	Adds a notifier to a notifier chain.
+ *
+ *	Currently always returns zero.
+ */
+
+int notifier_chain_register(struct notifier_block **list, struct notifier_block *n)
+{
+	write_lock(&notifier_lock);
+	while(*list)
+	{
+		if(n->priority > (*list)->priority)
+			break;
+		list= &((*list)->next);
+	}
+	n->next = *list;
+	*list=n;
+	write_unlock(&notifier_lock);
+	return 0;
+}
+
+/**
+ *	notifier_chain_unregister - Remove notifier from a notifier chain
+ *	@nl: Pointer to root list pointer
+ *	@n: Entry to remove from notifier chain
+ *
+ *	Removes a notifier from a notifier chain.
+ *
+ *	Returns zero on success, or %-ENOENT on failure.
+ */
+
+int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n)
+{
+	write_lock(&notifier_lock);
+	while((*nl)!=NULL)
+	{
+		if((*nl)==n)
+		{
+			*nl=n->next;
+			write_unlock(&notifier_lock);
+			return 0;
+		}
+		nl=&((*nl)->next);
+	}
+	write_unlock(&notifier_lock);
+	return -ENOENT;
+}
+
+/**
+ *	notifier_call_chain - Call functions in a notifier chain
+ *	@n: Pointer to root pointer of notifier chain
+ *	@val: Value passed unmodified to notifier function
+ *	@v: Pointer passed unmodified to notifier function
+ *
+ *	Calls each function in a notifier chain in turn.
+ *
+ *	If the return value of the notifier can be and'd
+ *	with %NOTIFY_STOP_MASK, then notifier_call_chain
+ *	will return immediately, with the return value of
+ *	the notifier function which halted execution.
+ *	Otherwise, the return value is the return value
+ *	of the last notifier function called.
+ */
+
+int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
+{
+	int ret=NOTIFY_DONE;
+	struct notifier_block *nb = *n;
+
+	while(nb)
+	{
+		ret=nb->notifier_call(nb,val,v);
+		if(ret&NOTIFY_STOP_MASK)
+		{
+			return ret;
+		}
+		nb=nb->next;
+	}
+	return ret;
+}
+
+/**
+ *	register_reboot_notifier - Register function to be called at reboot time
+ *	@nb: Info about notifier function to be called
+ *
+ *	Registers a function with the list of functions
+ *	to be called at reboot time.
+ *
+ *	Currently always returns zero, as notifier_chain_register
+ *	always returns zero.
+ */
+
+int register_reboot_notifier(struct notifier_block * nb)
+{
+	return notifier_chain_register(&reboot_notifier_list, nb);
+}
+
+/**
+ *	unregister_reboot_notifier - Unregister previously registered reboot notifier
+ *	@nb: Hook to be unregistered
+ *
+ *	Unregisters a previously registered reboot
+ *	notifier function.
+ *
+ *	Returns zero on success, or %-ENOENT on failure.
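+ *
+ *	(Illustration, not part of the original comment -- a driver
+ *	hooks the chain like this; names are hypothetical:
+ *
+ *	static int my_notify(struct notifier_block *nb,
+ *			     unsigned long code, void *cmd)
+ *	{
+ *		return NOTIFY_DONE;	// after quiescing the hardware
+ *	}
+ *	static struct notifier_block my_nb = { notifier_call: my_notify };
+ *	...
+ *	register_reboot_notifier(&my_nb);
+ *	)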
+ */ + +int unregister_reboot_notifier(struct notifier_block * nb) +{ + return notifier_chain_unregister(&reboot_notifier_list, nb); +} + +asmlinkage long sys_ni_syscall(void) +{ + return -ENOSYS; +} + +static int proc_sel(struct task_struct *p, int which, int who) +{ + if(p->pid) + { + switch (which) { + case PRIO_PROCESS: + if (!who && p == current) + return 1; + return(p->pid == who); + case PRIO_PGRP: + if (!who) + who = current->pgrp; + return(p->pgrp == who); + case PRIO_USER: + if (!who) + who = current->uid; + return(p->uid == who); + } + } + return 0; +} + +asmlinkage long sys_setpriority(int which, int who, int niceval) +{ + struct task_struct *p; + int error; + + if (which > 2 || which < 0) + return -EINVAL; + + /* normalize: avoid signed division (rounding problems) */ + error = -ESRCH; + if (niceval < -20) + niceval = -20; + if (niceval > 19) + niceval = 19; + + read_lock(&tasklist_lock); + for_each_task(p) { + if (!proc_sel(p, which, who)) + continue; + if (p->uid != current->euid && + p->uid != current->uid && !capable(CAP_SYS_NICE)) { + error = -EPERM; + continue; + } + if (error == -ESRCH) + error = 0; + if (niceval < p->nice && !capable(CAP_SYS_NICE)) + error = -EACCES; + else + p->nice = niceval; + } + read_unlock(&tasklist_lock); + + return error; +} + +/* + * Ugh. To avoid negative return values, "getpriority()" will + * not return the normal nice-value, but a negated value that + * has been offset by 20 (ie it returns 40..1 instead of -20..19) + * to stay compatible. + */ +asmlinkage long sys_getpriority(int which, int who) +{ + struct task_struct *p; + long retval = -ESRCH; + + if (which > 2 || which < 0) + return -EINVAL; + + read_lock(&tasklist_lock); + for_each_task (p) { + long niceval; + if (!proc_sel(p, which, who)) + continue; + niceval = 20 - p->nice; + if (niceval > retval) + retval = niceval; + } + read_unlock(&tasklist_lock); + + return retval; +} + + +/* + * Reboot system call: for obvious reasons only root may call it, + * and even root needs to set up some magic numbers in the registers + * so that some mistake won't make this reboot the whole machine. + * You can also set the meaning of the ctrl-alt-del-key here. + * + * reboot doesn't sync: do that yourself before calling this. + */ +asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void * arg) +{ + char buffer[256]; + + /* We only trust the superuser with rebooting the system. */ + if (!capable(CAP_SYS_BOOT)) + return -EPERM; + + /* For safety, we require "magic" arguments. 
*/ + if (magic1 != LINUX_REBOOT_MAGIC1 || + (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A && + magic2 != LINUX_REBOOT_MAGIC2B)) + return -EINVAL; + + lock_kernel(); + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: + notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); + printk(KERN_EMERG "Restarting system.\n"); + machine_restart(NULL); + break; + + case LINUX_REBOOT_CMD_CAD_ON: + C_A_D = 1; + break; + + case LINUX_REBOOT_CMD_CAD_OFF: + C_A_D = 0; + break; + + case LINUX_REBOOT_CMD_HALT: + notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); + printk(KERN_EMERG "System halted.\n"); + machine_halt(); + do_exit(0); + break; + + case LINUX_REBOOT_CMD_POWER_OFF: + notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL); + printk(KERN_EMERG "Power down.\n"); + machine_power_off(); + do_exit(0); + break; + + case LINUX_REBOOT_CMD_RESTART2: + if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) { + unlock_kernel(); + return -EFAULT; + } + buffer[sizeof(buffer) - 1] = '\0'; + + notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer); + printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer); + machine_restart(buffer); + break; + + default: + unlock_kernel(); + return -EINVAL; + } + unlock_kernel(); + return 0; +} + +static void deferred_cad(void *dummy) +{ + notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); + machine_restart(NULL); +} + +/* + * This function gets called by ctrl-alt-del - ie the keyboard interrupt. + * As it's called within an interrupt, it may NOT sync: the only choice + * is whether to reboot at once, or just ignore the ctrl-alt-del. + */ +void ctrl_alt_del(void) +{ + static struct tq_struct cad_tq = { + routine: deferred_cad, + }; + + if (C_A_D) + schedule_task(&cad_tq); + else + kill_proc(cad_pid, SIGINT, 1); +} + + +/* + * Unprivileged users may change the real gid to the effective gid + * or vice versa. (BSD-style) + * + * If you set the real gid at all, or set the effective gid to a value not + * equal to the real gid, then the saved gid is set to the new effective gid. + * + * This makes it possible for a setgid program to completely drop its + * privileges, which is often a useful assertion to make when you are doing + * a security audit over a program. + * + * The general idea is that a program which uses just setregid() will be + * 100% compatible with BSD. A program which uses just setgid() will be + * 100% compatible with POSIX with saved IDs. + * + * SMP: There are not races, the GIDs are checked only by filesystem + * operations (as far as semantic preservation is concerned). + */ +asmlinkage long sys_setregid(gid_t rgid, gid_t egid) +{ + int old_rgid = current->gid; + int old_egid = current->egid; + int new_rgid = old_rgid; + int new_egid = old_egid; + + if (rgid != (gid_t) -1) { + if ((old_rgid == rgid) || + (current->egid==rgid) || + capable(CAP_SETGID)) + new_rgid = rgid; + else + return -EPERM; + } + if (egid != (gid_t) -1) { + if ((old_rgid == egid) || + (current->egid == egid) || + (current->sgid == egid) || + capable(CAP_SETGID)) + new_egid = egid; + else { + return -EPERM; + } + } + if (new_egid != old_egid) + { + current->mm->dumpable = 0; + wmb(); + } + if (rgid != (gid_t) -1 || + (egid != (gid_t) -1 && egid != old_rgid)) + current->sgid = new_egid; + current->fsgid = new_egid; + current->egid = new_egid; + current->gid = new_rgid; + return 0; +} + +/* + * setgid() is implemented like SysV w/ SAVED_IDS + * + * SMP: Same implicit races as above. 
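+ *
+ * (Illustration, not part of the original comment: the usual way a
+ * setgid helper sheds its extra group privilege for good:
+ *
+ *	if (setgid(getgid()) != 0)
+ *		abort();	// refuse to run half-privileged
+ * )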
+ */ +asmlinkage long sys_setgid(gid_t gid) +{ + int old_egid = current->egid; + + if (capable(CAP_SETGID)) + { + if(old_egid != gid) + { + current->mm->dumpable=0; + wmb(); + } + current->gid = current->egid = current->sgid = current->fsgid = gid; + } + else if ((gid == current->gid) || (gid == current->sgid)) + { + if(old_egid != gid) + { + current->mm->dumpable=0; + wmb(); + } + current->egid = current->fsgid = gid; + } + else + return -EPERM; + return 0; +} + +/* + * cap_emulate_setxuid() fixes the effective / permitted capabilities of + * a process after a call to setuid, setreuid, or setresuid. + * + * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of + * {r,e,s}uid != 0, the permitted and effective capabilities are + * cleared. + * + * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective + * capabilities of the process are cleared. + * + * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective + * capabilities are set to the permitted capabilities. + * + * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should + * never happen. + * + * -astor + * + * cevans - New behaviour, Oct '99 + * A process may, via prctl(), elect to keep its capabilities when it + * calls setuid() and switches away from uid==0. Both permitted and + * effective sets will be retained. + * Without this change, it was impossible for a daemon to drop only some + * of its privilege. The call to setuid(!=0) would drop all privileges! + * Keeping uid 0 is not an option because uid 0 owns too many vital + * files.. + * Thanks to Olaf Kirch and Peter Benie for spotting this. + */ +static inline void cap_emulate_setxuid(int old_ruid, int old_euid, + int old_suid) +{ + if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && + (current->uid != 0 && current->euid != 0 && current->suid != 0) && + !current->keep_capabilities) { + cap_clear(current->cap_permitted); + cap_clear(current->cap_effective); + } + if (old_euid == 0 && current->euid != 0) { + cap_clear(current->cap_effective); + } + if (old_euid != 0 && current->euid == 0) { + current->cap_effective = current->cap_permitted; + } +} + +static int set_user(uid_t new_ruid, int dumpclear) +{ + struct user_struct *new_user; + + new_user = alloc_uid(new_ruid); + if (!new_user) + return -EAGAIN; + switch_uid(new_user); + + if(dumpclear) + { + current->mm->dumpable = 0; + wmb(); + } + current->uid = new_ruid; + return 0; +} + +/* + * Unprivileged users may change the real uid to the effective uid + * or vice versa. (BSD-style) + * + * If you set the real uid at all, or set the effective uid to a value not + * equal to the real uid, then the saved uid is set to the new effective uid. + * + * This makes it possible for a setuid program to completely drop its + * privileges, which is often a useful assertion to make when you are doing + * a security audit over a program. + * + * The general idea is that a program which uses just setreuid() will be + * 100% compatible with BSD. A program which uses just setuid() will be + * 100% compatible with POSIX with saved IDs. 
+ */ +asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) +{ + int old_ruid, old_euid, old_suid, new_ruid, new_euid; + + new_ruid = old_ruid = current->uid; + new_euid = old_euid = current->euid; + old_suid = current->suid; + + if (ruid != (uid_t) -1) { + new_ruid = ruid; + if ((old_ruid != ruid) && + (current->euid != ruid) && + !capable(CAP_SETUID)) + return -EPERM; + } + + if (euid != (uid_t) -1) { + new_euid = euid; + if ((old_ruid != euid) && + (current->euid != euid) && + (current->suid != euid) && + !capable(CAP_SETUID)) + return -EPERM; + } + + if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0) + return -EAGAIN; + + if (new_euid != old_euid) + { + current->mm->dumpable=0; + wmb(); + } + current->fsuid = current->euid = new_euid; + if (ruid != (uid_t) -1 || + (euid != (uid_t) -1 && euid != old_ruid)) + current->suid = current->euid; + current->fsuid = current->euid; + + if (!issecure(SECURE_NO_SETUID_FIXUP)) { + cap_emulate_setxuid(old_ruid, old_euid, old_suid); + } + + return 0; +} + + + +/* + * setuid() is implemented like SysV with SAVED_IDS + * + * Note that SAVED_ID's is deficient in that a setuid root program + * like sendmail, for example, cannot set its uid to be a normal + * user and then switch back, because if you're root, setuid() sets + * the saved uid too. If you don't like this, blame the bright people + * in the POSIX committee and/or USG. Note that the BSD-style setreuid() + * will allow a root program to temporarily drop privileges and be able to + * regain them by swapping the real and effective uid. + */ +asmlinkage long sys_setuid(uid_t uid) +{ + int old_euid = current->euid; + int old_ruid, old_suid, new_ruid, new_suid; + + old_ruid = new_ruid = current->uid; + old_suid = current->suid; + new_suid = old_suid; + + if (capable(CAP_SETUID)) { + if (uid != old_ruid && set_user(uid, old_euid != uid) < 0) + return -EAGAIN; + new_suid = uid; + } else if ((uid != current->uid) && (uid != new_suid)) + return -EPERM; + + if (old_euid != uid) + { + current->mm->dumpable = 0; + wmb(); + } + current->fsuid = current->euid = uid; + current->suid = new_suid; + + if (!issecure(SECURE_NO_SETUID_FIXUP)) { + cap_emulate_setxuid(old_ruid, old_euid, old_suid); + } + + return 0; +} + + +/* + * This function implements a generic ability to update ruid, euid, + * and suid. This allows you to implement the 4.4 compatible seteuid(). 
+ */ +asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) +{ + int old_ruid = current->uid; + int old_euid = current->euid; + int old_suid = current->suid; + + if (!capable(CAP_SETUID)) { + if ((ruid != (uid_t) -1) && (ruid != current->uid) && + (ruid != current->euid) && (ruid != current->suid)) + return -EPERM; + if ((euid != (uid_t) -1) && (euid != current->uid) && + (euid != current->euid) && (euid != current->suid)) + return -EPERM; + if ((suid != (uid_t) -1) && (suid != current->uid) && + (suid != current->euid) && (suid != current->suid)) + return -EPERM; + } + if (ruid != (uid_t) -1) { + if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0) + return -EAGAIN; + } + if (euid != (uid_t) -1) { + if (euid != current->euid) + { + current->mm->dumpable = 0; + wmb(); + } + current->euid = euid; + } + current->fsuid = current->euid; + if (suid != (uid_t) -1) + current->suid = suid; + + if (!issecure(SECURE_NO_SETUID_FIXUP)) { + cap_emulate_setxuid(old_ruid, old_euid, old_suid); + } + + return 0; +} + +asmlinkage long sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid) +{ + int retval; + + if (!(retval = put_user(current->uid, ruid)) && + !(retval = put_user(current->euid, euid))) + retval = put_user(current->suid, suid); + + return retval; +} + +/* + * Same as above, but for rgid, egid, sgid. + */ +asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) +{ + if (!capable(CAP_SETGID)) { + if ((rgid != (gid_t) -1) && (rgid != current->gid) && + (rgid != current->egid) && (rgid != current->sgid)) + return -EPERM; + if ((egid != (gid_t) -1) && (egid != current->gid) && + (egid != current->egid) && (egid != current->sgid)) + return -EPERM; + if ((sgid != (gid_t) -1) && (sgid != current->gid) && + (sgid != current->egid) && (sgid != current->sgid)) + return -EPERM; + } + if (egid != (gid_t) -1) { + if (egid != current->egid) + { + current->mm->dumpable = 0; + wmb(); + } + current->egid = egid; + } + current->fsgid = current->egid; + if (rgid != (gid_t) -1) + current->gid = rgid; + if (sgid != (gid_t) -1) + current->sgid = sgid; + return 0; +} + +asmlinkage long sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid) +{ + int retval; + + if (!(retval = put_user(current->gid, rgid)) && + !(retval = put_user(current->egid, egid))) + retval = put_user(current->sgid, sgid); + + return retval; +} + + +/* + * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This + * is used for "access()" and for the NFS daemon (letting nfsd stay at + * whatever uid it wants to). It normally shadows "euid", except when + * explicitly set by setfsuid() or for access.. + */ +asmlinkage long sys_setfsuid(uid_t uid) +{ + int old_fsuid; + + old_fsuid = current->fsuid; + if (uid == current->uid || uid == current->euid || + uid == current->suid || uid == current->fsuid || + capable(CAP_SETUID)) + { + if (uid != old_fsuid) + { + current->mm->dumpable = 0; + wmb(); + } + current->fsuid = uid; + } + + /* We emulate fsuid by essentially doing a scaled-down version + * of what we did in setresuid and friends. However, we only + * operate on the fs-specific bits of the process' effective + * capabilities + * + * FIXME - is fsuser used for all CAP_FS_MASK capabilities? + * if not, we might be a bit too harsh here. 
+ */ + + if (!issecure(SECURE_NO_SETUID_FIXUP)) { + if (old_fsuid == 0 && current->fsuid != 0) { + cap_t(current->cap_effective) &= ~CAP_FS_MASK; + } + if (old_fsuid != 0 && current->fsuid == 0) { + cap_t(current->cap_effective) |= + (cap_t(current->cap_permitted) & CAP_FS_MASK); + } + } + + return old_fsuid; +} + +/* + * Samma på svenska.. + */ +asmlinkage long sys_setfsgid(gid_t gid) +{ + int old_fsgid; + + old_fsgid = current->fsgid; + if (gid == current->gid || gid == current->egid || + gid == current->sgid || gid == current->fsgid || + capable(CAP_SETGID)) + { + if (gid != old_fsgid) + { + current->mm->dumpable = 0; + wmb(); + } + current->fsgid = gid; + } + return old_fsgid; +} + +asmlinkage long sys_times(struct tms * tbuf) +{ + /* + * In the SMP world we might just be unlucky and have one of + * the times increment as we use it. Since the value is an + * atomically safe type this is just fine. Conceptually its + * as if the syscall took an instant longer to occur. + */ + if (tbuf) + if (copy_to_user(tbuf, ¤t->times, sizeof(struct tms))) + return -EFAULT; + return jiffies; +} + +/* + * This needs some heavy checking ... + * I just haven't the stomach for it. I also don't fully + * understand sessions/pgrp etc. Let somebody who does explain it. + * + * OK, I think I have the protection semantics right.... this is really + * only important on a multi-user system anyway, to make sure one user + * can't send a signal to a process owned by another. -TYT, 12/12/91 + * + * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. + * LBT 04.03.94 + */ + +asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) +{ + struct task_struct * p; + int err = -EINVAL; + + if (!pid) + pid = current->pid; + if (!pgid) + pgid = pid; + if (pgid < 0) + return -EINVAL; + + /* From this point forward we keep holding onto the tasklist lock + * so that our parent does not change from under us. -DaveM + */ + read_lock(&tasklist_lock); + + err = -ESRCH; + p = find_task_by_pid(pid); + if (!p) + goto out; + + if (p->p_pptr == current || p->p_opptr == current) { + err = -EPERM; + if (p->session != current->session) + goto out; + err = -EACCES; + if (p->did_exec) + goto out; + } else if (p != current) + goto out; + err = -EPERM; + if (p->leader) + goto out; + if (pgid != pid) { + struct task_struct * tmp; + for_each_task (tmp) { + if (tmp->pgrp == pgid && + tmp->session == current->session) + goto ok_pgid; + } + goto out; + } + +ok_pgid: + p->pgrp = pgid; + err = 0; +out: + /* All paths lead to here, thus we are safe. 
-DaveM */ + read_unlock(&tasklist_lock); + return err; +} + +asmlinkage long sys_getpgid(pid_t pid) +{ + if (!pid) { + return current->pgrp; + } else { + int retval; + struct task_struct *p; + + read_lock(&tasklist_lock); + p = find_task_by_pid(pid); + + retval = -ESRCH; + if (p) + retval = p->pgrp; + read_unlock(&tasklist_lock); + return retval; + } +} + +asmlinkage long sys_getpgrp(void) +{ + /* SMP - assuming writes are word atomic this is fine */ + return current->pgrp; +} + +asmlinkage long sys_getsid(pid_t pid) +{ + if (!pid) { + return current->session; + } else { + int retval; + struct task_struct *p; + + read_lock(&tasklist_lock); + p = find_task_by_pid(pid); + + retval = -ESRCH; + if(p) + retval = p->session; + read_unlock(&tasklist_lock); + return retval; + } +} + +asmlinkage long sys_setsid(void) +{ + struct task_struct * p; + int err = -EPERM; + + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->pgrp == current->pid) + goto out; + } + + current->leader = 1; + current->session = current->pgrp = current->pid; + current->tty = NULL; + current->tty_old_pgrp = 0; + err = current->pgrp; +out: + read_unlock(&tasklist_lock); + return err; +} + +/* + * Supplementary group IDs + */ +asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist) +{ + int i; + + /* + * SMP: Nobody else can change our grouplist. Thus we are + * safe. + */ + + if (gidsetsize < 0) + return -EINVAL; + i = current->ngroups; + if (gidsetsize) { + if (i > gidsetsize) + return -EINVAL; + if (copy_to_user(grouplist, current->groups, sizeof(gid_t)*i)) + return -EFAULT; + } + return i; +} + +/* + * SMP: Our groups are not shared. We can copy to/from them safely + * without another task interfering. + */ + +asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist) +{ + if (!capable(CAP_SETGID)) + return -EPERM; + if ((unsigned) gidsetsize > NGROUPS) + return -EINVAL; + if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t))) + return -EFAULT; + current->ngroups = gidsetsize; + return 0; +} + +static int supplemental_group_member(gid_t grp) +{ + int i = current->ngroups; + + if (i) { + gid_t *groups = current->groups; + do { + if (*groups == grp) + return 1; + groups++; + i--; + } while (i); + } + return 0; +} + +/* + * Check whether we're fsgid/egid or in the supplemental group.. 
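+ *
+ * (Illustration, not part of the original comment: the typical
+ * caller is the permission logic in fs/namei.c, which selects the
+ * "group" rwx bits when the inode's group matches:
+ *
+ *	if (in_group_p(inode->i_gid))
+ *		mode >>= 3;	// use the group permission bits
+ * )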
+ */ +int in_group_p(gid_t grp) +{ + int retval = 1; + if (grp != current->fsgid) + retval = supplemental_group_member(grp); + return retval; +} + +int in_egroup_p(gid_t grp) +{ + int retval = 1; + if (grp != current->egid) + retval = supplemental_group_member(grp); + return retval; +} + +DECLARE_RWSEM(uts_sem); + +#ifdef CONFIG_IPV6_NODEINFO +DECLARE_RWSEM(icmpv6_sethostname_hook_sem); +void (*icmpv6_sethostname_hook)(struct new_utsname *) = NULL; +#endif + +asmlinkage long sys_newuname(struct new_utsname * name) +{ + int errno = 0; + + down_read(&uts_sem); + if (copy_to_user(name,&system_utsname,sizeof *name)) + errno = -EFAULT; + up_read(&uts_sem); + return errno; +} + +asmlinkage long sys_sethostname(char *name, int len) +{ + int errno; + char tmp[__NEW_UTS_LEN]; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (len < 0 || len > __NEW_UTS_LEN) + return -EINVAL; + down_write(&uts_sem); + errno = -EFAULT; + if (!copy_from_user(tmp, name, len)) { + memcpy(system_utsname.nodename, tmp, len); + system_utsname.nodename[len] = 0; +#ifdef CONFIG_IPV6_NODEINFO + down_read(&icmpv6_sethostname_hook_sem); + if (icmpv6_sethostname_hook) + icmpv6_sethostname_hook(&system_utsname); + up_read(&icmpv6_sethostname_hook_sem); +#endif + errno = 0; + } + up_write(&uts_sem); + return errno; +} + +asmlinkage long sys_gethostname(char *name, int len) +{ + int i, errno; + + if (len < 0) + return -EINVAL; + down_read(&uts_sem); + i = 1 + strlen(system_utsname.nodename); + if (i > len) + i = len; + errno = 0; + if (copy_to_user(name, system_utsname.nodename, i)) + errno = -EFAULT; + up_read(&uts_sem); + return errno; +} + +/* + * Only setdomainname; getdomainname can be implemented by calling + * uname() + */ +asmlinkage long sys_setdomainname(char *name, int len) +{ + int errno; + char tmp[__NEW_UTS_LEN]; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (len < 0 || len > __NEW_UTS_LEN) + return -EINVAL; + + down_write(&uts_sem); + errno = -EFAULT; + if (!copy_from_user(tmp, name, len)) { + memcpy(system_utsname.domainname, tmp, len); + system_utsname.domainname[len] = 0; + errno = 0; + } + up_write(&uts_sem); + return errno; +} + +asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim) +{ + if (resource >= RLIM_NLIMITS) + return -EINVAL; + else + return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim)) + ? -EFAULT : 0; +} + +#if !defined(__ia64__) + +/* + * Back compatibility for getrlimit. Needed for some apps. 
+ */ + +asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit *rlim) +{ + struct rlimit x; + if (resource >= RLIM_NLIMITS) + return -EINVAL; + + memcpy(&x, current->rlim + resource, sizeof(*rlim)); + if(x.rlim_cur > 0x7FFFFFFF) + x.rlim_cur = 0x7FFFFFFF; + if(x.rlim_max > 0x7FFFFFFF) + x.rlim_max = 0x7FFFFFFF; + return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; +} + +#endif + +asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim) +{ + struct rlimit new_rlim, *old_rlim; + + if (resource >= RLIM_NLIMITS) + return -EINVAL; + if(copy_from_user(&new_rlim, rlim, sizeof(*rlim))) + return -EFAULT; + if (new_rlim.rlim_cur > new_rlim.rlim_max) + return -EINVAL; + old_rlim = current->rlim + resource; + if (((new_rlim.rlim_cur > old_rlim->rlim_max) || + (new_rlim.rlim_max > old_rlim->rlim_max)) && + !capable(CAP_SYS_RESOURCE)) + return -EPERM; + if (resource == RLIMIT_NOFILE) { + if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN) + return -EPERM; + } + *old_rlim = new_rlim; + return 0; +} + +/* + * It would make sense to put struct rusage in the task_struct, + * except that would make the task_struct be *really big*. After + * task_struct gets moved into malloc'ed memory, it would + * make sense to do this. It will make moving the rest of the information + * a lot simpler! (Which we're not doing right now because we're not + * measuring them yet). + * + * This is SMP safe. Either we are called from sys_getrusage on ourselves + * below (we know we aren't going to exit/disappear and only we change our + * rusage counters), or we are called from wait4() on a process which is + * either stopped or zombied. In the zombied case the task won't get + * reaped till shortly after the call to getrusage(), in both cases the + * task being examined is in a frozen state so the counters won't change. + * + * FIXME! Get the fault counts properly! + */ +int getrusage(struct task_struct *p, int who, struct rusage *ru) +{ + struct rusage r; + + memset((char *) &r, 0, sizeof(r)); + switch (who) { + case RUSAGE_SELF: + r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime); + r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime); + r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime); + r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime); + r.ru_minflt = p->min_flt; + r.ru_majflt = p->maj_flt; + r.ru_nswap = p->nswap; + break; + case RUSAGE_CHILDREN: + r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime); + r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime); + r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime); + r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime); + r.ru_minflt = p->cmin_flt; + r.ru_majflt = p->cmaj_flt; + r.ru_nswap = p->cnswap; + break; + default: + r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime); + r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime); + r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime); + r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime); + r.ru_minflt = p->min_flt + p->cmin_flt; + r.ru_majflt = p->maj_flt + p->cmaj_flt; + r.ru_nswap = p->nswap + p->cnswap; + break; + } + return copy_to_user(ru, &r, sizeof(r)) ? 
-EFAULT : 0;
+}
+
+asmlinkage long sys_getrusage(int who, struct rusage *ru)
+{
+	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
+		return -EINVAL;
+	return getrusage(current, who, ru);
+}
+
+asmlinkage long sys_umask(int mask)
+{
+	mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
+	return mask;
+}
+
+asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
+			  unsigned long arg4, unsigned long arg5)
+{
+	int error = 0;
+	int sig;
+
+	switch (option) {
+	case PR_SET_PDEATHSIG:
+		sig = arg2;
+		if (sig < 0 || sig > _NSIG) {
+			error = -EINVAL;
+			break;
+		}
+		current->pdeath_signal = sig;
+		break;
+	case PR_GET_PDEATHSIG:
+		error = put_user(current->pdeath_signal, (int *)arg2);
+		break;
+	case PR_GET_DUMPABLE:
+		if (is_dumpable(current))
+			error = 1;
+		break;
+	case PR_SET_DUMPABLE:
+		if (arg2 != 0 && arg2 != 1) {
+			error = -EINVAL;
+			break;
+		}
+		current->mm->dumpable = arg2;
+		break;
+
+	case PR_SET_UNALIGN:
+		error = SET_UNALIGN_CTL(current, arg2);
+		break;
+	case PR_GET_UNALIGN:
+		error = GET_UNALIGN_CTL(current, arg2);
+		break;
+	case PR_SET_FPEMU:
+		error = SET_FPEMU_CTL(current, arg2);
+		break;
+	case PR_GET_FPEMU:
+		error = GET_FPEMU_CTL(current, arg2);
+		break;
+	case PR_SET_FPEXC:
+		error = SET_FPEXC_CTL(current, arg2);
+		break;
+	case PR_GET_FPEXC:
+		error = GET_FPEXC_CTL(current, arg2);
+		break;
+
+	case PR_GET_KEEPCAPS:
+		if (current->keep_capabilities)
+			error = 1;
+		break;
+	case PR_SET_KEEPCAPS:
+		if (arg2 != 0 && arg2 != 1) {
+			error = -EINVAL;
+			break;
+		}
+		current->keep_capabilities = arg2;
+		break;
+	default:
+		error = -EINVAL;
+		break;
+	}
+	return error;
+}
+
+EXPORT_SYMBOL(notifier_chain_register);
+EXPORT_SYMBOL(notifier_chain_unregister);
+EXPORT_SYMBOL(notifier_call_chain);
+EXPORT_SYMBOL(register_reboot_notifier);
+EXPORT_SYMBOL(unregister_reboot_notifier);
+EXPORT_SYMBOL(in_group_p);
+EXPORT_SYMBOL(in_egroup_p);
diff --git a/uClinux-2.4.31-uc0/kernel/sysctl.c b/uClinux-2.4.31-uc0/kernel/sysctl.c
new file mode 100644
index 0000000..420204e
--- /dev/null
+++ b/uClinux-2.4.31-uc0/kernel/sysctl.c
@@ -0,0 +1,1543 @@
+/*
+ * sysctl.c: General linux system control interface
+ *
+ * Begun 24 March 1995, Stephen Tweedie
+ * Added /proc support, Dec 1995
+ * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
+ * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
+ * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
+ * Dynamic registration fixes, Stephen Tweedie.
+ * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
+ * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
+ * Horn.
+ * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
+ * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
+ * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
+ * Wendling.
+ * The list_for_each() macro wasn't appropriate for the sysctl loop.
+ * Removed it and replaced it with older style, 03/23/00, Bill Wendling + */ + +#include <linux/config.h> +#include <linux/slab.h> +#include <linux/sysctl.h> +#include <linux/swapctl.h> +#include <linux/proc_fs.h> +#include <linux/ctype.h> +#include <linux/utsname.h> +#include <linux/capability.h> +#include <linux/smp_lock.h> +#include <linux/init.h> +#include <linux/sysrq.h> +#include <linux/highuid.h> +#include <linux/swap.h> + +#include <asm/uaccess.h> +#include <asm/semaphore.h> + +#ifdef CONFIG_ROOT_NFS +#include <linux/nfs_fs.h> +#endif + +#if defined(CONFIG_SYSCTL) + +/* External variables not in a header file. */ +extern int panic_timeout; +extern int C_A_D; +extern int bdf_prm[], bdflush_min[], bdflush_max[]; +#ifndef NO_MM +extern int sysctl_overcommit_memory; +#endif +extern int max_threads; +extern atomic_t nr_queued_signals; +extern int max_queued_signals; +extern int sysrq_enabled; +extern int core_uses_pid; +extern int core_setuid_ok; +extern char core_pattern[]; +extern int cad_pid; +extern int laptop_mode; +extern int block_dump; + +/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ +static int maxolduid = 65535; +static int minolduid; + +#ifdef CONFIG_KMOD +extern char modprobe_path[]; +#endif +#ifdef CONFIG_HOTPLUG +extern char hotplug_path[]; +#endif +#ifdef CONFIG_CHR_DEV_SG +extern int sg_big_buff; +#endif +#ifdef CONFIG_SYSVIPC +extern size_t shm_ctlmax; +extern size_t shm_ctlall; +extern int shm_ctlmni; +extern int msg_ctlmax; +extern int msg_ctlmnb; +extern int msg_ctlmni; +extern int sem_ctls[]; +#endif + +extern int exception_trace; + +#ifdef __sparc__ +extern char reboot_command []; +extern int stop_a_enabled; +extern int scons_pwroff; +#endif + +#ifdef CONFIG_ARCH_S390 +#ifdef CONFIG_MATHEMU +extern int sysctl_ieee_emulation_warnings; +#endif +extern int sysctl_userprocess_debug; +#endif + +#ifdef CONFIG_PPC32 +extern unsigned long zero_paged_on, powersave_nap; +int proc_dol2crvec(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp); +int proc_dol3crvec(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp); +#endif + +#ifdef CONFIG_BSD_PROCESS_ACCT +extern int acct_parm[]; +#endif + +#ifndef NO_MM +extern int pgt_cache_water[]; +#endif + +static int parse_table(int *, int, void *, size_t *, void *, size_t, + ctl_table *, void **); +static int proc_doutsstring(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp); + +static ctl_table root_table[]; +static struct ctl_table_header root_table_header = + { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) }; + +static ctl_table kern_table[]; +#ifndef NO_MM +static ctl_table vm_table[]; +#endif +#ifdef CONFIG_NET +extern ctl_table net_table[]; +#endif +static ctl_table proc_table[]; +static ctl_table fs_table[]; +static ctl_table debug_table[]; +static ctl_table dev_table[]; +extern ctl_table random_table[]; + +/* /proc declarations: */ + +#ifdef CONFIG_PROC_FS + +static ssize_t proc_readsys(struct file *, char *, size_t, loff_t *); +static ssize_t proc_writesys(struct file *, const char *, size_t, loff_t *); +static int proc_sys_permission(struct inode *, int); + +struct file_operations proc_sys_file_operations = { + read: proc_readsys, + write: proc_writesys, +}; + +static struct inode_operations proc_sys_inode_operations = { + permission: proc_sys_permission, +}; + +extern struct proc_dir_entry *proc_sys_root; + +static void register_proc_table(ctl_table *, struct proc_dir_entry *); +static void 
unregister_proc_table(ctl_table *, struct proc_dir_entry *); +#endif + +/* The default sysctl tables: */ + +static ctl_table root_table[] = { + {CTL_KERN, "kernel", NULL, 0, 0555, kern_table}, +#ifndef NO_MM + {CTL_VM, "vm", NULL, 0, 0555, vm_table}, +#endif +#ifdef CONFIG_NET + {CTL_NET, "net", NULL, 0, 0555, net_table}, +#endif + {CTL_PROC, "proc", NULL, 0, 0555, proc_table}, + {CTL_FS, "fs", NULL, 0, 0555, fs_table}, + {CTL_DEBUG, "debug", NULL, 0, 0555, debug_table}, + {CTL_DEV, "dev", NULL, 0, 0555, dev_table}, + {0} +}; + +static ctl_table kern_table[] = { + {KERN_OSTYPE, "ostype", system_utsname.sysname, 64, + 0444, NULL, &proc_doutsstring, &sysctl_string}, + {KERN_OSRELEASE, "osrelease", system_utsname.release, 64, + 0444, NULL, &proc_doutsstring, &sysctl_string}, + {KERN_VERSION, "version", system_utsname.version, 64, + 0444, NULL, &proc_doutsstring, &sysctl_string}, + {KERN_NODENAME, "hostname", system_utsname.nodename, 64, + 0644, NULL, &proc_doutsstring, &sysctl_string}, + {KERN_DOMAINNAME, "domainname", system_utsname.domainname, 64, + 0644, NULL, &proc_doutsstring, &sysctl_string}, + {KERN_PANIC, "panic", &panic_timeout, sizeof(int), + 0644, NULL, &proc_dointvec}, + {KERN_CORE_USES_PID, "core_uses_pid", &core_uses_pid, sizeof(int), + 0644, NULL, &proc_dointvec}, + {KERN_CORE_SETUID, "core_setuid_ok", &core_setuid_ok, sizeof(int), + 0644, NULL, &proc_dointvec}, + {KERN_CORE_PATTERN, "core_pattern", core_pattern, 64, + 0644, NULL, &proc_dostring, &sysctl_string}, + {KERN_TAINTED, "tainted", &tainted, sizeof(int), + 0644, NULL, &proc_dointvec}, + {KERN_CAP_BSET, "cap-bound", &cap_bset, sizeof(kernel_cap_t), + 0600, NULL, &proc_dointvec_bset}, +#ifdef CONFIG_BLK_DEV_INITRD + {KERN_REALROOTDEV, "real-root-dev", &real_root_dev, sizeof(int), + 0644, NULL, &proc_dointvec}, +#endif +#ifdef __sparc__ + {KERN_SPARC_REBOOT, "reboot-cmd", reboot_command, + 256, 0644, NULL, &proc_dostring, &sysctl_string }, + {KERN_SPARC_STOP_A, "stop-a", &stop_a_enabled, sizeof (int), + 0644, NULL, &proc_dointvec}, + {KERN_SPARC_SCONS_PWROFF, "scons-poweroff", &scons_pwroff, sizeof (int), + 0644, NULL, &proc_dointvec}, +#endif +#ifdef CONFIG_PPC32 + {KERN_PPC_ZEROPAGED, "zero-paged", &zero_paged_on, sizeof(int), + 0644, NULL, &proc_dointvec}, + {KERN_PPC_POWERSAVE_NAP, "powersave-nap", &powersave_nap, sizeof(int), + 0644, NULL, &proc_dointvec}, + {KERN_PPC_L2CR, "l2cr", NULL, 0, + 0644, NULL, &proc_dol2crvec}, + {KERN_PPC_L3CR, "l3cr", NULL, 0, + 0644, NULL, &proc_dol3crvec}, +#endif + {KERN_CTLALTDEL, "ctrl-alt-del", &C_A_D, sizeof(int), + 0644, NULL, &proc_dointvec}, + {KERN_PRINTK, "printk", &console_loglevel, 4*sizeof(int), + 0644, NULL, &proc_dointvec}, +#ifdef CONFIG_KMOD + {KERN_MODPROBE, "modprobe", &modprobe_path, 256, + 0644, NULL, &proc_dostring, &sysctl_string }, +#endif +#ifdef CONFIG_HOTPLUG + {KERN_HOTPLUG, "hotplug", &hotplug_path, 256, + 0644, NULL, &proc_dostring, &sysctl_string }, +#endif +#ifdef CONFIG_CHR_DEV_SG + {KERN_SG_BIG_BUFF, "sg-big-buff", &sg_big_buff, sizeof (int), + 0444, NULL, &proc_dointvec}, +#endif +#ifdef CONFIG_BSD_PROCESS_ACCT + {KERN_ACCT, "acct", &acct_parm, 3*sizeof(int), + 0644, NULL, &proc_dointvec}, +#endif + {KERN_RTSIGNR, "rtsig-nr", &nr_queued_signals, sizeof(int), + 0444, NULL, &proc_dointvec}, + {KERN_RTSIGMAX, "rtsig-max", &max_queued_signals, sizeof(int), + 0644, NULL, &proc_dointvec}, +#ifdef CONFIG_SYSVIPC + {KERN_SHMMAX, "shmmax", &shm_ctlmax, sizeof (size_t), + 0644, NULL, &proc_doulongvec_minmax}, + {KERN_SHMALL, "shmall", &shm_ctlall, sizeof 
(size_t), + 0644, NULL, &proc_doulongvec_minmax}, + {KERN_SHMMNI, "shmmni", &shm_ctlmni, sizeof (int), + 0644, NULL, &proc_dointvec}, + {KERN_MSGMAX, "msgmax", &msg_ctlmax, sizeof (int), + 0644, NULL, &proc_dointvec}, + {KERN_MSGMNI, "msgmni", &msg_ctlmni, sizeof (int), + 0644, NULL, &proc_dointvec}, + {KERN_MSGMNB, "msgmnb", &msg_ctlmnb, sizeof (int), + 0644, NULL, &proc_dointvec}, + {KERN_SEM, "sem", &sem_ctls, 4*sizeof (int), + 0644, NULL, &proc_dointvec}, +#endif +#ifdef CONFIG_MAGIC_SYSRQ + {KERN_SYSRQ, "sysrq", &sysrq_enabled, sizeof (int), + 0644, NULL, &proc_dointvec}, +#endif + {KERN_CADPID, "cad_pid", &cad_pid, sizeof (int), + 0600, NULL, &proc_dointvec}, + {KERN_MAX_THREADS, "threads-max", &max_threads, sizeof(int), + 0644, NULL, &proc_dointvec}, + {KERN_RANDOM, "random", NULL, 0, 0555, random_table}, + {KERN_OVERFLOWUID, "overflowuid", &overflowuid, sizeof(int), 0644, NULL, + &proc_dointvec_minmax, &sysctl_intvec, NULL, + &minolduid, &maxolduid}, + {KERN_OVERFLOWGID, "overflowgid", &overflowgid, sizeof(int), 0644, NULL, + &proc_dointvec_minmax, &sysctl_intvec, NULL, + &minolduid, &maxolduid}, +#ifdef CONFIG_ARCH_S390 +#ifdef CONFIG_MATHEMU + {KERN_IEEE_EMULATION_WARNINGS,"ieee_emulation_warnings", + &sysctl_ieee_emulation_warnings,sizeof(int),0644,NULL,&proc_dointvec}, +#endif + {KERN_S390_USER_DEBUG_LOGGING,"userprocess_debug", + &sysctl_userprocess_debug,sizeof(int),0644,NULL,&proc_dointvec}, +#endif +#ifdef __x86_64__ + {KERN_EXCEPTION_TRACE,"exception-trace", + &exception_trace,sizeof(int),0644,NULL,&proc_dointvec}, +#endif + {0} +}; + +#ifndef NO_MM +static ctl_table vm_table[] = { + {VM_GFP_DEBUG, "vm_gfp_debug", + &vm_gfp_debug, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_VFS_SCAN_RATIO, "vm_vfs_scan_ratio", + &vm_vfs_scan_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_CACHE_SCAN_RATIO, "vm_cache_scan_ratio", + &vm_cache_scan_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_MAPPED_RATIO, "vm_mapped_ratio", + &vm_mapped_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_ANON_LRU, "vm_anon_lru", + &vm_anon_lru, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_LRU_BALANCE_RATIO, "vm_lru_balance_ratio", + &vm_lru_balance_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_PASSES, "vm_passes", + &vm_passes, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0644, NULL, + &proc_dointvec_minmax, &sysctl_intvec, NULL, + &bdflush_min, &bdflush_max}, + {VM_OVERCOMMIT_MEMORY, "overcommit_memory", &sysctl_overcommit_memory, + sizeof(sysctl_overcommit_memory), 0644, NULL, &proc_dointvec}, + {VM_PAGERDAEMON, "kswapd", + &pager_daemon, sizeof(pager_daemon_t), 0644, NULL, &proc_dointvec}, + {VM_PGT_CACHE, "pagetable_cache", + &pgt_cache_water, 2*sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_PAGE_CLUSTER, "page-cluster", + &page_cluster, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_MIN_READAHEAD, "min-readahead", + &vm_min_readahead,sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_MAX_READAHEAD, "max-readahead", + &vm_max_readahead,sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_MAX_MAP_COUNT, "max_map_count", + &max_map_count, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_LAPTOP_MODE, "laptop_mode", + &laptop_mode, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_BLOCK_DUMP, "block_dump", + &block_dump, sizeof(int), 0644, NULL, &proc_dointvec}, + {0} +}; +#endif /* NO_MM */ + +static ctl_table proc_table[] = { + {0} +}; + +static ctl_table fs_table[] = { + {FS_NRINODE, "inode-nr", &inodes_stat, 2*sizeof(int), + 0444, 
NULL, &proc_dointvec}, + {FS_STATINODE, "inode-state", &inodes_stat, 7*sizeof(int), + 0444, NULL, &proc_dointvec}, + {FS_NRFILE, "file-nr", &files_stat, 3*sizeof(int), + 0444, NULL, &proc_dointvec}, + {FS_MAXFILE, "file-max", &files_stat.max_files, sizeof(int), + 0644, NULL, &proc_dointvec}, + {FS_DENTRY, "dentry-state", &dentry_stat, 6*sizeof(int), + 0444, NULL, &proc_dointvec}, + {FS_OVERFLOWUID, "overflowuid", &fs_overflowuid, sizeof(int), 0644, NULL, + &proc_dointvec_minmax, &sysctl_intvec, NULL, + &minolduid, &maxolduid}, + {FS_OVERFLOWGID, "overflowgid", &fs_overflowgid, sizeof(int), 0644, NULL, + &proc_dointvec_minmax, &sysctl_intvec, NULL, + &minolduid, &maxolduid}, + {FS_LEASES, "leases-enable", &leases_enable, sizeof(int), + 0644, NULL, &proc_dointvec}, + {FS_DIR_NOTIFY, "dir-notify-enable", &dir_notify_enable, + sizeof(int), 0644, NULL, &proc_dointvec}, + {FS_LEASE_TIME, "lease-break-time", &lease_break_time, sizeof(int), + 0644, NULL, &proc_dointvec}, + {0} +}; + +static ctl_table debug_table[] = { + {0} +}; + +static ctl_table dev_table[] = { + {0} +}; + +extern void init_irq_proc (void); + +void __init sysctl_init(void) +{ +#ifdef CONFIG_PROC_FS + register_proc_table(root_table, proc_sys_root); + init_irq_proc(); +#endif +} + +int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp, + void *newval, size_t newlen) +{ + struct list_head *tmp; + + if (nlen <= 0 || nlen >= CTL_MAXNAME) + return -ENOTDIR; + if (oldval) { + int old_len; + if (!oldlenp || get_user(old_len, oldlenp)) + return -EFAULT; + /* XXX: insufficient for SMP, but should be redundant anyway */ + if ((ssize_t)old_len < 0) + return -EINVAL; + } + tmp = &root_table_header.ctl_entry; + do { + struct ctl_table_header *head = + list_entry(tmp, struct ctl_table_header, ctl_entry); + void *context = NULL; + int error = parse_table(name, nlen, oldval, oldlenp, + newval, newlen, head->ctl_table, + &context); + if (context) + kfree(context); + if (error != -ENOTDIR) + return error; + tmp = tmp->next; + } while (tmp != &root_table_header.ctl_entry); + return -ENOTDIR; +} + +extern asmlinkage long sys_sysctl(struct __sysctl_args *args) +{ + struct __sysctl_args tmp; + int error; + + if (copy_from_user(&tmp, args, sizeof(tmp))) + return -EFAULT; + + lock_kernel(); + error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp, + tmp.newval, tmp.newlen); + unlock_kernel(); + return error; +} + +/* + * ctl_perm does NOT grant the superuser all rights automatically, because + * some sysctl variables are readonly even to root. 
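+ *
+ * For example, an entry with mode 0444 can be read by any process,
+ * but a write request (op 002) fails with -EACCES even for euid 0:
+ * test_perm() below shifts the mode down by 6 bits for the
+ * superuser, and the owner bits of 0444 carry no write permission.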
+ */ + +static int test_perm(int mode, int op) +{ + if (!current->euid) + mode >>= 6; + else if (in_egroup_p(0)) + mode >>= 3; + if ((mode & op & 0007) == op) + return 0; + return -EACCES; +} + +static inline int ctl_perm(ctl_table *table, int op) +{ + return test_perm(table->mode, op); +} + +static int parse_table(int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, + ctl_table *table, void **context) +{ + int n; +repeat: + if (!nlen) + return -ENOTDIR; + if (get_user(n, name)) + return -EFAULT; + for ( ; table->ctl_name; table++) { + if (n == table->ctl_name || table->ctl_name == CTL_ANY) { + int error; + if (table->child) { + if (ctl_perm(table, 001)) + return -EPERM; + if (table->strategy) { + error = table->strategy( + table, name, nlen, + oldval, oldlenp, + newval, newlen, context); + if (error) + return error; + } + name++; + nlen--; + table = table->child; + goto repeat; + } + error = do_sysctl_strategy(table, name, nlen, + oldval, oldlenp, + newval, newlen, context); + return error; + } + } + return -ENOTDIR; +} + +/* Perform the actual read/write of a sysctl table entry. */ +int do_sysctl_strategy (ctl_table *table, + int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, void **context) +{ + int op = 0, rc; + size_t len; + + if (oldval) + op |= 004; + if (newval) + op |= 002; + if (ctl_perm(table, op)) + return -EPERM; + + if (table->strategy) { + rc = table->strategy(table, name, nlen, oldval, oldlenp, + newval, newlen, context); + if (rc < 0) + return rc; + if (rc > 0) + return 0; + } + + /* If there is no strategy routine, or if the strategy returns + * zero, proceed with automatic r/w */ + if (table->data && table->maxlen) { + if (oldval && oldlenp) { + if (get_user(len, oldlenp)) + return -EFAULT; + if (len) { + if (len > table->maxlen) + len = table->maxlen; + if(copy_to_user(oldval, table->data, len)) + return -EFAULT; + if(put_user(len, oldlenp)) + return -EFAULT; + } + } + if (newval && newlen) { + len = newlen; + if (len > table->maxlen) + len = table->maxlen; + if(copy_from_user(table->data, newval, len)) + return -EFAULT; + } + } + return 0; +} + +/** + * register_sysctl_table - register a sysctl hierarchy + * @table: the top-level table structure + * @insert_at_head: whether the entry should be inserted in front or at the end + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. An entry with a ctl_name of 0 terminates the table. + * + * The members of the &ctl_table structure are used as follows: + * + * ctl_name - This is the numeric sysctl value used by sysctl(2). The number + * must be unique within that level of sysctl + * + * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not + * enter a sysctl file + * + * data - a pointer to data for use by proc_handler + * + * maxlen - the maximum size in bytes of the data + * + * mode - the file permissions for the /proc/sys file, and for sysctl(2) + * + * child - a pointer to the child sysctl table if this entry is a directory, or + * %NULL. + * + * proc_handler - the text handler routine (described below) + * + * strategy - the strategy routine (described below) + * + * de - for internal use by the sysctl routines + * + * extra1, extra2 - extra pointers usable by the proc handler routines + * + * Leaf nodes in the sysctl tree will be represented by a single file + * under /proc; non-leaf nodes will be represented by directories. 
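+ *
+ * As a purely illustrative sketch (these names and the ctl_name
+ * value 1 are hypothetical, not taken from this file), a one-entry
+ * hierarchy appearing as /proc/sys/example/value could look like:
+ *
+ *	static int example_value;
+ *	static ctl_table example_leaf[] = {
+ *		{1, "value", &example_value, sizeof(int),
+ *		 0644, NULL, &proc_dointvec},
+ *		{0}
+ *	};
+ *	static ctl_table example_dir[] = {
+ *		{1, "example", NULL, 0, 0555, example_leaf},
+ *		{0}
+ *	};
+ *
+ * registered with register_sysctl_table(example_dir, 0).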
+ * + * sysctl(2) can automatically manage read and write requests through + * the sysctl table. The data and maxlen fields of the ctl_table + * struct enable minimal validation of the values being written to be + * performed, and the mode field allows minimal authentication. + * + * More sophisticated management can be enabled by the provision of a + * strategy routine with the table entry. This will be called before + * any automatic read or write of the data is performed. + * + * The strategy routine may return + * + * < 0 - Error occurred (error is passed to user process) + * + * 0 - OK - proceed with automatic read or write. + * + * > 0 - OK - read or write has been done by the strategy routine, so + * return immediately. + * + * There must be a proc_handler routine for any terminal nodes + * mirrored under /proc/sys (non-terminals are handled by a built-in + * directory handler). Several default handlers are available to + * cover common cases - + * + * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), + * proc_dointvec_minmax(), proc_doulongvec_ms_jiffies_minmax(), + * proc_doulongvec_minmax() + * + * It is the handler's job to read the input buffer from user memory + * and process it. The handler should return 0 on success. + * + * This routine returns %NULL on a failure to register, and a pointer + * to the table header on success. + */ +struct ctl_table_header *register_sysctl_table(ctl_table * table, + int insert_at_head) +{ + struct ctl_table_header *tmp; + tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL); + if (!tmp) + return NULL; + tmp->ctl_table = table; + INIT_LIST_HEAD(&tmp->ctl_entry); + if (insert_at_head) + list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); + else + list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); +#ifdef CONFIG_PROC_FS + register_proc_table(table, proc_sys_root); +#endif + return tmp; +} + +/** + * unregister_sysctl_table - unregister a sysctl table hierarchy + * @header: the header returned from register_sysctl_table + * + * Unregisters the sysctl table and all children. proc entries may not + * actually be removed until they are no longer used by anyone. + */ +void unregister_sysctl_table(struct ctl_table_header * header) +{ + list_del(&header->ctl_entry); +#ifdef CONFIG_PROC_FS + unregister_proc_table(header->ctl_table, proc_sys_root); +#endif + kfree(header); +} + +/* + * /proc/sys support + */ + +#ifdef CONFIG_PROC_FS + +/* Scan the sysctl entries in table and add them all into /proc */ +static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) +{ + struct proc_dir_entry *de; + int len; + mode_t mode; + + for (; table->ctl_name; table++) { + /* Can't do anything without a proc name. */ + if (!table->procname) + continue; + /* Maybe we can't do anything with it... 
*/ + if (!table->proc_handler && !table->child) { + printk(KERN_WARNING "SYSCTL: Can't register %s\n", + table->procname); + continue; + } + + len = strlen(table->procname); + mode = table->mode; + + de = NULL; + if (table->proc_handler) + mode |= S_IFREG; + else { + mode |= S_IFDIR; + for (de = root->subdir; de; de = de->next) { + if (proc_match(len, table->procname, de)) + break; + } + /* If the subdir exists already, de is non-NULL */ + } + + if (!de) { + de = create_proc_entry(table->procname, mode, root); + if (!de) + continue; + de->data = (void *) table; + if (table->proc_handler) { + de->proc_fops = &proc_sys_file_operations; + de->proc_iops = &proc_sys_inode_operations; + } + } + table->de = de; + if (de->mode & S_IFDIR) + register_proc_table(table->child, de); + } +} + +/* + * Unregister a /proc sysctl table and any subdirectories. + */ +static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root) +{ + struct proc_dir_entry *de; + for (; table->ctl_name; table++) { + if (!(de = table->de)) + continue; + if (de->mode & S_IFDIR) { + if (!table->child) { + printk (KERN_ALERT "Help - malformed sysctl tree on free\n"); + continue; + } + unregister_proc_table(table->child, de); + + /* Don't unregister directories which still have entries.. */ + if (de->subdir) + continue; + } + + /* Don't unregister proc entries that are still being used.. */ + if (atomic_read(&de->count)) + continue; + + table->de = NULL; + remove_proc_entry(table->procname, root); + } +} + +static ssize_t do_rw_proc(int write, struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + int op; + struct proc_dir_entry *de; + struct ctl_table *table; + size_t res; + ssize_t error; + + de = (struct proc_dir_entry*) file->f_dentry->d_inode->u.generic_ip; + if (!de || !de->data) + return -ENOTDIR; + table = (struct ctl_table *) de->data; + if (!table || !table->proc_handler) + return -ENOTDIR; + op = (write ? 002 : 004); + if (ctl_perm(table, op)) + return -EPERM; + + res = count; + + /* + * FIXME: we need to pass on ppos to the handler. + */ + + error = (*table->proc_handler) (table, write, file, buf, &res); + if (error) + return error; + return res; +} + +static ssize_t proc_readsys(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + return do_rw_proc(0, file, buf, count, ppos); +} + +static ssize_t proc_writesys(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + return do_rw_proc(1, file, (char *) buf, count, ppos); +} + +static int proc_sys_permission(struct inode *inode, int op) +{ + return test_perm(inode->i_mode, op); +} + +/** + * proc_dostring - read a string sysctl + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes a string from/to the user buffer. If the kernel + * buffer provided is not large enough to hold the string, the + * string is truncated. The copied string is %NULL-terminated. + * If the string is being read by the user process, it is copied + * and a newline '\n' is added. It is truncated if the buffer is + * not large enough. + * + * Returns 0 on success. 
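+ *
+ * For example, `echo darkstar > /proc/sys/kernel/hostname' arrives
+ * here (via proc_doutsstring() below) with table->data pointing at
+ * system_utsname.nodename and table->maxlen 64, as set up in
+ * kern_table[] above.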
+ */ +int proc_dostring(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + size_t len; + char *p, c; + + if (!table->data || !table->maxlen || !*lenp || + (filp->f_pos && !write)) { + *lenp = 0; + return 0; + } + + if (write) { + len = 0; + p = buffer; + while (len < *lenp) { + if (get_user(c, p++)) + return -EFAULT; + if (c == 0 || c == '\n') + break; + len++; + } + if (len >= table->maxlen) + len = table->maxlen-1; + if(copy_from_user(table->data, buffer, len)) + return -EFAULT; + ((char *) table->data)[len] = 0; + filp->f_pos += *lenp; + } else { + len = strlen(table->data); + if (len > table->maxlen) + len = table->maxlen; + if (len > *lenp) + len = *lenp; + if (len) + if(copy_to_user(buffer, table->data, len)) + return -EFAULT; + if (len < *lenp) { + if(put_user('\n', ((char *) buffer) + len)) + return -EFAULT; + len++; + } + *lenp = len; + filp->f_pos += len; + } + return 0; +} + +/* + * Special case of dostring for the UTS structure. This has locks + * to observe. Should this be in kernel/sys.c ???? + */ + +static int proc_doutsstring(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + int r; + + if (!write) { + down_read(&uts_sem); + r=proc_dostring(table,0,filp,buffer,lenp); + up_read(&uts_sem); + } else { + down_write(&uts_sem); + r=proc_dostring(table,1,filp,buffer,lenp); + up_write(&uts_sem); + } + return r; +} + +#define OP_SET 0 +#define OP_AND 1 +#define OP_OR 2 +#define OP_MAX 3 +#define OP_MIN 4 + +static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp, int conv, int op) +{ + int *i, vleft, first=1, neg, val; + size_t left, len; + + #define TMPBUFLEN 20 + char buf[TMPBUFLEN], *p; + + if (!table->data || !table->maxlen || !*lenp || + (filp->f_pos && !write)) { + *lenp = 0; + return 0; + } + + i = (int *) table->data; + vleft = table->maxlen / sizeof(int); + left = *lenp; + + for (; left && vleft--; i++, first=0) { + if (write) { + while (left) { + char c; + if (get_user(c, (char *) buffer)) + return -EFAULT; + if (!isspace(c)) + break; + left--; + buffer++; + } + if (!left) + break; + neg = 0; + len = left; + if (len > TMPBUFLEN-1) + len = TMPBUFLEN-1; + if(copy_from_user(buf, buffer, len)) + return -EFAULT; + buf[len] = 0; + p = buf; + if (*p == '-' && left > 1) { + neg = 1; + left--, p++; + } + if (*p < '0' || *p > '9') + break; + val = simple_strtoul(p, &p, 0) * conv; + len = p-buf; + if ((len < left) && *p && !isspace(*p)) + break; + if (neg) + val = -val; + buffer += len; + left -= len; + switch(op) { + case OP_SET: *i = val; break; + case OP_AND: *i &= val; break; + case OP_OR: *i |= val; break; + case OP_MAX: if(*i < val) + *i = val; + break; + case OP_MIN: if(*i > val) + *i = val; + break; + } + } else { + p = buf; + if (!first) + *p++ = '\t'; + sprintf(p, "%d", (*i) / conv); + len = strlen(buf); + if (len > left) + len = left; + if(copy_to_user(buffer, buf, len)) + return -EFAULT; + left -= len; + buffer += len; + } + } + + if (!write && !first && left) { + if(put_user('\n', (char *) buffer)) + return -EFAULT; + left--, buffer++; + } + if (write) { + p = (char *) buffer; + while (left) { + char c; + if (get_user(c, p++)) + return -EFAULT; + if (!isspace(c)) + break; + left--; + } + } + if (write && first) + return -EINVAL; + *lenp -= left; + filp->f_pos += *lenp; + return 0; +} + +/** + * proc_dointvec - read a vector of integers + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * 
@buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned int) integer + * values from/to the user buffer, treated as an ASCII string. + * + * Returns 0 on success. + */ +int proc_dointvec(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return do_proc_dointvec(table,write,filp,buffer,lenp,1,OP_SET); +} + +/* + * init may raise the set. + */ + +int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + if (!capable(CAP_SYS_MODULE)) { + return -EPERM; + } + return do_proc_dointvec(table,write,filp,buffer,lenp,1, + (current->pid == 1) ? OP_SET : OP_AND); +} + +/** + * proc_dointvec_minmax - read a vector of integers with min/max values + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned int) integer + * values from/to the user buffer, treated as an ASCII string. + * + * This routine will ensure the values are within the range specified by + * table->extra1 (min) and table->extra2 (max). + * + * Returns 0 on success. + */ +int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + int *i, *min, *max, vleft, first=1, neg, val; + size_t len, left; + #define TMPBUFLEN 20 + char buf[TMPBUFLEN], *p; + + if (!table->data || !table->maxlen || !*lenp || + (filp->f_pos && !write)) { + *lenp = 0; + return 0; + } + + i = (int *) table->data; + min = (int *) table->extra1; + max = (int *) table->extra2; + vleft = table->maxlen / sizeof(int); + left = *lenp; + + for (; left && vleft--; i++, min++, max++, first=0) { + if (write) { + while (left) { + char c; + if (get_user(c, (char *) buffer)) + return -EFAULT; + if (!isspace(c)) + break; + left--; + buffer++; + } + if (!left) + break; + neg = 0; + len = left; + if (len > TMPBUFLEN-1) + len = TMPBUFLEN-1; + if(copy_from_user(buf, buffer, len)) + return -EFAULT; + buf[len] = 0; + p = buf; + if (*p == '-' && left > 1) { + neg = 1; + left--, p++; + } + if (*p < '0' || *p > '9') + break; + val = simple_strtoul(p, &p, 0); + len = p-buf; + if ((len < left) && *p && !isspace(*p)) + break; + if (neg) + val = -val; + buffer += len; + left -= len; + + if ((min && val < *min) || (max && val > *max)) + continue; + *i = val; + } else { + p = buf; + if (!first) + *p++ = '\t'; + sprintf(p, "%d", *i); + len = strlen(buf); + if (len > left) + len = left; + if(copy_to_user(buffer, buf, len)) + return -EFAULT; + left -= len; + buffer += len; + } + } + + if (!write && !first && left) { + if(put_user('\n', (char *) buffer)) + return -EFAULT; + left--, buffer++; + } + if (write) { + p = (char *) buffer; + while (left) { + char c; + if (get_user(c, p++)) + return -EFAULT; + if (!isspace(c)) + break; + left--; + } + } + if (write && first) + return -EINVAL; + *lenp -= left; + filp->f_pos += *lenp; + return 0; +} + +static int do_proc_doulongvec_minmax(ctl_table *table, int write, + struct file *filp, + void *buffer, size_t *lenp, + unsigned long convmul, + unsigned long convdiv) +{ +#define TMPBUFLEN 20 + unsigned long *i, *min, *max, val; + int vleft, first=1, neg; + size_t len, left; + char buf[TMPBUFLEN], *p; + + if (!table->data || !table->maxlen || !*lenp || + (filp->f_pos && !write)) { + *lenp = 0; + return 0; + } + + i = (unsigned long *) table->data; + min = (unsigned long *) table->extra1; + max = 
(unsigned long *) table->extra2; + vleft = table->maxlen / sizeof(unsigned long); + left = *lenp; + + for (; left && vleft--; i++, first=0) { + if (write) { + while (left) { + char c; + if (get_user(c, (char *) buffer)) + return -EFAULT; + if (!isspace(c)) + break; + left--; + buffer++; + } + if (!left) + break; + neg = 0; + len = left; + if (len > TMPBUFLEN-1) + len = TMPBUFLEN-1; + if(copy_from_user(buf, buffer, len)) + return -EFAULT; + buf[len] = 0; + p = buf; + if (*p == '-' && left > 1) { + neg = 1; + left--, p++; + } + if (*p < '0' || *p > '9') + break; + val = simple_strtoul(p, &p, 0) * convmul / convdiv ; + len = p-buf; + if ((len < left) && *p && !isspace(*p)) + break; + if (neg) + val = -val; + buffer += len; + left -= len; + + if(neg) + continue; + if (min && val < *min++) + continue; + if (max && val > *max++) + continue; + *i = val; + } else { + p = buf; + if (!first) + *p++ = '\t'; + sprintf(p, "%lu", convdiv * (*i) / convmul); + len = strlen(buf); + if (len > left) + len = left; + if(copy_to_user(buffer, buf, len)) + return -EFAULT; + left -= len; + buffer += len; + } + } + + if (!write && !first && left) { + if(put_user('\n', (char *) buffer)) + return -EFAULT; + left--, buffer++; + } + if (write) { + p = (char *) buffer; + while (left) { + char c; + if (get_user(c, p++)) + return -EFAULT; + if (!isspace(c)) + break; + left--; + } + } + if (write && first) + return -EINVAL; + *lenp -= left; + filp->f_pos += *lenp; + return 0; +#undef TMPBUFLEN +} + +/** + * proc_doulongvec_minmax - read a vector of long integers with min/max values + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long + * values from/to the user buffer, treated as an ASCII string. + * + * This routine will ensure the values are within the range specified by + * table->extra1 (min) and table->extra2 (max). + * + * Returns 0 on success. + */ +int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, 1l, 1l); +} + +/** + * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long + * values from/to the user buffer, treated as an ASCII string. The values + * are treated as milliseconds, and converted to jiffies when they are stored. + * + * This routine will ensure the values are within the range specified by + * table->extra1 (min) and table->extra2 (max). + * + * Returns 0 on success. + */ +int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, + struct file *filp, + void *buffer, size_t *lenp) +{ + return do_proc_doulongvec_minmax(table, write, filp, buffer, + lenp, HZ, 1000l); +} + + +/** + * proc_dointvec_jiffies - read a vector of integers as seconds + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned int) integer + * values from/to the user buffer, treated as an ASCII string. 
+ * The values read are assumed to be in seconds, and are converted into + * jiffies. + * + * Returns 0 on success. + */ +int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return do_proc_dointvec(table,write,filp,buffer,lenp,HZ,OP_SET); +} + +#else /* CONFIG_PROC_FS */ + +int proc_dostring(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +static int proc_doutsstring(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +int proc_dointvec(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, + struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + + +#endif /* CONFIG_PROC_FS */ + + +/* + * General sysctl support routines + */ + +/* The generic string strategy routine: */ +int sysctl_string(ctl_table *table, int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, void **context) +{ + size_t l, len; + + if (!table->data || !table->maxlen) + return -ENOTDIR; + + if (oldval && oldlenp) { + if (get_user(len, oldlenp)) + return -EFAULT; + if (len) { + l = strlen(table->data); + if (len > l) len = l; + if (len >= table->maxlen) + len = table->maxlen; + if(copy_to_user(oldval, table->data, len)) + return -EFAULT; + if(put_user(0, ((char *) oldval) + len)) + return -EFAULT; + if(put_user(len, oldlenp)) + return -EFAULT; + } + } + if (newval && newlen) { + len = newlen; + if (len > table->maxlen) + len = table->maxlen; + if(copy_from_user(table->data, newval, len)) + return -EFAULT; + if (len == table->maxlen) + len--; + ((char *) table->data)[len] = 0; + } + return 0; +} + +/* + * This function makes sure that all of the integers in the vector + * are between the minimum and maximum values given in the arrays + * table->extra1 and table->extra2, respectively. 
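+ *
+ * The "bdflush" entry in vm_table[] above is one user of this
+ * strategy: each of the nine integers written through sysctl(2) is
+ * checked against the matching element of bdflush_min[] and
+ * bdflush_max[] before the new values are accepted.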
+ */ +int sysctl_intvec(ctl_table *table, int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, void **context) +{ + int i, *vec, *min, *max; + size_t length; + + if (newval && newlen) { + if (newlen % sizeof(int) != 0) + return -EINVAL; + + if (!table->extra1 && !table->extra2) + return 0; + + if (newlen > table->maxlen) + newlen = table->maxlen; + length = newlen / sizeof(int); + + vec = (int *) newval; + min = (int *) table->extra1; + max = (int *) table->extra2; + + for (i = 0; i < length; i++) { + int value; + if (get_user(value, vec + i)) + return -EFAULT; + if (min && value < min[i]) + return -EINVAL; + if (max && value > max[i]) + return -EINVAL; + } + } + return 0; +} + +/* Strategy function to convert jiffies to seconds */ +int sysctl_jiffies(ctl_table *table, int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, void **context) +{ + if (oldval) { + size_t olen; + if (oldlenp) { + if (get_user(olen, oldlenp)) + return -EFAULT; + if (olen!=sizeof(int)) + return -EINVAL; + } + if (put_user(*(int *)(table->data) / HZ, (int *)oldval) || + (oldlenp && put_user(sizeof(int),oldlenp))) + return -EFAULT; + } + if (newval && newlen) { + int new; + if (newlen != sizeof(int)) + return -EINVAL; + if (get_user(new, (int *)newval)) + return -EFAULT; + *(int *)(table->data) = new*HZ; + } + return 1; +} + + +#else /* CONFIG_SYSCTL */ + + +extern asmlinkage long sys_sysctl(struct __sysctl_args *args) +{ + return -ENOSYS; +} + +int sysctl_string(ctl_table *table, int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, void **context) +{ + return -ENOSYS; +} + +int sysctl_intvec(ctl_table *table, int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, void **context) +{ + return -ENOSYS; +} + +int sysctl_jiffies(ctl_table *table, int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, void **context) +{ + return -ENOSYS; +} + +int proc_dostring(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +int proc_dointvec(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, + struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +struct ctl_table_header * register_sysctl_table(ctl_table * table, + int insert_at_head) +{ + return 0; +} + +void unregister_sysctl_table(struct ctl_table_header * table) +{ +} + +#endif /* CONFIG_SYSCTL */ diff --git a/uClinux-2.4.31-uc0/kernel/time.c b/uClinux-2.4.31-uc0/kernel/time.c new file mode 100644 index 0000000..6be88fc --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/time.c @@ -0,0 +1,480 @@ +/* + * linux/kernel/time.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * This file contains the interface functions for the various + * time related system calls: time, stime, gettimeofday, settimeofday, + * adjtime + */ +/* + * 
Modification history kernel/time.c + * + * 1993-09-02 Philip Gladstone + * Created file with time related functions from sched.c and adjtimex() + * 1993-10-08 Torsten Duwe + * adjtime interface update and CMOS clock write code + * 1995-08-13 Torsten Duwe + * kernel PLL updated to 1994-12-13 specs (rfc-1589) + * 1999-01-16 Ulrich Windl + * Introduced error checking for many cases in adjtimex(). + * Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10) + * (Even though the technical memorandum forbids it) + */ + +#include <linux/mm.h> +#include <linux/timex.h> +#include <linux/smp_lock.h> + +#include <asm/uaccess.h> + +/* + * The timezone where the local system is located. Used as a default by some + * programs who obtain this value by using gettimeofday. + */ +struct timezone sys_tz; + +/* The xtime_lock is not only serializing the xtime read/writes but it's also + serializing all accesses to the global NTP variables now. */ +extern rwlock_t xtime_lock; + +static void print_time_change(const char *msg, struct timeval new_tv) +{ + long s, j, d, m, y; + + j = new_tv.tv_sec / 86400L + 719469; + s = new_tv.tv_sec % 86400L; + + if( s < 0 ) { s += 86400L; j--; } + + y = (4L * j - 1L) / 146097L; + j = 4L * j - 1L - 146097L * y; + d = j / 4L; + j = (4L * d + 3L) / 1461L; + d = 4L * d + 3L - 1461L * j; + d = (d + 4L) / 4L; + m = (5L * d - 3L) / 153L; + d = 5L * d - 3 - 153L * m; + d = (d + 5L) / 5L; + y = 100L * y + j; + if (m < 10) + m += 2; + else + { + m -= 10; + ++y; + } + printk(KERN_NOTICE "Clock: %s time %04d/%02d/%02d - %02d:%02d:%02d\n", + msg, (int) y, (int) m + 1, (int) d, (int) (s / 3600 ), (int) (s / 60) % 60, (int) s % 60); +} + +#ifndef ABS +#define ABS(X) ((X) < 0 ? -(X) : (X)) +#endif + +static void check_print_time_change(const struct timeval old_tv, const struct timeval new_tv) +{ + static long accumulated_usecs; + + if (ABS(new_tv.tv_sec - old_tv.tv_sec) <= 2) { + /* No more than 2 seconds of change */ + accumulated_usecs += (new_tv.tv_sec - old_tv.tv_sec) * 1000000L + (new_tv.tv_usec - old_tv.tv_usec); + if (ABS(accumulated_usecs) < 1000000L) { + /* Less than 1 second of accumulated change */ + return; + } + } + + accumulated_usecs = 0; + + print_time_change("old", old_tv); + print_time_change("new", new_tv); +} + +#if !defined(__alpha__) && !defined(__ia64__) + +/* + * sys_time() can be implemented in user-level using + * sys_gettimeofday(). Is this for backwards compatibility? If so, + * why not move it into the appropriate arch directory (for those + * architectures that need it). + * + * XXX This function is NOT 64-bit clean! + */ +asmlinkage long sys_time(int * tloc) +{ + struct timeval now; + int i; + + do_gettimeofday(&now); + i = now.tv_sec; + if (tloc) { + if (put_user(i,tloc)) + i = -EFAULT; + } + return i; +} + +/* + * sys_stime() can be implemented in user-level using + * sys_settimeofday(). Is this for backwards compatibility? If so, + * why not move it into the appropriate arch directory (for those + * architectures that need it). 
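+ *
+ * Note that stime() carries whole seconds only: the code below sets
+ * xtime.tv_sec and zeroes xtime.tv_usec, so any sub-second part of
+ * the current time is discarded, and the clock is marked unsynced
+ * (STA_UNSYNC) for NTP purposes.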
+ */ + +asmlinkage long sys_stime(int * tptr) +{ + int value; + struct timeval old_tv, new_tv; + + if (!capable(CAP_SYS_TIME)) + return -EPERM; + if (get_user(value, tptr)) + return -EFAULT; + + do_gettimeofday(&old_tv); + write_lock_irq(&xtime_lock); + vxtime_lock(); + xtime.tv_sec = value; + xtime.tv_usec = 0; + vxtime_unlock(); + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; + write_unlock_irq(&xtime_lock); + do_gettimeofday(&new_tv); + check_print_time_change(old_tv, new_tv); + return 0; +} + +#endif + +asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + if (tv) { + struct timeval ktv; + do_gettimeofday(&ktv); + if (copy_to_user(tv, &ktv, sizeof(ktv))) + return -EFAULT; + } + if (tz) { + if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) + return -EFAULT; + } + return 0; +} + +/* + * Adjust the time obtained from the CMOS to be UTC time instead of + * local time. + * + * This is ugly, but preferable to the alternatives. Otherwise we + * would either need to write a program to do it in /etc/rc (and risk + * confusion if the program gets run more than once; it would also be + * hard to make the program warp the clock precisely n hours) or + * compile in the timezone information into the kernel. Bad, bad.... + * + * - TYT, 1992-01-01 + * + * The best thing to do is to keep the CMOS clock in universal time (UTC) + * as real UNIX machines always do it. This avoids all headaches about + * daylight saving times and warping kernel clocks. + */ +inline static void warp_clock(void) +{ + write_lock_irq(&xtime_lock); + vxtime_lock(); + xtime.tv_sec += sys_tz.tz_minuteswest * 60; + vxtime_unlock(); + write_unlock_irq(&xtime_lock); +} + +/* + * In case for some reason the CMOS clock has not already been running + * in UTC, but in some local time: The first time we set the timezone, + * we will warp the clock so that it is ticking UTC time instead of + * local time. Presumably, if someone is setting the timezone then we + * are running in an environment where the programs understand about + * timezones. This should be done at boot time in the /etc/rc script, + * as soon as possible, so that the clock can be set right. Otherwise, + * various programs will get confused when the clock gets warped. + */ + +int do_sys_settimeofday(struct timeval *tv, struct timezone *tz) +{ + static int firsttime = 1; + + if (!capable(CAP_SYS_TIME)) + return -EPERM; + + if (tz) { + /* SMP safe, global irq locking makes it work. */ + sys_tz = *tz; + if (firsttime) { + firsttime = 0; + if (!tv) + warp_clock(); + } + } + if (tv) + { + /* SMP safe, again the code in arch/foo/time.c should + * globally block out interrupts when it runs. + */ + do_settimeofday(tv); + } + return 0; +} + +asmlinkage long sys_settimeofday(struct timeval *tv, struct timezone *tz) +{ + struct timeval new_tv; + struct timezone new_tz; + int ret; + + struct timeval old_tv; + + if (tv) { + if (copy_from_user(&new_tv, tv, sizeof(*tv))) + return -EFAULT; + } + if (tz) { + if (copy_from_user(&new_tz, tz, sizeof(*tz))) + return -EFAULT; + } + + if (tv) { + do_gettimeofday(&old_tv); + } + ret = do_sys_settimeofday(tv ? &new_tv : NULL, tz ? 
&new_tz : NULL); + if (tv) { + check_print_time_change(old_tv, new_tv); + } + + return ret; +} + +long pps_offset; /* pps time offset (us) */ +long pps_jitter = MAXTIME; /* time dispersion (jitter) (us) */ + +long pps_freq; /* frequency offset (scaled ppm) */ +long pps_stabil = MAXFREQ; /* frequency dispersion (scaled ppm) */ + +long pps_valid = PPS_VALID; /* pps signal watchdog counter */ + +int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */ + +long pps_jitcnt; /* jitter limit exceeded */ +long pps_calcnt; /* calibration intervals */ +long pps_errcnt; /* calibration errors */ +long pps_stbcnt; /* stability limit exceeded */ + +/* hook for a loadable hardpps kernel module */ +void (*hardpps_ptr)(struct timeval *); + +/* adjtimex mainly allows reading (and writing, if superuser) of + * kernel time-keeping variables. used by xntpd. + */ +int do_adjtimex(struct timex *txc) +{ + long ltemp, mtemp, save_adjust; + int result; + + /* In order to modify anything, you gotta be super-user! */ + if (txc->modes && !capable(CAP_SYS_TIME)) + return -EPERM; + + /* Now we validate the data before disabling interrupts */ + + if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) + /* singleshot must not be used with any other mode bits */ + if (txc->modes != ADJ_OFFSET_SINGLESHOT) + return -EINVAL; + + if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET)) + /* adjustment Offset limited to +- .512 seconds */ + if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE ) + return -EINVAL; + + /* if the quartz is off by more than 10% something is VERY wrong ! */ + if (txc->modes & ADJ_TICK) + if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) + return -EINVAL; + + write_lock_irq(&xtime_lock); + result = time_state; /* mostly `TIME_OK' */ + + /* Save for later - semantics of adjtime is to return old value */ + save_adjust = time_adjust; + +#if 0 /* STA_CLOCKERR is never set yet */ + time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */ +#endif + /* If there are input parameters, then process them */ + if (txc->modes) + { + if (txc->modes & ADJ_STATUS) /* only set allowed bits */ + time_status = (txc->status & ~STA_RONLY) | + (time_status & STA_RONLY); + + if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */ + if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) { + result = -EINVAL; + goto leave; + } + time_freq = txc->freq - pps_freq; + } + + if (txc->modes & ADJ_MAXERROR) { + if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) { + result = -EINVAL; + goto leave; + } + time_maxerror = txc->maxerror; + } + + if (txc->modes & ADJ_ESTERROR) { + if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) { + result = -EINVAL; + goto leave; + } + time_esterror = txc->esterror; + } + + if (txc->modes & ADJ_TIMECONST) { /* p. 24 */ + if (txc->constant < 0) { /* NTP v4 uses values > 6 */ + result = -EINVAL; + goto leave; + } + time_constant = txc->constant; + } + + if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ + if (txc->modes == ADJ_OFFSET_SINGLESHOT) { + /* adjtime() is independent from ntp_adjtime() */ + time_adjust = txc->offset; + } + else if ( time_status & (STA_PLL | STA_PPSTIME) ) { + ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) == + (STA_PPSTIME | STA_PPSSIGNAL) ? + pps_offset : txc->offset; + + /* + * Scale the phase adjustment and + * clamp to the operating range. 
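+ *
+ * For example, given the +/- .512 second limit noted earlier
+ * (MAXPHASE), a pps_offset of 700000 usec would be stored
+ * clamped to MAXPHASE << SHIFT_UPDATE rather than applied in
+ * full.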
+ */ + if (ltemp > MAXPHASE) + time_offset = MAXPHASE << SHIFT_UPDATE; + else if (ltemp < -MAXPHASE) + time_offset = -(MAXPHASE << SHIFT_UPDATE); + else + time_offset = ltemp << SHIFT_UPDATE; + + /* + * Select whether the frequency is to be controlled + * and in which mode (PLL or FLL). Clamp to the operating + * range. Ugly multiply/divide should be replaced someday. + */ + + if (time_status & STA_FREQHOLD || time_reftime == 0) + time_reftime = xtime.tv_sec; + mtemp = xtime.tv_sec - time_reftime; + time_reftime = xtime.tv_sec; + if (time_status & STA_FLL) { + if (mtemp >= MINSEC) { + ltemp = (time_offset / mtemp) << (SHIFT_USEC - + SHIFT_UPDATE); + if (ltemp < 0) + time_freq -= -ltemp >> SHIFT_KH; + else + time_freq += ltemp >> SHIFT_KH; + } else /* calibration interval too short (p. 12) */ + result = TIME_ERROR; + } else { /* PLL mode */ + if (mtemp < MAXSEC) { + ltemp *= mtemp; + if (ltemp < 0) + time_freq -= -ltemp >> (time_constant + + time_constant + + SHIFT_KF - SHIFT_USEC); + else + time_freq += ltemp >> (time_constant + + time_constant + + SHIFT_KF - SHIFT_USEC); + } else /* calibration interval too long (p. 12) */ + result = TIME_ERROR; + } + if (time_freq > time_tolerance) + time_freq = time_tolerance; + else if (time_freq < -time_tolerance) + time_freq = -time_tolerance; + } /* STA_PLL || STA_PPSTIME */ + } /* txc->modes & ADJ_OFFSET */ + if (txc->modes & ADJ_TICK) { + /* if the quartz is off by more than 10% something is + VERY wrong ! */ + if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) { + result = -EINVAL; + goto leave; + } + tick = txc->tick; + } + } /* txc->modes */ +leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0 + || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0 + && (time_status & STA_PPSSIGNAL) == 0) + /* p. 24, (b) */ + || ((time_status & (STA_PPSTIME|STA_PPSJITTER)) + == (STA_PPSTIME|STA_PPSJITTER)) + /* p. 24, (c) */ + || ((time_status & STA_PPSFREQ) != 0 + && (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0)) + /* p. 24, (d) */ + result = TIME_ERROR; + + if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) + txc->offset = save_adjust; + else { + if (time_offset < 0) + txc->offset = -(-time_offset >> SHIFT_UPDATE); + else + txc->offset = time_offset >> SHIFT_UPDATE; + } + txc->freq = time_freq + pps_freq; + txc->maxerror = time_maxerror; + txc->esterror = time_esterror; + txc->status = time_status; + txc->constant = time_constant; + txc->precision = time_precision; + txc->tolerance = time_tolerance; + txc->tick = tick; + txc->ppsfreq = pps_freq; + txc->jitter = pps_jitter >> PPS_AVG; + txc->shift = pps_shift; + txc->stabil = pps_stabil; + txc->jitcnt = pps_jitcnt; + txc->calcnt = pps_calcnt; + txc->errcnt = pps_errcnt; + txc->stbcnt = pps_stbcnt; + write_unlock_irq(&xtime_lock); + do_gettimeofday(&txc->time); + return(result); +} + +asmlinkage long sys_adjtimex(struct timex *txc_p) +{ + struct timex txc; /* Local copy of parameter */ + int ret; + + /* Copy the user data space into the kernel copy + * structure. But bear in mind that the structures + * may change + */ + if(copy_from_user(&txc, txc_p, sizeof(struct timex))) + return -EFAULT; + ret = do_adjtimex(&txc); + return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? 
-EFAULT : ret; +} diff --git a/uClinux-2.4.31-uc0/kernel/timer.c b/uClinux-2.4.31-uc0/kernel/timer.c new file mode 100644 index 0000000..c14c251 --- /dev/null +++ b/uClinux-2.4.31-uc0/kernel/timer.c @@ -0,0 +1,896 @@ +/* + * linux/kernel/timer.c + * + * Kernel internal timers, kernel timekeeping, basic process system calls + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. + * + * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to + * serialize accesses to xtime/lost_ticks). + * Copyright (C) 1998 Andrea Arcangeli + * 1999-03-10 Improved NTP compatibility by Ulrich Windl + */ + +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/timex.h> +#include <linux/delay.h> +#include <linux/smp_lock.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> + +#include <asm/uaccess.h> +#include <asm/io.h> + +/* + * Timekeeping variables + */ + +long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */ + +/* The current time */ +struct timeval xtime __attribute__ ((aligned (16))); + +/* Don't completely fail for HZ > 500. */ +int tickadj = 500/HZ ? : 1; /* microsecs */ + +DECLARE_TASK_QUEUE(tq_timer); +DECLARE_TASK_QUEUE(tq_immediate); + +/* + * phase-lock loop variables + */ +/* TIME_ERROR prevents overwriting the CMOS clock */ +int time_state = TIME_OK; /* clock synchronization status */ +int time_status = STA_UNSYNC; /* clock status bits */ +long time_offset; /* time adjustment (us) */ +long time_constant = 2; /* pll time constant */ +long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ +long time_precision = 1; /* clock precision (us) */ +long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ +long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ +long time_phase; /* phase offset (scaled us) */ +long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC; + /* frequency offset (scaled ppm)*/ +long time_adj; /* tick adjust (scaled 1 / HZ) */ +long time_reftime; /* time at last adjustment (s) */ + +long time_adjust; +long time_adjust_step; + +unsigned long event; + +extern int do_setitimer(int, struct itimerval *, struct itimerval *); + +unsigned long volatile jiffies; + +unsigned int * prof_buffer; +unsigned long prof_len; +unsigned long prof_shift; + +/* + * Event timer code + */ +#define TVN_BITS 6 +#define TVR_BITS 8 +#define TVN_SIZE (1 << TVN_BITS) +#define TVR_SIZE (1 << TVR_BITS) +#define TVN_MASK (TVN_SIZE - 1) +#define TVR_MASK (TVR_SIZE - 1) + +struct timer_vec { + int index; + struct list_head vec[TVN_SIZE]; +}; + +struct timer_vec_root { + int index; + struct list_head vec[TVR_SIZE]; +}; + +static struct timer_vec tv5; +static struct timer_vec tv4; +static struct timer_vec tv3; +static struct timer_vec tv2; +static struct timer_vec_root tv1; + +static struct timer_vec * const tvecs[] = { + (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5 +}; + +static struct list_head * run_timer_list_running; + +#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0])) + +void init_timervecs (void) +{ + int i; + + for (i = 0; i < TVN_SIZE; i++) { + INIT_LIST_HEAD(tv5.vec + i); + INIT_LIST_HEAD(tv4.vec + i); + INIT_LIST_HEAD(tv3.vec + i); + INIT_LIST_HEAD(tv2.vec + i); + } + for (i = 0; i < TVR_SIZE; i++) + INIT_LIST_HEAD(tv1.vec + i); +} + +static unsigned long timer_jiffies; + +static inline void 
internal_add_timer(struct timer_list *timer) +{ + /* + * must be cli-ed when calling this + */ + unsigned long expires = timer->expires; + unsigned long idx = expires - timer_jiffies; + struct list_head * vec; + + if (run_timer_list_running) + vec = run_timer_list_running; + else if (idx < TVR_SIZE) { + int i = expires & TVR_MASK; + vec = tv1.vec + i; + } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { + int i = (expires >> TVR_BITS) & TVN_MASK; + vec = tv2.vec + i; + } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { + int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; + vec = tv3.vec + i; + } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { + int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; + vec = tv4.vec + i; + } else if ((signed long) idx < 0) { + /* can happen if you add a timer with expires == jiffies, + * or you set a timer to go off in the past + */ + vec = tv1.vec + tv1.index; + } else if (idx <= 0xffffffffUL) { + int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; + vec = tv5.vec + i; + } else { + /* Can only get here on architectures with 64-bit jiffies */ + INIT_LIST_HEAD(&timer->list); + return; + } + /* + * Timers are FIFO! + */ + list_add(&timer->list, vec->prev); +} + +/* Initialize both explicitly - let's try to have them in the same cache line */ +spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED; + +#ifdef CONFIG_SMP +volatile struct timer_list * volatile running_timer; +#define timer_enter(t) do { running_timer = t; mb(); } while (0) +#define timer_exit() do { running_timer = NULL; } while (0) +#define timer_is_running(t) (running_timer == t) +#define timer_synchronize(t) while (timer_is_running(t)) barrier() +#else +#define timer_enter(t) do { } while (0) +#define timer_exit() do { } while (0) +#endif + +void add_timer(struct timer_list *timer) +{ + unsigned long flags; + + spin_lock_irqsave(&timerlist_lock, flags); + if (timer_pending(timer)) + goto bug; + internal_add_timer(timer); + spin_unlock_irqrestore(&timerlist_lock, flags); + return; +bug: + spin_unlock_irqrestore(&timerlist_lock, flags); + printk("bug: kernel timer added twice at %p.\n", + __builtin_return_address(0)); +} + +static inline int detach_timer (struct timer_list *timer) +{ + if (!timer_pending(timer)) + return 0; + list_del(&timer->list); + return 1; +} + +int mod_timer(struct timer_list *timer, unsigned long expires) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(&timerlist_lock, flags); + timer->expires = expires; + ret = detach_timer(timer); + internal_add_timer(timer); + spin_unlock_irqrestore(&timerlist_lock, flags); + return ret; +} + +int del_timer(struct timer_list * timer) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(&timerlist_lock, flags); + ret = detach_timer(timer); + timer->list.next = timer->list.prev = NULL; + spin_unlock_irqrestore(&timerlist_lock, flags); + return ret; +} + +#ifdef CONFIG_SMP +void sync_timers(void) +{ + spin_unlock_wait(&global_bh_lock); +} + +/* + * SMP specific function to delete periodic timer. + * Caller must disable by some means restarting the timer + * for new. Upon exit the timer is not queued and handler is not running + * on any CPU. It returns number of times, which timer was deleted + * (for reference counting). 
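+ *
+ * For example, a driver whose handler re-arms the timer with
+ * add_timer() must first set a stop flag that the handler checks
+ * (so the timer is no longer restarted) and only then call
+ * del_timer_sync(); otherwise the handler may queue the timer
+ * again right after it has been deleted.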
+ */ + +int del_timer_sync(struct timer_list * timer) +{ + int ret = 0; + + for (;;) { + unsigned long flags; + int running; + + spin_lock_irqsave(&timerlist_lock, flags); + ret += detach_timer(timer); + timer->list.next = timer->list.prev = 0; + running = timer_is_running(timer); + spin_unlock_irqrestore(&timerlist_lock, flags); + + if (!running) + break; + + timer_synchronize(timer); + } + + return ret; +} +#endif + + +static inline void cascade_timers(struct timer_vec *tv) +{ + /* cascade all the timers from tv up one level */ + struct list_head *head, *curr, *next; + + head = tv->vec + tv->index; + curr = head->next; + /* + * We are removing _all_ timers from the list, so we don't have to + * detach them individually, just clear the list afterwards. + */ + while (curr != head) { + struct timer_list *tmp; + + tmp = list_entry(curr, struct timer_list, list); + next = curr->next; + list_del(curr); // not needed + internal_add_timer(tmp); + curr = next; + } + INIT_LIST_HEAD(head); + tv->index = (tv->index + 1) & TVN_MASK; +} + +static inline void run_timer_list(void) +{ + spin_lock_irq(&timerlist_lock); + while ((long)(jiffies - timer_jiffies) >= 0) { + LIST_HEAD(queued); + struct list_head *head, *curr; + if (!tv1.index) { + int n = 1; + do { + cascade_timers(tvecs[n]); + } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS); + } + run_timer_list_running = &queued; +repeat: + head = tv1.vec + tv1.index; + curr = head->next; + if (curr != head) { + struct timer_list *timer; + void (*fn)(unsigned long); + unsigned long data; + + timer = list_entry(curr, struct timer_list, list); + fn = timer->function; + data= timer->data; + + detach_timer(timer); + timer->list.next = timer->list.prev = NULL; + timer_enter(timer); + spin_unlock_irq(&timerlist_lock); + fn(data); + spin_lock_irq(&timerlist_lock); + timer_exit(); + goto repeat; + } + run_timer_list_running = NULL; + ++timer_jiffies; + tv1.index = (tv1.index + 1) & TVR_MASK; + + curr = queued.next; + while (curr != &queued) { + struct timer_list *timer; + + timer = list_entry(curr, struct timer_list, list); + curr = curr->next; + internal_add_timer(timer); + } + } + spin_unlock_irq(&timerlist_lock); +} + +spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED; + +void tqueue_bh(void) +{ + run_task_queue(&tq_timer); +} + +void immediate_bh(void) +{ + run_task_queue(&tq_immediate); +} + +/* + * this routine handles the overflow of the microsecond field + * + * The tricky bits of code to handle the accurate clock support + * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. + * They were originally developed for SUN and DEC kernels. + * All the kudos should go to Dave for this stuff. + * + */ +static void second_overflow(void) +{ + long ltemp; + + /* Bump the maxerror field */ + time_maxerror += time_tolerance >> SHIFT_USEC; + if ( time_maxerror > NTP_PHASE_LIMIT ) { + time_maxerror = NTP_PHASE_LIMIT; + time_status |= STA_UNSYNC; + } + + /* + * Leap second processing. If in leap-insert state at + * the end of the day, the system clock is set back one + * second; if in leap-delete state, the system clock is + * set ahead one second. The microtime() routine or + * external clock driver will insure that reported time + * is always monotonic. The ugly divides should be + * replaced. 
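+ *
+ * For example, with STA_INS set the last second of the UTC day is
+ * replayed, so the reported time runs ... 23:59:59, 23:59:60,
+ * 00:00:00 ...; the code below implements this by stepping
+ * xtime.tv_sec back one second at the day boundary.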
+	 */
+	switch (time_state) {
+
+	case TIME_OK:
+		if (time_status & STA_INS)
+			time_state = TIME_INS;
+		else if (time_status & STA_DEL)
+			time_state = TIME_DEL;
+		break;
+
+	case TIME_INS:
+		if (xtime.tv_sec % 86400 == 0) {
+			xtime.tv_sec--;
+			time_state = TIME_OOP;
+			printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
+		}
+		break;
+
+	case TIME_DEL:
+		if ((xtime.tv_sec + 1) % 86400 == 0) {
+			xtime.tv_sec++;
+			time_state = TIME_WAIT;
+			printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
+		}
+		break;
+
+	case TIME_OOP:
+		time_state = TIME_WAIT;
+		break;
+
+	case TIME_WAIT:
+		if (!(time_status & (STA_INS | STA_DEL)))
+			time_state = TIME_OK;
+	}
+
+	/*
+	 * Compute the phase adjustment for the next second. In
+	 * PLL mode, the offset is reduced by a fixed factor
+	 * times the time constant. In FLL mode the offset is
+	 * used directly. In either mode, the maximum phase
+	 * adjustment for each second is clamped so as to spread
+	 * the adjustment over not more than the number of
+	 * seconds between updates.
+	 */
+	if (time_offset < 0) {
+		ltemp = -time_offset;
+		if (!(time_status & STA_FLL))
+			ltemp >>= SHIFT_KG + time_constant;
+		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+		time_offset += ltemp;
+		time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+	} else {
+		ltemp = time_offset;
+		if (!(time_status & STA_FLL))
+			ltemp >>= SHIFT_KG + time_constant;
+		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+		time_offset -= ltemp;
+		time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+	}
+
+	/*
+	 * Compute the frequency estimate and additional phase
+	 * adjustment due to frequency error for the next
+	 * second. When the PPS signal is engaged, gnaw on the
+	 * watchdog counter and update the frequency computed by
+	 * the pll and the PPS signal.
+	 */
+	pps_valid++;
+	if (pps_valid == PPS_VALID) {	/* PPS signal lost */
+		pps_jitter = MAXTIME;
+		pps_stabil = MAXFREQ;
+		time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
+				 STA_PPSWANDER | STA_PPSERROR);
+	}
+	ltemp = time_freq + pps_freq;
+	if (ltemp < 0)
+		time_adj -= -ltemp >>
+			(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+	else
+		time_adj += ltemp >>
+			(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+
+#if HZ == 100
+	/* Compensate for (HZ==100) != (1 << SHIFT_HZ).
+	 * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
+	 */
+	if (time_adj < 0)
+		time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
+	else
+		time_adj += (time_adj >> 2) + (time_adj >> 5);
+#endif
+}
+
+/* in the NTP reference this is called "hardclock()" */
+static void update_wall_time_one_tick(void)
+{
+	if ( (time_adjust_step = time_adjust) != 0 ) {
+		/* We are doing an adjtime thing.
+		 *
+		 * Prepare time_adjust_step to be within bounds.
+		 * Note that a positive time_adjust means we want the clock
+		 * to run faster.
+		 *
+		 * Limit the amount of the step to be in the range
+		 * -tickadj .. +tickadj.
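+		 *
+		 * Worked example (added for clarity; assumes the default
+		 * tickadj of 500/HZ microseconds): with HZ = 100 that is
+		 * 5 us per tick, so a pending adjtime() adjustment of
+		 * +3000 us is applied 5 us per tick and the slew
+		 * completes after 600 ticks, i.e. about 6 seconds.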
+		 */
+		if (time_adjust > tickadj)
+			time_adjust_step = tickadj;
+		else if (time_adjust < -tickadj)
+			time_adjust_step = -tickadj;
+
+		/* Reduce by this step the amount of time left */
+		time_adjust -= time_adjust_step;
+	}
+#if 0
+//#if defined(CONFIG_MTD_NETtel) && !defined(CONFIG_TIMEPEG)
+{
+	extern void *cpu_mmcrp;
+	register long ms;
+	/* Work around for glitching problem of SC520 - Rev A1 silicon */
+	if (cpu_mmcrp) {
+		/* Use SC520 millisecond timer */
+		ms = *((volatile unsigned short *) (cpu_mmcrp + 0xc60));
+		xtime.tv_usec += (ms * 1000) + time_adjust_step;
+	} else {
+		xtime.tv_usec += tick + time_adjust_step;
+	}
+}
+#else
+	xtime.tv_usec += tick + time_adjust_step;
+#endif
+	/*
+	 * Advance the phase, once it gets to one microsecond, then
+	 * advance the tick more.
+	 */
+	time_phase += time_adj;
+	if (time_phase <= -FINEUSEC) {
+		long ltemp = -time_phase >> SHIFT_SCALE;
+		time_phase += ltemp << SHIFT_SCALE;
+		xtime.tv_usec -= ltemp;
+	}
+	else if (time_phase >= FINEUSEC) {
+		long ltemp = time_phase >> SHIFT_SCALE;
+		time_phase -= ltemp << SHIFT_SCALE;
+		xtime.tv_usec += ltemp;
+	}
+}
+
+/*
+ * Using a loop looks inefficient, but "ticks" is
+ * usually just one (we shouldn't be losing ticks;
+ * we're doing it this way mainly for interrupt
+ * latency reasons, not because we think we'll
+ * have lots of lost timer ticks).
+ */
+static void update_wall_time(unsigned long ticks)
+{
+	do {
+		ticks--;
+		update_wall_time_one_tick();
+	} while (ticks);
+
+	if (xtime.tv_usec >= 1000000) {
+		xtime.tv_usec -= 1000000;
+		xtime.tv_sec++;
+		second_overflow();
+	}
+}
+
+static inline void do_process_times(struct task_struct *p,
+	unsigned long user, unsigned long system)
+{
+	unsigned long psecs;
+
+	psecs = (p->times.tms_utime += user);
+	psecs += (p->times.tms_stime += system);
+	if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
+		/* Send SIGXCPU every second.. */
+		if (!(psecs % HZ))
+			send_sig(SIGXCPU, p, 1);
+		/* and SIGKILL when we go over max.. */
+		if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
+			send_sig(SIGKILL, p, 1);
+	}
+}
+
+static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
+{
+	unsigned long it_virt = p->it_virt_value;
+
+	if (it_virt) {
+		it_virt -= ticks;
+		if (!it_virt) {
+			it_virt = p->it_virt_incr;
+			send_sig(SIGVTALRM, p, 1);
+		}
+		p->it_virt_value = it_virt;
+	}
+}
+
+static inline void do_it_prof(struct task_struct *p)
+{
+	unsigned long it_prof = p->it_prof_value;
+
+	if (it_prof) {
+		if (--it_prof == 0) {
+			it_prof = p->it_prof_incr;
+			send_sig(SIGPROF, p, 1);
+		}
+		p->it_prof_value = it_prof;
+	}
+}
+
+void update_one_process(struct task_struct *p, unsigned long user,
+			unsigned long system, int cpu)
+{
+	p->per_cpu_utime[cpu] += user;
+	p->per_cpu_stime[cpu] += system;
+	do_process_times(p, user, system);
+	do_it_virt(p, user);
+	do_it_prof(p);
+}
+
+/*
+ * Called from the timer interrupt handler to charge one tick to the current
+ * process. user_tick is 1 if the tick is user time, 0 for system.
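+ *
+ * Worked example (added for clarity): with HZ == 100, a tick taken
+ * while the CPU was in user mode calls update_one_process(p, 1, 0, cpu),
+ * so one tick (10 ms) is charged to tms_utime and counted against the
+ * ITIMER_VIRTUAL interval; a kernel-mode tick charges tms_stime instead.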
+ */
+void update_process_times(int user_tick)
+{
+	struct task_struct *p = current;
+	int cpu = smp_processor_id(), system = user_tick ^ 1;
+
+	update_one_process(p, user_tick, system, cpu);
+	if (p->pid) {
+		if (--p->counter <= 0) {
+			p->counter = 0;
+			/*
+			 * SCHED_FIFO is priority preemption, so this is
+			 * not the place to decide whether to reschedule a
+			 * SCHED_FIFO task or not - Bhavesh Davda
+			 */
+			if (p->policy != SCHED_FIFO) {
+				p->need_resched = 1;
+			}
+		}
+		if (p->nice > 0)
+			kstat.per_cpu_nice[cpu] += user_tick;
+		else
+			kstat.per_cpu_user[cpu] += user_tick;
+		kstat.per_cpu_system[cpu] += system;
+	} else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
+		kstat.per_cpu_system[cpu] += system;
+}
+
+/*
+ * Nr of active tasks - counted in fixed-point numbers
+ */
+static unsigned long count_active_tasks(void)
+{
+	struct task_struct *p;
+	unsigned long nr = 0;
+
+	read_lock(&tasklist_lock);
+	for_each_task(p) {
+		if ((p->state == TASK_RUNNING ||
+		     (p->state & TASK_UNINTERRUPTIBLE)))
+			nr += FIXED_1;
+	}
+	read_unlock(&tasklist_lock);
+	return nr;
+}
+
+/*
+ * Hmm.. Changed this, as the GNU make sources (load.c) seem to
+ * imply that avenrun[] is the standard name for this kind of thing.
+ * Nothing else seems to be standardized: the fractional size etc
+ * all seem to differ on different machines.
+ */
+unsigned long avenrun[3];
+
+static inline void calc_load(unsigned long ticks)
+{
+	unsigned long active_tasks; /* fixed-point */
+	static int count = LOAD_FREQ;
+
+	count -= ticks;
+	if (count < 0) {
+		count += LOAD_FREQ;
+		active_tasks = count_active_tasks();
+		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
+		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
+		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+	}
+}
+
+/* jiffies at the most recent update of wall time */
+unsigned long wall_jiffies;
+
+/*
+ * This spinlock protects us from races in SMP while playing with xtime. -arca
+ */
+rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
+
+static inline void update_times(void)
+{
+	unsigned long ticks;
+
+	/*
+	 * update_times() is run from the raw timer_bh handler so we
+	 * just know that the irqs are locally enabled and so we don't
+	 * need to save/restore the flags of the local CPU here. -arca
+	 */
+	write_lock_irq(&xtime_lock);
+	vxtime_lock();
+
+	ticks = jiffies - wall_jiffies;
+	if (ticks) {
+		wall_jiffies += ticks;
+		update_wall_time(ticks);
+	}
+	vxtime_unlock();
+	write_unlock_irq(&xtime_lock);
+	calc_load(ticks);
+}
+
+void timer_bh(void)
+{
+	update_times();
+	run_timer_list();
+}
+
+void do_timer(struct pt_regs *regs)
+{
+	(*(unsigned long *)&jiffies)++;
+#ifndef CONFIG_SMP
+	/* SMP process accounting uses the local APIC timer */
+
+	update_process_times(user_mode(regs));
+#endif
+	mark_bh(TIMER_BH);
+	if (TQ_ACTIVE(tq_timer))
+		mark_bh(TQUEUE_BH);
+#ifdef CONFIG_SNAPDOG
+	snapdog_service(regs);
+#endif
+}
+
+#if !defined(__alpha__) && !defined(__ia64__)
+
+/*
+ * For backwards compatibility? This can be done in libc so Alpha
+ * and all newer ports shouldn't need it.
+ */
+asmlinkage unsigned long sys_alarm(unsigned int seconds)
+{
+	struct itimerval it_new, it_old;
+	unsigned int oldalarm;
+
+	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
+	it_new.it_value.tv_sec = seconds;
+	it_new.it_value.tv_usec = 0;
+	do_setitimer(ITIMER_REAL, &it_new, &it_old);
+	oldalarm = it_old.it_value.tv_sec;
+	/* ehhh.. We can't return 0 if we have an alarm pending.. */
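+	/* Example (added for clarity): with 2.5 s left on the old alarm,
+	 * tv_sec is 2 and tv_usec is nonzero, so 3 is returned below. */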
+	/* And we'd better return too much than too little anyway */
+	if (it_old.it_value.tv_usec)
+		oldalarm++;
+	return oldalarm;
+}
+
+#endif
+
+#ifndef __alpha__
+
+/*
+ * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
+ * should be moved into arch/i386 instead?
+ */
+
+/**
+ * sys_getpid - return the thread group id of the current process
+ *
+ * Note, despite the name, this returns the tgid not the pid. The tgid and
+ * the pid are identical unless CLONE_THREAD was specified on clone() in
+ * which case the tgid is the same in all threads of the same group.
+ *
+ * This is SMP safe as current->tgid does not change.
+ */
+asmlinkage long sys_getpid(void)
+{
+	return current->tgid;
+}
+
+/*
+ * This is not strictly SMP safe: p_opptr could change
+ * from under us. However, rather than getting any lock
+ * we can use an optimistic algorithm: get the parent
+ * pid, and go back and check that the parent is still
+ * the same. If it has changed (which is extremely unlikely
+ * indeed), we just try again..
+ *
+ * NOTE! This depends on the fact that even if we _do_
+ * get an old value of "parent", we can happily dereference
+ * the pointer: we just can't necessarily trust the result
+ * until we know that the parent pointer is valid.
+ *
+ * The "mb()" macro is a memory barrier - a synchronizing
+ * event. It also makes sure that gcc doesn't optimize
+ * away the necessary memory references.. The barrier doesn't
+ * have to have all that strong semantics: on x86 we don't
+ * really require a synchronizing instruction, for example.
+ * The barrier is more important for code generation than
+ * for any real memory ordering semantics (even if there is
+ * a small window for a race, using the old pointer is
+ * harmless for a while).
+ */
+asmlinkage long sys_getppid(void)
+{
+	int pid;
+	struct task_struct * me = current;
+	struct task_struct * parent;
+
+	parent = me->p_opptr;
+	for (;;) {
+		pid = parent->pid;
+#ifdef CONFIG_SMP
+{
+		struct task_struct *old = parent;
+		mb();
+		parent = me->p_opptr;
+		if (old != parent)
+			continue;
+}
+#endif
+		break;
+	}
+	return pid;
+}
+
+asmlinkage long sys_getuid(void)
+{
+	/* Only we change this so SMP safe */
+	return current->uid;
+}
+
+asmlinkage long sys_geteuid(void)
+{
+	/* Only we change this so SMP safe */
+	return current->euid;
+}
+
+asmlinkage long sys_getgid(void)
+{
+	/* Only we change this so SMP safe */
+	return current->gid;
+}
+
+asmlinkage long sys_getegid(void)
+{
+	/* Only we change this so SMP safe */
+	return current->egid;
+}
+
+#endif
+
+/* Thread ID - the internal kernel "pid" */
+asmlinkage long sys_gettid(void)
+{
+	return current->pid;
+}
+
+asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
+{
+	struct timespec t;
+	unsigned long expire;
+
+	if (copy_from_user(&t, rqtp, sizeof(struct timespec)))
+		return -EFAULT;
+
+	if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
+		return -EINVAL;
+
+	if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
+	    current->policy != SCHED_OTHER)
+	{
+		/*
+		 * Short delay requests up to 2 ms will be handled with
+		 * high precision by a busy wait for all real-time processes.
+		 *
+		 * It's important on SMP not to do this holding locks.
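+		 *
+		 * Worked example (added for clarity): a SCHED_FIFO task
+		 * requesting t = { 0, 1500000 } (1.5 ms) takes this path
+		 * and busy-waits udelay((1500000 + 999) / 1000), i.e.
+		 * udelay(1500), instead of sleeping for at least a full
+		 * jiffy in schedule_timeout().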
+		 */
+		udelay((t.tv_nsec + 999) / 1000);
+		return 0;
+	}
+
+	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
+
+	current->state = TASK_INTERRUPTIBLE;
+	expire = schedule_timeout(expire);
+
+	if (expire) {
+		if (rmtp) {
+			jiffies_to_timespec(expire, &t);
+			if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
+				return -EFAULT;
+		}
+		return -EINTR;
+	}
+	return 0;
+}
+
diff --git a/uClinux-2.4.31-uc0/kernel/uid16.c b/uClinux-2.4.31-uc0/kernel/uid16.c
new file mode 100644
index 0000000..f76e4fd
--- /dev/null
+++ b/uClinux-2.4.31-uc0/kernel/uid16.c
@@ -0,0 +1,163 @@
+/*
+ * Wrapper functions for 16bit uid back compatibility. All nicely tied
+ * together in the faint hope we can take them out in five years' time.
+ */
+
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/mman.h>
+#include <linux/smp_lock.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+#include <linux/prctl.h>
+#include <linux/init.h>
+#include <linux/highuid.h>
+
+#include <asm/uaccess.h>
+
+extern asmlinkage long sys_chown(const char *, uid_t, gid_t);
+extern asmlinkage long sys_lchown(const char *, uid_t, gid_t);
+extern asmlinkage long sys_fchown(unsigned int, uid_t, gid_t);
+extern asmlinkage long sys_setregid(gid_t, gid_t);
+extern asmlinkage long sys_setgid(gid_t);
+extern asmlinkage long sys_setreuid(uid_t, uid_t);
+extern asmlinkage long sys_setuid(uid_t);
+extern asmlinkage long sys_setresuid(uid_t, uid_t, uid_t);
+extern asmlinkage long sys_setresgid(gid_t, gid_t, gid_t);
+extern asmlinkage long sys_setfsuid(uid_t);
+extern asmlinkage long sys_setfsgid(gid_t);
+
+asmlinkage long sys_chown16(const char * filename, old_uid_t user, old_gid_t group)
+{
+	return sys_chown(filename, low2highuid(user), low2highgid(group));
+}
+
+asmlinkage long sys_lchown16(const char * filename, old_uid_t user, old_gid_t group)
+{
+	return sys_lchown(filename, low2highuid(user), low2highgid(group));
+}
+
+asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group)
+{
+	return sys_fchown(fd, low2highuid(user), low2highgid(group));
+}
+
+asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid)
+{
+	return sys_setregid(low2highgid(rgid), low2highgid(egid));
+}
+
+asmlinkage long sys_setgid16(old_gid_t gid)
+{
+	return sys_setgid((gid_t)gid);
+}
+
+asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid)
+{
+	return sys_setreuid(low2highuid(ruid), low2highuid(euid));
+}
+
+asmlinkage long sys_setuid16(old_uid_t uid)
+{
+	return sys_setuid((uid_t)uid);
+}
+
+asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid)
+{
+	return sys_setresuid(low2highuid(ruid), low2highuid(euid),
+		low2highuid(suid));
+}
+
+asmlinkage long sys_getresuid16(old_uid_t *ruid, old_uid_t *euid, old_uid_t *suid)
+{
+	int retval;
+
+	if (!(retval = put_user(high2lowuid(current->uid), ruid)) &&
+	    !(retval = put_user(high2lowuid(current->euid), euid)))
+		retval = put_user(high2lowuid(current->suid), suid);
+
+	return retval;
+}
+
+asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid)
+{
+	return sys_setresgid(low2highgid(rgid), low2highgid(egid),
+		low2highgid(sgid));
+}
+
+asmlinkage long sys_getresgid16(old_gid_t *rgid, old_gid_t *egid, old_gid_t *sgid)
+{
+	int retval;
+
+	if (!(retval = put_user(high2lowgid(current->gid), rgid)) &&
+	    !(retval = put_user(high2lowgid(current->egid), egid)))
+		retval = put_user(high2lowgid(current->sgid), sgid);
+
+	return retval;
+}
+
+asmlinkage long sys_setfsuid16(old_uid_t uid)
+{
+	return sys_setfsuid((uid_t)uid);
+}
+
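+/*
+ * Note (added; not part of the original file): low2highuid() in 2.4's
+ * <linux/highuid.h> widens the 16-bit value while preserving the
+ * "don't change" sentinel, roughly:
+ *
+ *	low2highuid(0xffff) -> (uid_t)-1	(leave the uid unchanged)
+ *	low2highuid(1000)   -> (uid_t)1000
+ *
+ * so sys_chown16("f", 0xffff, 100), for example, changes only the group.
+ */
+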
+asmlinkage long sys_setfsgid16(old_gid_t gid)
+{
+	return sys_setfsgid((gid_t)gid);
+}
+
+asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t *grouplist)
+{
+	old_gid_t groups[NGROUPS];
+	int i, j;
+
+	if (gidsetsize < 0)
+		return -EINVAL;
+	i = current->ngroups;
+	if (gidsetsize) {
+		if (i > gidsetsize)
+			return -EINVAL;
+		for (j = 0; j < i; j++)
+			groups[j] = current->groups[j];
+		if (copy_to_user(grouplist, groups, sizeof(old_gid_t) * i))
+			return -EFAULT;
+	}
+	return i;
+}
+
+asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t *grouplist)
+{
+	old_gid_t groups[NGROUPS];
+	int i;
+
+	if (!capable(CAP_SETGID))
+		return -EPERM;
+	if ((unsigned) gidsetsize > NGROUPS)
+		return -EINVAL;
+	if (copy_from_user(groups, grouplist, gidsetsize * sizeof(old_gid_t)))
+		return -EFAULT;
+	for (i = 0; i < gidsetsize; i++)
+		current->groups[i] = (gid_t)groups[i];
+	current->ngroups = gidsetsize;
+	return 0;
+}
+
+asmlinkage long sys_getuid16(void)
+{
+	return high2lowuid(current->uid);
+}
+
+asmlinkage long sys_geteuid16(void)
+{
+	return high2lowuid(current->euid);
+}
+
+asmlinkage long sys_getgid16(void)
+{
+	return high2lowgid(current->gid);
+}
+
+asmlinkage long sys_getegid16(void)
+{
+	return high2lowgid(current->egid);
+}
diff --git a/uClinux-2.4.31-uc0/kernel/user.c b/uClinux-2.4.31-uc0/kernel/user.c
new file mode 100644
index 0000000..ddf9c11
--- /dev/null
+++ b/uClinux-2.4.31-uc0/kernel/user.c
@@ -0,0 +1,154 @@
+/*
+ * The "user cache".
+ *
+ * (C) Copyright 1991-2000 Linus Torvalds
+ *
+ * We have a per-user structure to keep track of how many
+ * processes, files etc the user has claimed, in order to be
+ * able to have per-user limits for system resources.
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+/*
+ * UID task count cache, to get fast user lookup in "alloc_uid"
+ * when changing user ID's (ie setuid() and friends).
+ */
+#define UIDHASH_BITS		8
+#define UIDHASH_SZ		(1 << UIDHASH_BITS)
+#define UIDHASH_MASK		(UIDHASH_SZ - 1)
+#define __uidhashfn(uid)	(((uid >> UIDHASH_BITS) ^ uid) & UIDHASH_MASK)
+#define uidhashentry(uid)	(uidhash_table + __uidhashfn(uid))
+
+static kmem_cache_t *uid_cachep;
+static struct user_struct *uidhash_table[UIDHASH_SZ];
+static spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED;
+
+struct user_struct root_user = {
+	__count:	ATOMIC_INIT(1),
+	processes:	ATOMIC_INIT(1),
+	files:		ATOMIC_INIT(0)
+};
+
+/*
+ * These routines must be called with the uidhash spinlock held!
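+ *
+ * Worked example (added for clarity): __uidhashfn(1000)
+ * = ((1000 >> 8) ^ 1000) & 255 = (3 ^ 1000) & 255 = 235,
+ * so uid 1000 lands in bucket 235 of uidhash_table[].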
+ */
+static inline void uid_hash_insert(struct user_struct *up, struct user_struct **hashent)
+{
+	struct user_struct *next = *hashent;
+
+	up->next = next;
+	if (next)
+		next->pprev = &up->next;
+	up->pprev = hashent;
+	*hashent = up;
+}
+
+static inline void uid_hash_remove(struct user_struct *up)
+{
+	struct user_struct *next = up->next;
+	struct user_struct **pprev = up->pprev;
+
+	if (next)
+		next->pprev = pprev;
+	*pprev = next;
+}
+
+static inline struct user_struct *uid_hash_find(uid_t uid, struct user_struct **hashent)
+{
+	struct user_struct *next;
+
+	next = *hashent;
+	for (;;) {
+		struct user_struct *up = next;
+		if (next) {
+			next = up->next;
+			if (up->uid != uid)
+				continue;
+			atomic_inc(&up->__count);
+		}
+		return up;
+	}
+}
+
+void free_uid(struct user_struct *up)
+{
+	if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
+		uid_hash_remove(up);
+		kmem_cache_free(uid_cachep, up);
+		spin_unlock(&uidhash_lock);
+	}
+}
+
+struct user_struct * alloc_uid(uid_t uid)
+{
+	struct user_struct **hashent = uidhashentry(uid);
+	struct user_struct *up;
+
+	spin_lock(&uidhash_lock);
+	up = uid_hash_find(uid, hashent);
+	spin_unlock(&uidhash_lock);
+
+	if (!up) {
+		struct user_struct *new;
+
+		new = kmem_cache_alloc(uid_cachep, SLAB_KERNEL);
+		if (!new)
+			return NULL;
+		new->uid = uid;
+		atomic_set(&new->__count, 1);
+		atomic_set(&new->processes, 0);
+		atomic_set(&new->files, 0);
+
+		/*
+		 * Before adding this, check whether we raced
+		 * on adding the same user already..
+		 */
+		spin_lock(&uidhash_lock);
+		up = uid_hash_find(uid, hashent);
+		if (up) {
+			kmem_cache_free(uid_cachep, new);
+		} else {
+			uid_hash_insert(new, hashent);
+			up = new;
+		}
+		spin_unlock(&uidhash_lock);
+	}
+	return up;
+}
+
+void switch_uid(struct user_struct *new_user)
+{
+	struct user_struct *old_user;
+
+	/* What if a process setreuid()'s and this brings the
+	 * new uid over his NPROC rlimit? We can check this now
+	 * cheaply with the new uid cache, so if it matters
+	 * we should be checking for it. -DaveM
+	 */
+	old_user = current->user;
+	atomic_inc(&new_user->__count);
+	atomic_inc(&new_user->processes);
+	atomic_dec(&old_user->processes);
+	current->user = new_user;
+	free_uid(old_user);
+}
+
+
+static int __init uid_cache_init(void)
+{
+	uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
+				       0,
+				       SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!uid_cachep)
+		panic("Cannot create uid taskcount SLAB cache\n");
+
+	/* Insert the root user immediately - init already runs with this */
+	uid_hash_insert(&root_user, uidhashentry(0));
+	return 0;
+}
+
+module_init(uid_cache_init);
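+
+/*
+ * Hedged usage sketch (added; not part of the original file): the
+ * pattern a caller such as do_fork() follows. Error handling is
+ * elided and the fields shown are those of user_struct above.
+ *
+ *	struct user_struct *up = alloc_uid(current->uid);
+ *	if (!up)
+ *		return -EAGAIN;			(allocation failed)
+ *	atomic_inc(&up->processes);		(claim one process slot)
+ *	...
+ *	atomic_dec(&up->processes);		(on process exit)
+ *	free_uid(up);				(drop the cache reference)
+ */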