进程的创建
6.8.1 进程的创建
新的进程通过克隆旧的程序(当前进程)而建立。fork() 和 clone()(对于线程)系统调用可用来建立新的进程。这两个系统调用结束时,内核在系统的物理内存中为新的进程分配新的 task_struct 结构,同时为新进程要使用的堆栈分配物理页。Linux 还会为新的进程分配新的进程标识符。然后,新 task_struct 结构的地址保存在链表中,而旧进程的 task_struct 结构内容被复制到新进程的 task_struct 结构中。
在克隆进程时,Linux 允许两个进程共享相同的资源。可共享的资源包括文件、信号处理程序和虚拟内存等(通过继承)。当某个资源被共享时,该资源的引用计数值会增加 1,从而只有两个进程均终止时,内核才会释放这些资源。图 6.23 说明了父进程和子进程共享打开的文件。
图 6.23父进程和子进程共享打开的文件
系统对进程虚拟内存的克隆过程则更加巧妙些。新的 vm_area_struct 结构、新进程自己的 mm_struct 结构以及新进程的页表必须在一开始就准备好,但这时并不复制任何虚拟内存。如果旧进程的某些虚拟内存在物理内存中,而有些在交换文件中,那么虚拟内存的复制将会非常困难和费时。实际上,Linux 采用了称为写时复制的技术,也就是说,只有当两个进程中的任意一个向虚拟内存中写入数据时才复制相应的虚拟内存;而没有写入的任何内存页均可以在两个进程之间共享。代码页实际总是可以共享的。
此外,内核线程是调用kernel_thread()函数创建的,而kernel_thread()在内核态调用了clone()系统调用。内核线程通常没有用户地址空间,即p->mm = NULL,它总是直接访问内核地址空间。
不管是fork()还是clone()系统调用,最终都调用了内核中的do_fork(),其源代码在kernel/fork.c:
/* * Ok, this is the main fork-routine. It copies the system process * information (task[nr]) and sets up the necessary registers. It also * copies the data segment in its entirety. The "stack_start" and * "stack_top" arguments are simply passed along to the platform * specific copy_thread() routine. Most platforms ignore stack_top. * For an example that's using stack_top, see * arch/ia64/kernel/process.c. */int do_fork(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size){ int retval; struct task_struct *p; struct completion vfork; retval = -EPERM; /* * CLONE_PID is only allowed for the initial SMP swapper * calls */ if (clone_flags & CLONE_PID) { if (current->pid) goto fork_out; } retval = -ENOMEM; p = alloc_task_struct(); if (!p) goto fork_out; *p = *current; retval = -EAGAIN; /* * Check if we are over our maximum process limit, but be sure to * exclude root. This is needed to make it possible for login and * friends to set the per-user process limit to something lower * than the amount of processes root is running. -- Rik */ if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur && !capable(CAP_SYS_ADMIN) !capable(CAP_SYS_RESOURCE)) goto bad_fork_free; atomic_inc(&p->user->__count); atomic_inc(&p->user->processes); /* * Counter increases are protected by * the kernel lock so nr_threads can't * increase under us (but it may decrease). */ if (nr_threads >= max_threads) goto bad_fork_cleanup_count; get_exec_domain(p->exec_domain); if (p->binfmt && p->binfmt->module) __MOD_INC_USE_COUNT(p->binfmt->module); p->did_exec = 0; p->swappable = 0; p->state = TASK_UNINTERRUPTIBLE; copy_flags(clone_flags, p); p->pid = get_pid(clone_flags); p->run_list.next = NULL; p->run_list.prev = NULL; p->p_cptr = NULL; init_waitqueue_head(&p->wait_chldexit); p->vfork_done = NULL; if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); } spin_lock_init(&p->alloc_lock); p->sigpending = 0; init_sigpending(&p->pending); p->it_real_value = p->it_virt_value = p->it_prof_value = 0; p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0; init_timer(&p->real_timer); p->real_timer.data = (unsigned long) p; p->leader = 0; /* session leadership doesn't inherit */ p->tty_old_pgrp = 0; p->times.tms_utime = p->times.tms_stime = 0; p->times.tms_cutime = p->times.tms_cstime = 0; #ifdef CONFIG_SMP { int i; p->cpus_runnable = ~0UL; p->processor = current->processor; /* ?? should we just memset this ?? */ for(i = 0; i < smp_num_cpus; i++) p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0; spin_lock_init(&p->sigmask_lock); } #endif p->lock_depth = -1; /* -1 = no lock */ p->start_time = jiffies; INIT_LIST_HEAD(&p->local_pages); retval = -ENOMEM; /* copy all the process information */ if (copy_files(clone_flags, p)) goto bad_fork_cleanup; if (copy_fs(clone_flags, p)) goto bad_fork_cleanup_files; if (copy_sighand(clone_flags, p)) goto bad_fork_cleanup_fs; if (copy_mm(clone_flags, p)) goto bad_fork_cleanup_sighand; retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_mm; p->semundo = NULL; /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ p->parent_exec_id = p->self_exec_id; /* ok, now we should be set up.. */ p->swappable = 1; p->exit_signal = clone_flags & CSIGNAL; p->pdeath_signal = 0; /* * "share" dynamic priority between parent and child, thus the * total amount of dynamic priorities in the system doesnt change, * more scheduling fairness. This is only important in the first * timeslice, on the long run the scheduling behaviour is unchanged. */ p->counter = (current->counter + 1) >> 1; current->counter >>= 1; if (!current->counter) current->need_resched = 1; /* * Ok, add it to the run-queues and make it * visible to the rest of the system. * * Let it rip! */ retval = p->pid; p->tgid = retval; INIT_LIST_HEAD(&p->thread_group); /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* CLONE_PARENT and CLONE_THREAD re-use the old parent */ p->p_opptr = current->p_opptr; p->p_pptr = current->p_pptr; if (!(clone_flags & (CLONE_PARENT | CLONE_THREAD))) { p->p_opptr = current; if (!(p->ptrace & PT_PTRACED)) p->p_pptr = current; } if (clone_flags & CLONE_THREAD) { p->tgid = current->tgid; list_add(&p->thread_group, ¤t->thread_group); } SET_LINKS(p); hash_pid(p); nr_threads++; write_unlock_irq(&tasklist_lock); if (p->ptrace & PT_PTRACED) send_sig(SIGSTOP, p, 1); wake_up_process(p); /* do this last */ ++total_forks; if (clone_flags & CLONE_VFORK) wait_for_completion(&vfork); fork_out: return retval; bad_fork_cleanup_mm: exit_mm(p); bad_fork_cleanup_sighand: exit_sighand(p); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup: put_exec_domain(p->exec_domain); if (p->binfmt && p->binfmt->module) __MOD_DEC_USE_COUNT(p->binfmt->module); bad_fork_cleanup_count: atomic_dec(&p->user->processes); free_uid(p->user); bad_fork_free: free_task_struct(p); goto fork_out; }