/* Minix2.0/src/mm/exec.c */
/* This file handles the EXEC system call. It performs the work as follows:
* - see if the permissions allow the file to be executed
* - read the header and extract the sizes
* - fetch the initial args and environment from the user space
* - allocate the memory for the new process
* - copy the initial stack from MM to the process
* - read in the text and data segments and copy to the process
* - take care of setuid and setgid bits
* - fix up 'mproc' table
* - tell kernel about EXEC
* - save offset to initial argc (for ps)
*
* The entry points into this file are:
* do_exec: perform the EXEC system call
* find_share: find a process whose text segment can be shared
*/
#include "mm.h"
#include <sys/stat.h>
#include <minix/callnr.h>
#include <a.out.h>
#include <signal.h>
#include <string.h>
#include "mproc.h"
#include "param.h"
/* Prototypes for the helper functions that are defined further down in this
 * file.
 */

/* Read a text or data segment from the exec file into the new core image. */
FORWARD _PROTOTYPE( void load_seg, (int fd, int seg, vir_bytes seg_bytes) );
/* Release the old core image, allocate the new one and set up the map. */
FORWARD _PROTOTYPE( int new_mem, (struct mproc *sh_mp, vir_bytes text_bytes,
vir_bytes data_bytes, vir_bytes bss_bytes,
vir_bytes stk_bytes, phys_bytes tot_bytes) );
/* Relocate the argv/envp pointers in the stack image built by the user. */
FORWARD _PROTOTYPE( void patch_ptr, (char stack [ARG_MAX ], vir_bytes base) );
/* Read and validate the a.out header and return the segment sizes. */
FORWARD _PROTOTYPE( int read_header, (int fd, int *ft, vir_bytes *text_bytes,
vir_bytes *data_bytes, vir_bytes *bss_bytes,
phys_bytes *tot_bytes, long *sym_bytes, vir_clicks sc,
vir_bytes *pc) );
/* relocate() is only compiled (and only called) when SHADOWING is 1. */
#if (SHADOWING == 1)
FORWARD _PROTOTYPE( int relocate, (int fd, unsigned char *buf) );
#endif
/*===========================================================================*
* do_exec *
*===========================================================================*/
PUBLIC int do_exec()
{
/* Perform the execve(name, argv, envp) call.  The user library builds a
 * complete stack image, including pointers, args, environ, etc.  The stack
 * is copied to a buffer inside MM, and then to the new core image.
 *
 * The request is described by exec_name/exec_len (path name and its length)
 * and stack_ptr/stack_bytes (initial stack image and its size); the calling
 * process is 'who' and its process table slot is 'mp'.
 *
 * Return value:
 *   OK       the new image is in place and the kernel has been told
 *   ENOMEM   the initial stack is larger than ARG_MAX
 *   EINVAL   the path name length is not in 1..PATH_MAX
 *   ENOEXEC  read_header() rejected the file
 *   EACCES   the initial stack could not be copied from the caller
 *   other    whatever sys_copy(), allowed() or new_mem() reported
 *
 * Once new_mem() has returned OK the old image is gone, so nothing after
 * that point returns an error to the caller.
 */
  register struct mproc *rmp;
  struct mproc *sh_mp;
  int m, r, fd, ft, sn;
  static char mbuf[ARG_MAX];        /* buffer for stack and zeroes */
  static char name_buf[PATH_MAX];   /* the name of the file to exec */
  char *new_sp, *basename;
  vir_bytes src, dst, text_bytes, data_bytes, bss_bytes, stk_bytes, vsp;
  phys_bytes tot_bytes;             /* total space for program, including gap */
  long sym_bytes;
  vir_clicks sc;
  struct stat s_buf;
  vir_bytes pc;

  /* Do some validity checks. */
  rmp = mp;
  stk_bytes = (vir_bytes) stack_bytes;
  if (stk_bytes > ARG_MAX) return(ENOMEM);      /* stack too big */
  if (exec_len <= 0 || exec_len > PATH_MAX) return(EINVAL);

  /* Get the exec file name and see if the file is executable.
   * NOTE(review): nothing here forces name_buf to be NUL-terminated;
   * presumably exec_len counts the terminator -- confirm against the library.
   */
  src = (vir_bytes) exec_name;
  dst = (vir_bytes) name_buf;
  r = sys_copy(who, D, (phys_bytes) src,
               MM_PROC_NR, D, (phys_bytes) dst, (phys_bytes) exec_len);
  if (r != OK) return(r);   /* file name not in user data segment */
  tell_fs(CHDIR, who, FALSE, 0);    /* switch to the user's FS environ. */
  fd = allowed(name_buf, &s_buf, X_BIT);    /* is file executable? */
  if (fd < 0) return(fd);   /* file was not executable */

  /* Read the file header and extract the segment sizes. */
  sc = (stk_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;
  m = read_header(fd, &ft, &text_bytes, &data_bytes, &bss_bytes,
                  &tot_bytes, &sym_bytes, sc, &pc);
  if (m < 0) {
        close(fd);          /* something wrong with header */
        return(ENOEXEC);
  }

  /* Fetch the stack from the user before destroying the old core image. */
  src = (vir_bytes) stack_ptr;
  dst = (vir_bytes) mbuf;
  r = sys_copy(who, D, (phys_bytes) src,
               MM_PROC_NR, D, (phys_bytes) dst, (phys_bytes)stk_bytes);
  if (r != OK) {
        close(fd);          /* can't fetch stack (e.g. bad virtual addr) */
        return(EACCES);
  }

  /* Can the process' text be shared with that of one already running? */
  sh_mp = find_share(rmp, s_buf.st_ino, s_buf.st_dev, s_buf.st_ctime);

  /* Allocate new memory and release old memory.  Fix map and tell kernel. */
  r = new_mem(sh_mp, text_bytes, data_bytes, bss_bytes, stk_bytes, tot_bytes);
  if (r != OK) {
        close(fd);          /* insufficient core or program too big */
        return(r);
  }

  /* Save file identification to allow it to be shared. */
  rmp->mp_ino = s_buf.st_ino;
  rmp->mp_dev = s_buf.st_dev;
  rmp->mp_ctime = s_buf.st_ctime;

  /* Patch up stack and copy it from MM to new core image.  The image is
   * placed at the very top of the stack segment: vsp is the virtual address
   * of the end of the segment minus the size of the image.
   */
  vsp = (vir_bytes) rmp->mp_seg[S].mem_vir << CLICK_SHIFT;
  vsp += (vir_bytes) rmp->mp_seg[S].mem_len << CLICK_SHIFT;
  vsp -= stk_bytes;
  patch_ptr(mbuf, vsp);
  src = (vir_bytes) mbuf;
  r = sys_copy(MM_PROC_NR, D, (phys_bytes) src,
               who, D, (phys_bytes) vsp, (phys_bytes)stk_bytes);
  if (r != OK) panic("do_exec stack copy err", NO_NUM);

  /* Read in text and data segments.  load_seg() reports no status, so a
   * short read is not detected here.
   */
  if (sh_mp != NULL) {
        lseek(fd, (off_t) text_bytes, SEEK_CUR);    /* shared: skip text */
  } else {
        load_seg(fd, T, text_bytes);
  }
  load_seg(fd, D, data_bytes);

#if (SHADOWING == 1)
  /* Skip the symbol table, apply the relocation info that follows it, and
   * make the entry point relative to where the text segment really is.
   * Failures of lseek() and relocate() are deliberately ignored.
   */
  if (lseek(fd, (off_t)sym_bytes, SEEK_CUR) == (off_t) -1) ;    /* error */
  if (relocate(fd, (unsigned char *)mbuf) < 0) ;                /* error */
  /* The process pointer in this function is 'rmp'; there is no 'rp'. */
  pc += (vir_bytes) rmp->mp_seg[T].mem_vir << CLICK_SHIFT;
#endif

  close(fd);                /* don't need exec file any more */

  /* Take care of setuid/setgid bits. */
  if ((rmp->mp_flags & TRACED) == 0) {      /* suppress if tracing */
        if (s_buf.st_mode & I_SET_UID_BIT) {
                rmp->mp_effuid = s_buf.st_uid;
                tell_fs(SETUID,who, (int)rmp->mp_realuid, (int)rmp->mp_effuid);
        }
        if (s_buf.st_mode & I_SET_GID_BIT) {
                rmp->mp_effgid = s_buf.st_gid;
                tell_fs(SETGID,who, (int)rmp->mp_realgid, (int)rmp->mp_effgid);
        }
  }

  /* Save offset to initial argc (for ps) */
  rmp->mp_procargs = vsp;

  /* Fix 'mproc' fields, tell kernel that exec is done, reset caught sigs.
   * Only signals that were being caught go back to SIG_DFL; other signal
   * dispositions are left as they are.
   */
  for (sn = 1; sn <= _NSIG; sn++) {
        if (sigismember(&rmp->mp_catch, sn)) {
                sigdelset(&rmp->mp_catch, sn);
                rmp->mp_sigact[sn].sa_handler = SIG_DFL;
                sigemptyset(&rmp->mp_sigact[sn].sa_mask);
        }
  }

  rmp->mp_flags &= ~SEPARATE;   /* turn off SEPARATE bit */
  rmp->mp_flags |= ft;          /* turn it on for separate I & D files */
  new_sp = (char *) vsp;

  tell_fs(EXEC, who, 0, 0);     /* allow FS to handle FD_CLOEXEC files */

  /* System will save command line for debugging, ps(1) output, etc. */
  basename = strrchr(name_buf, '/');
  if (basename == NULL) basename = name_buf; else basename++;
  sys_exec(who, new_sp, rmp->mp_flags & TRACED, basename, pc);
  return(OK);
}
/*===========================================================================*
* read_header *
*===========================================================================*/
PRIVATE int read_header(fd, ft, text_bytes, data_bytes, bss_bytes,
tot_bytes, sym_bytes, sc, pc)
int fd; /* file descriptor for reading exec file */
int *ft; /* place to return ft number */
vir_bytes *text_bytes; /* place to return text size */
vir_bytes *data_bytes; /* place to return initialized data size */
vir_bytes *bss_bytes; /* place to return bss size */
phys_bytes *tot_bytes; /* place to return total size */
long *sym_bytes; /* place to return symbol table size */
vir_clicks sc; /* stack size in clicks */
vir_bytes *pc; /* program entry point (initial PC) */
{
/* Read the header and extract the text, data, bss and total sizes from it. */
int m, ct;
vir_clicks tc, dc, s_vir, dvir;
phys_clicks totc;
struct exec hdr; /* a.out header is read in here */
/* Read the header and check the magic number. The standard MINIX header
* is defined in <a.out.h>. It consists of 8 chars followed by 6 longs.
* Then come 4 more longs that are not used here.
* Byte 0: magic number 0x01
* Byte 1: magic number 0x03
* Byte 2: normal = 0x10 (not checked, 0 is OK), separate I/D = 0x20
* Byte 3: CPU type, Intel 16 bit = 0x04, Intel 32 bit = 0x10,
* Motorola = 0x0B, Sun SPARC = 0x17
* Byte 4: Header length = 0x20
* Bytes 5-7 are not used.
*
* Now come the 6 longs
* Bytes 8-11: size of text segments in bytes
* Bytes 12-15: size of initialized data segment in bytes
* Bytes 16-19: size of bss in bytes
* Bytes 20-23: program entry point
* Bytes 24-27: total memory allocated to program (text, data + stack)
* Bytes 28-31: size of symbol table in bytes
* The longs are represented in a machine dependent order,
* little-endian on the 8088, big-endian on the 68000.
* The header is followed directly by the text and data segments, and the
* symbol table (if any). The sizes are given in the header. Only the
* text and data segments are copied into memory by exec. The header is
* used here only. The symbol table is for the benefit of a debugger and
* is ignored here.
*/
if (read(fd, (char *) &hdr, A_MINHDR) != A_MINHDR) return(ENOEXEC);
/* Check magic number, cpu type, and flags. */
if (BADMAG(hdr)) return(ENOEXEC);
#if (CHIP == INTEL && _WORD_SIZE == 2)
if (hdr.a_cpu != A_I8086) return(ENOEXEC);
#endif
#if (CHIP == INTEL && _WORD_SIZE == 4)
if (hdr.a_cpu != A_I80386) return(ENOEXEC);
#endif
if ((hdr.a_flags & ~(A_NSYM | A_EXEC | A_SEP)) != 0) return(ENOEXEC);
*ft = ( (hdr.a_flags & A_SEP) ? SEPARATE : 0); /* separate I & D or not */
/* Get text and data sizes. */
*text_bytes = (vir_bytes) hdr.a_text; /* text size in bytes */
*data_bytes = (vir_bytes) hdr.a_data; /* data size in bytes */
*bss_bytes = (vir_bytes) hdr.a_bss; /* bss size in bytes */
*tot_bytes = hdr.a_total; /* total bytes to allocate for prog */
*sym_bytes = hdr.a_syms; /* symbol table size in bytes */
if (*tot_bytes == 0) return(ENOEXEC);
if (*ft != SEPARATE) {
#if (SHADOWING == 0)
/* If I & D space is not separated, it is all considered data. Text=0*/
*data_bytes += *text_bytes;
*text_bytes = 0;
#else
/*
* Treating text as data increases the shadowing overhead.
* Under the assumption that programs DO NOT MODIFY TEXT
* we can share the text between father and child processes.
* This is similar to the UNIX V7 -n option of ld(1).
* However, for MINIX the linker did not provide alignment
* to click boundaries, so an incomplete text click at the end
* must be treated as data.
* Correct tot_bytes, since it excludes the text segment.
*/
*data_bytes += *text_bytes;
*text_bytes = (*text_bytes >> CLICK_SHIFT) << CLICK_SHIFT;
*data_bytes -= *text_bytes;
*tot_bytes -= *text_bytes;
#endif
}
*pc = hdr.a_entry; /* initial address to start execution */
/* Check to see if segment sizes are feasible. */
tc = ((unsigned long) *text_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;
dc = (*data_bytes + *bss_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;
totc = (*tot_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;
if (dc >= totc) return(ENOEXEC); /* stack must be at least 1 click */
dvir = (*ft == SEPARATE ? 0 : tc);
s_vir = dvir + (totc - sc);
m = size_ok(*ft, tc, dc, sc, dvir, s_vir);
ct = hdr.a_hdrlen & BYTE; /* header length */
if (ct > A_MINHDR) lseek(fd, (off_t) ct, SEEK_SET); /* skip unused hdr */
return(m);
}
/*===========================================================================*
* new_mem *
*===========================================================================*/
PRIVATE int new_mem(sh_mp, text_bytes, data_bytes,bss_bytes,stk_bytes,tot_bytes)
struct mproc *sh_mp; /* text can be shared with this process */
vir_bytes text_bytes; /* text segment size in bytes */
vir_bytes data_bytes; /* size of initialized data in bytes */
vir_bytes bss_bytes; /* size of bss in bytes */
vir_bytes stk_bytes; /* size of initial stack segment in bytes */
phys_bytes tot_bytes; /* total memory to allocate, including gap */
{
/* Allocate new memory and release the old memory. Change the map and report
* the new map to the kernel. Zero the new core image's bss, gap and stack.
*/
register struct mproc *rmp;
vir_clicks text_clicks, data_clicks, gap_clicks, stack_clicks, tot_clicks;
phys_clicks new_base;
#if (SHADOWING == 1)
phys_clicks base, size;
#else
static char zero[1024]; /* used to zero bss */
phys_bytes bytes, base, count, bss_offset;
#endif
/* No need to allocate text if it can be shared. */
if (sh_mp != NULL) text_bytes = 0;
/* Acquire the new memory. Each of the 4 parts: text, (data+bss), gap,
* and stack occupies an integral number of clicks, starting at click
* boundary. The data and bss parts are run together with no space.
*/
text_clicks = ((unsigned long) text_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;
data_clicks = (data_bytes + bss_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;
stack_clicks = (stk_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;
tot_clicks = (tot_bytes + CLICK_SIZE - 1) >> CLICK_SHIFT;
gap_clicks = tot_clicks - data_clicks - stack_clicks;
if ( (int) gap_clicks < 0) return(ENOMEM);
/* Check to see if there is a hole big enough. If so, we can risk first
* releasing the old core image before allocating the new one, since we
* know it will succeed. If there is not enough, return failure.
*/
if (text_clicks + tot_clicks > max_hole()) return(EAGAIN);
/* There is enough memory for the new core image. Release the old one. */
rmp = mp;
#if (SHADOWING == 0)
if (find_share(rmp, rmp->mp_ino, rmp->mp_dev, rmp->mp_ctime) == NULL) {
/* No other process shares the text segment, so free it. */
free_mem(rmp->mp_seg[T].mem_phys, rmp->mp_seg[T].mem_len);
}
/* Free the data and stack segments. */
free_mem(rmp->mp_seg[D].mem_phys,
rmp->mp_seg[S].mem_vir + rmp->mp_seg[S].mem_len - rmp->mp_seg[D].mem_vir);
#endif
/* We have now passed the point of no return. The old core image has been
* forever lost. The call must go through now. Set up and report new map.
*/
new_base = alloc_mem(text_clicks + tot_clicks); /* new core image */
if (new_base == NO_MEM) panic("MM hole list is inconsistent", NO_NUM);
if (sh_mp != NULL) {
/* Share the text segment. */
rmp->mp_seg[T] = sh_mp->mp_seg[T];
} else {
rmp->mp_seg[T].mem_phys = new_base;
rmp->mp_seg[T].mem_vir = 0;
rmp->mp_seg[T].mem_len = text_clicks;
}
rmp->mp_seg[D].mem_phys = new_base + text_clicks;
rmp->mp_seg[D].mem_vir = 0;
rmp->mp_seg[D].mem_len = data_clicks;
rmp->mp_seg[S].mem_phys = rmp->mp_seg[D].mem_phys + data_clicks + gap_clicks;
rmp->mp_seg[S].mem_vir = rmp->mp_seg[D].mem_vir + data_clicks + gap_clicks;
rmp->mp_seg[S].mem_len = stack_clicks;
#if (CHIP == M68000)
#if (SHADOWING == 0)
rmp->mp_seg[T].mem_vir = 0;
rmp->mp_seg[D].mem_vir = rmp->mp_seg[T].mem_len;
rmp->mp_seg[S].mem_vir = rmp->mp_seg[D].mem_vir + rmp->mp_seg[D].mem_len + gap_clicks;
#else
rmp->mp_seg[T].mem_vir = rmp->mp_seg[T].mem_phys;
rmp->mp_seg[D].mem_vir = rmp->mp_seg[D].mem_phys;
rmp->mp_seg[S].mem_vir = rmp->mp_seg[S].mem_phys;
#endif
#endif
#if (SHADOWING == 0)
sys_newmap(who, rmp->mp_seg); /* report new map to the kernel */
/* Zero the bss, gap, and stack segment. */
bytes = (phys_bytes)(data_clicks + gap_clicks + stack_clicks) << CLICK_SHIFT;
base = (phys_bytes) rmp->mp_seg[D].mem_phys << CLICK_SHIFT;
bss_offset = (data_bytes >> CLICK_SHIFT) << CLICK_SHIFT;
base += bss_offset;
bytes -= bss_offset;
while (bytes > 0) {
count = MIN(bytes, (phys_bytes) sizeof(zero));
if (sys_copy(MM_PROC_NR, D, (phys_bytes) zero,
ABS, 0, base, count) != OK) {
panic("new_mem can't zero", NO_NUM);
}
base += count;
bytes -= count;
}
#endif
#if (SHADOWING == 1)
sys_fresh(who, rmp->mp_seg, (phys_clicks)(data_bytes >> CLICK_SHIFT),
&base, &size);
free_mem(base, size);
#endif
return(OK);
}
/*===========================================================================*
* patch_ptr *
*===========================================================================*/
PRIVATE void patch_ptr(stack, base)
char stack[ARG_MAX];        /* stack image held inside MM */
vir_bytes base;             /* user virtual address where the image will sit */
{
/* The stack image built by the user for exec(name, argv, envp) holds argv
 * and envp pointers that are offsets from the start of the image.  Since the
 * image does not end up at user address 0, add 'base' to every such pointer.
 * The two pointer lists are each ended by a null pointer; stop after the
 * second one, or silently give up when the end of the buffer is reached.
 */
  char **slot;              /* pointer slot being examined */
  char **limit;             /* first address past the buffer */
  int nulls_seen;           /* null pointers met so far */

  limit = (char **) &stack[ARG_MAX];
  nulls_seen = 0;

  /* Slot 0 holds the argument count; the pointers start at slot 1. */
  for (slot = (char **) stack + 1; nulls_seen < 2; slot++) {
        if (slot >= limit) return;      /* ran off the end: too bad */
        if (*slot == NIL_PTR) {
                nulls_seen++;           /* end of argv or of envp */
                continue;
        }
        *slot = (char *) ((vir_bytes) *slot + base);    /* relocate */
  }
}
/*===========================================================================*
* load_seg *
*===========================================================================*/
PRIVATE void load_seg(fd, seg, seg_bytes)
int fd;                     /* exec file to read from */
int seg;                    /* segment to fill: T or D */
vir_bytes seg_bytes;        /* number of bytes in the segment */
{
/* Load the text or data segment of the exec file into the new core image.
 * Reading block by block into MM and copying each block on to the user
 * would be the obvious way, but it is too slow.  Instead a trick is played:
 * the process number and the segment are packed into the bits of the file
 * descriptor above the real descriptor, and the buffer address passed to
 * read() is a virtual address in the user process rather than in MM.  The
 * file system unpacks these when it gets a read call from the memory
 * manager (the only process allowed to do this) and copies the segment
 * straight into user space, bypassing MM completely.
 *
 * A read that comes up short ends the loading; no error is reported.
 */
  int fs_fd;                /* descriptor with process and segment packed in */
  int chunk, max_chunk;     /* size of this read, and of the largest read */
  char *user_addr;          /* where in the user's segment the data goes */

  fs_fd = (who << 8) | (seg << 6) | fd;
  user_addr = (char *) ((vir_bytes)mp->mp_seg[seg].mem_vir << CLICK_SHIFT);

  /* A read count is an int, so move at most the largest whole number of
   * blocks that fits in one.
   */
  max_chunk = (INT_MAX / BLOCK_SIZE) * BLOCK_SIZE;

  for ( ; seg_bytes != 0; seg_bytes -= chunk, user_addr += chunk) {
        chunk = (seg_bytes < max_chunk) ? (int)seg_bytes : max_chunk;
        if (read(fs_fd, user_addr, chunk) != chunk) break;     /* error */
  }
}
/*===========================================================================*
* find_share *
*===========================================================================*/
PUBLIC struct mproc *find_share(mp_ign, ino, dev, ctime)
struct mproc *mp_ign;       /* slot to skip: the process we are working for */
ino_t ino;                  /* <ino, dev, ctime> identify the executable */
dev_t dev;
time_t ctime;
{
/* Search the process table, from INIT's slot onward, for a process that is
 * executing the file <ino, dev, ctime> with separate I & D space, so that its
 * text segment can be shared.  Slots that are not in use or are hanging do
 * not qualify, and neither does mp_ign itself.  Returns the first match, or
 * NULL when there is none.
 */
  struct mproc *cand;

  for (cand = &mproc[INIT_PROC_NR]; cand < &mproc[NR_PROCS]; cand++) {
        if ((cand->mp_flags & (IN_USE | HANGING | SEPARATE))
                                        == (IN_USE | SEPARATE)
            && cand != mp_ign
            && cand->mp_ino == ino
            && cand->mp_dev == dev
            && cand->mp_ctime == ctime) {
                return cand;
        }
  }
  return(NULL);
}
#if (SHADOWING == 1)
/*===========================================================================*
* relocate *
*===========================================================================*/
PRIVATE int relocate(fd, buf)
int fd;                     /* file descriptor to read from */
unsigned char *buf;         /* borrowed from do_exec(), ARG_MAX bytes */
{
/* Read in relocation info from the exec file and relocate.
 * Relocation info is in GEMDOS format.  Only longs can be relocated.
 *
 * The GEMDOS format starts with a long L: the offset to the
 * beginning of text for the first long to be relocated.
 * If L==0 then no relocations have to be made.
 *
 * The long is followed by zero or more bytes.  Each byte B is
 * processed separately, in one of the following ways:
 *
 * B==0:
 *      end of relocation
 * B==1:
 *      no relocation, but add 254 to the current offset
 * B==0bWWWWWWW0:
 *      B is added to the current offset and the long addressed
 *      is relocated.  Note that 00000010 means 1 word distance.
 * B==0bXXXXXXX1:
 *      illegal
 *
 * Relocating a long means adding the physical address of the start of the
 * text segment to it.  Returns 0 when the end marker (or L==0) is reached
 * and -1 on a read error, premature end of file, or an illegal byte.
 */
  register int n;                   /* unread bytes left in buf */
  register unsigned char *p, c;     /* read position and current byte */
  register phys_bytes off;          /* physical address of start of text */
  register phys_bytes adr;          /* address of the long being relocated */

  off = (phys_bytes)mp->mp_seg[T].mem_phys << CLICK_SHIFT;
  p = buf;
  n = read(fd, (char *)p, ARG_MAX);

  /* Compare as ints.  sizeof yields an unsigned value, so without the cast
   * a failed read (n == -1) would be converted to a huge unsigned number
   * and slip through this check.
   */
  if (n < (int) sizeof(long)) return(-1);   /* error */
  if (*((long *)p) == 0) return(0);         /* ok */
  adr = off + *((long *)p);
  n -= sizeof(long);
  p += sizeof(long);
  *((long *)adr) += off;

  while (1) {               /* once per relocation byte */
        if (--n < 0) {
                /* Buffer used up: fetch the next part of the table. */
                p = buf;
                n = read(fd, (char *)p, ARG_MAX);
                if (--n < 0)
                        return(-1);     /* error */
        }
        c = *p++;
        if (c == 1)
                adr += 254;
        else if (c == 0)
                return(0);      /* ok */
        else if (c & 1)
                return(-1);     /* error */
        else {
                adr += c;
                *((long *)adr) += off;
        }
  }
}
#endif