Coherent4.2.10/coh.386/exec.c
/* $Header: /ker/coh.386/RCS/exec.c,v 2.7 93/10/29 00:55:04 nigel Exp Locker: nigel $ */
/*
* Processing of the exec () system call.
*
* $Log: exec.c,v $
* Revision 2.7 93/10/29 00:55:04 nigel
* R98 (aka 4.2 Beta) prior to removing System Global memory
*
* Revision 2.6 93/09/02 18:04:39 nigel
* Use new flag stuff, fix interrupt fubar
*
* Revision 2.4 93/08/19 03:26:24 nigel
* Nigel's r83 (Stylistic cleanup)
*
* Revision 2.2 93/07/26 15:22:36 nigel
* Nigel's R80
*/
#include <common/_gregset.h>
#include <common/_tricks.h>
#include <kernel/proc_lib.h>
#include <kernel/cred_lib.h>
#include <kernel/sig_lib.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/cred.h>
#include <signal.h>
#include <fcntl.h>
#include <stddef.h>
#define _KERNEL 1
#include <kernel/reg.h>
#include <sys/uproc.h>
#include <sys/mmu.h>
#include <sys/acct.h>
#include <sys/buf.h>
#include <canon.h>
#include <sys/con.h>
#include <sys/ino.h>
#include <sys/inode.h>
#include <a.out.h>
#include <l.out.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/seg.h>
#include <sys/fd.h>
#include <sys/types.h>
/*
 * Pull the start of a section back to the click boundary at or below it.
 *
 * The file offset, the memory address and the size are all adjusted by the
 * same amount (the low-order bits of the file offset), so the end of the
 * section stays where it was.
 */
static void
xecrnd(xsp)
struct xecseg * xsp;
{
	/* Bytes by which the file offset overshoots a click boundary. */
	int slack = xsp->fbase & (NBPC - 1);

	xsp->size += slack;
	xsp->fbase -= slack;
	xsp->mbase -= slack;
}
/*
 * Read the file image described by "xsp" from inode "ip" into segment "sp".
 *
 * "shrdSz" is zero for an ordinary section.  When it is nonzero the section
 * is loaded in two pieces: the first "shrdSz" bytes, then the remainder,
 * which is placed at the next 16-byte boundary in the segment.
 *
 * Returns "sp" if no user error is recorded afterwards, otherwise NULL.  A
 * pending signal is turned into EINTR.  The residual count left by iread ()
 * is not examined here.
 */
static SEG *
exsread (sp, ip, xsp, shrdSz)
SEG *sp;
struct inode *ip;
struct xecseg *xsp;
int shrdSz;
{
	IO io;
	int fileoff, segoff;

	fileoff = xsp->fbase;
	segoff = xsp->mbase & (NBPC - 1);

	/* First (or only) transfer. */
	io.io_seg = IOPHY;
	io.io_seek = fileoff;
	io.io.pbase = MAPIO (sp->s_vmem, segoff);
	io.io_flag = 0;
	if (shrdSz)
		io.io_ioc = shrdSz;		/* shared part only */
	else
		io.io_ioc = xsp->size;		/* whole section */

	/* The segment's s_lrefc is raised for the duration of each read. */
	sp->s_lrefc ++;
	iread (ip, & io);
	sp->s_lrefc --;

	/* Second transfer: whatever follows the shared part, if anything. */
	if (shrdSz && (io.io_ioc = xsp->size - shrdSz) != 0) {
		/* Step past the shared part in the file ... */
		fileoff += shrdSz;
		/* ... and in memory, rounding up to a 16-byte boundary. */
		segoff = (segoff + shrdSz + 15) & ~ 15;

		io.io_seg = IOPHY;
		io.io_seek = fileoff;
		io.io.pbase = MAPIO (sp->s_vmem, segoff);
		io.io_flag = 0;

		sp->s_lrefc ++;
		iread (ip, & io);
		sp->s_lrefc --;
	}

	/*
	 * Failure is detected only through the user error state; an
	 * outstanding signal counts as an interrupted load.
	 */
	if (curr_signal_pending ())
		set_user_error (EINTR);

	if (get_user_error () != 0)
		return NULL;
	return sp;
}
/*
 * Totals gathered by excount () for one argument or environment vector.
 */
struct adata {
	int np;		/* how many string pointers the vector holds */
	int nc;		/* total bytes in the strings, terminators included */
};
/*
 * Measure a user-space vector of string pointers.
 *
 * "usrvp" is the address of the vector (may be NULL, meaning empty), "wdin"
 * is the width in bytes of each pointer in it, and "adp" receives the number
 * of strings and the total number of bytes in them (each string's NUL
 * terminator is counted).  Pointers are fetched with getupd () and masked
 * down to 16 bits when "wdin" is sizeof (short); bytes are fetched with
 * getubd ().
 *
 * Returns 1 on success.  Returns 0, leaving "adp" untouched, as soon as a
 * user error is recorded after a fetch.
 */
static int
excount(usrvp, adp, wdin)
caddr_t usrvp;
struct adata *adp;
int wdin;
{
	unsigned nstr = 0;		/* strings seen so far */
	unsigned nchr = 0;		/* bytes seen so far */
	int wdmask = -1;
	caddr_t strp;
	int ch;

	if (wdin == sizeof (short))
		wdmask = (unsigned short) wdmask;

	/* No vector at all: report an empty one. */
	if (usrvp == NULL) {
		adp->np = 0;
		adp->nc = 0;
		return 1;
	}

	for (;;) {
		/* Next pointer in the vector. */
		strp = (caddr_t) (getupd (usrvp) & wdmask);
		usrvp += wdin;
		if (get_user_error ())
			return 0;

		/* A null pointer ends the vector. */
		if (strp == NULL)
			break;
		nstr ++;

		/* Walk the string up to and including its terminator. */
		do {
			ch = getubd (strp ++);
			if (get_user_error ())
				return 0;
			nchr ++;
		} while (ch != '\0');
	}

	adp->np = nstr;
	adp->nc = nchr;
	return 1;
}
/*
 * Copy one NUL-terminated string, a byte at a time, from user address "in"
 * (read with getubd ()) to "out" (written with dmaout ()).
 *
 * Returns the number of bytes copied, terminator included.
 */
static int
exarg (out, in)
caddr_t in, out;
{
	caddr_t src = in;
	char ch;

	for (;;) {
		ch = getubd (src ++);
		dmaout (sizeof (char), out ++, & ch);
		if (ch == '\0')
			break;
	}

	return src - in;
}
/*
 * Build the initial stack for a new image.
 *
 * "xhp" is the header filled in by exlopen (); its SISTACK mbase selects the
 * stack layout (ISP_386 or ISP_286) and its initsp field is written here.
 * "argp" and "envp" are user-space vectors of string pointers, each entry
 * "wdin" bytes wide.
 *
 * Returns a freshly allocated stack segment holding the argument count, the
 * pointer vectors and the strings, or NULL on failure.  On failure nothing
 * is left allocated and u.u_argc / u.u_argp are not modified.
 *
 * Failure cases:
 *	- excount () rejects either vector (it stops when a user error is
 *	  recorded);
 *	- the stack would exceed MADSIZE or cannot be allocated (E2BIG);
 *	- the stack top is neither ISP_386 nor ISP_286;
 *	- a user error is recorded while the strings are being copied.
 */
static SEG *
exstack(xhp, argp, envp, wdin)
struct xechdr *xhp;
caddr_t argp, envp;
int wdin;
{
	register SEG *sp;		/* Stack segment pointer */
	struct sdata {			/* To keep segment pointers */
		caddr_t vp;		/* Argv[i], envp[i] pointer */
		caddr_t cp;		/* Argv[i][j], envp[i][j] pointer */
	} stk;
	struct adata arg, env;
	int chrsz, vecsz, stksz, wdmask, wdout, stkoff, stktop;
	int stkenvp;
	int i;
	caddr_t argvbase;		/* Address of argv[0] slot in new image */

	/* Validate and evaluate size of args and envs */
	if (! excount (argp, & arg, wdin) || ! excount (envp, & env, wdin))
		return NULL;

	/* Calculate stack size and allocate it */
	chrsz = __ROUND_UP_TO_MULTIPLE (arg.nc + env.nc, sizeof (int));
	vecsz = (arg.np + 1 + env.np + 1) * sizeof (long);
	stksz = __ROUND_UP_TO_MULTIPLE (vecsz + chrsz + ISTSIZE, NBPC);
	if (stksz > MADSIZE || (sp = salloc (stksz, SFDOWN)) == NULL) {
		set_user_error (E2BIG);
		return NULL;
	}

	/*
	 * Set up target stack: strings sit just below the stack top, the
	 * pointer vectors just below the strings.
	 */
	stktop = xhp->segs [SISTACK].mbase;
	stk.cp = (caddr_t) stktop - chrsz;
	stk.vp = (caddr_t) stktop - chrsz - vecsz;
	argvbase = stk.vp;

	/* Added to a target stack address to give the dmaout () address. */
	stkoff = MAPIO (sp->s_vmem, stksz - stktop);

	wdmask = -1;
	if (wdin == sizeof (short))
		wdmask = (unsigned short) wdmask;

	switch (stktop) {
	case ISP_386:
		/* Only argc is stored below the vectors. */
		wdout = sizeof (long);
		xhp->initsp = (unsigned long) stk.vp - sizeof (long);
		dmaout (sizeof (long), xhp->initsp + stkoff, & arg.np);
		break;

	case ISP_286:
		/* argc, argv and envp are stored below the vectors. */
		wdout = sizeof (short);
		xhp->initsp = (unsigned long) stk.vp - 3 * sizeof (short);
		stkenvp = (unsigned long) stk.vp + (arg.np + 1) * sizeof (short);
		dmaout (sizeof (short), xhp->initsp + stkoff, & arg.np);
		dmaout (sizeof (short), xhp->initsp + sizeof (short) + stkoff,
			& stk.vp);
		dmaout (sizeof (short), xhp->initsp + 2 * sizeof (short) +
			stkoff, & stkenvp);
		break;

	default:
		ASSERT ("impossible switch selector" == NULL);
		/* Do not leak the segment if the assertion is compiled out. */
		sfree (sp);
		return NULL;
	}

	/* Arguments */
	for (i = 0 ; i < arg.np ; i ++, argp += wdin, stk.vp += wdout) {
		dmaout (wdout, stk.vp + stkoff, & stk.cp);
		stk.cp += exarg (stk.cp + stkoff, getupd (argp) & wdmask);
	}

	/* skip null word after arguments */
	stk.vp += wdout;

	/* Environments */
	for (i = 0; i < env.np ; i ++, envp += wdin, stk.vp += wdout) {
		dmaout (wdout, stk.vp + stkoff, & stk.cp);
		stk.cp += exarg (stk.cp + stkoff, getupd (envp) & wdmask);
	}

	/*
	 * The strings were fetched from user space a second time above.  If
	 * any of those fetches recorded an error, give up now, while the
	 * caller can still return to the old image.
	 */
	if (get_user_error ()) {
		sfree (sp);
		return NULL;
	}

	/* Only now record the new argument vector in the u area. */
	u.u_argc = arg.np;
	u.u_argp = argvbase;

	return sp;
}
/*
 * Create the image of the very first process: a small stub, found between
 * aicode and aicode_end, which will exec the init program.
 */
extern char aicode [];		/* start of the stub */
extern char aicode_end [];	/* first byte past the stub */

void
eveinit ()
{
	size_t nbytes = aicode_end - aicode;
	SEG * stubseg;

	/*
	 * Get a click-rounded segment for the stub and record it in the
	 * SIPDATA slot of the current process.
	 */
	stubseg = salloc (__ROUND_UP_TO_MULTIPLE (nbytes, NBPC), 0);
	if (stubseg == NULL)
		cmn_err (CE_PANIC, "eveinit ()");
	SELF->p_segl [SIPDATA].sr_segp = stubseg;

	/*
	 * Set the process up with no exec header and no arguments.
	 */
	u.u_argp = 0;
	if (! sproto (0))
		cmn_err (CE_PANIC, "eveinit ()");
	segload ();
	setspace (SEL_386_UD);

	/* Place the stub at user address 0. */
	kucopy (aicode, 0, nbytes);
}
/*
* Open a file, make sure it is l.out, coff, or v86 as well as
* executable.
*
* "xhp" points to a cleared xechdr supplied by the caller.
* "np" is the file name.
* "shrds" points to an int that will be written by exlopen().
* *shrds is set nonzero only for shared l.out.
*
* If file is COFF, there may be multiple text (or data?) sections.
* Use "xlist" linked structure to keep track of variably many sections
* after the first text and data sections.
*
* return NULL if failure, else return inode pointer for the file.
*/
struct inode *
exlopen (xhp, np, shrds, xlist, dirent)
struct xechdr *xhp;
char *np;
int *shrds;
struct xecnode ** xlist;
struct direct * dirent;
{
int i, nscn, hdrsize;
buf_t * bp;
unsigned short magic;
struct ldheader head;
struct filehdr fhead;
struct aouthdr ahead;
struct scnhdr scnhdr;
/*
* Make sure the file is executable and read the header. Note that
* this is about the only case of ftoi () with mode 'r' that actually
* uses the resulting filename information.
*/
if (ftoi (np, 'r', IOUSR, NULL, dirent, SELF->p_credp))
return NULL;
if ((u.u_cdiri->i_mode & IFMT) != IFREG ||
! iaccess (u.u_cdiri, IPE, SELF->p_credp)) {
idetach (u.u_cdiri);
return NULL;
}
#if 0
/*
* The check here for IPE is redundant, so the IFMT check was moved
* up.
*/
if ((ip->i_mode & (IPE | (IPE << 3) | (IPE << 6))) == 0 ||
(ip->i_mode & IFMT) != IFREG) {
set_user_error (EACCES);
idetach (ip);
return NULL;
}
#endif
if ((bp = vread (u.u_cdiri, (daddr_t) 0)) == NULL) {
set_user_error (ENOEXEC);
idetach (u.u_cdiri);
return NULL;
}
/*
* Copy everything we need from the l.out header and check magic
* number and machine type.
*/
* shrds = 0;
magic = * (unsigned short *) bp->b_vaddr;
canint (magic);
switch (magic) {
case L_MAGIC: /* Coherent 286 format */
memcpy (& head, bp->b_vaddr, sizeof (struct ldheader));
canint (head.l_machine);
if (head.l_machine != M_8086)
goto bad;
for (i = 0 ; i < NXSEG ; i ++)
cansize (head.l_ssize [i]);
canint (head.l_flag);
canvaddr (head.l_entry);
/*
* If a shared and separated image
* has stuff in segments that makes it impossible
* to share, give an error immediately so that we don't
* lose the parent.
*/
head.l_flag &= LF_SHR | LF_SEP | LF_KER;
if ((head.l_flag & LF_SEP) == 0 ||
(head.l_flag & LF_KER) != 0 ||
head.l_ssize [L_PRVI] || head.l_ssize [L_BSSI])
goto bad;
xhp->magic = XMAGIC (I286MAGIC,I_MAGIC);
xhp->entry = head.l_entry;
xhp->segs [SISTEXT].fbase = sizeof (struct ldheader);
xhp->segs [SISTEXT].mbase = NBPS;
xhp->segs [SISTEXT].size = head.l_ssize [L_SHRI];
xhp->segs [SIPDATA].fbase = sizeof (struct ldheader) +
xhp->segs [SISTEXT].size;
xhp->segs [SIPDATA].mbase = 0;
xhp->segs [SIPDATA].size = head.l_ssize [L_SHRD] +
head.l_ssize [L_PRVD];
if (head.l_flag & LF_SHR)
* shrds = head.l_ssize [L_SHRD];
xhp->segs [SIBSS].fbase = 0;
xhp->segs [SIBSS].mbase = xhp->segs [SIPDATA].size;
xhp->segs [SIBSS].size = head.l_ssize [L_BSSD];
xhp->segs [SISTACK].mbase = ISP_286; /* size 0, fbase 0 */
brelease (bp);
return u.u_cdiri;
case I386MAGIC: /* ... COFF */
memcpy (& fhead, bp->b_vaddr, sizeof (struct filehdr));
hdrsize = sizeof (ahead) + sizeof (fhead);
if (fhead.f_opthdr != sizeof (ahead) ||
(fhead.f_flags & F_EXEC) == 0 ||
fhead.f_nscns * sizeof (scnhdr) > BSIZE)
goto bad;
memcpy (& ahead, bp->b_vaddr + sizeof (fhead),
sizeof (ahead));
if (ahead.magic != Z_MAGIC)
goto bad;
xhp->magic = XMAGIC (I386MAGIC, ahead.magic);
xhp->entry = ahead.entry;
for (i = 0 ; i < fhead.f_nscns ; i ++) {
memcpy (& scnhdr,
bp->b_vaddr + hdrsize + sizeof (scnhdr) * i,
sizeof (scnhdr));
switch ((int) (scnhdr.s_flags)) {
case STYP_INFO:
continue;
case STYP_BSS:
nscn = SIBSS;
break;
case STYP_TEXT:
nscn = SISTEXT;
break;
case STYP_DATA:
nscn = SIPDATA;
break;
default:
goto bad;
}
/* Text/data shouldn't collide with stack. */
if ((unsigned) scnhdr.s_vaddr >= ISP_386)
goto bad;
/* Have we already seen a segment of this type? */
if (xhp->segs [nscn].size) {
struct xecnode * tmp;
if (nscn != SISTEXT)
goto bad;
/* insert new node at head of "xlist" */
tmp = (struct xecnode *)
kalloc (sizeof (struct xecnode));
if (tmp == NULL) {
cmn_err (CE_WARN,
"can't kalloc(xecnode)");
goto bad;
}
tmp->xn = * xlist;
* xlist = tmp;
tmp->segtype = nscn;
tmp->xseg.mbase = scnhdr.s_vaddr;
tmp->xseg.fbase = scnhdr.s_scnptr;
tmp->xseg.size = scnhdr.s_size;
} else {
xhp->segs [nscn].mbase = scnhdr.s_vaddr;
xhp->segs [nscn].fbase = scnhdr.s_scnptr;
xhp->segs [nscn].size = scnhdr.s_size;
}
}
/* Text and data segments must both be nonempty. */
if (! xhp->segs [SISTEXT].size || ! xhp->segs [SIPDATA].size)
goto bad;
xhp->entry = ahead.entry;
xhp->segs [SISTACK].mbase = ISP_386; /* size 0, fbase 0 */
xhp->magic = XMAGIC (I386MAGIC, ahead.magic);
brelease (bp);
return u.u_cdiri;
default:
bad:
brelease (bp);
set_user_error (ENOEXEC);
idetach (u.u_cdiri);
return NULL;
}
}
/*
 * Free every node on a section list built by exlopen () and leave the list
 * empty.
 */
static void
exfreexlist (xlistp)
struct xecnode ** xlistp;
{
	while (* xlistp != NULL) {
		struct xecnode * next = (* xlistp)->xn;

		kfree (* xlistp);
		* xlistp = next;
	}
}

/*
 * Pass control to an image in a file.
 * Make sure the format is acceptable. Release
 * the old segments. Read in the new ones. Some special
 * care is taken so that shared and (more important) shared
 * and separated images can be run on the 8086.
 *
 * "np" is the file name, "argp" and "envp" the user-space argument and
 * environment vectors, "regsetp" the user register image, which is rewritten
 * for the new image once the exec can no longer fail back to the caller.
 *
 * Always returns 0.  If the file is rejected or the new stack cannot be
 * built, the function returns with the old image intact.  Once the old
 * segments have been released any failure ends in pexit ().
 */
int
pexece (np, argp, envp, regsetp)
char * np;
char * argp [];
char * envp [];
gregset_t * regsetp;
{
	struct xechdr head;
	struct inode *ip;		/* Load file INODE */
	SEG * textseg = NULL;
	SEG * dataseg = NULL;
	SEG * ssegp;
	int i;				/* For looping over segments*/
	int roundup;
	int shrdsize;
	int attached = 0;		/* new segments hooked into SELF? */
	struct xecnode * xlist = NULL;	/* list head */
	struct xecnode * xp;
	struct xecseg tempseg;
	unsigned int textSize;
	struct direct dir;

	memset (& head, 0, sizeof (head));

	if ((ip = exlopen (& head, np, & shrdsize, & xlist, & dir)) == NULL) {
		/* Drop anything exlopen () may have left on the list. */
		exfreexlist (& xlist);
		goto done;
	}

	/*
	 * NOTE(review): exsread () places the private data at the next
	 * 16-byte boundary after the shared data, i.e. it pads by
	 * (- shrdsize & 0xf) bytes, whereas this reserves (shrdsize & 0xf).
	 * The two agree only when shrdsize % 16 is 0 or 8 - confirm which
	 * is intended.
	 */
	roundup = shrdsize & 0xf;

	ssegp = exstack (& head, argp, envp,
			 __xmode_286 (regsetp) ? sizeof (short) :
						 sizeof (int));
	if (! ssegp) {
		exfreexlist (& xlist);
		idetach(ip);
		goto done;
	}

	/* Release shared memory. */
	shmAllDt ();

	/*
	 * At this point the file has been validated as an object module, and
	 * the argument list has been built. Release all of the original
	 * segments. At this point we have committed to the new image. A "sys
	 * exec" that gets an I/O error is doomed.
	 *
	 * NOTE: User-area segment is NOT released.
	 * Segment pointer in proc is erased BEFORE invoking sfree().
	 */
	for (i = 1 ; i < NUSEG ; ++ i) {
		SEG * segp;

		if ((segp = SELF->p_segl [i].sr_segp) != NULL) {
			SELF->p_segl [i].sr_segp = NULL;
			sfree (segp);
		}
	}

	/*
	 * Read in the loadable segments.
	 */
	switch (head.magic) {
	case XMAGIC (I286MAGIC, I_MAGIC):
		if ((textseg = ssalloc (ip, SFTEXT,
					head.segs [SISTEXT].size)) == NULL)
			goto out;
		if (! exsread (textseg, ip, & head.segs [SISTEXT], 0))
			goto out;

		if ((dataseg = ssalloc (ip, 0, roundup +
					head.segs [SIPDATA].size +
					head.segs [SIBSS].size)) == NULL)
			goto out;
		if (! exsread (dataseg, ip, & head.segs [SIPDATA], shrdsize))
			goto out;
		head.segs [SIPDATA].size += roundup;
		break;

	case XMAGIC (I386MAGIC, Z_MAGIC):
		/*
		 * Round segment address down to nearest click boundary.
		 * Ciaran did this. I'm not sure why, but will preserve
		 * it for now. -hws-
		 */
		tempseg = head.segs [SISTEXT];	/* save pre-rounding value */
		xecrnd (head.segs + SISTEXT);
		xecrnd (head.segs + SIPDATA);

		/*
		 * Compute text segment size by taking highest address
		 * seen in any text section.
		 */
		textSize = head.segs [SISTEXT].size +
			   head.segs [SISTEXT].mbase;
		for (xp = xlist ; xp ; xp = xp->xn) {
			unsigned int tmpSize;

			if (xp->segtype != SISTEXT)
				continue;
			tmpSize = xp->xseg.size + xp->xseg.mbase;
			if (tmpSize > textSize)
				textSize = tmpSize;
		}

		/* Entry point must be within text segment. */
		if (head.entry >= textSize)
			goto out;

		if ((textseg = ssalloc (ip, SFTEXT | SFSHRX,
					textSize)) == NULL)
			goto out;

		/* Only load the text if the segment has no inode yet. */
		if (textseg->s_ip == 0) {
			if (! exsread (textseg, ip, & tempseg, 0))
				goto out;

			/*
			 * load additional text sections, if any; each node
			 * describes its own section, so read from the node
			 * being visited, not from the list head.
			 */
			for (xp = xlist ; xp ; xp = xp->xn) {
				if (xp->segtype != SISTEXT)
					continue;
				if (! exsread (textseg, ip, & xp->xseg, 0))
					goto out;
			}
			textseg->s_ip = ip;
			ip->i_refc ++;
		}

		if ((dataseg = ssalloc (ip, 0, head.segs [SIPDATA].size +
					head.segs[SIBSS].size)) == NULL)
			goto out;
		if (dataseg->s_ip == 0 &&
		    ! exsread (dataseg, ip, & head.segs [SIPDATA], 0))
			goto out;

		/* Deallocate nodes hooked into xlist by exlopen. */
		exfreexlist (& xlist);
		break;

	default:
		ASSERT ("Impossible switch selector" == NULL);
		goto out;
	}

	SELF->p_segl [SISTACK].sr_segp = ssegp;
	SELF->p_segl [SISTEXT].sr_segp = textseg;
	SELF->p_segl [SIPDATA].sr_segp = dataseg;
	attached = 1;

	if (sproto (& head) == 0)
		goto out;

	/*
	 * At this point, and no earlier, we can modify the user register
	 * image for the new process because now we are committed to executing
	 * the new image.
	 *
	 * As a general security thing, we begin by zeroing out the user-level
	 * register image /except/ for the flags, where we just clear the
	 * user-settable status bits.
	 */
	{
		__flag_reg_t flags = __FLAG_REG (regsetp);

		memset (regsetp, 0, sizeof (* regsetp));
		__FLAG_REG (regsetp) = __FLAG_CLEAR_STATUS (flags);
	}

	switch (head.magic) {
	case XMAGIC (I286MAGIC, I_MAGIC):
		__SET_SELECTOR (regsetp->_i286._cs, SEL_286_UII);
		__SET_SELECTOR (regsetp->_i286._ds, SEL_286_UD);
		regsetp->_i286._ss = regsetp->_i286._es = regsetp->_i286._ds;
		regsetp->_i286._ip = head.entry;
		regsetp->_i286._usp = head.initsp;
		break;

	case XMAGIC (I386MAGIC, Z_MAGIC):
		__SET_SELECTOR (regsetp->_i386._cs, SEL_386_UI);
		__SET_SELECTOR (regsetp->_i386._ds, SEL_386_UD);
		regsetp->_i386._ss = regsetp->_i386._es = regsetp->_i386._ds;
		regsetp->_i386._eip = head.entry;
		regsetp->_i386._uesp = head.initsp;
		break;
	}

	/*
	 * The new image is read in
	 * and mapped. Perform the final grunge
	 * (set-uid stuff, accounting, loading up
	 * registers, etc).
	 */
	u.u_flag &= ~AFORK;
	memcpy (SELF->p_comm, dir.d_name, sizeof (SELF->p_comm));

	if (iaccess (ip, IPR, SELF->p_credp) == 0) {
		/* Can't read ? no dump or trace */
		SELF->p_flags |= PFNDMP;
		SELF->p_flags &= ~PFTRAC;
	}

	/*
	 * Record file access time.
	 */
	iaccessed (ip);

	{
		n_uid_t uid = SELF->p_credp->cr_uid;
		n_gid_t gid = SELF->p_credp->cr_gid;

		if ((ip->i_mode & ISUID) != 0) {
			/* Set user id ? no trace */
			uid = ip->i_uid;
			SELF->p_flags &= ~PFTRAC;
		}
		if ((ip->i_mode & ISGID) != 0) {
			/* Set group id ? no trace */
			gid = ip->i_gid;
			SELF->p_flags &= ~PFTRAC;
		}
		if ((SELF->p_credp = cred_execid (SELF->p_credp, uid,
						  gid)) == NULL)
			goto out;
	}

	for (i = 0 ; i < NOFILE; i ++) {
		int j = fd_get_flags (i);

		if (j != -1 && (j & FD_CLOEXEC) != 0)
			fd_close (i);	/* close fd on exec bit set */
	}

	/*
	 * Default every signal that is not ignored.
	 */
	for (i = 1 ; i <= _SIGNAL_MAX ; ++ i) {
		__sigaction_t act;

		curr_signal_action (i, NULL, & act);
		if (act.sa_handler != SIG_IGN) {
			act.sa_handler = SIG_DFL;
			act.sa_flags = 0;
			___SIGSET_SET (act.sa_mask, 0);
			curr_signal_action (i, & act, NULL);
		}
	}

	/*
	 * We have successfully completed an exec (), which means that the
	 * setpgid () function can no longer be used to change our process
	 * group from the parent process.
	 */
	SELF->p_flags |= PFEXEC;

	if (SELF->p_flags & PFTRAC)	/* Being traced */
		sendsig (SIGTRAP, SELF);

	idetach (ip);

	/* initialize u area ndp fields */
	ndpNewProc ();
	segload ();

	/*
	 * Also, set up the new scheduling priority for the process.
	 * We cannot assume this process will have the same properties
	 * as the one we exec'ed from.
	 * Starting priority should be the same as in
	 * ~/coh.386/lib/proc_init.c
	 */
	SELF->p_schedPri = NCRTICK * (100 / 2);
	SELF->p_foodstamp = 0;
	goto done;

	/*
	 * Alas, exec() has failed..
	 */
out:
	/* Deallocate nodes hooked into xlist by exlopen. */
	exfreexlist (& xlist);

	/* Release the inode for the load file. */
	idetach (ip);

	/*
	 * Segments that were allocated but never hooked into the process
	 * are known only through our locals; release them here or nobody
	 * will.
	 */
	if (! attached) {
		if (textseg != NULL)
			sfree (textseg);
		if (dataseg != NULL)
			sfree (dataseg);
		sfree (ssegp);
	}

	/* If we allocated a text segment, let it go. */
	if ((textseg = SELF->p_segl [SISTEXT].sr_segp) != NULL) {
		SELF->p_segl [SISTEXT].sr_segp = NULL;
		sfree (textseg);
	}

	/* If we allocated a data segment, let it go. */
	if ((dataseg = SELF->p_segl [SIPDATA].sr_segp) != NULL) {
		SELF->p_segl [SIPDATA].sr_segp = NULL;
		sfree (dataseg);
	}

	/*
	 * Return through the "sys exit" code with a "SIGSYS", or with the
	 * signal actually received if we are aborting due to interrupted exec.
	 */
	pexit (get_user_error () == EINTR ? curr_signal_pending () : SIGSYS);

done:
	return 0;
}