NetBSD-5.0.2/sys/compat/linux/arch/i386/linux_exec_machdep.c

Compare this file to the similar file:
Show the results in this format:

/*	$NetBSD: linux_exec_machdep.c,v 1.11.4.1 2009/04/01 00:25:22 snj Exp $	*/

/*-
 * Copyright (c) 2004 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Christos Zoulas.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_exec_machdep.c,v 1.11.4.1 2009/04/01 00:25:22 snj Exp $");

#if defined(_KERNEL_OPT)
#include "opt_vm86.h"
#include "opt_user_ldt.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resource.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/exec.h>
#include <sys/exec_elf.h>
#include <sys/vnode.h>
#include <sys/lwp.h>

#include <sys/cpu.h>
#include <machine/vmparam.h>

#include <uvm/uvm.h>

#include <sys/syscallargs.h>

#ifndef DEBUG_LINUX
#define DPRINTF(a)
#else
#define DPRINTF(a)	uprintf a
#endif

#include <compat/linux/common/linux_types.h>
#include <compat/linux/common/linux_signal.h>
#include <compat/linux/common/linux_machdep.h>
#include <compat/linux/common/linux_util.h>
#include <compat/linux/common/linux_ioctl.h>
#include <compat/linux/common/linux_hdio.h>
#include <compat/linux/common/linux_exec.h>
#include <compat/linux/common/linux_errno.h>
#include <compat/linux//linux_syscallargs.h>


int
linux_exec_setup_stack(struct lwp *l, struct exec_package *epp)
{
	u_long max_stack_size;
	u_long access_linear_min, access_size;
	u_long noaccess_linear_min, noaccess_size;

#ifndef	USRSTACK32
#define USRSTACK32	(0x00000000ffffffffL&~PGOFSET)
#endif

	if (epp->ep_flags & EXEC_32) {
		epp->ep_minsaddr = USRSTACK32;
		max_stack_size = MAXSSIZ;
	} else {
		epp->ep_minsaddr = USRSTACK;
		max_stack_size = MAXSSIZ;
	}

	if (epp->ep_minsaddr > LINUX_USRSTACK)
		epp->ep_minsaddr = LINUX_USRSTACK;
#ifdef DEBUG_LINUX
	else {
		/*
		 * Someone needs to make KERNBASE and TEXTADDR
		 * java versions < 1.4.2 need the stack to be
		 * at 0xC0000000
		 */
		uprintf("Cannot setup stack to 0xC0000000, "
		    "java will not work properly\n");
	}
#endif
	epp->ep_maxsaddr = (u_long)STACK_GROW(epp->ep_minsaddr,
		max_stack_size);
	epp->ep_ssize = l->l_proc->p_rlimit[RLIMIT_STACK].rlim_cur;

	/*
	 * set up commands for stack.  note that this takes *two*, one to
	 * map the part of the stack which we can access, and one to map
	 * the part which we can't.
	 *
	 * arguably, it could be made into one, but that would require the
	 * addition of another mapping proc, which is unnecessary
	 */
	access_size = epp->ep_ssize;
	access_linear_min = (u_long)STACK_ALLOC(epp->ep_minsaddr, access_size);
	noaccess_size = max_stack_size - access_size;
	noaccess_linear_min = (u_long)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
	    access_size), noaccess_size);
	if (noaccess_size > 0) {
		NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
		    noaccess_linear_min, NULLVP, 0, VM_PROT_NONE, VMCMD_STACK);
	}
	KASSERT(access_size > 0);
	NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
	    access_linear_min, NULLVP, 0, VM_PROT_READ | VM_PROT_WRITE,
	    VMCMD_STACK);

	return 0;
}


#ifdef LINUX_NPTL
/*
 * Load the selector value `sel' into the %gs segment register.
 */
static __inline void
load_gs(u_int sel)
{
        /* "rm": the operand may be supplied from a register or from memory. */
        __asm __volatile("movl %0,%%gs" : : "rm" (sel)); 
}


int
linux_init_thread_area(struct lwp *l, struct lwp *l2)
{
	struct trapframe *tf = l->l_md.md_regs, *tf2 = l2->l_md.md_regs;
	struct pcb *pcb2 = &l2->l_addr->u_pcb;
	struct linux_user_desc info;
	struct segment_descriptor sd;
	int error, idx, a[2];

	error = copyin((void *)tf->tf_esi, &info, sizeof(info));
	if (error)
		return error;
	idx = info.entry_number;

	/* 
	 * looks like we're getting the idx we returned
	 * in the set_thread_area() syscall
	 */
	if (idx != LINUX_GLIBC_TLS_SEL && idx != GUGS_SEL) {
		DPRINTF(("resetting idx %d to GUGS_SEL", idx));
		idx = GUGS_SEL;
	}

	/* this doesnt happen in practice */
	if (idx == LINUX_GLIBC_TLS_SEL) {
		/* we might copy out the entry_number as 3 */
		info.entry_number = GUGS_SEL;
		error = copyout(&info, (void *)tf->tf_esi, sizeof(info));
		if (error)
			return error;
	}

	a[0] = LINUX_LDT_entry_a(&info);
	a[1] = LINUX_LDT_entry_b(&info);

	(void)memcpy(&sd, &a, sizeof(a));
	KASSERT(ISMEMSDP((&sd)));
	DPRINTF(("Segment created in clone with CLONE_SETTLS: lobase: %x, "
	    "hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, "
	    "xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
	    sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, sd.sd_type, sd.sd_dpl,
	    sd.sd_p, sd.sd_xx, sd.sd_def32, sd.sd_gran));

	(void)memcpy(&pcb2->pcb_gsd, &sd, sizeof(sd));
	tf2->tf_gs = GSEL(GUGS_SEL, SEL_UPL);

	return 0;
}


/*
 * Linux set_thread_area(2): install a TLS segment for the calling lwp.
 *
 * The struct linux_user_desc at SCARG(uap, desc) is read, its entry
 * number is rewritten to GUGS_SEL and copied back to the caller, and the
 * resulting segment descriptor is stored in the lwp's pcb_gsd and in the
 * current CPU's GDT slot GUGS_SEL, after which %gs is reloaded.
 *
 * Returns 0 on success, EINVAL for an unsupported entry number, or the
 * error reported by copyin()/copyout().  *retval is set to 0.
 */
int
linux_sys_set_thread_area(struct lwp *l,
    const struct linux_sys_set_thread_area_args *uap, register_t *retval)
{
	struct pcb *pcb = &l->l_addr->u_pcb;
	struct linux_user_desc info;
	struct segment_descriptor sd;
	int words[2] = { 0, 0 };
	int idx, error;

	*retval = 0;

	if ((error = copyin(SCARG(uap, desc), &info, sizeof(info))) != 0)
		return error;

	DPRINTF(("set thread area: %i, %x, %x, %i, %i, %i, %i, %i, %i\n",
	    info.entry_number, info.base_addr, info.limit, info.seg_32bit,
	    info.contents, info.read_exec_only, info.limit_in_pages,
	    info.seg_not_present, info.useable));

	/*
	 * Background, as recorded by the original authors:
	 *
	 * Linux gives every thread an array of three TLS descriptors
	 * (the first for glibc TLS, the second for WINE, the third
	 * unknown).  The Linux syscall fills the selected one and also
	 * loads GDT descriptors 6, 7 and 8 from the per-thread array.
	 *
	 * The FreeBSD implementation this follows ignores the other two
	 * and uses a single GDT descriptor for TLS; the per-thread array
	 * only matters for set/get_thread_area() and for loading the
	 * GDT, so only one descriptor is loaded.
	 *
	 * XXX: a process that tries to use more than one TLS segment
	 * will not work.  A comment in the Linux sources says WINE
	 * might do this.
	 */

	/*
	 * Only glibc TLS is supported.  -1 is accepted too, and so is
	 * the slot actually used here (GUGS_SEL), so that a second call
	 * made with the value returned by a first one still succeeds.
	 */
	idx = info.entry_number;
	if (!(idx == LINUX_GLIBC_TLS_SEL || idx == -1 || idx == GUGS_SEL))
		return EINVAL;

	/*
	 * Tell the caller which GDT entry is really used; the original
	 * note says FreeBSD uses entry #3 for storing %gs.
	 *
	 * XXX: what if a program ignores this value and goes on to use
	 * 6, 7 or 8?
	 */
	info.entry_number = GUGS_SEL;
	error = copyout(&info, SCARG(uap, desc), sizeof(info));
	if (error != 0)
		return error;

	/* An empty request leaves both descriptor words zero. */
	if (!LINUX_LDT_empty(&info)) {
		words[0] = LINUX_LDT_entry_a(&info);
		words[1] = LINUX_LDT_entry_b(&info);
	}
	(void)memcpy(&sd, words, sizeof(words));

	KASSERT(ISMEMSDP((&sd)));
	DPRINTF(("Segment created in set_thread_area: lobase: %x, hibase: %x, "
	    "lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, "
	    "def32: %i, gran: %i\n",
	    sd.sd_lobase, sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit,
	    sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, sd.sd_def32,
	    sd.sd_gran));

	/*
	 * The saved copy, the current CPU's GDT slot and the reload of
	 * %gs are all done with kernel preemption disabled.
	 */
	kpreempt_disable();
	(void)memcpy(&pcb->pcb_gsd, &sd, sizeof(sd));
	(void)memcpy(&curcpu()->ci_gdt[GUGS_SEL], &sd, sizeof(sd));
	load_gs(GSEL(GUGS_SEL, SEL_UPL));
	kpreempt_enable();

	return 0;
}

/*
 * Linux get_thread_area(2): report the TLS segment of the calling lwp.
 *
 * The struct linux_user_desc at SCARG(uap, desc) is read only to obtain
 * the requested entry number.  The reply is rebuilt from scratch out of
 * the segment descriptor saved in the lwp's pcb_gsd and always carries
 * GUGS_SEL as its entry number.
 *
 * Returns 0 on success, EINVAL if the requested entry number is neither
 * LINUX_GLIBC_TLS_SEL nor GUGS_SEL, or the error reported by
 * copyin()/copyout().  *retval is set to 0.
 */
int
linux_sys_get_thread_area(struct lwp *l,
    const struct linux_sys_get_thread_area_args *uap, register_t *retval)
{
	struct pcb *pcb = &l->l_addr->u_pcb;
	struct linux_user_desc info;
	struct linux_desc_struct desc;
	struct segment_descriptor sd;
	int error, idx;

	*retval = 0;
	error = copyin(SCARG(uap, desc), &info, sizeof(info));
	if (error)
		return error;

	idx = info.entry_number;
	/* XXX: it is not clear whether GUGS_SEL should be accepted too. */
	if (idx != LINUX_GLIBC_TLS_SEL && idx != GUGS_SEL)
		return EINVAL;

	/* Whatever was asked for, the descriptor lives in GUGS_SEL. */
	idx = GUGS_SEL;

	/* Start from a zeroed reply so nothing from the request survives. */
	(void)memset(&info, 0, sizeof(info));

	/*
	 * Copy from the address of the saved descriptor, as the other
	 * users of pcb_gsd in this file do; passing the object itself
	 * to memcpy() is wrong.
	 */
	(void)memcpy(&sd, &pcb->pcb_gsd, sizeof(sd));
	/* Reinterpret the descriptor in the layout the LINUX_GET_* macros use. */
	(void)memcpy(&desc, &sd, sizeof(desc));

	info.entry_number = idx;
	info.base_addr = LINUX_GET_BASE(&desc);
	info.limit = LINUX_GET_LIMIT(&desc);
	info.seg_32bit = LINUX_GET_32BIT(&desc);
	info.contents = LINUX_GET_CONTENTS(&desc);
	/* The user_desc flags are the inverse of writable/present. */
	info.read_exec_only = !LINUX_GET_WRITABLE(&desc);
	info.limit_in_pages = LINUX_GET_LIMIT_PAGES(&desc);
	info.seg_not_present = !LINUX_GET_PRESENT(&desc);
	info.useable = LINUX_GET_USEABLE(&desc);

	return copyout(&info, SCARG(uap, desc), sizeof(info));
}

#endif