NetBSD-5.0.2/lib/libc/citrus/modules/citrus_iconv_std.c

Compare this file to the similar file:
Show the results in this format:

/*	$NetBSD: citrus_iconv_std.c,v 1.15 2006/11/13 19:08:19 tnozaki Exp $	*/

/*-
 * Copyright (c)2003 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: citrus_iconv_std.c,v 1.15 2006/11/13 19:08:19 tnozaki Exp $");
#endif /* LIBC_SCCS and not lint */

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/endian.h>
#include <sys/queue.h>

#include "citrus_namespace.h"
#include "citrus_types.h"
#include "citrus_module.h"
#include "citrus_region.h"
#include "citrus_mmap.h"
#include "citrus_hash.h"
#include "citrus_iconv.h"
#include "citrus_stdenc.h"
#include "citrus_mapper.h"
#include "citrus_csmapper.h"
#include "citrus_memstream.h"
#include "citrus_iconv_std.h"
#include "citrus_esdb.h"

/* ---------------------------------------------------------------------- */

/*
 * Instantiate the module boilerplate for "iconv_std".
 * NOTE(review): presumably _CITRUS_ICONV_DECLS emits the prototypes of the
 * static _citrus_iconv_std_iconv_* entry points defined below, and
 * _CITRUS_ICONV_DEF_OPS defines the _citrus_iconv_std_iconv_ops table that
 * _citrus_iconv_std_iconv_getops() copies out -- confirm in citrus_iconv.h.
 */
_CITRUS_ICONV_DECLS(iconv_std);
_CITRUS_ICONV_DEF_OPS(iconv_std);


/* ---------------------------------------------------------------------- */

/*
 * _citrus_iconv_std_iconv_getops:
 *	Copy this module's operations table into the caller's buffer.
 *
 *	ops              - destination for the table.
 *	lenops           - size in bytes of the object `ops' points to.
 *	expected_version - ABI version requested by the caller.
 *
 *	Returns 0 on success, or EINVAL when expected_version is lower than
 *	_CITRUS_ICONV_ABI_VERSION or lenops is smaller than sizeof(*ops).
 */
int
_citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
			       u_int32_t expected_version)
{
	if (expected_version >= _CITRUS_ICONV_ABI_VERSION &&
	    lenops >= sizeof(*ops)) {
		memcpy(ops, &_citrus_iconv_std_iconv_ops,
		       sizeof(_citrus_iconv_std_iconv_ops));
		return (0);
	}

	return (EINVAL);
}

/* ---------------------------------------------------------------------- */

/*
 * convenience routines for stdenc.
 */
/*
 * Copy the live conversion state (se_ps) into the backup buffer
 * (se_pssaved).  Does nothing when the context has no state buffer.
 */
static __inline void
save_encoding_state(struct _citrus_iconv_std_encoding *se)
{
	size_t len;

	if (se->se_ps == NULL)
		return;

	len = _stdenc_get_state_size(se->se_handle);
	memcpy(se->se_pssaved, se->se_ps, len);
}

/*
 * Copy the backup buffer (se_pssaved) back over the live conversion state
 * (se_ps).  Does nothing when the context has no state buffer.
 */
static __inline void
restore_encoding_state(struct _citrus_iconv_std_encoding *se)
{
	size_t len;

	if (se->se_ps == NULL)
		return;

	len = _stdenc_get_state_size(se->se_handle);
	memcpy(se->se_ps, se->se_pssaved, len);
}

/*
 * Re-initialise the live conversion state through _stdenc_init_state().
 * Does nothing when the context has no state buffer; the result of the
 * initialisation is not reported to the caller.
 */
static __inline void
init_encoding_state(struct _citrus_iconv_std_encoding *se)
{
	if (se->se_ps == NULL)
		return;

	(void)_stdenc_init_state(se->se_handle, se->se_ps);
}

/*
 * Forward to _stdenc_mbtocs() using the handle and live state stored in
 * `se'; all other arguments and the return value are passed through as is.
 */
static __inline int
mbtocsx(struct _citrus_iconv_std_encoding *se,
	_csid_t *csid, _index_t *idx, const char **s, size_t n,
	size_t *nresult)
{
	int error;

	error = _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
			       nresult);
	return (error);
}

/*
 * Forward to _stdenc_cstomb() using the handle and live state stored in
 * `se'; all other arguments and the return value are passed through as is.
 */
static __inline int
cstombx(struct _citrus_iconv_std_encoding *se,
	char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
{
	int error;

	error = _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
			       nresult);
	return (error);
}

/*
 * Forward to _stdenc_wctomb() using the handle and live state stored in
 * `se'; all other arguments and the return value are passed through as is.
 */
static __inline int
wctombx(struct _citrus_iconv_std_encoding *se,
	char *s, size_t n, _wc_t wc, size_t *nresult)
{
	int error;

	error = _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
	return (error);
}

static __inline int
put_state_resetx(struct _citrus_iconv_std_encoding *se,
		 char *s, size_t n, size_t *nresult)
{
	return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult);
}

static __inline int
get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
{
	int ret;
	struct _stdenc_state_desc ssd;

	ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
				     _STDENC_SDID_GENERIC, &ssd);
	if (!ret)
		*rstate = ssd.u.generic.state;

	return ret;
}

/*
 * init_encoding:
 *	Bind an encoding context to a stdenc handle and its state buffers,
 *	then initialise whichever buffers are present.
 *
 *	se  - context to fill in.
 *	cs  - encoding handle stored in se->se_handle.
 *	ps1 - live state buffer, stored in se->se_ps (may be NULL).
 *	ps2 - backup state buffer, stored in se->se_pssaved (may be NULL).
 *
 *	Returns 0 on success, or the first non-zero result of
 *	_stdenc_init_state().  A context without state buffers has nothing
 *	to initialise and therefore succeeds; the result used to start out
 *	as -1, which made that case look like a failure.
 */
static int
init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
	      void *ps1, void *ps2)
{
	int ret = 0;

	se->se_handle = cs;
	se->se_ps = ps1;
	se->se_pssaved = ps2;

	if (se->se_ps)
		ret = _stdenc_init_state(cs, se->se_ps);
	/* do not touch the backup buffer once the live one has failed */
	if (!ret && se->se_pssaved)
		ret = _stdenc_init_state(cs, se->se_pssaved);

	return ret;
}

/*
 * open_csmapper:
 *	Open the charset mapper from `src' to `dst' and accept it only if
 *	it is a stateless one-to-one mapper (source max 1, destination
 *	max 1, state size 0).
 *
 *	rcm   - receives the mapper on success; untouched on failure.
 *	rnorm - passed through to _csmapper_open().
 *
 *	Returns 0 on success, the error from _csmapper_open(), or EINVAL
 *	when the mapper was opened but is not of the accepted shape (it is
 *	closed again in that case).
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
	      unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error)
		return (error);

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return (0);
	}

	_csmapper_close(mapper);
	return (EINVAL);
}

/*
 * Empty a destination list: unlink every entry, close its mapper and free
 * the entry itself.
 */
static void
close_dsts(struct _citrus_iconv_std_dst_list *dl)
{
	struct _citrus_iconv_std_dst *head;

	for (head = TAILQ_FIRST(dl); head != NULL; head = TAILQ_FIRST(dl)) {
		TAILQ_REMOVE(dl, head, sd_entry);
		_csmapper_close(head->sd_mapper);
		free(head);
	}
}

static int
open_dsts(struct _citrus_iconv_std_dst_list *dl,
	  const struct _esdb_charset *ec, const struct _esdb *dbdst)
{
	int i, ret;
	struct _citrus_iconv_std_dst *sd, *sdtmp;
	unsigned long norm;

	sd = malloc(sizeof(*sd));
	if (sd == NULL)
		return errno;

	for (i=0; i<dbdst->db_num_charsets; i++) {
		ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
				    dbdst->db_charsets[i].ec_csname, &norm);
		if (ret == 0) {
			sd->sd_csid = dbdst->db_charsets[i].ec_csid;
			sd->sd_norm = norm;
			/* insert this mapper by sorted order. */
			TAILQ_FOREACH(sdtmp, dl, sd_entry) {
				if (sdtmp->sd_norm > norm) {
					TAILQ_INSERT_BEFORE(sdtmp, sd,
							    sd_entry);
					sd = NULL;
					break;
				}
			}
			if (sd)
				TAILQ_INSERT_TAIL(dl, sd, sd_entry);
			sd = malloc(sizeof(*sd));
			if (sd == NULL) {
				ret = errno;
				close_dsts(dl);
				return ret;
			}
		} else if (ret != ENOENT) {
			close_dsts(dl);
			free(sd);
			return ret;
		}
	}
	free(sd);
	return 0;
}

/*
 * Empty a source list: unlink every entry, release its destination list
 * with close_dsts() and free the entry itself.
 */
static void
close_srcs(struct _citrus_iconv_std_src_list *sl)
{
	struct _citrus_iconv_std_src *head;

	for (head = TAILQ_FIRST(sl); head != NULL; head = TAILQ_FIRST(sl)) {
		TAILQ_REMOVE(sl, head, ss_entry);
		close_dsts(&head->ss_dsts);
		free(head);
	}
}

static int
open_srcs(struct _citrus_iconv_std_src_list *sl,
	  const struct _esdb *dbsrc, const struct _esdb *dbdst)
{
	int i, ret, count = 0;
	struct _citrus_iconv_std_src *ss;

	ss = malloc(sizeof(*ss));
	if (ss == NULL)
		return errno;

	TAILQ_INIT(&ss->ss_dsts);

	for (i=0; i<dbsrc->db_num_charsets; i++) {
		ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
		if (ret)
			goto err;
		if (!TAILQ_EMPTY(&ss->ss_dsts)) {
			ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
			TAILQ_INSERT_TAIL(sl, ss, ss_entry);
			ss = malloc(sizeof(*ss));
			if (ss == NULL) {
				ret = errno;
				goto err;
			}
			count++;
			TAILQ_INIT(&ss->ss_dsts);
		}
	}
	free(ss);

	return count ? 0 : ENOENT;

err:
	free(ss);
	close_srcs(sl);
	return ret;
}

/* convert a single character */
#define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
/*
 * do_conv:
 *	Map one character, given as a (charset id, index) pair, to the
 *	destination side.
 *
 *	The first source entry whose charset id equals *csid is selected and
 *	its mappers are tried in list order.  On the first successful
 *	conversion *csid and *idx are replaced by the destination charset id
 *	and index and 0 is returned.
 *
 *	Returns EILSEQ when a mapper reports an illegal sequence, EINVAL
 *	when a mapper reports SRC_MORE, DST_MORE or FATAL, and
 *	E_NO_CORRESPONDING_CHAR when no source entry matches or no mapper
 *	produced a result.  *csid and *idx are left untouched unless 0 is
 *	returned.  `sc' is not used.
 */
static int
/*ARGSUSED*/
do_conv(const struct _citrus_iconv_std_shared *is,
	struct _citrus_iconv_std_context *sc, _csid_t *csid, _index_t *idx)
{
	struct _citrus_iconv_std_src *src;
	struct _citrus_iconv_std_dst *dst;
	_index_t mapped;
	int status;

	/* locate the entry describing the source charset */
	TAILQ_FOREACH(src, &is->is_srcs, ss_entry) {
		if (src->ss_csid == *csid)
			break;
	}
	if (src == NULL)
		return E_NO_CORRESPONDING_CHAR;

	TAILQ_FOREACH(dst, &src->ss_dsts, sd_entry) {
		status = _csmapper_convert(dst->sd_mapper, &mapped, *idx,
					   NULL);
		if (status == _MAPPER_CONVERT_SUCCESS) {
			*csid = dst->sd_csid;
			*idx = mapped;
			return 0;
		}
		if (status == _MAPPER_CONVERT_ILSEQ)
			return EILSEQ;
		if (status == _MAPPER_CONVERT_SRC_MORE ||
		    status == _MAPPER_CONVERT_DST_MORE ||
		    status == _MAPPER_CONVERT_FATAL)
			return EINVAL;
		/*
		 * _MAPPER_CONVERT_NONIDENTICAL (or any other status):
		 * fall back to the next mapper in the list.
		 */
	}

	return E_NO_CORRESPONDING_CHAR;
}
/* ---------------------------------------------------------------------- */

static int
/*ARGSUSED*/
_citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
				    const char * __restrict curdir,
				    const char * __restrict src,
				    const char * __restrict dst,
				    const void * __restrict var, size_t lenvar)
{
	int ret;
	struct _citrus_iconv_std_shared *is;
	struct _citrus_esdb esdbsrc, esdbdst;

	is = malloc(sizeof(*is));
	if (is==NULL) {
		ret = errno;
		goto err0;
	}
	ret = _citrus_esdb_open(&esdbsrc, src);
	if (ret)
		goto err1;
	ret = _citrus_esdb_open(&esdbdst, dst);
	if (ret)
		goto err2;
	ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
			   esdbsrc.db_variable, esdbsrc.db_len_variable);
	if (ret)
		goto err3;
	ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
			   esdbdst.db_variable, esdbdst.db_len_variable);
	if (ret)
		goto err4;
	is->is_use_invalid = esdbdst.db_use_invalid;
	is->is_invalid = esdbdst.db_invalid;

	TAILQ_INIT(&is->is_srcs);
	ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
	if (ret)
		goto err5;

	_esdb_close(&esdbsrc);
	_esdb_close(&esdbdst);
	ci->ci_closure = is;

	return 0;

err5:
	_stdenc_close(is->is_dst_encoding);
err4:
	_stdenc_close(is->is_src_encoding);
err3:
	_esdb_close(&esdbdst);
err2:
	_esdb_close(&esdbsrc);
err1:
	free(is);
err0:
	return ret;
}

/*
 * _citrus_iconv_std_iconv_uninit_shared:
 *	Release the shared data hanging off ci->ci_closure: close both
 *	encodings, tear down the mapper lists and free the structure.
 *	A NULL closure is ignored.  ci->ci_closure itself is not reset.
 */
static void
_citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
{
	struct _citrus_iconv_std_shared *shared;

	shared = ci->ci_closure;
	if (shared != NULL) {
		_stdenc_close(shared->is_src_encoding);
		_stdenc_close(shared->is_dst_encoding);
		close_srcs(&shared->is_srcs);
		free(shared);
	}
}

/*
 * _citrus_iconv_std_iconv_init_context:
 *	Allocate and initialise the per-conversion context and store it in
 *	cv->cv_closure.
 *
 *	The context structure and the state buffers live in one allocation,
 *	laid out as:
 *	    [context][src live][src saved][dst live][dst saved]
 *	An encoding whose state size is 0 gets NULL state pointers.
 *
 *	Returns 0 on success, errno from malloc(), or the error reported
 *	while initialising a state buffer.  On failure nothing is left
 *	allocated and cv->cv_closure is not modified.
 */
static int
_citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
{
	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
	struct _citrus_iconv_std_context *sc;
	int ret;
	size_t szpssrc, szpsdst, sz;
	char *ptr;

	szpssrc = _stdenc_get_state_size(is->is_src_encoding);
	szpsdst = _stdenc_get_state_size(is->is_dst_encoding);

	sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
	sc = malloc(sz);
	if (sc == NULL)
		return errno;

	/* the state buffers start right behind the context structure */
	ptr = (char *)&sc[1];
	if (szpssrc) {
		ret = init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
				    ptr, ptr+szpssrc);
		if (ret)
			goto err;
	} else {
		/* no state to set up, so the result carries no information */
		(void)init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
				    NULL, NULL);
	}
	ptr += szpssrc*2;
	if (szpsdst) {
		ret = init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
				    ptr, ptr+szpsdst);
		if (ret)
			goto err;
	} else {
		/* no state to set up, so the result carries no information */
		(void)init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
				    NULL, NULL);
	}

	cv->cv_closure = (void *)sc;

	return 0;

err:
	free(sc);
	return ret;
}

/*
 * _citrus_iconv_std_iconv_uninit_context:
 *	Free the per-conversion context stored in cv->cv_closure.  The
 *	pointer itself is left in place.
 */
static void
_citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
{
	void *closure = cv->cv_closure;

	free(closure);
}

/*
 * _citrus_iconv_std_iconv_convert:
 *	Convert characters from *in to *out, one character at a time.
 *
 *	in, inbytes   - input cursor and remaining input bytes; both are
 *			advanced past every character that was converted.
 *	out, outbytes - output cursor and remaining output space; both are
 *			advanced past every byte that was produced.
 *	flags         - when _CITRUS_ICONV_F_HIDE_INVALID is clear and the
 *			destination defines an "invalid" character, that
 *			character is written for every input character that
 *			has no counterpart; otherwise such characters
 *			produce no output.
 *	invalids      - receives the number of input characters that had no
 *			counterpart in the destination.
 *
 *	Reset request (in == NULL or *in == NULL): if an output buffer is
 *	given, the destination's state-reset sequence is written to it;
 *	otherwise the destination state is simply re-initialised.  The
 *	source state is re-initialised in both cases.
 *
 *	Returns 0 on success or an errno value.  EINVAL is returned for an
 *	incomplete input character, for a reset sequence that does not fit,
 *	and when the source state cannot be queried.  When a character
 *	fails, both encoding states are rolled back to the snapshot taken
 *	before that character, and the cursors still describe the last
 *	completely converted character.
 */
static int
_citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
				const char * __restrict * __restrict in,
				size_t * __restrict inbytes,
				char * __restrict * __restrict out,
				size_t * __restrict outbytes, u_int32_t flags,
				size_t * __restrict invalids)
{
	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
	struct _citrus_iconv_std_context *sc = cv->cv_closure;
	_index_t idx;
	_csid_t csid;
	int ret, state;
	size_t szrin, szrout, consumed;
	size_t inval;
	const char *tmpin;

	inval = 0;
	if (in==NULL || *in==NULL) {
		/* special cases */
		if (out!=NULL && *out!=NULL) {
			/* init output state and store the shift sequence */
			save_encoding_state(&sc->sc_src_encoding);
			save_encoding_state(&sc->sc_dst_encoding);
			szrout = 0;

			ret = put_state_resetx(&sc->sc_dst_encoding,
					       *out, *outbytes,
					       &szrout);
			if (ret)
				goto err;

			if (szrout == (size_t)-2) {
				/* too small to store the character */
				ret = EINVAL;
				goto err;
			}
			*out += szrout;
			*outbytes -= szrout;
		} else
			/* otherwise, discard the shift sequence */
			init_encoding_state(&sc->sc_dst_encoding);
		init_encoding_state(&sc->sc_src_encoding);
		*invalids = 0;
		return 0;
	}

	/* normal case */
	for (;;) {
		if (*inbytes==0) {
			/*
			 * Input exhausted: we are done once the source
			 * encoding is in an initial or stable state.
			 */
			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
			if (ret) {
				/*
				 * `state' was not filled in, so it must not
				 * be examined.  Nothing has been modified
				 * since the last completed character and no
				 * fresh snapshot exists yet, so skip the
				 * state restore.
				 */
				ret = EINVAL;
				goto err_norestore;
			}
			if (state == _STDENC_SDGEN_INITIAL ||
			    state == _STDENC_SDGEN_STABLE)
				break;
		}

		/* save the encoding states for the error recovery */
		save_encoding_state(&sc->sc_src_encoding);
		save_encoding_state(&sc->sc_dst_encoding);

		/* mb -> csid/index */
		tmpin = *in;
		szrin = szrout = 0;
		ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx,
			      &tmpin, *inbytes, &szrin);
		if (ret)
			goto err;

		if (szrin == (size_t)-2) {
			/* incomplete character */
			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
			if (ret) {
				ret = EINVAL;
				goto err;
			}
			switch (state) {
			case _STDENC_SDGEN_INITIAL:
			case _STDENC_SDGEN_STABLE:
				/* fetch shift sequences only. */
				goto next;
			}
			ret = EINVAL;
			goto err;
		}
		/* convert the character */
		ret = do_conv(is, sc, &csid, &idx);
		if (ret) {
			if (ret == E_NO_CORRESPONDING_CHAR) {
				inval++;
				szrout = 0;
				if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
				    is->is_use_invalid) {
					ret = wctombx(&sc->sc_dst_encoding,
						      *out, *outbytes,
						      is->is_invalid,
						      &szrout);
					if (ret)
						goto err;
				}
				goto next;
			} else {
				goto err;
			}
		}
		/* csid/index -> mb */
		ret = cstombx(&sc->sc_dst_encoding,
			      *out, *outbytes, csid, idx, &szrout);
		if (ret)
			goto err;
next:
		/*
		 * Account for the bytes actually consumed rather than szrin:
		 * szrin is insufficient on \0, and it still holds (size_t)-2
		 * when only shift sequences were fetched, which would make
		 * an assertion on szrin fail on that path.
		 */
		consumed = (size_t)(tmpin - *in);
		_DIAGASSERT(*inbytes>=consumed && *outbytes>=szrout);
		*inbytes -= consumed;
		*in = tmpin;
		*outbytes -= szrout;
		*out += szrout;
	}
	*invalids = inval;

	return 0;

err:
	restore_encoding_state(&sc->sc_src_encoding);
	restore_encoding_state(&sc->sc_dst_encoding);
err_norestore:
	*invalids = inval;

	return ret;
}