NetBSD-5.0.2/sys/altq/altq_blue.c

Compare this file to the similar file:
Show the results in this format:

/*	$NetBSD: altq_blue.c,v 1.22 2007/03/04 05:59:00 christos Exp $	*/
/*	$KAME: altq_blue.c,v 1.15 2005/04/13 03:44:24 suz Exp $	*/

/*
 * Copyright (C) 1997-2002
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
/*
 * Copyright (c) 1990-1994 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Computer Systems
 *	Engineering Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altq_blue.c,v 1.22 2007/03/04 05:59:00 christos Exp $");

#ifdef _KERNEL_OPT
#include "opt_altq.h"
#include "opt_inet.h"
#endif

#ifdef ALTQ_BLUE	/* blue is enabled by ALTQ_BLUE option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/kauth.h>

#include <net/if.h>
#include <net/if_types.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif

#include <altq/altq.h>
#include <altq/altq_conf.h>
#include <altq/altq_blue.h>

#ifdef ALTQ3_COMPAT
/*
 * Blue is proposed and implemented by Wu-chang Feng <wuchang@eecs.umich.edu>.
 * more information on Blue is available from
 * http://www.eecs.umich.edu/~wuchang/blue/
 */

/*
 * fixed-point uses 12-bit decimal places
 * NOTE(review): FP_SHIFT is not referenced in the visible part of this file.
 */
#define	FP_SHIFT	12	/* fixed-point shift */

#define	BLUE_LIMIT	200	/* default max queue length (qlimit at attach) */
#define	BLUE_STATS		/* collect statistics */

/*
 * blue_list is the head of a singly linked list (chained through rq_next)
 * holding every blue_queue_t created by BLUE_IF_ATTACH.  Entries are
 * unlinked and freed by blue_detach().
 */
static blue_queue_t *blue_list = NULL;

/*
 * internal function prototypes
 *
 * blue_enqueue, blue_dequeue and blue_request are the callbacks handed to
 * altq_attach(); drop_early and mark_ecn are helpers of blue_addq();
 * blue_detach tears down one attached instance.
 */
static int blue_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
static struct mbuf *blue_dequeue(struct ifaltq *, int);
static int drop_early(blue_t *);
static int mark_ecn(struct mbuf *, struct altq_pktattr *, int);
static int blue_detach(blue_queue_t *);
static int blue_request(struct ifaltq *, int, void *);

/*
 * blue device interface
 *
 * altqdev_decl() is a macro defined outside this file; presumably it
 * declares the blueopen/blueclose/blueioctl entry points -- TODO confirm
 * against the ALTQ headers.
 */
altqdev_decl(blue);

/*
 * Open entry point of the blue device.
 *
 * Nothing is set up here; all state is created later, when the queueing
 * scheme is attached to an interface.  None of the arguments is used.
 *
 * Returns 0 unconditionally.
 */
int
blueopen(dev_t dev, int flag, int fmt, struct lwp *l)
{
	return (0);
}

int
blueclose(dev_t dev, int flag, int fmt,
    struct lwp *l)
{
	blue_queue_t *rqp;
	int err, error = 0;

	while ((rqp = blue_list) != NULL) {
		/* destroy all */
		err = blue_detach(rqp);
		if (err != 0 && error == 0)
			error = err;
	}

	return error;
}

/*
 * ioctl entry point of the blue device.
 *
 * addr points at the command's argument structure: a struct blue_interface
 * for BLUE_ENABLE, BLUE_DISABLE, BLUE_IF_ATTACH and BLUE_IF_DETACH, a
 * struct blue_stats for BLUE_GETSTATS and a struct blue_conf for
 * BLUE_CONFIG.  dev and flag are not used.
 *
 * Returns 0 on success or an errno value:
 *	EBADF	no blue instance is attached to the named interface
 *	ENXIO	BLUE_IF_ATTACH named an unknown interface
 *	ENOMEM	BLUE_IF_ATTACH could not allocate its state
 *	EINVAL	unknown command
 * plus whatever the privilege check, altq_enable(), altq_disable(),
 * altq_attach() or blue_detach() report.
 */
int
blueioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag,
    struct lwp *l)
{
	blue_queue_t *rqp;
	struct blue_interface *ifacep;
	struct ifnet *ifp;
	int	error = 0;

	/*
	 * check super-user privilege
	 * (BLUE_GETSTATS is the only command exempt from the check)
	 */
	switch (cmd) {
	case BLUE_GETSTATS:
		break;
	default:
#if (__FreeBSD_version > 400000)
		/*
		 * NOTE(review): `p' is neither a parameter nor a local of
		 * this function as visible here; this branch is compiled
		 * only when __FreeBSD_version > 400000.
		 */
		if ((error = suser(p)) != 0)
			return (error);
#else
		if ((error = kauth_authorize_network(l->l_cred,
		    KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_BLUE, NULL,
		    NULL, NULL)) != 0)
			return (error);
#endif
		break;
	}

	switch (cmd) {

	case BLUE_ENABLE:
		/* turn the discipline on for an already attached interface */
		ifacep = (struct blue_interface *)addr;
		if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_enable(rqp->rq_ifq);
		break;

	case BLUE_DISABLE:
		/* turn the discipline off; the instance stays attached */
		ifacep = (struct blue_interface *)addr;
		if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_disable(rqp->rq_ifq);
		break;

	case BLUE_IF_ATTACH:
		ifp = ifunit(((struct blue_interface *)addr)->blue_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}

		/*
		 * allocate and initialize the blue_queue_t together with its
		 * packet queue and blue_t; each failure path releases what
		 * was allocated before it
		 */
		rqp = malloc(sizeof(blue_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
		if (rqp == NULL) {
			error = ENOMEM;
			break;
		}

		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
		    M_WAITOK|M_ZERO);
		if (rqp->rq_q == NULL) {
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_blue = malloc(sizeof(blue_t), M_DEVBUF,
		    M_WAITOK|M_ZERO);
		if (rqp->rq_blue == NULL) {
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		/* start with an empty queue bounded by BLUE_LIMIT packets */
		rqp->rq_ifq = &ifp->if_snd;
		qtail(rqp->rq_q) = NULL;
		qlen(rqp->rq_q) = 0;
		qlimit(rqp->rq_q) = BLUE_LIMIT;

		/*
		 * default packet time: 1000 bytes / 10Mbps * 8 * 1000000
		 * defaults passed below: flags 0, pkttime 800,
		 * max_pmark 1000, hold_time 50000
		 */
		blue_init(rqp->rq_blue, 0, 800, 1000, 50000);

		/*
		 * set BLUE to this ifnet structure.
		 */
		error = altq_attach(rqp->rq_ifq, ALTQT_BLUE, rqp,
				    blue_enqueue, blue_dequeue, blue_request,
				    NULL, NULL);
		if (error) {
			/* attach refused: undo all three allocations */
			free(rqp->rq_blue, M_DEVBUF);
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			break;
		}

		/* add this state to the head of the blue list */
		rqp->rq_next = blue_list;
		blue_list = rqp;
		break;

	case BLUE_IF_DETACH:
		/* blue_detach() disables, detaches, unlinks and frees */
		ifacep = (struct blue_interface *)addr;
		if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
			error = EBADF;
			break;
		}
		error = blue_detach(rqp);
		break;

	case BLUE_GETSTATS:
		/*
		 * copy queue length/limit, the current marking probability
		 * and the counters into the caller's blue_stats; the
		 * do/while(0) only exists so `break' can leave early
		 */
		do {
			struct blue_stats *q_stats;
			blue_t *rp;

			q_stats = (struct blue_stats *)addr;
			if ((rqp = altq_lookup(q_stats->iface.blue_ifname,
					     ALTQT_BLUE)) == NULL) {
				error = EBADF;
				break;
			}

			q_stats->q_len 	   = qlen(rqp->rq_q);
			q_stats->q_limit   = qlimit(rqp->rq_q);

			rp = rqp->rq_blue;
			q_stats->q_pmark = rp->blue_pmark;
			q_stats->xmit_packets  = rp->blue_stats.xmit_packets;
			q_stats->xmit_bytes    = rp->blue_stats.xmit_bytes;
			q_stats->drop_packets  = rp->blue_stats.drop_packets;
			q_stats->drop_bytes    = rp->blue_stats.drop_bytes;
			q_stats->drop_forced   = rp->blue_stats.drop_forced;
			q_stats->drop_unforced = rp->blue_stats.drop_unforced;
			q_stats->marked_packets = rp->blue_stats.marked_packets;

		} while (/*CONSTCOND*/ 0);
		break;

	case BLUE_CONFIG:
		do {
			struct blue_conf *fc;
			int limit;

			fc = (struct blue_conf *)addr;
			if ((rqp = altq_lookup(fc->iface.blue_ifname,
					       ALTQT_BLUE)) == NULL) {
				error = EBADF;
				break;
			}
			/*
			 * the requested limit is installed as given; no
			 * range check is applied here
			 */
			limit = fc->blue_limit;
			qlimit(rqp->rq_q) = limit;
			fc->blue_limit = limit;	/* write back the new value */
			/*
			 * pkttime, max_pmark and hold_time are replaced only
			 * when the request carries a positive value; flags
			 * are always replaced
			 */
			if (fc->blue_pkttime > 0)
				rqp->rq_blue->blue_pkttime = fc->blue_pkttime;
			if (fc->blue_max_pmark > 0)
				rqp->rq_blue->blue_max_pmark = fc->blue_max_pmark;
			if (fc->blue_hold_time > 0)
				rqp->rq_blue->blue_hold_time = fc->blue_hold_time;
			rqp->rq_blue->blue_flags = fc->blue_flags;

			/*
			 * re-run blue_init() with the merged parameters; it
			 * also marks the queue idle and restarts blue_last
			 */
			blue_init(rqp->rq_blue, rqp->rq_blue->blue_flags,
				  rqp->rq_blue->blue_pkttime,
				  rqp->rq_blue->blue_max_pmark,
				  rqp->rq_blue->blue_hold_time);
		} while (/*CONSTCOND*/ 0);
		break;

	default:
		error = EINVAL;
		break;
	}
	return error;
}

/*
 * Detach one blue instance from its interface queue and release it.
 *
 * The discipline is disabled first if it is still enabled, then detached
 * with altq_detach().  If the detach fails its error is returned and the
 * instance is left untouched (still on blue_list, nothing freed).
 * Otherwise the instance is unlinked from blue_list -- a diagnostic is
 * printed if it was not on the list -- and its queue, blue state and the
 * instance itself are freed.
 *
 * Returns 0 on success or the altq_detach() error.
 */
static int
blue_detach(blue_queue_t *rqp)
{
	blue_queue_t **linkp;
	int error;

	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		altq_disable(rqp->rq_ifq);

	error = altq_detach(rqp->rq_ifq);
	if (error != 0)
		return (error);

	/* walk the links until one points at rqp or the list ends */
	linkp = &blue_list;
	while (*linkp != NULL && *linkp != rqp)
		linkp = &(*linkp)->rq_next;

	if (*linkp == rqp)
		*linkp = rqp->rq_next;
	else
		printf("blue_detach: no state found in blue_list!\n");

	free(rqp->rq_q, M_DEVBUF);
	free(rqp->rq_blue, M_DEVBUF);
	free(rqp, M_DEVBUF);
	return (error);
}

/*
 * blue support routines
 */

/*
 * (Re)initialize the tunable part of a blue_t.
 *
 *	rp		state to initialize
 *	flags		stored in blue_flags (tested against BLUEF_ECN* by
 *			the enqueue path)
 *	pkttime		stored in blue_pkttime; 0 is replaced by 1
 *	blue_max_pmark	ceiling of the marking probability blue_pmark
 *	blue_hold_time	minimum number of microseconds between two
 *			adjustments of blue_pmark
 *
 * The queue is marked idle and blue_last is set to the current time.
 * blue_pmark and the statistics counters are not touched.
 *
 * Always returns 0.
 */
int
blue_init(blue_t *rp, int flags, int pkttime, int blue_max_pmark,
    int blue_hold_time)
{
	rp->blue_idle = 1;
	rp->blue_flags = flags;
	rp->blue_pkttime = (pkttime == 0) ? 1 : pkttime;
	rp->blue_max_pmark = blue_max_pmark;
	rp->blue_hold_time = blue_hold_time;

	/*
	 * The former "slow link" adjustment computed a packets-per-second
	 * figure and then did nothing with it (both branches were empty),
	 * so it has been dropped.
	 */

	microtime(&rp->blue_last);
	return (0);
}

/*
 * enqueue routine (handed to altq_attach() as the enqueue callback):
 *
 *	passes the packet to blue_addq() and, when it was queued, counts it
 *	in ifq->ifq_len.
 *
 *	returns: 0 when successfully queued.
 *		 ENOBUFS when drop occurs.
 */
static int
blue_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
{
	blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;

	if (blue_addq(rqp->rq_blue, rqp->rq_q, m, pktattr) != 0)
		return (ENOBUFS);

	ifq->ifq_len++;
	return (0);
}

/* drop decision taken by blue_addq() for the packet being enqueued */
#define	DTYPE_NODROP	0	/* no drop */
#define	DTYPE_FORCED	1	/* a "forced" drop (queue at its limit) */
#define	DTYPE_EARLY	2	/* an "unforced" (early) drop */

/*
 * Microseconds elapsed from *then to *now.
 *
 * The arithmetic is done in 64 bits: multiplying a seconds difference by
 * 1000000 in a plain int overflows after roughly 2147 seconds (or when
 * the clock has been stepped backwards), which used to corrupt the
 * hold-time comparisons in blue_addq().
 */
static int64_t
blue_elapsed_usec(const struct timeval *now, const struct timeval *then)
{
	int64_t secs, usecs;

	secs = (int64_t)now->tv_sec - (int64_t)then->tv_sec;
	usecs = (int64_t)now->tv_usec - (int64_t)then->tv_usec;
	return (secs * 1000000 + usecs);
}

/*
 * Run the BLUE algorithm for one arriving packet.
 *
 *	rp	blue state (marking probability, timers, statistics)
 *	q	packet queue the packet is added to
 *	m	the arriving packet
 *	pktattr	packet attributes used for ECN marking; may be NULL
 *
 * Marking probability (blue_pmark) handling:
 *  - on the first enqueue after the queue went idle, blue_pmark is set
 *    to 1 when more than one whole second has passed since blue_last,
 *    otherwise it is decremented (not below 0) when more than
 *    blue_hold_time microseconds have passed;
 *  - on a forced drop it is raised by blue_max_pmark / 8, capped at
 *    blue_max_pmark, again at most once per blue_hold_time.
 *
 * Drop handling:
 *  - early drop: when drop_early() fires and more than one packet is
 *    queued, the packet is ECN-marked if possible, else dropped;
 *  - forced drop: when the queue has reached its limit, the packet is
 *    queued and a victim picked by _getq_random() is dropped instead.
 *
 * returns: 0 when the packet was queued and nothing was dropped.
 *	    -1 when a packet was dropped; the dropped mbuf has been freed.
 */
int
blue_addq(blue_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	struct timeval now;
	int droptype;

	/*
	 * if we were idle, this is an enqueue onto an empty queue
	 * and we should decrement marking probability
	 */
	if (rp->blue_idle) {
		rp->blue_idle = 0;
		microtime(&now);
		if ((int64_t)now.tv_sec - (int64_t)rp->blue_last.tv_sec > 1) {
			/*
			 * NOTE(review): the probability is set to 1, not 0,
			 * after a long idle period; kept as in the original.
			 */
			rp->blue_pmark = 1;
			microtime(&rp->blue_last);
		} else if (blue_elapsed_usec(&now, &rp->blue_last) >
		    rp->blue_hold_time) {
			rp->blue_pmark--;
			if (rp->blue_pmark < 0)
				rp->blue_pmark = 0;
			microtime(&rp->blue_last);
		}
	}

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (drop_early(rp) && qlen(q) > 1) {
		/* mark or drop by blue */
		if ((rp->blue_flags & BLUEF_ECN) &&
		    mark_ecn(m, pktattr, rp->blue_flags)) {
			/* successfully marked.  do not drop. */
#ifdef BLUE_STATS
			rp->blue_stats.marked_packets++;
#endif
		} else {
			/* unforced drop by blue */
			droptype = DTYPE_EARLY;
		}
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);

	if (droptype == DTYPE_NODROP) {
		/* successfully queued */
		return (0);
	}

	if (droptype == DTYPE_EARLY) {
		/* drop the incoming packet */
#ifdef BLUE_STATS
		rp->blue_stats.drop_unforced++;
#endif
	} else {
		/* forced drop, select a victim packet in the queue. */
		m = _getq_random(q);
		microtime(&now);
		if (blue_elapsed_usec(&now, &rp->blue_last) >
		    rp->blue_hold_time) {
			/* congestion: raise the probability by 1/8 of max */
			rp->blue_pmark += rp->blue_max_pmark >> 3;
			if (rp->blue_pmark > rp->blue_max_pmark)
				rp->blue_pmark = rp->blue_max_pmark;
			microtime(&rp->blue_last);
		}
#ifdef BLUE_STATS
		rp->blue_stats.drop_forced++;
#endif
	}
#ifdef BLUE_STATS
	rp->blue_stats.drop_packets++;
	rp->blue_stats.drop_bytes += m->m_pkthdr.len;
#endif
	m_freem(m);
	return (-1);
}

/*
 * Decide whether the arriving packet should be dropped or marked early.
 *
 * The early-drop probability is kept in blue_pmark, on a scale whose
 * ceiling is blue_max_pmark: a random number modulo blue_max_pmark is
 * compared against blue_pmark.
 *
 * A ceiling of zero or less (blue_init() accepts any int) would make the
 * modulo divide by zero or wrap to a huge unsigned value, so such a
 * configuration is treated as "never drop early".
 *
 * returns: 1 to drop or mark, 0 otherwise.
 */
static int
drop_early(blue_t *rp)
{
	if (rp->blue_max_pmark <= 0) {
		/* no usable scale: no drop/mark */
		return (0);
	}

	if ((arc4random() % rp->blue_max_pmark) < rp->blue_pmark) {
		/* drop or mark */
		return (1);
	}
	/* no drop/mark */
	return (0);
}

/*
 * try to mark CE bit to the packet.
 *
 *	m	the packet (mbuf chain)
 *	pktattr	packet attributes: address family and header pointer
 *	flags	BLUEF_ECN4 / BLUEF_ECN6 select which families may be marked
 *
 * Nothing is marked when pktattr is NULL, when the family is neither
 * AF_INET nor AF_INET6, or when pattr_hdr no longer points into the mbuf
 * chain (in which case pattr_af is reset to AF_UNSPEC).  A packet that is
 * not ECN-capable is left alone; one that already carries CE counts as
 * marked.  For IPv4 the header checksum is updated incrementally.
 *
 *    returns 1 if successfully marked, 0 otherwise.
 */
static int
mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
{
	struct mbuf *seg;

	if (pktattr == NULL)
		return (0);
	if (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	seg = m;
	while (seg != NULL) {
		if (((char *)pktattr->pattr_hdr >= seg->m_data) &&
		    ((char *)pktattr->pattr_hdr < seg->m_data + seg->m_len))
			break;
		seg = seg->m_next;
	}
	if (seg == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
		return (0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip;
		u_int8_t tos_before;
		int sum;

		if ((flags & BLUEF_ECN4) == 0)
			return (0);	/* IPv4 marking not enabled */

		ip = (struct ip *)pktattr->pattr_hdr;
		if (ip->ip_v != 4)
			return (0);	/* version mismatch! */
		if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
			return (0);	/* not-ECT */
		if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
			return (1);	/* already marked */

		/* ecn-capable but not marked: mark CE */
		tos_before = ip->ip_tos;
		ip->ip_tos |= IPTOS_ECN_CE;

		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += (~tos_before & 0xffff) + ip->ip_tos;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */
		ip->ip_sum = htons(~sum & 0xffff);
		return (1);
	}
#ifdef INET6
	if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6;
		u_int32_t flow;

		if ((flags & BLUEF_ECN6) == 0)
			return (0);	/* IPv6 marking not enabled */

		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		flow = ntohl(ip6->ip6_flow);
		if ((flow >> 28) != 6)
			return (0);	/* version mismatch! */
		if ((flow & (IPTOS_ECN_MASK << 20)) ==
		    (IPTOS_ECN_NOTECT << 20))
			return (0);	/* not-ECT */
		if ((flow & (IPTOS_ECN_MASK << 20)) ==
		    (IPTOS_ECN_CE << 20))
			return (1);	/* already marked */

		/* ecn-capable but not marked,  mark CE */
		flow |= (IPTOS_ECN_CE << 20);
		ip6->ip6_flow = htonl(flow);
		return (1);
	}
#endif  /* INET6 */

	/* not marked */
	return (0);
}

/*
 * dequeue routine (handed to altq_attach() as the dequeue callback):
 *	must be called in splnet.
 *
 *	With op == ALTDQ_POLL the head of the queue is returned without
 *	being removed.  Otherwise a packet is taken with blue_getq() and,
 *	if there was one, ifq->ifq_len is decremented.
 *
 *	returns: mbuf dequeued.
 *		 NULL when no packet is available in the queue.
 */

static struct mbuf *
blue_dequeue(struct ifaltq * ifq, int op)
{
	blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;
	struct mbuf *pkt;

	if (op == ALTDQ_POLL)
		return (qhead(rqp->rq_q));

	pkt = blue_getq(rqp->rq_blue, rqp->rq_q);
	if (pkt == NULL)
		return (NULL);

	ifq->ifq_len--;
	return (pkt);
}

/*
 * Take the next packet off q and keep the blue idle state current.
 *
 * When the queue is empty and was not already flagged idle, it is flagged
 * idle and blue_last is set to the current time.  When a packet is
 * returned, the idle flag is cleared and (with BLUE_STATS) the transmit
 * counters are updated.
 *
 * returns: the dequeued mbuf, or NULL when the queue is empty.
 */
struct mbuf *
blue_getq(blue_t *rp, class_queue_t *q)
{
	struct mbuf *pkt = _getq(q);

	if (pkt != NULL) {
		rp->blue_idle = 0;
#ifdef BLUE_STATS
		rp->blue_stats.xmit_packets++;
		rp->blue_stats.xmit_bytes += pkt->m_pkthdr.len;
#endif
		return (pkt);
	}

	/* nothing queued: note when the idle period started */
	if (rp->blue_idle == 0) {
		rp->blue_idle = 1;
		microtime(&rp->blue_last);
	}
	return NULL;
}

/*
 * Request handler (handed to altq_attach() as the request callback).
 *
 * Only ALTRQ_PURGE is acted on: the blue packet queue is flushed and, if
 * the discipline is enabled on ifq, ifq_len is reset to 0.  Any other
 * request is ignored.  arg is not used.
 *
 * Always returns 0.
 */
static int
blue_request(struct ifaltq *ifq, int req, void *arg)
{
	blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;

	if (req == ALTRQ_PURGE) {
		_flushq(rqp->rq_q);
		if (ALTQ_IS_ENABLED(ifq))
			ifq->ifq_len = 0;
	}
	return (0);
}


#ifdef KLD_MODULE

/*
 * Switch entry for the blue discipline: its name followed by the open,
 * close and ioctl entry points defined in this file.  Only built when
 * KLD_MODULE is defined.
 */
static struct altqsw blue_sw =
	{"blue", blueopen, blueclose, blueioctl};

/* ALTQ_MODULE() is defined outside this file; presumably it registers
 * blue_sw as a loadable module for ALTQT_BLUE -- TODO confirm. */
ALTQ_MODULE(altq_blue, ALTQT_BLUE, &blue_sw);

#endif /* KLD_MODULE */

#endif /* ALTQ3_COMPAT */
#endif /* ALTQ_BLUE */