OpenSolaris_b135/cmd/powertop/common/cpuidle.c

/*
 * Copyright 2009, Intel Corporation
 * Copyright 2009, Sun Microsystems, Inc
 *
 * This file is part of PowerTOP
 *
 * This program file is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program in a file named COPYING; if not, write to the
 * Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301 USA
 *
 * Authors:
 *	Arjan van de Ven <arjan@linux.intel.com>
 *	Eric C Saxe <eric.saxe@sun.com>
 *	Aubrey Li <aubrey.li@intel.com>
 */

/*
 * GPL Disclaimer
 *
 * For the avoidance of doubt, except that if any license choice other
 * than GPL or LGPL is available it will apply instead, Sun elects to
 * use only the General Public License version 2 (GPLv2) at this time
 * for any software where a choice of GPL license versions is made
 * available with the language indicating that GPLv2 or any later
 * version may be used, or where a choice of which version of the GPL
 * is applied is otherwise unspecified.
 */

#include <string.h>
#include <dtrace.h>
#include "powertop.h"

/*
 * Scale a value expressed in seconds by NANOSEC, i.e. convert seconds to
 * nanoseconds. NANOSEC is not defined in this file; it is expected to be
 * provided by one of the included headers.
 */
#define	S2NS(x)		((x) * (NANOSEC))

/*
 * libdtrace handle shared by the functions in this file. It is assigned by
 * pt_cpuidle_stat_prepare() and used by pt_cpuidle_stat_collect().
 */
static dtrace_hdl_t 	*dtp;

/*
 * Buffer containing DTrace program to track CPU idle state transitions.
 *
 * The program has two clauses on the idle-state-transition probe:
 *
 *  - When the probe fires with a non-zero arg0, the current timestamp and
 *    arg0 (the state) are saved in thread-local variables.
 *
 *  - When the probe fires with arg0 == 0 and a start timestamp was saved,
 *    the "number" aggregation (keyed by the saved state) is incremented, the
 *    elapsed time since the saved timestamp is added to the "times"
 *    aggregation (same key), and the thread-local variables are cleared.
 *
 * The aggregation names "number" and "times" are matched by name in
 * pt_cpuidle_dtrace_walk(), so they must be kept in sync with it.
 */
static const char *dtp_cpuidle =
":::idle-state-transition"
"/arg0 != 0/"
"{"
"	self->start = timestamp;"
"	self->state = arg0;"
"}"
""
":::idle-state-transition"
"/arg0 == 0 && self->start/"
"{"
"	@number[self->state] = count();"
"	@times[self->state] = sum(timestamp - self->start);"
"	self->start = 0;"
"	self->state = 0;"
"}";

/*
 * Same program as dtp_cpuidle, but both clauses carry the additional
 * predicate "cpu == $0" so that only transitions on a specific CPU are
 * recorded.
 *
 * $0 is a D macro argument; pt_cpuidle_stat_prepare() passes g_argc/g_argv
 * to dtrace_program_strcompile() when compiling this program.
 * NOTE(review): presumably g_argv[0] holds the CPU id selected by the user;
 * confirm against the code that populates g_argv.
 */
static const char *dtp_cpuidle_c =
":::idle-state-transition"
"/cpu == $0 &&"
" arg0 != 0/"
"{"
"	self->start = timestamp;"
"	self->state = arg0;"
"}"
""
":::idle-state-transition"
"/cpu == $0 &&"
" arg0 == 0 && self->start/"
"{"
"	@number[self->state] = count();"
"	@times[self->state] = sum(timestamp - self->start);"
"	self->start = 0;"
"	self->state = 0;"
"}";

/*
 * Aggregation walker callback, defined at the bottom of this file. Declared
 * here because pt_cpuidle_stat_collect() references it before its definition.
 */
static int 	pt_cpuidle_dtrace_walk(const dtrace_aggdata_t *, void *);

/*
 * Set up DTrace so that CPU idle state transitions are tracked.
 *
 * Opens a libdtrace handle (stored in the file-scope 'dtp'), compiles and
 * enables one of the D programs defined above (the per-CPU variant when
 * PT_ON_CPU is true, the system-wide one otherwise), configures the
 * aggregation options and starts tracing.
 *
 * Returns 0 on success, -1 if the library could not be opened, or the
 * libdtrace error number if compiling, enabling or starting the program
 * (or reading back 'statusrate') fails. Failures to set an individual
 * aggregation option are reported but are not fatal.
 */
int
pt_cpuidle_stat_prepare(void)
{
	dtrace_proginfo_t	proginfo;
	dtrace_optval_t		rate;
	dtrace_prog_t		*compiled;
	const char		*script;
	int			open_err;

	dtp = dtrace_open(DTRACE_VERSION, 0, &open_err);
	if (dtp == NULL) {
		pt_error("cannot open dtrace library for the %s report: %s\n",
		    g_msg_idle_state, dtrace_errmsg(NULL, open_err));
		return (-1);
	}

	/*
	 * The script to run depends on whether the user asked for a
	 * single CPU to be observed.
	 */
	script = (PT_ON_CPU) ? dtp_cpuidle_c : dtp_cpuidle;

	compiled = dtrace_program_strcompile(dtp, script,
	    DTRACE_PROBESPEC_NAME, 0, g_argc, g_argv);
	if (compiled == NULL) {
		pt_error("failed to compile %s program\n", g_msg_idle_state);
		return (dtrace_errno(dtp));
	}

	if (dtrace_program_exec(dtp, compiled, &proginfo) == -1) {
		pt_error("failed to enable %s probes\n", g_msg_idle_state);
		return (dtrace_errno(dtp));
	}

	/* Option failures are reported, but tracing is still attempted. */
	if (dtrace_setopt(dtp, "aggsize", "128k") == -1)
		pt_error("failed to set %s 'aggsize'\n", g_msg_idle_state);

	if (dtrace_setopt(dtp, "aggrate", "0") == -1)
		pt_error("failed to set %s 'aggrate'\n", g_msg_idle_state);

	if (dtrace_setopt(dtp, "aggpercpu", NULL) == -1)
		pt_error("failed to set %s 'aggpercpu'\n", g_msg_idle_state);

	if (dtrace_go(dtp) != 0) {
		pt_error("failed to start %s observation\n", g_msg_idle_state);
		return (dtrace_errno(dtp));
	}

	/* The value read back is not used; only the failure is acted upon. */
	if (dtrace_getopt(dtp, "statusrate", &rate) == -1) {
		pt_error("failed to get %s 'statusrate'\n", g_msg_idle_state);
		return (dtrace_errno(dtp));
	}

	return (0);
}

/*
 * Gather the idle state statistics accumulated since the previous call.
 *
 * With the probes already enabled, this takes a snapshot of the DTrace
 * aggregations and hands every record to pt_cpuidle_dtrace_walk(), which
 * does the accounting into g_cstate_info. The aggregations are then cleared
 * and the summary globals (g_total_c_time, g_max_cstate, g_longest_cstate)
 * are refreshed from g_cstate_info.
 *
 * 'interval' is the length of the sampling interval in seconds.
 *
 * Returns 0 on success, or -1 if dtrace_status() fails. Snapshot and walk
 * failures are reported through pt_error() but do not abort the collection.
 */
int
pt_cpuidle_stat_collect(double interval)
{
	hrtime_t	longest = 0;
	int		cs;

	/*
	 * Start by charging the whole interval, across all observed CPUs,
	 * to state 0. The walker moves time out of this bucket for each
	 * record describing time spent in another state.
	 */
	g_cstate_info[0].total_time = (uint64_t)S2NS(interval *
	    g_ncpus_observed);

	if (dtrace_status(dtp) == -1)
		return (-1);

	if (dtrace_aggregate_snap(dtp) != 0)
		pt_error("failed to collect data for %s\n", g_msg_idle_state);

	if (dtrace_aggregate_walk_keyvarsorted(dtp, pt_cpuidle_dtrace_walk,
	    NULL) != 0)
		pt_error("failed to sort %s data\n", g_msg_idle_state);

	dtrace_aggregate_clear(dtp);

	/*
	 * Derive the totals from g_cstate_info: the overall time, the
	 * highest state index seen so far, and the state with the largest
	 * 'last_time' value.
	 */
	g_total_c_time = 0;
	for (cs = 0; cs < NSTATES; cs++) {
		/* States with no time accounted are ignored entirely. */
		if (!(g_cstate_info[cs].total_time > 0))
			continue;

		g_total_c_time += g_cstate_info[cs].total_time;

		if (cs > g_max_cstate)
			g_max_cstate = cs;

		if (g_cstate_info[cs].last_time > longest) {
			longest = g_cstate_info[cs].last_time;
			g_longest_cstate = cs;
		}
	}

	return (0);
}

/*
 * DTrace aggregation walker that sorts through a snapshot of data records
 * collected during firings of the idle-state-transition probe.
 *
 * XXX A way of querying the current idle state for a CPU is needed in addition
 *     to logic similar to that in cpufreq.c
 */
/*ARGSUSED*/
static int
pt_cpuidle_dtrace_walk(const dtrace_aggdata_t *data, void *arg)
{
	dtrace_aggdesc_t 	*aggdesc = data->dtada_desc;
	dtrace_recdesc_t 	*rec;
	uint64_t 		n = 0, state;
	int 			i;

	rec = &aggdesc->dtagd_rec[1];

	switch (g_bit_depth) {
		case 32:
			/* LINTED - alignment */
			state = *(uint32_t *)(data->dtada_data +
			    rec->dtrd_offset);
			break;
		case 64:
			/* LINTED - alignment */
			state = *(uint64_t *)(data->dtada_data +
			    rec->dtrd_offset);
			break;
	}

	if (strcmp(aggdesc->dtagd_name, "number") == 0) {
		for (i = 0; i < g_ncpus; i++) {
			/* LINTED - alignment */
			n += *((uint64_t *)(data->dtada_percpu[i]));
		}
		g_total_events += n;
		g_cstate_info[state].events += n;
	}
	else
		if (strcmp(aggdesc->dtagd_name, "times") == 0) {
			for (i = 0; i < g_ncpus; i++) {
				/* LINTED - alignment */
				n += *((uint64_t *)(data->dtada_percpu[i]));
			}
			g_cstate_info[state].last_time = n;
			g_cstate_info[state].total_time += n;
			if (g_cstate_info[0].total_time >= n)
				g_cstate_info[0].total_time -= n;
			else
				g_cstate_info[0].total_time = 0;
		}

	return (DTRACE_AGGWALK_NEXT);
}