OpenSolaris_b135/lib/libshell/common/scripts/simplefileattributetree1.sh

#!/usr/bin/ksh93

#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#

#
# simplefileattributetree1 - build a simple file tree (including file attributes)
#

# Search the POSIX-conformant tool directories (/usr/xpg6/bin and
# /usr/xpg4/bin) first; the Solaris tools in /usr/bin are not conformant.
PATH=/usr/xpg6/bin:/usr/xpg4/bin:/bin:/usr/bin
export PATH

# All math must run with the "C" numeric conventions, otherwise locales
# with a different radix point (e.g. ',' instead of '.' in de_DE.*) cause
# trouble. This has to be in place _before_ the script defines any
# floating-point constant.
# A non-empty LC_ALL is split up into the individual categories below and
# then removed, so that the LC_NUMERIC setting made afterwards is not
# shadowed by it.
if [[ -n "${LC_ALL}" ]] ; then
	export LC_MONETARY="${LC_ALL}"
	export LC_MESSAGES="${LC_ALL}"
	export LC_COLLATE="${LC_ALL}"
	export LC_CTYPE="${LC_ALL}"
	unset LC_ALL
fi
LC_NUMERIC=C
export LC_NUMERIC


#
# add_file_to_tree - insert a leaf for one file into a compound variable tree
#
# Arguments:
#   $1 - name of the compound variable which is the root of the tree
#   $2 - path of the file; every '/'-separated component except the last
#        one becomes (or re-uses) an entry of a ".nodes[]" associative
#        array, the last component becomes an entry of ".elements[]"
#   $3 - name of a variable which receives the full variable name of the
#        new leaf (e.g. "tree.nodes[foo].nodes[bar].elements[baz]")
#
# The new leaf is a compound variable which only contains "filepath" (=$2).
# Always returns 0.
#
function add_file_to_tree
{
	typeset treename=$1
	typeset filename=$2
	nameref destnodename=$3
	integer i
	typeset nodepath # full name of compound variable
	typeset -a pe # path elements

	# first build an array containing the names of each path element
	# (e.g. "foo/bar/baz" results in an array containing "( 'foo' 'bar' 'baz' )")
	typeset IFS='/'
	pe+=( ${filename} )
	
	# an absolute path yields an empty first element; that node is named "/"
	[[ ${pe[0]} == '' ]] && pe[0]='/'

	# walk path described via the "pe" array and build nodes if
	# there aren't any nodes yet
	nodepath="${treename}"
	for (( i=0 ; i < (${#pe[@]}-1) ; i++ )) ; do
		nameref x="${nodepath}"

		# Only declare the "nodes" array when it does not exist yet.
		# The test must look at "x.nodes" (the array created here) and
		# [[ -v ]] is not usable for it because for an array it only
		# inspects element 0; an empty "typeset +p" output is used as
		# the "not declared yet" indicator instead.
		[[ "${ typeset +p x.nodes ; }" == "" ]] && compound -A x.nodes
	
		nodepath+=".nodes[${pe[i]}]"
	done
	
	# insert element ("i" now indexes the last path element)
	nameref node="${nodepath}"
	[[ "${ typeset +p node.elements ; }" == "" ]] && compound -A node.elements
	node.elements[${pe[i]}]=(
		filepath="${filename}"
	)
	
	# hand the variable name of the new leaf back to the caller
	destnodename="${!node}.elements[${pe[i]}]"
	
	return 0
}

#
# parse_findls - split one line of "find -ls" output into its fields
#
# Arguments:
#   $1 - name of the compound variable which receives the fields
#        (inodenum, kbblocks, mode, numlinks, owner.user, owner.group,
#        filesize, date, filepath)
#   $2 - one line of "find -ls" output
#
# Returns 0 on success. A line which does not have the expected layout is
# reported on stderr and 1 is returned; $1 is left untouched in that case,
# so callers should check the return status.
#
function parse_findls
{
	nameref out=$1
	typeset str="$2"
	
	# find -ls on Solaris uses the following output format by default:
	#604302    3 -rw-r--r--   1 test001  users        2678 May  9 00:46 ./httpsresdump

	# The line is matched exactly once (~(Elr) = extended regular
	# expression, anchored at both ends); the sub-matches are then taken
	# from the ".sh.match" array. Mode, user and group accept any run of
	# non-blank characters so that names such as "www-data" or a mode
	# with a trailing ACL marker are parsed, too.
	if [[ "${str}" == ~(Elr)[[:space:]]*([[:digit:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([^[:space:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([^[:space:]]+)[[:space:]]+([^[:space:]]+)[[:space:]]+([[:digit:]]+)[[:space:]]+([[:alpha:]]*[[:space:]]+[[:digit:]]*[[:space:]]+[[:digit:]:]+)[[:space:]]+(.+) ]] ; then
		integer out.inodenum="${.sh.match[1]}"
		integer out.kbblocks="${.sh.match[2]}"
		typeset out.mode="${.sh.match[3]}"
		integer out.numlinks="${.sh.match[4]}"
		compound out.owner=(
			typeset user="${.sh.match[5]}"
			typeset group="${.sh.match[6]}"
		)
		integer out.filesize="${.sh.match[7]}"
		typeset out.date="${.sh.match[8]}"
		typeset out.filepath="${.sh.match[9]}"

		return 0
	fi

	# Never feed an unparsed line into the integer fields above: their
	# values are evaluated as arithmetic expressions.
	print -u2 -f "parse_findls: cannot parse find -ls line: %s\n" "${str}"
	return 1
}

#
# usage - show the usage message and terminate the script
#
# Resets OPTIND and runs getopts with the option string '-?' against the
# script's usage specification (ksh93's getopts answers that with the
# usage message), then exits with status 2.
# Reads the global variables "progname" and "simplefileattributetree1_usage".
#
function usage
{
	OPTIND=0
	getopts -a "${progname}" "${simplefileattributetree1_usage}" OPT '-?'
	exit 2
}

# main

# load basename(1) and dirname(1) as shell builtins
builtin basename dirname

# no pathname expansion; expanding an unset variable is an error
set -o noglob -o nounset

# root of the tree which is filled from the file list
typeset -C filetree

# time stamps taken right before and right after the tree is built
typeset -C bench=(
	float start
	float stop
)

# settings which can be changed via command line options; the values are
# the strings "true"/"false"
typeset -C appconfig=(
	typeset do_benchmarking=false
	typeset -C do_record=(
		typeset content=false
		typeset filetype=false
	)
)


integer i

# name of this script without directory part (used in messages)
typeset progname="$(basename "${0}")"

# Usage specification handed to getopts, both for parsing the command
# line options and for the usage message.
# The description names find(1) -ls as the data source because that is
# what the script reads; file(1) only supplies the optional file type.
typeset -r simplefileattributetree1_usage=$'+
[-?\n@(#)\$Id: simplefileattributetree1 (Roland Mainz) 2009-06-26 \$\n]
[-author?Roland Mainz <roland.mainz@nrubsig.org>]
[+NAME?simplefileattributetree1 - generate compound variable tree which contains file names and their attributes]
[+DESCRIPTION?\bsimplefileattributetree1\b is a simple variable tree 
	demo which builds a compound variable tree based on the output
	of \bfind\b(1) -ls which contains the file name, the file attributes
	and optionally file type and content]
[b:benchmark?Print time needed to generate the tree.]
[c:includecontent?Include the file\'s content in the tree, split into 1kb blocks.]
[t:includefiletype?Include the file type (output of /usr/xpg4/bin/file).]

path

[+SEE ALSO?\bksh93\b(1), \bfile\b(1), \bfind\b(1)]
'

# Evaluate the command line options. An option given with a leading '+'
# (reported by getopts as "+b", "+c" or "+t") switches the setting off
# again; everything else is answered with the usage message.
while getopts -a "${progname}" "${simplefileattributetree1_usage}" OPT ; do
	if [[ "${OPT}" == 'b' ]] ; then
		appconfig.do_benchmarking="true"
	elif [[ "${OPT}" == '+b' ]] ; then
		appconfig.do_benchmarking="false"
	elif [[ "${OPT}" == 'c' ]] ; then
		appconfig.do_record.content="true"
	elif [[ "${OPT}" == '+c' ]] ; then
		appconfig.do_record.content="false"
	elif [[ "${OPT}" == 't' ]] ; then
		appconfig.do_record.filetype="true"
	elif [[ "${OPT}" == '+t' ]] ; then
		appconfig.do_record.filetype="false"
	else
		usage
	fi
done
# drop the options which were processed, leaving only the operands
shift $(( OPTIND - 1 ))


# argument prechecks: at least one <path> operand has to be left
(( $# > 0 )) || {
	print -u2 -f "%s: Missing <path> argument.\n" "${progname}"
	exit 1
}


print -u2 -f "# reading file names...\n"
# Collect the "find -ls" output of _all_ <path> operands: the array is
# declared once in front of the loop and extended for each operand
# (assigning it anew in every round would keep only the lines of the
# last operand).
typeset -a findls_lines
while (( $# > 0 )) ; do
	# "ulimit -c 0" is used to force ksh93 to use a separate process for subshells,
	# this is used to work around a bug with LC_ALL changes bleeding through subshells.
	# IFS is set to newline only so that each output line becomes one array
	# element, and is put back to the default value afterwards.
	IFS=$'\n' ; findls_lines+=( $(ulimit -c 0 ; LC_ALL=C find "$1" -type f -ls) ) ; IFS=$' \t\n'
	shift
done


print -u2 -f "# building tree...\n"

# "appconfig.do_benchmarking" holds the string "true" or "false", which is
# run as a command here; the start time is only taken for "true"
${appconfig.do_benchmarking} && (( bench.start=SECONDS ))

# one round per line of "find -ls" output collected in "findls_lines"
for (( i=0 ; i < ${#findls_lines[@]} ; i++ )) ; do
	compound parseddata
	typeset treenodename
	
	# parse "find -ls" output
	parse_findls parseddata "${findls_lines[i]}"
	
	# add node to tree and return its absolute name in "treenodename"
	add_file_to_tree filetree "${parseddata.filepath}" treenodename
	
	# merge parsed "find -ls" output into tree node
	nameref treenode="${treenodename}"
	treenode+=parseddata
	
	# extras (calculated from the existing values in "parseddata")
	typeset treenode.dirname="${ dirname "${treenode.filepath}" ; }"
	typeset treenode.basename="${ basename "${treenode.filepath}" ; }"
	
	# optional: record the output of file(1) for this file
	if ${appconfig.do_record.filetype} ; then
		# Using /usr/(xpg4/)*/bin/file requires a |fork()|+|exec()| which makes the script a few hundred times slower... ;-(
		typeset treenode.filetype="$(file "${treenode.filepath}")"
	fi
	
	# optional: record the file content and its hash sums (readable files only)
	if ${appconfig.do_record.content} ; then
		if [[ -r "${treenode.filepath}" ]] ; then
			# We use an array of compound variables here to support
			# files with holes (and later alternative streams, too)
			compound -a treenode.content
			integer cl=0
			# The loop condition first creates array element "cl" and
			# then tries to fill its binary field via "read -n1024";
			# the loop ends when the read fails, which leaves one
			# unfilled element behind that is removed after the loop.
			while \
				{
					treenode.content[${cl}]=(
						typeset type="data" # (todo: "add support for "holes" (sparse files))
						typeset -b bin
					)
					read -n1024 treenode.content[${cl}].bin
				} ; do
				(( cl++ ))
			done < "${treenode.filepath}"
			unset treenode.content[${cl}]

			# hash sums calculated from the file itself
			typeset -A treenode.hashsum=(
				[md5]="$(sum -x md5 < "${treenode.filepath}")"
				[sha512]="$(sum -x sha512 < "${treenode.filepath}")"
			)
		
			# we do this for internal debugging only
			# (all recorded blocks are written out again and their
			# sha512 sum is compared with the sum of the file; on a
			# mismatch the recorded content and sums are discarded)
			if [[ "${ {
					integer j
					for (( j=0 ; j < ${#treenode.content[@]} ; j++ )) ; do
						printf "%B" treenode.content[$j].bin
					done
				} | sum -x sha512 ; }" != "${treenode.hashsum[sha512]}" ]] ; then
				# this should never happen...
				print -u2 -f "fatal hash mismatch for %s\n" "${treenode.filepath}"
				unset treenode.content treenode.hashsum
			fi
		fi
	fi
done

# take the stop time (only when benchmarking is switched on)
${appconfig.do_benchmarking} && (( bench.stop=SECONDS ))


# report the time needed for building the tree on stderr, but only when
# benchmarking was requested
${appconfig.do_benchmarking} && print -u2 -f "# time used: %f\n" $((bench.stop - bench.start))

# dump the complete variable tree to stdout
print -v filetree

exit 0
# EOF.