2.11BSD/src/lib/libc/pdp/crt/uldiv.s

Compare this file to the similar file:
Show the results in this format:
/*
 * Program: uldiv.s
 * Copyright 1993, GTE Government Systems
 * Author:  Steven M. Schultz
 *
 *  Version	Date		Modification
 *	0.0	02Feb91		1. Initial inspiration struck.
 *	1.0	05Jun93		2. Released into the Public Domain.
*/

#include "DEFS.h"

/*
 * All routines have both a C interface and an assembly interface.  Normally
 * the two are the same.  In the case of 'ulsh' the compiler has placed one
 * of the operands in r0 and r1 so the assembly interface differs from the
 * C interface.
*/

/*
 * u_long uldiv(lhs, rhs)
 *	u_long	lhs, rhs;
 *
 * unsigned 32-bit "/" routine.  Calls to uldiv are generated automatically 
 * by the C compiler.
 */

#if !defined(KERNEL)
/*
 * uldiv for applications (uses floating point)
 *
 * The two operands are loaded into floating registers by the external
 * helpers l2f and l6f, divided there, and the quotient is converted back
 * to an integer that is returned in r0 and r1.  A zero divisor skips the
 * divide, so fr0 (the converted lhs) is returned as is instead of taking
 * an FP fault.
 *
 * NOTE(review): l2f and l6f are defined elsewhere.  Presumably they also
 * select the floating/integer modes that the closing seti undoes - confirm
 * against their source.
 * NOTE(review): movfi stores a signed value; confirm what is returned when
 * the quotient is >= 2^31 (lhs >= 2^31 with rhs of 0 or 1).
 */
	.globl l2f, l6f
	.globl uldiv
uldiv:
ENTRY(uldiv)
	jsr	pc,l2f		/ 2(sp) -> fr0
	jsr	pc,l6f		/ 6(sp) -> fr3
	tstf	fr3		/ check for zero divisor
	cfcc			/   don't want to have an FP fault
	beq	1f		/   in integer arithmetic
	divf	fr3,fr0		/ fr0 /= rhs
1:
	movfi	fr0,-(sp)	/ quotient (or lhs, if rhs was 0) as integer
	mov	(sp)+,r0	/ return result
	mov	(sp)+,r1
	seti			/ leave the integer mode as short for the caller
	rts	pc
#else
/*
 * uldiv for the kernel (fixed point only - no FP)
 *
 * Stack as seen by the body (after r2-r4 have been saved):
 *	 8.(sp) = hi(lhs)	10.(sp) = lo(lhs)
 *	12.(sp) = hi(rhs)	14.(sp) = lo(rhs)
 * Returns the quotient in r0 (high word) and r1 (low word); r2-r4 are
 * restored before returning.
 *
 * rhs >= 2^15 is handed to slowuldiv.  Otherwise the quotient is built
 * long-division style from two hardware divides by lo(rhs):
 *	hi(quo) = hi(lhs) / rhs
 *	lo(quo) = ((hi(lhs) % rhs) << 16 | lo(lhs)) / rhs
 *
 * div yields a signed 16-bit quotient, so the first divide cannot hold
 * hi(lhs) / 1 once hi(lhs) >= 2^15, and nothing after it checks for that
 * overflow.  rhs == 1 is therefore answered directly.  rhs == 0 takes the
 * same exit and returns lhs, just as the floating point version of uldiv
 * does when it skips its divf for a zero divisor.
 */

	.globl uldiv
uldiv:
ENTRY(uldiv)
	mov	r2,-(sp)	/ faster than csv/cret ...
	mov	r3,-(sp)
	mov	r4,-(sp)
	mov	14.(sp),r3	/ r3 = lo(rhs)
	bmi	slowuldiv	/  rhs >= 2^15
	tst	12.(sp)		/ hi(rhs) empty?
	bne	slowuldiv	/   no, rhs >= 2^16

	mov	10.(sp),r2	/ r2 = lo(lhs)
	mov	8.(sp),r1	/ r1 = hi(lhs)

	cmp	r3,$1		/ rhs is 0 or 1?
	bhi	4f		/   no, the two divides below are safe
	mov	r1,r0		/ yes, the quotient is lhs itself
	mov	r2,r1		/   (r0:r1 = hi(lhs):lo(lhs))
	br	9f		/   nothing extra on the stack to pop
4:
	clr	r0		/ r0 = hi(lhs) / lo(rhs)
	div	r3,r0		/ r1 = hi(lhs) % lo(rhs)
	mov	r0,r4		/ save high quotient
	mov	r1,-(sp)	/ stash hi(tmp)
	mov	r1,r0		/ tmp=(hi(lhs)%lo(rhs))<<16 | lo(lhs)
	mov	r2,r1		/ (r0:r1 = tmp)
	div	r3,r0		/ r0 = tmp / lo(rhs)
	bvc	3f		/ done if tmp/lo(rhs) < 2^15

	mov	(sp),r0		/ reload r0:r1 with tmp (regs may be
	mov	r2,r1		/   clobbered by failed div)
	sub	r3,r0		/ r0:r1 -= 2^16 * lo(rhs)
	div	r3,r0		/ signed quotient, low 16 bits are correct
	tst	r1		/ if (negative) remainder, subtract one from
	sxt	r1		/   quotient
	add	r1,r0		/ cannot overflow!
3:
	tst	(sp)+		/ pop hi(tmp) off stack
	mov	r0,r1		/ r1 (lo(quo)) = tmp / lo(rhs)
	mov	r4,r0		/ r0 (hi(quo)) = hi(lhs) / lo(rhs)
9:
	mov	(sp)+,r4	/ restore registers
	mov	(sp)+,r3
	mov	(sp)+,r2
	rts	pc

/*
 * The divisor (rhs) is known to be >= 2^15 so we perform a shift and
 * subtract algorithm.  It's slow - feel free to improve it.
 *
 * The algorithm for signed divide broke down for unsigned operands, so a
 * slower, larger, more painful algorithm was implemented using scaling and
 * repetitive subtraction/shifting.  Works best for large numbers (fewer
 * shifts that way).
 *
 * Reached only by a branch from uldiv, with r2-r4 already saved on the
 * stack and r3 = lo(rhs).  Outline:
 *	1. For each operand, count how many times it can be shifted left
 *	   before a one bit falls out of the top (its "scale").
 *	2. Shift rhs, and a quotient adder that starts as 1, by the
 *	   difference of the two scales.
 *	3. While the adder is non-zero: subtract rhs from lhs and add the
 *	   adder to the quotient for as long as rhs <= lhs, then shift both
 *	   rhs and the adder right one bit.
 * Leaves through the register restore code at label 9 in uldiv with the
 * quotient in r0 (high word) and r1 (low word).
 */
slowuldiv:
	mov	8.(sp),r0	/ r0 = hi(lhs)
	mov	10.(sp),r1	/ r1 = lo(lhs)
	mov	12.(sp),r2	/ r2 = hi(rhs)
				/ r3 = lo(rhs) - already done

	clr	r4		/ init scale of lhs
2:
	ashc	$1,r0		/ shift lhs left one bit
	blos	1f		/ check for zero at same time
	inc	r4		/ no bit fell out yet, bump lhs scale
	br	2b
1:
	mov	r4,-(sp)	/ save scale of lhs
	clr	r4		/ init scale of rhs
2:
	asl	r3		/ shift rhs (r2:r3) left one bit
	rol	r2
	bcs	1f		/ stop once a one bit falls out
	inc	r4		/ bump rhs scale
	br	2b
1:
	clr	r0		/ r0:r1 = 1
	mov	$1,r1
	sub	(sp)+,r4	/ difference in scale (rhs - lhs)
	ashc	r4,r0		/ initial quotient adder
	mov	r1,-(sp)	/ quoadder lo
	mov	r0,-(sp)	/ quoadder hi
				/ operands are 4 bytes further away now
	mov	12.(sp),r0	/ r0 = hi(lhs)
	mov	14.(sp),r1	/ r1 = lo(lhs)
	mov	16.(sp),r2	/ r2 = hi(rhs)
	mov	18.(sp),r3	/ r3 = lo(rhs)

	ashc	r4,r2		/ scale rhs up for repetitive subtraction
	clr	r4		/ quo lo
	clr	-(sp)		/ quo hi
				/ (sp) = quo hi, 2(sp) = quoadder hi,
				/   4(sp) = quoadder lo from here on
docmp:
	cmp	r2,r0		/ compare high words, unsigned
	bhi	noadd		/ rhs > lhs, nothing to subtract
	blo	dosub		/ rhs < lhs, subtract
	cmp	r3,r1		/ high words equal, low words decide
	bhi	noadd
dosub:
	sub	r3,r1		/ lhs (r0:r1) -= rhs (r2:r3)
	sbc	r0
	sub	r2,r0
	add	4(sp),r4	/ quo lo += quoadder lo
	adc	(sp)		/ quo hi
	add	2(sp),(sp)	/ quo hi += quoadder hi
	br	docmp		/ try the same rhs again
noadd:
	clc			/ right shift rhs
	ror	r2
	ror	r3
	clc			/ right shift quotient adder
	ror	2(sp)
	ror	4(sp)
	bne	docmp		/ quo adder not 0 means more to do
	tst	2(sp)		/ low word is 0, check the high word
	bne	docmp
	mov	(sp)+,r0	/ quo hi
	mov	r4,r1		/ quo lo
	cmp	(sp)+,(sp)+	/ remove quot adder
	br	9b		/ restore r2-r4 and return via uldiv
#endif KERNEL

/*
 * u_long ualdiv(lhs, rhs)
 *	u_long	*lhs, rhs;
 *
 * 32-bit unsigned "/=" routine; calls to it are generated automatically by
 * the C compiler.  The divide itself is long enough that repeating it here
 * makes no sense, so the call is simply translated into uldiv(*lhs, rhs).
 * The quotient is stored back through lhs and also returned in r0 (high
 * word) and r1 (low word).
 */

	.globl	ualdiv
ualdiv:
ENTRY(ualdiv)
	mov	r2,-(sp)	/ r2 will hold the lhs pointer, save it
	mov	4(sp),r2	/ r2 = lhs
	mov	8.(sp),-(sp)	/ push lo(rhs)
	mov	8.(sp),-(sp)	/ push hi(rhs) (same offset, sp has moved)
	mov	2(r2),-(sp)	/ push lo(*lhs)
	mov	(r2),-(sp)	/ push hi(*lhs)
	jsr	pc,uldiv	/ r0:r1 = *lhs / rhs
	add	$8.,sp		/ drop the four argument words
	mov	r0,(r2)+	/ *lhs = quotient: high word first,
	mov	r1,(r2)		/   then the low word
	mov	(sp)+,r2	/ give r2 back to the caller
	rts	pc		/ quotient is still in r0:r1