misc/060tsys/t28fpsp.s (3/3)
1 2 3

	tst.l	d1			* did dfetch fail?
	bne.l	facc_in_b		* yes

load_byte_cont:
	fmove.b	d0,fp0			* read into %fp0;convert to xprec
	fmovem.x	fp0,FP_SRC(a6)	* return src op in FP_SRC

	fbeq.w	load_byte_zero		* src op is a ZERO
	rts
load_byte_zero:
	move.b	#ZERO,STAG(a6)		* set optype tag to ZERO
	rts

load_byte_immed:
	bsr.l	_imem_read_word		* fetch src operand immed data

	tst.l	d1			* did ifetch fail?
	bne.l	funimp_iacc		* yes
	bra.b	load_byte_cont

*########################################
* load a SGL into %fp0:			#
* 	-number can't fault		#
*	(1) calc ea			#
*	(2) read 4 bytes into L_SCR1	#
*	(3) fmov.s into %fp0		#
*########################################
load_sgl:
	moveq.l	#$4,d0			* pass: 4 (bytes)
	bsr.l	_dcalc_ea		* calc <ea>; <ea> in %a0

	cmpi.b	#immed_flg,SPCOND_FLG(a6)
	beq.b	load_sgl_immed

	bsr.l	_dmem_read_long		* fetch src operand from memory
	move.l	d0,L_SCR1(a6)		* store src op on stack

	tst.l	d1			* did dfetch fail?
	bne.l	facc_in_l		* yes

load_sgl_cont:
	lea	L_SCR1(a6),a0		* pass: ptr to sgl src op
	bsr.l	set_tag_s		* determine src type tag
	move.b	d0,STAG(a6)		* save src optype tag on stack

	cmpi.b	#DENORM,d0		* is it a sgl DENORM?
	beq.w	get_sgl_denorm		* yes

	cmpi.b	#SNAN,d0		* is it a sgl SNAN?
	beq.w	get_sgl_snan		* yes

	fmove.s	L_SCR1(a6),fp0		* read into %fp0;convert to xprec
	fmovem.x	fp0,FP_SRC(a6)	* return src op in FP_SRC
	rts

load_sgl_immed:
	bsr.l	_imem_read_long		* fetch src operand immed data

	tst.l	d1			* did ifetch fail?
	bne.l	funimp_iacc		* yes
	bra.b	load_sgl_cont

* must convert sgl denorm format to an Xprec denorm fmt suitable for 
* normalization...
* %a0 : points to sgl denorm
get_sgl_denorm:
	clr.w	FP_SRC_EX(a6)
	bfextu	(a0){#9:#23},d0		* fetch sgl hi(_mantissa)
	lsl.l	#$8,d0
	move.l	d0,FP_SRC_HI(a6)	* set ext hi(_mantissa)
	clr.l	FP_SRC_LO(a6)		* set ext lo(_mantissa)

	clr.w	FP_SRC_EX(a6)
	btst	#$7,(a0)		* is sgn bit set?
	beq.b	sgl_dnrm_norm
	bset	#$7,FP_SRC_EX(a6)	* set sgn of xprec value

sgl_dnrm_norm:
	lea	FP_SRC(a6),a0
	bsr.l	norm			* normalize number
	move.w	#$3f81,d1		* xprec exp = 0x3f81
	sub.w	d0,d1			* exp = 0x3f81 - shft amt.
	or.w	d1,FP_SRC_EX(a6)	* {sgn,exp}

	move.b	#NORM,STAG(a6)		* fix src type tag
	rts

* convert sgl to ext SNAN
* %a0 : points to sgl SNAN
get_sgl_snan:
	move.w	#$7fff,FP_SRC_EX(a6)	* set exp of SNAN
	bfextu	(a0){#9:#23},d0
	lsl.l	#$8,d0			* extract and insert hi(man)
	move.l	d0,FP_SRC_HI(a6)
	clr.l	FP_SRC_LO(a6)

	btst	#$7,(a0)		* see if sign of SNAN is set
	beq.b	no_sgl_snan_sgn
	bset	#$7,FP_SRC_EX(a6)
no_sgl_snan_sgn:
	rts

*########################################
* load a DBL into %fp0:			#
* 	-number can't fault		#
*	(1) calc ea			#
*	(2) read 8 bytes into L_SCR(1,2)#
*	(3) fmov.d into %fp0		#
*########################################
load_dbl:
	moveq.l	#$8,d0			* pass: 8 (bytes)
	bsr.l	_dcalc_ea		* calc <ea>; <ea> in %a0

	cmpi.b	#immed_flg,SPCOND_FLG(a6)
	beq.b	load_dbl_immed

	lea	L_SCR1(a6),a1		* pass: ptr to input dbl tmp space
	moveq.l	#$8,d0			* pass: # bytes to read
	bsr.l	_dmem_read		* fetch src operand from memory

	tst.l	d1			* did dfetch fail?
	bne.l	facc_in_d		* yes

load_dbl_cont:
	lea	L_SCR1(a6),a0		* pass: ptr to input dbl
	bsr.l	set_tag_d		* determine src type tag
	move.b	d0,STAG(a6)		* set src optype tag

	cmpi.b	#DENORM,d0		* is it a dbl DENORM?
	beq.w	get_dbl_denorm		* yes

	cmpi.b	#SNAN,d0		* is it a dbl SNAN?
	beq.w	get_dbl_snan		* yes

	fmove.d	L_SCR1(a6),fp0		* read into %fp0;convert to xprec
	fmovem.x	fp0,FP_SRC(a6)	* return src op in FP_SRC
	rts

load_dbl_immed:
	lea	L_SCR1(a6),a1		* pass: ptr to input dbl tmp space
	moveq.l	#$8,d0			* pass: # bytes to read
	bsr.l	_imem_read		* fetch src operand from memory

	tst.l	d1			* did ifetch fail?
	bne.l	funimp_iacc		* yes
	bra.b	load_dbl_cont

* must convert dbl denorm format to an Xprec denorm fmt suitable for 
* normalization...
* %a0 : loc. of dbl denorm
get_dbl_denorm:
	clr.w	FP_SRC_EX(a6)
	bfextu	(a0){#12:#31},d0	* fetch hi(_mantissa)
	move.l	d0,FP_SRC_HI(a6)
	bfextu	4(a0){#11:#21},d0	* fetch lo(_mantissa)
	moveq.l	#$b,d1
	lsl.l	d1,d0
	move.l	d0,FP_SRC_LO(a6)

	btst	#$7,(a0)		* is sgn bit set?
	beq.b	dbl_dnrm_norm
	bset	#$7,FP_SRC_EX(a6)	* set sgn of xprec value

dbl_dnrm_norm:
	lea	FP_SRC(a6),a0
	bsr.l	norm			* normalize number
	move.w	#$3c01,d1		* xprec exp = 0x3c01
	sub.w	d0,d1			* exp = 0x3c01 - shft amt.
	or.w	d1,FP_SRC_EX(a6)	* {sgn,exp}

	move.b	#NORM,STAG(a6)		* fix src type tag
	rts

* convert dbl to ext SNAN
* %a0 : points to dbl SNAN
get_dbl_snan:
	move.w	#$7fff,FP_SRC_EX(a6)	* set exp of SNAN

	bfextu	(a0){#12:#31},d0	* fetch hi(_mantissa)
	move.l	d0,FP_SRC_HI(a6)
	bfextu	4(a0){#11:#21},d0	* fetch lo(_mantissa)
	moveq.l	#$b,d1
	lsl.l	d1,d0
	move.l	d0,FP_SRC_LO(a6)

	btst	#$7,(a0)		* see if sign of SNAN is set
	beq.b	no_dbl_snan_sgn
	bset	#$7,FP_SRC_EX(a6)
no_dbl_snan_sgn:
	rts

*################################################
* load a Xprec into %fp0:			#
* 	-number can't fault			#
*	(1) calc ea				#
*	(2) read 12 bytes into L_SCR(1,2)	#
*	(3) fmov.x into %fp0			#
*################################################
load_ext:
	moveq.l	#$c,d0			* pass: 12 (bytes)
	bsr.l	_dcalc_ea		* calc <ea>

	lea	FP_SRC(a6),a1		* pass: ptr to input ext tmp space
	moveq.l	#$c,d0			* pass: # of bytes to read
	bsr.l	_dmem_read		* fetch src operand from memory

	tst.l	d1			* did dfetch fail?
	bne.l	facc_in_x		* yes

	lea	FP_SRC(a6),a0		* pass: ptr to src op
	bsr.l	set_tag_x		* determine src type tag

	cmpi.b	#UNNORM,d0		* is the src op an UNNORM?
	beq.b	load_ext_unnorm		* yes

	move.b	d0,STAG(a6)		* store the src optype tag
	rts

load_ext_unnorm:
	bsr.l	unnorm_fix		* fix the src UNNORM
	move.b	d0,STAG(a6)		* store the src optype tag
	rts

*################################################
* load a packed into %fp0:			#
* 	-number can't fault			#
*	(1) calc ea				#
*	(2) read 12 bytes into L_SCR(1,2,3)	#
*	(3) fmov.x into %fp0			#
*################################################
load_packed:
	bsr.l	get_packed

	lea	FP_SRC(a6),a0		* pass ptr to src op
	bsr.l	set_tag_x		* determine src type tag
	cmpi.b	#UNNORM,d0		* is the src op an UNNORM ZERO?
	beq.b	load_packed_unnorm	* yes

	move.b	d0,STAG(a6)		* store the src optype tag
	rts

load_packed_unnorm:
	bsr.l	unnorm_fix		* fix the UNNORM ZERO
	move.b	d0,STAG(a6)		* store the src optype tag
	rts

*########################################################################
* XDEF ****************************************************************	#
* 	fout(): move from fp register to memory or data register	#
*									#
* XREF ****************************************************************	#
*	_round() - needed to create EXOP for sgl/dbl precision		#
*	norm() - needed to create EXOP for extended precision		#
*	ovf_res() - create default overflow result for sgl/dbl precision#
*	unf_res() - create default underflow result for sgl/dbl prec.	#
*	dst_dbl() - create rounded dbl precision result.		#
*	dst_sgl() - create rounded sgl precision result.		#
*	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
*	bindec() - convert FP binary number to packed number.		#
*	_mem_write() - write data to memory.				#
*	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
*	_dmem_write_{byte,word,long}() - write data to memory.		#
*	store_dreg_{b,w,l}() - store data to data register file.	#
*	facc_out_{b,w,l,d,x}() - data access error occurred.		#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precision source operand		#
*	d0 = round prec,mode						#
* 									#
* OUTPUT **************************************************************	#
*	fp0 : intermediate underflow or overflow result if		#
*	      OVFL/UNFL occurred for a sgl or dbl operand		#
*									#
* ALGORITHM ***********************************************************	#
*	This routine is accessed by many handlers that need to do an	#
* opclass three move of an operand out to memory.			#
*	Decode an fmove out (opclass 3) instruction to determine if	#
* it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
* register or memory. The algorithm uses a standard "fmove" to create	#
* the rounded result. Also, since exceptions are disabled, this also	#
* create the correct OPERR default result if appropriate.		#
*	For sgl or dbl precision, overflow or underflow can occur. If	#
* either occurs and is enabled, the EXOP.				#
*	For extended precision, the stacked <ea> must be fixed along	#
* w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
* the source is a denorm and if underflow is enabled, an EXOP must be	#
* created.								#
* 	For packed, the k-factor must be fetched from the instruction	#
* word or a data register. The <ea> must be fixed as w/ extended 	#
* precision. Then, bindec() is called to create the appropriate 	#
* packed result.							#
*	If at any time an access error is flagged by one of the move-	#
* to-memory routines, then a special exit must be made so that the	#
* access error can be handled properly.					#
*									#
*########################################################################

	global	fout
fout:
	bfextu	EXC_CMDREG(a6){#3:#3},d1	* extract dst fmt
	move.w	(tbl_fout.b,pc,d1.w*2),a1	* use as index
	jmp	(tbl_fout.b,pc,a1.l)	* jump to routine

	.dc.w	$4AFC,$8
tbl_fout:
	.dc.w	fout_long-tbl_fout
	.dc.w	fout_sgl-tbl_fout
	.dc.w	fout_ext-tbl_fout
	.dc.w	fout_pack-tbl_fout
	.dc.w	fout_word-tbl_fout
	.dc.w	fout_dbl-tbl_fout
	.dc.w	fout_byte-tbl_fout
	.dc.w	fout_pack-tbl_fout

*################################################################
* fmove.b out ###################################################
*################################################################

* Only "Unimplemented Data Type" exceptions enter here. The operand
* is either a DENORM or a NORM.
fout_byte:
	tst.b	STAG(a6)		* is operand normalized?
	bne.b	fout_byte_denorm	* no

	fmovem.x	SRC.w(a0),fp0	* load value

fout_byte_norm:
	fmove.l	d0,fpcr			* insert rnd prec,mode

	fmove.b	fp0,d0			* exec move out w/ correct rnd mode

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	fpsr,d1			* fetch FPSR
	or.w	d1,2+USER_FPSR(a6)	* save new exc,accrued bits

	move.b	1+EXC_OPWORD(a6),d1	* extract dst mode
	andi.b	#$38,d1			* is mode == 0? (Dreg dst)
	beq.b	fout_byte_dn		* must save to integer regfile

	move.l	EXC_EA(a6),a0		* stacked <ea> is correct
	bsr.l	_dmem_write_byte	* write byte

	tst.l	d1			* did dstore fail?
	bne.l	facc_out_b		* yes

	rts

fout_byte_dn:
	move.b	1+EXC_OPWORD(a6),d1	* extract Dn
	andi.w	#$7,d1
	bsr.l	store_dreg_b
	rts

fout_byte_denorm:
	move.l	SRC_EX.w(a0),d1
	andi.l	#$80000000,d1		* keep DENORM sign
	ori.l	#$00800000,d1		* make smallest sgl
	fmove.s	d1,fp0
	bra.b	fout_byte_norm

*################################################################
* fmove.w out ###################################################
*################################################################

* Only "Unimplemented Data Type" exceptions enter here. The operand
* is either a DENORM or a NORM.
fout_word:
	tst.b	STAG(a6)		* is operand normalized?
	bne.b	fout_word_denorm	* no

	fmovem.x	SRC.w(a0),fp0	* load value

fout_word_norm:
	fmove.l	d0,fpcr			* insert rnd prec:mode

	fmove.w	fp0,d0			* exec move out w/ correct rnd mode

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	fpsr,d1			* fetch FPSR
	or.w	d1,2+USER_FPSR(a6)	* save new exc,accrued bits

	move.b	1+EXC_OPWORD(a6),d1	* extract dst mode
	andi.b	#$38,d1			* is mode == 0? (Dreg dst)
	beq.b	fout_word_dn		* must save to integer regfile

	move.l	EXC_EA(a6),a0		* stacked <ea> is correct
	bsr.l	_dmem_write_word	* write word

	tst.l	d1			* did dstore fail?
	bne.l	facc_out_w		* yes

	rts

fout_word_dn:
	move.b	1+EXC_OPWORD(a6),d1	* extract Dn
	andi.w	#$7,d1
	bsr.l	store_dreg_w
	rts

fout_word_denorm:
	move.l	SRC_EX.w(a0),d1
	andi.l	#$80000000,d1		* keep DENORM sign
	ori.l	#$00800000,d1		* make smallest sgl
	fmove.s	d1,fp0
	bra.b	fout_word_norm

*################################################################
* fmove.l out ###################################################
*################################################################

* Only "Unimplemented Data Type" exceptions enter here. The operand
* is either a DENORM or a NORM.
fout_long:
	tst.b	STAG(a6)		* is operand normalized?
	bne.b	fout_long_denorm	* no

	fmovem.x	SRC.w(a0),fp0	* load value

fout_long_norm:
	fmove.l	d0,fpcr			* insert rnd prec:mode

	fmove.l	fp0,d0			* exec move out w/ correct rnd mode

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	fpsr,d1			* fetch FPSR
	or.w	d1,2+USER_FPSR(a6)	* save new exc,accrued bits

fout_long_write:
	move.b	1+EXC_OPWORD(a6),d1	* extract dst mode
	andi.b	#$38,d1			* is mode == 0? (Dreg dst)
	beq.b	fout_long_dn		* must save to integer regfile

	move.l	EXC_EA(a6),a0		* stacked <ea> is correct
	bsr.l	_dmem_write_long	* write long

	tst.l	d1			* did dstore fail?
	bne.l	facc_out_l		* yes

	rts

fout_long_dn:
	move.b	1+EXC_OPWORD(a6),d1	* extract Dn
	andi.w	#$7,d1
	bsr.l	store_dreg_l
	rts

fout_long_denorm:
	move.l	SRC_EX.w(a0),d1
	andi.l	#$80000000,d1		* keep DENORM sign
	ori.l	#$00800000,d1		* make smallest sgl
	fmove.s	d1,fp0
	bra.b	fout_long_norm

*################################################################
* fmove.x out ###################################################
*################################################################

* Only "Unimplemented Data Type" exceptions enter here. The operand
* is either a DENORM or a NORM.
* The DENORM causes an Underflow exception.
fout_ext:

* we copy the extended precision result to FP_SCR0 so that the reserved
* 16-bit field gets zeroed. we do this since we promise not to disturb
* what's at SRC(a0).
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	clr.w	2+FP_SCR0_EX(a6)	* clear reserved field
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)

	fmovem.x	SRC.w(a0),fp0	* return result

	bsr.l	_calc_ea_fout		* fix stacked <ea>

	move.l	a0,a1			* pass: dst addr
	lea	FP_SCR0(a6),a0		* pass: src addr
	moveq.l	#$c,d0			* pass: opsize is 12 bytes

* we must not yet write the extended precision data to the stack
* in the pre-decrement case from supervisor mode or else we'll corrupt 
* the stack frame. so, leave it in FP_SRC for now and deal with it later...
	cmpi.b	#mda7_flg,SPCOND_FLG(a6)
	beq.b	fout_ext_a7

	bsr.l	_dmem_write		* write ext prec number to memory

	tst.l	d1			* did dstore fail?
	bne.w	fout_ext_err		* yes

	tst.b	STAG(a6)		* is operand normalized?
	bne.b	fout_ext_denorm		* no
	rts

* the number is a DENORM. must set the underflow exception bit
fout_ext_denorm:
	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set underflow exc bit

	move.b	FPCR_ENABLE(a6),d0
	andi.b	#$0a,d0			* is UNFL or INEX enabled?
	bne.b	fout_ext_exc		* yes
	rts

* we don't want to do the write if the exception occurred in supervisor mode
* so _mem_write2() handles this for us.
fout_ext_a7:
	bsr.l	_mem_write2		* write ext prec number to memory

	tst.l	d1			* did dstore fail?
	bne.w	fout_ext_err		* yes

	tst.b	STAG(a6)		* is operand normalized?
	bne.b	fout_ext_denorm		* no
	rts

fout_ext_exc:
	lea	FP_SCR0(a6),a0
	bsr.l	norm			* normalize the mantissa
	neg.w	d0			* new exp = -(shft amt)
	andi.w	#$7fff,d0
	andi.w	#$8000,FP_SCR0_EX(a6)	* keep only old sign
	or.w	d0,FP_SCR0_EX(a6)	* insert new exponent
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	rts

fout_ext_err:
	move.l	EXC_A6(a6),(a6)		* fix stacked a6
	bra.l	facc_out_x

*########################################################################
* fmove.s out ###########################################################
*########################################################################
fout_sgl:
	andi.b	#$30,d0			* clear rnd prec
	ori.b	#s_mode*$10,d0		* insert sgl prec
	move.l	d0,L_SCR3(a6)		* save rnd prec,mode on stack

*
* operand is a normalized number. first, we check to see if the move out
* would cause either an underflow or overflow. these cases are handled
* separately. otherwise, set the FPCR to the proper rounding mode and
* execute the move.
*
	move.w	SRC_EX.w(a0),d0		* extract exponent
	andi.w	#$7fff,d0		* strip sign

	cmpi.w	#SGL_HI,d0		* will operand overflow?
	bgt.w	fout_sgl_ovfl		* yes; go handle OVFL
	beq.w	fout_sgl_may_ovfl	* maybe; go handle possible OVFL
	cmpi.w	#SGL_LO,d0		* will operand underflow?
	blt.w	fout_sgl_unfl		* yes; go handle underflow

*
* NORMs(in range) can be stored out by a simple "fmov.s"
* Unnormalized inputs can come through this point.
*
fout_sgl_exg:
	fmovem.x	SRC.w(a0),fp0	* fetch fop from stack

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fmove.s	fp0,d0			* store does convert and round

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	fpsr,d1			* save FPSR

	or.w	d1,2+USER_FPSR(a6)	* set possible inex2/ainex

fout_sgl_exg_write:
	move.b	1+EXC_OPWORD(a6),d1	* extract dst mode
	andi.b	#$38,d1			* is mode == 0? (Dreg dst)
	beq.b	fout_sgl_exg_write_dn	* must save to integer regfile

	move.l	EXC_EA(a6),a0		* stacked <ea> is correct
	bsr.l	_dmem_write_long	* write long

	tst.l	d1			* did dstore fail?
	bne.l	facc_out_l		* yes

	rts

fout_sgl_exg_write_dn:
	move.b	1+EXC_OPWORD(a6),d1	* extract Dn
	andi.w	#$7,d1
	bsr.l	store_dreg_l
	rts

*
* here, we know that the operand would UNFL if moved out to single prec,
* so, denorm and round and then use generic store single routine to
* write the value to memory.
*
fout_sgl_unfl:
	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set UNFL

	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	move.l	a0,-(sp)

	clr.l	d0			* pass: S.F. = 0

	cmpi.b	#DENORM,STAG(a6)	* fetch src optype tag
	bne.b	fout_sgl_unfl_cont	* let DENORMs fall through

	lea	FP_SCR0(a6),a0
	bsr.l	norm			* normalize the DENORM

fout_sgl_unfl_cont:
	lea	FP_SCR0(a6),a0		* pass: ptr to operand
	move.l	L_SCR3(a6),d1		* pass: rnd prec,mode
	bsr.l	unf_res			* calc default underflow result

	lea	FP_SCR0(a6),a0		* pass: ptr to fop
	bsr.l	dst_sgl			* convert to single prec

	move.b	1+EXC_OPWORD(a6),d1	* extract dst mode
	andi.b	#$38,d1			* is mode == 0? (Dreg dst)
	beq.b	fout_sgl_unfl_dn	* must save to integer regfile

	move.l	EXC_EA(a6),a0		* stacked <ea> is correct
	bsr.l	_dmem_write_long	* write long

	tst.l	d1			* did dstore fail?
	bne.l	facc_out_l		* yes

	bra.b	fout_sgl_unfl_chkexc

fout_sgl_unfl_dn:
	move.b	1+EXC_OPWORD(a6),d1	* extract Dn
	andi.w	#$7,d1
	bsr.l	store_dreg_l

fout_sgl_unfl_chkexc:
	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$0a,d1			* is UNFL or INEX enabled?
	bne.w	fout_sd_exc_unfl	* yes
	addq.l	#$4,sp
	rts

*
* it's definitely an overflow so call ovf_res to get the correct answer
*
fout_sgl_ovfl:
	tst.b	3+SRC_HI(a0)		* is result inexact?
	bne.b	fout_sgl_ovfl_inex2
	tst.l	SRC_LO(a0)		* is result inexact?
	bne.b	fout_sgl_ovfl_inex2
	ori.w	#ovfl_inx_mask,2+USER_FPSR(a6)	* set ovfl/aovfl/ainex
	bra.b	fout_sgl_ovfl_cont
fout_sgl_ovfl_inex2:
	ori.w	#ovfinx_mask,2+USER_FPSR(a6)	* set ovfl/aovfl/ainex/inex2

fout_sgl_ovfl_cont:
	move.l	a0,-(sp)

* call ovf_res() w/ sgl prec and the correct rnd mode to create the default
* overflow result. DON'T save the returned ccodes from ovf_res() since
* fmove out doesn't alter them. 
	tst.b	SRC_EX.w(a0)		* is operand negative?
	smi	d1			* set if so
	move.l	L_SCR3(a6),d0		* pass: sgl prec,rnd mode
	bsr.l	ovf_res			* calc OVFL result
	fmovem.x	(a0),fp0	* load default overflow result
	fmove.s	fp0,d0			* store to single

	move.b	1+EXC_OPWORD(a6),d1	* extract dst mode
	andi.b	#$38,d1			* is mode == 0? (Dreg dst)
	beq.b	fout_sgl_ovfl_dn	* must save to integer regfile

	move.l	EXC_EA(a6),a0		* stacked <ea> is correct
	bsr.l	_dmem_write_long	* write long

	tst.l	d1			* did dstore fail?
	bne.l	facc_out_l		* yes

	bra.b	fout_sgl_ovfl_chkexc

fout_sgl_ovfl_dn:
	move.b	1+EXC_OPWORD(a6),d1	* extract Dn
	andi.w	#$7,d1
	bsr.l	store_dreg_l

fout_sgl_ovfl_chkexc:
	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$0a,d1			* is UNFL or INEX enabled?
	bne.w	fout_sd_exc_ovfl	* yes
	addq.l	#$4,sp
	rts

*
* move out MAY overflow:
* (1) force the exp to 0x3fff
* (2) do a move w/ appropriate rnd mode
* (3) if exp still equals zero, then insert original exponent
*	for the correct result.
*     if exp now equals one, then it overflowed so call ovf_res.
*
fout_sgl_may_ovfl:
	move.w	SRC_EX.w(a0),d1		* fetch current sign
	andi.w	#$8000,d1		* keep it,clear exp
	ori.w	#$3fff,d1		* insert exp = 0
	move.w	d1,FP_SCR0_EX(a6)	* insert scaled exp
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)	* copy hi(man)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)	* copy lo(man)

	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fmove.x	FP_SCR0(a6),fp0		* force fop to be rounded
	fmove.l	#$0,fpcr		* clear FPCR

	fabs.x	fp0			* need absolute value
	fcmp.b	#$2,fp0			* did exponent increase?
	fblt.w	fout_sgl_exg		* no; go finish NORM	
	bra.w	fout_sgl_ovfl		* yes; go handle overflow

*###############

fout_sd_exc_unfl:
	move.l	(sp)+,a0

	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)

	cmpi.b	#DENORM,STAG(a6)	* was src a DENORM?
	bne.b	fout_sd_exc_cont	* no

	lea	FP_SCR0(a6),a0
	bsr.l	norm
	neg.l	d0
	andi.w	#$7fff,d0
	bfins	d0,FP_SCR0_EX(a6){#1:#15}
	bra.b	fout_sd_exc_cont

fout_sd_exc:
fout_sd_exc_ovfl:
	move.l	(sp)+,a0		* restore a0

	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)

fout_sd_exc_cont:
	bclr	#$7,FP_SCR0_EX(a6)	* clear sign bit
	sne.b	2+FP_SCR0_EX(a6)	* set internal sign bit
	lea	FP_SCR0(a6),a0		* pass: ptr to DENORM

	move.b	3+L_SCR3(a6),d1
	lsr.b	#$4,d1
	andi.w	#$0c,d1
	swap	d1
	move.b	3+L_SCR3(a6),d1
	lsr.b	#$4,d1
	andi.w	#$03,d1
	clr.l	d0			* pass: zero g,r,s
	bsr.l	_round			* round the DENORM

	tst.b	2+FP_SCR0_EX(a6)	* is EXOP negative?
	beq.b	fout_sd_exc_done	* no
	bset	#$7,FP_SCR0_EX(a6)	* yes

fout_sd_exc_done:
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	rts

*################################################################
* fmove.d out ###################################################
*################################################################
fout_dbl:
	andi.b	#$30,d0			* clear rnd prec
	ori.b	#d_mode*$10,d0		* insert dbl prec
	move.l	d0,L_SCR3(a6)		* save rnd prec,mode on stack

*
* operand is a normalized number. first, we check to see if the move out
* would cause either an underflow or overflow. these cases are handled
* separately. otherwise, set the FPCR to the proper rounding mode and
* execute the move.
*
	move.w	SRC_EX.w(a0),d0		* extract exponent
	andi.w	#$7fff,d0		* strip sign

	cmpi.w	#DBL_HI,d0		* will operand overflow?
	bgt.w	fout_dbl_ovfl		* yes; go handle OVFL
	beq.w	fout_dbl_may_ovfl	* maybe; go handle possible OVFL
	cmpi.w	#DBL_LO,d0		* will operand underflow?
	blt.w	fout_dbl_unfl		* yes; go handle underflow

*
* NORMs(in range) can be stored out by a simple "fmov.d"
* Unnormalized inputs can come through this point.
*
fout_dbl_exg:
	fmovem.x	SRC.w(a0),fp0	* fetch fop from stack

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fmove.d	fp0,L_SCR1(a6)		* store does convert and round

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	fpsr,d0			* save FPSR

	or.w	d0,2+USER_FPSR(a6)	* set possible inex2/ainex

	move.l	EXC_EA(a6),a1		* pass: dst addr
	lea	L_SCR1(a6),a0		* pass: src addr
	moveq.l	#$8,d0			* pass: opsize is 8 bytes
	bsr.l	_dmem_write		* store dbl fop to memory

	tst.l	d1			* did dstore fail?
	bne.l	facc_out_d		* yes

	rts				* no; so we're finished	

*
* here, we know that the operand would UNFL if moved out to double prec,
* so, denorm and round and then use generic store double routine to
* write the value to memory.
*
fout_dbl_unfl:
	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set UNFL

	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	move.l	a0,-(sp)

	clr.l	d0			* pass: S.F. = 0

	cmpi.b	#DENORM,STAG(a6)	* fetch src optype tag
	bne.b	fout_dbl_unfl_cont	* let DENORMs fall through

	lea	FP_SCR0(a6),a0
	bsr.l	norm			* normalize the DENORM

fout_dbl_unfl_cont:
	lea	FP_SCR0(a6),a0		* pass: ptr to operand
	move.l	L_SCR3(a6),d1		* pass: rnd prec,mode
	bsr.l	unf_res			* calc default underflow result

	lea	FP_SCR0(a6),a0		* pass: ptr to fop
	bsr.l	dst_dbl			* convert to single prec
	move.l	d0,L_SCR1(a6)
	move.l	d1,L_SCR2(a6)

	move.l	EXC_EA(a6),a1		* pass: dst addr
	lea	L_SCR1(a6),a0		* pass: src addr
	moveq.l	#$8,d0			* pass: opsize is 8 bytes
	bsr.l	_dmem_write		* store dbl fop to memory

	tst.l	d1			* did dstore fail?
	bne.l	facc_out_d		* yes

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$0a,d1			* is UNFL or INEX enabled?
	bne.w	fout_sd_exc_unfl	* yes
	addq.l	#$4,sp
	rts

*
* it's definitely an overflow so call ovf_res to get the correct answer
*
fout_dbl_ovfl:
	move.w	2+SRC_LO(a0),d0
	andi.w	#$7ff,d0
	bne.b	fout_dbl_ovfl_inex2

	ori.w	#ovfl_inx_mask,2+USER_FPSR(a6)	* set ovfl/aovfl/ainex
	bra.b	fout_dbl_ovfl_cont
fout_dbl_ovfl_inex2:
	ori.w	#ovfinx_mask,2+USER_FPSR(a6)	* set ovfl/aovfl/ainex/inex2

fout_dbl_ovfl_cont:
	move.l	a0,-(sp)

* call ovf_res() w/ dbl prec and the correct rnd mode to create the default
* overflow result. DON'T save the returned ccodes from ovf_res() since
* fmove out doesn't alter them. 
	tst.b	SRC_EX.w(a0)		* is operand negative?
	smi	d1			* set if so
	move.l	L_SCR3(a6),d0		* pass: dbl prec,rnd mode
	bsr.l	ovf_res			* calc OVFL result
	fmovem.x	(a0),fp0	* load default overflow result
	fmove.d	fp0,L_SCR1(a6)		* store to double

	move.l	EXC_EA(a6),a1		* pass: dst addr
	lea	L_SCR1(a6),a0		* pass: src addr
	moveq.l	#$8,d0			* pass: opsize is 8 bytes
	bsr.l	_dmem_write		* store dbl fop to memory

	tst.l	d1			* did dstore fail?
	bne.l	facc_out_d		* yes

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$0a,d1			* is UNFL or INEX enabled?
	bne.w	fout_sd_exc_ovfl	* yes
	addq.l	#$4,sp
	rts

*
* move out MAY overflow:
* (1) force the exp to 0x3fff
* (2) do a move w/ appropriate rnd mode
* (3) if exp still equals zero, then insert original exponent
*	for the correct result.
*     if exp now equals one, then it overflowed so call ovf_res.
*
fout_dbl_may_ovfl:
	move.w	SRC_EX.w(a0),d1		* fetch current sign
	andi.w	#$8000,d1		* keep it,clear exp
	ori.w	#$3fff,d1		* insert exp = 0
	move.w	d1,FP_SCR0_EX(a6)	* insert scaled exp
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)	* copy hi(man)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)	* copy lo(man)

	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fmove.x	FP_SCR0(a6),fp0		* force fop to be rounded
	fmove.l	#$0,fpcr		* clear FPCR

	fabs.x	fp0			* need absolute value
	fcmp.b	#$2,fp0			* did exponent increase?
	fblt.w	fout_dbl_exg		* no; go finish NORM	
	bra.w	fout_dbl_ovfl		* yes; go handle overflow

*########################################################################
* XDEF ****************************************************************	#
* 	dst_dbl(): create double precision value from extended prec.	#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to source operand in extended precision		#
* 									#
* OUTPUT **************************************************************	#
*	d0 = hi(double precision result)				#
*	d1 = lo(double precision result)				#
*									#
* ALGORITHM ***********************************************************	#
*									#
*  Changes extended precision to double precision.			#
*  Note: no attempt is made to round the extended value to double.	#
*	dbl_sign = ext_sign						#
*	dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)		#
*	get rid of ext integer bit					#
*	dbl_mant = ext_mant{62:12}					#
*									#
*	    	---------------   ---------------    ---------------	#
*  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
*	    	---------------   ---------------    ---------------	#
*	   	 95	    64    63 62	      32      31     11	  0	#
*				     |			     |		#
*				     |			     |		#
*				     |			     |		#
*		 	             v   		     v		#
*	    		      ---------------   ---------------		#
*  double   ->  	      |s|exp| mant  |   |  mant       |		#
*	    		      ---------------   ---------------		#
*	   	 	      63     51   32   31	       0	#
*									#
*########################################################################

dst_dbl:
	clr.l	d0			* clear d0
	move.w	FTEMP_EX.w(a0),d0	* get exponent
	subi.w	#EXT_BIAS,d0		* subtract extended precision bias
	addi.w	#DBL_BIAS,d0		* add double precision bias
	tst.b	FTEMP_HI(a0)		* is number a denorm?
	bmi.b	dst_get_dupper		* no
	subq.w	#$1,d0			* yes; denorm bias = DBL_BIAS - 1
dst_get_dupper:
	swap	d0			* d0 now in upper word
	lsl.l	#$4,d0			* d0 in proper place for dbl prec exp
	tst.b	FTEMP_EX.w(a0)		* test sign
	bpl.b	dst_get_dman		* if postive, go process mantissa
	bset	#$1f,d0			* if negative, set sign
dst_get_dman:
	move.l	FTEMP_HI(a0),d1		* get ms mantissa
	bfextu	d1{#1:#20},d1		* get upper 20 bits of ms
	or.l	d1,d0			* put these bits in ms word of double
	move.l	d0,L_SCR1(a6)		* put the new exp back on the stack
	move.l	FTEMP_HI(a0),d1		* get ms mantissa
	moveq.l	#21,d0			* load shift count
	lsl.l	d0,d1			* put lower 11 bits in upper bits
	move.l	d1,L_SCR2(a6)		* build lower lword in memory
	move.l	FTEMP_LO(a0),d1		* get ls mantissa
	bfextu	d1{#0:#21},d0		* get ls 21 bits of double
	move.l	L_SCR2(a6),d1
	or.l	d0,d1			* put them in double result
	move.l	L_SCR1(a6),d0
	rts

*########################################################################
* XDEF ****************************************************************	#
* 	dst_sgl(): create single precision value from extended prec	#
*									#
* XREF ****************************************************************	#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to source operand in extended precision		#
* 									#
* OUTPUT **************************************************************	#
*	d0 = single precision result					#
*									#
* ALGORITHM ***********************************************************	#
*									#
* Changes extended precision to single precision.			#
*	sgl_sign = ext_sign						#
*	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
*	get rid of ext integer bit					#
*	sgl_mant = ext_mant{62:12}					#
*									#
*	    	---------------   ---------------    ---------------	#
*  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
*	    	---------------   ---------------    ---------------	#
*	   	 95	    64    63 62	   40 32      31     12	  0	#
*				     |	   |				#
*				     |	   |				#
*				     |	   |				#
*		 	             v     v				#
*	    		      ---------------				#
*  single   ->  	      |s|exp| mant  |				#
*	    		      ---------------				#
*	   	 	      31     22     0				#
*									#
*########################################################################

dst_sgl:
	clr.l	d0
	move.w	FTEMP_EX.w(a0),d0	* get exponent
	subi.w	#EXT_BIAS,d0		* subtract extended precision bias
	addi.w	#SGL_BIAS,d0		* add single precision bias
	tst.b	FTEMP_HI(a0)		* is number a denorm?
	bmi.b	dst_get_supper		* no
	subq.w	#$1,d0			* yes; denorm bias = SGL_BIAS - 1
dst_get_supper:
	swap	d0			* put exp in upper word of d0
	lsl.l	#$7,d0			* shift it into single exp bits
	tst.b	FTEMP_EX.w(a0)		* test sign
	bpl.b	dst_get_sman		* if positive, continue
	bset	#$1f,d0			* if negative, put in sign first
dst_get_sman:
	move.l	FTEMP_HI(a0),d1		* get ms mantissa
	andi.l	#$7fffff00,d1		* get upper 23 bits of ms
	lsr.l	#$8,d1			* and put them flush right
	or.l	d1,d0			* put these bits in ms word of single
	rts

*#############################################################################
fout_pack:
	bsr.l	_calc_ea_fout		* fetch the <ea>
	move.l	a0,-(sp)

	move.b	STAG(a6),d0		* fetch input type
	bne.w	fout_pack_not_norm	* input is not NORM

fout_pack_norm:
	btst	#$4,EXC_CMDREG(a6)	* static or dynamic?
	beq.b	fout_pack_s		* static

fout_pack_d:
	move.b	1+EXC_CMDREG(a6),d1	* fetch dynamic reg
	lsr.b	#$4,d1
	andi.w	#$7,d1

	bsr.l	fetch_dreg		* fetch Dn w/ k-factor

	bra.b	fout_pack_type
fout_pack_s:
	move.b	1+EXC_CMDREG(a6),d0	* fetch static field

fout_pack_type:
	bfexts	d0{#25:#7},d0		* extract k-factor
	move.l	d0,-(sp)

	lea	FP_SRC(a6),a0		* pass: ptr to input

* bindec is currently scrambling FP_SRC for denorm inputs.
* we'll have to change this, but for now, tough luck!!!
	bsr.l	bindec			* convert xprec to packed

*	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
	andi.l	#$cffff00f,FP_SCR0(a6)		* clear unused fields

	move.l	(sp)+,d0

	tst.b	3+FP_SCR0_EX(a6)
	bne.b	fout_pack_set
	tst.l	FP_SCR0_HI(a6)
	bne.b	fout_pack_set
	tst.l	FP_SCR0_LO(a6)
	bne.b	fout_pack_set

* add the extra condition that only if the k-factor was zero, too, should
* we zero the exponent
	tst.l	d0
	bne.b	fout_pack_set
* "mantissa" is all zero which means that the answer is zero. but, the '040
* algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
* if the mantissa is zero, I will zero the exponent, too.
* the question now is whether the exponents sign bit is allowed to be non-zero
* for a zero, also...
	andi.w	#$f000,FP_SCR0(a6)

fout_pack_set:

	lea	FP_SCR0(a6),a0		* pass: src addr

fout_pack_write:
	move.l	(sp)+,a1		* pass: dst addr
	moveq.l	#$c,d0			* pass: opsize is 12 bytes

	cmpi.b	#mda7_flg,SPCOND_FLG(a6)
	beq.b	fout_pack_a7

	bsr.l	_dmem_write		* write ext prec number to memory

	tst.l	d1			* did dstore fail?
	bne.w	fout_ext_err		* yes

	rts

* we don't want to do the write if the exception occurred in supervisor mode
* so _mem_write2() handles this for us.
fout_pack_a7:
	bsr.l	_mem_write2		* write ext prec number to memory

	tst.l	d1			* did dstore fail?
	bne.w	fout_ext_err		* yes

	rts

fout_pack_not_norm:
	cmpi.b	#DENORM,d0		* is it a DENORM?
	beq.w	fout_pack_norm		* yes
	lea	FP_SRC(a6),a0
	clr.w	2+FP_SRC_EX(a6)
	cmpi.b	#SNAN,d0		* is it an SNAN?
	beq.b	fout_pack_snan		* yes
	bra.b	fout_pack_write		* no

fout_pack_snan:
	ori.w	#snaniop2_mask,FPSR_EXCEPT(a6)	* set SNAN/AIOP
	bset	#$6,FP_SRC_HI(a6)	* set snan bit
	bra.b	fout_pack_write

*########################################################################
* XDEF ****************************************************************	#
*	fetch_dreg(): fetch register according to index in d1		#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	d1 = index of register to fetch from				#
* 									#
* OUTPUT **************************************************************	#
*	d0 = value of register fetched					#
*									#
* ALGORITHM ***********************************************************	#
*	According to the index value in d1 which can range from zero 	#
* to fifteen, load the corresponding register file value (where 	#
* address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
* stack. The rest should still be in their original places.		#
*									#
*########################################################################

* this routine leaves d1 intact for subsequent store_dreg calls.
	global	fetch_dreg
fetch_dreg:
	move.w	(tbl_fdreg.b,pc,d1.w*2),d0
	jmp	(tbl_fdreg.b,pc,d0.w*1)

tbl_fdreg:
	.dc.w	fdreg0-tbl_fdreg
	.dc.w	fdreg1-tbl_fdreg
	.dc.w	fdreg2-tbl_fdreg
	.dc.w	fdreg3-tbl_fdreg
	.dc.w	fdreg4-tbl_fdreg
	.dc.w	fdreg5-tbl_fdreg
	.dc.w	fdreg6-tbl_fdreg
	.dc.w	fdreg7-tbl_fdreg
	.dc.w	fdreg8-tbl_fdreg
	.dc.w	fdreg9-tbl_fdreg
	.dc.w	fdrega-tbl_fdreg
	.dc.w	fdregb-tbl_fdreg
	.dc.w	fdregc-tbl_fdreg
	.dc.w	fdregd-tbl_fdreg
	.dc.w	fdrege-tbl_fdreg
	.dc.w	fdregf-tbl_fdreg

fdreg0:
	move.l	EXC_DREGS+$0(a6),d0
	rts
fdreg1:
	move.l	EXC_DREGS+$4(a6),d0
	rts
fdreg2:
	move.l	d2,d0
	rts
fdreg3:
	move.l	d3,d0
	rts
fdreg4:
	move.l	d4,d0
	rts
fdreg5:
	move.l	d5,d0
	rts
fdreg6:
	move.l	d6,d0
	rts
fdreg7:
	move.l	d7,d0
	rts
fdreg8:
	move.l	EXC_DREGS+$8(a6),d0
	rts
fdreg9:
	move.l	EXC_DREGS+$c(a6),d0
	rts
fdrega:
	move.l	a2,d0
	rts
fdregb:
	move.l	a3,d0
	rts
fdregc:
	move.l	a4,d0
	rts
fdregd:
	move.l	a5,d0
	rts
fdrege:
	move.l	(a6),d0
	rts
fdregf:
	move.l	EXC_A7(a6),d0
	rts

*########################################################################
* XDEF ****************************************************************	#
*	store_dreg_l(): store longword to data register specified by d1	#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	d0 = longowrd value to store					#
*	d1 = index of register to fetch from				#
* 									#
* OUTPUT **************************************************************	#
*	(data register is updated)					#
*									#
* ALGORITHM ***********************************************************	#
*	According to the index value in d1, store the longword value	#
* in d0 to the corresponding data register. D0/D1 are on the stack	#
* while the rest are in their initial places.				#
*									#
*########################################################################

	global	store_dreg_l
store_dreg_l:
	move.w	(tbl_sdregl.b,pc,d1.w*2),d1
	jmp	(tbl_sdregl.b,pc,d1.w*1)

tbl_sdregl:
	.dc.w	sdregl0-tbl_sdregl
	.dc.w	sdregl1-tbl_sdregl
	.dc.w	sdregl2-tbl_sdregl
	.dc.w	sdregl3-tbl_sdregl
	.dc.w	sdregl4-tbl_sdregl
	.dc.w	sdregl5-tbl_sdregl
	.dc.w	sdregl6-tbl_sdregl
	.dc.w	sdregl7-tbl_sdregl

sdregl0:
	move.l	d0,EXC_DREGS+$0(a6)
	rts
sdregl1:
	move.l	d0,EXC_DREGS+$4(a6)
	rts
sdregl2:
	move.l	d0,d2
	rts
sdregl3:
	move.l	d0,d3
	rts
sdregl4:
	move.l	d0,d4
	rts
sdregl5:
	move.l	d0,d5
	rts
sdregl6:
	move.l	d0,d6
	rts
sdregl7:
	move.l	d0,d7
	rts

*########################################################################
* XDEF ****************************************************************	#
*	store_dreg_w(): store word to data register specified by d1	#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	d0 = word value to store					#
*	d1 = index of register to fetch from				#
* 									#
* OUTPUT **************************************************************	#
*	(data register is updated)					#
*									#
* ALGORITHM ***********************************************************	#
*	According to the index value in d1, store the word value	#
* in d0 to the corresponding data register. D0/D1 are on the stack	#
* while the rest are in their initial places.				#
*									#
*########################################################################

	global	store_dreg_w
store_dreg_w:
	move.w	(tbl_sdregw.b,pc,d1.w*2),d1
	jmp	(tbl_sdregw.b,pc,d1.w*1)

tbl_sdregw:
	.dc.w	sdregw0-tbl_sdregw
	.dc.w	sdregw1-tbl_sdregw
	.dc.w	sdregw2-tbl_sdregw
	.dc.w	sdregw3-tbl_sdregw
	.dc.w	sdregw4-tbl_sdregw
	.dc.w	sdregw5-tbl_sdregw
	.dc.w	sdregw6-tbl_sdregw
	.dc.w	sdregw7-tbl_sdregw

sdregw0:
	move.w	d0,2+EXC_DREGS+$0(a6)
	rts
sdregw1:
	move.w	d0,2+EXC_DREGS+$4(a6)
	rts
sdregw2:
	move.w	d0,d2
	rts
sdregw3:
	move.w	d0,d3
	rts
sdregw4:
	move.w	d0,d4
	rts
sdregw5:
	move.w	d0,d5
	rts
sdregw6:
	move.w	d0,d6
	rts
sdregw7:
	move.w	d0,d7
	rts

*########################################################################
* XDEF ****************************************************************	#
*	store_dreg_b(): store byte to data register specified by d1	#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	d0 = byte value to store					#
*	d1 = index of register to fetch from				#
* 									#
* OUTPUT **************************************************************	#
*	(data register is updated)					#
*									#
* ALGORITHM ***********************************************************	#
*	According to the index value in d1, store the byte value	#
* in d0 to the corresponding data register. D0/D1 are on the stack	#
* while the rest are in their initial places.				#
*									#
*########################################################################

	global	store_dreg_b
store_dreg_b:
	move.w	(tbl_sdregb.b,pc,d1.w*2),d1
	jmp	(tbl_sdregb.b,pc,d1.w*1)

tbl_sdregb:
	.dc.w	sdregb0-tbl_sdregb
	.dc.w	sdregb1-tbl_sdregb
	.dc.w	sdregb2-tbl_sdregb
	.dc.w	sdregb3-tbl_sdregb
	.dc.w	sdregb4-tbl_sdregb
	.dc.w	sdregb5-tbl_sdregb
	.dc.w	sdregb6-tbl_sdregb
	.dc.w	sdregb7-tbl_sdregb

sdregb0:
	move.b	d0,3+EXC_DREGS+$0(a6)
	rts
sdregb1:
	move.b	d0,3+EXC_DREGS+$4(a6)
	rts
sdregb2:
	move.b	d0,d2
	rts
sdregb3:
	move.b	d0,d3
	rts
sdregb4:
	move.b	d0,d4
	rts
sdregb5:
	move.b	d0,d5
	rts
sdregb6:
	move.b	d0,d6
	rts
sdregb7:
	move.b	d0,d7
	rts

*########################################################################
* XDEF ****************************************************************	#
*	inc_areg(): increment an address register by the value in d0	#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	d0 = amount to increment by					#
*	d1 = index of address register to increment			#
* 									#
* OUTPUT **************************************************************	#
*	(address register is updated)					#
*									#
* ALGORITHM ***********************************************************	#
* 	Typically used for an instruction w/ a post-increment <ea>, 	#
* this routine adds the increment value in d0 to the address register	#
* specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
* in their original places.						#
* 	For a7, if the increment amount is one, then we have to 	#
* increment by two. For any a7 update, set the mia7_flag so that if	#
* an access error exception occurs later in emulation, this address	#
* register update can be undone.					#
*									#
*########################################################################

	global	inc_areg
inc_areg:
	move.w	(tbl_iareg.b,pc,d1.w*2),d1
	jmp	(tbl_iareg.b,pc,d1.w*1)

tbl_iareg:
	.dc.w	iareg0-tbl_iareg
	.dc.w	iareg1-tbl_iareg
	.dc.w	iareg2-tbl_iareg
	.dc.w	iareg3-tbl_iareg
	.dc.w	iareg4-tbl_iareg
	.dc.w	iareg5-tbl_iareg
	.dc.w	iareg6-tbl_iareg
	.dc.w	iareg7-tbl_iareg

iareg0:	add.l	d0,EXC_DREGS+$8(a6)
	rts
iareg1:	add.l	d0,EXC_DREGS+$c(a6)
	rts
iareg2:	add.l	d0,a2
	rts
iareg3:	add.l	d0,a3
	rts
iareg4:	add.l	d0,a4
	rts
iareg5:	add.l	d0,a5
	rts
iareg6:	add.l	d0,(a6)
	rts
iareg7:	move.b	#mia7_flg,SPCOND_FLG(a6)
	cmpi.b	#$1,d0
	beq.b	iareg7b
	add.l	d0,EXC_A7(a6)
	rts
iareg7b:
	addq.l	#$2,EXC_A7(a6)
	rts

*########################################################################
* XDEF ****************************************************************	#
*	dec_areg(): decrement an address register by the value in d0	#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	d0 = amount to decrement by					#
*	d1 = index of address register to decrement			#
* 									#
* OUTPUT **************************************************************	#
*	(address register is updated)					#
*									#
* ALGORITHM ***********************************************************	#
* 	Typically used for an instruction w/ a pre-decrement <ea>, 	#
* this routine adds the decrement value in d0 to the address register	#
* specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
* in their original places.						#
* 	For a7, if the decrement amount is one, then we have to 	#
* decrement by two. For any a7 update, set the mda7_flag so that if	#
* an access error exception occurs later in emulation, this address	#
* register update can be undone.					#
*									#
*########################################################################

	global	dec_areg
dec_areg:
	move.w	(tbl_dareg.b,pc,d1.w*2),d1
	jmp	(tbl_dareg.b,pc,d1.w*1)

tbl_dareg:
	.dc.w	dareg0-tbl_dareg
	.dc.w	dareg1-tbl_dareg
	.dc.w	dareg2-tbl_dareg
	.dc.w	dareg3-tbl_dareg
	.dc.w	dareg4-tbl_dareg
	.dc.w	dareg5-tbl_dareg
	.dc.w	dareg6-tbl_dareg
	.dc.w	dareg7-tbl_dareg

dareg0:	sub.l	d0,EXC_DREGS+$8(a6)
	rts
dareg1:	sub.l	d0,EXC_DREGS+$c(a6)
	rts
dareg2:	sub.l	d0,a2
	rts
dareg3:	sub.l	d0,a3
	rts
dareg4:	sub.l	d0,a4
	rts
dareg5:	sub.l	d0,a5
	rts
dareg6:	sub.l	d0,(a6)
	rts
dareg7:	move.b	#mda7_flg,SPCOND_FLG(a6)
	cmpi.b	#$1,d0
	beq.b	dareg7b
	sub.l	d0,EXC_A7(a6)
	rts
dareg7b:
	subq.l	#$2,EXC_A7(a6)
	rts

*#############################################################################

*########################################################################
* XDEF ****************************************************************	#
*	load_fpn1(): load FP register value into FP_SRC(a6).		#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	d0 = index of FP register to load				#
* 									#
* OUTPUT **************************************************************	#
*	FP_SRC(a6) = value loaded from FP register file			#
*									#
* ALGORITHM ***********************************************************	#
*	Using the index in d0, load FP_SRC(a6) with a number from the 	#
* FP register file.							#
*									#
*########################################################################

	global	load_fpn1
load_fpn1:
	move.w	(tbl_load_fpn1.b,pc,d0.w*2),d0
	jmp	(tbl_load_fpn1.b,pc,d0.w*1)

tbl_load_fpn1:
	.dc.w	load_fpn1_0-tbl_load_fpn1
	.dc.w	load_fpn1_1-tbl_load_fpn1
	.dc.w	load_fpn1_2-tbl_load_fpn1
	.dc.w	load_fpn1_3-tbl_load_fpn1
	.dc.w	load_fpn1_4-tbl_load_fpn1
	.dc.w	load_fpn1_5-tbl_load_fpn1
	.dc.w	load_fpn1_6-tbl_load_fpn1
	.dc.w	load_fpn1_7-tbl_load_fpn1

load_fpn1_0:
	move.l	0+EXC_FP0(a6),0+FP_SRC(a6)
	move.l	4+EXC_FP0(a6),4+FP_SRC(a6)
	move.l	8+EXC_FP0(a6),8+FP_SRC(a6)
	lea	FP_SRC(a6),a0
	rts
load_fpn1_1:
	move.l	0+EXC_FP1(a6),0+FP_SRC(a6)
	move.l	4+EXC_FP1(a6),4+FP_SRC(a6)
	move.l	8+EXC_FP1(a6),8+FP_SRC(a6)
	lea	FP_SRC(a6),a0
	rts
load_fpn1_2:
	fmovem.x	fp2,FP_SRC(a6)
	lea	FP_SRC(a6),a0
	rts
load_fpn1_3:
	fmovem.x	fp3,FP_SRC(a6)
	lea	FP_SRC(a6),a0
	rts
load_fpn1_4:
	fmovem.x	fp4,FP_SRC(a6)
	lea	FP_SRC(a6),a0
	rts
load_fpn1_5:
	fmovem.x	fp5,FP_SRC(a6)
	lea	FP_SRC(a6),a0
	rts
load_fpn1_6:
	fmovem.x	fp6,FP_SRC(a6)
	lea	FP_SRC(a6),a0
	rts
load_fpn1_7:
	fmovem.x	fp7,FP_SRC(a6)
	lea	FP_SRC(a6),a0
	rts

*############################################################################

*########################################################################
* XDEF ****************************************************************	#
*	load_fpn2(): load FP register value into FP_DST(a6).		#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	d0 = index of FP register to load				#
* 									#
* OUTPUT **************************************************************	#
*	FP_DST(a6) = value loaded from FP register file			#
*									#
* ALGORITHM ***********************************************************	#
*	Using the index in d0, load FP_DST(a6) with a number from the 	#
* FP register file.							#
*									#
*########################################################################

	global	load_fpn2
load_fpn2:
	move.w	(tbl_load_fpn2.b,pc,d0.w*2),d0
	jmp	(tbl_load_fpn2.b,pc,d0.w*1)

tbl_load_fpn2:
	.dc.w	load_fpn2_0-tbl_load_fpn2
	.dc.w	load_fpn2_1-tbl_load_fpn2
	.dc.w	load_fpn2_2-tbl_load_fpn2
	.dc.w	load_fpn2_3-tbl_load_fpn2
	.dc.w	load_fpn2_4-tbl_load_fpn2
	.dc.w	load_fpn2_5-tbl_load_fpn2
	.dc.w	load_fpn2_6-tbl_load_fpn2
	.dc.w	load_fpn2_7-tbl_load_fpn2

load_fpn2_0:
	move.l	0+EXC_FP0(a6),0+FP_DST(a6)
	move.l	4+EXC_FP0(a6),4+FP_DST(a6)
	move.l	8+EXC_FP0(a6),8+FP_DST(a6)
	lea	FP_DST(a6),a0
	rts
load_fpn2_1:
	move.l	0+EXC_FP1(a6),0+FP_DST(a6)
	move.l	4+EXC_FP1(a6),4+FP_DST(a6)
	move.l	8+EXC_FP1(a6),8+FP_DST(a6)
	lea	FP_DST(a6),a0
	rts
load_fpn2_2:
	fmovem.x	fp2,FP_DST(a6)
	lea	FP_DST(a6),a0
	rts
load_fpn2_3:
	fmovem.x	fp3,FP_DST(a6)
	lea	FP_DST(a6),a0
	rts
load_fpn2_4:
	fmovem.x	fp4,FP_DST(a6)
	lea	FP_DST(a6),a0
	rts
load_fpn2_5:
	fmovem.x	fp5,FP_DST(a6)
	lea	FP_DST(a6),a0
	rts
load_fpn2_6:
	fmovem.x	fp6,FP_DST(a6)
	lea	FP_DST(a6),a0
	rts
load_fpn2_7:
	fmovem.x	fp7,FP_DST(a6)
	lea	FP_DST(a6),a0
	rts

*############################################################################

*########################################################################
* XDEF ****************************************************************	#
* 	store_fpreg(): store an fp value to the fpreg designated d0.	#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	fp0 = extended precision value to store				#
*	d0  = index of floating-point register				#
* 									#
* OUTPUT **************************************************************	#
*	None								#
*									#
* ALGORITHM ***********************************************************	#
*	Store the value in fp0 to the FP register designated by the	#
* value in d0. The FP number can be DENORM or SNAN so we have to be	#
* careful that we don't take an exception here.				#
*									#
*########################################################################

	global	store_fpreg
store_fpreg:
	move.w	(tbl_store_fpreg.b,pc,d0.w*2),d0
	jmp	(tbl_store_fpreg.b,pc,d0.w*1)

tbl_store_fpreg:
	.dc.w	store_fpreg_0-tbl_store_fpreg
	.dc.w	store_fpreg_1-tbl_store_fpreg
	.dc.w	store_fpreg_2-tbl_store_fpreg
	.dc.w	store_fpreg_3-tbl_store_fpreg
	.dc.w	store_fpreg_4-tbl_store_fpreg
	.dc.w	store_fpreg_5-tbl_store_fpreg
	.dc.w	store_fpreg_6-tbl_store_fpreg
	.dc.w	store_fpreg_7-tbl_store_fpreg

store_fpreg_0:
	fmovem.x	fp0,EXC_FP0(a6)
	rts
store_fpreg_1:
	fmovem.x	fp0,EXC_FP1(a6)
	rts
store_fpreg_2:
	fmovem.x	fp0,-(sp)
	fmovem.x	(sp)+,fp2
	rts
store_fpreg_3:
	fmovem.x	fp0,-(sp)
	fmovem.x	(sp)+,fp3
	rts
store_fpreg_4:
	fmovem.x	fp0,-(sp)
	fmovem.x	(sp)+,fp4
	rts
store_fpreg_5:
	fmovem.x	fp0,-(sp)
	fmovem.x	(sp)+,fp5
	rts
store_fpreg_6:
	fmovem.x	fp0,-(sp)
	fmovem.x	(sp)+,fp6
	rts
store_fpreg_7:
	fmovem.x	fp0,-(sp)
	fmovem.x	(sp)+,fp7
	rts

*########################################################################
* XDEF ****************************************************************	#
* 	_denorm(): denormalize an intermediate result			#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT *************************************************************** #
*	a0 = points to the operand to be denormalized			#
*		(in the internal extended format)			#
*		 							#
*	d0 = rounding precision						#
*									#
* OUTPUT **************************************************************	#
*	a0 = pointer to the denormalized result				#
*		(in the internal extended format)			#
*									#
*	d0 = guard,round,sticky						#
*									#
* ALGORITHM ***********************************************************	#
* 	According to the exponent underflow threshold for the given	#
* precision, shift the mantissa bits to the right in order raise the	#
* exponent of the operand to the threshold value. While shifting the 	#
* mantissa bits right, maintain the value of the guard, round, and 	#
* sticky bits.								#
* other notes:								#
*	(1) _denorm() is called by the underflow routines		#
*	(2) _denorm() does NOT affect the status register		#
*									#
*########################################################################

*
* table of exponent threshold values for each precision
*
tbl_thresh:
	.dc.w	$0
	.dc.w	sgl_thresh
	.dc.w	dbl_thresh

	global	_denorm
_denorm:
*
* Load the exponent threshold for the precision selected and check
* to see if (threshold - exponent) is > 65 in which case we can 
* simply calculate the sticky bit and zero the mantissa. otherwise
* we have to call the denormalization routine.
*
	lsr.b	#$2,d0			* shift prec to lo bits
	move.w	(tbl_thresh.b,pc,d0.w*2),d1	* load prec threshold
	move.w	d1,d0			* copy d1 into d0
	sub.w	FTEMP_EX.w(a0),d0	* diff = threshold - exp
	cmpi.w	#66,d0			* is diff > 65? (mant + g,r bits)
	bpl.b	denorm_set_stky		* yes; just calc sticky

	clr.l	d0			* clear g,r,s
	btst	#inex2_bit,FPSR_EXCEPT(a6)	* yes; was INEX2 set?
	beq.b	denorm_call		* no; don't change anything
	bset	#29,d0			* yes; set sticky bit

denorm_call:
	bsr.l	dnrm_lp			* denormalize the number
	rts

*
* all bit would have been shifted off during the denorm so simply
* calculate if the sticky should be set and clear the entire mantissa.
*
denorm_set_stky:
	move.l	#$20000000,d0		* set sticky bit in return value
	move.w	d1,FTEMP_EX.w(a0)	* load exp with threshold
	clr.l	FTEMP_HI(a0)		* set d1 = 0 (ms mantissa)
	clr.l	FTEMP_LO(a0)		* set d2 = 0 (ms mantissa)
	rts

*									#
* dnrm_lp(): normalize exponent/mantissa to specified threshhold	#
*									#
* INPUT:								#
*	%a0	   : points to the operand to be denormalized		#
*	%d0{31:29} : initial guard,round,sticky				#
*	%d1{15:0}  : denormalization threshold				#
* OUTPUT:								#
*	%a0	   : points to the denormalized operand		 	#
*	%d0{31:29} : final guard,round,sticky				#
*									#

* *** Local Equates *** #
GRS	set	L_SCR2			* g,r,s temp storage
FTEMP_LO2	set	L_SCR1		* FTEMP_LO copy

	global	dnrm_lp
dnrm_lp:

*
* make a copy of FTEMP_LO and place the g,r,s bits directly after it
* in memory so as to make the bitfield extraction for denormalization easier.
*
	move.l	FTEMP_LO(a0),FTEMP_LO2(a6)	* make FTEMP_LO copy
	move.l	d0,GRS(a6)		* place g,r,s after it

*
* check to see how much less than the underflow threshold the operand
* exponent is. 
*
	move.l	d1,d0			* copy the denorm threshold
	sub.w	FTEMP_EX.w(a0),d1	* d1 = threshold - uns exponent
	ble.b	dnrm_no_lp		* d1 <= 0
	cmpi.w	#$20,d1			* is ( 0 <= d1 < 32) ?
	blt.b	case_1			* yes
	cmpi.w	#$40,d1			* is (32 <= d1 < 64) ?
	blt.b	case_2			* yes
	bra.w	case_3			* (d1 >= 64)

*
* No normalization necessary
*
dnrm_no_lp:
	move.l	GRS(a6),d0		* restore original g,r,s
	rts

*
* case (0<d1<32)
*
* %d0 = denorm threshold
* %d1 = "n" = amt to shift
*
*	---------------------------------------------------------
*	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
*	---------------------------------------------------------
*	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
*	\	   \		      \			 \
*	 \	    \		       \		  \
*	  \	     \			\		   \
*	   \	      \			 \		    \
*	    \	       \		  \		     \
*	     \		\		   \		      \
*	      \		 \		    \		       \
*	       \	  \		     \			\
*	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->	
*	---------------------------------------------------------
*	|0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs		|
*	---------------------------------------------------------
*
case_1:
	move.l	d2,-(sp)		* create temp storage

	move.w	d0,FTEMP_EX.w(a0)	* exponent = denorm threshold
	move.l	#32,d0
	sub.w	d1,d0			* %d0 = 32 - %d1

	cmpi.w	#29,d1			* is shft amt >= 29
	blt.b	case1_extract		* no; no fix needed
	move.b	GRS(a6),d2
	or.b	d2,3+FTEMP_LO2(a6)

case1_extract:
	bfextu	FTEMP_HI(a0){#0:d0},d2		* %d2 = new FTEMP_HI
	bfextu	FTEMP_HI(a0){d0:#32},d1		* %d1 = new FTEMP_LO
	bfextu	FTEMP_LO2(a6){d0:#32},d0	* %d0 = new G,R,S

	move.l	d2,FTEMP_HI(a0)		* store new FTEMP_HI
	move.l	d1,FTEMP_LO(a0)		* store new FTEMP_LO

	bftst	d0{#2:#30}		* were bits shifted off?
	beq.b	case1_sticky_clear	* no; go finish
	bset	#rnd_stky_bit,d0	* yes; set sticky bit

case1_sticky_clear:
	andi.l	#$e0000000,d0		* clear all but G,R,S
	move.l	(sp)+,d2		* restore temp register
	rts

*
* case (32<=d1<64)
*
* %d0 = denorm threshold
* %d1 = "n" = amt to shift
*
*	---------------------------------------------------------
*	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
*	---------------------------------------------------------
*	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
*	\	   \		      \
*	 \	    \		       \
*	  \	     \			-------------------
*	   \	      --------------------		   \
*	    -------------------	  	  \		    \
*	     		       \	   \		     \
*	      		 	\     	    \		      \
*	       		  	 \	     \		       \
*	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
*	---------------------------------------------------------
*	|0...............0|0....0| NEW_LO     |grs		|
*	---------------------------------------------------------
*
case_2:
	move.l	d2,-(sp)		* create temp storage

	move.w	d0,FTEMP_EX.w(a0)	* exponent = denorm threshold
	subi.w	#$20,d1			* %d1 now between 0 and 32
	move.l	#$20,d0
	sub.w	d1,d0			* %d0 = 32 - %d1

* subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
* the number of bits to check for the sticky detect.
* it only plays a role in shift amounts of 61-63.
	move.b	GRS(a6),d2
	or.b	d2,3+FTEMP_LO2(a6)

	bfextu	FTEMP_HI(a0){#0:d0},d2		* %d2 = new FTEMP_LO
	bfextu	FTEMP_HI(a0){d0:#32},d1		* %d1 = new G,R,S

	bftst	d1{#2:#30}		* were any bits shifted off?
	bne.b	case2_set_sticky	* yes; set sticky bit
	bftst	FTEMP_LO2(a6){d0:#31}	* were any bits shifted off?
	bne.b	case2_set_sticky	* yes; set sticky bit

	move.l	d1,d0			* move new G,R,S to %d0
	bra.b	case2_end

case2_set_sticky:
	move.l	d1,d0			* move new G,R,S to %d0
	bset	#rnd_stky_bit,d0	* set sticky bit

case2_end:
	clr.l	FTEMP_HI(a0)		* store FTEMP_HI = 0
	move.l	d2,FTEMP_LO(a0)		* store FTEMP_LO
	andi.l	#$e0000000,d0		* clear all but G,R,S

	move.l	(sp)+,d2		* restore temp register
	rts

*
* case (d1>=64)
*
* %d0 = denorm threshold
* %d1 = amt to shift
*
case_3:
	move.w	d0,FTEMP_EX.w(a0)	* insert denorm threshold

	cmpi.w	#65,d1			* is shift amt > 65?
	blt.b	case3_64		* no; it's == 64
	beq.b	case3_65		* no; it's == 65

*
* case (d1>65)
*
* Shift value is > 65 and out of range. All bits are shifted off.
* Return a zero mantissa with the sticky bit set
*
	clr.l	FTEMP_HI(a0)		* clear hi(mantissa)
	clr.l	FTEMP_LO(a0)		* clear lo(mantissa)
	move.l	#$20000000,d0		* set sticky bit
	rts

*
* case (d1 == 64)
*
*	---------------------------------------------------------
*	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
*	---------------------------------------------------------
*	<-------(32)------>
*	\	   	   \
*	 \	    	    \
*	  \	     	     \
*	   \	      	      ------------------------------
*	    -------------------------------		    \
*	     		       		   \		     \
*	      		 	     	    \		      \
*	       		  	 	     \		       \
*					      <-------(32)------>
*	---------------------------------------------------------
*	|0...............0|0................0|grs		|
*	---------------------------------------------------------
*
case3_64:
	move.l	FTEMP_HI(a0),d0		* fetch hi(mantissa)
	move.l	d0,d1			* make a copy
	andi.l	#$c0000000,d0		* extract G,R
	andi.l	#$3fffffff,d1		* extract other bits

	bra.b	case3_complete

*
* case (d1 == 65)
*
*	---------------------------------------------------------
*	|     FTEMP_HI	  |    	FTEMP_LO     |grs000.........000|
*	---------------------------------------------------------
*	<-------(32)------>
*	\	   	   \
*	 \	    	    \
*	  \	     	     \
*	   \	      	      ------------------------------
*	    --------------------------------		    \
*	     		       		    \		     \
*	      		 	     	     \		      \
*	       		  	 	      \		       \
*					       <-------(31)----->
*	---------------------------------------------------------
*	|0...............0|0................0|0rs		|
*	---------------------------------------------------------
*
case3_65:
	move.l	FTEMP_HI(a0),d0		* fetch hi(mantissa)
	andi.l	#$80000000,d0		* extract R bit
	lsr.l	#$1,d0			* shift high bit into R bit
	andi.l	#$7fffffff,d1		* extract other bits

case3_complete:
* last operation done was an "and" of the bits shifted off so the condition
* codes are already set so branch accordingly.
	bne.b	case3_set_sticky	* yes; go set new sticky
	tst.l	FTEMP_LO(a0)		* were any bits shifted off?
	bne.b	case3_set_sticky	* yes; go set new sticky
	tst.b	GRS(a6)			* were any bits shifted off?
	bne.b	case3_set_sticky	* yes; go set new sticky

*
* no bits were shifted off so don't set the sticky bit.
* the guard and
* the entire mantissa is zero.
*
	clr.l	FTEMP_HI(a0)		* clear hi(mantissa)
	clr.l	FTEMP_LO(a0)		* clear lo(mantissa)
	rts

*
* some bits were shifted off so set the sticky bit.
* the entire mantissa is zero.
*
case3_set_sticky:
	bset	#rnd_stky_bit,d0	* set new sticky bit
	clr.l	FTEMP_HI(a0)		* clear hi(mantissa)
	clr.l	FTEMP_LO(a0)		* clear lo(mantissa)
	rts

*########################################################################
* XDEF ****************************************************************	#
*	_round(): round result according to precision/mode		#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	a0	  = ptr to input operand in internal extended format 	#
*	d1(hi)    = contains rounding precision:			#
*			ext = $0000xxxx					#
*			sgl = $0004xxxx					#
*			dbl = $0008xxxx					#
*	d1(lo)	  = contains rounding mode:				#
*			RN  = $xxxx0000					#
*			RZ  = $xxxx0001					#
*			RM  = $xxxx0002					#
*			RP  = $xxxx0003					#
*	d0{31:29} = contains the g,r,s bits (extended)			#
*									#
* OUTPUT **************************************************************	#
*	a0 = pointer to rounded result					#
*									#
* ALGORITHM ***********************************************************	#
*	On return the value pointed to by a0 is correctly rounded,	#
*	a0 is preserved and the g-r-s bits in d0 are cleared.		#
*	The result is not typed - the tag field is invalid.  The	#
*	result is still in the internal extended format.		#
*									#
*	The INEX bit of USER_FPSR will be set if the rounded result was	#
*	inexact (i.e. if any of the g-r-s bits were set).		#
*									#
*########################################################################

	global	_round
_round:
*
* ext_grs() looks at the rounding precision and sets the appropriate
* G,R,S bits.
* If (G,R,S == 0) then result is exact and round is done, else set 
* the inex flag in status reg and continue.
*
	bsr.l	ext_grs			* extract G,R,S

	tst.l	d0			* are G,R,S zero?
	beq.w	truncate		* yes; round is complete

	ori.w	#inx2a_mask,2+USER_FPSR(a6)	* set inex2/ainex

*
* Use rounding mode as an index into a jump table for these modes.
* All of the following assumes grs != 0.
*
	move.w	(tbl_mode.b,pc,d1.w*2),a1	* load jump offset
	jmp	(tbl_mode.b,pc,a1.l)	* jmp to rnd mode handler

tbl_mode:
	.dc.w	rnd_near-tbl_mode
	.dc.w	truncate-tbl_mode	* RZ always truncates
	.dc.w	rnd_mnus-tbl_mode
	.dc.w	rnd_plus-tbl_mode

*################################################################
*	ROUND PLUS INFINITY					#
*								#
*	If sign of fp number = 0 (positive), then add 1 to l.	#
*################################################################
rnd_plus:
	tst.b	FTEMP_SGN(a0)		* check for sign
	bmi.w	truncate		* if positive then truncate

	move.l	#$ffffffff,d0		* force g,r,s to be all f's
	swap	d1			* set up d1 for round prec.

	cmpi.b	#s_mode,d1		* is prec = sgl?
	beq.w	add_sgl			* yes
	bgt.w	add_dbl			* no; it's dbl
	bra.w	add_ext			* no; it's ext

*################################################################
*	ROUND MINUS INFINITY					#
*								#
*	If sign of fp number = 1 (negative), then add 1 to l.	#
*################################################################
rnd_mnus:
	tst.b	FTEMP_SGN(a0)		* check for sign	
	bpl.w	truncate		* if negative then truncate

	move.l	#$ffffffff,d0		* force g,r,s to be all f's
	swap	d1			* set up d1 for round prec.

	cmpi.b	#s_mode,d1		* is prec = sgl?
	beq.w	add_sgl			* yes
	bgt.w	add_dbl			* no; it's dbl
	bra.w	add_ext			* no; it's ext

*################################################################
*	ROUND NEAREST						#
*								#
*	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
*	Note that this will round to even in case of a tie.	#
*################################################################
rnd_near:
	asl.l	#$1,d0			* shift g-bit to c-bit
	bcc.w	truncate		* if (g=1) then

	swap	d1			* set up d1 for round prec.

	cmpi.b	#s_mode,d1		* is prec = sgl?
	beq.w	add_sgl			* yes
	bgt.w	add_dbl			* no; it's dbl
	bra.w	add_ext			* no; it's ext

* *** LOCAL EQUATES ***
ad_1_sgl	set	$00000100	* constant to add 1 to l-bit in sgl prec
ad_1_dbl	set	$00000800	* constant to add 1 to l-bit in dbl prec

*########################
*	ADD SINGLE	#
*########################
add_sgl:
	addi.l	#ad_1_sgl,FTEMP_HI(a0)
	bcc.b	scc_clr			* no mantissa overflow
	roxr.w	FTEMP_HI(a0)		* shift v-bit back in
	roxr.w	FTEMP_HI+2(a0)		* shift v-bit back in
	addq.w	#$1,FTEMP_EX.w(a0)	* and incr exponent
scc_clr:
	tst.l	d0			* test for rs = 0
	bne.b	sgl_done
	andi.w	#$fe00,FTEMP_HI+2(a0)		* clear the l-bit
sgl_done:
	andi.l	#$ffffff00,FTEMP_HI(a0)		* truncate bits beyond sgl limit
	clr.l	FTEMP_LO(a0)		* clear d2
	rts

*########################
*	ADD EXTENDED	#
*########################
add_ext:
	addq.l	#1,FTEMP_LO(a0)		* add 1 to l-bit
	bcc.b	xcc_clr			* test for carry out
	addq.l	#1,FTEMP_HI(a0)		* propogate carry
	bcc.b	xcc_clr
	roxr.w	FTEMP_HI(a0)		* mant is 0 so restore v-bit
	roxr.w	FTEMP_HI+2(a0)		* mant is 0 so restore v-bit
	roxr.w	FTEMP_LO(a0)
	roxr.w	FTEMP_LO+2(a0)
	addq.w	#$1,FTEMP_EX.w(a0)	* and inc exp
xcc_clr:
	tst.l	d0			* test rs = 0
	bne.b	add_ext_done
	andi.b	#$fe,FTEMP_LO+3(a0)	* clear the l bit
add_ext_done:
	rts

*########################
*	ADD DOUBLE	#
*########################
add_dbl:
	addi.l	#ad_1_dbl,FTEMP_LO(a0)		* add 1 to lsb
	bcc.b	dcc_clr			* no carry
	addq.l	#$1,FTEMP_HI(a0)	* propogate carry
	bcc.b	dcc_clr			* no carry

	roxr.w	FTEMP_HI(a0)		* mant is 0 so restore v-bit
	roxr.w	FTEMP_HI+2(a0)		* mant is 0 so restore v-bit
	roxr.w	FTEMP_LO(a0)
	roxr.w	FTEMP_LO+2(a0)
	addq.w	#$1,FTEMP_EX.w(a0)	* incr exponent
dcc_clr:
	tst.l	d0			* test for rs = 0
	bne.b	dbl_done
	andi.w	#$f000,FTEMP_LO+2(a0)		* clear the l-bit

dbl_done:
	andi.l	#$fffff800,FTEMP_LO(a0)		* truncate bits beyond dbl limit
	rts

*##########################
* Truncate all other bits #
*##########################
truncate:
	swap	d1			* select rnd prec

	cmpi.b	#s_mode,d1		* is prec sgl?
	beq.w	sgl_done		* yes
	bgt.b	dbl_done		* no; it's dbl
	rts				* no; it's ext


*
* ext_grs(): extract guard, round and sticky bits according to
*	     rounding precision.
*
* INPUT
*	d0	   = extended precision g,r,s (in d0{31:29})
*	d1 	   = {PREC,ROUND}
* OUTPUT
*	d0{31:29}  = guard, round, sticky
*
* The ext_grs extract the guard/round/sticky bits according to the
* selected rounding precision. It is called by the round subroutine
* only.  All registers except d0 are kept intact. d0 becomes an
* updated guard,round,sticky in d0{31:29}
*
* Notes: the ext_grs uses the round PREC, and therefore has to swap d1
*	 prior to usage, and needs to restore d1 to original. this
*	 routine is tightly tied to the round routine and not meant to
*	 uphold standard subroutine calling practices.
*

ext_grs:
	swap	d1			* have d1.w point to round precision
	tst.b	d1			* is rnd prec = extended?
	bne.b	ext_grs_not_ext		* no; go handle sgl or dbl

*
* %d0 actually already hold g,r,s since _round() had it before calling
* this function. so, as long as we don't disturb it, we are "returning" it.
*
ext_grs_ext:
	swap	d1			* yes; return to correct positions
	rts

ext_grs_not_ext:
	movem.l	d2-d3,-(sp)		* make some temp registers {d2/d3}

	cmpi.b	#s_mode,d1		* is rnd prec = sgl?
	bne.b	ext_grs_dbl		* no; go handle dbl

*
* sgl:
*	96		64	  40	32		0
*	-----------------------------------------------------
*	| EXP	|XXXXXXX|	  |xx	|		|grs|
*	-----------------------------------------------------
*			<--(24)--->nn\			   /
*				   ee ---------------------
*				   ww		|
*						v
*				   gr	   new sticky
*
ext_grs_sgl:
	bfextu	FTEMP_HI(a0){#24:#2},d3		* sgl prec. g-r are 2 bits right
	move.l	#30,d2			* of the sgl prec. limits
	lsl.l	d2,d3			* shift g-r bits to MSB of d3
	move.l	FTEMP_HI(a0),d2		* get word 2 for s-bit test
	andi.l	#$0000003f,d2		* s bit is the or of all other 
	bne.b	ext_grs_st_stky		* bits to the right of g-r
	tst.l	FTEMP_LO(a0)		* test lower mantissa
	bne.b	ext_grs_st_stky		* if any are set, set sticky
	tst.l	d0			* test original g,r,s
	bne.b	ext_grs_st_stky		* if any are set, set sticky
	bra.b	ext_grs_end_sd		* if words 3 and 4 are clr, exit

*
* dbl:
*	96		64	  	32	 11	0
*	-----------------------------------------------------
*	| EXP	|XXXXXXX|	  	|	 |xx	|grs|
*	-----------------------------------------------------
*						  nn\	    /
*						  ee -------
*						  ww	|
*							v
*						  gr	new sticky
*
ext_grs_dbl:
	bfextu	FTEMP_LO(a0){#21:#2},d3		* dbl-prec. g-r are 2 bits right
	move.l	#30,d2			* of the dbl prec. limits
	lsl.l	d2,d3			* shift g-r bits to the MSB of d3
	move.l	FTEMP_LO(a0),d2		* get lower mantissa  for s-bit test
	andi.l	#$000001ff,d2		* s bit is the or-ing of all 
	bne.b	ext_grs_st_stky		* other bits to the right of g-r
	tst.l	d0			* test word original g,r,s
	bne.b	ext_grs_st_stky		* if any are set, set sticky
	bra.b	ext_grs_end_sd		* if clear, exit

ext_grs_st_stky:
	bset	#rnd_stky_bit,d3	* set sticky bit
ext_grs_end_sd:
	move.l	d3,d0			* return grs to d0

	movem.l	(sp)+,d2-d3		* restore scratch registers {d2/d3}

	swap	d1			* restore d1 to original
	rts

*########################################################################
* norm(): normalize the mantissa of an extended precision input. the	#
*	  input operand should not be normalized already.		#
*									#
* XDEF ****************************************************************	#
*	norm()								#
*									#
* XREF **************************************************************** #
*	none								#
*									#
* INPUT *************************************************************** #
*	a0 = pointer fp extended precision operand to normalize		#
*									#
* OUTPUT ************************************************************** #
* 	d0 = number of bit positions the mantissa was shifted		#
*	a0 = the input operand's mantissa is normalized; the exponent	#
*	     is unchanged.						#
*									#
*########################################################################
	global	norm
norm:
	move.l	d2,-(sp)		* create some temp regs
	move.l	d3,-(sp)

	move.l	FTEMP_HI(a0),d0		* load hi(mantissa)
	move.l	FTEMP_LO(a0),d1		* load lo(mantissa)

	bfffo	d0{#0:#32},d2		* how many places to shift?
	beq.b	norm_lo			* hi(man) is all zeroes!

norm_hi:
	lsl.l	d2,d0			* left shift hi(man)
	bfextu	d1{#0:d2},d3		* extract lo bits

	or.l	d3,d0			* create hi(man)
	lsl.l	d2,d1			* create lo(man)

	move.l	d0,FTEMP_HI(a0)		* store new hi(man)
	move.l	d1,FTEMP_LO(a0)		* store new lo(man)

	move.l	d2,d0			* return shift amount

	move.l	(sp)+,d3		* restore temp regs
	move.l	(sp)+,d2

	rts

norm_lo:
	bfffo	d1{#0:#32},d2		* how many places to shift?
	lsl.l	d2,d1			* shift lo(man)
	addi.l	#32,d2			* add 32 to shft amount

	move.l	d1,FTEMP_HI(a0)		* store hi(man)
	clr.l	FTEMP_LO(a0)		* lo(man) is now zero

	move.l	d2,d0			* return shift amount

	move.l	(sp)+,d3		* restore temp regs
	move.l	(sp)+,d2

	rts

*########################################################################
* unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
*		- returns corresponding optype tag			#
*									#
* XDEF ****************************************************************	#
*	unnorm_fix()							#
*									#
* XREF **************************************************************** #
*	norm() - normalize the mantissa					#
*									#
* INPUT *************************************************************** #
*	a0 = pointer to unnormalized extended precision number		#
*									#
* OUTPUT ************************************************************** #
*	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
*	a0 = input operand has been converted to a norm, denorm, or	#
*	     zero; both the exponent and mantissa are changed.		#
*									#
*########################################################################

	global	unnorm_fix
unnorm_fix:
	bfffo	FTEMP_HI(a0){#0:#32},d0		* how many shifts are needed?
	bne.b	unnorm_shift		* hi(man) is not all zeroes

*
* hi(man) is all zeroes so see if any bits in lo(man) are set
*
unnorm_chk_lo:
	bfffo	FTEMP_LO(a0){#0:#32},d0		* is operand really a zero?
	beq.w	unnorm_zero		* yes

	addi.w	#32,d0			* no; fix shift distance

*
* d0 = # shifts needed for complete normalization
*
unnorm_shift:
	clr.l	d1			* clear top word
	move.w	FTEMP_EX.w(a0),d1	* extract exponent
	andi.w	#$7fff,d1		* strip off sgn

	cmp.w	d1,d0			* will denorm push exp < 0?
	bgt.b	unnorm_nrm_zero		* yes; denorm only until exp = 0

*
* exponent would not go < 0. therefore, number stays normalized
*
	sub.w	d0,d1			* shift exponent value
	move.w	FTEMP_EX.w(a0),d0	* load old exponent
	andi.w	#$8000,d0		* save old sign
	or.w	d0,d1			* {sgn,new exp}
	move.w	d1,FTEMP_EX.w(a0)	* insert new exponent

	bsr.l	norm			* normalize UNNORM

	move.b	#NORM,d0		* return new optype tag
	rts

*
* exponent would go < 0, so only denormalize until exp = 0
*
unnorm_nrm_zero:
	cmpi.b	#32,d1			* is exp <= 32?
	bgt.b	unnorm_nrm_zero_lrg	* no; go handle large exponent

	bfextu	FTEMP_HI(a0){d1:#32},d0		* extract new hi(man)
	move.l	d0,FTEMP_HI(a0)		* save new hi(man)

	move.l	FTEMP_LO(a0),d0		* fetch old lo(man)
	lsl.l	d1,d0			* extract new lo(man)
	move.l	d0,FTEMP_LO(a0)		* save new lo(man)

	andi.w	#$8000,FTEMP_EX.w(a0)	* set exp = 0

	move.b	#DENORM,d0		* return new optype tag
	rts

*
* only mantissa bits set are in lo(man)
*
unnorm_nrm_zero_lrg:
	subi.w	#32,d1			* adjust shft amt by 32

	move.l	FTEMP_LO(a0),d0		* fetch old lo(man)
	lsl.l	d1,d0			* left shift lo(man)

	move.l	d0,FTEMP_HI(a0)		* store new hi(man)
	clr.l	FTEMP_LO(a0)		* lo(man) = 0

	andi.w	#$8000,FTEMP_EX.w(a0)	* set exp = 0

	move.b	#DENORM,d0		* return new optype tag
	rts

*
* whole mantissa is zero so this UNNORM is actually a zero
*
unnorm_zero:
	andi.w	#$8000,FTEMP_EX.w(a0)	* force exponent to zero

	move.b	#ZERO,d0		* fix optype tag
	rts

*########################################################################
* XDEF ****************************************************************	#
* 	set_tag_x(): return the optype of the input ext fp number	#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precision operand			#
* 									#
* OUTPUT **************************************************************	#
*	d0 = value of type tag						#
* 		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
*									#
* ALGORITHM ***********************************************************	#
*	Simply test the exponent, j-bit, and mantissa values to 	#
* determine the type of operand.					#
*	If it's an unnormalized zero, alter the operand and force it	#
* to be a normal zero.							#
*									#
*########################################################################

	global	set_tag_x
set_tag_x:
	move.w	FTEMP_EX.w(a0),d0	* extract exponent
	andi.w	#$7fff,d0		* strip off sign
	cmpi.w	#$7fff,d0		* is (EXP == MAX)?
	beq.b	inf_or_nan_x
not_inf_or_nan_x:
	btst	#$7,FTEMP_HI(a0)
	beq.b	not_norm_x
is_norm_x:
	move.b	#NORM,d0
	rts
not_norm_x:
	tst.w	d0			* is exponent = 0?
	bne.b	is_unnorm_x
not_unnorm_x:
	tst.l	FTEMP_HI(a0)
	bne.b	is_denorm_x
	tst.l	FTEMP_LO(a0)
	bne.b	is_denorm_x
is_zero_x:
	move.b	#ZERO,d0
	rts
is_denorm_x:
	move.b	#DENORM,d0
	rts
* must distinguish now "Unnormalized zeroes" which we
* must convert to zero.
is_unnorm_x:
	tst.l	FTEMP_HI(a0)
	bne.b	is_unnorm_reg_x
	tst.l	FTEMP_LO(a0)
	bne.b	is_unnorm_reg_x
* it's an "unnormalized zero". let's convert it to an actual zero...
	andi.w	#$8000,FTEMP_EX.w(a0)	* clear exponent
	move.b	#ZERO,d0
	rts
is_unnorm_reg_x:
	move.b	#UNNORM,d0
	rts
inf_or_nan_x:
	tst.l	FTEMP_LO(a0)
	bne.b	is_nan_x
	move.l	FTEMP_HI(a0),d0
	andi.l	#$7fffffff,d0		* msb is a don't care!
	bne.b	is_nan_x
is_inf_x:
	move.b	#INF,d0
	rts
is_nan_x:
	btst	#$6,FTEMP_HI(a0)
	beq.b	is_snan_x
	move.b	#QNAN,d0
	rts
is_snan_x:
	move.b	#SNAN,d0
	rts

*########################################################################
* XDEF ****************************************************************	#
* 	set_tag_d(): return the optype of the input dbl fp number	#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	a0 = points to double precision operand				#
* 									#
* OUTPUT **************************************************************	#
*	d0 = value of type tag						#
* 		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
*									#
* ALGORITHM ***********************************************************	#
*	Simply test the exponent, j-bit, and mantissa values to 	#
* determine the type of operand.					#
*									#
*########################################################################

	global	set_tag_d
set_tag_d:
	move.l	FTEMP.w(a0),d0
	move.l	d0,d1

	andi.l	#$7ff00000,d0
	beq.b	zero_or_denorm_d

	cmpi.l	#$7ff00000,d0
	beq.b	inf_or_nan_d

is_norm_d:
	move.b	#NORM,d0
	rts
zero_or_denorm_d:
	andi.l	#$000fffff,d1
	bne.l	is_denorm_d
	tst.l	4+FTEMP(a0)
	bne.l	is_denorm_d
is_zero_d:
	move.b	#ZERO,d0
	rts
is_denorm_d:
	move.b	#DENORM,d0
	rts
inf_or_nan_d:
	andi.l	#$000fffff,d1
	bne.l	is_nan_d
	tst.l	4+FTEMP(a0)
	bne.l	is_nan_d
is_inf_d:
	move.b	#INF,d0
	rts
is_nan_d:
	btst	#19,d1
	bne.l	is_qnan_d
is_snan_d:
	move.b	#SNAN,d0
	rts
is_qnan_d:
	move.b	#QNAN,d0
	rts

*########################################################################
* XDEF ****************************************************************	#
* 	set_tag_s(): return the optype of the input sgl fp number	#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to single precision operand			#
* 									#
* OUTPUT **************************************************************	#
*	d0 = value of type tag						#
* 		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
*									#
* ALGORITHM ***********************************************************	#
*	Simply test the exponent, j-bit, and mantissa values to 	#
* determine the type of operand.					#
*									#
*########################################################################

	global	set_tag_s
set_tag_s:
	move.l	FTEMP.w(a0),d0
	move.l	d0,d1

	andi.l	#$7f800000,d0
	beq.b	zero_or_denorm_s

	cmpi.l	#$7f800000,d0
	beq.b	inf_or_nan_s

is_norm_s:
	move.b	#NORM,d0
	rts
zero_or_denorm_s:
	andi.l	#$007fffff,d1
	bne.l	is_denorm_s
is_zero_s:
	move.b	#ZERO,d0
	rts
is_denorm_s:
	move.b	#DENORM,d0
	rts
inf_or_nan_s:
	andi.l	#$007fffff,d1
	bne.l	is_nan_s
is_inf_s:
	move.b	#INF,d0
	rts
is_nan_s:
	btst	#22,d1
	bne.l	is_qnan_s
is_snan_s:
	move.b	#SNAN,d0
	rts
is_qnan_s:
	move.b	#QNAN,d0
	rts

*########################################################################
* XDEF ****************************************************************	#
* 	unf_res(): routine to produce default underflow result of a 	#
*	 	   scaled extended precision number; this is used by 	#
*		   fadd/fdiv/fmul/etc. emulation routines.		#
* 	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
*		    single round prec and extended prec mode.		#
*									#
* XREF ****************************************************************	#
*	_denorm() - denormalize according to scale factor		#
* 	_round() - round denormalized number according to rnd prec	#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precison operand			#
*	d0 = scale factor						#
*	d1 = rounding precision/mode					#
*									#
* OUTPUT **************************************************************	#
*	a0 = pointer to default underflow result in extended precision	#
*	d0.b = result FPSR_cc which caller may or may not want to save	#
*									#
* ALGORITHM ***********************************************************	#
* 	Convert the input operand to "internal format" which means the	#
* exponent is extended to 16 bits and the sign is stored in the unused	#
* portion of the extended precison operand. Denormalize the number	#
* according to the scale factor passed in d0. Then, round the 		#
* denormalized result.							#
* 	Set the FPSR_exc bits as appropriate but return the cc bits in	#
* d0 in case the caller doesn't want to save them (as is the case for	#
* fmove out).								#
* 	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
* precision and the rounding mode to single.				#
*									#
*########################################################################
	global	unf_res
unf_res:
	move.l	d1,-(sp)		* save rnd prec,mode on stack

	btst	#$7,FTEMP_EX.w(a0)	* make "internal" format
	sne	FTEMP_SGN(a0)

	move.w	FTEMP_EX.w(a0),d1	* extract exponent
	andi.w	#$7fff,d1
	sub.w	d0,d1
	move.w	d1,FTEMP_EX.w(a0)	* insert 16 bit exponent

	move.l	a0,-(sp)		* save operand ptr during calls

	move.l	$4(sp),d0		* pass rnd prec.
	andi.w	#$00c0,d0
	lsr.w	#$4,d0
	bsr.l	_denorm			* denorm result

	move.l	(sp),a0
	move.w	$6(sp),d1		* load prec:mode into %d1
	andi.w	#$c0,d1			* extract rnd prec
	lsr.w	#$4,d1
	swap	d1
	move.w	$6(sp),d1
	andi.w	#$30,d1
	lsr.w	#$4,d1
	bsr.l	_round			* round the denorm

	move.l	(sp)+,a0

* result is now rounded properly. convert back to normal format
	bclr	#$7,FTEMP_EX.w(a0)	* clear sgn first; may have residue
	tst.b	FTEMP_SGN(a0)		* is "internal result" sign set?
	beq.b	unf_res_chkifzero	* no; result is positive
	bset	#$7,FTEMP_EX.w(a0)	* set result sgn
	clr.b	FTEMP_SGN(a0)		* clear temp sign

* the number may have become zero after rounding. set ccodes accordingly.
unf_res_chkifzero:
	clr.l	d0
	tst.l	FTEMP_HI(a0)		* is value now a zero?
	bne.b	unf_res_cont		* no
	tst.l	FTEMP_LO(a0)
	bne.b	unf_res_cont		* no
*	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
	bset	#z_bit,d0		* yes; set zero ccode bit

unf_res_cont:

*
* can inex1 also be set along with unfl and inex2???
*
* we know that underflow has occurred. aunfl should be set if INEX2 is also set.
*
	btst	#inex2_bit,FPSR_EXCEPT(a6)	* is INEX2 set?
	beq.b	unf_res_end		* no
	bset	#aunfl_bit,FPSR_AEXCEPT(a6)	* yes; set aunfl

unf_res_end:
	add.l	#$4,sp			* clear stack
	rts

* unf_res() for fsglmul() and fsgldiv().
	global	unf_res4
unf_res4:
	move.l	d1,-(sp)		* save rnd prec,mode on stack

	btst	#$7,FTEMP_EX.w(a0)	* make "internal" format
	sne	FTEMP_SGN(a0)

	move.w	FTEMP_EX.w(a0),d1	* extract exponent
	andi.w	#$7fff,d1
	sub.w	d0,d1
	move.w	d1,FTEMP_EX.w(a0)	* insert 16 bit exponent

	move.l	a0,-(sp)		* save operand ptr during calls

	clr.l	d0			* force rnd prec = ext
	bsr.l	_denorm			* denorm result

	move.l	(sp),a0
	move.w	#s_mode,d1		* force rnd prec = sgl
	swap	d1
	move.w	$6(sp),d1		* load rnd mode
	andi.w	#$30,d1			* extract rnd prec
	lsr.w	#$4,d1
	bsr.l	_round			* round the denorm

	move.l	(sp)+,a0

* result is now rounded properly. convert back to normal format
	bclr	#$7,FTEMP_EX.w(a0)	* clear sgn first; may have residue
	tst.b	FTEMP_SGN(a0)		* is "internal result" sign set?
	beq.b	unf_res4_chkifzero	* no; result is positive
	bset	#$7,FTEMP_EX.w(a0)	* set result sgn
	clr.b	FTEMP_SGN(a0)		* clear temp sign

* the number may have become zero after rounding. set ccodes accordingly.
unf_res4_chkifzero:
	clr.l	d0
	tst.l	FTEMP_HI(a0)		* is value now a zero?
	bne.b	unf_res4_cont		* no
	tst.l	FTEMP_LO(a0)
	bne.b	unf_res4_cont		* no
*	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
	bset	#z_bit,d0		* yes; set zero ccode bit

unf_res4_cont:

*
* can inex1 also be set along with unfl and inex2???
*
* we know that underflow has occurred. aunfl should be set if INEX2 is also set.
*
	btst	#inex2_bit,FPSR_EXCEPT(a6)	* is INEX2 set?
	beq.b	unf_res4_end		* no
	bset	#aunfl_bit,FPSR_AEXCEPT(a6)	* yes; set aunfl

unf_res4_end:
	add.l	#$4,sp			* clear stack
	rts

*########################################################################
* XDEF ****************************************************************	#
*	ovf_res(): routine to produce the default overflow result of	#
*		   an overflowing number.				#
*	ovf_res2(): same as above but the rnd mode/prec are passed	#
*		    differently.					#
*									#
* XREF ****************************************************************	#
*	none								#
*									#
* INPUT ***************************************************************	#
*	d1.b 	= '-1' => (-); '0' => (+)				#
*   ovf_res():								#
*	d0 	= rnd mode/prec						#
*   ovf_res2():								#
*	hi(d0) 	= rnd prec						#
*	lo(d0)	= rnd mode						#
*									#
* OUTPUT **************************************************************	#
*	a0   	= points to extended precision result			#
*	d0.b 	= condition code bits					#
*									#
* ALGORITHM ***********************************************************	#
*	The default overflow result can be determined by the sign of	#
* the result and the rounding mode/prec in effect. These bits are	#
* concatenated together to create an index into the default result 	#
* table. A pointer to the correct result is returned in a0. The		#
* resulting condition codes are returned in d0 in case the caller 	#
* doesn't want FPSR_cc altered (as is the case for fmove out).		#
*									#
*########################################################################

	global	ovf_res
ovf_res:
	andi.w	#$10,d1			* keep result sign
	lsr.b	#$4,d0			* shift prec/mode
	or.b	d0,d1			* concat the two
	move.w	d1,d0			* make a copy
	lsl.b	#$1,d1			* multiply d1 by 2
	bra.b	ovf_res_load

	global	ovf_res2
ovf_res2:
	andi.w	#$10,d1			* keep result sign
	or.b	d0,d1			* insert rnd mode
	swap	d0
	or.b	d0,d1			* insert rnd prec
	move.w	d1,d0			* make a copy
	lsl.b	#$1,d1			* shift left by 1

*
* use the rounding mode, precision, and result sign as in index into the
* two tables below to fetch the default result and the result ccodes.
*
ovf_res_load:
	move.b	(tbl_ovfl_cc.b,pc,d0.w*1),d0		* fetch result ccodes
	lea	(tbl_ovfl_result.b,pc,d1.w*8),a0	* return result ptr

	rts

tbl_ovfl_cc:
	.dc.b	$2,$0,$0,$2
	.dc.b	$2,$0,$0,$2
	.dc.b	$2,$0,$0,$2
	.dc.b	$0,$0,$0,$0
	.dc.b	$2+$8,$8,$2+$8,$8
	.dc.b	$2+$8,$8,$2+$8,$8
	.dc.b	$2+$8,$8,$2+$8,$8

tbl_ovfl_result:
	.dc.l	$7fff0000,$00000000,$00000000,$00000000		* +INF; RN
	.dc.l	$7ffe0000,$ffffffff,$ffffffff,$00000000		* +EXT; RZ
	.dc.l	$7ffe0000,$ffffffff,$ffffffff,$00000000		* +EXT; RM
	.dc.l	$7fff0000,$00000000,$00000000,$00000000		* +INF; RP

	.dc.l	$7fff0000,$00000000,$00000000,$00000000		* +INF; RN
	.dc.l	$407e0000,$ffffff00,$00000000,$00000000		* +SGL; RZ
	.dc.l	$407e0000,$ffffff00,$00000000,$00000000		* +SGL; RM
	.dc.l	$7fff0000,$00000000,$00000000,$00000000		* +INF; RP

	.dc.l	$7fff0000,$00000000,$00000000,$00000000		* +INF; RN
	.dc.l	$43fe0000,$ffffffff,$fffff800,$00000000		* +DBL; RZ
	.dc.l	$43fe0000,$ffffffff,$fffff800,$00000000		* +DBL; RM
	.dc.l	$7fff0000,$00000000,$00000000,$00000000		* +INF; RP

	.dc.l	$00000000,$00000000,$00000000,$00000000
	.dc.l	$00000000,$00000000,$00000000,$00000000
	.dc.l	$00000000,$00000000,$00000000,$00000000
	.dc.l	$00000000,$00000000,$00000000,$00000000

	.dc.l	$ffff0000,$00000000,$00000000,$00000000		* -INF; RN
	.dc.l	$fffe0000,$ffffffff,$ffffffff,$00000000		* -EXT; RZ
	.dc.l	$ffff0000,$00000000,$00000000,$00000000		* -INF; RM
	.dc.l	$fffe0000,$ffffffff,$ffffffff,$00000000		* -EXT; RP

	.dc.l	$ffff0000,$00000000,$00000000,$00000000		* -INF; RN
	.dc.l	$c07e0000,$ffffff00,$00000000,$00000000		* -SGL; RZ
	.dc.l	$ffff0000,$00000000,$00000000,$00000000		* -INF; RM
	.dc.l	$c07e0000,$ffffff00,$00000000,$00000000		* -SGL; RP

	.dc.l	$ffff0000,$00000000,$00000000,$00000000		* -INF; RN
	.dc.l	$c3fe0000,$ffffffff,$fffff800,$00000000		* -DBL; RZ
	.dc.l	$ffff0000,$00000000,$00000000,$00000000		* -INF; RM
	.dc.l	$c3fe0000,$ffffffff,$fffff800,$00000000		* -DBL; RP

*########################################################################
* XDEF ****************************************************************	#
*	get_packed(): fetch a packed operand from memory and then	#
*		      convert it to a floating-point binary number.	#
*									#
* XREF ****************************************************************	#
*	_dcalc_ea() - calculate the correct <ea>			#
*	_mem_read() - fetch the packed operand from memory		#
*	facc_in_x() - the fetch failed so jump to special exit code	#
*	decbin()    - convert packed to binary extended precision	#
*									#
* INPUT ***************************************************************	#
*	None								#
* 									#
* OUTPUT **************************************************************	#
*	If no failure on _mem_read():					#
* 	FP_SRC(a6) = packed operand now as a binary FP number		#
*									#
* ALGORITHM ***********************************************************	#
*	Get the correct <ea> whihc is the value on the exception stack 	#
* frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
* Then, fetch the operand from memory. If the fetch fails, exit		#
* through facc_in_x().							#
*	If the packed operand is a ZERO,NAN, or INF, convert it to	#
* its binary representation here. Else, call decbin() which will 	#
* convert the packed value to an extended precision binary value.	#
*									#
*########################################################################

* the stacked <ea> for packed is correct except for -(An).
* the base reg must be updated for both -(An) and (An)+.
	global	get_packed
get_packed:
	move.l	#$c,d0			* packed is 12 bytes
	bsr.l	_dcalc_ea		* fetch <ea>; correct An

	lea	FP_SRC(a6),a1		* pass: ptr to super dst
	move.l	#$c,d0			* pass: 12 bytes
	bsr.l	_dmem_read		* read packed operand

	tst.l	d1			* did dfetch fail?
	bne.l	facc_in_x		* yes

* The packed operand is an INF or a NAN if the exponent field is all ones.
	bfextu	FP_SRC(a6){#1:#15},d0	* get exp
	cmpi.w	#$7fff,d0		* INF or NAN?
	bne.b	gp_try_zero		* no
	rts				* operand is an INF or NAN

* The packed operand is a zero if the mantissa is all zero, else it's
* a normal packed op.
gp_try_zero:
	move.b	3+FP_SRC(a6),d0		* get byte 4
	andi.b	#$0f,d0			* clear all but last nybble
	bne.b	gp_not_spec		* not a zero
	tst.l	FP_SRC_HI(a6)		* is lw 2 zero?
	bne.b	gp_not_spec		* not a zero
	tst.l	FP_SRC_LO(a6)		* is lw 3 zero?
	bne.b	gp_not_spec		* not a zero
	rts				* operand is a ZERO
gp_not_spec:
	lea	FP_SRC(a6),a0		* pass: ptr to packed op
	bsr.l	decbin			* convert to extended
	fmovem.x	fp0,FP_SRC(a6)	* make this the srcop
	rts

*########################################################################
* decbin(): Converts normalized packed bcd value pointed to by register	#
*	    a0 to extended-precision value in fp0.			#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to normalized packed bcd value			#
*									#
* OUTPUT **************************************************************	#
*	fp0 = exact fp representation of the packed bcd value.		#
*									#
* ALGORITHM ***********************************************************	#
*	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
*	and NaN operands are dispatched without entering this routine)	#
*	value in 68881/882 format at location (a0).			#
*									#
*	A1. Convert the bcd exponent to binary by successive adds and 	#
*	muls. Set the sign according to SE. Subtract 16 to compensate	#
*	for the mantissa which is to be interpreted as 17 integer	#
*	digits, rather than 1 integer and 16 fraction digits.		#
*	Note: this operation can never overflow.			#
*									#
*	A2. Convert the bcd mantissa to binary by successive		#
*	adds and muls in FP0. Set the sign according to SM.		#
*	The mantissa digits will be converted with the decimal point	#
*	assumed following the least-significant digit.			#
*	Note: this operation can never overflow.			#
*									#
*	A3. Count the number of leading/trailing zeros in the		#
*	bcd string.  If SE is positive, count the leading zeros;	#
*	if negative, count the trailing zeros.  Set the adjusted	#
*	exponent equal to the exponent from A1 and the zero count	#
*	added if SM = 1 and subtracted if SM = 0.  Scale the		#
*	mantissa the equivalent of forcing in the bcd value:		#
*									#
*	SM = 0	a non-zero digit in the integer position		#
*	SM = 1	a non-zero digit in Mant0, lsd of the fraction		#
*									#
*	this will insure that any value, regardless of its		#
*	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
*	consistently.							#
*									#
*	A4. Calculate the factor 10^exp in FP1 using a table of		#
*	10^(2^n) values.  To reduce the error in forming factors	#
*	greater than 10^27, a directed rounding scheme is used with	#
*	tables rounded to RN, RM, and RP, according to the table	#
*	in the comments of the pwrten section.				#
*									#
*	A5. Form the final binary number by scaling the mantissa by	#
*	the exponent factor.  This is done by multiplying the		#
*	mantissa in FP0 by the factor in FP1 if the adjusted		#
*	exponent sign is positive, and dividing FP0 by FP1 if		#
*	it is negative.							#
*									#
*	Clean up and return. Check if the final mul or div was inexact.	#
*	If so, set INEX1 in USER_FPSR.					#
*									#
*########################################################################

*
*	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
*	to nearest, minus, and plus, respectively.  The tables include
*	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
*	is required until the power is greater than 27, however, all
*	tables include the first 5 for ease of indexing.
*
RTABLE:
	.dc.b	0,0,0,0
	.dc.b	2,3,2,3
	.dc.b	2,3,3,2
	.dc.b	3,2,2,3

FNIBS	set	7
FSTRT	set	0

ESTRT	set	4
EDIGITS	set	2

	global	decbin
decbin:
	move.l	$0.w(a0),FP_SCR0_EX(a6)		* make a copy of input 
	move.l	$4(a0),FP_SCR0_HI(a6)		* so we don't alter it
	move.l	$8(a0),FP_SCR0_LO(a6)

	lea	FP_SCR0(a6),a0

	movem.l	d2-d5,-(sp)		* save d2-d5
	fmovem.x	fp0,-(sp)	* save fp1
*
* Calculate exponent:
*  1. Copy bcd value in memory for use as a working copy.
*  2. Calculate absolute value of exponent in d1 by mul and add.
*  3. Correct for exponent sign.
*  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
*     (i.e., all digits assumed left of the decimal point.)
*
* Register usage:
*
*  calc_e:
*	(*)  d0: temp digit storage
*	(*)  d1: accumulator for binary exponent
*	(*)  d2: digit count
*	(*)  d3: offset pointer
*	( )  d4: first word of bcd
*	( )  a0: pointer to working bcd value
*	( )  a6: pointer to original bcd value
*	(*)  FP_SCR1: working copy of original bcd value
*	(*)  L_SCR1: copy of original exponent word
*
calc_e:
	move.l	#EDIGITS,d2		* # of nibbles (digits) in fraction part
	move.l	#ESTRT,d3		* counter to pick up digits
	move.l	(a0),d4			* get first word of bcd
	clr.l	d1			* zero d1 for accumulator
e_gd:
	mulu.l	#$a,d1			* mul partial product by one digit place
	bfextu	d4{d3:#4},d0		* get the digit and zero extend into d0
	add.l	d0,d1			* d1 = d1 + d0
	addq.b	#4,d3			* advance d3 to the next digit
	dbf.w	d2,e_gd			* if we have used all 3 digits, exit loop
	btst	#30,d4			* get SE
	beq.b	e_pos			* don't negate if pos
	neg.l	d1			* negate before subtracting
e_pos:
	subi.l	#16,d1			* sub to compensate for shift of mant
	bge.b	e_save			* if still pos, do not neg
	neg.l	d1			* now negative, make pos and set SE
	ori.l	#$40000000,d4		* set SE in d4,
	ori.l	#$40000000,(a0)		* and in working bcd
e_save:
	move.l	d1,-(sp)		* save exp on stack
*
*
* Calculate mantissa:
*  1. Calculate absolute value of mantissa in fp0 by mul and add.
*  2. Correct for mantissa sign.
*     (i.e., all digits assumed left of the decimal point.)
*
* Register usage:
*
*  calc_m:
*	(*)  d0: temp digit storage
*	(*)  d1: lword counter
*	(*)  d2: digit count
*	(*)  d3: offset pointer
*	( )  d4: words 2 and 3 of bcd
*	( )  a0: pointer to working bcd value
*	( )  a6: pointer to original bcd value
*	(*) fp0: mantissa accumulator
*	( )  FP_SCR1: working copy of original bcd value
*	( )  L_SCR1: copy of original exponent word
*
calc_m:
	move.l	#1,d1			* word counter, init to 1
	fmove.s	#$00000000,fp0		* accumulator
*
*
*  Since the packed number has a long word between the first & second parts,
*  get the integer digit then skip down & get the rest of the
*  mantissa.  We will unroll the loop once.
*
	bfextu	(a0){#28:#4},d0		* integer part is ls digit in long word
	fadd.b	d0,fp0			* add digit to sum in fp0
*
*
*  Get the rest of the mantissa.
*
loadlw:
	move.l	(a0,d1.L*4),d4		* load mantissa lonqword into d4
	move.l	#FSTRT,d3		* counter to pick up digits
	move.l	#FNIBS,d2		* reset number of digits per a0 ptr
md2b:
	fmul.s	#$41200000,fp0		* fp0 = fp0 * 10
	bfextu	d4{d3:#4},d0		* get the digit and zero extend
	fadd.b	d0,fp0			* fp0 = fp0 + digit
*
*
*  If all the digits (8) in that long word have been converted (d2=0),
*  then inc d1 (=2) to point to the next long word and reset d3 to 0
*  to initialize the digit offset, and set d2 to 7 for the digit count;
*  else continue with this long word.
*
	addq.b	#4,d3			* advance d3 to the next digit
	dbf.w	d2,md2b			* check for last digit in this lw
nextlw:
	addq.l	#1,d1			* inc lw pointer in mantissa
	cmpi.l	#2,d1			* test for last lw
	ble.b	loadlw			* if not, get last one
*
*  Check the sign of the mant and make the value in fp0 the same sign.
*
m_sign:
	btst	#31,(a0)		* test sign of the mantissa
	beq.b	ap_st_z			* if clear, go to append/strip zeros
	fneg.x	fp0			* if set, negate fp0
*
* Append/strip zeros:
*
*  For adjusted exponents which have an absolute value greater than 27*,
*  this routine calculates the amount needed to normalize the mantissa
*  for the adjusted exponent.  That number is subtracted from the exp
*  if the exp was positive, and added if it was negative.  The purpose
*  of this is to reduce the value of the exponent and the possibility
*  of error in calculation of pwrten.
*
*  1. Branch on the sign of the adjusted exponent.
*  2p.(positive exp)
*   2. Check M16 and the digits in lwords 2 and 3 in decending order.
*   3. Add one for each zero encountered until a non-zero digit.
*   4. Subtract the count from the exp.
*   5. Check if the exp has crossed zero in #3 above; make the exp abs
*	   and set SE.
*	6. Multiply the mantissa by 10**count.
*  2n.(negative exp)
*   2. Check the digits in lwords 3 and 2 in decending order.
*   3. Add one for each zero encountered until a non-zero digit.
*   4. Add the count to the exp.
*   5. Check if the exp has crossed zero in #3 above; clear SE.
*   6. Divide the mantissa by 10**count.
*
*  *Why 27?  If the adjusted exponent is within -28 < expA < 28, than
*   any adjustment due to append/strip zeros will drive the resultane
*   exponent towards zero.  Since all pwrten constants with a power
*   of 27 or less are exact, there is no need to use this routine to
*   attempt to lessen the resultant exponent.
*
* Register usage:
*
*  ap_st_z:
*	(*)  d0: temp digit storage
*	(*)  d1: zero count
*	(*)  d2: digit count
*	(*)  d3: offset pointer
*	( )  d4: first word of bcd
*	(*)  d5: lword counter
*	( )  a0: pointer to working bcd value
*	( )  FP_SCR1: working copy of original bcd value
*	( )  L_SCR1: copy of original exponent word
*
*
* First check the absolute value of the exponent to see if this
* routine is necessary.  If so, then check the sign of the exponent
* and do append (+) or strip (-) zeros accordingly.
* This section handles a positive adjusted exponent.
*
ap_st_z:
	move.l	(sp),d1			* load expA for range test
	cmpi.l	#27,d1			* test is with 27
	ble.w	pwrten			* if abs(expA) <28, skip ap/st zeros
	btst	#30,(a0)		* check sign of exp
	bne.b	ap_st_n			* if neg, go to neg side
	clr.l	d1			* zero count reg
	move.l	(a0),d4			* load lword 1 to d4
	bfextu	d4{#28:#4},d0		* get M16 in d0
	bne.b	ap_p_fx			* if M16 is non-zero, go fix exp
	addq.l	#1,d1			* inc zero count
	move.l	#1,d5			* init lword counter
	move.l	(a0,d5.L*4),d4		* get lword 2 to d4
	bne.b	ap_p_cl			* if lw 2 is zero, skip it
	addq.l	#8,d1			* and inc count by 8
	addq.l	#1,d5			* inc lword counter
	move.l	(a0,d5.L*4),d4		* get lword 3 to d4
ap_p_cl:
	clr.l	d3			* init offset reg
	move.l	#7,d2			* init digit counter
ap_p_gd:
	bfextu	d4{d3:#4},d0		* get digit
	bne.b	ap_p_fx			* if non-zero, go to fix exp
	addq.l	#4,d3			* point to next digit
	addq.l	#1,d1			* inc digit counter
	dbf.w	d2,ap_p_gd		* get next digit
ap_p_fx:
	move.l	d1,d0			* copy counter to d2
	move.l	(sp),d1			* get adjusted exp from memory
	sub.l	d0,d1			* subtract count from exp
	bge.b	ap_p_fm			* if still pos, go to pwrten
	neg.l	d1			* now its neg; get abs
	move.l	(a0),d4			* load lword 1 to d4
	ori.l	#$40000000,d4		* and set SE in d4
	ori.l	#$40000000,(a0)		* and in memory
*
* Calculate the mantissa multiplier to compensate for the striping of
* zeros from the mantissa.
*
ap_p_fm:
	lea.l	PTENRN.l(pc),a1		* get address of power-of-ten table
	clr.l	d3			* init table index
	fmove.s	#$3f800000,fp1		* init fp1 to 1
	move.l	#3,d2			* init d2 to count bits in counter
ap_p_el:
	asr.l	#1,d0			* shift lsb into carry
	bcc.b	ap_p_en			* if 1, mul fp1 by pwrten factor
	fmul.x	(a1,d3.l),fp1		* mul by 10**(d3_bit_no)
ap_p_en:
	addi.l	#12,d3			* inc d3 to next rtable entry
	tst.l	d0			* check if d0 is zero
	bne.b	ap_p_el			* if not, get next bit
	fmul.x	fp1,fp0			* mul mantissa by 10**(no_bits_shifted)
	bra.b	pwrten			* go calc pwrten
*
* This section handles a negative adjusted exponent.
*
ap_st_n:
	clr.l	d1			* clr counter
	move.l	#2,d5			* set up d5 to point to lword 3
	move.l	(a0,d5.L*4),d4		* get lword 3
	bne.b	ap_n_cl			* if not zero, check digits
	sub.l	#1,d5			* dec d5 to point to lword 2
	addq.l	#8,d1			* inc counter by 8
	move.l	(a0,d5.L*4),d4		* get lword 2
ap_n_cl:
	move.l	#28,d3			* point to last digit
	move.l	#7,d2			* init digit counter
ap_n_gd:
	bfextu	d4{d3:#4},d0		* get digit
	bne.b	ap_n_fx			* if non-zero, go to exp fix
	subq.l	#4,d3			* point to previous digit
	addq.l	#1,d1			* inc digit counter
	dbf.w	d2,ap_n_gd		* get next digit
ap_n_fx:
	move.l	d1,d0			* copy counter to d0
	move.l	(sp),d1			* get adjusted exp from memory
	sub.l	d0,d1			* subtract count from exp
	bgt.b	ap_n_fm			* if still pos, go fix mantissa
	neg.l	d1			* take abs of exp and clr SE
	move.l	(a0),d4			* load lword 1 to d4
	andi.l	#$bfffffff,d4		* and clr SE in d4
	andi.l	#$bfffffff,(a0)		* and in memory
*
* Calculate the mantissa multiplier to compensate for the appending of
* zeros to the mantissa.
*
ap_n_fm:
	lea.l	PTENRN.l(pc),a1		* get address of power-of-ten table
	clr.l	d3			* init table index
	fmove.s	#$3f800000,fp1		* init fp1 to 1
	move.l	#3,d2			* init d2 to count bits in counter
ap_n_el:
	asr.l	#1,d0			* shift lsb into carry
	bcc.b	ap_n_en			* if 1, mul fp1 by pwrten factor
	fmul.x	(a1,d3.l),fp1		* mul by 10**(d3_bit_no)
ap_n_en:
	addi.l	#12,d3			* inc d3 to next rtable entry
	tst.l	d0			* check if d0 is zero
	bne.b	ap_n_el			* if not, get next bit
	fdiv.x	fp1,fp0			* div mantissa by 10**(no_bits_shifted)
*
*
* Calculate power-of-ten factor from adjusted and shifted exponent.
*
* Register usage:
*
*  pwrten:
*	(*)  d0: temp
*	( )  d1: exponent
*	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
*	(*)  d3: FPCR work copy
*	( )  d4: first word of bcd
*	(*)  a1: RTABLE pointer
*  calc_p:
*	(*)  d0: temp
*	( )  d1: exponent
*	(*)  d3: PWRTxx table index
*	( )  a0: pointer to working copy of bcd
*	(*)  a1: PWRTxx pointer
*	(*) fp1: power-of-ten accumulator
*
* Pwrten calculates the exponent factor in the selected rounding mode
* according to the following table:
*	
*	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
*
*	ANY	  ANY	RN	RN
*
*	 +	   +	RP	RP
*	 -	   +	RP	RM
*	 +	   -	RP	RM
*	 -	   -	RP	RP
*
*	 +	   +	RM	RM
*	 -	   +	RM	RP
*	 +	   -	RM	RP
*	 -	   -	RM	RM
*
*	 +	   +	RZ	RM
*	 -	   +	RZ	RM
*	 +	   -	RZ	RP
*	 -	   -	RZ	RP
*
*
pwrten:
	move.l	USER_FPCR(a6),d3	* get user's FPCR
	bfextu	d3{#26:#2},d2		* isolate rounding mode bits
	move.l	(a0),d4			* reload 1st bcd word to d4
	asl.l	#2,d2			* format d2 to be
	bfextu	d4{#0:#2},d0		* {FPCR[6],FPCR[5],SM,SE}
	add.l	d0,d2			* in d2 as index into RTABLE
	lea.l	RTABLE(pc),a1		* load rtable base
	move.b	(a1,d2.l),d0		* load new rounding bits from table
	clr.l	d3			* clear d3 to force no exc and extended
	bfins	d0,d3{#26:#2}		* stuff new rounding bits in FPCR
	fmove.l	d3,fpcr			* write new FPCR
	asr.l	#1,d0			* write correct PTENxx table
	bcc.b	not_rp			* to a1
	lea.l	PTENRP.l(pc),a1		* it is RP
	bra.b	calc_p			* go to init section
not_rp:
	asr.l	#1,d0			* keep checking
	bcc.b	not_rm
	lea.l	PTENRM.l(pc),a1		* it is RM
	bra.b	calc_p			* go to init section
not_rm:
	lea.l	PTENRN.l(pc),a1		* it is RN
calc_p:
	move.l	d1,d0			* copy exp to d0;use d0
	bpl.b	no_neg			* if exp is negative,
	neg.l	d0			* invert it
	ori.l	#$40000000,(a0)		* and set SE bit
no_neg:
	clr.l	d3			* table index
	fmove.s	#$3f800000,fp1		* init fp1 to 1
e_loop:
	asr.l	#1,d0			* shift next bit into carry
	bcc.b	e_next			* if zero, skip the mul
	fmul.x	(a1,d3.l),fp1		* mul by 10**(d3_bit_no)
e_next:
	addi.l	#12,d3			* inc d3 to next rtable entry
	tst.l	d0			* check if d0 is zero
	bne.b	e_loop			* not zero, continue shifting
*
*
*  Check the sign of the adjusted exp and make the value in fp0 the
*  same sign. If the exp was pos then multiply fp1*fp0;
*  else divide fp0/fp1.
*
* Register Usage:
*  norm:
*	( )  a0: pointer to working bcd value
*	(*) fp0: mantissa accumulator
*	( ) fp1: scaling factor - 10**(abs(exp))
*
pnorm:
	btst	#30,(a0)		* test the sign of the exponent
	beq.b	mul			* if clear, go to multiply
div:
	fdiv.x	fp1,fp0			* exp is negative, so divide mant by exp
	bra.b	end_dec
mul:
	fmul.x	fp1,fp0			* exp is positive, so multiply by exp
*
*
* Clean up and return with result in fp0.
*
* If the final mul/div in decbin incurred an inex exception,
* it will be inex2, but will be reported as inex1 by get_op.
*
end_dec:
	fmove.l	fpsr,d0			* get status register	
	bclr	#inex2_bit+8,d0		* test for inex2 and clear it
	beq.b	no_exc			* skip this if no exc
	ori.w	#inx1a_mask,2+USER_FPSR(a6)	* set INEX1/AINEX
no_exc:
	add.l	#$4,sp			* clear 1 lw param
	fmovem.x	(sp)+,fp1	* restore fp1
	movem.l	(sp)+,d2-d5		* restore d2-d5
	fmove.l	#$0,fpcr
	fmove.l	#$0,fpsr
	rts

*########################################################################
* bindec(): Converts an input in extended precision format to bcd format#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to the input extended precision value in memory.	#
*	     the input may be either normalized, unnormalized, or 	#
*	     denormalized.						#
*	d0 = contains the k-factor sign-extended to 32-bits. 		#
*									#
* OUTPUT **************************************************************	#
*	FP_SCR0(a6) = bcd format result on the stack.			#
*									#
* ALGORITHM ***********************************************************	#
*									#
*	A1.	Set RM and size ext;  Set SIGMA = sign of input.  	#
*		The k-factor is saved for use in d7. Clear the		#
*		BINDEC_FLG for separating normalized/denormalized	#
*		input.  If input is unnormalized or denormalized,	#
*		normalize it.						#
*									#
*	A2.	Set X = abs(input).					#
*									#
*	A3.	Compute ILOG.						#
*		ILOG is the log base 10 of the input value.  It is	#
*		approximated by adding e + 0.f when the original 	#
*		value is viewed as 2^^e * 1.f in extended precision.  	#
*		This value is stored in d6.				#
*									#
*	A4.	Clr INEX bit.						#
*		The operation in A3 above may have set INEX2.  		#
*									#
*	A5.	Set ICTR = 0;						#
*		ICTR is a flag used in A13.  It must be set before the 	#
*		loop entry A6.						#
*									#
*	A6.	Calculate LEN.						#
*		LEN is the number of digits to be displayed.  The	#
*		k-factor can dictate either the total number of digits,	#
*		if it is a positive number, or the number of digits	#
*		after the decimal point which are to be included as	#
*		significant.  See the 68882 manual for examples.	#
*		If LEN is computed to be greater than 17, set OPERR in	#
*		USER_FPSR.  LEN is stored in d4.			#
*									#
*	A7.	Calculate SCALE.					#
*		SCALE is equal to 10^ISCALE, where ISCALE is the number	#
*		of decimal places needed to insure LEN integer digits	#
*		in the output before conversion to bcd. LAMBDA is the	#
*		sign of ISCALE, used in A9. Fp1 contains		#
*		10^^(abs(ISCALE)) using a rounding mode which is a	#
*		function of the original rounding mode and the signs	#
*		of ISCALE and X.  A table is given in the code.		#
*									#
*	A8.	Clr INEX; Force RZ.					#
*		The operation in A3 above may have set INEX2.  		#
*		RZ mode is forced for the scaling operation to insure	#
*		only one rounding error.  The grs bits are collected in #
*		the INEX flag for use in A10.				#
*									#
*	A9.	Scale X -> Y.						#
*		The mantissa is scaled to the desired number of		#
*		significant digits.  The excess digits are collected	#
*		in INEX2.						#
*									#
*	A10.	Or in INEX.						#
*		If INEX is set, round error occured.  This is		#
*		compensated for by 'or-ing' in the INEX2 flag to	#
*		the lsb of Y.						#
*									#
*	A11.	Restore original FPCR; set size ext.			#
*		Perform FINT operation in the user's rounding mode.	#
*		Keep the size to extended.				#
*									#
*	A12.	Calculate YINT = FINT(Y) according to user's rounding	#
*		mode.  The FPSP routine sintd0 is used.  The output	#
*		is in fp0.						#
*									#
*	A13.	Check for LEN digits.					#
*		If the int operation results in more than LEN digits,	#
*		or less than LEN -1 digits, adjust ILOG and repeat from	#
*		A6.  This test occurs only on the first pass.  If the	#
*		result is exactly 10^LEN, decrement ILOG and divide	#
*		the mantissa by 10.					#
*									#
*	A14.	Convert the mantissa to bcd.				#
*		The binstr routine is used to convert the LEN digit 	#
*		mantissa to bcd in memory.  The input to binstr is	#
*		to be a fraction; i.e. (mantissa)/10^LEN and adjusted	#
*		such that the decimal point is to the left of bit 63.	#
*		The bcd digits are stored in the correct position in 	#
*		the final string area in memory.			#
*									#
*	A15.	Convert the exponent to bcd.				#
*		As in A14 above, the exp is converted to bcd and the	#
*		digits are stored in the final string.			#
*		Test the length of the final exponent string.  If the	#
*		length is 4, set operr.					#
*									#
*	A16.	Write sign bits to final string.			#
*									#
*########################################################################

BINDEC_FLG	set	EXC_TEMP	* DENORM flag

* Constants in extended precision
PLOG2:
	.dc.l	$3FFD0000,$9A209A84,$FBCFF798,$00000000
PLOG2UP1:
	.dc.l	$3FFD0000,$9A209A84,$FBCFF799,$00000000

* Constants in single precision
FONE:
	.dc.l	$3F800000,$00000000,$00000000,$00000000
FTWO:
	.dc.l	$40000000,$00000000,$00000000,$00000000
FTEN:
	.dc.l	$41200000,$00000000,$00000000,$00000000
F4933:
	.dc.l	$459A2800,$00000000,$00000000,$00000000

RBDTBL:
	.dc.b	0,0,0,0
	.dc.b	3,3,2,2
	.dc.b	3,2,2,3
	.dc.b	2,3,3,2

*	Implementation Notes:
*
*	The registers are used as follows:
*
*		d0: scratch; LEN input to binstr
*		d1: scratch
*		d2: upper 32-bits of mantissa for binstr
*		d3: scratch;lower 32-bits of mantissa for binstr
*		d4: LEN
*      		d5: LAMBDA/ICTR
*		d6: ILOG
*		d7: k-factor
*		a0: ptr for original operand/final result
*		a1: scratch pointer
*		a2: pointer to FP_X; abs(original value) in ext
*		fp0: scratch
*		fp1: scratch
*		fp2: scratch
*		F_SCR1:
*		F_SCR2:
*		L_SCR1:
*		L_SCR2:

	global	bindec
bindec:
	movem.l	d2-d7/a2,-(sp)	*  {%d2-%d7/%a2}
	fmovem.x	fp0-fp2,-(sp)	*  {%fp0-%fp2}

* A1. Set RM and size ext. Set SIGMA = sign input;
*     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
*     separating  normalized/denormalized input.  If the input
*     is a denormalized number, set the BINDEC_FLG memory word
*     to signal denorm.  If the input is unnormalized, normalize
*     the input and test for denormalized result.  
*
	fmove.l	#rm_mode*$10,fpcr	* set RM and ext
	move.l	(a0),L_SCR2(a6)		* save exponent for sign check
	move.l	d0,d7		* move k-factor to d7

	clr.b	BINDEC_FLG(a6)	* clr norm/denorm flag
	cmpi.b	#DENORM,STAG(a6)	* is input a DENORM?
	bne.w	A2_str		* no; input is a NORM

*
* Normalize the denorm
*
un_de_norm:
	move.w	(a0),d0
	andi.w	#$7fff,d0	* strip sign of normalized exp
	move.l	4(a0),d1
	move.l	8(a0),d2
norm_loop:
	subq.w	#1,d0
	lsl.l	#1,d2
	roxl.l	#1,d1
	tst.l	d1
	bge.b	norm_loop
*
* Test if the normalized input is denormalized
*
	tst.w	d0
	bgt.b	pos_exp		* if greater than zero, it is a norm
	st	BINDEC_FLG(a6)	* set flag for denorm
pos_exp:
	andi.w	#$7fff,d0	* strip sign of normalized exp
	move.w	d0,(a0)
	move.l	d1,4(a0)
	move.l	d2,8(a0)

* A2. Set X = abs(input).
*
A2_str:
	move.l	(a0),FP_SCR1(a6)	* move input to work space
	move.l	4(a0),FP_SCR1+4(a6)	* move input to work space
	move.l	8(a0),FP_SCR1+8(a6)	* move input to work space
	andi.l	#$7fffffff,FP_SCR1(a6)		* create abs(X)

* A3. Compute ILOG.
*     ILOG is the log base 10 of the input value.  It is approx-
*     imated by adding e + 0.f when the original value is viewed
*     as 2^^e * 1.f in extended precision.  This value is stored
*     in d6.
*
* Register usage:
*	Input/Output
*	d0: k-factor/exponent
*	d2: x/x
*	d3: x/x
*	d4: x/x
*	d5: x/x
*	d6: x/ILOG
*	d7: k-factor/Unchanged
*	a0: ptr for original operand/final result
*	a1: x/x
*	a2: x/x
*	fp0: x/float(ILOG)
*	fp1: x/x
*	fp2: x/x
*	F_SCR1:x/x
*	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
*	L_SCR1:x/x
*	L_SCR2:first word of X packed/Unchanged

	tst.b	BINDEC_FLG(a6)	* check for denorm
	beq.b	A3_cont		* if clr, continue with norm
	move.l	#-4933,d6	* force ILOG = -4933
	bra.b	A4_str
A3_cont:
	move.w	FP_SCR1(a6),d0		* move exp to d0
	move.w	#$3fff,FP_SCR1(a6)	* replace exponent with 0x3fff
	fmove.x	FP_SCR1(a6),fp0		* now fp0 has 1.f
	subi.w	#$3fff,d0	* strip off bias
	fadd.w	d0,fp0		* add in exp
	fsub.s	FONE(pc),fp0	* subtract off 1.0
	fbge.w	pos_res		* if pos, branch 
	fmul.x	PLOG2UP1(pc),fp0	* if neg, mul by LOG2UP1
	fmove.l	fp0,d6		* put ILOG in d6 as a lword
	bra.b	A4_str		* go move out ILOG
pos_res:
	fmul.x	PLOG2(pc),fp0	* if pos, mul by LOG2
	fmove.l	fp0,d6		* put ILOG in d6 as a lword


* A4. Clr INEX bit.
*     The operation in A3 above may have set INEX2.  

A4_str:
	fmove.l	#0,fpsr		* zero all of fpsr - nothing needed


* A5. Set ICTR = 0;
*     ICTR is a flag used in A13.  It must be set before the 
*     loop entry A6. The lower word of d5 is used for ICTR.

	clr.w	d5		* clear ICTR

* A6. Calculate LEN.
*     LEN is the number of digits to be displayed.  The k-factor
*     can dictate either the total number of digits, if it is
*     a positive number, or the number of digits after the
*     original decimal point which are to be included as
*     significant.  See the 68882 manual for examples.
*     If LEN is computed to be greater than 17, set OPERR in
*     USER_FPSR.  LEN is stored in d4.
*
* Register usage:
*	Input/Output
*	d0: exponent/Unchanged
*	d2: x/x/scratch
*	d3: x/x
*	d4: exc picture/LEN
*	d5: ICTR/Unchanged
*	d6: ILOG/Unchanged
*	d7: k-factor/Unchanged
*	a0: ptr for original operand/final result
*	a1: x/x
*	a2: x/x
*	fp0: float(ILOG)/Unchanged
*	fp1: x/x
*	fp2: x/x
*	F_SCR1:x/x
*	F_SCR2:Abs(X) with $3fff exponent/Unchanged
*	L_SCR1:x/x
*	L_SCR2:first word of X packed/Unchanged

A6_str:
	tst.l	d7		* branch on sign of k
	ble.b	k_neg		* if k <= 0, LEN = ILOG + 1 - k
	move.l	d7,d4		* if k > 0, LEN = k
	bra.b	len_ck		* skip to LEN check
k_neg:
	move.l	d6,d4		* first load ILOG to d4
	sub.l	d7,d4		* subtract off k
	addq.l	#1,d4		* add in the 1
len_ck:
	tst.l	d4		* LEN check: branch on sign of LEN
	ble.b	LEN_ng		* if neg, set LEN = 1
	cmpi.l	#17,d4		* test if LEN > 17
	ble.b	A7_str		* if not, forget it
	move.l	#17,d4		* set max LEN = 17
	tst.l	d7		* if negative, never set OPERR
	ble.b	A7_str		* if positive, continue
	ori.l	#opaop_mask,USER_FPSR(a6)	* set OPERR & AIOP in USER_FPSR
	bra.b	A7_str		* finished here
LEN_ng:
	move.l	#1,d4		* min LEN is 1


* A7. Calculate SCALE.
*     SCALE is equal to 10^ISCALE, where ISCALE is the number
*     of decimal places needed to insure LEN integer digits
*     in the output before conversion to bcd. LAMBDA is the sign
*     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
*     the rounding mode as given in the following table (see
*     Coonen, p. 7.23 as ref.; however, the SCALE variable is
*     of opposite sign in bindec.sa from Coonen).
*
*	Initial					USE
*	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
*	----------------------------------------------
*	 RN	00	   0	   0		00/0	RN
*	 RN	00	   0	   1		00/0	RN
*	 RN	00	   1	   0		00/0	RN
*	 RN	00	   1	   1		00/0	RN
*	 RZ	01	   0	   0		11/3	RP
*	 RZ	01	   0	   1		11/3	RP
*	 RZ	01	   1	   0		10/2	RM
*	 RZ	01	   1	   1		10/2	RM
*	 RM	10	   0	   0		11/3	RP
*	 RM	10	   0	   1		10/2	RM
*	 RM	10	   1	   0		10/2	RM
*	 RM	10	   1	   1		11/3	RP
*	 RP	11	   0	   0		10/2	RM
*	 RP	11	   0	   1		11/3	RP
*	 RP	11	   1	   0		11/3	RP
*	 RP	11	   1	   1		10/2	RM
*
* Register usage:
*	Input/Output
*	d0: exponent/scratch - final is 0
*	d2: x/0 or 24 for A9
*	d3: x/scratch - offset ptr into PTENRM array
*	d4: LEN/Unchanged
*	d5: 0/ICTR:LAMBDA
*	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
*	d7: k-factor/Unchanged
*	a0: ptr for original operand/final result
*	a1: x/ptr to PTENRM array
*	a2: x/x
*	fp0: float(ILOG)/Unchanged
*	fp1: x/10^ISCALE
*	fp2: x/x
*	F_SCR1:x/x
*	F_SCR2:Abs(X) with $3fff exponent/Unchanged
*	L_SCR1:x/x
*	L_SCR2:first word of X packed/Unchanged

A7_str:
	tst.l	d7		* test sign of k
	bgt.b	k_pos		* if pos and > 0, skip this
	cmp.l	d6,d7		* test k - ILOG
	blt.b	k_pos		* if ILOG >= k, skip this
	move.l	d7,d6		* if ((k<0) & (ILOG < k)) ILOG = k
k_pos:
	move.l	d6,d0		* calc ILOG + 1 - LEN in d0
	addq.l	#1,d0		* add the 1
	sub.l	d4,d0		* sub off LEN
	swap	d5		* use upper word of d5 for LAMBDA
	clr.w	d5		* set it zero initially
	clr.w	d2		* set up d2 for very small case
	tst.l	d0		* test sign of ISCALE
	bge.b	iscale		* if pos, skip next inst
	addq.w	#1,d5		* if neg, set LAMBDA true
	cmpi.l	#$ffffecd4,d0	* test iscale <= -4908
	bgt.b	no_inf		* if false, skip rest
	addi.l	#24,d0		* add in 24 to iscale
	move.l	#24,d2		* put 24 in d2 for A9
no_inf:
	neg.l	d0		* and take abs of ISCALE
iscale:
	fmove.s	FONE(pc),fp1	* init fp1 to 1
	bfextu	USER_FPCR(a6){#26:#2},d1	* get initial rmode bits
	lsl.w	#1,d1		* put them in bits 2:1
	add.w	d5,d1		* add in LAMBDA
	lsl.w	#1,d1		* put them in bits 3:1
	tst.l	L_SCR2(a6)	* test sign of original x
	bge.b	x_pos		* if pos, don't set bit 0
	addq.l	#1,d1		* if neg, set bit 0
x_pos:
	lea.l	RBDTBL(pc),a2	* load rbdtbl base
	move.b	(a2,d1.l),d3	* load d3 with new rmode
	lsl.l	#4,d3		* put bits in proper position
	fmove.l	d3,fpcr		* load bits into fpu
	lsr.l	#4,d3		* put bits in proper position
	tst.b	d3		* decode new rmode for pten table
	bne.b	not_rn		* if zero, it is RN
	lea.l	PTENRN.l(pc),a1	* load a1 with RN table base
	bra.b	rmode		* exit decode
not_rn:
	lsr.b	#1,d3		* get lsb in carry
	bcc.b	not_rp2		* if carry clear, it is RM
	lea.l	PTENRP.l(pc),a1	* load a1 with RP table base
	bra.b	rmode		* exit decode
not_rp2:
	lea.l	PTENRM.l(pc),a1	* load a1 with RM table base
rmode:
	clr.l	d3		* clr table index
e_loop2:
	lsr.l	#1,d0		* shift next bit into carry
	bcc.b	e_next2		* if zero, skip the mul
	fmul.x	(a1,d3.l),fp1	* mul by 10**(d3_bit_no)
e_next2:
	addi.l	#12,d3		* inc d3 to next pwrten table entry
	tst.l	d0		* test if ISCALE is zero
	bne.b	e_loop2		* if not, loop

* A8. Clr INEX; Force RZ.
*     The operation in A3 above may have set INEX2.  
*     RZ mode is forced for the scaling operation to insure
*     only one rounding error.  The grs bits are collected in 
*     the INEX flag for use in A10.
*
* Register usage:
*	Input/Output

	fmove.l	#0,fpsr		* clr INEX 
	fmove.l	#rz_mode*$10,fpcr	* set RZ rounding mode

* A9. Scale X -> Y.
*     The mantissa is scaled to the desired number of significant
*     digits.  The excess digits are collected in INEX2. If mul,
*     Check d2 for excess 10 exponential value.  If not zero, 
*     the iscale value would have caused the pwrten calculation
*     to overflow.  Only a negative iscale can cause this, so
*     multiply by 10^(d2), which is now only allowed to be 24,
*     with a multiply by 10^8 and 10^16, which is exact since
*     10^24 is exact.  If the input was denormalized, we must
*     create a busy stack frame with the mul command and the
*     two operands, and allow the fpu to complete the multiply.
*
* Register usage:
*	Input/Output
*	d0: FPCR with RZ mode/Unchanged
*	d2: 0 or 24/unchanged
*	d3: x/x
*	d4: LEN/Unchanged
*	d5: ICTR:LAMBDA
*	d6: ILOG/Unchanged
*	d7: k-factor/Unchanged
*	a0: ptr for original operand/final result
*	a1: ptr to PTENRM array/Unchanged
*	a2: x/x
*	fp0: float(ILOG)/X adjusted for SCALE (Y)
*	fp1: 10^ISCALE/Unchanged
*	fp2: x/x
*	F_SCR1:x/x
*	F_SCR2:Abs(X) with $3fff exponent/Unchanged
*	L_SCR1:x/x
*	L_SCR2:first word of X packed/Unchanged

A9_str:
	fmove.x	(a0),fp0	* load X from memory
	fabs.x	fp0		* use abs(X)
	tst.w	d5		* LAMBDA is in lower word of d5
	bne.b	sc_mul		* if neg (LAMBDA = 1), scale by mul
	fdiv.x	fp1,fp0		* calculate X / SCALE -> Y to fp0
	bra.w	A10_st		* branch to A10

sc_mul:
	tst.b	BINDEC_FLG(a6)	* check for denorm
	beq.w	A9_norm		* if norm, continue with mul

* for DENORM, we must calculate:
*	fp0 = input_op * 10^ISCALE * 10^24
* since the input operand is a DENORM, we can't multiply it directly.
* so, we do the multiplication of the exponents and mantissas separately.
* in this way, we avoid underflow on intermediate stages of the
* multiplication and guarantee a result without exception.
	fmovem.x	fp1,-(sp)	* save 10^ISCALE to stack

	move.w	(sp),d3		* grab exponent
	andi.w	#$7fff,d3	* clear sign
	ori.w	#$8000,(a0)	* make DENORM exp negative
	add.w	(a0),d3		* add DENORM exp to 10^ISCALE exp
	subi.w	#$3fff,d3	* subtract BIAS
	add.w	36(a1),d3
	subi.w	#$3fff,d3	* subtract BIAS
	add.w	48(a1),d3
	subi.w	#$3fff,d3	* subtract BIAS

	bmi.w	sc_mul_err	* is result is DENORM, punt!!!

	andi.w	#$8000,(sp)	* keep sign
	or.w	d3,(sp)		* insert new exponent
	andi.w	#$7fff,(a0)	* clear sign bit on DENORM again
	move.l	$8(a0),-(sp)	* put input op mantissa on stk
	move.l	$4(a0),-(sp)
	move.l	#$3fff0000,-(sp)	* force exp to zero
	fmovem.x	(sp)+,fp0	* load normalized DENORM into fp0
	fmul.x	(sp)+,fp0

*	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
*	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
	move.l	36+8(a1),-(sp)		* get 10^8 mantissa
	move.l	36+4(a1),-(sp)
	move.l	#$3fff0000,-(sp)	* force exp to zero
	move.l	48+8(a1),-(sp)		* get 10^16 mantissa
	move.l	48+4(a1),-(sp)
	move.l	#$3fff0000,-(sp)	* force exp to zero
	fmul.x	(sp)+,fp0	* multiply fp0 by 10^8
	fmul.x	(sp)+,fp0	* multiply fp0 by 10^16
	bra.b	A10_st

sc_mul_err:
	bra.b	sc_mul_err

A9_norm:
	tst.w	d2		* test for small exp case
	beq.b	A9_con		* if zero, continue as normal
	fmul.x	36(a1),fp0	* multiply fp0 by 10^8
	fmul.x	48(a1),fp0	* multiply fp0 by 10^16
A9_con:
	fmul.x	fp1,fp0		* calculate X * SCALE -> Y to fp0

* A10. Or in INEX.
*      If INEX is set, round error occured.  This is compensated
*      for by 'or-ing' in the INEX2 flag to the lsb of Y.
*
* Register usage:
*	Input/Output
*	d0: FPCR with RZ mode/FPSR with INEX2 isolated
*	d2: x/x
*	d3: x/x
*	d4: LEN/Unchanged
*	d5: ICTR:LAMBDA
*	d6: ILOG/Unchanged
*	d7: k-factor/Unchanged
*	a0: ptr for original operand/final result
*	a1: ptr to PTENxx array/Unchanged
*	a2: x/ptr to FP_SCR1(a6)
*	fp0: Y/Y with lsb adjusted
*	fp1: 10^ISCALE/Unchanged
*	fp2: x/x

A10_st:
	fmove.l	fpsr,d0		* get FPSR
	fmove.x	fp0,FP_SCR1(a6)		* move Y to memory
	lea.l	FP_SCR1(a6),a2		* load a2 with ptr to FP_SCR1
	btst	#9,d0		* check if INEX2 set
	beq.b	A11_st		* if clear, skip rest
	ori.l	#1,8(a2)	* or in 1 to lsb of mantissa
	fmove.x	FP_SCR1(a6),fp0		* write adjusted Y back to fpu


* A11. Restore original FPCR; set size ext.
*      Perform FINT operation in the user's rounding mode.  Keep
*      the size to extended.  The sintdo entry point in the sint
*      routine expects the FPCR value to be in USER_FPCR for
*      mode and precision.  The original FPCR is saved in L_SCR1.

A11_st:
	move.l	USER_FPCR(a6),L_SCR1(a6)	* save it for later
	andi.l	#$00000030,USER_FPCR(a6)	* set size to ext, 
*					;block exceptions


* A12. Calculate YINT = FINT(Y) according to user's rounding mode.
*      The FPSP routine sintd0 is used.  The output is in fp0.
*
* Register usage:
*	Input/Output
*	d0: FPSR with AINEX cleared/FPCR with size set to ext
*	d2: x/x/scratch
*	d3: x/x
*	d4: LEN/Unchanged
*	d5: ICTR:LAMBDA/Unchanged
*	d6: ILOG/Unchanged
*	d7: k-factor/Unchanged
*	a0: ptr for original operand/src ptr for sintdo
*	a1: ptr to PTENxx array/Unchanged
*	a2: ptr to FP_SCR1(a6)/Unchanged
*	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
*	fp0: Y/YINT
*	fp1: 10^ISCALE/Unchanged
*	fp2: x/x
*	F_SCR1:x/x
*	F_SCR2:Y adjusted for inex/Y with original exponent
*	L_SCR1:x/original USER_FPCR
*	L_SCR2:first word of X packed/Unchanged

A12_st:
	movem.l	d0-d1/a0-a1,-(sp)	* save regs used by sintd0	 {%d0-%d1/%a0-%a1}
	move.l	L_SCR1(a6),-(sp)
	move.l	L_SCR2(a6),-(sp)

	lea.l	FP_SCR1(a6),a0		* a0 is ptr to FP_SCR1(a6)
	fmove.x	fp0,(a0)	* move Y to memory at FP_SCR1(a6)
	tst.l	L_SCR2(a6)	* test sign of original operand
	bge.b	do_fint12		* if pos, use Y 
	ori.l	#$80000000,(a0)		* if neg, use -Y
do_fint12:
	move.l	USER_FPSR(a6),-(sp)
*	bsr	sintdo		# sint routine returns int in fp0

	fmove.l	USER_FPCR(a6),fpcr
	fmove.l	#$0,fpsr		* clear the AEXC bits!!!
*#	mov.l		USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
*#	andi.l		&0x00000030,%d0
*#	fmov.l		%d0,%fpcr
	fint.x	FP_SCR1(a6),fp0		* do fint()
	fmove.l	fpsr,d0
	or.w	d0,FPSR_EXCEPT(a6)
*#	fmov.l		&0x0,%fpcr
*#	fmov.l		%fpsr,%d0		# don't keep ccodes
*#	or.w		%d0,FPSR_EXCEPT(%a6)

	move.b	(sp),USER_FPSR(a6)
	add.l	#4,sp

	move.l	(sp)+,L_SCR2(a6)
	move.l	(sp)+,L_SCR1(a6)
	movem.l	(sp)+,d0-d1/a0-a1	* restore regs used by sint	 {%d0-%d1/%a0-%a1}

	move.l	L_SCR2(a6),FP_SCR1(a6)	* restore original exponent
	move.l	L_SCR1(a6),USER_FPCR(a6)	* restore user's FPCR

* A13. Check for LEN digits.
*      If the int operation results in more than LEN digits,
*      or less than LEN -1 digits, adjust ILOG and repeat from
*      A6.  This test occurs only on the first pass.  If the
*      result is exactly 10^LEN, decrement ILOG and divide
*      the mantissa by 10.  The calculation of 10^LEN cannot
*      be inexact, since all powers of ten upto 10^27 are exact
*      in extended precision, so the use of a previous power-of-ten
*      table will introduce no error.
*
*
* Register usage:
*	Input/Output
*	d0: FPCR with size set to ext/scratch final = 0
*	d2: x/x
*	d3: x/scratch final = x
*	d4: LEN/LEN adjusted
*	d5: ICTR:LAMBDA/LAMBDA:ICTR
*	d6: ILOG/ILOG adjusted
*	d7: k-factor/Unchanged
*	a0: pointer into memory for packed bcd string formation
*	a1: ptr to PTENxx array/Unchanged
*	a2: ptr to FP_SCR1(a6)/Unchanged
*	fp0: int portion of Y/abs(YINT) adjusted
*	fp1: 10^ISCALE/Unchanged
*	fp2: x/10^LEN
*	F_SCR1:x/x
*	F_SCR2:Y with original exponent/Unchanged
*	L_SCR1:original USER_FPCR/Unchanged
*	L_SCR2:first word of X packed/Unchanged

A13_st:
	swap	d5		* put ICTR in lower word of d5
	tst.w	d5		* check if ICTR = 0
	bne.l	not_zr		* if non-zero, go to second test
*
* Compute 10^(LEN-1)
*
	fmove.s	FONE(pc),fp2	* init fp2 to 1.0
	move.l	d4,d0		* put LEN in d0
	subq.l	#1,d0		* d0 = LEN -1
	clr.l	d3		* clr table index
l_loop:
	lsr.l	#1,d0		* shift next bit into carry
	bcc.b	l_next		* if zero, skip the mul
	fmul.x	(a1,d3.l),fp2	* mul by 10**(d3_bit_no)
l_next:
	addi.l	#12,d3		* inc d3 to next pwrten table entry
	tst.l	d0		* test if LEN is zero
	bne.b	l_loop		* if not, loop
*
* 10^LEN-1 is computed for this test and A14.  If the input was
* denormalized, check only the case in which YINT > 10^LEN.
*
	tst.b	BINDEC_FLG(a6)	* check if input was norm
	beq.b	A13_con		* if norm, continue with checking
	fabs.x	fp0		* take abs of YINT
	bra.l	test_2
*
* Compare abs(YINT) to 10^(LEN-1) and 10^LEN
*
A13_con:
	fabs.x	fp0		* take abs of YINT
	fcmp.x	fp2,fp0		* compare abs(YINT) with 10^(LEN-1)
	fbge.w	test_2		* if greater, do next test
	subq.l	#1,d6		* subtract 1 from ILOG
	move.w	#1,d5		* set ICTR
	fmove.l	#rm_mode*$10,fpcr	* set rmode to RM
	fmul.s	FTEN(pc),fp2	* compute 10^LEN 
	bra.w	A6_str		* return to A6 and recompute YINT
test_2:
	fmul.s	FTEN(pc),fp2	* compute 10^LEN
	fcmp.x	fp2,fp0		* compare abs(YINT) with 10^LEN
	fblt.w	A14_st		* if less, all is ok, go to A14
	fbgt.w	fix_ex		* if greater, fix and redo
	fdiv.s	FTEN(pc),fp0	* if equal, divide by 10
	addq.l	#1,d6		* and inc ILOG
	bra.b	A14_st		* and continue elsewhere
fix_ex:
	addq.l	#1,d6		* increment ILOG by 1
	move.w	#1,d5		* set ICTR
	fmove.l	#rm_mode*$10,fpcr	* set rmode to RM
	bra.w	A6_str		* return to A6 and recompute YINT
*
* Since ICTR <> 0, we have already been through one adjustment, 
* and shouldn't have another; this is to check if abs(YINT) = 10^LEN
* 10^LEN is again computed using whatever table is in a1 since the
* value calculated cannot be inexact.
*
not_zr:
	fmove.s	FONE(pc),fp2	* init fp2 to 1.0
	move.l	d4,d0		* put LEN in d0
	clr.l	d3		* clr table index
z_loop:
	lsr.l	#1,d0		* shift next bit into carry
	bcc.b	z_next		* if zero, skip the mul
	fmul.x	(a1,d3.l),fp2	* mul by 10**(d3_bit_no)
z_next:
	addi.l	#12,d3		* inc d3 to next pwrten table entry
	tst.l	d0		* test if LEN is zero
	bne.b	z_loop		* if not, loop
	fabs.x	fp0		* get abs(YINT)
	fcmp.x	fp2,fp0		* check if abs(YINT) = 10^LEN
	fbne.w	A14_st		* if not, skip this
	fdiv.s	FTEN(pc),fp0	* divide abs(YINT) by 10
	addq.l	#1,d6		* and inc ILOG by 1
	addq.l	#1,d4		* and inc LEN
	fmul.s	FTEN(pc),fp2	* if LEN++, the get 10^^LEN

* A14. Convert the mantissa to bcd.
*      The binstr routine is used to convert the LEN digit 
*      mantissa to bcd in memory.  The input to binstr is
*      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
*      such that the decimal point is to the left of bit 63.
*      The bcd digits are stored in the correct position in 
*      the final string area in memory.
*
*
* Register usage:
*	Input/Output
*	d0: x/LEN call to binstr - final is 0
*	d1: x/0
*	d2: x/ms 32-bits of mant of abs(YINT)
*	d3: x/ls 32-bits of mant of abs(YINT)
*	d4: LEN/Unchanged
*	d5: ICTR:LAMBDA/LAMBDA:ICTR
*	d6: ILOG
*	d7: k-factor/Unchanged
*	a0: pointer into memory for packed bcd string formation
*	    /ptr to first mantissa byte in result string
*	a1: ptr to PTENxx array/Unchanged
*	a2: ptr to FP_SCR1(a6)/Unchanged
*	fp0: int portion of Y/abs(YINT) adjusted
*	fp1: 10^ISCALE/Unchanged
*	fp2: 10^LEN/Unchanged
*	F_SCR1:x/Work area for final result
*	F_SCR2:Y with original exponent/Unchanged
*	L_SCR1:original USER_FPCR/Unchanged
*	L_SCR2:first word of X packed/Unchanged

A14_st:
	fmove.l	#rz_mode*$10,fpcr	* force rz for conversion
	fdiv.x	fp2,fp0		* divide abs(YINT) by 10^LEN
	lea.l	FP_SCR0(a6),a0
	fmove.x	fp0,(a0)	* move abs(YINT)/10^LEN to memory
	move.l	4(a0),d2	* move 2nd word of FP_RES to d2
	move.l	8(a0),d3	* move 3rd word of FP_RES to d3
	clr.l	4(a0)		* zero word 2 of FP_RES
	clr.l	8(a0)		* zero word 3 of FP_RES
	move.l	(a0),d0		* move exponent to d0
	swap	d0		* put exponent in lower word
	beq.b	no_sft		* if zero, don't shift
	subi.l	#$3ffd,d0	* sub bias less 2 to make fract
	tst.l	d0		* check if > 1
	bgt.b	no_sft		* if so, don't shift
	neg.l	d0		* make exp positive
m_loop:
	lsr.l	#1,d2		* shift d2:d3 right, add 0s 
	roxr.l	#1,d3		* the number of places
	dbf.w	d0,m_loop	* given in d0
no_sft:
	tst.l	d2		* check for mantissa of zero
	bne.b	no_zr		* if not, go on
	tst.l	d3		* continue zero check
	beq.b	zer_m		* if zero, go directly to binstr
no_zr:
	clr.l	d1		* put zero in d1 for addx
	addi.l	#$00000080,d3	* inc at bit 7
	addx.l	d1,d2		* continue inc
	andi.l	#$ffffff80,d3	* strip off lsb not used by 882
zer_m:
	move.l	d4,d0		* put LEN in d0 for binstr call
	addq.l	#3,a0		* a0 points to M16 byte in result
	bsr.l	binstr		* call binstr to convert mant


* A15. Convert the exponent to bcd.
*      As in A14 above, the exp is converted to bcd and the
*      digits are stored in the final string.
*
*      Digits are stored in L_SCR1(a6) on return from BINDEC as:
*
*  	 32               16 15                0
*	-----------------------------------------
*  	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
*	-----------------------------------------
*
* And are moved into their proper places in FP_SCR0.  If digit e4
* is non-zero, OPERR is signaled.  In all cases, all 4 digits are
* written as specified in the 881/882 manual for packed decimal.
*
* Register usage:
*	Input/Output
*	d0: x/LEN call to binstr - final is 0
*	d1: x/scratch (0);shift count for final exponent packing
*	d2: x/ms 32-bits of exp fraction/scratch
*	d3: x/ls 32-bits of exp fraction
*	d4: LEN/Unchanged
*	d5: ICTR:LAMBDA/LAMBDA:ICTR
*	d6: ILOG
*	d7: k-factor/Unchanged
*	a0: ptr to result string/ptr to L_SCR1(a6)
*	a1: ptr to PTENxx array/Unchanged
*	a2: ptr to FP_SCR1(a6)/Unchanged
*	fp0: abs(YINT) adjusted/float(ILOG)
*	fp1: 10^ISCALE/Unchanged
*	fp2: 10^LEN/Unchanged
*	F_SCR1:Work area for final result/BCD result
*	F_SCR2:Y with original exponent/ILOG/10^4
*	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
*	L_SCR2:first word of X packed/Unchanged

A15_st:
	tst.b	BINDEC_FLG(a6)	* check for denorm
	beq.b	not_denorm
	ftst.x	fp0		* test for zero
	fbeq.w	den_zero	* if zero, use k-factor or 4933
	fmove.l	d6,fp0		* float ILOG
	fabs.x	fp0		* get abs of ILOG
	bra.b	convrt
den_zero:
	tst.l	d7		* check sign of the k-factor
	blt.b	use_ilog	* if negative, use ILOG
	fmove.s	F4933(pc),fp0	* force exponent to 4933
	bra.b	convrt		* do it
use_ilog:
	fmove.l	d6,fp0		* float ILOG
	fabs.x	fp0		* get abs of ILOG
	bra.b	convrt
not_denorm:
	ftst.x	fp0		* test for zero
	fbne.w	not_zero	* if zero, force exponent
	fmove.s	FONE(pc),fp0	* force exponent to 1
	bra.b	convrt		* do it
not_zero:
	fmove.l	d6,fp0		* float ILOG
	fabs.x	fp0		* get abs of ILOG
convrt:
	fdiv.x	24(a1),fp0	* compute ILOG/10^4
	fmove.x	fp0,FP_SCR1(a6)		* store fp0 in memory
	move.l	4(a2),d2	* move word 2 to d2
	move.l	8(a2),d3	* move word 3 to d3
	move.w	(a2),d0		* move exp to d0
	beq.b	x_loop_fin	* if zero, skip the shift
	subi.w	#$3ffd,d0	* subtract off bias
	neg.w	d0		* make exp positive
x_loop:
	lsr.l	#1,d2		* shift d2:d3 right 
	roxr.l	#1,d3		* the number of places
	dbf.w	d0,x_loop	* given in d0
x_loop_fin:
	clr.l	d1		* put zero in d1 for addx
	addi.l	#$00000080,d3	* inc at bit 6
	addx.l	d1,d2		* continue inc
	andi.l	#$ffffff80,d3	* strip off lsb not used by 882
	move.l	#4,d0		* put 4 in d0 for binstr call
	lea.l	L_SCR1(a6),a0	* a0 is ptr to L_SCR1 for exp digits
	bsr.l	binstr		* call binstr to convert exp
	move.l	L_SCR1(a6),d0	* load L_SCR1 lword to d0 
	move.l	#12,d1		* use d1 for shift count
	lsr.l	d1,d0		* shift d0 right by 12
	bfins	d0,FP_SCR0(a6){#4:#12}		* put e3:e2:e1 in FP_SCR0
	lsr.l	d1,d0		* shift d0 right by 12
	bfins	d0,FP_SCR0(a6){#16:#4}		* put e4 in FP_SCR0 
	tst.b	d0		* check if e4 is zero
	beq.b	A16_st		* if zero, skip rest
	ori.l	#opaop_mask,USER_FPSR(a6)	* set OPERR & AIOP in USER_FPSR


* A16. Write sign bits to final string.
*	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
*
* Register usage:
*	Input/Output
*	d0: x/scratch - final is x
*	d2: x/x
*	d3: x/x
*	d4: LEN/Unchanged
*	d5: ICTR:LAMBDA/LAMBDA:ICTR
*	d6: ILOG/ILOG adjusted
*	d7: k-factor/Unchanged
*	a0: ptr to L_SCR1(a6)/Unchanged
*	a1: ptr to PTENxx array/Unchanged
*	a2: ptr to FP_SCR1(a6)/Unchanged
*	fp0: float(ILOG)/Unchanged
*	fp1: 10^ISCALE/Unchanged
*	fp2: 10^LEN/Unchanged
*	F_SCR1:BCD result with correct signs
*	F_SCR2:ILOG/10^4
*	L_SCR1:Exponent digits on return from binstr
*	L_SCR2:first word of X packed/Unchanged

A16_st:
	clr.l	d0		* clr d0 for collection of signs
	andi.b	#$0f,FP_SCR0(a6)	* clear first nibble of FP_SCR0 
	tst.l	L_SCR2(a6)	* check sign of original mantissa
	bge.b	mant_p		* if pos, don't set SM
	move.l	#2,d0		* move 2 in to d0 for SM
mant_p:
	tst.l	d6		* check sign of ILOG
	bge.b	wr_sgn		* if pos, don't set SE
	addq.l	#1,d0		* set bit 0 in d0 for SE 
wr_sgn:
	bfins	d0,FP_SCR0(a6){#0:#2}	* insert SM and SE into FP_SCR0

* Clean up and restore all registers used.

	fmove.l	#0,fpsr		* clear possible inex2/ainex bits
	fmovem.x	(sp)+,fp0-fp2	*  {%fp0-%fp2}
	movem.l	(sp)+,d2-d7/a2	*  {%d2-%d7/%a2}
	rts

	global	PTENRN
PTENRN:
	.dc.l	$40020000,$A0000000,$00000000		* 10 ^ 1
	.dc.l	$40050000,$C8000000,$00000000		* 10 ^ 2
	.dc.l	$400C0000,$9C400000,$00000000		* 10 ^ 4
	.dc.l	$40190000,$BEBC2000,$00000000		* 10 ^ 8
	.dc.l	$40340000,$8E1BC9BF,$04000000		* 10 ^ 16
	.dc.l	$40690000,$9DC5ADA8,$2B70B59E		* 10 ^ 32
	.dc.l	$40D30000,$C2781F49,$FFCFA6D5		* 10 ^ 64
	.dc.l	$41A80000,$93BA47C9,$80E98CE0		* 10 ^ 128
	.dc.l	$43510000,$AA7EEBFB,$9DF9DE8E		* 10 ^ 256
	.dc.l	$46A30000,$E319A0AE,$A60E91C7		* 10 ^ 512
	.dc.l	$4D480000,$C9767586,$81750C17		* 10 ^ 1024
	.dc.l	$5A920000,$9E8B3B5D,$C53D5DE5		* 10 ^ 2048
	.dc.l	$75250000,$C4605202,$8A20979B		* 10 ^ 4096

	global	PTENRP
PTENRP:
	.dc.l	$40020000,$A0000000,$00000000		* 10 ^ 1
	.dc.l	$40050000,$C8000000,$00000000		* 10 ^ 2
	.dc.l	$400C0000,$9C400000,$00000000		* 10 ^ 4
	.dc.l	$40190000,$BEBC2000,$00000000		* 10 ^ 8
	.dc.l	$40340000,$8E1BC9BF,$04000000		* 10 ^ 16
	.dc.l	$40690000,$9DC5ADA8,$2B70B59E		* 10 ^ 32
	.dc.l	$40D30000,$C2781F49,$FFCFA6D6		* 10 ^ 64
	.dc.l	$41A80000,$93BA47C9,$80E98CE0		* 10 ^ 128
	.dc.l	$43510000,$AA7EEBFB,$9DF9DE8E		* 10 ^ 256
	.dc.l	$46A30000,$E319A0AE,$A60E91C7		* 10 ^ 512
	.dc.l	$4D480000,$C9767586,$81750C18		* 10 ^ 1024
	.dc.l	$5A920000,$9E8B3B5D,$C53D5DE5		* 10 ^ 2048
	.dc.l	$75250000,$C4605202,$8A20979B		* 10 ^ 4096

	global	PTENRM
PTENRM:
	.dc.l	$40020000,$A0000000,$00000000		* 10 ^ 1
	.dc.l	$40050000,$C8000000,$00000000		* 10 ^ 2
	.dc.l	$400C0000,$9C400000,$00000000		* 10 ^ 4
	.dc.l	$40190000,$BEBC2000,$00000000		* 10 ^ 8
	.dc.l	$40340000,$8E1BC9BF,$04000000		* 10 ^ 16
	.dc.l	$40690000,$9DC5ADA8,$2B70B59D		* 10 ^ 32
	.dc.l	$40D30000,$C2781F49,$FFCFA6D5		* 10 ^ 64
	.dc.l	$41A80000,$93BA47C9,$80E98CDF		* 10 ^ 128
	.dc.l	$43510000,$AA7EEBFB,$9DF9DE8D		* 10 ^ 256
	.dc.l	$46A30000,$E319A0AE,$A60E91C6		* 10 ^ 512
	.dc.l	$4D480000,$C9767586,$81750C17		* 10 ^ 1024
	.dc.l	$5A920000,$9E8B3B5D,$C53D5DE4		* 10 ^ 2048
	.dc.l	$75250000,$C4605202,$8A20979A		* 10 ^ 4096

*########################################################################
* binstr(): Converts a 64-bit binary integer to bcd.			#
*									#
* INPUT *************************************************************** #
*	d2:d3 = 64-bit binary integer					#
*	d0    = desired length (LEN)					#
*	a0    = pointer to start in memory for bcd characters		#
*          	(This pointer must point to byte 4 of the first		#
*          	 lword of the packed decimal memory string.)		#
*									#
* OUTPUT ************************************************************** #
*	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
*									#
* ALGORITHM ***********************************************************	#
*	The 64-bit binary is assumed to have a decimal point before	#
*	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
*	shift and a mul by 8 shift.  The bits shifted out of the	#
*	msb form a decimal digit.  This process is iterated until	#
*	LEN digits are formed.						#
*									#
* A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
*     digit formed will be assumed the least significant.  This is	#
*     to force the first byte formed to have a 0 in the upper 4 bits.	#
*									#
* A2. Beginning of the loop:						#
*     Copy the fraction in d2:d3 to d4:d5.				#
*									#
* A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
*     extracts and shifts.  The three msbs from d2 will go into d1.	#
*									#
* A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
*     will be collected by the carry.					#
*									#
* A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
*     into d2:d3.  D1 will contain the bcd digit formed.		#
*									#
* A6. Test d7.  If zero, the digit formed is the ms digit.  If non-	#
*     zero, it is the ls digit.  Put the digit in its place in the	#
*     upper word of d0.  If it is the ls digit, write the word		#
*     from d0 to memory.						#
*									#
* A7. Decrement d6 (LEN counter) and repeat the loop until zero.	#
*									#
*########################################################################

*	Implementation Notes:
*
*	The registers are used as follows:
*
*		d0: LEN counter
*		d1: temp used to form the digit
*		d2: upper 32-bits of fraction for mul by 8
*		d3: lower 32-bits of fraction for mul by 8
*		d4: upper 32-bits of fraction for mul by 2
*		d5: lower 32-bits of fraction for mul by 2
*		d6: temp for bit-field extracts
*		d7: byte digit formation word;digit count {0,1}
*		a0: pointer into memory for packed bcd string formation
*

	global	binstr
binstr:
	movem.l	d0-d7,-(sp)	*  {%d0-%d7}

*
* A1: Init d7
*
	move.l	#1,d7		* init d7 for second digit
	subq.l	#1,d0		* for dbf d0 would have LEN+1 passes
*
* A2. Copy d2:d3 to d4:d5.  Start loop.
*
loop:
	move.l	d2,d4		* copy the fraction before muls
	move.l	d3,d5		* to d4:d5
*
* A3. Multiply d2:d3 by 8; extract msbs into d1.
*
	bfextu	d2{#0:#3},d1	* copy 3 msbs of d2 into d1
	asl.l	#3,d2		* shift d2 left by 3 places
	bfextu	d3{#0:#3},d6	* copy 3 msbs of d3 into d6
	asl.l	#3,d3		* shift d3 left by 3 places
	or.l	d6,d2		* or in msbs from d3 into d2
*
* A4. Multiply d4:d5 by 2; add carry out to d1.
*
	asl.l	#1,d5		* mul d5 by 2
	roxl.l	#1,d4		* mul d4 by 2
	swap	d6		* put 0 in d6 lower word
	addx.w	d6,d1		* add in extend from mul by 2
*
* A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
*
	add.l	d5,d3		* add lower 32 bits
	nop			* ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.l	d4,d2		* add with extend upper 32 bits
	nop			* ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.w	d6,d1		* add in extend from add to d1
	swap	d6		* with d6 = 0; put 0 in upper word
*
* A6. Test d7 and branch.
*
	tst.w	d7		* if zero, store digit & to loop
	beq.b	first_d		* if non-zero, form byte & write
sec_d:
	swap	d7		* bring first digit to word d7b
	asl.w	#4,d7		* first digit in upper 4 bits d7b
	add.w	d1,d7		* add in ls digit to d7b
	move.b	d7,(a0)+	* store d7b byte in memory
	swap	d7		* put LEN counter in word d7a
	clr.w	d7		* set d7a to signal no digits done
	dbf.w	d0,loop		* do loop some more!
	bra.b	end_bstr	* finished, so exit
first_d:
	swap	d7		* put digit word in d7b
	move.w	d1,d7		* put new digit in d7b
	swap	d7		* put LEN counter in word d7a
	addq.w	#1,d7		* set d7a to signal first digit done
	dbf.w	d0,loop		* do loop some more!
	swap	d7		* put last digit in string
	lsl.w	#4,d7		* move it to upper 4 bits
	move.b	d7,(a0)+	* store it in memory string
*
* Clean up and return with result in fp0.
*
end_bstr:
	movem.l	(sp)+,d0-d7	*  {%d0-%d7}
	rts

*########################################################################
* XDEF ****************************************************************	#
*	facc_in_b(): dmem_read_byte failed				#
*	facc_in_w(): dmem_read_word failed				#
*	facc_in_l(): dmem_read_long failed				#
*	facc_in_d(): dmem_read of dbl prec failed			#
*	facc_in_x(): dmem_read of ext prec failed			#
*									#
*	facc_out_b(): dmem_write_byte failed				#
*	facc_out_w(): dmem_write_word failed				#
*	facc_out_l(): dmem_write_long failed				#
*	facc_out_d(): dmem_write of dbl prec failed			#
*	facc_out_x(): dmem_write of ext prec failed			#
*									#
* XREF ****************************************************************	#
*	_real_access() - exit through access error handler		#
*									#
* INPUT ***************************************************************	#
*	None								#
* 									#
* OUTPUT **************************************************************	#
*	None								#
*									#
* ALGORITHM ***********************************************************	#
* 	Flow jumps here when an FP data fetch call gets an error 	#
* result. This means the operating system wants an access error frame	#
* made out of the current exception stack frame. 			#
*	So, we first call restore() which makes sure that any updated	#
* -(an)+ register gets returned to its pre-exception value and then	#
* we change the stack to an acess error stack frame.			#
*									#
*########################################################################

facc_in_b:
	moveq.l	#$1,d0				* one byte
	bsr.w	restore				* fix An

	move.w	#$0121,EXC_VOFF(a6)		* set FSLW
	bra.w	facc_finish

facc_in_w:
	moveq.l	#$2,d0				* two bytes
	bsr.w	restore				* fix An

	move.w	#$0141,EXC_VOFF(a6)		* set FSLW
	bra.b	facc_finish

facc_in_l:
	moveq.l	#$4,d0				* four bytes
	bsr.w	restore				* fix An

	move.w	#$0101,EXC_VOFF(a6)		* set FSLW
	bra.b	facc_finish

facc_in_d:
	moveq.l	#$8,d0				* eight bytes
	bsr.w	restore				* fix An

	move.w	#$0161,EXC_VOFF(a6)		* set FSLW
	bra.b	facc_finish

facc_in_x:
	moveq.l	#$c,d0				* twelve bytes
	bsr.w	restore				* fix An

	move.w	#$0161,EXC_VOFF(a6)		* set FSLW
	bra.b	facc_finish

*###############################################################

facc_out_b:
	moveq.l	#$1,d0				* one byte
	bsr.w	restore				* restore An

	move.w	#$00a1,EXC_VOFF(a6)		* set FSLW
	bra.b	facc_finish

facc_out_w:
	moveq.l	#$2,d0				* two bytes
	bsr.w	restore				* restore An

	move.w	#$00c1,EXC_VOFF(a6)		* set FSLW
	bra.b	facc_finish

facc_out_l:
	moveq.l	#$4,d0				* four bytes
	bsr.w	restore				* restore An

	move.w	#$0081,EXC_VOFF(a6)		* set FSLW
	bra.b	facc_finish

facc_out_d:
	moveq.l	#$8,d0				* eight bytes
	bsr.w	restore				* restore An

	move.w	#$00e1,EXC_VOFF(a6)		* set FSLW
	bra.b	facc_finish

facc_out_x:
	move.l	#$c,d0				* twelve bytes
	bsr.w	restore				* restore An

	move.w	#$00e1,EXC_VOFF(a6)		* set FSLW

* here's where we actually create the access error frame from the
* current exception stack frame.
facc_finish:
	move.l	USER_FPIAR(a6),EXC_PC(a6)	* store current PC

	fmovem.x	EXC_FPREGS(a6),fp0-fp1	* restore fp0-fp1
	fmovem.l	USER_FPCR(a6),fpcr/fpsr/fpiar	* restore ctrl regs
	movem.l	EXC_DREGS(a6),d0-d1/a0-a1	* restore d0-d1/a0-a1

	unlk	a6

	move.l	(sp),-(sp)		* store SR, hi(PC)
	move.l	$8(sp),$4(sp)		* store lo(PC)
	move.l	$c(sp),$8(sp)		* store EA
	move.l	#$00000001,$c(sp)	* store FSLW
	move.w	$6(sp),$c(sp)		* fix FSLW (size)
	move.w	#$4008,$6(sp)		* store voff

	btst	#$5,(sp)		* supervisor or user mode?
	beq.b	facc_out2		* user
	bset	#$2,$d(sp)		* set supervisor TM bit

facc_out2:
	bra.l	_real_access

*#################################################################

* if the effective addressing mode was predecrement or postincrement,
* the emulation has already changed its value to the correct post-
* instruction value. but since we're exiting to the access error
* handler, then AN must be returned to its pre-instruction value.
* we do that here.
restore:
	move.b	EXC_OPWORD+$1(a6),d1
	andi.b	#$38,d1			* extract opmode
	cmpi.b	#$18,d1			* postinc?
	beq.w	rest_inc
	cmpi.b	#$20,d1			* predec?
	beq.w	rest_dec
	rts

rest_inc:
	move.b	EXC_OPWORD+$1(a6),d1
	andi.w	#$0007,d1		* fetch An

	move.w	(tbl_rest_inc.b,pc,d1.w*2),d1
	jmp	(tbl_rest_inc.b,pc,d1.w*1)

tbl_rest_inc:
	.dc.w	ri_a0-tbl_rest_inc
	.dc.w	ri_a1-tbl_rest_inc
	.dc.w	ri_a2-tbl_rest_inc
	.dc.w	ri_a3-tbl_rest_inc
	.dc.w	ri_a4-tbl_rest_inc
	.dc.w	ri_a5-tbl_rest_inc
	.dc.w	ri_a6-tbl_rest_inc
	.dc.w	ri_a7-tbl_rest_inc

ri_a0:
	sub.l	d0,EXC_DREGS+$8(a6)	* fix stacked a0
	rts
ri_a1:
	sub.l	d0,EXC_DREGS+$c(a6)	* fix stacked a1
	rts
ri_a2:
	sub.l	d0,a2			* fix a2
	rts
ri_a3:
	sub.l	d0,a3			* fix a3
	rts
ri_a4:
	sub.l	d0,a4			* fix a4
	rts
ri_a5:
	sub.l	d0,a5			* fix a5
	rts
ri_a6:
	sub.l	d0,(a6)			* fix stacked a6
	rts
* if it's a fmove out instruction, we don't have to fix a7
* because we hadn't changed it yet. if it's an opclass two
* instruction (data moved in) and the exception was in supervisor
* mode, then also also wasn't updated. if it was user mode, then
* restore the correct a7 which is in the USP currently.
ri_a7:
	cmpi.b	#$30,EXC_VOFF(a6)	* move in or out?
	bne.b	ri_a7_done		* out

	btst	#$5,EXC_SR(a6)		* user or supervisor?
	bne.b	ri_a7_done		* supervisor
	movec	usp,a0			* restore USP
	sub.l	d0,a0
	movec	a0,usp
ri_a7_done:
	rts

* need to invert adjustment value if the <ea> was predec
rest_dec:
	neg.l	d0
	bra.b	rest_inc
1 2 3