misc/060tsys/t28fpsp.s (2/3)
1 2 3
Mod_Loop_pre:
	addq.l	#$4,sp			* erase exp(X)
*..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
Mod_Loop:
	tst.l	d6			* test carry bit
	bgt.b	R_GT_Y

*..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
	cmp.l	d4,d1			* compare hi(R) and hi(Y)
	bne.b	R_NE_Y
	cmp.l	d5,d2			* compare lo(R) and lo(Y)
	bne.b	R_NE_Y

*..At this point, R = Y
	bra.w	Rem_is_0

R_NE_Y:
*..use the borrow of the previous compare
	bcs.b	R_LT_Y			* borrow is set iff R < Y

R_GT_Y:
*..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
*..and Y < (D1,D2) < 2Y. Either way, perform R - Y
	sub.l	d5,d2			* lo(R) - lo(Y)
	subx.l	d4,d1			* hi(R) - hi(Y)
	clr.l	d6			* clear carry
	addq.l	#1,d3			* Q := Q + 1

R_LT_Y:
*..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
	tst.l	d0			* see if j = 0.
	beq.b	PostLoop

	add.l	d3,d3			* Q := 2Q
	add.l	d2,d2			* lo(R) = 2lo(R)
	roxl.l	#1,d1			* hi(R) = 2hi(R) + carry
	scs	d6			* set Carry if 2(R) overflows
	addq.l	#1,a1			* k := k+1
	subq.l	#1,d0			* j := j - 1
*..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.

	bra.b	Mod_Loop

PostLoop:
*..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.

*..normalize R.
	move.l	L_SCR1(a6),d0		* new biased expo of R
	tst.l	d1
	bne.b	HiR_not0

HiR_0:
	move.l	d2,d1
	clr.l	d2
	subi.l	#32,d0
	clr.l	d6
	bfffo	d1{#0:#32},d6
	lsl.l	d6,d1
	sub.l	d6,d0			* (D0,D1,D2) is normalized
*                                       ...with bias $7FFD
	bra.b	Get_Mod

HiR_not0:
	clr.l	d6
	bfffo	d1{#0:#32},d6
	bmi.b	Get_Mod			* already normalized
	sub.l	d6,d0
	lsl.l	d6,d1
	move.l	d2,d7			* a copy of D2
	lsl.l	d6,d2
	neg.l	d6
	addi.l	#32,d6
	lsr.l	d6,d7
	or.l	d7,d1			* (D0,D1,D2) normalized

*
Get_Mod:
	cmpi.l	#$000041FE,d0
	bge.b	No_Scale
Do_Scale:
	move.w	d0,R(a6)
	move.l	d1,R_Hi(a6)
	move.l	d2,R_Lo(a6)
	move.l	L_SCR1(a6),d6
	move.w	d6,Y(a6)
	move.l	d4,Y_Hi(a6)
	move.l	d5,Y_Lo(a6)
	fmove.x	R(a6),fp0		* no exception
	move.b	#1,Sc_Flag(a6)
	bra.b	ModOrRem
No_Scale:
	move.l	d1,R_Hi(a6)
	move.l	d2,R_Lo(a6)
	subi.l	#$3FFE,d0
	move.w	d0,R(a6)
	move.l	L_SCR1(a6),d6
	subi.l	#$3FFE,d6
	move.l	d6,L_SCR1(a6)
	fmove.x	R(a6),fp0
	move.w	d6,Y(a6)
	move.l	d4,Y_Hi(a6)
	move.l	d5,Y_Lo(a6)
	clr.b	Sc_Flag(a6)

*
ModOrRem:
	tst.b	Mod_Flag(a6)
	beq.b	Fix_Sign

	move.l	L_SCR1(a6),d6		* new biased expo(Y)
	subq.l	#1,d6			* biased expo(Y/2)
	cmp.l	d6,d0
	blt.b	Fix_Sign
	bgt.b	Last_Sub

	cmp.l	d4,d1
	bne.b	Not_EQ
	cmp.l	d5,d2
	bne.b	Not_EQ
	bra.w	Tie_Case

Not_EQ:
	bcs.b	Fix_Sign

Last_Sub:
*
	fsub.x	Y(a6),fp0		* no exceptions
	addq.l	#1,d3			* Q := Q + 1

*
Fix_Sign:
*..Get sign of X
	move.w	SignX(a6),d6
	bge.b	Get_Q
	fneg.x	fp0

*..Get Q
*
Get_Q:
	clr.l	d6
	move.w	SignQ(a6),d6		* D6 is sign(Q)
	move.l	#8,d7
	lsr.l	d7,d6
	andi.l	#$0000007F,d3		* 7 bits of Q
	or.l	d6,d3			* sign and bits of Q
*	swap		%d3
*	fmov.l		%fpsr,%d6
*	and.l		&0xFF00FFFF,%d6
*	or.l		%d3,%d6
*	fmov.l		%d6,%fpsr		# put Q in fpsr
	move.b	d3,FPSR_QBYTE(a6)	* put Q in fpsr

*
Restore:
	movem.l	(sp)+,d2-d7		*  {%d2-%d7}
	move.l	(sp)+,d0
	fmove.l	d0,fpcr
	tst.b	Sc_Flag(a6)
	beq.b	Finish
	move.b	#FMUL_OP,d1		* last inst is MUL
	fmul.x	Scale(pc),fp0		* may cause underflow
	bra.l	t_catch2
* the '040 package did this apparently to see if the dst operand for the 
* preceding fmul was a denorm. but, it better not have been since the 
* algorithm just got done playing with fp0 and expected no exceptions
* as a result. trust me...
*	bra		t_avoid_unsupp		# check for denorm as a
*						;result of the scaling

Finish:
	move.b	#FMOV_OP,d1		* last inst is MOVE
	fmove.x	fp0,fp0			* capture exceptions & round
	bra.l	t_catch2

Rem_is_0:
*..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
	addq.l	#1,d3
	cmpi.l	#8,d0			* D0 is j 
	bge.b	Q_Big

	lsl.l	d0,d3
	bra.b	Set_R_0

Q_Big:
	clr.l	d3

Set_R_0:
	fmove.s	#$00000000,fp0
	clr.b	Sc_Flag(a6)
	bra.w	Fix_Sign

Tie_Case:
*..Check parity of Q
	move.l	d3,d6
	andi.l	#$00000001,d6
	tst.l	d6
	beq.w	Fix_Sign		* Q is even

*..Q is odd, Q := Q + 1, signX := -signX
	addq.l	#1,d3
	move.w	SignX(a6),d6
	eori.l	#$00008000,d6
	move.w	d6,SignX(a6)
	bra.w	Fix_Sign

* qnan: three longwords read as one extended-precision operand by t_operr
* and ssincosi (exponent word $7fff, every mantissa bit set).
qnan:	.dc.l	$7fff0000,$ffffffff,$ffffffff

*########################################################################
* XDEF ****************************************************************	#
*	t_dz(): Handle DZ exception during transcendental emulation.	#
*	        Sets N bit according to sign of source operand.		#
*	t_dz2(): Handle DZ exception during transcendental emulation.	#
*		 Sets N bit always.					#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to source operand					#
* 									#
* OUTPUT **************************************************************	#
*	fp0 = default result						#
*									#
* ALGORITHM ***********************************************************	#
*	- Store properly signed INF into fp0.				#
*	- Set FPSR exception status dz bit, ccode inf bit, and 		#
*	  accrued dz bit.						#
*									#
*########################################################################

	global	t_dz
* t_dz: return an infinity whose sign follows the source operand at (a0)
* and OR the matching status bits into USER_FPSR(a6).
t_dz:
	tst.b	SRC_EX.w(a0)		* no; is src negative?
	bmi.b	t_dz2			* yes

* positive source: fall through to the +INF case
dz_pinf:
	fmove.s	#$7f800000,fp0		* return +INF in fp0
	ori.l	#dzinf_mask,USER_FPSR(a6)	* set I/DZ/ADZ
	rts

	global	t_dz2
* t_dz2: always returns -INF; also reached from t_dz for a negative src.
t_dz2:
	fmove.s	#$ff800000,fp0		* return -INF in fp0
	ori.l	#dzinf_mask+neg_mask,USER_FPSR(a6)	* set N/I/DZ/ADZ
	rts

*################################################################
* OPERR exception:						#
*	- set FPSR exception status operr bit, condition code 	#
*	  nan bit; Store default NAN into fp0			#
*################################################################
	global	t_operr
* t_operr: OR opnan_mask into USER_FPSR(a6) and return the extended
* constant stored at qnan in fp0.
t_operr:
	ori.l	#opnan_mask,USER_FPSR(a6)	* set NaN/OPERR/AIOP
	fmovem.x	qnan(pc),fp0	* return default NAN in fp0
	rts

*################################################################
* Extended DENORM:						#
* 	- For all functions that have a denormalized input and	#
*	  that f(x)=x, this is the entry point.			#
*	- we only return the EXOP here if either underflow or	#
*	  inexact is enabled.					#
*################################################################

* Entry point for scale w/ extended denorm. The function does
* NOT set INEX2/AUNFL/AINEX.
	global	t_resdnrm
* t_resdnrm: ORs only unfl_mask into USER_FPSR(a6), then joins the common
* denorm path at xdnrm_con.
* In:  a0 = ptr to the denormalized operand, d0 = rnd prec,mode
* Out: fp0 = default result; fp1 = EXOP when UNFL or INEX is enabled
* NOTE(review): on the single/double path unf_sub itself ORs unfinx_mask
* into USER_FPSR(a6), so the "does NOT set INEX2/AUNFL/AINEX" statement
* only holds for extended precision -- confirm this is intended.
t_resdnrm:
	ori.l	#unfl_mask,USER_FPSR(a6)	* set UNFL
	bra.b	xdnrm_con

	global	t_extdnrm
* t_extdnrm: same path, but sets the whole underflow/inexact group first.
t_extdnrm:
	ori.l	#unfinx_mask,USER_FPSR(a6)	* set UNFL/INEX2/AUNFL/AINEX

xdnrm_con:
	move.l	a0,a1			* make copy of src ptr
	move.l	d0,d1			* make copy of rnd prec,mode
	andi.b	#$c0,d1			* extended precision?
	bne.b	xdnrm_sd		* no

* result precision is extended.
	tst.b	__LOCAL___EX.w(a0)	* is denorm negative?
	bpl.b	xdnrm_exit		* no

	bset	#neg_bit,FPSR_CC(a6)	* yes; set 'N' ccode bit
	bra.b	xdnrm_exit

* result precision is single or double
xdnrm_sd:
	move.l	a1,-(sp)		* keep src ptr across the call
	tst.b	__LOCAL___EX.w(a0)	* is denorm pos or neg?
	smi.b	d1			* set d1 accordingly ($ff = negative)
	bsr.l	unf_sub			* a0 = ptr to default result; ccodes set
	move.l	(sp)+,a1
xdnrm_exit:
* a0 is still the operand itself on the extended path, or the table
* entry chosen by unf_sub on the single/double path.
	fmovem.x	(a0),fp0	* return default result in fp0

	move.b	FPCR_ENABLE(a6),d0
	andi.b	#$0a,d0			* is UNFL or INEX enabled?
	bne.b	xdnrm_ena		* yes
	rts

*###############
* unfl enabled #
*###############
* we have a DENORM that needs to be converted into an EXOP.
* so, normalize the mantissa, add 0x6000 to the new exponent,
* and return the result in fp1.
xdnrm_ena:
* work on a scratch copy so the caller's operand (a1) is left untouched
	move.w	__LOCAL___EX.w(a1),FP_SCR0_EX(a6)
	move.l	__LOCAL___HI(a1),FP_SCR0_HI(a6)
	move.l	__LOCAL___LO(a1),FP_SCR0_LO(a6)

	lea	FP_SCR0(a6),a0
	bsr.l	norm			* normalize mantissa
	addi.l	#$6000,d0		* add extra bias
	andi.w	#$8000,FP_SCR0_EX(a6)	* keep old sign
	or.w	d0,FP_SCR0_EX(a6)	* insert new exponent

	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	rts

*################################################################
* UNFL exception:						#
* 	- This routine is for cases where even an EXOP isn't	#
*  	  large enough to hold the range of this result.	#
*	  In such a case, the EXOP equals zero.			#
*  	- Return the default result to the proper precision 	#
*	  with the sign of this result being the same as that	#
*	  of the src operand.					#
* 	- t_unfl2() is provided to force the result sign to 	#
*	  positive which is the desired result for fetox().	#
*################################################################
	global	t_unfl
* t_unfl: default underflow result signed like the operand at (a0).
* In:  a0 = ptr to operand (sign in bit 7 of its first byte),
*      d0 = rnd prec,mode (passed through to unf_sub)
* Out: fp0 = default result, fp1 = 0 (EXOP)
t_unfl:
	ori.l	#unfinx_mask,USER_FPSR(a6)	* set UNFL/INEX2/AUNFL/AINEX

	tst.b	(a0)			* is result pos or neg?
	smi.b	d1			* set d1 accordingly
	bsr.l	unf_sub			* calc default unfl result
	fmovem.x	(a0),fp0	* return default result in fp0

	fmove.s	#$00000000,fp1		* return EXOP in fp1
	rts

* t_unfl2 ALWAYS tells unf_sub to create a positive result
	global	t_unfl2
* t_unfl2: as t_unfl, but the sign flag handed to unf_sub is always clear.
* In:  d0 = rnd prec,mode (passed through to unf_sub)
* Out: fp0 = default result, fp1 = 0 (EXOP)
t_unfl2:
	ori.l	#unfinx_mask,USER_FPSR(a6)	* set UNFL/INEX2/AUNFL/AINEX

	sf.b	d1			* set d1 to represent positive
	bsr.l	unf_sub			* calc default unfl result
	fmovem.x	(a0),fp0	* return default result in fp0

	fmove.s	#$0000000,fp1		* return EXOP in fp1
	rts

*################################################################
* OVFL exception:						#
* 	- This routine is for cases where even an EXOP isn't	#
*  	  large enough to hold the range of this result.	#
* 	- Return the default result to the proper precision 	#
*	  with the sign of this result being the same as that 	#
*	  of the src operand.					#
* 	- t_ovfl2() is provided to force the result sign to 	#
*	  positive which is the desired result for fcosh().	#
* 	- t_ovfl_sc() is provided for scale() which only sets 	#
*	  the inexact bits if the number is inexact for the 	#
*	  precision indicated.					#
*################################################################

	global	t_ovfl_sc
* t_ovfl_sc: overflow entry that sets INEX2 only when the operand at (a0)
* has mantissa bits below the selected rounding precision.
* In:  a0 = ptr to operand, d0 = rnd prec,mode
* Out: fp0 = default overflow result, fp1 = 0 (EXOP)
t_ovfl_sc:
	ori.l	#ovfl_inx_mask,USER_FPSR(a6)	* set OVFL/AOVFL/AINEX

	move.b	d0,d1			* fetch rnd mode/prec
	andi.b	#$c0,d1			* extract rnd prec
	beq.b	ovfl_work		* prec is extended

	tst.b	__LOCAL___HI(a0)	* is dst a DENORM?
	bmi.b	ovfl_sc_norm		* no

* dst op is a DENORM. we have to normalize the mantissa to see if the
* result would be inexact for the given precision. make a copy of the
* dst so we don't screw up the version passed to us.
	move.w	__LOCAL___EX.w(a0),FP_SCR0_EX(a6)
	move.l	__LOCAL___HI(a0),FP_SCR0_HI(a6)
	move.l	__LOCAL___LO(a0),FP_SCR0_LO(a6)
	lea	FP_SCR0(a6),a0		* pass ptr to FP_SCR0
	movem.l	d0-d1/a0,-(sp)		* save d0-d1/a0
	bsr.l	norm			* normalize mantissa
	movem.l	(sp)+,d0-d1/a0		* restore d0-d1/a0

ovfl_sc_norm:
	cmpi.b	#$40,d1			* is prec sgl? (equal falls into ovfl_sc_sgl)
	bne.b	ovfl_sc_dbl		* no; dbl
ovfl_sc_sgl:
	tst.l	__LOCAL___LO(a0)	* is lo lw of sgl set?
	bne.b	ovfl_sc_inx		* yes
	tst.b	3+__LOCAL___HI(a0)	* is lo byte of hi lw set?
	bne.b	ovfl_sc_inx		* yes
	bra.b	ovfl_work		* don't set INEX2
ovfl_sc_dbl:
	move.l	__LOCAL___LO(a0),d1	* are any of lo 11 bits of
	andi.l	#$7ff,d1		* dbl mantissa set?
	beq.b	ovfl_work		* no; don't set INEX2
ovfl_sc_inx:
	ori.l	#inex2_mask,USER_FPSR(a6)	* set INEX2
	bra.b	ovfl_work		* continue

	global	t_ovfl
* t_ovfl: plain overflow entry; always sets the inexact bits, then falls
* into the shared tail below.
t_ovfl:
	ori.l	#ovfinx_mask,USER_FPSR(a6)	* set OVFL/INEX2/AOVFL/AINEX

ovfl_work:
	tst.b	__LOCAL___EX.w(a0)	* what is the sign?
	smi.b	d1			* set d1 accordingly
	bsr.l	ovf_res			* calc default ovfl result
	move.b	d0,FPSR_CC(a6)		* insert new ccodes
	fmovem.x	(a0),fp0	* return default result in fp0

	fmove.s	#$00000000,fp1		* return EXOP in fp1
	rts

* t_ovfl2 ALWAYS tells ovf_res to create a positive result
	global	t_ovfl2
* t_ovfl2: overflow entry that always passes a clear sign flag to ovf_res.
* In:  d0 = rnd prec,mode (passed through to ovf_res)
* Out: fp0 = default overflow result, fp1 = 0 (EXOP)
t_ovfl2:
	ori.l	#ovfinx_mask,USER_FPSR(a6)	* set OVFL/INEX2/AOVFL/AINEX

	sf.b	d1			* clear sign flag for positive
	bsr.l	ovf_res			* calc default ovfl result
	move.b	d0,FPSR_CC(a6)		* insert new ccodes
	fmovem.x	(a0),fp0	* return default result in fp0

	fmove.s	#$00000000,fp1		* return EXOP in fp1
	rts

*################################################################
* t_catch(): 							#
*	- the last operation of a transcendental emulation	#
* 	  routine may have caused an underflow or overflow. 	#
* 	  we find out if this occurred by doing an fsave and 	#
*	  checking the exception bit. if one did occur, then we	#
*	  jump to fgen_except() which creates the default	#
*	  result and EXOP for us.				#
*################################################################
	global	t_catch
* t_catch: save the FPU state and test the frame for a pending exception;
* if none, drop the frame and fall through to t_inx2.
t_catch:

	fsave	-(sp)			* push FPU state frame
	tst.b	$2(sp)			* bit 7 of frame byte 2 = exception flag
	bmi.b	catch			* set: build default result / EXOP
	add.l	#$c,sp			* clear: discard the 12-byte frame

*################################################################
* INEX2 exception:						#
*	- The inex2 and ainex bits are set.			#
*################################################################
	global	t_inx2
* t_inx2: choose the INEX2 variant from the FPU condition codes.
t_inx2:
	fblt.w	t_minx2			* less than: negative variant
	fbeq.w	inx2_zero		* equal: zero variant

	global	t_pinx2
* t_pinx2: positive result; only the low word of USER_FPSR is touched.
t_pinx2:
	ori.w	#inx2a_mask,2+USER_FPSR(a6)	* set INEX2/AINEX
	rts

	global	t_minx2
* t_minx2: negative result; longword OR so neg_mask is applied as well.
t_minx2:
	ori.l	#inx2a_mask+neg_mask,USER_FPSR(a6)	* set N/INEX2/AINEX
	rts

* zero result: replace the ccode byte with Z, then set INEX2/AINEX
inx2_zero:
	move.b	#z_bmask,FPSR_CC(a6)
	ori.w	#inx2a_mask,2+USER_FPSR(a6)	* set INEX2/AINEX
	rts

* an underflow or overflow exception occurred.
* we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
* catch:  reached from t_catch with the fsave frame still on the stack.
* catch2: reached from t_catch2; skips the INEX2/AINEX update.
catch:
	ori.w	#inx2a_mask,FPSR_EXCEPT(a6)
catch2:
	bsr.l	fgen_except		* frame is at 4(sp) inside the callee
	add.l	#$c,sp			* discard the 12-byte fsave frame
	rts

	global	t_catch2
* t_catch2: like t_catch, but when no exception is pending it merges the
* live FPSR into USER_FPSR(a6) instead of forcing INEX2.
* Clobbers d0 on the no-exception path.
t_catch2:

	fsave	-(sp)			* push FPU state frame

	tst.b	$2(sp)			* bit 7 of frame byte 2 = exception flag
	bmi.b	catch2			* set: let fgen_except handle it
	add.l	#$c,sp			* clear: discard the 12-byte frame

	fmove.l	fpsr,d0
	or.l	d0,USER_FPSR(a6)

	rts

*########################################################################

*########################################################################
* unf_sub(): underflow default result calculation for transcendentals	#
*									#
* INPUT:								#
* 	d0   : rnd mode,precision					#
* 	d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+))	#
* OUTPUT:								#
*	a0   : points to result (in instruction memory)			#
*########################################################################
* unf_sub: pick the default underflow result and condition codes.
* In:  d0.b = rnd prec,mode in the upper nibble
*      d1.b = $ff for a negative result, $00 for a positive one
* Out: a0 -> 16-byte entry of tbl_unf_result
*      FPSR_CC(a6) loaded from tbl_unf_cc
*      USER_FPSR(a6) has unfinx_mask ORed in; d0 and d1 are clobbered
* NOTE(review): d0.w and d1.w are used as index registers but only the
* low byte of d0 is rebuilt here, so bits 15-8 of d0 must already be zero
* on entry -- confirm every caller guarantees that.
unf_sub:
	ori.l	#unfinx_mask,USER_FPSR(a6)

	andi.w	#$10,d1			* keep sign bit in 4th spot

	lsr.b	#$4,d0			* shift rnd prec,mode to lo bits
	andi.b	#$f,d0			* strip hi rnd mode bit
	or.b	d1,d0			* concat {sgn,mode,prec}

	move.l	d0,d1			* make a copy
	lsl.b	#$1,d1			* mult index 2 by 2

* byte table is indexed by d0; result table by d1*8 = index*16 bytes
	move.b	(tbl_unf_cc.b,pc,d0.w*1),FPSR_CC(a6)	* insert ccode bits
	lea	(tbl_unf_result.b,pc,d1.w*8),a0		* grab result ptr
	rts

* tbl_unf_cc: condition-code byte for each unf_sub index. Four bytes per
* row; the first four rows pair with the positive entries of
* tbl_unf_result and the last three with its negative entries. The values
* that differ ($0 and $8) sit in the same positions as the MIN entries
* there.
tbl_unf_cc:
	.dc.b	$4,$4,$4,$0
	.dc.b	$4,$4,$4,$0
	.dc.b	$4,$4,$4,$0
	.dc.b	$0,$0,$0,$0
	.dc.b	$8+$4,$8+$4,$8,$8+$4
	.dc.b	$8+$4,$8+$4,$8,$8+$4
	.dc.b	$8+$4,$8+$4,$8,$8+$4

* tbl_unf_result: default underflow results, 16 bytes per entry (three
* longwords of extended data plus one longword of padding so unf_sub can
* index with a power-of-two stride). Groups of four entries follow the
* same index order as tbl_unf_cc: positive ext/sgl/dbl, an all-zero
* filler group, then negative ext/sgl/dbl.
tbl_unf_result:
	.dc.l	$00000000,$00000000,$00000000,$0	* ZERO;ext
	.dc.l	$00000000,$00000000,$00000000,$0	* ZERO;ext
	.dc.l	$00000000,$00000000,$00000000,$0	* ZERO;ext
	.dc.l	$00000000,$00000000,$00000001,$0	* MIN; ext

	.dc.l	$3f810000,$00000000,$00000000,$0	* ZERO;sgl
	.dc.l	$3f810000,$00000000,$00000000,$0	* ZERO;sgl
	.dc.l	$3f810000,$00000000,$00000000,$0	* ZERO;sgl
	.dc.l	$3f810000,$00000100,$00000000,$0	* MIN; sgl

	.dc.l	$3c010000,$00000000,$00000000,$0	* ZERO;dbl
	.dc.l	$3c010000,$00000000,$00000000,$0	* ZERO;dbl
	.dc.l	$3c010000,$00000000,$00000000,$0	* ZERO;dbl
	.dc.l	$3c010000,$00000000,$00000800,$0	* MIN; dbl

* filler group (all zero)
	.dc.l	$0,$0,$0,$0
	.dc.l	$0,$0,$0,$0
	.dc.l	$0,$0,$0,$0
	.dc.l	$0,$0,$0,$0

	.dc.l	$80000000,$00000000,$00000000,$0	* ZERO;ext
	.dc.l	$80000000,$00000000,$00000000,$0	* ZERO;ext
	.dc.l	$80000000,$00000000,$00000001,$0	* MIN; ext
	.dc.l	$80000000,$00000000,$00000000,$0	* ZERO;ext

	.dc.l	$bf810000,$00000000,$00000000,$0	* ZERO;sgl
	.dc.l	$bf810000,$00000000,$00000000,$0	* ZERO;sgl
	.dc.l	$bf810000,$00000100,$00000000,$0	* MIN; sgl
	.dc.l	$bf810000,$00000000,$00000000,$0	* ZERO;sgl

	.dc.l	$bc010000,$00000000,$00000000,$0	* ZERO;dbl
	.dc.l	$bc010000,$00000000,$00000000,$0	* ZERO;dbl
	.dc.l	$bc010000,$00000000,$00000800,$0	* MIN; dbl
	.dc.l	$bc010000,$00000000,$00000000,$0	* ZERO;dbl

*###########################################################

*########################################################################
* src_zero(): Return signed zero according to sign of src operand.	#
*########################################################################
	global	src_zero
* src_zero: negative src at (a0) -> ld_mzero, otherwise fall into ld_pzero.
src_zero:
	tst.b	SRC_EX.w(a0)		* get sign of src operand
	bmi.b	ld_mzero		* if neg, load neg zero

*
* ld_pzero(): return a positive zero.
*
	global	ld_pzero
* Out: fp0 = +0, FPSR_CC(a6) = z_bmask (ccode byte is overwritten)
ld_pzero:
	fmove.s	#$00000000,fp0		* load +0
	move.b	#z_bmask,FPSR_CC(a6)	* set 'Z' ccode bit
	rts

* ld_mzero(): return a negative zero.
	global	ld_mzero
* Out: fp0 = -0, FPSR_CC(a6) = neg_bmask+z_bmask (ccode byte is overwritten)
ld_mzero:
	fmove.s	#$80000000,fp0		* load -0
	move.b	#neg_bmask+z_bmask,FPSR_CC(a6)	* set 'N','Z' ccode bits
	rts

*########################################################################
* dst_zero(): Return signed zero according to sign of dst operand.	#
*########################################################################
	global	dst_zero
* dst_zero: tail-branch to the zero loader that matches the sign of the
* dst operand at (a1).
dst_zero:
	tst.b	DST_EX.w(a1)		* N flag <- sign bit of dst
	bpl.b	ld_pzero		* sign clear: +0 loader
	bra.b	ld_mzero		* sign set: -0 loader

*########################################################################
* src_inf(): Return signed inf according to sign of src operand.	#
*########################################################################
	global	src_inf
* src_inf: negative src at (a0) -> ld_minf, otherwise fall into ld_pinf.
src_inf:
	tst.b	SRC_EX.w(a0)		* get sign of src operand
	bmi.b	ld_minf			* if negative branch

*
* ld_pinf(): return a positive infinity.
*
	global	ld_pinf
* Out: fp0 = +INF, FPSR_CC(a6) = inf_bmask (ccode byte is overwritten)
ld_pinf:
	fmove.s	#$7f800000,fp0		* load +INF
	move.b	#inf_bmask,FPSR_CC(a6)	* set 'INF' ccode bit
	rts

*
* ld_minf():return a negative infinity.
*
	global	ld_minf
* Out: fp0 = -INF, FPSR_CC(a6) = neg_bmask+inf_bmask (ccode byte is overwritten)
ld_minf:
	fmove.s	#$ff800000,fp0		* load -INF
	move.b	#neg_bmask+inf_bmask,FPSR_CC(a6)	* set 'N','I' ccode bits
	rts

*########################################################################
* dst_inf(): Return signed inf according to sign of dst operand.	#
*########################################################################
	global	dst_inf
* dst_inf: tail-branch to the infinity loader that matches the sign of
* the dst operand at (a1).
dst_inf:
	tst.b	DST_EX.w(a1)		* N flag <- sign bit of dst
	bpl.b	ld_pinf			* sign clear: +INF loader
	bra.b	ld_minf			* sign set: -INF loader

	global	szr_inf
*################################################################
* szr_inf(): Return +ZERO for a negative src operand or		#
*	            +INF for a positive src operand.		#
*	     Routine used for fetox, ftwotox, and ftentox.	#
*################################################################
* Non-negative src takes the +INF loader; a negative src gets +0.
szr_inf:
	tst.b	SRC_EX.w(a0)		* N flag <- sign bit of src
	bpl.b	ld_pinf			* sign clear: +INF
	bra.b	ld_pzero		* sign set: +0

*########################################################################
* sopr_inf(): Return +INF for a positive src operand or			#
*	      jump to operand error routine for a negative src operand.	#
*	      Routine used for flogn, flognp1, flog10, and flog2.	#
*########################################################################
	global	sopr_inf
* sopr_inf: +INF for a non-negative src, operand error otherwise.
sopr_inf:
	tst.b	SRC_EX.w(a0)		* N flag <- sign bit of src
	bpl.b	ld_pinf			* sign clear: +INF
	bra.w	t_operr			* sign set: operand error

*################################################################
* setoxm1i(): Return minus one for a negative src operand or	#
*	      positive infinity for a positive src operand.	#
*	      Routine used for fetoxm1.				#
*################################################################
	global	setoxm1i
* setoxm1i: +INF for a non-negative src, -1 for a negative one.
setoxm1i:
	tst.b	SRC_EX.w(a0)		* N flag <- sign bit of src
	bpl.b	ld_pinf			* sign clear: +INF
	bra.b	ld_mone			* sign set: -1

*########################################################################
* src_one(): Return signed one according to sign of src operand.	#
*########################################################################
	global	src_one
* src_one: negative src at (a0) -> ld_mone, otherwise fall into ld_pone.
src_one:
	tst.b	SRC_EX.w(a0)		* check sign of source
	bmi.b	ld_mone

*
* ld_pone(): return positive one.
*
	global	ld_pone
* Out: fp0 = +1.0, FPSR_CC(a6) cleared
ld_pone:
	fmove.s	#$3f800000,fp0		* load +1
	clr.b	FPSR_CC(a6)
	rts

*
* ld_mone(): return negative one.
*
	global	ld_mone
* Out: fp0 = -1.0, FPSR_CC(a6) = neg_bmask (ccode byte is overwritten)
ld_mone:
	fmove.s	#$bf800000,fp0		* load -1
	move.b	#neg_bmask,FPSR_CC(a6)	* set 'N' ccode bit
	rts

* +pi/2 and -pi/2 as extended-precision operands; the two differ only in
* the sign bit of the first word. Loaded by ld_ppi2 / ld_mpi2.
ppiby2:	.dc.l	$3fff0000,$c90fdaa2,$2168c235
mpiby2:	.dc.l	$bfff0000,$c90fdaa2,$2168c235

*################################################################
* spi_2(): Return signed PI/2 according to sign of src operand.	#
*################################################################
	global	spi_2
* spi_2: negative src at (a0) -> ld_mpi2, otherwise fall into ld_ppi2.
spi_2:
	tst.b	SRC_EX.w(a0)		* check sign of source
	bmi.b	ld_mpi2

*
* ld_ppi2(): return positive PI/2.
*
	global	ld_ppi2
* In:  d0 = value written to FPCR before the load
*      (presumably the caller's rnd prec,mode -- confirm)
* Out: fp0 = +pi/2; INEX2/AINEX set by t_pinx2
ld_ppi2:
	fmove.l	d0,fpcr
	fmove.x	ppiby2(pc),fp0		* load +pi/2
	bra.w	t_pinx2			* set INEX2

*
* ld_mpi2(): return negative PI/2.
*
	global	ld_mpi2
* In:  d0 = value written to FPCR before the load
*      (presumably the caller's rnd prec,mode -- confirm)
* Out: fp0 = -pi/2; N/INEX2/AINEX set by t_minx2
ld_mpi2:
	fmove.l	d0,fpcr
	fmove.x	mpiby2(pc),fp0		* load -pi/2
	bra.w	t_minx2			* set INEX2

*###################################################
* The following routines give support for fsincos. #
*###################################################

*
* ssincosz(): When the src operand is ZERO, store a one in the
* 	      cosine register and return a ZERO in fp0 w/ the same sign
*	      as the src operand.
*
	global	ssincosz
* Out: fp1 = +1.0 (cosine, handed to sto_cos), fp0 = zero with the sign of
*      the src at (a0), FPSR_CC(a6) set to match fp0.
* Both paths leave through sto_cos, whose rts returns to our caller.
ssincosz:
	fmove.s	#$3f800000,fp1		* cosine result = +1
	tst.b	SRC_EX.w(a0)		* test sign
	bpl.b	sincoszp
	fmove.s	#$80000000,fp0		* return sin result in fp0
	move.b	#z_bmask+neg_bmask,FPSR_CC(a6)
	bra.b	sto_cos			* store cosine result
sincoszp:
	fmove.s	#$00000000,fp0		* return sin result in fp0
	move.b	#z_bmask,FPSR_CC(a6)
	bra.b	sto_cos			* store cosine result

*
* ssincosi(): When the src operand is INF, store a QNAN in the cosine
*	      register and jump to the operand error routine for negative
*	      src operands.
*
	global	ssincosi
* Stores the default NaN as the cosine result, then takes the operand
* error path for the sine result.
* NOTE(review): the header mentions "negative src operands", but no sign
* test is made here -- t_operr is reached for every INF source.
ssincosi:
	fmove.x	qnan(pc),fp1		* load NAN
	bsr.l	sto_cos			* store cosine result
	bra.w	t_operr

*
* ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
* 		 register and branch to the src QNAN routine.
*
	global	ssincosqnan
* Copies the src operand at (a0) into the cosine register via sto_cos,
* then lets src_qnan produce the fp0 result.
ssincosqnan:
	fmove.x	__LOCAL___EX.w(a0),fp1
	bsr.l	sto_cos
	bra.w	src_qnan

*
* ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
*		 in the cosine register and branch to the src SNAN routine.
*
	global	ssincossnan
* Copies the src operand at (a0) into the cosine register via sto_cos,
* then lets src_snan produce the fp0 result and record the status.
ssincossnan:
	fmove.x	__LOCAL___EX.w(a0),fp1
	bsr.l	sto_cos
	bra.w	src_snan

*#######################################################################

*########################################################################
* sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field.	#
*	     fp1 holds the result of the cosine portion of ssincos().	#
*	     the value in fp1 will not take any exceptions when moved.	#
* INPUT:								#
*	fp1 : fp value to store						#
* MODIFIED:								#
*	d0								#
*########################################################################
	global	sto_cos
* sto_cos: jump through tbl_sto_cos to the stub that stores fp1 into the
* register numbered by the low three bits of the command word.
sto_cos:
	move.b	1+EXC_CMDREG(a6),d0	* low byte of the command word
	andi.w	#$7,d0			* keep register number 0-7
	move.w	(tbl_sto_cos.b,pc,d0.w*2),d0	* 16-bit offset of the stub
	jmp	(tbl_sto_cos.b,pc,d0.w*1)	* tbl_sto_cos + offset

* one word per register: offset of the stub from tbl_sto_cos
tbl_sto_cos:
	.dc.w	sto_cos_0-tbl_sto_cos
	.dc.w	sto_cos_1-tbl_sto_cos
	.dc.w	sto_cos_2-tbl_sto_cos
	.dc.w	sto_cos_3-tbl_sto_cos
	.dc.w	sto_cos_4-tbl_sto_cos
	.dc.w	sto_cos_5-tbl_sto_cos
	.dc.w	sto_cos_6-tbl_sto_cos
	.dc.w	sto_cos_7-tbl_sto_cos

* registers 0 and 1 are written to the EXC_FP0/EXC_FP1 slots in the a6
* frame instead of the live registers (presumably the saved user copies
* that are reloaded on exit -- confirm); 2-7 are written directly.
sto_cos_0:
	fmovem.x	fp1,EXC_FP0(a6)
	rts
sto_cos_1:
	fmovem.x	fp1,EXC_FP1(a6)
	rts
sto_cos_2:
	fmove.x	fp1,fp2
	rts
sto_cos_3:
	fmove.x	fp1,fp3
	rts
sto_cos_4:
	fmove.x	fp1,fp4
	rts
sto_cos_5:
	fmove.x	fp1,fp5
	rts
sto_cos_6:
	fmove.x	fp1,fp6
	rts
sto_cos_7:
	fmove.x	fp1,fp7
	rts

*#################################################################
	global	smod_sdnrm
	global	smod_snorm
* fmod, src tagged norm or denorm (both labels share this code):
* dispatch on the dst operand tag.
smod_sdnrm:
smod_snorm:
	move.b	DTAG(a6),d1		* dst tag; Z set when it is 0
	beq.l	smod			* tag 0 (presumably NORM): compute
	cmpi.b	#ZERO,d1
	beq.w	smod_zro		* zero dst: signed zero + quotient sign
	cmpi.b	#INF,d1
	beq.l	t_operr			* infinite dst: operand error
	cmpi.b	#DENORM,d1
	beq.l	smod			* denorm dst: compute
	cmpi.b	#SNAN,d1
	beq.l	dst_snan
	bra.l	dst_qnan		* any other tag

	global	smod_szero
* fmod, src tagged zero: every dst tag except the NaN tags is an operand
* error.
smod_szero:
	move.b	DTAG(a6),d1		* dst tag; Z set when it is 0
	beq.l	t_operr
	cmpi.b	#ZERO,d1
	beq.l	t_operr
	cmpi.b	#INF,d1
	beq.l	t_operr
	cmpi.b	#DENORM,d1
	beq.l	t_operr
	cmpi.b	#QNAN,d1
	beq.l	dst_qnan
	bra.l	dst_snan		* any other tag

	global	smod_sinf
* fmod, src tagged infinity: dispatch on the dst operand tag.
smod_sinf:
	move.b	DTAG(a6),d1		* dst tag; Z set when it is 0
	beq.l	smod_fpn		* tag 0: result is the dst itself
	cmpi.b	#ZERO,d1
	beq.l	smod_zro		* zero dst: signed zero + quotient sign
	cmpi.b	#INF,d1
	beq.l	t_operr			* infinite dst: operand error
	cmpi.b	#DENORM,d1
	beq.l	smod_fpn		* denorm dst: result is the dst itself
	cmpi.b	#QNAN,d1
	beq.l	dst_qnan
	bra.l	dst_snan		* any other tag

* smod_zro / srem_zro: dst is zero. Store sign(src) xor sign(dst) as the
* quotient byte, then return a zero that carries the dst sign.
* In: a0 = src operand, a1 = dst operand.  Clobbers d0, d1.
smod_zro:
srem_zro:
	move.b	DST_EX.w(a1),d0		* d0 = dst sign/exponent byte
	move.b	SRC_EX.w(a0),d1		* d1 = src sign/exponent byte
	eor.b	d0,d1			* bit 7 = sign(src) xor sign(dst)
	andi.b	#$80,d1			* quotient byte holds the sign only
	move.b	d1,FPSR_QBYTE(a6)
	tst.b	d0			* result takes the dst sign
	bmi.w	ld_mzero
	bra.w	ld_pzero

* smod_fpn / srem_fpn: src is infinite and dst is finite, so the result is
* the dst operand itself and the quotient byte carries only the sign.
* In:  a0 = src operand, a1 = dst operand, d0 = rnd prec,mode
* Out: fp0 = dst (denorm dst goes through t_resdnrm),
*      FPSR_QBYTE(a6) = sign(src) xor sign(dst) in bit 7
* The quotient byte is written once below; a separate clear beforehand
* is unnecessary because nothing reads it in between.
smod_fpn:
srem_fpn:
	move.l	d0,-(sp)		* d0 is reused for the dst sign
	move.b	SRC_EX.w(a0),d1		* get src sign
	move.b	DST_EX.w(a1),d0		* get dst sign
	eor.b	d0,d1			* get qbyte sign
	andi.b	#$80,d1			* keep only the sign bit
	move.b	d1,FPSR_QBYTE(a6)
	cmpi.b	#DENORM,DTAG(a6)
	bne.b	smod_nrm
	lea	DST.w(a1),a0		* t_resdnrm takes the operand in a0
	move.l	(sp)+,d0		* and rnd prec,mode in d0
	bra.l	t_resdnrm
smod_nrm:
	fmove.l	(sp)+,fpcr		* saved d0 goes straight into FPCR
	fmove.x	DST.w(a1),fp0
	tst.b	DST_EX.w(a1)
	bmi.b	smod_nrm_neg
	rts

smod_nrm_neg:
	move.b	#neg_bmask,FPSR_CC(a6)	* set 'N' ccode
	rts

*########################################################################
	global	srem_snorm
	global	srem_sdnrm
* frem, src tagged norm or denorm (both labels share this code):
* dispatch on the dst operand tag.
srem_sdnrm:
srem_snorm:
	move.b	DTAG(a6),d1		* dst tag; Z set when it is 0
	beq.l	srem			* tag 0 (presumably NORM): compute
	cmpi.b	#ZERO,d1
	beq.w	srem_zro		* zero dst: signed zero + quotient sign
	cmpi.b	#INF,d1
	beq.l	t_operr			* infinite dst: operand error
	cmpi.b	#DENORM,d1
	beq.l	srem			* denorm dst: compute
	cmpi.b	#QNAN,d1
	beq.l	dst_qnan
	bra.l	dst_snan		* any other tag

	global	srem_szero
* frem, src tagged zero: every dst tag except the NaN tags is an operand
* error.
srem_szero:
	move.b	DTAG(a6),d1		* dst tag; Z set when it is 0
	beq.l	t_operr
	cmpi.b	#ZERO,d1
	beq.l	t_operr
	cmpi.b	#INF,d1
	beq.l	t_operr
	cmpi.b	#DENORM,d1
	beq.l	t_operr
	cmpi.b	#QNAN,d1
	beq.l	dst_qnan
	bra.l	dst_snan		* any other tag

	global	srem_sinf
* frem, src tagged infinity: dispatch on the dst operand tag.
srem_sinf:
	move.b	DTAG(a6),d1		* dst tag; Z set when it is 0
	beq.w	srem_fpn		* tag 0: result is the dst itself
	cmpi.b	#ZERO,d1
	beq.w	srem_zro		* zero dst: signed zero + quotient sign
	cmpi.b	#INF,d1
	beq.l	t_operr			* infinite dst: operand error
	cmpi.b	#DENORM,d1
	beq.l	srem_fpn		* denorm dst: result is the dst itself
	cmpi.b	#QNAN,d1
	beq.l	dst_qnan
	bra.l	dst_snan		* any other tag

*########################################################################
	global	sscale_snorm
	global	sscale_sdnrm
* fscale, src tagged norm or denorm (both labels share this code):
* dispatch on the dst operand tag.
sscale_snorm:
sscale_sdnrm:
	move.b	DTAG(a6),d1		* dst tag; Z set when it is 0
	beq.l	sscale			* tag 0 (presumably NORM): compute
	cmpi.b	#ZERO,d1
	beq.l	dst_zero		* zero dst: zero with dst sign
	cmpi.b	#INF,d1
	beq.l	dst_inf			* infinite dst: infinity with dst sign
	cmpi.b	#DENORM,d1
	beq.l	sscale			* denorm dst: compute
	cmpi.b	#QNAN,d1
	beq.l	dst_qnan
	bra.l	dst_snan		* any other tag

	global	sscale_szero
* fscale, src tagged zero: same dispatch on the dst tag as sscale_snorm.
sscale_szero:
	move.b	DTAG(a6),d1		* dst tag; Z set when it is 0
	beq.l	sscale			* tag 0 (presumably NORM): compute
	cmpi.b	#ZERO,d1
	beq.l	dst_zero		* zero dst: zero with dst sign
	cmpi.b	#INF,d1
	beq.l	dst_inf			* infinite dst: infinity with dst sign
	cmpi.b	#DENORM,d1
	beq.l	sscale			* denorm dst: compute
	cmpi.b	#QNAN,d1
	beq.l	dst_qnan
	bra.l	dst_snan		* any other tag

	global	sscale_sinf
* fscale, src tagged infinity: operand error unless the dst tag is a NaN
* tag. The first branch only short-cuts the tag-0 case; the final bra
* reaches the same target for all remaining tags.
sscale_sinf:
	move.b	DTAG(a6),d1		* dst tag; Z set when it is 0
	beq.l	t_operr
	cmpi.b	#QNAN,d1
	beq.l	dst_qnan
	cmpi.b	#SNAN,d1
	beq.l	dst_snan
	bra.l	t_operr

*#######################################################################

*
* sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
*
	global	sop_sqnan
* A NaN-tagged dst takes priority; otherwise the src QNAN is returned.
sop_sqnan:
	move.b	DTAG(a6),d1
	cmpi.b	#QNAN,d1
	beq.b	dst_qnan
	cmpi.b	#SNAN,d1
	beq.b	dst_snan
	bra.b	src_qnan		* dst tag is not a NaN tag

*
* sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
*
	global	sop_ssnan
* A NaN-tagged dst takes priority; otherwise the src SNAN is returned.
sop_ssnan:
	move.b	DTAG(a6),d1
	cmpi.b	#QNAN,d1
	beq.b	dst_qnan_src_snan
	cmpi.b	#SNAN,d1
	beq.b	dst_snan
	bra.b	src_snan		* dst tag is not a NaN tag

* dst QNAN with src SNAN: dst_qnan does not touch the exception bits, so
* the status for the signalling src is recorded here first.
dst_qnan_src_snan:
	ori.l	#snaniop_mask,USER_FPSR(a6)	* set NAN/SNAN/AIOP
	bra.b	dst_qnan

*
* dst_snan(): Return the dst SNAN w/ the SNAN bit set.
*
	global	dst_snan
* In:  a1 = dst operand
* Out: fp0 = dst NaN as loaded by the FPU; the live FPSR after the load
*      is ORed into USER_FPSR(a6).  Clobbers d0.
dst_snan:
	fmove.x	DST.w(a1),fp0		* the fmove sets the SNAN bit
	fmove.l	fpsr,d0			* catch resulting status
	or.l	d0,USER_FPSR(a6)	* store status
	rts

*
* dst_qnan(): Return the dst QNAN.
*
	global	dst_qnan
* In:  a1 = dst operand
* Out: fp0 = dst QNAN; FPSR_CC(a6) = NAN, plus N when the sign bit is set
dst_qnan:
	fmove.x	DST.w(a1),fp0		* hand back the quiet NaN unchanged
	tst.b	DST_EX.w(a1)		* N flag <- sign bit of the NaN
	bpl.b	dst_qnan_p
dst_qnan_m:
	move.b	#neg_bmask+nan_bmask,FPSR_CC(a6)	* negative NaN
	rts
dst_qnan_p:
	move.b	#nan_bmask,FPSR_CC(a6)	* positive NaN
	rts

*
* src_snan(): Return the src SNAN w/ the SNAN bit set.
*
	global	src_snan
* In:  a0 = src operand
* Out: fp0 = src NaN as loaded by the FPU; the live FPSR after the load
*      is ORed into USER_FPSR(a6).  Clobbers d0.
src_snan:
	fmove.x	SRC.w(a0),fp0		* the fmove sets the SNAN bit
	fmove.l	fpsr,d0			* catch resulting status
	or.l	d0,USER_FPSR(a6)	* store status
	rts

*
* src_qnan(): Return the src QNAN.
*
	global	src_qnan
* In:  a0 = src operand
* Out: fp0 = src QNAN; FPSR_CC(a6) = NAN, plus N when the sign bit is set
* The negative case uses this routine's own src_qnan_m tail, so the
* routine no longer depends on the layout of dst_qnan.
src_qnan:
	fmove.x	SRC.w(a0),fp0		* return the non-signalling nan
	tst.b	SRC_EX.w(a0)		* set ccodes according to QNAN sign
	bmi.b	src_qnan_m
src_qnan_p:
	move.b	#nan_bmask,FPSR_CC(a6)
	rts
src_qnan_m:
	move.b	#neg_bmask+nan_bmask,FPSR_CC(a6)
	rts

*
* fkern2.s:
*	These entry points are used by the exception handler
* routines where an instruction is selected by an index into
* a large jump table corresponding to a given instruction which 
* has been decoded. Flow continues here where we now decode 
* further according to the source operand type.
*

	global	fsinh
* fsinh: dispatch on the source operand tag.
fsinh:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	ssinh			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	src_zero		* zero with src sign
	cmpi.b	#INF,d1
	beq.l	src_inf			* infinity with src sign
	cmpi.b	#DENORM,d1
	beq.l	ssinhd
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	flognp1
* flognp1: dispatch on the source operand tag.
flognp1:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	slognp1			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	src_zero		* zero with src sign
	cmpi.b	#INF,d1
	beq.l	sopr_inf		* +INF, or operand error if negative
	cmpi.b	#DENORM,d1
	beq.l	slognp1d
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	fetoxm1
* fetoxm1: dispatch on the source operand tag.
fetoxm1:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	setoxm1			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	src_zero		* zero with src sign
	cmpi.b	#INF,d1
	beq.l	setoxm1i		* +INF, or -1 if negative
	cmpi.b	#DENORM,d1
	beq.l	setoxm1d
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	ftanh
* ftanh: dispatch on the source operand tag.
ftanh:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	stanh			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	src_zero		* zero with src sign
	cmpi.b	#INF,d1
	beq.l	src_one			* one with src sign
	cmpi.b	#DENORM,d1
	beq.l	stanhd
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	fatan
* fatan: dispatch on the source operand tag.
fatan:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	satan			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	src_zero		* zero with src sign
	cmpi.b	#INF,d1
	beq.l	spi_2			* pi/2 with src sign
	cmpi.b	#DENORM,d1
	beq.l	satand
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	fasin
* fasin: dispatch on the source operand tag.
fasin:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	sasin			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	src_zero		* zero with src sign
	cmpi.b	#INF,d1
	beq.l	t_operr			* operand error
	cmpi.b	#DENORM,d1
	beq.l	sasind
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	fatanh
* fatanh: dispatch on the source operand tag.
fatanh:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	satanh			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	src_zero		* zero with src sign
	cmpi.b	#INF,d1
	beq.l	t_operr			* operand error
	cmpi.b	#DENORM,d1
	beq.l	satanhd
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	fsine
* fsine: dispatch on the source operand tag.
fsine:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	ssin			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	src_zero		* zero with src sign
	cmpi.b	#INF,d1
	beq.l	t_operr			* operand error
	cmpi.b	#DENORM,d1
	beq.l	ssind
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	ftan
* ftan: dispatch on the source operand tag.
ftan:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	stan			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	src_zero		* zero with src sign
	cmpi.b	#INF,d1
	beq.l	t_operr			* operand error
	cmpi.b	#DENORM,d1
	beq.l	stand
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	fetox
* fetox: dispatch on the source operand tag.
fetox:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	setox			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	ld_pone			* +1
	cmpi.b	#INF,d1
	beq.l	szr_inf			* +INF, or +0 if negative
	cmpi.b	#DENORM,d1
	beq.l	setoxd
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	ftwotox
* ftwotox: dispatch on the source operand tag.
ftwotox:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	stwotox			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	ld_pone			* +1
	cmpi.b	#INF,d1
	beq.l	szr_inf			* +INF, or +0 if negative
	cmpi.b	#DENORM,d1
	beq.l	stwotoxd
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	ftentox
* ftentox: dispatch on the source operand tag.
ftentox:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	stentox			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	ld_pone			* +1
	cmpi.b	#INF,d1
	beq.l	szr_inf			* +INF, or +0 if negative
	cmpi.b	#DENORM,d1
	beq.l	stentoxd
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	flogn
* flogn: dispatch on the source operand tag.
flogn:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	slogn			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	t_dz2			* -INF with DZ status
	cmpi.b	#INF,d1
	beq.l	sopr_inf		* +INF, or operand error if negative
	cmpi.b	#DENORM,d1
	beq.l	slognd
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	flog10
* flog10: dispatch on the source operand tag.
flog10:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	slog10			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	t_dz2			* -INF with DZ status
	cmpi.b	#INF,d1
	beq.l	sopr_inf		* +INF, or operand error if negative
	cmpi.b	#DENORM,d1
	beq.l	slog10d
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	flog2
* flog2: dispatch on the source operand tag.
flog2:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	slog2			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	t_dz2			* -INF with DZ status
	cmpi.b	#INF,d1
	beq.l	sopr_inf		* +INF, or operand error if negative
	cmpi.b	#DENORM,d1
	beq.l	slog2d
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	fcosh
* fcosh: dispatch on the source operand tag.
fcosh:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	scosh			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	ld_pone			* +1
	cmpi.b	#INF,d1
	beq.l	ld_pinf			* +INF regardless of src sign
	cmpi.b	#DENORM,d1
	beq.l	scoshd
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	facos
* facos: dispatch on the source operand tag.
facos:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	sacos			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	ld_ppi2			* +pi/2 with INEX2
	cmpi.b	#INF,d1
	beq.l	t_operr			* operand error
	cmpi.b	#DENORM,d1
	beq.l	sacosd
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	fcos
* fcos: dispatch on the source operand tag.
fcos:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	scos			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	ld_pone			* +1
	cmpi.b	#INF,d1
	beq.l	t_operr			* operand error
	cmpi.b	#DENORM,d1
	beq.l	scosd
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	fgetexp
* fgetexp: dispatch on the source operand tag.
fgetexp:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	sgetexp			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	src_zero		* zero with src sign
	cmpi.b	#INF,d1
	beq.l	t_operr			* operand error
	cmpi.b	#DENORM,d1
	beq.l	sgetexpd
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	fgetman
* fgetman: dispatch on the source operand tag.
fgetman:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	sgetman			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	src_zero		* zero with src sign
	cmpi.b	#INF,d1
	beq.l	t_operr			* operand error
	cmpi.b	#DENORM,d1
	beq.l	sgetmand
	cmpi.b	#QNAN,d1
	beq.l	src_qnan
	bra.l	src_snan		* any other tag

	global	fsincos
* fsincos: dispatch on the source operand tag. Every special case goes
* through an ssincos* stub so the cosine register is written as well.
fsincos:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	ssincos			* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	ssincosz		* sin = signed zero, cos = +1
	cmpi.b	#INF,d1
	beq.l	ssincosi		* cos = NaN, then operand error
	cmpi.b	#DENORM,d1
	beq.l	ssincosd
	cmpi.b	#QNAN,d1
	beq.l	ssincosqnan
	bra.l	ssincossnan		* any other tag

	global	fmod
* fmod: first-level dispatch on the source tag; each smod_s* target then
* dispatches on the destination tag.
fmod:
	move.b	STAG(a6),d1		* src tag; Z set when it is 0
	beq.l	smod_snorm		* tag 0 (presumably NORM)
	cmpi.b	#ZERO,d1
	beq.l	smod_szero
	cmpi.b	#INF,d1
	beq.l	smod_sinf
	cmpi.b	#DENORM,d1
	beq.l	smod_sdnrm
	cmpi.b	#QNAN,d1
	beq.l	sop_sqnan
	bra.l	sop_ssnan		* any other tag

	global	frem
*
* frem: pick the frem handler that matches the source operand tag.
*   NORM   -> srem_snorm	ZERO -> srem_szero	INF  -> srem_sinf
*   DENORM -> srem_sdnrm	QNAN -> sop_sqnan	else -> sop_ssnan
* Only STAG is examined here. d1 holds the tag byte on exit.
*
frem:
	move.b	STAG(a6),d1		* d1 = src tag; zero tag means NORM
	beq.l	srem_snorm
	cmpi.b	#DENORM,d1		* denormalized source?
	beq.l	srem_sdnrm
	cmpi.b	#INF,d1			* infinite source?
	beq.l	srem_sinf
	cmpi.b	#ZERO,d1		* zero source?
	beq.l	srem_szero
	cmpi.b	#QNAN,d1		* quiet NAN? (tested last)
	beq.l	sop_sqnan
	bra.l	sop_ssnan		* no other tag matched

	global	fscale
*
* fscale: pick the fscale handler that matches the source operand tag.
*   NORM   -> sscale_snorm	ZERO -> sscale_szero	INF  -> sscale_sinf
*   DENORM -> sscale_sdnrm	QNAN -> sop_sqnan	else -> sop_ssnan
* Only STAG is examined here. d1 holds the tag byte on exit.
*
fscale:
	move.b	STAG(a6),d1		* d1 = src tag; zero tag means NORM
	beq.l	sscale_snorm
	cmpi.b	#DENORM,d1		* denormalized source?
	beq.l	sscale_sdnrm
	cmpi.b	#INF,d1			* infinite source?
	beq.l	sscale_sinf
	cmpi.b	#ZERO,d1		* zero source?
	beq.l	sscale_szero
	cmpi.b	#QNAN,d1		* quiet NAN? (tested last)
	beq.l	sop_sqnan
	bra.l	sop_ssnan		* no other tag matched

*########################################################################
* XDEF ****************************************************************	#
* 	fgen_except(): catch an exception during transcendental 	#
*		       emulation					#
*									#
* XREF ****************************************************************	#
*	fmul() - emulate a multiply instruction				# 
*	fadd() - emulate an add instruction				#
*	fin() - emulate an fmove instruction				#
*									#
* INPUT ***************************************************************	#
*	fp0 = destination operand					#
*	d0  = not examined here; handed on to fmul()/fadd()/fin()	#
*	d1  = type of instruction that took exception			#
*	fsave frame = source operand					#
* 									#
* OUTPUT **************************************************************	#
*	fp0 = result							#
*	fp1 = EXOP							#
*									#
* ALGORITHM ***********************************************************	#
* 	An exception occurred on the last instruction of the 		#
* transcendental emulation. hopefully, this won't be happening much 	#
* because it will be VERY slow.						#
* 	The only exceptions capable of passing through here are		#
* Overflow, Underflow, and Unsupported Data Type.			#
*									#
*########################################################################

	global	fgen_except
*
* fgen_except: hand the last instruction of a transcendental emulation to
* fin(), fadd() or fmul() after an exception was taken on it.
*   fp0    = destination operand (stored to FP_DST below)
*   d1     = op type, compared against FMOV_OP and FADD_OP
*   $3(sp) = byte compared against $7 to detect UNSUPP
*   $4(sp) = address handed to the callee as the source operand
* d0 is not modified before the call.
*
fgen_except:
	cmpi.b	#$7,$3(sp)		* is exception UNSUPP?
	beq.b	fge_unsupp		* yes

	move.b	#NORM,STAG(a6)		* not UNSUPP: tag the src as NORM

fge_cont:
	move.b	#NORM,DTAG(a6)		* dst is tagged NORM on both paths

* ok, I have a problem with putting the dst op at FP_DST. the emulation
* routines aren't supposed to alter the operands but we've just squashed
* FP_DST here...

* 8/17/93 - this turns out to be more of a "cleanliness" standpoint
* than a potential bug. to begin with, only the dyadic functions
* frem,fmod, and fscale would get the dst trashed here. But, for
* the 060SP, the FP_DST is never used again anyways.
	fmovem.x	fp0,FP_DST(a6)	* dst op is in fp0

	lea	$4(sp),a0		* pass: ptr to src op
	lea	FP_DST(a6),a1		* pass: ptr to dst op

* select the emulation routine from the op type in d1; anything that is
* neither FMOV_OP nor FADD_OP falls through to fmul.
	cmpi.b	#FMOV_OP,d1
	beq.b	fge_fin			* it was an "fmov"
	cmpi.b	#FADD_OP,d1
	beq.b	fge_fadd		* it was an "fadd"
fge_fmul:
	bsr.l	fmul
	rts
fge_fadd:
	bsr.l	fadd
	rts
fge_fin:
	bsr.l	fin
	rts

* UNSUPP exception: the src operand is tagged DENORM instead of NORM.
fge_unsupp:
	move.b	#DENORM,STAG(a6)
	bra.b	fge_cont

*
* This table holds the offsets of the emulation routines for each individual
* math operation relative to the address of this table. Included are
* routines like fadd/fmul/fabs as well as the transcendentals.
* The location within the table is determined by the extension bits of the
* operation longword.
*

* Marker word $4AFC followed by the entry count: 109 longwords, indices
* $00 through $6c. Every entry is "routine - tbl_unsupp". Slots written as
* tbl_unsupp-tbl_unsupp hold a zero offset: no routine is assigned to that
* extension value in this table.
* Note the label naming: slot $0e (the fsin instruction) uses label fsine,
* while label fsin is the fsmove routine referenced from slot $40.
	.dc.w	$4AFC,109
tbl_unsupp:
	.dc.l	fin-tbl_unsupp			* 00: fmove
	.dc.l	fint-tbl_unsupp			* 01: fint
	.dc.l	fsinh-tbl_unsupp		* 02: fsinh
	.dc.l	fintrz-tbl_unsupp		* 03: fintrz
	.dc.l	fsqrt-tbl_unsupp		* 04: fsqrt
	.dc.l	tbl_unsupp-tbl_unsupp		* 05: (zero offset)
	.dc.l	flognp1-tbl_unsupp		* 06: flognp1
	.dc.l	tbl_unsupp-tbl_unsupp		* 07: (zero offset)
	.dc.l	fetoxm1-tbl_unsupp		* 08: fetoxm1
	.dc.l	ftanh-tbl_unsupp		* 09: ftanh
	.dc.l	fatan-tbl_unsupp		* 0a: fatan
	.dc.l	tbl_unsupp-tbl_unsupp		* 0b: (zero offset)
	.dc.l	fasin-tbl_unsupp		* 0c: fasin
	.dc.l	fatanh-tbl_unsupp		* 0d: fatanh
	.dc.l	fsine-tbl_unsupp		* 0e: fsin (label is fsine)
	.dc.l	ftan-tbl_unsupp			* 0f: ftan
	.dc.l	fetox-tbl_unsupp		* 10: fetox
	.dc.l	ftwotox-tbl_unsupp		* 11: ftwotox
	.dc.l	ftentox-tbl_unsupp		* 12: ftentox
	.dc.l	tbl_unsupp-tbl_unsupp		* 13: (zero offset)
	.dc.l	flogn-tbl_unsupp		* 14: flogn
	.dc.l	flog10-tbl_unsupp		* 15: flog10
	.dc.l	flog2-tbl_unsupp		* 16: flog2
	.dc.l	tbl_unsupp-tbl_unsupp		* 17: (zero offset)
	.dc.l	fabs-tbl_unsupp			* 18: fabs
	.dc.l	fcosh-tbl_unsupp		* 19: fcosh
	.dc.l	fneg-tbl_unsupp			* 1a: fneg
	.dc.l	tbl_unsupp-tbl_unsupp		* 1b: (zero offset)
	.dc.l	facos-tbl_unsupp		* 1c: facos
	.dc.l	fcos-tbl_unsupp			* 1d: fcos
	.dc.l	fgetexp-tbl_unsupp		* 1e: fgetexp
	.dc.l	fgetman-tbl_unsupp		* 1f: fgetman
	.dc.l	fdiv-tbl_unsupp			* 20: fdiv
	.dc.l	fmod-tbl_unsupp			* 21: fmod
	.dc.l	fadd-tbl_unsupp			* 22: fadd
	.dc.l	fmul-tbl_unsupp			* 23: fmul
	.dc.l	fsgldiv-tbl_unsupp		* 24: fsgldiv
	.dc.l	frem-tbl_unsupp			* 25: frem
	.dc.l	fscale-tbl_unsupp		* 26: fscale
	.dc.l	fsglmul-tbl_unsupp		* 27: fsglmul
	.dc.l	fsub-tbl_unsupp			* 28: fsub
	.dc.l	tbl_unsupp-tbl_unsupp		* 29: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 2a: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 2b: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 2c: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 2d: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 2e: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 2f: (zero offset)
	.dc.l	fsincos-tbl_unsupp		* 30: fsincos
	.dc.l	fsincos-tbl_unsupp		* 31: fsincos
	.dc.l	fsincos-tbl_unsupp		* 32: fsincos
	.dc.l	fsincos-tbl_unsupp		* 33: fsincos
	.dc.l	fsincos-tbl_unsupp		* 34: fsincos
	.dc.l	fsincos-tbl_unsupp		* 35: fsincos
	.dc.l	fsincos-tbl_unsupp		* 36: fsincos
	.dc.l	fsincos-tbl_unsupp		* 37: fsincos
	.dc.l	__fcmp__-tbl_unsupp		* 38: fcmp
	.dc.l	tbl_unsupp-tbl_unsupp		* 39: (zero offset)
	.dc.l	ftst-tbl_unsupp			* 3a: ftst
	.dc.l	tbl_unsupp-tbl_unsupp		* 3b: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 3c: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 3d: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 3e: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 3f: (zero offset)
	.dc.l	fsin-tbl_unsupp			* 40: fsmove
	.dc.l	fssqrt-tbl_unsupp		* 41: fssqrt
	.dc.l	tbl_unsupp-tbl_unsupp		* 42: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 43: (zero offset)
	.dc.l	fdin-tbl_unsupp			* 44: fdmove
	.dc.l	fdsqrt-tbl_unsupp		* 45: fdsqrt
	.dc.l	tbl_unsupp-tbl_unsupp		* 46: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 47: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 48: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 49: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 4a: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 4b: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 4c: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 4d: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 4e: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 4f: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 50: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 51: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 52: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 53: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 54: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 55: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 56: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 57: (zero offset)
	.dc.l	fsabs-tbl_unsupp		* 58: fsabs
	.dc.l	tbl_unsupp-tbl_unsupp		* 59: (zero offset)
	.dc.l	fsneg-tbl_unsupp		* 5a: fsneg
	.dc.l	tbl_unsupp-tbl_unsupp		* 5b: (zero offset)
	.dc.l	fdabs-tbl_unsupp		* 5c: fdabs
	.dc.l	tbl_unsupp-tbl_unsupp		* 5d: (zero offset)
	.dc.l	fdneg-tbl_unsupp		* 5e: fdneg
	.dc.l	tbl_unsupp-tbl_unsupp		* 5f: (zero offset)
	.dc.l	fsdiv-tbl_unsupp		* 60: fsdiv
	.dc.l	tbl_unsupp-tbl_unsupp		* 61: (zero offset)
	.dc.l	fsadd-tbl_unsupp		* 62: fsadd
	.dc.l	fsmul-tbl_unsupp		* 63: fsmul
	.dc.l	fddiv-tbl_unsupp		* 64: fddiv
	.dc.l	tbl_unsupp-tbl_unsupp		* 65: (zero offset)
	.dc.l	fdadd-tbl_unsupp		* 66: fdadd
	.dc.l	fdmul-tbl_unsupp		* 67: fdmul
	.dc.l	fssub-tbl_unsupp		* 68: fssub
	.dc.l	tbl_unsupp-tbl_unsupp		* 69: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 6a: (zero offset)
	.dc.l	tbl_unsupp-tbl_unsupp		* 6b: (zero offset)
	.dc.l	fdsub-tbl_unsupp		* 6c: fdsub

*########################################################################
* XDEF ****************************************************************	#
* 	fmul(): emulates the fmul instruction				#
*	fsmul(): emulates the fsmul instruction				#
*	fdmul(): emulates the fdmul instruction				#
*									#
* XREF ****************************************************************	#
*	scale_to_zero_src() - scale src exponent to zero		#
*	scale_to_zero_dst() - scale dst exponent to zero		#
*	unf_res() - return default underflow result			#
*	ovf_res() - return default overflow result			#
* 	res_qnan() - return QNAN result					#
* 	res_snan() - return SNAN result					#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precision source operand		#
*	a1 = pointer to extended precision destination operand		#
*	d0  rnd prec,mode						#
*									#
* OUTPUT **************************************************************	#
*	fp0 = result							#
*	fp1 = EXOP (if exception occurred)				#
*									#
* ALGORITHM ***********************************************************	#
*	Handle NANs, infinities, and zeroes as special cases. Divide	#
* norms/denorms into ext/sgl/dbl precision.				#
*	For norms/denorms, scale the exponents such that a multiply	#
* instruction won't cause an exception. Use the regular fmul to		#
* compute a result. Check if the regular operands would have taken	#
* an exception. If so, return the default overflow/underflow result	#
* and return the EXOP if exceptions are enabled. Else, scale the 	#
* result operand to the proper exponent.				#
*									#
*########################################################################

	align	$10,$51FC
* Scale-factor thresholds for fmul, one longword per rounding precision
* (index 0 = ext, 1 = sgl, 2 = dbl). fmul compares the summed scale factor
* in d0 against these with signed branches:
*   d0 == ovfl entry -> fmul_may_ovfl	d0 < ovfl entry -> fmul_ovfl
*   d0 == unfl entry -> fmul_may_unfl	d0 > unfl entry -> fmul_unfl
tbl_fmul_ovfl:
	.dc.l	$3fff-$7ffe		* ext_max
	.dc.l	$3fff-$407e		* sgl_max
	.dc.l	$3fff-$43fe		* dbl_max
tbl_fmul_unfl:
	.dc.l	$3fff+$0001		* ext_unfl
	.dc.l	$3fff-$3f80		* sgl_unfl
	.dc.l	$3fff-$3c00		* dbl_unfl

	global	fsmul
* fsmul: force single precision in the rnd info byte of d0 (the rounding
* mode bits selected by $30 are kept), then continue in fmul.
fsmul:
	andi.b	#$30,d0			* clear rnd prec
	ori.b	#s_mode*$10,d0		* insert sgl prec
	bra.b	fmul

	global	fdmul
* fdmul: force double precision in the rnd info byte of d0 (the rounding
* mode bits selected by $30 are kept), then fall through into fmul.
fdmul:
	andi.b	#$30,d0			* clear rnd prec
	ori.b	#d_mode*$10,d0		* insert dbl prec

	global	fmul
*
* fmul: common entry for fmul/fsmul/fdmul.
*   a0 = src operand, a1 = dst operand, d0 = rnd prec,mode.
* A dispatch index (DTAG << 3) | STAG is built in d1. Index 0 (both tags
* NORM) falls through to fmul_norm; any other index goes to fmul_not_norm.
*
fmul:
	move.l	d0,L_SCR3(a6)		* store rnd info

	clr.w	d1
	move.b	DTAG(a6),d1
	lsl.b	#$3,d1
	or.b	STAG(a6),d1		* combine src tags
	bne.w	fmul_not_norm		* optimize on non-norm input

* copy both operands to scratch so the scaling below does not touch the
* caller's copies: dst -> FP_SCR1, src -> FP_SCR0.
fmul_norm:
	move.w	DST_EX.w(a1),FP_SCR1_EX(a6)
	move.l	DST_HI(a1),FP_SCR1_HI(a6)
	move.l	DST_LO(a1),FP_SCR1_LO(a6)

	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)

	bsr.l	scale_to_zero_src	* scale src exponent
	move.l	d0,-(sp)		* save scale factor 1

	bsr.l	scale_to_zero_dst	* scale dst exponent

	add.l	d0,(sp)			* SCALE_FACTOR = scale1 + scale2

* d1.w = precision index (rnd info bits 7:6) used to index the tables.
	move.w	2+L_SCR3(a6),d1		* fetch precision
	lsr.b	#$6,d1			* shift to lo bits
	move.l	(sp)+,d0		* load S.F.
	cmp.l	(tbl_fmul_ovfl.b,pc,d1.w*4),d0		* would result ovfl?
	beq.w	fmul_may_ovfl		* result may rnd to overflow
	blt.w	fmul_ovfl		* result will overflow

	cmp.l	(tbl_fmul_unfl.b,pc,d1.w*4),d0		* would result unfl?
	beq.w	fmul_may_unfl		* result may rnd to no unfl
	bgt.w	fmul_unfl		* result will underflow

*
* NORMAL:
* - the result of the multiply operation will neither overflow nor underflow.
* - do the multiply to the proper precision and rounding mode. 
* - scale the result exponent using the scale factor. if both operands were
* normalized then we really don't need to go through this scaling. but for now,
* this will do.
*
* In: FP_SCR1 = scaled dst, FP_SCR0 = scaled src, d0 = scale factor,
* L_SCR3 = rnd prec,mode. Out: fp0 = result with the scaling removed.
fmul_normal:
	fmovem.x	FP_SCR1(a6),fp0	* load dst operand

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fmul.x	FP_SCR0(a6),fp0		* execute multiply

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

* also entered from fmul_may_ovfl / fmul_may_unfl with the product in fp0
* and the scale factor still in d0.
fmul_normal_exit:
	fmovem.x	fp0,FP_SCR0(a6)	* store out result
	move.l	d2,-(sp)		* save d2
	move.w	FP_SCR0_EX(a6),d1	* load {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* exp - scale factor (undo scaling)
	or.w	d2,d1			* concat old sign,new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp0	* return result in fp0
	rts

*
* OVERFLOW:
* - the result of the multiply operation is an overflow.
* - do the multiply to the proper precision and rounding mode in order to
* set the inexact bits.
* - calculate the default result and return it in fp0.
* - if overflow or inexact is enabled, we need a multiply result rounded to
* extended precision. if the original operation was extended, then we have this
* result. if the original operation was single or double, we have to do another
* multiply using extended precision and the correct rounding mode. the result
* of this operation then has its exponent scaled by -0x6000 to create the
* exceptional operand.
*
* In: FP_SCR1 = scaled dst, FP_SCR0 = scaled src, d0 = scale factor.
fmul_ovfl:
	fmovem.x	FP_SCR1(a6),fp0	* load dst operand

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fmul.x	FP_SCR0(a6),fp0		* execute multiply

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

* save setting this until now because this is where fmul_may_ovfl may jump in
fmul_ovfl_tst:
	ori.l	#ovfl_inx_mask,USER_FPSR(a6)	* set ovfl/aovfl/ainex

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$13,d1			* is OVFL or INEX enabled?
	bne.b	fmul_ovfl_ena		* yes

* calculate the default result
* ovf_res is called with d1 = sign flag and d0 = rnd prec,mode; on return
* d0 is OR-ed into the ccode byte and (a0) is loaded as the result.
fmul_ovfl_dis:
	btst	#neg_bit,FPSR_CC(a6)	* is result negative?
	sne	d1			* set sign param accordingly
	move.l	L_SCR3(a6),d0		* pass rnd prec,mode
	bsr.l	ovf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* set INF,N if applicable
	fmovem.x	(a0),fp0	* return default result in fp0
	rts

*
* OVFL is enabled; Create EXOP:
* - if precision is extended, then we have the EXOP. simply bias the exponent
* with an extra -0x6000. if the precision is single or double, we need to
* calculate a result rounded to extended precision.
*
* In: fp0 = product rounded to the requested precision, d0 = scale factor.
* Out: fp1 = EXOP; control then continues at fmul_ovfl_dis for fp0.
fmul_ovfl_ena:
	move.l	L_SCR3(a6),d1
	andi.b	#$c0,d1			* test the rnd prec
	bne.b	fmul_ovfl_ena_sd	* it's sgl or dbl

fmul_ovfl_ena_cont:
	fmovem.x	fp0,FP_SCR0(a6)	* move result to stack

	move.l	d2,-(sp)		* save d2
	move.w	FP_SCR0_EX(a6),d1	* fetch {sgn,exp}
	move.w	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	sub.l	d0,d1			* exp - scale factor (undo scaling)
	subi.l	#$6000,d1		* subtract bias
	andi.w	#$7fff,d1		* clear sign bit
	andi.w	#$8000,d2		* keep old sign
	or.w	d2,d1			* concat old sign,new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	bra.b	fmul_ovfl_dis

* sgl/dbl: redo the multiply with only the rounding mode in the FPCR
* (precision bits cleared) so the EXOP is rounded to extended.
fmul_ovfl_ena_sd:
	fmovem.x	FP_SCR1(a6),fp0	* load dst operand

	move.l	L_SCR3(a6),d1
	andi.b	#$30,d1			* keep rnd mode only
	fmove.l	d1,fpcr			* set FPCR

	fmul.x	FP_SCR0(a6),fp0		* execute multiply

	fmove.l	#$0,fpcr		* clear FPCR
	bra.b	fmul_ovfl_ena_cont

*
* may OVERFLOW:
* - the result of the multiply operation MAY overflow.
* - do the multiply to the proper precision and rounding mode in order to
* set the inexact bits.
* - calculate the default result and return it in fp0.
*
* Scale factor equals the overflow threshold: multiply the scaled operands
* and decide from the magnitude of the scaled product. fp1 is used as
* scratch for |result|.
fmul_may_ovfl:
	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fmul.x	FP_SCR0(a6),fp0		* execute multiply

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

	fabs.x	fp0,fp1			* make a copy of result
	fcmp.b	#$2,fp1			* is |result| >= 2.b?
	fbge.w	fmul_ovfl_tst		* yes; overflow has occurred

* no, it didn't overflow; we have correct result
	bra.w	fmul_normal_exit

*
* UNDERFLOW:
* - the result of the multiply operation is an underflow.
* - do the multiply to the proper precision and rounding mode in order to
* set the inexact bits.
* - calculate the default result and return it in fp0.
* - if underflow or inexact is enabled, we need a multiply result rounded to
* extended precision. if the original operation was extended, then we have this
* result. if the original operation was single or double, we have to do another
* multiply using extended precision and the correct rounding mode. the result
* of this operation then has its exponent scaled by +0x6000 to create the
* exceptional operand.
*
fmul_unfl:
	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set unfl exc bit

* for fun, let's use only extended precision, round to zero. then, let
* the unf_res() routine figure out all the rest.
* will we get the correct answer?
	fmovem.x	FP_SCR1(a6),fp0	* load dst operand

	fmove.l	#rz_mode*$10,fpcr	* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fmul.x	FP_SCR0(a6),fp0		* execute multiply

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$0b,d1			* is UNFL or INEX enabled?
	bne.b	fmul_unfl_ena		* yes

* unf_res is called with a0 = address of the value in FP_SCR0 and
* d1 = rnd prec,mode; d0 on return is OR-ed into the ccode byte.
fmul_unfl_dis:
	fmovem.x	fp0,FP_SCR0(a6)	* store out result

	lea	FP_SCR0(a6),a0		* pass: result addr
	move.l	L_SCR3(a6),d1		* pass: rnd prec,mode
	bsr.l	unf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* unf_res may have set 'Z'
	fmovem.x	FP_SCR0(a6),fp0	* return default result in fp0
	rts

*
* UNFL is enabled. 
*
* In: fp0 = round-to-zero product from fmul_unfl, d0 = scale factor.
* Out: fp1 = EXOP; control then continues at fmul_unfl_dis, which still
* uses fp0 for the default result.
fmul_unfl_ena:
	fmovem.x	FP_SCR1(a6),fp1	* load dst op

	move.l	L_SCR3(a6),d1
	andi.b	#$c0,d1			* is precision extended?
	bne.b	fmul_unfl_ena_sd	* no, sgl or dbl

* if the rnd mode is anything but RZ, then we have to re-do the above
* multiplication because we used RZ for all.
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

fmul_unfl_ena_cont:
	fmove.l	#$0,fpsr		* clear FPSR

	fmul.x	FP_SCR0(a6),fp1		* execute multiply

	fmove.l	#$0,fpcr		* clear FPCR

	fmovem.x	fp1,FP_SCR0(a6)	* save result to stack
	move.l	d2,-(sp)		* save d2
	move.w	FP_SCR0_EX(a6),d1	* fetch {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* exp - scale factor (undo scaling)
	addi.l	#$6000,d1		* add bias
	andi.w	#$7fff,d1		* clear sign bit
	or.w	d2,d1			* concat old sign,new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	bra.w	fmul_unfl_dis

* sgl/dbl: load the FPCR with the rounding mode only (precision bits
* cleared) before redoing the multiply.
fmul_unfl_ena_sd:
	move.l	L_SCR3(a6),d1
	andi.b	#$30,d1			* use only rnd mode
	fmove.l	d1,fpcr			* set FPCR

	bra.b	fmul_unfl_ena_cont

* MAY UNDERFLOW:
* -use the correct rounding mode and precision. this code favors operations
* that do not underflow.
* The scaled product's magnitude is compared with 2: above 2 means no
* underflow, below 2 means underflow, exactly 2 needs the RZ re-check below.
fmul_may_unfl:
	fmovem.x	FP_SCR1(a6),fp0	* load dst operand

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fmul.x	FP_SCR0(a6),fp0		* execute multiply

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

	fabs.x	fp0,fp1			* make a copy of result
	fcmp.b	#$2,fp1			* compare |result| with 2
	fbgt.w	fmul_normal_exit	* |result| > 2; no underflow occurred
	fblt.w	fmul_unfl		* |result| < 2; underflow occurred

*
* we still don't know if underflow occurred. result is ~ equal to 2. but,
* we don't know if the result was an underflow that rounded up to a 2 or
* a normalized number that rounded down to a 2. so, redo the entire operation
* using RZ as the rounding mode to see what the pre-rounded result is.
* this case should be relatively rare.
*
	fmovem.x	FP_SCR1(a6),fp1	* load dst operand

	move.l	L_SCR3(a6),d1
	andi.b	#$c0,d1			* keep rnd prec
	ori.b	#rz_mode*$10,d1		* insert RZ

	fmove.l	d1,fpcr			* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fmul.x	FP_SCR0(a6),fp1		* execute multiply

	fmove.l	#$0,fpcr		* clear FPCR
	fabs.x	fp1			* make absolute value
	fcmp.b	#$2,fp1			* is |result| < 2.b?
	fbge.w	fmul_normal_exit	* no; no underflow occurred
	bra.w	fmul_unfl		* yes, underflow occurred

*###############################################################################

*
* Multiply: inputs are not both normalized; what are they?
*
* In: d1.w = (DTAG << 3) | STAG as built in fmul. The word offset at that
* index is fetched and jumped to, relative to tbl_fmul_op.
fmul_not_norm:
	move.w	(tbl_fmul_op.b,pc,d1.w*2),d1
	jmp	(tbl_fmul_op.b,pc,d1.w)

* Marker word $4AFC followed by the entry count: 48 words, i.e. six
* destination types with eight slots each. Row comments read "DST x SRC".
* The last two slots of every row hold a zero offset.
	.dc.w	$4AFC,48
tbl_fmul_op:
	.dc.w	fmul_norm-tbl_fmul_op		* NORM x NORM
	.dc.w	fmul_zero-tbl_fmul_op		* NORM x ZERO
	.dc.w	fmul_inf_src-tbl_fmul_op	* NORM x INF
	.dc.w	fmul_res_qnan-tbl_fmul_op	* NORM x QNAN
	.dc.w	fmul_norm-tbl_fmul_op		* NORM x DENORM
	.dc.w	fmul_res_snan-tbl_fmul_op	* NORM x SNAN
	.dc.w	tbl_fmul_op-tbl_fmul_op		*
	.dc.w	tbl_fmul_op-tbl_fmul_op		*

	.dc.w	fmul_zero-tbl_fmul_op		* ZERO x NORM
	.dc.w	fmul_zero-tbl_fmul_op		* ZERO x ZERO
	.dc.w	fmul_res_operr-tbl_fmul_op	* ZERO x INF
	.dc.w	fmul_res_qnan-tbl_fmul_op	* ZERO x QNAN
	.dc.w	fmul_zero-tbl_fmul_op		* ZERO x DENORM
	.dc.w	fmul_res_snan-tbl_fmul_op	* ZERO x SNAN
	.dc.w	tbl_fmul_op-tbl_fmul_op		*
	.dc.w	tbl_fmul_op-tbl_fmul_op		*

	.dc.w	fmul_inf_dst-tbl_fmul_op	* INF x NORM
	.dc.w	fmul_res_operr-tbl_fmul_op	* INF x ZERO
	.dc.w	fmul_inf_dst-tbl_fmul_op	* INF x INF
	.dc.w	fmul_res_qnan-tbl_fmul_op	* INF x QNAN
	.dc.w	fmul_inf_dst-tbl_fmul_op	* INF x DENORM
	.dc.w	fmul_res_snan-tbl_fmul_op	* INF x SNAN
	.dc.w	tbl_fmul_op-tbl_fmul_op		*
	.dc.w	tbl_fmul_op-tbl_fmul_op		*

	.dc.w	fmul_res_qnan-tbl_fmul_op	* QNAN x NORM
	.dc.w	fmul_res_qnan-tbl_fmul_op	* QNAN x ZERO
	.dc.w	fmul_res_qnan-tbl_fmul_op	* QNAN x INF
	.dc.w	fmul_res_qnan-tbl_fmul_op	* QNAN x QNAN
	.dc.w	fmul_res_qnan-tbl_fmul_op	* QNAN x DENORM
	.dc.w	fmul_res_snan-tbl_fmul_op	* QNAN x SNAN
	.dc.w	tbl_fmul_op-tbl_fmul_op		*
	.dc.w	tbl_fmul_op-tbl_fmul_op		*

	.dc.w	fmul_norm-tbl_fmul_op		* DENORM x NORM
	.dc.w	fmul_zero-tbl_fmul_op		* DENORM x ZERO
	.dc.w	fmul_inf_src-tbl_fmul_op	* DENORM x INF
	.dc.w	fmul_res_qnan-tbl_fmul_op	* DENORM x QNAN
	.dc.w	fmul_norm-tbl_fmul_op		* DENORM x DENORM
	.dc.w	fmul_res_snan-tbl_fmul_op	* DENORM x SNAN
	.dc.w	tbl_fmul_op-tbl_fmul_op		*
	.dc.w	tbl_fmul_op-tbl_fmul_op		*

	.dc.w	fmul_res_snan-tbl_fmul_op	* SNAN x NORM
	.dc.w	fmul_res_snan-tbl_fmul_op	* SNAN x ZERO
	.dc.w	fmul_res_snan-tbl_fmul_op	* SNAN x INF
	.dc.w	fmul_res_snan-tbl_fmul_op	* SNAN x QNAN
	.dc.w	fmul_res_snan-tbl_fmul_op	* SNAN x DENORM
	.dc.w	fmul_res_snan-tbl_fmul_op	* SNAN x SNAN
	.dc.w	tbl_fmul_op-tbl_fmul_op		*
	.dc.w	tbl_fmul_op-tbl_fmul_op		*

* Local stubs that forward to the shared result routines with a long
* branch; tbl_fmul_op stores word-sized offsets to these stubs.
fmul_res_operr:
	bra.l	res_operr
fmul_res_snan:
	bra.l	res_snan
fmul_res_qnan:
	bra.l	res_qnan

*
* Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
*
	global	fmul_zero		* global for fsglmul
* In: a0 = src operand, a1 = dst operand. Out: fp0 = signed zero, ccode
* byte overwritten with Z (and N when the operand signs differ).
* The byte loads fetch the upper byte of each exponent word, so bit 7 of
* the exclusive-or is the result sign.
fmul_zero:
	move.b	SRC_EX.w(a0),d0		* exclusive or the signs
	move.b	DST_EX.w(a1),d1
	eor.b	d0,d1
	bpl.b	fmul_zero_p		* result ZERO is pos.
fmul_zero_n:
	fmove.s	#$80000000,fp0		* load -ZERO
	move.b	#z_bmask+neg_bmask,FPSR_CC(a6)	* set Z/N
	rts
fmul_zero_p:
	fmove.s	#$00000000,fp0		* load +ZERO
	move.b	#z_bmask,FPSR_CC(a6)	* set Z
	rts

*
* Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
*
* Note: The j-bit for an infinity is a don't-care. However, to be
* strictly compatible w/ the 68881/882, we make sure to return an
* INF w/ the j-bit set if the input INF j-bit was set. Destination
* INFs take priority.
*
	global	fmul_inf_dst		* global for fsglmul
* In: a0 = src operand, a1 = dst operand (the INF). Out: fp0 = the dst
* operand with its sign replaced by the exclusive-or of both signs; the
* ccode byte is overwritten with INF (and N when negative).
fmul_inf_dst:
	fmovem.x	DST.w(a1),fp0	* return INF result in fp0
	move.b	SRC_EX.w(a0),d0		* exclusive or the signs
	move.b	DST_EX.w(a1),d1
	eor.b	d0,d1
	bpl.b	fmul_inf_dst_p		* result INF is pos.
fmul_inf_dst_n:
	fabs.x	fp0			* clear result sign
	fneg.x	fp0			* set result sign
	move.b	#inf_bmask+neg_bmask,FPSR_CC(a6)	* set INF/N
	rts
fmul_inf_dst_p:
	fabs.x	fp0			* clear result sign
	move.b	#inf_bmask,FPSR_CC(a6)	* set INF
	rts

	global	fmul_inf_src		* global for fsglmul
* Same as fmul_inf_dst except that the INF loaded into fp0 is the src
* operand; the sign fix-up tails of fmul_inf_dst are reused.
fmul_inf_src:
	fmovem.x	SRC.w(a0),fp0	* return INF result in fp0
	move.b	SRC_EX.w(a0),d0		* exclusive or the signs
	move.b	DST_EX.w(a1),d1
	eor.b	d0,d1
	bpl.b	fmul_inf_dst_p		* result INF is pos.
	bra.b	fmul_inf_dst_n

*########################################################################
* XDEF ****************************************************************	#
*	fin(): emulates the fmove instruction				#
*	fsin(): emulates the fsmove instruction				#
*	fdin(): emulates the fdmove instruction				#
*									#
* XREF ****************************************************************	#
*	norm() - normalize mantissa for EXOP on denorm			#
*	scale_to_zero_src() - scale src exponent to zero		#
*	ovf_res() - return default overflow result			#
* 	unf_res() - return default underflow result			#
*	res_qnan_1op() - return QNAN result				#
*	res_snan_1op() - return SNAN result				#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precision source operand		#
*	d0 = round prec/mode						#
* 									#
* OUTPUT **************************************************************	#
*	fp0 = result							#
*	fp1 = EXOP (if exception occurred)				#
*									#
* ALGORITHM ***********************************************************	#
* 	Handle NANs, infinities, and zeroes as special cases. Divide	#
* norms into extended, single, and double precision.			#
* 	Norms can be emulated w/ a regular fmove instruction. For	#
* sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
* if the result would have overflowed/underflowed. If so, use unf_res()	#
* or ovf_res() to return the default result. Also return EXOP if	#
* exception is enabled. If no exception, return the default result.	#
*	Unnorms don't pass through here.				#
*									#
*########################################################################

	global	fsin
* fsin: fsmove entry (not the sine function). Forces single precision in
* the rnd info byte of d0, keeping the rounding mode bits, then continues
* in fin.
fsin:
	andi.b	#$30,d0			* clear rnd prec
	ori.b	#s_mode*$10,d0		* insert sgl precision
	bra.b	fin

	global	fdin
* fdin: fdmove entry. Forces double precision in the rnd info byte of d0,
* keeping the rounding mode bits, then falls through into fin.
fdin:
	andi.b	#$30,d0			* clear rnd prec
	ori.b	#d_mode*$10,d0		* insert dbl precision

	global	fin
*
* fin: common entry for fmove/fsmove/fdmove into the fp register file.
*   a0 = src operand, d0 = rnd prec,mode. Out: fp0 = result.
* A zero STAG (NORM) falls through to fin_norm; other tags are sorted out
* in fin_not_norm with the tag left in d1.
*
fin:
	move.l	d0,L_SCR3(a6)		* store rnd info

	move.b	STAG(a6),d1		* fetch src optype tag
	bne.w	fin_not_norm		* optimize on non-norm input

*
* FP MOVE IN: NORMs and DENORMs ONLY!
*
fin_norm:
	andi.b	#$c0,d0			* is precision extended?
	bne.w	fin_not_ext		* no, so go handle dbl or sgl

*
* precision selected is extended. so...we cannot get an underflow
* or overflow because of rounding to the correct precision. so...
* skip the scaling and unscaling...
*
	tst.b	SRC_EX.w(a0)		* is the operand negative?
	bpl.b	fin_norm_done		* no
	bset	#neg_bit,FPSR_CC(a6)	* yes, so set 'N' ccode bit
fin_norm_done:
	fmovem.x	SRC.w(a0),fp0	* return result in fp0
	rts

*
* for an extended precision DENORM, the UNFL exception bit is set
* the accrued bit is NOT set in this instance(no inexactness!)
*
* Reached from fin_not_norm with d0 still holding the rnd info. For
* extended precision the operand is returned unchanged in fp0 with the
* UNFL exception bit set; an EXOP is built only when UNFL is enabled.
fin_denorm:
	andi.b	#$c0,d0			* is precision extended?
	bne.w	fin_not_ext		* no, so go handle dbl or sgl

	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set unfl exc bit
	tst.b	SRC_EX.w(a0)		* is the operand negative?
	bpl.b	fin_denorm_done		* no
	bset	#neg_bit,FPSR_CC(a6)	* yes, so set 'N' ccode bit
fin_denorm_done:
	fmovem.x	SRC.w(a0),fp0	* return result in fp0
	btst	#unfl_bit,FPCR_ENABLE(a6)	* is UNFL enabled?
	bne.b	fin_denorm_unfl_ena	* yes
	rts

*
* the input is an extended DENORM and underflow is enabled in the FPCR.
* normalize the mantissa and add the bias of 0x6000 to the resulting negative
* exponent and insert back into the operand.
*
* In: a0 = src operand (fp0 already holds the result). Out: fp1 = EXOP.
* The operand is copied to FP_SCR0 and normalized there; the value norm()
* returns in d0 is used as the shift count.
fin_denorm_unfl_ena:
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	lea	FP_SCR0(a6),a0		* pass: ptr to operand
	bsr.l	norm			* normalize result
	neg.w	d0			* new exponent = -(shft val)
	addi.w	#$6000,d0		* add new bias to exponent
	move.w	FP_SCR0_EX(a6),d1	* fetch old sign,exp
	andi.w	#$8000,d1		* keep old sign
	andi.w	#$7fff,d0		* clear sign position
	or.w	d1,d0			* concat new exp,old sign
	move.w	d0,FP_SCR0_EX(a6)	* insert new exponent
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	rts

*
* operand is to be rounded to single or double precision
*	
* In: d0.b = precision bits only (already masked with $c0), a0 = src.
fin_not_ext:
	cmpi.b	#s_mode*$10,d0		* separate sgl/dbl prec
	bne.b	fin_dbl

*
* operand is to be rounded to single precision
*
* The operand is copied to FP_SCR0 and scaled there; the scale factor
* returned in d0 is tested with signed compares against the sgl limits.
fin_sgl:
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	bsr.l	scale_to_zero_src	* calculate scale factor

	cmpi.l	#$3fff-$3f80,d0		* will move in underflow?
	bge.w	fin_sd_unfl		* yes; go handle underflow
	cmpi.l	#$3fff-$407e,d0		* will move in overflow?
	beq.w	fin_sd_may_ovfl		* maybe; go check
	blt.w	fin_sd_ovfl		* yes; go handle overflow

*
* operand will NOT overflow or underflow when moved into the fp reg file
*
* In: FP_SCR0 = scaled operand, d0 = scale factor, L_SCR3 = rnd prec,mode.
* Out: fp0 = rounded operand with the scaling removed.
fin_sd_normal:
	fmove.l	#$0,fpsr		* clear FPSR
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fmove.x	FP_SCR0(a6),fp0		* perform move

	fmove.l	fpsr,d1			* save FPSR
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

* also entered from fin_sd_may_ovfl with the rounded value in fp0 and the
* scale factor still in d0.
fin_sd_normal_exit:
	move.l	d2,-(sp)		* save d2
	fmovem.x	fp0,FP_SCR0(a6)	* store out result
	move.w	FP_SCR0_EX(a6),d1	* load {sgn,exp}
	move.w	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	sub.l	d0,d1			* exp - scale factor (undo scaling)
	andi.w	#$8000,d2		* keep old sign
	or.w	d1,d2			* concat old sign,new exponent
	move.w	d2,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp0	* return result in fp0
	rts

*
* operand is to be rounded to double precision
*
* Double precision counterpart of fin_sgl: same copy-and-scale sequence,
* tested against the dbl limits, sharing the fin_sd_* handlers.
fin_dbl:
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	bsr.l	scale_to_zero_src	* calculate scale factor

	cmpi.l	#$3fff-$3c00,d0		* will move in underflow?
	bge.w	fin_sd_unfl		* yes; go handle underflow
	cmpi.l	#$3fff-$43fe,d0		* will move in overflow?
	beq.w	fin_sd_may_ovfl		* maybe; go check
	blt.w	fin_sd_ovfl		* yes; go handle overflow
	bra.w	fin_sd_normal		* no; go handle normalized op

*
* operand WILL underflow when moved in to the fp register file
*
* In: FP_SCR0 = scaled operand, d0 = scale factor, L_SCR3 = rnd prec,mode.
* Out: fp0 = default underflow result taken from FP_SCR0 after unf_res.
fin_sd_unfl:
	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set unfl exc bit

	tst.b	FP_SCR0_EX(a6)		* is operand negative?
	bpl.b	fin_sd_unfl_tst
	bset	#neg_bit,FPSR_CC(a6)	* set 'N' ccode bit

* if underflow or inexact is enabled, then go calculate the EXOP first.
fin_sd_unfl_tst:
	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$0b,d1			* is UNFL or INEX enabled?
	bne.b	fin_sd_unfl_ena		* yes

* unf_res is called with a0 = address of FP_SCR0 and d1 = rnd prec,mode;
* d0 on return is OR-ed into the ccode byte.
fin_sd_unfl_dis:
	lea	FP_SCR0(a6),a0		* pass: result addr
	move.l	L_SCR3(a6),d1		* pass: rnd prec,mode
	bsr.l	unf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* unf_res may have set 'Z'
	fmovem.x	FP_SCR0(a6),fp0	* return default result in fp0
	rts

*
* operand will underflow AND underflow or inexact is enabled. 
* therefore, we must return the result rounded to extended precision.
*
* In: FP_SCR0 = scaled operand, d0 = scale factor. Out: fp1 = EXOP.
* The EXOP is assembled in FP_SCR1 so FP_SCR0 stays intact for
* fin_sd_unfl_dis, where control continues.
fin_sd_unfl_ena:
	move.l	FP_SCR0_HI(a6),FP_SCR1_HI(a6)
	move.l	FP_SCR0_LO(a6),FP_SCR1_LO(a6)
	move.w	FP_SCR0_EX(a6),d1	* load current exponent

	move.l	d2,-(sp)		* save d2
	move.w	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	sub.l	d0,d1			* subtract scale factor
	andi.w	#$8000,d2		* extract old sign
	addi.l	#$6000,d1		* add new bias
	andi.w	#$7fff,d1		* clear sign position
	or.w	d1,d2			* concat old sign,new exp
	move.w	d2,FP_SCR1_EX(a6)	* insert new exponent
	fmovem.x	FP_SCR1(a6),fp1	* return EXOP in fp1
	move.l	(sp)+,d2		* restore d2
	bra.b	fin_sd_unfl_dis

*
* operand WILL overflow.
*
* In: FP_SCR0 = scaled operand, d0 = scale factor, L_SCR3 = rnd prec,mode.
* Out: fp0 = default overflow result produced by ovf_res.
fin_sd_ovfl:
	fmove.l	#$0,fpsr		* clear FPSR
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fmove.x	FP_SCR0(a6),fp0		* perform move

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	fpsr,d1			* save FPSR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

* fin_sd_may_ovfl joins here once it has decided that overflow occurred.
fin_sd_ovfl_tst:
	ori.l	#ovfl_inx_mask,USER_FPSR(a6)	* set ovfl/aovfl/ainex

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$13,d1			* is OVFL or INEX enabled?
	bne.b	fin_sd_ovfl_ena		* yes

*
* OVFL is not enabled; therefore, we must create the default result by
* calling ovf_res().
*
* ovf_res is called with d1 = sign flag and d0 = rnd prec,mode; on return
* d0 is OR-ed into the ccode byte and (a0) is loaded as the result.
fin_sd_ovfl_dis:
	btst	#neg_bit,FPSR_CC(a6)	* is result negative?
	sne	d1			* set sign param accordingly
	move.l	L_SCR3(a6),d0		* pass: prec,mode
	bsr.l	ovf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* set INF,N if applicable
	fmovem.x	(a0),fp0	* return default result in fp0
	rts

*
* OVFL is enabled.
* the INEX2 bit has already been updated by the round to the correct precision.
* now, round to extended(and don't alter the FPSR).
*
* In: FP_SCR0 = scaled operand, d0 = scale factor. Out: fp1 = EXOP, built
* in place in FP_SCR0; control then continues at fin_sd_ovfl_dis.
fin_sd_ovfl_ena:
	move.l	d2,-(sp)		* save d2
	move.w	FP_SCR0_EX(a6),d1	* fetch {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* exp - scale factor (undo scaling)
	subi.l	#$6000,d1		* subtract bias
	andi.w	#$7fff,d1		* clear sign position
	or.w	d2,d1			* concat old sign,new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	bra.b	fin_sd_ovfl_dis

*
* the move in MAY overflow. so...
*
* Scale factor equals the overflow threshold: round the scaled operand and
* decide from its magnitude. fp1 is used as scratch for |result|.
fin_sd_may_ovfl:
	fmove.l	#$0,fpsr		* clear FPSR
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fmove.x	FP_SCR0(a6),fp0		* perform the move

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

	fabs.x	fp0,fp1			* make a copy of result
	fcmp.b	#$2,fp1			* is |result| >= 2.b?
	fbge.w	fin_sd_ovfl_tst		* yes; overflow has occurred

* no, it didn't overflow; we have correct result
	bra.w	fin_sd_normal_exit

*#########################################################################

*
* operand is not a NORM: check its optype and branch accordingly
*
* In: d1.b = non-zero src tag, a0 = src operand, d0 = rnd info.
fin_not_norm:
	cmpi.b	#DENORM,d1		* weed out DENORM
	beq.w	fin_denorm
	cmpi.b	#SNAN,d1		* weed out SNANs
	beq.l	res_snan_1op
	cmpi.b	#QNAN,d1		* weed out QNANs
	beq.l	res_qnan_1op

*
* do the fmove in; at this point, only possible ops are ZERO and INF.
* use fmov to determine ccodes.
* prec:mode should be zero at this point but it won't affect answer anyways.
*
	fmove.x	SRC.w(a0),fp0		* do fmove in
	fmove.l	fpsr,d0			* no exceptions possible
	rol.l	#$8,d0			* put ccodes in lo byte
	move.b	d0,FPSR_CC(a6)		* insert correct ccodes
	rts

*########################################################################
* XDEF ****************************************************************	#
* 	fdiv(): emulates the fdiv instruction				#
*	fsdiv(): emulates the fsdiv instruction				#
*	fddiv(): emulates the fddiv instruction				#
*									#
* XREF ****************************************************************	#
*	scale_to_zero_src() - scale src exponent to zero		#
*	scale_to_zero_dst() - scale dst exponent to zero		#
*	unf_res() - return default underflow result			#
*	ovf_res() - return default overflow result			#
* 	res_qnan() - return QNAN result					#
* 	res_snan() - return SNAN result					#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precision source operand		#
*	a1 = pointer to extended precision destination operand		#
*	d0  rnd prec,mode						#
*									#
* OUTPUT **************************************************************	#
*	fp0 = result							#
*	fp1 = EXOP (if exception occurred)				#
*									#
* ALGORITHM ***********************************************************	#
*	Handle NANs, infinities, and zeroes as special cases. Divide	#
* norms/denorms into ext/sgl/dbl precision.				#
*	For norms/denorms, scale the exponents such that a divide	#
* instruction won't cause an exception. Use the regular fdiv to		#
* compute a result. Check if the regular operands would have taken	#
* an exception. If so, return the default overflow/underflow result	#
* and return the EXOP if exceptions are enabled. Else, scale the 	#
* result operand to the proper exponent.				#
*									#
*########################################################################

	align	$10,$51FC
* Scale-factor thresholds used by fdiv_norm. Both tables are indexed with
* the rounding precision (d1.w*4); the entries are in the order ext, sgl,
* dbl. Each entry is $3fff minus a limit exponent for that precision.
* They are addressed pc-relative with a byte displacement from fdiv_norm,
* so they must stay close to that code.
tbl_fdiv_unfl:
	.dc.l	$3fff-$0000		* ext_unfl
	.dc.l	$3fff-$3f81		* sgl_unfl
	.dc.l	$3fff-$3c01		* dbl_unfl

tbl_fdiv_ovfl:
	.dc.l	$3fff-$7ffe		* ext overflow exponent
	.dc.l	$3fff-$407e		* sgl overflow exponent
	.dc.l	$3fff-$43fe		* dbl overflow exponent

	global	fsdiv
* fsdiv: same as fdiv but with the rounding precision forced to single.
fsdiv:
	andi.b	#$30,d0			* keep rnd mode, clear rnd prec
	ori.b	#s_mode*$10,d0		* insert sgl prec
	bra.b	fdiv

	global	fddiv
* fddiv: same as fdiv but with the rounding precision forced to double;
* falls through into fdiv.
fddiv:
	andi.b	#$30,d0			* keep rnd mode, clear rnd prec
	ori.b	#d_mode*$10,d0		* insert dbl prec

	global	fdiv
* fdiv: common entry. Saves the rounding info in L_SCR3 and builds a
* combined operand tag d1 = (DTAG << 3) | STAG. A zero tag falls through
* to fdiv_norm; any other value dispatches through tbl_fdiv_op in
* fdiv_not_norm.
fdiv:
	move.l	d0,L_SCR3(a6)		* store rnd info

	clr.w	d1
	move.b	DTAG(a6),d1
	lsl.b	#$3,d1
	or.b	STAG(a6),d1		* combine dst and src tags

	bne.w	fdiv_not_norm		* optimize on non-norm input

*
* DIVIDE: NORMs and DENORMs ONLY!
*
* Copies dst into FP_SCR1 and src into FP_SCR0, scales both and combines
* the two scale factors. The combined factor is then compared against the
* per-precision thresholds to pick the normal, may-overflow, may-underflow
* or underflow path. On exit from this block d0 = combined scale factor
* and d1 = precision index (table order: ext, sgl, dbl).
*
fdiv_norm:
	move.w	DST_EX.w(a1),FP_SCR1_EX(a6)
	move.l	DST_HI(a1),FP_SCR1_HI(a6)
	move.l	DST_LO(a1),FP_SCR1_LO(a6)

	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)

	bsr.l	scale_to_zero_src	* scale src exponent
	move.l	d0,-(sp)		* save src scale factor

	bsr.l	scale_to_zero_dst	* scale dst exponent

* the two instructions below leave (dst scale factor - src scale factor)
* on the stack: the saved src factor is negated, then the dst factor in
* d0 is added to it.
	neg.l	(sp)			* (sp) = -(src scale factor)
	add.l	d0,(sp)			* (sp) = dst S.F. - src S.F.

	move.w	2+L_SCR3(a6),d1		* fetch precision
	lsr.b	#$6,d1			* shift to lo bits
	move.l	(sp)+,d0		* load S.F.
	cmp.l	(tbl_fdiv_ovfl.b,pc,d1.w*4),d0		* S.F. <= ovfl threshold?
	ble.w	fdiv_may_ovfl		* yes; result may overflow

	cmp.l	(tbl_fdiv_unfl.w,pc,d1.w*4),d0		* will result underflow?
	beq.w	fdiv_may_unfl		* S.F. == threshold: maybe
	bgt.w	fdiv_unfl		* S.F. > threshold: yes; go handle underflow

fdiv_normal:
* No overflow or underflow is possible: divide the scaled operands with
* the user's rounding prec/mode and merge the FPSR into USER_FPSR.
	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fdiv.x	FP_SCR0(a6),fp0		* perform divide

	fmove.l	fpsr,d1			* save FPSR
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

fdiv_normal_exit:
* Common exit (also entered from fdiv_no_ovfl and fdiv_may_unfl): fp0
* holds the scaled quotient and d0 the scale factor. Rewrite the exponent
* of the result as (exp - d0), keep its sign and return it in fp0.
* d2 is scratch and is preserved on the stack.
	fmovem.x	fp0,FP_SCR0(a6)	* store result on stack
	move.l	d2,-(sp)		* store d2
	move.w	FP_SCR0_EX(a6),d1	* load {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* apply scale factor (exp - d0)
	or.w	d2,d1			* concat old sign,new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp0	* return result in fp0
	rts

* Exponent limits used by fdiv_may_ovfl, indexed by precision (d1.w*4)
* in the same order as the tables above (ext, sgl, dbl). An unscaled
* result exponent below the entry means no overflow.
tbl_fdiv_ovfl2:
	.dc.l	$7fff
	.dc.l	$407f
	.dc.l	$43ff

fdiv_no_ovfl:
* Reached from fdiv_may_ovfl when the result did not overflow after all;
* the scale factor is still on the stack.
	move.l	(sp)+,d0		* restore scale factor
	bra.b	fdiv_normal_exit

fdiv_may_ovfl:
* The scale factor says the quotient may overflow. Do the divide, then
* compute the unscaled exponent of the rounded result and compare it with
* the limit for the selected precision. d1 still holds the precision
* index set up in fdiv_norm (nothing in this block writes d1). d0 is used
* as scratch, so the scale factor is parked on the stack meanwhile.
* Falls through into fdiv_ovfl_tst when overflow is confirmed.
	move.l	d0,-(sp)		* save scale factor

	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fdiv.x	FP_SCR0(a6),fp0		* execute divide

	fmove.l	fpsr,d0
	fmove.l	#$0,fpcr

	or.l	d0,USER_FPSR(a6)	* save INEX,N

	fmovem.x	fp0,-(sp)	* save result to stack
	move.w	(sp),d0			* fetch new exponent
	add.l	#$c,sp			* clear result (12 bytes) from stack
	andi.l	#$7fff,d0		* strip sign
	sub.l	(sp),d0			* apply scale factor (exp - S.F.)
	cmp.l	(tbl_fdiv_ovfl2.b,pc,d1.w*4),d0
	blt.b	fdiv_no_ovfl		* below the limit: no overflow
	move.l	(sp)+,d0		* overflow: restore scale factor

fdiv_ovfl_tst:
* Overflow confirmed: record it in USER_FPSR, then either build the EXOP
* first (OVFL or INEX enabled) or go straight to the default result.
	ori.l	#ovfl_inx_mask,USER_FPSR(a6)	* set ovfl/aovfl/ainex

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$13,d1			* is OVFL or INEX enabled?
	bne.b	fdiv_ovfl_ena		* yes

fdiv_ovfl_dis:
* Default overflow result. d1 is set to all ones or zero from the N ccode
* bit and passed with the rounding info to ovf_res; afterwards d0 is OR-ed
* into FPSR_CC and the value at (a0) is loaded into fp0.
* NOTE(review): the a0/d0 outputs of ovf_res are inferred from how they
* are used here -- confirm against ovf_res.
	btst	#neg_bit,FPSR_CC(a6)	* is result negative?
	sne	d1			* set sign param accordingly
	move.l	L_SCR3(a6),d0		* pass prec:rnd
	bsr.l	ovf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* set INF if applicable
	fmovem.x	(a0),fp0	* return default result in fp0
	rts

fdiv_ovfl_ena:
* OVFL or INEX is enabled: an EXOP has to be returned in fp1. For extended
* precision the quotient already in fp0 is used directly; for sgl/dbl the
* divide is redone with only the rounding mode loaded into the FPCR.
	move.l	L_SCR3(a6),d1
	andi.b	#$c0,d1			* is precision extended?
	bne.b	fdiv_ovfl_ena_sd	* no, do sgl or dbl

fdiv_ovfl_ena_cont:
* fp0 = quotient to turn into the EXOP, d0 = scale factor. The exponent
* becomes (exp - d0 - $6000) truncated to 15 bits; the sign is kept.
* d2 is scratch and is preserved on the stack.
	fmovem.x	fp0,FP_SCR0(a6)	* move result to stack

	move.l	d2,-(sp)		* save d2
	move.w	FP_SCR0_EX(a6),d1	* fetch {sgn,exp}
	move.w	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	sub.l	d0,d1			* apply scale factor (exp - d0)
	subi.l	#$6000,d1		* subtract bias
	andi.w	#$7fff,d1		* clear sign bit
	andi.w	#$8000,d2		* keep old sign
	or.w	d2,d1			* concat old sign,new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	bra.b	fdiv_ovfl_dis

fdiv_ovfl_ena_sd:
* sgl/dbl: recompute the quotient with the precision bits masked off (only
* the rounding mode reaches the FPCR). The FPSR is not read back here.
	fmovem.x	FP_SCR1(a6),fp0	* load dst operand

	move.l	L_SCR3(a6),d1
	andi.b	#$30,d1			* keep rnd mode
	fmove.l	d1,fpcr			* set FPCR

	fdiv.x	FP_SCR0(a6),fp0		* execute divide

	fmove.l	#$0,fpcr		* clear FPCR
	bra.b	fdiv_ovfl_ena_cont

fdiv_unfl:
* The quotient underflows. Set the UNFL exception bit, redo the divide
* with the FPCR set to rz_mode*$10 (no precision bits) and merge the
* resulting FPSR into USER_FPSR. If UNFL or INEX is enabled the EXOP is
* built first; in both cases fdiv_unfl_dis produces the default result.
	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set unfl exc bit

	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	#rz_mode*$10,fpcr	* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fdiv.x	FP_SCR0(a6),fp0		* execute divide

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$0b,d1			* is UNFL or INEX enabled?
	bne.b	fdiv_unfl_ena		* yes

fdiv_unfl_dis:
* Default underflow result: the quotient in fp0 is stored to FP_SCR0 and
* handed to unf_res together with the rounding info and the scale factor
* still in d0. The result is read back from FP_SCR0, and the d0 returned
* by unf_res is OR-ed into FPSR_CC.
* NOTE(review): that unf_res works in place on (a0) and takes d0 as an
* input is inferred from this call site -- confirm against unf_res.
	fmovem.x	fp0,FP_SCR0(a6)	* store out result

	lea	FP_SCR0(a6),a0		* pass: result addr
	move.l	L_SCR3(a6),d1		* pass: rnd prec,mode
	bsr.l	unf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* 'Z' may have been set
	fmovem.x	FP_SCR0(a6),fp0	* return default result in fp0
	rts

*
* UNFL is enabled. 
*
fdiv_unfl_ena:
* UNFL or INEX is enabled: compute the EXOP in fp1 without disturbing the
* quotient in fp0 (fdiv_unfl_dis still needs it). For extended precision
* the full L_SCR3 is loaded into the FPCR; for sgl/dbl only the rounding
* mode is used.
	fmovem.x	FP_SCR1(a6),fp1	* load dst op

	move.l	L_SCR3(a6),d1
	andi.b	#$c0,d1			* is precision extended?
	bne.b	fdiv_unfl_ena_sd	* no, sgl or dbl

	fmove.l	L_SCR3(a6),fpcr		* set FPCR

fdiv_unfl_ena_cont:
* Divide into fp1, then rewrite the exponent as (exp - d0 + $6000)
* truncated to 15 bits with the sign kept. d2 is scratch and is preserved
* on the stack. The FPSR of this second divide is not read back.
	fmove.l	#$0,fpsr		* clear FPSR

	fdiv.x	FP_SCR0(a6),fp1		* execute divide

	fmove.l	#$0,fpcr		* clear FPCR

	fmovem.x	fp1,FP_SCR0(a6)	* save result to stack
	move.l	d2,-(sp)		* save d2
	move.w	FP_SCR0_EX(a6),d1	* fetch {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* apply scale factor (exp - d0)
	addi.l	#$6000,d1		* add bias
	andi.w	#$7fff,d1		* keep only the 15-bit exponent
	or.w	d2,d1			* concat old sign,new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exp
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	bra.w	fdiv_unfl_dis

fdiv_unfl_ena_sd:
* sgl/dbl: strip the precision bits so only the rounding mode is loaded.
	move.l	L_SCR3(a6),d1
	andi.b	#$30,d1			* use only rnd mode
	fmove.l	d1,fpcr			* set FPCR

	bra.b	fdiv_unfl_ena_cont

*
* the divide operation MAY underflow:
*
fdiv_may_unfl:
* The scale factor sits exactly on the underflow threshold. Divide with
* the user's rounding prec/mode and compare |result| with 1.0:
*   > 1  : no underflow, take the normal exit
*   < 1  : underflow
*   == 1 : undecided, redo the divide with round-to-zero (see below)
	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fdiv.x	FP_SCR0(a6),fp0		* execute divide

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

	fabs.x	fp0,fp1			* make a copy of result
	fcmp.b	#$1,fp1			* compare |result| with 1.b
	fbgt.w	fdiv_normal_exit	* |result| > 1; no underflow occurred
	fblt.w	fdiv_unfl		* |result| < 1; underflow occurred

*
* we still don't know if underflow occurred. result is ~ equal to 1. but,
* we don't know if the result was an underflow that rounded up to a 1
* or a normalized number that rounded down to a 1. so, redo the entire
* operation using RZ as the rounding mode to see what the pre-rounded
* result is. this case should be relatively rare.
*
* the second divide goes into fp1, so the rounded quotient in fp0 is still
* available to whichever path is taken afterwards.
*
	fmovem.x	FP_SCR1(a6),fp1	* load dst op into fp1

	move.l	L_SCR3(a6),d1
	andi.b	#$c0,d1			* keep rnd prec
	ori.b	#rz_mode*$10,d1		* insert RZ

	fmove.l	d1,fpcr			* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fdiv.x	FP_SCR0(a6),fp1		* execute divide

	fmove.l	#$0,fpcr		* clear FPCR
	fabs.x	fp1			* make absolute value
	fcmp.b	#$1,fp1			* is |result| < 1.b?
	fbge.w	fdiv_normal_exit	* no; no underflow occurred
	bra.w	fdiv_unfl		* yes; underflow occurred

*###########################################################################

*
* Divide: inputs are not both normalized; what are they?
*
fdiv_not_norm:
* Dispatch on the combined tag built in fdiv: d1 = (DTAG << 3) | STAG.
* The table holds 16-bit offsets relative to tbl_fdiv_op; the first
* instruction fetches the offset, the second jumps to tbl_fdiv_op+offset.
	move.w	(tbl_fdiv_op.b,pc,d1.w*2),d1
	jmp	(tbl_fdiv_op.b,pc,d1.w*1)

* two marker words in front of the table: $4AFC and the entry count (48).
* NOTE(review): presumably the expansion of a "swbeg" directive -- confirm.
	.dc.w	$4AFC,48
* Each group of 8 entries is one destination type (the DTAG part of the
* index); the entry within a group is selected by STAG. Comments read
* "dst / src". The last two slots of every group have no handler and hold
* offset 0.
tbl_fdiv_op:
	.dc.w	fdiv_norm-tbl_fdiv_op		* NORM / NORM
	.dc.w	fdiv_inf_load-tbl_fdiv_op	* NORM / ZERO
	.dc.w	fdiv_zero_load-tbl_fdiv_op	* NORM / INF
	.dc.w	fdiv_res_qnan-tbl_fdiv_op	* NORM / QNAN
	.dc.w	fdiv_norm-tbl_fdiv_op		* NORM / DENORM
	.dc.w	fdiv_res_snan-tbl_fdiv_op	* NORM / SNAN
	.dc.w	tbl_fdiv_op-tbl_fdiv_op		* (unused)
	.dc.w	tbl_fdiv_op-tbl_fdiv_op		* (unused)

	.dc.w	fdiv_zero_load-tbl_fdiv_op	* ZERO / NORM
	.dc.w	fdiv_res_operr-tbl_fdiv_op	* ZERO / ZERO
	.dc.w	fdiv_zero_load-tbl_fdiv_op	* ZERO / INF
	.dc.w	fdiv_res_qnan-tbl_fdiv_op	* ZERO / QNAN
	.dc.w	fdiv_zero_load-tbl_fdiv_op	* ZERO / DENORM
	.dc.w	fdiv_res_snan-tbl_fdiv_op	* ZERO / SNAN
	.dc.w	tbl_fdiv_op-tbl_fdiv_op		* (unused)
	.dc.w	tbl_fdiv_op-tbl_fdiv_op		* (unused)

	.dc.w	fdiv_inf_dst-tbl_fdiv_op	* INF / NORM
	.dc.w	fdiv_inf_dst-tbl_fdiv_op	* INF / ZERO
	.dc.w	fdiv_res_operr-tbl_fdiv_op	* INF / INF
	.dc.w	fdiv_res_qnan-tbl_fdiv_op	* INF / QNAN
	.dc.w	fdiv_inf_dst-tbl_fdiv_op	* INF / DENORM
	.dc.w	fdiv_res_snan-tbl_fdiv_op	* INF / SNAN
	.dc.w	tbl_fdiv_op-tbl_fdiv_op		* (unused)
	.dc.w	tbl_fdiv_op-tbl_fdiv_op		* (unused)

	.dc.w	fdiv_res_qnan-tbl_fdiv_op	* QNAN / NORM
	.dc.w	fdiv_res_qnan-tbl_fdiv_op	* QNAN / ZERO
	.dc.w	fdiv_res_qnan-tbl_fdiv_op	* QNAN / INF
	.dc.w	fdiv_res_qnan-tbl_fdiv_op	* QNAN / QNAN
	.dc.w	fdiv_res_qnan-tbl_fdiv_op	* QNAN / DENORM
	.dc.w	fdiv_res_snan-tbl_fdiv_op	* QNAN / SNAN
	.dc.w	tbl_fdiv_op-tbl_fdiv_op		* (unused)
	.dc.w	tbl_fdiv_op-tbl_fdiv_op		* (unused)

	.dc.w	fdiv_norm-tbl_fdiv_op		* DENORM / NORM
	.dc.w	fdiv_inf_load-tbl_fdiv_op	* DENORM / ZERO
	.dc.w	fdiv_zero_load-tbl_fdiv_op	* DENORM / INF
	.dc.w	fdiv_res_qnan-tbl_fdiv_op	* DENORM / QNAN
	.dc.w	fdiv_norm-tbl_fdiv_op		* DENORM / DENORM
	.dc.w	fdiv_res_snan-tbl_fdiv_op	* DENORM / SNAN
	.dc.w	tbl_fdiv_op-tbl_fdiv_op		* (unused)
	.dc.w	tbl_fdiv_op-tbl_fdiv_op		* (unused)

	.dc.w	fdiv_res_snan-tbl_fdiv_op	* SNAN / NORM
	.dc.w	fdiv_res_snan-tbl_fdiv_op	* SNAN / ZERO
	.dc.w	fdiv_res_snan-tbl_fdiv_op	* SNAN / INF
	.dc.w	fdiv_res_snan-tbl_fdiv_op	* SNAN / QNAN
	.dc.w	fdiv_res_snan-tbl_fdiv_op	* SNAN / DENORM
	.dc.w	fdiv_res_snan-tbl_fdiv_op	* SNAN / SNAN
	.dc.w	tbl_fdiv_op-tbl_fdiv_op		* (unused)
	.dc.w	tbl_fdiv_op-tbl_fdiv_op		* (unused)

* Local trampolines: tbl_fdiv_op stores 16-bit offsets, so the shared
* handlers are reached through these long branches placed near the table.
fdiv_res_qnan:
	bra.l	res_qnan
fdiv_res_snan:
	bra.l	res_snan
fdiv_res_operr:
	bra.l	res_operr

	global	fdiv_zero_load		* global for fsgldiv
* Return a ZERO whose sign is the exclusive-or of the operand signs.
* In:  a0 = src operand, a1 = dst operand (only the sign bytes are read)
* Out: fp0 = +/-0, FPSR_CC = Z (plus N for -0); d0/d1 are clobbered.
fdiv_zero_load:
	move.b	SRC_EX.w(a0),d0		* result sign is exclusive
	move.b	DST_EX.w(a1),d1		* or of input signs.
	eor.b	d0,d1			* bit 7 of d1 = result sign
	bpl.b	fdiv_zero_load_p	* result is positive
	fmove.s	#$80000000,fp0		* load a -ZERO
	move.b	#z_bmask+neg_bmask,FPSR_CC(a6)	* set Z/N
	rts
fdiv_zero_load_p:
	fmove.s	#$00000000,fp0		* load a +ZERO
	move.b	#z_bmask,FPSR_CC(a6)	* set Z
	rts

*
* The destination was In Range and the source was a ZERO. The result,
* therefore, is an INF w/ the proper sign.
* So, determine the sign and return a new INF (w/ the j-bit cleared).
* The DZ and ADZ bits are set in the low word of USER_FPSR first.
* In:  a0 = src operand, a1 = dst operand (only the sign bytes are read)
* Out: fp0 = +/-INF, FPSR_CC = INF (plus N for -INF); d0/d1 are clobbered.
*
	global	fdiv_inf_load		* global for fsgldiv
fdiv_inf_load:
	ori.w	#dz_mask+adz_mask,2+USER_FPSR(a6)	* set DZ/ADZ
	move.b	SRC_EX.w(a0),d0		* load both signs
	move.b	DST_EX.w(a1),d1
	eor.b	d0,d1			* bit 7 of d1 = result sign
	bpl.b	fdiv_inf_load_p		* result is positive
	fmove.s	#$ff800000,fp0		* make result -INF
	move.b	#inf_bmask+neg_bmask,FPSR_CC(a6)	* set INF/N
	rts
fdiv_inf_load_p:
	fmove.s	#$7f800000,fp0		* make result +INF
	move.b	#inf_bmask,FPSR_CC(a6)	* set INF
	rts

*
* The destination was an INF w/ an In Range or ZERO source, the result is
* an INF w/ the proper sign.
* The 68881/882 returns the destination INF w/ the new sign (if the j-bit of
* the dst INF is set, then the j-bit of the result INF is also set).
* In:  a0 = src operand, a1 = dst operand
* Out: fp0 = dst INF with the computed sign, FPSR_CC = INF (plus N when
*      negative); d0/d1 are clobbered.
*
	global	fdiv_inf_dst		* global for fsgldiv
fdiv_inf_dst:
	move.b	DST_EX.w(a1),d0		* load both signs
	move.b	SRC_EX.w(a0),d1
	eor.b	d0,d1			* bit 7 of d1 = result sign
	bpl.b	fdiv_inf_dst_p		* result is positive

	fmovem.x	DST.w(a1),fp0	* return result in fp0
	fabs.x	fp0			* clear sign bit
	fneg.x	fp0			* set sign bit
	move.b	#inf_bmask+neg_bmask,FPSR_CC(a6)	* set INF/NEG
	rts

fdiv_inf_dst_p:
	fmovem.x	DST.w(a1),fp0	* return result in fp0
	fabs.x	fp0			* return positive INF
	move.b	#inf_bmask,FPSR_CC(a6)	* set INF
	rts

*########################################################################
* XDEF ****************************************************************	#
*	fneg(): emulates the fneg instruction				#
*	fsneg(): emulates the fsneg instruction				#
*	fdneg(): emulates the fdneg instruction				#
*									#
* XREF ****************************************************************	#
* 	norm() - normalize a denorm to provide EXOP			#
*	scale_to_zero_src() - scale sgl/dbl source exponent		#
*	ovf_res() - return default overflow result			#
*	unf_res() - return default underflow result			#
* 	res_qnan_1op() - return QNAN result				#
*	res_snan_1op() - return SNAN result				#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precision source operand		#
*	d0 = rnd prec,mode						#
*									#
* OUTPUT **************************************************************	#
*	fp0 = result							#
*	fp1 = EXOP (if exception occurred)				#
*									#
* ALGORITHM ***********************************************************	#
*	Handle NANs, zeroes, and infinities as special cases. Separate	#
* norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
* emulated by simply setting sign bit. Sgl/dbl operands must be scaled	#
* and an actual fneg performed to see if overflow/underflow would have	#
* occurred. If so, return default underflow/overflow result. Else,	#
* scale the result exponent and return result. FPSR gets set based on	#
* the result value.							#
*									#
*########################################################################

	global	fsneg
* fsneg: same as fneg but with the rounding precision forced to single.
fsneg:
	andi.b	#$30,d0			* keep rnd mode, clear rnd prec
	ori.b	#s_mode*$10,d0		* insert sgl precision
	bra.b	fneg

	global	fdneg
* fdneg: same as fneg but with the rounding precision forced to double;
* falls through into fneg.
fdneg:
	andi.b	#$30,d0			* keep rnd mode, clear rnd prec
	ori.b	#d_mode*$10,d0		* insert dbl prec

	global	fneg
* fneg: common entry. Saves the rounding info in L_SCR3 and dispatches on
* the source tag; a zero tag falls through to fneg_norm.
fneg:
	move.l	d0,L_SCR3(a6)		* store rnd info
	move.b	STAG(a6),d1
	bne.w	fneg_not_norm		* optimize on non-norm input

*
* NEGATE SIGN : norms and denorms ONLY!
*
fneg_norm:
	andi.b	#$c0,d0			* is precision extended?
	bne.w	fneg_not_ext		* no; go handle sgl or dbl

*
* precision selected is extended. so...we can not get an underflow
* or overflow because of rounding to the correct precision. so...
* skip the scaling and unscaling...
*
* the operand is copied to FP_SCR0 with its sign bit inverted. FPSR_CC is
* written only when the result is negative; a positive result leaves it
* untouched.
* NOTE(review): presumably the caller has already cleared the ccodes --
* confirm.
*
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	move.w	SRC_EX.w(a0),d0
	eori.w	#$8000,d0		* negate sign
	bpl.b	fneg_norm_load		* sign is positive
	move.b	#neg_bmask,FPSR_CC(a6)	* set 'N' ccode bit
fneg_norm_load:
	move.w	d0,FP_SCR0_EX(a6)
	fmovem.x	FP_SCR0(a6),fp0	* return result in fp0
	rts

*
* for an extended precision DENORM, the UNFL exception bit is set
* the accrued bit is NOT set in this instance(no inexactness!)
*
fneg_denorm:
* Entered from fneg_not_norm with d0 = rounding info. sgl/dbl precision is
* handled by fneg_not_ext. For extended precision the UNFL exception bit
* is set, the operand is returned in fp0 with its sign inverted, and when
* UNFL is enabled an EXOP is built by fneg_ext_unfl_ena.
	andi.b	#$c0,d0			* is precision extended?
	bne.b	fneg_not_ext		* no; go handle sgl or dbl

	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set unfl exc bit

	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	move.w	SRC_EX.w(a0),d0
	eori.w	#$8000,d0		* negate sign
	bpl.b	fneg_denorm_done	* result is positive
	move.b	#neg_bmask,FPSR_CC(a6)	* result is negative; set 'N' ccode bit
fneg_denorm_done:
	move.w	d0,FP_SCR0_EX(a6)
	fmovem.x	FP_SCR0(a6),fp0	* return default result in fp0

	btst	#unfl_bit,FPCR_ENABLE(a6)	* is UNFL enabled?
	bne.b	fneg_ext_unfl_ena	* yes
	rts

*
* the input is an extended DENORM and underflow is enabled in the FPCR.
* normalize the mantissa and add the bias of 0x6000 to the resulting negative
* exponent and insert back into the operand.
*
fneg_ext_unfl_ena:
* Build the EXOP for an extended-precision DENORM with UNFL enabled.
* FP_SCR0 already holds the negated operand (fp0 was loaded from it).
* norm() is called on FP_SCR0 and its d0 result is treated as the shift
* count: the new exponent is (-d0 + $6000) masked to 15 bits, combined
* with the sign currently in FP_SCR0. The EXOP is returned in fp1.
	lea	FP_SCR0(a6),a0		* pass: ptr to operand
	bsr.l	norm			* normalize result
	neg.w	d0			* new exponent = -(shft val)
	addi.w	#$6000,d0		* add new bias to exponent
	move.w	FP_SCR0_EX(a6),d1	* fetch old sign,exp
	andi.w	#$8000,d1		* keep old sign
	andi.w	#$7fff,d0		* clear sign position
	or.w	d1,d0			* concat old sign, new exponent
	move.w	d0,FP_SCR0_EX(a6)	* insert new exponent
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	rts

*
* operand is either single or double
*
fneg_not_ext:
* d0.b holds only the precision bits here (masked with $c0 by the caller).
	cmpi.b	#s_mode*$10,d0		* separate sgl/dbl prec
	bne.b	fneg_dbl

*
* operand is to be rounded to single precision
*
* the operand is copied to FP_SCR0 and scaled; the scale factor returned
* in d0 is compared against the single-precision limits to choose the
* underflow, overflow, may-overflow or normal path.
*
fneg_sgl:
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	bsr.l	scale_to_zero_src	* calculate scale factor

	cmpi.l	#$3fff-$3f80,d0		* will move in underflow?
	bge.w	fneg_sd_unfl		* yes; go handle underflow
	cmpi.l	#$3fff-$407e,d0		* will move in overflow?
	beq.w	fneg_sd_may_ovfl	* maybe; go check
	blt.w	fneg_sd_ovfl		* yes; go handle overflow

*
* operand will NOT overflow or underflow when moved in to the fp reg file
*
fneg_sd_normal:
	fmove.l	#$0,fpsr		* clear FPSR
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fneg.x	FP_SCR0(a6),fp0		* perform negation

	fmove.l	fpsr,d1			* save FPSR
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

fneg_sd_normal_exit:
* Common exit (also entered from fneg_sd_may_ovfl): fp0 holds the scaled
* result and d0 the scale factor. Rewrite the exponent as (exp - d0), keep
* the sign and return the value in fp0. d2 is scratch and is preserved on
* the stack.
	move.l	d2,-(sp)		* save d2
	fmovem.x	fp0,FP_SCR0(a6)	* store out result
	move.w	FP_SCR0_EX(a6),d1	* load sgn,exp
	move.w	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	sub.l	d0,d1			* apply scale factor (exp - d0)
	andi.w	#$8000,d2		* keep old sign
	or.w	d1,d2			* concat old sign,new exp
	move.w	d2,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp0	* return result in fp0
	rts

*
* operand is to be rounded to double precision
*
fneg_dbl:
* Same as fneg_sgl, but the scale factor is compared against the
* double-precision limits.
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	bsr.l	scale_to_zero_src	* calculate scale factor

	cmpi.l	#$3fff-$3c00,d0		* will move in underflow?
	bge.b	fneg_sd_unfl		* yes; go handle underflow
	cmpi.l	#$3fff-$43fe,d0		* will move in overflow?
	beq.w	fneg_sd_may_ovfl	* maybe; go check
	blt.w	fneg_sd_ovfl		* yes; go handle overflow
	bra.w	fneg_sd_normal		* no; go handle normalized op

*
* operand WILL underflow when moved in to the fp register file
*
fneg_sd_unfl:
* The sgl/dbl result underflows. The negation is done by flipping the sign
* bit of the scaled operand directly in FP_SCR0; 'N' is set when the
* flipped sign is negative. d0 still holds the scale factor.
	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set unfl exc bit

	eori.b	#$80,FP_SCR0_EX(a6)	* negate sign
	bpl.b	fneg_sd_unfl_tst
	bset	#neg_bit,FPSR_CC(a6)	* set 'N' ccode bit

* if underflow or inexact is enabled, go calculate EXOP first.
fneg_sd_unfl_tst:
	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$0b,d1			* is UNFL or INEX enabled?
	bne.b	fneg_sd_unfl_ena	* yes

fneg_sd_unfl_dis:
* Default underflow result: unf_res is called with a0 -> FP_SCR0, d1 =
* rounding info and the scale factor still in d0. The result is read back
* from FP_SCR0, and the d0 returned by unf_res is OR-ed into FPSR_CC.
* NOTE(review): that unf_res works in place on (a0) is inferred from this
* call site -- confirm against unf_res.
	lea	FP_SCR0(a6),a0		* pass: result addr
	move.l	L_SCR3(a6),d1		* pass: rnd prec,mode
	bsr.l	unf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* unf_res may have set 'Z'
	fmovem.x	FP_SCR0(a6),fp0	* return default result in fp0
	rts

*
* operand will underflow AND underflow is enabled. 
* therefore, we must return the result rounded to extended precision.
*
fneg_sd_unfl_ena:
* Build the EXOP in FP_SCR1 from the already-negated operand in FP_SCR0:
* same mantissa and sign, exponent = (exp - d0 + $6000) masked to 15 bits.
* FP_SCR0 and d0 are left untouched for fneg_sd_unfl_dis. d2 is scratch
* and is preserved on the stack.
	move.l	FP_SCR0_HI(a6),FP_SCR1_HI(a6)
	move.l	FP_SCR0_LO(a6),FP_SCR1_LO(a6)
	move.w	FP_SCR0_EX(a6),d1	* load current exponent

	move.l	d2,-(sp)		* save d2
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* subtract scale factor
	addi.l	#$6000,d1		* add new bias
	andi.w	#$7fff,d1		* keep only the 15-bit exponent
	or.w	d2,d1			* concat new sign,new exp
	move.w	d1,FP_SCR1_EX(a6)	* insert new exp
	fmovem.x	FP_SCR1(a6),fp1	* return EXOP in fp1
	move.l	(sp)+,d2		* restore d2
	bra.b	fneg_sd_unfl_dis

*
* operand WILL overflow.
*
fneg_sd_ovfl:
* The sgl/dbl result overflows. The negation is still executed so that the
* FPSR bits (INEX2, N) are produced and merged into USER_FPSR. Note that
* only fp0 receives the negated value; FP_SCR0 keeps the scaled source.
	fmove.l	#$0,fpsr		* clear FPSR
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fneg.x	FP_SCR0(a6),fp0		* perform negation

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	fpsr,d1			* save FPSR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

fneg_sd_ovfl_tst:
* Also entered from fneg_sd_may_ovfl once overflow has been confirmed.
	ori.l	#ovfl_inx_mask,USER_FPSR(a6)	* set ovfl/aovfl/ainex

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$13,d1			* is OVFL or INEX enabled?
	bne.b	fneg_sd_ovfl_ena	* yes

*
* OVFL is not enabled; therefore, we must create the default result by
* calling ovf_res().
*
* d1 is set to all ones or zero from the N ccode bit and passed with the
* rounding info; afterwards d0 is OR-ed into FPSR_CC and the value at (a0)
* is loaded into fp0.
* NOTE(review): the a0/d0 outputs of ovf_res are inferred from how they
* are used here -- confirm against ovf_res.
*
fneg_sd_ovfl_dis:
	btst	#neg_bit,FPSR_CC(a6)	* is result negative?
	sne	d1			* set sign param accordingly
	move.l	L_SCR3(a6),d0		* pass: prec,mode
	bsr.l	ovf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* set INF,N if applicable
	fmovem.x	(a0),fp0	* return default result in fp0
	rts

*
* OVFL is enabled.
* the INEX2 bit has already been updated by the round to the correct precision.
* now, round to extended(and don't alter the FPSR).
*
fneg_sd_ovfl_ena:
* Build the EXOP for an enabled overflow/inexact trap and return it in
* fp1, then continue at fneg_sd_ovfl_dis for the default result.
*
* In:  d0 = scale factor, FP_SCR0 = scaled SOURCE operand
* Out: fp1 = EXOP; FP_SCR0_EX is overwritten; d1 is clobbered; d2 is
*      preserved via the stack.
*
* On both paths that reach this label (fneg_sd_ovfl and fneg_sd_may_ovfl)
* the negation was written to fp0 only, so FP_SCR0 still carries the sign
* of the source. The EXOP must carry the sign of the negated result, which
* is also the sign used for the default result (taken from the N ccode
* bit) and the sign the underflow path produces by flipping FP_SCR0 before
* building its EXOP. The sign copy is therefore inverted here.
* NOTE(review): derived from reading the code; not verified on hardware.
*
* The exponent becomes (exp - d0 - $6000) masked to 15 bits.
	move.l	d2,-(sp)		* save d2
	move.w	FP_SCR0_EX(a6),d1	* fetch source {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* isolate source sign
	eori.w	#$8000,d2		* result sign = inverted source sign
	sub.l	d0,d1			* apply scale factor (exp - d0)
	subi.l	#$6000,d1		* subtract bias
	andi.w	#$7fff,d1		* keep only the 15-bit exponent
	or.w	d2,d1			* concat result sign,new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	move.l	(sp)+,d2		* restore d2
	bra.b	fneg_sd_ovfl_dis

*
* the negation MAY overflow. so...
* perform it on the scaled operand with the user's rounding prec/mode and
* check whether rounding pushed |result| up to 2.0 or more.
*
fneg_sd_may_ovfl:
	fmove.l	#$0,fpsr		* clear FPSR
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fneg.x	FP_SCR0(a6),fp0		* perform negation

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

	fabs.x	fp0,fp1			* fp1 = |result| (fp0 left intact)
	fcmp.b	#$2,fp1			* is |result| >= 2.b?
	fbge.w	fneg_sd_ovfl_tst	* yes; overflow has occurred

* no, it didn't overflow; we have correct result
	bra.w	fneg_sd_normal_exit

*#########################################################################

*
* input is not normalized; what is it?
*
fneg_not_norm:
* d1 = source tag (loaded in fneg). DENORMs go to fneg_denorm, NANs to the
* shared monadic NAN handlers; anything left is handled inline.
	cmpi.b	#DENORM,d1		* weed out DENORM
	beq.w	fneg_denorm
	cmpi.b	#SNAN,d1		* weed out SNAN
	beq.l	res_snan_1op
	cmpi.b	#QNAN,d1		* weed out QNAN
	beq.l	res_qnan_1op

*
* do the fneg; at this point, only possible ops are ZERO and INF.
* use fneg to determine ccodes.
* prec:mode should be zero at this point but it won't affect answer anyways.
*
	fneg.x	SRC_EX.w(a0),fp0	* do fneg
	fmove.l	fpsr,d0
	rol.l	#$8,d0			* rotate top FPSR byte (ccodes) into lo byte
	move.b	d0,FPSR_CC(a6)		* insert correct ccodes
	rts

*########################################################################
* XDEF ****************************************************************	#
* 	ftst(): emulates the ftest instruction				#
*									#
* XREF ****************************************************************	#
* 	res{s,q}nan_1op() - set NAN result for monadic instruction	#
*									#
* INPUT ***************************************************************	#
* 	a0 = pointer to extended precision source operand		#
*									#
* OUTPUT **************************************************************	#
*	none								#
*									#
* ALGORITHM ***********************************************************	#
* 	Check the source operand tag (STAG) and set the FPSR condition	#
* codes according to the operand type and sign.				#
*									#
*########################################################################

	global	ftst
* ftst: set the condition code byte FPSR_CC(a6) from the type (STAG) and
* sign of the source operand at (a0). No fp register is written.
* For a positive NORM or DENORM nothing is stored at all.
* NOTE(review): presumably the caller has already cleared the ccodes --
* confirm.
ftst:
	move.b	STAG(a6),d1
	bne.b	ftst_not_norm		* optimize on non-norm input

*
* Norm:
*
ftst_norm:
	tst.b	SRC_EX.w(a0)		* is operand negative?
	bmi.b	ftst_norm_m		* yes
	rts
ftst_norm_m:
	move.b	#neg_bmask,FPSR_CC(a6)	* set 'N' ccode bit
	rts

*
* input is not normalized; what is it?
*
ftst_not_norm:
	cmpi.b	#ZERO,d1		* weed out ZERO
	beq.b	ftst_zero
	cmpi.b	#INF,d1			* weed out INF
	beq.b	ftst_inf
	cmpi.b	#SNAN,d1		* weed out SNAN
	beq.l	res_snan_1op
	cmpi.b	#QNAN,d1		* weed out QNAN
	beq.l	res_qnan_1op

*
* Denorm: (reached by falling through all of the tests above)
*
ftst_denorm:
	tst.b	SRC_EX.w(a0)		* is operand negative?
	bmi.b	ftst_denorm_m		* yes
	rts
ftst_denorm_m:
	move.b	#neg_bmask,FPSR_CC(a6)	* set 'N' ccode bit
	rts

*
* Infinity:
*
ftst_inf:
	tst.b	SRC_EX.w(a0)		* is operand negative?
	bmi.b	ftst_inf_m		* yes
ftst_inf_p:
	move.b	#inf_bmask,FPSR_CC(a6)	* set 'I' ccode bit
	rts
ftst_inf_m:
	move.b	#inf_bmask+neg_bmask,FPSR_CC(a6)	* set 'I','N' ccode bits
	rts

*
* Zero:
*
ftst_zero:
	tst.b	SRC_EX.w(a0)		* is operand negative?
	bmi.b	ftst_zero_m		* yes
ftst_zero_p:
	move.b	#z_bmask,FPSR_CC(a6)	* set 'Z' ccode bit
	rts
ftst_zero_m:
	move.b	#z_bmask+neg_bmask,FPSR_CC(a6)	* set 'Z','N' ccode bits
	rts

*########################################################################
* XDEF ****************************************************************	#
*	fint(): emulates the fint instruction				#
*									#
* XREF ****************************************************************	#
*	res_{s,q}nan_1op() - set NAN result for monadic operation	#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precision source operand		#
*	d0 = round precision/mode					#
*									#
* OUTPUT **************************************************************	#
*	fp0 = result							#
*									#
* ALGORITHM ***********************************************************	#
* 	Separate according to operand type. Unnorms don't pass through 	#
* here. For norms, load the rounding mode/prec, execute a "fint", then 	#
* store the resulting FPSR bits.					#
* 	For denorms, force the j-bit to a one and do the same as for	#
* norms. Denorms are so low that the answer will either be a zero or a 	#
* one.									#
* 	For zeroes/infs/NANs, return the same while setting the FPSR	#
* as appropriate.							#
*									#
*########################################################################

	global	fint
* fint: dispatch on the source tag. NORMs (tag zero) are handled by
* fint_norm; ZERO and INF results are built directly; DENORMs are turned
* into a tiny NORM and sent through fint_norm; NANs go to the shared
* monadic NAN handlers.
fint:
	move.b	STAG(a6),d1
	bne.b	fint_not_norm		* optimize on non-norm input

*
* Norm:
*
* execute a real fint with the caller's rounding mode (precision bits
* masked off) and OR the resulting FPSR into USER_FPSR. d0 is clobbered.
*
fint_norm:
	andi.b	#$30,d0			* keep rnd mode; set prec = ext

	fmove.l	d0,fpcr			* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fint.x	SRC.w(a0),fp0		* execute fint

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	fpsr,d0			* save FPSR
	or.l	d0,USER_FPSR(a6)	* set exception bits

	rts

*
* input is not normalized; what is it?
*
fint_not_norm:
	cmpi.b	#ZERO,d1		* weed out ZERO
	beq.b	fint_zero
	cmpi.b	#INF,d1			* weed out INF
	beq.b	fint_inf
	cmpi.b	#DENORM,d1		* weed out DENORM
	beq.b	fint_denorm
	cmpi.b	#SNAN,d1		* weed out SNAN
	beq.l	res_snan_1op
	bra.l	res_qnan_1op		* anything left is treated as a QNAN

*
* Denorm:
*
* for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
* also, the INEX2 and AINEX exception bits will be set.
* so, we could either set these manually or force the DENORM
* to a very small NORM and ship it to the NORM routine.
* I do the latter.
*
* only the sign/exponent word and the top byte of the mantissa are
* written to FP_SCR0; the remaining mantissa bytes of the scratch area
* are left as they were.
*
fint_denorm:
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)	* copy sign, zero exp
	move.b	#$80,FP_SCR0_HI(a6)	* force DENORM ==> small NORM
	lea	FP_SCR0(a6),a0		* fint_norm reads its operand via a0
	bra.b	fint_norm

*
* Zero:
*
fint_zero:
	tst.b	SRC_EX.w(a0)		* is ZERO negative?
	bmi.b	fint_zero_m		* yes
fint_zero_p:
	fmove.s	#$00000000,fp0		* return +ZERO in fp0
	move.b	#z_bmask,FPSR_CC(a6)	* set 'Z' ccode bit
	rts
fint_zero_m:
	fmove.s	#$80000000,fp0		* return -ZERO in fp0
	move.b	#z_bmask+neg_bmask,FPSR_CC(a6)	* set 'Z','N' ccode bits
	rts

*
* Infinity:
*
fint_inf:
	fmovem.x	SRC.w(a0),fp0	* return result in fp0
	tst.b	SRC_EX.w(a0)		* is INF negative?
	bmi.b	fint_inf_m		* yes
fint_inf_p:
	move.b	#inf_bmask,FPSR_CC(a6)	* set 'I' ccode bit
	rts
fint_inf_m:
	move.b	#inf_bmask+neg_bmask,FPSR_CC(a6)	* set 'N','I' ccode bits
	rts

*########################################################################
* XDEF ****************************************************************	#
*	fintrz(): emulates the fintrz instruction			#
*									#
* XREF ****************************************************************	#
*	res_{s,q}nan_1op() - set NAN result for monadic operation	#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precision source operand		#
*	d0 = round precision/mode					#
*									#
* OUTPUT **************************************************************	#
* 	fp0 = result							#
*									#
* ALGORITHM ***********************************************************	#
*	Separate according to operand type. Unnorms don't pass through	#
* here. For norms, load the rounding mode/prec, execute a "fintrz", 	#
* then store the resulting FPSR bits.					#
* 	For denorms, force the j-bit to a one and do the same as for	#
* norms. Denorms are so low that the answer will either be a zero or a	#
* one.									#
* 	For zeroes/infs/NANs, return the same while setting the FPSR	#
* as appropriate.							#
*									#
*########################################################################

	global	fintrz
* fintrz: dispatch on the source tag. NORMs (tag zero) are handled by
* fintrz_norm; ZERO and INF results are built directly; DENORMs are turned
* into a tiny NORM and sent through fintrz_norm; NANs go to the shared
* monadic NAN handlers.
fintrz:
	move.b	STAG(a6),d1
	bne.b	fintrz_not_norm		* optimize on non-norm input

*
* Norm:
*
* execute a real fintrz and OR the resulting FPSR into USER_FPSR. unlike
* fint_norm, the FPCR is not loaded here and the rounding info passed in
* d0 is not used; d0 is clobbered.
*
fintrz_norm:
	fmove.l	#$0,fpsr		* clear FPSR

	fintrz.x	SRC.w(a0),fp0	* execute fintrz

	fmove.l	fpsr,d0			* save FPSR
	or.l	d0,USER_FPSR(a6)	* set exception bits

	rts

*
* input is not normalized; what is it?
*
fintrz_not_norm:
	cmpi.b	#ZERO,d1		* weed out ZERO
	beq.b	fintrz_zero
	cmpi.b	#INF,d1			* weed out INF
	beq.b	fintrz_inf
	cmpi.b	#DENORM,d1		* weed out DENORM
	beq.b	fintrz_denorm
	cmpi.b	#SNAN,d1		* weed out SNAN
	beq.l	res_snan_1op
	bra.l	res_qnan_1op		* anything left is treated as a QNAN

*
* Denorm:
*
* for DENORMs, the result will be (+/-)ZERO.
* also, the INEX2 and AINEX exception bits will be set.
* so, we could either set these manually or force the DENORM
* to a very small NORM and ship it to the NORM routine.
* I do the latter.
*
* only the sign/exponent word and the top byte of the mantissa are
* written to FP_SCR0; the remaining mantissa bytes of the scratch area
* are left as they were.
*
fintrz_denorm:
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)	* copy sign, zero exp
	move.b	#$80,FP_SCR0_HI(a6)	* force DENORM ==> small NORM
	lea	FP_SCR0(a6),a0		* fintrz_norm reads its operand via a0
	bra.b	fintrz_norm

*
* Zero:
*
fintrz_zero:
	tst.b	SRC_EX.w(a0)		* is ZERO negative?
	bmi.b	fintrz_zero_m		* yes
fintrz_zero_p:
	fmove.s	#$00000000,fp0		* return +ZERO in fp0
	move.b	#z_bmask,FPSR_CC(a6)	* set 'Z' ccode bit
	rts
fintrz_zero_m:
	fmove.s	#$80000000,fp0		* return -ZERO in fp0
	move.b	#z_bmask+neg_bmask,FPSR_CC(a6)	* set 'Z','N' ccode bits
	rts

*
* Infinity:
*
fintrz_inf:
	fmovem.x	SRC.w(a0),fp0	* return result in fp0
	tst.b	SRC_EX.w(a0)		* is INF negative?
	bmi.b	fintrz_inf_m		* yes
fintrz_inf_p:
	move.b	#inf_bmask,FPSR_CC(a6)	* set 'I' ccode bit
	rts
fintrz_inf_m:
	move.b	#inf_bmask+neg_bmask,FPSR_CC(a6)	* set 'N','I' ccode bits
	rts

*########################################################################
* XDEF ****************************************************************	#
*	fabs():  emulates the fabs instruction				#
*	fsabs(): emulates the fsabs instruction				#
*	fdabs(): emulates the fdabs instruction				#
*									#
* XREF **************************************************************** #
*	norm() - normalize denorm mantissa to provide EXOP		#
*	scale_to_zero_src() - make exponent. = 0; get scale factor	#
*	unf_res() - calculate underflow result				#
*	ovf_res() - calculate overflow result				#
*	res_{s,q}nan_1op() - set NAN result for monadic operation	#
*									#
* INPUT *************************************************************** #
*	a0 = pointer to extended precision source operand		#
*	d0 = rnd precision/mode						#
*									#
* OUTPUT ************************************************************** #
*	fp0 = result							#
*	fp1 = EXOP (if exception occurred)				#
*									#
* ALGORITHM ***********************************************************	#
*	Handle NANs, infinities, and zeroes as special cases. Divide	#
* norms into extended, single, and double precision. 			#
* 	Simply clear sign for extended precision norm. Ext prec denorm	#
* gets an EXOP created for it since it's an underflow.			#
*	Double and single precision can overflow and underflow. First,	#
* scale the operand such that the exponent is zero. Perform an "fabs"	#
* using the correct rnd mode/prec. Check to see if the original 	#
* exponent would take an exception. If so, use unf_res() or ovf_res()	#
* to calculate the default result. Also, create the EXOP for the	#
* exceptional case. If no exception should occur, insert the correct 	#
* result exponent and return.						#
* 	Unnorms don't pass through here.				#
*									#
*########################################################################

	global	fsabs
* fsabs: single-precision flavour of fabs. Only the low byte of the
* rnd info in d0 is rewritten; control then joins the common fabs code.
fsabs:
	andi.b	#$30,d0			* keep bits 4-5 only (clears rnd prec)
	ori.b	#s_mode*$10,d0		* insert sgl precision
	bra.b	fabs			* skip fdabs; join common code

	global	fdabs
* fdabs: double-precision flavour of fabs. Rewrites the precision in
* the low byte of d0 and then falls straight through into fabs.
fdabs:
	andi.b	#$30,d0			* keep bits 4-5 only (clears rnd prec)
	ori.b	#d_mode*$10,d0		* insert dbl precision

	global	fabs
* fabs: common entry (fsabs branches here, fdabs falls through).
*   in:  a0 = ptr to extended precision source, d0 = rnd prec/mode
* The rnd info is saved in L_SCR3 for the later paths. A zero STAG
* continues into fabs_norm; any other tag goes to fabs_not_norm.
fabs:
	move.l	d0,L_SCR3(a6)		* store rnd info
	move.b	STAG(a6),d1		* d1 = source tag (Z set when tag is 0)
	bne.w	fabs_not_norm		* optimize on non-norm input

*
* ABSOLUTE VALUE: norms and denorms ONLY!
*
fabs_norm:
	andi.b	#$c0,d0			* is precision extended?
	bne.b	fabs_not_ext		* no; go handle sgl or dbl

*
* precision selected is extended. so...we can not get an underflow
* or overflow because of rounding to the correct precision. so...
* skip the scaling and unscaling...
*
* The operand is copied to FP_SCR0 with bit 15 of the sgn/exp word
* cleared and loaded from there; the source itself is not modified.
*
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	move.w	SRC_EX.w(a0),d1
	bclr	#15,d1			* force absolute value
	move.w	d1,FP_SCR0_EX(a6)	* insert exponent
	fmovem.x	FP_SCR0(a6),fp0	* return result in fp0
	rts

*
* for an extended precision DENORM, the UNFL exception bit is set
* the accrued bit is NOT set in this instance(no inexactness!)
*
* fabs_denorm: reached from fabs_not_norm when the source tag is DENORM.
* Non-extended precisions share the sgl/dbl code at fabs_not_ext.
* For extended precision: flag UNFL, return |src| in fp0, and build an
* EXOP only when UNFL is enabled in the FPCR.
fabs_denorm:
	andi.b	#$c0,d0			* is precision extended?
	bne.b	fabs_not_ext		* no

	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set unfl exc bit

	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	move.w	SRC_EX.w(a0),d0		* d0 is free now; reuse as scratch
	bclr	#15,d0			* clear sign
	move.w	d0,FP_SCR0_EX(a6)	* insert exponent

	fmovem.x	FP_SCR0(a6),fp0	* return default result in fp0

	btst	#unfl_bit,FPCR_ENABLE(a6)	* is UNFL enabled?
	bne.b	fabs_ext_unfl_ena	* yes; also produce the EXOP
	rts

*
* the input is an extended DENORM and underflow is enabled in the FPCR.
* normalize the mantissa and add the bias of 0x6000 to the resulting negative
* exponent and insert back into the operand.
*
* fabs_ext_unfl_ena: build the EXOP in fp1 for an extended DENORM.
* FP_SCR0 already holds |src|. norm() is called on it and d0 is then
* used as the shift count it performed (per the comment below; norm
* itself is outside this view).
fabs_ext_unfl_ena:
	lea	FP_SCR0(a6),a0		* pass: ptr to operand
	bsr.l	norm			* normalize result
	neg.w	d0			* new exponent = -(shft val)
	addi.w	#$6000,d0		* add new bias to exponent
	move.w	FP_SCR0_EX(a6),d1	* fetch old sign,exp
	andi.w	#$8000,d1		* keep old sign
	andi.w	#$7fff,d0		* clear sign position
	or.w	d1,d0			* concat old sign, new exponent
	move.w	d0,FP_SCR0_EX(a6)	* insert new exponent
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	rts

*
* operand is either single or double
*
* fabs_not_ext: d0.b holds only the precision bits here (both callers
* masked it with $c0). Anything other than sgl is handled as dbl.
fabs_not_ext:
	cmpi.b	#s_mode*$10,d0		* separate sgl/dbl prec
	bne.b	fabs_dbl

*
* operand is to be rounded to single precision
*
* Copy the source to FP_SCR0, scale it, and classify the scale factor
* returned in d0. When neither limit is hit, execution falls through
* into fabs_sd_normal.
*
fabs_sgl:
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	bsr.l	scale_to_zero_src	* calculate scale factor

	cmpi.l	#$3fff-$3f80,d0		* will move in underflow?
	bge.w	fabs_sd_unfl		* yes; go handle underflow
	cmpi.l	#$3fff-$407e,d0		* will move in overflow?
	beq.w	fabs_sd_may_ovfl	* maybe; go check
	blt.w	fabs_sd_ovfl		* yes; go handle overflow

*
* operand will NOT overflow or underflow when moved in to the fp reg file
*
* fabs_sd_normal: perform the fabs on the scaled operand with the
* caller's rounding info, merge the resulting FPSR into USER_FPSR, then
* undo the scaling on the result's exponent.
*   in:  d0 = scale factor, FP_SCR0 = scaled operand
fabs_sd_normal:
	fmove.l	#$0,fpsr		* clear FPSR
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fabs.x	FP_SCR0(a6),fp0		* perform absolute

	fmove.l	fpsr,d1			* save FPSR
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

* fabs_sd_normal_exit is also entered from fabs_sd_may_ovfl once the
* result is known not to have overflowed (fp0 = result, d0 = scale).
fabs_sd_normal_exit:
	move.l	d2,-(sp)		* save d2
	fmovem.x	fp0,FP_SCR0(a6)	* store out result
	move.w	FP_SCR0_EX(a6),d1	* load sgn,exp
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	sub.l	d0,d1			* exponent - scale factor (undo scaling)
	andi.w	#$8000,d2		* keep old sign
	or.w	d1,d2			* concat old sign,new exp
	move.w	d2,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp0	* return result in fp0
	rts

*
* operand is to be rounded to double precision
*
* fabs_dbl: same flow as fabs_sgl with the double precision exponent
* limits; the in-range case branches back to the shared fabs_sd_normal.
fabs_dbl:
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	bsr.l	scale_to_zero_src	* calculate scale factor

	cmpi.l	#$3fff-$3c00,d0		* will move in underflow?
	bge.b	fabs_sd_unfl		* yes; go handle underflow
	cmpi.l	#$3fff-$43fe,d0		* will move in overflow?
	beq.w	fabs_sd_may_ovfl	* maybe; go check
	blt.w	fabs_sd_ovfl		* yes; go handle overflow
	bra.w	fabs_sd_normal		* no; go handle normalized op

*
* operand WILL underflow when moved in to the fp register file
*
* fabs_sd_unfl: sgl/dbl result underflows.
*   in:  d0 = scale factor, FP_SCR0 = scaled operand
* The sign is cleared directly in FP_SCR0 (bclr on memory is a byte
* operation, so bit 7 of the first byte is the sign of the sgn/exp word).
fabs_sd_unfl:
	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set unfl exc bit

	bclr	#$7,FP_SCR0_EX(a6)	* force absolute value

* if underflow or inexact is enabled, go calculate EXOP first.
	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$0b,d1			* is UNFL or INEX enabled?
	bne.b	fabs_sd_unfl_ena	* yes

* default result: unf_res() works in place on FP_SCR0 and its d0 return
* is OR'ed into the ccode byte.
fabs_sd_unfl_dis:
	lea	FP_SCR0(a6),a0		* pass: result addr
	move.l	L_SCR3(a6),d1		* pass: rnd prec,mode
	bsr.l	unf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* set possible 'Z' ccode
	fmovem.x	FP_SCR0(a6),fp0	* return default result in fp0
	rts

*
* operand will underflow AND underflow is enabled. 
* therefore, we must return the result rounded to extended precision.
*
* fabs_sd_unfl_ena: build the EXOP in fp1 from a copy of the operand in
* FP_SCR1 (FP_SCR0 is left intact for unf_res), using the exponent
* (exp - scale factor + $6000) masked to 15 bits. d0 is not modified, and
* control rejoins fabs_sd_unfl_dis for the default result.
fabs_sd_unfl_ena:
	move.l	FP_SCR0_HI(a6),FP_SCR1_HI(a6)
	move.l	FP_SCR0_LO(a6),FP_SCR1_LO(a6)
	move.w	FP_SCR0_EX(a6),d1	* load current exponent

	move.l	d2,-(sp)		* save d2
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* subtract scale factor
	addi.l	#$6000,d1		* add new bias
	andi.w	#$7fff,d1		* wrap into the 15-bit exponent field
	or.w	d2,d1			* concat new sign,new exp
	move.w	d1,FP_SCR1_EX(a6)	* insert new exp
	fmovem.x	FP_SCR1(a6),fp1	* return EXOP in fp1
	move.l	(sp)+,d2		* restore d2
	bra.b	fabs_sd_unfl_dis

*
* operand WILL overflow.
*
* fabs_sd_ovfl: sgl/dbl result overflows.
*   in:  d0 = scale factor, FP_SCR0 = scaled operand
* The fabs is still executed so that INEX2/N from rounding at the
* requested precision are collected into USER_FPSR.
fabs_sd_ovfl:
	fmove.l	#$0,fpsr		* clear FPSR
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fabs.x	FP_SCR0(a6),fp0		* perform absolute

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	fpsr,d1			* save FPSR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

* fabs_sd_ovfl_tst is also the target of fabs_sd_may_ovfl once the
* rounded result turned out to be >= 2.
fabs_sd_ovfl_tst:
	ori.l	#ovfl_inx_mask,USER_FPSR(a6)	* set ovfl/aovfl/ainex

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$13,d1			* is OVFL or INEX enabled?
	bne.b	fabs_sd_ovfl_ena	* yes

*
* OVFL is not enabled; therefore, we must create the default result by
* calling ovf_res().
*
* d0 (the scale factor) is overwritten here, so the EXOP path must run
* before this point. The result is loaded through a0 after ovf_res()
* returns.
*
fabs_sd_ovfl_dis:
	btst	#neg_bit,FPSR_CC(a6)	* is result negative?
	sne	d1			* set sign param accordingly
	move.l	L_SCR3(a6),d0		* pass: prec,mode
	bsr.l	ovf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* set INF,N if applicable
	fmovem.x	(a0),fp0	* return default result in fp0
	rts

*
* OVFL is enabled.
* the INEX2 bit has already been updated by the round to the correct precision.
* now, round to extended(and don't alter the FPSR).
*
* fabs_sd_ovfl_ena: build the EXOP in fp1, then rejoin fabs_sd_ovfl_dis
* for the default result.
*   in:  d0 = scale factor, FP_SCR0 = scaled source operand
*   out: fp1 = EXOP; d0 is preserved for nobody (ovfl_dis reloads it)
*
* FP_SCR0_EX still carries the sign of the SOURCE operand here: nothing
* on the overflow paths writes fp0 back or clears the sign (the
* underflow path does so explicitly with bclr #$7). The EXOP of an
* absolute value must not be negative, so only the exponent field is
* taken and the sign position is left clear.
fabs_sd_ovfl_ena:
	move.w	FP_SCR0_EX(a6),d1	* fetch {sgn,exp}
	andi.l	#$7fff,d1		* strip sign: |x| is never negative
	sub.l	d0,d1			* exponent - scale factor (undo scaling)
	subi.l	#$6000,d1		* subtract bias
	andi.w	#$7fff,d1		* wrap into the 15-bit exponent field
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent, sign = 0
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	bra.b	fabs_sd_ovfl_dis

*
* the move in MAY overflow. so...
*
* The scale factor sits exactly on the overflow boundary, so the fabs is
* executed on the scaled operand and the rounded magnitude is compared
* against 2: >= 2 means rounding carried into the next exponent and the
* result overflowed.
*
fabs_sd_may_ovfl:
	fmove.l	#$0,fpsr		* clear FPSR
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fabs.x	FP_SCR0(a6),fp0		* perform absolute

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

	fabs.x	fp0,fp1			* make a copy of result
	fcmp.b	#$2,fp1			* is |result| >= 2.b?
	fbge.w	fabs_sd_ovfl_tst	* yes; overflow has occurred

* no, it didn't overflow; we have correct result
	bra.w	fabs_sd_normal_exit

*#########################################################################

*
* input is not normalized; what is it?
*
* fabs_not_norm: d1.b = non-zero source tag. DENORMs and NANs are
* handed to their own handlers; whatever remains gets its absolute
* value loaded into fp0 and is reported as INF when the tag says so,
* otherwise as ZERO.
fabs_not_norm:
	cmpi.b	#DENORM,d1		* DENORM?
	beq.w	fabs_denorm
	cmpi.b	#SNAN,d1		* SNAN?
	beq.l	res_snan_1op
	cmpi.b	#QNAN,d1		* QNAN?
	beq.l	res_qnan_1op

	fabs.x	SRC.w(a0),fp0		* fp0 = |src|

	cmpi.b	#INF,d1			* INF?
	bne.b	fabs_zero		* no; treat as ZERO
fabs_inf:
	move.b	#inf_bmask,FPSR_CC(a6)	* ccodes = 'I'
	rts
fabs_zero:
	move.b	#z_bmask,FPSR_CC(a6)	* ccodes = 'Z'
	rts

*########################################################################
* XDEF ****************************************************************	#
* 	fcmp(): fp compare op routine					#
*									#
* XREF ****************************************************************	#
* 	res_qnan() - return QNAN result					#
*	res_snan() - return SNAN result					#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precision source operand		#
*	a1 = pointer to extended precision destination operand		#
*	d0 = round prec/mode						#
*									#
* OUTPUT ************************************************************** #
*	None								#
*									#
* ALGORITHM ***********************************************************	#
* 	Handle NANs and denorms as special cases. For everything else,	#
* just use the actual fcmp instruction to produce the correct condition	#
* codes.								#
*									#
*########################################################################

	global	__fcmp__
* __fcmp__: compare dst (a1) with src (a0) and set the FPSR ccode byte.
* A table index is built in d1.w as (DTAG << 3) | STAG. Index 0 falls
* through into fcmp_norm; any other value is dispatched through
* tbl_fcmp_op.
__fcmp__:
	clr.w	d1			* upper byte of the index = 0
	move.b	DTAG(a6),d1
	lsl.b	#$3,d1			* dst tag selects a group of 8 entries
	or.b	STAG(a6),d1		* src tag selects the entry in the group
	bne.b	fcmp_not_norm		* optimize on non-norm input

*
* COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
*
* The compare is executed on the real operands; the ccode byte is the
* most significant byte of the FPSR, rotated down into d0.b.
*
fcmp_norm:
	fmovem.x	DST.w(a1),fp0	* load dst op

	fcmp.x	SRC.w(a0),fp0		* do compare

	fmove.l	fpsr,d0			* save FPSR
	rol.l	#$8,d0			* extract ccode bits
	move.b	d0,FPSR_CC(a6)		* set ccode bits(no exc bits are set)

	rts

*
* fcmp: inputs are not both normalized; what are they?
*
* fcmp_not_norm: d1.w = (DTAG << 3) | STAG. Fetch the 16-bit offset for
* that tag pair from tbl_fcmp_op and jump to tbl_fcmp_op + offset.
fcmp_not_norm:
	move.w	(tbl_fcmp_op.b,pc,d1.w*2),d1
	jmp	(tbl_fcmp_op.b,pc,d1.w*1)

* Two data words in front of the table; never executed because the jmp
* above does not fall through. 48 matches the number of table entries
* (6 groups of 8) - presumably a jump-table marker; confirm.
	.dc.w	$4AFC,48
* Entries are "dst - src". Each dst tag owns 8 slots; the last two slots
* of every group are unused and hold offset 0.
tbl_fcmp_op:
	.dc.w	fcmp_norm-tbl_fcmp_op		* NORM - NORM
	.dc.w	fcmp_norm-tbl_fcmp_op		* NORM - ZERO
	.dc.w	fcmp_norm-tbl_fcmp_op		* NORM - INF
	.dc.w	fcmp_res_qnan-tbl_fcmp_op	* NORM - QNAN
	.dc.w	fcmp_nrm_dnrm-tbl_fcmp_op	* NORM - DENORM
	.dc.w	fcmp_res_snan-tbl_fcmp_op	* NORM - SNAN
	.dc.w	tbl_fcmp_op-tbl_fcmp_op		* (unused)
	.dc.w	tbl_fcmp_op-tbl_fcmp_op		* (unused)

	.dc.w	fcmp_norm-tbl_fcmp_op		* ZERO - NORM
	.dc.w	fcmp_norm-tbl_fcmp_op		* ZERO - ZERO
	.dc.w	fcmp_norm-tbl_fcmp_op		* ZERO - INF
	.dc.w	fcmp_res_qnan-tbl_fcmp_op	* ZERO - QNAN
	.dc.w	fcmp_dnrm_s-tbl_fcmp_op		* ZERO - DENORM
	.dc.w	fcmp_res_snan-tbl_fcmp_op	* ZERO - SNAN
	.dc.w	tbl_fcmp_op-tbl_fcmp_op		* (unused)
	.dc.w	tbl_fcmp_op-tbl_fcmp_op		* (unused)

	.dc.w	fcmp_norm-tbl_fcmp_op		* INF - NORM
	.dc.w	fcmp_norm-tbl_fcmp_op		* INF - ZERO
	.dc.w	fcmp_norm-tbl_fcmp_op		* INF - INF
	.dc.w	fcmp_res_qnan-tbl_fcmp_op	* INF - QNAN
	.dc.w	fcmp_dnrm_s-tbl_fcmp_op		* INF - DENORM
	.dc.w	fcmp_res_snan-tbl_fcmp_op	* INF - SNAN
	.dc.w	tbl_fcmp_op-tbl_fcmp_op		* (unused)
	.dc.w	tbl_fcmp_op-tbl_fcmp_op		* (unused)

	.dc.w	fcmp_res_qnan-tbl_fcmp_op	* QNAN - NORM
	.dc.w	fcmp_res_qnan-tbl_fcmp_op	* QNAN - ZERO
	.dc.w	fcmp_res_qnan-tbl_fcmp_op	* QNAN - INF
	.dc.w	fcmp_res_qnan-tbl_fcmp_op	* QNAN - QNAN
	.dc.w	fcmp_res_qnan-tbl_fcmp_op	* QNAN - DENORM
	.dc.w	fcmp_res_snan-tbl_fcmp_op	* QNAN - SNAN
	.dc.w	tbl_fcmp_op-tbl_fcmp_op		* (unused)
	.dc.w	tbl_fcmp_op-tbl_fcmp_op		* (unused)

	.dc.w	fcmp_dnrm_nrm-tbl_fcmp_op	* DENORM - NORM
	.dc.w	fcmp_dnrm_d-tbl_fcmp_op		* DENORM - ZERO
	.dc.w	fcmp_dnrm_d-tbl_fcmp_op		* DENORM - INF
	.dc.w	fcmp_res_qnan-tbl_fcmp_op	* DENORM - QNAN
	.dc.w	fcmp_dnrm_sd-tbl_fcmp_op	* DENORM - DENORM
	.dc.w	fcmp_res_snan-tbl_fcmp_op	* DENORM - SNAN
	.dc.w	tbl_fcmp_op-tbl_fcmp_op		* (unused)
	.dc.w	tbl_fcmp_op-tbl_fcmp_op		* (unused)

	.dc.w	fcmp_res_snan-tbl_fcmp_op	* SNAN - NORM
	.dc.w	fcmp_res_snan-tbl_fcmp_op	* SNAN - ZERO
	.dc.w	fcmp_res_snan-tbl_fcmp_op	* SNAN - INF
	.dc.w	fcmp_res_snan-tbl_fcmp_op	* SNAN - QNAN
	.dc.w	fcmp_res_snan-tbl_fcmp_op	* SNAN - DENORM
	.dc.w	fcmp_res_snan-tbl_fcmp_op	* SNAN - SNAN
	.dc.w	tbl_fcmp_op-tbl_fcmp_op		* (unused)
	.dc.w	tbl_fcmp_op-tbl_fcmp_op		* (unused)

* unlike all other functions for QNAN and SNAN, fcmp does NOT set the
* 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
* fcmp_res_qnan: run the generic QNAN handler, then clear bit 3 of the
* ccode byte (mask $f7) so that fcmp never reports 'N' for a NAN.
fcmp_res_qnan:
	bsr.l	res_qnan
	andi.b	#$f7,FPSR_CC(a6)	* squelch 'N'; other ccode bits kept
	rts
* fcmp_res_snan: run the generic SNAN handler, then clear bit 3 of the
* ccode byte (mask $f7) so that fcmp never reports 'N' for a NAN.
fcmp_res_snan:
	bsr.l	res_snan
	andi.b	#$f7,FPSR_CC(a6)	* squelch 'N'; other ccode bits kept
	rts

*
* DENORMs are a little more difficult. 
* If you have 2 DENORMs, then you can just force the j-bit to a one 
* and use the fcmp_norm routine.
* If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
* and use the fcmp_norm routine.
* If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
* But with a DENORM and a NORM of the same sign, the neg bit is set if the
* (1) signs are (+) and the DENORM is the dst or
* (2) signs are (-) and the DENORM is the src
*

* fcmp_dnrm_s: the src is a DENORM. Copy it to FP_SCR0 with bit 31 of
* the high mantissa longword (the j-bit) forced to one, point a0 at the
* copy and let fcmp_norm do the compare. The dst (a1) is untouched.
fcmp_dnrm_s:
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),d0
	bset	#31,d0			* DENORM src; make into small norm
	move.l	d0,FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	lea	FP_SCR0(a6),a0		* compare against the patched copy
	bra.w	fcmp_norm

* fcmp_dnrm_d: the dst is a DENORM. Copy it to FP_SCR0 with bit 31 of
* the high mantissa longword (the j-bit) forced to one, point a1 at the
* copy and let fcmp_norm do the compare. The src (a0) is untouched.
*
* NOTE(review): the sgn/exp copy below is a long move while the sibling
* routines use move.w, so the two bytes following DST_EX are copied as
* well. Presumably those are unused padding - confirm.
fcmp_dnrm_d:
	move.l	DST_EX.w(a1),FP_SCR0_EX(a6)
	move.l	DST_HI(a1),d0
	bset	#31,d0			* DENORM dst; make into small norm
	move.l	d0,FP_SCR0_HI(a6)
	move.l	DST_LO(a1),FP_SCR0_LO(a6)
	lea	FP_SCR0(a6),a1		* compare using the patched copy
	bra.w	fcmp_norm

* fcmp_dnrm_sd: both operands are DENORMs. The dst is copied to FP_SCR1
* and the src to FP_SCR0, each with bit 31 of the high mantissa longword
* forced to one; a1/a0 are redirected to the copies for fcmp_norm.
fcmp_dnrm_sd:
	move.w	DST_EX.w(a1),FP_SCR1_EX(a6)
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	DST_HI(a1),d0
	bset	#31,d0			* DENORM dst; make into small norm
	move.l	d0,FP_SCR1_HI(a6)
	move.l	SRC_HI(a0),d0
	bset	#31,d0			* DENORM src; make into small norm
	move.l	d0,FP_SCR0_HI(a6)
	move.l	DST_LO(a1),FP_SCR1_LO(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	lea	FP_SCR1(a6),a1		* dst -> patched copy
	lea	FP_SCR0(a6),a0		* src -> patched copy
	bra.w	fcmp_norm

* fcmp_nrm_dnrm: dst is a NORM, src is a DENORM.
* With unlike signs the generic compare (via fcmp_dnrm_s) gives the
* answer. With like signs the ccode byte is written here: 'N' is stored
* when both operands are negative, and nothing is written otherwise.
fcmp_nrm_dnrm:
	move.b	SRC_EX.w(a0),d0		* d0.b bit 7 = src sign
	move.b	DST_EX.w(a1),d1		* d1.b bit 7 = dst sign
	eor.b	d0,d1			* bit 7 set when the signs differ
	bmi.w	fcmp_dnrm_s		* unlike signs; use the real compare

* like signs: decide from the sign of the src alone.
	tst.b	d0			* is src op negative?
	bpl.b	fcmp_nrm_dnrm_done	* no; ccode byte is left as it is
fcmp_nrm_dnrm_m:
	move.b	#neg_bmask,FPSR_CC(a6)	* set 'N' ccode bit
fcmp_nrm_dnrm_done:
	rts

* fcmp_dnrm_nrm: dst is a DENORM, src is a NORM.
* With unlike signs the generic compare (via fcmp_dnrm_d) gives the
* answer. With like signs the ccode byte is written here: 'N' is stored
* when both operands are positive, and nothing is written otherwise.
fcmp_dnrm_nrm:
	move.b	SRC_EX.w(a0),d0		* d0.b bit 7 = src sign
	move.b	DST_EX.w(a1),d1		* d1.b bit 7 = dst sign
	eor.b	d0,d1			* bit 7 set when the signs differ
	bmi.w	fcmp_dnrm_d		* unlike signs; use the real compare

* like signs: decide from the sign of the src alone.
	tst.b	d0			* is src op negative?
	bmi.b	fcmp_dnrm_nrm_done	* yes; ccode byte is left as it is
fcmp_dnrm_nrm_m:
	move.b	#neg_bmask,FPSR_CC(a6)	* set 'N' ccode bit
fcmp_dnrm_nrm_done:
	rts

*########################################################################
* XDEF ****************************************************************	#
* 	fsglmul(): emulates the fsglmul instruction			#
*									#
* XREF ****************************************************************	#
*	scale_to_zero_src() - scale src exponent to zero		#
*	scale_to_zero_dst() - scale dst exponent to zero		#
*	unf_res4() - return default underflow result for sglop		#
*	ovf_res() - return default overflow result			#
* 	res_qnan() - return QNAN result					#
* 	res_snan() - return SNAN result					#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precision source operand		#
*	a1 = pointer to extended precision destination operand		#
*	d0 = rnd prec,mode						#
*									#
* OUTPUT **************************************************************	#
*	fp0 = result							#
*	fp1 = EXOP (if exception occurred)				#
*									#
* ALGORITHM ***********************************************************	#
*	Handle NANs, infinities, and zeroes as special cases. Divide	#
* norms/denorms into ext/sgl/dbl precision.				#
*	For norms/denorms, scale the exponents such that a multiply	#
* instruction won't cause an exception. Use the regular fsglmul to	#
* compute a result. Check if the regular operands would have taken	#
* an exception. If so, return the default overflow/underflow result	#
* and return the EXOP if exceptions are enabled. Else, scale the 	#
* result operand to the proper exponent.				#
*									#
*########################################################################

	global	fsglmul
* fsglmul: emulate fsglmul (dst x src).
*   in:  a0 = ptr to ext-prec src, a1 = ptr to ext-prec dst,
*        d0 = rnd prec,mode
* d1.w = (DTAG << 3) | STAG; zero falls through into fsglmul_norm, any
* other value is dispatched through tbl_fsglmul_op.
fsglmul:
	move.l	d0,L_SCR3(a6)		* store rnd info

	clr.w	d1
	move.b	DTAG(a6),d1
	lsl.b	#$3,d1
	or.b	STAG(a6),d1

	bne.w	fsglmul_not_norm	* optimize on non-norm input

* Copy dst to FP_SCR1 and src to FP_SCR0, scale both, and classify the
* sum of the two scale factors (left in d0 for every successor). When
* neither limit is hit, execution falls through into fsglmul_normal.
fsglmul_norm:
	move.w	DST_EX.w(a1),FP_SCR1_EX(a6)
	move.l	DST_HI(a1),FP_SCR1_HI(a6)
	move.l	DST_LO(a1),FP_SCR1_LO(a6)

	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)

	bsr.l	scale_to_zero_src	* scale exponent
	move.l	d0,-(sp)		* save scale factor 1

	bsr.l	scale_to_zero_dst	* scale dst exponent

	add.l	(sp)+,d0		* SCALE_FACTOR = scale1 + scale2

	cmpi.l	#$3fff-$7ffe,d0		* would result ovfl?
	beq.w	fsglmul_may_ovfl	* result may rnd to overflow
	blt.w	fsglmul_ovfl		* result will overflow

	cmpi.l	#$3fff+$0001,d0		* would result unfl?
	beq.w	fsglmul_may_unfl	* result may rnd to no unfl
	bgt.w	fsglmul_unfl		* result will underflow

* fsglmul_normal: multiply the scaled operands with the caller's
* rounding info, merge the FPSR into USER_FPSR, then undo the scaling.
*   in:  d0 = scale factor, FP_SCR1 = scaled dst, FP_SCR0 = scaled src
fsglmul_normal:
	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsglmul.x	FP_SCR0(a6),fp0	* execute sgl multiply

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

* fsglmul_normal_exit is also entered from fsglmul_may_ovfl and
* fsglmul_may_unfl with fp0 = result and d0 = scale factor.
fsglmul_normal_exit:
	fmovem.x	fp0,FP_SCR0(a6)	* store out result
	move.l	d2,-(sp)		* save d2
	move.w	FP_SCR0_EX(a6),d1	* load {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* exponent - scale factor (undo scaling)
	or.w	d2,d1			* concat old sign,new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp0	* return result in fp0
	rts

* fsglmul_ovfl: the product overflows. The multiply is still executed
* on the scaled operands so that INEX2/N are collected and fp0 holds
* the mantissa needed for a possible EXOP.
*   in:  d0 = scale factor
fsglmul_ovfl:
	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsglmul.x	FP_SCR0(a6),fp0	* execute sgl multiply

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

fsglmul_ovfl_tst:

* save setting this until now because this is where fsglmul_may_ovfl may jump in
	ori.l	#ovfl_inx_mask,USER_FPSR(a6)	* set ovfl/aovfl/ainex

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$13,d1			* is OVFL or INEX enabled?
	bne.b	fsglmul_ovfl_ena	* yes

* default result: d0 (the scale factor) is overwritten here. Only bits
* 4-5 of the rnd info byte are passed to ovf_res(); the result is loaded
* through a0 after ovf_res() returns.
fsglmul_ovfl_dis:
	btst	#neg_bit,FPSR_CC(a6)	* is result negative?
	sne	d1			* set sign param accordingly
	move.l	L_SCR3(a6),d0		* pass prec:rnd
	andi.b	#$30,d0			* force prec = ext
	bsr.l	ovf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* set INF,N if applicable
	fmovem.x	(a0),fp0	* return default result in fp0
	rts

* fsglmul_ovfl_ena: build the EXOP in fp1 from the product in fp0, with
* the exponent (exp - scale factor - $6000) masked to 15 bits, then
* rejoin fsglmul_ovfl_dis for the default result.
*   in:  fp0 = product of the scaled operands, d0 = scale factor
fsglmul_ovfl_ena:
	fmovem.x	fp0,FP_SCR0(a6)	* move result to stack

	move.l	d2,-(sp)		* save d2
	move.w	FP_SCR0_EX(a6),d1	* fetch {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	sub.l	d0,d1			* exponent - scale factor (undo scaling)
	subi.l	#$6000,d1		* subtract bias
	andi.w	#$7fff,d1		* wrap into the 15-bit exponent field
	andi.w	#$8000,d2		* keep old sign
	or.w	d2,d1			* concat old sign,new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	bra.b	fsglmul_ovfl_dis

* fsglmul_may_ovfl: the scale factor sits exactly on the overflow
* boundary. Execute the multiply and compare the rounded magnitude
* against 2: >= 2 means the result overflowed.
*   in:  d0 = scale factor
fsglmul_may_ovfl:
	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsglmul.x	FP_SCR0(a6),fp0	* execute sgl multiply

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

	fabs.x	fp0,fp1			* make a copy of result
	fcmp.b	#$2,fp1			* is |result| >= 2.b?
	fbge.w	fsglmul_ovfl_tst	* yes; overflow has occurred

* no, it didn't overflow; we have correct result
	bra.w	fsglmul_normal_exit

* fsglmul_unfl: the product underflows.
*   in:  d0 = scale factor
* The multiply is executed with only the RZ rounding mode in the FPCR
* (the immediate carries no other bits); the rounding at the caller's
* precision/mode is left to unf_res4() below.
fsglmul_unfl:
	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set unfl exc bit

	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	#rz_mode*$10,fpcr	* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsglmul.x	FP_SCR0(a6),fp0	* execute sgl multiply

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$0b,d1			* is UNFL or INEX enabled?
	bne.b	fsglmul_unfl_ena	* yes

* default result: unf_res4() works in place on FP_SCR0 and its d0 return
* is OR'ed into the ccode byte.
fsglmul_unfl_dis:
	fmovem.x	fp0,FP_SCR0(a6)	* store out result

	lea	FP_SCR0(a6),a0		* pass: result addr
	move.l	L_SCR3(a6),d1		* pass: rnd prec,mode
	bsr.l	unf_res4		* calculate default result
	or.b	d0,FPSR_CC(a6)		* 'Z' bit may have been set
	fmovem.x	FP_SCR0(a6),fp0	* return default result in fp0
	rts

*
* UNFL is enabled. 
*
* fsglmul_unfl_ena: build the EXOP in fp1. The multiply is redone into
* fp1 with the caller's rounding info (fp0 keeps the RZ product for
* fsglmul_unfl_dis), and the exponent becomes
* (exp - scale factor + $6000) masked to 15 bits.
* FP_SCR0 is used as scratch here; fsglmul_unfl_dis overwrites it again
* from fp0.
fsglmul_unfl_ena:
	fmovem.x	FP_SCR1(a6),fp1	* load dst op

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsglmul.x	FP_SCR0(a6),fp1	* execute sgl multiply

	fmove.l	#$0,fpcr		* clear FPCR

	fmovem.x	fp1,FP_SCR0(a6)	* save result to stack
	move.l	d2,-(sp)		* save d2
	move.w	FP_SCR0_EX(a6),d1	* fetch {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* exponent - scale factor (undo scaling)
	addi.l	#$6000,d1		* add bias
	andi.w	#$7fff,d1		* wrap into the 15-bit exponent field
	or.w	d2,d1			* concat old sign,new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	bra.w	fsglmul_unfl_dis

* fsglmul_may_unfl: the scale factor sits exactly on the underflow
* boundary. Execute the multiply and compare the rounded magnitude
* against 2: above 2 is a normal result, below 2 is an underflow, and
* exactly 2 needs the second pass below.
*   in:  d0 = scale factor
fsglmul_may_unfl:
	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsglmul.x	FP_SCR0(a6),fp0	* execute sgl multiply

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

	fabs.x	fp0,fp1			* make a copy of result
	fcmp.b	#$2,fp1			* compare |result| with 2
	fbgt.w	fsglmul_normal_exit	* |result| > 2; no underflow occurred
	fblt.w	fsglmul_unfl		* |result| < 2; underflow occurred

*
* we still don't know if underflow occurred. result is ~ equal to 2. but,
* we don't know if the result was an underflow that rounded up to a 2 or
* a normalized number that rounded down to a 2. so, redo the entire operation
* using RZ as the rounding mode to see what the pre-rounded result is.
* this case should be relatively rare.
*
* The second product goes to fp1 only; fp0 still holds the first result
* for fsglmul_normal_exit.
*
	fmovem.x	FP_SCR1(a6),fp1	* load dst op into fp1

	move.l	L_SCR3(a6),d1
	andi.b	#$c0,d1			* keep rnd prec
	ori.b	#rz_mode*$10,d1		* insert RZ

	fmove.l	d1,fpcr			* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsglmul.x	FP_SCR0(a6),fp1	* execute sgl multiply

	fmove.l	#$0,fpcr		* clear FPCR
	fabs.x	fp1			* make absolute value
	fcmp.b	#$2,fp1			* is |result| < 2.b?
	fbge.w	fsglmul_normal_exit	* no; no underflow occurred
	bra.w	fsglmul_unfl		* yes, underflow occurred

*#############################################################################

*
* Single Precision Multiply: inputs are not both normalized; what are they?
*
* fsglmul_not_norm: d1.w = (DTAG << 3) | STAG. Fetch the 16-bit offset
* for that tag pair from tbl_fsglmul_op and jump to the table base plus
* that offset.
fsglmul_not_norm:
	move.w	(tbl_fsglmul_op.b,pc,d1.w*2),d1
	jmp	(tbl_fsglmul_op.b,pc,d1.w*1)

* Two data words in front of the table; never executed because the jmp
* above does not fall through. 48 matches the number of table entries
* (6 groups of 8) - presumably a jump-table marker; confirm.
	.dc.w	$4AFC,48
* Entries are "dst x src". Each dst tag owns 8 slots; the last two slots
* of every group are unused and hold offset 0.
tbl_fsglmul_op:
	.dc.w	fsglmul_norm-tbl_fsglmul_op			* NORM x NORM
	.dc.w	fsglmul_zero-tbl_fsglmul_op			* NORM x ZERO
	.dc.w	fsglmul_inf_src-tbl_fsglmul_op			* NORM x INF
	.dc.w	fsglmul_res_qnan-tbl_fsglmul_op			* NORM x QNAN
	.dc.w	fsglmul_norm-tbl_fsglmul_op			* NORM x DENORM
	.dc.w	fsglmul_res_snan-tbl_fsglmul_op			* NORM x SNAN
	.dc.w	tbl_fsglmul_op-tbl_fsglmul_op			* (unused)
	.dc.w	tbl_fsglmul_op-tbl_fsglmul_op			* (unused)

	.dc.w	fsglmul_zero-tbl_fsglmul_op			* ZERO x NORM
	.dc.w	fsglmul_zero-tbl_fsglmul_op			* ZERO x ZERO
	.dc.w	fsglmul_res_operr-tbl_fsglmul_op		* ZERO x INF
	.dc.w	fsglmul_res_qnan-tbl_fsglmul_op			* ZERO x QNAN
	.dc.w	fsglmul_zero-tbl_fsglmul_op			* ZERO x DENORM
	.dc.w	fsglmul_res_snan-tbl_fsglmul_op			* ZERO x SNAN
	.dc.w	tbl_fsglmul_op-tbl_fsglmul_op			* (unused)
	.dc.w	tbl_fsglmul_op-tbl_fsglmul_op			* (unused)

	.dc.w	fsglmul_inf_dst-tbl_fsglmul_op			* INF x NORM
	.dc.w	fsglmul_res_operr-tbl_fsglmul_op		* INF x ZERO
	.dc.w	fsglmul_inf_dst-tbl_fsglmul_op			* INF x INF
	.dc.w	fsglmul_res_qnan-tbl_fsglmul_op			* INF x QNAN
	.dc.w	fsglmul_inf_dst-tbl_fsglmul_op			* INF x DENORM
	.dc.w	fsglmul_res_snan-tbl_fsglmul_op			* INF x SNAN
	.dc.w	tbl_fsglmul_op-tbl_fsglmul_op			* (unused)
	.dc.w	tbl_fsglmul_op-tbl_fsglmul_op			* (unused)

	.dc.w	fsglmul_res_qnan-tbl_fsglmul_op			* QNAN x NORM
	.dc.w	fsglmul_res_qnan-tbl_fsglmul_op			* QNAN x ZERO
	.dc.w	fsglmul_res_qnan-tbl_fsglmul_op			* QNAN x INF
	.dc.w	fsglmul_res_qnan-tbl_fsglmul_op			* QNAN x QNAN
	.dc.w	fsglmul_res_qnan-tbl_fsglmul_op			* QNAN x DENORM
	.dc.w	fsglmul_res_snan-tbl_fsglmul_op			* QNAN x SNAN
	.dc.w	tbl_fsglmul_op-tbl_fsglmul_op			* (unused)
	.dc.w	tbl_fsglmul_op-tbl_fsglmul_op			* (unused)

	.dc.w	fsglmul_norm-tbl_fsglmul_op			* DENORM x NORM
	.dc.w	fsglmul_zero-tbl_fsglmul_op			* DENORM x ZERO
	.dc.w	fsglmul_inf_src-tbl_fsglmul_op			* DENORM x INF
	.dc.w	fsglmul_res_qnan-tbl_fsglmul_op			* DENORM x QNAN
	.dc.w	fsglmul_norm-tbl_fsglmul_op			* DENORM x DENORM
	.dc.w	fsglmul_res_snan-tbl_fsglmul_op			* DENORM x SNAN
	.dc.w	tbl_fsglmul_op-tbl_fsglmul_op			* (unused)
	.dc.w	tbl_fsglmul_op-tbl_fsglmul_op			* (unused)

	.dc.w	fsglmul_res_snan-tbl_fsglmul_op			* SNAN x NORM
	.dc.w	fsglmul_res_snan-tbl_fsglmul_op			* SNAN x ZERO
	.dc.w	fsglmul_res_snan-tbl_fsglmul_op			* SNAN x INF
	.dc.w	fsglmul_res_snan-tbl_fsglmul_op			* SNAN x QNAN
	.dc.w	fsglmul_res_snan-tbl_fsglmul_op			* SNAN x DENORM
	.dc.w	fsglmul_res_snan-tbl_fsglmul_op			* SNAN x SNAN
	.dc.w	tbl_fsglmul_op-tbl_fsglmul_op			* (unused)
	.dc.w	tbl_fsglmul_op-tbl_fsglmul_op			* (unused)

* Long-branch stubs for the special cases in tbl_fsglmul_op. The table
* holds 16-bit offsets relative to its own base; each stub forwards to
* the shared handler with a long branch. The zero/INF cases reuse the
* fmul handlers.
fsglmul_res_operr:
	bra.l	res_operr		* ZERO x INF, INF x ZERO
fsglmul_res_snan:
	bra.l	res_snan		* any SNAN operand
fsglmul_res_qnan:
	bra.l	res_qnan		* QNAN operand (no SNAN involved)
fsglmul_zero:
	bra.l	fmul_zero		* product is a ZERO
fsglmul_inf_src:
	bra.l	fmul_inf_src		* src is the INF
fsglmul_inf_dst:
	bra.l	fmul_inf_dst		* dst is the INF

*########################################################################
* XDEF ****************************************************************	#
* 	fsgldiv(): emulates the fsgldiv instruction			#
*									#
* XREF ****************************************************************	#
*	scale_to_zero_src() - scale src exponent to zero		#
*	scale_to_zero_dst() - scale dst exponent to zero		#
*	unf_res4() - return default underflow result for sglop		#
*	ovf_res() - return default overflow result			#
* 	res_qnan() - return QNAN result					#
* 	res_snan() - return SNAN result					#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precision source operand		#
*	a1 = pointer to extended precision destination operand		#
*	d0 = rnd prec,mode						#
*									#
* OUTPUT **************************************************************	#
*	fp0 = result							#
*	fp1 = EXOP (if exception occurred)				#
*									#
* ALGORITHM ***********************************************************	#
*	Handle NANs, infinities, and zeroes as special cases. Divide	#
* norms/denorms into ext/sgl/dbl precision.				#
*	For norms/denorms, scale the exponents such that a divide	#
* instruction won't cause an exception. Use the regular fsgldiv to	#
* compute a result. Check if the regular operands would have taken	#
* an exception. If so, return the default overflow/underflow result	#
* and return the EXOP if exceptions are enabled. Else, scale the 	#
* result operand to the proper exponent.				#
*									#
*########################################################################

	global	fsgldiv
* fsgldiv: emulate fsgldiv (dst / src).
*   in:  a0 = ptr to ext-prec src, a1 = ptr to ext-prec dst,
*        d0 = rnd prec,mode
* d1.w = (DTAG << 3) | STAG; zero falls through into fsgldiv_norm, any
* other value is dispatched through tbl_fsgldiv_op.
fsgldiv:
	move.l	d0,L_SCR3(a6)		* store rnd info

	clr.w	d1
	move.b	DTAG(a6),d1
	lsl.b	#$3,d1
	or.b	STAG(a6),d1		* combine dst and src tags

	bne.w	fsgldiv_not_norm	* optimize on non-norm input

*
* DIVIDE: NORMs and DENORMs ONLY!
*
* Copy dst to FP_SCR1 and src to FP_SCR0, scale both, and classify the
* combined scale factor (left in d0 for every successor). When neither
* limit is hit, execution falls through into fsgldiv_normal.
*
fsgldiv_norm:
	move.w	DST_EX.w(a1),FP_SCR1_EX(a6)
	move.l	DST_HI(a1),FP_SCR1_HI(a6)
	move.l	DST_LO(a1),FP_SCR1_LO(a6)

	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)

	bsr.l	scale_to_zero_src	* calculate scale factor 1
	move.l	d0,-(sp)		* save scale factor 1

	bsr.l	scale_to_zero_dst	* calculate scale factor 2

* scale1 (src) is popped and subtracted from scale2 (dst). No precision
* value is fetched into d1 here: every successor overwrites d1 before
* reading it.
	sub.l	(sp)+,d0		* S.F. = scale2 - scale1

	cmpi.l	#$3fff-$7ffe,d0		* could result overflow?
	ble.w	fsgldiv_may_ovfl	* maybe; go check after the divide

	cmpi.l	#$3fff-$0000,d0		* will result underflow?
	beq.w	fsgldiv_may_unfl	* maybe
	bgt.w	fsgldiv_unfl		* yes; go handle underflow

* fsgldiv_normal: divide the scaled operands with the caller's rounding
* info, merge the FPSR into USER_FPSR, then undo the scaling.
*   in:  d0 = scale factor, FP_SCR1 = scaled dst, FP_SCR0 = scaled src
fsgldiv_normal:
	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsgldiv.x	FP_SCR0(a6),fp0	* perform sgl divide

	fmove.l	fpsr,d1			* save FPSR
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

* fsgldiv_normal_exit is also entered from fsgldiv_may_ovfl and
* fsgldiv_may_unfl with fp0 = result and d0 = scale factor.
fsgldiv_normal_exit:
	fmovem.x	fp0,FP_SCR0(a6)	* store result on stack
	move.l	d2,-(sp)		* save d2
	move.w	FP_SCR0_EX(a6),d1	* load {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* exponent - scale factor (undo scaling)
	or.w	d2,d1			* concat old sign,new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp0	* return result in fp0
	rts

* fsgldiv_may_ovfl: entered for every scale factor at or below the
* overflow threshold. The divide is executed and the unscaled exponent
* (result exp - scale factor) is checked: below $7fff the result is
* normal, otherwise it overflowed.
*   in:  d0 = scale factor
fsgldiv_may_ovfl:
	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsgldiv.x	FP_SCR0(a6),fp0	* execute divide

	fmove.l	fpsr,d1
	fmove.l	#$0,fpcr

	or.l	d1,USER_FPSR(a6)	* save INEX,N

	fmovem.x	fp0,-(sp)	* save result to stack
	move.w	(sp),d1			* fetch new exponent
	add.l	#$c,sp			* pop the 12-byte result again
	andi.l	#$7fff,d1		* strip sign
	sub.l	d0,d1			* exponent - scale factor (undo scaling)
	cmpi.l	#$7fff,d1		* did divide overflow?
	blt.b	fsgldiv_normal_exit	* no; finish as a normal result

* Only the low word of USER_FPSR is updated here (word-sized OR at
* offset 2).
fsgldiv_ovfl_tst:
	ori.w	#ovfl_inx_mask,2+USER_FPSR(a6)	* set ovfl/aovfl/ainex

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$13,d1			* is OVFL or INEX enabled?
	bne.b	fsgldiv_ovfl_ena	* yes

* default result: d0 (the scale factor) is overwritten here. Only bits
* 4-5 of the rnd info byte are passed to ovf_res(); the result is loaded
* through a0 after ovf_res() returns.
fsgldiv_ovfl_dis:
	btst	#neg_bit,FPSR_CC(a6)	* is result negative
	sne	d1			* set sign param accordingly
	move.l	L_SCR3(a6),d0		* pass prec:rnd
	andi.b	#$30,d0			* kill precision
	bsr.l	ovf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* set INF if applicable
	fmovem.x	(a0),fp0	* return default result in fp0
	rts

* fsgldiv_ovfl_ena: build the EXOP in fp1 from the quotient in fp0, with
* the exponent (exp - scale factor - $6000) masked to 15 bits, then
* rejoin fsgldiv_ovfl_dis for the default result.
*   in:  fp0 = quotient of the scaled operands, d0 = scale factor
fsgldiv_ovfl_ena:
	fmovem.x	fp0,FP_SCR0(a6)	* move result to stack

	move.l	d2,-(sp)		* save d2
	move.w	FP_SCR0_EX(a6),d1	* fetch {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* exponent - scale factor (undo scaling)
	subi.l	#$6000,d1		* subtract new bias
	andi.w	#$7fff,d1		* clear ms bit
	or.w	d2,d1			* concat old sign,new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	bra.b	fsgldiv_ovfl_dis

* fsgldiv_unfl: the quotient underflows.
*   in:  d0 = scale factor
* The divide is executed with only the RZ rounding mode in the FPCR
* (the immediate carries no other bits); the rounding at the caller's
* precision/mode is left to unf_res4() below.
fsgldiv_unfl:
	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set unfl exc bit

	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	#rz_mode*$10,fpcr	* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsgldiv.x	FP_SCR0(a6),fp0	* execute sgl divide

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$0b,d1			* is UNFL or INEX enabled?
	bne.b	fsgldiv_unfl_ena	* yes

* default result: unf_res4() works in place on FP_SCR0 and its d0 return
* is OR'ed into the ccode byte.
fsgldiv_unfl_dis:
	fmovem.x	fp0,FP_SCR0(a6)	* store out result

	lea	FP_SCR0(a6),a0		* pass: result addr
	move.l	L_SCR3(a6),d1		* pass: rnd prec,mode
	bsr.l	unf_res4		* calculate default result
	or.b	d0,FPSR_CC(a6)		* 'Z' bit may have been set
	fmovem.x	FP_SCR0(a6),fp0	* return default result in fp0
	rts

*
* UNFL is enabled. 
*
* fsgldiv_unfl_ena: build the EXOP in fp1. The divide is redone into
* fp1 with the caller's rounding info (fp0 keeps the RZ quotient for
* fsgldiv_unfl_dis), and the exponent becomes
* (exp - scale factor + $6000) masked to 15 bits.
* FP_SCR0 is used as scratch here; fsgldiv_unfl_dis overwrites it again
* from fp0.
fsgldiv_unfl_ena:
	fmovem.x	FP_SCR1(a6),fp1	* load dst op

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsgldiv.x	FP_SCR0(a6),fp1	* execute sgl divide

	fmove.l	#$0,fpcr		* clear FPCR

	fmovem.x	fp1,FP_SCR0(a6)	* save result to stack
	move.l	d2,-(sp)		* save d2
	move.w	FP_SCR0_EX(a6),d1	* fetch {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* exponent - scale factor (undo scaling)
	addi.l	#$6000,d1		* add bias
	andi.w	#$7fff,d1		* clear top bit
	or.w	d2,d1			* concat old sign, new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	bra.b	fsgldiv_unfl_dis

*
* the divide operation MAY underflow:
*
* fsgldiv_may_unfl: the scale factor sits exactly on the underflow
* boundary. Execute the divide and compare the rounded magnitude
* against 1: above 1 is a normal result, below 1 is an underflow, and
* exactly 1 needs the second pass below.
*   in:  d0 = scale factor
fsgldiv_may_unfl:
	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsgldiv.x	FP_SCR0(a6),fp0	* execute sgl divide

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

	fabs.x	fp0,fp1			* make a copy of result
	fcmp.b	#$1,fp1			* compare |result| with 1
	fbgt.w	fsgldiv_normal_exit	* |result| > 1; no underflow occurred
	fblt.w	fsgldiv_unfl		* |result| < 1; underflow occurred

*
* we still don't know if underflow occurred. result is ~ equal to 1. but,
* we don't know if the result was an underflow that rounded up to a 1
* or a normalized number that rounded down to a 1. so, redo the entire 
* operation using RZ as the rounding mode to see what the pre-rounded 
* result is. this case should be relatively rare.
*
* The second quotient goes to fp1 only; fp0 still holds the first result
* for fsgldiv_normal_exit. The FPCR value used here is RZ with every
* other bit clear.
*
	fmovem.x	FP_SCR1(a6),fp1	* load dst op into fp1

	clr.l	d1			* clear scratch register
	ori.b	#rz_mode*$10,d1		* force RZ rnd mode

	fmove.l	d1,fpcr			* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsgldiv.x	FP_SCR0(a6),fp1	* execute sgl divide

	fmove.l	#$0,fpcr		* clear FPCR
	fabs.x	fp1			* make absolute value
	fcmp.b	#$1,fp1			* is |result| < 1.b?
	fbge.w	fsgldiv_normal_exit	* no; no underflow occurred
	bra.w	fsgldiv_unfl		* yes; underflow occurred

*###########################################################################

*
* Divide: inputs are not both normalized; what are they?
*
* d1 is used as a word index into tbl_fsgldiv_op; the fetched entry is a
* 16-bit offset relative to the table, which is then jumped through.
* The table is laid out as rows of 8 entries (6 used, 2 padding entries
* holding offset 0). Entry labels read "dst / src".
* NOTE(review): d1 is presumably (DTAG<<3)|STAG, as computed in fadd;
* the code that sets it is outside this section - confirm.
*
fsgldiv_not_norm:
	move.w	(tbl_fsgldiv_op.b,pc,d1.w*2),d1
	jmp	(tbl_fsgldiv_op.b,pc,d1.w*1)

* $4AFC followed by the word count (48) precedes the table.
	.dc.w	$4AFC,48
tbl_fsgldiv_op:
	.dc.w	fsgldiv_norm-tbl_fsgldiv_op			* NORM / NORM
	.dc.w	fsgldiv_inf_load-tbl_fsgldiv_op			* NORM / ZERO
	.dc.w	fsgldiv_zero_load-tbl_fsgldiv_op		* NORM / INF
	.dc.w	fsgldiv_res_qnan-tbl_fsgldiv_op			* NORM / QNAN
	.dc.w	fsgldiv_norm-tbl_fsgldiv_op			* NORM / DENORM
	.dc.w	fsgldiv_res_snan-tbl_fsgldiv_op			* NORM / SNAN
	.dc.w	tbl_fsgldiv_op-tbl_fsgldiv_op			* (padding)
	.dc.w	tbl_fsgldiv_op-tbl_fsgldiv_op			* (padding)

	.dc.w	fsgldiv_zero_load-tbl_fsgldiv_op		* ZERO / NORM
	.dc.w	fsgldiv_res_operr-tbl_fsgldiv_op		* ZERO / ZERO
	.dc.w	fsgldiv_zero_load-tbl_fsgldiv_op		* ZERO / INF
	.dc.w	fsgldiv_res_qnan-tbl_fsgldiv_op			* ZERO / QNAN
	.dc.w	fsgldiv_zero_load-tbl_fsgldiv_op		* ZERO / DENORM
	.dc.w	fsgldiv_res_snan-tbl_fsgldiv_op			* ZERO / SNAN
	.dc.w	tbl_fsgldiv_op-tbl_fsgldiv_op			* (padding)
	.dc.w	tbl_fsgldiv_op-tbl_fsgldiv_op			* (padding)

	.dc.w	fsgldiv_inf_dst-tbl_fsgldiv_op			* INF / NORM
	.dc.w	fsgldiv_inf_dst-tbl_fsgldiv_op			* INF / ZERO
	.dc.w	fsgldiv_res_operr-tbl_fsgldiv_op		* INF / INF
	.dc.w	fsgldiv_res_qnan-tbl_fsgldiv_op			* INF / QNAN
	.dc.w	fsgldiv_inf_dst-tbl_fsgldiv_op			* INF / DENORM
	.dc.w	fsgldiv_res_snan-tbl_fsgldiv_op			* INF / SNAN
	.dc.w	tbl_fsgldiv_op-tbl_fsgldiv_op			* (padding)
	.dc.w	tbl_fsgldiv_op-tbl_fsgldiv_op			* (padding)

	.dc.w	fsgldiv_res_qnan-tbl_fsgldiv_op			* QNAN / NORM
	.dc.w	fsgldiv_res_qnan-tbl_fsgldiv_op			* QNAN / ZERO
	.dc.w	fsgldiv_res_qnan-tbl_fsgldiv_op			* QNAN / INF
	.dc.w	fsgldiv_res_qnan-tbl_fsgldiv_op			* QNAN / QNAN
	.dc.w	fsgldiv_res_qnan-tbl_fsgldiv_op			* QNAN / DENORM
	.dc.w	fsgldiv_res_snan-tbl_fsgldiv_op			* QNAN / SNAN
	.dc.w	tbl_fsgldiv_op-tbl_fsgldiv_op			* (padding)
	.dc.w	tbl_fsgldiv_op-tbl_fsgldiv_op			* (padding)

	.dc.w	fsgldiv_norm-tbl_fsgldiv_op			* DENORM / NORM
	.dc.w	fsgldiv_inf_load-tbl_fsgldiv_op			* DENORM / ZERO
	.dc.w	fsgldiv_zero_load-tbl_fsgldiv_op		* DENORM / INF
	.dc.w	fsgldiv_res_qnan-tbl_fsgldiv_op			* DENORM / QNAN
	.dc.w	fsgldiv_norm-tbl_fsgldiv_op			* DENORM / DENORM
	.dc.w	fsgldiv_res_snan-tbl_fsgldiv_op			* DENORM / SNAN
	.dc.w	tbl_fsgldiv_op-tbl_fsgldiv_op			* (padding)
	.dc.w	tbl_fsgldiv_op-tbl_fsgldiv_op			* (padding)

	.dc.w	fsgldiv_res_snan-tbl_fsgldiv_op			* SNAN / NORM
	.dc.w	fsgldiv_res_snan-tbl_fsgldiv_op			* SNAN / ZERO
	.dc.w	fsgldiv_res_snan-tbl_fsgldiv_op			* SNAN / INF
	.dc.w	fsgldiv_res_snan-tbl_fsgldiv_op			* SNAN / QNAN
	.dc.w	fsgldiv_res_snan-tbl_fsgldiv_op			* SNAN / DENORM
	.dc.w	fsgldiv_res_snan-tbl_fsgldiv_op			* SNAN / SNAN
	.dc.w	tbl_fsgldiv_op-tbl_fsgldiv_op			* (padding)
	.dc.w	tbl_fsgldiv_op-tbl_fsgldiv_op			* (padding)

*
* Local trampolines for the table above: the table holds 16-bit offsets,
* so each special case lands here and is forwarded with a long branch.
* The INF/ZERO cases reuse the fdiv handlers.
*
fsgldiv_res_qnan:
	bra.l	res_qnan
fsgldiv_res_snan:
	bra.l	res_snan
fsgldiv_res_operr:
	bra.l	res_operr
fsgldiv_inf_load:
	bra.l	fdiv_inf_load
fsgldiv_zero_load:
	bra.l	fdiv_zero_load
fsgldiv_inf_dst:
	bra.l	fdiv_inf_dst

*########################################################################
* XDEF ****************************************************************	#
*	fadd(): emulates the fadd instruction				#
*	fsadd(): emulates the fsadd instruction				#
*	fdadd(): emulates the fdadd instruction				#
*									#
* XREF ****************************************************************	#
* 	addsub_scaler2() - scale the operands so they won't take exc	#
*	ovf_res() - return default overflow result			#
*	unf_res() - return default underflow result			#
*	res_qnan() - set QNAN result					#
* 	res_snan() - set SNAN result					#
*	res_operr() - set OPERR result					#
*	scale_to_zero_src() - set src operand exponent equal to zero	#
*	scale_to_zero_dst() - set dst operand exponent equal to zero	#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precision source operand		#
* 	a1 = pointer to extended precision destination operand		#
*									#
* OUTPUT **************************************************************	#
*	fp0 = result							#
*	fp1 = EXOP (if exception occurred)				#
*									#
* ALGORITHM ***********************************************************	#
* 	Handle NANs, infinities, and zeroes as special cases. Divide	#
* norms into extended, single, and double precision.			#
*	Do addition after scaling exponents such that exception won't	#
* occur. Then, check result exponent to see if exception would have	#
* occurred. If so, return default result and maybe EXOP. Else, insert	#
* the correct result exponent and return. Set FPSR bits as appropriate.	#
*									#
*########################################################################

*
* Entry points. d0 holds the rounding info (rnd prec,mode) in its low byte.
* fsadd/fdadd keep only the rnd mode bits ($30) and force the rounding
* precision to single/double before joining fadd; fdadd falls through.
*
	global	fsadd
fsadd:
	andi.b	#$30,d0			* clear rnd prec
	ori.b	#s_mode*$10,d0		* insert sgl prec
	bra.b	fadd

	global	fdadd
fdadd:
	andi.b	#$30,d0			* clear rnd prec
	ori.b	#d_mode*$10,d0		* insert dbl prec

	global	fadd
fadd:
	move.l	d0,L_SCR3(a6)		* store rnd info

* d1 = (DTAG << 3) | STAG; it is zero only when both tags are zero
* (NORM + NORM in tbl_fadd_op), otherwise it indexes tbl_fadd_op.
	clr.w	d1
	move.b	DTAG(a6),d1
	lsl.b	#$3,d1
	or.b	STAG(a6),d1		* combine src tags

	bne.w	fadd_not_norm		* optimize on non-norm input

*
* ADD: norms and denorms
*
* The scaled operands live in FP_SCR0 (src) and FP_SCR1 (dst); d0 is used
* below as the scale factor that is subtracted from the result exponent.
* NOTE(review): d0 is presumably returned by addsub_scaler2 (or by
* scale_to_zero_src/dst when entering at fadd_zero_entry) - confirm.
*
fadd_norm:
	bsr.l	addsub_scaler2		* scale exponents

fadd_zero_entry:
	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	#$0,fpsr		* clear FPSR
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fadd.x	FP_SCR0(a6),fp0		* execute add

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	fpsr,d1			* fetch INEX2,N,Z

	or.l	d1,USER_FPSR(a6)	* save exc and ccode bits

	fbeq.w	fadd_zero_exit		* if result is zero, end now

* from here on the stack holds: 12-byte result at (sp), saved d2 above it.
	move.l	d2,-(sp)		* save d2

	fmovem.x	fp0,-(sp)	* save result to stack

	move.w	2+L_SCR3(a6),d1		* low word of rnd info
	lsr.b	#$6,d1			* d1 = rnd prec, used as table index

	move.w	(sp),d2			* fetch new sign, exp
	andi.l	#$7fff,d2		* strip sign
	sub.l	d0,d2			* add scale factor

	cmp.l	(tbl_fadd_ovfl.b,pc,d1.w*4),d2		* is it an overflow?
	bge.b	fadd_ovfl		* yes

	cmp.l	(tbl_fadd_unfl.b,pc,d1.w*4),d2		* is it an underflow?
	blt.w	fadd_unfl		* yes
	beq.w	fadd_may_unfl		* maybe; go find out

* d2 = unscaled result exponent; put it back into the saved result.
fadd_normal:
	move.w	(sp),d1
	andi.w	#$8000,d1		* keep sign
	or.w	d2,d1			* concat sign,new exp
	move.w	d1,(sp)			* insert new exponent

	fmovem.x	(sp)+,fp0	* return result in fp0

	move.l	(sp)+,d2		* restore d2
	rts

* result was zero: fp0 already holds it and nothing was pushed.
fadd_zero_exit:
*	fmov.s		&0x00000000,%fp0	# return zero in fp0
	rts

*
* Exponent limits, one longword per rounding precision, indexed by
* (rnd prec)*4 from fadd_zero_entry. An unscaled result exponent >= the
* ovfl entry is an overflow; < the unfl entry is an underflow; equal to
* the unfl entry is the "may underflow" case.
*
tbl_fadd_ovfl:
	.dc.l	$7fff			* ext ovfl
	.dc.l	$407f			* sgl ovfl
	.dc.l	$43ff			* dbl ovfl

tbl_fadd_unfl:
	.dc.l	$0000			* ext unfl
	.dc.l	$3f81			* sgl unfl
	.dc.l	$3c01			* dbl unfl

*
* The add overflowed. Entered with the 12-byte result and the saved d2
* still on the stack. If neither OVFL nor INEX is enabled the saved
* result is discarded and the default overflow result is returned.
*
fadd_ovfl:
	ori.l	#ovfl_inx_mask,USER_FPSR(a6)	* set ovfl/aovfl/ainex

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$13,d1			* is OVFL or INEX enabled?
	bne.b	fadd_ovfl_ena		* yes

	add.l	#$c,sp			* drop the saved result
* common tail; only the saved d2 remains on the stack here.
fadd_ovfl_dis:
	btst	#neg_bit,FPSR_CC(a6)	* is result negative?
	sne	d1			* set sign param accordingly
	move.l	L_SCR3(a6),d0		* pass prec:rnd
	bsr.l	ovf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* set INF,N if applicable
	fmovem.x	(a0),fp0	* return default result in fp0
	move.l	(sp)+,d2		* restore d2
	rts

*
* OVFL or INEX is enabled: return an EXOP in fp1 in addition to the
* default result. On entry the 12-byte result and the saved d2 are on the
* stack, d0 is the scale factor and d2 the unscaled result exponent.
*
* The rnd prec,mode byte is the LOW byte of the longword stored at
* L_SCR3, i.e. 3+L_SCR3 on this big-endian machine (compare
* fadd_zero_2_chk_rm). Reading L_SCR3+0 would fetch the most significant
* byte instead, so the sgl/dbl branch below would never test the rounding
* precision.
*
fadd_ovfl_ena:
	move.b	3+L_SCR3(a6),d1		* fetch rnd prec,mode byte
	andi.b	#$c0,d1			* is precision extended?
	bne.b	fadd_ovfl_ena_sd	* no; prec = sgl or dbl

* EXOP = saved result with exponent (d2 - $6000) mod 2^15, same sign.
fadd_ovfl_ena_cont:
	move.w	(sp),d1
	andi.w	#$8000,d1		* keep sign
	subi.l	#$6000,d2		* add extra bias
	andi.w	#$7fff,d2
	or.w	d2,d1			* concat sign,new exp
	move.w	d1,(sp)			* insert new exponent

	fmovem.x	(sp)+,fp1	* return EXOP in fp1
	bra.b	fadd_ovfl_dis

* sgl/dbl: redo the add with only the rnd mode set (extended precision)
* and replace the saved result with it.
fadd_ovfl_ena_sd:
	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	move.l	L_SCR3(a6),d1
	andi.b	#$30,d1			* keep rnd mode
	fmove.l	d1,fpcr			* set FPCR

	fadd.x	FP_SCR0(a6),fp0		* execute add

	fmove.l	#$0,fpcr		* clear FPCR

	add.l	#$c,sp			* drop the sgl/dbl rounded result
	fmovem.x	fp0,-(sp)	* save the re-rounded result instead
* the re-rounded result may have a different exponent than the sgl/dbl
* rounded one (rounding carry), so recompute d2 from the new result.
	move.w	(sp),d2			* fetch new sign, exp
	andi.l	#$7fff,d2		* strip sign
	sub.l	d0,d2			* add scale factor
	bra.b	fadd_ovfl_ena_cont

*
* The add underflowed. The saved result is discarded (saved d2 stays on
* the stack) and the add is redone in extended precision with RZ rounding;
* the FPSR bits of that add are merged into USER_FPSR.
*
fadd_unfl:
	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set unfl exc bit

	add.l	#$c,sp			* drop the saved result

	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	#rz_mode*$10,fpcr	* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fadd.x	FP_SCR0(a6),fp0		* execute add

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	fpsr,d1			* save status

	or.l	d1,USER_FPSR(a6)	* save INEX,N

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$0b,d1			* is UNFL or INEX enabled?
	bne.b	fadd_unfl_ena		* yes

* common tail: fp0 -> FP_SCR0, unf_res builds the default result there.
fadd_unfl_dis:
	fmovem.x	fp0,FP_SCR0(a6)	* store out result

	lea	FP_SCR0(a6),a0		* pass: result addr
	move.l	L_SCR3(a6),d1		* pass: rnd prec,mode
	bsr.l	unf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* 'Z' bit may have been set
	fmovem.x	FP_SCR0(a6),fp0	* return default result in fp0
	move.l	(sp)+,d2		* restore d2
	rts

*
* UNFL or INEX is enabled: build the EXOP in fp1.
* The add is redone into fp1 with the user's FPCR when the precision is
* extended, or with only the rnd mode (extended precision) for sgl/dbl.
* EXOP exponent = (result exponent - scale factor in d0) + $6000,
* truncated to 15 bits, with the original sign. d2 is free to clobber
* here because the caller's d2 is still saved on the stack.
*
fadd_unfl_ena:
	fmovem.x	FP_SCR1(a6),fp1	* load dst op

	move.l	L_SCR3(a6),d1
	andi.b	#$c0,d1			* is precision extended?
	bne.b	fadd_unfl_ena_sd	* no; sgl or dbl

	fmove.l	L_SCR3(a6),fpcr		* set FPCR

fadd_unfl_ena_cont:
	fmove.l	#$0,fpsr		* clear FPSR

	fadd.x	FP_SCR0(a6),fp1		* execute add

	fmove.l	#$0,fpcr		* clear FPCR

	fmovem.x	fp1,FP_SCR0(a6)	* save result to stack
	move.w	FP_SCR0_EX(a6),d1	* fetch {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* add scale factor
	addi.l	#$6000,d1		* add new bias
	andi.w	#$7fff,d1		* clear top bit
	or.w	d2,d1			* concat sign,new exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	bra.w	fadd_unfl_dis

fadd_unfl_ena_sd:
	move.l	L_SCR3(a6),d1
	andi.b	#$30,d1			* use only rnd mode
	fmove.l	d1,fpcr			* set FPCR

	bra.b	fadd_unfl_ena_cont

*
* result exponent is equal to the smallest normalized exponent in the
* selected precision. if the precision is extended, this result could not
* have come from an underflow that rounded up.
* the saved result (12 bytes) and saved d2 are still on the stack; every
* early exit goes to fadd_normal, which consumes them.
*
fadd_may_unfl:
	move.l	L_SCR3(a6),d1
	andi.b	#$c0,d1			* is precision extended?
	beq.w	fadd_normal		* yes; no underflow occurred

	move.l	$4(sp),d1		* extract hi(man)
	cmpi.l	#$80000000,d1		* is hi(man) = 0x80000000?
	bne.w	fadd_normal		* no; no underflow occurred

	tst.l	$8(sp)			* is lo(man) = 0x0?
	bne.w	fadd_normal		* no; no underflow occurred

	btst	#inex2_bit,FPSR_EXCEPT(a6)	* is INEX2 set?
	beq.w	fadd_normal		* no; no underflow occurred

*
* ok, so now the result has an exponent equal to the smallest normalized
* exponent for the selected precision. also, the mantissa is equal to
* 0x8000000000000000 and this mantissa is the result of rounding non-zero
* g,r,s. 
* now, we must determine whether the pre-rounded result was an underflow
* rounded "up" or a normalized number rounded "down".
* so, we do this by re-executing the add using RZ as the rounding mode and
* seeing if the new result is smaller or equal to the current result.
* fp0 is overwritten with its absolute value for the compare; both exits
* reload or recompute fp0 afterwards.
*
	fmovem.x	FP_SCR1(a6),fp1	* load dst op into fp1

	move.l	L_SCR3(a6),d1
	andi.b	#$c0,d1			* keep rnd prec
	ori.b	#rz_mode*$10,d1		* insert rnd mode
	fmove.l	d1,fpcr			* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fadd.x	FP_SCR0(a6),fp1		* execute add

	fmove.l	#$0,fpcr		* clear FPCR

	fabs.x	fp0			* compare absolute values
	fabs.x	fp1
	fcmp.x	fp1,fp0			* is first result > second?

	fbgt.w	fadd_unfl		* yes; it's an underflow
	bra.w	fadd_normal		* no; it's not an underflow

*#########################################################################

*
* Add: inputs are not both normalized; what are they?
*
* d1 = (DTAG << 3) | STAG (set up in fadd) is used as a word index into
* tbl_fadd_op; the fetched entry is a 16-bit offset relative to the table,
* which is then jumped through. Rows are selected by the dst tag and
* columns by the src tag, 8 entries per row (6 used, 2 padding entries
* holding offset 0). Entry labels read "dst + src".
*
fadd_not_norm:
	move.w	(tbl_fadd_op.b,pc,d1.w*2),d1
	jmp	(tbl_fadd_op.b,pc,d1.w*1)

* $4AFC followed by the word count (48) precedes the table.
	.dc.w	$4AFC,48
tbl_fadd_op:
	.dc.w	fadd_norm-tbl_fadd_op		* NORM + NORM
	.dc.w	fadd_zero_src-tbl_fadd_op	* NORM + ZERO
	.dc.w	fadd_inf_src-tbl_fadd_op	* NORM + INF
	.dc.w	fadd_res_qnan-tbl_fadd_op	* NORM + QNAN
	.dc.w	fadd_norm-tbl_fadd_op		* NORM + DENORM
	.dc.w	fadd_res_snan-tbl_fadd_op	* NORM + SNAN
	.dc.w	tbl_fadd_op-tbl_fadd_op		* (padding)
	.dc.w	tbl_fadd_op-tbl_fadd_op		* (padding)

	.dc.w	fadd_zero_dst-tbl_fadd_op	* ZERO + NORM
	.dc.w	fadd_zero_2-tbl_fadd_op		* ZERO + ZERO
	.dc.w	fadd_inf_src-tbl_fadd_op	* ZERO + INF
	.dc.w	fadd_res_qnan-tbl_fadd_op	* ZERO + QNAN
	.dc.w	fadd_zero_dst-tbl_fadd_op	* ZERO + DENORM
	.dc.w	fadd_res_snan-tbl_fadd_op	* ZERO + SNAN
	.dc.w	tbl_fadd_op-tbl_fadd_op		* (padding)
	.dc.w	tbl_fadd_op-tbl_fadd_op		* (padding)

	.dc.w	fadd_inf_dst-tbl_fadd_op	* INF + NORM
	.dc.w	fadd_inf_dst-tbl_fadd_op	* INF + ZERO
	.dc.w	fadd_inf_2-tbl_fadd_op		* INF + INF
	.dc.w	fadd_res_qnan-tbl_fadd_op	* INF + QNAN
	.dc.w	fadd_inf_dst-tbl_fadd_op	* INF + DENORM
	.dc.w	fadd_res_snan-tbl_fadd_op	* INF + SNAN
	.dc.w	tbl_fadd_op-tbl_fadd_op		* (padding)
	.dc.w	tbl_fadd_op-tbl_fadd_op		* (padding)

	.dc.w	fadd_res_qnan-tbl_fadd_op	* QNAN + NORM
	.dc.w	fadd_res_qnan-tbl_fadd_op	* QNAN + ZERO
	.dc.w	fadd_res_qnan-tbl_fadd_op	* QNAN + INF
	.dc.w	fadd_res_qnan-tbl_fadd_op	* QNAN + QNAN
	.dc.w	fadd_res_qnan-tbl_fadd_op	* QNAN + DENORM
	.dc.w	fadd_res_snan-tbl_fadd_op	* QNAN + SNAN
	.dc.w	tbl_fadd_op-tbl_fadd_op		* (padding)
	.dc.w	tbl_fadd_op-tbl_fadd_op		* (padding)

	.dc.w	fadd_norm-tbl_fadd_op		* DENORM + NORM
	.dc.w	fadd_zero_src-tbl_fadd_op	* DENORM + ZERO
	.dc.w	fadd_inf_src-tbl_fadd_op	* DENORM + INF
	.dc.w	fadd_res_qnan-tbl_fadd_op	* DENORM + QNAN
	.dc.w	fadd_norm-tbl_fadd_op		* DENORM + DENORM
	.dc.w	fadd_res_snan-tbl_fadd_op	* DENORM + SNAN
	.dc.w	tbl_fadd_op-tbl_fadd_op		* (padding)
	.dc.w	tbl_fadd_op-tbl_fadd_op		* (padding)

	.dc.w	fadd_res_snan-tbl_fadd_op	* SNAN + NORM
	.dc.w	fadd_res_snan-tbl_fadd_op	* SNAN + ZERO
	.dc.w	fadd_res_snan-tbl_fadd_op	* SNAN + INF
	.dc.w	fadd_res_snan-tbl_fadd_op	* SNAN + QNAN
	.dc.w	fadd_res_snan-tbl_fadd_op	* SNAN + DENORM
	.dc.w	fadd_res_snan-tbl_fadd_op	* SNAN + SNAN
	.dc.w	tbl_fadd_op-tbl_fadd_op		* (padding)
	.dc.w	tbl_fadd_op-tbl_fadd_op		* (padding)

* trampolines: the table holds 16-bit offsets, the handlers need bra.l
fadd_res_qnan:
	bra.l	res_qnan
fadd_res_snan:
	bra.l	res_snan

*
* both operands are ZEROes
* d0 = sign byte of src, d1 = sign byte of dst XOR src; the N flag from
* the eor says whether the signs differ.
*
fadd_zero_2:
	move.b	SRC_EX.w(a0),d0		* are the signs opposite
	move.b	DST_EX.w(a1),d1
	eor.b	d0,d1			* d1 = dst ^ src; d0 still holds src
	bmi.w	fadd_zero_2_chk_rm	* weed out (-ZERO)+(+ZERO)

* the signs are the same. so determine whether they are positive or negative
* and return the appropriately signed zero.
	tst.b	d0			* are ZEROes positive or negative?
	bmi.b	fadd_zero_rm		* negative
	fmove.s	#$00000000,fp0		* return +ZERO
	move.b	#z_bmask,FPSR_CC(a6)	* set Z
	rts

*
* the ZEROes have opposite signs:
* - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
* - -ZERO is returned in the case of RM.
*
fadd_zero_2_chk_rm:
	move.b	3+L_SCR3(a6),d1		* low byte of rnd info
	andi.b	#$30,d1			* extract rnd mode
	cmpi.b	#rm_mode*$10,d1		* is rnd mode == RM?
	beq.b	fadd_zero_rm		* yes
	fmove.s	#$00000000,fp0		* return +ZERO
	move.b	#z_bmask,FPSR_CC(a6)	* set Z
	rts

* shared exit for both "-ZERO" cases above
fadd_zero_rm:
	fmove.s	#$80000000,fp0		* return -ZERO
	move.b	#neg_bmask+z_bmask,FPSR_CC(a6)	* set NEG/Z
	rts

*
* one operand is a ZERO and the other is a DENORM or NORM. scale
* the DENORM or NORM and jump to the regular fadd routine.
* the non-zero operand is copied to its scratch slot (src -> FP_SCR0,
* dst -> FP_SCR1) before scaling, and the other scratch slot is cleared
* AFTER the scale call so that it holds a +ZERO for the add.
*
fadd_zero_dst:
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	bsr.l	scale_to_zero_src	* scale the operand
	clr.w	FP_SCR1_EX(a6)		* dst scratch = +ZERO
	clr.l	FP_SCR1_HI(a6)
	clr.l	FP_SCR1_LO(a6)
	bra.w	fadd_zero_entry		* go execute fadd

fadd_zero_src:
	move.w	DST_EX.w(a1),FP_SCR1_EX(a6)
	move.l	DST_HI(a1),FP_SCR1_HI(a6)
	move.l	DST_LO(a1),FP_SCR1_LO(a6)
	bsr.l	scale_to_zero_dst	* scale the operand
	clr.w	FP_SCR0_EX(a6)		* src scratch = +ZERO
	clr.l	FP_SCR0_HI(a6)
	clr.l	FP_SCR0_LO(a6)
	bra.w	fadd_zero_entry		* go execute fadd

*
* both operands are INFs. an OPERR will result if the INFs have
* different signs. else, an INF of the same sign is returned
*
fadd_inf_2:
	move.b	SRC_EX.w(a0),d0		* exclusive or the signs
	move.b	DST_EX.w(a1),d1
	eor.b	d1,d0
	bmi.l	res_operr		* weed out (-INF)+(+INF)

* ok, so it's not an OPERR. but, we do have to remember to return the 
* src INF since that's where the 881/882 gets the j-bit from...
* (falls through into fadd_inf_src)

*
* src operand is an INF; dst is one of {ZERO, INF, DENORM, NORM}
*
fadd_inf_src:
	fmovem.x	SRC.w(a0),fp0	* return src INF
	tst.b	SRC_EX.w(a0)		* is INF positive?
	bpl.b	fadd_inf_done		* yes; we're done
	move.b	#neg_bmask+inf_bmask,FPSR_CC(a6)	* set INF/NEG
	rts

*
* dst operand is an INF; src is one of {ZERO, DENORM, NORM}
*
fadd_inf_dst:
	fmovem.x	DST.w(a1),fp0	* return dst INF
	tst.b	DST_EX.w(a1)		* is INF positive?
	bpl.b	fadd_inf_done		* yes; we're done
	move.b	#neg_bmask+inf_bmask,FPSR_CC(a6)	* set INF/NEG
	rts

* positive INF result: fp0 already loaded by the caller path
fadd_inf_done:
	move.b	#inf_bmask,FPSR_CC(a6)	* set INF
	rts

*########################################################################
* XDEF ****************************************************************	#
*	fsub(): emulates the fsub instruction				#
*	fssub(): emulates the fssub instruction				#
*	fdsub(): emulates the fdsub instruction				#
*									#
* XREF ****************************************************************	#
* 	addsub_scaler2() - scale the operands so they won't take exc	#
*	ovf_res() - return default overflow result			#
*	unf_res() - return default underflow result			#
*	res_qnan() - set QNAN result					#
* 	res_snan() - set SNAN result					#
*	res_operr() - set OPERR result					#
*	scale_to_zero_src() - set src operand exponent equal to zero	#
*	scale_to_zero_dst() - set dst operand exponent equal to zero	#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precision source operand		#
* 	a1 = pointer to extended precision destination operand		#
*									#
* OUTPUT **************************************************************	#
*	fp0 = result							#
*	fp1 = EXOP (if exception occurred)				#
*									#
* ALGORITHM ***********************************************************	#
* 	Handle NANs, infinities, and zeroes as special cases. Divide	#
* norms into extended, single, and double precision.			#
*	Do subtraction after scaling exponents such that exception won't#
* occur. Then, check result exponent to see if exception would have	#
* occurred. If so, return default result and maybe EXOP. Else, insert	#
* the correct result exponent and return. Set FPSR bits as appropriate.	#
*									#
*########################################################################

*
* Entry points. d0 holds the rounding info (rnd prec,mode) in its low byte.
* fssub/fdsub keep only the rnd mode bits ($30) and force the rounding
* precision to single/double before joining fsub; fdsub falls through.
*
	global	fssub
fssub:
	andi.b	#$30,d0			* clear rnd prec
	ori.b	#s_mode*$10,d0		* insert sgl prec
	bra.b	fsub

	global	fdsub
fdsub:
	andi.b	#$30,d0			* clear rnd prec
	ori.b	#d_mode*$10,d0		* insert dbl prec

	global	fsub
fsub:
	move.l	d0,L_SCR3(a6)		* store rnd info

* d1 = (DTAG << 3) | STAG; it is zero only when both tags are zero
* (NORM - NORM in tbl_fsub_op), otherwise it indexes tbl_fsub_op.
	clr.w	d1
	move.b	DTAG(a6),d1
	lsl.b	#$3,d1
	or.b	STAG(a6),d1		* combine src tags

	bne.w	fsub_not_norm		* optimize on non-norm input

*
* SUB: norms and denorms
*
* The scaled operands live in FP_SCR0 (src) and FP_SCR1 (dst); the result
* is dst - src. d0 is used below as the scale factor that is subtracted
* from the result exponent.
* NOTE(review): d0 is presumably returned by addsub_scaler2 (or by
* scale_to_zero_src/dst when entering at fsub_zero_entry) - confirm.
*
fsub_norm:
	bsr.l	addsub_scaler2		* scale exponents

fsub_zero_entry:
	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	#$0,fpsr		* clear FPSR
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fsub.x	FP_SCR0(a6),fp0		* execute subtract

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	fpsr,d1			* fetch INEX2, N, Z

	or.l	d1,USER_FPSR(a6)	* save exc and ccode bits

	fbeq.w	fsub_zero_exit		* if result zero, end now

* from here on the stack holds: 12-byte result at (sp), saved d2 above it.
	move.l	d2,-(sp)		* save d2

	fmovem.x	fp0,-(sp)	* save result to stack

	move.w	2+L_SCR3(a6),d1		* low word of rnd info
	lsr.b	#$6,d1			* d1 = rnd prec, used as table index

	move.w	(sp),d2			* fetch new exponent
	andi.l	#$7fff,d2		* strip sign
	sub.l	d0,d2			* add scale factor

	cmp.l	(tbl_fsub_ovfl.b,pc,d1.w*4),d2		* is it an overflow?
	bge.b	fsub_ovfl		* yes

	cmp.l	(tbl_fsub_unfl.b,pc,d1.w*4),d2		* is it an underflow?
	blt.w	fsub_unfl		* yes
	beq.w	fsub_may_unfl		* maybe; go find out

* d2 = unscaled result exponent; put it back into the saved result.
fsub_normal:
	move.w	(sp),d1
	andi.w	#$8000,d1		* keep sign
	or.w	d2,d1			* concat sign,new exp
	move.w	d1,(sp)			* insert new exponent

	fmovem.x	(sp)+,fp0	* return result in fp0

	move.l	(sp)+,d2		* restore d2
	rts

* result was zero: fp0 already holds it and nothing was pushed.
fsub_zero_exit:
*	fmov.s		&0x00000000,%fp0	# return zero in fp0
	rts

*
* Exponent limits, one longword per rounding precision, indexed by
* (rnd prec)*4 from fsub_zero_entry. An unscaled result exponent >= the
* ovfl entry is an overflow; < the unfl entry is an underflow; equal to
* the unfl entry is the "may underflow" case.
*
tbl_fsub_ovfl:
	.dc.l	$7fff			* ext ovfl
	.dc.l	$407f			* sgl ovfl
	.dc.l	$43ff			* dbl ovfl

tbl_fsub_unfl:
	.dc.l	$0000			* ext unfl
	.dc.l	$3f81			* sgl unfl
	.dc.l	$3c01			* dbl unfl

*
* The subtract overflowed. Entered with the 12-byte result and the saved
* d2 still on the stack. If neither OVFL nor INEX is enabled the saved
* result is discarded and the default overflow result is returned.
*
fsub_ovfl:
	ori.l	#ovfl_inx_mask,USER_FPSR(a6)	* set ovfl/aovfl/ainex

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$13,d1			* is OVFL or INEX enabled?
	bne.b	fsub_ovfl_ena		* yes

	add.l	#$c,sp			* drop the saved result
* common tail; only the saved d2 remains on the stack here.
fsub_ovfl_dis:
	btst	#neg_bit,FPSR_CC(a6)	* is result negative?
	sne	d1			* set sign param accordingly
	move.l	L_SCR3(a6),d0		* pass prec:rnd
	bsr.l	ovf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* set INF,N if applicable
	fmovem.x	(a0),fp0	* return default result in fp0
	move.l	(sp)+,d2		* restore d2
	rts

*
* OVFL or INEX is enabled: return an EXOP in fp1 in addition to the
* default result. On entry the 12-byte result and the saved d2 are on the
* stack, d0 is the scale factor and d2 the unscaled result exponent.
*
* The rnd prec,mode byte is the LOW byte of the longword stored at
* L_SCR3, i.e. 3+L_SCR3 on this big-endian machine (compare
* fsub_zero_2_chk_rm). Reading L_SCR3+0 would fetch the most significant
* byte instead, so the sgl/dbl branch below would never test the rounding
* precision.
*
fsub_ovfl_ena:
	move.b	3+L_SCR3(a6),d1		* fetch rnd prec,mode byte
	andi.b	#$c0,d1			* is precision extended?
	bne.b	fsub_ovfl_ena_sd	* no

* EXOP = saved result with exponent (d2 - $6000) mod 2^15, same sign.
fsub_ovfl_ena_cont:
	move.w	(sp),d1			* fetch {sgn,exp}
	andi.w	#$8000,d1		* keep sign
	subi.l	#$6000,d2		* subtract new bias
	andi.w	#$7fff,d2		* clear top bit
	or.w	d2,d1			* concat sign,exp
	move.w	d1,(sp)			* insert new exponent

	fmovem.x	(sp)+,fp1	* return EXOP in fp1
	bra.b	fsub_ovfl_dis

* sgl/dbl: redo the subtract with only the rnd mode set (extended
* precision) and replace the saved result with it.
fsub_ovfl_ena_sd:
	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	move.l	L_SCR3(a6),d1
	andi.b	#$30,d1			* clear rnd prec
	fmove.l	d1,fpcr			* set FPCR

	fsub.x	FP_SCR0(a6),fp0		* execute subtract

	fmove.l	#$0,fpcr		* clear FPCR

	add.l	#$c,sp			* drop the sgl/dbl rounded result
	fmovem.x	fp0,-(sp)	* save the re-rounded result instead
* the re-rounded result may have a different exponent than the sgl/dbl
* rounded one (rounding carry), so recompute d2 from the new result.
	move.w	(sp),d2			* fetch new sign, exp
	andi.l	#$7fff,d2		* strip sign
	sub.l	d0,d2			* add scale factor
	bra.b	fsub_ovfl_ena_cont

*
* The subtract underflowed. The saved result is discarded (saved d2 stays
* on the stack) and the subtract is redone in extended precision with RZ
* rounding; the FPSR bits of that subtract are merged into USER_FPSR.
*
fsub_unfl:
	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set unfl exc bit

	add.l	#$c,sp			* drop the saved result

	fmovem.x	FP_SCR1(a6),fp0	* load dst op

	fmove.l	#rz_mode*$10,fpcr	* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsub.x	FP_SCR0(a6),fp0		* execute subtract

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	fpsr,d1			* save status

	or.l	d1,USER_FPSR(a6)	* merge status bits of the RZ subtract

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$0b,d1			* is UNFL or INEX enabled?
	bne.b	fsub_unfl_ena		* yes

* common tail: fp0 -> FP_SCR0, unf_res builds the default result there.
fsub_unfl_dis:
	fmovem.x	fp0,FP_SCR0(a6)	* store out result

	lea	FP_SCR0(a6),a0		* pass: result addr
	move.l	L_SCR3(a6),d1		* pass: rnd prec,mode
	bsr.l	unf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* 'Z' may have been set
	fmovem.x	FP_SCR0(a6),fp0	* return default result in fp0
	move.l	(sp)+,d2		* restore d2
	rts

*
* UNFL or INEX is enabled: build the EXOP in fp1.
* The subtract is redone into fp1 with the user's FPCR when the precision
* is extended, or with only the rnd mode (extended precision) for sgl/dbl.
* EXOP exponent = (result exponent - scale factor in d0) + $6000,
* truncated to 15 bits, with the original sign. d2 is free to clobber
* here because the caller's d2 is still saved on the stack.
*
fsub_unfl_ena:
	fmovem.x	FP_SCR1(a6),fp1	* load dst op

	move.l	L_SCR3(a6),d1
	andi.b	#$c0,d1			* is precision extended?
	bne.b	fsub_unfl_ena_sd	* no

	fmove.l	L_SCR3(a6),fpcr		* set FPCR

fsub_unfl_ena_cont:
	fmove.l	#$0,fpsr		* clear FPSR

	fsub.x	FP_SCR0(a6),fp1		* execute subtract

	fmove.l	#$0,fpcr		* clear FPCR

	fmovem.x	fp1,FP_SCR0(a6)	* store result to stack
	move.w	FP_SCR0_EX(a6),d1	* fetch {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* add scale factor
	addi.l	#$6000,d1		* add new bias
	andi.w	#$7fff,d1		* clear top bit
	or.w	d2,d1			* concat sgn,exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	bra.w	fsub_unfl_dis

fsub_unfl_ena_sd:
	move.l	L_SCR3(a6),d1
	andi.b	#$30,d1			* clear rnd prec
	fmove.l	d1,fpcr			* set FPCR

	bra.b	fsub_unfl_ena_cont

*
* result exponent is equal to the smallest normalized exponent in the
* selected precision. if the precision is extended, this result could not
* have come from an underflow that rounded up.
* the saved result (12 bytes) and saved d2 are still on the stack; every
* early exit goes to fsub_normal, which consumes them.
*
fsub_may_unfl:
	move.l	L_SCR3(a6),d1
	andi.b	#$c0,d1			* is precision extended?
	beq.w	fsub_normal		* yes; no underflow occurred

	move.l	$4(sp),d1		* extract hi(man)
	cmpi.l	#$80000000,d1		* is hi(man) = 0x80000000?
	bne.w	fsub_normal		* no; no underflow occurred

	tst.l	$8(sp)			* is lo(man) = 0x0?
	bne.w	fsub_normal		* no; no underflow occurred

	btst	#inex2_bit,FPSR_EXCEPT(a6)	* is INEX2 set?
	beq.w	fsub_normal		* no; no underflow occurred

*
* ok, so now the result has an exponent equal to the smallest normalized
* exponent for the selected precision. also, the mantissa is equal to
* 0x8000000000000000 and this mantissa is the result of rounding non-zero
* g,r,s. 
* now, we must determine whether the pre-rounded result was an underflow
* rounded "up" or a normalized number rounded "down".
* so, we do this by re-executing the subtract using RZ as the rounding
* mode and seeing if the new result is smaller or equal to the current
* result.
* fp0 is overwritten with its absolute value for the compare; both exits
* reload or recompute fp0 afterwards.
*
	fmovem.x	FP_SCR1(a6),fp1	* load dst op into fp1

	move.l	L_SCR3(a6),d1
	andi.b	#$c0,d1			* keep rnd prec
	ori.b	#rz_mode*$10,d1		* insert rnd mode
	fmove.l	d1,fpcr			* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsub.x	FP_SCR0(a6),fp1		* execute subtract

	fmove.l	#$0,fpcr		* clear FPCR

	fabs.x	fp0			* compare absolute values
	fabs.x	fp1
	fcmp.x	fp1,fp0			* is first result > second?

	fbgt.w	fsub_unfl		* yes; it's an underflow
	bra.w	fsub_normal		* no; it's not an underflow

*#########################################################################

*
* Sub: inputs are not both normalized; what are they?
*
* d1 = (DTAG << 3) | STAG (set up in fsub) is used as a word index into
* tbl_fsub_op; the fetched entry is a 16-bit offset relative to the table,
* which is then jumped through. Rows are selected by the dst tag and
* columns by the src tag, 8 entries per row (6 used, 2 padding entries
* holding offset 0). Entry labels read "dst - src".
*
fsub_not_norm:
	move.w	(tbl_fsub_op.b,pc,d1.w*2),d1
	jmp	(tbl_fsub_op.b,pc,d1.w*1)

* $4AFC followed by the word count (48) precedes the table.
	.dc.w	$4AFC,48
tbl_fsub_op:
	.dc.w	fsub_norm-tbl_fsub_op		* NORM - NORM
	.dc.w	fsub_zero_src-tbl_fsub_op	* NORM - ZERO
	.dc.w	fsub_inf_src-tbl_fsub_op	* NORM - INF
	.dc.w	fsub_res_qnan-tbl_fsub_op	* NORM - QNAN
	.dc.w	fsub_norm-tbl_fsub_op		* NORM - DENORM
	.dc.w	fsub_res_snan-tbl_fsub_op	* NORM - SNAN
	.dc.w	tbl_fsub_op-tbl_fsub_op		* (padding)
	.dc.w	tbl_fsub_op-tbl_fsub_op		* (padding)

	.dc.w	fsub_zero_dst-tbl_fsub_op	* ZERO - NORM
	.dc.w	fsub_zero_2-tbl_fsub_op		* ZERO - ZERO
	.dc.w	fsub_inf_src-tbl_fsub_op	* ZERO - INF
	.dc.w	fsub_res_qnan-tbl_fsub_op	* ZERO - QNAN
	.dc.w	fsub_zero_dst-tbl_fsub_op	* ZERO - DENORM
	.dc.w	fsub_res_snan-tbl_fsub_op	* ZERO - SNAN
	.dc.w	tbl_fsub_op-tbl_fsub_op		* (padding)
	.dc.w	tbl_fsub_op-tbl_fsub_op		* (padding)

	.dc.w	fsub_inf_dst-tbl_fsub_op	* INF - NORM
	.dc.w	fsub_inf_dst-tbl_fsub_op	* INF - ZERO
	.dc.w	fsub_inf_2-tbl_fsub_op		* INF - INF
	.dc.w	fsub_res_qnan-tbl_fsub_op	* INF - QNAN
	.dc.w	fsub_inf_dst-tbl_fsub_op	* INF - DENORM
	.dc.w	fsub_res_snan-tbl_fsub_op	* INF - SNAN
	.dc.w	tbl_fsub_op-tbl_fsub_op		* (padding)
	.dc.w	tbl_fsub_op-tbl_fsub_op		* (padding)

	.dc.w	fsub_res_qnan-tbl_fsub_op	* QNAN - NORM
	.dc.w	fsub_res_qnan-tbl_fsub_op	* QNAN - ZERO
	.dc.w	fsub_res_qnan-tbl_fsub_op	* QNAN - INF
	.dc.w	fsub_res_qnan-tbl_fsub_op	* QNAN - QNAN
	.dc.w	fsub_res_qnan-tbl_fsub_op	* QNAN - DENORM
	.dc.w	fsub_res_snan-tbl_fsub_op	* QNAN - SNAN
	.dc.w	tbl_fsub_op-tbl_fsub_op		* (padding)
	.dc.w	tbl_fsub_op-tbl_fsub_op		* (padding)

	.dc.w	fsub_norm-tbl_fsub_op		* DENORM - NORM
	.dc.w	fsub_zero_src-tbl_fsub_op	* DENORM - ZERO
	.dc.w	fsub_inf_src-tbl_fsub_op	* DENORM - INF
	.dc.w	fsub_res_qnan-tbl_fsub_op	* DENORM - QNAN
	.dc.w	fsub_norm-tbl_fsub_op		* DENORM - DENORM
	.dc.w	fsub_res_snan-tbl_fsub_op	* DENORM - SNAN
	.dc.w	tbl_fsub_op-tbl_fsub_op		* (padding)
	.dc.w	tbl_fsub_op-tbl_fsub_op		* (padding)

	.dc.w	fsub_res_snan-tbl_fsub_op	* SNAN - NORM
	.dc.w	fsub_res_snan-tbl_fsub_op	* SNAN - ZERO
	.dc.w	fsub_res_snan-tbl_fsub_op	* SNAN - INF
	.dc.w	fsub_res_snan-tbl_fsub_op	* SNAN - QNAN
	.dc.w	fsub_res_snan-tbl_fsub_op	* SNAN - DENORM
	.dc.w	fsub_res_snan-tbl_fsub_op	* SNAN - SNAN
	.dc.w	tbl_fsub_op-tbl_fsub_op		* (padding)
	.dc.w	tbl_fsub_op-tbl_fsub_op		* (padding)

* trampolines: the table holds 16-bit offsets, the handlers need bra.l
fsub_res_qnan:
	bra.l	res_qnan
fsub_res_snan:
	bra.l	res_snan

*
* both operands are ZEROes (result is dst - src)
* d0 = src sign byte XOR dst sign byte, d1 = dst sign byte (unchanged).
*
fsub_zero_2:
	move.b	SRC_EX.w(a0),d0
	move.b	DST_EX.w(a1),d1
	eor.b	d1,d0			* d0 = src ^ dst; d1 still holds dst
	bpl.b	fsub_zero_2_chk_rm	* same signs

* the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
* d0 now holds the XOR of the signs (always negative on this path), so
* the dst sign must be tested in d1. Testing d0 here would always return
* -ZERO, even for (+ZERO)-(-ZERO).
	tst.b	d1			* is dst negative?
	bmi.b	fsub_zero_2_rm		* yes
	fmove.s	#$00000000,fp0		* no; return +ZERO
	move.b	#z_bmask,FPSR_CC(a6)	* set Z
	rts

*
* the ZEROes have the same signs:
* - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
* - -ZERO is returned in the case of RM.
*
fsub_zero_2_chk_rm:
	move.b	3+L_SCR3(a6),d1		* low byte of rnd info
	andi.b	#$30,d1			* extract rnd mode
	cmpi.b	#rm_mode*$10,d1		* is rnd mode = RM?
	beq.b	fsub_zero_2_rm		* yes
	fmove.s	#$00000000,fp0		* no; return +ZERO
	move.b	#z_bmask,FPSR_CC(a6)	* set Z
	rts

* shared exit for both "-ZERO" cases above
fsub_zero_2_rm:
	fmove.s	#$80000000,fp0		* return -ZERO
	move.b	#z_bmask+neg_bmask,FPSR_CC(a6)	* set Z/NEG
	rts

*
* one operand is a ZERO and the other is a DENORM or a NORM.
* scale the DENORM or NORM and jump to the regular fsub routine.
* the non-zero operand is copied to its scratch slot (src -> FP_SCR0,
* dst -> FP_SCR1) before scaling, and the other scratch slot is cleared
* AFTER the scale call so that it holds a +ZERO for the subtract.
*
fsub_zero_dst:
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	bsr.l	scale_to_zero_src	* scale the operand
	clr.w	FP_SCR1_EX(a6)		* dst scratch = +ZERO
	clr.l	FP_SCR1_HI(a6)
	clr.l	FP_SCR1_LO(a6)
	bra.w	fsub_zero_entry		* go execute fsub

fsub_zero_src:
	move.w	DST_EX.w(a1),FP_SCR1_EX(a6)
	move.l	DST_HI(a1),FP_SCR1_HI(a6)
	move.l	DST_LO(a1),FP_SCR1_LO(a6)
	bsr.l	scale_to_zero_dst	* scale the operand
	clr.w	FP_SCR0_EX(a6)		* src scratch = +ZERO
	clr.l	FP_SCR0_HI(a6)
	clr.l	FP_SCR0_LO(a6)
	bra.w	fsub_zero_entry		* go execute fsub

*
* both operands are INFs. an OPERR will result if the INFs have the
* same signs. else, the src INF with its sign inverted is returned
* (it then has the same sign as the dst INF).
*
fsub_inf_2:
	move.b	SRC_EX.w(a0),d0		* exclusive or the signs
	move.b	DST_EX.w(a1),d1
	eor.b	d1,d0
	bpl.l	res_operr		* weed out (+INF)-(+INF), (-INF)-(-INF)

* ok, so it's not an OPERR. but we do have to remember to return
* the src INF since that's where the 881/882 gets the j-bit.
* (falls through into fsub_inf_src)

* src operand is an INF: the result is the src INF with inverted sign.
* fneg sets the FPU condition codes that fbge tests.
fsub_inf_src:
	fmovem.x	SRC.w(a0),fp0	* return src INF
	fneg.x	fp0			* invert sign
	fbge.w	fsub_inf_done		* sign is now positive
	move.b	#neg_bmask+inf_bmask,FPSR_CC(a6)	* set INF/NEG
	rts

* dst operand is an INF (src is not): the result is the dst INF.
fsub_inf_dst:
	fmovem.x	DST.w(a1),fp0	* return dst INF
	tst.b	DST_EX.w(a1)		* is INF negative?
	bpl.b	fsub_inf_done		* no
	move.b	#neg_bmask+inf_bmask,FPSR_CC(a6)	* set INF/NEG
	rts

* positive INF result: fp0 already loaded by the caller path
fsub_inf_done:
	move.b	#inf_bmask,FPSR_CC(a6)	* set INF
	rts

*########################################################################
* XDEF ****************************************************************	#
* 	fsqrt(): emulates the fsqrt instruction				#
*	fssqrt(): emulates the fssqrt instruction			#
*	fdsqrt(): emulates the fdsqrt instruction			#
*									#
* XREF ****************************************************************	#
*	scale_sqrt() - scale the source operand				#
*	unf_res() - return default underflow result			#
*	ovf_res() - return default overflow result			#
* 	res_qnan_1op() - return QNAN result				#
* 	res_snan_1op() - return SNAN result				#
*									#
* INPUT ***************************************************************	#
*	a0 = pointer to extended precision source operand		#
*	d0 = rnd prec,mode						#
*									#
* OUTPUT **************************************************************	#
*	fp0 = result							#
*	fp1 = EXOP (if exception occurred)				#
*									#
* ALGORITHM ***********************************************************	#
*	Handle NANs, infinities, and zeroes as special cases. Divide	#
* norms/denorms into ext/sgl/dbl precision.				#
*	For norms/denorms, scale the exponents such that a sqrt		#
* instruction won't cause an exception. Use the regular fsqrt to	#
* compute a result. Check if the regular operands would have taken	#
* an exception. If so, return the default overflow/underflow result	#
* and return the EXOP if exceptions are enabled. Else, scale the 	#
* result operand to the proper exponent.				#
*									#
*########################################################################

*
* Entry points. d0 holds the rounding info (rnd prec,mode) in its low byte.
* fssqrt/fdsqrt keep only the rnd mode bits ($30) and force the rounding
* precision to single/double before joining fsqrt; fdsqrt falls through.
*
	global	fssqrt
fssqrt:
	andi.b	#$30,d0			* clear rnd prec
	ori.b	#s_mode*$10,d0		* insert sgl precision
	bra.b	fsqrt

	global	fdsqrt
fdsqrt:
	andi.b	#$30,d0			* clear rnd prec
	ori.b	#d_mode*$10,d0		* insert dbl precision

	global	fsqrt
fsqrt:
	move.l	d0,L_SCR3(a6)		* store rnd info
	clr.w	d1
	move.b	STAG(a6),d1		* d1 = src tag; zero falls through to fsqrt_norm
	bne.w	fsqrt_not_norm		* optimize on non-norm input

*
* SQUARE ROOT: norms and denorms ONLY!
*
* A negative operand is an OPERR. For extended precision the square root
* is taken directly from the unscaled source operand; sgl/dbl go through
* fsqrt_not_ext. d0 is reduced to its rnd prec bits here.
* NOTE(review): unlike fsqrt_sd_normal, this path returns without
* clearing FPCR - confirm that the caller resets it.
*
fsqrt_norm:
	tst.b	SRC_EX.w(a0)		* is operand negative?
	bmi.l	res_operr		* yes

	andi.b	#$c0,d0			* is precision extended?
	bne.b	fsqrt_not_ext		* no; go handle sgl or dbl

	fmove.l	L_SCR3(a6),fpcr		* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsqrt.x	(a0),fp0		* execute square root

	fmove.l	fpsr,d1
	or.l	d1,USER_FPSR(a6)	* set N,INEX

	rts

*
* Denormalized operand. A negative operand is an OPERR. For extended
* precision the operand is copied to FP_SCR0, scaled by scale_sqrt, and
* handled by fsqrt_sd_normal, which unscales the result exponent by d0.
* sgl/dbl go through fsqrt_not_ext.
* NOTE(review): d0 after scale_sqrt is presumably the scale factor
* consumed at fsqrt_sd_normal_exit - confirm.
*
fsqrt_denorm:
	tst.b	SRC_EX.w(a0)		* is operand negative?
	bmi.l	res_operr		* yes

	andi.b	#$c0,d0			* is precision extended?
	bne.b	fsqrt_not_ext		* no; go handle sgl or dbl

	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)

	bsr.l	scale_sqrt		* calculate scale factor

	bra.w	fsqrt_sd_normal

*
* rounding precision is either single or double
* d0 holds only the rnd prec bits here (masked with $c0 by the caller).
* single falls through to fsqrt_sgl.
*
fsqrt_not_ext:
	cmpi.b	#s_mode*$10,d0		* separate sgl/dbl prec
	bne.w	fsqrt_dbl

*
* operand is to be rounded to single precision
*
fsqrt_sgl:
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)

	bsr.l	scale_sqrt		* calculate scale factor

	cmpi.l	#$3fff-$3f81,d0		* will move in underflow?
	beq.w	fsqrt_sd_may_unfl
	bgt.w	fsqrt_sd_unfl		* yes; go handle underflow
	cmpi.l	#$3fff-$407f,d0		* will move in overflow?
	beq.w	fsqrt_sd_may_ovfl	* maybe; go check
	blt.w	fsqrt_sd_ovfl		* yes; go handle overflow

*
* operand will NOT overflow or underflow when moved in to the fp reg file
*
* In: FP_SCR0 = scaled operand, d0 = scale factor, L_SCR3 = rnd info.
* The root is computed with the saved rounding info, the resulting
* status is merged into USER_FPSR and FPCR is cleared again.
*
fsqrt_sd_normal:
	fmove.l	#$0,fpsr		* clear FPSR
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fsqrt.x	FP_SCR0(a6),fp0		* perform square root

	fmove.l	fpsr,d1			* save FPSR
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

*
* fsqrt_sd_normal_exit: undo the scaling. The result in fp0 is spilled
* to FP_SCR0, the scale factor in d0 is subtracted from its exponent
* (the sign bit is kept) and the corrected value is reloaded into fp0.
* d2 is saved and restored around the fix-up.
*
fsqrt_sd_normal_exit:
	move.l	d2,-(sp)		* save d2
	fmovem.x	fp0,FP_SCR0(a6)	* store out result
	move.w	FP_SCR0_EX(a6),d1	* load sgn,exp
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	sub.l	d0,d1			* subtract scale factor
	andi.w	#$8000,d2		* keep old sign
	or.w	d1,d2			* concat old sign,new exp
	move.w	d2,FP_SCR0_EX(a6)	* insert new exponent
	move.l	(sp)+,d2		* restore d2
	fmovem.x	FP_SCR0(a6),fp0	* return result in fp0
	rts

*
* operand is to be rounded to double precision
*
* Same scheme as fsqrt_sgl, but the scale factor returned by scale_sqrt
* in d0 is compared with the double-precision exponent limits.
*
fsqrt_dbl:
	move.w	SRC_EX.w(a0),FP_SCR0_EX(a6)
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)

	bsr.l	scale_sqrt		* calculate scale factor

	cmpi.l	#$3fff-$3c01,d0		* will move in underflow?
	beq.w	fsqrt_sd_may_unfl	* maybe; go check
	bgt.b	fsqrt_sd_unfl		* yes; go handle underflow
	cmpi.l	#$3fff-$43ff,d0		* will move in overflow?
	beq.w	fsqrt_sd_may_ovfl	* maybe; go check
	blt.w	fsqrt_sd_ovfl		* yes; go handle overflow
	bra.w	fsqrt_sd_normal		* no; go handle normalized op

* we're on the line here and the distinguishing characteristic is whether
* the scaled exponent is 3fff or 3ffe. if it's 3fff, then it's a safe
* number; otherwise fall through to underflow.
fsqrt_sd_may_unfl:
	btst	#$0,1+FP_SCR0_EX(a6)	* is exponent 0x3fff?
	bne.w	fsqrt_sd_normal		* yes, so no underflow

*
* operand WILL underflow when moved in to the fp register file
*
* The UNFL exception bit is recorded, the root is computed with
* FPCR = rz_mode*$10, and the resulting status is merged into
* USER_FPSR. If any of the enable bits selected by the $0b mask is set,
* the EXOP is built first by fsqrt_sd_unfl_ena.
*
fsqrt_sd_unfl:
	bset	#unfl_bit,FPSR_EXCEPT(a6)	* set unfl exc bit

	fmove.l	#rz_mode*$10,fpcr	* set FPCR
	fmove.l	#$0,fpsr		* clear FPSR

	fsqrt.x	FP_SCR0(a6),fp0		* execute square root

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

* if underflow or inexact is enabled, go calculate EXOP first.
	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$0b,d1			* is UNFL or INEX enabled?
	bne.b	fsqrt_sd_unfl_ena	* yes

*
* fsqrt_sd_unfl_dis: produce the default underflow result. The root is
* stored to FP_SCR0 and unf_res() is called with a0 = result address
* and d1 = rounding info; the byte it returns in d0 is OR-ed into the
* condition codes and the default result is returned in fp0.
* NOTE(review): d0 still holds the scale factor at the call --
* presumably unf_res() consumes it as an input; confirm.
*
fsqrt_sd_unfl_dis:
	fmovem.x	fp0,FP_SCR0(a6)	* store out result

	lea	FP_SCR0(a6),a0		* pass: result addr
	move.l	L_SCR3(a6),d1		* pass: rnd prec,mode
	bsr.l	unf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* set possible 'Z' ccode
	fmovem.x	FP_SCR0(a6),fp0	* return default result in fp0
	rts

*
* operand will underflow AND underflow (or inexact) is enabled.
* therefore, we must return the result rounded to extended precision
* as the EXOP in fp1.
*
* The EXOP has to be built from the computed root. At this point
* FP_SCR0 still holds the scaled *operand* (the root in fp0 is not
* stored until fsqrt_sd_unfl_dis), so copying FP_SCR0 would hand back
* the operand's mantissa. Instead the root is recomputed into fp1 in
* extended precision using only the user's rounding mode, and its
* exponent is then unscaled and re-biased by +$6000.
*
* The live FPSR is saved and restored around the recomputation so the
* status produced by the first fsqrt is left exactly as it was.
*
* In:  d0 = scale factor, FP_SCR0 = scaled operand, L_SCR3 = rnd info
* Out: fp1 = EXOP. d0, d2, fp0 and FP_SCR0 are unchanged on exit to
*      fsqrt_sd_unfl_dis.
*
fsqrt_sd_unfl_ena:
	move.l	L_SCR3(a6),d1		* fetch rnd prec,mode
	andi.l	#$30,d1			* keep rnd mode only (ext prec)
	fmove.l	fpsr,-(sp)		* preserve live FPSR
	fmove.l	d1,fpcr			* set FPCR

	fsqrt.x	FP_SCR0(a6),fp1		* root of the scaled operand

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	(sp)+,fpsr		* restore live FPSR

	fmovem.x	fp1,FP_SCR1(a6)	* store root for exponent fix-up
	move.w	FP_SCR1_EX(a6),d1	* load current {sgn,exp}

	move.l	d2,-(sp)		* save d2
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* subtract scale factor
	addi.l	#$6000,d1		* add new bias
	andi.w	#$7fff,d1
	or.w	d2,d1			* concat new sign,new exp
	move.w	d1,FP_SCR1_EX(a6)	* insert new exp
	fmovem.x	FP_SCR1(a6),fp1	* return EXOP in fp1
	move.l	(sp)+,d2		* restore d2
	bra.w	fsqrt_sd_unfl_dis	* .w: block grew; stay in range

*
* operand WILL overflow.
*
* The root is computed with the saved rounding info and the resulting
* status is merged into USER_FPSR before the overflow bookkeeping.
*
fsqrt_sd_ovfl:
	fmove.l	#$0,fpsr		* clear FPSR
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fsqrt.x	FP_SCR0(a6),fp0		* perform square root

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	fpsr,d1			* save FPSR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

*
* fsqrt_sd_ovfl_tst: also entered from fsqrt_sd_may_ovfl once an
* overflow has been detected. Records the overflow bits and, if any of
* the enable bits selected by the $13 mask is set, builds the EXOP
* first.
*
fsqrt_sd_ovfl_tst:
	ori.l	#ovfl_inx_mask,USER_FPSR(a6)	* set ovfl/aovfl/ainex

	move.b	FPCR_ENABLE(a6),d1
	andi.b	#$13,d1			* is OVFL or INEX enabled?
	bne.b	fsqrt_sd_ovfl_ena	* yes

*
* OVFL is not enabled; therefore, we must create the default result by
* calling ovf_res().
*
* d1 carries the sign (set from the N ccode bit) and d0 the rounding
* info; the byte returned in d0 is OR-ed into the condition codes.
* NOTE(review): the result is loaded from (a0) -- presumably ovf_res()
* returns a pointer to the default result in a0; confirm.
*
fsqrt_sd_ovfl_dis:
	btst	#neg_bit,FPSR_CC(a6)	* is result negative?
	sne	d1			* set sign param accordingly
	move.l	L_SCR3(a6),d0		* pass: prec,mode
	bsr.l	ovf_res			* calculate default result
	or.b	d0,FPSR_CC(a6)		* set INF,N if applicable
	fmovem.x	(a0),fp0	* return default result in fp0
	rts

*
* OVFL is enabled.
* the INEX2 bit has already been updated by the round to the correct precision.
* now, round to extended(and don't alter the FPSR).
*
* The EXOP has to be built from the computed root. FP_SCR0 still holds
* the scaled *operand* here, so adjusting only its exponent would hand
* back the operand's mantissa. Instead the root is recomputed into fp1
* in extended precision using only the user's rounding mode, stored to
* FP_SCR0, and its exponent is unscaled and re-biased by -$6000.
*
* The live FPSR is saved and restored around the recomputation, so
* neither the hardware FPSR nor USER_FPSR is altered.
*
* In:  d0 = scale factor, FP_SCR0 = scaled operand, L_SCR3 = rnd info
* Out: fp1 = EXOP; d2 preserved; continues at fsqrt_sd_ovfl_dis.
*
fsqrt_sd_ovfl_ena:
	move.l	L_SCR3(a6),d1		* fetch rnd prec,mode
	andi.l	#$30,d1			* keep rnd mode only (ext prec)
	fmove.l	fpsr,-(sp)		* preserve live FPSR
	fmove.l	d1,fpcr			* set FPCR

	fsqrt.x	FP_SCR0(a6),fp1		* root of the scaled operand

	fmove.l	#$0,fpcr		* clear FPCR
	fmove.l	(sp)+,fpsr		* restore live FPSR

	fmovem.x	fp1,FP_SCR0(a6)	* store root for exponent fix-up

	move.l	d2,-(sp)		* save d2
	move.w	FP_SCR0_EX(a6),d1	* fetch {sgn,exp}
	move.l	d1,d2			* make a copy
	andi.l	#$7fff,d1		* strip sign
	andi.w	#$8000,d2		* keep old sign
	sub.l	d0,d1			* subtract scale factor
	subi.l	#$6000,d1		* subtract bias
	andi.w	#$7fff,d1
	or.w	d2,d1			* concat sign,exp
	move.w	d1,FP_SCR0_EX(a6)	* insert new exponent
	fmovem.x	FP_SCR0(a6),fp1	* return EXOP in fp1
	move.l	(sp)+,d2		* restore d2
	bra.w	fsqrt_sd_ovfl_dis	* .w: block grew; stay in range

*
* the move in MAY overflow. so...
*
* The scale factor sits exactly on the overflow limit. A scaled
* exponent of 0x3fff is a certain overflow. Otherwise the root is
* computed with the saved rounding info and compared against 1: a
* rounded root >= 1 means overflow has occurred, anything smaller is
* finished by the normal exit path.
*
fsqrt_sd_may_ovfl:
	btst	#$0,1+FP_SCR0_EX(a6)	* is exponent 0x3fff?
	bne.w	fsqrt_sd_ovfl		* yes, so overflow

	fmove.l	#$0,fpsr		* clear FPSR
	fmove.l	L_SCR3(a6),fpcr		* set FPCR

	fsqrt.x	FP_SCR0(a6),fp0		* perform square root

	fmove.l	fpsr,d1			* save status
	fmove.l	#$0,fpcr		* clear FPCR

	or.l	d1,USER_FPSR(a6)	* save INEX2,N

	fmove.x	fp0,fp1			* make a copy of result
	fcmp.b	#$1,fp1			* is |result| >= 1.b?
	fbge.w	fsqrt_sd_ovfl_tst	* yes; overflow has occurred

* no, it didn't overflow; we have correct result
	bra.w	fsqrt_sd_normal_exit

*#########################################################################

*
* input is not normalized; what is it?
*
* In: d1 = source operand tag (loaded from STAG at fsqrt). DENORM, ZERO,
* INF and SNAN are dispatched explicitly; any remaining tag is handled
* as a QNAN.
*
fsqrt_not_norm:
	cmpi.b	#DENORM,d1		* weed out DENORM
	beq.w	fsqrt_denorm
	cmpi.b	#ZERO,d1		* weed out ZERO
	beq.b	fsqrt_zero
	cmpi.b	#INF,d1			* weed out INF
	beq.b	fsqrt_inf
	cmpi.b	#SNAN,d1		* weed out SNAN
	beq.l	res_snan_1op
	bra.l	res_qnan_1op		* everything else: QNAN

*
* 	fsqrt(+0) = +0
* 	fsqrt(-0) = -0
*	fsqrt(+INF) = +INF
* 	fsqrt(-INF) = OPERR
*
* fsqrt_zero: return a zero with the sign of the source operand. The
* condition code byte is overwritten (not OR-ed) with 'Z', plus 'N' for
* a negative zero.
*
fsqrt_zero:
	tst.b	SRC_EX.w(a0)		* is ZERO positive or negative?
	bmi.b	fsqrt_zero_m		* negative
fsqrt_zero_p:
	fmove.s	#$00000000,fp0		* return +ZERO
	move.b	#z_bmask,FPSR_CC(a6)	* set 'Z' ccode bit
	rts
fsqrt_zero_m:
	fmove.s	#$80000000,fp0		* return -ZERO
	move.b	#z_bmask+neg_bmask,FPSR_CC(a6)	* set 'Z','N' ccode bits
	rts

*
* fsqrt_inf: -INF is an operand error; +INF is returned unchanged from
* the source operand and the condition code byte is overwritten with
* the 'I' bit.
*
fsqrt_inf:
	tst.b	SRC_EX.w(a0)		* is INF positive or negative?
	bmi.l	res_operr		* negative
fsqrt_inf_p:
	fmovem.x	SRC.w(a0),fp0	* return +INF in fp0
	move.b	#inf_bmask,FPSR_CC(a6)	* set 'I' ccode bit
	rts

*#########################################################################

*########################################################################
* XDEF ****************************************************************	#
*	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
*			  OVFL/UNFL exceptions will result		#
*									#
* XREF ****************************************************************	#
*	norm() - normalize mantissa after adjusting exponent		#
*									#
* INPUT ***************************************************************	#
*	FP_SRC(a6) = fp op1(src)					#
*	FP_DST(a6) = fp op2(dst)					#
* 									#
* OUTPUT **************************************************************	#
*	FP_SRC(a6) = fp op1 scaled(src)					#
*	FP_DST(a6) = fp op2 scaled(dst)					#
*	d0         = scale amount					#
*									#
* ALGORITHM ***********************************************************	#
* 	If the DST exponent is > the SRC exponent, set the DST exponent	#
* equal to 0x3fff and scale the SRC exponent by the value that the	#
* DST exponent was scaled by. If the SRC exponent is greater or equal,	#
* do the opposite. Return this scale factor in d0.			#
*	If the two exponents differ by > the number of mantissa bits	#
* plus two, then set the smallest exponent to a very small value as a	#
* quick shortcut.							#
*									#
*########################################################################

	global	addsub_scaler2
*
* In: a0 = src operand, a1 = dst operand.
* Both operands are copied to the scratch area (src -> FP_SCR0,
* dst -> FP_SCR1) and their sign-stripped exponents are stashed in
* L_SCR1: src in the word at L_SCR1, dst in the word at 2+L_SCR1.
* The operand with the larger exponent is the one scaled to 0x3fff;
* on a tie the src is chosen.
*
addsub_scaler2:
	move.l	SRC_HI(a0),FP_SCR0_HI(a6)
	move.l	DST_HI(a1),FP_SCR1_HI(a6)
	move.l	SRC_LO(a0),FP_SCR0_LO(a6)
	move.l	DST_LO(a1),FP_SCR1_LO(a6)
	move.w	SRC_EX.w(a0),d0
	move.w	DST_EX.w(a1),d1
	move.w	d0,FP_SCR0_EX(a6)
	move.w	d1,FP_SCR1_EX(a6)

	andi.w	#$7fff,d0		* strip src sign
	andi.w	#$7fff,d1		* strip dst sign
	move.w	d0,L_SCR1(a6)		* store src exponent
	move.w	d1,2+L_SCR1(a6)		* store dst exponent

	cmp.w	d1,d0			* is src exp >= dst exp?
	bge.l	src_exp_ge2

* dst exp is >  src exp; scale dst to exp = 0x3fff
*
* The scale factor returned by scale_to_zero_dst is kept on the stack
* while the src exponent is adjusted, and is popped into d0 on return.
dst_exp_gt2:
	bsr.l	scale_to_zero_dst
	move.l	d0,-(sp)		* save scale factor

	cmpi.b	#DENORM,STAG(a6)	* is src denormalized?
	bne.b	cmpexp12

	lea	FP_SCR0(a6),a0
	bsr.l	norm			* normalize the denorm; result is new exp
	neg.w	d0			* new exp = -(shft val)
	move.w	d0,L_SCR1(a6)		* insert new src exp

* if the src exponent is at least mantissalen+2 below the dst exponent,
* take the quick path instead of scaling it exactly.
cmpexp12:
	move.w	2+L_SCR1(a6),d0
	subi.w	#mantissalen+2,d0	* subtract mantissalen+2 from larger exp

	cmp.w	L_SCR1(a6),d0		* is difference >= len(mantissa)+2?
	bge.b	quick_scale12

	move.w	L_SCR1(a6),d0
	add.w	$2(sp),d0		* scale src exponent by scale factor
	move.w	FP_SCR0_EX(a6),d1
	andi.w	#$8000,d1		* keep src sign
	or.w	d1,d0			* concat {sgn,new exp}
	move.w	d0,FP_SCR0_EX(a6)	* insert new src exponent

	move.l	(sp)+,d0		* return SCALE factor
	rts

* src is negligible next to dst: keep its sign and force exponent = 1.
quick_scale12:
	andi.w	#$8000,FP_SCR0_EX(a6)	* zero src exponent
	bset	#$0,1+FP_SCR0_EX(a6)	* set exp = 1

	move.l	(sp)+,d0		* return SCALE factor
	rts

* src exp is >= dst exp; scale src to exp = 0x3fff
*
* The scale factor returned by scale_to_zero_src is kept on the stack
* while the dst exponent is adjusted, and is popped into d0 on return.
src_exp_ge2:
	bsr.l	scale_to_zero_src
	move.l	d0,-(sp)		* save scale factor

	cmpi.b	#DENORM,DTAG(a6)	* is dst denormalized?
	bne.b	cmpexp22
	lea	FP_SCR1(a6),a0
	bsr.l	norm			* normalize the denorm; result is new exp
	neg.w	d0			* new exp = -(shft val)
	move.w	d0,2+L_SCR1(a6)		* insert new dst exp

* if the dst exponent is at least mantissalen+2 below the src exponent,
* take the quick path instead of scaling it exactly.
cmpexp22:
	move.w	L_SCR1(a6),d0
	subi.w	#mantissalen+2,d0	* subtract mantissalen+2 from larger exp

	cmp.w	2+L_SCR1(a6),d0		* is difference >= len(mantissa)+2?
	bge.b	quick_scale22

	move.w	2+L_SCR1(a6),d0
	add.w	$2(sp),d0		* scale dst exponent by scale factor
	move.w	FP_SCR1_EX(a6),d1
	andi.w	#$8000,d1		* keep dst sign
	or.w	d1,d0			* concat {sgn,new exp}
	move.w	d0,FP_SCR1_EX(a6)	* insert new dst exponent

	move.l	(sp)+,d0		* return SCALE factor
	rts

* dst is negligible next to src: keep its sign and force exponent = 1.
quick_scale22:
	andi.w	#$8000,FP_SCR1_EX(a6)	* zero dst exponent
	bset	#$0,1+FP_SCR1_EX(a6)	* set exp = 1

	move.l	(sp)+,d0		* return SCALE factor
	rts

*#########################################################################

*########################################################################
* XDEF ****************************************************************	#
*	scale_to_zero_src(): scale the exponent of extended precision	#
*			     value at FP_SCR0(a6).			#
*									#
* XREF ****************************************************************	#
*	norm() - normalize the mantissa if the operand was a DENORM	#
*									#
* INPUT ***************************************************************	#
*	FP_SCR0(a6) = extended precision operand to be scaled		#
* 									#
* OUTPUT **************************************************************	#
*	FP_SCR0(a6) = scaled extended precision operand			#
*	d0	    = scale value					#
*									#
* ALGORITHM ***********************************************************	#
* 	Set the exponent of the input operand to 0x3fff. Save the value	#
* of the difference between the original and new exponent. Then, 	#
* normalize the operand if it was a DENORM. Add this normalization	#
* value to the previous value. Return the result.			#
*									#
*########################################################################

	global	scale_to_zero_src
*
* Forces the exponent of FP_SCR0 to 0x3fff (sign kept) and returns
* d0 = 0x3fff - (original exponent). For a DENORM the mantissa is
* normalized first and -(shift count) is used as the original exponent.
* d1 is used as scratch; a0 is overwritten on the DENORM path.
*
scale_to_zero_src:
	move.w	FP_SCR0_EX(a6),d1	* extract operand's {sgn,exp}
	move.w	d1,d0			* make a copy

	andi.l	#$7fff,d1		* extract operand's exponent

	andi.w	#$8000,d0		* extract operand's sgn
	ori.w	#$3fff,d0		* insert new operand's exponent(=0)

	move.w	d0,FP_SCR0_EX(a6)	* insert biased exponent

	cmpi.b	#DENORM,STAG(a6)	* is operand a DENORM?
	beq.b	stzs_denorm		* normalize the DENORM

stzs_norm:
	move.l	#$3fff,d0
	sub.l	d1,d0			* scale = BIAS + (-exp)

	rts

stzs_denorm:
	lea	FP_SCR0(a6),a0		* pass ptr to src op
	bsr.l	norm			* normalize denorm
	neg.l	d0			* new exponent = -(shft val)
	move.l	d0,d1			* d1 = exponent used by stzs_norm
	bra.b	stzs_norm		* finish scaling

*##

*########################################################################
* XDEF ****************************************************************	#
*	scale_sqrt(): scale the input operand exponent so a subsequent	#
*		      fsqrt operation won't take an exception.		#
*									#
* XREF ****************************************************************	#
*	norm() - normalize the mantissa if the operand was a DENORM	#
*									#
* INPUT ***************************************************************	#
*	FP_SCR0(a6) = extended precision operand to be scaled		#
* 									#
* OUTPUT **************************************************************	#
*	FP_SCR0(a6) = scaled extended precision operand			#
*	d0	    = scale value					#
*									#
* ALGORITHM ***********************************************************	#
*	If the input operand is a DENORM, normalize it.			#
* 	If the exponent of the input operand is even, set the exponent	#
* to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the 	#
* exponent of the input operand is odd, set the exponent to 0x3fff and	#
* return a scale factor of "(0x3fff-exp)/2". 				#
*									#
*########################################################################

	global	scale_sqrt
*
* The exponent of FP_SCR0 is replaced by 0x3fff (odd exponent) or
* 0x3ffe (even exponent), keeping the sign, so that the difference
* from the original exponent is even; d0 returns half that difference.
* d1 is used as scratch on the norm path; a0 is overwritten on the
* DENORM path.
*
scale_sqrt:
	cmpi.b	#DENORM,STAG(a6)	* is operand a DENORM?
	beq.b	ss_denorm		* normalize the DENORM

	move.w	FP_SCR0_EX(a6),d1	* extract operand's {sgn,exp}
	andi.l	#$7fff,d1		* extract operand's exponent

	andi.w	#$8000,FP_SCR0_EX(a6)	* extract operand's sgn

	btst	#$0,d1			* is exp even or odd?
	beq.b	ss_norm_even

	ori.w	#$3fff,FP_SCR0_EX(a6)	* insert new operand's exponent(=0)

	move.l	#$3fff,d0
	sub.l	d1,d0			* scale = BIAS + (-exp)
	asr.l	#$1,d0			* divide scale factor by 2
	rts

ss_norm_even:
	ori.w	#$3ffe,FP_SCR0_EX(a6)	* insert new operand's exponent(=-1)

	move.l	#$3ffe,d0
	sub.l	d1,d0			* scale = (BIAS-1) + (-exp)
	asr.l	#$1,d0			* divide scale factor by 2
	rts

* DENORM: d0 = value returned by norm (used elsewhere in this file as
* the shift count, i.e. the negated exponent).
* NOTE(review): the new exponent is OR-ed into FP_SCR0_EX without
* clearing it first -- this assumes the exponent field is zero after
* norm(); confirm.
ss_denorm:
	lea	FP_SCR0(a6),a0		* pass ptr to src op
	bsr.l	norm			* normalize denorm

	btst	#$0,d0			* is exp even or odd?
	beq.b	ss_denorm_even

	ori.w	#$3fff,FP_SCR0_EX(a6)	* insert new operand's exponent(=0)

	addi.l	#$3fff,d0		* scale = BIAS + shft val
	asr.l	#$1,d0			* divide scale factor by 2
	rts

ss_denorm_even:
	ori.w	#$3ffe,FP_SCR0_EX(a6)	* insert new operand's exponent(=-1)

	addi.l	#$3ffe,d0		* scale = (BIAS-1) + shft val
	asr.l	#$1,d0			* divide scale factor by 2
	rts

*##

*########################################################################
* XDEF ****************************************************************	#
*	scale_to_zero_dst(): scale the exponent of extended precision	#
*			     value at FP_SCR1(a6).			#
*									#
* XREF ****************************************************************	#
*	norm() - normalize the mantissa if the operand was a DENORM	#
*									#
* INPUT ***************************************************************	#
*	FP_SCR1(a6) = extended precision operand to be scaled		#
* 									#
* OUTPUT **************************************************************	#
*	FP_SCR1(a6) = scaled extended precision operand			#
*	d0	    = scale value					#
*									#
* ALGORITHM ***********************************************************	#
* 	Set the exponent of the input operand to 0x3fff. Save the value	#
* of the difference between the original and new exponent. Then, 	#
* normalize the operand if it was a DENORM. Add this normalization	#
* value to the previous value. Return the result.			#
*									#
*########################################################################

	global	scale_to_zero_dst
*
* Forces the exponent of FP_SCR1 to 0x3fff (sign kept) and returns
* d0 = 0x3fff - (original exponent). For a DENORM (per DTAG) the
* mantissa is normalized first and -(shift count) is used as the
* original exponent. d1 is used as scratch; a0 is overwritten on the
* DENORM path.
*
scale_to_zero_dst:
	move.w	FP_SCR1_EX(a6),d1	* extract operand's {sgn,exp}
	move.w	d1,d0			* make a copy

	andi.l	#$7fff,d1		* extract operand's exponent

	andi.w	#$8000,d0		* extract operand's sgn
	ori.w	#$3fff,d0		* insert new operand's exponent(=0)

	move.w	d0,FP_SCR1_EX(a6)	* insert biased exponent

	cmpi.b	#DENORM,DTAG(a6)	* is operand a DENORM?
	beq.b	stzd_denorm		* normalize the DENORM

stzd_norm:
	move.l	#$3fff,d0
	sub.l	d1,d0			* scale = BIAS + (-exp)
	rts

stzd_denorm:
	lea	FP_SCR1(a6),a0		* pass ptr to dst op
	bsr.l	norm			* normalize denorm
	neg.l	d0			* new exponent = -(shft val)
	move.l	d0,d1			* d1 = exponent used by stzd_norm
	bra.b	stzd_norm		* finish scaling

*#########################################################################

*########################################################################
* XDEF ****************************************************************	#
*	res_qnan(): return default result w/ QNAN operand for dyadic	#
*	res_snan(): return default result w/ SNAN operand for dyadic	#
*	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
*	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	FP_SRC(a6) = pointer to extended precision src operand		#
*	FP_DST(a6) = pointer to extended precision dst operand		#
* 									#
* OUTPUT **************************************************************	#
*	fp0 = default result						#
*									#
* ALGORITHM ***********************************************************	#
* 	If either operand (but not both operands) of an operation is a	#
* nonsignalling NAN, then that NAN is returned as the result. If both	#
* operands are nonsignalling NANs, then the destination operand 	#
* nonsignalling NAN is returned as the result.				#
* 	If either operand to an operation is a signalling NAN (SNAN),	#
* then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
* enable bit is set in the FPCR, then the trap is taken and the 	#
* destination is not modified. If the SNAN trap enable bit is not set,	#
* then the SNAN is converted to a nonsignalling NAN (by setting the 	#
* SNAN bit in the operand to one), and the operation continues as 	#
* described in the preceding paragraph, for nonsignalling NANs.		#
*	Make sure the appropriate FPSR bits are set before exiting.	#
*									#
*########################################################################

	global	res_qnan
	global	res_snan
*
* res_qnan/res_snan share one body: the dst tag is examined first, so
* a NAN in the dst takes priority over one in the src. The monadic
* entry points res_snan_1op/res_qnan_1op enter directly at the src
* handlers. All paths end at nan_comp, which sets the N ccode bit if
* the chosen NAN is negative and returns it in fp0 (a0 = its address).
*
res_qnan:
res_snan:
	cmpi.b	#SNAN,DTAG(a6)		* is the dst an SNAN?
	beq.b	dst_snan2
	cmpi.b	#QNAN,DTAG(a6)		* is the dst a  QNAN?
	beq.b	dst_qnan2
src_nan:
	cmp.b	#QNAN,STAG(a6)		* is the src a QNAN?
	beq.b	src_qnan2
	global	res_snan_1op
* src is an SNAN: quiet it and flag SNAN/AIOP.
res_snan_1op:
src_snan2:
	bset	#$6,FP_SRC_HI(a6)	* set SNAN bit
	ori.l	#nan_mask+aiop_mask+snan_mask,USER_FPSR(a6)
	lea	FP_SRC(a6),a0
	bra.b	nan_comp
	global	res_qnan_1op
* src is a QNAN: returned as is, only the NAN ccode bit is set.
res_qnan_1op:
src_qnan2:
	ori.l	#nan_mask,USER_FPSR(a6)
	lea	FP_SRC(a6),a0
	bra.b	nan_comp
* dst is an SNAN: quiet it and flag SNAN/AIOP.
dst_snan2:
	ori.l	#nan_mask+aiop_mask+snan_mask,USER_FPSR(a6)
	bset	#$6,FP_DST_HI(a6)	* set SNAN bit
	lea	FP_DST(a6),a0
	bra.b	nan_comp
* dst is a QNAN: it is the result, but an SNAN src still flags
* SNAN/AIOP.
dst_qnan2:
	lea	FP_DST(a6),a0
	cmp.b	#SNAN,STAG(a6)		* is the src an SNAN?
	bne.l	nan_done		* no
	ori.l	#aiop_mask+snan_mask,USER_FPSR(a6)
nan_done:
	ori.l	#nan_mask,USER_FPSR(a6)
nan_comp:
	btst	#$7,FTEMP_EX.w(a0)	* is NAN neg?
	beq.b	nan_not_neg
	ori.l	#neg_mask,USER_FPSR(a6)
nan_not_neg:
	fmovem.x	(a0),fp0	* return the NAN in fp0
	rts

*########################################################################
* XDEF ****************************************************************	#
* 	res_operr(): return default result during operand error		#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	None								#
* 									#
* OUTPUT **************************************************************	#
*	fp0 = default operand error result				#
*									#
* ALGORITHM ***********************************************************	#
*	A nonsignalling NAN is returned as the default result when	#
* an operand error occurs for the following cases:			#
*									#
* 	Multiply: (Infinity x Zero)					#
* 	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
* 	Sqrt    : (negative norm, denorm or Infinity)			#
*									#
*########################################################################

	global	res_operr
*
* Flags NAN, OPERR and AIOP in USER_FPSR and returns the constant at
* nan_return in fp0. No register other than fp0 is written.
*
res_operr:
	ori.l	#nan_mask+operr_mask+aiop_mask,USER_FPSR(a6)
	fmovem.x	nan_return.l(pc),fp0
	rts

* default operand-error result: exponent $7fff, mantissa all ones
nan_return:
	.dc.l	$7fff0000,$ffffffff,$ffffffff

*########################################################################
* fdbcc(): routine to emulate the fdbcc instruction			#
*									#
* XDEF **************************************************************** #
*	_fdbcc()							#
*									#
* XREF **************************************************************** #
*	fetch_dreg() - fetch Dn value					#
*	store_dreg_l() - store updated Dn value				#
*									#
* INPUT ***************************************************************	#
*	d0 = displacement						#
*									#
* OUTPUT ************************************************************** #
*	none								#
*									#
* ALGORITHM ***********************************************************	#
*	This routine checks which conditional predicate is specified by	#
* the stacked fdbcc instruction opcode and then branches to a routine	#
* for that predicate. The corresponding fbcc instruction is then used	#
* to see whether the condition (specified by the stacked FPSR) is true	#
* or false.								#
*	If a BSUN exception should be indicated, the BSUN and ABSUN	#
* bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
* the fbsun_flg is set in the SPCOND_FLG location on the stack. If an 	#
* enabled BSUN should not be flagged and the predicate is false, then	#
* Dn is fetched and decremented by one. If Dn is not equal to -1, add	#
* the displacement value to the stacked PC so that when an "rte" is	#
* finally executed, the branch occurs.					#
*									#
*########################################################################
	global	_fdbcc
*
* The displacement is parked in L_SCR1, the stacked fp condition codes
* are moved into the top byte of the live FPSR so the fbcc instructions
* in the predicate routines can test them, and control is dispatched
* through tbl_fdbcc (16-bit offsets relative to tbl_fdbcc).
* NOTE(review): d0 is used as the table index without masking --
* assumes EXC_CMDREG holds only the predicate number (0-31); confirm.
*
_fdbcc:
	move.l	d0,L_SCR1(a6)		* save displacement

	move.w	EXC_CMDREG(a6),d0	* fetch predicate

	clr.l	d1			* clear scratch reg
	move.b	FPSR_CC(a6),d1		* fetch fp ccodes
	ror.l	#$8,d1			* rotate to top byte
	fmove.l	d1,fpsr			* insert into FPSR

	move.w	(tbl_fdbcc.b,pc,d0.w*2),d1	* load table offset
	jmp	(tbl_fdbcc.b,pc,d1.w)	* jump to fdbcc routine

* one entry per predicate, indexed by predicate number
tbl_fdbcc:
	.dc.w	fdbcc_f-tbl_fdbcc			* 00
	.dc.w	fdbcc_eq-tbl_fdbcc			* 01
	.dc.w	fdbcc_ogt-tbl_fdbcc			* 02
	.dc.w	fdbcc_oge-tbl_fdbcc			* 03
	.dc.w	fdbcc_olt-tbl_fdbcc			* 04
	.dc.w	fdbcc_ole-tbl_fdbcc			* 05
	.dc.w	fdbcc_ogl-tbl_fdbcc			* 06
	.dc.w	fdbcc_or-tbl_fdbcc			* 07
	.dc.w	fdbcc_un-tbl_fdbcc			* 08
	.dc.w	fdbcc_ueq-tbl_fdbcc			* 09
	.dc.w	fdbcc_ugt-tbl_fdbcc			* 10
	.dc.w	fdbcc_uge-tbl_fdbcc			* 11
	.dc.w	fdbcc_ult-tbl_fdbcc			* 12
	.dc.w	fdbcc_ule-tbl_fdbcc			* 13
	.dc.w	fdbcc_neq-tbl_fdbcc			* 14
	.dc.w	fdbcc_t-tbl_fdbcc			* 15
	.dc.w	fdbcc_sf-tbl_fdbcc			* 16
	.dc.w	fdbcc_seq-tbl_fdbcc			* 17
	.dc.w	fdbcc_gt-tbl_fdbcc			* 18
	.dc.w	fdbcc_ge-tbl_fdbcc			* 19
	.dc.w	fdbcc_lt-tbl_fdbcc			* 20
	.dc.w	fdbcc_le-tbl_fdbcc			* 21
	.dc.w	fdbcc_gl-tbl_fdbcc			* 22
	.dc.w	fdbcc_gle-tbl_fdbcc			* 23
	.dc.w	fdbcc_ngle-tbl_fdbcc			* 24
	.dc.w	fdbcc_ngl-tbl_fdbcc			* 25
	.dc.w	fdbcc_nle-tbl_fdbcc			* 26
	.dc.w	fdbcc_nlt-tbl_fdbcc			* 27
	.dc.w	fdbcc_nge-tbl_fdbcc			* 28
	.dc.w	fdbcc_ngt-tbl_fdbcc			* 29
	.dc.w	fdbcc_sneq-tbl_fdbcc			* 30
	.dc.w	fdbcc_st-tbl_fdbcc			* 31

*########################################################################
*									#
* IEEE Nonaware tests							#
*									#
* For the IEEE nonaware tests, only the false branch changes the 	#
* counter. However, the true branch may set bsun so we check to see	#
* if the NAN bit is set, in which case BSUN and AIOP will be set.	#
*									#
* The cases EQ and NE are shared by the Aware and Nonaware groups	#
* and are incapable of setting the BSUN exception bit.			#
*									#
* Typically, only one of the two possible branch directions could	#
* have the NAN bit set.							#
* (This is assuming the mutual exclusiveness of FPSR cc bit groupings	#
*  is preserved.)							#
*									#
*########################################################################

*
* equal:
*
*	Z
*
* The Z-clear case is branched away to the counter handling; the
* predicate-true case drops straight through to the return.
*
fdbcc_eq:
	fbne.w	fdbcc_eq_no		* Z clear? then predicate is false
fdbcc_eq_yes:
	rts				* equal; nothing to do
fdbcc_eq_no:
	bra.w	fdbcc_false		* not equal; go handle counter

*
* not equal:
*	_
*	Z
*
* The Z-set case is branched away to the counter handling; the
* predicate-true case drops straight through to the return.
*
fdbcc_neq:
	fbeq.w	fdbcc_neq_no		* Z set? then predicate is false
fdbcc_neq_yes:
	rts				* not equal; nothing to do
fdbcc_neq_no:
	bra.w	fdbcc_false		* equal; go handle counter

*
* greater than:
*	_______
*	NANvZvN
*
* True: nothing to do. False: if the NAN cc bit is set, BSUN/AIOP are
* recorded and an enabled BSUN diverts to fdbcc_bsun; every other false
* case goes on to fdbcc_false.
*
fdbcc_gt:
	fbgt.w	fdbcc_gt_yes		* greater than?
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fdbcc_false		* no;go handle counter
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
	bra.w	fdbcc_false		* no; go handle counter
fdbcc_gt_yes:
	rts				* do nothing

*
* not greater than:
*
*	NANvZvN
*
* False: go handle the counter. True: if the NAN cc bit is set,
* BSUN/AIOP are recorded and an enabled BSUN diverts to fdbcc_bsun;
* otherwise nothing is done.
*
fdbcc_ngt:
	fbngt.w	fdbcc_ngt_yes		* not greater than?
fdbcc_ngt_no:
	bra.w	fdbcc_false		* no; go handle counter
fdbcc_ngt_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.b	fdbcc_ngt_done		* no;go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
fdbcc_ngt_done:
	rts				* no; do nothing

*
* greater than or equal:
*	   _____
*	Zv(NANvN)
*
* Both outcomes check the NAN cc bit: when it is set, BSUN/AIOP are
* recorded and an enabled BSUN diverts to fdbcc_bsun. Apart from that,
* false goes on to fdbcc_false and true does nothing.
*
fdbcc_ge:
	fbge.w	fdbcc_ge_yes		* greater than or equal?
fdbcc_ge_no:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fdbcc_false		* no;go handle counter
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
	bra.w	fdbcc_false		* no; go handle counter
fdbcc_ge_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.b	fdbcc_ge_yes_done	* no;go do nothing
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
fdbcc_ge_yes_done:
	rts				* do nothing

*
* not (greater than or equal):
*	       _
*	NANv(N^Z)
*
* False: go handle the counter. True: if the NAN cc bit is set,
* BSUN/AIOP are recorded and an enabled BSUN diverts to fdbcc_bsun;
* otherwise nothing is done.
*
fdbcc_nge:
	fbnge.w	fdbcc_nge_yes		* not (greater than or equal)?
fdbcc_nge_no:
	bra.w	fdbcc_false		* no; go handle counter
fdbcc_nge_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.b	fdbcc_nge_done		* no;go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
fdbcc_nge_done:
	rts				* no; do nothing

*
* less than:
*	   _____
*	N^(NANvZ)
*
* True: nothing to do. False: if the NAN cc bit is set, BSUN/AIOP are
* recorded and an enabled BSUN diverts to fdbcc_bsun; every other false
* case goes on to fdbcc_false.
*
fdbcc_lt:
	fblt.w	fdbcc_lt_yes		* less than?
fdbcc_lt_no:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fdbcc_false		* no; go handle counter
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
	bra.w	fdbcc_false		* no; go handle counter
fdbcc_lt_yes:
	rts				* do nothing

*
* not less than:
*	       _
*	NANv(ZvN)
*
* False: go handle the counter. True: if the NAN cc bit is set,
* BSUN/AIOP are recorded and an enabled BSUN diverts to fdbcc_bsun;
* otherwise nothing is done.
*
fdbcc_nlt:
	fbnlt.w	fdbcc_nlt_yes		* not less than?
fdbcc_nlt_no:
	bra.w	fdbcc_false		* no; go handle counter
fdbcc_nlt_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.b	fdbcc_nlt_done		* no;go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
fdbcc_nlt_done:
	rts				* no; do nothing

*
* less than or equal:
*	     ___
*	Zv(N^NAN)
*
* Both outcomes check the NAN cc bit: when it is set, BSUN/AIOP are
* recorded and an enabled BSUN diverts to fdbcc_bsun. Apart from that,
* false goes on to fdbcc_false and true does nothing.
*
fdbcc_le:
	fble.w	fdbcc_le_yes		* less than or equal?
fdbcc_le_no:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fdbcc_false		* no; go handle counter
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
	bra.w	fdbcc_false		* no; go handle counter
fdbcc_le_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.b	fdbcc_le_yes_done	* no; go do nothing
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
fdbcc_le_yes_done:
	rts				* do nothing

*
* not (less than or equal):
*	     ___
*	NANv(NvZ)
*
* False: go handle the counter. True: if the NAN cc bit is set,
* BSUN/AIOP are recorded and an enabled BSUN diverts to fdbcc_bsun;
* otherwise nothing is done.
*
fdbcc_nle:
	fbnle.w	fdbcc_nle_yes		* not (less than or equal)?
fdbcc_nle_no:
	bra.w	fdbcc_false		* no; go handle counter
fdbcc_nle_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fdbcc_nle_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
fdbcc_nle_done:
	rts				* no; do nothing

*
* greater or less than:
*	_____
*	NANvZ
*
* True: nothing to do. False: if the NAN cc bit is set, BSUN/AIOP are
* recorded and an enabled BSUN diverts to fdbcc_bsun; every other false
* case goes on to fdbcc_false.
*
fdbcc_gl:
	fbgl.w	fdbcc_gl_yes		* greater or less than?
fdbcc_gl_no:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fdbcc_false		* no; handle counter
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
	bra.w	fdbcc_false		* no; go handle counter
fdbcc_gl_yes:
	rts				* do nothing

*
* not (greater or less than):
*
*	NANvZ
*
* False: go handle the counter. True: if the NAN cc bit is set,
* BSUN/AIOP are recorded and an enabled BSUN diverts to fdbcc_bsun;
* otherwise nothing is done.
*
fdbcc_ngl:
	fbngl.w	fdbcc_ngl_yes		* not (greater or less than)?
fdbcc_ngl_no:
	bra.w	fdbcc_false		* no; go handle counter
fdbcc_ngl_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.b	fdbcc_ngl_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
fdbcc_ngl_done:
	rts				* no; do nothing

*
* greater, less, or equal:
*	___
*	NAN
*
* True: nothing to do. The predicate is false only when NAN is set, so
* the false path records BSUN/AIOP without testing the NAN bit; an
* enabled BSUN diverts to fdbcc_bsun, otherwise fdbcc_false follows.
*
fdbcc_gle:
	fbgle.w	fdbcc_gle_yes		* greater, less, or equal?
fdbcc_gle_no:
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
	bra.w	fdbcc_false		* no; go handle counter
fdbcc_gle_yes:
	rts				* do nothing

*
* not (greater, less, or equal):
*
*	NAN
*
* False: go handle the counter. The predicate is true only when NAN is
* set, so the true path records BSUN/AIOP without testing the NAN bit;
* an enabled BSUN diverts to fdbcc_bsun, otherwise nothing more is done.
*
fdbcc_ngle:
	fbngle.w	fdbcc_ngle_yes	* not (greater, less, or equal)?
fdbcc_ngle_no:
	bra.w	fdbcc_false		* no; go handle counter
fdbcc_ngle_yes:
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
	rts				* no; do nothing

*########################################################################
*									#
* Miscellaneous tests							#
*									#
* For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
*									#
*########################################################################

*
* false:
*
*	False
*
* The predicate is never true, so the counter is always handled.
*
fdbcc_f:				* no bsun possible
	bra.w	fdbcc_false		* go handle counter

*
* true:
*
*	True
*
* The predicate is always true, so the counter is never touched.
*
fdbcc_t:				* no bsun possible
	rts				* do nothing

*
* signalling false:
*
*	False
*
* Always false, but signalling: if the NAN cc bit is set, BSUN/AIOP are
* recorded and an enabled BSUN diverts to fdbcc_bsun. In every other
* case the counter is handled by fdbcc_false.
*
fdbcc_sf:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set?
	beq.w	fdbcc_false		* no;go handle counter
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
	bra.w	fdbcc_false		* go handle counter

*
* signalling true:
*
*	True
*
* Always true, but signalling: if the NAN cc bit is set, BSUN/AIOP are
* recorded and an enabled BSUN diverts to fdbcc_bsun. Otherwise nothing
* is done.
*
fdbcc_st:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set?
	beq.b	fdbcc_st_done		* no;go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
fdbcc_st_done:
	rts

*
* signalling equal:
*
*	Z
*
* Both outcomes check the NAN cc bit: when it is set, BSUN/AIOP are
* recorded and an enabled BSUN diverts to fdbcc_bsun. Apart from that,
* false goes on to fdbcc_false and true does nothing.
*
fdbcc_seq:
	fbseq.w	fdbcc_seq_yes		* signalling equal?
fdbcc_seq_no:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set?
	beq.w	fdbcc_false		* no;go handle counter
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
	bra.w	fdbcc_false		* go handle counter
fdbcc_seq_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set?
	beq.b	fdbcc_seq_yes_done	* no;go do nothing
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN exc bit
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
fdbcc_seq_yes_done:
	rts				* yes; do nothing

*
* signalling not equal:
*	_
*	Z
*
* fdbsne: both outcomes record BSUN/AIOP in USER_FPSR when NAN is set.
* False outcome goes on to fdbcc_false; true outcome just returns.
fdbcc_sneq:
	fbsne.w	fdbcc_sneq_yes		* signalling not equal?
fdbcc_sneq_no:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set?
	beq.w	fdbcc_false		* no; go handle counter
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
	bra.w	fdbcc_false		* no; go handle counter
fdbcc_sneq_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set?
	beq.b	fdbcc_sneq_done		* no; go finish (short branch, as in fdbcc_st)
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fdbcc_bsun		* yes; we have an exception
fdbcc_sneq_done:
	rts				* condition true; do nothing

*########################################################################
*									#
* IEEE Aware tests							#
*									#
* For the IEEE aware tests, action is only taken if the result is false.#
* Therefore, the opposite branch type is used to jump to the decrement	#
* routine. 								#
* The BSUN exception will not be set for any of these tests.		#
*									#
*########################################################################

*
* ordered greater than:
*	_______
*	NANvZvN
*
fdbcc_ogt:
	fbule.w	fdbcc_ogt_no		* complement (ULE) holds: OGT is false
fdbcc_ogt_yes:
	rts				* OGT true; nothing to do
fdbcc_ogt_no:
	bra.w	fdbcc_false		* OGT false; go handle counter

*
* unordered or less or equal:
*	_______
*	NANvZvN
*
fdbcc_ule:
	fbogt.w	fdbcc_ule_no		* complement (OGT) holds: ULE is false
fdbcc_ule_yes:
	rts				* ULE true; nothing to do
fdbcc_ule_no:
	bra.w	fdbcc_false		* ULE false; go handle counter

*
* ordered greater than or equal:
*	   _____
*	Zv(NANvN)
*
fdbcc_oge:
	fbult.w	fdbcc_oge_no		* complement (ULT) holds: OGE is false
fdbcc_oge_yes:
	rts				* OGE true; nothing to do
fdbcc_oge_no:
	bra.w	fdbcc_false		* OGE false; go handle counter

*
* unordered or less than:
*	       _
*	NANv(N^Z)
*
fdbcc_ult:
	fboge.w	fdbcc_ult_no		* complement (OGE) holds: ULT is false
fdbcc_ult_yes:
	rts				* ULT true; nothing to do
fdbcc_ult_no:
	bra.w	fdbcc_false		* ULT false; go handle counter

*
* ordered less than:
*	   _____
*	N^(NANvZ)
*
fdbcc_olt:
	fbuge.w	fdbcc_olt_no		* complement (UGE) holds: OLT is false
fdbcc_olt_yes:
	rts				* OLT true; nothing to do
fdbcc_olt_no:
	bra.w	fdbcc_false		* OLT false; go handle counter

*
* unordered or greater or equal:
*
*	NANvZvN
*
fdbcc_uge:
	fbolt.w	fdbcc_uge_no		* complement (OLT) holds: UGE is false
fdbcc_uge_yes:
	rts				* unordered or greater or equal; nothing to do
fdbcc_uge_no:
	bra.w	fdbcc_false		* UGE false; go handle counter

*
* ordered less than or equal:
*	     ___
*	Zv(N^NAN)
*
fdbcc_ole:
	fbugt.w	fdbcc_ole_no		* complement (UGT) holds: OLE is false
fdbcc_ole_yes:
	rts				* ordered less than or equal; nothing to do
fdbcc_ole_no:
	bra.w	fdbcc_false		* OLE false; go handle counter

*
* unordered or greater than:
*	     ___
*	NANv(NvZ)
*
fdbcc_ugt:
	fbole.w	fdbcc_ugt_no		* complement (OLE) holds: UGT is false
fdbcc_ugt_yes:
	rts				* UGT true; nothing to do
fdbcc_ugt_no:
	bra.w	fdbcc_false		* UGT false; go handle counter

*
* ordered greater or less than:
*	_____
*	NANvZ
*
fdbcc_ogl:
	fbueq.w	fdbcc_ogl_no		* complement (UEQ) holds: OGL is false
fdbcc_ogl_yes:
	rts				* OGL true; nothing to do
fdbcc_ogl_no:
	bra.w	fdbcc_false		* OGL false; go handle counter

*
* unordered or equal:
*
*	NANvZ
*
fdbcc_ueq:
	fbogl.w	fdbcc_ueq_no		* complement (OGL) holds: UEQ is false
fdbcc_ueq_yes:
	rts				* UEQ true; nothing to do
fdbcc_ueq_no:
	bra.w	fdbcc_false		* UEQ false; go handle counter

*
* ordered:
*	___
*	NAN
*
fdbcc_or:
	fbun.w	fdbcc_or_no		* complement (UN) holds: OR is false
fdbcc_or_yes:
	rts				* ordered; nothing to do
fdbcc_or_no:
	bra.w	fdbcc_false		* unordered; go handle counter

*
* unordered:
*
*	NAN
*
fdbcc_un:
	fbor.w	fdbcc_un_no		* complement (OR) holds: UN is false
fdbcc_un_yes:
	rts				* unordered; nothing to do
fdbcc_un_no:
	bra.w	fdbcc_false		* ordered; go handle counter

*######################################################################

*
* the bsun exception bit was not set.
*
* (1) subtract 1 from the count register
* (2) if (cr == -1) then
*	pc = pc of next instruction
*     else
*	pc += sign_ext(16-bit displacement)
*
* Common "condition false" path for every fdbcc predicate: decrement the
* low word of the count register and, unless it became -1, redirect
* EXC_PC to the branch target.
fdbcc_false:
	move.b	1+EXC_OPWORD(a6),d1	* fetch lo opword
	andi.w	#$7,d1			* extract count register number (bits 2-0)

	bsr.l	fetch_dreg		* fetch count value (index passed in d1, value back in d0)
* make sure that d0 isn't corrupted between calls...

	subq.w	#$1,d0			* Dn - 1 -> Dn (word-sized: upper word of d0 untouched)

	bsr.l	store_dreg_l		* store new count value
* NOTE(review): relies on d1 (index) and d0 (value) surviving fetch_dreg /
* store_dreg_l as needed -- confirm against those routines.

	cmpi.w	#-$1,d0			* is (Dn == -1)? (low word only)
	bne.b	fdbcc_false_cont	* no; take the branch
	rts				* yes; counter expired, EXC_PC left as is

fdbcc_false_cont:
	move.l	L_SCR1(a6),d0		* fetch displacement
	add.l	USER_FPIAR(a6),d0	* add instruction PC
	addq.l	#$4,d0			* add 4 (presumably the offset of the displacement word -- confirm)
	move.l	d0,EXC_PC(a6)		* set new PC
	rts

* the emulation routine set bsun and BSUN was enabled. have to
* fix stack and jump to the bsun handler.
* let the caller of this routine shift the stack frame up to
* eliminate the effective address field.
fdbcc_bsun:
	move.b	#fbsun_flg,SPCOND_FLG(a6)	* tell the caller an enabled BSUN must be taken
	rts

*########################################################################
* ftrapcc(): routine to emulate the ftrapcc instruction			#
*									#
* XDEF ****************************************************************	#
*	_ftrapcc()							#
*									#
* XREF ****************************************************************	#
*	none								#
*									#
* INPUT *************************************************************** #
*	none								#
*									#
* OUTPUT ************************************************************** #
*	none								#
*									#
* ALGORITHM *********************************************************** #
*	This routine checks which conditional predicate is specified by	#
* the stacked ftrapcc instruction opcode and then branches to a routine	#
* for that predicate. The corresponding fbcc instruction is then used	#
* to see whether the condition (specified by the stacked FPSR) is true	#
* or false.								#
*	If a BSUN exception should be indicated, the BSUN and AIOP	#
* bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
* the fbsun_flg is set in the SPCOND_FLG location on the stack. If an 	#
* enabled BSUN should not be flagged and the predicate is true, then	#
* the ftrapcc_flg is set in the SPCOND_FLG location. These special	#
* flags indicate to the calling routine to emulate the exceptional	#
* condition.								#
*									#
*########################################################################

	global	_ftrapcc
* Entry point: load the saved condition codes into the live FPSR, then
* dispatch through tbl_ftrapcc on the predicate number.
_ftrapcc:
	move.w	EXC_CMDREG(a6),d0	* fetch predicate (used unmasked as table index)
* NOTE(review): no range check -- assumes the value is 0..31; confirm caller.

	clr.l	d1			* clear scratch reg
	move.b	FPSR_CC(a6),d1		* fetch fp ccodes
	ror.l	#$8,d1			* rotate to top byte (bits 31-24)
	fmove.l	d1,fpsr			* insert into FPSR (all other FPSR bits written as 0)

	move.w	(tbl_ftrapcc.b,pc,d0.w*2),d1		* load 16-bit offset for this predicate
	jmp	(tbl_ftrapcc.b,pc,d1.w)		* jump to ftrapcc routine

* One word per predicate: offset of the handler from tbl_ftrapcc.
tbl_ftrapcc:
	.dc.w	ftrapcc_f-tbl_ftrapcc			* 00
	.dc.w	ftrapcc_eq-tbl_ftrapcc			* 01
	.dc.w	ftrapcc_ogt-tbl_ftrapcc			* 02
	.dc.w	ftrapcc_oge-tbl_ftrapcc			* 03
	.dc.w	ftrapcc_olt-tbl_ftrapcc			* 04
	.dc.w	ftrapcc_ole-tbl_ftrapcc			* 05
	.dc.w	ftrapcc_ogl-tbl_ftrapcc			* 06
	.dc.w	ftrapcc_or-tbl_ftrapcc			* 07
	.dc.w	ftrapcc_un-tbl_ftrapcc			* 08
	.dc.w	ftrapcc_ueq-tbl_ftrapcc			* 09
	.dc.w	ftrapcc_ugt-tbl_ftrapcc			* 10
	.dc.w	ftrapcc_uge-tbl_ftrapcc			* 11
	.dc.w	ftrapcc_ult-tbl_ftrapcc			* 12
	.dc.w	ftrapcc_ule-tbl_ftrapcc			* 13
	.dc.w	ftrapcc_neq-tbl_ftrapcc			* 14
	.dc.w	ftrapcc_t-tbl_ftrapcc			* 15
	.dc.w	ftrapcc_sf-tbl_ftrapcc			* 16
	.dc.w	ftrapcc_seq-tbl_ftrapcc			* 17
	.dc.w	ftrapcc_gt-tbl_ftrapcc			* 18
	.dc.w	ftrapcc_ge-tbl_ftrapcc			* 19
	.dc.w	ftrapcc_lt-tbl_ftrapcc			* 20
	.dc.w	ftrapcc_le-tbl_ftrapcc			* 21
	.dc.w	ftrapcc_gl-tbl_ftrapcc			* 22
	.dc.w	ftrapcc_gle-tbl_ftrapcc			* 23
	.dc.w	ftrapcc_ngle-tbl_ftrapcc		* 24
	.dc.w	ftrapcc_ngl-tbl_ftrapcc			* 25
	.dc.w	ftrapcc_nle-tbl_ftrapcc			* 26
	.dc.w	ftrapcc_nlt-tbl_ftrapcc			* 27
	.dc.w	ftrapcc_nge-tbl_ftrapcc			* 28
	.dc.w	ftrapcc_ngt-tbl_ftrapcc			* 29
	.dc.w	ftrapcc_sneq-tbl_ftrapcc		* 30
	.dc.w	ftrapcc_st-tbl_ftrapcc			* 31

*########################################################################
*									#
* IEEE Nonaware tests							#
*									#
* For the IEEE nonaware tests, we set the result based on the		#
* floating point condition codes. In addition, we check to see		#
* if the NAN bit is set, in which case BSUN and AIOP will be set.	#
*									#
* The cases EQ and NE are shared by the Aware and Nonaware groups	#
* and are incapable of setting the BSUN exception bit.			#
*									#
* Typically, only one of the two possible branch directions could	#
* have the NAN bit set.							#
*									#
*########################################################################

*
* equal:
*
*	Z
*
ftrapcc_eq:
	fbne.w	ftrapcc_eq_no		* complement (NE) holds: EQ is false
	bra.w	ftrapcc_trap		* equal; go take trap
ftrapcc_eq_no:
	rts				* not equal; do nothing

*
* not equal:
*	_
*	Z
*
ftrapcc_neq:
	fbeq.w	ftrapcc_neq_no		* complement (EQ) holds: NE is false
	bra.w	ftrapcc_trap		* not equal; go take trap
ftrapcc_neq_no:
	rts				* equal; do nothing

*
* greater than:
*	_______
*	NANvZvN
*
* ftrapgt: trap when GT holds; otherwise record BSUN/AIOP if NAN is set.
ftrapcc_gt:
	fbgt.w	ftrapcc_trap		* greater than? yes; go take trap
ftrapcc_gt_no:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.b	ftrapcc_gt_done		* no
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
ftrapcc_gt_done:
	rts				* no; do nothing

*
* not greater than:
*
*	NANvZvN	
*
* ftrapngt: when NGT holds, record BSUN/AIOP if NAN is set, then trap
* unless an enabled BSUN takes priority.
ftrapcc_ngt:
	fbngt.w	ftrapcc_ngt_yes		* not greater than?
ftrapcc_ngt_no:
	rts				* do nothing
ftrapcc_ngt_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	ftrapcc_trap		* no; go take trap
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
	bra.w	ftrapcc_trap		* no; go take trap

*
* greater than or equal:
*	   _____
*	Zv(NANvN)
*
* ftrapge: both outcomes record BSUN/AIOP when NAN is set; only the
* true outcome goes on to ftrapcc_trap.
ftrapcc_ge:
	fbge.w	ftrapcc_ge_yes		* greater than or equal?
ftrapcc_ge_no:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.b	ftrapcc_ge_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
ftrapcc_ge_done:
	rts				* no; do nothing
ftrapcc_ge_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	ftrapcc_trap		* no; go take trap
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
	bra.w	ftrapcc_trap		* no; go take trap

*
* not (greater than or equal):
*	       _
*	NANv(N^Z)
*
* ftrapnge: when NGE holds, record BSUN/AIOP if NAN is set, then trap
* unless an enabled BSUN takes priority.
ftrapcc_nge:
	fbnge.w	ftrapcc_nge_yes		* not (greater than or equal)?
ftrapcc_nge_no:
	rts				* do nothing
ftrapcc_nge_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	ftrapcc_trap		* no; go take trap
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
	bra.w	ftrapcc_trap		* no; go take trap

*
* less than:
*	   _____
*	N^(NANvZ)
*
* ftraplt: trap when LT holds; otherwise record BSUN/AIOP if NAN is set.
ftrapcc_lt:
	fblt.w	ftrapcc_trap		* less than? yes; go take trap
ftrapcc_lt_no:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.b	ftrapcc_lt_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
ftrapcc_lt_done:
	rts				* no; do nothing

*
* not less than:
*	       _
*	NANv(ZvN)
*
* ftrapnlt: when NLT holds, record BSUN/AIOP if NAN is set, then trap
* unless an enabled BSUN takes priority.
ftrapcc_nlt:
	fbnlt.w	ftrapcc_nlt_yes		* not less than?
ftrapcc_nlt_no:
	rts				* do nothing
ftrapcc_nlt_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	ftrapcc_trap		* no; go take trap
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
	bra.w	ftrapcc_trap		* no; go take trap

*
* less than or equal:
*	     ___
*	Zv(N^NAN)
*
* ftraple: both outcomes record BSUN/AIOP when NAN is set; only the
* true outcome goes on to ftrapcc_trap.
ftrapcc_le:
	fble.w	ftrapcc_le_yes		* less than or equal?
ftrapcc_le_no:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.b	ftrapcc_le_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
ftrapcc_le_done:
	rts				* no; do nothing
ftrapcc_le_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	ftrapcc_trap		* no; go take trap
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
	bra.w	ftrapcc_trap		* no; go take trap

*
* not (less than or equal):
*	     ___
*	NANv(NvZ)
*
* ftrapnle: when NLE holds, record BSUN/AIOP if NAN is set, then trap
* unless an enabled BSUN takes priority.
ftrapcc_nle:
	fbnle.w	ftrapcc_nle_yes		* not (less than or equal)?
ftrapcc_nle_no:
	rts				* do nothing
ftrapcc_nle_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	ftrapcc_trap		* no; go take trap
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
	bra.w	ftrapcc_trap		* no; go take trap

*
* greater or less than:
*	_____
*	NANvZ
*
* ftrapgl: trap when GL holds; otherwise record BSUN/AIOP if NAN is set.
ftrapcc_gl:
	fbgl.w	ftrapcc_trap		* greater or less than? yes; go take trap
ftrapcc_gl_no:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.b	ftrapcc_gl_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
ftrapcc_gl_done:
	rts				* no; do nothing

*
* not (greater or less than):
*
*	NANvZ
*
* ftrapngl: when NGL holds, record BSUN/AIOP if NAN is set, then trap
* unless an enabled BSUN takes priority.
ftrapcc_ngl:
	fbngl.w	ftrapcc_ngl_yes		* not (greater or less than)?
ftrapcc_ngl_no:
	rts				* do nothing
ftrapcc_ngl_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	ftrapcc_trap		* no; go take trap
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
	bra.w	ftrapcc_trap		* no; go take trap

*
* greater, less, or equal:
*	___
*	NAN
*
* ftrapgle: GLE is "not NAN", so the fall-through (false) path means NAN
* is set and BSUN/AIOP are recorded without a separate NAN test.
ftrapcc_gle:
	fbgle.w	ftrapcc_trap		* greater, less, or equal? yes; go take trap
ftrapcc_gle_no:
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
	rts				* no; do nothing

*
* not (greater, less, or equal):
*
*	NAN
*
* ftrapngle: NGLE is "NAN", so the true path always records BSUN/AIOP
* before trapping (or deferring to an enabled BSUN).
ftrapcc_ngle:
	fbngle.w	ftrapcc_ngle_yes	* not (greater, less, or equal)?
ftrapcc_ngle_no:
	rts				* do nothing
ftrapcc_ngle_yes:
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
	bra.w	ftrapcc_trap		* no; go take trap

*########################################################################
*									#
* Miscellaneous tests							#
*									#
* For the miscellaneous tests, only ftrapf and ftrapt cannot set BSUN;	#
* the signalling tests (sf, st, seq, sneq) set BSUN and AIOP whenever	#
* the NAN condition code bit is set.					#
*									#
*########################################################################

*
* false:
*
*	False
*
ftrapcc_f:
	rts				* predicate is constant false; never trap

*
* true:
*
*	True
*
ftrapcc_t:
	bra.w	ftrapcc_trap		* predicate is constant true; always take trap

*
* signalling false:
*
*	False
*
* ftrapsf: never traps; records BSUN/AIOP if NAN is set.
ftrapcc_sf:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.b	ftrapcc_sf_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
ftrapcc_sf_done:
	rts				* no; do nothing

*
* signalling true:
*
*	True
*
* ftrapst: always traps unless an enabled BSUN takes priority; records
* BSUN/AIOP if NAN is set.
ftrapcc_st:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	ftrapcc_trap		* no; go take trap
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
	bra.w	ftrapcc_trap		* no; go take trap

*
* signalling equal:
*
*	Z
*
* ftrapseq: both outcomes record BSUN/AIOP when NAN is set; only the
* true outcome goes on to ftrapcc_trap.
ftrapcc_seq:
	fbseq.w	ftrapcc_seq_yes		* signalling equal?
ftrapcc_seq_no:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.b	ftrapcc_seq_done	* no; go finish (short branch, as in ftrapcc_ge)
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
ftrapcc_seq_done:
	rts				* no; do nothing
ftrapcc_seq_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	ftrapcc_trap		* no; go take trap
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
	bra.w	ftrapcc_trap		* no; go take trap

*
* signalling not equal:
*	_
*	Z
*
* ftrapsne: both outcomes record BSUN/AIOP when NAN is set; only the
* true outcome goes on to ftrapcc_trap.
ftrapcc_sneq:
	fbsne.w	ftrapcc_sneq_yes	* signalling not equal?
ftrapcc_sneq_no:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.b	ftrapcc_sneq_no_done	* no; go finish (short branch, as in ftrapcc_ge)
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
ftrapcc_sneq_no_done:
	rts				* do nothing
ftrapcc_sneq_yes:
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	ftrapcc_trap		* no; go take trap
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	ftrapcc_bsun		* yes
	bra.w	ftrapcc_trap		* no; go take trap

*########################################################################
*									#
* IEEE Aware tests							#
*									#
* For the IEEE aware tests, we only have to set the result based on the	#
* floating point condition codes. The BSUN exception will not be	#
* set for any of these tests.						#
*									#
*########################################################################

*
* ordered greater than:
*	_______
*	NANvZvN
*
ftrapcc_ogt:
	fbule.w	ftrapcc_ogt_no		* complement (ULE) holds: OGT is false
	bra.w	ftrapcc_trap		* OGT true; go take trap
ftrapcc_ogt_no:
	rts				* OGT false; do nothing

*
* unordered or less or equal:
*	_______
*	NANvZvN
*
ftrapcc_ule:
	fbogt.w	ftrapcc_ule_no		* complement (OGT) holds: ULE is false
	bra.w	ftrapcc_trap		* ULE true; go take trap
ftrapcc_ule_no:
	rts				* ULE false; do nothing

*
* ordered greater than or equal:
*	   _____
*	Zv(NANvN)
*
ftrapcc_oge:
	fbult.w	ftrapcc_oge_no		* complement (ULT) holds: OGE is false
	bra.w	ftrapcc_trap		* OGE true; go take trap
ftrapcc_oge_no:
	rts				* OGE false; do nothing

*
* unordered or less than:
*	       _
*	NANv(N^Z)
*
ftrapcc_ult:
	fboge.w	ftrapcc_ult_no		* complement (OGE) holds: ULT is false
	bra.w	ftrapcc_trap		* ULT true; go take trap
ftrapcc_ult_no:
	rts				* ULT false; do nothing

*
* ordered less than:
*	   _____
*	N^(NANvZ)
*
ftrapcc_olt:
	fbuge.w	ftrapcc_olt_no		* complement (UGE) holds: OLT is false
	bra.w	ftrapcc_trap		* OLT true; go take trap
ftrapcc_olt_no:
	rts				* OLT false; do nothing

*
* unordered or greater or equal:
*
*	NANvZvN
*
ftrapcc_uge:
	fbolt.w	ftrapcc_uge_no		* complement (OLT) holds: UGE is false
	bra.w	ftrapcc_trap		* unordered or greater or equal; go take trap
ftrapcc_uge_no:
	rts				* UGE false; do nothing

*
* ordered less than or equal:
*	     ___
*	Zv(N^NAN)
*
ftrapcc_ole:
	fbugt.w	ftrapcc_ole_no		* complement (UGT) holds: OLE is false
	bra.w	ftrapcc_trap		* ordered less than or equal; go take trap
ftrapcc_ole_no:
	rts				* OLE false; do nothing

*
* unordered or greater than:
*	     ___
*	NANv(NvZ)
*
ftrapcc_ugt:
	fbole.w	ftrapcc_ugt_no		* complement (OLE) holds: UGT is false
	bra.w	ftrapcc_trap		* UGT true; go take trap
ftrapcc_ugt_no:
	rts				* UGT false; do nothing

*
* ordered greater or less than:
*	_____
*	NANvZ
*
ftrapcc_ogl:
	fbueq.w	ftrapcc_ogl_no		* complement (UEQ) holds: OGL is false
	bra.w	ftrapcc_trap		* OGL true; go take trap
ftrapcc_ogl_no:
	rts				* OGL false; do nothing

*
* unordered or equal:
*
*	NANvZ
*
ftrapcc_ueq:
	fbogl.w	ftrapcc_ueq_no		* complement (OGL) holds: UEQ is false
	bra.w	ftrapcc_trap		* UEQ true; go take trap
ftrapcc_ueq_no:
	rts				* UEQ false; do nothing

*
* ordered:
*	___
*	NAN
*
ftrapcc_or:
	fbun.w	ftrapcc_or_no		* complement (UN) holds: OR is false
	bra.w	ftrapcc_trap		* ordered; go take trap
ftrapcc_or_no:
	rts				* unordered; do nothing

*
* unordered:
*
*	NAN
*
ftrapcc_un:
	fbor.w	ftrapcc_un_no		* complement (OR) holds: UN is false
	bra.w	ftrapcc_trap		* unordered; go take trap
ftrapcc_un_no:
	rts				* ordered; do nothing

*######################################################################

* the bsun exception bit was not set.
* we will need to jump to the ftrapcc vector. the stack frame
* is the same size as that of the fp unimp instruction. the
* only difference is that the <ea> field should hold the PC
* of the ftrapcc instruction and the vector offset field
* should denote the ftrapcc trap.
ftrapcc_trap:
	move.b	#ftrapcc_flg,SPCOND_FLG(a6)	* tell the caller the ftrapcc trap must be taken
	rts

* the emulation routine set bsun and BSUN was enabled. have to
* fix stack and jump to the bsun handler.
* let the caller of this routine shift the stack frame up to
* eliminate the effective address field.
ftrapcc_bsun:
	move.b	#fbsun_flg,SPCOND_FLG(a6)	* tell the caller an enabled BSUN must be taken
	rts

*########################################################################
* fscc(): routine to emulate the fscc instruction			#
*									#
* XDEF **************************************************************** #
*	_fscc()								#
*									#
* XREF **************************************************************** #
*	store_dreg_b() - store result to data register file		#
*	dec_areg() - decrement an areg for -(an) mode			#
*	inc_areg() - increment an areg for (an)+ mode			#
*	_dmem_write_byte() - store result to memory			#
*									#
* INPUT ***************************************************************	#
*	none								#
*									#
* OUTPUT ************************************************************** #
*	none								#
*									#
* ALGORITHM ***********************************************************	#
*	This routine checks which conditional predicate is specified by	#
* the stacked fscc instruction opcode and then branches to a routine	#
* for that predicate. The corresponding fbcc instruction is then used	#
* to see whether the condition (specified by the stacked FPSR) is true	#
* or false.								#
*	If a BSUN exception should be indicated, the BSUN and AIOP	#
* bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
* the fbsun_flg is set in the SPCOND_FLG location on the stack. If an 	#
* enabled BSUN should not be flagged and the predicate is true, then	#
* the result is stored to the data register file or memory		#
*									#
*########################################################################

	global	_fscc
* Entry point: load the saved condition codes into the live FPSR, then
* dispatch through tbl_fscc on the predicate number. Each handler leaves
* the byte result in d0 ($ff = true, $00 = false) for fscc_done.
_fscc:
	move.w	EXC_CMDREG(a6),d0	* fetch predicate (used unmasked as table index)
* NOTE(review): no range check -- assumes the value is 0..31; confirm caller.

	clr.l	d1			* clear scratch reg
	move.b	FPSR_CC(a6),d1		* fetch fp ccodes
	ror.l	#$8,d1			* rotate to top byte (bits 31-24)
	fmove.l	d1,fpsr			* insert into FPSR (all other FPSR bits written as 0)

	move.w	(tbl_fscc.b,pc,d0.w*2),d1	* load 16-bit offset for this predicate
	jmp	(tbl_fscc.b,pc,d1.w)	* jump to fscc routine

* One word per predicate: offset of the handler from tbl_fscc.
tbl_fscc:
	.dc.w	fscc_f-tbl_fscc				* 00
	.dc.w	fscc_eq-tbl_fscc			* 01
	.dc.w	fscc_ogt-tbl_fscc			* 02
	.dc.w	fscc_oge-tbl_fscc			* 03
	.dc.w	fscc_olt-tbl_fscc			* 04
	.dc.w	fscc_ole-tbl_fscc			* 05
	.dc.w	fscc_ogl-tbl_fscc			* 06
	.dc.w	fscc_or-tbl_fscc			* 07
	.dc.w	fscc_un-tbl_fscc			* 08
	.dc.w	fscc_ueq-tbl_fscc			* 09
	.dc.w	fscc_ugt-tbl_fscc			* 10
	.dc.w	fscc_uge-tbl_fscc			* 11
	.dc.w	fscc_ult-tbl_fscc			* 12
	.dc.w	fscc_ule-tbl_fscc			* 13
	.dc.w	fscc_neq-tbl_fscc			* 14
	.dc.w	fscc_t-tbl_fscc				* 15
	.dc.w	fscc_sf-tbl_fscc			* 16
	.dc.w	fscc_seq-tbl_fscc			* 17
	.dc.w	fscc_gt-tbl_fscc			* 18
	.dc.w	fscc_ge-tbl_fscc			* 19
	.dc.w	fscc_lt-tbl_fscc			* 20
	.dc.w	fscc_le-tbl_fscc			* 21
	.dc.w	fscc_gl-tbl_fscc			* 22
	.dc.w	fscc_gle-tbl_fscc			* 23
	.dc.w	fscc_ngle-tbl_fscc			* 24
	.dc.w	fscc_ngl-tbl_fscc			* 25
	.dc.w	fscc_nle-tbl_fscc			* 26
	.dc.w	fscc_nlt-tbl_fscc			* 27
	.dc.w	fscc_nge-tbl_fscc			* 28
	.dc.w	fscc_ngt-tbl_fscc			* 29
	.dc.w	fscc_sneq-tbl_fscc			* 30
	.dc.w	fscc_st-tbl_fscc			* 31

*########################################################################
*									#
* IEEE Nonaware tests							#
*									#
* For the IEEE nonaware tests, we set the result based on the		#
* floating point condition codes. In addition, we check to see		#
* if the NAN bit is set, in which case BSUN and AIOP will be set.	#
*									#
* The cases EQ and NE are shared by the Aware and Nonaware groups	#
* and are incapable of setting the BSUN exception bit.			#
*									#
* Typically, only one of the two possible branch directions could	#
* have the NAN bit set.							#
*									#
*########################################################################

*
* equal:
*
*	Z
*
fscc_eq:
	fbne.w	fscc_eq_no		* complement (NE) holds: EQ is false
fscc_eq_yes:
	st	d0			* equal: result byte = true
	bra.w	fscc_done		* go store it
fscc_eq_no:
	clr.b	d0			* not equal: result byte = false
	bra.w	fscc_done		* go store it

*
* not equal:
*	_
*	Z
*
fscc_neq:
	fbeq.w	fscc_neq_no		* complement (EQ) holds: NE is false
fscc_neq_yes:
	st	d0			* not equal: result byte = true
	bra.w	fscc_done		* go store it
fscc_neq_no:
	clr.b	d0			* equal: result byte = false
	bra.w	fscc_done		* go store it

*
* greater than:
*	_______
*	NANvZvN
*
* fsgt: result in d0.b; the false outcome records BSUN/AIOP if NAN is set.
fscc_gt:
	fbgt.w	fscc_gt_yes		* greater than?
fscc_gt_no:
	clr.b	d0			* set false
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled
fscc_gt_yes:
	st	d0			* set true
	bra.w	fscc_done		* go finish

*
* not greater than:
*
*	NANvZvN	
*
* fsngt: result in d0.b; the true outcome records BSUN/AIOP if NAN is set.
fscc_ngt:
	fbngt.w	fscc_ngt_yes		* not greater than?
fscc_ngt_no:
	clr.b	d0			* set false
	bra.w	fscc_done		* go finish
fscc_ngt_yes:
	st	d0			* set true
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled

*
* greater than or equal:
*	   _____
*	Zv(NANvN)
*
* fsge: result in d0.b; either outcome records BSUN/AIOP if NAN is set.
fscc_ge:
	fbge.w	fscc_ge_yes		* greater than or equal?
fscc_ge_no:
	clr.b	d0			* set false
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled
fscc_ge_yes:
	st	d0			* set true
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled

*
* not (greater than or equal):
*	       _
*	NANv(N^Z)
*
* fsnge: result in d0.b; the true outcome records BSUN/AIOP if NAN is set.
fscc_nge:
	fbnge.w	fscc_nge_yes		* not (greater than or equal)?
fscc_nge_no:
	clr.b	d0			* set false
	bra.w	fscc_done		* go finish
fscc_nge_yes:
	st	d0			* set true
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled

*
* less than:
*	   _____
*	N^(NANvZ)
*
* fslt: result in d0.b; the false outcome records BSUN/AIOP if NAN is set.
fscc_lt:
	fblt.w	fscc_lt_yes		* less than?
fscc_lt_no:
	clr.b	d0			* set false
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled
fscc_lt_yes:
	st	d0			* set true
	bra.w	fscc_done		* go finish

*
* not less than:
*	       _
*	NANv(ZvN)
*
* fsnlt: result in d0.b; the true outcome records BSUN/AIOP if NAN is set.
fscc_nlt:
	fbnlt.w	fscc_nlt_yes		* not less than?
fscc_nlt_no:
	clr.b	d0			* set false
	bra.w	fscc_done		* go finish
fscc_nlt_yes:
	st	d0			* set true
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled

*
* less than or equal:
*	     ___
*	Zv(N^NAN)
*
* fsle: result in d0.b; either outcome records BSUN/AIOP if NAN is set.
fscc_le:
	fble.w	fscc_le_yes		* less than or equal?
fscc_le_no:
	clr.b	d0			* set false
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled
fscc_le_yes:
	st	d0			* set true
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled

*
* not (less than or equal):
*	     ___
*	NANv(NvZ)
*
* fsnle: result in d0.b; the true outcome records BSUN/AIOP if NAN is set.
fscc_nle:
	fbnle.w	fscc_nle_yes		* not (less than or equal)?
fscc_nle_no:
	clr.b	d0			* set false
	bra.w	fscc_done		* go finish
fscc_nle_yes:
	st	d0			* set true
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled

*
* greater or less than:
*	_____
*	NANvZ
*
* fsgl: result in d0.b; the false outcome records BSUN/AIOP if NAN is set.
fscc_gl:
	fbgl.w	fscc_gl_yes		* greater or less than?
fscc_gl_no:
	clr.b	d0			* set false
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled
fscc_gl_yes:
	st	d0			* set true
	bra.w	fscc_done		* go finish

*
* not (greater or less than):
*
*	NANvZ
*
* fsngl: result in d0.b; the true outcome records BSUN/AIOP if NAN is set.
fscc_ngl:
	fbngl.w	fscc_ngl_yes		* not (greater or less than)?
fscc_ngl_no:
	clr.b	d0			* set false
	bra.w	fscc_done		* go finish
fscc_ngl_yes:
	st	d0			* set true
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled

*
* greater, less, or equal:
*	___
*	NAN
*
* fsgle: GLE is "not NAN", so the false outcome means NAN is set and
* BSUN/AIOP are recorded without a separate NAN test.
fscc_gle:
	fbgle.w	fscc_gle_yes		* greater, less, or equal?
fscc_gle_no:
	clr.b	d0			* set false
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled
fscc_gle_yes:
	st	d0			* set true
	bra.w	fscc_done		* go finish

*
* not (greater, less, or equal):
*
*	NAN
*
* fsngle: NGLE is "NAN", so the true outcome always records BSUN/AIOP.
fscc_ngle:
	fbngle.w	fscc_ngle_yes	* not (greater, less, or equal)?
fscc_ngle_no:
	clr.b	d0			* set false
	bra.w	fscc_done		* go finish
fscc_ngle_yes:
	st	d0			* set true
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled

*########################################################################
*									#
* Miscellaneous tests							#
*									#
* For the miscellaneous tests, only fsf and fst cannot set BSUN; the	#
* signalling tests (sf, st, seq, sneq) set BSUN and AIOP whenever the	#
* NAN condition code bit is set.					#
*									#
*########################################################################

*
* false:
*
*	False
*
* fsf: constant false result; no BSUN possible.
fscc_f:
	clr.b	d0			* set false
	bra.w	fscc_done		* go finish

*
* true:
*
*	True
*
* fst: constant true result; no BSUN possible.
fscc_t:
	st	d0			* set true
	bra.w	fscc_done		* go finish

*
* signalling false:
*
*	False
*
* fssf: result is always false; records BSUN/AIOP if NAN is set.
fscc_sf:
	clr.b	d0			* set false
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled

*
* signalling true:
*
*	True
*
* fsst: result is always true; records BSUN/AIOP if NAN is set.
fscc_st:
	st	d0			* set true
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled

*
* signalling equal:
*
*	Z
*
* fsseq: result in d0.b; either outcome records BSUN/AIOP if NAN is set.
fscc_seq:
	fbseq.w	fscc_seq_yes		* signalling equal?
fscc_seq_no:
	clr.b	d0			* set false
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled
fscc_seq_yes:
	st	d0			* set true
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled

*
* signalling not equal:
*	_
*	Z
*
* fssne: result in d0.b; either outcome records BSUN/AIOP if NAN is set.
fscc_sneq:
	fbsne.w	fscc_sneq_yes		* signalling not equal?
fscc_sneq_no:
	clr.b	d0			* set false
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled
fscc_sneq_yes:
	st	d0			* set true
	btst	#nan_bit,FPSR_CC(a6)	* is NAN set in cc?
	beq.w	fscc_done		* no; go finish
	ori.l	#bsun_mask+aiop_mask,USER_FPSR(a6)	* set BSUN and AIOP bits
	bra.w	fscc_chk_bsun		* go check whether BSUN is enabled

*########################################################################
*									#
* IEEE Aware tests							#
*									#
* For the IEEE aware tests, we only have to set the result based on the	#
* floating point condition codes. The BSUN exception will not be	#
* set for any of these tests.						#
*									#
*########################################################################

*
* ordered greater than:
*	_______
*	NANvZvN
*
fscc_ogt:
	fbule.w	fscc_ogt_no		* complement (ULE) holds: OGT is false
fscc_ogt_yes:
	st	d0			* OGT true: result byte = true
	bra.w	fscc_done		* go store it
fscc_ogt_no:
	clr.b	d0			* OGT false: result byte = false
	bra.w	fscc_done		* go store it

*
* unordered or less or equal:
*	_______
*	NANvZvN
*
fscc_ule:
	fbogt.w	fscc_ule_no		* complement (OGT) holds: ULE is false
fscc_ule_yes:
	st	d0			* ULE true: result byte = true
	bra.w	fscc_done		* go store it
fscc_ule_no:
	clr.b	d0			* ULE false: result byte = false
	bra.w	fscc_done		* go store it

*
* ordered greater than or equal:
*	   _____
*	Zv(NANvN)
*
fscc_oge:
	fbult.w	fscc_oge_no		* complement (ULT) holds: OGE is false
fscc_oge_yes:
	st	d0			* OGE true: result byte = true
	bra.w	fscc_done		* go store it
fscc_oge_no:
	clr.b	d0			* OGE false: result byte = false
	bra.w	fscc_done		* go store it

*
* unordered or less than:
*	       _
*	NANv(N^Z)
*
fscc_ult:
	fboge.w	fscc_ult_no		* complement (OGE) holds: ULT is false
fscc_ult_yes:
	st	d0			* ULT true: result byte = true
	bra.w	fscc_done		* go store it
fscc_ult_no:
	clr.b	d0			* ULT false: result byte = false
	bra.w	fscc_done		* go store it

*
* ordered less than:
*	   _____
*	N^(NANvZ)
*
fscc_olt:
	fbuge.w	fscc_olt_no		* complement (UGE) holds: OLT is false
fscc_olt_yes:
	st	d0			* OLT true: result byte = true
	bra.w	fscc_done		* go store it
fscc_olt_no:
	clr.b	d0			* OLT false: result byte = false
	bra.w	fscc_done		* go store it

*
* unordered or greater or equal:
*
*	NANvZvN
*
fscc_uge:
	fbolt.w	fscc_uge_no		* complement (OLT) holds: UGE is false
fscc_uge_yes:
	st	d0			* unordered or greater or equal: result = true
	bra.w	fscc_done		* go store it
fscc_uge_no:
	clr.b	d0			* UGE false: result byte = false
	bra.w	fscc_done		* go store it

*
* ordered less than or equal:
*	     ___
*	Zv(N^NAN)
*
fscc_ole:
	fbugt.w	fscc_ole_no		* complement (UGT) holds: OLE is false
fscc_ole_yes:
	st	d0			* ordered less than or equal: result = true
	bra.w	fscc_done		* go store it
fscc_ole_no:
	clr.b	d0			* OLE false: result byte = false
	bra.w	fscc_done		* go store it

*
* unordered or greater than:
*	     ___
*	NANv(NvZ)
*
fscc_ugt:
	fbole.w	fscc_ugt_no		* complement (OLE) holds: UGT is false
fscc_ugt_yes:
	st	d0			* UGT true: result byte = true
	bra.w	fscc_done		* go store it
fscc_ugt_no:
	clr.b	d0			* UGT false: result byte = false
	bra.w	fscc_done		* go store it

*
* ordered greater or less than:
*	_____
*	NANvZ
*
* leaves $ff in d0.b when the predicate holds and $00 when it does
* not, then joins the common store code at fscc_done.
*
fscc_ogl:
	fbogl.w	fscc_ogl_yes		* predicate true?
fscc_ogl_no:
	sf	d0			* result byte = $00
	bra.w	fscc_done		* go store it
fscc_ogl_yes:
	st	d0			* result byte = $ff
	bra.w	fscc_done		* go store it

*
* unordered or equal:
*
*	NANvZ
*
* leaves $ff in d0.b when the predicate holds and $00 when it does
* not, then joins the common store code at fscc_done.
*
fscc_ueq:
	fbueq.w	fscc_ueq_yes		* predicate true?
fscc_ueq_no:
	sf	d0			* result byte = $00
	bra.w	fscc_done		* go store it
fscc_ueq_yes:
	st	d0			* result byte = $ff
	bra.w	fscc_done		* go store it

*
* ordered:
*	___
*	NAN
*
* leaves $ff in d0.b when the predicate holds and $00 when it does
* not, then joins the common store code at fscc_done.
*
fscc_or:
	fbor.w	fscc_or_yes		* predicate true?
fscc_or_no:
	sf	d0			* result byte = $00
	bra.w	fscc_done		* go store it
fscc_or_yes:
	st	d0			* result byte = $ff
	bra.w	fscc_done		* go store it

*
* unordered:
*
*	NAN
*
* leaves $ff in d0.b when the predicate holds and $00 when it does
* not, then joins the common store code at fscc_done.
*
fscc_un:
	fbun.w	fscc_un_yes		* predicate true?
fscc_un_no:
	sf	d0			* result byte = $00
	bra.w	fscc_done		* go store it
fscc_un_yes:
	st	d0			* result byte = $ff
	bra.w	fscc_done		* go store it

*######################################################################

*
* the bsun exception bit was set. now, check to see if BSUN
* is enabled. if so, don't store result and correct stack frame
* for a bsun exception.
*
* if BSUN is not enabled, execution falls through into fscc_done.
*
fscc_chk_bsun:
	btst	#bsun_bit,FPCR_ENABLE(a6)	* is BSUN enabled?
	bne.w	fscc_bsun

*
* the bsun exception bit was not set.
* the result has been selected.
* now, check to see if the result is to be stored in the data register
* file or in memory.
*
* on entry d0 holds the selected result byte. bits 5-3 of the low
* opword byte are the <ea> mode: zero takes the Dn path below, any
* other value goes to fscc_mem_op with the mode bits still in d1.b
* and the result parked in a0. bits 2-0 are the register number.
*
fscc_done:
	move.l	d0,a0			* save result for a moment

	move.b	1+EXC_OPWORD(a6),d1	* fetch lo opword
	move.l	d1,d0			* make a copy
	andi.b	#$38,d1			* extract <ea> mode

	bne.b	fscc_mem_op		* it's a memory operation

	move.l	d0,d1
	andi.w	#$7,d1			* pass index in d1
	move.l	a0,d0			* pass result in d0
	bsr.l	store_dreg_b		* save result in regfile
	rts

*
* the stacked <ea> is correct with the exception of:
* 	-> Dn : <ea> is garbage
*
* if the addressing mode is post-increment or pre-decrement,
* then the address registers have not been updated.
*
* on entry d1.b = opword mode bits (already masked with $38) and
* a0 = result byte. (An)+ and -(An) are split off to their own
* routines; every other memory mode just writes the byte to EXC_EA.
*
fscc_mem_op:
	cmpi.b	#$18,d1			* is <ea> (An)+ ?
	beq.b	fscc_mem_inc		* yes
	cmpi.b	#$20,d1			* is <ea> -(An) ?
	beq.b	fscc_mem_dec		* yes

	move.l	a0,d0			* pass result in d0
	move.l	EXC_EA(a6),a0		* fetch <ea>
	bsr.l	_dmem_write_byte	* write result byte

	tst.l	d1			* did dstore fail?
	bne.w	fscc_err		* yes

	rts

* addressing mode is post-increment. write the result byte. if the write
* fails then don't update the address register. if write passes then
* call inc_areg() to update the address register.
*
* on entry a0 = result byte (parked there by fscc_done).
*
fscc_mem_inc:
	move.l	a0,d0			* pass result in d0
	move.l	EXC_EA(a6),a0		* fetch <ea>
	bsr.l	_dmem_write_byte	* write result byte

	tst.l	d1			* did dstore fail?
	bne.w	fscc_err		* yes

	move.b	$1+EXC_OPWORD(a6),d1	* fetch opword
	andi.w	#$7,d1			* pass index in d1
	moveq.l	#$1,d0			* pass amt to inc by
	bsr.l	inc_areg		* increment address register

	rts

* addressing mode is pre-decrement. write the result byte. if the write
* fails then don't update the address register. if the write passes then
* call dec_areg() to update the address register.
*
* on entry a0 = result byte (parked there by fscc_done).
*
fscc_mem_dec:
	move.l	a0,d0			* pass result in d0
	move.l	EXC_EA(a6),a0		* fetch <ea>
	bsr.l	_dmem_write_byte	* write result byte

	tst.l	d1			* did dstore fail?
	bne.w	fscc_err		* yes

	move.b	$1+EXC_OPWORD(a6),d1	* fetch opword
	andi.w	#$7,d1			* pass index in d1
	moveq.l	#$1,d0			* pass amt to dec by
	bsr.l	dec_areg		* decrement address register

	rts

* the emulation routine set bsun and BSUN was enabled. have to
* fix stack and jump to the bsun handler.
* let the caller of this routine shift the stack frame up to
* eliminate the effective address field.
*
* all that happens here is that SPCOND_FLG is set to fbsun_flg so the
* caller can tell; no result is stored.
*
fscc_bsun:
	move.b	#fbsun_flg,SPCOND_FLG(a6)
	rts

* the byte write to memory has failed. pass the failing effective address
* and a FSLW to funimp_dacc().
*
* NOTE(review): the code below stores $00a1 in EXC_VOFF and branches to
* facc_finish; the funimp_dacc() name above may be stale - confirm.
*
fscc_err:
	move.w	#$00a1,EXC_VOFF(a6)
	bra.l	facc_finish

*########################################################################
* XDEF ****************************************************************	#
*	fmovem_dynamic(): emulate "fmovm" dynamic instruction		#
*									#
* XREF ****************************************************************	#
*	fetch_dreg() - fetch data register				#
*	{i,d,}mem_read() - fetch data from memory			#
*	_mem_write() - write data to memory				#
*	iea_iacc() - instruction memory access error occurred		#
*	iea_dacc() - data memory access error occurred			#
*	restore() - restore An index regs if access error occurred	#
*									#
* INPUT ***************************************************************	#
*	None								#
* 									#
* OUTPUT **************************************************************	#
*	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
*		d0 = size of dump					#
*		d1 = Dn							#
*	Else if instruction access error,				#
*		d0 = FSLW						#
*	Else if data access error,					#
*		d0 = FSLW						#
*		a0 = address of fault					#
*	Else								#
*		none.							#
*									#
* ALGORITHM ***********************************************************	#
*	The effective address must be calculated since this is entered	#
* from an "Unimplemented Effective Address" exception handler. So, we	#
* have our own fcalc_ea() routine here. If an access error is flagged	#
* by a _{i,d,}mem_read() call, we must exit through the special		#
* handler.								#
*	The data register is determined and its value loaded to get the	#
* string of FP registers affected. This value is used as an index into	#
* a lookup table such that we can determine the number of bytes		#
* involved. 								#
*	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
* to read in all FP values. Again, _mem_read() may fail and require a	#
* special exit. 							#
*	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
* to write all FP values. _mem_write() may also fail.			#
* 	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
* then we return the size of the dump and the string to the caller	#
* so that the move can occur outside of this routine. This special	#
* case is required so that moves to the system stack are handled	#
* correctly.								#
*									#
* DYNAMIC:								#
* 	fmovm.x	dn, <ea>						#
* 	fmovm.x	<ea>, dn						#
*									#
*	      <WORD 1>		      <WORD2>				#
*	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
*					  				#
*	& = (0): predecrement addressing mode				#
*	    (1): postincrement or control addressing mode		#
*	@ = (0): move listed regs from memory to the FPU		#
*	    (1): move listed regs from the FPU to memory		#
*	$$$    : index of data register holding reg select mask		#
*									#
* NOTES:								#
*	If the data register holds a zero, then the			#
*	instruction is a nop.						#
*									#
*########################################################################

	global	fmovem_dynamic
*
* entry point. the register select string is read from the data
* register named in bits 6-4 of the low extension word byte and cut
* down to its low byte. the byte count is looked up in tbl_fmovem_size
* and handed to fmovem_calc_ea in d0. after the call:
*	d0 = byte count, d1 = bit string, a0 = <ea>
*
fmovem_dynamic:

* extract the data register in which the bit string resides...
	move.b	1+EXC_EXTWORD(a6),d1	* fetch extword
	andi.w	#$70,d1			* extract reg bits
	lsr.b	#$4,d1			* shift into lo bits

* fetch the bit string into d0...
	bsr.l	fetch_dreg		* fetch reg string

	andi.l	#$000000ff,d0		* keep only lo byte

	move.l	d0,-(sp)		* save strg
	move.b	(tbl_fmovem_size.w,pc,d0.l),d0
	move.l	d0,-(sp)		* save size
	bsr.l	fmovem_calc_ea		* calculate <ea>
	move.l	(sp)+,d0		* restore size
	move.l	(sp)+,d1		* restore strg

* if the bit string is a zero, then the operation is a no-op
* but, make sure that we've calculated ea and advanced the opword pointer
* (the Z flag tested here comes from reloading strg into d1 just above)
	beq.w	fmovem_data_done

* separate move ins from move outs...
	btst	#$5,EXC_EXTWORD(a6)	* is it a move in or out?
	beq.w	fmovem_data_in		* it's a move in

*############
* MOVE OUT: #
*############
*
* on entry d0 = byte count, d1 = bit string, a0 = <ea>.
* bit 4 of the high extension word byte separates predecrement (0)
* from postincrement/control (1).
*
fmovem_data_out:
	btst	#$4,EXC_EXTWORD(a6)	* control or predecrement?
	bne.w	fmovem_out_ctrl		* control

*###########################
fmovem_out_predec:
* for predecrement mode, the bit string is the opposite of both control
* operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
* here, we convert it to be just like the others...
	move.b	(tbl_fmovem_convert.w,pc,d1.w*1),d1

	btst	#$5,EXC_SR(a6)		* user or supervisor mode?
	beq.b	fmovem_out_ctrl		* user

fmovem_out_predec_s:
	cmpi.b	#mda7_flg,SPCOND_FLG(a6)	* is <ea> mode -(a7)?
	bne.b	fmovem_out_ctrl

* the operation was unfortunately an: fmovm.x dn,-(sp)
* called from supervisor mode.
* we're also passing "size" and "strg" back to the calling routine
* (d0 = size, d1 = converted bit string at this point)
	rts

*###########################
*
* build an image of the selected registers in a temporary area carved
* out of the stack (d0 bytes), then copy that image to the destination
* with _dmem_write.
*
* on entry d0 = byte count, d1.b = bit string with bit 7 selecting FP0,
* a0 = destination <ea>.
* each "lsl.b #1,d1" moves the next register's select bit into bit 7 so
* that it can be tested with bpl.
* FP0 and FP1 are copied from their saved images EXC_FP0/EXC_FP1; FP2-FP7
* are written straight from the FPU with fmovem.
*
fmovem_out_ctrl:
	move.l	a0,a1			* move <ea> to a1

	sub.l	d0,sp			* subtract size of dump
	lea	(sp),a0

	tst.b	d1			* should FP0 be moved?
	bpl.b	fmovem_out_ctrl_fp1	* no

	move.l	$0+EXC_FP0(a6),(a0)+	* yes
	move.l	$4+EXC_FP0(a6),(a0)+
	move.l	$8+EXC_FP0(a6),(a0)+

fmovem_out_ctrl_fp1:
	lsl.b	#$1,d1			* should FP1 be moved?
	bpl.b	fmovem_out_ctrl_fp2	* no

	move.l	$0+EXC_FP1(a6),(a0)+	* yes
	move.l	$4+EXC_FP1(a6),(a0)+
	move.l	$8+EXC_FP1(a6),(a0)+

fmovem_out_ctrl_fp2:
	lsl.b	#$1,d1			* should FP2 be moved?
	bpl.b	fmovem_out_ctrl_fp3	* no

	fmovem.x	fp2,(a0)	* yes
	add.l	#$c,a0

fmovem_out_ctrl_fp3:
	lsl.b	#$1,d1			* should FP3 be moved?
	bpl.b	fmovem_out_ctrl_fp4	* no

	fmovem.x	fp3,(a0)	* yes
	add.l	#$c,a0

fmovem_out_ctrl_fp4:
	lsl.b	#$1,d1			* should FP4 be moved?
	bpl.b	fmovem_out_ctrl_fp5	* no

	fmovem.x	fp4,(a0)	* yes
	add.l	#$c,a0

fmovem_out_ctrl_fp5:
	lsl.b	#$1,d1			* should FP5 be moved?
	bpl.b	fmovem_out_ctrl_fp6	* no

	fmovem.x	fp5,(a0)	* yes
	add.l	#$c,a0

fmovem_out_ctrl_fp6:
	lsl.b	#$1,d1			* should FP6 be moved?
	bpl.b	fmovem_out_ctrl_fp7	* no

	fmovem.x	fp6,(a0)	* yes
	add.l	#$c,a0

fmovem_out_ctrl_fp7:
	lsl.b	#$1,d1			* should FP7 be moved?
	bpl.b	fmovem_out_ctrl_done	* no

	fmovem.x	fp7,(a0)	* yes
	add.l	#$c,a0

fmovem_out_ctrl_done:
* keep the destination address in L_SCR1; fmovem_err reloads it from
* there if the write fails.
	move.l	a1,L_SCR1(a6)

	lea	(sp),a0			* pass: supervisor src
	move.l	d0,-(sp)		* save size
	bsr.l	_dmem_write		* copy data to user mem

	move.l	(sp)+,d0
	add.l	d0,sp			* clear fpreg data from stack

	tst.l	d1			* did dstore err?
	bne.w	fmovem_out_err		* yes

	rts

*###########
* MOVE IN: #
*###########
*
* read d0 bytes from the source <ea> into a temporary area carved out of
* the stack with _dmem_read, then distribute the image to the selected
* registers.
*
* on entry d0 = byte count, d1.b = bit string with bit 7 selecting FP0,
* a0 = source <ea> (also kept in L_SCR1 for fmovem_err).
* FP0 and FP1 are loaded into their saved images EXC_FP0/EXC_FP1; FP2-FP7
* are loaded straight into the FPU with fmovem.
*
* NOTE(review): on a failed read the branch to fmovem_in_err leaves the
* saved bit string and the temporary area on the stack; presumably the
* error exit rebuilds sp from the frame - confirm.
*
fmovem_data_in:
	move.l	a0,L_SCR1(a6)

	sub.l	d0,sp			* make room for fpregs
	lea	(sp),a1

	move.l	d1,-(sp)		* save bit string for later
	move.l	d0,-(sp)		* save # of bytes

	bsr.l	_dmem_read		* copy data from user mem

	move.l	(sp)+,d0		* retrieve # of bytes

	tst.l	d1			* did dfetch fail?
	bne.w	fmovem_in_err		* yes

	move.l	(sp)+,d1		* load bit string

	lea	(sp),a0			* addr of stack

	tst.b	d1			* should FP0 be moved?
	bpl.b	fmovem_data_in_fp1	* no

	move.l	(a0)+,$0+EXC_FP0(a6)	* yes
	move.l	(a0)+,$4+EXC_FP0(a6)
	move.l	(a0)+,$8+EXC_FP0(a6)

fmovem_data_in_fp1:
	lsl.b	#$1,d1			* should FP1 be moved?
	bpl.b	fmovem_data_in_fp2	* no

	move.l	(a0)+,$0+EXC_FP1(a6)	* yes
	move.l	(a0)+,$4+EXC_FP1(a6)
	move.l	(a0)+,$8+EXC_FP1(a6)

fmovem_data_in_fp2:
	lsl.b	#$1,d1			* should FP2 be moved?
	bpl.b	fmovem_data_in_fp3	* no

	fmovem.x	(a0)+,fp2	* yes

fmovem_data_in_fp3:
	lsl.b	#$1,d1			* should FP3 be moved?
	bpl.b	fmovem_data_in_fp4	* no

	fmovem.x	(a0)+,fp3	* yes

fmovem_data_in_fp4:
	lsl.b	#$1,d1			* should FP4 be moved?
	bpl.b	fmovem_data_in_fp5	* no

	fmovem.x	(a0)+,fp4	* yes

fmovem_data_in_fp5:
	lsl.b	#$1,d1			* should FP5 be moved?
	bpl.b	fmovem_data_in_fp6	* no

	fmovem.x	(a0)+,fp5	* yes

fmovem_data_in_fp6:
	lsl.b	#$1,d1			* should FP6 be moved?
	bpl.b	fmovem_data_in_fp7	* no

	fmovem.x	(a0)+,fp6	* yes

fmovem_data_in_fp7:
	lsl.b	#$1,d1			* should FP7 be moved?
	bpl.b	fmovem_data_in_done	* no

	fmovem.x	(a0)+,fp7	* yes

fmovem_data_in_done:
	add.l	d0,sp			* remove fpregs from stack
	rts

*####################################

* reached from fmovem_dynamic when the bit string is zero: nothing to
* move, just return.
fmovem_data_done:
	rts

*#############################################################################

*
* table indexed by the operation's bit string that gives the number
* of bytes that will be moved.
*
* number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
*
tbl_fmovem_size:
	.dc.b	$00,$0c,$0c,$18,$0c,$18,$18,$24
	.dc.b	$0c,$18,$18,$24,$18,$24,$24,$30
	.dc.b	$0c,$18,$18,$24,$18,$24,$24,$30
	.dc.b	$18,$24,$24,$30,$24,$30,$30,$3c
	.dc.b	$0c,$18,$18,$24,$18,$24,$24,$30
	.dc.b	$18,$24,$24,$30,$24,$30,$30,$3c
	.dc.b	$18,$24,$24,$30,$24,$30,$30,$3c
	.dc.b	$24,$30,$30,$3c,$30,$3c,$3c,$48
	.dc.b	$0c,$18,$18,$24,$18,$24,$24,$30
	.dc.b	$18,$24,$24,$30,$24,$30,$30,$3c
	.dc.b	$18,$24,$24,$30,$24,$30,$30,$3c
	.dc.b	$24,$30,$30,$3c,$30,$3c,$3c,$48
	.dc.b	$18,$24,$24,$30,$24,$30,$30,$3c
	.dc.b	$24,$30,$30,$3c,$30,$3c,$3c,$48
	.dc.b	$24,$30,$30,$3c,$30,$3c,$3c,$48
	.dc.b	$30,$3c,$3c,$48,$3c,$48,$48,$54
	.dc.b	$0c,$18,$18,$24,$18,$24,$24,$30
	.dc.b	$18,$24,$24,$30,$24,$30,$30,$3c
	.dc.b	$18,$24,$24,$30,$24,$30,$30,$3c
	.dc.b	$24,$30,$30,$3c,$30,$3c,$3c,$48
	.dc.b	$18,$24,$24,$30,$24,$30,$30,$3c
	.dc.b	$24,$30,$30,$3c,$30,$3c,$3c,$48
	.dc.b	$24,$30,$30,$3c,$30,$3c,$3c,$48
	.dc.b	$30,$3c,$3c,$48,$3c,$48,$48,$54
	.dc.b	$18,$24,$24,$30,$24,$30,$30,$3c
	.dc.b	$24,$30,$30,$3c,$30,$3c,$3c,$48
	.dc.b	$24,$30,$30,$3c,$30,$3c,$3c,$48
	.dc.b	$30,$3c,$3c,$48,$3c,$48,$48,$54
	.dc.b	$24,$30,$30,$3c,$30,$3c,$3c,$48
	.dc.b	$30,$3c,$3c,$48,$3c,$48,$48,$54
	.dc.b	$30,$3c,$3c,$48,$3c,$48,$48,$54
	.dc.b	$3c,$48,$48,$54,$48,$54,$54,$60

*
* table to convert a pre-decrement bit string into a post-increment
* or control bit string.
* ex: 	0x00	==>	0x00
*	0x01	==>	0x80
*	0x02	==>	0x40
*		.
*		.
*	0xfd	==>	0xbf
*	0xfe	==>	0x7f
*	0xff	==>	0xff
*
tbl_fmovem_convert:
	.dc.b	$00,$80,$40,$c0,$20,$a0,$60,$e0
	.dc.b	$10,$90,$50,$d0,$30,$b0,$70,$f0
	.dc.b	$08,$88,$48,$c8,$28,$a8,$68,$e8
	.dc.b	$18,$98,$58,$d8,$38,$b8,$78,$f8
	.dc.b	$04,$84,$44,$c4,$24,$a4,$64,$e4
	.dc.b	$14,$94,$54,$d4,$34,$b4,$74,$f4
	.dc.b	$0c,$8c,$4c,$cc,$2c,$ac,$6c,$ec
	.dc.b	$1c,$9c,$5c,$dc,$3c,$bc,$7c,$fc
	.dc.b	$02,$82,$42,$c2,$22,$a2,$62,$e2
	.dc.b	$12,$92,$52,$d2,$32,$b2,$72,$f2
	.dc.b	$0a,$8a,$4a,$ca,$2a,$aa,$6a,$ea
	.dc.b	$1a,$9a,$5a,$da,$3a,$ba,$7a,$fa
	.dc.b	$06,$86,$46,$c6,$26,$a6,$66,$e6
	.dc.b	$16,$96,$56,$d6,$36,$b6,$76,$f6
	.dc.b	$0e,$8e,$4e,$ce,$2e,$ae,$6e,$ee
	.dc.b	$1e,$9e,$5e,$de,$3e,$be,$7e,$fe
	.dc.b	$01,$81,$41,$c1,$21,$a1,$61,$e1
	.dc.b	$11,$91,$51,$d1,$31,$b1,$71,$f1
	.dc.b	$09,$89,$49,$c9,$29,$a9,$69,$e9
	.dc.b	$19,$99,$59,$d9,$39,$b9,$79,$f9
	.dc.b	$05,$85,$45,$c5,$25,$a5,$65,$e5
	.dc.b	$15,$95,$55,$d5,$35,$b5,$75,$f5
	.dc.b	$0d,$8d,$4d,$cd,$2d,$ad,$6d,$ed
	.dc.b	$1d,$9d,$5d,$dd,$3d,$bd,$7d,$fd
	.dc.b	$03,$83,$43,$c3,$23,$a3,$63,$e3
	.dc.b	$13,$93,$53,$d3,$33,$b3,$73,$f3
	.dc.b	$0b,$8b,$4b,$cb,$2b,$ab,$6b,$eb
	.dc.b	$1b,$9b,$5b,$db,$3b,$bb,$7b,$fb
	.dc.b	$07,$87,$47,$c7,$27,$a7,$67,$e7
	.dc.b	$17,$97,$57,$d7,$37,$b7,$77,$f7
	.dc.b	$0f,$8f,$4f,$cf,$2f,$af,$6f,$ef
	.dc.b	$1f,$9f,$5f,$df,$3f,$bf,$7f,$ff

	global	fmovem_calc_ea
*##############################################
* fmovem_calc_ea: calculate effective address #
*##############################################
*
* in:  d0 = number of bytes the operation will move
* out: a0 = <ea> (returned by the per-mode routine that is jumped to)
*
* the 6-bit {MODE,REG} field of the opword indexes tbl_fea_mode, a table
* of 16-bit offsets relative to tbl_fea_mode itself. the selected routine
* is entered with a0 = byte count and d1 = register number and returns
* directly to our caller.
*
* NOTE(review): the table entries for modes 0 and 1 and for mode 7,
* registers 4-7 are zero, i.e. they would jump to the table itself;
* presumably those modes never reach this routine - confirm.
*
fmovem_calc_ea:
	move.l	d0,a0			* move # bytes to a0

* currently, MODE and REG are taken from the EXC_OPWORD. this could be
* easily changed if they were inputs passed in registers.
	move.w	EXC_OPWORD(a6),d0	* fetch opcode word
	move.w	d0,d1			* make a copy

	andi.w	#$3f,d0			* extract mode field
	andi.l	#$7,d1			* extract reg  field

* jump to the corresponding function for each {MODE,REG} pair.
	move.w	(tbl_fea_mode.b,pc,d0.w*2),d0		* fetch jmp distance
	jmp	(tbl_fea_mode.b,pc,d0.w*1)	* jmp to correct ea mode

	.dc.w	$4AFC,64
tbl_fea_mode:
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode

	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode

	.dc.w	faddr_ind_a0-tbl_fea_mode
	.dc.w	faddr_ind_a1-tbl_fea_mode
	.dc.w	faddr_ind_a2-tbl_fea_mode
	.dc.w	faddr_ind_a3-tbl_fea_mode
	.dc.w	faddr_ind_a4-tbl_fea_mode
	.dc.w	faddr_ind_a5-tbl_fea_mode
	.dc.w	faddr_ind_a6-tbl_fea_mode
	.dc.w	faddr_ind_a7-tbl_fea_mode

	.dc.w	faddr_ind_p_a0-tbl_fea_mode
	.dc.w	faddr_ind_p_a1-tbl_fea_mode
	.dc.w	faddr_ind_p_a2-tbl_fea_mode
	.dc.w	faddr_ind_p_a3-tbl_fea_mode
	.dc.w	faddr_ind_p_a4-tbl_fea_mode
	.dc.w	faddr_ind_p_a5-tbl_fea_mode
	.dc.w	faddr_ind_p_a6-tbl_fea_mode
	.dc.w	faddr_ind_p_a7-tbl_fea_mode

	.dc.w	faddr_ind_m_a0-tbl_fea_mode
	.dc.w	faddr_ind_m_a1-tbl_fea_mode
	.dc.w	faddr_ind_m_a2-tbl_fea_mode
	.dc.w	faddr_ind_m_a3-tbl_fea_mode
	.dc.w	faddr_ind_m_a4-tbl_fea_mode
	.dc.w	faddr_ind_m_a5-tbl_fea_mode
	.dc.w	faddr_ind_m_a6-tbl_fea_mode
	.dc.w	faddr_ind_m_a7-tbl_fea_mode

	.dc.w	faddr_ind_disp_a0-tbl_fea_mode
	.dc.w	faddr_ind_disp_a1-tbl_fea_mode
	.dc.w	faddr_ind_disp_a2-tbl_fea_mode
	.dc.w	faddr_ind_disp_a3-tbl_fea_mode
	.dc.w	faddr_ind_disp_a4-tbl_fea_mode
	.dc.w	faddr_ind_disp_a5-tbl_fea_mode
	.dc.w	faddr_ind_disp_a6-tbl_fea_mode
	.dc.w	faddr_ind_disp_a7-tbl_fea_mode

	.dc.w	faddr_ind_ext-tbl_fea_mode
	.dc.w	faddr_ind_ext-tbl_fea_mode
	.dc.w	faddr_ind_ext-tbl_fea_mode
	.dc.w	faddr_ind_ext-tbl_fea_mode
	.dc.w	faddr_ind_ext-tbl_fea_mode
	.dc.w	faddr_ind_ext-tbl_fea_mode
	.dc.w	faddr_ind_ext-tbl_fea_mode
	.dc.w	faddr_ind_ext-tbl_fea_mode

	.dc.w	fabs_short-tbl_fea_mode
	.dc.w	fabs_long-tbl_fea_mode
	.dc.w	fpc_ind-tbl_fea_mode
	.dc.w	fpc_ind_ext-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode
	.dc.w	tbl_fea_mode-tbl_fea_mode

*##################################
* Address register indirect: (An) #
*##################################
*
* out: a0 = <ea> = current value of An
* a0 and a1 are read from their saved copies at EXC_DREGS+$8/+$c, a6 from
* the longword at (a6), a7 from EXC_A7; a2-a5 are read from the live
* registers.
*
faddr_ind_a0:
	move.l	EXC_DREGS+$8(a6),a0	* Get current a0
	rts

faddr_ind_a1:
	move.l	EXC_DREGS+$c(a6),a0	* Get current a1
	rts

faddr_ind_a2:
	move.l	a2,a0			* Get current a2
	rts

faddr_ind_a3:
	move.l	a3,a0			* Get current a3
	rts

faddr_ind_a4:
	move.l	a4,a0			* Get current a4
	rts

faddr_ind_a5:
	move.l	a5,a0			* Get current a5
	rts

faddr_ind_a6:
	move.l	(a6),a0			* Get current a6
	rts

faddr_ind_a7:
	move.l	EXC_A7(a6),a0		* Get current a7
	rts

*####################################################
* Address register indirect w/ postincrement: (An)+ #
*####################################################
*
* in:  a0 = number of bytes the operation will move
* out: a0 = <ea> = An before the update
*      d0 = An before the update, d1 = An after the update
* An (saved copy or live register) is advanced by the byte count.
*
faddr_ind_p_a0:
	move.l	EXC_DREGS+$8(a6),d0	* d0 = saved a0
	move.l	a0,d1			* d1 = byte count
	add.l	d0,d1			* d1 = a0 + byte count
	move.l	d0,a0			* <ea> is the old a0
	move.l	d1,EXC_DREGS+$8(a6)	* write back advanced a0
	rts

faddr_ind_p_a1:
	move.l	EXC_DREGS+$c(a6),d0	* d0 = saved a1
	move.l	a0,d1			* d1 = byte count
	add.l	d0,d1			* d1 = a1 + byte count
	move.l	d0,a0			* <ea> is the old a1
	move.l	d1,EXC_DREGS+$c(a6)	* write back advanced a1
	rts

faddr_ind_p_a2:
	move.l	a2,d0			* d0 = live a2
	move.l	a0,d1			* d1 = byte count
	add.l	d0,d1			* d1 = a2 + byte count
	move.l	d0,a0			* <ea> is the old a2
	move.l	d1,a2			* advance a2
	rts

faddr_ind_p_a3:
	move.l	a3,d0			* d0 = live a3
	move.l	a0,d1			* d1 = byte count
	add.l	d0,d1			* d1 = a3 + byte count
	move.l	d0,a0			* <ea> is the old a3
	move.l	d1,a3			* advance a3
	rts

faddr_ind_p_a4:
	move.l	a4,d0			* d0 = live a4
	move.l	a0,d1			* d1 = byte count
	add.l	d0,d1			* d1 = a4 + byte count
	move.l	d0,a0			* <ea> is the old a4
	move.l	d1,a4			* advance a4
	rts

faddr_ind_p_a5:
	move.l	a5,d0			* d0 = live a5
	move.l	a0,d1			* d1 = byte count
	add.l	d0,d1			* d1 = a5 + byte count
	move.l	d0,a0			* <ea> is the old a5
	move.l	d1,a5			* advance a5
	rts

faddr_ind_p_a6:
	move.l	(a6),d0			* d0 = saved a6
	move.l	a0,d1			* d1 = byte count
	add.l	d0,d1			* d1 = a6 + byte count
	move.l	d0,a0			* <ea> is the old a6
	move.l	d1,(a6)			* write back advanced a6
	rts

faddr_ind_p_a7:
	move.b	#mia7_flg,SPCOND_FLG(a6)	* flag the (a7)+ special case

	move.l	EXC_A7(a6),d0		* d0 = saved a7
	move.l	a0,d1			* d1 = byte count
	add.l	d0,d1			* d1 = a7 + byte count
	move.l	d0,a0			* <ea> is the old a7
	move.l	d1,EXC_A7(a6)		* write back advanced a7
	rts

*###################################################
* Address register indirect w/ predecrement: -(An) #
*###################################################
*
* in:  a0 = number of bytes the operation will move
* out: a0 = d0 = <ea> = An after the byte count has been subtracted
* An (saved copy or live register) is updated with the new value.
*
faddr_ind_m_a0:
	move.l	EXC_DREGS+$8(a6),d0	* Get current a0
	sub.l	a0,d0			* Decrement
	move.l	d0,EXC_DREGS+$8(a6)	* Save decr value
	move.l	d0,a0
	rts

faddr_ind_m_a1:
	move.l	EXC_DREGS+$c(a6),d0	* Get current a1
	sub.l	a0,d0			* Decrement
	move.l	d0,EXC_DREGS+$c(a6)	* Save decr value
	move.l	d0,a0
	rts

faddr_ind_m_a2:
	move.l	a2,d0			* Get current a2
	sub.l	a0,d0			* Decrement
	move.l	d0,a2			* Save decr value
	move.l	d0,a0
	rts

faddr_ind_m_a3:
	move.l	a3,d0			* Get current a3
	sub.l	a0,d0			* Decrement
	move.l	d0,a3			* Save decr value
	move.l	d0,a0
	rts

faddr_ind_m_a4:
	move.l	a4,d0			* Get current a4
	sub.l	a0,d0			* Decrement
	move.l	d0,a4			* Save decr value
	move.l	d0,a0
	rts

faddr_ind_m_a5:
	move.l	a5,d0			* Get current a5
	sub.l	a0,d0			* Decrement
	move.l	d0,a5			* Save decr value
	move.l	d0,a0
	rts

faddr_ind_m_a6:
	move.l	(a6),d0			* Get current a6
	sub.l	a0,d0			* Decrement
	move.l	d0,(a6)			* Save decr value
	move.l	d0,a0
	rts

* -(a7) additionally records mda7_flg in SPCOND_FLG; fmovem_out_predec_s
* tests for that value.
faddr_ind_m_a7:
	move.b	#mda7_flg,SPCOND_FLG(a6)	* set "special case" flag

	move.l	EXC_A7(a6),d0		* Get current a7
	sub.l	a0,d0			* Decrement
	move.l	d0,EXC_A7(a6)		* Save decr value
	move.l	d0,a0
	rts

*#######################################################
* Address register indirect w/ displacement: (d16, An) #
*#######################################################
*
* out: a0 = <ea> = An + sign-extended d16
* each routine reads the displacement word at EXC_EXTWPTR (advancing the
* pointer by 2) with _imem_read_word. a non-zero d1 after the read means
* the instruction fetch failed and the routine leaves through iea_iacc.
* "move.w d0,a0" sign extends the word because the destination is an
* address register.
*
faddr_ind_disp_a0:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$2,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.w	d0,a0			* sign extend displacement

	add.l	EXC_DREGS+$8(a6),a0	* a0 + d16
	rts

faddr_ind_disp_a1:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$2,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.w	d0,a0			* sign extend displacement

	add.l	EXC_DREGS+$c(a6),a0	* a1 + d16
	rts

faddr_ind_disp_a2:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$2,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.w	d0,a0			* sign extend displacement

	add.l	a2,a0			* a2 + d16
	rts

faddr_ind_disp_a3:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$2,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.w	d0,a0			* sign extend displacement

	add.l	a3,a0			* a3 + d16
	rts

faddr_ind_disp_a4:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$2,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.w	d0,a0			* sign extend displacement

	add.l	a4,a0			* a4 + d16
	rts

faddr_ind_disp_a5:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$2,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.w	d0,a0			* sign extend displacement

	add.l	a5,a0			* a5 + d16
	rts

faddr_ind_disp_a6:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$2,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.w	d0,a0			* sign extend displacement

	add.l	(a6),a0			* a6 + d16
	rts

faddr_ind_disp_a7:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$2,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.w	d0,a0			* sign extend displacement

	add.l	EXC_A7(a6),a0		* a7 + d16
	rts

*#######################################################################
* Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
*    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
* Memory indirect postindexed: ([bd, An], Xn, od)		       #
* Memory indirect preindexed: ([bd, An, Xn], od)		       #
*#######################################################################
*
* in:  d1 = base address register number (0-7)
* out: a0 = <ea>
*
* the base register is fetched through fetch_dreg with index 8+n, then
* the extension word is read. if bit 8 of the extension word is set the
* full format is decoded by fcalc_mem_ind (a0 = base, d0 = extword).
* otherwise this is the brief format:
*	bits 15-12 = index register, bit 11 = index size (1 = long),
*	bits 10-9 = scale, bits 7-0 = signed 8-bit displacement
*
* NOTE(review): if the extension word fetch fails, the base value pushed
* on the stack is not popped before leaving through iea_iacc;
* presumably that exit rebuilds sp from the frame - confirm.
*
faddr_ind_ext:
	addq.l	#$8,d1
	bsr.l	fetch_dreg		* fetch base areg
	move.l	d0,-(sp)

	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$2,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_word		* fetch extword in d0

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.l	(sp)+,a0

	btst	#$8,d0
	bne.w	fcalc_mem_ind

	move.l	d0,L_SCR1(a6)		* hold opword

	move.l	d0,d1
	rol.w	#$4,d1
	andi.w	#$f,d1			* extract index regno

* count on fetch_dreg() not to alter a0...
	bsr.l	fetch_dreg		* fetch index

	move.l	d2,-(sp)		* save d2
	move.l	L_SCR1(a6),d2		* fetch opword

	btst	#$b,d2			* is it word or long?
	bne.b	faii8_long
	ext.l	d0			* sign extend word index
faii8_long:
	move.l	d2,d1
	rol.w	#$7,d1
	andi.l	#$3,d1			* extract scale value

	lsl.l	d1,d0			* shift index by scale

	extb.l	d2			* sign extend displacement
	add.l	d2,d0			* index + disp
	add.l	d0,a0			* An + (index + disp)

	move.l	(sp)+,d2		* restore old d2
	rts

*##########################
* Absolute short: (XXX).W #
*##########################
*
* out: a0 = <ea> = the word at EXC_EXTWPTR, sign extended
* EXC_EXTWPTR is advanced by 2. a failed instruction fetch leaves
* through iea_iacc.
*
fabs_short:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$2,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_word		* fetch short address

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.w	d0,a0			* return <ea> in a0
	rts

*#########################
* Absolute long: (XXX).L #
*#########################
*
* out: a0 = <ea> = the longword at EXC_EXTWPTR
* EXC_EXTWPTR is advanced by 4. a failed instruction fetch leaves
* through iea_iacc.
*
fabs_long:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$4,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_long		* fetch long address

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.l	d0,a0			* return <ea> in a0
	rts

*######################################################
* Program counter indirect w/ displacement: (d16, PC) #
*######################################################
*
* out: a0 = <ea> = address of the displacement word + sign-extended d16
* EXC_EXTWPTR is advanced by 2 before the read, so the final subtract of
* 2 brings the base back to the address of the displacement word itself.
* a failed instruction fetch leaves through iea_iacc.
*
fpc_ind:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$2,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_word		* fetch word displacement

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.w	d0,a0			* sign extend displacement

	add.l	EXC_EXTWPTR(a6),a0	* pc + d16

* the extwptr was advanced by 2 above, before the fetch. need to adjust here.
	subq.l	#$2,a0			* adjust <ea>
	rts

*#########################################################
* PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
* "     "     w/   "  (base displacement): (bd, PC, Xn)  #
* PC memory indirect postindexed: ([bd, PC], Xn, od)     #
* PC memory indirect preindexed: ([bd, PC, Xn], od)      #
*#########################################################
*
* out: a0 = <ea>
*
* the base is the address of the extension word (EXC_EXTWPTR after the
* fetch, minus 2). if bit 8 of the extension word is set the full format
* is decoded by fcalc_mem_ind (a0 = base, d0 = extword). otherwise this
* is the brief format:
*	bits 15-12 = index register, bit 11 = index size (1 = long),
*	bits 10-9 = scale, bits 7-0 = signed 8-bit displacement
*
fpc_ind_ext:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$2,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_word		* fetch ext word

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.l	EXC_EXTWPTR(a6),a0	* put base in a0
	subq.l	#$2,a0			* adjust base

	btst	#$8,d0			* full format extension word?
	bne.w	fcalc_mem_ind		* calc memory indirect

	move.l	d0,L_SCR1(a6)		* store opword

	move.l	d0,d1			* make extword copy
	rol.w	#$4,d1			* rotate reg num into place
	andi.w	#$f,d1			* extract register number

* count on fetch_dreg() not to alter a0...
	bsr.l	fetch_dreg		* fetch index

	move.l	d2,-(sp)		* save d2
	move.l	L_SCR1(a6),d2		* fetch opword

	btst	#$b,d2			* is index word or long?
	bne.b	fpii8_long		* long
	ext.l	d0			* sign extend word index
fpii8_long:
	move.l	d2,d1
	rol.w	#$7,d1			* rotate scale value into place
	andi.l	#$3,d1			* extract scale value

	lsl.l	d1,d0			* shift index by scale

	extb.l	d2			* sign extend displacement
	add.l	d2,d0			* disp + index
	add.l	d0,a0			* PC base + (index + disp)

	move.l	(sp)+,d2		* restore temp register
	rts

* decode a full format extension word.
*
* in:  d0 = extension word, a0 = base (An or PC value)
* out: a0 = <ea>; d2-d5 are saved and restored
*
* register use inside the routine:
* d2 = index
* d3 = base
* d4 = od
* d5 = extword
*
* extension word fields as tested below:
*	bits 15-12 = index register	bit 11 = index size (1 = long)
*	bits 10-9  = scale		bit 7  = suppress base
*	bit 6      = suppress index	bits 5-4 = bd size (2 = word, 3 = long)
*	bit 2      = 1: index added after the memory fetch (postindexed)
*	bits 1-0   = 0: no memory indirection; 1 = null od, 2 = word od,
*		     3 = long od
*
* an instruction fetch failure leaves through fcea_iacc, a failure of
* the memory-indirect read through fcea_err.
*
fcalc_mem_ind:
	btst	#$6,d0			* is the index suppressed?
	beq.b	fcalc_index

	movem.l	d2-d5,-(sp)		* save d2-d5

	move.l	d0,d5			* put extword in d5
	move.l	a0,d3			* put base in d3

	clr.l	d2			* yes, so index = 0
	bra.b	fbase_supp_ck

* index:
fcalc_index:
	move.l	d0,L_SCR1(a6)		* save d0 (opword)
	bfextu	d0{#16:#4},d1		* fetch dreg index
	bsr.l	fetch_dreg

	movem.l	d2-d5,-(sp)		* save d2-d5
	move.l	d0,d2			* put index in d2
	move.l	L_SCR1(a6),d5
	move.l	a0,d3

	btst	#$b,d5			* is index word or long?
	bne.b	fno_ext
	ext.l	d2

fno_ext:
	bfextu	d5{#21:#2},d0
	lsl.l	d0,d2

* base address (passed as parameter in d3):
* we clear the value here if it should actually be suppressed.
fbase_supp_ck:
	btst	#$7,d5			* is the base register suppressed?
	beq.b	fno_base_sup
	clr.l	d3

* base displacement:
fno_base_sup:
	bfextu	d5{#26:#2},d0		* get bd size
*	beq.l		fmovm_error		# if (size == 0) it's reserved

	cmpi.b	#$2,d0
	blt.b	fno_bd
	beq.b	fget_word_bd

	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$4,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_long

	tst.l	d1			* did ifetch fail?
	bne.l	fcea_iacc		* yes

	bra.b	fchk_ind

fget_word_bd:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$2,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	d1			* did ifetch fail?
	bne.l	fcea_iacc		* yes

	ext.l	d0			* sign extend bd

fchk_ind:
	add.l	d0,d3			* base += bd

* outer displacement:
fno_bd:
	bfextu	d5{#30:#2},d0		* is od suppressed?
	beq.w	faii_bd

	cmpi.b	#$2,d0
	blt.b	fnull_od
	beq.b	fword_od

	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$4,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_long

	tst.l	d1			* did ifetch fail?
	bne.l	fcea_iacc		* yes

	bra.b	fadd_them

fword_od:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$2,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_word

	tst.l	d1			* did ifetch fail?
	bne.l	fcea_iacc		* yes

	ext.l	d0			* sign extend od
	bra.b	fadd_them

fnull_od:
	clr.l	d0

fadd_them:
	move.l	d0,d4

	btst	#$2,d5			* pre or post indexing?
	beq.b	fpre_indexed

* postindexed: fetch the pointer at (base + bd), then add index and od
	move.l	d3,a0
	bsr.l	_dmem_read_long

	tst.l	d1			* did dfetch fail?
	bne.w	fcea_err		* yes

	add.l	d2,d0			* <ea> += index
	add.l	d4,d0			* <ea> += od
	bra.b	fdone_ea

* preindexed: fetch the pointer at (base + bd + index), then add od
fpre_indexed:
	add.l	d2,d3			* preindexing
	move.l	d3,a0
	bsr.l	_dmem_read_long

	tst.l	d1			* did dfetch fail?
	bne.w	fcea_err		* yes

	add.l	d4,d0			* ea += od
	bra.b	fdone_ea

* no memory indirection
faii_bd:
	add.l	d2,d3			* ea = (base + bd) + index
	move.l	d3,d0
fdone_ea:
	move.l	d0,a0

	movem.l	(sp)+,d2-d5		* restore d2-d5
	rts

*########################################################
*
* error exits used by fcalc_mem_ind. both restore d2-d5 first.
*
* fcea_err:  the memory-indirect data read failed. a0 = the address that
*	      was being read (d3), d0.w = $0101, then leave through
*	      iea_dacc.
* fcea_iacc: an instruction fetch failed; leave through iea_iacc.
*
fcea_err:
	move.l	d3,a0

	movem.l	(sp)+,d2-d5		* restore d2-d5
	move.w	#$0101,d0
	bra.l	iea_dacc

fcea_iacc:
	movem.l	(sp)+,d2-d5		* restore d2-d5
	bra.l	iea_iacc

* data access error exits for the fmovem register move.
* both call restore(), load a direction-specific code into d0.w
* ($00e1 for the failed write, $0161 for the failed read), reload the
* <ea> that was saved in L_SCR1 into a0, and leave through iea_dacc.
fmovem_out_err:
	bsr.l	restore
	move.w	#$00e1,d0
	bra.b	fmovem_err

fmovem_in_err:
	bsr.l	restore
	move.w	#$0161,d0

fmovem_err:
	move.l	L_SCR1(a6),a0
	bra.l	iea_dacc

*########################################################################
* XDEF ****************************************************************	#
* 	fmovem_ctrl(): emulate fmovm.l of control registers instr	#
*									#
* XREF ****************************************************************	#
*	_imem_read_long() - read longword from memory			#
*	iea_iacc() - _imem_read_long() failed; error recovery		#
*									#
* INPUT ***************************************************************	#
*	None								#
* 									#
* OUTPUT **************************************************************	#
*	If _imem_read_long() doesn't fail:				#
*		USER_FPCR(a6)  = new FPCR value				#
*		USER_FPSR(a6)  = new FPSR value				#
*		USER_FPIAR(a6) = new FPIAR value			#
*									#
* ALGORITHM ***********************************************************	#
* 	Decode the instruction type by looking at the extension word 	#
* in order to see how many control registers to fetch from memory.	#
* Fetch them using _imem_read_long(). If this fetch fails, exit through	#
* the special access error exit handler iea_iacc().			#
*									#
* Instruction word decoding:						#
*									#
* 	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
*									#
*		WORD1			WORD2				#
*	1111 0010 00 111100	100$ $$00 0000 0000			#
*									#
*	$$$ (100): FPCR							#
*	    (010): FPSR							#
*	    (001): FPIAR						#
*	    (000): FPIAR						#
*									#
*########################################################################

	global	fmovem_ctrl
*
* dispatch on the high byte of the extension word. $9c, $98 and $94 go
* to their own routines; every other value falls through to fctrl_in_3.
* each routine reads the immediate longwords in FPCR, FPSR, FPIAR order
* from EXC_EXTWPTR (advancing it by 4 per read) and leaves through
* iea_iacc if a fetch fails.
*
fmovem_ctrl:
	move.b	EXC_EXTWORD(a6),d0	* fetch reg select bits
	cmpi.b	#$9c,d0			* fpcr & fpsr & fpiar ?
	beq.w	fctrl_in_7		* yes
	cmpi.b	#$98,d0			* fpcr & fpsr ?
	beq.w	fctrl_in_6		* yes
	cmpi.b	#$94,d0			* fpcr & fpiar ?
	beq.b	fctrl_in_5		* yes

* fmovem.l #<data>, fpsr/fpiar
fctrl_in_3:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$4,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_long		* fetch FPSR from mem

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.l	d0,USER_FPSR(a6)	* store new FPSR to stack
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$4,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_long		* fetch FPIAR from mem

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.l	d0,USER_FPIAR(a6)	* store new FPIAR to stack
	rts

* fmovem.l #<data>, fpcr/fpiar
* Two longwords are read through EXC_EXTWPTR(a6), which is advanced by
* 4 before each read. The first goes to USER_FPCR(a6), the second to
* USER_FPIAR(a6). A non-zero d1 after either read leaves through
* iea_iacc.
fctrl_in_5:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$4,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_long		* fetch FPCR from mem

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.l	d0,USER_FPCR(a6)	* store new FPCR to stack
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$4,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_long		* fetch FPIAR from mem

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.l	d0,USER_FPIAR(a6)	* store new FPIAR to stack
	rts

* fmovem.l #<data>, fpcr/fpsr
* Two longwords are read through EXC_EXTWPTR(a6), which is advanced by
* 4 before each read. The first goes to USER_FPCR(a6), the second to
* USER_FPSR(a6). A non-zero d1 after either read leaves through
* iea_iacc.
fctrl_in_6:
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$4,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_long		* fetch FPCR from mem

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.l	d0,USER_FPCR(a6)	* save new FPCR in USER_FPCR
	move.l	EXC_EXTWPTR(a6),a0	* fetch instruction addr
	addq.l	#$4,EXC_EXTWPTR(a6)	* incr instruction ptr
	bsr.l	_imem_read_long		* fetch FPSR from mem

	tst.l	d1			* did ifetch fail?
	bne.l	iea_iacc		* yes

	move.l	d0,USER_FPSR(a6)	* save new FPSR in USER_FPSR
	rts

* fmovem.l #<data>, fpcr/fpsr/fpiar
* Only the FPCR longword is read here. The remaining two longwords
* (FPSR, then FPIAR) are handled by fctrl_in_3, which performs the
* same reads through EXC_EXTWPTR(a6), the same iea_iacc exits on a
* non-zero d1, and the final rts.
fctrl_in_7:
	move.l	EXC_EXTWPTR(a6),a0	* a0 = addr of next longword
	addq.l	#$4,EXC_EXTWPTR(a6)	* step the pointer past it
	bsr.l	_imem_read_long		* d0 = FPCR value

	tst.l	d1			* fetch failed?
	bne.l	iea_iacc		* yes

	move.l	d0,USER_FPCR(a6)	* save new FPCR in USER_FPCR
	bra.w	fctrl_in_3		* FPSR + FPIAR, then return

*########################################################################
* XDEF ****************************************************************	#
*	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
*									#
* XREF ****************************************************************	#
*	inc_areg() - increment an address register			#
*	dec_areg() - decrement an address register			#
*									#
* INPUT ***************************************************************	#
*	d0 = number of bytes to adjust <ea> by				#
* 									#
* OUTPUT **************************************************************	#
*	a0 = effective address of the operand				#
*									#
* ALGORITHM ***********************************************************	#
* "Dummy" CALCulate Effective Address:					#
* 	The stacked <ea> for FP unimplemented instructions and opclass	#
*	two packed instructions is correct with the exception of...	#
*									#
*	1) -(An)   : The register is not updated regardless of size.	#
*		     Also, for extended precision and packed, the 	#
*		     stacked <ea> value is 8 bytes too big		#
*	2) (An)+   : The register is not updated.			#
*	3) #<data> : The upper longword of the immediate operand is 	#
*		     stacked. b,w,l and s sizes are completely stacked;	#
*		     d,x, and p are not.				#
*									#
*########################################################################

	global	_dcalc_ea
* Entry: d0 = number of bytes to adjust by. The count is parked in a0
* while d0/d1 decode the mode and register fields from the low byte of
* the opword. (An)+, -(An) and #<data> get special handling; every
* other mode returns the stacked EXC_EA(a6) unchanged in a0.
_dcalc_ea:
	move.l	d0,a0			* move # bytes to %a0

	move.b	1+EXC_OPWORD(a6),d0	* fetch low byte of opcode word
	move.l	d0,d1			* make a copy

	andi.w	#$38,d0			* extract mode field
	andi.l	#$7,d1			* extract reg  field

	cmpi.b	#$18,d0			* is mode (An)+ ?
	beq.b	dcea_pi			* yes

	cmpi.b	#$20,d0			* is mode -(An) ?
	beq.b	dcea_pd			* yes

	or.w	d1,d0			* concat mode,reg
	cmpi.b	#$3c,d0			* is mode #<data>?

	beq.b	dcea_imm		* yes

	move.l	EXC_EA(a6),a0		* return <ea>
	rts

* need to set immediate data flag here since we'll need to do
* an imem_read to fetch this later.
* The address returned in a0 is the longword stored at USER_FPIAR(a6)
* plus 4 (memory-indirect addressing with an outer displacement of 4).
dcea_imm:
	move.b	#immed_flg,SPCOND_FLG(a6)
	lea	([USER_FPIAR,a6],$4),a0		* a0 = (USER_FPIAR(a6)) + 4
	rts

* (An)+ case.
* here, the <ea> is stacked correctly. however, we must update the 
* address register...	
* The byte count parked in a0 by _dcalc_ea is handed to inc_areg in d0;
* d1 still holds the register field at this point.
dcea_pi:
	move.l	a0,d0			* pass amt to inc by
	bsr.l	inc_areg		* inc addr register

	move.l	EXC_EA(a6),a0		* stacked <ea> is correct
	rts

* -(An) case.
* The address register is decremented through dec_areg by the byte
* count that _dcalc_ea parked in a0. The stacked <ea> is then returned
* in a0 as is, except when the count is $c (extended or packed): there
* the stacked value is 8 too large, so both a0 and EXC_EA(a6) are
* lowered by 8.
* NOTE(review): the size test reads d0 after dec_areg returns, so it
* assumes dec_areg leaves the byte count in d0 -- confirm.
* A pre-decrement through a7 in supervisor mode is not special-cased.
dcea_pd:
	move.l	a0,d0			* d0 = amount to decrement by
	bsr.l	dec_areg		* update the address register

	move.l	EXC_EA(a6),a0		* start from the stacked <ea>

	cmpi.b	#$c,d0			* 12-byte (ext/packed) operand?
	bne.b	dcea_pd_done		* no; stacked <ea> is right
dcea_pd2:
	subq.l	#8,a0			* stacked <ea> is 8 too large
	move.l	a0,EXC_EA(a6)		* write the fixed <ea> back
dcea_pd_done:
	rts

*########################################################################
* XDEF ****************************************************************	#
* 	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
*			 and packed data opclass 3 operations.		#
*									#
* XREF ****************************************************************	#
*	None								#
*									#
* INPUT ***************************************************************	#
*	None								#
* 									#
* OUTPUT **************************************************************	#
*	a0 = return correct effective address				#
*									#
* ALGORITHM ***********************************************************	#
*	For opclass 3 extended and packed data operations, the <ea>	#
* stacked for the exception is incorrect for -(an) and (an)+ addressing	#
* modes. Also, while we're at it, the index register itself must get 	#
* updated.								#
* 	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
* and return that value as the correct <ea> and store that value in An.	#
* For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
*									#
*########################################################################

* This calc_ea is currently used to retrieve the correct <ea> 
* for fmove outs of type extended and packed.
* The mode and register fields are decoded from the low byte of the
* opword. (An)+ and -(An) go to ceaf_pi / ceaf_pd with the register
* number in d1; every other mode returns EXC_EA(a6) in a0 unchanged.
	global	_calc_ea_fout
_calc_ea_fout:
	move.b	1+EXC_OPWORD(a6),d0	* fetch low byte of opcode word
	move.l	d0,d1			* make a copy

	andi.w	#$38,d0			* extract mode field
	andi.l	#$7,d1			* extract reg  field

	cmpi.b	#$18,d0			* is mode (An)+ ?
	beq.b	ceaf_pi			* yes

	cmpi.b	#$20,d0			* is mode -(An) ?
	beq.w	ceaf_pd			* yes

	move.l	EXC_EA(a6),a0		* stacked <ea> is correct
	rts

* (An)+ : extended and packed fmove out
*	: stacked <ea> is correct
*	: "An" not updated 
* Entry: d1 = register number (0-7). a0 is loaded with EXC_EA(a6) and
* the per-register handler reached through tbl_ceaf_pi adds $c (12)
* to "An".
ceaf_pi:
	move.w	(tbl_ceaf_pi.b,pc,d1.w*2),d1	* d1 = handler offset
	move.l	EXC_EA(a6),a0			* a0 = stacked <ea>
	jmp	(tbl_ceaf_pi.b,pc,d1.w*1)	* go to ceaf_pi<n>

* two marker words ahead of the 8-entry table (presumably a
* switch-table marker giving the entry count -- confirm)
	.dc.w	$4AFC,$8
* word offsets of the handlers, relative to tbl_ceaf_pi, indexed by An
tbl_ceaf_pi:
	.dc.w	ceaf_pi0-tbl_ceaf_pi
	.dc.w	ceaf_pi1-tbl_ceaf_pi
	.dc.w	ceaf_pi2-tbl_ceaf_pi
	.dc.w	ceaf_pi3-tbl_ceaf_pi
	.dc.w	ceaf_pi4-tbl_ceaf_pi
	.dc.w	ceaf_pi5-tbl_ceaf_pi
	.dc.w	ceaf_pi6-tbl_ceaf_pi
	.dc.w	ceaf_pi7-tbl_ceaf_pi

* a0/a1: updated in the frame at EXC_DREGS+$8 / EXC_DREGS+$c
* (presumably the saved a0/a1 slots -- confirm against the frame layout)
ceaf_pi0:
	addi.l	#$c,EXC_DREGS+$8(a6)
	rts
ceaf_pi1:
	addi.l	#$c,EXC_DREGS+$c(a6)
	rts
* a2-a5: the live registers themselves are updated
ceaf_pi2:
	add.l	#$c,a2
	rts
ceaf_pi3:
	add.l	#$c,a3
	rts
ceaf_pi4:
	add.l	#$c,a4
	rts
ceaf_pi5:
	add.l	#$c,a5
	rts
* a6: updated in the frame slot EXC_A6
ceaf_pi6:
	addi.l	#$c,EXC_A6(a6)
	rts
* a7: updated in the frame slot EXC_A7; mia7_flg is also recorded in
* SPCOND_FLG
ceaf_pi7:
	move.b	#mia7_flg,SPCOND_FLG(a6)
	addi.l	#$c,EXC_A7(a6)
	rts

* -(An) : extended and packed fmove out
*	: stacked <ea> = actual <ea> + 8
*	: "An" not updated
* Entry: d1 = register number (0-7). Both a0 and EXC_EA(a6) end up
* holding the stacked <ea> minus 8, and the per-register handler
* reached through tbl_ceaf_pd stores that value into "An".
ceaf_pd:
	move.w	(tbl_ceaf_pd.b,pc,d1.w*2),d1	* d1 = handler offset
	move.l	EXC_EA(a6),a0			* a0 = stacked <ea>
	sub.l	#$8,a0				* a0 = corrected <ea>
	sub.l	#$8,EXC_EA(a6)			* fix the stacked copy too
	jmp	(tbl_ceaf_pd.b,pc,d1.w*1)	* go to ceaf_pd<n>

* two marker words ahead of the 8-entry table (presumably a
* switch-table marker giving the entry count -- confirm)
	.dc.w	$4AFC,$8
* word offsets of the handlers, relative to tbl_ceaf_pd, indexed by An
tbl_ceaf_pd:
	.dc.w	ceaf_pd0-tbl_ceaf_pd
	.dc.w	ceaf_pd1-tbl_ceaf_pd
	.dc.w	ceaf_pd2-tbl_ceaf_pd
	.dc.w	ceaf_pd3-tbl_ceaf_pd
	.dc.w	ceaf_pd4-tbl_ceaf_pd
	.dc.w	ceaf_pd5-tbl_ceaf_pd
	.dc.w	ceaf_pd6-tbl_ceaf_pd
	.dc.w	ceaf_pd7-tbl_ceaf_pd

* a0/a1: written to the frame at EXC_DREGS+$8 / EXC_DREGS+$c
* (presumably the saved a0/a1 slots -- confirm against the frame layout)
ceaf_pd0:
	move.l	a0,EXC_DREGS+$8(a6)
	rts
ceaf_pd1:
	move.l	a0,EXC_DREGS+$c(a6)
	rts
* a2-a5: the live registers themselves are written
ceaf_pd2:
	move.l	a0,a2
	rts
ceaf_pd3:
	move.l	a0,a3
	rts
ceaf_pd4:
	move.l	a0,a4
	rts
ceaf_pd5:
	move.l	a0,a5
	rts
* a6: written to the frame slot EXC_A6
ceaf_pd6:
	move.l	a0,EXC_A6(a6)
	rts
* a7: written to the frame slot EXC_A7; mda7_flg is also recorded in
* SPCOND_FLG
ceaf_pd7:
	move.l	a0,EXC_A7(a6)
	move.b	#mda7_flg,SPCOND_FLG(a6)
	rts

*########################################################################
* XDEF ****************************************************************	#
*	_load_fop(): load operand for unimplemented FP exception	#
*									#
* XREF ****************************************************************	#
*	set_tag_x() - determine ext prec optype tag			#
*	set_tag_s() - determine sgl prec optype tag			#
*	set_tag_d() - determine dbl prec optype tag			#
*	unnorm_fix() - convert normalized number to denorm or zero	#
*	norm() - normalize a denormalized number			#
*	get_packed() - fetch a packed operand from memory		#
*	_dcalc_ea() - calculate <ea>, fixing An in process		#
*									#
*	_imem_read_{word,long}() - read from instruction memory		#
*	_dmem_read() - read from data memory				#
*	_dmem_read_{byte,word,long}() - read from data memory		#
*									#
*	facc_in_{b,w,l,d,x}() - mem read failed; special exit point	#
*									#
* INPUT ***************************************************************	#
*	None								#
* 									#
* OUTPUT **************************************************************	#
*	If memory access doesn't fail:					#
*		FP_SRC(a6) = source operand in extended precision	#
* 		FP_DST(a6) = destination operand in extended precision	#
*									#
* ALGORITHM ***********************************************************	#
* 	This is called from the Unimplemented FP exception handler in	#
* order to load the source and maybe destination operand into		#
* FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load	#
* the source and destination from the FP register file. Set the optype	#
* tags for both if dyadic, one for monadic. If a number is an UNNORM,	#
* convert it to a DENORM or a ZERO.					#
* 	If the instruction is opclass two (memory->reg), then fetch	#
* the destination from the register file and the source operand from 	#
* memory. Tag and fix both as above w/ opclass zero instructions.	#
* 	If the source operand is byte,word,long, or single, it may be	#
* in the data register file. If it's actually out in memory, use one of	#
* the mem_read() routines to fetch it. If the mem_read() access returns	#
* a failing value, exit through the special facc_in() routine which	#
* will create an access error exception frame from the current		#
* exception frame.							#
* 	Immediate data and regular data accesses are separated because 	#
* if an immediate data access fails, the resulting fault status		#
* longword stacked for the access error exception must have the 	#
* instruction bit set.							#
*									#
*########################################################################

	global	_load_fop
_load_fop:

*  15     13 12 10  9 7  6       0
* /        \ /   \ /  \ /         \
* ---------------------------------
* | opclass | RX  | RY | EXTENSION |  (2nd word of general FP instruction)
* ---------------------------------
*

*	bfextu		EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
*	cmpi.b		%d0, &0x2		# which class is it? ('000,'010,'011)
*	beq.w		op010			# handle <ea> -> fpn
*	bgt.w		op011			# handle fpn -> <ea>

* we're not using op011 for now...
* Only bit 6 of the first byte of the command word (bit 14 of the
* word, the middle opclass bit) is tested: set => opclass '010.
	btst	#$6,EXC_CMDREG(a6)
	bne.b	op010

*###########################
* OPCLASS '000: reg -> reg #
*###########################
* Extension bits 5 and 4 decide whether a destination operand is
* loaded: bit 5 clear => monadic; bit 5 set, bit 4 clear => dyadic;
* both set => dyadic only when the extension field is $38 (fcmp).
op000:
	move.b	1+EXC_CMDREG(a6),d0	* fetch extension word lo
	btst	#$5,d0			* testing extension bits
	beq.b	op000_src		* (bit 5 == 0) => monadic
	btst	#$4,d0			* (bit 5 == 1)
	beq.b	op000_dst		* (bit 4 == 0) => dyadic
	andi.w	#$007f,d0		* extract extension bits {6:0}
	cmpi.w	#$0038,d0		* is it an fcmp (dyadic) ?
	bne.b	op000_src		* no; treat as monadic

* dyadic: load and tag the destination operand first
op000_dst:
	bfextu	EXC_CMDREG(a6){#6:#3},d0	* extract dst field
	bsr.l	load_fpn2		* fetch dst fpreg into FP_DST

	bsr.l	set_tag_x		* get dst optype tag

	cmpi.b	#UNNORM,d0		* is dst fpreg an UNNORM?
	beq.b	op000_dst_unnorm	* yes
op000_dst_cont:
	move.b	d0,DTAG(a6)		* store the dst optype tag

* load and tag the source operand (both monadic and dyadic)
op000_src:
	bfextu	EXC_CMDREG(a6){#3:#3},d0	* extract src field
	bsr.l	load_fpn1		* fetch src fpreg into FP_SRC

	bsr.l	set_tag_x		* get src optype tag

	cmpi.b	#UNNORM,d0		* is src fpreg an UNNORM?
	beq.b	op000_src_unnorm	* yes
op000_src_cont:
	move.b	d0,STAG(a6)		* store the src optype tag
	rts

* UNNORM operands go through unnorm_fix; whatever is in d0 on return
* is what gets stored as the tag at the matching _cont label.
op000_dst_unnorm:
	bsr.l	unnorm_fix		* fix the dst UNNORM
	bra.b	op000_dst_cont
op000_src_unnorm:
	bsr.l	unnorm_fix		* fix the src UNNORM
	bra.b	op000_src_cont

*############################
* OPCLASS '010: <ea> -> reg #
*############################
* Same monadic/dyadic decode as opclass '000 for the destination. The
* source is fetched from memory when the <ea> mode field of the opword
* is non-zero, otherwise from a data register via tbl_op010_dreg.
op010:
	move.w	EXC_CMDREG(a6),d0	* fetch extension word
	btst	#$5,d0			* testing extension bits
	beq.b	op010_src		* (bit 5 == 0) => monadic
	btst	#$4,d0			* (bit 5 == 1)
	beq.b	op010_dst		* (bit 4 == 0) => dyadic
	andi.w	#$007f,d0		* extract extension bits {6:0}
	cmpi.w	#$0038,d0		* is it an fcmp (dyadic) ?
	bne.b	op010_src		* no; treat as monadic

op010_dst:
	bfextu	EXC_CMDREG(a6){#6:#3},d0	* extract dst field
	bsr.l	load_fpn2		* fetch dst fpreg ptr

	bsr.l	set_tag_x		* get dst type tag

	cmpi.b	#UNNORM,d0		* is dst fpreg an UNNORM?
	beq.b	op010_dst_unnorm	* yes
op010_dst_cont:
	move.b	d0,DTAG(a6)		* store the dst optype tag

op010_src:
	bfextu	EXC_CMDREG(a6){#3:#3},d0	* extract src type field

* the branch uses the Z flag set by this bfextu: mode field != 0
	bfextu	EXC_OPWORD(a6){#10:#3},d1	* extract <ea> mode field
	bne.w	fetch_from_mem		* src op is in memory

* mode field == 0: the source is in a data register
op010_dreg:
	clr.b	STAG(a6)		* either NORM or ZERO
	bfextu	EXC_OPWORD(a6){#13:#3},d1	* extract src reg field

	move.w	(tbl_op010_dreg.b,pc,d0.w*2),d0		* jmp based on optype
	jmp	(tbl_op010_dreg.b,pc,d0.w*1)	* fetch src from dreg

op010_dst_unnorm:
	bsr.l	unnorm_fix		* fix the dst UNNORM
	bra.b	op010_dst_cont

* two marker words ahead of the 8-entry table (presumably a
* switch-table marker giving the entry count -- confirm)
	.dc.w	$4AFC,$8
* word offsets relative to tbl_op010_dreg, indexed by the src type
* field. Entries 2, 3, 5 and 7 are zero offsets: they have no handler.
tbl_op010_dreg:
	.dc.w	opd_long-tbl_op010_dreg
	.dc.w	opd_sgl-tbl_op010_dreg
	.dc.w	tbl_op010_dreg-tbl_op010_dreg
	.dc.w	tbl_op010_dreg-tbl_op010_dreg
	.dc.w	opd_word-tbl_op010_dreg
	.dc.w	tbl_op010_dreg-tbl_op010_dreg
	.dc.w	opd_byte-tbl_op010_dreg
	.dc.w	tbl_op010_dreg-tbl_op010_dreg

*
* LONG source held in a data register: the result is NORM or ZERO.
* The value returned by fetch_dreg in d0 is converted to extended and
* stored in FP_SRC(a6). STAG(a6) is written only for a zero result.
*
opd_long:
	bsr.l	fetch_dreg		* d0 = longword from the dreg
	fmove.l	d0,fp0			* convert to extended
	fmovem.x	fp0,FP_SRC(a6)	* FP_SRC = converted operand
	fbne.w	opd_long_done		* non-zero: leave STAG alone
opd_long_zero:
	move.b	#ZERO,STAG(a6)		* zero: tag the source as ZERO
opd_long_done:
	rts

*
* WORD source held in a data register: the result is NORM or ZERO.
* The low word of the value returned by fetch_dreg in d0 is converted
* to extended and stored in FP_SRC(a6). STAG(a6) is written only for
* a zero result.
*
opd_word:
	bsr.l	fetch_dreg		* d0 = value from the dreg
	fmove.w	d0,fp0			* convert the word to extended
	fmovem.x	fp0,FP_SRC(a6)	* FP_SRC = converted operand
	fbne.w	opd_word_done		* non-zero: leave STAG alone
opd_word_zero:
	move.b	#ZERO,STAG(a6)		* zero: tag the source as ZERO
opd_word_done:
	rts

*
* BYTE source held in a data register: the result is NORM or ZERO.
* The low byte of the value returned by fetch_dreg in d0 is converted
* to extended and stored in FP_SRC(a6). STAG(a6) is written only for
* a zero result.
*
opd_byte:
	bsr.l	fetch_dreg		* d0 = value from the dreg
	fmove.b	d0,fp0			* convert the byte to extended
	fmovem.x	fp0,FP_SRC(a6)	* FP_SRC = converted operand
	fbne.w	opd_byte_done		* non-zero: leave STAG alone
opd_byte_zero:
	move.b	#ZERO,STAG(a6)		* zero: tag the source as ZERO
opd_byte_done:
	rts

*
* SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
*
* separate SNANs and DENORMs so they can be loaded w/ special care.
* all others can simply be moved "in" using fmove.
*
* The register value is copied to L_SCR1(a6) so that set_tag_s and the
* final fmove.s can both work from a memory pointer in a0.
* NOTE(review): the fmove.s (a0) after set_tag_s assumes set_tag_s
* leaves a0 unchanged -- confirm.
*
opd_sgl:
	bsr.l	fetch_dreg		* fetch sgl in d0
	move.l	d0,L_SCR1(a6)		* park it in memory

	lea	L_SCR1(a6),a0		* pass: ptr to the sgl
	bsr.l	set_tag_s		* determine sgl type
	move.b	d0,STAG(a6)		* save the src tag

	cmpi.b	#SNAN,d0		* is it an SNAN?
	beq.w	get_sgl_snan		* yes

	cmpi.b	#DENORM,d0		* is it a DENORM?
	beq.w	get_sgl_denorm		* yes

	fmove.s	(a0),fp0		* no, so can load it regular
	fmovem.x	fp0,FP_SRC(a6)	* return src op in FP_SRC
	rts

*#############################################################################

*########################################################################
* fetch_from_mem():							#
* - src is out in memory. must:						#
*	(1) calc ea - must read AFTER you know the src type since	#
*		      if the ea is -() or ()+, need to know # of bytes.	#
*	(2) read it in from either user or supervisor space		#
*	(3) if (b || w || l) then simply read in			#
*	    if (s || d || x) then check for SNAN,UNNORM,DENORM		#
*	    if (packed) then punt for now				#
* INPUT:								#
*	%d0 : src type field						#
*########################################################################
* STAG(a6) is cleared first; the loader reached through tbl_fp_type
* is the one that fetches the operand.
fetch_from_mem:
	clr.b	STAG(a6)		* either NORM or ZERO

	move.w	(tbl_fp_type.b,pc,d0.w*2),d0		* index by src type field
	jmp	(tbl_fp_type.b,pc,d0.w*1)

* two marker words ahead of the 8-entry table (presumably a
* switch-table marker giving the entry count -- confirm)
	.dc.w	$4AFC,$8
* word offsets relative to tbl_fp_type, indexed by the src type field.
* Entry 7 is a zero offset: it has no loader.
tbl_fp_type:
	.dc.w	load_long-tbl_fp_type
	.dc.w	load_sgl-tbl_fp_type
	.dc.w	load_ext-tbl_fp_type
	.dc.w	load_packed-tbl_fp_type
	.dc.w	load_word-tbl_fp_type
	.dc.w	load_dbl-tbl_fp_type
	.dc.w	load_byte-tbl_fp_type
	.dc.w	tbl_fp_type-tbl_fp_type

*########################################
* load_long: LONG source operand	#
*	(1) get <ea> from _dcalc_ea	#
*	(2) read 4 bytes into d0	#
*	(3) fmove.l d0 into fp0 and	#
*	    store it in FP_SRC		#
*########################################
* When _dcalc_ea has set immed_flg in SPCOND_FLG(a6) the operand is
* read with _imem_read_long and a failure leaves via funimp_iacc;
* otherwise it is read with _dmem_read_long and a failure leaves via
* facc_in_l. STAG(a6) is written only when the value is zero.
load_long:
	moveq.l	#$4,d0			* operand size: 4 bytes
	bsr.l	_dcalc_ea		* a0 = <ea>

	cmpi.b	#immed_flg,SPCOND_FLG(a6)
	bne.b	load_long_mem		* not immediate data

load_long_immed:
	bsr.l	_imem_read_long		* read the immediate longword

	tst.l	d1			* fetch failed?
	bne.l	funimp_iacc		* yes
	bra.b	load_long_cont

load_long_mem:
	bsr.l	_dmem_read_long		* read the longword from memory

	tst.l	d1			* fetch failed?
	bne.l	facc_in_l		* yes

load_long_cont:
	fmove.l	d0,fp0			* convert to extended
	fmovem.x	fp0,FP_SRC(a6)	* FP_SRC = converted operand

	fbeq.w	load_long_zero		* value is zero
	rts
load_long_zero:
	move.b	#ZERO,STAG(a6)		* tag the source as ZERO
	rts

*########################################
* load_word: WORD source operand	#
*	(1) get <ea> from _dcalc_ea	#
*	(2) read 2 bytes into d0	#
*	(3) fmove.w d0 into fp0 and	#
*	    store it in FP_SRC		#
*########################################
* When _dcalc_ea has set immed_flg in SPCOND_FLG(a6) the operand is
* read with _imem_read_word and a failure leaves via funimp_iacc;
* otherwise it is read with _dmem_read_word and a failure leaves via
* facc_in_w. STAG(a6) is written only when the value is zero.
load_word:
	moveq.l	#$2,d0			* operand size: 2 bytes
	bsr.l	_dcalc_ea		* a0 = <ea>

	cmpi.b	#immed_flg,SPCOND_FLG(a6)
	bne.b	load_word_mem		* not immediate data

load_word_immed:
	bsr.l	_imem_read_word		* read the immediate word

	tst.l	d1			* fetch failed?
	bne.l	funimp_iacc		* yes
	bra.b	load_word_cont

load_word_mem:
	bsr.l	_dmem_read_word		* read the word from memory

	tst.l	d1			* fetch failed?
	bne.l	facc_in_w		* yes

load_word_cont:
	fmove.w	d0,fp0			* convert to extended
	fmovem.x	fp0,FP_SRC(a6)	* FP_SRC = converted operand

	fbeq.w	load_word_zero		* value is zero
	rts
load_word_zero:
	move.b	#ZERO,STAG(a6)		* tag the source as ZERO
	rts

*########################################
* load a BYTE into %fp0:		#
* 	-number can't fault		#
*	(1) calc ea			#
*	(2) read 1 byte into L_SCR1	#
*	(3) fmov.b into %fp0		#
*########################################
load_byte:
	moveq.l	#$1,d0			* pass: 1 (byte)
	bsr.l	_dcalc_ea		* calc <ea>; <ea> in %a0

	cmpi.b	#immed_flg,SPCOND_FLG(a6)
	beq.b	load_byte_immed

	bsr.l	_dmem_read_byte		* fetch src operand from memory
1 2 3