misc/060tsys/t28fpsp.s (2/3)
1 2 3Mod_Loop_pre:
addq.l #$4,sp * erase exp(X)
*..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
Mod_Loop:
tst.l d6 * test carry bit
bgt.b R_GT_Y
*..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
cmp.l d4,d1 * compare hi(R) and hi(Y)
bne.b R_NE_Y
cmp.l d5,d2 * compare lo(R) and lo(Y)
bne.b R_NE_Y
*..At this point, R = Y
bra.w Rem_is_0
R_NE_Y:
*..use the borrow of the previous compare
bcs.b R_LT_Y * borrow is set iff R < Y
R_GT_Y:
*..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
*..and Y < (D1,D2) < 2Y. Either way, perform R - Y
sub.l d5,d2 * lo(R) - lo(Y)
subx.l d4,d1 * hi(R) - hi(Y)
clr.l d6 * clear carry
addq.l #1,d3 * Q := Q + 1
R_LT_Y:
*..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
tst.l d0 * see if j = 0.
beq.b PostLoop
add.l d3,d3 * Q := 2Q
add.l d2,d2 * lo(R) = 2lo(R)
roxl.l #1,d1 * hi(R) = 2hi(R) + carry
scs d6 * set Carry if 2(R) overflows
addq.l #1,a1 * k := k+1
subq.l #1,d0 * j := j - 1
*..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
bra.b Mod_Loop
PostLoop:
*..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
*..normalize R.
move.l L_SCR1(a6),d0 * new biased expo of R
tst.l d1
bne.b HiR_not0
HiR_0:
move.l d2,d1
clr.l d2
subi.l #32,d0
clr.l d6
bfffo d1{#0:#32},d6
lsl.l d6,d1
sub.l d6,d0 * (D0,D1,D2) is normalized
* ...with bias $7FFD
bra.b Get_Mod
HiR_not0:
clr.l d6
bfffo d1{#0:#32},d6
bmi.b Get_Mod * already normalized
sub.l d6,d0
lsl.l d6,d1
move.l d2,d7 * a copy of D2
lsl.l d6,d2
neg.l d6
addi.l #32,d6
lsr.l d6,d7
or.l d7,d1 * (D0,D1,D2) normalized
*
Get_Mod:
cmpi.l #$000041FE,d0
bge.b No_Scale
Do_Scale:
move.w d0,R(a6)
move.l d1,R_Hi(a6)
move.l d2,R_Lo(a6)
move.l L_SCR1(a6),d6
move.w d6,Y(a6)
move.l d4,Y_Hi(a6)
move.l d5,Y_Lo(a6)
fmove.x R(a6),fp0 * no exception
move.b #1,Sc_Flag(a6)
bra.b ModOrRem
No_Scale:
move.l d1,R_Hi(a6)
move.l d2,R_Lo(a6)
subi.l #$3FFE,d0
move.w d0,R(a6)
move.l L_SCR1(a6),d6
subi.l #$3FFE,d6
move.l d6,L_SCR1(a6)
fmove.x R(a6),fp0
move.w d6,Y(a6)
move.l d4,Y_Hi(a6)
move.l d5,Y_Lo(a6)
clr.b Sc_Flag(a6)
*
ModOrRem:
tst.b Mod_Flag(a6)
beq.b Fix_Sign
move.l L_SCR1(a6),d6 * new biased expo(Y)
subq.l #1,d6 * biased expo(Y/2)
cmp.l d6,d0
blt.b Fix_Sign
bgt.b Last_Sub
cmp.l d4,d1
bne.b Not_EQ
cmp.l d5,d2
bne.b Not_EQ
bra.w Tie_Case
Not_EQ:
bcs.b Fix_Sign
Last_Sub:
*
fsub.x Y(a6),fp0 * no exceptions
addq.l #1,d3 * Q := Q + 1
*
Fix_Sign:
*..Get sign of X
move.w SignX(a6),d6
bge.b Get_Q
fneg.x fp0
*..Get Q
*
Get_Q:
clr.l d6
move.w SignQ(a6),d6 * D6 is sign(Q)
move.l #8,d7
lsr.l d7,d6
andi.l #$0000007F,d3 * 7 bits of Q
or.l d6,d3 * sign and bits of Q
* swap %d3
* fmov.l %fpsr,%d6
* and.l &0xFF00FFFF,%d6
* or.l %d3,%d6
* fmov.l %d6,%fpsr # put Q in fpsr
move.b d3,FPSR_QBYTE(a6) * put Q in fpsr
*
Restore:
movem.l (sp)+,d2-d7 * {%d2-%d7}
move.l (sp)+,d0
fmove.l d0,fpcr
tst.b Sc_Flag(a6)
beq.b Finish
move.b #FMUL_OP,d1 * last inst is MUL
fmul.x Scale(pc),fp0 * may cause underflow
bra.l t_catch2
* the '040 package did this apparently to see if the dst operand for the
* preceding fmul was a denorm. but, it better not have been since the
* algorithm just got done playing with fp0 and expected no exceptions
* as a result. trust me...
* bra t_avoid_unsupp # check for denorm as a
* ;result of the scaling
Finish:
move.b #FMOV_OP,d1 * last inst is MOVE
fmove.x fp0,fp0 * capture exceptions & round
bra.l t_catch2
Rem_is_0:
*..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
addq.l #1,d3
cmpi.l #8,d0 * D0 is j
bge.b Q_Big
lsl.l d0,d3
bra.b Set_R_0
Q_Big:
clr.l d3
Set_R_0:
fmove.s #$00000000,fp0
clr.b Sc_Flag(a6)
bra.w Fix_Sign
Tie_Case:
*..Check parity of Q
move.l d3,d6
andi.l #$00000001,d6
tst.l d6
beq.w Fix_Sign * Q is even
*..Q is odd, Q := Q + 1, signX := -signX
addq.l #1,d3
move.w SignX(a6),d6
eori.l #$00008000,d6
move.w d6,SignX(a6)
bra.w Fix_Sign
* Default NaN image: three longwords (exponent word $7fff, mantissa all
* ones).  Loaded PC-relative by t_operr (into fp0) and ssincosi (into fp1).
qnan: .dc.l $7fff0000,$ffffffff,$ffffffff
*########################################################################
* XDEF **************************************************************** #
* t_dz(): Handle DZ exception during transcendental emulation. #
* Sets N bit according to sign of source operand. #
* t_dz2(): Handle DZ exception during transcendental emulation. #
* Sets N bit always. #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* a0 = pointer to source operand #
* #
* OUTPUT ************************************************************** #
* fp0 = default result #
* #
* ALGORITHM *********************************************************** #
* - Store properly signed INF into fp0. #
* - Set FPSR exception status dz bit, ccode inf bit, and #
* accrued dz bit. #
* #
*########################################################################
* t_dz: pick the signed-infinity result from the sign byte of the source
* operand at SRC_EX(a0).  t_dz2 is also a public entry that always
* produces the negative result.
global t_dz
t_dz:
tst.b SRC_EX.w(a0) * look at the sign byte of the source
bmi.b t_dz2 * negative source: use the -INF path
* non-negative source falls through
dz_pinf:
ori.l #dzinf_mask,USER_FPSR(a6) * record I/DZ/ADZ in the saved FPSR
fmove.s #$7f800000,fp0 * fp0 = +INF
rts
global t_dz2
t_dz2:
ori.l #dzinf_mask+neg_mask,USER_FPSR(a6) * record N/I/DZ/ADZ in the saved FPSR
fmove.s #$ff800000,fp0 * fp0 = -INF
rts
*################################################################
* OPERR exception: #
* - set FPSR exception status operr bit, condition code #
* nan bit; Store default NAN into fp0 #
*################################################################
* t_operr: operand-error exit.  Leaves the default NaN (qnan) in fp0 and
* ORs opnan_mask into the saved FPSR image.
global t_operr
t_operr:
fmovem.x qnan(pc),fp0 * fp0 = default NAN
ori.l #opnan_mask,USER_FPSR(a6) * record NaN/OPERR/AIOP
rts
*################################################################
* Extended DENORM: #
* - For all functions that have a denormalized input and #
* that f(x)=x, this is the entry point. #
* - we only return the EXOP here if either underflow or #
* inexact is enabled. #
*################################################################
* Entry point for scale w/ extended denorm. The function does
* NOT set INEX2/AUNFL/AINEX.
* t_resdnrm / t_extdnrm: return the operand at (a0) as the result.
*   in : a0 = pointer to the operand
*        d0 = rounding precision/mode byte (precision tested with $c0)
*   out: fp0 = default result
*        fp1 = EXOP, only when UNFL or INEX is enabled in FPCR_ENABLE(a6)
* The two entries differ only in the status bits they OR into
* USER_FPSR(a6) before joining at xdnrm_con.
global t_resdnrm
t_resdnrm:
ori.l #unfl_mask,USER_FPSR(a6) * set UNFL
bra.b xdnrm_con
global t_extdnrm
t_extdnrm:
ori.l #unfinx_mask,USER_FPSR(a6) * set UNFL/INEX2/AUNFL/AINEX
xdnrm_con:
move.l a0,a1 * make copy of src ptr
move.l d0,d1 * make copy of rnd prec,mode
andi.b #$c0,d1 * extended precision?
bne.b xdnrm_sd * no
* result precision is extended: the operand itself is returned, only the
* 'N' ccode bit needs to be set for a negative value.
tst.b __LOCAL___EX.w(a0) * is denorm negative?
bpl.b xdnrm_exit * no
bset #neg_bit,FPSR_CC(a6) * yes; set 'N' ccode bit
bra.b xdnrm_exit
* result precision is single or double: unf_sub supplies the default
* result pointer in a0; a1 (original operand) is preserved across the call.
* NOTE(review): unf_sub ORs unfinx_mask into USER_FPSR(a6), so on this path
* the t_resdnrm entry also ends up with the inexact bits set -- confirm
* that this is intended.
xdnrm_sd:
move.l a1,-(sp)
tst.b __LOCAL___EX.w(a0) * is denorm pos or neg?
smi.b d1 * set d1 accordingly
bsr.l unf_sub
move.l (sp)+,a1
xdnrm_exit:
fmovem.x (a0),fp0 * return default result in fp0
move.b FPCR_ENABLE(a6),d0
andi.b #$0a,d0 * is UNFL or INEX enabled?
bne.b xdnrm_ena * yes
rts
*###############
* unfl enabled #
*###############
* we have a DENORM that needs to be converted into an EXOP.
* so, normalize the mantissa, add 0x6000 to the new exponent,
* and return the result in fp1.
xdnrm_ena:
* work on a copy in FP_SCR0 so the caller's operand (a1) is not modified
move.w __LOCAL___EX.w(a1),FP_SCR0_EX(a6)
move.l __LOCAL___HI(a1),FP_SCR0_HI(a6)
move.l __LOCAL___LO(a1),FP_SCR0_LO(a6)
lea FP_SCR0(a6),a0
bsr.l norm * normalize mantissa
addi.l #$6000,d0 * add extra bias
andi.w #$8000,FP_SCR0_EX(a6) * keep old sign
or.w d0,FP_SCR0_EX(a6) * insert new exponent
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
rts
*################################################################
* UNFL exception: #
* - This routine is for cases where even an EXOP isn't #
* large enough to hold the range of this result. #
* In such a case, the EXOP equals zero. #
* - Return the default result to the proper precision #
* with the sign of this result being the same as that #
* of the src operand. #
* - t_unfl2() is provided to force the result sign to #
* positive which is the desired result for fetox(). #
*################################################################
* t_unfl: underflow exit whose result sign follows the operand at (a0).
* Returns the default underflow result in fp0 and a zero EXOP in fp1.
global t_unfl
t_unfl:
ori.l #unfinx_mask,USER_FPSR(a6) * flag UNFL/INEX2/AUNFL/AINEX
tst.b (a0) * sign byte of the operand
smi.b d1 * d1.b = $ff when negative, else $00
bsr.l unf_sub * a0 <- pointer to default unfl result
fmove.s #$00000000,fp1 * EXOP returned in fp1 is zero
fmovem.x (a0),fp0 * fp0 = default result
rts
* t_unfl2 ALWAYS tells unf_sub to create a positive result
* t_unfl2: same as t_unfl but the sign flag handed to unf_sub is always
* cleared, so the default result is the positive one.
global t_unfl2
t_unfl2:
ori.l #unfinx_mask,USER_FPSR(a6) * flag UNFL/INEX2/AUNFL/AINEX
sf.b d1 * d1.b = $00: request a positive result
bsr.l unf_sub * a0 <- pointer to default unfl result
fmove.s #$0000000,fp1 * EXOP returned in fp1 is zero
fmovem.x (a0),fp0 * fp0 = default result
rts
*################################################################
* OVFL exception: #
* - This routine is for cases where even an EXOP isn't #
* large enough to hold the range of this result. #
* - Return the default result to the proper precision #
* with the sign of this result being the same as that #
* of the src operand. #
* - t_ovfl2() is provided to force the result sign to #
* positive which is the desired result for fcosh(). #
* - t_ovfl_sc() is provided for scale() which only sets #
* the inexact bits if the number is inexact for the #
* precision indicated. #
*################################################################
* t_ovfl_sc: overflow exit used for scale.  Sets OVFL/AOVFL/AINEX, then
* sets INEX2 only if the mantissa at (a0) has bits below the selected
* rounding precision, and finally joins the common code at ovfl_work.
*   in : a0 = pointer to the operand, d0 = rounding precision/mode byte
global t_ovfl_sc
t_ovfl_sc:
ori.l #ovfl_inx_mask,USER_FPSR(a6) * set OVFL/AOVFL/AINEX
move.b d0,d1 * fetch rnd mode/prec
andi.b #$c0,d1 * extract rnd prec
beq.b ovfl_work * prec is extended
tst.b __LOCAL___HI(a0) * is dst a DENORM?
bmi.b ovfl_sc_norm * no
* dst op is a DENORM. we have to normalize the mantissa to see if the
* result would be inexact for the given precision. make a copy of the
* dst so we don't screw up the version passed to us.
move.w __LOCAL___EX.w(a0),FP_SCR0_EX(a6)
move.l __LOCAL___HI(a0),FP_SCR0_HI(a6)
move.l __LOCAL___LO(a0),FP_SCR0_LO(a6)
lea FP_SCR0(a6),a0 * pass ptr to FP_SCR0
movem.l d0-d1/a0,-(sp) * save d0-d1/a0
bsr.l norm * normalize mantissa
movem.l (sp)+,d0-d1/a0 * restore d0-d1/a0
ovfl_sc_norm:
* d1 holds the precision bits: $40 falls into the single-precision check,
* any other non-zero value goes to the double-precision check.
cmpi.b #$40,d1 * is prec sgl?
bne.b ovfl_sc_dbl * no; dbl
ovfl_sc_sgl:
tst.l __LOCAL___LO(a0) * is lo lw of sgl set?
bne.b ovfl_sc_inx * yes
tst.b 3+__LOCAL___HI(a0) * is lo byte of hi lw set?
bne.b ovfl_sc_inx * yes
bra.b ovfl_work * don't set INEX2
ovfl_sc_dbl:
move.l __LOCAL___LO(a0),d1 * are any of lo 11 bits of
andi.l #$7ff,d1 * dbl mantissa set?
beq.b ovfl_work * no; don't set INEX2
ovfl_sc_inx:
ori.l #inex2_mask,USER_FPSR(a6) * set INEX2
bra.b ovfl_work * continue
* t_ovfl: overflow exit whose result sign follows the operand at (a0).
* ovfl_work is the shared tail, also entered from t_ovfl_sc.
* Returns the default overflow result in fp0 and a zero EXOP in fp1.
global t_ovfl
t_ovfl:
ori.l #ovfinx_mask,USER_FPSR(a6) * flag OVFL/INEX2/AOVFL/AINEX
ovfl_work:
tst.b __LOCAL___EX.w(a0) * sign byte of the operand
smi.b d1 * d1.b = $ff when negative, else $00
bsr.l ovf_res * a0 <- default ovfl result, d0 <- ccodes
move.b d0,FPSR_CC(a6) * store the returned ccode byte
fmove.s #$00000000,fp1 * EXOP returned in fp1 is zero
fmovem.x (a0),fp0 * fp0 = default result
rts
* t_ovfl2 ALWAYS tells ovf_res to create a positive result
* t_ovfl2: same as t_ovfl but the sign flag handed to ovf_res is always
* cleared, so the default result is the positive one.
global t_ovfl2
t_ovfl2:
ori.l #ovfinx_mask,USER_FPSR(a6) * flag OVFL/INEX2/AOVFL/AINEX
sf.b d1 * d1.b = $00: request a positive result
bsr.l ovf_res * a0 <- default ovfl result, d0 <- ccodes
move.b d0,FPSR_CC(a6) * store the returned ccode byte
fmove.s #$00000000,fp1 * EXOP returned in fp1 is zero
fmovem.x (a0),fp0 * fp0 = default result
rts
*################################################################
* t_catch(): #
* - the last operation of a transcendental emulation #
* routine may have caused an underflow or overflow. #
* we find out if this occurred by doing an fsave and #
* checking the exception bit. if one did occur, then we #
* jump to fgen_except() which creates the default #
* result and EXOP for us. #
*################################################################
* t_catch: save the FPU state on the stack and test byte 2 of the saved
* frame.  If it is negative an exception is pending and control goes to
* catch (frame left on the stack for fgen_except).  Otherwise the 12-byte
* frame is discarded and execution FALLS THROUGH into t_inx2 below.
global t_catch
t_catch:
fsave -(sp)
tst.b $2(sp)
bmi.b catch
add.l #$c,sp
*################################################################
* INEX2 exception: #
* - The inex2 and ainex bits are set. #
*################################################################
* t_inx2: set INEX2/AINEX in the saved FPSR, choosing the variant from
* the current FPU condition codes: "less than" -> t_minx2 (also sets N),
* "equal" -> inx2_zero (writes z_bmask to the ccode byte), anything else
* falls into t_pinx2.  Also reached by fall-through from t_catch.
global t_inx2
t_inx2:
fblt.w t_minx2
fbeq.w inx2_zero
global t_pinx2
t_pinx2:
* word-sized OR on the low half of the saved FPSR image
ori.w #inx2a_mask,2+USER_FPSR(a6) * set INEX2/AINEX
rts
global t_minx2
t_minx2:
ori.l #inx2a_mask+neg_mask,USER_FPSR(a6) * set N/INEX2/AINEX
rts
inx2_zero:
move.b #z_bmask,FPSR_CC(a6)
ori.w #inx2a_mask,2+USER_FPSR(a6) * set INEX2/AINEX
rts
* an underflow or overflow exception occurred.
* we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
* catch / catch2: an exception is pending in the fsave frame that is still
* on the stack.  catch first ORs inx2a_mask into FPSR_EXCEPT(a6); catch2
* skips that.  Both call fgen_except and then drop the 12-byte frame.
catch:
ori.w #inx2a_mask,FPSR_EXCEPT(a6)
catch2:
bsr.l fgen_except
add.l #$c,sp
rts
* t_catch2: like t_catch, but on a pending exception it enters at catch2
* (no forced INEX2/AINEX), and when nothing is pending it merges the live
* FPSR into USER_FPSR(a6) instead of falling into t_inx2.
global t_catch2
t_catch2:
fsave -(sp)
tst.b $2(sp)
bmi.b catch2
add.l #$c,sp
fmove.l fpsr,d0
or.l d0,USER_FPSR(a6)
rts
*########################################################################
*########################################################################
* unf_sub(): underflow default result calculation for transcendentals #
* #
* INPUT: #
* d0 : rnd mode,precision #
* d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+)) #
* OUTPUT: #
* a0 : points to result (in instruction memory) #
*########################################################################
* unf_sub: build the index {sign, prec, mode} from d1 and d0, store the
* matching ccode byte from tbl_unf_cc into FPSR_CC(a6) and return in a0 a
* pointer to the matching 16-byte entry of tbl_unf_result.
*   in : d0.b = rounding precision/mode byte, d1.b = $ff (-) or $00 (+)
*   out: a0 = pointer into tbl_unf_result; d0, d1 are modified
* Only byte-sized operations touch d0, yet it is used as a word index
* below -- presumably callers pass d0 with bits 15-8 clear; verify.
unf_sub:
ori.l #unfinx_mask,USER_FPSR(a6)
andi.w #$10,d1 * keep sign bit in 4th spot
lsr.b #$4,d0 * shift rnd prec,mode to lo bits
andi.b #$f,d0 * strip hi rnd mode bit
or.b d1,d0 * concat {sgn,mode,prec}
move.l d0,d1 * make a copy
lsl.b #$1,d1 * mult index 2 by 2
* d0 indexes bytes; d1 (= 2*d0) scaled by 8 indexes 16-byte result entries
move.b (tbl_unf_cc.b,pc,d0.w*1),FPSR_CC(a6) * insert ccode bits
lea (tbl_unf_result.b,pc,d1.w*8),a0 * grab result ptr
rts
* tbl_unf_cc: one ccode byte per unf_sub index, four bytes (one per
* rounding mode) per row.  Rows 0-3 are the positive-sign half of the
* index space, the remaining rows the negative-sign half.  The entries
* line up one-for-one with tbl_unf_result: a ZERO result carries $4, a
* MIN result does not; the negative rows add $8.
* ($4 / $8 are presumably the Z and N ccode bits -- confirm against
* z_bmask / neg_bmask.)
tbl_unf_cc:
.dc.b $4,$4,$4,$0
.dc.b $4,$4,$4,$0
.dc.b $4,$4,$4,$0
.dc.b $0,$0,$0,$0
.dc.b $8+$4,$8+$4,$8,$8+$4
.dc.b $8+$4,$8+$4,$8,$8+$4
.dc.b $8+$4,$8+$4,$8,$8+$4
* tbl_unf_result: default underflow results, 16 bytes per entry (three
* longwords of operand image plus one pad longword), addressed by
* unf_sub as index*16.  Entries come in groups of four (one per rounding
* mode) for ext, sgl, dbl and an all-zero filler group; the first sixteen
* entries are the positive results, the rest the negative ones.
tbl_unf_result:
.dc.l $00000000,$00000000,$00000000,$0 * ZERO;ext
.dc.l $00000000,$00000000,$00000000,$0 * ZERO;ext
.dc.l $00000000,$00000000,$00000000,$0 * ZERO;ext
.dc.l $00000000,$00000000,$00000001,$0 * MIN; ext
.dc.l $3f810000,$00000000,$00000000,$0 * ZERO;sgl
.dc.l $3f810000,$00000000,$00000000,$0 * ZERO;sgl
.dc.l $3f810000,$00000000,$00000000,$0 * ZERO;sgl
.dc.l $3f810000,$00000100,$00000000,$0 * MIN; sgl
.dc.l $3c010000,$00000000,$00000000,$0 * ZERO;dbl
.dc.l $3c010000,$00000000,$00000000,$0 * ZERO;dbl
.dc.l $3c010000,$00000000,$00000000,$0 * ZERO;dbl
.dc.l $3c010000,$00000000,$00000800,$0 * MIN; dbl
* filler group for the fourth precision encoding
.dc.l $0,$0,$0,$0
.dc.l $0,$0,$0,$0
.dc.l $0,$0,$0,$0
.dc.l $0,$0,$0,$0
* negative results
.dc.l $80000000,$00000000,$00000000,$0 * ZERO;ext
.dc.l $80000000,$00000000,$00000000,$0 * ZERO;ext
.dc.l $80000000,$00000000,$00000001,$0 * MIN; ext
.dc.l $80000000,$00000000,$00000000,$0 * ZERO;ext
.dc.l $bf810000,$00000000,$00000000,$0 * ZERO;sgl
.dc.l $bf810000,$00000000,$00000000,$0 * ZERO;sgl
.dc.l $bf810000,$00000100,$00000000,$0 * MIN; sgl
.dc.l $bf810000,$00000000,$00000000,$0 * ZERO;sgl
.dc.l $bc010000,$00000000,$00000000,$0 * ZERO;dbl
.dc.l $bc010000,$00000000,$00000000,$0 * ZERO;dbl
.dc.l $bc010000,$00000000,$00000800,$0 * MIN; dbl
.dc.l $bc010000,$00000000,$00000000,$0 * ZERO;dbl
*###########################################################
*########################################################################
* src_zero(): Return signed zero according to sign of src operand. #
*########################################################################
* src_zero: signed zero chosen from the sign byte at SRC_EX(a0).
* A non-negative source falls through into ld_pzero.
global src_zero
src_zero:
tst.b SRC_EX.w(a0) * sign byte of the src operand
bmi.b ld_mzero * negative: hand back -0
*
* ld_pzero(): fp0 = +0, ccode byte = Z.
*
global ld_pzero
ld_pzero:
move.b #z_bmask,FPSR_CC(a6) * ccodes: 'Z'
fmove.s #$00000000,fp0 * fp0 = +0
rts
* ld_mzero(): fp0 = -0, ccode byte = N+Z.
global ld_mzero
ld_mzero:
move.b #neg_bmask+z_bmask,FPSR_CC(a6) * ccodes: 'N','Z'
fmove.s #$80000000,fp0 * fp0 = -0
rts
*########################################################################
* dst_zero(): Return signed zero according to sign of dst operand. #
*########################################################################
* dst_zero: signed zero chosen from the sign byte at DST_EX(a1).
global dst_zero
dst_zero:
tst.b DST_EX.w(a1) * sign byte of the dst operand
bpl.b ld_pzero * non-negative: +0
bra.b ld_mzero * negative: -0
*########################################################################
* src_inf(): Return signed inf according to sign of src operand. #
*########################################################################
* src_inf: signed infinity chosen from the sign byte at SRC_EX(a0).
* A non-negative source falls through into ld_pinf.
global src_inf
src_inf:
tst.b SRC_EX.w(a0) * sign byte of the src operand
bmi.b ld_minf * negative: hand back -INF
*
* ld_pinf(): fp0 = +INF, ccode byte = INF.
*
global ld_pinf
ld_pinf:
move.b #inf_bmask,FPSR_CC(a6) * ccodes: 'INF'
fmove.s #$7f800000,fp0 * fp0 = +INF
rts
*
* ld_minf(): fp0 = -INF, ccode byte = N+INF.
*
global ld_minf
ld_minf:
move.b #neg_bmask+inf_bmask,FPSR_CC(a6) * ccodes: 'N','I'
fmove.s #$ff800000,fp0 * fp0 = -INF
rts
*########################################################################
* dst_inf(): Return signed inf according to sign of dst operand. #
*########################################################################
* dst_inf: signed infinity chosen from the sign byte at DST_EX(a1).
global dst_inf
dst_inf:
tst.b DST_EX.w(a1) * sign byte of the dst operand
bpl.b ld_pinf * non-negative: +INF
bra.b ld_minf * negative: -INF
global szr_inf
*################################################################
* szr_inf(): Return +ZERO for a negative src operand or #
* +INF for a positive src operand. #
* Routine used for fetox, ftwotox, and ftentox. #
*################################################################
szr_inf:
tst.b SRC_EX.w(a0) * sign byte of the src operand
bpl.b ld_pinf * non-negative source: +INF
bra.b ld_pzero * negative source: +0
*########################################################################
* sopr_inf(): Return +INF for a positive src operand or #
* jump to operand error routine for a negative src operand. #
* Routine used for flogn, flognp1, flog10, and flog2. #
*########################################################################
global sopr_inf
sopr_inf:
tst.b SRC_EX.w(a0) * sign byte of the src operand
bpl.b ld_pinf * non-negative source: +INF
bra.w t_operr * negative source: operand error
*################################################################
* setoxm1i(): Return minus one for a negative src operand or #
* positive infinity for a positive src operand. #
* Routine used for fetoxm1. #
*################################################################
global setoxm1i
setoxm1i:
tst.b SRC_EX.w(a0) * sign byte of the src operand
bpl.b ld_pinf * non-negative source: +INF
bra.b ld_mone * negative source: -1
*########################################################################
* src_one(): Return signed one according to sign of src operand. #
*########################################################################
* src_one: signed one chosen from the sign byte at SRC_EX(a0).
* A non-negative source falls through into ld_pone.
global src_one
src_one:
tst.b SRC_EX.w(a0) * sign byte of the src operand
bmi.b ld_mone * negative: hand back -1
*
* ld_pone(): fp0 = +1, ccode byte cleared.
*
global ld_pone
ld_pone:
clr.b FPSR_CC(a6) * no ccode bits for +1
fmove.s #$3f800000,fp0 * fp0 = +1
rts
*
* ld_mone(): fp0 = -1, ccode byte = N.
*
global ld_mone
ld_mone:
move.b #neg_bmask,FPSR_CC(a6) * ccodes: 'N'
fmove.s #$bf800000,fp0 * fp0 = -1
rts
* +PI/2 and -PI/2 as three-longword operand images; they differ only in
* the sign bit of the first longword.  Loaded by ld_ppi2 / ld_mpi2.
ppiby2: .dc.l $3fff0000,$c90fdaa2,$2168c235
mpiby2: .dc.l $bfff0000,$c90fdaa2,$2168c235
*################################################################
* spi_2(): Return signed PI/2 according to sign of src operand. #
*################################################################
* spi_2: signed PI/2 chosen from the sign byte at SRC_EX(a0); a
* non-negative source falls through into ld_ppi2.
* ld_ppi2 / ld_mpi2 write d0 to the FPCR before loading the constant (so
* the load is performed under the control value passed in d0) and leave
* via t_pinx2 / t_minx2, which set INEX2/AINEX.
global spi_2
spi_2:
tst.b SRC_EX.w(a0) * check sign of source
bmi.b ld_mpi2
*
* ld_ppi2(): return positive PI/2.
*
global ld_ppi2
ld_ppi2:
fmove.l d0,fpcr
fmove.x ppiby2(pc),fp0 * load +pi/2
bra.w t_pinx2 * set INEX2
*
* ld_mpi2(): return negative PI/2.
*
global ld_mpi2
ld_mpi2:
fmove.l d0,fpcr
fmove.x mpiby2(pc),fp0 * load -pi/2
bra.w t_minx2 * set INEX2
*###################################################
* The following routines give support for fsincos. #
*###################################################
*
* ssincosz(): When the src operand is ZERO, store a one in the
* cosine register and return a ZERO in fp0 w/ the same sign
* as the src operand.
*
* ssincosz: fp1 = +1 (cosine), fp0 = zero with the sign of the source
* (sine), ccode byte = Z or N+Z; then tail-branches to sto_cos, whose rts
* returns to this routine's caller.
global ssincosz
ssincosz:
fmove.s #$3f800000,fp1
tst.b SRC_EX.w(a0) * test sign
bpl.b sincoszp
fmove.s #$80000000,fp0 * return sin result in fp0
move.b #z_bmask+neg_bmask,FPSR_CC(a6)
bra.b sto_cos * store cosine result
sincoszp:
fmove.s #$00000000,fp0 * return sin result in fp0
move.b #z_bmask,FPSR_CC(a6)
bra.b sto_cos * store cosine result
*
* ssincosi(): When the src operand is INF, store a QNAN in the cosine
* register and jump to the operand error routine (taken for either sign
* of the src operand).
*
* ssincosi: store the default NaN (qnan) as the cosine result via
* sto_cos, then finish through t_operr, which supplies fp0 and the
* status bits.  The branch to t_operr is unconditional.
global ssincosi
ssincosi:
fmove.x qnan(pc),fp1 * load NAN
bsr.l sto_cos * store cosine result
bra.w t_operr
*
* ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
* register and branch to the src QNAN routine.
*
* ssincosqnan: copy the source operand at (a0) into fp1, store it as the
* cosine result via sto_cos, then finish through src_qnan.
global ssincosqnan
ssincosqnan:
fmove.x __LOCAL___EX.w(a0),fp1
bsr.l sto_cos
bra.w src_qnan
*
* ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
* in the cosine register and branch to the src SNAN routine.
*
* ssincossnan: copy the source operand at (a0) into fp1, store it as the
* cosine result via sto_cos, then finish through src_snan.
global ssincossnan
ssincossnan:
fmove.x __LOCAL___EX.w(a0),fp1
bsr.l sto_cos
bra.w src_snan
*#######################################################################
*########################################################################
* sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field. #
* fp1 holds the result of the cosine portion of ssincos(). #
* the value in fp1 will not take any exceptions when moved. #
* INPUT: #
* fp1 : fp value to store #
* MODIFIED: #
* d0 #
*########################################################################
* sto_cos: store fp1 into the destination selected by the low three bits
* of the byte at 1+EXC_CMDREG(a6).  tbl_sto_cos holds word offsets
* (relative to the table itself) of the eight per-register stubs.
* Selections 0 and 1 are written to the EXC_FP0(a6) / EXC_FP1(a6) save
* slots rather than to the live fp0/fp1 registers; selections 2-7 are
* written directly to fp2-fp7.  Modifies d0.
global sto_cos
sto_cos:
move.b 1+EXC_CMDREG(a6),d0
andi.w #$7,d0
* fetch the stub's offset, then jump relative to the table base
move.w (tbl_sto_cos.b,pc,d0.w*2),d0
jmp (tbl_sto_cos.b,pc,d0.w*1)
tbl_sto_cos:
.dc.w sto_cos_0-tbl_sto_cos
.dc.w sto_cos_1-tbl_sto_cos
.dc.w sto_cos_2-tbl_sto_cos
.dc.w sto_cos_3-tbl_sto_cos
.dc.w sto_cos_4-tbl_sto_cos
.dc.w sto_cos_5-tbl_sto_cos
.dc.w sto_cos_6-tbl_sto_cos
.dc.w sto_cos_7-tbl_sto_cos
sto_cos_0:
fmovem.x fp1,EXC_FP0(a6)
rts
sto_cos_1:
fmovem.x fp1,EXC_FP1(a6)
rts
sto_cos_2:
fmove.x fp1,fp2
rts
sto_cos_3:
fmove.x fp1,fp3
rts
sto_cos_4:
fmove.x fp1,fp4
rts
sto_cos_5:
fmove.x fp1,fp5
rts
sto_cos_6:
fmove.x fp1,fp6
rts
sto_cos_7:
fmove.x fp1,fp7
rts
*#################################################################
* smod_sdnrm / smod_snorm: fmod with a NORM or DENORM source (see the
* fmod dispatcher).  Dispatch on the destination tag DTAG(a6):
* zero tag -> smod, ZERO -> smod_zro, INF -> t_operr, DENORM -> smod,
* SNAN -> dst_snan, anything else -> dst_qnan.
global smod_sdnrm
global smod_snorm
smod_sdnrm:
smod_snorm:
move.b DTAG(a6),d1
beq.l smod
cmpi.b #ZERO,d1
beq.w smod_zro
cmpi.b #INF,d1
beq.l t_operr
cmpi.b #DENORM,d1
beq.l smod
cmpi.b #SNAN,d1
beq.l dst_snan
bra.l dst_qnan
* smod_szero: fmod with a ZERO source.  Dispatch on DTAG(a6): zero tag,
* ZERO, INF and DENORM all -> t_operr, QNAN -> dst_qnan, anything
* else -> dst_snan.
global smod_szero
smod_szero:
move.b DTAG(a6),d1
beq.l t_operr
cmpi.b #ZERO,d1
beq.l t_operr
cmpi.b #INF,d1
beq.l t_operr
cmpi.b #DENORM,d1
beq.l t_operr
cmpi.b #QNAN,d1
beq.l dst_qnan
bra.l dst_snan
* smod_sinf: fmod with an INF source.  Dispatch on DTAG(a6): zero tag ->
* smod_fpn, ZERO -> smod_zro, INF -> t_operr, DENORM -> smod_fpn,
* QNAN -> dst_qnan, anything else -> dst_snan.
global smod_sinf
smod_sinf:
move.b DTAG(a6),d1
beq.l smod_fpn
cmpi.b #ZERO,d1
beq.l smod_zro
cmpi.b #INF,d1
beq.l t_operr
cmpi.b #DENORM,d1
beq.l smod_fpn
cmpi.b #QNAN,d1
beq.l dst_qnan
bra.l dst_snan
* smod_zro / srem_zro: destination is ZERO.  The quotient byte gets only
* its sign bit (src sign XOR dst sign, bit 7); the result is a zero with
* the sign of the destination.
smod_zro:
srem_zro:
move.b SRC_EX.w(a0),d1 * get src sign
move.b DST_EX.w(a1),d0 * get dst sign
eor.b d0,d1 * get qbyte sign
andi.b #$80,d1
move.b d1,FPSR_QBYTE(a6)
tst.b d0
bpl.w ld_pzero
bra.w ld_mzero
* smod_fpn / srem_fpn: source is INF and the destination is a finite
* non-zero number (see smod_sinf / srem_sinf); the destination itself is
* the result.  The quotient byte gets only its sign bit (src sign XOR dst
* sign).  d0 on entry is saved on the stack: it is restored into d0
* before t_resdnrm on the DENORM path, or popped straight into the FPCR
* on the normal path.
smod_fpn:
srem_fpn:
* this clear is overwritten by the move.b to FPSR_QBYTE a few lines below
clr.b FPSR_QBYTE(a6)
move.l d0,-(sp)
move.b SRC_EX.w(a0),d1 * get src sign
move.b DST_EX.w(a1),d0 * get dst sign
eor.b d0,d1 * get qbyte sign
andi.b #$80,d1
move.b d1,FPSR_QBYTE(a6)
cmpi.b #DENORM,DTAG(a6)
bne.b smod_nrm
* denormalized destination: let t_resdnrm build the result from it
lea DST.w(a1),a0
move.l (sp)+,d0
bra.l t_resdnrm
smod_nrm:
fmove.l (sp)+,fpcr
fmove.x DST.w(a1),fp0
tst.b DST_EX.w(a1)
bmi.b smod_nrm_neg
rts
smod_nrm_neg:
move.b #neg_bmask,FPSR_CC(a6) * set 'N' ccode
rts
*########################################################################
* srem_sdnrm / srem_snorm: frem with a NORM or DENORM source (see the
* frem dispatcher).  Dispatch on DTAG(a6): zero tag -> srem, ZERO ->
* srem_zro, INF -> t_operr, DENORM -> srem, QNAN -> dst_qnan, anything
* else -> dst_snan.
global srem_snorm
global srem_sdnrm
srem_sdnrm:
srem_snorm:
move.b DTAG(a6),d1
beq.l srem
cmpi.b #ZERO,d1
beq.w srem_zro
cmpi.b #INF,d1
beq.l t_operr
cmpi.b #DENORM,d1
beq.l srem
cmpi.b #QNAN,d1
beq.l dst_qnan
bra.l dst_snan
* srem_szero: frem with a ZERO source.  Dispatch on DTAG(a6): zero tag,
* ZERO, INF and DENORM all -> t_operr, QNAN -> dst_qnan, anything
* else -> dst_snan.
global srem_szero
srem_szero:
move.b DTAG(a6),d1
beq.l t_operr
cmpi.b #ZERO,d1
beq.l t_operr
cmpi.b #INF,d1
beq.l t_operr
cmpi.b #DENORM,d1
beq.l t_operr
cmpi.b #QNAN,d1
beq.l dst_qnan
bra.l dst_snan
* srem_sinf: frem with an INF source.  Dispatch on DTAG(a6): zero tag ->
* srem_fpn, ZERO -> srem_zro, INF -> t_operr, DENORM -> srem_fpn,
* QNAN -> dst_qnan, anything else -> dst_snan.
global srem_sinf
srem_sinf:
move.b DTAG(a6),d1
beq.w srem_fpn
cmpi.b #ZERO,d1
beq.w srem_zro
cmpi.b #INF,d1
beq.l t_operr
cmpi.b #DENORM,d1
beq.l srem_fpn
cmpi.b #QNAN,d1
beq.l dst_qnan
bra.l dst_snan
*########################################################################
* sscale_snorm / sscale_sdnrm: fscale with a NORM or DENORM source (see
* the fscale dispatcher).  Dispatch on DTAG(a6): zero tag -> sscale,
* ZERO -> dst_zero, INF -> dst_inf, DENORM -> sscale, QNAN -> dst_qnan,
* anything else -> dst_snan.
global sscale_snorm
global sscale_sdnrm
sscale_snorm:
sscale_sdnrm:
move.b DTAG(a6),d1
beq.l sscale
cmpi.b #ZERO,d1
beq.l dst_zero
cmpi.b #INF,d1
beq.l dst_inf
cmpi.b #DENORM,d1
beq.l sscale
cmpi.b #QNAN,d1
beq.l dst_qnan
bra.l dst_snan
* sscale_szero: fscale with a ZERO source.  Same destination dispatch as
* sscale_snorm: zero tag -> sscale, ZERO -> dst_zero, INF -> dst_inf,
* DENORM -> sscale, QNAN -> dst_qnan, anything else -> dst_snan.
global sscale_szero
sscale_szero:
move.b DTAG(a6),d1
beq.l sscale
cmpi.b #ZERO,d1
beq.l dst_zero
cmpi.b #INF,d1
beq.l dst_inf
cmpi.b #DENORM,d1
beq.l sscale
cmpi.b #QNAN,d1
beq.l dst_qnan
bra.l dst_snan
* sscale_sinf: fscale with an INF source.  Dispatch on DTAG(a6):
* QNAN -> dst_qnan, SNAN -> dst_snan, every other tag (including the
* zero tag tested first) -> t_operr.
global sscale_sinf
sscale_sinf:
move.b DTAG(a6),d1
beq.l t_operr
cmpi.b #QNAN,d1
beq.l dst_qnan
cmpi.b #SNAN,d1
beq.l dst_snan
bra.l t_operr
*#######################################################################
*
* sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
*
* sop_sqnan: source is a QNAN.  A NaN destination takes priority
* (QNAN -> dst_qnan, SNAN -> dst_snan); otherwise the source QNAN is
* returned through src_qnan.
global sop_sqnan
sop_sqnan:
move.b DTAG(a6),d1
cmpi.b #QNAN,d1
beq.b dst_qnan
cmpi.b #SNAN,d1
beq.b dst_snan
bra.b src_qnan
*
* sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
*
* sop_ssnan: source is an SNAN.  A NaN destination still supplies the
* result (SNAN -> dst_snan; QNAN -> dst_qnan after snaniop_mask has been
* ORed into USER_FPSR(a6)); otherwise the source is returned through
* src_snan.
global sop_ssnan
sop_ssnan:
move.b DTAG(a6),d1
cmpi.b #QNAN,d1
beq.b dst_qnan_src_snan
cmpi.b #SNAN,d1
beq.b dst_snan
bra.b src_snan
dst_qnan_src_snan:
ori.l #snaniop_mask,USER_FPSR(a6) * set NAN/SNAN/AIOP
bra.b dst_qnan
*
* dst_snan(): Return the dst SNAN w/ the SNAN bit set.
*
* dst_snan: load the destination operand at DST(a1) into fp0 with a real
* fmove, then merge the FPSR produced by that fmove into USER_FPSR(a6).
* Modifies d0.
global dst_snan
dst_snan:
fmove.x DST.w(a1),fp0 * the fmove sets the SNAN bit
fmove.l fpsr,d0 * catch resulting status
or.l d0,USER_FPSR(a6) * store status
rts
*
* dst_qnan(): Return the dst QNAN.
*
* dst_qnan: load the destination operand at DST(a1) into fp0 and write
* the ccode byte by hand: NAN, plus N when the sign byte is negative.
* dst_qnan_m is also branched to from src_qnan.
global dst_qnan
dst_qnan:
fmove.x DST.w(a1),fp0 * return the non-signalling nan
tst.b DST_EX.w(a1) * set ccodes according to QNAN sign
bmi.b dst_qnan_m
dst_qnan_p:
move.b #nan_bmask,FPSR_CC(a6)
rts
dst_qnan_m:
move.b #neg_bmask+nan_bmask,FPSR_CC(a6)
rts
*
* src_snan(): Return the src SNAN w/ the SNAN bit set.
*
* src_snan: load the source operand at SRC(a0) into fp0 with a real
* fmove, then merge the FPSR produced by that fmove into USER_FPSR(a6).
* Modifies d0.
global src_snan
src_snan:
fmove.x SRC.w(a0),fp0 * the fmove sets the SNAN bit
fmove.l fpsr,d0 * catch resulting status
or.l d0,USER_FPSR(a6) * store status
rts
*
* src_qnan(): Return the src QNAN.
*
* src_qnan: load the source operand at SRC(a0) into fp0 and write the
* ccode byte by hand: NAN, plus N when the sign byte is negative.
* The negative case now uses this routine's own src_qnan_m tail; it
* previously branched to dst_qnan_m, which stores the same value but left
* src_qnan_m unreachable.
global src_qnan
src_qnan:
fmove.x SRC.w(a0),fp0 * return the non-signalling nan
tst.b SRC_EX.w(a0) * set ccodes according to QNAN sign
bmi.b src_qnan_m
src_qnan_p:
move.b #nan_bmask,FPSR_CC(a6)
rts
src_qnan_m:
move.b #neg_bmask+nan_bmask,FPSR_CC(a6)
rts
*
* fkern2.s:
* These entry points are used by the exception handler
* routines where an instruction is selected by an index into
* a large jump table corresponding to a given instruction which
* has been decoded. Flow continues here where we now decode
* further according to the source operand type.
*
* fsinh: dispatch on the source tag STAG(a6).  zero tag -> ssinh,
* ZERO -> src_zero, INF -> src_inf, DENORM -> ssinhd, QNAN -> src_qnan,
* anything else -> src_snan.
global fsinh
fsinh:
move.b STAG(a6),d1
beq.l ssinh
cmpi.b #ZERO,d1
beq.l src_zero
cmpi.b #INF,d1
beq.l src_inf
cmpi.b #DENORM,d1
beq.l ssinhd
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* flognp1: dispatch on the source tag STAG(a6).  zero tag -> slognp1,
* ZERO -> src_zero, INF -> sopr_inf, DENORM -> slognp1d,
* QNAN -> src_qnan, anything else -> src_snan.
global flognp1
flognp1:
move.b STAG(a6),d1
beq.l slognp1
cmpi.b #ZERO,d1
beq.l src_zero
cmpi.b #INF,d1
beq.l sopr_inf
cmpi.b #DENORM,d1
beq.l slognp1d
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* fetoxm1: dispatch on the source tag STAG(a6).  zero tag -> setoxm1,
* ZERO -> src_zero, INF -> setoxm1i, DENORM -> setoxm1d,
* QNAN -> src_qnan, anything else -> src_snan.
global fetoxm1
fetoxm1:
move.b STAG(a6),d1
beq.l setoxm1
cmpi.b #ZERO,d1
beq.l src_zero
cmpi.b #INF,d1
beq.l setoxm1i
cmpi.b #DENORM,d1
beq.l setoxm1d
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* ftanh: dispatch on the source tag STAG(a6).  zero tag -> stanh,
* ZERO -> src_zero, INF -> src_one, DENORM -> stanhd, QNAN -> src_qnan,
* anything else -> src_snan.
global ftanh
ftanh:
move.b STAG(a6),d1
beq.l stanh
cmpi.b #ZERO,d1
beq.l src_zero
cmpi.b #INF,d1
beq.l src_one
cmpi.b #DENORM,d1
beq.l stanhd
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* fatan: dispatch on the source tag STAG(a6).  zero tag -> satan,
* ZERO -> src_zero, INF -> spi_2, DENORM -> satand, QNAN -> src_qnan,
* anything else -> src_snan.
global fatan
fatan:
move.b STAG(a6),d1
beq.l satan
cmpi.b #ZERO,d1
beq.l src_zero
cmpi.b #INF,d1
beq.l spi_2
cmpi.b #DENORM,d1
beq.l satand
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* fasin: dispatch on the source tag STAG(a6).  zero tag -> sasin,
* ZERO -> src_zero, INF -> t_operr, DENORM -> sasind, QNAN -> src_qnan,
* anything else -> src_snan.
global fasin
fasin:
move.b STAG(a6),d1
beq.l sasin
cmpi.b #ZERO,d1
beq.l src_zero
cmpi.b #INF,d1
beq.l t_operr
cmpi.b #DENORM,d1
beq.l sasind
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* fatanh: dispatch on the source tag STAG(a6).  zero tag -> satanh,
* ZERO -> src_zero, INF -> t_operr, DENORM -> satanhd,
* QNAN -> src_qnan, anything else -> src_snan.
global fatanh
fatanh:
move.b STAG(a6),d1
beq.l satanh
cmpi.b #ZERO,d1
beq.l src_zero
cmpi.b #INF,d1
beq.l t_operr
cmpi.b #DENORM,d1
beq.l satanhd
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* fsine: dispatch on the source tag STAG(a6).  zero tag -> ssin,
* ZERO -> src_zero, INF -> t_operr, DENORM -> ssind, QNAN -> src_qnan,
* anything else -> src_snan.
global fsine
fsine:
move.b STAG(a6),d1
beq.l ssin
cmpi.b #ZERO,d1
beq.l src_zero
cmpi.b #INF,d1
beq.l t_operr
cmpi.b #DENORM,d1
beq.l ssind
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* ftan: dispatch on the source tag STAG(a6).  zero tag -> stan,
* ZERO -> src_zero, INF -> t_operr, DENORM -> stand, QNAN -> src_qnan,
* anything else -> src_snan.
global ftan
ftan:
move.b STAG(a6),d1
beq.l stan
cmpi.b #ZERO,d1
beq.l src_zero
cmpi.b #INF,d1
beq.l t_operr
cmpi.b #DENORM,d1
beq.l stand
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* fetox: dispatch on the source tag STAG(a6).  zero tag -> setox,
* ZERO -> ld_pone, INF -> szr_inf, DENORM -> setoxd, QNAN -> src_qnan,
* anything else -> src_snan.
global fetox
fetox:
move.b STAG(a6),d1
beq.l setox
cmpi.b #ZERO,d1
beq.l ld_pone
cmpi.b #INF,d1
beq.l szr_inf
cmpi.b #DENORM,d1
beq.l setoxd
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* ftwotox: dispatch on the source tag STAG(a6).  zero tag -> stwotox,
* ZERO -> ld_pone, INF -> szr_inf, DENORM -> stwotoxd,
* QNAN -> src_qnan, anything else -> src_snan.
global ftwotox
ftwotox:
move.b STAG(a6),d1
beq.l stwotox
cmpi.b #ZERO,d1
beq.l ld_pone
cmpi.b #INF,d1
beq.l szr_inf
cmpi.b #DENORM,d1
beq.l stwotoxd
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* ftentox: dispatch on the source tag STAG(a6).  zero tag -> stentox,
* ZERO -> ld_pone, INF -> szr_inf, DENORM -> stentoxd,
* QNAN -> src_qnan, anything else -> src_snan.
global ftentox
ftentox:
move.b STAG(a6),d1
beq.l stentox
cmpi.b #ZERO,d1
beq.l ld_pone
cmpi.b #INF,d1
beq.l szr_inf
cmpi.b #DENORM,d1
beq.l stentoxd
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* flogn: dispatch on the source tag STAG(a6).  zero tag -> slogn,
* ZERO -> t_dz2, INF -> sopr_inf, DENORM -> slognd, QNAN -> src_qnan,
* anything else -> src_snan.
global flogn
flogn:
move.b STAG(a6),d1
beq.l slogn
cmpi.b #ZERO,d1
beq.l t_dz2
cmpi.b #INF,d1
beq.l sopr_inf
cmpi.b #DENORM,d1
beq.l slognd
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* flog10: dispatch on the source tag STAG(a6).  zero tag -> slog10,
* ZERO -> t_dz2, INF -> sopr_inf, DENORM -> slog10d, QNAN -> src_qnan,
* anything else -> src_snan.
global flog10
flog10:
move.b STAG(a6),d1
beq.l slog10
cmpi.b #ZERO,d1
beq.l t_dz2
cmpi.b #INF,d1
beq.l sopr_inf
cmpi.b #DENORM,d1
beq.l slog10d
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* flog2: dispatch on the source tag STAG(a6).  zero tag -> slog2,
* ZERO -> t_dz2, INF -> sopr_inf, DENORM -> slog2d, QNAN -> src_qnan,
* anything else -> src_snan.
global flog2
flog2:
move.b STAG(a6),d1
beq.l slog2
cmpi.b #ZERO,d1
beq.l t_dz2
cmpi.b #INF,d1
beq.l sopr_inf
cmpi.b #DENORM,d1
beq.l slog2d
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* fcosh: dispatch on the source tag STAG(a6).  zero tag -> scosh,
* ZERO -> ld_pone, INF -> ld_pinf, DENORM -> scoshd, QNAN -> src_qnan,
* anything else -> src_snan.
global fcosh
fcosh:
move.b STAG(a6),d1
beq.l scosh
cmpi.b #ZERO,d1
beq.l ld_pone
cmpi.b #INF,d1
beq.l ld_pinf
cmpi.b #DENORM,d1
beq.l scoshd
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* facos: dispatch on the source tag STAG(a6).  zero tag -> sacos,
* ZERO -> ld_ppi2, INF -> t_operr, DENORM -> sacosd, QNAN -> src_qnan,
* anything else -> src_snan.
global facos
facos:
move.b STAG(a6),d1
beq.l sacos
cmpi.b #ZERO,d1
beq.l ld_ppi2
cmpi.b #INF,d1
beq.l t_operr
cmpi.b #DENORM,d1
beq.l sacosd
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* fcos: dispatch on the source tag STAG(a6).  zero tag -> scos,
* ZERO -> ld_pone, INF -> t_operr, DENORM -> scosd, QNAN -> src_qnan,
* anything else -> src_snan.
global fcos
fcos:
move.b STAG(a6),d1
beq.l scos
cmpi.b #ZERO,d1
beq.l ld_pone
cmpi.b #INF,d1
beq.l t_operr
cmpi.b #DENORM,d1
beq.l scosd
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* fgetexp: dispatch on the source tag STAG(a6).  zero tag -> sgetexp,
* ZERO -> src_zero, INF -> t_operr, DENORM -> sgetexpd,
* QNAN -> src_qnan, anything else -> src_snan.
global fgetexp
fgetexp:
move.b STAG(a6),d1
beq.l sgetexp
cmpi.b #ZERO,d1
beq.l src_zero
cmpi.b #INF,d1
beq.l t_operr
cmpi.b #DENORM,d1
beq.l sgetexpd
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* fgetman: dispatch on the source tag STAG(a6).  zero tag -> sgetman,
* ZERO -> src_zero, INF -> t_operr, DENORM -> sgetmand,
* QNAN -> src_qnan, anything else -> src_snan.
global fgetman
fgetman:
move.b STAG(a6),d1
beq.l sgetman
cmpi.b #ZERO,d1
beq.l src_zero
cmpi.b #INF,d1
beq.l t_operr
cmpi.b #DENORM,d1
beq.l sgetmand
cmpi.b #QNAN,d1
beq.l src_qnan
bra.l src_snan
* fsincos: dispatch on the source tag STAG(a6).  zero tag -> ssincos,
* ZERO -> ssincosz, INF -> ssincosi, DENORM -> ssincosd,
* QNAN -> ssincosqnan, anything else -> ssincossnan.
global fsincos
fsincos:
move.b STAG(a6),d1
beq.l ssincos
cmpi.b #ZERO,d1
beq.l ssincosz
cmpi.b #INF,d1
beq.l ssincosi
cmpi.b #DENORM,d1
beq.l ssincosd
cmpi.b #QNAN,d1
beq.l ssincosqnan
bra.l ssincossnan
* fmod: first-level dispatch on the source tag STAG(a6); each target then
* dispatches on the destination tag.  zero tag -> smod_snorm,
* ZERO -> smod_szero, INF -> smod_sinf, DENORM -> smod_sdnrm,
* QNAN -> sop_sqnan, anything else -> sop_ssnan.
global fmod
fmod:
move.b STAG(a6),d1
beq.l smod_snorm
cmpi.b #ZERO,d1
beq.l smod_szero
cmpi.b #INF,d1
beq.l smod_sinf
cmpi.b #DENORM,d1
beq.l smod_sdnrm
cmpi.b #QNAN,d1
beq.l sop_sqnan
bra.l sop_ssnan
* frem: first-level dispatch on the source tag STAG(a6); each target then
* dispatches on the destination tag.  zero tag -> srem_snorm,
* ZERO -> srem_szero, INF -> srem_sinf, DENORM -> srem_sdnrm,
* QNAN -> sop_sqnan, anything else -> sop_ssnan.
global frem
frem:
move.b STAG(a6),d1
beq.l srem_snorm
cmpi.b #ZERO,d1
beq.l srem_szero
cmpi.b #INF,d1
beq.l srem_sinf
cmpi.b #DENORM,d1
beq.l srem_sdnrm
cmpi.b #QNAN,d1
beq.l sop_sqnan
bra.l sop_ssnan
* fscale: first-level dispatch on the source tag STAG(a6); each target
* then dispatches on the destination tag.  zero tag -> sscale_snorm,
* ZERO -> sscale_szero, INF -> sscale_sinf, DENORM -> sscale_sdnrm,
* QNAN -> sop_sqnan, anything else -> sop_ssnan.
global fscale
fscale:
move.b STAG(a6),d1
beq.l sscale_snorm
cmpi.b #ZERO,d1
beq.l sscale_szero
cmpi.b #INF,d1
beq.l sscale_sinf
cmpi.b #DENORM,d1
beq.l sscale_sdnrm
cmpi.b #QNAN,d1
beq.l sop_sqnan
bra.l sop_ssnan
*########################################################################
* XDEF **************************************************************** #
* fgen_except(): catch an exception during transcendental #
* emulation #
* #
* XREF **************************************************************** #
* fmul() - emulate a multiply instruction #
* fadd() - emulate an add instruction #
* fin() - emulate an fmove instruction #
* #
* INPUT *************************************************************** #
* fp0 = destination operand #
* d1 = type of instruction that took exception #
* fsave frame = source operand #
* #
* OUTPUT ************************************************************** #
* fp0 = result #
* fp1 = EXOP #
* #
* ALGORITHM *********************************************************** #
* An exception occurred on the last instruction of the #
* transcendental emulation. hopefully, this won't be happening much #
* because it will be VERY slow. #
* The only exceptions capable of passing through here are #
* Overflow, Underflow, and Unsupported Data Type. #
* #
*########################################################################
* fgen_except: re-run the instruction that took the exception through
* the matching emulation routine.
*   in : d1.b = operation code (compared against FMOV_OP and FADD_OP;
*               anything else is handled as a multiply)
*        fp0  = destination operand (copied to FP_DST(a6))
*        fsave frame on the stack above the return address = source op
*   passes a0 = pointer to source operand, a1 = pointer to FP_DST(a6)
global fgen_except
fgen_except:
* NOTE(review): this routine is entered with bsr.l (see catch2), so (sp)
* holds the return address and the fsave frame starts at $4(sp), as the
* lea below also assumes.  $3(sp) therefore looks like the low byte of
* the return address rather than a byte of the frame -- confirm whether
* $7(sp) was intended.
cmpi.b #$7,$3(sp) * is exception UNSUPP?
beq.b fge_unsupp * yes
move.b #NORM,STAG(a6)
fge_cont:
move.b #NORM,DTAG(a6)
* ok, I have a problem with putting the dst op at FP_DST. the emulation
* routines aren't supposed to alter the operands but we've just squashed
* FP_DST here...
* 8/17/93 - this turns out to be more of a "cleanliness" standpoint
* than a potential bug. to begin with, only the dyadic functions
* frem,fmod, and fscale would get the dst trashed here. But, for
* the 060SP, the FP_DST is never used again anyways.
fmovem.x fp0,FP_DST(a6) * dst op is in fp0
lea $4(sp),a0 * pass: ptr to src op
lea FP_DST(a6),a1 * pass: ptr to dst op
cmpi.b #FMOV_OP,d1
beq.b fge_fin * it was an "fmov"
cmpi.b #FADD_OP,d1
beq.b fge_fadd * it was an "fadd"
* any other operation code falls through to the multiply emulation
fge_fmul:
bsr.l fmul
rts
fge_fadd:
bsr.l fadd
rts
fge_fin:
bsr.l fin
rts
* unsupported-data-type case: tag the source as DENORM instead of NORM
fge_unsupp:
move.b #DENORM,STAG(a6)
bra.b fge_cont
*
* This table holds the offsets of the emulation routines for each individual
* math operation relative to the address of this table. Included are
* routines like fadd/fmul/fabs as well as the transcendentals.
* The location within the table is determined by the extension bits of the
* operation longword.
*
* The word pair below precedes the table: $4AFC followed by the number of
* table entries (109 = indices $00-$6c).
* Each entry is a longword offset from tbl_unsupp to the emulation routine.
* Entries written as tbl_unsupp-tbl_unsupp evaluate to offset 0, i.e. no
* routine is assigned to that extension value.
.dc.w $4AFC,109
tbl_unsupp:
.dc.l fin-tbl_unsupp * 00: fmove
.dc.l fint-tbl_unsupp * 01: fint
.dc.l fsinh-tbl_unsupp * 02: fsinh
.dc.l fintrz-tbl_unsupp * 03: fintrz
.dc.l fsqrt-tbl_unsupp * 04: fsqrt
.dc.l tbl_unsupp-tbl_unsupp * 05: (no routine)
.dc.l flognp1-tbl_unsupp * 06: flognp1
.dc.l tbl_unsupp-tbl_unsupp * 07: (no routine)
.dc.l fetoxm1-tbl_unsupp * 08: fetoxm1
.dc.l ftanh-tbl_unsupp * 09: ftanh
.dc.l fatan-tbl_unsupp * 0a: fatan
.dc.l tbl_unsupp-tbl_unsupp * 0b: (no routine)
.dc.l fasin-tbl_unsupp * 0c: fasin
.dc.l fatanh-tbl_unsupp * 0d: fatanh
.dc.l fsine-tbl_unsupp * 0e: fsin
.dc.l ftan-tbl_unsupp * 0f: ftan
.dc.l fetox-tbl_unsupp * 10: fetox
.dc.l ftwotox-tbl_unsupp * 11: ftwotox
.dc.l ftentox-tbl_unsupp * 12: ftentox
.dc.l tbl_unsupp-tbl_unsupp * 13: (no routine)
.dc.l flogn-tbl_unsupp * 14: flogn
.dc.l flog10-tbl_unsupp * 15: flog10
.dc.l flog2-tbl_unsupp * 16: flog2
.dc.l tbl_unsupp-tbl_unsupp * 17: (no routine)
.dc.l fabs-tbl_unsupp * 18: fabs
.dc.l fcosh-tbl_unsupp * 19: fcosh
.dc.l fneg-tbl_unsupp * 1a: fneg
.dc.l tbl_unsupp-tbl_unsupp * 1b: (no routine)
.dc.l facos-tbl_unsupp * 1c: facos
.dc.l fcos-tbl_unsupp * 1d: fcos
.dc.l fgetexp-tbl_unsupp * 1e: fgetexp
.dc.l fgetman-tbl_unsupp * 1f: fgetman
.dc.l fdiv-tbl_unsupp * 20: fdiv
.dc.l fmod-tbl_unsupp * 21: fmod
.dc.l fadd-tbl_unsupp * 22: fadd
.dc.l fmul-tbl_unsupp * 23: fmul
.dc.l fsgldiv-tbl_unsupp * 24: fsgldiv
.dc.l frem-tbl_unsupp * 25: frem
.dc.l fscale-tbl_unsupp * 26: fscale
.dc.l fsglmul-tbl_unsupp * 27: fsglmul
.dc.l fsub-tbl_unsupp * 28: fsub
.dc.l tbl_unsupp-tbl_unsupp * 29: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 2a: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 2b: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 2c: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 2d: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 2e: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 2f: (no routine)
.dc.l fsincos-tbl_unsupp * 30: fsincos
.dc.l fsincos-tbl_unsupp * 31: fsincos
.dc.l fsincos-tbl_unsupp * 32: fsincos
.dc.l fsincos-tbl_unsupp * 33: fsincos
.dc.l fsincos-tbl_unsupp * 34: fsincos
.dc.l fsincos-tbl_unsupp * 35: fsincos
.dc.l fsincos-tbl_unsupp * 36: fsincos
.dc.l fsincos-tbl_unsupp * 37: fsincos
.dc.l __fcmp__-tbl_unsupp * 38: fcmp
.dc.l tbl_unsupp-tbl_unsupp * 39: (no routine)
.dc.l ftst-tbl_unsupp * 3a: ftst
.dc.l tbl_unsupp-tbl_unsupp * 3b: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 3c: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 3d: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 3e: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 3f: (no routine)
.dc.l fsin-tbl_unsupp * 40: fsmove
.dc.l fssqrt-tbl_unsupp * 41: fssqrt
.dc.l tbl_unsupp-tbl_unsupp * 42: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 43: (no routine)
.dc.l fdin-tbl_unsupp * 44: fdmove
.dc.l fdsqrt-tbl_unsupp * 45: fdsqrt
.dc.l tbl_unsupp-tbl_unsupp * 46: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 47: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 48: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 49: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 4a: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 4b: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 4c: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 4d: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 4e: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 4f: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 50: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 51: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 52: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 53: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 54: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 55: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 56: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 57: (no routine)
.dc.l fsabs-tbl_unsupp * 58: fsabs
.dc.l tbl_unsupp-tbl_unsupp * 59: (no routine)
.dc.l fsneg-tbl_unsupp * 5a: fsneg
.dc.l tbl_unsupp-tbl_unsupp * 5b: (no routine)
.dc.l fdabs-tbl_unsupp * 5c: fdabs
.dc.l tbl_unsupp-tbl_unsupp * 5d: (no routine)
.dc.l fdneg-tbl_unsupp * 5e: fdneg
.dc.l tbl_unsupp-tbl_unsupp * 5f: (no routine)
.dc.l fsdiv-tbl_unsupp * 60: fsdiv
.dc.l tbl_unsupp-tbl_unsupp * 61: (no routine)
.dc.l fsadd-tbl_unsupp * 62: fsadd
.dc.l fsmul-tbl_unsupp * 63: fsmul
.dc.l fddiv-tbl_unsupp * 64: fddiv
.dc.l tbl_unsupp-tbl_unsupp * 65: (no routine)
.dc.l fdadd-tbl_unsupp * 66: fdadd
.dc.l fdmul-tbl_unsupp * 67: fdmul
.dc.l fssub-tbl_unsupp * 68: fssub
.dc.l tbl_unsupp-tbl_unsupp * 69: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 6a: (no routine)
.dc.l tbl_unsupp-tbl_unsupp * 6b: (no routine)
.dc.l fdsub-tbl_unsupp * 6c: fdsub
*########################################################################
* XDEF **************************************************************** #
* fmul(): emulates the fmul instruction #
* fsmul(): emulates the fsmul instruction #
* fdmul(): emulates the fdmul instruction #
* #
* XREF **************************************************************** #
* scale_to_zero_src() - scale src exponent to zero #
* scale_to_zero_dst() - scale dst exponent to zero #
* unf_res() - return default underflow result #
* ovf_res() - return default overflow result #
* res_qnan() - return QNAN result #
* res_snan() - return SNAN result #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* a1 = pointer to extended precision destination operand #
* d0 rnd prec,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = result #
* fp1 = EXOP (if exception occurred) #
* #
* ALGORITHM *********************************************************** #
* Handle NANs, infinities, and zeroes as special cases. Divide #
* norms/denorms into ext/sgl/dbl precision. #
* For norms/denorms, scale the exponents such that a multiply #
* instruction won't cause an exception. Use the regular fmul to #
* compute a result. Check if the regular operands would have taken #
* an exception. If so, return the default overflow/underflow result #
* and return the EXOP if exceptions are enabled. Else, scale the #
* result operand to the proper exponent. #
* #
*########################################################################
* Scale-factor thresholds used by fmul, one longword per rounding precision
* in the order ext, sgl, dbl. fmul indexes both tables with the precision
* index in d1 (scaled by 4) and compares the combined scale factor in d0
* against the selected entry. The tables are addressed PC-relative with a
* byte displacement, so they must stay close to the code that reads them.
align $10,$51FC
tbl_fmul_ovfl:
.dc.l $3fff-$7ffe * ext_max
.dc.l $3fff-$407e * sgl_max
.dc.l $3fff-$43fe * dbl_max
tbl_fmul_unfl:
.dc.l $3fff+$0001 * ext_unfl
.dc.l $3fff-$3f80 * sgl_unfl
.dc.l $3fff-$3c00 * dbl_unfl
* fsmul(): fmul with the rounding precision in d0 forced to single.
* Only the low byte of d0 is modified before joining fmul.
global fsmul
fsmul:
andi.b #$30,d0 * clear rnd prec
ori.b #s_mode*$10,d0 * insert sgl prec
bra.b fmul
* fdmul(): fmul with the rounding precision in d0 forced to double.
* There is no branch at the end: execution continues into the code that
* immediately follows this entry point.
global fdmul
fdmul:
andi.b #$30,d0 * clear rnd prec
ori.b #d_mode*$10,d0 * insert dbl prec
* fmul(): a0 = ptr to src operand, a1 = ptr to dst operand, d0 = rnd
* prec,mode. The rounding info is saved in L_SCR3. The two operand tags are
* combined into d1 = (DTAG << 3) | STAG; a non-zero value is dispatched
* through fmul_not_norm, zero falls into fmul_norm.
global fmul
fmul:
move.l d0,L_SCR3(a6) * store rnd info
clr.w d1
move.b DTAG(a6),d1
lsl.b #$3,d1
or.b STAG(a6),d1 * combine src tags
bne.w fmul_not_norm * optimize on non-norm input
* fmul_norm: copy dst to FP_SCR1 and src to FP_SCR0, scale both exponents
* and classify the operation from the summed scale factor.
fmul_norm:
move.w DST_EX.w(a1),FP_SCR1_EX(a6)
move.l DST_HI(a1),FP_SCR1_HI(a6)
move.l DST_LO(a1),FP_SCR1_LO(a6)
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_to_zero_src * scale src exponent
move.l d0,-(sp) * save scale factor 1
bsr.l scale_to_zero_dst * scale dst exponent
add.l d0,(sp) * SCALE_FACTOR = scale1 + scale2
* d1 = low word of the saved rnd info; shifting its low byte right by 6
* leaves the 2-bit precision field as the table index
move.w 2+L_SCR3(a6),d1 * fetch precision
lsr.b #$6,d1 * shift to lo bits
move.l (sp)+,d0 * load S.F.
* from here on d0 holds the scale factor and must stay live until the
* result exponent has been corrected
cmp.l (tbl_fmul_ovfl.b,pc,d1.w*4),d0 * would result ovfl?
beq.w fmul_may_ovfl * result may rnd to overflow
blt.w fmul_ovfl * result will overflow
cmp.l (tbl_fmul_unfl.b,pc,d1.w*4),d0 * would result unfl?
beq.w fmul_may_unfl * result may rnd to no unfl
bgt.w fmul_unfl * result will underflow
*
* NORMAL:
* - the result of the multiply operation will neither overflow nor underflow.
* - do the multiply to the proper precision and rounding mode.
* - scale the result exponent using the scale factor. if both operands were
* normalized then we really don't need to go through this scaling. but for now,
* this will do.
* - on entry d0 = scale factor; d1 is used as scratch, d2 is saved/restored.
*
fmul_normal:
fmovem.x FP_SCR1(a6),fp0 * load dst operand
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fmul.x FP_SCR0(a6),fp0 * execute multiply
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
* fmul_normal_exit is also entered from the may-overflow/may-underflow
* paths with the rounded result in fp0 and the scale factor in d0.
fmul_normal_exit:
fmovem.x fp0,FP_SCR0(a6) * store out result
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * load {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * undo scaling: exp - scale factor
or.w d2,d1 * concat old sign,new exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp0 * return result in fp0
rts
*
* OVERFLOW:
* - the result of the multiply operation is an overflow.
* - do the multiply to the proper precision and rounding mode in order to
* set the inexact bits.
* - calculate the default result and return it in fp0.
* - if overflow or inexact is enabled, we need a multiply result rounded to
* extended precision. if the original operation was extended, then we have this
* result. if the original operation was single or double, we have to do another
* multiply using extended precision and the correct rounding mode. the result
* of this operation then has its exponent scaled by -0x6000 to create the
* exceptional operand.
*
fmul_ovfl:
fmovem.x FP_SCR1(a6),fp0 * load dst operand
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fmul.x FP_SCR0(a6),fp0 * execute multiply
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
* save setting this until now because this is where fmul_may_ovfl may jump in
fmul_ovfl_tst:
ori.l #ovfl_inx_mask,USER_FPSR(a6) * set ovfl/aovfl/ainex
move.b FPCR_ENABLE(a6),d1
andi.b #$13,d1 * is OVFL or INEX enabled?
bne.b fmul_ovfl_ena * yes
* calculate the default result
* ovf_res is called with d1 = sign flag and d0 = rnd prec,mode; afterwards
* the result is read through a0 and d0 is OR-ed into the ccode byte.
fmul_ovfl_dis:
btst #neg_bit,FPSR_CC(a6) * is result negative?
sne d1 * set sign param accordingly
move.l L_SCR3(a6),d0 * pass rnd prec,mode
bsr.l ovf_res * calculate default result
or.b d0,FPSR_CC(a6) * set INF,N if applicable
fmovem.x (a0),fp0 * return default result in fp0
rts
*
* OVFL is enabled; Create EXOP:
* - if precision is extended, then we have the EXOP. simply bias the exponent
* with an extra -0x6000. if the precision is single or double, we need to
* calculate a result rounded to extended precision.
* - d0 still holds the scale factor on entry; d1 is scratch, d2 is
* saved/restored. the EXOP is returned in fp1 before the default result
* is produced by fmul_ovfl_dis.
*
fmul_ovfl_ena:
move.l L_SCR3(a6),d1
andi.b #$c0,d1 * test the rnd prec
bne.b fmul_ovfl_ena_sd * it's sgl or dbl
fmul_ovfl_ena_cont:
fmovem.x fp0,FP_SCR0(a6) * move result to stack
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * fetch {sgn,exp}
move.w d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
sub.l d0,d1 * undo scaling: exp - scale factor
subi.l #$6000,d1 * subtract bias
andi.w #$7fff,d1 * clear sign bit
andi.w #$8000,d2 * keep old sign
or.w d2,d1 * concat old sign,new exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
bra.b fmul_ovfl_dis
* sgl/dbl: redo the multiply with only the rounding mode bits in the FPCR
* (precision field cleared) and rejoin the EXOP construction above.
fmul_ovfl_ena_sd:
fmovem.x FP_SCR1(a6),fp0 * load dst operand
move.l L_SCR3(a6),d1
andi.b #$30,d1 * keep rnd mode only
fmove.l d1,fpcr * set FPCR
fmul.x FP_SCR0(a6),fp0 * execute multiply
fmove.l #$0,fpcr * clear FPCR
bra.b fmul_ovfl_ena_cont
*
* may OVERFLOW:
* - the result of the multiply operation MAY overflow.
* - do the multiply to the proper precision and rounding mode in order to
* set the inexact bits.
* - compare the magnitude of the scaled result with 2: if it is >= 2 the
* overflow path is taken, otherwise the result is unscaled and returned.
*
fmul_may_ovfl:
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fmul.x FP_SCR0(a6),fp0 * execute multiply
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
fabs.x fp0,fp1 * make a copy of result
fcmp.b #$2,fp1 * is |result| >= 2.b?
fbge.w fmul_ovfl_tst * yes; overflow has occurred
* no, it didn't overflow; we have correct result
bra.w fmul_normal_exit
*
* UNDERFLOW:
* - the result of the multiply operation is an underflow.
* - do the multiply in extended precision, round-to-zero, in order to
* set the inexact bits.
* - calculate the default result and return it in fp0.
* - if underflow or inexact is enabled, we need a multiply result rounded to
* extended precision. if the original operation was extended, then we have this
* result. if the original operation was single or double, we have to do another
* multiply using extended precision and the correct rounding mode. the result
* of this operation then has its exponent scaled by +0x6000 to create the
* exceptional operand.
*
fmul_unfl:
bset #unfl_bit,FPSR_EXCEPT(a6) * set unfl exc bit
* for fun, let's use only extended precision, round to zero. then, let
* the unf_res() routine figure out all the rest.
* will we get the correct answer.
fmovem.x FP_SCR1(a6),fp0 * load dst operand
fmove.l #rz_mode*$10,fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fmul.x FP_SCR0(a6),fp0 * execute multiply
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
move.b FPCR_ENABLE(a6),d1
andi.b #$0b,d1 * is UNFL or INEX enabled?
bne.b fmul_unfl_ena * yes
* default result: store fp0 to FP_SCR0 and let unf_res rework it in place
* (a0 = operand address, d1 = rnd prec,mode); d0 comes back with ccode bits.
fmul_unfl_dis:
fmovem.x fp0,FP_SCR0(a6) * store out result
lea FP_SCR0(a6),a0 * pass: result addr
move.l L_SCR3(a6),d1 * pass: rnd prec,mode
bsr.l unf_res * calculate default result
or.b d0,FPSR_CC(a6) * unf_res2 may have set 'Z'
fmovem.x FP_SCR0(a6),fp0 * return default result in fp0
rts
*
* UNFL is enabled.
* build the EXOP in fp1: multiply again in extended precision with the
* caller's rounding mode, remove the scale factor (d0) from the exponent and
* add the $6000 bias. fp0 is left untouched for fmul_unfl_dis.
*
fmul_unfl_ena:
fmovem.x FP_SCR1(a6),fp1 * load dst op
move.l L_SCR3(a6),d1
andi.b #$c0,d1 * is precision extended?
bne.b fmul_unfl_ena_sd * no, sgl or dbl
* if the rnd mode is anything but RZ, then we have to re-do the above
* multiplication because we used RZ for all.
fmove.l L_SCR3(a6),fpcr * set FPCR
fmul_unfl_ena_cont:
fmove.l #$0,fpsr * clear FPSR
fmul.x FP_SCR0(a6),fp1 * execute multiply
fmove.l #$0,fpcr * clear FPCR
fmovem.x fp1,FP_SCR0(a6) * save result to stack
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * fetch {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * undo scaling: exp - scale factor
addi.l #$6000,d1 * add bias
andi.w #$7fff,d1 * clear sign position
or.w d2,d1 * concat old sign,new exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
bra.w fmul_unfl_dis
* sgl/dbl: use only the rounding mode so the multiply rounds to extended
fmul_unfl_ena_sd:
move.l L_SCR3(a6),d1
andi.b #$30,d1 * use only rnd mode
fmove.l d1,fpcr * set FPCR
bra.b fmul_unfl_ena_cont
* MAY UNDERFLOW:
* -use the correct rounding mode and precision. this code favors operations
* that do not underflow.
* -the magnitude of the scaled result is compared with 2: above 2 means no
* underflow, below 2 means underflow, equal to 2 needs the RZ re-check below.
fmul_may_unfl:
fmovem.x FP_SCR1(a6),fp0 * load dst operand
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fmul.x FP_SCR0(a6),fp0 * execute multiply
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
fabs.x fp0,fp1 * make a copy of result
fcmp.b #$2,fp1 * compare |result| with 2.b
fbgt.w fmul_normal_exit * |result| > 2; no underflow occurred
fblt.w fmul_unfl * |result| < 2; underflow occurred
*
* we still don't know if underflow occurred. result is ~ equal to 2. but,
* we don't know if the result was an underflow that rounded up to a 2 or
* a normalized number that rounded down to a 2. so, redo the entire operation
* using RZ as the rounding mode to see what the pre-rounded result is.
* this case should be relatively rare.
* the re-check runs in fp1 so the rounded result in fp0 is preserved.
*
fmovem.x FP_SCR1(a6),fp1 * load dst operand
move.l L_SCR3(a6),d1
andi.b #$c0,d1 * keep rnd prec
ori.b #rz_mode*$10,d1 * insert RZ
fmove.l d1,fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fmul.x FP_SCR0(a6),fp1 * execute multiply
fmove.l #$0,fpcr * clear FPCR
fabs.x fp1 * make absolute value
fcmp.b #$2,fp1 * is |result| < 2.b?
fbge.w fmul_normal_exit * no; no underflow occurred
bra.w fmul_unfl * yes, underflow occurred
*###############################################################################
*
* Multiply: inputs are not both normalized; what are they?
*
* d1 = (DTAG << 3) | STAG indexes tbl_fmul_op (word entries, 8 per dst
* tag). Each entry is the offset of the handler from tbl_fmul_op. The
* comments read "DST x SRC". The last two slots of every row are unused and
* hold offset 0.
*
fmul_not_norm:
move.w (tbl_fmul_op.b,pc,d1.w*2),d1
jmp (tbl_fmul_op.b,pc,d1.w)
.dc.w $4AFC,48
tbl_fmul_op:
.dc.w fmul_norm-tbl_fmul_op * NORM x NORM
.dc.w fmul_zero-tbl_fmul_op * NORM x ZERO
.dc.w fmul_inf_src-tbl_fmul_op * NORM x INF
.dc.w fmul_res_qnan-tbl_fmul_op * NORM x QNAN
.dc.w fmul_norm-tbl_fmul_op * NORM x DENORM
.dc.w fmul_res_snan-tbl_fmul_op * NORM x SNAN
.dc.w tbl_fmul_op-tbl_fmul_op *
.dc.w tbl_fmul_op-tbl_fmul_op *
.dc.w fmul_zero-tbl_fmul_op * ZERO x NORM
.dc.w fmul_zero-tbl_fmul_op * ZERO x ZERO
.dc.w fmul_res_operr-tbl_fmul_op * ZERO x INF
.dc.w fmul_res_qnan-tbl_fmul_op * ZERO x QNAN
.dc.w fmul_zero-tbl_fmul_op * ZERO x DENORM
.dc.w fmul_res_snan-tbl_fmul_op * ZERO x SNAN
.dc.w tbl_fmul_op-tbl_fmul_op *
.dc.w tbl_fmul_op-tbl_fmul_op *
.dc.w fmul_inf_dst-tbl_fmul_op * INF x NORM
.dc.w fmul_res_operr-tbl_fmul_op * INF x ZERO
.dc.w fmul_inf_dst-tbl_fmul_op * INF x INF
.dc.w fmul_res_qnan-tbl_fmul_op * INF x QNAN
.dc.w fmul_inf_dst-tbl_fmul_op * INF x DENORM
.dc.w fmul_res_snan-tbl_fmul_op * INF x SNAN
.dc.w tbl_fmul_op-tbl_fmul_op *
.dc.w tbl_fmul_op-tbl_fmul_op *
.dc.w fmul_res_qnan-tbl_fmul_op * QNAN x NORM
.dc.w fmul_res_qnan-tbl_fmul_op * QNAN x ZERO
.dc.w fmul_res_qnan-tbl_fmul_op * QNAN x INF
.dc.w fmul_res_qnan-tbl_fmul_op * QNAN x QNAN
.dc.w fmul_res_qnan-tbl_fmul_op * QNAN x DENORM
.dc.w fmul_res_snan-tbl_fmul_op * QNAN x SNAN
.dc.w tbl_fmul_op-tbl_fmul_op *
.dc.w tbl_fmul_op-tbl_fmul_op *
.dc.w fmul_norm-tbl_fmul_op * DENORM x NORM
.dc.w fmul_zero-tbl_fmul_op * DENORM x ZERO
.dc.w fmul_inf_src-tbl_fmul_op * DENORM x INF
.dc.w fmul_res_qnan-tbl_fmul_op * DENORM x QNAN
.dc.w fmul_norm-tbl_fmul_op * DENORM x DENORM
.dc.w fmul_res_snan-tbl_fmul_op * DENORM x SNAN
.dc.w tbl_fmul_op-tbl_fmul_op *
.dc.w tbl_fmul_op-tbl_fmul_op *
.dc.w fmul_res_snan-tbl_fmul_op * SNAN x NORM
.dc.w fmul_res_snan-tbl_fmul_op * SNAN x ZERO
.dc.w fmul_res_snan-tbl_fmul_op * SNAN x INF
.dc.w fmul_res_snan-tbl_fmul_op * SNAN x QNAN
.dc.w fmul_res_snan-tbl_fmul_op * SNAN x DENORM
.dc.w fmul_res_snan-tbl_fmul_op * SNAN x SNAN
.dc.w tbl_fmul_op-tbl_fmul_op *
.dc.w tbl_fmul_op-tbl_fmul_op *
* Local stubs referenced by tbl_fmul_op: the table holds 16-bit offsets, so
* each stub forwards with a long branch to the shared result routine.
fmul_res_operr:
bra.l res_operr
fmul_res_snan:
bra.l res_snan
fmul_res_qnan:
bra.l res_qnan
*
* Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
*
* The result is a zero whose sign is the exclusive-or of the operand signs.
* Only the high byte of each sign/exponent word is read; bit 7 of the
* exclusive-or is the result sign. d0 and d1 are overwritten and FPSR_CC is
* written as a whole byte.
*
global fmul_zero * global for fsglmul
fmul_zero:
move.b SRC_EX.w(a0),d0 * exclusive or the signs
move.b DST_EX.w(a1),d1
eor.b d0,d1
bpl.b fmul_zero_p * result ZERO is pos.
fmul_zero_n:
fmove.s #$80000000,fp0 * load -ZERO
move.b #z_bmask+neg_bmask,FPSR_CC(a6) * set Z/N
rts
fmul_zero_p:
fmove.s #$00000000,fp0 * load +ZERO
move.b #z_bmask,FPSR_CC(a6) * set Z
rts
*
* Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
*
* Note: The j-bit for an infinity is a don't-care. However, to be
* strictly compatible w/ the 68881/882, we make sure to return an
* INF w/ the j-bit set if the input INF j-bit was set. Destination
* INFs take priority.
*
* fmul_inf_dst returns the dst operand, fmul_inf_src the src operand; both
* give it the exclusive-or of the operand signs and share the _p/_n tails.
* d0 and d1 are overwritten and FPSR_CC is written as a whole byte.
*
global fmul_inf_dst * global for fsglmul
fmul_inf_dst:
fmovem.x DST.w(a1),fp0 * return INF result in fp0
move.b SRC_EX.w(a0),d0 * exclusive or the signs
move.b DST_EX.w(a1),d1
eor.b d0,d1
bpl.b fmul_inf_dst_p * result INF is pos.
fmul_inf_dst_n:
fabs.x fp0 * clear result sign
fneg.x fp0 * set result sign
move.b #inf_bmask+neg_bmask,FPSR_CC(a6) * set INF/N
rts
fmul_inf_dst_p:
fabs.x fp0 * clear result sign
move.b #inf_bmask,FPSR_CC(a6) * set INF
rts
global fmul_inf_src * global for fsglmul
fmul_inf_src:
fmovem.x SRC.w(a0),fp0 * return INF result in fp0
move.b SRC_EX.w(a0),d0 * exclusive or the signs
move.b DST_EX.w(a1),d1
eor.b d0,d1
bpl.b fmul_inf_dst_p * result INF is pos.
bra.b fmul_inf_dst_n
*########################################################################
* XDEF **************************************************************** #
* fin(): emulates the fmove instruction #
* fsin(): emulates the fsmove instruction #
* fdin(): emulates the fdmove instruction #
* #
* XREF **************************************************************** #
* norm() - normalize mantissa for EXOP on denorm #
* scale_to_zero_src() - scale src exponent to zero #
* ovf_res() - return default overflow result #
* unf_res() - return default underflow result #
* res_qnan_1op() - return QNAN result #
* res_snan_1op() - return SNAN result #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* d0 = round prec/mode #
* #
* OUTPUT ************************************************************** #
* fp0 = result #
* fp1 = EXOP (if exception occurred) #
* #
* ALGORITHM *********************************************************** #
* Handle NANs, infinities, and zeroes as special cases. Divide #
* norms into extended, single, and double precision. #
* Norms can be emulated w/ a regular fmove instruction. For #
* sgl/dbl, must scale exponent and perform an "fmove". Check to see #
* if the result would have overflowed/underflowed. If so, use unf_res() #
* or ovf_res() to return the default result. Also return EXOP if #
* exception is enabled. If no exception, return the default result. #
* Unnorms don't pass through here. #
* #
*########################################################################
* fsin(): fsmove emulation entry (listed as "fsmove" in tbl_unsupp; the
* sine routine is fsine). Forces the rounding precision in the low byte of
* d0 to single and joins fin.
global fsin
fsin:
andi.b #$30,d0 * clear rnd prec
ori.b #s_mode*$10,d0 * insert sgl precision
bra.b fin
* fdin(): fdmove emulation entry. Forces the rounding precision in the low
* byte of d0 to double. There is no branch at the end: execution continues
* into the code that immediately follows this entry point.
global fdin
fdin:
andi.b #$30,d0 * clear rnd prec
ori.b #d_mode*$10,d0 * insert dbl precision
* fin(): a0 = ptr to src operand, d0 = rnd prec,mode. The rounding info is
* saved in L_SCR3 and the source tag is loaded into d1; a non-zero tag goes
* to fin_not_norm.
global fin
fin:
move.l d0,L_SCR3(a6) * store rnd info
move.b STAG(a6),d1 * fetch src optype tag
bne.w fin_not_norm * optimize on non-norm input
*
* FP MOVE IN: NORMs and DENORMs ONLY!
*
fin_norm:
andi.b #$c0,d0 * is precision extended?
bne.w fin_not_ext * no, so go handle dbl or sgl
*
* precision selected is extended. so...we cannot get an underflow
* or overflow because of rounding to the correct precision. so...
* skip the scaling and unscaling...
*
* only the 'N' ccode bit is updated here; the operand is returned as is.
tst.b SRC_EX.w(a0) * is the operand negative?
bpl.b fin_norm_done * no
bset #neg_bit,FPSR_CC(a6) * yes, so set 'N' ccode bit
fin_norm_done:
fmovem.x SRC.w(a0),fp0 * return result in fp0
rts
*
* for an extended precision DENORM, the UNFL exception bit is set
* the accrued bit is NOT set in this instance(no inexactness!)
* entered from fin_not_norm with the rnd info still in d0. the operand is
* returned unchanged in fp0; an EXOP is built only when UNFL is enabled.
*
fin_denorm:
andi.b #$c0,d0 * is precision extended?
bne.w fin_not_ext * no, so go handle dbl or sgl
bset #unfl_bit,FPSR_EXCEPT(a6) * set unfl exc bit
tst.b SRC_EX.w(a0) * is the operand negative?
bpl.b fin_denorm_done * no
bset #neg_bit,FPSR_CC(a6) * yes, so set 'N' ccode bit
fin_denorm_done:
fmovem.x SRC.w(a0),fp0 * return result in fp0
btst #unfl_bit,FPCR_ENABLE(a6) * is UNFL enabled?
bne.b fin_denorm_unfl_ena * yes
rts
*
* the input is an extended DENORM and underflow is enabled in the FPCR.
* normalize the mantissa and add the bias of 0x6000 to the resulting negative
* exponent and insert back into the operand.
* the operand is copied to FP_SCR0 first so the caller's source is not
* modified; norm() is given a0 = FP_SCR0 and its d0 result is used as the
* shift count. the EXOP is returned in fp1.
*
fin_denorm_unfl_ena:
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
lea FP_SCR0(a6),a0 * pass: ptr to operand
bsr.l norm * normalize result
neg.w d0 * new exponent = -(shft val)
addi.w #$6000,d0 * add new bias to exponent
move.w FP_SCR0_EX(a6),d1 * fetch old sign,exp
andi.w #$8000,d1 * keep old sign
andi.w #$7fff,d0 * clear sign position
or.w d1,d0 * concat new exp,old sign
move.w d0,FP_SCR0_EX(a6) * insert new exponent
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
rts
*
* operand is to be rounded to single or double precision
* d0.b holds the precision bits left by the "andi.b #$c0,d0" of the caller.
*
fin_not_ext:
cmpi.b #s_mode*$10,d0 * separate sgl/dbl prec
bne.b fin_dbl
*
* operand is to be rounded to single precision
* the operand is copied to FP_SCR0 and scaled; the scale factor returned in
* d0 decides between underflow, overflow, may-overflow and the normal case,
* which is reached by falling out of the last test.
*
fin_sgl:
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_to_zero_src * calculate scale factor
cmpi.l #$3fff-$3f80,d0 * will move in underflow?
bge.w fin_sd_unfl * yes; go handle underflow
cmpi.l #$3fff-$407e,d0 * will move in overflow?
beq.w fin_sd_may_ovfl * maybe; go check
blt.w fin_sd_ovfl * yes; go handle overflow
*
* operand will NOT overflow or underflow when moved into the fp reg file
* the scaled operand in FP_SCR0 is moved with the caller's FPCR setting so
* the FPU does the rounding; the scale factor in d0 is then removed from
* the exponent of the rounded result. d1 is scratch, d2 is saved/restored.
*
fin_sd_normal:
fmove.l #$0,fpsr * clear FPSR
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.x FP_SCR0(a6),fp0 * perform move
fmove.l fpsr,d1 * save FPSR
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
* also entered from fin_sd_may_ovfl with the rounded result in fp0
fin_sd_normal_exit:
move.l d2,-(sp) * save d2
fmovem.x fp0,FP_SCR0(a6) * store out result
move.w FP_SCR0_EX(a6),d1 * load {sgn,exp}
move.w d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
sub.l d0,d1 * undo scaling: exp - scale factor
andi.w #$8000,d2 * keep old sign
or.w d1,d2 * concat old sign,new exponent
move.w d2,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp0 * return result in fp0
rts
*
* operand is to be rounded to double precision
* same flow as fin_sgl with the double precision thresholds; the normal
* case needs an explicit branch because fin_sd_normal is not adjacent.
*
fin_dbl:
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_to_zero_src * calculate scale factor
cmpi.l #$3fff-$3c00,d0 * will move in underflow?
bge.w fin_sd_unfl * yes; go handle underflow
cmpi.l #$3fff-$43fe,d0 * will move in overflow?
beq.w fin_sd_may_ovfl * maybe; go check
blt.w fin_sd_ovfl * yes; go handle overflow
bra.w fin_sd_normal * no; go handle normalized op
*
* operand WILL underflow when moved in to the fp register file
* on entry FP_SCR0 holds the scaled operand and d0 the scale factor.
*
fin_sd_unfl:
bset #unfl_bit,FPSR_EXCEPT(a6) * set unfl exc bit
tst.b FP_SCR0_EX(a6) * is operand negative?
bpl.b fin_sd_unfl_tst
bset #neg_bit,FPSR_CC(a6) * set 'N' ccode bit
* if underflow or inexact is enabled, then go calculate the EXOP first.
fin_sd_unfl_tst:
move.b FPCR_ENABLE(a6),d1
andi.b #$0b,d1 * is UNFL or INEX enabled?
bne.b fin_sd_unfl_ena * yes
* default result: unf_res reworks FP_SCR0 in place (a0 = operand address,
* d1 = rnd prec,mode); d0 comes back with ccode bits.
fin_sd_unfl_dis:
lea FP_SCR0(a6),a0 * pass: result addr
move.l L_SCR3(a6),d1 * pass: rnd prec,mode
bsr.l unf_res * calculate default result
or.b d0,FPSR_CC(a6) * unf_res may have set 'Z'
fmovem.x FP_SCR0(a6),fp0 * return default result in fp0
rts
*
* operand will underflow AND underflow or inexact is enabled.
* therefore, we must return the result rounded to extended precision.
* the EXOP is built in FP_SCR1 from the mantissa in FP_SCR0 with the
* exponent (exp - scale factor + $6000), leaving FP_SCR0 intact for the
* default result computed by fin_sd_unfl_dis.
*
fin_sd_unfl_ena:
move.l FP_SCR0_HI(a6),FP_SCR1_HI(a6)
move.l FP_SCR0_LO(a6),FP_SCR1_LO(a6)
move.w FP_SCR0_EX(a6),d1 * load current exponent
move.l d2,-(sp) * save d2
move.w d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
sub.l d0,d1 * subtract scale factor
andi.w #$8000,d2 * extract old sign
addi.l #$6000,d1 * add new bias
andi.w #$7fff,d1 * clear sign position
or.w d1,d2 * concat old sign,new exp
move.w d2,FP_SCR1_EX(a6) * insert new exponent
fmovem.x FP_SCR1(a6),fp1 * return EXOP in fp1
move.l (sp)+,d2 * restore d2
bra.b fin_sd_unfl_dis
*
* operand WILL overflow.
* the move is still executed with the caller's FPCR setting so that the
* INEX2/N status bits are collected before the overflow bits are added.
*
fin_sd_ovfl:
fmove.l #$0,fpsr * clear FPSR
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.x FP_SCR0(a6),fp0 * perform move
fmove.l #$0,fpcr * clear FPCR
fmove.l fpsr,d1 * save FPSR
or.l d1,USER_FPSR(a6) * save INEX2,N
* fin_sd_may_ovfl joins here once it has detected an overflow
fin_sd_ovfl_tst:
ori.l #ovfl_inx_mask,USER_FPSR(a6) * set ovfl/aovfl/ainex
move.b FPCR_ENABLE(a6),d1
andi.b #$13,d1 * is OVFL or INEX enabled?
bne.b fin_sd_ovfl_ena * yes
*
* OVFL is not enabled; therefore, we must create the default result by
* calling ovf_res().
* ovf_res is called with d1 = sign flag and d0 = rnd prec,mode; afterwards
* the result is read through a0 and d0 is OR-ed into the ccode byte.
*
fin_sd_ovfl_dis:
btst #neg_bit,FPSR_CC(a6) * is result negative?
sne d1 * set sign param accordingly
move.l L_SCR3(a6),d0 * pass: prec,mode
bsr.l ovf_res * calculate default result
or.b d0,FPSR_CC(a6) * set INF,N if applicable
fmovem.x (a0),fp0 * return default result in fp0
rts
*
* OVFL is enabled.
* the INEX2 bit has already been updated by the round to the correct precision.
* now, round to extended(and don't alter the FPSR).
* the EXOP is built from the operand held in FP_SCR0 (no FPU operation is
* executed here): its exponent becomes exp - scale factor - $6000 and the
* result is returned in fp1. d0 = scale factor on entry.
*
fin_sd_ovfl_ena:
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * fetch {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * undo scaling: exp - scale factor
subi.l #$6000,d1 * subtract bias
andi.w #$7fff,d1 * clear sign position
or.w d2,d1 * concat old sign,new exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
bra.b fin_sd_ovfl_dis
*
* the move in MAY overflow. so...
* do the move with the caller's FPCR setting and compare the magnitude of
* the scaled, rounded result with 2: >= 2 means rounding carried into the
* overflow range, otherwise the result is unscaled and returned.
*
fin_sd_may_ovfl:
fmove.l #$0,fpsr * clear FPSR
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.x FP_SCR0(a6),fp0 * perform the move
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
fabs.x fp0,fp1 * make a copy of result
fcmp.b #$2,fp1 * is |result| >= 2.b?
fbge.w fin_sd_ovfl_tst * yes; overflow has occurred
* no, it didn't overflow; we have correct result
bra.w fin_sd_normal_exit
*#########################################################################
*
* operand is not a NORM: check its optype and branch accordingly
* d1 = source tag as loaded by fin.
*
fin_not_norm:
cmpi.b #DENORM,d1 * weed out DENORM
beq.w fin_denorm
cmpi.b #SNAN,d1 * weed out SNANs
beq.l res_snan_1op
cmpi.b #QNAN,d1 * weed out QNANs
beq.l res_qnan_1op
*
* do the fmove in; at this point, only possible ops are ZERO and INF.
* use fmov to determine ccodes.
* prec:mode should be zero at this point but it won't affect answer anyways.
* the FPSR is read back after the move and rotated left by 8 so that its
* top byte lands in the low byte of d0, which is stored as the ccode byte.
*
fmove.x SRC.w(a0),fp0 * do fmove in
fmove.l fpsr,d0 * no exceptions possible
rol.l #$8,d0 * put ccodes in lo byte
move.b d0,FPSR_CC(a6) * insert correct ccodes
rts
*########################################################################
* XDEF **************************************************************** #
* fdiv(): emulates the fdiv instruction #
* fsdiv(): emulates the fsdiv instruction #
* fddiv(): emulates the fddiv instruction #
* #
* XREF **************************************************************** #
* scale_to_zero_src() - scale src exponent to zero #
* scale_to_zero_dst() - scale dst exponent to zero #
* unf_res() - return default underflow result #
* ovf_res() - return default overflow result #
* res_qnan() - return QNAN result #
* res_snan() - return SNAN result #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* a1 = pointer to extended precision destination operand #
* d0 rnd prec,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = result #
* fp1 = EXOP (if exception occurred) #
* #
* ALGORITHM *********************************************************** #
* Handle NANs, infinities, and zeroes as special cases. Divide #
* norms/denorms into ext/sgl/dbl precision. #
* For norms/denorms, scale the exponents such that a divide #
* instruction won't cause an exception. Use the regular fdiv to #
* compute a result. Check if the regular operands would have taken #
* an exception. If so, return the default overflow/underflow result #
* and return the EXOP if exceptions are enabled. Else, scale the #
* result operand to the proper exponent. #
* #
*########################################################################
* Scale-factor thresholds used by fdiv, one longword per rounding precision
* in the order ext, sgl, dbl. fdiv indexes both tables with the precision
* index in d1 (scaled by 4) and compares the combined scale factor in d0
* against the selected entry. The tables are addressed PC-relative, so they
* must stay close to the code that reads them.
align $10,$51FC
tbl_fdiv_unfl:
.dc.l $3fff-$0000 * ext_unfl
.dc.l $3fff-$3f81 * sgl_unfl
.dc.l $3fff-$3c01 * dbl_unfl
tbl_fdiv_ovfl:
.dc.l $3fff-$7ffe * ext overflow exponent
.dc.l $3fff-$407e * sgl overflow exponent
.dc.l $3fff-$43fe * dbl overflow exponent
* fsdiv(): fdiv with the rounding precision in d0 forced to single.
* Only the low byte of d0 is modified before joining fdiv.
global fsdiv
fsdiv:
andi.b #$30,d0 * clear rnd prec
ori.b #s_mode*$10,d0 * insert sgl prec
bra.b fdiv
* fddiv(): fdiv with the rounding precision in d0 forced to double.
* There is no branch at the end: execution continues into the code that
* immediately follows this entry point.
global fddiv
fddiv:
andi.b #$30,d0 * clear rnd prec
ori.b #d_mode*$10,d0 * insert dbl prec
* fdiv(): a0 = ptr to src operand, a1 = ptr to dst operand, d0 = rnd
* prec,mode. The rounding info is saved in L_SCR3. The two operand tags are
* combined into d1 = (DTAG << 3) | STAG; a non-zero value goes to
* fdiv_not_norm, zero falls into fdiv_norm.
global fdiv
fdiv:
move.l d0,L_SCR3(a6) * store rnd info
clr.w d1
move.b DTAG(a6),d1
lsl.b #$3,d1
or.b STAG(a6),d1 * combine src tags
bne.w fdiv_not_norm * optimize on non-norm input
*
* DIVIDE: NORMs and DENORMs ONLY!
* copy dst to FP_SCR1 and src to FP_SCR0, scale both exponents and classify
* the operation from the difference of the two scale factors.
*
fdiv_norm:
move.w DST_EX.w(a1),FP_SCR1_EX(a6)
move.l DST_HI(a1),FP_SCR1_HI(a6)
move.l DST_LO(a1),FP_SCR1_LO(a6)
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_to_zero_src * scale src exponent
move.l d0,-(sp) * save scale factor 1
bsr.l scale_to_zero_dst * scale dst exponent
* the saved src factor is negated and the dst factor added to it
neg.l (sp) * SCALE FACTOR = scale2 - scale1
add.l d0,(sp) * (dst scale - src scale)
* d1 = low word of the saved rnd info; shifting its low byte right by 6
* leaves the 2-bit precision field as the table index
move.w 2+L_SCR3(a6),d1 * fetch precision
lsr.b #$6,d1 * shift to lo bits
move.l (sp)+,d0 * load S.F.
* d0 (scale factor) and d1 (precision index) stay live on the paths below
cmp.l (tbl_fdiv_ovfl.b,pc,d1.w*4),d0 * will result overflow?
ble.w fdiv_may_ovfl * result may overflow; go check
cmp.l (tbl_fdiv_unfl.w,pc,d1.w*4),d0 * will result underflow?
beq.w fdiv_may_unfl * maybe
bgt.w fdiv_unfl * yes; go handle underflow
* fdiv_normal: the divide will neither overflow nor underflow. Divide the
* scaled operands with the caller's FPCR setting, then remove the scale
* factor (d0) from the exponent of the result. d1 is scratch, d2 is
* saved/restored.
fdiv_normal:
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fdiv.x FP_SCR0(a6),fp0 * perform divide
fmove.l fpsr,d1 * save FPSR
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
* also entered from fdiv_no_ovfl with the result in fp0 and the scale
* factor restored to d0
fdiv_normal_exit:
fmovem.x fp0,FP_SCR0(a6) * store result on stack
move.l d2,-(sp) * store d2
move.w FP_SCR0_EX(a6),d1 * load {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * undo scaling: exp - scale factor
or.w d2,d1 * concat old sign,new exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp0 * return result in fp0
rts
*
* Exponent limits for the post-divide overflow check in fdiv_may_ovfl,
* one longword per precision index in d1 (same ordering as tbl_fdiv_ovfl).
*
tbl_fdiv_ovfl2:
.dc.l $7fff
.dc.l $407f
.dc.l $43ff
*
* fdiv_no_ovfl: the borderline divide did not overflow. Pop the saved
* scale factor back into d0 and finish through the normal exit.
*
fdiv_no_ovfl:
move.l (sp)+,d0 * restore scale factor
bra.b fdiv_normal_exit
*
* fdiv_may_ovfl: the scale factor alone cannot decide. Perform the divide,
* then compare (result exponent - scale factor) with tbl_fdiv_ovfl2[d1].
* The scale factor is parked on the stack because d0 is reused as scratch;
* d1 still holds the precision index set up in fdiv_norm.
*
fdiv_may_ovfl:
move.l d0,-(sp) * save scale factor
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fdiv.x FP_SCR0(a6),fp0 * execute divide
fmove.l fpsr,d0
fmove.l #$0,fpcr
or.l d0,USER_FPSR(a6) * save INEX,N
* the result is pushed only to read its exponent word; the 12 bytes are
* discarded again so that (sp) is once more the saved scale factor
fmovem.x fp0,-(sp) * save result to stack
move.w (sp),d0 * fetch new exponent
add.l #$c,sp * clear result from stack
andi.l #$7fff,d0 * strip sign
sub.l (sp),d0 * add scale factor
cmp.l (tbl_fdiv_ovfl2.b,pc,d1.w*4),d0
blt.b fdiv_no_ovfl
move.l (sp)+,d0
*
* fdiv_ovfl_tst: overflow confirmed. Record it in USER_FPSR and pick the
* enabled or disabled path from FPCR_ENABLE (mask $13).
*
fdiv_ovfl_tst:
ori.l #ovfl_inx_mask,USER_FPSR(a6) * set ovfl/aovfl/ainex
move.b FPCR_ENABLE(a6),d1
andi.b #$13,d1 * is OVFL or INEX enabled?
bne.b fdiv_ovfl_ena * yes
*
* fdiv_ovfl_dis: return the default overflow result from ovf_res().
* d1 = $ff when the N ccode bit is set (else $00), d0 = rnd prec,mode.
* After the call d0 is OR'ed into FPSR_CC and fp0 is loaded from (a0),
* so ovf_res presumably returns ccodes in d0 and a result pointer in
* a0 -- confirm against ovf_res.
*
fdiv_ovfl_dis:
btst #neg_bit,FPSR_CC(a6) * is result negative?
sne d1 * set sign param accordingly
move.l L_SCR3(a6),d0 * pass prec:rnd
bsr.l ovf_res * calculate default result
or.b d0,FPSR_CC(a6) * set INF if applicable
fmovem.x (a0),fp0 * return default result in fp0
rts
*
* fdiv_ovfl_ena: OVFL or INEX is enabled, so an EXOP must be produced in
* fp1. For extended precision the result already in fp0 is used; for
* sgl/dbl the divide is redone first (fdiv_ovfl_ena_sd).
*
fdiv_ovfl_ena:
move.l L_SCR3(a6),d1
andi.b #$c0,d1 * is precision extended?
bne.b fdiv_ovfl_ena_sd * no, do sgl or dbl
*
* fdiv_ovfl_ena_cont: EXOP exponent = result exponent - scale factor
* - $6000, truncated to 15 bits, with the result's sign re-attached.
* Control then continues at fdiv_ovfl_dis to produce fp0.
*
fdiv_ovfl_ena_cont:
fmovem.x fp0,FP_SCR0(a6) * move result to stack
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * fetch {sgn,exp}
move.w d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
sub.l d0,d1 * add scale factor
subi.l #$6000,d1 * subtract bias
andi.w #$7fff,d1 * clear sign bit
andi.w #$8000,d2 * keep old sign
or.w d2,d1 * concat old sign,new exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
bra.b fdiv_ovfl_dis
*
* fdiv_ovfl_ena_sd: sgl/dbl precision. Redo the divide with only the
* rounding-mode bits ($30) in the FPCR, i.e. with a zero precision field.
* FPSR is not cleared or re-read here.
*
fdiv_ovfl_ena_sd:
fmovem.x FP_SCR1(a6),fp0 * load dst operand
move.l L_SCR3(a6),d1
andi.b #$30,d1 * keep rnd mode
fmove.l d1,fpcr * set FPCR
fdiv.x FP_SCR0(a6),fp0 * execute divide
fmove.l #$0,fpcr * clear FPCR
bra.b fdiv_ovfl_ena_cont
*
* fdiv_unfl: the divide underflows. Set the UNFL exception bit, perform
* the scaled divide in round-to-zero mode, accumulate the FPSR bits, then
* pick the enabled or disabled path from FPCR_ENABLE (mask $0b).
* d0 still holds the scale factor on entry.
*
fdiv_unfl:
bset #unfl_bit,FPSR_EXCEPT(a6) * set unfl exc bit
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l #rz_mode*$10,fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fdiv.x FP_SCR0(a6),fp0 * execute divide
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
move.b FPCR_ENABLE(a6),d1
andi.b #$0b,d1 * is UNFL or INEX enabled?
bne.b fdiv_unfl_ena * yes
*
* fdiv_unfl_dis: hand the RZ result (stored in FP_SCR0) to unf_res() with
* a0 = operand address and d1 = rnd prec,mode. fp0 is reloaded from
* FP_SCR0 afterwards, so unf_res presumably rewrites the operand in place
* and returns ccode bits in d0 -- confirm against unf_res.
*
fdiv_unfl_dis:
fmovem.x fp0,FP_SCR0(a6) * store out result
lea FP_SCR0(a6),a0 * pass: result addr
move.l L_SCR3(a6),d1 * pass: rnd prec,mode
bsr.l unf_res * calculate default result
or.b d0,FPSR_CC(a6) * 'Z' may have been set
fmovem.x FP_SCR0(a6),fp0 * return default result in fp0
rts
*
* UNFL is enabled.
* Compute the EXOP in fp1 with a second divide. Extended precision uses
* the user's FPCR image as is; sgl/dbl use only the rounding-mode bits
* (fdiv_unfl_ena_sd). fp0 keeps the RZ result for fdiv_unfl_dis.
*
fdiv_unfl_ena:
fmovem.x FP_SCR1(a6),fp1 * load dst op
move.l L_SCR3(a6),d1
andi.b #$c0,d1 * is precision extended?
bne.b fdiv_unfl_ena_sd * no, sgl or dbl
fmove.l L_SCR3(a6),fpcr * set FPCR
*
* fdiv_unfl_ena_cont: EXOP exponent = result exponent - scale factor
* + $6000, truncated to 15 bits, with the result's sign re-attached.
*
fdiv_unfl_ena_cont:
fmove.l #$0,fpsr * clear FPSR
fdiv.x FP_SCR0(a6),fp1 * execute divide
fmove.l #$0,fpcr * clear FPCR
fmovem.x fp1,FP_SCR0(a6) * save result to stack
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * fetch {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * add scale factor
addi.l #$6000,d1 * add bias
andi.w #$7fff,d1
or.w d2,d1 * concat old sign,new exp
move.w d1,FP_SCR0_EX(a6) * insert new exp
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
bra.w fdiv_unfl_dis
*
* fdiv_unfl_ena_sd: sgl/dbl precision; load the FPCR with the rounding
* mode only before rejoining fdiv_unfl_ena_cont.
*
fdiv_unfl_ena_sd:
move.l L_SCR3(a6),d1
andi.b #$30,d1 * use only rnd mode
fmove.l d1,fpcr * set FPCR
bra.b fdiv_unfl_ena_cont
*
* the divide operation MAY underflow:
*
*
* fdiv_may_unfl: the scale factor sits exactly on the underflow limit.
* Divide with the user's rounding settings and compare |result| with 1:
* greater means no underflow, smaller means underflow, and equality needs
* the round-to-zero re-check below. d0 (scale factor) is left untouched
* for fdiv_normal_exit / fdiv_unfl.
*
fdiv_may_unfl:
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fdiv.x FP_SCR0(a6),fp0 * execute divide
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
fabs.x fp0,fp1 * make a copy of result
fcmp.b #$1,fp1 * compare |result| with 1
fbgt.w fdiv_normal_exit * |result| > 1; no underflow occurred
fblt.w fdiv_unfl * |result| < 1; underflow occurred
*
* we still don't know if underflow occurred. result is ~ equal to 1. but,
* we don't know if the result was an underflow that rounded up to a 1
* or a normalized number that rounded down to a 1. so, redo the entire
* operation using RZ as the rounding mode to see what the pre-rounded
* result is. this case should be relatively rare.
* (the user's precision bits are kept; only the rounding mode is replaced.
* fp0 still holds the first result for whichever exit is taken.)
*
fmovem.x FP_SCR1(a6),fp1 * load dst op into fp1
move.l L_SCR3(a6),d1
andi.b #$c0,d1 * keep rnd prec
ori.b #rz_mode*$10,d1 * insert RZ
fmove.l d1,fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fdiv.x FP_SCR0(a6),fp1 * execute divide
fmove.l #$0,fpcr * clear FPCR
fabs.x fp1 * make absolute value
fcmp.b #$1,fp1 * is |result| < 1.b?
fbge.w fdiv_normal_exit * no; no underflow occurred
bra.w fdiv_unfl * yes; underflow occurred
*###########################################################################
*
* Divide: inputs are not both normalized; what are they?
*
*
* fdiv_not_norm: dispatch on d1 = (DTAG << 3) | STAG as built at the fdiv
* entry. Each table entry is a 16-bit offset from tbl_fdiv_op; the offset
* is fetched and then used for a PC-relative jump. Rows are grouped by
* destination tag (8 slots each) and the row comments read "DST / SRC".
* The two unused slots in every row hold offset 0.
* The words $4AFC,48 ahead of the table are data, not executed; 48 equals
* the number of table entries (presumably a switch-table marker for
* tools -- confirm).
*
fdiv_not_norm:
move.w (tbl_fdiv_op.b,pc,d1.w*2),d1
jmp (tbl_fdiv_op.b,pc,d1.w*1)
.dc.w $4AFC,48
tbl_fdiv_op:
.dc.w fdiv_norm-tbl_fdiv_op * NORM / NORM
.dc.w fdiv_inf_load-tbl_fdiv_op * NORM / ZERO
.dc.w fdiv_zero_load-tbl_fdiv_op * NORM / INF
.dc.w fdiv_res_qnan-tbl_fdiv_op * NORM / QNAN
.dc.w fdiv_norm-tbl_fdiv_op * NORM / DENORM
.dc.w fdiv_res_snan-tbl_fdiv_op * NORM / SNAN
.dc.w tbl_fdiv_op-tbl_fdiv_op *
.dc.w tbl_fdiv_op-tbl_fdiv_op *
.dc.w fdiv_zero_load-tbl_fdiv_op * ZERO / NORM
.dc.w fdiv_res_operr-tbl_fdiv_op * ZERO / ZERO
.dc.w fdiv_zero_load-tbl_fdiv_op * ZERO / INF
.dc.w fdiv_res_qnan-tbl_fdiv_op * ZERO / QNAN
.dc.w fdiv_zero_load-tbl_fdiv_op * ZERO / DENORM
.dc.w fdiv_res_snan-tbl_fdiv_op * ZERO / SNAN
.dc.w tbl_fdiv_op-tbl_fdiv_op *
.dc.w tbl_fdiv_op-tbl_fdiv_op *
.dc.w fdiv_inf_dst-tbl_fdiv_op * INF / NORM
.dc.w fdiv_inf_dst-tbl_fdiv_op * INF / ZERO
.dc.w fdiv_res_operr-tbl_fdiv_op * INF / INF
.dc.w fdiv_res_qnan-tbl_fdiv_op * INF / QNAN
.dc.w fdiv_inf_dst-tbl_fdiv_op * INF / DENORM
.dc.w fdiv_res_snan-tbl_fdiv_op * INF / SNAN
.dc.w tbl_fdiv_op-tbl_fdiv_op *
.dc.w tbl_fdiv_op-tbl_fdiv_op *
.dc.w fdiv_res_qnan-tbl_fdiv_op * QNAN / NORM
.dc.w fdiv_res_qnan-tbl_fdiv_op * QNAN / ZERO
.dc.w fdiv_res_qnan-tbl_fdiv_op * QNAN / INF
.dc.w fdiv_res_qnan-tbl_fdiv_op * QNAN / QNAN
.dc.w fdiv_res_qnan-tbl_fdiv_op * QNAN / DENORM
.dc.w fdiv_res_snan-tbl_fdiv_op * QNAN / SNAN
.dc.w tbl_fdiv_op-tbl_fdiv_op *
.dc.w tbl_fdiv_op-tbl_fdiv_op *
.dc.w fdiv_norm-tbl_fdiv_op * DENORM / NORM
.dc.w fdiv_inf_load-tbl_fdiv_op * DENORM / ZERO
.dc.w fdiv_zero_load-tbl_fdiv_op * DENORM / INF
.dc.w fdiv_res_qnan-tbl_fdiv_op * DENORM / QNAN
.dc.w fdiv_norm-tbl_fdiv_op * DENORM / DENORM
.dc.w fdiv_res_snan-tbl_fdiv_op * DENORM / SNAN
.dc.w tbl_fdiv_op-tbl_fdiv_op *
.dc.w tbl_fdiv_op-tbl_fdiv_op *
.dc.w fdiv_res_snan-tbl_fdiv_op * SNAN / NORM
.dc.w fdiv_res_snan-tbl_fdiv_op * SNAN / ZERO
.dc.w fdiv_res_snan-tbl_fdiv_op * SNAN / INF
.dc.w fdiv_res_snan-tbl_fdiv_op * SNAN / QNAN
.dc.w fdiv_res_snan-tbl_fdiv_op * SNAN / DENORM
.dc.w fdiv_res_snan-tbl_fdiv_op * SNAN / SNAN
.dc.w tbl_fdiv_op-tbl_fdiv_op *
.dc.w tbl_fdiv_op-tbl_fdiv_op *
*
* Local stubs: the table offsets are only 16 bits wide, so the shared NAN
* and OPERR handlers are reached through these long branches.
*
fdiv_res_qnan:
bra.l res_qnan
fdiv_res_snan:
bra.l res_snan
fdiv_res_operr:
bra.l res_operr
*
* fdiv_zero_load: the quotient is a ZERO. The sign is the exclusive-or of
* the operand sign bits (top bit of the first byte of each operand).
* Returns +/-0 in fp0 and overwrites FPSR_CC with Z (plus N if negative).
* Uses d0 and d1 as scratch.
*
global fdiv_zero_load * global for fsgldiv
fdiv_zero_load:
move.b SRC_EX.w(a0),d0 * result sign is exclusive
move.b DST_EX.w(a1),d1 * or of input signs.
eor.b d0,d1
bpl.b fdiv_zero_load_p * result is positive
fmove.s #$80000000,fp0 * load a -ZERO
move.b #z_bmask+neg_bmask,FPSR_CC(a6) * set Z/N
rts
fdiv_zero_load_p:
fmove.s #$00000000,fp0 * load a +ZERO
move.b #z_bmask,FPSR_CC(a6) * set Z
rts
*
* The destination was In Range and the source was a ZERO. The result,
* therefore, is an INF w/ the proper sign.
* So, determine the sign and return a new INF (w/ the j-bit cleared).
*
*
* fdiv_inf_load: divide by ZERO. Sets DZ/ADZ in the low word of
* USER_FPSR, then returns an INF in fp0 whose sign is the exclusive-or of
* the operand signs. FPSR_CC is overwritten with INF (plus N if
* negative). Uses d0 and d1 as scratch.
*
global fdiv_inf_load * global for fsgldiv
fdiv_inf_load:
ori.w #dz_mask+adz_mask,2+USER_FPSR(a6) * set DZ/ADZ
move.b SRC_EX.w(a0),d0 * load both signs
move.b DST_EX.w(a1),d1
eor.b d0,d1
bpl.b fdiv_inf_load_p * result is positive
fmove.s #$ff800000,fp0 * make result -INF
move.b #inf_bmask+neg_bmask,FPSR_CC(a6) * set INF/N
rts
fdiv_inf_load_p:
fmove.s #$7f800000,fp0 * make result +INF
move.b #inf_bmask,FPSR_CC(a6) * set INF
rts
*
* The destination was an INF w/ an In Range or ZERO source, the result is
* an INF w/ the proper sign.
* The 68881/882 returns the destination INF w/ the new sign (if the j-bit of the
* dst INF is set, then the j-bit of the result INF is also set).
*
*
* fdiv_inf_dst: the destination is an INF. Return the destination operand
* itself (loaded from a1) with its sign replaced by the exclusive-or of
* the operand signs: fabs clears the sign, fneg then sets it for the
* negative case. FPSR_CC is overwritten with INF (plus N if negative).
* Uses d0 and d1 as scratch.
*
global fdiv_inf_dst * global for fsgldiv
fdiv_inf_dst:
move.b DST_EX.w(a1),d0 * load both signs
move.b SRC_EX.w(a0),d1
eor.b d0,d1
bpl.b fdiv_inf_dst_p * result is positive
fmovem.x DST.w(a1),fp0 * return result in fp0
fabs.x fp0 * clear sign bit
fneg.x fp0 * set sign bit
move.b #inf_bmask+neg_bmask,FPSR_CC(a6) * set INF/NEG
rts
fdiv_inf_dst_p:
fmovem.x DST.w(a1),fp0 * return result in fp0
fabs.x fp0 * return positive INF
move.b #inf_bmask,FPSR_CC(a6) * set INF
rts
*########################################################################
* XDEF **************************************************************** #
* fneg(): emulates the fneg instruction #
* fsneg(): emulates the fsneg instruction #
* fdneg(): emulates the fdneg instruction #
* #
* XREF **************************************************************** #
* norm() - normalize a denorm to provide EXOP #
* scale_to_zero_src() - scale sgl/dbl source exponent #
* ovf_res() - return default overflow result #
* unf_res() - return default underflow result #
* res_qnan_1op() - return QNAN result #
* res_snan_1op() - return SNAN result #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* d0 = rnd prec,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = result #
* fp1 = EXOP (if exception occurred) #
* #
* ALGORITHM *********************************************************** #
* Handle NANs, zeroes, and infinities as special cases. Separate #
* norms/denorms into ext/sgl/dbl precisions. Extended precision can be #
* emulated by simply toggling the sign bit. Sgl/dbl operands must be scaled #
* and an actual fneg performed to see if overflow/underflow would have #
* occurred. If so, return default underflow/overflow result. Else, #
* scale the result exponent and return result. FPSR gets set based on #
* the result value. #
* #
*########################################################################
*
* fsneg/fdneg: keep only the d0 bits selected by the $30 mask, insert the
* single/double precision code and join the common fneg entry.
*
global fsneg
fsneg:
andi.b #$30,d0 * clear rnd prec
ori.b #s_mode*$10,d0 * insert sgl precision
bra.b fneg
global fdneg
fdneg:
andi.b #$30,d0 * clear rnd prec
ori.b #d_mode*$10,d0 * insert dbl prec
*
* fneg: common entry (fdneg falls through into it).
*   d0 = rnd prec,mode (saved in L_SCR3), a0 = ptr to source operand
* A non-zero STAG is routed to fneg_not_norm with the tag in d1.
*
global fneg
fneg:
move.l d0,L_SCR3(a6) * store rnd info
move.b STAG(a6),d1
bne.w fneg_not_norm * optimize on non-norm input
*
* NEGATE SIGN : norms and denorms ONLY!
*
fneg_norm:
andi.b #$c0,d0 * is precision extended?
bne.w fneg_not_ext * no; go handle sgl or dbl
*
* precision selected is extended. so...we can not get an underflow
* or overflow because of rounding to the correct precision. so...
* skip the scaling and unscaling...
* the operand is copied to FP_SCR0 with its sign bit toggled; the branch
* below tests the sign of the toggled {sgn,exp} word.
*
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
move.w SRC_EX.w(a0),d0
eori.w #$8000,d0 * negate sign
bpl.b fneg_norm_load * sign is positive
move.b #neg_bmask,FPSR_CC(a6) * set 'N' ccode bit
fneg_norm_load:
move.w d0,FP_SCR0_EX(a6)
fmovem.x FP_SCR0(a6),fp0 * return result in fp0
rts
*
* for an extended precision DENORM, the UNFL exception bit is set
* the accrued bit is NOT set in this instance(no inexactness!)
*
*
* fneg_denorm: DENORM input (entered from fneg_not_norm with d0 still
* holding rnd prec,mode). For extended precision the UNFL exception bit
* is set and the sign-toggled operand is returned in fp0; sgl/dbl are
* handled by fneg_not_ext.
*
fneg_denorm:
andi.b #$c0,d0 * is precision extended?
bne.b fneg_not_ext * no; go handle sgl or dbl
bset #unfl_bit,FPSR_EXCEPT(a6) * set unfl exc bit
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
move.w SRC_EX.w(a0),d0
eori.w #$8000,d0 * negate sign
bpl.b fneg_denorm_done * result is positive
move.b #neg_bmask,FPSR_CC(a6) * result is negative; set 'N' ccode bit
fneg_denorm_done:
move.w d0,FP_SCR0_EX(a6)
fmovem.x FP_SCR0(a6),fp0 * return default result in fp0
btst #unfl_bit,FPCR_ENABLE(a6) * is UNFL enabled?
bne.b fneg_ext_unfl_ena * yes
rts
*
* the input is an extended DENORM and underflow is enabled in the FPCR.
* normalize the mantissa and add the bias of 0x6000 to the resulting negative
* exponent and insert back into the operand.
* norm() is called with a0 -> FP_SCR0; its d0 return value is used as the
* shift count (presumably the number of bit positions shifted -- confirm
* against norm). The EXOP is returned in fp1; fp0 is left as loaded above.
*
fneg_ext_unfl_ena:
lea FP_SCR0(a6),a0 * pass: ptr to operand
bsr.l norm * normalize result
neg.w d0 * new exponent = -(shft val)
addi.w #$6000,d0 * add new bias to exponent
move.w FP_SCR0_EX(a6),d1 * fetch old sign,exp
andi.w #$8000,d1 * keep old sign
andi.w #$7fff,d0 * clear sign position
or.w d1,d0 * concat old sign, new exponent
move.w d0,FP_SCR0_EX(a6) * insert new exponent
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
rts
*
* operand is either single or double
*
*
* fneg_not_ext: d0.b holds only the precision bits here (masked with $c0
* by the caller path); anything other than single goes to fneg_dbl.
*
fneg_not_ext:
cmpi.b #s_mode*$10,d0 * separate sgl/dbl prec
bne.b fneg_dbl
*
* operand is to be rounded to single precision
* the operand is copied to FP_SCR0 and scaled; the scale factor returned
* in d0 is compared against the single-precision limits:
*   d0 >= $3fff-$3f80 : underflow
*   d0 == $3fff-$407e : may overflow
*   d0 <  $3fff-$407e : overflow
*
fneg_sgl:
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_to_zero_src * calculate scale factor
cmpi.l #$3fff-$3f80,d0 * will move in underflow?
bge.w fneg_sd_unfl * yes; go handle underflow
cmpi.l #$3fff-$407e,d0 * will move in overflow?
beq.w fneg_sd_may_ovfl * maybe; go check
blt.w fneg_sd_ovfl * yes; go handle overflow
*
* operand will NOT overflow or underflow when moved in to the fp reg file
* (shared by the single and double paths)
*
fneg_sd_normal:
fmove.l #$0,fpsr * clear FPSR
fmove.l L_SCR3(a6),fpcr * set FPCR
fneg.x FP_SCR0(a6),fp0 * perform negation
fmove.l fpsr,d1 * save FPSR
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
*
* fneg_sd_normal_exit: expects the scaled result in fp0 and the scale
* factor in d0; subtracts d0 from the result exponent while keeping the
* sign bit, then reloads fp0. d2 is preserved via the stack.
*
fneg_sd_normal_exit:
move.l d2,-(sp) * save d2
fmovem.x fp0,FP_SCR0(a6) * store out result
move.w FP_SCR0_EX(a6),d1 * load sgn,exp
move.w d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
sub.l d0,d1 * add scale factor
andi.w #$8000,d2 * keep old sign
or.w d1,d2 * concat old sign,new exp
move.w d2,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp0 * return result in fp0
rts
*
* operand is to be rounded to double precision
*
*
* fneg_dbl: same flow as fneg_sgl with the double-precision limits:
*   d0 >= $3fff-$3c00 : underflow
*   d0 == $3fff-$43fe : may overflow
*   d0 <  $3fff-$43fe : overflow
*
fneg_dbl:
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_to_zero_src * calculate scale factor
cmpi.l #$3fff-$3c00,d0 * will move in underflow?
bge.b fneg_sd_unfl * yes; go handle underflow
cmpi.l #$3fff-$43fe,d0 * will move in overflow?
beq.w fneg_sd_may_ovfl * maybe; go check
blt.w fneg_sd_ovfl * yes; go handle overflow
bra.w fneg_sd_normal * no; go handle normalized op
*
* operand WILL underflow when moved in to the fp register file
*
*
* fneg_sd_unfl: set the UNFL exception bit and negate the scaled operand
* in place by toggling the top bit of the first byte of FP_SCR0_EX; the
* N ccode bit is set when the toggled sign is negative.
* d0 holds the scale factor on entry.
*
fneg_sd_unfl:
bset #unfl_bit,FPSR_EXCEPT(a6) * set unfl exc bit
eori.b #$80,FP_SCR0_EX(a6) * negate sign
bpl.b fneg_sd_unfl_tst
bset #neg_bit,FPSR_CC(a6) * set 'N' ccode bit
* if underflow or inexact is enabled, go calculate EXOP first.
fneg_sd_unfl_tst:
move.b FPCR_ENABLE(a6),d1
andi.b #$0b,d1 * is UNFL or INEX enabled?
bne.b fneg_sd_unfl_ena * yes
*
* fneg_sd_unfl_dis: call unf_res() with a0 = operand address and
* d1 = rnd prec,mode. fp0 is reloaded from FP_SCR0 afterwards, so unf_res
* presumably rewrites the operand in place and returns ccode bits in d0
* -- confirm against unf_res.
*
fneg_sd_unfl_dis:
lea FP_SCR0(a6),a0 * pass: result addr
move.l L_SCR3(a6),d1 * pass: rnd prec,mode
bsr.l unf_res * calculate default result
or.b d0,FPSR_CC(a6) * unf_res may have set 'Z'
fmovem.x FP_SCR0(a6),fp0 * return default result in fp0
rts
*
* operand will underflow AND underflow is enabled.
* therefore, we must return the result rounded to extended precision.
* the EXOP is built in FP_SCR1 from the already negated FP_SCR0:
* exponent = scaled exponent - scale factor + $6000, truncated to 15
* bits, with the (negated) sign re-attached. FP_SCR0 is left intact for
* fneg_sd_unfl_dis.
*
fneg_sd_unfl_ena:
move.l FP_SCR0_HI(a6),FP_SCR1_HI(a6)
move.l FP_SCR0_LO(a6),FP_SCR1_LO(a6)
move.w FP_SCR0_EX(a6),d1 * load current exponent
move.l d2,-(sp) * save d2
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * subtract scale factor
addi.l #$6000,d1 * add new bias
andi.w #$7fff,d1
or.w d2,d1 * concat new sign,new exp
move.w d1,FP_SCR1_EX(a6) * insert new exp
fmovem.x FP_SCR1(a6),fp1 * return EXOP in fp1
move.l (sp)+,d2 * restore d2
bra.b fneg_sd_unfl_dis
*
* operand WILL overflow.
*
*
* fneg_sd_ovfl: negate the scaled operand under the user's rounding
* settings so that INEX2 and N are captured in USER_FPSR. The rounded
* value lands in fp0 only; FP_SCR0 still holds the un-negated operand.
*
fneg_sd_ovfl:
fmove.l #$0,fpsr * clear FPSR
fmove.l L_SCR3(a6),fpcr * set FPCR
fneg.x FP_SCR0(a6),fp0 * perform negation
fmove.l #$0,fpcr * clear FPCR
fmove.l fpsr,d1 * save FPSR
or.l d1,USER_FPSR(a6) * save INEX2,N
*
* fneg_sd_ovfl_tst: also entered from fneg_sd_may_ovfl once overflow is
* confirmed. Record the overflow and pick the enabled or disabled path
* from FPCR_ENABLE (mask $13).
*
fneg_sd_ovfl_tst:
ori.l #ovfl_inx_mask,USER_FPSR(a6) * set ovfl/aovfl/ainex
move.b FPCR_ENABLE(a6),d1
andi.b #$13,d1 * is OVFL or INEX enabled?
bne.b fneg_sd_ovfl_ena * yes
*
* OVFL is not enabled; therefore, we must create the default result by
* calling ovf_res().
* d1 = $ff when the N ccode bit is set (else $00), d0 = rnd prec,mode.
* After the call d0 is OR'ed into FPSR_CC and fp0 is loaded from (a0),
* so ovf_res presumably returns ccodes in d0 and a result pointer in
* a0 -- confirm against ovf_res.
*
fneg_sd_ovfl_dis:
btst #neg_bit,FPSR_CC(a6) * is result negative?
sne d1 * set sign param accordingly
move.l L_SCR3(a6),d0 * pass: prec,mode
bsr.l ovf_res * calculate default result
or.b d0,FPSR_CC(a6) * set INF,N if applicable
fmovem.x (a0),fp0 * return default result in fp0
rts
*
* OVFL is enabled.
* the INEX2 bit has already been updated by the round to the correct precision.
* now, round to extended(and don't alter the FPSR).
*
*
* fneg_sd_ovfl_ena: build the overflow EXOP for fsneg/fdneg in fp1.
*   in : d0 = scale factor, FP_SCR0 = scaled source operand
*   out: fp1 = EXOP, then control continues at fneg_sd_ovfl_dis for fp0
* FP_SCR0 still carries the SOURCE sign at this point: the negation was
* performed into fp0 only (fneg_sd_ovfl / fneg_sd_may_ovfl never write
* FP_SCR0). The EXOP must carry the sign of the negated result, so the
* saved sign bit is toggled before it is re-attached. This matches the
* underflow path, which negates FP_SCR0 before building its EXOP.
* EXOP exponent = scaled exponent - scale factor - $6000, truncated to
* 15 bits. d2 is preserved via the stack.
*
fneg_sd_ovfl_ena:
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * fetch {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep source sign
eori.w #$8000,d2 * fneg: EXOP takes the negated sign
sub.l d0,d1 * add scale factor
subi.l #$6000,d1 * subtract bias
andi.w #$7fff,d1
or.w d2,d1 * concat sign,exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
move.l (sp)+,d2 * restore d2
bra.b fneg_sd_ovfl_dis
*
* the move in MAY overflow. so...
*
*
* fneg_sd_may_ovfl: the scale factor sits exactly on the overflow limit.
* Negate under the user's rounding settings and test the rounded result:
* |result| >= 2 means the rounding carried out and overflow occurred.
* d0 (scale factor) is left untouched for either exit.
*
fneg_sd_may_ovfl:
fmove.l #$0,fpsr * clear FPSR
fmove.l L_SCR3(a6),fpcr * set FPCR
fneg.x FP_SCR0(a6),fp0 * perform negation
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
fabs.x fp0,fp1 * make a copy of result
fcmp.b #$2,fp1 * is |result| >= 2.b?
fbge.w fneg_sd_ovfl_tst * yes; overflow has occurred
* no, it didn't overflow; we have correct result
bra.w fneg_sd_normal_exit
*#########################################################################
*
* input is not normalized; what is it?
*
*
* fneg_not_norm: d1 = STAG (non-zero). DENORMs go to fneg_denorm, NANs to
* the shared one-operand NAN handlers; whatever remains is negated
* directly by the FPU.
*
fneg_not_norm:
cmpi.b #DENORM,d1 * weed out DENORM
beq.w fneg_denorm
cmpi.b #SNAN,d1 * weed out SNAN
beq.l res_snan_1op
cmpi.b #QNAN,d1 * weed out QNAN
beq.l res_qnan_1op
*
* do the fneg; at this point, only possible ops are ZERO and INF.
* use fneg to determine ccodes.
* prec:mode should be zero at this point but it won't affect answer anyways.
* the rotate brings the top byte of the FPSR image (the condition code
* byte) into the low byte of d0 before it is stored to FPSR_CC.
*
fneg.x SRC_EX.w(a0),fp0 * do fneg
fmove.l fpsr,d0
rol.l #$8,d0 * put ccodes in lo byte
move.b d0,FPSR_CC(a6) * insert correct ccodes
rts
*########################################################################
* XDEF **************************************************************** #
* ftst(): emulates the ftst instruction #
* #
* XREF **************************************************************** #
* res{s,q}nan_1op() - set NAN result for monadic instruction #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* #
* OUTPUT ************************************************************** #
* none #
* #
* ALGORITHM *********************************************************** #
* Check the source operand tag (STAG) and set the FPSR condition #
* codes according to the operand type and sign. #
* #
*########################################################################
*
* ftst: set FPSR_CC from the operand class (STAG) and sign.
*   a0 = ptr to source operand
* No floating-point register is written. For a positive NORM or DENORM
* FPSR_CC is not written at all (presumably it is cleared by the caller
* beforehand -- confirm). d1 is used as scratch.
*
global ftst
ftst:
move.b STAG(a6),d1
bne.b ftst_not_norm * optimize on non-norm input
*
* Norm:
*
ftst_norm:
tst.b SRC_EX.w(a0) * is operand negative?
bmi.b ftst_norm_m * yes
rts
ftst_norm_m:
move.b #neg_bmask,FPSR_CC(a6) * set 'N' ccode bit
rts
*
* input is not normalized; what is it?
*
ftst_not_norm:
cmpi.b #ZERO,d1 * weed out ZERO
beq.b ftst_zero
cmpi.b #INF,d1 * weed out INF
beq.b ftst_inf
cmpi.b #SNAN,d1 * weed out SNAN
beq.l res_snan_1op
cmpi.b #QNAN,d1 * weed out QNAN
beq.l res_qnan_1op
*
* Denorm:
* (any tag not matched above falls through to here; handled like a NORM)
*
ftst_denorm:
tst.b SRC_EX.w(a0) * is operand negative?
bmi.b ftst_denorm_m * yes
rts
ftst_denorm_m:
move.b #neg_bmask,FPSR_CC(a6) * set 'N' ccode bit
rts
*
* Infinity:
*
ftst_inf:
tst.b SRC_EX.w(a0) * is operand negative?
bmi.b ftst_inf_m * yes
ftst_inf_p:
move.b #inf_bmask,FPSR_CC(a6) * set 'I' ccode bit
rts
ftst_inf_m:
move.b #inf_bmask+neg_bmask,FPSR_CC(a6) * set 'I','N' ccode bits
rts
*
* Zero:
*
ftst_zero:
tst.b SRC_EX.w(a0) * is operand negative?
bmi.b ftst_zero_m * yes
ftst_zero_p:
move.b #z_bmask,FPSR_CC(a6) * set 'Z' ccode bit
rts
ftst_zero_m:
move.b #z_bmask+neg_bmask,FPSR_CC(a6) * set 'Z','N' ccode bits
rts
*########################################################################
* XDEF **************************************************************** #
* fint(): emulates the fint instruction #
* #
* XREF **************************************************************** #
* res_{s,q}nan_1op() - set NAN result for monadic operation #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* d0 = round precision/mode #
* #
* OUTPUT ************************************************************** #
* fp0 = result #
* #
* ALGORITHM *********************************************************** #
* Separate according to operand type. Unnorms don't pass through #
* here. For norms, load the rounding mode/prec, execute a "fint", then #
* store the resulting FPSR bits. #
* For denorms, force the j-bit to a one and do the same as for #
* norms. Denorms are so low that the answer will either be a zero or a #
* one. #
* For zeroes/infs/NANs, return the same while setting the FPSR #
* as appropriate. #
* #
*########################################################################
*
* fint: round the operand to an integral value using the rounding mode
* in d0.
*   a0 = ptr to source operand, d0 = rnd prec,mode
*   fp0 = result; NORM/DENORM paths OR the FPSR into USER_FPSR,
*   ZERO/INF paths overwrite FPSR_CC directly.
*
global fint
fint:
move.b STAG(a6),d1
bne.b fint_not_norm * optimize on non-norm input
*
* Norm:
* only the rounding-mode bits ($30) of d0 reach the FPCR, which leaves
* the precision field zero (extended). d0 is then reused for the FPSR.
*
fint_norm:
andi.b #$30,d0 * set prec = ext
fmove.l d0,fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fint.x SRC.w(a0),fp0 * execute fint
fmove.l #$0,fpcr * clear FPCR
fmove.l fpsr,d0 * save FPSR
or.l d0,USER_FPSR(a6) * set exception bits
rts
*
* input is not normalized; what is it?
* anything that is not ZERO, INF, DENORM or SNAN is treated as a QNAN.
*
fint_not_norm:
cmpi.b #ZERO,d1 * weed out ZERO
beq.b fint_zero
cmpi.b #INF,d1 * weed out INF
beq.b fint_inf
cmpi.b #DENORM,d1 * weed out DENORM
beq.b fint_denorm
cmpi.b #SNAN,d1 * weed out SNAN
beq.l res_snan_1op
bra.l res_qnan_1op * weed out QNAN
*
* Denorm:
*
* for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
* also, the INEX2 and AINEX exception bits will be set.
* so, we could either set these manually or force the DENORM
* to a very small NORM and ship it to the NORM routine.
* I do the latter.
* only the {sgn,exp} word and the top mantissa byte of FP_SCR0 are written
* here; the remaining mantissa bytes keep whatever FP_SCR0 held before.
* a0 is redirected to FP_SCR0 for fint_norm.
*
fint_denorm:
move.w SRC_EX.w(a0),FP_SCR0_EX(a6) * copy sign, zero exp
move.b #$80,FP_SCR0_HI(a6) * force DENORM ==> small NORM
lea FP_SCR0(a6),a0
bra.b fint_norm
*
* Zero:
* return a ZERO of the same sign and overwrite FPSR_CC.
*
fint_zero:
tst.b SRC_EX.w(a0) * is ZERO negative?
bmi.b fint_zero_m * yes
fint_zero_p:
fmove.s #$00000000,fp0 * return +ZERO in fp0
move.b #z_bmask,FPSR_CC(a6) * set 'Z' ccode bit
rts
fint_zero_m:
fmove.s #$80000000,fp0 * return -ZERO in fp0
move.b #z_bmask+neg_bmask,FPSR_CC(a6) * set 'Z','N' ccode bits
rts
*
* Infinity:
* return the source INF unchanged and overwrite FPSR_CC.
*
fint_inf:
fmovem.x SRC.w(a0),fp0 * return result in fp0
tst.b SRC_EX.w(a0) * is INF negative?
bmi.b fint_inf_m * yes
fint_inf_p:
move.b #inf_bmask,FPSR_CC(a6) * set 'I' ccode bit
rts
fint_inf_m:
move.b #inf_bmask+neg_bmask,FPSR_CC(a6) * set 'N','I' ccode bits
rts
*########################################################################
* XDEF **************************************************************** #
* fintrz(): emulates the fintrz instruction #
* #
* XREF **************************************************************** #
* res_{s,q}nan_1op() - set NAN result for monadic operation #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* d0 = round precision/mode #
* #
* OUTPUT ************************************************************** #
* fp0 = result #
* #
* ALGORITHM *********************************************************** #
* Separate according to operand type. Unnorms don't pass through #
* here. For norms, load the rounding mode/prec, execute a "fintrz", #
* then store the resulting FPSR bits. #
* For denorms, force the j-bit to a one and do the same as for #
* norms. Denorms are so low that the answer will either be a zero or a #
* one. #
* For zeroes/infs/NANs, return the same while setting the FPSR #
* as appropriate. #
* #
*########################################################################
*
* fintrz: round the operand to an integral value with the fintrz
* instruction.
*   a0 = ptr to source operand
*   fp0 = result; NORM/DENORM paths OR the FPSR into USER_FPSR,
*   ZERO/INF paths overwrite FPSR_CC directly.
* Unlike fint, this routine never loads the FPCR and does not read d0 on
* entry; d0 is only used as scratch for the FPSR.
*
global fintrz
fintrz:
move.b STAG(a6),d1
bne.b fintrz_not_norm * optimize on non-norm input
*
* Norm:
*
fintrz_norm:
fmove.l #$0,fpsr * clear FPSR
fintrz.x SRC.w(a0),fp0 * execute fintrz
fmove.l fpsr,d0 * save FPSR
or.l d0,USER_FPSR(a6) * set exception bits
rts
*
* input is not normalized; what is it?
* anything that is not ZERO, INF, DENORM or SNAN is treated as a QNAN.
*
fintrz_not_norm:
cmpi.b #ZERO,d1 * weed out ZERO
beq.b fintrz_zero
cmpi.b #INF,d1 * weed out INF
beq.b fintrz_inf
cmpi.b #DENORM,d1 * weed out DENORM
beq.b fintrz_denorm
cmpi.b #SNAN,d1 * weed out SNAN
beq.l res_snan_1op
bra.l res_qnan_1op * weed out QNAN
*
* Denorm:
*
* for DENORMs, the result will be (+/-)ZERO.
* also, the INEX2 and AINEX exception bits will be set.
* so, we could either set these manually or force the DENORM
* to a very small NORM and ship it to the NORM routine.
* I do the latter.
* only the {sgn,exp} word and the top mantissa byte of FP_SCR0 are written
* here; the remaining mantissa bytes keep whatever FP_SCR0 held before.
* a0 is redirected to FP_SCR0 for fintrz_norm.
*
fintrz_denorm:
move.w SRC_EX.w(a0),FP_SCR0_EX(a6) * copy sign, zero exp
move.b #$80,FP_SCR0_HI(a6) * force DENORM ==> small NORM
lea FP_SCR0(a6),a0
bra.b fintrz_norm
*
* Zero:
* return a ZERO of the same sign and overwrite FPSR_CC.
*
fintrz_zero:
tst.b SRC_EX.w(a0) * is ZERO negative?
bmi.b fintrz_zero_m * yes
fintrz_zero_p:
fmove.s #$00000000,fp0 * return +ZERO in fp0
move.b #z_bmask,FPSR_CC(a6) * set 'Z' ccode bit
rts
fintrz_zero_m:
fmove.s #$80000000,fp0 * return -ZERO in fp0
move.b #z_bmask+neg_bmask,FPSR_CC(a6) * set 'Z','N' ccode bits
rts
*
* Infinity:
* return the source INF unchanged and overwrite FPSR_CC.
*
fintrz_inf:
fmovem.x SRC.w(a0),fp0 * return result in fp0
tst.b SRC_EX.w(a0) * is INF negative?
bmi.b fintrz_inf_m * yes
fintrz_inf_p:
move.b #inf_bmask,FPSR_CC(a6) * set 'I' ccode bit
rts
fintrz_inf_m:
move.b #inf_bmask+neg_bmask,FPSR_CC(a6) * set 'N','I' ccode bits
rts
*########################################################################
* XDEF **************************************************************** #
* fabs(): emulates the fabs instruction #
* fsabs(): emulates the fsabs instruction #
* fdabs(): emulates the fdabs instruction #
* #
* XREF **************************************************************** #
* norm() - normalize denorm mantissa to provide EXOP #
* scale_to_zero_src() - make exponent. = 0; get scale factor #
* unf_res() - calculate underflow result #
* ovf_res() - calculate overflow result #
* res_{s,q}nan_1op() - set NAN result for monadic operation #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* d0 = rnd precision/mode #
* #
* OUTPUT ************************************************************** #
* fp0 = result #
* fp1 = EXOP (if exception occurred) #
* #
* ALGORITHM *********************************************************** #
* Handle NANs, infinities, and zeroes as special cases. Divide #
* norms into extended, single, and double precision. #
* Simply clear sign for extended precision norm. Ext prec denorm #
* gets an EXOP created for it since it's an underflow. #
* Double and single precision can overflow and underflow. First, #
* scale the operand such that the exponent is zero. Perform an "fabs" #
* using the correct rnd mode/prec. Check to see if the original #
* exponent would take an exception. If so, use unf_res() or ovf_res() #
* to calculate the default result. Also, create the EXOP for the #
* exceptional case. If no exception should occur, insert the correct #
* result exponent and return. #
* Unnorms don't pass through here. #
* #
*########################################################################
*
* fsabs/fdabs: keep only the d0 bits selected by the $30 mask, insert the
* single/double precision code and join the common fabs entry.
*
global fsabs
fsabs:
andi.b #$30,d0 * clear rnd prec
ori.b #s_mode*$10,d0 * insert sgl precision
bra.b fabs
global fdabs
fdabs:
andi.b #$30,d0 * clear rnd prec
ori.b #d_mode*$10,d0 * insert dbl precision
*
* fabs: common entry (fdabs falls through into it).
*   d0 = rnd prec,mode (saved in L_SCR3), a0 = ptr to source operand
* A non-zero STAG is routed to fabs_not_norm with the tag in d1.
*
global fabs
fabs:
move.l d0,L_SCR3(a6) * store rnd info
move.b STAG(a6),d1
bne.w fabs_not_norm * optimize on non-norm input
*
* ABSOLUTE VALUE: norms and denorms ONLY!
*
fabs_norm:
andi.b #$c0,d0 * is precision extended?
bne.b fabs_not_ext * no; go handle sgl or dbl
*
* precision selected is extended. so...we can not get an underflow
* or overflow because of rounding to the correct precision. so...
* skip the scaling and unscaling...
* the operand is copied to FP_SCR0 with its sign bit cleared. FPSR_CC is
* not written on this path.
*
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
move.w SRC_EX.w(a0),d1
bclr #15,d1 * force absolute value
move.w d1,FP_SCR0_EX(a6) * insert exponent
fmovem.x FP_SCR0(a6),fp0 * return result in fp0
rts
*
* for an extended precision DENORM, the UNFL exception bit is set
* the accrued bit is NOT set in this instance(no inexactness!)
*
*
* fabs_denorm: for extended precision, set the UNFL exception bit and
* return the operand with its sign cleared in fp0; sgl/dbl are handled by
* fabs_not_ext. d0.b is tested for the precision bits on entry
* (presumably still rnd prec,mode from the fabs entry -- the dispatch
* that reaches this label is outside this view).
*
fabs_denorm:
andi.b #$c0,d0 * is precision extended?
bne.b fabs_not_ext * no
bset #unfl_bit,FPSR_EXCEPT(a6) * set unfl exc bit
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
move.w SRC_EX.w(a0),d0
bclr #15,d0 * clear sign
move.w d0,FP_SCR0_EX(a6) * insert exponent
fmovem.x FP_SCR0(a6),fp0 * return default result in fp0
btst #unfl_bit,FPCR_ENABLE(a6) * is UNFL enabled?
bne.b fabs_ext_unfl_ena
rts
*
* the input is an extended DENORM and underflow is enabled in the FPCR.
* normalize the mantissa and add the bias of 0x6000 to the resulting negative
* exponent and insert back into the operand.
* norm() is called with a0 -> FP_SCR0; its d0 return value is used as the
* shift count (presumably the number of bit positions shifted -- confirm
* against norm). The EXOP is returned in fp1; fp0 is left as loaded above.
*
fabs_ext_unfl_ena:
lea FP_SCR0(a6),a0 * pass: ptr to operand
bsr.l norm * normalize result
neg.w d0 * new exponent = -(shft val)
addi.w #$6000,d0 * add new bias to exponent
move.w FP_SCR0_EX(a6),d1 * fetch old sign,exp
andi.w #$8000,d1 * keep old sign
andi.w #$7fff,d0 * clear sign position
or.w d1,d0 * concat old sign, new exponent
move.w d0,FP_SCR0_EX(a6) * insert new exponent
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
rts
*
* operand is either single or double
*
*
* fabs_not_ext: d0.b holds only the precision bits here (masked with $c0
* by the caller path); anything other than single goes to fabs_dbl.
*
fabs_not_ext:
cmpi.b #s_mode*$10,d0 * separate sgl/dbl prec
bne.b fabs_dbl
*
* operand is to be rounded to single precision
* the operand is copied to FP_SCR0 and scaled; the scale factor returned
* in d0 is compared against the single-precision limits:
*   d0 >= $3fff-$3f80 : underflow
*   d0 == $3fff-$407e : may overflow
*   d0 <  $3fff-$407e : overflow
*
fabs_sgl:
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_to_zero_src * calculate scale factor
cmpi.l #$3fff-$3f80,d0 * will move in underflow?
bge.w fabs_sd_unfl * yes; go handle underflow
cmpi.l #$3fff-$407e,d0 * will move in overflow?
beq.w fabs_sd_may_ovfl * maybe; go check
blt.w fabs_sd_ovfl * yes; go handle overflow
*
* operand will NOT overflow or underflow when moved in to the fp reg file
* (shared by the single and double paths)
*
fabs_sd_normal:
fmove.l #$0,fpsr * clear FPSR
fmove.l L_SCR3(a6),fpcr * set FPCR
fabs.x FP_SCR0(a6),fp0 * perform absolute
fmove.l fpsr,d1 * save FPSR
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
*
* fabs_sd_normal_exit: expects the scaled result in fp0 and the scale
* factor in d0; subtracts d0 from the result exponent while keeping the
* sign bit, then reloads fp0. d2 is preserved via the stack.
*
fabs_sd_normal_exit:
move.l d2,-(sp) * save d2
fmovem.x fp0,FP_SCR0(a6) * store out result
move.w FP_SCR0_EX(a6),d1 * load sgn,exp
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
sub.l d0,d1 * add scale factor
andi.w #$8000,d2 * keep old sign
or.w d1,d2 * concat old sign,new exp
move.w d2,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp0 * return result in fp0
rts
*
* operand is to be rounded to double precision
*
*
* fabs_dbl: same flow as fabs_sgl with the double-precision limits:
*   d0 >= $3fff-$3c00 : underflow
*   d0 == $3fff-$43fe : may overflow
*   d0 <  $3fff-$43fe : overflow
*
fabs_dbl:
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_to_zero_src * calculate scale factor
cmpi.l #$3fff-$3c00,d0 * will move in underflow?
bge.b fabs_sd_unfl * yes; go handle underflow
cmpi.l #$3fff-$43fe,d0 * will move in overflow?
beq.w fabs_sd_may_ovfl * maybe; go check
blt.w fabs_sd_ovfl * yes; go handle overflow
bra.w fabs_sd_normal * no; go handle normalized op
*
* operand WILL underflow when moved in to the fp register file
*
* Underflow path: record UNFL, clear bit 7 of the scaled operand's sign/exp
* byte, then either build the EXOP first (UNFL or INEX enabled) or go
* directly to the default result produced by unf_res.
fabs_sd_unfl:
bset #unfl_bit,FPSR_EXCEPT(a6) * set unfl exc bit
bclr #$7,FP_SCR0_EX(a6) * force absolute value
* if underflow or inexact is enabled, go calculate EXOP first.
move.b FPCR_ENABLE(a6),d1
andi.b #$0b,d1 * is UNFL or INEX enabled?
bne.b fabs_sd_unfl_ena * yes
* fabs_sd_unfl_ena branches back here after loading the EXOP into fp1.
fabs_sd_unfl_dis:
lea FP_SCR0(a6),a0 * pass: result addr
move.l L_SCR3(a6),d1 * pass: rnd prec,mode
bsr.l unf_res * calculate default result
or.b d0,FPSR_CC(a6) * set possible 'Z' ccode
fmovem.x FP_SCR0(a6),fp0 * return default result in fp0
rts
*
* operand will underflow AND underflow is enabled.
* therefore, we must return the result rounded to extended precision.
*
* Build the EXOP in FP_SCR1: same mantissa as FP_SCR0, exponent with the
* scale factor removed and $6000 added (masked to 15 bits), sign preserved.
* The EXOP is returned in fp1; the default result is then produced by
* fabs_sd_unfl_dis.
fabs_sd_unfl_ena:
move.l FP_SCR0_HI(a6),FP_SCR1_HI(a6)
move.l FP_SCR0_LO(a6),FP_SCR1_LO(a6)
move.w FP_SCR0_EX(a6),d1 * load current exponent
move.l d2,-(sp) * save d2
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * subtract scale factor
addi.l #$6000,d1 * add new bias
andi.w #$7fff,d1
or.w d2,d1 * concat new sign,new exp
move.w d1,FP_SCR1_EX(a6) * insert new exp
fmovem.x FP_SCR1(a6),fp1 * return EXOP in fp1
move.l (sp)+,d2 * restore d2
bra.b fabs_sd_unfl_dis
*
* operand WILL overflow.
*
* Overflow path: perform fabs under the requested rounding so INEX2/N are
* captured in USER_FPSR, flag the overflow, then either build the EXOP
* (OVFL or INEX enabled) or return the default result from ovf_res.
fabs_sd_ovfl:
fmove.l #$0,fpsr * clear FPSR
fmove.l L_SCR3(a6),fpcr * set FPCR
fabs.x FP_SCR0(a6),fp0 * perform absolute
fmove.l #$0,fpcr * clear FPCR
fmove.l fpsr,d1 * save FPSR
or.l d1,USER_FPSR(a6) * save INEX2,N
* fabs_sd_may_ovfl enters here when rounding produced an overflow.
fabs_sd_ovfl_tst:
ori.l #ovfl_inx_mask,USER_FPSR(a6) * set ovfl/aovfl/ainex
move.b FPCR_ENABLE(a6),d1
andi.b #$13,d1 * is OVFL or INEX enabled?
bne.b fabs_sd_ovfl_ena * yes
*
* OVFL is not enabled; therefore, we must create the default result by
* calling ovf_res().
*
* NOTE(review): the result is loaded from (a0) after the call, so ovf_res
* presumably returns the result address in a0 - confirm against ovf_res.
fabs_sd_ovfl_dis:
btst #neg_bit,FPSR_CC(a6) * is result negative?
sne d1 * set sign param accordingly
move.l L_SCR3(a6),d0 * pass: prec,mode
bsr.l ovf_res * calculate default result
or.b d0,FPSR_CC(a6) * set INF,N if applicable
fmovem.x (a0),fp0 * return default result in fp0
rts
*
* OVFL is enabled.
* the INEX2 bit has already been updated by the round to the correct precision.
* now, round to extended(and don't alter the FPSR).
*
* Build the EXOP in place in FP_SCR0: exponent with the scale factor removed
* and $6000 subtracted (masked to 15 bits), sign preserved. The EXOP is
* returned in fp1; the default result then comes from fabs_sd_ovfl_dis.
fabs_sd_ovfl_ena:
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * fetch {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * subtract scale factor
subi.l #$6000,d1 * subtract bias
andi.w #$7fff,d1
or.w d2,d1 * concat sign,exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
move.l (sp)+,d2 * restore d2
bra.b fabs_sd_ovfl_dis
*
* the move in MAY overflow. so...
*
* Borderline overflow: perform fabs under the requested rounding and look at
* the magnitude of the scaled result. A magnitude of 2 or more means the
* rounding carried out and the real result overflows.
fabs_sd_may_ovfl:
fmove.l #$0,fpsr * clear FPSR
fmove.l L_SCR3(a6),fpcr * set FPCR
fabs.x FP_SCR0(a6),fp0 * perform absolute
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
fabs.x fp0,fp1 * make a copy of result
fcmp.b #$2,fp1 * is |result| >= 2.b?
fbge.w fabs_sd_ovfl_tst * yes; overflow has occurred
* no, it didn't overflow; we have correct result
bra.w fabs_sd_normal_exit
*#########################################################################
*
* input is not normalized; what is it?
*
* Dispatch on the operand tag in d1: DENORM, SNAN and QNAN go to their own
* handlers; for the remaining tags the absolute value is loaded into fp0 and
* FPSR_CC is set to 'I' for INF or 'Z' otherwise.
fabs_not_norm:
cmpi.b #DENORM,d1 * weed out DENORM
beq.w fabs_denorm
cmpi.b #SNAN,d1 * weed out SNAN
beq.l res_snan_1op
cmpi.b #QNAN,d1 * weed out QNAN
beq.l res_qnan_1op
fabs.x SRC.w(a0),fp0 * force absolute value
cmpi.b #INF,d1 * weed out INF
beq.b fabs_inf
fabs_zero:
move.b #z_bmask,FPSR_CC(a6) * set 'Z' ccode bit
rts
fabs_inf:
move.b #inf_bmask,FPSR_CC(a6) * set 'I' ccode bit
rts
*########################################################################
* XDEF **************************************************************** #
* fcmp(): fp compare op routine #
* #
* XREF **************************************************************** #
* res_qnan() - return QNAN result #
* res_snan() - return SNAN result #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* a1 = pointer to extended precision destination operand #
* d0 = round prec/mode #
* #
* OUTPUT ************************************************************** #
* None #
* #
* ALGORITHM *********************************************************** #
* Handle NANs and denorms as special cases. For everything else, #
* just use the actual fcmp instruction to produce the correct condition #
* codes. #
* #
*########################################################################
* Entry: builds the dispatch index d1 = (DTAG << 3) | STAG. A zero index
* falls straight into the hardware compare; anything else is dispatched
* through the table at fcmp_not_norm.
global __fcmp__
__fcmp__:
clr.w d1
move.b DTAG(a6),d1
lsl.b #$3,d1
or.b STAG(a6),d1
bne.b fcmp_not_norm * optimize on non-norm input
*
* COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
*
* Loads the dst operand from (a1), compares it with the src operand at (a0)
* and stores the top byte of the resulting FPSR into FPSR_CC.
fcmp_norm:
fmovem.x DST.w(a1),fp0 * load dst op
fcmp.x SRC.w(a0),fp0 * do compare
fmove.l fpsr,d0 * save FPSR
rol.l #$8,d0 * extract ccode bits
move.b d0,FPSR_CC(a6) * set ccode bits(no exc bits are set)
rts
*
* fcmp: inputs are not both normalized; what are they?
*
* Table dispatch: d1 (the tag index built in __fcmp__) selects a 16-bit
* entry; each entry is the handler's offset from tbl_fcmp_op and is used as
* the index of the jmp. Row comments read "dst tag - src tag". The last two
* entries of every row hold offset 0.
fcmp_not_norm:
move.w (tbl_fcmp_op.b,pc,d1.w*2),d1
jmp (tbl_fcmp_op.b,pc,d1.w*1)
* two data words precede the table: $4AFC and the entry count (48)
.dc.w $4AFC,48
tbl_fcmp_op:
.dc.w fcmp_norm-tbl_fcmp_op * NORM - NORM
.dc.w fcmp_norm-tbl_fcmp_op * NORM - ZERO
.dc.w fcmp_norm-tbl_fcmp_op * NORM - INF
.dc.w fcmp_res_qnan-tbl_fcmp_op * NORM - QNAN
.dc.w fcmp_nrm_dnrm-tbl_fcmp_op * NORM - DENORM
.dc.w fcmp_res_snan-tbl_fcmp_op * NORM - SNAN
.dc.w tbl_fcmp_op-tbl_fcmp_op *
.dc.w tbl_fcmp_op-tbl_fcmp_op *
.dc.w fcmp_norm-tbl_fcmp_op * ZERO - NORM
.dc.w fcmp_norm-tbl_fcmp_op * ZERO - ZERO
.dc.w fcmp_norm-tbl_fcmp_op * ZERO - INF
.dc.w fcmp_res_qnan-tbl_fcmp_op * ZERO - QNAN
.dc.w fcmp_dnrm_s-tbl_fcmp_op * ZERO - DENORM
.dc.w fcmp_res_snan-tbl_fcmp_op * ZERO - SNAN
.dc.w tbl_fcmp_op-tbl_fcmp_op *
.dc.w tbl_fcmp_op-tbl_fcmp_op *
.dc.w fcmp_norm-tbl_fcmp_op * INF - NORM
.dc.w fcmp_norm-tbl_fcmp_op * INF - ZERO
.dc.w fcmp_norm-tbl_fcmp_op * INF - INF
.dc.w fcmp_res_qnan-tbl_fcmp_op * INF - QNAN
.dc.w fcmp_dnrm_s-tbl_fcmp_op * INF - DENORM
.dc.w fcmp_res_snan-tbl_fcmp_op * INF - SNAN
.dc.w tbl_fcmp_op-tbl_fcmp_op *
.dc.w tbl_fcmp_op-tbl_fcmp_op *
.dc.w fcmp_res_qnan-tbl_fcmp_op * QNAN - NORM
.dc.w fcmp_res_qnan-tbl_fcmp_op * QNAN - ZERO
.dc.w fcmp_res_qnan-tbl_fcmp_op * QNAN - INF
.dc.w fcmp_res_qnan-tbl_fcmp_op * QNAN - QNAN
.dc.w fcmp_res_qnan-tbl_fcmp_op * QNAN - DENORM
.dc.w fcmp_res_snan-tbl_fcmp_op * QNAN - SNAN
.dc.w tbl_fcmp_op-tbl_fcmp_op *
.dc.w tbl_fcmp_op-tbl_fcmp_op *
.dc.w fcmp_dnrm_nrm-tbl_fcmp_op * DENORM - NORM
.dc.w fcmp_dnrm_d-tbl_fcmp_op * DENORM - ZERO
.dc.w fcmp_dnrm_d-tbl_fcmp_op * DENORM - INF
.dc.w fcmp_res_qnan-tbl_fcmp_op * DENORM - QNAN
.dc.w fcmp_dnrm_sd-tbl_fcmp_op * DENORM - DENORM
.dc.w fcmp_res_snan-tbl_fcmp_op * DENORM - SNAN
.dc.w tbl_fcmp_op-tbl_fcmp_op *
.dc.w tbl_fcmp_op-tbl_fcmp_op *
.dc.w fcmp_res_snan-tbl_fcmp_op * SNAN - NORM
.dc.w fcmp_res_snan-tbl_fcmp_op * SNAN - ZERO
.dc.w fcmp_res_snan-tbl_fcmp_op * SNAN - INF
.dc.w fcmp_res_snan-tbl_fcmp_op * SNAN - QNAN
.dc.w fcmp_res_snan-tbl_fcmp_op * SNAN - DENORM
.dc.w fcmp_res_snan-tbl_fcmp_op * SNAN - SNAN
.dc.w tbl_fcmp_op-tbl_fcmp_op *
.dc.w tbl_fcmp_op-tbl_fcmp_op *
* unlike all other functions for QNAN and SNAN, fcmp does NOT set the
* 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
* NAN wrappers for fcmp: call the shared NAN handler, then clear bit 3 of
* FPSR_CC (mask $f7) so that 'N' is not reported for a negative NAN.
fcmp_res_qnan:
bsr.l res_qnan
andi.b #$f7,FPSR_CC(a6) * squelch 'N'
rts
fcmp_res_snan:
bsr.l res_snan
andi.b #$f7,FPSR_CC(a6) * squelch 'N'
rts
*
* DENORMs are a little more difficult.
* If you have 2 DENORMs, then you can just force the j-bit of each to a one
* and use the fcmp_norm routine.
* If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
* and use the fcmp_norm routine.
* If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
* But with a DENORM and a NORM of the same sign, the neg bit is set if the
* (1) signs are (+) and the DENORM is the dst or
* (2) signs are (-) and the DENORM is the src
*
* fcmp_dnrm_s: the src operand is a DENORM. Copy it to FP_SCR0 with bit 31
* of the high mantissa long set, point a0 at the copy and do a normal
* compare.
fcmp_dnrm_s:
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),d0
bset #31,d0 * DENORM src; make into small norm
move.l d0,FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
lea FP_SCR0(a6),a0
bra.w fcmp_norm
* fcmp_dnrm_d: the dst operand is a DENORM. Same treatment, with a1
* redirected to the FP_SCR0 copy.
* NOTE(review): the exponent copy below is a long move, while fcmp_dnrm_s
* and fcmp_dnrm_sd use word moves. FP_SCR0_HI is stored afterwards; confirm
* that the extra word copied here is harmless.
fcmp_dnrm_d:
move.l DST_EX.w(a1),FP_SCR0_EX(a6)
move.l DST_HI(a1),d0
bset #31,d0 * DENORM dst; make into small norm
move.l d0,FP_SCR0_HI(a6)
move.l DST_LO(a1),FP_SCR0_LO(a6)
lea FP_SCR0(a6),a1
bra.w fcmp_norm
* fcmp_dnrm_sd: both operands are DENORMs. The dst copy goes to FP_SCR1 and
* the src copy to FP_SCR0, each with bit 31 of the high mantissa long set.
fcmp_dnrm_sd:
move.w DST_EX.w(a1),FP_SCR1_EX(a6)
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l DST_HI(a1),d0
bset #31,d0 * DENORM dst; make into small norm
move.l d0,FP_SCR1_HI(a6)
move.l SRC_HI(a0),d0
bset #31,d0 * DENORM src; make into small norm
move.l d0,FP_SCR0_HI(a6)
move.l DST_LO(a1),FP_SCR1_LO(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
lea FP_SCR1(a6),a1
lea FP_SCR0(a6),a0
bra.w fcmp_norm
* fcmp_nrm_dnrm: table entry "NORM - DENORM" (dst NORM, src DENORM).
* Opposite signs: handled by fcmp_dnrm_s. Like signs: the ccode byte is
* left untouched when both are positive and set to 'N' when both are
* negative.
fcmp_nrm_dnrm:
move.b SRC_EX.w(a0),d0 * determine if like signs
move.b DST_EX.w(a1),d1
eor.b d0,d1
bmi.w fcmp_dnrm_s
* signs are the same, so must determine the answer ourselves.
tst.b d0 * is src op negative?
bmi.b fcmp_nrm_dnrm_m * yes
rts
fcmp_nrm_dnrm_m:
move.b #neg_bmask,FPSR_CC(a6) * set 'N' ccode bit
rts
* fcmp_dnrm_nrm: table entry "DENORM - NORM" (dst DENORM, src NORM).
* Opposite signs: handled by fcmp_dnrm_d. Like signs: 'N' is set when both
* are positive and the ccode byte is left untouched when both are negative.
fcmp_dnrm_nrm:
move.b SRC_EX.w(a0),d0 * determine if like signs
move.b DST_EX.w(a1),d1
eor.b d0,d1
bmi.w fcmp_dnrm_d
* signs are the same, so must determine the answer ourselves.
tst.b d0 * is src op negative?
bpl.b fcmp_dnrm_nrm_m * no
rts
fcmp_dnrm_nrm_m:
move.b #neg_bmask,FPSR_CC(a6) * set 'N' ccode bit
rts
*########################################################################
* XDEF **************************************************************** #
* fsglmul(): emulates the fsglmul instruction #
* #
* XREF **************************************************************** #
* scale_to_zero_src() - scale src exponent to zero #
* scale_to_zero_dst() - scale dst exponent to zero #
* unf_res4() - return default underflow result for sglop #
* ovf_res() - return default overflow result #
* res_qnan() - return QNAN result #
* res_snan() - return SNAN result #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* a1 = pointer to extended precision destination operand #
* d0 rnd prec,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = result #
* fp1 = EXOP (if exception occurred) #
* #
* ALGORITHM *********************************************************** #
* Handle NANs, infinities, and zeroes as special cases. Divide #
* norms/denorms into ext/sgl/dbl precision. #
* For norms/denorms, scale the exponents such that a multiply #
* instruction won't cause an exception. Use the regular fsglmul to #
* compute a result. Check if the regular operands would have taken #
* an exception. If so, return the default overflow/underflow result #
* and return the EXOP if exceptions are enabled. Else, scale the #
* result operand to the proper exponent. #
* #
*########################################################################
* Entry: saves the rounding info, builds the tag index
* d1 = (DTAG << 3) | STAG and dispatches non-zero indexes through
* fsglmul_not_norm.
global fsglmul
fsglmul:
move.l d0,L_SCR3(a6) * store rnd info
clr.w d1
move.b DTAG(a6),d1
lsl.b #$3,d1
or.b STAG(a6),d1
bne.w fsglmul_not_norm * optimize on non-norm input
* Copy dst to FP_SCR1 and src to FP_SCR0, scale both, and classify the
* summed scale factor (left in d0) against the overflow/underflow limits.
fsglmul_norm:
move.w DST_EX.w(a1),FP_SCR1_EX(a6)
move.l DST_HI(a1),FP_SCR1_HI(a6)
move.l DST_LO(a1),FP_SCR1_LO(a6)
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_to_zero_src * scale exponent
move.l d0,-(sp) * save scale factor 1
bsr.l scale_to_zero_dst * scale dst exponent
add.l (sp)+,d0 * SCALE_FACTOR = scale1 + scale2
cmpi.l #$3fff-$7ffe,d0 * would result ovfl?
beq.w fsglmul_may_ovfl * result may rnd to overflow
blt.w fsglmul_ovfl * result will overflow
cmpi.l #$3fff+$0001,d0 * would result unfl?
beq.w fsglmul_may_unfl * result may rnd to no unfl
bgt.w fsglmul_unfl * result will underflow
* otherwise execution continues with the normal path that follows.
* Normal path: multiply the scaled operands under the rounding prec/mode in
* L_SCR3, merge the FPSR into USER_FPSR, then remove the scaling from the
* result's exponent.
fsglmul_normal:
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsglmul.x FP_SCR0(a6),fp0 * execute sgl multiply
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
* The may_ovfl/may_unfl paths branch here when no exception occurred.
fsglmul_normal_exit:
fmovem.x fp0,FP_SCR0(a6) * store out result
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * load {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * subtract scale factor (d0)
or.w d2,d1 * concat old sign,new exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp0 * return result in fp0
rts
* Overflow path: multiply to capture INEX2/N, flag the overflow, then either
* build the EXOP (OVFL or INEX enabled) or return the default result.
fsglmul_ovfl:
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsglmul.x FP_SCR0(a6),fp0 * execute sgl multiply
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
fsglmul_ovfl_tst:
* save setting this until now because this is where fsglmul_may_ovfl may jump in
ori.l #ovfl_inx_mask,USER_FPSR(a6) * set ovfl/aovfl/ainex
move.b FPCR_ENABLE(a6),d1
andi.b #$13,d1 * is OVFL or INEX enabled?
bne.b fsglmul_ovfl_ena * yes
* Default result: only the rounding-mode bits ($30) of L_SCR3 are passed to
* ovf_res. NOTE(review): the result is loaded from (a0) after the call, so
* ovf_res presumably returns the result address in a0 - confirm.
fsglmul_ovfl_dis:
btst #neg_bit,FPSR_CC(a6) * is result negative?
sne d1 * set sign param accordingly
move.l L_SCR3(a6),d0 * pass prec:rnd
andi.b #$30,d0 * force prec = ext
bsr.l ovf_res * calculate default result
or.b d0,FPSR_CC(a6) * set INF,N if applicable
fmovem.x (a0),fp0 * return default result in fp0
rts
* Build the EXOP from the product in fp0: exponent with the scale factor
* removed and $6000 subtracted (masked to 15 bits), sign preserved. The EXOP
* is returned in fp1; the default result then comes from fsglmul_ovfl_dis.
fsglmul_ovfl_ena:
fmovem.x fp0,FP_SCR0(a6) * move result to stack
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * fetch {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
sub.l d0,d1 * subtract scale factor
subi.l #$6000,d1 * subtract bias
andi.w #$7fff,d1
andi.w #$8000,d2 * keep old sign
or.w d2,d1 * concat old sign,new exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
bra.b fsglmul_ovfl_dis
* Borderline overflow: multiply under the requested rounding and test the
* magnitude of the scaled product. A magnitude of 2 or more means the real
* result overflows.
fsglmul_may_ovfl:
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsglmul.x FP_SCR0(a6),fp0 * execute sgl multiply
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
fabs.x fp0,fp1 * make a copy of result
fcmp.b #$2,fp1 * is |result| >= 2.b?
fbge.w fsglmul_ovfl_tst * yes; overflow has occurred
* no, it didn't overflow; we have correct result
bra.w fsglmul_normal_exit
* Underflow path: flag UNFL, multiply with the FPCR set to rz_mode*$10, then
* either build the EXOP (UNFL or INEX enabled) or hand the product to
* unf_res4 for the default result.
fsglmul_unfl:
bset #unfl_bit,FPSR_EXCEPT(a6) * set unfl exc bit
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l #rz_mode*$10,fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsglmul.x FP_SCR0(a6),fp0 * execute sgl multiply
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
move.b FPCR_ENABLE(a6),d1
andi.b #$0b,d1 * is UNFL or INEX enabled?
bne.b fsglmul_unfl_ena * yes
* fsglmul_unfl_ena branches back here after loading the EXOP into fp1.
fsglmul_unfl_dis:
fmovem.x fp0,FP_SCR0(a6) * store out result
lea FP_SCR0(a6),a0 * pass: result addr
move.l L_SCR3(a6),d1 * pass: rnd prec,mode
bsr.l unf_res4 * calculate default result
or.b d0,FPSR_CC(a6) * 'Z' bit may have been set
fmovem.x FP_SCR0(a6),fp0 * return default result in fp0
rts
*
* UNFL is enabled.
*
* Build the EXOP: redo the multiply into fp1 under the rounding prec/mode in
* L_SCR3, then rewrite its exponent with the scale factor removed and $6000
* added (masked to 15 bits). fp0 still holds the product computed by
* fsglmul_unfl and is consumed by fsglmul_unfl_dis.
fsglmul_unfl_ena:
fmovem.x FP_SCR1(a6),fp1 * load dst op
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsglmul.x FP_SCR0(a6),fp1 * execute sgl multiply
fmove.l #$0,fpcr * clear FPCR
fmovem.x fp1,FP_SCR0(a6) * save result to stack
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * fetch {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * subtract scale factor
addi.l #$6000,d1 * add bias
andi.w #$7fff,d1
or.w d2,d1 * concat old sign,new exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
bra.w fsglmul_unfl_dis
* Borderline underflow: multiply under the requested rounding and compare
* the magnitude of the scaled product with 2. Greater means no underflow,
* less means underflow, equal is resolved by a second multiply in RZ.
fsglmul_may_unfl:
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsglmul.x FP_SCR0(a6),fp0 * execute sgl multiply
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
fabs.x fp0,fp1 * make a copy of result
fcmp.b #$2,fp1 * compare |result| with 2.b
fbgt.w fsglmul_normal_exit * greater; no underflow occurred
fblt.w fsglmul_unfl * less; underflow occurred
*
* we still don't know if underflow occurred. result is ~ equal to 2. but,
* we don't know if the result was an underflow that rounded up to a 2 or
* a normalized number that rounded down to a 2. so, redo the entire operation
* using RZ as the rounding mode to see what the pre-rounded result is.
* this case should be relatively rare.
*
fmovem.x FP_SCR1(a6),fp1 * load dst op into fp1
move.l L_SCR3(a6),d1
andi.b #$c0,d1 * keep rnd prec
ori.b #rz_mode*$10,d1 * insert RZ
fmove.l d1,fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsglmul.x FP_SCR0(a6),fp1 * execute sgl multiply
fmove.l #$0,fpcr * clear FPCR
fabs.x fp1 * make absolute value
fcmp.b #$2,fp1 * is |result| < 2.b?
fbge.w fsglmul_normal_exit * no; no underflow occurred
bra.w fsglmul_unfl * yes, underflow occurred
*#############################################################################
*
* Single Precision Multiply: inputs are not both normalized; what are they?
*
* Table dispatch: d1 (the tag index built in fsglmul) selects a 16-bit
* entry; each entry is the handler's offset from tbl_fsglmul_op and is used
* as the index of the jmp. Row comments read "dst tag x src tag". The last
* two entries of every row hold offset 0.
fsglmul_not_norm:
move.w (tbl_fsglmul_op.b,pc,d1.w*2),d1
jmp (tbl_fsglmul_op.b,pc,d1.w*1)
* two data words precede the table: $4AFC and the entry count (48)
.dc.w $4AFC,48
tbl_fsglmul_op:
.dc.w fsglmul_norm-tbl_fsglmul_op * NORM x NORM
.dc.w fsglmul_zero-tbl_fsglmul_op * NORM x ZERO
.dc.w fsglmul_inf_src-tbl_fsglmul_op * NORM x INF
.dc.w fsglmul_res_qnan-tbl_fsglmul_op * NORM x QNAN
.dc.w fsglmul_norm-tbl_fsglmul_op * NORM x DENORM
.dc.w fsglmul_res_snan-tbl_fsglmul_op * NORM x SNAN
.dc.w tbl_fsglmul_op-tbl_fsglmul_op *
.dc.w tbl_fsglmul_op-tbl_fsglmul_op *
.dc.w fsglmul_zero-tbl_fsglmul_op * ZERO x NORM
.dc.w fsglmul_zero-tbl_fsglmul_op * ZERO x ZERO
.dc.w fsglmul_res_operr-tbl_fsglmul_op * ZERO x INF
.dc.w fsglmul_res_qnan-tbl_fsglmul_op * ZERO x QNAN
.dc.w fsglmul_zero-tbl_fsglmul_op * ZERO x DENORM
.dc.w fsglmul_res_snan-tbl_fsglmul_op * ZERO x SNAN
.dc.w tbl_fsglmul_op-tbl_fsglmul_op *
.dc.w tbl_fsglmul_op-tbl_fsglmul_op *
.dc.w fsglmul_inf_dst-tbl_fsglmul_op * INF x NORM
.dc.w fsglmul_res_operr-tbl_fsglmul_op * INF x ZERO
.dc.w fsglmul_inf_dst-tbl_fsglmul_op * INF x INF
.dc.w fsglmul_res_qnan-tbl_fsglmul_op * INF x QNAN
.dc.w fsglmul_inf_dst-tbl_fsglmul_op * INF x DENORM
.dc.w fsglmul_res_snan-tbl_fsglmul_op * INF x SNAN
.dc.w tbl_fsglmul_op-tbl_fsglmul_op *
.dc.w tbl_fsglmul_op-tbl_fsglmul_op *
.dc.w fsglmul_res_qnan-tbl_fsglmul_op * QNAN x NORM
.dc.w fsglmul_res_qnan-tbl_fsglmul_op * QNAN x ZERO
.dc.w fsglmul_res_qnan-tbl_fsglmul_op * QNAN x INF
.dc.w fsglmul_res_qnan-tbl_fsglmul_op * QNAN x QNAN
.dc.w fsglmul_res_qnan-tbl_fsglmul_op * QNAN x DENORM
.dc.w fsglmul_res_snan-tbl_fsglmul_op * QNAN x SNAN
.dc.w tbl_fsglmul_op-tbl_fsglmul_op *
.dc.w tbl_fsglmul_op-tbl_fsglmul_op *
.dc.w fsglmul_norm-tbl_fsglmul_op * DENORM x NORM
.dc.w fsglmul_zero-tbl_fsglmul_op * DENORM x ZERO
.dc.w fsglmul_inf_src-tbl_fsglmul_op * DENORM x INF
.dc.w fsglmul_res_qnan-tbl_fsglmul_op * DENORM x QNAN
.dc.w fsglmul_norm-tbl_fsglmul_op * DENORM x DENORM
.dc.w fsglmul_res_snan-tbl_fsglmul_op * DENORM x SNAN
.dc.w tbl_fsglmul_op-tbl_fsglmul_op *
.dc.w tbl_fsglmul_op-tbl_fsglmul_op *
.dc.w fsglmul_res_snan-tbl_fsglmul_op * SNAN x NORM
.dc.w fsglmul_res_snan-tbl_fsglmul_op * SNAN x ZERO
.dc.w fsglmul_res_snan-tbl_fsglmul_op * SNAN x INF
.dc.w fsglmul_res_snan-tbl_fsglmul_op * SNAN x QNAN
.dc.w fsglmul_res_snan-tbl_fsglmul_op * SNAN x DENORM
.dc.w fsglmul_res_snan-tbl_fsglmul_op * SNAN x SNAN
.dc.w tbl_fsglmul_op-tbl_fsglmul_op *
.dc.w tbl_fsglmul_op-tbl_fsglmul_op *
* Long-branch stubs referenced by tbl_fsglmul_op. The table stores 16-bit
* offsets; each stub forwards with bra.l to a handler defined elsewhere.
fsglmul_res_operr:
bra.l res_operr
fsglmul_res_snan:
bra.l res_snan
fsglmul_res_qnan:
bra.l res_qnan
fsglmul_zero:
bra.l fmul_zero
fsglmul_inf_src:
bra.l fmul_inf_src
fsglmul_inf_dst:
bra.l fmul_inf_dst
*########################################################################
* XDEF **************************************************************** #
* fsgldiv(): emulates the fsgldiv instruction #
* #
* XREF **************************************************************** #
* scale_to_zero_src() - scale src exponent to zero #
* scale_to_zero_dst() - scale dst exponent to zero #
* unf_res4() - return default underflow result for sglop #
* ovf_res() - return default overflow result #
* res_qnan() - return QNAN result #
* res_snan() - return SNAN result #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* a1 = pointer to extended precision destination operand #
* d0 rnd prec,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = result #
* fp1 = EXOP (if exception occurred) #
* #
* ALGORITHM *********************************************************** #
* Handle NANs, infinities, and zeroes as special cases. Divide #
* norms/denorms into ext/sgl/dbl precision. #
* For norms/denorms, scale the exponents such that a divide #
* instruction won't cause an exception. Use the regular fsgldiv to #
* compute a result. Check if the regular operands would have taken #
* an exception. If so, return the default overflow/underflow result #
* and return the EXOP if exceptions are enabled. Else, scale the #
* result operand to the proper exponent. #
* #
*########################################################################
* Entry: saves the rounding info, builds the tag index
* d1 = (DTAG << 3) | STAG and dispatches non-zero indexes through
* fsgldiv_not_norm.
global fsgldiv
fsgldiv:
move.l d0,L_SCR3(a6) * store rnd info
clr.w d1
move.b DTAG(a6),d1
lsl.b #$3,d1
or.b STAG(a6),d1 * combine src tags
bne.w fsgldiv_not_norm * optimize on non-norm input
*
* DIVIDE: NORMs and DENORMs ONLY!
*
* Copy dst to FP_SCR1 and src to FP_SCR0, scale both, and classify the
* combined scale factor (left in d0) against the overflow/underflow limits.
fsgldiv_norm:
move.w DST_EX.w(a1),FP_SCR1_EX(a6)
move.l DST_HI(a1),FP_SCR1_HI(a6)
move.l DST_LO(a1),FP_SCR1_LO(a6)
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_to_zero_src * calculate scale factor 1
move.l d0,-(sp) * save scale factor 1
bsr.l scale_to_zero_dst * calculate scale factor 2
neg.l (sp) * negate scale factor 1 ...
add.l d0,(sp) * ... and add scale factor 2
* NOTE(review): every path visible from here overwrites d1 before reading
* it, so this precision fetch looks unused - confirm before removing.
move.w 2+L_SCR3(a6),d1 * fetch precision,mode
lsr.b #$6,d1
move.l (sp)+,d0
cmpi.l #$3fff-$7ffe,d0
ble.w fsgldiv_may_ovfl
cmpi.l #$3fff-$0000,d0 * will result underflow?
beq.w fsgldiv_may_unfl * maybe
bgt.w fsgldiv_unfl * yes; go handle underflow
* otherwise execution continues with the normal path that follows.
* Normal path: divide the scaled operands under the rounding prec/mode in
* L_SCR3, merge the FPSR into USER_FPSR, then remove the scaling from the
* result's exponent.
fsgldiv_normal:
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsgldiv.x FP_SCR0(a6),fp0 * perform sgl divide
fmove.l fpsr,d1 * save FPSR
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
* The may_ovfl/may_unfl paths branch here when no exception occurred.
fsgldiv_normal_exit:
fmovem.x fp0,FP_SCR0(a6) * store result on stack
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * load {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * subtract scale factor (d0)
or.w d2,d1 * concat old sign,new exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp0 * return result in fp0
rts
* Possible overflow: divide under the requested rounding, then rebuild the
* unscaled exponent of the quotient. Below $7fff the result is good and is
* finished by fsgldiv_normal_exit; otherwise it is an overflow.
fsgldiv_may_ovfl:
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsgldiv.x FP_SCR0(a6),fp0 * execute divide
fmove.l fpsr,d1
fmove.l #$0,fpcr
or.l d1,USER_FPSR(a6) * save INEX,N
fmovem.x fp0,-(sp) * save result to stack
move.w (sp),d1 * fetch new exponent
add.l #$c,sp * clear result
andi.l #$7fff,d1 * strip sign
sub.l d0,d1 * subtract scale factor
cmpi.l #$7fff,d1 * did divide overflow?
blt.b fsgldiv_normal_exit
fsgldiv_ovfl_tst:
ori.w #ovfl_inx_mask,2+USER_FPSR(a6) * set ovfl/aovfl/ainex
move.b FPCR_ENABLE(a6),d1
andi.b #$13,d1 * is OVFL or INEX enabled?
bne.b fsgldiv_ovfl_ena * yes
* Default result: only the rounding-mode bits ($30) of L_SCR3 are passed to
* ovf_res. NOTE(review): the result is loaded from (a0) after the call, so
* ovf_res presumably returns the result address in a0 - confirm.
fsgldiv_ovfl_dis:
btst #neg_bit,FPSR_CC(a6) * is result negative
sne d1 * set sign param accordingly
move.l L_SCR3(a6),d0 * pass prec:rnd
andi.b #$30,d0 * kill precision
bsr.l ovf_res * calculate default result
or.b d0,FPSR_CC(a6) * set INF if applicable
fmovem.x (a0),fp0 * return default result in fp0
rts
* Build the EXOP from the quotient in fp0: exponent with the scale factor
* removed and $6000 subtracted (masked to 15 bits), sign preserved. The EXOP
* is returned in fp1; the default result then comes from fsgldiv_ovfl_dis.
fsgldiv_ovfl_ena:
fmovem.x fp0,FP_SCR0(a6) * move result to stack
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * fetch {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * subtract scale factor
subi.l #$6000,d1 * subtract new bias
andi.w #$7fff,d1 * clear ms bit
or.w d2,d1 * concat old sign,new exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
bra.b fsgldiv_ovfl_dis
* Underflow path: flag UNFL, divide with the FPCR set to rz_mode*$10, then
* either build the EXOP (UNFL or INEX enabled) or hand the quotient to
* unf_res4 for the default result.
fsgldiv_unfl:
bset #unfl_bit,FPSR_EXCEPT(a6) * set unfl exc bit
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l #rz_mode*$10,fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsgldiv.x FP_SCR0(a6),fp0 * execute sgl divide
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
move.b FPCR_ENABLE(a6),d1
andi.b #$0b,d1 * is UNFL or INEX enabled?
bne.b fsgldiv_unfl_ena * yes
* fsgldiv_unfl_ena branches back here after loading the EXOP into fp1.
fsgldiv_unfl_dis:
fmovem.x fp0,FP_SCR0(a6) * store out result
lea FP_SCR0(a6),a0 * pass: result addr
move.l L_SCR3(a6),d1 * pass: rnd prec,mode
bsr.l unf_res4 * calculate default result
or.b d0,FPSR_CC(a6) * 'Z' bit may have been set
fmovem.x FP_SCR0(a6),fp0 * return default result in fp0
rts
*
* UNFL is enabled.
*
* Build the EXOP: redo the divide into fp1 under the rounding prec/mode in
* L_SCR3, then rewrite its exponent with the scale factor removed and $6000
* added (masked to 15 bits). fp0 still holds the quotient computed by
* fsgldiv_unfl and is consumed by fsgldiv_unfl_dis.
fsgldiv_unfl_ena:
fmovem.x FP_SCR1(a6),fp1 * load dst op
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsgldiv.x FP_SCR0(a6),fp1 * execute sgl divide
fmove.l #$0,fpcr * clear FPCR
fmovem.x fp1,FP_SCR0(a6) * save result to stack
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * fetch {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * subtract scale factor
addi.l #$6000,d1 * add bias
andi.w #$7fff,d1 * clear top bit
or.w d2,d1 * concat old sign, new exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
bra.b fsgldiv_unfl_dis
*
* the divide operation MAY underflow:
*
* Borderline underflow: divide under the requested rounding and compare the
* magnitude of the scaled quotient with 1. Greater means no underflow, less
* means underflow, equal is resolved by a second divide in RZ.
fsgldiv_may_unfl:
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsgldiv.x FP_SCR0(a6),fp0 * execute sgl divide
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
fabs.x fp0,fp1 * make a copy of result
fcmp.b #$1,fp1 * compare |result| with 1.b
fbgt.w fsgldiv_normal_exit * greater; no underflow occurred
fblt.w fsgldiv_unfl * less; underflow occurred
*
* we still don't know if underflow occurred. result is ~ equal to 1. but,
* we don't know if the result was an underflow that rounded up to a 1
* or a normalized number that rounded down to a 1. so, redo the entire
* operation using RZ as the rounding mode to see what the pre-rounded
* result is. this case should be relatively rare.
*
fmovem.x FP_SCR1(a6),fp1 * load dst op into %fp1
clr.l d1 * clear scratch register
ori.b #rz_mode*$10,d1 * force RZ rnd mode
fmove.l d1,fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsgldiv.x FP_SCR0(a6),fp1 * execute sgl divide
fmove.l #$0,fpcr * clear FPCR
fabs.x fp1 * make absolute value
fcmp.b #$1,fp1 * is |result| < 1.b?
fbge.w fsgldiv_normal_exit * no; no underflow occurred
bra.w fsgldiv_unfl * yes; underflow occurred
*###########################################################################
*
* Divide: inputs are not both normalized; what are they?
*
* Table dispatch: d1 (the tag index built in fsgldiv) selects a 16-bit
* entry; each entry is the handler's offset from tbl_fsgldiv_op and is used
* as the index of the jmp. Row comments read "dst tag / src tag". The last
* two entries of every row hold offset 0.
fsgldiv_not_norm:
move.w (tbl_fsgldiv_op.b,pc,d1.w*2),d1
jmp (tbl_fsgldiv_op.b,pc,d1.w*1)
* two data words precede the table: $4AFC and the entry count (48)
.dc.w $4AFC,48
tbl_fsgldiv_op:
.dc.w fsgldiv_norm-tbl_fsgldiv_op * NORM / NORM
.dc.w fsgldiv_inf_load-tbl_fsgldiv_op * NORM / ZERO
.dc.w fsgldiv_zero_load-tbl_fsgldiv_op * NORM / INF
.dc.w fsgldiv_res_qnan-tbl_fsgldiv_op * NORM / QNAN
.dc.w fsgldiv_norm-tbl_fsgldiv_op * NORM / DENORM
.dc.w fsgldiv_res_snan-tbl_fsgldiv_op * NORM / SNAN
.dc.w tbl_fsgldiv_op-tbl_fsgldiv_op *
.dc.w tbl_fsgldiv_op-tbl_fsgldiv_op *
.dc.w fsgldiv_zero_load-tbl_fsgldiv_op * ZERO / NORM
.dc.w fsgldiv_res_operr-tbl_fsgldiv_op * ZERO / ZERO
.dc.w fsgldiv_zero_load-tbl_fsgldiv_op * ZERO / INF
.dc.w fsgldiv_res_qnan-tbl_fsgldiv_op * ZERO / QNAN
.dc.w fsgldiv_zero_load-tbl_fsgldiv_op * ZERO / DENORM
.dc.w fsgldiv_res_snan-tbl_fsgldiv_op * ZERO / SNAN
.dc.w tbl_fsgldiv_op-tbl_fsgldiv_op *
.dc.w tbl_fsgldiv_op-tbl_fsgldiv_op *
.dc.w fsgldiv_inf_dst-tbl_fsgldiv_op * INF / NORM
.dc.w fsgldiv_inf_dst-tbl_fsgldiv_op * INF / ZERO
.dc.w fsgldiv_res_operr-tbl_fsgldiv_op * INF / INF
.dc.w fsgldiv_res_qnan-tbl_fsgldiv_op * INF / QNAN
.dc.w fsgldiv_inf_dst-tbl_fsgldiv_op * INF / DENORM
.dc.w fsgldiv_res_snan-tbl_fsgldiv_op * INF / SNAN
.dc.w tbl_fsgldiv_op-tbl_fsgldiv_op *
.dc.w tbl_fsgldiv_op-tbl_fsgldiv_op *
.dc.w fsgldiv_res_qnan-tbl_fsgldiv_op * QNAN / NORM
.dc.w fsgldiv_res_qnan-tbl_fsgldiv_op * QNAN / ZERO
.dc.w fsgldiv_res_qnan-tbl_fsgldiv_op * QNAN / INF
.dc.w fsgldiv_res_qnan-tbl_fsgldiv_op * QNAN / QNAN
.dc.w fsgldiv_res_qnan-tbl_fsgldiv_op * QNAN / DENORM
.dc.w fsgldiv_res_snan-tbl_fsgldiv_op * QNAN / SNAN
.dc.w tbl_fsgldiv_op-tbl_fsgldiv_op *
.dc.w tbl_fsgldiv_op-tbl_fsgldiv_op *
.dc.w fsgldiv_norm-tbl_fsgldiv_op * DENORM / NORM
.dc.w fsgldiv_inf_load-tbl_fsgldiv_op * DENORM / ZERO
.dc.w fsgldiv_zero_load-tbl_fsgldiv_op * DENORM / INF
.dc.w fsgldiv_res_qnan-tbl_fsgldiv_op * DENORM / QNAN
.dc.w fsgldiv_norm-tbl_fsgldiv_op * DENORM / DENORM
.dc.w fsgldiv_res_snan-tbl_fsgldiv_op * DENORM / SNAN
.dc.w tbl_fsgldiv_op-tbl_fsgldiv_op *
.dc.w tbl_fsgldiv_op-tbl_fsgldiv_op *
.dc.w fsgldiv_res_snan-tbl_fsgldiv_op * SNAN / NORM
.dc.w fsgldiv_res_snan-tbl_fsgldiv_op * SNAN / ZERO
.dc.w fsgldiv_res_snan-tbl_fsgldiv_op * SNAN / INF
.dc.w fsgldiv_res_snan-tbl_fsgldiv_op * SNAN / QNAN
.dc.w fsgldiv_res_snan-tbl_fsgldiv_op * SNAN / DENORM
.dc.w fsgldiv_res_snan-tbl_fsgldiv_op * SNAN / SNAN
.dc.w tbl_fsgldiv_op-tbl_fsgldiv_op *
.dc.w tbl_fsgldiv_op-tbl_fsgldiv_op *
* Long-branch stubs referenced by tbl_fsgldiv_op. The table stores 16-bit
* offsets; each stub forwards with bra.l to a handler defined elsewhere.
fsgldiv_res_qnan:
bra.l res_qnan
fsgldiv_res_snan:
bra.l res_snan
fsgldiv_res_operr:
bra.l res_operr
fsgldiv_inf_load:
bra.l fdiv_inf_load
fsgldiv_zero_load:
bra.l fdiv_zero_load
fsgldiv_inf_dst:
bra.l fdiv_inf_dst
*########################################################################
* XDEF **************************************************************** #
* fadd(): emulates the fadd instruction #
* fsadd(): emulates the fsadd instruction #
* fdadd(): emulates the fdadd instruction #
* #
* XREF **************************************************************** #
* addsub_scaler2() - scale the operands so they won't take exc #
* ovf_res() - return default overflow result #
* unf_res() - return default underflow result #
* res_qnan() - set QNAN result #
* res_snan() - set SNAN result #
* res_operr() - set OPERR result #
* scale_to_zero_src() - set src operand exponent equal to zero #
* scale_to_zero_dst() - set dst operand exponent equal to zero #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* a1 = pointer to extended precision destination operand #
* #
* OUTPUT ************************************************************** #
* fp0 = result #
* fp1 = EXOP (if exception occurred) #
* #
* ALGORITHM *********************************************************** #
* Handle NANs, infinities, and zeroes as special cases. Divide #
* norms into extended, single, and double precision. #
* Do addition after scaling exponents such that exception won't #
* occur. Then, check result exponent to see if exception would have #
* occurred. If so, return default result and maybe EXOP. Else, insert #
* the correct result exponent and return. Set FPSR bits as appropriate. #
* #
*########################################################################
* fsadd / fdadd: keep only bits $30 of the low byte of d0 and insert the
* single (s_mode) or double (d_mode) precision code, then continue as fadd.
* fdadd falls through into fadd.
global fsadd
fsadd:
andi.b #$30,d0 * clear rnd prec
ori.b #s_mode*$10,d0 * insert sgl prec
bra.b fadd
global fdadd
fdadd:
andi.b #$30,d0 * clear rnd prec
ori.b #d_mode*$10,d0 * insert dbl prec
* fadd: saves the rounding info, builds the tag index
* d1 = (DTAG << 3) | STAG and dispatches non-zero indexes to fadd_not_norm.
global fadd
fadd:
move.l d0,L_SCR3(a6) * store rnd info
clr.w d1
move.b DTAG(a6),d1
lsl.b #$3,d1
or.b STAG(a6),d1 * combine src tags
bne.w fadd_not_norm * optimize on non-norm input
*
* ADD: norms and denorms
*
* Scale the operands, add them under the rounding prec/mode in L_SCR3 and
* merge the FPSR into USER_FPSR. A zero sum returns immediately. Otherwise
* the unscaled exponent is rebuilt in d2 and compared against the
* overflow/underflow thresholds for the rounding precision (index in d1).
* d2 and the 12-byte result stay on the stack for the exception paths.
fadd_norm:
bsr.l addsub_scaler2 * scale exponents
fadd_zero_entry:
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l #$0,fpsr * clear FPSR
fmove.l L_SCR3(a6),fpcr * set FPCR
fadd.x FP_SCR0(a6),fp0 * execute add
fmove.l #$0,fpcr * clear FPCR
fmove.l fpsr,d1 * fetch INEX2,N,Z
or.l d1,USER_FPSR(a6) * save exc and ccode bits
fbeq.w fadd_zero_exit * if result is zero, end now
move.l d2,-(sp) * save d2
fmovem.x fp0,-(sp) * save result to stack
move.w 2+L_SCR3(a6),d1 * low word of the stored rnd info
lsr.b #$6,d1 * d1 = rounding precision (table index)
move.w (sp),d2 * fetch new sign, exp
andi.l #$7fff,d2 * strip sign
sub.l d0,d2 * subtract scale factor
cmp.l (tbl_fadd_ovfl.b,pc,d1.w*4),d2 * is it an overflow?
bge.b fadd_ovfl * yes
cmp.l (tbl_fadd_unfl.b,pc,d1.w*4),d2 * is it an underflow?
blt.w fadd_unfl * yes
beq.w fadd_may_unfl * maybe; go find out
* No exception: put the unscaled exponent into the stacked result and
* return it in fp0.
fadd_normal:
move.w (sp),d1
andi.w #$8000,d1 * keep sign
or.w d2,d1 * concat sign,new exp
move.w d1,(sp) * insert new exponent
fmovem.x (sp)+,fp0 * return result in fp0
move.l (sp)+,d2 * restore d2
rts
fadd_zero_exit:
* fmov.s &0x00000000,%fp0 # return zero in fp0
rts
tbl_fadd_ovfl:
.dc.l $7fff * ext ovfl
.dc.l $407f * sgl ovfl
.dc.l $43ff * dbl ovfl
tbl_fadd_unfl:
.dc.l $0000 * ext unfl
.dc.l $3f81 * sgl unfl
.dc.l $3c01 * dbl unfl
*
* overflow: entered from the fadd main path with the 12-byte rounded result
* at (sp), the caller's d2 saved above it, and d2 = unscaled result exponent.
*
fadd_ovfl:
ori.l #ovfl_inx_mask,USER_FPSR(a6) * set ovfl/aovfl/ainex
move.b FPCR_ENABLE(a6),d1
andi.b #$13,d1 * is OVFL or INEX enabled?
bne.b fadd_ovfl_ena * yes
add.l #$c,sp * discard the stacked result (12 bytes)
* exceptions disabled (or EXOP already built): return the default overflow
* result that ovf_res leaves at (a0). only the saved d2 remains on the stack.
fadd_ovfl_dis:
btst #neg_bit,FPSR_CC(a6) * is result negative?
sne d1 * set sign param accordingly
move.l L_SCR3(a6),d0 * pass prec:rnd
bsr.l ovf_res * calculate default result
or.b d0,FPSR_CC(a6) * set INF,N if applicable
fmovem.x (a0),fp0 * return default result in fp0
move.l (sp)+,d2 * restore d2
rts
* OVFL or INEX is enabled: an EXOP must be returned in fp1 as well.
* NOTE(review): this reads the byte at offset 0 of the longword stored at
* L_SCR3, whereas every other precision test in this routine either loads
* the whole longword or reads 2+L_SCR3 / 3+L_SCR3. If the precision bits
* live only in the last byte, this test never sees them and
* fadd_ovfl_ena_sd is unreachable - confirm against the callers before
* changing it (see also the note in fadd_ovfl_ena_sd).
fadd_ovfl_ena:
move.b L_SCR3(a6),d1
andi.b #$c0,d1 * is precision extended?
bne.b fadd_ovfl_ena_sd * no; prec = sgl or dbl
* build the EXOP in place on the stack: keep the sign of the stacked
* result, use exponent (d2 - $6000) truncated to 15 bits, then pop it
* into fp1. the pop leaves only the saved d2 for fadd_ovfl_dis.
fadd_ovfl_ena_cont:
move.w (sp),d1
andi.w #$8000,d1 * keep sign
subi.l #$6000,d2 * subtract extra bias
andi.w #$7fff,d2
or.w d2,d1 * concat sign,new exp
move.w d1,(sp) * insert new exponent
fmovem.x (sp)+,fp1 * return EXOP in fp1
bra.b fadd_ovfl_dis
* sgl/dbl precision: redo the add with only the rounding mode in the FPCR
* (precision bits cleared) and replace the stacked result with it.
* NOTE(review): d2 still holds the exponent taken from the first,
* precision-rounded result; it is not recomputed from the new result -
* verify this is intended.
fadd_ovfl_ena_sd:
fmovem.x FP_SCR1(a6),fp0 * load dst op
move.l L_SCR3(a6),d1
andi.b #$30,d1 * keep rnd mode
fmove.l d1,fpcr * set FPCR
fadd.x FP_SCR0(a6),fp0 * execute add
fmove.l #$0,fpcr * clear FPCR
add.l #$c,sp * drop the old stacked result
fmovem.x fp0,-(sp) * push the re-rounded result
bra.b fadd_ovfl_ena_cont
*
* underflow: entered from the fadd main path or fadd_may_unfl with the
* 12-byte result at (sp) and the caller's d2 saved above it. d0 still holds
* the scale factor.
*
fadd_unfl:
bset #unfl_bit,FPSR_EXCEPT(a6) * set unfl exc bit
add.l #$c,sp * discard the stacked result (12 bytes)
* redo the add in round-to-zero mode (FPCR = rz_mode only) and merge the
* resulting status into USER_FPSR.
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l #rz_mode*$10,fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fadd.x FP_SCR0(a6),fp0 * execute add
fmove.l #$0,fpcr * clear FPCR
fmove.l fpsr,d1 * save status
or.l d1,USER_FPSR(a6) * save INEX,N
move.b FPCR_ENABLE(a6),d1
andi.b #$0b,d1 * is UNFL or INEX enabled?
bne.b fadd_unfl_ena * yes
* hand the RZ result to unf_res via FP_SCR0 and return what it leaves there.
fadd_unfl_dis:
fmovem.x fp0,FP_SCR0(a6) * store out result
lea FP_SCR0(a6),a0 * pass: result addr
move.l L_SCR3(a6),d1 * pass: rnd prec,mode
bsr.l unf_res * calculate default result
or.b d0,FPSR_CC(a6) * 'Z' bit may have been set
fmovem.x FP_SCR0(a6),fp0 * return default result in fp0
move.l (sp)+,d2 * restore d2
rts
* UNFL or INEX is enabled: compute the EXOP in fp1 first, then join
* fadd_unfl_dis for the default result (fp0 is left untouched here).
* d2 is used as scratch; the caller's value is still saved on the stack.
fadd_unfl_ena:
fmovem.x FP_SCR1(a6),fp1 * load dst op
move.l L_SCR3(a6),d1
andi.b #$c0,d1 * is precision extended?
bne.b fadd_unfl_ena_sd * no; sgl or dbl
fmove.l L_SCR3(a6),fpcr * set FPCR
fadd_unfl_ena_cont:
fmove.l #$0,fpsr * clear FPSR
fadd.x FP_SCR0(a6),fp1 * execute add
fmove.l #$0,fpcr * clear FPCR
* EXOP exponent = (result exponent - scale factor + $6000) truncated to
* 15 bits, with the sign of the result.
fmovem.x fp1,FP_SCR0(a6) * save result to FP_SCR0
move.w FP_SCR0_EX(a6),d1 * fetch {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * add scale factor
addi.l #$6000,d1 * add new bias
andi.w #$7fff,d1 * clear top bit
or.w d2,d1 * concat sign,new exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
bra.w fadd_unfl_dis
* sgl/dbl precision: the EXOP add uses only the rounding mode (precision
* bits cleared from the FPCR value).
fadd_unfl_ena_sd:
move.l L_SCR3(a6),d1
andi.b #$30,d1 * use only rnd mode
fmove.l d1,fpcr * set FPCR
bra.b fadd_unfl_ena_cont
*
* result is equal to the smallest normalized number in the selected precision
* if the precision is extended, this result could not have come from an
* underflow that rounded up.
*
* entered when the unscaled exponent equals the tbl_fadd_unfl entry.
* stack on entry: 12-byte result at (sp) (exponent word at 0, hi mantissa
* at 4, lo mantissa at 8), saved d2 above it. every test that fails goes
* back to fadd_normal with the stack unchanged.
fadd_may_unfl:
move.l L_SCR3(a6),d1
andi.b #$c0,d1 * is precision extended?
beq.w fadd_normal * yes; no underflow occurred
move.l $4(sp),d1 * extract hi(man)
cmpi.l #$80000000,d1 * is hi(man) = 0x80000000?
bne.w fadd_normal * no; no underflow occurred
tst.l $8(sp) * is lo(man) = 0x0?
bne.w fadd_normal * no; no underflow occurred
btst #inex2_bit,FPSR_EXCEPT(a6) * is INEX2 set?
beq.w fadd_normal * no; no underflow occurred
*
* ok, so now the result has an exponent equal to the smallest normalized
* exponent for the selected precision. also, the mantissa is equal to
* 0x8000000000000000 and this mantissa is the result of rounding non-zero
* g,r,s.
* now, we must determine whether the pre-rounded result was an underflow
* rounded "up" or a normalized number rounded "down".
* so, we do this by re-executing the add using RZ as the rounding mode and
* seeing if the new result is smaller or equal to the current result.
*
fmovem.x FP_SCR1(a6),fp1 * load dst op into fp1
move.l L_SCR3(a6),d1
andi.b #$c0,d1 * keep rnd prec
ori.b #rz_mode*$10,d1 * insert rnd mode
fmove.l d1,fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fadd.x FP_SCR0(a6),fp1 * execute add
fmove.l #$0,fpcr * clear FPCR
* fp0 (first result) is overwritten with its absolute value here; both
* continuations use other copies (fadd_normal pops the stacked result,
* fadd_unfl recomputes the add).
fabs.x fp0 * compare absolute values
fabs.x fp1
fcmp.x fp1,fp0 * is first result > second?
fbgt.w fadd_unfl * yes; it's an underflow
bra.w fadd_normal * no; it's not an underflow
*#########################################################################
*
* Add: inputs are not both normalized; what are they?
*
* d1 = (DTAG shifted left by 3) | STAG on entry. the table holds 16-bit
* offsets relative to tbl_fadd_op; each row of 8 entries is one dst tag and
* each column one src tag, in the order NORM, ZERO, INF, QNAN, DENORM, SNAN
* followed by two filler entries (offset 0). entries are written "dst + src".
fadd_not_norm:
move.w (tbl_fadd_op.b,pc,d1.w*2),d1
jmp (tbl_fadd_op.b,pc,d1.w*1)
* data words placed between the jmp and the table, so never executed.
* presumably a jump-table marker followed by the entry count (48) - confirm.
.dc.w $4AFC,48
tbl_fadd_op:
.dc.w fadd_norm-tbl_fadd_op * NORM + NORM
.dc.w fadd_zero_src-tbl_fadd_op * NORM + ZERO
.dc.w fadd_inf_src-tbl_fadd_op * NORM + INF
.dc.w fadd_res_qnan-tbl_fadd_op * NORM + QNAN
.dc.w fadd_norm-tbl_fadd_op * NORM + DENORM
.dc.w fadd_res_snan-tbl_fadd_op * NORM + SNAN
.dc.w tbl_fadd_op-tbl_fadd_op * (filler)
.dc.w tbl_fadd_op-tbl_fadd_op * (filler)
.dc.w fadd_zero_dst-tbl_fadd_op * ZERO + NORM
.dc.w fadd_zero_2-tbl_fadd_op * ZERO + ZERO
.dc.w fadd_inf_src-tbl_fadd_op * ZERO + INF
.dc.w fadd_res_qnan-tbl_fadd_op * ZERO + QNAN
.dc.w fadd_zero_dst-tbl_fadd_op * ZERO + DENORM
.dc.w fadd_res_snan-tbl_fadd_op * ZERO + SNAN
.dc.w tbl_fadd_op-tbl_fadd_op * (filler)
.dc.w tbl_fadd_op-tbl_fadd_op * (filler)
.dc.w fadd_inf_dst-tbl_fadd_op * INF + NORM
.dc.w fadd_inf_dst-tbl_fadd_op * INF + ZERO
.dc.w fadd_inf_2-tbl_fadd_op * INF + INF
.dc.w fadd_res_qnan-tbl_fadd_op * INF + QNAN
.dc.w fadd_inf_dst-tbl_fadd_op * INF + DENORM
.dc.w fadd_res_snan-tbl_fadd_op * INF + SNAN
.dc.w tbl_fadd_op-tbl_fadd_op * (filler)
.dc.w tbl_fadd_op-tbl_fadd_op * (filler)
.dc.w fadd_res_qnan-tbl_fadd_op * QNAN + NORM
.dc.w fadd_res_qnan-tbl_fadd_op * QNAN + ZERO
.dc.w fadd_res_qnan-tbl_fadd_op * QNAN + INF
.dc.w fadd_res_qnan-tbl_fadd_op * QNAN + QNAN
.dc.w fadd_res_qnan-tbl_fadd_op * QNAN + DENORM
.dc.w fadd_res_snan-tbl_fadd_op * QNAN + SNAN
.dc.w tbl_fadd_op-tbl_fadd_op * (filler)
.dc.w tbl_fadd_op-tbl_fadd_op * (filler)
.dc.w fadd_norm-tbl_fadd_op * DENORM + NORM
.dc.w fadd_zero_src-tbl_fadd_op * DENORM + ZERO
.dc.w fadd_inf_src-tbl_fadd_op * DENORM + INF
.dc.w fadd_res_qnan-tbl_fadd_op * DENORM + QNAN
.dc.w fadd_norm-tbl_fadd_op * DENORM + DENORM
.dc.w fadd_res_snan-tbl_fadd_op * DENORM + SNAN
.dc.w tbl_fadd_op-tbl_fadd_op * (filler)
.dc.w tbl_fadd_op-tbl_fadd_op * (filler)
.dc.w fadd_res_snan-tbl_fadd_op * SNAN + NORM
.dc.w fadd_res_snan-tbl_fadd_op * SNAN + ZERO
.dc.w fadd_res_snan-tbl_fadd_op * SNAN + INF
.dc.w fadd_res_snan-tbl_fadd_op * SNAN + QNAN
.dc.w fadd_res_snan-tbl_fadd_op * SNAN + DENORM
.dc.w fadd_res_snan-tbl_fadd_op * SNAN + SNAN
.dc.w tbl_fadd_op-tbl_fadd_op * (filler)
.dc.w tbl_fadd_op-tbl_fadd_op * (filler)
* long-branch trampolines: the table offsets are 16-bit, the shared NAN
* handlers are reached with bra.l.
fadd_res_qnan:
bra.l res_qnan
fadd_res_snan:
bra.l res_snan
*
* both operands are ZEROes
*
* d0 = src sign/exponent byte (preserved), d1 = src byte xor dst byte, so
* bit 7 of d1 is set exactly when the two signs differ.
fadd_zero_2:
move.b SRC_EX.w(a0),d0 * are the signs opposite
move.b DST_EX.w(a1),d1
eor.b d0,d1
bmi.w fadd_zero_2_chk_rm * weed out (-ZERO)+(+ZERO)
* the signs are the same. so determine whether they are positive or negative
* and return the appropriately signed zero.
tst.b d0 * are ZEROes positive or negative?
bmi.b fadd_zero_rm * negative
fmove.s #$00000000,fp0 * return +ZERO
move.b #z_bmask,FPSR_CC(a6) * set Z
rts
*
* the ZEROes have opposite signs:
* - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
* - -ZERO is returned in the case of RM.
*
fadd_zero_2_chk_rm:
move.b 3+L_SCR3(a6),d1 * low byte of the stored rnd info
andi.b #$30,d1 * extract rnd mode
cmpi.b #rm_mode*$10,d1 * is rnd mode == RM?
beq.b fadd_zero_rm * yes
fmove.s #$00000000,fp0 * return +ZERO
move.b #z_bmask,FPSR_CC(a6) * set Z
rts
* shared -ZERO exit: used for (-ZERO)+(-ZERO) and for opposite signs in RM.
fadd_zero_rm:
fmove.s #$80000000,fp0 * return -ZERO
move.b #neg_bmask+z_bmask,FPSR_CC(a6) * set NEG/Z
rts
*
* one operand is a ZERO and the other is a DENORM or NORM. scale
* the DENORM or NORM and jump to the regular fadd routine.
*
* dst is ZERO: copy the src operand into FP_SCR0, scale it, clear FP_SCR1
* (the dst slot loaded by fadd_zero_entry) to +0 and run the normal add.
* d0 after scale_to_zero_src is used by fadd_zero_entry as the scale factor.
fadd_zero_dst:
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_to_zero_src * scale the operand
clr.w FP_SCR1_EX(a6)
clr.l FP_SCR1_HI(a6)
clr.l FP_SCR1_LO(a6)
bra.w fadd_zero_entry * go execute fadd
* src is ZERO: mirror image of the above - dst goes to FP_SCR1 and is
* scaled, FP_SCR0 (the src slot) is cleared.
fadd_zero_src:
move.w DST_EX.w(a1),FP_SCR1_EX(a6)
move.l DST_HI(a1),FP_SCR1_HI(a6)
move.l DST_LO(a1),FP_SCR1_LO(a6)
bsr.l scale_to_zero_dst * scale the operand
clr.w FP_SCR0_EX(a6)
clr.l FP_SCR0_HI(a6)
clr.l FP_SCR0_LO(a6)
bra.w fadd_zero_entry * go execute fadd
*
* both operands are INFs. an OPERR will result if the INFs have
* different signs. else, an INF of the same sign is returned
*
* INF + INF: bit 7 of (src byte xor dst byte) set means opposite signs,
* which is an operand error. otherwise fall through and return the src INF.
fadd_inf_2:
move.b SRC_EX.w(a0),d0 * exclusive or the signs
move.b DST_EX.w(a1),d1
eor.b d1,d0
bmi.l res_operr * weed out (-INF)+(+INF)
* ok, so it's not an OPERR. but, we do have to remember to return the
* src INF since that's where the 881/882 gets the j-bit from...
*
* src operand is INF; dst is one of {ZERO, INF, DENORM, NORM}
*
fadd_inf_src:
fmovem.x SRC.w(a0),fp0 * return src INF
tst.b SRC_EX.w(a0) * is INF positive?
bpl.b fadd_inf_done * yes; we're done
move.b #neg_bmask+inf_bmask,FPSR_CC(a6) * set INF/NEG
rts
*
* dst operand is INF; src is one of {ZERO, DENORM, NORM}
*
fadd_inf_dst:
fmovem.x DST.w(a1),fp0 * return dst INF
tst.b DST_EX.w(a1) * is INF positive?
bpl.b fadd_inf_done * yes; we're done
move.b #neg_bmask+inf_bmask,FPSR_CC(a6) * set INF/NEG
rts
* shared exit for a positive INF result
fadd_inf_done:
move.b #inf_bmask,FPSR_CC(a6) * set INF
rts
*########################################################################
* XDEF **************************************************************** #
* fsub(): emulates the fsub instruction #
* fssub(): emulates the fssub instruction #
* fdsub(): emulates the fdsub instruction #
* #
* XREF **************************************************************** #
* addsub_scaler2() - scale the operands so they won't take exc #
* ovf_res() - return default overflow result #
* unf_res() - return default underflow result #
* res_qnan() - set QNAN result #
* res_snan() - set SNAN result #
* res_operr() - set OPERR result #
* scale_to_zero_src() - set src operand exponent equal to zero #
* scale_to_zero_dst() - set dst operand exponent equal to zero #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* a1 = pointer to extended precision destination operand #
* #
* OUTPUT ************************************************************** #
* fp0 = result #
* fp1 = EXOP (if exception occurred) #
* #
* ALGORITHM *********************************************************** #
* Handle NANs, infinities, and zeroes as special cases. Divide #
* norms into extended, single, and double precision. #
* Do subtraction after scaling exponents such that exception won't#
* occur. Then, check result exponent to see if exception would have #
* occurred. If so, return default result and maybe EXOP. Else, insert #
* the correct result exponent and return. Set FPSR bits as appropriate. #
* #
*########################################################################
*
* fssub/fdsub/fsub entry points.
* d0 carries the rounding precision/mode byte on entry. fssub and fdsub
* keep only the bits selected by $30 (the rounding mode) and insert their
* own precision before joining the common fsub path. the result computed
* is dst - src (FP_SCR1 - FP_SCR0).
*
global fssub
fssub:
andi.b #$30,d0 * clear rnd prec
ori.b #s_mode*$10,d0 * insert sgl prec
bra.b fsub
global fdsub
fdsub:
andi.b #$30,d0 * clear rnd prec
ori.b #d_mode*$10,d0 * insert dbl prec
global fsub
fsub:
move.l d0,L_SCR3(a6) * store rnd info
* build the dispatch index d1 = (DTAG shifted left by 3) | STAG. zero means
* both tags are zero (the NORM - NORM entry of tbl_fsub_op).
clr.w d1
move.b DTAG(a6),d1
lsl.b #$3,d1
or.b STAG(a6),d1 * combine src tags
bne.w fsub_not_norm * optimize on non-norm input
*
* SUB: norms and denorms
*
fsub_norm:
bsr.l addsub_scaler2 * scale exponents
* fsub_zero_entry is also reached from fsub_zero_dst/fsub_zero_src, which
* prepare FP_SCR0/FP_SCR1 themselves. d0 is used below as the scale factor.
fsub_zero_entry:
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l #$0,fpsr * clear FPSR
fmove.l L_SCR3(a6),fpcr * set FPCR
fsub.x FP_SCR0(a6),fp0 * execute subtract
fmove.l #$0,fpcr * clear FPCR
fmove.l fpsr,d1 * fetch INEX2, N, Z
or.l d1,USER_FPSR(a6) * save exc and ccode bits
fbeq.w fsub_zero_exit * if result zero, end now
* from here on the stack holds: 12-byte result at (sp), saved d2 above it.
* every exit path below must pop both.
move.l d2,-(sp) * save d2
fmovem.x fp0,-(sp) * save result to stack
* d1 = precision field (top two bits of the low byte of the word at
* 2+L_SCR3) shifted down, used to index the ext/sgl/dbl table rows.
move.w 2+L_SCR3(a6),d1
lsr.b #$6,d1
move.w (sp),d2 * fetch new sign, exp
andi.l #$7fff,d2 * strip sign
* d2 = scaled result exponent - d0, i.e. the exponent with the scaling undone
sub.l d0,d2 * add scale factor
cmp.l (tbl_fsub_ovfl.b,pc,d1.w*4),d2 * is it an overflow?
bge.b fsub_ovfl * yes
cmp.l (tbl_fsub_unfl.b,pc,d1.w*4),d2 * is it an underflow?
blt.w fsub_unfl * yes
beq.w fsub_may_unfl * maybe; go find out
* no exception: patch the unscaled exponent (d2) into the stacked result,
* keeping its sign bit, and return it in fp0.
fsub_normal:
move.w (sp),d1
andi.w #$8000,d1 * keep sign
or.w d2,d1 * concat sign,new exp
move.w d1,(sp) * insert new exponent
fmovem.x (sp)+,fp0 * return result in fp0
move.l (sp)+,d2 * restore d2
rts
* zero result: fp0 is returned exactly as the fsub left it; nothing was
* pushed on this path so there is nothing to pop.
fsub_zero_exit:
* fmov.s &0x00000000,%fp0 # return zero in fp0
rts
* exponent limits indexed by precision (ext, sgl, dbl).
* an unscaled exponent >= the ovfl entry is handled as an overflow.
tbl_fsub_ovfl:
.dc.l $7fff * ext ovfl
.dc.l $407f * sgl ovfl
.dc.l $43ff * dbl ovfl
* an unscaled exponent < the unfl entry is an underflow; == the entry is
* examined further by fsub_may_unfl.
tbl_fsub_unfl:
.dc.l $0000 * ext unfl
.dc.l $3f81 * sgl unfl
.dc.l $3c01 * dbl unfl
*
* overflow: entered from the fsub main path with the 12-byte rounded result
* at (sp), the caller's d2 saved above it, and d2 = unscaled result exponent.
*
fsub_ovfl:
ori.l #ovfl_inx_mask,USER_FPSR(a6) * set ovfl/aovfl/ainex
move.b FPCR_ENABLE(a6),d1
andi.b #$13,d1 * is OVFL or INEX enabled?
bne.b fsub_ovfl_ena * yes
add.l #$c,sp * discard the stacked result (12 bytes)
* exceptions disabled (or EXOP already built): return the default overflow
* result that ovf_res leaves at (a0). only the saved d2 remains on the stack.
fsub_ovfl_dis:
btst #neg_bit,FPSR_CC(a6) * is result negative?
sne d1 * set sign param accordingly
move.l L_SCR3(a6),d0 * pass prec:rnd
bsr.l ovf_res * calculate default result
or.b d0,FPSR_CC(a6) * set INF,N if applicable
fmovem.x (a0),fp0 * return default result in fp0
move.l (sp)+,d2 * restore d2
rts
* OVFL or INEX is enabled: an EXOP must be returned in fp1 as well.
* NOTE(review): this reads the byte at offset 0 of the longword stored at
* L_SCR3, whereas every other precision test in this routine either loads
* the whole longword or reads 2+L_SCR3 / 3+L_SCR3. If the precision bits
* live only in the last byte, this test never sees them and
* fsub_ovfl_ena_sd is unreachable - confirm against the callers before
* changing it (see also the note in fsub_ovfl_ena_sd).
fsub_ovfl_ena:
move.b L_SCR3(a6),d1
andi.b #$c0,d1 * is precision extended?
bne.b fsub_ovfl_ena_sd * no
* build the EXOP in place on the stack: keep the sign of the stacked
* result, use exponent (d2 - $6000) truncated to 15 bits, then pop it
* into fp1. the pop leaves only the saved d2 for fsub_ovfl_dis.
fsub_ovfl_ena_cont:
move.w (sp),d1 * fetch {sgn,exp}
andi.w #$8000,d1 * keep sign
subi.l #$6000,d2 * subtract new bias
andi.w #$7fff,d2 * clear top bit
or.w d2,d1 * concat sign,exp
move.w d1,(sp) * insert new exponent
fmovem.x (sp)+,fp1 * return EXOP in fp1
bra.b fsub_ovfl_dis
* sgl/dbl precision: redo the subtract with only the rounding mode in the
* FPCR (precision bits cleared) and replace the stacked result with it.
* NOTE(review): d2 still holds the exponent taken from the first,
* precision-rounded result; it is not recomputed from the new result -
* verify this is intended.
fsub_ovfl_ena_sd:
fmovem.x FP_SCR1(a6),fp0 * load dst op
move.l L_SCR3(a6),d1
andi.b #$30,d1 * clear rnd prec
fmove.l d1,fpcr * set FPCR
fsub.x FP_SCR0(a6),fp0 * execute subtract
fmove.l #$0,fpcr * clear FPCR
add.l #$c,sp * drop the old stacked result
fmovem.x fp0,-(sp) * push the re-rounded result
bra.b fsub_ovfl_ena_cont
*
* underflow: entered from the fsub main path or fsub_may_unfl with the
* 12-byte result at (sp) and the caller's d2 saved above it. d0 still holds
* the scale factor.
*
fsub_unfl:
bset #unfl_bit,FPSR_EXCEPT(a6) * set unfl exc bit
add.l #$c,sp * discard the stacked result (12 bytes)
* redo the subtract in round-to-zero mode (FPCR = rz_mode only) and merge
* the resulting status into USER_FPSR.
fmovem.x FP_SCR1(a6),fp0 * load dst op
fmove.l #rz_mode*$10,fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsub.x FP_SCR0(a6),fp0 * execute subtract
fmove.l #$0,fpcr * clear FPCR
fmove.l fpsr,d1 * save status
or.l d1,USER_FPSR(a6) * save INEX,N
move.b FPCR_ENABLE(a6),d1
andi.b #$0b,d1 * is UNFL or INEX enabled?
bne.b fsub_unfl_ena * yes
* hand the RZ result to unf_res via FP_SCR0 and return what it leaves there.
fsub_unfl_dis:
fmovem.x fp0,FP_SCR0(a6) * store out result
lea FP_SCR0(a6),a0 * pass: result addr
move.l L_SCR3(a6),d1 * pass: rnd prec,mode
bsr.l unf_res * calculate default result
or.b d0,FPSR_CC(a6) * 'Z' may have been set
fmovem.x FP_SCR0(a6),fp0 * return default result in fp0
move.l (sp)+,d2 * restore d2
rts
* UNFL or INEX is enabled: compute the EXOP in fp1 first, then join
* fsub_unfl_dis for the default result (fp0 is left untouched here).
* d2 is used as scratch; the caller's value is still saved on the stack.
fsub_unfl_ena:
fmovem.x FP_SCR1(a6),fp1 * load dst op
move.l L_SCR3(a6),d1
andi.b #$c0,d1 * is precision extended?
bne.b fsub_unfl_ena_sd * no
fmove.l L_SCR3(a6),fpcr * set FPCR
fsub_unfl_ena_cont:
fmove.l #$0,fpsr * clear FPSR
fsub.x FP_SCR0(a6),fp1 * execute subtract
fmove.l #$0,fpcr * clear FPCR
* EXOP exponent = (result exponent - scale factor + $6000) truncated to
* 15 bits, with the sign of the result.
fmovem.x fp1,FP_SCR0(a6) * store result to FP_SCR0
move.w FP_SCR0_EX(a6),d1 * fetch {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * add scale factor
addi.l #$6000,d1 * add new bias
andi.w #$7fff,d1 * clear top bit
or.w d2,d1 * concat sgn,exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
bra.w fsub_unfl_dis
* sgl/dbl precision: the EXOP subtract uses only the rounding mode
* (precision bits cleared from the FPCR value).
fsub_unfl_ena_sd:
move.l L_SCR3(a6),d1
andi.b #$30,d1 * clear rnd prec
fmove.l d1,fpcr * set FPCR
bra.b fsub_unfl_ena_cont
*
* result is equal to the smallest normalized number in the selected precision
* if the precision is extended, this result could not have come from an
* underflow that rounded up.
*
* entered when the unscaled exponent equals the tbl_fsub_unfl entry.
* stack on entry: 12-byte result at (sp) (exponent word at 0, hi mantissa
* at 4, lo mantissa at 8), saved d2 above it. every test that fails goes
* back to fsub_normal with the stack unchanged.
fsub_may_unfl:
move.l L_SCR3(a6),d1
andi.b #$c0,d1 * is precision extended?
beq.w fsub_normal * yes; no underflow occurred
move.l $4(sp),d1 * extract hi(man)
cmpi.l #$80000000,d1 * is hi(man) = 0x80000000?
bne.w fsub_normal * no; no underflow occurred
tst.l $8(sp) * is lo(man) = 0x0?
bne.w fsub_normal * no; no underflow occurred
btst #inex2_bit,FPSR_EXCEPT(a6) * is INEX2 set?
beq.w fsub_normal * no; no underflow occurred
*
* ok, so now the result has an exponent equal to the smallest normalized
* exponent for the selected precision. also, the mantissa is equal to
* 0x8000000000000000 and this mantissa is the result of rounding non-zero
* g,r,s.
* now, we must determine whether the pre-rounded result was an underflow
* rounded "up" or a normalized number rounded "down".
* so, we do this by re-executing the subtract using RZ as the rounding mode
* and seeing if the new result is smaller or equal to the current result.
*
fmovem.x FP_SCR1(a6),fp1 * load dst op into fp1
move.l L_SCR3(a6),d1
andi.b #$c0,d1 * keep rnd prec
ori.b #rz_mode*$10,d1 * insert rnd mode
fmove.l d1,fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsub.x FP_SCR0(a6),fp1 * execute subtract
fmove.l #$0,fpcr * clear FPCR
* fp0 (first result) is overwritten with its absolute value here; both
* continuations use other copies (fsub_normal pops the stacked result,
* fsub_unfl recomputes the subtract).
fabs.x fp0 * compare absolute values
fabs.x fp1
fcmp.x fp1,fp0 * is first result > second?
fbgt.w fsub_unfl * yes; it's an underflow
bra.w fsub_normal * no; it's not an underflow
*#########################################################################
*
* Sub: inputs are not both normalized; what are they?
*
* d1 = (DTAG shifted left by 3) | STAG on entry. the table holds 16-bit
* offsets relative to tbl_fsub_op; each row of 8 entries is one dst tag and
* each column one src tag, in the order NORM, ZERO, INF, QNAN, DENORM, SNAN
* followed by two filler entries (offset 0). entries are written "dst - src".
fsub_not_norm:
move.w (tbl_fsub_op.b,pc,d1.w*2),d1
jmp (tbl_fsub_op.b,pc,d1.w*1)
* data words placed between the jmp and the table, so never executed.
* presumably a jump-table marker followed by the entry count (48) - confirm.
.dc.w $4AFC,48
tbl_fsub_op:
.dc.w fsub_norm-tbl_fsub_op * NORM - NORM
.dc.w fsub_zero_src-tbl_fsub_op * NORM - ZERO
.dc.w fsub_inf_src-tbl_fsub_op * NORM - INF
.dc.w fsub_res_qnan-tbl_fsub_op * NORM - QNAN
.dc.w fsub_norm-tbl_fsub_op * NORM - DENORM
.dc.w fsub_res_snan-tbl_fsub_op * NORM - SNAN
.dc.w tbl_fsub_op-tbl_fsub_op * (filler)
.dc.w tbl_fsub_op-tbl_fsub_op * (filler)
.dc.w fsub_zero_dst-tbl_fsub_op * ZERO - NORM
.dc.w fsub_zero_2-tbl_fsub_op * ZERO - ZERO
.dc.w fsub_inf_src-tbl_fsub_op * ZERO - INF
.dc.w fsub_res_qnan-tbl_fsub_op * ZERO - QNAN
.dc.w fsub_zero_dst-tbl_fsub_op * ZERO - DENORM
.dc.w fsub_res_snan-tbl_fsub_op * ZERO - SNAN
.dc.w tbl_fsub_op-tbl_fsub_op * (filler)
.dc.w tbl_fsub_op-tbl_fsub_op * (filler)
.dc.w fsub_inf_dst-tbl_fsub_op * INF - NORM
.dc.w fsub_inf_dst-tbl_fsub_op * INF - ZERO
.dc.w fsub_inf_2-tbl_fsub_op * INF - INF
.dc.w fsub_res_qnan-tbl_fsub_op * INF - QNAN
.dc.w fsub_inf_dst-tbl_fsub_op * INF - DENORM
.dc.w fsub_res_snan-tbl_fsub_op * INF - SNAN
.dc.w tbl_fsub_op-tbl_fsub_op * (filler)
.dc.w tbl_fsub_op-tbl_fsub_op * (filler)
.dc.w fsub_res_qnan-tbl_fsub_op * QNAN - NORM
.dc.w fsub_res_qnan-tbl_fsub_op * QNAN - ZERO
.dc.w fsub_res_qnan-tbl_fsub_op * QNAN - INF
.dc.w fsub_res_qnan-tbl_fsub_op * QNAN - QNAN
.dc.w fsub_res_qnan-tbl_fsub_op * QNAN - DENORM
.dc.w fsub_res_snan-tbl_fsub_op * QNAN - SNAN
.dc.w tbl_fsub_op-tbl_fsub_op * (filler)
.dc.w tbl_fsub_op-tbl_fsub_op * (filler)
.dc.w fsub_norm-tbl_fsub_op * DENORM - NORM
.dc.w fsub_zero_src-tbl_fsub_op * DENORM - ZERO
.dc.w fsub_inf_src-tbl_fsub_op * DENORM - INF
.dc.w fsub_res_qnan-tbl_fsub_op * DENORM - QNAN
.dc.w fsub_norm-tbl_fsub_op * DENORM - DENORM
.dc.w fsub_res_snan-tbl_fsub_op * DENORM - SNAN
.dc.w tbl_fsub_op-tbl_fsub_op * (filler)
.dc.w tbl_fsub_op-tbl_fsub_op * (filler)
.dc.w fsub_res_snan-tbl_fsub_op * SNAN - NORM
.dc.w fsub_res_snan-tbl_fsub_op * SNAN - ZERO
.dc.w fsub_res_snan-tbl_fsub_op * SNAN - INF
.dc.w fsub_res_snan-tbl_fsub_op * SNAN - QNAN
.dc.w fsub_res_snan-tbl_fsub_op * SNAN - DENORM
.dc.w fsub_res_snan-tbl_fsub_op * SNAN - SNAN
.dc.w tbl_fsub_op-tbl_fsub_op * (filler)
.dc.w tbl_fsub_op-tbl_fsub_op * (filler)
* long-branch trampolines: the table offsets are 16-bit, the shared NAN
* handlers are reached with bra.l.
fsub_res_qnan:
bra.l res_qnan
fsub_res_snan:
bra.l res_snan
*
* both operands are ZEROes
*
* ZERO - ZERO (result = dst - src).
* d0 = src sign byte xor dst sign byte, so bit 7 of d0 is set exactly when
* the signs differ. d1 keeps the unmodified dst sign/exponent byte.
fsub_zero_2:
move.b SRC_EX.w(a0),d0
move.b DST_EX.w(a1),d1
eor.b d1,d0
bpl.b fsub_zero_2_chk_rm
* the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO.
* d0 was overwritten by the eor above and always has bit 7 set on this
* path, so the dst sign has to be tested in d1, which still holds the dst
* byte. testing d0 here would always select -ZERO, making (+0)-(-0)
* return -0 instead of +0.
tst.b d1 * is dst negative?
bmi.b fsub_zero_2_rm * yes
fmove.s #$00000000,fp0 * no; return +ZERO
move.b #z_bmask,FPSR_CC(a6) * set Z
rts
*
* the ZEROes have the same signs:
* - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
* - -ZERO is returned in the case of RM.
*
fsub_zero_2_chk_rm:
move.b 3+L_SCR3(a6),d1 * low byte of the stored rnd info
andi.b #$30,d1 * extract rnd mode
cmpi.b #rm_mode*$10,d1 * is rnd mode = RM?
beq.b fsub_zero_2_rm * yes
fmove.s #$00000000,fp0 * no; return +ZERO
move.b #z_bmask,FPSR_CC(a6) * set Z
rts
* shared -ZERO exit: used for (-ZERO)-(+ZERO) and for equal signs in RM.
fsub_zero_2_rm:
fmove.s #$80000000,fp0 * return -ZERO
move.b #z_bmask+neg_bmask,FPSR_CC(a6) * set Z/NEG
rts
*
* one operand is a ZERO and the other is a DENORM or a NORM.
* scale the DENORM or NORM and jump to the regular fsub routine.
*
* dst is ZERO: copy the src operand into FP_SCR0, scale it, clear FP_SCR1
* (the dst slot loaded by fsub_zero_entry) to +0 and run the normal
* subtract. d0 after scale_to_zero_src is used by fsub_zero_entry as the
* scale factor.
fsub_zero_dst:
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_to_zero_src * scale the operand
clr.w FP_SCR1_EX(a6)
clr.l FP_SCR1_HI(a6)
clr.l FP_SCR1_LO(a6)
bra.w fsub_zero_entry * go execute fsub
* src is ZERO: mirror image of the above - dst goes to FP_SCR1 and is
* scaled, FP_SCR0 (the src slot) is cleared.
fsub_zero_src:
move.w DST_EX.w(a1),FP_SCR1_EX(a6)
move.l DST_HI(a1),FP_SCR1_HI(a6)
move.l DST_LO(a1),FP_SCR1_LO(a6)
bsr.l scale_to_zero_dst * scale the operand
clr.w FP_SCR0_EX(a6)
clr.l FP_SCR0_HI(a6)
clr.l FP_SCR0_LO(a6)
bra.w fsub_zero_entry * go execute fsub
*
* both operands are INFs. an OPERR will result if the INFs have the
* same signs. else, execution falls through to fsub_inf_src, which returns
* the src INF with its sign inverted.
*
fsub_inf_2:
move.b SRC_EX.w(a0),d0 * exclusive or the signs
move.b DST_EX.w(a1),d1
eor.b d1,d0
bpl.l res_operr * weed out same-sign INFs, e.g. (+INF)-(+INF)
* ok, so it's not an OPERR. but we do have to remember to return
* the src INF since that's where the 881/882 gets the j-bit.
*
* src operand is INF: the result is the src INF negated.
*
fsub_inf_src:
fmovem.x SRC.w(a0),fp0 * return src INF
fneg.x fp0 * invert sign
fbge.w fsub_inf_done * sign is now positive
move.b #neg_bmask+inf_bmask,FPSR_CC(a6) * set INF/NEG
rts
*
* dst operand is INF and src is not: the result is the dst INF unchanged.
*
fsub_inf_dst:
fmovem.x DST.w(a1),fp0 * return dst INF
tst.b DST_EX.w(a1) * is INF negative?
bpl.b fsub_inf_done * no
move.b #neg_bmask+inf_bmask,FPSR_CC(a6) * set INF/NEG
rts
* shared exit for a positive INF result
fsub_inf_done:
move.b #inf_bmask,FPSR_CC(a6) * set INF
rts
*########################################################################
* XDEF **************************************************************** #
* fsqrt(): emulates the fsqrt instruction #
* fssqrt(): emulates the fssqrt instruction #
* fdsqrt(): emulates the fdsqrt instruction #
* #
* XREF **************************************************************** #
* scale_sqrt() - scale the source operand #
* unf_res() - return default underflow result #
* ovf_res() - return default overflow result #
* res_qnan_1op() - return QNAN result #
* res_snan_1op() - return SNAN result #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* d0 rnd prec,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = result #
* fp1 = EXOP (if exception occurred) #
* #
* ALGORITHM *********************************************************** #
* Handle NANs, infinities, and zeroes as special cases. Divide #
* norms/denorms into ext/sgl/dbl precision. #
* For norms/denorms, scale the exponents such that a sqrt #
* instruction won't cause an exception. Use the regular fsqrt to #
* compute a result. Check if the regular operands would have taken #
* an exception. If so, return the default overflow/underflow result #
* and return the EXOP if exceptions are enabled. Else, scale the #
* result operand to the proper exponent. #
* #
*########################################################################
*
* fssqrt/fdsqrt/fsqrt entry points.
* d0 carries the rounding precision/mode byte on entry. fssqrt and fdsqrt
* keep only the bits selected by $30 (the rounding mode) and insert their
* own precision before joining the common fsqrt path. a0 points to the
* source operand.
*
global fssqrt
fssqrt:
andi.b #$30,d0 * clear rnd prec
ori.b #s_mode*$10,d0 * insert sgl precision
bra.b fsqrt
global fdsqrt
fdsqrt:
andi.b #$30,d0 * clear rnd prec
ori.b #d_mode*$10,d0 * insert dbl precision
global fsqrt
fsqrt:
move.l d0,L_SCR3(a6) * store rnd info
clr.w d1
move.b STAG(a6),d1
bne.w fsqrt_not_norm * optimize on non-norm input
*
* SQUARE ROOT: norms and denorms ONLY!
*
* normalized operand. a negative operand is an operand error. for extended
* precision the square root is taken directly from (a0) with no scaling.
fsqrt_norm:
tst.b SRC_EX.w(a0) * is operand negative?
bmi.l res_operr * yes
andi.b #$c0,d0 * is precision extended?
bne.b fsqrt_not_ext * no; go handle sgl or dbl
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsqrt.x (a0),fp0 * execute square root
fmove.l fpsr,d1
or.l d1,USER_FPSR(a6) * set N,INEX
* NOTE(review): unlike the scaled paths below, the FPCR is not cleared
* again before returning here - confirm the caller does not rely on it.
rts
* denormalized operand. for extended precision it is copied to FP_SCR0 and
* scaled, then handled by fsqrt_sd_normal, which rounds with the precision
* stored in L_SCR3 and removes the scale factor (d0) from the result.
fsqrt_denorm:
tst.b SRC_EX.w(a0) * is operand negative?
bmi.l res_operr * yes
andi.b #$c0,d0 * is precision extended?
bne.b fsqrt_not_ext * no; go handle sgl or dbl
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_sqrt * calculate scale factor
bra.w fsqrt_sd_normal
*
* operand is either single or double
*
* d0 holds only the precision bits here (masked with $c0 by the caller).
fsqrt_not_ext:
cmpi.b #s_mode*$10,d0 * separate sgl/dbl prec
bne.w fsqrt_dbl
*
* operand is to be rounded to single precision
*
* the operand is copied to FP_SCR0 and scaled; the scale factor returned in
* d0 is then compared against the single-precision limits. equality with a
* limit is a borderline case that is examined separately.
fsqrt_sgl:
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_sqrt * calculate scale factor
cmpi.l #$3fff-$3f81,d0 * will move in underflow?
beq.w fsqrt_sd_may_unfl * maybe; go check
bgt.w fsqrt_sd_unfl * yes; go handle underflow
cmpi.l #$3fff-$407f,d0 * will move in overflow?
beq.w fsqrt_sd_may_ovfl * maybe; go check
blt.w fsqrt_sd_ovfl * yes; go handle overflow
*
* operand will NOT overflow or underflow when moved in to the fp reg file
*
fsqrt_sd_normal:
fmove.l #$0,fpsr * clear FPSR
fmove.l L_SCR3(a6),fpcr * set FPCR
fsqrt.x FP_SCR0(a6),fp0 * perform square root
fmove.l fpsr,d1 * save FPSR
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
* also entered from fsqrt_sd_may_ovfl with the result already in fp0.
* the result is stored to FP_SCR0, its exponent is replaced by
* (exponent - d0) with the sign bit kept, and it is reloaded into fp0.
fsqrt_sd_normal_exit:
move.l d2,-(sp) * save d2
fmovem.x fp0,FP_SCR0(a6) * store out result
move.w FP_SCR0_EX(a6),d1 * load sgn,exp
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
sub.l d0,d1 * add scale factor
andi.w #$8000,d2 * keep old sign
or.w d1,d2 * concat old sign,new exp
move.w d2,FP_SCR0_EX(a6) * insert new exponent
move.l (sp)+,d2 * restore d2
fmovem.x FP_SCR0(a6),fp0 * return result in fp0
rts
*
* operand is to be rounded to double precision
*
* same sequence as fsqrt_sgl, compared against the double-precision limits.
fsqrt_dbl:
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
bsr.l scale_sqrt * calculate scale factor
cmpi.l #$3fff-$3c01,d0 * will move in underflow?
beq.w fsqrt_sd_may_unfl * maybe; go check
bgt.b fsqrt_sd_unfl * yes; go handle underflow
cmpi.l #$3fff-$43ff,d0 * will move in overflow?
beq.w fsqrt_sd_may_ovfl * maybe; go check
blt.w fsqrt_sd_ovfl * yes; go handle overflow
bra.w fsqrt_sd_normal * no; go handle normalized op
* we're on the line here and the distinguishing characteristic is the low
* bit of the scaled operand's exponent. if that bit is set (the code below
* labels this as exponent 0x3fff) the result is safe; otherwise fall
* through to underflow.
fsqrt_sd_may_unfl:
btst #$0,1+FP_SCR0_EX(a6) * is exponent 0x3fff?
bne.w fsqrt_sd_normal * yes, so no underflow
*
* operand WILL underflow when moved in to the fp register file
*
* the square root is taken in round-to-zero mode (FPCR = rz_mode only) and
* its status merged into USER_FPSR. d0 still holds the scale factor.
fsqrt_sd_unfl:
bset #unfl_bit,FPSR_EXCEPT(a6) * set unfl exc bit
fmove.l #rz_mode*$10,fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fsqrt.x FP_SCR0(a6),fp0 * execute square root
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
* if underflow or inexact is enabled, go calculate EXOP first.
move.b FPCR_ENABLE(a6),d1
andi.b #$0b,d1 * is UNFL or INEX enabled?
bne.b fsqrt_sd_unfl_ena * yes
* hand the RZ result to unf_res via FP_SCR0 and return what it leaves there.
fsqrt_sd_unfl_dis:
fmovem.x fp0,FP_SCR0(a6) * store out result
lea FP_SCR0(a6),a0 * pass: result addr
move.l L_SCR3(a6),d1 * pass: rnd prec,mode
bsr.l unf_res * calculate default result
or.b d0,FPSR_CC(a6) * set possible 'Z' ccode
fmovem.x FP_SCR0(a6),fp0 * return default result in fp0
rts
*
* operand will underflow AND underflow is enabled.
* therefore, we must return the result rounded to extended precision.
*
* NOTE(review): the EXOP mantissa and exponent are taken from FP_SCR0, which
* at this point still holds the scaled source operand (the square root in
* fp0 has not been stored yet) - confirm this is the intended EXOP.
fsqrt_sd_unfl_ena:
move.l FP_SCR0_HI(a6),FP_SCR1_HI(a6)
move.l FP_SCR0_LO(a6),FP_SCR1_LO(a6)
move.w FP_SCR0_EX(a6),d1 * load current exponent
move.l d2,-(sp) * save d2
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * subtract scale factor
addi.l #$6000,d1 * add new bias
andi.w #$7fff,d1
or.w d2,d1 * concat new sign,new exp
move.w d1,FP_SCR1_EX(a6) * insert new exp
fmovem.x FP_SCR1(a6),fp1 * return EXOP in fp1
move.l (sp)+,d2 * restore d2
bra.b fsqrt_sd_unfl_dis
*
* operand WILL overflow.
*
* the square root is taken with the user's precision/mode so that the
* status bits reflect the rounding, then the overflow is reported.
fsqrt_sd_ovfl:
fmove.l #$0,fpsr * clear FPSR
fmove.l L_SCR3(a6),fpcr * set FPCR
fsqrt.x FP_SCR0(a6),fp0 * perform square root
fmove.l #$0,fpcr * clear FPCR
fmove.l fpsr,d1 * save FPSR
or.l d1,USER_FPSR(a6) * save INEX2,N
* also entered from fsqrt_sd_may_ovfl once the result is known to overflow.
fsqrt_sd_ovfl_tst:
ori.l #ovfl_inx_mask,USER_FPSR(a6) * set ovfl/aovfl/ainex
move.b FPCR_ENABLE(a6),d1
andi.b #$13,d1 * is OVFL or INEX enabled?
bne.b fsqrt_sd_ovfl_ena * yes
*
* OVFL is not enabled; therefore, we must create the default result by
* calling ovf_res().
*
fsqrt_sd_ovfl_dis:
btst #neg_bit,FPSR_CC(a6) * is result negative?
sne d1 * set sign param accordingly
move.l L_SCR3(a6),d0 * pass: prec,mode
bsr.l ovf_res * calculate default result
or.b d0,FPSR_CC(a6) * set INF,N if applicable
fmovem.x (a0),fp0 * return default result in fp0
rts
*
* OVFL is enabled.
* the INEX2 bit has already been updated by the round to the correct precision.
* now, round to extended(and don't alter the FPSR).
*
* the EXOP exponent is (exponent - scale factor - $6000) truncated to 15
* bits, with the sign kept.
* NOTE(review): the EXOP is built from FP_SCR0, which on both paths into
* this label still holds the scaled source operand (the square root in fp0
* is never stored to FP_SCR0 first) - confirm this is the intended EXOP.
fsqrt_sd_ovfl_ena:
move.l d2,-(sp) * save d2
move.w FP_SCR0_EX(a6),d1 * fetch {sgn,exp}
move.l d1,d2 * make a copy
andi.l #$7fff,d1 * strip sign
andi.w #$8000,d2 * keep old sign
sub.l d0,d1 * add scale factor
subi.l #$6000,d1 * subtract bias
andi.w #$7fff,d1
or.w d2,d1 * concat sign,exp
move.w d1,FP_SCR0_EX(a6) * insert new exponent
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
move.l (sp)+,d2 * restore d2
bra.b fsqrt_sd_ovfl_dis
*
* the move in MAY overflow. so...
*
* borderline case: if the low bit of the scaled operand's exponent is set
* it is a definite overflow. otherwise take the square root and see
* whether rounding pushed the scaled result up to 1 or beyond.
fsqrt_sd_may_ovfl:
btst #$0,1+FP_SCR0_EX(a6) * is exponent 0x3fff?
bne.w fsqrt_sd_ovfl * yes, so overflow
fmove.l #$0,fpsr * clear FPSR
fmove.l L_SCR3(a6),fpcr * set FPCR
fsqrt.x FP_SCR0(a6),fp0 * perform square root
fmove.l fpsr,d1 * save status
fmove.l #$0,fpcr * clear FPCR
or.l d1,USER_FPSR(a6) * save INEX2,N
fmove.x fp0,fp1 * make a copy of result
fcmp.b #$1,fp1 * is |result| >= 1.b?
fbge.w fsqrt_sd_ovfl_tst * yes; overflow has occurred
* no, it didn't overflow; we have correct result
bra.w fsqrt_sd_normal_exit
*#########################################################################
*
* input is not normalized; what is it?
*
* d1 = STAG (non-zero) on entry. dispatch on the operand type with a
* compare chain; anything that is not DENORM, ZERO, INF or SNAN is treated
* as a QNAN.
fsqrt_not_norm:
cmpi.b #DENORM,d1 * weed out DENORM
beq.w fsqrt_denorm
cmpi.b #ZERO,d1 * weed out ZERO
beq.b fsqrt_zero
cmpi.b #INF,d1 * weed out INF
beq.b fsqrt_inf
cmpi.b #SNAN,d1 * weed out SNAN
beq.l res_snan_1op
bra.l res_qnan_1op
*
* fsqrt(+0) = +0
* fsqrt(-0) = -0
* fsqrt(+INF) = +INF
* fsqrt(-INF) = OPERR
*
fsqrt_zero:
tst.b SRC_EX.w(a0) * is ZERO positive or negative?
bmi.b fsqrt_zero_m * negative
fsqrt_zero_p:
fmove.s #$00000000,fp0 * return +ZERO
move.b #z_bmask,FPSR_CC(a6) * set 'Z' ccode bit
rts
fsqrt_zero_m:
fmove.s #$80000000,fp0 * return -ZERO
move.b #z_bmask+neg_bmask,FPSR_CC(a6) * set 'Z','N' ccode bits
rts
fsqrt_inf:
tst.b SRC_EX.w(a0) * is INF positive or negative?
bmi.l res_operr * negative
* positive INF: the source operand itself is returned.
fsqrt_inf_p:
fmovem.x SRC.w(a0),fp0 * return +INF in fp0
move.b #inf_bmask,FPSR_CC(a6) * set 'I' ccode bit
rts
*#########################################################################
*########################################################################
* XDEF **************************************************************** #
* addsub_scaler2(): scale inputs to fadd/fsub such that no #
* OVFL/UNFL exceptions will result #
* #
* XREF **************************************************************** #
* norm() - normalize mantissa after adjusting exponent #
* #
* INPUT *************************************************************** #
* FP_SRC(a6) = fp op1(src) #
* FP_DST(a6) = fp op2(dst) #
* #
* OUTPUT ************************************************************** #
* FP_SRC(a6) = fp op1 scaled(src) #
* FP_DST(a6) = fp op2 scaled(dst) #
* d0 = scale amount #
* #
* ALGORITHM *********************************************************** #
* If the DST exponent is > the SRC exponent, set the DST exponent #
* equal to 0x3fff and scale the SRC exponent by the value that the #
* DST exponent was scaled by. If the SRC exponent is greater or equal, #
* do the opposite. Return this scale factor in d0. #
* If the two exponents differ by > the number of mantissa bits #
* plus two, then set the smallest exponent to a very small value as a #
* quick shortcut. #
* #
*########################################################################
global addsub_scaler2
* Working storage used below:
* FP_SCR0(a6) = copy of the src operand, FP_SCR1(a6) = copy of the dst operand
* L_SCR1(a6) = src exponent (word), 2+L_SCR1(a6) = dst exponent (word)
* (sp) = scale factor returned by scale_to_zero_*, kept across norm()
addsub_scaler2:
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l DST_HI(a1),FP_SCR1_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
move.l DST_LO(a1),FP_SCR1_LO(a6)
move.w SRC_EX.w(a0),d0
move.w DST_EX.w(a1),d1
move.w d0,FP_SCR0_EX(a6)
move.w d1,FP_SCR1_EX(a6)
andi.w #$7fff,d0 * strip sign; d0 = src exponent
andi.w #$7fff,d1 * strip sign; d1 = dst exponent
move.w d0,L_SCR1(a6) * store src exponent
move.w d1,2+L_SCR1(a6) * store dst exponent
cmp.w d1,d0 * is src exp >= dst exp?
bge.l src_exp_ge2
* dst exp is > src exp; scale dst to exp = 0x3fff
dst_exp_gt2:
bsr.l scale_to_zero_dst
move.l d0,-(sp) * save scale factor
cmpi.b #DENORM,STAG(a6) * is src denormalized?
bne.b cmpexp12
lea FP_SCR0(a6),a0
bsr.l norm * normalize the denorm; result is new exp
neg.w d0 * new exp = -(shft val)
move.w d0,L_SCR1(a6) * insert new src exp
cmpexp12:
move.w 2+L_SCR1(a6),d0
subi.w #mantissalen+2,d0 * subtract mantissalen+2 from larger exp
cmp.w L_SCR1(a6),d0 * is difference >= len(mantissa)+2?
bge.b quick_scale12
move.w L_SCR1(a6),d0
add.w $2(sp),d0 * scale src exponent by (low word of) scale factor
move.w FP_SCR0_EX(a6),d1
andi.w #$8000,d1 * keep only the src sign
or.w d1,d0 * concat {sgn,new exp}
move.w d0,FP_SCR0_EX(a6) * insert new src exponent
move.l (sp)+,d0 * return SCALE factor
rts
* exponents are too far apart: force the smaller (src) exponent to 1
quick_scale12:
andi.w #$8000,FP_SCR0_EX(a6) * zero src exponent
bset #$0,1+FP_SCR0_EX(a6) * set exp = 1
move.l (sp)+,d0 * return SCALE factor
rts
* src exp is >= dst exp; scale src to exp = 0x3fff
src_exp_ge2:
bsr.l scale_to_zero_src
move.l d0,-(sp) * save scale factor
cmpi.b #DENORM,DTAG(a6) * is dst denormalized?
bne.b cmpexp22
lea FP_SCR1(a6),a0
bsr.l norm * normalize the denorm; result is new exp
neg.w d0 * new exp = -(shft val)
move.w d0,2+L_SCR1(a6) * insert new dst exp
cmpexp22:
move.w L_SCR1(a6),d0
subi.w #mantissalen+2,d0 * subtract mantissalen+2 from larger exp
cmp.w 2+L_SCR1(a6),d0 * is difference >= len(mantissa)+2?
bge.b quick_scale22
move.w 2+L_SCR1(a6),d0
add.w $2(sp),d0 * scale dst exponent by (low word of) scale factor
move.w FP_SCR1_EX(a6),d1
andi.w #$8000,d1 * keep only the dst sign
or.w d1,d0 * concat {sgn,new exp}
move.w d0,FP_SCR1_EX(a6) * insert new dst exponent
move.l (sp)+,d0 * return SCALE factor
rts
* exponents are too far apart: force the smaller (dst) exponent to 1
quick_scale22:
andi.w #$8000,FP_SCR1_EX(a6) * zero dst exponent
bset #$0,1+FP_SCR1_EX(a6) * set exp = 1
move.l (sp)+,d0 * return SCALE factor
rts
*#########################################################################
*########################################################################
* XDEF **************************************************************** #
* scale_to_zero_src(): scale the exponent of extended precision #
* value at FP_SCR0(a6). #
* #
* XREF **************************************************************** #
* norm() - normalize the mantissa if the operand was a DENORM #
* #
* INPUT *************************************************************** #
* FP_SCR0(a6) = extended precision operand to be scaled #
* #
* OUTPUT ************************************************************** #
* FP_SCR0(a6) = scaled extended precision operand #
* d0 = scale value #
* #
* ALGORITHM *********************************************************** #
* Set the exponent of the input operand to 0x3fff. Save the value #
* of the difference between the original and new exponent. Then, #
* normalize the operand if it was a DENORM. Add this normalization #
* value to the previous value. Return the result. #
* #
*########################################################################
global scale_to_zero_src
* Returns d0 = $3fff - (effective exponent of the operand in FP_SCR0).
* For a DENORM the effective exponent is -(shift count returned by norm).
scale_to_zero_src:
move.w FP_SCR0_EX(a6),d1 * extract operand's {sgn,exp}
move.w d1,d0 * make a copy
andi.l #$7fff,d1 * extract operand's exponent
andi.w #$8000,d0 * extract operand's sgn
ori.w #$3fff,d0 * insert new operand's exponent(=0)
move.w d0,FP_SCR0_EX(a6) * insert biased exponent
cmpi.b #DENORM,STAG(a6) * is operand a DENORM?
beq.b stzs_denorm * yes; normalize the DENORM
stzs_norm:
move.l #$3fff,d0
sub.l d1,d0 * scale = BIAS + (-exp)
rts
stzs_denorm:
lea FP_SCR0(a6),a0 * pass ptr to src op
bsr.l norm * normalize denorm
neg.l d0 * new exponent = -(shft val)
move.l d0,d1 * d1 = exponent used by stzs_norm
bra.b stzs_norm * finish scaling
*##
*########################################################################
* XDEF **************************************************************** #
* scale_sqrt(): scale the input operand exponent so a subsequent #
* fsqrt operation won't take an exception. #
* #
* XREF **************************************************************** #
* norm() - normalize the mantissa if the operand was a DENORM #
* #
* INPUT *************************************************************** #
* FP_SCR0(a6) = extended precision operand to be scaled #
* #
* OUTPUT ************************************************************** #
* FP_SCR0(a6) = scaled extended precision operand #
* d0 = scale value #
* #
* ALGORITHM *********************************************************** #
* If the input operand is a DENORM, normalize it. #
* If the exponent of the input operand is even, set the exponent #
* to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the #
* exponent of the input operand is odd, set the exponent to 0x3fff and #
* return a scale factor of "(0x3fff-exp)/2". #
* #
*########################################################################
global scale_sqrt
* The new exponent ($3fff or $3ffe) is chosen with the same parity as the
* original exponent, so the difference halved by asr.l is always even.
scale_sqrt:
cmpi.b #DENORM,STAG(a6) * is operand a DENORM?
beq.b ss_denorm * yes; normalize the DENORM
move.w FP_SCR0_EX(a6),d1 * extract operand's {sgn,exp}
andi.l #$7fff,d1 * extract operand's exponent
andi.w #$8000,FP_SCR0_EX(a6) * extract operand's sgn
btst #$0,d1 * is exp even or odd?
beq.b ss_norm_even
ori.w #$3fff,FP_SCR0_EX(a6) * insert new operand's exponent(=0)
move.l #$3fff,d0
sub.l d1,d0 * scale = BIAS + (-exp)
asr.l #$1,d0 * divide scale factor by 2
rts
ss_norm_even:
ori.w #$3ffe,FP_SCR0_EX(a6) * insert new operand's exponent(=-1)
move.l #$3ffe,d0
sub.l d1,d0 * scale = (BIAS-1) + (-exp)
asr.l #$1,d0 * divide scale factor by 2
rts
ss_denorm:
lea FP_SCR0(a6),a0 * pass ptr to src op
bsr.l norm * normalize denorm
btst #$0,d0 * is shift count (and so the exp) even or odd?
beq.b ss_denorm_even
ori.w #$3fff,FP_SCR0_EX(a6) * insert new operand's exponent(=0)
addi.l #$3fff,d0 * scale = BIAS + shft val
asr.l #$1,d0 * divide scale factor by 2
rts
ss_denorm_even:
ori.w #$3ffe,FP_SCR0_EX(a6) * insert new operand's exponent(=-1)
addi.l #$3ffe,d0 * scale = (BIAS-1) + shft val
asr.l #$1,d0 * divide scale factor by 2
rts
*##
*########################################################################
* XDEF **************************************************************** #
* scale_to_zero_dst(): scale the exponent of extended precision #
* value at FP_SCR1(a6). #
* #
* XREF **************************************************************** #
* norm() - normalize the mantissa if the operand was a DENORM #
* #
* INPUT *************************************************************** #
* FP_SCR1(a6) = extended precision operand to be scaled #
* #
* OUTPUT ************************************************************** #
* FP_SCR1(a6) = scaled extended precision operand #
* d0 = scale value #
* #
* ALGORITHM *********************************************************** #
* Set the exponent of the input operand to 0x3fff. Save the value #
* of the difference between the original and new exponent. Then, #
* normalize the operand if it was a DENORM. Add this normalization #
* value to the previous value. Return the result. #
* #
*########################################################################
global scale_to_zero_dst
* Returns d0 = $3fff - (effective exponent of the operand in FP_SCR1).
* For a DENORM the effective exponent is -(shift count returned by norm).
scale_to_zero_dst:
move.w FP_SCR1_EX(a6),d1 * extract operand's {sgn,exp}
move.w d1,d0 * make a copy
andi.l #$7fff,d1 * extract operand's exponent
andi.w #$8000,d0 * extract operand's sgn
ori.w #$3fff,d0 * insert new operand's exponent(=0)
move.w d0,FP_SCR1_EX(a6) * insert biased exponent
cmpi.b #DENORM,DTAG(a6) * is operand a DENORM?
beq.b stzd_denorm * yes; normalize the DENORM
stzd_norm:
move.l #$3fff,d0
sub.l d1,d0 * scale = BIAS + (-exp)
rts
stzd_denorm:
lea FP_SCR1(a6),a0 * pass ptr to dst op
bsr.l norm * normalize denorm
neg.l d0 * new exponent = -(shft val)
move.l d0,d1 * d1 = exponent used by stzd_norm
bra.b stzd_norm * finish scaling
*#########################################################################
*########################################################################
* XDEF **************************************************************** #
* res_qnan(): return default result w/ QNAN operand for dyadic #
* res_snan(): return default result w/ SNAN operand for dyadic #
* res_qnan_1op(): return dflt result w/ QNAN operand for monadic #
* res_snan_1op(): return dflt result w/ SNAN operand for monadic #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* FP_SRC(a6) = pointer to extended precision src operand #
* FP_DST(a6) = pointer to extended precision dst operand #
* #
* OUTPUT ************************************************************** #
* fp0 = default result #
* #
* ALGORITHM *********************************************************** #
* If either operand (but not both operands) of an operation is a #
* nonsignalling NAN, then that NAN is returned as the result. If both #
* operands are nonsignalling NANs, then the destination operand #
* nonsignalling NAN is returned as the result. #
* If either operand to an operation is a signalling NAN (SNAN), #
* then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
* enable bit is set in the FPCR, then the trap is taken and the #
* destination is not modified. If the SNAN trap enable bit is not set, #
* then the SNAN is converted to a nonsignalling NAN (by setting the #
* SNAN bit in the operand to one), and the operation continues as #
* described in the preceding paragraph, for nonsignalling NANs. #
* Make sure the appropriate FPSR bits are set before exiting. #
* #
*########################################################################
global res_qnan
global res_snan
* res_qnan/res_snan: dyadic entry points. A NAN in the dst takes priority
* over a NAN in the src. res_qnan_1op/res_snan_1op: monadic entry points
* that always use the src operand.
* In : STAG(a6)/DTAG(a6) = operand tags, FP_SRC(a6)/FP_DST(a6) = operands
* Out: fp0 = the selected NAN (bit 6 of the hi mantissa is set first when
* the selected operand is an SNAN); USER_FPSR(a6) gets NAN, plus
* SNAN+AIOP when an SNAN was involved, plus N when the NAN is negative.
res_qnan:
res_snan:
cmpi.b #SNAN,DTAG(a6) * is the dst an SNAN?
beq.b dst_snan2
cmpi.b #QNAN,DTAG(a6) * is the dst a QNAN?
beq.b dst_qnan2
src_nan:
cmp.b #QNAN,STAG(a6) * dst is not a NAN; is the src a QNAN?
beq.b src_qnan2
global res_snan_1op
res_snan_1op:
src_snan2:
bset #$6,FP_SRC_HI(a6) * set SNAN bit
ori.l #nan_mask+aiop_mask+snan_mask,USER_FPSR(a6)
lea FP_SRC(a6),a0
bra.b nan_comp
global res_qnan_1op
res_qnan_1op:
src_qnan2:
ori.l #nan_mask,USER_FPSR(a6)
lea FP_SRC(a6),a0
bra.b nan_comp
dst_snan2:
ori.l #nan_mask+aiop_mask+snan_mask,USER_FPSR(a6)
bset #$6,FP_DST_HI(a6) * set SNAN bit
lea FP_DST(a6),a0
bra.b nan_comp
dst_qnan2:
lea FP_DST(a6),a0 * dst QNAN is the result...
cmp.b #SNAN,STAG(a6) * ...but a src SNAN must still be flagged
bne.b nan_done * short branch; it only skips the next ori.l
ori.l #aiop_mask+snan_mask,USER_FPSR(a6)
nan_done:
ori.l #nan_mask,USER_FPSR(a6)
nan_comp:
btst #$7,FTEMP_EX.w(a0) * is NAN neg?
beq.b nan_not_neg
ori.l #neg_mask,USER_FPSR(a6)
nan_not_neg:
fmovem.x (a0),fp0 * return the selected NAN
rts
*########################################################################
* XDEF **************************************************************** #
* res_operr(): return default result during operand error #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* None #
* #
* OUTPUT ************************************************************** #
* fp0 = default operand error result #
* #
* ALGORITHM *********************************************************** #
* A nonsignalling NAN is returned as the default result when #
* an operand error occurs for the following cases: #
* #
* Multiply: (Infinity x Zero) #
* Divide : (Zero / Zero) || (Infinity / Infinity) #
* #
*########################################################################
global res_operr
* Flags NAN, OPERR and AIOP in USER_FPSR(a6) and loads the constant
* extended-precision NAN stored at nan_return into fp0.
res_operr:
ori.l #nan_mask+operr_mask+aiop_mask,USER_FPSR(a6)
fmovem.x nan_return.l(pc),fp0 * fp0 = default NAN
rts
* default NAN: exponent word $7fff, mantissa all ones
nan_return:
.dc.l $7fff0000,$ffffffff,$ffffffff
*########################################################################
* fdbcc(): routine to emulate the fdbcc instruction #
* #
* XDEF **************************************************************** #
* _fdbcc() #
* #
* XREF **************************************************************** #
* fetch_dreg() - fetch Dn value #
* store_dreg_l() - store updated Dn value #
* #
* INPUT *************************************************************** #
* d0 = displacement #
* #
* OUTPUT ************************************************************** #
* none #
* #
* ALGORITHM *********************************************************** #
* This routine checks which conditional predicate is specified by #
* the stacked fdbcc instruction opcode and then branches to a routine #
* for that predicate. The corresponding fbcc instruction is then used #
* to see whether the condition (specified by the stacked FPSR) is true #
* or false. #
* If a BSUN exception should be indicated, the BSUN and AIOP #
* bits are set in the stacked FPSR. If the BSUN exception is enabled, #
* the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
* enabled BSUN should not be flagged and the predicate is false, then #
* Dn is fetched and decremented by one. If Dn is not equal to -1, add #
* the displacement value to the stacked PC so that when an "rte" is #
* finally executed, the branch occurs. #
* #
*########################################################################
global _fdbcc
* Dispatch: the stacked ccode byte is loaded into the live FPSR so that the
* fbcc in each handler evaluates the emulated instruction's predicate; the
* predicate word then indexes tbl_fdbcc (word offsets relative to the table).
_fdbcc:
move.l d0,L_SCR1(a6) * save displacement
move.w EXC_CMDREG(a6),d0 * fetch predicate
clr.l d1 * clear scratch reg
move.b FPSR_CC(a6),d1 * fetch fp ccodes
ror.l #$8,d1 * rotate to top byte
fmove.l d1,fpsr * insert into FPSR
move.w (tbl_fdbcc.b,pc,d0.w*2),d1 * load table
jmp (tbl_fdbcc.b,pc,d1.w) * jump to fdbcc routine
* one entry per predicate value 0-31
tbl_fdbcc:
.dc.w fdbcc_f-tbl_fdbcc * 00
.dc.w fdbcc_eq-tbl_fdbcc * 01
.dc.w fdbcc_ogt-tbl_fdbcc * 02
.dc.w fdbcc_oge-tbl_fdbcc * 03
.dc.w fdbcc_olt-tbl_fdbcc * 04
.dc.w fdbcc_ole-tbl_fdbcc * 05
.dc.w fdbcc_ogl-tbl_fdbcc * 06
.dc.w fdbcc_or-tbl_fdbcc * 07
.dc.w fdbcc_un-tbl_fdbcc * 08
.dc.w fdbcc_ueq-tbl_fdbcc * 09
.dc.w fdbcc_ugt-tbl_fdbcc * 10
.dc.w fdbcc_uge-tbl_fdbcc * 11
.dc.w fdbcc_ult-tbl_fdbcc * 12
.dc.w fdbcc_ule-tbl_fdbcc * 13
.dc.w fdbcc_neq-tbl_fdbcc * 14
.dc.w fdbcc_t-tbl_fdbcc * 15
.dc.w fdbcc_sf-tbl_fdbcc * 16
.dc.w fdbcc_seq-tbl_fdbcc * 17
.dc.w fdbcc_gt-tbl_fdbcc * 18
.dc.w fdbcc_ge-tbl_fdbcc * 19
.dc.w fdbcc_lt-tbl_fdbcc * 20
.dc.w fdbcc_le-tbl_fdbcc * 21
.dc.w fdbcc_gl-tbl_fdbcc * 22
.dc.w fdbcc_gle-tbl_fdbcc * 23
.dc.w fdbcc_ngle-tbl_fdbcc * 24
.dc.w fdbcc_ngl-tbl_fdbcc * 25
.dc.w fdbcc_nle-tbl_fdbcc * 26
.dc.w fdbcc_nlt-tbl_fdbcc * 27
.dc.w fdbcc_nge-tbl_fdbcc * 28
.dc.w fdbcc_ngt-tbl_fdbcc * 29
.dc.w fdbcc_sneq-tbl_fdbcc * 30
.dc.w fdbcc_st-tbl_fdbcc * 31
*########################################################################
* #
* IEEE Nonaware tests #
* #
* For the IEEE nonaware tests, only the false branch changes the #
* counter. However, the true branch may set bsun so we check to see #
* if the NAN bit is set, in which case BSUN and AIOP will be set. #
* #
* The cases EQ and NE are shared by the Aware and Nonaware groups #
* and are incapable of setting the BSUN exception bit. #
* #
* Typically, only one of the two possible branch directions could #
* have the NAN bit set. #
* (This is assuming the mutual exclusiveness of FPSR cc bit groupings #
* is preserved.) #
* #
*########################################################################
*
* equal:
*
* Z
*
* EQ never raises BSUN: a true predicate just returns, a false one counts.
fdbcc_eq:
fbeq.w fdbcc_eq_yes * equal?
fdbcc_eq_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_eq_yes:
rts * yes; do nothing
*
* not equal:
* _
* Z
*
* NE never raises BSUN: a true predicate just returns, a false one counts.
fdbcc_neq:
fbne.w fdbcc_neq_yes * not equal?
fdbcc_neq_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_neq_yes:
rts * yes; do nothing
*
* greater than:
* _______
* NANvZvN
*
* GT: only the false path checks the NAN ccode; if it is set, BSUN+AIOP are
* recorded and, when BSUN is enabled, the exception path is taken instead of
* the counter update.
fdbcc_gt:
fbgt.w fdbcc_gt_yes * greater than?
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fdbcc_false * no;go handle counter
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
bra.w fdbcc_false * no; go handle counter
fdbcc_gt_yes:
rts * do nothing
*
* not greater than:
*
* NANvZvN
*
* NGT: only the true path checks the NAN ccode (BSUN+AIOP recorded, and the
* exception path taken when BSUN is enabled); the false path just counts.
fdbcc_ngt:
fbngt.w fdbcc_ngt_yes * not greater than?
fdbcc_ngt_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_ngt_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.b fdbcc_ngt_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
fdbcc_ngt_done:
rts * no; do nothing
*
* greater than or equal:
* _____
* Zv(NANvN)
*
* GE: both outcomes check the NAN ccode. With NAN set, BSUN+AIOP are recorded
* and an enabled BSUN diverts to fdbcc_bsun; otherwise false counts and true
* returns.
fdbcc_ge:
fbge.w fdbcc_ge_yes * greater than or equal?
fdbcc_ge_no:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fdbcc_false * no;go handle counter
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
bra.w fdbcc_false * no; go handle counter
fdbcc_ge_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.b fdbcc_ge_yes_done * no;go do nothing
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
fdbcc_ge_yes_done:
rts * do nothing
*
* not (greater than or equal):
* _
* NANv(N^Z)
*
* NGE: only the true path checks the NAN ccode; the false path just counts.
fdbcc_nge:
fbnge.w fdbcc_nge_yes * not (greater than or equal)?
fdbcc_nge_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_nge_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.b fdbcc_nge_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
fdbcc_nge_done:
rts * no; do nothing
*
* less than:
* _____
* N^(NANvZ)
*
* LT: only the false path checks the NAN ccode; the true path just returns.
fdbcc_lt:
fblt.w fdbcc_lt_yes * less than?
fdbcc_lt_no:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fdbcc_false * no; go handle counter
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
bra.w fdbcc_false * no; go handle counter
fdbcc_lt_yes:
rts * do nothing
*
* not less than:
* _
* NANv(ZvN)
*
* NLT: only the true path checks the NAN ccode; the false path just counts.
fdbcc_nlt:
fbnlt.w fdbcc_nlt_yes * not less than?
fdbcc_nlt_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_nlt_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.b fdbcc_nlt_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
fdbcc_nlt_done:
rts * no; do nothing
*
* less than or equal:
* ___
* Zv(N^NAN)
*
* LE: both outcomes check the NAN ccode. With NAN set, BSUN+AIOP are recorded
* and an enabled BSUN diverts to fdbcc_bsun; otherwise false counts and true
* returns.
fdbcc_le:
fble.w fdbcc_le_yes * less than or equal?
fdbcc_le_no:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fdbcc_false * no; go handle counter
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
bra.w fdbcc_false * no; go handle counter
fdbcc_le_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.b fdbcc_le_yes_done * no; go do nothing
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
fdbcc_le_yes_done:
rts * do nothing
*
* not (less than or equal):
* ___
* NANv(NvZ)
*
* NLE: only the true path checks the NAN ccode (BSUN+AIOP recorded, and the
* exception path taken when BSUN is enabled); the false path just counts.
fdbcc_nle:
fbnle.w fdbcc_nle_yes * not (less than or equal)?
fdbcc_nle_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_nle_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.b fdbcc_nle_done * no; go finish (short branch, as in the sibling handlers)
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
fdbcc_nle_done:
rts * no; do nothing
*
* greater or less than:
* _____
* NANvZ
*
* GL: only the false path checks the NAN ccode; the true path just returns.
fdbcc_gl:
fbgl.w fdbcc_gl_yes * greater or less than?
fdbcc_gl_no:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fdbcc_false * no; handle counter
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
bra.w fdbcc_false * no; go handle counter
fdbcc_gl_yes:
rts * do nothing
*
* not (greater or less than):
*
* NANvZ
*
* NGL: only the true path checks the NAN ccode; the false path just counts.
fdbcc_ngl:
fbngl.w fdbcc_ngl_yes * not (greater or less than)?
fdbcc_ngl_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_ngl_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.b fdbcc_ngl_done * no; go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
fdbcc_ngl_done:
rts * no; do nothing
*
* greater, less, or equal:
* ___
* NAN
*
* GLE: the false path records BSUN+AIOP without a separate NAN test
* and diverts to fdbcc_bsun when BSUN is enabled; otherwise it counts.
fdbcc_gle:
fbgle.w fdbcc_gle_yes * greater, less, or equal?
fdbcc_gle_no:
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
bra.w fdbcc_false * no; go handle counter
fdbcc_gle_yes:
rts * do nothing
*
* not (greater, less, or equal):
*
* NAN
*
* NGLE: the true path records BSUN+AIOP without a separate NAN test
* and diverts to fdbcc_bsun when BSUN is enabled; the false path counts.
fdbcc_ngle:
fbngle.w fdbcc_ngle_yes * not (greater, less, or equal)?
fdbcc_ngle_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_ngle_yes:
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
rts * no; do nothing
*########################################################################
* #
* Miscellaneous tests #
* #
* For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
* #
*########################################################################
*
* false:
*
* False
*
* F: predicate is always false, so the counter is always updated.
fdbcc_f: * no bsun possible
bra.w fdbcc_false * go handle counter
*
* true:
*
* True
*
* T: predicate is always true, so the counter is never touched.
fdbcc_t: * no bsun possible
rts * do nothing
*
* signalling false:
*
* False
*
* SF: always false, but a set NAN ccode records BSUN+AIOP first and an
* enabled BSUN diverts to fdbcc_bsun instead of updating the counter.
fdbcc_sf:
btst #nan_bit,FPSR_CC(a6) * is NAN set?
beq.w fdbcc_false * no;go handle counter
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
bra.w fdbcc_false * go handle counter
*
* signalling true:
*
* True
*
* ST: always true (counter untouched), but a set NAN ccode records
* BSUN+AIOP and an enabled BSUN diverts to fdbcc_bsun.
fdbcc_st:
btst #nan_bit,FPSR_CC(a6) * is NAN set?
beq.b fdbcc_st_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
fdbcc_st_done:
rts * do nothing
*
* signalling equal:
*
* Z
*
* SEQ: both outcomes check the NAN ccode. With NAN set, BSUN+AIOP are
* recorded and an enabled BSUN diverts to fdbcc_bsun; otherwise false
* counts and true returns.
fdbcc_seq:
fbseq.w fdbcc_seq_yes * signalling equal?
fdbcc_seq_no:
btst #nan_bit,FPSR_CC(a6) * is NAN set?
beq.w fdbcc_false * no;go handle counter
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
bra.w fdbcc_false * go handle counter
fdbcc_seq_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set?
beq.b fdbcc_seq_yes_done * no;go do nothing
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
fdbcc_seq_yes_done:
rts * yes; do nothing
*
* signalling not equal:
* _
* Z
*
* SNE: both outcomes check the NAN ccode. With NAN set, BSUN+AIOP are
* recorded and an enabled BSUN diverts to fdbcc_bsun; otherwise false
* counts and true returns.
fdbcc_sneq:
fbsne.w fdbcc_sneq_yes * signalling not equal?
fdbcc_sneq_no:
btst #nan_bit,FPSR_CC(a6) * is NAN set?
beq.w fdbcc_false * no;go handle counter
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
bra.w fdbcc_false * go handle counter
fdbcc_sneq_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set?
beq.b fdbcc_sneq_done * no;go finish (short branch, as in fdbcc_seq_yes)
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fdbcc_bsun * yes; we have an exception
fdbcc_sneq_done:
rts * do nothing
*########################################################################
* #
* IEEE Aware tests #
* #
* For the IEEE aware tests, action is only taken if the result is false.#
* Each handler uses the matching fbcc to skip to "rts" when the #
* predicate is true and otherwise branches to the decrement routine. #
* The BSUN exception will not be set for any of these tests. #
* #
*########################################################################
*
* ordered greater than:
* _______
* NANvZvN
*
* OGT (IEEE aware): no BSUN handling; false counts, true returns.
fdbcc_ogt:
fbogt.w fdbcc_ogt_yes * ordered greater than?
fdbcc_ogt_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_ogt_yes:
rts * yes; do nothing
*
* unordered or less or equal:
* _______
* NANvZvN
*
* ULE (IEEE aware): no BSUN handling; false counts, true returns.
fdbcc_ule:
fbule.w fdbcc_ule_yes * unordered or less or equal?
fdbcc_ule_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_ule_yes:
rts * yes; do nothing
*
* ordered greater than or equal:
* _____
* Zv(NANvN)
*
* OGE (IEEE aware): no BSUN handling; false counts, true returns.
fdbcc_oge:
fboge.w fdbcc_oge_yes * ordered greater than or equal?
fdbcc_oge_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_oge_yes:
rts * yes; do nothing
*
* unordered or less than:
* _
* NANv(N^Z)
*
* ULT (IEEE aware): no BSUN handling; false counts, true returns.
fdbcc_ult:
fbult.w fdbcc_ult_yes * unordered or less than?
fdbcc_ult_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_ult_yes:
rts * yes; do nothing
*
* ordered less than:
* _____
* N^(NANvZ)
*
* OLT (IEEE aware): no BSUN handling; false counts, true returns.
fdbcc_olt:
fbolt.w fdbcc_olt_yes * ordered less than?
fdbcc_olt_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_olt_yes:
rts * yes; do nothing
*
* unordered or greater or equal:
*
* NANvZvN
*
* UGE (IEEE aware): no BSUN handling; false counts, true returns.
fdbcc_uge:
fbuge.w fdbcc_uge_yes * unordered or greater or equal?
fdbcc_uge_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_uge_yes:
rts * yes; do nothing
*
* ordered less than or equal:
* ___
* Zv(N^NAN)
*
* OLE (IEEE aware): no BSUN handling; false counts, true returns.
fdbcc_ole:
fbole.w fdbcc_ole_yes * ordered less than or equal?
fdbcc_ole_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_ole_yes:
rts * yes; do nothing
*
* unordered or greater than:
* ___
* NANv(NvZ)
*
* UGT (IEEE aware): no BSUN handling; false counts, true returns.
fdbcc_ugt:
fbugt.w fdbcc_ugt_yes * unordered or greater than?
fdbcc_ugt_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_ugt_yes:
rts * yes; do nothing
*
* ordered greater or less than:
* _____
* NANvZ
*
* OGL (IEEE aware): no BSUN handling; false counts, true returns.
fdbcc_ogl:
fbogl.w fdbcc_ogl_yes * ordered greater or less than?
fdbcc_ogl_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_ogl_yes:
rts * yes; do nothing
*
* unordered or equal:
*
* NANvZ
*
* UEQ (IEEE aware): no BSUN handling; false counts, true returns.
fdbcc_ueq:
fbueq.w fdbcc_ueq_yes * unordered or equal?
fdbcc_ueq_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_ueq_yes:
rts * yes; do nothing
*
* ordered:
* ___
* NAN
*
* OR (IEEE aware): no BSUN handling; false counts, true returns.
fdbcc_or:
fbor.w fdbcc_or_yes * ordered?
fdbcc_or_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_or_yes:
rts * yes; do nothing
*
* unordered:
*
* NAN
*
* UN (IEEE aware): no BSUN handling; false counts, true returns.
fdbcc_un:
fbun.w fdbcc_un_yes * unordered?
fdbcc_un_no:
bra.w fdbcc_false * no; go handle counter
fdbcc_un_yes:
rts * yes; do nothing
*######################################################################
*
* the bsun exception bit was not set.
*
* (1) subtract 1 from the count register
* (2) if (cr == -1) then
* pc = pc of next instruction
* else
* pc += sign_ext(16-bit displacement)
*
* Predicate was false: decrement the low word of the count register named in
* the low 3 bits of the opword. If the word did not become -1, redirect the
* stacked PC to (USER_FPIAR + 4 + saved displacement); otherwise leave
* EXC_PC alone so execution falls through to the next instruction.
fdbcc_false:
move.b 1+EXC_OPWORD(a6),d1 * fetch lo opword
andi.w #$7,d1 * extract count register
bsr.l fetch_dreg * fetch count value
* make sure that d0 isn't corrupted between calls...
subq.w #$1,d0 * Dn - 1 -> Dn (word-sized; upper word untouched)
bsr.l store_dreg_l * store new count value
cmpi.w #-$1,d0 * is (Dn == -1)?
bne.b fdbcc_false_cont * no;
rts * yes; counter expired, no branch
fdbcc_false_cont:
move.l L_SCR1(a6),d0 * fetch displacement
add.l USER_FPIAR(a6),d0 * add instruction PC
addq.l #$4,d0 * add instruction length
move.l d0,EXC_PC(a6) * set new PC
rts
* the emulation routine set bsun and BSUN was enabled. have to
* fix stack and jump to the bsun handler.
* let the caller of this routine shift the stack frame up to
* eliminate the effective address field.
* Only records the condition; the caller acts on SPCOND_FLG(a6).
fdbcc_bsun:
move.b #fbsun_flg,SPCOND_FLG(a6) * tell the caller an enabled BSUN occurred
rts
*########################################################################
* ftrapcc(): routine to emulate the ftrapcc instruction #
* #
* XDEF **************************************************************** #
* _ftrapcc() #
* #
* XREF **************************************************************** #
* none #
* #
* INPUT *************************************************************** #
* none #
* #
* OUTPUT ************************************************************** #
* none #
* #
* ALGORITHM *********************************************************** #
* This routine checks which conditional predicate is specified by #
* the stacked ftrapcc instruction opcode and then branches to a routine #
* for that predicate. The corresponding fbcc instruction is then used #
* to see whether the condition (specified by the stacked FPSR) is true #
* or false. #
* If a BSUN exception should be indicated, the BSUN and AIOP #
* bits are set in the stacked FPSR. If the BSUN exception is enabled, #
* the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
* enabled BSUN should not be flagged and the predicate is true, then #
* the ftrapcc_flg is set in the SPCOND_FLG location. These special #
* flags indicate to the calling routine to emulate the exceptional #
* condition. #
* #
*########################################################################
global _ftrapcc
* Dispatch: the stacked ccode byte is loaded into the live FPSR so that the
* fbcc in each handler evaluates the emulated instruction's predicate; the
* predicate word then indexes tbl_ftrapcc (word offsets relative to the table).
_ftrapcc:
move.w EXC_CMDREG(a6),d0 * fetch predicate
clr.l d1 * clear scratch reg
move.b FPSR_CC(a6),d1 * fetch fp ccodes
ror.l #$8,d1 * rotate to top byte
fmove.l d1,fpsr * insert into FPSR
move.w (tbl_ftrapcc.b,pc,d0.w*2),d1 * load table
jmp (tbl_ftrapcc.b,pc,d1.w) * jump to ftrapcc routine
* one entry per predicate value 0-31
tbl_ftrapcc:
.dc.w ftrapcc_f-tbl_ftrapcc * 00
.dc.w ftrapcc_eq-tbl_ftrapcc * 01
.dc.w ftrapcc_ogt-tbl_ftrapcc * 02
.dc.w ftrapcc_oge-tbl_ftrapcc * 03
.dc.w ftrapcc_olt-tbl_ftrapcc * 04
.dc.w ftrapcc_ole-tbl_ftrapcc * 05
.dc.w ftrapcc_ogl-tbl_ftrapcc * 06
.dc.w ftrapcc_or-tbl_ftrapcc * 07
.dc.w ftrapcc_un-tbl_ftrapcc * 08
.dc.w ftrapcc_ueq-tbl_ftrapcc * 09
.dc.w ftrapcc_ugt-tbl_ftrapcc * 10
.dc.w ftrapcc_uge-tbl_ftrapcc * 11
.dc.w ftrapcc_ult-tbl_ftrapcc * 12
.dc.w ftrapcc_ule-tbl_ftrapcc * 13
.dc.w ftrapcc_neq-tbl_ftrapcc * 14
.dc.w ftrapcc_t-tbl_ftrapcc * 15
.dc.w ftrapcc_sf-tbl_ftrapcc * 16
.dc.w ftrapcc_seq-tbl_ftrapcc * 17
.dc.w ftrapcc_gt-tbl_ftrapcc * 18
.dc.w ftrapcc_ge-tbl_ftrapcc * 19
.dc.w ftrapcc_lt-tbl_ftrapcc * 20
.dc.w ftrapcc_le-tbl_ftrapcc * 21
.dc.w ftrapcc_gl-tbl_ftrapcc * 22
.dc.w ftrapcc_gle-tbl_ftrapcc * 23
.dc.w ftrapcc_ngle-tbl_ftrapcc * 24
.dc.w ftrapcc_ngl-tbl_ftrapcc * 25
.dc.w ftrapcc_nle-tbl_ftrapcc * 26
.dc.w ftrapcc_nlt-tbl_ftrapcc * 27
.dc.w ftrapcc_nge-tbl_ftrapcc * 28
.dc.w ftrapcc_ngt-tbl_ftrapcc * 29
.dc.w ftrapcc_sneq-tbl_ftrapcc * 30
.dc.w ftrapcc_st-tbl_ftrapcc * 31
*########################################################################
* #
* IEEE Nonaware tests #
* #
* For the IEEE nonaware tests, we set the result based on the #
* floating point condition codes. In addition, we check to see #
* if the NAN bit is set, in which case BSUN and AIOP will be set. #
* #
* The cases EQ and NE are shared by the Aware and Nonaware groups #
* and are incapable of setting the BSUN exception bit. #
* #
* Typically, only one of the two possible branch directions could #
* have the NAN bit set. #
* #
*########################################################################
*
* equal:
*
* Z
*
* EQ never raises BSUN: true branches to ftrapcc_trap, false just returns.
ftrapcc_eq:
fbeq.w ftrapcc_trap * equal?
ftrapcc_eq_no:
rts * do nothing
*
* not equal:
* _
* Z
*
* NE never raises BSUN: true branches to ftrapcc_trap, false just returns.
ftrapcc_neq:
fbne.w ftrapcc_trap * not equal?
ftrapcc_neq_no:
rts * do nothing
*
* greater than:
* _______
* NANvZvN
*
* GT: true traps directly. The false path checks the NAN ccode; if set,
* BSUN+AIOP are recorded and an enabled BSUN diverts to ftrapcc_bsun.
ftrapcc_gt:
fbgt.w ftrapcc_trap * greater than?
ftrapcc_gt_no:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.b ftrapcc_gt_done * no
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes
ftrapcc_gt_done:
rts * no; do nothing
*
* not greater than:
*
* NANvZvN
*
* NGT: false returns. The true path checks the NAN ccode; if set, BSUN+AIOP
* are recorded and an enabled BSUN diverts to ftrapcc_bsun, else it traps.
ftrapcc_ngt:
fbngt.w ftrapcc_ngt_yes * not greater than?
ftrapcc_ngt_no:
rts * do nothing
ftrapcc_ngt_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w ftrapcc_trap * no; go take trap
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes
bra.w ftrapcc_trap * no; go take trap
*
* greater than or equal:
* _____
* Zv(NANvN)
*
* GE: both outcomes check the NAN ccode. With NAN set, BSUN+AIOP are recorded
* and an enabled BSUN diverts to ftrapcc_bsun; otherwise false returns and
* true traps.
ftrapcc_ge:
fbge.w ftrapcc_ge_yes * greater than or equal?
ftrapcc_ge_no:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.b ftrapcc_ge_done * no; go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes
ftrapcc_ge_done:
rts * no; do nothing
ftrapcc_ge_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w ftrapcc_trap * no; go take trap
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN exc bit
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes
bra.w ftrapcc_trap * no; go take trap
*
* not (greater than or equal) (NGE): true when NAN v (N ^ ~Z)   (~ = NOT)
* False: return, nothing to do.
* True : if NAN is set, record BSUN and AIOP in USER_FPSR; an enabled BSUN
*        leaves through ftrapcc_bsun, every other case takes the trap.
*
ftrapcc_nge:
fbnge.w ftrapcc_nge_yes * not (greater than or equal)?
ftrapcc_nge_no:
rts * no; do nothing
ftrapcc_nge_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w ftrapcc_trap * no; go take trap
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
bra.w ftrapcc_trap * no; go take trap
*
* less than (LT): true when N ^ ~(NAN v Z)   (~ = NOT)
* True : go take the trap (the predicate can only be true with NAN clear).
* False: if NAN is set, record BSUN and AIOP in USER_FPSR and, when BSUN
*        is enabled in FPCR_ENABLE, leave through ftrapcc_bsun.
*
ftrapcc_lt:
fblt.w ftrapcc_trap * less than? yes; go take trap
ftrapcc_lt_no:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.b ftrapcc_lt_done * no; go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
ftrapcc_lt_done:
rts * no; do nothing
*
* not less than (NLT): true when NAN v (Z v ~N)   (~ = NOT)
* False: return, nothing to do.
* True : if NAN is set, record BSUN and AIOP in USER_FPSR; an enabled BSUN
*        leaves through ftrapcc_bsun, every other case takes the trap.
*
ftrapcc_nlt:
fbnlt.w ftrapcc_nlt_yes * not less than?
ftrapcc_nlt_no:
rts * no; do nothing
ftrapcc_nlt_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w ftrapcc_trap * no; go take trap
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
bra.w ftrapcc_trap * no; go take trap
*
* less than or equal (LE): true when Z v (N ^ ~NAN)   (~ = NOT)
* Either outcome can occur with NAN set, so both paths check it.
* False: NAN set -> record BSUN/AIOP; enabled BSUN -> ftrapcc_bsun;
*        otherwise return.
* True : NAN set -> record BSUN/AIOP; enabled BSUN -> ftrapcc_bsun;
*        otherwise take the trap.
*
ftrapcc_le:
fble.w ftrapcc_le_yes * less than or equal?
ftrapcc_le_no:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.b ftrapcc_le_done * no; go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
ftrapcc_le_done:
rts * no; do nothing
ftrapcc_le_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w ftrapcc_trap * no; go take trap
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
bra.w ftrapcc_trap * no; go take trap
*
* not (less than or equal) (NLE): true when NAN v ~(N v Z)   (~ = NOT)
* False: return, nothing to do.
* True : if NAN is set, record BSUN and AIOP in USER_FPSR; an enabled BSUN
*        leaves through ftrapcc_bsun, every other case takes the trap.
*
ftrapcc_nle:
fbnle.w ftrapcc_nle_yes * not (less than or equal)?
ftrapcc_nle_no:
rts * no; do nothing
ftrapcc_nle_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w ftrapcc_trap * no; go take trap
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
bra.w ftrapcc_trap * no; go take trap
*
* greater or less than (GL): true when ~(NAN v Z)   (~ = NOT)
* True : go take the trap (the predicate can only be true with NAN clear).
* False: if NAN is set, record BSUN and AIOP in USER_FPSR and, when BSUN
*        is enabled in FPCR_ENABLE, leave through ftrapcc_bsun.
*
ftrapcc_gl:
fbgl.w ftrapcc_trap * greater or less than? yes; go take trap
ftrapcc_gl_no:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.b ftrapcc_gl_done * no; go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
ftrapcc_gl_done:
rts * no; do nothing
*
* not (greater or less than) (NGL): true when NAN v Z
* False: return, nothing to do.
* True : if NAN is set, record BSUN and AIOP in USER_FPSR; an enabled BSUN
*        leaves through ftrapcc_bsun, every other case takes the trap.
*
ftrapcc_ngl:
fbngl.w ftrapcc_ngl_yes * not (greater or less than)?
ftrapcc_ngl_no:
rts * no; do nothing
ftrapcc_ngl_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w ftrapcc_trap * no; go take trap
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
bra.w ftrapcc_trap * no; go take trap
*
* greater, less, or equal (GLE): true when ~NAN   (~ = NOT)
* True : go take the trap.
* False: the predicate is false only when NAN is set, so BSUN and AIOP are
*        recorded without a separate NAN test; an enabled BSUN leaves
*        through ftrapcc_bsun, otherwise return.
*
ftrapcc_gle:
fbgle.w ftrapcc_trap * greater, less, or equal? yes; go take trap
ftrapcc_gle_no:
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
rts * no; do nothing
*
* not (greater, less, or equal) (NGLE): true when NAN
* False: return, nothing to do.
* True : the predicate is true only when NAN is set, so BSUN and AIOP are
*        recorded without a separate NAN test; an enabled BSUN leaves
*        through ftrapcc_bsun, otherwise take the trap.
*
ftrapcc_ngle:
fbngle.w ftrapcc_ngle_yes * not (greater, less, or equal)?
ftrapcc_ngle_no:
rts * no; do nothing
ftrapcc_ngle_yes:
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
bra.w ftrapcc_trap * no; go take trap
*########################################################################
* #
* Miscellaneous tests #
* #
* The false and true tests ignore the condition codes and never set #
* BSUN. The signalling tests (sf, st, seq, sneq) additionally set BSUN #
* and AIOP whenever the NAN bit is set. #
* #
*########################################################################
*
* false (F): always false
* The trap is never taken and BSUN is never set.
*
ftrapcc_f:
rts * do nothing
*
* true (T): always true
* The trap is always taken; BSUN is never set.
*
ftrapcc_t:
bra.w ftrapcc_trap * go take trap
*
* signalling false (SF): always false
* The trap is never taken. If NAN is set, BSUN and AIOP are recorded in
* USER_FPSR and an enabled BSUN leaves through ftrapcc_bsun.
*
ftrapcc_sf:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.b ftrapcc_sf_done * no; go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
ftrapcc_sf_done:
rts * no; do nothing
*
* signalling true (ST): always true
* The trap is taken unless NAN is set and BSUN is enabled, in which case
* the routine leaves through ftrapcc_bsun. A set NAN bit always records
* BSUN and AIOP in USER_FPSR.
*
ftrapcc_st:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w ftrapcc_trap * no; go take trap
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
bra.w ftrapcc_trap * no; go take trap
*
* signalling equal (SEQ): true when Z
* Same outcome as EQ, but a set NAN bit records BSUN and AIOP on either
* path; an enabled BSUN then leaves through ftrapcc_bsun.
* False: return (after the NAN check). True: take the trap (after it).
*
ftrapcc_seq:
fbseq.w ftrapcc_seq_yes * signalling equal?
ftrapcc_seq_no:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w ftrapcc_seq_done * no; go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
ftrapcc_seq_done:
rts * no; do nothing
ftrapcc_seq_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w ftrapcc_trap * no; go take trap
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
bra.w ftrapcc_trap * no; go take trap
*
* signalling not equal (SNE): true when ~Z   (~ = NOT)
* Same outcome as NE, but a set NAN bit records BSUN and AIOP on either
* path; an enabled BSUN then leaves through ftrapcc_bsun.
* False: return (after the NAN check). True: take the trap (after it).
*
ftrapcc_sneq:
fbsne.w ftrapcc_sneq_yes * signalling not equal?
ftrapcc_sneq_no:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w ftrapcc_sneq_no_done * no; go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
ftrapcc_sneq_no_done:
rts * do nothing
ftrapcc_sneq_yes:
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w ftrapcc_trap * no; go take trap
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w ftrapcc_bsun * yes; flag BSUN
bra.w ftrapcc_trap * no; go take trap
*########################################################################
* #
* IEEE Aware tests #
* #
* For the IEEE aware tests, we only have to set the result based on the #
* floating point condition codes. The BSUN exception will not be #
* set for any of these tests. #
* #
*########################################################################
*
* ordered greater than (OGT): true when ~(NAN v Z v N)   (~ = NOT)
* IEEE aware: takes the trap when true, never sets BSUN.
*
ftrapcc_ogt:
fbogt.w ftrapcc_trap * ordered greater than? yes; go take trap
ftrapcc_ogt_no:
rts * no; do nothing
*
* unordered or less or equal (ULE): true when NAN v Z v N
* IEEE aware: takes the trap when true, never sets BSUN.
*
ftrapcc_ule:
fbule.w ftrapcc_trap * unordered or less or equal? yes; go take trap
ftrapcc_ule_no:
rts * no; do nothing
*
* ordered greater than or equal (OGE): true when Z v ~(NAN v N)   (~ = NOT)
* IEEE aware: takes the trap when true, never sets BSUN.
*
ftrapcc_oge:
fboge.w ftrapcc_trap * ordered greater than or equal? yes; go take trap
ftrapcc_oge_no:
rts * no; do nothing
*
* unordered or less than (ULT): true when NAN v (N ^ ~Z)   (~ = NOT)
* IEEE aware: takes the trap when true, never sets BSUN.
*
ftrapcc_ult:
fbult.w ftrapcc_trap * unordered or less than? yes; go take trap
ftrapcc_ult_no:
rts * no; do nothing
*
* ordered less than (OLT): true when N ^ ~(NAN v Z)   (~ = NOT)
* IEEE aware: takes the trap when true, never sets BSUN.
*
ftrapcc_olt:
fbolt.w ftrapcc_trap * ordered less than? yes; go take trap
ftrapcc_olt_no:
rts * no; do nothing
*
* unordered or greater or equal (UGE): true when NAN v Z v ~N   (~ = NOT)
* IEEE aware: takes the trap when true, never sets BSUN.
*
ftrapcc_uge:
fbuge.w ftrapcc_trap * unordered or greater or equal? yes; go take trap
ftrapcc_uge_no:
rts * no; do nothing
*
* ordered less than or equal (OLE): true when Z v (N ^ ~NAN)   (~ = NOT)
* IEEE aware: takes the trap when true, never sets BSUN.
*
ftrapcc_ole:
fbole.w ftrapcc_trap * ordered less than or equal? yes; go take trap
ftrapcc_ole_no:
rts * no; do nothing
*
* unordered or greater than (UGT): true when NAN v ~(N v Z)   (~ = NOT)
* IEEE aware: takes the trap when true, never sets BSUN.
*
ftrapcc_ugt:
fbugt.w ftrapcc_trap * unordered or greater than? yes; go take trap
ftrapcc_ugt_no:
rts * no; do nothing
*
* ordered greater or less than (OGL): true when ~(NAN v Z)   (~ = NOT)
* IEEE aware: takes the trap when true, never sets BSUN.
*
ftrapcc_ogl:
fbogl.w ftrapcc_trap * ordered greater or less than? yes; go take trap
ftrapcc_ogl_no:
rts * no; do nothing
*
* unordered or equal (UEQ): true when NAN v Z
* IEEE aware: takes the trap when true, never sets BSUN.
*
ftrapcc_ueq:
fbueq.w ftrapcc_trap * unordered or equal? yes; go take trap
ftrapcc_ueq_no:
rts * no; do nothing
*
* ordered (OR): true when ~NAN   (~ = NOT)
* IEEE aware: takes the trap when true, never sets BSUN.
*
ftrapcc_or:
fbor.w ftrapcc_trap * ordered? yes; go take trap
ftrapcc_or_no:
rts * no; do nothing
*
* unordered (UN): true when NAN
* IEEE aware: takes the trap when true, never sets BSUN.
*
ftrapcc_un:
fbun.w ftrapcc_trap * unordered? yes; go take trap
ftrapcc_un_no:
rts * no; do nothing
*######################################################################
* the predicate is true and no enabled bsun exception is pending (the
* bsun bit is either clear or set but not enabled).
* we will need to jump to the ftrapcc vector. the stack frame
* is the same size as that of the fp unimp instruction. the
* only difference is that the <ea> field should hold the PC
* of the ftrapcc instruction and the vector offset field
* should denote the ftrapcc trap.
* this routine itself only records ftrapcc_flg in SPCOND_FLG and returns.
ftrapcc_trap:
move.b #ftrapcc_flg,SPCOND_FLG(a6)
rts
* the emulation routine set bsun and BSUN was enabled. have to
* fix stack and jump to the bsun handler.
* let the caller of this routine shift the stack frame up to
* eliminate the effective address field.
* this routine itself only records fbsun_flg in SPCOND_FLG and returns.
ftrapcc_bsun:
move.b #fbsun_flg,SPCOND_FLG(a6)
rts
*########################################################################
* fscc(): routine to emulate the fscc instruction #
* #
* XDEF **************************************************************** #
* _fscc() #
* #
* XREF **************************************************************** #
* store_dreg_b() - store result to data register file #
* dec_areg() - decrement an areg for -(an) mode #
* inc_areg() - increment an areg for (an)+ mode #
* _dmem_write_byte() - store result to memory #
* #
* INPUT *************************************************************** #
* none #
* #
* OUTPUT ************************************************************** #
* none #
* #
* ALGORITHM *********************************************************** #
* This routine checks which conditional predicate is specified by #
* the stacked fscc instruction opcode and then branches to a routine #
* for that predicate. The corresponding fbcc instruction is then used #
* to see whether the condition (specified by the stacked FPSR) is true #
* or false. #
* If a BSUN exception should be indicated, the BSUN and AIOP #
* bits are set in the stacked FPSR. If the BSUN exception is enabled, #
* the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
* enabled BSUN should not be flagged, then the result (true or false) #
* is stored to the data register file or memory. #
* #
*########################################################################
global _fscc
* _fscc: dispatch on the fscc predicate.
* The stacked condition-code byte (FPSR_CC) is rotated into the top byte
* of d1 and written to the live FPSR so that the fbcc instruction in each
* handler evaluates the predicate against the stacked codes.
* tbl_fscc holds one 16-bit offset per predicate (0-31), each relative to
* tbl_fscc itself; the handler is entered with a pc-relative jmp.
* NOTE(review): d0.w is used as the table index without masking, so
* EXC_CMDREG is assumed to hold only the predicate here - confirm.
_fscc:
move.w EXC_CMDREG(a6),d0 * fetch predicate
clr.l d1 * clear scratch reg
move.b FPSR_CC(a6),d1 * fetch fp ccodes
ror.l #$8,d1 * rotate to top byte
fmove.l d1,fpsr * insert into FPSR
move.w (tbl_fscc.b,pc,d0.w*2),d1 * load handler offset from table
jmp (tbl_fscc.b,pc,d1.w) * jump to fscc routine
* handler offsets, indexed by predicate number (shown at right)
tbl_fscc:
.dc.w fscc_f-tbl_fscc * 00
.dc.w fscc_eq-tbl_fscc * 01
.dc.w fscc_ogt-tbl_fscc * 02
.dc.w fscc_oge-tbl_fscc * 03
.dc.w fscc_olt-tbl_fscc * 04
.dc.w fscc_ole-tbl_fscc * 05
.dc.w fscc_ogl-tbl_fscc * 06
.dc.w fscc_or-tbl_fscc * 07
.dc.w fscc_un-tbl_fscc * 08
.dc.w fscc_ueq-tbl_fscc * 09
.dc.w fscc_ugt-tbl_fscc * 10
.dc.w fscc_uge-tbl_fscc * 11
.dc.w fscc_ult-tbl_fscc * 12
.dc.w fscc_ule-tbl_fscc * 13
.dc.w fscc_neq-tbl_fscc * 14
.dc.w fscc_t-tbl_fscc * 15
.dc.w fscc_sf-tbl_fscc * 16
.dc.w fscc_seq-tbl_fscc * 17
.dc.w fscc_gt-tbl_fscc * 18
.dc.w fscc_ge-tbl_fscc * 19
.dc.w fscc_lt-tbl_fscc * 20
.dc.w fscc_le-tbl_fscc * 21
.dc.w fscc_gl-tbl_fscc * 22
.dc.w fscc_gle-tbl_fscc * 23
.dc.w fscc_ngle-tbl_fscc * 24
.dc.w fscc_ngl-tbl_fscc * 25
.dc.w fscc_nle-tbl_fscc * 26
.dc.w fscc_nlt-tbl_fscc * 27
.dc.w fscc_nge-tbl_fscc * 28
.dc.w fscc_ngt-tbl_fscc * 29
.dc.w fscc_sneq-tbl_fscc * 30
.dc.w fscc_st-tbl_fscc * 31
*########################################################################
* #
* IEEE Nonaware tests #
* #
* For the IEEE nonaware tests, we set the result based on the #
* floating point condition codes. In addition, we check to see #
* if the NAN bit is set, in which case BSUN and AIOP will be set. #
* #
* The cases EQ and NE are shared by the Aware and Nonaware groups #
* and are incapable of setting the BSUN exception bit. #
* #
* Typically, only one of the two possible branch directions could #
* have the NAN bit set. #
* #
*########################################################################
*
* equal (EQ): true when Z
* Leaves the result byte in d0 (all ones for true, zero for false) and
* goes to fscc_done to store it. This test never sets BSUN.
*
fscc_eq:
fbeq.w fscc_eq_yes * equal?
fscc_eq_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_eq_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* not equal (NE): true when ~Z   (~ = NOT)
* Leaves the result byte in d0 (all ones for true, zero for false) and
* goes to fscc_done to store it. This test never sets BSUN.
*
fscc_neq:
fbne.w fscc_neq_yes * not equal?
fscc_neq_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_neq_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* greater than (GT): true when ~(NAN v Z v N)   (~ = NOT)
* True : result = true, store it (true implies NAN is clear).
* False: result = false; if NAN is set, record BSUN and AIOP in USER_FPSR
*        and let fscc_chk_bsun decide whether the result is stored.
*
fscc_gt:
fbgt.w fscc_gt_yes * greater than?
fscc_gt_no:
clr.b d0 * set false
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
fscc_gt_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* not greater than (NGT): true when NAN v Z v N
* False: result = false, store it.
* True : result = true; if NAN is set, record BSUN and AIOP in USER_FPSR
*        and let fscc_chk_bsun decide whether the result is stored.
*
fscc_ngt:
fbngt.w fscc_ngt_yes * not greater than?
fscc_ngt_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_ngt_yes:
st d0 * set true
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
*
* greater than or equal (GE): true when Z v ~(NAN v N)   (~ = NOT)
* Either outcome can occur with NAN set, so both paths check it: the
* result byte is selected first, then a set NAN bit records BSUN and AIOP
* in USER_FPSR and hands over to fscc_chk_bsun.
*
fscc_ge:
fbge.w fscc_ge_yes * greater than or equal?
fscc_ge_no:
clr.b d0 * set false
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
fscc_ge_yes:
st d0 * set true
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
*
* not (greater than or equal) (NGE): true when NAN v (N ^ ~Z)   (~ = NOT)
* False: result = false, store it.
* True : result = true; if NAN is set, record BSUN and AIOP in USER_FPSR
*        and let fscc_chk_bsun decide whether the result is stored.
*
fscc_nge:
fbnge.w fscc_nge_yes * not (greater than or equal)?
fscc_nge_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_nge_yes:
st d0 * set true
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
*
* less than (LT): true when N ^ ~(NAN v Z)   (~ = NOT)
* True : result = true, store it (true implies NAN is clear).
* False: result = false; if NAN is set, record BSUN and AIOP in USER_FPSR
*        and let fscc_chk_bsun decide whether the result is stored.
*
fscc_lt:
fblt.w fscc_lt_yes * less than?
fscc_lt_no:
clr.b d0 * set false
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
fscc_lt_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* not less than (NLT): true when NAN v (Z v ~N)   (~ = NOT)
* False: result = false, store it.
* True : result = true; if NAN is set, record BSUN and AIOP in USER_FPSR
*        and let fscc_chk_bsun decide whether the result is stored.
*
fscc_nlt:
fbnlt.w fscc_nlt_yes * not less than?
fscc_nlt_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_nlt_yes:
st d0 * set true
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
*
* less than or equal (LE): true when Z v (N ^ ~NAN)   (~ = NOT)
* Either outcome can occur with NAN set, so both paths check it: the
* result byte is selected first, then a set NAN bit records BSUN and AIOP
* in USER_FPSR and hands over to fscc_chk_bsun.
*
fscc_le:
fble.w fscc_le_yes * less than or equal?
fscc_le_no:
clr.b d0 * set false
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
fscc_le_yes:
st d0 * set true
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
*
* not (less than or equal) (NLE): true when NAN v ~(N v Z)   (~ = NOT)
* False: result = false, store it.
* True : result = true; if NAN is set, record BSUN and AIOP in USER_FPSR
*        and let fscc_chk_bsun decide whether the result is stored.
*
fscc_nle:
fbnle.w fscc_nle_yes * not (less than or equal)?
fscc_nle_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_nle_yes:
st d0 * set true
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
*
* greater or less than (GL): true when ~(NAN v Z)   (~ = NOT)
* True : result = true, store it (true implies NAN is clear).
* False: result = false; if NAN is set, record BSUN and AIOP in USER_FPSR
*        and let fscc_chk_bsun decide whether the result is stored.
*
fscc_gl:
fbgl.w fscc_gl_yes * greater or less than?
fscc_gl_no:
clr.b d0 * set false
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
fscc_gl_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* not (greater or less than) (NGL): true when NAN v Z
* False: result = false, store it.
* True : result = true; if NAN is set, record BSUN and AIOP in USER_FPSR
*        and let fscc_chk_bsun decide whether the result is stored.
*
fscc_ngl:
fbngl.w fscc_ngl_yes * not (greater or less than)?
fscc_ngl_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_ngl_yes:
st d0 * set true
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
*
* greater, less, or equal (GLE): true when ~NAN   (~ = NOT)
* True : result = true, store it.
* False: the predicate is false only when NAN is set, so BSUN and AIOP are
*        recorded without a separate NAN test and fscc_chk_bsun decides
*        whether the false result is stored.
*
fscc_gle:
fbgle.w fscc_gle_yes * greater, less, or equal?
fscc_gle_no:
clr.b d0 * set false
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
fscc_gle_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* not (greater, less, or equal) (NGLE): true when NAN
* False: result = false, store it.
* True : the predicate is true only when NAN is set, so BSUN and AIOP are
*        recorded without a separate NAN test and fscc_chk_bsun decides
*        whether the true result is stored.
*
fscc_ngle:
fbngle.w fscc_ngle_yes * not (greater, less, or equal)?
fscc_ngle_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_ngle_yes:
st d0 * set true
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
*########################################################################
* #
* Miscellaneous tests #
* #
* The false and true tests ignore the condition codes and never set #
* BSUN. The signalling tests (sf, st, seq, sneq) additionally set BSUN #
* and AIOP whenever the NAN bit is set. #
* #
*########################################################################
*
* false (F): always false
* Stores a zero result byte; never sets BSUN.
*
fscc_f:
clr.b d0 * set false
bra.w fscc_done * go finish
*
* true (T): always true
* Stores an all-ones result byte; never sets BSUN.
*
fscc_t:
st d0 * set true
bra.w fscc_done * go finish
*
* signalling false (SF): always false
* Result = false. If NAN is set, record BSUN and AIOP in USER_FPSR and let
* fscc_chk_bsun decide whether the result is stored.
*
fscc_sf:
clr.b d0 * set false
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
*
* signalling true (ST): always true
* Result = true. If NAN is set, record BSUN and AIOP in USER_FPSR and let
* fscc_chk_bsun decide whether the result is stored.
*
fscc_st:
st d0 * set true
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
*
* signalling equal (SEQ): true when Z
* Same result as EQ, but on either path a set NAN bit records BSUN and
* AIOP in USER_FPSR and hands over to fscc_chk_bsun.
*
fscc_seq:
fbseq.w fscc_seq_yes * signalling equal?
fscc_seq_no:
clr.b d0 * set false
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
fscc_seq_yes:
st d0 * set true
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
*
* signalling not equal (SNE): true when ~Z   (~ = NOT)
* Same result as NE, but on either path a set NAN bit records BSUN and
* AIOP in USER_FPSR and hands over to fscc_chk_bsun.
*
fscc_sneq:
fbsne.w fscc_sneq_yes * signalling not equal?
fscc_sneq_no:
clr.b d0 * set false
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
fscc_sneq_yes:
st d0 * set true
btst #nan_bit,FPSR_CC(a6) * is NAN set in cc?
beq.w fscc_done * no;go finish
ori.l #bsun_mask+aiop_mask,USER_FPSR(a6) * set BSUN and AIOP bits
bra.w fscc_chk_bsun * go check BSUN enable
*########################################################################
* #
* IEEE Aware tests #
* #
* For the IEEE aware tests, we only have to set the result based on the #
* floating point condition codes. The BSUN exception will not be #
* set for any of these tests. #
* #
*########################################################################
*
* ordered greater than (OGT): true when ~(NAN v Z v N)   (~ = NOT)
* IEEE aware: selects the result byte and stores it; never sets BSUN.
*
fscc_ogt:
fbogt.w fscc_ogt_yes * ordered greater than?
fscc_ogt_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_ogt_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* unordered or less or equal (ULE): true when NAN v Z v N
* IEEE aware: selects the result byte and stores it; never sets BSUN.
*
fscc_ule:
fbule.w fscc_ule_yes * unordered or less or equal?
fscc_ule_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_ule_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* ordered greater than or equal (OGE): true when Z v ~(NAN v N)   (~ = NOT)
* IEEE aware: selects the result byte and stores it; never sets BSUN.
*
fscc_oge:
fboge.w fscc_oge_yes * ordered greater than or equal?
fscc_oge_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_oge_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* unordered or less than (ULT): true when NAN v (N ^ ~Z)   (~ = NOT)
* IEEE aware: selects the result byte and stores it; never sets BSUN.
*
fscc_ult:
fbult.w fscc_ult_yes * unordered or less than?
fscc_ult_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_ult_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* ordered less than (OLT): true when N ^ ~(NAN v Z)   (~ = NOT)
* IEEE aware: selects the result byte and stores it; never sets BSUN.
*
fscc_olt:
fbolt.w fscc_olt_yes * ordered less than?
fscc_olt_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_olt_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* unordered or greater or equal (UGE): true when NAN v Z v ~N   (~ = NOT)
* IEEE aware: selects the result byte and stores it; never sets BSUN.
*
fscc_uge:
fbuge.w fscc_uge_yes * unordered or greater or equal?
fscc_uge_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_uge_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* ordered less than or equal (OLE): true when Z v (N ^ ~NAN)   (~ = NOT)
* IEEE aware: selects the result byte and stores it; never sets BSUN.
*
fscc_ole:
fbole.w fscc_ole_yes * ordered less than or equal?
fscc_ole_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_ole_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* unordered or greater than (UGT): true when NAN v ~(N v Z)   (~ = NOT)
* IEEE aware: selects the result byte and stores it; never sets BSUN.
*
fscc_ugt:
fbugt.w fscc_ugt_yes * unordered or greater than?
fscc_ugt_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_ugt_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* ordered greater or less than (OGL): true when ~(NAN v Z)   (~ = NOT)
* IEEE aware: selects the result byte and stores it; never sets BSUN.
*
fscc_ogl:
fbogl.w fscc_ogl_yes * ordered greater or less than?
fscc_ogl_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_ogl_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* unordered or equal (UEQ): true when NAN v Z
* IEEE aware: selects the result byte and stores it; never sets BSUN.
*
fscc_ueq:
fbueq.w fscc_ueq_yes * unordered or equal?
fscc_ueq_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_ueq_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* ordered (OR): true when ~NAN   (~ = NOT)
* IEEE aware: selects the result byte and stores it; never sets BSUN.
*
fscc_or:
fbor.w fscc_or_yes * ordered?
fscc_or_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_or_yes:
st d0 * set true
bra.w fscc_done * go finish
*
* unordered (UN): true when NAN
* IEEE aware: selects the result byte and stores it; never sets BSUN.
*
fscc_un:
fbun.w fscc_un_yes * unordered?
fscc_un_no:
clr.b d0 * set false
bra.w fscc_done * go finish
fscc_un_yes:
st d0 * set true
bra.w fscc_done * go finish
*######################################################################
*
* the bsun exception bit was set. now, check to see if BSUN
* is enabled. if so, don't store result and correct stack frame
* for a bsun exception (via fscc_bsun). if BSUN is not enabled, fall
* through into fscc_done to store the result held in d0.
*
fscc_chk_bsun:
btst #bsun_bit,FPCR_ENABLE(a6) * is BSUN enabled?
bne.w fscc_bsun * yes; skip the store
*
* no enabled bsun exception is pending (the bsun bit is either clear or
* set but not enabled).
* the result has been selected and is in the low byte of d0.
* now, check to see if the result is to be stored in the data register
* file or in memory: a zero <ea> mode field (bits 5-3 of the low opword
* byte) means Dn, anything else is handled by fscc_mem_op.
*
fscc_done:
move.l d0,a0 * save result for a moment
move.b 1+EXC_OPWORD(a6),d1 * fetch lo opword
move.l d1,d0 * make a copy
andi.b #$38,d1 * extract <ea> mode
bne.b fscc_mem_op * it's a memory operation
move.l d0,d1 * recover the unmasked opword byte
andi.w #$7,d1 * pass index in d1
move.l a0,d0 * pass result in d0
bsr.l store_dreg_b * save result in regfile
rts
*
* the stacked <ea> is correct with the exception of:
* -> Dn : <ea> is garbage
*
* if the addressing mode is post-increment or pre-decrement,
* then the address registers have not been updated.
*
* entered from fscc_done with d1.b = <ea> mode field (opword & $38) and
* a0 = result. (An)+ and -(An) are split off; every other memory mode
* writes the result byte to the stacked <ea> here.
fscc_mem_op:
cmpi.b #$18,d1 * is <ea> (An)+ ?
beq.b fscc_mem_inc * yes
cmpi.b #$20,d1 * is <ea> -(An) ?
beq.b fscc_mem_dec * yes
move.l a0,d0 * pass result in d0
move.l EXC_EA(a6),a0 * fetch <ea>
bsr.l _dmem_write_byte * write result byte
tst.l d1 * did dstore fail?
bne.w fscc_err * yes
rts
* addressing mode is post-increment. write the result byte. if the write
* fails then don't update the address register. if write passes then
* call inc_areg() to update the address register.
* entered with a0 = result; the register number is re-read from the low
* opword byte because d1 is overwritten by the write status.
fscc_mem_inc:
move.l a0,d0 * pass result in d0
move.l EXC_EA(a6),a0 * fetch <ea>
bsr.l _dmem_write_byte * write result byte
tst.l d1 * did dstore fail?
bne.w fscc_err * yes
move.b $1+EXC_OPWORD(a6),d1 * fetch opword
andi.w #$7,d1 * pass index in d1
moveq.l #$1,d0 * pass amt to inc by
bsr.l inc_areg * increment address register
rts
* addressing mode is pre-decrement. write the result byte. if the write
* fails then don't update the address register. if the write passes then
* call dec_areg() to update the address register.
* entered with a0 = result; the register number is re-read from the low
* opword byte because d1 is overwritten by the write status.
fscc_mem_dec:
move.l a0,d0 * pass result in d0
move.l EXC_EA(a6),a0 * fetch <ea>
bsr.l _dmem_write_byte * write result byte
tst.l d1 * did dstore fail?
bne.w fscc_err * yes
move.b $1+EXC_OPWORD(a6),d1 * fetch opword
andi.w #$7,d1 * pass index in d1
moveq.l #$1,d0 * pass amt to dec by
bsr.l dec_areg * decrement address register
rts
* the emulation routine set bsun and BSUN was enabled. have to
* fix stack and jump to the bsun handler.
* let the caller of this routine shift the stack frame up to
* eliminate the effective address field.
* this routine itself only records fbsun_flg in SPCOND_FLG and returns;
* the fscc result is not stored.
fscc_bsun:
move.b #fbsun_flg,SPCOND_FLG(a6)
rts
* the byte write to memory has failed. store $00a1 in the EXC_VOFF field
* of the frame and continue in facc_finish.
* NOTE(review): the earlier wording said the failing effective address and
* a FSLW are passed to funimp_dacc(); the code branches to facc_finish
* and the meaning of $00a1 is not visible here - confirm.
fscc_err:
move.w #$00a1,EXC_VOFF(a6)
bra.l facc_finish
*########################################################################
* XDEF **************************************************************** #
* fmovem_dynamic(): emulate "fmovm" dynamic instruction #
* #
* XREF **************************************************************** #
* fetch_dreg() - fetch data register #
* {i,d,}mem_read() - fetch data from memory #
* _mem_write() - write data to memory #
* iea_iacc() - instruction memory access error occurred #
* iea_dacc() - data memory access error occurred #
* restore() - restore An index regs if access error occurred #
* #
* INPUT *************************************************************** #
* None #
* #
* OUTPUT ************************************************************** #
* If instr is "fmovm Dn,-(A7)" from supervisor mode, #
* d0 = size of dump #
* d1 = Dn #
* Else if instruction access error, #
* d0 = FSLW #
* Else if data access error, #
* d0 = FSLW #
* a0 = address of fault #
* Else #
* none. #
* #
* ALGORITHM *********************************************************** #
* The effective address must be calculated since this is entered #
* from an "Unimplemented Effective Address" exception handler. So, we #
* have our own fcalc_ea() routine here. If an access error is flagged #
* by a _{i,d,}mem_read() call, we must exit through the special #
* handler. #
* The data register is determined and its value loaded to get the #
* string of FP registers affected. This value is used as an index into #
* a lookup table such that we can determine the number of bytes #
* involved. #
* If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used #
* to read in all FP values. Again, _mem_read() may fail and require a #
* special exit. #
* If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
* to write all FP values. _mem_write() may also fail. #
* If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, #
* then we return the size of the dump and the string to the caller #
* so that the move can occur outside of this routine. This special #
* case is required so that moves to the system stack are handled #
* correctly. #
* #
* DYNAMIC: #
* fmovm.x dn, <ea> #
* fmovm.x <ea>, dn #
* #
* <WORD 1> <WORD2> #
* 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 #
* #
* & = (0): predecrement addressing mode #
* (1): postincrement or control addressing mode #
* @ = (0): move listed regs from memory to the FPU #
* (1): move listed regs from the FPU to memory #
* $$$ : index of data register holding reg select mask #
* #
* NOTES: #
* If the data register holds a zero, then the #
* instruction is a nop. #
* #
*########################################################################
global fmovem_dynamic
* entry point: fetch the register-select bit string from Dn, look up the
* transfer size, calculate the <ea>, then split into move-in, move-out
* (control / postincrement) and move-out (predecrement) handling.
* on the paths below d0 = size in bytes and d1 = bit string.
fmovem_dynamic:
* extract the data register in which the bit string resides...
move.b 1+EXC_EXTWORD(a6),d1 * fetch extword
andi.w #$70,d1 * extract reg bits
lsr.b #$4,d1 * shift into lo bits
* fetch the bit string into d0...
bsr.l fetch_dreg * fetch reg string
andi.l #$000000ff,d0 * keep only lo byte
move.l d0,-(sp) * save strg
* only the low byte of d0 is replaced; the upper bytes were cleared above
move.b (tbl_fmovem_size.w,pc,d0.l),d0 * size = 12 * (# of 1's in strg)
move.l d0,-(sp) * save size
bsr.l fmovem_calc_ea * calculate <ea>
move.l (sp)+,d0 * restore size
move.l (sp)+,d1 * restore strg
* if the bit string is a zero, then the operation is a no-op
* but, make sure that we've calculated ea and advanced the opword pointer
* (the branch below uses the Z flag left by the restore of strg into d1)
beq.w fmovem_data_done
* separate move ins from move outs...
btst #$5,EXC_EXTWORD(a6) * is it a move in or out?
beq.w fmovem_data_in * bit clear; it's a move in
*############
* MOVE OUT: #
*############
fmovem_data_out:
btst #$4,EXC_EXTWORD(a6) * control or predecrement?
bne.w fmovem_out_ctrl * control
*###########################
fmovem_out_predec:
* for predecrement mode, the bit string is the opposite of both control
* operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
* here, we convert it to be just like the others...
move.b (tbl_fmovem_convert.w,pc,d1.w*1),d1
btst #$5,EXC_SR(a6) * user or supervisor mode?
beq.b fmovem_out_ctrl * user
fmovem_out_predec_s:
cmpi.b #mda7_flg,SPCOND_FLG(a6) * is <ea> mode -(a7)?
bne.b fmovem_out_ctrl * no; handle like any other move out
* the operation was unfortunately an: fmovm.x dn,-(sp)
* called from supervisor mode.
* we're also passing "size" and "strg" back to the calling routine
* (d0 = size, d1 = converted bit string)
rts
*###########################
* move out: build an image of the selected FP registers on the stack and
* copy it to memory with _dmem_write.
* on entry: a0 = <ea>, d0 = size in bytes, d1.b = bit string in control
* order (bit 7 selects FP0; each lsl.b moves the next register's bit
* into the sign position).
* FP0 and FP1 are taken from the EXC_FP0/EXC_FP1 frame slots; FP2-FP7 are
* stored straight from the FPU with fmovem.
fmovem_out_ctrl:
move.l a0,a1 * move <ea> to a1
sub.l d0,sp * subtract size of dump
lea (sp),a0
tst.b d1 * should FP0 be moved?
bpl.b fmovem_out_ctrl_fp1 * no
move.l $0+EXC_FP0(a6),(a0)+ * yes
move.l $4+EXC_FP0(a6),(a0)+
move.l $8+EXC_FP0(a6),(a0)+
fmovem_out_ctrl_fp1:
lsl.b #$1,d1 * should FP1 be moved?
bpl.b fmovem_out_ctrl_fp2 * no
move.l $0+EXC_FP1(a6),(a0)+ * yes
move.l $4+EXC_FP1(a6),(a0)+
move.l $8+EXC_FP1(a6),(a0)+
fmovem_out_ctrl_fp2:
lsl.b #$1,d1 * should FP2 be moved?
bpl.b fmovem_out_ctrl_fp3 * no
fmovem.x fp2,(a0) * yes
add.l #$c,a0
fmovem_out_ctrl_fp3:
lsl.b #$1,d1 * should FP3 be moved?
bpl.b fmovem_out_ctrl_fp4 * no
fmovem.x fp3,(a0) * yes
add.l #$c,a0
fmovem_out_ctrl_fp4:
lsl.b #$1,d1 * should FP4 be moved?
bpl.b fmovem_out_ctrl_fp5 * no
fmovem.x fp4,(a0) * yes
add.l #$c,a0
fmovem_out_ctrl_fp5:
lsl.b #$1,d1 * should FP5 be moved?
bpl.b fmovem_out_ctrl_fp6 * no
fmovem.x fp5,(a0) * yes
add.l #$c,a0
fmovem_out_ctrl_fp6:
lsl.b #$1,d1 * should FP6 be moved?
bpl.b fmovem_out_ctrl_fp7 * no
fmovem.x fp6,(a0) * yes
add.l #$c,a0
fmovem_out_ctrl_fp7:
lsl.b #$1,d1 * should FP7 be moved?
bpl.b fmovem_out_ctrl_done * no
fmovem.x fp7,(a0) * yes
add.l #$c,a0
fmovem_out_ctrl_done:
move.l a1,L_SCR1(a6) * keep <ea> for fmovem_out_err
lea (sp),a0 * pass: supervisor src
move.l d0,-(sp) * save size
* presumably a1 = destination and d0 = byte count for _dmem_write - confirm
bsr.l _dmem_write * copy data to user mem
move.l (sp)+,d0 * restore size
add.l d0,sp * clear fpreg data from stack
tst.l d1 * did dstore err?
bne.w fmovem_out_err * yes
rts
*###########
* MOVE IN: #
*###########
* move in: read the register image from memory into a stack buffer with
* _dmem_read, then distribute it to the selected FP registers.
* on entry: a0 = <ea>, d0 = size in bytes, d1.b = bit string (bit 7
* selects FP0; each lsl.b moves the next register's bit into the sign
* position).
* FP0 and FP1 are written to the EXC_FP0/EXC_FP1 frame slots; FP2-FP7 are
* loaded straight into the FPU with fmovem.
fmovem_data_in:
move.l a0,L_SCR1(a6) * keep <ea> for fmovem_in_err
sub.l d0,sp * make room for fpregs
lea (sp),a1
move.l d1,-(sp) * save bit string for later
move.l d0,-(sp) * save # of bytes
* presumably a0 = source, a1 = destination, d0 = byte count - confirm
bsr.l _dmem_read * copy data from user mem
move.l (sp)+,d0 * retrieve # of bytes
tst.l d1 * did dfetch fail?
* note: on failure the bit string and the buffer are still on the stack
bne.w fmovem_in_err * yes
move.l (sp)+,d1 * load bit string
lea (sp),a0 * addr of stack
tst.b d1 * should FP0 be moved?
bpl.b fmovem_data_in_fp1 * no
move.l (a0)+,$0+EXC_FP0(a6) * yes
move.l (a0)+,$4+EXC_FP0(a6)
move.l (a0)+,$8+EXC_FP0(a6)
fmovem_data_in_fp1:
lsl.b #$1,d1 * should FP1 be moved?
bpl.b fmovem_data_in_fp2 * no
move.l (a0)+,$0+EXC_FP1(a6) * yes
move.l (a0)+,$4+EXC_FP1(a6)
move.l (a0)+,$8+EXC_FP1(a6)
fmovem_data_in_fp2:
lsl.b #$1,d1 * should FP2 be moved?
bpl.b fmovem_data_in_fp3 * no
fmovem.x (a0)+,fp2 * yes
fmovem_data_in_fp3:
lsl.b #$1,d1 * should FP3 be moved?
bpl.b fmovem_data_in_fp4 * no
fmovem.x (a0)+,fp3 * yes
fmovem_data_in_fp4:
lsl.b #$1,d1 * should FP4 be moved?
bpl.b fmovem_data_in_fp5 * no
fmovem.x (a0)+,fp4 * yes
fmovem_data_in_fp5:
lsl.b #$1,d1 * should FP5 be moved?
bpl.b fmovem_data_in_fp6 * no
fmovem.x (a0)+,fp5 * yes
fmovem_data_in_fp6:
lsl.b #$1,d1 * should FP6 be moved?
bpl.b fmovem_data_in_fp7 * no
fmovem.x (a0)+,fp6 * yes
fmovem_data_in_fp7:
lsl.b #$1,d1 * should FP7 be moved?
bpl.b fmovem_data_in_done * no
fmovem.x (a0)+,fp7 * yes
fmovem_data_in_done:
add.l d0,sp * remove fpregs from stack
rts
*####################################
* exit used when the bit string is zero: nothing is transferred.
fmovem_data_done:
rts
*#############################################################################
*
* table indexed by the operation's bit string that gives the number
* of bytes that will be moved.
*
* number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
*
* 256 one-byte entries, 8 per row: entry $00 is $00 and entry $ff is
* $60 (8 registers * 12 bytes).
tbl_fmovem_size:
.dc.b $00,$0c,$0c,$18,$0c,$18,$18,$24
.dc.b $0c,$18,$18,$24,$18,$24,$24,$30
.dc.b $0c,$18,$18,$24,$18,$24,$24,$30
.dc.b $18,$24,$24,$30,$24,$30,$30,$3c
.dc.b $0c,$18,$18,$24,$18,$24,$24,$30
.dc.b $18,$24,$24,$30,$24,$30,$30,$3c
.dc.b $18,$24,$24,$30,$24,$30,$30,$3c
.dc.b $24,$30,$30,$3c,$30,$3c,$3c,$48
.dc.b $0c,$18,$18,$24,$18,$24,$24,$30
.dc.b $18,$24,$24,$30,$24,$30,$30,$3c
.dc.b $18,$24,$24,$30,$24,$30,$30,$3c
.dc.b $24,$30,$30,$3c,$30,$3c,$3c,$48
.dc.b $18,$24,$24,$30,$24,$30,$30,$3c
.dc.b $24,$30,$30,$3c,$30,$3c,$3c,$48
.dc.b $24,$30,$30,$3c,$30,$3c,$3c,$48
.dc.b $30,$3c,$3c,$48,$3c,$48,$48,$54
.dc.b $0c,$18,$18,$24,$18,$24,$24,$30
.dc.b $18,$24,$24,$30,$24,$30,$30,$3c
.dc.b $18,$24,$24,$30,$24,$30,$30,$3c
.dc.b $24,$30,$30,$3c,$30,$3c,$3c,$48
.dc.b $18,$24,$24,$30,$24,$30,$30,$3c
.dc.b $24,$30,$30,$3c,$30,$3c,$3c,$48
.dc.b $24,$30,$30,$3c,$30,$3c,$3c,$48
.dc.b $30,$3c,$3c,$48,$3c,$48,$48,$54
.dc.b $18,$24,$24,$30,$24,$30,$30,$3c
.dc.b $24,$30,$30,$3c,$30,$3c,$3c,$48
.dc.b $24,$30,$30,$3c,$30,$3c,$3c,$48
.dc.b $30,$3c,$3c,$48,$3c,$48,$48,$54
.dc.b $24,$30,$30,$3c,$30,$3c,$3c,$48
.dc.b $30,$3c,$3c,$48,$3c,$48,$48,$54
.dc.b $30,$3c,$3c,$48,$3c,$48,$48,$54
.dc.b $3c,$48,$48,$54,$48,$54,$54,$60
*
* table to convert a pre-decrement bit string into a post-increment
* or control bit string by reversing the order of its eight bits.
* ex: 0x00 ==> 0x00
* 0x01 ==> 0x80
* 0x02 ==> 0x40
* .
* .
* 0xfd ==> 0xbf
* 0xfe ==> 0x7f
* 0xff ==> 0xff
*
tbl_fmovem_convert:
.dc.b $00,$80,$40,$c0,$20,$a0,$60,$e0
.dc.b $10,$90,$50,$d0,$30,$b0,$70,$f0
.dc.b $08,$88,$48,$c8,$28,$a8,$68,$e8
.dc.b $18,$98,$58,$d8,$38,$b8,$78,$f8
.dc.b $04,$84,$44,$c4,$24,$a4,$64,$e4
.dc.b $14,$94,$54,$d4,$34,$b4,$74,$f4
.dc.b $0c,$8c,$4c,$cc,$2c,$ac,$6c,$ec
.dc.b $1c,$9c,$5c,$dc,$3c,$bc,$7c,$fc
.dc.b $02,$82,$42,$c2,$22,$a2,$62,$e2
.dc.b $12,$92,$52,$d2,$32,$b2,$72,$f2
.dc.b $0a,$8a,$4a,$ca,$2a,$aa,$6a,$ea
.dc.b $1a,$9a,$5a,$da,$3a,$ba,$7a,$fa
.dc.b $06,$86,$46,$c6,$26,$a6,$66,$e6
.dc.b $16,$96,$56,$d6,$36,$b6,$76,$f6
.dc.b $0e,$8e,$4e,$ce,$2e,$ae,$6e,$ee
.dc.b $1e,$9e,$5e,$de,$3e,$be,$7e,$fe
.dc.b $01,$81,$41,$c1,$21,$a1,$61,$e1
.dc.b $11,$91,$51,$d1,$31,$b1,$71,$f1
.dc.b $09,$89,$49,$c9,$29,$a9,$69,$e9
.dc.b $19,$99,$59,$d9,$39,$b9,$79,$f9
.dc.b $05,$85,$45,$c5,$25,$a5,$65,$e5
.dc.b $15,$95,$55,$d5,$35,$b5,$75,$f5
.dc.b $0d,$8d,$4d,$cd,$2d,$ad,$6d,$ed
.dc.b $1d,$9d,$5d,$dd,$3d,$bd,$7d,$fd
.dc.b $03,$83,$43,$c3,$23,$a3,$63,$e3
.dc.b $13,$93,$53,$d3,$33,$b3,$73,$f3
.dc.b $0b,$8b,$4b,$cb,$2b,$ab,$6b,$eb
.dc.b $1b,$9b,$5b,$db,$3b,$bb,$7b,$fb
.dc.b $07,$87,$47,$c7,$27,$a7,$67,$e7
.dc.b $17,$97,$57,$d7,$37,$b7,$77,$f7
.dc.b $0f,$8f,$4f,$cf,$2f,$af,$6f,$ef
.dc.b $1f,$9f,$5f,$df,$3f,$bf,$7f,$ff
global fmovem_calc_ea
*##############################################
* fmovem_calc_ea: calculate effective address #
*##############################################
* fmovem_calc_ea: dispatch to the <ea> handler for the faulting opcode.
* In: d0 = number of bytes; it is moved to a0, where the (An)+ and
* -(An) handlers use it as the amount to adjust An by
* EXC_OPWORD(a6) = opcode word; its low 6 bits are {mode,reg}
* At the jump: a0 = byte count, d1 = register field (0-7).
* The selected handler returns the effective address in a0.
fmovem_calc_ea:
move.l d0,a0 * move # bytes to a0
* currently, MODE and REG are taken from the EXC_OPWORD. this could be
* easily changed if they were inputs passed in registers.
move.w EXC_OPWORD(a6),d0 * fetch opcode word
move.w d0,d1 * make a copy
andi.w #$3f,d0 * extract {mode,reg} (6 bits) as table index
andi.l #$7,d1 * extract reg field
* jump to the corresponding function for each {MODE,REG} pair.
* the table holds 16-bit offsets measured from tbl_fea_mode itself.
move.w (tbl_fea_mode.b,pc,d0.w*2),d0 * fetch jmp distance
jmp (tbl_fea_mode.b,pc,d0.w*1) * jmp to correct ea mode
* $4AFC is the ILLEGAL opcode; it is followed by the number of table
* entries (64). Presumably this is a jump-table marker - confirm.
.dc.w $4AFC,64
tbl_fea_mode:
* index $00-$07 (mode 000): offset 0, no handler in this table
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
* index $08-$0f (mode 001): offset 0, no handler in this table
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
* index $10-$17 (mode 010): (An)
.dc.w faddr_ind_a0-tbl_fea_mode
.dc.w faddr_ind_a1-tbl_fea_mode
.dc.w faddr_ind_a2-tbl_fea_mode
.dc.w faddr_ind_a3-tbl_fea_mode
.dc.w faddr_ind_a4-tbl_fea_mode
.dc.w faddr_ind_a5-tbl_fea_mode
.dc.w faddr_ind_a6-tbl_fea_mode
.dc.w faddr_ind_a7-tbl_fea_mode
* index $18-$1f (mode 011): (An)+
.dc.w faddr_ind_p_a0-tbl_fea_mode
.dc.w faddr_ind_p_a1-tbl_fea_mode
.dc.w faddr_ind_p_a2-tbl_fea_mode
.dc.w faddr_ind_p_a3-tbl_fea_mode
.dc.w faddr_ind_p_a4-tbl_fea_mode
.dc.w faddr_ind_p_a5-tbl_fea_mode
.dc.w faddr_ind_p_a6-tbl_fea_mode
.dc.w faddr_ind_p_a7-tbl_fea_mode
* index $20-$27 (mode 100): -(An)
.dc.w faddr_ind_m_a0-tbl_fea_mode
.dc.w faddr_ind_m_a1-tbl_fea_mode
.dc.w faddr_ind_m_a2-tbl_fea_mode
.dc.w faddr_ind_m_a3-tbl_fea_mode
.dc.w faddr_ind_m_a4-tbl_fea_mode
.dc.w faddr_ind_m_a5-tbl_fea_mode
.dc.w faddr_ind_m_a6-tbl_fea_mode
.dc.w faddr_ind_m_a7-tbl_fea_mode
* index $28-$2f (mode 101): (d16,An)
.dc.w faddr_ind_disp_a0-tbl_fea_mode
.dc.w faddr_ind_disp_a1-tbl_fea_mode
.dc.w faddr_ind_disp_a2-tbl_fea_mode
.dc.w faddr_ind_disp_a3-tbl_fea_mode
.dc.w faddr_ind_disp_a4-tbl_fea_mode
.dc.w faddr_ind_disp_a5-tbl_fea_mode
.dc.w faddr_ind_disp_a6-tbl_fea_mode
.dc.w faddr_ind_disp_a7-tbl_fea_mode
* index $30-$37 (mode 110): indexed / memory indirect, one shared
* handler that picks the base register from d1
.dc.w faddr_ind_ext-tbl_fea_mode
.dc.w faddr_ind_ext-tbl_fea_mode
.dc.w faddr_ind_ext-tbl_fea_mode
.dc.w faddr_ind_ext-tbl_fea_mode
.dc.w faddr_ind_ext-tbl_fea_mode
.dc.w faddr_ind_ext-tbl_fea_mode
.dc.w faddr_ind_ext-tbl_fea_mode
.dc.w faddr_ind_ext-tbl_fea_mode
* index $38-$3b (mode 111, reg 0-3): (xxx).W, (xxx).L, (d16,PC),
* PC with index / PC memory indirect
.dc.w fabs_short-tbl_fea_mode
.dc.w fabs_long-tbl_fea_mode
.dc.w fpc_ind-tbl_fea_mode
.dc.w fpc_ind_ext-tbl_fea_mode
* index $3c-$3f (mode 111, reg 4-7): offset 0, no handler in this table
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
.dc.w tbl_fea_mode-tbl_fea_mode
*##################################
* Address register indirect: (An) #
*##################################
* Handlers for (An): return the current value of An in a0.
* a0 and a1 are read from EXC_DREGS+$8 / +$c in the frame, a6 from the
* longword at (a6) and a7 from EXC_A7; a2-a5 are taken from the live
* registers. No other register is modified.
faddr_ind_a0:
move.l EXC_DREGS+$8(a6),a0 * Get current a0
rts
faddr_ind_a1:
move.l EXC_DREGS+$c(a6),a0 * Get current a1
rts
faddr_ind_a2:
move.l a2,a0 * Get current a2
rts
faddr_ind_a3:
move.l a3,a0 * Get current a3
rts
faddr_ind_a4:
move.l a4,a0 * Get current a4
rts
faddr_ind_a5:
move.l a5,a0 * Get current a5
rts
faddr_ind_a6:
move.l (a6),a0 * Get current a6
rts
faddr_ind_a7:
move.l EXC_A7(a6),a0 * Get current a7
rts
*####################################################
* Address register indirect w/ postincrement: (An)+ #
*####################################################
* Handlers for (An)+.
* In: a0 = number of bytes to advance An by
* Out: a0 = value of An before the increment (the <ea>)
* An (register, or its saved copy in the frame) = old An + bytes
* d0 and d1 are used as scratch. The a7 variant also records the case
* by storing mia7_flg in SPCOND_FLG(a6).
faddr_ind_p_a0:
move.l EXC_DREGS+$8(a6),d0 * Get current a0
move.l d0,d1
add.l a0,d1 * Increment
move.l d1,EXC_DREGS+$8(a6) * Save incr value
move.l d0,a0 * <ea> = value before increment
rts
faddr_ind_p_a1:
move.l EXC_DREGS+$c(a6),d0 * Get current a1
move.l d0,d1
add.l a0,d1 * Increment
move.l d1,EXC_DREGS+$c(a6) * Save incr value
move.l d0,a0 * <ea> = value before increment
rts
faddr_ind_p_a2:
move.l a2,d0 * Get current a2
move.l d0,d1
add.l a0,d1 * Increment
move.l d1,a2 * Save incr value
move.l d0,a0 * <ea> = value before increment
rts
faddr_ind_p_a3:
move.l a3,d0 * Get current a3
move.l d0,d1
add.l a0,d1 * Increment
move.l d1,a3 * Save incr value
move.l d0,a0 * <ea> = value before increment
rts
faddr_ind_p_a4:
move.l a4,d0 * Get current a4
move.l d0,d1
add.l a0,d1 * Increment
move.l d1,a4 * Save incr value
move.l d0,a0 * <ea> = value before increment
rts
faddr_ind_p_a5:
move.l a5,d0 * Get current a5
move.l d0,d1
add.l a0,d1 * Increment
move.l d1,a5 * Save incr value
move.l d0,a0 * <ea> = value before increment
rts
faddr_ind_p_a6:
move.l (a6),d0 * Get current a6
move.l d0,d1
add.l a0,d1 * Increment
move.l d1,(a6) * Save incr value
move.l d0,a0 * <ea> = value before increment
rts
faddr_ind_p_a7:
move.b #mia7_flg,SPCOND_FLG(a6) * set "special case" flag
move.l EXC_A7(a6),d0 * Get current a7
move.l d0,d1
add.l a0,d1 * Increment
move.l d1,EXC_A7(a6) * Save incr value
move.l d0,a0 * <ea> = value before increment
rts
*###################################################
* Address register indirect w/ predecrement: -(An) #
*###################################################
* Handlers for -(An).
* In: a0 = number of bytes to move An back by
* Out: An (register, or its saved copy in the frame) = old An - bytes
* a0 = the decremented value (the <ea>)
* d0 is used as scratch and also holds the <ea> on return. The a7
* variant also records the case by storing mda7_flg in SPCOND_FLG(a6).
faddr_ind_m_a0:
move.l EXC_DREGS+$8(a6),d0 * Get current a0
sub.l a0,d0 * Decrement
move.l d0,EXC_DREGS+$8(a6) * Save decr value
move.l d0,a0 * <ea> = value after decrement
rts
faddr_ind_m_a1:
move.l EXC_DREGS+$c(a6),d0 * Get current a1
sub.l a0,d0 * Decrement
move.l d0,EXC_DREGS+$c(a6) * Save decr value
move.l d0,a0 * <ea> = value after decrement
rts
faddr_ind_m_a2:
move.l a2,d0 * Get current a2
sub.l a0,d0 * Decrement
move.l d0,a2 * Save decr value
move.l d0,a0 * <ea> = value after decrement
rts
faddr_ind_m_a3:
move.l a3,d0 * Get current a3
sub.l a0,d0 * Decrement
move.l d0,a3 * Save decr value
move.l d0,a0 * <ea> = value after decrement
rts
faddr_ind_m_a4:
move.l a4,d0 * Get current a4
sub.l a0,d0 * Decrement
move.l d0,a4 * Save decr value
move.l d0,a0 * <ea> = value after decrement
rts
faddr_ind_m_a5:
move.l a5,d0 * Get current a5
sub.l a0,d0 * Decrement
move.l d0,a5 * Save decr value
move.l d0,a0 * <ea> = value after decrement
rts
faddr_ind_m_a6:
move.l (a6),d0 * Get current a6
sub.l a0,d0 * Decrement
move.l d0,(a6) * Save decr value
move.l d0,a0 * <ea> = value after decrement
rts
faddr_ind_m_a7:
move.b #mda7_flg,SPCOND_FLG(a6) * set "special case" flag
move.l EXC_A7(a6),d0 * Get current a7
sub.l a0,d0 * Decrement
move.l d0,EXC_A7(a6) * Save decr value
move.l d0,a0 * <ea> = value after decrement
rts
*#######################################################
* Address register indirect w/ displacement: (d16, An) #
*#######################################################
* Handlers for (d16,An). Every variant does the same three things:
* 1) read the 16-bit displacement from the instruction stream at
* EXC_EXTWPTR(a6) and advance that pointer by 2
* 2) leave through iea_iacc if the read reports an error (d1 != 0)
* 3) return a0 = sign-extended d16 + An
* The byte count passed in a0 is not used by these handlers.
faddr_ind_disp_a0:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.w d0,a0 * sign extend displacement
add.l EXC_DREGS+$8(a6),a0 * a0 + d16
rts
faddr_ind_disp_a1:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.w d0,a0 * sign extend displacement
add.l EXC_DREGS+$c(a6),a0 * a1 + d16
rts
faddr_ind_disp_a2:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.w d0,a0 * sign extend displacement
add.l a2,a0 * a2 + d16
rts
faddr_ind_disp_a3:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.w d0,a0 * sign extend displacement
add.l a3,a0 * a3 + d16
rts
faddr_ind_disp_a4:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.w d0,a0 * sign extend displacement
add.l a4,a0 * a4 + d16
rts
faddr_ind_disp_a5:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.w d0,a0 * sign extend displacement
add.l a5,a0 * a5 + d16
rts
faddr_ind_disp_a6:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.w d0,a0 * sign extend displacement
add.l (a6),a0 * a6 + d16
rts
faddr_ind_disp_a7:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.w d0,a0 * sign extend displacement
add.l EXC_A7(a6),a0 * a7 + d16
rts
*#######################################################################
* Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
* " " " w/ " (base displacement): (bd, An, Xn) #
* Memory indirect postindexed: ([bd, An], Xn, od) #
* Memory indirect preindexed: ([bd, An, Xn], od) #
*#######################################################################
* faddr_ind_ext: <ea> for the An-based indexed and memory indirect modes.
* In: d1 = number of the base address register (0-7)
* Out: a0 = effective address
* The extension word is read from the instruction stream. If its bit 8
* is set the work is handed to fcalc_mem_ind (a0 = base, d0 = extension
* word); otherwise the brief form is decoded here:
* bits 15-12 = index register number, passed to fetch_dreg
* bit 11 = index size (0: sign-extended word, 1: long)
* bits 10-9 = scale (index is shifted left by this amount)
* bits 7-0 = signed 8-bit displacement
* L_SCR1(a6) is used as scratch; d2 is saved and restored.
faddr_ind_ext:
addq.l #$8,d1 * 8-15 selects a0-a7 in fetch_dreg
bsr.l fetch_dreg * fetch base areg
move.l d0,-(sp) * park base value across the ifetch
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word * fetch extword in d0
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.l (sp)+,a0 * a0 = base value
btst #$8,d0 * full-format extension word?
bne.w fcalc_mem_ind * yes: decode it there
move.l d0,L_SCR1(a6) * hold extension word
move.l d0,d1
rol.w #$4,d1 * bring bits 15-12 down to bits 3-0
andi.w #$f,d1 * extract index regno
* count on fetch_dreg() not to alter a0...
bsr.l fetch_dreg * fetch index
move.l d2,-(sp) * save d2
move.l L_SCR1(a6),d2 * fetch extension word
btst #$b,d2 * is it word or long?
bne.b faii8_long
ext.l d0 * sign extend word index
faii8_long:
move.l d2,d1
rol.w #$7,d1 * bring bits 10-9 down to bits 1-0
andi.l #$3,d1 * extract scale value
lsl.l d1,d0 * shift index by scale
extb.l d2 * sign extend displacement
add.l d2,d0 * index + disp
add.l d0,a0 * An + (index + disp)
move.l (sp)+,d2 * restore old d2
rts
*##########################
* Absolute short: (XXX).W #
*##########################
* fabs_short: read a 16-bit absolute address from the instruction
* stream (EXC_EXTWPTR is advanced by 2) and return it sign-extended to
* 32 bits in a0. Leaves through iea_iacc if the read fails.
fabs_short:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word * fetch short address
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.w d0,a0 * return <ea> in a0 (word is sign-extended)
rts
*#########################
* Absolute long: (XXX).L #
*#########################
* fabs_long: read a 32-bit absolute address from the instruction
* stream (EXC_EXTWPTR is advanced by 4) and return it in a0.
* Leaves through iea_iacc if the read fails.
fabs_long:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch long address
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.l d0,a0 * return <ea> in a0
rts
*######################################################
* Program counter indirect w/ displacement: (d16, PC) #
*######################################################
* fpc_ind: <ea> for (d16,PC).
* Reads the 16-bit displacement from the instruction stream and
* returns a0 = sign-extended d16 + address of the displacement word.
* EXC_EXTWPTR has already been moved past that word when it is added,
* hence the final correction by 2. Leaves through iea_iacc if the
* read fails.
fpc_ind:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word * fetch word displacement
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.w d0,a0 * sign extend displacement
add.l EXC_EXTWPTR(a6),a0 * pc + d16
* the extwptr was advanced by 2 above, before the read. need to adjust here.
subq.l #$2,a0 * adjust <ea>
rts
*#########################################################
* PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
* " " w/ " (base displacement): (bd, PC, Xn) #
* PC memory indirect postindexed: ([bd, PC], Xn, od) #
* PC memory indirect preindexed: ([bd, PC, Xn], od) #
*#########################################################
* fpc_ind_ext: <ea> for the PC-based indexed and memory indirect modes.
* Out: a0 = effective address
* The base is the address of the extension word itself (EXC_EXTWPTR
* after the fetch, minus 2). If bit 8 of the extension word is set the
* work is handed to fcalc_mem_ind (a0 = base, d0 = extension word);
* otherwise the brief form is decoded here:
* bits 15-12 = index register number, passed to fetch_dreg
* bit 11 = index size (0: sign-extended word, 1: long)
* bits 10-9 = scale (index is shifted left by this amount)
* bits 7-0 = signed 8-bit displacement
* L_SCR1(a6) is used as scratch; d2 is saved and restored.
fpc_ind_ext:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word * fetch ext word
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.l EXC_EXTWPTR(a6),a0 * put base in a0
subq.l #$2,a0 * adjust base
btst #$8,d0 * full-format extension word?
bne.w fcalc_mem_ind * calc memory indirect
move.l d0,L_SCR1(a6) * store extension word
move.l d0,d1 * make extword copy
rol.w #$4,d1 * rotate reg num into place
andi.w #$f,d1 * extract register number
* count on fetch_dreg() not to alter a0...
bsr.l fetch_dreg * fetch index
move.l d2,-(sp) * save d2
move.l L_SCR1(a6),d2 * fetch extension word
btst #$b,d2 * is index word or long?
bne.b fpii8_long * long
ext.l d0 * sign extend word index
fpii8_long:
move.l d2,d1
rol.w #$7,d1 * rotate scale value into place
andi.l #$3,d1 * extract scale value
lsl.l d1,d0 * shift index by scale
extb.l d2 * sign extend displacement
add.l d2,d0 * disp + index
add.l d0,a0 * PC base + (index + disp)
move.l (sp)+,d2 * restore temp register
rts
* fcalc_mem_ind: decode a full-format extension word.
* Reached by a branch (not a call) from faddr_ind_ext and fpc_ind_ext,
* so the rts at the end returns to their caller.
* In: a0 = base value (An or adjusted PC), d0 = extension word
* Out: a0 = effective address
* Extension word fields as tested below:
* bits 15-12 index register number bit 11 index is long
* bits 10-9 index scale bit 7 suppress base
* bit 6 suppress index bits 5-4 bd size
* bit 2 post-indexed bits 1-0 od size
* (bits 1-0 = 0: no memory indirection)
* Working registers (d2-d5 are saved on entry and restored on exit):
* d2 = index
* d3 = base
* d4 = od
* d5 = extword
* Instruction-fetch errors leave through fcea_iacc, data-fetch errors
* through fcea_err; both restore d2-d5 first.
fcalc_mem_ind:
btst #$6,d0 * is the index suppressed?
beq.b fcalc_index
movem.l d2-d5,-(sp) * save d2-d5
move.l d0,d5 * put extword in d5
move.l a0,d3 * put base in d3
clr.l d2 * yes, so index = 0
bra.b fbase_supp_ck
* index:
fcalc_index:
move.l d0,L_SCR1(a6) * save d0 (extension word)
bfextu d0{#16:#4},d1 * bits 15-12: index register number
bsr.l fetch_dreg
movem.l d2-d5,-(sp) * save d2-d5
move.l d0,d2 * put index in d2
move.l L_SCR1(a6),d5 * put extword in d5
move.l a0,d3 * put base in d3
btst #$b,d5 * is index word or long?
bne.b fno_ext
ext.l d2 * sign extend word index
fno_ext:
bfextu d5{#21:#2},d0 * bits 10-9: scale
lsl.l d0,d2 * shift index by scale
* base address (passed as parameter in d3):
* we clear the value here if it should actually be suppressed.
fbase_supp_ck:
btst #$7,d5 * is the base suppressed?
beq.b fno_base_sup
clr.l d3 * yes: base = 0
* base displacement:
fno_base_sup:
bfextu d5{#26:#2},d0 * get bd size
* beq.l fmovm_error # if (size == 0) it's reserved
cmpi.b #$2,d0
blt.b fno_bd * size 0 or 1: no bd to fetch
beq.b fget_word_bd * size 2: word bd
* size 3: long bd
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long
tst.l d1 * did ifetch fail?
bne.l fcea_iacc * yes
bra.b fchk_ind
fget_word_bd:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word
tst.l d1 * did ifetch fail?
bne.l fcea_iacc * yes
ext.l d0 * sign extend bd
fchk_ind:
add.l d0,d3 * base += bd
* outer displacement:
fno_bd:
bfextu d5{#30:#2},d0 * is od suppressed?
beq.w faii_bd * field = 0: no memory indirection
cmpi.b #$2,d0
blt.b fnull_od * field = 1: od = 0
beq.b fword_od * field = 2: word od
* field = 3: long od
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long
tst.l d1 * did ifetch fail?
bne.l fcea_iacc * yes
bra.b fadd_them
fword_od:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word
tst.l d1 * did ifetch fail?
bne.l fcea_iacc * yes
ext.l d0 * sign extend od
bra.b fadd_them
fnull_od:
clr.l d0
fadd_them:
move.l d0,d4 * put od in d4
btst #$2,d5 * pre or post indexing?
beq.b fpre_indexed
* post-indexed: fetch the pointer at (base + bd), then add index and od
move.l d3,a0
bsr.l _dmem_read_long
tst.l d1 * did dfetch fail?
bne.w fcea_err * yes
add.l d2,d0 * <ea> += index
add.l d4,d0 * <ea> += od
bra.b fdone_ea
* pre-indexed: fetch the pointer at (base + bd + index), then add od
fpre_indexed:
add.l d2,d3 * preindexing
move.l d3,a0
bsr.l _dmem_read_long
tst.l d1 * did dfetch fail?
bne.w fcea_err * yes
add.l d4,d0 * ea += od
bra.b fdone_ea
faii_bd:
add.l d2,d3 * ea = (base + bd) + index
move.l d3,d0
fdone_ea:
move.l d0,a0 * return <ea> in a0
movem.l (sp)+,d2-d5 * restore d2-d5
rts
*########################################################
* Error exits shared by the <ea> calculation and fmovem data paths.
* None of them returns here; each ends in a branch to iea_dacc or
* iea_iacc, which are defined elsewhere in the file.
* The word loaded into d0 is handed to iea_dacc unchanged; presumably
* it is the fault status for the access error frame - confirm against
* iea_dacc.
*
* fcea_err: data read inside fcalc_mem_ind failed.
* a0 = address that was being read (d3), d2-d5 restored.
fcea_err:
move.l d3,a0 * pass: address of the failed read
movem.l (sp)+,d2-d5 * restore d2-d5
move.w #$0101,d0
bra.l iea_dacc
* fcea_iacc: instruction fetch inside fcalc_mem_ind failed.
fcea_iacc:
movem.l (sp)+,d2-d5 * restore d2-d5
bra.l iea_iacc
* fmovem_out_err: write of the register image to user memory failed.
fmovem_out_err:
bsr.l restore
move.w #$00e1,d0
bra.b fmovem_err
* fmovem_in_err: read of the register image from user memory failed.
fmovem_in_err:
bsr.l restore
move.w #$0161,d0
* common tail: reload the user address saved in L_SCR1(a6)
fmovem_err:
move.l L_SCR1(a6),a0
bra.l iea_dacc
*########################################################################
* XDEF **************************************************************** #
* fmovem_ctrl(): emulate fmovem.l #<data> to multiple control registers #
* #
* XREF **************************************************************** #
* _imem_read_long() - read longword from memory #
* iea_iacc() - _imem_read_long() failed; error recovery #
* #
* INPUT *************************************************************** #
* None #
* #
* OUTPUT ************************************************************** #
* If _imem_read_long() doesn't fail: #
* USER_FPCR(a6) = new FPCR value #
* USER_FPSR(a6) = new FPSR value #
* USER_FPIAR(a6) = new FPIAR value #
* #
* ALGORITHM *********************************************************** #
* Decode the instruction type by looking at the extension word #
* in order to see how many control registers to fetch from memory. #
* Fetch them using _imem_read_long(). If this fetch fails, exit through #
* the special access error exit handler iea_iacc(). #
* #
* Instruction word decoding: #
* #
* fmovem.l #<data>, {FPIAR&|FPCR&|FPSR} #
* #
* WORD1 WORD2 #
* 1111 0010 00 111100 100$ $$00 0000 0000 #
* #
* $$$ (100): FPCR #
* (010): FPSR #
* (001): FPIAR #
* (000): FPIAR #
* #
*########################################################################
global fmovem_ctrl
* fmovem_ctrl: load two or three FP control registers from immediate
* data that follows the instruction.
* The first byte at EXC_EXTWORD(a6) selects the combination:
* $9c = FPCR+FPSR+FPIAR, $98 = FPCR+FPSR, $94 = FPCR+FPIAR.
* Any other value falls through to the FPSR+FPIAR case without being
* checked. Each value is one longword read through _imem_read_long at
* EXC_EXTWPTR(a6), which is advanced by 4 per read; the values are
* consumed in the order FPCR, FPSR, FPIAR and stored into the
* USER_FPCR/USER_FPSR/USER_FPIAR slots of the frame.
* A failed read (d1 != 0) leaves through iea_iacc; slots written before
* the failure keep their new values.
fmovem_ctrl:
move.b EXC_EXTWORD(a6),d0 * fetch reg select bits
cmpi.b #$9c,d0 * fpcr & fpsr & fpiar ?
beq.w fctrl_in_7 * yes
cmpi.b #$98,d0 * fpcr & fpsr ?
beq.w fctrl_in_6 * yes
cmpi.b #$94,d0 * fpcr & fpiar ?
beq.b fctrl_in_5 * yes
* fmovem.l #<data>, fpsr/fpiar
fctrl_in_3:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch FPSR from mem
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.l d0,USER_FPSR(a6) * store new FPSR to stack
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch FPIAR from mem
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.l d0,USER_FPIAR(a6) * store new FPIAR to stack
rts
* fmovem.l #<data>, fpcr/fpiar
fctrl_in_5:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch FPCR from mem
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.l d0,USER_FPCR(a6) * store new FPCR to stack
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch FPIAR from mem
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.l d0,USER_FPIAR(a6) * store new FPIAR to stack
rts
* fmovem.l #<data>, fpcr/fpsr
fctrl_in_6:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch FPCR from mem
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.l d0,USER_FPCR(a6) * store new FPCR to stack
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch FPSR from mem
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.l d0,USER_FPSR(a6) * store new FPSR to stack
rts
* fmovem.l #<data>, fpcr/fpsr/fpiar
fctrl_in_7:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch FPCR from mem
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.l d0,USER_FPCR(a6) * store new FPCR to stack
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch FPSR from mem
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.l d0,USER_FPSR(a6) * store new FPSR to stack
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch FPIAR from mem
tst.l d1 * did ifetch fail?
bne.l iea_iacc * yes
move.l d0,USER_FPIAR(a6) * store new FPIAR to stack
rts
*########################################################################
* XDEF **************************************************************** #
* _dcalc_ea(): calc correct <ea> from <ea> stacked on exception #
* #
* XREF **************************************************************** #
* inc_areg() - increment an address register #
* dec_areg() - decrement an address register #
* #
* INPUT *************************************************************** #
* d0 = number of bytes to adjust <ea> by #
* #
* OUTPUT ************************************************************** #
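* a0 = effective address to use for the operand #
* EXC_EA(a6) is also corrected for -(An) with a 12-byte operand #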
* #
* ALGORITHM *********************************************************** #
* "Dummy" CALCulate Effective Address: #
* The stacked <ea> for FP unimplemented instructions and opclass #
* two packed instructions is correct with the exception of... #
* #
* 1) -(An) : The register is not updated regardless of size. #
* Also, for extended precision and packed, the #
* stacked <ea> value is 8 bytes too big #
* 2) (An)+ : The register is not updated. #
* 3) #<data> : Only the upper longword of the immediate operand is #
* stacked. b,w,l and s sizes are completely stacked; #
* d,x, and p are not. #
* #
*########################################################################
global _dcalc_ea
* _dcalc_ea: return the operand address in a0, patching up the cases
* where the stacked <ea> or the address register is not right.
* In: d0 = operand size in bytes
* Out: a0 = effective address
* (An)+ : An is advanced through inc_areg; stacked <ea> is used as is.
* -(An) : An is moved back through dec_areg; for a 12-byte operand the
* stacked <ea> is also lowered by 8 and written back to EXC_EA.
* #<data>: immed_flg is stored in SPCOND_FLG and a0 = FPIAR + 4.
* other : the stacked <ea> is returned unchanged.
_dcalc_ea:
move.l d0,a0 * move # bytes to %a0
move.b 1+EXC_OPWORD(a6),d0 * fetch low byte of opcode word
move.l d0,d1 * make a copy
andi.w #$38,d0 * extract mode field
andi.l #$7,d1 * extract reg field
cmpi.b #$18,d0 * is mode (An)+ ?
beq.b dcea_pi * yes
cmpi.b #$20,d0 * is mode -(An) ?
beq.b dcea_pd * yes
or.w d1,d0 * concat mode,reg
cmpi.b #$3c,d0 * is mode #<data>?
beq.b dcea_imm * yes
move.l EXC_EA(a6),a0 * return <ea>
rts
* need to set immediate data flag here since we'll need to do
* an imem_read to fetch this later.
dcea_imm:
move.b #immed_flg,SPCOND_FLG(a6)
* memory indirect lea: a0 = (longword stored at USER_FPIAR(a6)) + 4
lea ([USER_FPIAR,a6],$4),a0 * return <ea> of the immediate data
rts
* here, the <ea> is stacked correctly. however, we must update the
* address register...
dcea_pi:
move.l a0,d0 * pass amt to inc by
bsr.l inc_areg * inc addr register
move.l EXC_EA(a6),a0 * stacked <ea> is correct
rts
* the <ea> is stacked correctly for all but extended and packed which
* the <ea>s are 8 bytes too large.
* it would make no sense to have a pre-decrement to a7 in supervisor
* mode so we don't even worry about this tricky case here : )
dcea_pd:
move.l a0,d0 * pass amt to dec by
bsr.l dec_areg * dec addr register
move.l EXC_EA(a6),a0 * stacked <ea> is correct
* NOTE(review): the compare below reads d0 after the call, so it
* relies on dec_areg leaving d0 (the byte count) intact - confirm.
cmpi.b #$c,d0 * is opsize ext or packed?
beq.b dcea_pd2 * yes
rts
dcea_pd2:
sub.l #$8,a0 * correct <ea>
move.l a0,EXC_EA(a6) * put correct <ea> on stack
rts
*########################################################################
* XDEF **************************************************************** #
* _calc_ea_fout(): calculate correct stacked <ea> for extended #
* and packed data opclass 3 operations. #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* None #
* #
* OUTPUT ************************************************************** #
* a0 = return correct effective address #
* #
* ALGORITHM *********************************************************** #
* For opclass 3 extended and packed data operations, the <ea> #
* stacked for the exception is incorrect for -(an) and (an)+ addressing #
* modes. Also, while we're at it, the index register itself must get #
* updated. #
* So, for -(an), we must subtract 8 off of the stacked <ea> value #
* and return that value as the correct <ea> and store that value in An. #
* For (an)+, the stacked <ea> is correct but we must adjust An by +12. #
* #
*########################################################################
* This calc_ea is currently used to retrieve the correct <ea>
* for fmove outs of type extended and packed.
global _calc_ea_fout
* _calc_ea_fout: return in a0 the destination address for an extended
* or packed fmove out and bring An up to date.
* (An)+ : a0 = stacked <ea>; An is advanced by 12.
* -(An) : stacked <ea> is lowered by 8 (in a0 and in EXC_EA); An is
* set to that value.
* other : a0 = stacked <ea>, nothing else changes.
* An is updated in place for a2-a5 and in the frame (EXC_DREGS+$8/+$c,
* EXC_A6, EXC_A7) for a0, a1, a6 and a7. The a7 cases additionally
* store mia7_flg / mda7_flg in SPCOND_FLG(a6).
* d0 and d1 are used as scratch.
_calc_ea_fout:
move.b 1+EXC_OPWORD(a6),d0 * fetch low byte of opcode word
move.l d0,d1 * make a copy
andi.w #$38,d0 * extract mode field
andi.l #$7,d1 * extract reg field
cmpi.b #$18,d0 * is mode (An)+ ?
beq.b ceaf_pi * yes
cmpi.b #$20,d0 * is mode -(An) ?
beq.w ceaf_pd * yes
move.l EXC_EA(a6),a0 * stacked <ea> is correct
rts
* (An)+ : extended and packed fmove out
* : stacked <ea> is correct
* : "An" not updated
ceaf_pi:
move.w (tbl_ceaf_pi.b,pc,d1.w*2),d1 * fetch offset of handler for An
move.l EXC_EA(a6),a0 * a0 = <ea> to return
jmp (tbl_ceaf_pi.b,pc,d1.w*1) * go update An
* $4AFC (ILLEGAL opcode) followed by the number of table entries
.dc.w $4AFC,$8
tbl_ceaf_pi:
.dc.w ceaf_pi0-tbl_ceaf_pi
.dc.w ceaf_pi1-tbl_ceaf_pi
.dc.w ceaf_pi2-tbl_ceaf_pi
.dc.w ceaf_pi3-tbl_ceaf_pi
.dc.w ceaf_pi4-tbl_ceaf_pi
.dc.w ceaf_pi5-tbl_ceaf_pi
.dc.w ceaf_pi6-tbl_ceaf_pi
.dc.w ceaf_pi7-tbl_ceaf_pi
ceaf_pi0:
addi.l #$c,EXC_DREGS+$8(a6) * saved a0 += 12
rts
ceaf_pi1:
addi.l #$c,EXC_DREGS+$c(a6) * saved a1 += 12
rts
ceaf_pi2:
add.l #$c,a2 * a2 += 12
rts
ceaf_pi3:
add.l #$c,a3 * a3 += 12
rts
ceaf_pi4:
add.l #$c,a4 * a4 += 12
rts
ceaf_pi5:
add.l #$c,a5 * a5 += 12
rts
ceaf_pi6:
addi.l #$c,EXC_A6(a6) * saved a6 += 12
rts
ceaf_pi7:
move.b #mia7_flg,SPCOND_FLG(a6) * set "special case" flag
addi.l #$c,EXC_A7(a6) * saved a7 += 12
rts
* -(An) : extended and packed fmove out
* : stacked <ea> = actual <ea> + 8
* : "An" not updated
ceaf_pd:
move.w (tbl_ceaf_pd.b,pc,d1.w*2),d1 * fetch offset of handler for An
move.l EXC_EA(a6),a0
sub.l #$8,a0 * a0 = corrected <ea>
sub.l #$8,EXC_EA(a6) * correct the stacked <ea> too
jmp (tbl_ceaf_pd.b,pc,d1.w*1) * go update An
* $4AFC (ILLEGAL opcode) followed by the number of table entries
.dc.w $4AFC,$8
tbl_ceaf_pd:
.dc.w ceaf_pd0-tbl_ceaf_pd
.dc.w ceaf_pd1-tbl_ceaf_pd
.dc.w ceaf_pd2-tbl_ceaf_pd
.dc.w ceaf_pd3-tbl_ceaf_pd
.dc.w ceaf_pd4-tbl_ceaf_pd
.dc.w ceaf_pd5-tbl_ceaf_pd
.dc.w ceaf_pd6-tbl_ceaf_pd
.dc.w ceaf_pd7-tbl_ceaf_pd
ceaf_pd0:
move.l a0,EXC_DREGS+$8(a6) * saved a0 = corrected <ea>
rts
ceaf_pd1:
move.l a0,EXC_DREGS+$c(a6) * saved a1 = corrected <ea>
rts
ceaf_pd2:
move.l a0,a2 * a2 = corrected <ea>
rts
ceaf_pd3:
move.l a0,a3 * a3 = corrected <ea>
rts
ceaf_pd4:
move.l a0,a4 * a4 = corrected <ea>
rts
ceaf_pd5:
move.l a0,a5 * a5 = corrected <ea>
rts
ceaf_pd6:
move.l a0,EXC_A6(a6) * saved a6 = corrected <ea>
rts
ceaf_pd7:
move.l a0,EXC_A7(a6) * saved a7 = corrected <ea>
move.b #mda7_flg,SPCOND_FLG(a6) * set "special case" flag
rts
*########################################################################
* XDEF **************************************************************** #
* _load_fop(): load operand for unimplemented FP exception #
* #
* XREF **************************************************************** #
* set_tag_x() - determine ext prec optype tag #
* set_tag_s() - determine sgl prec optype tag #
* set_tag_d() - determine dbl prec optype tag #
* unnorm_fix() - convert unnormalized number to denorm or zero #
* norm() - normalize a denormalized number #
* get_packed() - fetch a packed operand from memory #
* _dcalc_ea() - calculate <ea>, fixing An in process #
* #
* _imem_read_{word,long}() - read from instruction memory #
* _dmem_read() - read from data memory #
* _dmem_read_{byte,word,long}() - read from data memory #
* #
* facc_in_{b,w,l,d,x}() - mem read failed; special exit point #
* #
* INPUT *************************************************************** #
* None #
* #
* OUTPUT ************************************************************** #
* If memory access doesn't fail: #
* FP_SRC(a6) = source operand in extended precision #
* FP_DST(a6) = destination operand in extended precision #
* #
* ALGORITHM *********************************************************** #
* This is called from the Unimplemented FP exception handler in #
* order to load the source and maybe destination operand into #
* FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load #
* the source and destination from the FP register file. Set the optype #
* tags for both if dyadic, one for monadic. If a number is an UNNORM, #
* convert it to a DENORM or a ZERO. #
* If the instruction is opclass two (memory->reg), then fetch #
* the destination from the register file and the source operand from #
* memory. Tag and fix both as above w/ opclass zero instructions. #
* If the source operand is byte,word,long, or single, it may be #
* in the data register file. If it's actually out in memory, use one of #
* the mem_read() routines to fetch it. If the mem_read() access returns #
* a failing value, exit through the special facc_in() routine which #
* will create an access error exception frame from the current exception #
* frame. #
* Immediate data and regular data accesses are separated because #
* if an immediate data access fails, the resulting fault status #
* longword stacked for the access error exception must have the #
* instruction bit set. #
* #
*########################################################################
global _load_fop
* _load_fop entry and the register-to-register (opclass 000) path.
* Bit 6 of the first byte of EXC_CMDREG (bit 14 of the command word)
* separates opclass 010 from opclass 000.
* For opclass 000 the low command byte decides what is loaded:
* bit 5 = 0 : source only
* bit 5 = 1, bit 4 = 0 : destination and source
* bit 5 = 1, bit 4 = 1 : destination and source only when bits 6-0
* are $38 (fcmp), otherwise source only
* The destination tag goes to DTAG(a6), the source tag to STAG(a6);
* an operand tagged UNNORM is passed through unnorm_fix first.
_load_fop:
* 15 13 12 10 9 7 6 0
* / \ / \ / \ / \
* ---------------------------------
* | opclass | RX | RY | EXTENSION | (2nd word of general FP instruction)
* ---------------------------------
*
* bfextu EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
* cmpi.b %d0, &0x2 # which class is it? ('000,'010,'011)
* beq.w op010 # handle <ea> -> fpn
* bgt.w op011 # handle fpn -> <ea>
* we're not using op011 for now...
btst #$6,EXC_CMDREG(a6) * opclass 010 ?
bne.b op010 * yes
*###########################
* OPCLASS '000: reg -> reg #
*###########################
op000:
move.b 1+EXC_CMDREG(a6),d0 * fetch extension word lo
btst #$5,d0 * testing extension bits
beq.b op000_src * (bit 5 == 0) => monadic
btst #$4,d0 * (bit 5 == 1)
beq.b op000_dst * (bit 4 == 0) => dyadic
andi.w #$007f,d0 * extract extension bits {6:0}
cmpi.w #$0038,d0 * is it an fcmp (dyadic) ?
bne.b op000_src * no: only the source is loaded
op000_dst:
bfextu EXC_CMDREG(a6){#6:#3},d0 * extract dst field
bsr.l load_fpn2 * fetch dst fpreg into FP_DST
bsr.l set_tag_x * get dst optype tag
cmpi.b #UNNORM,d0 * is dst fpreg an UNNORM?
beq.b op000_dst_unnorm * yes
op000_dst_cont:
move.b d0,DTAG(a6) * store the dst optype tag
op000_src:
bfextu EXC_CMDREG(a6){#3:#3},d0 * extract src field
bsr.l load_fpn1 * fetch src fpreg into FP_SRC
bsr.l set_tag_x * get src optype tag
cmpi.b #UNNORM,d0 * is src fpreg an UNNORM?
beq.b op000_src_unnorm * yes
op000_src_cont:
move.b d0,STAG(a6) * store the src optype tag
rts
* out-of-line fixups; each rejoins the main path at the tag store
op000_dst_unnorm:
bsr.l unnorm_fix * fix the dst UNNORM
bra.b op000_dst_cont
op000_src_unnorm:
bsr.l unnorm_fix * fix the src UNNORM
bra.b op000_src_cont
*############################
* OPCLASS '010: <ea> -> reg #
*############################
op010:
* <ea>-to-register form. Decides from extension bits {6:0} whether a
* destination fpreg must be fetched and tagged, then fetches the source
* either from a data register (via tbl_op010_dreg) or from memory
* (fetch_from_mem). Both dispatches use an 8-bit PC-relative displacement
* to the table, so code size between the dispatch and its table matters.
move.w EXC_CMDREG(a6),d0 * fetch extension word
btst #$5,d0 * testing extension bits
beq.b op010_src * (bit 5 == 0) => monadic
btst #$4,d0 * (bit 5 == 1)
beq.b op010_dst * (bit 4 == 0) => dyadic
andi.w #$007f,d0 * extract extension bits {6:0}
cmpi.w #$0038,d0 * is it an fcmp (dyadic) ?
bne.b op010_src * no => monadic; fcmp falls through to op010_dst
op010_dst:
bfextu EXC_CMDREG(a6){#6:#3},d0 * extract dst field
bsr.l load_fpn2 * fetch dst fpreg ptr
bsr.l set_tag_x * get dst type tag
cmpi.b #UNNORM,d0 * is dst fpreg an UNNORM?
beq.b op010_dst_unnorm * yes
op010_dst_cont:
move.b d0,DTAG(a6) * store the dst optype tag
op010_src:
bfextu EXC_CMDREG(a6){#3:#3},d0 * extract src type field
* The second bfextu sets Z from the extracted mode field, so the bne below
* is taken for every non-zero <ea> mode; d0 still holds the src type field.
bfextu EXC_OPWORD(a6){#10:#3},d1 * extract <ea> mode field
bne.w fetch_from_mem * src op is in memory
op010_dreg:
* Mode field was zero: the source is in a data register.
clr.b STAG(a6) * either NORM or ZERO
bfextu EXC_OPWORD(a6){#13:#3},d1 * extract src reg field
* NOTE(review): d1 (src reg number) is presumably the argument consumed by
* fetch_dreg in the opd_* handlers -- confirm against fetch_dreg.
move.w (tbl_op010_dreg.b,pc,d0.w*2),d0 * d0 = 16-bit handler offset for this src type
jmp (tbl_op010_dreg.b,pc,d0.w*1) * fetch src from dreg
op010_dst_unnorm:
* Out-of-line fix-up for an UNNORM destination; rejoins at op010_dst_cont.
bsr.l unnorm_fix * fix the dst UNNORM
bra.b op010_dst_cont
.dc.w $4AFC,$8 * NOTE(review): $4AFC is the ILLEGAL opcode; looks like a switch-table marker with entry count 8 -- confirm
* Dispatch table for a source operand held in a data register, indexed by
* the 3-bit src type field. Each entry is a 16-bit offset relative to
* tbl_op010_dreg. Slots 2, 3, 5 and 7 hold offset zero, i.e. they resolve
* to the table itself; only long, single, word and byte have handlers.
tbl_op010_dreg:
.dc.w opd_long-tbl_op010_dreg
.dc.w opd_sgl-tbl_op010_dreg
.dc.w tbl_op010_dreg-tbl_op010_dreg
.dc.w tbl_op010_dreg-tbl_op010_dreg
.dc.w opd_word-tbl_op010_dreg
.dc.w tbl_op010_dreg-tbl_op010_dreg
.dc.w opd_byte-tbl_op010_dreg
.dc.w tbl_op010_dreg-tbl_op010_dreg
*
* LONG: can be either NORM or ZERO...
*
opd_long:
* Long integer source in a data register: convert it to extended
* precision, store it in FP_SRC, and tag it ZERO when the value is zero.
* STAG was already cleared in op010_dreg, so a non-zero value needs no
* further tagging.
bsr.l fetch_dreg * d0 = long from the source dreg
fmove.l d0,fp0 * convert; sets the FP condition codes
fmovem.x fp0,FP_SRC(a6) * FP_SRC = converted operand
fbne.w opd_long_done * non-zero: keep the cleared STAG
opd_long_zero:
move.b #ZERO,STAG(a6) * zero: tag the operand as ZERO
opd_long_done:
rts
*
* WORD: can be either NORM or ZERO...
*
opd_word:
* Word integer source in a data register: convert it to extended
* precision, store it in FP_SRC, and tag it ZERO when the value is zero.
* STAG was already cleared in op010_dreg, so a non-zero value needs no
* further tagging.
bsr.l fetch_dreg * d0 = word from the source dreg
fmove.w d0,fp0 * convert; sets the FP condition codes
fmovem.x fp0,FP_SRC(a6) * FP_SRC = converted operand
fbne.w opd_word_done * non-zero: keep the cleared STAG
opd_word_zero:
move.b #ZERO,STAG(a6) * zero: tag the operand as ZERO
opd_word_done:
rts
*
* BYTE: can be either NORM or ZERO...
*
opd_byte:
* Byte integer source in a data register: convert it to extended
* precision, store it in FP_SRC, and tag it ZERO when the value is zero.
* STAG was already cleared in op010_dreg, so a non-zero value needs no
* further tagging.
bsr.l fetch_dreg * d0 = byte from the source dreg
fmove.b d0,fp0 * convert; sets the FP condition codes
fmovem.x fp0,FP_SRC(a6) * FP_SRC = converted operand
fbne.w opd_byte_done * non-zero: keep the cleared STAG
opd_byte_zero:
move.b #ZERO,STAG(a6) * zero: tag the operand as ZERO
opd_byte_done:
rts
*
* SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
*
* separate SNANs and DENORMs so they can be loaded w/ special care.
* all others can simply be moved "in" using fmove.
*
opd_sgl:
* Single-precision source in a data register. The raw bits are parked in
* L_SCR1 so they can be classified through a pointer; SNANs and DENORMs
* are handed to dedicated loaders, everything else is loaded with a plain
* fmove.s. The tag from set_tag_s is stored in STAG in every case.
bsr.l fetch_dreg * fetch sgl in d0
move.l d0,L_SCR1(a6) * spill the raw sgl bits to scratch memory
lea L_SCR1(a6),a0 * pass: ptr to the sgl
bsr.l set_tag_s * determine sgl type
move.b d0,STAG(a6) * save the src tag
cmpi.b #SNAN,d0 * is it an SNAN?
beq.w get_sgl_snan * yes
cmpi.b #DENORM,d0 * is it a DENORM?
beq.w get_sgl_denorm * yes
* NOTE(review): the load below relies on a0 still pointing at L_SCR1 after
* set_tag_s returns -- confirm set_tag_s preserves a0.
fmove.s (a0),fp0 * no, so can load it regular
fmovem.x fp0,FP_SRC(a6) * return src op in FP_SRC
rts
*#############################################################################
*########################################################################
* fetch_from_mem(): #
* - src is out in memory. must: #
* (1) calc ea - must read AFTER you know the src type since #
* if the ea is -() or ()+, need to know # of bytes. #
* (2) read it in from either user or supervisor space #
* (3) if (b || w || l) then simply read in #
* if (s || d || x) then check for SNAN,UNNORM,DENORM #
* if (packed) then punt for now #
* INPUT: #
* %d0 : src type field #
*########################################################################
fetch_from_mem:
* Reached from op010_src when the <ea> mode field is non-zero, with
* d0 = src type field. Clears STAG as the default tag, then dispatches to
* the per-type loader through tbl_fp_type (16-bit offsets relative to the
* table, addressed with an 8-bit PC-relative displacement).
clr.b STAG(a6) * either NORM or ZERO
move.w (tbl_fp_type.b,pc,d0.w*2),d0 * index by src type field
jmp (tbl_fp_type.b,pc,d0.w*1) * jump to tbl_fp_type + fetched offset
.dc.w $4AFC,$8 * NOTE(review): $4AFC is the ILLEGAL opcode; looks like a switch-table marker with entry count 8 -- confirm
* Dispatch table for a source operand in memory, indexed by the 3-bit src
* type field. Each entry is a 16-bit offset relative to tbl_fp_type.
* Slot 7 holds offset zero, i.e. it resolves to the table itself.
tbl_fp_type:
.dc.w load_long-tbl_fp_type
.dc.w load_sgl-tbl_fp_type
.dc.w load_ext-tbl_fp_type
.dc.w load_packed-tbl_fp_type
.dc.w load_word-tbl_fp_type
.dc.w load_dbl-tbl_fp_type
.dc.w load_byte-tbl_fp_type
.dc.w tbl_fp_type-tbl_fp_type
*########################################
* load a LONG into %fp0: #
* -number can't fault #
* (1) calc ea #
* (2) read 4 bytes into %d0 #
* (3) fmov.l into %fp0 #
*########################################
load_long:
* Long integer source addressed by <ea>. The operand is read either from
* data memory or, for an immediate, from the instruction stream; it is
* then converted to extended precision, stored in FP_SRC, and tagged ZERO
* when the value is zero. A non-zero d1 after either read is treated as
* an access failure and control leaves through the matching handler.
moveq.l #$4,d0 * operand size: 4 bytes
bsr.l _dcalc_ea * a0 = <ea>
cmpi.b #immed_flg,SPCOND_FLG(a6) * immediate operand?
beq.b load_long_immed * yes: take it from the instruction stream
bsr.l _dmem_read_long * d0 = operand read from data memory
tst.l d1 * read status
bne.l facc_in_l * non-zero: data fetch failed
load_long_cont:
fmove.l d0,fp0 * convert; sets the FP condition codes
fmovem.x fp0,FP_SRC(a6) * FP_SRC = converted operand
fbne.w load_long_done * non-zero: leave STAG untouched
load_long_zero:
move.b #ZERO,STAG(a6) * zero: tag the operand as ZERO
load_long_done:
rts
load_long_immed:
bsr.l _imem_read_long * d0 = immediate operand
tst.l d1 * read status
bne.l funimp_iacc * non-zero: instruction fetch failed
bra.b load_long_cont * rejoin the common conversion path
*########################################
* load a WORD into %fp0: #
* -number can't fault #
* (1) calc ea #
* (2) read 2 bytes into %d0 #
* (3) fmov.w into %fp0 #
*########################################
load_word:
* Word integer source addressed by <ea>. The operand is read either from
* data memory or, for an immediate, from the instruction stream; it is
* then converted to extended precision, stored in FP_SRC, and tagged ZERO
* when the value is zero. A non-zero d1 after either read is treated as
* an access failure and control leaves through the matching handler.
moveq.l #$2,d0 * operand size: 2 bytes
bsr.l _dcalc_ea * a0 = <ea>
cmpi.b #immed_flg,SPCOND_FLG(a6) * immediate operand?
beq.b load_word_immed * yes: take it from the instruction stream
bsr.l _dmem_read_word * d0 = operand read from data memory
tst.l d1 * read status
bne.l facc_in_w * non-zero: data fetch failed
load_word_cont:
fmove.w d0,fp0 * convert; sets the FP condition codes
fmovem.x fp0,FP_SRC(a6) * FP_SRC = converted operand
fbne.w load_word_done * non-zero: leave STAG untouched
load_word_zero:
move.b #ZERO,STAG(a6) * zero: tag the operand as ZERO
load_word_done:
rts
load_word_immed:
bsr.l _imem_read_word * d0 = immediate operand
tst.l d1 * read status
bne.l funimp_iacc * non-zero: instruction fetch failed
bra.b load_word_cont * rejoin the common conversion path
*########################################
* load a BYTE into %fp0: #
* -number can't fault #
* (1) calc ea #
* (2) read 1 byte into L_SCR1 #
* (3) fmov.b into %fp0 #
*########################################
load_byte:
moveq.l #$1,d0 * pass: 1 (byte)
bsr.l _dcalc_ea * calc <ea>; <ea> in %a0
cmpi.b #immed_flg,SPCOND_FLG(a6)
beq.b load_byte_immed
bsr.l _dmem_read_byte * fetch src operand from memory