misc/060tsys/t28fpsp.s (3/3)
1 2 3
tst.l d1 * did dfetch fail?
bne.l facc_in_b * yes
load_byte_cont:
fmove.b d0,fp0 * read into %fp0;convert to xprec
fmovem.x fp0,FP_SRC(a6) * return src op in FP_SRC
fbeq.w load_byte_zero * src op is a ZERO
rts
load_byte_zero:
move.b #ZERO,STAG(a6) * set optype tag to ZERO
rts
load_byte_immed:
bsr.l _imem_read_word * fetch src operand immed data
tst.l d1 * did ifetch fail?
bne.l funimp_iacc * yes
bra.b load_byte_cont
*########################################
* load a SGL into %fp0: #
* -number can't fault #
* (1) calc ea #
* (2) read 4 bytes into L_SCR1 #
* (3) fmov.s into %fp0 #
*########################################
load_sgl:
moveq.l #$4,d0 * pass: 4 (bytes)
bsr.l _dcalc_ea * calc <ea>; <ea> in %a0
cmpi.b #immed_flg,SPCOND_FLG(a6)
beq.b load_sgl_immed
bsr.l _dmem_read_long * fetch src operand from memory
move.l d0,L_SCR1(a6) * store src op on stack
tst.l d1 * did dfetch fail?
bne.l facc_in_l * yes
load_sgl_cont:
lea L_SCR1(a6),a0 * pass: ptr to sgl src op
bsr.l set_tag_s * determine src type tag
move.b d0,STAG(a6) * save src optype tag on stack
cmpi.b #DENORM,d0 * is it a sgl DENORM?
beq.w get_sgl_denorm * yes
cmpi.b #SNAN,d0 * is it a sgl SNAN?
beq.w get_sgl_snan * yes
fmove.s L_SCR1(a6),fp0 * read into %fp0;convert to xprec
fmovem.x fp0,FP_SRC(a6) * return src op in FP_SRC
rts
load_sgl_immed:
bsr.l _imem_read_long * fetch src operand immed data
tst.l d1 * did ifetch fail?
bne.l funimp_iacc * yes
bra.b load_sgl_cont
* must convert sgl denorm format to an Xprec denorm fmt suitable for
* normalization...
* %a0 : points to sgl denorm
get_sgl_denorm:
clr.w FP_SRC_EX(a6)
bfextu (a0){#9:#23},d0 * fetch sgl hi(_mantissa)
lsl.l #$8,d0
move.l d0,FP_SRC_HI(a6) * set ext hi(_mantissa)
clr.l FP_SRC_LO(a6) * set ext lo(_mantissa)
clr.w FP_SRC_EX(a6)
btst #$7,(a0) * is sgn bit set?
beq.b sgl_dnrm_norm
bset #$7,FP_SRC_EX(a6) * set sgn of xprec value
sgl_dnrm_norm:
lea FP_SRC(a6),a0
bsr.l norm * normalize number
move.w #$3f81,d1 * xprec exp = 0x3f81
sub.w d0,d1 * exp = 0x3f81 - shft amt.
or.w d1,FP_SRC_EX(a6) * {sgn,exp}
move.b #NORM,STAG(a6) * fix src type tag
rts
* convert sgl to ext SNAN
* %a0 : points to sgl SNAN
get_sgl_snan:
move.w #$7fff,FP_SRC_EX(a6) * set exp of SNAN
bfextu (a0){#9:#23},d0
lsl.l #$8,d0 * extract and insert hi(man)
move.l d0,FP_SRC_HI(a6)
clr.l FP_SRC_LO(a6)
btst #$7,(a0) * see if sign of SNAN is set
beq.b no_sgl_snan_sgn
bset #$7,FP_SRC_EX(a6)
no_sgl_snan_sgn:
rts
*########################################
* load a DBL into %fp0: #
* -number can't fault #
* (1) calc ea #
* (2) read 8 bytes into L_SCR(1,2)#
* (3) fmov.d into %fp0 #
*########################################
load_dbl:
moveq.l #$8,d0 * pass: 8 (bytes)
bsr.l _dcalc_ea * calc <ea>; <ea> in %a0
cmpi.b #immed_flg,SPCOND_FLG(a6)
beq.b load_dbl_immed
lea L_SCR1(a6),a1 * pass: ptr to input dbl tmp space
moveq.l #$8,d0 * pass: # bytes to read
bsr.l _dmem_read * fetch src operand from memory
tst.l d1 * did dfetch fail?
bne.l facc_in_d * yes
load_dbl_cont:
lea L_SCR1(a6),a0 * pass: ptr to input dbl
bsr.l set_tag_d * determine src type tag
move.b d0,STAG(a6) * set src optype tag
cmpi.b #DENORM,d0 * is it a dbl DENORM?
beq.w get_dbl_denorm * yes
cmpi.b #SNAN,d0 * is it a dbl SNAN?
beq.w get_dbl_snan * yes
fmove.d L_SCR1(a6),fp0 * read into %fp0;convert to xprec
fmovem.x fp0,FP_SRC(a6) * return src op in FP_SRC
rts
load_dbl_immed:
lea L_SCR1(a6),a1 * pass: ptr to input dbl tmp space
moveq.l #$8,d0 * pass: # bytes to read
bsr.l _imem_read * fetch src operand from memory
tst.l d1 * did ifetch fail?
bne.l funimp_iacc * yes
bra.b load_dbl_cont
* must convert dbl denorm format to an Xprec denorm fmt suitable for
* normalization...
* %a0 : loc. of dbl denorm
get_dbl_denorm:
clr.w FP_SRC_EX(a6)
bfextu (a0){#12:#31},d0 * fetch hi(_mantissa)
move.l d0,FP_SRC_HI(a6)
bfextu 4(a0){#11:#21},d0 * fetch lo(_mantissa)
moveq.l #$b,d1
lsl.l d1,d0
move.l d0,FP_SRC_LO(a6)
btst #$7,(a0) * is sgn bit set?
beq.b dbl_dnrm_norm
bset #$7,FP_SRC_EX(a6) * set sgn of xprec value
dbl_dnrm_norm:
lea FP_SRC(a6),a0
bsr.l norm * normalize number
move.w #$3c01,d1 * xprec exp = 0x3c01
sub.w d0,d1 * exp = 0x3c01 - shft amt.
or.w d1,FP_SRC_EX(a6) * {sgn,exp}
move.b #NORM,STAG(a6) * fix src type tag
rts
* convert dbl to ext SNAN
* %a0 : points to dbl SNAN
get_dbl_snan:
move.w #$7fff,FP_SRC_EX(a6) * set exp of SNAN
bfextu (a0){#12:#31},d0 * fetch hi(_mantissa)
move.l d0,FP_SRC_HI(a6)
bfextu 4(a0){#11:#21},d0 * fetch lo(_mantissa)
moveq.l #$b,d1
lsl.l d1,d0
move.l d0,FP_SRC_LO(a6)
btst #$7,(a0) * see if sign of SNAN is set
beq.b no_dbl_snan_sgn
bset #$7,FP_SRC_EX(a6)
no_dbl_snan_sgn:
rts
*################################################
* load a Xprec into %fp0: #
* -number can't fault #
* (1) calc ea #
* (2) read 12 bytes into L_SCR(1,2) #
* (3) fmov.x into %fp0 #
*################################################
load_ext:
moveq.l #$c,d0 * pass: 12 (bytes)
bsr.l _dcalc_ea * calc <ea>
lea FP_SRC(a6),a1 * pass: ptr to input ext tmp space
moveq.l #$c,d0 * pass: # of bytes to read
bsr.l _dmem_read * fetch src operand from memory
tst.l d1 * did dfetch fail?
bne.l facc_in_x * yes
lea FP_SRC(a6),a0 * pass: ptr to src op
bsr.l set_tag_x * determine src type tag
cmpi.b #UNNORM,d0 * is the src op an UNNORM?
beq.b load_ext_unnorm * yes
move.b d0,STAG(a6) * store the src optype tag
rts
load_ext_unnorm:
bsr.l unnorm_fix * fix the src UNNORM
move.b d0,STAG(a6) * store the src optype tag
rts
*################################################
* load a packed into %fp0: #
* -number can't fault #
* (1) calc ea #
* (2) read 12 bytes into L_SCR(1,2,3) #
* (3) fmov.x into %fp0 #
*################################################
load_packed:
bsr.l get_packed
lea FP_SRC(a6),a0 * pass ptr to src op
bsr.l set_tag_x * determine src type tag
cmpi.b #UNNORM,d0 * is the src op an UNNORM ZERO?
beq.b load_packed_unnorm * yes
move.b d0,STAG(a6) * store the src optype tag
rts
load_packed_unnorm:
bsr.l unnorm_fix * fix the UNNORM ZERO
move.b d0,STAG(a6) * store the src optype tag
rts
*########################################################################
* XDEF **************************************************************** #
* fout(): move from fp register to memory or data register #
* #
* XREF **************************************************************** #
* _round() - needed to create EXOP for sgl/dbl precision #
* norm() - needed to create EXOP for extended precision #
* ovf_res() - create default overflow result for sgl/dbl precision#
* unf_res() - create default underflow result for sgl/dbl prec. #
* dst_dbl() - create rounded dbl precision result. #
* dst_sgl() - create rounded sgl precision result. #
* fetch_dreg() - fetch dynamic k-factor reg for packed. #
* bindec() - convert FP binary number to packed number. #
* _mem_write() - write data to memory. #
* _mem_write2() - write data to memory unless supv mode -(a7) exc.#
* _dmem_write_{byte,word,long}() - write data to memory. #
* store_dreg_{b,w,l}() - store data to data register file. #
* facc_out_{b,w,l,d,x}() - data access error occurred. #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision source operand #
* d0 = round prec,mode #
* #
* OUTPUT ************************************************************** #
* fp0 : intermediate underflow or overflow result if #
* OVFL/UNFL occurred for a sgl or dbl operand #
* #
* ALGORITHM *********************************************************** #
* This routine is accessed by many handlers that need to do an #
* opclass three move of an operand out to memory. #
* Decode an fmove out (opclass 3) instruction to determine if #
* it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data #
* register or memory. The algorithm uses a standard "fmove" to create #
* the rounded result. Also, since exceptions are disabled, this also #
* create the correct OPERR default result if appropriate. #
* For sgl or dbl precision, overflow or underflow can occur. If #
* either occurs and is enabled, the EXOP. #
* For extended precision, the stacked <ea> must be fixed along #
* w/ the address index register as appropriate w/ _calc_ea_fout(). If #
* the source is a denorm and if underflow is enabled, an EXOP must be #
* created. #
* For packed, the k-factor must be fetched from the instruction #
* word or a data register. The <ea> must be fixed as w/ extended #
* precision. Then, bindec() is called to create the appropriate #
* packed result. #
* If at any time an access error is flagged by one of the move- #
* to-memory routines, then a special exit must be made so that the #
* access error can be handled properly. #
* #
*########################################################################
global fout
fout:
bfextu EXC_CMDREG(a6){#3:#3},d1 * extract dst fmt
move.w (tbl_fout.b,pc,d1.w*2),a1 * use as index
jmp (tbl_fout.b,pc,a1.l) * jump to routine
.dc.w $4AFC,$8
tbl_fout:
.dc.w fout_long-tbl_fout
.dc.w fout_sgl-tbl_fout
.dc.w fout_ext-tbl_fout
.dc.w fout_pack-tbl_fout
.dc.w fout_word-tbl_fout
.dc.w fout_dbl-tbl_fout
.dc.w fout_byte-tbl_fout
.dc.w fout_pack-tbl_fout
*################################################################
* fmove.b out ###################################################
*################################################################
* Only "Unimplemented Data Type" exceptions enter here. The operand
* is either a DENORM or a NORM.
fout_byte:
tst.b STAG(a6) * is operand normalized?
bne.b fout_byte_denorm * no
fmovem.x SRC.w(a0),fp0 * load value
fout_byte_norm:
fmove.l d0,fpcr * insert rnd prec,mode
fmove.b fp0,d0 * exec move out w/ correct rnd mode
fmove.l #$0,fpcr * clear FPCR
fmove.l fpsr,d1 * fetch FPSR
or.w d1,2+USER_FPSR(a6) * save new exc,accrued bits
move.b 1+EXC_OPWORD(a6),d1 * extract dst mode
andi.b #$38,d1 * is mode == 0? (Dreg dst)
beq.b fout_byte_dn * must save to integer regfile
move.l EXC_EA(a6),a0 * stacked <ea> is correct
bsr.l _dmem_write_byte * write byte
tst.l d1 * did dstore fail?
bne.l facc_out_b * yes
rts
fout_byte_dn:
move.b 1+EXC_OPWORD(a6),d1 * extract Dn
andi.w #$7,d1
bsr.l store_dreg_b
rts
fout_byte_denorm:
move.l SRC_EX.w(a0),d1
andi.l #$80000000,d1 * keep DENORM sign
ori.l #$00800000,d1 * make smallest sgl
fmove.s d1,fp0
bra.b fout_byte_norm
*################################################################
* fmove.w out ###################################################
*################################################################
* Only "Unimplemented Data Type" exceptions enter here. The operand
* is either a DENORM or a NORM.
fout_word:
tst.b STAG(a6) * is operand normalized?
bne.b fout_word_denorm * no
fmovem.x SRC.w(a0),fp0 * load value
fout_word_norm:
fmove.l d0,fpcr * insert rnd prec:mode
fmove.w fp0,d0 * exec move out w/ correct rnd mode
fmove.l #$0,fpcr * clear FPCR
fmove.l fpsr,d1 * fetch FPSR
or.w d1,2+USER_FPSR(a6) * save new exc,accrued bits
move.b 1+EXC_OPWORD(a6),d1 * extract dst mode
andi.b #$38,d1 * is mode == 0? (Dreg dst)
beq.b fout_word_dn * must save to integer regfile
move.l EXC_EA(a6),a0 * stacked <ea> is correct
bsr.l _dmem_write_word * write word
tst.l d1 * did dstore fail?
bne.l facc_out_w * yes
rts
fout_word_dn:
move.b 1+EXC_OPWORD(a6),d1 * extract Dn
andi.w #$7,d1
bsr.l store_dreg_w
rts
fout_word_denorm:
move.l SRC_EX.w(a0),d1
andi.l #$80000000,d1 * keep DENORM sign
ori.l #$00800000,d1 * make smallest sgl
fmove.s d1,fp0
bra.b fout_word_norm
*################################################################
* fmove.l out ###################################################
*################################################################
* Only "Unimplemented Data Type" exceptions enter here. The operand
* is either a DENORM or a NORM.
fout_long:
tst.b STAG(a6) * is operand normalized?
bne.b fout_long_denorm * no
fmovem.x SRC.w(a0),fp0 * load value
fout_long_norm:
fmove.l d0,fpcr * insert rnd prec:mode
fmove.l fp0,d0 * exec move out w/ correct rnd mode
fmove.l #$0,fpcr * clear FPCR
fmove.l fpsr,d1 * fetch FPSR
or.w d1,2+USER_FPSR(a6) * save new exc,accrued bits
fout_long_write:
move.b 1+EXC_OPWORD(a6),d1 * extract dst mode
andi.b #$38,d1 * is mode == 0? (Dreg dst)
beq.b fout_long_dn * must save to integer regfile
move.l EXC_EA(a6),a0 * stacked <ea> is correct
bsr.l _dmem_write_long * write long
tst.l d1 * did dstore fail?
bne.l facc_out_l * yes
rts
fout_long_dn:
move.b 1+EXC_OPWORD(a6),d1 * extract Dn
andi.w #$7,d1
bsr.l store_dreg_l
rts
fout_long_denorm:
move.l SRC_EX.w(a0),d1
andi.l #$80000000,d1 * keep DENORM sign
ori.l #$00800000,d1 * make smallest sgl
fmove.s d1,fp0
bra.b fout_long_norm
*################################################################
* fmove.x out ###################################################
*################################################################
* Only "Unimplemented Data Type" exceptions enter here. The operand
* is either a DENORM or a NORM.
* The DENORM causes an Underflow exception.
fout_ext:
* we copy the extended precision result to FP_SCR0 so that the reserved
* 16-bit field gets zeroed. we do this since we promise not to disturb
* what's at SRC(a0).
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
clr.w 2+FP_SCR0_EX(a6) * clear reserved field
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
fmovem.x SRC.w(a0),fp0 * return result
bsr.l _calc_ea_fout * fix stacked <ea>
move.l a0,a1 * pass: dst addr
lea FP_SCR0(a6),a0 * pass: src addr
moveq.l #$c,d0 * pass: opsize is 12 bytes
* we must not yet write the extended precision data to the stack
* in the pre-decrement case from supervisor mode or else we'll corrupt
* the stack frame. so, leave it in FP_SRC for now and deal with it later...
cmpi.b #mda7_flg,SPCOND_FLG(a6)
beq.b fout_ext_a7
bsr.l _dmem_write * write ext prec number to memory
tst.l d1 * did dstore fail?
bne.w fout_ext_err * yes
tst.b STAG(a6) * is operand normalized?
bne.b fout_ext_denorm * no
rts
* the number is a DENORM. must set the underflow exception bit
fout_ext_denorm:
bset #unfl_bit,FPSR_EXCEPT(a6) * set underflow exc bit
move.b FPCR_ENABLE(a6),d0
andi.b #$0a,d0 * is UNFL or INEX enabled?
bne.b fout_ext_exc * yes
rts
* we don't want to do the write if the exception occurred in supervisor mode
* so _mem_write2() handles this for us.
fout_ext_a7:
bsr.l _mem_write2 * write ext prec number to memory
tst.l d1 * did dstore fail?
bne.w fout_ext_err * yes
tst.b STAG(a6) * is operand normalized?
bne.b fout_ext_denorm * no
rts
fout_ext_exc:
lea FP_SCR0(a6),a0
bsr.l norm * normalize the mantissa
neg.w d0 * new exp = -(shft amt)
andi.w #$7fff,d0
andi.w #$8000,FP_SCR0_EX(a6) * keep only old sign
or.w d0,FP_SCR0_EX(a6) * insert new exponent
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
rts
fout_ext_err:
move.l EXC_A6(a6),(a6) * fix stacked a6
bra.l facc_out_x
*########################################################################
* fmove.s out ###########################################################
*########################################################################
fout_sgl:
andi.b #$30,d0 * clear rnd prec
ori.b #s_mode*$10,d0 * insert sgl prec
move.l d0,L_SCR3(a6) * save rnd prec,mode on stack
*
* operand is a normalized number. first, we check to see if the move out
* would cause either an underflow or overflow. these cases are handled
* separately. otherwise, set the FPCR to the proper rounding mode and
* execute the move.
*
move.w SRC_EX.w(a0),d0 * extract exponent
andi.w #$7fff,d0 * strip sign
cmpi.w #SGL_HI,d0 * will operand overflow?
bgt.w fout_sgl_ovfl * yes; go handle OVFL
beq.w fout_sgl_may_ovfl * maybe; go handle possible OVFL
cmpi.w #SGL_LO,d0 * will operand underflow?
blt.w fout_sgl_unfl * yes; go handle underflow
*
* NORMs(in range) can be stored out by a simple "fmov.s"
* Unnormalized inputs can come through this point.
*
fout_sgl_exg:
fmovem.x SRC.w(a0),fp0 * fetch fop from stack
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fmove.s fp0,d0 * store does convert and round
fmove.l #$0,fpcr * clear FPCR
fmove.l fpsr,d1 * save FPSR
or.w d1,2+USER_FPSR(a6) * set possible inex2/ainex
fout_sgl_exg_write:
move.b 1+EXC_OPWORD(a6),d1 * extract dst mode
andi.b #$38,d1 * is mode == 0? (Dreg dst)
beq.b fout_sgl_exg_write_dn * must save to integer regfile
move.l EXC_EA(a6),a0 * stacked <ea> is correct
bsr.l _dmem_write_long * write long
tst.l d1 * did dstore fail?
bne.l facc_out_l * yes
rts
fout_sgl_exg_write_dn:
move.b 1+EXC_OPWORD(a6),d1 * extract Dn
andi.w #$7,d1
bsr.l store_dreg_l
rts
*
* here, we know that the operand would UNFL if moved out to single prec,
* so, denorm and round and then use generic store single routine to
* write the value to memory.
*
fout_sgl_unfl:
bset #unfl_bit,FPSR_EXCEPT(a6) * set UNFL
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
move.l a0,-(sp)
clr.l d0 * pass: S.F. = 0
cmpi.b #DENORM,STAG(a6) * fetch src optype tag
bne.b fout_sgl_unfl_cont * let DENORMs fall through
lea FP_SCR0(a6),a0
bsr.l norm * normalize the DENORM
fout_sgl_unfl_cont:
lea FP_SCR0(a6),a0 * pass: ptr to operand
move.l L_SCR3(a6),d1 * pass: rnd prec,mode
bsr.l unf_res * calc default underflow result
lea FP_SCR0(a6),a0 * pass: ptr to fop
bsr.l dst_sgl * convert to single prec
move.b 1+EXC_OPWORD(a6),d1 * extract dst mode
andi.b #$38,d1 * is mode == 0? (Dreg dst)
beq.b fout_sgl_unfl_dn * must save to integer regfile
move.l EXC_EA(a6),a0 * stacked <ea> is correct
bsr.l _dmem_write_long * write long
tst.l d1 * did dstore fail?
bne.l facc_out_l * yes
bra.b fout_sgl_unfl_chkexc
fout_sgl_unfl_dn:
move.b 1+EXC_OPWORD(a6),d1 * extract Dn
andi.w #$7,d1
bsr.l store_dreg_l
fout_sgl_unfl_chkexc:
move.b FPCR_ENABLE(a6),d1
andi.b #$0a,d1 * is UNFL or INEX enabled?
bne.w fout_sd_exc_unfl * yes
addq.l #$4,sp
rts
*
* it's definitely an overflow so call ovf_res to get the correct answer
*
fout_sgl_ovfl:
tst.b 3+SRC_HI(a0) * is result inexact?
bne.b fout_sgl_ovfl_inex2
tst.l SRC_LO(a0) * is result inexact?
bne.b fout_sgl_ovfl_inex2
ori.w #ovfl_inx_mask,2+USER_FPSR(a6) * set ovfl/aovfl/ainex
bra.b fout_sgl_ovfl_cont
fout_sgl_ovfl_inex2:
ori.w #ovfinx_mask,2+USER_FPSR(a6) * set ovfl/aovfl/ainex/inex2
fout_sgl_ovfl_cont:
move.l a0,-(sp)
* call ovf_res() w/ sgl prec and the correct rnd mode to create the default
* overflow result. DON'T save the returned ccodes from ovf_res() since
* fmove out doesn't alter them.
tst.b SRC_EX.w(a0) * is operand negative?
smi d1 * set if so
move.l L_SCR3(a6),d0 * pass: sgl prec,rnd mode
bsr.l ovf_res * calc OVFL result
fmovem.x (a0),fp0 * load default overflow result
fmove.s fp0,d0 * store to single
move.b 1+EXC_OPWORD(a6),d1 * extract dst mode
andi.b #$38,d1 * is mode == 0? (Dreg dst)
beq.b fout_sgl_ovfl_dn * must save to integer regfile
move.l EXC_EA(a6),a0 * stacked <ea> is correct
bsr.l _dmem_write_long * write long
tst.l d1 * did dstore fail?
bne.l facc_out_l * yes
bra.b fout_sgl_ovfl_chkexc
fout_sgl_ovfl_dn:
move.b 1+EXC_OPWORD(a6),d1 * extract Dn
andi.w #$7,d1
bsr.l store_dreg_l
fout_sgl_ovfl_chkexc:
move.b FPCR_ENABLE(a6),d1
andi.b #$0a,d1 * is UNFL or INEX enabled?
bne.w fout_sd_exc_ovfl * yes
addq.l #$4,sp
rts
*
* move out MAY overflow:
* (1) force the exp to 0x3fff
* (2) do a move w/ appropriate rnd mode
* (3) if exp still equals zero, then insert original exponent
* for the correct result.
* if exp now equals one, then it overflowed so call ovf_res.
*
fout_sgl_may_ovfl:
move.w SRC_EX.w(a0),d1 * fetch current sign
andi.w #$8000,d1 * keep it,clear exp
ori.w #$3fff,d1 * insert exp = 0
move.w d1,FP_SCR0_EX(a6) * insert scaled exp
move.l SRC_HI(a0),FP_SCR0_HI(a6) * copy hi(man)
move.l SRC_LO(a0),FP_SCR0_LO(a6) * copy lo(man)
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.x FP_SCR0(a6),fp0 * force fop to be rounded
fmove.l #$0,fpcr * clear FPCR
fabs.x fp0 * need absolute value
fcmp.b #$2,fp0 * did exponent increase?
fblt.w fout_sgl_exg * no; go finish NORM
bra.w fout_sgl_ovfl * yes; go handle overflow
*###############
fout_sd_exc_unfl:
move.l (sp)+,a0
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
cmpi.b #DENORM,STAG(a6) * was src a DENORM?
bne.b fout_sd_exc_cont * no
lea FP_SCR0(a6),a0
bsr.l norm
neg.l d0
andi.w #$7fff,d0
bfins d0,FP_SCR0_EX(a6){#1:#15}
bra.b fout_sd_exc_cont
fout_sd_exc:
fout_sd_exc_ovfl:
move.l (sp)+,a0 * restore a0
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
fout_sd_exc_cont:
bclr #$7,FP_SCR0_EX(a6) * clear sign bit
sne.b 2+FP_SCR0_EX(a6) * set internal sign bit
lea FP_SCR0(a6),a0 * pass: ptr to DENORM
move.b 3+L_SCR3(a6),d1
lsr.b #$4,d1
andi.w #$0c,d1
swap d1
move.b 3+L_SCR3(a6),d1
lsr.b #$4,d1
andi.w #$03,d1
clr.l d0 * pass: zero g,r,s
bsr.l _round * round the DENORM
tst.b 2+FP_SCR0_EX(a6) * is EXOP negative?
beq.b fout_sd_exc_done * no
bset #$7,FP_SCR0_EX(a6) * yes
fout_sd_exc_done:
fmovem.x FP_SCR0(a6),fp1 * return EXOP in fp1
rts
*################################################################
* fmove.d out ###################################################
*################################################################
fout_dbl:
andi.b #$30,d0 * clear rnd prec
ori.b #d_mode*$10,d0 * insert dbl prec
move.l d0,L_SCR3(a6) * save rnd prec,mode on stack
*
* operand is a normalized number. first, we check to see if the move out
* would cause either an underflow or overflow. these cases are handled
* separately. otherwise, set the FPCR to the proper rounding mode and
* execute the move.
*
move.w SRC_EX.w(a0),d0 * extract exponent
andi.w #$7fff,d0 * strip sign
cmpi.w #DBL_HI,d0 * will operand overflow?
bgt.w fout_dbl_ovfl * yes; go handle OVFL
beq.w fout_dbl_may_ovfl * maybe; go handle possible OVFL
cmpi.w #DBL_LO,d0 * will operand underflow?
blt.w fout_dbl_unfl * yes; go handle underflow
*
* NORMs(in range) can be stored out by a simple "fmov.d"
* Unnormalized inputs can come through this point.
*
fout_dbl_exg:
fmovem.x SRC.w(a0),fp0 * fetch fop from stack
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.l #$0,fpsr * clear FPSR
fmove.d fp0,L_SCR1(a6) * store does convert and round
fmove.l #$0,fpcr * clear FPCR
fmove.l fpsr,d0 * save FPSR
or.w d0,2+USER_FPSR(a6) * set possible inex2/ainex
move.l EXC_EA(a6),a1 * pass: dst addr
lea L_SCR1(a6),a0 * pass: src addr
moveq.l #$8,d0 * pass: opsize is 8 bytes
bsr.l _dmem_write * store dbl fop to memory
tst.l d1 * did dstore fail?
bne.l facc_out_d * yes
rts * no; so we're finished
*
* here, we know that the operand would UNFL if moved out to double prec,
* so, denorm and round and then use generic store double routine to
* write the value to memory.
*
fout_dbl_unfl:
bset #unfl_bit,FPSR_EXCEPT(a6) * set UNFL
move.w SRC_EX.w(a0),FP_SCR0_EX(a6)
move.l SRC_HI(a0),FP_SCR0_HI(a6)
move.l SRC_LO(a0),FP_SCR0_LO(a6)
move.l a0,-(sp)
clr.l d0 * pass: S.F. = 0
cmpi.b #DENORM,STAG(a6) * fetch src optype tag
bne.b fout_dbl_unfl_cont * let DENORMs fall through
lea FP_SCR0(a6),a0
bsr.l norm * normalize the DENORM
fout_dbl_unfl_cont:
lea FP_SCR0(a6),a0 * pass: ptr to operand
move.l L_SCR3(a6),d1 * pass: rnd prec,mode
bsr.l unf_res * calc default underflow result
lea FP_SCR0(a6),a0 * pass: ptr to fop
bsr.l dst_dbl * convert to single prec
move.l d0,L_SCR1(a6)
move.l d1,L_SCR2(a6)
move.l EXC_EA(a6),a1 * pass: dst addr
lea L_SCR1(a6),a0 * pass: src addr
moveq.l #$8,d0 * pass: opsize is 8 bytes
bsr.l _dmem_write * store dbl fop to memory
tst.l d1 * did dstore fail?
bne.l facc_out_d * yes
move.b FPCR_ENABLE(a6),d1
andi.b #$0a,d1 * is UNFL or INEX enabled?
bne.w fout_sd_exc_unfl * yes
addq.l #$4,sp
rts
*
* it's definitely an overflow so call ovf_res to get the correct answer
*
fout_dbl_ovfl:
move.w 2+SRC_LO(a0),d0
andi.w #$7ff,d0
bne.b fout_dbl_ovfl_inex2
ori.w #ovfl_inx_mask,2+USER_FPSR(a6) * set ovfl/aovfl/ainex
bra.b fout_dbl_ovfl_cont
fout_dbl_ovfl_inex2:
ori.w #ovfinx_mask,2+USER_FPSR(a6) * set ovfl/aovfl/ainex/inex2
fout_dbl_ovfl_cont:
move.l a0,-(sp)
* call ovf_res() w/ dbl prec and the correct rnd mode to create the default
* overflow result. DON'T save the returned ccodes from ovf_res() since
* fmove out doesn't alter them.
tst.b SRC_EX.w(a0) * is operand negative?
smi d1 * set if so
move.l L_SCR3(a6),d0 * pass: dbl prec,rnd mode
bsr.l ovf_res * calc OVFL result
fmovem.x (a0),fp0 * load default overflow result
fmove.d fp0,L_SCR1(a6) * store to double
move.l EXC_EA(a6),a1 * pass: dst addr
lea L_SCR1(a6),a0 * pass: src addr
moveq.l #$8,d0 * pass: opsize is 8 bytes
bsr.l _dmem_write * store dbl fop to memory
tst.l d1 * did dstore fail?
bne.l facc_out_d * yes
move.b FPCR_ENABLE(a6),d1
andi.b #$0a,d1 * is UNFL or INEX enabled?
bne.w fout_sd_exc_ovfl * yes
addq.l #$4,sp
rts
*
* move out MAY overflow:
* (1) force the exp to 0x3fff
* (2) do a move w/ appropriate rnd mode
* (3) if exp still equals zero, then insert original exponent
* for the correct result.
* if exp now equals one, then it overflowed so call ovf_res.
*
fout_dbl_may_ovfl:
move.w SRC_EX.w(a0),d1 * fetch current sign
andi.w #$8000,d1 * keep it,clear exp
ori.w #$3fff,d1 * insert exp = 0
move.w d1,FP_SCR0_EX(a6) * insert scaled exp
move.l SRC_HI(a0),FP_SCR0_HI(a6) * copy hi(man)
move.l SRC_LO(a0),FP_SCR0_LO(a6) * copy lo(man)
fmove.l L_SCR3(a6),fpcr * set FPCR
fmove.x FP_SCR0(a6),fp0 * force fop to be rounded
fmove.l #$0,fpcr * clear FPCR
fabs.x fp0 * need absolute value
fcmp.b #$2,fp0 * did exponent increase?
fblt.w fout_dbl_exg * no; go finish NORM
bra.w fout_dbl_ovfl * yes; go handle overflow
*########################################################################
* XDEF **************************************************************** #
* dst_dbl(): create double precision value from extended prec. #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* a0 = pointer to source operand in extended precision #
* #
* OUTPUT ************************************************************** #
* d0 = hi(double precision result) #
* d1 = lo(double precision result) #
* #
* ALGORITHM *********************************************************** #
* #
* Changes extended precision to double precision. #
* Note: no attempt is made to round the extended value to double. #
* dbl_sign = ext_sign #
* dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias) #
* get rid of ext integer bit #
* dbl_mant = ext_mant{62:12} #
* #
* --------------- --------------- --------------- #
* extended -> |s| exp | |1| ms mant | | ls mant | #
* --------------- --------------- --------------- #
* 95 64 63 62 32 31 11 0 #
* | | #
* | | #
* | | #
* v v #
* --------------- --------------- #
* double -> |s|exp| mant | | mant | #
* --------------- --------------- #
* 63 51 32 31 0 #
* #
*########################################################################
dst_dbl:
clr.l d0 * clear d0
move.w FTEMP_EX.w(a0),d0 * get exponent
subi.w #EXT_BIAS,d0 * subtract extended precision bias
addi.w #DBL_BIAS,d0 * add double precision bias
tst.b FTEMP_HI(a0) * is number a denorm?
bmi.b dst_get_dupper * no
subq.w #$1,d0 * yes; denorm bias = DBL_BIAS - 1
dst_get_dupper:
swap d0 * d0 now in upper word
lsl.l #$4,d0 * d0 in proper place for dbl prec exp
tst.b FTEMP_EX.w(a0) * test sign
bpl.b dst_get_dman * if postive, go process mantissa
bset #$1f,d0 * if negative, set sign
dst_get_dman:
move.l FTEMP_HI(a0),d1 * get ms mantissa
bfextu d1{#1:#20},d1 * get upper 20 bits of ms
or.l d1,d0 * put these bits in ms word of double
move.l d0,L_SCR1(a6) * put the new exp back on the stack
move.l FTEMP_HI(a0),d1 * get ms mantissa
moveq.l #21,d0 * load shift count
lsl.l d0,d1 * put lower 11 bits in upper bits
move.l d1,L_SCR2(a6) * build lower lword in memory
move.l FTEMP_LO(a0),d1 * get ls mantissa
bfextu d1{#0:#21},d0 * get ls 21 bits of double
move.l L_SCR2(a6),d1
or.l d0,d1 * put them in double result
move.l L_SCR1(a6),d0
rts
*########################################################################
* XDEF **************************************************************** #
* dst_sgl(): create single precision value from extended prec #
* #
* XREF **************************************************************** #
* #
* INPUT *************************************************************** #
* a0 = pointer to source operand in extended precision #
* #
* OUTPUT ************************************************************** #
* d0 = single precision result #
* #
* ALGORITHM *********************************************************** #
* #
* Changes extended precision to single precision. #
* sgl_sign = ext_sign #
* sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) #
* get rid of ext integer bit #
* sgl_mant = ext_mant{62:12} #
* #
* --------------- --------------- --------------- #
* extended -> |s| exp | |1| ms mant | | ls mant | #
* --------------- --------------- --------------- #
* 95 64 63 62 40 32 31 12 0 #
* | | #
* | | #
* | | #
* v v #
* --------------- #
* single -> |s|exp| mant | #
* --------------- #
* 31 22 0 #
* #
*########################################################################
dst_sgl:
clr.l d0
move.w FTEMP_EX.w(a0),d0 * get exponent
subi.w #EXT_BIAS,d0 * subtract extended precision bias
addi.w #SGL_BIAS,d0 * add single precision bias
tst.b FTEMP_HI(a0) * is number a denorm?
bmi.b dst_get_supper * no
subq.w #$1,d0 * yes; denorm bias = SGL_BIAS - 1
dst_get_supper:
swap d0 * put exp in upper word of d0
lsl.l #$7,d0 * shift it into single exp bits
tst.b FTEMP_EX.w(a0) * test sign
bpl.b dst_get_sman * if positive, continue
bset #$1f,d0 * if negative, put in sign first
dst_get_sman:
move.l FTEMP_HI(a0),d1 * get ms mantissa
andi.l #$7fffff00,d1 * get upper 23 bits of ms
lsr.l #$8,d1 * and put them flush right
or.l d1,d0 * put these bits in ms word of single
rts
*#############################################################################
fout_pack:
bsr.l _calc_ea_fout * fetch the <ea>
move.l a0,-(sp)
move.b STAG(a6),d0 * fetch input type
bne.w fout_pack_not_norm * input is not NORM
fout_pack_norm:
btst #$4,EXC_CMDREG(a6) * static or dynamic?
beq.b fout_pack_s * static
fout_pack_d:
move.b 1+EXC_CMDREG(a6),d1 * fetch dynamic reg
lsr.b #$4,d1
andi.w #$7,d1
bsr.l fetch_dreg * fetch Dn w/ k-factor
bra.b fout_pack_type
fout_pack_s:
move.b 1+EXC_CMDREG(a6),d0 * fetch static field
fout_pack_type:
bfexts d0{#25:#7},d0 * extract k-factor
move.l d0,-(sp)
lea FP_SRC(a6),a0 * pass: ptr to input
* bindec is currently scrambling FP_SRC for denorm inputs.
* we'll have to change this, but for now, tough luck!!!
bsr.l bindec * convert xprec to packed
* andi.l &0xcfff000f,FP_SCR0(%a6) # clear unused fields
andi.l #$cffff00f,FP_SCR0(a6) * clear unused fields
move.l (sp)+,d0
tst.b 3+FP_SCR0_EX(a6)
bne.b fout_pack_set
tst.l FP_SCR0_HI(a6)
bne.b fout_pack_set
tst.l FP_SCR0_LO(a6)
bne.b fout_pack_set
* add the extra condition that only if the k-factor was zero, too, should
* we zero the exponent
tst.l d0
bne.b fout_pack_set
* "mantissa" is all zero which means that the answer is zero. but, the '040
* algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
* if the mantissa is zero, I will zero the exponent, too.
* the question now is whether the exponents sign bit is allowed to be non-zero
* for a zero, also...
andi.w #$f000,FP_SCR0(a6)
fout_pack_set:
lea FP_SCR0(a6),a0 * pass: src addr
fout_pack_write:
move.l (sp)+,a1 * pass: dst addr
moveq.l #$c,d0 * pass: opsize is 12 bytes
cmpi.b #mda7_flg,SPCOND_FLG(a6)
beq.b fout_pack_a7
bsr.l _dmem_write * write ext prec number to memory
tst.l d1 * did dstore fail?
bne.w fout_ext_err * yes
rts
* we don't want to do the write if the exception occurred in supervisor mode
* so _mem_write2() handles this for us.
fout_pack_a7:
bsr.l _mem_write2 * write ext prec number to memory
tst.l d1 * did dstore fail?
bne.w fout_ext_err * yes
rts
fout_pack_not_norm:
cmpi.b #DENORM,d0 * is it a DENORM?
beq.w fout_pack_norm * yes
lea FP_SRC(a6),a0
clr.w 2+FP_SRC_EX(a6)
cmpi.b #SNAN,d0 * is it an SNAN?
beq.b fout_pack_snan * yes
bra.b fout_pack_write * no
fout_pack_snan:
ori.w #snaniop2_mask,FPSR_EXCEPT(a6) * set SNAN/AIOP
bset #$6,FP_SRC_HI(a6) * set snan bit
bra.b fout_pack_write
*########################################################################
* XDEF **************************************************************** #
* fetch_dreg(): fetch register according to index in d1 #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* d1 = index of register to fetch from #
* #
* OUTPUT ************************************************************** #
* d0 = value of register fetched #
* #
* ALGORITHM *********************************************************** #
* According to the index value in d1 which can range from zero #
* to fifteen, load the corresponding register file value (where #
* address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the #
* stack. The rest should still be in their original places. #
* #
*########################################################################
* this routine leaves d1 intact for subsequent store_dreg calls.
global fetch_dreg
fetch_dreg:
move.w (tbl_fdreg.b,pc,d1.w*2),d0
jmp (tbl_fdreg.b,pc,d0.w*1)
tbl_fdreg:
.dc.w fdreg0-tbl_fdreg
.dc.w fdreg1-tbl_fdreg
.dc.w fdreg2-tbl_fdreg
.dc.w fdreg3-tbl_fdreg
.dc.w fdreg4-tbl_fdreg
.dc.w fdreg5-tbl_fdreg
.dc.w fdreg6-tbl_fdreg
.dc.w fdreg7-tbl_fdreg
.dc.w fdreg8-tbl_fdreg
.dc.w fdreg9-tbl_fdreg
.dc.w fdrega-tbl_fdreg
.dc.w fdregb-tbl_fdreg
.dc.w fdregc-tbl_fdreg
.dc.w fdregd-tbl_fdreg
.dc.w fdrege-tbl_fdreg
.dc.w fdregf-tbl_fdreg
fdreg0:
move.l EXC_DREGS+$0(a6),d0
rts
fdreg1:
move.l EXC_DREGS+$4(a6),d0
rts
fdreg2:
move.l d2,d0
rts
fdreg3:
move.l d3,d0
rts
fdreg4:
move.l d4,d0
rts
fdreg5:
move.l d5,d0
rts
fdreg6:
move.l d6,d0
rts
fdreg7:
move.l d7,d0
rts
fdreg8:
move.l EXC_DREGS+$8(a6),d0
rts
fdreg9:
move.l EXC_DREGS+$c(a6),d0
rts
fdrega:
move.l a2,d0
rts
fdregb:
move.l a3,d0
rts
fdregc:
move.l a4,d0
rts
fdregd:
move.l a5,d0
rts
fdrege:
move.l (a6),d0
rts
fdregf:
move.l EXC_A7(a6),d0
rts
*########################################################################
* XDEF **************************************************************** #
* store_dreg_l(): store longword to data register specified by d1 #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* d0 = longowrd value to store #
* d1 = index of register to fetch from #
* #
* OUTPUT ************************************************************** #
* (data register is updated) #
* #
* ALGORITHM *********************************************************** #
* According to the index value in d1, store the longword value #
* in d0 to the corresponding data register. D0/D1 are on the stack #
* while the rest are in their initial places. #
* #
*########################################################################
global store_dreg_l
store_dreg_l:
move.w (tbl_sdregl.b,pc,d1.w*2),d1
jmp (tbl_sdregl.b,pc,d1.w*1)
tbl_sdregl:
.dc.w sdregl0-tbl_sdregl
.dc.w sdregl1-tbl_sdregl
.dc.w sdregl2-tbl_sdregl
.dc.w sdregl3-tbl_sdregl
.dc.w sdregl4-tbl_sdregl
.dc.w sdregl5-tbl_sdregl
.dc.w sdregl6-tbl_sdregl
.dc.w sdregl7-tbl_sdregl
sdregl0:
move.l d0,EXC_DREGS+$0(a6)
rts
sdregl1:
move.l d0,EXC_DREGS+$4(a6)
rts
sdregl2:
move.l d0,d2
rts
sdregl3:
move.l d0,d3
rts
sdregl4:
move.l d0,d4
rts
sdregl5:
move.l d0,d5
rts
sdregl6:
move.l d0,d6
rts
sdregl7:
move.l d0,d7
rts
*########################################################################
* XDEF **************************************************************** #
* store_dreg_w(): store word to data register specified by d1 #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* d0 = word value to store #
* d1 = index of register to fetch from #
* #
* OUTPUT ************************************************************** #
* (data register is updated) #
* #
* ALGORITHM *********************************************************** #
* According to the index value in d1, store the word value #
* in d0 to the corresponding data register. D0/D1 are on the stack #
* while the rest are in their initial places. #
* #
*########################################################################
global store_dreg_w
store_dreg_w:
move.w (tbl_sdregw.b,pc,d1.w*2),d1
jmp (tbl_sdregw.b,pc,d1.w*1)
tbl_sdregw:
.dc.w sdregw0-tbl_sdregw
.dc.w sdregw1-tbl_sdregw
.dc.w sdregw2-tbl_sdregw
.dc.w sdregw3-tbl_sdregw
.dc.w sdregw4-tbl_sdregw
.dc.w sdregw5-tbl_sdregw
.dc.w sdregw6-tbl_sdregw
.dc.w sdregw7-tbl_sdregw
sdregw0:
move.w d0,2+EXC_DREGS+$0(a6)
rts
sdregw1:
move.w d0,2+EXC_DREGS+$4(a6)
rts
sdregw2:
move.w d0,d2
rts
sdregw3:
move.w d0,d3
rts
sdregw4:
move.w d0,d4
rts
sdregw5:
move.w d0,d5
rts
sdregw6:
move.w d0,d6
rts
sdregw7:
move.w d0,d7
rts
*########################################################################
* XDEF **************************************************************** #
* store_dreg_b(): store byte to data register specified by d1 #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* d0 = byte value to store #
* d1 = index of register to fetch from #
* #
* OUTPUT ************************************************************** #
* (data register is updated) #
* #
* ALGORITHM *********************************************************** #
* According to the index value in d1, store the byte value #
* in d0 to the corresponding data register. D0/D1 are on the stack #
* while the rest are in their initial places. #
* #
*########################################################################
global store_dreg_b
store_dreg_b:
move.w (tbl_sdregb.b,pc,d1.w*2),d1
jmp (tbl_sdregb.b,pc,d1.w*1)
tbl_sdregb:
.dc.w sdregb0-tbl_sdregb
.dc.w sdregb1-tbl_sdregb
.dc.w sdregb2-tbl_sdregb
.dc.w sdregb3-tbl_sdregb
.dc.w sdregb4-tbl_sdregb
.dc.w sdregb5-tbl_sdregb
.dc.w sdregb6-tbl_sdregb
.dc.w sdregb7-tbl_sdregb
sdregb0:
move.b d0,3+EXC_DREGS+$0(a6)
rts
sdregb1:
move.b d0,3+EXC_DREGS+$4(a6)
rts
sdregb2:
move.b d0,d2
rts
sdregb3:
move.b d0,d3
rts
sdregb4:
move.b d0,d4
rts
sdregb5:
move.b d0,d5
rts
sdregb6:
move.b d0,d6
rts
sdregb7:
move.b d0,d7
rts
*########################################################################
* XDEF **************************************************************** #
* inc_areg(): increment an address register by the value in d0 #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* d0 = amount to increment by #
* d1 = index of address register to increment #
* #
* OUTPUT ************************************************************** #
* (address register is updated) #
* #
* ALGORITHM *********************************************************** #
* Typically used for an instruction w/ a post-increment <ea>, #
* this routine adds the increment value in d0 to the address register #
* specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
* in their original places. #
* For a7, if the increment amount is one, then we have to #
* increment by two. For any a7 update, set the mia7_flag so that if #
* an access error exception occurs later in emulation, this address #
* register update can be undone. #
* #
*########################################################################
global inc_areg
inc_areg:
move.w (tbl_iareg.b,pc,d1.w*2),d1
jmp (tbl_iareg.b,pc,d1.w*1)
tbl_iareg:
.dc.w iareg0-tbl_iareg
.dc.w iareg1-tbl_iareg
.dc.w iareg2-tbl_iareg
.dc.w iareg3-tbl_iareg
.dc.w iareg4-tbl_iareg
.dc.w iareg5-tbl_iareg
.dc.w iareg6-tbl_iareg
.dc.w iareg7-tbl_iareg
iareg0: add.l d0,EXC_DREGS+$8(a6)
rts
iareg1: add.l d0,EXC_DREGS+$c(a6)
rts
iareg2: add.l d0,a2
rts
iareg3: add.l d0,a3
rts
iareg4: add.l d0,a4
rts
iareg5: add.l d0,a5
rts
iareg6: add.l d0,(a6)
rts
iareg7: move.b #mia7_flg,SPCOND_FLG(a6)
cmpi.b #$1,d0
beq.b iareg7b
add.l d0,EXC_A7(a6)
rts
iareg7b:
addq.l #$2,EXC_A7(a6)
rts
*########################################################################
* XDEF **************************************************************** #
* dec_areg(): decrement an address register by the value in d0 #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* d0 = amount to decrement by #
* d1 = index of address register to decrement #
* #
* OUTPUT ************************************************************** #
* (address register is updated) #
* #
* ALGORITHM *********************************************************** #
* Typically used for an instruction w/ a pre-decrement <ea>, #
* this routine adds the decrement value in d0 to the address register #
* specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
* in their original places. #
* For a7, if the decrement amount is one, then we have to #
* decrement by two. For any a7 update, set the mda7_flag so that if #
* an access error exception occurs later in emulation, this address #
* register update can be undone. #
* #
*########################################################################
global dec_areg
dec_areg:
move.w (tbl_dareg.b,pc,d1.w*2),d1
jmp (tbl_dareg.b,pc,d1.w*1)
tbl_dareg:
.dc.w dareg0-tbl_dareg
.dc.w dareg1-tbl_dareg
.dc.w dareg2-tbl_dareg
.dc.w dareg3-tbl_dareg
.dc.w dareg4-tbl_dareg
.dc.w dareg5-tbl_dareg
.dc.w dareg6-tbl_dareg
.dc.w dareg7-tbl_dareg
dareg0: sub.l d0,EXC_DREGS+$8(a6)
rts
dareg1: sub.l d0,EXC_DREGS+$c(a6)
rts
dareg2: sub.l d0,a2
rts
dareg3: sub.l d0,a3
rts
dareg4: sub.l d0,a4
rts
dareg5: sub.l d0,a5
rts
dareg6: sub.l d0,(a6)
rts
dareg7: move.b #mda7_flg,SPCOND_FLG(a6)
cmpi.b #$1,d0
beq.b dareg7b
sub.l d0,EXC_A7(a6)
rts
dareg7b:
subq.l #$2,EXC_A7(a6)
rts
*#############################################################################
*########################################################################
* XDEF **************************************************************** #
* load_fpn1(): load FP register value into FP_SRC(a6). #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* d0 = index of FP register to load #
* #
* OUTPUT ************************************************************** #
* FP_SRC(a6) = value loaded from FP register file #
* #
* ALGORITHM *********************************************************** #
* Using the index in d0, load FP_SRC(a6) with a number from the #
* FP register file. #
* #
*########################################################################
global load_fpn1
load_fpn1:
move.w (tbl_load_fpn1.b,pc,d0.w*2),d0
jmp (tbl_load_fpn1.b,pc,d0.w*1)
tbl_load_fpn1:
.dc.w load_fpn1_0-tbl_load_fpn1
.dc.w load_fpn1_1-tbl_load_fpn1
.dc.w load_fpn1_2-tbl_load_fpn1
.dc.w load_fpn1_3-tbl_load_fpn1
.dc.w load_fpn1_4-tbl_load_fpn1
.dc.w load_fpn1_5-tbl_load_fpn1
.dc.w load_fpn1_6-tbl_load_fpn1
.dc.w load_fpn1_7-tbl_load_fpn1
load_fpn1_0:
move.l 0+EXC_FP0(a6),0+FP_SRC(a6)
move.l 4+EXC_FP0(a6),4+FP_SRC(a6)
move.l 8+EXC_FP0(a6),8+FP_SRC(a6)
lea FP_SRC(a6),a0
rts
load_fpn1_1:
move.l 0+EXC_FP1(a6),0+FP_SRC(a6)
move.l 4+EXC_FP1(a6),4+FP_SRC(a6)
move.l 8+EXC_FP1(a6),8+FP_SRC(a6)
lea FP_SRC(a6),a0
rts
load_fpn1_2:
fmovem.x fp2,FP_SRC(a6)
lea FP_SRC(a6),a0
rts
load_fpn1_3:
fmovem.x fp3,FP_SRC(a6)
lea FP_SRC(a6),a0
rts
load_fpn1_4:
fmovem.x fp4,FP_SRC(a6)
lea FP_SRC(a6),a0
rts
load_fpn1_5:
fmovem.x fp5,FP_SRC(a6)
lea FP_SRC(a6),a0
rts
load_fpn1_6:
fmovem.x fp6,FP_SRC(a6)
lea FP_SRC(a6),a0
rts
load_fpn1_7:
fmovem.x fp7,FP_SRC(a6)
lea FP_SRC(a6),a0
rts
*############################################################################
*########################################################################
* XDEF **************************************************************** #
* load_fpn2(): load FP register value into FP_DST(a6). #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* d0 = index of FP register to load #
* #
* OUTPUT ************************************************************** #
* FP_DST(a6) = value loaded from FP register file #
* #
* ALGORITHM *********************************************************** #
* Using the index in d0, load FP_DST(a6) with a number from the #
* FP register file. #
* #
*########################################################################
global load_fpn2
load_fpn2:
move.w (tbl_load_fpn2.b,pc,d0.w*2),d0
jmp (tbl_load_fpn2.b,pc,d0.w*1)
tbl_load_fpn2:
.dc.w load_fpn2_0-tbl_load_fpn2
.dc.w load_fpn2_1-tbl_load_fpn2
.dc.w load_fpn2_2-tbl_load_fpn2
.dc.w load_fpn2_3-tbl_load_fpn2
.dc.w load_fpn2_4-tbl_load_fpn2
.dc.w load_fpn2_5-tbl_load_fpn2
.dc.w load_fpn2_6-tbl_load_fpn2
.dc.w load_fpn2_7-tbl_load_fpn2
load_fpn2_0:
move.l 0+EXC_FP0(a6),0+FP_DST(a6)
move.l 4+EXC_FP0(a6),4+FP_DST(a6)
move.l 8+EXC_FP0(a6),8+FP_DST(a6)
lea FP_DST(a6),a0
rts
load_fpn2_1:
move.l 0+EXC_FP1(a6),0+FP_DST(a6)
move.l 4+EXC_FP1(a6),4+FP_DST(a6)
move.l 8+EXC_FP1(a6),8+FP_DST(a6)
lea FP_DST(a6),a0
rts
load_fpn2_2:
fmovem.x fp2,FP_DST(a6)
lea FP_DST(a6),a0
rts
load_fpn2_3:
fmovem.x fp3,FP_DST(a6)
lea FP_DST(a6),a0
rts
load_fpn2_4:
fmovem.x fp4,FP_DST(a6)
lea FP_DST(a6),a0
rts
load_fpn2_5:
fmovem.x fp5,FP_DST(a6)
lea FP_DST(a6),a0
rts
load_fpn2_6:
fmovem.x fp6,FP_DST(a6)
lea FP_DST(a6),a0
rts
load_fpn2_7:
fmovem.x fp7,FP_DST(a6)
lea FP_DST(a6),a0
rts
*############################################################################
*########################################################################
* XDEF **************************************************************** #
* store_fpreg(): store an fp value to the fpreg designated d0. #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* fp0 = extended precision value to store #
* d0 = index of floating-point register #
* #
* OUTPUT ************************************************************** #
* None #
* #
* ALGORITHM *********************************************************** #
* Store the value in fp0 to the FP register designated by the #
* value in d0. The FP number can be DENORM or SNAN so we have to be #
* careful that we don't take an exception here. #
* #
*########################################################################
global store_fpreg
store_fpreg:
move.w (tbl_store_fpreg.b,pc,d0.w*2),d0
jmp (tbl_store_fpreg.b,pc,d0.w*1)
tbl_store_fpreg:
.dc.w store_fpreg_0-tbl_store_fpreg
.dc.w store_fpreg_1-tbl_store_fpreg
.dc.w store_fpreg_2-tbl_store_fpreg
.dc.w store_fpreg_3-tbl_store_fpreg
.dc.w store_fpreg_4-tbl_store_fpreg
.dc.w store_fpreg_5-tbl_store_fpreg
.dc.w store_fpreg_6-tbl_store_fpreg
.dc.w store_fpreg_7-tbl_store_fpreg
store_fpreg_0:
fmovem.x fp0,EXC_FP0(a6)
rts
store_fpreg_1:
fmovem.x fp0,EXC_FP1(a6)
rts
store_fpreg_2:
fmovem.x fp0,-(sp)
fmovem.x (sp)+,fp2
rts
store_fpreg_3:
fmovem.x fp0,-(sp)
fmovem.x (sp)+,fp3
rts
store_fpreg_4:
fmovem.x fp0,-(sp)
fmovem.x (sp)+,fp4
rts
store_fpreg_5:
fmovem.x fp0,-(sp)
fmovem.x (sp)+,fp5
rts
store_fpreg_6:
fmovem.x fp0,-(sp)
fmovem.x (sp)+,fp6
rts
store_fpreg_7:
fmovem.x fp0,-(sp)
fmovem.x (sp)+,fp7
rts
*########################################################################
* XDEF **************************************************************** #
* _denorm(): denormalize an intermediate result #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* a0 = points to the operand to be denormalized #
* (in the internal extended format) #
* #
* d0 = rounding precision #
* #
* OUTPUT ************************************************************** #
* a0 = pointer to the denormalized result #
* (in the internal extended format) #
* #
* d0 = guard,round,sticky #
* #
* ALGORITHM *********************************************************** #
* According to the exponent underflow threshold for the given #
* precision, shift the mantissa bits to the right in order raise the #
* exponent of the operand to the threshold value. While shifting the #
* mantissa bits right, maintain the value of the guard, round, and #
* sticky bits. #
* other notes: #
* (1) _denorm() is called by the underflow routines #
* (2) _denorm() does NOT affect the status register #
* #
*########################################################################
*
* table of exponent threshold values for each precision
*
tbl_thresh:
.dc.w $0
.dc.w sgl_thresh
.dc.w dbl_thresh
global _denorm
_denorm:
*
* Load the exponent threshold for the precision selected and check
* to see if (threshold - exponent) is > 65 in which case we can
* simply calculate the sticky bit and zero the mantissa. otherwise
* we have to call the denormalization routine.
*
lsr.b #$2,d0 * shift prec to lo bits
move.w (tbl_thresh.b,pc,d0.w*2),d1 * load prec threshold
move.w d1,d0 * copy d1 into d0
sub.w FTEMP_EX.w(a0),d0 * diff = threshold - exp
cmpi.w #66,d0 * is diff > 65? (mant + g,r bits)
bpl.b denorm_set_stky * yes; just calc sticky
clr.l d0 * clear g,r,s
btst #inex2_bit,FPSR_EXCEPT(a6) * yes; was INEX2 set?
beq.b denorm_call * no; don't change anything
bset #29,d0 * yes; set sticky bit
denorm_call:
bsr.l dnrm_lp * denormalize the number
rts
*
* all bit would have been shifted off during the denorm so simply
* calculate if the sticky should be set and clear the entire mantissa.
*
denorm_set_stky:
move.l #$20000000,d0 * set sticky bit in return value
move.w d1,FTEMP_EX.w(a0) * load exp with threshold
clr.l FTEMP_HI(a0) * set d1 = 0 (ms mantissa)
clr.l FTEMP_LO(a0) * set d2 = 0 (ms mantissa)
rts
* #
* dnrm_lp(): normalize exponent/mantissa to specified threshhold #
* #
* INPUT: #
* %a0 : points to the operand to be denormalized #
* %d0{31:29} : initial guard,round,sticky #
* %d1{15:0} : denormalization threshold #
* OUTPUT: #
* %a0 : points to the denormalized operand #
* %d0{31:29} : final guard,round,sticky #
* #
* *** Local Equates *** #
GRS set L_SCR2 * g,r,s temp storage
FTEMP_LO2 set L_SCR1 * FTEMP_LO copy
global dnrm_lp
dnrm_lp:
*
* make a copy of FTEMP_LO and place the g,r,s bits directly after it
* in memory so as to make the bitfield extraction for denormalization easier.
*
move.l FTEMP_LO(a0),FTEMP_LO2(a6) * make FTEMP_LO copy
move.l d0,GRS(a6) * place g,r,s after it
*
* check to see how much less than the underflow threshold the operand
* exponent is.
*
move.l d1,d0 * copy the denorm threshold
sub.w FTEMP_EX.w(a0),d1 * d1 = threshold - uns exponent
ble.b dnrm_no_lp * d1 <= 0
cmpi.w #$20,d1 * is ( 0 <= d1 < 32) ?
blt.b case_1 * yes
cmpi.w #$40,d1 * is (32 <= d1 < 64) ?
blt.b case_2 * yes
bra.w case_3 * (d1 >= 64)
*
* No normalization necessary
*
dnrm_no_lp:
move.l GRS(a6),d0 * restore original g,r,s
rts
*
* case (0<d1<32)
*
* %d0 = denorm threshold
* %d1 = "n" = amt to shift
*
* ---------------------------------------------------------
* | FTEMP_HI | FTEMP_LO |grs000.........000|
* ---------------------------------------------------------
* <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
* \ \ \ \
* \ \ \ \
* \ \ \ \
* \ \ \ \
* \ \ \ \
* \ \ \ \
* \ \ \ \
* \ \ \ \
* <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
* ---------------------------------------------------------
* |0.....0| NEW_HI | NEW_FTEMP_LO |grs |
* ---------------------------------------------------------
*
case_1:
move.l d2,-(sp) * create temp storage
move.w d0,FTEMP_EX.w(a0) * exponent = denorm threshold
move.l #32,d0
sub.w d1,d0 * %d0 = 32 - %d1
cmpi.w #29,d1 * is shft amt >= 29
blt.b case1_extract * no; no fix needed
move.b GRS(a6),d2
or.b d2,3+FTEMP_LO2(a6)
case1_extract:
bfextu FTEMP_HI(a0){#0:d0},d2 * %d2 = new FTEMP_HI
bfextu FTEMP_HI(a0){d0:#32},d1 * %d1 = new FTEMP_LO
bfextu FTEMP_LO2(a6){d0:#32},d0 * %d0 = new G,R,S
move.l d2,FTEMP_HI(a0) * store new FTEMP_HI
move.l d1,FTEMP_LO(a0) * store new FTEMP_LO
bftst d0{#2:#30} * were bits shifted off?
beq.b case1_sticky_clear * no; go finish
bset #rnd_stky_bit,d0 * yes; set sticky bit
case1_sticky_clear:
andi.l #$e0000000,d0 * clear all but G,R,S
move.l (sp)+,d2 * restore temp register
rts
*
* case (32<=d1<64)
*
* %d0 = denorm threshold
* %d1 = "n" = amt to shift
*
* ---------------------------------------------------------
* | FTEMP_HI | FTEMP_LO |grs000.........000|
* ---------------------------------------------------------
* <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
* \ \ \
* \ \ \
* \ \ -------------------
* \ -------------------- \
* ------------------- \ \
* \ \ \
* \ \ \
* \ \ \
* <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
* ---------------------------------------------------------
* |0...............0|0....0| NEW_LO |grs |
* ---------------------------------------------------------
*
case_2:
move.l d2,-(sp) * create temp storage
move.w d0,FTEMP_EX.w(a0) * exponent = denorm threshold
subi.w #$20,d1 * %d1 now between 0 and 32
move.l #$20,d0
sub.w d1,d0 * %d0 = 32 - %d1
* subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
* the number of bits to check for the sticky detect.
* it only plays a role in shift amounts of 61-63.
move.b GRS(a6),d2
or.b d2,3+FTEMP_LO2(a6)
bfextu FTEMP_HI(a0){#0:d0},d2 * %d2 = new FTEMP_LO
bfextu FTEMP_HI(a0){d0:#32},d1 * %d1 = new G,R,S
bftst d1{#2:#30} * were any bits shifted off?
bne.b case2_set_sticky * yes; set sticky bit
bftst FTEMP_LO2(a6){d0:#31} * were any bits shifted off?
bne.b case2_set_sticky * yes; set sticky bit
move.l d1,d0 * move new G,R,S to %d0
bra.b case2_end
case2_set_sticky:
move.l d1,d0 * move new G,R,S to %d0
bset #rnd_stky_bit,d0 * set sticky bit
case2_end:
clr.l FTEMP_HI(a0) * store FTEMP_HI = 0
move.l d2,FTEMP_LO(a0) * store FTEMP_LO
andi.l #$e0000000,d0 * clear all but G,R,S
move.l (sp)+,d2 * restore temp register
rts
*
* case (d1>=64)
*
* %d0 = denorm threshold
* %d1 = amt to shift
*
case_3:
move.w d0,FTEMP_EX.w(a0) * insert denorm threshold
cmpi.w #65,d1 * is shift amt > 65?
blt.b case3_64 * no; it's == 64
beq.b case3_65 * no; it's == 65
*
* case (d1>65)
*
* Shift value is > 65 and out of range. All bits are shifted off.
* Return a zero mantissa with the sticky bit set
*
clr.l FTEMP_HI(a0) * clear hi(mantissa)
clr.l FTEMP_LO(a0) * clear lo(mantissa)
move.l #$20000000,d0 * set sticky bit
rts
*
* case (d1 == 64)
*
* ---------------------------------------------------------
* | FTEMP_HI | FTEMP_LO |grs000.........000|
* ---------------------------------------------------------
* <-------(32)------>
* \ \
* \ \
* \ \
* \ ------------------------------
* ------------------------------- \
* \ \
* \ \
* \ \
* <-------(32)------>
* ---------------------------------------------------------
* |0...............0|0................0|grs |
* ---------------------------------------------------------
*
case3_64:
move.l FTEMP_HI(a0),d0 * fetch hi(mantissa)
move.l d0,d1 * make a copy
andi.l #$c0000000,d0 * extract G,R
andi.l #$3fffffff,d1 * extract other bits
bra.b case3_complete
*
* case (d1 == 65)
*
* ---------------------------------------------------------
* | FTEMP_HI | FTEMP_LO |grs000.........000|
* ---------------------------------------------------------
* <-------(32)------>
* \ \
* \ \
* \ \
* \ ------------------------------
* -------------------------------- \
* \ \
* \ \
* \ \
* <-------(31)----->
* ---------------------------------------------------------
* |0...............0|0................0|0rs |
* ---------------------------------------------------------
*
case3_65:
move.l FTEMP_HI(a0),d0 * fetch hi(mantissa)
andi.l #$80000000,d0 * extract R bit
lsr.l #$1,d0 * shift high bit into R bit
andi.l #$7fffffff,d1 * extract other bits
case3_complete:
* last operation done was an "and" of the bits shifted off so the condition
* codes are already set so branch accordingly.
bne.b case3_set_sticky * yes; go set new sticky
tst.l FTEMP_LO(a0) * were any bits shifted off?
bne.b case3_set_sticky * yes; go set new sticky
tst.b GRS(a6) * were any bits shifted off?
bne.b case3_set_sticky * yes; go set new sticky
*
* no bits were shifted off so don't set the sticky bit.
* the guard and
* the entire mantissa is zero.
*
clr.l FTEMP_HI(a0) * clear hi(mantissa)
clr.l FTEMP_LO(a0) * clear lo(mantissa)
rts
*
* some bits were shifted off so set the sticky bit.
* the entire mantissa is zero.
*
case3_set_sticky:
bset #rnd_stky_bit,d0 * set new sticky bit
clr.l FTEMP_HI(a0) * clear hi(mantissa)
clr.l FTEMP_LO(a0) * clear lo(mantissa)
rts
*########################################################################
* XDEF **************************************************************** #
* _round(): round result according to precision/mode #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* a0 = ptr to input operand in internal extended format #
* d1(hi) = contains rounding precision: #
* ext = $0000xxxx #
* sgl = $0004xxxx #
* dbl = $0008xxxx #
* d1(lo) = contains rounding mode: #
* RN = $xxxx0000 #
* RZ = $xxxx0001 #
* RM = $xxxx0002 #
* RP = $xxxx0003 #
* d0{31:29} = contains the g,r,s bits (extended) #
* #
* OUTPUT ************************************************************** #
* a0 = pointer to rounded result #
* #
* ALGORITHM *********************************************************** #
* On return the value pointed to by a0 is correctly rounded, #
* a0 is preserved and the g-r-s bits in d0 are cleared. #
* The result is not typed - the tag field is invalid. The #
* result is still in the internal extended format. #
* #
* The INEX bit of USER_FPSR will be set if the rounded result was #
* inexact (i.e. if any of the g-r-s bits were set). #
* #
*########################################################################
global _round
_round:
*
* ext_grs() looks at the rounding precision and sets the appropriate
* G,R,S bits.
* If (G,R,S == 0) then result is exact and round is done, else set
* the inex flag in status reg and continue.
*
bsr.l ext_grs * extract G,R,S
tst.l d0 * are G,R,S zero?
beq.w truncate * yes; round is complete
ori.w #inx2a_mask,2+USER_FPSR(a6) * set inex2/ainex
*
* Use rounding mode as an index into a jump table for these modes.
* All of the following assumes grs != 0.
*
move.w (tbl_mode.b,pc,d1.w*2),a1 * load jump offset
jmp (tbl_mode.b,pc,a1.l) * jmp to rnd mode handler
tbl_mode:
.dc.w rnd_near-tbl_mode
.dc.w truncate-tbl_mode * RZ always truncates
.dc.w rnd_mnus-tbl_mode
.dc.w rnd_plus-tbl_mode
*################################################################
* ROUND PLUS INFINITY #
* #
* If sign of fp number = 0 (positive), then add 1 to l. #
*################################################################
rnd_plus:
tst.b FTEMP_SGN(a0) * check for sign
bmi.w truncate * if positive then truncate
move.l #$ffffffff,d0 * force g,r,s to be all f's
swap d1 * set up d1 for round prec.
cmpi.b #s_mode,d1 * is prec = sgl?
beq.w add_sgl * yes
bgt.w add_dbl * no; it's dbl
bra.w add_ext * no; it's ext
*################################################################
* ROUND MINUS INFINITY #
* #
* If sign of fp number = 1 (negative), then add 1 to l. #
*################################################################
rnd_mnus:
tst.b FTEMP_SGN(a0) * check for sign
bpl.w truncate * if negative then truncate
move.l #$ffffffff,d0 * force g,r,s to be all f's
swap d1 * set up d1 for round prec.
cmpi.b #s_mode,d1 * is prec = sgl?
beq.w add_sgl * yes
bgt.w add_dbl * no; it's dbl
bra.w add_ext * no; it's ext
*################################################################
* ROUND NEAREST #
* #
* If (g=1), then add 1 to l and if (r=s=0), then clear l #
* Note that this will round to even in case of a tie. #
*################################################################
rnd_near:
asl.l #$1,d0 * shift g-bit to c-bit
bcc.w truncate * if (g=1) then
swap d1 * set up d1 for round prec.
cmpi.b #s_mode,d1 * is prec = sgl?
beq.w add_sgl * yes
bgt.w add_dbl * no; it's dbl
bra.w add_ext * no; it's ext
* *** LOCAL EQUATES ***
ad_1_sgl set $00000100 * constant to add 1 to l-bit in sgl prec
ad_1_dbl set $00000800 * constant to add 1 to l-bit in dbl prec
*########################
* ADD SINGLE #
*########################
add_sgl:
addi.l #ad_1_sgl,FTEMP_HI(a0)
bcc.b scc_clr * no mantissa overflow
roxr.w FTEMP_HI(a0) * shift v-bit back in
roxr.w FTEMP_HI+2(a0) * shift v-bit back in
addq.w #$1,FTEMP_EX.w(a0) * and incr exponent
scc_clr:
tst.l d0 * test for rs = 0
bne.b sgl_done
andi.w #$fe00,FTEMP_HI+2(a0) * clear the l-bit
sgl_done:
andi.l #$ffffff00,FTEMP_HI(a0) * truncate bits beyond sgl limit
clr.l FTEMP_LO(a0) * clear d2
rts
*########################
* ADD EXTENDED #
*########################
add_ext:
addq.l #1,FTEMP_LO(a0) * add 1 to l-bit
bcc.b xcc_clr * test for carry out
addq.l #1,FTEMP_HI(a0) * propogate carry
bcc.b xcc_clr
roxr.w FTEMP_HI(a0) * mant is 0 so restore v-bit
roxr.w FTEMP_HI+2(a0) * mant is 0 so restore v-bit
roxr.w FTEMP_LO(a0)
roxr.w FTEMP_LO+2(a0)
addq.w #$1,FTEMP_EX.w(a0) * and inc exp
xcc_clr:
tst.l d0 * test rs = 0
bne.b add_ext_done
andi.b #$fe,FTEMP_LO+3(a0) * clear the l bit
add_ext_done:
rts
*########################
* ADD DOUBLE #
*########################
add_dbl:
addi.l #ad_1_dbl,FTEMP_LO(a0) * add 1 to lsb
bcc.b dcc_clr * no carry
addq.l #$1,FTEMP_HI(a0) * propogate carry
bcc.b dcc_clr * no carry
roxr.w FTEMP_HI(a0) * mant is 0 so restore v-bit
roxr.w FTEMP_HI+2(a0) * mant is 0 so restore v-bit
roxr.w FTEMP_LO(a0)
roxr.w FTEMP_LO+2(a0)
addq.w #$1,FTEMP_EX.w(a0) * incr exponent
dcc_clr:
tst.l d0 * test for rs = 0
bne.b dbl_done
andi.w #$f000,FTEMP_LO+2(a0) * clear the l-bit
dbl_done:
andi.l #$fffff800,FTEMP_LO(a0) * truncate bits beyond dbl limit
rts
*##########################
* Truncate all other bits #
*##########################
truncate:
swap d1 * select rnd prec
cmpi.b #s_mode,d1 * is prec sgl?
beq.w sgl_done * yes
bgt.b dbl_done * no; it's dbl
rts * no; it's ext
*
* ext_grs(): extract guard, round and sticky bits according to
* rounding precision.
*
* INPUT
* d0 = extended precision g,r,s (in d0{31:29})
* d1 = {PREC,ROUND}
* OUTPUT
* d0{31:29} = guard, round, sticky
*
* The ext_grs extract the guard/round/sticky bits according to the
* selected rounding precision. It is called by the round subroutine
* only. All registers except d0 are kept intact. d0 becomes an
* updated guard,round,sticky in d0{31:29}
*
* Notes: the ext_grs uses the round PREC, and therefore has to swap d1
* prior to usage, and needs to restore d1 to original. this
* routine is tightly tied to the round routine and not meant to
* uphold standard subroutine calling practices.
*
ext_grs:
swap d1 * have d1.w point to round precision
tst.b d1 * is rnd prec = extended?
bne.b ext_grs_not_ext * no; go handle sgl or dbl
*
* %d0 actually already hold g,r,s since _round() had it before calling
* this function. so, as long as we don't disturb it, we are "returning" it.
*
ext_grs_ext:
swap d1 * yes; return to correct positions
rts
ext_grs_not_ext:
movem.l d2-d3,-(sp) * make some temp registers {d2/d3}
cmpi.b #s_mode,d1 * is rnd prec = sgl?
bne.b ext_grs_dbl * no; go handle dbl
*
* sgl:
* 96 64 40 32 0
* -----------------------------------------------------
* | EXP |XXXXXXX| |xx | |grs|
* -----------------------------------------------------
* <--(24)--->nn\ /
* ee ---------------------
* ww |
* v
* gr new sticky
*
ext_grs_sgl:
bfextu FTEMP_HI(a0){#24:#2},d3 * sgl prec. g-r are 2 bits right
move.l #30,d2 * of the sgl prec. limits
lsl.l d2,d3 * shift g-r bits to MSB of d3
move.l FTEMP_HI(a0),d2 * get word 2 for s-bit test
andi.l #$0000003f,d2 * s bit is the or of all other
bne.b ext_grs_st_stky * bits to the right of g-r
tst.l FTEMP_LO(a0) * test lower mantissa
bne.b ext_grs_st_stky * if any are set, set sticky
tst.l d0 * test original g,r,s
bne.b ext_grs_st_stky * if any are set, set sticky
bra.b ext_grs_end_sd * if words 3 and 4 are clr, exit
*
* dbl:
* 96 64 32 11 0
* -----------------------------------------------------
* | EXP |XXXXXXX| | |xx |grs|
* -----------------------------------------------------
* nn\ /
* ee -------
* ww |
* v
* gr new sticky
*
ext_grs_dbl:
bfextu FTEMP_LO(a0){#21:#2},d3 * dbl-prec. g-r are 2 bits right
move.l #30,d2 * of the dbl prec. limits
lsl.l d2,d3 * shift g-r bits to the MSB of d3
move.l FTEMP_LO(a0),d2 * get lower mantissa for s-bit test
andi.l #$000001ff,d2 * s bit is the or-ing of all
bne.b ext_grs_st_stky * other bits to the right of g-r
tst.l d0 * test word original g,r,s
bne.b ext_grs_st_stky * if any are set, set sticky
bra.b ext_grs_end_sd * if clear, exit
ext_grs_st_stky:
bset #rnd_stky_bit,d3 * set sticky bit
ext_grs_end_sd:
move.l d3,d0 * return grs to d0
movem.l (sp)+,d2-d3 * restore scratch registers {d2/d3}
swap d1 * restore d1 to original
rts
*########################################################################
* norm(): normalize the mantissa of an extended precision input. the #
* input operand should not be normalized already. #
* #
* XDEF **************************************************************** #
* norm() #
* #
* XREF **************************************************************** #
* none #
* #
* INPUT *************************************************************** #
* a0 = pointer fp extended precision operand to normalize #
* #
* OUTPUT ************************************************************** #
* d0 = number of bit positions the mantissa was shifted #
* a0 = the input operand's mantissa is normalized; the exponent #
* is unchanged. #
* #
*########################################################################
global norm
norm:
move.l d2,-(sp) * create some temp regs
move.l d3,-(sp)
move.l FTEMP_HI(a0),d0 * load hi(mantissa)
move.l FTEMP_LO(a0),d1 * load lo(mantissa)
bfffo d0{#0:#32},d2 * how many places to shift?
beq.b norm_lo * hi(man) is all zeroes!
norm_hi:
lsl.l d2,d0 * left shift hi(man)
bfextu d1{#0:d2},d3 * extract lo bits
or.l d3,d0 * create hi(man)
lsl.l d2,d1 * create lo(man)
move.l d0,FTEMP_HI(a0) * store new hi(man)
move.l d1,FTEMP_LO(a0) * store new lo(man)
move.l d2,d0 * return shift amount
move.l (sp)+,d3 * restore temp regs
move.l (sp)+,d2
rts
norm_lo:
bfffo d1{#0:#32},d2 * how many places to shift?
lsl.l d2,d1 * shift lo(man)
addi.l #32,d2 * add 32 to shft amount
move.l d1,FTEMP_HI(a0) * store hi(man)
clr.l FTEMP_LO(a0) * lo(man) is now zero
move.l d2,d0 * return shift amount
move.l (sp)+,d3 * restore temp regs
move.l (sp)+,d2
rts
*########################################################################
* unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
* - returns corresponding optype tag #
* #
* XDEF **************************************************************** #
* unnorm_fix() #
* #
* XREF **************************************************************** #
* norm() - normalize the mantissa #
* #
* INPUT *************************************************************** #
* a0 = pointer to unnormalized extended precision number #
* #
* OUTPUT ************************************************************** #
* d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
* a0 = input operand has been converted to a norm, denorm, or #
* zero; both the exponent and mantissa are changed. #
* #
*########################################################################
global unnorm_fix
unnorm_fix:
bfffo FTEMP_HI(a0){#0:#32},d0 * how many shifts are needed?
bne.b unnorm_shift * hi(man) is not all zeroes
*
* hi(man) is all zeroes so see if any bits in lo(man) are set
*
unnorm_chk_lo:
bfffo FTEMP_LO(a0){#0:#32},d0 * is operand really a zero?
beq.w unnorm_zero * yes
addi.w #32,d0 * no; fix shift distance
*
* d0 = # shifts needed for complete normalization
*
unnorm_shift:
clr.l d1 * clear top word
move.w FTEMP_EX.w(a0),d1 * extract exponent
andi.w #$7fff,d1 * strip off sgn
cmp.w d1,d0 * will denorm push exp < 0?
bgt.b unnorm_nrm_zero * yes; denorm only until exp = 0
*
* exponent would not go < 0. therefore, number stays normalized
*
sub.w d0,d1 * shift exponent value
move.w FTEMP_EX.w(a0),d0 * load old exponent
andi.w #$8000,d0 * save old sign
or.w d0,d1 * {sgn,new exp}
move.w d1,FTEMP_EX.w(a0) * insert new exponent
bsr.l norm * normalize UNNORM
move.b #NORM,d0 * return new optype tag
rts
*
* exponent would go < 0, so only denormalize until exp = 0
*
unnorm_nrm_zero:
cmpi.b #32,d1 * is exp <= 32?
bgt.b unnorm_nrm_zero_lrg * no; go handle large exponent
bfextu FTEMP_HI(a0){d1:#32},d0 * extract new hi(man)
move.l d0,FTEMP_HI(a0) * save new hi(man)
move.l FTEMP_LO(a0),d0 * fetch old lo(man)
lsl.l d1,d0 * extract new lo(man)
move.l d0,FTEMP_LO(a0) * save new lo(man)
andi.w #$8000,FTEMP_EX.w(a0) * set exp = 0
move.b #DENORM,d0 * return new optype tag
rts
*
* only mantissa bits set are in lo(man)
*
unnorm_nrm_zero_lrg:
subi.w #32,d1 * adjust shft amt by 32
move.l FTEMP_LO(a0),d0 * fetch old lo(man)
lsl.l d1,d0 * left shift lo(man)
move.l d0,FTEMP_HI(a0) * store new hi(man)
clr.l FTEMP_LO(a0) * lo(man) = 0
andi.w #$8000,FTEMP_EX.w(a0) * set exp = 0
move.b #DENORM,d0 * return new optype tag
rts
*
* whole mantissa is zero so this UNNORM is actually a zero
*
unnorm_zero:
andi.w #$8000,FTEMP_EX.w(a0) * force exponent to zero
move.b #ZERO,d0 * fix optype tag
rts
*########################################################################
* XDEF **************************************************************** #
* set_tag_x(): return the optype of the input ext fp number #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision operand #
* #
* OUTPUT ************************************************************** #
* d0 = value of type tag #
* one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
* #
* ALGORITHM *********************************************************** #
* Simply test the exponent, j-bit, and mantissa values to #
* determine the type of operand. #
* If it's an unnormalized zero, alter the operand and force it #
* to be a normal zero. #
* #
*########################################################################
global set_tag_x
set_tag_x:
move.w FTEMP_EX.w(a0),d0 * extract exponent
andi.w #$7fff,d0 * strip off sign
cmpi.w #$7fff,d0 * is (EXP == MAX)?
beq.b inf_or_nan_x
not_inf_or_nan_x:
btst #$7,FTEMP_HI(a0)
beq.b not_norm_x
is_norm_x:
move.b #NORM,d0
rts
not_norm_x:
tst.w d0 * is exponent = 0?
bne.b is_unnorm_x
not_unnorm_x:
tst.l FTEMP_HI(a0)
bne.b is_denorm_x
tst.l FTEMP_LO(a0)
bne.b is_denorm_x
is_zero_x:
move.b #ZERO,d0
rts
is_denorm_x:
move.b #DENORM,d0
rts
* must distinguish now "Unnormalized zeroes" which we
* must convert to zero.
is_unnorm_x:
tst.l FTEMP_HI(a0)
bne.b is_unnorm_reg_x
tst.l FTEMP_LO(a0)
bne.b is_unnorm_reg_x
* it's an "unnormalized zero". let's convert it to an actual zero...
andi.w #$8000,FTEMP_EX.w(a0) * clear exponent
move.b #ZERO,d0
rts
is_unnorm_reg_x:
move.b #UNNORM,d0
rts
inf_or_nan_x:
tst.l FTEMP_LO(a0)
bne.b is_nan_x
move.l FTEMP_HI(a0),d0
andi.l #$7fffffff,d0 * msb is a don't care!
bne.b is_nan_x
is_inf_x:
move.b #INF,d0
rts
is_nan_x:
btst #$6,FTEMP_HI(a0)
beq.b is_snan_x
move.b #QNAN,d0
rts
is_snan_x:
move.b #SNAN,d0
rts
*########################################################################
* XDEF **************************************************************** #
* set_tag_d(): return the optype of the input dbl fp number #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* a0 = points to double precision operand #
* #
* OUTPUT ************************************************************** #
* d0 = value of type tag #
* one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
* #
* ALGORITHM *********************************************************** #
* Simply test the exponent, j-bit, and mantissa values to #
* determine the type of operand. #
* #
*########################################################################
global set_tag_d
set_tag_d:
move.l FTEMP.w(a0),d0
move.l d0,d1
andi.l #$7ff00000,d0
beq.b zero_or_denorm_d
cmpi.l #$7ff00000,d0
beq.b inf_or_nan_d
is_norm_d:
move.b #NORM,d0
rts
zero_or_denorm_d:
andi.l #$000fffff,d1
bne.l is_denorm_d
tst.l 4+FTEMP(a0)
bne.l is_denorm_d
is_zero_d:
move.b #ZERO,d0
rts
is_denorm_d:
move.b #DENORM,d0
rts
inf_or_nan_d:
andi.l #$000fffff,d1
bne.l is_nan_d
tst.l 4+FTEMP(a0)
bne.l is_nan_d
is_inf_d:
move.b #INF,d0
rts
is_nan_d:
btst #19,d1
bne.l is_qnan_d
is_snan_d:
move.b #SNAN,d0
rts
is_qnan_d:
move.b #QNAN,d0
rts
*########################################################################
* XDEF **************************************************************** #
* set_tag_s(): return the optype of the input sgl fp number #
* #
* XREF **************************************************************** #
* None #
* #
* INPUT *************************************************************** #
* a0 = pointer to single precision operand #
* #
* OUTPUT ************************************************************** #
* d0 = value of type tag #
* one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
* #
* ALGORITHM *********************************************************** #
* Simply test the exponent, j-bit, and mantissa values to #
* determine the type of operand. #
* #
*########################################################################
global set_tag_s
set_tag_s:
move.l FTEMP.w(a0),d0
move.l d0,d1
andi.l #$7f800000,d0
beq.b zero_or_denorm_s
cmpi.l #$7f800000,d0
beq.b inf_or_nan_s
is_norm_s:
move.b #NORM,d0
rts
zero_or_denorm_s:
andi.l #$007fffff,d1
bne.l is_denorm_s
is_zero_s:
move.b #ZERO,d0
rts
is_denorm_s:
move.b #DENORM,d0
rts
inf_or_nan_s:
andi.l #$007fffff,d1
bne.l is_nan_s
is_inf_s:
move.b #INF,d0
rts
is_nan_s:
btst #22,d1
bne.l is_qnan_s
is_snan_s:
move.b #SNAN,d0
rts
is_qnan_s:
move.b #QNAN,d0
rts
*########################################################################
* XDEF **************************************************************** #
* unf_res(): routine to produce default underflow result of a #
* scaled extended precision number; this is used by #
* fadd/fdiv/fmul/etc. emulation routines. #
* unf_res4(): same as above but for fsglmul/fsgldiv which use #
* single round prec and extended prec mode. #
* #
* XREF **************************************************************** #
* _denorm() - denormalize according to scale factor #
* _round() - round denormalized number according to rnd prec #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precison operand #
* d0 = scale factor #
* d1 = rounding precision/mode #
* #
* OUTPUT ************************************************************** #
* a0 = pointer to default underflow result in extended precision #
* d0.b = result FPSR_cc which caller may or may not want to save #
* #
* ALGORITHM *********************************************************** #
* Convert the input operand to "internal format" which means the #
* exponent is extended to 16 bits and the sign is stored in the unused #
* portion of the extended precison operand. Denormalize the number #
* according to the scale factor passed in d0. Then, round the #
* denormalized result. #
* Set the FPSR_exc bits as appropriate but return the cc bits in #
* d0 in case the caller doesn't want to save them (as is the case for #
* fmove out). #
* unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
* precision and the rounding mode to single. #
* #
*########################################################################
global unf_res
unf_res:
move.l d1,-(sp) * save rnd prec,mode on stack
btst #$7,FTEMP_EX.w(a0) * make "internal" format
sne FTEMP_SGN(a0)
move.w FTEMP_EX.w(a0),d1 * extract exponent
andi.w #$7fff,d1
sub.w d0,d1
move.w d1,FTEMP_EX.w(a0) * insert 16 bit exponent
move.l a0,-(sp) * save operand ptr during calls
move.l $4(sp),d0 * pass rnd prec.
andi.w #$00c0,d0
lsr.w #$4,d0
bsr.l _denorm * denorm result
move.l (sp),a0
move.w $6(sp),d1 * load prec:mode into %d1
andi.w #$c0,d1 * extract rnd prec
lsr.w #$4,d1
swap d1
move.w $6(sp),d1
andi.w #$30,d1
lsr.w #$4,d1
bsr.l _round * round the denorm
move.l (sp)+,a0
* result is now rounded properly. convert back to normal format
bclr #$7,FTEMP_EX.w(a0) * clear sgn first; may have residue
tst.b FTEMP_SGN(a0) * is "internal result" sign set?
beq.b unf_res_chkifzero * no; result is positive
bset #$7,FTEMP_EX.w(a0) * set result sgn
clr.b FTEMP_SGN(a0) * clear temp sign
* the number may have become zero after rounding. set ccodes accordingly.
unf_res_chkifzero:
clr.l d0
tst.l FTEMP_HI(a0) * is value now a zero?
bne.b unf_res_cont * no
tst.l FTEMP_LO(a0)
bne.b unf_res_cont * no
* bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit
bset #z_bit,d0 * yes; set zero ccode bit
unf_res_cont:
*
* can inex1 also be set along with unfl and inex2???
*
* we know that underflow has occurred. aunfl should be set if INEX2 is also set.
*
btst #inex2_bit,FPSR_EXCEPT(a6) * is INEX2 set?
beq.b unf_res_end * no
bset #aunfl_bit,FPSR_AEXCEPT(a6) * yes; set aunfl
unf_res_end:
add.l #$4,sp * clear stack
rts
* unf_res() for fsglmul() and fsgldiv().
global unf_res4
unf_res4:
move.l d1,-(sp) * save rnd prec,mode on stack
btst #$7,FTEMP_EX.w(a0) * make "internal" format
sne FTEMP_SGN(a0)
move.w FTEMP_EX.w(a0),d1 * extract exponent
andi.w #$7fff,d1
sub.w d0,d1
move.w d1,FTEMP_EX.w(a0) * insert 16 bit exponent
move.l a0,-(sp) * save operand ptr during calls
clr.l d0 * force rnd prec = ext
bsr.l _denorm * denorm result
move.l (sp),a0
move.w #s_mode,d1 * force rnd prec = sgl
swap d1
move.w $6(sp),d1 * load rnd mode
andi.w #$30,d1 * extract rnd prec
lsr.w #$4,d1
bsr.l _round * round the denorm
move.l (sp)+,a0
* result is now rounded properly. convert back to normal format
bclr #$7,FTEMP_EX.w(a0) * clear sgn first; may have residue
tst.b FTEMP_SGN(a0) * is "internal result" sign set?
beq.b unf_res4_chkifzero * no; result is positive
bset #$7,FTEMP_EX.w(a0) * set result sgn
clr.b FTEMP_SGN(a0) * clear temp sign
* the number may have become zero after rounding. set ccodes accordingly.
unf_res4_chkifzero:
clr.l d0
tst.l FTEMP_HI(a0) * is value now a zero?
bne.b unf_res4_cont * no
tst.l FTEMP_LO(a0)
bne.b unf_res4_cont * no
* bset &z_bit,FPSR_CC(%a6) # yes; set zero ccode bit
bset #z_bit,d0 * yes; set zero ccode bit
unf_res4_cont:
*
* can inex1 also be set along with unfl and inex2???
*
* we know that underflow has occurred. aunfl should be set if INEX2 is also set.
*
btst #inex2_bit,FPSR_EXCEPT(a6) * is INEX2 set?
beq.b unf_res4_end * no
bset #aunfl_bit,FPSR_AEXCEPT(a6) * yes; set aunfl
unf_res4_end:
add.l #$4,sp * clear stack
rts
*########################################################################
* XDEF **************************************************************** #
* ovf_res(): routine to produce the default overflow result of #
* an overflowing number. #
* ovf_res2(): same as above but the rnd mode/prec are passed #
* differently. #
* #
* XREF **************************************************************** #
* none #
* #
* INPUT *************************************************************** #
* d1.b = '-1' => (-); '0' => (+) #
* ovf_res(): #
* d0 = rnd mode/prec #
* ovf_res2(): #
* hi(d0) = rnd prec #
* lo(d0) = rnd mode #
* #
* OUTPUT ************************************************************** #
* a0 = points to extended precision result #
* d0.b = condition code bits #
* #
* ALGORITHM *********************************************************** #
* The default overflow result can be determined by the sign of #
* the result and the rounding mode/prec in effect. These bits are #
* concatenated together to create an index into the default result #
* table. A pointer to the correct result is returned in a0. The #
* resulting condition codes are returned in d0 in case the caller #
* doesn't want FPSR_cc altered (as is the case for fmove out). #
* #
*########################################################################
global ovf_res
ovf_res:
andi.w #$10,d1 * keep result sign
lsr.b #$4,d0 * shift prec/mode
or.b d0,d1 * concat the two
move.w d1,d0 * make a copy
lsl.b #$1,d1 * multiply d1 by 2
bra.b ovf_res_load
global ovf_res2
ovf_res2:
andi.w #$10,d1 * keep result sign
or.b d0,d1 * insert rnd mode
swap d0
or.b d0,d1 * insert rnd prec
move.w d1,d0 * make a copy
lsl.b #$1,d1 * shift left by 1
*
* use the rounding mode, precision, and result sign as in index into the
* two tables below to fetch the default result and the result ccodes.
*
ovf_res_load:
move.b (tbl_ovfl_cc.b,pc,d0.w*1),d0 * fetch result ccodes
lea (tbl_ovfl_result.b,pc,d1.w*8),a0 * return result ptr
rts
tbl_ovfl_cc:
.dc.b $2,$0,$0,$2
.dc.b $2,$0,$0,$2
.dc.b $2,$0,$0,$2
.dc.b $0,$0,$0,$0
.dc.b $2+$8,$8,$2+$8,$8
.dc.b $2+$8,$8,$2+$8,$8
.dc.b $2+$8,$8,$2+$8,$8
tbl_ovfl_result:
.dc.l $7fff0000,$00000000,$00000000,$00000000 * +INF; RN
.dc.l $7ffe0000,$ffffffff,$ffffffff,$00000000 * +EXT; RZ
.dc.l $7ffe0000,$ffffffff,$ffffffff,$00000000 * +EXT; RM
.dc.l $7fff0000,$00000000,$00000000,$00000000 * +INF; RP
.dc.l $7fff0000,$00000000,$00000000,$00000000 * +INF; RN
.dc.l $407e0000,$ffffff00,$00000000,$00000000 * +SGL; RZ
.dc.l $407e0000,$ffffff00,$00000000,$00000000 * +SGL; RM
.dc.l $7fff0000,$00000000,$00000000,$00000000 * +INF; RP
.dc.l $7fff0000,$00000000,$00000000,$00000000 * +INF; RN
.dc.l $43fe0000,$ffffffff,$fffff800,$00000000 * +DBL; RZ
.dc.l $43fe0000,$ffffffff,$fffff800,$00000000 * +DBL; RM
.dc.l $7fff0000,$00000000,$00000000,$00000000 * +INF; RP
.dc.l $00000000,$00000000,$00000000,$00000000
.dc.l $00000000,$00000000,$00000000,$00000000
.dc.l $00000000,$00000000,$00000000,$00000000
.dc.l $00000000,$00000000,$00000000,$00000000
.dc.l $ffff0000,$00000000,$00000000,$00000000 * -INF; RN
.dc.l $fffe0000,$ffffffff,$ffffffff,$00000000 * -EXT; RZ
.dc.l $ffff0000,$00000000,$00000000,$00000000 * -INF; RM
.dc.l $fffe0000,$ffffffff,$ffffffff,$00000000 * -EXT; RP
.dc.l $ffff0000,$00000000,$00000000,$00000000 * -INF; RN
.dc.l $c07e0000,$ffffff00,$00000000,$00000000 * -SGL; RZ
.dc.l $ffff0000,$00000000,$00000000,$00000000 * -INF; RM
.dc.l $c07e0000,$ffffff00,$00000000,$00000000 * -SGL; RP
.dc.l $ffff0000,$00000000,$00000000,$00000000 * -INF; RN
.dc.l $c3fe0000,$ffffffff,$fffff800,$00000000 * -DBL; RZ
.dc.l $ffff0000,$00000000,$00000000,$00000000 * -INF; RM
.dc.l $c3fe0000,$ffffffff,$fffff800,$00000000 * -DBL; RP
*########################################################################
* XDEF **************************************************************** #
* get_packed(): fetch a packed operand from memory and then #
* convert it to a floating-point binary number. #
* #
* XREF **************************************************************** #
* _dcalc_ea() - calculate the correct <ea> #
* _mem_read() - fetch the packed operand from memory #
* facc_in_x() - the fetch failed so jump to special exit code #
* decbin() - convert packed to binary extended precision #
* #
* INPUT *************************************************************** #
* None #
* #
* OUTPUT ************************************************************** #
* If no failure on _mem_read(): #
* FP_SRC(a6) = packed operand now as a binary FP number #
* #
* ALGORITHM *********************************************************** #
* Get the correct <ea> whihc is the value on the exception stack #
* frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
* Then, fetch the operand from memory. If the fetch fails, exit #
* through facc_in_x(). #
* If the packed operand is a ZERO,NAN, or INF, convert it to #
* its binary representation here. Else, call decbin() which will #
* convert the packed value to an extended precision binary value. #
* #
*########################################################################
* the stacked <ea> for packed is correct except for -(An).
* the base reg must be updated for both -(An) and (An)+.
global get_packed
get_packed:
move.l #$c,d0 * packed is 12 bytes
bsr.l _dcalc_ea * fetch <ea>; correct An
lea FP_SRC(a6),a1 * pass: ptr to super dst
move.l #$c,d0 * pass: 12 bytes
bsr.l _dmem_read * read packed operand
tst.l d1 * did dfetch fail?
bne.l facc_in_x * yes
* The packed operand is an INF or a NAN if the exponent field is all ones.
bfextu FP_SRC(a6){#1:#15},d0 * get exp
cmpi.w #$7fff,d0 * INF or NAN?
bne.b gp_try_zero * no
rts * operand is an INF or NAN
* The packed operand is a zero if the mantissa is all zero, else it's
* a normal packed op.
gp_try_zero:
move.b 3+FP_SRC(a6),d0 * get byte 4
andi.b #$0f,d0 * clear all but last nybble
bne.b gp_not_spec * not a zero
tst.l FP_SRC_HI(a6) * is lw 2 zero?
bne.b gp_not_spec * not a zero
tst.l FP_SRC_LO(a6) * is lw 3 zero?
bne.b gp_not_spec * not a zero
rts * operand is a ZERO
gp_not_spec:
lea FP_SRC(a6),a0 * pass: ptr to packed op
bsr.l decbin * convert to extended
fmovem.x fp0,FP_SRC(a6) * make this the srcop
rts
*########################################################################
* decbin(): Converts normalized packed bcd value pointed to by register #
* a0 to extended-precision value in fp0. #
* #
* INPUT *************************************************************** #
* a0 = pointer to normalized packed bcd value #
* #
* OUTPUT ************************************************************** #
* fp0 = exact fp representation of the packed bcd value. #
* #
* ALGORITHM *********************************************************** #
* Expected is a normal bcd (i.e. non-exceptional; all inf, zero, #
* and NaN operands are dispatched without entering this routine) #
* value in 68881/882 format at location (a0). #
* #
* A1. Convert the bcd exponent to binary by successive adds and #
* muls. Set the sign according to SE. Subtract 16 to compensate #
* for the mantissa which is to be interpreted as 17 integer #
* digits, rather than 1 integer and 16 fraction digits. #
* Note: this operation can never overflow. #
* #
* A2. Convert the bcd mantissa to binary by successive #
* adds and muls in FP0. Set the sign according to SM. #
* The mantissa digits will be converted with the decimal point #
* assumed following the least-significant digit. #
* Note: this operation can never overflow. #
* #
* A3. Count the number of leading/trailing zeros in the #
* bcd string. If SE is positive, count the leading zeros; #
* if negative, count the trailing zeros. Set the adjusted #
* exponent equal to the exponent from A1 and the zero count #
* added if SM = 1 and subtracted if SM = 0. Scale the #
* mantissa the equivalent of forcing in the bcd value: #
* #
* SM = 0 a non-zero digit in the integer position #
* SM = 1 a non-zero digit in Mant0, lsd of the fraction #
* #
* this will insure that any value, regardless of its #
* representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted #
* consistently. #
* #
* A4. Calculate the factor 10^exp in FP1 using a table of #
* 10^(2^n) values. To reduce the error in forming factors #
* greater than 10^27, a directed rounding scheme is used with #
* tables rounded to RN, RM, and RP, according to the table #
* in the comments of the pwrten section. #
* #
* A5. Form the final binary number by scaling the mantissa by #
* the exponent factor. This is done by multiplying the #
* mantissa in FP0 by the factor in FP1 if the adjusted #
* exponent sign is positive, and dividing FP0 by FP1 if #
* it is negative. #
* #
* Clean up and return. Check if the final mul or div was inexact. #
* If so, set INEX1 in USER_FPSR. #
* #
*########################################################################
*
* PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
* to nearest, minus, and plus, respectively. The tables include
* 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
* is required until the power is greater than 27, however, all
* tables include the first 5 for ease of indexing.
*
RTABLE:
.dc.b 0,0,0,0
.dc.b 2,3,2,3
.dc.b 2,3,3,2
.dc.b 3,2,2,3
FNIBS set 7
FSTRT set 0
ESTRT set 4
EDIGITS set 2
global decbin
decbin:
move.l $0.w(a0),FP_SCR0_EX(a6) * make a copy of input
move.l $4(a0),FP_SCR0_HI(a6) * so we don't alter it
move.l $8(a0),FP_SCR0_LO(a6)
lea FP_SCR0(a6),a0
movem.l d2-d5,-(sp) * save d2-d5
fmovem.x fp0,-(sp) * save fp1
*
* Calculate exponent:
* 1. Copy bcd value in memory for use as a working copy.
* 2. Calculate absolute value of exponent in d1 by mul and add.
* 3. Correct for exponent sign.
* 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
* (i.e., all digits assumed left of the decimal point.)
*
* Register usage:
*
* calc_e:
* (*) d0: temp digit storage
* (*) d1: accumulator for binary exponent
* (*) d2: digit count
* (*) d3: offset pointer
* ( ) d4: first word of bcd
* ( ) a0: pointer to working bcd value
* ( ) a6: pointer to original bcd value
* (*) FP_SCR1: working copy of original bcd value
* (*) L_SCR1: copy of original exponent word
*
calc_e:
move.l #EDIGITS,d2 * # of nibbles (digits) in fraction part
move.l #ESTRT,d3 * counter to pick up digits
move.l (a0),d4 * get first word of bcd
clr.l d1 * zero d1 for accumulator
e_gd:
mulu.l #$a,d1 * mul partial product by one digit place
bfextu d4{d3:#4},d0 * get the digit and zero extend into d0
add.l d0,d1 * d1 = d1 + d0
addq.b #4,d3 * advance d3 to the next digit
dbf.w d2,e_gd * if we have used all 3 digits, exit loop
btst #30,d4 * get SE
beq.b e_pos * don't negate if pos
neg.l d1 * negate before subtracting
e_pos:
subi.l #16,d1 * sub to compensate for shift of mant
bge.b e_save * if still pos, do not neg
neg.l d1 * now negative, make pos and set SE
ori.l #$40000000,d4 * set SE in d4,
ori.l #$40000000,(a0) * and in working bcd
e_save:
move.l d1,-(sp) * save exp on stack
*
*
* Calculate mantissa:
* 1. Calculate absolute value of mantissa in fp0 by mul and add.
* 2. Correct for mantissa sign.
* (i.e., all digits assumed left of the decimal point.)
*
* Register usage:
*
* calc_m:
* (*) d0: temp digit storage
* (*) d1: lword counter
* (*) d2: digit count
* (*) d3: offset pointer
* ( ) d4: words 2 and 3 of bcd
* ( ) a0: pointer to working bcd value
* ( ) a6: pointer to original bcd value
* (*) fp0: mantissa accumulator
* ( ) FP_SCR1: working copy of original bcd value
* ( ) L_SCR1: copy of original exponent word
*
calc_m:
move.l #1,d1 * word counter, init to 1
fmove.s #$00000000,fp0 * accumulator
*
*
* Since the packed number has a long word between the first & second parts,
* get the integer digit then skip down & get the rest of the
* mantissa. We will unroll the loop once.
*
bfextu (a0){#28:#4},d0 * integer part is ls digit in long word
fadd.b d0,fp0 * add digit to sum in fp0
*
*
* Get the rest of the mantissa.
*
loadlw:
move.l (a0,d1.L*4),d4 * load mantissa lonqword into d4
move.l #FSTRT,d3 * counter to pick up digits
move.l #FNIBS,d2 * reset number of digits per a0 ptr
md2b:
fmul.s #$41200000,fp0 * fp0 = fp0 * 10
bfextu d4{d3:#4},d0 * get the digit and zero extend
fadd.b d0,fp0 * fp0 = fp0 + digit
*
*
* If all the digits (8) in that long word have been converted (d2=0),
* then inc d1 (=2) to point to the next long word and reset d3 to 0
* to initialize the digit offset, and set d2 to 7 for the digit count;
* else continue with this long word.
*
addq.b #4,d3 * advance d3 to the next digit
dbf.w d2,md2b * check for last digit in this lw
nextlw:
addq.l #1,d1 * inc lw pointer in mantissa
cmpi.l #2,d1 * test for last lw
ble.b loadlw * if not, get last one
*
* Check the sign of the mant and make the value in fp0 the same sign.
*
m_sign:
btst #31,(a0) * test sign of the mantissa
beq.b ap_st_z * if clear, go to append/strip zeros
fneg.x fp0 * if set, negate fp0
*
* Append/strip zeros:
*
* For adjusted exponents which have an absolute value greater than 27*,
* this routine calculates the amount needed to normalize the mantissa
* for the adjusted exponent. That number is subtracted from the exp
* if the exp was positive, and added if it was negative. The purpose
* of this is to reduce the value of the exponent and the possibility
* of error in calculation of pwrten.
*
* 1. Branch on the sign of the adjusted exponent.
* 2p.(positive exp)
* 2. Check M16 and the digits in lwords 2 and 3 in decending order.
* 3. Add one for each zero encountered until a non-zero digit.
* 4. Subtract the count from the exp.
* 5. Check if the exp has crossed zero in #3 above; make the exp abs
* and set SE.
* 6. Multiply the mantissa by 10**count.
* 2n.(negative exp)
* 2. Check the digits in lwords 3 and 2 in decending order.
* 3. Add one for each zero encountered until a non-zero digit.
* 4. Add the count to the exp.
* 5. Check if the exp has crossed zero in #3 above; clear SE.
* 6. Divide the mantissa by 10**count.
*
* *Why 27? If the adjusted exponent is within -28 < expA < 28, than
* any adjustment due to append/strip zeros will drive the resultane
* exponent towards zero. Since all pwrten constants with a power
* of 27 or less are exact, there is no need to use this routine to
* attempt to lessen the resultant exponent.
*
* Register usage:
*
* ap_st_z:
* (*) d0: temp digit storage
* (*) d1: zero count
* (*) d2: digit count
* (*) d3: offset pointer
* ( ) d4: first word of bcd
* (*) d5: lword counter
* ( ) a0: pointer to working bcd value
* ( ) FP_SCR1: working copy of original bcd value
* ( ) L_SCR1: copy of original exponent word
*
*
* First check the absolute value of the exponent to see if this
* routine is necessary. If so, then check the sign of the exponent
* and do append (+) or strip (-) zeros accordingly.
* This section handles a positive adjusted exponent.
*
ap_st_z:
move.l (sp),d1 * load expA for range test
cmpi.l #27,d1 * test is with 27
ble.w pwrten * if abs(expA) <28, skip ap/st zeros
btst #30,(a0) * check sign of exp
bne.b ap_st_n * if neg, go to neg side
clr.l d1 * zero count reg
move.l (a0),d4 * load lword 1 to d4
bfextu d4{#28:#4},d0 * get M16 in d0
bne.b ap_p_fx * if M16 is non-zero, go fix exp
addq.l #1,d1 * inc zero count
move.l #1,d5 * init lword counter
move.l (a0,d5.L*4),d4 * get lword 2 to d4
bne.b ap_p_cl * if lw 2 is zero, skip it
addq.l #8,d1 * and inc count by 8
addq.l #1,d5 * inc lword counter
move.l (a0,d5.L*4),d4 * get lword 3 to d4
ap_p_cl:
clr.l d3 * init offset reg
move.l #7,d2 * init digit counter
ap_p_gd:
bfextu d4{d3:#4},d0 * get digit
bne.b ap_p_fx * if non-zero, go to fix exp
addq.l #4,d3 * point to next digit
addq.l #1,d1 * inc digit counter
dbf.w d2,ap_p_gd * get next digit
ap_p_fx:
move.l d1,d0 * copy counter to d2
move.l (sp),d1 * get adjusted exp from memory
sub.l d0,d1 * subtract count from exp
bge.b ap_p_fm * if still pos, go to pwrten
neg.l d1 * now its neg; get abs
move.l (a0),d4 * load lword 1 to d4
ori.l #$40000000,d4 * and set SE in d4
ori.l #$40000000,(a0) * and in memory
*
* Calculate the mantissa multiplier to compensate for the striping of
* zeros from the mantissa.
*
ap_p_fm:
lea.l PTENRN.l(pc),a1 * get address of power-of-ten table
clr.l d3 * init table index
fmove.s #$3f800000,fp1 * init fp1 to 1
move.l #3,d2 * init d2 to count bits in counter
ap_p_el:
asr.l #1,d0 * shift lsb into carry
bcc.b ap_p_en * if 1, mul fp1 by pwrten factor
fmul.x (a1,d3.l),fp1 * mul by 10**(d3_bit_no)
ap_p_en:
addi.l #12,d3 * inc d3 to next rtable entry
tst.l d0 * check if d0 is zero
bne.b ap_p_el * if not, get next bit
fmul.x fp1,fp0 * mul mantissa by 10**(no_bits_shifted)
bra.b pwrten * go calc pwrten
*
* This section handles a negative adjusted exponent.
*
ap_st_n:
clr.l d1 * clr counter
move.l #2,d5 * set up d5 to point to lword 3
move.l (a0,d5.L*4),d4 * get lword 3
bne.b ap_n_cl * if not zero, check digits
sub.l #1,d5 * dec d5 to point to lword 2
addq.l #8,d1 * inc counter by 8
move.l (a0,d5.L*4),d4 * get lword 2
ap_n_cl:
move.l #28,d3 * point to last digit
move.l #7,d2 * init digit counter
ap_n_gd:
bfextu d4{d3:#4},d0 * get digit
bne.b ap_n_fx * if non-zero, go to exp fix
subq.l #4,d3 * point to previous digit
addq.l #1,d1 * inc digit counter
dbf.w d2,ap_n_gd * get next digit
ap_n_fx:
move.l d1,d0 * copy counter to d0
move.l (sp),d1 * get adjusted exp from memory
sub.l d0,d1 * subtract count from exp
bgt.b ap_n_fm * if still pos, go fix mantissa
neg.l d1 * take abs of exp and clr SE
move.l (a0),d4 * load lword 1 to d4
andi.l #$bfffffff,d4 * and clr SE in d4
andi.l #$bfffffff,(a0) * and in memory
*
* Calculate the mantissa multiplier to compensate for the appending of
* zeros to the mantissa.
*
ap_n_fm:
lea.l PTENRN.l(pc),a1 * get address of power-of-ten table
clr.l d3 * init table index
fmove.s #$3f800000,fp1 * init fp1 to 1
move.l #3,d2 * init d2 to count bits in counter
ap_n_el:
asr.l #1,d0 * shift lsb into carry
bcc.b ap_n_en * if 1, mul fp1 by pwrten factor
fmul.x (a1,d3.l),fp1 * mul by 10**(d3_bit_no)
ap_n_en:
addi.l #12,d3 * inc d3 to next rtable entry
tst.l d0 * check if d0 is zero
bne.b ap_n_el * if not, get next bit
fdiv.x fp1,fp0 * div mantissa by 10**(no_bits_shifted)
*
*
* Calculate power-of-ten factor from adjusted and shifted exponent.
*
* Register usage:
*
* pwrten:
* (*) d0: temp
* ( ) d1: exponent
* (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
* (*) d3: FPCR work copy
* ( ) d4: first word of bcd
* (*) a1: RTABLE pointer
* calc_p:
* (*) d0: temp
* ( ) d1: exponent
* (*) d3: PWRTxx table index
* ( ) a0: pointer to working copy of bcd
* (*) a1: PWRTxx pointer
* (*) fp1: power-of-ten accumulator
*
* Pwrten calculates the exponent factor in the selected rounding mode
* according to the following table:
*
* Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
*
* ANY ANY RN RN
*
* + + RP RP
* - + RP RM
* + - RP RM
* - - RP RP
*
* + + RM RM
* - + RM RP
* + - RM RP
* - - RM RM
*
* + + RZ RM
* - + RZ RM
* + - RZ RP
* - - RZ RP
*
*
pwrten:
move.l USER_FPCR(a6),d3 * get user's FPCR
bfextu d3{#26:#2},d2 * isolate rounding mode bits
move.l (a0),d4 * reload 1st bcd word to d4
asl.l #2,d2 * format d2 to be
bfextu d4{#0:#2},d0 * {FPCR[6],FPCR[5],SM,SE}
add.l d0,d2 * in d2 as index into RTABLE
lea.l RTABLE(pc),a1 * load rtable base
move.b (a1,d2.l),d0 * load new rounding bits from table
clr.l d3 * clear d3 to force no exc and extended
bfins d0,d3{#26:#2} * stuff new rounding bits in FPCR
fmove.l d3,fpcr * write new FPCR
asr.l #1,d0 * write correct PTENxx table
bcc.b not_rp * to a1
lea.l PTENRP.l(pc),a1 * it is RP
bra.b calc_p * go to init section
not_rp:
asr.l #1,d0 * keep checking
bcc.b not_rm
lea.l PTENRM.l(pc),a1 * it is RM
bra.b calc_p * go to init section
not_rm:
lea.l PTENRN.l(pc),a1 * it is RN
calc_p:
move.l d1,d0 * copy exp to d0;use d0
bpl.b no_neg * if exp is negative,
neg.l d0 * invert it
ori.l #$40000000,(a0) * and set SE bit
no_neg:
clr.l d3 * table index
fmove.s #$3f800000,fp1 * init fp1 to 1
e_loop:
asr.l #1,d0 * shift next bit into carry
bcc.b e_next * if zero, skip the mul
fmul.x (a1,d3.l),fp1 * mul by 10**(d3_bit_no)
e_next:
addi.l #12,d3 * inc d3 to next rtable entry
tst.l d0 * check if d0 is zero
bne.b e_loop * not zero, continue shifting
*
*
* Check the sign of the adjusted exp and make the value in fp0 the
* same sign. If the exp was pos then multiply fp1*fp0;
* else divide fp0/fp1.
*
* Register Usage:
* norm:
* ( ) a0: pointer to working bcd value
* (*) fp0: mantissa accumulator
* ( ) fp1: scaling factor - 10**(abs(exp))
*
pnorm:
btst #30,(a0) * test the sign of the exponent
beq.b mul * if clear, go to multiply
div:
fdiv.x fp1,fp0 * exp is negative, so divide mant by exp
bra.b end_dec
mul:
fmul.x fp1,fp0 * exp is positive, so multiply by exp
*
*
* Clean up and return with result in fp0.
*
* If the final mul/div in decbin incurred an inex exception,
* it will be inex2, but will be reported as inex1 by get_op.
*
end_dec:
fmove.l fpsr,d0 * get status register
bclr #inex2_bit+8,d0 * test for inex2 and clear it
beq.b no_exc * skip this if no exc
ori.w #inx1a_mask,2+USER_FPSR(a6) * set INEX1/AINEX
no_exc:
add.l #$4,sp * clear 1 lw param
fmovem.x (sp)+,fp1 * restore fp1
movem.l (sp)+,d2-d5 * restore d2-d5
fmove.l #$0,fpcr
fmove.l #$0,fpsr
rts
*########################################################################
* bindec(): Converts an input in extended precision format to bcd format#
* #
* INPUT *************************************************************** #
* a0 = pointer to the input extended precision value in memory. #
* the input may be either normalized, unnormalized, or #
* denormalized. #
* d0 = contains the k-factor sign-extended to 32-bits. #
* #
* OUTPUT ************************************************************** #
* FP_SCR0(a6) = bcd format result on the stack. #
* #
* ALGORITHM *********************************************************** #
* #
* A1. Set RM and size ext; Set SIGMA = sign of input. #
* The k-factor is saved for use in d7. Clear the #
* BINDEC_FLG for separating normalized/denormalized #
* input. If input is unnormalized or denormalized, #
* normalize it. #
* #
* A2. Set X = abs(input). #
* #
* A3. Compute ILOG. #
* ILOG is the log base 10 of the input value. It is #
* approximated by adding e + 0.f when the original #
* value is viewed as 2^^e * 1.f in extended precision. #
* This value is stored in d6. #
* #
* A4. Clr INEX bit. #
* The operation in A3 above may have set INEX2. #
* #
* A5. Set ICTR = 0; #
* ICTR is a flag used in A13. It must be set before the #
* loop entry A6. #
* #
* A6. Calculate LEN. #
* LEN is the number of digits to be displayed. The #
* k-factor can dictate either the total number of digits, #
* if it is a positive number, or the number of digits #
* after the decimal point which are to be included as #
* significant. See the 68882 manual for examples. #
* If LEN is computed to be greater than 17, set OPERR in #
* USER_FPSR. LEN is stored in d4. #
* #
* A7. Calculate SCALE. #
* SCALE is equal to 10^ISCALE, where ISCALE is the number #
* of decimal places needed to insure LEN integer digits #
* in the output before conversion to bcd. LAMBDA is the #
* sign of ISCALE, used in A9. Fp1 contains #
* 10^^(abs(ISCALE)) using a rounding mode which is a #
* function of the original rounding mode and the signs #
* of ISCALE and X. A table is given in the code. #
* #
* A8. Clr INEX; Force RZ. #
* The operation in A3 above may have set INEX2. #
* RZ mode is forced for the scaling operation to insure #
* only one rounding error. The grs bits are collected in #
* the INEX flag for use in A10. #
* #
* A9. Scale X -> Y. #
* The mantissa is scaled to the desired number of #
* significant digits. The excess digits are collected #
* in INEX2. #
* #
* A10. Or in INEX. #
* If INEX is set, round error occured. This is #
* compensated for by 'or-ing' in the INEX2 flag to #
* the lsb of Y. #
* #
* A11. Restore original FPCR; set size ext. #
* Perform FINT operation in the user's rounding mode. #
* Keep the size to extended. #
* #
* A12. Calculate YINT = FINT(Y) according to user's rounding #
* mode. The FPSP routine sintd0 is used. The output #
* is in fp0. #
* #
* A13. Check for LEN digits. #
* If the int operation results in more than LEN digits, #
* or less than LEN -1 digits, adjust ILOG and repeat from #
* A6. This test occurs only on the first pass. If the #
* result is exactly 10^LEN, decrement ILOG and divide #
* the mantissa by 10. #
* #
* A14. Convert the mantissa to bcd. #
* The binstr routine is used to convert the LEN digit #
* mantissa to bcd in memory. The input to binstr is #
* to be a fraction; i.e. (mantissa)/10^LEN and adjusted #
* such that the decimal point is to the left of bit 63. #
* The bcd digits are stored in the correct position in #
* the final string area in memory. #
* #
* A15. Convert the exponent to bcd. #
* As in A14 above, the exp is converted to bcd and the #
* digits are stored in the final string. #
* Test the length of the final exponent string. If the #
* length is 4, set operr. #
* #
* A16. Write sign bits to final string. #
* #
*########################################################################
BINDEC_FLG set EXC_TEMP * DENORM flag
* Constants in extended precision
PLOG2:
.dc.l $3FFD0000,$9A209A84,$FBCFF798,$00000000
PLOG2UP1:
.dc.l $3FFD0000,$9A209A84,$FBCFF799,$00000000
* Constants in single precision
FONE:
.dc.l $3F800000,$00000000,$00000000,$00000000
FTWO:
.dc.l $40000000,$00000000,$00000000,$00000000
FTEN:
.dc.l $41200000,$00000000,$00000000,$00000000
F4933:
.dc.l $459A2800,$00000000,$00000000,$00000000
RBDTBL:
.dc.b 0,0,0,0
.dc.b 3,3,2,2
.dc.b 3,2,2,3
.dc.b 2,3,3,2
* Implementation Notes:
*
* The registers are used as follows:
*
* d0: scratch; LEN input to binstr
* d1: scratch
* d2: upper 32-bits of mantissa for binstr
* d3: scratch;lower 32-bits of mantissa for binstr
* d4: LEN
* d5: LAMBDA/ICTR
* d6: ILOG
* d7: k-factor
* a0: ptr for original operand/final result
* a1: scratch pointer
* a2: pointer to FP_X; abs(original value) in ext
* fp0: scratch
* fp1: scratch
* fp2: scratch
* F_SCR1:
* F_SCR2:
* L_SCR1:
* L_SCR2:
global bindec
bindec:
movem.l d2-d7/a2,-(sp) * {%d2-%d7/%a2}
fmovem.x fp0-fp2,-(sp) * {%fp0-%fp2}
* A1. Set RM and size ext. Set SIGMA = sign input;
* The k-factor is saved for use in d7. Clear BINDEC_FLG for
* separating normalized/denormalized input. If the input
* is a denormalized number, set the BINDEC_FLG memory word
* to signal denorm. If the input is unnormalized, normalize
* the input and test for denormalized result.
*
fmove.l #rm_mode*$10,fpcr * set RM and ext
move.l (a0),L_SCR2(a6) * save exponent for sign check
move.l d0,d7 * move k-factor to d7
clr.b BINDEC_FLG(a6) * clr norm/denorm flag
cmpi.b #DENORM,STAG(a6) * is input a DENORM?
bne.w A2_str * no; input is a NORM
*
* Normalize the denorm
*
un_de_norm:
move.w (a0),d0
andi.w #$7fff,d0 * strip sign of normalized exp
move.l 4(a0),d1
move.l 8(a0),d2
norm_loop:
subq.w #1,d0
lsl.l #1,d2
roxl.l #1,d1
tst.l d1
bge.b norm_loop
*
* Test if the normalized input is denormalized
*
tst.w d0
bgt.b pos_exp * if greater than zero, it is a norm
st BINDEC_FLG(a6) * set flag for denorm
pos_exp:
andi.w #$7fff,d0 * strip sign of normalized exp
move.w d0,(a0)
move.l d1,4(a0)
move.l d2,8(a0)
* A2. Set X = abs(input).
*
A2_str:
move.l (a0),FP_SCR1(a6) * move input to work space
move.l 4(a0),FP_SCR1+4(a6) * move input to work space
move.l 8(a0),FP_SCR1+8(a6) * move input to work space
andi.l #$7fffffff,FP_SCR1(a6) * create abs(X)
* A3. Compute ILOG.
* ILOG is the log base 10 of the input value. It is approx-
* imated by adding e + 0.f when the original value is viewed
* as 2^^e * 1.f in extended precision. This value is stored
* in d6.
*
* Register usage:
* Input/Output
* d0: k-factor/exponent
* d2: x/x
* d3: x/x
* d4: x/x
* d5: x/x
* d6: x/ILOG
* d7: k-factor/Unchanged
* a0: ptr for original operand/final result
* a1: x/x
* a2: x/x
* fp0: x/float(ILOG)
* fp1: x/x
* fp2: x/x
* F_SCR1:x/x
* F_SCR2:Abs(X)/Abs(X) with $3fff exponent
* L_SCR1:x/x
* L_SCR2:first word of X packed/Unchanged
tst.b BINDEC_FLG(a6) * check for denorm
beq.b A3_cont * if clr, continue with norm
move.l #-4933,d6 * force ILOG = -4933
bra.b A4_str
A3_cont:
move.w FP_SCR1(a6),d0 * move exp to d0
move.w #$3fff,FP_SCR1(a6) * replace exponent with 0x3fff
fmove.x FP_SCR1(a6),fp0 * now fp0 has 1.f
subi.w #$3fff,d0 * strip off bias
fadd.w d0,fp0 * add in exp
fsub.s FONE(pc),fp0 * subtract off 1.0
fbge.w pos_res * if pos, branch
fmul.x PLOG2UP1(pc),fp0 * if neg, mul by LOG2UP1
fmove.l fp0,d6 * put ILOG in d6 as a lword
bra.b A4_str * go move out ILOG
pos_res:
fmul.x PLOG2(pc),fp0 * if pos, mul by LOG2
fmove.l fp0,d6 * put ILOG in d6 as a lword
* A4. Clr INEX bit.
* The operation in A3 above may have set INEX2.
A4_str:
fmove.l #0,fpsr * zero all of fpsr - nothing needed
* A5. Set ICTR = 0;
* ICTR is a flag used in A13. It must be set before the
* loop entry A6. The lower word of d5 is used for ICTR.
clr.w d5 * clear ICTR
* A6. Calculate LEN.
* LEN is the number of digits to be displayed. The k-factor
* can dictate either the total number of digits, if it is
* a positive number, or the number of digits after the
* original decimal point which are to be included as
* significant. See the 68882 manual for examples.
* If LEN is computed to be greater than 17, set OPERR in
* USER_FPSR. LEN is stored in d4.
*
* Register usage:
* Input/Output
* d0: exponent/Unchanged
* d2: x/x/scratch
* d3: x/x
* d4: exc picture/LEN
* d5: ICTR/Unchanged
* d6: ILOG/Unchanged
* d7: k-factor/Unchanged
* a0: ptr for original operand/final result
* a1: x/x
* a2: x/x
* fp0: float(ILOG)/Unchanged
* fp1: x/x
* fp2: x/x
* F_SCR1:x/x
* F_SCR2:Abs(X) with $3fff exponent/Unchanged
* L_SCR1:x/x
* L_SCR2:first word of X packed/Unchanged
A6_str:
tst.l d7 * branch on sign of k
ble.b k_neg * if k <= 0, LEN = ILOG + 1 - k
move.l d7,d4 * if k > 0, LEN = k
bra.b len_ck * skip to LEN check
k_neg:
move.l d6,d4 * first load ILOG to d4
sub.l d7,d4 * subtract off k
addq.l #1,d4 * add in the 1
len_ck:
tst.l d4 * LEN check: branch on sign of LEN
ble.b LEN_ng * if neg, set LEN = 1
cmpi.l #17,d4 * test if LEN > 17
ble.b A7_str * if not, forget it
move.l #17,d4 * set max LEN = 17
tst.l d7 * if negative, never set OPERR
ble.b A7_str * if positive, continue
ori.l #opaop_mask,USER_FPSR(a6) * set OPERR & AIOP in USER_FPSR
bra.b A7_str * finished here
LEN_ng:
move.l #1,d4 * min LEN is 1
* A7. Calculate SCALE.
* SCALE is equal to 10^ISCALE, where ISCALE is the number
* of decimal places needed to insure LEN integer digits
* in the output before conversion to bcd. LAMBDA is the sign
* of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
* the rounding mode as given in the following table (see
* Coonen, p. 7.23 as ref.; however, the SCALE variable is
* of opposite sign in bindec.sa from Coonen).
*
* Initial USE
* FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
* ----------------------------------------------
* RN 00 0 0 00/0 RN
* RN 00 0 1 00/0 RN
* RN 00 1 0 00/0 RN
* RN 00 1 1 00/0 RN
* RZ 01 0 0 11/3 RP
* RZ 01 0 1 11/3 RP
* RZ 01 1 0 10/2 RM
* RZ 01 1 1 10/2 RM
* RM 10 0 0 11/3 RP
* RM 10 0 1 10/2 RM
* RM 10 1 0 10/2 RM
* RM 10 1 1 11/3 RP
* RP 11 0 0 10/2 RM
* RP 11 0 1 11/3 RP
* RP 11 1 0 11/3 RP
* RP 11 1 1 10/2 RM
*
* Register usage:
* Input/Output
* d0: exponent/scratch - final is 0
* d2: x/0 or 24 for A9
* d3: x/scratch - offset ptr into PTENRM array
* d4: LEN/Unchanged
* d5: 0/ICTR:LAMBDA
* d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
* d7: k-factor/Unchanged
* a0: ptr for original operand/final result
* a1: x/ptr to PTENRM array
* a2: x/x
* fp0: float(ILOG)/Unchanged
* fp1: x/10^ISCALE
* fp2: x/x
* F_SCR1:x/x
* F_SCR2:Abs(X) with $3fff exponent/Unchanged
* L_SCR1:x/x
* L_SCR2:first word of X packed/Unchanged
A7_str:
tst.l d7 * test sign of k
bgt.b k_pos * if pos and > 0, skip this
cmp.l d6,d7 * test k - ILOG
blt.b k_pos * if ILOG >= k, skip this
move.l d7,d6 * if ((k<0) & (ILOG < k)) ILOG = k
k_pos:
move.l d6,d0 * calc ILOG + 1 - LEN in d0
addq.l #1,d0 * add the 1
sub.l d4,d0 * sub off LEN
swap d5 * use upper word of d5 for LAMBDA
clr.w d5 * set it zero initially
clr.w d2 * set up d2 for very small case
tst.l d0 * test sign of ISCALE
bge.b iscale * if pos, skip next inst
addq.w #1,d5 * if neg, set LAMBDA true
cmpi.l #$ffffecd4,d0 * test iscale <= -4908
bgt.b no_inf * if false, skip rest
addi.l #24,d0 * add in 24 to iscale
move.l #24,d2 * put 24 in d2 for A9
no_inf:
neg.l d0 * and take abs of ISCALE
iscale:
fmove.s FONE(pc),fp1 * init fp1 to 1
bfextu USER_FPCR(a6){#26:#2},d1 * get initial rmode bits
lsl.w #1,d1 * put them in bits 2:1
add.w d5,d1 * add in LAMBDA
lsl.w #1,d1 * put them in bits 3:1
tst.l L_SCR2(a6) * test sign of original x
bge.b x_pos * if pos, don't set bit 0
addq.l #1,d1 * if neg, set bit 0
x_pos:
lea.l RBDTBL(pc),a2 * load rbdtbl base
move.b (a2,d1.l),d3 * load d3 with new rmode
lsl.l #4,d3 * put bits in proper position
fmove.l d3,fpcr * load bits into fpu
lsr.l #4,d3 * put bits in proper position
tst.b d3 * decode new rmode for pten table
bne.b not_rn * if zero, it is RN
lea.l PTENRN.l(pc),a1 * load a1 with RN table base
bra.b rmode * exit decode
not_rn:
lsr.b #1,d3 * get lsb in carry
bcc.b not_rp2 * if carry clear, it is RM
lea.l PTENRP.l(pc),a1 * load a1 with RP table base
bra.b rmode * exit decode
not_rp2:
lea.l PTENRM.l(pc),a1 * load a1 with RM table base
rmode:
clr.l d3 * clr table index
e_loop2:
lsr.l #1,d0 * shift next bit into carry
bcc.b e_next2 * if zero, skip the mul
fmul.x (a1,d3.l),fp1 * mul by 10**(d3_bit_no)
e_next2:
addi.l #12,d3 * inc d3 to next pwrten table entry
tst.l d0 * test if ISCALE is zero
bne.b e_loop2 * if not, loop
* A8. Clr INEX; Force RZ.
* The operation in A3 above may have set INEX2.
* RZ mode is forced for the scaling operation to insure
* only one rounding error. The grs bits are collected in
* the INEX flag for use in A10.
*
* Register usage:
* Input/Output
fmove.l #0,fpsr * clr INEX
fmove.l #rz_mode*$10,fpcr * set RZ rounding mode
* A9. Scale X -> Y.
* The mantissa is scaled to the desired number of significant
* digits. The excess digits are collected in INEX2. If mul,
* Check d2 for excess 10 exponential value. If not zero,
* the iscale value would have caused the pwrten calculation
* to overflow. Only a negative iscale can cause this, so
* multiply by 10^(d2), which is now only allowed to be 24,
* with a multiply by 10^8 and 10^16, which is exact since
* 10^24 is exact. If the input was denormalized, we must
* create a busy stack frame with the mul command and the
* two operands, and allow the fpu to complete the multiply.
*
* Register usage:
* Input/Output
* d0: FPCR with RZ mode/Unchanged
* d2: 0 or 24/unchanged
* d3: x/x
* d4: LEN/Unchanged
* d5: ICTR:LAMBDA
* d6: ILOG/Unchanged
* d7: k-factor/Unchanged
* a0: ptr for original operand/final result
* a1: ptr to PTENRM array/Unchanged
* a2: x/x
* fp0: float(ILOG)/X adjusted for SCALE (Y)
* fp1: 10^ISCALE/Unchanged
* fp2: x/x
* F_SCR1:x/x
* F_SCR2:Abs(X) with $3fff exponent/Unchanged
* L_SCR1:x/x
* L_SCR2:first word of X packed/Unchanged
A9_str:
fmove.x (a0),fp0 * load X from memory
fabs.x fp0 * use abs(X)
tst.w d5 * LAMBDA is in lower word of d5
bne.b sc_mul * if neg (LAMBDA = 1), scale by mul
fdiv.x fp1,fp0 * calculate X / SCALE -> Y to fp0
bra.w A10_st * branch to A10
sc_mul:
tst.b BINDEC_FLG(a6) * check for denorm
beq.w A9_norm * if norm, continue with mul
* for DENORM, we must calculate:
* fp0 = input_op * 10^ISCALE * 10^24
* since the input operand is a DENORM, we can't multiply it directly.
* so, we do the multiplication of the exponents and mantissas separately.
* in this way, we avoid underflow on intermediate stages of the
* multiplication and guarantee a result without exception.
fmovem.x fp1,-(sp) * save 10^ISCALE to stack
move.w (sp),d3 * grab exponent
andi.w #$7fff,d3 * clear sign
ori.w #$8000,(a0) * make DENORM exp negative
add.w (a0),d3 * add DENORM exp to 10^ISCALE exp
subi.w #$3fff,d3 * subtract BIAS
add.w 36(a1),d3
subi.w #$3fff,d3 * subtract BIAS
add.w 48(a1),d3
subi.w #$3fff,d3 * subtract BIAS
bmi.w sc_mul_err * is result is DENORM, punt!!!
andi.w #$8000,(sp) * keep sign
or.w d3,(sp) * insert new exponent
andi.w #$7fff,(a0) * clear sign bit on DENORM again
move.l $8(a0),-(sp) * put input op mantissa on stk
move.l $4(a0),-(sp)
move.l #$3fff0000,-(sp) * force exp to zero
fmovem.x (sp)+,fp0 * load normalized DENORM into fp0
fmul.x (sp)+,fp0
* fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
* fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
move.l 36+8(a1),-(sp) * get 10^8 mantissa
move.l 36+4(a1),-(sp)
move.l #$3fff0000,-(sp) * force exp to zero
move.l 48+8(a1),-(sp) * get 10^16 mantissa
move.l 48+4(a1),-(sp)
move.l #$3fff0000,-(sp) * force exp to zero
fmul.x (sp)+,fp0 * multiply fp0 by 10^8
fmul.x (sp)+,fp0 * multiply fp0 by 10^16
bra.b A10_st
sc_mul_err:
bra.b sc_mul_err
A9_norm:
tst.w d2 * test for small exp case
beq.b A9_con * if zero, continue as normal
fmul.x 36(a1),fp0 * multiply fp0 by 10^8
fmul.x 48(a1),fp0 * multiply fp0 by 10^16
A9_con:
fmul.x fp1,fp0 * calculate X * SCALE -> Y to fp0
* A10. Or in INEX.
* If INEX is set, round error occured. This is compensated
* for by 'or-ing' in the INEX2 flag to the lsb of Y.
*
* Register usage:
* Input/Output
* d0: FPCR with RZ mode/FPSR with INEX2 isolated
* d2: x/x
* d3: x/x
* d4: LEN/Unchanged
* d5: ICTR:LAMBDA
* d6: ILOG/Unchanged
* d7: k-factor/Unchanged
* a0: ptr for original operand/final result
* a1: ptr to PTENxx array/Unchanged
* a2: x/ptr to FP_SCR1(a6)
* fp0: Y/Y with lsb adjusted
* fp1: 10^ISCALE/Unchanged
* fp2: x/x
A10_st:
fmove.l fpsr,d0 * get FPSR
fmove.x fp0,FP_SCR1(a6) * move Y to memory
lea.l FP_SCR1(a6),a2 * load a2 with ptr to FP_SCR1
btst #9,d0 * check if INEX2 set
beq.b A11_st * if clear, skip rest
ori.l #1,8(a2) * or in 1 to lsb of mantissa
fmove.x FP_SCR1(a6),fp0 * write adjusted Y back to fpu
* A11. Restore original FPCR; set size ext.
* Perform FINT operation in the user's rounding mode. Keep
* the size to extended. The sintdo entry point in the sint
* routine expects the FPCR value to be in USER_FPCR for
* mode and precision. The original FPCR is saved in L_SCR1.
A11_st:
move.l USER_FPCR(a6),L_SCR1(a6) * save it for later
andi.l #$00000030,USER_FPCR(a6) * set size to ext,
* ;block exceptions
* A12. Calculate YINT = FINT(Y) according to user's rounding mode.
* The FPSP routine sintd0 is used. The output is in fp0.
*
* Register usage:
* Input/Output
* d0: FPSR with AINEX cleared/FPCR with size set to ext
* d2: x/x/scratch
* d3: x/x
* d4: LEN/Unchanged
* d5: ICTR:LAMBDA/Unchanged
* d6: ILOG/Unchanged
* d7: k-factor/Unchanged
* a0: ptr for original operand/src ptr for sintdo
* a1: ptr to PTENxx array/Unchanged
* a2: ptr to FP_SCR1(a6)/Unchanged
* a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
* fp0: Y/YINT
* fp1: 10^ISCALE/Unchanged
* fp2: x/x
* F_SCR1:x/x
* F_SCR2:Y adjusted for inex/Y with original exponent
* L_SCR1:x/original USER_FPCR
* L_SCR2:first word of X packed/Unchanged
A12_st:
movem.l d0-d1/a0-a1,-(sp) * save regs used by sintd0 {%d0-%d1/%a0-%a1}
move.l L_SCR1(a6),-(sp)
move.l L_SCR2(a6),-(sp)
lea.l FP_SCR1(a6),a0 * a0 is ptr to FP_SCR1(a6)
fmove.x fp0,(a0) * move Y to memory at FP_SCR1(a6)
tst.l L_SCR2(a6) * test sign of original operand
bge.b do_fint12 * if pos, use Y
ori.l #$80000000,(a0) * if neg, use -Y
do_fint12:
move.l USER_FPSR(a6),-(sp)
* bsr sintdo # sint routine returns int in fp0
fmove.l USER_FPCR(a6),fpcr
fmove.l #$0,fpsr * clear the AEXC bits!!!
*# mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode
*# andi.l &0x00000030,%d0
*# fmov.l %d0,%fpcr
fint.x FP_SCR1(a6),fp0 * do fint()
fmove.l fpsr,d0
or.w d0,FPSR_EXCEPT(a6)
*# fmov.l &0x0,%fpcr
*# fmov.l %fpsr,%d0 # don't keep ccodes
*# or.w %d0,FPSR_EXCEPT(%a6)
move.b (sp),USER_FPSR(a6)
add.l #4,sp
move.l (sp)+,L_SCR2(a6)
move.l (sp)+,L_SCR1(a6)
movem.l (sp)+,d0-d1/a0-a1 * restore regs used by sint {%d0-%d1/%a0-%a1}
move.l L_SCR2(a6),FP_SCR1(a6) * restore original exponent
move.l L_SCR1(a6),USER_FPCR(a6) * restore user's FPCR
* A13. Check for LEN digits.
* If the int operation results in more than LEN digits,
* or less than LEN -1 digits, adjust ILOG and repeat from
* A6. This test occurs only on the first pass. If the
* result is exactly 10^LEN, decrement ILOG and divide
* the mantissa by 10. The calculation of 10^LEN cannot
* be inexact, since all powers of ten upto 10^27 are exact
* in extended precision, so the use of a previous power-of-ten
* table will introduce no error.
*
*
* Register usage:
* Input/Output
* d0: FPCR with size set to ext/scratch final = 0
* d2: x/x
* d3: x/scratch final = x
* d4: LEN/LEN adjusted
* d5: ICTR:LAMBDA/LAMBDA:ICTR
* d6: ILOG/ILOG adjusted
* d7: k-factor/Unchanged
* a0: pointer into memory for packed bcd string formation
* a1: ptr to PTENxx array/Unchanged
* a2: ptr to FP_SCR1(a6)/Unchanged
* fp0: int portion of Y/abs(YINT) adjusted
* fp1: 10^ISCALE/Unchanged
* fp2: x/10^LEN
* F_SCR1:x/x
* F_SCR2:Y with original exponent/Unchanged
* L_SCR1:original USER_FPCR/Unchanged
* L_SCR2:first word of X packed/Unchanged
A13_st:
swap d5 * put ICTR in lower word of d5
tst.w d5 * check if ICTR = 0
bne.l not_zr * if non-zero, go to second test
*
* Compute 10^(LEN-1)
*
fmove.s FONE(pc),fp2 * init fp2 to 1.0
move.l d4,d0 * put LEN in d0
subq.l #1,d0 * d0 = LEN -1
clr.l d3 * clr table index
l_loop:
lsr.l #1,d0 * shift next bit into carry
bcc.b l_next * if zero, skip the mul
fmul.x (a1,d3.l),fp2 * mul by 10**(d3_bit_no)
l_next:
addi.l #12,d3 * inc d3 to next pwrten table entry
tst.l d0 * test if LEN is zero
bne.b l_loop * if not, loop
*
* 10^LEN-1 is computed for this test and A14. If the input was
* denormalized, check only the case in which YINT > 10^LEN.
*
tst.b BINDEC_FLG(a6) * check if input was norm
beq.b A13_con * if norm, continue with checking
fabs.x fp0 * take abs of YINT
bra.l test_2
*
* Compare abs(YINT) to 10^(LEN-1) and 10^LEN
*
A13_con:
fabs.x fp0 * take abs of YINT
fcmp.x fp2,fp0 * compare abs(YINT) with 10^(LEN-1)
fbge.w test_2 * if greater, do next test
subq.l #1,d6 * subtract 1 from ILOG
move.w #1,d5 * set ICTR
fmove.l #rm_mode*$10,fpcr * set rmode to RM
fmul.s FTEN(pc),fp2 * compute 10^LEN
bra.w A6_str * return to A6 and recompute YINT
test_2:
fmul.s FTEN(pc),fp2 * compute 10^LEN
fcmp.x fp2,fp0 * compare abs(YINT) with 10^LEN
fblt.w A14_st * if less, all is ok, go to A14
fbgt.w fix_ex * if greater, fix and redo
fdiv.s FTEN(pc),fp0 * if equal, divide by 10
addq.l #1,d6 * and inc ILOG
bra.b A14_st * and continue elsewhere
fix_ex:
addq.l #1,d6 * increment ILOG by 1
move.w #1,d5 * set ICTR
fmove.l #rm_mode*$10,fpcr * set rmode to RM
bra.w A6_str * return to A6 and recompute YINT
*
* Since ICTR <> 0, we have already been through one adjustment,
* and shouldn't have another; this is to check if abs(YINT) = 10^LEN
* 10^LEN is again computed using whatever table is in a1 since the
* value calculated cannot be inexact.
*
not_zr:
fmove.s FONE(pc),fp2 * init fp2 to 1.0
move.l d4,d0 * put LEN in d0
clr.l d3 * clr table index
z_loop:
lsr.l #1,d0 * shift next bit into carry
bcc.b z_next * if zero, skip the mul
fmul.x (a1,d3.l),fp2 * mul by 10**(d3_bit_no)
z_next:
addi.l #12,d3 * inc d3 to next pwrten table entry
tst.l d0 * test if LEN is zero
bne.b z_loop * if not, loop
fabs.x fp0 * get abs(YINT)
fcmp.x fp2,fp0 * check if abs(YINT) = 10^LEN
fbne.w A14_st * if not, skip this
fdiv.s FTEN(pc),fp0 * divide abs(YINT) by 10
addq.l #1,d6 * and inc ILOG by 1
addq.l #1,d4 * and inc LEN
fmul.s FTEN(pc),fp2 * if LEN++, the get 10^^LEN
* A14. Convert the mantissa to bcd.
* The binstr routine is used to convert the LEN digit
* mantissa to bcd in memory. The input to binstr is
* to be a fraction; i.e. (mantissa)/10^LEN and adjusted
* such that the decimal point is to the left of bit 63.
* The bcd digits are stored in the correct position in
* the final string area in memory.
*
*
* Register usage:
* Input/Output
* d0: x/LEN call to binstr - final is 0
* d1: x/0
* d2: x/ms 32-bits of mant of abs(YINT)
* d3: x/ls 32-bits of mant of abs(YINT)
* d4: LEN/Unchanged
* d5: ICTR:LAMBDA/LAMBDA:ICTR
* d6: ILOG
* d7: k-factor/Unchanged
* a0: pointer into memory for packed bcd string formation
* /ptr to first mantissa byte in result string
* a1: ptr to PTENxx array/Unchanged
* a2: ptr to FP_SCR1(a6)/Unchanged
* fp0: int portion of Y/abs(YINT) adjusted
* fp1: 10^ISCALE/Unchanged
* fp2: 10^LEN/Unchanged
* F_SCR1:x/Work area for final result
* F_SCR2:Y with original exponent/Unchanged
* L_SCR1:original USER_FPCR/Unchanged
* L_SCR2:first word of X packed/Unchanged
A14_st:
fmove.l #rz_mode*$10,fpcr * force rz for conversion
fdiv.x fp2,fp0 * divide abs(YINT) by 10^LEN
lea.l FP_SCR0(a6),a0
fmove.x fp0,(a0) * move abs(YINT)/10^LEN to memory
move.l 4(a0),d2 * move 2nd word of FP_RES to d2
move.l 8(a0),d3 * move 3rd word of FP_RES to d3
clr.l 4(a0) * zero word 2 of FP_RES
clr.l 8(a0) * zero word 3 of FP_RES
move.l (a0),d0 * move exponent to d0
swap d0 * put exponent in lower word
beq.b no_sft * if zero, don't shift
subi.l #$3ffd,d0 * sub bias less 2 to make fract
tst.l d0 * check if > 1
bgt.b no_sft * if so, don't shift
neg.l d0 * make exp positive
m_loop:
lsr.l #1,d2 * shift d2:d3 right, add 0s
roxr.l #1,d3 * the number of places
dbf.w d0,m_loop * given in d0
no_sft:
tst.l d2 * check for mantissa of zero
bne.b no_zr * if not, go on
tst.l d3 * continue zero check
beq.b zer_m * if zero, go directly to binstr
no_zr:
clr.l d1 * put zero in d1 for addx
addi.l #$00000080,d3 * inc at bit 7
addx.l d1,d2 * continue inc
andi.l #$ffffff80,d3 * strip off lsb not used by 882
zer_m:
move.l d4,d0 * put LEN in d0 for binstr call
addq.l #3,a0 * a0 points to M16 byte in result
bsr.l binstr * call binstr to convert mant
* A15. Convert the exponent to bcd.
* As in A14 above, the exp is converted to bcd and the
* digits are stored in the final string.
*
* Digits are stored in L_SCR1(a6) on return from BINDEC as:
*
* 32 16 15 0
* -----------------------------------------
* | 0 | e3 | e2 | e1 | e4 | X | X | X |
* -----------------------------------------
*
* And are moved into their proper places in FP_SCR0. If digit e4
* is non-zero, OPERR is signaled. In all cases, all 4 digits are
* written as specified in the 881/882 manual for packed decimal.
*
* Register usage:
* Input/Output
* d0: x/LEN call to binstr - final is 0
* d1: x/scratch (0);shift count for final exponent packing
* d2: x/ms 32-bits of exp fraction/scratch
* d3: x/ls 32-bits of exp fraction
* d4: LEN/Unchanged
* d5: ICTR:LAMBDA/LAMBDA:ICTR
* d6: ILOG
* d7: k-factor/Unchanged
* a0: ptr to result string/ptr to L_SCR1(a6)
* a1: ptr to PTENxx array/Unchanged
* a2: ptr to FP_SCR1(a6)/Unchanged
* fp0: abs(YINT) adjusted/float(ILOG)
* fp1: 10^ISCALE/Unchanged
* fp2: 10^LEN/Unchanged
* F_SCR1:Work area for final result/BCD result
* F_SCR2:Y with original exponent/ILOG/10^4
* L_SCR1:original USER_FPCR/Exponent digits on return from binstr
* L_SCR2:first word of X packed/Unchanged
A15_st:
tst.b BINDEC_FLG(a6) * check for denorm
beq.b not_denorm
ftst.x fp0 * test for zero
fbeq.w den_zero * if zero, use k-factor or 4933
fmove.l d6,fp0 * float ILOG
fabs.x fp0 * get abs of ILOG
bra.b convrt
den_zero:
tst.l d7 * check sign of the k-factor
blt.b use_ilog * if negative, use ILOG
fmove.s F4933(pc),fp0 * force exponent to 4933
bra.b convrt * do it
use_ilog:
fmove.l d6,fp0 * float ILOG
fabs.x fp0 * get abs of ILOG
bra.b convrt
not_denorm:
ftst.x fp0 * test for zero
fbne.w not_zero * if zero, force exponent
fmove.s FONE(pc),fp0 * force exponent to 1
bra.b convrt * do it
not_zero:
fmove.l d6,fp0 * float ILOG
fabs.x fp0 * get abs of ILOG
convrt:
fdiv.x 24(a1),fp0 * compute ILOG/10^4
fmove.x fp0,FP_SCR1(a6) * store fp0 in memory
move.l 4(a2),d2 * move word 2 to d2
move.l 8(a2),d3 * move word 3 to d3
move.w (a2),d0 * move exp to d0
beq.b x_loop_fin * if zero, skip the shift
subi.w #$3ffd,d0 * subtract off bias
neg.w d0 * make exp positive
x_loop:
lsr.l #1,d2 * shift d2:d3 right
roxr.l #1,d3 * the number of places
dbf.w d0,x_loop * given in d0
x_loop_fin:
clr.l d1 * put zero in d1 for addx
addi.l #$00000080,d3 * inc at bit 6
addx.l d1,d2 * continue inc
andi.l #$ffffff80,d3 * strip off lsb not used by 882
move.l #4,d0 * put 4 in d0 for binstr call
lea.l L_SCR1(a6),a0 * a0 is ptr to L_SCR1 for exp digits
bsr.l binstr * call binstr to convert exp
move.l L_SCR1(a6),d0 * load L_SCR1 lword to d0
move.l #12,d1 * use d1 for shift count
lsr.l d1,d0 * shift d0 right by 12
bfins d0,FP_SCR0(a6){#4:#12} * put e3:e2:e1 in FP_SCR0
lsr.l d1,d0 * shift d0 right by 12
bfins d0,FP_SCR0(a6){#16:#4} * put e4 in FP_SCR0
tst.b d0 * check if e4 is zero
beq.b A16_st * if zero, skip rest
ori.l #opaop_mask,USER_FPSR(a6) * set OPERR & AIOP in USER_FPSR
* A16. Write sign bits to final string.
* Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
*
* Register usage:
* Input/Output
* d0: x/scratch - final is x
* d2: x/x
* d3: x/x
* d4: LEN/Unchanged
* d5: ICTR:LAMBDA/LAMBDA:ICTR
* d6: ILOG/ILOG adjusted
* d7: k-factor/Unchanged
* a0: ptr to L_SCR1(a6)/Unchanged
* a1: ptr to PTENxx array/Unchanged
* a2: ptr to FP_SCR1(a6)/Unchanged
* fp0: float(ILOG)/Unchanged
* fp1: 10^ISCALE/Unchanged
* fp2: 10^LEN/Unchanged
* F_SCR1:BCD result with correct signs
* F_SCR2:ILOG/10^4
* L_SCR1:Exponent digits on return from binstr
* L_SCR2:first word of X packed/Unchanged
A16_st:
clr.l d0 * clr d0 for collection of signs
andi.b #$0f,FP_SCR0(a6) * clear first nibble of FP_SCR0
tst.l L_SCR2(a6) * check sign of original mantissa
bge.b mant_p * if pos, don't set SM
move.l #2,d0 * move 2 in to d0 for SM
mant_p:
tst.l d6 * check sign of ILOG
bge.b wr_sgn * if pos, don't set SE
addq.l #1,d0 * set bit 0 in d0 for SE
wr_sgn:
bfins d0,FP_SCR0(a6){#0:#2} * insert SM and SE into FP_SCR0
* Clean up and restore all registers used.
fmove.l #0,fpsr * clear possible inex2/ainex bits
fmovem.x (sp)+,fp0-fp2 * {%fp0-%fp2}
movem.l (sp)+,d2-d7/a2 * {%d2-%d7/%a2}
rts
global PTENRN
PTENRN:
.dc.l $40020000,$A0000000,$00000000 * 10 ^ 1
.dc.l $40050000,$C8000000,$00000000 * 10 ^ 2
.dc.l $400C0000,$9C400000,$00000000 * 10 ^ 4
.dc.l $40190000,$BEBC2000,$00000000 * 10 ^ 8
.dc.l $40340000,$8E1BC9BF,$04000000 * 10 ^ 16
.dc.l $40690000,$9DC5ADA8,$2B70B59E * 10 ^ 32
.dc.l $40D30000,$C2781F49,$FFCFA6D5 * 10 ^ 64
.dc.l $41A80000,$93BA47C9,$80E98CE0 * 10 ^ 128
.dc.l $43510000,$AA7EEBFB,$9DF9DE8E * 10 ^ 256
.dc.l $46A30000,$E319A0AE,$A60E91C7 * 10 ^ 512
.dc.l $4D480000,$C9767586,$81750C17 * 10 ^ 1024
.dc.l $5A920000,$9E8B3B5D,$C53D5DE5 * 10 ^ 2048
.dc.l $75250000,$C4605202,$8A20979B * 10 ^ 4096
global PTENRP
PTENRP:
.dc.l $40020000,$A0000000,$00000000 * 10 ^ 1
.dc.l $40050000,$C8000000,$00000000 * 10 ^ 2
.dc.l $400C0000,$9C400000,$00000000 * 10 ^ 4
.dc.l $40190000,$BEBC2000,$00000000 * 10 ^ 8
.dc.l $40340000,$8E1BC9BF,$04000000 * 10 ^ 16
.dc.l $40690000,$9DC5ADA8,$2B70B59E * 10 ^ 32
.dc.l $40D30000,$C2781F49,$FFCFA6D6 * 10 ^ 64
.dc.l $41A80000,$93BA47C9,$80E98CE0 * 10 ^ 128
.dc.l $43510000,$AA7EEBFB,$9DF9DE8E * 10 ^ 256
.dc.l $46A30000,$E319A0AE,$A60E91C7 * 10 ^ 512
.dc.l $4D480000,$C9767586,$81750C18 * 10 ^ 1024
.dc.l $5A920000,$9E8B3B5D,$C53D5DE5 * 10 ^ 2048
.dc.l $75250000,$C4605202,$8A20979B * 10 ^ 4096
global PTENRM
PTENRM:
.dc.l $40020000,$A0000000,$00000000 * 10 ^ 1
.dc.l $40050000,$C8000000,$00000000 * 10 ^ 2
.dc.l $400C0000,$9C400000,$00000000 * 10 ^ 4
.dc.l $40190000,$BEBC2000,$00000000 * 10 ^ 8
.dc.l $40340000,$8E1BC9BF,$04000000 * 10 ^ 16
.dc.l $40690000,$9DC5ADA8,$2B70B59D * 10 ^ 32
.dc.l $40D30000,$C2781F49,$FFCFA6D5 * 10 ^ 64
.dc.l $41A80000,$93BA47C9,$80E98CDF * 10 ^ 128
.dc.l $43510000,$AA7EEBFB,$9DF9DE8D * 10 ^ 256
.dc.l $46A30000,$E319A0AE,$A60E91C6 * 10 ^ 512
.dc.l $4D480000,$C9767586,$81750C17 * 10 ^ 1024
.dc.l $5A920000,$9E8B3B5D,$C53D5DE4 * 10 ^ 2048
.dc.l $75250000,$C4605202,$8A20979A * 10 ^ 4096
*########################################################################
* binstr(): Converts a 64-bit binary integer to bcd. #
* #
* INPUT *************************************************************** #
* d2:d3 = 64-bit binary integer #
* d0 = desired length (LEN) #
* a0 = pointer to start in memory for bcd characters #
* (This pointer must point to byte 4 of the first #
* lword of the packed decimal memory string.) #
* #
* OUTPUT ************************************************************** #
* a0 = pointer to LEN bcd digits representing the 64-bit integer. #
* #
* ALGORITHM *********************************************************** #
* The 64-bit binary is assumed to have a decimal point before #
* bit 63. The fraction is multiplied by 10 using a mul by 2 #
* shift and a mul by 8 shift. The bits shifted out of the #
* msb form a decimal digit. This process is iterated until #
* LEN digits are formed. #
* #
* A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #
* digit formed will be assumed the least significant. This is #
* to force the first byte formed to have a 0 in the upper 4 bits. #
* #
* A2. Beginning of the loop: #
* Copy the fraction in d2:d3 to d4:d5. #
* #
* A3. Multiply the fraction in d2:d3 by 8 using bit-field #
* extracts and shifts. The three msbs from d2 will go into d1. #
* #
* A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb #
* will be collected by the carry. #
* #
* A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
* into d2:d3. D1 will contain the bcd digit formed. #
* #
* A6. Test d7. If zero, the digit formed is the ms digit. If non- #
* zero, it is the ls digit. Put the digit in its place in the #
* upper word of d0. If it is the ls digit, write the word #
* from d0 to memory. #
* #
* A7. Decrement d6 (LEN counter) and repeat the loop until zero. #
* #
*########################################################################
* Implementation Notes:
*
* The registers are used as follows:
*
* d0: LEN counter
* d1: temp used to form the digit
* d2: upper 32-bits of fraction for mul by 8
* d3: lower 32-bits of fraction for mul by 8
* d4: upper 32-bits of fraction for mul by 2
* d5: lower 32-bits of fraction for mul by 2
* d6: temp for bit-field extracts
* d7: byte digit formation word;digit count {0,1}
* a0: pointer into memory for packed bcd string formation
*
global binstr
binstr:
movem.l d0-d7,-(sp) * {%d0-%d7}
*
* A1: Init d7
*
move.l #1,d7 * init d7 for second digit
subq.l #1,d0 * for dbf d0 would have LEN+1 passes
*
* A2. Copy d2:d3 to d4:d5. Start loop.
*
loop:
move.l d2,d4 * copy the fraction before muls
move.l d3,d5 * to d4:d5
*
* A3. Multiply d2:d3 by 8; extract msbs into d1.
*
bfextu d2{#0:#3},d1 * copy 3 msbs of d2 into d1
asl.l #3,d2 * shift d2 left by 3 places
bfextu d3{#0:#3},d6 * copy 3 msbs of d3 into d6
asl.l #3,d3 * shift d3 left by 3 places
or.l d6,d2 * or in msbs from d3 into d2
*
* A4. Multiply d4:d5 by 2; add carry out to d1.
*
asl.l #1,d5 * mul d5 by 2
roxl.l #1,d4 * mul d4 by 2
swap d6 * put 0 in d6 lower word
addx.w d6,d1 * add in extend from mul by 2
*
* A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
*
add.l d5,d3 * add lower 32 bits
nop * ERRATA FIX #13 (Rev. 1.2 6/6/90)
addx.l d4,d2 * add with extend upper 32 bits
nop * ERRATA FIX #13 (Rev. 1.2 6/6/90)
addx.w d6,d1 * add in extend from add to d1
swap d6 * with d6 = 0; put 0 in upper word
*
* A6. Test d7 and branch.
*
tst.w d7 * if zero, store digit & to loop
beq.b first_d * if non-zero, form byte & write
sec_d:
swap d7 * bring first digit to word d7b
asl.w #4,d7 * first digit in upper 4 bits d7b
add.w d1,d7 * add in ls digit to d7b
move.b d7,(a0)+ * store d7b byte in memory
swap d7 * put LEN counter in word d7a
clr.w d7 * set d7a to signal no digits done
dbf.w d0,loop * do loop some more!
bra.b end_bstr * finished, so exit
first_d:
swap d7 * put digit word in d7b
move.w d1,d7 * put new digit in d7b
swap d7 * put LEN counter in word d7a
addq.w #1,d7 * set d7a to signal first digit done
dbf.w d0,loop * do loop some more!
swap d7 * put last digit in string
lsl.w #4,d7 * move it to upper 4 bits
move.b d7,(a0)+ * store it in memory string
*
* Clean up and return with result in fp0.
*
end_bstr:
movem.l (sp)+,d0-d7 * {%d0-%d7}
rts
*########################################################################
* XDEF **************************************************************** #
* facc_in_b(): dmem_read_byte failed #
* facc_in_w(): dmem_read_word failed #
* facc_in_l(): dmem_read_long failed #
* facc_in_d(): dmem_read of dbl prec failed #
* facc_in_x(): dmem_read of ext prec failed #
* #
* facc_out_b(): dmem_write_byte failed #
* facc_out_w(): dmem_write_word failed #
* facc_out_l(): dmem_write_long failed #
* facc_out_d(): dmem_write of dbl prec failed #
* facc_out_x(): dmem_write of ext prec failed #
* #
* XREF **************************************************************** #
* _real_access() - exit through access error handler #
* #
* INPUT *************************************************************** #
* None #
* #
* OUTPUT ************************************************************** #
* None #
* #
* ALGORITHM *********************************************************** #
* Flow jumps here when an FP data fetch call gets an error #
* result. This means the operating system wants an access error frame #
* made out of the current exception stack frame. #
* So, we first call restore() which makes sure that any updated #
* -(an)+ register gets returned to its pre-exception value and then #
* we change the stack to an acess error stack frame. #
* #
*########################################################################
facc_in_b:
moveq.l #$1,d0 * one byte
bsr.w restore * fix An
move.w #$0121,EXC_VOFF(a6) * set FSLW
bra.w facc_finish
facc_in_w:
moveq.l #$2,d0 * two bytes
bsr.w restore * fix An
move.w #$0141,EXC_VOFF(a6) * set FSLW
bra.b facc_finish
facc_in_l:
moveq.l #$4,d0 * four bytes
bsr.w restore * fix An
move.w #$0101,EXC_VOFF(a6) * set FSLW
bra.b facc_finish
facc_in_d:
moveq.l #$8,d0 * eight bytes
bsr.w restore * fix An
move.w #$0161,EXC_VOFF(a6) * set FSLW
bra.b facc_finish
facc_in_x:
moveq.l #$c,d0 * twelve bytes
bsr.w restore * fix An
move.w #$0161,EXC_VOFF(a6) * set FSLW
bra.b facc_finish
*###############################################################
facc_out_b:
moveq.l #$1,d0 * one byte
bsr.w restore * restore An
move.w #$00a1,EXC_VOFF(a6) * set FSLW
bra.b facc_finish
facc_out_w:
moveq.l #$2,d0 * two bytes
bsr.w restore * restore An
move.w #$00c1,EXC_VOFF(a6) * set FSLW
bra.b facc_finish
facc_out_l:
moveq.l #$4,d0 * four bytes
bsr.w restore * restore An
move.w #$0081,EXC_VOFF(a6) * set FSLW
bra.b facc_finish
facc_out_d:
moveq.l #$8,d0 * eight bytes
bsr.w restore * restore An
move.w #$00e1,EXC_VOFF(a6) * set FSLW
bra.b facc_finish
facc_out_x:
move.l #$c,d0 * twelve bytes
bsr.w restore * restore An
move.w #$00e1,EXC_VOFF(a6) * set FSLW
* here's where we actually create the access error frame from the
* current exception stack frame.
facc_finish:
move.l USER_FPIAR(a6),EXC_PC(a6) * store current PC
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
move.l (sp),-(sp) * store SR, hi(PC)
move.l $8(sp),$4(sp) * store lo(PC)
move.l $c(sp),$8(sp) * store EA
move.l #$00000001,$c(sp) * store FSLW
move.w $6(sp),$c(sp) * fix FSLW (size)
move.w #$4008,$6(sp) * store voff
btst #$5,(sp) * supervisor or user mode?
beq.b facc_out2 * user
bset #$2,$d(sp) * set supervisor TM bit
facc_out2:
bra.l _real_access
*#################################################################
* if the effective addressing mode was predecrement or postincrement,
* the emulation has already changed its value to the correct post-
* instruction value. but since we're exiting to the access error
* handler, then AN must be returned to its pre-instruction value.
* we do that here.
restore:
move.b EXC_OPWORD+$1(a6),d1
andi.b #$38,d1 * extract opmode
cmpi.b #$18,d1 * postinc?
beq.w rest_inc
cmpi.b #$20,d1 * predec?
beq.w rest_dec
rts
rest_inc:
move.b EXC_OPWORD+$1(a6),d1
andi.w #$0007,d1 * fetch An
move.w (tbl_rest_inc.b,pc,d1.w*2),d1
jmp (tbl_rest_inc.b,pc,d1.w*1)
tbl_rest_inc:
.dc.w ri_a0-tbl_rest_inc
.dc.w ri_a1-tbl_rest_inc
.dc.w ri_a2-tbl_rest_inc
.dc.w ri_a3-tbl_rest_inc
.dc.w ri_a4-tbl_rest_inc
.dc.w ri_a5-tbl_rest_inc
.dc.w ri_a6-tbl_rest_inc
.dc.w ri_a7-tbl_rest_inc
ri_a0:
sub.l d0,EXC_DREGS+$8(a6) * fix stacked a0
rts
ri_a1:
sub.l d0,EXC_DREGS+$c(a6) * fix stacked a1
rts
ri_a2:
sub.l d0,a2 * fix a2
rts
ri_a3:
sub.l d0,a3 * fix a3
rts
ri_a4:
sub.l d0,a4 * fix a4
rts
ri_a5:
sub.l d0,a5 * fix a5
rts
ri_a6:
sub.l d0,(a6) * fix stacked a6
rts
* if it's a fmove out instruction, we don't have to fix a7
* because we hadn't changed it yet. if it's an opclass two
* instruction (data moved in) and the exception was in supervisor
* mode, then also also wasn't updated. if it was user mode, then
* restore the correct a7 which is in the USP currently.
ri_a7:
cmpi.b #$30,EXC_VOFF(a6) * move in or out?
bne.b ri_a7_done * out
btst #$5,EXC_SR(a6) * user or supervisor?
bne.b ri_a7_done * supervisor
movec usp,a0 * restore USP
sub.l d0,a0
movec a0,usp
ri_a7_done:
rts
* need to invert adjustment value if the <ea> was predec
rest_dec:
neg.l d0
bra.b rest_inc