misc/060tsys/t28fpsp.s (1/3)
1 2 3;----------------------------------------------------------------
;
; 68060 FLOATING-POINT SOFTWARE PACKAGE
;
;----------------------------------------------------------------
.cpu 68060
.xdef _060FPSP_TABLE
.xdef _fpsp_snan ;_060FPSP_TABLE+$00
;_060FPSP_TABLE+$00001742
.xdef _fpsp_operr ;_060FPSP_TABLE+$08
;_060FPSP_TABLE+$000015FE
.xdef _fpsp_ovfl ;_060FPSP_TABLE+$10
;_060FPSP_TABLE+$000002C8
.xdef _fpsp_unfl ;_060FPSP_TABLE+$18
;_060FPSP_TABLE+$0000048A
.xdef _fpsp_dz ;_060FPSP_TABLE+$20
;_060FPSP_TABLE+$00001B32
.xdef _fpsp_inex ;_060FPSP_TABLE+$28
;_060FPSP_TABLE+$000019D4
.xdef _fpsp_fline ;_060FPSP_TABLE+$30
;_060FPSP_TABLE+$00001B8C
.xdef _fpsp_unsupp ;_060FPSP_TABLE+$38
;_060FPSP_TABLE+$00000668
.xdef _fpsp_effadd ;_060FPSP_TABLE+$40
;_060FPSP_TABLE+$0000106E
.xdef sacos ;_060FPSP_TABLE+$00003D42
.xdef sasin ;_060FPSP_TABLE+$00003C8E
.xdef satan ;_060FPSP_TABLE+$00003A4A
.xdef satanh ;_060FPSP_TABLE+$000056B2
.xdef scos ;_060FPSP_TABLE+$00002404
.xdef scosh ;_060FPSP_TABLE+$00004778
.xdef setox ;_060FPSP_TABLE+$00004272
.xdef setoxm1 ;_060FPSP_TABLE+$00004442
.xdef sgetexp ;_060FPSP_TABLE+$000046FE
.xdef sgetman ;_060FPSP_TABLE+$00004732
.xdef slog10 ;_060FPSP_TABLE+$0000575A
.xdef slog2 ;_060FPSP_TABLE+$000057AA
.xdef slogn ;_060FPSP_TABLE+$00005318
.xdef slognp1 ;_060FPSP_TABLE+$00005558
.xdef smod_snorm ;_060FPSP_TABLE+$00006C36
.xdef srem_snorm ;_060FPSP_TABLE+$00006D3E
.xdef sscale_snorm ;_060FPSP_TABLE+$00006DE0
.xdef ssin ;_060FPSP_TABLE+$000023FA
.xdef ssincos ;_060FPSP_TABLE+$000025F2
.xdef ssinh ;_060FPSP_TABLE+$00004836
.xdef stan ;_060FPSP_TABLE+$00002EBC
.xdef stanh ;_060FPSP_TABLE+$0000491A
.xdef stentox ;_060FPSP_TABLE+$00005D8C
.xdef stwotox ;_060FPSP_TABLE+$00005C94
global .macro
.endm
;----------------------------------------------------------------
;FPSPコールアウトテーブル
_FP_CALL_TOP::
.dc.l _060_real_bsun-_FP_CALL_TOP
.dc.l _060_real_snan-_FP_CALL_TOP
.dc.l _060_real_operr-_FP_CALL_TOP
.dc.l _060_real_ovfl-_FP_CALL_TOP
.dc.l _060_real_unfl-_FP_CALL_TOP
.dc.l _060_real_dz-_FP_CALL_TOP
.dc.l _060_real_inex-_FP_CALL_TOP
.dc.l _060_real_fline-_FP_CALL_TOP
.dc.l _060_real_fpu_disabled-_FP_CALL_TOP
.dc.l _060_real_trap-_FP_CALL_TOP
.dc.l _060_real_trace-_FP_CALL_TOP
.dc.l _060_real_access-_FP_CALL_TOP
.dc.l _060_fpsp_done-_FP_CALL_TOP
.dc.l 0
.dc.l 0
.dc.l 0
.dc.l _060_imem_read-_FP_CALL_TOP
.dc.l _060_dmem_read-_FP_CALL_TOP
.dc.l _060_dmem_write-_FP_CALL_TOP
.dc.l _060_imem_read_word-_FP_CALL_TOP
.dc.l _060_imem_read_long-_FP_CALL_TOP
.dc.l _060_dmem_read_byte-_FP_CALL_TOP
.dc.l _060_dmem_read_word-_FP_CALL_TOP
.dc.l _060_dmem_read_long-_FP_CALL_TOP
.dc.l _060_dmem_write_byte-_FP_CALL_TOP
.dc.l _060_dmem_write_word-_FP_CALL_TOP
.dc.l _060_dmem_write_long-_FP_CALL_TOP
.dc.l 0
.dc.l 0
.dc.l 0
.dc.l 0
.dc.l 0
;----------------------------------------------------------------
;FPSP本体
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
* M68000 Hi-Performance Microprocessor Division
* M68060 Software Package Production Release
*
* M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
* All rights reserved.
*
* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
* To the maximum extent permitted by applicable law,
* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
* FOR A PARTICULAR PURPOSE and any warranty against infringement with
* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
* and any accompanying written materials.
*
* To the maximum extent permitted by applicable law,
* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
* BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
* ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
*
* Motorola assumes no responsibility for the maintenance and support
* of the SOFTWARE.
*
* You are hereby granted a copyright license to use, modify, and distribute the
* SOFTWARE so long as this entire notice is retained without alteration
* in any modified and/or redistributed versions, and that such modified
* versions are clearly identified as such.
* No licenses are granted by implication, estoppel or otherwise under any
* patents or trademarks of Motorola, Inc.
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* freal.s:
* This file is appended to the top of the 060FPSP package
* and contains the entry points into the package. The user, in
* effect, branches to one of the branch table entries located
* after _060FPSP_TABLE.
* Also, subroutine stubs exist in this file (_fpsp_done for
* example) that are referenced by the FPSP package itself in order
* to call a given routine. The stub routine actually performs the
* callout. The FPSP code does a "bsr" to the stub routine. This
* extra layer of hierarchy adds a slight performance penalty but
* it makes the FPSP code easier to read and more mainatinable.
*
_off_bsun set $00
_off_snan set $04
_off_operr set $08
_off_ovfl set $0c
_off_unfl set $10
_off_dz set $14
_off_inex set $18
_off_fline set $1c
_off_fpu_dis set $20
_off_trap set $24
_off_trace set $28
_off_access set $2c
_off_done set $30
_off_imr set $40
_off_dmr set $44
_off_dmw set $48
_off_irw set $4c
_off_irl set $50
_off_drb set $54
_off_drw set $58
_off_drl set $5c
_off_dwb set $60
_off_dww set $64
_off_dwl set $68
_060FPSP_TABLE:
*##############################################################
* Here's the table of ENTRY POINTS for those linking the package.
bra.l _fpsp_snan
.dc.w $0000
bra.l _fpsp_operr
.dc.w $0000
bra.l _fpsp_ovfl
.dc.w $0000
bra.l _fpsp_unfl
.dc.w $0000
bra.l _fpsp_dz
.dc.w $0000
bra.l _fpsp_inex
.dc.w $0000
bra.l _fpsp_fline
.dc.w $0000
bra.l _fpsp_unsupp
.dc.w $0000
bra.l _fpsp_effadd
.dc.w $0000
.dcb.w 56/2,$51FC
*##############################################################
global _fpsp_done
_fpsp_done:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_done,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _real_ovfl
_real_ovfl:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_ovfl,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _real_unfl
_real_unfl:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_unfl,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _real_inex
_real_inex:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_inex,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _real_bsun
_real_bsun:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_bsun,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _real_operr
_real_operr:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_operr,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _real_snan
_real_snan:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_snan,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _real_dz
_real_dz:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_dz,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _real_fline
_real_fline:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_fline,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _real_fpu_disabled
_real_fpu_disabled:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_fpu_dis,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _real_trap
_real_trap:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_trap,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _real_trace
_real_trace:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_trace,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _real_access
_real_access:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_access,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
*######################################
global _imem_read
_imem_read:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_imr,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _dmem_read
_dmem_read:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_dmr,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _dmem_write
_dmem_write:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_dmw,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _imem_read_word
_imem_read_word:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_irw,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _imem_read_long
_imem_read_long:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_irl,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _dmem_read_byte
_dmem_read_byte:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_drb,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _dmem_read_word
_dmem_read_word:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_drw,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _dmem_read_long
_dmem_read_long:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_drl,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _dmem_write_byte
_dmem_write_byte:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_dwb,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _dmem_write_word
_dmem_write_word:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_dww,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
global _dmem_write_long
_dmem_write_long:
move.l d0,-(sp)
move.l (_060FPSP_TABLE-$80+_off_dwl,pc),d0
pea.l (_060FPSP_TABLE-$80.l,pc,d0.l)
move.l $4(sp),d0
rtd #$4
*
* This file contains a set of define statements for constants
* in order to promote readability within the corecode itself.
*
__LOCAL___SIZE set 192 * stack frame size(bytes)
LV set -__LOCAL___SIZE * stack offset
EXC_SR set $4 * stack status register
EXC_PC set $6 * stack pc
EXC_VOFF set $a * stacked vector offset
EXC_EA set $c * stacked <ea>
EXC_FP set $0 * frame pointer
EXC_AREGS set -68 * offset of all address regs
EXC_DREGS set -100 * offset of all data regs
EXC_FPREGS set -36 * offset of all fp regs
EXC_A7 set EXC_AREGS+(7*4) * offset of saved a7
OLD_A7 set EXC_AREGS+(6*4) * extra copy of saved a7
EXC_A6 set EXC_AREGS+(6*4) * offset of saved a6
EXC_A5 set EXC_AREGS+(5*4)
EXC_A4 set EXC_AREGS+(4*4)
EXC_A3 set EXC_AREGS+(3*4)
EXC_A2 set EXC_AREGS+(2*4)
EXC_A1 set EXC_AREGS+(1*4)
EXC_A0 set EXC_AREGS+(0*4)
EXC_D7 set EXC_DREGS+(7*4)
EXC_D6 set EXC_DREGS+(6*4)
EXC_D5 set EXC_DREGS+(5*4)
EXC_D4 set EXC_DREGS+(4*4)
EXC_D3 set EXC_DREGS+(3*4)
EXC_D2 set EXC_DREGS+(2*4)
EXC_D1 set EXC_DREGS+(1*4)
EXC_D0 set EXC_DREGS+(0*4)
EXC_FP0 set EXC_FPREGS+(0*12) * offset of saved fp0
EXC_FP1 set EXC_FPREGS+(1*12) * offset of saved fp1
EXC_FP2 set EXC_FPREGS+(2*12) * offset of saved fp2 (not used)
FP_SCR1 set LV+80 * fp scratch 1
FP_SCR1_EX set FP_SCR1+0
FP_SCR1_SGN set FP_SCR1+2
FP_SCR1_HI set FP_SCR1+4
FP_SCR1_LO set FP_SCR1+8
FP_SCR0 set LV+68 * fp scratch 0
FP_SCR0_EX set FP_SCR0+0
FP_SCR0_SGN set FP_SCR0+2
FP_SCR0_HI set FP_SCR0+4
FP_SCR0_LO set FP_SCR0+8
FP_DST set LV+56 * fp destination operand
FP_DST_EX set FP_DST+0
FP_DST_SGN set FP_DST+2
FP_DST_HI set FP_DST+4
FP_DST_LO set FP_DST+8
FP_SRC set LV+44 * fp source operand
FP_SRC_EX set FP_SRC+0
FP_SRC_SGN set FP_SRC+2
FP_SRC_HI set FP_SRC+4
FP_SRC_LO set FP_SRC+8
USER_FPIAR set LV+40 * FP instr address register
USER_FPSR set LV+36 * FP status register
FPSR_CC set USER_FPSR+0 * FPSR condition codes
FPSR_QBYTE set USER_FPSR+1 * FPSR qoutient byte
FPSR_EXCEPT set USER_FPSR+2 * FPSR exception status byte
FPSR_AEXCEPT set USER_FPSR+3 * FPSR accrued exception byte
USER_FPCR set LV+32 * FP control register
FPCR_ENABLE set USER_FPCR+2 * FPCR exception enable
FPCR_MODE set USER_FPCR+3 * FPCR rounding mode control
L_SCR3 set LV+28 * integer scratch 3
L_SCR2 set LV+24 * integer scratch 2
L_SCR1 set LV+20 * integer scratch 1
STORE_FLG set LV+19 * flag: operand store (ie. not fcmp/ftst)
EXC_TEMP2 set LV+24 * temporary space
EXC_TEMP set LV+16 * temporary space
DTAG set LV+15 * destination operand type
STAG set LV+14 * source operand type
SPCOND_FLG set LV+10 * flag: special case (see below)
EXC_CC set LV+8 * saved condition codes
EXC_EXTWPTR set LV+4 * saved current PC (active)
EXC_EXTWORD set LV+2 * saved extension word
EXC_CMDREG set LV+2 * saved extension word
EXC_OPWORD set LV+0 * saved operation word
*###############################
* Helpful macros
FTEMP set 0 * offsets within an
FTEMP_EX set 0 * extended precision
FTEMP_SGN set 2 * value saved in memory.
FTEMP_HI set 4
FTEMP_LO set 8
FTEMP_GRS set 12
__LOCAL__ set 0 * offsets within an
__LOCAL___EX set 0 * extended precision
__LOCAL___SGN set 2 * value saved in memory.
__LOCAL___HI set 4
__LOCAL___LO set 8
__LOCAL___GRS set 12
DST set 0 * offsets within an
DST_EX set 0 * extended precision
DST_HI set 4 * value saved in memory.
DST_LO set 8
SRC set 0 * offsets within an
SRC_EX set 0 * extended precision
SRC_HI set 4 * value saved in memory.
SRC_LO set 8
SGL_LO set $3f81 * min sgl prec exponent
SGL_HI set $407e * max sgl prec exponent
DBL_LO set $3c01 * min dbl prec exponent
DBL_HI set $43fe * max dbl prec exponent
EXT_LO set $0 * min ext prec exponent
EXT_HI set $7ffe * max ext prec exponent
EXT_BIAS set $3fff * extended precision bias
SGL_BIAS set $007f * single precision bias
DBL_BIAS set $03ff * double precision bias
NORM set $00 * operand type for STAG/DTAG
ZERO set $01 * operand type for STAG/DTAG
INF set $02 * operand type for STAG/DTAG
QNAN set $03 * operand type for STAG/DTAG
DENORM set $04 * operand type for STAG/DTAG
SNAN set $05 * operand type for STAG/DTAG
UNNORM set $06 * operand type for STAG/DTAG
*#################
* FPSR/FPCR bits #
*#################
neg_bit set $3 * negative result
z_bit set $2 * zero result
inf_bit set $1 * infinite result
nan_bit set $0 * NAN result
q_sn_bit set $7 * sign bit of quotient byte
bsun_bit set 7 * branch on unordered
snan_bit set 6 * signalling NAN
operr_bit set 5 * operand error
ovfl_bit set 4 * overflow
unfl_bit set 3 * underflow
dz_bit set 2 * divide by zero
inex2_bit set 1 * inexact result 2
inex1_bit set 0 * inexact result 1
aiop_bit set 7 * accrued inexact operation bit
aovfl_bit set 6 * accrued overflow bit
aunfl_bit set 5 * accrued underflow bit
adz_bit set 4 * accrued dz bit
ainex_bit set 3 * accrued inexact bit
*############################
* FPSR individual bit masks #
*############################
neg_mask set $08000000 * negative bit mask (lw)
inf_mask set $02000000 * infinity bit mask (lw)
z_mask set $04000000 * zero bit mask (lw)
nan_mask set $01000000 * nan bit mask (lw)
neg_bmask set $08 * negative bit mask (byte)
inf_bmask set $02 * infinity bit mask (byte)
z_bmask set $04 * zero bit mask (byte)
nan_bmask set $01 * nan bit mask (byte)
bsun_mask set $00008000 * bsun exception mask
snan_mask set $00004000 * snan exception mask
operr_mask set $00002000 * operr exception mask
ovfl_mask set $00001000 * overflow exception mask
unfl_mask set $00000800 * underflow exception mask
dz_mask set $00000400 * dz exception mask
inex2_mask set $00000200 * inex2 exception mask
inex1_mask set $00000100 * inex1 exception mask
aiop_mask set $00000080 * accrued illegal operation
aovfl_mask set $00000040 * accrued overflow
aunfl_mask set $00000020 * accrued underflow
adz_mask set $00000010 * accrued divide by zero
ainex_mask set $00000008 * accrued inexact
*#####################################
* FPSR combinations used in the FPSP #
*#####################################
dzinf_mask set inf_mask+dz_mask+adz_mask
opnan_mask set nan_mask+operr_mask+aiop_mask
nzi_mask set $01ffffff *clears N, Z, and I
unfinx_mask set unfl_mask+inex2_mask+aunfl_mask+ainex_mask
unf2inx_mask set unfl_mask+inex2_mask+ainex_mask
ovfinx_mask set ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
inx1a_mask set inex1_mask+ainex_mask
inx2a_mask set inex2_mask+ainex_mask
snaniop_mask set nan_mask+snan_mask+aiop_mask
snaniop2_mask set snan_mask+aiop_mask
naniop_mask set nan_mask+aiop_mask
neginf_mask set neg_mask+inf_mask
infaiop_mask set inf_mask+aiop_mask
negz_mask set neg_mask+z_mask
opaop_mask set operr_mask+aiop_mask
unfl_inx_mask set unfl_mask+aunfl_mask+ainex_mask
ovfl_inx_mask set ovfl_mask+aovfl_mask+ainex_mask
*########
* misc. #
*########
rnd_stky_bit set 29 * stky bit pos in longword
sign_bit set $7 * sign bit
signan_bit set $6 * signalling nan bit
sgl_thresh set $3f81 * minimum sgl exponent
dbl_thresh set $3c01 * minimum dbl exponent
x_mode set $0 * extended precision
s_mode set $4 * single precision
d_mode set $8 * double precision
rn_mode set $0 * round-to-nearest
rz_mode set $1 * round-to-zero
rm_mode set $2 * round-tp-minus-infinity
rp_mode set $3 * round-to-plus-infinity
mantissalen set 64 * length of mantissa in bits
BYTE set 1 * len(byte) == 1 byte
WORD set 2 * len(word) == 2 bytes
LONG set 4 * len(longword) == 2 bytes
BSUN_VEC set $c0 * bsun vector offset
INEX_VEC set $c4 * inexact vector offset
DZ_VEC set $c8 * dz vector offset
UNFL_VEC set $cc * unfl vector offset
OPERR_VEC set $d0 * operr vector offset
OVFL_VEC set $d4 * ovfl vector offset
SNAN_VEC set $d8 * snan vector offset
*##########################
* SPecial CONDition FLaGs #
*##########################
ftrapcc_flg set $01 * flag bit: ftrapcc exception
fbsun_flg set $02 * flag bit: bsun exception
mia7_flg set $04 * flag bit: (a7)+ <ea>
mda7_flg set $08 * flag bit: -(a7) <ea>
fmovem_flg set $40 * flag bit: fmovm instruction
immed_flg set $80 * flag bit: &<data> <ea>
ftrapcc_bit set $0
fbsun_bit set $1
mia7_bit set $2
mda7_bit set $3
immed_bit set $7
*#################################
* TRANSCENDENTAL "LAST-OP" FLAGS #
*#################################
FMUL_OP set $0 * fmul instr performed last
FDIV_OP set $1 * fdiv performed last
FADD_OP set $2 * fadd performed last
FMOV_OP set $3 * fmov performed last
*############
* CONSTANTS #
*############
T1: .dc.l $40C62D38,$D3D64634 * 16381 LOG2 LEAD
T2: .dc.l $3D6F90AE,$B1E75CC7 * 16381 LOG2 TRAIL
PI: .dc.l $40000000,$C90FDAA2,$2168C235,$00000000
PIBY2: .dc.l $3FFF0000,$C90FDAA2,$2168C235,$00000000
TWOBYPI:
.dc.l $3FE45F30,$6DC9C883
*########################################################################
* XDEF **************************************************************** #
* _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
* #
* This handler should be the first code executed upon taking the #
* FP Overflow exception in an operating system. #
* #
* XREF **************************************************************** #
* _imem_read_long() - read instruction longword #
* fix_skewed_ops() - adjust src operand in fsave frame #
* set_tag_x() - determine optype of src/dst operands #
* store_fpreg() - store opclass 0 or 2 result to FP regfile #
* unnorm_fix() - change UNNORM operands to NORM or ZERO #
* load_fpn2() - load dst operand from FP regfile #
* fout() - emulate an opclass 3 instruction #
* tbl_unsupp - add of table of emulation routines for opclass 0,2 #
* _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
* _real_ovfl() - "callout" for Overflow exception enabled code #
* _real_inex() - "callout" for Inexact exception enabled code #
* _real_trace() - "callout" for Trace exception code #
* #
* INPUT *************************************************************** #
* - The system stack contains the FP Ovfl exception stack frame #
* - The fsave frame contains the source operand #
* #
* OUTPUT ************************************************************** #
* Overflow Exception enabled: #
* - The system stack is unchanged #
* - The fsave frame contains the adjusted src op for opclass 0,2 #
* Overflow Exception disabled: #
* - The system stack is unchanged #
* - The "exception present" flag in the fsave frame is cleared #
* #
* ALGORITHM *********************************************************** #
* On the 060, if an FP overflow is present as the result of any #
* instruction, the 060 will take an overflow exception whether the #
* exception is enabled or disabled in the FPCR. For the disabled case, #
* This handler emulates the instruction to determine what the correct #
* default result should be for the operation. This default result is #
* then stored in either the FP regfile, data regfile, or memory. #
* Finally, the handler exits through the "callout" _fpsp_done() #
* denoting that no exceptional conditions exist within the machine. #
* If the exception is enabled, then this handler must create the #
* exceptional operand and plave it in the fsave state frame, and store #
* the default result (only if the instruction is opclass 3). For #
* exceptions enabled, this handler must exit through the "callout" #
* _real_ovfl() so that the operating system enabled overflow handler #
* can handle this case. #
* Two other conditions exist. First, if overflow was disabled #
* but the inexact exception was enabled, this handler must exit #
* through the "callout" _real_inex() regardless of whether the result #
* was inexact. #
* Also, in the case of an opclass three instruction where #
* overflow was disabled and the trace exception was enabled, this #
* handler must exit through the "callout" _real_trace(). #
* #
*########################################################################
global _fpsp_ovfl
_fpsp_ovfl:
*$# sub.l &24,%sp # make room for src/dst
link.w a6,#-__LOCAL___SIZE * init stack frame
fsave FP_SRC(a6) * grab the "busy" frame
movem.l d0-d1/a0-a1,EXC_DREGS(a6) * save d0-d1/a0-a1
fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) * save ctrl regs
fmovem.x fp0-fp1,EXC_FPREGS(a6) * save fp0-fp1 on stack
* the FPIAR holds the "current PC" of the faulting instruction
move.l USER_FPIAR(a6),EXC_EXTWPTR(a6)
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch the instruction words
move.l d0,EXC_OPWORD(a6)
*#############################################################################
btst #$5,EXC_CMDREG(a6) * is instr an fmove out?
bne.w fovfl_out
lea FP_SRC(a6),a0 * pass: ptr to src op
bsr.l fix_skewed_ops * fix src op
* since, I believe, only NORMs and DENORMs can come through here,
* maybe we can avoid the subroutine call.
lea FP_SRC(a6),a0 * pass: ptr to src op
bsr.l set_tag_x * tag the operand type
move.b d0,STAG(a6) * maybe NORM,DENORM
* bit five of the fp extension word separates the monadic and dyadic operations
* that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
* will never take this exception.
btst #$5,1+EXC_CMDREG(a6) * is operation monadic or dyadic?
beq.b fovfl_extract * monadic
bfextu EXC_CMDREG(a6){#6:#3},d0 * dyadic; load dst reg
bsr.l load_fpn2 * load dst into FP_DST
lea FP_DST(a6),a0 * pass: ptr to dst op
bsr.l set_tag_x * tag the operand type
cmpi.b #UNNORM,d0 * is operand an UNNORM?
bne.b fovfl_op2_done * no
bsr.l unnorm_fix * yes; convert to NORM,DENORM,or ZERO
fovfl_op2_done:
move.b d0,DTAG(a6) * save dst optype tag
fovfl_extract:
*$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
*$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
*$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
*$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
*$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
*$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
clr.l d0
move.b FPCR_MODE(a6),d0 * pass rnd prec/mode
move.b 1+EXC_CMDREG(a6),d1
andi.w #$007f,d1 * extract extension
andi.l #$00ff01ff,USER_FPSR(a6) * zero all but accured field
fmove.l #$0,fpcr * zero current control regs
fmove.l #$0,fpsr
lea FP_SRC(a6),a0
lea FP_DST(a6),a1
* maybe we can make these entry points ONLY the OVFL entry points of each routine.
move.l (tbl_unsupp.l,pc,d1.w*4),d1 * fetch routine addr
jsr (tbl_unsupp.l,pc,d1.l*1)
* the operation has been emulated. the result is in fp0.
* the EXOP, if an exception occurred, is in fp1.
* we must save the default result regardless of whether
* traps are enabled or disabled.
bfextu EXC_CMDREG(a6){#6:#3},d0
bsr.l store_fpreg
* the exceptional possibilities we have left ourselves with are ONLY overflow
* and inexact. and, the inexact is such that overflow occurred and was disabled
* but inexact was enabled.
btst #ovfl_bit,FPCR_ENABLE(a6)
bne.b fovfl_ovfl_on
btst #inex2_bit,FPCR_ENABLE(a6)
bne.b fovfl_inex_on
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
*$# add.l &24,%sp
bra.l _fpsp_done
* overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
* in fp1. now, simply jump to _real_ovfl()!
fovfl_ovfl_on:
fmovem.x fp1,FP_SRC(a6) * save EXOP (fp1) to stack
move.w #$e005,2+FP_SRC(a6) * save exc status
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6) * do this after fmovm,other f<op>s!
unlk a6
bra.l _real_ovfl
* overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
* we must jump to real_inex().
fovfl_inex_on:
fmovem.x fp1,FP_SRC(a6) * save EXOP (fp1) to stack
move.b #$c4,1+EXC_VOFF(a6) * vector offset = 0xc4
move.w #$e001,2+FP_SRC(a6) * save exc status
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6) * do this after fmovm,other f<op>s!
unlk a6
bra.l _real_inex
*#######################################################################
fovfl_out:
*$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
*$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
*$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
* the src operand is definitely a NORM(!), so tag it as such
move.b #NORM,STAG(a6) * set src optype tag
clr.l d0
move.b FPCR_MODE(a6),d0 * pass rnd prec/mode
andi.l #$ffff00ff,USER_FPSR(a6) * zero all but accured field
fmove.l #$0,fpcr * zero current control regs
fmove.l #$0,fpsr
lea FP_SRC(a6),a0 * pass ptr to src operand
bsr.l fout
btst #ovfl_bit,FPCR_ENABLE(a6)
bne.w fovfl_ovfl_on
btst #inex2_bit,FPCR_ENABLE(a6)
bne.w fovfl_inex_on
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
*$# add.l &24,%sp
btst #$7,(sp) * is trace on?
beq.l _fpsp_done * no
fmove.l fpiar,$8(sp) * "Current PC" is in FPIAR
move.w #$2024,$6(sp) * stk fmt = 0x2; voff = 0x024
bra.l _real_trace
*########################################################################
* XDEF **************************************************************** #
* _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
* #
* This handler should be the first code executed upon taking the #
* FP Underflow exception in an operating system. #
* #
* XREF **************************************************************** #
* _imem_read_long() - read instruction longword #
* fix_skewed_ops() - adjust src operand in fsave frame #
* set_tag_x() - determine optype of src/dst operands #
* store_fpreg() - store opclass 0 or 2 result to FP regfile #
* unnorm_fix() - change UNNORM operands to NORM or ZERO #
* load_fpn2() - load dst operand from FP regfile #
* fout() - emulate an opclass 3 instruction #
* tbl_unsupp - add of table of emulation routines for opclass 0,2 #
* _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
* _real_ovfl() - "callout" for Overflow exception enabled code #
* _real_inex() - "callout" for Inexact exception enabled code #
* _real_trace() - "callout" for Trace exception code #
* #
* INPUT *************************************************************** #
* - The system stack contains the FP Unfl exception stack frame #
* - The fsave frame contains the source operand #
* #
* OUTPUT ************************************************************** #
* Underflow Exception enabled: #
* - The system stack is unchanged #
* - The fsave frame contains the adjusted src op for opclass 0,2 #
* Underflow Exception disabled: #
* - The system stack is unchanged #
* - The "exception present" flag in the fsave frame is cleared #
* #
* ALGORITHM *********************************************************** #
* On the 060, if an FP underflow is present as the result of any #
* instruction, the 060 will take an underflow exception whether the #
* exception is enabled or disabled in the FPCR. For the disabled case, #
* This handler emulates the instruction to determine what the correct #
* default result should be for the operation. This default result is #
* then stored in either the FP regfile, data regfile, or memory. #
* Finally, the handler exits through the "callout" _fpsp_done() #
* denoting that no exceptional conditions exist within the machine. #
* If the exception is enabled, then this handler must create the #
* exceptional operand and plave it in the fsave state frame, and store #
* the default result (only if the instruction is opclass 3). For #
* exceptions enabled, this handler must exit through the "callout" #
* _real_unfl() so that the operating system enabled overflow handler #
* can handle this case. #
* Two other conditions exist. First, if underflow was disabled #
* but the inexact exception was enabled and the result was inexact, #
* this handler must exit through the "callout" _real_inex(). #
* was inexact. #
* Also, in the case of an opclass three instruction where #
* underflow was disabled and the trace exception was enabled, this #
* handler must exit through the "callout" _real_trace(). #
* #
*########################################################################
global _fpsp_unfl
_fpsp_unfl:
*$# sub.l &24,%sp # make room for src/dst
link.w a6,#-__LOCAL___SIZE * init stack frame
fsave FP_SRC(a6) * grab the "busy" frame
movem.l d0-d1/a0-a1,EXC_DREGS(a6) * save d0-d1/a0-a1
fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) * save ctrl regs
fmovem.x fp0-fp1,EXC_FPREGS(a6) * save fp0-fp1 on stack
* the FPIAR holds the "current PC" of the faulting instruction
move.l USER_FPIAR(a6),EXC_EXTWPTR(a6)
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch the instruction words
move.l d0,EXC_OPWORD(a6)
*#############################################################################
btst #$5,EXC_CMDREG(a6) * is instr an fmove out?
bne.w funfl_out
lea FP_SRC(a6),a0 * pass: ptr to src op
bsr.l fix_skewed_ops * fix src op
lea FP_SRC(a6),a0 * pass: ptr to src op
bsr.l set_tag_x * tag the operand type
move.b d0,STAG(a6) * maybe NORM,DENORM
* bit five of the fp ext word separates the monadic and dyadic operations
* that can pass through fpsp_unfl(). remember that fcmp, and ftst
* will never take this exception.
btst #$5,1+EXC_CMDREG(a6) * is op monadic or dyadic?
beq.b funfl_extract * monadic
* now, what's left that's not dyadic is fsincos. we can distinguish it
* from all dyadics by the '0110xxx pattern
btst #$4,1+EXC_CMDREG(a6) * is op an fsincos?
bne.b funfl_extract * yes
bfextu EXC_CMDREG(a6){#6:#3},d0 * dyadic; load dst reg
bsr.l load_fpn2 * load dst into FP_DST
lea FP_DST(a6),a0 * pass: ptr to dst op
bsr.l set_tag_x * tag the operand type
cmpi.b #UNNORM,d0 * is operand an UNNORM?
bne.b funfl_op2_done * no
bsr.l unnorm_fix * yes; convert to NORM,DENORM,or ZERO
funfl_op2_done:
move.b d0,DTAG(a6) * save dst optype tag
funfl_extract:
*$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
*$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
*$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
*$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
*$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
*$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
clr.l d0
move.b FPCR_MODE(a6),d0 * pass rnd prec/mode
move.b 1+EXC_CMDREG(a6),d1
andi.w #$007f,d1 * extract extension
andi.l #$00ff01ff,USER_FPSR(a6)
fmove.l #$0,fpcr * zero current control regs
fmove.l #$0,fpsr
lea FP_SRC(a6),a0
lea FP_DST(a6),a1
* maybe we can make these entry points ONLY the OVFL entry points of each routine.
move.l (tbl_unsupp.l,pc,d1.w*4),d1 * fetch routine addr
jsr (tbl_unsupp.l,pc,d1.l*1)
bfextu EXC_CMDREG(a6){#6:#3},d0
bsr.l store_fpreg
* The `060 FPU multiplier hardware is such that if the result of a
* multiply operation is the smallest possible normalized number
* (0x00000000_80000000_00000000), then the machine will take an
* underflow exception. Since this is incorrect, we need to check
* if our emulation, after re-doing the operation, decided that
* no underflow was called for. We do these checks only in
* funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
* special case will simply exit gracefully with the correct result.
* the exceptional possibilities we have left ourselves with are ONLY overflow
* and inexact. and, the inexact is such that overflow occurred and was disabled
* but inexact was enabled.
btst #unfl_bit,FPCR_ENABLE(a6)
bne.b funfl_unfl_on
funfl_chkinex:
btst #inex2_bit,FPCR_ENABLE(a6)
bne.b funfl_inex_on
funfl_exit:
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
*$# add.l &24,%sp
bra.l _fpsp_done
* overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
* in fp1 (don't forget to save fp0). what to do now?
* well, we simply have to get to go to _real_unfl()!
funfl_unfl_on:
* The `060 FPU multiplier hardware is such that if the result of a
* multiply operation is the smallest possible normalized number
* (0x00000000_80000000_00000000), then the machine will take an
* underflow exception. Since this is incorrect, we check here to see
* if our emulation, after re-doing the operation, decided that
* no underflow was called for.
btst #unfl_bit,FPSR_EXCEPT(a6)
beq.w funfl_chkinex
funfl_unfl_on2:
fmovem.x fp1,FP_SRC(a6) * save EXOP (fp1) to stack
move.w #$e003,2+FP_SRC(a6) * save exc status
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6) * do this after fmovm,other f<op>s!
unlk a6
bra.l _real_unfl
* undeflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
* we must jump to real_inex().
funfl_inex_on:
* The `060 FPU multiplier hardware is such that if the result of a
* multiply operation is the smallest possible normalized number
* (0x00000000_80000000_00000000), then the machine will take an
* underflow exception.
* But, whether bogus or not, if inexact is enabled AND it occurred,
* then we have to branch to real_inex.
btst #inex2_bit,FPSR_EXCEPT(a6)
beq.w funfl_exit
funfl_inex_on2:
fmovem.x fp1,FP_SRC(a6) * save EXOP to stack
move.b #$c4,1+EXC_VOFF(a6) * vector offset = 0xc4
move.w #$e001,2+FP_SRC(a6) * save exc status
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6) * do this after fmovm,other f<op>s!
unlk a6
bra.l _real_inex
*######################################################################
funfl_out:
*$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
*$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
*$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
* the src operand is definitely a NORM(!), so tag it as such
move.b #NORM,STAG(a6) * set src optype tag
clr.l d0
move.b FPCR_MODE(a6),d0 * pass rnd prec/mode
andi.l #$ffff00ff,USER_FPSR(a6) * zero all but accured field
fmove.l #$0,fpcr * zero current control regs
fmove.l #$0,fpsr
lea FP_SRC(a6),a0 * pass ptr to src operand
bsr.l fout
btst #unfl_bit,FPCR_ENABLE(a6)
bne.w funfl_unfl_on2
btst #inex2_bit,FPCR_ENABLE(a6)
bne.w funfl_inex_on2
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
*$# add.l &24,%sp
btst #$7,(sp) * is trace on?
beq.l _fpsp_done * no
fmove.l fpiar,$8(sp) * "Current PC" is in FPIAR
move.w #$2024,$6(sp) * stk fmt = 0x2; voff = 0x024
bra.l _real_trace
*########################################################################
* XDEF **************************************************************** #
* _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
* Data Type" exception. #
* #
* This handler should be the first code executed upon taking the #
* FP Unimplemented Data Type exception in an operating system. #
* #
* XREF **************************************************************** #
* _imem_read_{word,long}() - read instruction word/longword #
* fix_skewed_ops() - adjust src operand in fsave frame #
* set_tag_x() - determine optype of src/dst operands #
* store_fpreg() - store opclass 0 or 2 result to FP regfile #
* unnorm_fix() - change UNNORM operands to NORM or ZERO #
* load_fpn2() - load dst operand from FP regfile #
* load_fpn1() - load src operand from FP regfile #
* fout() - emulate an opclass 3 instruction #
* tbl_unsupp - add of table of emulation routines for opclass 0,2 #
* _real_inex() - "callout" to operating system inexact handler #
* _fpsp_done() - "callout" for exit; work all done #
* _real_trace() - "callout" for Trace enabled exception #
* funimp_skew() - adjust fsave src ops to "incorrect" value #
* _real_snan() - "callout" for SNAN exception #
* _real_operr() - "callout" for OPERR exception #
* _real_ovfl() - "callout" for OVFL exception #
* _real_unfl() - "callout" for UNFL exception #
* get_packed() - fetch packed operand from memory #
* #
* INPUT *************************************************************** #
* - The system stack contains the "Unimp Data Type" stk frame #
* - The fsave frame contains the ssrc op (for UNNORM/DENORM) #
* #
* OUTPUT ************************************************************** #
* If Inexact exception (opclass 3): #
* - The system stack is changed to an Inexact exception stk frame #
* If SNAN exception (opclass 3): #
* - The system stack is changed to an SNAN exception stk frame #
* If OPERR exception (opclass 3): #
* - The system stack is changed to an OPERR exception stk frame #
* If OVFL exception (opclass 3): #
* - The system stack is changed to an OVFL exception stk frame #
* If UNFL exception (opclass 3): #
* - The system stack is changed to an UNFL exception stack frame #
* If Trace exception enabled: #
* - The system stack is changed to a Trace exception stack frame #
* Else: (normal case) #
* - Correct result has been stored as appropriate #
* #
* ALGORITHM *********************************************************** #
* Two main instruction types can enter here: (1) DENORM or UNNORM #
* unimplemented data types. These can be either opclass 0,2 or 3 #
* instructions, and (2) PACKED unimplemented data format instructions #
* also of opclasses 0,2, or 3. #
* For UNNORM/DENORM opclass 0 and 2, the handler fetches the src #
* operand from the fsave state frame and the dst operand (if dyadic) #
* from the FP register file. The instruction is then emulated by #
* choosing an emulation routine from a table of routines indexed by #
* instruction type. Once the instruction has been emulated and result #
* saved, then we check to see if any enabled exceptions resulted from #
* instruction emulation. If none, then we exit through the "callout" #
* _fpsp_done(). If there is an enabled FP exception, then we insert #
* this exception into the FPU in the fsave state frame and then exit #
* through _fpsp_done(). #
* PACKED opclass 0 and 2 is similar in how the instruction is #
* emulated and exceptions handled. The differences occur in how the #
* handler loads the packed op (by calling get_packed() routine) and #
* by the fact that a Trace exception could be pending for PACKED ops. #
* If a Trace exception is pending, then the current exception stack #
* frame is changed to a Trace exception stack frame and an exit is #
* made through _real_trace(). #
* For UNNORM/DENORM opclass 3, the actual move out to memory is #
* performed by calling the routine fout(). If no exception should occur #
* as the result of emulation, then an exit either occurs through #
* _fpsp_done() or through _real_trace() if a Trace exception is pending #
* (a Trace stack frame must be created here, too). If an FP exception #
* should occur, then we must create an exception stack frame of that #
* type and jump to either _real_snan(), _real_operr(), _real_inex(), #
* _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 #
* emulation is performed in a similar manner. #
* #
*########################################################################
*
* (1) DENORM and UNNORM (unimplemented) data types:
*
* post-instruction
* *****************
* * EA *
* pre-instruction * *
* ***************** *****************
* * 0x0 * 0x0dc * * 0x3 * 0x0dc *
* ***************** *****************
* * Next * * Next *
* * PC * * PC *
* ***************** *****************
* * SR * * SR *
* ***************** *****************
*
* (2) PACKED format (unsupported) opclasses two and three:
* *****************
* * EA *
* * *
* *****************
* * 0x2 * 0x0dc *
* *****************
* * Next *
* * PC *
* *****************
* * SR *
* *****************
*
global _fpsp_unsupp
_fpsp_unsupp:
link.w a6,#-__LOCAL___SIZE * init stack frame
fsave FP_SRC(a6) * save fp state
movem.l d0-d1/a0-a1,EXC_DREGS(a6) * save d0-d1/a0-a1
fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) * save ctrl regs
fmovem.x fp0-fp1,EXC_FPREGS(a6) * save fp0-fp1 on stack
btst #$5,EXC_SR(a6) * user or supervisor mode?
bne.b fu_s
fu_u:
move.l usp,a0 * fetch user stack pointer
move.l a0,EXC_A7(a6) * save on stack
bra.b fu_cont
* if the exception is an opclass zero or two unimplemented data type
* exception, then the a7' calculated here is wrong since it doesn't
* stack an ea. however, we don't need an a7' for this case anyways.
fu_s:
lea $4+EXC_EA(a6),a0 * load old a7'
move.l a0,EXC_A7(a6) * save on stack
fu_cont:
* the FPIAR holds the "current PC" of the faulting instruction
* the FPIAR should be set correctly for ALL exceptions passing through
* this point.
move.l USER_FPIAR(a6),EXC_EXTWPTR(a6)
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch the instruction words
move.l d0,EXC_OPWORD(a6) * store OPWORD and EXTWORD
*###########################
clr.b SPCOND_FLG(a6) * clear special condition flag
* Separate opclass three (fpn-to-mem) ops since they have a different
* stack frame and protocol.
btst #$5,EXC_CMDREG(a6) * is it an fmove out?
bne.w fu_out * yes
* Separate packed opclass two instructions.
bfextu EXC_CMDREG(a6){#0:#6},d0
cmpi.b #$13,d0
beq.w fu_in_pack
* I'm not sure at this point what FPSR bits are valid for this instruction.
* so, since the emulation routines re-create them anyways, zero exception field
andi.l #$00ff00ff,USER_FPSR(a6) * zero exception field
fmove.l #$0,fpcr * zero current control regs
fmove.l #$0,fpsr
* Opclass two w/ memory-to-fpn operation will have an incorrect extended
* precision format if the src format was single or double and the
* source data type was an INF, NAN, DENORM, or UNNORM
lea FP_SRC(a6),a0 * pass ptr to input
bsr.l fix_skewed_ops
* we don't know whether the src operand or the dst operand (or both) is the
* UNNORM or DENORM. call the function that tags the operand type. if the
* input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
lea FP_SRC(a6),a0 * pass: ptr to src op
bsr.l set_tag_x * tag the operand type
cmpi.b #UNNORM,d0 * is operand an UNNORM?
bne.b fu_op2 * no
bsr.l unnorm_fix * yes; convert to NORM,DENORM,or ZERO
fu_op2:
move.b d0,STAG(a6) * save src optype tag
bfextu EXC_CMDREG(a6){#6:#3},d0 * dyadic; load dst reg
* bit five of the fp extension word separates the monadic and dyadic operations
* at this point
btst #$5,1+EXC_CMDREG(a6) * is operation monadic or dyadic?
beq.b fu_extract * monadic
cmpi.b #$3a,1+EXC_CMDREG(a6) * is operation an ftst?
beq.b fu_extract * yes, so it's monadic, too
bsr.l load_fpn2 * load dst into FP_DST
lea FP_DST(a6),a0 * pass: ptr to dst op
bsr.l set_tag_x * tag the operand type
cmpi.b #UNNORM,d0 * is operand an UNNORM?
bne.b fu_op2_done * no
bsr.l unnorm_fix * yes; convert to NORM,DENORM,or ZERO
fu_op2_done:
move.b d0,DTAG(a6) * save dst optype tag
fu_extract:
clr.l d0
move.b FPCR_MODE(a6),d0 * fetch rnd mode/prec
bfextu 1+EXC_CMDREG(a6){#1:#7},d1 * extract extension
lea FP_SRC(a6),a0
lea FP_DST(a6),a1
move.l (tbl_unsupp.l,pc,d1.l*4),d1 * fetch routine addr
jsr (tbl_unsupp.l,pc,d1.l*1)
*
* Exceptions in order of precedence:
* BSUN : none
* SNAN : all dyadic ops
* OPERR : fsqrt(-NORM)
* OVFL : all except ftst,fcmp
* UNFL : all except ftst,fcmp
* DZ : fdiv
* INEX2 : all except ftst,fcmp
* INEX1 : none (packed doesn't go through here)
*
* we determine the highest priority exception(if any) set by the
* emulation routine that has also been enabled by the user.
move.b FPCR_ENABLE(a6),d0 * fetch exceptions set
bne.b fu_in_ena * some are enabled
fu_in_cont:
* fcmp and ftst do not store any result.
move.b 1+EXC_CMDREG(a6),d0 * fetch extension
andi.b #$38,d0 * extract bits 3-5
cmpi.b #$38,d0 * is instr fcmp or ftst?
beq.b fu_in_exit * yes
bfextu EXC_CMDREG(a6){#6:#3},d0 * dyadic; load dst reg
bsr.l store_fpreg * store the result
fu_in_exit:
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
bra.l _fpsp_done
fu_in_ena:
and.b FPSR_EXCEPT(a6),d0 * keep only ones enabled
bfffo d0{#24:#8},d0 * find highest priority exception
bne.b fu_in_exc * there is at least one set
*
* No exceptions occurred that were also enabled. Now:
*
* if (OVFL && ovfl_disabled && inexact_enabled) {
* branch to _real_inex() (even if the result was exact!);
* } else {
* save the result in the proper fp reg (unless the op is fcmp or ftst);
* return;
* }
*
btst #ovfl_bit,FPSR_EXCEPT(a6) * was overflow set?
beq.b fu_in_cont * no
fu_in_ovflchk:
btst #inex2_bit,FPCR_ENABLE(a6) * was inexact enabled?
beq.b fu_in_cont * no
bra.w fu_in_exc_ovfl * go insert overflow frame
*
* An exception occurred and that exception was enabled:
*
* shift enabled exception field into lo byte of d0;
* if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
* ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
* /*
* * this is the case where we must call _real_inex() now or else
* * there will be no other way to pass it the exceptional operand
* */
* call _real_inex();
* } else {
* restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
* }
*
fu_in_exc:
subi.l #24,d0 * fix offset to be 0-8
cmpi.b #$6,d0 * is exception INEX? (6)
bne.b fu_in_exc_exit * no
* the enabled exception was inexact
btst #unfl_bit,FPSR_EXCEPT(a6) * did disabled underflow occur?
bne.w fu_in_exc_unfl * yes
btst #ovfl_bit,FPSR_EXCEPT(a6) * did disabled overflow occur?
bne.w fu_in_exc_ovfl * yes
* here, we insert the correct fsave status value into the fsave frame for the
* corresponding exception. the operand in the fsave frame should be the original
* src operand.
fu_in_exc_exit:
move.l d0,-(sp) * save d0
bsr.l funimp_skew * skew sgl or dbl inputs
move.l (sp)+,d0 * restore d0
move.w (tbl_except.b,pc,d0.w*2),2+FP_SRC(a6) * create exc status
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6) * restore src op
unlk a6
bra.l _fpsp_done
tbl_except:
.dc.w $e000,$e006,$e004,$e005
.dc.w $e003,$e002,$e001,$e001
fu_in_exc_unfl:
move.w #$4,d0
bra.b fu_in_exc_exit
fu_in_exc_ovfl:
move.w #$03,d0
bra.b fu_in_exc_exit
* If the input operand to this operation was opclass two and a single
* or double precision denorm, inf, or nan, the operand needs to be
* "corrected" in order to have the proper equivalent extended precision
* number.
global fix_skewed_ops
fix_skewed_ops:
bfextu EXC_CMDREG(a6){#0:#6},d0 * extract opclass,src fmt
cmpi.b #$11,d0 * is class = 2 & fmt = sgl?
beq.b fso_sgl * yes
cmpi.b #$15,d0 * is class = 2 & fmt = dbl?
beq.b fso_dbl * yes
rts * no
fso_sgl:
move.w __LOCAL___EX.w(a0),d0 * fetch src exponent
andi.w #$7fff,d0 * strip sign
cmpi.w #$3f80,d0 * is |exp| == $3f80?
beq.b fso_sgl_dnrm_zero * yes
cmpi.w #$407f,d0 * no; is |exp| == $407f?
beq.b fso_infnan * yes
rts * no
fso_sgl_dnrm_zero:
andi.l #$7fffffff,__LOCAL___HI(a0) * clear j-bit
beq.b fso_zero * it's a skewed zero
fso_sgl_dnrm:
* here, we count on norm not to alter a0...
bsr.l norm * normalize mantissa
neg.w d0 * -shft amt
addi.w #$3f81,d0 * adjust new exponent
andi.w #$8000,__LOCAL___EX.w(a0) * clear old exponent
or.w d0,__LOCAL___EX.w(a0) * insert new exponent
rts
fso_zero:
andi.w #$8000,__LOCAL___EX.w(a0) * clear bogus exponent
rts
fso_infnan:
andi.b #$7f,__LOCAL___HI(a0) * clear j-bit
ori.w #$7fff,__LOCAL___EX.w(a0) * make exponent = $7fff
rts
fso_dbl:
move.w __LOCAL___EX.w(a0),d0 * fetch src exponent
andi.w #$7fff,d0 * strip sign
cmpi.w #$3c00,d0 * is |exp| == $3c00?
beq.b fso_dbl_dnrm_zero * yes
cmpi.w #$43ff,d0 * no; is |exp| == $43ff?
beq.b fso_infnan * yes
rts * no
fso_dbl_dnrm_zero:
andi.l #$7fffffff,__LOCAL___HI(a0) * clear j-bit
bne.b fso_dbl_dnrm * it's a skewed denorm
tst.l __LOCAL___LO(a0) * is it a zero?
beq.b fso_zero * yes
fso_dbl_dnrm:
* here, we count on norm not to alter a0...
bsr.l norm * normalize mantissa
neg.w d0 * -shft amt
addi.w #$3c01,d0 * adjust new exponent
andi.w #$8000,__LOCAL___EX.w(a0) * clear old exponent
or.w d0,__LOCAL___EX.w(a0) * insert new exponent
rts
*################################################################
* fmove out took an unimplemented data type exception.
* the src operand is in FP_SRC. Call _fout() to write out the result and
* to determine which exceptions, if any, to take.
fu_out:
* Separate packed move outs from the UNNORM and DENORM move outs.
bfextu EXC_CMDREG(a6){#3:#3},d0
cmpi.b #$3,d0
beq.w fu_out_pack
cmpi.b #$7,d0
beq.w fu_out_pack
* I'm not sure at this point what FPSR bits are valid for this instruction.
* so, since the emulation routines re-create them anyways, zero exception field.
* fmove out doesn't affect ccodes.
andi.l #$ffff00ff,USER_FPSR(a6) * zero exception field
fmove.l #$0,fpcr * zero current control regs
fmove.l #$0,fpsr
* the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
* call here. just figure out what it is...
move.w FP_SRC_EX(a6),d0 * get exponent
andi.w #$7fff,d0 * strip sign
beq.b fu_out_denorm * it's a DENORM
lea FP_SRC(a6),a0
bsr.l unnorm_fix * yes; fix it
move.b d0,STAG(a6)
bra.b fu_out_cont
fu_out_denorm:
move.b #DENORM,STAG(a6)
fu_out_cont:
clr.l d0
move.b FPCR_MODE(a6),d0 * fetch rnd mode/prec
lea FP_SRC(a6),a0 * pass ptr to src operand
move.l (a6),EXC_A6(a6) * in case a6 changes
bsr.l fout * call fmove out routine
* Exceptions in order of precedence:
* BSUN : none
* SNAN : none
* OPERR : fmove.{b,w,l} out of large UNNORM
* OVFL : fmove.{s,d}
* UNFL : fmove.{s,d,x}
* DZ : none
* INEX2 : all
* INEX1 : none (packed doesn't travel through here)
* determine the highest priority exception(if any) set by the
* emulation routine that has also been enabled by the user.
move.b FPCR_ENABLE(a6),d0 * fetch exceptions enabled
bne.w fu_out_ena * some are enabled
fu_out_done:
move.l EXC_A6(a6),(a6) * in case a6 changed
* on extended precision opclass three instructions using pre-decrement or
* post-increment addressing mode, the address register is not updated. is the
* address register was the stack pointer used from user mode, then let's update
* it here. if it was used from supervisor mode, then we have to handle this
* as a special case.
btst #$5,EXC_SR(a6)
bne.b fu_out_done_s
move.l EXC_A7(a6),a0 * restore a7
move.l a0,usp
fu_out_done_cont:
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
btst #$7,(sp) * is trace on?
bne.b fu_out_trace * yes
bra.l _fpsp_done
* is the ea mode pre-decrement of the stack pointer from supervisor mode?
* ("fmov.x fpm,-(a7)") if so,
fu_out_done_s:
cmpi.b #mda7_flg,SPCOND_FLG(a6)
bne.b fu_out_done_cont
* the extended precision result is still in fp0. but, we need to save it
* somewhere on the stack until we can copy it to its final resting place.
* here, we're counting on the top of the stack to be the old place-holders
* for fp0/fp1 which have already been restored. that way, we can write
* over those destinations with the shifted stack frame.
fmovem.x fp0,FP_SRC(a6) * put answer on stack
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
move.l (a6),a6 * restore frame pointer
move.l __LOCAL___SIZE+EXC_SR(sp),__LOCAL___SIZE+EXC_SR-$c(sp)
move.l __LOCAL___SIZE+2+EXC_PC(sp),__LOCAL___SIZE+2+EXC_PC-$c(sp)
* now, copy the result to the proper place on the stack
move.l __LOCAL___SIZE+FP_SRC_EX(sp),__LOCAL___SIZE+EXC_SR+$0(sp)
move.l __LOCAL___SIZE+FP_SRC_HI(sp),__LOCAL___SIZE+EXC_SR+$4(sp)
move.l __LOCAL___SIZE+FP_SRC_LO(sp),__LOCAL___SIZE+EXC_SR+$8(sp)
add.l #__LOCAL___SIZE-$8.l.l,sp
btst #$7,(sp)
bne.b fu_out_trace
bra.l _fpsp_done
fu_out_ena:
and.b FPSR_EXCEPT(a6),d0 * keep only ones enabled
bfffo d0{#24:#8},d0 * find highest priority exception
bne.b fu_out_exc * there is at least one set
* no exceptions were set.
* if a disabled overflow occurred and inexact was enabled but the result
* was exact, then a branch to _real_inex() is made.
btst #ovfl_bit,FPSR_EXCEPT(a6) * was overflow set?
beq.w fu_out_done * no
fu_out_ovflchk:
btst #inex2_bit,FPCR_ENABLE(a6) * was inexact enabled?
beq.w fu_out_done * no
bra.w fu_inex * yes
*
* The fp move out that took the "Unimplemented Data Type" exception was
* being traced. Since the stack frames are similar, get the "current" PC
* from FPIAR and put it in the trace stack frame then jump to _real_trace().
*
* UNSUPP FRAME TRACE FRAME
* ***************** *****************
* * EA * * Current *
* * * * PC *
* ***************** *****************
* * 0x3 * 0x0dc * * 0x2 * 0x024 *
* ***************** *****************
* * Next * * Next *
* * PC * * PC *
* ***************** *****************
* * SR * * SR *
* ***************** *****************
*
fu_out_trace:
move.w #$2024,$6(sp)
fmove.l fpiar,$8(sp)
bra.l _real_trace
* an exception occurred and that exception was enabled.
fu_out_exc:
subi.l #24,d0 * fix offset to be 0-8
* we don't mess with the existing fsave frame. just re-insert it and
* jump to the "_real_{}()" handler...
move.w (tbl_fu_out.b,pc,d0.w*2),d0
jmp (tbl_fu_out.b,pc,d0.w*1)
.dc.w $4AFC,$8
tbl_fu_out:
.dc.w tbl_fu_out-tbl_fu_out * BSUN can't happen
.dc.w tbl_fu_out-tbl_fu_out * SNAN can't happen
.dc.w fu_operr-tbl_fu_out * OPERR
.dc.w fu_ovfl-tbl_fu_out * OVFL
.dc.w fu_unfl-tbl_fu_out * UNFL
.dc.w tbl_fu_out-tbl_fu_out * DZ can't happen
.dc.w fu_inex-tbl_fu_out * INEX2
.dc.w tbl_fu_out-tbl_fu_out * INEX1 won't make it here
* for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
* frestore it.
fu_snan:
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
move.w #$30d8,EXC_VOFF(a6) * vector offset = 0xd8
move.w #$e006,2+FP_SRC(a6)
frestore FP_SRC(a6)
unlk a6
bra.l _real_snan
fu_operr:
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
move.w #$30d0,EXC_VOFF(a6) * vector offset = 0xd0
move.w #$e004,2+FP_SRC(a6)
frestore FP_SRC(a6)
unlk a6
bra.l _real_operr
fu_ovfl:
fmovem.x fp1,FP_SRC(a6) * save EXOP to the stack
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
move.w #$30d4,EXC_VOFF(a6) * vector offset = 0xd4
move.w #$e005,2+FP_SRC(a6)
frestore FP_SRC(a6) * restore EXOP
unlk a6
bra.l _real_ovfl
* underflow can happen for extended precision. extended precision opclass
* three instruction exceptions don't update the stack pointer. so, if the
* exception occurred from user mode, then simply update a7 and exit normally.
* if the exception occurred from supervisor mode, check if
fu_unfl:
move.l EXC_A6(a6),(a6) * restore a6
btst #$5,EXC_SR(a6)
bne.w fu_unfl_s
move.l EXC_A7(a6),a0 * restore a7 whether we need
move.l a0,usp * to or not...
fu_unfl_cont:
fmovem.x fp1,FP_SRC(a6) * save EXOP to the stack
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
move.w #$30cc,EXC_VOFF(a6) * vector offset = 0xcc
move.w #$e003,2+FP_SRC(a6)
frestore FP_SRC(a6) * restore EXOP
unlk a6
bra.l _real_unfl
fu_unfl_s:
cmpi.b #mda7_flg,SPCOND_FLG(a6) * was the <ea> mode -(sp)?
bne.b fu_unfl_cont
* the extended precision result is still in fp0. but, we need to save it
* somewhere on the stack until we can copy it to its final resting place
* (where the exc frame is currently). make sure it's not at the top of the
* frame or it will get overwritten when the exc stack frame is shifted "down".
fmovem.x fp0,FP_SRC(a6) * put answer on stack
fmovem.x fp1,FP_DST(a6) * put EXOP on stack
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
move.w #$30cc,EXC_VOFF(a6) * vector offset = 0xcc
move.w #$e003,2+FP_DST(a6)
frestore FP_DST(a6) * restore EXOP
move.l (a6),a6 * restore frame pointer
move.l __LOCAL___SIZE+EXC_SR(sp),__LOCAL___SIZE+EXC_SR-$c(sp)
move.l __LOCAL___SIZE+2+EXC_PC(sp),__LOCAL___SIZE+2+EXC_PC-$c(sp)
move.l __LOCAL___SIZE+EXC_EA(sp),__LOCAL___SIZE+EXC_EA-$c(sp)
* now, copy the result to the proper place on the stack
move.l __LOCAL___SIZE+FP_SRC_EX(sp),__LOCAL___SIZE+EXC_SR+$0(sp)
move.l __LOCAL___SIZE+FP_SRC_HI(sp),__LOCAL___SIZE+EXC_SR+$4(sp)
move.l __LOCAL___SIZE+FP_SRC_LO(sp),__LOCAL___SIZE+EXC_SR+$8(sp)
add.l #__LOCAL___SIZE-$8.l.l,sp
bra.l _real_unfl
* fmove in and out enter here.
fu_inex:
fmovem.x fp1,FP_SRC(a6) * save EXOP to the stack
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
move.w #$30c4,EXC_VOFF(a6) * vector offset = 0xc4
move.w #$e001,2+FP_SRC(a6)
frestore FP_SRC(a6) * restore EXOP
unlk a6
bra.l _real_inex
*########################################################################
*########################################################################
fu_in_pack:
* I'm not sure at this point what FPSR bits are valid for this instruction.
* so, since the emulation routines re-create them anyways, zero exception field
andi.l #$0ff00ff,USER_FPSR(a6) * zero exception field
fmove.l #$0,fpcr * zero current control regs
fmove.l #$0,fpsr
bsr.l get_packed * fetch packed src operand
lea FP_SRC(a6),a0 * pass ptr to src
bsr.l set_tag_x * set src optype tag
move.b d0,STAG(a6) * save src optype tag
bfextu EXC_CMDREG(a6){#6:#3},d0 * dyadic; load dst reg
* bit five of the fp extension word separates the monadic and dyadic operations
* at this point
btst #$5,1+EXC_CMDREG(a6) * is operation monadic or dyadic?
beq.b fu_extract_p * monadic
cmpi.b #$3a,1+EXC_CMDREG(a6) * is operation an ftst?
beq.b fu_extract_p * yes, so it's monadic, too
bsr.l load_fpn2 * load dst into FP_DST
lea FP_DST(a6),a0 * pass: ptr to dst op
bsr.l set_tag_x * tag the operand type
cmpi.b #UNNORM,d0 * is operand an UNNORM?
bne.b fu_op2_done_p * no
bsr.l unnorm_fix * yes; convert to NORM,DENORM,or ZERO
fu_op2_done_p:
move.b d0,DTAG(a6) * save dst optype tag
fu_extract_p:
clr.l d0
move.b FPCR_MODE(a6),d0 * fetch rnd mode/prec
bfextu 1+EXC_CMDREG(a6){#1:#7},d1 * extract extension
lea FP_SRC(a6),a0
lea FP_DST(a6),a1
move.l (tbl_unsupp.l,pc,d1.l*4),d1 * fetch routine addr
jsr (tbl_unsupp.l,pc,d1.l*1)
*
* Exceptions in order of precedence:
* BSUN : none
* SNAN : all dyadic ops
* OPERR : fsqrt(-NORM)
* OVFL : all except ftst,fcmp
* UNFL : all except ftst,fcmp
* DZ : fdiv
* INEX2 : all except ftst,fcmp
* INEX1 : all
*
* we determine the highest priority exception(if any) set by the
* emulation routine that has also been enabled by the user.
move.b FPCR_ENABLE(a6),d0 * fetch exceptions enabled
bne.w fu_in_ena_p * some are enabled
fu_in_cont_p:
* fcmp and ftst do not store any result.
move.b 1+EXC_CMDREG(a6),d0 * fetch extension
andi.b #$38,d0 * extract bits 3-5
cmpi.b #$38,d0 * is instr fcmp or ftst?
beq.b fu_in_exit_p * yes
bfextu EXC_CMDREG(a6){#6:#3},d0 * dyadic; load dst reg
bsr.l store_fpreg * store the result
fu_in_exit_p:
btst #$5,EXC_SR(a6) * user or supervisor?
bne.w fu_in_exit_s_p * supervisor
move.l EXC_A7(a6),a0 * update user a7
move.l a0,usp
fu_in_exit_cont_p:
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6 * unravel stack frame
btst #$7,(sp) * is trace on?
bne.w fu_trace_p * yes
bra.l _fpsp_done * exit to os
* the exception occurred in supervisor mode. check to see if the
* addressing mode was (a7)+. if so, we'll need to shift the
* stack frame "up".
fu_in_exit_s_p:
btst #mia7_bit,SPCOND_FLG(a6) * was ea mode (a7)+
beq.b fu_in_exit_cont_p * no
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6 * unravel stack frame
* shift the stack frame "up". we don't really care about the <ea> field.
move.l $4(sp),$10(sp)
move.l $0.w(sp),$c(sp)
add.l #$c,sp
btst #$7,(sp) * is trace on?
bne.w fu_trace_p * yes
bra.l _fpsp_done * exit to os
fu_in_ena_p:
and.b FPSR_EXCEPT(a6),d0 * keep only ones enabled & set
bfffo d0{#24:#8},d0 * find highest priority exception
bne.b fu_in_exc_p * at least one was set
*
* No exceptions occurred that were also enabled. Now:
*
* if (OVFL && ovfl_disabled && inexact_enabled) {
* branch to _real_inex() (even if the result was exact!);
* } else {
* save the result in the proper fp reg (unless the op is fcmp or ftst);
* return;
* }
*
btst #ovfl_bit,FPSR_EXCEPT(a6) * was overflow set?
beq.w fu_in_cont_p * no
fu_in_ovflchk_p:
btst #inex2_bit,FPCR_ENABLE(a6) * was inexact enabled?
beq.w fu_in_cont_p * no
bra.w fu_in_exc_ovfl_p * do _real_inex() now
*
* An exception occurred and that exception was enabled:
*
* shift enabled exception field into lo byte of d0;
* if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
* ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
* /*
* * this is the case where we must call _real_inex() now or else
* * there will be no other way to pass it the exceptional operand
* */
* call _real_inex();
* } else {
* restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
* }
*
fu_in_exc_p:
subi.l #24,d0 * fix offset to be 0-8
cmpi.b #$6,d0 * is exception INEX? (6 or 7)
blt.b fu_in_exc_exit_p * no
* the enabled exception was inexact
btst #unfl_bit,FPSR_EXCEPT(a6) * did disabled underflow occur?
bne.w fu_in_exc_unfl_p * yes
btst #ovfl_bit,FPSR_EXCEPT(a6) * did disabled overflow occur?
bne.w fu_in_exc_ovfl_p * yes
* here, we insert the correct fsave status value into the fsave frame for the
* corresponding exception. the operand in the fsave frame should be the original
* src operand.
* as a reminder for future predicted pain and agony, we are passing in fsave the
* "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
* this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
fu_in_exc_exit_p:
btst #$5,EXC_SR(a6) * user or supervisor?
bne.w fu_in_exc_exit_s_p * supervisor
move.l EXC_A7(a6),a0 * update user a7
move.l a0,usp
fu_in_exc_exit_cont_p:
move.w (tbl_except_p.b,pc,d0.w*2),2+FP_SRC(a6)
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6) * restore src op
unlk a6
btst #$7,(sp) * is trace enabled?
bne.w fu_trace_p * yes
bra.l _fpsp_done
tbl_except_p:
.dc.w $e000,$e006,$e004,$e005
.dc.w $e003,$e002,$e001,$e001
fu_in_exc_ovfl_p:
move.w #$3,d0
bra.w fu_in_exc_exit_p
fu_in_exc_unfl_p:
move.w #$4,d0
bra.w fu_in_exc_exit_p
fu_in_exc_exit_s_p:
btst #mia7_bit,SPCOND_FLG(a6)
beq.b fu_in_exc_exit_cont_p
move.w (tbl_except_p.b,pc,d0.w*2),2+FP_SRC(a6)
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6) * restore src op
unlk a6 * unravel stack frame
* shift stack frame "up". who cares about <ea> field.
move.l $4(sp),$10(sp)
move.l $0.w(sp),$c(sp)
add.l #$c,sp
btst #$7,(sp) * is trace on?
bne.b fu_trace_p * yes
bra.l _fpsp_done * exit to os
*
* The opclass two PACKED instruction that took an "Unimplemented Data Type"
* exception was being traced. Make the "current" PC the FPIAR and put it in the
* trace stack frame then jump to _real_trace().
*
* UNSUPP FRAME TRACE FRAME
* ***************** *****************
* * EA * * Current *
* * * * PC *
* ***************** *****************
* * 0x2 * 0x0dc * * 0x2 * 0x024 *
* ***************** *****************
* * Next * * Next *
* * PC * * PC *
* ***************** *****************
* * SR * * SR *
* ***************** *****************
fu_trace_p:
move.w #$2024,$6(sp)
fmove.l fpiar,$8(sp)
bra.l _real_trace
*########################################################
*########################################################
fu_out_pack:
* I'm not sure at this point what FPSR bits are valid for this instruction.
* so, since the emulation routines re-create them anyways, zero exception field.
* fmove out doesn't affect ccodes.
andi.l #$ffff00ff,USER_FPSR(a6) * zero exception field
fmove.l #$0,fpcr * zero current control regs
fmove.l #$0,fpsr
bfextu EXC_CMDREG(a6){#6:#3},d0
bsr.l load_fpn1
* unlike other opclass 3, unimplemented data type exceptions, packed must be
* able to detect all operand types.
lea FP_SRC(a6),a0
bsr.l set_tag_x * tag the operand type
cmpi.b #UNNORM,d0 * is operand an UNNORM?
bne.b fu_op2_p * no
bsr.l unnorm_fix * yes; convert to NORM,DENORM,or ZERO
fu_op2_p:
move.b d0,STAG(a6) * save src optype tag
clr.l d0
move.b FPCR_MODE(a6),d0 * fetch rnd mode/prec
lea FP_SRC(a6),a0 * pass ptr to src operand
move.l (a6),EXC_A6(a6) * in case a6 changes
bsr.l fout * call fmove out routine
* Exceptions in order of precedence:
* BSUN : no
* SNAN : yes
* OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
* OVFL : no
* UNFL : no
* DZ : no
* INEX2 : yes
* INEX1 : no
* determine the highest priority exception(if any) set by the
* emulation routine that has also been enabled by the user.
move.b FPCR_ENABLE(a6),d0 * fetch exceptions enabled
bne.w fu_out_ena_p * some are enabled
fu_out_exit_p:
move.l EXC_A6(a6),(a6) * restore a6
btst #$5,EXC_SR(a6) * user or supervisor?
bne.b fu_out_exit_s_p * supervisor
move.l EXC_A7(a6),a0 * update user a7
move.l a0,usp
fu_out_exit_cont_p:
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6 * unravel stack frame
btst #$7,(sp) * is trace on?
bne.w fu_trace_p * yes
bra.l _fpsp_done * exit to os
* the exception occurred in supervisor mode. check to see if the
* addressing mode was -(a7). if so, we'll need to shift the
* stack frame "down".
fu_out_exit_s_p:
btst #mda7_bit,SPCOND_FLG(a6) * was ea mode -(a7)
beq.b fu_out_exit_cont_p * no
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
move.l (a6),a6 * restore frame pointer
move.l __LOCAL___SIZE+EXC_SR(sp),__LOCAL___SIZE+EXC_SR-$c(sp)
move.l __LOCAL___SIZE+2+EXC_PC(sp),__LOCAL___SIZE+2+EXC_PC-$c(sp)
* now, copy the result to the proper place on the stack
move.l __LOCAL___SIZE+FP_DST_EX(sp),__LOCAL___SIZE+EXC_SR+$0(sp)
move.l __LOCAL___SIZE+FP_DST_HI(sp),__LOCAL___SIZE+EXC_SR+$4(sp)
move.l __LOCAL___SIZE+FP_DST_LO(sp),__LOCAL___SIZE+EXC_SR+$8(sp)
add.l #__LOCAL___SIZE-$8.l.l,sp
btst #$7,(sp)
bne.w fu_trace_p
bra.l _fpsp_done
fu_out_ena_p:
and.b FPSR_EXCEPT(a6),d0 * keep only ones enabled
bfffo d0{#24:#8},d0 * find highest priority exception
beq.w fu_out_exit_p
move.l EXC_A6(a6),(a6) * restore a6
* an exception occurred and that exception was enabled.
* the only exception possible on packed move out are INEX, OPERR, and SNAN.
fu_out_exc_p:
cmpi.b #$1a,d0
bgt.w fu_inex_p2
beq.w fu_operr_p
fu_snan_p:
btst #$5,EXC_SR(a6)
bne.b fu_snan_s_p
move.l EXC_A7(a6),a0
move.l a0,usp
bra.w fu_snan
fu_snan_s_p:
cmpi.b #mda7_flg,SPCOND_FLG(a6)
bne.w fu_snan
* the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
* the strategy is to move the exception frame "down" 12 bytes. then, we
* can store the default result where the exception frame was.
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
move.w #$30d8,EXC_VOFF(a6) * vector offset = 0xd0
move.w #$e006,2+FP_SRC(a6) * set fsave status
frestore FP_SRC(a6) * restore src operand
move.l (a6),a6 * restore frame pointer
move.l __LOCAL___SIZE+EXC_SR(sp),__LOCAL___SIZE+EXC_SR-$c(sp)
move.l __LOCAL___SIZE+2+EXC_PC(sp),__LOCAL___SIZE+2+EXC_PC-$c(sp)
move.l __LOCAL___SIZE+EXC_EA(sp),__LOCAL___SIZE+EXC_EA-$c(sp)
* now, we copy the default result to it's proper location
move.l __LOCAL___SIZE+FP_DST_EX(sp),__LOCAL___SIZE+$4(sp)
move.l __LOCAL___SIZE+FP_DST_HI(sp),__LOCAL___SIZE+$8(sp)
move.l __LOCAL___SIZE+FP_DST_LO(sp),__LOCAL___SIZE+$c(sp)
add.l #__LOCAL___SIZE-$8.l.l,sp
bra.l _real_snan
fu_operr_p:
btst #$5,EXC_SR(a6)
bne.w fu_operr_p_s
move.l EXC_A7(a6),a0
move.l a0,usp
bra.w fu_operr
fu_operr_p_s:
cmpi.b #mda7_flg,SPCOND_FLG(a6)
bne.w fu_operr
* the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
* the strategy is to move the exception frame "down" 12 bytes. then, we
* can store the default result where the exception frame was.
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
move.w #$30d0,EXC_VOFF(a6) * vector offset = 0xd0
move.w #$e004,2+FP_SRC(a6) * set fsave status
frestore FP_SRC(a6) * restore src operand
move.l (a6),a6 * restore frame pointer
move.l __LOCAL___SIZE+EXC_SR(sp),__LOCAL___SIZE+EXC_SR-$c(sp)
move.l __LOCAL___SIZE+2+EXC_PC(sp),__LOCAL___SIZE+2+EXC_PC-$c(sp)
move.l __LOCAL___SIZE+EXC_EA(sp),__LOCAL___SIZE+EXC_EA-$c(sp)
* now, we copy the default result to it's proper location
move.l __LOCAL___SIZE+FP_DST_EX(sp),__LOCAL___SIZE+$4(sp)
move.l __LOCAL___SIZE+FP_DST_HI(sp),__LOCAL___SIZE+$8(sp)
move.l __LOCAL___SIZE+FP_DST_LO(sp),__LOCAL___SIZE+$c(sp)
add.l #__LOCAL___SIZE-$8.l.l,sp
bra.l _real_operr
fu_inex_p2:
btst #$5,EXC_SR(a6)
bne.w fu_inex_s_p2
move.l EXC_A7(a6),a0
move.l a0,usp
bra.w fu_inex
fu_inex_s_p2:
cmpi.b #mda7_flg,SPCOND_FLG(a6)
bne.w fu_inex
* the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
* the strategy is to move the exception frame "down" 12 bytes. then, we
* can store the default result where the exception frame was.
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0/fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
move.w #$30c4,EXC_VOFF(a6) * vector offset = 0xc4
move.w #$e001,2+FP_SRC(a6) * set fsave status
frestore FP_SRC(a6) * restore src operand
move.l (a6),a6 * restore frame pointer
move.l __LOCAL___SIZE+EXC_SR(sp),__LOCAL___SIZE+EXC_SR-$c(sp)
move.l __LOCAL___SIZE+2+EXC_PC(sp),__LOCAL___SIZE+2+EXC_PC-$c(sp)
move.l __LOCAL___SIZE+EXC_EA(sp),__LOCAL___SIZE+EXC_EA-$c(sp)
* now, we copy the default result to it's proper location
move.l __LOCAL___SIZE+FP_DST_EX(sp),__LOCAL___SIZE+$4(sp)
move.l __LOCAL___SIZE+FP_DST_HI(sp),__LOCAL___SIZE+$8(sp)
move.l __LOCAL___SIZE+FP_DST_LO(sp),__LOCAL___SIZE+$c(sp)
add.l #__LOCAL___SIZE-$8.l.l,sp
bra.l _real_inex
*########################################################################
*
* if we're stuffing a source operand back into an fsave frame then we
* have to make sure that for single or double source operands that the
* format stuffed is as weird as the hardware usually makes it.
*
global funimp_skew
funimp_skew:
bfextu EXC_EXTWORD(a6){#3:#3},d0 * extract src specifier
cmpi.b #$1,d0 * was src sgl?
beq.b funimp_skew_sgl * yes
cmpi.b #$5,d0 * was src dbl?
beq.b funimp_skew_dbl * yes
rts
funimp_skew_sgl:
move.w FP_SRC_EX(a6),d0 * fetch DENORM exponent
andi.w #$7fff,d0 * strip sign
beq.b funimp_skew_sgl_not
cmpi.w #$3f80,d0
bgt.b funimp_skew_sgl_not
neg.w d0 * make exponent negative
addi.w #$3f81,d0 * find amt to shift
move.l FP_SRC_HI(a6),d1 * fetch DENORM hi(man)
lsr.l d0,d1 * shift it
bset #31,d1 * set j-bit
move.l d1,FP_SRC_HI(a6) * insert new hi(man)
andi.w #$8000,FP_SRC_EX(a6) * clear old exponent
ori.w #$3f80,FP_SRC_EX(a6) * insert new "skewed" exponent
funimp_skew_sgl_not:
rts
funimp_skew_dbl:
move.w FP_SRC_EX(a6),d0 * fetch DENORM exponent
andi.w #$7fff,d0 * strip sign
beq.b funimp_skew_dbl_not
cmpi.w #$3c00,d0
bgt.b funimp_skew_dbl_not
tst.b FP_SRC_EX(a6) * make "internal format"
smi.b $2+FP_SRC(a6)
move.w d0,FP_SRC_EX(a6) * insert exponent with cleared sign
clr.l d0 * clear g,r,s
lea FP_SRC(a6),a0 * pass ptr to src op
move.w #$3c01,d1 * pass denorm threshold
bsr.l dnrm_lp * denorm it
move.w #$3c00,d0 * new exponent
tst.b $2+FP_SRC(a6) * is sign set?
beq.b fss_dbl_denorm_done * no
bset #15,d0 * set sign
fss_dbl_denorm_done:
bset #$7,FP_SRC_HI(a6) * set j-bit
move.w d0,FP_SRC_EX(a6) * insert new exponent
funimp_skew_dbl_not:
rts
*########################################################################
global _mem_write2
_mem_write2:
btst #$5,EXC_SR(a6)
beq.l _dmem_write
move.l $0.w(a0),FP_DST_EX(a6)
move.l $4(a0),FP_DST_HI(a6)
move.l $8(a0),FP_DST_LO(a6)
clr.l d1
rts
*########################################################################
* XDEF **************************************************************** #
* _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented #
* effective address" exception. #
* #
* This handler should be the first code executed upon taking the #
* FP Unimplemented Effective Address exception in an operating #
* system. #
* #
* XREF **************************************************************** #
* _imem_read_long() - read instruction longword #
* fix_skewed_ops() - adjust src operand in fsave frame #
* set_tag_x() - determine optype of src/dst operands #
* store_fpreg() - store opclass 0 or 2 result to FP regfile #
* unnorm_fix() - change UNNORM operands to NORM or ZERO #
* load_fpn2() - load dst operand from FP regfile #
* tbl_unsupp - add of table of emulation routines for opclass 0,2 #
* decbin() - convert packed data to FP binary data #
* _real_fpu_disabled() - "callout" for "FPU disabled" exception #
* _real_access() - "callout" for access error exception #
* _mem_read() - read extended immediate operand from memory #
* _fpsp_done() - "callout" for exit; work all done #
* _real_trace() - "callout" for Trace enabled exception #
* fmovm_dynamic() - emulate dynamic fmovm instruction #
* fmovm_ctrl() - emulate fmovm control instruction #
* #
* INPUT *************************************************************** #
* - The system stack contains the "Unimplemented <ea>" stk frame #
* #
* OUTPUT ************************************************************** #
* If access error: #
* - The system stack is changed to an access error stack frame #
* If FPU disabled: #
* - The system stack is changed to an FPU disabled stack frame #
* If Trace exception enabled: #
* - The system stack is changed to a Trace exception stack frame #
* Else: (normal case) #
* - None (correct result has been stored as appropriate) #
* #
* ALGORITHM *********************************************************** #
* This exception handles 3 types of operations: #
* (1) FP Instructions using extended precision or packed immediate #
* addressing mode. #
* (2) The "fmovm.x" instruction w/ dynamic register specification. #
* (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
* #
* For immediate data operations, the data is read in w/ a #
* _mem_read() "callout", converted to FP binary (if packed), and used #
* as the source operand to the instruction specified by the instruction #
* word. If no FP exception should be reported ads a result of the #
* emulation, then the result is stored to the destination register and #
* the handler exits through _fpsp_done(). If an enabled exc has been #
* signalled as a result of emulation, then an fsave state frame #
* corresponding to the FP exception type must be entered into the 060 #
* FPU before exiting. In either the enabled or disabled cases, we #
* must also check if a Trace exception is pending, in which case, we #
* must create a Trace exception stack frame from the current exception #
* stack frame. If no Trace is pending, we simply exit through #
* _fpsp_done(). #
* For "fmovm.x", call the routine fmovm_dynamic() which will #
* decode and emulate the instruction. No FP exceptions can be pending #
* as a result of this operation emulation. A Trace exception can be #
* pending, though, which means the current stack frame must be changed #
* to a Trace stack frame and an exit made through _real_trace(). #
* For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
* was executed from supervisor mode, this handler must store the FP #
* register file values to the system stack by itself since #
* fmovm_dynamic() can't handle this. A normal exit is made through #
* fpsp_done(). #
* For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
* Again, a Trace exception may be pending and an exit made through #
* _real_trace(). Else, a normal exit is made through _fpsp_done(). #
* #
* Before any of the above is attempted, it must be checked to #
* see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
* before the "FPU disabled" exception, but the "FPU disabled" exception #
* has higher priority, we check the disabled bit in the PCR. If set, #
* then we must create an 8 word "FPU disabled" exception stack frame #
* from the current 4 word exception stack frame. This includes #
* reproducing the effective address of the instruction to put on the #
* new stack frame. #
* #
* In the process of all emulation work, if a _mem_read() #
* "callout" returns a failing result indicating an access error, then #
* we must create an access error stack frame from the current stack #
* frame. This information includes a faulting address and a fault- #
* status-longword. These are created within this handler. #
* #
*########################################################################
global _fpsp_effadd
_fpsp_effadd:
* This exception type takes priority over the "Line F Emulator"
* exception. Therefore, the FPU could be disabled when entering here.
* So, we must check to see if it's disabled and handle that case separately.
move.l d0,-(sp) * save d0
movec pcr,d0 * load proc cr
btst #$1,d0 * is FPU disabled?
bne.w iea_disabled * yes
move.l (sp)+,d0 * restore d0
link a6,#-__LOCAL___SIZE * init stack frame
movem.l d0-d1/a0-a1,EXC_DREGS(a6) * save d0-d1/a0-a1
fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) * save ctrl regs
fmovem.x fp0-fp1,EXC_FPREGS(a6) * save fp0-fp1 on stack
* PC of instruction that took the exception is the PC in the frame
move.l EXC_PC(a6),EXC_EXTWPTR(a6)
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch the instruction words
move.l d0,EXC_OPWORD(a6) * store OPWORD and EXTWORD
*########################################################################
tst.w d0 * is operation fmovem?
bmi.w iea_fmovem * yes
*
* here, we will have:
* fabs fdabs fsabs facos fmod
* fadd fdadd fsadd fasin frem
* fcmp fatan fscale
* fdiv fddiv fsdiv fatanh fsin
* fint fcos fsincos
* fintrz fcosh fsinh
* fmove fdmove fsmove fetox ftan
* fmul fdmul fsmul fetoxm1 ftanh
* fneg fdneg fsneg fgetexp ftentox
* fsgldiv fgetman ftwotox
* fsglmul flog10
* fsqrt flog2
* fsub fdsub fssub flogn
* ftst flognp1
* which can all use f<op>.{x,p}
* so, now it's immediate data extended precision AND PACKED FORMAT!
*
iea_op:
andi.l #$00ff00ff,USER_FPSR(a6)
btst #$a,d0 * is src fmt x or p?
bne.b iea_op_pack * packed
move.l EXC_EXTWPTR(a6),a0 * pass: ptr to #<data>
lea FP_SRC(a6),a1 * pass: ptr to super addr
moveq.l #$c,d0 * pass: 12 bytes
bsr.l _imem_read * read extended immediate
tst.l d1 * did ifetch fail?
bne.w iea_iacc * yes
bra.b iea_op_setsrc
iea_op_pack:
move.l EXC_EXTWPTR(a6),a0 * pass: ptr to #<data>
lea FP_SRC(a6),a1 * pass: ptr to super dst
moveq.l #$c,d0 * pass: 12 bytes
bsr.l _imem_read * read packed operand
tst.l d1 * did ifetch fail?
bne.w iea_iacc * yes
* The packed operand is an INF or a NAN if the exponent field is all ones.
bfextu FP_SRC(a6){#1:#15},d0 * get exp
cmpi.w #$7fff,d0 * INF or NAN?
beq.b iea_op_setsrc * operand is an INF or NAN
* The packed operand is a zero if the mantissa is all zero, else it's
* a normal packed op.
move.b 3+FP_SRC(a6),d0 * get byte 4
andi.b #$0f,d0 * clear all but last nybble
bne.b iea_op_gp_not_spec * not a zero
tst.l FP_SRC_HI(a6) * is lw 2 zero?
bne.b iea_op_gp_not_spec * not a zero
tst.l FP_SRC_LO(a6) * is lw 3 zero?
beq.b iea_op_setsrc * operand is a ZERO
iea_op_gp_not_spec:
lea FP_SRC(a6),a0 * pass: ptr to packed op
bsr.l decbin * convert to extended
fmovem.x fp0,FP_SRC(a6) * make this the srcop
iea_op_setsrc:
addi.l #$c,EXC_EXTWPTR(a6) * update extension word pointer
* FP_SRC now holds the src operand.
lea FP_SRC(a6),a0 * pass: ptr to src op
bsr.l set_tag_x * tag the operand type
move.b d0,STAG(a6) * could be ANYTHING!!!
cmpi.b #UNNORM,d0 * is operand an UNNORM?
bne.b iea_op_getdst * no
bsr.l unnorm_fix * yes; convert to NORM/DENORM/ZERO
move.b d0,STAG(a6) * set new optype tag
iea_op_getdst:
clr.b STORE_FLG(a6) * clear "store result" boolean
btst #$5,1+EXC_CMDREG(a6) * is operation monadic or dyadic?
beq.b iea_op_extract * monadic
btst #$4,1+EXC_CMDREG(a6) * is operation fsincos,ftst,fcmp?
bne.b iea_op_spec * yes
iea_op_loaddst:
bfextu EXC_CMDREG(a6){#6:#3},d0 * fetch dst regno
bsr.l load_fpn2 * load dst operand
lea FP_DST(a6),a0 * pass: ptr to dst op
bsr.l set_tag_x * tag the operand type
move.b d0,DTAG(a6) * could be ANYTHING!!!
cmpi.b #UNNORM,d0 * is operand an UNNORM?
bne.b iea_op_extract * no
bsr.l unnorm_fix * yes; convert to NORM/DENORM/ZERO
move.b d0,DTAG(a6) * set new optype tag
bra.b iea_op_extract
* the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
iea_op_spec:
btst #$3,1+EXC_CMDREG(a6) * is operation fsincos?
beq.b iea_op_extract * yes
* now, we're left with ftst and fcmp. so, first let's tag them so that they don't
* store a result. then, only fcmp will branch back and pick up a dst operand.
st STORE_FLG(a6) * don't store a final result
btst #$1,1+EXC_CMDREG(a6) * is operation fcmp?
beq.b iea_op_loaddst * yes
iea_op_extract:
clr.l d0
move.b FPCR_MODE(a6),d0 * pass: rnd mode,prec
move.b 1+EXC_CMDREG(a6),d1
andi.w #$007f,d1 * extract extension
fmove.l #$0,fpcr
fmove.l #$0,fpsr
lea FP_SRC(a6),a0
lea FP_DST(a6),a1
move.l (tbl_unsupp.l,pc,d1.w*4),d1 * fetch routine addr
jsr (tbl_unsupp.l,pc,d1.l*1)
*
* Exceptions in order of precedence:
* BSUN : none
* SNAN : all operations
* OPERR : all reg-reg or mem-reg operations that can normally operr
* OVFL : same as OPERR
* UNFL : same as OPERR
* DZ : same as OPERR
* INEX2 : same as OPERR
* INEX1 : all packed immediate operations
*
* we determine the highest priority exception(if any) set by the
* emulation routine that has also been enabled by the user.
move.b FPCR_ENABLE(a6),d0 * fetch exceptions enabled
bne.b iea_op_ena * some are enabled
* now, we save the result, unless, of course, the operation was ftst or fcmp.
* these don't save results.
iea_op_save:
tst.b STORE_FLG(a6) * does this op store a result?
bne.b iea_op_exit1 * exit with no frestore
iea_op_store:
bfextu EXC_CMDREG(a6){#6:#3},d0 * fetch dst regno
bsr.l store_fpreg * store the result
iea_op_exit1:
move.l EXC_PC(a6),USER_FPIAR(a6) * set FPIAR to "Current PC"
move.l EXC_EXTWPTR(a6),EXC_PC(a6) * set "Next PC" in exc frame
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6 * unravel the frame
btst #$7,(sp) * is trace on?
bne.w iea_op_trace * yes
bra.l _fpsp_done * exit to os
iea_op_ena:
and.b FPSR_EXCEPT(a6),d0 * keep only ones enable and set
bfffo d0{#24:#8},d0 * find highest priority exception
bne.b iea_op_exc * at least one was set
* no exception occurred. now, did a disabled, exact overflow occur with inexact
* enabled? if so, then we have to stuff an overflow frame into the FPU.
btst #ovfl_bit,FPSR_EXCEPT(a6) * did overflow occur?
beq.b iea_op_save
iea_op_ovfl:
btst #inex2_bit,FPCR_ENABLE(a6) * is inexact enabled?
beq.b iea_op_store * no
bra.b iea_op_exc_ovfl * yes
* an enabled exception occurred. we have to insert the exception type back into
* the machine.
iea_op_exc:
subi.l #24,d0 * fix offset to be 0-8
cmpi.b #$6,d0 * is exception INEX?
bne.b iea_op_exc_force * no
* the enabled exception was inexact. so, if it occurs with an overflow
* or underflow that was disabled, then we have to force an overflow or
* underflow frame.
btst #ovfl_bit,FPSR_EXCEPT(a6) * did overflow occur?
bne.b iea_op_exc_ovfl * yes
btst #unfl_bit,FPSR_EXCEPT(a6) * did underflow occur?
bne.b iea_op_exc_unfl * yes
iea_op_exc_force:
move.w (tbl_iea_except.b,pc,d0.w*2),2+FP_SRC(a6)
bra.b iea_op_exit2 * exit with frestore
tbl_iea_except:
.dc.w $e002,$e006,$e004,$e005
.dc.w $e003,$e002,$e001,$e001
iea_op_exc_ovfl:
move.w #$e005,2+FP_SRC(a6)
bra.b iea_op_exit2
iea_op_exc_unfl:
move.w #$e003,2+FP_SRC(a6)
iea_op_exit2:
move.l EXC_PC(a6),USER_FPIAR(a6) * set FPIAR to "Current PC"
move.l EXC_EXTWPTR(a6),EXC_PC(a6) * set "Next PC" in exc frame
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6) * restore exceptional state
unlk a6 * unravel the frame
btst #$7,(sp) * is trace on?
bne.b iea_op_trace * yes
bra.l _fpsp_done * exit to os
*
* The opclass two instruction that took an "Unimplemented Effective Address"
* exception was being traced. Make the "current" PC the FPIAR and put it in
* the trace stack frame then jump to _real_trace().
*
* UNIMP EA FRAME TRACE FRAME
* ***************** *****************
* * 0x0 * 0x0f0 * * Current *
* ***************** * PC *
* * Current * *****************
* * PC * * 0x2 * 0x024 *
* ***************** *****************
* * SR * * Next *
* ***************** * PC *
* *****************
* * SR *
* *****************
iea_op_trace:
move.l (sp),-(sp) * shift stack frame "down"
move.w $8(sp),$4(sp)
move.w #$2024,$6(sp) * stk fmt = 0x2; voff = 0x024
fmove.l fpiar,$8(sp) * "Current PC" is in FPIAR
bra.l _real_trace
*########################################################################
iea_fmovem:
btst #14,d0 * ctrl or data reg
beq.w iea_fmovem_ctrl
iea_fmovem_data:
btst #$5,EXC_SR(a6) * user or supervisor mode
bne.b iea_fmovem_data_s
iea_fmovem_data_u:
move.l usp,a0
move.l a0,EXC_A7(a6) * store current a7
bsr.l fmovem_dynamic * do dynamic fmovm
move.l EXC_A7(a6),a0 * load possibly new a7
move.l a0,usp * update usp
bra.w iea_fmovem_exit
iea_fmovem_data_s:
clr.b SPCOND_FLG(a6)
lea $2+EXC_VOFF(a6),a0
move.l a0,EXC_A7(a6)
bsr.l fmovem_dynamic * do dynamic fmovm
cmpi.b #mda7_flg,SPCOND_FLG(a6)
beq.w iea_fmovem_data_predec
cmpi.b #mia7_flg,SPCOND_FLG(a6)
bne.w iea_fmovem_exit
* right now, d0 = the size.
* the data has been fetched from the supervisor stack, but we have not
* incremented the stack pointer by the appropriate number of bytes.
* do it here.
iea_fmovem_data_postinc:
btst #$7,EXC_SR(a6)
bne.b iea_fmovem_data_pi_trace
move.w EXC_SR(a6),(EXC_SR.b,a6,d0.l)
move.l EXC_EXTWPTR(a6),(EXC_PC.b,a6,d0.l)
move.w #$00f0,(EXC_VOFF.b,a6,d0.l)
lea (EXC_SR.b,a6,d0.l),a0
move.l a0,EXC_SR(a6)
fmovem.x EXC_FP0(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
move.l (sp)+,sp
bra.l _fpsp_done
iea_fmovem_data_pi_trace:
move.w EXC_SR(a6),(EXC_SR-$4.b,a6,d0.l)
move.l EXC_EXTWPTR(a6),(EXC_PC-$4.b,a6,d0.l)
move.w #$2024,(EXC_VOFF-$4.b,a6,d0.l)
move.l EXC_PC(a6),(EXC_VOFF+$2-$4.b,a6,d0.l)
lea (EXC_SR-$4.b,a6,d0.l),a0
move.l a0,EXC_SR(a6)
fmovem.x EXC_FP0(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
move.l (sp)+,sp
bra.l _real_trace
* right now, d1 = size and d0 = the strg.
iea_fmovem_data_predec:
move.b d1,EXC_VOFF(a6) * store strg
move.b d0,$1+EXC_VOFF(a6) * store size
fmovem.x EXC_FP0(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
move.l (a6),-(sp) * make a copy of a6
move.l d0,-(sp) * save d0
move.l d1,-(sp) * save d1
move.l EXC_EXTWPTR(a6),-(sp) * make a copy of Next PC
clr.l d0
move.b $1+EXC_VOFF(a6),d0 * fetch size
neg.l d0 * get negative of size
btst #$7,EXC_SR(a6) * is trace enabled?
beq.b iea_fmovem_data_p2
move.w EXC_SR(a6),(EXC_SR-$4.b,a6,d0.l)
move.l EXC_PC(a6),(EXC_VOFF-$2.b,a6,d0.l)
move.l (sp)+,(EXC_PC-$4.b,a6,d0.l)
move.w #$2024,(EXC_VOFF-$4.b,a6,d0.l)
pea (a6,d0.l) * create final sp
bra.b iea_fmovem_data_p3
iea_fmovem_data_p2:
move.w EXC_SR(a6),(EXC_SR.b,a6,d0.l)
move.l (sp)+,(EXC_PC.b,a6,d0.l)
move.w #$00f0,(EXC_VOFF.b,a6,d0.l)
pea ($4.b,a6,d0.l) * create final sp
iea_fmovem_data_p3:
clr.l d1
move.b EXC_VOFF(a6),d1 * fetch strg
tst.b d1
bpl.b fm_1
fmovem.x fp0,($4+$8.b,a6,d0.l)
addi.l #$c,d0
fm_1:
lsl.b #$1,d1
bpl.b fm_2
fmovem.x fp1,($4+$8.b,a6,d0.l)
addi.l #$c,d0
fm_2:
lsl.b #$1,d1
bpl.b fm_3
fmovem.x fp2,($4+$8.b,a6,d0.l)
addi.l #$c,d0
fm_3:
lsl.b #$1,d1
bpl.b fm_4
fmovem.x fp3,($4+$8.b,a6,d0.l)
addi.l #$c,d0
fm_4:
lsl.b #$1,d1
bpl.b fm_5
fmovem.x fp4,($4+$8.b,a6,d0.l)
addi.l #$c,d0
fm_5:
lsl.b #$1,d1
bpl.b fm_6
fmovem.x fp5,($4+$8.b,a6,d0.l)
addi.l #$c,d0
fm_6:
lsl.b #$1,d1
bpl.b fm_7
fmovem.x fp6,($4+$8.b,a6,d0.l)
addi.l #$c,d0
fm_7:
lsl.b #$1,d1
bpl.b fm_end
fmovem.x fp7,($4+$8.b,a6,d0.l)
fm_end:
move.l $4(sp),d1
move.l $8(sp),d0
move.l $c(sp),a6
move.l (sp)+,sp
btst #$7,(sp) * is trace enabled?
beq.l _fpsp_done
bra.l _real_trace
*########################################################################
iea_fmovem_ctrl:
bsr.l fmovem_ctrl * load ctrl regs
iea_fmovem_exit:
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
btst #$7,EXC_SR(a6) * is trace on?
bne.b iea_fmovem_trace * yes
move.l EXC_EXTWPTR(a6),EXC_PC(a6) * set Next PC
unlk a6 * unravel the frame
bra.l _fpsp_done * exit to os
*
* The control reg instruction that took an "Unimplemented Effective Address"
* exception was being traced. The "Current PC" for the trace frame is the
* PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
* After fixing the stack frame, jump to _real_trace().
*
* UNIMP EA FRAME TRACE FRAME
* ***************** *****************
* * 0x0 * 0x0f0 * * Current *
* ***************** * PC *
* * Current * *****************
* * PC * * 0x2 * 0x024 *
* ***************** *****************
* * SR * * Next *
* ***************** * PC *
* *****************
* * SR *
* *****************
* this ain't a pretty solution, but it works:
* -restore a6 (not with unlk)
* -shift stack frame down over where old a6 used to be
* -add LOCAL_SIZE to stack pointer
iea_fmovem_trace:
move.l (a6),a6 * restore frame pointer
move.w EXC_SR+__LOCAL___SIZE(sp),$0+__LOCAL___SIZE(sp)
move.l EXC_PC+__LOCAL___SIZE(sp),$8+__LOCAL___SIZE(sp)
move.l EXC_EXTWPTR+__LOCAL___SIZE(sp),$2+__LOCAL___SIZE(sp)
move.w #$2024,$6+__LOCAL___SIZE(sp) * stk fmt = 0x2; voff = 0x024
add.l #__LOCAL___SIZE.l.l,sp * clear stack frame
bra.l _real_trace
*########################################################################
* The FPU is disabled and so we should really have taken the "Line
* F Emulator" exception. So, here we create an 8-word stack frame
* from our 4-word stack frame. This means we must calculate the length
* the the faulting instruction to get the "next PC". This is trivial for
* immediate operands but requires some extra work for fmovm dynamic
* which can use most addressing modes.
iea_disabled:
move.l (sp)+,d0 * restore d0
link a6,#-__LOCAL___SIZE * init stack frame
movem.l d0-d1/a0-a1,EXC_DREGS(a6) * save d0-d1/a0-a1
* PC of instruction that took the exception is the PC in the frame
move.l EXC_PC(a6),EXC_EXTWPTR(a6)
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch the instruction words
move.l d0,EXC_OPWORD(a6) * store OPWORD and EXTWORD
tst.w d0 * is instr fmovm?
bmi.b iea_dis_fmovem * yes
* instruction is using an extended precision immediate operand. therefore,
* the total instruction length is 16 bytes.
iea_dis_immed:
moveq.l #$10,d0 * 16 bytes of instruction
bra.b iea_dis_cont
iea_dis_fmovem:
btst #$e,d0 * is instr fmovm ctrl
bne.b iea_dis_fmovem_data * no
* the instruction is a fmovm.l with 2 or 3 registers.
bfextu d0{#19:#3},d1
moveq.l #$c,d0
cmpi.b #$7,d1 * move all regs?
bne.b iea_dis_cont
addq.l #$4,d0
bra.b iea_dis_cont
* the instruction is an fmovm.x dynamic which can use many addressing
* modes and thus can have several different total instruction lengths.
* call fmovm_calc_ea which will go through the ea calc process and,
* as a by-product, will tell us how long the instruction is.
iea_dis_fmovem_data:
clr.l d0
bsr.l fmovem_calc_ea
move.l EXC_EXTWPTR(a6),d0
sub.l EXC_PC(a6),d0
iea_dis_cont:
move.w d0,EXC_VOFF(a6) * store stack shift value
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
* here, we actually create the 8-word frame from the 4-word frame,
* with the "next PC" as additional info.
* the <ea> field is let as undefined.
subq.l #$8,sp * make room for new stack
move.l d0,-(sp) * save d0
move.w $c(sp),$4(sp) * move SR
move.l $e(sp),$6(sp) * move Current PC
clr.l d0
move.w $12(sp),d0
move.l $6(sp),$10(sp) * move Current PC
add.l d0,$6(sp) * make Next PC
move.w #$402c,$a(sp) * insert offset,frame format
move.l (sp)+,d0 * restore d0
bra.l _real_fpu_disabled
*#########
iea_iacc:
movec pcr,d0
btst #$1,d0
bne.b iea_iacc_cont
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1 on stack
iea_iacc_cont:
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
subq.w #$8,sp * make stack frame bigger
move.l $8(sp),(sp) * store SR,hi(PC)
move.w $c(sp),$4(sp) * store lo(PC)
move.w #$4008,$6(sp) * store voff
move.l $2(sp),$8(sp) * store ea
move.l #$09428001,$c(sp) * store fslw
iea_acc_done:
btst #$5,(sp) * user or supervisor mode?
beq.b iea_acc_done2 * user
bset #$2,$d(sp) * set supervisor TM bit
iea_acc_done2:
bra.l _real_access
iea_dacc:
lea -__LOCAL___SIZE(a6),sp
movec pcr,d1
btst #$1,d1
bne.b iea_dacc_cont
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1 on stack
fmovem.l __LOCAL___SIZE+USER_FPCR(sp),fpcr/fpsr/fpiar * restore ctrl regs
iea_dacc_cont:
move.l (a6),a6
move.l $4+__LOCAL___SIZE(sp),-$8+$4+__LOCAL___SIZE(sp)
move.w $8+__LOCAL___SIZE(sp),-$8+$8+__LOCAL___SIZE(sp)
move.w #$4008,-$8+$a+__LOCAL___SIZE(sp)
move.l a0,-$8+$c+__LOCAL___SIZE(sp)
move.w d0,-$8+$10+__LOCAL___SIZE(sp)
move.w #$0001,-$8+$12+__LOCAL___SIZE(sp)
movem.l __LOCAL___SIZE+EXC_DREGS(sp),d0-d1/a0-a1 * restore d0-d1/a0-a1
add.w #__LOCAL___SIZE-$4,sp
bra.b iea_acc_done
*########################################################################
* XDEF **************************************************************** #
* _fpsp_operr(): 060FPSP entry point for FP Operr exception. #
* #
* This handler should be the first code executed upon taking the #
* FP Operand Error exception in an operating system. #
* #
* XREF **************************************************************** #
* _imem_read_long() - read instruction longword #
* fix_skewed_ops() - adjust src operand in fsave frame #
* _real_operr() - "callout" to operating system operr handler #
* _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
* store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
* facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
* #
* INPUT *************************************************************** #
* - The system stack contains the FP Operr exception frame #
* - The fsave frame contains the source operand #
* #
* OUTPUT ************************************************************** #
* No access error: #
* - The system stack is unchanged #
* - The fsave frame contains the adjusted src op for opclass 0,2 #
* #
* ALGORITHM *********************************************************** #
* In a system where the FP Operr exception is enabled, the goal #
* is to get to the handler specified at _real_operr(). But, on the 060, #
* for opclass zero and two instruction taking this exception, the #
* input operand in the fsave frame may be incorrect for some cases #
* and needs to be corrected. This handler calls fix_skewed_ops() to #
* do just this and then exits through _real_operr(). #
* For opclass 3 instructions, the 060 doesn't store the default #
* operr result out to memory or data register file as it should. #
* This code must emulate the move out before finally exiting through #
* _real_inex(). The move out, if to memory, is performed using #
* _mem_write() "callout" routines that may return a failing result. #
* In this special case, the handler must exit through facc_out() #
* which creates an access error stack frame from the current operr #
* stack frame. #
* #
*########################################################################
global _fpsp_operr
_fpsp_operr:
link.w a6,#-__LOCAL___SIZE * init stack frame
fsave FP_SRC(a6) * grab the "busy" frame
movem.l d0-d1/a0-a1,EXC_DREGS(a6) * save d0-d1/a0-a1
fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) * save ctrl regs
fmovem.x fp0-fp1,EXC_FPREGS(a6) * save fp0-fp1 on stack
* the FPIAR holds the "current PC" of the faulting instruction
move.l USER_FPIAR(a6),EXC_EXTWPTR(a6)
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch the instruction words
move.l d0,EXC_OPWORD(a6)
*#############################################################################
btst #13,d0 * is instr an fmove out?
bne.b foperr_out * fmove out
* here, we simply see if the operand in the fsave frame needs to be "unskewed".
* this would be the case for opclass two operations with a source infinity or
* denorm operand in the sgl or dbl format. NANs also become skewed, but can't
* cause an operr so we don't need to check for them here.
lea FP_SRC(a6),a0 * pass: ptr to src op
bsr.l fix_skewed_ops * fix src op
foperr_exit:
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6)
unlk a6
bra.l _real_operr
*#######################################################################
*
* the hardware does not save the default result to memory on enabled
* operand error exceptions. we do this here before passing control to
* the user operand error handler.
*
* byte, word, and long destination format operations can pass
* through here. we simply need to test the sign of the src
* operand and save the appropriate minimum or maximum integer value
* to the effective address as pointed to by the stacked effective address.
*
* although packed opclass three operations can take operand error
* exceptions, they won't pass through here since they are caught
* first by the unsupported data format exception handler. that handler
* sends them directly to _real_operr() if necessary.
*
foperr_out:
move.w FP_SRC_EX(a6),d1 * fetch exponent
andi.w #$7fff,d1
cmpi.w #$7fff,d1
bne.b foperr_out_not_qnan
* the operand is either an infinity or a QNAN.
tst.l FP_SRC_LO(a6)
bne.b foperr_out_qnan
move.l FP_SRC_HI(a6),d1
andi.l #$7fffffff,d1
beq.b foperr_out_not_qnan
foperr_out_qnan:
move.l FP_SRC_HI(a6),L_SCR1(a6)
bra.b foperr_out_jmp
foperr_out_not_qnan:
move.l #$7fffffff,d1
tst.b FP_SRC_EX(a6)
bpl.b foperr_out_not_qnan2
addq.l #$1,d1
foperr_out_not_qnan2:
move.l d1,L_SCR1(a6)
foperr_out_jmp:
bfextu d0{#19:#3},d0 * extract dst format field
move.b 1+EXC_OPWORD(a6),d1 * extract <ea> mode,reg
move.w (tbl_operr.b,pc,d0.w*2),a0
jmp (tbl_operr.b,pc,a0.l)
tbl_operr:
.dc.w foperr_out_l-tbl_operr * long word integer
.dc.w tbl_operr-tbl_operr * sgl prec shouldn't happen
.dc.w tbl_operr-tbl_operr * ext prec shouldn't happen
.dc.w foperr_exit-tbl_operr * packed won't enter here
.dc.w foperr_out_w-tbl_operr * word integer
.dc.w tbl_operr-tbl_operr * dbl prec shouldn't happen
.dc.w foperr_out_b-tbl_operr * byte integer
.dc.w tbl_operr-tbl_operr * packed won't enter here
foperr_out_b:
move.b L_SCR1(a6),d0 * load positive default result
cmpi.b #$7,d1 * is <ea> mode a data reg?
ble.b foperr_out_b_save_dn * yes
move.l EXC_EA(a6),a0 * pass: <ea> of default result
bsr.l _dmem_write_byte * write the default result
tst.l d1 * did dstore fail?
bne.l facc_out_b * yes
bra.w foperr_exit
foperr_out_b_save_dn:
andi.w #$0007,d1
bsr.l store_dreg_b * store result to regfile
bra.w foperr_exit
foperr_out_w:
move.w L_SCR1(a6),d0 * load positive default result
cmpi.b #$7,d1 * is <ea> mode a data reg?
ble.b foperr_out_w_save_dn * yes
move.l EXC_EA(a6),a0 * pass: <ea> of default result
bsr.l _dmem_write_word * write the default result
tst.l d1 * did dstore fail?
bne.l facc_out_w * yes
bra.w foperr_exit
foperr_out_w_save_dn:
andi.w #$0007,d1
bsr.l store_dreg_w * store result to regfile
bra.w foperr_exit
foperr_out_l:
move.l L_SCR1(a6),d0 * load positive default result
cmpi.b #$7,d1 * is <ea> mode a data reg?
ble.b foperr_out_l_save_dn * yes
move.l EXC_EA(a6),a0 * pass: <ea> of default result
bsr.l _dmem_write_long * write the default result
tst.l d1 * did dstore fail?
bne.l facc_out_l * yes
bra.w foperr_exit
foperr_out_l_save_dn:
andi.w #$0007,d1
bsr.l store_dreg_l * store result to regfile
bra.w foperr_exit
*########################################################################
* XDEF **************************************************************** #
* _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
* #
* This handler should be the first code executed upon taking the #
* FP Signalling NAN exception in an operating system. #
* #
* XREF **************************************************************** #
* _imem_read_long() - read instruction longword #
* fix_skewed_ops() - adjust src operand in fsave frame #
* _real_snan() - "callout" to operating system SNAN handler #
* _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
* store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
* facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
* _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
* #
* INPUT *************************************************************** #
* - The system stack contains the FP SNAN exception frame #
* - The fsave frame contains the source operand #
* #
* OUTPUT ************************************************************** #
* No access error: #
* - The system stack is unchanged #
* - The fsave frame contains the adjusted src op for opclass 0,2 #
* #
* ALGORITHM *********************************************************** #
* In a system where the FP SNAN exception is enabled, the goal #
* is to get to the handler specified at _real_snan(). But, on the 060, #
* for opclass zero and two instructions taking this exception, the #
* input operand in the fsave frame may be incorrect for some cases #
* and needs to be corrected. This handler calls fix_skewed_ops() to #
* do just this and then exits through _real_snan(). #
* For opclass 3 instructions, the 060 doesn't store the default #
* SNAN result out to memory or data register file as it should. #
* This code must emulate the move out before finally exiting through #
* _real_snan(). The move out, if to memory, is performed using #
* _mem_write() "callout" routines that may return a failing result. #
* In this special case, the handler must exit through facc_out() #
* which creates an access error stack frame from the current SNAN #
* stack frame. #
* For the case of an extended precision opclass 3 instruction, #
* if the effective addressing mode was -() or ()+, then the address #
* register must get updated by calling _calc_ea_fout(). If the <ea> #
* was -(a7) from supervisor mode, then the exception frame currently #
* on the system stack must be carefully moved "down" to make room #
* for the operand being moved. #
* #
*########################################################################
global _fpsp_snan
_fpsp_snan:
link.w a6,#-__LOCAL___SIZE * init stack frame
fsave FP_SRC(a6) * grab the "busy" frame
movem.l d0-d1/a0-a1,EXC_DREGS(a6) * save d0-d1/a0-a1
fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) * save ctrl regs
fmovem.x fp0-fp1,EXC_FPREGS(a6) * save fp0-fp1 on stack
* the FPIAR holds the "current PC" of the faulting instruction
move.l USER_FPIAR(a6),EXC_EXTWPTR(a6)
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch the instruction words
move.l d0,EXC_OPWORD(a6)
*#############################################################################
btst #13,d0 * is instr an fmove out?
bne.w fsnan_out * fmove out
* here, we simply see if the operand in the fsave frame needs to be "unskewed".
* this would be the case for opclass two operations with a source infinity or
* denorm operand in the sgl or dbl format. NANs also become skewed and must be
* fixed here.
lea FP_SRC(a6),a0 * pass: ptr to src op
bsr.l fix_skewed_ops * fix src op
fsnan_exit:
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6)
unlk a6
bra.l _real_snan
*#######################################################################
*
* the hardware does not save the default result to memory on enabled
* snan exceptions. we do this here before passing control to
* the user snan handler.
*
* byte, word, long, and packed destination format operations can pass
* through here. since packed format operations already were handled by
* fpsp_unsupp(), then we need to do nothing else for them here.
* for byte, word, and long, we simply need to test the sign of the src
* operand and save the appropriate minimum or maximum integer value
* to the effective address as pointed to by the stacked effective address.
*
fsnan_out:
bfextu d0{#19:#3},d0 * extract dst format field
move.b 1+EXC_OPWORD(a6),d1 * extract <ea> mode,reg
move.w (tbl_snan.b,pc,d0.w*2),a0
jmp (tbl_snan.b,pc,a0.l)
tbl_snan:
.dc.w fsnan_out_l-tbl_snan * long word integer
.dc.w fsnan_out_s-tbl_snan * sgl prec shouldn't happen
.dc.w fsnan_out_x-tbl_snan * ext prec shouldn't happen
.dc.w tbl_snan-tbl_snan * packed needs no help
.dc.w fsnan_out_w-tbl_snan * word integer
.dc.w fsnan_out_d-tbl_snan * dbl prec shouldn't happen
.dc.w fsnan_out_b-tbl_snan * byte integer
.dc.w tbl_snan-tbl_snan * packed needs no help
fsnan_out_b:
move.b FP_SRC_HI(a6),d0 * load upper byte of SNAN
bset #6,d0 * set SNAN bit
cmpi.b #$7,d1 * is <ea> mode a data reg?
ble.b fsnan_out_b_dn * yes
move.l EXC_EA(a6),a0 * pass: <ea> of default result
bsr.l _dmem_write_byte * write the default result
tst.l d1 * did dstore fail?
bne.l facc_out_b * yes
bra.w fsnan_exit
fsnan_out_b_dn:
andi.w #$0007,d1
bsr.l store_dreg_b * store result to regfile
bra.w fsnan_exit
fsnan_out_w:
move.w FP_SRC_HI(a6),d0 * load upper word of SNAN
bset #14,d0 * set SNAN bit
cmpi.b #$7,d1 * is <ea> mode a data reg?
ble.b fsnan_out_w_dn * yes
move.l EXC_EA(a6),a0 * pass: <ea> of default result
bsr.l _dmem_write_word * write the default result
tst.l d1 * did dstore fail?
bne.l facc_out_w * yes
bra.w fsnan_exit
fsnan_out_w_dn:
andi.w #$0007,d1
bsr.l store_dreg_w * store result to regfile
bra.w fsnan_exit
fsnan_out_l:
move.l FP_SRC_HI(a6),d0 * load upper longword of SNAN
bset #30,d0 * set SNAN bit
cmpi.b #$7,d1 * is <ea> mode a data reg?
ble.b fsnan_out_l_dn * yes
move.l EXC_EA(a6),a0 * pass: <ea> of default result
bsr.l _dmem_write_long * write the default result
tst.l d1 * did dstore fail?
bne.l facc_out_l * yes
bra.w fsnan_exit
fsnan_out_l_dn:
andi.w #$0007,d1
bsr.l store_dreg_l * store result to regfile
bra.w fsnan_exit
fsnan_out_s:
cmpi.b #$7,d1 * is <ea> mode a data reg?
ble.b fsnan_out_d_dn * yes
move.l FP_SRC_EX(a6),d0 * fetch SNAN sign
andi.l #$80000000,d0 * keep sign
ori.l #$7fc00000,d0 * insert new exponent,SNAN bit
move.l FP_SRC_HI(a6),d1 * load mantissa
lsr.l #$8,d1 * shift mantissa for sgl
or.l d1,d0 * create sgl SNAN
move.l EXC_EA(a6),a0 * pass: <ea> of default result
bsr.l _dmem_write_long * write the default result
tst.l d1 * did dstore fail?
bne.l facc_out_l * yes
bra.w fsnan_exit
fsnan_out_d_dn:
move.l FP_SRC_EX(a6),d0 * fetch SNAN sign
andi.l #$80000000,d0 * keep sign
ori.l #$7fc00000,d0 * insert new exponent,SNAN bit
move.l d1,-(sp)
move.l FP_SRC_HI(a6),d1 * load mantissa
lsr.l #$8,d1 * shift mantissa for sgl
or.l d1,d0 * create sgl SNAN
move.l (sp)+,d1
andi.w #$0007,d1
bsr.l store_dreg_l * store result to regfile
bra.w fsnan_exit
fsnan_out_d:
move.l FP_SRC_EX(a6),d0 * fetch SNAN sign
andi.l #$80000000,d0 * keep sign
ori.l #$7ff80000,d0 * insert new exponent,SNAN bit
move.l FP_SRC_HI(a6),d1 * load hi mantissa
move.l d0,FP_SCR0_EX(a6) * store to temp space
moveq.l #11,d0 * load shift amt
lsr.l d0,d1
or.l d1,FP_SCR0_EX(a6) * create dbl hi
move.l FP_SRC_HI(a6),d1 * load hi mantissa
andi.l #$000007ff,d1
ror.l d0,d1
move.l d1,FP_SCR0_HI(a6) * store to temp space
move.l FP_SRC_LO(a6),d1 * load lo mantissa
lsr.l d0,d1
or.l d1,FP_SCR0_HI(a6) * create dbl lo
lea FP_SCR0(a6),a0 * pass: ptr to operand
move.l EXC_EA(a6),a1 * pass: dst addr
moveq.l #$8,d0 * pass: size of 8 bytes
bsr.l _dmem_write * write the default result
tst.l d1 * did dstore fail?
bne.l facc_out_d * yes
bra.w fsnan_exit
* for extended precision, if the addressing mode is pre-decrement or
* post-increment, then the address register did not get updated.
* in addition, for pre-decrement, the stacked <ea> is incorrect.
fsnan_out_x:
clr.b SPCOND_FLG(a6) * clear special case flag
move.w FP_SRC_EX(a6),FP_SCR0_EX(a6)
clr.w 2+FP_SCR0(a6)
move.l FP_SRC_HI(a6),d0
bset #30,d0
move.l d0,FP_SCR0_HI(a6)
move.l FP_SRC_LO(a6),FP_SCR0_LO(a6)
btst #$5,EXC_SR(a6) * supervisor mode exception?
bne.b fsnan_out_x_s * yes
move.l usp,a0 * fetch user stack pointer
move.l a0,EXC_A7(a6) * save on stack for calc_ea()
move.l (a6),EXC_A6(a6)
bsr.l _calc_ea_fout * find the correct ea,update An
move.l a0,a1
move.l a0,EXC_EA(a6) * stack correct <ea>
move.l EXC_A7(a6),a0
move.l a0,usp * restore user stack pointer
move.l EXC_A6(a6),(a6)
fsnan_out_x_save:
lea FP_SCR0(a6),a0 * pass: ptr to operand
moveq.l #$c,d0 * pass: size of extended
bsr.l _dmem_write * write the default result
tst.l d1 * did dstore fail?
bne.l facc_out_x * yes
bra.w fsnan_exit
fsnan_out_x_s:
move.l (a6),EXC_A6(a6)
bsr.l _calc_ea_fout * find the correct ea,update An
move.l a0,a1
move.l a0,EXC_EA(a6) * stack correct <ea>
move.l EXC_A6(a6),(a6)
cmpi.b #mda7_flg,SPCOND_FLG(a6) * is <ea> mode -(a7)?
bne.b fsnan_out_x_save * no
* the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6)
move.l EXC_A6(a6),a6 * restore frame pointer
move.l __LOCAL___SIZE+EXC_SR(sp),__LOCAL___SIZE+EXC_SR-$c(sp)
move.l __LOCAL___SIZE+EXC_PC+$2(sp),__LOCAL___SIZE+EXC_PC+$2-$c(sp)
move.l __LOCAL___SIZE+EXC_EA(sp),__LOCAL___SIZE+EXC_EA-$c(sp)
move.l __LOCAL___SIZE+FP_SCR0_EX(sp),__LOCAL___SIZE+EXC_SR(sp)
move.l __LOCAL___SIZE+FP_SCR0_HI(sp),__LOCAL___SIZE+EXC_PC+$2(sp)
move.l __LOCAL___SIZE+FP_SCR0_LO(sp),__LOCAL___SIZE+EXC_EA(sp)
add.l #__LOCAL___SIZE-$8.l.l,sp
bra.l _real_snan
*########################################################################
* XDEF **************************************************************** #
* _fpsp_inex(): 060FPSP entry point for FP Inexact exception. #
* #
* This handler should be the first code executed upon taking the #
* FP Inexact exception in an operating system. #
* #
* XREF **************************************************************** #
* _imem_read_long() - read instruction longword #
* fix_skewed_ops() - adjust src operand in fsave frame #
* set_tag_x() - determine optype of src/dst operands #
* store_fpreg() - store opclass 0 or 2 result to FP regfile #
* unnorm_fix() - change UNNORM operands to NORM or ZERO #
* load_fpn2() - load dst operand from FP regfile #
* smovcr() - emulate an "fmovcr" instruction #
* fout() - emulate an opclass 3 instruction #
* tbl_unsupp - add of table of emulation routines for opclass 0,2 #
* _real_inex() - "callout" to operating system inexact handler #
* #
* INPUT *************************************************************** #
* - The system stack contains the FP Inexact exception frame #
* - The fsave frame contains the source operand #
* #
* OUTPUT ************************************************************** #
* - The system stack is unchanged #
* - The fsave frame contains the adjusted src op for opclass 0,2 #
* #
* ALGORITHM *********************************************************** #
* In a system where the FP Inexact exception is enabled, the goal #
* is to get to the handler specified at _real_inex(). But, on the 060, #
* for opclass zero and two instruction taking this exception, the #
* hardware doesn't store the correct result to the destination FP #
* register as did the '040 and '881/2. This handler must emulate the #
* instruction in order to get this value and then store it to the #
* correct register before calling _real_inex(). #
* For opclass 3 instructions, the 060 doesn't store the default #
* inexact result out to memory or data register file as it should. #
* This code must emulate the move out by calling fout() before finally #
* exiting through _real_inex(). #
* #
*########################################################################
global _fpsp_inex
_fpsp_inex:
link.w a6,#-__LOCAL___SIZE * init stack frame
fsave FP_SRC(a6) * grab the "busy" frame
movem.l d0-d1/a0-a1,EXC_DREGS(a6) * save d0-d1/a0-a1
fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) * save ctrl regs
fmovem.x fp0-fp1,EXC_FPREGS(a6) * save fp0-fp1 on stack
* the FPIAR holds the "current PC" of the faulting instruction
move.l USER_FPIAR(a6),EXC_EXTWPTR(a6)
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch the instruction words
move.l d0,EXC_OPWORD(a6)
*#############################################################################
btst #13,d0 * is instr an fmove out?
bne.w finex_out * fmove out
* the hardware, for "fabs" and "fneg" w/ a long source format, puts the
* longword integer directly into the upper longword of the mantissa along
* w/ an exponent value of 0x401e. we convert this to extended precision here.
bfextu d0{#19:#3},d0 * fetch instr size
bne.b finex_cont * instr size is not long
cmpi.w #$401e,FP_SRC_EX(a6) * is exponent 0x401e?
bne.b finex_cont * no
fmove.l #$0,fpcr
fmove.l FP_SRC_HI(a6),fp0 * load integer src
fmove.x fp0,FP_SRC(a6) * store integer as extended precision
move.w #$e001,$2+FP_SRC(a6)
finex_cont:
lea FP_SRC(a6),a0 * pass: ptr to src op
bsr.l fix_skewed_ops * fix src op
* Here, we zero the ccode and exception byte field since we're going to
* emulate the whole instruction. Notice, though, that we don't kill the
* INEX1 bit. This is because a packed op has long since been converted
* to extended before arriving here. Therefore, we need to retain the
* INEX1 bit from when the operand was first converted.
andi.l #$00ff01ff,USER_FPSR(a6) * zero all but accured field
fmove.l #$0,fpcr * zero current control regs
fmove.l #$0,fpsr
bfextu EXC_EXTWORD(a6){#0:#6},d1 * extract upper 6 of cmdreg
cmpi.b #$17,d1 * is op an fmovecr?
beq.w finex_fmovecr * yes
lea FP_SRC(a6),a0 * pass: ptr to src op
bsr.l set_tag_x * tag the operand type
move.b d0,STAG(a6) * maybe NORM,DENORM
* bits four and five of the fp extension word separate the monadic and dyadic
* operations that can pass through fpsp_inex(). remember that fcmp and ftst
* will never take this exception, but fsincos will.
btst #$5,1+EXC_CMDREG(a6) * is operation monadic or dyadic?
beq.b finex_extract * monadic
btst #$4,1+EXC_CMDREG(a6) * is operation an fsincos?
bne.b finex_extract * yes
bfextu EXC_CMDREG(a6){#6:#3},d0 * dyadic; load dst reg
bsr.l load_fpn2 * load dst into FP_DST
lea FP_DST(a6),a0 * pass: ptr to dst op
bsr.l set_tag_x * tag the operand type
cmpi.b #UNNORM,d0 * is operand an UNNORM?
bne.b finex_op2_done * no
bsr.l unnorm_fix * yes; convert to NORM,DENORM,or ZERO
finex_op2_done:
move.b d0,DTAG(a6) * save dst optype tag
finex_extract:
clr.l d0
move.b FPCR_MODE(a6),d0 * pass rnd prec/mode
move.b 1+EXC_CMDREG(a6),d1
andi.w #$007f,d1 * extract extension
lea FP_SRC(a6),a0
lea FP_DST(a6),a1
move.l (tbl_unsupp.l,pc,d1.w*4),d1 * fetch routine addr
jsr (tbl_unsupp.l,pc,d1.l*1)
* the operation has been emulated. the result is in fp0.
finex_save:
bfextu EXC_CMDREG(a6){#6:#3},d0
bsr.l store_fpreg
finex_exit:
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6)
unlk a6
bra.l _real_inex
finex_fmovecr:
clr.l d0
move.b FPCR_MODE(a6),d0 * pass rnd prec,mode
move.b 1+EXC_CMDREG(a6),d1
andi.l #$0000007f,d1 * pass rom offset
bsr.l smovecr
bra.b finex_save
*#######################################################################
*
* the hardware does not save the default result to memory on enabled
* inexact exceptions. we do this here before passing control to
* the user inexact handler.
*
* byte, word, and long destination format operations can pass
* through here. so can double and single precision.
* although packed opclass three operations can take inexact
* exceptions, they won't pass through here since they are caught
* first by the unsupported data format exception handler. that handler
* sends them directly to _real_inex() if necessary.
*
finex_out:
move.b #NORM,STAG(a6) * src is a NORM
clr.l d0
move.b FPCR_MODE(a6),d0 * pass rnd prec,mode
andi.l #$ffff00ff,USER_FPSR(a6) * zero exception field
lea FP_SRC(a6),a0 * pass ptr to src operand
bsr.l fout * store the default result
bra.b finex_exit
*########################################################################
* XDEF **************************************************************** #
* _fpsp_dz(): 060FPSP entry point for FP DZ exception. #
* #
* This handler should be the first code executed upon taking #
* the FP DZ exception in an operating system. #
* #
* XREF **************************************************************** #
* _imem_read_long() - read instruction longword from memory #
* fix_skewed_ops() - adjust fsave operand #
* _real_dz() - "callout" exit point from FP DZ handler #
* #
* INPUT *************************************************************** #
* - The system stack contains the FP DZ exception stack. #
* - The fsave frame contains the source operand. #
* #
* OUTPUT ************************************************************** #
* - The system stack contains the FP DZ exception stack. #
* - The fsave frame contains the adjusted source operand. #
* #
* ALGORITHM *********************************************************** #
* In a system where the DZ exception is enabled, the goal is to #
* get to the handler specified at _real_dz(). But, on the 060, when the #
* exception is taken, the input operand in the fsave state frame may #
* be incorrect for some cases and need to be adjusted. So, this package #
* adjusts the operand using fix_skewed_ops() and then branches to #
* _real_dz(). #
* #
*########################################################################
global _fpsp_dz
_fpsp_dz:
link.w a6,#-__LOCAL___SIZE * init stack frame
fsave FP_SRC(a6) * grab the "busy" frame
movem.l d0-d1/a0-a1,EXC_DREGS(a6) * save d0-d1/a0-a1
fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) * save ctrl regs
fmovem.x fp0-fp1,EXC_FPREGS(a6) * save fp0-fp1 on stack
* the FPIAR holds the "current PC" of the faulting instruction
move.l USER_FPIAR(a6),EXC_EXTWPTR(a6)
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch the instruction words
move.l d0,EXC_OPWORD(a6)
*#############################################################################
* here, we simply see if the operand in the fsave frame needs to be "unskewed".
* this would be the case for opclass two operations with a source zero
* in the sgl or dbl format.
lea FP_SRC(a6),a0 * pass: ptr to src op
bsr.l fix_skewed_ops * fix src op
fdz_exit:
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6)
unlk a6
bra.l _real_dz
*########################################################################
* XDEF **************************************************************** #
* _fpsp_fline(): 060FPSP entry point for "Line F emulator" exc. #
* #
* This handler should be the first code executed upon taking the #
* "Line F Emulator" exception in an operating system. #
* #
* XREF **************************************************************** #
* _fpsp_unimp() - handle "FP Unimplemented" exceptions #
* _real_fpu_disabled() - handle "FPU disabled" exceptions #
* _real_fline() - handle "FLINE" exceptions #
* _imem_read_long() - read instruction longword #
* #
* INPUT *************************************************************** #
* - The system stack contains a "Line F Emulator" exception #
* stack frame. #
* #
* OUTPUT ************************************************************** #
* - The system stack is unchanged #
* #
* ALGORITHM *********************************************************** #
* When a "Line F Emulator" exception occurs, there are 3 possible #
* exception types, denoted by the exception stack frame format number: #
* (1) FPU unimplemented instruction (6 word stack frame) #
* (2) FPU disabled (8 word stack frame) #
* (3) Line F (4 word stack frame) #
* #
* This module determines which and forks the flow off to the #
* appropriate "callout" (for "disabled" and "Line F") or to the #
* correct emulation code (for "FPU unimplemented"). #
* This code also must check for "fmovecr" instructions w/ a #
* non-zero <ea> field. These may get flagged as "Line F" but should #
* really be flagged as "FPU Unimplemented". (This is a "feature" on #
* the '060. #
* #
*########################################################################
global _fpsp_fline
_fpsp_fline:
* check to see if this exception is a "FP Unimplemented Instruction"
* exception. if so, branch directly to that handler's entry point.
cmpi.w #$202c,$6(sp)
beq.l _fpsp_unimp
* check to see if the FPU is disabled. if so, jump to the OS entry
* point for that condition.
cmpi.w #$402c,$6(sp)
beq.l _real_fpu_disabled
* the exception was an "F-Line Illegal" exception. we check to see
* if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
* so, convert the F-Line exception stack frame to an FP Unimplemented
* Instruction exception stack frame else branch to the OS entry
* point for the F-Line exception handler.
link.w a6,#-__LOCAL___SIZE * init stack frame
movem.l d0-d1/a0-a1,EXC_DREGS(a6) * save d0-d1/a0-a1
move.l EXC_PC(a6),EXC_EXTWPTR(a6)
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch instruction words
bfextu d0{#0:#10},d1 * is it an fmovecr?
cmpi.w #$03c8,d1
bne.b fline_fline * no
bfextu d0{#16:#6},d1 * is it an fmovecr?
cmpi.b #$17,d1
bne.b fline_fline * no
* it's an fmovecr w/ a non-zero <ea> that has entered through
* the F-Line Illegal exception.
* so, we need to convert the F-Line exception stack frame into an
* FP Unimplemented Instruction stack frame and jump to that entry
* point.
*
* but, if the FPU is disabled, then we need to jump to the FPU diabled
* entry point.
movec pcr,d0
btst #$1,d0
beq.b fline_fmovecr
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
sub.l #$8,sp * make room for "Next PC", <ea>
move.w $8(sp),(sp)
move.l $a(sp),$2(sp) * move "Current PC"
move.w #$402c,$6(sp)
move.l $2(sp),$c(sp)
addq.l #$4,$2(sp) * set "Next PC"
bra.l _real_fpu_disabled
fline_fmovecr:
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
fmove.l $2(sp),fpiar * set current PC
addq.l #$4,$2(sp) * set Next PC
move.l (sp),-(sp)
move.l $8(sp),$4(sp)
move.b #$20,$6(sp)
bra.l _fpsp_unimp
fline_fline:
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
bra.l _real_fline
*########################################################################
* XDEF **************************************************************** #
* _fpsp_unimp(): 060FPSP entry point for FP "Unimplemented #
* Instruction" exception. #
* #
* This handler should be the first code executed upon taking the #
* FP Unimplemented Instruction exception in an operating system. #
* #
* XREF **************************************************************** #
* _imem_read_{word,long}() - read instruction word/longword #
* load_fop() - load src/dst ops from memory and/or FP regfile #
* store_fpreg() - store opclass 0 or 2 result to FP regfile #
* tbl_trans - addr of table of emulation routines for trnscndls #
* _real_access() - "callout" for access error exception #
* _fpsp_done() - "callout" for exit; work all done #
* _real_trace() - "callout" for Trace enabled exception #
* smovcr() - emulate "fmovecr" instruction #
* funimp_skew() - adjust fsave src ops to "incorrect" value #
* _ftrapcc() - emulate an "ftrapcc" instruction #
* _fdbcc() - emulate an "fdbcc" instruction #
* _fscc() - emulate an "fscc" instruction #
* _real_trap() - "callout" for Trap exception #
* _real_bsun() - "callout" for enabled Bsun exception #
* #
* INPUT *************************************************************** #
* - The system stack contains the "Unimplemented Instr" stk frame #
* #
* OUTPUT ************************************************************** #
* If access error: #
* - The system stack is changed to an access error stack frame #
* If Trace exception enabled: #
* - The system stack is changed to a Trace exception stack frame #
* Else: (normal case) #
* - Correct result has been stored as appropriate #
* #
* ALGORITHM *********************************************************** #
* There are two main cases of instructions that may enter here to #
* be emulated: (1) the FPgen instructions, most of which were also #
* unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc". #
* For the first set, this handler calls the routine load_fop() #
* to load the source and destination (for dyadic) operands to be used #
* for instruction emulation. The correct emulation routine is then #
* chosen by decoding the instruction type and indexing into an #
* emulation subroutine index table. After emulation returns, this #
* handler checks to see if an exception should occur as a result of the #
* FP instruction emulation. If so, then an FP exception of the correct #
* type is inserted into the FPU state frame using the "frestore" #
* instruction before exiting through _fpsp_done(). In either the #
* exceptional or non-exceptional cases, we must check to see if the #
* Trace exception is enabled. If so, then we must create a Trace #
* exception frame from the current exception frame and exit through #
* _real_trace(). #
* For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines #
* _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three #
* may flag that a BSUN exception should be taken. If so, then the #
* current exception stack frame is converted into a BSUN exception #
* stack frame and an exit is made through _real_bsun(). If the #
* instruction was "ftrapcc" and a Trap exception should result, a Trap #
* exception stack frame is created from the current frame and an exit #
* is made through _real_trap(). If a Trace exception is pending, then #
* a Trace exception frame is created from the current frame and a jump #
* is made to _real_trace(). Finally, if none of these conditions exist, #
* then the handler exits though the callout _fpsp_done(). #
* #
* In any of the above scenarios, if a _mem_read() or _mem_write() #
* "callout" returns a failing value, then an access error stack frame #
* is created from the current stack frame and an exit is made through #
* _real_access(). #
* #
*########################################################################
*
* FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
*
* *****************
* * * => <ea> of fp unimp instr.
* - EA -
* * *
* *****************
* * 0x2 * 0x02c * => frame format and vector offset(vector #11)
* *****************
* * *
* - Next PC - => PC of instr to execute after exc handling
* * *
* *****************
* * SR * => SR at the time the exception was taken
* *****************
*
* Note: the !NULL bit does not get set in the fsave frame when the
* machine encounters an fp unimp exception. Therefore, it must be set
* before leaving this handler.
*
global _fpsp_unimp
_fpsp_unimp:
link.w a6,#-__LOCAL___SIZE * init stack frame
movem.l d0-d1/a0-a1,EXC_DREGS(a6) * save d0-d1/a0-a1
fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) * save ctrl regs
fmovem.x fp0-fp1,EXC_FPREGS(a6) * save fp0-fp1
btst #$5,EXC_SR(a6) * user mode exception?
bne.b funimp_s * no; supervisor mode
* save the value of the user stack pointer onto the stack frame
funimp_u:
move.l usp,a0 * fetch user stack pointer
move.l a0,EXC_A7(a6) * store in stack frame
bra.b funimp_cont
* store the value of the supervisor stack pointer BEFORE the exc occurred.
* old_sp is address just above stacked effective address.
funimp_s:
lea 4+EXC_EA(a6),a0 * load old a7'
move.l a0,EXC_A7(a6) * store a7'
move.l a0,OLD_A7(a6) * make a copy
funimp_cont:
* the FPIAR holds the "current PC" of the faulting instruction.
move.l USER_FPIAR(a6),EXC_EXTWPTR(a6)
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$4,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_long * fetch the instruction words
move.l d0,EXC_OPWORD(a6)
*###########################################################################
fmove.l #$0,fpcr * clear FPCR
fmove.l #$0,fpsr * clear FPSR
clr.b SPCOND_FLG(a6) * clear "special case" flag
* Divide the fp instructions into 8 types based on the TYPE field in
* bits 6-8 of the opword(classes 6,7 are undefined).
* (for the '060, only two types can take this exception)
* bftst %d0{&7:&3} # test TYPE
btst #22,d0 * type 0 or 1 ?
bne.w funimp_misc * type 1
*########################################
* TYPE == 0: General instructions #
*########################################
funimp_gen:
clr.b STORE_FLG(a6) * clear "store result" flag
* clear the ccode byte and exception status byte
andi.l #$00ff00ff,USER_FPSR(a6)
bfextu d0{#16:#6},d1 * extract upper 6 of cmdreg
cmpi.b #$17,d1 * is op an fmovecr?
beq.w funimp_fmovecr * yes
funimp_gen_op:
bsr.l _load_fop * load
clr.l d0
move.b FPCR_MODE(a6),d0 * fetch rnd mode
move.b 1+EXC_CMDREG(a6),d1
andi.w #$003f,d1 * extract extension bits
lsl.w #$3,d1 * shift right 3 bits
or.b STAG(a6),d1 * insert src optag bits
lea FP_DST(a6),a1 * pass dst ptr in a1
lea FP_SRC(a6),a0 * pass src ptr in a0
move.w (tbl_trans.w,pc,d1.w*2),d1
jsr (tbl_trans.w,pc,d1.w*1) * emulate
funimp_fsave:
move.b FPCR_ENABLE(a6),d0 * fetch exceptions enabled
bne.w funimp_ena * some are enabled
funimp_store:
bfextu EXC_CMDREG(a6){#6:#3},d0 * fetch Dn
bsr.l store_fpreg * store result to fp regfile
funimp_gen_exit:
fmovem.x EXC_FP0(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
funimp_gen_exit_cmp:
cmpi.b #mia7_flg,SPCOND_FLG(a6) * was the ea mode (sp)+ ?
beq.b funimp_gen_exit_a7 * yes
cmpi.b #mda7_flg,SPCOND_FLG(a6) * was the ea mode -(sp) ?
beq.b funimp_gen_exit_a7 * yes
funimp_gen_exit_cont:
unlk a6
funimp_gen_exit_cont2:
btst #$7,(sp) * is trace on?
beq.l _fpsp_done * no
* this catches a problem with the case where an exception will be re-inserted
* into the machine. the frestore has already been executed...so, the fmov.l
* alone of the control register would trigger an unwanted exception.
* until I feel like fixing this, we'll sidestep the exception.
fsave -(sp)
fmove.l fpiar,$14(sp) * "Current PC" is in FPIAR
frestore (sp)+
move.w #$2024,$6(sp) * stk fmt = 0x2; voff = 0x24
bra.l _real_trace
funimp_gen_exit_a7:
btst #$5,EXC_SR(a6) * supervisor or user mode?
bne.b funimp_gen_exit_a7_s * supervisor
move.l a0,-(sp)
move.l EXC_A7(a6),a0
move.l a0,usp
move.l (sp)+,a0
bra.b funimp_gen_exit_cont
* if the instruction was executed from supervisor mode and the addressing
* mode was (a7)+, then the stack frame for the rte must be shifted "up"
* "n" bytes where "n" is the size of the src operand type.
* f<op>.{b,w,l,s,d,x,p}
funimp_gen_exit_a7_s:
move.l d0,-(sp) * save d0
move.l EXC_A7(a6),d0 * load new a7'
sub.l OLD_A7(a6),d0 * subtract old a7'
move.l $2+EXC_PC(a6),($2+EXC_PC.b,a6,d0.l) * shift stack frame
move.l EXC_SR(a6),(EXC_SR.b,a6,d0.l) * shift stack frame
move.w d0,EXC_SR(a6) * store incr number
move.l (sp)+,d0 * restore d0
unlk a6
add.w (sp),sp * stack frame shifted
bra.b funimp_gen_exit_cont2
*#####################
* fmovecr.x #ccc,fpn #
*#####################
funimp_fmovecr:
clr.l d0
move.b FPCR_MODE(a6),d0
move.b 1+EXC_CMDREG(a6),d1
andi.l #$0000007f,d1 * pass rom offset in d1
bsr.l smovecr
bra.w funimp_fsave
*########################################################################
*
* the user has enabled some exceptions. we figure not to see this too
* often so that's why it gets lower priority.
*
funimp_ena:
* was an exception set that was also enabled?
and.b FPSR_EXCEPT(a6),d0 * keep only ones enabled and set
bfffo d0{#24:#8},d0 * find highest priority exception
bne.b funimp_exc * at least one was set
* no exception that was enabled was set BUT if we got an exact overflow
* and overflow wasn't enabled but inexact was (yech!) then this is
* an inexact exception; otherwise, return to normal non-exception flow.
btst #ovfl_bit,FPSR_EXCEPT(a6) * did overflow occur?
beq.w funimp_store * no; return to normal flow
* the overflow w/ exact result happened but was inexact set in the FPCR?
funimp_ovfl:
btst #inex2_bit,FPCR_ENABLE(a6) * is inexact enabled?
beq.w funimp_store * no; return to normal flow
bra.b funimp_exc_ovfl * yes
* some exception happened that was actually enabled.
* we'll insert this new exception into the FPU and then return.
funimp_exc:
subi.l #24,d0 * fix offset to be 0-8
cmpi.b #$6,d0 * is exception INEX?
bne.b funimp_exc_force * no
* the enabled exception was inexact. so, if it occurs with an overflow
* or underflow that was disabled, then we have to force an overflow or
* underflow frame. the eventual overflow or underflow handler will see that
* it's actually an inexact and act appropriately. this is the only easy
* way to have the EXOP available for the enabled inexact handler when
* a disabled overflow or underflow has also happened.
btst #ovfl_bit,FPSR_EXCEPT(a6) * did overflow occur?
bne.b funimp_exc_ovfl * yes
btst #unfl_bit,FPSR_EXCEPT(a6) * did underflow occur?
bne.b funimp_exc_unfl * yes
* force the fsave exception status bits to signal an exception of the
* appropriate type. don't forget to "skew" the source operand in case we
* "unskewed" the one the hardware initially gave us.
funimp_exc_force:
move.l d0,-(sp) * save d0
bsr.l funimp_skew * check for special case
move.l (sp)+,d0 * restore d0
move.w (tbl_funimp_except.b,pc,d0.w*2),2+FP_SRC(a6)
bra.b funimp_gen_exit2 * exit with frestore
tbl_funimp_except:
.dc.w $e002,$e006,$e004,$e005
.dc.w $e003,$e002,$e001,$e001
* insert an overflow frame
funimp_exc_ovfl:
bsr.l funimp_skew * check for special case
move.w #$e005,2+FP_SRC(a6)
bra.b funimp_gen_exit2
* insert an underflow frame
funimp_exc_unfl:
bsr.l funimp_skew * check for special case
move.w #$e003,2+FP_SRC(a6)
* this is the general exit point for an enabled exception that will be
* restored into the machine for the instruction just emulated.
funimp_gen_exit2:
fmovem.x EXC_FP0(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6) * insert exceptional status
bra.w funimp_gen_exit_cmp
*###########################################################################
*
* TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
*
* These instructions were implemented on the '881/2 and '040 in hardware but
* are emulated in software on the '060.
*
funimp_misc:
bfextu d0{#10:#3},d1 * extract mode field
cmpi.b #$1,d1 * is it an fdb<cc>?
beq.w funimp_fdbcc * yes
cmpi.b #$7,d1 * is it an fs<cc>?
bne.w funimp_fscc * yes
bfextu d0{#13:#3},d1
cmpi.b #$2,d1 * is it an fs<cc>?
blt.w funimp_fscc * yes
*########################
* ftrap<cc> #
* ftrap<cc>.w #<data> #
* ftrap<cc>.l #<data> #
*########################
funimp_ftrapcc:
bsr.l _ftrapcc * FTRAP<cc>()
cmpi.b #fbsun_flg,SPCOND_FLG(a6) * is enabled bsun occurring?
beq.w funimp_bsun * yes
cmpi.b #ftrapcc_flg,SPCOND_FLG(a6) * should a trap occur?
bne.w funimp_done * no
* FP UNIMP FRAME TRAP FRAME
* ***************** *****************
* ** <EA> ** ** Current PC **
* ***************** *****************
* * 0x2 * 0x02c * * 0x2 * 0x01c *
* ***************** *****************
* ** Next PC ** ** Next PC **
* ***************** *****************
* * SR * * SR *
* ***************** *****************
* (6 words) (6 words)
*
* the ftrapcc instruction should take a trap. so, here we must create a
* trap stack frame from an unimplemented fp instruction stack frame and
* jump to the user supplied entry point for the trap exception
funimp_ftrapcc_tp:
move.l USER_FPIAR(a6),EXC_EA(a6) * Address = Current PC
move.w #$201c,EXC_VOFF(a6) * Vector Offset = 0x01c
fmovem.x EXC_FP0(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
bra.l _real_trap
*########################
* fdb<cc> Dn,<label> #
*########################
funimp_fdbcc:
move.l EXC_EXTWPTR(a6),a0 * fetch instruction addr
addq.l #$2,EXC_EXTWPTR(a6) * incr instruction ptr
bsr.l _imem_read_word * read displacement
tst.l d1 * did ifetch fail?
bne.w funimp_iacc * yes
ext.l d0 * sign extend displacement
bsr.l _fdbcc * FDB<cc>()
cmpi.b #fbsun_flg,SPCOND_FLG(a6) * is enabled bsun occurring?
beq.w funimp_bsun
bra.w funimp_done * branch to finish
*################
* fs<cc>.b <ea> #
*################
funimp_fscc:
bsr.l _fscc * FS<cc>()
* I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
* does not need to update "An" before taking a bsun exception.
cmpi.b #fbsun_flg,SPCOND_FLG(a6) * is enabled bsun occurring?
beq.w funimp_bsun
btst #$5,EXC_SR(a6) * yes; is it a user mode exception?
bne.b funimp_fscc_s * no
funimp_fscc_u:
move.l EXC_A7(a6),a0 * yes; set new USP
move.l a0,usp
bra.w funimp_done * branch to finish
* remember, I'm assuming that post-increment is bogus...(it IS!!!)
* so, the least significant WORD of the stacked effective address got
* overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
* so that the rte will work correctly without destroying the result.
* even though the operation size is byte, the stack ptr is decr by 2.
*
* remember, also, this instruction may be traced.
funimp_fscc_s:
cmpi.b #mda7_flg,SPCOND_FLG(a6) * was a7 modified?
bne.w funimp_done * no
fmovem.x EXC_FP0(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
btst #$7,(sp) * is trace enabled?
bne.b funimp_fscc_s_trace * yes
subq.l #$2,sp
move.l $2(sp),(sp) * shift SR,hi(PC) "down"
move.l $6(sp),$4(sp) * shift lo(PC),voff "down"
bra.l _fpsp_done
funimp_fscc_s_trace:
subq.l #$2,sp
move.l $2(sp),(sp) * shift SR,hi(PC) "down"
move.w $6(sp),$4(sp) * shift lo(PC)
move.w #$2024,$6(sp) * fmt/voff = $2024
fmove.l fpiar,$8(sp) * insert "current PC"
bra.l _real_trace
*
* The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
* the fp unimplemented instruction exception stack frame into a bsun stack frame,
* restore a bsun exception into the machine, and branch to the user
* supplied bsun hook.
*
* FP UNIMP FRAME BSUN FRAME
* ***************** *****************
* ** <EA> ** * 0x0 * 0x0c0 *
* ***************** *****************
* * 0x2 * 0x02c * ** Current PC **
* ***************** *****************
* ** Next PC ** * SR *
* ***************** *****************
* * SR * (4 words)
* *****************
* (6 words)
*
funimp_bsun:
move.w #$00c0,2+EXC_EA(a6) * Fmt = 0x0; Vector Offset = 0x0c0
move.l USER_FPIAR(a6),EXC_VOFF(a6) * PC = Current PC
move.w EXC_SR(a6),2+EXC_PC(a6) * shift SR "up"
move.w #$e000,2+FP_SRC(a6) * bsun exception enabled
fmovem.x EXC_FP0(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
frestore FP_SRC(a6) * restore bsun exception
unlk a6
addq.l #$4,sp * erase sludge
bra.l _real_bsun * branch to user bsun hook
*
* all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
* and return.
*
* as usual, we have to check for trace mode being on here. since instructions
* modifying the supervisor stack frame don't pass through here, this is a
* relatively easy task.
*
funimp_done:
fmovem.x EXC_FP0(a6),fp0-fp1 * restore fp0-fp1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
unlk a6
btst #$7,(sp) * is trace enabled?
bne.b funimp_trace * yes
bra.l _fpsp_done
* FP UNIMP FRAME TRACE FRAME
* ***************** *****************
* ** <EA> ** ** Current PC **
* ***************** *****************
* * 0x2 * 0x02c * * 0x2 * 0x024 *
* ***************** *****************
* ** Next PC ** ** Next PC **
* ***************** *****************
* * SR * * SR *
* ***************** *****************
* (6 words) (6 words)
*
* the fscc instruction should take a trace trap. so, here we must create a
* trace stack frame from an unimplemented fp instruction stack frame and
* jump to the user supplied entry point for the trace exception
funimp_trace:
fmove.l fpiar,$8(sp) * current PC is in fpiar
move.b #$24,$7(sp) * vector offset = 0x024
bra.l _real_trace
*###############################################################
global tbl_trans
.dc.w $4AFC,$1c0
tbl_trans:
.dc.w tbl_trans-tbl_trans * $00-0 fmovecr all
.dc.w tbl_trans-tbl_trans * $00-1 fmovecr all
.dc.w tbl_trans-tbl_trans * $00-2 fmovecr all
.dc.w tbl_trans-tbl_trans * $00-3 fmovecr all
.dc.w tbl_trans-tbl_trans * $00-4 fmovecr all
.dc.w tbl_trans-tbl_trans * $00-5 fmovecr all
.dc.w tbl_trans-tbl_trans * $00-6 fmovecr all
.dc.w tbl_trans-tbl_trans * $00-7 fmovecr all
.dc.w tbl_trans-tbl_trans * $01-0 fint norm
.dc.w tbl_trans-tbl_trans * $01-1 fint zero
.dc.w tbl_trans-tbl_trans * $01-2 fint inf
.dc.w tbl_trans-tbl_trans * $01-3 fint qnan
.dc.w tbl_trans-tbl_trans * $01-5 fint denorm
.dc.w tbl_trans-tbl_trans * $01-4 fint snan
.dc.w tbl_trans-tbl_trans * $01-6 fint unnorm
.dc.w tbl_trans-tbl_trans * $01-7 ERROR
.dc.w ssinh-tbl_trans * $02-0 fsinh norm
.dc.w src_zero-tbl_trans * $02-1 fsinh zero
.dc.w src_inf-tbl_trans * $02-2 fsinh inf
.dc.w src_qnan-tbl_trans * $02-3 fsinh qnan
.dc.w ssinhd-tbl_trans * $02-5 fsinh denorm
.dc.w src_snan-tbl_trans * $02-4 fsinh snan
.dc.w tbl_trans-tbl_trans * $02-6 fsinh unnorm
.dc.w tbl_trans-tbl_trans * $02-7 ERROR
.dc.w tbl_trans-tbl_trans * $03-0 fintrz norm
.dc.w tbl_trans-tbl_trans * $03-1 fintrz zero
.dc.w tbl_trans-tbl_trans * $03-2 fintrz inf
.dc.w tbl_trans-tbl_trans * $03-3 fintrz qnan
.dc.w tbl_trans-tbl_trans * $03-5 fintrz denorm
.dc.w tbl_trans-tbl_trans * $03-4 fintrz snan
.dc.w tbl_trans-tbl_trans * $03-6 fintrz unnorm
.dc.w tbl_trans-tbl_trans * $03-7 ERROR
.dc.w tbl_trans-tbl_trans * $04-0 fsqrt norm
.dc.w tbl_trans-tbl_trans * $04-1 fsqrt zero
.dc.w tbl_trans-tbl_trans * $04-2 fsqrt inf
.dc.w tbl_trans-tbl_trans * $04-3 fsqrt qnan
.dc.w tbl_trans-tbl_trans * $04-5 fsqrt denorm
.dc.w tbl_trans-tbl_trans * $04-4 fsqrt snan
.dc.w tbl_trans-tbl_trans * $04-6 fsqrt unnorm
.dc.w tbl_trans-tbl_trans * $04-7 ERROR
.dc.w tbl_trans-tbl_trans * $05-0 ERROR
.dc.w tbl_trans-tbl_trans * $05-1 ERROR
.dc.w tbl_trans-tbl_trans * $05-2 ERROR
.dc.w tbl_trans-tbl_trans * $05-3 ERROR
.dc.w tbl_trans-tbl_trans * $05-4 ERROR
.dc.w tbl_trans-tbl_trans * $05-5 ERROR
.dc.w tbl_trans-tbl_trans * $05-6 ERROR
.dc.w tbl_trans-tbl_trans * $05-7 ERROR
.dc.w slognp1-tbl_trans * $06-0 flognp1 norm
.dc.w src_zero-tbl_trans * $06-1 flognp1 zero
.dc.w sopr_inf-tbl_trans * $06-2 flognp1 inf
.dc.w src_qnan-tbl_trans * $06-3 flognp1 qnan
.dc.w slognp1d-tbl_trans * $06-5 flognp1 denorm
.dc.w src_snan-tbl_trans * $06-4 flognp1 snan
.dc.w tbl_trans-tbl_trans * $06-6 flognp1 unnorm
.dc.w tbl_trans-tbl_trans * $06-7 ERROR
.dc.w tbl_trans-tbl_trans * $07-0 ERROR
.dc.w tbl_trans-tbl_trans * $07-1 ERROR
.dc.w tbl_trans-tbl_trans * $07-2 ERROR
.dc.w tbl_trans-tbl_trans * $07-3 ERROR
.dc.w tbl_trans-tbl_trans * $07-4 ERROR
.dc.w tbl_trans-tbl_trans * $07-5 ERROR
.dc.w tbl_trans-tbl_trans * $07-6 ERROR
.dc.w tbl_trans-tbl_trans * $07-7 ERROR
.dc.w setoxm1-tbl_trans * $08-0 fetoxm1 norm
.dc.w src_zero-tbl_trans * $08-1 fetoxm1 zero
.dc.w setoxm1i-tbl_trans * $08-2 fetoxm1 inf
.dc.w src_qnan-tbl_trans * $08-3 fetoxm1 qnan
.dc.w setoxm1d-tbl_trans * $08-5 fetoxm1 denorm
.dc.w src_snan-tbl_trans * $08-4 fetoxm1 snan
.dc.w tbl_trans-tbl_trans * $08-6 fetoxm1 unnorm
.dc.w tbl_trans-tbl_trans * $08-7 ERROR
.dc.w stanh-tbl_trans * $09-0 ftanh norm
.dc.w src_zero-tbl_trans * $09-1 ftanh zero
.dc.w src_one-tbl_trans * $09-2 ftanh inf
.dc.w src_qnan-tbl_trans * $09-3 ftanh qnan
.dc.w stanhd-tbl_trans * $09-5 ftanh denorm
.dc.w src_snan-tbl_trans * $09-4 ftanh snan
.dc.w tbl_trans-tbl_trans * $09-6 ftanh unnorm
.dc.w tbl_trans-tbl_trans * $09-7 ERROR
.dc.w satan-tbl_trans * $0a-0 fatan norm
.dc.w src_zero-tbl_trans * $0a-1 fatan zero
.dc.w spi_2-tbl_trans * $0a-2 fatan inf
.dc.w src_qnan-tbl_trans * $0a-3 fatan qnan
.dc.w satand-tbl_trans * $0a-5 fatan denorm
.dc.w src_snan-tbl_trans * $0a-4 fatan snan
.dc.w tbl_trans-tbl_trans * $0a-6 fatan unnorm
.dc.w tbl_trans-tbl_trans * $0a-7 ERROR
.dc.w tbl_trans-tbl_trans * $0b-0 ERROR
.dc.w tbl_trans-tbl_trans * $0b-1 ERROR
.dc.w tbl_trans-tbl_trans * $0b-2 ERROR
.dc.w tbl_trans-tbl_trans * $0b-3 ERROR
.dc.w tbl_trans-tbl_trans * $0b-4 ERROR
.dc.w tbl_trans-tbl_trans * $0b-5 ERROR
.dc.w tbl_trans-tbl_trans * $0b-6 ERROR
.dc.w tbl_trans-tbl_trans * $0b-7 ERROR
.dc.w sasin-tbl_trans * $0c-0 fasin norm
.dc.w src_zero-tbl_trans * $0c-1 fasin zero
.dc.w t_operr-tbl_trans * $0c-2 fasin inf
.dc.w src_qnan-tbl_trans * $0c-3 fasin qnan
.dc.w sasind-tbl_trans * $0c-5 fasin denorm
.dc.w src_snan-tbl_trans * $0c-4 fasin snan
.dc.w tbl_trans-tbl_trans * $0c-6 fasin unnorm
.dc.w tbl_trans-tbl_trans * $0c-7 ERROR
.dc.w satanh-tbl_trans * $0d-0 fatanh norm
.dc.w src_zero-tbl_trans * $0d-1 fatanh zero
.dc.w t_operr-tbl_trans * $0d-2 fatanh inf
.dc.w src_qnan-tbl_trans * $0d-3 fatanh qnan
.dc.w satanhd-tbl_trans * $0d-5 fatanh denorm
.dc.w src_snan-tbl_trans * $0d-4 fatanh snan
.dc.w tbl_trans-tbl_trans * $0d-6 fatanh unnorm
.dc.w tbl_trans-tbl_trans * $0d-7 ERROR
.dc.w ssin-tbl_trans * $0e-0 fsin norm
.dc.w src_zero-tbl_trans * $0e-1 fsin zero
.dc.w t_operr-tbl_trans * $0e-2 fsin inf
.dc.w src_qnan-tbl_trans * $0e-3 fsin qnan
.dc.w ssind-tbl_trans * $0e-5 fsin denorm
.dc.w src_snan-tbl_trans * $0e-4 fsin snan
.dc.w tbl_trans-tbl_trans * $0e-6 fsin unnorm
.dc.w tbl_trans-tbl_trans * $0e-7 ERROR
.dc.w stan-tbl_trans * $0f-0 ftan norm
.dc.w src_zero-tbl_trans * $0f-1 ftan zero
.dc.w t_operr-tbl_trans * $0f-2 ftan inf
.dc.w src_qnan-tbl_trans * $0f-3 ftan qnan
.dc.w stand-tbl_trans * $0f-5 ftan denorm
.dc.w src_snan-tbl_trans * $0f-4 ftan snan
.dc.w tbl_trans-tbl_trans * $0f-6 ftan unnorm
.dc.w tbl_trans-tbl_trans * $0f-7 ERROR
.dc.w setox-tbl_trans * $10-0 fetox norm
.dc.w ld_pone-tbl_trans * $10-1 fetox zero
.dc.w szr_inf-tbl_trans * $10-2 fetox inf
.dc.w src_qnan-tbl_trans * $10-3 fetox qnan
.dc.w setoxd-tbl_trans * $10-5 fetox denorm
.dc.w src_snan-tbl_trans * $10-4 fetox snan
.dc.w tbl_trans-tbl_trans * $10-6 fetox unnorm
.dc.w tbl_trans-tbl_trans * $10-7 ERROR
.dc.w stwotox-tbl_trans * $11-0 ftwotox norm
.dc.w ld_pone-tbl_trans * $11-1 ftwotox zero
.dc.w szr_inf-tbl_trans * $11-2 ftwotox inf
.dc.w src_qnan-tbl_trans * $11-3 ftwotox qnan
.dc.w stwotoxd-tbl_trans * $11-5 ftwotox denorm
.dc.w src_snan-tbl_trans * $11-4 ftwotox snan
.dc.w tbl_trans-tbl_trans * $11-6 ftwotox unnorm
.dc.w tbl_trans-tbl_trans * $11-7 ERROR
.dc.w stentox-tbl_trans * $12-0 ftentox norm
.dc.w ld_pone-tbl_trans * $12-1 ftentox zero
.dc.w szr_inf-tbl_trans * $12-2 ftentox inf
.dc.w src_qnan-tbl_trans * $12-3 ftentox qnan
.dc.w stentoxd-tbl_trans * $12-5 ftentox denorm
.dc.w src_snan-tbl_trans * $12-4 ftentox snan
.dc.w tbl_trans-tbl_trans * $12-6 ftentox unnorm
.dc.w tbl_trans-tbl_trans * $12-7 ERROR
.dc.w tbl_trans-tbl_trans * $13-0 ERROR
.dc.w tbl_trans-tbl_trans * $13-1 ERROR
.dc.w tbl_trans-tbl_trans * $13-2 ERROR
.dc.w tbl_trans-tbl_trans * $13-3 ERROR
.dc.w tbl_trans-tbl_trans * $13-4 ERROR
.dc.w tbl_trans-tbl_trans * $13-5 ERROR
.dc.w tbl_trans-tbl_trans * $13-6 ERROR
.dc.w tbl_trans-tbl_trans * $13-7 ERROR
.dc.w slogn-tbl_trans * $14-0 flogn norm
.dc.w t_dz2-tbl_trans * $14-1 flogn zero
.dc.w sopr_inf-tbl_trans * $14-2 flogn inf
.dc.w src_qnan-tbl_trans * $14-3 flogn qnan
.dc.w slognd-tbl_trans * $14-5 flogn denorm
.dc.w src_snan-tbl_trans * $14-4 flogn snan
.dc.w tbl_trans-tbl_trans * $14-6 flogn unnorm
.dc.w tbl_trans-tbl_trans * $14-7 ERROR
.dc.w slog10-tbl_trans * $15-0 flog10 norm
.dc.w t_dz2-tbl_trans * $15-1 flog10 zero
.dc.w sopr_inf-tbl_trans * $15-2 flog10 inf
.dc.w src_qnan-tbl_trans * $15-3 flog10 qnan
.dc.w slog10d-tbl_trans * $15-5 flog10 denorm
.dc.w src_snan-tbl_trans * $15-4 flog10 snan
.dc.w tbl_trans-tbl_trans * $15-6 flog10 unnorm
.dc.w tbl_trans-tbl_trans * $15-7 ERROR
.dc.w slog2-tbl_trans * $16-0 flog2 norm
.dc.w t_dz2-tbl_trans * $16-1 flog2 zero
.dc.w sopr_inf-tbl_trans * $16-2 flog2 inf
.dc.w src_qnan-tbl_trans * $16-3 flog2 qnan
.dc.w slog2d-tbl_trans * $16-5 flog2 denorm
.dc.w src_snan-tbl_trans * $16-4 flog2 snan
.dc.w tbl_trans-tbl_trans * $16-6 flog2 unnorm
.dc.w tbl_trans-tbl_trans * $16-7 ERROR
.dc.w tbl_trans-tbl_trans * $17-0 ERROR
.dc.w tbl_trans-tbl_trans * $17-1 ERROR
.dc.w tbl_trans-tbl_trans * $17-2 ERROR
.dc.w tbl_trans-tbl_trans * $17-3 ERROR
.dc.w tbl_trans-tbl_trans * $17-4 ERROR
.dc.w tbl_trans-tbl_trans * $17-5 ERROR
.dc.w tbl_trans-tbl_trans * $17-6 ERROR
.dc.w tbl_trans-tbl_trans * $17-7 ERROR
.dc.w tbl_trans-tbl_trans * $18-0 fabs norm
.dc.w tbl_trans-tbl_trans * $18-1 fabs zero
.dc.w tbl_trans-tbl_trans * $18-2 fabs inf
.dc.w tbl_trans-tbl_trans * $18-3 fabs qnan
.dc.w tbl_trans-tbl_trans * $18-5 fabs denorm
.dc.w tbl_trans-tbl_trans * $18-4 fabs snan
.dc.w tbl_trans-tbl_trans * $18-6 fabs unnorm
.dc.w tbl_trans-tbl_trans * $18-7 ERROR
.dc.w scosh-tbl_trans * $19-0 fcosh norm
.dc.w ld_pone-tbl_trans * $19-1 fcosh zero
.dc.w ld_pinf-tbl_trans * $19-2 fcosh inf
.dc.w src_qnan-tbl_trans * $19-3 fcosh qnan
.dc.w scoshd-tbl_trans * $19-5 fcosh denorm
.dc.w src_snan-tbl_trans * $19-4 fcosh snan
.dc.w tbl_trans-tbl_trans * $19-6 fcosh unnorm
.dc.w tbl_trans-tbl_trans * $19-7 ERROR
.dc.w tbl_trans-tbl_trans * $1a-0 fneg norm
.dc.w tbl_trans-tbl_trans * $1a-1 fneg zero
.dc.w tbl_trans-tbl_trans * $1a-2 fneg inf
.dc.w tbl_trans-tbl_trans * $1a-3 fneg qnan
.dc.w tbl_trans-tbl_trans * $1a-5 fneg denorm
.dc.w tbl_trans-tbl_trans * $1a-4 fneg snan
.dc.w tbl_trans-tbl_trans * $1a-6 fneg unnorm
.dc.w tbl_trans-tbl_trans * $1a-7 ERROR
.dc.w tbl_trans-tbl_trans * $1b-0 ERROR
.dc.w tbl_trans-tbl_trans * $1b-1 ERROR
.dc.w tbl_trans-tbl_trans * $1b-2 ERROR
.dc.w tbl_trans-tbl_trans * $1b-3 ERROR
.dc.w tbl_trans-tbl_trans * $1b-4 ERROR
.dc.w tbl_trans-tbl_trans * $1b-5 ERROR
.dc.w tbl_trans-tbl_trans * $1b-6 ERROR
.dc.w tbl_trans-tbl_trans * $1b-7 ERROR
.dc.w sacos-tbl_trans * $1c-0 facos norm
.dc.w ld_ppi2-tbl_trans * $1c-1 facos zero
.dc.w t_operr-tbl_trans * $1c-2 facos inf
.dc.w src_qnan-tbl_trans * $1c-3 facos qnan
.dc.w sacosd-tbl_trans * $1c-5 facos denorm
.dc.w src_snan-tbl_trans * $1c-4 facos snan
.dc.w tbl_trans-tbl_trans * $1c-6 facos unnorm
.dc.w tbl_trans-tbl_trans * $1c-7 ERROR
.dc.w scos-tbl_trans * $1d-0 fcos norm
.dc.w ld_pone-tbl_trans * $1d-1 fcos zero
.dc.w t_operr-tbl_trans * $1d-2 fcos inf
.dc.w src_qnan-tbl_trans * $1d-3 fcos qnan
.dc.w scosd-tbl_trans * $1d-5 fcos denorm
.dc.w src_snan-tbl_trans * $1d-4 fcos snan
.dc.w tbl_trans-tbl_trans * $1d-6 fcos unnorm
.dc.w tbl_trans-tbl_trans * $1d-7 ERROR
.dc.w sgetexp-tbl_trans * $1e-0 fgetexp norm
.dc.w src_zero-tbl_trans * $1e-1 fgetexp zero
.dc.w t_operr-tbl_trans * $1e-2 fgetexp inf
.dc.w src_qnan-tbl_trans * $1e-3 fgetexp qnan
.dc.w sgetexpd-tbl_trans * $1e-5 fgetexp denorm
.dc.w src_snan-tbl_trans * $1e-4 fgetexp snan
.dc.w tbl_trans-tbl_trans * $1e-6 fgetexp unnorm
.dc.w tbl_trans-tbl_trans * $1e-7 ERROR
.dc.w sgetman-tbl_trans * $1f-0 fgetman norm
.dc.w src_zero-tbl_trans * $1f-1 fgetman zero
.dc.w t_operr-tbl_trans * $1f-2 fgetman inf
.dc.w src_qnan-tbl_trans * $1f-3 fgetman qnan
.dc.w sgetmand-tbl_trans * $1f-5 fgetman denorm
.dc.w src_snan-tbl_trans * $1f-4 fgetman snan
.dc.w tbl_trans-tbl_trans * $1f-6 fgetman unnorm
.dc.w tbl_trans-tbl_trans * $1f-7 ERROR
.dc.w tbl_trans-tbl_trans * $20-0 fdiv norm
.dc.w tbl_trans-tbl_trans * $20-1 fdiv zero
.dc.w tbl_trans-tbl_trans * $20-2 fdiv inf
.dc.w tbl_trans-tbl_trans * $20-3 fdiv qnan
.dc.w tbl_trans-tbl_trans * $20-5 fdiv denorm
.dc.w tbl_trans-tbl_trans * $20-4 fdiv snan
.dc.w tbl_trans-tbl_trans * $20-6 fdiv unnorm
.dc.w tbl_trans-tbl_trans * $20-7 ERROR
.dc.w smod_snorm-tbl_trans * $21-0 fmod norm
.dc.w smod_szero-tbl_trans * $21-1 fmod zero
.dc.w smod_sinf-tbl_trans * $21-2 fmod inf
.dc.w sop_sqnan-tbl_trans * $21-3 fmod qnan
.dc.w smod_sdnrm-tbl_trans * $21-5 fmod denorm
.dc.w sop_ssnan-tbl_trans * $21-4 fmod snan
.dc.w tbl_trans-tbl_trans * $21-6 fmod unnorm
.dc.w tbl_trans-tbl_trans * $21-7 ERROR
.dc.w tbl_trans-tbl_trans * $22-0 fadd norm
.dc.w tbl_trans-tbl_trans * $22-1 fadd zero
.dc.w tbl_trans-tbl_trans * $22-2 fadd inf
.dc.w tbl_trans-tbl_trans * $22-3 fadd qnan
.dc.w tbl_trans-tbl_trans * $22-5 fadd denorm
.dc.w tbl_trans-tbl_trans * $22-4 fadd snan
.dc.w tbl_trans-tbl_trans * $22-6 fadd unnorm
.dc.w tbl_trans-tbl_trans * $22-7 ERROR
.dc.w tbl_trans-tbl_trans * $23-0 fmul norm
.dc.w tbl_trans-tbl_trans * $23-1 fmul zero
.dc.w tbl_trans-tbl_trans * $23-2 fmul inf
.dc.w tbl_trans-tbl_trans * $23-3 fmul qnan
.dc.w tbl_trans-tbl_trans * $23-5 fmul denorm
.dc.w tbl_trans-tbl_trans * $23-4 fmul snan
.dc.w tbl_trans-tbl_trans * $23-6 fmul unnorm
.dc.w tbl_trans-tbl_trans * $23-7 ERROR
.dc.w tbl_trans-tbl_trans * $24-0 fsgldiv norm
.dc.w tbl_trans-tbl_trans * $24-1 fsgldiv zero
.dc.w tbl_trans-tbl_trans * $24-2 fsgldiv inf
.dc.w tbl_trans-tbl_trans * $24-3 fsgldiv qnan
.dc.w tbl_trans-tbl_trans * $24-5 fsgldiv denorm
.dc.w tbl_trans-tbl_trans * $24-4 fsgldiv snan
.dc.w tbl_trans-tbl_trans * $24-6 fsgldiv unnorm
.dc.w tbl_trans-tbl_trans * $24-7 ERROR
.dc.w srem_snorm-tbl_trans * $25-0 frem norm
.dc.w srem_szero-tbl_trans * $25-1 frem zero
.dc.w srem_sinf-tbl_trans * $25-2 frem inf
.dc.w sop_sqnan-tbl_trans * $25-3 frem qnan
.dc.w srem_sdnrm-tbl_trans * $25-5 frem denorm
.dc.w sop_ssnan-tbl_trans * $25-4 frem snan
.dc.w tbl_trans-tbl_trans * $25-6 frem unnorm
.dc.w tbl_trans-tbl_trans * $25-7 ERROR
.dc.w sscale_snorm-tbl_trans * $26-0 fscale norm
.dc.w sscale_szero-tbl_trans * $26-1 fscale zero
.dc.w sscale_sinf-tbl_trans * $26-2 fscale inf
.dc.w sop_sqnan-tbl_trans * $26-3 fscale qnan
.dc.w sscale_sdnrm-tbl_trans * $26-5 fscale denorm
.dc.w sop_ssnan-tbl_trans * $26-4 fscale snan
.dc.w tbl_trans-tbl_trans * $26-6 fscale unnorm
.dc.w tbl_trans-tbl_trans * $26-7 ERROR
.dc.w tbl_trans-tbl_trans * $27-0 fsglmul norm
.dc.w tbl_trans-tbl_trans * $27-1 fsglmul zero
.dc.w tbl_trans-tbl_trans * $27-2 fsglmul inf
.dc.w tbl_trans-tbl_trans * $27-3 fsglmul qnan
.dc.w tbl_trans-tbl_trans * $27-5 fsglmul denorm
.dc.w tbl_trans-tbl_trans * $27-4 fsglmul snan
.dc.w tbl_trans-tbl_trans * $27-6 fsglmul unnorm
.dc.w tbl_trans-tbl_trans * $27-7 ERROR
.dc.w tbl_trans-tbl_trans * $28-0 fsub norm
.dc.w tbl_trans-tbl_trans * $28-1 fsub zero
.dc.w tbl_trans-tbl_trans * $28-2 fsub inf
.dc.w tbl_trans-tbl_trans * $28-3 fsub qnan
.dc.w tbl_trans-tbl_trans * $28-5 fsub denorm
.dc.w tbl_trans-tbl_trans * $28-4 fsub snan
.dc.w tbl_trans-tbl_trans * $28-6 fsub unnorm
.dc.w tbl_trans-tbl_trans * $28-7 ERROR
.dc.w tbl_trans-tbl_trans * $29-0 ERROR
.dc.w tbl_trans-tbl_trans * $29-1 ERROR
.dc.w tbl_trans-tbl_trans * $29-2 ERROR
.dc.w tbl_trans-tbl_trans * $29-3 ERROR
.dc.w tbl_trans-tbl_trans * $29-4 ERROR
.dc.w tbl_trans-tbl_trans * $29-5 ERROR
.dc.w tbl_trans-tbl_trans * $29-6 ERROR
.dc.w tbl_trans-tbl_trans * $29-7 ERROR
.dc.w tbl_trans-tbl_trans * $2a-0 ERROR
.dc.w tbl_trans-tbl_trans * $2a-1 ERROR
.dc.w tbl_trans-tbl_trans * $2a-2 ERROR
.dc.w tbl_trans-tbl_trans * $2a-3 ERROR
.dc.w tbl_trans-tbl_trans * $2a-4 ERROR
.dc.w tbl_trans-tbl_trans * $2a-5 ERROR
.dc.w tbl_trans-tbl_trans * $2a-6 ERROR
.dc.w tbl_trans-tbl_trans * $2a-7 ERROR
.dc.w tbl_trans-tbl_trans * $2b-0 ERROR
.dc.w tbl_trans-tbl_trans * $2b-1 ERROR
.dc.w tbl_trans-tbl_trans * $2b-2 ERROR
.dc.w tbl_trans-tbl_trans * $2b-3 ERROR
.dc.w tbl_trans-tbl_trans * $2b-4 ERROR
.dc.w tbl_trans-tbl_trans * $2b-5 ERROR
.dc.w tbl_trans-tbl_trans * $2b-6 ERROR
.dc.w tbl_trans-tbl_trans * $2b-7 ERROR
.dc.w tbl_trans-tbl_trans * $2c-0 ERROR
.dc.w tbl_trans-tbl_trans * $2c-1 ERROR
.dc.w tbl_trans-tbl_trans * $2c-2 ERROR
.dc.w tbl_trans-tbl_trans * $2c-3 ERROR
.dc.w tbl_trans-tbl_trans * $2c-4 ERROR
.dc.w tbl_trans-tbl_trans * $2c-5 ERROR
.dc.w tbl_trans-tbl_trans * $2c-6 ERROR
.dc.w tbl_trans-tbl_trans * $2c-7 ERROR
.dc.w tbl_trans-tbl_trans * $2d-0 ERROR
.dc.w tbl_trans-tbl_trans * $2d-1 ERROR
.dc.w tbl_trans-tbl_trans * $2d-2 ERROR
.dc.w tbl_trans-tbl_trans * $2d-3 ERROR
.dc.w tbl_trans-tbl_trans * $2d-4 ERROR
.dc.w tbl_trans-tbl_trans * $2d-5 ERROR
.dc.w tbl_trans-tbl_trans * $2d-6 ERROR
.dc.w tbl_trans-tbl_trans * $2d-7 ERROR
.dc.w tbl_trans-tbl_trans * $2e-0 ERROR
.dc.w tbl_trans-tbl_trans * $2e-1 ERROR
.dc.w tbl_trans-tbl_trans * $2e-2 ERROR
.dc.w tbl_trans-tbl_trans * $2e-3 ERROR
.dc.w tbl_trans-tbl_trans * $2e-4 ERROR
.dc.w tbl_trans-tbl_trans * $2e-5 ERROR
.dc.w tbl_trans-tbl_trans * $2e-6 ERROR
.dc.w tbl_trans-tbl_trans * $2e-7 ERROR
.dc.w tbl_trans-tbl_trans * $2f-0 ERROR
.dc.w tbl_trans-tbl_trans * $2f-1 ERROR
.dc.w tbl_trans-tbl_trans * $2f-2 ERROR
.dc.w tbl_trans-tbl_trans * $2f-3 ERROR
.dc.w tbl_trans-tbl_trans * $2f-4 ERROR
.dc.w tbl_trans-tbl_trans * $2f-5 ERROR
.dc.w tbl_trans-tbl_trans * $2f-6 ERROR
.dc.w tbl_trans-tbl_trans * $2f-7 ERROR
.dc.w ssincos-tbl_trans * $30-0 fsincos norm
.dc.w ssincosz-tbl_trans * $30-1 fsincos zero
.dc.w ssincosi-tbl_trans * $30-2 fsincos inf
.dc.w ssincosqnan-tbl_trans * $30-3 fsincos qnan
.dc.w ssincosd-tbl_trans * $30-5 fsincos denorm
.dc.w ssincossnan-tbl_trans * $30-4 fsincos snan
.dc.w tbl_trans-tbl_trans * $30-6 fsincos unnorm
.dc.w tbl_trans-tbl_trans * $30-7 ERROR
.dc.w ssincos-tbl_trans * $31-0 fsincos norm
.dc.w ssincosz-tbl_trans * $31-1 fsincos zero
.dc.w ssincosi-tbl_trans * $31-2 fsincos inf
.dc.w ssincosqnan-tbl_trans * $31-3 fsincos qnan
.dc.w ssincosd-tbl_trans * $31-5 fsincos denorm
.dc.w ssincossnan-tbl_trans * $31-4 fsincos snan
.dc.w tbl_trans-tbl_trans * $31-6 fsincos unnorm
.dc.w tbl_trans-tbl_trans * $31-7 ERROR
.dc.w ssincos-tbl_trans * $32-0 fsincos norm
.dc.w ssincosz-tbl_trans * $32-1 fsincos zero
.dc.w ssincosi-tbl_trans * $32-2 fsincos inf
.dc.w ssincosqnan-tbl_trans * $32-3 fsincos qnan
.dc.w ssincosd-tbl_trans * $32-5 fsincos denorm
.dc.w ssincossnan-tbl_trans * $32-4 fsincos snan
.dc.w tbl_trans-tbl_trans * $32-6 fsincos unnorm
.dc.w tbl_trans-tbl_trans * $32-7 ERROR
.dc.w ssincos-tbl_trans * $33-0 fsincos norm
.dc.w ssincosz-tbl_trans * $33-1 fsincos zero
.dc.w ssincosi-tbl_trans * $33-2 fsincos inf
.dc.w ssincosqnan-tbl_trans * $33-3 fsincos qnan
.dc.w ssincosd-tbl_trans * $33-5 fsincos denorm
.dc.w ssincossnan-tbl_trans * $33-4 fsincos snan
.dc.w tbl_trans-tbl_trans * $33-6 fsincos unnorm
.dc.w tbl_trans-tbl_trans * $33-7 ERROR
.dc.w ssincos-tbl_trans * $34-0 fsincos norm
.dc.w ssincosz-tbl_trans * $34-1 fsincos zero
.dc.w ssincosi-tbl_trans * $34-2 fsincos inf
.dc.w ssincosqnan-tbl_trans * $34-3 fsincos qnan
.dc.w ssincosd-tbl_trans * $34-5 fsincos denorm
.dc.w ssincossnan-tbl_trans * $34-4 fsincos snan
.dc.w tbl_trans-tbl_trans * $34-6 fsincos unnorm
.dc.w tbl_trans-tbl_trans * $34-7 ERROR
.dc.w ssincos-tbl_trans * $35-0 fsincos norm
.dc.w ssincosz-tbl_trans * $35-1 fsincos zero
.dc.w ssincosi-tbl_trans * $35-2 fsincos inf
.dc.w ssincosqnan-tbl_trans * $35-3 fsincos qnan
.dc.w ssincosd-tbl_trans * $35-5 fsincos denorm
.dc.w ssincossnan-tbl_trans * $35-4 fsincos snan
.dc.w tbl_trans-tbl_trans * $35-6 fsincos unnorm
.dc.w tbl_trans-tbl_trans * $35-7 ERROR
.dc.w ssincos-tbl_trans * $36-0 fsincos norm
.dc.w ssincosz-tbl_trans * $36-1 fsincos zero
.dc.w ssincosi-tbl_trans * $36-2 fsincos inf
.dc.w ssincosqnan-tbl_trans * $36-3 fsincos qnan
.dc.w ssincosd-tbl_trans * $36-5 fsincos denorm
.dc.w ssincossnan-tbl_trans * $36-4 fsincos snan
.dc.w tbl_trans-tbl_trans * $36-6 fsincos unnorm
.dc.w tbl_trans-tbl_trans * $36-7 ERROR
.dc.w ssincos-tbl_trans * $37-0 fsincos norm
.dc.w ssincosz-tbl_trans * $37-1 fsincos zero
.dc.w ssincosi-tbl_trans * $37-2 fsincos inf
.dc.w ssincosqnan-tbl_trans * $37-3 fsincos qnan
.dc.w ssincosd-tbl_trans * $37-5 fsincos denorm
.dc.w ssincossnan-tbl_trans * $37-4 fsincos snan
.dc.w tbl_trans-tbl_trans * $37-6 fsincos unnorm
.dc.w tbl_trans-tbl_trans * $37-7 ERROR
*#########
* the instruction fetch access for the displacement word for the
* fdbcc emulation failed. here, we create an access error frame
* from the current frame and branch to _real_access().
funimp_iacc:
movem.l EXC_DREGS(a6),d0-d1/a0-a1 * restore d0-d1/a0-a1
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar * restore ctrl regs
fmovem.x EXC_FPREGS(a6),fp0-fp1 * restore fp0-fp1
move.l USER_FPIAR(a6),EXC_PC(a6) * store current PC
unlk a6
move.l (sp),-(sp) * store SR,hi(PC)
move.w $8(sp),$4(sp) * store lo(PC)
move.w #$4008,$6(sp) * store voff
move.l $2(sp),$8(sp) * store EA
move.l #$09428001,$c(sp) * store FSLW
btst #$5,(sp) * user or supervisor mode?
beq.b funimp_iacc_end * user
bset #$2,$d(sp) * set supervisor TM bit
funimp_iacc_end:
bra.l _real_access
*########################################################################
* ssin(): computes the sine of a normalized input #
* ssind(): computes the sine of a denormalized input #
* scos(): computes the cosine of a normalized input #
* scosd(): computes the cosine of a denormalized input #
* ssincos(): computes the sine and cosine of a normalized input #
* ssincosd(): computes the sine and cosine of a denormalized input #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input #
* d0 = round precision,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = sin(X) or cos(X) #
* #
* For ssincos(X): #
* fp0 = sin(X) #
* fp1 = cos(X) #
* #
* ACCURACY and MONOTONICITY ******************************************* #
* The returned result is within 1 ulp in 64 significant bit, i.e. #
* within 0.5001 ulp to 53 bits if the result is subsequently #
* rounded to double precision. The result is provably monotonic #
* in double precision. #
* #
* ALGORITHM *********************************************************** #
* #
* SIN and COS: #
* 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. #
* #
* 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. #
* #
* 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
* k = N mod 4, so in particular, k = 0,1,2,or 3. #
* Overwrite k by k := k + AdjN. #
* #
* 4. If k is even, go to 6. #
* #
* 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. #
* Return sgn*cos(r) where cos(r) is approximated by an #
* even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), #
* s = r*r. #
* Exit. #
* #
* 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) #
* where sin(r) is approximated by an odd polynomial in r #
* r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. #
* Exit. #
* #
* 7. If |X| > 1, go to 9. #
* #
* 8. (|X|<2**(-40)) If SIN is invoked, return X; #
* otherwise return 1. #
* #
* 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
* go back to 3. #
* #
* SINCOS: #
* 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
* #
* 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
* k = N mod 4, so in particular, k = 0,1,2,or 3. #
* #
* 3. If k is even, go to 5. #
* #
* 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie. #
* j1 exclusive or with the l.s.b. of k. #
* sgn1 := (-1)**j1, sgn2 := (-1)**j2. #
* SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where #
* sin(r) and cos(r) are computed as odd and even #
* polynomials in r, respectively. Exit #
* #
* 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. #
* SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where #
* sin(r) and cos(r) are computed as odd and even #
* polynomials in r, respectively. Exit #
* #
* 6. If |X| > 1, go to 8. #
* #
* 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. #
* #
* 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
* go back to 2. #
* #
*########################################################################
SINA7: .dc.l $BD6AAA77,$CCC994F5
SINA6: .dc.l $3DE61209,$7AAE8DA1
SINA5: .dc.l $BE5AE645,$2A118AE4
SINA4: .dc.l $3EC71DE3,$A5341531
SINA3: .dc.l $BF2A01A0,$1A018B59,$00000000,$00000000
SINA2: .dc.l $3FF80000,$88888888,$888859AF,$00000000
SINA1: .dc.l $BFFC0000,$AAAAAAAA,$AAAAAA99,$00000000
COSB8: .dc.l $3D2AC4D0,$D6011EE3
COSB7: .dc.l $BDA9396F,$9F45AC19
COSB6: .dc.l $3E21EED9,$0612C972
COSB5: .dc.l $BE927E4F,$B79D9FCF
COSB4: .dc.l $3EFA01A0,$1A01D423,$00000000,$00000000
COSB3: .dc.l $BFF50000,$B60B60B6,$0B61D438,$00000000
COSB2: .dc.l $3FFA0000,$AAAAAAAA,$AAAAAB5E
COSB1: .dc.l $BF000000
INARG set FP_SCR0
X set FP_SCR0
* set XDCARE,X+2
XFRAC set X+4
RPRIME set FP_SCR0
SPRIME set FP_SCR1
POSNEG1 set L_SCR1
TWOTO63 set L_SCR1
ENDFLAG set L_SCR2
INT set L_SCR2
ADJN set L_SCR3
*###########################################
global ssin
ssin:
move.l #0,ADJN(a6) * yes; SET ADJN TO 0
bra.b SINBGN
*###########################################
global scos
scos:
move.l #1,ADJN(a6) * yes; SET ADJN TO 1
*###########################################
SINBGN:
*--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
fmove.x (a0),fp0 * LOAD INPUT
fmove.x fp0,X(a6) * save input at X
* "COMPACTIFY" X
move.l (a0),d1 * put exp in hi word
move.w 4(a0),d1 * fetch hi(man)
andi.l #$7FFFFFFF,d1 * strip sign
cmpi.l #$3FD78000,d1 * is |X| >= 2**(-40)?
bge.b SOK1 * no
bra.w SINSM * yes; input is very small
SOK1:
cmpi.l #$4004BC7E,d1 * is |X| < 15 PI?
blt.b SINMAIN * no
bra.w SREDUCEX * yes; input is very large
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
SINMAIN:
fmove.x fp0,fp1
fmul.d TWOBYPI(pc),fp1 * X*2/PI
lea PITBL+$200.l(pc),a1 * TABLE OF N*PI/2, N = -32,...,32
fmove.l fp1,INT(a6) * CONVERT TO INTEGER
move.l INT(a6),d1 * make a copy of N
asl.l #4,d1 * N *= 16
add.l d1,a1 * tbl_addr = a1 + (N*16)
* A1 IS THE ADDRESS OF N*PIBY2
* ...WHICH IS IN TWO PIECES Y1 & Y2
fsub.x (a1)+,fp0 * X-Y1
fsub.s (a1),fp0 * fp0 = R = (X-Y1)-Y2
SINCONT:
*--continuation from REDUCEX
*--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
move.l INT(a6),d1
add.l ADJN(a6),d1 * SEE IF D0 IS ODD OR EVEN
ror.l #1,d1 * D0 WAS ODD IFF D0 IS NEGATIVE
cmpi.l #0,d1
blt.w COSPOLY
*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
*--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
*--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
*--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
*--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
*--WHERE T=S*S.
*--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
*--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
SINPOLY:
fmovem.x fp2-fp3,-(sp) * save fp2/fp3
fmove.x fp0,X(a6) * X IS R
fmul.x fp0,fp0 * FP0 IS S
fmove.d SINA7(pc),fp3
fmove.d SINA6(pc),fp2
fmove.x fp0,fp1
fmul.x fp1,fp1 * FP1 IS T
ror.l #1,d1
andi.l #$80000000,d1
* ...LEAST SIG. BIT OF D0 IN SIGN POSITION
eor.l d1,X(a6) * X IS NOW R'= SGN*R
fmul.x fp1,fp3 * TA7
fmul.x fp1,fp2 * TA6
fadd.d SINA5(pc),fp3 * A5+TA7
fadd.d SINA4(pc),fp2 * A4+TA6
fmul.x fp1,fp3 * T(A5+TA7)
fmul.x fp1,fp2 * T(A4+TA6)
fadd.d SINA3(pc),fp3 * A3+T(A5+TA7)
fadd.x SINA2(pc),fp2 * A2+T(A4+TA6)
fmul.x fp3,fp1 * T(A3+T(A5+TA7))
fmul.x fp0,fp2 * S(A2+T(A4+TA6))
fadd.x SINA1(pc),fp1 * A1+T(A3+T(A5+TA7))
fmul.x X(a6),fp0 * R'*S
fadd.x fp2,fp1 * [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
fmul.x fp1,fp0 * SIN(R')-R'
fmovem.x (sp)+,fp2-fp3 * restore fp2/fp3
fmove.l d0,fpcr * restore users round mode,prec
fadd.x X(a6),fp0 * last inst - possible exception set
bra.l t_inx2
*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
*--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
*--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
*--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
*--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
*--WHERE T=S*S.
*--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
*--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
*--AND IS THEREFORE STORED AS SINGLE PRECISION.
COSPOLY:
fmovem.x fp2-fp3,-(sp) * save fp2/fp3
fmul.x fp0,fp0 * FP0 IS S
fmove.d COSB8(pc),fp2
fmove.d COSB7(pc),fp3
fmove.x fp0,fp1
fmul.x fp1,fp1 * FP1 IS T
fmove.x fp0,X(a6) * X IS S
ror.l #1,d1
andi.l #$80000000,d1
* ...LEAST SIG. BIT OF D0 IN SIGN POSITION
fmul.x fp1,fp2 * TB8
eor.l d1,X(a6) * X IS NOW S'= SGN*S
andi.l #$80000000,d1
fmul.x fp1,fp3 * TB7
ori.l #$3F800000,d1 * D0 IS SGN IN SINGLE
move.l d1,POSNEG1(a6)
fadd.d COSB6(pc),fp2 * B6+TB8
fadd.d COSB5(pc),fp3 * B5+TB7
fmul.x fp1,fp2 * T(B6+TB8)
fmul.x fp1,fp3 * T(B5+TB7)
fadd.d COSB4(pc),fp2 * B4+T(B6+TB8)
fadd.x COSB3(pc),fp3 * B3+T(B5+TB7)
fmul.x fp1,fp2 * T(B4+T(B6+TB8))
fmul.x fp3,fp1 * T(B3+T(B5+TB7))
fadd.x COSB2(pc),fp2 * B2+T(B4+T(B6+TB8))
fadd.s COSB1(pc),fp1 * B1+T(B3+T(B5+TB7))
fmul.x fp2,fp0 * S(B2+T(B4+T(B6+TB8)))
fadd.x fp1,fp0
fmul.x X(a6),fp0
fmovem.x (sp)+,fp2-fp3 * restore fp2/fp3
fmove.l d0,fpcr * restore users round mode,prec
fadd.s POSNEG1(a6),fp0 * last inst - possible exception set
bra.l t_inx2
*#############################################
* SINe: Big OR Small?
*--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
*--IF |X| < 2**(-40), RETURN X OR 1.
SINBORS:
cmpi.l #$3FFF8000,d1
bgt.l SREDUCEX
SINSM:
move.l ADJN(a6),d1
cmpi.l #0,d1
bgt.b COSTINY
* here, the operation may underflow iff the precision is sgl or dbl.
* extended denorms are handled through another entry point.
SINTINY:
* mov.w &0x0000,XDCARE(%a6) # JUST IN CASE
fmove.l d0,fpcr * restore users round mode,prec
move.b #FMOV_OP,d1 * last inst is MOVE
fmove.x X(a6),fp0 * last inst - possible exception set
bra.l t_catch
COSTINY:
fmove.s #$3F800000,fp0 * fp0 = 1.0
fmove.l d0,fpcr * restore users round mode,prec
fadd.s #$80800000,fp0 * last inst - possible exception set
bra.l t_pinx2
*###############################################
global ssind
*--SIN(X) = X FOR DENORMALIZED X
ssind:
bra.l t_extdnrm
*###########################################
global scosd
*--COS(X) = 1 FOR DENORMALIZED X
scosd:
fmove.s #$3F800000,fp0 * fp0 = 1.0
bra.l t_pinx2
*#################################################
global ssincos
ssincos:
*--SET ADJN TO 4
move.l #4,ADJN(a6)
fmove.x (a0),fp0 * LOAD INPUT
fmove.x fp0,X(a6)
move.l (a0),d1
move.w 4(a0),d1
andi.l #$7FFFFFFF,d1 * COMPACTIFY X
cmpi.l #$3FD78000,d1 * |X| >= 2**(-40)?
bge.b SCOK1
bra.w SCSM
SCOK1:
cmpi.l #$4004BC7E,d1 * |X| < 15 PI?
blt.b SCMAIN
bra.w SREDUCEX
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
SCMAIN:
fmove.x fp0,fp1
fmul.d TWOBYPI(pc),fp1 * X*2/PI
lea PITBL+$200.l(pc),a1 * TABLE OF N*PI/2, N = -32,...,32
fmove.l fp1,INT(a6) * CONVERT TO INTEGER
move.l INT(a6),d1
asl.l #4,d1
add.l d1,a1 * ADDRESS OF N*PIBY2, IN Y1, Y2
fsub.x (a1)+,fp0 * X-Y1
fsub.s (a1),fp0 * FP0 IS R = (X-Y1)-Y2
SCCONT:
*--continuation point from REDUCEX
move.l INT(a6),d1
ror.l #1,d1
cmpi.l #0,d1 * D0 < 0 IFF N IS ODD
bge.w NEVEN
SNODD:
*--REGISTERS SAVED SO FAR: D0, A0, FP2.
fmovem.x fp2,-(sp) * save fp2
fmove.x fp0,RPRIME(a6)
fmul.x fp0,fp0 * FP0 IS S = R*R
fmove.d SINA7(pc),fp1 * A7
fmove.d COSB8(pc),fp2 * B8
fmul.x fp0,fp1 * SA7
fmul.x fp0,fp2 * SB8
move.l d2,-(sp)
move.l d1,d2
ror.l #1,d2
andi.l #$80000000,d2
eor.l d1,d2
andi.l #$80000000,d2
fadd.d SINA6(pc),fp1 * A6+SA7
fadd.d COSB7(pc),fp2 * B7+SB8
fmul.x fp0,fp1 * S(A6+SA7)
eor.l d2,RPRIME(a6)
move.l (sp)+,d2
fmul.x fp0,fp2 * S(B7+SB8)
ror.l #1,d1
andi.l #$80000000,d1
move.l #$3F800000,POSNEG1(a6)
eor.l d1,POSNEG1(a6)
fadd.d SINA5(pc),fp1 * A5+S(A6+SA7)
fadd.d COSB6(pc),fp2 * B6+S(B7+SB8)
fmul.x fp0,fp1 * S(A5+S(A6+SA7))
fmul.x fp0,fp2 * S(B6+S(B7+SB8))
fmove.x fp0,SPRIME(a6)
fadd.d SINA4(pc),fp1 * A4+S(A5+S(A6+SA7))
eor.l d1,SPRIME(a6)
fadd.d COSB5(pc),fp2 * B5+S(B6+S(B7+SB8))
fmul.x fp0,fp1 * S(A4+...)
fmul.x fp0,fp2 * S(B5+...)
fadd.d SINA3(pc),fp1 * A3+S(A4+...)
fadd.d COSB4(pc),fp2 * B4+S(B5+...)
fmul.x fp0,fp1 * S(A3+...)
fmul.x fp0,fp2 * S(B4+...)
fadd.x SINA2(pc),fp1 * A2+S(A3+...)
fadd.x COSB3(pc),fp2 * B3+S(B4+...)
fmul.x fp0,fp1 * S(A2+...)
fmul.x fp0,fp2 * S(B3+...)
fadd.x SINA1(pc),fp1 * A1+S(A2+...)
fadd.x COSB2(pc),fp2 * B2+S(B3+...)
fmul.x fp0,fp1 * S(A1+...)
fmul.x fp2,fp0 * S(B2+...)
fmul.x RPRIME(a6),fp1 * R'S(A1+...)
fadd.s COSB1(pc),fp0 * B1+S(B2...)
fmul.x SPRIME(a6),fp0 * S'(B1+S(B2+...))
fmovem.x (sp)+,fp2 * restore fp2
fmove.l d0,fpcr
fadd.x RPRIME(a6),fp1 * COS(X)
bsr.l sto_cos * store cosine result
fadd.s POSNEG1(a6),fp0 * SIN(X)
bra.l t_inx2
NEVEN:
*--REGISTERS SAVED SO FAR: FP2.
fmovem.x fp2,-(sp) * save fp2
fmove.x fp0,RPRIME(a6)
fmul.x fp0,fp0 * FP0 IS S = R*R
fmove.d COSB8(pc),fp1 * B8
fmove.d SINA7(pc),fp2 * A7
fmul.x fp0,fp1 * SB8
fmove.x fp0,SPRIME(a6)
fmul.x fp0,fp2 * SA7
ror.l #1,d1
andi.l #$80000000,d1
fadd.d COSB7(pc),fp1 * B7+SB8
fadd.d SINA6(pc),fp2 * A6+SA7
eor.l d1,RPRIME(a6)
eor.l d1,SPRIME(a6)
fmul.x fp0,fp1 * S(B7+SB8)
ori.l #$3F800000,d1
move.l d1,POSNEG1(a6)
fmul.x fp0,fp2 * S(A6+SA7)
fadd.d COSB6(pc),fp1 * B6+S(B7+SB8)
fadd.d SINA5(pc),fp2 * A5+S(A6+SA7)
fmul.x fp0,fp1 * S(B6+S(B7+SB8))
fmul.x fp0,fp2 * S(A5+S(A6+SA7))
fadd.d COSB5(pc),fp1 * B5+S(B6+S(B7+SB8))
fadd.d SINA4(pc),fp2 * A4+S(A5+S(A6+SA7))
fmul.x fp0,fp1 * S(B5+...)
fmul.x fp0,fp2 * S(A4+...)
fadd.d COSB4(pc),fp1 * B4+S(B5+...)
fadd.d SINA3(pc),fp2 * A3+S(A4+...)
fmul.x fp0,fp1 * S(B4+...)
fmul.x fp0,fp2 * S(A3+...)
fadd.x COSB3(pc),fp1 * B3+S(B4+...)
fadd.x SINA2(pc),fp2 * A2+S(A3+...)
fmul.x fp0,fp1 * S(B3+...)
fmul.x fp0,fp2 * S(A2+...)
fadd.x COSB2(pc),fp1 * B2+S(B3+...)
fadd.x SINA1(pc),fp2 * A1+S(A2+...)
fmul.x fp0,fp1 * S(B2+...)
fmul.x fp2,fp0 * s(a1+...)
fadd.s COSB1(pc),fp1 * B1+S(B2...)
fmul.x RPRIME(a6),fp0 * R'S(A1+...)
fmul.x SPRIME(a6),fp1 * S'(B1+S(B2+...))
fmovem.x (sp)+,fp2 * restore fp2
fmove.l d0,fpcr
fadd.s POSNEG1(a6),fp1 * COS(X)
bsr.l sto_cos * store cosine result
fadd.x RPRIME(a6),fp0 * SIN(X)
bra.l t_inx2
*###############################################
SCBORS:
cmpi.l #$3FFF8000,d1
bgt.w SREDUCEX
*###############################################
SCSM:
* mov.w &0x0000,XDCARE(%a6)
fmove.s #$3F800000,fp1
fmove.l d0,fpcr
fsub.s #$00800000,fp1
bsr.l sto_cos * store cosine result
fmove.l fpcr,d0 * d0 must have fpcr,too
move.b #FMOV_OP,d1 * last inst is MOVE
fmove.x X(a6),fp0
bra.l t_catch
*#############################################
global ssincosd
*--SIN AND COS OF X FOR DENORMALIZED X
ssincosd:
move.l d0,-(sp) * save d0
fmove.s #$3F800000,fp1
bsr.l sto_cos * store cosine result
move.l (sp)+,d0 * restore d0
bra.l t_extdnrm
*###########################################
*--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
SREDUCEX:
fmovem.x fp2-fp5,-(sp) * save {fp2-fp5}
move.l d2,-(sp) * save d2
fmove.s #$00000000,fp1 * fp1 = 0
*--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
*--there is a danger of unwanted overflow in first LOOP iteration. In this
*--case, reduce argument by one remainder step to make subsequent reduction
*--safe.
cmpi.l #$7ffeffff,d1 * is arg dangerously large?
bne.b SLOOP * no
* yes; create 2**16383*PI/2
move.w #$7ffe,FP_SCR0_EX(a6)
move.l #$c90fdaa2,FP_SCR0_HI(a6)
clr.l FP_SCR0_LO(a6)
* create low half of 2**16383*PI/2 at FP_SCR1
move.w #$7fdc,FP_SCR1_EX(a6)
move.l #$85a308d3,FP_SCR1_HI(a6)
clr.l FP_SCR1_LO(a6)
ftst.x fp0 * test sign of argument
fblt.w sred_neg
ori.b #$80,FP_SCR0_EX(a6) * positive arg
ori.b #$80,FP_SCR1_EX(a6)
sred_neg:
fadd.x FP_SCR0(a6),fp0 * high part of reduction is exact
fmove.x fp0,fp1 * save high result in fp1
fadd.x FP_SCR1(a6),fp0 * low part of reduction
fsub.x fp0,fp1 * determine low component of result
fadd.x FP_SCR1(a6),fp1 * fp0/fp1 are reduced argument.
*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
*--integer quotient will be stored in N
*--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
SLOOP:
fmove.x fp0,INARG(a6) * +-2**K * F, 1 <= F < 2
move.w INARG(a6),d1
move.l d1,a1 * save a copy of D0
andi.l #$00007FFF,d1
subi.l #$00003FFF,d1 * d0 = K
cmpi.l #28,d1
ble.b SLASTLOOP
SCONTLOOP:
subi.l #27,d1 * d0 = L := K-27
move.b #0,ENDFLAG(a6)
bra.b SWORK
SLASTLOOP:
clr.l d1 * d0 = L := 0
move.b #1,ENDFLAG(a6)
SWORK:
*--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
*--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
*--2**L * (PIby2_1), 2**L * (PIby2_2)
move.l #$00003FFE,d2 * BIASED EXP OF 2/PI
sub.l d1,d2 * BIASED EXP OF 2**(-L)*(2/PI)
move.l #$A2F9836E,FP_SCR0_HI(a6)
move.l #$4E44152A,FP_SCR0_LO(a6)
move.w d2,FP_SCR0_EX(a6) * FP_SCR0 = 2**(-L)*(2/PI)
fmove.x fp0,fp2
fmul.x FP_SCR0(a6),fp2 * fp2 = X * 2**(-L)*(2/PI)
*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
*--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
*--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
*--US THE DESIRED VALUE IN FLOATING POINT.
move.l a1,d2
swap d2
andi.l #$80000000,d2
ori.l #$5F000000,d2 * d2 = SIGN(INARG)*2**63 IN SGL
move.l d2,TWOTO63(a6)
fadd.s TWOTO63(a6),fp2 * THE FRACTIONAL PART OF FP1 IS ROUNDED
fsub.s TWOTO63(a6),fp2 * fp2 = N
* fint.x %fp2
*--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
move.l d1,d2 * d2 = L
addi.l #$00003FFF,d2 * BIASED EXP OF 2**L * (PI/2)
move.w d2,FP_SCR0_EX(a6)
move.l #$C90FDAA2,FP_SCR0_HI(a6)
clr.l FP_SCR0_LO(a6) * FP_SCR0 = 2**(L) * Piby2_1
addi.l #$00003FDD,d1
move.w d1,FP_SCR1_EX(a6)
move.l #$85A308D3,FP_SCR1_HI(a6)
clr.l FP_SCR1_LO(a6) * FP_SCR1 = 2**(L) * Piby2_2
move.b ENDFLAG(a6),d1
*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
*--P2 = 2**(L) * Piby2_2
fmove.x fp2,fp4 * fp4 = N
fmul.x FP_SCR0(a6),fp4 * fp4 = W = N*P1
fmove.x fp2,fp5 * fp5 = N
fmul.x FP_SCR1(a6),fp5 * fp5 = w = N*P2
fmove.x fp4,fp3 * fp3 = W = N*P1
*--we want P+p = W+w but |p| <= half ulp of P
*--Then, we need to compute A := R-P and a := r-p
fadd.x fp5,fp3 * fp3 = P
fsub.x fp3,fp4 * fp4 = W-P
fsub.x fp3,fp0 * fp0 = A := R - P
fadd.x fp5,fp4 * fp4 = p = (W-P)+w
fmove.x fp0,fp3 * fp3 = A
fsub.x fp4,fp1 * fp1 = a := r - p
*--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
*--|r| <= half ulp of R.
fadd.x fp1,fp0 * fp0 = R := A+a
*--No need to calculate r if this is the last loop
cmpi.b #0,d1
bgt.w SRESTORE
*--Need to calculate r
fsub.x fp0,fp3 * fp3 = A-R
fadd.x fp3,fp1 * fp1 = r := (A-R)+a
bra.w SLOOP
SRESTORE:
fmove.l fp2,INT(a6)
move.l (sp)+,d2 * restore d2
fmovem.x (sp)+,fp2-fp5 * restore {fp2-fp5}
move.l ADJN(a6),d1
cmpi.l #4,d1
blt.w SINCONT
bra.w SCCONT
*########################################################################
* stan(): computes the tangent of a normalized input #
* stand(): computes the tangent of a denormalized input #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input #
* d0 = round precision,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = tan(X) #
* #
* ACCURACY and MONOTONICITY ******************************************* #
* The returned result is within 3 ulp in 64 significant bit, i.e. #
* within 0.5001 ulp to 53 bits if the result is subsequently #
* rounded to double precision. The result is provably monotonic #
* in double precision. #
* #
* ALGORITHM *********************************************************** #
* #
* 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
* #
* 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
* k = N mod 2, so in particular, k = 0 or 1. #
* #
* 3. If k is odd, go to 5. #
* #
* 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
* rational function U/V where #
* U = r + r*s*(P1 + s*(P2 + s*P3)), and #
* V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
* Exit. #
* #
* 4. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
* a rational function U/V where #
* U = r + r*s*(P1 + s*(P2 + s*P3)), and #
* V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
* -Cot(r) = -V/U. Exit. #
* #
* 6. If |X| > 1, go to 8. #
* #
* 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
* #
* 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back #
* to 2. #
* #
*########################################################################
TANQ4:
.dc.l $3EA0B759,$F50F8688
TANP3:
.dc.l $BEF2BAA5,$A8924F04
TANQ3:
.dc.l $BF346F59,$B39BA65F,$00000000,$00000000
TANP2:
.dc.l $3FF60000,$E073D3FC,$199C4A00,$00000000
TANQ2:
.dc.l $3FF90000,$D23CD684,$15D95FA1,$00000000
TANP1:
.dc.l $BFFC0000,$8895A6C5,$FB423BCA,$00000000
TANQ1:
.dc.l $BFFD0000,$EEF57E0D,$A84BC8CE,$00000000
INVTWOPI:
.dc.l $3FFC0000,$A2F9836E,$4E44152A,$00000000
TWOPI1:
.dc.l $40010000,$C90FDAA2,$00000000,$00000000
TWOPI2:
.dc.l $3FDF0000,$85A308D4,$00000000,$00000000
*--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
*--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
*--MOST 69 BITS LONG.
* global PITBL
PITBL:
.dc.l $C0040000,$C90FDAA2,$2168C235,$21800000
.dc.l $C0040000,$C2C75BCD,$105D7C23,$A0D00000
.dc.l $C0040000,$BC7EDCF7,$FF523611,$A1E80000
.dc.l $C0040000,$B6365E22,$EE46F000,$21480000
.dc.l $C0040000,$AFEDDF4D,$DD3BA9EE,$A1200000
.dc.l $C0040000,$A9A56078,$CC3063DD,$21FC0000
.dc.l $C0040000,$A35CE1A3,$BB251DCB,$21100000
.dc.l $C0040000,$9D1462CE,$AA19D7B9,$A1580000
.dc.l $C0040000,$96CBE3F9,$990E91A8,$21E00000
.dc.l $C0040000,$90836524,$88034B96,$20B00000
.dc.l $C0040000,$8A3AE64F,$76F80584,$A1880000
.dc.l $C0040000,$83F2677A,$65ECBF73,$21C40000
.dc.l $C0030000,$FB53D14A,$A9C2F2C2,$20000000
.dc.l $C0030000,$EEC2D3A0,$87AC669F,$21380000
.dc.l $C0030000,$E231D5F6,$6595DA7B,$A1300000
.dc.l $C0030000,$D5A0D84C,$437F4E58,$9FC00000
.dc.l $C0030000,$C90FDAA2,$2168C235,$21000000
.dc.l $C0030000,$BC7EDCF7,$FF523611,$A1680000
.dc.l $C0030000,$AFEDDF4D,$DD3BA9EE,$A0A00000
.dc.l $C0030000,$A35CE1A3,$BB251DCB,$20900000
.dc.l $C0030000,$96CBE3F9,$990E91A8,$21600000
.dc.l $C0030000,$8A3AE64F,$76F80584,$A1080000
.dc.l $C0020000,$FB53D14A,$A9C2F2C2,$1F800000
.dc.l $C0020000,$E231D5F6,$6595DA7B,$A0B00000
.dc.l $C0020000,$C90FDAA2,$2168C235,$20800000
.dc.l $C0020000,$AFEDDF4D,$DD3BA9EE,$A0200000
.dc.l $C0020000,$96CBE3F9,$990E91A8,$20E00000
.dc.l $C0010000,$FB53D14A,$A9C2F2C2,$1F000000
.dc.l $C0010000,$C90FDAA2,$2168C235,$20000000
.dc.l $C0010000,$96CBE3F9,$990E91A8,$20600000
.dc.l $C0000000,$C90FDAA2,$2168C235,$1F800000
.dc.l $BFFF0000,$C90FDAA2,$2168C235,$1F000000
.dc.l $00000000,$00000000,$00000000,$00000000
.dc.l $3FFF0000,$C90FDAA2,$2168C235,$9F000000
.dc.l $40000000,$C90FDAA2,$2168C235,$9F800000
.dc.l $40010000,$96CBE3F9,$990E91A8,$A0600000
.dc.l $40010000,$C90FDAA2,$2168C235,$A0000000
.dc.l $40010000,$FB53D14A,$A9C2F2C2,$9F000000
.dc.l $40020000,$96CBE3F9,$990E91A8,$A0E00000
.dc.l $40020000,$AFEDDF4D,$DD3BA9EE,$20200000
.dc.l $40020000,$C90FDAA2,$2168C235,$A0800000
.dc.l $40020000,$E231D5F6,$6595DA7B,$20B00000
.dc.l $40020000,$FB53D14A,$A9C2F2C2,$9F800000
.dc.l $40030000,$8A3AE64F,$76F80584,$21080000
.dc.l $40030000,$96CBE3F9,$990E91A8,$A1600000
.dc.l $40030000,$A35CE1A3,$BB251DCB,$A0900000
.dc.l $40030000,$AFEDDF4D,$DD3BA9EE,$20A00000
.dc.l $40030000,$BC7EDCF7,$FF523611,$21680000
.dc.l $40030000,$C90FDAA2,$2168C235,$A1000000
.dc.l $40030000,$D5A0D84C,$437F4E58,$1FC00000
.dc.l $40030000,$E231D5F6,$6595DA7B,$21300000
.dc.l $40030000,$EEC2D3A0,$87AC669F,$A1380000
.dc.l $40030000,$FB53D14A,$A9C2F2C2,$A0000000
.dc.l $40040000,$83F2677A,$65ECBF73,$A1C40000
.dc.l $40040000,$8A3AE64F,$76F80584,$21880000
.dc.l $40040000,$90836524,$88034B96,$A0B00000
.dc.l $40040000,$96CBE3F9,$990E91A8,$A1E00000
.dc.l $40040000,$9D1462CE,$AA19D7B9,$21580000
.dc.l $40040000,$A35CE1A3,$BB251DCB,$A1100000
.dc.l $40040000,$A9A56078,$CC3063DD,$A1FC0000
.dc.l $40040000,$AFEDDF4D,$DD3BA9EE,$21200000
.dc.l $40040000,$B6365E22,$EE46F000,$A1480000
.dc.l $40040000,$BC7EDCF7,$FF523611,$21E80000
.dc.l $40040000,$C2C75BCD,$105D7C23,$20D00000
.dc.l $40040000,$C90FDAA2,$2168C235,$A1800000
INARG set FP_SCR0
TWOTO63 set L_SCR1
INT set L_SCR1
ENDFLAG set L_SCR2
global stan
stan:
fmove.x (a0),fp0 * LOAD INPUT
move.l (a0),d1
move.w 4(a0),d1
andi.l #$7FFFFFFF,d1
cmpi.l #$3FD78000,d1 * |X| >= 2**(-40)?
bge.b TANOK1
bra.w TANSM
TANOK1:
cmpi.l #$4004BC7E,d1 * |X| < 15 PI?
blt.b TANMAIN
bra.w REDUCEX
TANMAIN:
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
fmove.x fp0,fp1
fmul.d TWOBYPI(pc),fp1 * X*2/PI
lea.l PITBL+$200(pc),a1 * TABLE OF N*PI/2, N = -32,...,32
fmove.l fp1,d1 * CONVERT TO INTEGER
asl.l #4,d1
add.l d1,a1 * ADDRESS N*PIBY2 IN Y1, Y2
fsub.x (a1)+,fp0 * X-Y1
fsub.s (a1),fp0 * FP0 IS R = (X-Y1)-Y2
ror.l #5,d1
andi.l #$80000000,d1 * D0 WAS ODD IFF D0 < 0
TANCONT:
fmovem.x fp2-fp3,-(sp) * save fp2,fp3
cmpi.l #0,d1
blt.w NODD
fmove.x fp0,fp1
fmul.x fp1,fp1 * S = R*R
fmove.d TANQ4(pc),fp3
fmove.d TANP3(pc),fp2
fmul.x fp1,fp3 * SQ4
fmul.x fp1,fp2 * SP3
fadd.d TANQ3(pc),fp3 * Q3+SQ4
fadd.x TANP2(pc),fp2 * P2+SP3
fmul.x fp1,fp3 * S(Q3+SQ4)
fmul.x fp1,fp2 * S(P2+SP3)
fadd.x TANQ2(pc),fp3 * Q2+S(Q3+SQ4)
fadd.x TANP1(pc),fp2 * P1+S(P2+SP3)
fmul.x fp1,fp3 * S(Q2+S(Q3+SQ4))
fmul.x fp1,fp2 * S(P1+S(P2+SP3))
fadd.x TANQ1(pc),fp3 * Q1+S(Q2+S(Q3+SQ4))
fmul.x fp0,fp2 * RS(P1+S(P2+SP3))
fmul.x fp3,fp1 * S(Q1+S(Q2+S(Q3+SQ4)))
fadd.x fp2,fp0 * R+RS(P1+S(P2+SP3))
fadd.s #$3F800000,fp1 * 1+S(Q1+...)
fmovem.x (sp)+,fp2-fp3 * restore fp2,fp3
fmove.l d0,fpcr * restore users round mode,prec
fdiv.x fp1,fp0 * last inst - possible exception set
bra.l t_inx2
NODD:
fmove.x fp0,fp1
fmul.x fp0,fp0 * S = R*R
fmove.d TANQ4(pc),fp3
fmove.d TANP3(pc),fp2
fmul.x fp0,fp3 * SQ4
fmul.x fp0,fp2 * SP3
fadd.d TANQ3(pc),fp3 * Q3+SQ4
fadd.x TANP2(pc),fp2 * P2+SP3
fmul.x fp0,fp3 * S(Q3+SQ4)
fmul.x fp0,fp2 * S(P2+SP3)
fadd.x TANQ2(pc),fp3 * Q2+S(Q3+SQ4)
fadd.x TANP1(pc),fp2 * P1+S(P2+SP3)
fmul.x fp0,fp3 * S(Q2+S(Q3+SQ4))
fmul.x fp0,fp2 * S(P1+S(P2+SP3))
fadd.x TANQ1(pc),fp3 * Q1+S(Q2+S(Q3+SQ4))
fmul.x fp1,fp2 * RS(P1+S(P2+SP3))
fmul.x fp3,fp0 * S(Q1+S(Q2+S(Q3+SQ4)))
fadd.x fp2,fp1 * R+RS(P1+S(P2+SP3))
fadd.s #$3F800000,fp0 * 1+S(Q1+...)
fmovem.x (sp)+,fp2-fp3 * restore fp2,fp3
fmove.x fp1,-(sp)
eori.l #$80000000,(sp)
fmove.l d0,fpcr * restore users round mode,prec
fdiv.x (sp)+,fp0 * last inst - possible exception set
bra.l t_inx2
TANBORS:
*--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
*--IF |X| < 2**(-40), RETURN X OR 1.
cmpi.l #$3FFF8000,d1
bgt.b REDUCEX
TANSM:
fmove.x fp0,-(sp)
fmove.l d0,fpcr * restore users round mode,prec
move.b #FMOV_OP,d1 * last inst is MOVE
fmove.x (sp)+,fp0 * last inst - posibble exception set
bra.l t_catch
global stand
*--TAN(X) = X FOR DENORMALIZED X
stand:
bra.l t_extdnrm
*--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
REDUCEX:
fmovem.x fp2-fp5,-(sp) * save {fp2-fp5}
move.l d2,-(sp) * save d2
fmove.s #$00000000,fp1 * fp1 = 0
*--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
*--there is a danger of unwanted overflow in first LOOP iteration. In this
*--case, reduce argument by one remainder step to make subsequent reduction
*--safe.
cmpi.l #$7ffeffff,d1 * is arg dangerously large?
bne.b LOOP * no
* yes; create 2**16383*PI/2
move.w #$7ffe,FP_SCR0_EX(a6)
move.l #$c90fdaa2,FP_SCR0_HI(a6)
clr.l FP_SCR0_LO(a6)
* create low half of 2**16383*PI/2 at FP_SCR1
move.w #$7fdc,FP_SCR1_EX(a6)
move.l #$85a308d3,FP_SCR1_HI(a6)
clr.l FP_SCR1_LO(a6)
ftst.x fp0 * test sign of argument
fblt.w red_neg
ori.b #$80,FP_SCR0_EX(a6) * positive arg
ori.b #$80,FP_SCR1_EX(a6)
red_neg:
fadd.x FP_SCR0(a6),fp0 * high part of reduction is exact
fmove.x fp0,fp1 * save high result in fp1
fadd.x FP_SCR1(a6),fp0 * low part of reduction
fsub.x fp0,fp1 * determine low component of result
fadd.x FP_SCR1(a6),fp1 * fp0/fp1 are reduced argument.
*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
*--integer quotient will be stored in N
*--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
LOOP:
fmove.x fp0,INARG(a6) * +-2**K * F, 1 <= F < 2
move.w INARG(a6),d1
move.l d1,a1 * save a copy of D0
andi.l #$00007FFF,d1
subi.l #$00003FFF,d1 * d0 = K
cmpi.l #28,d1
ble.b LASTLOOP
CONTLOOP:
subi.l #27,d1 * d0 = L := K-27
move.b #0,ENDFLAG(a6)
bra.b WORK
LASTLOOP:
clr.l d1 * d0 = L := 0
move.b #1,ENDFLAG(a6)
WORK:
*--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
*--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
*--2**L * (PIby2_1), 2**L * (PIby2_2)
move.l #$00003FFE,d2 * BIASED EXP OF 2/PI
sub.l d1,d2 * BIASED EXP OF 2**(-L)*(2/PI)
move.l #$A2F9836E,FP_SCR0_HI(a6)
move.l #$4E44152A,FP_SCR0_LO(a6)
move.w d2,FP_SCR0_EX(a6) * FP_SCR0 = 2**(-L)*(2/PI)
fmove.x fp0,fp2
fmul.x FP_SCR0(a6),fp2 * fp2 = X * 2**(-L)*(2/PI)
*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
*--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
*--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
*--US THE DESIRED VALUE IN FLOATING POINT.
move.l a1,d2
swap d2
andi.l #$80000000,d2
ori.l #$5F000000,d2 * d2 = SIGN(INARG)*2**63 IN SGL
move.l d2,TWOTO63(a6)
fadd.s TWOTO63(a6),fp2 * THE FRACTIONAL PART OF FP1 IS ROUNDED
fsub.s TWOTO63(a6),fp2 * fp2 = N
* fintrz.x %fp2,%fp2
*--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
move.l d1,d2 * d2 = L
addi.l #$00003FFF,d2 * BIASED EXP OF 2**L * (PI/2)
move.w d2,FP_SCR0_EX(a6)
move.l #$C90FDAA2,FP_SCR0_HI(a6)
clr.l FP_SCR0_LO(a6) * FP_SCR0 = 2**(L) * Piby2_1
addi.l #$00003FDD,d1
move.w d1,FP_SCR1_EX(a6)
move.l #$85A308D3,FP_SCR1_HI(a6)
clr.l FP_SCR1_LO(a6) * FP_SCR1 = 2**(L) * Piby2_2
move.b ENDFLAG(a6),d1
*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
*--P2 = 2**(L) * Piby2_2
fmove.x fp2,fp4 * fp4 = N
fmul.x FP_SCR0(a6),fp4 * fp4 = W = N*P1
fmove.x fp2,fp5 * fp5 = N
fmul.x FP_SCR1(a6),fp5 * fp5 = w = N*P2
fmove.x fp4,fp3 * fp3 = W = N*P1
*--we want P+p = W+w but |p| <= half ulp of P
*--Then, we need to compute A := R-P and a := r-p
fadd.x fp5,fp3 * fp3 = P
fsub.x fp3,fp4 * fp4 = W-P
fsub.x fp3,fp0 * fp0 = A := R - P
fadd.x fp5,fp4 * fp4 = p = (W-P)+w
fmove.x fp0,fp3 * fp3 = A
fsub.x fp4,fp1 * fp1 = a := r - p
*--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
*--|r| <= half ulp of R.
fadd.x fp1,fp0 * fp0 = R := A+a
*--No need to calculate r if this is the last loop
cmpi.b #0,d1
bgt.w RESTORE
*--Need to calculate r
fsub.x fp0,fp3 * fp3 = A-R
fadd.x fp3,fp1 * fp1 = r := (A-R)+a
bra.w LOOP
RESTORE:
fmove.l fp2,INT(a6)
move.l (sp)+,d2 * restore d2
fmovem.x (sp)+,fp2-fp5 * restore {fp2-fp5}
move.l INT(a6),d1
ror.l #1,d1
bra.w TANCONT
*########################################################################
* satan(): computes the arctangent of a normalized number #
* satand(): computes the arctangent of a denormalized number #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input #
* d0 = round precision,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = arctan(X) #
* #
* ACCURACY and MONOTONICITY ******************************************* #
* The returned result is within 2 ulps in 64 significant bit, #
* i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
* rounded to double precision. The result is provably monotonic #
* in double precision. #
* #
* ALGORITHM *********************************************************** #
* Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. #
* #
* Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. #
* Note that k = -4, -3,..., or 3. #
* Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 #
* significant bits of X with a bit-1 attached at the 6-th #
* bit position. Define u to be u = (X-F) / (1 + X*F). #
* #
* Step 3. Approximate arctan(u) by a polynomial poly. #
* #
* Step 4. Return arctan(F) + poly, arctan(F) is fetched from a #
* table of values calculated beforehand. Exit. #
* #
* Step 5. If |X| >= 16, go to Step 7. #
* #
* Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. #
* #
* Step 7. Define X' = -1/X. Approximate arctan(X') by an odd #
* polynomial in X'. #
* Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. #
* #
*########################################################################
ATANA3: .dc.l $BFF6687E,$314987D8
ATANA2: .dc.l $4002AC69,$34A26DB3
ATANA1: .dc.l $BFC2476F,$4E1DA28E
ATANB6: .dc.l $3FB34444,$7F876989
ATANB5: .dc.l $BFB744EE,$7FAF45DB
ATANB4: .dc.l $3FBC71C6,$46940220
ATANB3: .dc.l $BFC24924,$921872F9
ATANB2: .dc.l $3FC99999,$99998FA9
ATANB1: .dc.l $BFD55555,$55555555
ATANC5: .dc.l $BFB70BF3,$98539E6A
ATANC4: .dc.l $3FBC7187,$962D1D7D
ATANC3: .dc.l $BFC24924,$827107B8
ATANC2: .dc.l $3FC99999,$9996263E
ATANC1: .dc.l $BFD55555,$55555536
PPIBY2: .dc.l $3FFF0000,$C90FDAA2,$2168C235,$00000000
NPIBY2: .dc.l $BFFF0000,$C90FDAA2,$2168C235,$00000000
PTINY: .dc.l $00010000,$80000000,$00000000,$00000000
NTINY: .dc.l $80010000,$80000000,$00000000,$00000000
ATANTBL:
.dc.l $3FFB0000,$83D152C5,$060B7A51,$00000000
.dc.l $3FFB0000,$8BC85445,$65498B8B,$00000000
.dc.l $3FFB0000,$93BE4060,$17626B0D,$00000000
.dc.l $3FFB0000,$9BB3078D,$35AEC202,$00000000
.dc.l $3FFB0000,$A3A69A52,$5DDCE7DE,$00000000
.dc.l $3FFB0000,$AB98E943,$62765619,$00000000
.dc.l $3FFB0000,$B389E502,$F9C59862,$00000000
.dc.l $3FFB0000,$BB797E43,$6B09E6FB,$00000000
.dc.l $3FFB0000,$C367A5C7,$39E5F446,$00000000
.dc.l $3FFB0000,$CB544C61,$CFF7D5C6,$00000000
.dc.l $3FFB0000,$D33F62F8,$2488533E,$00000000
.dc.l $3FFB0000,$DB28DA81,$62404C77,$00000000
.dc.l $3FFB0000,$E310A407,$8AD34F18,$00000000
.dc.l $3FFB0000,$EAF6B0A8,$188EE1EB,$00000000
.dc.l $3FFB0000,$F2DAF194,$9DBE79D5,$00000000
.dc.l $3FFB0000,$FABD5813,$61D47E3E,$00000000
.dc.l $3FFC0000,$8346AC21,$0959ECC4,$00000000
.dc.l $3FFC0000,$8B232A08,$304282D8,$00000000
.dc.l $3FFC0000,$92FB70B8,$D29AE2F9,$00000000
.dc.l $3FFC0000,$9ACF476F,$5CCD1CB4,$00000000
.dc.l $3FFC0000,$A29E7630,$4954F23F,$00000000
.dc.l $3FFC0000,$AA68C5D0,$8AB85230,$00000000
.dc.l $3FFC0000,$B22DFFFD,$9D539F83,$00000000
.dc.l $3FFC0000,$B9EDEF45,$3E900EA5,$00000000
.dc.l $3FFC0000,$C1A85F1C,$C75E3EA5,$00000000
.dc.l $3FFC0000,$C95D1BE8,$28138DE6,$00000000
.dc.l $3FFC0000,$D10BF300,$840D2DE4,$00000000
.dc.l $3FFC0000,$D8B4B2BA,$6BC05E7A,$00000000
.dc.l $3FFC0000,$E0572A6B,$B42335F6,$00000000
.dc.l $3FFC0000,$E7F32A70,$EA9CAA8F,$00000000
.dc.l $3FFC0000,$EF888432,$64ECEFAA,$00000000
.dc.l $3FFC0000,$F7170A28,$ECC06666,$00000000
.dc.l $3FFD0000,$812FD288,$332DAD32,$00000000
.dc.l $3FFD0000,$88A8D1B1,$218E4D64,$00000000
.dc.l $3FFD0000,$9012AB3F,$23E4AEE8,$00000000
.dc.l $3FFD0000,$976CC3D4,$11E7F1B9,$00000000
.dc.l $3FFD0000,$9EB68949,$3889A227,$00000000
.dc.l $3FFD0000,$A5EF72C3,$4487361B,$00000000
.dc.l $3FFD0000,$AD1700BA,$F07A7227,$00000000
.dc.l $3FFD0000,$B42CBCFA,$FD37EFB7,$00000000
.dc.l $3FFD0000,$BB303A94,$0BA80F89,$00000000
.dc.l $3FFD0000,$C22115C6,$FCAEBBAF,$00000000
.dc.l $3FFD0000,$C8FEF3E6,$86331221,$00000000
.dc.l $3FFD0000,$CFC98330,$B4000C70,$00000000
.dc.l $3FFD0000,$D6807AA1,$102C5BF9,$00000000
.dc.l $3FFD0000,$DD2399BC,$31252AA3,$00000000
.dc.l $3FFD0000,$E3B2A855,$6B8FC517,$00000000
.dc.l $3FFD0000,$EA2D764F,$64315989,$00000000
.dc.l $3FFD0000,$F3BF5BF8,$BAD1A21D,$00000000
.dc.l $3FFE0000,$801CE39E,$0D205C9A,$00000000
.dc.l $3FFE0000,$8630A2DA,$DA1ED066,$00000000
.dc.l $3FFE0000,$8C1AD445,$F3E09B8C,$00000000
.dc.l $3FFE0000,$91DB8F16,$64F350E2,$00000000
.dc.l $3FFE0000,$97731420,$365E538C,$00000000
.dc.l $3FFE0000,$9CE1C8E6,$A0B8CDBA,$00000000
.dc.l $3FFE0000,$A22832DB,$CADAAE09,$00000000
.dc.l $3FFE0000,$A746F2DD,$B7602294,$00000000
.dc.l $3FFE0000,$AC3EC0FB,$997DD6A2,$00000000
.dc.l $3FFE0000,$B110688A,$EBDC6F6A,$00000000
.dc.l $3FFE0000,$B5BCC490,$59ECC4B0,$00000000
.dc.l $3FFE0000,$BA44BC7D,$D470782F,$00000000
.dc.l $3FFE0000,$BEA94144,$FD049AAC,$00000000
.dc.l $3FFE0000,$C2EB4ABB,$661628B6,$00000000
.dc.l $3FFE0000,$C70BD54C,$E602EE14,$00000000
.dc.l $3FFE0000,$CD000549,$ADEC7159,$00000000
.dc.l $3FFE0000,$D48457D2,$D8EA4EA3,$00000000
.dc.l $3FFE0000,$DB948DA7,$12DECE3B,$00000000
.dc.l $3FFE0000,$E23855F9,$69E8096A,$00000000
.dc.l $3FFE0000,$E8771129,$C4353259,$00000000
.dc.l $3FFE0000,$EE57C16E,$0D379C0D,$00000000
.dc.l $3FFE0000,$F3E10211,$A87C3779,$00000000
.dc.l $3FFE0000,$F919039D,$758B8D41,$00000000
.dc.l $3FFE0000,$FE058B8F,$64935FB3,$00000000
.dc.l $3FFF0000,$8155FB49,$7B685D04,$00000000
.dc.l $3FFF0000,$83889E35,$49D108E1,$00000000
.dc.l $3FFF0000,$859CFA76,$511D724B,$00000000
.dc.l $3FFF0000,$87952ECF,$FF8131E7,$00000000
.dc.l $3FFF0000,$89732FD1,$9557641B,$00000000
.dc.l $3FFF0000,$8B38CAD1,$01932A35,$00000000
.dc.l $3FFF0000,$8CE7A8D8,$301EE6B5,$00000000
.dc.l $3FFF0000,$8F46A39E,$2EAE5281,$00000000
.dc.l $3FFF0000,$922DA7D7,$91888487,$00000000
.dc.l $3FFF0000,$94D19FCB,$DEDF5241,$00000000
.dc.l $3FFF0000,$973AB944,$19D2A08B,$00000000
.dc.l $3FFF0000,$996FF00E,$08E10B96,$00000000
.dc.l $3FFF0000,$9B773F95,$12321DA7,$00000000
.dc.l $3FFF0000,$9D55CC32,$0F935624,$00000000
.dc.l $3FFF0000,$9F100575,$006CC571,$00000000
.dc.l $3FFF0000,$A0A9C290,$D97CC06C,$00000000
.dc.l $3FFF0000,$A22659EB,$EBC0630A,$00000000
.dc.l $3FFF0000,$A388B4AF,$F6EF0EC9,$00000000
.dc.l $3FFF0000,$A4D35F10,$61D292C4,$00000000
.dc.l $3FFF0000,$A60895DC,$FBE3187E,$00000000
.dc.l $3FFF0000,$A72A51DC,$7367BEAC,$00000000
.dc.l $3FFF0000,$A83A5153,$0956168F,$00000000
.dc.l $3FFF0000,$A93A2007,$7539546E,$00000000
.dc.l $3FFF0000,$AA9E7245,$023B2605,$00000000
.dc.l $3FFF0000,$AC4C84BA,$6FE4D58F,$00000000
.dc.l $3FFF0000,$ADCE4A4A,$606B9712,$00000000
.dc.l $3FFF0000,$AF2A2DCD,$8D263C9C,$00000000
.dc.l $3FFF0000,$B0656F81,$F22265C7,$00000000
.dc.l $3FFF0000,$B1846515,$0F71496A,$00000000
.dc.l $3FFF0000,$B28AAA15,$6F9ADA35,$00000000
.dc.l $3FFF0000,$B37B44FF,$3766B895,$00000000
.dc.l $3FFF0000,$B458C3DC,$E9630433,$00000000
.dc.l $3FFF0000,$B525529D,$562246BD,$00000000
.dc.l $3FFF0000,$B5E2CCA9,$5F9D88CC,$00000000
.dc.l $3FFF0000,$B692CADA,$7ACA1ADA,$00000000
.dc.l $3FFF0000,$B736AEA7,$A6925838,$00000000
.dc.l $3FFF0000,$B7CFAB28,$7E9F7B36,$00000000
.dc.l $3FFF0000,$B85ECC66,$CB219835,$00000000
.dc.l $3FFF0000,$B8E4FD5A,$20A593DA,$00000000
.dc.l $3FFF0000,$B99F41F6,$4AFF9BB5,$00000000
.dc.l $3FFF0000,$BA7F1E17,$842BBE7B,$00000000
.dc.l $3FFF0000,$BB471285,$7637E17D,$00000000
.dc.l $3FFF0000,$BBFABE8A,$4788DF6F,$00000000
.dc.l $3FFF0000,$BC9D0FAD,$2B689D79,$00000000
.dc.l $3FFF0000,$BD306A39,$471ECD86,$00000000
.dc.l $3FFF0000,$BDB6C731,$856AF18A,$00000000
.dc.l $3FFF0000,$BE31CAC5,$02E80D70,$00000000
.dc.l $3FFF0000,$BEA2D55C,$E33194E2,$00000000
.dc.l $3FFF0000,$BF0B10B7,$C03128F0,$00000000
.dc.l $3FFF0000,$BF6B7A18,$DACB778D,$00000000
.dc.l $3FFF0000,$BFC4EA46,$63FA18F6,$00000000
.dc.l $3FFF0000,$C0181BDE,$8B89A454,$00000000
.dc.l $3FFF0000,$C065B066,$CFBF6439,$00000000
.dc.l $3FFF0000,$C0AE345F,$56340AE6,$00000000
.dc.l $3FFF0000,$C0F22291,$9CB9E6A7,$00000000
X set FP_SCR0
XDCARE set X+2
XFRAC set X+4
XFRACLO set X+8
ATANF set FP_SCR1
ATANFHI set ATANF+4
ATANFLO set ATANF+8
global satan
*--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
satan:
fmove.x (a0),fp0 * LOAD INPUT
move.l (a0),d1
move.w 4(a0),d1
fmove.x fp0,X(a6)
andi.l #$7FFFFFFF,d1
cmpi.l #$3FFB8000,d1 * |X| >= 1/16?
bge.b ATANOK1
bra.w ATANSM
ATANOK1:
cmpi.l #$4002FFFF,d1 * |X| < 16 ?
ble.b ATANMAIN
bra.w ATANBIG
*--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
*--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
*--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
*--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
*--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
*--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
*--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
*--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE
*--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
*--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE
*--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
*--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
*--WILL INVOLVE A VERY LONG POLYNOMIAL.
*--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
*--WE CHOSE F TO BE +-2^K * 1.BBBB1
*--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
*--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
*--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
*-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
ATANMAIN:
andi.l #$F8000000,XFRAC(a6) * FIRST 5 BITS
ori.l #$04000000,XFRAC(a6) * SET 6-TH BIT TO 1
move.l #$00000000,XFRACLO(a6) * LOCATION OF X IS NOW F
fmove.x fp0,fp1 * FP1 IS X
fmul.x X(a6),fp1 * FP1 IS X*F, NOTE THAT X*F > 0
fsub.x X(a6),fp0 * FP0 IS X-F
fadd.s #$3F800000,fp1 * FP1 IS 1 + X*F
fdiv.x fp1,fp0 * FP0 IS U = (X-F)/(1+X*F)
*--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
*--CREATE ATAN(F) AND STORE IT IN ATANF, AND
*--SAVE REGISTERS FP2.
move.l d2,-(sp) * SAVE d2 TEMPORARILY
move.l d1,d2 * THE EXP AND 16 BITS OF X
andi.l #$00007800,d1 * 4 VARYING BITS OF F'S FRACTION
andi.l #$7FFF0000,d2 * EXPONENT OF F
subi.l #$3FFB0000,d2 * K+4
asr.l #1,d2
add.l d2,d1 * THE 7 BITS IDENTIFYING F
asr.l #7,d1 * INDEX INTO TBL OF ATAN(|F|)
lea ATANTBL(pc),a1
add.l d1,a1 * ADDRESS OF ATAN(|F|)
move.l (a1)+,ATANF(a6)
move.l (a1)+,ATANFHI(a6)
move.l (a1)+,ATANFLO(a6) * ATANF IS NOW ATAN(|F|)
move.l X(a6),d1 * LOAD SIGN AND EXPO. AGAIN
andi.l #$80000000,d1 * SIGN(F)
or.l d1,ATANF(a6) * ATANF IS NOW SIGN(F)*ATAN(|F|)
move.l (sp)+,d2 * RESTORE d2
*--THAT'S ALL I HAVE TO DO FOR NOW,
*--BUT ALAS, THE DIVIDE IS STILL CRANKING!
*--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
*--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
*--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
*--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
*--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
*--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
*--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
fmovem.x fp2,-(sp) * save fp2
fmove.x fp0,fp1
fmul.x fp1,fp1
fmove.d ATANA3(pc),fp2
fadd.x fp1,fp2 * A3+V
fmul.x fp1,fp2 * V*(A3+V)
fmul.x fp0,fp1 * U*V
fadd.d ATANA2(pc),fp2 * A2+V*(A3+V)
fmul.d ATANA1(pc),fp1 * A1*U*V
fmul.x fp2,fp1 * A1*U*V*(A2+V*(A3+V))
fadd.x fp1,fp0 * ATAN(U), FP1 RELEASED
fmovem.x (sp)+,fp2 * restore fp2
fmove.l d0,fpcr * restore users rnd mode,prec
fadd.x ATANF(a6),fp0 * ATAN(X)
bra.l t_inx2
ATANBORS:
*--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
*--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
cmpi.l #$3FFF8000,d1
bgt.w ATANBIG * I.E. |X| >= 16
ATANSM:
*--|X| <= 1/16
*--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
*--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
*--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] )
*--WHERE Y = X*X, AND Z = Y*Y.
cmpi.l #$3FD78000,d1
blt.w ATANTINY
*--COMPUTE POLYNOMIAL
fmovem.x fp2-fp3,-(sp) * save fp2/fp3
fmul.x fp0,fp0 * FPO IS Y = X*X
fmove.x fp0,fp1
fmul.x fp1,fp1 * FP1 IS Z = Y*Y
fmove.d ATANB6(pc),fp2
fmove.d ATANB5(pc),fp3
fmul.x fp1,fp2 * Z*B6
fmul.x fp1,fp3 * Z*B5
fadd.d ATANB4(pc),fp2 * B4+Z*B6
fadd.d ATANB3(pc),fp3 * B3+Z*B5
fmul.x fp1,fp2 * Z*(B4+Z*B6)
fmul.x fp3,fp1 * Z*(B3+Z*B5)
fadd.d ATANB2(pc),fp2 * B2+Z*(B4+Z*B6)
fadd.d ATANB1(pc),fp1 * B1+Z*(B3+Z*B5)
fmul.x fp0,fp2 * Y*(B2+Z*(B4+Z*B6))
fmul.x X(a6),fp0 * X*Y
fadd.x fp2,fp1 * [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
fmul.x fp1,fp0 * X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
fmovem.x (sp)+,fp2-fp3 * restore fp2/fp3
fmove.l d0,fpcr * restore users rnd mode,prec
fadd.x X(a6),fp0
bra.l t_inx2
ATANTINY:
*--|X| < 2^(-40), ATAN(X) = X
fmove.l d0,fpcr * restore users rnd mode,prec
move.b #FMOV_OP,d1 * last inst is MOVE
fmove.x X(a6),fp0 * last inst - possible exception set
bra.l t_catch
ATANBIG:
*--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
*--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
cmpi.l #$40638000,d1
bgt.w ATANHUGE
*--APPROXIMATE ATAN(-1/X) BY
*--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
*--THIS CAN BE RE-WRITTEN AS
*--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
fmovem.x fp2-fp3,-(sp) * save fp2/fp3
fmove.s #$BF800000,fp1 * LOAD -1
fdiv.x fp0,fp1 * FP1 IS -1/X
*--DIVIDE IS STILL CRANKING
fmove.x fp1,fp0 * FP0 IS X'
fmul.x fp0,fp0 * FP0 IS Y = X'*X'
fmove.x fp1,X(a6) * X IS REALLY X'
fmove.x fp0,fp1
fmul.x fp1,fp1 * FP1 IS Z = Y*Y
fmove.d ATANC5(pc),fp3
fmove.d ATANC4(pc),fp2
fmul.x fp1,fp3 * Z*C5
fmul.x fp1,fp2 * Z*B4
fadd.d ATANC3(pc),fp3 * C3+Z*C5
fadd.d ATANC2(pc),fp2 * C2+Z*C4
fmul.x fp3,fp1 * Z*(C3+Z*C5), FP3 RELEASED
fmul.x fp0,fp2 * Y*(C2+Z*C4)
fadd.d ATANC1(pc),fp1 * C1+Z*(C3+Z*C5)
fmul.x X(a6),fp0 * X'*Y
fadd.x fp2,fp1 * [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
fmul.x fp1,fp0 * X'*Y*([B1+Z*(B3+Z*B5)]
* ... +[Y*(B2+Z*(B4+Z*B6))])
fadd.x X(a6),fp0
fmovem.x (sp)+,fp2-fp3 * restore fp2/fp3
fmove.l d0,fpcr * restore users rnd mode,prec
tst.b (a0)
bpl.b pos_big
neg_big:
fadd.x NPIBY2(pc),fp0
bra.l t_minx2
pos_big:
fadd.x PPIBY2(pc),fp0
bra.l t_pinx2
ATANHUGE:
*--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
tst.b (a0)
bpl.b pos_huge
neg_huge:
fmove.x NPIBY2(pc),fp0
fmove.l d0,fpcr
fadd.x PTINY(pc),fp0
bra.l t_minx2
pos_huge:
fmove.x PPIBY2(pc),fp0
fmove.l d0,fpcr
fadd.x NTINY(pc),fp0
bra.l t_pinx2
global satand
*--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
satand:
bra.l t_extdnrm
*########################################################################
* sasin(): computes the inverse sine of a normalized input #
* sasind(): computes the inverse sine of a denormalized input #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input #
* d0 = round precision,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = arcsin(X) #
* #
* ACCURACY and MONOTONICITY ******************************************* #
* The returned result is within 3 ulps in 64 significant bit, #
* i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
* rounded to double precision. The result is provably monotonic #
* in double precision. #
* #
* ALGORITHM *********************************************************** #
* #
* ASIN #
* 1. If |X| >= 1, go to 3. #
* #
* 2. (|X| < 1) Calculate asin(X) by #
* z := sqrt( [1-X][1+X] ) #
* asin(X) = atan( x / z ). #
* Exit. #
* #
* 3. If |X| > 1, go to 5. #
* #
* 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
* #
* 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
* Exit. #
* #
*########################################################################
global sasin
sasin:
fmove.x (a0),fp0 * LOAD INPUT
move.l (a0),d1
move.w 4(a0),d1
andi.l #$7FFFFFFF,d1
cmpi.l #$3FFF8000,d1
bge.b ASINBIG
* This catch is added here for the '060 QSP. Originally, the call to
* satan() would handle this case by causing the exception which would
* not be caught until gen_except(). Now, with the exceptions being
* detected inside of satan(), the exception would have been handled there
* instead of inside sasin() as expected.
cmpi.l #$3FD78000,d1
blt.w ASINTINY
*--THIS IS THE USUAL CASE, |X| < 1
*--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
ASINMAIN:
fmove.s #$3F800000,fp1
fsub.x fp0,fp1 * 1-X
fmovem.x fp2,-(sp) * {fp2}
fmove.s #$3F800000,fp2
fadd.x fp0,fp2 * 1+X
fmul.x fp2,fp1 * (1+X)(1-X)
fmovem.x (sp)+,fp2 * {fp2}
fsqrt.x fp1 * SQRT([1-X][1+X])
fdiv.x fp1,fp0 * X/SQRT([1-X][1+X])
fmovem.x fp0,-(sp) * save X/SQRT(...)
lea (sp),a0 * pass ptr to X/SQRT(...)
bsr.l satan
add.l #$c,sp * clear X/SQRT(...) from stack
bra.l t_inx2
ASINBIG:
fabs.x fp0 * |X|
fcmp.s #$3F800000,fp0
fbgt.l t_operr * cause an operr exception
*--|X| = 1, ASIN(X) = +- PI/2.
ASINONE:
fmove.x PIBY2(pc),fp0
move.l (a0),d1
andi.l #$80000000,d1 * SIGN BIT OF X
ori.l #$3F800000,d1 * +-1 IN SGL FORMAT
move.l d1,-(sp) * push SIGN(X) IN SGL-FMT
fmove.l d0,fpcr
fmul.s (sp)+,fp0
bra.l t_inx2
*--|X| < 2^(-40), ATAN(X) = X
ASINTINY:
fmove.l d0,fpcr * restore users rnd mode,prec
move.b #FMOV_OP,d1 * last inst is MOVE
fmove.x (a0),fp0 * last inst - possible exception
bra.l t_catch
global sasind
*--ASIN(X) = X FOR DENORMALIZED X
sasind:
bra.l t_extdnrm
*########################################################################
* sacos(): computes the inverse cosine of a normalized input #
* sacosd(): computes the inverse cosine of a denormalized input #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input #
* d0 = round precision,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = arccos(X) #
* #
* ACCURACY and MONOTONICITY ******************************************* #
* The returned result is within 3 ulps in 64 significant bit, #
* i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
* rounded to double precision. The result is provably monotonic #
* in double precision. #
* #
* ALGORITHM *********************************************************** #
* #
* ACOS #
* 1. If |X| >= 1, go to 3. #
* #
* 2. (|X| < 1) Calculate acos(X) by #
* z := (1-X) / (1+X) #
* acos(X) = 2 * atan( sqrt(z) ). #
* Exit. #
* #
* 3. If |X| > 1, go to 5. #
* #
* 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. #
* #
* 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
* Exit. #
* #
*########################################################################
global sacos
sacos:
fmove.x (a0),fp0 * LOAD INPUT
move.l (a0),d1 * pack exp w/ upper 16 fraction
move.w 4(a0),d1
andi.l #$7FFFFFFF,d1
cmpi.l #$3FFF8000,d1
bge.b ACOSBIG
*--THIS IS THE USUAL CASE, |X| < 1
*--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
ACOSMAIN:
fmove.s #$3F800000,fp1
fadd.x fp0,fp1 * 1+X
fneg.x fp0 * -X
fadd.s #$3F800000,fp0 * 1-X
fdiv.x fp1,fp0 * (1-X)/(1+X)
fsqrt.x fp0 * SQRT((1-X)/(1+X))
move.l d0,-(sp) * save original users fpcr
clr.l d0
fmovem.x fp0,-(sp) * save SQRT(...) to stack
lea (sp),a0 * pass ptr to sqrt
bsr.l satan * ATAN(SQRT([1-X]/[1+X]))
add.l #$c,sp * clear SQRT(...) from stack
fmove.l (sp)+,fpcr * restore users round prec,mode
fadd.x fp0,fp0 * 2 * ATAN( STUFF )
bra.l t_pinx2
ACOSBIG:
fabs.x fp0
fcmp.s #$3F800000,fp0
fbgt.l t_operr * cause an operr exception
*--|X| = 1, ACOS(X) = 0 OR PI
tst.b (a0) * is X positive or negative?
bpl.b ACOSP1
*--X = -1
*Returns PI and inexact exception
ACOSM1:
fmove.x PI(pc),fp0 * load PI
fmove.l d0,fpcr * load round mode,prec
fadd.s #$00800000,fp0 * add a small value
bra.l t_pinx2
ACOSP1:
bra.l ld_pzero * answer is positive zero
global sacosd
*--ACOS(X) = PI/2 FOR DENORMALIZED X
sacosd:
fmove.l d0,fpcr * load user's rnd mode/prec
fmove.x PIBY2(pc),fp0
bra.l t_pinx2
*########################################################################
* setox(): computes the exponential for a normalized input #
* setoxd(): computes the exponential for a denormalized input #
* setoxm1(): computes the exponential minus 1 for a normalized input #
* setoxm1d(): computes the exponential minus 1 for a denormalized input #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input #
* d0 = round precision,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = exp(X) or exp(X)-1 #
* #
* ACCURACY and MONOTONICITY ******************************************* #
* The returned result is within 0.85 ulps in 64 significant bit, #
* i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
* rounded to double precision. The result is provably monotonic #
* in double precision. #
* #
* ALGORITHM and IMPLEMENTATION **************************************** #
* #
* setoxd #
* ------ #
* Step 1. Set ans := 1.0 #
* #
* Step 2. Return ans := ans + sign(X)*2^(-126). Exit. #
* Notes: This will always generate one exception -- inexact. #
* #
* #
* setox #
* ----- #
* #
* Step 1. Filter out extreme cases of input argument. #
* 1.1 If |X| >= 2^(-65), go to Step 1.3. #
* 1.2 Go to Step 7. #
* 1.3 If |X| < 16380 log(2), go to Step 2. #
* 1.4 Go to Step 8. #
* Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
* To avoid the use of floating-point comparisons, a #
* compact representation of |X| is used. This format is a #
* 32-bit integer, the upper (more significant) 16 bits #
* are the sign and biased exponent field of |X|; the #
* lower 16 bits are the 16 most significant fraction #
* (including the explicit bit) bits of |X|. Consequently, #
* the comparisons in Steps 1.1 and 1.3 can be performed #
* by integer comparison. Note also that the constant #
* 16380 log(2) used in Step 1.3 is also in the compact #
* form. Thus taking the branch to Step 2 guarantees #
* |X| < 16380 log(2). There is no harm to have a small #
* number of cases where |X| is less than, but close to, #
* 16380 log(2) and the branch to Step 9 is taken. #
* #
* Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
* 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
* was taken) #
* 2.2 N := round-to-nearest-integer( X * 64/log2 ). #
* 2.3 Calculate J = N mod 64; so J = 0,1,2,..., #
* or 63. #
* 2.4 Calculate M = (N - J)/64; so N = 64M + J. #
* 2.5 Calculate the address of the stored value of #
* 2^(J/64). #
* 2.6 Create the value Scale = 2^M. #
* Notes: The calculation in 2.2 is really performed by #
* Z := X * constant #
* N := round-to-nearest-integer(Z) #
* where #
* constant := single-precision( 64/log 2 ). #
* #
* Using a single-precision constant avoids memory #
* access. Another effect of using a single-precision #
* "constant" is that the calculated value Z is #
* #
* Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). #
* #
* This error has to be considered later in Steps 3 and 4. #
* #
* Step 3. Calculate X - N*log2/64. #
* 3.1 R := X + N*L1, #
* where L1 := single-precision(-log2/64). #
* 3.2 R := R + N*L2, #
* L2 := extended-precision(-log2/64 - L1).#
* Notes: a) The way L1 and L2 are chosen ensures L1+L2 #
* approximate the value -log2/64 to 88 bits of accuracy. #
* b) N*L1 is exact because N is no longer than 22 bits #
* and L1 is no longer than 24 bits. #
* c) The calculation X+N*L1 is also exact due to #
* cancellation. Thus, R is practically X+N(L1+L2) to full #
* 64 bits. #
* d) It is important to estimate how large can |R| be #
* after Step 3.2. #
* #
* N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) #
* X*64/log2 (1+eps) = N + f, |f| <= 0.5 #
* X*64/log2 - N = f - eps*X 64/log2 #
* X - N*log2/64 = f*log2/64 - eps*X #
* #
* #
* Now |X| <= 16446 log2, thus #
* #
* |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 #
* <= 0.57 log2/64. #
* This bound will be used in Step 4. #
* #
* Step 4. Approximate exp(R)-1 by a polynomial #
* p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) #
* Notes: a) In order to reduce memory access, the coefficients #
* are made as "short" as possible: A1 (which is 1/2), A4 #
* and A5 are single precision; A2 and A3 are double #
* precision. #
* b) Even with the restrictions above, #
* |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. #
* Note that 0.0062 is slightly bigger than 0.57 log2/64. #
* c) To fully utilize the pipeline, p is separated into #
* two independent pieces of roughly equal complexities #
* p = [ R + R*S*(A2 + S*A4) ] + #
* [ S*(A1 + S*(A3 + S*A5)) ] #
* where S = R*R. #
* #
* Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by #
* ans := T + ( T*p + t) #
* where T and t are the stored values for 2^(J/64). #
* Notes: 2^(J/64) is stored as T and t where T+t approximates #
* 2^(J/64) to roughly 85 bits; T is in extended precision #
* and t is in single precision. Note also that T is #
* rounded to 62 bits so that the last two bits of T are #
* zero. The reason for such a special form is that T-1, #
* T-2, and T-8 will all be exact --- a property that will #
* give much more accurate computation of the function #
* EXPM1. #
* #
* Step 6. Reconstruction of exp(X) #
* exp(X) = 2^M * 2^(J/64) * exp(R). #
* 6.1 If AdjFlag = 0, go to 6.3 #
* 6.2 ans := ans * AdjScale #
* 6.3 Restore the user FPCR #
* 6.4 Return ans := ans * Scale. Exit. #
* Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, #
* |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will #
* neither overflow nor underflow. If AdjFlag = 1, that #
* means that #
* X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. #
* Hence, exp(X) may overflow or underflow or neither. #
* When that is the case, AdjScale = 2^(M1) where M1 is #
* approximately M. Thus 6.2 will never cause #
* over/underflow. Possible exception in 6.4 is overflow #
* or underflow. The inexact exception is not generated in #
* 6.4. Although one can argue that the inexact flag #
* should always be raised, to simulate that exception #
* cost to much than the flag is worth in practical uses. #
* #
* Step 7. Return 1 + X. #
* 7.1 ans := X #
* 7.2 Restore user FPCR. #
* 7.3 Return ans := 1 + ans. Exit #
* Notes: For non-zero X, the inexact exception will always be #
* raised by 7.3. That is the only exception raised by 7.3.#
* Note also that we use the FMOVEM instruction to move X #
* in Step 7.1 to avoid unnecessary trapping. (Although #
* the FMOVEM may not seem relevant since X is normalized, #
* the precaution will be useful in the library version of #
* this code where the separate entry for denormalized #
* inputs will be done away with.) #
* #
* Step 8. Handle exp(X) where |X| >= 16380log2. #
* 8.1 If |X| > 16480 log2, go to Step 9. #
* (mimic 2.2 - 2.6) #
* 8.2 N := round-to-integer( X * 64/log2 ) #
* 8.3 Calculate J = N mod 64, J = 0,1,...,63 #
* 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, #
* AdjFlag := 1. #
* 8.5 Calculate the address of the stored value #
* 2^(J/64). #
* 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. #
* 8.7 Go to Step 3. #
* Notes: Refer to notes for 2.2 - 2.6. #
* #
* Step 9. Handle exp(X), |X| > 16480 log2. #
* 9.1 If X < 0, go to 9.3 #
* 9.2 ans := Huge, go to 9.4 #
* 9.3 ans := Tiny. #
* 9.4 Restore user FPCR. #
* 9.5 Return ans := ans * ans. Exit. #
* Notes: Exp(X) will surely overflow or underflow, depending on #
* X's sign. "Huge" and "Tiny" are respectively large/tiny #
* extended-precision numbers whose square over/underflow #
* with an inexact result. Thus, 9.5 always raises the #
* inexact together with either overflow or underflow. #
* #
* setoxm1d #
* -------- #
* #
* Step 1. Set ans := 0 #
* #
* Step 2. Return ans := X + ans. Exit. #
* Notes: This will return X with the appropriate rounding #
* precision prescribed by the user FPCR. #
* #
* setoxm1 #
* ------- #
* #
* Step 1. Check |X| #
* 1.1 If |X| >= 1/4, go to Step 1.3. #
* 1.2 Go to Step 7. #
* 1.3 If |X| < 70 log(2), go to Step 2. #
* 1.4 Go to Step 10. #
* Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
* However, it is conceivable |X| can be small very often #
* because EXPM1 is intended to evaluate exp(X)-1 #
* accurately when |X| is small. For further details on #
* the comparisons, see the notes on Step 1 of setox. #
* #
* Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
* 2.1 N := round-to-nearest-integer( X * 64/log2 ). #
* 2.2 Calculate J = N mod 64; so J = 0,1,2,..., #
* or 63. #
* 2.3 Calculate M = (N - J)/64; so N = 64M + J. #
* 2.4 Calculate the address of the stored value of #
* 2^(J/64). #
* 2.5 Create the values Sc = 2^M and #
* OnebySc := -2^(-M). #
* Notes: See the notes on Step 2 of setox. #
* #
* Step 3. Calculate X - N*log2/64. #
* 3.1 R := X + N*L1, #
* where L1 := single-precision(-log2/64). #
* 3.2 R := R + N*L2, #
* L2 := extended-precision(-log2/64 - L1).#
* Notes: Applying the analysis of Step 3 of setox in this case #
* shows that |R| <= 0.0055 (note that |X| <= 70 log2 in #
* this case). #
* #
* Step 4. Approximate exp(R)-1 by a polynomial #
* p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) #
* Notes: a) In order to reduce memory access, the coefficients #
* are made as "short" as possible: A1 (which is 1/2), A5 #
* and A6 are single precision; A2, A3 and A4 are double #
* precision. #
* b) Even with the restriction above, #
* |p - (exp(R)-1)| < |R| * 2^(-72.7) #
* for all |R| <= 0.0055. #
* c) To fully utilize the pipeline, p is separated into #
* two independent pieces of roughly equal complexity #
* p = [ R*S*(A2 + S*(A4 + S*A6)) ] + #
* [ R + S*(A1 + S*(A3 + S*A5)) ] #
* where S = R*R. #
* #
* Step 5. Compute 2^(J/64)*p by #
* p := T*p #
* where T and t are the stored values for 2^(J/64). #
* Notes: 2^(J/64) is stored as T and t where T+t approximates #
* 2^(J/64) to roughly 85 bits; T is in extended precision #
* and t is in single precision. Note also that T is #
* rounded to 62 bits so that the last two bits of T are #
* zero. The reason for such a special form is that T-1, #
* T-2, and T-8 will all be exact --- a property that will #
* be exploited in Step 6 below. The total relative error #
* in p is no bigger than 2^(-67.7) compared to the final #
* result. #
* #
* Step 6. Reconstruction of exp(X)-1 #
* exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). #
* 6.1 If M <= 63, go to Step 6.3. #
* 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 #
* 6.3 If M >= -3, go to 6.5. #
* 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 #
* 6.5 ans := (T + OnebySc) + (p + t). #
* 6.6 Restore user FPCR. #
* 6.7 Return ans := Sc * ans. Exit. #
* Notes: The various arrangements of the expressions give #
* accurate evaluations. #
* #
* Step 7. exp(X)-1 for |X| < 1/4. #
* 7.1 If |X| >= 2^(-65), go to Step 9. #
* 7.2 Go to Step 8. #
* #
* Step 8. Calculate exp(X)-1, |X| < 2^(-65). #
* 8.1 If |X| < 2^(-16312), goto 8.3 #
* 8.2 Restore FPCR; return ans := X - 2^(-16382). #
* Exit. #
* 8.3 X := X * 2^(140). #
* 8.4 Restore FPCR; ans := ans - 2^(-16382). #
* Return ans := ans*2^(140). Exit #
* Notes: The idea is to return "X - tiny" under the user #
* precision and rounding modes. To avoid unnecessary #
* inefficiency, we stay away from denormalized numbers #
* the best we can. For |X| >= 2^(-16312), the #
* straightforward 8.2 generates the inexact exception as #
* the case warrants. #
* #
* Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial #
* p = X + X*X*(B1 + X*(B2 + ... + X*B12)) #
* Notes: a) In order to reduce memory access, the coefficients #
* are made as "short" as possible: B1 (which is 1/2), B9 #
* to B12 are single precision; B3 to B8 are double #
* precision; and B2 is double extended. #
* b) Even with the restriction above, #
* |p - (exp(X)-1)| < |X| 2^(-70.6) #
* for all |X| <= 0.251. #
* Note that 0.251 is slightly bigger than 1/4. #
* c) To fully preserve accuracy, the polynomial is #
* computed as #
* X + ( S*B1 + Q ) where S = X*X and #
* Q = X*S*(B2 + X*(B3 + ... + X*B12)) #
* d) To fully utilize the pipeline, Q is separated into #
* two independent pieces of roughly equal complexity #
* Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + #
* [ S*S*(B3 + S*(B5 + ... + S*B11)) ] #
* #
* Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. #
* 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all #
* practical purposes. Therefore, go to Step 1 of setox. #
* 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical #
* purposes. #
* ans := -1 #
* Restore user FPCR #
* Return ans := ans + 2^(-126). Exit. #
* Notes: 10.2 will always create an inexact and return -1 + tiny #
* in the user rounding precision and mode. #
* #
*########################################################################
L2: .dc.l $3FDC0000,$82E30865,$4361C4C6,$00000000
EEXPA3: .dc.l $3FA55555,$55554CC1
EEXPA2: .dc.l $3FC55555,$55554A54
EM1A4: .dc.l $3F811111,$11174385
EM1A3: .dc.l $3FA55555,$55554F5A
EM1A2: .dc.l $3FC55555,$55555555,$00000000,$00000000
EM1B8: .dc.l $3EC71DE3,$A5774682
EM1B7: .dc.l $3EFA01A0,$19D7CB68
EM1B6: .dc.l $3F2A01A0,$1A019DF3
EM1B5: .dc.l $3F56C16C,$16C170E2
EM1B4: .dc.l $3F811111,$11111111
EM1B3: .dc.l $3FA55555,$55555555
EM1B2: .dc.l $3FFC0000,$AAAAAAAA,$AAAAAAAB
.dc.l $00000000
TWO140: .dc.l $48B00000,$00000000
TWON140:
.dc.l $37300000,$00000000
EEXPTBL:
.dc.l $3FFF0000,$80000000,$00000000,$00000000
.dc.l $3FFF0000,$8164D1F3,$BC030774,$9F841A9B
.dc.l $3FFF0000,$82CD8698,$AC2BA1D8,$9FC1D5B9
.dc.l $3FFF0000,$843A28C3,$ACDE4048,$A0728369
.dc.l $3FFF0000,$85AAC367,$CC487B14,$1FC5C95C
.dc.l $3FFF0000,$871F6196,$9E8D1010,$1EE85C9F
.dc.l $3FFF0000,$88980E80,$92DA8528,$9FA20729
.dc.l $3FFF0000,$8A14D575,$496EFD9C,$A07BF9AF
.dc.l $3FFF0000,$8B95C1E3,$EA8BD6E8,$A0020DCF
.dc.l $3FFF0000,$8D1ADF5B,$7E5BA9E4,$205A63DA
.dc.l $3FFF0000,$8EA4398B,$45CD53C0,$1EB70051
.dc.l $3FFF0000,$9031DC43,$1466B1DC,$1F6EB029
.dc.l $3FFF0000,$91C3D373,$AB11C338,$A0781494
.dc.l $3FFF0000,$935A2B2F,$13E6E92C,$9EB319B0
.dc.l $3FFF0000,$94F4EFA8,$FEF70960,$2017457D
.dc.l $3FFF0000,$96942D37,$20185A00,$1F11D537
.dc.l $3FFF0000,$9837F051,$8DB8A970,$9FB952DD
.dc.l $3FFF0000,$99E04593,$20B7FA64,$1FE43087
.dc.l $3FFF0000,$9B8D39B9,$D54E5538,$1FA2A818
.dc.l $3FFF0000,$9D3ED9A7,$2CFFB750,$1FDE494D
.dc.l $3FFF0000,$9EF53260,$91A111AC,$20504890
.dc.l $3FFF0000,$A0B0510F,$B9714FC4,$A073691C
.dc.l $3FFF0000,$A2704303,$0C496818,$1F9B7A05
.dc.l $3FFF0000,$A43515AE,$09E680A0,$A0797126
.dc.l $3FFF0000,$A5FED6A9,$B15138EC,$A071A140
.dc.l $3FFF0000,$A7CD93B4,$E9653568,$204F62DA
.dc.l $3FFF0000,$A9A15AB4,$EA7C0EF8,$1F283C4A
.dc.l $3FFF0000,$AB7A39B5,$A93ED338,$9F9A7FDC
.dc.l $3FFF0000,$AD583EEA,$42A14AC8,$A05B3FAC
.dc.l $3FFF0000,$AF3B78AD,$690A4374,$1FDF2610
.dc.l $3FFF0000,$B123F581,$D2AC2590,$9F705F90
.dc.l $3FFF0000,$B311C412,$A9112488,$201F678A
.dc.l $3FFF0000,$B504F333,$F9DE6484,$1F32FB13
.dc.l $3FFF0000,$B6FD91E3,$28D17790,$20038B30
.dc.l $3FFF0000,$B8FBAF47,$62FB9EE8,$200DC3CC
.dc.l $3FFF0000,$BAFF5AB2,$133E45FC,$9F8B2AE6
.dc.l $3FFF0000,$BD08A39F,$580C36C0,$A02BBF70
.dc.l $3FFF0000,$BF1799B6,$7A731084,$A00BF518
.dc.l $3FFF0000,$C12C4CCA,$66709458,$A041DD41
.dc.l $3FFF0000,$C346CCDA,$24976408,$9FDF137B
.dc.l $3FFF0000,$C5672A11,$5506DADC,$201F1568
.dc.l $3FFF0000,$C78D74C8,$ABB9B15C,$1FC13A2E
.dc.l $3FFF0000,$C9B9BD86,$6E2F27A4,$A03F8F03
.dc.l $3FFF0000,$CBEC14FE,$F2727C5C,$1FF4907D
.dc.l $3FFF0000,$CE248C15,$1F8480E4,$9E6E53E4
.dc.l $3FFF0000,$D06333DA,$EF2B2594,$1FD6D45C
.dc.l $3FFF0000,$D2A81D91,$F12AE45C,$A076EDB9
.dc.l $3FFF0000,$D4F35AAB,$CFEDFA20,$9FA6DE21
.dc.l $3FFF0000,$D744FCCA,$D69D6AF4,$1EE69A2F
.dc.l $3FFF0000,$D99D15C2,$78AFD7B4,$207F439F
.dc.l $3FFF0000,$DBFBB797,$DAF23754,$201EC207
.dc.l $3FFF0000,$DE60F482,$5E0E9124,$9E8BE175
.dc.l $3FFF0000,$E0CCDEEC,$2A94E110,$20032C4B
.dc.l $3FFF0000,$E33F8972,$BE8A5A50,$2004DFF5
.dc.l $3FFF0000,$E5B906E7,$7C8348A8,$1E72F47A
.dc.l $3FFF0000,$E8396A50,$3C4BDC68,$1F722F22
.dc.l $3FFF0000,$EAC0C6E7,$DD243930,$A017E945
.dc.l $3FFF0000,$ED4F301E,$D9942B84,$1F401A5B
.dc.l $3FFF0000,$EFE4B99B,$DCDAF5CC,$9FB9A9E3
.dc.l $3FFF0000,$F281773C,$59FFB138,$20744C05
.dc.l $3FFF0000,$F5257D15,$2486CC2C,$1F773A19
.dc.l $3FFF0000,$F7D0DF73,$0AD13BB8,$1FFE90D5
.dc.l $3FFF0000,$FA83B2DB,$722A033C,$A041ED22
.dc.l $3FFF0000,$FD3E0C0C,$F486C174,$1F853F3A
ADJFLAG set L_SCR2
SCALE set FP_SCR0
ADJSCALE set FP_SCR1
SC set FP_SCR0
ONEBYSC set FP_SCR1
global setox
setox:
*--entry point for EXP(X), here X is finite, non-zero, and not NaN's
*--Step 1.
move.l (a0),d1 * load part of input X
andi.l #$7FFF0000,d1 * biased expo. of X
cmpi.l #$3FBE0000,d1 * 2^(-65)
bge.b EXPC1 * normal case
bra.l EXPSM
EXPC1:
*--The case |X| >= 2^(-65)
move.w 4(a0),d1 * expo. and partial sig. of |X|
cmpi.l #$400CB167,d1 * 16380 log2 trunc. 16 bits
blt.b EXPMAIN * normal case
bra.l EEXPBIG
EXPMAIN:
*--Step 2.
*--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
fmove.x (a0),fp0 * load input from (a0)
fmove.x fp0,fp1
fmul.s #$42B8AA3B,fp0 * 64/log2 * X
fmovem.x fp2-fp3,-(sp) * save fp2 {%fp2/%fp3}
move.l #0,ADJFLAG(a6)
fmove.l fp0,d1 * N = int( X * 64/log2 )
lea EEXPTBL(pc),a1
fmove.l d1,fp0 * convert to floating-format
move.l d1,L_SCR1(a6) * save N temporarily
andi.l #$3F,d1 * D0 is J = N mod 64
lsl.l #4,d1
add.l d1,a1 * address of 2^(J/64)
move.l L_SCR1(a6),d1
asr.l #6,d1 * D0 is M
addi.w #$3FFF,d1 * biased expo. of 2^(M)
move.w L2(pc),L_SCR1(a6) * prefetch L2, no need in CB
EXPCONT1:
*--Step 3.
*--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
*--a0 points to 2^(J/64), D0 is biased expo. of 2^(M)
fmove.x fp0,fp2
fmul.s #$BC317218,fp0 * N * L1, L1 = lead(-log2/64)
fmul.x L2(pc),fp2 * N * L2, L1+L2 = -log2/64
fadd.x fp1,fp0 * X + N*L1
fadd.x fp2,fp0 * fp0 is R, reduced arg.
*--Step 4.
*--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
*-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
*--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
*--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
fmove.x fp0,fp1
fmul.x fp1,fp1 * fp1 IS S = R*R
fmove.s #$3AB60B70,fp2 * fp2 IS A5
fmul.x fp1,fp2 * fp2 IS S*A5
fmove.x fp1,fp3
fmul.s #$3C088895,fp3 * fp3 IS S*A4
fadd.d EEXPA3(pc),fp2 * fp2 IS A3+S*A5
fadd.d EEXPA2(pc),fp3 * fp3 IS A2+S*A4
fmul.x fp1,fp2 * fp2 IS S*(A3+S*A5)
move.w d1,SCALE(a6) * SCALE is 2^(M) in extended
move.l #$80000000,SCALE+4(a6)
clr.l SCALE+8(a6)
fmul.x fp1,fp3 * fp3 IS S*(A2+S*A4)
fadd.s #$3F000000,fp2 * fp2 IS A1+S*(A3+S*A5)
fmul.x fp0,fp3 * fp3 IS R*S*(A2+S*A4)
fmul.x fp1,fp2 * fp2 IS S*(A1+S*(A3+S*A5))
fadd.x fp3,fp0 * fp0 IS R+R*S*(A2+S*A4),
fmove.x (a1)+,fp1 * fp1 is lead. pt. of 2^(J/64)
fadd.x fp2,fp0 * fp0 is EXP(R) - 1
*--Step 5
*--final reconstruction process
*--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
fmul.x fp1,fp0 * 2^(J/64)*(Exp(R)-1)
fmovem.x (sp)+,fp2-fp3 * fp2 restored {%fp2/%fp3}
fadd.s (a1),fp0 * accurate 2^(J/64)
fadd.x fp1,fp0 * 2^(J/64) + 2^(J/64)*...
move.l ADJFLAG(a6),d1
*--Step 6
tst.l d1
beq.b NORMAL
ADJUST:
fmul.x ADJSCALE(a6),fp0
NORMAL:
fmove.l d0,fpcr * restore user FPCR
move.b #FMUL_OP,d1 * last inst is MUL
fmul.x SCALE(a6),fp0 * multiply 2^(M)
bra.l t_catch
EXPSM:
*--Step 7
fmovem.x (a0),fp0 * load X
fmove.l d0,fpcr
fadd.s #$3F800000,fp0 * 1+X in user mode
bra.l t_pinx2
EEXPBIG:
*--Step 8
cmpi.l #$400CB27C,d1 * 16480 log2
bgt.b EXP2BIG
*--Steps 8.2 -- 8.6
fmove.x (a0),fp0 * load input from (a0)
fmove.x fp0,fp1
fmul.s #$42B8AA3B,fp0 * 64/log2 * X
fmovem.x fp2-fp3,-(sp) * save fp2 {%fp2/%fp3}
move.l #1,ADJFLAG(a6)
fmove.l fp0,d1 * N = int( X * 64/log2 )
lea EEXPTBL(pc),a1
fmove.l d1,fp0 * convert to floating-format
move.l d1,L_SCR1(a6) * save N temporarily
andi.l #$3F,d1 * D0 is J = N mod 64
lsl.l #4,d1
add.l d1,a1 * address of 2^(J/64)
move.l L_SCR1(a6),d1
asr.l #6,d1 * D0 is K
move.l d1,L_SCR1(a6) * save K temporarily
asr.l #1,d1 * D0 is M1
sub.l d1,L_SCR1(a6) * a1 is M
addi.w #$3FFF,d1 * biased expo. of 2^(M1)
move.w d1,ADJSCALE(a6) * ADJSCALE := 2^(M1)
move.l #$80000000,ADJSCALE+4(a6)
clr.l ADJSCALE+8(a6)
move.l L_SCR1(a6),d1 * D0 is M
addi.w #$3FFF,d1 * biased expo. of 2^(M)
bra.w EXPCONT1 * go back to Step 3
EXP2BIG:
*--Step 9
tst.b (a0) * is X positive or negative?
bmi.l t_unfl2
bra.l t_ovfl2
global setoxd
setoxd:
*--entry point for EXP(X), X is denormalized
move.l (a0),-(sp)
andi.l #$80000000,(sp)
ori.l #$00800000,(sp) * sign(X)*2^(-126)
fmove.s #$3F800000,fp0
fmove.l d0,fpcr
fadd.s (sp)+,fp0
bra.l t_pinx2
global setoxm1
setoxm1:
*--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
*--Step 1.
*--Step 1.1
move.l (a0),d1 * load part of input X
andi.l #$7FFF0000,d1 * biased expo. of X
cmpi.l #$3FFD0000,d1 * 1/4
bge.b EM1CON1 * |X| >= 1/4
bra.l EM1SM
EM1CON1:
*--Step 1.3
*--The case |X| >= 1/4
move.w 4(a0),d1 * expo. and partial sig. of |X|
cmpi.l #$4004C215,d1 * 70log2 rounded up to 16 bits
ble.b EM1MAIN * 1/4 <= |X| <= 70log2
bra.l EM1BIG
EM1MAIN:
*--Step 2.
*--This is the case: 1/4 <= |X| <= 70 log2.
fmove.x (a0),fp0 * load input from (a0)
fmove.x fp0,fp1
fmul.s #$42B8AA3B,fp0 * 64/log2 * X
fmovem.x fp2-fp3,-(sp) * save fp2 {%fp2/%fp3}
fmove.l fp0,d1 * N = int( X * 64/log2 )
lea EEXPTBL(pc),a1
fmove.l d1,fp0 * convert to floating-format
move.l d1,L_SCR1(a6) * save N temporarily
andi.l #$3F,d1 * D0 is J = N mod 64
lsl.l #4,d1
add.l d1,a1 * address of 2^(J/64)
move.l L_SCR1(a6),d1
asr.l #6,d1 * D0 is M
move.l d1,L_SCR1(a6) * save a copy of M
*--Step 3.
*--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
*--a0 points to 2^(J/64), D0 and a1 both contain M
fmove.x fp0,fp2
fmul.s #$BC317218,fp0 * N * L1, L1 = lead(-log2/64)
fmul.x L2(pc),fp2 * N * L2, L1+L2 = -log2/64
fadd.x fp1,fp0 * X + N*L1
fadd.x fp2,fp0 * fp0 is R, reduced arg.
addi.w #$3FFF,d1 * D0 is biased expo. of 2^M
*--Step 4.
*--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
*-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
*--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
*--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
fmove.x fp0,fp1
fmul.x fp1,fp1 * fp1 IS S = R*R
fmove.s #$3950097B,fp2 * fp2 IS a6
fmul.x fp1,fp2 * fp2 IS S*A6
fmove.x fp1,fp3
fmul.s #$3AB60B6A,fp3 * fp3 IS S*A5
fadd.d EM1A4(pc),fp2 * fp2 IS A4+S*A6
fadd.d EM1A3(pc),fp3 * fp3 IS A3+S*A5
move.w d1,SC(a6) * SC is 2^(M) in extended
move.l #$80000000,SC+4(a6)
clr.l SC+8(a6)
fmul.x fp1,fp2 * fp2 IS S*(A4+S*A6)
move.l L_SCR1(a6),d1 * D0 is M
neg.w d1 * D0 is -M
fmul.x fp1,fp3 * fp3 IS S*(A3+S*A5)
addi.w #$3FFF,d1 * biased expo. of 2^(-M)
fadd.d EM1A2(pc),fp2 * fp2 IS A2+S*(A4+S*A6)
fadd.s #$3F000000,fp3 * fp3 IS A1+S*(A3+S*A5)
fmul.x fp1,fp2 * fp2 IS S*(A2+S*(A4+S*A6))
ori.w #$8000,d1 * signed/expo. of -2^(-M)
move.w d1,ONEBYSC(a6) * OnebySc is -2^(-M)
move.l #$80000000,ONEBYSC+4(a6)
clr.l ONEBYSC+8(a6)
fmul.x fp3,fp1 * fp1 IS S*(A1+S*(A3+S*A5))
fmul.x fp0,fp2 * fp2 IS R*S*(A2+S*(A4+S*A6))
fadd.x fp1,fp0 * fp0 IS R+S*(A1+S*(A3+S*A5))
fadd.x fp2,fp0 * fp0 IS EXP(R)-1
fmovem.x (sp)+,fp2-fp3 * fp2 restored {%fp2/%fp3}
*--Step 5
*--Compute 2^(J/64)*p
fmul.x (a1),fp0 * 2^(J/64)*(Exp(R)-1)
*--Step 6
*--Step 6.1
move.l L_SCR1(a6),d1 * retrieve M
cmpi.l #63,d1
ble.b MLE63
*--Step 6.2 M >= 64
fmove.s 12(a1),fp1 * fp1 is t
fadd.x ONEBYSC(a6),fp1 * fp1 is t+OnebySc
fadd.x fp1,fp0 * p+(t+OnebySc), fp1 released
fadd.x (a1),fp0 * T+(p+(t+OnebySc))
bra.l EM1SCALE
MLE63:
*--Step 6.3 M <= 63
cmpi.l #-3,d1
bge.b MGEN3
MLTN3:
*--Step 6.4 M <= -4
fadd.s 12(a1),fp0 * p+t
fadd.x (a1),fp0 * T+(p+t)
fadd.x ONEBYSC(a6),fp0 * OnebySc + (T+(p+t))
bra.l EM1SCALE
MGEN3:
*--Step 6.5 -3 <= M <= 63
fmove.x (a1)+,fp1 * fp1 is T
fadd.s (a1),fp0 * fp0 is p+t
fadd.x ONEBYSC(a6),fp1 * fp1 is T+OnebySc
fadd.x fp1,fp0 * (T+OnebySc)+(p+t)
EM1SCALE:
*--Step 6.6
fmove.l d0,fpcr
fmul.x SC(a6),fp0
bra.l t_inx2
EM1SM:
*--Step 7 |X| < 1/4.
cmpi.l #$3FBE0000,d1 * 2^(-65)
bge.b EM1POLY
EM1TINY:
*--Step 8 |X| < 2^(-65)
cmpi.l #$00330000,d1 * 2^(-16312)
blt.b EM12TINY
*--Step 8.2
move.l #$80010000,SC(a6) * SC is -2^(-16382)
move.l #$80000000,SC+4(a6)
clr.l SC+8(a6)
fmove.x (a0),fp0
fmove.l d0,fpcr
move.b #FADD_OP,d1 * last inst is ADD
fadd.x SC(a6),fp0
bra.l t_catch
EM12TINY:
*--Step 8.3
fmove.x (a0),fp0
fmul.d TWO140(pc),fp0
move.l #$80010000,SC(a6)
move.l #$80000000,SC+4(a6)
clr.l SC+8(a6)
fadd.x SC(a6),fp0
fmove.l d0,fpcr
move.b #FMUL_OP,d1 * last inst is MUL
fmul.d TWON140(pc),fp0
bra.l t_catch
EM1POLY:
*--Step 9 exp(X)-1 by a simple polynomial
fmove.x (a0),fp0 * fp0 is X
fmul.x fp0,fp0 * fp0 is S := X*X
fmovem.x fp2-fp3,-(sp) * save fp2 {%fp2/%fp3}
fmove.s #$2F30CAA8,fp1 * fp1 is B12
fmul.x fp0,fp1 * fp1 is S*B12
fmove.s #$310F8290,fp2 * fp2 is B11
fadd.s #$32D73220,fp1 * fp1 is B10+S*B12
fmul.x fp0,fp2 * fp2 is S*B11
fmul.x fp0,fp1 * fp1 is S*(B10 + ...
fadd.s #$3493F281,fp2 * fp2 is B9+S*...
fadd.d EM1B8(pc),fp1 * fp1 is B8+S*...
fmul.x fp0,fp2 * fp2 is S*(B9+...
fmul.x fp0,fp1 * fp1 is S*(B8+...
fadd.d EM1B7(pc),fp2 * fp2 is B7+S*...
fadd.d EM1B6(pc),fp1 * fp1 is B6+S*...
fmul.x fp0,fp2 * fp2 is S*(B7+...
fmul.x fp0,fp1 * fp1 is S*(B6+...
fadd.d EM1B5(pc),fp2 * fp2 is B5+S*...
fadd.d EM1B4(pc),fp1 * fp1 is B4+S*...
fmul.x fp0,fp2 * fp2 is S*(B5+...
fmul.x fp0,fp1 * fp1 is S*(B4+...
fadd.d EM1B3(pc),fp2 * fp2 is B3+S*...
fadd.x EM1B2(pc),fp1 * fp1 is B2+S*...
fmul.x fp0,fp2 * fp2 is S*(B3+...
fmul.x fp0,fp1 * fp1 is S*(B2+...
fmul.x fp0,fp2 * fp2 is S*S*(B3+...)
fmul.x (a0),fp1 * fp1 is X*S*(B2...
fmul.s #$3F000000,fp0 * fp0 is S*B1
fadd.x fp2,fp1 * fp1 is Q
fmovem.x (sp)+,fp2-fp3 * fp2 restored {%fp2/%fp3}
fadd.x fp1,fp0 * fp0 is S*B1+Q
fmove.l d0,fpcr
fadd.x (a0),fp0
bra.l t_inx2
EM1BIG:
*--Step 10 |X| > 70 log2
move.l (a0),d1
cmpi.l #0,d1
bgt.w EXPC1
*--Step 10.2
fmove.s #$BF800000,fp0 * fp0 is -1
fmove.l d0,fpcr
fadd.s #$00800000,fp0 * -1 + 2^(-126)
bra.l t_minx2
global setoxm1d
setoxm1d:
*--entry point for EXPM1(X), here X is denormalized
*--Step 0.
bra.l t_extdnrm
*########################################################################
* sgetexp(): returns the exponent portion of the input argument. #
* The exponent bias is removed and the exponent value is #
* returned as an extended precision number in fp0. #
* sgetexpd(): handles denormalized numbers. #
* #
* sgetman(): extracts the mantissa of the input argument. The #
* mantissa is converted to an extended precision number w/ #
* an exponent of $3fff and is returned in fp0. The range of #
* the result is [1.0 - 2.0). #
* sgetmand(): handles denormalized numbers. #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input #
* #
* OUTPUT ************************************************************** #
* fp0 = exponent(X) or mantissa(X) #
* #
*########################################################################
global sgetexp
sgetexp:
move.w SRC_EX.w(a0),d0 * get the exponent
bclr #$f,d0 * clear the sign bit
subi.w #$3fff,d0 * subtract off the bias
fmove.w d0,fp0 * return exp in fp0
blt.b sgetexpn * it's negative
rts
sgetexpn:
move.b #neg_bmask,FPSR_CC(a6) * set 'N' ccode bit
rts
global sgetexpd
sgetexpd:
bsr.l norm * normalize
neg.w d0 * new exp = -(shft amt)
subi.w #$3fff,d0 * subtract off the bias
fmove.w d0,fp0 * return exp in fp0
move.b #neg_bmask,FPSR_CC(a6) * set 'N' ccode bit
rts
global sgetman
sgetman:
move.w SRC_EX.w(a0),d0 * get the exp
ori.w #$7fff,d0 * clear old exp
bclr #$e,d0 * make it the new exp +-3fff
* here, we build the result in a tmp location so as not to disturb the input
move.l SRC_HI(a0),FP_SCR0_HI(a6) * copy to tmp loc
move.l SRC_LO(a0),FP_SCR0_LO(a6) * copy to tmp loc
move.w d0,FP_SCR0_EX(a6) * insert new exponent
fmove.x FP_SCR0(a6),fp0 * put new value back in fp0
bmi.b sgetmann * it's negative
rts
sgetmann:
move.b #neg_bmask,FPSR_CC(a6) * set 'N' ccode bit
rts
*
* For denormalized numbers, shift the mantissa until the j-bit = 1,
* then load the exponent with +/1 $3fff.
*
global sgetmand
sgetmand:
bsr.l norm * normalize exponent
bra.b sgetman
*########################################################################
* scosh(): computes the hyperbolic cosine of a normalized input #
* scoshd(): computes the hyperbolic cosine of a denormalized input #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input #
* d0 = round precision,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = cosh(X) #
* #
* ACCURACY and MONOTONICITY ******************************************* #
* The returned result is within 3 ulps in 64 significant bit, #
* i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
* rounded to double precision. The result is provably monotonic #
* in double precision. #
* #
* ALGORITHM *********************************************************** #
* #
* COSH #
* 1. If |X| > 16380 log2, go to 3. #
* #
* 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae #
* y = |X|, z = exp(Y), and #
* cosh(X) = (1/2)*( z + 1/z ). #
* Exit. #
* #
* 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. #
* #
* 4. (16380 log2 < |X| <= 16480 log2) #
* cosh(X) = sign(X) * exp(|X|)/2. #
* However, invoking exp(|X|) may cause premature #
* overflow. Thus, we calculate sinh(X) as follows: #
* Y := |X| #
* Fact := 2**(16380) #
* Y' := Y - 16381 log2 #
* cosh(X) := Fact * exp(Y'). #
* Exit. #
* #
* 5. (|X| > 16480 log2) sinh(X) must overflow. Return #
* Huge*Huge to generate overflow and an infinity with #
* the appropriate sign. Huge is the largest finite number #
* in extended format. Exit. #
* #
*########################################################################
TWO16380:
.dc.l $7FFB0000,$80000000,$00000000,$00000000
global scosh
scosh:
fmove.x (a0),fp0 * LOAD INPUT
move.l (a0),d1
move.w 4(a0),d1
andi.l #$7FFFFFFF,d1
cmpi.l #$400CB167,d1
bgt.b COSHBIG
*--THIS IS THE USUAL CASE, |X| < 16380 LOG2
*--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
fabs.x fp0 * |X|
move.l d0,-(sp)
clr.l d0
fmovem.x fp0,-(sp) * save |X| to stack
lea (sp),a0 * pass ptr to |X|
bsr.l setox * FP0 IS EXP(|X|)
add.l #$c,sp * erase |X| from stack
fmul.s #$3F000000,fp0 * (1/2)EXP(|X|)
move.l (sp)+,d0
fmove.s #$3E800000,fp1 * (1/4)
fdiv.x fp0,fp1 * 1/(2 EXP(|X|))
fmove.l d0,fpcr
move.b #FADD_OP,d1 * last inst is ADD
fadd.x fp1,fp0
bra.l t_catch
COSHBIG:
cmpi.l #$400CB2B3,d1
bgt.b COSHHUGE
fabs.x fp0
fsub.d T1(pc),fp0 * (|X|-16381LOG2_LEAD)
fsub.d T2(pc),fp0 * |X| - 16381 LOG2, ACCURATE
move.l d0,-(sp)
clr.l d0
fmovem.x fp0,-(sp) * save fp0 to stack
lea (sp),a0 * pass ptr to fp0
bsr.l setox
add.l #$c,sp * clear fp0 from stack
move.l (sp)+,d0
fmove.l d0,fpcr
move.b #FMUL_OP,d1 * last inst is MUL
fmul.x TWO16380(pc),fp0
bra.l t_catch
COSHHUGE:
bra.l t_ovfl2
global scoshd
*--COSH(X) = 1 FOR DENORMALIZED X
scoshd:
fmove.s #$3F800000,fp0
fmove.l d0,fpcr
fadd.s #$00800000,fp0
bra.l t_pinx2
*########################################################################
* ssinh(): computes the hyperbolic sine of a normalized input #
* ssinhd(): computes the hyperbolic sine of a denormalized input #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input #
* d0 = round precision,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = sinh(X) #
* #
* ACCURACY and MONOTONICITY ******************************************* #
* The returned result is within 3 ulps in 64 significant bit, #
* i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
* rounded to double precision. The result is provably monotonic #
* in double precision. #
* #
* ALGORITHM *********************************************************** #
* #
* SINH #
* 1. If |X| > 16380 log2, go to 3. #
* #
* 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula #
* y = |X|, sgn = sign(X), and z = expm1(Y), #
* sinh(X) = sgn*(1/2)*( z + z/(1+z) ). #
* Exit. #
* #
* 3. If |X| > 16480 log2, go to 5. #
* #
* 4. (16380 log2 < |X| <= 16480 log2) #
* sinh(X) = sign(X) * exp(|X|)/2. #
* However, invoking exp(|X|) may cause premature overflow. #
* Thus, we calculate sinh(X) as follows: #
* Y := |X| #
* sgn := sign(X) #
* sgnFact := sgn * 2**(16380) #
* Y' := Y - 16381 log2 #
* sinh(X) := sgnFact * exp(Y'). #
* Exit. #
* #
* 5. (|X| > 16480 log2) sinh(X) must overflow. Return #
* sign(X)*Huge*Huge to generate overflow and an infinity with #
* the appropriate sign. Huge is the largest finite number in #
* extended format. Exit. #
* #
*########################################################################
global ssinh
ssinh:
fmove.x (a0),fp0 * LOAD INPUT
move.l (a0),d1
move.w 4(a0),d1
move.l d1,a1 * save (compacted) operand
andi.l #$7FFFFFFF,d1
cmpi.l #$400CB167,d1
bgt.b SINHBIG
*--THIS IS THE USUAL CASE, |X| < 16380 LOG2
*--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
fabs.x fp0 * Y = |X|
movem.l d0/a1,-(sp) * {a1/d0}
fmovem.x fp0,-(sp) * save Y on stack
lea (sp),a0 * pass ptr to Y
clr.l d0
bsr.l setoxm1 * FP0 IS Z = EXPM1(Y)
add.l #$c,sp * clear Y from stack
fmove.l #0,fpcr
movem.l (sp)+,d0/a1 * {a1/d0}
fmove.x fp0,fp1
fadd.s #$3F800000,fp1 * 1+Z
fmove.x fp0,-(sp)
fdiv.x fp1,fp0 * Z/(1+Z)
move.l a1,d1
andi.l #$80000000,d1
ori.l #$3F000000,d1
fadd.x (sp)+,fp0
move.l d1,-(sp)
fmove.l d0,fpcr
move.b #FMUL_OP,d1 * last inst is MUL
fmul.s (sp)+,fp0 * last fp inst - possible exceptions set
bra.l t_catch
SINHBIG:
cmpi.l #$400CB2B3,d1
bgt.l t_ovfl
fabs.x fp0
fsub.d T1(pc),fp0 * (|X|-16381LOG2_LEAD)
move.l #0,-(sp)
move.l #$80000000,-(sp)
move.l a1,d1
andi.l #$80000000,d1
ori.l #$7FFB0000,d1
move.l d1,-(sp) * EXTENDED FMT
fsub.d T2(pc),fp0 * |X| - 16381 LOG2, ACCURATE
move.l d0,-(sp)
clr.l d0
fmovem.x fp0,-(sp) * save fp0 on stack
lea (sp),a0 * pass ptr to fp0
bsr.l setox
add.l #$c,sp * clear fp0 from stack
move.l (sp)+,d0
fmove.l d0,fpcr
move.b #FMUL_OP,d1 * last inst is MUL
fmul.x (sp)+,fp0 * possible exception
bra.l t_catch
global ssinhd
*--SINH(X) = X FOR DENORMALIZED X
ssinhd:
bra.l t_extdnrm
*########################################################################
* stanh(): computes the hyperbolic tangent of a normalized input #
* stanhd(): computes the hyperbolic tangent of a denormalized input #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input #
* d0 = round precision,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = tanh(X) #
* #
* ACCURACY and MONOTONICITY ******************************************* #
* The returned result is within 3 ulps in 64 significant bit, #
* i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
* rounded to double precision. The result is provably monotonic #
* in double precision. #
* #
* ALGORITHM *********************************************************** #
* #
* TANH #
* 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. #
* #
* 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by #
* sgn := sign(X), y := 2|X|, z := expm1(Y), and #
* tanh(X) = sgn*( z/(2+z) ). #
* Exit. #
* #
* 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, #
* go to 7. #
* #
* 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. #
* #
* 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by #
* sgn := sign(X), y := 2|X|, z := exp(Y), #
* tanh(X) = sgn - [ sgn*2/(1+z) ]. #
* Exit. #
* #
* 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we #
* calculate Tanh(X) by #
* sgn := sign(X), Tiny := 2**(-126), #
* tanh(X) := sgn - sgn*Tiny. #
* Exit. #
* #
* 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. #
* #
*########################################################################
X set FP_SCR0
XFRAC set X+4
SGN set L_SCR3
V set FP_SCR0
global stanh
stanh:
fmove.x (a0),fp0 * LOAD INPUT
fmove.x fp0,X(a6)
move.l (a0),d1
move.w 4(a0),d1
move.l d1,X(a6)
andi.l #$7FFFFFFF,d1
cmpi.l #$3fd78000,d1 * is |X| < 2^(-40)?
blt.w TANHBORS * yes
cmpi.l #$3fffddce,d1 * is |X| > (5/2)LOG2?
bgt.w TANHBORS * yes
*--THIS IS THE USUAL CASE
*--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
move.l X(a6),d1
move.l d1,SGN(a6)
andi.l #$7FFF0000,d1
addi.l #$00010000,d1 * EXPONENT OF 2|X|
move.l d1,X(a6)
andi.l #$80000000,SGN(a6)
fmove.x X(a6),fp0 * FP0 IS Y = 2|X|
move.l d0,-(sp)
clr.l d0
fmovem.x fp0,-(sp) * save Y on stack
lea (sp),a0 * pass ptr to Y
bsr.l setoxm1 * FP0 IS Z = EXPM1(Y)
add.l #$c,sp * clear Y from stack
move.l (sp)+,d0
fmove.x fp0,fp1
fadd.s #$40000000,fp1 * Z+2
move.l SGN(a6),d1
fmove.x fp1,V(a6)
eor.l d1,V(a6)
fmove.l d0,fpcr * restore users round prec,mode
fdiv.x V(a6),fp0
bra.l t_inx2
TANHBORS:
cmpi.l #$3FFF8000,d1
blt.w TANHSM
cmpi.l #$40048AA1,d1
bgt.w TANHHUGE
*-- (5/2) LOG2 < |X| < 50 LOG2,
*--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
*--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
move.l X(a6),d1
move.l d1,SGN(a6)
andi.l #$7FFF0000,d1
addi.l #$00010000,d1 * EXPO OF 2|X|
move.l d1,X(a6) * Y = 2|X|
andi.l #$80000000,SGN(a6)
move.l SGN(a6),d1
fmove.x X(a6),fp0 * Y = 2|X|
move.l d0,-(sp)
clr.l d0
fmovem.x fp0,-(sp) * save Y on stack
lea (sp),a0 * pass ptr to Y
bsr.l setox * FP0 IS EXP(Y)
add.l #$c,sp * clear Y from stack
move.l (sp)+,d0
move.l SGN(a6),d1
fadd.s #$3F800000,fp0 * EXP(Y)+1
eori.l #$C0000000,d1 * -SIGN(X)*2
fmove.s d1,fp1 * -SIGN(X)*2 IN SGL FMT
fdiv.x fp0,fp1 * -SIGN(X)2 / [EXP(Y)+1 ]
move.l SGN(a6),d1
ori.l #$3F800000,d1 * SGN
fmove.s d1,fp0 * SGN IN SGL FMT
fmove.l d0,fpcr * restore users round prec,mode
move.b #FADD_OP,d1 * last inst is ADD
fadd.x fp1,fp0
bra.l t_inx2
TANHSM:
fmove.l d0,fpcr * restore users round prec,mode
move.b #FMOV_OP,d1 * last inst is MOVE
fmove.x X(a6),fp0 * last inst - possible exception set
bra.l t_catch
*---RETURN SGN(X) - SGN(X)EPS
TANHHUGE:
move.l X(a6),d1
andi.l #$80000000,d1
ori.l #$3F800000,d1
fmove.s d1,fp0
andi.l #$80000000,d1
eori.l #$80800000,d1 * -SIGN(X)*EPS
fmove.l d0,fpcr * restore users round prec,mode
fadd.s d1,fp0
bra.l t_inx2
global stanhd
*--TANH(X) = X FOR DENORMALIZED X
stanhd:
bra.l t_extdnrm
*########################################################################
* slogn(): computes the natural logarithm of a normalized input #
* slognd(): computes the natural logarithm of a denormalized input #
* slognp1(): computes the log(1+X) of a normalized input #
* slognp1d(): computes the log(1+X) of a denormalized input #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input #
* d0 = round precision,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = log(X) or log(1+X) #
* #
* ACCURACY and MONOTONICITY ******************************************* #
* The returned result is within 2 ulps in 64 significant bit, #
* i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
* rounded to double precision. The result is provably monotonic #
* in double precision. #
* #
* ALGORITHM *********************************************************** #
* LOGN: #
* Step 1. If |X-1| < 1/16, approximate log(X) by an odd #
* polynomial in u, where u = 2(X-1)/(X+1). Otherwise, #
* move on to Step 2. #
* #
* Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first #
* seven significant bits of Y plus 2**(-7), i.e. #
* F = 1.xxxxxx1 in base 2 where the six "x" match those #
* of Y. Note that |Y-F| <= 2**(-7). #
* #
* Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a #
* polynomial in u, log(1+u) = poly. #
* #
* Step 4. Reconstruct #
* log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) #
* by k*log(2) + (log(F) + poly). The values of log(F) are #
* calculated beforehand and stored in the program. #
* #
* lognp1: #
* Step 1: If |X| < 1/16, approximate log(1+X) by an odd #
* polynomial in u where u = 2X/(2+X). Otherwise, move on #
* to Step 2. #
* #
* Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done #
* in Step 2 of the algorithm for LOGN and compute #
* log(1+X) as k*log(2) + log(F) + poly where poly #
* approximates log(1+u), u = (Y-F)/F. #
* #
* Implementation Notes: #
* Note 1. There are 64 different possible values for F, thus 64 #
* log(F)'s need to be tabulated. Moreover, the values of #
* 1/F are also tabulated so that the division in (Y-F)/F #
* can be performed by a multiplication. #
* #
* Note 2. In Step 2 of lognp1, in order to preserved accuracy, #
* the value Y-F has to be calculated carefully when #
* 1/2 <= X < 3/2. #
* #
* Note 3. To fully exploit the pipeline, polynomials are usually #
* separated into two parts evaluated independently before #
* being added up. #
* #
*########################################################################
LOGOF2:
.dc.l $3FFE0000,$B17217F7,$D1CF79AC,$00000000
one:
.dc.l $3F800000
zero:
.dc.l $00000000
infty:
.dc.l $7F800000
negone:
.dc.l $BF800000
LOGA6:
.dc.l $3FC2499A,$B5E4040B
LOGA5:
.dc.l $BFC555B5,$848CB7DB
LOGA4:
.dc.l $3FC99999,$987D8730
LOGA3:
.dc.l $BFCFFFFF,$FF6F7E97
LOGA2:
.dc.l $3FD55555,$555555A4
LOGA1:
.dc.l $BFE00000,$00000008
LOGB5:
.dc.l $3F175496,$ADD7DAD6
LOGB4:
.dc.l $3F3C71C2,$FE80C7E0
LOGB3:
.dc.l $3F624924,$928BCCFF
LOGB2:
.dc.l $3F899999,$999995EC
LOGB1:
.dc.l $3FB55555,$55555555
TWO:
.dc.l $40000000,$00000000
LTHOLD:
.dc.l $3f990000,$80000000,$00000000,$00000000
LOGTBL:
.dc.l $3FFE0000,$FE03F80F,$E03F80FE,$00000000
.dc.l $3FF70000,$FF015358,$833C47E2,$00000000
.dc.l $3FFE0000,$FA232CF2,$52138AC0,$00000000
.dc.l $3FF90000,$BDC8D83E,$AD88D549,$00000000
.dc.l $3FFE0000,$F6603D98,$0F6603DA,$00000000
.dc.l $3FFA0000,$9CF43DCF,$F5EAFD48,$00000000
.dc.l $3FFE0000,$F2B9D648,$0F2B9D65,$00000000
.dc.l $3FFA0000,$DA16EB88,$CB8DF614,$00000000
.dc.l $3FFE0000,$EF2EB71F,$C4345238,$00000000
.dc.l $3FFB0000,$8B29B775,$1BD70743,$00000000
.dc.l $3FFE0000,$EBBDB2A5,$C1619C8C,$00000000
.dc.l $3FFB0000,$A8D839F8,$30C1FB49,$00000000
.dc.l $3FFE0000,$E865AC7B,$7603A197,$00000000
.dc.l $3FFB0000,$C61A2EB1,$8CD907AD,$00000000
.dc.l $3FFE0000,$E525982A,$F70C880E,$00000000
.dc.l $3FFB0000,$E2F2A47A,$DE3A18AF,$00000000
.dc.l $3FFE0000,$E1FC780E,$1FC780E2,$00000000
.dc.l $3FFB0000,$FF64898E,$DF55D551,$00000000
.dc.l $3FFE0000,$DEE95C4C,$A037BA57,$00000000
.dc.l $3FFC0000,$8DB956A9,$7B3D0148,$00000000
.dc.l $3FFE0000,$DBEB61EE,$D19C5958,$00000000
.dc.l $3FFC0000,$9B8FE100,$F47BA1DE,$00000000
.dc.l $3FFE0000,$D901B203,$6406C80E,$00000000
.dc.l $3FFC0000,$A9372F1D,$0DA1BD17,$00000000
.dc.l $3FFE0000,$D62B80D6,$2B80D62C,$00000000
.dc.l $3FFC0000,$B6B07F38,$CE90E46B,$00000000
.dc.l $3FFE0000,$D3680D36,$80D3680D,$00000000
.dc.l $3FFC0000,$C3FD0329,$06488481,$00000000
.dc.l $3FFE0000,$D0B69FCB,$D2580D0B,$00000000
.dc.l $3FFC0000,$D11DE0FF,$15AB18CA,$00000000
.dc.l $3FFE0000,$CE168A77,$25080CE1,$00000000
.dc.l $3FFC0000,$DE1433A1,$6C66B150,$00000000
.dc.l $3FFE0000,$CB8727C0,$65C393E0,$00000000
.dc.l $3FFC0000,$EAE10B5A,$7DDC8ADD,$00000000
.dc.l $3FFE0000,$C907DA4E,$871146AD,$00000000
.dc.l $3FFC0000,$F7856E5E,$E2C9B291,$00000000
.dc.l $3FFE0000,$C6980C69,$80C6980C,$00000000
.dc.l $3FFD0000,$82012CA5,$A68206D7,$00000000
.dc.l $3FFE0000,$C4372F85,$5D824CA6,$00000000
.dc.l $3FFD0000,$882C5FCD,$7256A8C5,$00000000
.dc.l $3FFE0000,$C1E4BBD5,$95F6E947,$00000000
.dc.l $3FFD0000,$8E44C60B,$4CCFD7DE,$00000000
.dc.l $3FFE0000,$BFA02FE8,$0BFA02FF,$00000000
.dc.l $3FFD0000,$944AD09E,$F4351AF6,$00000000
.dc.l $3FFE0000,$BD691047,$07661AA3,$00000000
.dc.l $3FFD0000,$9A3EECD4,$C3EAA6B2,$00000000
.dc.l $3FFE0000,$BB3EE721,$A54D880C,$00000000
.dc.l $3FFD0000,$A0218434,$353F1DE8,$00000000
.dc.l $3FFE0000,$B92143FA,$36F5E02E,$00000000
.dc.l $3FFD0000,$A5F2FCAB,$BBC506DA,$00000000
.dc.l $3FFE0000,$B70FBB5A,$19BE3659,$00000000
.dc.l $3FFD0000,$ABB3B8BA,$2AD362A5,$00000000
.dc.l $3FFE0000,$B509E68A,$9B94821F,$00000000
.dc.l $3FFD0000,$B1641795,$CE3CA97B,$00000000
.dc.l $3FFE0000,$B30F6352,$8917C80B,$00000000
.dc.l $3FFD0000,$B7047551,$5D0F1C61,$00000000
.dc.l $3FFE0000,$B11FD3B8,$0B11FD3C,$00000000
.dc.l $3FFD0000,$BC952AFE,$EA3D13E1,$00000000
.dc.l $3FFE0000,$AF3ADDC6,$80AF3ADE,$00000000
.dc.l $3FFD0000,$C2168ED0,$F458BA4A,$00000000
.dc.l $3FFE0000,$AD602B58,$0AD602B6,$00000000
.dc.l $3FFD0000,$C788F439,$B3163BF1,$00000000
.dc.l $3FFE0000,$AB8F69E2,$8359CD11,$00000000
.dc.l $3FFD0000,$CCECAC08,$BF04565D,$00000000
.dc.l $3FFE0000,$A9C84A47,$A07F5638,$00000000
.dc.l $3FFD0000,$D2420487,$2DD85160,$00000000
.dc.l $3FFE0000,$A80A80A8,$0A80A80B,$00000000
.dc.l $3FFD0000,$D7894992,$3BC3588A,$00000000
.dc.l $3FFE0000,$A655C439,$2D7B73A8,$00000000
.dc.l $3FFD0000,$DCC2C4B4,$9887DACC,$00000000
.dc.l $3FFE0000,$A4A9CF1D,$96833751,$00000000
.dc.l $3FFD0000,$E1EEBD3E,$6D6A6B9E,$00000000
.dc.l $3FFE0000,$A3065E3F,$AE7CD0E0,$00000000
.dc.l $3FFD0000,$E70D785C,$2F9F5BDC,$00000000
.dc.l $3FFE0000,$A16B312E,$A8FC377D,$00000000
.dc.l $3FFD0000,$EC1F392C,$5179F283,$00000000
.dc.l $3FFE0000,$9FD809FD,$809FD80A,$00000000
.dc.l $3FFD0000,$F12440D3,$E36130E6,$00000000
.dc.l $3FFE0000,$9E4CAD23,$DD5F3A20,$00000000
.dc.l $3FFD0000,$F61CCE92,$346600BB,$00000000
.dc.l $3FFE0000,$9CC8E160,$C3FB19B9,$00000000
.dc.l $3FFD0000,$FB091FD3,$8145630A,$00000000
.dc.l $3FFE0000,$9B4C6F9E,$F03A3CAA,$00000000
.dc.l $3FFD0000,$FFE97042,$BFA4C2AD,$00000000
.dc.l $3FFE0000,$99D722DA,$BDE58F06,$00000000
.dc.l $3FFE0000,$825EFCED,$49369330,$00000000
.dc.l $3FFE0000,$9868C809,$868C8098,$00000000
.dc.l $3FFE0000,$84C37A7A,$B9A905C9,$00000000
.dc.l $3FFE0000,$97012E02,$5C04B809,$00000000
.dc.l $3FFE0000,$87224C2E,$8E645FB7,$00000000
.dc.l $3FFE0000,$95A02568,$095A0257,$00000000
.dc.l $3FFE0000,$897B8CAC,$9F7DE298,$00000000
.dc.l $3FFE0000,$94458094,$45809446,$00000000
.dc.l $3FFE0000,$8BCF55DE,$C4CD05FE,$00000000
.dc.l $3FFE0000,$92F11384,$0497889C,$00000000
.dc.l $3FFE0000,$8E1DC0FB,$89E125E5,$00000000
.dc.l $3FFE0000,$91A2B3C4,$D5E6F809,$00000000
.dc.l $3FFE0000,$9066E68C,$955B6C9B,$00000000
.dc.l $3FFE0000,$905A3863,$3E06C43B,$00000000
.dc.l $3FFE0000,$92AADE74,$C7BE59E0,$00000000
.dc.l $3FFE0000,$8F1779D9,$FDC3A219,$00000000
.dc.l $3FFE0000,$94E9BFF6,$15845643,$00000000
.dc.l $3FFE0000,$8DDA5202,$37694809,$00000000
.dc.l $3FFE0000,$9723A1B7,$20134203,$00000000
.dc.l $3FFE0000,$8CA29C04,$6514E023,$00000000
.dc.l $3FFE0000,$995899C8,$90EB8990,$00000000
.dc.l $3FFE0000,$8B70344A,$139BC75A,$00000000
.dc.l $3FFE0000,$9B88BDAA,$3A3DAE2F,$00000000
.dc.l $3FFE0000,$8A42F870,$5669DB46,$00000000
.dc.l $3FFE0000,$9DB4224F,$FFE1157C,$00000000
.dc.l $3FFE0000,$891AC73A,$E9819B50,$00000000
.dc.l $3FFE0000,$9FDADC26,$8B7A12DA,$00000000
.dc.l $3FFE0000,$87F78087,$F78087F8,$00000000
.dc.l $3FFE0000,$A1FCFF17,$CE733BD4,$00000000
.dc.l $3FFE0000,$86D90544,$7A34ACC6,$00000000
.dc.l $3FFE0000,$A41A9E8F,$5446FB9F,$00000000
.dc.l $3FFE0000,$85BF3761,$2CEE3C9B,$00000000
.dc.l $3FFE0000,$A633CD7E,$6771CD8B,$00000000
.dc.l $3FFE0000,$84A9F9C8,$084A9F9D,$00000000
.dc.l $3FFE0000,$A8489E60,$0B435A5E,$00000000
.dc.l $3FFE0000,$83993052,$3FBE3368,$00000000
.dc.l $3FFE0000,$AA59233C,$CCA4BD49,$00000000
.dc.l $3FFE0000,$828CBFBE,$B9A020A3,$00000000
.dc.l $3FFE0000,$AC656DAE,$6BCC4985,$00000000
.dc.l $3FFE0000,$81848DA8,$FAF0D277,$00000000
.dc.l $3FFE0000,$AE6D8EE3,$60BB2468,$00000000
.dc.l $3FFE0000,$80808080,$80808081,$00000000
.dc.l $3FFE0000,$B07197A2,$3C46C654,$00000000
ADJK set L_SCR1
X set FP_SCR0
XDCARE set X+2
XFRAC set X+4
F set FP_SCR1
FFRAC set F+4
KLOG2 set FP_SCR0
SAVEU set FP_SCR0
global slogn
*--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
slogn:
fmove.x (a0),fp0 * LOAD INPUT
move.l #$00000000,ADJK(a6)
LOGBGN:
*--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
*--A FINITE, NON-ZERO, NORMALIZED NUMBER.
move.l (a0),d1
move.w 4(a0),d1
move.l (a0),X(a6)
move.l 4(a0),X+4(a6)
move.l 8(a0),X+8(a6)
cmpi.l #0,d1 * CHECK IF X IS NEGATIVE
blt.w LOGNEG * LOG OF NEGATIVE ARGUMENT IS INVALID
* X IS POSITIVE, CHECK IF X IS NEAR 1
cmpi.l #$3ffef07d,d1 * IS X < 15/16?
blt.b LOGMAIN * YES
cmpi.l #$3fff8841,d1 * IS X > 17/16?
ble.w LOGNEAR1 * NO
LOGMAIN:
*--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
*--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
*--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
*--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
*-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
*--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
*--LOG(1+U) CAN BE VERY EFFICIENT.
*--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
*--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
*--GET K, Y, F, AND ADDRESS OF 1/F.
asr.l #8,d1
asr.l #8,d1 * SHIFTED 16 BITS, BIASED EXPO. OF X
subi.l #$3FFF,d1 * THIS IS K
add.l ADJK(a6),d1 * ADJUST K, ORIGINAL INPUT MAY BE DENORM.
lea LOGTBL(pc),a0 * BASE ADDRESS OF 1/F AND LOG(F)
fmove.l d1,fp1 * CONVERT K TO FLOATING-POINT FORMAT
*--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
move.l #$3FFF0000,X(a6) * X IS NOW Y, I.E. 2^(-K)*X
move.l XFRAC(a6),FFRAC(a6)
andi.l #$FE000000,FFRAC(a6) * FIRST 7 BITS OF Y
ori.l #$01000000,FFRAC(a6) * GET F: ATTACH A 1 AT THE EIGHTH BIT
move.l FFRAC(a6),d1 * READY TO GET ADDRESS OF 1/F
andi.l #$7E000000,d1
asr.l #8,d1
asr.l #8,d1
asr.l #4,d1 * SHIFTED 20, D0 IS THE DISPLACEMENT
add.l d1,a0 * A0 IS THE ADDRESS FOR 1/F
fmove.x X(a6),fp0
move.l #$3fff0000,F(a6)
clr.l F+8(a6)
fsub.x F(a6),fp0 * Y-F
fmovem.x fp2-fp3,-(sp) * SAVE FP2-3 WHILE FP0 IS NOT READY
*--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
*--REGISTERS SAVED: FPCR, FP1, FP2
LP1CONT1:
*--AN RE-ENTRY POINT FOR LOGNP1
fmul.x (a0),fp0 * FP0 IS U = (Y-F)/F
fmul.x LOGOF2(pc),fp1 * GET K*LOG2 WHILE FP0 IS NOT READY
fmove.x fp0,fp2
fmul.x fp2,fp2 * FP2 IS V=U*U
fmove.x fp1,KLOG2(a6) * PUT K*LOG2 IN MEMEORY, FREE FP1
*--LOG(1+U) IS APPROXIMATED BY
*--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
*--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
fmove.x fp2,fp3
fmove.x fp2,fp1
fmul.d LOGA6(pc),fp1 * V*A6
fmul.d LOGA5(pc),fp2 * V*A5
fadd.d LOGA4(pc),fp1 * A4+V*A6
fadd.d LOGA3(pc),fp2 * A3+V*A5
fmul.x fp3,fp1 * V*(A4+V*A6)
fmul.x fp3,fp2 * V*(A3+V*A5)
fadd.d LOGA2(pc),fp1 * A2+V*(A4+V*A6)
fadd.d LOGA1(pc),fp2 * A1+V*(A3+V*A5)
fmul.x fp3,fp1 * V*(A2+V*(A4+V*A6))
add.l #16,a0 * ADDRESS OF LOG(F)
fmul.x fp3,fp2 * V*(A1+V*(A3+V*A5))
fmul.x fp0,fp1 * U*V*(A2+V*(A4+V*A6))
fadd.x fp2,fp0 * U+V*(A1+V*(A3+V*A5))
fadd.x (a0),fp1 * LOG(F)+U*V*(A2+V*(A4+V*A6))
fmovem.x (sp)+,fp2-fp3 * RESTORE FP2-3
fadd.x fp1,fp0 * FP0 IS LOG(F) + LOG(1+U)
fmove.l d0,fpcr
fadd.x KLOG2(a6),fp0 * FINAL ADD
bra.l t_inx2
LOGNEAR1:
* if the input is exactly equal to one, then exit through ld_pzero.
* if these 2 lines weren't here, the correct answer would be returned
* but the INEX2 bit would be set.
fcmp.b #$1,fp0 * is it equal to one?
fbeq.l ld_pzero * yes
*--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
fmove.x fp0,fp1
fsub.s one(pc),fp1 * FP1 IS X-1
fadd.s one(pc),fp0 * FP0 IS X+1
fadd.x fp1,fp1 * FP1 IS 2(X-1)
*--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
*--IN U, U = 2(X-1)/(X+1) = FP1/FP0
LP1CONT2:
*--THIS IS AN RE-ENTRY POINT FOR LOGNP1
fdiv.x fp0,fp1 * FP1 IS U
fmovem.x fp2-fp3,-(sp) * SAVE FP2-3
*--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
*--LET V=U*U, W=V*V, CALCULATE
*--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
*--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
fmove.x fp1,fp0
fmul.x fp0,fp0 * FP0 IS V
fmove.x fp1,SAVEU(a6) * STORE U IN MEMORY, FREE FP1
fmove.x fp0,fp1
fmul.x fp1,fp1 * FP1 IS W
fmove.d LOGB5(pc),fp3
fmove.d LOGB4(pc),fp2
fmul.x fp1,fp3 * W*B5
fmul.x fp1,fp2 * W*B4
fadd.d LOGB3(pc),fp3 * B3+W*B5
fadd.d LOGB2(pc),fp2 * B2+W*B4
fmul.x fp3,fp1 * W*(B3+W*B5), FP3 RELEASED
fmul.x fp0,fp2 * V*(B2+W*B4)
fadd.d LOGB1(pc),fp1 * B1+W*(B3+W*B5)
fmul.x SAVEU(a6),fp0 * FP0 IS U*V
fadd.x fp2,fp1 * B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
fmovem.x (sp)+,fp2-fp3 * FP2-3 RESTORED
fmul.x fp1,fp0 * U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
fmove.l d0,fpcr
fadd.x SAVEU(a6),fp0
bra.l t_inx2
*--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
LOGNEG:
bra.l t_operr
global slognd
slognd:
*--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
move.l #-100,ADJK(a6) * INPUT = 2^(ADJK) * FP0
*----normalize the input value by left shifting k bits (k to be determined
*----below), adjusting exponent and storing -k to ADJK
*----the value TWOTO100 is no longer needed.
*----Note that this code assumes the denormalized input is NON-ZERO.
movem.l d2-d7,-(sp) * save some registers {d2-d7}
move.l (a0),d3 * D3 is exponent of smallest norm. #
move.l 4(a0),d4
move.l 8(a0),d5 * (D4,D5) is (Hi_X,Lo_X)
clr.l d2 * D2 used for holding K
tst.l d4
bne.b Hi_not0
Hi_0:
move.l d5,d4
clr.l d5
moveq.l #32,d2
clr.l d6
bfffo d4{#0:#32},d6
lsl.l d6,d4
add.l d6,d2 * (D3,D4,D5) is normalized
move.l d3,X(a6)
move.l d4,XFRAC(a6)
move.l d5,XFRAC+4(a6)
neg.l d2
move.l d2,ADJK(a6)
fmove.x X(a6),fp0
movem.l (sp)+,d2-d7 * restore registers {d2-d7}
lea X(a6),a0
bra.w LOGBGN * begin regular log(X)
Hi_not0:
clr.l d6
bfffo d4{#0:#32},d6 * find first 1
move.l d6,d2 * get k
lsl.l d6,d4
move.l d5,d7 * a copy of D5
lsl.l d6,d5
neg.l d6
addi.l #32,d6
lsr.l d6,d7
or.l d7,d4 * (D3,D4,D5) normalized
move.l d3,X(a6)
move.l d4,XFRAC(a6)
move.l d5,XFRAC+4(a6)
neg.l d2
move.l d2,ADJK(a6)
fmove.x X(a6),fp0
movem.l (sp)+,d2-d7 * restore registers {d2-d7}
lea X(a6),a0
bra.w LOGBGN * begin regular log(X)
global slognp1
*--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
slognp1:
fmove.x (a0),fp0 * LOAD INPUT
fabs.x fp0 * test magnitude
fcmp.x LTHOLD(pc),fp0 * compare with min threshold
fbgt.w LP1REAL * if greater, continue
fmove.l d0,fpcr
move.b #FMOV_OP,d1 * last inst is MOVE
fmove.x (a0),fp0 * return signed argument
bra.l t_catch
LP1REAL:
fmove.x (a0),fp0 * LOAD INPUT
move.l #$00000000,ADJK(a6)
fmove.x fp0,fp1 * FP1 IS INPUT Z
fadd.s one(pc),fp0 * X := ROUND(1+Z)
fmove.x fp0,X(a6)
move.w XFRAC(a6),XDCARE(a6)
move.l X(a6),d1
cmpi.l #0,d1
ble.w LP1NEG0 * LOG OF ZERO OR -VE
cmpi.l #$3ffe8000,d1 * IS BOUNDS [1/2,3/2]?
blt.w LOGMAIN
cmpi.l #$3fffc000,d1
bgt.w LOGMAIN
*--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
*--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
*--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
LP1NEAR1:
*--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
cmpi.l #$3ffef07d,d1
blt.w LP1CARE
cmpi.l #$3fff8841,d1
bgt.w LP1CARE
LP1ONE16:
*--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
*--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
fadd.x fp1,fp1 * FP1 IS 2Z
fadd.s one(pc),fp0 * FP0 IS 1+X
*--U = FP1/FP0
bra.w LP1CONT2
LP1CARE:
*--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
*--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
*--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
*--THERE ARE ONLY TWO CASES.
*--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
*--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
*--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
*--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
move.l XFRAC(a6),FFRAC(a6)
andi.l #$FE000000,FFRAC(a6)
ori.l #$01000000,FFRAC(a6) * F OBTAINED
cmpi.l #$3FFF8000,d1 * SEE IF 1+Z > 1
bge.b KISZERO
KISNEG1:
fmove.s TWO(pc),fp0
move.l #$3fff0000,F(a6)
clr.l F+8(a6)
fsub.x F(a6),fp0 * 2-F
move.l FFRAC(a6),d1
andi.l #$7E000000,d1
asr.l #8,d1
asr.l #8,d1
asr.l #4,d1 * D0 CONTAINS DISPLACEMENT FOR 1/F
fadd.x fp1,fp1 * GET 2Z
fmovem.x fp2-fp3,-(sp) * SAVE FP2 {%fp2/%fp3}
fadd.x fp1,fp0 * FP0 IS Y-F = (2-F)+2Z
lea LOGTBL(pc),a0 * A0 IS ADDRESS OF 1/F
add.l d1,a0
fmove.s negone(pc),fp1 * FP1 IS K = -1
bra.w LP1CONT1
KISZERO:
fmove.s one(pc),fp0
move.l #$3fff0000,F(a6)
clr.l F+8(a6)
fsub.x F(a6),fp0 * 1-F
move.l FFRAC(a6),d1
andi.l #$7E000000,d1
asr.l #8,d1
asr.l #8,d1
asr.l #4,d1
fadd.x fp1,fp0 * FP0 IS Y-F
fmovem.x fp2-fp3,-(sp) * FP2 SAVED {%fp2/%fp3}
lea LOGTBL(pc),a0
add.l d1,a0 * A0 IS ADDRESS OF 1/F
fmove.s zero(pc),fp1 * FP1 IS K = 0
bra.w LP1CONT1
LP1NEG0:
*--FPCR SAVED. D0 IS X IN COMPACT FORM.
cmpi.l #0,d1
blt.b LP1NEG
LP1ZERO:
fmove.s negone(pc),fp0
fmove.l d0,fpcr
bra.l t_dz
LP1NEG:
fmove.s zero(pc),fp0
fmove.l d0,fpcr
bra.l t_operr
global slognp1d
*--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
* Simply return the denorm
slognp1d:
bra.l t_extdnrm
*########################################################################
* satanh(): computes the inverse hyperbolic tangent of a norm input #
* satanhd(): computes the inverse hyperbolic tangent of a denorm input #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input #
* d0 = round precision,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = arctanh(X) #
* #
* ACCURACY and MONOTONICITY ******************************************* #
* The returned result is within 3 ulps in 64 significant bit, #
* i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
* rounded to double precision. The result is provably monotonic #
* in double precision. #
* #
* ALGORITHM *********************************************************** #
* #
* ATANH #
* 1. If |X| >= 1, go to 3. #
* #
* 2. (|X| < 1) Calculate atanh(X) by #
* sgn := sign(X) #
* y := |X| #
* z := 2y/(1-y) #
* atanh(X) := sgn * (1/2) * logp1(z) #
* Exit. #
* #
* 3. If |X| > 1, go to 5. #
* #
* 4. (|X| = 1) Generate infinity with an appropriate sign and #
* divide-by-zero by #
* sgn := sign(X) #
* atan(X) := sgn / (+0). #
* Exit. #
* #
* 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
* Exit. #
* #
*########################################################################
global satanh
satanh:
move.l (a0),d1
move.w 4(a0),d1
andi.l #$7FFFFFFF,d1
cmpi.l #$3FFF8000,d1
bge.b ATANHBIG
*--THIS IS THE USUAL CASE, |X| < 1
*--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
fabs.x (a0),fp0 * Y = |X|
fmove.x fp0,fp1
fneg.x fp1 * -Y
fadd.x fp0,fp0 * 2Y
fadd.s #$3F800000,fp1 * 1-Y
fdiv.x fp1,fp0 * 2Y/(1-Y)
move.l (a0),d1
andi.l #$80000000,d1
ori.l #$3F000000,d1 * SIGN(X)*HALF
move.l d1,-(sp)
move.l d0,-(sp) * save rnd prec,mode
clr.l d0 * pass ext prec,RN
fmovem.x fp0,-(sp) * save Z on stack
lea (sp),a0 * pass ptr to Z
bsr.l slognp1 * LOG1P(Z)
add.l #$c,sp * clear Z from stack
move.l (sp)+,d0 * fetch old prec,mode
fmove.l d0,fpcr * load it
move.b #FMUL_OP,d1 * last inst is MUL
fmul.s (sp)+,fp0
bra.l t_catch
ATANHBIG:
fabs.x (a0),fp0 * |X|
fcmp.s #$3F800000,fp0
fbgt.l t_operr
bra.l t_dz
global satanhd
*--ATANH(X) = X FOR DENORMALIZED X
satanhd:
bra.l t_extdnrm
*########################################################################
* slog10(): computes the base-10 logarithm of a normalized input #
* slog10d(): computes the base-10 logarithm of a denormalized input #
* slog2(): computes the base-2 logarithm of a normalized input #
* slog2d(): computes the base-2 logarithm of a denormalized input #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input #
* d0 = round precision,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = log_10(X) or log_2(X) #
* #
* ACCURACY and MONOTONICITY ******************************************* #
* The returned result is within 1.7 ulps in 64 significant bit, #
* i.e. within 0.5003 ulp to 53 bits if the result is subsequently #
* rounded to double precision. The result is provably monotonic #
* in double precision. #
* #
* ALGORITHM *********************************************************** #
* #
* slog10d: #
* #
* Step 0. If X < 0, create a NaN and raise the invalid operation #
* flag. Otherwise, save FPCR in D1; set FpCR to default. #
* Notes: Default means round-to-nearest mode, no floating-point #
* traps, and precision control = double extended. #
* #
* Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
* Notes: Even if X is denormalized, log(X) is always normalized. #
* #
* Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
* 2.1 Restore the user FPCR #
* 2.2 Return ans := Y * INV_L10. #
* #
* slog10: #
* #
* Step 0. If X < 0, create a NaN and raise the invalid operation #
* flag. Otherwise, save FPCR in D1; set FpCR to default. #
* Notes: Default means round-to-nearest mode, no floating-point #
* traps, and precision control = double extended. #
* #
* Step 1. Call sLogN to obtain Y = log(X), the natural log of X. #
* #
* Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
* 2.1 Restore the user FPCR #
* 2.2 Return ans := Y * INV_L10. #
* #
* sLog2d: #
* #
* Step 0. If X < 0, create a NaN and raise the invalid operation #
* flag. Otherwise, save FPCR in D1; set FpCR to default. #
* Notes: Default means round-to-nearest mode, no floating-point #
* traps, and precision control = double extended. #
* #
* Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
* Notes: Even if X is denormalized, log(X) is always normalized. #
* #
* Step 2. Compute log_10(X) = log(X) * (1/log(2)). #
* 2.1 Restore the user FPCR #
* 2.2 Return ans := Y * INV_L2. #
* #
* sLog2: #
* #
* Step 0. If X < 0, create a NaN and raise the invalid operation #
* flag. Otherwise, save FPCR in D1; set FpCR to default. #
* Notes: Default means round-to-nearest mode, no floating-point #
* traps, and precision control = double extended. #
* #
* Step 1. If X is not an integer power of two, i.e., X != 2^k, #
* go to Step 3. #
* #
* Step 2. Return k. #
* 2.1 Get integer k, X = 2^k. #
* 2.2 Restore the user FPCR. #
* 2.3 Return ans := convert-to-double-extended(k). #
* #
* Step 3. Call sLogN to obtain Y = log(X), the natural log of X. #
* #
* Step 4. Compute log_2(X) = log(X) * (1/log(2)). #
* 4.1 Restore the user FPCR #
* 4.2 Return ans := Y * INV_L2. #
* #
*########################################################################
INV_L10:
.dc.l $3FFD0000,$DE5BD8A9,$37287195,$00000000
INV_L2:
.dc.l $3FFF0000,$B8AA3B29,$5C17F0BC,$00000000
global slog10
*--entry point for Log10(X), X is normalized
slog10:
fmove.b #$1,fp0
fcmp.x (a0),fp0 * if operand == 1,
fbeq.l ld_pzero * return an EXACT zero
move.l (a0),d1
blt.w invalid
move.l d0,-(sp)
clr.l d0
bsr.l slogn * log(X), X normal.
fmove.l (sp)+,fpcr
fmul.x INV_L10(pc),fp0
bra.l t_inx2
global slog10d
*--entry point for Log10(X), X is denormalized
slog10d:
move.l (a0),d1
blt.w invalid
move.l d0,-(sp)
clr.l d0
bsr.l slognd * log(X), X denorm.
fmove.l (sp)+,fpcr
fmul.x INV_L10(pc),fp0
bra.l t_minx2
global slog2
*--entry point for Log2(X), X is normalized
slog2:
move.l (a0),d1
blt.w invalid
move.l 8(a0),d1
bne.b continue * X is not 2^k
move.l 4(a0),d1
andi.l #$7FFFFFFF,d1
bne.b continue
*--X = 2^k.
move.w (a0),d1
andi.l #$00007FFF,d1
subi.l #$3FFF,d1
beq.l ld_pzero
fmove.l d0,fpcr
fmove.l d1,fp0
bra.l t_inx2
continue:
move.l d0,-(sp)
clr.l d0
bsr.l slogn * log(X), X normal.
fmove.l (sp)+,fpcr
fmul.x INV_L2(pc),fp0
bra.l t_inx2
invalid:
bra.l t_operr
global slog2d
*--entry point for Log2(X), X is denormalized
slog2d:
move.l (a0),d1
blt.w invalid
move.l d0,-(sp)
clr.l d0
bsr.l slognd * log(X), X denorm.
fmove.l (sp)+,fpcr
fmul.x INV_L2(pc),fp0
bra.l t_minx2
*########################################################################
* stwotox(): computes 2**X for a normalized input #
* stwotoxd(): computes 2**X for a denormalized input #
* stentox(): computes 10**X for a normalized input #
* stentoxd(): computes 10**X for a denormalized input #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input #
* d0 = round precision,mode #
* #
* OUTPUT ************************************************************** #
* fp0 = 2**X or 10**X #
* #
* ACCURACY and MONOTONICITY ******************************************* #
* The returned result is within 2 ulps in 64 significant bit, #
* i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
* rounded to double precision. The result is provably monotonic #
* in double precision. #
* #
* ALGORITHM *********************************************************** #
* #
* twotox #
* 1. If |X| > 16480, go to ExpBig. #
* #
* 2. If |X| < 2**(-70), go to ExpSm. #
* #
* 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore #
* decompose N as #
* N = 64(M + M') + j, j = 0,1,2,...,63. #
* #
* 4. Overwrite r := r * log2. Then #
* 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
* Go to expr to compute that expression. #
* #
* tentox #
* 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. #
* #
* 2. If |X| < 2**(-70), go to ExpSm. #
* #
* 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set #
* N := round-to-int(y). Decompose N as #
* N = 64(M + M') + j, j = 0,1,2,...,63. #
* #
* 4. Define r as #
* r := ((X - N*L1)-N*L2) * L10 #
* where L1, L2 are the leading and trailing parts of #
* log_10(2)/64 and L10 is the natural log of 10. Then #
* 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
* Go to expr to compute that expression. #
* #
* expr #
* 1. Fetch 2**(j/64) from table as Fact1 and Fact2. #
* #
* 2. Overwrite Fact1 and Fact2 by #
* Fact1 := 2**(M) * Fact1 #
* Fact2 := 2**(M) * Fact2 #
* Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). #
* #
* 3. Calculate P where 1 + P approximates exp(r): #
* P = r + r*r*(A1+r*(A2+...+r*A5)). #
* #
* 4. Let AdjFact := 2**(M'). Return #
* AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). #
* Exit. #
* #
* ExpBig #
* 1. Generate overflow by Huge * Huge if X > 0; otherwise, #
* generate underflow by Tiny * Tiny. #
* #
* ExpSm #
* 1. Return 1 + X. #
* #
*########################################################################
L2TEN64:
.dc.l $406A934F,$0979A371 * 64LOG10/LOG2
L10TWO1:
.dc.l $3F734413,$509F8000 * LOG2/64LOG10
L10TWO2:
.dc.l $BFCD0000,$C0219DC1,$DA994FD2,$00000000
LOG10: .dc.l $40000000,$935D8DDD,$AAA8AC17,$00000000
LOG2: .dc.l $3FFE0000,$B17217F7,$D1CF79AC,$00000000
EXPA5: .dc.l $3F56C16D,$6F7BD0B2
EXPA4: .dc.l $3F811112,$302C712C
EXPA3: .dc.l $3FA55555,$55554CC1
EXPA2: .dc.l $3FC55555,$55554A54
EXPA1: .dc.l $3FE00000,$00000000,$00000000,$00000000
TEXPTBL:
.dc.l $3FFF0000,$80000000,$00000000,$3F738000
.dc.l $3FFF0000,$8164D1F3,$BC030773,$3FBEF7CA
.dc.l $3FFF0000,$82CD8698,$AC2BA1D7,$3FBDF8A9
.dc.l $3FFF0000,$843A28C3,$ACDE4046,$3FBCD7C9
.dc.l $3FFF0000,$85AAC367,$CC487B15,$BFBDE8DA
.dc.l $3FFF0000,$871F6196,$9E8D1010,$3FBDE85C
.dc.l $3FFF0000,$88980E80,$92DA8527,$3FBEBBF1
.dc.l $3FFF0000,$8A14D575,$496EFD9A,$3FBB80CA
.dc.l $3FFF0000,$8B95C1E3,$EA8BD6E7,$BFBA8373
.dc.l $3FFF0000,$8D1ADF5B,$7E5BA9E6,$BFBE9670
.dc.l $3FFF0000,$8EA4398B,$45CD53C0,$3FBDB700
.dc.l $3FFF0000,$9031DC43,$1466B1DC,$3FBEEEB0
.dc.l $3FFF0000,$91C3D373,$AB11C336,$3FBBFD6D
.dc.l $3FFF0000,$935A2B2F,$13E6E92C,$BFBDB319
.dc.l $3FFF0000,$94F4EFA8,$FEF70961,$3FBDBA2B
.dc.l $3FFF0000,$96942D37,$20185A00,$3FBE91D5
.dc.l $3FFF0000,$9837F051,$8DB8A96F,$3FBE8D5A
.dc.l $3FFF0000,$99E04593,$20B7FA65,$BFBCDE7B
.dc.l $3FFF0000,$9B8D39B9,$D54E5539,$BFBEBAAF
.dc.l $3FFF0000,$9D3ED9A7,$2CFFB751,$BFBD86DA
.dc.l $3FFF0000,$9EF53260,$91A111AE,$BFBEBEDD
.dc.l $3FFF0000,$A0B0510F,$B9714FC2,$3FBCC96E
.dc.l $3FFF0000,$A2704303,$0C496819,$BFBEC90B
.dc.l $3FFF0000,$A43515AE,$09E6809E,$3FBBD1DB
.dc.l $3FFF0000,$A5FED6A9,$B15138EA,$3FBCE5EB
.dc.l $3FFF0000,$A7CD93B4,$E965356A,$BFBEC274
.dc.l $3FFF0000,$A9A15AB4,$EA7C0EF8,$3FBEA83C
.dc.l $3FFF0000,$AB7A39B5,$A93ED337,$3FBECB00
.dc.l $3FFF0000,$AD583EEA,$42A14AC6,$3FBE9301
.dc.l $3FFF0000,$AF3B78AD,$690A4375,$BFBD8367
.dc.l $3FFF0000,$B123F581,$D2AC2590,$BFBEF05F
.dc.l $3FFF0000,$B311C412,$A9112489,$3FBDFB3C
.dc.l $3FFF0000,$B504F333,$F9DE6484,$3FBEB2FB
.dc.l $3FFF0000,$B6FD91E3,$28D17791,$3FBAE2CB
.dc.l $3FFF0000,$B8FBAF47,$62FB9EE9,$3FBCDC3C
.dc.l $3FFF0000,$BAFF5AB2,$133E45FB,$3FBEE9AA
.dc.l $3FFF0000,$BD08A39F,$580C36BF,$BFBEAEFD
.dc.l $3FFF0000,$BF1799B6,$7A731083,$BFBCBF51
.dc.l $3FFF0000,$C12C4CCA,$66709456,$3FBEF88A
.dc.l $3FFF0000,$C346CCDA,$24976407,$3FBD83B2
.dc.l $3FFF0000,$C5672A11,$5506DADD,$3FBDF8AB
.dc.l $3FFF0000,$C78D74C8,$ABB9B15D,$BFBDFB17
.dc.l $3FFF0000,$C9B9BD86,$6E2F27A3,$BFBEFE3C
.dc.l $3FFF0000,$CBEC14FE,$F2727C5D,$BFBBB6F8
.dc.l $3FFF0000,$CE248C15,$1F8480E4,$BFBCEE53
.dc.l $3FFF0000,$D06333DA,$EF2B2595,$BFBDA4AE
.dc.l $3FFF0000,$D2A81D91,$F12AE45A,$3FBC9124
.dc.l $3FFF0000,$D4F35AAB,$CFEDFA1F,$3FBEB243
.dc.l $3FFF0000,$D744FCCA,$D69D6AF4,$3FBDE69A
.dc.l $3FFF0000,$D99D15C2,$78AFD7B6,$BFB8BC61
.dc.l $3FFF0000,$DBFBB797,$DAF23755,$3FBDF610
.dc.l $3FFF0000,$DE60F482,$5E0E9124,$BFBD8BE1
.dc.l $3FFF0000,$E0CCDEEC,$2A94E111,$3FBACB12
.dc.l $3FFF0000,$E33F8972,$BE8A5A51,$3FBB9BFE
.dc.l $3FFF0000,$E5B906E7,$7C8348A8,$3FBCF2F4
.dc.l $3FFF0000,$E8396A50,$3C4BDC68,$3FBEF22F
.dc.l $3FFF0000,$EAC0C6E7,$DD24392F,$BFBDBF4A
.dc.l $3FFF0000,$ED4F301E,$D9942B84,$3FBEC01A
.dc.l $3FFF0000,$EFE4B99B,$DCDAF5CB,$3FBE8CAC
.dc.l $3FFF0000,$F281773C,$59FFB13A,$BFBCBB3F
.dc.l $3FFF0000,$F5257D15,$2486CC2C,$3FBEF73A
.dc.l $3FFF0000,$F7D0DF73,$0AD13BB9,$BFB8B795
.dc.l $3FFF0000,$FA83B2DB,$722A033A,$3FBEF84B
.dc.l $3FFF0000,$FD3E0C0C,$F486C175,$BFBEF581
INT set L_SCR1
X set FP_SCR0
XDCARE set X+2
XFRAC set X+4
ADJFACT set FP_SCR0
FACT1 set FP_SCR0
FACT1HI set FACT1+4
FACT1LOW set FACT1+8
FACT2 set FP_SCR1
FACT2HI set FACT2+4
FACT2LOW set FACT2+8
global stwotox
*--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
stwotox:
fmovem.x (a0),fp0 * LOAD INPUT
move.l (a0),d1
move.w 4(a0),d1
fmove.x fp0,X(a6)
andi.l #$7FFFFFFF,d1
cmpi.l #$3FB98000,d1 * |X| >= 2**(-70)?
bge.b TWOOK1
bra.w EXPBORS
TWOOK1:
cmpi.l #$400D80C0,d1 * |X| > 16480?
ble.b TWOMAIN
bra.w EXPBORS
TWOMAIN:
*--USUAL CASE, 2^(-70) <= |X| <= 16480
fmove.x fp0,fp1
fmul.s #$42800000,fp1 * 64 * X
fmove.l fp1,INT(a6) * N = ROUND-TO-INT(64 X)
move.l d2,-(sp)
lea TEXPTBL(pc),a1 * LOAD ADDRESS OF TABLE OF 2^(J/64)
fmove.l INT(a6),fp1 * N --> FLOATING FMT
move.l INT(a6),d1
move.l d1,d2
andi.l #$3F,d1 * D0 IS J
asl.l #4,d1 * DISPLACEMENT FOR 2^(J/64)
add.l d1,a1 * ADDRESS FOR 2^(J/64)
asr.l #6,d2 * d2 IS L, N = 64L + J
move.l d2,d1
asr.l #1,d1 * D0 IS M
sub.l d1,d2 * d2 IS M', N = 64(M+M') + J
addi.l #$3FFF,d2
*--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
*--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
*--ADJFACT = 2^(M').
*--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
fmovem.x fp2-fp3,-(sp) * save fp2/fp3
fmul.s #$3C800000,fp1 * (1/64)*N
move.l (a1)+,FACT1(a6)
move.l (a1)+,FACT1HI(a6)
move.l (a1)+,FACT1LOW(a6)
move.w (a1)+,FACT2(a6)
fsub.x fp1,fp0 * X - (1/64)*INT(64 X)
move.w (a1)+,FACT2HI(a6)
clr.w FACT2HI+2(a6)
clr.l FACT2LOW(a6)
add.w d1,FACT1(a6)
fmul.x LOG2(pc),fp0 * FP0 IS R
add.w d1,FACT2(a6)
bra.w expr
EXPBORS:
*--FPCR, D0 SAVED
cmpi.l #$3FFF8000,d1
bgt.b TEXPBIG
*--|X| IS SMALL, RETURN 1 + X
fmove.l d0,fpcr * restore users round prec,mode
fadd.s #$3F800000,fp0 * RETURN 1 + X
bra.l t_pinx2
TEXPBIG:
*--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
*--REGISTERS SAVE SO FAR ARE FPCR AND D0
move.l X(a6),d1
cmpi.l #0,d1
blt.b EXPNEG
bra.l t_ovfl2 * t_ovfl expects positive value
EXPNEG:
bra.l t_unfl2 * t_unfl expects positive value
global stwotoxd
stwotoxd:
*--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
fmove.l d0,fpcr * set user's rounding mode/precision
fmove.s #$3F800000,fp0 * RETURN 1 + X
move.l (a0),d1
ori.l #$00800001,d1
fadd.s d1,fp0
bra.l t_pinx2
global stentox
*--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
stentox:
fmovem.x (a0),fp0 * LOAD INPUT
move.l (a0),d1
move.w 4(a0),d1
fmove.x fp0,X(a6)
andi.l #$7FFFFFFF,d1
cmpi.l #$3FB98000,d1 * |X| >= 2**(-70)?
bge.b TENOK1
bra.w EXPBORS
TENOK1:
cmpi.l #$400B9B07,d1 * |X| <= 16480*log2/log10 ?
ble.b TENMAIN
bra.w EXPBORS
TENMAIN:
*--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
fmove.x fp0,fp1
fmul.d L2TEN64(pc),fp1 * X*64*LOG10/LOG2
fmove.l fp1,INT(a6) * N=INT(X*64*LOG10/LOG2)
move.l d2,-(sp)
lea TEXPTBL(pc),a1 * LOAD ADDRESS OF TABLE OF 2^(J/64)
fmove.l INT(a6),fp1 * N --> FLOATING FMT
move.l INT(a6),d1
move.l d1,d2
andi.l #$3F,d1 * D0 IS J
asl.l #4,d1 * DISPLACEMENT FOR 2^(J/64)
add.l d1,a1 * ADDRESS FOR 2^(J/64)
asr.l #6,d2 * d2 IS L, N = 64L + J
move.l d2,d1
asr.l #1,d1 * D0 IS M
sub.l d1,d2 * d2 IS M', N = 64(M+M') + J
addi.l #$3FFF,d2
*--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
*--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
*--ADJFACT = 2^(M').
*--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
fmovem.x fp2-fp3,-(sp) * save fp2/fp3
fmove.x fp1,fp2
fmul.d L10TWO1(pc),fp1 * N*(LOG2/64LOG10)_LEAD
move.l (a1)+,FACT1(a6)
fmul.x L10TWO2(pc),fp2 * N*(LOG2/64LOG10)_TRAIL
move.l (a1)+,FACT1HI(a6)
move.l (a1)+,FACT1LOW(a6)
fsub.x fp1,fp0 * X - N L_LEAD
move.w (a1)+,FACT2(a6)
fsub.x fp2,fp0 * X - N L_TRAIL
move.w (a1)+,FACT2HI(a6)
clr.w FACT2HI+2(a6)
clr.l FACT2LOW(a6)
fmul.x LOG10(pc),fp0 * FP0 IS R
add.w d1,FACT1(a6)
add.w d1,FACT2(a6)
expr:
*--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
*--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
*--FP0 IS R. THE FOLLOWING CODE COMPUTES
*-- 2**(M'+M) * 2**(J/64) * EXP(R)
fmove.x fp0,fp1
fmul.x fp1,fp1 * FP1 IS S = R*R
fmove.d EXPA5(pc),fp2 * FP2 IS A5
fmove.d EXPA4(pc),fp3 * FP3 IS A4
fmul.x fp1,fp2 * FP2 IS S*A5
fmul.x fp1,fp3 * FP3 IS S*A4
fadd.d EXPA3(pc),fp2 * FP2 IS A3+S*A5
fadd.d EXPA2(pc),fp3 * FP3 IS A2+S*A4
fmul.x fp1,fp2 * FP2 IS S*(A3+S*A5)
fmul.x fp1,fp3 * FP3 IS S*(A2+S*A4)
fadd.d EXPA1(pc),fp2 * FP2 IS A1+S*(A3+S*A5)
fmul.x fp0,fp3 * FP3 IS R*S*(A2+S*A4)
fmul.x fp1,fp2 * FP2 IS S*(A1+S*(A3+S*A5))
fadd.x fp3,fp0 * FP0 IS R+R*S*(A2+S*A4)
fadd.x fp2,fp0 * FP0 IS EXP(R) - 1
fmovem.x (sp)+,fp2-fp3 * restore fp2/fp3
*--FINAL RECONSTRUCTION PROCESS
*--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
fmul.x FACT1(a6),fp0
fadd.x FACT2(a6),fp0
fadd.x FACT1(a6),fp0
fmove.l d0,fpcr * restore users round prec,mode
move.w d2,ADJFACT(a6) * INSERT EXPONENT
move.l (sp)+,d2
move.l #$80000000,ADJFACT+4(a6)
clr.l ADJFACT+8(a6)
move.b #FMUL_OP,d1 * last inst is MUL
fmul.x ADJFACT(a6),fp0 * FINAL ADJUSTMENT
bra.l t_catch
global stentoxd
stentoxd:
*--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
fmove.l d0,fpcr * set user's rounding mode/precision
fmove.s #$3F800000,fp0 * RETURN 1 + X
move.l (a0),d1
ori.l #$00800001,d1
fadd.s d1,fp0
bra.l t_pinx2
*########################################################################
* smovcr(): returns the ROM constant at the offset specified in d1 #
* rounded to the mode and precision specified in d0. #
* #
* INPUT *************************************************************** #
* d0 = rnd prec,mode #
* d1 = ROM offset #
* #
* OUTPUT ************************************************************** #
* fp0 = the ROM constant rounded to the user's rounding mode,prec #
* #
*########################################################################
global smovecr
smovecr:
move.l d1,-(sp) * save rom offset for a sec
lsr.b #$4,d0 * shift ctrl bits to lo
move.l d0,d1 * make a copy
andi.w #$3,d1 * extract rnd mode
andi.w #$c,d0 * extract rnd prec
swap d0 * put rnd prec in hi
move.w d1,d0 * put rnd mode in lo
move.l (sp)+,d1 * get rom offset
*
* check range of offset
*
tst.b d1 * if zero, offset is to pi
beq.b pi_tbl * it is pi
cmpi.b #$0a,d1 * check range $01 - $0a
ble.b z_val * if in this range, return zero
cmpi.b #$0e,d1 * check range $0b - $0e
ble.b sm_tbl * valid constants in this range
cmpi.b #$2f,d1 * check range $10 - $2f
ble.b z_val * if in this range, return zero
cmpi.b #$3f,d1 * check range $30 - $3f
ble.b bg_tbl * valid constants in this range
z_val:
bra.l ld_pzero * return a zero
*
* the answer is PI rounded to the proper precision.
*
* fetch a pointer to the answer table relating to the proper rounding
* precision.
*
pi_tbl:
tst.b d0 * is rmode RN?
bne.b pi_not_rn * no
pi_rn:
lea.l PIRN.l(pc),a0 * yes; load PI RN table addr
bra.w set_finx
pi_not_rn:
cmpi.b #rp_mode,d0 * is rmode RP?
beq.b pi_rp * yes
pi_rzrm:
lea.l PIRZRM.l(pc),a0 * no; load PI RZ,RM table addr
bra.b set_finx
pi_rp:
lea.l PIRP.l(pc),a0 * load PI RP table addr
bra.b set_finx
*
* the answer is one of:
* $0B log10(2) (inexact)
* $0C e (inexact)
* $0D log2(e) (inexact)
* $0E log10(e) (exact)
*
* fetch a pointer to the answer table relating to the proper rounding
* precision.
*
sm_tbl:
subi.b #$b,d1 * make offset in 0-4 range
tst.b d0 * is rmode RN?
bne.b sm_not_rn * no
sm_rn:
lea.l SMALRN.l(pc),a0 * yes; load RN table addr
sm_tbl_cont:
cmpi.b #$2,d1 * is result log10(e)?
ble.b set_finx * no; answer is inexact
bra.b no_finx * yes; answer is exact
sm_not_rn:
cmpi.b #rp_mode,d0 * is rmode RP?
beq.b sm_rp * yes
sm_rzrm:
lea.l SMALRZRM.l(pc),a0 * no; load RZ,RM table addr
bra.b sm_tbl_cont
sm_rp:
lea.l SMALRP.l(pc),a0 * load RP table addr
bra.b sm_tbl_cont
*
* the answer is one of:
* $30 ln(2) (inexact)
* $31 ln(10) (inexact)
* $32 10^0 (exact)
* $33 10^1 (exact)
* $34 10^2 (exact)
* $35 10^4 (exact)
* $36 10^8 (exact)
* $37 10^16 (exact)
* $38 10^32 (inexact)
* $39 10^64 (inexact)
* $3A 10^128 (inexact)
* $3B 10^256 (inexact)
* $3C 10^512 (inexact)
* $3D 10^1024 (inexact)
* $3E 10^2048 (inexact)
* $3F 10^4096 (inexact)
*
* fetch a pointer to the answer table relating to the proper rounding
* precision.
*
bg_tbl:
subi.b #$30,d1 * make offset in 0-f range
tst.b d0 * is rmode RN?
bne.b bg_not_rn * no
bg_rn:
lea.l BIGRN.l(pc),a0 * yes; load RN table addr
bg_tbl_cont:
cmpi.b #$1,d1 * is offset <= $31?
ble.b set_finx * yes; answer is inexact
cmpi.b #$7,d1 * is $32 <= offset <= $37?
ble.b no_finx * yes; answer is exact
bra.b set_finx * no; answer is inexact
bg_not_rn:
cmpi.b #rp_mode,d0 * is rmode RP?
beq.b bg_rp * yes
bg_rzrm:
lea.l BIGRZRM.l(pc),a0 * no; load RZ,RM table addr
bra.b bg_tbl_cont
bg_rp:
lea.l BIGRP.l(pc),a0 * load RP table addr
bra.b bg_tbl_cont
* answer is inexact, so set INEX2 and AINEX in the user's FPSR.
set_finx:
ori.l #inx2a_mask,USER_FPSR(a6) * set INEX2/AINEX
no_finx:
mulu.w #$c,d1 * offset points into tables
swap d0 * put rnd prec in lo word
tst.b d0 * is precision extended?
bne.b not_ext * if xprec, do not call round
* Precision is extended
fmovem.x (a0,d1.w),fp0 * return result in fp0
rts
* Precision is single or double
not_ext:
swap d0 * rnd prec in upper word
* call round() to round the answer to the proper precision.
* exponents out of range for single or double DO NOT cause underflow
* or overflow.
move.w $0(a0,d1.w),FP_SCR1_EX(a6) * load first word
move.l $4(a0,d1.w),FP_SCR1_HI(a6) * load second word
move.l $8(a0,d1.w),FP_SCR1_LO(a6) * load third word
move.l d0,d1
clr.l d0 * clear g,r,s
lea FP_SCR1(a6),a0 * pass ptr to answer
clr.w __LOCAL___SGN(a0) * sign always positive
bsr.l _round * round the mantissa
fmovem.x (a0),fp0 * return rounded result in fp0
rts
align $4,$51FC
PIRN: .dc.l $40000000,$c90fdaa2,$2168c235 * pi
PIRZRM: .dc.l $40000000,$c90fdaa2,$2168c234 * pi
PIRP: .dc.l $40000000,$c90fdaa2,$2168c235 * pi
SMALRN: .dc.l $3ffd0000,$9a209a84,$fbcff798 * log10(2)
.dc.l $40000000,$adf85458,$a2bb4a9a * e
.dc.l $3fff0000,$b8aa3b29,$5c17f0bc * log2(e)
.dc.l $3ffd0000,$de5bd8a9,$37287195 * log10(e)
.dc.l $00000000,$00000000,$00000000 * 0.0
SMALRZRM:
.dc.l $3ffd0000,$9a209a84,$fbcff798 * log10(2)
.dc.l $40000000,$adf85458,$a2bb4a9a * e
.dc.l $3fff0000,$b8aa3b29,$5c17f0bb * log2(e)
.dc.l $3ffd0000,$de5bd8a9,$37287195 * log10(e)
.dc.l $00000000,$00000000,$00000000 * 0.0
SMALRP: .dc.l $3ffd0000,$9a209a84,$fbcff799 * log10(2)
.dc.l $40000000,$adf85458,$a2bb4a9b * e
.dc.l $3fff0000,$b8aa3b29,$5c17f0bc * log2(e)
.dc.l $3ffd0000,$de5bd8a9,$37287195 * log10(e)
.dc.l $00000000,$00000000,$00000000 * 0.0
BIGRN: .dc.l $3ffe0000,$b17217f7,$d1cf79ac * ln(2)
.dc.l $40000000,$935d8ddd,$aaa8ac17 * ln(10)
.dc.l $3fff0000,$80000000,$00000000 * 10 ^ 0
.dc.l $40020000,$A0000000,$00000000 * 10 ^ 1
.dc.l $40050000,$C8000000,$00000000 * 10 ^ 2
.dc.l $400C0000,$9C400000,$00000000 * 10 ^ 4
.dc.l $40190000,$BEBC2000,$00000000 * 10 ^ 8
.dc.l $40340000,$8E1BC9BF,$04000000 * 10 ^ 16
.dc.l $40690000,$9DC5ADA8,$2B70B59E * 10 ^ 32
.dc.l $40D30000,$C2781F49,$FFCFA6D5 * 10 ^ 64
.dc.l $41A80000,$93BA47C9,$80E98CE0 * 10 ^ 128
.dc.l $43510000,$AA7EEBFB,$9DF9DE8E * 10 ^ 256
.dc.l $46A30000,$E319A0AE,$A60E91C7 * 10 ^ 512
.dc.l $4D480000,$C9767586,$81750C17 * 10 ^ 1024
.dc.l $5A920000,$9E8B3B5D,$C53D5DE5 * 10 ^ 2048
.dc.l $75250000,$C4605202,$8A20979B * 10 ^ 4096
BIGRZRM:
.dc.l $3ffe0000,$b17217f7,$d1cf79ab * ln(2)
.dc.l $40000000,$935d8ddd,$aaa8ac16 * ln(10)
.dc.l $3fff0000,$80000000,$00000000 * 10 ^ 0
.dc.l $40020000,$A0000000,$00000000 * 10 ^ 1
.dc.l $40050000,$C8000000,$00000000 * 10 ^ 2
.dc.l $400C0000,$9C400000,$00000000 * 10 ^ 4
.dc.l $40190000,$BEBC2000,$00000000 * 10 ^ 8
.dc.l $40340000,$8E1BC9BF,$04000000 * 10 ^ 16
.dc.l $40690000,$9DC5ADA8,$2B70B59D * 10 ^ 32
.dc.l $40D30000,$C2781F49,$FFCFA6D5 * 10 ^ 64
.dc.l $41A80000,$93BA47C9,$80E98CDF * 10 ^ 128
.dc.l $43510000,$AA7EEBFB,$9DF9DE8D * 10 ^ 256
.dc.l $46A30000,$E319A0AE,$A60E91C6 * 10 ^ 512
.dc.l $4D480000,$C9767586,$81750C17 * 10 ^ 1024
.dc.l $5A920000,$9E8B3B5D,$C53D5DE4 * 10 ^ 2048
.dc.l $75250000,$C4605202,$8A20979A * 10 ^ 4096
BIGRP:
.dc.l $3ffe0000,$b17217f7,$d1cf79ac * ln(2)
.dc.l $40000000,$935d8ddd,$aaa8ac17 * ln(10)
.dc.l $3fff0000,$80000000,$00000000 * 10 ^ 0
.dc.l $40020000,$A0000000,$00000000 * 10 ^ 1
.dc.l $40050000,$C8000000,$00000000 * 10 ^ 2
.dc.l $400C0000,$9C400000,$00000000 * 10 ^ 4
.dc.l $40190000,$BEBC2000,$00000000 * 10 ^ 8
.dc.l $40340000,$8E1BC9BF,$04000000 * 10 ^ 16
.dc.l $40690000,$9DC5ADA8,$2B70B59E * 10 ^ 32
.dc.l $40D30000,$C2781F49,$FFCFA6D6 * 10 ^ 64
.dc.l $41A80000,$93BA47C9,$80E98CE0 * 10 ^ 128
.dc.l $43510000,$AA7EEBFB,$9DF9DE8E * 10 ^ 256
.dc.l $46A30000,$E319A0AE,$A60E91C7 * 10 ^ 512
.dc.l $4D480000,$C9767586,$81750C18 * 10 ^ 1024
.dc.l $5A920000,$9E8B3B5D,$C53D5DE5 * 10 ^ 2048
.dc.l $75250000,$C4605202,$8A20979B * 10 ^ 4096
*########################################################################
* sscale(): computes the destination operand scaled by the source #
* operand. If the absoulute value of the source operand is #
* >= 2^14, an overflow or underflow is returned. #
* #
* INPUT *************************************************************** #
* a0 = pointer to double-extended source operand X #
* a1 = pointer to double-extended destination operand Y #
* #
* OUTPUT ************************************************************** #
* fp0 = scale(X,Y) #
* #
*########################################################################
SIGN set L_SCR1
global sscale
sscale:
move.l d0,-(sp) * store off ctrl bits for now
move.w DST_EX.w(a1),d1 * get dst exponent
smi.b SIGN(a6) * use SIGN to hold dst sign
andi.l #$00007fff,d1 * strip sign from dst exp
move.w SRC_EX.w(a0),d0 * check src bounds
andi.w #$7fff,d0 * clr src sign bit
cmpi.w #$3fff,d0 * is src ~ ZERO?
blt.w src_small * yes
cmpi.w #$400c,d0 * no; is src too big?
bgt.w src_out * yes
*
* Source is within 2^14 range.
*
src_ok:
fintrz.x SRC.w(a0),fp0 * calc int of src
fmove.l fp0,d0 * int src to d0
* don't want any accrued bits from the fintrz showing up later since
* we may need to read the fpsr for the last fp op in t_catch2().
fmove.l #$0,fpsr
tst.b DST_HI(a1) * is dst denormalized?
bmi.b sok_norm
* the dst is a DENORM. normalize the DENORM and add the adjustment to
* the src value. then, jump to the norm part of the routine.
sok_dnrm:
move.l d0,-(sp) * save src for now
move.w DST_EX.w(a1),FP_SCR0_EX(a6) * make a copy
move.l DST_HI(a1),FP_SCR0_HI(a6)
move.l DST_LO(a1),FP_SCR0_LO(a6)
lea FP_SCR0(a6),a0 * pass ptr to DENORM
bsr.l norm * normalize the DENORM
neg.l d0
add.l (sp)+,d0 * add adjustment to src
fmovem.x FP_SCR0(a6),fp0 * load normalized DENORM
cmpi.w #-$3fff,d0 * is the shft amt really low?
bge.b sok_norm2 * thank goodness no
* the multiply factor that we're trying to create should be a denorm
* for the multiply to work. therefore, we're going to actually do a
* multiply with a denorm which will cause an unimplemented data type
* exception to be put into the machine which will be caught and corrected
* later. we don't do this with the DENORMs above because this method
* is slower. but, don't fret, I don't see it being used much either.
fmove.l (sp)+,fpcr * restore user fpcr
move.l #$80000000,d1 * load normalized mantissa
subi.l #-$3fff,d0 * how many should we shift?
neg.l d0 * make it positive
cmpi.b #$20,d0 * is it > 32?
bge.b sok_dnrm_32 * yes
lsr.l d0,d1 * no; bit stays in upper lw
clr.l -(sp) * insert zero low mantissa
move.l d1,-(sp) * insert new high mantissa
clr.l -(sp) * make zero exponent
bra.b sok_norm_cont
sok_dnrm_32:
subi.b #$20,d0 * get shift count
lsr.l d0,d1 * make low mantissa longword
move.l d1,-(sp) * insert new low mantissa
clr.l -(sp) * insert zero high mantissa
clr.l -(sp) * make zero exponent
bra.b sok_norm_cont
* the src will force the dst to a DENORM value or worse. so, let's
* create an fp multiply that will create the result.
sok_norm:
fmovem.x DST.w(a1),fp0 * load fp0 with normalized src
sok_norm2:
fmove.l (sp)+,fpcr * restore user fpcr
addi.w #$3fff,d0 * turn src amt into exp value
swap d0 * put exponent in high word
clr.l -(sp) * insert new exponent
move.l #$80000000,-(sp) * insert new high mantissa
move.l d0,-(sp) * insert new lo mantissa
sok_norm_cont:
fmove.l fpcr,d0 * d0 needs fpcr for t_catch2
move.b #FMUL_OP,d1 * last inst is MUL
fmul.x (sp)+,fp0 * do the multiply
bra.l t_catch2 * catch any exceptions
*
* Source is outside of 2^14 range. Test the sign and branch
* to the appropriate exception handler.
*
src_out:
move.l (sp)+,d0 * restore ctrl bits
exg a0,a1 * swap src,dst ptrs
tst.b SRC_EX.w(a1) * is src negative?
bmi.l t_unfl * yes; underflow
bra.l t_ovfl_sc * no; overflow
*
* The source input is below 1, so we check for denormalized numbers
* and set unfl.
*
src_small:
tst.b DST_HI(a1) * is dst denormalized?
bpl.b ssmall_done * yes
move.l (sp)+,d0
fmove.l d0,fpcr * no; load control bits
move.b #FMOV_OP,d1 * last inst is MOVE
fmove.x DST.w(a1),fp0 * simply return dest
bra.l t_catch2
ssmall_done:
move.l (sp)+,d0 * load control bits into d1
move.l a1,a0 * pass ptr to dst
bra.l t_resdnrm
*########################################################################
* smod(): computes the fp MOD of the input values X,Y. #
* srem(): computes the fp (IEEE) REM of the input values X,Y. #
* #
* INPUT *************************************************************** #
* a0 = pointer to extended precision input X #
* a1 = pointer to extended precision input Y #
* d0 = round precision,mode #
* #
* The input operands X and Y can be either normalized or #
* denormalized. #
* #
* OUTPUT ************************************************************** #
* fp0 = FREM(X,Y) or FMOD(X,Y) #
* #
* ALGORITHM *********************************************************** #
* #
* Step 1. Save and strip signs of X and Y: signX := sign(X), #
* signY := sign(Y), X := |X|, Y := |Y|, #
* signQ := signX EOR signY. Record whether MOD or REM #
* is requested. #
* #
* Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. #
* If (L < 0) then #
* R := X, go to Step 4. #
* else #
* R := 2^(-L)X, j := L. #
* endif #
* #
* Step 3. Perform MOD(X,Y) #
* 3.1 If R = Y, go to Step 9. #
* 3.2 If R > Y, then { R := R - Y, Q := Q + 1} #
* 3.3 If j = 0, go to Step 4. #
* 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to #
* Step 3.1. #
* #
* Step 4. At this point, R = X - QY = MOD(X,Y). Set #
* Last_Subtract := false (used in Step 7 below). If #
* MOD is requested, go to Step 6. #
* #
* Step 5. R = MOD(X,Y), but REM(X,Y) is requested. #
* 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to #
* Step 6. #
* 5.2 If R > Y/2, then { set Last_Subtract := true, #
* Q := Q + 1, Y := signY*Y }. Go to Step 6. #
* 5.3 This is the tricky case of R = Y/2. If Q is odd, #
* then { Q := Q + 1, signX := -signX }. #
* #
* Step 6. R := signX*R. #
* #
* Step 7. If Last_Subtract = true, R := R - Y. #
* #
* Step 8. Return signQ, last 7 bits of Q, and R as required. #
* #
* Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, #
* X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), #
* R := 0. Return signQ, last 7 bits of Q, and R. #
* #
*########################################################################
Mod_Flag set L_SCR3
Sc_Flag set L_SCR3+1
SignY set L_SCR2
SignX set L_SCR2+2
SignQ set L_SCR3+2
Y set FP_SCR0
Y_Hi set Y+4
Y_Lo set Y+8
R set FP_SCR1
R_Hi set R+4
R_Lo set R+8
Scale:
.dc.l $00010000,$80000000,$00000000,$00000000
global smod
smod:
clr.b FPSR_QBYTE(a6)
move.l d0,-(sp) * save ctrl bits
clr.b Mod_Flag(a6)
bra.b Mod_Rem
global srem
srem:
clr.b FPSR_QBYTE(a6)
move.l d0,-(sp) * save ctrl bits
move.b #$1,Mod_Flag(a6)
Mod_Rem:
*..Save sign of X and Y
movem.l d2-d7,-(sp) * save data registers
move.w SRC_EX.w(a0),d3
move.w d3,SignY(a6)
andi.l #$00007FFF,d3 * Y := |Y|
*
move.l SRC_HI(a0),d4
move.l SRC_LO(a0),d5 * (D3,D4,D5) is |Y|
tst.l d3
bne.b Y_Normal
move.l #$00003FFE,d3 * $3FFD + 1
tst.l d4
bne.b HiY_not0
HiY_0:
move.l d5,d4
clr.l d5
subi.l #32,d3
clr.l d6
bfffo d4{#0:#32},d6
lsl.l d6,d4
sub.l d6,d3 * (D3,D4,D5) is normalized
* ...with bias $7FFD
bra.b Chk_X
HiY_not0:
clr.l d6
bfffo d4{#0:#32},d6
sub.l d6,d3
lsl.l d6,d4
move.l d5,d7 * a copy of D5
lsl.l d6,d5
neg.l d6
addi.l #32,d6
lsr.l d6,d7
or.l d7,d4 * (D3,D4,D5) normalized
* ...with bias $7FFD
bra.b Chk_X
Y_Normal:
addi.l #$00003FFE,d3 * (D3,D4,D5) normalized
* ...with bias $7FFD
Chk_X:
move.w DST_EX.w(a1),d0
move.w d0,SignX(a6)
move.w SignY(a6),d1
eor.l d0,d1
andi.l #$00008000,d1
move.w d1,SignQ(a6) * sign(Q) obtained
andi.l #$00007FFF,d0
move.l DST_HI(a1),d1
move.l DST_LO(a1),d2 * (D0,D1,D2) is |X|
tst.l d0
bne.b X_Normal
move.l #$00003FFE,d0
tst.l d1
bne.b HiX_not0
HiX_0:
move.l d2,d1
clr.l d2
subi.l #32,d0
clr.l d6
bfffo d1{#0:#32},d6
lsl.l d6,d1
sub.l d6,d0 * (D0,D1,D2) is normalized
* ...with bias $7FFD
bra.b Init
HiX_not0:
clr.l d6
bfffo d1{#0:#32},d6
sub.l d6,d0
lsl.l d6,d1
move.l d2,d7 * a copy of D2
lsl.l d6,d2
neg.l d6
addi.l #32,d6
lsr.l d6,d7
or.l d7,d1 * (D0,D1,D2) normalized
* ...with bias $7FFD
bra.b Init
X_Normal:
addi.l #$00003FFE,d0 * (D0,D1,D2) normalized
* ...with bias $7FFD
Init:
*
move.l d3,L_SCR1(a6) * save biased exp(Y)
move.l d0,-(sp) * save biased exp(X)
sub.l d3,d0 * L := expo(X)-expo(Y)
clr.l d6 * D6 := carry <- 0
clr.l d3 * D3 is Q
move.l #0,a1 * A1 is k; j+k=L, Q=0
*..(Carry,D1,D2) is R
tst.l d0
bge.b Mod_Loop_pre
*..expo(X) < expo(Y). Thus X = mod(X,Y)
*
move.l (sp)+,d0 * restore d0
bra.w Get_Mod