; 68020 version of qfltb.c
; Uses 32*32=64 bit multiply and divide instructions.
; Copyright 1989 by Stephen L. Moshier

	IDENT	qfltd

; Set NQ = size of number in words.
; Also adjust rndbit[] array to have a 1 bit in the rounding
; position (see at end of the file).
NQ	equ	24

; Conventions visible in the routines below:
;  - Arguments are longwords read at 8(A6), 12(A6), ... after LINK A6,#0.
;  - Each routine restores the registers named in its MOVEM.L list.
;  - The shift, add and subtract routines touch only words 2 .. NQ of an
;    array (byte offsets 4 .. 2*NQ+1); words 0 and 1 are left alone.
;  - Carries run from higher addresses to lower ones, so the word at the
;    lower address is the more significant one.

; shdn1( x )
; Shift words x[2] .. x[NQ] right by one bit as a single multiword value.
; A 0 enters the top bit of x[2]; the bit leaving x[NQ] is not stored.
	GLOBAL	shdn1
shdn1	LINK	A6,#0
	MOVEM.L	D7/A5,-(A7)
	MOVE.L	8(A6),A5	; A5 = x
	ADDQ.L	#4,A5		; A5 = &x[2]
	MOVEQ	#NQ-2,D7	; DBRA count: NQ-1 words
	MOVE.W	#0,CCR		; clear X so that a 0 is shifted in first
sdnl	ROXR	(A5)+		; one-bit rotate through X; X feeds the next word
	DBRA	D7,sdnl
	MOVEM.L	(A7)+,A5/D7
	UNLK	A6
	RTS

; shup1( x )
; Shift words x[2] .. x[NQ] left by one bit as a single multiword value.
; A 0 enters the bottom bit of x[NQ]; the bit leaving x[2] is not stored.
	GLOBAL	shup1
shup1	LINK	A6,#0
	MOVEM.L	D7/A5,-(A7)
	MOVE.L	8(A6),A5	; A5 = x
	ADDA.W	#NQ+NQ+2,A5	; A5 = &x[NQ+1], one word past the last one shifted
	MOVEQ	#NQ-2,D7	; do NQ-1 words
	MOVE	#0,CCR		; clear X so that a 0 is shifted in first
sup1l	ROXL	-(A5)		; work from x[NQ] toward x[2]
	DBRA	D7,sup1l
	MOVEM.L	(A7)+,A5/D7
	UNLK	A6
	RTS

; shdn8( x )
; Move the bytes at offsets 4 .. 2*NQ of x one address higher (to offsets
; 5 .. 2*NQ+1), then store 0 at offset 4. The byte previously at offset
; 2*NQ+1 is overwritten. The copy runs from the top down so the
; overlapping regions do not clobber unread bytes.
	GLOBAL	shdn8
shdn8	LINK	A6,#0
	MOVEM.L	D7/A5/A4,-(A7)
	MOVE.L	8(A6),A5
	ADDA.W	#NQ+NQ+1,A5	; source pointer, predecremented before use
	MOVEA.L	A5,A4
	ADDQ.L	#1,A4		; destination is one byte above the source
	MOVEQ	#NQ+NQ-4,D7	; (NQ-1) * 2 - 1 - 1
sd8l	MOVE.B	-(A5),-(A4)
	DBRA	D7,sd8l
	MOVE.B	#0,-(A4)	; zero the vacated byte at offset 4
	MOVEM.L	(A7)+,A4/A5/D7
	UNLK	A6
	RTS

; shup8( x )
; Move the bytes at offsets 5 .. 2*NQ+1 of x one address lower (to offsets
; 4 .. 2*NQ), then store 0 at offset 2*NQ+1. The byte previously at
; offset 4 is overwritten.
	GLOBAL	shup8
shup8	LINK	A6,#0
	MOVEM.L	D7/A5/A4,-(A7)
	MOVE.L	8(A6),A5
	ADDQ	#4,A5		; destination starts at offset 4
	MOVEA.L	A5,A4
	ADDQ.L	#1,A4		; source is one byte above the destination
	MOVEQ	#NQ+NQ-4,D7	; (NQ-1) * 2 - 1 - 1
su8l	MOVE.B	(A4)+,(A5)+
	DBRA	D7,su8l
	MOVE.B	#0,(A5)		; zero the vacated byte at offset 2*NQ+1
	MOVEM.L	(A7)+,A4/A5/D7
	UNLK	A6
	RTS

; shdn16( x )
; Move words x[2] .. x[NQ-1] to x[3] .. x[NQ], then set x[2] = 0.
; The old x[NQ] is overwritten.
	GLOBAL	shdn16
shdn16	LINK	A6,#0
	MOVEM.L	D7/A5/A4,-(A7)
	MOVE.L	8(A6),A5
	ADDA.W	#NQ+NQ,A5	; A5 = &x[NQ], predecremented before use
	MOVEA.L	A5,A4
	ADDQ.L	#2,A4		; A4 = &x[NQ+1]
	MOVEQ	#NQ-3,D7	; do NQ-2 words
sd6l	MOVE.W	-(A5),-(A4)
	DBRA	D7,sd6l
	MOVE.W	#0,-(A4)	; x[2] = 0
	MOVEM.L	(A7)+,A4/A5/D7
	UNLK	A6
	RTS

; shup16( x )
; Move words x[3] .. x[NQ] to x[2] .. x[NQ-1], then set x[NQ] = 0.
; The old x[2] is overwritten.
	GLOBAL	shup16
shup16	LINK	A6,#0
	MOVEM.L	D7/A5/A4,-(A7)
	MOVE.L	8(A6),A5
	ADDQ	#4,A5		; A5 = &x[2] (destination)
	MOVEA.L	A5,A4
	ADDQ.L	#2,A4		; A4 = &x[3] (source)
	MOVEQ	#NQ-3,D7	; do NQ-2 words
su6l	MOVE.W	(A4)+,(A5)+
	DBRA	D7,su6l
	MOVE.W	#0,(A5)		; x[NQ] = 0
	MOVEM.L	(A7)+,A4/A5/D7
	UNLK	A6
	RTS

; addm( a, b )
; Multiword add: b[i] = b[i] + a[i] + carry for i = NQ down to 2.
; The first argument is only read; the second receives the sum.
; The carry out of word 2 is not stored.
	GLOBAL	addm
addm	LINK	A6,#0
	MOVEM.L	D7/A5/A4,-(A7)
	MOVE.L	8(A6),A4	; A4 = a (source)
	ADDA.W	#NQ+NQ+2,A4	; 2 * (2 + (NQ-1))
	MOVE.L	12(A6),A5	; A5 = b (destination)
	ADDA.W	#NQ+NQ+2,A5
	MOVEQ	#NQ-2,D7	; DBRA count: NQ-1 words
	MOVE	#0,CCR		; no carry into the lowest word
add1l	ADDX.W	-(A4),-(A5)
	DBRA	D7,add1l
	MOVEM.L	(A7)+,A4/A5/D7
	UNLK	A6
	RTS

; subm( a, b )
; Multiword subtract: b[i] = b[i] - a[i] - borrow for i = NQ down to 2.
; The first argument is only read; the second receives the difference.
; The borrow out of word 2 is not stored.
	GLOBAL	subm
subm	LINK	A6,#0
	MOVEM.L	D7/A5/A4,-(A7)
	MOVE.L	8(A6),A4	; A4 = a (source)
	ADDA.W	#NQ+NQ+2,A4	; 2 * (2 + (NQ-1))
	MOVE.L	12(A6),A5	; A5 = b (destination)
	ADDA.W	#NQ+NQ+2,A5
	MOVEQ	#NQ-2,D7	; DBRA count: NQ-1 words
	MOVE	#0,CCR		; no borrow into the lowest word
sub1l	SUBX.W	-(A4),-(A5)
	DBRA	D7,sub1l
	MOVEM.L	(A7)+,A4/A5/D7
	UNLK	A6
	RTS

; Variable precision multiply of significands.
; c must not be in the same location as either a or b.
;
; static int mulv( a, b, c, prec )
; unsigned short a[], b[], c[];
; int prec;
;
; Arguments: 8(A6) = a, 12(A6) = b, 16(A6) = c, 20(A6) = prec.
; prec+3 words of c, starting at c[2], are cleared first.
; The operands are then read 32 bits at a time and multiplied with the
; 68020 MULU.L 32x32->64 instruction, which is emitted as DATA.W words.
; Each pass of the outer loop sums one column of partial products into
; an 80-bit accumulator (D2.W : D4.L : D6.L) that is loaded from c at r
; and stored back to the same place; r then steps down by two words.
; D0 and A0 are used as scratch and are not in the save list.
; Both loops are bottom-tested, so the body always runs at least once.
; NOTE(review): nothing sets D0 to a return value before RTS although
; the prototype above says int - confirm that callers ignore it.
	GLOBAL	mulv
mulv	LINK	A6,#0
	MOVEM.L	D7/D6/D5/D4/D3/D2/D1/A5/A4/A3/A2/A1,-(A7)
	MOVE.L	8(A6),A4	; A4 = a
	MOVE.L	12(A6),A5	; A5 = b
	MOVE.L	16(A6),A3	; A3 = c
	MOVE.L	20(A6),D6	; precision, in words
; clear the output array of prec+3 words
	MOVE.L	D6,D7
	ADDQ.L	#2,D7		; DBRA count prec+2 gives prec+3 stores
;	MOVE.L	A3,A0
;	ADDQ	#4,A0
	LEA	4(A3),A0	; start clearing at c[2]
	CLR.L	D1		; D1 = 0: fill value here, zero addend for ADDX.W below
mv2l	MOVE.W	D1,(A0)+
	DBRA	D7,mv2l
; for( k=prec+1; k>=3; k -= 2 )
	MOVE.L	D6,D7		; prec
	ASL.L	#1,D7
	ADDA.L	D7,A3
	ADDQ	#6,A3		; r = &c[prec+3];
	ADDQ.L	#2,D7		; D7 = 2*k, k = prec+1
;	{
mv0l	MOVE.L	A5,A1		; q = &b[3];
	ADDQ	#6,A1
	MOVEA.L	A4,A0
	ADDA.L	D7,A0		; p = &a[k];
;	for( i=k; i>=3; i-- )
;		{
	MOVE.L	D7,D5		; 2*k
;		if( (*p == 0) || (*q == 0) )
;			{
;			--p;
;			++q;
;			continue;
;			}
	ADDQ.L	#4,A0		; bias p so the first -(A0) fetch reads at &a[k]
	MOVE.L	(A3),D6		; accumulator low 32 bits, from r
	MOVE.L	-(A3),D4	; accumulator middle 32 bits
	MOVE.W	-(A3),D2	; accumulator top 16 bits
mv1l	MOVE.L	-(A0),D0	; *p--
	DATA.W	$4C11, $0403	;MULU.L (A1),D3:D0
	ADDQ	#4,A1		; q advances one longword
	ADD.L	D0,D6		; add the 64-bit product into the accumulator;
	ADDX.L	D3,D4		; X carries between the three pieces
	ADDX.W	D1,D2		; D1 is 0, so this adds only the carry
	SUBQ	#4,D5
	CMPI.W	#6,D5		; 6 = 2*3: continue while the index is >= 3
	BGE.S	mv1l
	MOVE.W	D2,(A3)+	; store the accumulator back where it was loaded
	MOVE.L	D4,(A3)+
	MOVE.L	D6,(A3)
	SUBQ	#4,A3		; r moves down two words for the next column
	SUBQ	#4,D7		; k -= 2
	CMPI.W	#6,D7		; continue while k >= 3
	BGE.S	mv0l
	MOVEM.L	(A7)+,A1/A2/A3/A4/A5/D7/D6/D5/D4/D3/D2/D1
	UNLK	A6
	RTS

; Variable precision square.
; b must be in a different location from a.
;
; static squarev( a, b, prec )
; unsigned short a[], b[];
; int prec;
; {
;
; Arguments: 8(A6) = a (input), 12(A6) = b (output), 16(A6) = prec.
; prec+3 words of b, starting at b[2], are cleared first.
; For each column, p walks down from &a[k] and q walks up from &a[3]
; until q passes p. Each step multiplies two 32-bit pieces with MULU.L
; (emitted as DATA.W words). When p and q differ the 64-bit product is
; doubled, and the bit shifted out of it is added to the word at -6(A3).
; The product is then added into the 80 bits of b that end at r+3, in
; memory. After all columns are done, shup1(b) is called.
; D0 and A0 are used as scratch and are not in the save list.
	GLOBAL	squarev
squarev	LINK	A6,#0
	MOVEM.L	D7/D6/D5/D3/D2/D1/A5/A4/A3/A2/A1,-(A7)
	MOVE.L	8(A6),A4	; a
	MOVE.L	12(A6),A3	; b
	MOVE.L	16(A6),D6	; precision, in words
; clear the output array of prec+3 words
	MOVE.L	D6,D7		; prec
	ADDQ.L	#2,D7		; DBRA count prec+2 gives prec+3 stores
	MOVE.L	A3,A0		; b
	ADDQ	#4,A0		; start clearing at b[2]
	CLR.L	D1
sq5l	MOVE.W	D1,(A0)+
	DBRA	D7,sq5l
; r = &b[prec+3];
; for( k=prec+1; k>=3; k-- )
;	{
	MOVE.L	D6,D7		; prec
	ASL.L	#1,D7
	ADDA.L	D7,A3
	ADDQ	#6,A3		; r = &b[prec+3];
	ADDQ.L	#2,D7		; k = prec + 1
sq0l	MOVE.L	A4,A1		; q = &a[3];
	ADDQ	#6,A1
	MOVEA.L	A4,A0
	ADDA.L	D7,A0		; p = &a[k];
;while( p >= q )
;	{
sq1l	CMPA.L	A0,A1
	BHI.S	sq3l		; leave the inner loop once q is above p
;		if( (*p == 0) || (*q == 0) )
;			{
;			--p;
;			++q;
;			continue;
;			}
	MOVE.L	(A0),D0
	DATA.W	$4C11, $0403	;MULU.L (A1),D3:D0
	CMPA.L	A0,A1
	BEQ.S	sq2l		; p == q: do not double this term
	CLR.L	D1
	LSL.L	#1,D0		; 2ab term of square
	ROXL.L	#1,D3
	ROXL.L	#1,D1		; D1 = bit shifted out of the 64-bit product
	ADD.W	D1,-6(A3)	; add it to the top 16-bit word of the accumulator
sq2l	SUBQ	#4,A0		; p steps down one longword
	ADDQ	#4,A1		; q steps up one longword
;	SUBQ	#4,A3	; accumulate in *r
	CLR.L	D1		; zero addend for the final ADDX.W
	ADD.L	D0,(A3)		; low 32 bits; the MOVEs below do not alter X
	MOVE.L	-(A3),D2
	ADDX.L	D3,D2		; middle 32 bits plus carry
	MOVE.L	D2,(A3)
	MOVE.W	-(A3),D2
	ADDX.W	D1,D2		; top 16 bits plus carry
	MOVE.W	D2,(A3)+
	ADDQ	#4,A3		; A3 is back at r
	BRA	sq1l
;	}
; --r;
; }
sq3l	SUBQ	#4,A3		; r moves down two words for the next column
	SUBQ	#4,D7
	CMPI.W	#6,D7		; 6 = 2*3: continue while k >= 3
	BGE.S	sq0l
; shup1(b);
	MOVE.L	12(A6),-(A7)
	JSR	shup1
	ADDQ	#4,A7		; drop the argument
	MOVEM.L	(A7)+,A1/A2/A3/A4/A5/D7/D6/D5/D3/D2/D1
	UNLK	A6
	RTS

; mulm( b, ac3 )
; unsigned short b[], ac3[];
; {
;
; Arguments: 8(A6) = b, 12(A6) = ac3.
; A temporary act of NQ+3 words is carved out of the stack and cleared,
; then act[0] and act[1] are copied from ac3. Column sums of 32x32
; products of b and ac3 are accumulated into act, mdnorm( act ) is
; called, and NQ words of act are copied back over ac3.
; mdnorm is defined outside this part of the file.
; D0 and A0 are used as scratch and are not in the save list.
	GLOBAL	mulm
mulm	LINK	A6,#0
	MOVEM.L	D7/D6/D5/D4/D3/D2/D1/A5/A4/A3/A2/A1,-(A7)
	MOVE.L	8(A6),A4	; b
	MOVE.L	12(A6),A2	; ac3
	SUBA.L	#NQ+NQ+6,A7	; reserve NQ+3 words on the stack
	MOVE.L	A7,A5		; act
; qclear( act );
	MOVE.L	#NQ+2,D0	; DBRA count: NQ+3 words
	MOVE.L	A5,A1
	CLR.L	D1		; D1 stays 0 and is the zero addend for ADDX.W below
mm0l	MOVE.W	D1,(A1)+
	DBRA	D0,mm0l
;act[0] = ac3[0];
;act[1] = ac3[1];
	MOVE.L	A2,A0		; ac3
	MOVE.L	A5,A1		; act
	MOVE.W	(A0)+,(A1)+
	MOVE.W	(A0)+,(A1)+
;r = &act[NQ+1];
	MOVE.L	A5,A3
	ADDA.L	#NQ+NQ+2,A3
;for( k=NQ-1; k>=3; k -= 2 )
;{
	MOVE.L	#NQ-1,D7	; k
mm1l	CMP.B	#3,D7		; byte-sized compare of k against 3
	BLT.S	mm7l
;if( k == NQ-1 )
;	{
;;	CMP.B	#NQ-1,D7
;;	BNE.S	mm3al
;	m = NQ-3;
;	o = 5;
;;	MOVE.L	#NQ-3,D6	; m
;;	MOVE.L	#5,D5	; o
;;	BRA.S	mm3bl
;	}
;else
;	{
;	m = k;
;	o = 3;
mm3al	MOVE.L	D7,D6		; m
	MOVEQ.L	#3,D5
;	}
;	p = &b[m];
mm3bl	ASL.L	#1,D6		; m
	MOVE.L	A4,A0
	ADDA.L	D6,A0		; p
;	q = &ac3[o];
	ASL.L	#1,D5		; o
	MOVE.L	A2,A1
	ADDA.L	D5,A1		; q
	SUB.W	D5,D6
	ASR.W	#2,D6		; DBRA count = (m-o)/2, in longword steps
;	for( i=m; i>=o; i-- )
;	{
	ADDQ	#4,A0		; bias p so the first -(A0) fetch reads at &b[m]
	MOVE.L	(A3),D5		; accumulator low 32 bits, from r
	MOVE.L	-(A3),D4	; accumulator middle 32 bits
	MOVE.W	-(A3),D2	; accumulator top 16 bits
mm2l	MOVE.L	-(A0),D0
	DATA.W	$4C11, $0403	;MULU.L (A1),D3:D0
	ADD.L	D0,D5		; add the 64-bit product into D2.W : D4.L : D5.L
	ADDX.L	D3,D4
	ADDX.W	D1,D2
mm4l	ADDQ.L	#4,A1
	DBRA	D6,mm2l
;	}
;--r;
mm6l	MOVE.W	D2,(A3)+	; store the accumulator back where it was loaded
	MOVE.L	D4,(A3)+
	MOVE.L	D5,(A3)
	SUBQ.L	#4,A3		; r moves down two words
	SUBQ.L	#2,D7		; k -= 2
	BRA.S	mm1l
;}
;mdnorm( act );
mm7l	MOVE.L	A5,-(A7)
	JSR	mdnorm
	ADDQ.L	#4,A7		; drop the argument
;qmov( act, ac3 );
	MOVE.L	A5,A0		; act
	MOVE.L	A2,A1		; ac3
	MOVE.L	#NQ-1,D0	; DBRA count: NQ words
mm8l	MOVE.W	(A0)+,(A1)+
	DBRA	D0,mm8l
;}
	ADDA.L	#NQ+NQ+6,A7	; release act
	MOVEM.L	(A7)+,A1/A2/A3/A4/A5/D7/D6/D5/D4/D3/D2/D1
	UNLK	A6
	RTS

;mulin( b, ac3 )
;unsigned short b[], ac3[];
;{
;
; Arguments: 8(A6) = b, 12(A6) = ac3.
; Multiplies ac3 by the single 16-bit word b[3]. A temporary act of NQ+2
; words is carved out of the stack and cleared, then act[0] and act[1]
; are copied from ac3. For i = NQ-1 down to 3 the 32-bit product
; b[3] * ac3[i] is added to act[i]:act[i+1], with the carry added to
; act[i-1]. mdnorm( act ) is then called and NQ words of act are copied
; back over ac3. mdnorm is defined outside this part of the file.
; D0 and A0 are used as scratch and are not in the save list.
	GLOBAL	mulin
mulin	LINK	A6,#0
	MOVEM.L	D7/D6/D5/D2/D1/A5/A4/A3/A2/A1,-(A7)
	MOVE.L	8(A6),A4	; b
	MOVE.L	12(A6),A2	; ac3
	SUBA.L	#NQ+NQ+4,A7	; reserve NQ+2 words on the stack
	MOVE.L	A7,A5		; act
; qclear( act );
	MOVE.L	#NQ+1,D0	; DBRA count: NQ+2 words
	MOVE.L	A5,A1
	CLR.L	D1		; D1 stays 0 and is the zero addend for ADDX.W below
mn0l	MOVE.W	D1,(A1)+
	DBRA	D0,mn0l
;act[0] = ac3[0];
;act[1] = ac3[1];
	MOVE.L	A2,A0		; ac3
	MOVE.L	A5,A1		; act
	MOVE.W	(A0)+,(A1)+
	MOVE.W	(A0)+,(A1)+
;r = &act[NQ];
	MOVE.L	A5,A3
	ADDA.L	#NQ+NQ,A3
;y = b[3];
	CLR.L	D6		; upper half of D6 is zero before the word load
	MOVE.W	6(A4),D6
;p = &ac3[NQ-1];
	MOVE.L	A2,A1
	ADDA.L	#NQ+NQ-2,A1
;for( i=NQ-1; i>=3; i-- )
;	{
	MOVE.L	#NQ-1,D7
mn1l	CMP.B	#2,D7		; byte-sized compare: stop when i reaches 2
	BEQ.S	mn7l
;	if( *p == 0 )
;		{
;		--p;
;		--r;
;		continue;
;		}
	MOVE.L	D6,D0
	MULU	(A1),D0		; 16x16->32: D0 = y * *p
	SUBQ	#2,A1		; --p
	SUBQ	#2,A3		; --r
	ADD.L	D0,(A3)		; add the product to the longword at r
	MOVE.W	-(A3),D2	; MOVE does not alter X
	ADDX.W	D1,D2		; add the carry to the word just below
	MOVE.W	D2,(A3)+	; A3 is back at r
;	ADDQ	#2,A3
;	}
	SUBQ	#1,D7
	BRA.S	mn1l
;mdnorm( act );
mn7l	MOVE.L	A5,-(A7)
	JSR	mdnorm
	ADDQ	#4,SP		; drop the argument
;qmov( act, ac3 );
	MOVE.L	A5,A0		; act
	MOVE.L	A2,A1		; ac3
	MOVE.L	#NQ-1,D0	; DBRA count: NQ words
mn8l	MOVE.W	(A0)+,(A1)+
	DBRA	D0,mn8l
;}
	ADDA.L	#NQ+NQ+4,A7	; release act
	MOVEM.L	(A7)+,A1/A2/A3/A4/A5/D7/D6/D5/D2/D1
	UNLK	A6
	RTS

;divsh( a, prod )
	GLOBAL	divsh
divsh	LINK	A6,#0
	MOVEM.L	D7/D6/D5/D2/D1/A5/A4/A3/A2/A1,-(A7)
	MOVE.L	8(A6),A4	; a
	MOVE.L	12(A6),A5	; prod
;prod[NQ] = 0;
	CLR.W	NQ+NQ(A5)
;prod[NQ+1] = 0;
	CLR.W	NQ+NQ+2(A5)
;prod[NQ+2] = 
0; CLR.W NQ+NQ+4(A5) ;shdn1( prod ); MOVE.L A5,-(A7) JSR shdn1 ADDQ #4,A7 ;shdn1( prod ); MOVE.L A5,-(A7) JSR shdn1 ADDQ #4,A7 ;d = a[3]; MOVE.L #-65536,D5 ; 0xffff0000 MOVE.W 6(A4),D6 SWAP D6 AND.L D5,D6 ;u = ((unsigned long )prod[3] << 16) | prod[4]; MOVE.L A5,A0 ADDA.L #6,A0 ; &prod[3] MOVE.L A0,A1 MOVE.L (A0)+,D0 MOVE.L (A0)+,D1 ;for( i=3; i