From: "Alexei A. Frounze" Newsgroups: comp.os.msdos.djgpp Subject: Re: inefficiency of GCC output code & -O problem Date: Wed, 12 Apr 2000 19:46:13 +0400 Organization: MTU-Intel ISP Lines: 730 Message-ID: <38F49A45.13F0AB1@mtu-net.ru> References: <38F20E7A DOT 3330E9A4 AT mtu-net DOT ru> <38F23A21 DOT A59621A1 AT inti DOT gov DOT ar> NNTP-Posting-Host: ppp98-177.dialup.mtu-net.ru Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------6DF912F1089F64E61B72F12B" X-Trace: gavrilo.mtu.ru 955554391 54168 212.188.98.177 (12 Apr 2000 15:46:31 GMT) X-Complaints-To: usenet-abuse AT mtu DOT ru NNTP-Posting-Date: 12 Apr 2000 15:46:31 GMT X-Mailer: Mozilla 4.61 [en] (Win95; I) X-Accept-Language: en,ru To: djgpp AT delorie DOT com DJ-Gateway: from newsgroup comp.os.msdos.djgpp Reply-To: djgpp AT delorie DOT com This is a multi-part message in MIME format. --------------6DF912F1089F64E61B72F12B Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Here is a part of my project. I removed as much code as I could so that it stands alone. It still doesn't compile with the -O2 switch, although it's okay without it. See the attachment... I renamed the file from .C to .TXT because Netscape treats it as a binary file due to file associations. :)) Thanks. Alexei A. Frounze ----------------------------------------- Homepage: http://alexfru.chat.ru Mirror: http://members.xoom.com/alexfru salvador wrote: > > 2nd question... > > > > Why the "-O2" switch works normally for pure C source code and makes compiler > > failing on the source with inline assembly (in the .S file made out of such .C > > an error encounters: > > "Error: Error: Missing ')' assumed" > > "Error: Error: Ignoring junk `(%ebp))' after expression")? > > W/o the -O2 switch it's compiled fine. Isn't it a little bit strange? > > Can you show a small example? I guess that's an error in your inline assembler > code, but I can't know without the actual code.
--------------6DF912F1089F64E61B72F12B Content-Type: text/plain; charset=us-ascii; name="Tmapping.txt" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="Tmapping.txt" /***************************************************/ /* Written by Alexei A. Frounze on */ /* 12 of jan 1999 ... 5 mar 2000 */ /* */ /* Compiler: DJGPP 2.95.2 */ /* */ /* E-mail : alexfru AT chat DOT ru */ /* Homepage: http://www.chat.ru/~alexfru */ /* http://members.xoom.com/alexfru */ /***************************************************/ // See Mark Feldman's article about texture mapping: // "http://www.geocities.com/SiliconValley/2151" // and Mikael Kalms' article about texture mapping: // "http://www.lysator.liu.se/~mikaelk/doc/perspectivetexture/" #include #include #include typedef struct { double X, Y, Z; } Vector; #define Screen_Width 320 #define Screen_Height 200 #define Center_X 160 #define Center_Y 100 #define y2addr(y) ((y<<8)+(y<<6)) #define VGAAddr NULL #define KP 256 #define Max_Edges 64 #define ZClose 0.01 #define SUB_BITS 4 #define SUB_CNT (1 << SUB_BITS) #define FP_BITS (16-SUB_BITS) /* ORIGINAL POLYGON DATA */ // Number of polygon vertices int VCNT; // Vertices of a polygon in a 3d space Vector vertices[Max_Edges+1]; // Appropriate u and v values double tu[Max_Edges+1], tv[Max_Edges+1]; /* TEMPORARY POLYGON DATA */ // Number of vertices at the screen int _VCNT; // Clipped polygon vertices Vector _Vertices[Max_Edges+1]; // Appropriate u and v values double _tu[Max_Edges+1], _tv[Max_Edges+1]; // Screen vertices of a polygon double XR[Max_Edges+1], YR[Max_Edges+1]; // Buffers for projecting a polygon.
Left & right screen boundaries: int Scan_Buf_L[Screen_Height], Scan_Buf_R[Screen_Height]; double inv_z_L[Screen_Height], u_div_z_L[Screen_Height], v_div_z_L[Screen_Height]; double inv_z_R[Screen_Height], u_div_z_R[Screen_Height], v_div_z_R[Screen_Height]; double Scan_Buf_L2[Screen_Height], Scan_Buf_R2[Screen_Height]; /* OTHER VARIABLES */ // Minimal & maximal screen X and Y (integer) of a polygon: int YminI, YmaxI, XminI, XmaxI; // Boolean array for tmapper. It is used to clip polygons with // Z=ZClose plain int Behind[Max_Edges+1]; // Linear interpolation choosing (0=none,1=16pxls) int SubDiv=1; /* FUNCTIONS */ // This one finds screen min & max X for each Y of polgon's edge void Scan_Edge (int N) { double X, DX; int IsLeftEdge; double X1R, Y1R, X2R, Y2R; int Y1I, Y2I, XI, YI; double iz1, iz2, uz1, uz2, vz1, vz2; double iz, uz, vz, diz, duz, dvz; double ddd; int M; short SW, LW = 0x1B3F; M = (N+1)%_VCNT; X1R = XR[N]; Y1R = YR[N]; iz1 = (double)1/_Vertices[N].Z; uz1 = _tu[N] * iz1; vz1 = _tv[N] * iz1; X2R = XR[M]; Y2R = YR[M]; iz2 = (double)1/_Vertices[M].Z; uz2 = _tu[M] * iz2; vz2 = _tv[M] * iz2; IsLeftEdge = Y2R < Y1R; ddd = 1.0 / (Y1R - Y2R); DX = (X1R - X2R) * ddd; diz = (iz1 - iz2) * ddd; duz = (uz1 - uz2) * ddd; dvz = (vz1 - vz2) * ddd; if (IsLeftEdge) { Y2I = ceil(Y2R); Y1I = ceil(Y1R)-1; if (Y2I > Y1I) return; ddd = Y2I - Y2R; X = X2R + ddd * DX; iz = iz2 + ddd * diz; uz = uz2 + ddd * duz; vz = vz2 + ddd * dvz; YI = Y1I; Y1I = Y2I; Y2I = YI; } else { Y1I = ceil(Y1R); Y2I = ceil(Y2R)-1; if (Y1I > Y2I) return; ddd = Y1I - Y1R; X = X1R + ddd * DX; iz = iz1 + ddd * diz; uz = uz1 + ddd * duz; vz = vz1 + ddd * dvz; }; if (Y1I < 0) { X -= Y1I*DX; iz -= Y1I*diz; uz -= Y1I*duz; vz -= Y1I*dvz; Y1I = 0; }; if (Y2I > Screen_Height-1) Y2I = Screen_Height-1; __asm__ __volatile__ (" fstcw (%0) fldcw (%1) fldl (%2) " : : "g" (&SW), "g" (&LW), "g" (&X) ); if (IsLeftEdge) for (YI=Y1I;YI<=Y2I;YI++) { /* Scan_Buf_L2[YI] = X; XI = ceil(X); Scan_Buf_L[YI] = XI; X += DX; */ 
__asm__ __volatile__ (" fld %%st fstl (%0) frndint fistpl (%2) faddl (%1) " : : "g" (&Scan_Buf_L2[YI]), "g" (&DX), "g" (&Scan_Buf_L[YI]) ); /* inv_z_L[YI] = iz; iz += diz; u_div_z_L[YI] = uz; uz += duz; v_div_z_L[YI] = vz; vz += dvz; */ __asm__ __volatile__ (" fldl (%0) fstl (%2) faddl (%1) fstpl (%0) " : : "g" (&iz), "g" (&diz), "g" (&inv_z_L[YI]) ); __asm__ __volatile__ (" fldl (%0) fstl (%2) faddl (%1) fstpl (%0) " : : "g" (&uz), "g" (&duz), "g" (&u_div_z_L[YI]) ); __asm__ __volatile__ (" fldl (%0) fstl (%2) faddl (%1) fstpl (%0) " : : "g" (&vz), "g" (&dvz), "g" (&v_div_z_L[YI]) ); } else for (YI=Y1I;YI<=Y2I;YI++) { /* Scan_Buf_R2[YI] = X; XI = ceil(X)-1; Scan_Buf_R[YI] = XI; X += DX; */ __asm__ __volatile__ (" fld %%st fstl (%0) frndint fistpl (%2) faddl (%1) decl (%2) " : : "g" (&Scan_Buf_R2[YI]), "g" (&DX), "g" (&Scan_Buf_R[YI]) ); /* inv_z_R[YI] = iz; iz += diz; u_div_z_R[YI] = uz; uz += duz; v_div_z_R[YI] = vz; vz += dvz; */ __asm__ __volatile__ (" fldl (%0) fstl (%2) faddl (%1) fstpl (%0) " : : "g" (&iz), "g" (&diz), "g" (&inv_z_R[YI]) ); __asm__ __volatile__ (" fldl (%0) fstl (%2) faddl (%1) fstpl (%0) " : : "g" (&uz), "g" (&duz), "g" (&u_div_z_R[YI]) ); __asm__ __volatile__ (" fldl (%0) fstl (%2) faddl (%1) fstpl (%0) " : : "g" (&vz), "g" (&dvz), "g" (&v_div_z_R[YI]) ); }; __asm__ __volatile__ (" fstpl (%1) fldcw (%0) " : : "g" (&SW), "g" (&X) ); } /**********************************/ /* MAIN TEXTURE MAPPING FUNCTIONS */ /**********************************/ // Maps a texture onto an arbitrary polygon void T_Map (char *texture) { double NX, NY, IZ; int IndexMin, IndexMax, IndexMinX, IndexMaxX, i, j, k; int xl, xr; double uz, vz, duz, dvz; double iz, diz; int u, v; int zzz; char *scr; double iiz; int u1, v1, u2, v2, du, dv, n; int tmp1, tmp2; int sub_cnt = SUB_CNT; int CONST = 1 << (SUB_BITS + FP_BITS); double sss; double diz2, duz2, dvz2; if (!texture) return; for(i=0;i YR[IndexMax]) IndexMax = i; if (XR[i] < XR[IndexMinX]) IndexMinX = i; if (XR[i] > 
XR[IndexMaxX]) IndexMaxX = i; }; // Getting integer values of min Y and max Y YminI = ceil(YR[IndexMin]); YmaxI = ceil(YR[IndexMax])-1; XminI = ceil(XR[IndexMinX]); XmaxI = ceil(XR[IndexMaxX])-1; // Is polygon visible at screen? if ((YminI > YmaxI) || (XminI > XmaxI)) return; tmp1 = 0;//Boundaries[SSP-1].Ymin; tmp2 = 319;//Boundaries[SSP-1].Ymax; if ((YminI > tmp2) || (YmaxI < tmp1)) return; if (YminI < tmp1) YminI = tmp1; if (YmaxI > tmp2) YmaxI = tmp2; // It's visible. Finding left and right screen boundaries per each y-line. // It's done for all edges of a polygon for (i=0;i<_VCNT;i++) Scan_Edge (i); for (i=YminI;i<=YmaxI;i++) { xl=Scan_Buf_L[i]; xr=Scan_Buf_R[i]; if (xl > xr) continue; if ((xl>=Screen_Width) || (xr<0)) continue; tmp1 = 0;//(int)Boundaries[SSP-1].bounds[i][0]; tmp2 = 319;//(int)Boundaries[SSP-1].bounds[i][1]; if ((tmp1 > tmp2) || (xl > tmp2) || (xr < tmp1)) continue; iz = inv_z_L[i]; uz = u_div_z_L[i]; vz = v_div_z_L[i]; /* sss = Scan_Buf_R2[i]-Scan_Buf_L2[i]; diz = (inv_z_R[i] - inv_z_L[i]) / sss; duz = (u_div_z_R[i] - u_div_z_L[i]) / sss; dvz = (v_div_z_R[i] - v_div_z_L[i]) / sss; */ __asm__ __volatile__ (" fld1 fldl (%0) fsubl (%1)" : : "g" (&Scan_Buf_R2[i]), "g" (&Scan_Buf_L2[i]) ); __asm__ __volatile__ (" fdivrp fld %%st fld %%st fldl (%0) fsubl (%1) fmulp fstpl (%2)" : : "g" (&inv_z_R[i]), "g" (&inv_z_L[i]), "g" (&diz) ); __asm__ __volatile__ (" fldl (%0) fsubl (%1) fmulp fstpl (%2) fldl (%3) fsubl (%4) fmulp fstpl (%5)" : : "g" (&u_div_z_R[i]), "g" (&u_div_z_L[i]), "g" (&duz), "g" (&v_div_z_R[i]), "g" (&v_div_z_L[i]), "g" (&dvz) ); /* sss = xl - Scan_Buf_L2[i]; iz += diz * sss; uz += duz * sss; vz += dvz * sss; */ __asm__ __volatile__ (" fildl (%0) fsubl (%1) fld %%st fld %%st " : : "g" (&xl), "g" (&Scan_Buf_L2[i]) ); __asm__ __volatile__ (" fmull (%0) faddl (%1) fstpl (%1) fmull (%2) faddl (%3) fstpl (%3) fmull (%4) faddl (%5) fstpl (%5) " : : "g" (&diz), "g" (&iz), "g" (&duz), "g" (&uz), "g" (&dvz), "g" (&vz) ); if (xl < tmp1) { /* iz 
+= diz*(tmp1-xl); uz += duz*(tmp1-xl); vz += dvz*(tmp1-xl); */ __asm__ __volatile__ (" fildl (%0) fisubl (%1) fld %%st fld %%st" : : "g" (&tmp1), "g" (&xl) ); __asm__ __volatile__ (" fmull (%0) faddl (%1) fstpl (%1) fmull (%2) faddl (%3) fstpl (%3) fmull (%4) faddl (%5) fstpl (%5)" : : "g" (&diz), "g" (&iz), "g" (&duz), "g" (&uz), "g" (&dvz), "g" (&vz) ); xl = tmp1; }; if (xr > tmp2) xr = tmp2; zzz = xr-xl+1; scr = NULL;//VGAAddr + xl + y2addr (i); diz2 = diz; duz2 = duz; dvz2 = dvz; if (SubDiv) { if (zzz>=SUB_CNT) { /* diz *= SUB_CNT; duz *= SUB_CNT; dvz *= SUB_CNT; */ __asm__ __volatile__ (" fildl (%0) fld %%st fld %%st fldl (%1) fmulp fstpl (%1) fldl (%2) fmulp fstpl (%2) fldl (%3) fmulp fstpl (%3)" : : "g" (&sub_cnt), "g" (&diz), "g" (&duz), "g" (&dvz) ); /* iiz = (1 << (SUB_BITS + FP_BITS)) / iz; u1 = (int)(uz * iiz); v1 = (int)(vz * iiz); */ __asm__ __volatile__ (" fildl (%0) fdivl (%1) fld %%st fmull (%2) fistpl (%4) fmull (%3) fistpl (%5)" : : "g" (&CONST), "g" (&iz), "g" (&uz), "g" (&vz), "g" (&u1), "g" (&v1) ); iz += diz; uz += duz; vz += dvz; /* iiz = (1 << (SUB_BITS + FP_BITS)) / iz; u2 = (int)(uz * iiz); v2 = (int)(vz * iiz); */ __asm__ __volatile__ (" fildl (%0) fdivl (%1) fld %%st fmull (%2) fistpl (%4) fmull (%3) fistpl (%5)" : : "g" (&CONST), "g" (&iz), "g" (&uz), "g" (&vz), "g" (&u2), "g" (&v2) ); du = u2 - u1; dv = v2 - v1; // sar du, SUB_BITS // sar dv, SUB_BITS __asm__ __volatile__ (" sarl %2, (%0) sarl %2, (%1)" : : "g" (&du), "g" (&dv), "g" (SUB_BITS) ); // while(1) { while (zzz>=SUB_CNT) { n = SUB_CNT; zzz -= SUB_CNT; // if (zzz<0) n+=zzz; // iz += diz; // iiz = (1 << (SUB_BITS + FP_BITS)) / iz; __asm__ __volatile__ (" fldl (%1) faddl (%2) fstl (%1) fidivrl (%0)" : : "g" (&CONST), "g" (&iz), "g" (&diz) ); // span (scr, texture, n, &u1, &v1, du, dv); scr += n; if (zzz