From: invalid AT erehwon DOT invalid (Graaagh the Mighty) Newsgroups: comp.os.msdos.djgpp Subject: Re: Mysterious NaNs... Organization: Low Charisma Anonymous Message-ID: <3b2b5a7e.169695543@news.primus.ca> References: <3b2b4290 DOT 163568911 AT news DOT primus DOT ca> <2561-Sat16Jun2001155617+0300-eliz AT is DOT elta DOT co DOT il> X-Newsreader: Forte Free Agent 1.11/32.235 Lines: 225 Date: Sat, 16 Jun 2001 14:18:04 GMT NNTP-Posting-Host: 207.176.153.43 X-Complaints-To: news AT primus DOT ca X-Trace: news2.tor.primus.ca 992701300 207.176.153.43 (Sat, 16 Jun 2001 10:21:40 EDT) NNTP-Posting-Date: Sat, 16 Jun 2001 10:21:40 EDT To: djgpp AT delorie DOT com DJ-Gateway: from newsgroup comp.os.msdos.djgpp Reply-To: djgpp AT delorie DOT com The problem is basically that I have a loop that updates some doubles; at the end of each iteration it tests these for NaN, then it tests them for lying in a certain range. If they lie in that range the loop "break"s. The loop also has a maximum number of iterations. Right after the loop end is another test for NaN on these same doubles (whose scope is local to the function, but not the loop). The exact same criterion is used to test them; the odd thing is sometimes the variables are not NaNs at the test in the final loop iteration, and then they *are* NaNs at the after-loop test, *without an intervening assignment to them*. (The loop's update operation is to increment some int or another -- no assignments to the doubles. I am aware that that increment implicitly occurs at the end of each iteration.) I don't think it can be the -ffast-math flag. The optimizer must be rearranging the code in such a way as to move an assignment to one of the doubles after the loop's NaN test or something similar, which is abnormal since it means it's altering the program semantics in a way that visibly affects the output -- reordering stuff with side effects! Even more telling: I just tested it at various optimization levels. At default optimizations, the in-loop NaN test succeeds in catching the calculations that cause NaNs. At higher optimization levels, the in-loop NaN test does not. The output is also different: there is actually one fewer occurrence of NaNs in the loop. So it looks like maybe the optimizer is mistakenly killing the in-loop test. At the end is the shortest program I was able to find that replicates the problem. It's the original program with a lot more short-circuited and dead code snipped. Compile it with no options except for each possible combination of -O2 -ffast-math. Only with both those options does the in-loop test fail. I did try to make a much shorter example, but you know optimizer bugs ... the problem function has to be kept largely intact even if the surroundings are replaced with assorted dummies. /* * nantest.c * (c) 2001 Paul Derbyshire */ #include #include #include #include // MAXINT typedef double ldouble; // Change this when porting to machines/compilers with no long double. typedef int bool; int max_components, num_components, aa_degree, max_iters, width; ldouble f_pix_size, *f_x_orbit2, *f_y_orbit2, f_aa_size; int width = 1024; void f_find_components (int); int num_components_of_period (int); double f_calc_dist_per (ldouble, ldouble); int main (void) { aa_degree = 3; max_iters = 11; width = 1024; max_components = 10; num_components = 0; f_x_orbit2 = malloc(max_iters*sizeof(ldouble)); f_y_orbit2 = malloc(max_iters*sizeof(ldouble)); f_find_components(49); free(f_x_orbit2); free(f_y_orbit2); return 0; } void f_find_components (int per) { ldouble mag = ((ldouble)(per*per))*0.25; ldouble cx, cy, ocx, ocy, x, y, xp, yp, t, *x1, *y1; int i, iters, i2; int m = 0; int n = 0; int q = num_components_of_period(per); int expected = (q > max_components)?max_components:q; int seed = random(); int shallow = 0; int nanbug = 0; int diverged = 0; int smaller = 0; int duplicates = 0; int maxi = 0; f_pix_size = 1/(width*mag); f_aa_size = f_pix_size/aa_degree; if (per == 1) { (*f_x_orbit2) = 0.0; (*f_y_orbit2) = 0.0; num_components = 1; srandom(seed); return; } if (per == 2) { (*f_x_orbit2) = -1.0; (*f_y_orbit2) = 0.0; num_components = 1; srandom(seed); return; } srandom(1746573498L); for (i = 0; i < max_iters; ++i) { cx = (((ldouble)(random()))/((ldouble)(MAXINT)))*4.0 - 2.0; cy = (((ldouble)(random()))/((ldouble)(MAXINT)))*3.0 - 1.5; x = 0.0; y = 0.0; xp = 0.0; yp = 0.0; for (iters = 0; iters < per; ++iters) { x = xp - yp + cx; y = 2.0*x*y + cy; xp = x*x; yp = y*y; if ((xp + yp) > 4.0) break; } if (iters == per) { for (iters = 0; iters < max_iters; ++iters) { ocx = cx; ocy = cy; x = 0.0; y = 0.0; xp = 0.0; yp = 0.0; for (i2 = 0; i2 < per; ++i2) { t = 2.0*(x*xp - y*yp) + 1.0; yp = 2.0*(x*yp + y*xp); xp = t; t = x*x - y*y + cx; y = 2.0*x*y + cy; x = t; } t = xp*xp + yp*yp; cx -= (xp*x + yp*y)/t; cy -= (xp*y - yp*x)/t; if (!((cx * cx) >= 0.0)) { printf("NaNs detected in loop!\ncx = %1.16f, cy = %1.16f, t = %1.16f, iters = %d.\n", cx, cy, t, iters); iters = max_iters; break; } if ((fabs(cx - ocx) < f_aa_size) && (fabs(cy - ocy) < f_aa_size)) { --iters; break; } } if (iters > maxi) { if (iters < max_iters) maxi = iters + 1; else maxi = iters; } if (!((cx * cx) >= 0.0)) { printf("NaNs detected after loop!\ncx = %1.16f, cy = %1.16f.\n", cx, cy); } else { if (iters < max_iters) { if ((fabs(cx) > 0.2) || (fabs(cy) > 0.2)) { if (f_calc_dist_per(cx, cy) < (1/(mag*mag) - f_aa_size)) { // Check for duplication, including symmetric. x1 = f_x_orbit2; y1 = f_y_orbit2; for (i2 = 0; i2 < n; ++i2) { if ((fabs(cx - (*x1)) < f_aa_size) && (fabs(cy - (*y1)) < f_aa_size)) break; if ((fabs(cx - (*x1)) < f_aa_size) && (fabs(cy + (*y1)) < f_aa_size)) break; ++x1; ++y1; } if (i2 == n) { // It's new! (*x1) = cx; (*y1) = fabs(cy); ++n; ++m; if (fabs(cy) > f_aa_size) ++m; if (m == q) break; // We won't find any more. if (m == max_components) break; // We have enough. } else { ++duplicates; } } else { ++smaller; } } else { ++smaller; } } else { ++diverged; } } } else { ++shallow; } } num_components = n; srandom(seed); } int num_components_of_period (int per) { int i; int n; if (per == 1) return 1; if (per > 32) return max_components + 1; n = (1 << (per - 1)) - 1; for (i = 2; i < per; ++i) { if ((per/i)*i == per) n -= num_components_of_period(i); } return n; } double f_calc_dist_per (ldouble cx, ldouble cy) { return 0.0; } -- Bill Gates: "No computer will ever need more than 640K of RAM." -- 1980 "There's nobody getting rich writing software that I know of." -- 1980 "This antitrust thing will blow over." -- 1998 Combine neo, an underscore, and one thousand sixty-one to make my hotmail addy.