From: invalid AT erehwon DOT invalid (Graaagh the Mighty)
Newsgroups: comp.os.msdos.djgpp
Subject: Re: Mysterious NaNs...
Organization: Low Charisma Anonymous
Message-ID: <3b2b5a7e.169695543@news.primus.ca>
References: <3b2b4290 DOT 163568911 AT news DOT primus DOT ca> <2561-Sat16Jun2001155617+0300-eliz AT is DOT elta DOT co DOT il>
X-Newsreader: Forte Free Agent 1.11/32.235
Lines: 225
Date: Sat, 16 Jun 2001 14:18:04 GMT
NNTP-Posting-Host: 207.176.153.43
X-Complaints-To: news AT primus DOT ca
X-Trace: news2.tor.primus.ca 992701300 207.176.153.43 (Sat, 16 Jun 2001 10:21:40 EDT)
NNTP-Posting-Date: Sat, 16 Jun 2001 10:21:40 EDT
To: djgpp AT delorie DOT com
DJ-Gateway: from newsgroup comp.os.msdos.djgpp
Reply-To: djgpp AT delorie DOT com

The problem is basically that I have a loop that updates some doubles;
at the end of each iteration it tests these for NaN, then it tests
them for lying in a certain range. If they lie in that range the loop
"break"s. The loop also has a maximum number of iterations. Right
after the loop end is another test for NaN on these same doubles
(whose scope is local to the function, but not the loop). The exact
same criterion is used to test them; the odd thing is sometimes the
variables are not NaNs at the test in the final loop iteration, and
then they *are* NaNs at the after-loop test, *without an intervening
assignment to them*. (The loop's update operation is to increment some
int or another -- no assignments to the doubles. I am aware that that
increment implicitly occurs at the end of each iteration.)

I don't think it can be the -ffast-math flag. The optimizer must be
rearranging the code in such a way as to move an assignment to one of
the doubles after the loop's NaN test or something similar, which is
abnormal since it means it's altering the program semantics in a way
that visibly affects the output -- reordering stuff with side effects!

Even more telling: I just tested it at various optimization levels. At
default optimizations, the in-loop NaN test succeeds in catching the
calculations that cause NaNs. At higher optimization levels, the
in-loop NaN test does not. The output is also different: there is
actually one fewer occurrence of NaNs in the loop. So it looks like
maybe the optimizer is mistakenly killing the in-loop test.

At the end is the shortest program I was able to find that replicates
the problem. It's the original program with a lot more short-circuited
and dead code snipped. Compile it with no options except for each
possible combination of -O2 -ffast-math. Only with both those options
does the in-loop test fail. I did try to make a much shorter example,
but you know optimizer bugs ... the problem function has to be kept
largely intact even if the surroundings are replaced with assorted
dummies.

/*
 * nantest.c
 * (c) 2001 Paul Derbyshire
 */

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <values.h> // MAXINT

typedef double ldouble; // Change this when porting to
machines/compilers with no long double.
typedef int bool;

int max_components, num_components, aa_degree, max_iters, width;
ldouble f_pix_size, *f_x_orbit2, *f_y_orbit2, f_aa_size;

int width = 1024;

void f_find_components (int);
int num_components_of_period (int);
double f_calc_dist_per (ldouble, ldouble);

int main (void) {
  aa_degree = 3;
  max_iters = 11;
  width = 1024;
  max_components = 10;
  num_components = 0;
  f_x_orbit2 = malloc(max_iters*sizeof(ldouble));
  f_y_orbit2 = malloc(max_iters*sizeof(ldouble));
  f_find_components(49);
  free(f_x_orbit2);
  free(f_y_orbit2);
  return 0;
}

void f_find_components (int per) {
  ldouble mag = ((ldouble)(per*per))*0.25;
  ldouble cx, cy, ocx, ocy, x, y, xp, yp, t, *x1, *y1;
  int i, iters, i2;
  int m = 0;
  int n = 0;
  int q = num_components_of_period(per);
  int expected = (q > max_components)?max_components:q;
  int seed = random();
  int shallow = 0;
  int nanbug = 0;
  int diverged = 0;
  int smaller = 0;
  int duplicates = 0;
  int maxi = 0;
  f_pix_size = 1/(width*mag);
  f_aa_size = f_pix_size/aa_degree;
  if (per == 1) {
    (*f_x_orbit2) = 0.0;
    (*f_y_orbit2) = 0.0;
    num_components = 1;
    srandom(seed);
    return;
  }
  if (per == 2) {
    (*f_x_orbit2) = -1.0;
    (*f_y_orbit2) = 0.0;
    num_components = 1;
    srandom(seed);
    return;
  }
  srandom(1746573498L);
  for (i = 0; i < max_iters; ++i) {
    cx = (((ldouble)(random()))/((ldouble)(MAXINT)))*4.0 - 2.0;
    cy = (((ldouble)(random()))/((ldouble)(MAXINT)))*3.0 - 1.5;
    x = 0.0;
    y = 0.0;
    xp = 0.0;
    yp = 0.0;
    for (iters = 0; iters < per; ++iters) {
      x = xp - yp + cx;
      y = 2.0*x*y + cy;
      xp = x*x;
      yp = y*y;
      if ((xp + yp) > 4.0) break;
    }
    if (iters == per) {
      for (iters = 0; iters < max_iters; ++iters) {
        ocx = cx;
        ocy = cy;
        x = 0.0;
        y = 0.0;
        xp = 0.0;
        yp = 0.0;
        for (i2 = 0; i2 < per; ++i2) {
          t = 2.0*(x*xp - y*yp) + 1.0;
          yp = 2.0*(x*yp + y*xp);
          xp = t;
          t = x*x - y*y + cx;
          y = 2.0*x*y + cy;
          x = t;
        }
        t = xp*xp + yp*yp;
        cx -= (xp*x + yp*y)/t;
        cy -= (xp*y - yp*x)/t;
        if (!((cx * cx) >= 0.0)) {
          printf("NaNs detected in loop!\ncx = %1.16f, cy = %1.16f, t
= %1.16f, iters = %d.\n", cx, cy, t, iters);
          iters = max_iters;
          break;
        }
        if ((fabs(cx - ocx) < f_aa_size) && (fabs(cy - ocy) <
f_aa_size)) {
          --iters;
          break;
        }
      }
      if (iters > maxi) {
        if (iters < max_iters)
          maxi = iters + 1;
        else
          maxi = iters;
      }
      if (!((cx * cx) >= 0.0)) {
        printf("NaNs detected after loop!\ncx = %1.16f, cy =
%1.16f.\n", cx, cy);
      } else {
        if (iters < max_iters) {
          if ((fabs(cx) > 0.2) || (fabs(cy) > 0.2)) {
            if (f_calc_dist_per(cx, cy) < (1/(mag*mag) - f_aa_size)) {
              // Check for duplication, including symmetric.
              x1 = f_x_orbit2;
              y1 = f_y_orbit2;
              for (i2 = 0; i2 < n; ++i2) {
                if ((fabs(cx - (*x1)) < f_aa_size) && (fabs(cy -
(*y1)) < f_aa_size)) break;
                if ((fabs(cx - (*x1)) < f_aa_size) && (fabs(cy +
(*y1)) < f_aa_size)) break;
                ++x1;
                ++y1;
              }
              if (i2 == n) {
                // It's new!
                (*x1) = cx;
                (*y1) = fabs(cy);
                ++n;
                ++m;
                if (fabs(cy) > f_aa_size) ++m;
                if (m == q) break; // We won't find any more.
                if (m == max_components) break; // We have enough.
              } else {
                ++duplicates;
              }
            } else {
              ++smaller;
            }
          } else {
            ++smaller;
          }
        } else {
          ++diverged;
        }
      }
    } else {
      ++shallow;
    }
  }
  num_components = n;
  srandom(seed);
}

int num_components_of_period (int per) {
  int i;
  int n;
  if (per == 1) return 1;
  if (per > 32) return max_components + 1;
  n = (1 << (per - 1)) - 1;
  for (i = 2; i < per; ++i) {
    if ((per/i)*i == per)
      n -= num_components_of_period(i);
  }
  return n;
}

double f_calc_dist_per (ldouble cx, ldouble cy) {
  return 0.0;
}

-- 
Bill Gates: "No computer will ever need more than 640K of RAM." -- 1980
"There's nobody getting rich writing software that I know of." -- 1980
"This antitrust thing will blow over." -- 1998
Combine neo, an underscore, and one thousand sixty-one to make my hotmail addy.