From: invalid AT erehwon DOT invalid (Graaagh the Mighty) Newsgroups: comp.os.msdos.djgpp Subject: Mysterious NaNs... Organization: Low Charisma Anonymous Message-ID: <3b2b4290.163568911@news.primus.ca> X-Newsreader: Forte Free Agent 1.11/32.235 Lines: 655 Date: Sat, 16 Jun 2001 11:36:51 GMT NNTP-Posting-Host: 207.176.153.100 X-Complaints-To: news AT primus DOT ca X-Trace: news2.tor.primus.ca 992691528 207.176.153.100 (Sat, 16 Jun 2001 07:38:48 EDT) NNTP-Posting-Date: Sat, 16 Jun 2001 07:38:48 EDT To: djgpp AT delorie DOT com DJ-Gateway: from newsgroup comp.os.msdos.djgpp Reply-To: djgpp AT delorie DOT com This is seriously weird. This is a hacked-up-for-debugging version of a Mandelbrot set related program I am working on. This altered version just runs "f_find_components" and prints loads of debugging info. The odd stuff is with a certain loop below. Compile with: gcc periods.c -o per.exe -lalleg -g -O6 -ffast-math -fomit-frame-pointer -fforce-addr -funroll-loops -march=k6 -malign-double (gcc version 2.95.2; gas version 2.8.1.) I suspect it may be an optimizer bug. There are comments in the trouble area to help explain better; these start "// DEBUG". /* * periods.c * (c) 2001 Paul Derbyshire * Lesser Gnu Public License. See http://www.fsf.org. * Quick b&w Mandelbrot renderer. Finds components of period n and writes images of them. * Uses "MDisk" algorithm from _The_Science_of_Fractal_Images_, ISBN 0-387-96608-0 * Requires Allegro and a gcc port to compile. Allegro can be had at http://www.allegro.cc. */ #include #include #include #include #include #include // MAXINT #include // biostime typedef double ldouble; // Change this when porting to machines/compilers with no long double. typedef unsigned int word; typedef int bool; int word_bits = 8*sizeof(word); int ldouble_bits = 64; // Change this when porting to machines with different long double mantissa size. int width = 1024; int height = 768; double max_ldouble_amag = 0.0; PALETTE palette; int p_greys = 0; int p_void = 255; int max_iters = 0; int max_components = 0; ldouble max_radius_squared = 9.0; //double edge_width = 0.1; double edge_width = 0.02; double min_disk_radius = 1.0; ldouble per_width = 1.0; int recursive_disk_radius = 1; int aa_degree = 1; int num_components = 0; ldouble f_cx, f_cy, f_mag, f_x1, f_y1, f_x2, f_y2, f_pix_size, f_aa_size, *f_x_orbit, *f_y_orbit, *f_x_orbit2, *f_y_orbit2; int precision_bits = 0; int num_words = 0; bool do_per; bool resume; int symmetry_line; BITMAP *main_screen; int preview_grid_size = 64; void set_max_iters (int); void f_find_components (int); int num_components_of_period (int); void f_set_cmag (ldouble, ldouble, ldouble); void image_draw_loop (int); void draw_image (void); void q_wait_key (void); void f_draw_image (void); void f_do_pixel (int, int, ldouble, ldouble); void f_do_disk_1 (int, int, ldouble, ldouble); void f_do_disk (int, int, ldouble, ldouble); double f_calc_dist (ldouble, ldouble); double f_calc_dist_per (ldouble, ldouble); ldouble f_jitter (void); bool f_check_known_components (ldouble, ldouble); int main (void) { int i, per, image_max_iters; printf("\nInitializing...\n"); srandom(biostime(0, 0)); max_ldouble_amag = pow(2.0, (double)(ldouble_bits - 3)); // --------------------------------------- // TUNABLE PARAMETERS // DON'T CHANGE THESE to debug the NaN mystery. aa_degree = 5; set_max_iters(16384); image_max_iters = 2048; // Must be less than arg to set_max_iters!!! max_components = 10; edge_width = 0.1; per = 13; resume = 0; // END TUNABLE PARAMETERS // --------------------------------------- i = num_components_of_period(per); if (i <= max_components) printf("Locating all %d components of period %d.\n", i, per); else printf("Locating %d random components of period %d.\n", max_components, per); f_find_components(per); exit(0); max_iters = image_max_iters; if (allegro_init()) { fprintf(stderr, "Initialization error!\n(non-zero return from allegro_init())\n\n"); return 1; } set_color_depth(8); if (set_gfx_mode(GFX_AUTODETECT, width, height, 0, 0)) { fprintf(stderr, "Initialization error!\nGraphics mofe %dx%dx256 unavailable.\n(non-zero return from set_gfx_mode())\n\n", width, height); return 1; } palette[p_void].r = 31; palette[p_void].g = 0; palette[p_void].b = 15; for (i = 0; i < 64; ++i) { palette[p_greys+i].r = i; palette[p_greys+i].g = i; palette[p_greys+i].b = i; } set_palette(palette); main_screen = create_sub_bitmap(screen, 0, 0, width, height); image_draw_loop(per); set_gfx_mode(GFX_TEXT, 80, 25, 0, 0); if (max_iters != 0) { free(f_x_orbit); free(f_y_orbit); free(f_x_orbit2); free(f_y_orbit2); } printf("Done.\n\n"); return 0; } void set_max_iters (int max) { if (max_iters != 0) { free(f_x_orbit); free(f_y_orbit); free(f_x_orbit2); free(f_y_orbit2); } max_iters = max; f_x_orbit = malloc(max_iters*sizeof(ldouble)); f_y_orbit = malloc(max_iters*sizeof(ldouble)); f_x_orbit2 = malloc(max_iters*sizeof(ldouble)); f_y_orbit2 = malloc(max_iters*sizeof(ldouble)); } void f_find_components (int per) { ldouble mag = ((ldouble)(per*per))*0.25; ldouble cx, cy, ocx, ocy, x, y, xp, yp, t, *x1, *y1; int i, iters, i2; int m = 0; int n = 0; int q = num_components_of_period(per); int expected = (q > max_components)?max_components:q; int seed = random(); int diverged = 0; int smaller = 0; int duplicates = 0; int maxi = 0; f_pix_size = 1/(width*mag); f_aa_size = f_pix_size/aa_degree; if (per == 1) { (*f_x_orbit2) = 0.0; (*f_y_orbit2) = 0.0; num_components = 1; srandom(seed); return; } if (per == 2) { (*f_x_orbit2) = -1.0; (*f_y_orbit2) = 0.0; num_components = 1; srandom(seed); return; } srandom(1746573498L); for (i = 0; i < max_iters; ++i) { cx = (((ldouble)(random()))/((ldouble)(MAXINT)))*4.0; cy = (((ldouble)(random()))/((ldouble)(MAXINT)))*3.0; // This following loop is the oddly behaving one. for (iters = 0; iters < max_iters; ++iters) { ocx = cx; ocy = cy; x = 0.0; y = 0.0; xp = 0.0; yp = 0.0; for (i2 = 0; i2 < per; ++i2) { t = 2.0*(x*xp - y*yp) + 1.0; yp = 2.0*(x*yp + y*xp); xp = t; t = x*x - y*y + cx; y = 2.0*x*y + cy; x = t; } t = xp*xp + yp*yp; cx -= (xp*x + yp*y)/t; cy -= (xp*y - yp*x)/t; if (!((cx * cx) >= 0.0)) { // By experiment I've found that this detects NaNs. // DEBUG: The following will never be executed. printf("NaNs detected!\ncx = %1.16f, cy = %1.16f, t = %1.16f, iters = %d.\n", cx, cy, t, iters); iters = max_iters; break; } if ((fabs(cx - ocx) < f_aa_size) && (fabs(cy - ocy) < f_aa_size)) { --iters; break; } } if (iters > maxi) { if (iters < max_iters) maxi = iters + 1; else maxi = iters; } if (!((cx * cx) >= 0.0)) printf("NaNs detected!\n"); // DEBUG: This will! // DEBUG: The weirdness is that between executing the first NaN // detector in the final loop iteration and the second one here, // cx is not assigned to. Also, as cx is a local, it's on the // stack and not the heap so if there is a bug in the pointer // arithmetic for the f_x_orbit stuff, which I doubt, it's not // clobbering cx. Also, no pointer arithmetic is invoked between // the two NaN detectors. // If the optimizer is doing something weird here, it constitutes // a bug, since it is clearly altering the program semantics! // I am aware that f_fast_math may affect the exact values of // floating point operations, but it shouldn't make floats // mysteriously change values when they aren't assigned to. printf("(%1.16f, %1.16f)\n", cx, cy); printf("(%1.16f, %1.16f)\n", ocx, ocy); printf("(%1.16f, %1.16f)\n", fabs(cx-ocx), fabs(cy-ocy)); if (iters < max_iters) { // if ((fabs(cx - ocx) < f_aa_size) && (fabs(cy - ocy) < f_aa_size)) { if ((fabs(cx) > 0.2) || (fabs(cy) > 0.2)) { if (f_calc_dist_per(cx, cy) < (1/(mag*mag) - f_aa_size)) { // Check for duplication, including symmetric. x1 = f_x_orbit2; y1 = f_y_orbit2; for (i2 = 0; i2 < n; ++i2) { if ((fabs(cx - (*x1)) < f_aa_size) && (fabs(cy - (*y1)) < f_aa_size)) break; if ((fabs(cx - (*x1)) < f_aa_size) && (fabs(cy + (*y1)) < f_aa_size)) break; ++x1; ++y1; } if (i2 == n) { // It's new! (*x1) = cx; (*y1) = fabs(cy); ++n; ++m; if (fabs(cy) > f_aa_size) ++m; printf("Got %d of %d.\n", m, expected); printf("Mag: %1.16f.\n", 1/(64*f_calc_dist_per(*x1, *y1))); if (m == q) break; // We won't find any more. if (m == max_components) break; // We have enough. } else { ++duplicates; } } else { ++smaller; } } else { ++smaller; } } else { ++diverged; } if (!(i & 0x03ff)) printf("Iteration %d: found %d (%d), %d diverged, %d smaller, %d duplicates; %d maxi.\n", i, n, m, diverged, smaller, duplicates, maxi); } printf("Iteration %d: found %d (%d), %d diverged, %d smaller, %d duplicates; %d maxi.\n", i, n, m, diverged, smaller, duplicates, maxi); if (m < expected) { fprintf(stderr, "Failed to find %d components!\n\n", expected - m); exit(4); } num_components = n; srandom(seed); } int num_components_of_period (int per) { int i; int n; if (per == 1) return 1; if (per > 32) return max_components + 1; n = (1 << (per - 1)) - 1; for (i = 2; i < per; ++i) { if ((per/i)*i == per) n -= num_components_of_period(i); } return n; } void f_set_cmag (ldouble cx, ldouble cy, ldouble mag) { f_cx = cx; f_cy = cy; f_mag = mag; f_pix_size = 1/(width*mag); f_x1 = f_cx - (width/2)*f_pix_size; f_x2 = f_cx + (width/2)*f_pix_size; f_y1 = f_cy - (height/2)*f_pix_size; f_y2 = f_cy + (height/2)*f_pix_size; f_aa_size = f_pix_size/aa_degree; } void image_draw_loop (int per) { int i; ldouble *x1 = f_x_orbit2; ldouble *y1 = f_y_orbit2; ldouble mag; FILE *foo = NULL; char filename[12]; for (i = 0; i < num_components; ++i) { sprintf(filename, "p%07d.bmp", i); if (resume) foo = fopen(filename, "rb"); if (foo != NULL) { fclose(foo); } else { f_set_cmag(*x1, *y1, 1/(64*f_calc_dist_per(*x1, *y1))); draw_image(); filename[12] = 0; save_bmp(filename, main_screen, palette); // Main screen turn on. mag = ((ldouble)(per*per))*0.25; f_pix_size = 1/(width*mag); f_aa_size = f_pix_size/aa_degree; } ++x1; ++y1; } } void draw_image (void) { // if (use_float) f_draw_image(); // else // i_draw_image(); } void q_wait_key (void) { char foo[256]; gets(foo); } void f_draw_image (void) { ldouble cx, cy; int x, y; int p = preview_grid_size; do_per = 0; rectfill(screen, 0, 0, width-1, height-1, p_void); cy = f_y2; symmetry_line = -1; if ((f_y1 < 0.0) && (f_y2 > 0.0)) { for (y = 0; y < height; ++y) { cy -= f_pix_size; if (cy < 0) { symmetry_line = 2*y; break; } } cy = f_y2; } // Build "previews". while (p > 0) { for (y = 0; y < height; y+=p) { cx = f_x1; for (x = 0; x < width; x+=p) { f_do_pixel(x, y, cx, cy); cx += p*f_pix_size; } cy -= p*f_pix_size; } cy = f_y2; if (p > 1) { for (y = 0; y < height; y+=p) { cx = f_x1; for (x = 0; x < width; x+=(p/2)) { f_do_pixel(x, y, cx, cy); cx += p*f_pix_size/2; } cy -= p*f_pix_size; } cy = f_y2; } p/=2; } } void f_do_pixel (int x, int y, ldouble cx, ldouble cy) { if ((cx < -2.0) || (cx > 0.5) || (cy < -1.2) || (cy > 1.2)) { putpixel(screen, x, y, p_greys); if (symmetry_line != -1) putpixel(screen, x, symmetry_line - y, p_greys); } else if (getpixel(screen, x, y) == p_void) { f_do_disk(x, y, cx, cy); } } void f_do_disk_1 (int x, int y, ldouble cx, ldouble cy) { if ((x < 0) || (x >= width) || (y < 0) || (y >= height)) return; if ((cx < -2.0) || (cx > 0.5) || (cy < -1.2) || (cy > 1.2)) return; if (getpixel(screen, x, y) != p_void) return; f_do_disk(x, y, cx, cy); } void f_do_disk (int x, int y, ldouble cx, ldouble cy) { int radius, aa_x, aa_y, aa_count; double f_radius, dist, aa_cx, aa_cy; dist = f_calc_dist(cx, cy); if (dist < min_disk_radius*f_pix_size) { aa_count = 0; aa_cx = cx; for (aa_x = 0; aa_x < aa_degree; ++aa_x) { aa_cy = cy; for (aa_y = 0; aa_y < aa_degree; ++aa_y) { if ((aa_x != 0) || (aa_y != 0)) dist = f_calc_dist(aa_cx, aa_cy); if (dist < edge_width*f_pix_size) ++aa_count; aa_cy += f_aa_size; } aa_cx += f_aa_size; } aa_count*=63; aa_count/=(aa_degree*aa_degree); putpixel(screen, x, y, p_greys+aa_count); if (symmetry_line != -1) putpixel(screen, x, symmetry_line - y, p_greys+aa_count); } else { f_radius = dist/f_pix_size; if (f_radius > (double)width) f_radius = width; radius = f_radius; circlefill(screen, x, y, radius, p_greys); if (symmetry_line != -1) circlefill(screen, x, symmetry_line - y, radius, p_greys); if (radius >= recursive_disk_radius) { f_do_disk_1(x-radius-1, y, cx-(radius+1)*f_pix_size, cy); f_do_disk_1(x+radius+1, y, cx+(radius+1)*f_pix_size, cy); f_do_disk_1(x, y-radius-1, cx, cy+(radius+1)*f_pix_size); f_do_disk_1(x, y+radius+1, cx, cy-(radius+1)*f_pix_size); } } } double f_calc_dist (ldouble pcx, ldouble pcy) { ldouble *x1, *y1; ldouble x = 0.0; ldouble y = 0.0; ldouble x2 = 0.0; ldouble y2 = 0.0; ldouble xp, yp, t; int iters, i2; bool escaped = 0; ldouble cx = pcx + f_jitter(); ldouble cy = pcy + f_jitter(); if (f_check_known_components(cx, cy)) return 0.5*(edge_width + min_disk_radius)*f_pix_size; if (do_per) return f_calc_dist_per(cx, cy); x1 = f_x_orbit; y1 = f_y_orbit; for (iters = 0; iters < max_iters; ++iters) { y = 2.0*x*y + cy; x = x2 - y2 + cx; x2 = x*x; y2 = y*y; (*x1) = x; (*y1) = y; ++x1; ++y1; if ((x2 + y2) > max_radius_squared) { escaped = 1; break; } } if (escaped) { xp = 1.0; yp = 0.0; x1 = f_x_orbit; y1 = f_y_orbit; for (i2 = 0; i2 < iters; ++i2) { t = 2.0*((*x1)*xp - (*y1)*yp) + 1.0; yp = 2.0*((*x1)*yp + (*y1)*xp); xp = t; ++x1; ++y1; } t = sqrt(x2 + y2); return t*log(t)*0.5/sqrt(xp*xp + yp*yp); } do_per = 1; return 0.5*(edge_width + min_disk_radius)*f_pix_size; } double f_calc_dist_per (ldouble cx, ldouble cy) { ldouble *x1, *y1; ldouble x = 0.0; ldouble y = 0.0; ldouble x2 = 0.0; ldouble y2 = 0.0; ldouble pcx = 0.0; ldouble pcy = 0.0; ldouble pcx2 = 0.0; ldouble pcy2 = 0.0; ldouble xz, yz, xc, yc, xzz, yzz, xcz, ycz, xp, yp, t, u, z; int iters, i2, pi; bool even = 1; bool escaped = 0; bool periodic = 0; x1 = f_x_orbit; y1 = f_y_orbit; for (iters = 0; iters < max_iters; ++iters) { y = 2.0*x*y + cy; x = x2 - y2 + cx; x2 = x*x; y2 = y*y; (*x1) = x; (*y1) = y; ++x1; ++y1; even = 1 - even; if (even) { pcy = 2.0*pcx*pcy + cy; pcx = pcx2 - pcy2 + cx; pcx2 = pcx*pcx; pcy2 = pcy*pcy; } if ((fabs(x - pcx) < per_width*f_aa_size) && (fabs(y - pcy) < per_width*f_aa_size)) { periodic = 1; break; } if ((x2 + y2) > max_radius_squared) { escaped = 1; break; } } if (periodic) { // Find start of period. --x1; --y1; // Point back to x and y. for (pi = iters - 1; pi > 0; --pi) { --x1; --y1; if ((fabs((*x1) - x) < per_width*f_aa_size) && (fabs((*y1) - y) < per_width*f_aa_size)) break; } // Now pi is such that pi...iters-1 is one repetition of the cycle, and iters-pi is its period. Also, x1 and y1 now // point at z_{pi}. xz = 1.0; yz = 0.0; xc = 0.0; yc = 0.0; xzz = 0.0; yzz = 0.0; xcz = 0.0; ycz = 0.0; for (i2 = pi; i2 < iters; ++i2) { t = 2.0*(xz*xz - yz*yz + (*x1)*xzz - (*y1)*yzz); yzz = 2.0*(2.0*xz*yz + (*x1)*yzz + (*y1)*xzz); xzz = t; t = 2.0*(xz*xc - yz*yc + (*x1)*xcz - (*y1)*ycz); ycz = 2.0*(xz*yc + yz*xc + (*x1)*ycz + (*y1)*xcz); xcz = t; t = 2.0*((*x1)*xz - (*y1)*yz); yz = 2.0*((*x1)*yz + (*y1)*xz); xz = t; t = 2.0*((*x1)*xc - (*y1)*yc) + 1.0; yc = 2.0*((*x1)*yc + (*y1)*xc); xc = t; ++x1; ++y1; } t = 1.0 - xz; u = -yz; z = t*t + u*u; x = (xc*t + yc*u)/z; y = (yc*t - xc*u)/z; xp = xcz + xzz*x - yzz*y; yp = ycz + xzz*y + yzz*x; return 0.25*(1.0 - xz*xz - yz*yz)/sqrt(xp*xp + yp*yp); } else if (escaped) { do_per = 0; xp = 1.0; yp = 0.0; x1 = f_x_orbit; y1 = f_y_orbit; for (i2 = 0; i2 < iters; ++i2) { t = 2.0*((*x1)*xp - (*y1)*yp) + 1.0; yp = 2.0*((*x1)*yp + (*y1)*xp); xp = t; ++x1; ++y1; } t = sqrt(x2 + y2); return t*log(t)*0.5/sqrt(xp*xp + yp*yp); } return 0.5*(edge_width + min_disk_radius)*f_pix_size; } ldouble f_jitter (void) { return ((((ldouble)(random()))/((ldouble)(MAXINT))) - 0.5)*f_aa_size; } bool f_check_known_components (ldouble cx, ldouble cy) { ldouble t = cx + 1; ldouble u, v, w, a, m; if ((t*t + cy*cy) <= 0.0625) return 1; t = 1 - 4*cx; u = -4*cy; a = atan2(u, t)/2; m = sqrt(sqrt(t*t + u*u)); v = 1 + m*cos(a); w = m*sin(a); if ((v*v + w*w) <= 1) return 1; v = 2 - v; if ((v*v + w*w) <= 1) return 1; return 0; } -- Bill Gates: "No computer will ever need more than 640K of RAM." -- 1980 "There's nobody getting rich writing software that I know of." -- 1980 "This antitrust thing will blow over." -- 1998 Combine neo, an underscore, and one thousand sixty-one to make my hotmail addy.