/* Wallace generator for normally distributed random variates Copyright: Holger Vogt, 2008 */ //#define FASTNORM_ORIG #ifdef HasMain #include #else #ifndef NOSPICE #include "ngspice/ngspice.h" #endif #endif #ifdef _MSC_VER #include #define getpid _getpid #else #include #endif #include #include "ngspice/wallace.h" #include "ngspice/FastNorm3.h" #define POOLSIZE 4096 #define LPOOLSIZE 12 #define NOTRANS 3 /* number of (dual) transformations */ #define VE 10 #define VL (1 << VE) #define VM (VL-1) #define WL (4*VL) #define WM (WL-1) double *outgauss; /* output vector for user access */ unsigned int variate_used; /* actual index of variate called by user */ double ScaleGauss; static double *pool1; static double *pool2; static unsigned int *addrif, *addrib; static unsigned n = POOLSIZE; static double chi1, chi2; /* chi^2 correction values */ static unsigned int newpools; extern double drand(void); extern unsigned int CombLCGTausInt(void); extern void TausSeed(void); extern unsigned int CombLCGTausInt2(void); void PolarGauss(double* py1, double* py2) { double x1, x2, w; do { x1 = drand(); x2 = drand(); w = x1 * x1 + x2 * x2; } while ((w > 1.0) || (w < 0.25)); w = sqrt((-2.0 * log(w)) / w); *py1 = (double)(x1 * w); *py2 = (double)(x2 * w); } static void destroy_wallace(void) { tfree(pool1); tfree(pool2); tfree(addrif); tfree(addrib); } void initw(void) { unsigned i; double totsqr, nomsqr; unsigned long int coa, cob, s; /* initialize the uniform generator */ srand((unsigned int) getpid()); // srand(17); TausSeed(); ScaleGauss = 1.; newpools = 1; /* set up the two pools */ pool1 = TMALLOC(double, n); pool2 = TMALLOC(double, n); addrif = TMALLOC(unsigned int, (n + NOTRANS)); addrib = TMALLOC(unsigned int, (n + NOTRANS)); atexit(destroy_wallace); /* fill the first pool with normally distributed values */ PolarGauss(&pool1[0], &pool1[1]); for (i = 1; i < n>>1; i++) PolarGauss(&pool1[i<<1], &pool1[(i<<1) + 1]); /* normalize pool content */ /* totsqr = totsum = 0.0; * for (i = 0; i < n; i++) { * totsqr += pool1[i] * pool1[i]; * totsum += pool1[i]; * } * totsum = totsum/n; * for (i = 0; i < n; i++) { * totsqr += (pool1[i] - totsum) * (pool1[i] - totsum); * } * nomsqr = sqrt(n / totsqr); * for (i = 0; i < n; i++) * pool1[i] = (pool1[i] - totsum) * nomsqr; */ totsqr = 0.0; for (i = 0; i < n; i++) totsqr += pool1[i] * pool1[i]; nomsqr = sqrt(n / totsqr); for (i = 0; i < n; i++) pool1[i] *= nomsqr; /* calculate ch^2 value */ chi1 = sqrt(sqrt(1.0 - 1.0/n)); chi2 = sqrt(1.0 - chi1*chi1); /* first scaling, based on unused pool1[n-2] */ ScaleGauss = chi1 + chi2 * ScaleGauss * pool1[n-2]; /* access to first pool */ outgauss = pool1; /* set data counter, we return n-2 values here */ variate_used = n - 2; /* generate random reading addresses using a LCG */ s = 0; coa = 241; cob = 59; for (i = 0; i < (n + NOTRANS); i++) { // addrif[i] = s = (s * coa + cob) % (n); coa = CombLCGTausInt(); addrif[i] = coa >> (32 - LPOOLSIZE); // printf ("Random add:\t%ld\n" , s); } s = 0; coa = 193; cob = 15; for (i = 0; i < (n + NOTRANS); i++) { // addrib[i] = s = (s * coa + cob) % (n); coa = CombLCGTausInt(); addrib[i] = coa >> (32 - LPOOLSIZE); // printf ("Random add:\t%ld\n" , addrib[i]); } // printf("norm for orig. Gauss: %e, chi^2 scale: %e\n", nomsqr, ScaleGauss); // NewWa(); } /* original FastNorm3.c code */ #ifdef FASTNORM_ORIG float NewWa() { int i, j, k, m; float p, q, r, s, t; int topv[6], ord[4], *top; float *ppt[4], *ptn; float nulval, endval; float totsqr, nomsqr; nulval = ScaleGauss * pool1[0]; endval = pool1[n-1]; /* Choose 4 random start points in the wk1[] vector I want them all different. */ top = topv + 1; /* Set limiting values in top[-1], top[4] */ top[-1] = VL; top[4] = 0; reran1: m = CombLCGTausInt(); /* positive 32-bit random */ /* Extract two VE-sized randoms from m, which has 31 useable digits */ m = m >> (31 - 2*VE); top[0] = m & VM; m = m >> VE; top[1] = m & VM; m = CombLCGTausInt(); /* positive 32-bit random */ /* Extract two VE-sized randoms from m, which has 31 useable digits */ m = m >> (31 - 2*VE); top[2] = m & VM; m = m >> VE; top[3] = m & VM; for (i = 0; i < 4; i++) ord[i] = i; /* Sort in decreasing size */ for (i = 2; i >= 0; i--) for (j = 0; j <= i; j++) if (top[j] < top[j+1]) { SWAP(int, top[j], top[j+1]); SWAP(int, ord[j], ord[j+1]); } /* Ensure all different */ for (i = 0; i < 3; i++) if (top[i] == top[i+1]) goto reran1; /* Set pt pointers to their start values for the first chunk. */ for (i = 0; i < 4; i++) { j = ord[i]; ppt[j] = pool2 + j * VL + top[i]; } /* Set ptn to point into wk1 */ ptn = pool1; /* Now ready to do five chunks. The length of chunk i is top[i-1] - top[i] (I hope) At the end of chunk i, pointer ord[i] should have reached the end of its part, and need to be wrapped down to the start of its part. */ i = 0; chunk: j = top[i] - top[i-1]; /* Minus the chunk length */ for (; j < 0; j++) { p = *ptn++; s = *ptn++; q = *ptn++; r = *ptn++; t = (p + q + r + s) * 0.5; *ppt[0]++ = t - p; *ppt[1]++ = t - q; *ppt[2]++ = r - t; *ppt[3]++ = s - t; } /* This should end the chunk. See if all done */ if (i == 4) goto passdone; /* The pointer for part ord[i] should have passed its end */ j = ord[i]; #ifdef dddd printf ("Chunk %1d done. Ptr %1d now %4d\n", i, j, ppt[j]-pool2); #endif ppt[j] -= VL; i++; goto chunk; passdone: /* wk1[] values have been transformed and placed in wk2[] Transform from wk2 to wk1 with a simple shuffle */ m = (CombLCGTausInt2() >> (29 - VE)) & WM; j = 0; for (i = 0; i < 4; i++) ppt[i] = pool1 + i * VL; for (i = 0; i < VL; i++) { p = pool2[j^m]; j++; s = pool2[j^m]; j++; q = pool2[j^m]; j++; r = pool2[j^m]; j++; t = (p + q + r + s) * 0.5; *ppt[0]++ = t - p; *ppt[1]++ = q - t; *ppt[2]++ = t - r; *ppt[3]++ = s - t; } /* renormalize again if number of pools beyond limit */ if (!(newpools & 0xFFFF)) { totsqr = 0.0; for (i = 0; i < n; i++) totsqr += pool1[i] * pool1[i]; nomsqr = sqrt(n / totsqr); for (i = 0; i < n; i++) pool1[i] *= nomsqr; } outgauss = pool1; /* reset data counter */ variate_used = n - 1; /* set counter counting nomber of pools made */ newpools++; /* new scale factor using ch^2 correction, using pool1[n-1] from last pool */ ScaleGauss = chi1 + chi2 * ScaleGauss * endval; // printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss); return nulval; /* use old scale */ } #else /* Simplified code according to an algorithm published by C. S. Wallace: "Fast Pseudorandom Generators for Normal and Exponential Variates", ACM Transactions on Mathmatical Software, Vol. 22, No. 1, March 1996, pp. 119-127. Transform pool1 to pool2 and back to pool1 NOTRANS times by orthogonal 4 x 4 Hadamard-Matrix. Mixing of values is very important: Any value in the pool should contribute to every value in the new pools, at least after several passes (number of passes is set by NOTRANS to 2 or 3). 4 values are read in a continuous sequence from the total of POOLSIZE values. Values are stored in steps modulo POOLSIZE/4. During backward transformation the values are shuffled by a random number jj. */ double NewWa(void) { double nulval, endval; double bl1, bl2, bl3, bl4; /* the four values to be transformed */ double bsum; double totsqr, nomsqr; unsigned int i, j, jj, m, mm, mmm; nulval = ScaleGauss * pool1[0]; endval = pool1[n-1]; m = n >> 2; // printf("New pool after next value\n"); /* generate new pool by transformation Transformation is repeated NOTRANS times */ for (i = 0; i < NOTRANS; i++) { mm = m << 1; mmm = mm + m; /* forward transformation */ // for (j = 0; j < n; j += 4) { for (j = 0; j < m; j++) { bl1 = pool1[j]; bl2 = pool1[j+m]; bl3 = pool1[j+mm]; bl4 = pool1[j+mmm]; /* Hadamard-Matrix */ bsum = (bl1 + bl2 + bl3 + bl4) * 0.5f; jj = j<<2; pool2[jj] = bl1 - bsum; pool2[jj+1] = bl2 - bsum; pool2[jj+2] = bsum - bl3; pool2[jj+3] = bsum - bl4; } /* backward transformation */ jj = (CombLCGTausInt2() >> (31 - LPOOLSIZE)) & (n - 1); for (j = 0; j < m; j++) { bl1 = pool2[j^jj]; bl2 = pool2[(j+m)^jj]; bl3 = pool2[(j+mm)^jj]; bl4 = pool2[(j+mmm)^jj]; /* Hadamard-Matrix */ bsum = (bl1 + bl2 + bl3 + bl4) * 0.5f; jj = j<<2; pool1[jj] = bl1 - bsum; pool1[jj+1] = bl2 - bsum; pool1[jj+2] = bsum - bl3; pool1[jj+3] = bsum - bl4; } } /* renormalize again if number of pools beyond limit */ if (!(newpools & 0xFFFF)) { totsqr = 0.0; for (i = 0; i < n; i++) totsqr += pool1[i] * pool1[i]; nomsqr = sqrt(n / totsqr); for (i = 0; i < n; i++) pool1[i] *= nomsqr; } outgauss = pool1; /* reset data counter */ variate_used = n - 1; /* set counter counting nomber of pools made */ newpools++; /* new scale factor using ch^2 correction, using pool1[n-1] from previous pool */ ScaleGauss = chi1 + chi2 * ScaleGauss * endval; // printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss); return nulval; /* use old scale */ // return pool1[0]; /* use new scale */ } #endif #ifdef FASTNORMTEST float NewWa_not(void) { float nulval, endval; float bl1, bl2, bl3, bl4; /* the four values to be transformed */ float bsum; float totsqr, nomsqr; unsigned int i, j, jj; nulval = ScaleGauss * pool1[0]; endval = pool1[n-1]; // printf("New pool after next value\n"); /* generate new pool by transformation Transformation is repeated NOTRANS times */ for (i = 0; i < NOTRANS; i++) { /* forward transformation */ for (j = 0; j < n; j += 4) { jj = j + i; bl1 = pool1[addrif[jj]]; bl2 = pool1[addrif[jj+1]]; bl3 = pool1[addrif[jj+2]]; bl4 = pool1[addrif[jj+3]]; /* s = (s*coa + cob) & (n - 1); bl1 = pool1[s]; s = (s*coa + cob) & (n - 1); bl2 = pool1[s + 1]; s = (s*coa + cob) & (n - 1); bl3 = pool1[s + 2]; s = (s*coa + cob) & (n - 1); bl4 = pool1[s + 3]; */ /* jj = j + i; bl1 = pool1[addrif[jj]]; bl2 = pool1[addrif[jj+1]]; bl3 = pool1[addrif[jj+2]]; bl4 = pool1[addrif[jj+3]]; */ /* bl1 = pool1[j]; bl2 = pool1[j+1]; bl3 = pool1[j+2]; bl4 = pool1[j+3]; */ /* Hadamard-Matrix */ bsum = (bl1 + bl2 + bl3 + bl4) * 0.5; /* pool2[j] = bl1 - bsum; pool2[j+1] = bl2 - bsum; pool2[j+2] = bsum - bl3; pool2[j+3] = bsum - bl4; */ pool2[addrib[jj]] = bl1 - bsum; pool2[addrib[jj+1]] = bl2 - bsum; pool2[addrib[jj+2]] = bsum - bl3; pool2[addrib[jj+3]] = bsum - bl4; } /* backward transformation */ for (j = 0; j < n; j += 4) { bl1 = pool2[j]; bl2 = pool2[j+1]; bl3 = pool2[j+2]; bl4 = pool2[j+3]; /* bl1 = pool2[addrib[j]]; bl2 = pool2[addrib[j+1]]; bl3 = pool2[addrib[j+2]]; bl4 = pool2[addrib[j+3]]; */ /* Hadamard-Matrix */ bsum = (bl1 + bl2 + bl3 + bl4) * 0.5; pool1[j] = bl1 - bsum; pool1[j+1] = bl2 - bsum; pool1[j+2] = bsum - bl3; pool1[j+3] = bsum - bl4; } } /* renormalize again if number of pools beyond limit */ if (!(newpools & 0xFFFF)) { totsqr = 0.0; for (i = 0; i < n; i++) totsqr += pool1[i] * pool1[i]; nomsqr = sqrt(n / totsqr); for (i = 0; i < n; i++) pool1[i] *= nomsqr; } outgauss = pool1; /* reset data counter */ variate_used = n - 1; /* set counter counting nomber of pools made */ newpools++; /* new scale factor using ch^2 correction, using pool1[n-1] from last pool */ ScaleGauss = chi1 + chi2 * ScaleGauss * endval; // printf("Pool number: %d, chi^2 scale: %e\n", newpools, ScaleGauss); return nulval; /* use old scale */ // return pool1[0]; /* use new scale */ } #endif /* --------------------- (test) main ------------------------- */ /* gcc -Wall -g -DHasMain -I../../include wallace.c CombTaus.o -o watest.exe */ #ifdef HasMain #include "ngspice/wallace.h" struct timeb timenow; struct timeb timebegin; int sec, msec; void timediff(struct timeb *now, struct timeb *begin, int *sec, int *msec) { *msec = now->millitm - begin->millitm; *sec = now->time - begin->time; if (*msec < 0) { *msec += 1000; (*sec)--; } } int main() { float x; unsigned int i; long int count; initw(); ftime(&timebegin); count = 100000000; for (i = 0; i < count; i++) { x = GaussWa; // printf("%d\t%f\n", i, x); } ftime(&timenow); timediff(&timenow, &timebegin, &sec, &msec); printf("WallaceHV: %ld normal variates: %f s\n", count, sec + (float) msec / 1000.0); initnorm(0, 0); initnorm(77, 3); ftime(&timebegin); count = 100000000; for (i = 0; i < count; i++) { x = FastNorm; // printf("%d\t%f\n", i, x); } ftime(&timenow); timediff(&timenow, &timebegin, &sec, &msec); printf("FastNorm3: %ld normal variates: %f s\n", count, sec + (float) msec / 1000.0); return (1); } #endif