Re: [LINK] Faster RNG

From: d. hall (dhall@OOI.NET)
Date: 07/14/98


>>>>>> thus on Fri, 10 Jul 1998 16:24:26 -0500, Elijah wrote:

[editted from original draft due to > 200 lines, source may have typos]
Well to beat a dead horse...

> well, yes and no.  if it isn't maintainable, then it is junk.  this kind
> of lines up with the typical security adage...  "security by obscurity
> isn't."  the math may be voodoo, but the code that implements it
> certainly doesn't have to be.

It's mainly voodoo due to the properities involved that allow it to take
advantage of modern processors.

"The generator is implemented to generate the output only by fastest
 arithmetic operations: no division, no multiplication. By generating an
 array at one time, it takes the full advantage of cache memory and
 pipeline processing."

It's impressive when using a native compiler that take full advantage of
the processor at hand.

Given the following code snippet comparing the twister to circle's random,
you'll notice a nearly 66% increase in speed (this is on a SGI O2 running
IRIX 6.3 using SGI's native IDO compiler, N32 mode).  Compiling with -O3
derives a larger difference, nearly 50% of the original.  I'm sure
egcs/pgcc on a pentium running linux can probably duplicate these results,
YMMV, people can and do lose money...

(wark)~/src/twister: cc -n32 -O2 random.c                                2:14PM
(wark)~/src/twister: ./a.out                                             2:14PM
5:297509
2:803568

(wark)~/src/twister: cc -n32 -O3 random.c                                2:17PM
(wark)~/src/twister: ./a.out                                             2:18PM
5:242571
2:645194

As a comment to a previous poster in regards to code size, and/or
instruction sets.  The size of the code does not determine the code speed,
especially with the ways modern processors handle pipelining.

Please note, I got even better results with Shawn Cokus's version, the
stats taken above were the "worst test runs".  Also note, this was on a
"virgin" machine, that wasn't running any other "CPU intensive" process at
the same time (apache and mysql don't count as CPU intensive).  The twister
algorithm, much like bzip2, seems to be CPU intensive, and heavy loads can
and do influence it's running speed.

Comments/licenses have been stripped due to posting considerations, do not
consider these to be distributions, they are not.  If you wish to
distribute or use, please use the original source with all licensing and/or
comments intact.

d.
---
#include <stdio.h>
#include <sys/time.h>

#define m  (unsigned long)2147483647
#define q  (unsigned long)127773

#define a (unsigned int)16807
#define r (unsigned int)2836

static unsigned long seed;

/* local functions */
void set_crand (unsigned long initial_seed);
unsigned long crand (void);

void set_crand (unsigned long initial_seed) {
    seed = initial_seed;
}

unsigned long crand (void) {
   register int lo, hi, test;

   hi   = seed/q;
   lo   = seed%q;

   test = a*lo - r*hi;

   if (test > 0)
      seed = test;
   else
      seed = test+ m;

   return seed;
}

/* Period parameters */
#define N 624
#define M 397
#define MATRIX_A 0x9908b0df   /* constant vector a */
#define UPPER_MASK 0x80000000 /* most significant w-r bits */
#define LOWER_MASK 0x7fffffff /* least significant r bits */

/* Tempering parameters */
#define TEMPERING_MASK_B 0x9d2c5680
#define TEMPERING_MASK_C 0xefc60000
#define TEMPERING_SHIFT_U(y)  (y >> 11)
#define TEMPERING_SHIFT_S(y)  (y << 7)
#define TEMPERING_SHIFT_T(y)  (y << 15)
#define TEMPERING_SHIFT_L(y)  (y >> 18)

static unsigned long mt[N]; /* the array for the state vector  */
static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */

/* initializing the array with a NONZERO seed */
void set_trand (unsigned long seed)
{
   mt[0]= seed & 0xffffffff;
   for (mti=1; mti<N; mti++)
      mt[mti] = (69069 * mt[mti-1]) & 0xffffffff;
}

unsigned long trand (void) {
   unsigned long y;
   static unsigned long mag01[2]={0x0, MATRIX_A};

   if (mti >= N) { /* generate N words at one time */
      int kk;

      if (mti == N+1)   /* if sgenrand() has not been called, */
         set_trand (4357); /* a default initial seed is used   */

      for (kk=0;kk<N-M;kk++) {
         y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
         mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & 0x1];
      }
      for (;kk<N-1;kk++) {
         y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
         mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & 0x1];
      }
      y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK);
      mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1];

      mti = 0;
   }

   y = mt[mti++];
   y ^= TEMPERING_SHIFT_U(y);
   y ^= TEMPERING_SHIFT_S(y) & TEMPERING_MASK_B;
   y ^= TEMPERING_SHIFT_T(y) & TEMPERING_MASK_C;
   y ^= TEMPERING_SHIFT_L(y);

   return y;
}

void time_diff (struct timeval *begin, struct timeval *end)
{
   if (end->tv_usec < begin->tv_usec) {
      end->tv_usec += 1000000L;
      end->tv_sec -= 1;
   }
   printf ("%ld.%ld\n", (end->tv_sec - begin->tv_sec),
           (end->tv_usec - begin->tv_usec));
}

main() {
   unsigned long i;
   struct timeval begin, end;

   set_crand (4357);
   gettimeofday (&begin);
   for (i = 0L; i < 10000000L; i++)
      crand ();

   gettimeofday (&end);
   time_diff (&begin, &end);
   set_trand (4357);
   gettimeofday (&begin);
   for (i = 0L; i < 10000000L; i++)
      trand ();

   gettimeofday (&end);
   time_diff (&begin, &end);
}


     +------------------------------------------------------------+
     | Ensure that you have read the CircleMUD Mailing List FAQ:  |
     | http://democracy.queensu.ca/~fletcher/Circle/list-faq.html |
     +------------------------------------------------------------+



This archive was generated by hypermail 2b30 : 12/15/00 PST