JSOC_Documentation: proj/mag/ident/libs/mexfunctions/mrf

00001 /***************************************************************
00002 
00003 turmon nov 2006
00004 
00005 Routines for generating a geometrically-distributed clock,
00006 where we do not care if the clock is larger than M.
00007 That is, if the return value is greater than M, the routine
00008 can choose to not compute it exactly, and return any number
00009 larger than M.
00010 
00011 tested new_clock in all three regimes: low q, low p, and moderate
00012 p, corresponding to its three cases.  
00013 In the low-q case, I tried q=0.7, 0.5, 0.1, and 0.01 with good 
00014 agreement between method 0 and method 2, versus the true density.
00015 In the low p case, I used 
00016 q = 1-p = 0.999, M=17, n=1e8, so M*p=0.017 < 1.75e-2, while giving
00017 good contrast between Pr(clk == 1) and Pr(clk == M) as estimated
00018 over n samples (ie, 10.0e4 versus 9.84e4 counts, a difference of 
00019 1600 counts in a quantity with standard deviation about 314 counts.
00020 I also did a rough check at q=0.999999, but there is no appreciable
00021 variation of the density across clock value there, so I was not able
00022 to validate the shape of the PDF.
00023 In the moderate-p case, I used q=0.9.  This code is identical for
00024 all "mode" inputs, so there's not much to check.
00025 
00026 ****************************************************************/
00027 
00028 /*
00029  * this code needs:
00030  *   typedef for Clock, currently unsigned
00031  *   maximum value CLOCK_MAX (for type Clock), currently UINT_MAX
00032  */
00033 
00034 /*
00035  * get a new clock number based on a geometric(1-q) random variable
00036  *
00037  * this is based on the dead-simple inversion method for the
00038  * exponential random variable, and then quantizing it
00039  *
00040  */
00041 static
00042 Clock
00043 new_clock_easy(double q, int M)
00044 {
00045   double rng_uniform();
00046   double clock; /* need the range */
00047 
00048   clock = log(rng_uniform())/log(q) + 1; /* initial choice */
00049   /* fix it up if needed */
00050   if (clock > CLOCK_MAX)
00051     clock = CLOCK_MAX; 
00052   return((Clock) floor(clock));
00053 }
00054 
00055 /*
00056  * get a new clock number based on a geometric(1-q) random variable
00057  *
00058  * this is the older version of the new_clock routine, which I 
00059  * replaced in 2006.  It special-cases the low and high q values
00060  * with deterministic values.  Because this is a crude way to do
00061  * the approximation, only very extreme values are special-cased.
00062  */
00063 static
00064 Clock
00065 new_clock_orig(double q, int M)
00066 {
00067   double rng_uniform();
00068   double clock; /* need the range */
00069 
00070   /* Cheat a little here by handling interval edges, which occur often,
00071    * as special cases, without drawing a new random variable.
00072    * This is somewhat speedier.  Size of edges is 10^-8 on each
00073    * side.
00074    * The deterministic approximation only fails on either endpoint
00075    * with probability about 10^-8, so even if all pixels are approximated,
00076    * only about 2 pixels in a 10K x 10K image will be in error.
00077    */
00078   if (q < 1e-8)
00079     clock = 1.0;
00080   else if (q > (1 - 1e-8)) /* -> log(p) < 1e-8 in magnitude by Taylor */
00081     clock = CLOCK_MAX;
00082   else {
00083     clock = log(rng_uniform())/log(q) + 1; /* initial choice */
00084     /* fix it up if needed */
00085     if (clock > CLOCK_MAX)
00086       clock = CLOCK_MAX; 
00087     /* NB, this approximation is compensated for by the "staleness"
00088      * checks within the main loop, that expire all clocks before 
00089      * CLOCK_MAX iterations.  There is no loss of precision from
00090      * this source.
00091      */
00092   }
00093   return((Clock) floor(clock));
00094 }
00095 
00096 /*
00097  * Get a new draw X of a random clock 
00098  * This is a geometric(p) random variable, where p = 1-q.
00099  * That is, the hold probability is q, and the change probability is p,
00100  * so X is formed by sequences like:
00101  *
00102  * X = 1: C    Pr = p
00103  * X = 2: HC   Pr = q p
00104  * X = 3: HHC  Pr = q^2 p
00105  * X = 4: HHHC Pr = q^3 p
00106  * etc.
00107  * 
00108  * In general, Pr(X=k) = p q^k for k=1,...
00109  * And, P(X>k) = q^k, because all such events start out with k "H" letters
00110  * 
00111  * We special-case the low and high q values to avoid the rather expensive
00112  * double log() required by the inversion method.  The important special
00113  * case is for q near 1, because it is most common.  This case requires
00114  * some careful reasoning.  The less-common special case is for small
00115  * q, where we just mean q less than 0.5 or so.  In this case the
00116  * geometric RV is easily simulated by implementing the Hold/Change
00117  * sequences illustrated above.
00118  */
00119 static
00120 Clock
00121 new_clock(double q, int M)
00122 {
00123   double rng_uniform();
00124   const double p = 1-q;
00125   double x; /* the random variable: need the range */
00126 
00127   if (q < 0.75) {
00128     /* CASE 1.  q small, or hold time is low.
00129      * Draw by the iteration method.
00130      * The loop below accumulates events of probability q.
00131      */
00132     /* The constant in the test above affects run time (by
00133      * selecting the fastest generation method) but not accuracy.  
00134      * At q = 0.75, on average we draw 1/(1-q) = 4 uniforms, 
00135      * about equal to the one uniform plus two logs of the inversion
00136      * method below in CASE 3 (G5, 11/2006).  At q=0.2, the code
00137      * below is about 2.2x faster than inversion.  At q=0.01, it
00138      * is 3x faster.  But note, this is not the dominant case.  */
00139     for (x = 1; rng_uniform() < q && x<=M; x++);
00140   } else if (M*p < 1.75e-2) {
00141     /* CASE 2.  q near 1, or p near 0.
00142      * Hold time is high relative to number of iterations
00143      * remaining; typically will return a value X > M.
00144      * This is the most frequent case in our application.
00145      * The code below is a factor of 2.5x faster than the default
00146      * inversion method on a G5.
00147      */
00148     /* Below, we take one of two branches with probability
00149      * Pr(X>M) = pow(1-p,M) versus Pr(X<=M).
00150      * In the former, overwhelmingly probable case, just return
00151      * CLOCK_MAX.  In the latter, generate X conditioned on X<=M.
00152      * 
00153      * Because we cannot quickly compute pow(1-p,M), we approximate it
00154      * with the first two terms of its binomial expansion:
00155      *   P(X>M) = (1-p)^M ~= 1 - M * p + M(M-1)/2 * p^2
00156      * For M*p < 1.75e-2, the relative accuracy 
00157      *   A(M,p) = [ 1 - M * p + M(M-1)/2 * p^2 ] / [ (1-p)^M ]
00158      * is better than 1 part in 10^6.  For M*p < 3.5e-3, the accuracy
00159      * is better than 1 part in 10^8.
00160      * The test below replaces 
00161      *   rng_uniform() < 1-rho 
00162      * with its equivalent,
00163      *   rng_uniform() > rho
00164      * where rho = M * p - M(M-1)/2 * p^2
00165      *           = M*p * (1 - 0.5 * (M-1) * p)
00166      *           = M*p * (1 - 0.5 * (M * p - p) )
00167      */
00168     if (rng_uniform() > M*p*(1 - 0.5*(M*p-p))) {
00169       /* CASE 2a.  Generate X | X>M, which is deterministic. */
00170       /* This branch is taken with probability approximately P(X>M),
00171        * and returns X conditioned on X > M.  We almost always
00172        * take this branch since M*p << 1.  */
00173       x = CLOCK_MAX;
00174     } else {
00175       /* CASE 2b.  Generate X | X<=M */
00176       /* This branch is taken with probability approximately P(X<=M),
00177        * and returns X conditioned on X <= M.  Censoring the 
00178        * realizations with X>M would be one way.  But it's easier to 
00179        * note that, if X is geometric, then (X-1) % M + 1 is geometric
00180        * conditioned on X<=M.  We produce X-1 by taking floor
00181        * and not ceil in the log-of-uniform below.
00182        * We almost never take this branch, so its efficiency is moot. */
00183       x = fmod(floor(log(rng_uniform())/log(q)), (double) M) + 1;
00184     }
00185   } else {
00186     /* CASE 3. Hold probability is in between: draw by inversion method. */
00187     x = floor(log(rng_uniform())/log(q)) + 1; /* initial choice */
00188     /* don't overflow the clock size; note, CLOCK_MAX >= M anyway */
00189     if (x > CLOCK_MAX)
00190       x = CLOCK_MAX; 
00191   }
00192   // mexPrintf("\tp = %g\tclock=%u\n", p, (Clock) x);
00193   // note, x is already an integer > 0
00194   return((Clock) x);
00195 }
00196
proj/mag/ident/libs/mexfunctions/mrf_newclock.h