JSOC_Documentation: proj/mag/ident/libs/util/num

00001 /*
00002  * num_strings.c
00003  * 
00004  * generally useful utilities involving numbers and strings
00005  *
00006  * needs mex.h for simple type definitions
00007  *
00008  * Michael Turmon, 2002, 2010
00009  */
00010 
00011 /*LINTLIBRARY*/
00012 
00013 #include <stdlib.h>
00014 #include <stdio.h>
00015 #include <string.h>
00016 #include <math.h>
00017 #include "mex.h"         /* mwSize type, et al. */
00018 #include "mextool.h"
00019 #include "num_strings.h" /* function templates: current file */
00020 #include "ieee_consts.h" /* mxt_getnand */
00021 
00022 /**************************************************************
00023  * 
00024  * input/output of numbers, strings, and matrices 
00025  * in text form as input to shell programs, for example
00026  *
00027  * the subroutines do not need FITSIO declarations.
00028  *
00029  * they do need the simplest matlab "mwSize" typedef's
00030  *
00031  * string_to_array: converts a string representing an arbitrary-
00032  * dimensional array to the double-precision array
00033  * string_to_matrix: converts a string representing a matrix
00034  * to a double-precision matrix
00035  * matrix_to_string: converts a double-precision matrix to a 
00036  * string
00037  * convert_string_to_type: converts a string to type number
00038  * 
00039  ****************************************************************/
00040 
00041 /* do_transpose: computes transpose, similar to matlab permute.
00042  * 
00043  * x: pointer to vectorized input
00044  * y: pointer to vectorized output (which is overwritten)
00045  * D: number of dimensions
00046  * By: bounds on each coordinate in y, as in mxGetDimensions
00047  *
00048  * Return value: y is written to.  A small malloc (size 2D+1) is
00049  * made, which may fail.  If so, 0 is returned, else 1.
00050  * 
00051  * This routine is nontrivial because it works for any dimensionality.
00052  *
00053  * Its behavior may be understood by letting ix and iy be the indexes
00054  * of corresponding elements of x and y.  If cx and cy are their
00055  * base-Bx and base-By digit expansions, we can write
00056  *
00057  * ix <--> cx[0],...cx[D-1]
00058  * iy <--> cy[0],...cy[D-1]
00059  *
00060  * where cx[0] = cy[D-1], etc., ie cy is the digit-reversed version
00061  * of cx.  So if you tell me one of these four quantities, I can find
00062  * the other three.
00063  * The loop unfolds by incrementing ix by 1 each time.  There's no
00064  * need to keep track of cx.  However, cy must be maintained to 
00065  * make the proper adjustments to iy.  In particular, ix++ corresponds 
00066  * to cx[0]++, ie to cy[D-1]++, ie to iy += delta_y where 
00067  * delta_y = By[0] * ... * By[D-2].  It's also useful to define
00068  * 
00069  * pi[0] = 1
00070  * pi[d] = By[0] * ... * By[d-1] 
00071  * 
00072  * so that pi[d] is the increment in iy due to an increment by one
00073  * in cy[d].  Note that pi[D] is the number of entries in the array.
00074  * The tricky point is carries: Some increments of ix cause a carry
00075  * "upward" in cx, which ripples downward in cy.  To adjust cy for
00076  * a carry, just set one of its digits to zero, and then increment
00077  * the previous one.  In iy, this corresponds to subtracting B*pi[d]
00078  * from iy, and adding pi[d-1] back in.
00079  * 
00080  * In July 2000 (matlab 5.3/linpack and 5.3.1/lapack) this was within 10%
00081  * of matlab's own ' for large, two-dimensional matrices.
00082  *
00083  * Michael Turmon, July 2000
00084  */
00085 int
00086 do_transpose(double *x,
00087          double *y,
00088          mwSize D,
00089          mwSize *By)
00090 {
00091   mwSize d; /* dimension counter */
00092   mwSize ix, iy; /* indexes into x and y */
00093   mwSize *pi = (mwSize *) calloc(D+1, sizeof(*pi));
00094   mwSize *cy = (mwSize *) calloc(D,   sizeof(*cy));
00095 
00096   if (pi == NULL || cy == NULL) return 0; /* failure */
00097   /* initial calculations */
00098   /* NB, pi[d] stores the increment in the scalar index implied by 
00099      incrementing digit d of the coordinate vector for y */
00100   for (pi[0] = 1, d = 1; d <= D; d++)
00101     pi[d] = pi[d-1] * By[d-1]; /* partial products */
00102   /* loop over all elements */
00103   ix = iy = 0; /* start both at origin (0,0,...0) */
00104   /* NB: all digits of cy are also 0 because of calloc */
00105   while (ix < pi[D]) {
00106     /* perform an assignment */
00107     y[iy] = x[ix];
00108     /* increment coordinates */
00109     ix++;           /* inc index into x */
00110     iy += pi[D-1];  /* inc index into y (increments MSD by 1) */
00111     cy[D-1]++;      /* inc MSD of y index, counterpart of above line */
00112     /* propagate carry down towards LSD if necessary */
00113     for (d = D-1; d > 0 && cy[d] == By[d]; d--) {
00114       /* there was overflow of digit-d --> digit-(d-1) */
00115       /* first, fix cy by carrying once towards LSD */
00116       cy[d] = 0; /* reset digit d */
00117       cy[d-1]++; /* increment digit d-1 (which may in turn overflow) */
00118       /* next, fix iy */
00119       /* step 1, subtract pi[d+1] = By[d] * pi[d].  This can be viewed
00120      as resetting iy to its status before we began looping on this 
00121      digit (doing By[d] iterations, each adding pi[d] to iy).  It
00122      can also be viewed as the precise counterpart of the line above
00123      setting cy[d] from By[d] to zero. */
00124       iy -= pi[d+1]; /* NB, d <= D-1, but pi[D] is valid */
00125       /* step 2, move up to the next element in this digit's loop by
00126      adding pi[d-1] to the index.  This is the counterpart of the
00127      line above, adding one to cy[d-1]. */
00128       iy += pi[d-1]; /* NB, d > 0, so we never reach too low into pi */
00129     }
00130   }
00131   return 1;
00132 }
00133 
00134 /* 
00135  * parse an array, input as a string
00136  *
00137  * string format:
00138  * array  := [array,array,...,array]
00139  *        := matrix
00140  * matrix := [a_11,a_12,...,a_1n;a_21,...,a_2n;...;a_m1,a_mn]
00141  * and in particular, the "matrix" is as indicated below in string_to_matrix.
00142  * Each component "array" must be the same size of all the other ones.
00143  * So, for example, a 1x1x1 array of the constant 42 is: [[42]].
00144  * Generally, the dimensionality is 1 plus the number of opening
00145  * left brackets (thus, always 2 or greater).
00146  * Suppose the dimensions of the array are d_1 x d_2 x ... d_N.
00147  * The number of arrays in the outermost group is d_1, and the number
00148  * in the innermost group is d_N-2.  Each of the latter arrays is 
00149  * exactly a "matrix" that is read in by string_to_matrix.  Thus,
00150  * in the input string, the "d_N" dimension changes fastest, while
00151  * in the output representation, the "d_1" dimension changes fastest.
00152  * The separating commas in the expansion of the array are not necessary
00153  * and may be omitted.
00154  * 
00155  * There are many places where errors are detected.  We use a flag and a
00156  * goto to handle errors: the flag is set to indicate error at the start
00157  * of the routine, and only set to "OK" when input is validated.  Any error
00158  * condition along the way branches to the error label, where all cleanup 
00159  * is done.  The error-free cases fall through to this cleanup stage also,
00160  * but with the flag set to indicate OK.  The cleanup is just freeing of
00161  * alloc'd variables -- they are 0 if they were never alloc'd.
00162  */
00163 mwSize *
00164 string_to_array(mwSize datamax,
00165         char *s, 
00166         mwSize *dimP, 
00167         double *data) 
00168 {
00169   char *s1;          /* string position */
00170   char *s2 = 0;      /* temporary string (alloc'd) */
00171   double *data0 = 0; /* temporary for input values (alloc'd) */
00172   /* consistently use SignedIndex below because we use (-1) for flags */
00173   mwSignedIndex *dims = 0;  /* dimension size (to be returned) (alloc'd) */
00174   mwSignedIndex *dims0 = 0; /* dimension counters (alloc'd) */
00175   mwSignedIndex Nd;    /* number of dimensions */
00176   mwSignedIndex d;     /* counts dimensions */
00177   mwSignedIndex level; /* counter */
00178   mwSignedIndex m, Nm; /* number of submatrices */
00179   mwSignedIndex M, N;  /* submatrix dimensions */
00180   size_t l;            /* a string length */
00181   int convert_ok = 0;  /* assume error, only corrected if read OK */
00182 
00183   *dimP = 0; /* nonsense value */
00184   s2 = calloc(strlen(s)+1, sizeof(char)); /* guaranteed to be long enough */
00185   data0 = calloc(datamax, sizeof(double));
00186   if (!s2 || !data0) goto error; /* failed calloc */
00187   /* check for balanced brackets, and find dimensionality */
00188   for (Nd = level = 0, s1 = s; *s1; s1++) {
00189     if (*s1 == '[') level++;
00190     else if (*s1 == ']') level--;
00191     if (level < 0) goto error; /* too many ] */
00192     if (level > Nd) Nd = level; /* update depth */
00193   }
00194   /* level != 0 => unbalanced []. Nd == 0 => no brackets found */
00195   if (level != 0 || Nd == 0) goto error;
00196   Nd++; /* #dims = #brackets + 1.  Note: Nd >= 2 */
00197   /* obtain and check size */
00198   dims  = calloc((size_t) Nd, sizeof(*dims)); /* space for the size */
00199   dims0 = calloc(Nd, sizeof(*dims0)); /* counters to compare against above */
00200   if (!dims || !dims0) goto error;
00201   for (d = 0; d < Nd; d++) dims[d] = -1; /* dims = unknown */
00202   level = 0; /* use as a flag: was any non-bracket seen? */
00203   for (d = -1, s1 = s; *s1; s1++) {
00204     /* d: which level of bracket are we at (0 = outermost)
00205        dims0[d]: # bracketed groups at level d so far
00206        dims[d]:  # bracketed groups seen before at level d, -1 = don't know
00207     */
00208     if (*s1 == '[') {
00209       if (d == -1 && dims[0] != -1)
00210     goto error; /* only allow 1 element at the top level (disallow [][]) */
00211       d++;
00212       dims0[d] = 0; /* begin checking dimension d */
00213     } else if (*s1 == ']') {
00214       /* close the group at dimension d */
00215       if (dims[d] < 0)
00216     dims[d] = dims0[d]; /* no size yet: fill it in for later */
00217       if (dims[d] != dims0[d]) 
00218     goto error;  /* check the size */
00219       d--; /* move up to the enclosing group */
00220       if (d >= 0) /* NB: outermost group has d = -1 */
00221     dims0[d]++; /* we completed one more element in that group */
00222     } else if (*s1 == ',') {
00223       if (d == Nd-2)
00224     continue;  /* all commas in lowest grouping ok (check them later) */
00225       if (*(s1 - 1) == ']' && *(s1 + 1) == '[')
00226     continue; /* higher commas must appear so: ],[ */
00227       goto error; /* disallow [[1],[2],] and [,[1]] and [1], and ,[] etc. */
00228     } else {
00229       /* other characters: must be at bottom level */
00230       if (d != Nd - 2)
00231     goto error; /* other chars, not at bottom level: must be an error */
00232       level = 1; /* we did see other characters (=> non-empty matrix) */
00233     }
00234   }
00235   /* find cumulative sizes dims[0], dims[0]*dims[1], ... 
00236    * note that dims[Nd-1] and dims[Nd-2] are not known yet */
00237   for (d = 1, dims0[0] = dims[0]; d < Nd-2; d++)
00238     dims0[d] = dims[d] * dims0[d-1];
00239   /* special-case empty matrices: hard to isolate for string_to_matrix() */
00240   if (!level) {
00241     Nm = 0; /* no matrices to read */
00242     dims[Nd-1] = dims[Nd-2] = 0; /* set these up now */
00243   } else {
00244     Nm = Nd == 2 ? 1 : dims0[Nd-3]; /* number of matrices to read */
00245   }
00246   /* read in each component matrix */
00247   for (M = N = -1/* flag */, m = 0, s1 = s; m < Nm; m++) {
00248     /* skip s1 forward to first non-bracket */
00249     l = strspn(s1, "[],");
00250     s1 += l-1;  /* s1 = "[<matrix>]..." */
00251     /* extract [<matrix>] alone from s1 into s2 -- leave no trailing junk */
00252     l = strcspn(s1, "]");
00253     strncpy(s2, s1, l+1); s2[l+1] = '\0';
00254     /* read M*N elements into data starting at s1 */
00255     /* first iter: m=0 so m*M*N=0, which is good since M,N unknown then! */
00256     if (!string_to_matrix(datamax/Nm, s2, &M, &N, data0+m*M*N, 1))
00257       goto error; /* fail */
00258     /* check submatrix size */
00259     if (m == 0) {
00260       dims[Nd-1] = N; dims[Nd-2] = M; /* first submatrix sets size for later */
00261     } else {
00262       if (dims[Nd-1] != N || dims[Nd-2] != M) goto error; /* mismatch */
00263     }
00264     /* ready for next iter */
00265     l = strcspn(s1, "]");
00266     s1 += l; /* move up to ] */
00267   }
00268   /* check for junk at end of s1 */
00269   if (Nm > 0 && (strlen(s1) != strspn(s1, "]")))
00270     goto error; /* remainder must be only ]'s */
00271   /* transpose into matlab ordering */
00272   if (!do_transpose(data0, data, (mwSize) Nd, (mwSize *) dims)) goto error;
00273   *dimP = Nd; 
00274   convert_ok = 1; /* signal success */
00275  error:  /* free all memory that has been allocated, and exit */
00276   if (s2)    free(s2);
00277   if (data0) free(data0);
00278   if (dims0) free(dims0);
00279   if (dims && !convert_ok)
00280     free(dims); /* if error, should also free dims - it will be unused */
00281   /* if success: return pointer to the dimension list (it was calloc'd here) 
00282    *             cast it from (mwSignedIndex *) to (mwSize *)
00283    * if failure: return NULL 
00284    */
00285   return convert_ok ? ((mwSize *) dims) : NULL;
00286 }
00287 
00288 /*
00289  * parse a matrix, which is input as a string
00290  *
00291  * must be a <string> like:
00292  * <[a_11,a_12,...,a_1n;a_21,...,a_2n;...;a_m1,a_mn]>
00293  * where each a_ik = <whitespace><NUMBER><whitespace>
00294  * the whitespace can be null, and NUMBER is either
00295  * an ordinary floating-point constant or the
00296  * literal strings NaN, Inf, -Inf, which are interpreted 
00297  * as the corresponding IEEE special values.
00298  *
00299  * returns the computed size in Mp and Np, the 
00300  * numbers in data, and 0 or 1 according to 
00301  * success or failure.  The maximum number
00302  * of items allowed to convert is in datamax.
00303  */
00304 int
00305 string_to_matrix(mwSize datamax,
00306          char *s, 
00307          mwSize *Mp, 
00308          mwSize *Np, 
00309          double *data,
00310          int raw) 
00311 {
00312   char s1[MXT_IO_TMATRIX_STRLEN]; /* s without [ ] */
00313   char row1[MXT_IO_TMATRIX_STRLEN]; /* first row of s1 */
00314   char *delim, *delim2; /* placeholders */
00315   int nchar;
00316   int l = strlen(s);
00317   int row, col; /* row and column sizes */
00318   int ri, ci;   /* row, column indexing */
00319   int nscan; /* number of data read in so far */
00320 
00321   *Mp = *Np = 0;
00322   /* strip [] */
00323   if (l < 2)
00324     return 0; /* not enough chars */ 
00325   if (s[0] != '[' || s[l-1] != ']') 
00326     return 0; /* insist on no whitespace here */
00327   strcpy(s1, s+1); /* strips [ */
00328   s1[l-2] = '\0';  /* strips ] */
00329   /* find number of rows */
00330   row = 1; /* assume one row */
00331   delim = s1;
00332   while ((delim = strchr(delim, ';')) != NULL) {
00333     row++;
00334     delim++; /* advance past ; */
00335   }
00336   /* find number of columns */
00337   /* isolate the first row of s1 into row1 */
00338   for (delim2 = row1, delim = s1; *delim && *delim != ';'; )
00339     *delim2++ = *delim++;
00340   *delim2 = '\0'; /* terminate */
00341   /* now, as before with rows */
00342   col = 1; /* assume one col */
00343   delim = row1;
00344   while ((delim = strchr(delim, ',')) != NULL) {
00345     col++; 
00346     delim++; /* advance past , */
00347   }
00348   /* check for empty matrix */
00349   if (col == 1 && row == 1) {
00350     if (strspn(s1, " \t") == strlen(s1)) /* nothing but whitespace */
00351       col = 0; /* but let row = 1 still */
00352   }
00353   /* setup and check sizes */
00354   *Mp = row;
00355   *Np = col;
00356   if (col * row > datamax)
00357     return 0;
00358   /* scan entries, enforcing format */
00359   delim = s1; /* current pos in string */
00360   nscan = -1;  /* read first element */
00361   for (ri = 0; ri < row; ri++) 
00362     for (ci = 0; ci < col; ci++) {
00363       nscan = raw ? nscan + 1 : ci*row + ri; /* current target element */
00364       /* skip leading spaces */
00365       delim += strspn(delim, " \t");
00366       /* get the number */
00367       if (strncasecmp(delim, "nan", (size_t) 3) == 0) {
00368     data[nscan] = mxt_getnand();
00369     delim += 3; /* advance */
00370       } else if (strncasecmp(delim, "-inf", (size_t) 4) == 0) {
00371     data[nscan] = -1.0 * mxt_getinfd();
00372     delim += 4; /* advance */
00373       } else if (strncasecmp(delim, "inf", (size_t) 3) == 0) {
00374     data[nscan] = mxt_getinfd();
00375     delim += 3; /* advance */
00376       } else {
00377     if (sscanf(delim, "%lf%n", &data[nscan], &nchar) != 1)
00378       return 0; /* no number present in this slot */
00379     delim += nchar; /* advance */
00380       }
00381       /* skip trailing spaces */
00382       delim += strspn(delim, " \t"); 
00383       /* check for proper delimiter */
00384       if (ci != col-1) {
00385     if (*delim != ',') /* need a , unless at end of row */
00386       return 0;
00387       } else {
00388     if (ri != row-1)
00389       if (*delim != ';') /* need a ; unless at end of matrix */
00390         return 0;
00391       }
00392       delim++; /* advance past ; or , */
00393     }
00394   delim--; /* compensate for advance past nonexistent final delimiter above */
00395   if (*delim != '\0')
00396     return 0; /* junk at end */
00397   return 1;
00398 }
00399 
00400 
00401 /*
00402  * as above, but with int arguments, for compatibility with 
00403  * extract/insert_keywords
00404  */
00405 int
00406 string_to_matrix_ints(int datamax,
00407               char *s, 
00408               int *Mp, 
00409               int *Np, 
00410               double *data,
00411               int raw) 
00412 {
00413   mwSize Mp1, Np1;
00414   int result;
00415 
00416   result =
00417     string_to_matrix((mwSize) datamax, s, &Mp1, &Np1, data, raw);
00418   *Mp = Mp1;
00419   *Np = Np1;
00420   return result;
00421 }
00422 
00423 
00424 /*
00425  * output a string corresponding to a list of doubles (matrix)
00426  *
00427  * the string s must already be allocated by the calling program
00428  *
00429  * This routine is currently little used.
00430  * It has been largely replaced by PutNumericArray/PutNumericMatrix_level
00431  *
00432  * Outputs a <string> like:
00433  * <[a_11,a_12,...,a_1n;a_21,...,a_2n;...;a_m1,a_mn]>
00434  * where each a_ik = is either
00435  * an ordinary floating-point constant or the
00436  * literal strings NaN, Inf, -Inf, which are written for 
00437  * the corresponding IEEE special values.
00438  *
00439  * Brackets can be omitted.
00440  *
00441  */
00442 void
00443 matrix_to_string(double *data,
00444          mwSize M, 
00445          mwSize N,
00446          int bracketed, /* 1 if surrounded by [] */
00447          char *s) 
00448 
00449 {
00450   int m, n; /* counters */
00451   int nchar; /* number of characters converted */
00452 
00453   /* opening [ */
00454   if (bracketed) *s++ = '[';
00455   /* the numbers */
00456   for (m = 0; m < M; m++) {
00457     for (n = 0; n < N; n++) {
00458       /* get the number */
00459       if (isnan(*data)) {
00460     strcpy(s, "NaN");
00461     s += 3;
00462       } else if (!finite(*data) && *data < 0) {
00463     strcpy(s, "-Inf");
00464     s += 4;
00465       } else if (!finite(*data) && *data > 0) {
00466     strcpy(s, "Inf");
00467     s += 3;
00468       } else {
00469     sprintf(s, "%.12g%n", *data, &nchar);
00470     s += nchar;
00471       }
00472       data++; /* move on */
00473       if (n != N-1)
00474     *s++ = ','; /* separating numbers: , */
00475     }
00476     if (m != M-1)
00477     *s++ = ';'; /* separating rows: ; */
00478   }
00479   /* closing ] */
00480   if (bracketed) *s++ = ']';
00481   *s++ = '\0';
00482 }
00483 
00484 /*
00485  * as above, but with ints for compatibility with extract_keywords
00486  */
00487 void
00488 matrix_to_string_ints(double *data,
00489               int M, 
00490               int N,
00491               int bracketed,
00492               char *s) 
00493 {
00494   matrix_to_string(data, (mwSize) M, (mwSize) N, bracketed, s);
00495 }
00496 
00497 /*
00498  * convert a string to a type number
00499  */
00500 
00501 int
00502 mxt_io_convert_string_to_type(const char *type)
00503 {
00504   int flag;
00505 
00506   /* account for a leading u flag */
00507   if (*type == 'u') {
00508     type++;  /* advance past it */
00509     flag = MXT_IO_UBRACK;   /* flag, if set, means "unbracketed" */
00510   } else {
00511     flag = 0;
00512   }
00513   /* OR the flag into the return value */
00514   if (strcmp(type, "TSHORT") == 0)
00515     return (flag | MXT_IO_TSHORT);
00516   else if (strcmp(type, "TINT") == 0)
00517     return (flag | MXT_IO_TINT);
00518   else if (strcmp(type, "TFLOAT") == 0)
00519     return (flag | MXT_IO_TFLOAT);
00520   else if (strcmp(type, "TDOUBLE") == 0)
00521     return (flag | MXT_IO_TDOUBLE);
00522   else if (strcmp(type, "TSTRING") == 0)
00523     return (flag | MXT_IO_TSTRING);
00524   else if (strcmp(type, "TMATRIX") == 0)
00525     return (flag | MXT_IO_TMATRIX); /* actually a matrixio type */
00526   else
00527     return MXT_IO_TBAD;
00528 }
00529
proj/mag/ident/libs/util/num_strings.c