00001 /* 00002 * num_strings.c 00003 * 00004 * generally useful utilities involving numbers and strings 00005 * 00006 * needs mex.h for simple type definitions 00007 * 00008 * Michael Turmon, 2002, 2010 00009 */ 00010 00011 /*LINTLIBRARY*/ 00012 00013 #include <stdlib.h> 00014 #include <stdio.h> 00015 #include <string.h> 00016 #include <math.h> 00017 #include "mex.h" /* mwSize type, et al. */ 00018 #include "mextool.h" 00019 #include "num_strings.h" /* function templates: current file */ 00020 #include "ieee_consts.h" /* mxt_getnand */ 00021 00022 /************************************************************** 00023 * 00024 * input/output of numbers, strings, and matrices 00025 * in text form as input to shell programs, for example 00026 * 00027 * the subroutines do not need FITSIO declarations. 00028 * 00029 * they do need the simplest matlab "mwSize" typedef's 00030 * 00031 * string_to_array: converts a string representing an arbitrary- 00032 * dimensional array to the double-precision array 00033 * string_to_matrix: converts a string representing a matrix 00034 * to a double-precision matrix 00035 * matrix_to_string: converts a double-precision matrix to a 00036 * string 00037 * convert_string_to_type: converts a string to type number 00038 * 00039 ****************************************************************/ 00040 00041 /* do_transpose: computes transpose, similar to matlab permute. 00042 * 00043 * x: pointer to vectorized input 00044 * y: pointer to vectorized output (which is overwritten) 00045 * D: number of dimensions 00046 * By: bounds on each coordinate in y, as in mxGetDimensions 00047 * 00048 * Return value: y is written to. A small malloc (size 2D+1) is 00049 * made, which may fail. If so, 0 is returned, else 1. 00050 * 00051 * This routine is nontrivial because it works for any dimensionality. 00052 * 00053 * Its behavior may be understood by letting ix and iy be the indexes 00054 * of corresponding elements of x and y. If cx and cy are their 00055 * base-Bx and base-By digit expansions, we can write 00056 * 00057 * ix <--> cx[0],...cx[D-1] 00058 * iy <--> cy[0],...cy[D-1] 00059 * 00060 * where cx[0] = cy[D-1], etc., ie cy is the digit-reversed version 00061 * of cx. So if you tell me one of these four quantities, I can find 00062 * the other three. 00063 * The loop unfolds by incrementing ix by 1 each time. There's no 00064 * need to keep track of cx. However, cy must be maintained to 00065 * make the proper adjustments to iy. In particular, ix++ corresponds 00066 * to cx[0]++, ie to cy[D-1]++, ie to iy += delta_y where 00067 * delta_y = By[0] * ... * By[D-2]. It's also useful to define 00068 * 00069 * pi[0] = 1 00070 * pi[d] = By[0] * ... * By[d-1] 00071 * 00072 * so that pi[d] is the increment in iy due to an increment by one 00073 * in cy[d]. Note that pi[D] is the number of entries in the array. 00074 * The tricky point is carries: Some increments of ix cause a carry 00075 * "upward" in cx, which ripples downward in cy. To adjust cy for 00076 * a carry, just set one of its digits to zero, and then increment 00077 * the previous one. In iy, this corresponds to subtracting B*pi[d] 00078 * from iy, and adding pi[d-1] back in. 00079 * 00080 * In July 2000 (matlab 5.3/linpack and 5.3.1/lapack) this was within 10% 00081 * of matlab's own ' for large, two-dimensional matrices. 00082 * 00083 * Michael Turmon, July 2000 00084 */ 00085 int 00086 do_transpose(double *x, 00087 double *y, 00088 mwSize D, 00089 mwSize *By) 00090 { 00091 mwSize d; /* dimension counter */ 00092 mwSize ix, iy; /* indexes into x and y */ 00093 mwSize *pi = (mwSize *) calloc(D+1, sizeof(*pi)); 00094 mwSize *cy = (mwSize *) calloc(D, sizeof(*cy)); 00095 00096 if (pi == NULL || cy == NULL) return 0; /* failure */ 00097 /* initial calculations */ 00098 /* NB, pi[d] stores the increment in the scalar index implied by 00099 incrementing digit d of the coordinate vector for y */ 00100 for (pi[0] = 1, d = 1; d <= D; d++) 00101 pi[d] = pi[d-1] * By[d-1]; /* partial products */ 00102 /* loop over all elements */ 00103 ix = iy = 0; /* start both at origin (0,0,...0) */ 00104 /* NB: all digits of cy are also 0 because of calloc */ 00105 while (ix < pi[D]) { 00106 /* perform an assignment */ 00107 y[iy] = x[ix]; 00108 /* increment coordinates */ 00109 ix++; /* inc index into x */ 00110 iy += pi[D-1]; /* inc index into y (increments MSD by 1) */ 00111 cy[D-1]++; /* inc MSD of y index, counterpart of above line */ 00112 /* propagate carry down towards LSD if necessary */ 00113 for (d = D-1; d > 0 && cy[d] == By[d]; d--) { 00114 /* there was overflow of digit-d --> digit-(d-1) */ 00115 /* first, fix cy by carrying once towards LSD */ 00116 cy[d] = 0; /* reset digit d */ 00117 cy[d-1]++; /* increment digit d-1 (which may in turn overflow) */ 00118 /* next, fix iy */ 00119 /* step 1, subtract pi[d+1] = By[d] * pi[d]. This can be viewed 00120 as resetting iy to its status before we began looping on this 00121 digit (doing By[d] iterations, each adding pi[d] to iy). It 00122 can also be viewed as the precise counterpart of the line above 00123 setting cy[d] from By[d] to zero. */ 00124 iy -= pi[d+1]; /* NB, d <= D-1, but pi[D] is valid */ 00125 /* step 2, move up to the next element in this digit's loop by 00126 adding pi[d-1] to the index. This is the counterpart of the 00127 line above, adding one to cy[d-1]. */ 00128 iy += pi[d-1]; /* NB, d > 0, so we never reach too low into pi */ 00129 } 00130 } 00131 return 1; 00132 } 00133 00134 /* 00135 * parse an array, input as a string 00136 * 00137 * string format: 00138 * array := [array,array,...,array] 00139 * := matrix 00140 * matrix := [a_11,a_12,...,a_1n;a_21,...,a_2n;...;a_m1,a_mn] 00141 * and in particular, the "matrix" is as indicated below in string_to_matrix. 00142 * Each component "array" must be the same size of all the other ones. 00143 * So, for example, a 1x1x1 array of the constant 42 is: [[42]]. 00144 * Generally, the dimensionality is 1 plus the number of opening 00145 * left brackets (thus, always 2 or greater). 00146 * Suppose the dimensions of the array are d_1 x d_2 x ... d_N. 00147 * The number of arrays in the outermost group is d_1, and the number 00148 * in the innermost group is d_N-2. Each of the latter arrays is 00149 * exactly a "matrix" that is read in by string_to_matrix. Thus, 00150 * in the input string, the "d_N" dimension changes fastest, while 00151 * in the output representation, the "d_1" dimension changes fastest. 00152 * The separating commas in the expansion of the array are not necessary 00153 * and may be omitted. 00154 * 00155 * There are many places where errors are detected. We use a flag and a 00156 * goto to handle errors: the flag is set to indicate error at the start 00157 * of the routine, and only set to "OK" when input is validated. Any error 00158 * condition along the way branches to the error label, where all cleanup 00159 * is done. The error-free cases fall through to this cleanup stage also, 00160 * but with the flag set to indicate OK. The cleanup is just freeing of 00161 * alloc'd variables -- they are 0 if they were never alloc'd. 00162 */ 00163 mwSize * 00164 string_to_array(mwSize datamax, 00165 char *s, 00166 mwSize *dimP, 00167 double *data) 00168 { 00169 char *s1; /* string position */ 00170 char *s2 = 0; /* temporary string (alloc'd) */ 00171 double *data0 = 0; /* temporary for input values (alloc'd) */ 00172 /* consistently use SignedIndex below because we use (-1) for flags */ 00173 mwSignedIndex *dims = 0; /* dimension size (to be returned) (alloc'd) */ 00174 mwSignedIndex *dims0 = 0; /* dimension counters (alloc'd) */ 00175 mwSignedIndex Nd; /* number of dimensions */ 00176 mwSignedIndex d; /* counts dimensions */ 00177 mwSignedIndex level; /* counter */ 00178 mwSignedIndex m, Nm; /* number of submatrices */ 00179 mwSignedIndex M, N; /* submatrix dimensions */ 00180 size_t l; /* a string length */ 00181 int convert_ok = 0; /* assume error, only corrected if read OK */ 00182 00183 *dimP = 0; /* nonsense value */ 00184 s2 = calloc(strlen(s)+1, sizeof(char)); /* guaranteed to be long enough */ 00185 data0 = calloc(datamax, sizeof(double)); 00186 if (!s2 || !data0) goto error; /* failed calloc */ 00187 /* check for balanced brackets, and find dimensionality */ 00188 for (Nd = level = 0, s1 = s; *s1; s1++) { 00189 if (*s1 == '[') level++; 00190 else if (*s1 == ']') level--; 00191 if (level < 0) goto error; /* too many ] */ 00192 if (level > Nd) Nd = level; /* update depth */ 00193 } 00194 /* level != 0 => unbalanced []. Nd == 0 => no brackets found */ 00195 if (level != 0 || Nd == 0) goto error; 00196 Nd++; /* #dims = #brackets + 1. Note: Nd >= 2 */ 00197 /* obtain and check size */ 00198 dims = calloc((size_t) Nd, sizeof(*dims)); /* space for the size */ 00199 dims0 = calloc(Nd, sizeof(*dims0)); /* counters to compare against above */ 00200 if (!dims || !dims0) goto error; 00201 for (d = 0; d < Nd; d++) dims[d] = -1; /* dims = unknown */ 00202 level = 0; /* use as a flag: was any non-bracket seen? */ 00203 for (d = -1, s1 = s; *s1; s1++) { 00204 /* d: which level of bracket are we at (0 = outermost) 00205 dims0[d]: # bracketed groups at level d so far 00206 dims[d]: # bracketed groups seen before at level d, -1 = don't know 00207 */ 00208 if (*s1 == '[') { 00209 if (d == -1 && dims[0] != -1) 00210 goto error; /* only allow 1 element at the top level (disallow [][]) */ 00211 d++; 00212 dims0[d] = 0; /* begin checking dimension d */ 00213 } else if (*s1 == ']') { 00214 /* close the group at dimension d */ 00215 if (dims[d] < 0) 00216 dims[d] = dims0[d]; /* no size yet: fill it in for later */ 00217 if (dims[d] != dims0[d]) 00218 goto error; /* check the size */ 00219 d--; /* move up to the enclosing group */ 00220 if (d >= 0) /* NB: outermost group has d = -1 */ 00221 dims0[d]++; /* we completed one more element in that group */ 00222 } else if (*s1 == ',') { 00223 if (d == Nd-2) 00224 continue; /* all commas in lowest grouping ok (check them later) */ 00225 if (*(s1 - 1) == ']' && *(s1 + 1) == '[') 00226 continue; /* higher commas must appear so: ],[ */ 00227 goto error; /* disallow [[1],[2],] and [,[1]] and [1], and ,[] etc. */ 00228 } else { 00229 /* other characters: must be at bottom level */ 00230 if (d != Nd - 2) 00231 goto error; /* other chars, not at bottom level: must be an error */ 00232 level = 1; /* we did see other characters (=> non-empty matrix) */ 00233 } 00234 } 00235 /* find cumulative sizes dims[0], dims[0]*dims[1], ... 00236 * note that dims[Nd-1] and dims[Nd-2] are not known yet */ 00237 for (d = 1, dims0[0] = dims[0]; d < Nd-2; d++) 00238 dims0[d] = dims[d] * dims0[d-1]; 00239 /* special-case empty matrices: hard to isolate for string_to_matrix() */ 00240 if (!level) { 00241 Nm = 0; /* no matrices to read */ 00242 dims[Nd-1] = dims[Nd-2] = 0; /* set these up now */ 00243 } else { 00244 Nm = Nd == 2 ? 1 : dims0[Nd-3]; /* number of matrices to read */ 00245 } 00246 /* read in each component matrix */ 00247 for (M = N = -1/* flag */, m = 0, s1 = s; m < Nm; m++) { 00248 /* skip s1 forward to first non-bracket */ 00249 l = strspn(s1, "[],"); 00250 s1 += l-1; /* s1 = "[<matrix>]..." */ 00251 /* extract [<matrix>] alone from s1 into s2 -- leave no trailing junk */ 00252 l = strcspn(s1, "]"); 00253 strncpy(s2, s1, l+1); s2[l+1] = '\0'; 00254 /* read M*N elements into data starting at s1 */ 00255 /* first iter: m=0 so m*M*N=0, which is good since M,N unknown then! */ 00256 if (!string_to_matrix(datamax/Nm, s2, &M, &N, data0+m*M*N, 1)) 00257 goto error; /* fail */ 00258 /* check submatrix size */ 00259 if (m == 0) { 00260 dims[Nd-1] = N; dims[Nd-2] = M; /* first submatrix sets size for later */ 00261 } else { 00262 if (dims[Nd-1] != N || dims[Nd-2] != M) goto error; /* mismatch */ 00263 } 00264 /* ready for next iter */ 00265 l = strcspn(s1, "]"); 00266 s1 += l; /* move up to ] */ 00267 } 00268 /* check for junk at end of s1 */ 00269 if (Nm > 0 && (strlen(s1) != strspn(s1, "]"))) 00270 goto error; /* remainder must be only ]'s */ 00271 /* transpose into matlab ordering */ 00272 if (!do_transpose(data0, data, (mwSize) Nd, (mwSize *) dims)) goto error; 00273 *dimP = Nd; 00274 convert_ok = 1; /* signal success */ 00275 error: /* free all memory that has been allocated, and exit */ 00276 if (s2) free(s2); 00277 if (data0) free(data0); 00278 if (dims0) free(dims0); 00279 if (dims && !convert_ok) 00280 free(dims); /* if error, should also free dims - it will be unused */ 00281 /* if success: return pointer to the dimension list (it was calloc'd here) 00282 * cast it from (mwSignedIndex *) to (mwSize *) 00283 * if failure: return NULL 00284 */ 00285 return convert_ok ? ((mwSize *) dims) : NULL; 00286 } 00287 00288 /* 00289 * parse a matrix, which is input as a string 00290 * 00291 * must be a <string> like: 00292 * <[a_11,a_12,...,a_1n;a_21,...,a_2n;...;a_m1,a_mn]> 00293 * where each a_ik = <whitespace><NUMBER><whitespace> 00294 * the whitespace can be null, and NUMBER is either 00295 * an ordinary floating-point constant or the 00296 * literal strings NaN, Inf, -Inf, which are interpreted 00297 * as the corresponding IEEE special values. 00298 * 00299 * returns the computed size in Mp and Np, the 00300 * numbers in data, and 0 or 1 according to 00301 * success or failure. The maximum number 00302 * of items allowed to convert is in datamax. 00303 */ 00304 int 00305 string_to_matrix(mwSize datamax, 00306 char *s, 00307 mwSize *Mp, 00308 mwSize *Np, 00309 double *data, 00310 int raw) 00311 { 00312 char s1[MXT_IO_TMATRIX_STRLEN]; /* s without [ ] */ 00313 char row1[MXT_IO_TMATRIX_STRLEN]; /* first row of s1 */ 00314 char *delim, *delim2; /* placeholders */ 00315 int nchar; 00316 int l = strlen(s); 00317 int row, col; /* row and column sizes */ 00318 int ri, ci; /* row, column indexing */ 00319 int nscan; /* number of data read in so far */ 00320 00321 *Mp = *Np = 0; 00322 /* strip [] */ 00323 if (l < 2) 00324 return 0; /* not enough chars */ 00325 if (s[0] != '[' || s[l-1] != ']') 00326 return 0; /* insist on no whitespace here */ 00327 strcpy(s1, s+1); /* strips [ */ 00328 s1[l-2] = '\0'; /* strips ] */ 00329 /* find number of rows */ 00330 row = 1; /* assume one row */ 00331 delim = s1; 00332 while ((delim = strchr(delim, ';')) != NULL) { 00333 row++; 00334 delim++; /* advance past ; */ 00335 } 00336 /* find number of columns */ 00337 /* isolate the first row of s1 into row1 */ 00338 for (delim2 = row1, delim = s1; *delim && *delim != ';'; ) 00339 *delim2++ = *delim++; 00340 *delim2 = '\0'; /* terminate */ 00341 /* now, as before with rows */ 00342 col = 1; /* assume one col */ 00343 delim = row1; 00344 while ((delim = strchr(delim, ',')) != NULL) { 00345 col++; 00346 delim++; /* advance past , */ 00347 } 00348 /* check for empty matrix */ 00349 if (col == 1 && row == 1) { 00350 if (strspn(s1, " \t") == strlen(s1)) /* nothing but whitespace */ 00351 col = 0; /* but let row = 1 still */ 00352 } 00353 /* setup and check sizes */ 00354 *Mp = row; 00355 *Np = col; 00356 if (col * row > datamax) 00357 return 0; 00358 /* scan entries, enforcing format */ 00359 delim = s1; /* current pos in string */ 00360 nscan = -1; /* read first element */ 00361 for (ri = 0; ri < row; ri++) 00362 for (ci = 0; ci < col; ci++) { 00363 nscan = raw ? nscan + 1 : ci*row + ri; /* current target element */ 00364 /* skip leading spaces */ 00365 delim += strspn(delim, " \t"); 00366 /* get the number */ 00367 if (strncasecmp(delim, "nan", (size_t) 3) == 0) { 00368 data[nscan] = mxt_getnand(); 00369 delim += 3; /* advance */ 00370 } else if (strncasecmp(delim, "-inf", (size_t) 4) == 0) { 00371 data[nscan] = -1.0 * mxt_getinfd(); 00372 delim += 4; /* advance */ 00373 } else if (strncasecmp(delim, "inf", (size_t) 3) == 0) { 00374 data[nscan] = mxt_getinfd(); 00375 delim += 3; /* advance */ 00376 } else { 00377 if (sscanf(delim, "%lf%n", &data[nscan], &nchar) != 1) 00378 return 0; /* no number present in this slot */ 00379 delim += nchar; /* advance */ 00380 } 00381 /* skip trailing spaces */ 00382 delim += strspn(delim, " \t"); 00383 /* check for proper delimiter */ 00384 if (ci != col-1) { 00385 if (*delim != ',') /* need a , unless at end of row */ 00386 return 0; 00387 } else { 00388 if (ri != row-1) 00389 if (*delim != ';') /* need a ; unless at end of matrix */ 00390 return 0; 00391 } 00392 delim++; /* advance past ; or , */ 00393 } 00394 delim--; /* compensate for advance past nonexistent final delimiter above */ 00395 if (*delim != '\0') 00396 return 0; /* junk at end */ 00397 return 1; 00398 } 00399 00400 00401 /* 00402 * as above, but with int arguments, for compatibility with 00403 * extract/insert_keywords 00404 */ 00405 int 00406 string_to_matrix_ints(int datamax, 00407 char *s, 00408 int *Mp, 00409 int *Np, 00410 double *data, 00411 int raw) 00412 { 00413 mwSize Mp1, Np1; 00414 int result; 00415 00416 result = 00417 string_to_matrix((mwSize) datamax, s, &Mp1, &Np1, data, raw); 00418 *Mp = Mp1; 00419 *Np = Np1; 00420 return result; 00421 } 00422 00423 00424 /* 00425 * output a string corresponding to a list of doubles (matrix) 00426 * 00427 * the string s must already be allocated by the calling program 00428 * 00429 * This routine is currently little used. 00430 * It has been largely replaced by PutNumericArray/PutNumericMatrix_level 00431 * 00432 * Outputs a <string> like: 00433 * <[a_11,a_12,...,a_1n;a_21,...,a_2n;...;a_m1,a_mn]> 00434 * where each a_ik = is either 00435 * an ordinary floating-point constant or the 00436 * literal strings NaN, Inf, -Inf, which are written for 00437 * the corresponding IEEE special values. 00438 * 00439 * Brackets can be omitted. 00440 * 00441 */ 00442 void 00443 matrix_to_string(double *data, 00444 mwSize M, 00445 mwSize N, 00446 int bracketed, /* 1 if surrounded by [] */ 00447 char *s) 00448 00449 { 00450 int m, n; /* counters */ 00451 int nchar; /* number of characters converted */ 00452 00453 /* opening [ */ 00454 if (bracketed) *s++ = '['; 00455 /* the numbers */ 00456 for (m = 0; m < M; m++) { 00457 for (n = 0; n < N; n++) { 00458 /* get the number */ 00459 if (isnan(*data)) { 00460 strcpy(s, "NaN"); 00461 s += 3; 00462 } else if (!finite(*data) && *data < 0) { 00463 strcpy(s, "-Inf"); 00464 s += 4; 00465 } else if (!finite(*data) && *data > 0) { 00466 strcpy(s, "Inf"); 00467 s += 3; 00468 } else { 00469 sprintf(s, "%.12g%n", *data, &nchar); 00470 s += nchar; 00471 } 00472 data++; /* move on */ 00473 if (n != N-1) 00474 *s++ = ','; /* separating numbers: , */ 00475 } 00476 if (m != M-1) 00477 *s++ = ';'; /* separating rows: ; */ 00478 } 00479 /* closing ] */ 00480 if (bracketed) *s++ = ']'; 00481 *s++ = '\0'; 00482 } 00483 00484 /* 00485 * as above, but with ints for compatibility with extract_keywords 00486 */ 00487 void 00488 matrix_to_string_ints(double *data, 00489 int M, 00490 int N, 00491 int bracketed, 00492 char *s) 00493 { 00494 matrix_to_string(data, (mwSize) M, (mwSize) N, bracketed, s); 00495 } 00496 00497 /* 00498 * convert a string to a type number 00499 */ 00500 00501 int 00502 mxt_io_convert_string_to_type(const char *type) 00503 { 00504 int flag; 00505 00506 /* account for a leading u flag */ 00507 if (*type == 'u') { 00508 type++; /* advance past it */ 00509 flag = MXT_IO_UBRACK; /* flag, if set, means "unbracketed" */ 00510 } else { 00511 flag = 0; 00512 } 00513 /* OR the flag into the return value */ 00514 if (strcmp(type, "TSHORT") == 0) 00515 return (flag | MXT_IO_TSHORT); 00516 else if (strcmp(type, "TINT") == 0) 00517 return (flag | MXT_IO_TINT); 00518 else if (strcmp(type, "TFLOAT") == 0) 00519 return (flag | MXT_IO_TFLOAT); 00520 else if (strcmp(type, "TDOUBLE") == 0) 00521 return (flag | MXT_IO_TDOUBLE); 00522 else if (strcmp(type, "TSTRING") == 0) 00523 return (flag | MXT_IO_TSTRING); 00524 else if (strcmp(type, "TMATRIX") == 0) 00525 return (flag | MXT_IO_TMATRIX); /* actually a matrixio type */ 00526 else 00527 return MXT_IO_TBAD; 00528 } 00529