00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <time.h>
00004 #include <string.h>
00005 #include <math.h>
00006 #include <mkl.h>
00007 #include <omp.h>
00008 #include <sys/param.h>
00009 #include "cholesky_down.h"
00010 #include "gapfill.h"
00011
00012
00013 #define minval(x,y) (((x) < (y)) ? (x) : (y))
00014 #define maxval(x,y) (((x) < (y)) ? (y) : (x))
00015
00016 #define kInterpDataDir "proj/libs/interpolate/data"
00017
00018 struct work_struct {
00019 double *a,*rh,*a1b,*a1r,*a1x;
00020 int *wgood,*wbad;
00021 };
00022
00023 void malloc_work(
00024
00025 int nsample,
00026 struct work_struct *work
00027 )
00028 {
00029 int malign=32;
00030
00031 work->wgood=(int *)(MKL_malloc(nsample*sizeof(int),malign));
00032 work->wbad=(int *)(MKL_malloc(nsample*sizeof(int),malign));
00033 work->a=(double *)(MKL_malloc(nsample*nsample*sizeof(double),malign));
00034 work->rh=(double *)(MKL_malloc(nsample*sizeof(double),malign));
00035 work->a1b=(double *)(MKL_malloc(nsample*sizeof(double),malign));
00036 work->a1r=(double *)(MKL_malloc(nsample*sizeof(double),malign));
00037 work->a1x=(double *)(MKL_malloc(2*nsample*sizeof(double),malign));
00038 }
00039
00040 void free_work(
00041
00042 int nsample,
00043 struct work_struct *work
00044 )
00045 {
00046 MKL_free(work->wgood);
00047 MKL_free(work->wbad);
00048 MKL_free(work->a);
00049 MKL_free(work->rh);
00050 MKL_free(work->a1b);
00051 MKL_free(work->a1r);
00052 MKL_free(work->a1x);
00053 }
00054
00055 int init_fill(
00056 int method,
00057 double pnoise,
00058 int order,
00059 int targetx,
00060 int targety,
00061 struct fill_struct *pars,
00062 char **filenamep,
00063
00064 const char *path
00065 )
00066 {
00067 double *acor;
00068 int nx=80;
00069 int malign=32;
00070 double t0,t1;
00071 FILE *fileptr;
00072 int i,j;
00073 int *offx,*offy,ox,oy;
00074 int nsample,inoise;
00075 double regul,pmin;
00076 double *a0,*rh0,*a0t,*rh0t;
00077 int info;
00078 char uplo[] = "U";
00079 char buf[PATH_MAX];
00080
00081 pars->order=order;
00082
00083 t0=dsecnd();
00084 switch (method) {
00085 case 0:
00086 pars->filename=strdup(*filenamep);
00087 break;
00088 case 8:
00089 snprintf(buf, sizeof(buf), "%s/%s/acor1_08_80.txt", path, kInterpDataDir);
00090 pars->filename=strdup(buf);
00091 break;
00092 case 9:
00093 snprintf(buf, sizeof(buf), "%s/%s/acor1_09_80.txt", path, kInterpDataDir);
00094 pars->filename=strdup(buf);
00095 break;
00096 case 10:
00097 snprintf(buf, sizeof(buf), "%s/%s/acor1_10_80.txt", path, kInterpDataDir);
00098 pars->filename=strdup(buf);
00099 break;
00100 case 11:
00101 snprintf(buf, sizeof(buf), "%s/%s/acor1_11_80.txt", path, kInterpDataDir);
00102 pars->filename=strdup(buf);
00103 break;
00104 default:
00105 printf("Unknown method in init_fill.\n");
00106 return 1;
00107 break;
00108 }
00109 fileptr = fopen (pars->filename,"r");
00110 if (fileptr==NULL) {
00111 printf("File not found in init_fill.\n");
00112 return 1;
00113 }
00114 if (method != 0) {
00115 if (filenamep != NULL) {
00116 *filenamep=strdup(pars->filename);
00117 }
00118 }
00119 acor=(double *)(MKL_malloc(nx*nx*sizeof(double),malign));
00120
00121 for (i=0;i<nx*nx;i++) fscanf(fileptr,"%lf",acor+i);
00122 fclose(fileptr);
00123 t0=dsecnd()-t0;
00124
00125
00126 regul=pnoise*pnoise;
00127 inoise=1;
00128 pmin=regul*inoise;
00129
00130 nsample=order*order;
00131 t1=dsecnd();
00132 offx=(int *)(MKL_malloc(nsample*sizeof(int),malign));
00133 offy=(int *)(MKL_malloc(nsample*sizeof(int),malign));
00134 for (i=0;i<nsample;i++) {
00135 offx[i]=(i%order)-targetx;
00136 offy[i]=(i/order)-targety;
00137 }
00138
00139 pars->a0=(double *)(MKL_malloc(nsample*nsample*sizeof(double),malign));
00140 pars->a00=(double *)(MKL_malloc(nsample*nsample*sizeof(double),malign));
00141 pars->a0t=(double *)(MKL_malloc(nsample*nsample*sizeof(double),malign));
00142 a0=pars->a0;
00143 a0t=pars->a0t;
00144 for (i=0;i<nsample;i++) {
00145 for (j=0;j<nsample;j++) {
00146 ox=((offx[j]-offx[i])+nx) % nx;
00147 oy=((offy[j]-offy[i])+nx) % nx;
00148 a0[i+nsample*j]=acor[ox+nx*oy];
00149 a0t[i+nsample*j]=acor[ox+nx*oy];
00150 }
00151 a0[i+nsample*i]=a0[i+nsample*i]+regul;
00152 }
00153
00154 memcpy(pars->a00,a0,nsample*nsample*sizeof(double));
00155 pars->acort00=acor[0];
00156 pars->rh0=(double *)(MKL_malloc(nsample*sizeof(double),malign));
00157 pars->rh0t=(double *)(MKL_malloc(nsample*sizeof(double),malign));
00158 rh0=pars->rh0;
00159 rh0t=pars->rh0t;
00160 for (i=0;i<nsample;i++) {
00161 rh0[i]=acor[(-offx[i]+nx) % nx+nx*((-offy[i]+nx) % nx)];
00162 rh0t[i]=acor[(-offx[i]+nx) % nx+nx*((-offy[i]+nx) % nx)];
00163 if ((offx[i]==0) && (offy[i]==0)) rh0[i]=rh0[i]+pmin;
00164 }
00165
00166 dpotrf(uplo,&nsample,pars->a0,&nsample,&info);
00167
00168 MKL_free(acor);
00169 MKL_free(offx);
00170 MKL_free(offy);
00171
00172
00173
00174
00175
00176 pars->hashmod=8191;
00177 pars->hashcount=(int *)(MKL_malloc(pars->hashmod*sizeof(int),malign));
00178 pars->hashtable=(struct fill_hash_struct **)(MKL_malloc(pars->hashmod*sizeof(struct fill_hash_struct *),malign));
00179 pars->locks=(omp_lock_t *)(MKL_malloc(pars->hashmod*sizeof(omp_lock_t),malign));
00180 pars->complocks=(omp_lock_t *)(MKL_malloc(pars->hashmod*sizeof(omp_lock_t),malign));
00181
00182 for (i=0;i<pars->hashmod;i++) {
00183 pars->hashcount[i]=0;
00184
00185 pars->hashtable[i]=(struct fill_hash_struct *)NULL;
00186 omp_init_lock(&(pars->locks[i]));
00187 omp_init_lock(&(pars->complocks[i]));
00188 }
00189 pars->ndiff=0;
00190
00191 t1=dsecnd()-t1;
00192
00193
00194 return 0;
00195 }
00196
00197 int free_fill(
00198 struct fill_struct *pars
00199 )
00200 {
00201 int i;
00202 struct fill_hash_struct *ptr,*oldptr;
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221 for (i=0;i<pars->hashmod;i++) {
00222 omp_destroy_lock(&(pars->locks[i]));
00223 omp_destroy_lock(&(pars->complocks[i]));
00224 }
00225
00226 MKL_free(pars->a0);
00227 MKL_free(pars->a00);
00228 MKL_free(pars->a0t);
00229 MKL_free(pars->rh0);
00230 MKL_free(pars->rh0t);
00231
00232 MKL_free(pars->locks);
00233 MKL_free(pars->complocks);
00234
00235
00236
00237 MKL_free(pars->hashcount);
00238 for (i=0;i<pars->hashmod;i++) {
00239 ptr=(pars->hashtable[i]);
00240 while (ptr != NULL) {
00241 oldptr=ptr;
00242 MKL_free(ptr->wbad);
00243 MKL_free(ptr->coeff);
00244 ptr=ptr->next;
00245 MKL_free(oldptr);
00246 }
00247 }
00248 MKL_free(pars->hashtable);
00249 free(pars->filename);
00250
00251 return 0;
00252
00253 }
00254
00255 int find_entry(
00256 struct fill_struct *pars,
00257 int hash0,
00258 int hash,
00259 int nbad,
00260 int *wbad,
00261 struct fill_hash_struct **ptr_out
00262 )
00263 {
00264 int found;
00265 int ident,i;
00266 struct fill_hash_struct *ptr;
00267
00268 found=0;
00269 *ptr_out=NULL;
00270
00271
00272 omp_set_lock(&(pars->locks[hash]));
00273 ptr=pars->hashtable[hash];
00274 omp_unset_lock(&(pars->locks[hash]));
00275
00276 while (ptr!=NULL) {
00277 if ((hash0==ptr->hash0) && (nbad==ptr->nbad)) {
00278
00279 ident=1;
00280 for (i=0;i<nbad;i++) {
00281 if (wbad[i] != ptr->wbad[i]) {
00282 ident=0;
00283 break;
00284 }
00285 }
00286 if (ident != 0) {
00287 #pragma omp atomic
00288 ptr->nhit++;
00289 found=1;
00290 *ptr_out=ptr;
00291 break;
00292 }
00293 }
00294 ptr=ptr->next;
00295 }
00296
00297 return found;
00298 }
00299
00300 double fill_point(
00301 struct fill_struct *pars,
00302 int *mask,
00303 double *vals,
00304 float *cnorm,
00305 float *ierror,
00306 struct work_struct *work
00307 )
00308 {
00309 double t1,t2,t3,t4,t5,t6,t7,t8;
00310 int nsample;
00311 int ngood,nbad;
00312 int *wgood,*wbad;
00313 int i,info,j;
00314 char uplo[] = "U";
00315 int ione = 1;
00316 int itwo = 2;
00317 int malign=32;
00318 double *a,*a0,*rh0,bta1b,bta1r,c,*coeff,*a0t,*a00,*a1x;
00319 float fillval;
00320 int hash0,hash,found;
00321 struct fill_hash_struct *ptr;
00322 double ierror2;
00323 int unlocked;
00324 int nleft;
00325 double tdown,tbrute;
00326 double sum;
00327
00328 t1=0.0;t2=0.0;t3=0.0;t4=0.0;t5=0.0;t6=0.0;t7=0.0;t8=0.0;
00329
00330 t1=dsecnd();
00331
00332 nsample=(pars->order)*(pars->order);
00333
00334
00335 a0=pars->a0;
00336 a00=pars->a00;
00337 a0t=pars->a0t;
00338 rh0=pars->rh0;
00339
00340 wgood=work->wgood;
00341 wbad=work->wbad;
00342
00343 ngood=0;
00344 nbad=0;
00345 hash0=0;
00346 for (i=0;i<nsample;i++) {
00347 if (mask[i]==0) {
00348 wgood[ngood]=i;
00349 ngood=ngood+1;
00350 }
00351 if (mask[i]!=0) {
00352 hash0=hash0+i;
00353 wbad[nbad]=i;
00354 nbad=nbad+1;
00355 }
00356 }
00357 hash=hash0%pars->hashmod;
00358
00359 unlocked=0;
00360
00361 found=find_entry(pars,hash0,hash,nbad,wbad,&ptr);
00362
00363 if (found == 0) {
00364 unlocked=omp_test_lock(&(pars->complocks[hash]));
00365 if (unlocked==0) omp_set_lock(&(pars->complocks[hash]));
00366
00367
00368 found=find_entry(pars,hash0,hash,nbad,wbad,&ptr);
00369 if (found!=0) {
00370
00371 omp_unset_lock(&(pars->complocks[hash]));
00372 found=2;
00373 }
00374 }
00375
00376 t1=dsecnd()-t1;
00377
00378 if (found == 0) {
00379
00380
00381
00382
00383
00384 ptr=(struct fill_hash_struct *)(MKL_malloc(sizeof(struct fill_hash_struct),malign));
00385 ptr->next=pars->hashtable[hash];
00386
00387 ptr->nbad=nbad;
00388 ptr->hash0=hash0;
00389 ptr->nhit=1;
00390 ptr->wbad=(int *)(MKL_malloc(nbad*sizeof(int),malign));
00391 for (i=0;i<nbad;i++) ptr->wbad[i]=wbad[i];
00392 coeff=(double *)(MKL_malloc(ngood*sizeof(double),malign));
00393 pars->hashcount[hash]++;
00394 #pragma omp atomic
00395 pars->ndiff++;
00396 ptr->coeff=coeff;
00397
00398
00399 a=work->a;
00400
00401
00402
00403 a1x=work->a1x;
00404
00405
00406
00407 tdown=0.0;
00408 nleft=nsample;
00409 for (i=nsample-1;i>=0;i--) {
00410 if (mask[i]!=0) {
00411 tdown=tdown+(nleft-i-1)*(nleft-i-1);
00412 nleft=nleft-1;
00413 }
00414 }
00415 tdown=3.9e-9*tdown;
00416 tbrute=4e-11*(ngood+0.0)*(ngood+0.0)*(ngood+0.0);
00417
00418
00419
00420 if (tdown<tbrute) {
00421 t2=dsecnd();
00422
00423
00424
00425
00426
00427 dlacpy(uplo,&nsample,&nsample,a0,&nsample,a,&nsample);
00428 t2=dsecnd()-t2;
00429 t3=dsecnd();
00430
00431
00432 dcholesky_down_mask(uplo, &nsample, a, &nsample, mask, &info);
00433 t3=dsecnd()-t3;
00434 }
00435 else {
00436 t4=dsecnd();
00437
00438
00439 for (i=0;i<ngood;i++) for (j=0;j<=i;j++) a[i*nsample+j]=a00[wgood[i]*nsample+wgood[j]];
00440 t4=dsecnd()-t4;
00441 t5=dsecnd();
00442
00443
00444 dpotrf(uplo,&ngood,a,&nsample,&info);
00445 t5=dsecnd()-t5;
00446 }
00447
00448 t6=dsecnd();
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465 for (i=0;i<ngood;i++) a1x[i]=1.;
00466 for (i=0;i<ngood;i++) a1x[i+nsample]=rh0[wgood[i]];
00467 dpotrs(uplo,&ngood,&itwo,a,&nsample,a1x,&nsample,&info);
00468 bta1b=0.;
00469 for (i=0;i<ngood;i++) bta1b=bta1b+a1x[i];
00470 bta1r=0.0;
00471 for (i=0;i<ngood;i++) bta1r=bta1r+a1x[i+nsample];
00472 c=(bta1r-1.)/bta1b;
00473 for (i=0;i<ngood;i++) coeff[i]=a1x[i+nsample]-c*a1x[i];
00474 ptr->cnorm=(float)dnrm2(&ngood,coeff,&ione);
00475
00476 t6=dsecnd()-t6;
00477 t7=dsecnd();
00478
00479
00480 ierror2=0.0;
00481 for (i=0;i<ngood;i++) ierror2=ierror2+coeff[i]*rh0[wgood[i]];
00482 ierror2=pars->acort00-2*ierror2;
00483
00484 for (j=0;j<ngood;j++) {
00485
00486
00487
00488
00489
00490
00491
00492
00493 sum=0.0;
00494 for (i=0;i<j;i++) {
00495 sum=sum+a0t[nsample*wgood[j]+wgood[i]]*coeff[i];
00496 }
00497 ierror2=ierror2+coeff[j]*(2*sum+coeff[j]*a0t[nsample*wgood[j]+wgood[j]]);
00498 }
00499 ptr->ierror=(float)sqrt(ierror2/pars->acort00);
00500
00501
00502
00503 omp_set_lock(&(pars->locks[hash]));
00504 pars->hashtable[hash]=ptr;
00505 #pragma omp flush
00506 omp_unset_lock(&(pars->locks[hash]));
00507
00508
00509 omp_unset_lock(&(pars->complocks[hash]));
00510
00511 t7=dsecnd()-t7;
00512
00513
00514
00515
00516 }
00517
00518 t8=dsecnd();
00519 fillval=(float)ddot(&ngood,ptr->coeff,&ione,vals,&ione);
00520 *cnorm=ptr->cnorm;
00521 *ierror=ptr->ierror;
00522
00523 t8=dsecnd()-t8;
00524
00525
00526 return fillval;
00527 }
00528
00529 int fgap_fill(
00530 struct fill_struct *pars,
00531 float *im,
00532 int nx,
00533 int ny,
00534 int nlead,
00535 unsigned char *mask,
00536 float *cnorm,
00537 float *ierror
00538 )
00539 {
00540 const unsigned char maskgood=0;
00541 const unsigned char maskfill=1;
00542 const unsigned char masknofill=2;
00543 int malign=32;
00544
00545 int i,j,i1,j1,i0,j0,k;
00546 int order,order2;
00547 int ngood,nfill;
00548 double *im1;
00549 int *mask1;
00550 float cnorm1,ierror1;
00551 int nsample;
00552 struct work_struct work;
00553 int nofill;
00554
00555
00556 order=pars->order;
00557 order2=order/2;
00558 nsample=order*order;
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576 nfill=0;
00577 nofill=0;
00578 #pragma omp parallel default(none) \
00579 private (im1,mask1,i,i0,i1,j,j0,j1,k,cnorm1,ierror1,ngood) \
00580 shared(pars,im,nx,ny,nlead,mask,cnorm,ierror) \
00581 shared(malign,order,order2) \
00582 private(work) shared(nsample) \
00583 reduction(+:nfill) \
00584 reduction(+:nofill)
00585 {
00586
00587 im1=(double *)(MKL_malloc(order*order*sizeof(double),malign));
00588 mask1=(int *)(MKL_malloc(order*order*sizeof(int),malign));
00589
00590 malloc_work(nsample,&work);
00591
00592
00593
00594
00595
00596 #pragma omp for schedule(dynamic,256)
00597 for (k=0;k<nx*ny;k++) {{
00598
00599 i=k%nx;
00600 j=k/nx;
00601 j0=j-order2;
00602 if (mask[j*nlead+i]==maskgood) {
00603 cnorm1=1.0f;
00604 ierror1=0.0f;
00605 }
00606 if (mask[j*nlead+i]==maskfill) {
00607 i0=i-order2;
00608 ngood=0;
00609 for (j1=j0;j1<(j0+order);j1++) {
00610 for (i1=i0;i1<(i0+order);i1++) {
00611 if ((i1>=0) && (i1<nx) && (j1>=0) && (j1<ny)) {
00612 mask1[i1-i0+(j1-j0)*order]=mask[i1+j1*nlead];
00613 if (mask[j1*nlead+i1]==maskgood) {
00614 im1[ngood]=im[i1+j1*nlead];
00615 ngood=ngood+1;
00616 }
00617 }
00618 else {
00619 mask1[i1-i0+(j1-j0)*order]=1;
00620 }
00621 }
00622 }
00623 if (ngood != 0) {
00624 im[j*nlead+i]=(float)fill_point(pars,mask1,im1,&cnorm1,&ierror1,&work);
00625 }
00626 else {
00627 cnorm1=0.0f;
00628 ierror1=2.0f;
00629 nofill=nofill+1;
00630 }
00631 nfill=nfill+1;
00632 }
00633 if (mask[j*nlead+i]==masknofill) {
00634 cnorm1=0.0f;
00635 ierror1=2.0f;
00636 }
00637 cnorm[j*nlead+i]=cnorm1;
00638 ierror[j*nlead+i]=ierror1;
00639
00640
00641 }
00642 }
00643
00644 MKL_free(im1);
00645 MKL_free(mask1);
00646 free_work(nsample,&work);
00647
00648 }
00649
00650
00651 if (nofill != 0) {
00652 return 1;
00653 }
00654 else {
00655 return 0;
00656 }
00657 }
00658
00659 char *gapfill_version()
00660 {
00661 return strdup("$Id: gapfill.c,v 1.10 2011/12/06 18:11:03 arta Exp $");
00662 }
00663
00664