#include #include #include #include #include #include #include #include #include "cholesky_down.h" #include "gapfill.h" #define minval(x,y) (((x) < (y)) ? (x) : (y)) #define maxval(x,y) (((x) < (y)) ? (y) : (x)) #define kInterpDataDir "proj/libs/interpolate/data" struct work_struct { double *a,*rh,*a1b,*a1r,*a1x; int *wgood,*wbad; }; void malloc_work( // Malloc work space for each thread int nsample, struct work_struct *work ) { int malign=32; work->wgood=(int *)(MKL_malloc(nsample*sizeof(int),malign)); work->wbad=(int *)(MKL_malloc(nsample*sizeof(int),malign)); work->a=(double *)(MKL_malloc(nsample*nsample*sizeof(double),malign)); work->rh=(double *)(MKL_malloc(nsample*sizeof(double),malign)); work->a1b=(double *)(MKL_malloc(nsample*sizeof(double),malign)); work->a1r=(double *)(MKL_malloc(nsample*sizeof(double),malign)); work->a1x=(double *)(MKL_malloc(2*nsample*sizeof(double),malign)); } void free_work( // Free the workspace int nsample, struct work_struct *work ) { MKL_free(work->wgood); MKL_free(work->wbad); MKL_free(work->a); MKL_free(work->rh); MKL_free(work->a1b); MKL_free(work->a1r); MKL_free(work->a1x); } int init_fill( int method, // Interpolation method double pnoise, // Level of photon noise for trade-off int order, // Interpolation order. Generally odd. int targetx, // Target point in x (normally (order-1)/2) int targety, // Target point in y (normally (order-1)/2) struct fill_struct *pars, // Structure to save setup information etc. char **filenamep, // Pointer to name of file to read covariance from. // Set to actual file used if method > 0. const char *path // to data files read by this function. ) { double *acor; int nx=80; int malign=32; double t0,t1; FILE *fileptr; int i,j; int *offx,*offy,ox,oy; int nsample,inoise; double regul,pmin; double *a0,*rh0,*a0t,*rh0t; int info; char uplo[] = "U"; char buf[PATH_MAX]; pars->order=order; t0=dsecnd(); switch (method) { case 0: pars->filename=strdup(*filenamep); break; case 8: snprintf(buf, sizeof(buf), "%s/%s/acor1_08_80.txt", path, kInterpDataDir); pars->filename=strdup(buf); break; case 9: snprintf(buf, sizeof(buf), "%s/%s/acor1_09_80.txt", path, kInterpDataDir); pars->filename=strdup(buf); break; case 10: snprintf(buf, sizeof(buf), "%s/%s/acor1_10_80.txt", path, kInterpDataDir); pars->filename=strdup(buf); break; case 11: snprintf(buf, sizeof(buf), "%s/%s/acor1_11_80.txt", path, kInterpDataDir); pars->filename=strdup(buf); break; default: printf("Unknown method in init_fill.\n"); return 1; break; } fileptr = fopen (pars->filename,"r"); if (fileptr==NULL) { printf("File not found in init_fill.\n"); return 1; } if (method != 0) { if (filenamep != NULL) { *filenamep=strdup(pars->filename); } } acor=(double *)(MKL_malloc(nx*nx*sizeof(double),malign)); //fread ((char*)acor,sizeof(double),nx*nx,fileptr); for (i=0;ia0=(double *)(MKL_malloc(nsample*nsample*sizeof(double),malign)); pars->a00=(double *)(MKL_malloc(nsample*nsample*sizeof(double),malign)); pars->a0t=(double *)(MKL_malloc(nsample*nsample*sizeof(double),malign)); a0=pars->a0; a0t=pars->a0t; for (i=0;ia00,a0,nsample*nsample*sizeof(double)); pars->acort00=acor[0]; pars->rh0=(double *)(MKL_malloc(nsample*sizeof(double),malign)); pars->rh0t=(double *)(MKL_malloc(nsample*sizeof(double),malign)); rh0=pars->rh0; rh0t=pars->rh0t; for (i=0;ia0,&nsample,&info); // Cholesky decomposition MKL_free(acor); MKL_free(offx); MKL_free(offy); // Variables used in further calculations. Avoids multiple mallocs. //pars->wgood=(int *)(MKL_malloc(nsample*sizeof(int),malign)); //pars->wbad=(int *)(MKL_malloc(nsample*sizeof(int),malign)); pars->hashmod=8191; // Hash table size pars->hashcount=(int *)(MKL_malloc(pars->hashmod*sizeof(int),malign)); pars->hashtable=(struct fill_hash_struct **)(MKL_malloc(pars->hashmod*sizeof(struct fill_hash_struct *),malign)); pars->locks=(omp_lock_t *)(MKL_malloc(pars->hashmod*sizeof(omp_lock_t),malign)); pars->complocks=(omp_lock_t *)(MKL_malloc(pars->hashmod*sizeof(omp_lock_t),malign)); for (i=0;ihashmod;i++) { pars->hashcount[i]=0; // Initialize to null pointers pars->hashtable[i]=(struct fill_hash_struct *)NULL; omp_init_lock(&(pars->locks[i])); omp_init_lock(&(pars->complocks[i])); } pars->ndiff=0; t1=dsecnd()-t1; //printf("x1 %f\n",t1); return 0; } int free_fill( struct fill_struct *pars ) { int i; struct fill_hash_struct *ptr,*oldptr; /* printf("Hashmod: %d\n",pars->hashmod); printf("Ndiff: %d\n",pars->ndiff); for (i=0;ihashmod;i++) { printf("%d %d\n",i,pars->hashcount[i]); } */ /* for (i=0;ihashmod;i++) { ptr=(pars->hashtable[i]); while (ptr != NULL) { printf("# %d %d %d\n",i,ptr->nbad,ptr->nhit); ptr=ptr->next; } } */ for (i=0;ihashmod;i++) { omp_destroy_lock(&(pars->locks[i])); omp_destroy_lock(&(pars->complocks[i])); } MKL_free(pars->a0); MKL_free(pars->a00); MKL_free(pars->a0t); MKL_free(pars->rh0); MKL_free(pars->rh0t); MKL_free(pars->locks); MKL_free(pars->complocks); // Now free hash table and contents MKL_free(pars->hashcount); for (i=0;ihashmod;i++) { ptr=(pars->hashtable[i]); while (ptr != NULL) { oldptr=ptr; MKL_free(ptr->wbad); MKL_free(ptr->coeff); ptr=ptr->next; MKL_free(oldptr); } } MKL_free(pars->hashtable); free(pars->filename); return 0; // Currently no error conditions } int find_entry( struct fill_struct *pars, int hash0, int hash, int nbad, int *wbad, struct fill_hash_struct **ptr_out ) { int found; int ident,i; struct fill_hash_struct *ptr; found=0; *ptr_out=NULL; // Prevent update while starting traverse omp_set_lock(&(pars->locks[hash])); ptr=pars->hashtable[hash]; omp_unset_lock(&(pars->locks[hash])); while (ptr!=NULL) { // Not at end of table if ((hash0==ptr->hash0) && (nbad==ptr->nbad)) { // Original hash and number of elements is correct, so there is hope ident=1; for (i=0;iwbad[i]) { ident=0; break; // Out of for loop. Known not to match. } } if (ident != 0) { // Success #pragma omp atomic ptr->nhit++; found=1; *ptr_out=ptr; break; // Out of while loop. No need to test the rest. } // if } // if ptr=ptr->next; } // while return found; } // find_entry double fill_point( struct fill_struct *pars, int *mask, double *vals, float *cnorm, float *ierror, struct work_struct *work ) { double t1,t2,t3,t4,t5,t6,t7,t8/*,t9*/; int nsample; int ngood,nbad; int *wgood,*wbad; int i,info,j; char uplo[] = "U"; int ione = 1; int itwo = 2; int malign=32; double *a,*a0,*rh0,/* *rh,*a1b,*/bta1b,/**a1r,*/bta1r,c,*coeff,*a0t,*a00,*a1x; float fillval; int hash0,hash,found; struct fill_hash_struct *ptr; double ierror2; // interpolation error squared int unlocked; int nleft; double tdown,tbrute; double sum; t1=0.0;t2=0.0;t3=0.0;t4=0.0;t5=0.0;t6=0.0;t7=0.0;t8=0.0;//t9=0.0; t1=dsecnd(); nsample=(pars->order)*(pars->order); // Make work copies a0=pars->a0; a00=pars->a00; a0t=pars->a0t; rh0=pars->rh0; wgood=work->wgood; wbad=work->wbad; ngood=0; nbad=0; hash0=0; for (i=0;ihashmod; unlocked=0; found=find_entry(pars,hash0,hash,nbad,wbad,&ptr); if (found == 0) { unlocked=omp_test_lock(&(pars->complocks[hash])); // Capture lock status if (unlocked==0) omp_set_lock(&(pars->complocks[hash])); // Rerun search. Someone may have updated while waiting for lock found=find_entry(pars,hash0,hash,nbad,wbad,&ptr); if (found!=0) { // Clear lock omp_unset_lock(&(pars->complocks[hash])); found=2; // Indicate collision } } t1=dsecnd()-t1; if (found == 0) { // Got to calculate new coefficients //printf("New %d %d\n",omp_get_thread_num(),hash); // malloc new entry //for (i=0;inext=pars->hashtable[hash]; // Point to next element // Set other variables ptr->nbad=nbad; ptr->hash0=hash0; ptr->nhit=1; ptr->wbad=(int *)(MKL_malloc(nbad*sizeof(int),malign)); for (i=0;iwbad[i]=wbad[i]; coeff=(double *)(MKL_malloc(ngood*sizeof(double),malign)); pars->hashcount[hash]++; // Increment count for this hash #pragma omp atomic pars->ndiff++; // Increment count for total ptr->coeff=coeff; // Point to new coefficients // Point to work arrays a=work->a; //rh=work->rh; //a1b=work->a1b; //a1r=work->a1r; a1x=work->a1x; // Calculate new coefficients // First estimate time for each option tdown=0.0; nleft=nsample; for (i=nsample-1;i>=0;i--) { if (mask[i]!=0) { tdown=tdown+(nleft-i-1)*(nleft-i-1); nleft=nleft-1; } } tdown=3.9e-9*tdown; tbrute=4e-11*(ngood+0.0)*(ngood+0.0)*(ngood+0.0); // Constants in if statement based on timings // if (nbad < (1.50*sqrt(nsample)-6)) { if (tdowncnorm=dnrm2(&ngood,coeff,&ione); */ // Combine a1b and a1r and make single call to dpotrs for (i=0;icnorm=(float)dnrm2(&ngood,coeff,&ione); t6=dsecnd()-t6; t7=dsecnd(); // Now get interpolation error ierror2=0.0; for (i=0;iacort00-2*ierror2; // Now calculate Sum_wood[i,j] coeff_i A0t_ij coeff_j for (j=0;jierror=(float)sqrt(ierror2/pars->acort00); // Stick new entry at beginning of linked list // Prevent others from starting to traverse omp_set_lock(&(pars->locks[hash])); pars->hashtable[hash]=ptr; #pragma omp flush omp_unset_lock(&(pars->locks[hash])); // Done with computation omp_unset_lock(&(pars->complocks[hash])); t7=dsecnd()-t7; // Could store coefficients in the other 7 rotations of the mask. // Beware that the time for cholesky_down depends strongly on which // one is calculated. See timep.pro for calculation (t2s is a good predictor). } // End of calculating new coefficients t8=dsecnd(); fillval=(float)ddot(&ngood,ptr->coeff,&ione,vals,&ione); *cnorm=ptr->cnorm; *ierror=ptr->ierror; t8=dsecnd()-t8; //printf("%d %d %d %d %d %f %f %f %f %f %f %f %f\n",hash,unlocked,found,omp_get_thread_num(),nbad,t1,t2,t3,t4,t5,t6,t7,t8); return fillval; } int fgap_fill( struct fill_struct *pars, float *im, int nx, int ny, int nlead, unsigned char *mask, float *cnorm, float *ierror ) { const unsigned char maskgood=0; // Value of good entries in masks const unsigned char maskfill=1; // Value of entries to be filled const unsigned char masknofill=2; // Value of entries not to be filled int malign=32; //const int ione = 1; int i,j,i1,j1,i0,j0,k; int order,order2; int ngood,nfill; double *im1; int *mask1; float cnorm1,ierror1; int nsample; struct work_struct work; int nofill; // Number of pixels with maskfill=1 that could not be filled. //double t0; order=pars->order; order2=order/2; nsample=order*order; /* int *fcount; fcount=(int *)(MKL_malloc(ny*sizeof(int),malign)); t0=dsecnd(); #pragma omp parallel for private (i,j,nfill) for (j=0; j1. i0=i-order2; ngood=0; for (j1=j0;j1<(j0+order);j1++) { for (i1=i0;i1<(i0+order);i1++) { if ((i1>=0) && (i1=0) && (j1