(file) Return to gapfill.c CVS log (file) (dir) Up to [Development] / JSOC / proj / libs / interpolate

File: [Development] / JSOC / proj / libs / interpolate / gapfill.c (download)
Revision: 1.10, Tue Dec 6 18:11:03 2011 UTC (11 years, 2 months ago) by arta
Branch: MAIN
CVS Tags: Ver_LATEST, Ver_9-5, Ver_9-41, Ver_9-4, Ver_9-3, Ver_9-2, Ver_9-1, Ver_9-0, Ver_8-8, Ver_8-7, Ver_8-6, Ver_8-5, Ver_8-4, Ver_8-3, Ver_8-2, Ver_8-12, Ver_8-11, Ver_8-10, Ver_8-1, Ver_8-0, Ver_7-1, Ver_7-0, Ver_6-4, Ver_6-3, Ver_6-2, Ver_6-1, HEAD
Changes since 1.9: +17 -8 lines
Pass the path to the JSOC tree to the interpolation code - instead of using a hard-coded relative path. The interpolation code needs to locate data files in a directory that is relative to the interpolation source files.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <math.h>
#include <mkl.h>
#include <omp.h>
#include <sys/param.h>
#include "cholesky_down.h"
#include "gapfill.h"


#define minval(x,y) (((x) < (y)) ? (x) : (y))
#define maxval(x,y) (((x) < (y)) ? (y) : (x))

#define kInterpDataDir "proj/libs/interpolate/data"

struct work_struct {
 double *a,*rh,*a1b,*a1r,*a1x;
 int *wgood,*wbad;
};

void malloc_work(
// Malloc work space for each thread
  int nsample,
  struct work_struct *work
)
{
  int malign=32;

  work->wgood=(int *)(MKL_malloc(nsample*sizeof(int),malign));
  work->wbad=(int *)(MKL_malloc(nsample*sizeof(int),malign));
  work->a=(double *)(MKL_malloc(nsample*nsample*sizeof(double),malign));
  work->rh=(double *)(MKL_malloc(nsample*sizeof(double),malign));
  work->a1b=(double *)(MKL_malloc(nsample*sizeof(double),malign));
  work->a1r=(double *)(MKL_malloc(nsample*sizeof(double),malign));
  work->a1x=(double *)(MKL_malloc(2*nsample*sizeof(double),malign));
}

void free_work(
// Free the workspace
  int nsample,
  struct work_struct *work
)
{
  MKL_free(work->wgood);
  MKL_free(work->wbad);
  MKL_free(work->a);
  MKL_free(work->rh);
  MKL_free(work->a1b);
  MKL_free(work->a1r);
  MKL_free(work->a1x);
}

int init_fill(
  int method, // Interpolation method
  double pnoise, // Level of photon noise for trade-off
  int order, // Interpolation order. Generally odd.
  int targetx, // Target point in x (normally (order-1)/2)
  int targety, // Target point in y (normally (order-1)/2)
  struct fill_struct *pars, // Structure to save setup information etc.
  char **filenamep, // Pointer to name of file to read covariance from.
                    // Set to actual file used if method > 0.
  const char *path // to data files read by this function.
)
{
  double *acor;
  int nx=80;
  int malign=32;
  double t0,t1;
  FILE *fileptr;
  int i,j;
  int *offx,*offy,ox,oy;
  int nsample,inoise;
  double regul,pmin;
  double *a0,*rh0,*a0t,*rh0t;
  int info;
  char uplo[] = "U";
  char buf[PATH_MAX];

  pars->order=order;

  t0=dsecnd();
  switch (method) {
  case 0:
    pars->filename=strdup(*filenamep);
    break;
  case 8:
    snprintf(buf, sizeof(buf), "%s/%s/acor1_08_80.txt", path, kInterpDataDir);
    pars->filename=strdup(buf);
    break;
  case 9:
    snprintf(buf, sizeof(buf), "%s/%s/acor1_09_80.txt", path, kInterpDataDir);
    pars->filename=strdup(buf);
    break;
  case 10:
    snprintf(buf, sizeof(buf), "%s/%s/acor1_10_80.txt", path, kInterpDataDir);
    pars->filename=strdup(buf);
    break;
  case 11:
    snprintf(buf, sizeof(buf), "%s/%s/acor1_11_80.txt", path, kInterpDataDir);
    pars->filename=strdup(buf);
    break;
  default:
    printf("Unknown method in init_fill.\n");
    return 1;
    break;
  }
  fileptr = fopen (pars->filename,"r");
  if (fileptr==NULL) {
    printf("File not found in init_fill.\n");
    return 1;
  }
  if (method != 0) {
    if (filenamep != NULL) {
      *filenamep=strdup(pars->filename);
    }
  }
  acor=(double *)(MKL_malloc(nx*nx*sizeof(double),malign));
//fread ((char*)acor,sizeof(double),nx*nx,fileptr);
  for (i=0;i<nx*nx;i++) fscanf(fileptr,"%lf",acor+i);
  fclose(fileptr);
  t0=dsecnd()-t0;
//printf("x0 %f\n",t0);

  regul=pnoise*pnoise;
  inoise=1;
  pmin=regul*inoise;

  nsample=order*order;
  t1=dsecnd();
  offx=(int *)(MKL_malloc(nsample*sizeof(int),malign));
  offy=(int *)(MKL_malloc(nsample*sizeof(int),malign));
  for (i=0;i<nsample;i++) {
    offx[i]=(i%order)-targetx;
    offy[i]=(i/order)-targety;
  }
// Set up complete matrix
  pars->a0=(double *)(MKL_malloc(nsample*nsample*sizeof(double),malign));
  pars->a00=(double *)(MKL_malloc(nsample*nsample*sizeof(double),malign));
  pars->a0t=(double *)(MKL_malloc(nsample*nsample*sizeof(double),malign));
  a0=pars->a0;
  a0t=pars->a0t;
  for (i=0;i<nsample;i++) {
    for (j=0;j<nsample;j++) {
      ox=((offx[j]-offx[i])+nx) % nx;
      oy=((offy[j]-offy[i])+nx) % nx;
      a0[i+nsample*j]=acor[ox+nx*oy];
      a0t[i+nsample*j]=acor[ox+nx*oy];
    }
    a0[i+nsample*i]=a0[i+nsample*i]+regul;
  }
// save copy of matrix
  memcpy(pars->a00,a0,nsample*nsample*sizeof(double));
  pars->acort00=acor[0];
  pars->rh0=(double *)(MKL_malloc(nsample*sizeof(double),malign));
  pars->rh0t=(double *)(MKL_malloc(nsample*sizeof(double),malign));
  rh0=pars->rh0;
  rh0t=pars->rh0t;
  for (i=0;i<nsample;i++) {
    rh0[i]=acor[(-offx[i]+nx) % nx+nx*((-offy[i]+nx) % nx)];
    rh0t[i]=acor[(-offx[i]+nx) % nx+nx*((-offy[i]+nx) % nx)];
    if ((offx[i]==0) && (offy[i]==0)) rh0[i]=rh0[i]+pmin;
  }
// Decompose complete matrix
  dpotrf(uplo,&nsample,pars->a0,&nsample,&info); // Cholesky decomposition

  MKL_free(acor);
  MKL_free(offx);
  MKL_free(offy);

// Variables used in further calculations. Avoids multiple mallocs.
//pars->wgood=(int *)(MKL_malloc(nsample*sizeof(int),malign));
//pars->wbad=(int *)(MKL_malloc(nsample*sizeof(int),malign));

  pars->hashmod=8191; // Hash table size
  pars->hashcount=(int *)(MKL_malloc(pars->hashmod*sizeof(int),malign));
  pars->hashtable=(struct fill_hash_struct **)(MKL_malloc(pars->hashmod*sizeof(struct fill_hash_struct *),malign));
  pars->locks=(omp_lock_t *)(MKL_malloc(pars->hashmod*sizeof(omp_lock_t),malign));
  pars->complocks=(omp_lock_t *)(MKL_malloc(pars->hashmod*sizeof(omp_lock_t),malign));

  for (i=0;i<pars->hashmod;i++) {
    pars->hashcount[i]=0;
// Initialize to null pointers
    pars->hashtable[i]=(struct fill_hash_struct *)NULL;
    omp_init_lock(&(pars->locks[i]));
    omp_init_lock(&(pars->complocks[i]));
  }
  pars->ndiff=0;

  t1=dsecnd()-t1;
//printf("x1 %f\n",t1);

  return 0;
}

int free_fill(
struct fill_struct *pars
)
{
  int i;
  struct fill_hash_struct *ptr,*oldptr;
  
/*
  printf("Hashmod: %d\n",pars->hashmod);
  printf("Ndiff: %d\n",pars->ndiff);
  for (i=0;i<pars->hashmod;i++) {
    printf("%d %d\n",i,pars->hashcount[i]);
  }
*/
/*
  for (i=0;i<pars->hashmod;i++) {
    ptr=(pars->hashtable[i]);
    while (ptr != NULL) {
      printf("# %d %d %d\n",i,ptr->nbad,ptr->nhit);
      ptr=ptr->next;
    }
  }
*/

  for (i=0;i<pars->hashmod;i++) {
    omp_destroy_lock(&(pars->locks[i]));
    omp_destroy_lock(&(pars->complocks[i]));
  }

  MKL_free(pars->a0);
  MKL_free(pars->a00);
  MKL_free(pars->a0t);
  MKL_free(pars->rh0);
  MKL_free(pars->rh0t);

  MKL_free(pars->locks);
  MKL_free(pars->complocks);

// Now free hash table and contents

  MKL_free(pars->hashcount);
  for (i=0;i<pars->hashmod;i++) {
    ptr=(pars->hashtable[i]);
    while (ptr != NULL) {
      oldptr=ptr;
      MKL_free(ptr->wbad);
      MKL_free(ptr->coeff);
      ptr=ptr->next;
      MKL_free(oldptr);
    }
  }
  MKL_free(pars->hashtable);
  free(pars->filename);

  return 0; // Currently no error conditions

}

int find_entry(
  struct fill_struct *pars,
  int hash0,
  int hash,
  int nbad,
  int *wbad,
  struct fill_hash_struct **ptr_out
)
{
  int found;
  int ident,i;
  struct fill_hash_struct *ptr;
  
  found=0;
  *ptr_out=NULL;

// Prevent update while starting traverse
  omp_set_lock(&(pars->locks[hash]));
  ptr=pars->hashtable[hash];
  omp_unset_lock(&(pars->locks[hash]));

  while (ptr!=NULL) { // Not at end of table
    if ((hash0==ptr->hash0) && (nbad==ptr->nbad)) {
// Original hash and number of elements is correct, so there is hope
      ident=1;
      for (i=0;i<nbad;i++) {
        if (wbad[i] != ptr->wbad[i]) {
          ident=0;
          break; // Out of for loop. Known not to match.
        }
      }
      if (ident != 0) { // Success
#pragma omp atomic
        ptr->nhit++;
        found=1;
        *ptr_out=ptr;
        break; // Out of while loop. No need to test the rest.
      } // if
    } // if
    ptr=ptr->next;
  } // while

  return found;
} // find_entry

double fill_point(
struct fill_struct *pars,
int *mask,
double *vals,
float *cnorm,
float *ierror,
struct work_struct *work
)
{
  double t1,t2,t3,t4,t5,t6,t7,t8/*,t9*/;
  int nsample;
  int ngood,nbad;
  int *wgood,*wbad;
  int i,info,j;
  char uplo[] = "U";
  int ione = 1;
  int itwo = 2;
  int malign=32;
  double *a,*a0,*rh0,/* *rh,*a1b,*/bta1b,/**a1r,*/bta1r,c,*coeff,*a0t,*a00,*a1x;
  float fillval;
  int hash0,hash,found;
  struct fill_hash_struct *ptr;
  double ierror2; // interpolation error squared
  int unlocked;
  int nleft;
  double tdown,tbrute;
  double sum;

  t1=0.0;t2=0.0;t3=0.0;t4=0.0;t5=0.0;t6=0.0;t7=0.0;t8=0.0;//t9=0.0;

  t1=dsecnd();

  nsample=(pars->order)*(pars->order);
  
// Make work copies
  a0=pars->a0;
  a00=pars->a00;
  a0t=pars->a0t;
  rh0=pars->rh0;

  wgood=work->wgood;
  wbad=work->wbad;

  ngood=0;
  nbad=0;
  hash0=0;
  for (i=0;i<nsample;i++) {
    if (mask[i]==0) {
      wgood[ngood]=i;
      ngood=ngood+1;
    }
    if (mask[i]!=0) {
      hash0=hash0+i;
      wbad[nbad]=i;
      nbad=nbad+1;
    }
  }
  hash=hash0%pars->hashmod;

  unlocked=0;

  found=find_entry(pars,hash0,hash,nbad,wbad,&ptr);

  if (found == 0) {
    unlocked=omp_test_lock(&(pars->complocks[hash])); // Capture lock status
    if (unlocked==0) omp_set_lock(&(pars->complocks[hash]));

// Rerun search. Someone may have updated while waiting for lock
    found=find_entry(pars,hash0,hash,nbad,wbad,&ptr);
    if (found!=0) {
// Clear lock
      omp_unset_lock(&(pars->complocks[hash]));
      found=2; // Indicate collision
    }
  }

  t1=dsecnd()-t1;

  if (found == 0) { // Got to calculate new coefficients

//printf("New %d %d\n",omp_get_thread_num(),hash);
// malloc new entry
//for (i=0;i<nsample;i++) printf(" %d",mask[i]);
//printf("\n");
    ptr=(struct fill_hash_struct *)(MKL_malloc(sizeof(struct fill_hash_struct),malign));
    ptr->next=pars->hashtable[hash]; // Point to next element
// Set other variables
    ptr->nbad=nbad;
    ptr->hash0=hash0;
    ptr->nhit=1;
    ptr->wbad=(int *)(MKL_malloc(nbad*sizeof(int),malign));
    for (i=0;i<nbad;i++) ptr->wbad[i]=wbad[i];
    coeff=(double *)(MKL_malloc(ngood*sizeof(double),malign));
    pars->hashcount[hash]++; // Increment count for this hash
#pragma omp atomic
    pars->ndiff++; // Increment count for total
    ptr->coeff=coeff; // Point to new coefficients

// Point to work arrays
    a=work->a;
    //rh=work->rh;
    //a1b=work->a1b;
    //a1r=work->a1r;
    a1x=work->a1x;

// Calculate new coefficients
// First estimate time for each option
    tdown=0.0;
    nleft=nsample;
    for (i=nsample-1;i>=0;i--) {
      if (mask[i]!=0) {
        tdown=tdown+(nleft-i-1)*(nleft-i-1);
        nleft=nleft-1;
      }
    }
    tdown=3.9e-9*tdown;
    tbrute=4e-11*(ngood+0.0)*(ngood+0.0)*(ngood+0.0);

// Constants in if statement based on timings
//  if (nbad < (1.50*sqrt(nsample)-6)) {
    if (tdown<tbrute) {
      t2=dsecnd();
// Get new decomposition by downsampling old one
// Copy decomposed matrix.
// This dominates dcholesky_down when only removing a few points.
// Only need to copy relevant part of matrix
//    for (i=0;i<nsample;i++) for (j=0;j<=i;j++) a[i*nsample+j]=a0[i*nsample+j];
      dlacpy(uplo,&nsample,&nsample,a0,&nsample,a,&nsample);
      t2=dsecnd()-t2;
      t3=dsecnd();

// Get decomposition of reduced matrix
      dcholesky_down_mask(uplo, &nsample, a, &nsample, mask, &info);
      t3=dsecnd()-t3;
    } // if downsampling
    else {
      t4=dsecnd();
// Calculate brute force from original matrix
// Pick good points
      for (i=0;i<ngood;i++) for (j=0;j<=i;j++) a[i*nsample+j]=a00[wgood[i]*nsample+wgood[j]];
      t4=dsecnd()-t4;
      t5=dsecnd();

// Get decomposition of reduced matrix
      dpotrf(uplo,&ngood,a,&nsample,&info); // Cholesky decomposition
      t5=dsecnd()-t5;
    } // Brute force

    t6=dsecnd();

/*
    for (i=0;i<ngood;i++) a1b[i]=1.;
    for (i=0;i<ngood;i++) a1r[i]=rh0[wgood[i]];
    dpotrs(uplo,&ngood,&ione,a,&nsample,a1b,&nsample,&info);
    dpotrs(uplo,&ngood,&ione,a,&nsample,a1r,&nsample,&info);
    bta1b=0.;
    for (i=0;i<ngood;i++) bta1b=bta1b+a1b[i];
    bta1r=0.0;
    for (i=0;i<ngood;i++) bta1r=bta1r+a1r[i];
    c=(bta1r-1.)/bta1b;
    for (i=0;i<ngood;i++) coeff[i]=a1r[i]-c*a1b[i];
    ptr->cnorm=dnrm2(&ngood,coeff,&ione);
*/

// Combine a1b and a1r and make single call to dpotrs
    for (i=0;i<ngood;i++) a1x[i]=1.;
    for (i=0;i<ngood;i++) a1x[i+nsample]=rh0[wgood[i]];
    dpotrs(uplo,&ngood,&itwo,a,&nsample,a1x,&nsample,&info);
    bta1b=0.;
    for (i=0;i<ngood;i++) bta1b=bta1b+a1x[i];
    bta1r=0.0;
    for (i=0;i<ngood;i++) bta1r=bta1r+a1x[i+nsample];
    c=(bta1r-1.)/bta1b;
    for (i=0;i<ngood;i++) coeff[i]=a1x[i+nsample]-c*a1x[i];
    ptr->cnorm=(float)dnrm2(&ngood,coeff,&ione);

    t6=dsecnd()-t6;
    t7=dsecnd();

// Now get interpolation error
    ierror2=0.0;
    for (i=0;i<ngood;i++) ierror2=ierror2+coeff[i]*rh0[wgood[i]];
    ierror2=pars->acort00-2*ierror2;
// Now calculate Sum_wood[i,j] coeff_i A0t_ij coeff_j
    for (j=0;j<ngood;j++) {
/*
      sum=0.0;
      for (i=0;i<ngood;i++) {
        sum=sum+a0t[nsample*wgood[j]+wgood[i]]*coeff[i];
      }
      ierror2=ierror2+sum*coeff[j];
*/
// a0t is symmetric, so this is more efficient
      sum=0.0;
      for (i=0;i<j;i++) {
        sum=sum+a0t[nsample*wgood[j]+wgood[i]]*coeff[i];
      }
      ierror2=ierror2+coeff[j]*(2*sum+coeff[j]*a0t[nsample*wgood[j]+wgood[j]]);
    }
    ptr->ierror=(float)sqrt(ierror2/pars->acort00);

// Stick new entry at beginning of linked list
// Prevent others from starting to traverse
    omp_set_lock(&(pars->locks[hash]));
    pars->hashtable[hash]=ptr;
#pragma omp flush
    omp_unset_lock(&(pars->locks[hash]));

// Done with computation
    omp_unset_lock(&(pars->complocks[hash]));

    t7=dsecnd()-t7;

// Could store coefficients in the other 7 rotations of the mask.
// Beware that the time for cholesky_down depends strongly on which
// one is calculated. See timep.pro for calculation (t2s is a good predictor).
  } // End of calculating new coefficients

  t8=dsecnd();
  fillval=(float)ddot(&ngood,ptr->coeff,&ione,vals,&ione);
  *cnorm=ptr->cnorm;
  *ierror=ptr->ierror;

  t8=dsecnd()-t8;

//printf("%d %d %d %d %d %f %f %f %f %f %f %f %f\n",hash,unlocked,found,omp_get_thread_num(),nbad,t1,t2,t3,t4,t5,t6,t7,t8);
  return fillval;
}

int fgap_fill(
  struct fill_struct *pars,
  float *im,
  int nx,
  int ny,
  int nlead,
  unsigned char *mask,
  float *cnorm,
  float *ierror
)
{
  const unsigned char maskgood=0; // Value of good entries in masks
  const unsigned char maskfill=1; // Value of entries to be filled
  const unsigned char masknofill=2; // Value of entries not to be filled
  int malign=32;
//const int ione = 1;
  int i,j,i1,j1,i0,j0,k;
  int order,order2;
  int ngood,nfill;
  double *im1;
  int *mask1;
  float cnorm1,ierror1;
  int nsample;
  struct work_struct work;
  int nofill; // Number of pixels with maskfill=1 that could not be filled.
  //double t0;
  
  order=pars->order;
  order2=order/2;
  nsample=order*order;
  
/*
  int *fcount;
  fcount=(int *)(MKL_malloc(ny*sizeof(int),malign));
  t0=dsecnd();
#pragma omp parallel for private (i,j,nfill)
  for (j=0; j<ny; j++) {
    nfill=0;
    for (i=0; i<nx; i++) {
      if (mask[j*nlead+i]==maskfill) nfill++;
    }
    fcount[j]=nfill;
  }
  printf("Count: %f\n",dsecnd()-t0);
  MKL_free(fcount);
*/

nfill=0;
nofill=0;
#pragma omp parallel default(none) \
  private (im1,mask1,i,i0,i1,j,j0,j1,k,cnorm1,ierror1,ngood/*,t0*/)     \
shared(pars,im,nx,ny,nlead,mask,cnorm,ierror) \
shared(malign,order,order2) \
private(work) shared(nsample) \
reduction(+:nfill) \
reduction(+:nofill)
{ // Needed to define parallel region

  im1=(double *)(MKL_malloc(order*order*sizeof(double),malign));
  mask1=(int *)(MKL_malloc(order*order*sizeof(int),malign));

  malloc_work(nsample,&work);
//printf("malloc %d %lx %lx\n",omp_get_thread_num(),im1,mask1);

//#pragma omp for schedule(dynamic,1)
//for (j=0; j<ny; j++) {
//  for (i=0; i<nx; i++) {
#pragma omp for schedule(dynamic,256)
  for (k=0;k<nx*ny;k++) {{
        //t0=dsecnd();
    i=k%nx;
    j=k/nx;
      j0=j-order2;
      if (mask[j*nlead+i]==maskgood) {
        cnorm1=1.0f; // Random noise is 1
        ierror1=0.0f; // Interpolation error is 0
      }
      if (mask[j*nlead+i]==maskfill) { // Fill if mask=1. Skip for mask>1.
        i0=i-order2;
        ngood=0;
        for (j1=j0;j1<(j0+order);j1++) {
          for (i1=i0;i1<(i0+order);i1++) {
            if ((i1>=0) && (i1<nx) && (j1>=0) && (j1<ny)) {
              mask1[i1-i0+(j1-j0)*order]=mask[i1+j1*nlead];
              if (mask[j1*nlead+i1]==maskgood) {
                im1[ngood]=im[i1+j1*nlead];
                ngood=ngood+1;
              }
            } // Inside original image
            else { // Outside original image
              mask1[i1-i0+(j1-j0)*order]=1; // Point is missing
            }
          }
        }
        if (ngood != 0) { // Have at least one good point
           im[j*nlead+i]=(float)fill_point(pars,mask1,im1,&cnorm1,&ierror1,&work);
        } 
        else { // Otherwise quietly leave unchanged, except set errors
          cnorm1=0.0f;
          ierror1=2.0f;
          nofill=nofill+1;
        }
        nfill=nfill+1;
      } // end if mask==1
      if (mask[j*nlead+i]==masknofill) { // Don't fill.
        cnorm1=0.0f;
        ierror1=2.0f;
      }
      cnorm[j*nlead+i]=cnorm1;
      ierror[j*nlead+i]=ierror1;
//    cnorm[j*nlead+i]=dsecnd()-t0;
//    ierror[j*nlead+i]=omp_get_thread_num();
    } // for i
  } // for j
//printf("free %d %lx %lx\n",omp_get_thread_num(),im1,mask1);
  MKL_free(im1);
  MKL_free(mask1);
  free_work(nsample,&work);
//printf("done %d %lx %lx\n",omp_get_thread_num(),im1,mask1);
} // End of parallel region
//printf("nfill nofill %d %d\n",nfill,nofill);

  if (nofill != 0) {
    return 1; // Could not fill all pixels
  }
  else {
    return 0; // All is good
  }
}

char *gapfill_version() // Returns CVS version of gapfill.c
{
  return strdup("$Id: gapfill.c,v 1.10 2011/12/06 18:11:03 arta Exp $");
}



Karen Tian
Powered by
ViewCVS 0.9.4