(file) Return to fresize.c CVS log (file) (dir) Up to [Development] / JSOC / proj / libs / interpolate

   1 arta  1.1 #include <stdio.h>
   2           #include <stdlib.h>
   3           #include <time.h>
   4           #include <string.h>
   5           #include <math.h>
   6           #include <mkl_blas.h>
   7           #include <mkl_service.h>
   8           #include <mkl_lapack.h>
   9           #include <mkl_vml_functions.h>
  10           #include <omp.h>
  11           #include <complex.h>
  12           #include <fftw3.h>
  13           #include "fresize.h"
  14           #define minval(x,y) (((x) < (y)) ? (x) : (y))
  15           #define maxval(x,y) (((x) < (y)) ? (y) : (x))
  16           #define fresize_sample 1
  17           #define fresize_bin 2
  18           #define fresize_1d 3
  19           #define fresize_2d 4
  20           #define fresize_1d_fft 5
  21           #define fresize_2d_fft 6
  22 arta  1.1 
  23 schou 1.2 static double sinc(double x)
  24 arta  1.1 {
  25           if (fabs(x) < (1.e-10))
  26             return 1.;
  27           else
  28             return sin(M_PI*x)/(M_PI*x);
  29           }
  30           
  31           int make_fft1d(
  32             struct fresize_struct *pars,
  33             int nxin,
  34             int nyin
  35           )
  36           {
  37             fftwf_complex *helpc,*fkernel,*fkernely;
  38             float *helpin,*helpout;
  39             fftwf_plan plan1,plan2,plan1y,plan2y;
  40             int nxinp,nyinp; // Complex array size
  41             int nin,ninp;
  42             int hwidth /*,fwidth */;
  43             int i,/*j,*/i1/*,j1*/;
  44             float c;
  45 arta  1.1 
  46             hwidth=pars->hwidth;
  47             /* fwidth=2*hwidth+1; */
  48             nxinp=(nxin/2+1);
  49             nyinp=(nyin/2+1);
  50             nin=maxval(nxin,nyin); // Max value to use same arrays
  51             ninp=maxval(nxinp,nyinp); // Max value to use same arrays
  52           
  53             helpin=(float*) fftwf_malloc(sizeof(float) * nin);
  54             fkernel=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nxinp);
  55             fkernely=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nyinp);
  56             helpc=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * ninp);
  57             helpout=(float*) fftwf_malloc(sizeof(float) * nin);
  58           
  59             plan1=fftwf_plan_dft_r2c_1d(nxin,helpin,helpc,FFTW_ESTIMATE);
  60             plan2=fftwf_plan_dft_c2r_1d(nxin,helpc,helpout,FFTW_ESTIMATE);
  61             plan1y=fftwf_plan_dft_r2c_1d(nxin,helpin,helpc,FFTW_ESTIMATE);
  62             plan2y=fftwf_plan_dft_c2r_1d(nxin,helpc,helpout,FFTW_ESTIMATE);
  63           
  64             pars->helpin=helpin;
  65             pars->helpout=helpout;
  66 arta  1.1   pars->fkernel=fkernel;
  67             pars->fkernely=fkernely;
  68             pars->helpc=helpc;
  69             pars->plan1=plan1;
  70             pars->plan2=plan2;
  71             pars->plan1y=plan1y;
  72             pars->plan2y=plan2y;
  73             pars->nxin=nxin;
  74             pars->nyin=nyin;
  75             pars->nxinp=nxinp;
  76             pars->nyinp=nyinp;
  77             pars->method=fresize_1d_fft;
  78           
  79           /* First transform kernel in x*/
  80           
  81           /* Zero entire array */
  82             for (i=0;i<nxin;i++) {
  83               helpin[i]=0;
  84             }
  85           
  86           // Copy kernel to new array
  87 arta  1.1   for (i=-hwidth;i<=hwidth;i++) {
  88               i1=(i+nxin) % nxin;
  89               helpin[i1]=pars->kerx[i+hwidth];
  90             }
  91           
  92           /* Transform kernel */
  93             fftwf_execute(plan1);
  94           
  95           /* Copy transform and scale */
  96             c=1.0f/nxin;
  97           
  98             for (i=0;i<nxinp;i++) {
  99               fkernel[i]=c*helpc[i];
 100             }
 101           
 102           /* Then transform kernel in y*/
 103           
 104           /* Zero entire array */
 105             for (i=0;i<nxin;i++) {
 106               helpin[i]=0;
 107             }
 108 arta  1.1 
 109           /* Copy kernel to new array */
 110             for (i=-hwidth;i<=hwidth;i++) {
 111               i1=(i+nyin) % nyin;
 112               helpin[i1]=pars->kery[i+hwidth];
 113             }
 114           
 115           /* Transform kernel */
 116             fftwf_execute(plan1y);
 117           
 118           /* Copy transform and scale */
 119             c=1.0f/nyin;
 120           
 121             for (i=0;i<nyinp;i++) {
 122               fkernely[i]=c*helpc[i];
 123             }
 124           
 125             return 0;
 126           }
 127           
 128           int make_fft2d(
 129 arta  1.1   struct fresize_struct *pars,
 130             int nxin,
 131             int nyin
 132           )
 133           {
 134             fftwf_complex *helpc,*fkernel;
 135             float *helpin,*helpout;
 136             fftwf_plan plan1,plan2;
 137             int nxinp; // Complex array size
 138             int hwidth,fwidth;
 139             int i,j,i1,j1;
 140             float c;
 141           
 142             hwidth=pars->hwidth;
 143             fwidth=2*hwidth+1;
 144             nxinp=(nxin/2+1);
 145           
 146             helpin=(float*) fftwf_malloc(sizeof(float) * nxin*nyin);
 147             fkernel=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nxinp*nyin);
 148             helpc=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nxinp*nyin);
 149             helpout=(float*) fftwf_malloc(sizeof(float) * nxin*nyin);
 150 arta  1.1 
 151             plan1=fftwf_plan_dft_r2c_2d(nyin,nxin,helpin,helpc,FFTW_ESTIMATE);
 152             plan2=fftwf_plan_dft_c2r_2d(nyin,nxin,helpc,helpout,FFTW_ESTIMATE);
 153           
 154             pars->helpin=helpin;
 155             pars->helpout=helpout;
 156             pars->fkernel=fkernel;
 157             pars->helpc=helpc;
 158             pars->plan1=plan1;
 159             pars->plan2=plan2;
 160             pars->nxin=nxin;
 161             pars->nyin=nyin;
 162             pars->nxinp=nxinp;
 163             pars->method=fresize_2d_fft;
 164           
 165           /* First transform kernel */
 166           
 167           /* Zero entire array */
 168             for (j=0;j<nyin;j++) {
 169               for (i=0;i<nxin;i++) {
 170                 helpin[j*nxin+i]=0;
 171 arta  1.1     }
 172             }
 173           
 174           /*  helpin[0]=1.0f; */
 175           
 176           /* Copy kernel to new array */
 177             for (j=-hwidth;j<=hwidth;j++) {
 178               j1=(j+nyin) % nyin;
 179               for (i=-hwidth;i<=hwidth;i++) {
 180                 i1=(i+nxin) % nxin;
 181                 helpin[j1*nxin+i1]=pars->ker[(j+hwidth)*fwidth+(i+hwidth)];
 182               }
 183             }
 184           
 185           /* Transform kernel */
 186             fftwf_execute(plan1);
 187           
 188           /* Copy transform and scale */
 189             c=1.0f/nxin/nyin;
 190           
 191             for (j=0;j<nyin;j++) { 
 192 arta  1.1     for (i=0;i<nxinp;i++) {
 193                 fkernel[j*nxinp+i]=c*helpc[j*nxinp+i];
 194               }
 195             }
 196           
 197             return 0;
 198           }
 199           
 200           int init_fresize_sample(
 201             struct fresize_struct *pars,
 202             int nsub
 203           )
 204           {
 205           
 206             pars->method=fresize_sample;
 207             pars->nsub=nsub;
 208           
 209             return 0;
 210           }
 211           
 212           int init_fresize_bin(
 213 arta  1.1   struct fresize_struct *pars,
 214             int nsub
 215           )
 216           {
 217           
 218             pars->method=fresize_bin;
 219             pars->nsub=nsub;
 220           
 221             return 0;
 222           }
 223           
 224           int init_fresize_boxcar(
 225             struct fresize_struct *pars,
 226             int hwidth, // Half width of boxcar. Full is 2*hwidth+1.
 227             int nsub // Distance between sampled points
 228           )
 229           {
 230             const int malign=32;
 231             int fwidth;
 232             int i /*,j*/;
 233           
 234 arta  1.1   pars->method=fresize_1d;
 235             pars->nsub=nsub;
 236             pars->hwidth=hwidth;
 237             fwidth=2*hwidth+1;
 238             pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
 239             pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
 240             for (i=0;i<fwidth;i++) {
 241               pars->kerx[i]=1.0f/fwidth;
 242               pars->kery[i]=1.0f/fwidth;
 243             }
 244           
 245             return 0;
 246           }
 247           
 248           int init_fresize_boxcar_fft(
 249             struct fresize_struct *pars,
 250             int hwidth, // Half width of boxcar. Full is 2*hwidth+1.
 251             int nsub, // Distance between sampled points
 252             int nxin, // Array size
 253             int nyin // Array size
 254           )
 255 arta  1.1 {
 256             int status;
 257           
 258             status=init_fresize_boxcar(pars,hwidth,nsub);
 259             if (status != 0) return status;
 260           
 261             status=make_fft1d(pars,nxin,nyin);
 262             return status;
 263           }
 264           
 265           int init_fresize_gaussian(
 266             struct fresize_struct *pars,
 267             float sigma, // Shape is exp(-(d/sigma)^2/2)
 268             int hwidth, // Half (truncation) width of kernel. Full is 2*hwidth+1.
 269             int nsub // Distance between sampled points
 270           )
 271           {
 272             const int malign=32;
 273             int fwidth;
 274             int i /*,j*/;
 275             float sum,x,y;
 276 arta  1.1 
 277             pars->method=fresize_1d;
 278             pars->nsub=nsub;
 279             pars->hwidth=hwidth;
 280             fwidth=2*hwidth+1;
 281             pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
 282             pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
 283             sum=0.0f;
 284             for (i=0;i<fwidth;i++) {
 285               x=(i-hwidth)/sigma;
 286               y=(float)exp(-x*x/2); /* There is expf (a float version), but not sure what impact this has. */
 287               sum=sum+y;
 288             }
 289             for (i=0;i<fwidth;i++) {
 290               x=(i-hwidth)/sigma;
 291               y=(float)exp(-x*x/2);
 292               pars->kerx[i]=y/sum;
 293               pars->kery[i]=y/sum;
 294             }
 295           
 296             return 0;
 297 arta  1.1 }
 298           
 299           int init_fresize_gaussian_fft( // Simple square truncated Gaussian. FFT version.
 300             struct fresize_struct *pars,
 301             float sigma, // Shape is exp(-(d/sigma)^2/2)
 302             int hwidth, // Half (truncation) width of kernel. Full is 2*hwidth+1.
 303             int nsub, // Distance between sampled points
 304             int nxin, // Array size
 305             int nyin // Array size
 306           )
 307           {
 308             int status;
 309           
 310             status=init_fresize_gaussian(pars,sigma,hwidth,nsub);
 311             if (status != 0) return status;
 312           
 313             status=make_fft1d(pars,nxin,nyin);
 314             return status;
 315           }
 316           
 317           int init_fresize_sinc( // Sinc filter
 318 arta  1.1   struct fresize_struct *pars,
 319             float wsinc, /* Shape is sinc(d/wsinc)*ap(d)
 320                             wsinc is the amount by which the Nyquist is reduced. 
 321                             May want wsinc=nsub. */
 322             int hwidth, // Half width of kernel. Full is 2*hwidth+1.
 323             int iap, /* Apodization method. Always ap=0 for d>nap*wsinc.
 324                         iap=0 means no apodization ap=1 
 325                         iap=1 uses parabola ap=1-(d/(nap*wsinc))^2
 326                         iap=2 uses sinc ap=sinc(d/(nap*wsinc)) */
 327             int nap, /* Sinc apodization width in units of wsinc. 
 328                         Normally hwidth=nap*wsinc,
 329                         but hwidth=nap*wsinc-1 works for integer */
 330             int nsub // Distance between sampled points
 331           )
 332           {
 333             const int malign=32;
 334             int fwidth;
 335             int i /*,j*/;
 336             float sum,x,y;
 337           
 338             pars->method=fresize_1d;
 339 arta  1.1   pars->nsub=nsub;
 340             pars->hwidth=hwidth;
 341             fwidth=2*hwidth+1;
 342             pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
 343             pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
 344             sum=0.0f;
 345             for (i=0;i<fwidth;i++) {
 346               x=(float)(i-hwidth);
 347               if (abs(x) > (nap*wsinc)) {
 348                 y=0.0f;
 349               }
 350               else {
 351                  y=(float)sinc(x/wsinc);
 352               }
 353               if (iap == 1) y=y*(1-(x/(nap*wsinc))*(x/(nap*wsinc)));
 354               if (iap == 2) y=(float)(y*sinc(x/(nap*wsinc)));
 355               pars->kerx[i]=y;
 356               sum=sum+y;
 357             }
 358             for (i=0;i<fwidth;i++) {
 359               pars->kerx[i]=pars->kerx[i]/sum;
 360 arta  1.1     pars->kery[i]=pars->kerx[i];
 361             }
 362           
 363             return 0;
 364           }
 365           
 366           int init_fresize_sinc_fft( // Sinc filter. FFT version.
 367             struct fresize_struct *pars,
 368             float wsinc, /* Shape is sinc(d/wsinc)*ap(d)
 369                             wsinc is the amount by which the Nyquist is reduced.
 370                             May want wsinc=nsub. */
 371             int hwidth, // Half width of kernel. Full is 2*hwidth+1.
 372             int iap, /* Apodization method. Always ap=0 for d>nap*wsinc.
 373                         iap=0 means no apodization ap=1
 374                         iap=1 uses parabola ap=1-(d/(nap*wsinc))^2
 375                         iap=2 uses sinc ap=sinc(d/(nap*wsinc))
 376                         all other cases give ap=1 (not guaranteed) */
 377             int nap, /* Sinc apodization width in units of wsinc.
 378                         Normally hwidth=nap*wsinc,
 379                         but hwidth=nap*wsinc-1 works for integer */
 380             int nsub, // Distance between sampled points
 381 arta  1.1   int nxin, // Array size
 382             int nyin // Array size
 383           )
 384           {
 385             int status;
 386           
 387             status=init_fresize_sinc(pars,wsinc,hwidth,iap,nap,nsub);
 388             if (status != 0) return status;
 389           
 390             status=make_fft1d(pars,nxin,nyin);
 391             return status;
 392           }
 393           
 394           int init_fresize_gaussian2( // Circularly truncated Gaussian
 395             struct fresize_struct *pars,
 396             float sigma, // Shape is exp(-(d/sigma)^2/2)
 397             float rmax, // Truncation radius. Probably rmax<=hwidth.
 398             int hwidth, // Half (truncation) width of kernel. Full is 2*hwidth+1.
 399             int nsub // Distance between sampled points
 400           )
 401           {
 402 arta  1.1   const int malign=32;
 403             int fwidth;
 404             int i,j;
 405             float sum,xi,xj,y,r2,rmax2;
 406           
 407             pars->method=fresize_2d;
 408             pars->nsub=nsub;
 409             pars->hwidth=hwidth;
 410             fwidth=2*hwidth+1;
 411             pars->ker=(float *)(MKL_malloc(fwidth*fwidth*sizeof(float),malign));
 412             rmax2=rmax*rmax/sigma/sigma;
 413           
 414             sum=0.0f;
 415             for (j=0;j<fwidth;j++) {
 416               xj=(j-hwidth)/sigma;
 417               for (i=0;i<fwidth;i++) {
 418                 xi=(i-hwidth)/sigma;
 419                 r2=xi*xi+xj*xj;
 420                 if (r2 <= rmax2 ) {
 421                    y=(float)exp(-r2/2);
 422                 }
 423 arta  1.1       else {
 424                   y=0.0f;
 425                 }
 426                 pars->ker[fwidth*j+i]=y;
 427                 sum=sum+y;
 428               }
 429             }
 430           // Normalize kernel
 431             for (j=0;j<fwidth;j++) {
 432               for (i=0;i<fwidth;i++) {
 433                 pars->ker[fwidth*j+i]=pars->ker[fwidth*j+i]/sum;
 434               }
 435             }
 436           
 437             return 0;
 438           }
 439           
 440           int init_fresize_gaussian2_fft( // Circularly truncated Gaussian. FFT version.
 441             struct fresize_struct *pars,
 442             float sigma, // Shape is exp(-(d/sigma)^2/2) 
 443             float rmax, // Truncation radius. Probably rmax<=hwidth.
 444 arta  1.1   int hwidth, // Half (truncation) width of kernel. Full is 2*hwidth+1.
 445             int nsub, // Distance between sampled points
 446             int nxin, // Input size
 447             int nyin // Input size
 448           )
 449           {
 450             int status;
 451           
 452             status=init_fresize_gaussian2(pars,sigma,rmax,hwidth,nsub);
 453             if (status != 0) return status;
 454           
 455             status=make_fft2d(pars,nxin,nyin);
 456             return status;
 457           }
 458           
 459           int init_fresize_airy( // 2D Airy filter
 460             struct fresize_struct *pars,
 461             float cdown, /* cdown is the amount by which the Nyquist is reduced. */
 462             int hwidth, /* Half width of kernel. Full is 2*hwidth+1.
 463                            Set to <0 to make routine set appropriate value */
 464             int iap, /* Apodization method. Always ap=0 for d>Z_nap, where Z_nap os
 465 arta  1.1               the position of the nap'th zero.
 466                         iap=0 means no apodization ap=1
 467                         iap=1 uses parabola ap=1-(d/Z_nap)^2
 468                         iap=2 uses sinc ap=sinc(d/(Z_nap))
 469                         iap=3 uses Airy with first zero at Z_nap
 470                         all other cases give ap=1 (not guaranteed) */
 471             int nap, /* Apodizes to nap'th zero */
 472             int nsub // Distance between sampled points
 473           )
 474           {
 475             const int malign=32;
 476             const float beszeros[20]={0.0000000,3.8317060,7.0155867,10.173468,13.323692,16.470630,19.615859,22.760084,25.903672,29.046829,32.189680,35.332308,38.474766,41.617094,44.759319,47.901461,51.043535,54.185554,57.327525,60.469458}; // first 20 zeros in Bessel function
 477             float cb;
 478             int fwidth;
 479             int i,j;
 480             float sum,xi,xj;
 481             float r2,r,rc,airy,ra;
 482             float nzero;
 483           
 484             cb=(float)(cdown/M_PI); // Amount to scale Bessel function to get zeros right.
 485             nzero=beszeros[nap]*cb; // Position of nap'th zero
 486 arta  1.1   if (hwidth < 0) hwidth=(float)floor(nzero);
 487           
 488             pars->method=fresize_2d;
 489             pars->nsub=nsub;
 490             pars->hwidth=hwidth;
 491             fwidth=2*hwidth+1;
 492             pars->ker=(float *)(MKL_malloc(fwidth*fwidth*sizeof(float),malign));
 493           
 494             sum=0.0f;
 495             for (j=0;j<fwidth;j++) {
 496                xj=(float)(j-hwidth);
 497               for (i=0;i<fwidth;i++) {
 498                  xi=(float)(i-hwidth);
 499                 r2=xi*xi+xj*xj;
 500                 r=(float)maxval(sqrt(r2),1e-20);
 501                 rc=r/cb;
 502                 if (r <= nzero ) {
 503                   airy=j1f(rc)/rc;
 504                 }
 505                 else {
 506                   airy=0.0f;
 507 arta  1.1       }
 508                 ra=r/nzero;
 509                 if (iap == 1) airy=airy*(1-ra*ra);
 510                 if (iap == 2) airy=(float)(airy*sinc(ra));
 511                 if (iap == 3) {
 512                   ra=rc*beszeros[1]/beszeros[nap];
 513                   airy=airy*j1f(ra)/ra;
 514                 }
 515                 pars->ker[fwidth*j+i]=airy;
 516                 sum=sum+airy;
 517               }
 518             }
 519           // Normalize kernel
 520             for (j=0;j<fwidth;j++) {
 521               for (i=0;i<fwidth;i++) {
 522                 pars->ker[fwidth*j+i]=pars->ker[fwidth*j+i]/sum;
 523               }
 524             }
 525           
 526           
 527             return 0;
 528 arta  1.1 }
 529           
 530           int init_fresize_airy_fft( // 2D Airy filter. FFT version.
 531             struct fresize_struct *pars,
 532             float cdown, /* cdown is the amount by which the Nyquist is reduced. */
 533             int hwidth, /* Half width of kernel. Full is 2*hwidth+1.
 534                            Set to <0 to make routine set appropriate value */
 535             int iap, /* Apodization method. Always ap=0 for d>Z_nap, where Z_nap os
 536                         the position of the nap'th zero.
 537                         iap=0 means no apodization ap=1
 538                         iap=1 uses parabola ap=1-(d/Z_nap)^2
 539                         iap=2 uses sinc ap=sinc(d/(Z_nap))
 540                         iap=3 uses Airy with first zero at Z_nap
 541                         all other cases give ap=1 (not guaranteed) */
 542             int nap, /* Apodizes to nap'th zero */
 543             int nsub, // Distance between sampled points
 544             int nxin, // Input size
 545             int nyin // Input size
 546           )
 547           {
 548             int status;
 549 arta  1.1 
 550             status=init_fresize_airy(pars,cdown,hwidth,iap,nap,nsub);
 551             if (status != 0) return status;
 552           
 553             status=make_fft2d(pars,nxin,nyin);
 554             return status;
 555           }
 556           
 557           int free_fresize(
 558             struct fresize_struct *pars
 559           )
 560           {
 561           
 562             if (pars->method==fresize_1d) {
 563               MKL_free (pars->kerx);
 564               MKL_free (pars->kery);
 565             }
 566           
 567             if (pars->method==fresize_2d) {
 568               MKL_free (pars->ker);
 569             }
 570 arta  1.1 
 571             if (pars->method==fresize_1d_fft) {
 572               MKL_free (pars->kerx);
 573               MKL_free (pars->kery);
 574               fftwf_free(pars->helpin);
 575               fftwf_free(pars->fkernel);
 576               fftwf_free(pars->fkernely);
 577               fftwf_free(pars->helpc);
 578               fftwf_free(pars->helpout);
 579               fftwf_destroy_plan(pars->plan1);
 580               fftwf_destroy_plan(pars->plan2);
 581               fftwf_destroy_plan(pars->plan1y);
 582               fftwf_destroy_plan(pars->plan2y);
 583             }
 584           
 585             if (pars->method==fresize_2d_fft) {
 586               MKL_free (pars->ker);
 587               fftwf_free(pars->helpin);
 588               fftwf_free(pars->fkernel);
 589               fftwf_free(pars->helpc);
 590               fftwf_free(pars->helpout);
 591 arta  1.1     fftwf_destroy_plan(pars->plan1);
 592               fftwf_destroy_plan(pars->plan2);
 593             }
 594           
 595             return 0;
 596           }
 597           
 598           int fsample(
 599             float *image_in,
 600             float *image_out,
 601             int nxin,
 602             int nyin,
 603             int nleadin,
 604             int nxout,
 605             int nyout,
 606             int nleadout,
 607             int nsub,
 608             int xoff,
 609             int yoff,
 610             float fillval
 611           )
 612 arta  1.1 
 613           {
 614             int i,j;
 615             int imin,imax,jmin,jmax;
 616             float *imp;
 617           
 618 schou 1.5 // Find first ([ij]min) and last ([ij]max) indices in output image
 619           // for which resulting input point is within image
 620 arta  1.1   if (xoff >= 0) imin=0; else imin=((-xoff+nsub-1)/nsub);
 621             if (xoff <= 0) imax=nxout-1; else imax=minval(((nxin-xoff-1)/nsub),nxout-1);
 622             if (yoff >= 0) jmin=0; else jmin=((-yoff+nsub-1)/nsub);
 623             if (yoff <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoff-1)/nsub),nyout-1);
 624           
 625 schou 1.5 // Also ensure that indices are within output array
 626           
 627             imin=maxval(0,minval(imin,nxout-1));
 628             imax=maxval(0,minval(imax,nxout-1));
 629             jmin=maxval(0,minval(jmin,nyout-1));
 630             jmax=maxval(0,minval(jmax,nyout-1));
 631           
 632 arta  1.1   imp=image_in+yoff*nleadin+xoff;
 633           
 634           #pragma omp parallel default(none) \
 635           private(i,j) \
 636           shared(image_in,image_out,imp,fillval) \
 637           shared(imin,imax,jmin,jmax) \
 638           shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
 639             { // Needed to define parallel region
 640           
 641           // Fill below valid region
 642           #pragma omp for
 643               for (j=0; j<jmin; j++) {
 644                 for (i=0; i<nxout; i++) {
 645                   image_out[j*nleadout+i]=fillval;
 646                 } // i=
 647               } //j=
 648           
 649           // Valid heights
 650           #pragma omp for
 651               for (j=jmin; j<=jmax; j++) {
 652           // Fill to the left
 653 arta  1.1       for (i=0; i<imin; i++) {
 654                   image_out[j*nleadout+i]=fillval;
 655                 } // i=
 656           // Valid region
 657                 for (i=imin; i<=imax; i++) {
 658           //      image_out[j*nleadout+i]=image_in[j*nsub*nleadin+i*nsub];
 659                   image_out[j*nleadout+i]=imp[j*nsub*nleadin+i*nsub];
 660                 } // i=
 661           // Fill to the right
 662                 for (i=imax+1; i<nxout; i++) {
 663                   image_out[j*nleadout+i]=fillval;
 664                 } // i=
 665               } //j=
 666           
 667           // Fill above valid region
 668           #pragma omp for
 669               for (j=jmax+1; j<nyout; j++) {
 670                 for (i=0; i<nxout; i++) {
 671                   image_out[j*nleadout+i]=fillval;
 672                 } // i=
 673               } //j=
 674 arta  1.1 
 675             } // End of parallel region
 676           
 677             return 0;
 678           }
 679           
 680           int fbin(
 681             float *image_in,
 682             float *image_out,
 683             int nxin,
 684             int nyin,
 685             int nleadin,
 686             int nxout,
 687             int nyout,
 688             int nleadout,
 689             int nsub,
 690             int xoff,
 691             int yoff,
 692             float fillval
 693           )
 694           
 695 arta  1.1 {
 696             int i,j,i1,j1;
 697             int imin,imax,jmin,jmax;
 698             float *imp,*impi;
 699             float sum;
 700           
 701 schou 1.5 // Find first ([ij]min) and last ([ij]max) indices in output image
 702           // for which resulting input point is within image
 703 arta  1.1   if (xoff >= 0) imin=0; else imin=((-xoff+nsub-1)/nsub);
 704             if (xoff <= 0) imax=nxout-1; else imax=minval(((nxin-xoff-nsub)/nsub),nxout-1);
 705             if (yoff >= 0) jmin=0; else jmin=((-yoff+nsub-1)/nsub);
 706             if (yoff <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoff-nsub)/nsub),nyout-1);
 707           
 708 schou 1.5 // Also ensure that indices are within output array
 709           
 710             imin=maxval(0,minval(imin,nxout-1));
 711             imax=maxval(0,minval(imax,nxout-1));
 712             jmin=maxval(0,minval(jmin,nyout-1));
 713             jmax=maxval(0,minval(jmax,nyout-1));
 714           
 715 arta  1.1   imp=image_in+yoff*nleadin+xoff;
 716           
 717           #pragma omp parallel default(none) \
 718           private(i,j,i1,j1,impi,sum) \
 719           shared(image_in,image_out,imp,fillval) \
 720           shared(imin,imax,jmin,jmax) \
 721           shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
 722             { // Needed to define parallel region
 723           
 724           // Fill below valid region
 725           #pragma omp for
 726               for (j=0; j<jmin; j++) {
 727                 for (i=0; i<nxout; i++) {
 728                   image_out[j*nleadout+i]=fillval;
 729                 } // i=
 730               } //j=
 731           
 732           // Valid heights
 733           #pragma omp for
 734               for (j=jmin; j<=jmax; j++) {
 735           // Fill to the left
 736 arta  1.1       for (i=0; i<imin; i++) {
 737                   image_out[j*nleadout+i]=fillval;
 738                 } // i=
 739           // Valid region
 740                 for (i=imin; i<=imax; i++) {
 741                   impi=imp+j*nleadin*nsub+i*nsub;
 742                   sum=0.0f;
 743                   for (j1=0; j1<nsub; j1++) {
 744                     for (i1=0; i1<nsub; i1++) {
 745                       sum=sum+impi[i1];
 746                     }
 747                     impi=impi+nleadin;
 748                   }
 749                   image_out[j*nleadout+i]=sum/(nsub*nsub);
 750                 } // i=
 751           // Fill to the right
 752                 for (i=imax+1; i<nxout; i++) {
 753                   image_out[j*nleadout+i]=fillval;
 754                 } // i=
 755               } //j=
 756           
 757 arta  1.1 // Fill above valid region
 758           #pragma omp for
 759               for (j=jmax+1; j<nyout; j++) {
 760                 for (i=0; i<nxout; i++) {
 761                   image_out[j*nleadout+i]=fillval;
 762                 } // i=
 763               } //j=
 764           
 765             } // End of parallel region
 766           
 767             return 0;
 768           }
 769           
 770           int f1d(
 771             struct fresize_struct *pars,
 772             float *image_in,
 773             float *image_out,
 774             int nxin,
 775             int nyin,
 776             int nleadin,
 777             int nxout,
 778 arta  1.1   int nyout,
 779             int nleadout,
 780             int xoff,
 781             int yoff,
 782             float fillval
 783           )
 784           {
 785             const int malign=32;
 786             /*char transpose[] = "t"; */
 787             char normal[] = "n";
 788             const int ione = 1;
 789             const float fone = 1.0f;
 790             const float fzero = 0.0f;
 791             int i,j,i1 /*,j1*/;
 792             int imin,imax,jmin,jmax;
 793             float /**inp*,*/ *inpi,*inpj;
 794             float sum;
 795             int nsub,hwidth;
 796             float *kerx,*kery,*work;
 797             int xoffl,xoffh,yoffl,yoffh;
 798             int nwork;
 799 arta  1.1   /*double t1,t2,t3;*/
 800             int n1,n2 /*,nchunk*/;
 801           
 802             nsub=pars->nsub;
 803             hwidth=pars->hwidth;
 804             kerx=pars->kerx;
 805             kery=pars->kery;
 806           
 807 schou 1.5 // Find first ([ij]min) and last ([ij]max) indices in output image
 808           // for which resulting input point is within image
 809 arta  1.1   xoffl=xoff-hwidth;
 810             xoffh=xoff+hwidth;
 811             if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
 812             if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
 813           
 814             yoffl=yoff-hwidth;
 815             yoffh=yoff+hwidth;
 816             if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
 817             if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
 818           
 819 schou 1.5 // Also ensure that indices are within output array
 820           
 821             imin=maxval(0,minval(imin,nxout-1));
 822             imax=maxval(0,minval(imax,nxout-1));
 823             jmin=maxval(0,minval(jmin,nyout-1));
 824             jmax=maxval(0,minval(jmax,nyout-1));
 825           
 826 arta  1.1 // Get work array big enough to hold convolution in x
 827             nwork=nxout*nyin;
 828             work=(float *)(MKL_malloc(nwork*sizeof(float),malign));
 829           
 830             /*nchunk=64;*/
 831             n1=(imax-imin+1); // Size of matrix for sgemv
 832             n2=2*hwidth+1; // Size of matrix for sgemv
 833             /*t1=dsecnd();*/
 834            
 835           #pragma omp parallel default(none) \
 836           private(i,j,i1,/*j1,*/inpi,inpj,sum)          \
 837           shared(image_in,image_out,fillval) \
 838           shared(imin,imax,jmin,jmax,hwidth,kerx,kery,work,xoffl,yoffl) \
 839           shared(normal,/*transpose,*/n1,n2/*,nchunk*/)               \
 840           shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
 841             { // Needed to define parallel region
 842           
 843           // First convolve in x
 844           // Brute force loop takes longer than it ought to, but have not
 845           // found an obvious solution.
 846           #pragma omp for
 847 arta  1.1     for (j=0;j<nyin;j++) {
 848                 inpj=image_in+j*nleadin+xoffl; // Note offset to match kernels.
 849                 for (i=imin; i<=imax; i++) {
 850                   sum=0.0f;
 851                   inpi=inpj+i*nsub;
 852                   for (i1=0; i1<=2*hwidth; i1++) {
 853                     sum=sum+kerx[i1]*inpi[i1];
 854                   }
 855                   work[j*nleadout+i]=sum;
 856                 }
 857               }
 858           
 859           // Then convolve in y
 860           
 861 schou 1.5 // Code using sgemv
 862               if (n1 > 0) {
 863 arta  1.1 #pragma omp for
 864 schou 1.5       for (j=jmin; j<=jmax; j++) {
 865                   inpj=work+(yoffl+j*nsub)*nleadout;
 866                   sgemv(normal,&n1,&n2,&fone,inpj+imin,&nleadout,kery,&ione,&fzero,image_out+j*nleadout+imin,&ione);
 867 arta  1.1       }
 868               }
 869           
 870           // Fill below valid region
 871           #pragma omp for
 872               for (j=0; j<jmin; j++) {
 873                 for (i=0; i<nxout; i++) {
 874                   image_out[j*nleadout+i]=fillval;
 875                 } // i=
 876               } //j=
 877           
 878           // Valid heights
 879           #pragma omp for
 880               for (j=jmin; j<=jmax; j++) {
 881           // Fill to the left
 882                 for (i=0; i<imin; i++) {
 883                   image_out[j*nleadout+i]=fillval;
 884                 } // i=
 885           // Fill to the right
 886                 for (i=imax+1; i<nxout; i++) {
 887                   image_out[j*nleadout+i]=fillval;
 888 arta  1.1       } // i=
 889               } //j=
 890           
 891           // Fill above valid region
 892           #pragma omp for
 893               for (j=jmax+1; j<nyout; j++) {
 894                 for (i=0; i<nxout; i++) {
 895                   image_out[j*nleadout+i]=fillval;
 896                 } // i=
 897               } //j=
 898           
 899             } // End of parallel region
 900           
 901             MKL_free(work);
 902             return 0;
 903           }
 904           
 905           int f1d_fft(
 906             struct fresize_struct *pars,
 907             float *image_in,
 908             float *image_out,
 909 arta  1.1   int nxin,
 910             int nyin,
 911             int nleadin,
 912             int nxout,
 913             int nyout,
 914             int nleadout,
 915             int xoff,
 916             int yoff,
 917             float fillval
 918           )
 919           {
 920             const int malign=32;
 921             int i,j,i1 /*,j1*/;
 922             int imin,imax,jmin,jmax;
 923             float /**inp,*inpi,*/ *inpj;
 924             /*float sum;*/
 925             int nsub,hwidth;
 926             float /**kerx,*kery,*/ *work;
 927             int xoffl,xoffh,yoffl,yoffh;
 928             int nwork;
 929             double t1, /*t2,t3,*/ t4;
 930 arta  1.1   int /*n1,n2,*/ nchunk;
 931             fftwf_complex *helpc,*fkernel,*fkernely;
 932             float *helpin,*helpout;
 933             int nxinp,nyinp;
 934             float *iwork,*owork;
 935           
 936             if (pars->nxin != nxin) return 1;
 937             if (pars->nyin != nyin) return 1;
 938           
 939             nsub=pars->nsub;
 940             hwidth=pars->hwidth;
 941             /*kerx=pars->kerx;*/
 942             /*kery=pars->kery;*/
 943             helpc=pars->helpc;
 944             fkernel=pars->fkernel;
 945             fkernely=pars->fkernely;
 946             helpin=pars->helpin;
 947             helpout=pars->helpout;
 948             nxinp=pars->nxinp;
 949             nyinp=pars->nyinp;
 950           
 951 schou 1.5 // Find first ([ij]min) and last ([ij]max) indices in output image
 952           // for which resulting input point is within image
 953 arta  1.1   xoffl=xoff-hwidth;
 954             xoffh=xoff+hwidth;
 955             if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
 956             if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
 957           
 958             yoffl=yoff-hwidth;
 959             yoffh=yoff+hwidth;
 960             if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
 961             if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
 962           
 963 schou 1.5 // Also ensure that indices are within output array
 964           
 965             imin=maxval(0,minval(imin,nxout-1));
 966             imax=maxval(0,minval(imax,nxout-1));
 967             jmin=maxval(0,minval(jmin,nyout-1));
 968             jmax=maxval(0,minval(jmax,nyout-1));
 969           
 970 arta  1.1 // Get work array big enough to hold convolution in x
 971             nwork=nxout*nyin;
 972             work=(float *)(MKL_malloc(nwork*sizeof(float),malign));
 973           
 974             nchunk=64;
 975             iwork=(float *)(MKL_malloc(nchunk*nyin*sizeof(float),malign));
 976             owork=(float *)(MKL_malloc(nchunk*nyout*sizeof(float),malign));
 977           
 978           // No OpenMP yet, but have left statements in for good measure.
 979           //#pragma omp parallel default(none) \
 980           //private(i,j,i1,j1,inpi,inpj,sum) \
 981           //shared(image_in,image_out,fillval) \
 982           //shared(imin,imax,jmin,jmax,hwidth,kerx,kery,work,xoffl,yoffl) \
 983           //shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
 984             { // Needed to define parallel region
 985           
 986             t1=dsecnd();
 987           // First convolve in x
 988           //#pragma omp for
 989               for (j=0;j<nyin;j++) {
 990                 inpj=image_in+j*nleadin;
 991 arta  1.1       for (i=0;i<nxin;i++) helpin[i]=inpj[i];
 992                 fftwf_execute(pars->plan1);
 993                 for (i=0; i<nxinp; i++) helpc[i]=fkernel[i]*helpc[i];
 994                 fftwf_execute(pars->plan2);
 995                 for (i=imin; i<=imax; i++) work[j*nleadout+i]=helpout[i*nsub+xoff];
 996               }
 997             t1=dsecnd()-t1;
 998           
 999           // Then convolve in y
1000           
1001           // Block both the input output arrays.
1002             t4=dsecnd();
1003           //#pragma omp for
1004             for (i1=imin;i1<=imax;i1=i1+nchunk) {
1005               for (j=0;j<nyin;j++) {
1006                 for (i=i1; i<=minval(i1+nchunk-1,imax); i++) {
1007                   iwork[(i-i1)*nyin+j]=work[j*nleadout+i];
1008                 }
1009               }
1010               for (i=i1; i<=minval(i1+nchunk-1,imax); i++) {
1011                 for (j=0;j<nyin;j++) helpin[j]=iwork[(i-i1)*nyin+j];
1012 arta  1.1       fftwf_execute(pars->plan1y);
1013                 for (j=0; j<nyinp; j++) helpc[j]=fkernely[j]*helpc[j];
1014                 fftwf_execute(pars->plan2y);
1015                 for (j=jmin; j<=jmax; j++) owork[j+(i-i1)*nyout]=helpout[j*nsub+yoff];
1016               }
1017               for (j=jmin; j<=jmax; j++) {
1018                 for (i=i1; i<=minval(i1+nchunk-1,imax); i++) {
1019                   image_out[j*nleadout+i]=owork[j+(i-i1)*nyout];
1020                 }
1021               }
1022             }
1023             t4=dsecnd()-t4;
1024           
1025           // Fill below valid region
1026           //#pragma omp for
1027               for (j=0; j<jmin; j++) {
1028                 for (i=0; i<nxout; i++) {
1029                   image_out[j*nleadout+i]=fillval;
1030                 } // i=
1031               } //j=
1032           
1033 arta  1.1 // Valid heights
1034           //#pragma omp for
1035               for (j=jmin; j<=jmax; j++) {
1036           // Fill to the left
1037                 for (i=0; i<imin; i++) {
1038                   image_out[j*nleadout+i]=fillval;
1039                 } // i=
1040           // Fill to the right
1041                 for (i=imax+1; i<nxout; i++) {
1042                   image_out[j*nleadout+i]=fillval;
1043                 } // i=
1044               } //j=
1045           
1046           // Fill above valid region
1047           //#pragma omp for
1048               for (j=jmax+1; j<nyout; j++) {
1049                 for (i=0; i<nxout; i++) {
1050                   image_out[j*nleadout+i]=fillval;
1051                 } // i=
1052               } //j=
1053           
1054 arta  1.1   } // End of parallel region
1055           
1056             MKL_free(iwork);
1057             MKL_free(owork);
1058 schou 1.4   MKL_free(work);
1059 arta  1.1 
1060             return 0;
1061           }
1062           
1063           int f2d(
1064             struct fresize_struct *pars,
1065             float *image_in,
1066             float *image_out,
1067             int nxin,
1068             int nyin,
1069             int nleadin,
1070             int nxout,
1071             int nyout,
1072             int nleadout,
1073             int xoff,
1074             int yoff,
1075             float fillval
1076           )
1077           
1078           {
1079             /*const int malign=32;*/
1080 arta  1.1   /*char transpose[] = "t";*/
1081             /*char normal[] = "n";*/
1082             /*const int ione = 1;*/
1083             /*const float fone = 1.0f;*/
1084             /*const float fzero = 0.0f;*/
1085             int i,j,i1,j1;
1086             int imin,imax,jmin,jmax;
1087             float *inpi,*kerp;
1088             float sum;
1089             int nsub,hwidth,fwidth;
1090             float *ker;
1091             int xoffl,xoffh,yoffl,yoffh;
1092             /*double t1,t2,t3;*/
1093           
1094             nsub=pars->nsub;
1095             hwidth=pars->hwidth;
1096             ker=pars->ker;
1097             fwidth=2*hwidth+1;
1098           
1099 schou 1.5 // Find first ([ij]min) and last ([ij]max) indices in output image
1100           // for which resulting input point is within image
1101 arta  1.1   xoffl=xoff-hwidth;
1102             xoffh=xoff+hwidth;
1103             if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
1104             if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
1105           
1106             yoffl=yoff-hwidth;
1107             yoffh=yoff+hwidth;
1108             if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
1109             if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
1110           
1111 schou 1.5 // Also ensure that indices are within output array
1112           
1113             imin=maxval(0,minval(imin,nxout-1));
1114             imax=maxval(0,minval(imax,nxout-1));
1115             jmin=maxval(0,minval(jmin,nyout-1));
1116             jmax=maxval(0,minval(jmax,nyout-1));
1117 arta  1.1 
1118             /*t1=dsecnd();*/
1119            
1120           #pragma omp parallel default(none) \
1121           private(i,j,i1,j1,inpi,kerp,sum) \
1122           shared(image_in,image_out,fillval) \
1123           shared(imin,imax,jmin,jmax,hwidth,fwidth,ker,xoffl,yoffl) \
1124           shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
1125             { // Needed to define parallel region
1126           
1127           #pragma omp for
1128               for (j=jmin; j<=jmax; j++) {
1129                 for (i=imin; i<=imax; i++) {
1130                   inpi=image_in+(j*nsub+yoffl)*nleadin+i*nsub+xoffl; // Note offset to match kernels.
1131                   sum=0.0f;
1132                   for (j1=0; j1<=2*hwidth; j1++) {
1133                     kerp=ker+j1*fwidth;
1134                     for (i1=0; i1<=2*hwidth; i1++) {
1135                       sum=sum+kerp[i1]*inpi[i1];
1136                     }
1137                     inpi=inpi+nleadin;
1138 arta  1.1         }
1139                   image_out[j*nleadout+i]=sum;
1140                 }
1141               }
1142           
1143           // Fill below valid region
1144           #pragma omp for
1145               for (j=0; j<jmin; j++) {
1146                 for (i=0; i<nxout; i++) {
1147                   image_out[j*nleadout+i]=fillval;
1148                 } // i=
1149               } //j=
1150           
1151           // Valid heights
1152           #pragma omp for
1153               for (j=jmin; j<=jmax; j++) {
1154           // Fill to the left
1155                 for (i=0; i<imin; i++) {
1156                   image_out[j*nleadout+i]=fillval;
1157                 } // i=
1158           // Fill to the right
1159 arta  1.1       for (i=imax+1; i<nxout; i++) {
1160                   image_out[j*nleadout+i]=fillval;
1161                 } // i=
1162               } //j=
1163           
1164           // Fill above valid region
1165           #pragma omp for
1166               for (j=jmax+1; j<nyout; j++) {
1167                 for (i=0; i<nxout; i++) {
1168                   image_out[j*nleadout+i]=fillval;
1169                 } // i=
1170               } //j=
1171           
1172             } // End of parallel region
1173           
1174             return 0;
1175           }
1176           
1177           // this version uses matrix vector multiplys. Not much faster.
1178           int f2d_mat(
1179             struct fresize_struct *pars,
1180 arta  1.1   float *image_in,
1181             float *image_out,
1182             int nxin,
1183             int nyin,
1184             int nleadin,
1185             int nxout,
1186             int nyout,
1187             int nleadout,
1188             int xoff,
1189             int yoff,
1190             float fillval
1191           )
1192           
1193           {
1194             const int malign=32;
1195             char transpose[] = "t";
1196             /*char normal[] = "n";*/
1197             const int ione = 1;
1198             const float fone = 1.0f;
1199             /*const float fzero = 0.0f;*/
1200             int i,j,/*i1,*/j1;
1201 arta  1.1   int imin,imax,jmin,jmax;
1202             float *inpi,*kerp;
1203             /*float sum;*/
1204             int nsub,hwidth,fwidth;
1205             float *ker;
1206             int xoffl,xoffh,yoffl,yoffh;
1207             /*double t1,t2,t3;*/
1208             int /*n1,*/n2,nchunk,jc,nc;
1209             float *work;
1210           
1211             nsub=pars->nsub;
1212             hwidth=pars->hwidth;
1213             ker=pars->ker;
1214             fwidth=2*hwidth+1;
1215           
1216 schou 1.5 // Find first ([ij]min) and last ([ij]max) indices in output image
1217           // for which resulting input point is within image
1218 arta  1.1   xoffl=xoff-hwidth;
1219             xoffh=xoff+hwidth;
1220             if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
1221             if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
1222           
1223             yoffl=yoff-hwidth;
1224             yoffh=yoff+hwidth;
1225             if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
1226             if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
1227           
1228 schou 1.5 // Also ensure that indices are within output array
1229           
1230             imin=maxval(0,minval(imin,nxout-1));
1231             imax=maxval(0,minval(imax,nxout-1));
1232             jmin=maxval(0,minval(jmin,nyout-1));
1233             jmax=maxval(0,minval(jmax,nyout-1));
1234 arta  1.1 
1235             /*t1=dsecnd();*/
1236            
1237           n2=nleadin*nsub;
1238           nchunk=64;
1239           
1240           #pragma omp parallel default(none) \
1241           private(i,j,/*i1,*/j1,inpi,kerp,/*sum,*/work,nc)      \
1242           shared(/*normal,*/transpose,/*n1,*/n2,nchunk)       \
1243           shared(image_in,image_out,fillval) \
1244           shared(imin,imax,jmin,jmax,hwidth,fwidth,ker,xoffl,yoffl) \
1245           shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
1246             { // Needed to define parallel region
1247           
1248           work=(float *)(MKL_malloc(nyout*sizeof(float),malign));
1249           #pragma omp for
1250               for (j=jmin; j<=jmax; j++) {
1251                 for (i=imin; i<=imax; i++) {
1252                   image_out[j*nleadout+i]=0.0f;
1253                 }
1254               }
1255 arta  1.1 
1256           #pragma omp for
1257           for (jc=jmin;jc<=jmax;jc=jc+nchunk) {
1258           nc=minval(jc+nchunk-1,jmax)-jc+1;
1259               for (j1=0; j1<=2*hwidth; j1++) {
1260                 kerp=ker+j1*fwidth;
1261                 for (i=imin; i<=imax; i++) {
1262                   inpi=image_in+(jc*nsub+yoffl+j1)*nleadin+i*nsub+xoffl;
1263                   sgemv(transpose,&fwidth,&nc,&fone,inpi,&n2,kerp,&ione,&fone,image_out+jc*nleadout+i,&nleadout);
1264                 }
1265               }
1266           }
1267           
1268           // Fill below valid region
1269           #pragma omp for
1270               for (j=0; j<jmin; j++) {
1271                 for (i=0; i<nxout; i++) {
1272                   image_out[j*nleadout+i]=fillval;
1273                 } // i=
1274               } //j=
1275           
1276 arta  1.1 // Valid heights
1277           #pragma omp for
1278               for (j=jmin; j<=jmax; j++) {
1279           // Fill to the left
1280                 for (i=0; i<imin; i++) {
1281                   image_out[j*nleadout+i]=fillval;
1282                 } // i=
1283           // Fill to the right
1284                 for (i=imax+1; i<nxout; i++) {
1285                   image_out[j*nleadout+i]=fillval;
1286                 } // i=
1287               } //j=
1288           
1289           // Fill above valid region
1290           #pragma omp for
1291               for (j=jmax+1; j<nyout; j++) {
1292                 for (i=0; i<nxout; i++) {
1293                   image_out[j*nleadout+i]=fillval;
1294                 } // i=
1295               } //j=
1296           
1297 arta  1.1     MKL_free(work);
1298             } // End of parallel region
1299           
1300             return 0;
1301           }
1302           
1303           int f2d_fft(
1304             struct fresize_struct *pars,
1305             float *image_in,
1306             float *image_out,
1307             int nxin,
1308             int nyin,
1309             int nleadin,
1310             int nxout,
1311             int nyout,
1312             int nleadout,
1313             int xoff,
1314             int yoff,
1315             float fillval
1316           )
1317           {
1318 arta  1.1   fftwf_complex *helpc,*fkernel;
1319             float *helpin,*helpout;
1320             fftwf_plan plan1,plan2;
1321             int nxinp; // Complex array size
1322             int hwidth/*,fwidth*/;
1323             int i,j,/*i1,*/j1;
1324             /*float c;*/
1325             int imin,imax,jmin,jmax;
1326             int xoffl,xoffh,yoffl,yoffh;
1327             int nsub;
1328           
1329             if (pars->nxin != nxin) return 1;
1330             if (pars->nyin != nyin) return 1;
1331           
1332             nsub=pars->nsub;
1333             hwidth=pars->hwidth;
1334             /*fwidth=2*hwidth+1;*/
1335             nxinp=(nxin/2+1);
1336           
1337             helpin=pars->helpin;
1338             fkernel=pars->fkernel;
1339 arta  1.1   helpc=pars->helpc;
1340             helpout=pars->helpout;
1341             plan1=pars->plan1;
1342             plan2=pars->plan2;
1343           
1344           /* Now copy and transform input image */
1345           
1346             for (j=0;j<nyin;j++) {
1347               for (i=0;i<nxin;i++) {
1348                 helpin[j*nxin+i]=image_in[j*nleadin+i];
1349               }
1350             }
1351           
1352             fftwf_execute(plan1);
1353           
1354           /* Multiply by kernel transform */
1355           
1356             for (j=0;j<nyin;j++) {
1357               for (i=0;i<nxinp;i++) {
1358                 helpc[j*nxinp+i]=fkernel[j*nxinp+i]*helpc[j*nxinp+i];
1359               }
1360 arta  1.1   }
1361           
1362             fftwf_execute(plan2);
1363           
1364           // Copy data to output array
1365           
1366 schou 1.5 // Find first ([ij]min) and last ([ij]max) indices in output image
1367           // for which resulting input point is within image
1368 arta  1.1   xoffl=xoff-hwidth;
1369             xoffh=xoff+hwidth;
1370             if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
1371             if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
1372           
1373             yoffl=yoff-hwidth;
1374             yoffh=yoff+hwidth;
1375             if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
1376             if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
1377           
1378 schou 1.5 // Also ensure that indices are within output array
1379           
1380             imin=maxval(0,minval(imin,nxout-1));
1381             imax=maxval(0,minval(imax,nxout-1));
1382             jmin=maxval(0,minval(jmin,nyout-1));
1383             jmax=maxval(0,minval(jmax,nyout-1));
1384           
1385 arta  1.1   for (j=jmin; j<=jmax; j++) {
1386               j1=(j*nsub+yoff)*nxin+xoff;
1387               for (i=imin; i<=imax; i++) {
1388                 image_out[j*nleadout+i]=helpout[j1+i*nsub];
1389               }
1390             }
1391           
1392           // Fill below valid region
1393             for (j=0; j<jmin; j++) {
1394               for (i=0; i<nxout; i++) {
1395                 image_out[j*nleadout+i]=fillval;
1396               } // i=
1397             } //j=
1398           
1399           // Valid heights
1400             for (j=jmin; j<=jmax; j++) {
1401           // Fill to the left
1402               for (i=0; i<imin; i++) {
1403                 image_out[j*nleadout+i]=fillval;
1404               } // i=
1405           // Fill to the right
1406 arta  1.1     for (i=imax+1; i<nxout; i++) {
1407                 image_out[j*nleadout+i]=fillval;
1408               } // i=
1409             } //j=
1410           
1411           // Fill above valid region
1412             for (j=jmax+1; j<nyout; j++) {
1413               for (i=0; i<nxout; i++) {
1414                 image_out[j*nleadout+i]=fillval;
1415               } // i=
1416             } //j=
1417           
1418             return 0;
1419           }
1420           
1421           int fresize(
1422             struct fresize_struct *pars,
1423             float *image_in,
1424             float *image_out,
1425             int nxin,
1426             int nyin,
1427 arta  1.1   int nleadin,
1428             int nx,
1429             int ny,
1430             int nlead,
1431             int xoff,
1432             int yoff,
1433             float fillval
1434           )
1435           {
1436             int status;
1437           
1438             switch (pars->method) {
1439           
1440             case fresize_sample:
1441               status=fsample(image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,pars->nsub,xoff,yoff,fillval);
1442               break;
1443           
1444             case fresize_bin:
1445               status=fbin(image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,pars->nsub,xoff,yoff,fillval);
1446               break;
1447           
1448 arta  1.1   case fresize_1d:
1449               status=f1d(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
1450               break;
1451           
1452             case fresize_1d_fft:
1453               status=f1d_fft(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
1454               break;
1455           
1456             case fresize_2d:
1457               status=f2d_mat(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
1458               break;
1459           
1460             case fresize_2d_fft:
1461               status=f2d_fft(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
1462               break;
1463           
1464             default:
1465               status=1;
1466               break;
1467             }
1468           
1469 arta  1.1 return status;
1470           
1471           }
1472           
1473 schou 1.3 int init_fresize_user(
1474             struct fresize_struct *pars,
1475             int hwidth, // Half width of kernel. Full is 2*hwidth+1.
1476             int nsub, // Distance between sampled points
1477             float *user_ker // User specified kernel to convolve with.
1478                             // Must be of size (2*hwidth+1) x (2*hwidth+1).
1479                             // Kernel need not be and will not be normalized.
1480           )
1481           {
1482             const int malign=32;
1483             int fwidth;
1484             int i,j;
1485           
1486             pars->method=fresize_2d;
1487             pars->nsub=nsub;
1488             pars->hwidth=hwidth;
1489             fwidth=2*hwidth+1;
1490             pars->ker=(float *)(MKL_malloc(fwidth*fwidth*sizeof(float),malign));
1491             memcpy(pars->ker,user_ker,sizeof(float)*fwidth*fwidth);
1492           
1493             return 0;
1494 schou 1.3 }
1495           

Karen Tian
Powered by
ViewCVS 0.9.4