00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <time.h>
00004 #include <string.h>
00005 #include <math.h>
00006 #include <mkl_blas.h>
00007 #include <mkl_service.h>
00008 #include <mkl_lapack.h>
00009 #include <mkl_vml_functions.h>
00010 #include <omp.h>
00011 #include <complex.h>
00012 #include <fftw3.h>
00013 #include "fresize.h"
00014 #define minval(x,y) (((x) < (y)) ? (x) : (y))
00015 #define maxval(x,y) (((x) < (y)) ? (y) : (x))
00016 #define fresize_sample 1
00017 #define fresize_bin 2
00018 #define fresize_1d 3
00019 #define fresize_2d 4
00020 #define fresize_1d_fft 5
00021 #define fresize_2d_fft 6
00022
00023 static double sinc(double x)
00024 {
00025 if (fabs(x) < (1.e-10))
00026 return 1.;
00027 else
00028 return sin(M_PI*x)/(M_PI*x);
00029 }
00030
00031 int make_fft1d(
00032 struct fresize_struct *pars,
00033 int nxin,
00034 int nyin
00035 )
00036 {
00037 fftwf_complex *helpc,*fkernel,*fkernely;
00038 float *helpin,*helpout;
00039 fftwf_plan plan1,plan2,plan1y,plan2y;
00040 int nxinp,nyinp;
00041 int nin,ninp;
00042 int hwidth ;
00043 int i,i1;
00044 float c;
00045
00046 hwidth=pars->hwidth;
00047
00048 nxinp=(nxin/2+1);
00049 nyinp=(nyin/2+1);
00050 nin=maxval(nxin,nyin);
00051 ninp=maxval(nxinp,nyinp);
00052
00053 helpin=(float*) fftwf_malloc(sizeof(float) * nin);
00054 fkernel=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nxinp);
00055 fkernely=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nyinp);
00056 helpc=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * ninp);
00057 helpout=(float*) fftwf_malloc(sizeof(float) * nin);
00058
00059 plan1=fftwf_plan_dft_r2c_1d(nxin,helpin,helpc,FFTW_ESTIMATE);
00060 plan2=fftwf_plan_dft_c2r_1d(nxin,helpc,helpout,FFTW_ESTIMATE);
00061 plan1y=fftwf_plan_dft_r2c_1d(nxin,helpin,helpc,FFTW_ESTIMATE);
00062 plan2y=fftwf_plan_dft_c2r_1d(nxin,helpc,helpout,FFTW_ESTIMATE);
00063
00064 pars->helpin=helpin;
00065 pars->helpout=helpout;
00066 pars->fkernel=fkernel;
00067 pars->fkernely=fkernely;
00068 pars->helpc=helpc;
00069 pars->plan1=plan1;
00070 pars->plan2=plan2;
00071 pars->plan1y=plan1y;
00072 pars->plan2y=plan2y;
00073 pars->nxin=nxin;
00074 pars->nyin=nyin;
00075 pars->nxinp=nxinp;
00076 pars->nyinp=nyinp;
00077 pars->method=fresize_1d_fft;
00078
00079
00080
00081
00082 for (i=0;i<nxin;i++) {
00083 helpin[i]=0;
00084 }
00085
00086
00087 for (i=-hwidth;i<=hwidth;i++) {
00088 i1=(i+nxin) % nxin;
00089 helpin[i1]=pars->kerx[i+hwidth];
00090 }
00091
00092
00093 fftwf_execute(plan1);
00094
00095
00096 c=1.0f/nxin;
00097
00098 for (i=0;i<nxinp;i++) {
00099 fkernel[i]=c*helpc[i];
00100 }
00101
00102
00103
00104
00105 for (i=0;i<nxin;i++) {
00106 helpin[i]=0;
00107 }
00108
00109
00110 for (i=-hwidth;i<=hwidth;i++) {
00111 i1=(i+nyin) % nyin;
00112 helpin[i1]=pars->kery[i+hwidth];
00113 }
00114
00115
00116 fftwf_execute(plan1y);
00117
00118
00119 c=1.0f/nyin;
00120
00121 for (i=0;i<nyinp;i++) {
00122 fkernely[i]=c*helpc[i];
00123 }
00124
00125 return 0;
00126 }
00127
00128 int make_fft2d(
00129 struct fresize_struct *pars,
00130 int nxin,
00131 int nyin
00132 )
00133 {
00134 fftwf_complex *helpc,*fkernel;
00135 float *helpin,*helpout;
00136 fftwf_plan plan1,plan2;
00137 int nxinp;
00138 int hwidth,fwidth;
00139 int i,j,i1,j1;
00140 float c;
00141
00142 hwidth=pars->hwidth;
00143 fwidth=2*hwidth+1;
00144 nxinp=(nxin/2+1);
00145
00146 helpin=(float*) fftwf_malloc(sizeof(float) * nxin*nyin);
00147 fkernel=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nxinp*nyin);
00148 helpc=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nxinp*nyin);
00149 helpout=(float*) fftwf_malloc(sizeof(float) * nxin*nyin);
00150
00151 plan1=fftwf_plan_dft_r2c_2d(nyin,nxin,helpin,helpc,FFTW_ESTIMATE);
00152 plan2=fftwf_plan_dft_c2r_2d(nyin,nxin,helpc,helpout,FFTW_ESTIMATE);
00153
00154 pars->helpin=helpin;
00155 pars->helpout=helpout;
00156 pars->fkernel=fkernel;
00157 pars->helpc=helpc;
00158 pars->plan1=plan1;
00159 pars->plan2=plan2;
00160 pars->nxin=nxin;
00161 pars->nyin=nyin;
00162 pars->nxinp=nxinp;
00163 pars->method=fresize_2d_fft;
00164
00165
00166
00167
00168 for (j=0;j<nyin;j++) {
00169 for (i=0;i<nxin;i++) {
00170 helpin[j*nxin+i]=0;
00171 }
00172 }
00173
00174
00175
00176
00177 for (j=-hwidth;j<=hwidth;j++) {
00178 j1=(j+nyin) % nyin;
00179 for (i=-hwidth;i<=hwidth;i++) {
00180 i1=(i+nxin) % nxin;
00181 helpin[j1*nxin+i1]=pars->ker[(j+hwidth)*fwidth+(i+hwidth)];
00182 }
00183 }
00184
00185
00186 fftwf_execute(plan1);
00187
00188
00189 c=1.0f/nxin/nyin;
00190
00191 for (j=0;j<nyin;j++) {
00192 for (i=0;i<nxinp;i++) {
00193 fkernel[j*nxinp+i]=c*helpc[j*nxinp+i];
00194 }
00195 }
00196
00197 return 0;
00198 }
00199
00200 int init_fresize_sample(
00201 struct fresize_struct *pars,
00202 int nsub
00203 )
00204 {
00205
00206 pars->method=fresize_sample;
00207 pars->nsub=nsub;
00208
00209 return 0;
00210 }
00211
00212 int init_fresize_bin(
00213 struct fresize_struct *pars,
00214 int nsub
00215 )
00216 {
00217
00218 pars->method=fresize_bin;
00219 pars->nsub=nsub;
00220
00221 return 0;
00222 }
00223
00224 int init_fresize_boxcar(
00225 struct fresize_struct *pars,
00226 int hwidth,
00227 int nsub
00228 )
00229 {
00230 const int malign=32;
00231 int fwidth;
00232 int i ;
00233
00234 pars->method=fresize_1d;
00235 pars->nsub=nsub;
00236 pars->hwidth=hwidth;
00237 fwidth=2*hwidth+1;
00238 pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00239 pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00240 for (i=0;i<fwidth;i++) {
00241 pars->kerx[i]=1.0f/fwidth;
00242 pars->kery[i]=1.0f/fwidth;
00243 }
00244
00245 return 0;
00246 }
00247
00248 int init_fresize_boxcar_fft(
00249 struct fresize_struct *pars,
00250 int hwidth,
00251 int nsub,
00252 int nxin,
00253 int nyin
00254 )
00255 {
00256 int status;
00257
00258 status=init_fresize_boxcar(pars,hwidth,nsub);
00259 if (status != 0) return status;
00260
00261 status=make_fft1d(pars,nxin,nyin);
00262 return status;
00263 }
00264
00265 int init_fresize_gaussian(
00266 struct fresize_struct *pars,
00267 float sigma,
00268 int hwidth,
00269 int nsub
00270 )
00271 {
00272 const int malign=32;
00273 int fwidth;
00274 int i ;
00275 float sum,x,y;
00276
00277 pars->method=fresize_1d;
00278 pars->nsub=nsub;
00279 pars->hwidth=hwidth;
00280 fwidth=2*hwidth+1;
00281 pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00282 pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00283 sum=0.0f;
00284 for (i=0;i<fwidth;i++) {
00285 x=(i-hwidth)/sigma;
00286 y=(float)exp(-x*x/2);
00287 sum=sum+y;
00288 }
00289 for (i=0;i<fwidth;i++) {
00290 x=(i-hwidth)/sigma;
00291 y=(float)exp(-x*x/2);
00292 pars->kerx[i]=y/sum;
00293 pars->kery[i]=y/sum;
00294 }
00295
00296 return 0;
00297 }
00298
00299 int init_fresize_gaussian_fft(
00300 struct fresize_struct *pars,
00301 float sigma,
00302 int hwidth,
00303 int nsub,
00304 int nxin,
00305 int nyin
00306 )
00307 {
00308 int status;
00309
00310 status=init_fresize_gaussian(pars,sigma,hwidth,nsub);
00311 if (status != 0) return status;
00312
00313 status=make_fft1d(pars,nxin,nyin);
00314 return status;
00315 }
00316
00317 int init_fresize_sinc(
00318 struct fresize_struct *pars,
00319 float wsinc,
00320
00321
00322 int hwidth,
00323 int iap,
00324
00325
00326
00327 int nap,
00328
00329
00330 int nsub
00331 )
00332 {
00333 const int malign=32;
00334 int fwidth;
00335 int i ;
00336 float sum,x,y;
00337
00338 pars->method=fresize_1d;
00339 pars->nsub=nsub;
00340 pars->hwidth=hwidth;
00341 fwidth=2*hwidth+1;
00342 pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00343 pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00344 sum=0.0f;
00345 for (i=0;i<fwidth;i++) {
00346 x=(float)(i-hwidth);
00347 if (abs(x) > (nap*wsinc)) {
00348 y=0.0f;
00349 }
00350 else {
00351 y=(float)sinc(x/wsinc);
00352 }
00353 if (iap == 1) y=y*(1-(x/(nap*wsinc))*(x/(nap*wsinc)));
00354 if (iap == 2) y=(float)(y*sinc(x/(nap*wsinc)));
00355 pars->kerx[i]=y;
00356 sum=sum+y;
00357 }
00358 for (i=0;i<fwidth;i++) {
00359 pars->kerx[i]=pars->kerx[i]/sum;
00360 pars->kery[i]=pars->kerx[i];
00361 }
00362
00363 return 0;
00364 }
00365
00366 int init_fresize_sinc_fft(
00367 struct fresize_struct *pars,
00368 float wsinc,
00369
00370
00371 int hwidth,
00372 int iap,
00373
00374
00375
00376
00377 int nap,
00378
00379
00380 int nsub,
00381 int nxin,
00382 int nyin
00383 )
00384 {
00385 int status;
00386
00387 status=init_fresize_sinc(pars,wsinc,hwidth,iap,nap,nsub);
00388 if (status != 0) return status;
00389
00390 status=make_fft1d(pars,nxin,nyin);
00391 return status;
00392 }
00393
00394 int init_fresize_gaussian2(
00395 struct fresize_struct *pars,
00396 float sigma,
00397 float rmax,
00398 int hwidth,
00399 int nsub
00400 )
00401 {
00402 const int malign=32;
00403 int fwidth;
00404 int i,j;
00405 float sum,xi,xj,y,r2,rmax2;
00406
00407 pars->method=fresize_2d;
00408 pars->nsub=nsub;
00409 pars->hwidth=hwidth;
00410 fwidth=2*hwidth+1;
00411 pars->ker=(float *)(MKL_malloc(fwidth*fwidth*sizeof(float),malign));
00412 rmax2=rmax*rmax/sigma/sigma;
00413
00414 sum=0.0f;
00415 for (j=0;j<fwidth;j++) {
00416 xj=(j-hwidth)/sigma;
00417 for (i=0;i<fwidth;i++) {
00418 xi=(i-hwidth)/sigma;
00419 r2=xi*xi+xj*xj;
00420 if (r2 <= rmax2 ) {
00421 y=(float)exp(-r2/2);
00422 }
00423 else {
00424 y=0.0f;
00425 }
00426 pars->ker[fwidth*j+i]=y;
00427 sum=sum+y;
00428 }
00429 }
00430
00431 for (j=0;j<fwidth;j++) {
00432 for (i=0;i<fwidth;i++) {
00433 pars->ker[fwidth*j+i]=pars->ker[fwidth*j+i]/sum;
00434 }
00435 }
00436
00437 return 0;
00438 }
00439
00440 int init_fresize_gaussian2_fft(
00441 struct fresize_struct *pars,
00442 float sigma,
00443 float rmax,
00444 int hwidth,
00445 int nsub,
00446 int nxin,
00447 int nyin
00448 )
00449 {
00450 int status;
00451
00452 status=init_fresize_gaussian2(pars,sigma,rmax,hwidth,nsub);
00453 if (status != 0) return status;
00454
00455 status=make_fft2d(pars,nxin,nyin);
00456 return status;
00457 }
00458
00459 int init_fresize_airy(
00460 struct fresize_struct *pars,
00461 float cdown,
00462 int hwidth,
00463
00464 int iap,
00465
00466
00467
00468
00469
00470
00471 int nap,
00472 int nsub
00473 )
00474 {
00475 const int malign=32;
00476 const float beszeros[20]={0.0000000,3.8317060,7.0155867,10.173468,13.323692,16.470630,19.615859,22.760084,25.903672,29.046829,32.189680,35.332308,38.474766,41.617094,44.759319,47.901461,51.043535,54.185554,57.327525,60.469458};
00477 float cb;
00478 int fwidth;
00479 int i,j;
00480 float sum,xi,xj;
00481 float r2,r,rc,airy,ra;
00482 float nzero;
00483
00484 cb=(float)(cdown/M_PI);
00485 nzero=beszeros[nap]*cb;
00486 if (hwidth < 0) hwidth=(float)floor(nzero);
00487
00488 pars->method=fresize_2d;
00489 pars->nsub=nsub;
00490 pars->hwidth=hwidth;
00491 fwidth=2*hwidth+1;
00492 pars->ker=(float *)(MKL_malloc(fwidth*fwidth*sizeof(float),malign));
00493
00494 sum=0.0f;
00495 for (j=0;j<fwidth;j++) {
00496 xj=(float)(j-hwidth);
00497 for (i=0;i<fwidth;i++) {
00498 xi=(float)(i-hwidth);
00499 r2=xi*xi+xj*xj;
00500 r=(float)maxval(sqrt(r2),1e-20);
00501 rc=r/cb;
00502 if (r <= nzero ) {
00503 airy=j1f(rc)/rc;
00504 }
00505 else {
00506 airy=0.0f;
00507 }
00508 ra=r/nzero;
00509 if (iap == 1) airy=airy*(1-ra*ra);
00510 if (iap == 2) airy=(float)(airy*sinc(ra));
00511 if (iap == 3) {
00512 ra=rc*beszeros[1]/beszeros[nap];
00513 airy=airy*j1f(ra)/ra;
00514 }
00515 pars->ker[fwidth*j+i]=airy;
00516 sum=sum+airy;
00517 }
00518 }
00519
00520 for (j=0;j<fwidth;j++) {
00521 for (i=0;i<fwidth;i++) {
00522 pars->ker[fwidth*j+i]=pars->ker[fwidth*j+i]/sum;
00523 }
00524 }
00525
00526
00527 return 0;
00528 }
00529
00530 int init_fresize_airy_fft(
00531 struct fresize_struct *pars,
00532 float cdown,
00533 int hwidth,
00534
00535 int iap,
00536
00537
00538
00539
00540
00541
00542 int nap,
00543 int nsub,
00544 int nxin,
00545 int nyin
00546 )
00547 {
00548 int status;
00549
00550 status=init_fresize_airy(pars,cdown,hwidth,iap,nap,nsub);
00551 if (status != 0) return status;
00552
00553 status=make_fft2d(pars,nxin,nyin);
00554 return status;
00555 }
00556
00557 int free_fresize(
00558 struct fresize_struct *pars
00559 )
00560 {
00561
00562 if (pars->method==fresize_1d) {
00563 MKL_free (pars->kerx);
00564 MKL_free (pars->kery);
00565 }
00566
00567 if (pars->method==fresize_2d) {
00568 MKL_free (pars->ker);
00569 }
00570
00571 if (pars->method==fresize_1d_fft) {
00572 MKL_free (pars->kerx);
00573 MKL_free (pars->kery);
00574 fftwf_free(pars->helpin);
00575 fftwf_free(pars->fkernel);
00576 fftwf_free(pars->fkernely);
00577 fftwf_free(pars->helpc);
00578 fftwf_free(pars->helpout);
00579 fftwf_destroy_plan(pars->plan1);
00580 fftwf_destroy_plan(pars->plan2);
00581 fftwf_destroy_plan(pars->plan1y);
00582 fftwf_destroy_plan(pars->plan2y);
00583 }
00584
00585 if (pars->method==fresize_2d_fft) {
00586 MKL_free (pars->ker);
00587 fftwf_free(pars->helpin);
00588 fftwf_free(pars->fkernel);
00589 fftwf_free(pars->helpc);
00590 fftwf_free(pars->helpout);
00591 fftwf_destroy_plan(pars->plan1);
00592 fftwf_destroy_plan(pars->plan2);
00593 }
00594
00595 return 0;
00596 }
00597
00598 int fsample(
00599 float *image_in,
00600 float *image_out,
00601 int nxin,
00602 int nyin,
00603 int nleadin,
00604 int nxout,
00605 int nyout,
00606 int nleadout,
00607 int nsub,
00608 int xoff,
00609 int yoff,
00610 float fillval
00611 )
00612
00613 {
00614 int i,j;
00615 int imin,imax,jmin,jmax;
00616 float *imp;
00617
00618
00619
00620 if (xoff >= 0) imin=0; else imin=((-xoff+nsub-1)/nsub);
00621 if (xoff <= 0) imax=nxout-1; else imax=minval(((nxin-xoff-1)/nsub),nxout-1);
00622 if (yoff >= 0) jmin=0; else jmin=((-yoff+nsub-1)/nsub);
00623 if (yoff <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoff-1)/nsub),nyout-1);
00624
00625
00626
00627 imin=maxval(0,minval(imin,nxout-1));
00628 imax=maxval(0,minval(imax,nxout-1));
00629 jmin=maxval(0,minval(jmin,nyout-1));
00630 jmax=maxval(0,minval(jmax,nyout-1));
00631
00632 imp=image_in+yoff*nleadin+xoff;
00633
00634 #pragma omp parallel default(none) \
00635 private(i,j) \
00636 shared(image_in,image_out,imp,fillval) \
00637 shared(imin,imax,jmin,jmax) \
00638 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
00639 {
00640
00641
00642 #pragma omp for
00643 for (j=0; j<jmin; j++) {
00644 for (i=0; i<nxout; i++) {
00645 image_out[j*nleadout+i]=fillval;
00646 }
00647 }
00648
00649
00650 #pragma omp for
00651 for (j=jmin; j<=jmax; j++) {
00652
00653 for (i=0; i<imin; i++) {
00654 image_out[j*nleadout+i]=fillval;
00655 }
00656
00657 for (i=imin; i<=imax; i++) {
00658
00659 image_out[j*nleadout+i]=imp[j*nsub*nleadin+i*nsub];
00660 }
00661
00662 for (i=imax+1; i<nxout; i++) {
00663 image_out[j*nleadout+i]=fillval;
00664 }
00665 }
00666
00667
00668 #pragma omp for
00669 for (j=jmax+1; j<nyout; j++) {
00670 for (i=0; i<nxout; i++) {
00671 image_out[j*nleadout+i]=fillval;
00672 }
00673 }
00674
00675 }
00676
00677 return 0;
00678 }
00679
00680 int fbin(
00681 float *image_in,
00682 float *image_out,
00683 int nxin,
00684 int nyin,
00685 int nleadin,
00686 int nxout,
00687 int nyout,
00688 int nleadout,
00689 int nsub,
00690 int xoff,
00691 int yoff,
00692 float fillval
00693 )
00694
00695 {
00696 int i,j,i1,j1;
00697 int imin,imax,jmin,jmax;
00698 float *imp,*impi;
00699 float sum;
00700
00701
00702
00703 if (xoff >= 0) imin=0; else imin=((-xoff+nsub-1)/nsub);
00704 if (xoff <= 0) imax=nxout-1; else imax=minval(((nxin-xoff-nsub)/nsub),nxout-1);
00705 if (yoff >= 0) jmin=0; else jmin=((-yoff+nsub-1)/nsub);
00706 if (yoff <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoff-nsub)/nsub),nyout-1);
00707
00708
00709
00710 imin=maxval(0,minval(imin,nxout-1));
00711 imax=maxval(0,minval(imax,nxout-1));
00712 jmin=maxval(0,minval(jmin,nyout-1));
00713 jmax=maxval(0,minval(jmax,nyout-1));
00714
00715 imp=image_in+yoff*nleadin+xoff;
00716
00717 #pragma omp parallel default(none) \
00718 private(i,j,i1,j1,impi,sum) \
00719 shared(image_in,image_out,imp,fillval) \
00720 shared(imin,imax,jmin,jmax) \
00721 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
00722 {
00723
00724
00725 #pragma omp for
00726 for (j=0; j<jmin; j++) {
00727 for (i=0; i<nxout; i++) {
00728 image_out[j*nleadout+i]=fillval;
00729 }
00730 }
00731
00732
00733 #pragma omp for
00734 for (j=jmin; j<=jmax; j++) {
00735
00736 for (i=0; i<imin; i++) {
00737 image_out[j*nleadout+i]=fillval;
00738 }
00739
00740 for (i=imin; i<=imax; i++) {
00741 impi=imp+j*nleadin*nsub+i*nsub;
00742 sum=0.0f;
00743 for (j1=0; j1<nsub; j1++) {
00744 for (i1=0; i1<nsub; i1++) {
00745 sum=sum+impi[i1];
00746 }
00747 impi=impi+nleadin;
00748 }
00749 image_out[j*nleadout+i]=sum/(nsub*nsub);
00750 }
00751
00752 for (i=imax+1; i<nxout; i++) {
00753 image_out[j*nleadout+i]=fillval;
00754 }
00755 }
00756
00757
00758 #pragma omp for
00759 for (j=jmax+1; j<nyout; j++) {
00760 for (i=0; i<nxout; i++) {
00761 image_out[j*nleadout+i]=fillval;
00762 }
00763 }
00764
00765 }
00766
00767 return 0;
00768 }
00769
00770 int f1d(
00771 struct fresize_struct *pars,
00772 float *image_in,
00773 float *image_out,
00774 int nxin,
00775 int nyin,
00776 int nleadin,
00777 int nxout,
00778 int nyout,
00779 int nleadout,
00780 int xoff,
00781 int yoff,
00782 float fillval
00783 )
00784 {
00785 const int malign=32;
00786
00787 char normal[] = "n";
00788 const int ione = 1;
00789 const float fone = 1.0f;
00790 const float fzero = 0.0f;
00791 int i,j,i1 ;
00792 int imin,imax,jmin,jmax;
00793 float *inpi,*inpj;
00794 float sum;
00795 int nsub,hwidth;
00796 float *kerx,*kery,*work;
00797 int xoffl,xoffh,yoffl,yoffh;
00798 int nwork;
00799
00800 int n1,n2 ;
00801
00802 nsub=pars->nsub;
00803 hwidth=pars->hwidth;
00804 kerx=pars->kerx;
00805 kery=pars->kery;
00806
00807
00808
00809 xoffl=xoff-hwidth;
00810 xoffh=xoff+hwidth;
00811 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
00812 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
00813
00814 yoffl=yoff-hwidth;
00815 yoffh=yoff+hwidth;
00816 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
00817 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
00818
00819
00820
00821 imin=maxval(0,minval(imin,nxout-1));
00822 imax=maxval(0,minval(imax,nxout-1));
00823 jmin=maxval(0,minval(jmin,nyout-1));
00824 jmax=maxval(0,minval(jmax,nyout-1));
00825
00826
00827 nwork=nxout*nyin;
00828 work=(float *)(MKL_malloc(nwork*sizeof(float),malign));
00829
00830
00831 n1=(imax-imin+1);
00832 n2=2*hwidth+1;
00833
00834
00835 #pragma omp parallel default(none) \
00836 private(i,j,i1,inpi,inpj,sum) \
00837 shared(image_in,image_out,fillval) \
00838 shared(imin,imax,jmin,jmax,hwidth,kerx,kery,work,xoffl,yoffl) \
00839 shared(normal,n1,n2) \
00840 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
00841 {
00842
00843
00844
00845
00846 #pragma omp for
00847 for (j=0;j<nyin;j++) {
00848 inpj=image_in+j*nleadin+xoffl;
00849 for (i=imin; i<=imax; i++) {
00850 sum=0.0f;
00851 inpi=inpj+i*nsub;
00852 for (i1=0; i1<=2*hwidth; i1++) {
00853 sum=sum+kerx[i1]*inpi[i1];
00854 }
00855 work[j*nleadout+i]=sum;
00856 }
00857 }
00858
00859
00860
00861
00862 if (n1 > 0) {
00863 #pragma omp for
00864 for (j=jmin; j<=jmax; j++) {
00865 inpj=work+(yoffl+j*nsub)*nleadout;
00866 sgemv(normal,&n1,&n2,&fone,inpj+imin,&nleadout,kery,&ione,&fzero,image_out+j*nleadout+imin,&ione);
00867 }
00868 }
00869
00870
00871 #pragma omp for
00872 for (j=0; j<jmin; j++) {
00873 for (i=0; i<nxout; i++) {
00874 image_out[j*nleadout+i]=fillval;
00875 }
00876 }
00877
00878
00879 #pragma omp for
00880 for (j=jmin; j<=jmax; j++) {
00881
00882 for (i=0; i<imin; i++) {
00883 image_out[j*nleadout+i]=fillval;
00884 }
00885
00886 for (i=imax+1; i<nxout; i++) {
00887 image_out[j*nleadout+i]=fillval;
00888 }
00889 }
00890
00891
00892 #pragma omp for
00893 for (j=jmax+1; j<nyout; j++) {
00894 for (i=0; i<nxout; i++) {
00895 image_out[j*nleadout+i]=fillval;
00896 }
00897 }
00898
00899 }
00900
00901 MKL_free(work);
00902 return 0;
00903 }
00904
00905 int f1d_fft(
00906 struct fresize_struct *pars,
00907 float *image_in,
00908 float *image_out,
00909 int nxin,
00910 int nyin,
00911 int nleadin,
00912 int nxout,
00913 int nyout,
00914 int nleadout,
00915 int xoff,
00916 int yoff,
00917 float fillval
00918 )
00919 {
00920 const int malign=32;
00921 int i,j,i1 ;
00922 int imin,imax,jmin,jmax;
00923 float *inpj;
00924
00925 int nsub,hwidth;
00926 float *work;
00927 int xoffl,xoffh,yoffl,yoffh;
00928 int nwork;
00929 double t1, t4;
00930 int nchunk;
00931 fftwf_complex *helpc,*fkernel,*fkernely;
00932 float *helpin,*helpout;
00933 int nxinp,nyinp;
00934 float *iwork,*owork;
00935
00936 if (pars->nxin != nxin) return 1;
00937 if (pars->nyin != nyin) return 1;
00938
00939 nsub=pars->nsub;
00940 hwidth=pars->hwidth;
00941
00942
00943 helpc=pars->helpc;
00944 fkernel=pars->fkernel;
00945 fkernely=pars->fkernely;
00946 helpin=pars->helpin;
00947 helpout=pars->helpout;
00948 nxinp=pars->nxinp;
00949 nyinp=pars->nyinp;
00950
00951
00952
00953 xoffl=xoff-hwidth;
00954 xoffh=xoff+hwidth;
00955 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
00956 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
00957
00958 yoffl=yoff-hwidth;
00959 yoffh=yoff+hwidth;
00960 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
00961 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
00962
00963
00964
00965 imin=maxval(0,minval(imin,nxout-1));
00966 imax=maxval(0,minval(imax,nxout-1));
00967 jmin=maxval(0,minval(jmin,nyout-1));
00968 jmax=maxval(0,minval(jmax,nyout-1));
00969
00970
00971 nwork=nxout*nyin;
00972 work=(float *)(MKL_malloc(nwork*sizeof(float),malign));
00973
00974 nchunk=64;
00975 iwork=(float *)(MKL_malloc(nchunk*nyin*sizeof(float),malign));
00976 owork=(float *)(MKL_malloc(nchunk*nyout*sizeof(float),malign));
00977
00978
00979
00980
00981
00982
00983
00984 {
00985
00986 t1=dsecnd();
00987
00988
00989 for (j=0;j<nyin;j++) {
00990 inpj=image_in+j*nleadin;
00991 for (i=0;i<nxin;i++) helpin[i]=inpj[i];
00992 fftwf_execute(pars->plan1);
00993 for (i=0; i<nxinp; i++) helpc[i]=fkernel[i]*helpc[i];
00994 fftwf_execute(pars->plan2);
00995 for (i=imin; i<=imax; i++) work[j*nleadout+i]=helpout[i*nsub+xoff];
00996 }
00997 t1=dsecnd()-t1;
00998
00999
01000
01001
01002 t4=dsecnd();
01003
01004 for (i1=imin;i1<=imax;i1=i1+nchunk) {
01005 for (j=0;j<nyin;j++) {
01006 for (i=i1; i<=minval(i1+nchunk-1,imax); i++) {
01007 iwork[(i-i1)*nyin+j]=work[j*nleadout+i];
01008 }
01009 }
01010 for (i=i1; i<=minval(i1+nchunk-1,imax); i++) {
01011 for (j=0;j<nyin;j++) helpin[j]=iwork[(i-i1)*nyin+j];
01012 fftwf_execute(pars->plan1y);
01013 for (j=0; j<nyinp; j++) helpc[j]=fkernely[j]*helpc[j];
01014 fftwf_execute(pars->plan2y);
01015 for (j=jmin; j<=jmax; j++) owork[j+(i-i1)*nyout]=helpout[j*nsub+yoff];
01016 }
01017 for (j=jmin; j<=jmax; j++) {
01018 for (i=i1; i<=minval(i1+nchunk-1,imax); i++) {
01019 image_out[j*nleadout+i]=owork[j+(i-i1)*nyout];
01020 }
01021 }
01022 }
01023 t4=dsecnd()-t4;
01024
01025
01026
01027 for (j=0; j<jmin; j++) {
01028 for (i=0; i<nxout; i++) {
01029 image_out[j*nleadout+i]=fillval;
01030 }
01031 }
01032
01033
01034
01035 for (j=jmin; j<=jmax; j++) {
01036
01037 for (i=0; i<imin; i++) {
01038 image_out[j*nleadout+i]=fillval;
01039 }
01040
01041 for (i=imax+1; i<nxout; i++) {
01042 image_out[j*nleadout+i]=fillval;
01043 }
01044 }
01045
01046
01047
01048 for (j=jmax+1; j<nyout; j++) {
01049 for (i=0; i<nxout; i++) {
01050 image_out[j*nleadout+i]=fillval;
01051 }
01052 }
01053
01054 }
01055
01056 MKL_free(iwork);
01057 MKL_free(owork);
01058 MKL_free(work);
01059
01060 return 0;
01061 }
01062
01063 int f2d(
01064 struct fresize_struct *pars,
01065 float *image_in,
01066 float *image_out,
01067 int nxin,
01068 int nyin,
01069 int nleadin,
01070 int nxout,
01071 int nyout,
01072 int nleadout,
01073 int xoff,
01074 int yoff,
01075 float fillval
01076 )
01077
01078 {
01079
01080
01081
01082
01083
01084
01085 int i,j,i1,j1;
01086 int imin,imax,jmin,jmax;
01087 float *inpi,*kerp;
01088 float sum;
01089 int nsub,hwidth,fwidth;
01090 float *ker;
01091 int xoffl,xoffh,yoffl,yoffh;
01092
01093
01094 nsub=pars->nsub;
01095 hwidth=pars->hwidth;
01096 ker=pars->ker;
01097 fwidth=2*hwidth+1;
01098
01099
01100
01101 xoffl=xoff-hwidth;
01102 xoffh=xoff+hwidth;
01103 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
01104 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
01105
01106 yoffl=yoff-hwidth;
01107 yoffh=yoff+hwidth;
01108 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
01109 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
01110
01111
01112
01113 imin=maxval(0,minval(imin,nxout-1));
01114 imax=maxval(0,minval(imax,nxout-1));
01115 jmin=maxval(0,minval(jmin,nyout-1));
01116 jmax=maxval(0,minval(jmax,nyout-1));
01117
01118
01119
01120 #pragma omp parallel default(none) \
01121 private(i,j,i1,j1,inpi,kerp,sum) \
01122 shared(image_in,image_out,fillval) \
01123 shared(imin,imax,jmin,jmax,hwidth,fwidth,ker,xoffl,yoffl) \
01124 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
01125 {
01126
01127 #pragma omp for
01128 for (j=jmin; j<=jmax; j++) {
01129 for (i=imin; i<=imax; i++) {
01130 inpi=image_in+(j*nsub+yoffl)*nleadin+i*nsub+xoffl;
01131 sum=0.0f;
01132 for (j1=0; j1<=2*hwidth; j1++) {
01133 kerp=ker+j1*fwidth;
01134 for (i1=0; i1<=2*hwidth; i1++) {
01135 sum=sum+kerp[i1]*inpi[i1];
01136 }
01137 inpi=inpi+nleadin;
01138 }
01139 image_out[j*nleadout+i]=sum;
01140 }
01141 }
01142
01143
01144 #pragma omp for
01145 for (j=0; j<jmin; j++) {
01146 for (i=0; i<nxout; i++) {
01147 image_out[j*nleadout+i]=fillval;
01148 }
01149 }
01150
01151
01152 #pragma omp for
01153 for (j=jmin; j<=jmax; j++) {
01154
01155 for (i=0; i<imin; i++) {
01156 image_out[j*nleadout+i]=fillval;
01157 }
01158
01159 for (i=imax+1; i<nxout; i++) {
01160 image_out[j*nleadout+i]=fillval;
01161 }
01162 }
01163
01164
01165 #pragma omp for
01166 for (j=jmax+1; j<nyout; j++) {
01167 for (i=0; i<nxout; i++) {
01168 image_out[j*nleadout+i]=fillval;
01169 }
01170 }
01171
01172 }
01173
01174 return 0;
01175 }
01176
01177
01178 int f2d_mat(
01179 struct fresize_struct *pars,
01180 float *image_in,
01181 float *image_out,
01182 int nxin,
01183 int nyin,
01184 int nleadin,
01185 int nxout,
01186 int nyout,
01187 int nleadout,
01188 int xoff,
01189 int yoff,
01190 float fillval
01191 )
01192
01193 {
01194 const int malign=32;
01195 char transpose[] = "t";
01196
01197 const int ione = 1;
01198 const float fone = 1.0f;
01199
01200 int i,j,j1;
01201 int imin,imax,jmin,jmax;
01202 float *inpi,*kerp;
01203
01204 int nsub,hwidth,fwidth;
01205 float *ker;
01206 int xoffl,xoffh,yoffl,yoffh;
01207
01208 int n2,nchunk,jc,nc;
01209 float *work;
01210
01211 nsub=pars->nsub;
01212 hwidth=pars->hwidth;
01213 ker=pars->ker;
01214 fwidth=2*hwidth+1;
01215
01216
01217
01218 xoffl=xoff-hwidth;
01219 xoffh=xoff+hwidth;
01220 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
01221 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
01222
01223 yoffl=yoff-hwidth;
01224 yoffh=yoff+hwidth;
01225 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
01226 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
01227
01228
01229
01230 imin=maxval(0,minval(imin,nxout-1));
01231 imax=maxval(0,minval(imax,nxout-1));
01232 jmin=maxval(0,minval(jmin,nyout-1));
01233 jmax=maxval(0,minval(jmax,nyout-1));
01234
01235
01236
01237 n2=nleadin*nsub;
01238 nchunk=64;
01239
01240 #pragma omp parallel default(none) \
01241 private(i,j,j1,inpi,kerp,work,nc) \
01242 shared(transpose,n2,nchunk) \
01243 shared(image_in,image_out,fillval) \
01244 shared(imin,imax,jmin,jmax,hwidth,fwidth,ker,xoffl,yoffl) \
01245 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
01246 {
01247
01248 work=(float *)(MKL_malloc(nyout*sizeof(float),malign));
01249 #pragma omp for
01250 for (j=jmin; j<=jmax; j++) {
01251 for (i=imin; i<=imax; i++) {
01252 image_out[j*nleadout+i]=0.0f;
01253 }
01254 }
01255
01256 #pragma omp for
01257 for (jc=jmin;jc<=jmax;jc=jc+nchunk) {
01258 nc=minval(jc+nchunk-1,jmax)-jc+1;
01259 for (j1=0; j1<=2*hwidth; j1++) {
01260 kerp=ker+j1*fwidth;
01261 for (i=imin; i<=imax; i++) {
01262 inpi=image_in+(jc*nsub+yoffl+j1)*nleadin+i*nsub+xoffl;
01263 sgemv(transpose,&fwidth,&nc,&fone,inpi,&n2,kerp,&ione,&fone,image_out+jc*nleadout+i,&nleadout);
01264 }
01265 }
01266 }
01267
01268
01269 #pragma omp for
01270 for (j=0; j<jmin; j++) {
01271 for (i=0; i<nxout; i++) {
01272 image_out[j*nleadout+i]=fillval;
01273 }
01274 }
01275
01276
01277 #pragma omp for
01278 for (j=jmin; j<=jmax; j++) {
01279
01280 for (i=0; i<imin; i++) {
01281 image_out[j*nleadout+i]=fillval;
01282 }
01283
01284 for (i=imax+1; i<nxout; i++) {
01285 image_out[j*nleadout+i]=fillval;
01286 }
01287 }
01288
01289
01290 #pragma omp for
01291 for (j=jmax+1; j<nyout; j++) {
01292 for (i=0; i<nxout; i++) {
01293 image_out[j*nleadout+i]=fillval;
01294 }
01295 }
01296
01297 MKL_free(work);
01298 }
01299
01300 return 0;
01301 }
01302
01303 int f2d_fft(
01304 struct fresize_struct *pars,
01305 float *image_in,
01306 float *image_out,
01307 int nxin,
01308 int nyin,
01309 int nleadin,
01310 int nxout,
01311 int nyout,
01312 int nleadout,
01313 int xoff,
01314 int yoff,
01315 float fillval
01316 )
01317 {
01318 fftwf_complex *helpc,*fkernel;
01319 float *helpin,*helpout;
01320 fftwf_plan plan1,plan2;
01321 int nxinp;
01322 int hwidth;
01323 int i,j,j1;
01324
01325 int imin,imax,jmin,jmax;
01326 int xoffl,xoffh,yoffl,yoffh;
01327 int nsub;
01328
01329 if (pars->nxin != nxin) return 1;
01330 if (pars->nyin != nyin) return 1;
01331
01332 nsub=pars->nsub;
01333 hwidth=pars->hwidth;
01334
01335 nxinp=(nxin/2+1);
01336
01337 helpin=pars->helpin;
01338 fkernel=pars->fkernel;
01339 helpc=pars->helpc;
01340 helpout=pars->helpout;
01341 plan1=pars->plan1;
01342 plan2=pars->plan2;
01343
01344
01345
01346 for (j=0;j<nyin;j++) {
01347 for (i=0;i<nxin;i++) {
01348 helpin[j*nxin+i]=image_in[j*nleadin+i];
01349 }
01350 }
01351
01352 fftwf_execute(plan1);
01353
01354
01355
01356 for (j=0;j<nyin;j++) {
01357 for (i=0;i<nxinp;i++) {
01358 helpc[j*nxinp+i]=fkernel[j*nxinp+i]*helpc[j*nxinp+i];
01359 }
01360 }
01361
01362 fftwf_execute(plan2);
01363
01364
01365
01366
01367
01368 xoffl=xoff-hwidth;
01369 xoffh=xoff+hwidth;
01370 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
01371 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
01372
01373 yoffl=yoff-hwidth;
01374 yoffh=yoff+hwidth;
01375 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
01376 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
01377
01378
01379
01380 imin=maxval(0,minval(imin,nxout-1));
01381 imax=maxval(0,minval(imax,nxout-1));
01382 jmin=maxval(0,minval(jmin,nyout-1));
01383 jmax=maxval(0,minval(jmax,nyout-1));
01384
01385 for (j=jmin; j<=jmax; j++) {
01386 j1=(j*nsub+yoff)*nxin+xoff;
01387 for (i=imin; i<=imax; i++) {
01388 image_out[j*nleadout+i]=helpout[j1+i*nsub];
01389 }
01390 }
01391
01392
01393 for (j=0; j<jmin; j++) {
01394 for (i=0; i<nxout; i++) {
01395 image_out[j*nleadout+i]=fillval;
01396 }
01397 }
01398
01399
01400 for (j=jmin; j<=jmax; j++) {
01401
01402 for (i=0; i<imin; i++) {
01403 image_out[j*nleadout+i]=fillval;
01404 }
01405
01406 for (i=imax+1; i<nxout; i++) {
01407 image_out[j*nleadout+i]=fillval;
01408 }
01409 }
01410
01411
01412 for (j=jmax+1; j<nyout; j++) {
01413 for (i=0; i<nxout; i++) {
01414 image_out[j*nleadout+i]=fillval;
01415 }
01416 }
01417
01418 return 0;
01419 }
01420
01421 int fresize(
01422 struct fresize_struct *pars,
01423 float *image_in,
01424 float *image_out,
01425 int nxin,
01426 int nyin,
01427 int nleadin,
01428 int nx,
01429 int ny,
01430 int nlead,
01431 int xoff,
01432 int yoff,
01433 float fillval
01434 )
01435 {
01436 int status;
01437
01438 switch (pars->method) {
01439
01440 case fresize_sample:
01441 status=fsample(image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,pars->nsub,xoff,yoff,fillval);
01442 break;
01443
01444 case fresize_bin:
01445 status=fbin(image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,pars->nsub,xoff,yoff,fillval);
01446 break;
01447
01448 case fresize_1d:
01449 status=f1d(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
01450 break;
01451
01452 case fresize_1d_fft:
01453 status=f1d_fft(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
01454 break;
01455
01456 case fresize_2d:
01457 status=f2d_mat(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
01458 break;
01459
01460 case fresize_2d_fft:
01461 status=f2d_fft(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
01462 break;
01463
01464 default:
01465 status=1;
01466 break;
01467 }
01468
01469 return status;
01470
01471 }
01472
01473 int init_fresize_user(
01474 struct fresize_struct *pars,
01475 int hwidth,
01476 int nsub,
01477 float *user_ker
01478
01479
01480 )
01481 {
01482 const int malign=32;
01483 int fwidth;
01484 int i,j;
01485
01486 pars->method=fresize_2d;
01487 pars->nsub=nsub;
01488 pars->hwidth=hwidth;
01489 fwidth=2*hwidth+1;
01490 pars->ker=(float *)(MKL_malloc(fwidth*fwidth*sizeof(float),malign));
01491 memcpy(pars->ker,user_ker,sizeof(float)*fwidth*fwidth);
01492
01493 return 0;
01494 }
01495