00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <time.h>
00004 #include <string.h>
00005 #include <math.h>
00006 #include <mkl_blas.h>
00007 #include <mkl_service.h>
00008 #include <mkl_lapack.h>
00009 #include <mkl_vml_functions.h>
00010 #include <omp.h>
00011 #include <complex.h>
00012 #include <fftw3.h>
00013 #include "fresize.h"
00014 #define minval(x,y) (((x) < (y)) ? (x) : (y))
00015 #define maxval(x,y) (((x) < (y)) ? (y) : (x))
00016 #define fresize_sample 1
00017 #define fresize_bin 2
00018 #define fresize_1d 3
00019 #define fresize_2d 4
00020 #define fresize_1d_fft 5
00021 #define fresize_2d_fft 6
00022
00023 static double sinc(double x)
00024 {
00025 if (fabs(x) < (1.e-10))
00026 return 1.;
00027 else
00028 return sin(M_PI*x)/(M_PI*x);
00029 }
00030
00031 int make_fft1d(
00032 struct fresize_struct *pars,
00033 int nxin,
00034 int nyin
00035 )
00036 {
00037 fftwf_complex *helpc,*fkernel,*fkernely;
00038 float *helpin,*helpout;
00039 fftwf_plan plan1,plan2,plan1y,plan2y;
00040 int nxinp,nyinp;
00041 int nin,ninp;
00042 int hwidth ;
00043 int i,i1;
00044 float c;
00045
00046 hwidth=pars->hwidth;
00047
00048 nxinp=(nxin/2+1);
00049 nyinp=(nyin/2+1);
00050 nin=maxval(nxin,nyin);
00051 ninp=maxval(nxinp,nyinp);
00052
00053 helpin=(float*) fftwf_malloc(sizeof(float) * nin);
00054 fkernel=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nxinp);
00055 fkernely=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nyinp);
00056 helpc=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * ninp);
00057 helpout=(float*) fftwf_malloc(sizeof(float) * nin);
00058
00059 plan1=fftwf_plan_dft_r2c_1d(nxin,helpin,helpc,FFTW_ESTIMATE);
00060 plan2=fftwf_plan_dft_c2r_1d(nxin,helpc,helpout,FFTW_ESTIMATE);
00061 plan1y=fftwf_plan_dft_r2c_1d(nxin,helpin,helpc,FFTW_ESTIMATE);
00062 plan2y=fftwf_plan_dft_c2r_1d(nxin,helpc,helpout,FFTW_ESTIMATE);
00063
00064 pars->helpin=helpin;
00065 pars->helpout=helpout;
00066 pars->fkernel=fkernel;
00067 pars->fkernely=fkernely;
00068 pars->helpc=helpc;
00069 pars->plan1=plan1;
00070 pars->plan2=plan2;
00071 pars->plan1y=plan1y;
00072 pars->plan2y=plan2y;
00073 pars->nxin=nxin;
00074 pars->nyin=nyin;
00075 pars->nxinp=nxinp;
00076 pars->nyinp=nyinp;
00077 pars->method=fresize_1d_fft;
00078
00079
00080
00081
00082 for (i=0;i<nxin;i++) {
00083 helpin[i]=0;
00084 }
00085
00086
00087 for (i=-hwidth;i<=hwidth;i++) {
00088 i1=(i+nxin) % nxin;
00089 helpin[i1]=pars->kerx[i+hwidth];
00090 }
00091
00092
00093 fftwf_execute(plan1);
00094
00095
00096 c=1.0f/nxin;
00097
00098 for (i=0;i<nxinp;i++) {
00099 fkernel[i]=c*helpc[i];
00100 }
00101
00102
00103
00104
00105 for (i=0;i<nxin;i++) {
00106 helpin[i]=0;
00107 }
00108
00109
00110 for (i=-hwidth;i<=hwidth;i++) {
00111 i1=(i+nyin) % nyin;
00112 helpin[i1]=pars->kery[i+hwidth];
00113 }
00114
00115
00116 fftwf_execute(plan1y);
00117
00118
00119 c=1.0f/nyin;
00120
00121 for (i=0;i<nyinp;i++) {
00122 fkernely[i]=c*helpc[i];
00123 }
00124
00125 return 0;
00126 }
00127
00128 int make_fft2d(
00129 struct fresize_struct *pars,
00130 int nxin,
00131 int nyin
00132 )
00133 {
00134 fftwf_complex *helpc,*fkernel;
00135 float *helpin,*helpout;
00136 fftwf_plan plan1,plan2;
00137 int nxinp;
00138 int hwidth,fwidth;
00139 int i,j,i1,j1;
00140 float c;
00141
00142 hwidth=pars->hwidth;
00143 fwidth=2*hwidth+1;
00144 nxinp=(nxin/2+1);
00145
00146 helpin=(float*) fftwf_malloc(sizeof(float) * nxin*nyin);
00147 fkernel=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nxinp*nyin);
00148 helpc=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nxinp*nyin);
00149 helpout=(float*) fftwf_malloc(sizeof(float) * nxin*nyin);
00150
00151 plan1=fftwf_plan_dft_r2c_2d(nyin,nxin,helpin,helpc,FFTW_ESTIMATE);
00152 plan2=fftwf_plan_dft_c2r_2d(nyin,nxin,helpc,helpout,FFTW_ESTIMATE);
00153
00154 pars->helpin=helpin;
00155 pars->helpout=helpout;
00156 pars->fkernel=fkernel;
00157 pars->helpc=helpc;
00158 pars->plan1=plan1;
00159 pars->plan2=plan2;
00160 pars->nxin=nxin;
00161 pars->nyin=nyin;
00162 pars->nxinp=nxinp;
00163 pars->method=fresize_2d_fft;
00164
00165
00166
00167
00168 for (j=0;j<nyin;j++) {
00169 for (i=0;i<nxin;i++) {
00170 helpin[j*nxin+i]=0;
00171 }
00172 }
00173
00174
00175
00176
00177 for (j=-hwidth;j<=hwidth;j++) {
00178 j1=(j+nyin) % nyin;
00179 for (i=-hwidth;i<=hwidth;i++) {
00180 i1=(i+nxin) % nxin;
00181 helpin[j1*nxin+i1]=pars->ker[(j+hwidth)*fwidth+(i+hwidth)];
00182 }
00183 }
00184
00185
00186 fftwf_execute(plan1);
00187
00188
00189 c=1.0f/nxin/nyin;
00190
00191 for (j=0;j<nyin;j++) {
00192 for (i=0;i<nxinp;i++) {
00193 fkernel[j*nxinp+i]=c*helpc[j*nxinp+i];
00194 }
00195 }
00196
00197 return 0;
00198 }
00199
00200 int init_fresize_sample(
00201 struct fresize_struct *pars,
00202 int nsub
00203 )
00204 {
00205
00206 pars->method=fresize_sample;
00207 pars->nsub=nsub;
00208
00209 return 0;
00210 }
00211
00212 int init_fresize_bin(
00213 struct fresize_struct *pars,
00214 int nsub
00215 )
00216 {
00217
00218 pars->method=fresize_bin;
00219 pars->nsub=nsub;
00220
00221 return 0;
00222 }
00223
00224 int init_fresize_boxcar(
00225 struct fresize_struct *pars,
00226 int hwidth,
00227 int nsub
00228 )
00229 {
00230 const int malign=32;
00231 int fwidth;
00232 int i ;
00233
00234 pars->method=fresize_1d;
00235 pars->nsub=nsub;
00236 pars->hwidth=hwidth;
00237 fwidth=2*hwidth+1;
00238 pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00239 pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00240 for (i=0;i<fwidth;i++) {
00241 pars->kerx[i]=1.0f/fwidth;
00242 pars->kery[i]=1.0f/fwidth;
00243 }
00244
00245 return 0;
00246 }
00247
00248 int init_fresize_boxcar_fft(
00249 struct fresize_struct *pars,
00250 int hwidth,
00251 int nsub,
00252 int nxin,
00253 int nyin
00254 )
00255 {
00256 int status;
00257
00258 status=init_fresize_boxcar(pars,hwidth,nsub);
00259 if (status != 0) return status;
00260
00261 status=make_fft1d(pars,nxin,nyin);
00262 return status;
00263 }
00264
00265 int init_fresize_gaussian(
00266 struct fresize_struct *pars,
00267 float sigma,
00268 int hwidth,
00269 int nsub
00270 )
00271 {
00272 const int malign=32;
00273 int fwidth;
00274 int i ;
00275 float sum,x,y;
00276
00277 pars->method=fresize_1d;
00278 pars->nsub=nsub;
00279 pars->hwidth=hwidth;
00280 fwidth=2*hwidth+1;
00281 pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00282 pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00283 sum=0.0f;
00284 for (i=0;i<fwidth;i++) {
00285 x=(i-hwidth)/sigma;
00286 y=(float)exp(-x*x/2);
00287 sum=sum+y;
00288 }
00289 for (i=0;i<fwidth;i++) {
00290 x=(i-hwidth)/sigma;
00291 y=(float)exp(-x*x/2);
00292 pars->kerx[i]=y/sum;
00293 pars->kery[i]=y/sum;
00294 }
00295
00296 return 0;
00297 }
00298
00299 int init_fresize_gaussian_fft(
00300 struct fresize_struct *pars,
00301 float sigma,
00302 int hwidth,
00303 int nsub,
00304 int nxin,
00305 int nyin
00306 )
00307 {
00308 int status;
00309
00310 status=init_fresize_gaussian(pars,sigma,hwidth,nsub);
00311 if (status != 0) return status;
00312
00313 status=make_fft1d(pars,nxin,nyin);
00314 return status;
00315 }
00316
00317 int init_fresize_sinc(
00318 struct fresize_struct *pars,
00319 float wsinc,
00320
00321
00322 int hwidth,
00323 int iap,
00324
00325
00326
00327 int nap,
00328
00329
00330 int nsub
00331 )
00332 {
00333 const int malign=32;
00334 int fwidth;
00335 int i ;
00336 float sum,x,y;
00337
00338 pars->method=fresize_1d;
00339 pars->nsub=nsub;
00340 pars->hwidth=hwidth;
00341 fwidth=2*hwidth+1;
00342 pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00343 pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00344 sum=0.0f;
00345 for (i=0;i<fwidth;i++) {
00346 x=(float)(i-hwidth);
00347 if (abs(x) > (nap*wsinc)) {
00348 y=0.0f;
00349 }
00350 else {
00351 y=(float)sinc(x/wsinc);
00352 }
00353 if (iap == 1) y=y*(1-(x/(nap*wsinc))*(x/(nap*wsinc)));
00354 if (iap == 2) y=(float)(y*sinc(x/(nap*wsinc)));
00355 pars->kerx[i]=y;
00356 sum=sum+y;
00357 }
00358 for (i=0;i<fwidth;i++) {
00359 pars->kerx[i]=pars->kerx[i]/sum;
00360 pars->kery[i]=pars->kerx[i];
00361 }
00362
00363 return 0;
00364 }
00365
00366 int init_fresize_sinc_fft(
00367 struct fresize_struct *pars,
00368 float wsinc,
00369
00370
00371 int hwidth,
00372 int iap,
00373
00374
00375
00376
00377 int nap,
00378
00379
00380 int nsub,
00381 int nxin,
00382 int nyin
00383 )
00384 {
00385 int status;
00386
00387 status=init_fresize_sinc(pars,wsinc,hwidth,iap,nap,nsub);
00388 if (status != 0) return status;
00389
00390 status=make_fft1d(pars,nxin,nyin);
00391 return status;
00392 }
00393
00394 int init_fresize_gaussian2(
00395 struct fresize_struct *pars,
00396 float sigma,
00397 float rmax,
00398 int hwidth,
00399 int nsub
00400 )
00401 {
00402 const int malign=32;
00403 int fwidth;
00404 int i,j;
00405 float sum,xi,xj,y,r2,rmax2;
00406
00407 pars->method=fresize_2d;
00408 pars->nsub=nsub;
00409 pars->hwidth=hwidth;
00410 fwidth=2*hwidth+1;
00411 pars->ker=(float *)(MKL_malloc(fwidth*fwidth*sizeof(float),malign));
00412 rmax2=rmax*rmax/sigma/sigma;
00413
00414 sum=0.0f;
00415 for (j=0;j<fwidth;j++) {
00416 xj=(j-hwidth)/sigma;
00417 for (i=0;i<fwidth;i++) {
00418 xi=(i-hwidth)/sigma;
00419 r2=xi*xi+xj*xj;
00420 if (r2 <= rmax2 ) {
00421 y=(float)exp(-r2/2);
00422 }
00423 else {
00424 y=0.0f;
00425 }
00426 pars->ker[fwidth*j+i]=y;
00427 sum=sum+y;
00428 }
00429 }
00430
00431 for (j=0;j<fwidth;j++) {
00432 for (i=0;i<fwidth;i++) {
00433 pars->ker[fwidth*j+i]=pars->ker[fwidth*j+i]/sum;
00434 }
00435 }
00436
00437 return 0;
00438 }
00439
00440 int init_fresize_gaussian2_fft(
00441 struct fresize_struct *pars,
00442 float sigma,
00443 float rmax,
00444 int hwidth,
00445 int nsub,
00446 int nxin,
00447 int nyin
00448 )
00449 {
00450 int status;
00451
00452 status=init_fresize_gaussian2(pars,sigma,rmax,hwidth,nsub);
00453 if (status != 0) return status;
00454
00455 status=make_fft2d(pars,nxin,nyin);
00456 return status;
00457 }
00458
00459 int init_fresize_airy(
00460 struct fresize_struct *pars,
00461 float cdown,
00462 int hwidth,
00463
00464 int iap,
00465
00466
00467
00468
00469
00470
00471 int nap,
00472 int nsub
00473 )
00474 {
00475 const int malign=32;
00476 const float beszeros[20]={0.0000000,3.8317060,7.0155867,10.173468,13.323692,16.470630,19.615859,22.760084,25.903672,29.046829,32.189680,35.332308,38.474766,41.617094,44.759319,47.901461,51.043535,54.185554,57.327525,60.469458};
00477 float cb;
00478 int fwidth;
00479 int i,j;
00480 float sum,xi,xj;
00481 float r2,r,rc,airy,ra;
00482 float nzero;
00483
00484 cb=(float)(cdown/M_PI);
00485 nzero=beszeros[nap]*cb;
00486 if (hwidth < 0) hwidth=(float)floor(nzero);
00487
00488 pars->method=fresize_2d;
00489 pars->nsub=nsub;
00490 pars->hwidth=hwidth;
00491 fwidth=2*hwidth+1;
00492 pars->ker=(float *)(MKL_malloc(fwidth*fwidth*sizeof(float),malign));
00493
00494 sum=0.0f;
00495 for (j=0;j<fwidth;j++) {
00496 xj=(float)(j-hwidth);
00497 for (i=0;i<fwidth;i++) {
00498 xi=(float)(i-hwidth);
00499 r2=xi*xi+xj*xj;
00500 r=(float)maxval(sqrt(r2),1e-20);
00501 rc=r/cb;
00502 if (r <= nzero ) {
00503 airy=j1f(rc)/rc;
00504 }
00505 else {
00506 airy=0.0f;
00507 }
00508 ra=r/nzero;
00509 if (iap == 1) airy=airy*(1-ra*ra);
00510 if (iap == 2) airy=(float)(airy*sinc(ra));
00511 if (iap == 3) {
00512 ra=rc*beszeros[1]/beszeros[nap];
00513 airy=airy*j1f(ra)/ra;
00514 }
00515 pars->ker[fwidth*j+i]=airy;
00516 sum=sum+airy;
00517 }
00518 }
00519
00520 for (j=0;j<fwidth;j++) {
00521 for (i=0;i<fwidth;i++) {
00522 pars->ker[fwidth*j+i]=pars->ker[fwidth*j+i]/sum;
00523 }
00524 }
00525
00526
00527 return 0;
00528 }
00529
00530 int init_fresize_airy_fft(
00531 struct fresize_struct *pars,
00532 float cdown,
00533 int hwidth,
00534
00535 int iap,
00536
00537
00538
00539
00540
00541
00542 int nap,
00543 int nsub,
00544 int nxin,
00545 int nyin
00546 )
00547 {
00548 int status;
00549
00550 status=init_fresize_airy(pars,cdown,hwidth,iap,nap,nsub);
00551 if (status != 0) return status;
00552
00553 status=make_fft2d(pars,nxin,nyin);
00554 return status;
00555 }
00556
00557 int free_fresize(
00558 struct fresize_struct *pars
00559 )
00560 {
00561
00562 if (pars->method==fresize_1d) {
00563 MKL_free (pars->kerx);
00564 MKL_free (pars->kery);
00565 }
00566
00567 if (pars->method==fresize_2d) {
00568 MKL_free (pars->ker);
00569 }
00570
00571 if (pars->method==fresize_1d_fft) {
00572 MKL_free (pars->kerx);
00573 MKL_free (pars->kery);
00574 fftwf_free(pars->helpin);
00575 fftwf_free(pars->fkernel);
00576 fftwf_free(pars->fkernely);
00577 fftwf_free(pars->helpc);
00578 fftwf_free(pars->helpout);
00579 fftwf_destroy_plan(pars->plan1);
00580 fftwf_destroy_plan(pars->plan2);
00581 fftwf_destroy_plan(pars->plan1y);
00582 fftwf_destroy_plan(pars->plan2y);
00583 }
00584
00585 if (pars->method==fresize_2d_fft) {
00586 MKL_free (pars->ker);
00587 fftwf_free(pars->helpin);
00588 fftwf_free(pars->fkernel);
00589 fftwf_free(pars->helpc);
00590 fftwf_free(pars->helpout);
00591 fftwf_destroy_plan(pars->plan1);
00592 fftwf_destroy_plan(pars->plan2);
00593 }
00594
00595 return 0;
00596 }
00597
00598 int fsample(
00599 float *image_in,
00600 float *image_out,
00601 int nxin,
00602 int nyin,
00603 int nleadin,
00604 int nxout,
00605 int nyout,
00606 int nleadout,
00607 int nsub,
00608 int xoff,
00609 int yoff,
00610 float fillval
00611 )
00612
00613 {
00614 int i,j;
00615 int imin,imax,jmin,jmax;
00616 float *imp;
00617
00618
00619 if (xoff >= 0) imin=0; else imin=((-xoff+nsub-1)/nsub);
00620 if (xoff <= 0) imax=nxout-1; else imax=minval(((nxin-xoff-1)/nsub),nxout-1);
00621 if (yoff >= 0) jmin=0; else jmin=((-yoff+nsub-1)/nsub);
00622 if (yoff <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoff-1)/nsub),nyout-1);
00623
00624 imp=image_in+yoff*nleadin+xoff;
00625
00626 #pragma omp parallel default(none) \
00627 private(i,j) \
00628 shared(image_in,image_out,imp,fillval) \
00629 shared(imin,imax,jmin,jmax) \
00630 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
00631 {
00632
00633
00634 #pragma omp for
00635 for (j=0; j<jmin; j++) {
00636 for (i=0; i<nxout; i++) {
00637 image_out[j*nleadout+i]=fillval;
00638 }
00639 }
00640
00641
00642 #pragma omp for
00643 for (j=jmin; j<=jmax; j++) {
00644
00645 for (i=0; i<imin; i++) {
00646 image_out[j*nleadout+i]=fillval;
00647 }
00648
00649 for (i=imin; i<=imax; i++) {
00650
00651 image_out[j*nleadout+i]=imp[j*nsub*nleadin+i*nsub];
00652 }
00653
00654 for (i=imax+1; i<nxout; i++) {
00655 image_out[j*nleadout+i]=fillval;
00656 }
00657 }
00658
00659
00660 #pragma omp for
00661 for (j=jmax+1; j<nyout; j++) {
00662 for (i=0; i<nxout; i++) {
00663 image_out[j*nleadout+i]=fillval;
00664 }
00665 }
00666
00667 }
00668
00669 return 0;
00670 }
00671
00672 int fbin(
00673 float *image_in,
00674 float *image_out,
00675 int nxin,
00676 int nyin,
00677 int nleadin,
00678 int nxout,
00679 int nyout,
00680 int nleadout,
00681 int nsub,
00682 int xoff,
00683 int yoff,
00684 float fillval
00685 )
00686
00687 {
00688 int i,j,i1,j1;
00689 int imin,imax,jmin,jmax;
00690 float *imp,*impi;
00691 float sum;
00692
00693
00694 if (xoff >= 0) imin=0; else imin=((-xoff+nsub-1)/nsub);
00695 if (xoff <= 0) imax=nxout-1; else imax=minval(((nxin-xoff-nsub)/nsub),nxout-1);
00696 if (yoff >= 0) jmin=0; else jmin=((-yoff+nsub-1)/nsub);
00697 if (yoff <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoff-nsub)/nsub),nyout-1);
00698
00699 imp=image_in+yoff*nleadin+xoff;
00700
00701 #pragma omp parallel default(none) \
00702 private(i,j,i1,j1,impi,sum) \
00703 shared(image_in,image_out,imp,fillval) \
00704 shared(imin,imax,jmin,jmax) \
00705 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
00706 {
00707
00708
00709 #pragma omp for
00710 for (j=0; j<jmin; j++) {
00711 for (i=0; i<nxout; i++) {
00712 image_out[j*nleadout+i]=fillval;
00713 }
00714 }
00715
00716
00717 #pragma omp for
00718 for (j=jmin; j<=jmax; j++) {
00719
00720 for (i=0; i<imin; i++) {
00721 image_out[j*nleadout+i]=fillval;
00722 }
00723
00724 for (i=imin; i<=imax; i++) {
00725 impi=imp+j*nleadin*nsub+i*nsub;
00726 sum=0.0f;
00727 for (j1=0; j1<nsub; j1++) {
00728 for (i1=0; i1<nsub; i1++) {
00729 sum=sum+impi[i1];
00730 }
00731 impi=impi+nleadin;
00732 }
00733 image_out[j*nleadout+i]=sum/(nsub*nsub);
00734 }
00735
00736 for (i=imax+1; i<nxout; i++) {
00737 image_out[j*nleadout+i]=fillval;
00738 }
00739 }
00740
00741
00742 #pragma omp for
00743 for (j=jmax+1; j<nyout; j++) {
00744 for (i=0; i<nxout; i++) {
00745 image_out[j*nleadout+i]=fillval;
00746 }
00747 }
00748
00749 }
00750
00751 return 0;
00752 }
00753
00754 int f1d(
00755 struct fresize_struct *pars,
00756 float *image_in,
00757 float *image_out,
00758 int nxin,
00759 int nyin,
00760 int nleadin,
00761 int nxout,
00762 int nyout,
00763 int nleadout,
00764 int xoff,
00765 int yoff,
00766 float fillval
00767 )
00768 {
00769 const int malign=32;
00770
00771 char normal[] = "n";
00772 const int ione = 1;
00773 const float fone = 1.0f;
00774 const float fzero = 0.0f;
00775 int i,j,i1 ;
00776 int imin,imax,jmin,jmax;
00777 float *inpi,*inpj;
00778 float sum;
00779 int nsub,hwidth;
00780 float *kerx,*kery,*work;
00781 int xoffl,xoffh,yoffl,yoffh;
00782 int nwork;
00783
00784 int n1,n2 ;
00785
00786 nsub=pars->nsub;
00787 hwidth=pars->hwidth;
00788 kerx=pars->kerx;
00789 kery=pars->kery;
00790
00791
00792 xoffl=xoff-hwidth;
00793 xoffh=xoff+hwidth;
00794 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
00795 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
00796
00797 yoffl=yoff-hwidth;
00798 yoffh=yoff+hwidth;
00799 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
00800 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
00801
00802
00803 nwork=nxout*nyin;
00804 work=(float *)(MKL_malloc(nwork*sizeof(float),malign));
00805
00806
00807 n1=(imax-imin+1);
00808 n2=2*hwidth+1;
00809
00810
00811 #pragma omp parallel default(none) \
00812 private(i,j,i1,inpi,inpj,sum) \
00813 shared(image_in,image_out,fillval) \
00814 shared(imin,imax,jmin,jmax,hwidth,kerx,kery,work,xoffl,yoffl) \
00815 shared(normal,n1,n2) \
00816 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
00817 {
00818
00819
00820
00821
00822 #pragma omp for
00823 for (j=0;j<nyin;j++) {
00824 inpj=image_in+j*nleadin+xoffl;
00825 for (i=imin; i<=imax; i++) {
00826 sum=0.0f;
00827 inpi=inpj+i*nsub;
00828 for (i1=0; i1<=2*hwidth; i1++) {
00829 sum=sum+kerx[i1]*inpi[i1];
00830 }
00831 work[j*nleadout+i]=sum;
00832
00833
00834 }
00835
00836
00837 }
00838
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866 #pragma omp for
00867 for (j=jmin; j<=jmax; j++) {
00868 inpj=work+(yoffl+j*nsub)*nleadout;
00869 sgemv(normal,&n1,&n2,&fone,inpj+imin,&nleadout,kery,&ione,&fzero,image_out+j*nleadout+imin,&ione);
00870 }
00871
00872
00873 #pragma omp for
00874 for (j=0; j<jmin; j++) {
00875 for (i=0; i<nxout; i++) {
00876 image_out[j*nleadout+i]=fillval;
00877 }
00878 }
00879
00880
00881 #pragma omp for
00882 for (j=jmin; j<=jmax; j++) {
00883
00884 for (i=0; i<imin; i++) {
00885 image_out[j*nleadout+i]=fillval;
00886 }
00887
00888 for (i=imax+1; i<nxout; i++) {
00889 image_out[j*nleadout+i]=fillval;
00890 }
00891 }
00892
00893
00894 #pragma omp for
00895 for (j=jmax+1; j<nyout; j++) {
00896 for (i=0; i<nxout; i++) {
00897 image_out[j*nleadout+i]=fillval;
00898 }
00899 }
00900
00901 }
00902
00903 MKL_free(work);
00904 return 0;
00905 }
00906
00907 int f1d_fft(
00908 struct fresize_struct *pars,
00909 float *image_in,
00910 float *image_out,
00911 int nxin,
00912 int nyin,
00913 int nleadin,
00914 int nxout,
00915 int nyout,
00916 int nleadout,
00917 int xoff,
00918 int yoff,
00919 float fillval
00920 )
00921 {
00922 const int malign=32;
00923 int i,j,i1 ;
00924 int imin,imax,jmin,jmax;
00925 float *inpj;
00926
00927 int nsub,hwidth;
00928 float *work;
00929 int xoffl,xoffh,yoffl,yoffh;
00930 int nwork;
00931 double t1, t4;
00932 int nchunk;
00933 fftwf_complex *helpc,*fkernel,*fkernely;
00934 float *helpin,*helpout;
00935 int nxinp,nyinp;
00936 float *iwork,*owork;
00937
00938 if (pars->nxin != nxin) return 1;
00939 if (pars->nyin != nyin) return 1;
00940
00941 nsub=pars->nsub;
00942 hwidth=pars->hwidth;
00943
00944
00945 helpc=pars->helpc;
00946 fkernel=pars->fkernel;
00947 fkernely=pars->fkernely;
00948 helpin=pars->helpin;
00949 helpout=pars->helpout;
00950 nxinp=pars->nxinp;
00951 nyinp=pars->nyinp;
00952
00953
00954 xoffl=xoff-hwidth;
00955 xoffh=xoff+hwidth;
00956 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
00957 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
00958
00959 yoffl=yoff-hwidth;
00960 yoffh=yoff+hwidth;
00961 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
00962 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
00963
00964
00965 nwork=nxout*nyin;
00966 work=(float *)(MKL_malloc(nwork*sizeof(float),malign));
00967
00968 nchunk=64;
00969 iwork=(float *)(MKL_malloc(nchunk*nyin*sizeof(float),malign));
00970 owork=(float *)(MKL_malloc(nchunk*nyout*sizeof(float),malign));
00971
00972
00973
00974
00975
00976
00977
00978 {
00979
00980 t1=dsecnd();
00981
00982
00983 for (j=0;j<nyin;j++) {
00984 inpj=image_in+j*nleadin;
00985 for (i=0;i<nxin;i++) helpin[i]=inpj[i];
00986 fftwf_execute(pars->plan1);
00987 for (i=0; i<nxinp; i++) helpc[i]=fkernel[i]*helpc[i];
00988 fftwf_execute(pars->plan2);
00989 for (i=imin; i<=imax; i++) work[j*nleadout+i]=helpout[i*nsub+xoff];
00990 }
00991 t1=dsecnd()-t1;
00992
00993
00994
00995
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020
01021
01022
01023
01024
01025
01026
01027
01028
01029 t4=dsecnd();
01030
01031 for (i1=imin;i1<=imax;i1=i1+nchunk) {
01032 for (j=0;j<nyin;j++) {
01033 for (i=i1; i<=minval(i1+nchunk-1,imax); i++) {
01034 iwork[(i-i1)*nyin+j]=work[j*nleadout+i];
01035 }
01036 }
01037 for (i=i1; i<=minval(i1+nchunk-1,imax); i++) {
01038 for (j=0;j<nyin;j++) helpin[j]=iwork[(i-i1)*nyin+j];
01039 fftwf_execute(pars->plan1y);
01040 for (j=0; j<nyinp; j++) helpc[j]=fkernely[j]*helpc[j];
01041 fftwf_execute(pars->plan2y);
01042 for (j=jmin; j<=jmax; j++) owork[j+(i-i1)*nyout]=helpout[j*nsub+yoff];
01043 }
01044 for (j=jmin; j<=jmax; j++) {
01045 for (i=i1; i<=minval(i1+nchunk-1,imax); i++) {
01046 image_out[j*nleadout+i]=owork[j+(i-i1)*nyout];
01047 }
01048 }
01049 }
01050 t4=dsecnd()-t4;
01051
01052
01053
01054 for (j=0; j<jmin; j++) {
01055 for (i=0; i<nxout; i++) {
01056 image_out[j*nleadout+i]=fillval;
01057 }
01058 }
01059
01060
01061
01062 for (j=jmin; j<=jmax; j++) {
01063
01064 for (i=0; i<imin; i++) {
01065 image_out[j*nleadout+i]=fillval;
01066 }
01067
01068 for (i=imax+1; i<nxout; i++) {
01069 image_out[j*nleadout+i]=fillval;
01070 }
01071 }
01072
01073
01074
01075 for (j=jmax+1; j<nyout; j++) {
01076 for (i=0; i<nxout; i++) {
01077 image_out[j*nleadout+i]=fillval;
01078 }
01079 }
01080
01081 }
01082
01083 MKL_free(iwork);
01084 MKL_free(owork);
01085 MKL_free(work);
01086
01087 return 0;
01088 }
01089
01090 int f2d(
01091 struct fresize_struct *pars,
01092 float *image_in,
01093 float *image_out,
01094 int nxin,
01095 int nyin,
01096 int nleadin,
01097 int nxout,
01098 int nyout,
01099 int nleadout,
01100 int xoff,
01101 int yoff,
01102 float fillval
01103 )
01104
01105 {
01106
01107
01108
01109
01110
01111
01112 int i,j,i1,j1;
01113 int imin,imax,jmin,jmax;
01114 float *inpi,*kerp;
01115 float sum;
01116 int nsub,hwidth,fwidth;
01117 float *ker;
01118 int xoffl,xoffh,yoffl,yoffh;
01119
01120
01121 nsub=pars->nsub;
01122 hwidth=pars->hwidth;
01123 ker=pars->ker;
01124 fwidth=2*hwidth+1;
01125
01126
01127 xoffl=xoff-hwidth;
01128 xoffh=xoff+hwidth;
01129 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
01130 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
01131
01132 yoffl=yoff-hwidth;
01133 yoffh=yoff+hwidth;
01134 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
01135 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
01136
01137
01138
01139
01140 #pragma omp parallel default(none) \
01141 private(i,j,i1,j1,inpi,kerp,sum) \
01142 shared(image_in,image_out,fillval) \
01143 shared(imin,imax,jmin,jmax,hwidth,fwidth,ker,xoffl,yoffl) \
01144 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
01145 {
01146
01147 #pragma omp for
01148 for (j=jmin; j<=jmax; j++) {
01149 for (i=imin; i<=imax; i++) {
01150 inpi=image_in+(j*nsub+yoffl)*nleadin+i*nsub+xoffl;
01151 sum=0.0f;
01152 for (j1=0; j1<=2*hwidth; j1++) {
01153 kerp=ker+j1*fwidth;
01154 for (i1=0; i1<=2*hwidth; i1++) {
01155 sum=sum+kerp[i1]*inpi[i1];
01156 }
01157 inpi=inpi+nleadin;
01158 }
01159 image_out[j*nleadout+i]=sum;
01160 }
01161 }
01162
01163
01164 #pragma omp for
01165 for (j=0; j<jmin; j++) {
01166 for (i=0; i<nxout; i++) {
01167 image_out[j*nleadout+i]=fillval;
01168 }
01169 }
01170
01171
01172 #pragma omp for
01173 for (j=jmin; j<=jmax; j++) {
01174
01175 for (i=0; i<imin; i++) {
01176 image_out[j*nleadout+i]=fillval;
01177 }
01178
01179 for (i=imax+1; i<nxout; i++) {
01180 image_out[j*nleadout+i]=fillval;
01181 }
01182 }
01183
01184
01185 #pragma omp for
01186 for (j=jmax+1; j<nyout; j++) {
01187 for (i=0; i<nxout; i++) {
01188 image_out[j*nleadout+i]=fillval;
01189 }
01190 }
01191
01192 }
01193
01194 return 0;
01195 }
01196
01197
01198 int f2d_mat(
01199 struct fresize_struct *pars,
01200 float *image_in,
01201 float *image_out,
01202 int nxin,
01203 int nyin,
01204 int nleadin,
01205 int nxout,
01206 int nyout,
01207 int nleadout,
01208 int xoff,
01209 int yoff,
01210 float fillval
01211 )
01212
01213 {
01214 const int malign=32;
01215 char transpose[] = "t";
01216
01217 const int ione = 1;
01218 const float fone = 1.0f;
01219
01220 int i,j,j1;
01221 int imin,imax,jmin,jmax;
01222 float *inpi,*kerp;
01223
01224 int nsub,hwidth,fwidth;
01225 float *ker;
01226 int xoffl,xoffh,yoffl,yoffh;
01227
01228 int n2,nchunk,jc,nc;
01229 float *work;
01230
01231 nsub=pars->nsub;
01232 hwidth=pars->hwidth;
01233 ker=pars->ker;
01234 fwidth=2*hwidth+1;
01235
01236
01237 xoffl=xoff-hwidth;
01238 xoffh=xoff+hwidth;
01239 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
01240 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
01241
01242 yoffl=yoff-hwidth;
01243 yoffh=yoff+hwidth;
01244 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
01245 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
01246
01247
01248
01249
01250
01251 n2=nleadin*nsub;
01252 nchunk=64;
01253
01254 #pragma omp parallel default(none) \
01255 private(i,j,j1,inpi,kerp,work,nc) \
01256 shared(transpose,n2,nchunk) \
01257 shared(image_in,image_out,fillval) \
01258 shared(imin,imax,jmin,jmax,hwidth,fwidth,ker,xoffl,yoffl) \
01259 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
01260 {
01261
01262 work=(float *)(MKL_malloc(nyout*sizeof(float),malign));
01263 #pragma omp for
01264 for (j=jmin; j<=jmax; j++) {
01265 for (i=imin; i<=imax; i++) {
01266 image_out[j*nleadout+i]=0.0f;
01267 }
01268 }
01269
01270 #pragma omp for
01271 for (jc=jmin;jc<=jmax;jc=jc+nchunk) {
01272 nc=minval(jc+nchunk-1,jmax)-jc+1;
01273 for (j1=0; j1<=2*hwidth; j1++) {
01274 kerp=ker+j1*fwidth;
01275 for (i=imin; i<=imax; i++) {
01276 inpi=image_in+(jc*nsub+yoffl+j1)*nleadin+i*nsub+xoffl;
01277 sgemv(transpose,&fwidth,&nc,&fone,inpi,&n2,kerp,&ione,&fone,image_out+jc*nleadout+i,&nleadout);
01278 }
01279 }
01280 }
01281
01282
01283 #pragma omp for
01284 for (j=0; j<jmin; j++) {
01285 for (i=0; i<nxout; i++) {
01286 image_out[j*nleadout+i]=fillval;
01287 }
01288 }
01289
01290
01291 #pragma omp for
01292 for (j=jmin; j<=jmax; j++) {
01293
01294 for (i=0; i<imin; i++) {
01295 image_out[j*nleadout+i]=fillval;
01296 }
01297
01298 for (i=imax+1; i<nxout; i++) {
01299 image_out[j*nleadout+i]=fillval;
01300 }
01301 }
01302
01303
01304 #pragma omp for
01305 for (j=jmax+1; j<nyout; j++) {
01306 for (i=0; i<nxout; i++) {
01307 image_out[j*nleadout+i]=fillval;
01308 }
01309 }
01310
01311 MKL_free(work);
01312 }
01313
01314 return 0;
01315 }
01316
01317 int f2d_fft(
01318 struct fresize_struct *pars,
01319 float *image_in,
01320 float *image_out,
01321 int nxin,
01322 int nyin,
01323 int nleadin,
01324 int nxout,
01325 int nyout,
01326 int nleadout,
01327 int xoff,
01328 int yoff,
01329 float fillval
01330 )
01331 {
01332 fftwf_complex *helpc,*fkernel;
01333 float *helpin,*helpout;
01334 fftwf_plan plan1,plan2;
01335 int nxinp;
01336 int hwidth;
01337 int i,j,j1;
01338
01339 int imin,imax,jmin,jmax;
01340 int xoffl,xoffh,yoffl,yoffh;
01341 int nsub;
01342
01343 if (pars->nxin != nxin) return 1;
01344 if (pars->nyin != nyin) return 1;
01345
01346 nsub=pars->nsub;
01347 hwidth=pars->hwidth;
01348
01349 nxinp=(nxin/2+1);
01350
01351 helpin=pars->helpin;
01352 fkernel=pars->fkernel;
01353 helpc=pars->helpc;
01354 helpout=pars->helpout;
01355 plan1=pars->plan1;
01356 plan2=pars->plan2;
01357
01358
01359
01360 for (j=0;j<nyin;j++) {
01361 for (i=0;i<nxin;i++) {
01362 helpin[j*nxin+i]=image_in[j*nleadin+i];
01363 }
01364 }
01365
01366 fftwf_execute(plan1);
01367
01368
01369
01370 for (j=0;j<nyin;j++) {
01371 for (i=0;i<nxinp;i++) {
01372 helpc[j*nxinp+i]=fkernel[j*nxinp+i]*helpc[j*nxinp+i];
01373 }
01374 }
01375
01376 fftwf_execute(plan2);
01377
01378
01379
01380
01381 xoffl=xoff-hwidth;
01382 xoffh=xoff+hwidth;
01383 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
01384 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
01385
01386 yoffl=yoff-hwidth;
01387 yoffh=yoff+hwidth;
01388 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
01389 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
01390
01391 for (j=jmin; j<=jmax; j++) {
01392 j1=(j*nsub+yoff)*nxin+xoff;
01393 for (i=imin; i<=imax; i++) {
01394 image_out[j*nleadout+i]=helpout[j1+i*nsub];
01395 }
01396 }
01397
01398
01399 for (j=0; j<jmin; j++) {
01400 for (i=0; i<nxout; i++) {
01401 image_out[j*nleadout+i]=fillval;
01402 }
01403 }
01404
01405
01406 for (j=jmin; j<=jmax; j++) {
01407
01408 for (i=0; i<imin; i++) {
01409 image_out[j*nleadout+i]=fillval;
01410 }
01411
01412 for (i=imax+1; i<nxout; i++) {
01413 image_out[j*nleadout+i]=fillval;
01414 }
01415 }
01416
01417
01418 for (j=jmax+1; j<nyout; j++) {
01419 for (i=0; i<nxout; i++) {
01420 image_out[j*nleadout+i]=fillval;
01421 }
01422 }
01423
01424 return 0;
01425 }
01426
01427 int fresize(
01428 struct fresize_struct *pars,
01429 float *image_in,
01430 float *image_out,
01431 int nxin,
01432 int nyin,
01433 int nleadin,
01434 int nx,
01435 int ny,
01436 int nlead,
01437 int xoff,
01438 int yoff,
01439 float fillval
01440 )
01441 {
01442 int status;
01443
01444 switch (pars->method) {
01445
01446 case fresize_sample:
01447 status=fsample(image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,pars->nsub,xoff,yoff,fillval);
01448 break;
01449
01450 case fresize_bin:
01451 status=fbin(image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,pars->nsub,xoff,yoff,fillval);
01452 break;
01453
01454 case fresize_1d:
01455 status=f1d(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
01456 break;
01457
01458 case fresize_1d_fft:
01459 status=f1d_fft(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
01460 break;
01461
01462 case fresize_2d:
01463 status=f2d_mat(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
01464 break;
01465
01466 case fresize_2d_fft:
01467 status=f2d_fft(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
01468 break;
01469
01470 default:
01471 status=1;
01472 break;
01473 }
01474
01475 return status;
01476
01477 }
01478
01479 int init_fresize_user(
01480 struct fresize_struct *pars,
01481 int hwidth,
01482 int nsub,
01483 float *user_ker
01484
01485
01486 )
01487 {
01488 const int malign=32;
01489 int fwidth;
01490 int i,j;
01491
01492 pars->method=fresize_2d;
01493 pars->nsub=nsub;
01494 pars->hwidth=hwidth;
01495 fwidth=2*hwidth+1;
01496 pars->ker=(float *)(MKL_malloc(fwidth*fwidth*sizeof(float),malign));
01497 memcpy(pars->ker,user_ker,sizeof(float)*fwidth*fwidth);
01498
01499 return 0;
01500 }
01501