1 arta 1.1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <time.h>
4 #include <string.h>
5 #include <math.h>
6 #include <mkl_blas.h>
7 #include <mkl_service.h>
8 #include <mkl_lapack.h>
9 #include <mkl_vml_functions.h>
10 #include <omp.h>
11 #include <complex.h>
12 #include <fftw3.h>
13 #include "fresize.h"
14 #define minval(x,y) (((x) < (y)) ? (x) : (y))
15 #define maxval(x,y) (((x) < (y)) ? (y) : (x))
16 #define fresize_sample 1
17 #define fresize_bin 2
18 #define fresize_1d 3
19 #define fresize_2d 4
20 #define fresize_1d_fft 5
21 #define fresize_2d_fft 6
22 arta 1.1
|
23 schou 1.2 static double sinc(double x)
|
24 arta 1.1 {
25 if (fabs(x) < (1.e-10))
26 return 1.;
27 else
28 return sin(M_PI*x)/(M_PI*x);
29 }
30
31 int make_fft1d(
32 struct fresize_struct *pars,
33 int nxin,
34 int nyin
35 )
36 {
37 fftwf_complex *helpc,*fkernel,*fkernely;
38 float *helpin,*helpout;
39 fftwf_plan plan1,plan2,plan1y,plan2y;
40 int nxinp,nyinp; // Complex array size
41 int nin,ninp;
42 int hwidth /*,fwidth */;
43 int i,/*j,*/i1/*,j1*/;
44 float c;
45 arta 1.1
46 hwidth=pars->hwidth;
47 /* fwidth=2*hwidth+1; */
48 nxinp=(nxin/2+1);
49 nyinp=(nyin/2+1);
50 nin=maxval(nxin,nyin); // Max value to use same arrays
51 ninp=maxval(nxinp,nyinp); // Max value to use same arrays
52
53 helpin=(float*) fftwf_malloc(sizeof(float) * nin);
54 fkernel=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nxinp);
55 fkernely=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nyinp);
56 helpc=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * ninp);
57 helpout=(float*) fftwf_malloc(sizeof(float) * nin);
58
59 plan1=fftwf_plan_dft_r2c_1d(nxin,helpin,helpc,FFTW_ESTIMATE);
60 plan2=fftwf_plan_dft_c2r_1d(nxin,helpc,helpout,FFTW_ESTIMATE);
61 plan1y=fftwf_plan_dft_r2c_1d(nxin,helpin,helpc,FFTW_ESTIMATE);
62 plan2y=fftwf_plan_dft_c2r_1d(nxin,helpc,helpout,FFTW_ESTIMATE);
63
64 pars->helpin=helpin;
65 pars->helpout=helpout;
66 arta 1.1 pars->fkernel=fkernel;
67 pars->fkernely=fkernely;
68 pars->helpc=helpc;
69 pars->plan1=plan1;
70 pars->plan2=plan2;
71 pars->plan1y=plan1y;
72 pars->plan2y=plan2y;
73 pars->nxin=nxin;
74 pars->nyin=nyin;
75 pars->nxinp=nxinp;
76 pars->nyinp=nyinp;
77 pars->method=fresize_1d_fft;
78
79 /* First transform kernel in x*/
80
81 /* Zero entire array */
82 for (i=0;i<nxin;i++) {
83 helpin[i]=0;
84 }
85
86 // Copy kernel to new array
87 arta 1.1 for (i=-hwidth;i<=hwidth;i++) {
88 i1=(i+nxin) % nxin;
89 helpin[i1]=pars->kerx[i+hwidth];
90 }
91
92 /* Transform kernel */
93 fftwf_execute(plan1);
94
95 /* Copy transform and scale */
96 c=1.0f/nxin;
97
98 for (i=0;i<nxinp;i++) {
99 fkernel[i]=c*helpc[i];
100 }
101
102 /* Then transform kernel in y*/
103
104 /* Zero entire array */
105 for (i=0;i<nxin;i++) {
106 helpin[i]=0;
107 }
108 arta 1.1
109 /* Copy kernel to new array */
110 for (i=-hwidth;i<=hwidth;i++) {
111 i1=(i+nyin) % nyin;
112 helpin[i1]=pars->kery[i+hwidth];
113 }
114
115 /* Transform kernel */
116 fftwf_execute(plan1y);
117
118 /* Copy transform and scale */
119 c=1.0f/nyin;
120
121 for (i=0;i<nyinp;i++) {
122 fkernely[i]=c*helpc[i];
123 }
124
125 return 0;
126 }
127
128 int make_fft2d(
129 arta 1.1 struct fresize_struct *pars,
130 int nxin,
131 int nyin
132 )
133 {
134 fftwf_complex *helpc,*fkernel;
135 float *helpin,*helpout;
136 fftwf_plan plan1,plan2;
137 int nxinp; // Complex array size
138 int hwidth,fwidth;
139 int i,j,i1,j1;
140 float c;
141
142 hwidth=pars->hwidth;
143 fwidth=2*hwidth+1;
144 nxinp=(nxin/2+1);
145
146 helpin=(float*) fftwf_malloc(sizeof(float) * nxin*nyin);
147 fkernel=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nxinp*nyin);
148 helpc=(fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex) * nxinp*nyin);
149 helpout=(float*) fftwf_malloc(sizeof(float) * nxin*nyin);
150 arta 1.1
151 plan1=fftwf_plan_dft_r2c_2d(nyin,nxin,helpin,helpc,FFTW_ESTIMATE);
152 plan2=fftwf_plan_dft_c2r_2d(nyin,nxin,helpc,helpout,FFTW_ESTIMATE);
153
154 pars->helpin=helpin;
155 pars->helpout=helpout;
156 pars->fkernel=fkernel;
157 pars->helpc=helpc;
158 pars->plan1=plan1;
159 pars->plan2=plan2;
160 pars->nxin=nxin;
161 pars->nyin=nyin;
162 pars->nxinp=nxinp;
163 pars->method=fresize_2d_fft;
164
165 /* First transform kernel */
166
167 /* Zero entire array */
168 for (j=0;j<nyin;j++) {
169 for (i=0;i<nxin;i++) {
170 helpin[j*nxin+i]=0;
171 arta 1.1 }
172 }
173
174 /* helpin[0]=1.0f; */
175
176 /* Copy kernel to new array */
177 for (j=-hwidth;j<=hwidth;j++) {
178 j1=(j+nyin) % nyin;
179 for (i=-hwidth;i<=hwidth;i++) {
180 i1=(i+nxin) % nxin;
181 helpin[j1*nxin+i1]=pars->ker[(j+hwidth)*fwidth+(i+hwidth)];
182 }
183 }
184
185 /* Transform kernel */
186 fftwf_execute(plan1);
187
188 /* Copy transform and scale */
189 c=1.0f/nxin/nyin;
190
191 for (j=0;j<nyin;j++) {
192 arta 1.1 for (i=0;i<nxinp;i++) {
193 fkernel[j*nxinp+i]=c*helpc[j*nxinp+i];
194 }
195 }
196
197 return 0;
198 }
199
200 int init_fresize_sample(
201 struct fresize_struct *pars,
202 int nsub
203 )
204 {
205
206 pars->method=fresize_sample;
207 pars->nsub=nsub;
208
209 return 0;
210 }
211
212 int init_fresize_bin(
213 arta 1.1 struct fresize_struct *pars,
214 int nsub
215 )
216 {
217
218 pars->method=fresize_bin;
219 pars->nsub=nsub;
220
221 return 0;
222 }
223
224 int init_fresize_boxcar(
225 struct fresize_struct *pars,
226 int hwidth, // Half width of boxcar. Full is 2*hwidth+1.
227 int nsub // Distance between sampled points
228 )
229 {
230 const int malign=32;
231 int fwidth;
232 int i /*,j*/;
233
234 arta 1.1 pars->method=fresize_1d;
235 pars->nsub=nsub;
236 pars->hwidth=hwidth;
237 fwidth=2*hwidth+1;
238 pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
239 pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
240 for (i=0;i<fwidth;i++) {
241 pars->kerx[i]=1.0f/fwidth;
242 pars->kery[i]=1.0f/fwidth;
243 }
244
245 return 0;
246 }
247
248 int init_fresize_boxcar_fft(
249 struct fresize_struct *pars,
250 int hwidth, // Half width of boxcar. Full is 2*hwidth+1.
251 int nsub, // Distance between sampled points
252 int nxin, // Array size
253 int nyin // Array size
254 )
255 arta 1.1 {
256 int status;
257
258 status=init_fresize_boxcar(pars,hwidth,nsub);
259 if (status != 0) return status;
260
261 status=make_fft1d(pars,nxin,nyin);
262 return status;
263 }
264
265 int init_fresize_gaussian(
266 struct fresize_struct *pars,
267 float sigma, // Shape is exp(-(d/sigma)^2/2)
268 int hwidth, // Half (truncation) width of kernel. Full is 2*hwidth+1.
269 int nsub // Distance between sampled points
270 )
271 {
272 const int malign=32;
273 int fwidth;
274 int i /*,j*/;
275 float sum,x,y;
276 arta 1.1
277 pars->method=fresize_1d;
278 pars->nsub=nsub;
279 pars->hwidth=hwidth;
280 fwidth=2*hwidth+1;
281 pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
282 pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
283 sum=0.0f;
284 for (i=0;i<fwidth;i++) {
285 x=(i-hwidth)/sigma;
286 y=(float)exp(-x*x/2); /* There is expf (a float version), but not sure what impact this has. */
287 sum=sum+y;
288 }
289 for (i=0;i<fwidth;i++) {
290 x=(i-hwidth)/sigma;
291 y=(float)exp(-x*x/2);
292 pars->kerx[i]=y/sum;
293 pars->kery[i]=y/sum;
294 }
295
296 return 0;
297 arta 1.1 }
298
299 int init_fresize_gaussian_fft( // Simple square truncated Gaussian. FFT version.
300 struct fresize_struct *pars,
301 float sigma, // Shape is exp(-(d/sigma)^2/2)
302 int hwidth, // Half (truncation) width of kernel. Full is 2*hwidth+1.
303 int nsub, // Distance between sampled points
304 int nxin, // Array size
305 int nyin // Array size
306 )
307 {
308 int status;
309
310 status=init_fresize_gaussian(pars,sigma,hwidth,nsub);
311 if (status != 0) return status;
312
313 status=make_fft1d(pars,nxin,nyin);
314 return status;
315 }
316
317 int init_fresize_sinc( // Sinc filter
318 arta 1.1 struct fresize_struct *pars,
319 float wsinc, /* Shape is sinc(d/wsinc)*ap(d)
320 wsinc is the amount by which the Nyquist is reduced.
321 May want wsinc=nsub. */
322 int hwidth, // Half width of kernel. Full is 2*hwidth+1.
323 int iap, /* Apodization method. Always ap=0 for d>nap*wsinc.
324 iap=0 means no apodization ap=1
325 iap=1 uses parabola ap=1-(d/(nap*wsinc))^2
326 iap=2 uses sinc ap=sinc(d/(nap*wsinc)) */
327 int nap, /* Sinc apodization width in units of wsinc.
328 Normally hwidth=nap*wsinc,
329 but hwidth=nap*wsinc-1 works for integer */
330 int nsub // Distance between sampled points
331 )
332 {
333 const int malign=32;
334 int fwidth;
335 int i /*,j*/;
336 float sum,x,y;
337
338 pars->method=fresize_1d;
339 arta 1.1 pars->nsub=nsub;
340 pars->hwidth=hwidth;
341 fwidth=2*hwidth+1;
342 pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
343 pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
344 sum=0.0f;
345 for (i=0;i<fwidth;i++) {
346 x=(float)(i-hwidth);
347 if (abs(x) > (nap*wsinc)) {
348 y=0.0f;
349 }
350 else {
351 y=(float)sinc(x/wsinc);
352 }
353 if (iap == 1) y=y*(1-(x/(nap*wsinc))*(x/(nap*wsinc)));
354 if (iap == 2) y=(float)(y*sinc(x/(nap*wsinc)));
355 pars->kerx[i]=y;
356 sum=sum+y;
357 }
358 for (i=0;i<fwidth;i++) {
359 pars->kerx[i]=pars->kerx[i]/sum;
360 arta 1.1 pars->kery[i]=pars->kerx[i];
361 }
362
363 return 0;
364 }
365
366 int init_fresize_sinc_fft( // Sinc filter. FFT version.
367 struct fresize_struct *pars,
368 float wsinc, /* Shape is sinc(d/wsinc)*ap(d)
369 wsinc is the amount by which the Nyquist is reduced.
370 May want wsinc=nsub. */
371 int hwidth, // Half width of kernel. Full is 2*hwidth+1.
372 int iap, /* Apodization method. Always ap=0 for d>nap*wsinc.
373 iap=0 means no apodization ap=1
374 iap=1 uses parabola ap=1-(d/(nap*wsinc))^2
375 iap=2 uses sinc ap=sinc(d/(nap*wsinc))
376 all other cases give ap=1 (not guaranteed) */
377 int nap, /* Sinc apodization width in units of wsinc.
378 Normally hwidth=nap*wsinc,
379 but hwidth=nap*wsinc-1 works for integer */
380 int nsub, // Distance between sampled points
381 arta 1.1 int nxin, // Array size
382 int nyin // Array size
383 )
384 {
385 int status;
386
387 status=init_fresize_sinc(pars,wsinc,hwidth,iap,nap,nsub);
388 if (status != 0) return status;
389
390 status=make_fft1d(pars,nxin,nyin);
391 return status;
392 }
393
394 int init_fresize_gaussian2( // Circularly truncated Gaussian
395 struct fresize_struct *pars,
396 float sigma, // Shape is exp(-(d/sigma)^2/2)
397 float rmax, // Truncation radius. Probably rmax<=hwidth.
398 int hwidth, // Half (truncation) width of kernel. Full is 2*hwidth+1.
399 int nsub // Distance between sampled points
400 )
401 {
402 arta 1.1 const int malign=32;
403 int fwidth;
404 int i,j;
405 float sum,xi,xj,y,r2,rmax2;
406
407 pars->method=fresize_2d;
408 pars->nsub=nsub;
409 pars->hwidth=hwidth;
410 fwidth=2*hwidth+1;
411 pars->ker=(float *)(MKL_malloc(fwidth*fwidth*sizeof(float),malign));
412 rmax2=rmax*rmax/sigma/sigma;
413
414 sum=0.0f;
415 for (j=0;j<fwidth;j++) {
416 xj=(j-hwidth)/sigma;
417 for (i=0;i<fwidth;i++) {
418 xi=(i-hwidth)/sigma;
419 r2=xi*xi+xj*xj;
420 if (r2 <= rmax2 ) {
421 y=(float)exp(-r2/2);
422 }
423 arta 1.1 else {
424 y=0.0f;
425 }
426 pars->ker[fwidth*j+i]=y;
427 sum=sum+y;
428 }
429 }
430 // Normalize kernel
431 for (j=0;j<fwidth;j++) {
432 for (i=0;i<fwidth;i++) {
433 pars->ker[fwidth*j+i]=pars->ker[fwidth*j+i]/sum;
434 }
435 }
436
437 return 0;
438 }
439
440 int init_fresize_gaussian2_fft( // Circularly truncated Gaussian. FFT version.
441 struct fresize_struct *pars,
442 float sigma, // Shape is exp(-(d/sigma)^2/2)
443 float rmax, // Truncation radius. Probably rmax<=hwidth.
444 arta 1.1 int hwidth, // Half (truncation) width of kernel. Full is 2*hwidth+1.
445 int nsub, // Distance between sampled points
446 int nxin, // Input size
447 int nyin // Input size
448 )
449 {
450 int status;
451
452 status=init_fresize_gaussian2(pars,sigma,rmax,hwidth,nsub);
453 if (status != 0) return status;
454
455 status=make_fft2d(pars,nxin,nyin);
456 return status;
457 }
458
459 int init_fresize_airy( // 2D Airy filter
460 struct fresize_struct *pars,
461 float cdown, /* cdown is the amount by which the Nyquist is reduced. */
462 int hwidth, /* Half width of kernel. Full is 2*hwidth+1.
463 Set to <0 to make routine set appropriate value */
464 int iap, /* Apodization method. Always ap=0 for d>Z_nap, where Z_nap os
465 arta 1.1 the position of the nap'th zero.
466 iap=0 means no apodization ap=1
467 iap=1 uses parabola ap=1-(d/Z_nap)^2
468 iap=2 uses sinc ap=sinc(d/(Z_nap))
469 iap=3 uses Airy with first zero at Z_nap
470 all other cases give ap=1 (not guaranteed) */
471 int nap, /* Apodizes to nap'th zero */
472 int nsub // Distance between sampled points
473 )
474 {
475 const int malign=32;
476 const float beszeros[20]={0.0000000,3.8317060,7.0155867,10.173468,13.323692,16.470630,19.615859,22.760084,25.903672,29.046829,32.189680,35.332308,38.474766,41.617094,44.759319,47.901461,51.043535,54.185554,57.327525,60.469458}; // first 20 zeros in Bessel function
477 float cb;
478 int fwidth;
479 int i,j;
480 float sum,xi,xj;
481 float r2,r,rc,airy,ra;
482 float nzero;
483
484 cb=(float)(cdown/M_PI); // Amount to scale Bessel function to get zeros right.
485 nzero=beszeros[nap]*cb; // Position of nap'th zero
486 arta 1.1 if (hwidth < 0) hwidth=(float)floor(nzero);
487
488 pars->method=fresize_2d;
489 pars->nsub=nsub;
490 pars->hwidth=hwidth;
491 fwidth=2*hwidth+1;
492 pars->ker=(float *)(MKL_malloc(fwidth*fwidth*sizeof(float),malign));
493
494 sum=0.0f;
495 for (j=0;j<fwidth;j++) {
496 xj=(float)(j-hwidth);
497 for (i=0;i<fwidth;i++) {
498 xi=(float)(i-hwidth);
499 r2=xi*xi+xj*xj;
500 r=(float)maxval(sqrt(r2),1e-20);
501 rc=r/cb;
502 if (r <= nzero ) {
503 airy=j1f(rc)/rc;
504 }
505 else {
506 airy=0.0f;
507 arta 1.1 }
508 ra=r/nzero;
509 if (iap == 1) airy=airy*(1-ra*ra);
510 if (iap == 2) airy=(float)(airy*sinc(ra));
511 if (iap == 3) {
512 ra=rc*beszeros[1]/beszeros[nap];
513 airy=airy*j1f(ra)/ra;
514 }
515 pars->ker[fwidth*j+i]=airy;
516 sum=sum+airy;
517 }
518 }
519 // Normalize kernel
520 for (j=0;j<fwidth;j++) {
521 for (i=0;i<fwidth;i++) {
522 pars->ker[fwidth*j+i]=pars->ker[fwidth*j+i]/sum;
523 }
524 }
525
526
527 return 0;
528 arta 1.1 }
529
530 int init_fresize_airy_fft( // 2D Airy filter. FFT version.
531 struct fresize_struct *pars,
532 float cdown, /* cdown is the amount by which the Nyquist is reduced. */
533 int hwidth, /* Half width of kernel. Full is 2*hwidth+1.
534 Set to <0 to make routine set appropriate value */
535 int iap, /* Apodization method. Always ap=0 for d>Z_nap, where Z_nap os
536 the position of the nap'th zero.
537 iap=0 means no apodization ap=1
538 iap=1 uses parabola ap=1-(d/Z_nap)^2
539 iap=2 uses sinc ap=sinc(d/(Z_nap))
540 iap=3 uses Airy with first zero at Z_nap
541 all other cases give ap=1 (not guaranteed) */
542 int nap, /* Apodizes to nap'th zero */
543 int nsub, // Distance between sampled points
544 int nxin, // Input size
545 int nyin // Input size
546 )
547 {
548 int status;
549 arta 1.1
550 status=init_fresize_airy(pars,cdown,hwidth,iap,nap,nsub);
551 if (status != 0) return status;
552
553 status=make_fft2d(pars,nxin,nyin);
554 return status;
555 }
556
557 int free_fresize(
558 struct fresize_struct *pars
559 )
560 {
561
562 if (pars->method==fresize_1d) {
563 MKL_free (pars->kerx);
564 MKL_free (pars->kery);
565 }
566
567 if (pars->method==fresize_2d) {
568 MKL_free (pars->ker);
569 }
570 arta 1.1
571 if (pars->method==fresize_1d_fft) {
572 MKL_free (pars->kerx);
573 MKL_free (pars->kery);
574 fftwf_free(pars->helpin);
575 fftwf_free(pars->fkernel);
576 fftwf_free(pars->fkernely);
577 fftwf_free(pars->helpc);
578 fftwf_free(pars->helpout);
579 fftwf_destroy_plan(pars->plan1);
580 fftwf_destroy_plan(pars->plan2);
581 fftwf_destroy_plan(pars->plan1y);
582 fftwf_destroy_plan(pars->plan2y);
583 }
584
585 if (pars->method==fresize_2d_fft) {
586 MKL_free (pars->ker);
587 fftwf_free(pars->helpin);
588 fftwf_free(pars->fkernel);
589 fftwf_free(pars->helpc);
590 fftwf_free(pars->helpout);
591 arta 1.1 fftwf_destroy_plan(pars->plan1);
592 fftwf_destroy_plan(pars->plan2);
593 }
594
595 return 0;
596 }
597
598 int fsample(
599 float *image_in,
600 float *image_out,
601 int nxin,
602 int nyin,
603 int nleadin,
604 int nxout,
605 int nyout,
606 int nleadout,
607 int nsub,
608 int xoff,
609 int yoff,
610 float fillval
611 )
612 arta 1.1
613 {
614 int i,j;
615 int imin,imax,jmin,jmax;
616 float *imp;
617
|
618 schou 1.5 // Find first ([ij]min) and last ([ij]max) indices in output image
619 // for which resulting input point is within image
|
620 arta 1.1 if (xoff >= 0) imin=0; else imin=((-xoff+nsub-1)/nsub);
621 if (xoff <= 0) imax=nxout-1; else imax=minval(((nxin-xoff-1)/nsub),nxout-1);
622 if (yoff >= 0) jmin=0; else jmin=((-yoff+nsub-1)/nsub);
623 if (yoff <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoff-1)/nsub),nyout-1);
624
|
625 schou 1.5 // Also ensure that indices are within output array
626
627 imin=maxval(0,minval(imin,nxout-1));
628 imax=maxval(0,minval(imax,nxout-1));
629 jmin=maxval(0,minval(jmin,nyout-1));
630 jmax=maxval(0,minval(jmax,nyout-1));
631
|
632 arta 1.1 imp=image_in+yoff*nleadin+xoff;
633
634 #pragma omp parallel default(none) \
635 private(i,j) \
636 shared(image_in,image_out,imp,fillval) \
637 shared(imin,imax,jmin,jmax) \
638 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
639 { // Needed to define parallel region
640
641 // Fill below valid region
642 #pragma omp for
643 for (j=0; j<jmin; j++) {
644 for (i=0; i<nxout; i++) {
645 image_out[j*nleadout+i]=fillval;
646 } // i=
647 } //j=
648
649 // Valid heights
650 #pragma omp for
651 for (j=jmin; j<=jmax; j++) {
652 // Fill to the left
653 arta 1.1 for (i=0; i<imin; i++) {
654 image_out[j*nleadout+i]=fillval;
655 } // i=
656 // Valid region
657 for (i=imin; i<=imax; i++) {
658 // image_out[j*nleadout+i]=image_in[j*nsub*nleadin+i*nsub];
659 image_out[j*nleadout+i]=imp[j*nsub*nleadin+i*nsub];
660 } // i=
661 // Fill to the right
662 for (i=imax+1; i<nxout; i++) {
663 image_out[j*nleadout+i]=fillval;
664 } // i=
665 } //j=
666
667 // Fill above valid region
668 #pragma omp for
669 for (j=jmax+1; j<nyout; j++) {
670 for (i=0; i<nxout; i++) {
671 image_out[j*nleadout+i]=fillval;
672 } // i=
673 } //j=
674 arta 1.1
675 } // End of parallel region
676
677 return 0;
678 }
679
680 int fbin(
681 float *image_in,
682 float *image_out,
683 int nxin,
684 int nyin,
685 int nleadin,
686 int nxout,
687 int nyout,
688 int nleadout,
689 int nsub,
690 int xoff,
691 int yoff,
692 float fillval
693 )
694
695 arta 1.1 {
696 int i,j,i1,j1;
697 int imin,imax,jmin,jmax;
698 float *imp,*impi;
699 float sum;
700
|
701 schou 1.5 // Find first ([ij]min) and last ([ij]max) indices in output image
702 // for which resulting input point is within image
|
703 arta 1.1 if (xoff >= 0) imin=0; else imin=((-xoff+nsub-1)/nsub);
704 if (xoff <= 0) imax=nxout-1; else imax=minval(((nxin-xoff-nsub)/nsub),nxout-1);
705 if (yoff >= 0) jmin=0; else jmin=((-yoff+nsub-1)/nsub);
706 if (yoff <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoff-nsub)/nsub),nyout-1);
707
|
708 schou 1.5 // Also ensure that indices are within output array
709
710 imin=maxval(0,minval(imin,nxout-1));
711 imax=maxval(0,minval(imax,nxout-1));
712 jmin=maxval(0,minval(jmin,nyout-1));
713 jmax=maxval(0,minval(jmax,nyout-1));
714
|
715 arta 1.1 imp=image_in+yoff*nleadin+xoff;
716
717 #pragma omp parallel default(none) \
718 private(i,j,i1,j1,impi,sum) \
719 shared(image_in,image_out,imp,fillval) \
720 shared(imin,imax,jmin,jmax) \
721 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
722 { // Needed to define parallel region
723
724 // Fill below valid region
725 #pragma omp for
726 for (j=0; j<jmin; j++) {
727 for (i=0; i<nxout; i++) {
728 image_out[j*nleadout+i]=fillval;
729 } // i=
730 } //j=
731
732 // Valid heights
733 #pragma omp for
734 for (j=jmin; j<=jmax; j++) {
735 // Fill to the left
736 arta 1.1 for (i=0; i<imin; i++) {
737 image_out[j*nleadout+i]=fillval;
738 } // i=
739 // Valid region
740 for (i=imin; i<=imax; i++) {
741 impi=imp+j*nleadin*nsub+i*nsub;
742 sum=0.0f;
743 for (j1=0; j1<nsub; j1++) {
744 for (i1=0; i1<nsub; i1++) {
745 sum=sum+impi[i1];
746 }
747 impi=impi+nleadin;
748 }
749 image_out[j*nleadout+i]=sum/(nsub*nsub);
750 } // i=
751 // Fill to the right
752 for (i=imax+1; i<nxout; i++) {
753 image_out[j*nleadout+i]=fillval;
754 } // i=
755 } //j=
756
757 arta 1.1 // Fill above valid region
758 #pragma omp for
759 for (j=jmax+1; j<nyout; j++) {
760 for (i=0; i<nxout; i++) {
761 image_out[j*nleadout+i]=fillval;
762 } // i=
763 } //j=
764
765 } // End of parallel region
766
767 return 0;
768 }
769
770 int f1d(
771 struct fresize_struct *pars,
772 float *image_in,
773 float *image_out,
774 int nxin,
775 int nyin,
776 int nleadin,
777 int nxout,
778 arta 1.1 int nyout,
779 int nleadout,
780 int xoff,
781 int yoff,
782 float fillval
783 )
784 {
785 const int malign=32;
786 /*char transpose[] = "t"; */
787 char normal[] = "n";
788 const int ione = 1;
789 const float fone = 1.0f;
790 const float fzero = 0.0f;
791 int i,j,i1 /*,j1*/;
792 int imin,imax,jmin,jmax;
793 float /**inp*,*/ *inpi,*inpj;
794 float sum;
795 int nsub,hwidth;
796 float *kerx,*kery,*work;
797 int xoffl,xoffh,yoffl,yoffh;
798 int nwork;
799 arta 1.1 /*double t1,t2,t3;*/
800 int n1,n2 /*,nchunk*/;
801
802 nsub=pars->nsub;
803 hwidth=pars->hwidth;
804 kerx=pars->kerx;
805 kery=pars->kery;
806
|
807 schou 1.5 // Find first ([ij]min) and last ([ij]max) indices in output image
808 // for which resulting input point is within image
|
809 arta 1.1 xoffl=xoff-hwidth;
810 xoffh=xoff+hwidth;
811 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
812 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
813
814 yoffl=yoff-hwidth;
815 yoffh=yoff+hwidth;
816 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
817 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
818
|
819 schou 1.5 // Also ensure that indices are within output array
820
821 imin=maxval(0,minval(imin,nxout-1));
822 imax=maxval(0,minval(imax,nxout-1));
823 jmin=maxval(0,minval(jmin,nyout-1));
824 jmax=maxval(0,minval(jmax,nyout-1));
825
|
826 arta 1.1 // Get work array big enough to hold convolution in x
827 nwork=nxout*nyin;
828 work=(float *)(MKL_malloc(nwork*sizeof(float),malign));
829
830 /*nchunk=64;*/
831 n1=(imax-imin+1); // Size of matrix for sgemv
832 n2=2*hwidth+1; // Size of matrix for sgemv
833 /*t1=dsecnd();*/
834
835 #pragma omp parallel default(none) \
836 private(i,j,i1,/*j1,*/inpi,inpj,sum) \
837 shared(image_in,image_out,fillval) \
838 shared(imin,imax,jmin,jmax,hwidth,kerx,kery,work,xoffl,yoffl) \
839 shared(normal,/*transpose,*/n1,n2/*,nchunk*/) \
840 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
841 { // Needed to define parallel region
842
843 // First convolve in x
844 // Brute force loop takes longer than it ought to, but have not
845 // found an obvious solution.
846 #pragma omp for
847 arta 1.1 for (j=0;j<nyin;j++) {
848 inpj=image_in+j*nleadin+xoffl; // Note offset to match kernels.
849 for (i=imin; i<=imax; i++) {
850 sum=0.0f;
851 inpi=inpj+i*nsub;
852 for (i1=0; i1<=2*hwidth; i1++) {
853 sum=sum+kerx[i1]*inpi[i1];
854 }
855 work[j*nleadout+i]=sum;
856 }
857 }
858
859 // Then convolve in y
860
|
861 schou 1.5 // Code using sgemv
862 if (n1 > 0) {
|
863 arta 1.1 #pragma omp for
|
864 schou 1.5 for (j=jmin; j<=jmax; j++) {
865 inpj=work+(yoffl+j*nsub)*nleadout;
866 sgemv(normal,&n1,&n2,&fone,inpj+imin,&nleadout,kery,&ione,&fzero,image_out+j*nleadout+imin,&ione);
|
867 arta 1.1 }
868 }
869
870 // Fill below valid region
871 #pragma omp for
872 for (j=0; j<jmin; j++) {
873 for (i=0; i<nxout; i++) {
874 image_out[j*nleadout+i]=fillval;
875 } // i=
876 } //j=
877
878 // Valid heights
879 #pragma omp for
880 for (j=jmin; j<=jmax; j++) {
881 // Fill to the left
882 for (i=0; i<imin; i++) {
883 image_out[j*nleadout+i]=fillval;
884 } // i=
885 // Fill to the right
886 for (i=imax+1; i<nxout; i++) {
887 image_out[j*nleadout+i]=fillval;
888 arta 1.1 } // i=
889 } //j=
890
891 // Fill above valid region
892 #pragma omp for
893 for (j=jmax+1; j<nyout; j++) {
894 for (i=0; i<nxout; i++) {
895 image_out[j*nleadout+i]=fillval;
896 } // i=
897 } //j=
898
899 } // End of parallel region
900
901 MKL_free(work);
902 return 0;
903 }
904
905 int f1d_fft(
906 struct fresize_struct *pars,
907 float *image_in,
908 float *image_out,
909 arta 1.1 int nxin,
910 int nyin,
911 int nleadin,
912 int nxout,
913 int nyout,
914 int nleadout,
915 int xoff,
916 int yoff,
917 float fillval
918 )
919 {
920 const int malign=32;
921 int i,j,i1 /*,j1*/;
922 int imin,imax,jmin,jmax;
923 float /**inp,*inpi,*/ *inpj;
924 /*float sum;*/
925 int nsub,hwidth;
926 float /**kerx,*kery,*/ *work;
927 int xoffl,xoffh,yoffl,yoffh;
928 int nwork;
929 double t1, /*t2,t3,*/ t4;
930 arta 1.1 int /*n1,n2,*/ nchunk;
931 fftwf_complex *helpc,*fkernel,*fkernely;
932 float *helpin,*helpout;
933 int nxinp,nyinp;
934 float *iwork,*owork;
935
936 if (pars->nxin != nxin) return 1;
937 if (pars->nyin != nyin) return 1;
938
939 nsub=pars->nsub;
940 hwidth=pars->hwidth;
941 /*kerx=pars->kerx;*/
942 /*kery=pars->kery;*/
943 helpc=pars->helpc;
944 fkernel=pars->fkernel;
945 fkernely=pars->fkernely;
946 helpin=pars->helpin;
947 helpout=pars->helpout;
948 nxinp=pars->nxinp;
949 nyinp=pars->nyinp;
950
|
951 schou 1.5 // Find first ([ij]min) and last ([ij]max) indices in output image
952 // for which resulting input point is within image
|
953 arta 1.1 xoffl=xoff-hwidth;
954 xoffh=xoff+hwidth;
955 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
956 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
957
958 yoffl=yoff-hwidth;
959 yoffh=yoff+hwidth;
960 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
961 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
962
|
963 schou 1.5 // Also ensure that indices are within output array
964
965 imin=maxval(0,minval(imin,nxout-1));
966 imax=maxval(0,minval(imax,nxout-1));
967 jmin=maxval(0,minval(jmin,nyout-1));
968 jmax=maxval(0,minval(jmax,nyout-1));
969
|
970 arta 1.1 // Get work array big enough to hold convolution in x
971 nwork=nxout*nyin;
972 work=(float *)(MKL_malloc(nwork*sizeof(float),malign));
973
974 nchunk=64;
975 iwork=(float *)(MKL_malloc(nchunk*nyin*sizeof(float),malign));
976 owork=(float *)(MKL_malloc(nchunk*nyout*sizeof(float),malign));
977
978 // No OpenMP yet, but have left statements in for good measure.
979 //#pragma omp parallel default(none) \
980 //private(i,j,i1,j1,inpi,inpj,sum) \
981 //shared(image_in,image_out,fillval) \
982 //shared(imin,imax,jmin,jmax,hwidth,kerx,kery,work,xoffl,yoffl) \
983 //shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
984 { // Needed to define parallel region
985
986 t1=dsecnd();
987 // First convolve in x
988 //#pragma omp for
989 for (j=0;j<nyin;j++) {
990 inpj=image_in+j*nleadin;
991 arta 1.1 for (i=0;i<nxin;i++) helpin[i]=inpj[i];
992 fftwf_execute(pars->plan1);
993 for (i=0; i<nxinp; i++) helpc[i]=fkernel[i]*helpc[i];
994 fftwf_execute(pars->plan2);
995 for (i=imin; i<=imax; i++) work[j*nleadout+i]=helpout[i*nsub+xoff];
996 }
997 t1=dsecnd()-t1;
998
999 // Then convolve in y
1000
1001 // Block both the input output arrays.
1002 t4=dsecnd();
1003 //#pragma omp for
1004 for (i1=imin;i1<=imax;i1=i1+nchunk) {
1005 for (j=0;j<nyin;j++) {
1006 for (i=i1; i<=minval(i1+nchunk-1,imax); i++) {
1007 iwork[(i-i1)*nyin+j]=work[j*nleadout+i];
1008 }
1009 }
1010 for (i=i1; i<=minval(i1+nchunk-1,imax); i++) {
1011 for (j=0;j<nyin;j++) helpin[j]=iwork[(i-i1)*nyin+j];
1012 arta 1.1 fftwf_execute(pars->plan1y);
1013 for (j=0; j<nyinp; j++) helpc[j]=fkernely[j]*helpc[j];
1014 fftwf_execute(pars->plan2y);
1015 for (j=jmin; j<=jmax; j++) owork[j+(i-i1)*nyout]=helpout[j*nsub+yoff];
1016 }
1017 for (j=jmin; j<=jmax; j++) {
1018 for (i=i1; i<=minval(i1+nchunk-1,imax); i++) {
1019 image_out[j*nleadout+i]=owork[j+(i-i1)*nyout];
1020 }
1021 }
1022 }
1023 t4=dsecnd()-t4;
1024
1025 // Fill below valid region
1026 //#pragma omp for
1027 for (j=0; j<jmin; j++) {
1028 for (i=0; i<nxout; i++) {
1029 image_out[j*nleadout+i]=fillval;
1030 } // i=
1031 } //j=
1032
1033 arta 1.1 // Valid heights
1034 //#pragma omp for
1035 for (j=jmin; j<=jmax; j++) {
1036 // Fill to the left
1037 for (i=0; i<imin; i++) {
1038 image_out[j*nleadout+i]=fillval;
1039 } // i=
1040 // Fill to the right
1041 for (i=imax+1; i<nxout; i++) {
1042 image_out[j*nleadout+i]=fillval;
1043 } // i=
1044 } //j=
1045
1046 // Fill above valid region
1047 //#pragma omp for
1048 for (j=jmax+1; j<nyout; j++) {
1049 for (i=0; i<nxout; i++) {
1050 image_out[j*nleadout+i]=fillval;
1051 } // i=
1052 } //j=
1053
1054 arta 1.1 } // End of parallel region
1055
1056 MKL_free(iwork);
1057 MKL_free(owork);
|
1058 schou 1.4 MKL_free(work);
|
1059 arta 1.1
1060 return 0;
1061 }
1062
1063 int f2d(
1064 struct fresize_struct *pars,
1065 float *image_in,
1066 float *image_out,
1067 int nxin,
1068 int nyin,
1069 int nleadin,
1070 int nxout,
1071 int nyout,
1072 int nleadout,
1073 int xoff,
1074 int yoff,
1075 float fillval
1076 )
1077
1078 {
1079 /*const int malign=32;*/
1080 arta 1.1 /*char transpose[] = "t";*/
1081 /*char normal[] = "n";*/
1082 /*const int ione = 1;*/
1083 /*const float fone = 1.0f;*/
1084 /*const float fzero = 0.0f;*/
1085 int i,j,i1,j1;
1086 int imin,imax,jmin,jmax;
1087 float *inpi,*kerp;
1088 float sum;
1089 int nsub,hwidth,fwidth;
1090 float *ker;
1091 int xoffl,xoffh,yoffl,yoffh;
1092 /*double t1,t2,t3;*/
1093
1094 nsub=pars->nsub;
1095 hwidth=pars->hwidth;
1096 ker=pars->ker;
1097 fwidth=2*hwidth+1;
1098
|
1099 schou 1.5 // Find first ([ij]min) and last ([ij]max) indices in output image
1100 // for which resulting input point is within image
|
1101 arta 1.1 xoffl=xoff-hwidth;
1102 xoffh=xoff+hwidth;
1103 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
1104 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
1105
1106 yoffl=yoff-hwidth;
1107 yoffh=yoff+hwidth;
1108 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
1109 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
1110
|
1111 schou 1.5 // Also ensure that indices are within output array
1112
1113 imin=maxval(0,minval(imin,nxout-1));
1114 imax=maxval(0,minval(imax,nxout-1));
1115 jmin=maxval(0,minval(jmin,nyout-1));
1116 jmax=maxval(0,minval(jmax,nyout-1));
|
1117 arta 1.1
1118 /*t1=dsecnd();*/
1119
1120 #pragma omp parallel default(none) \
1121 private(i,j,i1,j1,inpi,kerp,sum) \
1122 shared(image_in,image_out,fillval) \
1123 shared(imin,imax,jmin,jmax,hwidth,fwidth,ker,xoffl,yoffl) \
1124 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
1125 { // Needed to define parallel region
1126
1127 #pragma omp for
1128 for (j=jmin; j<=jmax; j++) {
1129 for (i=imin; i<=imax; i++) {
1130 inpi=image_in+(j*nsub+yoffl)*nleadin+i*nsub+xoffl; // Note offset to match kernels.
1131 sum=0.0f;
1132 for (j1=0; j1<=2*hwidth; j1++) {
1133 kerp=ker+j1*fwidth;
1134 for (i1=0; i1<=2*hwidth; i1++) {
1135 sum=sum+kerp[i1]*inpi[i1];
1136 }
1137 inpi=inpi+nleadin;
1138 arta 1.1 }
1139 image_out[j*nleadout+i]=sum;
1140 }
1141 }
1142
1143 // Fill below valid region
1144 #pragma omp for
1145 for (j=0; j<jmin; j++) {
1146 for (i=0; i<nxout; i++) {
1147 image_out[j*nleadout+i]=fillval;
1148 } // i=
1149 } //j=
1150
1151 // Valid heights
1152 #pragma omp for
1153 for (j=jmin; j<=jmax; j++) {
1154 // Fill to the left
1155 for (i=0; i<imin; i++) {
1156 image_out[j*nleadout+i]=fillval;
1157 } // i=
1158 // Fill to the right
1159 arta 1.1 for (i=imax+1; i<nxout; i++) {
1160 image_out[j*nleadout+i]=fillval;
1161 } // i=
1162 } //j=
1163
1164 // Fill above valid region
1165 #pragma omp for
1166 for (j=jmax+1; j<nyout; j++) {
1167 for (i=0; i<nxout; i++) {
1168 image_out[j*nleadout+i]=fillval;
1169 } // i=
1170 } //j=
1171
1172 } // End of parallel region
1173
1174 return 0;
1175 }
1176
1177 // this version uses matrix vector multiplys. Not much faster.
1178 int f2d_mat(
1179 struct fresize_struct *pars,
1180 arta 1.1 float *image_in,
1181 float *image_out,
1182 int nxin,
1183 int nyin,
1184 int nleadin,
1185 int nxout,
1186 int nyout,
1187 int nleadout,
1188 int xoff,
1189 int yoff,
1190 float fillval
1191 )
1192
1193 {
1194 const int malign=32;
1195 char transpose[] = "t";
1196 /*char normal[] = "n";*/
1197 const int ione = 1;
1198 const float fone = 1.0f;
1199 /*const float fzero = 0.0f;*/
1200 int i,j,/*i1,*/j1;
1201 arta 1.1 int imin,imax,jmin,jmax;
1202 float *inpi,*kerp;
1203 /*float sum;*/
1204 int nsub,hwidth,fwidth;
1205 float *ker;
1206 int xoffl,xoffh,yoffl,yoffh;
1207 /*double t1,t2,t3;*/
1208 int /*n1,*/n2,nchunk,jc,nc;
1209 float *work;
1210
1211 nsub=pars->nsub;
1212 hwidth=pars->hwidth;
1213 ker=pars->ker;
1214 fwidth=2*hwidth+1;
1215
|
1216 schou 1.5 // Find first ([ij]min) and last ([ij]max) indices in output image
1217 // for which resulting input point is within image
|
1218 arta 1.1 xoffl=xoff-hwidth;
1219 xoffh=xoff+hwidth;
1220 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
1221 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
1222
1223 yoffl=yoff-hwidth;
1224 yoffh=yoff+hwidth;
1225 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
1226 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
1227
|
1228 schou 1.5 // Also ensure that indices are within output array
1229
1230 imin=maxval(0,minval(imin,nxout-1));
1231 imax=maxval(0,minval(imax,nxout-1));
1232 jmin=maxval(0,minval(jmin,nyout-1));
1233 jmax=maxval(0,minval(jmax,nyout-1));
|
1234 arta 1.1
1235 /*t1=dsecnd();*/
1236
1237 n2=nleadin*nsub;
1238 nchunk=64;
1239
1240 #pragma omp parallel default(none) \
1241 private(i,j,/*i1,*/j1,inpi,kerp,/*sum,*/work,nc) \
1242 shared(/*normal,*/transpose,/*n1,*/n2,nchunk) \
1243 shared(image_in,image_out,fillval) \
1244 shared(imin,imax,jmin,jmax,hwidth,fwidth,ker,xoffl,yoffl) \
1245 shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
1246 { // Needed to define parallel region
1247
1248 work=(float *)(MKL_malloc(nyout*sizeof(float),malign));
1249 #pragma omp for
1250 for (j=jmin; j<=jmax; j++) {
1251 for (i=imin; i<=imax; i++) {
1252 image_out[j*nleadout+i]=0.0f;
1253 }
1254 }
1255 arta 1.1
1256 #pragma omp for
1257 for (jc=jmin;jc<=jmax;jc=jc+nchunk) {
1258 nc=minval(jc+nchunk-1,jmax)-jc+1;
1259 for (j1=0; j1<=2*hwidth; j1++) {
1260 kerp=ker+j1*fwidth;
1261 for (i=imin; i<=imax; i++) {
1262 inpi=image_in+(jc*nsub+yoffl+j1)*nleadin+i*nsub+xoffl;
1263 sgemv(transpose,&fwidth,&nc,&fone,inpi,&n2,kerp,&ione,&fone,image_out+jc*nleadout+i,&nleadout);
1264 }
1265 }
1266 }
1267
1268 // Fill below valid region
1269 #pragma omp for
1270 for (j=0; j<jmin; j++) {
1271 for (i=0; i<nxout; i++) {
1272 image_out[j*nleadout+i]=fillval;
1273 } // i=
1274 } //j=
1275
1276 arta 1.1 // Valid heights
1277 #pragma omp for
1278 for (j=jmin; j<=jmax; j++) {
1279 // Fill to the left
1280 for (i=0; i<imin; i++) {
1281 image_out[j*nleadout+i]=fillval;
1282 } // i=
1283 // Fill to the right
1284 for (i=imax+1; i<nxout; i++) {
1285 image_out[j*nleadout+i]=fillval;
1286 } // i=
1287 } //j=
1288
1289 // Fill above valid region
1290 #pragma omp for
1291 for (j=jmax+1; j<nyout; j++) {
1292 for (i=0; i<nxout; i++) {
1293 image_out[j*nleadout+i]=fillval;
1294 } // i=
1295 } //j=
1296
1297 arta 1.1 MKL_free(work);
1298 } // End of parallel region
1299
1300 return 0;
1301 }
1302
1303 int f2d_fft(
1304 struct fresize_struct *pars,
1305 float *image_in,
1306 float *image_out,
1307 int nxin,
1308 int nyin,
1309 int nleadin,
1310 int nxout,
1311 int nyout,
1312 int nleadout,
1313 int xoff,
1314 int yoff,
1315 float fillval
1316 )
1317 {
1318 arta 1.1 fftwf_complex *helpc,*fkernel;
1319 float *helpin,*helpout;
1320 fftwf_plan plan1,plan2;
1321 int nxinp; // Complex array size
1322 int hwidth/*,fwidth*/;
1323 int i,j,/*i1,*/j1;
1324 /*float c;*/
1325 int imin,imax,jmin,jmax;
1326 int xoffl,xoffh,yoffl,yoffh;
1327 int nsub;
1328
1329 if (pars->nxin != nxin) return 1;
1330 if (pars->nyin != nyin) return 1;
1331
1332 nsub=pars->nsub;
1333 hwidth=pars->hwidth;
1334 /*fwidth=2*hwidth+1;*/
1335 nxinp=(nxin/2+1);
1336
1337 helpin=pars->helpin;
1338 fkernel=pars->fkernel;
1339 arta 1.1 helpc=pars->helpc;
1340 helpout=pars->helpout;
1341 plan1=pars->plan1;
1342 plan2=pars->plan2;
1343
1344 /* Now copy and transform input image */
1345
1346 for (j=0;j<nyin;j++) {
1347 for (i=0;i<nxin;i++) {
1348 helpin[j*nxin+i]=image_in[j*nleadin+i];
1349 }
1350 }
1351
1352 fftwf_execute(plan1);
1353
1354 /* Multiply by kernel transform */
1355
1356 for (j=0;j<nyin;j++) {
1357 for (i=0;i<nxinp;i++) {
1358 helpc[j*nxinp+i]=fkernel[j*nxinp+i]*helpc[j*nxinp+i];
1359 }
1360 arta 1.1 }
1361
1362 fftwf_execute(plan2);
1363
1364 // Copy data to output array
1365
|
1366 schou 1.5 // Find first ([ij]min) and last ([ij]max) indices in output image
1367 // for which resulting input point is within image
|
1368 arta 1.1 xoffl=xoff-hwidth;
1369 xoffh=xoff+hwidth;
1370 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
1371 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
1372
1373 yoffl=yoff-hwidth;
1374 yoffh=yoff+hwidth;
1375 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
1376 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
1377
|
1378 schou 1.5 // Also ensure that indices are within output array
1379
1380 imin=maxval(0,minval(imin,nxout-1));
1381 imax=maxval(0,minval(imax,nxout-1));
1382 jmin=maxval(0,minval(jmin,nyout-1));
1383 jmax=maxval(0,minval(jmax,nyout-1));
1384
|
1385 arta 1.1 for (j=jmin; j<=jmax; j++) {
1386 j1=(j*nsub+yoff)*nxin+xoff;
1387 for (i=imin; i<=imax; i++) {
1388 image_out[j*nleadout+i]=helpout[j1+i*nsub];
1389 }
1390 }
1391
1392 // Fill below valid region
1393 for (j=0; j<jmin; j++) {
1394 for (i=0; i<nxout; i++) {
1395 image_out[j*nleadout+i]=fillval;
1396 } // i=
1397 } //j=
1398
1399 // Valid heights
1400 for (j=jmin; j<=jmax; j++) {
1401 // Fill to the left
1402 for (i=0; i<imin; i++) {
1403 image_out[j*nleadout+i]=fillval;
1404 } // i=
1405 // Fill to the right
1406 arta 1.1 for (i=imax+1; i<nxout; i++) {
1407 image_out[j*nleadout+i]=fillval;
1408 } // i=
1409 } //j=
1410
1411 // Fill above valid region
1412 for (j=jmax+1; j<nyout; j++) {
1413 for (i=0; i<nxout; i++) {
1414 image_out[j*nleadout+i]=fillval;
1415 } // i=
1416 } //j=
1417
1418 return 0;
1419 }
1420
1421 int fresize(
1422 struct fresize_struct *pars,
1423 float *image_in,
1424 float *image_out,
1425 int nxin,
1426 int nyin,
1427 arta 1.1 int nleadin,
1428 int nx,
1429 int ny,
1430 int nlead,
1431 int xoff,
1432 int yoff,
1433 float fillval
1434 )
1435 {
1436 int status;
1437
1438 switch (pars->method) {
1439
1440 case fresize_sample:
1441 status=fsample(image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,pars->nsub,xoff,yoff,fillval);
1442 break;
1443
1444 case fresize_bin:
1445 status=fbin(image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,pars->nsub,xoff,yoff,fillval);
1446 break;
1447
1448 arta 1.1 case fresize_1d:
1449 status=f1d(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
1450 break;
1451
1452 case fresize_1d_fft:
1453 status=f1d_fft(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
1454 break;
1455
1456 case fresize_2d:
1457 status=f2d_mat(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
1458 break;
1459
1460 case fresize_2d_fft:
1461 status=f2d_fft(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
1462 break;
1463
1464 default:
1465 status=1;
1466 break;
1467 }
1468
1469 arta 1.1 return status;
1470
1471 }
1472
|
1473 schou 1.3 int init_fresize_user(
1474 struct fresize_struct *pars,
1475 int hwidth, // Half width of kernel. Full is 2*hwidth+1.
1476 int nsub, // Distance between sampled points
1477 float *user_ker // User specified kernel to convolve with.
1478 // Must be of size (2*hwidth+1) x (2*hwidth+1).
1479 // Kernel need not be and will not be normalized.
1480 )
1481 {
1482 const int malign=32;
1483 int fwidth;
1484 int i,j;
1485
1486 pars->method=fresize_2d;
1487 pars->nsub=nsub;
1488 pars->hwidth=hwidth;
1489 fwidth=2*hwidth+1;
1490 pars->ker=(float *)(MKL_malloc(fwidth*fwidth*sizeof(float),malign));
1491 memcpy(pars->ker,user_ker,sizeof(float)*fwidth*fwidth);
1492
1493 return 0;
1494 schou 1.3 }
1495
|