00001 #include <stdio.h>
00002
00003 #include <stdlib.h>
00004
00005 #include <time.h>
00006
00007 #include <string.h>
00008
00009 #include <math.h>
00010
00011 #include <mkl_blas.h>
00012
00013 #include <mkl_service.h>
00014
00015 #include <mkl_lapack.h>
00016
00017 #include <mkl_vml_functions.h>
00018
00019 #include <omp.h>
00020
00021 #include "fresize.h"
00022
00023 #define minval(x,y) (((x) < (y)) ? (x) : (y))
00024
00025 #define maxval(x,y) (((x) < (y)) ? (y) : (x))
00026
00027 #define fresize_sample 1
00028
00029 #define fresize_bin 2
00030
00031 #define fresize_1d 3
00032
00033 #define fresize_2d 4
00034
00035
00036
00037 double sinc(double x)
00038
00039 {
00040
00041 if (fabs(x) < (1.e-10))
00042
00043 return 1.;
00044
00045 else
00046
00047 return sin(M_PI*x)/(M_PI*x);
00048
00049 }
00050
00051
00052
00053 int init_fresize_sample(
00054
00055 struct fresize_struct *pars,
00056
00057 int nsub
00058
00059 )
00060
00061 {
00062
00063
00064
00065 pars->method=fresize_sample;
00066
00067 pars->nsub=nsub;
00068
00069
00070
00071 return 0;
00072
00073 }
00074
00075
00076
00077 int init_fresize_bin(
00078
00079 struct fresize_struct *pars,
00080
00081 int nsub
00082
00083 )
00084
00085 {
00086
00087
00088
00089 pars->method=fresize_bin;
00090
00091 pars->nsub=nsub;
00092
00093
00094
00095 return 0;
00096
00097 }
00098
00099
00100
00101 int init_fresize_boxcar(
00102
00103 struct fresize_struct *pars,
00104
00105 int hwidth,
00106
00107 int nsub
00108
00109 )
00110
00111 {
00112
00113 const int malign=32;
00114
00115 int fwidth;
00116
00117 int i,j;
00118
00119
00120
00121 pars->method=fresize_1d;
00122
00123 pars->nsub=nsub;
00124
00125 pars->hwidth=hwidth;
00126
00127 fwidth=2*hwidth+1;
00128
00129 pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00130
00131 pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00132
00133 for (i=0;i<fwidth;i++) {
00134
00135 pars->kerx[i]=1.0f/fwidth;
00136
00137 pars->kery[i]=1.0f/fwidth;
00138
00139 }
00140
00141
00142
00143 return 0;
00144
00145 }
00146
00147
00148
00149 int init_fresize_gaussian(
00150
00151 struct fresize_struct *pars,
00152
00153 float sigma,
00154
00155 int hwidth,
00156
00157 int nsub
00158
00159 )
00160
00161 {
00162
00163 const int malign=32;
00164
00165 int fwidth;
00166
00167 int i,j;
00168
00169 float sum,x,y;
00170
00171
00172
00173 pars->method=fresize_1d;
00174
00175 pars->nsub=nsub;
00176
00177 pars->hwidth=hwidth;
00178
00179 fwidth=2*hwidth+1;
00180
00181 pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00182
00183 pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
00184
00185 sum=0.0f;
00186
00187 for (i=0;i<fwidth;i++) {
00188
00189 x=(i-hwidth)/sigma;
00190
00191 y=exp(-x*x/2);
00192
00193 sum=sum+y;
00194
00195 }
00196
00197 for (i=0;i<fwidth;i++) {
00198
00199 x=(i-hwidth)/sigma;
00200
00201 y=exp(-x*x/2);
00202
00203 pars->kerx[i]=y/sum;
00204
00205 pars->kery[i]=y/sum;
00206
00207 }
00208
00209
00210
00211 return 0;
00212
00213 }
00214
00215
00216
00217 int free_fresize(
00218
00219 struct fresize_struct *pars
00220
00221 )
00222
00223 {
00224
00225 if (pars->method==fresize_1d) {
00226
00227 MKL_free (pars->kerx);
00228
00229 MKL_free (pars->kery);
00230
00231 }
00232
00233
00234
00235 return 0;
00236
00237 }
00238
00239
00240
00241 int fsample(
00242
00243 float *image_in,
00244
00245 float *image_out,
00246
00247 int nxin,
00248
00249 int nyin,
00250
00251 int nleadin,
00252
00253 int nxout,
00254
00255 int nyout,
00256
00257 int nleadout,
00258
00259 int nsub,
00260
00261 int xoff,
00262
00263 int yoff,
00264
00265 float fillval
00266
00267 )
00268
00269
00270
00271 {
00272
00273 int i,j;
00274
00275 int imin,imax,jmin,jmax;
00276
00277 float *imp;
00278
00279
00280
00281
00282
00283 if (xoff >= 0) imin=0; else imin=((-xoff+nsub-1)/nsub);
00284
00285 if (xoff <= 0) imax=nxout-1; else imax=minval(((nxin-xoff-1)/nsub),nxout-1);
00286
00287 if (yoff >= 0) jmin=0; else jmin=((-yoff+nsub-1)/nsub);
00288
00289 if (yoff <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoff-1)/nsub),nyout-1);
00290
00291
00292
00293 imp=image_in+yoff*nleadin+xoff;
00294
00295
00296
00297 #pragma omp parallel default(none) private(i,j) shared(image_in,image_out,imp,fillval) shared(imin,imax,jmin,jmax) shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
00298
00299 {
00300
00301
00302
00303
00304
00305 #pragma omp for
00306
00307 for (j=0; j<jmin; j++) {
00308
00309 for (i=0; i<nxout; i++) {
00310
00311 image_out[j*nleadout+i]=fillval;
00312
00313 }
00314
00315 }
00316
00317
00318
00319
00320
00321 #pragma omp for
00322
00323 for (j=jmin; j<=jmax; j++) {
00324
00325
00326
00327 for (i=0; i<imin; i++) {
00328
00329 image_out[j*nleadout+i]=fillval;
00330
00331 }
00332
00333
00334
00335 for (i=imin; i<=imax; i++) {
00336
00337
00338
00339 image_out[j*nleadout+i]=imp[j*nsub*nleadin+i*nsub];
00340
00341 }
00342
00343
00344
00345 for (i=imax+1; i<nxout; i++) {
00346
00347 image_out[j*nleadout+i]=fillval;
00348
00349 }
00350
00351 }
00352
00353
00354
00355
00356
00357 #pragma omp for
00358
00359 for (j=jmax+1; j<nyout; j++) {
00360
00361 for (i=0; i<nxout; i++) {
00362
00363 image_out[j*nleadout+i]=fillval;
00364
00365 }
00366
00367 }
00368
00369
00370
00371 }
00372
00373
00374
00375 return 0;
00376
00377 }
00378
00379
00380
00381 int fbin(
00382
00383 float *image_in,
00384
00385 float *image_out,
00386
00387 int nxin,
00388
00389 int nyin,
00390
00391 int nleadin,
00392
00393 int nxout,
00394
00395 int nyout,
00396
00397 int nleadout,
00398
00399 int nsub,
00400
00401 int xoff,
00402
00403 int yoff,
00404
00405 float fillval
00406
00407 )
00408
00409
00410
00411 {
00412
00413 int i,j,i1,j1;
00414
00415 int imin,imax,jmin,jmax;
00416
00417 float *imp,*impi;
00418
00419 float sum;
00420
00421
00422
00423
00424
00425 if (xoff >= 0) imin=0; else imin=((-xoff+nsub-1)/nsub);
00426
00427 if (xoff <= 0) imax=nxout-1; else imax=minval(((nxin-xoff-nsub)/nsub),nxout-1);
00428
00429 if (yoff >= 0) jmin=0; else jmin=((-yoff+nsub-1)/nsub);
00430
00431 if (yoff <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoff-nsub)/nsub),nyout-1);
00432
00433
00434
00435 imp=image_in+yoff*nleadin+xoff;
00436
00437
00438
00439 #pragma omp parallel default(none) private(i,j,i1,j1,impi,sum) shared(image_in,image_out,imp,fillval) shared(imin,imax,jmin,jmax) shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
00440
00441 {
00442
00443
00444
00445
00446
00447 #pragma omp for
00448
00449 for (j=0; j<jmin; j++) {
00450
00451 for (i=0; i<nxout; i++) {
00452
00453 image_out[j*nleadout+i]=fillval;
00454
00455 }
00456
00457 }
00458
00459
00460
00461
00462
00463 #pragma omp for
00464
00465 for (j=jmin; j<=jmax; j++) {
00466
00467
00468
00469 for (i=0; i<imin; i++) {
00470
00471 image_out[j*nleadout+i]=fillval;
00472
00473 }
00474
00475
00476
00477 for (i=imin; i<=imax; i++) {
00478
00479 impi=imp+j*nleadin*nsub+i*nsub;
00480
00481 sum=0.0f;
00482
00483 for (j1=0; j1<nsub; j1++) {
00484
00485 for (i1=0; i1<nsub; i1++) {
00486
00487 sum=sum+impi[i1];
00488
00489 }
00490
00491 impi=impi+nleadin;
00492
00493 }
00494
00495 image_out[j*nleadout+i]=sum/(nsub*nsub);
00496
00497 }
00498
00499
00500
00501 for (i=imax+1; i<nxout; i++) {
00502
00503 image_out[j*nleadout+i]=fillval;
00504
00505 }
00506
00507 }
00508
00509
00510
00511
00512
00513 #pragma omp for
00514
00515 for (j=jmax+1; j<nyout; j++) {
00516
00517 for (i=0; i<nxout; i++) {
00518
00519 image_out[j*nleadout+i]=fillval;
00520
00521 }
00522
00523 }
00524
00525
00526
00527 }
00528
00529
00530
00531 return 0;
00532
00533 }
00534
00535
00536
00537 int f1d(
00538
00539 struct fresize_struct *pars,
00540
00541 float *image_in,
00542
00543 float *image_out,
00544
00545 int nxin,
00546
00547 int nyin,
00548
00549 int nleadin,
00550
00551 int nxout,
00552
00553 int nyout,
00554
00555 int nleadout,
00556
00557 int xoff,
00558
00559 int yoff,
00560
00561 float fillval
00562
00563 )
00564
00565
00566
00567 {
00568
00569 const int malign=32;
00570
00571 char transpose[] = "t";
00572
00573 char normal[] = "n";
00574
00575 const int ione = 1;
00576
00577 const float fone = 1.0f;
00578
00579 const float fzero = 0.0f;
00580
00581 int i,j,i1,j1;
00582
00583 int imin,imax,jmin,jmax;
00584
00585 float *inp,*inpi,*inpj;
00586
00587 float sum;
00588
00589 int nsub,hwidth;
00590
00591 float *kerx,*kery,*work;
00592
00593 int xoffl,xoffh,yoffl,yoffh;
00594
00595 int nwork;
00596
00597 double t1,t2,t3;
00598
00599 int n1,n2,nchunk;
00600
00601
00602
00603 nsub=pars->nsub;
00604
00605 hwidth=pars->hwidth;
00606
00607 kerx=pars->kerx;
00608
00609 kery=pars->kery;
00610
00611
00612
00613
00614
00615 xoffl=xoff-hwidth;
00616
00617 xoffh=xoff+hwidth;
00618
00619 if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
00620
00621 if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
00622
00623
00624
00625 yoffl=yoff-hwidth;
00626
00627 yoffh=yoff+hwidth;
00628
00629 if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
00630
00631 if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
00632
00633
00634
00635
00636
00637 nwork=nxout*nyin;
00638
00639 work=(float *)(MKL_malloc(nwork*sizeof(float),malign));
00640
00641
00642
00643 nchunk=64;
00644
00645 n1=(imax-imin+1);
00646
00647 n2=2*hwidth+1;
00648
00649
00650
00651
00652
00653 #pragma omp parallel default(none) private(i,j,i1,j1,inpi,inpj,sum) shared(image_in,image_out,fillval) shared(imin,imax,jmin,jmax,hwidth,kerx,kery,work,xoffl,yoffl) shared(normal,transpose,n1,n2,nchunk) shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
00654
00655 {
00656
00657
00658
00659
00660
00661
00662
00663
00664
00665 #pragma omp for
00666
00667 for (j=0;j<nyin;j++) {
00668
00669 inpj=image_in+j*nleadin+xoffl;
00670
00671 for (i=imin; i<=imax; i++) {
00672
00673 sum=0.0f;
00674
00675 inpi=inpj+i*nsub;
00676
00677 for (i1=0; i1<=2*hwidth; i1++) {
00678
00679 sum=sum+kerx[i1]*inpi[i1];
00680
00681 }
00682
00683 work[j*nleadout+i]=sum;
00684
00685
00686
00687
00688
00689 }
00690
00691
00692
00693
00694
00695 }
00696
00697
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717
00718
00719 #pragma omp for
00720
00721 for (j=jmin; j<=jmax; j++) {
00722
00723 inpj=work+(yoffl+j*nsub)*nleadout;
00724
00725
00726
00727
00728
00729
00730
00731
00732
00733
00734
00735
00736
00737
00738
00739
00740
00741
00742
00743 sgemv(normal,&n1,&n2,&fone,inpj+imin,&nleadout,kery,&ione,&fzero,image_out+j*nleadout+imin,&ione);
00744
00745 }
00746
00747
00748
00749
00750
00751 #pragma omp for
00752
00753 for (j=0; j<jmin; j++) {
00754
00755 for (i=0; i<nxout; i++) {
00756
00757 image_out[j*nleadout+i]=fillval;
00758
00759 }
00760
00761 }
00762
00763
00764
00765
00766
00767 #pragma omp for
00768
00769 for (j=jmin; j<=jmax; j++) {
00770
00771
00772
00773 for (i=0; i<imin; i++) {
00774
00775 image_out[j*nleadout+i]=fillval;
00776
00777 }
00778
00779
00780
00781 for (i=imax+1; i<nxout; i++) {
00782
00783 image_out[j*nleadout+i]=fillval;
00784
00785 }
00786
00787 }
00788
00789
00790
00791
00792
00793 #pragma omp for
00794
00795 for (j=jmax+1; j<nyout; j++) {
00796
00797 for (i=0; i<nxout; i++) {
00798
00799 image_out[j*nleadout+i]=fillval;
00800
00801 }
00802
00803 }
00804
00805
00806
00807 }
00808
00809
00810
00811 return 0;
00812
00813 }
00814
00815
00816
00817
00818
00819 int fresize(
00820
00821 struct fresize_struct *pars,
00822
00823 float *image_in,
00824
00825 float *image_out,
00826
00827 int nxin,
00828
00829 int nyin,
00830
00831 int nleadin,
00832
00833 int nx,
00834
00835 int ny,
00836
00837 int nlead,
00838
00839 int xoff,
00840
00841 int yoff,
00842
00843 float fillval
00844
00845 )
00846
00847 {
00848
00849 int status;
00850
00851
00852
00853 switch (pars->method) {
00854
00855
00856
00857 case fresize_sample:
00858
00859 status=fsample(image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,pars->nsub,xoff,yoff,fillval);
00860
00861 break;
00862
00863
00864
00865 case fresize_bin:
00866
00867 status=fbin(image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,pars->nsub,xoff,yoff,fillval);
00868
00869 break;
00870
00871
00872
00873 case fresize_1d:
00874
00875 status=f1d(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
00876
00877 break;
00878
00879
00880
00881 default:
00882
00883 status=1;
00884
00885 break;
00886
00887 }
00888
00889
00890
00891 return status;
00892
00893
00894
00895 }
00896
00897
00898