(file) Return to fresize.c CVS log (file) (dir) Up to [Development] / JSOC / proj / sharp / apps

  1 xudong 1.1 #include <stdio.h>
  2            
  3            #include <stdlib.h>
  4            
  5            #include <time.h>
  6            
  7            #include <string.h>
  8            
  9            #include <math.h>
 10            
 11            #include <mkl_blas.h>
 12            
 13            #include <mkl_service.h>
 14            
 15            #include <mkl_lapack.h>
 16            
 17            #include <mkl_vml_functions.h>
 18            
 19            #include <omp.h>
 20            
 21            #include "fresize.h"
 22 xudong 1.1 
 23            #define minval(x,y) (((x) < (y)) ? (x) : (y))
 24            
 25            #define maxval(x,y) (((x) < (y)) ? (y) : (x))
 26            
 27            #define fresize_sample 1
 28            
 29            #define fresize_bin 2
 30            
 31            #define fresize_1d 3
 32            
 33            #define fresize_2d 4
 34            
 35            
 36            
 37            double sinc(double x)
 38            
 39            {
 40            
 41            if (fabs(x) < (1.e-10))
 42            
 43 xudong 1.1   return 1.;
 44            
 45            else
 46            
 47              return sin(M_PI*x)/(M_PI*x);
 48            
 49            }
 50            
 51            
 52            
 53            int init_fresize_sample(
 54            
 55              struct fresize_struct *pars,
 56            
 57              int nsub
 58            
 59            )
 60            
 61            {
 62            
 63            
 64 xudong 1.1 
 65              pars->method=fresize_sample;
 66            
 67              pars->nsub=nsub;
 68            
 69            
 70            
 71              return 0;
 72            
 73            }
 74            
 75            
 76            
 77            int init_fresize_bin(
 78            
 79              struct fresize_struct *pars,
 80            
 81              int nsub
 82            
 83            )
 84            
 85 xudong 1.1 {
 86            
 87            
 88            
 89              pars->method=fresize_bin;
 90            
 91              pars->nsub=nsub;
 92            
 93            
 94            
 95              return 0;
 96            
 97            }
 98            
 99            
100            
101            int init_fresize_boxcar(
102            
103              struct fresize_struct *pars,
104            
105              int hwidth, // Half width of boxcar. Full is 2*hwidth+1.
106 xudong 1.1 
107              int nsub // Distance between sampled points
108            
109            )
110            
111            {
112            
113              const int malign=32;
114            
115              int fwidth;
116            
117              int i,j;
118            
119            
120            
121              pars->method=fresize_1d;
122            
123              pars->nsub=nsub;
124            
125              pars->hwidth=hwidth;
126            
127 xudong 1.1   fwidth=2*hwidth+1;
128            
129              pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
130            
131              pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
132            
133              for (i=0;i<fwidth;i++) {
134            
135                pars->kerx[i]=1.0f/fwidth;
136            
137                pars->kery[i]=1.0f/fwidth;
138            
139              }
140            
141            
142            
143              return 0;
144            
145            }
146            
147            
148 xudong 1.1 
149            int init_fresize_gaussian(
150            
151              struct fresize_struct *pars,
152            
153              float sigma, // Shape is exp(-(d/sigma)^2/2)
154            
155              int hwidth, // Half (truncation) width of kernel. Full is 2*hwidth+1.
156            
157              int nsub // Distance between sampled points
158            
159            )
160            
161            {
162            
163              const int malign=32;
164            
165              int fwidth;
166            
167              int i,j;
168            
169 xudong 1.1   float sum,x,y;
170            
171            
172            
173              pars->method=fresize_1d;
174            
175              pars->nsub=nsub;
176            
177              pars->hwidth=hwidth;
178            
179              fwidth=2*hwidth+1;
180            
181              pars->kerx=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
182            
183              pars->kery=(float *)(MKL_malloc(fwidth*sizeof(float),malign));
184            
185              sum=0.0f;
186            
187              for (i=0;i<fwidth;i++) {
188            
189                x=(i-hwidth)/sigma;
190 xudong 1.1 
191                y=exp(-x*x/2);
192            
193                sum=sum+y;
194            
195              }
196            
197              for (i=0;i<fwidth;i++) {
198            
199                x=(i-hwidth)/sigma;
200            
201                y=exp(-x*x/2);
202            
203                pars->kerx[i]=y/sum;
204            
205                pars->kery[i]=y/sum;
206            
207              }
208            
209            
210            
211 xudong 1.1   return 0;
212            
213            }
214            
215            
216            
217            int free_fresize(
218            
219              struct fresize_struct *pars
220            
221            )
222            
223            {
224            
225              if (pars->method==fresize_1d) {
226            
227                MKL_free (pars->kerx);
228            
229                MKL_free (pars->kery);
230            
231              }
232 xudong 1.1 
233            
234            
235              return 0;
236            
237            }
238            
239            
240            
241            int fsample(
242            
243              float *image_in,
244            
245              float *image_out,
246            
247              int nxin,
248            
249              int nyin,
250            
251              int nleadin,
252            
253 xudong 1.1   int nxout,
254            
255              int nyout,
256            
257              int nleadout,
258            
259              int nsub,
260            
261              int xoff,
262            
263              int yoff,
264            
265              float fillval
266            
267            )
268            
269            
270            
271            {
272            
273              int i,j;
274 xudong 1.1 
275              int imin,imax,jmin,jmax;
276            
277              float *imp;
278            
279            
280            
281            // Find first and last indices for which resulting input point is within image
282            
283              if (xoff >= 0) imin=0; else imin=((-xoff+nsub-1)/nsub);
284            
285              if (xoff <= 0) imax=nxout-1; else imax=minval(((nxin-xoff-1)/nsub),nxout-1);
286            
287              if (yoff >= 0) jmin=0; else jmin=((-yoff+nsub-1)/nsub);
288            
289              if (yoff <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoff-1)/nsub),nyout-1);
290            
291            
292            
293              imp=image_in+yoff*nleadin+xoff;
294            
295 xudong 1.1 
296            
297            #pragma omp parallel default(none) private(i,j) shared(image_in,image_out,imp,fillval) shared(imin,imax,jmin,jmax) shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
298            
299              { // Needed to define parallel region
300            
301            
302            
303            // Fill below valid region
304            
305            #pragma omp for
306            
307                for (j=0; j<jmin; j++) {
308            
309                  for (i=0; i<nxout; i++) {
310            
311                    image_out[j*nleadout+i]=fillval;
312            
313                  } // i=
314            
315                } //j=
316 xudong 1.1 
317            
318            
319            // Valid heights
320            
321            #pragma omp for
322            
323                for (j=jmin; j<=jmax; j++) {
324            
325            // Fill to the left
326            
327                  for (i=0; i<imin; i++) {
328            
329                    image_out[j*nleadout+i]=fillval;
330            
331                  } // i=
332            
333            // Valid region
334            
335                  for (i=imin; i<=imax; i++) {
336            
337 xudong 1.1 //      image_out[j*nleadout+i]=image_in[j*nsub*nleadin+i*nsub];
338            
339                    image_out[j*nleadout+i]=imp[j*nsub*nleadin+i*nsub];
340            
341                  } // i=
342            
343            // Fill to the right
344            
345                  for (i=imax+1; i<nxout; i++) {
346            
347                    image_out[j*nleadout+i]=fillval;
348            
349                  } // i=
350            
351                } //j=
352            
353            
354            
355            // Fill above valid region
356            
357            #pragma omp for
358 xudong 1.1 
359                for (j=jmax+1; j<nyout; j++) {
360            
361                  for (i=0; i<nxout; i++) {
362            
363                    image_out[j*nleadout+i]=fillval;
364            
365                  } // i=
366            
367                } //j=
368            
369            
370            
371              } // End of parallel region
372            
373            
374            
375              return 0;
376            
377            }
378            
379 xudong 1.1 
380            
381            int fbin(
382            
383              float *image_in,
384            
385              float *image_out,
386            
387              int nxin,
388            
389              int nyin,
390            
391              int nleadin,
392            
393              int nxout,
394            
395              int nyout,
396            
397              int nleadout,
398            
399              int nsub,
400 xudong 1.1 
401              int xoff,
402            
403              int yoff,
404            
405              float fillval
406            
407            )
408            
409            
410            
411            {
412            
413              int i,j,i1,j1;
414            
415              int imin,imax,jmin,jmax;
416            
417              float *imp,*impi;
418            
419              float sum;
420            
421 xudong 1.1 
422            
423            // Find first and last indices for which resulting input point is within image
424            
425              if (xoff >= 0) imin=0; else imin=((-xoff+nsub-1)/nsub);
426            
427              if (xoff <= 0) imax=nxout-1; else imax=minval(((nxin-xoff-nsub)/nsub),nxout-1);
428            
429              if (yoff >= 0) jmin=0; else jmin=((-yoff+nsub-1)/nsub);
430            
431              if (yoff <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoff-nsub)/nsub),nyout-1);
432            
433            
434            
435              imp=image_in+yoff*nleadin+xoff;
436            
437            
438            
439            #pragma omp parallel default(none) private(i,j,i1,j1,impi,sum) shared(image_in,image_out,imp,fillval) shared(imin,imax,jmin,jmax) shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
440            
441              { // Needed to define parallel region
442 xudong 1.1 
443            
444            
445            // Fill below valid region
446            
447            #pragma omp for
448            
449                for (j=0; j<jmin; j++) {
450            
451                  for (i=0; i<nxout; i++) {
452            
453                    image_out[j*nleadout+i]=fillval;
454            
455                  } // i=
456            
457                } //j=
458            
459            
460            
461            // Valid heights
462            
463 xudong 1.1 #pragma omp for
464            
465                for (j=jmin; j<=jmax; j++) {
466            
467            // Fill to the left
468            
469                  for (i=0; i<imin; i++) {
470            
471                    image_out[j*nleadout+i]=fillval;
472            
473                  } // i=
474            
475            // Valid region
476            
477                  for (i=imin; i<=imax; i++) {
478            
479                    impi=imp+j*nleadin*nsub+i*nsub;
480            
481                    sum=0.0f;
482            
483                    for (j1=0; j1<nsub; j1++) {
484 xudong 1.1 
485                      for (i1=0; i1<nsub; i1++) {
486            
487                        sum=sum+impi[i1];
488            
489                      }
490            
491                      impi=impi+nleadin;
492            
493                    }
494            
495                    image_out[j*nleadout+i]=sum/(nsub*nsub);
496            
497                  } // i=
498            
499            // Fill to the right
500            
501                  for (i=imax+1; i<nxout; i++) {
502            
503                    image_out[j*nleadout+i]=fillval;
504            
505 xudong 1.1       } // i=
506            
507                } //j=
508            
509            
510            
511            // Fill above valid region
512            
513            #pragma omp for
514            
515                for (j=jmax+1; j<nyout; j++) {
516            
517                  for (i=0; i<nxout; i++) {
518            
519                    image_out[j*nleadout+i]=fillval;
520            
521                  } // i=
522            
523                } //j=
524            
525            
526 xudong 1.1 
527              } // End of parallel region
528            
529            
530            
531              return 0;
532            
533            }
534            
535            
536            
537            int f1d(
538            
539              struct fresize_struct *pars,
540            
541              float *image_in,
542            
543              float *image_out,
544            
545              int nxin,
546            
547 xudong 1.1   int nyin,
548            
549              int nleadin,
550            
551              int nxout,
552            
553              int nyout,
554            
555              int nleadout,
556            
557              int xoff,
558            
559              int yoff,
560            
561              float fillval
562            
563            )
564            
565            
566            
567            {
568 xudong 1.1 
569              const int malign=32;
570            
571              char transpose[] = "t";
572            
573              char normal[] = "n";
574            
575              const int ione = 1;
576            
577              const float fone = 1.0f;
578            
579              const float fzero = 0.0f;
580            
581              int i,j,i1,j1;
582            
583              int imin,imax,jmin,jmax;
584            
585              float *inp,*inpi,*inpj;
586            
587              float sum;
588            
589 xudong 1.1   int nsub,hwidth;
590            
591              float *kerx,*kery,*work;
592            
593              int xoffl,xoffh,yoffl,yoffh;
594            
595              int nwork;
596            
597              double t1,t2,t3;
598            
599              int n1,n2,nchunk;
600            
601            
602            
603              nsub=pars->nsub;
604            
605              hwidth=pars->hwidth;
606            
607              kerx=pars->kerx;
608            
609              kery=pars->kery;
610 xudong 1.1 
611            
612            
613            // Find first and last indices for which resulting input point is within image
614            
615              xoffl=xoff-hwidth;
616            
617              xoffh=xoff+hwidth;
618            
619              if (xoffl >= 0) imin=0; else imin=((-xoffl+nsub-1)/nsub);
620            
621              if (xoffh <= 0) imax=nxout-1; else imax=minval(((nxin-xoffh-1)/nsub),nxout-1);
622            
623            
624            
625              yoffl=yoff-hwidth;
626            
627              yoffh=yoff+hwidth;
628            
629              if (yoffl >= 0) jmin=0; else jmin=((-yoffl+nsub-1)/nsub);
630            
631 xudong 1.1   if (yoffh <= 0) jmax=nyout-1; else jmax=minval(((nyin-yoffh-1)/nsub),nyout-1);
632            
633            
634            
635            // Get work array big enough to hold convolution in x
636            
637              nwork=nxout*nyin;
638            
639              work=(float *)(MKL_malloc(nwork*sizeof(float),malign));
640            
641            
642            
643            nchunk=64;
644            
645            n1=(imax-imin+1); // Size of matrix for sgemv
646            
647            n2=2*hwidth+1; // Size of matrix for sgemv
648            
649            //t1=dsecnd();
650            
651             
652 xudong 1.1 
653            #pragma omp parallel default(none) private(i,j,i1,j1,inpi,inpj,sum) shared(image_in,image_out,fillval) shared(imin,imax,jmin,jmax,hwidth,kerx,kery,work,xoffl,yoffl) shared(normal,transpose,n1,n2,nchunk) shared(nxin,nyin,nleadin,nxout,nyout,nleadout,nsub)
654            
655              { // Needed to define parallel region
656            
657            
658            
659            // First convolve in x
660            
661            // Brute force loop takes longer than it ought to, but have not
662            
663            // found an obvious solution.
664            
665            #pragma omp for
666            
667                for (j=0;j<nyin;j++) {
668            
669                  inpj=image_in+j*nleadin+xoffl; // Note offset to match kernels.
670            
671                  for (i=imin; i<=imax; i++) {
672            
673 xudong 1.1         sum=0.0f;
674            
675                    inpi=inpj+i*nsub;
676            
677                    for (i1=0; i1<=2*hwidth; i1++) {
678            
679                      sum=sum+kerx[i1]*inpi[i1];
680            
681                    }
682            
683                    work[j*nleadout+i]=sum;
684            
685            // This works but is slower
686            
687            //      work[j*nleadout+i]=sdot(&n2,kerx,&ione,inpj+i*nsub,&ione);
688            
689                  }
690            
691            // This clever trick gives error message
692            
693            //sgemv(transpose,&n2,&n1,&fone,inpj+imin*nsub,&nsub,kerx,&ione,&fzero,work+j*nleadout+imin,&ione);
694 xudong 1.1 
695                }
696            
697            /*
698            
699            // This also works but is slower
700            
701            #pragma omp for
702            
703            for (j=0;j<nyin;j=j+nchunk) {
704            
705              for (i=imin; i<=imax; i++) {
706            
707                sgemv(transpose,&n2,&nchunk,&fone,image_in+j*nleadin+xoffl+i*nsub,&nleadin,kerx,&ione,&fzero,work+j*nleadout+i,&nleadout);
708            
709            }
710            
711            */
712            
713            
714            
715 xudong 1.1 // Then convolve in y
716            
717            
718            
719            #pragma omp for
720            
721                for (j=jmin; j<=jmax; j++) {
722            
723                  inpj=work+(yoffl+j*nsub)*nleadout;
724            
725            /* Old brute force code
726            
727                  for (i=imin; i<=imax; i++) {
728            
729                    sum=0.0f;
730            
731                    for (j1=0; j1<=2*hwidth; j1++) {
732            
733                      sum=sum+inpj[j1*nleadout+i]*kery[j1];
734            
735                    }
736 xudong 1.1 
737                    image_out[j*nleadout+i]=sum;
738            
739                  }
740            
741            */
742            
743                  sgemv(normal,&n1,&n2,&fone,inpj+imin,&nleadout,kery,&ione,&fzero,image_out+j*nleadout+imin,&ione);
744            
745                }
746            
747            
748            
749            // Fill below valid region
750            
751            #pragma omp for
752            
753                for (j=0; j<jmin; j++) {
754            
755                  for (i=0; i<nxout; i++) {
756            
757 xudong 1.1         image_out[j*nleadout+i]=fillval;
758            
759                  } // i=
760            
761                } //j=
762            
763            
764            
765            // Valid heights
766            
767            #pragma omp for
768            
769                for (j=jmin; j<=jmax; j++) {
770            
771            // Fill to the left
772            
773                  for (i=0; i<imin; i++) {
774            
775                    image_out[j*nleadout+i]=fillval;
776            
777                  } // i=
778 xudong 1.1 
779            // Fill to the right
780            
781                  for (i=imax+1; i<nxout; i++) {
782            
783                    image_out[j*nleadout+i]=fillval;
784            
785                  } // i=
786            
787                } //j=
788            
789            
790            
791            // Fill above valid region
792            
793            #pragma omp for
794            
795                for (j=jmax+1; j<nyout; j++) {
796            
797                  for (i=0; i<nxout; i++) {
798            
799 xudong 1.1         image_out[j*nleadout+i]=fillval;
800            
801                  } // i=
802            
803                } //j=
804            
805            
806            
807              } // End of parallel region
808            
809            
810            
811              return 0;
812            
813            }
814            
815            
816            
817            
818            
819            int fresize(
820 xudong 1.1 
821              struct fresize_struct *pars,
822            
823              float *image_in,
824            
825              float *image_out,
826            
827              int nxin,
828            
829              int nyin,
830            
831              int nleadin,
832            
833              int nx,
834            
835              int ny,
836            
837              int nlead,
838            
839              int xoff,
840            
841 xudong 1.1   int yoff,
842            
843              float fillval
844            
845            )
846            
847            {
848            
849              int status;
850            
851            
852            
853              switch (pars->method) {
854            
855            
856            
857              case fresize_sample:
858            
859                status=fsample(image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,pars->nsub,xoff,yoff,fillval);
860            
861                break;
862 xudong 1.1 
863            
864            
865              case fresize_bin:
866            
867                status=fbin(image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,pars->nsub,xoff,yoff,fillval);
868            
869                break;
870            
871            
872            
873              case fresize_1d:
874            
875                status=f1d(pars,image_in,image_out,nxin,nyin,nleadin,nx,ny,nlead,xoff,yoff,fillval);
876            
877                break;
878            
879            
880            
881              default:
882            
883 xudong 1.1     status=1;
884            
885                break;
886            
887              }
888            
889            
890            
891            return status;
892            
893            
894            
895            }
896            
897            
898            

Karen Tian
Powered by
ViewCVS 0.9.4