00001
00002
00003 #include "ctypes_def.h"
00004
00005 #if TYPE == FLOAT
00006 #define PCG spcg
00007 #elif TYPE == DOUBLE
00008 #define PCG dpcg
00009 #elif TYPE == COMPLEXFLOAT
00010 #define PCG cpcg
00011 #elif TYPE == COMPLEXDOUBLE
00012 #define PCG zpcg
00013 #endif
00014
00015
00016 int PCG( int n, int maxit, RTYPE tol,
00017 void (*amult)(int n, CTYPE *x, CTYPE *y, void **data),
00018 void (*msolve)(int n, CTYPE *x, CTYPE *y, void **data),
00019 CTYPE *b, CTYPE *x, RTYPE *rnorm1,
00020 void **adata, void **mdata)
00021 {
00022 int k,i;
00023 RTYPE rnorm, bnorm;
00024 CTYPE gamma, alpha, beta, beta_old;
00025 CTYPE *r, *z, *p, *Ap;
00026
00027 k = 0;
00028
00029 bnorm = NRM2(n,b,1);
00030 if (bnorm == RZERO)
00031 {
00032 memset(x,0,n*sizeof(CTYPE));
00033 *rnorm1 = RZERO;
00034 return k;
00035 }
00036
00037 r = (CTYPE *)malloc(n*sizeof(CTYPE));
00038 Ap = (CTYPE *)malloc(n*sizeof(CTYPE));
00039
00040
00041 amult(n,x,Ap,adata);
00042
00043
00044 for (i=0;i<n;i++)
00045 r[i] = b[i] - Ap[i];
00046 rnorm = NRM2(n,r,1);
00047
00048
00049 if (rnorm < tol*bnorm)
00050 {
00051 *rnorm1 = rnorm/bnorm;
00052 free(r);
00053 free(Ap);
00054 return k;
00055 }
00056
00057 z = (CTYPE *)malloc(n*sizeof(CTYPE));
00058 p = (CTYPE *)malloc(n*sizeof(CTYPE));
00059
00060 if (msolve)
00061 msolve(n,r,z,mdata);
00062 else
00063 memcpy(z, r, n*sizeof(CTYPE));
00064
00065
00066 memcpy(p,z,n*sizeof(CTYPE));
00067
00068
00069 beta_old = DOTC(n,z,1,r,1);
00070
00071 for (k=0; k<maxit && rnorm >= tol*bnorm; k++)
00072 {
00073
00074 amult(n,p,Ap,adata);
00075
00076
00077 alpha = DOTC(n,Ap,1,p,1);
00078
00079
00080 gamma = beta_old / alpha;
00081 AXPY(n,gamma,p,1,x,1);
00082 AXPY(n,-gamma,Ap,1,r,1);
00083 rnorm = NRM2(n,r,1);
00084
00085
00086 if (msolve)
00087 msolve(n,r,z,mdata);
00088 else
00089 memcpy(z, r, n*sizeof(CTYPE));
00090
00091
00092 beta = DOTC(n,z,1,r,1);
00093 gamma = beta / beta_old;
00094 beta_old = beta;
00095
00096 for (i=0;i<n;i++)
00097 p[i] = z[i] + gamma*p[i];
00098 }
00099
00100 free(r);
00101 free(z);
00102 free(p);
00103 free(Ap);
00104
00105 *rnorm1 = rnorm/bnorm;
00106 return k;
00107 }
00108
00109
00110 #ifdef NOBLAS
00111
00112 int PCG( int n, int maxit, RTYPE tol,
00113 void (*amult)(int n, CTYPE *x, CTYPE *y, void **data),
00114 void (*msolve)(int n, CTYPE *x, CTYPE *y, void **data),
00115 CTYPE *b, CTYPE *x, RTYPE *rnorm1,
00116 void **adata, void **mdata)
00117 {
00118 int k,i;
00119 RTYPE rnorm, bnorm;
00120 CTYPE gamma, alpha, beta, beta_old;
00121 CTYPE *r, *z, *p, *Ap;
00122
00123 k=0;
00124
00125
00126 bnorm = RZERO;
00127 for (i=0;i<n;i++)
00128 bnorm = CONJ(b[i])*b[i];
00129 bnorm = sqrt(bnorm);
00130 if (bnorm == RZERO)
00131 {
00132 memset(x,0,n*sizeof(CTYPE));
00133 *rnorm1 = RZERO;
00134 return k;
00135 }
00136
00137 r = (CTYPE *)malloc(n*sizeof(CTYPE));
00138 Ap = (CTYPE *)malloc(n*sizeof(CTYPE));
00139
00140
00141 amult(n,x,Ap,adata);
00142
00143
00144 rnorm = RZERO;
00145 for (i=0;i<n;i++)
00146 {
00147 r[i] = b[i] - Ap[i];
00148 rnorm += CONJ(r[i])*r[i];
00149 }
00150 rnorm = sqrt(rnorm);
00151
00152
00153 if (rnorm < tol*bnorm)
00154 {
00155 *rnorm1 = rnorm/bnorm;
00156 free(r);
00157 free(Ap);
00158 return k;
00159 }
00160
00161 z = (CTYPE *)malloc(n*sizeof(CTYPE));
00162 p = (CTYPE *)malloc(n*sizeof(CTYPE));
00163
00164 if (msolve)
00165 msolve(n,r,z,mdata);
00166 else
00167 memcpy(z, r, n*sizeof(CTYPE));
00168
00169
00170 memcpy(p,z,n*sizeof(CTYPE));
00171
00172
00173 beta_old = CZERO;
00174 for (i=0; i<n; i++)
00175 beta_old += CONJ(z[i]) * r[i];
00176
00177 for (k=0; k<maxit && rnorm >= tol*bnorm; k++)
00178 {
00179
00180 amult(n,p,Ap,adata);
00181
00182
00183 alpha = CZERO;
00184 for (i=0;i<n;i++)
00185 alpha += CONJ(Ap[i]) * p[i];
00186
00187
00188 gamma = beta_old / alpha;
00189 rnorm = RZERO;
00190 for (i=0;i<n;i++)
00191 {
00192 x[i] += gamma*p[i];
00193 r[i] -= gamma*Ap[i];
00194 rnorm += SQR(r[i]);
00195 }
00196 rnorm = sqrt(rnorm);
00197
00198
00199 if (msolve)
00200 msolve(n,r,z,mdata);
00201 else
00202 memcpy(z, r, n*sizeof(CTYPE));
00203
00204
00205 beta = CZERO;
00206 for (i=0;i<n;i++)
00207 beta += CONJ(z[i]) * r[i];
00208 gamma = beta / beta_old;
00209 beta_old = beta;
00210
00211 for (i=0;i<n;i++)
00212 p[i] = z[i] + gamma*p[i];
00213 }
00214
00215 free(r);
00216 free(z);
00217 free(p);
00218 free(Ap);
00219
00220 *rnorm1 = rnorm/bnorm;
00221 return k;
00222 }
00223 #endif
00224
00225 #undef PCG
00226 #include "ctypes_undef.h"