00001 #include "ctypes_def.h"
00002
00003 #if TYPE == FLOAT
00004 #define PCG spcg
00005 #elif TYPE == DOUBLE
00006 #define PCG dpcg
00007 #elif TYPE == COMPLEXFLOAT
00008 #define PCG cpcg
00009 #elif TYPE == COMPLEXDOUBLE
00010 #define PCG zpcg
00011 #endif
00012
00013
00014 int PCG( int n, int maxit, RTYPE tol,
00015 void (*amult)(int n, CTYPE *x, CTYPE *y, void **data),
00016 void (*msolve)(int n, CTYPE *x, CTYPE *y, void **data),
00017 CTYPE *b, CTYPE *x, RTYPE *rnorm1,
00018 void **adata, void **mdata)
00019 {
00020 int k,i;
00021 RTYPE rnorm, bnorm;
00022 CTYPE gamma, alpha, beta, beta_old;
00023 CTYPE *r, *z, *p, *Ap;
00024
00025 k = 0;
00026
00027 bnorm = NRM2(n,b,1);
00028 if (bnorm == RZERO)
00029 {
00030 memset(x,0,n*sizeof(CTYPE));
00031 *rnorm1 = RZERO;
00032 return k;
00033 }
00034
00035 r = (CTYPE *)malloc(n*sizeof(CTYPE));
00036 Ap = (CTYPE *)malloc(n*sizeof(CTYPE));
00037
00038
00039 amult(n,x,Ap,adata);
00040
00041
00042 for (i=0;i<n;i++)
00043 r[i] = b[i] - Ap[i];
00044 rnorm = NRM2(n,r,1);
00045
00046
00047 if (rnorm < tol*bnorm)
00048 {
00049 *rnorm1 = rnorm/bnorm;
00050 free(r);
00051 free(Ap);
00052 return k;
00053 }
00054
00055 z = (CTYPE *)malloc(n*sizeof(CTYPE));
00056 p = (CTYPE *)malloc(n*sizeof(CTYPE));
00057
00058 if (msolve)
00059 msolve(n,r,z,mdata);
00060 else
00061 memcpy(z, r, n*sizeof(CTYPE));
00062
00063
00064 memcpy(p,z,n*sizeof(CTYPE));
00065
00066
00067 beta_old = DOTC(n,z,1,r,1);
00068
00069 for (k=0; k<maxit && rnorm >= tol*bnorm; k++)
00070 {
00071
00072 amult(n,p,Ap,adata);
00073
00074
00075 alpha = DOTC(n,Ap,1,p,1);
00076
00077
00078 gamma = beta_old / alpha;
00079 AXPY(n,gamma,p,1,x,1);
00080 AXPY(n,-gamma,Ap,1,r,1);
00081 rnorm = NRM2(n,r,1);
00082
00083
00084 if (msolve)
00085 msolve(n,r,z,mdata);
00086 else
00087 memcpy(z, r, n*sizeof(CTYPE));
00088
00089
00090 beta = DOTC(n,z,1,r,1);
00091 gamma = beta / beta_old;
00092 beta_old = beta;
00093
00094 for (i=0;i<n;i++)
00095 p[i] = z[i] + gamma*p[i];
00096 }
00097
00098 free(r);
00099 free(z);
00100 free(p);
00101 free(Ap);
00102
00103 *rnorm1 = rnorm/bnorm;
00104 return k;
00105 }
00106
00107
00108 #ifdef NOBLAS
00109
00110 int PCG( int n, int maxit, RTYPE tol,
00111 void (*amult)(int n, CTYPE *x, CTYPE *y, void **data),
00112 void (*msolve)(int n, CTYPE *x, CTYPE *y, void **data),
00113 CTYPE *b, CTYPE *x, RTYPE *rnorm1,
00114 void **adata, void **mdata)
00115 {
00116 int k,i;
00117 RTYPE rnorm, bnorm;
00118 CTYPE gamma, alpha, beta, beta_old;
00119 CTYPE *r, *z, *p, *Ap;
00120
00121 k=0;
00122
00123
00124 bnorm = RZERO;
00125 for (i=0;i<n;i++)
00126 bnorm = CONJ(b[i])*b[i];
00127 bnorm = sqrt(bnorm);
00128 if (bnorm == RZERO)
00129 {
00130 memset(x,0,n*sizeof(CTYPE));
00131 *rnorm1 = RZERO;
00132 return k;
00133 }
00134
00135 r = (CTYPE *)malloc(n*sizeof(CTYPE));
00136 Ap = (CTYPE *)malloc(n*sizeof(CTYPE));
00137
00138
00139 amult(n,x,Ap,adata);
00140
00141
00142 rnorm = RZERO;
00143 for (i=0;i<n;i++)
00144 {
00145 r[i] = b[i] - Ap[i];
00146 rnorm += CONJ(r[i])*r[i];
00147 }
00148 rnorm = sqrt(rnorm);
00149
00150
00151 if (rnorm < tol*bnorm)
00152 {
00153 *rnorm1 = rnorm/bnorm;
00154 free(r);
00155 free(Ap);
00156 return k;
00157 }
00158
00159 z = (CTYPE *)malloc(n*sizeof(CTYPE));
00160 p = (CTYPE *)malloc(n*sizeof(CTYPE));
00161
00162 if (msolve)
00163 msolve(n,r,z,mdata);
00164 else
00165 memcpy(z, r, n*sizeof(CTYPE));
00166
00167
00168 memcpy(p,z,n*sizeof(CTYPE));
00169
00170
00171 beta_old = CZERO;
00172 for (i=0; i<n; i++)
00173 beta_old += CONJ(z[i]) * r[i];
00174
00175 for (k=0; k<maxit && rnorm >= tol*bnorm; k++)
00176 {
00177
00178 amult(n,p,Ap,adata);
00179
00180
00181 alpha = CZERO;
00182 for (i=0;i<n;i++)
00183 alpha += CONJ(Ap[i]) * p[i];
00184
00185
00186 gamma = beta_old / alpha;
00187 rnorm = RZERO;
00188 for (i=0;i<n;i++)
00189 {
00190 x[i] += gamma*p[i];
00191 r[i] -= gamma*Ap[i];
00192 rnorm += SQR(r[i]);
00193 }
00194 rnorm = sqrt(rnorm);
00195
00196
00197 if (msolve)
00198 msolve(n,r,z,mdata);
00199 else
00200 memcpy(z, r, n*sizeof(CTYPE));
00201
00202
00203 beta = CZERO;
00204 for (i=0;i<n;i++)
00205 beta += CONJ(z[i]) * r[i];
00206 gamma = beta / beta_old;
00207 beta_old = beta;
00208
00209 for (i=0;i<n;i++)
00210 p[i] = z[i] + gamma*p[i];
00211 }
00212
00213 free(r);
00214 free(z);
00215 free(p);
00216 free(Ap);
00217
00218 *rnorm1 = rnorm/bnorm;
00219 return k;
00220 }
00221 #endif
00222
00223 #undef PCG
00224 #include "ctypes_undef.h"