Actual source code: dgmres.c
petsc-3.14.5 2021-03-03
1: /*
2: Implements deflated GMRES.
3: */
5: #include <../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h>
7: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;
9: #define GMRES_DELTA_DIRECTIONS 10
10: #define GMRES_DEFAULT_MAXK 30
11: static PetscErrorCode KSPDGMRESGetNewVectors(KSP,PetscInt);
12: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP,PetscInt,PetscBool,PetscReal*);
13: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
15: PetscErrorCode KSPDGMRESSetEigen(KSP ksp,PetscInt nb_eig)
16: {
20: PetscTryMethod((ksp),"KSPDGMRESSetEigen_C",(KSP,PetscInt),(ksp,nb_eig));
21: return(0);
22: }
23: PetscErrorCode KSPDGMRESSetMaxEigen(KSP ksp,PetscInt max_neig)
24: {
28: PetscTryMethod((ksp),"KSPDGMRESSetMaxEigen_C",(KSP,PetscInt),(ksp,max_neig));
29: return(0);
30: }
31: PetscErrorCode KSPDGMRESForce(KSP ksp,PetscBool force)
32: {
36: PetscTryMethod((ksp),"KSPDGMRESForce_C",(KSP,PetscBool),(ksp,force));
37: return(0);
38: }
39: PetscErrorCode KSPDGMRESSetRatio(KSP ksp,PetscReal ratio)
40: {
44: PetscTryMethod((ksp),"KSPDGMRESSetRatio_C",(KSP,PetscReal),(ksp,ratio));
45: return(0);
46: }
47: PetscErrorCode KSPDGMRESComputeSchurForm(KSP ksp,PetscInt *neig)
48: {
52: PetscUseMethod((ksp),"KSPDGMRESComputeSchurForm_C",(KSP, PetscInt*),(ksp, neig));
53: return(0);
54: }
55: PetscErrorCode KSPDGMRESComputeDeflationData(KSP ksp,PetscInt *curneigh)
56: {
60: PetscUseMethod((ksp),"KSPDGMRESComputeDeflationData_C",(KSP,PetscInt*),(ksp,curneigh));
61: return(0);
62: }
63: PetscErrorCode KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
64: {
68: PetscUseMethod((ksp),"KSPDGMRESApplyDeflation_C",(KSP, Vec, Vec),(ksp, x, y));
69: return(0);
70: }
72: PetscErrorCode KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
73: {
77: PetscUseMethod((ksp), "KSPDGMRESImproveEig_C",(KSP, PetscInt),(ksp, neig));
78: return(0);
79: }
81: PetscErrorCode KSPSetUp_DGMRES(KSP ksp)
82: {
84: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
85: PetscInt neig = dgmres->neig+EIG_OFFSET;
86: PetscInt max_k = dgmres->max_k+1;
89: KSPSetUp_GMRES(ksp);
90: if (!dgmres->neig) return(0);
92: /* Allocate workspace for the Schur vectors*/
93: PetscMalloc1(neig*max_k, &SR);
94: dgmres->wr = NULL;
95: dgmres->wi = NULL;
96: dgmres->perm = NULL;
97: dgmres->modul = NULL;
98: dgmres->Q = NULL;
99: dgmres->Z = NULL;
101: UU = NULL;
102: XX = NULL;
103: MX = NULL;
104: AUU = NULL;
105: XMX = NULL;
106: XMU = NULL;
107: UMX = NULL;
108: AUAU = NULL;
109: TT = NULL;
110: TTF = NULL;
111: INVP = NULL;
112: X1 = NULL;
113: X2 = NULL;
114: MU = NULL;
115: return(0);
116: }
118: /*
119: Run GMRES, possibly with restart. Return residual history if requested.
120: input parameters:
122: . gmres - structure containing parameters and work areas
124: output parameters:
125: . nres - residuals (from preconditioned system) at each step.
126: If restarting, consider passing nres+it. If null,
127: ignored
128: . itcount - number of iterations used. nres[0] to nres[itcount]
129: are defined. If null, ignored.
131: Notes:
132: On entry, the value in vector VEC_VV(0) should be the initial residual
133: (this allows shortcuts where the initial preconditioned residual is 0).
134: */
135: PetscErrorCode KSPDGMRESCycle(PetscInt *itcount,KSP ksp)
136: {
137: KSP_DGMRES *dgmres = (KSP_DGMRES*)(ksp->data);
138: PetscReal res_norm,res,hapbnd,tt;
140: PetscInt it = 0;
141: PetscInt max_k = dgmres->max_k;
142: PetscBool hapend = PETSC_FALSE;
143: PetscReal res_old;
144: PetscInt test = 0;
147: VecNormalize(VEC_VV(0),&res_norm);
148: KSPCheckNorm(ksp,res_norm);
149: res = res_norm;
150: *GRS(0) = res_norm;
152: /* check for the convergence */
153: PetscObjectSAWsTakeAccess((PetscObject)ksp);
154: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
155: else ksp->rnorm = 0.0;
156: PetscObjectSAWsGrantAccess((PetscObject)ksp);
157: dgmres->it = (it - 1);
158: KSPLogResidualHistory(ksp,ksp->rnorm);
159: KSPMonitor(ksp,ksp->its,ksp->rnorm);
160: if (!res) {
161: if (itcount) *itcount = 0;
162: ksp->reason = KSP_CONVERGED_ATOL;
163: PetscInfo(ksp,"Converged due to zero residual norm on entry\n");
164: return(0);
165: }
166: /* record the residual norm to test if deflation is needed */
167: res_old = res;
169: (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);
170: while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
171: if (it) {
172: KSPLogResidualHistory(ksp,ksp->rnorm);
173: KSPMonitor(ksp,ksp->its,ksp->rnorm);
174: }
175: dgmres->it = (it - 1);
176: if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) {
177: KSPDGMRESGetNewVectors(ksp,it+1);
178: }
179: if (dgmres->r > 0) {
180: if (ksp->pc_side == PC_LEFT) {
181: /* Apply the first preconditioner */
182: KSP_PCApplyBAorAB(ksp,VEC_VV(it), VEC_TEMP,VEC_TEMP_MATOP);
183: /* Then apply Deflation as a preconditioner */
184: KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1+it));
185: } else if (ksp->pc_side == PC_RIGHT) {
186: KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP);
187: KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1+it), VEC_TEMP_MATOP);
188: }
189: } else {
190: KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);
191: }
192: dgmres->matvecs += 1;
193: /* update hessenberg matrix and do Gram-Schmidt */
194: (*dgmres->orthog)(ksp,it);
196: /* vv(i+1) . vv(i+1) */
197: VecNormalize(VEC_VV(it+1),&tt);
198: /* save the magnitude */
199: *HH(it+1,it) = tt;
200: *HES(it+1,it) = tt;
202: /* check for the happy breakdown */
203: hapbnd = PetscAbsScalar(tt / *GRS(it));
204: if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
205: if (tt < hapbnd) {
206: PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %g tt = %g\n",(double)hapbnd,(double)tt);
207: hapend = PETSC_TRUE;
208: }
209: KSPDGMRESUpdateHessenberg(ksp,it,hapend,&res);
211: it++;
212: dgmres->it = (it-1); /* For converged */
213: ksp->its++;
214: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
215: else ksp->rnorm = 0.0;
216: if (ksp->reason) break;
218: (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);
220: /* Catch error in happy breakdown and signal convergence and break from loop */
221: if (hapend) {
222: if (!ksp->reason) {
223: if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res);
224: else {
225: ksp->reason = KSP_DIVERGED_BREAKDOWN;
226: break;
227: }
228: }
229: }
230: }
232: /* Monitor if we know that we will not return for a restart */
233: if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
234: KSPLogResidualHistory(ksp,ksp->rnorm);
235: KSPMonitor(ksp,ksp->its,ksp->rnorm);
236: }
237: if (itcount) *itcount = it;
239: /*
240: Down here we have to solve for the "best" coefficients of the Krylov
241: columns, add the solution values together, and possibly unwind the
242: preconditioning from the solution
243: */
244: /* Form the solution (or the solution so far) */
245: KSPDGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);
247: /* Compute data for the deflation to be used during the next restart */
248: if (!ksp->reason && ksp->its < ksp->max_it) {
249: test = max_k *PetscLogReal(ksp->rtol/res) /PetscLogReal(res/res_old);
250: /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed */
251: if ((test > dgmres->smv*(ksp->max_it-ksp->its)) || dgmres->force) {
252: KSPDGMRESComputeDeflationData(ksp,NULL);
253: }
254: }
255: return(0);
256: }
258: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
259: {
261: PetscInt i,its,itcount;
262: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
263: PetscBool guess_zero = ksp->guess_zero;
266: if (ksp->calc_sings && !dgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
268: PetscObjectSAWsTakeAccess((PetscObject)ksp);
269: ksp->its = 0;
270: dgmres->matvecs = 0;
271: PetscObjectSAWsGrantAccess((PetscObject)ksp);
273: itcount = 0;
274: ksp->reason = KSP_CONVERGED_ITERATING;
275: while (!ksp->reason) {
276: KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);
277: if (ksp->pc_side == PC_LEFT) {
278: dgmres->matvecs += 1;
279: if (dgmres->r > 0) {
280: KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP);
281: VecCopy(VEC_TEMP, VEC_VV(0));
282: }
283: }
285: KSPDGMRESCycle(&its,ksp);
286: itcount += its;
287: if (itcount >= ksp->max_it) {
288: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
289: break;
290: }
291: ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
292: }
293: ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
295: for (i = 0; i < dgmres->r; i++) {
296: VecViewFromOptions(UU[i],(PetscObject)ksp,"-ksp_dgmres_view_deflation_vecs");
297: }
298: return(0);
299: }
301: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
302: {
304: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
305: PetscInt neig1 = dgmres->neig+EIG_OFFSET;
306: PetscInt max_neig = dgmres->max_neig;
309: if (dgmres->r) {
310: VecDestroyVecs(max_neig, &UU);
311: VecDestroyVecs(max_neig, &MU);
312: if (XX) {
313: VecDestroyVecs(neig1, &XX);
314: VecDestroyVecs(neig1, &MX);
315: }
316: PetscFree(TT);
317: PetscFree(TTF);
318: PetscFree(INVP);
319: PetscFree(XMX);
320: PetscFree(UMX);
321: PetscFree(XMU);
322: PetscFree(X1);
323: PetscFree(X2);
324: PetscFree(dgmres->work);
325: PetscFree(dgmres->iwork);
326: PetscFree(dgmres->wr);
327: PetscFree(dgmres->wi);
328: PetscFree(dgmres->modul);
329: PetscFree(dgmres->Q);
330: PetscFree(ORTH);
331: PetscFree(AUAU);
332: PetscFree(AUU);
333: PetscFree(SR2);
334: }
335: PetscFree(SR);
336: KSPDestroy_GMRES(ksp);
337: return(0);
338: }
340: /*
341: KSPDGMRESBuildSoln - create the solution from the starting vector and the
342: current iterates.
344: Input parameters:
345: nrs - work area of size it + 1.
346: vs - index of initial guess
347: vdest - index of result. Note that vs may == vdest (replace
348: guess with the solution).
350: This is an internal routine that knows about the GMRES internals.
351: */
352: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)
353: {
354: PetscScalar tt;
356: PetscInt ii,k,j;
357: KSP_DGMRES *dgmres = (KSP_DGMRES*) (ksp->data);
359: /* Solve for solution vector that minimizes the residual */
362: /* If it is < 0, no gmres steps have been performed */
363: if (it < 0) {
364: VecCopy(vs,vdest); /* VecCopy() is smart, exists immediately if vguess == vdest */
365: return(0);
366: }
367: if (*HH(it,it) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %D GRS(it) = %g",it,(double)PetscAbsScalar(*GRS(it)));
368: if (*HH(it,it) != 0.0) nrs[it] = *GRS(it) / *HH(it,it);
369: else nrs[it] = 0.0;
371: for (ii=1; ii<=it; ii++) {
372: k = it - ii;
373: tt = *GRS(k);
374: for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j];
375: if (*HH(k,k) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is singular. HH(k,k) is identically zero; it = %D k = %D",it,k);
376: nrs[k] = tt / *HH(k,k);
377: }
379: /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
380: VecSet(VEC_TEMP,0.0);
381: VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));
383: /* Apply deflation */
384: if (ksp->pc_side==PC_RIGHT && dgmres->r > 0) {
385: KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP);
386: VecCopy(VEC_TEMP_MATOP, VEC_TEMP);
387: }
388: KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);
390: /* add solution to previous solution */
391: if (vdest != vs) {
392: VecCopy(vs,vdest);
393: }
394: VecAXPY(vdest,1.0,VEC_TEMP);
395: return(0);
396: }
398: /*
399: Do the scalar work for the orthogonalization. Return new residual norm.
400: */
401: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
402: {
403: PetscScalar *hh,*cc,*ss,tt;
404: PetscInt j;
405: KSP_DGMRES *dgmres = (KSP_DGMRES*) (ksp->data);
408: hh = HH(0,it);
409: cc = CC(0);
410: ss = SS(0);
412: /* Apply all the previously computed plane rotations to the new column
413: of the Hessenberg matrix */
414: for (j=1; j<=it; j++) {
415: tt = *hh;
416: *hh = PetscConj(*cc) * tt + *ss * *(hh+1);
417: hh++;
418: *hh = *cc++ * *hh -(*ss++ * tt);
419: }
421: /*
422: compute the new plane rotation, and apply it to:
423: 1) the right-hand-side of the Hessenberg system
424: 2) the new column of the Hessenberg matrix
425: thus obtaining the updated value of the residual
426: */
427: if (!hapend) {
428: tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1));
429: if (tt == 0.0) {
430: ksp->reason = KSP_DIVERGED_NULL;
431: return(0);
432: }
433: *cc = *hh / tt;
434: *ss = *(hh+1) / tt;
435: *GRS(it+1) = -(*ss * *GRS(it));
436: *GRS(it) = PetscConj(*cc) * *GRS(it);
437: *hh = PetscConj(*cc) * *hh + *ss * *(hh+1);
438: *res = PetscAbsScalar(*GRS(it+1));
439: } else {
440: /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply
441: another rotation matrix (so RH doesn't change). The new residual is
442: always the new sine term times the residual from last time (GRS(it)),
443: but now the new sine rotation would be zero...so the residual should
444: be zero...so we will multiply "zero" by the last residual. This might
445: not be exactly what we want to do here -could just return "zero". */
447: *res = 0.0;
448: }
449: return(0);
450: }
452: /*
453: Allocates more work vectors, starting from VEC_VV(it).
454: */
455: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp,PetscInt it)
456: {
457: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
459: PetscInt nwork = dgmres->nwork_alloc,k,nalloc;
462: nalloc = PetscMin(ksp->max_it,dgmres->delta_allocate);
463: /* Adjust the number to allocate to make sure that we don't exceed the
464: number of available slots */
465: if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) {
466: nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
467: }
468: if (!nalloc) return(0);
470: dgmres->vv_allocated += nalloc;
472: KSPCreateVecs(ksp,nalloc,&dgmres->user_work[nwork],0,NULL);
473: PetscLogObjectParents(ksp,nalloc,dgmres->user_work[nwork]);
475: dgmres->mwork_alloc[nwork] = nalloc;
476: for (k=0; k<nalloc; k++) {
477: dgmres->vecs[it+VEC_OFFSET+k] = dgmres->user_work[nwork][k];
478: }
479: dgmres->nwork_alloc++;
480: return(0);
481: }
483: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp,Vec ptr,Vec *result)
484: {
485: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
489: if (!ptr) {
490: if (!dgmres->sol_temp) {
491: VecDuplicate(ksp->vec_sol,&dgmres->sol_temp);
492: PetscLogObjectParent((PetscObject)ksp,(PetscObject)dgmres->sol_temp);
493: }
494: ptr = dgmres->sol_temp;
495: }
496: if (!dgmres->nrs) {
497: /* allocate the work area */
498: PetscMalloc1(dgmres->max_k,&dgmres->nrs);
499: PetscLogObjectMemory((PetscObject)ksp,dgmres->max_k*sizeof(PetscScalar));
500: }
501: KSPDGMRESBuildSoln(dgmres->nrs,ksp->vec_sol,ptr,ksp,dgmres->it);
502: if (result) *result = ptr;
503: return(0);
504: }
506: PetscErrorCode KSPView_DGMRES(KSP ksp,PetscViewer viewer)
507: {
508: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
510: PetscBool iascii,isharmonic;
513: KSPView_GMRES(ksp,viewer);
514: PetscObjectTypeCompare((PetscObject) viewer,PETSCVIEWERASCII,&iascii);
515: if (iascii) {
516: if (dgmres->force) PetscViewerASCIIPrintf(viewer, " Adaptive strategy is used: FALSE\n");
517: else PetscViewerASCIIPrintf(viewer, " Adaptive strategy is used: TRUE\n");
518: PetscOptionsHasName(((PetscObject)ksp)->options,((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &isharmonic);
519: if (isharmonic) {
520: PetscViewerASCIIPrintf(viewer, " Frequency of extracted eigenvalues = %D using Harmonic Ritz values \n", dgmres->neig);
521: } else {
522: PetscViewerASCIIPrintf(viewer, " Frequency of extracted eigenvalues = %D using Ritz values \n", dgmres->neig);
523: }
524: PetscViewerASCIIPrintf(viewer, " Total number of extracted eigenvalues = %D\n", dgmres->r);
525: PetscViewerASCIIPrintf(viewer, " Maximum number of eigenvalues set to be extracted = %D\n", dgmres->max_neig);
526: PetscViewerASCIIPrintf(viewer, " relaxation parameter for the adaptive strategy(smv) = %g\n", dgmres->smv);
527: PetscViewerASCIIPrintf(viewer, " Number of matvecs : %D\n", dgmres->matvecs);
528: }
529: return(0);
530: }
532: PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP ksp,PetscInt neig)
533: {
534: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
537: if (neig< 0 && neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of neig must be positive and less than the restart value ");
538: dgmres->neig=neig;
539: return(0);
540: }
542: static PetscErrorCode KSPDGMRESSetMaxEigen_DGMRES(KSP ksp,PetscInt max_neig)
543: {
544: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
547: if (max_neig < 0 && max_neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of max_neig must be positive and less than the restart value ");
548: dgmres->max_neig=max_neig;
549: return(0);
550: }
552: static PetscErrorCode KSPDGMRESSetRatio_DGMRES(KSP ksp,PetscReal ratio)
553: {
554: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
557: if (ratio <= 0) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The relaxation parameter value must be positive");
558: dgmres->smv=ratio;
559: return(0);
560: }
562: static PetscErrorCode KSPDGMRESForce_DGMRES(KSP ksp,PetscBool force)
563: {
564: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
567: dgmres->force = force;
568: return(0);
569: }
571: PetscErrorCode KSPSetFromOptions_DGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
572: {
574: PetscInt neig;
575: PetscInt max_neig;
576: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
577: PetscBool flg;
580: KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);
581: PetscOptionsHead(PetscOptionsObject,"KSP DGMRES Options");
582: PetscOptionsInt("-ksp_dgmres_eigen","Number of smallest eigenvalues to extract at each restart","KSPDGMRESSetEigen",dgmres->neig, &neig, &flg);
583: if (flg) {
584: KSPDGMRESSetEigen(ksp, neig);
585: }
586: PetscOptionsInt("-ksp_dgmres_max_eigen","Maximum Number of smallest eigenvalues to extract ","KSPDGMRESSetMaxEigen",dgmres->max_neig, &max_neig, &flg);
587: if (flg) {
588: KSPDGMRESSetMaxEigen(ksp, max_neig);
589: }
590: PetscOptionsReal("-ksp_dgmres_ratio","Relaxation parameter for the smaller number of matrix-vectors product allowed","KSPDGMRESSetRatio",dgmres->smv,&dgmres->smv,NULL);
591: PetscOptionsBool("-ksp_dgmres_improve","Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)",NULL,dgmres->improve,&dgmres->improve,NULL);
592: PetscOptionsBool("-ksp_dgmres_force","Sets DGMRES always at restart active, i.e do not use the adaptive strategy","KSPDGMRESForce",dgmres->force,&dgmres->force,NULL);
593: PetscOptionsTail();
594: return(0);
595: }
597: PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
598: {
599: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
601: PetscInt i,j, k;
602: PetscBLASInt nr, bmax;
603: PetscInt r = dgmres->r;
604: PetscInt neig; /* number of eigenvalues to extract at each restart */
605: PetscInt neig1 = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
606: PetscInt max_neig = dgmres->max_neig; /* Max number of eigenvalues to extract during the iterative process */
607: PetscInt N = dgmres->max_k+1;
608: PetscInt n = dgmres->it+1;
609: PetscReal alpha;
612: PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
613: if (dgmres->neig == 0 || (max_neig < (r+neig1) && !dgmres->improve)) {
614: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
615: return(0);
616: }
618: KSPDGMRESComputeSchurForm(ksp, &neig);
619: /* Form the extended Schur vectors X=VV*Sr */
620: if (!XX) {
621: VecDuplicateVecs(VEC_VV(0), neig1, &XX);
622: }
623: for (j = 0; j<neig; j++) {
624: VecZeroEntries(XX[j]);
625: VecMAXPY(XX[j], n, &SR[j*N], &VEC_VV(0));
626: }
628: /* Orthogonalize X against U */
629: if (!ORTH) {
630: PetscMalloc1(max_neig, &ORTH);
631: }
632: if (r > 0) {
633: /* modified Gram-Schmidt */
634: for (j = 0; j<neig; j++) {
635: for (i=0; i<r; i++) {
636: /* First, compute U'*X[j] */
637: VecDot(XX[j], UU[i], &alpha);
638: /* Then, compute X(j)=X(j)-U*U'*X(j) */
639: VecAXPY(XX[j], -alpha, UU[i]);
640: }
641: }
642: }
643: /* Compute MX = M^{-1}*A*X */
644: if (!MX) {
645: VecDuplicateVecs(VEC_VV(0), neig1, &MX);
646: }
647: for (j = 0; j<neig; j++) {
648: KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP);
649: }
650: dgmres->matvecs += neig;
652: if ((r+neig1) > max_neig && dgmres->improve) { /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
653: KSPDGMRESImproveEig(ksp, neig);
654: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
655: return(0); /* We return here since data for M have been improved in KSPDGMRESImproveEig()*/
656: }
658: /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
659: if (!XMX) {
660: PetscMalloc1(neig1*neig1, &XMX);
661: }
662: for (j = 0; j < neig; j++) {
663: VecMDot(MX[j], neig, XX, &(XMX[j*neig1]));
664: }
666: if (r > 0) {
667: /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
668: if (!UMX) {
669: PetscMalloc1(max_neig*neig1, &UMX);
670: }
671: for (j = 0; j < neig; j++) {
672: VecMDot(MX[j], r, UU, &(UMX[j*max_neig]));
673: }
674: /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
675: if (!XMU) {
676: PetscMalloc1(max_neig*neig1, &XMU);
677: }
678: for (j = 0; j<r; j++) {
679: VecMDot(MU[j], neig, XX, &(XMU[j*neig1]));
680: }
681: }
683: /* Form the new matrix T = [T UMX; XMU XMX]; */
684: if (!TT) {
685: PetscMalloc1(max_neig*max_neig, &TT);
686: }
687: if (r > 0) {
688: /* Add XMU to T */
689: for (j = 0; j < r; j++) {
690: PetscArraycpy(&(TT[max_neig*j+r]), &(XMU[neig1*j]), neig);
691: }
692: /* Add [UMX; XMX] to T */
693: for (j = 0; j < neig; j++) {
694: k = r+j;
695: PetscArraycpy(&(TT[max_neig*k]), &(UMX[max_neig*j]), r);
696: PetscArraycpy(&(TT[max_neig*k + r]), &(XMX[neig1*j]), neig);
697: }
698: } else { /* Add XMX to T */
699: for (j = 0; j < neig; j++) {
700: PetscArraycpy(&(TT[max_neig*j]), &(XMX[neig1*j]), neig);
701: }
702: }
704: dgmres->r += neig;
705: r = dgmres->r;
706: PetscBLASIntCast(r,&nr);
707: /*LU Factorize T with Lapack xgetrf routine */
709: PetscBLASIntCast(max_neig,&bmax);
710: if (!TTF) {
711: PetscMalloc1(bmax*bmax, &TTF);
712: }
713: PetscArraycpy(TTF, TT, bmax*r);
714: if (!INVP) {
715: PetscMalloc1(bmax, &INVP);
716: }
717: {
718: PetscBLASInt info;
719: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
720: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
721: }
723: /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
724: if (!UU) {
725: VecDuplicateVecs(VEC_VV(0), max_neig, &UU);
726: VecDuplicateVecs(VEC_VV(0), max_neig, &MU);
727: }
728: for (j=0; j<neig; j++) {
729: VecCopy(XX[j], UU[r-neig+j]);
730: VecCopy(MX[j], MU[r-neig+j]);
731: }
732: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
733: return(0);
734: }
736: PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
737: {
738: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
740: PetscInt N = dgmres->max_k + 1, n=dgmres->it+1;
741: PetscBLASInt bn;
742: PetscReal *A;
743: PetscBLASInt ihi;
744: PetscBLASInt ldA = 0; /* leading dimension of A */
745: PetscBLASInt ldQ; /* leading dimension of Q */
746: PetscReal *Q; /* orthogonal matrix of (left) Schur vectors */
747: PetscReal *work; /* working vector */
748: PetscBLASInt lwork; /* size of the working vector */
749: PetscInt *perm; /* Permutation vector to sort eigenvalues */
750: PetscInt i, j;
751: PetscBLASInt NbrEig; /* Number of eigenvalues really extracted */
752: PetscReal *wr, *wi, *modul; /* Real and imaginary part and modul of the eigenvalues of A */
753: PetscBLASInt *select;
754: PetscBLASInt *iwork;
755: PetscBLASInt liwork;
756: PetscScalar *Ht; /* Transpose of the Hessenberg matrix */
757: PetscScalar *t; /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
758: PetscBLASInt *ipiv; /* Permutation vector to be used in LAPACK */
759: PetscBool flag; /* determine whether to use Ritz vectors or harmonic Ritz vectors */
762: PetscBLASIntCast(n,&bn);
763: PetscBLASIntCast(N,&ldA);
764: ihi = ldQ = bn;
765: PetscBLASIntCast(5*N,&lwork);
767: #if defined(PETSC_USE_COMPLEX)
768: SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "No support for complex numbers.");
769: #endif
771: PetscMalloc1(ldA*ldA, &A);
772: PetscMalloc1(ldQ*n, &Q);
773: PetscMalloc1(lwork, &work);
774: if (!dgmres->wr) {
775: PetscMalloc1(n, &dgmres->wr);
776: PetscMalloc1(n, &dgmres->wi);
777: }
778: wr = dgmres->wr;
779: wi = dgmres->wi;
780: PetscMalloc1(n,&modul);
781: PetscMalloc1(n,&perm);
782: /* copy the Hessenberg matrix to work space */
783: PetscArraycpy(A, dgmres->hes_origin, ldA*ldA);
784: PetscOptionsHasName(((PetscObject)ksp)->options,((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &flag);
785: if (flag) {
786: /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
787: /* Transpose the Hessenberg matrix */
788: PetscMalloc1(bn*bn, &Ht);
789: for (i = 0; i < bn; i++) {
790: for (j = 0; j < bn; j++) {
791: Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
792: }
793: }
795: /* Solve the system H^T*t = h_{m+1,m}e_m */
796: PetscCalloc1(bn, &t);
797: t[bn-1] = dgmres->hes_origin[(bn -1) * ldA + bn]; /* Pick the last element H(m+1,m) */
798: PetscMalloc1(bn, &ipiv);
799: /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
800: {
801: PetscBLASInt info;
802: PetscBLASInt nrhs = 1;
803: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
804: if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling the Lapack routine DGESV");
805: }
806: /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
807: for (i = 0; i < bn; i++) A[(bn-1)*bn+i] += t[i];
808: PetscFree(t);
809: PetscFree(Ht);
810: }
811: /* Compute eigenvalues with the Schur form */
812: {
813: PetscBLASInt info=0;
814: PetscBLASInt ilo = 1;
815: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
816: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d",(int) info);
817: }
818: PetscFree(work);
820: /* sort the eigenvalues */
821: for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
822: for (i=0; i<n; i++) perm[i] = i;
824: PetscSortRealWithPermutation(n, modul, perm);
825: /* save the complex modulus of the largest eigenvalue in magnitude */
826: if (dgmres->lambdaN < modul[perm[n-1]]) dgmres->lambdaN=modul[perm[n-1]];
827: /* count the number of extracted eigenvalues (with complex conjugates) */
828: NbrEig = 0;
829: while (NbrEig < dgmres->neig) {
830: if (wi[perm[NbrEig]] != 0) NbrEig += 2;
831: else NbrEig += 1;
832: }
833: /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */
835: PetscCalloc1(n, &select);
837: if (!dgmres->GreatestEig) {
838: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
839: } else {
840: for (j = 0; j < NbrEig; j++) select[perm[n-j-1]] = 1;
841: }
842: /* call Lapack dtrsen */
843: lwork = PetscMax(1, 4 * NbrEig *(bn-NbrEig));
844: liwork = PetscMax(1, 2 * NbrEig *(bn-NbrEig));
845: PetscMalloc1(lwork, &work);
846: PetscMalloc1(liwork, &iwork);
847: {
848: PetscBLASInt info=0;
849: PetscReal CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
850: PetscReal CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
851: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
852: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
853: }
854: PetscFree(select);
856: /* Extract the Schur vectors */
857: for (j = 0; j < NbrEig; j++) {
858: PetscArraycpy(&SR[j*N], &(Q[j*ldQ]), n);
859: }
860: *neig = NbrEig;
861: PetscFree(A);
862: PetscFree(work);
863: PetscFree(perm);
864: PetscFree(work);
865: PetscFree(iwork);
866: PetscFree(modul);
867: PetscFree(Q);
868: return(0);
869: }
871: PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
872: {
873: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
874: PetscInt i, r = dgmres->r;
876: PetscReal alpha = 1.0;
877: PetscInt max_neig = dgmres->max_neig;
878: PetscBLASInt br,bmax;
879: PetscReal lambda = dgmres->lambdaN;
882: PetscBLASIntCast(r,&br);
883: PetscBLASIntCast(max_neig,&bmax);
884: PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
885: if (!r) {
886: VecCopy(x,y);
887: return(0);
888: }
889: /* Compute U'*x */
890: if (!X1) {
891: PetscMalloc1(bmax, &X1);
892: PetscMalloc1(bmax, &X2);
893: }
894: VecMDot(x, r, UU, X1);
896: /* Solve T*X1=X2 for X1*/
897: PetscArraycpy(X2, X1, br);
898: {
899: PetscBLASInt info;
900: PetscBLASInt nrhs = 1;
901: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
902: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
903: }
904: /* Iterative refinement -- is it really necessary ?? */
905: if (!WORK) {
906: PetscMalloc1(3*bmax, &WORK);
907: PetscMalloc1(bmax, &IWORK);
908: }
909: {
910: PetscBLASInt info;
911: PetscReal berr, ferr;
912: PetscBLASInt nrhs = 1;
913: PetscStackCallBLAS("LAPACKgerfs",LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
914: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
915: }
917: for (i = 0; i < r; i++) X2[i] = X1[i]/lambda - X2[i];
919: /* Compute X2=U*X2 */
920: VecZeroEntries(y);
921: VecMAXPY(y, r, X2, UU);
922: VecAXPY(y, alpha, x);
924: PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
925: return(0);
926: }
928: static PetscErrorCode KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
929: {
930: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
931: PetscInt j,r_old, r = dgmres->r;
932: PetscBLASInt i = 0;
933: PetscInt neig1 = dgmres->neig + EIG_OFFSET;
934: PetscInt bmax = dgmres->max_neig;
935: PetscInt aug = r + neig; /* actual size of the augmented invariant basis */
936: PetscInt aug1 = bmax+neig1; /* maximum size of the augmented invariant basis */
937: PetscBLASInt ldA; /* leading dimension of AUAU and AUU*/
938: PetscBLASInt N; /* size of AUAU */
939: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
940: PetscReal *Z; /* orthogonal matrix of (right) schur vectors */
941: PetscReal *work; /* working vector */
942: PetscBLASInt lwork; /* size of the working vector */
943: PetscInt *perm; /* Permutation vector to sort eigenvalues */
944: PetscReal *wr, *wi, *beta, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
945: PetscInt ierr;
946: PetscBLASInt NbrEig = 0,nr,bm;
947: PetscBLASInt *select;
948: PetscBLASInt liwork, *iwork;
951: /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
952: if (!AUU) {
953: PetscMalloc1(aug1*aug1, &AUU);
954: PetscMalloc1(aug1*aug1, &AUAU);
955: }
956: /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
957: * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
958: /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
959: for (j=0; j < r; j++) {
960: VecMDot(UU[j], r, MU, &AUU[j*aug1]);
961: }
962: /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
963: for (j = 0; j < neig; j++) {
964: VecMDot(XX[j], r, MU, &AUU[(r+j) *aug1]);
965: }
966: /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
967: for (j = 0; j < r; j++) {
968: VecMDot(UU[j], neig, MX, &AUU[j*aug1+r]);
969: }
970: /* (MX)'*X size (neig neig) -- store in AUU from the column <r> and the row <r>*/
971: for (j = 0; j < neig; j++) {
972: VecMDot(XX[j], neig, MX, &AUU[(r+j) *aug1 + r]);
973: }
975: /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
976: /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
977: for (j=0; j < r; j++) {
978: VecMDot(MU[j], r, MU, &AUAU[j*aug1]);
979: }
980: /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
981: for (j = 0; j < neig; j++) {
982: VecMDot(MX[j], r, MU, &AUAU[(r+j) *aug1]);
983: }
984: /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
985: for (j = 0; j < r; j++) {
986: VecMDot(MU[j], neig, MX, &AUAU[j*aug1+r]);
987: }
988: /* (MX)'*MX size (neig neig) -- store in AUAU from the column <r> and the row <r>*/
989: for (j = 0; j < neig; j++) {
990: VecMDot(MX[j], neig, MX, &AUAU[(r+j) *aug1 + r]);
991: }
993: /* Computation of the eigenvectors */
994: PetscBLASIntCast(aug1,&ldA);
995: PetscBLASIntCast(aug,&N);
996: lwork = 8 * N + 20; /* sizeof the working space */
997: PetscMalloc1(N, &wr);
998: PetscMalloc1(N, &wi);
999: PetscMalloc1(N, &beta);
1000: PetscMalloc1(N, &modul);
1001: PetscMalloc1(N, &perm);
1002: PetscMalloc1(N*N, &Q);
1003: PetscMalloc1(N*N, &Z);
1004: PetscMalloc1(lwork, &work);
1005: {
1006: PetscBLASInt info=0;
1007: PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
1008: if (info) SETERRQ1 (PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
1009: }
1010: for (i=0; i<N; i++) {
1011: if (beta[i] !=0.0) {
1012: wr[i] /=beta[i];
1013: wi[i] /=beta[i];
1014: }
1015: }
1016: /* sort the eigenvalues */
1017: for (i=0; i<N; i++) modul[i]=PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
1018: for (i=0; i<N; i++) perm[i] = i;
1019: PetscSortRealWithPermutation(N, modul, perm);
1020: /* Save the norm of the largest eigenvalue */
1021: if (dgmres->lambdaN < modul[perm[N-1]]) dgmres->lambdaN = modul[perm[N-1]];
1022: /* Allocate space to extract the first r schur vectors */
1023: if (!SR2) {
1024: PetscMalloc1(aug1*bmax, &SR2);
1025: }
1026: /* count the number of extracted eigenvalues (complex conjugates count as 2) */
1027: while (NbrEig < bmax) {
1028: if (wi[perm[NbrEig]] == 0) NbrEig += 1;
1029: else NbrEig += 2;
1030: }
1031: if (NbrEig > bmax) NbrEig = bmax - 1;
1032: r_old = r; /* previous size of r */
1033: dgmres->r = r = NbrEig;
1035: /* Select the eigenvalues to reorder */
1036: PetscCalloc1(N, &select);
1037: if (!dgmres->GreatestEig) {
1038: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
1039: } else {
1040: for (j = 0; j < NbrEig; j++) select[perm[N-j-1]] = 1;
1041: }
1042: /* Reorder and extract the new <r> schur vectors */
1043: lwork = PetscMax(4 * N + 16, 2 * NbrEig *(N - NbrEig));
1044: liwork = PetscMax(N + 6, 2 * NbrEig *(N - NbrEig));
1045: PetscFree(work);
1046: PetscMalloc1(lwork, &work);
1047: PetscMalloc1(liwork, &iwork);
1048: {
1049: PetscBLASInt info=0;
1050: PetscReal Dif[2];
1051: PetscBLASInt ijob = 2;
1052: PetscBLASInt wantQ = 1, wantZ = 1;
1053: PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
1054: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "Unable to reorder the eigenvalues with the LAPACK routine: ill-conditioned problem.");
1055: }
1056: PetscFree(select);
1058: for (j=0; j<r; j++) {
1059: PetscArraycpy(&SR2[j*aug1], &(Z[j*N]), N);
1060: }
1062: /* Multiply the Schur vectors SR2 by U (and X) to get a new U
1063: -- save it temporarily in MU */
1064: for (j = 0; j < r; j++) {
1065: VecZeroEntries(MU[j]);
1066: VecMAXPY(MU[j], r_old, &SR2[j*aug1], UU);
1067: VecMAXPY(MU[j], neig, &SR2[j*aug1+r_old], XX);
1068: }
1069: /* Form T = U'*MU*U */
1070: for (j = 0; j < r; j++) {
1071: VecCopy(MU[j], UU[j]);
1072: KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP);
1073: }
1074: dgmres->matvecs += r;
1075: for (j = 0; j < r; j++) {
1076: VecMDot(MU[j], r, UU, &TT[j*bmax]);
1077: }
1078: /* Factorize T */
1079: PetscArraycpy(TTF, TT, bmax*r);
1080: PetscBLASIntCast(r,&nr);
1081: PetscBLASIntCast(bmax,&bm);
1082: {
1083: PetscBLASInt info;
1084: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
1085: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
1086: }
1087: /* Free Memory */
1088: PetscFree(wr);
1089: PetscFree(wi);
1090: PetscFree(beta);
1091: PetscFree(modul);
1092: PetscFree(perm);
1093: PetscFree(Q);
1094: PetscFree(Z);
1095: PetscFree(work);
1096: PetscFree(iwork);
1097: return(0);
1098: }
1100: /*MC
1101: KSPDGMRES - Implements the deflated GMRES as defined in [1,2].
1102: In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the
1103: stagnation occurs.
1105: Options Database Keys:
1106: GMRES Options (inherited):
1107: + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1108: . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1109: . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1110: vectors are allocated as needed)
1111: . -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1112: . -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1113: . -ksp_gmres_cgs_refinement_type <refine_never,refine_ifneeded,refine_always> - determine if iterative refinement is used to increase the
1114: stability of the classical Gram-Schmidt orthogonalization.
1115: - -ksp_gmres_krylov_monitor - plot the Krylov space generated
1117: DGMRES Options Database Keys:
1118: + -ksp_dgmres_eigen <neig> - number of smallest eigenvalues to extract at each restart
1119: . -ksp_dgmres_max_eigen <max_neig> - maximum number of eigenvalues that can be extracted during the iterative
1120: process
1121: . -ksp_dgmres_force - use the deflation at each restart; switch off the adaptive strategy.
1122: - -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
1123: parsed by PetscOptionsGetViewer(). If neig > 1, viewerspec should
1124: end with ":append". No vectors will be viewed if the adaptive
1125: strategy chooses not to deflate, so -ksp_dgmres_force should also
1126: be given.
1127: The deflation vectors span a subspace that may be a good
1128: approximation of the subspace of smallest eigenvectors of the
1129: preconditioned operator, so this option can aid in understanding
1130: the performance of a preconditioner.
1132: Level: beginner
1134: Notes:
1135: Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not yet supported
1137: References:
1138: + 1. - J. Erhel, K. Burrage and B. Pohl, Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996).
1139: - 2. - D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids,
1140: In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023
1142: Contributed by: Desire NUENTSA WAKAM,INRIA
1144: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPLGMRES,
1145: KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
1146: KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
1147: KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()
1149: M*/
1151: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1152: {
1153: KSP_DGMRES *dgmres;
1157: PetscNewLog(ksp,&dgmres);
1158: ksp->data = (void*) dgmres;
1160: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,3);
1161: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);
1162: KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);
1164: ksp->ops->buildsolution = KSPBuildSolution_DGMRES;
1165: ksp->ops->setup = KSPSetUp_DGMRES;
1166: ksp->ops->solve = KSPSolve_DGMRES;
1167: ksp->ops->destroy = KSPDestroy_DGMRES;
1168: ksp->ops->view = KSPView_DGMRES;
1169: ksp->ops->setfromoptions = KSPSetFromOptions_DGMRES;
1170: ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1171: ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES;
1173: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);
1174: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetOrthogonalization_C",KSPGMRESSetOrthogonalization_GMRES);
1175: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);
1176: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetHapTol_C",KSPGMRESSetHapTol_GMRES);
1177: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetCGSRefinementType_C",KSPGMRESSetCGSRefinementType_GMRES);
1178: /* -- New functions defined in DGMRES -- */
1179: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C",KSPDGMRESSetEigen_DGMRES);
1180: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C",KSPDGMRESSetMaxEigen_DGMRES);
1181: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C",KSPDGMRESSetRatio_DGMRES);
1182: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C",KSPDGMRESForce_DGMRES);
1183: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C",KSPDGMRESComputeSchurForm_DGMRES);
1184: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C",KSPDGMRESComputeDeflationData_DGMRES);
1185: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C",KSPDGMRESApplyDeflation_DGMRES);
1186: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES);
1188: PetscLogEventRegister("DGMRESCompDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1189: PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);
1191: dgmres->haptol = 1.0e-30;
1192: dgmres->q_preallocate = 0;
1193: dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1194: dgmres->orthog = KSPGMRESClassicalGramSchmidtOrthogonalization;
1195: dgmres->nrs = NULL;
1196: dgmres->sol_temp = NULL;
1197: dgmres->max_k = GMRES_DEFAULT_MAXK;
1198: dgmres->Rsvd = NULL;
1199: dgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER;
1200: dgmres->orthogwork = NULL;
1202: /* Default values for the deflation */
1203: dgmres->r = 0;
1204: dgmres->neig = DGMRES_DEFAULT_EIG;
1205: dgmres->max_neig = DGMRES_DEFAULT_MAXEIG-1;
1206: dgmres->lambdaN = 0.0;
1207: dgmres->smv = SMV;
1208: dgmres->matvecs = 0;
1209: dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1210: dgmres->HasSchur = PETSC_FALSE;
1211: return(0);
1212: }