Actual source code: ex4.c

  1: /* Program usage:  mpiexec -n <procs> ex5 [-help] [all PETSc options] */

  3: static char help[] = "Nonlinear PDE in 2d.\n\
  4: We solve the Lane-Emden equation in a 2D rectangular\n\
  5: domain, using distributed arrays (DMDAs) to partition the parallel grid.\n\n";

  7: /*T
  8:    Concepts: SNES^parallel Lane-Emden example
  9:    Concepts: DMDA^using distributed arrays;
 10:    Processors: n
 11: T*/

 13: /* ------------------------------------------------------------------------

 15:     The Lane-Emden equation is given by the partial differential equation
 16:   
 17:             -alpha*Laplacian u - lambda*u^3 = 0,  0 < x,y < 1,
 18:   
 19:     with boundary conditions
 20:    
 21:              u = 0  for  x = 0, x = 1, y = 0, y = 1.
 22:   
 23:     A bilinear finite element approximation is used to discretize the boundary
 24:     value problem to obtain a nonlinear system of equations.

 26:   ------------------------------------------------------------------------- */

 28: /* 
 29:    Include "petscdmda.h" so that we can use distributed arrays (DMDAs).
 30:    Include "petscsnes.h" so that we can use SNES solvers.  Note that this
 31:    file automatically includes:
 32:      petscsys.h       - base PETSc routines   petscvec.h - vectors
 33:      petscmat.h - matrices
 34:      petscis.h     - index sets            petscksp.h - Krylov subspace methods
 35:      petscviewer.h - viewers               petscpc.h  - preconditioners
 36:      petscksp.h   - linear solvers
 37: */
 38: #include <petscsys.h>
 39: #include <petscbag.h>
 40: #include <petscdmda.h>
 41: #include <petscdmmg.h>
 42: #include <petscsnes.h>

 44: /* 
 45:    User-defined application context - contains data needed by the 
 46:    application-provided call-back routines, FormJacobianLocal() and
 47:    FormFunctionLocal().
 48: */
 49: typedef struct {
 50:    PetscReal alpha;          /* parameter controlling linearity */
 51:    PetscReal lambda;         /* parameter controlling nonlinearity */
 52: } AppCtx;

 54: static PetscScalar Kref[16] = { 0.666667, -0.166667, -0.333333, -0.166667,
 55:                                -0.166667,  0.666667, -0.166667, -0.333333,
 56:                                -0.333333, -0.166667,  0.666667, -0.166667,
 57:                                -0.166667, -0.333333, -0.166667,  0.666667};

 59: /* These are */
 60: static PetscScalar quadPoints[8] = {0.211325, 0.211325,
 61:                                     0.788675, 0.211325,
 62:                                     0.788675, 0.788675,
 63:                                     0.211325, 0.788675};
 64: static PetscScalar quadWeights[4] = {0.25, 0.25, 0.25, 0.25};

 66: /* 
 67:    User-defined routines
 68: */

 76: int main(int argc,char **argv)
 77: {
 78:   DMMG                  *dmmg;                 /* hierarchy manager */
 79:   DM                     da;
 80:   SNES                   snes;                 /* nonlinear solver */
 81:   AppCtx                *user;                 /* user-defined work context */
 82:   PetscBag               bag;
 83:   PetscInt               its;                  /* iterations for convergence */
 84:   SNESConvergedReason    reason;
 85:   PetscErrorCode         ierr;
 86:   PetscReal              lambda_max = 6.81, lambda_min = 0.0;

 88:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 89:      Initialize program
 90:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 91:   PetscInitialize(&argc,&argv,(char *)0,help);

 93:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 94:      Initialize problem parameters
 95:   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 96:   PetscBagCreate(PETSC_COMM_WORLD, sizeof(AppCtx), &bag);
 97:   PetscBagGetData(bag, (void **) &user);
 98:   PetscBagSetName(bag, "params", "Parameters for SNES example 4");
 99:   PetscBagRegisterReal(bag, &user->alpha, 1.0, "alpha", "Linear coefficient");
100:   PetscBagRegisterReal(bag, &user->lambda, 6.0, "lambda", "Nonlinear coefficient");
101:   PetscBagSetFromOptions(bag);
102:   PetscOptionsGetReal(PETSC_NULL,"-alpha",&user->alpha,PETSC_NULL);
103:   PetscOptionsGetReal(PETSC_NULL,"-lambda",&user->lambda,PETSC_NULL);
104:   if (user->lambda > lambda_max || user->lambda < lambda_min) {
105:     SETERRQ3(PETSC_COMM_SELF,1,"Lambda %G is out of range [%G, %G]", user->lambda, lambda_min, lambda_max);
106:   }

108:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
109:      Create multilevel DM data structure (DMMG) to manage hierarchical solvers
110:   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
111:   DMMGCreate(PETSC_COMM_WORLD,1,user,&dmmg);

113:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
114:      Create distributed array (DMDA) to manage parallel grid and vectors
115:   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
116:   DMDACreate2d(PETSC_COMM_WORLD, DMDA_BOUNDARY_NONE, DMDA_BOUNDARY_NONE,DMDA_STENCIL_BOX,-3,-3,PETSC_DECIDE,PETSC_DECIDE,
117:                     1,1,PETSC_NULL,PETSC_NULL,&da);
118:   DMDASetFieldName(da, 0, "ooblek");
119:   DMMGSetDM(dmmg, (DM) da);
120:   DMDestroy(&da);

122:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
123:      Set the discretization functions
124:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
125:   DMMGSetSNESLocal(dmmg, FormFunctionLocal, FormJacobianLocal, 0, 0);
126:   DMMGSetFromOptions(dmmg);

128:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
129:      Evaluate initial guess
130:      Note: The user should initialize the vector, x, with the initial guess
131:      for the nonlinear solver prior to calling SNESSolve().  In particular,
132:      to employ an initial guess of zero, the user should explicitly set
133:      this vector to zero by calling VecSet().
134:   */
135:   DMMGSetInitialGuess(dmmg, FormInitialGuess);

137:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
138:      Solve nonlinear system
139:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
140:   DMMGSolve(dmmg);
141:   snes = DMMGGetSNES(dmmg);
142:   SNESGetIterationNumber(snes,&its);
143:   SNESGetConvergedReason(snes, &reason);

145:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
146:   PetscPrintf(PETSC_COMM_WORLD,"Number of Newton iterations = %D, %s\n",its,SNESConvergedReasons[reason]);
147:   PrintVector(dmmg[0], DMMGGetx(dmmg));

149:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
150:      Free work space.  All PETSc objects should be destroyed when they
151:      are no longer needed.
152:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
153:   DMMGDestroy(dmmg);
154:   PetscBagDestroy(&bag);
155:   PetscFinalize();
156:   return(0);
157: }

161: PetscErrorCode PrintVector(DMMG dmmg, Vec U)
162: {
163:   DM             da =  dmmg->dm;
164:   PetscScalar  **u;
165:   PetscInt       i,j,xs,ys,xm,ym;

169:   DMDAVecGetArray(da,U,&u);
170:   DMDAGetCorners(da,&xs,&ys,PETSC_NULL,&xm,&ym,PETSC_NULL);
171:   for(j = ys+ym-1; j >= ys; j--) {
172:     for(i = xs; i < xs+xm; i++) {
173:       printf("u[%d][%d] = %G ", j, i, u[j][i]);
174:     }
175:     printf("\n");
176:   }
177:   DMDAVecRestoreArray(da,U,&u);
178:   return(0);
179: }

183: PetscErrorCode ExactSolution(PetscReal x, PetscReal y, PetscScalar *u)
184: {
186:   *u = x*x;
187:   return(0);
188: }

192: /* 
193:    FormInitialGuess - Forms initial approximation.

195:    Input Parameters:
196:    dmmg - The DMMG context
197:    X - vector

199:    Output Parameter:
200:    X - vector
201: */
202: PetscErrorCode FormInitialGuess(DMMG dmmg,Vec X)
203: {
204:   AppCtx        *user = (AppCtx *) dmmg->user;
205:   DM             da =  dmmg->dm;
206:   PetscInt       i,j,Mx,My,xs,ys,xm,ym;
208:   PetscReal      lambda,temp1,temp,hx,hy;
209:   PetscScalar    **x;

212:   DMDAGetInfo(da,PETSC_IGNORE,&Mx,&My,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,
213:                    PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);

215:   lambda = user->lambda;
216:   hx     = 1.0/(PetscReal)(Mx-1);
217:   hy     = 1.0/(PetscReal)(My-1);
218:   if (lambda == 0.0) {
219:     temp1  = 1.0;
220:   } else {
221:     temp1  = lambda/(lambda + 1.0);
222:   }

224:   /*
225:      Get a pointer to vector data.
226:        - For default PETSc vectors, VecGetArray() returns a pointer to
227:          the data array.  Otherwise, the routine is implementation dependent.
228:        - You MUST call VecRestoreArray() when you no longer need access to
229:          the array.
230:   */
231:   DMDAVecGetArray(da,X,&x);

233:   /*
234:      Get local grid boundaries (for 2-dimensional DMDA):
235:        xs, ys   - starting grid indices (no ghost points)
236:        xm, ym   - widths of local grid (no ghost points)

238:   */
239:   DMDAGetCorners(da,&xs,&ys,PETSC_NULL,&xm,&ym,PETSC_NULL);

241:   /*
242:      Compute initial guess over the locally owned part of the grid
243:   */
244:   for (j=ys; j<ys+ym; j++) {
245:     temp = (PetscReal)(PetscMin(j,My-j-1))*hy;
246:     for (i=xs; i<xs+xm; i++) {

248:       if (i == 0 || j == 0 || i == Mx-1 || j == My-1) {
249:         /* boundary conditions are all zero Dirichlet */
250:         x[j][i] = 0.0;
251:       } else {
252:         x[j][i] = temp1*sqrt(PetscMin((PetscReal)(PetscMin(i,Mx-i-1))*hx,temp));
253:       }
254:     }
255:   }

257:   for(j = ys+ym-1; j >= ys; j--) {
258:     for(i = xs; i < xs+xm; i++) {
259:       printf("u[%d][%d] = %g ", j, i, x[j][i]);
260:     }
261:     printf("\n");
262:   }
263:   /*
264:      Restore vector
265:   */
266:   DMDAVecRestoreArray(da,X,&x);

268:   return(0);
269: }

273: PetscErrorCode constantResidual(PetscReal lambda, int i, int j, PetscReal hx, PetscReal hy, PetscScalar r[])
274: {
275:   PetscScalar rLocal[4] = {0.0, 0.0, 0.0};
276:   PetscScalar phi[4] = {0.0, 0.0, 0.0, 0.0};
277:   PetscReal   xI = i*hx, yI = j*hy, hxhy = hx*hy, x, y;
278:   PetscScalar res;
279:   PetscInt    q, k;

282:   for(q = 0; q < 4; q++) {
283:     phi[0] = (1.0 - quadPoints[q*2])*(1.0 - quadPoints[q*2+1]);
284:     phi[1] =  quadPoints[q*2]       *(1.0 - quadPoints[q*2+1]);
285:     phi[2] =  quadPoints[q*2]       * quadPoints[q*2+1];
286:     phi[3] = (1.0 - quadPoints[q*2])* quadPoints[q*2+1];
287:     x      = xI + quadPoints[q*2]*hx;
288:     y      = yI + quadPoints[q*2+1]*hy;
289:     res    = quadWeights[q]*(2.0);
290:     for(k = 0; k < 4; k++) {
291:       rLocal[k] += phi[k]*res;
292:     }
293:   }
294:   for(k = 0; k < 4; k++) {
295:     printf("  constLocal[%d] = %g\n", k, rLocal[k]);
296:     r[k] += lambda*hxhy*rLocal[k];
297:   }
298:   return(0);
299: }

303: PetscErrorCode nonlinearResidual(PetscReal lambda, PetscScalar u[], PetscScalar r[]) {
305:   r[0] += lambda*(48.0*u[0]*u[0]*u[0] + 12.0*u[1]*u[1]*u[1] + 9.0*u[0]*u[0]*(4.0*u[1] + u[2] + 4.0*u[3]) + u[1]*u[1]*(9.0*u[2] + 6.0*u[3]) + u[1]*(6.0*u[2]*u[2] + 8.0*u[2]*u[3] + 6.0*u[3]*u[3])
306:            + 3.0*(u[2]*u[2]*u[2] + 2.0*u[2]*u[2]*u[3] + 3.0*u[2]*u[3]*u[3] + 4.0*u[3]*u[3]*u[3])
307:            + 2.0*u[0]*(12.0*u[1]*u[1] + u[1]*(6.0*u[2] + 9.0*u[3]) + 2.0*(u[2]*u[2] + 3.0*u[2]*u[3] + 6.0*u[3]*u[3])))/1200.0;
308:   r[1] += lambda*(12.0*u[0]*u[0]*u[0] + 48.0*u[1]*u[1]*u[1] + 9.0*u[1]*u[1]*(4.0*u[2] + u[3]) + 3.0*u[0]*u[0]*(8.0*u[1] + 2.0*u[2] + 3.0*u[3])
309:            + 4.0*u[1]*(6.0*u[2]*u[2] + 3.0*u[2]*u[3] + u[3]*u[3]) + 3.0*(4.0*u[2]*u[2]*u[2] + 3.0*u[2]*u[2]*u[3] + 2.0*u[2]*u[3]*u[3] + u[3]*u[3]*u[3])
310:            + 2.0*u[0]*((18.0*u[1]*u[1] + 3.0*u[2]*u[2] + 4.0*u[2]*u[3] + 3.0*u[3]*u[3]) + u[1]*(9.0*u[2] + 6.0*u[3])))/1200.0;
311:   r[2] += lambda*(3.0*u[0]*u[0]*u[0] + u[0]*u[0]*(6.0*u[1] + 4.0*u[2] + 6.0*u[3]) + u[0]*(9.0*u[1]*u[1] + 9.0*u[2]*u[2] + 12.0*u[2]*u[3] + 9.0*u[3]*u[3] + 4.0*u[1]*(3.0*u[2] + 2.0*u[3]))
312:            + 6.0*(2.0*u[1]*u[1]*u[1] + u[1]*u[1]*(4.0*u[2] + u[3]) + u[1]*(6.0*u[2]*u[2] + 3.0*u[2]*u[3] + u[3]*u[3]) + 2.0*(4.0*u[2]*u[2]*u[2] + 3.0*u[2]*u[2]*u[3] + 2.0*u[2]*u[3]*u[3] + u[3]*u[3]*u[3])))/1200.0;
313:   r[3] += lambda*(12.0*u[0]*u[0]*u[0] + 3.0*u[1]*u[1]*u[1] + u[1]*u[1]*(6.0*u[2] + 4.0*u[3]) + 3.0*u[0]*u[0]*(3.0*u[1] + 2.0*u[2] + 8.0*u[3])
314:            + 3.0*u[1]*(3.0*u[2]*u[2] + 4.0*u[2]*u[3] + 3.0*u[3]*u[3]) + 12.0*(u[2]*u[2]*u[2] + 2.0*u[2]*u[2]*u[3] + 3.0*u[2]*u[3]*u[3] + 4.0*u[3]*u[3]*u[3])
315:            + 2.0*u[0]*(3.0*u[1]*u[1] + u[1]*(4.0*u[2] + 6.0*u[3]) + 3.0*(u[2]*u[2] + 3.0*u[2]*u[3] + 6.0*u[3]*u[3])))/1200.0;
316:   return(0);
317: }

321: PetscErrorCode nonlinearResidualBratu(PetscReal lambda, PetscScalar u[], PetscScalar r[]) {
322:   PetscScalar rLocal[4] = {0.0, 0.0, 0.0, 0.0};
323:   PetscScalar phi[4] = {0.0, 0.0, 0.0, 0.0};
324:   PetscScalar res;
325:   PetscInt q;

328:   for(q = 0; q < 4; q++) {
329:     phi[0] = (1.0 - quadPoints[q*2])*(1.0 - quadPoints[q*2+1]);
330:     phi[1] =  quadPoints[q*2]       *(1.0 - quadPoints[q*2+1]);
331:     phi[2] =  quadPoints[q*2]       * quadPoints[q*2+1];
332:     phi[3] = (1.0 - quadPoints[q*2])* quadPoints[q*2+1];
333:     res    = quadWeights[q]*PetscExpScalar(u[0]*phi[0]+ u[1]*phi[1] + u[2]*phi[2]+ u[3]*phi[3]);
334:     rLocal[0] += phi[0]*res;
335:     rLocal[1] += phi[1]*res;
336:     rLocal[2] += phi[2]*res;
337:     rLocal[3] += phi[3]*res;
338:   }
339:   r[0] += lambda*rLocal[0];
340:   r[1] += lambda*rLocal[1];
341:   r[2] += lambda*rLocal[2];
342:   r[3] += lambda*rLocal[3];
343:   return(0);
344: }

348: PetscErrorCode nonlinearJacobian(PetscReal lambda, PetscScalar u[], PetscScalar J[]) {
350:   J[0]  = lambda*(72.0*u[0]*u[0] + 12.0*u[1]*u[1] + 9.0*u[0]*(4.0*u[1] + u[2] + 4.0*u[3]) + u[1]*(6.0*u[2] + 9.0*u[3]) + 2.0*(u[2]*u[2] + 3.0*u[2]*u[3] + 6.0*u[3]*u[3]))/600.0;
351:   J[1]  = lambda*(18.0*u[0]*u[0] + 18.0*u[1]*u[1] + 3.0*u[2]*u[2] + 4.0*u[2]*u[3] + 3.0*u[3]*u[3] + 3.0*u[0]*(8.0*u[1] + 2.0*u[2] + 3.0*u[3]) + u[1]*(9.0*u[2] + 6.0*u[3]))/600.0;
352:   J[2]  = lambda*( 9.0*u[0]*u[0] +  9.0*u[1]*u[1] + 9.0*u[2]*u[2] + 12.0*u[2]*u[3] + 9.0*u[3]*u[3] + 4.0*u[1]*(3.0*u[2] + 2.0*u[3]) + 4.0*u[0]*(3.0*u[1] + 2.0*u[2] + 3.0*u[3]))/1200.0;
353:   J[3]  = lambda*(18.0*u[0]*u[0] +  3.0*u[1]*u[1] + u[1]*(4.0*u[2] + 6.0*u[3]) + 3.0*u[0]*(3.0*u[1] + 2.0*u[2] + 8.0*u[3]) + 3.0*(u[2]*u[2] + 3.0*u[2]*u[3] + 6.0*u[3]*u[3]))/600.0;

355:   J[4]  = lambda*(18.0*u[0]*u[0] + 18.0*u[1]*u[1] + 3.0*u[2]*u[2] + 4.0*u[2]*u[3] + 3.0*u[3]*u[3] + 3.0*u[0]*(8.0*u[1] + 2.0*u[2] + 3.0*u[3]) + u[1]*(9.0*u[2] + 6.0*u[3]))/600.0;
356:   J[5]  = lambda*(12.0*u[0]*u[0] + 72.0*u[1]*u[1] + 9.0*u[1]*(4.0*u[2] + u[3]) + u[0]*(36.0*u[1] + 9.0*u[2] + 6.0*u[3]) + 2.0*(6.0*u[2]*u[2] + 3.0*u[2]*u[3] + u[3]*u[3]))/600.0;
357:   J[6]  = lambda*( 3.0*u[0]*u[0] + u[0]*(9.0*u[1] + 6.0*u[2] + 4.0*u[3]) + 3.0*(6.0*u[1]*u[1] + 6.0*u[2]*u[2] + 3.0*u[2]*u[3] + u[3]*u[3] + 2.0*u[1]*(4.0*u[2] + u[3])))/600.0;
358:   J[7]  = lambda*( 9.0*u[0]*u[0] +  9.0*u[1]*u[1] + 9.0*u[2]*u[2] + 12.0*u[2]*u[3] + 9.0*u[3]*u[3] + 4.0*u[1]*(3.0*u[2] + 2.0*u[3]) + 4.0*u[0]*(3.0*u[1] + 2.0*u[2] + 3.0*u[3]))/1200.0;

360:   J[8]  = lambda*( 9.0*u[0]*u[0] +  9.0*u[1]*u[1] + 9.0*u[2]*u[2] + 12.0*u[2]*u[3] + 9.0*u[3]*u[3] + 4.0*u[1]*(3.0*u[2] + 2.0*u[3]) + 4.0*u[0]*(3.0*u[1] + 2.0*u[2] + 3.0*u[3]))/1200.0;
361:   J[9]  = lambda*( 3.0*u[0]*u[0] + u[0]*(9.0*u[1] + 6.0*u[2] + 4.0*u[3]) + 3.0*(6.0*u[1]*u[1] + 6.0*u[2]*u[2] + 3.0*u[2]*u[3] + u[3]*u[3] + 2.0*u[1]*(4.0*u[2] + u[3])))/600.0;
362:   J[10] = lambda*( 2.0*u[0]*u[0] + u[0]*(6.0*u[1] + 9.0*u[2] + 6.0*u[3]) + 3.0*(4.0*u[1]*u[1] + 3.0*u[1]*(4.0*u[2] + u[3]) + 4.0*(6.0*u[2]*u[2] + 3.0*u[2]*u[3] + u[3]*u[3])))/600.0;
363:   J[11] = lambda*( 3.0*u[0]*u[0] + u[0]*(4.0*u[1] + 6.0*u[2] + 9.0*u[3]) + 3.0*(u[1]*u[1] + 6.0*u[2]*u[2] + 8.0*u[2]*u[3] + 6.0*u[3]*u[3] + u[1]*(3.0*u[2] + 2.0*u[3])))/600.0;

365:   J[12] = lambda*(18.0*u[0]*u[0] +  3.0*u[1]*u[1] + u[1]*(4.0*u[2] + 6.0*u[3]) + 3.0*u[0]*(3.0*u[1] + 2.0*u[2] + 8.0*u[3]) + 3.0*(u[2]*u[2] + 3.0*u[2]*u[3] + 6.0*u[3]*u[3]))/600.0;
366:   J[13] = lambda*( 9.0*u[0]*u[0] +  9.0*u[1]*u[1] + 9.0*u[2]*u[2] + 12.0*u[2]*u[3] + 9.0*u[3]*u[3] + 4.0*u[1]*(3.0*u[2] + 2.0*u[3]) + 4.0*u[0]*(3.0*u[1] + 2.0*u[2] + 3.0*u[3]))/1200.0;
367:   J[14] = lambda*( 3.0*u[0]*u[0] + u[0]*(4.0*u[1] + 6.0*u[2] + 9.0*u[3]) + 3.0*(u[1]*u[1] + 6.0*u[2]*u[2] + 8.0*u[2]*u[3] + 6.0*u[3]*u[3] + u[1]*(3.0*u[2] + 2.0*u[3])))/600.0;
368:   J[15] = lambda*(12.0*u[0]*u[0] +  2.0*u[1]*u[1] + u[1]*(6.0*u[2] + 9.0*u[3]) + 12.0*(u[2]*u[2] + 3.0*u[2]*u[3] + 6.0*u[3]*u[3]) + u[0]*(6.0*u[1] + 9.0*(u[2] + 4.0*u[3])))/600.0;
369:   return(0);
370: }

374: /* 
375:    FormFunctionLocal - Evaluates nonlinear function, F(x).

377:        Process adiC(36): FormFunctionLocal

379:  */
380: PetscErrorCode FormFunctionLocal(DMDALocalInfo *info,PetscScalar **x,PetscScalar **f,AppCtx *user)
381: {
382:   PetscScalar    uLocal[4];
383:   PetscScalar    rLocal[4];
384:   PetscScalar    uExact;
385:   PetscReal      alpha,lambda,hx,hy,hxhy,sc;
386:   PetscInt       i,j,k,l;


391:   alpha  = user->alpha;
392:   lambda = user->lambda;
393:   hx     = 1.0/(PetscReal)(info->mx-1);
394:   hy     = 1.0/(PetscReal)(info->my-1);
395:   sc     = hx*hy*lambda;
396:   hxhy   = hx*hy;

398:   /* Zero the vector */
399:   PetscMemzero((void *) &(f[info->xs][info->ys]), info->xm*info->ym*sizeof(PetscScalar));
400:   /* Compute function over the locally owned part of the grid. For each
401:      vertex (i,j), we consider the element below:

403:        3         2
404:      i,j+1 --- i+1,j+1
405:        |         |
406:        |         |
407:       i,j  --- i+1,j
408:        0         1

410:      and therefore we do not loop over the last vertex in each dimension.
411:   */
412:   for(j = info->ys; j < info->ys+info->ym-1; j++) {
413:     for(i = info->xs; i < info->xs+info->xm-1; i++) {
414:       uLocal[0] = x[j][i];
415:       uLocal[1] = x[j][i+1];
416:       uLocal[2] = x[j+1][i+1];
417:       uLocal[3] = x[j+1][i];
418:       printf("Solution ElementVector for (%d, %d)\n", i, j);
419:       for(k = 0; k < 4; k++) {
420:         printf("  uLocal[%d] = %g\n", k, uLocal[k]);
421:       }
422:       for(k = 0; k < 4; k++) {
423:         rLocal[k] = 0.0;
424:         for(l = 0; l < 4; l++) {
425:           rLocal[k] += Kref[k*4 + l]*uLocal[l];
426:         }
427:         rLocal[k] *= hxhy*alpha;
428:       }
429:       printf("Laplacian ElementVector for (%d, %d)\n", i, j);
430:       for(k = 0; k < 4; k++) {
431:         printf("  rLocal[%d] = %g\n", k, rLocal[k]);
432:       }
433:       constantResidual(1.0, i, j, hx, hy, rLocal);
434:       printf("Laplacian+Constant ElementVector for (%d, %d)\n", i, j);
435:       for(k = 0; k < 4; k++) {
436:         printf("  rLocal[%d] = %g\n", k, rLocal[k]);
437:       }
438:       nonlinearResidual(-1.0*sc, uLocal, rLocal);
439:       f[j][i]     += rLocal[0];
440:       f[j][i+1]   += rLocal[1];
441:       f[j+1][i+1] += rLocal[2];
442:       f[j+1][i]   += rLocal[3];
443:       if (i == 0 || j == 0) {
444:         ExactSolution(i*hx, j*hy, &uExact);
445:         f[j][i] = x[j][i] - uExact;
446:       }
447:       if ((i == info->mx-2) || (j == 0)) {
448:         ExactSolution((i+1)*hx, j*hy, &uExact);
449:         f[j][i+1] = x[j][i+1] - uExact;
450:       }
451:       if ((i == info->mx-2) || (j == info->my-2)) {
452:         ExactSolution((i+1)*hx, (j+1)*hy, &uExact);
453:         f[j+1][i+1] = x[j+1][i+1] - uExact;
454:       }
455:       if ((i == 0) || (j == info->my-2)) {
456:         ExactSolution(i*hx, (j+1)*hy, &uExact);
457:         f[j+1][i] = x[j+1][i] - uExact;
458:       }
459:     }
460:   }

462:   for(j = info->ys+info->ym-1; j >= info->ys; j--) {
463:     for(i = info->xs; i < info->xs+info->xm; i++) {
464:       printf("f[%d][%d] = %g ", j, i, f[j][i]);
465:     }
466:     printf("\n");
467:   }
468:   PetscLogFlops(68.0*(info->ym-1)*(info->xm-1));
469:   return(0);
470: }

474: /*
475:    FormJacobianLocal - Evaluates Jacobian matrix.
476: */
477: PetscErrorCode FormJacobianLocal(DMDALocalInfo *info,PetscScalar **x,Mat jac,AppCtx *user)
478: {
479:   PetscScalar    JLocal[16], ELocal[16], uLocal[4];
480:   MatStencil     rows[4], cols[4], ident;
481:   PetscInt       localRows[4];
482:   PetscScalar    alpha,lambda,hx,hy,hxhy,sc;
483:   PetscInt       i,j,k,l,numRows;

487:   alpha  = user->alpha;
488:   lambda = user->lambda;
489:   hx     = 1.0/(PetscReal)(info->mx-1);
490:   hy     = 1.0/(PetscReal)(info->my-1);
491:   sc     = hx*hy*lambda;
492:   hxhy   = hx*hy;

494:   MatZeroEntries(jac);
495:   /* 
496:      Compute entries for the locally owned part of the Jacobian.
497:       - Currently, all PETSc parallel matrix formats are partitioned by
498:         contiguous chunks of rows across the processors. 
499:       - Each processor needs to insert only elements that it owns
500:         locally (but any non-local elements will be sent to the
501:         appropriate processor during matrix assembly). 
502:       - Here, we set all entries for a particular row at once.
503:       - We can set matrix entries either using either
504:         MatSetValuesLocal() or MatSetValues(), as discussed above.
505:   */
506:   for (j=info->ys; j<info->ys+info->ym-1; j++) {
507:     for (i=info->xs; i<info->xs+info->xm-1; i++) {
508:       numRows = 0;
509:       uLocal[0] = x[j][i];
510:       uLocal[1] = x[j][i+1];
511:       uLocal[2] = x[j+1][i+1];
512:       uLocal[3] = x[j+1][i];
513:       /* i,j */
514:       if (i == 0 || j == 0) {
515:         ident.i = i; ident.j = j;
516:         JLocal[0] = 1.0;
517:         MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);
518:         MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);
519:         MatSetValuesStencil(jac,1,&ident,1,&ident,JLocal,INSERT_VALUES);
520:         MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);
521:         MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);
522:       } else {
523:         localRows[numRows] = 0;
524:         rows[numRows].i = i; rows[numRows].j = j;
525:         numRows++;
526:       }
527:       cols[0].i = i; cols[0].j = j;
528:       /* i+1,j */
529:       if ((i == info->mx-2) || (j == 0)) {
530:         ident.i = i+1; ident.j = j;
531:         JLocal[0] = 1.0;
532:         MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);
533:         MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);
534:         MatSetValuesStencil(jac,1,&ident,1,&ident,JLocal,INSERT_VALUES);
535:         MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);
536:         MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);
537:       } else {
538:         localRows[numRows] = 1;
539:         rows[numRows].i = i+1; rows[numRows].j = j;
540:         numRows++;
541:       }
542:       cols[1].i = i+1; cols[1].j = j;
543:       /* i+1,j+1 */
544:       if ((i == info->mx-2) || (j == info->my-2)) {
545:         ident.i = i+1; ident.j = j+1;
546:         JLocal[0] = 1.0;
547:         MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);
548:         MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);
549:         MatSetValuesStencil(jac,1,&ident,1,&ident,JLocal,INSERT_VALUES);
550:         MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);
551:         MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);
552:       } else {
553:         localRows[numRows] = 2;
554:         rows[numRows].i = i+1; rows[numRows].j = j+1;
555:         numRows++;
556:       }
557:       cols[2].i = i+1; cols[2].j = j+1;
558:       /* i,j+1 */
559:       if ((i == 0) || (j == info->my-2)) {
560:         ident.i = i; ident.j = j+1;
561:         JLocal[0] = 1.0;
562:         MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);
563:         MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);
564:         MatSetValuesStencil(jac,1,&ident,1,&ident,JLocal,INSERT_VALUES);
565:         MatAssemblyBegin(jac,MAT_FLUSH_ASSEMBLY);
566:         MatAssemblyEnd(jac,MAT_FLUSH_ASSEMBLY);
567:       } else {
568:         localRows[numRows] = 3;
569:         rows[numRows].i = i; rows[numRows].j = j+1;
570:         numRows++;
571:       }
572:       cols[3].i = i; cols[3].j = j+1;
573:       nonlinearJacobian(-1.0*sc, uLocal, ELocal);
574:       for(k = 0; k < numRows; k++) {
575:         for(l = 0; l < 4; l++) {
576:           JLocal[k*4 + l] = (hxhy*alpha*Kref[localRows[k]*4 + l] + ELocal[localRows[k]*4 + l]);
577:         }
578:       }
579:       MatSetValuesStencil(jac,numRows,rows,4,cols,JLocal,ADD_VALUES);
580:     }
581:   }

583:   /* 
584:      Assemble matrix, using the 2-step process:
585:        MatAssemblyBegin(), MatAssemblyEnd().
586:   */
587:   MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);
588:   MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);
589:   /*
590:      Tell the matrix we will never add a new nonzero location to the
591:      matrix. If we do, it will generate an error.
592:   */
593:   MatSetOption(jac,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
594:   return(0);
595: }