Actual source code: cycliccuda.cu

slepc-3.19.2 2023-09-05
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    SLEPc singular value solver: "cyclic" (CUDA implementation)
 12: */
 13: #include <slepc/private/svdimpl.h>
 14: #include "../src/svd/impls/cyclic/cyclic.h"

 16: PetscErrorCode MatMult_Cyclic_CUDA(Mat B,Vec x,Vec y)
 17: {
 18:   SVD_CYCLIC_SHELL  *ctx;
 19:   const PetscScalar *d_px;
 20:   PetscScalar       *d_py;
 21:   PetscInt          m;

 23:   PetscFunctionBegin;
 24:   PetscCall(MatShellGetContext(B,&ctx));
 25:   PetscCall(MatGetLocalSize(ctx->A,&m,NULL));
 26:   PetscCall(VecCUDAGetArrayRead(x,&d_px));
 27:   PetscCall(VecCUDAGetArrayWrite(y,&d_py));
 28:   PetscCall(VecCUDAPlaceArray(ctx->x1,d_px));
 29:   PetscCall(VecCUDAPlaceArray(ctx->x2,d_px+m));
 30:   PetscCall(VecCUDAPlaceArray(ctx->y1,d_py));
 31:   PetscCall(VecCUDAPlaceArray(ctx->y2,d_py+m));
 32:   PetscCall(MatMult(ctx->A,ctx->x2,ctx->y1));
 33:   PetscCall(MatMult(ctx->AT,ctx->x1,ctx->y2));
 34:   PetscCall(VecCUDAResetArray(ctx->x1));
 35:   PetscCall(VecCUDAResetArray(ctx->x2));
 36:   PetscCall(VecCUDAResetArray(ctx->y1));
 37:   PetscCall(VecCUDAResetArray(ctx->y2));
 38:   PetscCall(VecCUDARestoreArrayRead(x,&d_px));
 39:   PetscCall(VecCUDARestoreArrayWrite(y,&d_py));
 40:   PetscFunctionReturn(PETSC_SUCCESS);
 41: }

 43: PetscErrorCode MatMult_ECross_CUDA(Mat B,Vec x,Vec y)
 44: {
 45:   SVD_CYCLIC_SHELL  *ctx;
 46:   const PetscScalar *d_px;
 47:   PetscScalar       *d_py;
 48:   PetscInt          mn,m,n;

 50:   PetscFunctionBegin;
 51:   PetscCall(MatShellGetContext(B,&ctx));
 52:   PetscCall(MatGetLocalSize(ctx->A,NULL,&n));
 53:   PetscCall(VecGetLocalSize(y,&mn));
 54:   m = mn-n;
 55:   PetscCall(VecCUDAGetArrayRead(x,&d_px));
 56:   PetscCall(VecCUDAGetArrayWrite(y,&d_py));
 57:   PetscCall(VecCUDAPlaceArray(ctx->x1,d_px));
 58:   PetscCall(VecCUDAPlaceArray(ctx->x2,d_px+m));
 59:   PetscCall(VecCUDAPlaceArray(ctx->y1,d_py));
 60:   PetscCall(VecCUDAPlaceArray(ctx->y2,d_py+m));
 61:   PetscCall(VecCopy(ctx->x1,ctx->y1));
 62:   PetscCall(MatMult(ctx->A,ctx->x2,ctx->w));
 63:   PetscCall(MatMult(ctx->AT,ctx->w,ctx->y2));
 64:   PetscCall(VecCUDAResetArray(ctx->x1));
 65:   PetscCall(VecCUDAResetArray(ctx->x2));
 66:   PetscCall(VecCUDAResetArray(ctx->y1));
 67:   PetscCall(VecCUDAResetArray(ctx->y2));
 68:   PetscCall(VecCUDARestoreArrayRead(x,&d_px));
 69:   PetscCall(VecCUDARestoreArrayWrite(y,&d_py));
 70:   PetscFunctionReturn(PETSC_SUCCESS);
 71: }