Actual source code: fninvsqrt.c
slepc-3.15.1 2021-05-28
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2021, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Inverse square root function x^(-1/2)
12: */
14: #include <slepc/private/fnimpl.h>
15: #include <slepcblaslapack.h>
17: PetscErrorCode FNEvaluateFunction_Invsqrt(FN fn,PetscScalar x,PetscScalar *y)
18: {
20: if (x==0.0) SETERRQ(PETSC_COMM_SELF,1,"Function not defined in the requested value");
21: #if !defined(PETSC_USE_COMPLEX)
22: if (x<0.0) SETERRQ(PETSC_COMM_SELF,1,"Function not defined in the requested value");
23: #endif
24: *y = 1.0/PetscSqrtScalar(x);
25: return(0);
26: }
28: PetscErrorCode FNEvaluateDerivative_Invsqrt(FN fn,PetscScalar x,PetscScalar *y)
29: {
31: if (x==0.0) SETERRQ(PETSC_COMM_SELF,1,"Derivative not defined in the requested value");
32: #if !defined(PETSC_USE_COMPLEX)
33: if (x<0.0) SETERRQ(PETSC_COMM_SELF,1,"Derivative not defined in the requested value");
34: #endif
35: *y = -1.0/(2.0*PetscPowScalarReal(x,1.5));
36: return(0);
37: }
39: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Schur(FN fn,Mat A,Mat B)
40: {
42: PetscBLASInt n=0,ld,*ipiv,info;
43: PetscScalar *Ba,*Wa;
44: PetscInt m;
45: Mat W;
48: FN_AllocateWorkMat(fn,A,&W);
49: if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
50: MatDenseGetArray(B,&Ba);
51: MatDenseGetArray(W,&Wa);
52: /* compute B = sqrtm(A) */
53: MatGetSize(A,&m,NULL);
54: PetscBLASIntCast(m,&n);
55: ld = n;
56: FNSqrtmSchur(fn,n,Ba,n,PETSC_FALSE);
57: /* compute B = A\B */
58: PetscMalloc1(ld,&ipiv);
59: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
60: SlepcCheckLapackInfo("gesv",info);
61: PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
62: PetscFree(ipiv);
63: MatDenseRestoreArray(W,&Wa);
64: MatDenseRestoreArray(B,&Ba);
65: FN_FreeWorkMat(fn,&W);
66: return(0);
67: }
69: PetscErrorCode FNEvaluateFunctionMatVec_Invsqrt_Schur(FN fn,Mat A,Vec v)
70: {
72: PetscBLASInt n=0,ld,*ipiv,info,one=1;
73: PetscScalar *Ba,*Wa;
74: PetscInt m;
75: Mat B,W;
78: FN_AllocateWorkMat(fn,A,&B);
79: FN_AllocateWorkMat(fn,A,&W);
80: MatDenseGetArray(B,&Ba);
81: MatDenseGetArray(W,&Wa);
82: /* compute B_1 = sqrtm(A)*e_1 */
83: MatGetSize(A,&m,NULL);
84: PetscBLASIntCast(m,&n);
85: ld = n;
86: FNSqrtmSchur(fn,n,Ba,n,PETSC_TRUE);
87: /* compute B_1 = A\B_1 */
88: PetscMalloc1(ld,&ipiv);
89: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&one,Wa,&ld,ipiv,Ba,&ld,&info));
90: SlepcCheckLapackInfo("gesv",info);
91: PetscFree(ipiv);
92: MatDenseRestoreArray(W,&Wa);
93: MatDenseRestoreArray(B,&Ba);
94: MatGetColumnVector(B,v,0);
95: FN_FreeWorkMat(fn,&W);
96: FN_FreeWorkMat(fn,&B);
97: return(0);
98: }
100: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_DBP(FN fn,Mat A,Mat B)
101: {
103: PetscBLASInt n=0;
104: PetscScalar *T;
105: PetscInt m;
108: if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
109: MatDenseGetArray(B,&T);
110: MatGetSize(A,&m,NULL);
111: PetscBLASIntCast(m,&n);
112: FNSqrtmDenmanBeavers(fn,n,T,n,PETSC_TRUE);
113: MatDenseRestoreArray(B,&T);
114: return(0);
115: }
117: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_NS(FN fn,Mat A,Mat B)
118: {
120: PetscBLASInt n=0;
121: PetscScalar *T;
122: PetscInt m;
125: if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
126: MatDenseGetArray(B,&T);
127: MatGetSize(A,&m,NULL);
128: PetscBLASIntCast(m,&n);
129: FNSqrtmNewtonSchulz(fn,n,T,n,PETSC_TRUE);
130: MatDenseRestoreArray(B,&T);
131: return(0);
132: }
134: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Sadeghi(FN fn,Mat A,Mat B)
135: {
137: PetscBLASInt n=0,ld,*ipiv,info;
138: PetscScalar *Ba,*Wa;
139: PetscInt m;
140: Mat W;
143: FN_AllocateWorkMat(fn,A,&W);
144: if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
145: MatDenseGetArray(B,&Ba);
146: MatDenseGetArray(W,&Wa);
147: /* compute B = sqrtm(A) */
148: MatGetSize(A,&m,NULL);
149: PetscBLASIntCast(m,&n);
150: ld = n;
151: FNSqrtmSadeghi(fn,n,Ba,n);
152: /* compute B = A\B */
153: PetscMalloc1(ld,&ipiv);
154: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
155: SlepcCheckLapackInfo("gesv",info);
156: PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
157: PetscFree(ipiv);
158: MatDenseRestoreArray(W,&Wa);
159: MatDenseRestoreArray(B,&Ba);
160: FN_FreeWorkMat(fn,&W);
161: return(0);
162: }
164: #if defined(PETSC_HAVE_CUDA)
165: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_NS_CUDA(FN fn,Mat A,Mat B)
166: {
168: PetscBLASInt n=0;
169: PetscScalar *Ba;
170: PetscInt m;
173: if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
174: MatDenseGetArray(B,&Ba);
175: MatGetSize(A,&m,NULL);
176: PetscBLASIntCast(m,&n);
177: FNSqrtmNewtonSchulz_CUDA(fn,n,Ba,n,PETSC_TRUE);
178: MatDenseRestoreArray(B,&Ba);
179: return(0);
180: }
182: #if defined(PETSC_HAVE_MAGMA)
183: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_DBP_CUDAm(FN fn,Mat A,Mat B)
184: {
186: PetscBLASInt n=0;
187: PetscScalar *T;
188: PetscInt m;
191: if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
192: MatDenseGetArray(B,&T);
193: MatGetSize(A,&m,NULL);
194: PetscBLASIntCast(m,&n);
195: FNSqrtmDenmanBeavers_CUDAm(fn,n,T,n,PETSC_TRUE);
196: MatDenseRestoreArray(B,&T);
197: return(0);
198: }
200: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Sadeghi_CUDAm(FN fn,Mat A,Mat B)
201: {
203: PetscBLASInt n=0,ld,*ipiv,info;
204: PetscScalar *Ba,*Wa;
205: PetscInt m;
206: Mat W;
209: FN_AllocateWorkMat(fn,A,&W);
210: if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
211: MatDenseGetArray(B,&Ba);
212: MatDenseGetArray(W,&Wa);
213: /* compute B = sqrtm(A) */
214: MatGetSize(A,&m,NULL);
215: PetscBLASIntCast(m,&n);
216: ld = n;
217: FNSqrtmSadeghi_CUDAm(fn,n,Ba,n);
218: /* compute B = A\B */
219: PetscMalloc1(ld,&ipiv);
220: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
221: SlepcCheckLapackInfo("gesv",info);
222: PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
223: PetscFree(ipiv);
224: MatDenseRestoreArray(W,&Wa);
225: MatDenseRestoreArray(B,&Ba);
226: FN_FreeWorkMat(fn,&W);
227: return(0);
228: }
229: #endif /* PETSC_HAVE_MAGMA */
230: #endif /* PETSC_HAVE_CUDA */
232: PetscErrorCode FNView_Invsqrt(FN fn,PetscViewer viewer)
233: {
235: PetscBool isascii;
236: char str[50];
237: const char *methodname[] = {
238: "Schur method for inv(A)*sqrtm(A)",
239: "Denman-Beavers (product form)",
240: "Newton-Schulz iteration",
241: "Sadeghi iteration"
242: #if defined(PETSC_HAVE_CUDA)
243: ,"Newton-Schulz iteration CUDA"
244: #if defined(PETSC_HAVE_MAGMA)
245: ,"Denman-Beavers (product form) CUDA/MAGMA",
246: "Sadeghi iteration CUDA/MAGMA"
247: #endif
248: #endif
249: };
250: const int nmeth=sizeof(methodname)/sizeof(methodname[0]);
253: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
254: if (isascii) {
255: if (fn->beta==(PetscScalar)1.0) {
256: if (fn->alpha==(PetscScalar)1.0) {
257: PetscViewerASCIIPrintf(viewer," Inverse square root: x^(-1/2)\n");
258: } else {
259: SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
260: PetscViewerASCIIPrintf(viewer," Inverse square root: (%s*x)^(-1/2)\n",str);
261: }
262: } else {
263: SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE);
264: if (fn->alpha==(PetscScalar)1.0) {
265: PetscViewerASCIIPrintf(viewer," Inverse square root: %s*x^(-1/2)\n",str);
266: } else {
267: PetscViewerASCIIPrintf(viewer," Inverse square root: %s",str);
268: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
269: SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
270: PetscViewerASCIIPrintf(viewer,"*(%s*x)^(-1/2)\n",str);
271: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
272: }
273: }
274: if (fn->method<nmeth) {
275: PetscViewerASCIIPrintf(viewer," computing matrix functions with: %s\n",methodname[fn->method]);
276: }
277: }
278: return(0);
279: }
281: SLEPC_EXTERN PetscErrorCode FNCreate_Invsqrt(FN fn)
282: {
284: fn->ops->evaluatefunction = FNEvaluateFunction_Invsqrt;
285: fn->ops->evaluatederivative = FNEvaluateDerivative_Invsqrt;
286: fn->ops->evaluatefunctionmat[0] = FNEvaluateFunctionMat_Invsqrt_Schur;
287: fn->ops->evaluatefunctionmat[1] = FNEvaluateFunctionMat_Invsqrt_DBP;
288: fn->ops->evaluatefunctionmat[2] = FNEvaluateFunctionMat_Invsqrt_NS;
289: fn->ops->evaluatefunctionmat[3] = FNEvaluateFunctionMat_Invsqrt_Sadeghi;
290: #if defined(PETSC_HAVE_CUDA)
291: fn->ops->evaluatefunctionmat[4] = FNEvaluateFunctionMat_Invsqrt_NS_CUDA;
292: #if defined(PETSC_HAVE_MAGMA)
293: fn->ops->evaluatefunctionmat[5] = FNEvaluateFunctionMat_Invsqrt_DBP_CUDAm;
294: fn->ops->evaluatefunctionmat[6] = FNEvaluateFunctionMat_Invsqrt_Sadeghi_CUDAm;
295: #endif /* PETSC_HAVE_MAGMA */
296: #endif /* PETSC_HAVE_CUDA */
297: fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Invsqrt_Schur;
298: fn->ops->view = FNView_Invsqrt;
299: return(0);
300: }