diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 481bc43..21a2c23 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,9 +12,18 @@ jobs: linux: strategy: matrix: - py-version: ['3.10', '3.11'] + include: + - py-version: '3.10' + c-compiler: 'gcc' + cxx-compiler: 'g++' + - py-version: '3.11' + c-compiler: 'gcc' + cxx-compiler: 'g++' + - py-version: '3.10' + c-compiler: 'clang' + cxx-compiler: 'clang++' fail-fast: false - name: Python ${{ matrix.py-version }} + name: ${{ matrix.c-compiler}} Python ${{ matrix.py-version }} runs-on: ubuntu-latest @@ -22,6 +31,8 @@ jobs: env: MPIEXEC_FLAGS: "--allow-run-as-root --oversubscribe" PYNUCLEUS_BUILD_PARALLELISM: 2 + OMPI_CC: ${{ matrix.c-compiler }} + OMPI_CXX: ${{ matrix.cxx-compiler }} steps: - name: Check out repo @@ -34,7 +45,7 @@ jobs: uses: actions/cache/restore@v3 with: path: /home/runner/.cache/ccache - key: ccache-${{ matrix.py-version }} + key: ccache-${{ matrix.c-compiler }}-${{ matrix.py-version }} - uses: actions/setup-python@v4 if: always() @@ -57,7 +68,7 @@ jobs: - name: Install if: always() - run: python -m pip install wheel && python -m pip list && make install PIP_INSTALL_FLAGS="--no-use-pep517" + run: python -m pip install wheel && python -m pip list && make install PIP_INSTALL_FLAGS="--no-use-pep517 -vvv" - name: Remove ccache cache if: ${{ steps.ccache-restore.outputs.cache-hit }} @@ -66,7 +77,7 @@ jobs: GH_TOKEN: ${{ github.token }} run: | gh extension install actions/gh-actions-cache - gh actions-cache delete ccache-${{ matrix.py-version }} --confirm + gh actions-cache delete ccache-${{ matrix.c-compiler}}-${{ matrix.py-version }} --confirm continue-on-error: true - name: Push ccache cache @@ -74,7 +85,7 @@ jobs: uses: actions/cache/save@v3 with: path: /home/runner/.cache/ccache - key: ccache-${{ matrix.py-version }} + key: ccache-${{ matrix.c-compiler }}-${{ matrix.py-version }} - name: Ccache report if: always() @@ -82,27 +93,27 @@ jobs: - name: Run tests if: always() - run: python3 -m pytest --junit-xml=test-results-${{ matrix.py-version }}.xml tests/ + run: python3 -m pytest --junit-xml=test-results-${{ matrix.c-compiler }}-${{ matrix.py-version }}.xml tests/ - name: Run flake8 if: always() run: | make flake8 - mv flake8.xml flake8-${{ matrix.py-version }}.xml + mv flake8.xml flake8-${{ matrix.c-compiler }}-${{ matrix.py-version }}.xml - name: Archive test results uses: actions/upload-artifact@v3 if: always() with: name: Test results - path: test-results-${{ matrix.py-version }}.xml + path: test-results-${{ matrix.c-compiler }}-${{ matrix.py-version }}.xml - name: Report test results uses: dorny/test-reporter@v1 if: always() with: - name: Test report (Python ${{ matrix.py-version }}) - path: test-results-${{ matrix.py-version }}.xml + name: Test report (${{ matrix.c-compiler }}, Python ${{ matrix.py-version }}) + path: test-results-${{ matrix.c-compiler }}-${{ matrix.py-version }}.xml reporter: java-junit fail-on-error: true @@ -110,7 +121,7 @@ jobs: uses: dorny/test-reporter@v1 if: always() with: - name: Flake8 report (Python ${{ matrix.py-version }}) - path: flake8-${{ matrix.py-version }}.xml + name: Flake8 report (${{ matrix.c-compiler }}, Python ${{ matrix.py-version }}) + path: flake8-${{ matrix.c-compiler }}-${{ matrix.py-version }}.xml reporter: java-junit fail-on-error: false diff --git a/base/PyNucleus_base/blas.pyx b/base/PyNucleus_base/blas.pyx index d339a2f..5a7d8bd 100644 --- a/base/PyNucleus_base/blas.pyx +++ b/base/PyNucleus_base/blas.pyx @@ -9,462 +9,15 @@ import numpy as np cimport numpy as np from . myTypes import INDEX -include "config.pxi" cdef: INDEX_t MAX_INT = np.iinfo(INDEX).max REAL_t NAN = np.nan -def uninitialized(*args, **kwargs): - IF FILL_UNINITIALIZED: - if 'dtype' in kwargs and np.issubdtype(kwargs['dtype'], np.integer): - kwargs['fill_value'] = np.iinfo(kwargs['dtype']).min - else: - kwargs['fill_value'] = NAN - return np.full(*args, **kwargs) - ELSE: - return np.empty(*args, **kwargs) - - -def uninitialized_like(like, **kwargs): - IF FILL_UNINITIALIZED: - if np.issubdtype(np.array(like, copy=False).dtype, np.integer): - kwargs['fill_value'] = np.iinfo(like.dtype).min - else: - kwargs['fill_value'] = NAN - return np.full_like(like, **kwargs) - ELSE: - return np.empty_like(like, **kwargs) - - -cpdef carray uninitializedINDEX(tuple shape): - cdef: - carray a = carray(shape, 4, 'i') - Py_ssize_t s, i - IF FILL_UNINITIALIZED: - s = 1 - for i in range(len(shape)): - s *= shape[i] - for i in range(s): - (a.data)[i] = MAX_INT - return a - - -cpdef carray uninitializedREAL(tuple shape): - cdef: - carray a = carray(shape, 8, 'd') - Py_ssize_t s, i - IF FILL_UNINITIALIZED: - s = 1 - for i in range(len(shape)): - s *= shape[i] - for i in range(s): - (a.data)[i] = NAN - return a - - -IF USE_BLAS: - - from scipy.linalg.cython_blas cimport dcopy, dscal, daxpy, ddot, dnrm2 - from scipy.linalg.cython_blas cimport zcopy, zscal, zaxpy, zdotc - from scipy.linalg.cython_blas cimport dgemv, zgemv - from scipy.linalg.cython_blas cimport dgemm, zgemm - - - cdef void assign(SCALAR_t[::1] y, SCALAR_t[::1] x): - cdef: - double* x_ptr - double* y_ptr - double complex* x_ptr_c - double complex* y_ptr_c - int n = x.shape[0] - int inc = 1 - if SCALAR_t is COMPLEX_t: - x_ptr_c = &x[0] - y_ptr_c = &y[0] - zcopy(&n, x_ptr_c, &inc, y_ptr_c, &inc) - else: - x_ptr = &x[0] - y_ptr = &y[0] - dcopy(&n, x_ptr, &inc, y_ptr, &inc) - - cdef void assignScaled(SCALAR_t[::1] y, SCALAR_t[::1] x, SCALAR_t alpha): - cdef: - double* x_ptr - double* y_ptr - double complex* x_ptr_c - double complex* y_ptr_c - int n = x.shape[0] - int inc = 1 - if SCALAR_t is COMPLEX_t: - x_ptr_c = &x[0] - y_ptr_c = &y[0] - zcopy(&n, x_ptr_c, &inc, y_ptr_c, &inc) - zscal(&n, &alpha, y_ptr_c, &inc) - else: - x_ptr = &x[0] - y_ptr = &y[0] - dcopy(&n, x_ptr, &inc, y_ptr, &inc) - dscal(&n, &alpha, y_ptr, &inc) - - cdef void assign3(SCALAR_t[::1] z, SCALAR_t[::1] x, SCALAR_t alpha, SCALAR_t[::1] y, SCALAR_t beta): - cdef: - double* x_ptr - double* y_ptr - double* z_ptr - double complex* x_ptr_c - double complex* y_ptr_c - double complex* z_ptr_c - int n = x.shape[0] - int inc = 1 - if SCALAR_t is COMPLEX_t: - x_ptr_c = &x[0] - y_ptr_c = &y[0] - z_ptr_c = &z[0] - zcopy(&n, x_ptr_c, &inc, z_ptr_c, &inc) - if alpha != 1.0: - zscal(&n, &alpha, z_ptr_c, &inc) - zaxpy(&n, &beta, y_ptr_c, &inc, z_ptr_c, &inc) - else: - x_ptr = &x[0] - y_ptr = &y[0] - z_ptr = &z[0] - dcopy(&n, x_ptr, &inc, z_ptr, &inc) - if alpha != 1.0: - dscal(&n, &alpha, z_ptr, &inc) - daxpy(&n, &beta, y_ptr, &inc, z_ptr, &inc) - - cdef void update(SCALAR_t[::1] x, SCALAR_t[::1] y): - cdef: - double* x_ptr - double* y_ptr - double complex* x_ptr_c - double complex* y_ptr_c - SCALAR_t alpha = 1.0 - int n = x.shape[0] - int inc = 1 - if SCALAR_t is COMPLEX_t: - x_ptr_c = &x[0] - y_ptr_c = &y[0] - zaxpy(&n, &alpha, y_ptr_c, &inc, x_ptr_c, &inc) - else: - x_ptr = &x[0] - y_ptr = &y[0] - daxpy(&n, &alpha, y_ptr, &inc, x_ptr, &inc) - - cdef void updateScaled(SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t alpha): - cdef: - double* x_ptr - double* y_ptr - double complex* x_ptr_c - double complex* y_ptr_c - int n = x.shape[0] - int inc = 1 - if SCALAR_t is COMPLEX_t: - x_ptr_c = &x[0] - y_ptr_c = &y[0] - zaxpy(&n, &alpha, y_ptr_c, &inc, x_ptr_c, &inc) - else: - x_ptr = &x[0] - y_ptr = &y[0] - daxpy(&n, &alpha, y_ptr, &inc, x_ptr, &inc) - - cdef void scaleScalar(SCALAR_t[::1] x, SCALAR_t alpha): - cdef: - double* x_ptr - double complex* x_ptr_c - int n = x.shape[0] - int inc = 1 - if SCALAR_t is COMPLEX_t: - x_ptr_c = &x[0] - zscal(&n, &alpha, x_ptr_c, &inc) - else: - x_ptr = &x[0] - dscal(&n, &alpha, x_ptr, &inc) - - cdef SCALAR_t mydot(SCALAR_t[::1] v0, SCALAR_t[::1] v1): - cdef: - SCALAR_t s = 0.0 - double* v0_ptr - double* v1_ptr - double complex* v0_ptr_c - double complex* v1_ptr_c - int n = v0.shape[0] - int inc = 1 - if SCALAR_t is COMPLEX_t: - v0_ptr_c = &v0[0] - v1_ptr_c = &v1[0] - s = zdotc(&n, v0_ptr_c, &inc, v1_ptr_c, &inc) - else: - v0_ptr = &v0[0] - v1_ptr = &v1[0] - s = ddot(&n, v0_ptr, &inc, v1_ptr, &inc) - return s - - cdef REAL_t norm(SCALAR_t[::1] x): - cdef: - REAL_t s = 0.0 - double* x_ptr - double complex* x_ptr_c - int n = x.shape[0] - int inc = 1 - if SCALAR_t is COMPLEX_t: - x_ptr_c = &x[0] - s = zdotc(&n, x_ptr_c, &inc, x_ptr_c, &inc).real - return sqrt(s) - else: - x_ptr = &x[0] - return dnrm2(&n, x_ptr, &inc) - - cdef void gemv(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): - cdef: - double* A_ptr - double* x_ptr - double* y_ptr - double complex* A_ptr_c - double complex* x_ptr_c - double complex* y_ptr_c - int m = A.shape[1] - int n = A.shape[0] - SCALAR_t alpha = 1. - int lda = A.shape[1] - int incx = 1 - int incy = 1 - if SCALAR_t is COMPLEX_t: - A_ptr_c = &A[0, 0] - x_ptr_c = &x[0] - y_ptr_c = &y[0] - zgemv('t', &m, &n, &alpha, A_ptr_c, &lda, x_ptr_c, &incx, &beta, y_ptr_c, &incy) - else: - A_ptr = &A[0, 0] - x_ptr = &x[0] - y_ptr = &y[0] - dgemv('t', &m, &n, &alpha, A_ptr, &lda, x_ptr, &incx, &beta, y_ptr, &incy) - - cdef void gemvF(SCALAR_t[::1, :] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): - cdef: - double* A_ptr - double* x_ptr - double* y_ptr - double complex* A_ptr_c - double complex* x_ptr_c - double complex* y_ptr_c - int m = A.shape[0] - int n = A.shape[1] - SCALAR_t alpha = 1. - int lda = A.shape[0] - int incx = 1 - int incy = 1 - if SCALAR_t is COMPLEX_t: - A_ptr_c = &A[0, 0] - x_ptr_c = &x[0] - y_ptr_c = &y[0] - zgemv('n', &m, &n, &alpha, A_ptr_c, &lda, x_ptr_c, &incx, &beta, y_ptr_c, &incy) - else: - A_ptr = &A[0, 0] - x_ptr = &x[0] - y_ptr = &y[0] - dgemv('n', &m, &n, &alpha, A_ptr, &lda, x_ptr, &incx, &beta, y_ptr, &incy) - - - cdef void gemvT(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): - cdef: - double* A_ptr - double* x_ptr - double* y_ptr - double complex* A_ptr_c - double complex* x_ptr_c - double complex* y_ptr_c - int m = A.shape[0] - int n = A.shape[1] - SCALAR_t alpha = 1. - int lda = A.shape[0] - int incx = 1 - int incy = 1 - if SCALAR_t is COMPLEX_t: - A_ptr_c = &A[0, 0] - x_ptr_c = &x[0] - y_ptr_c = &y[0] - zgemv('n', &m, &n, &alpha, A_ptr_c, &lda, x_ptr_c, &incx, &beta, y_ptr_c, &incy) - else: - A_ptr = &A[0, 0] - x_ptr = &x[0] - y_ptr = &y[0] - dgemv('n', &m, &n, &alpha, A_ptr, &lda, x_ptr, &incx, &beta, y_ptr, &incy) - - - cdef void matmat(SCALAR_t[:, ::1] A, SCALAR_t[:, ::1] B, SCALAR_t[:, ::1] C): - cdef: - double* A_ptr - double* B_ptr - double* C_ptr - double complex* A_ptr_c - double complex* B_ptr_c - double complex* C_ptr_c - int m = B.shape[1] - int k = B.shape[0] - int n = A.shape[0] - SCALAR_t alpha = 1. - SCALAR_t beta = 0. - int lda = A.shape[1] - int ldb = B.shape[1] - int ldc = C.shape[1] - if SCALAR_t is COMPLEX_t: - A_ptr_c = &A[0, 0] - B_ptr_c = &B[0, 0] - C_ptr_c = &C[0, 0] - zgemm('n', 'n', &m, &n, &k, &alpha, B_ptr_c, &ldb, A_ptr_c, &lda, &beta, C_ptr_c, &ldc) - else: - A_ptr = &A[0, 0] - B_ptr = &B[0, 0] - C_ptr = &C[0, 0] - dgemm('n', 'n', &m, &n, &k, &alpha, B_ptr, &ldb, A_ptr, &lda, &beta, C_ptr, &ldc) - -ELSE: - cdef void assign(SCALAR_t[::1] y, const SCALAR_t[::1] x): - cdef: - INDEX_t i - for i in range(x.shape[0]): - y[i] = x[i] - - cdef void assignScaled(SCALAR_t[::1] y, const SCALAR_t[::1] x, SCALAR_t alpha): - cdef: - INDEX_t i - for i in range(x.shape[0]): - y[i] = alpha*x[i] - - - cdef void assign3(SCALAR_t[::1] z, const SCALAR_t[::1] x, SCALAR_t alpha, const SCALAR_t[::1] y, SCALAR_t beta): - cdef: - INDEX_t i - for i in range(x.shape[0]): - z[i] = alpha*x[i] + beta*y[i] - - cdef void update(SCALAR_t[::1] x, SCALAR_t[::1] y): - cdef: - INDEX_t i - if SCALAR_t is COMPLEX_t: - for i in range(x.shape[0]): - x[i] = x[i] + y[i] - else: - for i in range(x.shape[0]): - x[i] += y[i] - - cdef void updateScaled(SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t alpha): - cdef: - INDEX_t i - if SCALAR_t is COMPLEX_t: - for i in range(x.shape[0]): - x[i] = x[i]+alpha*y[i] - else: - for i in range(x.shape[0]): - x[i] += alpha*y[i] - - cdef void scaleScalar(SCALAR_t[::1] x, SCALAR_t alpha): - cdef: - INDEX_t i - if SCALAR_t is COMPLEX_t: - for i in range(x.shape[0]): - x[i] *= alpha - else: - for i in range(x.shape[0]): - x[i] = x[i]*alpha - - cdef SCALAR_t mydot(SCALAR_t[::1] v0, SCALAR_t[::1] v1) nogil: - cdef: - int i - SCALAR_t s = 0.0 - if SCALAR_t is COMPLEX_t: - for i in range(v0.shape[0]): - s += v0[i].conjugate()*v1[i] - else: - for i in range(v0.shape[0]): - s += v0[i]*v1[i] - return s - - cdef REAL_t norm(SCALAR_t[::1] x): - cdef: - int i - REAL_t s = 0.0 - if SCALAR_t is COMPLEX_t: - for i in range(x.shape[0]): - s += (x[i].conjugate()*x[i]).real - else: - for i in range(x.shape[0]): - s += x[i]*x[i] - return sqrt(s) - - cdef void gemv(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): - cdef: - INDEX_t i, j - SCALAR_t s - if SCALAR_t is COMPLEX_t: - if beta != 0.: - for i in range(A.shape[0]): - s = 0. - for j in range(A.shape[1]): - s = s + A[i, j]*x[j] - y[i] = beta*y[i]+s - else: - for i in range(A.shape[0]): - s = 0. - for j in range(A.shape[1]): - s = s + A[i, j]*x[j] - y[i] = s - else: - if beta != 0.: - for i in range(A.shape[0]): - s = 0. - for j in range(A.shape[1]): - s += A[i, j]*x[j] - y[i] = beta*y[i]+s - else: - for i in range(A.shape[0]): - s = 0. - for j in range(A.shape[1]): - s += A[i, j]*x[j] - y[i] = s - - - cdef void gemvT(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): - cdef: - INDEX_t i, j - if SCALAR_t is COMPLEX_t: - if beta != 0.: - for i in range(A.shape[0]): - y[i] = y[i]*beta - else: - y[:] = 0. - for j in range(A.shape[1]): - for i in range(A.shape[0]): - y[i] = y[i]+A[j, i]*x[j] - else: - if beta != 0.: - for i in range(A.shape[0]): - y[i] *= beta - else: - y[:] = 0. - for j in range(A.shape[1]): - for i in range(A.shape[0]): - y[i] += A[j, i]*x[j] - - - cdef void matmat(SCALAR_t[:, ::1] A, SCALAR_t[:, ::1] B, SCALAR_t[:, ::1] C): - cdef: - INDEX_t i, j, k - C[:, :] = 0. - if SCALAR_t is COMPLEX_t: - for i in range(A.shape[0]): - for j in range(B.shape[0]): - for k in range(B.shape[1]): - C[i, k] = C[i, k] + A[i, j]*B[j, k] - else: - for i in range(A.shape[0]): - for j in range(B.shape[0]): - for k in range(B.shape[1]): - C[i, k] += A[i, j]*B[j, k] - +include "allocation.pxi" +include "blas_routines.pxi" +include "mkl_routines.pxi" cdef void updateScaledVector(REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] alpha): @@ -472,102 +25,3 @@ cdef void updateScaledVector(REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] alpha): INDEX_t i for i in range(x.shape[0]): x[i] += alpha[i]*y[i] - - -IF USE_MKL: - ctypedef INDEX_t MKL_INT - - cdef extern from "mkl/mkl_spblas.h": - void mkl_cspblas_dcsrgemv (const char *transa , const MKL_INT *m , const REAL_t *a , const MKL_INT *ia , const MKL_INT *ja , const REAL_t *x , REAL_t *y ); - void mkl_dcsrmv (const char *transa , const MKL_INT *m , const MKL_INT *k , const REAL_t *alpha , const char *matdescra , - const REAL_t *val , const MKL_INT *indx , const MKL_INT *pntrb , const MKL_INT *pntre , - const REAL_t *x , const REAL_t *beta , REAL_t *y ); - # void mkl_zcsrmv (const char *transa , const MKL_INT *m , const MKL_INT *k , const COMPLEX_t *alpha , const char *matdescra , - # const COMPLEX_t *val , const MKL_INT *indx , const MKL_INT *pntrb , const MKL_INT *pntre , - # const COMPLEX_t *x , const COMPLEX_t *beta , COMPLEX_t *y ); - - cdef void spmv(INDEX_t[::1] indptr, INDEX_t[::1] indices, SCALAR_t[::1] data, SCALAR_t[::1] x, SCALAR_t[::1] y, BOOL_t overwrite=True): - cdef: - char transA = 78 - INDEX_t num_rows = indptr.shape[0]-1 - - assert overwrite - - if SCALAR_t is COMPLEX_t: - # mkl_cspblas_zcsrgemv(&transA, &num_rows, &data[0], &indptr[0], &indices[0], &x[0], &y[0]) - pass - else: - mkl_cspblas_dcsrgemv(&transA, &num_rows, &data[0], &indptr[0], &indices[0], &x[0], &y[0]) - - - cdef void spres(INDEX_t[::1] indptr, INDEX_t[::1] indices, SCALAR_t[::1] data, SCALAR_t[::1] x, SCALAR_t[::1] rhs, SCALAR_t[::1] result): - cdef: - char transA = 78 - SCALAR_t alpha = -1. - SCALAR_t beta = 1. - char matdscr[6] - INDEX_t inc = 1 - INDEX_t num_rows = indptr.shape[0]-1 - INDEX_t num_columns = x.shape[0] - - matdscr[0] = 71 - matdscr[2] = 78 - matdscr[3] = 67 - - assign(result, rhs) - if SCALAR_t is COMPLEX_t: - pass - # mkl_dcsrmv(&transA, &num_rows, &num_columns, &alpha, &matdscr[0], - # &data[0], &indices[0], &indptr[0], &indptr[1], - # &x[0], &beta, &result[0]) - else: - mkl_dcsrmv(&transA, &num_rows, &num_columns, &alpha, &matdscr[0], - &data[0], &indices[0], &indptr[0], &indptr[1], - &x[0], &beta, &result[0]) - -ELSE: - - cdef void spmv(INDEX_t[::1] indptr, INDEX_t[::1] indices, SCALAR_t[::1] data, SCALAR_t[::1] x, SCALAR_t[::1] y, BOOL_t overwrite=True): - cdef: - INDEX_t i, jj, j - SCALAR_t temp - if SCALAR_t is COMPLEX_t: - for i in range(indptr.shape[0]-1): - temp = 0. - for jj in range(indptr[i], indptr[i+1]): - j = indices[jj] - temp = temp + data[jj]*x[j] - if overwrite: - y[i] = temp - else: - y[i] = y[i]+temp - else: - for i in range(indptr.shape[0]-1): - temp = 0. - for jj in range(indptr[i], indptr[i+1]): - j = indices[jj] - temp += data[jj]*x[j] - if overwrite: - y[i] = temp - else: - y[i] += temp - - cdef void spres(INDEX_t[::1] indptr, INDEX_t[::1] indices, SCALAR_t[::1] data, SCALAR_t[::1] x, SCALAR_t[::1] rhs, SCALAR_t[::1] result): - cdef: - INDEX_t i, jj, j - SCALAR_t temp - INDEX_t num_rows = indptr.shape[0]-1 - if SCALAR_t is COMPLEX_t: - for i in range(num_rows): - temp = rhs[i] - for jj in range(indptr[i], indptr[i+1]): - j = indices[jj] - temp = temp-data[jj]*x[j] - result[i] = temp - else: - for i in range(num_rows): - temp = rhs[i] - for jj in range(indptr[i], indptr[i+1]): - j = indices[jj] - temp -= data[jj]*x[j] - result[i] = temp diff --git a/base/PyNucleus_base/ip_norm.pyx b/base/PyNucleus_base/ip_norm.pyx index fb92d03..eefa202 100644 --- a/base/PyNucleus_base/ip_norm.pyx +++ b/base/PyNucleus_base/ip_norm.pyx @@ -9,8 +9,6 @@ import numpy as np from . myTypes import INDEX, REAL, COMPLEX from . blas import uninitialized -include "config.pxi" - import mpi4py.rc mpi4py.rc.initialize = False from mpi4py import MPI diff --git a/base/PyNucleus_base/linalg.pyx b/base/PyNucleus_base/linalg.pyx index 16dbb46..13db27e 100644 --- a/base/PyNucleus_base/linalg.pyx +++ b/base/PyNucleus_base/linalg.pyx @@ -27,8 +27,6 @@ from . convergence cimport (convergenceMaster, noOpConvergenceMaster, convergenceClient, noOpConvergenceClient) from numpy.linalg import norm as normSeq -include "config.pxi" - def UniformOnUnitSphere(dim, samples=1, norm=normSeq): "Uniform distribution on the unit sphere." @@ -204,43 +202,6 @@ cdef void forward_solve_csc(INDEX_t[::1] indptr, y[indices[i]] = y[indices[i]] + data[i]*y[j] -IF USE_MKL_TRISOLVE: - - ctypedef INDEX_t MKL_INT - - cdef extern from "mkl/mkl_spblas.h": - void mkl_dcsrsm (const char *transa , const MKL_INT *m , const MKL_INT *n , const REAL_t *alpha , const char *matdescra , - const REAL_t *val , const MKL_INT *indx , const MKL_INT *pntrb , const MKL_INT *pntre , - const REAL_t *b , const MKL_INT *ldb , REAL_t *c , const MKL_INT *ldc ); - - cdef inline void trisolve_mkl(INDEX_t[::1] indptr, - INDEX_t[::1] indices, - REAL_t[::1] data, - REAL_t[::1] b, - REAL_t[::1] y, - BOOL_t forward=True, - BOOL_t unitDiagonal=False): - cdef: - char transA - REAL_t alpha = 1. - char matdscr[6] - INDEX_t inc = 1 - INDEX_t n = indptr.shape[0]-1 - INDEX_t one = 1 - matdscr[0] = 84 - if forward: - transA = 84 - else: - transA = 78 - matdscr[1] = 85 - if unitDiagonal: - matdscr[2] = 85 - else: - matdscr[2] = 78 - matdscr[3] = 67 - mkl_dcsrsm(&transA, &n, &one, &alpha, &matdscr[0], &data[0], &indices[0], &indptr[0], &indptr[1], &b[0], &one, &y[0], &one) - - # Assumes that indices are ordered cdef void forward_solve_sss(const INDEX_t[::1] indptr, const INDEX_t[::1] indices, @@ -447,56 +408,6 @@ cpdef solve_cholesky(INDEX_t[::1] Lindptr, return temp_mem -cdef class cholesky_solver: - cdef: - public INDEX_t[::1] indptr, indices - public REAL_t[::1] data, diagonal, temp - CSR_LinearOperator L - - def __init__(self, num_rows): - self.temp = uninitialized((num_rows), dtype=REAL) - - def setup(self, A): - cdef: - INDEX_t i - if isinstance(A, CSR_LinearOperator): - self.indices, self.indptr, self.data, self.diagonal = ichol_csr(A) - elif isinstance(A, SSS_LinearOperator): - # self.indices, self.indptr, self.data, self.diagonal = ichol_sss(A) - self.indices, self.indptr, self.data, self.diagonal = ichol_csr(A.to_csr_linear_operator()) - elif isinstance(A, TimeStepperLinearOperator): - B = A.to_csr_linear_operator() - self.indices, self.indptr, self.data, self.diagonal = ichol_csr(B) - else: - raise NotImplementedError() - - IF USE_MKL_TRISOLVE: - from . linear_operators import diagonalOperator - T = CSR_LinearOperator(self.indices, self.indptr, self.data).to_csr()+diagonalOperator(self.diagonal).to_csr() - self.L = CSR_LinearOperator.from_csr(T) - ELSE: - for i in range(self.diagonal.shape[0]): - self.diagonal[i] = 1./self.diagonal[i] - - cpdef solve(self, REAL_t[::1] b, REAL_t[::1] x): - self.temp[:] = 0.0 - IF USE_MKL_TRISOLVE: - trisolve_mkl(self.L.indptr, self.L.indices, self.L.data, b, self.temp, forward=True, unitDiagonal=False) - trisolve_mkl(self.L.indptr, self.L.indices, self.L.data, self.temp, x, forward=False, unitDiagonal=False) - ELSE: - forward_solve_sss_noInverse(self.indptr, self.indices, - self.data, self.diagonal, - b, self.temp, unitDiagonal=False) - backward_solve_sss_noInverse(self.indptr, self.indices, - self.data, self.diagonal, - self.temp, x) - - def asPreconditioner(self): - return LinearOperator_wrapper(self.diagonal.shape[0], - self.diagonal.shape[0], - self.solve) - - cpdef void bicgstab(LinearOperator A, const REAL_t[::1] b, REAL_t[::1] x, diff --git a/base/PyNucleus_base/linear_operators.pyx b/base/PyNucleus_base/linear_operators.pyx index 5d65de4..2531cf7 100644 --- a/base/PyNucleus_base/linear_operators.pyx +++ b/base/PyNucleus_base/linear_operators.pyx @@ -12,8 +12,6 @@ from . blas cimport gemv from . blas import uninitialized from cython.parallel cimport prange, parallel -include "config.pxi" - COMPRESSION = 'gzip' include "LinearOperator_REAL.pxi" diff --git a/base/PyNucleus_base/malloc_linux.pxi b/base/PyNucleus_base/malloc_linux.pxi new file mode 100644 index 0000000..6129a75 --- /dev/null +++ b/base/PyNucleus_base/malloc_linux.pxi @@ -0,0 +1,9 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +cdef extern from "malloc.h" nogil: + int malloc_trim(size_t pad) diff --git a/base/PyNucleus_base/malloc_mac.pxi b/base/PyNucleus_base/malloc_mac.pxi new file mode 100644 index 0000000..b0780f3 --- /dev/null +++ b/base/PyNucleus_base/malloc_mac.pxi @@ -0,0 +1,9 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +cdef int malloc_trim(size_t pad): + pass diff --git a/base/PyNucleus_base/opt_false_allocation.pxi b/base/PyNucleus_base/opt_false_allocation.pxi new file mode 100644 index 0000000..9c156b6 --- /dev/null +++ b/base/PyNucleus_base/opt_false_allocation.pxi @@ -0,0 +1,27 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +def uninitialized(*args, **kwargs): + return np.empty(*args, **kwargs) + + +def uninitialized_like(like, **kwargs): + return np.empty_like(like, **kwargs) + + +cpdef carray uninitializedINDEX(tuple shape): + cdef: + carray a = carray(shape, 4, 'i') + Py_ssize_t s, i + return a + + +cpdef carray uninitializedREAL(tuple shape): + cdef: + carray a = carray(shape, 8, 'd') + Py_ssize_t s, i + return a diff --git a/base/PyNucleus_base/opt_false_blas.pxi b/base/PyNucleus_base/opt_false_blas.pxi new file mode 100644 index 0000000..0b81294 --- /dev/null +++ b/base/PyNucleus_base/opt_false_blas.pxi @@ -0,0 +1,149 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +cdef void assign(SCALAR_t[::1] y, const SCALAR_t[::1] x): + cdef: + INDEX_t i + for i in range(x.shape[0]): + y[i] = x[i] + +cdef void assignScaled(SCALAR_t[::1] y, const SCALAR_t[::1] x, SCALAR_t alpha): + cdef: + INDEX_t i + for i in range(x.shape[0]): + y[i] = alpha*x[i] + + +cdef void assign3(SCALAR_t[::1] z, const SCALAR_t[::1] x, SCALAR_t alpha, const SCALAR_t[::1] y, SCALAR_t beta): + cdef: + INDEX_t i + for i in range(x.shape[0]): + z[i] = alpha*x[i] + beta*y[i] + +cdef void update(SCALAR_t[::1] x, SCALAR_t[::1] y): + cdef: + INDEX_t i + if SCALAR_t is COMPLEX_t: + for i in range(x.shape[0]): + x[i] = x[i] + y[i] + else: + for i in range(x.shape[0]): + x[i] += y[i] + +cdef void updateScaled(SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t alpha): + cdef: + INDEX_t i + if SCALAR_t is COMPLEX_t: + for i in range(x.shape[0]): + x[i] = x[i]+alpha*y[i] + else: + for i in range(x.shape[0]): + x[i] += alpha*y[i] + +cdef void scaleScalar(SCALAR_t[::1] x, SCALAR_t alpha): + cdef: + INDEX_t i + if SCALAR_t is COMPLEX_t: + for i in range(x.shape[0]): + x[i] *= alpha + else: + for i in range(x.shape[0]): + x[i] = x[i]*alpha + +cdef SCALAR_t mydot(SCALAR_t[::1] v0, SCALAR_t[::1] v1) nogil: + cdef: + int i + SCALAR_t s = 0.0 + if SCALAR_t is COMPLEX_t: + for i in range(v0.shape[0]): + s += v0[i].conjugate()*v1[i] + else: + for i in range(v0.shape[0]): + s += v0[i]*v1[i] + return s + +cdef REAL_t norm(SCALAR_t[::1] x): + cdef: + int i + REAL_t s = 0.0 + if SCALAR_t is COMPLEX_t: + for i in range(x.shape[0]): + s += (x[i].conjugate()*x[i]).real + else: + for i in range(x.shape[0]): + s += x[i]*x[i] + return sqrt(s) + +cdef void gemv(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): + cdef: + INDEX_t i, j + SCALAR_t s + if SCALAR_t is COMPLEX_t: + if beta != 0.: + for i in range(A.shape[0]): + s = 0. + for j in range(A.shape[1]): + s = s + A[i, j]*x[j] + y[i] = beta*y[i]+s + else: + for i in range(A.shape[0]): + s = 0. + for j in range(A.shape[1]): + s = s + A[i, j]*x[j] + y[i] = s + else: + if beta != 0.: + for i in range(A.shape[0]): + s = 0. + for j in range(A.shape[1]): + s += A[i, j]*x[j] + y[i] = beta*y[i]+s + else: + for i in range(A.shape[0]): + s = 0. + for j in range(A.shape[1]): + s += A[i, j]*x[j] + y[i] = s + + +cdef void gemvT(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): + cdef: + INDEX_t i, j + if SCALAR_t is COMPLEX_t: + if beta != 0.: + for i in range(A.shape[0]): + y[i] = y[i]*beta + else: + y[:] = 0. + for j in range(A.shape[1]): + for i in range(A.shape[0]): + y[i] = y[i]+A[j, i]*x[j] + else: + if beta != 0.: + for i in range(A.shape[0]): + y[i] *= beta + else: + y[:] = 0. + for j in range(A.shape[1]): + for i in range(A.shape[0]): + y[i] += A[j, i]*x[j] + + +cdef void matmat(SCALAR_t[:, ::1] A, SCALAR_t[:, ::1] B, SCALAR_t[:, ::1] C): + cdef: + INDEX_t i, j, k + C[:, :] = 0. + if SCALAR_t is COMPLEX_t: + for i in range(A.shape[0]): + for j in range(B.shape[0]): + for k in range(B.shape[1]): + C[i, k] = C[i, k] + A[i, j]*B[j, k] + else: + for i in range(A.shape[0]): + for j in range(B.shape[0]): + for k in range(B.shape[1]): + C[i, k] += A[i, j]*B[j, k] diff --git a/base/PyNucleus_base/opt_false_mkl.pxi b/base/PyNucleus_base/opt_false_mkl.pxi new file mode 100644 index 0000000..ae0afc2 --- /dev/null +++ b/base/PyNucleus_base/opt_false_mkl.pxi @@ -0,0 +1,51 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +cdef void spmv(INDEX_t[::1] indptr, INDEX_t[::1] indices, SCALAR_t[::1] data, SCALAR_t[::1] x, SCALAR_t[::1] y, BOOL_t overwrite=True): + cdef: + INDEX_t i, jj, j + SCALAR_t temp + if SCALAR_t is COMPLEX_t: + for i in range(indptr.shape[0]-1): + temp = 0. + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + temp = temp + data[jj]*x[j] + if overwrite: + y[i] = temp + else: + y[i] = y[i]+temp + else: + for i in range(indptr.shape[0]-1): + temp = 0. + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + temp += data[jj]*x[j] + if overwrite: + y[i] = temp + else: + y[i] += temp + +cdef void spres(INDEX_t[::1] indptr, INDEX_t[::1] indices, SCALAR_t[::1] data, SCALAR_t[::1] x, SCALAR_t[::1] rhs, SCALAR_t[::1] result): + cdef: + INDEX_t i, jj, j + SCALAR_t temp + INDEX_t num_rows = indptr.shape[0]-1 + if SCALAR_t is COMPLEX_t: + for i in range(num_rows): + temp = rhs[i] + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + temp = temp-data[jj]*x[j] + result[i] = temp + else: + for i in range(num_rows): + temp = rhs[i] + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + temp -= data[jj]*x[j] + result[i] = temp diff --git a/base/PyNucleus_base/opt_false_mkl_trisolve.pxi b/base/PyNucleus_base/opt_false_mkl_trisolve.pxi new file mode 100644 index 0000000..2103817 --- /dev/null +++ b/base/PyNucleus_base/opt_false_mkl_trisolve.pxi @@ -0,0 +1,46 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +cdef class ichol_solver(solver): + def __init__(self, LinearOperator A=None, INDEX_t num_rows=-1): + solver.__init__(self, A, num_rows) + self.temp = uninitialized((self.num_rows), dtype=REAL) + + cpdef void setup(self, LinearOperator A=None): + cdef: + INDEX_t i + if A is not None: + self.A = A + if isinstance(self.A, CSR_LinearOperator): + self.indices, self.indptr, self.data, self.diagonal = ichol_csr(self.A) + elif isinstance(self.A, SSS_LinearOperator): + # self.indices, self.indptr, self.data, self.diagonal = ichol_sss(A) + self.indices, self.indptr, self.data, self.diagonal = ichol_csr(self.A.to_csr_linear_operator()) + else: + try: + B = self.A.to_csr_linear_operator() + self.indices, self.indptr, self.data, self.diagonal = ichol_csr(B) + except: + raise NotImplementedError() + + for i in range(self.diagonal.shape[0]): + self.diagonal[i] = 1./self.diagonal[i] + self.initialized = True + + cdef int solve(self, vector_t b, vector_t x) except -1: + solver.solve(self, b, x) + self.temp[:] = 0.0 + forward_solve_sss_noInverse(self.indptr, self.indices, + self.data, self.diagonal, + b, self.temp, unitDiagonal=False) + backward_solve_sss_noInverse(self.indptr, self.indices, + self.data, self.diagonal, + self.temp, x) + return 1 + + def __str__(self): + return 'Incomplete Cholesky' diff --git a/base/PyNucleus_base/opt_false_solver_cholmod.pxi b/base/PyNucleus_base/opt_false_solver_cholmod.pxi new file mode 100644 index 0000000..16fc689 --- /dev/null +++ b/base/PyNucleus_base/opt_false_solver_cholmod.pxi @@ -0,0 +1,72 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +cdef class chol_solver(solver): + def __init__(self, LinearOperator A, INDEX_t num_rows=-1): + solver.__init__(self, A, num_rows) + + cpdef void setup(self, LinearOperator A=None): + cdef: + LinearOperator B = None + INDEX_t i, j, k + if A is not None: + self.A = A + + if not isinstance(self.A, (SSS_LinearOperator, + CSR_LinearOperator, + Dense_LinearOperator)): + if self.A.isSparse(): + B = self.A.to_csr_linear_operator() + else: + B = Dense_LinearOperator(np.ascontiguousarray(self.A.toarray())) + else: + B = self.A + + self.denseFactor = False + if isinstance(B, Dense_LinearOperator): + from scipy.linalg import cholesky + L = cholesky(B.toarray(), lower=True) + self.Lflat = uninitialized((L.shape[0]*(L.shape[1]+1)//2), dtype=REAL) + k = 0 + for i in range(L.shape[0]): + for j in range(i+1): + self.Lflat[k] = L[i, j] + k += 1 + self.denseFactor = True + self.temp = uninitialized((L.shape[0]), dtype=REAL) + else: + raise NotImplementedError("Cholmod not available, install \"scikit-sparse\".") + self.initialized = True + + cdef int solve(self, vector_t b, vector_t x) except -1: + cdef: + INDEX_t i, j, k + REAL_t val + REAL_t[::1] temp = self.temp, Lflat = self.Lflat + solver.solve(self, b, x) + if not self.denseFactor: + np.array(x, copy=False, dtype=REAL)[:] = self.Ainv(np.array(b, copy=False, dtype=REAL)) + else: + k = 0 + for i in range(x.shape[0]): + val = b[i] + for j in range(i): + val -= Lflat[k]*temp[j] + k += 1 + temp[i] = val/Lflat[k] + k += 1 + for i in range(x.shape[0]-1, -1, -1): + val = temp[i] + for j in range(i+1, x.shape[0]): + k = ((j*(j+1)) >> 1) + i + val -= Lflat[k]*x[j] + k = ((i*(i+1)) >> 1) + i + x[i] = val/Lflat[k] + return 1 + + def __str__(self): + return 'Cholesky' diff --git a/base/PyNucleus_base/opt_true_allocation.pxi b/base/PyNucleus_base/opt_true_allocation.pxi new file mode 100644 index 0000000..a316b44 --- /dev/null +++ b/base/PyNucleus_base/opt_true_allocation.pxi @@ -0,0 +1,45 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +def uninitialized(*args, **kwargs): + if 'dtype' in kwargs and np.issubdtype(kwargs['dtype'], np.integer): + kwargs['fill_value'] = np.iinfo(kwargs['dtype']).min + else: + kwargs['fill_value'] = NAN + return np.full(*args, **kwargs) + + +def uninitialized_like(like, **kwargs): + if np.issubdtype(np.array(like, copy=False).dtype, np.integer): + kwargs['fill_value'] = np.iinfo(like.dtype).min + else: + kwargs['fill_value'] = NAN + return np.full_like(like, **kwargs) + + +cpdef carray uninitializedINDEX(tuple shape): + cdef: + carray a = carray(shape, 4, 'i') + Py_ssize_t s, i + s = 1 + for i in range(len(shape)): + s *= shape[i] + for i in range(s): + (a.data)[i] = MAX_INT + return a + + +cpdef carray uninitializedREAL(tuple shape): + cdef: + carray a = carray(shape, 8, 'd') + Py_ssize_t s, i + s = 1 + for i in range(len(shape)): + s *= shape[i] + for i in range(s): + (a.data)[i] = NAN + return a diff --git a/base/PyNucleus_base/opt_true_blas.pxi b/base/PyNucleus_base/opt_true_blas.pxi new file mode 100644 index 0000000..2ff9557 --- /dev/null +++ b/base/PyNucleus_base/opt_true_blas.pxi @@ -0,0 +1,261 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +from scipy.linalg.cython_blas cimport dcopy, dscal, daxpy, ddot, dnrm2 +from scipy.linalg.cython_blas cimport zcopy, zscal, zaxpy, zdotc +from scipy.linalg.cython_blas cimport dgemv, zgemv +from scipy.linalg.cython_blas cimport dgemm, zgemm + + +cdef void assign(SCALAR_t[::1] y, SCALAR_t[::1] x): + cdef: + double* x_ptr + double* y_ptr + double complex* x_ptr_c + double complex* y_ptr_c + int n = x.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + x_ptr_c = &x[0] + y_ptr_c = &y[0] + zcopy(&n, x_ptr_c, &inc, y_ptr_c, &inc) + else: + x_ptr = &x[0] + y_ptr = &y[0] + dcopy(&n, x_ptr, &inc, y_ptr, &inc) + +cdef void assignScaled(SCALAR_t[::1] y, SCALAR_t[::1] x, SCALAR_t alpha): + cdef: + double* x_ptr + double* y_ptr + double complex* x_ptr_c + double complex* y_ptr_c + int n = x.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + x_ptr_c = &x[0] + y_ptr_c = &y[0] + zcopy(&n, x_ptr_c, &inc, y_ptr_c, &inc) + zscal(&n, &alpha, y_ptr_c, &inc) + else: + x_ptr = &x[0] + y_ptr = &y[0] + dcopy(&n, x_ptr, &inc, y_ptr, &inc) + dscal(&n, &alpha, y_ptr, &inc) + +cdef void assign3(SCALAR_t[::1] z, SCALAR_t[::1] x, SCALAR_t alpha, SCALAR_t[::1] y, SCALAR_t beta): + cdef: + double* x_ptr + double* y_ptr + double* z_ptr + double complex* x_ptr_c + double complex* y_ptr_c + double complex* z_ptr_c + int n = x.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + x_ptr_c = &x[0] + y_ptr_c = &y[0] + z_ptr_c = &z[0] + zcopy(&n, x_ptr_c, &inc, z_ptr_c, &inc) + if alpha != 1.0: + zscal(&n, &alpha, z_ptr_c, &inc) + zaxpy(&n, &beta, y_ptr_c, &inc, z_ptr_c, &inc) + else: + x_ptr = &x[0] + y_ptr = &y[0] + z_ptr = &z[0] + dcopy(&n, x_ptr, &inc, z_ptr, &inc) + if alpha != 1.0: + dscal(&n, &alpha, z_ptr, &inc) + daxpy(&n, &beta, y_ptr, &inc, z_ptr, &inc) + +cdef void update(SCALAR_t[::1] x, SCALAR_t[::1] y): + cdef: + double* x_ptr + double* y_ptr + double complex* x_ptr_c + double complex* y_ptr_c + SCALAR_t alpha = 1.0 + int n = x.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + x_ptr_c = &x[0] + y_ptr_c = &y[0] + zaxpy(&n, &alpha, y_ptr_c, &inc, x_ptr_c, &inc) + else: + x_ptr = &x[0] + y_ptr = &y[0] + daxpy(&n, &alpha, y_ptr, &inc, x_ptr, &inc) + +cdef void updateScaled(SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t alpha): + cdef: + double* x_ptr + double* y_ptr + double complex* x_ptr_c + double complex* y_ptr_c + int n = x.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + x_ptr_c = &x[0] + y_ptr_c = &y[0] + zaxpy(&n, &alpha, y_ptr_c, &inc, x_ptr_c, &inc) + else: + x_ptr = &x[0] + y_ptr = &y[0] + daxpy(&n, &alpha, y_ptr, &inc, x_ptr, &inc) + +cdef void scaleScalar(SCALAR_t[::1] x, SCALAR_t alpha): + cdef: + double* x_ptr + double complex* x_ptr_c + int n = x.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + x_ptr_c = &x[0] + zscal(&n, &alpha, x_ptr_c, &inc) + else: + x_ptr = &x[0] + dscal(&n, &alpha, x_ptr, &inc) + +cdef SCALAR_t mydot(SCALAR_t[::1] v0, SCALAR_t[::1] v1): + cdef: + SCALAR_t s = 0.0 + double* v0_ptr + double* v1_ptr + double complex* v0_ptr_c + double complex* v1_ptr_c + int n = v0.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + v0_ptr_c = &v0[0] + v1_ptr_c = &v1[0] + s = zdotc(&n, v0_ptr_c, &inc, v1_ptr_c, &inc) + else: + v0_ptr = &v0[0] + v1_ptr = &v1[0] + s = ddot(&n, v0_ptr, &inc, v1_ptr, &inc) + return s + +cdef REAL_t norm(SCALAR_t[::1] x): + cdef: + REAL_t s = 0.0 + double* x_ptr + double complex* x_ptr_c + int n = x.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + x_ptr_c = &x[0] + s = zdotc(&n, x_ptr_c, &inc, x_ptr_c, &inc).real + return sqrt(s) + else: + x_ptr = &x[0] + return dnrm2(&n, x_ptr, &inc) + +cdef void gemv(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): + cdef: + double* A_ptr + double* x_ptr + double* y_ptr + double complex* A_ptr_c + double complex* x_ptr_c + double complex* y_ptr_c + int m = A.shape[1] + int n = A.shape[0] + SCALAR_t alpha = 1. + int lda = A.shape[1] + int incx = 1 + int incy = 1 + if SCALAR_t is COMPLEX_t: + A_ptr_c = &A[0, 0] + x_ptr_c = &x[0] + y_ptr_c = &y[0] + zgemv('t', &m, &n, &alpha, A_ptr_c, &lda, x_ptr_c, &incx, &beta, y_ptr_c, &incy) + else: + A_ptr = &A[0, 0] + x_ptr = &x[0] + y_ptr = &y[0] + dgemv('t', &m, &n, &alpha, A_ptr, &lda, x_ptr, &incx, &beta, y_ptr, &incy) + +cdef void gemvF(SCALAR_t[::1, :] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): + cdef: + double* A_ptr + double* x_ptr + double* y_ptr + double complex* A_ptr_c + double complex* x_ptr_c + double complex* y_ptr_c + int m = A.shape[0] + int n = A.shape[1] + SCALAR_t alpha = 1. + int lda = A.shape[0] + int incx = 1 + int incy = 1 + if SCALAR_t is COMPLEX_t: + A_ptr_c = &A[0, 0] + x_ptr_c = &x[0] + y_ptr_c = &y[0] + zgemv('n', &m, &n, &alpha, A_ptr_c, &lda, x_ptr_c, &incx, &beta, y_ptr_c, &incy) + else: + A_ptr = &A[0, 0] + x_ptr = &x[0] + y_ptr = &y[0] + dgemv('n', &m, &n, &alpha, A_ptr, &lda, x_ptr, &incx, &beta, y_ptr, &incy) + + +cdef void gemvT(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): + cdef: + double* A_ptr + double* x_ptr + double* y_ptr + double complex* A_ptr_c + double complex* x_ptr_c + double complex* y_ptr_c + int m = A.shape[0] + int n = A.shape[1] + SCALAR_t alpha = 1. + int lda = A.shape[0] + int incx = 1 + int incy = 1 + if SCALAR_t is COMPLEX_t: + A_ptr_c = &A[0, 0] + x_ptr_c = &x[0] + y_ptr_c = &y[0] + zgemv('n', &m, &n, &alpha, A_ptr_c, &lda, x_ptr_c, &incx, &beta, y_ptr_c, &incy) + else: + A_ptr = &A[0, 0] + x_ptr = &x[0] + y_ptr = &y[0] + dgemv('n', &m, &n, &alpha, A_ptr, &lda, x_ptr, &incx, &beta, y_ptr, &incy) + + +cdef void matmat(SCALAR_t[:, ::1] A, SCALAR_t[:, ::1] B, SCALAR_t[:, ::1] C): + cdef: + double* A_ptr + double* B_ptr + double* C_ptr + double complex* A_ptr_c + double complex* B_ptr_c + double complex* C_ptr_c + int m = B.shape[1] + int k = B.shape[0] + int n = A.shape[0] + SCALAR_t alpha = 1. + SCALAR_t beta = 0. + int lda = A.shape[1] + int ldb = B.shape[1] + int ldc = C.shape[1] + if SCALAR_t is COMPLEX_t: + A_ptr_c = &A[0, 0] + B_ptr_c = &B[0, 0] + C_ptr_c = &C[0, 0] + zgemm('n', 'n', &m, &n, &k, &alpha, B_ptr_c, &ldb, A_ptr_c, &lda, &beta, C_ptr_c, &ldc) + else: + A_ptr = &A[0, 0] + B_ptr = &B[0, 0] + C_ptr = &C[0, 0] + dgemm('n', 'n', &m, &n, &k, &alpha, B_ptr, &ldb, A_ptr, &lda, &beta, C_ptr, &ldc) diff --git a/base/PyNucleus_base/opt_true_mkl.pxi b/base/PyNucleus_base/opt_true_mkl.pxi new file mode 100644 index 0000000..4a8737c --- /dev/null +++ b/base/PyNucleus_base/opt_true_mkl.pxi @@ -0,0 +1,56 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +ctypedef INDEX_t MKL_INT + +cdef extern from "mkl/mkl_spblas.h": + void mkl_cspblas_dcsrgemv (const char *transa , const MKL_INT *m , const REAL_t *a , const MKL_INT *ia , const MKL_INT *ja , const REAL_t *x , REAL_t *y ); + void mkl_dcsrmv (const char *transa , const MKL_INT *m , const MKL_INT *k , const REAL_t *alpha , const char *matdescra , + const REAL_t *val , const MKL_INT *indx , const MKL_INT *pntrb , const MKL_INT *pntre , + const REAL_t *x , const REAL_t *beta , REAL_t *y ); + # void mkl_zcsrmv (const char *transa , const MKL_INT *m , const MKL_INT *k , const COMPLEX_t *alpha , const char *matdescra , + # const COMPLEX_t *val , const MKL_INT *indx , const MKL_INT *pntrb , const MKL_INT *pntre , + # const COMPLEX_t *x , const COMPLEX_t *beta , COMPLEX_t *y ); + +cdef void spmv(INDEX_t[::1] indptr, INDEX_t[::1] indices, SCALAR_t[::1] data, SCALAR_t[::1] x, SCALAR_t[::1] y, BOOL_t overwrite=True): + cdef: + char transA = 78 + INDEX_t num_rows = indptr.shape[0]-1 + + assert overwrite + + if SCALAR_t is COMPLEX_t: + # mkl_cspblas_zcsrgemv(&transA, &num_rows, &data[0], &indptr[0], &indices[0], &x[0], &y[0]) + pass + else: + mkl_cspblas_dcsrgemv(&transA, &num_rows, &data[0], &indptr[0], &indices[0], &x[0], &y[0]) + + +cdef void spres(INDEX_t[::1] indptr, INDEX_t[::1] indices, SCALAR_t[::1] data, SCALAR_t[::1] x, SCALAR_t[::1] rhs, SCALAR_t[::1] result): + cdef: + char transA = 78 + SCALAR_t alpha = -1. + SCALAR_t beta = 1. + char matdscr[6] + INDEX_t inc = 1 + INDEX_t num_rows = indptr.shape[0]-1 + INDEX_t num_columns = x.shape[0] + + matdscr[0] = 71 + matdscr[2] = 78 + matdscr[3] = 67 + + assign(result, rhs) + if SCALAR_t is COMPLEX_t: + pass + # mkl_dcsrmv(&transA, &num_rows, &num_columns, &alpha, &matdscr[0], + # &data[0], &indices[0], &indptr[0], &indptr[1], + # &x[0], &beta, &result[0]) + else: + mkl_dcsrmv(&transA, &num_rows, &num_columns, &alpha, &matdscr[0], + &data[0], &indices[0], &indptr[0], &indptr[1], + &x[0], &beta, &result[0]) diff --git a/base/PyNucleus_base/opt_true_mkl_trisolve.pxi b/base/PyNucleus_base/opt_true_mkl_trisolve.pxi new file mode 100644 index 0000000..8af0fc3 --- /dev/null +++ b/base/PyNucleus_base/opt_true_mkl_trisolve.pxi @@ -0,0 +1,78 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +ctypedef INDEX_t MKL_INT + +cdef extern from "mkl/mkl_spblas.h": + void mkl_dcsrsm (const char *transa , const MKL_INT *m , const MKL_INT *n , const REAL_t *alpha , const char *matdescra , + const REAL_t *val , const MKL_INT *indx , const MKL_INT *pntrb , const MKL_INT *pntre , + const REAL_t *b , const MKL_INT *ldb , REAL_t *c , const MKL_INT *ldc ); + +cdef inline void trisolve_mkl(INDEX_t[::1] indptr, + INDEX_t[::1] indices, + REAL_t[::1] data, + REAL_t[::1] b, + REAL_t[::1] y, + BOOL_t forward=True, + BOOL_t unitDiagonal=False): + cdef: + char transA + REAL_t alpha = 1. + char matdscr[6] + INDEX_t inc = 1 + INDEX_t n = indptr.shape[0]-1 + INDEX_t one = 1 + matdscr[0] = 84 + if forward: + transA = 84 + else: + transA = 78 + matdscr[1] = 85 + if unitDiagonal: + matdscr[2] = 85 + else: + matdscr[2] = 78 + matdscr[3] = 67 + mkl_dcsrsm(&transA, &n, &one, &alpha, &matdscr[0], &data[0], &indices[0], &indptr[0], &indptr[1], &b[0], &one, &y[0], &one) + + +cdef class ichol_solver(solver): + def __init__(self, LinearOperator A=None, INDEX_t num_rows=-1): + solver.__init__(self, A, num_rows) + self.temp = uninitialized((self.num_rows), dtype=REAL) + + cpdef void setup(self, LinearOperator A=None): + cdef: + INDEX_t i + if A is not None: + self.A = A + if isinstance(self.A, CSR_LinearOperator): + self.indices, self.indptr, self.data, self.diagonal = ichol_csr(self.A) + elif isinstance(self.A, SSS_LinearOperator): + # self.indices, self.indptr, self.data, self.diagonal = ichol_sss(A) + self.indices, self.indptr, self.data, self.diagonal = ichol_csr(self.A.to_csr_linear_operator()) + else: + try: + B = self.A.to_csr_linear_operator() + self.indices, self.indptr, self.data, self.diagonal = ichol_csr(B) + except: + raise NotImplementedError() + + from . linear_operators import diagonalOperator + T = CSR_LinearOperator(self.indices, self.indptr, self.data).to_csr()+diagonalOperator(self.diagonal).to_csr() + self.L = CSR_LinearOperator.from_csr(T) + self.initialized = True + + cdef int solve(self, vector_t b, vector_t x) except -1: + solver.solve(self, b, x) + self.temp[:] = 0.0 + trisolve_mkl(self.L.indptr, self.L.indices, self.L.data, b, self.temp, forward=True, unitDiagonal=False) + trisolve_mkl(self.L.indptr, self.L.indices, self.L.data, self.temp, x, forward=False, unitDiagonal=False) + return 1 + + def __str__(self): + return 'Incomplete Cholesky' diff --git a/base/PyNucleus_base/opt_true_solver_cholmod.pxi b/base/PyNucleus_base/opt_true_solver_cholmod.pxi new file mode 100644 index 0000000..88b4a6b --- /dev/null +++ b/base/PyNucleus_base/opt_true_solver_cholmod.pxi @@ -0,0 +1,86 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +cdef class chol_solver(solver): + def __init__(self, LinearOperator A, INDEX_t num_rows=-1): + solver.__init__(self, A, num_rows) + + cpdef void setup(self, LinearOperator A=None): + cdef: + LinearOperator B = None + INDEX_t i, j, k + if A is not None: + self.A = A + + if not isinstance(self.A, (SSS_LinearOperator, + CSR_LinearOperator, + Dense_LinearOperator)): + if self.A.isSparse(): + B = self.A.to_csr_linear_operator() + else: + B = Dense_LinearOperator(np.ascontiguousarray(self.A.toarray())) + else: + B = self.A + + self.denseFactor = False + if isinstance(B, Dense_LinearOperator): + from scipy.linalg import cholesky + L = cholesky(B.toarray(), lower=True) + self.Lflat = uninitialized((L.shape[0]*(L.shape[1]+1)//2), dtype=REAL) + k = 0 + for i in range(L.shape[0]): + for j in range(i+1): + self.Lflat[k] = L[i, j] + k += 1 + self.denseFactor = True + self.temp = uninitialized((L.shape[0]), dtype=REAL) + else: + from sksparse.cholmod import cholesky + + if isinstance(B, CSR_LinearOperator): + try: + self.Ainv = cholesky(B.to_csc()) + except AttributeError: + self.Ainv = cholesky(B.to_csr().tocsc()) + elif isinstance(B, SSS_LinearOperator): + self.Ainv = cholesky(B.to_lower_csc()) + else: + try: + B = B.to_csr_linear_operator() + self.Ainv = cholesky(B) + except AttributeError: + raise NotImplementedError() + self.initialized = True + + cdef int solve(self, vector_t b, vector_t x) except -1: + cdef: + INDEX_t i, j, k + REAL_t val + REAL_t[::1] temp = self.temp, Lflat = self.Lflat + solver.solve(self, b, x) + if not self.denseFactor: + np.array(x, copy=False, dtype=REAL)[:] = self.Ainv(np.array(b, copy=False, dtype=REAL)) + else: + k = 0 + for i in range(x.shape[0]): + val = b[i] + for j in range(i): + val -= Lflat[k]*temp[j] + k += 1 + temp[i] = val/Lflat[k] + k += 1 + for i in range(x.shape[0]-1, -1, -1): + val = temp[i] + for j in range(i+1, x.shape[0]): + k = ((j*(j+1)) >> 1) + i + val -= Lflat[k]*x[j] + k = ((i*(i+1)) >> 1) + i + x[i] = val/Lflat[k] + return 1 + + def __str__(self): + return 'Cholesky' diff --git a/base/PyNucleus_base/opt_true_solver_pyamg.pxi b/base/PyNucleus_base/opt_true_solver_pyamg.pxi new file mode 100644 index 0000000..0e69c07 --- /dev/null +++ b/base/PyNucleus_base/opt_true_solver_pyamg.pxi @@ -0,0 +1,45 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +from pyamg import smoothed_aggregation_solver + +cdef class pyamg_solver(iterative_solver): + def __init__(self, LinearOperator A=None, num_rows=-1): + iterative_solver.__init__(self, A, num_rows) + + cpdef void setup(self, LinearOperator A=None): + iterative_solver.setup(self, A) + # self.ml = ruge_stuben_solver(self.A.to_csr(), + # coarse_solver='splu', + # max_coarse=2500, + # presmoother=('gauss_seidel', {'sweep': 'forward'}), + # postsmoother=('gauss_seidel', {'sweep': 'backward'})) + + self.ml = smoothed_aggregation_solver(self.A.to_csr(), + np.ones((self.num_rows)), + smooth=None, + coarse_solver='splu', + max_coarse=2500, + presmoother=('gauss_seidel', {'sweep': 'forward'}), + postsmoother=('gauss_seidel', {'sweep': 'backward'})) + self.initialized = True + + cdef int solve(self, vector_t b, vector_t x) except -1: + residuals = [] + x_np = np.array(x, copy=False) + if self.x0 is not None: + x_np[:] = self.ml.solve(np.array(b, copy=False), + x0=np.array(self.x0, copy=False), + tol=self.tol, maxiter=self.maxIter, residuals=residuals, accel='cg') + else: + x_np[:] = self.ml.solve(np.array(b, copy=False), + tol=self.tol, maxiter=self.maxIter, residuals=residuals, accel='cg') + self.residuals = residuals + return len(residuals) + + def __str__(self): + return str(self.ml) diff --git a/base/PyNucleus_base/opt_true_solver_pyamg_decl.pxi b/base/PyNucleus_base/opt_true_solver_pyamg_decl.pxi new file mode 100644 index 0000000..a1f2390 --- /dev/null +++ b/base/PyNucleus_base/opt_true_solver_pyamg_decl.pxi @@ -0,0 +1,10 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +cdef class pyamg_solver(iterative_solver): + cdef: + object ml diff --git a/base/PyNucleus_base/opt_true_solver_pypardiso.pxi b/base/PyNucleus_base/opt_true_solver_pypardiso.pxi new file mode 100644 index 0000000..42f66b6 --- /dev/null +++ b/base/PyNucleus_base/opt_true_solver_pypardiso.pxi @@ -0,0 +1,78 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class pardiso_lu_solver(solver): + def __init__(self, LinearOperator A, INDEX_t num_rows=-1): + solver.__init__(self, A, num_rows) + + cpdef void setup(self, LinearOperator A=None): + cdef: + INDEX_t i, j, explicitZeros, explicitZerosRow + REAL_t[:, ::1] data + + if A is not None: + self.A = A + + if not isinstance(self.A, (SSS_LinearOperator, + CSR_LinearOperator, + Dense_LinearOperator)): + if self.A.isSparse(): + self.A = self.A.to_csr_linear_operator() + else: + self.A = Dense_LinearOperator(np.ascontiguousarray(self.A.toarray())) + try_sparsification = False + sparsificationThreshold = 0.9 + if isinstance(self.A, Dense_LinearOperator) and try_sparsification: + explicitZeros = 0 + data = self.A.data + for i in range(self.A.num_rows): + explicitZerosRow = 0 + for j in range(self.A.num_columns): + if data[i, j] == 0.: + explicitZerosRow += 1 + explicitZeros += explicitZerosRow + if not (explicitZerosRow > sparsificationThreshold*self.A.num_columns): + break + if explicitZeros > sparsificationThreshold*self.A.num_rows*self.A.num_columns: + print('Converting dense to sparse matrix, since {}% of entries are zero.'.format(100.*explicitZeros/REAL(self.A.num_rows*self.A.num_columns))) + self.A = CSR_LinearOperator.from_dense(self.A) + if isinstance(self.A, (SSS_LinearOperator, + CSR_LinearOperator)): + from pypardiso import PyPardisoSolver + try: + self.Ainv = PyPardisoSolver() + self.Asp = self.A.to_csr() + self.Ainv.factorize(self.Asp) + except RuntimeError: + print(self.A, np.array(self.A.data)) + raise + elif isinstance(self.A, Dense_LinearOperator): + from scipy.linalg import lu_factor + self.lu, self.perm = lu_factor(self.A.data) + else: + raise NotImplementedError('Cannot use operator of type "{}"'.format(type(self.A))) + self.initialized = True + + cdef int solve(self, vector_t b, vector_t x) except -1: + cdef: + REAL_t[::1] temp + solver.solve(self, b, x) + if isinstance(self.A, (SSS_LinearOperator, CSR_LinearOperator)): + temp = self.Ainv.solve(self.Asp, + np.array(b, copy=False, dtype=REAL)) + assign(x, temp) + else: + from scipy.linalg import lu_solve + assign(x, b) + lu_solve((self.lu, self.perm), + np.array(x, copy=False, dtype=REAL), + overwrite_b=True) + return 1 + + def __str__(self): + return 'LU (MKL Pardiso)' diff --git a/base/PyNucleus_base/opt_true_solver_pypardiso_decl.pxi b/base/PyNucleus_base/opt_true_solver_pypardiso_decl.pxi new file mode 100644 index 0000000..c933773 --- /dev/null +++ b/base/PyNucleus_base/opt_true_solver_pypardiso_decl.pxi @@ -0,0 +1,16 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +from . solvers cimport solver +from . myTypes cimport REAL_t, INDEX_t + + +cdef class pardiso_lu_solver(solver): + cdef: + INDEX_t[::1] perm + object Ainv, lu + object Asp diff --git a/base/PyNucleus_base/solvers.pxd b/base/PyNucleus_base/solvers.pxd index 2cf5ed2..9f21fc0 100644 --- a/base/PyNucleus_base/solvers.pxd +++ b/base/PyNucleus_base/solvers.pxd @@ -21,7 +21,6 @@ from . convergence cimport (convergenceMaster, noOpConvergenceMaster, convergenceClient, noOpConvergenceClient) from . performanceLogger cimport PLogger, FakePLogger -include "config.pxi" cdef class solver: cdef: @@ -48,12 +47,7 @@ cdef class lu_solver(solver): BOOL_t useTriangularSolveRoutines -cdef class pardiso_lu_solver(solver): - cdef: - INDEX_t[::1] perm - object Ainv, lu - object Asp - +include "solver_pypardiso_decl.pxi" cdef class chol_solver(solver): cdef: @@ -155,10 +149,7 @@ cdef class bicgstab_solver(krylov_solver): cdef int solve(self, REAL_t[::1] b, REAL_t[::1] x) except -1 -IF USE_PYAMG: - cdef class pyamg_solver(iterative_solver): - cdef: - object ml +include "solver_pyamg_decl.pxi" cdef class complex_solver: diff --git a/base/PyNucleus_base/solvers.pyx b/base/PyNucleus_base/solvers.pyx index 2a25acf..9e78dba 100644 --- a/base/PyNucleus_base/solvers.pyx +++ b/base/PyNucleus_base/solvers.pyx @@ -17,8 +17,6 @@ from . linalg cimport (forward_solve_csc, backward_solve_csc, forward_solve_sss_noInverse, backward_solve_sss_noInverse) -include "config.pxi" - cdef class solver: def __init__(self, LinearOperator A=None, INDEX_t num_rows=-1): @@ -182,211 +180,9 @@ cdef class lu_solver(solver): return 'LU' -IF USE_PYPARDISO: - cdef class pardiso_lu_solver(solver): - def __init__(self, LinearOperator A, INDEX_t num_rows=-1): - solver.__init__(self, A, num_rows) - - cpdef void setup(self, LinearOperator A=None): - cdef: - INDEX_t i, j, explicitZeros, explicitZerosRow - REAL_t[:, ::1] data - - if A is not None: - self.A = A - - if not isinstance(self.A, (SSS_LinearOperator, - CSR_LinearOperator, - Dense_LinearOperator)): - if self.A.isSparse(): - self.A = self.A.to_csr_linear_operator() - else: - self.A = Dense_LinearOperator(np.ascontiguousarray(self.A.toarray())) - try_sparsification = False - sparsificationThreshold = 0.9 - if isinstance(self.A, Dense_LinearOperator) and try_sparsification: - explicitZeros = 0 - data = self.A.data - for i in range(self.A.num_rows): - explicitZerosRow = 0 - for j in range(self.A.num_columns): - if data[i, j] == 0.: - explicitZerosRow += 1 - explicitZeros += explicitZerosRow - if not (explicitZerosRow > sparsificationThreshold*self.A.num_columns): - break - if explicitZeros > sparsificationThreshold*self.A.num_rows*self.A.num_columns: - print('Converting dense to sparse matrix, since {}% of entries are zero.'.format(100.*explicitZeros/REAL(self.A.num_rows*self.A.num_columns))) - self.A = CSR_LinearOperator.from_dense(self.A) - if isinstance(self.A, (SSS_LinearOperator, - CSR_LinearOperator)): - from pypardiso import PyPardisoSolver - try: - self.Ainv = PyPardisoSolver() - self.Asp = self.A.to_csr() - self.Ainv.factorize(self.Asp) - except RuntimeError: - print(self.A, np.array(self.A.data)) - raise - elif isinstance(self.A, Dense_LinearOperator): - from scipy.linalg import lu_factor - self.lu, self.perm = lu_factor(self.A.data) - else: - raise NotImplementedError('Cannot use operator of type "{}"'.format(type(self.A))) - self.initialized = True - - cdef int solve(self, vector_t b, vector_t x) except -1: - cdef: - REAL_t[::1] temp - solver.solve(self, b, x) - if isinstance(self.A, (SSS_LinearOperator, CSR_LinearOperator)): - temp = self.Ainv.solve(self.Asp, - np.array(b, copy=False, dtype=REAL)) - assign(x, temp) - else: - from scipy.linalg import lu_solve - assign(x, b) - lu_solve((self.lu, self.perm), - np.array(x, copy=False, dtype=REAL), - overwrite_b=True) - return 1 - - def __str__(self): - return 'LU (MKL Pardiso)' - - -cdef class chol_solver(solver): - def __init__(self, LinearOperator A, INDEX_t num_rows=-1): - solver.__init__(self, A, num_rows) - - cpdef void setup(self, LinearOperator A=None): - cdef: - LinearOperator B = None - INDEX_t i, j, k - if A is not None: - self.A = A - - if not isinstance(self.A, (SSS_LinearOperator, - CSR_LinearOperator, - Dense_LinearOperator)): - if self.A.isSparse(): - B = self.A.to_csr_linear_operator() - else: - B = Dense_LinearOperator(np.ascontiguousarray(self.A.toarray())) - else: - B = self.A - - self.denseFactor = False - if isinstance(B, Dense_LinearOperator): - from scipy.linalg import cholesky - L = cholesky(B.toarray(), lower=True) - self.Lflat = uninitialized((L.shape[0]*(L.shape[1]+1)//2), dtype=REAL) - k = 0 - for i in range(L.shape[0]): - for j in range(i+1): - self.Lflat[k] = L[i, j] - k += 1 - self.denseFactor = True - self.temp = uninitialized((L.shape[0]), dtype=REAL) - else: - IF USE_CHOLMOD: - from sksparse.cholmod import cholesky - - if isinstance(B, CSR_LinearOperator): - try: - self.Ainv = cholesky(B.to_csc()) - except AttributeError: - self.Ainv = cholesky(B.to_csr().tocsc()) - elif isinstance(B, SSS_LinearOperator): - self.Ainv = cholesky(B.to_lower_csc()) - else: - try: - B = B.to_csr_linear_operator() - self.Ainv = cholesky(B) - except AttributeError: - raise NotImplementedError() - ELSE: - raise NotImplementedError("Cholmod not available, install \"scikit-sparse\".") - self.initialized = True - - cdef int solve(self, vector_t b, vector_t x) except -1: - cdef: - INDEX_t i, j, k - REAL_t val - REAL_t[::1] temp = self.temp, Lflat = self.Lflat - solver.solve(self, b, x) - if not self.denseFactor: - np.array(x, copy=False, dtype=REAL)[:] = self.Ainv(np.array(b, copy=False, dtype=REAL)) - else: - k = 0 - for i in range(x.shape[0]): - val = b[i] - for j in range(i): - val -= Lflat[k]*temp[j] - k += 1 - temp[i] = val/Lflat[k] - k += 1 - for i in range(x.shape[0]-1, -1, -1): - val = temp[i] - for j in range(i+1, x.shape[0]): - k = ((j*(j+1)) >> 1) + i - val -= Lflat[k]*x[j] - k = ((i*(i+1)) >> 1) + i - x[i] = val/Lflat[k] - return 1 - - def __str__(self): - return 'Cholesky' - - -cdef class ichol_solver(solver): - def __init__(self, LinearOperator A=None, INDEX_t num_rows=-1): - solver.__init__(self, A, num_rows) - self.temp = uninitialized((self.num_rows), dtype=REAL) - - cpdef void setup(self, LinearOperator A=None): - cdef: - INDEX_t i - if A is not None: - self.A = A - if isinstance(self.A, CSR_LinearOperator): - self.indices, self.indptr, self.data, self.diagonal = ichol_csr(self.A) - elif isinstance(self.A, SSS_LinearOperator): - # self.indices, self.indptr, self.data, self.diagonal = ichol_sss(A) - self.indices, self.indptr, self.data, self.diagonal = ichol_csr(self.A.to_csr_linear_operator()) - else: - try: - B = self.A.to_csr_linear_operator() - self.indices, self.indptr, self.data, self.diagonal = ichol_csr(B) - except: - raise NotImplementedError() - - IF USE_MKL_TRISOLVE: - from . linear_operators import diagonalOperator - T = CSR_LinearOperator(self.indices, self.indptr, self.data).to_csr()+diagonalOperator(self.diagonal).to_csr() - self.L = CSR_LinearOperator.from_csr(T) - ELSE: - for i in range(self.diagonal.shape[0]): - self.diagonal[i] = 1./self.diagonal[i] - self.initialized = True - - cdef int solve(self, vector_t b, vector_t x) except -1: - solver.solve(self, b, x) - self.temp[:] = 0.0 - IF USE_MKL_TRISOLVE: - trisolve_mkl(self.L.indptr, self.L.indices, self.L.data, b, self.temp, forward=True, unitDiagonal=False) - trisolve_mkl(self.L.indptr, self.L.indices, self.L.data, self.temp, x, forward=False, unitDiagonal=False) - ELSE: - forward_solve_sss_noInverse(self.indptr, self.indices, - self.data, self.diagonal, - b, self.temp, unitDiagonal=False) - backward_solve_sss_noInverse(self.indptr, self.indices, - self.data, self.diagonal, - self.temp, x) - return 1 - - def __str__(self): - return 'Incomplete Cholesky' +include "solver_pypardiso.pxi" +include "solver_chol.pxi" +include "solver_ichol.pxi" cdef class ilu_solver(solver): @@ -1001,47 +797,6 @@ cdef class bicgstab_solver(krylov_solver): return s -IF USE_PYAMG: - from pyamg import smoothed_aggregation_solver - - cdef class pyamg_solver(iterative_solver): - def __init__(self, LinearOperator A=None, num_rows=-1): - iterative_solver.__init__(self, A, num_rows) - - cpdef void setup(self, LinearOperator A=None): - iterative_solver.setup(self, A) - # self.ml = ruge_stuben_solver(self.A.to_csr(), - # coarse_solver='splu', - # max_coarse=2500, - # presmoother=('gauss_seidel', {'sweep': 'forward'}), - # postsmoother=('gauss_seidel', {'sweep': 'backward'})) - - self.ml = smoothed_aggregation_solver(self.A.to_csr(), - np.ones((self.num_rows)), - smooth=None, - coarse_solver='splu', - max_coarse=2500, - presmoother=('gauss_seidel', {'sweep': 'forward'}), - postsmoother=('gauss_seidel', {'sweep': 'backward'})) - self.initialized = True - - cdef int solve(self, vector_t b, vector_t x) except -1: - residuals = [] - x_np = np.array(x, copy=False) - if self.x0 is not None: - x_np[:] = self.ml.solve(np.array(b, copy=False), - x0=np.array(self.x0, copy=False), - tol=self.tol, maxiter=self.maxIter, residuals=residuals, accel='cg') - else: - x_np[:] = self.ml.solve(np.array(b, copy=False), - tol=self.tol, maxiter=self.maxIter, residuals=residuals, accel='cg') - self.residuals = residuals - return len(residuals) - - def __str__(self): - return str(self.ml) - - ###################################################################### diff --git a/base/PyNucleus_base/tupleDict.pyx b/base/PyNucleus_base/tupleDict.pyx index a10e47c..f626599 100644 --- a/base/PyNucleus_base/tupleDict.pyx +++ b/base/PyNucleus_base/tupleDict.pyx @@ -10,14 +10,7 @@ from libc.stdlib cimport malloc, realloc, free from libc.stdlib cimport qsort from . myTypes import INDEX -include "config.pxi" - -IF HAVE_MALLOC_H: - cdef extern from "malloc.h" nogil: - int malloc_trim(size_t pad) -ELSE: - cdef int malloc_trim(size_t pad): - pass +include "malloc.pxi" # def return_memory_to_OS(): # malloc_trim(0) diff --git a/base/setup.py b/base/setup.py index 76e336e..70bac63 100644 --- a/base/setup.py +++ b/base/setup.py @@ -35,14 +35,14 @@ raise ImportError('\'PyNucleus_packageTools\' needs to be installed first.') from e p = package('PyNucleus_base') -p.addOption('USE_BLAS', 'useBLAS', True) -p.addOption('USE_MKL', 'useMKL', False) -p.addOption('USE_CHOLMOD', 'use_cholmod', True, ['scikit-sparse']) -p.addOption('USE_PYAMG', 'use_pyamg', False, ['pyamg']) -p.addOption('USE_PYPARDISO', 'use_pypardiso', False, ['pypardiso']) -p.addOption('MKL_LIBRARY', 'mklLibrary', 'mkl_rt') -p.addOption('USE_MKL_TRISOLVE', 'useMKL_trisolve', False) -p.addOption('FILL_UNINITIALIZED', 'fillUninitialized', True) +p.addOption(None, 'useBLAS', True) +p.addOption(None, 'useMKL', False) +p.addOption(None, 'use_cholmod', True, ['scikit-sparse']) +p.addOption(None, 'use_pyamg', False, ['pyamg']) +p.addOption(None, 'use_pypardiso', False, ['pypardiso']) +p.addOption(None, 'mklLibrary', 'mkl_rt') +p.addOption(None, 'useMKL_trisolve', False) +p.addOption(None, 'fillUninitialized', True) try: cython.inline(""" @@ -50,10 +50,14 @@ int malloc_trim(size_t pad) """) have_malloc_h = True + if not (p.hash_file(p.folder+'malloc_linux.pxi') == + p.hash_file(p.folder+'malloc.pxi')): + copy(p.folder+'malloc_linux.pxi', p.folder+'malloc.pxi') except CompileError as e: print('malloc.h not found, error was \"{}\". Depending on the system, this might be normal.'.format(e)) - have_malloc_h = False -p.addOption('HAVE_MALLOC_H', 'have_malloc_h', have_malloc_h) + if not (p.hash_file(p.folder+'malloc_mac.pxi') == + p.hash_file(p.folder+'malloc.pxi')): + copy(p.folder+'malloc_mac.pxi', p.folder+'malloc.pxi') p.loadConfig(extra_config={'annotate': True, 'cythonDirectives': {'initializedcheck': False, 'boundscheck': False, @@ -139,6 +143,19 @@ fillTemplate(Path(p.folder), templates, replacementGroups) +# allocation +p.conditionalCopy(p.folder+'allocation.pxi', p.config['fillUninitialized'], p.folder+'opt_true_allocation.pxi', p.folder+'opt_false_allocation.pxi') +p.conditionalCopy(p.folder+'blas_routines.pxi', p.config['useBLAS'], p.folder+'opt_true_blas.pxi', p.folder+'opt_false_blas.pxi') +p.conditionalCopy(p.folder+'mkl_routines.pxi', p.config['useMKL'], p.folder+'opt_true_mkl.pxi', p.folder+'opt_false_mkl.pxi') + +# solvers +p.conditionalCopy(p.folder+'solver_chol.pxi', p.config['use_cholmod'], p.folder+'opt_true_solver_cholmod.pxi', p.folder+'opt_false_solver_cholmod.pxi') +p.conditionalCopy(p.folder+'solver_ichol.pxi', p.config['useMKL_trisolve'], p.folder+'opt_true_mkl_trisolve.pxi', p.folder+'opt_false_mkl_trisolve.pxi') +p.conditionalCopy(p.folder+'solver_pypardiso.pxi', p.config['use_pypardiso'], p.folder+'opt_true_solver_pypardiso.pxi', None) +p.conditionalCopy(p.folder+'solver_pypardiso_decl.pxi', p.config['use_pypardiso'], p.folder+'opt_true_solver_pypardiso_decl.pxi', None) +p.conditionalCopy(p.folder+'solver_pyamg.pxi', p.config['use_pyamg'], p.folder+'opt_true_solver_pyamg.pxi', None) +p.conditionalCopy(p.folder+'solver_pyamg_decl.pxi', p.config['use_pyamg'], p.folder+'opt_true_solver_pyamg_decl.pxi', None) + p.addExtension("linear_operators", sources=[p.folder+"linear_operators.pyx"]) p.addExtension("sparseGraph", diff --git a/fem/PyNucleus_fem/DoFMaps.pxd b/fem/PyNucleus_fem/DoFMaps.pxd index b3895cf..8596c55 100644 --- a/fem/PyNucleus_fem/DoFMaps.pxd +++ b/fem/PyNucleus_fem/DoFMaps.pxd @@ -74,6 +74,7 @@ cdef class shapeFunction: cdef REAL_t eval(self, const REAL_t[::1] lam) cdef REAL_t evalStrided(self, const REAL_t* lam, INDEX_t stride) cdef REAL_t evalGlobal(self, REAL_t[:, ::1] simplex, REAL_t[::1] x) + cdef void evalGrad(self, const REAL_t[::1] lam, const REAL_t[:, ::1] gradLam, REAL_t[::1] value) cdef class vectorShapeFunction: diff --git a/fem/PyNucleus_fem/DoFMaps.pyx b/fem/PyNucleus_fem/DoFMaps.pyx index a797c91..c3bf5c6 100644 --- a/fem/PyNucleus_fem/DoFMaps.pyx +++ b/fem/PyNucleus_fem/DoFMaps.pyx @@ -1672,6 +1672,14 @@ cdef class shapeFunction: cdef REAL_t evalStrided(self, const REAL_t* lam, INDEX_t stride): raise NotImplementedError() + cdef void evalGrad(self, const REAL_t[::1] lam, const REAL_t[:, ::1] gradLam, REAL_t[::1] value): + pass + + def evalGradPy(self, lam, gradLam): + value = uninitialized((gradLam.shape[1]), dtype=REAL) + self.evalGrad(np.array(lam), np.array(gradLam), value) + return value + cdef REAL_t evalGlobal(self, REAL_t[:, ::1] simplex, REAL_t[::1] x): if simplex.shape[1] == 1: getBarycentricCoords1D(simplex, x, self.bary) @@ -1818,6 +1826,13 @@ cdef class shapeFunctionP1(shapeFunction): cdef REAL_t evalStrided(self, const REAL_t* lam, INDEX_t stride): return lam[self.vertexNo*stride] + cdef void evalGrad(self, const REAL_t[::1] lam, const REAL_t[:, ::1] gradLam, REAL_t[::1] value): + cdef: + INDEX_t dim = gradLam.shape[1] + INDEX_t i + for i in range(dim): + value[i] = gradLam[self.vertexNo, i] + def __getstate__(self): return self.vertexNo diff --git a/fem/PyNucleus_fem/quadrature.pxd b/fem/PyNucleus_fem/quadrature.pxd index 541a8a9..de41d58 100644 --- a/fem/PyNucleus_fem/quadrature.pxd +++ b/fem/PyNucleus_fem/quadrature.pxd @@ -19,8 +19,6 @@ from . meshCy cimport (vectorProduct, cimport numpy as np from libc.math cimport sqrt -include "config.pxi" - cdef class quadratureRule: cdef: diff --git a/fem/PyNucleus_fem/quadrature.pyx b/fem/PyNucleus_fem/quadrature.pyx index b656d05..3493a37 100644 --- a/fem/PyNucleus_fem/quadrature.pyx +++ b/fem/PyNucleus_fem/quadrature.pyx @@ -13,8 +13,6 @@ import numpy as np from modepy import XiaoGimbutasSimplexQuadrature from modepy.tools import unit_to_barycentric -include "config.pxi" - cdef class quadratureRule: def __init__(self, diff --git a/metisCy/PyNucleus_metisCy/metisCy.pxd b/metisCy/PyNucleus_metisCy/metisCy.pxd index 3ef3850..834891c 100644 --- a/metisCy/PyNucleus_metisCy/metisCy.pxd +++ b/metisCy/PyNucleus_metisCy/metisCy.pxd @@ -7,14 +7,4 @@ cimport numpy as np -include "config.pxi" - -IF IDXTYPEWIDTH == 32: - ctypedef np.int32_t idx_t -ELIF IDXTYPEWIDTH == 64: - ctypedef np.int64_t idx_t - -IF REALTYPEWIDTH == 32: - ctypedef float real_t -ELIF REALTYPEWIDTH == 64: - ctypedef np.float64_t real_t +include "metisTypes_decl.pxi" diff --git a/metisCy/PyNucleus_metisCy/metisCy.pyx b/metisCy/PyNucleus_metisCy/metisCy.pyx index 95a320d..5955ba0 100644 --- a/metisCy/PyNucleus_metisCy/metisCy.pyx +++ b/metisCy/PyNucleus_metisCy/metisCy.pyx @@ -8,20 +8,9 @@ import numpy as np cimport numpy as np -include "config.pxi" - from PyNucleus_base import uninitialized -IF IDXTYPEWIDTH == 32: - idx = np.int32 -ELIF IDXTYPEWIDTH == 64: - idx = np.int64 - -IF REALTYPEWIDTH == 32: - real = np.float32 -ELIF REALTYPEWIDTH == 64: - real = np.float64 - +include "metisTypes.pxi" cdef extern from "metis.h": int METIS_PartGraphRecursive(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy, diff --git a/metisCy/PyNucleus_metisCy/parmetisCy.pyx b/metisCy/PyNucleus_metisCy/parmetisCy.pyx index 0edaffb..72228fc 100644 --- a/metisCy/PyNucleus_metisCy/parmetisCy.pyx +++ b/metisCy/PyNucleus_metisCy/parmetisCy.pyx @@ -17,23 +17,8 @@ from PyNucleus_base import uninitialized ctypedef mpi.MPI_Comm MPI_Comm -include "config.pxi" - -IF IDXTYPEWIDTH == 32: - idx = np.int32 - cdef int IDX = np.NPY_INT32 - ctypedef np.int32_t idx_t -ELIF IDXTYPEWIDTH == 64: - idx = np.int64 - cdef int IDX = np.NPY_INT64 - ctypedef np.int64_t idx_t - -IF REALTYPEWIDTH == 32: - real = np.float32 - ctypedef float real_t -ELIF REALTYPEWIDTH == 64: - real = np.float64 - ctypedef np.float64_t real_t +include "metisTypes_decl.pxi" +include "metisTypes.pxi" cdef extern from "parmetis.h": diff --git a/metisCy/setup.py b/metisCy/setup.py index cfe43c6..fec4c1c 100644 --- a/metisCy/setup.py +++ b/metisCy/setup.py @@ -5,6 +5,8 @@ # If you want to use this code, please refer to the README.rst and LICENSE files. # ################################################################################### +from pathlib import Path +from shutil import copy try: from PyNucleus_packageTools import package except ImportError as e: @@ -25,9 +27,38 @@ int REALTYPEWIDTH return IDXTYPEWIDTH, REALTYPEWIDTH""", cython_include_dirs=p.config['includeDirs']) - -p.addOption('IDXTYPEWIDTH', 'METIS_idx_width', idx) -p.addOption('REALTYPEWIDTH', 'METIS_real_width', real) +idx = int(idx) +real = int(real) +python_def = '' +decl = '' +if idx == 32: + python_def += 'idx = np.int32\n' + decl += 'ctypedef np.int32_t idx_t\n' +elif idx == 64: + python_def += 'idx = np.int64\n' + decl += 'ctypedef np.int64_t idx_t\n' +else: + raise NotImplementedError() +if real == 32: + python_def += 'real = np.float32\n' + decl += 'ctypedef float real_t\n' +elif real == 64: + python_def += 'real = np.float64\n' + decl += 'ctypedef np.float64_t real_t\n' +else: + raise NotImplementedError() +with open(p.folder+'metisTypesTemp.pxi', 'w') as f: + f.write(python_def) +with open(p.folder+'metisTypesTemp_decl.pxi', 'w') as f: + f.write(decl) +if not (p.hash_file(p.folder+'metisTypesTemp.pxi') == + p.hash_file(p.folder+'metisTypes.pxi')): + copy(p.folder+'metisTypesTemp.pxi', p.folder+'metisTypes.pxi') +Path(p.folder+'metisTypesTemp.pxi').unlink() +if not (p.hash_file(p.folder+'metisTypesTemp_decl.pxi') == + p.hash_file(p.folder+'metisTypes_decl.pxi')): + copy(p.folder+'metisTypesTemp_decl.pxi', p.folder+'metisTypes_decl.pxi') +Path(p.folder+'metisTypesTemp_decl.pxi').unlink() p.loadConfig() p.addPackageInclude('PyNucleus_base') diff --git a/nl/PyNucleus_nl/bem_{SCALAR}.pxi b/nl/PyNucleus_nl/bem_{SCALAR}.pxi new file mode 100644 index 0000000..a5d295f --- /dev/null +++ b/nl/PyNucleus_nl/bem_{SCALAR}.pxi @@ -0,0 +1,1182 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + self.qrId = sQR.qr + self.PHI_id = sQR.PHI3 + elif panel == COMMON_VERTEX: + try: + sQR = self.specialQuadRules[(singularityValue, panel)] + except KeyError: + + qr = singularityCancelationQuadRule1D(panel, + self.singularityCancelationIntegrandAcrossElements+singularityValue, + self.quad_order_diagonal, + 2*dm_order) + PHI = uninitialized((2*dofs_per_element - dofs_per_vertex, + qr.num_nodes, + 2), dtype=REAL) + + for dof in range(dofs_per_vertex): + sf = self.getLocalShapeFunction(dof) + for i in range(qr.num_nodes): + lcl_bary_x[0] = qr.nodes[0, i] + lcl_bary_x[1] = qr.nodes[1, i] + lcl_bary_y[0] = qr.nodes[2, i] + lcl_bary_y[1] = qr.nodes[3, i] + PHI[dof, i, 0] = sf.eval(lcl_bary_x) + PHI[dof, i, 1] = sf.eval(lcl_bary_y) + + for dof in range(dofs_per_vertex, dofs_per_element): + sf = self.getLocalShapeFunction(dof) + for i in range(qr.num_nodes): + lcl_bary_x[0] = qr.nodes[0, i] + lcl_bary_x[1] = qr.nodes[1, i] + lcl_bary_y[0] = qr.nodes[2, i] + lcl_bary_y[1] = qr.nodes[3, i] + PHI[dof, i, 0] = sf.eval(lcl_bary_x) + PHI[dof, i, 1] = 0 + PHI[dofs_per_element+dof-dofs_per_vertex, i, 0] = 0 + PHI[dofs_per_element+dof-dofs_per_vertex, i, 1] = sf.eval(lcl_bary_y) + + sQR = specialQuadRule(qr, PHI3=PHI) + self.specialQuadRules[(singularityValue, panel)] = sQR + if qr.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qr.num_nodes), dtype={SCALAR}) + self.temp2 = uninitialized((qr.num_nodes), dtype={SCALAR}) + self.qrVertex = sQR.qr + self.PHI_vertex = sQR.PHI3 + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + +cdef class {SCALAR_label}bem3D({SCALAR_label}bem): + def __init__(self, {SCALAR_label}Kernel kernel, meshBase mesh, DoFMap DoFMap, num_dofs=None, manifold_dim2=-1, **kwargs): + super({SCALAR_label}bem3D, self).__init__(kernel, mesh, DoFMap, num_dofs, manifold_dim2, **kwargs) + + cdef REAL_t get_h_simplex(self, const REAL_t[:, ::1] simplex): + cdef: + INDEX_t i, j + REAL_t hmax = 0., h2 + for i in range(2): + for j in range(i+1, 3): + h2 = ((simplex[j, 0]-simplex[i, 0])*(simplex[j, 0]-simplex[i, 0]) + + (simplex[j, 1]-simplex[i, 1])*(simplex[j, 1]-simplex[i, 1]) + + (simplex[j, 2]-simplex[i, 2])*(simplex[j, 2]-simplex[i, 2])) + hmax = max(hmax, h2) + return sqrt(hmax) + + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d): + cdef: + panelType panel, panel2 + REAL_t logdh1 = log(d/h1), logdh2 = log(d/h2) + REAL_t c = (0.5*self.target_order+0.5)*log(self.num_dofs*self.H0**2) #-4. + REAL_t logh1H0 = abs(log(h1/self.H0)), logh2H0 = abs(log(h2/self.H0)) + REAL_t loghminH0 = max(logh1H0, logh2H0) + REAL_t s = max(-0.5*(self.kernel.getSingularityValue()+2), 0.) + panel = max(ceil((c + (s-1.)*logh2H0 + loghminH0 - s*logdh2) / + (max(logdh1, 0) + 0.4)), + 2) + panel2 = max(ceil((c + (s-1.)*logh1H0 + loghminH0 - s*logdh1) / + (max(logdh2, 0) + 0.4)), + 2) + panel = max(panel, panel2) + if self.distantQuadRulesPtr[panel] == NULL: + self.addQuadRule_nonSym(panel) + return panel + + cdef void getNearQuadRule(self, panelType panel): + cdef: + INDEX_t i + REAL_t singularityValue = self.kernel.getSingularityValue() + specialQuadRule sQR + quadratureRule qr + INDEX_t dofs_per_element = self.DoFMap.dofs_per_element + INDEX_t dofs_per_edge = self.DoFMap.dofs_per_edge + INDEX_t dofs_per_vertex = self.DoFMap.dofs_per_vertex + INDEX_t dm_order = max(self.DoFMap.polynomialOrder, 1) + shapeFunction sf + INDEX_t manifold_dim = 2 + REAL_t lcl_bary_x[3] + REAL_t lcl_bary_y[3] + REAL_t[:, :, ::1] PHI + INDEX_t dof + + if panel == COMMON_FACE: + try: + sQR = self.specialQuadRules[(singularityValue, panel)] + except KeyError: + + qr = singularityCancelationQuadRule2D(panel, + self.singularityCancelationIntegrandWithinElement+singularityValue, + self.quad_order_diagonal, + self.quad_order_diagonalV, + 1) + PHI = uninitialized((dofs_per_element, qr.num_nodes, 2), dtype=REAL) + + for dof in range(dofs_per_element): + sf = self.getLocalShapeFunction(dof) + for i in range(qr.num_nodes): + lcl_bary_x[0] = qr.nodes[0, i] + lcl_bary_x[1] = qr.nodes[1, i] + lcl_bary_x[2] = qr.nodes[2, i] + lcl_bary_y[0] = qr.nodes[3, i] + lcl_bary_y[1] = qr.nodes[4, i] + lcl_bary_y[2] = qr.nodes[5, i] + PHI[dof, i, 0] = sf.eval(lcl_bary_x) + PHI[dof, i, 1] = sf.eval(lcl_bary_y) + + sQR = specialQuadRule(qr, PHI3=PHI) + self.specialQuadRules[(singularityValue, panel)] = sQR + if qr.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qr.num_nodes), dtype={SCALAR}) + self.temp2 = uninitialized((qr.num_nodes), dtype={SCALAR}) + self.qrId = sQR.qr + self.PHI_id = sQR.PHI3 + elif panel == COMMON_EDGE: + try: + sQR = self.specialQuadRules[(singularityValue, panel)] + except KeyError: + + qr = singularityCancelationQuadRule2D(panel, + self.singularityCancelationIntegrandAcrossElements+singularityValue, + self.quad_order_diagonal, + self.quad_order_diagonalV, + 1) + PHI = uninitialized((2*dofs_per_element - 2*dofs_per_vertex - dofs_per_edge, + qr.num_nodes, + 2), dtype=REAL) + + for dof in range(2*dofs_per_vertex): + sf = self.getLocalShapeFunction(dof) + for i in range(qr.num_nodes): + lcl_bary_x[0] = qr.nodes[0, i] + lcl_bary_x[1] = qr.nodes[1, i] + lcl_bary_x[2] = qr.nodes[2, i] + lcl_bary_y[0] = qr.nodes[3, i] + lcl_bary_y[1] = qr.nodes[4, i] + lcl_bary_y[2] = qr.nodes[5, i] + PHI[dof, i, 0] = sf.eval(lcl_bary_x) + PHI[dof, i, 1] = sf.eval(lcl_bary_y) + + for dof in range((manifold_dim+1)*dofs_per_vertex, (manifold_dim+1)*dofs_per_vertex+dofs_per_edge): + sf = self.getLocalShapeFunction(dof) + for i in range(qr.num_nodes): + lcl_bary_x[0] = qr.nodes[0, i] + lcl_bary_x[1] = qr.nodes[1, i] + lcl_bary_x[2] = qr.nodes[2, i] + lcl_bary_y[0] = qr.nodes[3, i] + lcl_bary_y[1] = qr.nodes[4, i] + lcl_bary_y[2] = qr.nodes[5, i] + PHI[dof, i, 0] = sf.eval(lcl_bary_x) + PHI[dof, i, 1] = sf.eval(lcl_bary_y) + + for dof in range(2*dofs_per_vertex, (manifold_dim+1)*dofs_per_vertex): + sf = self.getLocalShapeFunction(dof) + for i in range(qr.num_nodes): + lcl_bary_x[0] = qr.nodes[0, i] + lcl_bary_x[1] = qr.nodes[1, i] + lcl_bary_x[2] = qr.nodes[2, i] + lcl_bary_y[0] = qr.nodes[3, i] + lcl_bary_y[1] = qr.nodes[4, i] + lcl_bary_y[2] = qr.nodes[5, i] + PHI[dof, i, 0] = sf.eval(lcl_bary_x) + PHI[dof, i, 1] = 0 + PHI[dofs_per_element+dof-2*dofs_per_vertex, i, 0] = 0 + PHI[dofs_per_element+dof-2*dofs_per_vertex, i, 1] = sf.eval(lcl_bary_y) + + for dof in range((manifold_dim+1)*dofs_per_vertex+dofs_per_edge, dofs_per_element): + sf = self.getLocalShapeFunction(dof) + for i in range(qr.num_nodes): + lcl_bary_x[0] = qr.nodes[0, i] + lcl_bary_x[1] = qr.nodes[1, i] + lcl_bary_x[2] = qr.nodes[2, i] + lcl_bary_y[0] = qr.nodes[3, i] + lcl_bary_y[1] = qr.nodes[4, i] + lcl_bary_y[2] = qr.nodes[5, i] + PHI[dof, i, 0] = sf.eval(lcl_bary_x) + PHI[dof, i, 1] = 0 + PHI[dofs_per_element+dof-2*dofs_per_vertex-dofs_per_edge, i, 0] = 0 + PHI[dofs_per_element+dof-2*dofs_per_vertex-dofs_per_edge, i, 1] = sf.eval(lcl_bary_y) + + sQR = specialQuadRule(qr, PHI3=PHI) + self.specialQuadRules[(singularityValue, panel)] = sQR + if qr.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qr.num_nodes), dtype={SCALAR}) + self.temp2 = uninitialized((qr.num_nodes), dtype={SCALAR}) + self.qrEdge = sQR.qr + self.PHI_edge = sQR.PHI3 + elif panel == COMMON_VERTEX: + try: + sQR = self.specialQuadRules[(singularityValue, panel)] + except KeyError: + + qr = singularityCancelationQuadRule2D(panel, + self.singularityCancelationIntegrandAcrossElements+singularityValue, + self.quad_order_diagonal, + self.quad_order_diagonalV, + 1) + PHI = uninitialized((2*dofs_per_element - dofs_per_vertex, + qr.num_nodes, + 2), dtype=REAL) + + for dof in range(dofs_per_vertex): + sf = self.getLocalShapeFunction(dof) + for i in range(qr.num_nodes): + lcl_bary_x[0] = qr.nodes[0, i] + lcl_bary_x[1] = qr.nodes[1, i] + lcl_bary_x[2] = qr.nodes[2, i] + lcl_bary_y[0] = qr.nodes[3, i] + lcl_bary_y[1] = qr.nodes[4, i] + lcl_bary_y[2] = qr.nodes[5, i] + PHI[dof, i, 0] = sf.eval(lcl_bary_x) + PHI[dof, i, 1] = sf.eval(lcl_bary_y) + + for dof in range(dofs_per_vertex, dofs_per_element): + sf = self.getLocalShapeFunction(dof) + for i in range(qr.num_nodes): + lcl_bary_x[0] = qr.nodes[0, i] + lcl_bary_x[1] = qr.nodes[1, i] + lcl_bary_x[2] = qr.nodes[2, i] + lcl_bary_y[0] = qr.nodes[3, i] + lcl_bary_y[1] = qr.nodes[4, i] + lcl_bary_y[2] = qr.nodes[5, i] + PHI[dof, i, 0] = sf.eval(lcl_bary_x) + PHI[dof, i, 1] = 0 + PHI[dofs_per_element+dof-dofs_per_vertex, i, 0] = 0 + PHI[dofs_per_element+dof-dofs_per_vertex, i, 1] = sf.eval(lcl_bary_y) + + sQR = specialQuadRule(qr, PHI3=PHI) + self.specialQuadRules[(singularityValue, panel)] = sQR + if qr.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qr.num_nodes), dtype={SCALAR}) + self.temp2 = uninitialized((qr.num_nodes), dtype={SCALAR}) + self.qrVertex = sQR.qr + self.PHI_vertex = sQR.PHI3 + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + +cdef class {SCALAR_label}bem2D_V({SCALAR_label}bem2D): + """The local stiffness matrix + + .. math:: + + \\int_{K_1}\\int_{K_2} 0.5 * [ u(x) v(y) + u(y) v(x) ] \\gamma(x,y) dy dx + + for the V operator. + """ + def __init__(self, + {SCALAR_label}Kernel kernel, + meshBase mesh, + DoFMap DoFMap, + quad_order_diagonal=None, + target_order=None, + num_dofs=None, + **kwargs): + cdef: + REAL_t smin, smax + super({SCALAR_label}bem2D_V, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + + # The integrand (excluding the kernel) cancels 2 orders of the singularity within an element. + self.singularityCancelationIntegrandWithinElement = 0. + # The integrand (excluding the kernel) cancels 2 orders of the + # singularity across elements for continuous finite elements. + if isinstance(DoFMap, P0_DoFMap): + assert self.kernel.max_singularity > -1., "Discontinuous finite elements are not conforming for singularity order {} <= -2.".format(self.kernel.max_singularity) + self.singularityCancelationIntegrandAcrossElements = 0. + else: + self.singularityCancelationIntegrandAcrossElements = 0. + + smin = max(-0.5*(self.kernel.min_singularity+1), 0.) + smax = max(-0.5*(self.kernel.max_singularity+1), 0.) + + if target_order is None: + # this is the desired local quadrature error + target_order = self.DoFMap.polynomialOrder+1-smin + self.target_order = target_order + if quad_order_diagonal is None: + # measured log(2 rho_2) = 0.43 + quad_order_diagonal = max(np.ceil(((target_order+2.)*log(self.num_dofs*self.H0) + (2.*smax-1.)*abs(log(self.hmin/self.H0)))/0.8), 2) + self.quad_order_diagonal = quad_order_diagonal + + if (self.kernel.kernelType != FRACTIONAL) or (not self.kernel.variableOrder): + self.getNearQuadRule(COMMON_EDGE) + self.getNearQuadRule(COMMON_VERTEX) + + cdef void eval(self, + {SCALAR}_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, m, i, j, I, J + REAL_t vol, vol1 = self.vol1, vol2 = self.vol2 + {SCALAR}_t val + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + quadratureRule qr + REAL_t[:, :, ::1] PHI + INDEX_t dofs_per_element = self.DoFMap.dofs_per_element + INDEX_t dim = 2 + REAL_t x[2] + REAL_t y[2] + + if panel >= 1: + self.eval_distant_bem_V(contrib, panel, mask) + return + elif panel == COMMON_EDGE: + qr = self.qrId + PHI = self.PHI_id + elif panel == COMMON_VERTEX: + qr = self.qrVertex + PHI = self.PHI_vertex + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + vol = vol1*vol2 + for m in range(qr.num_nodes): + for j in range(dim): + x[j] = (simplex1[self.perm1[0], j]*qr.nodes[0, m] + + simplex1[self.perm1[1], j]*qr.nodes[1, m]) + y[j] = (simplex2[self.perm2[0], j]*qr.nodes[2, m] + + simplex2[self.perm2[1], j]*qr.nodes[3, m]) + self.temp[m] = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) + + contrib[:] = 0. + for I in range(PHI.shape[0]): + i = self.perm[I] + for J in range(I, PHI.shape[0]): + j = self.perm[J] + if j < i: + k = 2*dofs_per_element*j-(j*(j+1) >> 1) + i + else: + k = 2*dofs_per_element*i-(i*(i+1) >> 1) + j + if mask[k]: + val = 0. + for m in range(qr.num_nodes): + val += self.temp[m] * (PHI[I, m, 0] * PHI[J, m, 1] + PHI[I, m, 1] * PHI[J, m, 0]) + contrib[k] = 0.5*val*vol + + +cdef class {SCALAR_label}bem2D_K({SCALAR_label}bem2D): + """The local stiffness matrix + + .. math:: + + \\int_{K_1}\\int_{K_2} 0.5 * [ u(x) v(y) + u(y) v(x) ] \\gamma(x,y) dy dx + + for the V operator. + """ + def __init__(self, + {SCALAR_label}Kernel kernel, + meshBase mesh, + DoFMap DoFMap, + quad_order_diagonal=None, + target_order=None, + num_dofs=None, + **kwargs): + cdef: + REAL_t smin, smax + super({SCALAR_label}bem2D_K, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + + # The integrand (excluding the kernel) cancels 2 orders of the singularity within an element. + self.singularityCancelationIntegrandWithinElement = 1. + # The integrand (excluding the kernel) cancels 2 orders of the + # singularity across elements for continuous finite elements. + if isinstance(DoFMap, P0_DoFMap): + assert self.kernel.max_singularity > -1., "Discontinuous finite elements are not conforming for singularity order {} <= -2.".format(self.kernel.max_singularity) + self.singularityCancelationIntegrandAcrossElements = 1. + else: + self.singularityCancelationIntegrandAcrossElements = 1. + + smin = max(-0.5*(self.kernel.min_singularity+1), 0.) + smax = max(-0.5*(self.kernel.max_singularity+1), 0.) + + if target_order is None: + # this is the desired local quadrature error + target_order = self.DoFMap.polynomialOrder+1-smin + self.target_order = target_order + if quad_order_diagonal is None: + # measured log(2 rho_2) = 0.43 + quad_order_diagonal = max(np.ceil(((target_order+2.)*log(self.num_dofs*self.H0) + (2.*smax-1.)*abs(log(self.hmin/self.H0)))/0.8), 2) + self.quad_order_diagonal = quad_order_diagonal + + if (self.kernel.kernelType != FRACTIONAL) or (not self.kernel.variableOrder): + self.getNearQuadRule(COMMON_EDGE) + self.getNearQuadRule(COMMON_VERTEX) + + cdef void eval(self, + {SCALAR}_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, m, i, j, I, J + REAL_t vol, valReal, vol1 = self.vol1, vol2 = self.vol2 + {SCALAR}_t val + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + quadratureRule qr + REAL_t[:, :, ::1] PHI + INDEX_t dofs_per_element = self.DoFMap.dofs_per_element + INDEX_t dim = 2 + REAL_t x[2] + REAL_t y[2] + REAL_t normW + + if panel >= 1: + self.eval_distant_bem_K(contrib, panel, mask) + return + elif panel == COMMON_EDGE: + qr = self.qrId + PHI = self.PHI_id + elif panel == COMMON_VERTEX: + qr = self.qrVertex + PHI = self.PHI_vertex + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + # n is independent of x and y + self.n1[0] = simplex1[1, 1] - simplex1[0, 1] + self.n1[1] = simplex1[0, 0] - simplex1[1, 0] + valReal = 1./sqrt(mydot(self.n1, self.n1)) + self.n1[0] *= valReal + self.n1[1] *= valReal + + self.n2[0] = simplex2[1, 1] - simplex2[0, 1] + self.n2[1] = simplex2[0, 0] - simplex2[1, 0] + valReal = 1./sqrt(mydot(self.n2, self.n2)) + self.n2[0] *= valReal + self.n2[1] *= valReal + + vol = vol1*vol2 + for m in range(qr.num_nodes): + normW = 0. + for j in range(dim): + x[j] = (simplex1[self.perm1[0], j]*qr.nodes[0, m] + + simplex1[self.perm1[1], j]*qr.nodes[1, m]) + y[j] = (simplex2[self.perm2[0], j]*qr.nodes[2, m] + + simplex2[self.perm2[1], j]*qr.nodes[3, m]) + self.w[j] = y[j]-x[j] + normW += self.w[j]**2 + normW = 1./sqrt(normW) + for j in range(dim): + self.w[j] *= normW + + val = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) + self.temp[m] = val * mydot(self.n2, self.w) + self.temp2[m] = - val * mydot(self.n1, self.w) + + contrib[:] = 0. + for I in range(PHI.shape[0]): + i = self.perm[I] + for J in range(I, PHI.shape[0]): + j = self.perm[J] + if j < i: + k = 2*dofs_per_element*j-(j*(j+1) >> 1) + i + else: + k = 2*dofs_per_element*i-(i*(i+1) >> 1) + j + if mask[k]: + val = 0. + for m in range(qr.num_nodes): + val += self.temp[m] * PHI[I, m, 0] * PHI[J, m, 1] + self.temp2[m] * PHI[I, m, 1] * PHI[J, m, 0] + contrib[k] = 0.5*val*vol + + +cdef class {SCALAR_label}bem2D_K_prime({SCALAR_label}bem2D): + """The local stiffness matrix + + .. math:: + + \\int_{K_1}\\int_{K_2} 0.5 * [ u(x) v(y) + u(y) v(x) ] \\gamma(x,y) dy dx + + for the V operator. + """ + def __init__(self, + {SCALAR_label}Kernel kernel, + meshBase mesh, + DoFMap DoFMap, + quad_order_diagonal=None, + target_order=None, + num_dofs=None, + **kwargs): + cdef: + REAL_t smin, smax + super({SCALAR_label}bem2D_K_prime, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + + # The integrand (excluding the kernel) cancels 2 orders of the singularity within an element. + self.singularityCancelationIntegrandWithinElement = 1. + # The integrand (excluding the kernel) cancels 2 orders of the + # singularity across elements for continuous finite elements. + if isinstance(DoFMap, P0_DoFMap): + assert self.kernel.max_singularity > -1., "Discontinuous finite elements are not conforming for singularity order {} <= -2.".format(self.kernel.max_singularity) + self.singularityCancelationIntegrandAcrossElements = 1. + else: + self.singularityCancelationIntegrandAcrossElements = 1. + + smin = max(-0.5*(self.kernel.min_singularity+1), 0.) + smax = max(-0.5*(self.kernel.max_singularity+1), 0.) + + if target_order is None: + # this is the desired local quadrature error + target_order = self.DoFMap.polynomialOrder+1-smin + self.target_order = target_order + if quad_order_diagonal is None: + # measured log(2 rho_2) = 0.43 + quad_order_diagonal = max(np.ceil(((target_order+2.)*log(self.num_dofs*self.H0) + (2.*smax-1.)*abs(log(self.hmin/self.H0)))/0.8), 2) + self.quad_order_diagonal = quad_order_diagonal + + if (self.kernel.kernelType != FRACTIONAL) or (not self.kernel.variableOrder): + self.getNearQuadRule(COMMON_EDGE) + self.getNearQuadRule(COMMON_VERTEX) + + cdef void eval(self, + {SCALAR}_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, m, i, j, I, J + REAL_t vol, valReal, vol1 = self.vol1, vol2 = self.vol2 + {SCALAR}_t val + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + quadratureRule qr + REAL_t[:, :, ::1] PHI + INDEX_t dofs_per_element = self.DoFMap.dofs_per_element + INDEX_t dim = 2 + REAL_t x[2] + REAL_t y[2] + REAL_t normW + + if panel >= 1: + self.eval_distant_bem_K_prime(contrib, panel, mask) + return + elif panel == COMMON_EDGE: + qr = self.qrId + PHI = self.PHI_id + elif panel == COMMON_VERTEX: + qr = self.qrVertex + PHI = self.PHI_vertex + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + # n is independent of x and y + self.n1[0] = simplex1[1, 1] - simplex1[0, 1] + self.n1[1] = simplex1[0, 0] - simplex1[1, 0] + valReal = 1./sqrt(mydot(self.n1, self.n1)) + self.n1[0] *= valReal + self.n1[1] *= valReal + + self.n2[0] = simplex2[1, 1] - simplex2[0, 1] + self.n2[1] = simplex2[0, 0] - simplex2[1, 0] + valReal = 1./sqrt(mydot(self.n2, self.n2)) + self.n2[0] *= valReal + self.n2[1] *= valReal + + vol = vol1*vol2 + for m in range(qr.num_nodes): + normW = 0. + for j in range(dim): + x[j] = (simplex1[self.perm1[0], j]*qr.nodes[0, m] + + simplex1[self.perm1[1], j]*qr.nodes[1, m]) + y[j] = (simplex2[self.perm2[0], j]*qr.nodes[2, m] + + simplex2[self.perm2[1], j]*qr.nodes[3, m]) + self.w[j] = y[j]-x[j] + normW += self.w[j]**2 + normW = 1./sqrt(normW) + for j in range(dim): + self.w[j] *= normW + + val = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) + self.temp[m] = - val * mydot(self.n1, self.w) + self.temp2[m] = val * mydot(self.n2, self.w) + + contrib[:] = 0. + for I in range(PHI.shape[0]): + i = self.perm[I] + for J in range(I, PHI.shape[0]): + j = self.perm[J] + if j < i: + k = 2*dofs_per_element*j-(j*(j+1) >> 1) + i + else: + k = 2*dofs_per_element*i-(i*(i+1) >> 1) + j + if mask[k]: + val = 0. + for m in range(qr.num_nodes): + val += self.temp[m] * PHI[I, m, 0] * PHI[J, m, 1] + self.temp2[m] * PHI[I, m, 1] * PHI[J, m, 0] + contrib[k] = 0.5*val*vol + + +cdef class {SCALAR_label}bem3D_V({SCALAR_label}bem3D): + """The local stiffness matrix + + .. math:: + + \\int_{K_1}\\int_{K_2} 0.5 * [ u(x) v(y) + u(y) v(x) ] \\gamma(x,y) dy dx + + for the V operator. + """ + def __init__(self, + {SCALAR_label}Kernel kernel, + meshBase mesh, + DoFMap DoFMap, + quad_order_diagonal=None, + target_order=None, + num_dofs=None, + **kwargs): + cdef: + REAL_t smin, smax + super({SCALAR_label}bem3D_V, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + + # The integrand (excluding the kernel) cancels 2 orders of the singularity within an element. + self.singularityCancelationIntegrandWithinElement = 0. + # The integrand (excluding the kernel) cancels 2 orders of the + # singularity across elements for continuous finite elements. + if isinstance(DoFMap, P0_DoFMap): + assert self.kernel.max_singularity > -3., "Discontinuous finite elements are not conforming for singularity order {} <= -3.".format(self.kernel.max_singularity) + self.singularityCancelationIntegrandAcrossElements = 0. + else: + self.singularityCancelationIntegrandAcrossElements = 0. + + if target_order is None: + # this is the desired local quadrature error + # target_order = (2.-s)/self.dim + target_order = 0.5 + self.target_order = target_order + + smax = max(-0.5*(self.kernel.max_singularity+2), 0.) + if quad_order_diagonal is None: + # measured log(2 rho_2) = 0.43 + quad_order_diagonal = max(np.ceil((target_order+1.+smax)/(0.43)*abs(np.log(self.hmin/self.H0))), 4) + # measured log(2 rho_2) = 0.7 + quad_order_diagonalV = max(np.ceil((target_order+1.+smax)/(0.7)*abs(np.log(self.hmin/self.H0))), 4) + else: + quad_order_diagonalV = quad_order_diagonal + self.quad_order_diagonal = quad_order_diagonal + self.quad_order_diagonalV = quad_order_diagonalV + + if (self.kernel.kernelType != FRACTIONAL) or (not self.kernel.variableOrder): + self.getNearQuadRule(COMMON_FACE) + self.getNearQuadRule(COMMON_EDGE) + self.getNearQuadRule(COMMON_VERTEX) + + cdef void eval(self, + {SCALAR}_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, m, i, j, I, J + REAL_t vol, vol1 = self.vol1, vol2 = self.vol2 + {SCALAR}_t val + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + quadratureRule qr + REAL_t[:, :, ::1] PHI + INDEX_t dofs_per_element = self.DoFMap.dofs_per_element + INDEX_t dim = 3 + REAL_t x[3] + REAL_t y[3] + + if panel >= 1: + self.eval_distant_bem_V(contrib, panel, mask) + return + elif panel == COMMON_FACE: + qr = self.qrId + PHI = self.PHI_id + elif panel == COMMON_EDGE: + qr = self.qrEdge + PHI = self.PHI_edge + elif panel == COMMON_VERTEX: + qr = self.qrVertex + PHI = self.PHI_vertex + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + vol = 4.0*vol1*vol2 + for m in range(qr.num_nodes): + for j in range(dim): + x[j] = (simplex1[self.perm1[0], j]*qr.nodes[0, m] + + simplex1[self.perm1[1], j]*qr.nodes[1, m] + + simplex1[self.perm1[2], j]*qr.nodes[2, m]) + y[j] = (simplex2[self.perm2[0], j]*qr.nodes[3, m] + + simplex2[self.perm2[1], j]*qr.nodes[4, m] + + simplex2[self.perm2[2], j]*qr.nodes[5, m]) + self.temp[m] = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) + + contrib[:] = 0. + for I in range(PHI.shape[0]): + i = self.perm[I] + for J in range(I, PHI.shape[0]): + j = self.perm[J] + if j < i: + k = 2*dofs_per_element*j-(j*(j+1) >> 1) + i + else: + k = 2*dofs_per_element*i-(i*(i+1) >> 1) + j + if mask[k]: + val = 0. + for m in range(qr.num_nodes): + val += self.temp[m] * (PHI[I, m, 0] * PHI[J, m, 1] + PHI[I, m, 1] * PHI[J, m, 0]) + contrib[k] = 0.5*val*vol + + +cdef class {SCALAR_label}bem3D_K({SCALAR_label}bem3D): + """The local stiffness matrix + + .. math:: + + \\int_{K_1}\\int_{K_2} 0.5 * [ u(x) v(y) + u(y) v(x) ] \\gamma(x,y) dy dx + + for the V operator. + """ + def __init__(self, + {SCALAR_label}Kernel kernel, + meshBase mesh, + DoFMap DoFMap, + quad_order_diagonal=None, + target_order=None, + num_dofs=None, + **kwargs): + cdef: + REAL_t smin, smax + super({SCALAR_label}bem3D_K, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + + # The integrand (excluding the kernel) cancels 2 orders of the singularity within an element. + self.singularityCancelationIntegrandWithinElement = 1. + # The integrand (excluding the kernel) cancels 2 orders of the + # singularity across elements for continuous finite elements. + if isinstance(DoFMap, P0_DoFMap): + assert self.kernel.max_singularity > -3., "Discontinuous finite elements are not conforming for singularity order {} <= -3.".format(self.kernel.max_singularity) + self.singularityCancelationIntegrandAcrossElements = 1. + else: + self.singularityCancelationIntegrandAcrossElements = 1. + + if target_order is None: + # this is the desired local quadrature error + # target_order = (2.-s)/self.dim + target_order = 0.5 + self.target_order = target_order + + smax = max(-0.5*(self.kernel.max_singularity+2), 0.) + if quad_order_diagonal is None: + # measured log(2 rho_2) = 0.43 + quad_order_diagonal = max(np.ceil((target_order+1.+smax)/(0.43)*abs(np.log(self.hmin/self.H0))), 4) + # measured log(2 rho_2) = 0.7 + quad_order_diagonalV = max(np.ceil((target_order+1.+smax)/(0.7)*abs(np.log(self.hmin/self.H0))), 4) + else: + quad_order_diagonalV = quad_order_diagonal + self.quad_order_diagonal = quad_order_diagonal + self.quad_order_diagonalV = quad_order_diagonalV + + if (self.kernel.kernelType != FRACTIONAL) or (not self.kernel.variableOrder): + self.getNearQuadRule(COMMON_FACE) + self.getNearQuadRule(COMMON_EDGE) + self.getNearQuadRule(COMMON_VERTEX) + + cdef void eval(self, + {SCALAR}_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, m, i, j, I, J + REAL_t vol, valReal, vol1 = self.vol1, vol2 = self.vol2 + {SCALAR}_t val + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + quadratureRule qr + REAL_t[:, :, ::1] PHI + INDEX_t dofs_per_element = self.DoFMap.dofs_per_element + INDEX_t dim = 3 + REAL_t x[3] + REAL_t y[3] + REAL_t normW + + if panel >= 1: + self.eval_distant_bem_K(contrib, panel, mask) + return + elif panel == COMMON_FACE: + qr = self.qrId + PHI = self.PHI_id + elif panel == COMMON_EDGE: + qr = self.qrEdge + PHI = self.PHI_edge + elif panel == COMMON_VERTEX: + qr = self.qrVertex + PHI = self.PHI_vertex + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + for j in range(dim): + x[j] = simplex1[1, j]-simplex1[0, j] + for j in range(dim): + y[j] = simplex1[2, j]-simplex1[0, j] + self.n1[0] = x[1]*y[2]-x[2]*y[1] + self.n1[1] = x[2]*y[0]-x[0]*y[2] + self.n1[2] = x[0]*y[1]-x[1]*y[0] + valReal = 1./sqrt(mydot(self.n1, self.n1)) + self.n1[0] *= valReal + self.n1[1] *= valReal + self.n1[2] *= valReal + + for j in range(dim): + x[j] = simplex2[1, j]-simplex2[0, j] + for j in range(dim): + y[j] = simplex2[2, j]-simplex2[0, j] + self.n2[0] = x[1]*y[2]-x[2]*y[1] + self.n2[1] = x[2]*y[0]-x[0]*y[2] + self.n2[2] = x[0]*y[1]-x[1]*y[0] + valReal = 1./sqrt(mydot(self.n2, self.n2)) + self.n2[0] *= valReal + self.n2[1] *= valReal + self.n2[2] *= valReal + + vol = 4.0*vol1*vol2 + for m in range(qr.num_nodes): + normW = 0. + for j in range(dim): + x[j] = (simplex1[self.perm1[0], j]*qr.nodes[0, m] + + simplex1[self.perm1[1], j]*qr.nodes[1, m] + + simplex1[self.perm1[2], j]*qr.nodes[2, m]) + y[j] = (simplex2[self.perm2[0], j]*qr.nodes[3, m] + + simplex2[self.perm2[1], j]*qr.nodes[4, m] + + simplex2[self.perm2[2], j]*qr.nodes[5, m]) + self.w[j] = y[j]-x[j] + normW += self.w[j]**2 + + normW = 1./sqrt(normW) + for j in range(dim): + self.w[j] *= normW + + val = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) + self.temp[m] = val * mydot(self.n2, self.w) + self.temp2[m] = - val * mydot(self.n1, self.w) + + contrib[:] = 0. + for I in range(PHI.shape[0]): + i = self.perm[I] + for J in range(I, PHI.shape[0]): + j = self.perm[J] + if j < i: + k = 2*dofs_per_element*j-(j*(j+1) >> 1) + i + else: + k = 2*dofs_per_element*i-(i*(i+1) >> 1) + j + if mask[k]: + val = 0. + for m in range(qr.num_nodes): + val += self.temp[m] * PHI[I, m, 0] * PHI[J, m, 1] + self.temp2[m] * PHI[I, m, 1] * PHI[J, m, 0] + contrib[k] = 0.5*val*vol + + +cdef class {SCALAR_label}bem3D_K_prime({SCALAR_label}bem3D): + """The local stiffness matrix + + .. math:: + + \\int_{K_1}\\int_{K_2} 0.5 * [ u(x) v(y) + u(y) v(x) ] \\gamma(x,y) dy dx + + for the V operator. + """ + def __init__(self, + {SCALAR_label}Kernel kernel, + meshBase mesh, + DoFMap DoFMap, + quad_order_diagonal=None, + target_order=None, + num_dofs=None, + **kwargs): + cdef: + REAL_t smin, smax + super({SCALAR_label}bem3D_K_prime, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + + # The integrand (excluding the kernel) cancels 2 orders of the singularity within an element. + self.singularityCancelationIntegrandWithinElement = 1. + # The integrand (excluding the kernel) cancels 2 orders of the + # singularity across elements for continuous finite elements. + if isinstance(DoFMap, P0_DoFMap): + assert self.kernel.max_singularity > -3., "Discontinuous finite elements are not conforming for singularity order {} <= -3.".format(self.kernel.max_singularity) + self.singularityCancelationIntegrandAcrossElements = 1. + else: + self.singularityCancelationIntegrandAcrossElements = 1. + + if target_order is None: + # this is the desired local quadrature error + # target_order = (2.-s)/self.dim + target_order = 0.5 + self.target_order = target_order + + smax = max(-0.5*(self.kernel.max_singularity+2), 0.) + if quad_order_diagonal is None: + # measured log(2 rho_2) = 0.43 + quad_order_diagonal = max(np.ceil((target_order+1.+smax)/(0.43)*abs(np.log(self.hmin/self.H0))), 4) + # measured log(2 rho_2) = 0.7 + quad_order_diagonalV = max(np.ceil((target_order+1.+smax)/(0.7)*abs(np.log(self.hmin/self.H0))), 4) + else: + quad_order_diagonalV = quad_order_diagonal + self.quad_order_diagonal = quad_order_diagonal + self.quad_order_diagonalV = quad_order_diagonalV + + if (self.kernel.kernelType != FRACTIONAL) or (not self.kernel.variableOrder): + self.getNearQuadRule(COMMON_FACE) + self.getNearQuadRule(COMMON_EDGE) + self.getNearQuadRule(COMMON_VERTEX) + + cdef void eval(self, + {SCALAR}_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, m, i, j, I, J + REAL_t vol, valReal, vol1 = self.vol1, vol2 = self.vol2 + {SCALAR}_t val + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + quadratureRule qr + REAL_t[:, :, ::1] PHI + INDEX_t dofs_per_element = self.DoFMap.dofs_per_element + INDEX_t dim = 3 + REAL_t x[3] + REAL_t y[3] + REAL_t normW + + if panel >= 1: + self.eval_distant_bem_K_prime(contrib, panel, mask) + return + elif panel == COMMON_FACE: + qr = self.qrId + PHI = self.PHI_id + elif panel == COMMON_EDGE: + qr = self.qrEdge + PHI = self.PHI_edge + elif panel == COMMON_VERTEX: + qr = self.qrVertex + PHI = self.PHI_vertex + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + for j in range(dim): + x[j] = simplex1[1, j]-simplex1[0, j] + for j in range(dim): + y[j] = simplex1[2, j]-simplex1[0, j] + self.n1[0] = x[1]*y[2]-x[2]*y[1] + self.n1[1] = x[2]*y[0]-x[0]*y[2] + self.n1[2] = x[0]*y[1]-x[1]*y[0] + valReal = 1./sqrt(mydot(self.n1, self.n1)) + self.n1[0] *= valReal + self.n1[1] *= valReal + self.n1[2] *= valReal + + for j in range(dim): + x[j] = simplex2[1, j]-simplex2[0, j] + for j in range(dim): + y[j] = simplex2[2, j]-simplex2[0, j] + self.n2[0] = x[1]*y[2]-x[2]*y[1] + self.n2[1] = x[2]*y[0]-x[0]*y[2] + self.n2[2] = x[0]*y[1]-x[1]*y[0] + valReal = 1./sqrt(mydot(self.n2, self.n2)) + self.n2[0] *= valReal + self.n2[1] *= valReal + self.n2[2] *= valReal + + vol = 4.0*vol1*vol2 + for m in range(qr.num_nodes): + normW = 0. + for j in range(dim): + x[j] = (simplex1[self.perm1[0], j]*qr.nodes[0, m] + + simplex1[self.perm1[1], j]*qr.nodes[1, m] + + simplex1[self.perm1[2], j]*qr.nodes[2, m]) + y[j] = (simplex2[self.perm2[0], j]*qr.nodes[3, m] + + simplex2[self.perm2[1], j]*qr.nodes[4, m] + + simplex2[self.perm2[2], j]*qr.nodes[5, m]) + self.w[j] = y[j]-x[j] + normW += self.w[j]**2 + + normW = 1./sqrt(normW) + for j in range(dim): + self.w[j] *= normW + + val = qr.weights[m] * self.kernel.evalPtr(dim, &x[0], &y[0]) + self.temp[m] = - val * mydot(self.n1, self.w) + self.temp2[m] = val * mydot(self.n2, self.w) + + contrib[:] = 0. + for I in range(PHI.shape[0]): + i = self.perm[I] + for J in range(I, PHI.shape[0]): + j = self.perm[J] + if j < i: + k = 2*dofs_per_element*j-(j*(j+1) >> 1) + i + else: + k = 2*dofs_per_element*i-(i*(i+1) >> 1) + j + if mask[k]: + val = 0. + for m in range(qr.num_nodes): + val += self.temp[m] * PHI[I, m, 0] * PHI[J, m, 1] + self.temp2[m] * PHI[I, m, 1] * PHI[J, m, 0] + contrib[k] = 0.5*val*vol + + +cdef class {SCALAR_label}boundaryIntegralSingleLayer({function_type}): + cdef: + {SCALAR_label_lc_}fe_vector u + {SCALAR_label}twoPointFunction kernel + REAL_t[:, ::1] simplex + REAL_t[:, ::1] span + simplexQuadratureRule qr + REAL_t[:, ::1] PHI + {SCALAR}_t[::1] fvals + + def __init__(self, {SCALAR_label_lc_}fe_vector u, {SCALAR_label}twoPointFunction kernel): + self.u = u + self.kernel = kernel + dm = u.dm + mesh = dm.mesh + + dim = mesh.dim + dimManifold = mesh.manifold_dim + + self.simplex = uninitialized((dimManifold+1, dim), dtype=REAL) + self.span = uninitialized((dimManifold, dim), dtype=REAL) + self.qr = simplexXiaoGimbutas(dim=dim, manifold_dim=dimManifold, order=3) + + self.PHI = uninitialized((dm.dofs_per_element, self.qr.num_nodes), dtype=REAL) + for i in range(dm.dofs_per_element): + for j in range(self.qr.num_nodes): + self.PHI[i, j] = dm.localShapeFunctions[i](np.ascontiguousarray(self.qr.nodes[:, j])) + + self.fvals = uninitialized((self.qr.num_nodes), dtype={SCALAR}) + + cdef {SCALAR}_t eval(self, REAL_t[::1] x): + cdef: + INDEX_t cellNo + {SCALAR}_t val = 0. + DoFMap dm = self.u.dm + meshBase mesh = dm.mesh + INDEX_t dim = mesh.dim + INDEX_t manifold_dim = mesh.manifold_dim + simplexQuadratureRule qr = self.qr + INDEX_t num_quad_nodes = qr.num_nodes + INDEX_t k, j, I, m + REAL_t vol + {SCALAR}_t[::1] u = self.u + + for cellNo in range(mesh.num_cells): + mesh.getSimplex(cellNo, self.simplex) + + # Calculate volume + vol = qr.getSimplexVolume(self.simplex) + + for j in range(num_quad_nodes): + qr.tempVec[:] = 0. + for k in range(manifold_dim+1): + for m in range(dim): + qr.tempVec[m] += qr.nodes[k, j] * self.simplex[k, m] + self.fvals[j] = self.kernel.evalPtr(dim, &x[0], &qr.tempVec[0]) + + for k in range(dm.dofs_per_element): + I = dm.cell2dof(cellNo, k) + if I < 0: + continue + for j in range(num_quad_nodes): + val += vol*qr.weights[j]*self.fvals[j]*self.PHI[k, j]*u[I] + return val + + +cdef class {SCALAR_label}boundaryIntegralDoubleLayer({function_type}): + cdef: + {SCALAR_label_lc_}fe_vector u + {SCALAR_label}twoPointFunction kernel + REAL_t[:, ::1] simplex + REAL_t[:, ::1] span + simplexQuadratureRule qr + REAL_t[:, ::1] PHI + {SCALAR}_t[::1] fvals + REAL_t[::1] n2, w, x, y + + def __init__(self, {SCALAR_label_lc_}fe_vector u, twoPointFunction kernel): + self.u = u + self.kernel = kernel + dm = u.dm + mesh = dm.mesh + + dim = mesh.dim + dimManifold = mesh.manifold_dim + + self.simplex = uninitialized((dimManifold+1, dim), dtype=REAL) + self.span = uninitialized((dimManifold, dim), dtype=REAL) + self.qr = simplexXiaoGimbutas(dim=dim, manifold_dim=dimManifold, order=3) + + self.PHI = uninitialized((dm.dofs_per_element, self.qr.num_nodes), dtype=REAL) + for i in range(dm.dofs_per_element): + for j in range(self.qr.num_nodes): + self.PHI[i, j] = dm.localShapeFunctions[i](np.ascontiguousarray(self.qr.nodes[:, j])) + + self.fvals = uninitialized((self.qr.num_nodes), dtype={SCALAR}) + self.n2 = uninitialized((dim), dtype=REAL) + self.w = uninitialized((dim), dtype=REAL) + self.x = uninitialized((dim), dtype=REAL) + self.y = uninitialized((dim), dtype=REAL) + + cdef {SCALAR}_t eval(self, REAL_t[::1] x): + cdef: + INDEX_t cellNo + {SCALAR}_t val = 0. + DoFMap dm = self.u.dm + meshBase mesh = dm.mesh + INDEX_t dim = mesh.dim + INDEX_t manifold_dim = mesh.manifold_dim + simplexQuadratureRule qr = self.qr + INDEX_t num_quad_nodes = qr.num_nodes + INDEX_t k, j, I, m + REAL_t vol, normW + {SCALAR}_t[::1] u = self.u + + for cellNo in range(mesh.num_cells): + mesh.getSimplex(cellNo, self.simplex) + + if dim == 2: + self.n2[0] = self.simplex[1, 1] - self.simplex[0, 1] + self.n2[1] = self.simplex[0, 0] - self.simplex[1, 0] + normW = 1./sqrt(mydot(self.n2, self.n2)) + self.n2[0] *= normW + self.n2[1] *= normW + elif dim == 3: + for j in range(dim): + self.x[j] = self.simplex[1, j]-self.simplex[0, j] + for j in range(dim): + self.y[j] = self.simplex[2, j]-self.simplex[0, j] + self.n2[0] = self.x[1]*self.y[2]-self.x[2]*self.y[1] + self.n2[1] = self.x[2]*self.y[0]-self.x[0]*self.y[2] + self.n2[2] = self.x[0]*self.y[1]-self.x[1]*self.y[0] + normW = 1./sqrt(mydot(self.n2, self.n2)) + self.n2[0] *= normW + self.n2[1] *= normW + self.n2[2] *= normW + + # Calculate volume + vol = qr.getSimplexVolume(self.simplex) + + for j in range(num_quad_nodes): + qr.tempVec[:] = 0. + for k in range(manifold_dim+1): + for m in range(dim): + qr.tempVec[m] += qr.nodes[k, j] * self.simplex[k, m] + normW = 0. + for m in range(dim): + self.w[m] = qr.tempVec[m]-x[m] + normW += self.w[m]**2 + normW = 1./sqrt(normW) + for m in range(dim): + self.w[m] *= normW + + self.fvals[j] = self.kernel.evalPtr(dim, &x[0], &qr.tempVec[0]) * mydot(self.n2, self.w) + + for k in range(dm.dofs_per_element): + I = dm.cell2dof(cellNo, k) + if I < 0: + continue + for j in range(num_quad_nodes): + val += vol*qr.weights[j]*self.fvals[j]*self.PHI[k, j]*u[I] + return val diff --git a/nl/PyNucleus_nl/bitset.pyx b/nl/PyNucleus_nl/bitset.pyx index 10ca057..82f9472 100644 --- a/nl/PyNucleus_nl/bitset.pyx +++ b/nl/PyNucleus_nl/bitset.pyx @@ -9,14 +9,7 @@ import numpy as np from libc.stdlib cimport malloc, realloc, free -include "config.pxi" - -IF HAVE_MALLOC_H: - cdef extern from "malloc.h" nogil: - int malloc_trim(size_t pad) -ELSE: - cdef int malloc_trim(size_t pad): - pass +include "malloc.pxi" cdef str MASK2Str(MASK_t a): diff --git a/nl/PyNucleus_nl/malloc_linux.pxi b/nl/PyNucleus_nl/malloc_linux.pxi new file mode 100644 index 0000000..6129a75 --- /dev/null +++ b/nl/PyNucleus_nl/malloc_linux.pxi @@ -0,0 +1,9 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +cdef extern from "malloc.h" nogil: + int malloc_trim(size_t pad) diff --git a/nl/PyNucleus_nl/malloc_mac.pxi b/nl/PyNucleus_nl/malloc_mac.pxi new file mode 100644 index 0000000..b0780f3 --- /dev/null +++ b/nl/PyNucleus_nl/malloc_mac.pxi @@ -0,0 +1,9 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + +cdef int malloc_trim(size_t pad): + pass diff --git a/nl/PyNucleus_nl/nonlocalAssembly.pxd b/nl/PyNucleus_nl/nonlocalAssembly.pxd index 5f9f2a2..0b38c35 100644 --- a/nl/PyNucleus_nl/nonlocalAssembly.pxd +++ b/nl/PyNucleus_nl/nonlocalAssembly.pxd @@ -45,8 +45,6 @@ from . kernelsCy cimport (Kernel, FractionalKernel) -include "config.pxi" - include "nonlocalAssembly_decl_REAL.pxi" include "nonlocalAssembly_decl_COMPLEX.pxi" diff --git a/nl/PyNucleus_nl/nonlocalAssembly.pyx b/nl/PyNucleus_nl/nonlocalAssembly.pyx index 7d776d6..b0fa56d 100644 --- a/nl/PyNucleus_nl/nonlocalAssembly.pyx +++ b/nl/PyNucleus_nl/nonlocalAssembly.pyx @@ -9,8 +9,6 @@ from libc.math cimport ceil import numpy as np cimport numpy as np -include "config.pxi" - from libc.math cimport sin, cos, M_PI as pi from libcpp.map cimport map from cpython.long cimport PyLong_FromSsize_t diff --git a/nl/setup.py b/nl/setup.py index 72bb3b0..8437e43 100644 --- a/nl/setup.py +++ b/nl/setup.py @@ -5,7 +5,7 @@ # If you want to use this code, please refer to the README.rst and LICENSE files. # ################################################################################### -from shutil import move +from shutil import move, copy from os import remove from importlib.metadata import version from distutils.errors import CompileError @@ -29,10 +29,14 @@ int malloc_trim(size_t pad) """) have_malloc_h = True + if not (p.hash_file(p.folder+'malloc_linux.pxi') == + p.hash_file(p.folder+'malloc.pxi')): + copy(p.folder+'malloc_linux.pxi', p.folder+'malloc.pxi') except CompileError as e: print('malloc.h not found, error was \"{}\". Depending on the system, this might be normal.'.format(e)) - have_malloc_h = False -p.addOption('HAVE_MALLOC_H', 'have_malloc_h', have_malloc_h) + if not (p.hash_file(p.folder+'malloc_mac.pxi') == + p.hash_file(p.folder+'malloc.pxi')): + copy(p.folder+'malloc_mac.pxi', p.folder+'malloc.pxi') p.addOption(None, 'mask_size', 256) cython_directives = {'initializedcheck': False, 'boundscheck': False, diff --git a/packageTools/PyNucleus_packageTools/__init__.py b/packageTools/PyNucleus_packageTools/__init__.py index 67260e0..e4b488b 100644 --- a/packageTools/PyNucleus_packageTools/__init__.py +++ b/packageTools/PyNucleus_packageTools/__init__.py @@ -111,7 +111,7 @@ def __init__(self, name, namespace=''): 'compiler_c++': 'detect', 'mpi': 'openmpi', 'threads': 1} - self.addOption('USE_OPENMP', 'useOpenMP', False) + self.addOption(None, 'useOpenMP', False) self.addOption(None, 'gitSHA', self.getGitSHA()) def addOption(self, optionCy, optionPy, default, pkgDependencies=[]): @@ -378,6 +378,36 @@ def hash_file(self, filename): except: return + def emptyFile(self, filename): + from shutil import copy + + with open('tempEmpty', 'w') as f: + pass + if Path(filename).exists(): + if not (self.hash_file('tempEmpty') == + self.hash_file(filename)): + copy('tempEmpty', filename) + else: + copy('tempEmpty', filename) + Path('tempEmpty').unlink() + + def conditionalCopy(self, target, conditional, source_if_true, source_if_false): + from shutil import copy + + if conditional: + if not (self.hash_file(source_if_true) == + self.hash_file(target)): + copy(source_if_true, + target) + else: + if source_if_false is not None: + if not (self.hash_file(source_if_false) == + self.hash_file(target)): + copy(source_if_false, + target) + else: + self.emptyFile(target) + def fillTemplate(basedir, templates, replacements): for tmp in templates: