Skip to content

Commit

Permalink
Show file tree
Hide file tree
Showing 12 changed files with 226 additions and 123 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,29 +47,37 @@ ADDV(PRECISION_CHAR, void)
size_t avl = n;
while (avl) {
size_t vl = VSETVL(PREC, LMUL)(avl);
RVV_TYPE_F(PREC, LMUL) xvec_real, xvec_imag, yvec_real, yvec_imag;
RVV_TYPE_F_X2(PREC, LMUL) xvec, yvec;

if (incx == 1)
VLSEG2_V_F(PREC, LMUL)( &xvec_real, &xvec_imag, (BASE_DT*) x, vl);
xvec = VLSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, vl);
else
VLSSEG2_V_F(PREC, LMUL)(&xvec_real, &xvec_imag, (BASE_DT*) x, 2*FLT_SIZE*incx, vl);
xvec = VLSSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, 2*FLT_SIZE*incx, vl);

if (incy == 1)
VLSEG2_V_F(PREC, LMUL)( &yvec_real, &yvec_imag, (BASE_DT*) y, vl);
yvec = VLSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, vl);
else
VLSSEG2_V_F(PREC, LMUL)(&yvec_real, &yvec_imag, (BASE_DT*) y, 2*FLT_SIZE*incy, vl);

yvec = VLSSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, 2*FLT_SIZE*incy, vl);

RVV_TYPE_F(PREC, LMUL) xvec_real = RVV_GET_REAL(PREC, LMUL, xvec);
RVV_TYPE_F(PREC, LMUL) xvec_imag = RVV_GET_IMAG(PREC, LMUL, xvec);
RVV_TYPE_F(PREC, LMUL) yvec_real = RVV_GET_REAL(PREC, LMUL, yvec);
RVV_TYPE_F(PREC, LMUL) yvec_imag = RVV_GET_IMAG(PREC, LMUL, yvec);

yvec_real = VFADD_VV(PREC, LMUL)(yvec_real, xvec_real, vl);
if (conjx == BLIS_NO_CONJUGATE)
yvec_imag = VFADD_VV(PREC, LMUL)(yvec_imag, xvec_imag, vl);
else
yvec_imag = VFSUB_VV(PREC, LMUL)(yvec_imag, xvec_imag, vl);

RVV_SET_REAL(PREC, LMUL, yvec, yvec_real);
RVV_SET_IMAG(PREC, LMUL, yvec, yvec_imag);

if (incy == 1)
VSSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, yvec_real, yvec_imag, vl);
VSSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, yvec, vl);
else
VSSSEG2_V_F(PREC, LMUL)((BASE_DT*) y, 2*FLT_SIZE*incy, yvec_real, yvec_imag, vl);
VSSSEG2_V_F(PREC, LMUL)((BASE_DT*) y, 2*FLT_SIZE*incy, yvec, vl);

x += vl*incx;
y += vl*incy;
avl -= vl;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
AXPBYV(PRECISION_CHAR, void)
{
// Computes y := beta * y + alpha * conjx(x)

if (n <= 0) return;

const DATATYPE* restrict alpha = alpha_;
Expand All @@ -59,7 +59,7 @@ AXPBYV(PRECISION_CHAR, void)
return;
}

// Note: in the cases alpha = 0 && beta = 1, or alpha = 1 && beta = 0, we
// Note: in the cases alpha = 0 && beta = 1, or alpha = 1 && beta = 0, we
// will canonicalize NaNs whereas the reference code will propagate NaN payloads.

// TO DO (optimization): special cases for alpha = +-1, +-i, beta = +-1, +-i
Expand All @@ -68,23 +68,28 @@ AXPBYV(PRECISION_CHAR, void)
size_t avl = n;
while (avl) {
size_t vl = VSETVL(PREC, LMUL)(avl);
RVV_TYPE_F(PREC, LMUL) xvec_real, xvec_imag, yvec_real, yvec_imag, temp_real, temp_imag;
RVV_TYPE_F_X2(PREC, LMUL) xvec, yvec;

if (incx == 1)
VLSEG2_V_F(PREC, LMUL)( &xvec_real, &xvec_imag, (BASE_DT*) x, vl);
xvec = VLSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, vl);
else
VLSSEG2_V_F(PREC, LMUL)(&xvec_real, &xvec_imag, (BASE_DT*) x, 2*FLT_SIZE*incx, vl);
xvec = VLSSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, 2*FLT_SIZE*incx, vl);

if (incy == 1)
VLSEG2_V_F(PREC, LMUL)( &yvec_real, &yvec_imag, (BASE_DT*) y, vl);
yvec = VLSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, vl);
else
VLSSEG2_V_F(PREC, LMUL)(&yvec_real, &yvec_imag, (BASE_DT*) y, 2*FLT_SIZE*incy, vl);

yvec = VLSSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, 2*FLT_SIZE*incy, vl);

RVV_TYPE_F(PREC, LMUL) xvec_real = RVV_GET_REAL(PREC, LMUL, xvec);
RVV_TYPE_F(PREC, LMUL) xvec_imag = RVV_GET_IMAG(PREC, LMUL, xvec);
RVV_TYPE_F(PREC, LMUL) yvec_real = RVV_GET_REAL(PREC, LMUL, yvec);
RVV_TYPE_F(PREC, LMUL) yvec_imag = RVV_GET_IMAG(PREC, LMUL, yvec);

// Computed as:
// y.real = beta.real * y.real - beta.imag * y.imag + alpha.real * x.real - alpha.imag * conj(x.imag)
// y.imag = beta.real * y.imag + beta.imag * y.real + alpha.imag * x.real + alpha.real * conj(x.imag)
temp_real = VFMUL_VF(PREC, LMUL) (yvec_real, beta->real, vl);
temp_imag = VFMUL_VF(PREC, LMUL) (yvec_imag, beta->real, vl);
RVV_TYPE_F(PREC, LMUL) temp_real = VFMUL_VF(PREC, LMUL) (yvec_real, beta->real, vl);
RVV_TYPE_F(PREC, LMUL) temp_imag = VFMUL_VF(PREC, LMUL) (yvec_imag, beta->real, vl);
temp_real = VFNMSAC_VF(PREC, LMUL)(temp_real, beta->imag, yvec_imag, vl);
temp_imag = VFMACC_VF(PREC, LMUL) (temp_imag, beta->imag, yvec_real, vl);
yvec_real = VFMACC_VF(PREC, LMUL) (temp_real, alpha->real, xvec_real, vl);
Expand All @@ -97,11 +102,14 @@ AXPBYV(PRECISION_CHAR, void)
yvec_imag = VFNMSAC_VF(PREC, LMUL)(yvec_imag, alpha->real, xvec_imag, vl);
}

RVV_SET_REAL(PREC, LMUL, yvec, yvec_real);
RVV_SET_IMAG(PREC, LMUL, yvec, yvec_imag);

if (incy == 1)
VSSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, yvec_real, yvec_imag, vl);
VSSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, yvec, vl);
else
VSSSEG2_V_F(PREC, LMUL)((BASE_DT*) y, 2*FLT_SIZE*incy, yvec_real, yvec_imag, vl);
VSSSEG2_V_F(PREC, LMUL)((BASE_DT*) y, 2*FLT_SIZE*incy, yvec, vl);

x += vl*incx;
y += vl*incy;
avl -= vl;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,30 @@ AXPYV(PRECISION_CHAR, void)
const DATATYPE* restrict alpha = alpha_;
const DATATYPE* restrict x = x_;
DATATYPE* restrict y = y_;

if (n <= 0) return;
if (alpha->real == 0 && alpha->imag == 0) return;

size_t avl = n;
while (avl) {
size_t vl = VSETVL(PREC, LMUL)(avl);
RVV_TYPE_F(PREC, LMUL) xvec_real, xvec_imag, yvec_real, yvec_imag;
RVV_TYPE_F_X2(PREC, LMUL) xvec, yvec;

if (incx == 1)
VLSEG2_V_F(PREC, LMUL)( &xvec_real, &xvec_imag, (BASE_DT*) x, vl);
xvec = VLSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, vl);
else
VLSSEG2_V_F(PREC, LMUL)(&xvec_real, &xvec_imag, (BASE_DT*) x, 2*FLT_SIZE*incx, vl);
xvec = VLSSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, 2*FLT_SIZE*incx, vl);

if (incy == 1)
VLSEG2_V_F(PREC, LMUL)( &yvec_real, &yvec_imag, (BASE_DT*) y, vl);
yvec = VLSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, vl);
else
VLSSEG2_V_F(PREC, LMUL)(&yvec_real, &yvec_imag, (BASE_DT*) y, 2*FLT_SIZE*incy, vl);

yvec = VLSSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, 2*FLT_SIZE*incy, vl);

RVV_TYPE_F(PREC, LMUL) xvec_real = RVV_GET_REAL(PREC, LMUL, xvec);
RVV_TYPE_F(PREC, LMUL) xvec_imag = RVV_GET_IMAG(PREC, LMUL, xvec);
RVV_TYPE_F(PREC, LMUL) yvec_real = RVV_GET_REAL(PREC, LMUL, yvec);
RVV_TYPE_F(PREC, LMUL) yvec_imag = RVV_GET_IMAG(PREC, LMUL, yvec);

yvec_real = VFMACC_VF(PREC, LMUL)( yvec_real, alpha->real, xvec_real, vl);
yvec_imag = VFMACC_VF(PREC, LMUL)( yvec_imag, alpha->imag, xvec_real, vl);
if (conjx == BLIS_NO_CONJUGATE){
Expand All @@ -70,11 +75,15 @@ AXPYV(PRECISION_CHAR, void)
yvec_imag = VFNMSAC_VF(PREC, LMUL)(yvec_imag, alpha->real, xvec_imag, vl);
}


RVV_SET_REAL(PREC, LMUL, yvec, yvec_real);
RVV_SET_IMAG(PREC, LMUL, yvec, yvec_imag);

if (incy == 1)
VSSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, yvec_real, yvec_imag, vl);
VSSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, yvec, vl);
else
VSSSEG2_V_F(PREC, LMUL)((BASE_DT*) y, 2*FLT_SIZE*incy, yvec_real, yvec_imag, vl);
VSSSEG2_V_F(PREC, LMUL)((BASE_DT*) y, 2*FLT_SIZE*incy, yvec, vl);

x += vl*incx;
y += vl*incy;
avl -= vl;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ DOTV(PRECISION_CHAR, void)
DATATYPE* restrict rho = rho_;
const DATATYPE* restrict x = x_;
const DATATYPE* restrict y = y_;

if (n <= 0) {
rho->real = 0;
rho->imag = 0;
Expand All @@ -60,18 +60,23 @@ DOTV(PRECISION_CHAR, void)
bool first = true;
while (avl) {
size_t vl = VSETVL(PREC, LMUL)(avl);
RVV_TYPE_F(PREC, LMUL) xvec_real, xvec_imag, yvec_real, yvec_imag;
RVV_TYPE_F_X2(PREC, LMUL) xvec, yvec;

if (incx == 1)
VLSEG2_V_F(PREC, LMUL)( &xvec_real, &xvec_imag, (BASE_DT*) x, vl);
xvec = VLSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, vl);
else
VLSSEG2_V_F(PREC, LMUL)(&xvec_real, &xvec_imag, (BASE_DT*) x, 2*FLT_SIZE*incx, vl);
xvec = VLSSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, 2*FLT_SIZE*incx, vl);

if (incy == 1)
VLSEG2_V_F(PREC, LMUL)( &yvec_real, &yvec_imag, (BASE_DT*) y, vl);
yvec = VLSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, vl);
else
VLSSEG2_V_F(PREC, LMUL)(&yvec_real, &yvec_imag, (BASE_DT*) y, 2*FLT_SIZE*incy, vl);

yvec = VLSSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, 2*FLT_SIZE*incy, vl);

RVV_TYPE_F(PREC, LMUL) xvec_real = RVV_GET_REAL(PREC, LMUL, xvec);
RVV_TYPE_F(PREC, LMUL) xvec_imag = RVV_GET_IMAG(PREC, LMUL, xvec);
RVV_TYPE_F(PREC, LMUL) yvec_real = RVV_GET_REAL(PREC, LMUL, yvec);
RVV_TYPE_F(PREC, LMUL) yvec_imag = RVV_GET_IMAG(PREC, LMUL, yvec);

if (first) {
acc_real = VFMUL_VV(PREC, LMUL)(xvec_real, yvec_real, vl);
acc_imag = VFMUL_VV(PREC, LMUL)(xvec_imag, yvec_real, vl);
Expand All @@ -93,7 +98,6 @@ DOTV(PRECISION_CHAR, void)
avl -= vl;
}


RVV_TYPE_F(PREC, m1) sum_real = VFMV_S_F(PREC, m1)(0.f, 1);
RVV_TYPE_F(PREC, m1) sum_imag = VFMV_S_F(PREC, m1)(0.f, 1);
sum_real = VF_REDUSUM_VS(PREC, LMUL)(acc_real, sum_real, n);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ DOTXV(PRECISION_CHAR, void)
DATATYPE* restrict rho = rho_;
const DATATYPE* restrict x = x_;
const DATATYPE* restrict y = y_;

if (beta->real == 0 && beta->imag == 0){
rho->real = 0;
rho->imag = 0;
Expand All @@ -69,18 +69,23 @@ DOTXV(PRECISION_CHAR, void)
bool first = true;
while (avl) {
size_t vl = VSETVL(PREC, LMUL)(avl);
RVV_TYPE_F(PREC, LMUL) xvec_real, xvec_imag, yvec_real, yvec_imag;
RVV_TYPE_F_X2(PREC, LMUL) xvec, yvec;

if (incx == 1)
VLSEG2_V_F(PREC, LMUL)( &xvec_real, &xvec_imag, (BASE_DT*) x, vl);
xvec = VLSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, vl);
else
VLSSEG2_V_F(PREC, LMUL)(&xvec_real, &xvec_imag, (BASE_DT*) x, 2*FLT_SIZE*incx, vl);
xvec = VLSSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, 2*FLT_SIZE*incx, vl);

if (incy == 1)
VLSEG2_V_F(PREC, LMUL)( &yvec_real, &yvec_imag, (BASE_DT*) y, vl);
yvec = VLSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, vl);
else
VLSSEG2_V_F(PREC, LMUL)(&yvec_real, &yvec_imag, (BASE_DT*) y, 2*FLT_SIZE*incy, vl);

yvec = VLSSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, 2*FLT_SIZE*incy, vl);

RVV_TYPE_F(PREC, LMUL) xvec_real = RVV_GET_REAL(PREC, LMUL, xvec);
RVV_TYPE_F(PREC, LMUL) xvec_imag = RVV_GET_IMAG(PREC, LMUL, xvec);
RVV_TYPE_F(PREC, LMUL) yvec_real = RVV_GET_REAL(PREC, LMUL, yvec);
RVV_TYPE_F(PREC, LMUL) yvec_imag = RVV_GET_IMAG(PREC, LMUL, yvec);

if (first) {
acc_real = VFMUL_VV(PREC, LMUL)(xvec_real, yvec_real, vl);
acc_imag = VFMUL_VV(PREC, LMUL)(xvec_imag, yvec_real, vl);
Expand All @@ -102,7 +107,6 @@ DOTXV(PRECISION_CHAR, void)
avl -= vl;
}


RVV_TYPE_F(PREC, m1) sum_real = VFMV_S_F(PREC, m1)(0.f, 1);
RVV_TYPE_F(PREC, m1) sum_imag = VFMV_S_F(PREC, m1)(0.f, 1);
sum_real = VF_REDUSUM_VS(PREC, LMUL)(acc_real, sum_real, n);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ SCAL2V(PRECISION_CHAR, void)
const DATATYPE* restrict alpha = alpha_;
const DATATYPE* restrict x = x_;
DATATYPE* restrict y = y_;

if (n <= 0) return;
if (alpha->real == 0 && alpha->imag == 0) {
SETV(PRECISION_CHAR)(BLIS_NO_CONJUGATE, n, alpha, y, incy, cntx);
Expand All @@ -56,13 +56,18 @@ SCAL2V(PRECISION_CHAR, void)
size_t avl = n;
while (avl) {
size_t vl = VSETVL(PREC, LMUL)(avl);
RVV_TYPE_F(PREC, LMUL) xvec_real, xvec_imag, yvec_real, yvec_imag;
RVV_TYPE_F_X2(PREC, LMUL) xvec, yvec;

if (incx == 1)
VLSEG2_V_F(PREC, LMUL)( &xvec_real, &xvec_imag, (BASE_DT*) x, vl);
xvec = VLSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, vl);
else
VLSSEG2_V_F(PREC, LMUL)(&xvec_real, &xvec_imag, (BASE_DT*) x, 2*FLT_SIZE*incx, vl);

xvec = VLSSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, 2*FLT_SIZE*incx, vl);

RVV_TYPE_F(PREC, LMUL) xvec_real = RVV_GET_REAL(PREC, LMUL, xvec);
RVV_TYPE_F(PREC, LMUL) xvec_imag = RVV_GET_IMAG(PREC, LMUL, xvec);
RVV_TYPE_F(PREC, LMUL) yvec_real = RVV_GET_REAL(PREC, LMUL, yvec);
RVV_TYPE_F(PREC, LMUL) yvec_imag = RVV_GET_IMAG(PREC, LMUL, yvec);

yvec_real = VFMUL_VF(PREC, LMUL)(xvec_real, alpha->real, vl);
yvec_imag = VFMUL_VF(PREC, LMUL)(xvec_real, alpha->imag, vl);
if (conjx == BLIS_NO_CONJUGATE) {
Expand All @@ -73,11 +78,15 @@ SCAL2V(PRECISION_CHAR, void)
yvec_imag = VFNMSAC_VF(PREC, LMUL)(yvec_imag, alpha->real, xvec_imag, vl);
}

RVV_SET_REAL(PREC, LMUL, yvec, yvec_real);
RVV_SET_IMAG(PREC, LMUL, yvec, yvec_imag);
#pragma GCC diagnostic ignored "-Wuninitialized"

if (incy == 1)
VSSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, yvec_real, yvec_imag, vl);
VSSEG2_V_F(PREC, LMUL)( (BASE_DT*) y, yvec, vl);
else
VSSSEG2_V_F(PREC, LMUL)((BASE_DT*) y, 2*FLT_SIZE*incy, yvec_real, yvec_imag, vl);
VSSSEG2_V_F(PREC, LMUL)((BASE_DT*) y, 2*FLT_SIZE*incy, yvec, vl);

x += vl*incx;
y += vl*incy;
avl -= vl;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ SCALV(PRECISION_CHAR, void)
// Computes x = conjalpha(alpha) * x
const DATATYPE* restrict alpha = alpha_;
DATATYPE* restrict x = x_;

if (n <= 0 || (alpha->real == 1 && alpha->imag == 0)) return;

if (alpha->real == 0 && alpha->imag==0){
Expand All @@ -51,13 +51,16 @@ SCALV(PRECISION_CHAR, void)
size_t avl = n;
while (avl) {
size_t vl = VSETVL(PREC, LMUL)(avl);
RVV_TYPE_F(PREC, LMUL) xvec_real, xvec_imag;
RVV_TYPE_F_X2(PREC, LMUL) xvec;

if (incx == 1)
VLSEG2_V_F(PREC, LMUL)( &xvec_real, &xvec_imag, (BASE_DT*) x, vl);
xvec = VLSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, vl);
else
VLSSEG2_V_F(PREC, LMUL)(&xvec_real, &xvec_imag, (BASE_DT*) x, 2*FLT_SIZE*incx, vl);

xvec = VLSSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, 2*FLT_SIZE*incx, vl);

RVV_TYPE_F(PREC, LMUL) xvec_real = RVV_GET_REAL(PREC, LMUL, xvec);
RVV_TYPE_F(PREC, LMUL) xvec_imag = RVV_GET_IMAG(PREC, LMUL, xvec);

RVV_TYPE_F(PREC, LMUL) temp_real = VFMUL_VF(PREC, LMUL)(xvec_real, alpha->real, vl);
RVV_TYPE_F(PREC, LMUL) temp_imag = VFMUL_VF(PREC, LMUL)(xvec_imag, alpha->real, vl);
if (conjalpha == BLIS_NO_CONJUGATE) {
Expand All @@ -67,13 +70,17 @@ SCALV(PRECISION_CHAR, void)
temp_real = VFMACC_VF(PREC, LMUL) (temp_real, alpha->imag, xvec_imag, vl);
temp_imag = VFNMSAC_VF(PREC, LMUL)(temp_imag, alpha->imag, xvec_real, vl);
}


RVV_TYPE_F_X2(PREC, LMUL) temp;
RVV_SET_REAL(PREC, LMUL, temp, temp_real);
RVV_SET_IMAG(PREC, LMUL, temp, temp_imag);
#pragma GCC diagnostic ignored "-Wuninitialized"

if (incx == 1)
VSSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, temp_real, temp_imag, vl);
VSSEG2_V_F(PREC, LMUL)( (BASE_DT*) x, temp, vl);
else
VSSSEG2_V_F(PREC, LMUL)((BASE_DT*) x, 2*FLT_SIZE*incx, temp_real, temp_imag, vl);
VSSSEG2_V_F(PREC, LMUL)((BASE_DT*) x, 2*FLT_SIZE*incx, temp, vl);

x += vl*incx;
avl -= vl;
}
Expand Down
Loading

0 comments on commit 9fc9359

Please sign in to comment.