From 3c2865589555b1ab8477c3d4958f475a9c215edd Mon Sep 17 00:00:00 2001
From: Fredrik Johansson <fredrik.johansson@gmail.com>
Date: Tue, 12 Nov 2024 13:32:32 +0100
Subject: [PATCH] Waksman multiplication for gr_mat (#2109)

---
 doc/source/gr_mat.rst           |   4 +
 src/gr_mat.h                    |   1 +
 src/gr_mat/mul_waksman.c        | 140 ++++++++++++++++++++++++++++++++
 src/gr_mat/test/main.c          |   2 +
 src/gr_mat/test/t-mul_waksman.c |  97 ++++++++++++++++++++++
 5 files changed, 244 insertions(+)
 create mode 100644 src/gr_mat/mul_waksman.c
 create mode 100644 src/gr_mat/test/t-mul_waksman.c

diff --git a/doc/source/gr_mat.rst b/doc/source/gr_mat.rst
index 93d8d2b9fa..e87a079938 100644
--- a/doc/source/gr_mat.rst
+++ b/doc/source/gr_mat.rst
@@ -278,6 +278,7 @@ Arithmetic
 
 .. function:: int gr_mat_mul_classical(gr_mat_t res, const gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx)
               int gr_mat_mul_strassen(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+              int gr_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
               int gr_mat_mul_generic(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
               int gr_mat_mul(gr_mat_t res, const gr_mat_t mat1, const gr_mat_t mat2, gr_ctx_t ctx)
 
@@ -285,6 +286,9 @@ Arithmetic
     otherwise, it falls back to :func:`gr_mat_mul_generic` which currently
     only performs classical multiplication.
 
+    The *Waksman* algorithm assumes a commutative base ring which supports
+    exact division by two.
+
 .. function:: int gr_mat_sqr(gr_mat_t res, const gr_mat_t mat, gr_ctx_t ctx)
 
 .. function:: int gr_mat_add_scalar(gr_mat_t res, const gr_mat_t mat, gr_srcptr c, gr_ctx_t ctx)
diff --git a/src/gr_mat.h b/src/gr_mat.h
index 68b36147f1..b6ede2c3aa 100644
--- a/src/gr_mat.h
+++ b/src/gr_mat.h
@@ -145,6 +145,7 @@ WARN_UNUSED_RESULT int gr_mat_div_scalar(gr_mat_t res, const gr_mat_t mat, gr_sr
 
 WARN_UNUSED_RESULT int gr_mat_mul_classical(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
 WARN_UNUSED_RESULT int gr_mat_mul_strassen(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
+WARN_UNUSED_RESULT int gr_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
 WARN_UNUSED_RESULT int gr_mat_mul_generic(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
 WARN_UNUSED_RESULT int gr_mat_mul(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx);
 
diff --git a/src/gr_mat/mul_waksman.c b/src/gr_mat/mul_waksman.c
new file mode 100644
index 0000000000..4ba770c65d
--- /dev/null
+++ b/src/gr_mat/mul_waksman.c
@@ -0,0 +1,140 @@
+/*
+    Copyright (C) 2024 Éric Schost
+    Copyright (C) 2024 Vincent Neiger
+    Copyright (C) 2024 Fredrik Johansson
+
+    This file is part of FLINT.
+
+    FLINT is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 3 of the License, or
+    (at your option) any later version.  See <https://www.gnu.org/licenses/>.
+*/
+
+#include "gr_mat.h"
+#include "gr_vec.h"
+
+/* todo: division by two should be divexact by two */
+/* todo: avoid redundant additions 0 + ... */
+
+int gr_mat_mul_waksman(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t ctx)
+{
+    int status = GR_SUCCESS;
+    slong m, n, p;
+    slong sz = ctx->sizeof_elem;
+
+    m = A->r;
+    n = A->c;
+    p = B->c;
+
+    if (m == 0 || n == 0 || p == 0)
+    {
+        return gr_mat_zero(C, ctx);
+    }
+
+    if (n != B->r || m != C->r || p != C->c)
+    {
+        return GR_DOMAIN;
+    }
+
+    if (A == C || B == C)
+    {
+        gr_mat_t T;
+        gr_mat_init(T, m, p, ctx);
+        status |= gr_mat_mul_waksman(T, A, B, ctx);
+        status |= gr_mat_swap_entrywise(T, C, ctx);
+        gr_mat_clear(T, ctx);
+        return status;
+    }
+
+    slong i, l, j, k;
+
+    gr_ptr tmp, Crow, Ccol, val0, val1, val2, crow;
+
+    GR_TMP_INIT_VEC(tmp, p + m + 4, ctx);
+
+    Crow = tmp;
+    Ccol = GR_ENTRY(Crow, p, sz);
+    val0 = GR_ENTRY(Ccol, m, sz);
+    val1 = GR_ENTRY(val0, 1, sz);
+    val2 = GR_ENTRY(val1, 1, sz);
+    crow = GR_ENTRY(val2, 1, sz);
+
+    slong np = n >> 1;
+
+    for (i = 0; i < m; i++)
+        status |= _gr_vec_zero(GR_MAT_ENTRY(C, i, 0, sz), p, ctx);
+
+    for (j = 1; j <= np; j++)
+    {
+        slong j2 = (j << 1) - 1;
+    
+        for (k = 0; k < p; k++)
+        {
+            status |= gr_add(val1, GR_MAT_ENTRY(A, 0, j2 - 1, sz), GR_MAT_ENTRY(B, j2, k, sz), ctx);
+            status |= gr_add(val2, GR_MAT_ENTRY(A, 0, j2, sz), GR_MAT_ENTRY(B, j2 - 1, k, sz), ctx);
+            status |= gr_addmul(GR_MAT_ENTRY(C, 0, k, sz), val1, val2, ctx);
+
+            status |= gr_sub(val1, GR_MAT_ENTRY(A, 0, j2 - 1, sz), GR_MAT_ENTRY(B, j2, k, sz), ctx);
+            status |= gr_sub(val2, GR_MAT_ENTRY(A, 0, j2, sz), GR_MAT_ENTRY(B, j2 - 1, k, sz), ctx);
+            status |= gr_addmul(GR_ENTRY(Crow, k, sz), val1, val2, ctx);
+        }
+
+        for (l = 1; l < m; l++)
+        {
+            status |= gr_add(val1, GR_MAT_ENTRY(A, l, j2 - 1, sz), GR_MAT_ENTRY(B, j2, 0, sz), ctx);
+            status |= gr_add(val2, GR_MAT_ENTRY(A, l, j2, sz), GR_MAT_ENTRY(B, j2 - 1, 0, sz), ctx);
+            status |= gr_addmul(GR_MAT_ENTRY(C, l, 0, sz), val1, val2, ctx);
+      
+            status |= gr_sub(val1, GR_MAT_ENTRY(A, l, j2 - 1, sz), GR_MAT_ENTRY(B, j2, 0, sz), ctx);
+            status |= gr_sub(val2, GR_MAT_ENTRY(A, l, j2, sz), GR_MAT_ENTRY(B, j2 - 1, 0, sz), ctx);
+            status |= gr_addmul(GR_ENTRY(Ccol, l, sz), val1, val2, ctx);
+        }
+
+        for (k = 1; k < p; k++)
+        {
+            for (l = 1; l < m; l++)
+            {
+                status |= gr_add(val1, GR_MAT_ENTRY(A, l, j2 - 1, sz), GR_MAT_ENTRY(B, j2, k, sz), ctx);
+                status |= gr_add(val2, GR_MAT_ENTRY(A, l, j2, sz), GR_MAT_ENTRY(B, j2 - 1, k, sz), ctx);
+                status |= gr_addmul(GR_MAT_ENTRY(C, l, k, sz), val1, val2, ctx);
+            }
+        }
+    }
+
+    for (l = 1; l < m; l++)
+    {
+        status |= gr_add(val1, GR_ENTRY(Ccol, l, sz), GR_MAT_ENTRY(C, l, 0, sz), ctx);
+        status |= gr_mul_2exp_si(GR_ENTRY(Ccol, l, sz), val1, -1, ctx); 
+        status |= gr_sub(GR_MAT_ENTRY(C, l, 0, sz), GR_MAT_ENTRY(C, l, 0, sz), GR_ENTRY(Ccol, l, sz), ctx);
+    }
+
+    status |= gr_add(val1, Crow, GR_MAT_ENTRY(C, 0, 0, sz), ctx);
+    status |= gr_mul_2exp_si(val0, val1, -1, ctx);
+    status |= gr_sub(GR_MAT_ENTRY(C, 0, 0, sz), GR_MAT_ENTRY(C, 0, 0, sz), val0, ctx);
+
+    for (k = 1; k < p; k++)
+    {
+        status |= gr_add(crow, GR_ENTRY(Crow, k, sz), GR_MAT_ENTRY(C, 0, k, sz), ctx);
+        status |= gr_mul_2exp_si(val1, crow, -1, ctx); 
+        status |= gr_sub(GR_MAT_ENTRY(C, 0, k, sz), GR_MAT_ENTRY(C, 0, k, sz), val1, ctx);
+        status |= gr_sub(crow, val1, val0, ctx);
+
+        for (l = 1; l < m; l++)
+        {
+            status |= gr_sub(val2, GR_MAT_ENTRY(C, l, k, sz), crow, ctx);
+            status |= gr_sub(GR_MAT_ENTRY(C, l, k, sz), val2, GR_ENTRY(Ccol, l, sz), ctx);
+        }
+    }
+
+    if ((n & 1) == 1)
+        for (l = 0; l < m; l++)
+            for (k = 0; k < p; k++)
+                status |= gr_addmul(GR_MAT_ENTRY(C, l, k, sz),
+                    GR_MAT_ENTRY(A, l, n - 1, sz), GR_MAT_ENTRY(B, n - 1, k, sz), ctx);
+
+    GR_TMP_CLEAR_VEC(tmp, p + m + 4, ctx);
+
+    return status;
+}
+
diff --git a/src/gr_mat/test/main.c b/src/gr_mat/test/main.c
index b4e197be07..058517cd98 100644
--- a/src/gr_mat/test/main.c
+++ b/src/gr_mat/test/main.c
@@ -35,6 +35,7 @@
 #include "t-lu_recursive.c"
 #include "t-minpoly_field.c"
 #include "t-mul_strassen.c"
+#include "t-mul_waksman.c"
 #include "t-nullspace.c"
 #include "t-properties.c"
 #include "t-randrank.c"
@@ -82,6 +83,7 @@ test_struct tests[] =
     TEST_FUNCTION(gr_mat_lu_recursive),
     TEST_FUNCTION(gr_mat_minpoly_field),
     TEST_FUNCTION(gr_mat_mul_strassen),
+    TEST_FUNCTION(gr_mat_mul_waksman),
     TEST_FUNCTION(gr_mat_nullspace),
     TEST_FUNCTION(gr_mat_properties),
     TEST_FUNCTION(gr_mat_randrank),
diff --git a/src/gr_mat/test/t-mul_waksman.c b/src/gr_mat/test/t-mul_waksman.c
new file mode 100644
index 0000000000..3e12b77e38
--- /dev/null
+++ b/src/gr_mat/test/t-mul_waksman.c
@@ -0,0 +1,97 @@
+/*
+    Copyright (C) 2022 Fredrik Johansson
+
+    This file is part of FLINT.
+
+    FLINT is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 3 of the License, or
+    (at your option) any later version.  See <https://www.gnu.org/licenses/>.
+*/
+
+#include "test_helpers.h"
+#include "ulong_extras.h"
+#include "gr_mat.h"
+
+TEST_FUNCTION_START(gr_mat_mul_waksman, state)
+{
+    slong iter;
+
+    for (iter = 0; iter < 1000; iter++)
+    {
+        gr_ctx_t ctx;
+        gr_mat_t A, B, C, D;
+        slong a, b, c;
+        int status = GR_SUCCESS;
+        int can_div2;
+
+        if (n_randint(state, 2))
+        {
+            gr_ctx_init_fmpz(ctx);
+            can_div2 = 1;
+        }
+        else
+        {
+            ulong m = n_randtest_not_zero(state);
+            can_div2 = m % 2;
+            gr_ctx_init_nmod(ctx, m);
+        }
+
+        a = n_randint(state, 8);
+        b = n_randint(state, 2) ? a : n_randint(state, 8);
+        c = n_randint(state, 2) ? a : n_randint(state, 8);
+
+        gr_mat_init(A, a, b, ctx);
+        gr_mat_init(B, b, c, ctx);
+        gr_mat_init(C, a, c, ctx);
+        gr_mat_init(D, a, c, ctx);
+
+        status |= gr_mat_randtest(A, state, ctx);
+        status |= gr_mat_randtest(B, state, ctx);
+        status |= gr_mat_randtest(C, state, ctx);
+        status |= gr_mat_randtest(D, state, ctx);
+
+        if (a == b && b == c && n_randint(state, 2))
+        {
+            status |= gr_mat_set(B, A, ctx);
+            status |= gr_mat_mul_waksman(C, A, A, ctx);
+        }
+        else if (b == c && n_randint(state, 2))
+        {
+            status |= gr_mat_set(C, A, ctx);
+            status |= gr_mat_mul_waksman(C, C, B, ctx);
+        }
+        else if (a == b && n_randint(state, 2))
+        {
+            status |= gr_mat_set(C, B, ctx);
+            status |= gr_mat_mul_waksman(C, A, C, ctx);
+        }
+        else
+        {
+            status |= gr_mat_mul_waksman(C, A, B, ctx);
+        }
+
+        status |= gr_mat_mul_classical(D, A, B, ctx);
+
+        if ((can_div2 && (status != GR_SUCCESS || gr_mat_equal(C, D, ctx) != T_TRUE))
+            || (status == GR_SUCCESS && gr_mat_equal(C, D, ctx) != T_TRUE))
+        {
+            flint_printf("FAIL:\n");
+            gr_ctx_println(ctx);
+            flint_printf("A:\n"); gr_mat_print(A, ctx); flint_printf("\n\n");
+            flint_printf("B:\n"); gr_mat_print(B, ctx); flint_printf("\n\n");
+            flint_printf("C:\n"); gr_mat_print(C, ctx); flint_printf("\n\n");
+            flint_printf("D:\n"); gr_mat_print(D, ctx); flint_printf("\n\n");
+            flint_abort();
+        }
+
+        gr_mat_clear(A, ctx);
+        gr_mat_clear(B, ctx);
+        gr_mat_clear(C, ctx);
+        gr_mat_clear(D, ctx);
+
+        gr_ctx_clear(ctx);
+    }
+
+    TEST_FUNCTION_END(state);
+}