Simplifying RTNeural AVX code
jatinchowdhury18 committed Nov 25, 2023
1 parent 5cac170 commit 3e203a2
Showing 7 changed files with 37 additions and 89 deletions.
2 changes: 1 addition & 1 deletion modules/RTNeural
Submodule RTNeural updated 61 files
+5 −0 RTNeural/CMakeLists.txt
+2 −2 RTNeural/Layer.h
+2 −2 RTNeural/Model.h
+2 −2 RTNeural/ModelT.h
+4 −4 RTNeural/RTNeural.cpp
+4 −0 RTNeural/RTNeural.h
+4 −4 RTNeural/activation/activation.h
+2 −2 RTNeural/activation/activation_eigen.h
+2 −2 RTNeural/activation/activation_xsimd.h
+1 −1 RTNeural/batchnorm/batchnorm.h
+1 −1 RTNeural/batchnorm/batchnorm.tpp
+1 −1 RTNeural/batchnorm/batchnorm2d.h
+1 −1 RTNeural/batchnorm/batchnorm2d.tpp
+1 −1 RTNeural/batchnorm/batchnorm2d_eigen.h
+1 −1 RTNeural/batchnorm/batchnorm2d_eigen.tpp
+1 −1 RTNeural/batchnorm/batchnorm2d_xsimd.h
+1 −1 RTNeural/batchnorm/batchnorm2d_xsimd.tpp
+1 −1 RTNeural/batchnorm/batchnorm_eigen.h
+1 −1 RTNeural/batchnorm/batchnorm_eigen.tpp
+1 −1 RTNeural/batchnorm/batchnorm_xsimd.h
+1 −1 RTNeural/batchnorm/batchnorm_xsimd.tpp
+8 −8 RTNeural/common.h
+2 −2 RTNeural/conv1d/conv1d.h
+2 −2 RTNeural/conv1d/conv1d.tpp
+2 −2 RTNeural/conv1d/conv1d_eigen.h
+2 −2 RTNeural/conv1d/conv1d_eigen.tpp
+2 −2 RTNeural/conv1d/conv1d_xsimd.h
+2 −2 RTNeural/conv1d/conv1d_xsimd.tpp
+1 −1 RTNeural/conv1d_stateless/conv1d_stateless.h
+2 −2 RTNeural/conv1d_stateless/conv1d_stateless.tpp
+1 −1 RTNeural/conv1d_stateless/conv1d_stateless_eigen.h
+2 −2 RTNeural/conv1d_stateless/conv1d_stateless_eigen.tpp
+1 −1 RTNeural/conv1d_stateless/conv1d_stateless_xsimd.h
+2 −2 RTNeural/conv1d_stateless/conv1d_stateless_xsimd.tpp
+1 −1 RTNeural/conv2d/conv2d.h
+2 −2 RTNeural/conv2d/conv2d.tpp
+1 −1 RTNeural/conv2d/conv2d_eigen.h
+2 −2 RTNeural/conv2d/conv2d_eigen.tpp
+1 −1 RTNeural/conv2d/conv2d_xsimd.h
+2 −2 RTNeural/conv2d/conv2d_xsimd.tpp
+2 −2 RTNeural/dense/dense.h
+2 −2 RTNeural/dense/dense_eigen.h
+2 −2 RTNeural/dense/dense_xsimd.h
+2 −2 RTNeural/gru/gru.h
+2 −2 RTNeural/gru/gru.tpp
+2 −2 RTNeural/gru/gru_eigen.h
+2 −2 RTNeural/gru/gru_eigen.tpp
+2 −2 RTNeural/gru/gru_xsimd.h
+2 −2 RTNeural/gru/gru_xsimd.tpp
+2 −2 RTNeural/lstm/lstm.h
+2 −2 RTNeural/lstm/lstm.tpp
+2 −2 RTNeural/lstm/lstm_eigen.h
+2 −2 RTNeural/lstm/lstm_eigen.tpp
+2 −2 RTNeural/lstm/lstm_xsimd.h
+2 −2 RTNeural/lstm/lstm_xsimd.tpp
+1 −1 RTNeural/maths/maths_eigen.h
+1 −1 RTNeural/maths/maths_stl.h
+1 −1 RTNeural/maths/maths_xsimd.h
+2 −2 RTNeural/model_loader.h
+1 −1 RTNeural/torch_helpers.h
+1 −1 examples/hello_rtneural/Makefile
4 changes: 2 additions & 2 deletions src/CMakeLists.txt
@@ -191,8 +191,8 @@ foreach(target IN ITEMS rnn_accelerated_sse_or_arm rnn_accelerated_avx)
 target_link_libraries(${target} PRIVATE math_approx)
 endif()
 endforeach()
-target_compile_definitions(rnn_accelerated_sse_or_arm PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=16)
-target_compile_definitions(rnn_accelerated_avx PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=32)
+target_compile_definitions(rnn_accelerated_sse_or_arm PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=16 RTNEURAL_NAMESPACE=RTNeural_sse_arm)
+target_compile_definitions(rnn_accelerated_avx PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=32 RTNEURAL_NAMESPACE=RTNeural_avx)
 target_link_libraries(BYOD PRIVATE rnn_accelerated)
 
 # special flags for MSVC
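The two RTNEURAL_NAMESPACE definitions above imply that the RTNeural sources are compiled once per SIMD target, with each copy wrapped in its own namespace so the SSE/ARM and AVX builds can coexist in one binary without ODR conflicts. A minimal sketch of that pattern (illustrative only; this is not RTNeural's actual header):

#include <cmath>

// The build defines RTNEURAL_NAMESPACE per target, e.g. -DRTNEURAL_NAMESPACE=RTNeural_avx;
// fall back to the plain name when nothing is defined.
#ifndef RTNEURAL_NAMESPACE
#define RTNEURAL_NAMESPACE RTNeural
#endif

namespace RTNEURAL_NAMESPACE
{
// Every symbol lives in the macro-selected namespace, so RTNeural_sse_arm::sigmoid
// and RTNeural_avx::sigmoid are distinct symbols in the final link.
inline float sigmoid (float x) { return 1.0f / (1.0f + std::exp (-x)); }
} // namespace RTNEURAL_NAMESPACE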
13 changes: 5 additions & 8 deletions src/processors/drive/GuitarMLAmp.h
@@ -40,16 +40,13 @@ class GuitarMLAmp : public BaseProcessor
 double processSampleRate = 96000.0;
 std::shared_ptr<FileChooser> customModelChooser;
 
-#if JUCE_INTEL
 template <int numIns, int hiddenSize>
-using GuitarML_LSTM = EA::Variant<
-rnn_sse::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>,
-rnn_avx::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>>;
-#else
-template <int numIns, int hiddenSize>
-using GuitarML_LSTM = EA::Variant<
-rnn_arm::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>>;
+using GuitarML_LSTM = EA::Variant<rnn_sse_arm::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>
+#if JUCE_INTEL
+,
+rnn_avx::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>
 #endif
+>;
 
 using LSTM40Cond = GuitarML_LSTM<2, 40>;
 using LSTM40NoCond = GuitarML_LSTM<1, 40>;
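EA::Variant's API is not part of this diff, so the sketch below uses std::variant as a stand-in to show what the new alias enables: the SSE/ARM network is always a candidate, the AVX network only on Intel builds, and the choice between them is made once at runtime based on CPU support. Type and function names here are hypothetical.

#include <cstddef>
#include <variant>

// Stand-ins for the two compiled copies of the network.
struct RNN_SSE_ARM { void process (float* data, std::size_t n) {} };
struct RNN_AVX     { void process (float* data, std::size_t n) {} };

using GuitarML_LSTM = std::variant<RNN_SSE_ARM
#if JUCE_INTEL
                                   , RNN_AVX
#endif
                                   >;

// Pick the implementation once, e.g. from a CPU-feature query at startup.
inline GuitarML_LSTM makeModel ([[maybe_unused]] bool cpuHasAVX)
{
#if JUCE_INTEL
    if (cpuHasAVX)
        return RNN_AVX {};
#endif
    return RNN_SSE_ARM {};
}

// Later calls dispatch through the variant, with no further #ifdefs at the call site.
inline void processBlock (GuitarML_LSTM& model, float* data, std::size_t n)
{
    std::visit ([data, n] (auto& rnn) { rnn.process (data, n); }, model);
}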
47 changes: 15 additions & 32 deletions src/processors/drive/neural_utils/RNNAccelerated.cpp
@@ -1,16 +1,5 @@
#include "RNNAccelerated.h"

#if __AVX__
#define RTNeural RTNeural_avx
#define xsimd xsimd_avx
#elif __SSE__
#define RTNeural RTNeural_sse
#define xsimd xsimd_sse
#else
#define RTNeural RTNeural_arm
#define xsimd xsimd_arm
#endif

#if __clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
@@ -45,18 +34,12 @@ struct ApproxMathsProvider
 #pragma GCC diagnostic pop
 #endif
 
-#if (__aarch64__ || __arm__)
-namespace rnn_arm
-{
-#elif __AVX__ || (_MSC_VER && BYOD_COMPILING_WITH_AVX)
+#if __AVX__ // Intel/AVX
 namespace rnn_avx
-{
-#elif __SSE__ || (_MSC_VER && ! BYOD_COMPILING_WITH_AVX)
-namespace rnn_sse
-{
 #else
-#error "Unknown or un-supported platform!"
+namespace rnn_sse_arm
 #endif
+{
 
 #if ! (XSIMD_WITH_NEON && BYOD_COMPILING_WITH_AVX)
 
@@ -65,14 +48,14 @@ struct RNNAccelerated<inputSize, hiddenSize, RecurrentLayerType, SRCMode>::Internal
 {
 using RecurrentLayerTypeComplete = std::conditional_t<RecurrentLayerType == RecurrentLayerType::LSTMLayer,
 #if RTNEURAL_USE_MATH_APPROX
-RTNeural::LSTMLayerT<float, inputSize, hiddenSize, (RTNeural::SampleRateCorrectionMode) SRCMode, ApproxMathsProvider>,
-RTNeural::GRULayerT<float, inputSize, hiddenSize, (RTNeural::SampleRateCorrectionMode) SRCMode, ApproxMathsProvider>>;
+RTNEURAL_NAMESPACE::LSTMLayerT<float, inputSize, hiddenSize, (RTNEURAL_NAMESPACE::SampleRateCorrectionMode) SRCMode, ApproxMathsProvider>,
+RTNEURAL_NAMESPACE::GRULayerT<float, inputSize, hiddenSize, (RTNEURAL_NAMESPACE::SampleRateCorrectionMode) SRCMode, ApproxMathsProvider>>;
 #else
-RTNeural::LSTMLayerT<float, inputSize, hiddenSize, (RTNeural::SampleRateCorrectionMode) SRCMode>,
-RTNeural::GRULayerT<float, inputSize, hiddenSize, (RTNeural::SampleRateCorrectionMode) SRCMode>>;
+RTNEURAL_NAMESPACE::LSTMLayerT<float, inputSize, hiddenSize, (RTNEURAL_NAMESPACE::SampleRateCorrectionMode) SRCMode>,
+RTNEURAL_NAMESPACE::GRULayerT<float, inputSize, hiddenSize, (RTNEURAL_NAMESPACE::SampleRateCorrectionMode) SRCMode>>;
 #endif
-using DenseLayerType = RTNeural::DenseT<float, hiddenSize, 1>;
-RTNeural::ModelT<float, inputSize, 1, RecurrentLayerTypeComplete, DenseLayerType> model;
+using DenseLayerType = RTNEURAL_NAMESPACE::DenseT<float, hiddenSize, 1>;
+RTNEURAL_NAMESPACE::ModelT<float, inputSize, 1, RecurrentLayerTypeComplete, DenseLayerType> model;
 };
 
 template <int inputSize, int hiddenSize, int RecurrentLayerType, int SRCMode>
@@ -98,7 +81,7 @@ void RNNAccelerated<inputSize, hiddenSize, RecurrentLayerType, SRCMode>::initialise
 template <int inputSize, int hiddenSize, int RecurrentLayerType, int SRCMode>
 void RNNAccelerated<inputSize, hiddenSize, RecurrentLayerType, SRCMode>::prepare ([[maybe_unused]] int rnnDelaySamples)
 {
-if constexpr (SRCMode == (int) RTNeural::SampleRateCorrectionMode::NoInterp)
+if constexpr (SRCMode == (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::NoInterp)
 {
 internal->model.template get<0>().prepare (rnnDelaySamples);
 internal->model.reset();
@@ -108,7 +91,7 @@ void RNNAccelerated<inputSize, hiddenSize, RecurrentLayerType, SRCMode>::prepare
 template <int inputSize, int hiddenSize, int RecurrentLayerType, int SRCMode>
 void RNNAccelerated<inputSize, hiddenSize, RecurrentLayerType, SRCMode>::prepare ([[maybe_unused]] float rnnDelaySamples)
 {
-if constexpr (SRCMode == (int) RTNeural::SampleRateCorrectionMode::LinInterp)
+if constexpr (SRCMode == (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::LinInterp)
 {
 internal->model.template get<0>().prepare (rnnDelaySamples);
 internal->model.reset();
@@ -160,9 +143,9 @@ void RNNAccelerated<inputSize, hiddenSize, RecurrentLayerType, SRCMode>::process
 }
 }
 
-template class RNNAccelerated<1, 28, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::NoInterp>; // MetalFace
-template class RNNAccelerated<2, 24, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::NoInterp>; // BassFace
-template class RNNAccelerated<1, 40, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>; // GuitarML (no-cond)
-template class RNNAccelerated<2, 40, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>; // GuitarML (cond)
+template class RNNAccelerated<1, 28, RecurrentLayerType::LSTMLayer, (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::NoInterp>; // MetalFace
+template class RNNAccelerated<2, 24, RecurrentLayerType::LSTMLayer, (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::NoInterp>; // BassFace
+template class RNNAccelerated<1, 40, RecurrentLayerType::LSTMLayer, (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::LinInterp>; // GuitarML (no-cond)
+template class RNNAccelerated<2, 40, RecurrentLayerType::LSTMLayer, (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::LinInterp>; // GuitarML (cond)
 #endif // NEON + AVX
 }
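The explicit template class RNNAccelerated<...> lines above exist because the class template is only declared in the header and defined in this .cpp, so every configuration the plugin uses must be instantiated here (once per namespace/target). A generic sketch of that declaration/definition split, with illustrative names:

// widget.h -- declaration only; member bodies stay out of the header.
template <int N>
class Widget
{
public:
    int size() const;
};

// widget.cpp -- definitions, plus one explicit instantiation per configuration
// that the rest of the program is allowed to link against.
template <int N>
int Widget<N>::size() const { return N; }

template class Widget<28>; // emits Widget<28>::size() for the linker
template class Widget<40>; // emits Widget<40>::size() for the linker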
40 changes: 3 additions & 37 deletions src/processors/drive/neural_utils/RNNAccelerated.h
@@ -1,6 +1,5 @@
 #pragma once
 
-#include <memory>
 #include <modules/json/json.hpp>
 #include <span>
 
@@ -10,8 +9,7 @@ constexpr int LSTMLayer = 1;
 constexpr int GRULayer = 2;
 } // namespace RecurrentLayerType
 
-#if __aarch64__ || __arm__
-namespace rnn_arm
+namespace rnn_sse_arm
 {
 template <int inputSize, int hiddenSize, int RecurrentLayerType, int SRCMode>
 class RNNAccelerated
@@ -42,41 +40,9 @@ class RNNAccelerated
 static constexpr size_t alignment = 16;
 alignas (alignment) char internal_data[max_model_size] {};
 };
-} // namespace rnn_arm
-#else // intel
-namespace rnn_sse
-{
-template <int inputSize, int hiddenSize, int RecurrentLayerType, int SRCMode>
-class RNNAccelerated
-{
-public:
-RNNAccelerated();
-~RNNAccelerated();
-
-RNNAccelerated (const RNNAccelerated&) = delete;
-RNNAccelerated& operator= (const RNNAccelerated&) = delete;
-RNNAccelerated (RNNAccelerated&&) noexcept = delete;
-RNNAccelerated& operator= (RNNAccelerated&&) noexcept = delete;
-
-void initialise (const nlohmann::json& weights_json);
-
-void prepare (int rnnDelaySamples);
-void prepare (float rnnDelaySamples);
-void reset();
-
-void process (std::span<float> buffer, bool useResiduals = false) noexcept;
-void process_conditioned (std::span<float> buffer, std::span<const float> condition, bool useResiduals = false) noexcept;
-
-private:
-struct Internal;
-Internal* internal = nullptr;
-
-static constexpr size_t max_model_size = 30000;
-static constexpr size_t alignment = 16;
-alignas (alignment) char internal_data[max_model_size] {};
-};
-} // namespace rnn_sse
+} // namespace rnn_sse_arm
 
+#if __AVX__ // Intel/AVX
 namespace rnn_avx
 {
 template <int inputSize, int hiddenSize, int RecurrentLayerType, int SRCMode>
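The members kept in this header (an opaque Internal, a fixed max_model_size buffer, and an alignas specifier) look like a heap-free pimpl: the implementation object is presumably placement-new'd into internal_data inside RNNAccelerated.cpp. A minimal sketch of that pattern under that assumption (the stand-in Internal and its size are illustrative):

#include <cstddef>
#include <new>

class OpaqueExample
{
public:
    OpaqueExample()
    {
        static_assert (sizeof (Internal) <= max_model_size, "buffer too small");
        static_assert (alignof (Internal) <= alignment, "buffer under-aligned");
        internal = new (internal_data) Internal {}; // construct in-place, no heap allocation
    }

    ~OpaqueExample() { internal->~Internal(); } // placement new requires a manual destructor call

private:
    struct Internal { float state[64] {}; }; // the real one would hold the RTNeural model

    Internal* internal = nullptr;

    static constexpr std::size_t max_model_size = 30000;
    static constexpr std::size_t alignment = 16;
    alignas (alignment) char internal_data[max_model_size] {};
};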
10 changes: 5 additions & 5 deletions src/processors/drive/neural_utils/ResampledRNNAccelerated.h
@@ -60,13 +60,13 @@ class ResampledRNNAccelerated
 }
 
 private:
+EA::Variant<rnn_sse_arm::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType, (int) RTNeural::SampleRateCorrectionMode::NoInterp>
 #if JUCE_INTEL
-EA::Variant<rnn_sse::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType, (int) RTNeural::SampleRateCorrectionMode::NoInterp>,
-rnn_avx::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType, (int) RTNeural::SampleRateCorrectionMode::NoInterp>>
-model_variant;
-#elif JUCE_ARM
-EA::Variant<rnn_arm::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType, (int) RTNeural::SampleRateCorrectionMode::NoInterp>> model_variant;
+,
+rnn_avx::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType, (int) RTNeural::SampleRateCorrectionMode::NoInterp>
 #endif
+>
+model_variant;
 
 using ResamplerType = chowdsp::ResamplingTypes::LanczosResampler<8192, 8>;
 chowdsp::ResampledProcess<ResamplerType> resampler;
10 changes: 6 additions & 4 deletions src/processors/drive/neural_utils/model_loaders.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <RTNeural/RTNeural.h>
+
 namespace model_loaders
 {
 using Vec2d = std::vector<std::vector<float>>;
@@ -22,8 +24,8 @@ template <typename ModelType>
 void loadLSTMModel (ModelType& model, const nlohmann::json& weights_json)
 {
 const auto& state_dict = weights_json.at ("state_dict");
-RTNeural::torch_helpers::loadLSTM<float> (state_dict, "rec.", model.template get<0>());
-RTNeural::torch_helpers::loadDense<float> (state_dict, "lin.", model.template get<1>());
+RTNEURAL_NAMESPACE::torch_helpers::loadLSTM<float> (state_dict, "rec.", model.template get<0>());
+RTNEURAL_NAMESPACE::torch_helpers::loadDense<float> (state_dict, "lin.", model.template get<1>());
 }
 
 template <typename ModelType>
@@ -38,7 +40,7 @@ void loadGRUModel (ModelType& model, const nlohmann::json& weights_json)
 
 int layer_idx = 0;
 const auto& gru_weights = gru_layer_json["weights"];
-RTNeural::json_parser::loadGRU<float> (gru, gru_weights);
-RTNeural::modelt_detail::loadLayer<float> (dense, layer_idx, dense_layer_json, "dense", 1, false);
+RTNEURAL_NAMESPACE::json_parser::loadGRU<float> (gru, gru_weights);
+RTNEURAL_NAMESPACE::modelt_detail::loadLayer<float> (dense, layer_idx, dense_layer_json, "dense", 1, false);
 }
 } // namespace model_loaders
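A hypothetical usage of loadLSTMModel above: the model layout is assumed from the get<0>()/get<1>() calls (one LSTM layer feeding a single-output dense layer, with the 1-input / hidden-size-40 shape used by the GuitarML no-cond model elsewhere in this commit), and RTNEURAL_NAMESPACE is assumed to expand to RTNeural in a default build.

#include <fstream>

#include <RTNeural/RTNeural.h>
#include <modules/json/json.hpp>

#include "model_loaders.h" // include path assumed

inline void loadGuitarMLWeights (const char* jsonPath)
{
    // 1 input -> LSTM(40) -> Dense(40 -> 1)
    RTNeural::ModelT<float, 1, 1,
                     RTNeural::LSTMLayerT<float, 1, 40>,
                     RTNeural::DenseT<float, 40, 1>>
        model;

    std::ifstream jsonStream { jsonPath };
    nlohmann::json weights_json;
    jsonStream >> weights_json;

    model_loaders::loadLSTMModel (model, weights_json);
    model.reset();
}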
