diff --git a/modules/RTNeural b/modules/RTNeural index 37fde444..1e7b5703 160000 --- a/modules/RTNeural +++ b/modules/RTNeural @@ -1 +1 @@ -Subproject commit 37fde44497f256e7b0836fa66b26efb575eee532 +Subproject commit 1e7b570348c01cd5e05c60082fc92d55d70186c2 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 53445bf1..4d0fbb55 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -191,8 +191,8 @@ foreach(target IN ITEMS rnn_accelerated_sse_or_arm rnn_accelerated_avx) target_link_libraries(${target} PRIVATE math_approx) endif() endforeach() -target_compile_definitions(rnn_accelerated_sse_or_arm PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=16) -target_compile_definitions(rnn_accelerated_avx PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=32) +target_compile_definitions(rnn_accelerated_sse_or_arm PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=16 RTNEURAL_NAMESPACE=RTNeural_sse_arm) +target_compile_definitions(rnn_accelerated_avx PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=32 RTNEURAL_NAMESPACE=RTNeural_avx) target_link_libraries(BYOD PRIVATE rnn_accelerated) # special flags for MSVC diff --git a/src/processors/drive/GuitarMLAmp.h b/src/processors/drive/GuitarMLAmp.h index a6511ac7..2d19e524 100644 --- a/src/processors/drive/GuitarMLAmp.h +++ b/src/processors/drive/GuitarMLAmp.h @@ -40,16 +40,13 @@ class GuitarMLAmp : public BaseProcessor double processSampleRate = 96000.0; std::shared_ptr customModelChooser; -#if JUCE_INTEL - template - using GuitarML_LSTM = EA::Variant< - rnn_sse::RNNAccelerated, - rnn_avx::RNNAccelerated>; -#else template - using GuitarML_LSTM = EA::Variant< - rnn_arm::RNNAccelerated>; + using GuitarML_LSTM = EA::Variant +#if JUCE_INTEL + , + rnn_avx::RNNAccelerated #endif + >; using LSTM40Cond = GuitarML_LSTM<2, 40>; using LSTM40NoCond = GuitarML_LSTM<1, 40>; diff --git a/src/processors/drive/neural_utils/RNNAccelerated.cpp b/src/processors/drive/neural_utils/RNNAccelerated.cpp index 7d1d11bf..cf2a9518 100644 --- a/src/processors/drive/neural_utils/RNNAccelerated.cpp +++ b/src/processors/drive/neural_utils/RNNAccelerated.cpp @@ -1,16 +1,5 @@ #include "RNNAccelerated.h" -#if __AVX__ -#define RTNeural RTNeural_avx -#define xsimd xsimd_avx -#elif __SSE__ -#define RTNeural RTNeural_sse -#define xsimd xsimd_sse -#else -#define RTNeural RTNeural_arm -#define xsimd xsimd_arm -#endif - #if __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshorten-64-to-32" @@ -45,18 +34,12 @@ struct ApproxMathsProvider #pragma GCC diagnostic pop #endif -#if (__aarch64__ || __arm__) -namespace rnn_arm -{ -#elif __AVX__ || (_MSC_VER && BYOD_COMPILING_WITH_AVX) +#if __AVX__ // Intel/AVX namespace rnn_avx -{ -#elif __SSE__ || (_MSC_VER && ! BYOD_COMPILING_WITH_AVX) -namespace rnn_sse -{ #else -#error "Unknown or un-supported platform!" +namespace rnn_sse_arm #endif +{ #if ! (XSIMD_WITH_NEON && BYOD_COMPILING_WITH_AVX) @@ -65,14 +48,14 @@ struct RNNAccelerated::Inter { using RecurrentLayerTypeComplete = std::conditional_t, - RTNeural::GRULayerT>; + RTNEURAL_NAMESPACE::LSTMLayerT, + RTNEURAL_NAMESPACE::GRULayerT>; #else - RTNeural::LSTMLayerT, - RTNeural::GRULayerT>; + RTNEURAL_NAMESPACE::LSTMLayerT, + RTNEURAL_NAMESPACE::GRULayerT>; #endif - using DenseLayerType = RTNeural::DenseT; - RTNeural::ModelT model; + using DenseLayerType = RTNEURAL_NAMESPACE::DenseT; + RTNEURAL_NAMESPACE::ModelT model; }; template @@ -98,7 +81,7 @@ void RNNAccelerated::initial template void RNNAccelerated::prepare ([[maybe_unused]] int rnnDelaySamples) { - if constexpr (SRCMode == (int) RTNeural::SampleRateCorrectionMode::NoInterp) + if constexpr (SRCMode == (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::NoInterp) { internal->model.template get<0>().prepare (rnnDelaySamples); internal->model.reset(); @@ -108,7 +91,7 @@ void RNNAccelerated::prepare template void RNNAccelerated::prepare ([[maybe_unused]] float rnnDelaySamples) { - if constexpr (SRCMode == (int) RTNeural::SampleRateCorrectionMode::LinInterp) + if constexpr (SRCMode == (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::LinInterp) { internal->model.template get<0>().prepare (rnnDelaySamples); internal->model.reset(); @@ -160,9 +143,9 @@ void RNNAccelerated::process } } -template class RNNAccelerated<1, 28, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::NoInterp>; // MetalFace -template class RNNAccelerated<2, 24, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::NoInterp>; // BassFace -template class RNNAccelerated<1, 40, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>; // GuitarML (no-cond) -template class RNNAccelerated<2, 40, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>; // GuitarML (cond) +template class RNNAccelerated<1, 28, RecurrentLayerType::LSTMLayer, (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::NoInterp>; // MetalFace +template class RNNAccelerated<2, 24, RecurrentLayerType::LSTMLayer, (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::NoInterp>; // BassFace +template class RNNAccelerated<1, 40, RecurrentLayerType::LSTMLayer, (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::LinInterp>; // GuitarML (no-cond) +template class RNNAccelerated<2, 40, RecurrentLayerType::LSTMLayer, (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::LinInterp>; // GuitarML (cond) #endif // NEON + AVX } diff --git a/src/processors/drive/neural_utils/RNNAccelerated.h b/src/processors/drive/neural_utils/RNNAccelerated.h index 2ca32ca3..f623994b 100644 --- a/src/processors/drive/neural_utils/RNNAccelerated.h +++ b/src/processors/drive/neural_utils/RNNAccelerated.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include @@ -10,8 +9,7 @@ constexpr int LSTMLayer = 1; constexpr int GRULayer = 2; } // namespace RecurrentLayerType -#if __aarch64__ || __arm__ -namespace rnn_arm +namespace rnn_sse_arm { template class RNNAccelerated @@ -42,41 +40,9 @@ class RNNAccelerated static constexpr size_t alignment = 16; alignas (alignment) char internal_data[max_model_size] {}; }; -} // namespace rnn_arm -#else // intel -namespace rnn_sse -{ -template -class RNNAccelerated -{ -public: - RNNAccelerated(); - ~RNNAccelerated(); - - RNNAccelerated (const RNNAccelerated&) = delete; - RNNAccelerated& operator= (const RNNAccelerated&) = delete; - RNNAccelerated (RNNAccelerated&&) noexcept = delete; - RNNAccelerated& operator= (RNNAccelerated&&) noexcept = delete; - - void initialise (const nlohmann::json& weights_json); - - void prepare (int rnnDelaySamples); - void prepare (float rnnDelaySamples); - void reset(); - - void process (std::span buffer, bool useResiduals = false) noexcept; - void process_conditioned (std::span buffer, std::span condition, bool useResiduals = false) noexcept; - -private: - struct Internal; - Internal* internal = nullptr; - - static constexpr size_t max_model_size = 30000; - static constexpr size_t alignment = 16; - alignas (alignment) char internal_data[max_model_size] {}; -}; -} // namespace rnn_sse +} // namespace rnn_sse_arm +#if __AVX__ // Intel/AVX namespace rnn_avx { template diff --git a/src/processors/drive/neural_utils/ResampledRNNAccelerated.h b/src/processors/drive/neural_utils/ResampledRNNAccelerated.h index 38aa344d..e7b9376e 100644 --- a/src/processors/drive/neural_utils/ResampledRNNAccelerated.h +++ b/src/processors/drive/neural_utils/ResampledRNNAccelerated.h @@ -60,13 +60,13 @@ class ResampledRNNAccelerated } private: + EA::Variant #if JUCE_INTEL - EA::Variant, - rnn_avx::RNNAccelerated> - model_variant; -#elif JUCE_ARM - EA::Variant> model_variant; + , + rnn_avx::RNNAccelerated #endif + > + model_variant; using ResamplerType = chowdsp::ResamplingTypes::LanczosResampler<8192, 8>; chowdsp::ResampledProcess resampler; diff --git a/src/processors/drive/neural_utils/model_loaders.h b/src/processors/drive/neural_utils/model_loaders.h index 8fcae920..508e28c2 100644 --- a/src/processors/drive/neural_utils/model_loaders.h +++ b/src/processors/drive/neural_utils/model_loaders.h @@ -1,5 +1,7 @@ #pragma once +#include + namespace model_loaders { using Vec2d = std::vector>; @@ -22,8 +24,8 @@ template void loadLSTMModel (ModelType& model, const nlohmann::json& weights_json) { const auto& state_dict = weights_json.at ("state_dict"); - RTNeural::torch_helpers::loadLSTM (state_dict, "rec.", model.template get<0>()); - RTNeural::torch_helpers::loadDense (state_dict, "lin.", model.template get<1>()); + RTNEURAL_NAMESPACE::torch_helpers::loadLSTM (state_dict, "rec.", model.template get<0>()); + RTNEURAL_NAMESPACE::torch_helpers::loadDense (state_dict, "lin.", model.template get<1>()); } template @@ -38,7 +40,7 @@ void loadGRUModel (ModelType& model, const nlohmann::json& weights_json) int layer_idx = 0; const auto& gru_weights = gru_layer_json["weights"]; - RTNeural::json_parser::loadGRU (gru, gru_weights); - RTNeural::modelt_detail::loadLayer (dense, layer_idx, dense_layer_json, "dense", 1, false); + RTNEURAL_NAMESPACE::json_parser::loadGRU (gru, gru_weights); + RTNEURAL_NAMESPACE::modelt_detail::loadLayer (dense, layer_idx, dense_layer_json, "dense", 1, false); } } // namespace model_loaders