From dffcd9bdc362c613a4c1fd37c72de34c39e396fa Mon Sep 17 00:00:00 2001 From: Tikhonov Ivan Date: Tue, 5 Nov 2024 15:34:05 +0330 Subject: [PATCH 01/13] Make KeepConstPrecision attribute copyable --- ...dequantization_subgraph_transformation.cpp | 32 +++++++++++++++++++ .../rt_info/keep_const_precision.hpp | 4 --- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp b/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp index f68b7ba43b7c9f..2ef4611f2341a4 100644 --- a/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp +++ b/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp @@ -11,9 +11,41 @@ #include "transformations/rt_info/keep_const_precision.hpp" #include "common_test_utils/ov_test_utils.hpp" +#include "transformations/convert_precision.hpp" using namespace ov; +TEST_F(TransformationTestsF, KeepConstPrecision) { + { + auto lp_const = std::make_shared(element::u4, Shape{27}, 1); + + const auto target_shape = std::make_shared(ov::element::i64, ov::Shape{3}, 3); + auto reshape = std::make_shared(lp_const, target_shape, false); + + auto second_convert = std::make_shared(reshape, element::f32); + auto zero_point = opset10::Constant::create(element::f32, Shape{}, {127}); + auto subtract = std::make_shared(second_convert, zero_point); + auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2}); + auto multiply = std::make_shared(subtract, scale); + auto stub_op = std::make_shared(multiply); + model = std::make_shared(stub_op, ParameterVector{}); + } + manager.register_pass(element::TypeVector{element::u4}); + manager.register_pass(); + manager.register_pass(ov::element::u4, ov::element::u8, type_to_fuse_map{}, false, false); + manager.register_pass("keep_const_precision.xml", "keep_const_precision.bin"); + { + auto lp_const = 
std::make_shared(element::u4, Shape{3, 3, 3}, 1); + auto second_convert = std::make_shared(lp_const, element::f32); + auto zero_point = opset10::Constant::create(element::f32, Shape{}, {127}); + auto subtract = std::make_shared(second_convert, zero_point); + auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2}); + auto multiply = std::make_shared(subtract, scale); + auto stub_op = std::make_shared(multiply); + model_ref = std::make_shared(stub_op, ParameterVector{}); + } +} + TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformation) { // Input graph: // diff --git a/src/common/transformations/include/transformations/rt_info/keep_const_precision.hpp b/src/common/transformations/include/transformations/rt_info/keep_const_precision.hpp index 46c8bf00deb3f2..3a7e106cebbc27 100644 --- a/src/common/transformations/include/transformations/rt_info/keep_const_precision.hpp +++ b/src/common/transformations/include/transformations/rt_info/keep_const_precision.hpp @@ -26,10 +26,6 @@ class TRANSFORMATIONS_API KeepConstPrecision : public RuntimeAttribute { OPENVINO_RTTI("keep_const_precision", "0"); KeepConstPrecision() = default; - - bool is_copyable() const override { - return false; - } }; } // namespace ov From 7555a9e5f12526bc047fccc60cf3c36535f74162 Mon Sep 17 00:00:00 2001 From: Tikhonov Ivan Date: Wed, 13 Nov 2024 15:21:47 +0330 Subject: [PATCH 02/13] update mark dequantization transformation --- .../assets/snippets/lpt_intel_cpu_plugin.cpp | 2 +- ...dequantization_subgraph_transformation.cpp | 38 ++-- .../mark_dequantization_subgraph.hpp | 30 ++- .../common_optimizations.cpp | 1 - .../moc_transformations.cpp | 5 +- .../mark_dequantization_subgraph.cpp | 212 ++++++++++++------ .../tests/op_conversions/convert_subtract.cpp | 2 +- .../frontend/src/op/dequantize_linear.cpp | 5 +- .../transformation_pipeline.cpp | 6 +- .../src/plugin/transformations_pipeline.cpp | 10 +- 10 files changed, 189 insertions(+), 122 deletions(-) diff --git 
a/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp b/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp index d9e41bc77eec17..662d32be4a10fb 100644 --- a/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp +++ b/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp @@ -38,7 +38,7 @@ auto defaultPrecisions = useLpt ? ov::pass::low_precision::precision_set::get_int8_support() : std::vector{}; if (useLpt) { // disable constant folding on dequantization subgraphs so they can be processed by LPT - manager.register_pass(defaultPrecisions); + manager.register_pass(defaultPrecisions); } // OpenVINO common transformations happen here diff --git a/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp b/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp index 2ef4611f2341a4..89fb71f1a8f0f1 100644 --- a/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp +++ b/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp @@ -30,10 +30,10 @@ TEST_F(TransformationTestsF, KeepConstPrecision) { auto stub_op = std::make_shared(multiply); model = std::make_shared(stub_op, ParameterVector{}); } - manager.register_pass(element::TypeVector{element::u4}); - manager.register_pass(); + + manager.register_pass(element::TypeVector{element::u4}); manager.register_pass(ov::element::u4, ov::element::u8, type_to_fuse_map{}, false, false); - manager.register_pass("keep_const_precision.xml", "keep_const_precision.bin"); + { auto lp_const = std::make_shared(element::u4, Shape{3, 3, 3}, 1); auto second_convert = std::make_shared(lp_const, element::f32); @@ -46,7 +46,7 @@ TEST_F(TransformationTestsF, KeepConstPrecision) { } } -TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformation) { +TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformation) { // Input graph: // // Parameter @@ 
-69,7 +69,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformation) { // \ / // Convolution // - // After MarkDequantizationSubgraph all Subtract and Multiply nodes from above graph + // After MarkDequantizationAndDecompression all Subtract and Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. // All 'Convert(DCF)' nodes from above graph are marked with 'DisableConstantFolding' attribute // Weights and zero points are marked with 'KeepConstPrecision' attribute @@ -114,7 +114,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformation) { model = std::make_shared(conv, ParameterVector{parameter}); } - manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); manager.register_pass(); { @@ -170,7 +170,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformation) { comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS); } -TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint) { +TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZeroPoint) { // Input graph: // // Parameter @@ -190,7 +190,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint // \ / // Convolution // - // After MarkDequantizationSubgraph all Multiply nodes from above graph + // After MarkDequantizationAndDecompression all Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. 
// Also 'Convert(DCF)' node from above graph is marked with 'DisableConstantFolding' attribute // Weights node is marked with 'KeepConstPrecision' attribute @@ -229,7 +229,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint model = std::make_shared(conv, ParameterVector{parameter}); } - manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); manager.register_pass(); { @@ -274,7 +274,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS); } -TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPointFP16) { +TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZeroPointFP16) { // Input graph: // // Parameter @@ -294,7 +294,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint // \ / // Convolution // - // After MarkDequantizationSubgraph all Multiply nodes from above graph + // After MarkDequantizationAndDecompression all Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. 
// Also 'Convert(DCF)' node from above graph is marked with 'DisableConstantFolding' attribute // Weights node is marked with 'KeepConstPrecision' attribute @@ -337,9 +337,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint model = std::make_shared(conv, ParameterVector{parameter}); } - manager.register_pass(element::TypeVector{element::u8, element::i8}); - manager.register_pass(); - manager.register_pass(); + manager.register_pass(element::TypeVector{element::u8, element::i8}); { auto parameter = std::make_shared(element::f32, Shape{1, 16, 14, 14}); @@ -387,7 +385,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS); } -TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNotConstantWeights) { +TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNotConstantWeights) { // Input graph: // // Parameter @@ -410,7 +408,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNotConstant // \ / // Convolution // - // After MarkDequantizationSubgraph all Subtract and Multiply nodes from above graph + // After MarkDequantizationAndDecompression all Subtract and Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. 
// Also all 'Convert(DCF)' nodes from above graph are marked with 'DisableConstantFolding' attribute // Weights and zero point nodes are marked with 'KeepConstPrecision' attribute @@ -458,7 +456,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNotConstant model = std::make_shared(conv, ParameterVector{parameter}); } - manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); manager.register_pass(); { @@ -513,7 +511,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNotConstant comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS); } -TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationFoldSubConst) { +TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationFoldSubConst) { // Input graph: After transformation: // // Constant Constant Constant @@ -527,7 +525,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationFoldSubCons // | / \ / // Multiply Multiply // - // After MarkDequantizationSubgraph all Subtract and Multiply nodes from above graph + // After MarkDequantizationAndDecompression all Subtract and Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. 
// Also all 'Convert(DCF)' node before weights is marked with 'DisableConstantFolding' attribute // but Convert before Dequantization Sub const isn't because fold_subtract_const is set to true @@ -544,7 +542,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationFoldSubCons model = std::make_shared(ov::OutputVector{multiply}); } - manager.register_pass(element::TypeVector{element::u8}, true); + manager.register_pass(element::TypeVector{element::u8}, true); manager.register_pass(); { diff --git a/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp b/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp index 8b9b9e573ba957..7770647d736e67 100644 --- a/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp +++ b/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp @@ -4,6 +4,8 @@ #pragma once +#include + #include "openvino/pass/matcher_pass.hpp" #include "transformations_visibility.hpp" @@ -12,19 +14,25 @@ namespace pass { /** * @ingroup ov_transformation_common_api - * @brief MarkDequantizationSubgraph marks dequantization subgraph, that is: - * Convert->Subtract(optional)->Multiply - * in two ways: - * - first Convert is marked with DisableConstantFolding attribute, also if Subtract is present - * and its second input is a Convert - that Convert is marked with DisableConstantFolding as well, - * - Subtract and Multiply are marked with 'DequantizationNode' attribute + * @brief TBA */ -class TRANSFORMATIONS_API MarkDequantizationSubgraph : public MatcherPass { +class TRANSFORMATIONS_API MarkDequantizationAndDecompression : public ModelPass { public: - OPENVINO_RTTI("MarkDequantizationSubgraph", "0"); - MarkDequantizationSubgraph(const element::TypeVector& precisions, - const bool fold_subtract_const = false, - const bool disable_fold_multiply_const = false); + 
OPENVINO_RTTI("MarkDequantizationAndDecompression", "0"); + explicit MarkDequantizationAndDecompression(element::TypeVector precisions, + const bool fold_subtract_const = false, + const bool fold_multiply_const = true) + : m_fold_subtract_const(fold_subtract_const), + m_fold_multiply_const(fold_multiply_const), + m_precisions(std::move(precisions)) {} + + bool run_on_model(const std::shared_ptr& m) override; + +private: + bool m_fold_subtract_const = false; + bool m_fold_multiply_const = true; + element::TypeVector m_precisions; }; + } // namespace pass } // namespace ov diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 37ee2d12d9aebb..fea833171fb1ee 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -128,7 +128,6 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr manager.set_per_pass_validation(false); using namespace ov::pass; REGISTER_PASS(manager, InitNodeInfo) + REGISTER_PASS(manager, EliminateConvert) if (m_low_precision_enabled) { - manager.register_pass( + manager.register_pass( element::TypeVector{ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4}); } if (!m_use_shapes) { @@ -142,8 +143,6 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr // Zero dimensions in shape causes creation empty tensors, which are incorrect during CF. // In particular, if zero dim tensor is consumed in body of MultiSubGraphOp // RemoveConcatZeroDimInput and RemoveMultiSubGraphOpDanglingParamsResults should be called together. 
- using namespace ov::pass; - REGISTER_PASS(manager, EliminateConvert) REGISTER_PASS(manager, EliminateScatterUpdate) REGISTER_PASS(manager, RemoveConcatZeroDimInput) REGISTER_PASS(manager, EliminateLoopInputsOutputs); diff --git a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp index 9fdb17804409a9..ceabc62de459a0 100644 --- a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp +++ b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp @@ -6,16 +6,67 @@ #include "openvino/op/multiply.hpp" #include "openvino/op/subtract.hpp" +#include "openvino/pass/manager.hpp" +#include "openvino/pass/pattern/op/optional.hpp" #include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" #include "transformations/rt_info/dequantization_node.hpp" #include "transformations/rt_info/disable_constant_folding.hpp" #include "transformations/rt_info/keep_const_precision.hpp" #include "transformations/utils/utils.hpp" -ov::pass::MarkDequantizationSubgraph::MarkDequantizationSubgraph(const element::TypeVector& precisions, - const bool fold_subtract_const, - const bool disable_fold_multiply_const) { +using namespace ov; +using namespace ov::op; +using namespace ov::pass::pattern; + +namespace { +/** + * @ingroup ov_transformation_common_api + * @brief TBA + */ +class TRANSFORMATIONS_API MarkDequantization : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("MarkDQ", "0"); + explicit MarkDequantization(const element::TypeVector& precisions, + bool fold_subtract_const = false, + bool fold_multiply_const = true); +}; + +/** + * @ingroup ov_transformation_common_api + * @brief TBA + */ +class TRANSFORMATIONS_API KeepConstsPrecision : public 
ov::pass::MatcherPass { +public: + OPENVINO_RTTI("KeepConstsPrecision", "0"); + explicit KeepConstsPrecision(const element::TypeVector& precisions); +}; + +bool check_precision(const ov::element::Type_t type_to_check, const ov::element::TypeVector& precisions) { + return std::find(precisions.begin(), precisions.end(), type_to_check) != precisions.end(); +}; + +using RTInfoSetter = std::function& node)>; +void set_rt_info(const PatternValueMap& pt_map, + const RTInfoSetter& rt_info_setter, + const NodeVector& pattern_nodes, + const ov::element::TypeVector& precisions) { + for (const auto& pattern_node : pattern_nodes) { + if (pt_map.count(pattern_node)) { + auto node = pt_map.at(pattern_node).get_node_shared_ptr(); + if (ov::as_type_ptr(node) && !check_precision(node->get_input_element_type(0), precisions)) { + continue; + } + rt_info_setter(node); + } + } +}; +} // namespace + +MarkDequantization::MarkDequantization(const element::TypeVector& precisions, + const bool fold_subtract_const, + const bool fold_multiply_const) { // Dequantization subgraph may have two forms: with and without Subtract // // Input Input @@ -26,93 +77,106 @@ ov::pass::MarkDequantizationSubgraph::MarkDequantizationSubgraph(const element:: // \ / // Multiply // - auto input_pattern = pattern::any_input(); - auto convert_pattern = pattern::wrap_type({input_pattern}, pattern::consumers_count(1)); - auto zero_point_pattern = pattern::any_input(); - auto subtract_pattern = pattern::wrap_type({convert_pattern, zero_point_pattern}); - auto multiply_pattern = pattern::wrap_type({subtract_pattern, pattern::any_input()}); - auto multiply_no_subtract_pattern = - pattern::wrap_type({convert_pattern, pattern::any_input()}); - auto root = std::make_shared(OutputVector{multiply_pattern, multiply_no_subtract_pattern}); - - ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](pattern::Matcher& m) -> bool { - const auto& pattern_map = m.get_pattern_value_map(); - auto convert = 
pattern_map.at(convert_pattern).get_node_shared_ptr(); - auto input = pattern_map.at(input_pattern); + auto input_pattern = any_input(); + auto convert_pattern = wrap_type({input_pattern}, consumers_count(1)); + + // zero points: + auto zp_pattern = any_input(); + auto zp_convert_pattern = optional(zp_pattern); + auto subtract_pattern = optional({convert_pattern, zp_convert_pattern}); + + // scale: + auto scale_pattern = any_input(); + auto scale_convert_pattern = optional(scale_pattern); + auto multiply_pattern = wrap_type({subtract_pattern, scale_convert_pattern}); + + ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](Matcher& m) -> bool { + const auto& pt_map = m.get_pattern_value_map(); + auto convert = pt_map.at(convert_pattern); const auto multiply = m.get_match_root(); if (transformation_callback(multiply)) { return false; } - auto subtract_it = pattern_map.find(subtract_pattern); - if (subtract_it == pattern_map.end()) { - for (size_t i = 0; i < multiply->get_input_size(); i++) { - const auto node = ov::as_type_ptr(multiply->get_input_node_shared_ptr(i)); - if (node && std::find(precisions.begin(), precisions.end(), node->get_input_element_type(0)) != - precisions.end()) { - convert = node; - input = convert->input_value(0); - } - } - } + // Multiply and Subtract have to be marked as dq + set_rt_info(pt_map, mark_as_dequantization_node, {subtract_pattern, multiply_pattern}, {/* not applicable */}); - const auto& input_precision = input.get_element_type(); - // validation by Convert operation input precisions - if (std::find(precisions.begin(), precisions.end(), input_precision) == precisions.end()) { - return false; + // Convert might be presented on scales, zp and data_input. + // Depending on the transformation arguments they have to be marked/unmarked with disable_cf rt_info. 
+ NodeVector converts_to_mark = {convert_pattern}; + NodeVector converts_to_unmark = {}; + if (fold_subtract_const) { + converts_to_unmark.push_back(zp_convert_pattern); + } else { + converts_to_mark.push_back(zp_convert_pattern); } - if (ov::op::util::is_on_constant_path(input)) { - // disable ConstantFolding if dequantization subgraph is on constant data - ov::disable_constant_folding(convert); - // It is also necessary to avoid precision conversion for constant nodes with input_precision - auto keep_const_precision = [&](Node* node) { - if (auto constant = ov::as_type(node)) { - const auto& const_et = constant->get_element_type(); - if (std::find(precisions.begin(), precisions.end(), const_et) != precisions.end()) - ov::enable_keep_const_precision(convert->get_input_node_shared_ptr(0)); - } - }; - std::unordered_set visited; - ov::op::util::visit_constant_path(input.get_node(), visited, keep_const_precision); + if (fold_multiply_const) { + converts_to_unmark.push_back(scale_convert_pattern); + } else { + converts_to_mark.push_back(scale_convert_pattern); } - if (subtract_it != pattern_map.end()) { - // mark Subtract as dequantization node - ov::mark_as_dequantization_node(subtract_it->second.get_node_shared_ptr()); - auto zero_point = pattern_map.at(zero_point_pattern).get_node_shared_ptr(); - if (ov::is_type(zero_point) && - input_precision == zero_point->get_input_element_type(0) && - ov::is_type(zero_point->get_input_node_ptr(0))) { - if (!fold_subtract_const) { - // disable ConstantFolding also for Convert on zero_point - // so we don't have to constantfold it and then convert it back to - // low precision in LP transformations - ov::disable_constant_folding(zero_point); - ov::enable_keep_const_precision(zero_point->get_input_node_shared_ptr(0)); - } else { - ov::enable_constant_folding(zero_point); - ov::disable_keep_const_precision(zero_point->get_input_node_shared_ptr(0)); - } - } - } + set_rt_info(pt_map, disable_constant_folding, converts_to_mark, 
precisions); + set_rt_info(pt_map, enable_constant_folding, converts_to_unmark, precisions); + return false; + }; + + auto m = std::make_shared(multiply_pattern, "MarkDQ"); + this->register_matcher(m, callback); +} - // mark Multiply as dequantization node - ov::mark_as_dequantization_node(multiply); - auto scale = multiply->get_input_node_shared_ptr(1); - if (ov::is_type(scale) && - ov::is_type(scale->get_input_node_ptr(0))) { - if (disable_fold_multiply_const) { - ov::disable_constant_folding(scale); - ov::unmark_as_decompression(scale); - ov::enable_keep_const_precision(scale->get_input_node_shared_ptr(0)); +KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions) { + // Dequantization subgraph may have two forms: with and without Subtract + // + // Input Input + // | | + // Convert zero point OR Convert scale + // \ / \ / + // Subtract scale Multiply + // \ / + // Multiply + // + auto input_pattern = any_input(); + auto convert_pattern = wrap_type({input_pattern}, consumers_count(1)); + + // zero points: + auto zp_pattern = any_input(); + auto zp_convert_pattern = optional(zp_pattern); + auto subtract_pattern = optional({convert_pattern, zp_convert_pattern}); + + // scale: + auto scale_pattern = any_input(); + auto scale_convert_pattern = optional(scale_pattern); + auto multiply_pattern = wrap_type({subtract_pattern, scale_convert_pattern}); + + ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](Matcher& m) -> bool { + const auto& pt_map = m.get_pattern_value_map(); + + NodeVector keep_const_precisions = {}; + for (const auto& pattern_node : {input_pattern, zp_pattern, scale_pattern}) { + if (pt_map.count(pattern_node)) { + auto node = pt_map.at(pattern_node).get_node_shared_ptr(); + if (ov::as_type_ptr(node)) { + ov::enable_keep_const_precision(node); + } } } - return false; }; - auto m = std::make_shared(root, "MarkDequantizationSubgraph"); + auto m = std::make_shared(multiply_pattern, "KeepConstsPrecision"); 
this->register_matcher(m, callback); } + +bool pass::MarkDequantizationAndDecompression::run_on_model(const std::shared_ptr& m) { + ov::pass::Manager manager("MarkDequantizationAndDecompressionManager"); + manager.register_pass(m_precisions, m_fold_subtract_const, m_fold_multiply_const); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(m_precisions); + manager.run_passes(m); + + return false; +} diff --git a/src/common/transformations/tests/op_conversions/convert_subtract.cpp b/src/common/transformations/tests/op_conversions/convert_subtract.cpp index fb835d0cdb581e..93b5c223345d03 100644 --- a/src/common/transformations/tests/op_conversions/convert_subtract.cpp +++ b/src/common/transformations/tests/op_conversions/convert_subtract.cpp @@ -77,7 +77,7 @@ TEST_F(TransformationTestsF, ConvertSubtractDequantizationSubgraph) { model = std::make_shared(mul, ParameterVector{data}); - manager.register_pass(element::TypeVector{element::u8}); + manager.register_pass(element::TypeVector{element::u8}); manager.register_pass(); } diff --git a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp index 47fcc7af60bf61..e3d3f4f1235504 100644 --- a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp +++ b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp @@ -231,6 +231,7 @@ ov::OutputVector dequantize_linear(const ov::frontend::onnx::Node& node) { src_x.get_shape()[0] % block_size == 0, "DequantizeLinear doesn't support case when first dimension of X cannot be divided by block_size"); + // For further broadcasting scales and zp - reshape input to a shape [x.shape[0]/block_size, block_size, x.shape[1]] ov::Output broadcastable_x = op::util::reshape( src_x, Shape{static_cast(src_x.get_shape()[0]) / block_size, block_size, src_x.get_shape()[1]}); @@ -240,16 +241,14 @@ ov::OutputVector dequantize_linear(const ov::frontend::onnx::Node& node) { const auto scale_type = 
scale.get_element_type(); if (inputs.size() > 2) { zp = inputs[2]; + zp = std::make_shared(zp, unsqueezed_axes); if (zp.get_element_type() != scale.get_element_type()) { zp = std::make_shared(zp, scale_type); - disable_constant_folding(zp.get_node_shared_ptr()); } - zp = std::make_shared(zp, unsqueezed_axes); } const auto& x = src_x.get_element_type() == scale_type ? broadcastable_x : std::make_shared(broadcastable_x, scale_type); - // For further broadcasting scales and zp - reshape input to a shape [x.shape[0]/block_size, block_size, x.shape[1]] // Adding additional dimension for broadcasting scale = std::make_shared(scale, unsqueezed_axes); diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 9dd1da2d471e5a..2e79d066b069ff 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -342,10 +342,10 @@ void Transformations::PreLpt(const std::vector& defaultPrecis ov::element::i4, ov::element::nf4, ov::element::f4e2m1}; - CPU_REGISTER_PASS_X64(decompression_handling_manager, ov::pass::MarkDequantizationSubgraph, decompression_precisions, false, true); + CPU_REGISTER_PASS_X64(decompression_handling_manager, ov::pass::MarkDequantizationAndDecompression, decompression_precisions, false, true); CPU_SET_CALLBACK_X64(decompression_handling_manager, [&](const_node_ptr &node) -> bool { return !is_decompression_multiply(node); - }, ov::pass::MarkDequantizationSubgraph); + }, ov::pass::MarkDequantizationAndDecompression); CPU_SET_CALLBACK_COMMON( decompression_handling_manager, @@ -371,7 +371,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis ov::pass::Manager manager("Plugin:CPU"); manager.set_per_pass_validation(false); if (useLpt) - CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantizationSubgraph, defaultPrecisions); + 
CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantizationAndDecompression, defaultPrecisions); auto get_convert_precisions = [&]() { precisions_map map = { diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 158dee2ee7ac05..cf54b75f2630e0 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -249,7 +249,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && is_model_quantized; if (enableInt8) { - manager.register_pass( + manager.register_pass( std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }); } @@ -329,11 +329,11 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // Disable subtract folding only for the dGPUs to meet the requirements of oneDNN: // it expects to have the same data type for weights and zero points (apply it only for u8 data type, since other compression // types are not supported by oneDNN) - manager.register_pass(supported_woq_types, !device_info.supports_immad); + manager.register_pass(supported_woq_types, !device_info.supports_immad); // Need to check if transformations work correctly for mixed models with both compression and quantization at the same time. if (!is_model_quantized) { - pass_config->set_callback([&](const std::shared_ptr node) { + pass_config->set_callback([&](const std::shared_ptr node) { return !is_decompression_multiply(node); }); } @@ -839,8 +839,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); // ZP should not be folded for FC. But still, ZP should be folded for Gather. - // Therefore, run MarkDequantizationSubgraph again to fold ZP constant. 
- manager.register_pass(supported_woq_types, true); + // Therefore, run MarkDequantizationAndDecompression again to fold ZP constant. + manager.register_pass(supported_woq_types, true); if (device_info.supports_immad) { if (disable_horizontal_fc_fusion) manager.register_pass(); From 5f237eb0969cb36dfd0185ecc539c99a16468542 Mon Sep 17 00:00:00 2001 From: Tikhonov Ivan Date: Wed, 13 Nov 2024 16:09:29 +0330 Subject: [PATCH 03/13] add transformation callback --- .../low_precision/mark_dequantization_subgraph.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp index ceabc62de459a0..95d860d8ac126d 100644 --- a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp +++ b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp @@ -153,6 +153,11 @@ KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions) ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](Matcher& m) -> bool { const auto& pt_map = m.get_pattern_value_map(); + const auto multiply = m.get_match_root(); + + if (transformation_callback(multiply)) { + return false; + } NodeVector keep_const_precisions = {}; for (const auto& pattern_node : {input_pattern, zp_pattern, scale_pattern}) { From 0dfabd9201a7d0253664aaab9948b62e71af2096 Mon Sep 17 00:00:00 2001 From: Tikhonov Ivan Date: Wed, 13 Nov 2024 16:25:40 +0330 Subject: [PATCH 04/13] try to fix a warning --- .../low_precision/mark_dequantization_subgraph.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp index 95d860d8ac126d..19e5d0bca991f0 100644 --- 
a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp +++ b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp @@ -20,14 +20,13 @@ using namespace ov; using namespace ov::op; using namespace ov::pass::pattern; -namespace { /** * @ingroup ov_transformation_common_api * @brief TBA */ class TRANSFORMATIONS_API MarkDequantization : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("MarkDQ", "0"); + OPENVINO_RTTI("MarkDequantization", "0"); explicit MarkDequantization(const element::TypeVector& precisions, bool fold_subtract_const = false, bool fold_multiply_const = true); @@ -43,6 +42,8 @@ class TRANSFORMATIONS_API KeepConstsPrecision : public ov::pass::MatcherPass { explicit KeepConstsPrecision(const element::TypeVector& precisions); }; +namespace { + bool check_precision(const ov::element::Type_t type_to_check, const ov::element::TypeVector& precisions) { return std::find(precisions.begin(), precisions.end(), type_to_check) != precisions.end(); }; From 865fd4cd02fbcc26f57a53271446975704319c94 Mon Sep 17 00:00:00 2001 From: Tikhonov Ivan Date: Wed, 13 Nov 2024 16:40:29 +0330 Subject: [PATCH 05/13] revert KeepConstPrecision change --- .../include/transformations/rt_info/keep_const_precision.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/common/transformations/include/transformations/rt_info/keep_const_precision.hpp b/src/common/transformations/include/transformations/rt_info/keep_const_precision.hpp index 3a7e106cebbc27..46c8bf00deb3f2 100644 --- a/src/common/transformations/include/transformations/rt_info/keep_const_precision.hpp +++ b/src/common/transformations/include/transformations/rt_info/keep_const_precision.hpp @@ -26,6 +26,10 @@ class TRANSFORMATIONS_API KeepConstPrecision : public RuntimeAttribute { OPENVINO_RTTI("keep_const_precision", "0"); KeepConstPrecision() = default; + + bool is_copyable() const override { + return false; + } }; } // namespace ov 
From fa6b0ec1715d9f09ef17a9ac1a052e4c20781288 Mon Sep 17 00:00:00 2001 From: Tikhonov Ivan Date: Fri, 15 Nov 2024 10:51:51 +0400 Subject: [PATCH 06/13] align the current behavior with the previous implementation --- .../mark_dequantization_subgraph.cpp | 68 +++++++++++++++---- 1 file changed, 53 insertions(+), 15 deletions(-) diff --git a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp index 19e5d0bca991f0..71cbba76fd8353 100644 --- a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp +++ b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp @@ -5,7 +5,9 @@ #include "transformations/low_precision/mark_dequantization_subgraph.hpp" #include "openvino/op/multiply.hpp" +#include "openvino/op/reshape.hpp" #include "openvino/op/subtract.hpp" +#include "openvino/op/unsqueeze.hpp" #include "openvino/pass/manager.hpp" #include "openvino/pass/pattern/op/optional.hpp" #include "openvino/pass/pattern/op/or.hpp" @@ -28,8 +30,8 @@ class TRANSFORMATIONS_API MarkDequantization : public ov::pass::MatcherPass { public: OPENVINO_RTTI("MarkDequantization", "0"); explicit MarkDequantization(const element::TypeVector& precisions, - bool fold_subtract_const = false, - bool fold_multiply_const = true); + bool fold_subtract_const, + bool fold_multiply_const); }; /** @@ -39,7 +41,9 @@ class TRANSFORMATIONS_API MarkDequantization : public ov::pass::MatcherPass { class TRANSFORMATIONS_API KeepConstsPrecision : public ov::pass::MatcherPass { public: OPENVINO_RTTI("KeepConstsPrecision", "0"); - explicit KeepConstsPrecision(const element::TypeVector& precisions); + explicit KeepConstsPrecision(const element::TypeVector& precisions, + bool fold_subtract_const, + bool fold_multiply_const); }; namespace { @@ -63,6 +67,23 @@ void set_rt_info(const PatternValueMap& pt_map, } } 
}; + +void swap_nodes(const PatternValueMap& pt_map, + const std::shared_ptr& first, + const std::shared_ptr& second) { + if (pt_map.count(first) && pt_map.count(second)) { + auto first_node = pt_map.at(first).get_node_shared_ptr(); + auto second_node = pt_map.at(second).get_node_shared_ptr(); + + auto target_inputs = second_node->output(0).get_target_inputs(); + second_node->input(0).replace_source_output(first_node->input_value(0)); + first_node->input(0).replace_source_output(second_node->output(0)); + for (const auto& in : target_inputs) { + in.replace_source_output(first_node->output(0)); + } + } +} + } // namespace MarkDequantization::MarkDequantization(const element::TypeVector& precisions, @@ -84,16 +105,19 @@ MarkDequantization::MarkDequantization(const element::TypeVector& precisions, // zero points: auto zp_pattern = any_input(); auto zp_convert_pattern = optional(zp_pattern); - auto subtract_pattern = optional({convert_pattern, zp_convert_pattern}); + auto zp_reshape_pattern = optional({zp_convert_pattern, any_input()}); + auto subtract_pattern = optional({convert_pattern, zp_reshape_pattern}); // scale: auto scale_pattern = any_input(); auto scale_convert_pattern = optional(scale_pattern); - auto multiply_pattern = wrap_type({subtract_pattern, scale_convert_pattern}); + auto scale_reshape_pattern = optional({scale_convert_pattern, any_input()}); + auto multiply_pattern = wrap_type({subtract_pattern, scale_reshape_pattern}); ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](Matcher& m) -> bool { const auto& pt_map = m.get_pattern_value_map(); auto convert = pt_map.at(convert_pattern); + auto input = pt_map.at(input_pattern); const auto multiply = m.get_match_root(); if (transformation_callback(multiply)) { @@ -107,7 +131,9 @@ MarkDequantization::MarkDequantization(const element::TypeVector& precisions, // Depending on the transformation arguments they have to be marked/unmarked with disable_cf rt_info. 
NodeVector converts_to_mark = {convert_pattern}; NodeVector converts_to_unmark = {}; - if (fold_subtract_const) { + + if (fold_subtract_const || + (pt_map.count(subtract_pattern) && pt_map.at(zp_pattern).get_element_type() != input.get_element_type())) { converts_to_unmark.push_back(zp_convert_pattern); } else { converts_to_mark.push_back(zp_convert_pattern); @@ -121,14 +147,20 @@ MarkDequantization::MarkDequantization(const element::TypeVector& precisions, set_rt_info(pt_map, disable_constant_folding, converts_to_mark, precisions); set_rt_info(pt_map, enable_constant_folding, converts_to_unmark, precisions); + + // Move Reshape/Unsqueeze ops up to fold them in ConstantFolding. + swap_nodes(pt_map, zp_convert_pattern, zp_reshape_pattern); + swap_nodes(pt_map, scale_convert_pattern, scale_reshape_pattern); return false; }; - auto m = std::make_shared(multiply_pattern, "MarkDQ"); + auto m = std::make_shared(multiply_pattern, "MarkDequantization"); this->register_matcher(m, callback); } -KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions) { +KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions, + bool fold_subtract_const, + bool fold_multiply_const) { // Dequantization subgraph may have two forms: with and without Subtract // // Input Input @@ -160,12 +192,18 @@ KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions) return false; } - NodeVector keep_const_precisions = {}; - for (const auto& pattern_node : {input_pattern, zp_pattern, scale_pattern}) { - if (pt_map.count(pattern_node)) { - auto node = pt_map.at(pattern_node).get_node_shared_ptr(); + std::map, bool> keep_const_precisions = {{input_pattern, false}, + {zp_pattern, fold_subtract_const}, + {scale_pattern, fold_multiply_const}}; + for (const auto& pattern_node : keep_const_precisions) { + if (pt_map.count(pattern_node.first)) { + auto node = pt_map.at(pattern_node.first).get_node_shared_ptr(); if (ov::as_type_ptr(node)) { - 
ov::enable_keep_const_precision(node); + if (pattern_node.second) { + ov::disable_keep_const_precision(node); + } else { + ov::enable_keep_const_precision(node); + } } } } @@ -178,10 +216,10 @@ KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions) bool pass::MarkDequantizationAndDecompression::run_on_model(const std::shared_ptr& m) { ov::pass::Manager manager("MarkDequantizationAndDecompressionManager"); - manager.register_pass(m_precisions, m_fold_subtract_const, m_fold_multiply_const); manager.register_pass(); + manager.register_pass(m_precisions, m_fold_subtract_const, m_fold_multiply_const); manager.register_pass(); - manager.register_pass(m_precisions); + manager.register_pass(m_precisions, m_fold_subtract_const, m_fold_multiply_const); manager.run_passes(m); return false; From 1202ab2ecd2b5682211d73fa731ecaf2a6a1d367 Mon Sep 17 00:00:00 2001 From: Tikhonov Ivan Date: Fri, 15 Nov 2024 17:07:22 +0330 Subject: [PATCH 07/13] fix tests --- .../low_precision/mark_dequantization_subgraph.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp index 71cbba76fd8353..8e24ce101e01a9 100644 --- a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp +++ b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp @@ -81,6 +81,8 @@ void swap_nodes(const PatternValueMap& pt_map, for (const auto& in : target_inputs) { in.replace_source_output(first_node->output(0)); } + first_node->validate_and_infer_types(); + second_node->validate_and_infer_types(); } } @@ -220,7 +222,5 @@ bool pass::MarkDequantizationAndDecompression::run_on_model(const std::shared_pt manager.register_pass(m_precisions, m_fold_subtract_const, m_fold_multiply_const); manager.register_pass(); 
manager.register_pass(m_precisions, m_fold_subtract_const, m_fold_multiply_const); - manager.run_passes(m); - - return false; + return manager.run_passes(m); } From c93f7a742072a8e821f8f0e8b1453b07cd201dab Mon Sep 17 00:00:00 2001 From: Tikhonov Ivan Date: Sun, 17 Nov 2024 14:33:26 +0330 Subject: [PATCH 08/13] add precision check --- .../low_precision/mark_dequantization_subgraph.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp index 8e24ce101e01a9..34aaac6a5dcf41 100644 --- a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp +++ b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp @@ -200,7 +200,8 @@ KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions, for (const auto& pattern_node : keep_const_precisions) { if (pt_map.count(pattern_node.first)) { auto node = pt_map.at(pattern_node.first).get_node_shared_ptr(); - if (ov::as_type_ptr(node)) { + if (ov::as_type_ptr(node) && + check_precision(node->get_output_element_type(0), precisions)) { if (pattern_node.second) { ov::disable_keep_const_precision(node); } else { From 189153f47c9bdabf23ee2a7782ad5147b65e6e04 Mon Sep 17 00:00:00 2001 From: Tikhonov Ivan Date: Tue, 19 Nov 2024 14:56:31 +0400 Subject: [PATCH 09/13] fix issue on gpu, docs, refactoring --- .../mark_dequantization_subgraph.hpp | 5 +- .../moc_transformations.cpp | 14 ++-- .../mark_dequantization_subgraph.cpp | 84 ++++++++++++------- 3 files changed, 68 insertions(+), 35 deletions(-) diff --git a/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp b/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp index 7770647d736e67..c60d9ca5d3659c 100644 
--- a/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp +++ b/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp @@ -11,10 +11,11 @@ namespace ov { namespace pass { - /** * @ingroup ov_transformation_common_api - * @brief TBA + * @brief MarkDequantizationAndDecompression is a set of transformation which mark + * Dequantization and Decompression patterns with the keep_const_precision, disable_const_folding and + * dequantization attributes. Also it calls ConstantFolding. */ class TRANSFORMATIONS_API MarkDequantizationAndDecompression : public ModelPass { public: diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index 86e6604e3241cd..cfcd1a96fa577f 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -130,15 +130,12 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr using namespace ov::pass; REGISTER_PASS(manager, InitNodeInfo) REGISTER_PASS(manager, EliminateConvert) - if (m_low_precision_enabled) { - manager.register_pass( - element::TypeVector{ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4}); - } if (!m_use_shapes) { manager.register_pass(); } + // RemoveConcatZeroDimInput and RemoveMultiSubGraphOpDanglingParamsResults - // should be performed before first ConstantFolding call. + // should be performed before first !ConstantFolding! call. // The passes can deteach graph branches where zero dimesion is calculated. // Zero dimensions in shape causes creation empty tensors, which are incorrect during CF. 
// In particular, if zero dim tensor is consumed in body of MultiSubGraphOp @@ -147,6 +144,13 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr REGISTER_PASS(manager, RemoveConcatZeroDimInput) REGISTER_PASS(manager, EliminateLoopInputsOutputs); REGISTER_PASS(manager, Validate) + + if (m_low_precision_enabled) { + // includes ConstantFolding call + manager.register_pass( + element::TypeVector{ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4}); + } + // todo: ticket 96960 // the order EliminateDuplicateTIInputs and RemoveMultiSubGraphOpDanglingParamsResults is important // it looks like we need to combine these transformations into one. diff --git a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp index 34aaac6a5dcf41..3e742ff305c68c 100644 --- a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp +++ b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp @@ -24,7 +24,30 @@ using namespace ov::pass::pattern; /** * @ingroup ov_transformation_common_api - * @brief TBA + * + * @brief MarkDequantization matches Dequantization subgraphs and marks Subtract and Multiply nodes + * with the dequantization attribute. Also if Convert nodes are part of the subgraph they might be marked + * with the disable_const_folding attribute. + * + * If Convert -> Reshape/Unsqueeze are part of the Dequantization subraph, Convert and Reshape/Unsqueeze + * nodes will be swapped to eliminate Reshape/Unsqueeze in the next ConstantFolding. + * + * Dequantization subgraph may have two forms: with and without Subtract. + * ZeroPoints and Scale might be present as subgraphs and include Convert ops. 
+ * + * Input ZeroPoints + * │ │ + * ▼ ▼ + * Convert (opt) Reshape/Unsqueeze + * │ │ + * ▼ ▼ Scale Input Scale + * Subtract │ │ │ + * │ ▼ ▼ ▼ + * │ (opt) Reshape/Unsqueeze Convert (opt) Reshape/Unsqueeze + * │ │ │ │ + * ▼ ▼ ▼ ▼ + * Multiply Multiply + * */ class TRANSFORMATIONS_API MarkDequantization : public ov::pass::MatcherPass { public: @@ -36,7 +59,25 @@ class TRANSFORMATIONS_API MarkDequantization : public ov::pass::MatcherPass { /** * @ingroup ov_transformation_common_api - * @brief TBA + * + * @brief KeepConstsPrecision matches Dequantization subgraphs and if Input/ZeroPoints/Scale are Constants + * they might be marked with keep_const_precision attribute. + * + * Dequantization subgraph may have two forms: with and without Subtract. + * + * Input + * │ + * ▼ + * Convert ZeroPoints + * │ │ + * ▼ ▼ Input + * Subtract │ + * │ ▼ + * │ Scale Convert Scale + * │ │ │ │ + * ▼ ▼ ▼ ▼ + * Multiply Multiply + * */ class TRANSFORMATIONS_API KeepConstsPrecision : public ov::pass::MatcherPass { public: @@ -91,16 +132,7 @@ void swap_nodes(const PatternValueMap& pt_map, MarkDequantization::MarkDequantization(const element::TypeVector& precisions, const bool fold_subtract_const, const bool fold_multiply_const) { - // Dequantization subgraph may have two forms: with and without Subtract - // - // Input Input - // | | - // Convert zero point OR Convert scale - // \ / \ / - // Subtract scale Multiply - // \ / - // Multiply - // + // data input: auto input_pattern = any_input(); auto convert_pattern = wrap_type({input_pattern}, consumers_count(1)); @@ -134,8 +166,7 @@ MarkDequantization::MarkDequantization(const element::TypeVector& precisions, NodeVector converts_to_mark = {convert_pattern}; NodeVector converts_to_unmark = {}; - if (fold_subtract_const || - (pt_map.count(subtract_pattern) && pt_map.at(zp_pattern).get_element_type() != input.get_element_type())) { + if (fold_subtract_const) { converts_to_unmark.push_back(zp_convert_pattern); } else { 
converts_to_mark.push_back(zp_convert_pattern); @@ -163,16 +194,7 @@ MarkDequantization::MarkDequantization(const element::TypeVector& precisions, KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions, bool fold_subtract_const, bool fold_multiply_const) { - // Dequantization subgraph may have two forms: with and without Subtract - // - // Input Input - // | | - // Convert zero point OR Convert scale - // \ / \ / - // Subtract scale Multiply - // \ / - // Multiply - // + // data input: auto input_pattern = any_input(); auto convert_pattern = wrap_type({input_pattern}, consumers_count(1)); @@ -194,9 +216,10 @@ KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions, return false; } - std::map, bool> keep_const_precisions = {{input_pattern, false}, - {zp_pattern, fold_subtract_const}, - {scale_pattern, fold_multiply_const}}; + using PatternNode = std::shared_ptr; + std::map keep_const_precisions = {{input_pattern, false}, + {zp_pattern, fold_subtract_const}, + {scale_pattern, fold_multiply_const}}; for (const auto& pattern_node : keep_const_precisions) { if (pt_map.count(pattern_node.first)) { auto node = pt_map.at(pattern_node.first).get_node_shared_ptr(); @@ -218,7 +241,12 @@ KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions, } bool pass::MarkDequantizationAndDecompression::run_on_model(const std::shared_ptr& m) { - ov::pass::Manager manager("MarkDequantizationAndDecompressionManager"); + const auto& pass_config = get_pass_config(); + auto callback = pass_config->get_callback(); + pass_config->set_callback(callback); + pass_config->set_callback(callback); + + ov::pass::Manager manager(pass_config, "MarkDequantizationAndDecompressionManager"); manager.register_pass(); manager.register_pass(m_precisions, m_fold_subtract_const, m_fold_multiply_const); manager.register_pass(); From 06f1c22bcac3bace412abc283c8f23e6feafb642 Mon Sep 17 00:00:00 2001 From: Tikhonov Ivan Date: Tue, 19 Nov 2024 
17:16:29 +0400 Subject: [PATCH 10/13] remove the dq model pass, leave the separate matchers only --- .../assets/snippets/lpt_intel_cpu_plugin.cpp | 2 +- ...dequantization_subgraph_transformation.cpp | 39 ++++---- .../mark_dequantization_subgraph.hpp | 75 +++++++++++---- .../common_optimizations.cpp | 1 + .../moc_transformations.cpp | 11 +-- .../mark_dequantization_subgraph.cpp | 91 ++----------------- .../tests/op_conversions/convert_subtract.cpp | 2 +- .../transformation_pipeline.cpp | 10 +- .../src/plugin/transformations_pipeline.cpp | 10 +- 9 files changed, 107 insertions(+), 134 deletions(-) diff --git a/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp b/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp index 662d32be4a10fb..76e6d60b8e3e90 100644 --- a/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp +++ b/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp @@ -38,7 +38,7 @@ auto defaultPrecisions = useLpt ? ov::pass::low_precision::precision_set::get_int8_support() : std::vector{}; if (useLpt) { // disable constant folding on dequantization subgraphs so they can be processed by LPT - manager.register_pass(defaultPrecisions); + manager.register_pass(defaultPrecisions); } // OpenVINO common transformations happen here diff --git a/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp b/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp index 89fb71f1a8f0f1..b8c5ad1177d2d5 100644 --- a/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp +++ b/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp @@ -31,7 +31,9 @@ TEST_F(TransformationTestsF, KeepConstPrecision) { model = std::make_shared(stub_op, ParameterVector{}); } - manager.register_pass(element::TypeVector{element::u4}); + manager.register_pass(element::TypeVector{element::u4}); + manager.register_pass(); + 
manager.register_pass(element::TypeVector{element::u4}); manager.register_pass(ov::element::u4, ov::element::u8, type_to_fuse_map{}, false, false); { @@ -46,7 +48,7 @@ TEST_F(TransformationTestsF, KeepConstPrecision) { } } -TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformation) { +TEST_F(TransformationTestsF, MarkDequantizationTransformation) { // Input graph: // // Parameter @@ -69,7 +71,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformation) { // \ / // Convolution // - // After MarkDequantizationAndDecompression all Subtract and Multiply nodes from above graph + // After MarkDequantization all Subtract and Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. // All 'Convert(DCF)' nodes from above graph are marked with 'DisableConstantFolding' attribute // Weights and zero points are marked with 'KeepConstPrecision' attribute @@ -114,7 +116,8 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformation) { model = std::make_shared(conv, ParameterVector{parameter}); } - manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); manager.register_pass(); { @@ -170,7 +173,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformation) { comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS); } -TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZeroPoint) { +TEST_F(TransformationTestsF, MarkDequantizationTransformationNoZeroPoint) { // Input graph: // // Parameter @@ -190,7 +193,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZ // \ / // Convolution // - // After MarkDequantizationAndDecompression all Multiply nodes from above graph + // After MarkDequantization all Multiply nodes from above graph // are marked with 
'DequantizationNode' attribute. // Also 'Convert(DCF)' node from above graph is marked with 'DisableConstantFolding' attribute // Weights node is marked with 'KeepConstPrecision' attribute @@ -229,7 +232,8 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZ model = std::make_shared(conv, ParameterVector{parameter}); } - manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); manager.register_pass(); { @@ -274,7 +278,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZ comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS); } -TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZeroPointFP16) { +TEST_F(TransformationTestsF, MarkDequantizationTransformationNoZeroPointFP16) { // Input graph: // // Parameter @@ -294,7 +298,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZ // \ / // Convolution // - // After MarkDequantizationAndDecompression all Multiply nodes from above graph + // After MarkDequantization all Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. 
// Also 'Convert(DCF)' node from above graph is marked with 'DisableConstantFolding' attribute // Weights node is marked with 'KeepConstPrecision' attribute @@ -337,7 +341,8 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZ model = std::make_shared(conv, ParameterVector{parameter}); } - manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); { auto parameter = std::make_shared(element::f32, Shape{1, 16, 14, 14}); @@ -385,7 +390,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZ comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS); } -TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNotConstantWeights) { +TEST_F(TransformationTestsF, MarkDequantizationTransformationNotConstantWeights) { // Input graph: // // Parameter @@ -408,7 +413,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNot // \ / // Convolution // - // After MarkDequantizationAndDecompression all Subtract and Multiply nodes from above graph + // After MarkDequantization all Subtract and Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. 
// Also all 'Convert(DCF)' nodes from above graph are marked with 'DisableConstantFolding' attribute // Weights and zero point nodes are marked with 'KeepConstPrecision' attribute @@ -456,7 +461,8 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNot model = std::make_shared(conv, ParameterVector{parameter}); } - manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); manager.register_pass(); { @@ -511,7 +517,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNot comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS); } -TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationFoldSubConst) { +TEST_F(TransformationTestsF, MarkDequantizationTransformationFoldSubConst) { // Input graph: After transformation: // // Constant Constant Constant @@ -525,7 +531,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationFol // | / \ / // Multiply Multiply // - // After MarkDequantizationAndDecompression all Subtract and Multiply nodes from above graph + // After MarkDequantization all Subtract and Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. 
// Also all 'Convert(DCF)' node before weights is marked with 'DisableConstantFolding' attribute // but Convert before Dequantization Sub const isn't because fold_subtract_const is set to true @@ -542,7 +548,8 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationFol model = std::make_shared(ov::OutputVector{multiply}); } - manager.register_pass(element::TypeVector{element::u8}, true); + manager.register_pass(element::TypeVector{element::u8}, true); + manager.register_pass(element::TypeVector{element::u8}, true); manager.register_pass(); { diff --git a/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp b/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp index c60d9ca5d3659c..6cbd8d990ac73e 100644 --- a/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp +++ b/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp @@ -13,26 +13,67 @@ namespace ov { namespace pass { /** * @ingroup ov_transformation_common_api - * @brief MarkDequantizationAndDecompression is a set of transformation which mark - * Dequantization and Decompression patterns with the keep_const_precision, disable_const_folding and - * dequantization attributes. Also it calls ConstantFolding. + * + * @brief MarkDequantization matches Dequantization subgraphs and marks Subtract and Multiply nodes + * with the dequantization attribute. Also if Convert nodes are part of the subgraph they might be marked + * with the disable_const_folding attribute. + * + * If Convert -> Reshape/Unsqueeze are part of the Dequantization subraph, Convert and Reshape/Unsqueeze + * nodes will be swapped to eliminate Reshape/Unsqueeze in the next ConstantFolding. + * + * Dequantization subgraph may have two forms: with and without Subtract. + * ZeroPoints and Scale might be present as subgraphs and include Convert ops. 
+ * + * Input ZeroPoints + * │ │ + * ▼ ▼ + * Convert (opt) Reshape/Unsqueeze + * │ │ + * ▼ ▼ Scale Input Scale + * Subtract │ │ │ + * │ ▼ ▼ ▼ + * │ (opt) Reshape/Unsqueeze Convert (opt) Reshape/Unsqueeze + * │ │ │ │ + * ▼ ▼ ▼ ▼ + * Multiply Multiply + * */ -class TRANSFORMATIONS_API MarkDequantizationAndDecompression : public ModelPass { +class TRANSFORMATIONS_API MarkDequantization : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("MarkDequantizationAndDecompression", "0"); - explicit MarkDequantizationAndDecompression(element::TypeVector precisions, - const bool fold_subtract_const = false, - const bool fold_multiply_const = true) - : m_fold_subtract_const(fold_subtract_const), - m_fold_multiply_const(fold_multiply_const), - m_precisions(std::move(precisions)) {} - - bool run_on_model(const std::shared_ptr& m) override; + OPENVINO_RTTI("MarkDequantization", "0"); + explicit MarkDequantization(const element::TypeVector& precisions, + bool fold_subtract_const = false, + bool fold_multiply_const = true); +}; -private: - bool m_fold_subtract_const = false; - bool m_fold_multiply_const = true; - element::TypeVector m_precisions; +/** + * @ingroup ov_transformation_common_api + * + * @brief KeepConstsPrecision matches Dequantization subgraphs and if Input/ZeroPoints/Scale are Constants + * they might be marked with keep_const_precision attribute. + * + * Dequantization subgraph may have two forms: with and without Subtract. 
+ * + * Input + * │ + * ▼ + * Convert ZeroPoints + * │ │ + * ▼ ▼ Input + * Subtract │ + * │ ▼ + * │ Scale Convert Scale + * │ │ │ │ + * ▼ ▼ ▼ ▼ + * Multiply Multiply + * + */ +class TRANSFORMATIONS_API KeepConstsPrecision : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("KeepConstsPrecision", "0"); + explicit KeepConstsPrecision(const element::TypeVector& precisions, + bool fold_subtract_const = false, + bool fold_multiply_const = true); }; } // namespace pass diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index fea833171fb1ee..37ee2d12d9aebb 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -128,6 +128,7 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr manager.register_pass(); } + if (m_low_precision_enabled) { + manager.register_pass( + element::TypeVector{ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4}); + } + // RemoveConcatZeroDimInput and RemoveMultiSubGraphOpDanglingParamsResults // should be performed before first !ConstantFolding! call. // The passes can deteach graph branches where zero dimesion is calculated. @@ -145,12 +150,6 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr REGISTER_PASS(manager, EliminateLoopInputsOutputs); REGISTER_PASS(manager, Validate) - if (m_low_precision_enabled) { - // includes ConstantFolding call - manager.register_pass( - element::TypeVector{ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4}); - } - // todo: ticket 96960 // the order EliminateDuplicateTIInputs and RemoveMultiSubGraphOpDanglingParamsResults is important // it looks like we need to combine these transformations into one. 
diff --git a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp index 3e742ff305c68c..bddaf81e31a067 100644 --- a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp +++ b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp @@ -22,71 +22,6 @@ using namespace ov; using namespace ov::op; using namespace ov::pass::pattern; -/** - * @ingroup ov_transformation_common_api - * - * @brief MarkDequantization matches Dequantization subgraphs and marks Subtract and Multiply nodes - * with the dequantization attribute. Also if Convert nodes are part of the subgraph they might be marked - * with the disable_const_folding attribute. - * - * If Convert -> Reshape/Unsqueeze are part of the Dequantization subraph, Convert and Reshape/Unsqueeze - * nodes will be swapped to eliminate Reshape/Unsqueeze in the next ConstantFolding. - * - * Dequantization subgraph may have two forms: with and without Subtract. - * ZeroPoints and Scale might be present as subgraphs and include Convert ops. - * - * Input ZeroPoints - * │ │ - * ▼ ▼ - * Convert (opt) Reshape/Unsqueeze - * │ │ - * ▼ ▼ Scale Input Scale - * Subtract │ │ │ - * │ ▼ ▼ ▼ - * │ (opt) Reshape/Unsqueeze Convert (opt) Reshape/Unsqueeze - * │ │ │ │ - * ▼ ▼ ▼ ▼ - * Multiply Multiply - * - */ -class TRANSFORMATIONS_API MarkDequantization : public ov::pass::MatcherPass { -public: - OPENVINO_RTTI("MarkDequantization", "0"); - explicit MarkDequantization(const element::TypeVector& precisions, - bool fold_subtract_const, - bool fold_multiply_const); -}; - -/** - * @ingroup ov_transformation_common_api - * - * @brief KeepConstsPrecision matches Dequantization subgraphs and if Input/ZeroPoints/Scale are Constants - * they might be marked with keep_const_precision attribute. 
- * - * Dequantization subgraph may have two forms: with and without Subtract. - * - * Input - * │ - * ▼ - * Convert ZeroPoints - * │ │ - * ▼ ▼ Input - * Subtract │ - * │ ▼ - * │ Scale Convert Scale - * │ │ │ │ - * ▼ ▼ ▼ ▼ - * Multiply Multiply - * - */ -class TRANSFORMATIONS_API KeepConstsPrecision : public ov::pass::MatcherPass { -public: - OPENVINO_RTTI("KeepConstsPrecision", "0"); - explicit KeepConstsPrecision(const element::TypeVector& precisions, - bool fold_subtract_const, - bool fold_multiply_const); -}; - namespace { bool check_precision(const ov::element::Type_t type_to_check, const ov::element::TypeVector& precisions) { @@ -129,9 +64,9 @@ void swap_nodes(const PatternValueMap& pt_map, } // namespace -MarkDequantization::MarkDequantization(const element::TypeVector& precisions, - const bool fold_subtract_const, - const bool fold_multiply_const) { +ov::pass::MarkDequantization::MarkDequantization(const element::TypeVector& precisions, + const bool fold_subtract_const, + const bool fold_multiply_const) { // data input: auto input_pattern = any_input(); auto convert_pattern = wrap_type({input_pattern}, consumers_count(1)); @@ -191,9 +126,9 @@ MarkDequantization::MarkDequantization(const element::TypeVector& precisions, this->register_matcher(m, callback); } -KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions, - bool fold_subtract_const, - bool fold_multiply_const) { +ov::pass::KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions, + bool fold_subtract_const, + bool fold_multiply_const) { // data input: auto input_pattern = any_input(); auto convert_pattern = wrap_type({input_pattern}, consumers_count(1)); @@ -239,17 +174,3 @@ KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions, auto m = std::make_shared(multiply_pattern, "KeepConstsPrecision"); this->register_matcher(m, callback); } - -bool pass::MarkDequantizationAndDecompression::run_on_model(const std::shared_ptr& m) { - 
const auto& pass_config = get_pass_config(); - auto callback = pass_config->get_callback(); - pass_config->set_callback(callback); - pass_config->set_callback(callback); - - ov::pass::Manager manager(pass_config, "MarkDequantizationAndDecompressionManager"); - manager.register_pass(); - manager.register_pass(m_precisions, m_fold_subtract_const, m_fold_multiply_const); - manager.register_pass(); - manager.register_pass(m_precisions, m_fold_subtract_const, m_fold_multiply_const); - return manager.run_passes(m); -} diff --git a/src/common/transformations/tests/op_conversions/convert_subtract.cpp b/src/common/transformations/tests/op_conversions/convert_subtract.cpp index 93b5c223345d03..1a1d6d8b5c83bb 100644 --- a/src/common/transformations/tests/op_conversions/convert_subtract.cpp +++ b/src/common/transformations/tests/op_conversions/convert_subtract.cpp @@ -77,7 +77,7 @@ TEST_F(TransformationTestsF, ConvertSubtractDequantizationSubgraph) { model = std::make_shared(mul, ParameterVector{data}); - manager.register_pass(element::TypeVector{element::u8}); + manager.register_pass(element::TypeVector{element::u8}); manager.register_pass(); } diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 2e79d066b069ff..8daf8d81704301 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -342,10 +342,10 @@ void Transformations::PreLpt(const std::vector& defaultPrecis ov::element::i4, ov::element::nf4, ov::element::f4e2m1}; - CPU_REGISTER_PASS_X64(decompression_handling_manager, ov::pass::MarkDequantizationAndDecompression, decompression_precisions, false, true); + CPU_REGISTER_PASS_X64(decompression_handling_manager, ov::pass::MarkDequantization, decompression_precisions, false, true); CPU_SET_CALLBACK_X64(decompression_handling_manager, [&](const_node_ptr &node) -> bool 
{ return !is_decompression_multiply(node); - }, ov::pass::MarkDequantizationAndDecompression); + }, ov::pass::MarkDequantization); CPU_SET_CALLBACK_COMMON( decompression_handling_manager, @@ -371,7 +371,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis ov::pass::Manager manager("Plugin:CPU"); manager.set_per_pass_validation(false); if (useLpt) - CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantizationAndDecompression, defaultPrecisions); + CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantization, defaultPrecisions); auto get_convert_precisions = [&]() { precisions_map map = { @@ -427,6 +427,10 @@ void Transformations::PreLpt(const std::vector& defaultPrecis CPU_REGISTER_PASS_COMMON(manager, ov::pass::AUGRUCellFusion); CPU_REGISTER_PASS_COMMON(manager, ov::pass::CommonOptimizations); + CPU_REGISTER_PASS_X64(manager, ov::pass::KeepConstsPrecision, decompression_precisions, false, true); + CPU_SET_CALLBACK_X64(manager, [&](const_node_ptr &node) -> bool { + return !is_decompression_multiply(node); + }, ov::pass::KeepConstsPrecision); CPU_REGISTER_PASS_COMMON(manager, ov::pass::WrapInterpolateIntoTransposes); CPU_REGISTER_PASS_COMMON(manager, ov::pass::TransposeSinking); CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertSequenceToTensorIterator); diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index f4a1014ee879df..b0a767eec013bb 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -292,7 +292,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && is_model_quantized; if (enableInt8) { - manager.register_pass( + manager.register_pass( std::vector{ ov::element::i8, ov::element::u8, 
ov::element::i4, ov::element::u4 }); } @@ -372,8 +372,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // Disable subtract folding only for the dGPUs to meet the requirements of oneDNN: // it expects to have the same data type for weights and zero points (apply it only for u8 data type, since other compression // types are not supported by oneDNN) - manager.register_pass(supported_woq_types, !device_info.supports_immad); - pass_config->set_callback([&](const std::shared_ptr node) { + manager.register_pass(supported_woq_types, !device_info.supports_immad); + pass_config->set_callback([&](const std::shared_ptr node) { return !is_decompression_multiply(node, device_info.supports_immad); }); @@ -911,8 +911,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); // ZP should not be folded for FC. But still, ZP should be folded for Gather. - // Therefore, run MarkDequantizationAndDecompression again to fold ZP constant. - manager.register_pass(supported_woq_types, true); + // Therefore, run MarkDequantization again to fold ZP constant. 
+ manager.register_pass(supported_woq_types, true); if (device_info.supports_immad) { if (disable_horizontal_fc_fusion) manager.register_pass(); From 1c7a72e75fa1c785bc03fa8bcb38b4c5e0bc44ea Mon Sep 17 00:00:00 2001 From: Tikhonov Ivan Date: Thu, 21 Nov 2024 14:23:40 +0400 Subject: [PATCH 11/13] fix pattern::op::Or logic --- src/core/src/pattern/op/or.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/src/pattern/op/or.cpp b/src/core/src/pattern/op/or.cpp index e2c37a322f3a5c..f0aa96120cc2be 100644 --- a/src/core/src/pattern/op/or.cpp +++ b/src/core/src/pattern/op/or.cpp @@ -13,7 +13,7 @@ bool ov::pass::pattern::op::Or::match_value(Matcher* matcher, auto saved = matcher->start_match(); if (matcher->match_value(input_value, graph_value)) { auto& pattern_map = matcher->get_pattern_value_map(); - pattern_map[input_value.get_node_shared_ptr()] = graph_value; + pattern_map[shared_from_this()] = graph_value; return saved.finish(true); } } From 416d610fe0ccfc596be89ed67ae3c8d65f44d446 Mon Sep 17 00:00:00 2001 From: Tikhonov Ivan Date: Thu, 21 Nov 2024 21:12:27 +0400 Subject: [PATCH 12/13] fixed the marking on gpu --- .../low_precision/mark_dequantization_subgraph.cpp | 12 +++++++----- .../src/plugin/transformations_pipeline.cpp | 13 ++++++++----- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp index bddaf81e31a067..5755dc51dc34ea 100644 --- a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp +++ b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp @@ -44,7 +44,7 @@ void set_rt_info(const PatternValueMap& pt_map, } }; -void swap_nodes(const PatternValueMap& pt_map, +bool swap_nodes(const PatternValueMap& pt_map, const std::shared_ptr& first, const
std::shared_ptr& second) { if (pt_map.count(first) && pt_map.count(second)) { @@ -59,7 +59,9 @@ void swap_nodes(const PatternValueMap& pt_map, } first_node->validate_and_infer_types(); second_node->validate_and_infer_types(); + return true; } + return false; } } // namespace @@ -89,7 +91,7 @@ ov::pass::MarkDequantization::MarkDequantization(const element::TypeVector& prec auto input = pt_map.at(input_pattern); const auto multiply = m.get_match_root(); - if (transformation_callback(multiply)) { + if (!check_precision(input.get_element_type(), precisions) || transformation_callback(multiply)) { return false; } @@ -117,9 +119,9 @@ ov::pass::MarkDequantization::MarkDequantization(const element::TypeVector& prec set_rt_info(pt_map, enable_constant_folding, converts_to_unmark, precisions); // Move Reshape/Unsqueeze ops up to fold them in ConstantFolding. - swap_nodes(pt_map, zp_convert_pattern, zp_reshape_pattern); - swap_nodes(pt_map, scale_convert_pattern, scale_reshape_pattern); - return false; + auto changed = swap_nodes(pt_map, zp_convert_pattern, zp_reshape_pattern); + changed = changed || swap_nodes(pt_map, scale_convert_pattern, scale_reshape_pattern); + return changed; }; auto m = std::make_shared(multiply_pattern, "MarkDequantization"); diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index b0a767eec013bb..0317327b589fdb 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -291,10 +291,12 @@ void TransformationsPipeline::apply(std::shared_ptr func) { auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && is_model_quantized; - if (enableInt8) { - manager.register_pass( - std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }); - } + + //if 
(enableInt8) { Why do we need this check? According to the line 378 we did this marking anyway + manager.register_pass( + std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }, + !device_info.supports_immad); + //} manager.register_pass(); manager.register_pass(); @@ -373,7 +375,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // it expects to have the same data type for weights and zero points (apply it only for u8 data type, since other compression // types are not supported by oneDNN) manager.register_pass(supported_woq_types, !device_info.supports_immad); - pass_config->set_callback([&](const std::shared_ptr node) { + pass_config->set_callback([&](const std::shared_ptr node) { return !is_decompression_multiply(node, device_info.supports_immad); }); From 553f2b631a902988455cf4812400206239790de9 Mon Sep 17 00:00:00 2001 From: Tikhonov Ivan Date: Sat, 30 Nov 2024 12:05:27 +0400 Subject: [PATCH 13/13] resolve review comments --- ...ark_dequantization_subgraph_transformation.cpp | 7 +++++++ .../common_optimizations/moc_transformations.cpp | 14 ++++++-------- .../mark_dequantization_subgraph.cpp | 15 ++++++++++++--- .../src/plugin/transformations_pipeline.cpp | 2 -- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp b/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp index b8c5ad1177d2d5..bf254cded24ed8 100644 --- a/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp +++ b/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp @@ -45,7 +45,14 @@ TEST_F(TransformationTestsF, KeepConstPrecision) { auto multiply = std::make_shared(subtract, scale); auto stub_op = std::make_shared(multiply); model_ref = std::make_shared(stub_op, ParameterVector{}); + + mark_as_dequantization_node(subtract); + 
mark_as_dequantization_node(multiply); + enable_keep_const_precision(lp_const); + ov::pass::disable_constant_folding(second_convert); } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS); } TEST_F(TransformationTestsF, MarkDequantizationTransformation) { diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index f082a8250fcb32..185ae84ec83642 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -129,27 +129,25 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr manager.set_per_pass_validation(false); using namespace ov::pass; REGISTER_PASS(manager, InitNodeInfo) - REGISTER_PASS(manager, EliminateConvert) - if (!m_use_shapes) { - manager.register_pass(); - } - if (m_low_precision_enabled) { manager.register_pass( element::TypeVector{ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4}); } - + if (!m_use_shapes) { + manager.register_pass(); + } // RemoveConcatZeroDimInput and RemoveMultiSubGraphOpDanglingParamsResults - // should be performed before first !ConstantFolding! call. + // should be performed before first ConstantFolding call. // The passes can deteach graph branches where zero dimesion is calculated. // Zero dimensions in shape causes creation empty tensors, which are incorrect during CF. // In particular, if zero dim tensor is consumed in body of MultiSubGraphOp // RemoveConcatZeroDimInput and RemoveMultiSubGraphOpDanglingParamsResults should be called together. 
+ using namespace ov::pass; + REGISTER_PASS(manager, EliminateConvert) REGISTER_PASS(manager, EliminateScatterUpdate) REGISTER_PASS(manager, RemoveConcatZeroDimInput) REGISTER_PASS(manager, EliminateLoopInputsOutputs); REGISTER_PASS(manager, Validate) - // todo: ticket 96960 // the order EliminateDuplicateTIInputs and RemoveMultiSubGraphOpDanglingParamsResults is important // it looks like we need to combine these transformations into one. diff --git a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp index 5755dc51dc34ea..3cc7a4dfabe1d8 100644 --- a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp +++ b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp @@ -4,6 +4,7 @@ #include "transformations/low_precision/mark_dequantization_subgraph.hpp" +#include "itt.hpp" #include "openvino/op/multiply.hpp" #include "openvino/op/reshape.hpp" #include "openvino/op/subtract.hpp" @@ -36,9 +37,13 @@ void set_rt_info(const PatternValueMap& pt_map, for (const auto& pattern_node : pattern_nodes) { if (pt_map.count(pattern_node)) { auto node = pt_map.at(pattern_node).get_node_shared_ptr(); + + // we don't need to mark Converts with disable_cf attribute if the `from` type (input type) + // is not in the `precisions` list. 
if (ov::as_type_ptr(node) && !check_precision(node->get_input_element_type(0), precisions)) { continue; } + rt_info_setter(node); } } @@ -69,6 +74,8 @@ bool swap_nodes(const PatternValueMap& pt_map, ov::pass::MarkDequantization::MarkDequantization(const element::TypeVector& precisions, const bool fold_subtract_const, const bool fold_multiply_const) { + MATCHER_SCOPE(MarkDequantization); + // data input: auto input_pattern = any_input(); auto convert_pattern = wrap_type({input_pattern}, consumers_count(1)); @@ -120,7 +127,7 @@ ov::pass::MarkDequantization::MarkDequantization(const element::TypeVector& prec // Move Reshape/Unsqueeze ops up to fold them in ConstantFolding. auto changed = swap_nodes(pt_map, zp_convert_pattern, zp_reshape_pattern); - changed = changed || swap_nodes(pt_map, scale_convert_pattern, scale_reshape_pattern); + changed = swap_nodes(pt_map, scale_convert_pattern, scale_reshape_pattern) || changed; return changed; }; @@ -131,6 +138,8 @@ ov::pass::MarkDequantization::MarkDequantization(const element::TypeVector& prec ov::pass::KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions, bool fold_subtract_const, bool fold_multiply_const) { + MATCHER_SCOPE(KeepConstsPrecision); + // data input: auto input_pattern = any_input(); auto convert_pattern = wrap_type({input_pattern}, consumers_count(1)); @@ -160,8 +169,8 @@ ov::pass::KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& pr for (const auto& pattern_node : keep_const_precisions) { if (pt_map.count(pattern_node.first)) { auto node = pt_map.at(pattern_node.first).get_node_shared_ptr(); - if (ov::as_type_ptr(node) && - check_precision(node->get_output_element_type(0), precisions)) { + const auto& precision = node->get_output_element_type(0); + if (ov::as_type_ptr(node) && check_precision(precision, precisions)) { if (pattern_node.second) { ov::disable_keep_const_precision(node); } else { diff --git 
a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 0317327b589fdb..b6585311521d2a 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -292,11 +292,9 @@ void TransformationsPipeline::apply(std::shared_ptr func) { auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && is_model_quantized; - //if (enableInt8) { Why do we need this check? According to the line 378 we did this marking anyway manager.register_pass( std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }, !device_info.supports_immad); - //} manager.register_pass(); manager.register_pass();