[CPU] I64 transformation & config. #18594

Closed

@@ -871,6 +871,14 @@ inline int32_t convert_value<uint32_t, int32_t>(uint32_t val) {
return static_cast<int32_t>(val);
}

template <>
inline int64_t convert_value<uint64_t, int64_t>(uint64_t val) {
if (val > static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
return std::numeric_limits<int64_t>::max();
}
return static_cast<int64_t>(val);
}

namespace {
template <ov::element::Type_t PREC_FROM, ov::element::Type_t PREC_TO>
std::shared_ptr<ngraph::Node> change_constant_precision(std::shared_ptr<opset4::Constant>& constant) {
@@ -1110,7 +1118,9 @@ bool fuse_type_to_constant(const std::shared_ptr<ngraph::Node>& node,
const auto& to = it->second;
if (auto constant = ov::as_type_ptr<opset4::Constant>(node)) {
std::shared_ptr<ngraph::Node> new_const;
if (from == ov::element::u64 && to == ov::element::i32) {
if (from == ov::element::u64 && to == ov::element::i64) {
new_const = change_constant_precision<ov::element::Type_t::u64, ov::element::Type_t::i64>(constant);
} else if (from == ov::element::u64 && to == ov::element::i32) {
new_const = change_constant_precision<ov::element::Type_t::u64, ov::element::Type_t::i32>(constant);
} else if (from == ov::element::i64 && to == ov::element::i32) {
new_const = change_constant_precision<ov::element::Type_t::i64, ov::element::Type_t::i32>(constant);
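
The new convert_value<uint64_t, int64_t> specialization saturates rather than wraps when a u64 value exceeds the i64 range, and fuse_type_to_constant now uses it for the u64 -> i64 constant path. A minimal standalone sketch of the same clamping rule (the free-function name below is illustrative, not the plugin's template):

#include <cstdint>
#include <iostream>
#include <limits>

// Clamp an unsigned 64-bit value into the signed 64-bit range
// (mirrors the specialization added above).
int64_t saturate_u64_to_i64(uint64_t val) {
    if (val > static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
        return std::numeric_limits<int64_t>::max();  // saturate at INT64_MAX
    }
    return static_cast<int64_t>(val);
}

int main() {
    std::cout << saturate_u64_to_i64(42u) << "\n";                                   // 42
    std::cout << saturate_u64_to_i64(std::numeric_limits<uint64_t>::max()) << "\n";  // 9223372036854775807
}
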
@@ -110,6 +110,11 @@ INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(ENABLE);
INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(IGNORE_CALLBACK);
INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(DISABLE);

/**
 * @brief Enables inference with the INT64 data type in the CPU plugin when it is present in the original model.
*/
DECLARE_CONFIG_KEY(CPU_NATIVE_I64);

} // namespace PluginConfigInternalParams

} // namespace InferenceEngine
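
For reference, a hedged sketch of how this internal key could be passed when compiling a model through the IE 1.0 Core API. It assumes the internal header above is reachable from the calling code; the model filename is hypothetical.

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model_with_i64.xml");  // hypothetical model path
    // Ask the CPU plugin to keep native I64 execution enabled.
    auto exeNetwork = core.LoadNetwork(network, "CPU",
        {{InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64,
          InferenceEngine::PluginConfigParams::YES}});
    return 0;
}
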
11 changes: 10 additions & 1 deletion src/plugins/intel_cpu/src/config.cpp
@@ -230,6 +230,15 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
IE_THROW() << "Wrong value for property key " << ov::hint::execution_mode.name()
<< ". Supported values: PERFORMANCE, ACCURACY";
}
} else if (key == PluginConfigInternalParams::KEY_CPU_NATIVE_I64) {
if (val == PluginConfigParams::YES) {
enableNativeI64 = true;
} else if (val == PluginConfigParams::NO) {
enableNativeI64 = false;
} else {
IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_CPU_NATIVE_I64 << ": " << val
<< ". Expected only YES or NO values.";
}
} else {
IE_THROW(NotFound) << "Unsupported property " << key << " by CPU plugin";
}
@@ -308,4 +317,4 @@ void Config::updateProperties() {
}

} // namespace intel_cpu
} // namespace ov
} // namespace ov
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/config.h
@@ -57,6 +57,7 @@ struct Config {
// TODO: Executor cache may lead to incorrect behavior on oneDNN ACL primitives
size_t rtCacheCapacity = 0ul;
#endif
bool enableNativeI64 = false;
InferenceEngine::IStreamsExecutor::Config streamExecutorConfig;
InferenceEngine::PerfHintsConfig perfHintsConfig;
bool enableCpuPinning = true;
56 changes: 29 additions & 27 deletions src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
@@ -4,45 +4,43 @@

#include "dnnl_extension_utils.h"

#include "utils/general_utils.h"
#include <oneapi/dnnl/dnnl.hpp>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "onednn/iml_type_mapper.h"
#include <common/primitive_desc.hpp>
#include <common/primitive_desc_iface.hpp>

#include <vector>

using namespace dnnl;

namespace ov {
namespace intel_cpu {

uint8_t DnnlExtensionUtils::sizeOfDataType(dnnl::memory::data_type dataType) {
uint8_t DnnlExtensionUtils::sizeOfDataType(memory::data_type dataType) {
switch (dataType) {
case dnnl::memory::data_type::f32:
return 4;
case dnnl::memory::data_type::s32:
case memory::data_type::f64:
case memory::data_type::s64:
return 8;
case memory::data_type::f32:
case memory::data_type::s32:
return 4;
case dnnl::memory::data_type::bf16:
case memory::data_type::bf16:
case memory::data_type::f16:
return 2;
case dnnl::memory::data_type::s8:
return 1;
case dnnl::memory::data_type::u8:
case memory::data_type::s8:
case memory::data_type::u8:
case memory::data_type::bin:
return 1;
case dnnl::memory::data_type::bin:
return 1;
case dnnl::memory::data_type::f16:
return 2;
case dnnl::memory::data_type::undef:
case memory::data_type::undef:
return 0;
default:
IE_THROW() << "Unsupported data type.";
IE_THROW() << "Unsupported data type: " << DataTypeToIEPrecision(dataType);
}
}

memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngine::Precision& prec) {
switch (prec) {
case InferenceEngine::Precision::FP64:
return memory::data_type::f64;
case InferenceEngine::Precision::I64:
return memory::data_type::s64;
case InferenceEngine::Precision::FP32:
return memory::data_type::f32;
case InferenceEngine::Precision::I32:
@@ -68,6 +66,10 @@ memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngin

InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::data_type dataType) {
switch (dataType) {
case memory::data_type::f64:
return InferenceEngine::Precision::FP64;
case memory::data_type::s64:
return InferenceEngine::Precision::I64;
case memory::data_type::f32:
return InferenceEngine::Precision::FP32;
case memory::data_type::s32:
@@ -90,11 +92,11 @@ InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::dat
}
}

Dim DnnlExtensionUtils::convertToDim(const dnnl::memory::dim &dim) {
Dim DnnlExtensionUtils::convertToDim(const memory::dim &dim) {
return dim == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : static_cast<size_t>(dim);
}
dnnl::memory::dim DnnlExtensionUtils::convertToDnnlDim(const Dim &dim) {
return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast<dnnl::memory::dim>(dim);
memory::dim DnnlExtensionUtils::convertToDnnlDim(const Dim &dim) {
return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast<memory::dim>(dim);
}

VectorDims DnnlExtensionUtils::convertToVectorDims(const memory::dims& dims) {
@@ -133,19 +135,19 @@ memory::format_tag DnnlExtensionUtils::GetPlainFormatByRank(size_t rank) {
}
}

DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const dnnl::memory::desc &desc) {
DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const memory::desc &desc) {
return makeDescriptor(desc.get());
}

DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const_dnnl_memory_desc_t desc) {
if (desc->format_kind == dnnl::impl::format_kind_t::dnnl_blocked) {
if (desc->format_kind == impl::format_kind_t::dnnl_blocked) {
return std::shared_ptr<DnnlBlockedMemoryDesc>(new DnnlBlockedMemoryDesc(desc));
} else {
return std::shared_ptr<DnnlMemoryDesc>(new DnnlMemoryDesc(desc));
}
}

size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const dnnl::memory::desc& desc) {
size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const memory::desc& desc) {
auto tmpDesc = desc;

const auto offset0 = tmpDesc.get()->offset0;
@@ -167,8 +169,8 @@ std::shared_ptr<DnnlBlockedMemoryDesc> DnnlExtensionUtils::makeUndefinedDesc(con
}
}

DnnlMemoryDescPtr DnnlExtensionUtils::query_md(const const_dnnl_primitive_desc_t& pd, const dnnl::query& what, int idx) {
auto query = dnnl::convert_to_c(what);
DnnlMemoryDescPtr DnnlExtensionUtils::query_md(const const_dnnl_primitive_desc_t& pd, const query& what, int idx) {
auto query = convert_to_c(what);
const auto* cdesc = dnnl_primitive_desc_query_md(pd, query, idx);

if (!cdesc)
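
A small self-check of the new 64-bit mappings, assuming the plugin's DnnlExtensionUtils header as extended above and the oneDNN fork that exposes data_type::s64 and data_type::f64; include paths follow the plugin's own sources.

#include <cassert>
#include "dnnl_extension_utils.h"

void check_i64_mapping() {
    using namespace ov::intel_cpu;
    // I64 now maps to oneDNN s64 and occupies 8 bytes.
    const auto dt = DnnlExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::I64);
    assert(dt == dnnl::memory::data_type::s64);
    assert(DnnlExtensionUtils::sizeOfDataType(dt) == 8);
    // The mapping round-trips back to the IE precision.
    assert(DnnlExtensionUtils::DataTypeToIEPrecision(dt) == InferenceEngine::Precision::I64);
}
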
19 changes: 9 additions & 10 deletions src/plugins/intel_cpu/src/graph.cpp
@@ -45,7 +45,6 @@
#include "memory_desc/cpu_memory_desc_utils.h"

#include <openvino/core/model.hpp>
#include <openvino/core/node.hpp>
#include <openvino/op/ops.hpp>
#include <transformations/utils/utils.hpp>
#include <low_precision/low_precision.hpp>
@@ -306,7 +305,7 @@ void Graph::Replicate(const CNNNetwork &network) {
// change precision for input/output nodes to avoid extra data conversion when set input/output blobs
// also we need to change input/output precisions for consumers/producers to avoid inserting reorder
for (auto &input : inputNodesMap) {
const auto precToSet = normalizeToSupportedPrecision(inputsInfo.at(input.first)->getPrecision());
auto precToSet = normalizeToSupportedPrecision(inputsInfo.at(input.first)->getPrecision(), getConfig().enableNativeI64);
input.second->setOriginalOutputPrecisionAtPort(0, precToSet);
const auto childEdges = input.second->getChildEdgesAtPort(0);
for (size_t i = 0; i < childEdges.size(); i++) {
@@ -320,7 +319,7 @@ void Graph::Replicate(const CNNNetwork &network) {
}

for (auto &output : outputNodesMap) {
const auto precToSet = normalizeToSupportedPrecision(outputsInfo.at(output.first)->getPrecision());
auto precToSet = normalizeToSupportedPrecision(outputsInfo.at(output.first)->getPrecision(), getConfig().enableNativeI64);
output.second->setOriginalInputPrecisionAtPort(0, precToSet);
const auto parentEdges = output.second->getParentEdgesAtPort(0);
for (size_t i = 0; i < parentEdges.size(); i++) {
@@ -1004,7 +1003,7 @@ void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob::

// todo: make sure 'name' exists in this map...
if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) {
if (inTensorDesc.getPrecision() == InferenceEngine::Precision::FP32) {
if (inTensorDesc.getPrecision() == Precision::FP32) {
_normalizePreprocMap[name].NormalizeImage(outDims, reinterpret_cast<float *>(inter_data_ptr),
inTensorDesc.getLayout());
} else {
@@ -1460,16 +1459,16 @@ void Graph::SortTopologically() {
}
}

void Graph::GetPerfData(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const {
void Graph::GetPerfData(std::map<std::string, InferenceEngineProfileInfo> &perfMap) const {
unsigned i = 0;
std::function<void(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &, const NodePtr&)>
getPerfMapFor = [&](std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap, const NodePtr& node) {
InferenceEngine::InferenceEngineProfileInfo &pc = perfMap[node->getName()];
std::function<void(std::map<std::string, InferenceEngineProfileInfo> &, const NodePtr&)>
getPerfMapFor = [&](std::map<std::string, InferenceEngineProfileInfo> &perfMap, const NodePtr& node) {
InferenceEngineProfileInfo &pc = perfMap[node->getName()];
pc.execution_index = i++;
// TODO: Why time counter is signed?
pc.cpu_uSec = pc.realTime_uSec = (long long) node->PerfCounter().avg();
pc.status = pc.cpu_uSec > 0 ? InferenceEngine::InferenceEngineProfileInfo::EXECUTED
: InferenceEngine::InferenceEngineProfileInfo::NOT_RUN;
pc.status = pc.cpu_uSec > 0 ? InferenceEngineProfileInfo::EXECUTED
: InferenceEngineProfileInfo::NOT_RUN;
std::string pdType = node->getPrimitiveDescriptorType();
size_t typeLen = sizeof(pc.exec_type) / sizeof(pc.exec_type[0]);
pdType.copy(pc.exec_type, typeLen, 0);
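
The callers above now pass getConfig().enableNativeI64 into normalizeToSupportedPrecision. The real helper lives in the plugin's utilities and is not shown in this diff; the sketch below only illustrates the gating behavior the callers rely on, with an assumed body and header.

#include <ie_precision.hpp>

// Illustrative only: map 64-bit integer precisions down to I32 unless native
// I64 execution is enabled; other precisions are assumed to be handled as before.
InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine::Precision prec, bool enableNativeI64) {
    switch (prec) {
    case InferenceEngine::Precision::I64:
    case InferenceEngine::Precision::U64:
        return enableNativeI64 ? InferenceEngine::Precision::I64
                               : InferenceEngine::Precision::I32;
    default:
        return prec;
    }
}
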
1 change: 0 additions & 1 deletion src/plugins/intel_cpu/src/graph_optimizer.cpp
@@ -19,7 +19,6 @@
#include "nodes/mvn.h"
#include "nodes/transpose.h"
#include "nodes/interpolate.h"
#include "nodes/reduce.h"
#include "nodes/input.h"
#include "nodes/rnn.h"
#include "nodes/common/cpu_convert.h"
4 changes: 2 additions & 2 deletions src/plugins/intel_cpu/src/infer_request.cpp
@@ -367,7 +367,7 @@ InferRequestBase::normToInputSupportedPrec(const std::pair<const std::string, In
if (graph->hasMeanImageFor(input.first) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) {
inPrec = InferenceEngine::Precision::FP32;
} else {
inPrec = normalizeToSupportedPrecision(inPrec);
inPrec = normalizeToSupportedPrecision(inPrec, graph->getConfig().enableNativeI64);
}

if (inPrec == InferenceEngine::Precision::UNSPECIFIED) {
@@ -583,7 +583,7 @@ InferenceEngine::Blob::Ptr LegacyInferRequest::GetBlob(const std::string& name)
auto pBlobDesc = MemoryDescUtils::interpretAsBlobDesc(graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory());
if (!data) {
InferenceEngine::TensorDesc desc = _networkOutputs[name]->getTensorDesc();
desc.setPrecision(normalizeToSupportedPrecision(desc.getPrecision()));
desc.setPrecision(normalizeToSupportedPrecision(desc.getPrecision(), graph->getConfig().enableNativeI64));

// WA: need to avoid exception thrown when we compare blocking desc in SetBlob
// in situation if we push output blobs as inputs for next network (in Hetero plugin)
4 changes: 0 additions & 4 deletions src/plugins/intel_cpu/src/nodes/eye.cpp
@@ -55,10 +55,6 @@ Eye::Eye(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
}
outType = op->get_output_element_type(0);
withBatchShape = (op->get_input_size() == 4);
if (!one_of(outType, ngraph::element::f32, ngraph::element::bf16,
ngraph::element::i32, ngraph::element::i8, ngraph::element::u8)) {
THROW_ERROR << errorPrefix << "doesn't support demanded output precision";
}
}

void Eye::getSupportedDescriptors() {
3 changes: 0 additions & 3 deletions src/plugins/intel_cpu/src/nodes/non_zero.cpp
@@ -40,9 +40,6 @@ NonZero::NonZero(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CP
} else {
IE_THROW(NotImplemented) << errorMessage;
}
if (op->get_output_element_type(0) != ngraph::element::i32) {
IE_THROW() << errorPrefix << "doesn't support demanded output precision";
}
}

void NonZero::getSupportedDescriptors() {
25 changes: 18 additions & 7 deletions src/plugins/intel_cpu/src/plugin.cpp
@@ -9,7 +9,6 @@

#include "transformations/transformation_pipeline.h"
#include "itt.h"
#include "extension_mngr.h"
#include "extension.h"
#include "serialize.h"
#include "threading/ie_executor_manager.hpp"
@@ -21,11 +20,9 @@
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include "openvino/runtime/intel_cpu/properties.hpp"

#include <transformations/utils/utils.hpp>
#include <ie_ngraph_utils.hpp>

#include "performance_heuristics.hpp"
#include "openvino/runtime/properties.hpp"
#include "weights_cache.hpp"
#include "utils/denormals.hpp"

@@ -36,7 +33,6 @@
#endif

#include <cpu/x64/cpu_isa_traits.hpp>
#include <itt.h>

#if defined(OV_CPU_WITH_ACL)
#include "nodes/executors/acl/acl_ie_scheduler.hpp"
@@ -164,7 +160,7 @@ static bool streamsSet(const std::map<std::string, std::string>& config) {
config.count(ov::num_streams.name());
}

void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ngraph::Function>& ngraphFunc) const {
void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ov::Model>& ngraphFunc) const {
auto getNumStreamsLatency = [&]() {
return std::pair<std::string, std::string>(CONFIG_VALUE(CPU_THROUGHPUT_NUMA), ov::util::to_string(ov::streams::NUMA));
};
@@ -281,7 +277,7 @@ void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, c
}
}

void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr<ngraph::Function>& ngraphFunc) {
void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr<ov::Model>& ngraphFunc) {
const auto perf_hint_name = config.perfHintsConfig.ovPerfHint;
const int latency_streams = get_default_latency_streams(config.latencyThreadingMode);
int streams;
@@ -462,6 +458,19 @@ static Config::SnippetsMode getSnippetsMode(const std::map<std::string, std::str
IE_THROW() << "Wrong value for property key SNIPPETS_MODE. Expected values: ENABLE/DISABLE/IGNORE_CALLBACK";
}

static void setI64Mode(const std::map<std::string, std::string>& modelConfig, Config& engineConfig) {
engineConfig.enableNativeI64 = false;
const auto i64prop = modelConfig.find(InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
if (i64prop != modelConfig.end()) {
if (i64prop->second == PluginConfigParams::YES) {
engineConfig.enableNativeI64 = true;
} else if (i64prop->second != PluginConfigParams::NO) {
IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_CPU_NATIVE_I64 << ": " << i64prop->second
<< ". Expected only YES or NO values.";
}
}
}

InferenceEngine::IExecutableNetworkInternal::Ptr
Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &orig_config) {
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Engine::LoadExeNetworkImpl");
@@ -495,6 +504,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
const bool enableLPT = shouldEnableLPT(config, engConfig);
ov::element::Type inferencePrecision = getInferencePrecision(config, engConfig);
const Config::SnippetsMode snippetsMode = getSnippetsMode(config, engConfig);
setI64Mode(config, engConfig);

auto nGraphFunc = clonedNetwork.getFunction();

@@ -770,6 +780,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/
|| Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled */;
const Config::SnippetsMode snippetsMode = getSnippetsMode(config, conf);
setI64Mode(config, conf);

auto model = network.getFunction();
if (model == nullptr) {
@@ -785,7 +796,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
transformation.UpToCpuSpecificOpSet();
transformation.CpuSpecificOpSet();
},
[&](const std::shared_ptr<ngraph::Node>& op) {
[&](const std::shared_ptr<ov::Node>& op) {
std::unique_ptr<Node> ptr;
try {
ptr.reset(Node::factory().create(op, context));