[CPU] I64 transformation & config. #18594

Closed

@@ -871,6 +871,14 @@ inline int32_t convert_value<uint32_t, int32_t>(uint32_t val) {
return static_cast<int32_t>(val);
}

template <>
inline int64_t convert_value<uint64_t, int64_t>(uint64_t val) {
if (val > static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
return std::numeric_limits<int64_t>::max();
}
return static_cast<int64_t>(val);
}

namespace {
template <ov::element::Type_t PREC_FROM, ov::element::Type_t PREC_TO>
std::shared_ptr<ngraph::Node> change_constant_precision(std::shared_ptr<opset4::Constant>& constant) {
@@ -1110,7 +1118,9 @@ bool fuse_type_to_constant(const std::shared_ptr<ngraph::Node>& node,
const auto& to = it->second;
if (auto constant = ov::as_type_ptr<opset4::Constant>(node)) {
std::shared_ptr<ngraph::Node> new_const;
if (from == ov::element::u64 && to == ov::element::i32) {
if (from == ov::element::u64 && to == ov::element::i64) {
new_const = change_constant_precision<ov::element::Type_t::u64, ov::element::Type_t::i64>(constant);
} else if (from == ov::element::u64 && to == ov::element::i32) {
new_const = change_constant_precision<ov::element::Type_t::u64, ov::element::Type_t::i32>(constant);
} else if (from == ov::element::i64 && to == ov::element::i32) {
new_const = change_constant_precision<ov::element::Type_t::i64, ov::element::Type_t::i32>(constant);
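
The new convert_value<uint64_t, int64_t> specialization saturates rather than wraps when a u64 value exceeds the i64 range, and fuse_type_to_constant now uses it for the u64 -> i64 constant path. A minimal standalone sketch of the same clamping rule (the free-function name below is illustrative, not the plugin's template):

#include <cstdint>
#include <iostream>
#include <limits>

// Clamp an unsigned 64-bit value into the signed 64-bit range
// (mirrors the specialization added above).
int64_t saturate_u64_to_i64(uint64_t val) {
    if (val > static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
        return std::numeric_limits<int64_t>::max();  // saturate at INT64_MAX
    }
    return static_cast<int64_t>(val);
}

int main() {
    std::cout << saturate_u64_to_i64(42u) << "\n";                                   // 42
    std::cout << saturate_u64_to_i64(std::numeric_limits<uint64_t>::max()) << "\n";  // 9223372036854775807
}
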
@@ -110,6 +110,11 @@ INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(ENABLE);
INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(IGNORE_CALLBACK);
INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(DISABLE);

/**
 * @brief Enables inference with the INT64 data type in the CPU plugin when it is present in the original model.
*/
DECLARE_CONFIG_KEY(CPU_NATIVE_I64);

} // namespace PluginConfigInternalParams

} // namespace InferenceEngine
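
For reference, a hedged sketch of how this internal key could be passed when compiling a model through the IE 1.0 Core API. It assumes the internal header above is reachable from the calling code; the model filename is hypothetical.

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model_with_i64.xml");  // hypothetical model path
    // Ask the CPU plugin to keep native I64 execution enabled.
    auto exeNetwork = core.LoadNetwork(network, "CPU",
        {{InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64,
          InferenceEngine::PluginConfigParams::YES}});
    return 0;
}
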
11 changes: 10 additions & 1 deletion src/plugins/intel_cpu/src/config.cpp
@@ -230,6 +230,15 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
IE_THROW() << "Wrong value for property key " << ov::hint::execution_mode.name()
<< ". Supported values: PERFORMANCE, ACCURACY";
}
} else if (key == PluginConfigInternalParams::KEY_CPU_NATIVE_I64) {
if (val == PluginConfigParams::YES) {
enableNativeI64 = true;
} else if (val == PluginConfigParams::NO) {
enableNativeI64 = false;
} else {
IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_CPU_NATIVE_I64 << ": " << val
<< ". Expected only YES or NO values.";
}
} else {
IE_THROW(NotFound) << "Unsupported property " << key << " by CPU plugin";
}
@@ -308,4 +317,4 @@ void Config::updateProperties() {
}

} // namespace intel_cpu
} // namespace ov
} // namespace ov
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/config.h
@@ -57,6 +57,7 @@ struct Config {
// TODO: Executor cache may lead to incorrect behavior on oneDNN ACL primitives
size_t rtCacheCapacity = 0ul;
#endif
bool enableNativeI64 = false;
InferenceEngine::IStreamsExecutor::Config streamExecutorConfig;
InferenceEngine::PerfHintsConfig perfHintsConfig;
bool enableCpuPinning = true;
56 changes: 29 additions & 27 deletions src/plugins/intel_cpu/src/dnnl_extension_utils.cpp
@@ -4,45 +4,43 @@

#include "dnnl_extension_utils.h"

#include "utils/general_utils.h"
#include <oneapi/dnnl/dnnl.hpp>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "onednn/iml_type_mapper.h"
#include <common/primitive_desc.hpp>
#include <common/primitive_desc_iface.hpp>

#include <vector>

using namespace dnnl;

namespace ov {
namespace intel_cpu {

uint8_t DnnlExtensionUtils::sizeOfDataType(dnnl::memory::data_type dataType) {
uint8_t DnnlExtensionUtils::sizeOfDataType(memory::data_type dataType) {
switch (dataType) {
case dnnl::memory::data_type::f32:
return 4;
case dnnl::memory::data_type::s32:
case memory::data_type::f64:
case memory::data_type::s64:
return 8;
case memory::data_type::f32:
case memory::data_type::s32:
return 4;
case dnnl::memory::data_type::bf16:
case memory::data_type::bf16:
case memory::data_type::f16:
return 2;
case dnnl::memory::data_type::s8:
return 1;
case dnnl::memory::data_type::u8:
case memory::data_type::s8:
case memory::data_type::u8:
case memory::data_type::bin:
return 1;
case dnnl::memory::data_type::bin:
return 1;
case dnnl::memory::data_type::f16:
return 2;
case dnnl::memory::data_type::undef:
case memory::data_type::undef:
return 0;
default:
IE_THROW() << "Unsupported data type.";
IE_THROW() << "Unsupported data type: " << DataTypeToIEPrecision(dataType);
}
}

memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngine::Precision& prec) {
switch (prec) {
case InferenceEngine::Precision::FP64:
return memory::data_type::f64;
case InferenceEngine::Precision::I64:
return memory::data_type::s64;
case InferenceEngine::Precision::FP32:
return memory::data_type::f32;
case InferenceEngine::Precision::I32:
@@ -68,6 +66,10 @@ memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngin

InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::data_type dataType) {
switch (dataType) {
case memory::data_type::f64:
return InferenceEngine::Precision::FP64;
case memory::data_type::s64:
return InferenceEngine::Precision::I64;
case memory::data_type::f32:
return InferenceEngine::Precision::FP32;
case memory::data_type::s32:
@@ -90,11 +92,11 @@ InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::dat
}
}

Dim DnnlExtensionUtils::convertToDim(const dnnl::memory::dim &dim) {
Dim DnnlExtensionUtils::convertToDim(const memory::dim &dim) {
return dim == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : static_cast<size_t>(dim);
}
dnnl::memory::dim DnnlExtensionUtils::convertToDnnlDim(const Dim &dim) {
return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast<dnnl::memory::dim>(dim);
memory::dim DnnlExtensionUtils::convertToDnnlDim(const Dim &dim) {
return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast<memory::dim>(dim);
}

VectorDims DnnlExtensionUtils::convertToVectorDims(const memory::dims& dims) {
@@ -133,19 +135,19 @@ memory::format_tag DnnlExtensionUtils::GetPlainFormatByRank(size_t rank) {
}
}

DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const dnnl::memory::desc &desc) {
DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const memory::desc &desc) {
return makeDescriptor(desc.get());
}

DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const_dnnl_memory_desc_t desc) {
if (desc->format_kind == dnnl::impl::format_kind_t::dnnl_blocked) {
if (desc->format_kind == impl::format_kind_t::dnnl_blocked) {
return std::shared_ptr<DnnlBlockedMemoryDesc>(new DnnlBlockedMemoryDesc(desc));
} else {
return std::shared_ptr<DnnlMemoryDesc>(new DnnlMemoryDesc(desc));
}
}

size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const dnnl::memory::desc& desc) {
size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const memory::desc& desc) {
auto tmpDesc = desc;

const auto offset0 = tmpDesc.get()->offset0;
@@ -167,8 +169,8 @@ std::shared_ptr<DnnlBlockedMemoryDesc> DnnlExtensionUtils::makeUndefinedDesc(con
}
}

DnnlMemoryDescPtr DnnlExtensionUtils::query_md(const const_dnnl_primitive_desc_t& pd, const dnnl::query& what, int idx) {
auto query = dnnl::convert_to_c(what);
DnnlMemoryDescPtr DnnlExtensionUtils::query_md(const const_dnnl_primitive_desc_t& pd, const query& what, int idx) {
auto query = convert_to_c(what);
const auto* cdesc = dnnl_primitive_desc_query_md(pd, query, idx);

if (!cdesc)
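
A small self-check of the new 64-bit mappings, assuming the plugin's DnnlExtensionUtils header as extended above and the oneDNN fork that exposes data_type::s64 and data_type::f64; include paths follow the plugin's own sources.

#include <cassert>
#include "dnnl_extension_utils.h"

void check_i64_mapping() {
    using namespace ov::intel_cpu;
    // I64 now maps to oneDNN s64 and occupies 8 bytes.
    const auto dt = DnnlExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::I64);
    assert(dt == dnnl::memory::data_type::s64);
    assert(DnnlExtensionUtils::sizeOfDataType(dt) == 8);
    // The mapping round-trips back to the IE precision.
    assert(DnnlExtensionUtils::DataTypeToIEPrecision(dt) == InferenceEngine::Precision::I64);
}
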
19 changes: 9 additions & 10 deletions src/plugins/intel_cpu/src/graph.cpp
@@ -45,7 +45,6 @@
#include "memory_desc/cpu_memory_desc_utils.h"

#include <openvino/core/model.hpp>
#include <openvino/core/node.hpp>
#include <openvino/op/ops.hpp>
#include <transformations/utils/utils.hpp>
#include <low_precision/low_precision.hpp>
@@ -306,7 +305,7 @@ void Graph::Replicate(const CNNNetwork &network) {
// change precision for input/output nodes to avoid extra data conversion when set input/output blobs
// also we need to change input/output precisions for consumers/producers to avoid inserting reorder
for (auto &input : inputNodesMap) {
const auto precToSet = normalizeToSupportedPrecision(inputsInfo.at(input.first)->getPrecision());
auto precToSet = normalizeToSupportedPrecision(inputsInfo.at(input.first)->getPrecision(), getConfig().enableNativeI64);
input.second->setOriginalOutputPrecisionAtPort(0, precToSet);
const auto childEdges = input.second->getChildEdgesAtPort(0);
for (size_t i = 0; i < childEdges.size(); i++) {
@@ -320,7 +319,7 @@ void Graph::Replicate(const CNNNetwork &network) {
}

for (auto &output : outputNodesMap) {
const auto precToSet = normalizeToSupportedPrecision(outputsInfo.at(output.first)->getPrecision());
auto precToSet = normalizeToSupportedPrecision(outputsInfo.at(output.first)->getPrecision(), getConfig().enableNativeI64);
output.second->setOriginalInputPrecisionAtPort(0, precToSet);
const auto parentEdges = output.second->getParentEdgesAtPort(0);
for (size_t i = 0; i < parentEdges.size(); i++) {
@@ -1004,7 +1003,7 @@ void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob::

// todo: make sure 'name' exists in this map...
if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) {
if (inTensorDesc.getPrecision() == InferenceEngine::Precision::FP32) {
if (inTensorDesc.getPrecision() == Precision::FP32) {
_normalizePreprocMap[name].NormalizeImage(outDims, reinterpret_cast<float *>(inter_data_ptr),
inTensorDesc.getLayout());
} else {
@@ -1460,16 +1459,16 @@ void Graph::SortTopologically() {
}
}

void Graph::GetPerfData(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const {
void Graph::GetPerfData(std::map<std::string, InferenceEngineProfileInfo> &perfMap) const {
unsigned i = 0;
std::function<void(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &, const NodePtr&)>
getPerfMapFor = [&](std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap, const NodePtr& node) {
InferenceEngine::InferenceEngineProfileInfo &pc = perfMap[node->getName()];
std::function<void(std::map<std::string, InferenceEngineProfileInfo> &, const NodePtr&)>
getPerfMapFor = [&](std::map<std::string, InferenceEngineProfileInfo> &perfMap, const NodePtr& node) {
InferenceEngineProfileInfo &pc = perfMap[node->getName()];
pc.execution_index = i++;
// TODO: Why time counter is signed?
pc.cpu_uSec = pc.realTime_uSec = (long long) node->PerfCounter().avg();
pc.status = pc.cpu_uSec > 0 ? InferenceEngine::InferenceEngineProfileInfo::EXECUTED
: InferenceEngine::InferenceEngineProfileInfo::NOT_RUN;
pc.status = pc.cpu_uSec > 0 ? InferenceEngineProfileInfo::EXECUTED
: InferenceEngineProfileInfo::NOT_RUN;
std::string pdType = node->getPrimitiveDescriptorType();
size_t typeLen = sizeof(pc.exec_type) / sizeof(pc.exec_type[0]);
pdType.copy(pc.exec_type, typeLen, 0);
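
The callers above now pass getConfig().enableNativeI64 into normalizeToSupportedPrecision. The real helper lives in the plugin's utilities and is not shown in this diff; the sketch below only illustrates the gating behavior the callers rely on, with an assumed body and header.

#include <ie_precision.hpp>

// Illustrative only: map 64-bit integer precisions down to I32 unless native
// I64 execution is enabled; other precisions are assumed to be handled as before.
InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine::Precision prec, bool enableNativeI64) {
    switch (prec) {
    case InferenceEngine::Precision::I64:
    case InferenceEngine::Precision::U64:
        return enableNativeI64 ? InferenceEngine::Precision::I64
                               : InferenceEngine::Precision::I32;
    default:
        return prec;
    }
}
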
1 change: 0 additions & 1 deletion src/plugins/intel_cpu/src/graph_optimizer.cpp
@@ -19,7 +19,6 @@
#include "nodes/mvn.h"
#include "nodes/transpose.h"
#include "nodes/interpolate.h"
#include "nodes/reduce.h"
#include "nodes/input.h"
#include "nodes/rnn.h"
#include "nodes/common/cpu_convert.h"
4 changes: 2 additions & 2 deletions src/plugins/intel_cpu/src/infer_request.cpp
@@ -367,7 +367,7 @@ InferRequestBase::normToInputSupportedPrec(const std::pair<const std::string, In
if (graph->hasMeanImageFor(input.first) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) {
inPrec = InferenceEngine::Precision::FP32;
} else {
inPrec = normalizeToSupportedPrecision(inPrec);
inPrec = normalizeToSupportedPrecision(inPrec, graph->getConfig().enableNativeI64);
}

if (inPrec == InferenceEngine::Precision::UNSPECIFIED) {
@@ -583,7 +583,7 @@ InferenceEngine::Blob::Ptr LegacyInferRequest::GetBlob(const std::string& name)
auto pBlobDesc = MemoryDescUtils::interpretAsBlobDesc(graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory());
if (!data) {
InferenceEngine::TensorDesc desc = _networkOutputs[name]->getTensorDesc();
desc.setPrecision(normalizeToSupportedPrecision(desc.getPrecision()));
desc.setPrecision(normalizeToSupportedPrecision(desc.getPrecision(), graph->getConfig().enableNativeI64));

// WA: need to avoid exception thrown when we compare blocking desc in SetBlob
// in situation if we push output blobs as inputs for next network (in Hetero plugin)
4 changes: 0 additions & 4 deletions src/plugins/intel_cpu/src/nodes/eye.cpp
@@ -55,10 +55,6 @@ Eye::Eye(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context)
}
outType = op->get_output_element_type(0);
withBatchShape = (op->get_input_size() == 4);
if (!one_of(outType, ngraph::element::f32, ngraph::element::bf16,
ngraph::element::i32, ngraph::element::i8, ngraph::element::u8)) {
THROW_ERROR << errorPrefix << "doesn't support demanded output precision";
}
}

void Eye::getSupportedDescriptors() {
3 changes: 0 additions & 3 deletions src/plugins/intel_cpu/src/nodes/non_zero.cpp
@@ -40,9 +40,6 @@ NonZero::NonZero(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CP
} else {
IE_THROW(NotImplemented) << errorMessage;
}
if (op->get_output_element_type(0) != ngraph::element::i32) {
IE_THROW() << errorPrefix << "doesn't support demanded output precision";
}
}

void NonZero::getSupportedDescriptors() {
25 changes: 18 additions & 7 deletions src/plugins/intel_cpu/src/plugin.cpp
@@ -9,7 +9,6 @@

#include "transformations/transformation_pipeline.h"
#include "itt.h"
#include "extension_mngr.h"
#include "extension.h"
#include "serialize.h"
#include "threading/ie_executor_manager.hpp"
@@ -21,11 +20,9 @@
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include "openvino/runtime/intel_cpu/properties.hpp"

#include <transformations/utils/utils.hpp>
#include <ie_ngraph_utils.hpp>

#include "performance_heuristics.hpp"
#include "openvino/runtime/properties.hpp"
#include "weights_cache.hpp"
#include "utils/denormals.hpp"

@@ -36,7 +33,6 @@
#endif

#include <cpu/x64/cpu_isa_traits.hpp>
#include <itt.h>

#if defined(OV_CPU_WITH_ACL)
#include "nodes/executors/acl/acl_ie_scheduler.hpp"
@@ -164,7 +160,7 @@ static bool streamsSet(const std::map<std::string, std::string>& config) {
config.count(ov::num_streams.name());
}

void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ngraph::Function>& ngraphFunc) const {
void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ov::Model>& ngraphFunc) const {
auto getNumStreamsLatency = [&]() {
return std::pair<std::string, std::string>(CONFIG_VALUE(CPU_THROUGHPUT_NUMA), ov::util::to_string(ov::streams::NUMA));
};
@@ -281,7 +277,7 @@ void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, c
}
}

void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr<ngraph::Function>& ngraphFunc) {
void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr<ov::Model>& ngraphFunc) {
const auto perf_hint_name = config.perfHintsConfig.ovPerfHint;
const int latency_streams = get_default_latency_streams(config.latencyThreadingMode);
int streams;
@@ -462,6 +458,19 @@ static Config::SnippetsMode getSnippetsMode(const std::map<std::string, std::str
IE_THROW() << "Wrong value for property key SNIPPETS_MODE. Expected values: ENABLE/DISABLE/IGNORE_CALLBACK";
}

static void setI64Mode(const std::map<std::string, std::string>& modelConfig, Config& engineConfig) {
engineConfig.enableNativeI64 = false;
const auto i64prop = modelConfig.find(InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
if (i64prop != modelConfig.end()) {
if (i64prop->second == PluginConfigParams::YES) {
engineConfig.enableNativeI64 = true;
} else if (i64prop->second != PluginConfigParams::NO) {
IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_CPU_NATIVE_I64 << ": " << i64prop->second
<< ". Expected only YES or NO values.";
}
}
}

InferenceEngine::IExecutableNetworkInternal::Ptr
Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &orig_config) {
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Engine::LoadExeNetworkImpl");
@@ -495,6 +504,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
const bool enableLPT = shouldEnableLPT(config, engConfig);
ov::element::Type inferencePrecision = getInferencePrecision(config, engConfig);
const Config::SnippetsMode snippetsMode = getSnippetsMode(config, engConfig);
setI64Mode(config, engConfig);

auto nGraphFunc = clonedNetwork.getFunction();

@@ -770,6 +780,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/
|| Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled */;
const Config::SnippetsMode snippetsMode = getSnippetsMode(config, conf);
setI64Mode(config, conf);

auto model = network.getFunction();
if (model == nullptr) {
@@ -785,7 +796,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
transformation.UpToCpuSpecificOpSet();
transformation.CpuSpecificOpSet();
},
[&](const std::shared_ptr<ngraph::Node>& op) {
[&](const std::shared_ptr<ov::Node>& op) {
std::unique_ptr<Node> ptr;
try {
ptr.reset(Node::factory().create(op, context));