Initial Backend Engine Integration #773
base: master
@@ -22,7 +22,9 @@
 #include "logging/ngraph_log.h"
 #include "ngraph_bridge/default_opset.h"
 #include "ngraph_bridge/executable.h"
+#include "ngraph_bridge/ie_basic_engine.h"
 #include "ngraph_bridge/ie_tensor.h"
+#include "ngraph_bridge/ie_utils.h"
 #include "ngraph_bridge/ngraph_utils.h"

 using namespace std;
@@ -139,12 +141,8 @@ Executable::Executable(shared_ptr<Function> func, string device)
         name + "_IE_" + m_device;
   }

-  NGRAPH_VLOG(2) << "Loading IE CNN network to device " << m_device;
-
-  // Load network to the plugin (m_device) and create an infer request
-  InferenceEngine::ExecutableNetwork exe_network =
-      ie.LoadNetwork(m_network, m_device, options);
-  m_infer_req = exe_network.CreateInferRequest();
+  NGRAPH_VLOG(2) << "Creating IE Execution Engine";
+  m_ie_engine = make_shared<IEBasicEngine>(m_network, m_device);
 }

 bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
@@ -167,7 +165,9 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
   }

   // Prepare input blobs
-  auto func = m_network.getFunction();
+  auto func = m_ie_engine->GetFunc();
+  std::vector<std::shared_ptr<IETensor>> ie_inputs(inputs.size());
+  std::vector<std::string> input_names(inputs.size());
   auto parameters = func->get_parameters();
   int j = 0;
   for (int i = 0; i < inputs.size(); i++) {
@@ -180,18 +180,23 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
       NGRAPH_VLOG(1) << "Skipping unused input " << input_name;
       continue;
     }
-    shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(inputs[i]);
-    m_infer_req.SetBlob(input_name, tv->get_blob());
+    ie_inputs[i] = nullptr;
+    ie_inputs[i] = static_pointer_cast<IETensor>(inputs[i]);
+    input_names[i] = input_name;
Comment on lines +183 to +185:

Please make IETensor named so that we can track the name together with the tensor.
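A minimal sketch of what a name-carrying tensor could look like, assuming we simply pair the existing IETensor with its network input/output name; the NamedTensor type, its fields, and the stand-in Tensor struct are hypothetical, not part of this PR:

// Hypothetical sketch only: pair each tensor with its name so the engine can
// take one vector instead of the parallel ie_inputs / input_names vectors.
#include <memory>
#include <string>
#include <vector>

struct Tensor {};  // stand-in for ngraph_bridge::IETensor

struct NamedTensor {
  std::shared_ptr<Tensor> tensor;
  std::string name;  // network-level input or output name
};

int main() {
  std::vector<NamedTensor> inputs;
  inputs.push_back({std::make_shared<Tensor>(), "input_0"});
  // An engine Infer(inputs, ...) overload could then read the blob and its
  // name from the same element.
  return 0;
}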
   }

+  std::vector<std::shared_ptr<IETensor>> ie_hoisted_params(
+      m_hoisted_params.size());
+  std::vector<std::string> param_names(m_hoisted_params.size());
   for (const auto& it : m_hoisted_params) {
     auto input_name = it.first;
     if (input_info.find(input_name) == input_info.end()) {
       NGRAPH_VLOG(1) << "Skipping unused hoisted param " << input_name;
       continue;
     }
-    shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(it.second);
-    m_infer_req.SetBlob(input_name, tv->get_blob());
+    ie_hoisted_params[j] = nullptr;
+    ie_hoisted_params[j] = static_pointer_cast<IETensor>(it.second);
+    param_names[j++] = input_name;
   }

   InferenceEngine::OutputsDataMap output_info = m_network.getOutputsInfo();
@@ -214,22 +219,22 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,

   // Prepare output blobs
   auto results = func->get_results();
+  std::vector<std::shared_ptr<IETensor>> ie_outputs(outputs.size());
+  std::vector<std::string> output_names(outputs.size());
   for (int i = 0; i < results.size(); i++) {
     if (outputs[i] != nullptr) {
-      NGRAPH_VLOG(4) << "Executable::call() SetBlob()";
-      shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(outputs[i]);
-      m_infer_req.SetBlob(get_output_name(results[i]), tv->get_blob());
+      ie_outputs[i] = static_pointer_cast<IETensor>(outputs[i]);
     }
+    output_names[i] = get_output_name(results[i]);
   }

-  m_infer_req.Infer();
+  m_ie_engine->Infer(ie_inputs, input_names, ie_outputs, output_names,
+                     ie_hoisted_params, param_names);
Comment on lines +231 to +232:

What does the backend need to know hoisted parameters for? It could just be passed as an input?

It's possible to merge them into a single vector. But for now I think we may not pass the inputs directly, as suggested in the review above. Also, I'm not sure this would be safe for batching. It may not be an issue, but we may need to verify.

A hoisted parameter is an input to the model. The backend doesn't need to know whether an input was hoisted or not.

For the basic backend this may not matter. The VADM backend will divide the input into multiple batches, but that is not the case for the hoisted parameters. How would their size be affected by the actual input batch size? If we pass them as regular inputs to the backend now, the VADM backend will disable batching because it sees multiple inputs although the actual input size is 1.
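A hedged sketch of the reviewer's alternative, assuming the hoisted parameters were simply appended to the regular input vectors before calling the engine; the helper name and stand-in types are hypothetical, not code from this PR:

// Hypothetical sketch: treat hoisted parameters as ordinary inputs by
// appending them to the same vectors the engine already receives.
#include <memory>
#include <string>
#include <vector>

struct Tensor {};  // stand-in for ngraph_bridge::IETensor
using TensorPtr = std::shared_ptr<Tensor>;

void AppendHoistedParams(std::vector<TensorPtr>& inputs,
                         std::vector<std::string>& input_names,
                         const std::vector<TensorPtr>& hoisted_params,
                         const std::vector<std::string>& param_names) {
  inputs.insert(inputs.end(), hoisted_params.begin(), hoisted_params.end());
  input_names.insert(input_names.end(), param_names.begin(),
                     param_names.end());
  // The engine's Infer() would then take only (inputs, input_names, outputs,
  // output_names); the batching concern raised above would have to be
  // handled inside the VADM engine itself.
}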

   // Set dynamic output blobs
   for (int i = 0; i < results.size(); i++) {
     if (outputs[i] == nullptr) {
-      NGRAPH_VLOG(4) << "Executable::call() GetBlob()";
-      auto blob = m_infer_req.GetBlob(get_output_name(results[i]));
-      outputs[i] = make_shared<IETensor>(blob);
+      outputs[i] = ie_outputs[i];
     }
   }
@@ -0,0 +1,102 @@
/*******************************************************************************
 * Copyright 2017-2020 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

#include <iostream>

#include "ngraph_bridge/ie_backend_engine.h"
#include "ngraph_bridge/ie_utils.h"

namespace tensorflow {
namespace ngraph_bridge {

IEBackendEngine::IEBackendEngine(InferenceEngine::CNNNetwork ie_network,
                                 std::string device)
    : m_network(ie_network),
      m_func(ie_network.getFunction()),
      m_device(device),
      m_multi_req_execution(false),
      m_network_ready(false) {
  if (std::getenv("NGRAPH_TF_DUMP_GRAPHS")) {
    auto& name = m_network.getName();
    m_network.serialize(name + ".xml", name + ".bin");
  }
}

IEBackendEngine::~IEBackendEngine() {}

void IEBackendEngine::LoadNetwork() {
  if (m_network_ready) return;

  std::map<std::string, std::string> config;

  if (m_device == "MYRIAD") {
    // Set MYRIAD configurations
    if (IEUtils::VPUConfigEnabled()) {
      config["MYRIAD_DETECT_NETWORK_BATCH"] = "NO";
    }

    if (IEUtils::VPUFastCompileEnabled()) {
      config["MYRIAD_HW_INJECT_STAGES"] = "NO";
      config["MYRIAD_COPY_OPTIMIZATION"] = "NO";
    }
  }
Comment on lines +45 to +55:

These customizations should be set in the "myriad" backend implementation.

Answered within the comment below since it is related.
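One possible shape for that refactor, sketched here as an assumption: the shared engine asks a virtual hook for device configuration and a MYRIAD-specific subclass supplies it. The EngineBase / MyriadEngine names and the GetDeviceConfig hook are hypothetical, not part of this PR.

// Hypothetical sketch: device-specific configuration supplied by a subclass
// instead of an if (m_device == "MYRIAD") branch inside a shared LoadNetwork().
#include <map>
#include <string>

class EngineBase {
 public:
  virtual ~EngineBase() = default;

 protected:
  // Default: no extra plugin configuration.
  virtual std::map<std::string, std::string> GetDeviceConfig() const {
    return {};
  }
};

class MyriadEngine : public EngineBase {
 protected:
  std::map<std::string, std::string> GetDeviceConfig() const override {
    // Same keys this PR sets for MYRIAD, now owned by the MYRIAD engine.
    return {{"MYRIAD_DETECT_NETWORK_BATCH", "NO"},
            {"MYRIAD_HW_INJECT_STAGES", "NO"},
            {"MYRIAD_COPY_OPTIMIZATION", "NO"}};
  }
};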

  InferenceEngine::Core ie;
  // Load network to the plugin (m_device)
  m_exe_network = ie.LoadNetwork(m_network, m_device, config);
  m_network_ready = true;
}

void IEBackendEngine::StartAsyncInference(const int req_id) {
  // Start Async inference
  try {
    m_infer_reqs[req_id].StartAsync();
  } catch (InferenceEngine::details::InferenceEngineException e) {
    THROW_IE_EXCEPTION << "Couldn't start Inference: ";
  } catch (...) {
    THROW_IE_EXCEPTION << "Couldn't start Inference: ";
  }
}

void IEBackendEngine::CompleteAsyncInference(const int req_id) {
  // Wait for Async inference completion
  try {
    m_infer_reqs[req_id].Wait(
        InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
  } catch (InferenceEngine::details::InferenceEngineException e) {
    THROW_IE_EXCEPTION << " Exception with completing Inference: ";
  } catch (...) {
    THROW_IE_EXCEPTION << " Exception with completing Inference: ";
  }
}

size_t IEBackendEngine::GetOutputBatchSize(size_t input_batch_size) const {
  return m_network.getBatchSize() *
         IEUtils::GetNumRequests(input_batch_size, m_device);
}

// Enables multi request execution if the execution engine supports it
void IEBackendEngine::EnableMultiReqExecution() {
  m_multi_req_execution = true;
}

// Disables multi request execution
void IEBackendEngine::DisableMultiReqExecution() {
  m_multi_req_execution = false;
}

std::shared_ptr<ngraph::Function> IEBackendEngine::GetFunc() { return m_func; }

}  // namespace ngraph_bridge
}  // namespace tensorflow
@@ -0,0 +1,74 @@
Comment: Minor nit in terms of naming: I'd prefer to call this

/*******************************************************************************
 * Copyright 2017-2020 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

#ifndef IE_BACKEND_ENGINE_H_
#define IE_BACKEND_ENGINE_H_

#include <memory>
#include <string>
#include <vector>

#include <ie_core.hpp>

#include "ngraph_bridge/ie_tensor.h"

namespace tensorflow {
namespace ngraph_bridge {

class IEBackendEngine {
 public:
  IEBackendEngine(InferenceEngine::CNNNetwork ie_network, std::string device);
  ~IEBackendEngine();

  // Executes the inference
  virtual void Infer(std::vector<std::shared_ptr<IETensor>>& inputs,
                     std::vector<std::string>& input_names,
                     std::vector<std::shared_ptr<IETensor>>& outputs,
                     std::vector<std::string>& output_names,
                     std::vector<std::shared_ptr<IETensor>>& hoisted_params,
                     std::vector<std::string>& param_names) = 0;

  // Returns output batch size based on the input batch size and the device
  // FIXME: This may not be needed
  virtual size_t GetOutputBatchSize(size_t input_batch_size) const;
Comment:

Not needed?

This can be removed.
  // Enables multi request execution if the execution engine supports it
  void EnableMultiReqExecution();
  // Disables multi request execution
  void DisableMultiReqExecution();
Comment on lines +48 to +51:

This can be a parameter to the HDDL backend constructor.

It may limit our ability to enable/disable batching dynamically. Or this parameter might become part of caching. But it would require more changes to the existing bridge code.

In which scenarios would we want to enable/disable batching dynamically for a given network executing on a given device? If we expect this interface to be limited to a specific backend, then it shouldn't be a part of the abstract interface.

Then this should be a parameter to the Executable constructor, since the backend is created there.
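A hedged sketch of the constructor-parameter alternative discussed above; the Engine class and the extra enable_multi_req argument are assumptions for illustration, not code from this PR.

// Hypothetical sketch: fix multi-request execution at construction time
// instead of toggling it through Enable/DisableMultiReqExecution().
#include <string>
#include <utility>

class Engine {
 public:
  explicit Engine(std::string device, bool enable_multi_req = false)
      : m_device(std::move(device)),
        m_multi_req_execution(enable_multi_req) {}

 private:
  std::string m_device;
  bool m_multi_req_execution;  // decided once, when the engine is created
};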
  // Returns the nGraph Function from the CNNNetwork
  std::shared_ptr<ngraph::Function> GetFunc();

  virtual const std::vector<size_t> GetOutputShape(const int i) = 0;
Comment:

Not needed?

This is a function needed for the VADM backend. We can remove it in this PR, but we need to bring it back with the next PR.

The output shape can be determined either from the function or from the output that was originally passed to the backend?

The VADM backend modifies the batch size of the function, so the output size we get from the function will be wrong. We could get the output size just after creating the function and use it later for allocations. This would add extra functionality to NGraphEncapsulateOp, and I need to test to see how it works.
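A hedged sketch of the "capture the shapes right after creating the function" idea, assuming the standard nGraph API (get_results() and get_shape()); where this cache would live (Executable or NGraphEncapsulateOp) is left open, and the helper name is made up.

// Hypothetical sketch: record result shapes before any backend rewrites the
// function's batch size, so later allocations can use the original shapes.
#include <memory>
#include <vector>

#include <ngraph/ngraph.hpp>

std::vector<ngraph::Shape> CaptureOutputShapes(
    const std::shared_ptr<ngraph::Function>& func) {
  std::vector<ngraph::Shape> shapes;
  for (const auto& result : func->get_results()) {
    shapes.push_back(result->get_shape());  // shape as originally compiled
  }
  return shapes;
}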
 protected:
  InferenceEngine::CNNNetwork m_network;
  std::shared_ptr<ngraph::Function> m_func;
  std::vector<InferenceEngine::InferRequest> m_infer_reqs;
  std::string m_device;
  bool m_multi_req_execution;
  InferenceEngine::ExecutableNetwork m_exe_network;
  bool m_network_ready;

  virtual void StartAsyncInference(const int req_id);
  virtual void CompleteAsyncInference(const int req_id);
Comment on lines +67 to +68:

How is one supposed to use these?

These are required for asynchronous execution. Currently we need this for VADM. For the other backends, it will be the same as Infer for now.

Yes, I can tell that these are required for asynchronous execution from the name :) I was asking how one would use this interface, because I don't see a way to create an asynchronous inference request. The implementation for these is broken at the moment, and I'd prefer that we implement it correctly if we're extending the interface.

It does not have any impact on the execution. We kept all executions async to have a common call for all backends, in case we need it for multiple backends in the future. We can move back to the Infer call for the basic execution for now; it will not make any difference.
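For context, a hedged sketch of how a derived engine might drive these two hooks. The IEVADMEngine class and its InferBatched method are assumptions about a future multi-request backend, not code from this PR; the sketch relies on IEBackendEngine's protected members as declared above.

// Hypothetical sketch: create several infer requests up front, start them
// all, then wait for each one; the basic engine instead calls Infer() on a
// single request.
void IEVADMEngine::InferBatched(int num_requests) {
  for (int r = static_cast<int>(m_infer_reqs.size()); r < num_requests; r++) {
    m_infer_reqs.push_back(m_exe_network.CreateInferRequest());
  }
  for (int r = 0; r < num_requests; r++) {
    StartAsyncInference(r);     // issues m_infer_reqs[r].StartAsync()
  }
  for (int r = 0; r < num_requests; r++) {
    CompleteAsyncInference(r);  // waits on RESULT_READY
  }
}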
  virtual void LoadNetwork();
};

}  // namespace ngraph_bridge
}  // namespace tensorflow

#endif  // IE_BACKEND_ENGINE_H_
@@ -0,0 +1,80 @@
Comment:

"Basic Engine" is unnecessary if you provide a default implementation for the abstract backend? Backends that don't need a custom "Infer" or custom configuration can fall back to the default impl.

Should the basic execution be in Executable or in BackendEngine? We would disagree with the first one. The second option might be possible, and it may help with the issue about MYRIAD configurations above too. But we may need to think about whether it will impact anything else in the future. Although we may change this design later, I don't think it's in bad shape to be merged now.

"Basic" execution should be in the implementation of the default Backend.

/*******************************************************************************
 * Copyright 2017-2020 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

#include <iostream>

#include "logging/ngraph_log.h"
#include "ngraph_bridge/ie_basic_engine.h"
#include "ngraph_bridge/ie_utils.h"

namespace tensorflow {
namespace ngraph_bridge {

IEBasicEngine::IEBasicEngine(InferenceEngine::CNNNetwork ie_network,
                             std::string device)
    : IEBackendEngine(ie_network, device) {}

IEBasicEngine::~IEBasicEngine() {}

void IEBasicEngine::Infer(
    std::vector<std::shared_ptr<IETensor>>& inputs,
    std::vector<std::string>& input_names,
    std::vector<std::shared_ptr<IETensor>>& outputs,
    std::vector<std::string>& output_names,
    std::vector<std::shared_ptr<IETensor>>& hoisted_params,
    std::vector<std::string>& param_names) {
  LoadNetwork();
  if (m_infer_reqs.empty()) {
    m_infer_reqs.push_back(m_exe_network.CreateInferRequest());
  }

  // Prepare input blobs
  auto func = m_network.getFunction();
  auto parameters = func->get_parameters();
  for (int i = 0; i < inputs.size(); i++) {
    if (inputs[i] != nullptr)
      m_infer_reqs[0].SetBlob(input_names[i], inputs[i]->get_blob());
  }

  for (int i = 0; i < hoisted_params.size(); i++) {
    if (hoisted_params[i] != nullptr)
      m_infer_reqs[0].SetBlob(param_names[i], hoisted_params[i]->get_blob());
  }

  // Prepare output blobs
  auto results = func->get_results();
  for (int i = 0; i < results.size(); i++) {
    if (outputs[i] != nullptr) {
      NGRAPH_VLOG(4) << "Executable::call() SetBlob()";
      m_infer_reqs[0].SetBlob(output_names[i], outputs[i]->get_blob());
    }
  }

  m_infer_reqs[0].Infer();

  // Set dynamic output blobs
  for (int i = 0; i < results.size(); i++) {
    if (outputs[i] == nullptr) {
      NGRAPH_VLOG(4) << "Executable::call() GetBlob()";
      auto blob = m_infer_reqs[0].GetBlob(output_names[i]);
      outputs[i] = std::make_shared<IETensor>(blob);
    }
  }

  // return true;
}

}  // namespace ngraph_bridge
}  // namespace tensorflow
Comment:

Not necessary. You can pass inputs directly to the backend.

But in that case we have to pass nGraph tensors directly and cast them to IETensors inside the engine. Then we will need to move more functionality from the executable onto the backend. We might need further discussion about the related changes.

IETensor is an nGraph tensor.
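A hedged sketch of the "pass inputs directly" alternative: since IETensor derives from ngraph::runtime::Tensor, the engine could accept generic nGraph runtime tensors and downcast internally, the same static_pointer_cast the executable already performs. The InferDirect method name is made up and is not declared in this PR's header.

// Hypothetical sketch: downcast from ngraph::runtime::Tensor inside the
// engine instead of in Executable::Call.
void IEBasicEngine::InferDirect(
    std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& inputs,
    std::vector<std::string>& input_names) {
  for (size_t i = 0; i < inputs.size(); i++) {
    if (inputs[i] == nullptr) continue;
    auto ie_tensor = std::static_pointer_cast<IETensor>(inputs[i]);
    m_infer_reqs[0].SetBlob(input_names[i], ie_tensor->get_blob());
  }
}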