Initial Backend Engine Integration #773

Open · wants to merge 1 commit into base: master

2 changes: 2 additions & 0 deletions ngraph_bridge/CMakeLists.txt
@@ -49,6 +49,8 @@ set(SRC
tf_graphcycles.cc
tf_deadness_analysis.cc
version.cc
ie_backend_engine.cc
ie_basic_engine.cc
)

message(STATUS "NGRAPH_TF_USE_GRAPPLER_OPTIMIZER: ${NGRAPH_TF_USE_GRAPPLER_OPTIMIZER}")
41 changes: 23 additions & 18 deletions ngraph_bridge/executable.cc
@@ -22,7 +22,9 @@
#include "logging/ngraph_log.h"
#include "ngraph_bridge/default_opset.h"
#include "ngraph_bridge/executable.h"
#include "ngraph_bridge/ie_basic_engine.h"
#include "ngraph_bridge/ie_tensor.h"
#include "ngraph_bridge/ie_utils.h"
#include "ngraph_bridge/ngraph_utils.h"

using namespace std;
@@ -139,12 +141,8 @@ Executable::Executable(shared_ptr<Function> func, string device)
name + "_IE_" + m_device;
}

NGRAPH_VLOG(2) << "Loading IE CNN network to device " << m_device;

// Load network to the plugin (m_device) and create an infer request
InferenceEngine::ExecutableNetwork exe_network =
ie.LoadNetwork(m_network, m_device, options);
m_infer_req = exe_network.CreateInferRequest();
NGRAPH_VLOG(2) << "Creating IE Execution Engine";
m_ie_engine = make_shared<IEBasicEngine>(m_network, m_device);
}

bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
@@ -167,7 +165,9 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
}

// Prepare input blobs
auto func = m_network.getFunction();
auto func = m_ie_engine->GetFunc();
std::vector<std::shared_ptr<IETensor>> ie_inputs(inputs.size());
Contributor: Not necessary. You can pass inputs directly to the backend.

Author: But in that case we would have to pass nGraph tensors directly and cast them to IETensors inside the engine. Then we would need to move more functionality from the executable onto the backend. We might need further discussion about the related changes.

Contributor: IETensor is an nGraph tensor.
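
A minimal sketch of the reviewer's suggestion, assuming a hypothetical engine-side Infer that accepts plain nGraph runtime tensors and performs the IETensor cast internally (not what this PR implements):

// Hypothetical signature: the engine takes generic nGraph tensors and
// downcasts them itself. IETensor derives from ngraph::runtime::Tensor,
// so static_pointer_cast is sufficient here.
void IEBasicEngine::Infer(
    const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& inputs,
    const std::vector<std::string>& input_names) {
  for (size_t i = 0; i < inputs.size(); i++) {
    if (inputs[i] == nullptr) continue;
    auto ie_tensor = std::static_pointer_cast<IETensor>(inputs[i]);
    m_infer_reqs[0].SetBlob(input_names[i], ie_tensor->get_blob());
  }
}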

std::vector<std::string> input_names(inputs.size());
auto parameters = func->get_parameters();
int j = 0;
for (int i = 0; i < inputs.size(); i++) {
@@ -180,18 +180,23 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
NGRAPH_VLOG(1) << "Skipping unused input " << input_name;
continue;
}
shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(inputs[i]);
m_infer_req.SetBlob(input_name, tv->get_blob());
ie_inputs[i] = nullptr;
ie_inputs[i] = static_pointer_cast<IETensor>(inputs[i]);
input_names[i] = input_name;
Comment on lines +183 to +185

Contributor: Please make IETensor named so that we can track the name together with the tensor.
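
One way that request could be addressed, sketched with a hypothetical helper type rather than a change to IETensor itself:

#include <memory>
#include <string>

#include "ngraph_bridge/ie_tensor.h"

// Hypothetical helper: carry the blob name together with the tensor so that
// Executable::Call no longer needs parallel ie_inputs / input_names vectors.
struct NamedTensor {
  std::string name;
  std::shared_ptr<tensorflow::ngraph_bridge::IETensor> tensor;
};

The engine's Infer could then take std::vector<NamedTensor> for inputs, outputs, and hoisted parameters.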

}

std::vector<std::shared_ptr<IETensor>> ie_hoisted_params(
m_hoisted_params.size());
std::vector<std::string> param_names(m_hoisted_params.size());
for (const auto& it : m_hoisted_params) {
auto input_name = it.first;
if (input_info.find(input_name) == input_info.end()) {
NGRAPH_VLOG(1) << "Skipping unused hoisted param " << input_name;
continue;
}
shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(it.second);
m_infer_req.SetBlob(input_name, tv->get_blob());
ie_hoisted_params[j] = nullptr;
ie_hoisted_params[j] = static_pointer_cast<IETensor>(it.second);
param_names[j++] = input_name;
}

InferenceEngine::OutputsDataMap output_info = m_network.getOutputsInfo();
@@ -214,22 +219,22 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,

// Prepare output blobs
auto results = func->get_results();
std::vector<std::shared_ptr<IETensor>> ie_outputs(outputs.size());
std::vector<std::string> output_names(outputs.size());
for (int i = 0; i < results.size(); i++) {
if (outputs[i] != nullptr) {
NGRAPH_VLOG(4) << "Executable::call() SetBlob()";
shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(outputs[i]);
m_infer_req.SetBlob(get_output_name(results[i]), tv->get_blob());
ie_outputs[i] = static_pointer_cast<IETensor>(outputs[i]);
}
output_names[i] = get_output_name(results[i]);
}

m_infer_req.Infer();
m_ie_engine->Infer(ie_inputs, input_names, ie_outputs, output_names,
ie_hoisted_params, param_names);
Comment on lines +231 to +232

Contributor: What does the backend need to know hoisted parameters for? Could they just be passed as inputs?

Author: It's possible to merge them into a single vector, but then we could not pass the inputs directly as suggested in the review above. Also, I'm not sure whether this would be safe for batching. It may not be an issue, but we need to verify.

Contributor: A hoisted parameter is an input to the model. The backend doesn't need to know whether an input was hoisted or not.

Author: For the basic backend this may not matter. The VADM backend divides the input into multiple batches, but that is not the case for the hoisted parameters. How would their sizes be affected by the actual input batch size? If we pass them as regular inputs to the backend now, the VADM backend will disable batching because it sees multiple inputs, although the actual input size is 1.
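
For reference, a sketch of the reviewer's alternative of appending the hoisted parameters to the regular inputs, assuming the batching concern above were resolved and assuming a hypothetical Infer overload without the separate hoisted-parameter arguments:

// Hypothetical: treat hoisted parameters as ordinary inputs by appending them
// to the input vectors before a single Infer call.
ie_inputs.insert(ie_inputs.end(), ie_hoisted_params.begin(),
                 ie_hoisted_params.end());
input_names.insert(input_names.end(), param_names.begin(), param_names.end());
m_ie_engine->Infer(ie_inputs, input_names, ie_outputs, output_names);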


// Set dynamic output blobs
for (int i = 0; i < results.size(); i++) {
if (outputs[i] == nullptr) {
NGRAPH_VLOG(4) << "Executable::call() GetBlob()";
auto blob = m_infer_req.GetBlob(get_output_name(results[i]));
outputs[i] = make_shared<IETensor>(blob);
outputs[i] = ie_outputs[i];
}
}

3 changes: 3 additions & 0 deletions ngraph_bridge/executable.h
@@ -23,6 +23,8 @@
#include <ie_core.hpp>
#include "ngraph/ngraph.hpp"

#include "ngraph_bridge/ie_backend_engine.h"

using namespace std;

namespace tensorflow {
@@ -56,6 +58,7 @@ class Executable {
shared_ptr<ngraph::Function> m_trivial_fn;
// This is the original nGraph function corresponding to this executable
shared_ptr<ngraph::Function> m_function;
shared_ptr<IEBackendEngine> m_ie_engine;
};
}
}
102 changes: 102 additions & 0 deletions ngraph_bridge/ie_backend_engine.cc
@@ -0,0 +1,102 @@
/*******************************************************************************
* Copyright 2017-2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include <iostream>

#include "ngraph_bridge/ie_backend_engine.h"
#include "ngraph_bridge/ie_utils.h"

namespace tensorflow {
namespace ngraph_bridge {

IEBackendEngine::IEBackendEngine(InferenceEngine::CNNNetwork ie_network,
std::string device)
: m_network(ie_network),
m_func(ie_network.getFunction()),
m_device(device),
m_multi_req_execution(false),
m_network_ready(false) {
if (std::getenv("NGRAPH_TF_DUMP_GRAPHS")) {
auto& name = m_network.getName();
m_network.serialize(name + ".xml", name + ".bin");
}
}

IEBackendEngine::~IEBackendEngine() {}

void IEBackendEngine::LoadNetwork() {
if (m_network_ready) return;

std::map<std::string, std::string> config;

if (m_device == "MYRIAD") {
// Set MYRIAD configurations
if (IEUtils::VPUConfigEnabled()) {
config["MYRIAD_DETECT_NETWORK_BATCH"] = "NO";
}

if (IEUtils::VPUFastCompileEnabled()) {
config["MYRIAD_HW_INJECT_STAGES"] = "NO";
config["MYRIAD_COPY_OPTIMIZATION"] = "NO";
}
}
Comment on lines +45 to +55

Contributor: These customizations should be set in the "myriad" backend implementation.

Author: Answered within the comment below since it is related.


InferenceEngine::Core ie;
// Load network to the plugin (m_device)
m_exe_network = ie.LoadNetwork(m_network, m_device, config);
m_network_ready = true;
}
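
A sketch of how the device-specific settings above could move into a dedicated MYRIAD engine, as the review comment suggests; the IEMyriadEngine class is hypothetical and not part of this PR:

// Hypothetical MYRIAD engine: owns its own device configuration so the base
// LoadNetwork stays free of device-specific branches.
class IEMyriadEngine : public IEBasicEngine {
 public:
  using IEBasicEngine::IEBasicEngine;

 protected:
  void LoadNetwork() override {
    if (m_network_ready) return;
    std::map<std::string, std::string> config;
    if (IEUtils::VPUConfigEnabled()) {
      config["MYRIAD_DETECT_NETWORK_BATCH"] = "NO";
    }
    if (IEUtils::VPUFastCompileEnabled()) {
      config["MYRIAD_HW_INJECT_STAGES"] = "NO";
      config["MYRIAD_COPY_OPTIMIZATION"] = "NO";
    }
    InferenceEngine::Core ie;
    m_exe_network = ie.LoadNetwork(m_network, m_device, config);
    m_network_ready = true;
  }
};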

void IEBackendEngine::StartAsyncInference(const int req_id) {
// Start Async inference
try {
m_infer_reqs[req_id].StartAsync();
} catch (InferenceEngine::details::InferenceEngineException e) {
THROW_IE_EXCEPTION << "Couldn't start Inference: ";
} catch (...) {
THROW_IE_EXCEPTION << "Couldn't start Inference: ";
}
}

void IEBackendEngine::CompleteAsyncInference(const int req_id) {
// Wait for Async inference completion
try {
m_infer_reqs[req_id].Wait(
InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
} catch (InferenceEngine::details::InferenceEngineException e) {
THROW_IE_EXCEPTION << " Exception with completing Inference: ";
} catch (...) {
THROW_IE_EXCEPTION << " Exception with completing Inference: ";
}
}

size_t IEBackendEngine::GetOutputBatchSize(size_t input_batch_size) const {
return m_network.getBatchSize() *
IEUtils::GetNumRequests(input_batch_size, m_device);
}

// Enables multi request execution if the execution engine supports it
void IEBackendEngine::EnableMultiReqExecution() {
m_multi_req_execution = true;
}
// Disables multi request execution
void IEBackendEngine::DisableMultiReqExecution() {
m_multi_req_execution = false;
}

std::shared_ptr<ngraph::Function> IEBackendEngine::GetFunc() { return m_func; }
}
}
74 changes: 74 additions & 0 deletions ngraph_bridge/ie_backend_engine.h
@@ -0,0 +1,74 @@
/*******************************************************************************
Contributor: Minor nit in terms of naming: I'd prefer to call this Backend. The "IE" prefix is unnecessary since that's the only type of backend we expect to interface with. The files can similarly be renamed to:

backend.{h,cc}
backends/hddl.{h,cc}
backends/myriad.{h,cc}

* Copyright 2017-2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#ifndef IE_BACKEND_ENGINE_H_
#define IE_BACKEND_ENGINE_H_

#include <memory>
#include <string>
#include <vector>

#include <ie_core.hpp>

#include "ngraph_bridge/ie_tensor.h"

namespace tensorflow {
namespace ngraph_bridge {

class IEBackendEngine {
public:
IEBackendEngine(InferenceEngine::CNNNetwork ie_network, std::string device);
~IEBackendEngine();

// Executes the inference
virtual void Infer(std::vector<std::shared_ptr<IETensor>>& inputs,
std::vector<std::string>& input_names,
std::vector<std::shared_ptr<IETensor>>& outputs,
std::vector<std::string>& output_names,
std::vector<std::shared_ptr<IETensor>>& hoisted_params,
std::vector<std::string>& param_names) = 0;

// Returns output batch size based on the input batch size and the device
// FIXME: This may not be needed
virtual size_t GetOutputBatchSize(size_t input_batch_size) const;
Contributor: Not needed?

Author: This can be removed.


// Enables multi request execution if the execution engine supports it
void EnableMultiReqExecution();
// Disables multi request execution
void DisableMultiReqExecution();
Comment on lines +48 to +51

Contributor: This can be a parameter to the HDDL backend constructor.

Author: That may limit our ability to enable/disable batching dynamically. Or this parameter might become part of caching, but that would require more changes to the existing bridge code.

Contributor: In which scenarios would we want to enable/disable batching dynamically for a given network executing on a given device? If we expect this interface to be limited to a specific backend, then it shouldn't be part of the abstract interface.

Author: Then this should be a parameter to the Executable constructor, since the backend is created there.
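
A sketch of the constructor-parameter alternative discussed above; the extra flag is hypothetical and not part of this PR:

// Hypothetical: request multi-request execution at construction time instead
// of toggling it later through Enable/DisableMultiReqExecution().
IEBackendEngine(InferenceEngine::CNNNetwork ie_network, std::string device,
                bool enable_multi_req_execution = false);

Executable would then forward the flag when it creates the engine, for example make_shared<IEBasicEngine>(m_network, m_device, /*enable_multi_req_execution=*/true).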


// Returns the NGraph Function from the CNNNetwork
std::shared_ptr<ngraph::Function> GetFunc();

virtual const std::vector<size_t> GetOutputShape(const int i) = 0;
Contributor: Not needed?

Author: This function is needed for the VADM backend. We can remove it in this PR, but we need to bring it back with the next PR.

Contributor: Can't the output shape be determined either from the function or from the output that was originally passed to the backend?

Author: The VADM backend modifies the batch size of the function, so the output size we get from the function will be wrong. We could get the output size just after creating the function and use it later for allocations, maybe. That would add extra functionality to NGraphEncapsulateOp, and I need to test to see how it works.
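
A sketch of the record-the-shapes-early idea from the discussion above, assuming a hypothetical m_output_shapes member populated before any batch-size rewriting:

// Hypothetical: capture output shapes in the IEBackendEngine constructor,
// before a device-specific backend changes the batch size, e.g.:
//   for (const auto& result : m_func->get_results())
//     m_output_shapes.push_back(result->get_shape());
std::vector<ngraph::Shape> m_output_shapes;

const std::vector<size_t> GetOutputShape(const int i) {
  // ngraph::Shape derives from std::vector<size_t>, so this converts directly.
  return m_output_shapes[i];
}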


protected:
InferenceEngine::CNNNetwork m_network;
std::shared_ptr<ngraph::Function> m_func;
std::vector<InferenceEngine::InferRequest> m_infer_reqs;
std::string m_device;
bool m_multi_req_execution;
InferenceEngine::ExecutableNetwork m_exe_network;
bool m_network_ready;

virtual void StartAsyncInference(const int req_id);
virtual void CompleteAsyncInference(const int req_id);
Comment on lines +67 to +68

Contributor: How is one supposed to use these? Infer only uses one infer request.

Author: These are required for asynchronous execution. Currently we need this for VADM. For the other backends, it will be the same as Infer for now.

Contributor: Yes, I can tell that these are required for asynchronous execution from the name :)

I was asking how one would use this interface because I don't see a way to create an asynchronous inference request. The implementation for these is broken at the moment, and I'd prefer that we implement it correctly if we're extending the interface.

Author: It does not have any impact on the execution. We kept all executions as async to have a common call for all backends, in case we need it for multiple backends in the future. We can move back to the Infer call for the basic execution for now; it will not make any difference.
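
For illustration, a sketch of how the async helpers could back a synchronous Infer so that single-request and multi-request backends share one code path (assuming the request at index 0 has already been created after LoadNetwork):

// Hypothetical body for a default Infer after the blobs have been set:
// start the request asynchronously, then block until it completes.
// For a single request this is equivalent to m_infer_reqs[0].Infer().
StartAsyncInference(0);     // m_infer_reqs[0].StartAsync()
CompleteAsyncInference(0);  // Wait(WaitMode::RESULT_READY)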

virtual void LoadNetwork();
};
}
}

#endif // IE_BACKEND_ENGINE_H_
80 changes: 80 additions & 0 deletions ngraph_bridge/ie_basic_engine.cc
@@ -0,0 +1,80 @@
/*******************************************************************************
Contributor: "Basic Engine" is unnecessary if you provide a default implementation for the abstract backend? Backends that don't need a custom Infer or custom configuration can fall back to the default implementation.

Author: Should the basic execution be in Executable or in BackendEngine? We would disagree with the first option. The second might be possible, and it may also help with the MYRIAD configuration issue above, but we need to think about whether it will impact anything else in the future. Although we may change this design later, I don't think it's in bad shape to be merged now.

Contributor: "Basic" execution should be in the implementation of the default Backend.

* Copyright 2017-2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include <iostream>

#include "logging/ngraph_log.h"
#include "ngraph_bridge/ie_basic_engine.h"
#include "ngraph_bridge/ie_utils.h"

namespace tensorflow {
namespace ngraph_bridge {

IEBasicEngine::IEBasicEngine(InferenceEngine::CNNNetwork ie_network,
std::string device)
: IEBackendEngine(ie_network, device) {}

IEBasicEngine::~IEBasicEngine() {}

void IEBasicEngine::Infer(
std::vector<std::shared_ptr<IETensor>>& inputs,
std::vector<std::string>& input_names,
std::vector<std::shared_ptr<IETensor>>& outputs,
std::vector<std::string>& output_names,
std::vector<std::shared_ptr<IETensor>>& hoisted_params,
std::vector<std::string>& param_names) {
LoadNetwork();
if (m_infer_reqs.empty()) {
m_infer_reqs.push_back(m_exe_network.CreateInferRequest());
}

// Prepare input blobs
auto func = m_network.getFunction();
auto parameters = func->get_parameters();
for (int i = 0; i < inputs.size(); i++) {
if (inputs[i] != nullptr)
m_infer_reqs[0].SetBlob(input_names[i], inputs[i]->get_blob());
}

for (int i = 0; i < hoisted_params.size(); i++) {
if (hoisted_params[i] != nullptr)
m_infer_reqs[0].SetBlob(param_names[i], hoisted_params[i]->get_blob());
}

// Prepare output blobs
auto results = func->get_results();
for (int i = 0; i < results.size(); i++) {
if (outputs[i] != nullptr) {
NGRAPH_VLOG(4) << "Executable::call() SetBlob()";
m_infer_reqs[0].SetBlob(output_names[i], outputs[i]->get_blob());
}
}

m_infer_reqs[0].Infer();

// Set dynamic output blobs
for (int i = 0; i < results.size(); i++) {
if (outputs[i] == nullptr) {
NGRAPH_VLOG(4) << "Executable::call() GetBlob()";
auto blob = m_infer_reqs[0].GetBlob(output_names[i]);
outputs[i] = std::make_shared<IETensor>(blob);
}
}

// return true;
}
}
}
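
Following up on the review thread at the top of ie_basic_engine.cc: a sketch of the reviewer's alternative, in which the abstract engine ships the plain single-request Infer as its default so that a separate "basic" engine is no longer required (a hypothetical restructuring, not part of this PR):

// Hypothetical: Infer is a default implementation on the base engine rather
// than a pure virtual; device-specific engines override it only when needed.
class IEBackendEngine {
 public:
  virtual void Infer(std::vector<std::shared_ptr<IETensor>>& inputs,
                     std::vector<std::string>& input_names,
                     std::vector<std::shared_ptr<IETensor>>& outputs,
                     std::vector<std::string>& output_names,
                     std::vector<std::shared_ptr<IETensor>>& hoisted_params,
                     std::vector<std::string>& param_names);  // no "= 0"
  // ... remainder of the interface as declared in ie_backend_engine.h ...
};

Its body would be what IEBasicEngine::Infer contains above.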