Initial Backend Engine Integration #773

Open · wants to merge 1 commit into base: master

2 changes: 2 additions & 0 deletions ngraph_bridge/CMakeLists.txt
@@ -49,6 +49,8 @@ set(SRC
tf_graphcycles.cc
tf_deadness_analysis.cc
version.cc
ie_backend_engine.cc
ie_basic_engine.cc
)

message(STATUS "NGRAPH_TF_USE_GRAPPLER_OPTIMIZER: ${NGRAPH_TF_USE_GRAPPLER_OPTIMIZER}")
41 changes: 23 additions & 18 deletions ngraph_bridge/executable.cc
@@ -22,7 +22,9 @@
#include "logging/ngraph_log.h"
#include "ngraph_bridge/default_opset.h"
#include "ngraph_bridge/executable.h"
#include "ngraph_bridge/ie_basic_engine.h"
#include "ngraph_bridge/ie_tensor.h"
#include "ngraph_bridge/ie_utils.h"
#include "ngraph_bridge/ngraph_utils.h"

using namespace std;
@@ -139,12 +141,8 @@ Executable::Executable(shared_ptr<Function> func, string device)
name + "_IE_" + m_device;
}

NGRAPH_VLOG(2) << "Loading IE CNN network to device " << m_device;

// Load network to the plugin (m_device) and create an infer request
InferenceEngine::ExecutableNetwork exe_network =
ie.LoadNetwork(m_network, m_device, options);
m_infer_req = exe_network.CreateInferRequest();
NGRAPH_VLOG(2) << "Creating IE Execution Engine";
m_ie_engine = make_shared<IEBasicEngine>(m_network, m_device);
}

bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
@@ -167,7 +165,9 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
}

// Prepare input blobs
auto func = m_network.getFunction();
auto func = m_ie_engine->GetFunc();
std::vector<std::shared_ptr<IETensor>> ie_inputs(inputs.size());
Contributor: Not necessary. You can pass inputs directly to the backend.

Author: But in that case we would have to pass nGraph tensors directly and cast them to IETensors inside the engine. Then we would need to move more functionality from the executable onto the backend. We might need further discussion about the related changes.

Contributor: IETensor is an nGraph tensor.
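
A minimal sketch of the reviewer's suggestion, assuming a hypothetical engine-side Infer that accepts plain nGraph runtime tensors and performs the IETensor cast internally (not what this PR implements):

// Hypothetical signature: the engine takes generic nGraph tensors and
// downcasts them itself. IETensor derives from ngraph::runtime::Tensor,
// so static_pointer_cast is sufficient here.
void IEBasicEngine::Infer(
    const std::vector<std::shared_ptr<ngraph::runtime::Tensor>>& inputs,
    const std::vector<std::string>& input_names) {
  for (size_t i = 0; i < inputs.size(); i++) {
    if (inputs[i] == nullptr) continue;
    auto ie_tensor = std::static_pointer_cast<IETensor>(inputs[i]);
    m_infer_reqs[0].SetBlob(input_names[i], ie_tensor->get_blob());
  }
}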

std::vector<std::string> input_names(inputs.size());
auto parameters = func->get_parameters();
int j = 0;
for (int i = 0; i < inputs.size(); i++) {
@@ -180,18 +180,23 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,
NGRAPH_VLOG(1) << "Skipping unused input " << input_name;
continue;
}
shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(inputs[i]);
m_infer_req.SetBlob(input_name, tv->get_blob());
ie_inputs[i] = nullptr;
ie_inputs[i] = static_pointer_cast<IETensor>(inputs[i]);
input_names[i] = input_name;
Comment on lines +183 to +185

Contributor: Please make IETensor named so that we can track the name together with the tensor.
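
One way that request could be addressed, sketched with a hypothetical helper type rather than a change to IETensor itself:

#include <memory>
#include <string>

#include "ngraph_bridge/ie_tensor.h"

// Hypothetical helper: carry the blob name together with the tensor so that
// Executable::Call no longer needs parallel ie_inputs / input_names vectors.
struct NamedTensor {
  std::string name;
  std::shared_ptr<tensorflow::ngraph_bridge::IETensor> tensor;
};

The engine's Infer could then take std::vector<NamedTensor> for inputs, outputs, and hoisted parameters.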

}

std::vector<std::shared_ptr<IETensor>> ie_hoisted_params(
m_hoisted_params.size());
std::vector<std::string> param_names(m_hoisted_params.size());
for (const auto& it : m_hoisted_params) {
auto input_name = it.first;
if (input_info.find(input_name) == input_info.end()) {
NGRAPH_VLOG(1) << "Skipping unused hoisted param " << input_name;
continue;
}
shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(it.second);
m_infer_req.SetBlob(input_name, tv->get_blob());
ie_hoisted_params[j] = nullptr;
ie_hoisted_params[j] = static_pointer_cast<IETensor>(it.second);
param_names[j++] = input_name;
}

InferenceEngine::OutputsDataMap output_info = m_network.getOutputsInfo();
@@ -214,22 +219,22 @@ bool Executable::Call(const vector<shared_ptr<runtime::Tensor>>& inputs,

// Prepare output blobs
auto results = func->get_results();
std::vector<std::shared_ptr<IETensor>> ie_outputs(outputs.size());
std::vector<std::string> output_names(outputs.size());
for (int i = 0; i < results.size(); i++) {
if (outputs[i] != nullptr) {
NGRAPH_VLOG(4) << "Executable::call() SetBlob()";
shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(outputs[i]);
m_infer_req.SetBlob(get_output_name(results[i]), tv->get_blob());
ie_outputs[i] = static_pointer_cast<IETensor>(outputs[i]);
}
output_names[i] = get_output_name(results[i]);
}

m_infer_req.Infer();
m_ie_engine->Infer(ie_inputs, input_names, ie_outputs, output_names,
ie_hoisted_params, param_names);
Comment on lines +231 to +232

Contributor: What does the backend need to know hoisted parameters for? Could they just be passed as inputs?

Author: It's possible to merge them into a single vector, but then we could not pass the inputs directly as suggested in the review above. Also, I'm not sure whether this would be safe for batching. It may not be an issue, but we need to verify.

Contributor: A hoisted parameter is an input to the model. The backend doesn't need to know whether an input was hoisted or not.

Author: For the basic backend this may not matter. The VADM backend divides the input into multiple batches, but that is not the case for the hoisted parameters. How would their sizes be affected by the actual input batch size? If we pass them as regular inputs to the backend now, the VADM backend will disable batching because it sees multiple inputs, although the actual input size is 1.
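
For reference, a sketch of the reviewer's alternative of appending the hoisted parameters to the regular inputs, assuming the batching concern above were resolved and assuming a hypothetical Infer overload without the separate hoisted-parameter arguments:

// Hypothetical: treat hoisted parameters as ordinary inputs by appending them
// to the input vectors before a single Infer call.
ie_inputs.insert(ie_inputs.end(), ie_hoisted_params.begin(),
                 ie_hoisted_params.end());
input_names.insert(input_names.end(), param_names.begin(), param_names.end());
m_ie_engine->Infer(ie_inputs, input_names, ie_outputs, output_names);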


// Set dynamic output blobs
for (int i = 0; i < results.size(); i++) {
if (outputs[i] == nullptr) {
NGRAPH_VLOG(4) << "Executable::call() GetBlob()";
auto blob = m_infer_req.GetBlob(get_output_name(results[i]));
outputs[i] = make_shared<IETensor>(blob);
outputs[i] = ie_outputs[i];
}
}

3 changes: 3 additions & 0 deletions ngraph_bridge/executable.h
@@ -23,6 +23,8 @@
#include <ie_core.hpp>
#include "ngraph/ngraph.hpp"

#include "ngraph_bridge/ie_backend_engine.h"

using namespace std;

namespace tensorflow {
@@ -56,6 +58,7 @@ class Executable {
shared_ptr<ngraph::Function> m_trivial_fn;
// This is the original nGraph function corresponding to this executable
shared_ptr<ngraph::Function> m_function;
shared_ptr<IEBackendEngine> m_ie_engine;
};
}
}
102 changes: 102 additions & 0 deletions ngraph_bridge/ie_backend_engine.cc
@@ -0,0 +1,102 @@
/*******************************************************************************
* Copyright 2017-2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include <iostream>

#include "ngraph_bridge/ie_backend_engine.h"
#include "ngraph_bridge/ie_utils.h"

namespace tensorflow {
namespace ngraph_bridge {

IEBackendEngine::IEBackendEngine(InferenceEngine::CNNNetwork ie_network,
std::string device)
: m_network(ie_network),
m_func(ie_network.getFunction()),
m_device(device),
m_multi_req_execution(false),
m_network_ready(false) {
if (std::getenv("NGRAPH_TF_DUMP_GRAPHS")) {
auto& name = m_network.getName();
m_network.serialize(name + ".xml", name + ".bin");
}
}

IEBackendEngine::~IEBackendEngine() {}

void IEBackendEngine::LoadNetwork() {
if (m_network_ready) return;

std::map<std::string, std::string> config;

if (m_device == "MYRIAD") {
// Set MYRIAD configurations
if (IEUtils::VPUConfigEnabled()) {
config["MYRIAD_DETECT_NETWORK_BATCH"] = "NO";
}

if (IEUtils::VPUFastCompileEnabled()) {
config["MYRIAD_HW_INJECT_STAGES"] = "NO";
config["MYRIAD_COPY_OPTIMIZATION"] = "NO";
}
}
Comment on lines +45 to +55

Contributor: These customizations should be set in the "myriad" backend implementation.

Author: Answered within the comment below since it is related.


InferenceEngine::Core ie;
// Load network to the plugin (m_device)
m_exe_network = ie.LoadNetwork(m_network, m_device, config);
m_network_ready = true;
}
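
A sketch of how the device-specific settings above could move into a dedicated MYRIAD engine, as the review comment suggests; the IEMyriadEngine class is hypothetical and not part of this PR:

// Hypothetical MYRIAD engine: owns its own device configuration so the base
// LoadNetwork stays free of device-specific branches.
class IEMyriadEngine : public IEBasicEngine {
 public:
  using IEBasicEngine::IEBasicEngine;

 protected:
  void LoadNetwork() override {
    if (m_network_ready) return;
    std::map<std::string, std::string> config;
    if (IEUtils::VPUConfigEnabled()) {
      config["MYRIAD_DETECT_NETWORK_BATCH"] = "NO";
    }
    if (IEUtils::VPUFastCompileEnabled()) {
      config["MYRIAD_HW_INJECT_STAGES"] = "NO";
      config["MYRIAD_COPY_OPTIMIZATION"] = "NO";
    }
    InferenceEngine::Core ie;
    m_exe_network = ie.LoadNetwork(m_network, m_device, config);
    m_network_ready = true;
  }
};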

void IEBackendEngine::StartAsyncInference(const int req_id) {
// Start Async inference
try {
m_infer_reqs[req_id].StartAsync();
} catch (InferenceEngine::details::InferenceEngineException e) {
THROW_IE_EXCEPTION << "Couldn't start Inference: ";
} catch (...) {
THROW_IE_EXCEPTION << "Couldn't start Inference: ";
}
}

void IEBackendEngine::CompleteAsyncInference(const int req_id) {
// Wait for Async inference completion
try {
m_infer_reqs[req_id].Wait(
InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
} catch (InferenceEngine::details::InferenceEngineException e) {
THROW_IE_EXCEPTION << " Exception with completing Inference: ";
} catch (...) {
THROW_IE_EXCEPTION << " Exception with completing Inference: ";
}
}

size_t IEBackendEngine::GetOutputBatchSize(size_t input_batch_size) const {
return m_network.getBatchSize() *
IEUtils::GetNumRequests(input_batch_size, m_device);
}

// Enables multi request execution if the execution engine supports it
void IEBackendEngine::EnableMultiReqExecution() {
m_multi_req_execution = true;
}
// Disables multi request execution
void IEBackendEngine::DisableMultiReqExecution() {
m_multi_req_execution = false;
}

std::shared_ptr<ngraph::Function> IEBackendEngine::GetFunc() { return m_func; }
}
}
74 changes: 74 additions & 0 deletions ngraph_bridge/ie_backend_engine.h
@@ -0,0 +1,74 @@
/*******************************************************************************
Contributor: Minor nit in terms of naming: I'd prefer to call this Backend. The "IE" prefix is unnecessary since that's the only type of backend we expect to interface with. The files can similarly be renamed to:

backend.{h,cc}
backends/hddl.{h,cc}
backends/myriad.{h,cc}

* Copyright 2017-2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#ifndef IE_BACKEND_ENGINE_H_
#define IE_BACKEND_ENGINE_H_

#include <memory>
#include <string>
#include <vector>

#include <ie_core.hpp>

#include "ngraph_bridge/ie_tensor.h"

namespace tensorflow {
namespace ngraph_bridge {

class IEBackendEngine {
public:
IEBackendEngine(InferenceEngine::CNNNetwork ie_network, std::string device);
~IEBackendEngine();

// Executes the inference
virtual void Infer(std::vector<std::shared_ptr<IETensor>>& inputs,
std::vector<std::string>& input_names,
std::vector<std::shared_ptr<IETensor>>& outputs,
std::vector<std::string>& output_names,
std::vector<std::shared_ptr<IETensor>>& hoisted_params,
std::vector<std::string>& param_names) = 0;

// Returns output batch size based on the input batch size and the device
// FIXME: This may not be needed
virtual size_t GetOutputBatchSize(size_t input_batch_size) const;
Contributor: Not needed?

Author: This can be removed.


// Enables multi request execution if the execution engine supports it
void EnableMultiReqExecution();
// Disables multi request execution
void DisableMultiReqExecution();
Comment on lines +48 to +51

Contributor: This can be a parameter to the HDDL backend constructor.

Author: That may limit our ability to enable/disable batching dynamically. Or this parameter might become part of caching, but that would require more changes to the existing bridge code.

Contributor: In which scenarios would we want to enable/disable batching dynamically for a given network executing on a given device? If we expect this interface to be limited to a specific backend, then it shouldn't be part of the abstract interface.

Author: Then this should be a parameter to the Executable constructor, since the backend is created there.
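
A sketch of the constructor-parameter alternative discussed above; the extra flag is hypothetical and not part of this PR:

// Hypothetical: request multi-request execution at construction time instead
// of toggling it later through Enable/DisableMultiReqExecution().
IEBackendEngine(InferenceEngine::CNNNetwork ie_network, std::string device,
                bool enable_multi_req_execution = false);

Executable would then forward the flag when it creates the engine, for example make_shared<IEBasicEngine>(m_network, m_device, /*enable_multi_req_execution=*/true).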


// Returns the NGraph Function from the CNNNetwork
std::shared_ptr<ngraph::Function> GetFunc();

virtual const std::vector<size_t> GetOutputShape(const int i) = 0;
Contributor: Not needed?

Author: This function is needed for the VADM backend. We can remove it in this PR, but we need to bring it back with the next PR.

Contributor: Can't the output shape be determined either from the function or from the output that was originally passed to the backend?

Author: The VADM backend modifies the batch size of the function, so the output size we get from the function will be wrong. We could get the output size just after creating the function and use it later for allocations, maybe. That would add extra functionality to NGraphEncapsulateOp, and I need to test to see how it works.
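
A sketch of the record-the-shapes-early idea from the discussion above, assuming a hypothetical m_output_shapes member populated before any batch-size rewriting:

// Hypothetical: capture output shapes in the IEBackendEngine constructor,
// before a device-specific backend changes the batch size, e.g.:
//   for (const auto& result : m_func->get_results())
//     m_output_shapes.push_back(result->get_shape());
std::vector<ngraph::Shape> m_output_shapes;

const std::vector<size_t> GetOutputShape(const int i) {
  // ngraph::Shape derives from std::vector<size_t>, so this converts directly.
  return m_output_shapes[i];
}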


protected:
InferenceEngine::CNNNetwork m_network;
std::shared_ptr<ngraph::Function> m_func;
std::vector<InferenceEngine::InferRequest> m_infer_reqs;
std::string m_device;
bool m_multi_req_execution;
InferenceEngine::ExecutableNetwork m_exe_network;
bool m_network_ready;

virtual void StartAsyncInference(const int req_id);
virtual void CompleteAsyncInference(const int req_id);
Comment on lines +67 to +68

Contributor: How is one supposed to use these? Infer only uses one infer request.

Author: These are required for asynchronous execution. Currently we need this for VADM. For the other backends, it will be the same as Infer for now.

Contributor: Yes, I can tell that these are required for asynchronous execution from the name :)

I was asking how one would use this interface because I don't see a way to create an asynchronous inference request. The implementation for these is broken at the moment, and I'd prefer that we implement it correctly if we're extending the interface.

Author: It does not have any impact on the execution. We kept all executions as async to have a common call for all backends, in case we need it for multiple backends in the future. We can move back to the Infer call for the basic execution for now; it will not make any difference.
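
For illustration, a sketch of how the async helpers could back a synchronous Infer so that single-request and multi-request backends share one code path (assuming the request at index 0 has already been created after LoadNetwork):

// Hypothetical body for a default Infer after the blobs have been set:
// start the request asynchronously, then block until it completes.
// For a single request this is equivalent to m_infer_reqs[0].Infer().
StartAsyncInference(0);     // m_infer_reqs[0].StartAsync()
CompleteAsyncInference(0);  // Wait(WaitMode::RESULT_READY)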

virtual void LoadNetwork();
};
}
}

#endif // IE_BACKEND_ENGINE_H_
80 changes: 80 additions & 0 deletions ngraph_bridge/ie_basic_engine.cc
@@ -0,0 +1,80 @@
/*******************************************************************************
Contributor: "Basic Engine" is unnecessary if you provide a default implementation for the abstract backend? Backends that don't need a custom Infer or custom configuration can fall back to the default implementation.

Author: Should the basic execution be in Executable or in BackendEngine? We would disagree with the first option. The second might be possible, and it may also help with the MYRIAD configuration issue above, but we need to think about whether it will impact anything else in the future. Although we may change this design later, I don't think it's in bad shape to be merged now.

Contributor: "Basic" execution should be in the implementation of the default Backend.

* Copyright 2017-2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include <iostream>

#include "logging/ngraph_log.h"
#include "ngraph_bridge/ie_basic_engine.h"
#include "ngraph_bridge/ie_utils.h"

namespace tensorflow {
namespace ngraph_bridge {

IEBasicEngine::IEBasicEngine(InferenceEngine::CNNNetwork ie_network,
std::string device)
: IEBackendEngine(ie_network, device) {}

IEBasicEngine::~IEBasicEngine() {}

void IEBasicEngine::Infer(
std::vector<std::shared_ptr<IETensor>>& inputs,
std::vector<std::string>& input_names,
std::vector<std::shared_ptr<IETensor>>& outputs,
std::vector<std::string>& output_names,
std::vector<std::shared_ptr<IETensor>>& hoisted_params,
std::vector<std::string>& param_names) {
LoadNetwork();
if (m_infer_reqs.empty()) {
m_infer_reqs.push_back(m_exe_network.CreateInferRequest());
}

// Prepare input blobs
auto func = m_network.getFunction();
auto parameters = func->get_parameters();
for (int i = 0; i < inputs.size(); i++) {
if (inputs[i] != nullptr)
m_infer_reqs[0].SetBlob(input_names[i], inputs[i]->get_blob());
}

for (int i = 0; i < hoisted_params.size(); i++) {
if (hoisted_params[i] != nullptr)
m_infer_reqs[0].SetBlob(param_names[i], hoisted_params[i]->get_blob());
}

// Prepare output blobs
auto results = func->get_results();
for (int i = 0; i < results.size(); i++) {
if (outputs[i] != nullptr) {
NGRAPH_VLOG(4) << "Executable::call() SetBlob()";
m_infer_reqs[0].SetBlob(output_names[i], outputs[i]->get_blob());
}
}

m_infer_reqs[0].Infer();

// Set dynamic output blobs
for (int i = 0; i < results.size(); i++) {
if (outputs[i] == nullptr) {
NGRAPH_VLOG(4) << "Executable::call() GetBlob()";
auto blob = m_infer_reqs[0].GetBlob(output_names[i]);
outputs[i] = std::make_shared<IETensor>(blob);
}
}

// return true;
}
}
}
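
Following up on the review thread at the top of ie_basic_engine.cc: a sketch of the reviewer's alternative, in which the abstract engine ships the plain single-request Infer as its default so that a separate "basic" engine is no longer required (a hypothetical restructuring, not part of this PR):

// Hypothetical: Infer is a default implementation on the base engine rather
// than a pure virtual; device-specific engines override it only when needed.
class IEBackendEngine {
 public:
  virtual void Infer(std::vector<std::shared_ptr<IETensor>>& inputs,
                     std::vector<std::string>& input_names,
                     std::vector<std::shared_ptr<IETensor>>& outputs,
                     std::vector<std::string>& output_names,
                     std::vector<std::shared_ptr<IETensor>>& hoisted_params,
                     std::vector<std::string>& param_names);  // no "= 0"
  // ... remainder of the interface as declared in ie_backend_engine.h ...
};

Its body would be what IEBasicEngine::Infer contains above.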