NPUW: Deref #27799

Merged
18 changes: 18 additions & 0 deletions src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -434,6 +434,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,

// Finalize memory in closures and weight banks
finalize_weights_bank();
detach_memory();

// Print stats report when possible
{
@@ -499,6 +500,23 @@ void ov::npuw::CompiledModel::finalize_weights_bank() {
LOG_INFO("Done.");
}

void ov::npuw::CompiledModel::detach_memory() {
LOG_INFO("Detaching model & weight memory...");
LOG_BLOCK();
for (size_t idx = 0; idx < m_compiled_submodels.size(); ++idx) {
auto& comp_model_desc = m_compiled_submodels[idx];
auto& proto_comp_model_desc = m_compiled_submodels[comp_model_desc.replaced_by.value_or(idx)];
if (!proto_comp_model_desc.model || !proto_comp_model_desc.compiled_model) {
continue; // optimized-out OR already cleared - skip
}
if (proto_comp_model_desc.device_it + 1 == m_dev_list.end()) {
LOG_INFO("No fallback expected - clear the OV model for Subgraph[" << idx << "]");
proto_comp_model_desc.model.reset();
}
}
LOG_INFO("Done");
}

std::string ov::npuw::CompiledModel::global_mem_device() const {
// Force globally set device if set
const std::string device_alloc = m_cfg.get<::intel_npu::NPUW_WEIGHTS_BANK_ALLOC>();
2 changes: 2 additions & 0 deletions src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
@@ -78,6 +78,8 @@ class CompiledModel : public ov::ICompiledModel {
void implement_properties();

void finalize_weights_bank();
void detach_memory();

std::string global_mem_device() const;
std::string funcall_mem_device(const std::size_t idx) const;

115 changes: 74 additions & 41 deletions src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
@@ -19,23 +19,34 @@ namespace npuw {
namespace weights {
namespace op {
struct Const {
std::shared_ptr<ov::op::v0::Constant> node;

std::shared_ptr<ov::op::v0::Constant> m_node;
ov::element::Type m_cached_type;
ov::Shape m_cached_shape;
const void* m_cached_ptr = nullptr;

explicit Const(std::shared_ptr<ov::op::v0::Constant> n) : m_node(n) {
m_cached_type = m_node->get_element_type();
m_cached_shape = m_node->get_shape();
m_cached_ptr = m_node->get_data_ptr();
}
std::size_t hash() const {
std::size_t seed = std::hash<const void*>()(node->get_data_ptr()) + 0x9e3779b9;
seed ^= node->get_element_type().hash() + 0x9e3779b9;
for (const auto& dim : node->get_shape()) {
std::size_t seed = std::hash<const void*>()(m_cached_ptr) + 0x9e3779b9;
seed ^= m_cached_type.hash() + 0x9e3779b9;
for (const auto& dim : m_cached_shape) {
seed ^= std::hash<std::size_t>()(dim) + 0x9e3779b9;
}
return seed;
}
bool operator==(const Const& other) const {
return (node->get_shape() == other.node->get_shape() &&
node->get_element_type() == other.node->get_element_type() &&
node->get_data_ptr() == other.node->get_data_ptr());
return (m_cached_type == other.m_cached_type && m_cached_shape == other.m_cached_shape &&
m_cached_ptr == other.m_cached_ptr);
}
ov::Tensor eval() const {
return ov::npuw::util::tensor_from_const(node);
NPUW_ASSERT(m_node && "Const::eval() can only happen before detach");
return ov::npuw::util::tensor_from_const(m_node);
}
void detach() {
m_node.reset();
}
};
struct Concat {
@@ -59,6 +70,11 @@ struct Concat {
}
return ov::npuw::util::concat(to_concat, axis);
}
void detach() {
for (auto&& lt : tensors) {
lt.detach();
}
}
};

struct Unpack {
@@ -95,6 +111,11 @@ struct Unpack {
}
return dst;
}
void detach() {
w.detach();
z.detach();
s.detach();
}
};
struct Permute {
LazyTensor tensor;
@@ -113,6 +134,9 @@ struct Permute {
ov::Tensor eval() const {
return ov::npuw::util::permute(tensor.eval(), axes);
}
void detach() {
tensor.detach();
}
};
struct Convert {
LazyTensor tensor;
@@ -130,23 +154,26 @@ struct Convert {
NPUW_ASSERT(ov::element::f16 == type);
return ov::npuw::util::to_f16(tensor.eval());
}
void detach() {
tensor.detach();
}
};
} // namespace op

using Transform = std::variant<op::Const, op::Concat, op::Unpack, op::Permute, op::Convert>;

struct LazyTensorImpl {
public:
LazyTensorImpl() = default;
explicit LazyTensorImpl(Transform&& t);
bool operator==(const LazyTensorImpl& other) const;

ov::Tensor eval() const;

bool operator==(const LazyTensorImpl& other) const;
std::size_t get_hash() const;

void detach();

Transform m_transform;
std::size_t m_hash = 0;
const std::size_t m_hash = 0;
};

} // namespace weights
@@ -165,26 +192,12 @@ struct overloaded : Ts... {
template <class... Ts>
overloaded(Ts...) -> overloaded<Ts...>;

std::size_t LazyTensorImpl::get_hash() const {
// Already calculated
if (m_hash != 0) {
return m_hash;
}

// Get hash
std::size_t seed = 0;
std::visit(overloaded{[&seed](const auto& op) {
seed ^= op.hash();
}},
m_transform);

return seed;
}

LazyTensorImpl::LazyTensorImpl(Transform&& t) {
m_transform = std::move(t);
m_hash = get_hash();
}
LazyTensorImpl::LazyTensorImpl(Transform&& t)
: m_transform(std::move(t)),
m_hash(std::visit(overloaded{[](const auto& op) {
return op.hash();
}},
m_transform)) {}

bool LazyTensorImpl::operator==(const LazyTensorImpl& other) const {
return m_hash == other.m_hash && m_transform == other.m_transform;
@@ -200,17 +213,25 @@ ov::Tensor LazyTensorImpl::eval() const {
some kind of indicator that the only difference is concat and we should look for an existing ov::Tensor.
Perhaps it should be done after model compilation and not handled here.
*/
return std::visit(overloaded{[](const auto& op) {
return op.eval();
}},
m_transform);
}

std::size_t LazyTensorImpl::get_hash() const {
return m_hash;
}

ov::Tensor result = std::visit(overloaded{[](const auto& op) {
return op.eval();
}},
m_transform);
NPUW_ASSERT(result);
return result;
void LazyTensorImpl::detach() {
std::visit(overloaded{[](auto& op) {
op.detach();
}},
m_transform);
}

LazyTensor::LazyTensor(const std::shared_ptr<ov::op::v0::Constant>& const_ptr)
: m_impl(std::make_shared<LazyTensorImpl>(op::Const{const_ptr})) {}
: m_impl(std::make_shared<LazyTensorImpl>(op::Const(const_ptr))) {}
LazyTensor::LazyTensor(const std::vector<LazyTensor>& to_concat, const std::size_t axis)
: m_impl(std::make_shared<LazyTensorImpl>(op::Concat{to_concat, axis})) {}
LazyTensor::LazyTensor(const LazyTensor& cw,
@@ -233,11 +254,17 @@ LazyTensor LazyTensor::convert(const ov::element::Type& type) {
}

bool LazyTensor::operator==(const LazyTensor& other) const {
if (!m_impl && !other.m_impl) {
return true;
}
if ((!m_impl && other.m_impl) || (m_impl && !other.m_impl)) {
return false;
}
return *m_impl.get() == *other.m_impl.get();
}

bool LazyTensor::operator!=(const LazyTensor& other) const {
return !(*m_impl.get() == *other.m_impl.get());
return !(*this == other);
}

ov::Tensor LazyTensor::eval() const {
@@ -254,6 +281,12 @@ std::size_t LazyTensor::get_hash() const {
return m_impl->get_hash();
}

void LazyTensor::detach() {
if (m_impl) {
m_impl->detach();
}
}

std::size_t LazyTensor::Hash::operator()(const LazyTensor& lt) const {
return lt.get_hash();
}
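
The lazy_tensor.cpp changes above follow one pattern: whatever hash() and operator== need is captured when the transform is built, so the heavy source object can be released later via detach() without invalidating the lookup keys. Below is a minimal sketch of that idea only, using simplified stand-in types (Payload and CachedConst are illustrative names, not part of this PR):

#include <cstddef>
#include <functional>
#include <memory>
#include <vector>

struct Payload {  // stand-in for ov::op::v0::Constant
    std::vector<float> data;
};

struct CachedConst {
    std::shared_ptr<Payload> m_node;     // heavy object, dropped on detach()
    const void* m_cached_ptr = nullptr;  // identity captured up front
    std::size_t m_cached_size = 0;

    explicit CachedConst(std::shared_ptr<Payload> n) : m_node(std::move(n)) {
        m_cached_ptr = m_node->data.data();
        m_cached_size = m_node->data.size();
    }
    std::size_t hash() const {  // valid both before and after detach()
        return std::hash<const void*>()(m_cached_ptr) ^ (m_cached_size + 0x9e3779b9);
    }
    bool operator==(const CachedConst& other) const {
        return m_cached_ptr == other.m_cached_ptr && m_cached_size == other.m_cached_size;
    }
    void detach() {
        m_node.reset();  // free the payload; hashing and equality keep working
    }
};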
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
@@ -39,8 +39,8 @@ class LazyTensor {
bool operator!=(const LazyTensor& other) const;

ov::Tensor eval() const;

std::size_t get_hash() const;
void detach();

private:
std::shared_ptr<LazyTensorImpl> m_impl = nullptr;
@@ -23,7 +23,7 @@ using ov::npuw::online::detail::isOp;
Group::Group(const std::shared_ptr<ov::Node>& node,
size_t gid,
own::ade::NodeHandle nh,
const std::shared_ptr<own::ade::Graph>& g,
const std::weak_ptr<own::ade::Graph>& g,
const std::weak_ptr<Snapshot>& snapshot)
: m_nh(std::move(nh)),
m_id(gid),
@@ -36,7 +36,7 @@ Group::Group(const std::shared_ptr<ov::Node>& node,

Group::Group(size_t gid,
own::ade::NodeHandle nh,
const std::shared_ptr<own::ade::Graph>& g,
const std::weak_ptr<own::ade::Graph>& g,
const std::weak_ptr<Snapshot>& snapshot)
: m_nh(std::move(nh)),
m_id(gid),
@@ -214,23 +214,25 @@ void Group::relinkGraph(const Group::GPtr& gptr_other) {
auto consumers = gptr_other->dstNodes();

// Remove the gptr_other node from the graph. Note: this also removes all of its edges
m_graph->remove(gptr_other->getHandle());
auto&& graph = m_graph.lock();
NPUW_ASSERT(graph);
graph->remove(gptr_other->getHandle());
for (const auto& nh : producers) {
if (m_nh == nh) {
continue;
}
// relink the graph
if (!m_graph->linked(nh, m_nh)) {
m_graph->link(nh, m_nh);
if (!graph->linked(nh, m_nh)) {
graph->link(nh, m_nh);
}
}
for (const auto& nh : consumers) {
if (m_nh == nh) {
continue;
}
// relink the graph
if (!m_graph->linked(m_nh, nh)) {
m_graph->link(m_nh, nh);
if (!graph->linked(m_nh, nh)) {
graph->link(m_nh, nh);
}
}
}
@@ -33,11 +33,11 @@ class Group : public std::enable_shared_from_this<Group> {
Group(const std::shared_ptr<ov::Node>& node,
size_t gid,
own::ade::NodeHandle nh,
const std::shared_ptr<own::ade::Graph>& g,
const std::weak_ptr<own::ade::Graph>& g,
const std::weak_ptr<Snapshot>& snapshot);
Group(size_t gid,
own::ade::NodeHandle nh,
const std::shared_ptr<own::ade::Graph>& g,
const std::weak_ptr<own::ade::Graph>& g,
const std::weak_ptr<Snapshot>& snapshot);

// After we formed a final structure of partitioning,
@@ -100,7 +100,7 @@ class Group : public std::enable_shared_from_this<Group> {

own::ade::NodeHandle m_nh;
size_t m_id; // used for utility prints only
std::shared_ptr<own::ade::Graph> m_graph;
std::weak_ptr<own::ade::Graph> m_graph;
std::weak_ptr<Snapshot> m_snapshot;
bool m_frozen = false;
bool m_nofold = false;
8 changes: 8 additions & 0 deletions src/plugins/intel_npu/src/plugin/npuw/util.hpp
@@ -127,6 +127,14 @@ Impl<M> _(std::shared_ptr<M> pM) {

} // namespace at

// Defined here as a drop-in replacement for ov::parallel_for, for debugging purposes
template <typename F>
void non_parallel_for(std::size_t count, F&& f) {
for (std::size_t idx = 0u; idx < count; idx++) {
f(idx);
}
}

} // namespace util
} // namespace npuw
} // namespace ov
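
A quick usage sketch for the new helper, assuming a hypothetical loop body (outputs and fill_output are made-up names for illustration). Serializing the loop this way keeps the same call shape as ov::parallel_for, so it can be swapped in while debugging and swapped back afterwards:

// Parallel version:
//   ov::parallel_for(outputs.size(), [&](std::size_t i) { fill_output(i); });
// Serialized drop-in for debugging, identical at the call site:
ov::npuw::util::non_parallel_for(outputs.size(), [&](std::size_t i) {
    fill_output(i);  // hypothetical per-index work item
});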