From e9d776593f6bd459fc5993771191f49a95e182a7 Mon Sep 17 00:00:00 2001
From: Loic Blot
Date: Fri, 25 Oct 2024 11:22:47 +0200
Subject: [PATCH] fix(exoscale): enhance UX around SKS nodepools

In 1.31.0 and lower versions, SKS nodepools were discovered through
Exoscale instance pools. While this works as expected, the end-user
experience is not very convenient, especially when end users rely on
the priority-based expander.

On our portal, SKS nodepools are shown with their ID, and the
underlying instance pool is only visible from the SKS nodepool page
itself. People trying to use the priority-based expander therefore
supply the SKS nodepool ID instead of our generic compute API
InstancePool ID.

For convenience, starting with this commit we use the SKS nodepool ID
as the node group identifier, and we add backward compatibility for
SKS clusters already managed by the cluster autoscaler. This will
reduce our support burden a bit and make the priority-based expander
UX more convenient.
---
 .gitignore                                   |  1 +
 .../exoscale/exoscale_manager.go             | 40 +++++++++++++++++--
 .../exoscale_node_group_sks_nodepool.go      |  2 +-
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 10ad5ee3f654..dd6af6a79c77 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@
 # VSCode project files
 **/.vscode
 *.code-workspace
+__debug_bin*
 
 # Emacs save files
 *~
diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_manager.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_manager.go
index 759890982f54..2b726388707b 100644
--- a/cluster-autoscaler/cloudprovider/exoscale/exoscale_manager.go
+++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_manager.go
@@ -96,14 +96,46 @@ func newManager(discoveryOpts cloudprovider.NodeGroupDiscoveryOptions) (*Manager
 // based on the `--scan-interval`. By default it's 10 seconds.
 func (m *Manager) Refresh() error {
 	var nodeGroups []cloudprovider.NodeGroup
+
+	// Load SKS clusters; they are required for the SKS node group check below.
+	sksClusters, err := m.client.ListSKSClusters(m.ctx, m.zone)
+	if err != nil {
+		errorf("unable to list SKS clusters: %v", err)
+		return err
+	}
+
 	for _, ng := range m.nodeGroups {
-		if _, err := m.client.GetInstancePool(m.ctx, m.zone, ng.Id()); err != nil {
-			if errors.Is(err, exoapi.ErrNotFound) {
+		// Check for SKS Nodepool existence first.
+		found := false
+		for _, c := range sksClusters {
+			for _, np := range c.Nodepools {
+				if *np.ID == ng.Id() {
+					if _, err := m.client.GetInstancePool(m.ctx, m.zone, *np.InstancePoolID); err != nil {
+						if !errors.Is(err, exoapi.ErrNotFound) {
+							errorf("unable to retrieve SKS Instance Pool %s: %v", ng.Id(), err)
+							return err
+						}
+					} else {
+						found = true
+						break
+					}
+				}
+			}
+		}
+
+		if !found {
+			// If the SKS Nodepool is not found, check the Instance Pool:
+			// this was the previous behavior, which was less convenient for end users.
+			if _, err := m.client.GetInstancePool(m.ctx, m.zone, ng.Id()); err != nil {
+				if !errors.Is(err, exoapi.ErrNotFound) {
+					errorf("unable to retrieve Instance Pool %s: %v", ng.Id(), err)
+					return err
+				}
+
+				// Neither SKS Nodepool nor Instance Pool found, remove it from the cache.
 				debugf("removing node group %s from manager cache", ng.Id())
 				continue
 			}
-			errorf("unable to retrieve Instance Pool %s: %v", ng.Id(), err)
-			return err
 		}
 
 		nodeGroups = append(nodeGroups, ng)
diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go
index a78f6b8e25a5..3f115eacf2a5 100644
--- a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go
+++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go
@@ -151,7 +151,7 @@ func (n *sksNodepoolNodeGroup) DecreaseTargetSize(_ int) error {
 
 // Id returns an unique identifier of the node group.
 func (n *sksNodepoolNodeGroup) Id() string {
-	return *n.sksNodepool.InstancePoolID
+	return *n.sksNodepool.ID
 }
 
 // Debug returns a string containing all information regarding this node group.
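Note (not part of the patch): the sketch below condenses the node group
resolution order that the new Refresh() logic implements, namely that a
node group ID is matched against SKS nodepool IDs first and only then
treated as a legacy Instance Pool ID. The types and the resolveNodeGroup
helper are simplified placeholders for illustration, not the real
egoscale client API.

package main

import "fmt"

// nodepool and sksCluster stand in for the egoscale SKS types referenced in
// the patch; only the fields relevant to the lookup are kept.
type nodepool struct {
	ID             string
	InstancePoolID string
}

type sksCluster struct {
	Nodepools []nodepool
}

// resolveNodeGroup mirrors the lookup order of the patched Refresh() loop:
// an SKS nodepool ID wins (new behavior); otherwise the ID is treated as a
// legacy Instance Pool ID (previous behavior).
func resolveNodeGroup(id string, clusters []sksCluster, instancePools map[string]bool) (string, bool) {
	for _, c := range clusters {
		for _, np := range c.Nodepools {
			if np.ID == id {
				return "sks-nodepool", true
			}
		}
	}
	if instancePools[id] {
		return "instance-pool", true
	}
	// Neither found: the node group would be dropped from the manager cache.
	return "", false
}

func main() {
	clusters := []sksCluster{{Nodepools: []nodepool{{ID: "np-111", InstancePoolID: "ip-222"}}}}
	legacyPools := map[string]bool{"ip-333": true}

	for _, id := range []string{"np-111", "ip-333", "unknown"} {
		kind, ok := resolveNodeGroup(id, clusters, legacyPools)
		fmt.Printf("%s -> %s (found=%t)\n", id, kind, ok)
	}
}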