diff --git a/.github/scs-compliance-check/openstack/clouds.yaml b/.github/scs-compliance-check/openstack/clouds.yaml index 80da3e855..63a2b9805 100644 --- a/.github/scs-compliance-check/openstack/clouds.yaml +++ b/.github/scs-compliance-check/openstack/clouds.yaml @@ -71,14 +71,45 @@ clouds: #region_name: "MUC" auth: auth_url: https://api.dc1.muc.cloud.cnds.io:5000/ - application_credential_id: "f3102a98821641c19d8ea762dc64b0b0" + application_credential_id: "39a5bf194c6e4b0d8348d28e55136750" #project_id: 225a7363dab74b69aa1e3f744aced109 + poc-kdo: + interface: public + identity_api_verion: 3 + auth_type: "v3applicationcredential" + auth: + auth_url: https://keystone.services.poc-kdo.fitko.sovereignit.cloud + application_credential_id: "248684b7a3da4dc786fbe65592f165be" + region_name: "RegionOne" poc-wgcloud: interface: public identity_api_verion: 3 auth_type: "v3applicationcredential" #region_name: default auth: - auth_url: https://identity.l1.cloudandheat.com/v3 - application_credential_id: "b4844a0fb23247149997bf0ff2c0b156" - #project_id: 9adb8fc81ba345178654cee5cb7f1464 + auth_url: https://identity.l1a.cloudandheat.com/v3 + application_credential_id: "7ab4e3339ea04255bc131868974cfe63" + scaleup-occ2: + auth_type: v3applicationcredential + auth: + auth_url: https://keystone.occ2.scaleup.cloud + application_credential_id: "5d2eea4e8bf8448092490b4190d4430a" + region_name: "RegionOne" + interface: "public" + identity_api_version: 3 + syseleven-dus2: + interface: public + identity_api_verion: 3 + auth_type: "v3applicationcredential" + region_name: dus2 + auth: + auth_url: https://keystone.cloud.syseleven.net:5000/v3 + application_credential_id: s11auth + syseleven-ham1: + interface: public + identity_api_verion: 3 + auth_type: "v3applicationcredential" + region_name: ham1 + auth: + auth_url: https://keystone.cloud.syseleven.net:5000/v3 + application_credential_id: s11auth diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml index d073f04a3..e53dc2502 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -22,10 +22,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Log in to the Container registry - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} @@ -33,12 +33,12 @@ jobs: - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5 with: images: ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.IMAGE_NAME }} - name: Build and push Docker image - uses: docker/build-push-action@v3 + uses: docker/build-push-action@v5 with: context: "./Tests/" push: true diff --git a/.github/workflows/check-gx-scs-v2.yml b/.github/workflows/check-gx-scs-v2.yml deleted file mode 100644 index 9c0eea1bd..000000000 --- a/.github/workflows/check-gx-scs-v2.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: "Compliance IaaS v2 of gx-scs" - -on: - # Trigger compliance check every day at 4:30 UTC - schedule: - - cron: '30 4 * * *' - # Trigger compliance check after Docker image has been built - workflow_run: - workflows: [Build and publish scs-compliance-check Docker image] - types: - - completed - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -jobs: - check-gx-scs: - uses: ./.github/workflows/scs-compliance-check.yml - with: - version: v2 - layer: iaas - cloud: "gx-scs" - secret_name: "OS_PASSWORD_GXSCS" - 
secrets: inherit diff --git a/.github/workflows/check-pco-prod1-v2.yml b/.github/workflows/check-pco-prod1-v4.yml similarity index 90% rename from .github/workflows/check-pco-prod1-v2.yml rename to .github/workflows/check-pco-prod1-v4.yml index dec078a47..4a947d3b6 100644 --- a/.github/workflows/check-pco-prod1-v2.yml +++ b/.github/workflows/check-pco-prod1-v4.yml @@ -1,4 +1,4 @@ -name: "Compliance IaaS v2 of pco-prod1" +name: "Compliance IaaS v4 of pco-prod1" on: # Trigger compliance check every day at 4:30 UTC @@ -16,7 +16,7 @@ jobs: check-pco-prod1: uses: ./.github/workflows/scs-compliance-check.yml with: - version: v2 + version: v4 layer: iaas cloud: "pco-prod1" secret_name: "OS_PASSWORD_PCOPROD1" diff --git a/.github/workflows/check-regio-a-v2.yml b/.github/workflows/check-poc-kdo-v4.yml similarity index 66% rename from .github/workflows/check-regio-a-v2.yml rename to .github/workflows/check-poc-kdo-v4.yml index 23f726311..2a6960278 100644 --- a/.github/workflows/check-regio-a-v2.yml +++ b/.github/workflows/check-poc-kdo-v4.yml @@ -1,9 +1,9 @@ -name: "Compliance IaaS v2 of regio-a" +name: "Compliance IaaS v4 of poc-kdo" on: - # Trigger compliance check every day at 4:30 UTC + # Trigger compliance check every day at 4:22 UTC schedule: - - cron: '30 4 * * *' + - cron: '22 4 * * *' # Trigger compliance check after Docker image has been built workflow_run: workflows: [Build and publish scs-compliance-check Docker image] @@ -13,11 +13,11 @@ on: workflow_dispatch: jobs: - check-regio-a: + check-poc-kdo: uses: ./.github/workflows/scs-compliance-check-with-application-credential.yml with: - version: v2 + version: v4 layer: iaas - cloud: "regio-a" - secret_name: "OS_PASSWORD_REGIO_A" + cloud: "poc-kdo" + secret_name: "OS_PASSWORD_POC_KDO" secrets: inherit diff --git a/.github/workflows/check-poc-wgcloud-v3.yml b/.github/workflows/check-poc-wgcloud-v3.yml index 0a0eb3122..47de83f94 100644 --- a/.github/workflows/check-poc-wgcloud-v3.yml +++ b/.github/workflows/check-poc-wgcloud-v3.yml @@ -1,4 +1,4 @@ -name: "Compliance IaaS v4 of poc-wgcloud.osba" +name: "Compliance IaaS v3 of poc-wgcloud.osba" on: # Trigger compliance check every day at 4:12 UTC diff --git a/.github/workflows/check-pco-prod4-v2.yml b/.github/workflows/check-scaleup-occ2-v4.yml similarity index 76% rename from .github/workflows/check-pco-prod4-v2.yml rename to .github/workflows/check-scaleup-occ2-v4.yml index 63ec59b1e..b5bf70a2d 100644 --- a/.github/workflows/check-pco-prod4-v2.yml +++ b/.github/workflows/check-scaleup-occ2-v4.yml @@ -1,4 +1,4 @@ -name: "Compliance IaaS v2 of pco-prod4" +name: "Compliance IaaS v4 of scaleup-occ2" on: # Trigger compliance check every day at 4:30 UTC @@ -11,13 +11,13 @@ on: - completed # Allows you to run this workflow manually from the Actions tab workflow_dispatch: - + jobs: - check-pco-prod4: + check-scaleup-occ2: uses: ./.github/workflows/scs-compliance-check-with-application-credential.yml with: - version: v2 + version: v4 layer: iaas - cloud: "pco-prod4" - secret_name: "OS_PASSWORD_PCOPROD4" + cloud: scaleup-occ2 + secret_name: OS_PASSWORD_SCALEUP_OCC2 secrets: inherit diff --git a/.github/workflows/check-pco-prod3-v2.yml b/.github/workflows/check-syseleven-dus2-v3.yml similarity index 62% rename from .github/workflows/check-pco-prod3-v2.yml rename to .github/workflows/check-syseleven-dus2-v3.yml index a9aee6e8c..3bd0cff2b 100644 --- a/.github/workflows/check-pco-prod3-v2.yml +++ b/.github/workflows/check-syseleven-dus2-v3.yml @@ -1,9 +1,9 @@ -name: "Compliance IaaS v2 of pco-prod3" +name: 
"Compliance IaaS v3 of syseleven dus2 region" on: - # Trigger compliance check every day at 4:30 UTC + # Trigger compliance check every day at 4:08 UTC schedule: - - cron: '30 4 * * *' + - cron: '08 4 * * *' # Trigger compliance check after Docker image has been built workflow_run: workflows: [Build and publish scs-compliance-check Docker image] @@ -13,11 +13,11 @@ on: workflow_dispatch: jobs: - check-pco-prod3: + check-syseleven-dus2: uses: ./.github/workflows/scs-compliance-check-with-application-credential.yml with: - version: v2 + version: v3 layer: iaas - cloud: "pco-prod3" - secret_name: "OS_PASSWORD_PCOPROD3" + cloud: "syseleven-dus2" + secret_name: "OS_PASSWORD_SYSELEVEN_DUS2" secrets: inherit diff --git a/.github/workflows/check-pco-prod2-v2.yml b/.github/workflows/check-syseleven-dus2-v4.yml similarity index 50% rename from .github/workflows/check-pco-prod2-v2.yml rename to .github/workflows/check-syseleven-dus2-v4.yml index 0f62ec6d0..ffcaee609 100644 --- a/.github/workflows/check-pco-prod2-v2.yml +++ b/.github/workflows/check-syseleven-dus2-v4.yml @@ -1,9 +1,9 @@ -name: "Compliance IaaS v2 of pco-prod2" +name: "Compliance IaaS v4 of syseleven dus2 region" on: - # Trigger compliance check every day at 4:30 UTC + # Trigger compliance check every day at 4:10 UTC schedule: - - cron: '30 4 * * *' + - cron: '10 4 * * *' # Trigger compliance check after Docker image has been built workflow_run: workflows: [Build and publish scs-compliance-check Docker image] @@ -11,13 +11,13 @@ on: - completed # Allows you to run this workflow manually from the Actions tab workflow_dispatch: - + jobs: - check-pco-prod2: - uses: ./.github/workflows/scs-compliance-check.yml + check-syseleven-dus2: + uses: ./.github/workflows/scs-compliance-check-with-application-credential.yml with: - version: v2 + version: v4 layer: iaas - cloud: "pco-prod2" - secret_name: "OS_PASSWORD_PCOPROD2" + cloud: "syseleven-dus2" + secret_name: "OS_PASSWORD_SYSELEVEN_DUS2" secrets: inherit diff --git a/.github/workflows/check-syseleven-ham1-v3.yml b/.github/workflows/check-syseleven-ham1-v3.yml new file mode 100644 index 000000000..b9cfa75db --- /dev/null +++ b/.github/workflows/check-syseleven-ham1-v3.yml @@ -0,0 +1,23 @@ +name: "Compliance IaaS v3 of syseleven ham1 region" + +on: + # Trigger compliance check every day at 4:09 UTC + schedule: + - cron: '09 4 * * *' + # Trigger compliance check after Docker image has been built + workflow_run: + workflows: [Build and publish scs-compliance-check Docker image] + types: + - completed + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + check-syseleven-ham1: + uses: ./.github/workflows/scs-compliance-check-with-application-credential.yml + with: + version: v3 + layer: iaas + cloud: "syseleven-ham1" + secret_name: "OS_PASSWORD_SYSELEVEN_HAM1" + secrets: inherit diff --git a/.github/workflows/check-syseleven-ham1-v4.yml b/.github/workflows/check-syseleven-ham1-v4.yml new file mode 100644 index 000000000..41b6e26c7 --- /dev/null +++ b/.github/workflows/check-syseleven-ham1-v4.yml @@ -0,0 +1,23 @@ +name: "Compliance IaaS v4 of syseleven ham1 region" + +on: + # Trigger compliance check every day at 4:15 UTC + schedule: + - cron: '15 4 * * *' + # Trigger compliance check after Docker image has been built + workflow_run: + workflows: [Build and publish scs-compliance-check Docker image] + types: + - completed + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + check-syseleven-ham1: + uses: 
./.github/workflows/scs-compliance-check-with-application-credential.yml + with: + version: v4 + layer: iaas + cloud: "syseleven-ham1" + secret_name: "OS_PASSWORD_SYSELEVEN_HAM1" + secrets: inherit diff --git a/.github/workflows/check-wavestack-v2.yml b/.github/workflows/check-wavestack-v2.yml deleted file mode 100644 index 548c23d72..000000000 --- a/.github/workflows/check-wavestack-v2.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: "Compliance IaaS v2 of wavestack" - -on: - # Trigger compliance check every day at 4:30 UTC - schedule: - - cron: '30 4 * * *' - # Trigger compliance check after Docker image has been built - workflow_run: - workflows: [Build and publish scs-compliance-check Docker image] - types: - - completed - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -jobs: - check-wavestack: - uses: ./.github/workflows/scs-compliance-check.yml - with: - version: v2 - layer: iaas - cloud: "wavestack" - secret_name: "OS_PASSWORD_WAVESTACK" - secrets: inherit diff --git a/.github/workflows/create-flavors-spec.yml b/.github/workflows/create-flavors-spec.yml index 2b2029270..b3dd57311 100644 --- a/.github/workflows/create-flavors-spec.yml +++ b/.github/workflows/create-flavors-spec.yml @@ -13,7 +13,7 @@ jobs: default: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: token: ${{ secrets.FLAVORS_SPEC_PAT }} - name: Set up Python 3.10.5 diff --git a/.github/workflows/link-validator.yml b/.github/workflows/link-validator.yml index be5f45a7c..25a184d26 100644 --- a/.github/workflows/link-validator.yml +++ b/.github/workflows/link-validator.yml @@ -1,13 +1,13 @@ -name: Check links for modified files +name: Check links in Markdown files -on: +"on": workflow_dispatch: schedule: - cron: "0 0 * * *" push: branches: - main - pull_request: # Add this section + pull_request: branches: - main @@ -15,11 +15,12 @@ jobs: markdown-link-check: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: gaurav-nelson/github-action-markdown-link-check@1.0.15 with: use-quiet-mode: 'yes' use-verbose-mode: 'yes' - check-modified-files-only: 'yes' + # https://github.com/orgs/community/discussions/26738#discussioncomment-3253176 + check-modified-files-only: ${{ contains(fromJSON('["push", "pull_request"]'), github.event_name) && 'yes' || 'no' }} config-file: 'mlc_config.json' base-branch: 'main' diff --git a/.github/workflows/lint-golang.yml b/.github/workflows/lint-golang.yml new file mode 100644 index 000000000..faf7fdc8c --- /dev/null +++ b/.github/workflows/lint-golang.yml @@ -0,0 +1,28 @@ +name: Check Go syntax + +on: + push: + paths: + - 'Tests/kaas/kaas-sonobuoy-tests/**/*.go' + - .github/workflows/lint-go.yml + +jobs: + lint-go-syntax: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: '1.23' + + # Install golangci-lint + - name: Install golangci-lint + run: | + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.61.0 + + # Run golangci-lint + - name: Run golangci-lint + working-directory: Tests/kaas/kaas-sonobuoy-tests + run: golangci-lint run ./... 
-v diff --git a/.github/workflows/scs-compliance-check-with-application-credential.yml b/.github/workflows/scs-compliance-check-with-application-credential.yml index 29d32bf45..7d4ad2463 100644 --- a/.github/workflows/scs-compliance-check-with-application-credential.yml +++ b/.github/workflows/scs-compliance-check-with-application-credential.yml @@ -32,10 +32,13 @@ jobs: auth: application_credential_secret: ${{ secrets[inputs.secret_name] }} EOF + - name: "Clean up any lingering resources from previous run" + if: ${{ inputs.layer == 'iaas' && inputs.version == 'v4' }} + run: "cd /scs-compliance && ./cleanup.py -c ${{ inputs.cloud }} --prefix _scs- --ipaddr 10.1.0. --debug" - name: "Run scs-compliance-check" run: "cd /scs-compliance && ./scs-compliance-check.py scs-compatible-${{ inputs.layer }}.yaml --version ${{ inputs.version }} -o result.yaml -s ${{ inputs.cloud }} -a os_cloud=${{ inputs.cloud }}" - name: "Upload results" - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: result path: /scs-compliance/result.yaml diff --git a/.github/workflows/scs-compliance-check.yml b/.github/workflows/scs-compliance-check.yml index 7b6a7b099..43a7afdc0 100644 --- a/.github/workflows/scs-compliance-check.yml +++ b/.github/workflows/scs-compliance-check.yml @@ -15,7 +15,7 @@ on: secret_name: required: true type: string - + jobs: scs-compliance-check: runs-on: ubuntu-latest @@ -32,10 +32,13 @@ jobs: auth: password: ${{ secrets[inputs.secret_name] }} EOF + - name: "Clean up any lingering resources from previous run" + if: ${{ inputs.layer == 'iaas' && inputs.version == 'v4' }} + run: "cd /scs-compliance && ./cleanup.py -c ${{ inputs.cloud }} --prefix _scs- --ipaddr 10.1.0. --debug" - name: "Run scs-compliance-check" run: "cd /scs-compliance && ./scs-compliance-check.py scs-compatible-${{ inputs.layer }}.yaml --version ${{ inputs.version }} -o result.yaml -s ${{ inputs.cloud }} -a os_cloud=${{ inputs.cloud }}" - name: "Upload results" - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: result path: /scs-compliance/result.yaml diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index bba1c8f8e..9f5fb6c48 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -6,6 +6,8 @@ name: Run Python unit and regression tests paths: - '**.py' - .github/workflows/test-python.yml + - Tests/requirements.txt + - Tests/test-requirements.txt jobs: run-pytest-tests: diff --git a/.gitignore b/.gitignore index 4d7851fab..2b83a0983 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,9 @@ **/__pycache__/ .venv/ .idea +.sandbox .DS_Store node_modules Tests/kaas/results/ +Tests/kaas/kaas-sonobuoy-tests/results/ *.tar.gz diff --git a/.markdownlint-cli2.jsonc b/.markdownlint-cli2.jsonc index ccb3a9a9c..4d44024bd 100644 --- a/.markdownlint-cli2.jsonc +++ b/.markdownlint-cli2.jsonc @@ -43,13 +43,14 @@ { "name": "double-spaces", "message": "Avoid double spaces", - "searchPattern": "/([^\\s>]) ([^\\s|])/g", + "searchPattern": "/([^\\s>|]) ([^\\s|])/g", "replace": "$1 $2", - "skipCode": true + "skipCode": true, + "tables": false } ] } }, "customRules": ["markdownlint-rule-search-replace"], - "ignores": ["node_modules", ".github"] + "ignores": ["node_modules", ".github", ".git"] } diff --git a/.zuul.d/config.yaml b/.zuul.d/config.yaml index 34a08b740..902123cb4 100644 --- a/.zuul.d/config.yaml +++ b/.zuul.d/config.yaml @@ -5,11 +5,7 @@ merge-mode: "squash-merge" periodic-daily: jobs: - - scs-check-pco-prod1 - - 
scs-check-pco-prod2 - - scs-check-pco-prod3 - - scs-check-regio-a - - scs-check-wavestack + - scs-check-all periodic-hourly: jobs: - scs-check-gx-scs-main @@ -22,6 +18,7 @@ - job: name: scs-check-adr-syntax parent: base + nodeset: pod-fedora-40 pre-run: playbooks/pre.yaml run: playbooks/adr_syntax.yaml - job: @@ -30,8 +27,9 @@ secrets: - name: clouds_conf secret: SECRET_STANDARDS + nodeset: pod-fedora-40 vars: - cloud: gx-scs + preset: default pre-run: - playbooks/pre.yaml - playbooks/pre_cloud.yaml @@ -41,27 +39,7 @@ parent: scs-check-gx-scs branches: main - job: - name: scs-check-pco-prod3 + name: scs-check-all parent: scs-check-gx-scs-main vars: - cloud: pco-prod3 -- job: - name: scs-check-pco-prod2 - parent: scs-check-gx-scs-main - vars: - cloud: pco-prod2 -- job: - name: scs-check-pco-prod1 - parent: scs-check-gx-scs-main - vars: - cloud: pco-prod1 -- job: - name: scs-check-regio-a - parent: scs-check-gx-scs-main - vars: - cloud: regio-a -- job: - name: scs-check-wavestack - parent: scs-check-gx-scs-main - vars: - cloud: wavestack + preset: all diff --git a/.zuul.d/secure.yaml b/.zuul.d/secure.yaml index 04377fb98..78dbb906f 100644 --- a/.zuul.d/secure.yaml +++ b/.zuul.d/secure.yaml @@ -2,6 +2,39 @@ - secret: name: SECRET_STANDARDS data: + zuul_ci_api_key: !encrypted/pkcs1-oaep + - jUzccSZN0bjWMX8BPntW4dct5gT5YtlNPQUbQRELCKBlce7y1Ao41g4u06CXBMfzUZIYG + eWoxhDsn2QfMnN8RLxkWEjGBG/+n7ql2N1CC+FuSBqU8FkDliSPNZdlC75BFBQYksVeG/ + XbnP90D4u3cOzE6nK72Ftr5hK90Is/PquIfcStXjxoirZjO4tsgz8kI4elBGGg2q96guu + 5LTVejYcxH/iwr78AM4YvRSnLto7L0tNxfTI3K6l9gFLWHY52DINYhmZq18MmGIx99Yat + wvnLefMeX634FzlS+qzemC029T5FKot+rNG4zD2JbpyN7Raqt6HBLRh1SIyte7RLqTmmA + sONOho8I+LhRvb0no3vjslNm/OJU1SSdcTPilFv6Vyvr0wt892ihsqHn6F0ZT3HAXpkeI + h5Vax81vIl6T+ZUCqgqea7zgQHr1RntqOc1tu8nx1PtrzHRySZBS+CQwo9GKbXR0HdLOk + ANItov3dMRqgDewGQPoX7UyI23zfa5JmFAkDkM016l+r4xDfo4v/eGVArloyTEF362gMh + o3NDMUlGaL2kNehk6k/ol8HyWilVRceGFDVRZjYlGWFU+jFTNYi0vqq8mP1gv9CIVOe/b + K6dgp6XZo/apeESp4e8xlfJA2Tf+UpylmBtsgcHXjOWPNH9JaalA8pv6LWjcf8= + zuul_ci_basic_auth: !encrypted/pkcs1-oaep + - cGUlUyavav0SwvkMeaZu5VZs/FD7qMUXx+vkskQULii1Kjmob9wpIpwBasB6C5wjkz7Yu + SZc9kDeYLGNlLS0liGEnNzQEHtSToVCLmEsuEQQWVLKgjVFHhuYG92D//OTnPhwGdrWR0 + YbN++e6dfdoUTFgNomE9yQ/AiP31frb5xsnkzBOd4Yck7ctlCaoEFqsLDpg9fCvmHuTOD + rI5lwaxHjaNCsBdWBvGrV/y06wWz3Dd4DI/mK9gzQT8LQUJb14WuM7Skif0piig/3X2+p + 4AFYEj9ZsxTaGL+IhHsMQKHQOxZ8qfDsBTVYjaQxo7qGrGtZ9sZcRcmhScA5OgSiMI3kA + jrFtm3TMT15SZVaaG/fqmaYykGl9JXD5jBvpwIY6oPmjjY9hNmc5bm0tmsJ/RF6f8ZChP + sR5e/wOJUnYbUxTyuId1ZrFL7kFQxfc1HWg1VCwRTfeYwFal5K3CqXWu9u6O/6foa/DFn + uHYP40Lq1Rd4LRQjT3TW9TybgM2Jvt6sd7sCKM7KQ4/fGyj8BswRThBNzuJMN83QmmAiq + phthkp6X2A7ELMd22wjrOy7ruwZfXObhagJNis4x/t55fdDpnZcW3KeeqOJAv2xkD4StF + 32RSKLdIRbtWLsouOYPNloVFwPAykbrFkDfH2lyy0LJS9gWyK7t6u4Ks3P7hUE= + zuul_ci_signing_key: !encrypted/pkcs1-oaep + - gbtzWcQo4LytBGfTskgeFs0bFXzAZo2R33ljlNFAfNzdzlrPDnrEljyys+Bp9+yjEfcG/ + rq8YeW0FVVYYulnmkasfgbUP8lMmsMHli5AwCLB00QjqCsy6Ixc1ELUr+KTTSYXyU8qhT + d0HoofhFvMoU8nb34KvJ18LbyU1huLHZSs+3kbZQeK85rV7BMCmIwJUnGxZrxmI8tCvrT + /EVbykMojLnsW48zMPqa5o75NWb2yU7RhQFNr9GEdNM3ZvilsPxsuQ/6b+h92WOKhcrsZ + 1+FSOc2Z46l+G4aIl25L4jM8ktPpi1ug71B2hnh5BM6aHidQuILKthsk5/w2gRaTEgjA9 + +sQdeOHqP7ZhVT8JAyHi9bVXZEf/ROUwsWf044hPw9MFk8zFu/lC+WuZvB8QOJ6jd4C2x + zuL9HkO4uh52d57E68QLY5IkCA8TnHLisUp3BSGWXoL83ExwtoFpWGvGN4oJdqWd5A1ak + JWvUvQl33JFC1jzIoQ7Rph6c660fRmz377jwjqx9/+bkHwLozGCP/9XrZVcizefEJM6jD + eHRcpMpjuyFJLyNKnEF1lp58sfVQoJfVHTIvmzS9erVJhU/zjyeDFPsrLFweV7/5QzRvI + lDiXrfj+X40EQvPrkSBJj/BIBYl+RuF8PfYnV4jqaQBwFNNBQoL/hGEotZa5h4= gx_scs_ac_id: 
!encrypted/pkcs1-oaep - XbUa6vPaT+ANGdJyKHj0I2Q5iE6y/RItmux52ixruTHhYyiqTx8B9HWwDU61TogBMdKvC o2Cz9+2pj1hdkyQp5+99ARKHCLsOP/7u9E5M/zQ2X1QT+8UCQEEkfjpvLF491qGx50dg2 @@ -24,6 +57,72 @@ Vjo5fBpIjEOLNtA40vx0Agx1Red2eMFA5oLcxiDMBSP4nLiI1QNROSnGc7GYoIT5kyq8k P5Uw261H4un2xvpO9ZBuHEV5mskTiBazTPDwaqU3yZoyGIvdQl2Uo6S3Glnf41YH+k/hN jxzziQB9sT2glnmmzrzZ22xPZg+sTngWS/3svRYV9BXKnMgq8jjR6eIKGYSwyQ= + cnds_ac_id: !encrypted/pkcs1-oaep + - apArEei+9Eu0NrelesebDZwUjGvbep58dAQowtcCEn1hzbUAqMxWm74Ibk1e+C3prH5Zv + tcj+okUoyfelP8ExqCZL55cOZVentoyg/VfiwZlR1z9N/p3vDkN3sx8z6o7CJux2oFMkg + bRegNcQ3qahUPJ+cJecmSoy9K/lE4t+qieqXAHt9aqqhxM+1ElcK4eaK5XssoP9X/ZqOR + lJf0UjgppeS+YWzIBeN4m3jPc5g+NrN4edXxodm41PNnYjPVklGODhgoKfEQvpv3VvX4e + /fZtV8/buRWDisYj/mVuYkI49VP4Q2G3QPfxOW3fNrtr7UPFL34INUp+KRCZSyDJeQgFB + /svIC0FRTkUgzseyouOIb/IjxIdXUBpvkWs7zd8IoG/VvR2TYJEapctHm5MmqWyJIKBIW + T9QmLCu/WsPsyab+Va4oopNw7TdgBa5HbYgTa9xcBr7kbPLf8U6tHDigL8aCTN3fMAvWL + P5XkXkeONBiDQ/lYjDAtlftRJfdz/VnqNjf700GSYYi4/vFLyVG0V11IYKLq2ybm2c2kU + r9ZvquSumOQq1bP3pSfi7xbYGAhmrTXmH2H1MpHfMk9FK7ZejWglS8kDwrHCQBaRxLrD/ + VphyB4SQFj6sbaf0O4B+WcsivKRjdXVioxxmdF3kXrRbVL/lu5P0dotOwUNeX4= + cnds_ac_secret: !encrypted/pkcs1-oaep + - JFzDs+xTnb8qNP0oaB9LgJa0YZVbOF7eLCYDJiPPXozBuCgUc+eJrAxtVAddzFOHuGvoZ + xkAM2uu2Wq6AHZ8xE0yVuG/i0GriB+NbVAlUrxg6YKR+1TYK5s1uUnd/CDDMIP0k4wFN0 + PZuw2H5IYhexfsz+eP6n6Zvc8ULbyecYnCnCkM/eWhEVQuvU1drMIpxlqy+dmfMPICeeN + ju5m0K3ZcUO6lqAK6bAivxFBiHNqwq04wK68siT7Vu62l+K4L7j8QuXtyvLfeaerZx6Hc + wEdDfJY1lAnjSjysITy7F6hSXffgvY/d1vpN4HqTCgy4GZPsrP3Kq33X7Sm4N3drOjjFz + PJ64FYctbktKygscvI8etoJNwLTjNou9AMnSsI1Sg9B/iLRmRdKS02LWy2jVbFXRFzsE0 + SB56himcOvYxw0CakSTm/N690mjc8VpDPpb7zTH+t3lSG0mzbPgVsk4drwOeC8mz7UoUa + o9slrGAyL7g3zdjNlUJA04U33SNCvaCxL8fac6JZ15vrqeW4g4AB4+rx7fYKAnOVg2FOL + 5jMOKsiGgfLvz1KZ9c6Q1ThfeCQzG9waWJnyCx2R2tEtyQ17hIW6Rzo1RzmQkUyvLN9TJ + CLSZCUoR+2Ut+ZlpDi3vVushWWLXyjj8ojblTO/zqlbQ1A+7d/C+5x2mrh2T/0= + poc_kdo_ac_id: !encrypted/pkcs1-oaep + - l/2ts4lmTN611PMcpMhpNrYqoiiuoZkqUTwPKfLwAEYLF68JBCuCy2iknr8fjz9FP+103 + vxqM6wq/el7kIGPUBqpb33DBJ54nS3uLUrA57vB2YoJVa99w0x/3NDsXGuC7EpJnHnkl+ + VmVTwWKomsTtxiSNV3E2ol5GK/YzPUH4A4armF7wslHhaXmerH0I9pL+62k15C/wibVtK + 84sDhOIwvdIjXAq4XMtdDJvWspp+k2K8mg+k1qwFbMiDQKyJBAGpOzpgdX5C4UT7soK6a + AGWGyeQn0Il7oNk0ngZm9IJL4ZnAJ+KSaSTTbYD+cu2uLyxq1PjmWPQ1jdpbzrB6QZwJO + Bmi+htTkNro+wKuK9uBJfo+BjMUcshRgnzj+6e8swDI/ng9bzLixV56weGbT3RRlJbOG2 + SNTCa+NIYk2oAxE7ZhEaOfsKiop52pBFIT1cJs4/STVTPtt0jcmvPzUu/D6g1LfZsTbhv + wdcmRc1emZIOsa3YCAyGhEE1DgyjQzsHprewjmsIWXqigr6e8eT4XimNMovhrZiN0zIUU + 3LlDffNO1eGzUZOz2/prp/m8eD/n/+x1sku2ktjyqovASVwQoiiaFNK9nlWvwy9FSOYoT + AiAv8LecYUkNFlonXXR/iiXhjWzmAzgCcAiSqWbGPMVQvw/dilgrHlbhD7djB8= + poc_kdo_ac_secret: !encrypted/pkcs1-oaep + - PQ44tlT4qGSzdx1JAPe/TbWfttNicGAl9ng9OmjbGTtNd5NLVhwfGSIP/UPEkCCneGDKJ + y+eFwHpmQPMpfHWrfCSuCm/cMI3UkRqQ6Yn1UJCfbIj3YOMa4KezenXlNMHsq4489sRJ4 + bUq6O002NrExa31lDVAA15U06qDwte+Gq/+iX2DeZF2o6gWSuWF5kSf+0mWBT98diKSSE + tSsYLbjl68Vq8KqqJ8HrPJfDpVDuX3AUVEg5JxuxoIoqJM62sJLqj1a3AdQk1QIB2TzCh + OHGT55lyL+gBnJnOqGZ9q5uldGOuPEDW2e9dwrUZZoPPZcfVGfXLhqtFP3UuQ4f/BiHJO + rGnNwfQkEAu53hYYzZAFGHgRMeiMF8bWP+7QUyV6u21bfVhOhO5hMC4sIh4ewa6DjKEmM + xQenS/D7Nbf8y5DbXUqGB10SawwXiVlsJsjBYPE2thc3JwfZq/Zcp8Rxlku5KfIcRUrsy + NIadxXBGLDDqTWaiOp0Mg9eXQ7tz+8lOg23uojzyXzgNiDGu77azuNkfJ4iosBHUraYEE + cewhUsF1mXh3le03eEUnMVnnRBmFTsAyygjhLtUwnDDSoJHp3g1e1dRTd/nihsOjSLwBa + vNFgP3N8s1smIbNMpjyMjQiTbXQ87Sqx6ZmUoSX+1tKu8tJU0k04y00xaFYWxE= + poc_wgcloud_ac_id: !encrypted/pkcs1-oaep + - dQIs3NJt1CpP1925+b9QjjwonqjmiuCl1ewxw160yIEHQ/qyQiwutJbsg4IYS9XKhKc2X + 
GumOOpLY7+/uNRR5pZmEfOdlGnPoJvVhYtCqHBFy7xQ6NLHKFxCT8zHM9ppSl1Hjc2G2F + fseCFIYM95Mibybu7EVAzVnZNvZHamuwwqVx5DtXHh0s3ZS53n06TX4Su/Q+kEDl7yytk + grZx8mc6qj0YEchqQKejB1Se6keQBUuyy5vjua8aFBcwaZUgaAuXXBbj7azE2ajm6SYpQ + 97YzU5By5Dba0xHsD5t5nu7TUbf4NbgQ4rwOTEbgT2MOypBkFPqZniL9Mb0HfW2p7Gvak + 4TP/9YKER9JlJoCXhQTTqlTIQZwUjKsuSV/sTQbCvnolVL3P2hPx3lXQjv6xv6p3sXAK8 + 6WybS/RWfHgVoCrh2ZwJTwCdxQpJNKnSekbp8AQ/pIomGGDdY6TjBhxnD4CONczmUtKSU + GGWKpfiM7N8yBnnkBiXrHKMuMN+8pni01NuyBTJzlpQd8eqEQPiZOMTwzwnnoRVNiQLCf + xPqOWWzdgdxxk1vEQCp97VLayI1THuy9svyLsRejzY/FGdK+fKOV7OXYzFFO+gYb1iiKh + T1UkFzXhw/mTDkueHYUJCCqZCYyuXs/6BdX12REGVqk2KUnGSvG4QOPDoIPWTc= + poc_wgcloud_ac_secret: !encrypted/pkcs1-oaep + - TZSHaBH+5kN+1h7sAJ0RWKaCwS1ABcFjlmpDlkSVTp+ZpH0eCR/gKzWzQegFh67QjUrOA + hbH78xD9VTSiLfl6t0K5ugbTdUmBJ34dSC61ffIOZzsMHEdXAUvYvIbeV2q04skgGZWlz + t4FTtvnTJeAigWXtDh0ZnUiEJj5sVs7AAGGObqVI+yxcZwAnBC6w3ld0m4d35bXJc1Gys + t40ewCVe4BwtmKOblPbJYlJ6/tHLFKvnZRZDDW/+LdDJPjK48IITMaTzxs8PdnFMT2uml + b8dhwIgpo7m1kgppgYdlZa7Nk61bOefdMSOWMLB5nxcRhFr6oaM2InAkGbCGf3AGQQAGy + k75G91/pGbxOq96UpBP6l0rFiuqLGppaqz9aJb1/BKYM5Ld74W65TfMwp2p3p7n+FsGT2 + i4Ex+PXCIogxPN2l5GN90rvF13UqarYHNZd4rxRxd+pW3+UEnfVchROyg7n2Fks9w0kEl + uZq8xRkPwlISeLi1SN23QVtBz2JIIjxa/TydCnpwAN8XEWBLQJYcqDomD+jldjf7CDIgS + cZ/KVePBCnPpnAU8LFTR3JYC/t4N+ZY0m+rWLsQ6XUyG0lhV0uO4U9dxD/3C2F2W/U/gG + KMDAb8fySxCweaeCWncCLPSZcLB1A7OiNGNhNwSCYtTjbHtUysssQi8CbT4m3c= pco_prod1_ac_id: !encrypted/pkcs1-oaep - jGScb1B/BfnuDdDnfsJoHnVRaeiTAX1fCB3eYBuUx6grQTQ2SorKWeUeVWqznfJJF0Pug uE09n6oCwZE3hxzI2VxFA+o4wDBA3azasAs8N3vV+QyFYF5dl+5K1M0xwdkhqAyefw5n8 @@ -90,6 +189,28 @@ riPv16TIXHG/VYteDxT4f/onB6xBBiZ7Bm7drM3nAsM+ZM3WwxUnu4luWZsZYAADV+S31 ODoxs6vEGmQgoOCCej1a71jkLMx+xdOeRN606H6Jrqfm2BfsYa1ZxUhX1Dk1dgpcVXZOK gJLXG3zz2PJNa/Zl0/3aqrWJS6+A9lD5XuDHDdPxKfyhwo+R2+zhzScd/bbn4E= + pco_prod4_ac_id: !encrypted/pkcs1-oaep + - L4tJH+zPSVZweHeg7FjSVgeDZdumMqhyEU9Amf6lUKqrHGz7llHgDp0InKyjrFe/CwWkG + Y3hySGiEvsrdqywYWRq3y1gfxCvdJ7RMIO7j0xH2oJtCa+v1MpJYLG7FwC34YNt4aphgg + VWdL6HgmBTwxmQZhRGMykqSoPTKRT0jInUZwKUg/xbAUj6WzMId0sfxM0C+q4zdSbyxhT + sRT9J7ewHbOBpOnO+RTjNP1yHhU8ZqZvJ8RoVHLGuu3i8mGjSdr5cnUrZj7bxdCv9Xh8h + QzirkJJ0MN7oiyvAjQdzC6fZQvlTGaH3ifzLZFWl/1ipwOsDDvb58011KxIjA4RpwoBU/ + fdLWZZnsLDGk1I/j1XZULipRHVqBZxCotIfXjMQhbbuRRC8nAADaS9jbB703gjgN90nxO + Adbp9kRj7MHLc0F1JRs8AbadCu4+VVxIPQFzg2LtfN200tXDYJ0XwXUZ899fGJkfXTJgi + Dy55LTZ8Dvumi+5AU5fuQ9cqeGGjG21878vuopaG9qwoEo6gcpHAQ77WpqfLYfN13jUUg + 9zFfpmzPJ7/307QVSMMdRdogEjAFkJ0TzwFYOysVTdv+wbfc5VTBAiX2HFLmyiE2G5F7g + 2dWrNS4ahwIlNXtG1PVvQ+kcT7gdx5WViHCUc4qwLwIgzkRguVLUIcokW2R6CI= + pco_prod4_ac_secret: !encrypted/pkcs1-oaep + - Poj5AZd4iE9iSZpUTizRgup9PshitKyN/hScYPee/NJmfF8qHKkpXEWK1YvnfCcL8xOuE + cgVAKWkWxpggBAYYRen7AdkGZR4zldCqHQ26xnjmXRvjEv0ncUL96pWg9yj6GeZjFyLon + /mFS7fc+cTDgPjJ2zgKi2uT4MV1LVAiARa5RXgXRZ64vg1F6UT1kKIuLUmM3iu83KImsh + AJgXjR0xsBS8qxbQ3l85+ybSBglXRp0ETOinxrVfyS7rpSnXepGLE2s2evSHVntybEgsy + TNCCtOti8phaGh2WSEyA/YZekMpMNhSq5bYS6J3ttF9fkpBE4Xsgbu7Z5yL4BTkEDOXBG + I8nNV8ICq8i5VEcaMByPWetJwFUxlYuQ07dOaqQk7XohQKd6+XMeUAkKlag9Vosb8K1kX + lX9EyR1Y+C28tc4soeXsg/TkE702JnCpJ7I3aQqSjbhUm0yDWOEwzT4TOoN1j35iXzD1K + +sTx+tASbZ9UobexgC+3hyMa1CanFzPPjgMm3UYyrMmnvi96zImau6Q/CpJhQg3tZ8vLz + 4BnqOQklRAJxZA5btw8SFAb7GB2TCeEs/+dt/XqLrY2XkeaR9lGBl3Bftvkr9vFVfsVmx + 7IMobRXhnMOdUZQo7JBc5BV2CB0ZhBn0phUCHQtD4BGQZb/YIl0wO1wyJdk4A0= regio_a_ac_id: !encrypted/pkcs1-oaep - lBlWjvJ6RA3uniS1M4etvbdUxKB9KRNWm53gL3VlPyRkA7Ic2yFcAkZEGodHWH1iqNWfN p/3B2iFYwuZRktllbc/Ro80pkg52vHOkNkBdXLQd7ZFKG8zNJOxRt2nhDQxQPS7PzcUgo @@ -112,6 +233,72 @@ 
VCsXjf0qBBMrzz6HP9z95Bk44fiJ3L/LkA3Iij961dYrQXbZKDrKOiX/QPwrcSrVmjmew UbPexJFHgvTCqjadoLejSt9cUd9lVzhuzLJ8CS+CcCMbZOno6qathrd2B88riQaPNIGNu gfkNT9R63ZzKB1qIA2n5RZi7SH9DPIUd0AwLMn2bhp3uok5pNAPP/4/1RkQiCA= + scaleup_occ2_ac_id: !encrypted/pkcs1-oaep + - N2duwkcMdOXw6wF0deE/0BPM1M/URt3eWmrnBJ89VHeCDENGfTfDHcWPYs3wW4rSRCG6t + gqgNuA049OvOhL7rtjNHZ6yIj6xEHH/YdqT4UxjXPS9GFwoJXDtE8rIGjK3KU8GfUgKnG + DLplyyzGzx5j39rJAS628InmC56aip47rO1J4HQE9Ku25Wb06R7ykx+0ZOWr0HXjV/VsV + uwfyL+DPgewbL+4u8/XkcI0FwAM9/KkF/CcYUq5aVMdQS2foatTQW0C2idg+pffSTRaau + VF44rkVfzsCOz4MYAFpLIaL9Zxx1FifaPOd0oi6rEFjGd6vFtFCHk1BRpKmOITLyx3Te5 + zVffSkQAsqpn/4er8800bjQzxXvqmQmR0QwPM7dhvRnrNbTSCA/Awm5BPaUgeCZFN3MPN + Mc0XIaEwjuJvDK6fqj5tJrVIs5bxAmqRDj8d76AlJcOdDxHicTHgR3aUG4AKOWkUsskgQ + 3xR8lPh31O/HgzG9tq6o/DCPA1O9wyyOyT7KwJAaRASPCA1O80ZAzhZUNUVyut6dYEwaS + QXP4IaEJOxP8EkxR7FDEuO99UFZ7TXQ1CF7ots4wIs5tEpQvcdLnvBjJckp0fNBFTuGMm + FCvhgBK30NC93U4DxQv6xZBhqtvHYjHcTOXvz2fryRJT2teMN+eI+RDdV1Jj8Y= + scaleup_occ2_ac_secret: !encrypted/pkcs1-oaep + - LfUHhslK41JDp3CpslWGGA4bZ3udZh4KnytcXohkdbchb8QVt8eNc4nD0ti0/XS18YKwq + DlHOWw2rDJZ8RGIXENVUYzDbECoBErE8IAqQE0q3oS/8Oq0NYOFTGvvlKuue7U4s87Pwi + YFi+Q0Rv7vO8cWFVtbRHK+Hw6pC42Biq2T+tuVBCLqylIMViXpuEy9UpFLEv59zr6EHa9 + uB3xkjnpWuabe7vrG+LQHc0pJ5tNhcLiOnJggU5Ef02FBy+t6xvuJW8f6cXCnRRj1q0fl + D/vTmC7avwHnWC+J4WLL69HCwW05I7iHftVSWOXQgRzMBd4D4ND2OXfsWElu0eOV5XG6X + JsQH8lDnVN/lqaDAOYR4fk4+9yt3RURwvNL5FUnDK1t7LAI4X0gcvLrQAfzgOlpBYDXSK + 0kbUzqwivuw1v2zO/gxQU+J28PsOfZaKf/7ZZyj3e/tiq4wBpvPb0mVBwWXigKqzr+QED + Iy2u/g3x2qdcTpXR/RPq+xiXM2B2rw1V5gdkscdL+avXtTF7hT9HrcayHx3HDZ/h6aGPD + RWIJ8bstl+x2Q4zExgR13amWM8ZR1iLGCN20U/ZAaqANCqjDbrSVSTjTPzYtNFwAXwxkB + 3NHhPDHZ1MIdr6IJE4IZ4TCMsIeTA2UHNfF4RCzeDSIJ+CXOQxUFWOxZkf97WY= + syseleven_dus2_ac_id: !encrypted/pkcs1-oaep + - SjwtIvJO7DkLJDmS+T/Z5utFBa22hmPRBd8mzonJHGgURB2W7fmXFreD9NPrLfbt7ujKi + KNqJm8k1Vr1F3Mu+Osr0BWSnq5makwVt2ikBY4qPbL8iyVXsByaT/HNPLCOokqy+REpfu + lIA06uKm8vFwwqrmCs6Yaf4JmYpxMk/Bav14qyudGmfIAaNnsiACiUZJz5fmnhZJKLlOP + rwIWnWAYs6yNXpR/vRXpAOr531NffZ7q9F8gVjwmvhFJlMGMGOJ5xnMkTCQsRAnW4sLaM + gssFB8wr9sfNpreD6WPxe8xGdU44czDkFGzZjUgE+TLiaGSjnmaVE+F+KZT8BmVtcKHIQ + N0oM1UpmrvH44GNNRR3xAYLSrCIWUz6VVaG+xmWrJPOCR7bc9Ko/bwvtpJ13n/t1iyLfs + EKB67+EfupDzMlhWXUMMaeuzxrYhXd8y70Owf7s1NR01Ci+zYuYr9u6ORHYWvDdc5nMyR + NBjrcsUIJNV5hQjw09EbJdViN8E/w+GU++/Mkn/WFJZV59Q14+sXT/XdO/o0iajmIPaj+ + sonP03eSUcwzGnE2WvX097oEI5Cjxk1YqfGzgsf3AClwWKwqXxaBOXeKZsNhl1SZSf2h3 + oGzEgf3RiR/NVSDQqdZpLKDGeU4qvJrTnUqGQSHXpW/HZ6/lgZzMjsn8wJOrfU= + syseleven_dus2_ac_secret: !encrypted/pkcs1-oaep + - AtKcuIlw4WkHcRv+YLw/80Azxiaf/7rZGtHxSPk9l0Z+H9fM6zXe76V327HRb8fCVjN1x + 6FuZOurM+ph5MUXKM3pPrlkloPpQLX5ViYq5W9PsH7pisLeV1J9jsxf+eIxCmnFjlZyPT + cUoGu1kMqKR9+IpD4qYG5bXPg3b6stmG4HyBZ8cCDHF+anxxfcq41vqmrxaTJuZ/ITIh8 + sBrL8SDviYZ8YI5oeJbYWQkdnxVYjStwucR4Eij4wqrrNscU4sPRHPbydSx93CbPqPMbl + W11NGdV1fmpuHHVgJbw8/dxMuI8cxX3Iej6RH3kPmDzoMKEy/mqZe3uP0vQ2BlM/FhLlT + Vxwz0Bx+R8lcVVPtQQDKhJ/fllFhuHdC8p3vwWbxWvVd5whueclSsRDQ94YVT5Elmw9KY + VnvSVZXNsBPn6hvyu5bX8KQ8Z8NuFZBdQdctjDpsoiztAhgSFoz+dKpB5ynPD9JDe3+Bl + 8tYIxiQD85Rnv431bk8yq/Cc8s0Q77SWZNln1NieDnOyvXALwvE1owUrC7NnMuIcGaySk + so/qIZP35Hs2rGR/LWVLIpQco/RzI6KaSN5Idk6906Q2UCgNErvmKQAnGaA+CUCkRqodX + nw8TQSmx0VChVFhMnCevWTl2vHT3KrAb7NtOV53WG0RFvL220WkEeT3Fllb1LE= + syseleven_ham1_ac_id: !encrypted/pkcs1-oaep + - SjwtIvJO7DkLJDmS+T/Z5utFBa22hmPRBd8mzonJHGgURB2W7fmXFreD9NPrLfbt7ujKi + KNqJm8k1Vr1F3Mu+Osr0BWSnq5makwVt2ikBY4qPbL8iyVXsByaT/HNPLCOokqy+REpfu + lIA06uKm8vFwwqrmCs6Yaf4JmYpxMk/Bav14qyudGmfIAaNnsiACiUZJz5fmnhZJKLlOP + 
rwIWnWAYs6yNXpR/vRXpAOr531NffZ7q9F8gVjwmvhFJlMGMGOJ5xnMkTCQsRAnW4sLaM + gssFB8wr9sfNpreD6WPxe8xGdU44czDkFGzZjUgE+TLiaGSjnmaVE+F+KZT8BmVtcKHIQ + N0oM1UpmrvH44GNNRR3xAYLSrCIWUz6VVaG+xmWrJPOCR7bc9Ko/bwvtpJ13n/t1iyLfs + EKB67+EfupDzMlhWXUMMaeuzxrYhXd8y70Owf7s1NR01Ci+zYuYr9u6ORHYWvDdc5nMyR + NBjrcsUIJNV5hQjw09EbJdViN8E/w+GU++/Mkn/WFJZV59Q14+sXT/XdO/o0iajmIPaj+ + sonP03eSUcwzGnE2WvX097oEI5Cjxk1YqfGzgsf3AClwWKwqXxaBOXeKZsNhl1SZSf2h3 + oGzEgf3RiR/NVSDQqdZpLKDGeU4qvJrTnUqGQSHXpW/HZ6/lgZzMjsn8wJOrfU= + syseleven_ham1_ac_secret: !encrypted/pkcs1-oaep + - AtKcuIlw4WkHcRv+YLw/80Azxiaf/7rZGtHxSPk9l0Z+H9fM6zXe76V327HRb8fCVjN1x + 6FuZOurM+ph5MUXKM3pPrlkloPpQLX5ViYq5W9PsH7pisLeV1J9jsxf+eIxCmnFjlZyPT + cUoGu1kMqKR9+IpD4qYG5bXPg3b6stmG4HyBZ8cCDHF+anxxfcq41vqmrxaTJuZ/ITIh8 + sBrL8SDviYZ8YI5oeJbYWQkdnxVYjStwucR4Eij4wqrrNscU4sPRHPbydSx93CbPqPMbl + W11NGdV1fmpuHHVgJbw8/dxMuI8cxX3Iej6RH3kPmDzoMKEy/mqZe3uP0vQ2BlM/FhLlT + Vxwz0Bx+R8lcVVPtQQDKhJ/fllFhuHdC8p3vwWbxWvVd5whueclSsRDQ94YVT5Elmw9KY + VnvSVZXNsBPn6hvyu5bX8KQ8Z8NuFZBdQdctjDpsoiztAhgSFoz+dKpB5ynPD9JDe3+Bl + 8tYIxiQD85Rnv431bk8yq/Cc8s0Q77SWZNln1NieDnOyvXALwvE1owUrC7NnMuIcGaySk + so/qIZP35Hs2rGR/LWVLIpQco/RzI6KaSN5Idk6906Q2UCgNErvmKQAnGaA+CUCkRqodX + nw8TQSmx0VChVFhMnCevWTl2vHT3KrAb7NtOV53WG0RFvL220WkEeT3Fllb1LE= wavestack_ac_id: !encrypted/pkcs1-oaep - NgtWt9AeOFCvfDaDtYdWAFO1oh+LVLMNi2gyK2N0IHkf5SK68DRkR8asKm10iOIaXVkN4 riQQqirjYHzIzWS2s/dKoLIH5DTpRHZUl4n8i7sdN5lhdoxjga5+Ep+FWTG8oSWN6ZJFP diff --git a/Drafts/README.md b/Drafts/README.md index f4ee47aae..e3b903091 100644 --- a/Drafts/README.md +++ b/Drafts/README.md @@ -1,5 +1,11 @@ -# Design-Docs +# Drafts Archive -Design Documents, Architecture etc. for SCS and related technology +## Deprecation Notice -Here we collect docs that cover overarching SCS topics or topics that otherwise do not belong to an existing repository. +> [!CAUTION] +> Please do not create new files in this folder! + +The contents of this folder are for archival purposes only. New drafts belong +in the [`../Standards/`](https://github.com/SovereignCloudStack/standards/tree/main/Standards) +folder instead and adhere to the lifecycle described in +[scs-0001-v1-sovereign-cloud-standards](https://github.com/SovereignCloudStack/standards/blob/main/Standards/scs-0001-v1-sovereign-cloud-standards.md). diff --git a/README.md b/README.md index 7fcfcaa21..5052b25bf 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,10 @@ - # Sovereign Cloud Stack – Standards and Certification SCS unifies the best of cloud computing in a certified standard. With a decentralized and federated cloud stack, SCS puts users in control of their data and fosters trust in clouds, backed by a global open-source community. ## SCS compatible clouds -This is a list of clouds that we test on a nightly basis against our `scs-compatible` certification level. 
- -| Name | Description | Operator | IaaS Compliance Check | HealthMon | -| -------------------------------------------------------------------------------------------------------------- | ------------------------------------------------- | ----------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------: | -| [gx-scs](https://github.com/SovereignCloudStack/docs/blob/main/community/cloud-resources/plusserver-gx-scs.md) | Dev environment provided for SCS & GAIA-X context | plusserver GmbH | ![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/SovereignCloudStack/standards/check-gx-scs-v2.yml?label=compliant) | [HM](https://health.gx-scs.sovereignit.cloud:3000/) | -| [pluscloud open](https://www.plusserver.com/en/products/pluscloud-open)
- prod1
- prod2
- prod3
- prod4 | Public cloud for customers (4 regions) | plusserver GmbH |  
![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/SovereignCloudStack/standards/check-pco-prod1-v2.yml?label=compliant)
![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/SovereignCloudStack/standards/check-pco-prod2-v2.yml?label=compliant)
![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/SovereignCloudStack/standards/check-pco-prod3-v2.yml?label=compliant)
![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/SovereignCloudStack/standards/check-pco-prod4-v2.yml?label=compliant) |  
[HM1](https://health.prod1.plusserver.sovereignit.cloud:3000/d/9ltTEmlnk/openstack-health-monitor2?orgId=1&var-mycloud=plus-pco)
[HM2](https://health.prod1.plusserver.sovereignit.cloud:3000/d/9ltTEmlnk/openstack-health-monitor2?orgId=1&var-mycloud=plus-prod2)
[HM3](https://health.prod1.plusserver.sovereignit.cloud:3000/d/9ltTEmlnk/openstack-health-monitor2?orgId=1&var-mycloud=plus-prod3)
[HM4](https://health.prod1.plusserver.sovereignit.cloud:3000/d/9ltTEmlnk/openstack-health-monitor2?orgId=1&var-mycloud=plus-prod4) | -| [Wavestack](https://www.noris.de/wavestack-cloud/) | Public cloud for customers | noris network AG/Wavecon GmbH | ![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/SovereignCloudStack/standards/check-wavestack-v3.yml?label=compliant) | [HM](https://health.wavestack1.sovereignit.cloud:3000/) | -| [REGIO.cloud](https://regio.digital) | Public cloud for customers | OSISM GmbH | ![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/SovereignCloudStack/standards/check-regio-a-v3.yml?label=compliant) | broken | -| [CNDS](https://cnds.io/) | Public cloud for customers | [artcodix UG](https://artcodix.com/) | ![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/SovereignCloudStack/standards/check-artcodix-v3.yml?label=compliant) | [HM](https://ohm.muc.cloud.cnds.io/) | -| [aov.cloud](https://aov.de/) | Community cloud for customers | aov IT.Services GmbH | (soon) | [HM](https://health.aov.cloud/) | -| PoC WG-Cloud OSBA | Cloud PoC for FITKO | Cloud&Heat Technologies GmbH | ![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/SovereignCloudStack/standards/check-poc-wgcloud-v3.yml?label=compliant) | [HM](https://health.poc-wgcloud.osba.sovereignit.cloud:3000/d/9ltTEmlnk/openstack-health-monitor2?var-mycloud=poc-wgcloud&orgId=1) | +See [Compliant clouds overview](https://docs.scs.community/standards/certification/overview) on our docs page. ## SCS standards overview diff --git a/Standards/scs-0001-v1-sovereign-cloud-standards.md b/Standards/scs-0001-v1-sovereign-cloud-standards.md index c6916151b..eabfff020 100644 --- a/Standards/scs-0001-v1-sovereign-cloud-standards.md +++ b/Standards/scs-0001-v1-sovereign-cloud-standards.md @@ -18,12 +18,12 @@ It strives for interoperable and sovereign cloud stacks which can be deployed and used by a wide range of organizations and individuals. Wherever feasible, transparency and openness both in respect to the inner workings of the platforms standardised by SCS, -as well as the SCS organisation itself +as well as the SCS organization itself are a paradigm we intend to live. ## Requirements -The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in [RFC 2119](https://datatracker.ietf.org/doc/html/rfc2119). +The keywords "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in [RFC 2119](https://datatracker.ietf.org/doc/html/rfc2119). In addition, "FORBIDDEN" is to be interpreted equivalent to "MUST NOT". @@ -107,7 +107,7 @@ embedded in the markdown header. 
| Field name | Requirement | Description | | --------------- | -------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | | `type` | REQUIRED | one of `Procedural`, `Standard`, `Decision Record`, or `Supplement` | -| `status` | REQUIRED | one of `Proposal`, `Draft`, `Stable`, `Deprecated`, or `Rejected` | +| `status` | REQUIRED | one of `Draft`, `Stable`, `Deprecated`, or `Rejected` | | `track` | REQUIRED | one of `Global`, `IaaS`, `KaaS`, `IAM`, `Ops` | | `supplements` | REQUIRED precisely when `type` is `Supplement` | list of documents that are extended by this document (e.g., multiple major versions) | | `deprecated_at` | REQUIRED if `status` is `Deprecated` | ISO formatted date indicating the date after which the deprecation is in effect | @@ -167,11 +167,11 @@ In addition, the following OPTIONAL sections should be considered: ## Process The lifecycle of an SCS document goes through the following phases: -Proposal, Draft, Stable, Deprecated, and Rejected. +Draft, Stable, Deprecated, and Rejected. ```mermaid graph TD - A[Proposal] -->|Pull Request| B[Draft] + A["Draft (Proposal)"] -->|Pull Request| B[Draft] B -->|Pull Request| D[Stable] B -->|Pull Request| E[Rejected] D -->|Pull Request| F[Deprecated] @@ -195,8 +195,15 @@ Supplements may be kept in Draft state, because they are not authoritative. To propose a new SCS document, a community participant creates a pull request on GitHub against the [standards repository in the SovereignCloudStack organisation][scs-standards-repo]. - -The pull request MUST add exactly one SCS document, +In the beginning, the pull request will contain a draft of an SCS document and +the community participant should present it to the SCS community. +They may refer to the [SCS Community page](https://docs.scs.community/community/) +for an overview of applicable means of communication and online meetings +to get in touch with the SCS community. +Community participants are encouraged to present their proposal to the SCS community early on. +Note that the proposal draft's content does not need to be finished in any way at this stage. + +The pull request for the proposal MUST add exactly one SCS document, in the `Standards` folder. In the proposal phase, the document number MUST be replaced with `xxxx` in the file name, @@ -209,7 +216,7 @@ for a Supplement of `scs-0100-v3-flavor-naming.md`, the file name might be `scs-0100-w1-flavor-naming-implementation-testing.md` (note the `w1`!). The metadata MUST indicate the intended `track` and `type` of the document, -and the `status` MUST be set to `Proposal`; +and the `status` MUST be set to `Draft`; for a Supplement, the `supplements` field MUST be set to a list of documents (usually containing one element). @@ -217,7 +224,8 @@ Upon acceptance by the group of people identified by the `track`, a number is assigned (the next unused number) and the proposer is asked -to rename the file to replace the `xxxx` with that number. +to rename the file to replace the `xxxx` with that number +before the merge of the pull request. **Note:** Documents on the `Design Record` track MAY be proposed or accepted directly into `Stable` state, @@ -291,13 +299,13 @@ and the old document SHOULD be deprecated. ### Deprecation phase (Deprecated) -Should a document become obsolete, -it can be deprecated. +When a document is no longer deemed fit for production use, +it can be marked as deprecated. 
-Obsoletions SHOULD be announced ahead of their execution by setting the +Deprecations SHOULD be announced ahead of their execution by setting the `deprecated_at` field to a future date and moving the `status` to `Deprecated`. This signals current and future implementors -that the subject matter of the document +that the subject of the document is not considered necessary or state of the art anymore. If one or more replacement documents for the document exists, @@ -349,7 +357,7 @@ The advantages of such an approach are: The disadvantages of that approach are: - It is possible to make breaking changes after stabilization. - Potentially, an hypothetical SCS-1234 document might refer to something completely different + Potentially, a hypothetical SCS-1234 document might refer to something completely different in a hypothetical R15 release than what it meant in R5, if there have been sufficient, gradual breaking changes to the document. diff --git a/Standards/scs-0002-v2-standards-docs-org.md b/Standards/scs-0002-v2-standards-docs-org.md index 71583ceef..0a9be93f5 100644 --- a/Standards/scs-0002-v2-standards-docs-org.md +++ b/Standards/scs-0002-v2-standards-docs-org.md @@ -155,7 +155,7 @@ Docusaurus' robust toolkit assists in crafting and maintaining quality documenta #### Special Implementation Details -SCS's unique architecture necessitates a unique approach to documentation. To ensure seamless integration of reference documentation for Components and components developed for SCS, we have created a custom workflow. This workflow automatically syncs upstream repositories, pulling the most recent documentation at regular intervals. +The unique architecture of SCS necessitates a unique approach to documentation. To ensure seamless integration of reference documentation for Components and components developed for SCS, we have created a custom workflow. This workflow automatically syncs upstream repositories, pulling the most recent documentation at regular intervals. We have accomplished this by utilizing a Node.js post-install script found [here](https://github.com/SovereignCloudStack/docs-page/blob/main/getDocs.js). diff --git a/Standards/scs-0003-v1-sovereign-cloud-standards-yaml.md b/Standards/scs-0003-v1-sovereign-cloud-standards-yaml.md index 5db52ab16..08e72fd6e 100644 --- a/Standards/scs-0003-v1-sovereign-cloud-standards-yaml.md +++ b/Standards/scs-0003-v1-sovereign-cloud-standards-yaml.md @@ -21,7 +21,7 @@ SCS plans to offer six kinds of certificates with varying scope. These scopes ca - SCS-open - SCS-sovereign 2. _cloud layer_, of which there are two: - - infastructure as a service (IaaS) + - infrastructure as a service (IaaS) - Kubernetes as a service (KaaS) So, for instance, a certificate can have the scope _SCS-compatible IaaS_ or _SCS-sovereign KaaS_. @@ -75,18 +75,57 @@ users to adapt their environments or deployment automation to the new standards By providing a machine-readable document, we can generate web-friendly overviews of our certificate scopes as well as create a tool suite that checks environments against all described standards. +## Basic concepts + +The introduction stated that a certificate scope amounts to a set of standards that have to be fulfilled by the cloud service in question in order for a certificate to be issued. +While instructive, this view is still a bit simplified. Let's get more precise now by defining the following concepts. + +1. _(Test) subject_: + The cloud under test. +2. 
_Test case_ (also spelled testcase in code): + A statement about the subject that can be evaluated unambiguously to be either satisfied or not. The result is either `PASS` or `FAIL`, or—if the test could not be performed—`DNF` (did not finish). + A test case can be as simple as "the subject conforms to standard X", but a standard can also be decomposed into multiple test cases, which can then be reported on (also to the customers) individually. + This latter option has the advantage that we can show explicitly if the subject complies with optional parts of the standard. +3. _Check_: + A script that determines and reports the results of certain test cases. The report is printed to stdout, and each test case is reported as a single line of the form `testcase-id: [PASS/FAIL]`. The result `DNF` is not reported. Lines of other forms are permissible and will be ignored. + We also occasionally extend the concept of _check_ to manual audits. +4. _Module_: + A collection of test cases and corresponding checks, together with additional meta information such as the result lifetime, description, and a list of tags for a test case. + Ultimately, we aim to specify one module for each version of each standard: the module translates the standard into something measurable and, ideally, executable to be used for certification. +5. _Selector (expression)_: + An expression used to select test cases by referring to the tags that must (or must not) be present. +6. _Target_: + A named collection of test cases specified using selector expressions. + Ultimately, the certification of a subject always depends on a designated "main" target; all its test cases must be passed for the certificate to be awarded. + Further targets can be used to report on optional aspects of the certificate, such as particularly good security and encryption measures. +7. _(Certificate-scope) version_: + A collection of modules and a collection of targets, one of them being "main". + Note that a collection of modules can again be construed as a (larger) module. We opt to use one module per standard version, as mentioned above, in order to make commonalities between certificate-scope versions explicit. +8. _Certificate scope_: + A list of certificate-scope versions. + +Having introduced these concepts, we can now get even more precise by defining the actual specification in YAML format. + ## SCS Certification YAML Each certificate scope is recorded in a dedicated YAML file, e.g. `scs-open-kaas.yaml`. +For an example of such a file, see +[scs-compatible-iaas.yaml](https://github.com/SovereignCloudStack/standards/blob/main/Tests/scs-compatible-iaas.yaml) or +[scs-compatible-kaas.yaml](https://github.com/SovereignCloudStack/standards/blob/main/Tests/scs-compatible-kaas.yaml). 
The certification YAML _MUST_ contain the following keys: | Key | Type | Description | Example | | ---------- | ------------- | ---------------------------------------------------- | ----------------------------------------------------------------------------------------------- | -| `name` | String | Full name of this certificate scope | _SCS Open KaaS_ | +| `uuid` | String | Universally unique identifier | `d912d0a5-826a-4b01-bafd-b48f65f76f43` | +| `name` | String | Full name of this certificate scope | `SCS-open KaaS` | | `url` | String | Valid URL to the latest raw version of this document | `https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Tests/scs-open-kaas.yaml` | +| `modules` | Array of maps | List of module descriptors (described below) | (see below) | +| `timeline` | Array of maps | List of timeline entries (described below) | (see below) | | `versions` | Array of maps | List of version descriptors (described below) | (see below) | +A uuid may be generated on the command line using the tool `uuidgen` or using Python as follows: `python3 -c "import uuid; print(uuid.uuid4())"` + The certification YAML _MAY_ contain the following keys: | Key | Type | Description | @@ -104,51 +143,93 @@ where corresponding means: of the same layer. The latter certificate is said to We implement this logic by allowing for the designation of a certificate scope as a prerequisite; then a certificate of that prerequisite scope has to be presented before the certificate of the scope in question can be granted. -| Key | Type | Description | Example | -| ------ | ------ | ------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- | -| `name` | String | Full name of the certificate scope | _SCS Compatible IaaS_ | -| `url` | String | Valid URL to the latest raw version of the certificate scope | _[scs-compatible-iaas.yaml](https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Tests/scs-compatible-iaas.yaml)_ | +| Key | Type | Description | Example | +| ------ | ------ | ------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | +| `name` | String | Full name of the certificate scope | `SCS-compatible IaaS` | +| `url` | String | Valid URL to the latest raw version of the certificate scope | `https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Tests/scs-compatible-iaas.yaml` | ### Version descriptor -| Key | Type | Description | Example | -| --------------- | ------------- | ------------------------------------------------------------------------------- | ------------ | -| `version` | String | Mandatory: Version of the particular list of standards | _v3_ | -| `standards` | Array of maps | Mandatory: List of standard descriptors for this particular layer | | -| `stabilized_at` | Date | ISO formatted date indicating the date after this version is considered stable. | _2022-11-09_ | -| `deprecated_at` | Date | ISO formatted date indicating the date on which this version is expired. 
| _2023-04-09_ | +| Key | Type | Description | Example | +| --------------- | ------------- | ------------------------------------------------------------------------------- | ------------------ | +| `version` | String | required: version of the particular list of standards | `v3` | +| `include` | Array | required: list of module ids or include descriptors (see below) | `[scs-0100-v3]` | +| `targets` | Map of maps | required: this maps target names to selector expressions (explained below) | `main: mandatory` | +| `stabilized_at` | Date | ISO formatted date indicating the date after this version is considered stable. | `2022-11-09` | -Once a version has a `stabilized_at` field, this field may not be changed. The same holds true for the `deprecated_at` field. +The ids of the test cases of all the modules specified via `include` MUST be pairwise different. -Note that at any point in time, all versions that are older (`stabilized_at` is at or before this point) -can be certified against, unless the version is already deprecated (the point is after `deprecated_at`). -This means that more than one version may be allowable at a certain point in time. Tooling should default -to use the newest allowable version (the one with the most recent `stabilized_at` date) then. +Once a version descriptor has a `stabilized_at` field, the version is deemed _stable_, and the descriptor may no longer be changed. -Note: We intend to keep only one version in effect, except for a grace period of 4 to 6 weeks, when two versions -are effective at the same time. +#### Include descriptor + +Each include may be specified by means of a module id (i.e., a string) or by an include descriptor: + +| Key | Type | Description | Example | +| ------------------------ | ------ | ---------------------------------------------- | ----------------------------------------------------------------- | +| `ref` | String | id of the module to be included | `scs-0100-v3` | +| `parameters` | Map | Maps parameter names to parameter values | `image_spec: https://raw.github...s/scs-0104-v1-images.yaml` | + +When the referenced module uses parameters, then these parameters must be assigned values here. + +#### Selector expressions + +In order to define what a selector expression is, we need to define tags, atoms and terms first. + +A _tag_ is a string that does not contain any space, comma, forward slash, or exclamation mark. + +Examples: `iaas`, `mandatory`, `recommended`, `encryption`. + +An _atom_ is a string that is either (i) a tag or (ii) an exclamation mark followed by tag. +A list of tags _satisfies_ the atom if + +- the atom is of form (i) and the tag is contained in the list, or +- the atom is of form (ii) and the tag is not contained in the list. + +Examples: `mandatory`, `!mandatory`. + +A _term_ is a string that is a non-empty list of atoms joined by slashes. +A list of tags _satisfies_ the term if it satisfies at least one of the atoms. + +Examples: `mandatory`, `mandatory/recommended`, `!mandatory/encryption`. + +A _selector (expression)_ is a string that is a non-empty list of terms joined by space. +A list of tags _satisfies_ the selector if it satisfies all the terms. + +Examples: `mandatory`, `iaas mandatory`, `iaas !mandatory/encryption`. + +In the map `targets` above, it is possible to specify a list of selectors that are joined by comma. +(Note that this is still a string, not a YAML list.) +A list of tags satisfies this list of selectors if it satisfies at least one of the selectors. 
+ +Examples: `mandatory iaas, recommended kaas` (NOT: `[mandatory iaas, recommended kaas]`) -### Standard descriptor +### Module descriptor -Every list of standards consists of several standards that – altogether – define the particular layer standard in the given version. +| Key | Type | Description | Example | +| ------------------------ | ------ | --------------------------------------------------------------------------- | ----------------------------------------------------------------- | +| `id` | String | id for referring to this module | `scs-0100-v3` | +| `name` | String | name of this module | `Flavor naming v3` | +| `url` | String | Valid URL to relevant documentation (usually a standard document) | `https://docs.scs.community/standards/scs-0100-v3-flavor-naming` | +| `parameters` | List | List of parameters that the checks in this module might use | `[image_spec]` | +| `run` | Array | List of all checks that should be run; each entry being a check descriptor | (see below) | +| `testcases` | Array | List of all test cases; each entry being a test-case descriptor | (see below) | -| Key | Type | Description | Example | -| ------------------------ | ------ | ------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------ | -| `name` | String | Full name of the particular standard | _Flavor naming_ | -| `url` | String | Valid URL to the latest raw version of the particular standard | _[Flavor naming](https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0100-v2-flavor-naming.md)_ | -| `condition` | String | State of the particular standard, currently either `mandatory` or `optional`, default is `mandatory` | _mandatory_ | -| `checks` | Array | List of all checks that must pass; each entry being a check descriptor | | +The parameters specified here will be added to the variable assignment for all check tools that belong to this module, so they will be substituted in the same way. +The values to these parameters must be provided in the include descriptor as explained above. + +Using parameters offers two advantages: + +- they may show up in the automatically generated documentation, whereas the check tools themselves probably won't. +- multiple versions of a standard can be represented using the same module, if everything that changes between versions can be captured by the parameters. 
### Check descriptor The following fields are valid for every check descriptor: -| Key | Type | Description | Example | -| ----------------- | ------ | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------- | -| `id` | String | Identifier for this check (immutable and unique within this version of the certificate scope) | image-md-check | -| `condition` | String | _Optionally_ overrides the per-standard condition (`mandatory` or `optional`) | _optional_ | -| `lifetime` | String | One of: `day` (_default_), `week`, `month`, `quarter`; the test result is valid until the end of the next period | _week_ | -| `section` | String | _Optional_ what section to associate this check with (sections can be checked in isolation); default: equal to lifetime | _flavor-name syntax_ | +| Key | Type | Description | Example | +| ----------------- | ------ | ---------------------------------------------------------------------------------------------- | ---------- | +| `section` | String | _Optional_ what section to associate this check with (sections can be checked in isolation) | `weekly` | Additional fields are valid depending on whether the check is automated or manual. @@ -170,72 +251,68 @@ _Note_: the `executable` could in principle also be given via a URL; however, th TBD -### Basic Example - -```yaml -name: SCS Open IaaS -url: https://raw.githubusercontent.com/SovereignCloudStack/Docs/main/Certification/scs-open-iaas.yaml -prerequisite: - name: SCS Compatible IaaS - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Tests/scs-compatible-iaas.yaml -variables: - - os_cloud -versions: - - version: v5 # This version is in a draft state and work in progress - # No stabilized_at: date set yet - standards: - - name: Flavor naming - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0100-v2-flavor-naming.md - condition: mandatory # is default and can be left out - checks: - - executable: flavor-name-check.py - env: - OS_CLOUD: "{os_cloud}" - id: flavor-name-check - lifetime: day - - name: Image metadata - url: https://raw.githubusercontent.com/SovereignCloudStack/Docs/main/Standards/SCS-0004-v1-image-metadata.md - condition: mandatory - checks: - - executable: image-md-check.py - args: -c {os_cloud} -v - id: image-md-check - lifetime: day - - executable: image-md-check2.py - condition: optional - id: image-md-check-2 - lifetime: day - - version: v4 # This is the upcoming version with a given target date. No further changes should be done to this set of standards - stabilized_at: 2022-04-01 - standards: - - name: .... - - - version: v3 # This is the stable set of standards that is currently active - stabilized_at: 2021-10-01 - deprecated_at: 2022-11-08 - standards: - - name: .... - - - version: v2 # This set of standards is obsolete and has been replaced by v3 - stabilized_at: 2021-07-01 - deprecated_at: 2021-11-01 - standards: - - name: .... 
-``` +### Test-case descriptor + +| Key | Type | Description | Example | +| ----------------- | --------------- | ----------------------------------------------------------------------------------------------------------------- | ----------------- | +| `id` | String | Identifier for this test case (immutable and unique within this module) | `image-md-check` | +| `lifetime` | String | One of: `day` (_default_), `week`, `month`, `quarter`; the test result is valid until the end of the next period | `week` | +| `tags` | List of strings | A tag is a keyword that will be used to select this test case using a selector expression | `[mandatory]` | +| `description` | String | Short description of the test case | | + +A tag MUST NOT contain any of these characters: space, comma, exclamation mark, forward slash. + +The `id` of a test case MUST NOT be changed. +Exceptions MAY be made if the test case is not referenced by any stable version. + +### Timeline entry + +The timeline is a list of timeline entries as detailed below. Each timeline entry represents a time period +starting at a given date and ending immediately before the chronologically next entry, if any, otherwise the +time period extends indefinitely. The list itself SHOULD be sorted by this date in descending order, however +tooling MUST NOT depend on this order. + +| Key | Type | Description | Example | +| ----------- | --------------- | ---------------------------------------------------------------- | ----------------- | +| `date` | Date | ISO formatted date indicating the date when this period begins. | `2022-11-09` | +| `versions` | Map of strings | Maps versions to validity code | `v3: effective` | + +The following validity codes are recognized: + +- `effective`: the version can be certified against; it MUST be stable at the start of the period. +- `warn`: the version can be certified against, but a PASS MUST be accompanied by a warning that the version + is about to expire; the version MUST be stable at the start of the period. +- `draft`: the version can be tested against, but not certified; the version need not be stable. +- `deprecated`: the version can be tested against, but not certified. + +Any version not listed in `versions` is considered `deprecated`. + +If no other restriction is given, any version listed in `version` SHOULD be tested against. +This includes any version listed as `deprecated`; the rationale here is that, while the test subject +can no longer be certified against it, some customers may still work with that version. + +Note: Compliance with a new (effective) version often implies compliance with an older (deprecated) one. +Including the older one into the test is meant to increase the confidence that this is indeed the case, or, +if it isn't, serves to provide a clear picture of how many test subjects still comply with the old version. + +Note: We intend to keep only one version in effect, except for a grace period of 4 to 6 weeks, when two versions +are effective at the same time. ## Process The lifecycle any version of any certificate scope goes through the following phases: -Draft, Stable, and Deprecated. +Draft and (optionally) Stable. The phase transition is performed using a pull request. ```mermaid graph TD B[Draft] -->|Pull Request| D[Stable] - D -->|Pull Request| F[Deprecated] ``` -Note that one pull request can affect multiple versions, but each pull request has to affect -at most one layer. +The timeline is considered append-only (or rather, prepend-only). 
A new entry is added using +a pull request. + +It is possible to use the same pull request to add a new version, stabilize some version, and +add a new timeline entry, if so desired. Each pull request is to be voted upon in the corresponding team meeting. The vote has to be on the pull request only, i.e., it may not affect any other pull request or issue, and it @@ -245,13 +322,13 @@ must be announced 14 days in advance via the corresponding mailing list. ### File format -In order to have a document that can be processed by a wide range of tools, we need to opt for a simple but yet well supported format. +In order to have a document that can be processed by a wide range of tools, we need to opt for a simple but yet well-supported format. YAML offers readability for humans as well as good support by many frameworks. Since YAML is heavily used in the cloud and container domain, the choice is obvious. ### Dependency graph for certifications -This standard only allows exactly one depending certification, otherwise we would need to use a list of mappings. Since this is +This standard only allows depending on exactly one certification, otherwise we would need to use a list of mappings. Since this is in accordance to the current plan of the SIG Standardization & Certification, we can safely ignore multiple dependency of certification for now. diff --git a/Standards/scs-0004-v1-achieving-certification.md b/Standards/scs-0004-v1-achieving-certification.md index 682accfbc..cb6f39a89 100644 --- a/Standards/scs-0004-v1-achieving-certification.md +++ b/Standards/scs-0004-v1-achieving-certification.md @@ -41,7 +41,7 @@ As operator, I want to obtain a certificate with the scope SCS-compatible IaaS o 6. Once the certificate is granted by the SCS certification assessment body, the operator SHOULD use the corresponding logo and publicly state the certified "SCS compatibility" on the respective layer for the time of the validity of the certification. In case of a public cloud, this public display is even REQUIRED. In any case, the logo MUST be accompanied by a hyperlink (a QR code for printed assets) to the respective certificate status page. -7. If the certificate is to be revoked for any reason, it will be included in a publicly available Certificate Revokation List (CRL). This fact will also be reflected in the certificate status page. +7. If the certificate is to be revoked for any reason, it will be included in a publicly available Certificate Revocation List (CRL). This fact will also be reflected in the certificate status page. 8. If any of the automated tests or manual checks fail after the certificate has been issued, the certificate is not immediately revoked. Rather, the automated tests MUST pass 99.x % of the runs, and the operator SHALL be notified at the second failed attempt in a row at the latest. In case a manual check fails, it has to be repeated at a date to be negotiated with SCS. It MAY NOT fail more than two times in a row. 
diff --git a/Standards/scs-0100-v1-flavor-naming.md b/Standards/scs-0100-v1-flavor-naming.md index cc4c31bfc..d874cdf53 100644 --- a/Standards/scs-0100-v1-flavor-naming.md +++ b/Standards/scs-0100-v1-flavor-naming.md @@ -94,7 +94,7 @@ the lack of workload management that would prevent worst case performance < 20% #### Insufficient microcode Not using these mitigations must be indicated by an additional `i suffix` for insecure -(weak protection against CPU vulns through insufficient microcode, lack of disabled hyperthreading +(weak protection against CPU vulnerabilities through insufficient microcode, lack of disabled hyperthreading on L1TF susceptible CPUs w/o effective core scheduling or disabled protections on the host/hypervisor). #### Examples @@ -299,22 +299,22 @@ The optional `h` suffix to the comput unit count indicates high-performance (e.g high bandwidth gfx memory such as HBM); `h` can be duplicated for even higher performance. -`-ib` indicates Inifinband networking. +`-ib` indicates Infiniband networking. More extensions will be forthcoming. -Extensions need to be specified in the above mentioned order. +Extensions need to be specified in the above-mentioned order. ## Proposal Examples -| Example | Decoding | -| ------------------------- | ----------------------------------------------------------------------------------------------- | -| SCS-2C:4:10n | 2 dedicated cores (x86-64), 4GiB RAM, 10GB network disk | -| SCS-8Ti:32:50p-i1 | 8 dedicated hyperthreads (insecure), Skylake, 32GiB RAM, 50GB local NVMe | -| SCS-1L:1u:5 | 1 vCPU (heavily oversubscribed), 1GiB Ram (no ECC), 5GB disk (unspecific) | -| SCS-16T:64:200s-GNa:64-ib | 16 dedicated threads, 64GiB RAM, 200GB local SSD, Inifiniband, 64 Passthrough nVidia Ampere SMs | -| SCS-4C:16:2x200p-a1 | 4 dedicated Arm64 cores (A78 class), 16GiB RAM, 2x200GB local NVMe drives | -| SCS-1V:0.5 | 1 vCPU, 0.5GiB RAM, no disk (boot from cinder volume) | +| Example | Decoding | +| ------------------------- | ---------------------------------------------------------------------------------------------- | +| SCS-2C:4:10n | 2 dedicated cores (x86-64), 4GiB RAM, 10GB network disk | +| SCS-8Ti:32:50p-i1 | 8 dedicated hyperthreads (insecure), Skylake, 32GiB RAM, 50GB local NVMe | +| SCS-1L:1u:5 | 1 vCPU (heavily oversubscribed), 1GiB Ram (no ECC), 5GB disk (unspecific) | +| SCS-16T:64:200s-GNa:64-ib | 16 dedicated threads, 64GiB RAM, 200GB local SSD, Infiniband, 64 Passthrough nVidia Ampere SMs | +| SCS-4C:16:2x200p-a1 | 4 dedicated Arm64 cores (A78 class), 16GiB RAM, 2x200GB local NVMe drives | +| SCS-1V:0.5 | 1 vCPU, 0.5GiB RAM, no disk (boot from cinder volume) | ## Standard SCS flavors @@ -376,14 +376,14 @@ for usability and easier portability, even beyond the mandated flavors. You must be very careful to expose low vCPU guarantees (`L` instead ov `V`), insecure hyperthreading/microcode `i`, non-ECC-RAM `u`, memory oversubscription `o`. Note that omitting these qualifiers is overstating your security, reliability or performance properties and may be reason for -clients to feel betrayed or claim damages. It might in extreme cases also cause SCS to withdraw certification +clients to feel betrayed or claim damages. It might, in extreme cases, also cause SCS to withdraw certification along with public statements. -You may offer additional SCS- flavors, following the naming scheme outlined here. +You may offer additional `SCS-` flavors, following the naming scheme outlined here. You may offer additional flavors, not following above scheme. 
-You must not offer flavors with the SCS- prefix which do not follow this naming scheme. +You must not offer flavors with the `SCS-` prefix which do not follow this naming scheme. You must not extend the SCS naming scheme with your own suffices; you are encouraged however to suggest extensions that we can discuss and add to the official scheme. @@ -434,8 +434,8 @@ on the flavor list compliance of the cloud environment. Some providers might offer VM services ("IaaS") without trying to adhere to SCS standards, yet still finding the flavor naming standards useful. The Gaia-X Technical Committee's -Provider Working Group (WG) would seem like a logical place for such dicussions then. +Provider Working Group (WG) would seem like a logical place for such discussions then. If so, we could -replace the SCS- prefix with a GX- prefix and transfer the naming scheme governance from +replace the `SCS-` prefix with a GX- prefix and transfer the naming scheme governance from the SCS project to the Gaia-X Provider WG (where we participate). SCS certification would then reference the Gaia-X flavor naming standard as a requirement. diff --git a/Standards/scs-0100-v2-flavor-naming.md b/Standards/scs-0100-v2-flavor-naming.md index ef18ef161..38d405828 100644 --- a/Standards/scs-0100-v2-flavor-naming.md +++ b/Standards/scs-0100-v2-flavor-naming.md @@ -40,8 +40,8 @@ Note that not all relevant properties of flavors can be discovered; creating a s to address this is a separate but related effort to the name standardization. Commonly used infrastructure-as-code tools do not provide a way to use discoverability features to express something like "I want a flavor with 2 vCPUs, 8GiB of RAM, a local -20GB SSD disk and Infiniband support but I don't care whether it's AMD or intel" in a -reasonable manner. Using flavor names to express this will thus continue to be useful +20GB SSD disk and Infiniband support, but I don't care whether it's AMD or intel" in a +reasonable manner. Using flavor names to express this will thus continue to be useful, and we don't expect the need for standardization of flavor names to go away until the commonly used IaC tools work on a higher abstraction layer than they currently do. @@ -75,7 +75,7 @@ encoding all details) as well as very detailed longer names. | `SCS-` | N`L/V/T/C`\[`i`\] | `-`N\[`u`\]\[`o`\] | \[`-`\[M`x`\]N\[`n/s/l/p`\]\] | \[`_`EXT\] | Note that `N` and `M` are placeholders for numbers here. -The optional fields are denoted in brackets (and have opt: in the header. +The optional fields are denoted in brackets (and have `opt:` in the header). See below for extensions. Note that all letters are case-sensitive. @@ -123,7 +123,7 @@ the lack of workload management that would prevent worst case performance < 20% #### Insufficient microcode Not using these mitigations must be indicated by an additional `i` suffix for insecure -(weak protection against CPU vulns through insufficient microcode, lack of disabled hyperthreading +(weak protection against CPU vulnerabilities through insufficient microcode, lack of disabled hyperthreading on L1TF susceptible CPUs w/o effective core scheduling or disabled protections on the host/hypervisor). #### Examples @@ -142,7 +142,7 @@ on L1TF susceptible CPUs w/o effective core scheduling or disabled protections o Cloud providers should use ECC memory. Memory oversubscription should not be used. -It is allowed to specify half GiBs (e.g. 3.5), though this is should not be done for larger memory sizes (>= 10GiB). 
+It is allowed to specify half GiBs (e.g. 3.5), though this should not be done for larger memory sizes (>= 10GiB). #### No ECC @@ -317,9 +317,9 @@ create all standard, mandatory SCS flavors for you. ## Extensions Extensions provide a possibility for providers that offer a very differentiated set -of flavors to indicate hypervisors, support for hardware/nested virtuatlization, +of flavors to indicate hypervisors, support for hardware/nested virtualization, CPU types and generations, high-frequency models, GPU support and GPU types as -well as Inifiniband support. (More extensions may be appended in the future.) +well as Infiniband support. (More extensions may be appended in the future.) Using the systematic naming approach ensures that two providers that offer flavors with the same specific features will use the same name for them, thus simplifying @@ -465,7 +465,7 @@ high bandwidth gfx memory such as HBM); More extensions may be forthcoming and appended in a later revision of this spec. -Extensions need to be specified in the above mentioned order. +Extensions need to be specified in the above-mentioned order. ### Naming options advice diff --git a/Standards/scs-0100-v3-flavor-naming.md b/Standards/scs-0100-v3-flavor-naming.md index 990814aae..587bde220 100644 --- a/Standards/scs-0100-v3-flavor-naming.md +++ b/Standards/scs-0100-v3-flavor-naming.md @@ -14,7 +14,7 @@ description: | ## Introduction -This is the standard v3.1 for SCS Release 5. +This is the standard v3.2 for SCS Release 8. Note that we intend to only extend it (so it's always backwards compatible), but try to avoid changing in incompatible ways. (See at the end for the v1 to v2 transition where we have not met that @@ -41,8 +41,8 @@ Note that not all relevant properties of flavors can be discovered; creating a s to address this is a separate but related effort to the name standardization. Commonly used infrastructure-as-code tools do not provide a way to use discoverability features to express something like "I want a flavor with 2 vCPUs, 8GiB of RAM, a local -20GB SSD disk and Infiniband support but I don't care whether it's AMD or intel" in a -reasonable manner. Using flavor names to express this will thus continue to be useful +20GB SSD disk and Infiniband support, but I don't care whether it's AMD or intel" in a +reasonable manner. Using flavor names to express this will thus continue to be useful, and we don't expect the need for standardization of flavor names to go away until the commonly used IaC tools work on a higher abstraction layer than they currently do. @@ -76,7 +76,7 @@ encoding all details) as well as very detailed longer names. | `SCS-` | N`L/V/T/C`\[`i`\] | `-`N\[`u`\]\[`o`\] | \[`-`\[M`x`\]N\[`n/h/s/p`\]\] | \[`_`EXT\] | Note that N and M are placeholders for numbers here. -The optional fields are denoted in brackets (and have opt: in the header. +The optional fields are denoted in brackets (and have `opt:` in the header). See below for extensions. Note that all letters are case-sensitive. 
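As a rough, non-normative illustration of the base scheme shown in the table above, the following Python sketch accepts the example names used in this document (it deliberately leaves the `_EXT` extensions undecoded and is no substitute for the official flavor-naming test suite referenced later in this document):

```python
import re

# Approximation of SCS-<N><L/V/T/C>[i]-<N>[u][o][-[Mx]<N>[n/h/s/p]][_EXT]; illustrative only.
BASE_NAME = re.compile(
    r"^SCS-"
    r"(?P<cpus>[0-9]+)(?P<cpu_type>[LVTC])(?P<insecure>i)?"                      # N L/V/T/C [i]
    r"-(?P<ram>[0-9]+(?:\.[0-9]+)?)(?P<no_ecc>u)?(?P<oversubscribed>o)?"         # -N [u][o]
    r"(?:-(?:(?P<disk_count>[0-9]+)x)?(?P<disk>[0-9]+)(?P<disk_type>[nhsp])?)?"  # [-[Mx]N[n/h/s/p]]
    r"(?P<extensions>_.*)?$"                                                     # [_EXT], not decomposed here
)

for name in ("SCS-2C-4-10n", "SCS-8Ti-32-50p_i1", "SCS-1V-0.5", "SCS-4C-16-2x200p_a1"):
    match = BASE_NAME.match(name)
    print(name, "->", match.groupdict() if match else "no match")
```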
@@ -131,7 +131,7 @@ the lack of workload management that would prevent worst case performance < 20% #### Insufficient microcode Not using these mitigations must be indicated by an additional `i` suffix for insecure -(weak protection against CPU vulns through insufficient microcode, lack of disabled hyperthreading +(weak protection against CPU vulnerabilities through insufficient microcode, lack of disabled hyperthreading on L1TF susceptible CPUs w/o effective core scheduling or disabled protections on the host/hypervisor). #### Examples @@ -150,7 +150,7 @@ on L1TF susceptible CPUs w/o effective core scheduling or disabled protections o Cloud providers should use ECC memory. Memory oversubscription should not be used. -It is allowed to specify half GiBs (e.g. 3.5), though this is should not be done for larger memory sizes (>= 10GiB). +It is allowed to specify half GiBs (e.g. 3.5), though this should not be done for larger memory sizes (>= 10GiB). #### No ECC @@ -366,13 +366,15 @@ The options for arch are as follows: The generation is vendor specific and can be left out, but it can only be specified in conjunction with a vendor. At present, these values are possible: -| Generation | i (Intel x86-64) | z (AMD x86-64) |  a (AArch64) | r (RISC-V) | -| ---------- | ---------------- | -------------- | ------------------ | ---------- | -| 0 | pre Skylake | pre Zen | pre Cortex A76 | TBD | -| 1 | Skylake | Zen-1 (Naples) | A76/NeoN1 class | TBD | -| 2 | Cascade Lake | Zen-2 (Rome) | A78/x1/NeoV1 class | TBD | -| 3 | Ice Lake | Zen-3 (Milan) | A71x/NeoN2 (ARMv9) | TBD | -| 4 | Sapphire Rapids | Zen-4 (Genoa) | | TBD | +| Generation | i (Intel x86-64) | z (AMD x86-64) |  a (AArch64) | r (RISC-V) | +| ---------- | ----------------- | -------------- | -------------------- | ---------- | +| 0 | pre Skylake | pre Zen | pre Cortex A76 | TBD | +| 1 | Skylake | Zen-1 (Naples) | A76/NeoN1 class | TBD | +| 2 | Cascade Lake | Zen-2 (Rome) | A78/x1/NeoV1 class | TBD | +| 3 | Ice Lake | Zen-3 (Milan) | A71x/NeoN2/V2(ARMv9) | TBD | +| 4 | Sapphire Rapids | Zen-4 (Genoa) | AmpereOne (ARMv8.6) | TBD | +| 5 | Sierra Forest(E) | Zen-5 (Turin) | A72x/NeoN3/V3(Av9.2) | TBD | +| 6 | Granite Rapids(P) | | | TBD | It is recommended to leave out the `0` when specifying the old generation; this will help the parser tool, which assumes 0 for an unspecified value and does leave it @@ -384,8 +386,11 @@ out when generating the name for comparison. In other words: 0 has a meaning of We don't differentiate between Zen-4 (Genoa) and Zen-4c (Bergamo); L3 cache per Siena core is smaller on Bergamo and the frequency lower but the cores are otherwise identical. As we already have a qualifier `h` that allows to specify higher frequencies -(which Genoa thus may use more and Bergamo less or not), we have enough distinction -capabilities. +(which Genoa thus may use more and Bergamo not), we have enough distinction +capabilities. The same applies to Zen-5 (Turin) and Zen-5c (Turin Dense). +For intel with the server E-cores (Crestmont), these received their own +generation assignment, as the difference to the server P-cores (Redwood Cove) +is more significant. ::: @@ -412,7 +417,7 @@ capabilities. 
### [OPTIONAL] GPU support

-Format: `_`\[`G/g`\]X\[N\]\[`-`M\]\[`h`\]
+Format: `_`\[`G/g`\]X\[N\[`-`M\[`h`\]\[`-`V\[`h`\]\]\]\]

This extension provides more details on the specific GPU:

@@ -420,7 +425,9 @@ This extension provides more details on the specific GPU:
- vendor (X)
- generation (N)
- number (M) of processing units that are exposed (for pass-through) or assigned; see table below for vendor-specific terminology
-- high-performance indicator (`h`)
+- high-frequency indicator (`h`) for compute units
+- amount of video memory (V) in GiB
+- an indicator for high-bandwidth memory

Note that the vendor letter X is mandatory, generation and processing units are optional.

@@ -430,18 +437,34 @@ Note that the vendor letter X is mandatory, generation and processing units are
| `A` | AMD | compute units (CUs) |
| `I` | Intel | execution units (EUs) |

-For nVidia, the generation N can be f=Fermi, k=Kepler, m=Maxwell, p=Pascal, v=Volta, t=turing, a=Ampere, l=Ada Lovelace, ...,
-for AMD GCN-x=0.x, RDNA1=1, RDNA2=2, RDNA3=3,
-for Intel Gen9=0.9, Xe(12.1)=1, ...
+For nVidia, the generation N can be f=Fermi, k=Kepler, m=Maxwell, p=Pascal, v=Volta, t=turing, a=Ampere, l=Ada Lovelace, g=Grace Hopper, ...,
+for AMD GCN-x=0.x, RDNA1=1, C/RDNA2=2, C/RDNA3=3, C/RDNA3.5=3.5, C/RDNA4=4, ...
+for Intel Gen9=0.9, Xe(12.1/DG1)=1, Xe(12.2)=2, Arc(12.7/DG2)=3 ...
(Note: This may need further work to properly reflect what's out there.)

-The optional `h` suffix to the compute unit count indicates high-performance (e.g. high freq or special
-high bandwidth gfx memory such as HBM);
-`h` can be duplicated for even higher performance.
+The optional `h` suffix to the compute unit count indicates high-frequency GPU compute units.
+It is not normally recommended to use it except when there are several variants of cards within
+a generation of GPUs and with a similar number of SMs/CUs/EUs.
+In case there are even more than two variants, the letter `h` can be duplicated for even
+higher frequencies.

-Example: `SCS-16V-64-500s_GNa-14h`
-This flavor has a pass-through GPU nVidia Ampere with 14 SMs and either high-bandwidth memory or specially high frequencies.
-Looking through GPU specs you could guess it's 1/4 of an A30.
+Please note that there are GPUs from one generation and vendor that have vastly different sizes
+(or different fractions are being passed to an instance with multi-instance-GPUs). The number
+M allows one to differentiate between them and gives an indication of the compute capability and
+parallelism. M cannot be compared at all between different generations, let alone different
+vendors.
+
+The amount of video memory dedicated to the instance can be indicated by V (in binary
+Gigabytes). This number needs to be an integer; fractional memory sizes must be rounded
+down. An optional `h` can be used to indicate high bandwidth memory (such as HBM2+) with
+bandwidths well above 1GiB/s.
+
+Example: `SCS-16V-64-500s_GNa-14-6h`
+This flavor has a pass-through GPU nVidia Ampere with 14 SMs and 6 GiB of high-bandwidth video
+memory. Looking through GPU specs you could guess it's 1/4 of an A30.
+
+We have a table with common GPUs in the
+[implementation hints for this standard](scs-0100-w1-flavor-naming-implementation-testing.md).

### [OPTIONAL] Infiniband

@@ -485,14 +508,14 @@ an image is considered broken by the SCS team.
## Proposal Examples -| Example | Decoding | -| ------------------------- | ---------------------------------------------------------------------------------------------- | -| SCS-2C-4-10n | 2 dedicated cores (x86-64), 4GiB RAM, 10GB network disk | -| SCS-8Ti-32-50p_i1 | 8 dedicated hyperthreads (insecure), Skylake, 32GiB RAM, 50GB local NVMe | -| SCS-1L-1u-5 | 1 vCPU (heavily oversubscribed), 1GiB Ram (no ECC), 5GB disk (unspecific) | -| SCS-16T-64-200s_GNa-64_ib | 16 dedicated threads, 64GiB RAM, 200GB local SSD, Infiniband, 64 Passthrough nVidia Ampere SMs | -| SCS-4C-16-2x200p_a1 | 4 dedicated Arm64 cores (A76 class), 16GiB RAM, 2x200GB local NVMe drives | -| SCS-1V-0.5 | 1 vCPU, 0.5GiB RAM, no disk (boot from cinder volume) | +| Example | Decoding | +| ------------------------------ | ---------------------------------------------------------------------------------------------- | +| `SCS-2C-4-10n` | 2 dedicated cores (x86-64), 4GiB RAM, 10GB network disk | +| `SCS-8Ti-32-50p_i1` | 8 dedicated hyperthreads (insecure), Skylake, 32GiB RAM, 50GB local NVMe | +| `SCS-1L-1u-5` | 1 vCPU (heavily oversubscribed), 1GiB Ram (no ECC), 5GB disk (unspecific) | +| `SCS-16T-64-200s_GNa-72-24_ib` | 16 dedicated threads, 64GiB RAM, 200GB local SSD, Infiniband, 72 Passthrough nVidia Ampere SMs | +| `SCS-4C-16-2x200p_a1` | 4 dedicated Arm64 cores (A76 class), 16GiB RAM, 2x200GB local NVMe drives | +| `SCS-1V-0.5` | 1 vCPU, 0.5GiB RAM, no disk (boot from cinder volume) | ## Previous standard versions @@ -541,7 +564,7 @@ However, we have been reaching out to the OpenStack Public Cloud SIG and the ALA members to seek further alignment. Getting upstream OpenStack support for flavor aliases would provide more flexibility -and ease migrations between providers, also providers that don't offer the SCS- +and ease migrations between providers, also providers that don't offer the `SCS-` flavors. We also would like to see upstream `extra_specs` standardizing the discoverability of some diff --git a/Standards/scs-0100-w1-flavor-naming-implementation-testing.md b/Standards/scs-0100-w1-flavor-naming-implementation-testing.md index 5f179d2ad..868215476 100644 --- a/Standards/scs-0100-w1-flavor-naming-implementation-testing.md +++ b/Standards/scs-0100-w1-flavor-naming-implementation-testing.md @@ -2,7 +2,7 @@ title: "SCS Flavor Naming Standard: Implementation and Testing Notes" type: Supplement track: IaaS -status: Proposal +status: Draft supplements: - scs-0100-v1-flavor-naming.md - scs-0100-v2-flavor-naming.md @@ -15,36 +15,139 @@ The three major versions of the standard that exist so far are very similar, and Therefore, the procedures needed to implement or test them are very similar as well. Yet, this document will only cover v3, because v1 and v2 are already obsolete by the time of writing. -## Implementation Notes +## Implementation notes Every flavor whose name starts with `SCS-` must conform with the naming scheme laid down in the standard. -### Operational Tooling +### Operational tooling -#### Syntax Check +#### Syntax check The [test suite](https://github.com/SovereignCloudStack/standards/tree/main/Tests/iaas/flavor-naming) comes with a handy [command-line utility](https://github.com/SovereignCloudStack/standards/tree/main/Tests/iaas/flavor-naming/cli.py) -that can be used to validate flavor names, to -interactively construct a flavor name via a questionnaire, and to generate prose descriptions for given -flavor names. 
See the
-[README](https://github.com/SovereignCloudStack/standards/tree/main/Tests/iaas/flavor-naming/README.md)
+that can be used to validate flavor names, to interactively construct a flavor name
+via a questionnaire, and to generate prose descriptions for given flavor names.
+See the [README](https://github.com/SovereignCloudStack/standards/tree/main/Tests/iaas/flavor-naming/README.md)
for more details.

The functionality of this script is also (partially) exposed via the web page
-[https://flavors.scs.community/](https://flavors.scs.community/).
+[https://flavors.scs.community/](https://flavors.scs.community/), which can both
+parse SCS flavor names as well as generate them.

With the OpenStack tooling (`python3-openstackclient`, `OS_CLOUD`) in place, you can call
`cli.py -v parse v3 $(openstack flavor list -f value -c Name)`
to get a report on the syntax compliance of the flavor names of the cloud environment.

-#### Flavor Creation
+#### Flavor creation

-The [OpenStack Flavor Manager](https://github.com/osism/openstack-flavor-manager) will create a whole set
-of flavors in one go, given a YAML description of this set.
+The [OpenStack Flavor Manager from OSISM](https://github.com/osism/openstack-flavor-manager)
+will create a whole set of flavors in one go.
+To that end, it provides different options: either the standard mandatory and
+possibly recommended flavors can be created, or the user can provide a file containing their own flavors.

-## Automated Tests
+### GPU table
+
+The most commonly used datacenter GPUs are listed here, showing what GPUs (or partitions
+of a GPU) result in what GPU part of the flavor name.
+
+#### Nvidia (`N`)
+
+We show the most popular recent generations here. Older ones are of course possible as well.
+
+##### Ampere (`a`)
+
+One Streaming Multiprocessor on Ampere has 64 (A30, A100) or 128 Cuda Cores (A10, A40).
+
+GPUs without MIG (one SM has 128 Cuda Cores and 4 Tensor Cores):
+
+| Nvidia GPU | Tensor C | Cuda Cores | SMs | VRAM      | SCS name piece |
+|------------|----------|------------|-----|-----------|----------------|
+| A10        | 288      | 9216       | 72  | 24G GDDR6 | `GNa-72-24`    |
+| A40        | 336      | 10752      | 84  | 48G GDDR6 | `GNa-84-48`    |
+
+GPUs with Multi-Instance-GPU (MIG), where GPUs can be partitioned and the partitions handed
+out as pass-through PCIe devices to instances. One SM corresponds to 64 Cuda Cores and
+4 Tensor Cores.
+
+| Nvidia GPU | Fraction | Tensor C | Cuda Cores | SMs | VRAM      | SCS GPU name   |
+|------------|----------|----------|------------|-----|-----------|----------------|
+| A30        | 1/1      | 224      | 3584       | 56  | 24G HBM2  | `GNa-56-24`    |
+| A30        | 1/2      | 112      | 1792       | 28  | 12G HBM2  | `GNa-28-12`    |
+| A30        | 1/4      | 56       | 896        | 14  | 6G HBM2   | `GNa-14-6`     |
+| A30X       | 1/1      | 224      | 3584       | 56  | 24G HBM2e | `GNa-56h-24h`  |
+| A100       | 1/1      | 432      | 6912       | 108 | 80G HBM2e | `GNa-108h-80h` |
+| A100       | 1/2      | 216      | 3456       | 54  | 40G HBM2e | `GNa-54h-40h`  |
+| A100       | 1/4      | 108      | 1728       | 27  | 20G HBM2e | `GNa-27h-20h`  |
+| A100       | 1/7      | 60+      | 960+       | 15+ | 10G HBM2e | `GNa-15h-10h`+ |
+| A100X      | 1/1      | 432      | 6912       | 108 | 80G HBM2e | `GNa-108-80h`  |
+
+[+] The precise numbers for the 1/7 MIG configurations are not known by the author of
+this document and need validation.
+
+##### Ada Lovelace (`l`)
+
+No MIG support, 128 Cuda Cores and 4 Tensor Cores per SM.
+
+| Nvidia GPU | Tensor C | Cuda Cores | SMs | VRAM      | SCS name piece |
+|------------|----------|------------|-----|-----------|----------------|
+| L4         | 232      | 7424       | 58  | 24G GDDR6 | `GNl-58-24`    |
+| L40        | 568      | 18176      | 142 | 48G GDDR6 | `GNl-142-48`   |
+| L40G       | 568      | 18176      | 142 | 48G GDDR6 | `GNl-142h-48`  |
+| L40S       | 568      | 18176      | 142 | 48G GDDR6 | `GNl-142hh-48` |
+
+##### Grace Hopper (`g`)
+
+These have MIG support and 128 Cuda Cores and 4 Tensor Cores per SM.
+
+| Nvidia GPU | Fraction | Tensor C | Cuda Cores | SMs | VRAM       | SCS GPU name   |
+|------------|----------|----------|------------|-----|------------|----------------|
+| H100       | 1/1      | 528      | 16896      | 132 | 80G HBM3   | `GNg-132-80h`  |
+| H100       | 1/2      | 264      | 8448       | 66  | 40G HBM3   | `GNg-66-40h`   |
+| H100       | 1/4      | 132      | 4224       | 33  | 20G HBM3   | `GNg-33-20h`   |
+| H100       | 1/7      | 72+      | 2304+      | 18+ | 10G HBM3   | `GNg-18-10h`+  |
+| H200       | 1/1      | 528      | 16896      | 132 | 141G HBM3e | `GNg-132-141h` |
+| H200       | 1/2      | 264      | 8448       | 66  | 70G HBM3e  | `GNg-66-70h`   |
+| ...        |
+
+[+] The precise numbers for the 1/7 MIG configurations are not known by the author of
+this document and need validation.
+
+#### AMD Radeon (`A`)
+
+##### CDNA 2 (`2`)
+
+One CU contains 64 Stream Processors.
+
+| AMD Instinct | Stream Proc | CUs | VRAM       | SCS name piece |
+|--------------|-------------|-----|------------|----------------|
+| Inst MI210   | 6656        | 104 | 64G HBM2e  | `GA2-104-64h`  |
+| Inst MI250   | 13312       | 208 | 128G HBM2e | `GA2-208-128h` |
+| Inst MI250X  | 14080       | 220 | 128G HBM2e | `GA2-220-128h` |
+
+##### CDNA 3 (`3`)
+
+SRIOV partitioning is possible, resulting in pass-through for
+up to 8 partitions, somewhat similar to Nvidia MIG. 4 Tensor
+Cores and 64 Stream Processors per CU.
+
+| AMD GPU     | Tensor C | Stream Proc | CUs | VRAM       | SCS name piece |
+|-------------|----------|-------------|-----|------------|----------------|
+| Inst MI300X | 1216     | 19456       | 304 | 192G HBM3  | `GA3-304-192h` |
+| Inst MI325X | 1216     | 19456       | 304 | 288G HBM3  | `GA3-304-288h` |
+
+#### intel Xe (`I`)
+
+##### Xe-HPC (Ponte Vecchio) (`3`)
+
+1 EU corresponds to one Tensor Core and contains 128 Shading Units.
+
+| intel DC GPU | Tensor C | Shading U | EUs | VRAM       | SCS name part  |
+|--------------|----------|-----------|-----|------------|----------------|
+| Max 1100     | 56       | 7168      | 56  | 48G HBM2e  | `GI3-56-48h`   |
+| Max 1550     | 128      | 16384     | 128 | 128G HBM2e | `GI3-128-128h` |
+
+## Automated tests

### Errors

@@ -68,6 +171,6 @@ The script [`flavor-names-openstack.py`](https://github.com/SovereignCloudStack/
talks to the OpenStack API of the cloud specified by the `OS_CLOUD` environment
and queries properties and checks the names for standards compliance.

-## Manual Tests
+## Manual tests

To be determined.
diff --git a/Standards/scs-0101-v1-entropy.md b/Standards/scs-0101-v1-entropy.md
index 7b9a10744..2b719079f 100644
--- a/Standards/scs-0101-v1-entropy.md
+++ b/Standards/scs-0101-v1-entropy.md
@@ -52,7 +52,7 @@ a HRNG, they are not treated as such by the kernel, i.e., they _do not_
appear as `/dev/hwrng`!

The Linux kernel combines multiple sources of entropy into a pool. To this
-end, it will use all of the sources discussed so far with one exception:
+end, it will use all the sources discussed so far with one exception:
the HRNG must be fed into the pool (if so desired) via the daemon `rngd`.
The kernel converts the entropy from the pool into cryptographically secure
random numbers that appear under `/dev/random` and `/dev/urandom`.
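As a hedged illustration of the mechanisms described above, the following commands can be run inside a VM to inspect the entropy situation (these are standard Linux interfaces; the exact service name for `rngd` may differ between distributions):

```shell
ls -l /dev/hwrng                            # present if a HRNG (e.g. virtio-rng) is exposed to the guest
cat /proc/sys/kernel/random/entropy_avail   # the kernel's estimate of the entropy in its pool
systemctl status rngd                       # only relevant if the HRNG is to be fed into the pool
```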
@@ -78,7 +78,7 @@ be used to feed it into the kernel's entropy pool. On a side note, the kernel exposes available HRNGs via the special directory `/sys/devices/virtual/misc/hw_random`. In particular, the -file `rng_available` lists availabe HRNGs while the file `rng_current` +file `rng_available` lists available HRNGs while the file `rng_current` contains the HRNG currently used. In summary, with current kernels and CPUs entropy in virtual instances diff --git a/Standards/scs-0101-w1-entropy-implementation-testing.md b/Standards/scs-0101-w1-entropy-implementation-testing.md index e770f0ab8..19e1f43dc 100644 --- a/Standards/scs-0101-w1-entropy-implementation-testing.md +++ b/Standards/scs-0101-w1-entropy-implementation-testing.md @@ -2,23 +2,25 @@ title: "SCS Entropy: Implementation and Testing Notes" type: Supplement track: IaaS -status: Proposal +status: Draft supplements: - scs-0101-v1-entropy.md --- -## Implementation +## Implementation notes -We presume that almost nothing has to be done (or indeed can be done), as -long as the CPUs and VM images are reasonably recent; only the flavor and -image attributes have to be set: +With reasonably recent hardware—x86 CPU with RDRAND/RDSEED (Intel from 2012, +AMD from 2015) or ARM CPU with FEAT_RNG or FEAT_RNG_TRAP—and recent VM image—Linux +kernel 5.18 or higher—, there is (almost) nothing to be done. -- flavor: `hw_rng:allowed=True` , -- image: `hw_rng_model: virtio` . +Only the flavor and image attributes required by the standard have to be set: -## Automated Tests +- flavor extra_spec: `hw_rng:allowed=True` , +- image property: `hw_rng_model: virtio` . -### Images Sample +## Automated tests + +### Images sample Some checks need to be performed on a live instance. For these checks, it is necessary to choose a sample of VM images to test on. @@ -59,6 +61,6 @@ as ensured by the image metadata standard. The script [`entropy-check.py`](https://github.com/SovereignCloudStack/standards/blob/main/Tests/iaas/entropy/entropy-check.py) connects to OpenStack and performs the checks described in this section. -## Manual Tests +## Manual tests None. diff --git a/Standards/scs-0102-v1-image-metadata.md b/Standards/scs-0102-v1-image-metadata.md index 907da3751..18d42adf7 100644 --- a/Standards/scs-0102-v1-image-metadata.md +++ b/Standards/scs-0102-v1-image-metadata.md @@ -1,5 +1,5 @@ --- -title: SCS Image Metadata Standard +title: SCS Image Metadata type: Standard stabilized_at: 2022-10-31 status: Stable @@ -16,7 +16,7 @@ description: | ## Motivation Many clouds offer standard Operating System images for their users' convenience. -To make them really useful, they should contain meta data (properties) to allow +To make them really useful, they should contain metadata (properties) to allow users to understand what they can expect using these images. The specification is targeting images that are managed by the service provider, @@ -53,7 +53,7 @@ in the [OpenStack Image documentation](https://docs.openstack.org/glance/latest/ The following properties are considered mandatory: - `architecture`, `hypervisor_type` -- `min_disk_size` (in GiB), `min_ram` (in MiB) +- `min_disk` (in GiB), `min_ram` (in MiB) - `os_version`, `os_distro` - `hw_rng_model`, `hw_disk_bus` (`scsi` recommended, and then setting `hw_scsi_model` is also recommended) @@ -78,7 +78,7 @@ level). Technically, the thus updated image is a new image and will thus carry a new UUID. It is recommended that the old image gets renamed (e.g. 
build date or patch level attached) -and hidden (`os_hidden=true`), but remains accessible via its (unchanged) UUID for some +and hidden (`os_hidden=True`), but remains accessible via its (unchanged) UUID for some time. The update handling by the provider is described via the properties `replace_frequency`, @@ -119,7 +119,7 @@ the issue becomes public and a tested fix is available as maintenance update fro distribution_. A value of 0 indicates a best-effort approach without firm SLAs; the field not being present indicates no commitment. A value of 48 would indicate that the provider commits to a new image within 48hrs. A critical issue is defined as a security vulnerability -with a CVSS score of 9.0 or higher that affects a package that is included in the image. +with a CVSS score of 9.0 or higher that affects software that is included in the image. The `provided_until` field is supposed to contain a date in `YYYY-MM-DD` format that indicates until when an image under this name will be provided and (according to the @@ -142,7 +142,10 @@ by its UUID. Note that the old images must be hidden from the image catalogue or renamed (or both) to avoid failing referencing by name. Note that `last-N` may be limited by the `provided_until` -date. +date. We recommend providers that keep old images according to the advertized `uuid_validity` +to hide older images (setting the `os_hidden` property to `True`). If the outdated images must +remain visible, the recommendation is to rename the images by attaching a datestamp in the +format " `YYYYMMDD`" to the name where the date must reflect the `build_date` of the image. The three properties `uuid_validity`, `provided_until` and `replace_frequency` are mandatory; the field `hotfix_hours` is optional. @@ -164,13 +167,13 @@ The provider makes an effort to replace images upon critical security issues out - Mandatory: `image_source` needs to be a URL to point to a place from which the image can be downloaded. (Note: This may be set to the string "private" to indicate that the image can not be freely downloaded.) -- Mandatory: `image_description` needs to be an URL (or text) with release notes and other human readable +- Mandatory: `image_description` needs to be a URL (or text) with release notes and other human-readable data about the image. - Recommended _tag_: `managed_by_VENDOR` Note that for most images that come straight from an upstream source, `image_description` should point -to a an upstream web page where these images are described. If download links are available as well +to an upstream web page where these images are described. If download links are available as well on that page, `image_source` can point to the same page, otherwise a more direct link to the image should be used, e.g. directly linking the `.qcow2` or `.img` file. If providers have their own image building machinery or do some post-processing on top of @@ -187,7 +190,7 @@ upstream images, they should point to the place where they document and offer th the patch status. - Mandatory: `image_original_user` is the default login user for the operating system which can connect to the image via the injected SSH key or provided password. (This can be set to `none` if no default - user name exists for the operating system.) + username exists for the operating system.) - Optional: `patchlevel` can be set to an operating specific patch level that describes the patch status — typically we would expect the `image_build_date` to be sufficient. 
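A hedged sketch of how a provider might set some of the properties discussed above via the OpenStack CLI; the image name and all property values are purely illustrative:

```shell
openstack image set \
  --property image_build_date=2024-05-01 \
  --property image_original_user=ubuntu \
  --property image_source=https://cloud-images.ubuntu.com/jammy/ \
  --property image_description="https://cloud-images.ubuntu.com/jammy/ (upstream release notes)" \
  "Ubuntu 22.04"
```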
@@ -208,10 +211,10 @@ might not use any of these properties, except maybe `maintained_until`. Note tha Windows images would typically require `license_included`, `subscription_included`. A boolean property that is not present is considered to be `false`. -- Optional: `license_included` (boolean) indicates whether or not the flavor fee +- Optional: `license_included` (boolean) indicates whether the flavor fee includes the licenses required to use this image. This field is mandatory for images that contain software that requires commercial licenses. -- Optional: `license_required` (boolean) indicates whether or not a customer must bring +- Optional: `license_required` (boolean) indicates whether a customer must bring its own license to be license compliant. This can not be true at the same time as the previous setting. This field is mandatory IF customers need to bring their own license to use the image. diff --git a/Standards/scs-0102-w1-image-metadata-implementation-testing.md b/Standards/scs-0102-w1-image-metadata-implementation-testing.md new file mode 100644 index 000000000..b2d9f5b75 --- /dev/null +++ b/Standards/scs-0102-w1-image-metadata-implementation-testing.md @@ -0,0 +1,32 @@ +--- +title: "SCS Image Metadata: Implementation and Testing Notes" +type: Supplement +track: IaaS +status: Draft +supplements: + - scs-0102-v1-image-metadata.md +--- + + +## Implementation notes + +The [OpenStack Image Manager from OSISM](https://github.com/osism/openstack-image-manager) +will create a set of images from a "spec file" provided by the user, which can also set the required properties +for these images. + +## Automated tests + +### Images sample + +Some checks need to be performed on a live instance. All publicly available images on this instance +will be checked for either only the mandatory properties or possibly also the recommended ones. +Additionally, a user can also decide to test their private images, although this isn't a necessity. + +### Implementation + +The script [`image-md-check.py`](https://github.com/SovereignCloudStack/standards/blob/main/Tests/iaas/image-metadata/image-md-check.py) +connects to OpenStack and performs the checks described in this section. + +## Manual tests + +None. diff --git a/Standards/scs-0103-v1-standard-flavors.md b/Standards/scs-0103-v1-standard-flavors.md index 46c4b7c7c..cee9282f2 100644 --- a/Standards/scs-0103-v1-standard-flavors.md +++ b/Standards/scs-0103-v1-standard-flavors.md @@ -14,6 +14,19 @@ description: | ## Introduction +This is v1.1 of the standard, which lifts the following restriction regarding the property `scs:name-vN`: +this property may now be used on any flavor, rather than standard flavors only. In addition, the "vN" is +now interpreted as "name variant N" instead of "version N of the naming standard". Note that this change +indeed preserves compliance, i.e., compliance with v1.0 implies compliance with v1.1. + +## Terminology + +extra_specs + Additional properties on an OpenStack flavor, see + [OpenStack Nova user documentation](https://docs.openstack.org/nova/2024.1/user/flavors.html#extra-specs) + and + [OpenStack Nova configuration documentation](https://docs.openstack.org/nova/2024.1/configuration/extra-specs.html). + ## Motivation In OpenStack environments there is a need to define different flavors for instances. @@ -23,19 +36,19 @@ OpenStack providers thus typically offer a large selection of flavors. 
While flavors can be discovered (`openstack flavor list`), it is helpful for users (DevOps teams), to have a guaranteed set of flavors available on all SCS clouds, so these need not be discovered. -## Properties (extra specs) +## Properties (extra_specs) -The following extra specs are recognized, together with the respective semantics: +The following extra_specs are recognized, together with the respective semantics: -- `scs:name-vN=NAME` (where `N` is `1` or `2`, and `NAME` is some string) means that the - flavor is one of the - standard SCS flavors, and the requirements of Section "Standard SCS flavors" below apply. +- `scs:name-vN=NAME` (where `N` is a positive integer, and `NAME` is some string) means that + `NAME` is a valid name for this flavor according to any major version of the [SCS standard on + flavor naming](https://docs.scs.community/standards/iaas/scs-0100). - `scs:cpu-type=shared-core` means that _at least 20% of a core in >99% of the time_, measured over the course of one month (1% is 7,2 h/month). The `cpu-type=shared-core` corresponds to the `V` cpu modifier in the [flavor-naming spec](./scs-0100-v3-flavor-naming.md), other options are `crowded-core` (`L`), `dedicated-thread` (`T`) and `dedicated-core` (`C`). -- `scs:diskN-type=ssd` (where `N` is a nonnegative integer, usually `0`) means that the - root disk `N` must support 1000 _sequential_ IOPS per VM and it must be equipped with +- `scs:diskN-type=ssd` (where `N` is a non-negative integer, usually `0`) means that the + root disk `N` must support 1000 _sequential_ IOPS per VM, and it must be equipped with power-loss protection; see [scs-0110-v1-ssd-flavors](./scs-0110-v1-ssd-flavors.md). The `disk`N`-type=ssd` setting corresponds to the `s` disk modifier, other options are `nvme` (`p`), `hdd` (`h`) and `network` (`n`). Only flavors without disk and @@ -43,6 +56,24 @@ The following extra specs are recognized, together with the respective semantics Whenever ANY of these are present on ANY flavor, the corresponding semantics must be satisfied. +The extra_spec `scs:name-vN` is to be interpreted as "name variant N". This name scheme is designed to be +backwards compatible with v1.0 of this standard, where `scs:name-vN` is interpreted as +"name according to naming standard vN". We abandon this former interpretation for two reasons: + +1. the naming standards admit multiple (even many) names for the same flavor, and we want to provide a means + of advertising more than one of them (said standards recommend using two: a short one and a long one), +2. the same flavor name may be valid according to multiple versions at the same time, which would lead to + a pollution of the extra_specs with redundant properties; for instance, the name + `SCS-4V-16` is valid for both [scs-0100-v2](scs-0100-v2-flavor-naming.md) and + [scs-0100-v3](scs-0100-v3-flavor-naming.md), and, since it does not use any extension, it will be valid + for any future version that only changes the extensions, such as the GPU vendor and architecture. + +Note that it is not required to use consecutive numbers to number the name variants. +This way, it becomes easier to remove a single variant (no "closing the gap" required). + +If extra_specs of the form `scs:name-vN` are used to specify SCS flavor names, it is RECOMMENDED to include +names for the latest stable major version of the standard on flavor naming. + ## Standard SCS flavors Following are flavors that must exist on standard SCS clouds (x86-64). @@ -127,14 +158,19 @@ instance life cycle.) 
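Before the conformance test section below, here is a hedged sketch of how the extra_specs discussed above could be set on a flavor with the OpenStack CLI; the flavor name and the chosen name variant are illustrative:

```shell
# Illustrative only: annotate an existing flavor with SCS extra_specs
openstack flavor set \
  --property scs:name-v1=SCS-4V-16-50s \
  --property scs:cpu-type=shared-core \
  --property scs:disk0-type=ssd \
  SCS-4V-16-50s
```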
## Conformance Tests -The script `flavors-openstack.py` will read the lists of mandatory and recommended flavors +The script [`flavors-openstack.py`](https://github.com/SovereignCloudStack/standards/blob/main/Tests/iaas/standard-flavors/flavors-openstack.py) +will read the lists of mandatory and recommended flavors from a yaml file provided as command-line argument, connect to an OpenStack installation, -and check whether the flavors are present and their extra specs are correct. Missing -flavors will be reported on various logging channels: error for mandatory, info for -recommended flavors. Incorrect extra specs will be reported as error in any case. +and check whether the flavors are present and their extra_specs are correct. + +Missing flavors will be reported on various logging channels: error for mandatory, warning for +recommended flavors. Incorrect extra_specs will be reported as error in any case. The return code will be non-zero if the test could not be performed or if any error was reported. +The script does not check whether a name given via the extra_spec `scs:name-vN` is indeed valid according +to any major version of the SCS standard on flavor naming. + ## Operational tooling The [openstack-flavor-manager](https://github.com/osism/openstack-flavor-manager) is able to diff --git a/Standards/scs-0104-v1-standard-images.md b/Standards/scs-0104-v1-standard-images.md index 220add31b..06e87a8f6 100644 --- a/Standards/scs-0104-v1-standard-images.md +++ b/Standards/scs-0104-v1-standard-images.md @@ -1,6 +1,6 @@ --- title: SCS Standard Images -type: Standard +type: Procedural status: Stable stabilized_at: 2024-02-21 track: IaaS @@ -40,14 +40,16 @@ The YAML file MUST contain the key `images`, whose value is a list of objects. E | Key | Type | Description | Example | | --------- | -------------------- | ---------------------------------------------------- | ---------------------------------------------------- | | `name` | String | Name of the image | `"Debian 12"` | -| `status` | String | _optional_: either `mandatory` or `recommended` | `"recommended"` | +| `status` | String (_optional_) | `optional` (default), `mandatory` or `recommended` | `"recommended"` | | `source` | String | Prefix of the source URL | `"https://cloud.debian.org/images/cloud/bookworm/"` | | | OR: List of strings | multiple possible prefixes | (see full example below) | The meaning of this specification is as follows. -1. If the status is `mandatory`, then the image MUST be present. -2. If an image by the name given is present, then its `image_source` property +1. If the status is `mandatory`, then an image with the name given via `name` MUST be present. +2. If the status is `recommended`, then an image with the name given via `name` SHOULD be present. +3. Regardless of the status: + if an image with the name given is present, then its `image_source` property (as described in the [Image Metadata standard](scs-0102-v1-image-metadata.md)) MUST start with one of the prefixes given via `source`. @@ -57,7 +59,7 @@ The meaning of this specification is as follows. 
| ------------- | -------------------- | ---------------------------------------------------- | ------------------------------------------------ | | `name` | String | Name of the class of images | `"ubuntu-2204-kube"` | | `name_scheme` | String (regex) | Regular expression for the image name | `"ubuntu-2204-kube-v[0-9].[0-9]+(.[0-9]+)?"` | -| `status` | String | _optional_: either `mandatory` or `recommended` | `"recommended"` | +| `status` | String (_optional_) | `optional` (default), `mandatory` or `recommended` | `"recommended"` | | `source` | String | Prefix of the source URL | `"https://swift.services.a.regiocloud.tech"` | | | OR: List of strings | multiple possible prefixes | (see full example below) | @@ -65,7 +67,10 @@ The meaning of this specification is as follows: 1. If the status is `mandatory`, then at least one image MUST be present whose name matches the regular expression given via `name_scheme`. -2. For any image whose name matches the regular expression given via `name_scheme`, +2. If the status is `recommended`, then at least one image SHOULD be present whose name + matches the regular expression given via `name_scheme`. +3. Regardless of the status: + for any image whose name matches the regular expression given via `name_scheme`, its `image_source` property MUST start with one of the prefixes given via `source`. ## Full example @@ -78,7 +83,7 @@ images: - https://cloud-images.ubuntu.com/jammy/ status: mandatory - name: "ubuntu-capi-image" - name_scheme: "ubuntu-capi-image-v[0-9].[0-9]+(.[0-9]+)?" + name_scheme: "ubuntu-capi-image v[0-9]\\.[0-9]+(\\.[0-9]+)?" source: https://swift.services.a.regiocloud.tech/swift/v1/AUTH_b182637428444b9aa302bb8d5a5a418c/openstack-k8s-capi-images/ubuntu-2204-kube status: recommended - name: "Ubuntu 20.04" @@ -107,7 +112,7 @@ The YAML file is generally located under [https://github.com/SovereignCloudStack/standards/blob/main/Tests/iaas/](https://github.com/SovereignCloudStack/standards/blob/main/Tests/iaas/). Any change that could render existing installations non-conformant (i.e., when new -specifications are added, when the name scheme of a specification is changed so as to +specifications are added, when the name scheme of a specification is changed to match more names than before, when the status of an existing specification changes to mandatory, or when some source prefix is removed) requires a new YAML file to be created. As a consequence, any currently valid certificates stay valid; the change can only take diff --git a/Standards/scs-0104-w1-standard-images-implementation.md b/Standards/scs-0104-w1-standard-images-implementation.md new file mode 100644 index 000000000..9a18a9056 --- /dev/null +++ b/Standards/scs-0104-w1-standard-images-implementation.md @@ -0,0 +1,61 @@ +--- +title: "SCS Standard Images: Implementation Notes" +type: Supplement +track: IaaS +status: Draft +supplements: + - scs-0104-v1-standard-images.md +--- + +## Introduction + +The SCS standard on standard images does not in itself lay down what images are actually +required or recommended; rather it specifies the format of a YAML file that in turn serves +said purpose. The particular YAML file that an implementer (a cloud service provider or operator) +has to comply with is given in the respective version of the certificate scope "SCS-compatible IaaS" +as a parameter to the standard. This document is intended to give implementers a +step-by-step guide on how to comply with the SCS certificate scope. 
+ +## Step-by-step walkthrough + +### Option A: pragmatic + +Run the test script on your environment and check the error messages :) + +1. Check out the [standards repository](https://github.com/SovereignCloudStack/standards). + + ```shell + git clone https://github.com/SovereignCloudStack/standards.git + cd standards + ``` + +2. Install requirements: + + ```shell + python3 -m venv .venv && source .venv/bin/activate + pip install -r requirements.txt + ``` + +3. Make sure that your `OS_CLOUD` environment variable is set. +4. Run the main check script: + + ```shell + python3 ./Tests/scs-compliance-check.py ./Tests/scs-compatible-iaas.yaml -t standard-images-check \ + -s $OS_CLOUD -a os_cloud=$OS_CLOUD -o report.yaml -C + ``` + +5. Inspect console output (stderr) for error messages. + +### Option B: principled + +1. Find your intended version of the certificate scope in the [overview table](https://docs.scs.community/standards/scs-compatible-iaas). It will most likely be one whose 'State' is 'Effective' or 'Stable'. +2. In (or below) the row labeled 'scs-0104: Standard images', you find a link to the YAML file that lists mandatory and recommended images, such as [scs-0104-v1-images.yaml](https://github.com/SovereignCloudStack/standards/blob/main/Tests/iaas/scs-0104-v1-images.yaml) for v4 of the certificate scope. +3. For each entry under `images`, ensure the following (either manually or by using the OpenStack Image Manager described in the section "Operational Tooling"): + - if the entry says `status: mandatory`, your environment MUST provide this image, i.e., an image whose name matches the `name_scheme` or (in absence of a name scheme) the `name`. + - every actual image in your environment _that matches the `name_scheme` or (in absence of a name scheme) the `name`_ has the correct `image_source` property: its value MUST start with one of the prefixes listed under `source`. + +## Operational Tooling + +The [openstack-image-manager](https://github.com/osism/openstack-image-manager) is able to +create all standard, mandatory SCS images for you given image definitions from a YAML file. +Please see [its documentation](https://docs.scs.community/docs/iaas/components/image-manager/) for details. diff --git a/Standards/scs-0110-v1-ssd-flavors.md b/Standards/scs-0110-v1-ssd-flavors.md index 8819b143a..2a8716743 100644 --- a/Standards/scs-0110-v1-ssd-flavors.md +++ b/Standards/scs-0110-v1-ssd-flavors.md @@ -62,7 +62,7 @@ requires write latencies in the range of a single-digit ms (or better). #### One-node etcd (backed by redundant storage) -If k8s uses only one control plane node, there will only be only one etcd node, +If k8s uses only one control plane node, there will only be one etcd node, avoiding timed out heartbeats. Single node control planes are typically not recommended for production workloads though. They are limited with respect to control plane performance, have a higher chance to fail (as a single node failure @@ -107,7 +107,7 @@ which is not typically critical if done within reasonable limits. This change however does not fully address the issue — occasional write latencies above 100ms will still cause failed heartbeats, just less often. -This change has been implemented in SCS's +This change has been implemented in the [k8s-cluster-api-provider](https://etcd.io/docs/v3.5/op-guide/hardware/#example-hardware-configurations) reference implementation: The heartbeat has been changed from 1/100ms (10/s) to 1/250ms (4/s) and the reelection timeout from 1s to 2.5s. 
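For reference, a sketch of how these two settings are typically passed to etcd (flag names as documented by etcd; the concrete mechanism depends on how the cluster is deployed):

```bash
# both values are given in milliseconds
etcd --heartbeat-interval=250 --election-timeout=2500
```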
@@ -145,7 +145,7 @@ written out. Recovery from such a scenario can range from smooth to impossible. In a multi-node cluster, this may not be as bad as it sounds — if only one node is affected by a disruption, the crashed node can be recovered by resyncing the data from other nodes. In practice an inconsistent state would be considered -too risky and it should be preferred to set up a fresh node to join the +too risky, and it should be preferred to set up a fresh node to join the existing etcd cluster. This would need to be implemented to make this option less risky. @@ -222,9 +222,9 @@ Disk IO QoS is not part of this spec but may be considered in another one. Live-migration with local storage is significantly more difficult than with networked storage: The contents of the local disks also need to be replicated over to the new host. Live-migration for these VMs may thus take significantly -longer or not be possible at all, depending the configuration from the provider. +longer or not be possible at all, depending on the configuration from the provider. Not supporting live-migration is OK for flavors with local disks according -to the flavor naming spec — a capability to indicate whether or not +to the flavor naming spec — a capability to indicate whether live-migration is supported will be subject to a flavor-metadata discoverability spec that is planned for the future. @@ -252,7 +252,7 @@ to solve the latency requirements for databases and etcd may emerge. When we standardize QoS features there, we may amend this standard with QoS recommendations or possibly requirements. -A future flavor metadata discoverability standard will indicate whether or not +A future flavor metadata discoverability standard will indicate whether these flavors can be live-migrated. A future VM metadata standard will allow users to request live-migration and/or cold migration or restart to be or to not be performed. diff --git a/Standards/scs-0111-v1-volume-type-decisions.md b/Standards/scs-0111-v1-volume-type-decisions.md index 9b4ebf08c..aaf3e522f 100644 --- a/Standards/scs-0111-v1-volume-type-decisions.md +++ b/Standards/scs-0111-v1-volume-type-decisions.md @@ -7,29 +7,29 @@ track: IaaS ## Introduction -Volumes in OpenStack are virtual drives. They are managed by the storage service Cinder, which abstracts creation and usage of many different storage backends. While it is possible to use a backend like lvm which can reside on the same host as the hypervisor, the SCS wants to make a more clear differentiation between volumes and the ephemeral storage of a virtual machine. For all SCS deployments we want to assume that volumes are always residing in a storage backend that is NOT on the same host as a hypervisor - in short terms: Volumes are network storage. Ephemeral storage on the other hand is the only storage residing on a compute host. It is created by creating a VM directly from an Image and is automatically los as soon as the VM cease to exist. Volumes on the other hand have to be created from Images and only after that can be used for VMs. They are persistent and will remain in the last state a VM has written on them before they cease to exit. Being persistent and not relying on the host where the VM resides, Volumes can easily be attached to another VM in case of a node outage and VMs be migrated way more easily, because only metadata and data in RAM has to be shifted to another host, accelerating any migration or evacuation of a VM. +Volumes in OpenStack are virtual drives. 
They are managed by the storage service Cinder, which abstracts creation and usage of many different storage backends. While it is possible to use a backend like lvm which can reside on the same host as the hypervisor, this decision record wants to make a clearer differentiation between volumes and the ephemeral storage of a virtual machine. For all SCS deployments we want to assume that volumes are always residing in a storage backend that is NOT on the same host as a hypervisor - in short: Volumes are network storage. Ephemeral storage on the other hand is the only storage residing on a compute host. It is created by creating a VM directly from an Image and is automatically lost as soon as the VM ceases to exist. Volumes on the other hand have to be created from Images and only after that can be used for VMs. They are persistent and will remain in the last state a VM has written on them before they cease to exist. Being persistent and not relying on the host where the VM resides, Volumes can easily be attached to another VM in case of a node outage, and VMs can be migrated far more easily, because only metadata and data in RAM have to be shifted to another host, accelerating any migration or evacuation of a VM. -Volume Types are used to classify volumes and provide a basic decision for what kind of volume should be created. These volume types can sometimes very be backend-specific and it might be hard for a user to choose the most suitable volume type, if there is more than one default type. Nevertheless the most of configuration is done in the backends themself, so volume types only work as a rough classification. +Volume Types are used to classify volumes and provide a basic decision for what kind of volume should be created. These volume types can sometimes be very backend-specific, and it might be hard for a user to choose the most suitable volume type, if there is more than one default type. Nevertheless, most of the configuration is done in the backends themselves, so volume types only work as a rough classification. ## Motivation -We want to standardize a few varieties of volume types. While a user can choose simple things like size when creating a volume, Volume Types define a few broader aspects of volume. Encryption of volumes for example is solely decided by the volume type. And whether the volume will be replicated is a mix between definiton in the volume type and backend specific configuration, but it's visiblity can only be reached in the volume type. +We want to standardize a few varieties of volume types. While a user can choose simple things like size when creating a volume, Volume Types define a few broader aspects of a volume. Encryption of volumes, for example, is solely decided by the volume type. And whether the volume will be replicated is a mix between definition in the volume type and backend-specific configuration, but its visibility can only be provided through the volume type. -In General: what the different volume types are capable of is highly dependend on both the used backend and the configurations of OpenStack. A few options are worth being at least recommended. +In general: what the different volume types are capable of is highly dependent on both the used backend and the configurations of OpenStack. A few options are worth being at least recommended. ## Design Considerations We want to have a discoverable Standard. So there should be no naming conventions as per request by operators.
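For orientation, the discovery a normal user can currently perform is roughly limited to listing the volume types and showing their public properties, for example (the type name is a placeholder):

```bash
openstack volume type list --long
openstack volume type show <volume-type>
```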
-This first decision will have impacts on upstream OpenStack development, as those things, that would be nice to discover, may not be currently dicoverable by users or not at all. +This first decision will have impacts on upstream OpenStack development, as some things that would be nice to discover may currently not be discoverable by users, or not at all. -There are severel aspects of volume types, which will be discussed in the following: +There are several aspects of volume types, which will be discussed in the following: ### Options considered #### Encryption -Encryption for volumes is an option which has to be configured within the volume type. As an admin it is possible to set encryption-provider, key size, cipher and control location. As an admin it is also currently possible to see these configurations in a volume type with list and show commands. A user should not see these parameters in detail, but a boolean value that descibes whether encryption is used or not. Currently this is not possible in upstream OpenStack. +Encryption for volumes is an option which has to be configured within the volume type. As an admin, it is possible to set encryption-provider, key size, cipher and control location. As an admin, it is also currently possible to see these configurations in a volume type with list and show commands. A user should not see these parameters in detail, but a boolean value that describes whether encryption is used or not. Currently, this is not possible in upstream OpenStack. **Conclusion**: This is a solid aspect to be standardized. But it will need work on OpenStack, to have a boolean value presented to the users. @@ -41,7 +41,7 @@ OpenStack Cinder works with a lot of different backends. They all have some kind #### Availability Zones -Availability Zones are used in Nova and Cinder seperatly to provide an often also physical separation of compute hosts or storage nodes. This leads to two options to consider: +Availability Zones are used in Nova and Cinder separately to provide a separation of compute hosts or storage nodes that is often also physical. This leads to two options to consider: 1. Multiple Volume AZs: This might be used if there are different backends present in one IaaS structure. The different volume types are usually used for the different volume AZs. This makes migration between those AZs only be possible for administrators. @@ -49,24 +49,24 @@ Availability Zones are used in Nova and Cinder seperatly to provide an often als Another question is how many providers use one of these options or both. -**Conclusion**: The first part doesn't make much sense to standardize, as migration between the volume types can only be done by admins. However the second part might be noteable, but due to the variety of configuration options very hard to standardize. +**Conclusion**: The first part doesn't make much sense to standardize, as migration between the volume types can only be done by admins. However, the second part might be notable, but due to the variety of configuration options it is very hard to standardize. #### Multiattach -It is possible in a few backends to attach a volume to multiple VMs.
This has to be configured in the Volume Type and this information is also accessible for users. Nevertheless, this option also needs a lot of work from users, as those types of volumes have to have a file system that is capable of multiattach. Many providers don't provide multiattach. **Conclusion**: It might be notable that this already is a discoverable option. #### Replication -Replication states, whether or not there are multiple replicas of a volume. Thus answers the question, whether the data could survive a node outage. Again there are different ways to achive replicated volumes. It can either be defined in the volume type and is discoverable also by normal users or it is configured in the backend. The last option is usually used with ceph for example. This makes it hard to discover, whether a volume is replicated or not. Another point is the number of replicas, that exist. +Replication states whether there are multiple replicas of a volume. This answers the question whether the data could survive a node outage. Again there are different ways to achieve replicated volumes. It can either be defined in the volume type and is discoverable also by normal users, or it is configured in the backend. The last option is usually used with ceph for example. This makes it hard to discover whether a volume is replicated or not. Another point is the number of replicas that exist. -**Conclusion**: Replication is a good option to be standardized. Whether this should be done as a boolean option or if the number of replicas is also something users need to know should still be discussed. Nevertheless due to the different options to configure replication this will be quite complex. +**Conclusion**: Replication is a good option to be standardized. Whether this should be done as a boolean option or if the number of replicas is also something users need to know should still be discussed. Nevertheless, due to the different options to configure replication, this will be quite complex. #### QoS -Quality of Service parameters can be stated in a volume qos object. These objects can then be associated to a volume type (or directly to a volume as an admin only option). But this is optional and thus even good or very good volume QoS parameters that are aquired through hardware configuration and storage parameters, might go by unmentioned. -Furthermore the indirection makes it harder to discover the qos for a volume type. Only admins will see the associated qos ID and will have to take a closer look at the qos after discovering the volume type. PLUS: there can only be one qos association for one volume type. But a qos can be used for multiple volumes. +Quality of Service parameters can be stated in a volume qos object. These objects can then be associated to a volume type (or directly to a volume as an admin-only option). But this is optional, and thus even good or very good volume QoS parameters that are acquired through hardware configuration and storage parameters might go unmentioned. +Furthermore, the indirection makes it harder to discover the qos for a volume type. Only admins will see the associated qos ID and will have to take a closer look at the qos after discovering the volume type. Plus, there can only be one qos association for one volume type, but a qos can be used for multiple volumes. **Conclusion**: The benefit of displaying qos parameters is clear, thus this option should be noted. But are volume qos objects widely used? If not, the standardization process would be too much work.
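To illustrate the indirection described above: a volume qos object is created and then associated with a volume type by an administrator, roughly as follows (the names and the property are placeholders):

```bash
openstack volume qos create --consumer front-end --property read_iops_sec=1000 example-qos
openstack volume qos associate example-qos example-volume-type
```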
diff --git a/Standards/scs-0112-v1-sonic.md b/Standards/scs-0112-v1-sonic.md index 06ac03c69..73657154d 100644 --- a/Standards/scs-0112-v1-sonic.md +++ b/Standards/scs-0112-v1-sonic.md @@ -2,14 +2,14 @@ title: SONiC Support in SCS type: Decision Record status: Draft -track: Global +track: IaaS description: | SCSS-0112 outlines architectural decisions in SCS in regards to SONiC support and integration. --- ## Introduction -SONiC support in [SCS](https://scs.community) was considered within the context of [VP04 Networking](https://scs.community/tenders/lot4), sub-lot 1 SDN scalability. Different challenges and approaches to SDN scalability have been explored and more specifically those who require support in the underlay network. Using SONiC in the underlay can have benefits for SCS users by using a standardized OS for network devices and also having a clear path for network scalability when using SONiC. For this to work, we have to define how SONiC is used and supported architecturally in SCS. This document outlines the architectural decisions in regards to SONiC support and integration in SCS. +SONiC support in [SCS](https://scs.community) was considered within the context of [VP04 Networking](https://scs.community/tenders/lot4), sub-lot 1 SDN scalability. Different challenges and approaches to SDN scalability have been explored and more specifically those who require support in the underlay network. Using SONiC in the underlay can have benefits for SCS users by using a standardized OS for network devices and also having a clear path for network scalability when using SONiC. For this to work, we have to define how SONiC is used and supported architecturally in SCS. This document outlines the architectural decisions in regard to SONiC support and integration in SCS. ## Motivation @@ -19,7 +19,7 @@ In respect to SDN scalability improvements in Openstack and SCS, there are sever In many network designs for Openstack, configuration of the actual network hardware by Openstack Neutron service is required. The following network designs apply: -- VLANs. Uisng VLANs to segment tenant networks requires the network switch to be configured. This can be manual or dynamic configuration via the ML2 Neutron driver. +- VLANs. Using VLANs to segment tenant networks requires the network switch to be configured. This can be manual or dynamic configuration via the ML2 Neutron driver. - EVPN/VXLAN on the switch. In this use case, SONiC runs on leaf switches. Leafs terminate VXLAN endpoints and run BGP/EVPN for the control plane. Again, the ML2 Neutron driver is used to dynamically configure the network switch. The link between the switch and the service is regular VLAN. @@ -41,19 +41,19 @@ There are different ways SONiC support can be implemented in SCS, very similar t #### Option 1: SCS distribution of SONiC -With this approach, SCS will create it's own distribution of SONiC, similar to what Debian or Arch are for Linux. This distribution will be based on the SONiC community distribution, but will have SCS specific modules, which will be developed and maintained by SCS. SCS will contribute its code to dedicated SCS repositories and build its own SONiC images. The code can eventually be pushed upstream, but not as top priority. This approach will allow SCS to have a clear path for SONiC support and integration in SCS, but will also require SCS to maintain a distribution of SONiC, which is a significant effort. Upstream/downstream changes will have to be managed and maintained. 
However the advantage is that SCS will have full control over the distribution and can make changes as needed. Users will have to use the SCS distribution of SONiC, which will be based on the community distribution. If users already deploy community or enterprise SONiC, a migration path to SCS SONiC will be needed. +With this approach, SCS will create its own distribution of SONiC, similar to what Debian or Arch are for Linux. This distribution will be based on the SONiC community distribution, but will have SCS-specific modules, which will be developed and maintained by SCS. SCS will contribute its code to dedicated SCS repositories and build its own SONiC images. The code can eventually be pushed upstream, but not as a top priority. This approach will allow SCS to have a clear path for SONiC support and integration in SCS, but will also require SCS to maintain a distribution of SONiC, which is a significant effort. Upstream/downstream changes will have to be managed and maintained. However, the advantage is that SCS will have full control over the distribution and can make changes as needed. Users will have to use the SCS distribution of SONiC, which will be based on the community distribution. If users already deploy community or enterprise SONiC, a migration path to SCS SONiC will be needed. #### Option 2: SCS will support SONiC but will not change it -SCS supports enterprise ans community versions of SONiC but will not develop its own code for it. This will significantly limit the ability to develop new features for SDN, because all changes will be done in the tooling around SONiC and not in the OS itself. The advantages are that SCS will still improve SONiC support and will have minimal effort for this. The downside is that some features like OVN control plane for SONiC will not be possible. +SCS supports enterprise and community versions of SONiC but will not develop its own code for it. This will significantly limit the ability to develop new features for SDN, because all changes will be done in the tooling around SONiC and not in the OS itself. The advantages are that SCS will still improve SONiC support and will have minimal effort for this. The downside is that some features like OVN control plane for SONiC will not be possible. #### Option 3: SCS develops SCS-specific modules as add-on for any SONiC (Community or Enterprise) -In option 3, SCS will change SONiC by releasing its own modules for it. Those module can be provided as add-ons and installed on top of any version, community or enterprise. While compatability between the modules the SONiC releases will need to be maintained, there will be much broader support for SONiC and users will be able to pick and chose distributions based on their existing relationships and experience and use SCS independent of this. In cases where SCS provides contributions to core SONiC, those can be made in upstream Community repositories, so that the whole community including the propriatory vendors can adopt them eventually. +In option 3, SCS will change SONiC by releasing its own modules for it. Those modules can be provided as add-ons and installed on top of any version, community or enterprise. While compatibility between the modules and the SONiC releases will need to be maintained, there will be much broader support for SONiC and users will be able to pick and choose distributions based on their existing relationships and experience and use SCS independently of this.
In cases where SCS provides contributions to core SONiC, those can be made in upstream Community repositories, so that the whole community including the proprietary vendors can adopt them eventually. #### Option 4: SCS does not adopt SONiC at all -This option entails no dedicated effort on SCS's part in supporting SONiC network equipement for it's users and software stack. Users can still use SONiC from what is available by other projects or if they invest the effort themselves. This has several disadvantages: +This option entails no dedicated effort on SCS's part in supporting SONiC network equipment for its users and software stack. Users can still use SONiC from what is made available by other projects or if they invest the effort themselves. This has several disadvantages: - SCS is not contributing to the SONiC community - the value for SCS by users who already use or plan to invest in SONiC is diminished @@ -76,7 +76,7 @@ Multiple vendor distributions. Expensive in general New tags appears on different periods, once 2 times per month, other 3 months between releases. -- adoption penetration - how many vendors use it? What type of venders (big, medium and large)? +- adoption penetration - how many vendors use it? What type of vendors (big, medium and large)? Good initial adoption: Microsoft, Target. Adoption requires time and money @@ -90,7 +90,7 @@ The SONiC community is healthy and growing, however progress is slower due to fa ## Decision -IaaS team recommends to use Option 3: SCS develops SCS-specific modules as add-on for any SONiC (Community or Enterprise). It has the best tradeoff between time and resource investment and benefits for the community. Adopting this strategy would allow SCS to be agile and quickly adopt SONiC, by providing users with clear path while allowing the freedom to chose different hardware and software vendors. SCS code can be packaged independently of each SONiC distribution and installed as add-on. Also SCS contributions to core SONiC will be done directly upstream, so that the whole community can benefit from them. +The IaaS team recommends using Option 3: SCS develops SCS-specific modules as add-on for any SONiC (Community or Enterprise). It has the best tradeoff between time and resource investment and benefits for the community. Adopting this strategy would allow SCS to be agile and quickly adopt SONiC, by providing users with a clear path while allowing the freedom to choose different hardware and software vendors. SCS code can be packaged independently of each SONiC distribution and installed as an add-on. Also, SCS contributions to core SONiC will be done directly upstream, so that the whole community can benefit from them. Work on hardware support in SONiC should be raised in upstream and SCS shouldn't make significant investments in this area. diff --git a/Standards/scs-0113-v1-security-groups-decision-record.md b/Standards/scs-0113-v1-security-groups-decision-record.md index 3b7c3c11c..dae926566 100644 --- a/Standards/scs-0113-v1-security-groups-decision-record.md +++ b/Standards/scs-0113-v1-security-groups-decision-record.md @@ -40,7 +40,7 @@ By design of OpenStack and when not changed, default rules in the default securi ### Reasons for and against a standard for security groups -Considering having most likely similiar security groups within different projects, it might make sense to standardize a few security groups for often used cases like ssh, http, https and maybe icmp.
+Considering having most likely similar security groups within different projects, it might make sense to standardize a few security groups for often used cases like ssh, http, https and maybe icmp. What speaks for standardizing a certain set of security groups: 1. Having a set of correctly configured security groups could reduce misconfiguration from users @@ -53,7 +53,7 @@ What are the downsides of having a set of standardized security groups: 1. A bug or misconfiguration is a single point of failure for ALL customers 2. Users might apply the wrong security group to their port or VM because they lack the domain knowledge, unknowingly opening themselves to attacks 3. Users will not inspect such default security groups: this may result in applying a wrong group and opening traffic too much -4. the central authority managing the groups does not necessarily know the usecase of the user, the user/operator must know best what kind of security their workload needs. What is a necessary port for 99% of deployments might be a security disaster for my deployment +4. the central authority managing the groups does not necessarily know the use case of the user, the user/operator must know best what kind of security their workload needs. What is a necessary port for 99% of deployments might be a security disaster for my deployment 5. Providing default groups could have the effect of stopping customers to think about their specific security needs and instead just copying default groups and or rules This leads to a conclusion, that a set of default security groups is only more valuable than harmful for users: @@ -91,12 +91,12 @@ stack@devstack:~/devstack$ openstack default security group rule list ``` Those rules can be edited, which may pose a security risk for customers consuming the default security group. -This should be adressed as a pre-requirement [here](https://github.com/SovereignCloudStack/standards/issues/521). +This should be addressed as a pre-requirement [here](https://github.com/SovereignCloudStack/standards/issues/521). ### Option 1: operator usage of network rbac -The `network rbac` endpoint[^2] manages the possibitity to share and access certain network-specific resources such as security groups. -For admins it is possible to use this endpoint to share a security group with ALL projects within the the cloud including ALL projects of ALL domains: +The `network rbac` endpoint[^2] manages the possibility to share and access certain network-specific resources such as security groups. +For admins, it is possible to use this endpoint to share a security group with ALL projects within the cloud including ALL projects of ALL domains: ```bash stack@devstack:~/devstack$ openstack network rbac create --target-all-projects --action access_as_shared --type security_group group-for-everyone @@ -112,7 +112,7 @@ stack@devstack:~/devstack$ openstack network rbac create --target-all-projects - +-------------------+--------------------------------------+ ``` -This would fulfill our goal to grant access to predefined security groups for all projects and all groups recieved as shared do not count into the projects quota for security groups. +This would fulfill our goal to grant access to predefined security groups for all projects and all groups received as shared do not count into the projects quota for security groups. But there are a few downsides to this: 1. This should be strictly bound to the admin: no other user should be able to share security groups so to not confuse user. 
@@ -158,7 +158,7 @@ The biggest downside: As soon as a security group is shared, everyone from every Using and adhering the project scope of the security groups has the consequence, that: 1. either an admin has to set up security groups for each project -2. or the SCS project only provides a guide on how to setup and use some recommended security groups. +2. or the SCS project only provides a guide on how to set up and use some recommended security groups. As users are allowed to, will and should edit their security groups, there is no way to ensure, that a certain set of security groups with certain rules is always present in a project. So packing an extra burden on admins is unreasonable. @@ -174,7 +174,7 @@ That would include identifying what kind of network permission a single VM needs The default Security Group Rules should be standardized as a pre-requirement (Option 0). Using the `network rbac` endpoint (Option 1) would not solve the idea of having pre-defined and administrator audited Security Groups, because it is possible for any user to edit the rules of shared Security Groups. -Instead the project-scope of the Security Groups should by focused and a guide prepared, that gives insight about creating and using Security Groups with a few examples but with a clear security focus (Mix of Option 2 and 3). +Instead, the project-scope of the Security Groups should be the focus, and a guide should be prepared that gives insight into creating and using Security Groups with a few examples but with a clear security focus (Mix of Option 2 and 3). ## Consequences diff --git a/Standards/scs-0114-v1-volume-type-standard.md b/Standards/scs-0114-v1-volume-type-standard.md index 630ee63ec..003db9a24 100644 --- a/Standards/scs-0114-v1-volume-type-standard.md +++ b/Standards/scs-0114-v1-volume-type-standard.md @@ -1,13 +1,14 @@ --- -title: Volume Type Standard +title: SCS Volume Types type: Standard -status: Draft -track: IaaS +status: Stable +stabilized_at: 2024-11-13 +track: IaaS --- ## Introduction -A volume is a virtual drive that is to be used by an instance (i. e., a virtual machine). With OpenStack, +A volume is a virtual drive that is to be used by an instance (i.e., a virtual machine). With OpenStack, each volume is created per a type that determines basic features of the volume as provided by the backend, such as encryption, replication, or quality of service. As of the writing of this document, presence or absence of these features can not be discovered with full certainty by non-privileged users via the OpenStack API. @@ -37,11 +38,11 @@ All considerations can be looked up in detail in the [Decision Record for the Vo ### Systematic Description of Volume Types -To test whether a deployment has volume types with certain aspects, the discoverability of the parameters in the volume type has to be given. As for the time this standard is created, there is no way for users to discover all aspects through OpenStack commands. Therefore the aspects, that are fulfilled within a volume type, should be stated in the beginning of the **description** of a volume type in the following manner: +To test whether a deployment has volume types with certain aspects, the discoverability of the parameters in the volume type has to be given. As of the time this standard is created, there is no way for users to discover all aspects through OpenStack commands.
Therefore, the aspects that are fulfilled within a volume type should be stated in the beginning of the **description** of a volume type in the following manner: `[scs:aspect1, aspect2, ..., aspectN]...` -The mentioned aspects MUST be sorted alphebetically and every aspect should only be mentioned to the maximal amount of one. +The mentioned aspects MUST be sorted alphabetically, and every aspect should be mentioned at most once. ### Standardized Aspects @@ -93,7 +94,7 @@ openstack volume type show LUKS ### Replication -Replication states whether or not there are multiple replicas of a volume. Thus, it answers the question whether the data could survive a node outage. Unfortunately there are two ways replication can be achieved: +Replication states whether or not there are multiple replicas of a volume, i.e., whether the data could survive a node outage. Unfortunately, there are two ways replication can be achieved: 1. In the configuration of a volume type. It then is visible as extra_spec in the properties of a volume type. 2. Via the used backend. Ceph for example provides automatic replication, that does not need to be specified in the volume type. This is currently not visible for users. diff --git a/Standards/scs-0115-v1-default-rules-for-security-groups.md b/Standards/scs-0115-v1-default-rules-for-security-groups.md new file mode 100644 index 000000000..8809a2857 --- /dev/null +++ b/Standards/scs-0115-v1-default-rules-for-security-groups.md @@ -0,0 +1,141 @@ +--- +title: Default Rules for Security Groups +type: Standard +status: Stable +stabilized_at: 2024-11-13 +track: IaaS +--- + +## Introduction + +Security Groups in IaaS (OpenStack) are part of the network security mechanisms provided for the users. +They resemble sets of virtual firewall rules allowing specific network traffic at a port of a VM that connects it to a network. +They are project-bound, which means that all Security Groups that are newly created are only available to the project in which they were created. +This is also the case for the default Security Group that is created for each project as soon as the project itself is created. + +## Terminology + +Security Group (abbr. SG) +  Set of ip table rules, used for tenant network security. + +Security Group Rule (abbr. SG Rule) +  A single ip table rule that is part of a Security Group. + +Administrator (abbr. Admin) +  Operator = User of an OpenStack cloud with the admin role. + +### Default Security Groups, Custom Security Groups and default Security Group Rules + +To properly understand the concepts in this standard and avoid ambiguity, it is very important to distinguish between the following similar-sounding but different resources in the OpenStack Networking API: + +1. default Security Group +2. custom Security Group +3. default Security Group Rules + +A **default Security Group** is a predefined Security Group which is automatically created once a project is created and is specific to that project. +This Security Group is called "default" and there exists only one per project. +It will automatically be assigned to VMs that have no other Security Group explicitly assigned to them when a VM is created. + +A **custom Security Group** is any additional Security Group created within a project separate from the *default Security Group* of the project. + +The **default Security Group Rules** may target the *default Security Groups* or the *custom Security Groups* or both.
+They resemble a rule template and each Security Group will be initially created with rules according to this template. + +Although the rules of a Security Group may be adjusted freely after its creation, these default rule presets applied on initialization are predefined. +In recent OpenStack releases, both presets can be adjusted independently by administrators of the infrastructure. + +## Motivation + +The rules of a Security Group can be edited by default by any user with the member role within a project. +But when a Security Group is created it automatically incorporates a few Security Group rules that are configured as default rules. +Since the 2023.2 release, the default set of Security Group rules can be adjusted. +This functionality is only available to administrators[^1][^2]. +In combination with the OpenStack behavior that when a VM is created with no Security Group specified, the default Security Group of the project is automatically applied to the ports of the VM, +a user cannot be sure which firewall rules are applied to such a VM. + +Therefore, this standard proposes default Security Group rules that MUST be set by administrators to avoid divergence in default network security between different IaaS environments. + +[^1]: [Tracking of development for editable default SG rules](https://bugs.launchpad.net/neutron/+bug/1983053) +[^2]: [Release Notes of Neutron 2023.2](https://docs.openstack.org/releasenotes/neutron/2023.2.html) + +## Design Considerations + +Up to the 2023.1 release (Antelope) the default Security Group rules are defined in the OpenStack code. +We should not require changing this behavior through code changes in deployments. + +Beginning with the 2023.2 release (Bobcat) the default Security Group rules can now be edited by administrators through an API. +All rules that should be present as default in Security Groups have to be configured by admins through this API. + +There are two ways to approach a standard for the default rules of Security Groups. + +1. **There could be a set of rules standardized that has to be configured by admins.** + + OpenStack's default rules for Security Groups already provide a good baseline for port security, because they allow all egress traffic and for the default Security Group only ingress traffic from the same group. + + Allowing more rules would not benefit the security level, while reducing or limiting the existing rules would barely improve it. + Nevertheless, a standard could hold up the current security level against possible future release with more open default rules. + Changing the default rules will not change the rules of any existing Security Groups. + +2. **With the already strict OpenStack default rules users are required in most use cases to create and manage their own Security Groups.** + + This has the benefit that users need to explicitly think about the port security of their VMs and may be less likely to apply Security Groups which rules open up more ports than needed. + There is also a guide from the SCS project on how to set up a Security Group that also focuses on having a good port security[^3]. + + With the default OpenStack behavior of having already strict rules, which in most cases require users to manage their own Security Groups, this standard should mandate a middle way: + It should allow adjusting the default rules, but only to a stricter version. 
+ +Allowing all outgoing traffic in the default rules in combination with blocking all incoming traffic would be strict enough from a security point of view. +And it would make it necessary for users to check and change the rules of their Security Group to a meaningful set. + +[^3]: [Guide for Security Groups](https://docs.scs.community/docs/iaas/guides/user-guide/security-groups/) + +### Further Annotations + +This standard should only be applied onto versions of OpenStack that implement the new endpoint for the default Security Group rules, which would only include 2023.2 or higher releases. + +It is possible to have different default Security Group rules for the default Security Group and custom Security Groups. +And it is arguable to have a more strict standard for default rules for the default Security Group than for the custom Security Groups. +Because the latter ones are not automatically applied to a VM but are always edited by the users to apply to their requirements. + +The allowlisting concept of Security Group rules makes it hard to allow traffic with an exception to certain ports. +It would be possible to just define many rules to achieve what a blocklist would achieve. +But having many rules may confuse users, and they may not disable unnecessary default rules in their Security Groups. + +## Standard + +The default Security Group rules for the default Security Groups SHOULD allow incoming traffic from the same Security Group. +The default Security Group rules for ALL Security Groups MUST NOT allow any other incoming traffic. Neither IPv4 nor IPv6. +This can be achieved through having ingress rules in the default Security Group rules that allow ingress traffic from the Remote Security Group "PARENT" but are only used in the default Security Group. + +The default Security Group rules for ALL Security Groups SHOULD allow egress Traffic for both IPv4 and IPv6. 
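+
+A sketch of how an administrator could audit and adjust this (assuming a 2023.2 or later release; the rule ID is a placeholder):
+
+```bash
+# inspect the currently configured default rules
+openstack default security group rule list
+# remove any ingress rule that allows traffic from outside the same Security Group
+openstack default security group rule delete <rule-id>
+```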
+ +### Example + +In the following table, there is only ingress traffic between the same default Security Groups allowed plus all egress traffic: + +```bash +$ openstack default security group rule list ++--------------------------+-------------+-----------+-----------+------------+-----------+-----------------------+----------------------+--------------------------------+-------------------------------+ +| ID | IP Protocol | Ethertype | IP Range | Port Range | Direction | Remote Security Group | Remote Address Group | Used in default Security Group | Used in custom Security Group | ++--------------------------+-------------+-----------+-----------+------------+-----------+-----------------------+----------------------+--------------------------------+-------------------------------+ +| 47b929fd-9b39-4f22-abc5- | None | IPv6 | ::/0 | | egress | None | None | True | True | +| 7d4f64d10909 | | | | | | | | | | +| 92a79600-5358-4fef-a390- | None | IPv4 | 0.0.0.0/0 | | egress | None | None | True | True | +| 822665f46070 | | | | | | | | | | +| 93e35d0c-2482-4ec1-9fbd- | None | IPv4 | 0.0.0.0/0 | | ingress | PARENT | None | True | False | +| fd8c9a21a04e | | | | | | | | | | +| ed5cd662-add2-4e42-b0a7- | None | IPv6 | ::/0 | | ingress | PARENT | None | True | False | +| 3b585d348820 | | | | | | | | | | ++--------------------------+-------------+-----------+-----------+------------+-----------+-----------------------+----------------------+--------------------------------+-------------------------------+ +``` + +## Related Documents + +The spec for introducing configurability for the default Security Groups Rules can be found [here](https://specs.openstack.org/openstack/neutron-specs/specs/2023.2/configurable-default-sg-rules.html). + +More about Security Groups as a resource in OpenStack can be found [here](https://docs.openstack.org/nova/latest/user/security-groups.html). + +## Conformance Tests + +The conformance tests should check for the absence of any ingress traffic rules except traffic from the same Security Group in the `openstack default security group rule list`. +As having egress rules is allowed by this standard, but not forced and can be set in various ways, the tests should check for presence of any egress rules. diff --git a/Standards/scs-0116-v1-key-manager-standard.md b/Standards/scs-0116-v1-key-manager-standard.md new file mode 100644 index 000000000..b0dd19139 --- /dev/null +++ b/Standards/scs-0116-v1-key-manager-standard.md @@ -0,0 +1,106 @@ +--- +title: SCS Key Manager Standard +type: Standard +status: Stable +stabilized_at: 2024-11-13 +track: IaaS +--- + +## Introduction + +To encrypt user data like volumes or in the future also Images and ephemeral storage for VMs, the key has to be present in the infrastructure. +A Key Manager service within the infrastructure can be utilized to store keys. +Consequently providing keys for every encryption or decryption is possible without including the user. +Also authorization policies can be applied on every request to the Key Manager service. +OpenStack offers a Key Manager implementation that is named Barbican, which provides these features. +This standard aims to provide a base level of security for Cloud Service Providers that integrate a Key Manager into their deployments. 
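+
+For illustration, the user-facing side of such a service is small; with the Barbican client plugin for the OpenStack CLI installed, storing and listing a secret looks roughly like this (the name and payload are just examples):
+
+```bash
+openstack secret store --name example-key --payload 'example payload'
+openstack secret list
+```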
+ +## Terminology + +| Term | Meaning | +|---|---| +| API | Application Programming Interface, often referring to the REST API interfaces provided by OpenStack and related services | +| Barbican | The Key Manager implementation in OpenStack | +| CSP | Cloud Service Provider, provider managing the OpenStack infrastructure | +| IaaS | Infrastructure-as-a-Service | +| HSM | Hardware Security Module | +| KEK | Key Encryption Key | +| RBAC | Role Based Access Control | + +## Motivation + +User data encryption requires an encryption key to be known during encryption and decryption processes. +Key Managers like Barbican provide this functionality on the IaaS-Level. +Not every IaaS deployment currently offers user data encryption as part of its standard offering. +This standard should encourage CSPs to integrate a Key Manager and thus increase the number of Clouds with offerings of data encryption. +It is also important to take a closer look into the Key Manager and analyze how such a service can be configured securely. + +A Key Manager service manages keys in a secure manner, but this can be achieved differently and is not primarily in scope of this standard. +The OpenStack Key Manager Barbican stores keys encrypted with a project-specific KEK in the database. +The KEKs are also stored encrypted in the same database. +The Master-KEK, used to encrypt the project-specific KEKs, is not stored in the database and is stored differently depending on the backend storage plugin used. +This standard also abstracts the used plugins and wants to ensure that the Master-KEK is protected, too. + +## Design Considerations + +While discussing what this standard should aim for, it was discovered that some CSPs don't use Barbican or another Key Manager at all and do not provide the feature to encrypt user data to their customers. +This should change, but the exact change comes with a financial burden when choosing a plugin in Barbican to store the Master-KEK or choosing to integrate another Key Manager service instead. +To minimize the burden and enable more CSPs to step up and provide encryption, this standard will only make recommendations about plugins from Barbican. + +### Options considered + +#### Recommend or even mandate specific Key Manager plugins + +It was considered to only recommend a certain set of plugins or backends for the Key Manager, but this may be very prone to change if e.g. Barbican adds a new plugin. +As the SCS only wants to mandate the API that can be abstracted through the Castellan library in OpenStack, integrating any other Key Manager implementation is not uncommon, so this standard needs to consider other possible Key Managers as well. +Due to these reasons, this option was disregarded. + +#### Recommendation regarding the handling of the Master KEK + +Looking into the available Barbican plugins and possible attack vectors, one design decision in the plugins is very important: where and how to store the Master-KEK. +Because the Plugins might use different technologies, there are many possible locations for the Master-KEK. +Most of the Plugins increase the security level by not storing the Master-KEK in plain text on the physical machine Barbican is running on. +This mechanism as a whole is something that CSPs should aim for. + +#### Standardization of the Key Manager Policy + +Because this standard recommends or even eventually mandates the presence of a Key Manager, the situation regarding the policy of the Key Manager needs to be discussed.
+The policy of an IaaS service should use the same roles as the other IaaS services. +Unfortunately this does not apply to the Key Manager implementation Barbican. +It has the roles `reader`, `audit` and `creator`, which are not present in the Keystone role concept. +The roles a customer usually gets through the Identity API is `member`. +Leaving it this way will prevent users from creating and using secrets even when a Key Manager is integrated. + +To unify the roles among all IaaS services, there is currently work done in the OpenStack Community. +This initiative is called secure RBAC[^1]. +Also the SCS is discussing a standard concerning the roles[^2]. +When this is done, there is no further work needed. +But as of the 2024.1 release, this is still under development. + +In conclusion this standard should mandate everyone who uses a Key Manager that does not include the secure RBAC, to adjust the policies to have a mapping between the internal `creator` and the identity-based `member` role. +This will result in a `member` being allowed to do everything a `creator` can do. + +[^1]: [Secure RBAC work in OpenStack](https://etherpad.opendev.org/p/rbac-goal-tracking) +[^2]: [Issue for a role standard in SCS](https://github.com/SovereignCloudStack/issues/issues/396) + +## Key Manager Standard + +To increase security and allow user data encryption, CSPs SHOULD implement the Key Manager API (e.g. implemented by Barbican). +The Keys managed by this Key Manager MUST be stored encrypted and the Master-KEK of the Key Manager MUST be stored in another place than the Keys. + +If possible CSPs SHOULD NOT store the Master-KEK in plain-text on the physical host the Key Manager is running on. + +### Key Manager Policies + +If a Key Manager without secure RBAC enabled is used, the policies MUST be adjusted to let the `member` role of the Identity service be equivalent to the Key Manager internal `creator` role. + +## Related Documents + +[Barbican Plugins](https://docs.openstack.org/de/security-guide/secrets-management/barbican.html) + +## Conformance Tests + +Conformance must be tested in two steps. + +1. The check whether a Key Manager is present can be done in a similar way as in the mandatory OpenStack service APIs standard and the test should be merged into the mandatory service test as soon as a Key Manager is required in scs-conformant infrastructures. +2. The check, that there is no Master-KEK present on the Key Manager Node, has to be done by the CSP themself. diff --git a/Standards/scs-0116-w1-key-manager-implementation-testing.md b/Standards/scs-0116-w1-key-manager-implementation-testing.md new file mode 100644 index 000000000..d3acc6b4c --- /dev/null +++ b/Standards/scs-0116-w1-key-manager-implementation-testing.md @@ -0,0 +1,56 @@ +--- +title: "SCS Key Manager Standard: Implementation and Testing Notes" +type: Supplement +track: IaaS +status: Draft +supplements: + - scs-0116-v1-key-manager-standard.md +--- + +## Implementation + +A Key Manager service can have different backends. +For Barbican these are called Plugins. +The standard plugin is `simple_crypto`, which has the Master-KEK written in the Barbican config file. +In that case the Master-KEK needs additional protection. +When the `simple_crypto` plugin is used, securing the Master-KEK can be achieved through protection of the Barbican config e.g. through running Barbican in an enclave. + +Another option to secure the Master-KEK would be using an HSM with a corresponding plugin in Barbican. 
+In that case, the Master-KEK will be stored inside the HSM and encryption and decryption of the Project-KEKs will also happen in the HSM. +There are also software HSMs available that should be tested for their integration into the Barbican workflow. + +Other Plugins in Barbican are the KMIP plugin and Vault[^1]. +They store the keys differently, and CSPs need to make sure that the access to the keys is configured securely. + +:::tip + +Barbican supports deploying out-of-tree drivers, which enables operators to satisfy their specific needs. + +::: + +[^1]:[Barbican Plugins](https://docs.openstack.org/barbican/latest/install/barbican-backend.html) + +### Policies + +When a Key Manager is used, but it uses the old policies and does not enforce the new secure RBAC work, the roles between Barbican and the other IaaS services differ. +This can be fixed with a small change in the policy.yaml file. The `creator` has to be defined like this: + +```yaml +"creator": "role:member" +``` + +## Automated Tests + +The check for the presence of a Key Manager is done with a test script that checks for the presence of a Key Manager service in the catalog endpoint of OpenStack. +This check can eventually be moved to the checks for the mandatory and supported service/API list, in case of a promotion of the Key Manager to the mandatory list. + +### Implementation + +The script [`check-for-key-manager.py`](https://github.com/SovereignCloudStack/standards/blob/main/Tests/iaas/key-manager/check-for-key-manager.py) +connects to OpenStack and performs the checks described in this section. + +## Manual Tests + +It is not possible to check a deployment for a correctly protected Master KEK automatically from the outside. +Even audits would need to check the complete host for plain-text keys. +CSPs are responsible for ensuring the protection of the Master KEK, and they have to make at least their architecture for that protection auditable. diff --git a/Standards/scs-0117-v1-volume-backup-service.md b/Standards/scs-0117-v1-volume-backup-service.md new file mode 100644 index 000000000..9838536fa --- /dev/null +++ b/Standards/scs-0117-v1-volume-backup-service.md @@ -0,0 +1,98 @@ +--- +title: Volume Backup Functionality +type: Standard +status: Stable +stabilized_at: 2024-11-13 +track: IaaS +--- + +## Introduction + +OpenStack offers a variety of resources where users are able to transfer and store data in the infrastructure. +A prime example of these resources are volumes, which are attached to virtual machines as virtual block storage devices. +As such they carry potentially large amounts of user data which is constantly changing at runtime. +It is important for users to have the ability to create backups of this data in a reliable and efficient manner. + +## Terminology + +| Term | Meaning | +|---|---| +| CSP | Cloud Service Provider, provider managing the OpenStack infrastructure | +| IaaS | Abbreviation for Infrastructure as a Service | +| Image | IaaS resource representing a snapshot of a block storage disk, can be used to create Volumes | +| Volume | IaaS resource representing a virtual block storage device that can be attached as a disk to virtual machines | + +## Motivation + +The [volume backup functionality of the Block Storage API](https://docs.openstack.org/cinder/latest/admin/volume-backups.html) is a feature that is not available in all clouds per default, e.g., in OpenStack. +The feature requires a backend to be prepared and configured correctly before it can be used.
+In the Block Storage service, the backup storage backend is usually configured separately from the storage backend of the general volume service and may not be mandatory. +Thus, an arbitrary cloud may or may not offer the backup feature in the Block Storage API. + +This standard aims to make this functionality the default in SCS clouds so that customers can expect the feature to be usable. + +## Design Considerations + +The standard should make sure that the feature is available and usable but should not limit the exact implementation (e.g. choice of backend driver) any further than necessary. + +### Options considered + +#### Only recommend volume backup feature, use images as alternative + +As an alternative to the volume backup feature of the Block Storage API, images can also be created based on volumes and act as a backup under certain circumstances. +As an option, this standard could keep the actual integration of the volume backup feature optional and guide users how to use images as backup targets instead in case the feature is unavailable. + +However, it is not guaranteed that the image backend storage is separate from the volume storage. +For instance, both could be using the same Ceph cluster. +In such case, the images would not count as genuine backups. + +Although users are able to download images and transfer them to a different storage location, this approach might also prove unfeasible depending on the image size and the existence (or lack) of appropriate target storage on the user side. + +Furthermore, incremental backups are not possible when creating images from volumes either. +This results in time-consuming backup operations of fully copying a volume everytime a backup is created. + +#### Focus on feature availability, make feature mandatory + +This option is pretty straightforward. +It would make the volume backup feature mandatory for SCS clouds. +This way users can expect the feature to be available and usable. + +With this, users can leverage functionalities like incremental backups and benefit from optimized performance of the backup process due to the tight integration with the volume service. + +However, it does not seem feasible to also mandate having a separate storage backend for volume backups at the same time due to potential infrastructure limitations at CSP-side making it hard or even impossible to offer. +As such, the actual benefit of backups in terms of reliability and security aspects would be questionable if a separate storage backend is not mandated and therefore not guaranteed. + +This approach would focus on feature availability rather than backup reliability. + +#### Focus on backup reliability, make separate backend mandatory + +As an alternative, the volume backup feature availability could be made optional but in case a CSP chooses to offer it, the standard would mandate a separate storage backend to be used for volume backups. +This way, failures of the volume storage backend would not directly impact the availability and safety of volume backups, making them actually live up to their name. + +In contrast to the above, this approach would focus on backup reliability rather than feature availability. + +## Standard + +This standard decides to go with the second option and makes the volume backup feature mandatory in the following way: + +In an SCS cloud, the volume backup functionality MUST be configured properly and its API as defined per `/v3/{project_id}/backups` MUST be offered to customers. 
+If using Cinder, a suitable [backup driver](https://docs.openstack.org/cinder/latest/configuration/block-storage/backup-drivers.html) MUST be set up. + +The volume backup target storage SHOULD be a separate storage system from the one used for volumes themselves. + +## Related Documents + +- [OpenStack Block Storage v3 Backup API reference](https://docs.openstack.org/api-ref/block-storage/v3/index.html#backups-backups) +- [OpenStack Volume Backup Drivers](https://docs.openstack.org/cinder/latest/configuration/block-storage/backup-drivers.html) + +## Conformance Tests + +Conformance tests include using the `/v3/{project_id}/backups` Block Storage API endpoint to create a volume and a backup of it as a non-admin user and subsequently restore the backup on a new volume while verifying the success of each operation. +These tests verify the mandatory part of the standard: providing the Volume Backup API. + +There is a test suite in [`volume-backup-tester.py`](https://github.com/SovereignCloudStack/standards/blob/main/Tests/iaas/volume-backup/volume-backup-tester.py). +The test suite connects to the OpenStack API and executes basic operations using the volume backup API to verify that the functionality requested by the standard is available. +Please consult the associated [README.md](https://github.com/SovereignCloudStack/standards/blob/main/Tests/iaas/volume-backup/README.md) for detailed setup and testing instructions. + +Note that these tests don't verify the optional part of the standard: providing a separate storage backend for Cinder volume backups. +This cannot be checked from outside of the infrastructure as it is an architectural property of the infrastructure itself and transparent to customers. diff --git a/Standards/scs-0118-v1-taxonomy-of-failsafe-levels.md b/Standards/scs-0118-v1-taxonomy-of-failsafe-levels.md new file mode 100644 index 000000000..45f494368 --- /dev/null +++ b/Standards/scs-0118-v1-taxonomy-of-failsafe-levels.md @@ -0,0 +1,255 @@ +--- +title: SCS Taxonomy of Failsafe Levels +type: Decision Record +status: Draft +track: IaaS +--- + + +## Abstract + +When talking about redundancy and backups in the context of cloud infrastructures, the scope under which circumstances these concepts apply to various resources is neither homogenous nor intuitive. +There does exist very detailed lists of risks and what consequences there are for each risk, but this Decision Record should give a high-level view on the topic. +So that in each standard that referenced redundancy, it can easily be seen how far this redundancy goes in that certain circumstance. +Readers of such standards should be able to know at one glance, whether the achieved failure safeness is on a basic level or a higher one and whether there would be additional actions needed to protect the data. + +This is why this decision record aims to define different levels of failure safety. +These levels can then be used in standards to clearly set the scope that certain procedures in e.g. OpenStack offer. + +## Glossary + +| Term | Explanation | +| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | +| Availability Zone | (also: AZ) internal representation of physical grouping of service hosts, which also lead to internal grouping of resources. | +| BSI | German Federal Office for Information Security (Bundesamt für Sicherheit in der Informationstechnik). 
| +| CSP | Cloud Service Provider, provider managing the OpenStack infrastructure. | +| Compute | A generic name for the IaaS service, that manages virtual machines (e.g. Nova in OpenStack). | +| Network | A generic name for the IaaS service, that manages network resources (e.g. Neutron in OpenStack). | +| Storage | A generic name for the IaaS service, that manages the storage backends and virtual devices (e.g. Cinder in OpenStack). | +| RTO | Recovery Time Objective, the acceptable time needed to restore a ressource. | +| Disk | A physical disk drive (e.g. HDD, SSD) in the infrastructure. | +| Host | A physical machine in the infrastructure providing computational, storage and/or network connectivity capabilities. | +| Cyber attack/threat | Attacks on the infrastructure through the means of electronic access. | + +## Context + +Some standards provided by the SCS project will talk about or require procedures to back up resources or have redundancy for resources. +This decision record should discuss, which failure threats exist within an IaaS and KaaS deployment and will classify them into several levels according to their impact and possible handling mechanisms. +In consequence these levels should be used in standards concerning redundancy or failure safety. + +Based on our research, no similar standardized classification scheme seems to exist currently. +Something close but also very detailed is the [BSI-Standard 200-3 (german)][bsi-200-3] published by the German Federal Office for Information Security. +As we want to focus on IaaS and K8s resources and also have an easily understandable structure that can be applied in standards covering replication, redundancy and backups, this document is too detailed. + +### Goal of this Decision Record + +The SCS wants to classify levels of failure cases according to their impact and the respective measures CSPs can implement to prepare for each level. +Standards that deal with redundancy or backups or recovery SHOULD refer to the levels of this standard. +Thus every reader knows, up to which level of failsafeness the implementation of the standard works. +Reader then should be able to abstract what kind of other measures they have to apply, to reach the failsafe lavel they want to reach. + +:::caution + +This document will not be a replacement for a risk analysis. +Every CSP and every Customer (user of IaaS or KaaS resources) need to do a risk analysis of their own. +Also the differentiation of failure cases in classes, may not be an ideal basis for Business Continuity Planning. +It may be used to get general hints and directions though. + +::: + +### Differentiation between failsafe levels and high availability, disaster recovery, redundancy and backups + +The levels of failsafeness defined in this decision record classify the possibilities and impacts of failure cases (such as data loss) and the possible measures. +High Availability, disaster recovery, redundancy and backups are all measures that can and should be applied to IaaS and KaaS deployments by both CSPs and Users to reduce the possibility and impact of data loss. +So with this document every reader can see to what level of failsafeness their measures protect user data. 
+ +To differentiate also between the named measures the following table can be used: + +| Term | Explanation | +| ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| High Availability | Refers to the availability of resources over an extended period of time unaffected by smaller hardware issues. E.g. achievable through having several instances of resources. | +| Disaster Recovery | Measures taken after an incident to recover data, IaaS resource and maybe even physical resources. | +| Redundancy | Having more than one (or two) instances of each resource, to be able to switch to the second resource (could also be a data mirror) in case of a failure. | +| Backup | A specific copy of user data, that presents all data points at a given time. Usually managed by users themself, read only and never stored in the same place as the original data. | + +### Failsafe Levels and RTO + +As this documents classifies failure case with very broad impacts and it is written in regards of mostly IaaS and KaaS, there cannot be one simple RTO set. +The RTOs will differ for each resource and also between IaaS and KaaS level. +It should be taken into consideration that the measure to achieve the RTOs for IaaS and KaaS means to make user data available again through measures within the infrastructure. +But this will not be effective, when there is no backup of the user data or a redundancy of it already in place. +So the different failsafe levels, measures and impacts will be needed to define realistic RTOs. +For example a storage disk that has a failure will not result in a volume gein unavailable and needing a defined RTO, when the storage backend uses internal replication and still has two replicas of the user data. +While in the worst case of a natural disaster, most likely a severe fire, the whole deployment will be lost and if there were no off-site backups done by users any defined RTO will never be met, because the data cannot be recovered anymore. + +[bsi-200-3]: https://www.bsi.bund.de/SharedDocs/Downloads/DE/BSI/Grundschutz/BSI_Standards/standard_200_3.pdf?__blob=publicationFile&v=2 + +## Decision + +### Failsafe Levels + +This Decision Record defines **four** failsafe levels, each of which describe what kind of failures have to +be tolerated by a provided service. + +:::caution + +This table only contains examples of failure cases. +This should not be used as a replacement for a risk analysis. + +::: + +In general, the lowest, **level 1**, describes isolated/local failures which can occur very frequently, whereas +the highest, **level 4**, describes relatively unlikely failures that impact a whole or even multiple datacenter(s): + +| Level | Probability | Impact | Examples | +| - | - | - | - | +| 1 | Very High | small Hardware Issue | Disk failure, RAM failure, small software bug | +| 2 | High | Rack-wide | Rack outage, power outage, small fire | +| 3 | Medium | site-wide (temporary) | Regional power outage, huge fire, orchestrated cyber attack | +| 4 | Low | site destruction | Natural disaster | + +For example, a provided service with failsafe level 2 tolerates a rack outage (because there is some kind of +redundancy in place.) + +There are some *general* consequences, that can be addressed by CSPs and users in the following ways: + +| Level | consequences for CSPs | consequences for Users | +|---|-----|-----| +| 1. 
Level | CSPs MUST operate replicas for important components (e.g. replicated volume back-end, replicated database, ...). | Users SHOULD backup their data themself and place it on an other host. | +| 2. Level | CSPs MUST have redundancy for important components (e.g. HA for API services, redundant power supply, ...). | Users MUST backup their data themselves and place it on an other host. | +| 3. Level | CSPs SHOULD operate hardware in dedicated Availability Zones. | Users SHOULD backup their data, in different AZs or even other deployments. | +| 4. Level | CSPs may not be able to save user data from such catastrophes. | Users MUST have a backup of their data in a different geographic location. | + +:::caution + +The columns **consequences for CSPs / Users** only show examples of actions that may provide this class of failure safety for a certain resource. +Customers should always check, what they can do to protect their data and not rely solely on the CSP. + +::: + +More specific guidance on what these levels mean on the IaaS and KaaS layers will be provided in the sections +further down. +But beforehand, we will describe the considered failure scenarios and the resources that may be affected. + +### Failure Scenarios + +The following failure scenarios have been considered for the proposed failsafe levels. +For each failure scenario, we estimate the probability of occurence and the (worst case) damage caused by the scenario. +Furthermore, the corresponding minimum failsafe level covering that failure scenario is given. +The following table give a coarse view over the probabilities, that are used to describe the occurance of failure cases: + +| Probability | Meaning | +|-----------|----| +| Very Low | Occurs at most once a decade OR needs extremly unlikely circumstances. | +| Low | Occurs at most once a year OR needs very unlikely circumstances. | +| Medium | Occurs more than one time a year, up to one time a month. | +| High | Occurs more than once a month and up to a daily basis. | +| Very High | Occurs within minutes. | + +#### Hardware Related + +| Failure Scenario | Probability | Consequences | Failsafe Level Coverage | +|----|-----|----|----| +| Disk Failure | High | Permanent data loss in this disk. Impact depends on type of lost data (data base, user data) | L1 | +| Host Failure (without disks) | Medium to High | Permanent loss of functionality and connectivity of host (impact depends on type of host) | L1 | +| Host Failure | Medium to High | Data loss in RAM and temporary loss of functionality and connectivity of host (impact depends on type of host) | L1 | +| Rack Outage | Medium | Outage of all nodes in rack | L2 | +| Network router/switch outage | Medium | Temporary loss of service, loss of connectivity, network partitioning | L2 | +| Loss of network uplink | Medium | Temporary loss of service, loss of connectivity | L3 | +| Power Outage (Data Center supply) | Medium | Temporary outage of all nodes in all racks | L3 | + +#### Environmental + +Note that probability for these scenarios is dependent on the location. 
+ +| Failure Scenario | Probability | Consequences | Failsafe Level Coverage | +|----|-----|----|----| +| Fire | Low | permanent Disk and Host loss in the affected zone | L3 | +| Flood | Very Low | permanent Disk and Host loss in the affected region | L4 | +| Earthquake | Very Low | permanent Disk and Host loss in the affected region | L4 | +| Storm/Tornado | Low | permanent Disk and Host loss in the affected region | L4 | + +As we consider mainly deployments in central Europe, the probability of earthquakes is low and in the rare case of such an event the severity is also low compared to other regions in the world (e.g. the pacific ring of fire). +The event of a flood will most likely come from overflowing rivers instead of storm floods from a sea. +There can be measures taken, to reduce the probability and severity of a flooding event in central Europe due to simply choosing a different location for a deployment. + +#### Software Related + +| Failure Scenario | Probability | Consequences | Failsafe Level Coverage | +|----|-----|----|----| +| Software bug (major) | Low to Medium | permanent loss or compromise of data that trigger the bug up to data on the whole deployment | L3 | +| Software bug (minor) | Medium to High | temporary or partial loss or compromise of data | L1 | + +Many software components have lots of lines of code and cannot be proven correct in their whole functionality. +They are tested instead with at best enough test cases to check every interaction. +Still bugs can and will occur in software. +Most of them are rather small issues, that might even seem like a feature to some. +An exmple for this would be: [whether a floating IP in OpenStack could be assigned to a VM even if it is already bound to another VM](https://bugs.launchpad.net/neutron/+bug/2060808). +Bugs like this do not affect a whole deployment, when they are triggered, but just specific data or resources. +Nevertheless those bugs can be a daily struggle. +This is the reason, the probability of such minor bugs may be pretty high, but the consequences would either be just temporary or would only result in small losses or compromisation. + +On the other hand major bugs, which might be used to compromise data, that is not in direct connection to the triggered bug, occur only a few times a year. +This can be seen e.g. in the [OpenStack Security Advisories](https://security.openstack.org/ossalist.html), where there were only 3 major bugs found in 2023. +While these bugs might appear only rarely their consequences are immense. +They might be the reason for a whole deployment to be compromised or shut down. +CSPs should be in contact with people triaging and patching such bugs, to be informed early and to be able to update their deployments, before the bug is openly announced. + +#### Human Interference + +| Failure Scenario | Probability | Consequences | Failsafe Level Coverage | +|----|-----|----|----| +| Minor operating error | High | Temporary outage | L1 | +| Major operating error | Low | Permanent loss of data | L3 | +| Cyber attack (minor) | Very High | permanent loss or compromise of data on affected Disk and Host | L1 | +| Cyber attack (major) | Medium | permanent loss or compromise of data on affected Disk and Host | L3 | + +Mistakes in maintaining a data center will always happen. +To reduce the probability of such a mistake, measures are needed to reduce human error, which is more an issue of sociology and psychology instead of computer science. 
+On the other side an attack on an infrastructure cannot be avoided by this. +Instead every deployment needs to be prepared for an attack all the time, e.g. through security updates. +The severity of Cyber attacks can also vary broadly: from denial-of-service attacks, which should only be a temporary issue, up until coordinated attacks to steal or destroy data, which could also affect a whole deployment. +The easier an attack is, the more frequently it will be used by various persons and organizations up to be just daily business. +Major attacks are often orchestrated and require specific knowledge e.g. of Day-0 Bugs or the attacked infrastructure. +Due to that nature their occurance is less likely, but the damage done can be far more severe. + +## Consequences + +Using the definition of levels established in this decision record throughout all SCS standards would allow readers to understand up to which level certain procedures or aspects of resources (e.g. volume types or a backend requiring redundancy) would protect their data and/or resource availability. + +### Affected Resources + +#### IaaS Layer (OpenStack Resources) + +| Resource | Explanation | Affected by Level | +| ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------- | ----------------- | +| Ephemeral VM | Equals the `server` resource in Nova, booting from ephemeral storage. | L1, L2, L3, L4 | +| Volume-based VM | Equals the `server` resource in Nova, booting from a volume. | L2, L3, L4 | +| Ephemeral Storage | Disk storage directly supplied to a virtual machine by Nova. Different from volumes. | L1, L2, L3, L4 | +| Ironic Machine | A physical host managed by Ironic or as a `server` resource in Nova. | L1, L2, L3, L4 | +| (Glance) Image | IaaS resource usually storing raw disk data. Managed by the Glance service. | (L1), L2, L3, L4 | +| (Cinder) Volume | IaaS resource representing block storage disk that can be attached as a virtual disk to virtual machines. Managed by the Cinder service. | (L1, L2), L3, L4 | +| (Volume) Snapshot | Thinly-provisioned copy-on-write snapshots of volumes. Stored in the same Cinder storage backend as volumes. | (L1, L2), L3, L4 | +| Volume Type | Attribute of volumes determining storage details of a volume such as backend location or whether the volume will be encrypted. | L3, L4 | +| (Barbican) Secret | IaaS resource storing cryptographic assets such as encryption keys. Managed by the Barbican service. | L3, L4 | +| Key Encryption Key | IaaS resource, used to encrypt other keys to be able to store them encrypted in a database. | L3, L4 | +| Floating IP | IaaS resource, an IP that is usually routed and accessible from external networks. | L3, L4 | + +#### KaaS Layer (Kubernetes Resources) + +A detailed list of consequnces for certain failures can be found in the [Kubernetes docs](https://kubernetes.io/docs/tasks/debug/debug-cluster/). +The following table gives an overview about certain resources on the KaaS Layer and in which failsafe classes they are affected: + +| Resource(s) | Explanation | Affected by Level | +| ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------- | ----------------- | +| Pod | Kubernetes object that represents a workload to be executed, consisting of one or more containers. 
| L3, L4 | +| Container | A lightweight and portable executable image that contains software and all of its dependencies. | L3, L4 | +| Deployment, StatefulSet | Kubernetes objects that manage a set of Pods. | L3, L4 | +| Job | Application workload that runs once. | L3, L4 | +| CronJob | Application workload that runs once, but repeatedly at specific intervals. | L3, L4 | +| ConfigMap, Secret | Objects holding static application configuration data. | L3, L4 | +| Service | Makes a Pod's network service accessible inside a cluster. | (L2), L3, L4 | +| Ingress | Makes a Service externally accessible. | L2, L3, L4 | +| PersistentVolume (PV) | Persistent storage that can be bound and mounted to a pod. | L1, L2, L3, L4 | + +Also see [Kubernetes Glossary](https://kubernetes.io/docs/reference/glossary/). diff --git a/Standards/scs-0118-w1-example-impacts-of-failure-scenarios.md b/Standards/scs-0118-w1-example-impacts-of-failure-scenarios.md new file mode 100644 index 000000000..a41ceb6ea --- /dev/null +++ b/Standards/scs-0118-w1-example-impacts-of-failure-scenarios.md @@ -0,0 +1,77 @@ +--- +title: "SCS Taxonomy of Failsafe Levels: Examples of Failure Cases and their impact on IaaS and KaaS resources" +type: Supplement +track: IaaS +status: Draft +supplements: + - scs-0118-v1-taxonomy-of-failsafe-levels.md +--- + +## Examples of the impact from certain failure scenarios on Cloud Resources + +Failure cases in Cloud deployments can be hardware related, environmental, due to software errors or human interference. +The following table summerizes different failure scenarios, that can occur: + +| Failure Scenario | Probability | Consequences | Failsafe Level Coverage | +|----|-----|----|----| +| Disk Failure | High | Permanent data loss in this disk. Impact depends on type of lost data (data base, user data) | L1 | +| Host Failure (without disks) | Medium to High | Permanent loss of functionality and connectivity of host (impact depends on type of host) | L1 | +| Host Failure | Medium to High | Data loss in RAM and temporary loss of functionality and connectivity of host (impact depends on type of host) | L1 | +| Rack Outage | Medium | Outage of all nodes in rack | L2 | +| Network router/switch outage | Medium | Temporary loss of service, loss of connectivity, network partitioning | L2 | +| Loss of network uplink | Medium | Temporary loss of service, loss of connectivity | L3 | +| Power Outage (Data Center supply) | Medium | Temporary outage of all nodes in all racks | L3 | +| Fire | Medium | permanent Disk and Host loss in the affected zone | L3 | +| Flood | Low | permanent Disk and Host loss in the affected region | L4 | +| Earthquake | Very Low | permanent Disk and Host loss in the affected region | L4 | +| Storm/Tornado | Low | permanent Disk and Host loss in the affected region | L4 | +| Software bug (major) | Low | permanent loss or compromise of data that trigger the bug up to data on the whole physical machine | L3 | +| Software bug (minor) | High | temporary or partial loss or compromise of data | L1 | +| Minor operating error | High | Temporary outage | L1 | +| Major operating error | Low | Permanent loss of data | L3 | +| Cyber attack (minor) | High | permanent loss or compromise of data on affected Disk and Host | L1 | +| Cyber attack (major) | Medium | permanent loss or compromise of data on affected Disk and Host | L3 | + +Those failure scenarios can result in either only temporary (T) or permanent (P) loss of IaaS / KaaS resources or data. 
+Additionally, there are a lot of resources in IaaS alone that are more or less affected by these failure scenarios. +The following tables shows the impact **when no redundancy or failure safety measure is in place**, i.e., when +**not even failsafe level 1 is fulfilled**. + +### Impact on IaaS Resources (IaaS Layer) + +| Resource | Disk Loss | Node Loss | Rack Loss | Power Loss | Natural Catastrophy | Cyber Threat | Software Bug | +|----|----|----|----|----|----|----|----| +| Image | P[^1] | T[^3] | T/P | T | P (T[^4]) | T/P | P | +| Volume | P[^1] | T[^3] | T/P | T | P (T[^4]) | T/P | P | +| User Data on RAM /CPU | | P | P | P | P | T/P | P | +| volume-based VM | P[^1] | T[^3] | T/P | T | P (T[^4]) | T/P | P | +| ephemeral-based VM | P[^1] | P | P | T | P (T[^4]) | T/P | P | +| Ironic-based VM | P[^2] | P | P | T | P (T[^4]) | T/P | P | +| Secret | P[^1] | T[^3] | T/P | T | P (T[^4]) | T/P | P | +| network configuration (DB objects) | P[^1] | T[^3] | T/P | T | P (T[^4]) | T/P | P | +| network connectivity (materialization) | | T[^3] | T/P | T | P (T[^4]) | T/P | T | +| floating IP | P[^1] | T[^3] | T/P | T | P (T[^4]) | T/P | T | + +For some cases, this only results in temporary unavailability and cloud infrastructures usually have certain mechanisms in place to avoid data loss, like redundancy in storage backends and databases. +So some of these outages are easier to mitigate than others. + +[^1]: If the resource is located on that specific disk. +[^2]: Everything located on that specific disk. If more than one disk is used, some data could be recovered. +[^3]: If the resource is located on that specific node. +[^4]: In case of disks, nodes or racks are not destroyed, some data could be safed. E.g. when a fire just destroyes the power line. + +### Impact on Kubernetes Resources (KaaS layer) + +:::note + +In case the KaaS layer runs on top of IaaS layer, the impacts described in the above table apply for the KaaS layer as well. + +::: + +| Resource | Disk Loss | Node Loss | Rack Loss | Power Loss | Natural Catastrophy | Cyber Threat | Software Bug | +|----|----|----|----|----|----|----|----| +|Node|P| | | | | |T/P| +|Kubelet|T| | | | | |T/P| +|Pod|T| | | | | |T/P| +|PVC|P| | | | | |P| +|API Server|T| | | | | |T/P| diff --git a/Standards/scs-0119-v1-rook-decision.md b/Standards/scs-0119-v1-rook-decision.md new file mode 100644 index 000000000..47e825f70 --- /dev/null +++ b/Standards/scs-0119-v1-rook-decision.md @@ -0,0 +1,76 @@ +--- +title: Replacement of the deprecated ceph-ansible tool +type: Decision Record +status: Draft +track: IaaS +--- + +## Abstract + +This decision record evaluates the choice for a modern, future-proof deployment tool for the networked storage solution Ceph in the SCS reference implementation, [OSISM](https://osism.tech/). +The new deployment tool aims to enhance Kubernetes integration within SCS, potentially allowing providers to manage the Ceph cluster with greater ease and efficiency. + +## Context + +The current reference implementation relies on `ceph-ansible`, [which is now deprecated](https://github.com/ceph/ceph-ansible/commit/a9d1ec844d24fcc3ddea7c030eff4cd6c414d23d). As a result, this decision record evaluates two alternatives: [Cephadm](https://docs.ceph.com/en/latest/cephadm/) and [Rook](https://rook.io/docs/rook/latest-release/Getting-Started/intro/). + +Both tools are designed to roll out and configure Ceph clusters, providing the capability to manage clusters throughout their lifecycle. 
This includes functionalities such as adding or removing OSDs, upgrading Ceph services, and managing CRUSH maps, as outlined in the [Feature-Decision-Table](#feature-decision-table). + +This decision record considers both the current and future needs of the reference implementation. The decision is guided by a comprehensive comparison of each tool's capabilities and limitations as well as the SCS communities needs and futures objectives. + +### Comparison of Features + +The tool selected in this decision MUST ensure: + +* ease of migration +* future-proofness +* feature-completeness and feature-maturity +* effective management of Ceph clusters + +#### Feature Decision Table + +A comparative analysis of Cephadm and Rook highlights the following: + +| Feature | Supported in Cephadm | Supported in Rook | +| ------- | -------------------- | ----------------- | +| Migrate from other setups | ☑ Adoption of clusters, that where built with ceph-ansible [is officially supported](https://docs.ceph.com/en/quincy/cephadm/adoption/).| ☐ Migration from other setups is not offically supported. See this [issue](https://github.com/rook/rook/discussions/12045). Consequently, SCS develops a migration tool, named [rookify](https://github.com/SovereignCloudStack/rookify). Alternatively, Rook allows to use [Ceph as an external cluster](https://rook.io/docs/rook/latest-release/CRDs/Cluster/external-cluster/external-cluster/). | +| Connect RGW with OpenStack Keystone | ☑ | ☑ Experimental | +| Deploy specific Ceph versions | ☑ | ☑ | +| Upgrade to specific Ceph versions | ☑ Streamlined upgrade process. | ☑ Rook, CSI and Ceph upgrades have to be aligned, there is a [guide](https://rook.io/docs/rook/latest-release/Upgrade/health-verification/) available for each Rook version. | +| Deploy Ceph Monitors | ☑ | ☑ | +| Deploy Ceph Managers | ☑ | ☑ | +| Deploy Ceph OSDs | ☑ | ☑ | +| Deploy Ceph Object Gateway (RGW) | ☑ | ☑ | +| Removal of nodes | ☑ | ☑ | +| Purging of complete cluster | ☑ | ☑ | + +☐ not supported (yet) +☑ supported +☑☑ better option +☒ not supported on purpose + +#### Evaluation in the Light of SCS Community Plans and Preferences + +**Environment**: Cephadm is better suited for traditional or standalone environments. Conversely, Rook is tailored for Kubernetes. That being said, it's important to note that the current state of resource deployment and management on Kubernetes within the IaaS reference implementation is still in its early stages. This would make Rook one of the first components to utilise Kubernetes in OSISM. + +**Deployment**: Cephadm uses containerization for Ceph components, whereas Rook fully embraces the Kubernetes ecosystem for deployment and management. Although containerization is already a core concept in the reference implementation, there is a strong push from the SCS community to adopt more Kubernetes. + +**Configuration and Management**: Rook offers a more straightforward experience for those already utilizing Kubernetes, leveraging Kubernetes' features for automation and scaling. In contrast, Cephadm grants finer control over Ceph components, albeit necessitating more manual intervention. In both cases, this is something that needs to be partly abstracted by the reference implementation. + +**Integration**: Rook provides better integration with cloud-native tools and environments, whereas Cephadm offers a more Ceph-centric management experience. + +**Migration**: Rook does not currently provide any migration support, while Cephadm does offer this capability. 
However, the SCS community is highly supportive of developing a migration tool (Rookify) for Rook, as this would enhance SCS's influence by offering the first migration solution specifically for Rook providers.
+
+**SCS Community**: An important factor in our decision is the preferences and direction of the SCS community and its providers. There is a noticeable trend towards increased use of Kubernetes within the community. This indicates a preference for deployment tools that integrate well with Kubernetes environments.
+
+**SCS Future Goals**: The SCS community is open to building tools that provide open-source, publicly available solutions beyond the scope of SCS. This openness to development efforts that address limitations of the chosen tools, such as Rook, is also a key consideration in our decision.
+
+## Decision
+
+As OSISM will increasingly focus on a Kubernetes-centric approach for orchestration in the near future, adopting Rook is a more suitable and standardized approach. Moreover, many service providers within the SCS community (including several who deploy OSISM) already have experience with Kubernetes. Regarding the missing OpenStack Keystone integration, we are confident that the colleagues who work on this issue will provide a solution in a timely manner. We expect that deploying Ceph with Rook will simplify deployment and configuration from the outset.
+In order to allow for a migration from existing Ceph installations to Rook, we decided to develop a migration tool (called Rookify) for the reference implementation. If the development of Rookify goes beyond the targeted scope of the reference implementation, the tool will add value to the Ceph as well as the Rook community.
+
+## Consequences
+
+Migrating an existing Ceph environment onto Kubernetes, as well as bringing together existing but independent Ceph and Kubernetes environments, will become straightforward without much manual intervention needed.
+Landscapes that currently do not deploy a Kubernetes cluster will have to adapt and provide a Kubernetes cluster in the future.
diff --git a/Standards/scs-0120-v1-capi-images.md b/Standards/scs-0120-v1-capi-images.md
new file mode 100644
index 000000000..1fe2380b5
--- /dev/null
+++ b/Standards/scs-0120-v1-capi-images.md
@@ -0,0 +1,63 @@
+---
+title: Cluster-API images
+type: Decision Record
+status: Draft
+track: IaaS
+---
+
+## Abstract
+
+The SCS reference implementation for the Kubernetes-as-a-service layer is built on top of Cluster API (CAPI), and therefore it depends on the corresponding VM images, which may or may not be present on the underlying infrastructure-as-a-service layer. Current tooling will make sure to upload the required image in case it's not present or outdated. However, these ad-hoc uploads will not be shared across environments, which may lead to waste of bandwidth (for transferring the image), storage (if images are not stored in a deduplicated manner), and not least time (because the upload does take multiple minutes). Needless to say, it may also lead to excessive greenhouse-gas emissions.
+
+This decision record investigates the pros and cons of making the CAPI images mandatory. Ultimately, the decision is made to keep them recommended; we stress, however, that providers who offer the images by default should advertise this fact.
+
+## Terminology
+
+- _Kubernetes as a service (KaaS)_: A service that offers provisioning Kubernetes clusters.
+- _Cluster API (CAPI)_: "Cluster API is a Kubernetes sub-project focused on providing declarative APIs and tooling to simplify provisioning, upgrading, and operating multiple Kubernetes clusters." ([source](https://cluster-api.sigs.k8s.io/)) This API can thus be used to implement KaaS. +- _CAPI image_: Virtual machine image that contains a standardized Kubernetes setup to be used for CAPI. The SCS reference implementation for KaaS depends on these images. +- _CSP_: Cloud-service provider + +## Design considerations + +We consider the following two options: + +1. Make CAPI image mandatory. +2. Keep CAPI image recommended. + +For reasons of symmetry, it suffices to consider the pros and cons of the first option. + +Pros: + +- Save time, money, physical resources and power for both CSP and customer. +- Regardless of CSP taste, this KaaS tech is part of SCS. + +Neutral: + +- The CAPI image can be provided in an automated fashion that means virtually no burden to the CSP. +- The KaaS implementation will work either way. +- Willing CSPs may offer the image by default and advertise as much. + +Cons: + +- Additional regulations would be necessary to guarantee quality and timeliness of image. +- Some CSPs may be opposed to being forced to offer a certain service, which may hurt the overall acceptance + of the SCS standardization efforts. + +## Decision + +Ultimately, we value the freedom of the CSPs (and the acceptance of the standardization efforts) highest; +willing CSPs are welcome to opt in, i.e., to provide up-to-date images and advertise as much. + +Therefore we decide to _keep the CAPI images recommended_. + +## Consequences + +None, as the status quo is being kept. + +## Open questions + +Some interesting potential future work does remain, however: to find a way to certify that a willing provider +does indeed provide up-to-date images. It would be possible with today's methods to certify that a CAPI +image is present (the image_spec yaml file would have to be split up to obtain a separate test case), but +we there is no way to make sure that the image is up to date. diff --git a/Standards/scs-0121-v1-Availability-Zones-Standard.md b/Standards/scs-0121-v1-Availability-Zones-Standard.md new file mode 100644 index 000000000..0dc9ed698 --- /dev/null +++ b/Standards/scs-0121-v1-Availability-Zones-Standard.md @@ -0,0 +1,206 @@ +--- +title: SCS Availability Zones +type: Standard +status: Stable +stabilized_at: 2024-11-13 +track: IaaS +--- + +## Introduction + +On the IaaS level especially in OpenStack it is possible to group resources in Availability Zones. +Such Zones often are mapped to the physical layer of a deployment, such as e.g. physical separation of hardware or redundancy of power circuits or fire zones. +But how CSPs apply Availability Zones to the IaaS Layer in one deplyoment may differ widely. +Therefore this standard will address the minimal requirements that need to be met, when creating Avaiability Zones. + +## Terminology + +| Term | Explanation | +| ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------- | +| Availability Zone | (also: AZ) internal representation of physical grouping of service hosts, which also lead to internal grouping of resources. | +| Fire Zone | A physical separation in a data center that will contain fire within it. Effectively stopping spreading of fire. 
| +| PDU | Power Distribution Unit, used to distribute the power to all physical machines of a single server rack. | +| Compute | A generic name for the IaaS service, that manages virtual machines (e.g. Nova in OpenStack). | +| Network | A generic name for the IaaS service, that manages network resources (e.g. Neutron in OpenStack). | +| Storage | A generic name for the IaaS service, that manages the storage backends and virtual devices (e.g. Cinder in OpenStack). | +| BSI | German Federal Office for Information Security (Bundesamt für Sicherheit in der Informationstechnik) | +| CSP | Cloud Service Provider, provider managing the OpenStack infrastructure. | +| SDN | Software Defined Network, virtual networks managed by the networking service. | + +## Motivation + +Redundancy is a non-trivial but relevant issue for a cloud deployment. +First and foremost it is necessary to increase failure safety through redundancy on the physical layer. +The IaaS layer as the first abstraction layer from the hardware has an important role in this topic, too. +The grouping of redundant physical resources into Availability Zones on the IaaS level, gives customers the option to distribute their workload to different AZs which will result in a better failure safety. +While CSPs already have some similarities in their grouping of physical resources to AZs, there are also differences. +This standard aims to reduce those differences and will clarify, what customers can expect from Availability Zones in IaaS. + +Availability Zones in IaaS can be set up for Compute, Network and Storage separately while all may be referring to the same physical separation in a deployment. +This standard elaborates the necessity of having Availability Zones for each of these classes of resources. +It will also check the requirements customers may have, when thinking about Availability Zones in relation to the taxonomy of failure safety levels [^1]. +The result should enable CSPs to know when to create AZs to be SCS-compliant. + +## Design Considerations + +Availability Zones should represent parts of the same physical deployment that are independent of each other. +The maximum level of physical independence is achieved through putting physical machines into different fire zones. +In that case a failure case up to level 3 as described in the taxonomy of failure safety levels document[^1] will not lead to a complete outage of the deployment. + +Having Availability Zones represent fire zones will also result in AZs being able to take workload from another AZ in a failure case of Level 3. +So that even the destruction of one Availability Zone will not automatically include the destruction of the other AZs. + +:::caution + +Even with fire zones being physically designed to protect parts of a data center from severe destruction in case of a fire, this will not always succeed. +Availability Zones in Clouds are most of the time within the same physical data center. +In case of a big catastrophe like a huge fire or a flood the whole data center could be destroyed. +Availability Zones will not protect customers against these failure cases of level 4 of the taxonomy of failure safety[^1]. + +::: + +Smaller deplyoments like edge deployments may not have more than one fire zone in a single location. +To include such deployments, it should not be required to use Availability Zones. + +Other physical factors that should be considered are the power supplies, internet connection, cooling and core routing. 
+Availability Zones were also used by CSPs as a representation of redundant PDUs.
+That means there are deployments which have one Availability Zone per rack, as each rack has its own PDU and this was considered to be the single point of failure an AZ should represent.
+While this is also a possible measure of independence, it only provides failure safety up to level 2.
+Therefore this standard should be very clear about which independence an AZ should represent, and it should not be allowed to have different deployments with their Availability Zones representing different levels of failure safety.
+
+Additionally, Availability Zones are available for the Compute, Storage and Network services.
+They behave differently for each of these resources and also when working across resource-based Availability Zones, e.g. attaching a volume from one AZ to a virtual machine in another AZ.
+For each of these IaaS resource classes, it should be defined under which circumstances Availability Zones should be used.
+
+[^1]: [Taxonomy of Failsafe Levels in SCS (TODO: change link as soon as taxonomy is merged)](https://github.com/SovereignCloudStack/standards/pull/579)
+
+### Scope of the Availability Zone Standard
+
+When elaborating redundancy and failure safety in data centers, it is necessary to also define redundancy on the physical level.
+There are already recommendations from the BSI for physical redundancy within a cloud deployment [^2].
+This standard considers these recommendations as a basis that is followed by most CSPs.
+So this standard will not go into details already provided by the CSP, but will rather concentrate on the IaaS layer and only have a coarse view on the physical layer.
+The first assumption from the recommendations of the BSI is that the destruction of one fire zone will not lead to an outage of all power lines (not PDUs), internet connections, core routers or cooling systems.
+
+For the setup of Availability Zones this means that within every AZ, there needs to be redundancy in core routers, internet connection, power lines and at least two separate cooling systems.
+This should avoid having single points of failure within the Availability Zones.
+But all this physical infrastructure can be the same over all Availability Zones in a deployment, as long as it is possible to survive the destruction of one fire zone.
+
+[^2]: [Availability recommendations from the BSI](https://www.bsi.bund.de/SharedDocs/Downloads/DE/BSI/RZ-Sicherheit/RZ-Verfuegbarkeitsmassnahmen.pdf?__blob=publicationFile&v=9)
+
+### Options considered
+
+#### Physical-based Availability Zones
+
+It would be possible to standardize the usage of Availability Zones over all IaaS resources.
+The downside of this is that the IaaS resources behave so differently that they have different requirements for redundancy and thus Availability Zones.
+This is not the way to go.
+Besides that, it is already possible to create two physically separated deployments close to each other, connect them with each other and use regions to differentiate between the IaaS on both deployments.
+
+The question that remains is what an Availability Zone should consist of.
+Having one Availability Zone per fire zone gives the best level of failure safety that can be achieved by CSPs.
+When building on the relation between fire zones and the physical redundancy recommendations from the BSI, this combination is a good starting point, but it needs to be checked for validity for the different IaaS resources.
+
+Another point is where Availability Zones can be instantiated and what the connection between AZs should look like.
+To have a proper way to deal with outages of one AZ, where a second AZ can step in, a few requirements need to be met for the connection between those two AZs.
+The amount of data that needs to be transferred very fast in a failure case may be enormous, so there is a requirement for a high bandwidth between connected AZs.
+To avoid additional failure cases, the latency between those two Availability Zones needs to be low.
+With such requirements it is very clear that AZs should only reside within one (physical) region of an IaaS deployment.
+
+#### AZs in Compute
+
+Compute hosts are physical machines on which the compute service runs.
+A single virtual machine is always running on ONE compute host.
+Redundancy of virtual machines is either up to the layer above IaaS or up to the customers themselves.
+Having Availability Zones gives customers the possibility to run another virtual machine as a backup within another Availability Zone.
+
+Customers will expect that in case of the failure of one Availability Zone all other AZs are still available.
+The highest possible failure safety here is achieved when Availability Zones for Compute are used for different fire zones.
+
+When the BSI recommendations are followed, there should already be redundancy in power lines, internet connection and cooling.
+An outage of one of these physical resources will not affect the compute host and its resources for more than a minimal timeframe.
+But when a single PDU is used for a rack, a failure of that PDU will result in an outage of all compute hosts in this rack.
+In such a case it is not relevant whether this rack represents a whole Availability Zone or is only part of a bigger AZ.
+All virtual machines on the affected compute hosts will not be available and need to be restarted on other hosts, whether in the same Availability Zone or another.
+
+#### AZs in Storage
+
+There are many different backends used for the storage service, with Ceph being one of the most prominent.
+Configuring those backends can already include spanning one storage cluster over physical machines in different fire zones.
+In combination with internal replication, a configuration is possible that already distributes the replicas of volumes over different fire zones.
+When a deployment has such a configured storage backend, it can already provide safety in case of a failure of level 3.
+
+Using Availability Zones is also possible for the storage service, but configuring AZs will not increase safety when a configuration like the above is already in place.
+Nevertheless, using AZs when having different backends in different fire zones will give customers a hint to back up volumes into the storage of other AZs.
+
+Additionally, when the BSI recommendations are followed, there should already be redundancy in power lines, internet connection and cooling.
+An outage of one of these physical resources will not affect the storage host and its resources for more than a minimal timeframe.
+When internal replication is used, either through the IaaS or through the storage backend itself, the outage of a single PDU and thus a single rack will not affect the availability of the data itself.
+All these physical factors do not require the usage of an Availability Zone for Storage.
+An increase in the level of failure safety will not be reached through AZs in these cases.
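+
+How the Availability Zones of a deployment are split between compute and storage is visible through the IaaS API.
+The following snippet is only an illustrative sketch (it is not part of any conformance test) using the openstacksdk; the cloud name `my-cloud` is a placeholder for an entry in the local `clouds.yaml`, and the listed proxy methods are assumed to be available in the installed SDK version:
+
+```python
+# Illustrative sketch: list the Availability Zones that the Compute and
+# Block Storage services expose to a user.
+import openstack
+
+# "my-cloud" is a placeholder for a configured clouds.yaml entry.
+conn = openstack.connect(cloud="my-cloud")
+
+compute_azs = sorted({az.name for az in conn.compute.availability_zones()})
+storage_azs = sorted({az.name for az in conn.block_storage.availability_zones()})
+
+print("Compute AZs:", compute_azs)
+print("Storage AZs:", storage_azs)
+```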
+
+Still, it might be confusing to have deployments with compute AZs but without storage AZs.
+CSPs may need to communicate clearly up to which failure safety level their storage service provides redundancy automatically and from which level on customers are responsible for the redundancy of their data.
+
+#### AZs in Network
+
+Virtualized network resources can typically be quickly and easily set up from building instructions.
+Those instructions are stored in the database of the networking service.
+
+If a physical machine on which certain network resources are set up is not available anymore, the resources can be rolled out on another physical machine, without being dependent on the current situation of the lost resources.
+There might only be a loss of a few packets within the affected network resources.
+
+With Compute and Storage in a good state (e.g. through having fire zones with a compute AZ each and storage being replicated over the fire zones), there would be no downsides to omitting Availability Zones for the network service.
+It might even be the opposite: having resources running in certain Availability Zones might prevent them from being scheduled in other AZs[^3].
+As network resources like routers are bound to an AZ, in a failure case of one AZ all resource definitions might still be there in the database, while the implementation of those resources is gone.
+Trying to rebuild them in another AZ is not possible, because the scheduler will not allow them to be implemented in an AZ other than the one that is present in their definition.
+In a failure case of one AZ this might lead to a lot of manual work to rebuild the SDN from scratch instead of just re-using the definitions.
+
+Because of this severe side effect, this standard will make no recommendations about Network AZs.
+
+[^3]: [Availability Zones in Neutron for OVN](https://docs.openstack.org/neutron/latest/admin/ovn/availability_zones.html)
+
+### Cross-Attaching volumes from one AZ to another compute AZ
+
+Without the networking AZs, we only need to take a closer look at attaching volumes to virtual machines across AZs.
+
+When there is more than one Storage Availability Zone, those AZs normally align with the Compute Availability Zones.
+This means that fire zone 1 contains compute AZ 1 and storage AZ 1, fire zone 2 contains compute AZ 2 and storage AZ 2, and the same for fire zone 3.
+It is possible to allow or forbid cross-attaching volumes from one storage Availability Zone to virtual machines in another AZ.
+If it is not allowed, then the creation of volume-based virtual machines will fail if there is no space left for VMs in the corresponding Availability Zone.
+While this may be unfortunate, it gives customers a very clear picture of an Availability Zone.
+It clarifies that having a virtual machine in another AZ also requires having a backup or replication of volumes in the other storage AZ.
+Then this backup or replication can be used to create a new virtual machine in the other AZ.
+
+It seems to be a good decision to not encourage CSPs to allow cross-attach.
+Currently, CSPs also do not seem to use it widely.
+
+## Standard
+
+If Compute Availability Zones are used, they MUST be in different fire zones.
+Availability Zones for Storage SHOULD be set up if there is no storage backend used that can span over different fire zones and automatically replicate the data.
+Otherwise a single Availability Zone for Storage SHOULD be configured.
+
+If more than one Availability Zone for Storage is set up, the attaching of volumes from one Storage Availability Zone to another Compute Availability Zone (cross-attach) SHOULD NOT be possible.
+
+Within each Availability Zone:
+
+- there MUST be redundancy in the power supply, as in the line into the deployment
+- there MUST be redundancy in external connection (e.g. internet connection or WAN connection)
+- there MUST be redundancy in core routers
+- there SHOULD be redundancy in the cooling system
+
+AZs SHOULD only occur within the same region and have a low-latency interconnection with a high bandwidth.
+
+## Related Documents
+
+The taxonomy of failsafe levels can be used to get an overview of the levels of failure safety in a deployment (TODO: link after DR is merged).
+
+The BSI can be consulted for further information about [failure risks](https://www.bsi.bund.de/DE/Themen/Unternehmen-und-Organisationen/Standards-und-Zertifizierung/IT-Grundschutz/IT-Grundschutz-Kompendium/Elementare-Gefaehrdungen/elementare-gefaehrdungen_node.html), [risk analysis for a datacenter](https://www.bsi.bund.de/SharedDocs/Downloads/DE/BSI/Grundschutz/BSI_Standards/standard_200_3.pdf?__blob=publicationFile&v=2) or [measures for availability](https://www.bsi.bund.de/SharedDocs/Downloads/DE/BSI/RZ-Sicherheit/RZ-Verfuegbarkeitsmassnahmen.pdf?__blob=publicationFile&v=9).
+
+## Conformance Tests
+
+As this standard will not require Availability Zones to be present, we cannot automatically test the conformance.
+The other parts of the standard are physical or internal and could only be tested through an audit.
+Whether there are fire zones physically available is a criterion that will never change for a single deployment - this only needs to be audited once.
+It might be possible to also use Gaia-X Credentials to provide such information, which then could be tested.
diff --git a/Standards/scs-0121-w1-Availability-Zones-Standard.md b/Standards/scs-0121-w1-Availability-Zones-Standard.md
new file mode 100644
index 000000000..9ec3dbc82
--- /dev/null
+++ b/Standards/scs-0121-w1-Availability-Zones-Standard.md
@@ -0,0 +1,42 @@
+---
+title: "SCS Availability Zones: Implementation and Testing Notes"
+type: Supplement
+track: IaaS
+status: Draft
+supplements:
+  - scs-0121-v1-Availability-Zones-Standard.md
+---
+
+## Automated Tests
+
+The standard does not preclude small deployments and edge deployments, both of which will not meet the requirement of being divided into multiple Availability Zones.
+Thus multiple Availability Zones are not always present.
+Sometimes there may just be a single Availability Zone.
+Because of that, there will be no automated tests to search for AZs.
+
+## Required Documentation
+
+The requirements for each Availability Zone are written in the Standard.
+For each deployment that uses more than a single Availability Zone, the CSP has to provide documentation to prove the following points:
+
+1. The presence of fire zones MUST be documented (e.g. through construction plans of the deployment).
+2. The correct configuration of one AZ per fire zone MUST be documented.
+3. The redundancy in power supply within each AZ MUST be documented.
+4. The redundancy in external connection within each AZ MUST be documented.
+5. The redundancy in core routers within each AZ MUST be documented.
+
+All of these requirements will either not change at all (like the fire zones) or are very unlikely to change (like the redundant internet connection).
+Because of this, documentation must only be provided in the following cases:
+
+1. When a new deployment with multiple AZs is to be tested for compliance.
+2. When there are physical changes in a deployment that has already provided the documentation: the changes need to be documented and provided as soon as possible.
+
+### Alternative Documentation
+
+If a deployment has already undergone a certification like ISO 27001 or ISO 9001, those certificates can be provided as part of the documentation to cover the redundancy parts.
+It is still required to document the existence of fire zones and the correct configuration of one AZ per fire zone.
+
+## Physical Audits
+
+In cases where it is reasonable to mistrust the provided documentation, a physical audit by a natural person - the auditor - sent by e.g. the [OSBA](https://osb-alliance.de/) should be performed.
+The CSP of the deployment that needs such an audit should grant the auditor access to the physical infrastructure and should show them all IaaS-layer configurations that are needed to verify compliance with this standard.
diff --git a/Standards/scs-0122-v1-node-to-node-encryption.md b/Standards/scs-0122-v1-node-to-node-encryption.md
new file mode 100644
index 000000000..f3d298706
--- /dev/null
+++ b/Standards/scs-0122-v1-node-to-node-encryption.md
@@ -0,0 +1,529 @@
+---
+title: _End-to-End Encryption between Customer Workloads_
+type: Decision Record
+status: Draft
+track: IaaS
+---
+
+## Abstract
+
+This document explores options for developing end-to-end (E2E) encryption for
+VMs, Magnum workloads, and container layers to enhance security between user
+services. It includes a detailed review of various technologies, feedback from
+the OpenStack community, and the decision-making process that led to selecting
+VXLANs with the OpenStack ML2 plugin and its later abandonment in favour of
+the native openvswitch-ipsec solution.
+
+## Terminology
+
+| Term | Meaning |
+|---|---|
+| CSP | Cloud service provider; in this document, it also includes the operator of a private cloud |
+| VM | Virtual machine (also: instance), a virtualized compute resource that functions as a self-contained server for a customer |
+| Node | Machine under CSP administration which hosts cloud services and compute instances |
+
+## Context
+
+### Motivation
+
+The security of customer/user workloads is one of a CSP's main concerns. With
+larger and more diverse cloud instances, parts of the underlying physical
+infrastructure can be outside of the CSP's direct control, either when
+interconnecting datacenters via public internet or in the case of renting
+infrastructure from a third party. Many security breaches occur due to
+actions of malicious or negligent in-house operators. While some burden lies
+with customers, who should secure their own workloads, the CSP should have the
+option to transparently protect the data pathways between instances, more so
+for private clouds, where CSP and customer are the same entity or parts of the
+same entity.
+
+In RFC8926[^rfc] it is stated:
+> A tenant system in a customer premises (private data center) may want to
+> connect to tenant systems on their tenant overlay network in a public cloud
+> data center, or a tenant may want to have its tenant systems located in
+> multiple geographically separated data centers for high availability. Geneve
+> data traffic between tenant systems across such separated networks should be
+> protected from threats when traversing public networks.
Any Geneve overlay
+> data leaving the data center network beyond the operator's security domain
+> SHOULD be secured by encryption mechanisms, such as IPsec or other VPN
+> technologies, to protect the communications between the NVEs when they are
+> geographically separated over untrusted network links.
+
+We aren't considering intra-node communication, i.e. traffic inside one host
+node between different VMs, potentially of multiple tenants, as this is a
+question of tenant isolation, not of networking security, and encryption here
+would possibly be a redundant measure. Isolation of VMs is handled by OpenStack
+on multiple levels - overlay tunneling protocols, routing rules on networking
+level, network namespaces on kernel level and hypervisor isolation mechanisms.
+All of this communication exists inside the node, and any malicious agent with
+high enough access to the node itself to observe/tamper with the internal
+communication traffic would presumably have access to the encryption keys
+themselves, rendering the encryption ineffective.
+
+### Potential threats in detail
+
+We are assuming that:
+
+* the customer workloads are not executed within secure enclaves (e.g. Software
+Guard Extensions (SGX)) and aren't using security measures like end-to-end
+encryption themselves, either relying on the CSP for security or, in the case
+of a private cloud, being run by the operator of the cloud
+* the CSP OpenStack administrators are deemed trustworthy since they possess
+root access to the host nodes, with access to keys and certificates, enabling
+them to bypass any form of internode communication encryption
+* a third party or an independent team manages physical network communication
+between nodes within a colocation setting, or the communication passes over unsafe
+public infrastructure in the case of a single stretched instance spanning
+multiple data centers
+
+#### Man in the Middle Attack
+
+Considering the assumptions and the objective to enforce end-to-end (E2E)
+encryption for user workloads, our primary security concern is averting
+man-in-the-middle (MITM) attacks. These can be categorized into two distinct
+forms: active and passive.
+
+##### Passive Attacks - Eavesdropping
+
+Consider the scenario where an untrusted individual, such as a third-party
+network administrator with physical access to the data center, engages in
+'passive' covert surveillance, silently monitoring unencrypted traffic
+without interfering with data integrity or network operations.
+
+Wiretapping is a common technique employed in such espionage. It involves the
+unauthorized attachment to network cabling, enabling the covert observation of
+data transit. This activity typically goes unnoticed as it does not disrupt
+the flow of network traffic, although it may occasionally introduce minor
+transmission errors.
+
+An alternative strategy involves deploying an interception device that
+captures and retransmits data, which could potentially introduce network
+latency or, if deployed disruptively, cause connectivity interruptions. Such
+devices can be concealed by synchronizing their installation with network
+downtime, maintenance periods, or less conspicuous times like power outages.
+They could also be strategically placed in less secure, more accessible
+locations, such as inter-building network links. This applies to wiretapping
+as well.
+ +Furthermore, the vulnerability extends to network devices, where an attacker +could exploit unsecured management ports or leverage compromised remote +management tools (like IPMI) to gain unauthorized access. Such access points, +especially those not routinely monitored like backup VPNs, present additional +security risks. + +Below is a conceptual diagram depicting potential vulnerabilities in an +OpenStack deployment across dual regions, highlighting how these passive +eavesdropping techniques could be facilitated. + +![image](https://github.com/SovereignCloudStack/issues/assets/1249759/f5b7edf3-d259-4b2a-8632-c877934f3e31) + +##### Active - Spoofing / Tampering + +Active network attacks like spoofing and tampering exploit various access +points, often leveraging vulnerabilities uncovered during passive eavesdropping +phases. These attacks actively manipulate or introduce unauthorized +communications on the network. + +Spoofing involves an attacker masquerading as another device or user within the +network. This deception can manifest in several ways: + +* **ARP Spoofing:** The attacker sends forged ARP (Address Resolution Protocol) + messages onto the network. This can redirect network traffic flow to the + attacker's machine, intercepting, modifying, or blocking data before it + reaches its intended destination. +* **DNS Spoofing:** By responding with falsified DNS replies, an attacker can + reroute traffic to malicious sites, further compromising or data exfiltration. +* **IP Spoofing:** The attacker disguises their network identity by falsifying + IP address information in packets, tricking the network into accepting them + as legitimate traffic. This can be particularly damaging if encryption is not + enforced, enabling the attacker to interact with databases, invoke APIs, or + execute unauthorized commands while appearing as a trusted entity. + +Moreover, when an active interception device is in place, attackers can extend +their capabilities to traffic filtering. They might selectively delete or alter +logs and metrics to erase traces of their intrusion or fabricate system +performance data, thus obscuring the true nature of their activities. + +### Preliminary considerations + +Initially we wanted to create a plugin into Neutron[^ne] using eBPF[^eb] to +secure the traffic automatically between VMs. We presented the idea in a +team IaaS call[^ia]. After the initial round of feedback specific requirements +emerged. + +#### Utilize existing solutions + +Leverage existing technologies and frameworks as much as possible. This +approach aims to reduce development time and ensure the solution is built on +proven, reliable foundations. Potential technologies include: + +* **OVS[^sw] + IPSec[^ip]**: Provides an overlay network and has built-in + support for encryption using IPsec. Leveraging OVS can minimize development + time since it is already integrated with OpenStack. +* **Neutron[^ne] with eBPF[^eb]**: Using eBPF[^eb] could provide fine-grained + control over packet filtering and encryption directly in the kernel. +* **TripleO[^to] (with IPsec)**: TripleO[^to] tool set for OpenStack deployment + supports IPsec tunnels between nodes. +* **Neutron[^ne] + Cilium[^ci]**: Cilium is an open source, cloud native + eBPF[^eb]-based networking solution, including transparent encryption tools. +* **Tailscale[^ta]** is a mesh VPN based on WireGuard[^wg] that simplifies the + setup of secure, encrypted networks. 
This could be a potential alternative
+  to managing encrypted connections in OpenStack environments.
+
+#### Upstream integration
+
+Move as much of the development work upstream into existing OpenStack projects.
+This will help ensure the solution is maintained by the wider OpenStack
+community, reducing the risk of it becoming unmaintained or unusable in the
+future. This means collaborating with the OpenStack community to contribute
+changes upstream, particularly in projects like Neutron[^ne], OVN[^ov],
+kolla[^kl] and ansible-kolla[^ka].
+
+#### Address threat modeling issues
+
+"We should not encrypt something just for the sake of encryption." The solution
+must address the specific security issues identified in the
+[threat modeling](#potential-threats-in-detail). This ideally includes
+protecting against both passive (eavesdropping) and active (spoofing,
+tampering) MITM attacks. Encryption mechanisms on all communication channels
+between VMs, containers and hosts prevent successful eavesdropping;
+authentication and integrity checks prevent spoofing and tampering. For example,
+IPsec[^ip] provides mechanisms for both encryption and integrity verification.
+
+#### Performance impact and ease of use
+
+Evaluate the performance impact of the encryption solution and ensure it is
+minimal. Performance benchmarking should be conducted to assess the impact of
+the encryption solution on network throughput and latency. For local trusted
+scenarios, opting out should be possible. The solution should also be easy to use
+and manage, both for administrators and ideally fully transparent for
+end-users. This may involve developing user-friendly interfaces and automated
+tools for key management and configuration.
+
+#### Avoid redundant encryption
+
+If possible, develop a mechanism to detect and avoid encrypting traffic that is
+already encrypted. This will help optimize performance and resource usage.
+
+By focusing on these detailed requirements and considerations, we aim to
+develop a robust, efficient, and sustainable E2E encryption solution for
+OpenStack environments. This solution will not only enhance security for user
+workloads but also ensure long-term maintainability and ease of use.
+
+### Exploration of technologies
+
+Based on the result of the threat modeling and presentation, we explored the
+following technologies and also reached out to the OpenStack mailing list for
+additional comments.
+
+This section provides a brief explanation of OpenStack networking and design
+decisions for encryption between customer workloads.
+
+#### Networking in OpenStack
+
+The foundation of networking in OpenStack is the Neutron[^ne] project,
+providing networking as a service (NaaS). It creates and manages network
+resources such as switches, routers, subnets, firewalls and load balancers,
+uses a plugin architecture to support different physical network implementation
+options and is accessible to admins or other services through an API.
+
+Another integral part is Open vSwitch (OVS)[^sw] - a widely adopted virtual
+switch implementation, which is not strictly necessary, as Neutron is quite
+flexible with the components used to implement the infrastructure, but tends to
+be the agent of choice and is the current default agent for Neutron. It allows
+Neutron to respond to environment changes, supports accounting and monitoring
+protocols and maintains the OVSDB state database. It manages virtual ports,
+bridges and tunnels on hosts.
+
+Open Virtual Networking (OVN[^ov]) is a logical abstraction layer on top of OVS,
+developed by the same community, which has become the default controller driver for
+Neutron. It manages logical networks insulated from underlying physical/virtual
+networks by encapsulation. It replaces the need for OVS agents running on each
+host and supports L2 switching, distributed L3 routing, access control and load
+balancing.
+
+#### Encryption options
+
+##### MACsec[^ms]
+
+A layer 2 security protocol, defined by the IEEE standard 802.1AE. It allows
+securing an Ethernet link for almost all traffic, including control protocols
+like DHCP and ARP. It is mostly implemented in hardware, in routers and
+switches, but software implementations exist, notably a Linux kernel module.
+
+##### eBPF[^eb]-based encryption with Linux Kernel Crypto API
+
+A network-packet-specific filtering technology in the Linux kernel called
+Berkeley Packet Filter (BPF) uses a specialized virtual machine inside the
+kernel to run filters on the networking stack. eBPF is an extension of this
+principle to a general purpose stack which can run sandboxed programs in the kernel
+without changing kernel code or loading modules. High-performance networking
+observability and security is a natural use case, with projects like Cilium[^ci]
+implementing transparent in-kernel packet encryption with it. The Linux kernel
+itself also provides an encryption framework called the
+Linux Kernel Crypto API[^lkc], which such solutions use.
+
+##### IPsec[^ip]
+
+Internet Protocol security is a suite of protocols for network security on
+layer 3, providing authentication and packet encryption, used for example in
+Virtual Private Network (VPN) setups. It is an IETF[^ie] specification with
+various open source and commercial implementations. For historical
+reasons[^ipwh] it defines two main transmission protocols,
+Authentication Header (AH) and Encapsulating Security Payload (ESP), where only
+the latter provides encryption in addition to authentication and integrity. The
+key negotiations use the IKE(v1/v2) protocol to establish and maintain
+Security Associations (SA).
+
+##### WireGuard[^wg]
+
+Aims to be a simple and fast open source secure network tunneling solution
+working on layer 3, utilizing state-of-the-art cryptography while maintaining a
+much simpler codebase and runtime setup than alternative solutions[^wgwp]. The focus
+is on fast in-kernel encryption. WireGuard[^wg] adds new network interfaces,
+manageable by standard tooling (ifconfig, route, ...), which act as tunneling
+interfaces. The main mechanism, called _Cryptokey routing_, consists of tables associating
+public keys of endpoints with allowed IPs inside given tunnels. These behave as
+routing tables when sending and as access control lists (ACL) when receiving
+packets. All packets are sent over UDP. Built-in roaming is achieved by both
+server and clients being able to update the peer list by examining from where
+correctly authenticated data originates.
+
+### Solution proposals
+
+#### TripleO[^to] with IPsec[^ip]
+
+> TripleO is a project aimed at installing, upgrading and operating OpenStack
+> clouds using OpenStack's own cloud facilities as the foundation - building on
+> Nova, Ironic, Neutron and Heat to automate cloud management at datacenter
+> scale
+
+This project is retired as of February 2024, but its approach was considered
+for adoption.
+
+Its deployment allowed for IPsec[^ip] encryption of node communication.
When
+utilized, two types of tunnels were created in the overcloud: node-to-node tunnels
+for each pair of nodes on the same network, for all networks those nodes were on,
+and Virtual IP tunnels. Each node hosting the Virtual IP would open a tunnel
+for any node in the specific network that can properly authenticate.
+
+#### OVN[^ov] + IPsec[^ip]
+
+There is support in the OVN[^ov] project for IPsec[^ip] encryption of tunnel
+traffic[^oit]. A daemon running in each chassis automatically manages and
+monitors IPsec[^ip] tunnel states.
+
+#### Neutron[^ne] + Cilium[^ci]
+
+Another potential architecture involves a Neutron[^ne] plugin hooking an
+eBPF[^eb] proxy on each interface and moving internal traffic via an encrypted
+Cilium[^ci] mesh. Cilium uses IPsec[^ip] or WireGuard[^wg] to transparently
+encrypt node-to-node traffic. There were some attempts to integrate Cilium[^ci]
+with OpenStack [^neci1], [^neci2], but we didn't find any concrete projects
+which would leverage the transparent encryption ability of Cilium[^ci] in an
+OpenStack environment. This path would presumably require significant
+development.
+
+#### Neutron[^ne] + Calico[^ca]
+
+The Calico[^ca] project in its community open source version provides
+node-to-node encryption using WireGuard[^wg]. Despite being primarily a
+Kubernetes networking project, it provides an OpenStack integration[^caos] via
+a Neutron[^ne] plugin, deploying the necessary subset of tools like etcd, the
+Calico agent Felix, the routing daemon BIRD and a DHCP agent.
+
+### Proof of concept implementations
+
+#### Neutron Plugin
+
+Initially, the potential for developing a specialized Neutron plugin was
+investigated and a simple skeleton implementation for testing purposes was
+devised.
+
+In-house development was later abandoned in favor of a more sustainable solution
+using existing technologies, as discussed in
+[preliminary considerations](#preliminary-considerations).
+
+#### Manual setup
+
+We created a working Proof of Concept by manually setting up VXLAN tunnels
+between nodes. While this solution ensures no impact on OpenStack and is easy
+to set up, it has limitations, such as unencrypted data transmission if the
+connection breaks. To mitigate this, we proposed using a dedicated subnet
+present only in the IPsec[^ip] tunnels.
+
+We presented the idea to the kolla-ansible[^ka] project, but it was deemed out
+of scope. Instead, we were directed towards a native Open vSwitch solution
+supporting IPsec[^ip]. This requires creating a new OpenStack service
+(working name: openstack-ipsec) and a role to manage chassis keys and run the
+openstack-ipsec container on each node.
+
+#### Proof of concept (PoC) implementation
+
+In our second proof of concept, we decided to implement support for
+openstack-ipsec. The initial step involved creating a new container image
+within the kolla[^kl] project specifically for this purpose.
+
+##### Architecture
+
+When Neutron[^ne] uses OVN[^ov] as its controller, it instructs OVN to create the
+necessary virtual networking infrastructure (logical switches, routers, etc.),
+particularly the Geneve tunnels between compute nodes. These tunnels are
+used to carry traffic between instances on different compute nodes.
+
+In the PoC setup, the Libreswan[^ls] suite runs on each compute node and manages the
+IPsec[^ip] tunnels. It encrypts the traffic flowing over the Geneve tunnels,
+ensuring that data is secure as it traverses the physical network.
In the setup
+phase, it establishes IPsec tunnels between compute nodes by negotiating the
+necessary security parameters (encryption, authentication, etc.). Once the
+tunnels are established, Libreswan[^ls] monitors and manages them, ensuring
+that the encryption keys are periodically refreshed and that the tunnels remain
+up. It also dynamically adds and removes tunnels based on changes in the network
+topology.
+
+A packet originating from a VM on one compute node and destined for a VM on
+a different node is processed by OVS and encapsulated into a Geneve tunnel.
+Before the Geneve-encapsulated packet leaves the compute node, it passes
+through the Libreswan process, which applies IPsec encryption. The encrypted
+packet traverses the physical network to the destination compute node. On the
+destination node, Libreswan[^ls] decrypts the packet, and OVN[^ov] handles
+decapsulation and forwards it to the target VM.
+
+##### Challenges
+
+While implementing the openstack-ipsec image, we encountered a significant challenge:
+the ovs-ctl start-ovs-ipsec command could not run inside the container because
+it requires a running init.d or systemctl to start the IPsec daemon immediately
+after OVS[^ov] deploys the configuration. We attempted to use supervisor to
+manage the processes within the container. However, this solution forced a
+manual start of the IPsec daemon before ovs-ctl had the chance to create the
+appropriate configurations.
+
+Another challenge was the requirement for both the IPsec daemon and ovs-ipsec
+to run within a single container. This added complexity to the container
+configuration and management, making it harder to ensure both services operated
+correctly and efficiently.
+
+##### Additional infrastructure
+
+A new Ansible role for generating chassis keys and distributing them to the
+respective machines was created. This utility also handles the configuration on
+each machine. Managing and creating production certificates is up to the user,
+which is also true for the backend TLS certificates in kolla-ansible[^ka].
+While this management should be handled within the same process, it currently
+poses a risk of downtime when certificates expire, as it requires careful
+management and timely renewal of certificates.
+
+The new container image was designed to include all necessary
+components for openstack-ipsec. Using supervisor to manage the IPsec daemon
+within the container involved creating configuration files to ensure all
+services start correctly. However, integrating supervisor introduced additional
+complexity and potential points of failure.
+
+##### Possible improvements
+
+The PoC doesn't currently address an opt-out possibility for disabling the
+encryption for some specific group of nodes, where the operator deems it
+detrimental, e.g. because the nodes are virtual or because security is already handled
+in some other layer of the stack. This could be implemented as a further
+customization available to the operator to encrypt only some subset of Geneve
+tunnels, either in a blacklist or whitelist manner.
+
+Further refinement is needed to ensure ovs-ctl and the IPsec daemon start and
+configure correctly within the container environment. Exploring alternative
+process management tools or improving the configuration of supervisor could
+help achieve a more robust solution.
+
+Implementing automated certificate management could mitigate the risks
+associated with manual certificate renewals.
Tools like Certbot or integration
+with existing Public Key Infrastructure (PKI) solutions might be beneficial.
+
+Engaging with the upstream Open vSwitch community to address containerization
+challenges and improve support for running ovs-ctl within containers could lead
+to a more sustainable solution.
+
+## Decision
+
+The final proof of concept implementation demonstrated the feasibility of
+implementing transparent IPsec[^ip] encryption between nodes in an OVN[^ov]
+logical networking setup in OpenStack.
+To recapitulate our preliminary considerations:
+
+### Utilize existing solutions
+
+Implementation in kolla-ansible[^ka] is unintrusive: it is provided by a
+self-contained new kolla[^kl] container, which only adds an IPsec[^ip]
+tunneling support module to OVS[^sw] - already an integral part of
+OpenStack networking - and a mature open source toolkit, Libreswan[^ls]. Also,
+OVN[^ov] has native support in OpenStack and became the default controller for
+Neutron[^ne].
+
+### Address threat modeling issues
+
+As discussed in the [motivation](#motivation) and [threat
+modeling](#potential-threats-in-detail) sections, our concern lies with the
+potentially vulnerable physical infrastructure between nodes inside or between
+data centers. In this case, ensuring encryption and integrity of packets before
+they leave any node addresses these threats, while avoiding the complexity of
+securing the communication on the VM level, where frequent additions, deletions
+and migrations could render such a system complicated and error-prone. We also
+don't needlessly encrypt VM communication inside one node.
+
+### Avoid redundant encryption
+
+As the encryption happens inside tunnels specific to inter-node workload
+communication, isolated on their own network and also inside Geneve tunnels, no cloud
+service data - which could possibly already be encrypted on higher levels (TLS) -
+passes through them. As to the workload communication itself - detecting higher-layer
+encryption in a way that would allow IPsec[^ip] to avoid redundant encryption
+is complex and would require custom modifications or non-standard solutions.
+It's usually safer and more straightforward to allow the redundancy, ensuring
+security at multiple layers, rather than trying to eliminate it.
+
+### Performance impact and ease of use
+
+Setup is straightforward for the operator: there is just a flag to enable or
+disable the IPsec[^ip] encryption inside Geneve tunnels and the need to set the
+Neutron[^ne] agent to OVN[^ov]. No other configuration is necessary. The only
+other administrative burden is the deployment of certificates to the provided
+configuration directory on the control node.
+
+Certificate management for this solution can and should be handled in the same
+way as for the backend service certificates, which are part of the ongoing
+efforts to provide complete service communication encryption in kolla-ansible.
+Currently, the management of these certificates is partially left to external
+processes, but if a toolset or a process were devised inside the project,
+this solution would fit in.
+
+### Upstream integration
+
+The potential for upstream adoption and long-term maintainability makes this a
+promising direction for securing inter-node communication in OpenStack
+environments.
+ +## References + +[^ne]: [Neutron](https://docs.openstack.org/neutron/latest/) - networking as a service (NaaS) in OpenStack +[^eb]: [eBPF](https://en.wikipedia.org/wiki/EBPF) +[^ia]: Team IaaS call [minutes](https://github.com/SovereignCloudStack/minutes/blob/main/iaas/20240214.md) +[^sw]: [open vSwitch](https://www.openvswitch.org/) +[^ip]: [IPsec](https://en.wikipedia.org/wiki/IPsec) +[^ipwh]: [Why is IPsec so complicated](https://destcert.com/resources/why-the-is-ipsec-so-complicated/) +[^to]: [TripleO](https://docs.openstack.org/developer/tripleo-docs/) - OpenStack on OpenStack +[^ci]: [Cillium](https://cilium.io/) +[^ca]: [Calico](https://docs.tigera.io/calico/latest/about) +[^caos]: [Calico for OpenStack](https://docs.tigera.io/calico/latest/getting-started/openstack/overview) +[^ta]: [Tailscale](https://tailscale.com/solutions/devops) +[^ov]: [Open Virtual Network](https://www.ovn.org/en/) (OVN) +[^oit]: [OVN IPsec tutorial](https://docs.ovn.org/en/latest/tutorials/ovn-ipsec.html) +[^kl]: [kolla](https://opendev.org/openstack/kolla) project +[^ka]: [kolla-ansible](https://docs.openstack.org/kolla-ansible/latest/) project +[^wg]: [WireGuard](https://www.wireguard.com/) +[^wgwp]: WireGuard [white paper](https://www.wireguard.com/papers/wireguard.pdf) +[^ie]: [Internet Engineering Task Force](https://www.ietf.org/) (IETF) +[^rfc]: [RFC8926](https://datatracker.ietf.org/doc/html/rfc8926#name-inter-data-center-traffic) +[^lkc]: [Linux Kernel Crypto API](https://www.kernel.org/doc/html/v4.10/crypto/index.html) +[^ls]: [Libreswan](https://libreswan.org/) VPN software +[^ms]: [MACsec standard](https://en.wikipedia.org/wiki/IEEE_802.1AE) +[^neci1]: [Neutron + Cilium architecture example](https://gist.github.com/oblazek/466a9ae836f663f8349b71e76abaee7e) +[^neci2]: [Neutron + Cilium Proposal](https://github.com/cilium/cilium/issues/13433) diff --git a/Standards/scs-0123-v1-mandatory-and-supported-IaaS-services.md b/Standards/scs-0123-v1-mandatory-and-supported-IaaS-services.md new file mode 100644 index 000000000..2f7a74326 --- /dev/null +++ b/Standards/scs-0123-v1-mandatory-and-supported-IaaS-services.md @@ -0,0 +1,82 @@ +--- +title: Mandatory and Supported IaaS Services +type: Standard +status: Stable +stabilized_at: 2024-11-20 +track: IaaS +--- + +## Introduction + +To be SCS-compliant a Cloud Service Provider (CSP) has to fulfill all SCS standards. +Some of those standards are broad and consider all APIs of all services on the IaaS-Layer like the consideration of a [role standard](https://github.com/SovereignCloudStack/issues/issues/396). +There exist many services on that layer and for a first step they need to be limited to have a clear scope for the standards and the Cloud Service Providers following them. +For this purpose, this standard will establish lists for mandatory services whose APIs have to be present in a SCS cloud as well as supported services, which APIs are considered by some standards and may even be tested for their integration but are optional in a sense that their omission will not violate SCS conformance. + +## Motivation + +There are many OpenStack APIs and their corresponding services that can be deployed on the IaaS level. +These services have differences in the quality of their implementation and liveness and some of them may be easily omitted when creating an IaaS deployment. +To fulfill all SCS-provided standards only a subset of these APIs are required. 
+Some more, but not all, of the remaining OpenStack APIs are additionally supported by the SCS project and may be part of its reference implementation.
+This results in different levels of support for specific services.
+This document will give readers insight into how the SCS classifies the OpenStack APIs accordingly.
+If a cloud provides all mandatory and any number of supported OpenStack APIs, it can be tested for SCS-compliance.
+Any unsupported APIs will not be tested.
+
+## Mandatory IaaS APIs
+
+The following IaaS APIs MUST be present in SCS-compliant IaaS deployments and could be implemented with the corresponding OpenStack services:
+
+| Mandatory API | corresponding OpenStack Service | description |
+|-----|-----|-----|
+| **block-storage** | Cinder | Block Storage service |
+| **compute** | Nova | Compute service |
+| **identity** | Keystone | Identity service |
+| **image** | Glance | Image service |
+| **load-balancer** | Octavia | Load-balancer service |
+| **network** | Neutron | Networking service |
+| **s3** | S3 API object storage | Object Storage service |
+
+:::caution
+
+S3 API implementations may differ in certain offered features.
+CSPs must publicly describe the endpoints of their S3 solutions and which implementations they use in their deployment.
+Users should always research whether a needed feature is supported in the offered implementation.
+
+:::
+
+The endpoints of services MUST be findable through the `catalog list` of the identity API[^1].
+
+[^1]: [Integrate into the service catalog of Keystone](https://docs.openstack.org/keystone/latest/contributor/service-catalog.html)
+
+## Supported IaaS APIs
+
+The following IaaS APIs MAY be present in SCS-compliant IaaS deployments, e.g. implemented through the corresponding OpenStack services, and are considered in the SCS standards.
+
+| Supported API | corresponding OpenStack Service | description |
+|-----|-----|-----|
+| **bare-metal** | Ironic | Bare Metal provisioning service |
+| **billing** | CloudKitty | Rating/Billing service |
+| **dns** | Designate | DNS service |
+| **ha** | Masakari | Instances High Availability service |
+| **key-manager** | Barbican | Key Manager service |
+| **object-store** | Swift | Object Store with different possible backends |
+| **orchestration** | Heat | Orchestration service |
+| **shared-file-systems** | Manila | Shared File Systems service |
+| **time-series-database** | Gnocchi | Time Series Database service |
+
+## Unsupported IaaS APIs
+
+All other OpenStack services, whose APIs are not mentioned in the mandatory or supported lists, will not be tested for their compatibility and conformance in SCS clouds by the SCS community.
+Those services MAY be integrated into IaaS deployments by a Cloud Service Provider on their own responsibility, but SCS will not assume they are present, and potential issues that occur during deployment or usage have to be handled by the CSP on their own.
+The SCS standard offers no guarantees for compatibility or reliability of services categorized as unsupported.
+
+## Related Documents
+
+[The OpenStack Services](https://www.openstack.org/software/)
+
+## Conformance Tests
+
+The presence of the mandatory OpenStack APIs will be tested in [this test-script](https://github.com/SovereignCloudStack/standards/blob/main/Tests/iaas/mandatory-services/mandatory-iaas-services.py).
+The test will further check whether the object-store endpoint is compatible with s3.
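+
+As a rough illustration (not the referenced test script), such a check could compare the service types announced in the Keystone catalog against the mandatory list, e.g. with openstacksdk. The cloud name `mycloud` is an assumption, the call may require sufficient privileges, and the mapping of API names to catalog service types may need adjustment in practice (older catalogs register block storage as `volumev3`, and the s3 capability is checked separately):
+
+```python
+# Hypothetical sketch: list service types from the identity API and compare
+# them against the mandatory IaaS APIs (s3 compatibility is checked separately).
+import openstack
+
+MANDATORY = {"block-storage", "compute", "identity", "image",
+             "load-balancer", "network"}
+
+conn = openstack.connect(cloud="mycloud")  # assumes a clouds.yaml entry named "mycloud"
+present = {service.type for service in conn.identity.services()}
+missing = MANDATORY - present
+print("missing mandatory services:", ", ".join(sorted(missing)) or "none")
+```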
diff --git a/Standards/scs-0210-v1-k8s-new-version-policy.md b/Standards/scs-0210-v1-k8s-new-version-policy.md index 69bcefd01..366fa8fae 100644 --- a/Standards/scs-0210-v1-k8s-new-version-policy.md +++ b/Standards/scs-0210-v1-k8s-new-version-policy.md @@ -17,7 +17,7 @@ description: | Here we will describe how fast providers need to keep up with the upstream Kubernetes version. -To create a informed decision we summarize here the Kubernetes rules regarding versioning at the time of writing (2023-01-16): +To create an informed decision we summarize here the Kubernetes rules regarding versioning at the time of writing (2023-01-16): Kubernetes usually provides about **3 minor** releases per year (see [Kubernetes Release Cycle][k8s-release-cycle]). @@ -37,7 +37,7 @@ The remaining 2 months are only for: Kubernetes is a fast-paced project. We want to achieve that providers keep up to date with upstream and do not fall behind Kubernetes releases. -This ensures that users are able to upgrade their clusters to address security issues, bug fixes and new features when using SCS compliant clusters in regards of Kubernetes. +This ensures that users are able to upgrade their clusters to address security issues, bug fixes and new features when using SCS compliant clusters in regard to Kubernetes. However, providers should have reasonable time to implement the new Kubernetes versions and test them. ## Decision diff --git a/Standards/scs-0210-v2-k8s-version-policy.md b/Standards/scs-0210-v2-k8s-version-policy.md index 88ed5738b..3b086ec3d 100644 --- a/Standards/scs-0210-v2-k8s-version-policy.md +++ b/Standards/scs-0210-v2-k8s-version-policy.md @@ -24,14 +24,14 @@ More information can be found under [Kubernetes Support Period]. The [Kubernetes release cycle][k8s-release-cycle] is set around 4 months, which usually results in about **3 minor** releases per year. -Patches to these releases are provided monthly, with the exception of the first patch, +Patches to these releases are provided monthly, except for the first patch, which is usually provided 1-2 weeks after the initial release (see [Patch Release Cadence][k8s-release-cadence]). ## Motivation Kubernetes is a living, fast-paced project, which follows a pre-defined release cycle. -This enables forward planning with regards to releases and patches, but also implies a +This enables forward planning with regard to releases and patches, but also implies a necessity to upgrade to newer versions quickly, since these often include new features, important security updates or especially if a previous version falls out of the support period window. @@ -40,7 +40,7 @@ We want to achieve an up-to-date policy, meaning that providers should be mostly sync with the upstream and don't fall behind the official Kubernetes releases. This is achievable, since new versions are released periodical on a well communicated schedule, enabling providers and users to set up processes around it. -Being up to date ensures that security issues and bugs are addressed and new features +Being up-to-date ensures that security issues and bugs are addressed and new features are made available when using SCS compliant clusters. It is nevertheless important to at least support all Kubernetes versions that are still @@ -56,11 +56,11 @@ In order to keep up-to-date with the latest Kubernetes features, bug fixes and s the provided Kubernetes versions should be kept up-to-date with new upstream releases: - The latest minor version MUST be provided no later than 4 months after release. 
-- The latest patch version MUST be provided no later than 1 week after release. +- The latest patch version MUST be provided no later than 2 weeks after release. - This time period MUST be even shorter for patches that fix critical CVEs. In this context, a critical CVE is a CVE with a CVSS base score >= 8 according to the CVSS version used in the original CVE record (e.g., CVSSv3.1). - It is RECOMMENDED to provide a new patch version in a 2 day time period after their release. + It is RECOMMENDED to provide a new patch version in a 2-day time period after their release. - New versions MUST be tested before being rolled out on productive infrastructure; at least the [CNCF E2E tests][cncf-conformance] should be passed beforehand. diff --git a/Standards/scs-0210-w1-k8s-version-policy-implementation-testing.md b/Standards/scs-0210-w1-k8s-version-policy-implementation-testing.md new file mode 100644 index 000000000..9b36063bb --- /dev/null +++ b/Standards/scs-0210-w1-k8s-version-policy-implementation-testing.md @@ -0,0 +1,35 @@ +--- +title: "SCS K8S Version Policy: Implementation and Testing Notes" +type: Supplement +track: KaaS +status: Draft +supplements: + - scs-0210-v2-k8s-version-policy.md +--- + +## Implementation notes + +The standard is quite concise about [the regulations](https://docs.scs.community/standards/scs-0210-v2-k8s-version-policy#decision), +so they are not restated here. Suffice it to say that a +CSP must make new versions for their KaaS offering available in a timely fashion, so that +new versions are available in a short window of time. +Older versions need to be supported until the end of their support window. + +Concrete implementation details can't be given here, since not every CSP does provide +their versions the same way. The best advice to give is to monitor the +[Kubernetes releases page](https://kubernetes.io/releases/) closely. + +## Automated tests + +### Implementation + +The script [`k8s_version_policy.py`](https://github.com/SovereignCloudStack/standards/blob/main/Tests/kaas/k8s-version-policy/k8s_version_policy.py) +connects to an existing K8s cluster and checks the version against a list of versions, that +are calculated to be inside a recency window. + +Note that this implementation is subject to change, because testing an existing cluster is not +sufficient to guarantee that all active k8s branches are supported and kept up to date. + +## Manual tests + +None. diff --git a/Standards/scs-0211-v1-kaas-default-storage-class.md b/Standards/scs-0211-v1-kaas-default-storage-class.md index 7e224a9e3..267a04e5b 100644 --- a/Standards/scs-0211-v1-kaas-default-storage-class.md +++ b/Standards/scs-0211-v1-kaas-default-storage-class.md @@ -53,4 +53,12 @@ This will be done in another document which is yet to be created. ## Conformance Tests -TBD +The script `k8s-default-storage-class-check.py` requires a kubeconfig file with connection +details for the Kubernetes cluster that should be checked for conformance. +It will check for a default storage class and use the associated storage provider to +try to create and mount a PersistentVolumeClaim with the aforementioned properties to +a container in a Pod. +After it is done, it cleans up the resources. +Rule violations will be reported on various logging channels: ERROR for mandatory rules +and INFO for recommended rules. +An exit code of zero indicates that the standard has been met. 
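+
+For illustration, the default-class lookup that the check performs could look roughly like the following minimal sketch (an assumption for illustration, not the actual `k8s-default-storage-class-check.py`), using the official `kubernetes` Python client and a kubeconfig for the cluster under test:
+
+```python
+# Hypothetical sketch: find the default storage class the same way the
+# conformance check does (via the is-default-class annotation).
+from kubernetes import client, config
+
+config.load_kube_config()  # assumes a kubeconfig with access to the cluster
+storage_api = client.StorageV1Api()
+
+default_classes = [
+    sc.metadata.name
+    for sc in storage_api.list_storage_class().items
+    if (sc.metadata.annotations or {}).get(
+        "storageclass.kubernetes.io/is-default-class") == "true"
+]
+if not default_classes:
+    raise SystemExit("ERROR: no default storage class found")
+print(f"Default storage class: {default_classes[0]}")
+```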
diff --git a/Standards/scs-0211-v2-kaas-default-storage-class.md b/Standards/scs-0211-v2-kaas-default-storage-class.md new file mode 100644 index 000000000..f535b9ae4 --- /dev/null +++ b/Standards/scs-0211-v2-kaas-default-storage-class.md @@ -0,0 +1,68 @@ +--- +title: SCS KaaS default storage class +type: Standard +status: Draft +replaces: scs-0211-v1-kaas-default-storage-class.md +track: KaaS +description: | + The SCS-0211 standard ensures that a default StorageClass with specific characteristics is available to KaaS users. +--- + +## Introduction + +This is the standard v2 for SCS Release 7. + +Cluster consumers can request persistent storage via [`PersistentVolumeClaims`][k8s-pvc], which is provisioned +automatically by cloud-provided automation. +Storage requirements may vary across use cases, so there is the concept of storage classes (`StorageClass`). +Storage classes define some set of storage properties and consumers can choose one of these depending on the use case. + +## Motivation + +A lot of third-party software, such as Helm charts, assume that a default storage class is configured. +Thus, for an out-of-the-box working experience, a SCS compliant Kubernetes cluster should come +preconfigured with a sensible default storage class providing persistent storage. + +## Decision + +A freshly provisioned Kubernetes cluster MUST have a default storage class, i.e., a `StorageClass` +object that is annotated with `storageclass.kubernetes.io/is-default-class=true` as described in the +[Kubernetes documentation][k8s-default-sc]. +The name of this storage class is not standardized. + +### Recommended non-performance-related properties + +The following recommendations are not completely tested yet and therefore do not represent hard requirement criteria so far. Nevertheless, they are important prerequisites for ensuring data storage stability. Generally, these criteria are met by choosing the right provisioner such as Cinder CSI Provisioner. And this shall be cross-checked against a list of provisioners. + +If the persistent volumes (PV) provisioned by the provided default storage class are required to be failure-safe they MUST fulfill all +of the following properties: + +- MUST support the `ReadWriteOnce` [access mode][k8s-accessmode]. +- MUST NOT be bound to the lifecycle of a Kubernetes node. +- MUST NOT be backed by local or ephemeral storage.\ + This means: + - MUST NOT be backed by local storage on the Kubernetes Node VM itself. + - MAY be backed by some kind of redundant storage within an AZ, across hosts. + - MAY be backed by some kind of redundant storage across AZ's. + +Volumes that are not necessarily required to be failure-safe may be local/node-bound/non-redundant. This might be the case with fast to run applications that take care of data durability and availability on application level. + +The provisioned storage class MAY support volume expansion (`allowVolumeExpansion=true`). + +### Required performance-related properties + +- _NO_ fixed guarantees regarding latency/bandwidth/IOPS/etc. +Generally, customers should be able to expect low-tier performance without pricing surprises. + +## Previous standard versions + +[Version v1 of this standard](scs-0211-v1-kaas-default-storage-class.md) did not enforce the +existence of a default storage class in a newly created cluster. + +## Conformance Tests + +Is currently under progress [stage: pull-request](https://github.com/SovereignCloudStack/standards/pull/658). 
+ +[k8s-pvc]: https://kubernetes.io/docs/concepts/storage/persistent-volumes/#persistentvolumeclaims +[k8s-default-sc]: https://kubernetes.io/docs/tasks/administer-cluster/change-default-storage-class/ +[k8s-accessmode]: https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes diff --git a/Standards/scs-0211-w1-kaas-default-storage-class-implementation-testing.md b/Standards/scs-0211-w1-kaas-default-storage-class-implementation-testing.md new file mode 100644 index 000000000..1eeb89e48 --- /dev/null +++ b/Standards/scs-0211-w1-kaas-default-storage-class-implementation-testing.md @@ -0,0 +1,39 @@ +--- +title: "SCS KaaS default storage class: Implementation and Testing Notes" +type: Supplement +track: KaaS +status: Draft +supplements: + - scs-0211-v1-kaas-default-storage-class.md +--- + +## Implementation notes + +A `StorageClass` is made default by using the `storageclass.kubernetes.io/is-default-class` +annotation; a standardized name is not given. `ReadWriteOnce` must be supported by the volume, +and it must be protected against data loss due to hardware failures. +Therefore, volumes must not be bound to the lifecycle of a Kubernetes node and, at best, +be backed by some kind of redundant storage. +Guarantees for latency, bandwidth, IOPS and so on are not given. + +The cost-intensive part of this standard would be the hardware failure protection by binding +the `StorageClass` to redundant, non-lifecycle bound storage, since this would mean that +storage needs to be provided in a higher capacity to achieve the same usable capacity. + +## Automated tests + +### Notes + +The test for the [SCS Kaas Default storage class](https://github.com/SovereignCloudStack/standards/blob/main/Standards/scs-0211-v1-kaas-default-storage-class.md) +checks if a default storage class is available and if this storage class can be used +to create a `PersistentVolume` from a `PersistentVolumeClaim` for a container. + +### Implementation + +The script [`k8s-default-storage-class-check.py`](https://github.com/SovereignCloudStack/standards/blob/main/Tests/kaas/k8s-default-storage-class/k8s-default-storage-class-check.py) +connects to an existing K8s cluster and checks for the availability of a default storage class. +This can also be done via Sonobuoy. + +## Manual tests + +None. diff --git a/Standards/scs-0213-v1-k8s-nodes-anti-affinity.md b/Standards/scs-0213-v1-k8s-nodes-anti-affinity.md index a4c5231f4..6d2e10ad4 100644 --- a/Standards/scs-0213-v1-k8s-nodes-anti-affinity.md +++ b/Standards/scs-0213-v1-k8s-nodes-anti-affinity.md @@ -7,8 +7,8 @@ track: KaaS ## Introduction -A Kubernetes instance is provided as a cluster, which consists of a set of worker machines, -so called nodes. A cluster is composed of a control plane and at least one worker node. +A Kubernetes instance is provided as a cluster, which consists of a set of worker machines, also called nodes. +A cluster is composed of a control plane and at least one worker node. The control plane manages the worker nodes and therefore the pods in the cluster by making decisions about scheduling, event detection and global decisions. 
Inside the control plane, multiple components exist, which can be duplicated and distributed over multiple machines @@ -36,20 +36,22 @@ could fail, they should be distributed over multiple nodes on different machines This can be steered with the Affinity or Anti Affinity features, which are separated by Kubernetes into two features: -Node Affinity -The Node Affinity feature allows to match pods according to logical matches of -key-value-pairs referring to labels of nodes. -These can be defined with different weights or preferences in order to allow fine-grained -selection of nodes. The feature works similar to the Kubernetes nodeSelector. -It is defined in the PodSpec using the nodeAffinity field in the affinity section. - -Pod Affinity -Pod Affinity or Pod Anti Affinity allows the constraint of pod scheduling based on the -labels of pods already running on a node. -This means the constraint will match other pods on a node according to their labels key-value-pairs -and then either schedule the pod to the same (Affinity) or another (Anti Affinity) node. -This feature is also defined in the PodSpec using the podAffinity and podAntiAffinity -fields in the affinity section. [3] +- Node Affinity + + The Node Affinity feature allows to match pods according to logical matches of + key-value-pairs referring to labels of nodes. + These can be defined with different weights or preferences in order to allow fine-grained + selection of nodes. The feature works similar to the Kubernetes nodeSelector. + It is defined in the PodSpec using the nodeAffinity field in the affinity section. + +- Pod Affinity + + Pod Affinity or Pod Anti Affinity allows the constraint of pod scheduling based on the + labels of pods already running on a node. + This means the constraint will match other pods on a node according to their labels key-value-pairs + and then either schedule the pod to the same (Affinity) or another (Anti Affinity) node. + This feature is also defined in the PodSpec using the podAffinity and podAntiAffinity + fields in the affinity section. [3] Both features allow the usage of "required" or "preferred" keywords, which create "hard" or "soft" affinities. By using a hard affinity, a pod would need to be scheduled @@ -97,7 +99,7 @@ assign them to different nodes, but at this point, a redundant setup like presen So Anti Affinity in this context probably means more like distribution over multiple physical machines, which needs to be planned beforehand on the machine/server level. -Therefore would it be preferred for the control plane to use a redundant setup, which +Therefore, would it be preferred for the control plane to use a redundant setup, which is separated over different physical machines, meaning at least half of the control plane nodes runs on a different physical machine as the rest. The currently used ClusterAPI enables this by establishing the concept of "failure domains". These are used to control @@ -128,11 +130,11 @@ of them. This should provide at least the minimum requirements for a fault-toler For the standard, there is also a possibility to define multiple stages of distributed infrastructure and only make sensible ones a requirement and the rest optional, e.g. 
-* non-distributed clusters -* High-Availability clusters that are - * distributed over multiple machines/availability zones - * distributed over multiple clouds - * distributed over multiple physical locations/datacenters +- non-distributed clusters +- High-Availability clusters that are + - distributed over multiple machines/availability zones + - distributed over multiple clouds + - distributed over multiple physical locations/datacenters The worker nodes are RECOMMENDED to be distributed over different machines. In order to provide clear information to the users, the nodes should be labeled to reflect the diff --git a/Standards/scs-0214-v1-k8s-node-distribution.md b/Standards/scs-0214-v1-k8s-node-distribution.md index 2e237de07..ffec30efc 100644 --- a/Standards/scs-0214-v1-k8s-node-distribution.md +++ b/Standards/scs-0214-v1-k8s-node-distribution.md @@ -80,6 +80,34 @@ If the standard is used by a provider, the following decisions are binding and v can also be scaled vertically first before scaling horizontally. - Worker node distribution MUST be indicated to the user through some kind of labeling in order to enable (anti)-affinity for workloads over "failure zones". +- To provide metadata about the node distribution, which also enables testing of this standard, + providers MUST label their K8s nodes with the labels listed below. + - `topology.kubernetes.io/zone` + + Corresponds with the label described in [K8s labels documentation][k8s-labels-docs]. + It provides a logical zone of failure on the side of the provider, e.g. a server rack + in the same electrical circuit or multiple machines bound to the internet through a + singular network structure. How this is defined exactly is up to the plans of the provider. + The field gets autopopulated most of the time by either the kubelet or external mechanisms + like the cloud controller. + + - `topology.kubernetes.io/region` + + Corresponds with the label described in [K8s labels documentation][k8s-labels-docs]. + It describes the combination of one or more failure zones into a region or domain, therefore + showing a larger entity of logical failure zone. An example for this could be a building + containing racks that are put into such a zone, since they're all prone to failure, if e.g. + the power for the building is cut. How this is defined exactly is also up to the provider. + The field gets autopopulated most of the time by either the kubelet or external mechanisms + like the cloud controller. + + - `topology.scs.community/host-id` + + This is an SCS-specific label; it MUST contain the hostID of the physical machine running + the hypervisor (NOT: the hostID of a virtual machine). Here, the hostID is an arbitrary identifier, + which need not contain the actual hostname, but it should nonetheless be unique to the host. + This helps identify the distribution over underlying physical machines, + which would be masked if VM hostIDs were used. ## Conformance Tests @@ -92,3 +120,4 @@ If also produces warnings and informational outputs, if e.g. 
labels don't seem t [k8s-ha]: https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/high-availability/ [k8s-large-clusters]: https://kubernetes.io/docs/setup/best-practices/cluster-large/ [scs-0213-v1]: https://github.com/SovereignCloudStack/standards/blob/main/Standards/scs-0213-v1-k8s-nodes-anti-affinity.md +[k8s-labels-docs]: https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone diff --git a/Standards/scs-0214-v2-k8s-node-distribution.md b/Standards/scs-0214-v2-k8s-node-distribution.md new file mode 100644 index 000000000..3b4915492 --- /dev/null +++ b/Standards/scs-0214-v2-k8s-node-distribution.md @@ -0,0 +1,129 @@ +--- +title: Kubernetes Node Distribution and Availability +type: Standard +status: Draft +replaces: scs-0214-v1-k8s-node-distribution.md +track: KaaS +--- + +## Introduction + +A Kubernetes instance is provided as a cluster, which consists of a set of machines, +so-called nodes. A cluster is composed of a control plane and at least one worker node. +The control plane manages the worker nodes and therefore the pods in the cluster by making +decisions about scheduling, event detection and rights management. Inside the control plane, +multiple components exist, which can be duplicated and distributed over multiple nodes +inside the cluster. Typically, no user workloads are run on these nodes in order to +separate the controller component from user workloads, which could pose a security risk. + +### Glossary + +The following terms are used throughout this document: + +| Term | Meaning | +|---------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Worker | Virtual or bare-metal machine, which hosts workloads of customers | +| Control Plane | Virtual or bare-metal machine, which hosts the container orchestration layer that exposes the API and interfaces to define, deploy, and manage the lifecycle of containers. | +| Machine | Virtual or bare-metal entity with computational capabilities | + +## Motivation + +In normal day-to-day operation, it is not unusual for some operational failures, either +due to wear and tear of hardware, software misconfigurations, external problems or +user errors. Whichever was the source of such an outage, it always means down-time for +operations and users and possible even data loss. +Therefore, a Kubernetes cluster in a productive environment should be distributed over +multiple "failure zones" in order to provide fault-tolerance and high availability. +This is especially important for the control plane of the cluster, since it contains the +state of the whole cluster. A failure of this component could mean an unrecoverable failure +of the whole cluster. + +## Design Considerations + +Most design considerations of this standard follow the previously written Decision Record +[Kubernetes Nodes Anti Affinity][scs-0213-v1] as well as the Kubernetes documents about +[High Availability][k8s-ha] and [Best practices for large clusters][k8s-large-clusters]. + +SCS wishes to prefer distributed, highly-available systems due to their obvious advantages +like fault-tolerance and data redundancy. But it also understands the costs and overhead +for the providers associated with this effort, since the infrastructure needs to have +hardware which will just be used to provide fail-over safety or duplication. 
+
+The document [Best practices for large clusters][k8s-large-clusters] describes the concept of a failure zone.
+This term isn't defined any further, but can in this context be described as a number of
+physical (computing) machines in such proximity to each other (either through physical
+or logical interconnection in some way) that specific problems inside this zone would put
+all these machines at risk of failure/shutdown. It is therefore necessary for important
+data or services not to be present in just one failure zone.
+How such a failure zone should be defined depends on the risk model of the service/data
+and its owner as well as the capabilities of the provider. Zones could range from
+single machines or racks up to whole datacenters or even regions, which could be
+coupled by things like electrical grids. They're therefore purely logical entities, which
+shouldn't be defined further in this document.
+
+## Decision
+
+This standard formulates the requirement for the distribution of Kubernetes nodes in order
+to provide a fault-tolerant and available Kubernetes cluster infrastructure.
+
+The control plane nodes MUST be distributed over multiple physical machines.
+Kubernetes provides [best-practices][k8s-zones] on this topic, which are also RECOMMENDED by SCS.
+
+At least one control plane instance MUST be run in each "failure zone" used for the cluster;
+more instances per "failure zone" are possible to provide fault-tolerance inside a zone.
+
+Worker nodes are RECOMMENDED to be distributed over multiple zones. This policy makes
+it OPTIONAL to provide a worker node in each "failure zone", meaning that worker nodes
+can also be scaled vertically first before scaling horizontally.
+
+To provide metadata about the node distribution and possibly provide the ability
+to schedule workloads efficiently, which also enables testing of this standard,
+providers MUST label their K8s nodes with the labels listed below.
+These labels MUST be kept up to date with the current state of the deployment.
+
+- `topology.kubernetes.io/zone`
+
+  Corresponds with the label described in [K8s labels documentation][k8s-labels-docs].
+  It provides a logical zone of failure on the side of the provider, e.g. a server rack
+  in the same electrical circuit or multiple machines bound to the internet through a
+  singular network structure. How this is defined exactly is up to the plans of the provider.
+  The field gets autopopulated most of the time by either the kubelet or external mechanisms
+  like the cloud controller.
+
+- `topology.kubernetes.io/region`
+
+  Corresponds with the label described in [K8s labels documentation][k8s-labels-docs].
+  It describes the combination of one or more failure zones into a region or domain, therefore
+  showing a larger entity of logical failure zone. An example for this could be a building
+  containing racks that are put into such a zone, since they're all prone to failure, if e.g.
+  the power for the building is cut. How this is defined exactly is also up to the provider.
+  The field gets autopopulated most of the time by either the kubelet or external mechanisms
+  like the cloud controller.
+
+- `topology.scs.community/host-id`
+
+  This is an SCS-specific label; it MUST contain the hostID of the physical machine running
+  the hypervisor (NOT: the hostID of a virtual machine). Here, the hostID is an arbitrary identifier,
+  which need not contain the actual hostname, but it should nonetheless be unique to the host.
+  This helps identify the distribution over underlying physical machines,
+  which would be masked if VM hostIDs were used.
+
+## Conformance Tests
+
+The script `k8s_node_distribution_check.py` checks the nodes available with a user-provided
+kubeconfig file. Based on the labels `topology.scs.community/host-id`,
+`topology.kubernetes.io/zone`, `topology.kubernetes.io/region` and `node-role.kubernetes.io/control-plane`,
+the script then determines whether the nodes are distributed according to this standard.
+If this isn't the case, the script produces an error.
+It also produces warnings and informational outputs, e.g., if labels don't seem to be set.
+
+## Previous standard versions
+
+This is version 2 of the standard; it extends [version 1](scs-0214-v1-k8s-node-distribution.md) with the
+requirements regarding node labeling.
+
+[k8s-ha]: https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/high-availability/
+[k8s-large-clusters]: https://kubernetes.io/docs/setup/best-practices/cluster-large/
+[scs-0213-v1]: https://github.com/SovereignCloudStack/standards/blob/main/Standards/scs-0213-v1-k8s-nodes-anti-affinity.md
+[k8s-labels-docs]: https://kubernetes.io/docs/reference/labels-annotations-taints/#topologykubernetesiozone
+[k8s-zones]: https://kubernetes.io/docs/setup/best-practices/multiple-zones/
diff --git a/Standards/scs-0214-w1-k8s-node-distribution-implementation-testing.md b/Standards/scs-0214-w1-k8s-node-distribution-implementation-testing.md
new file mode 100644
index 000000000..4366365a0
--- /dev/null
+++ b/Standards/scs-0214-w1-k8s-node-distribution-implementation-testing.md
@@ -0,0 +1,41 @@
+---
+title: "Kubernetes Node Distribution and Availability: Implementation and Testing Notes"
+type: Supplement
+track: KaaS
+status: Draft
+supplements:
+  - scs-0214-v1-k8s-node-distribution.md
+  - scs-0214-v2-k8s-node-distribution.md
+---
+
+## Implementation notes
+
+A Kubernetes cluster's control plane must be distributed over multiple physical machines, as well
+as different "failure zones". How these are defined is at the moment up to the CSP.
+Worker nodes can also be distributed over "failure zones", but this isn't a requirement.
+Distribution must be indicated through labelling, so that users can access this information.
+
+Node distribution metadata is provided through the usage of the labels
+`topology.kubernetes.io/region`, `topology.kubernetes.io/zone` and
+`topology.scs.community/host-id` respectively; an illustrative example is shown at the end of this document.
+
+At the moment, not all labels are set automatically by most K8s cluster utilities, which incurs
+additional setup and maintenance costs.
+
+## Automated tests
+
+### Notes
+
+The test for the [SCS K8s Node Distribution and Availability](https://github.com/SovereignCloudStack/standards/blob/main/Standards/scs-0214-v2-k8s-node-distribution.md)
+checks if control-plane nodes are distributed over different failure zones (distributed across
+physical machines, zones and regions) by observing their labels defined by the standard.
+
+### Implementation
+
+The script [`k8s_node_distribution_check.py`](https://github.com/SovereignCloudStack/standards/blob/main/Tests/kaas/k8s-node-distribution/k8s_node_distribution_check.py)
+connects to an existing K8s cluster and checks if a distribution can be detected with the labels
+set for the nodes of this cluster.
+
+## Manual tests
+
+None.
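+
+For illustration, the labels on a single node of a compliant cluster might look roughly like the
+following sketch (hypothetical values; the actual zone, region and host-id names are provider-specific):
+
+```yaml
+apiVersion: v1
+kind: Node
+metadata:
+  name: worker-1                                # hypothetical node name
+  labels:
+    topology.kubernetes.io/region: region-1     # larger failure domain, e.g. one site/datacenter
+    topology.kubernetes.io/zone: region-1-a     # failure zone within that region
+    topology.scs.community/host-id: host-0042   # identifier of the physical hypervisor host
+```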
diff --git a/Standards/scs-0215-v1-robustness-features.md b/Standards/scs-0215-v1-robustness-features.md index e0ad3dc88..0085c3702 100644 --- a/Standards/scs-0215-v1-robustness-features.md +++ b/Standards/scs-0215-v1-robustness-features.md @@ -179,7 +179,7 @@ csr-9wvgt 112s kubernetes.io/kubelet-serving system:node:worker-1 Further information and examples can be found in the Kubernetes documentation: [Kubeadm certs](https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/kubeadm-certs/) -[Kubelete TLS bootstrapping](https://kubernetes.io/docs/reference/access-authn-authz/kubelet-tls-bootstrapping/) +[Kubelet TLS bootstrapping](https://kubernetes.io/docs/reference/access-authn-authz/kubelet-tls-bootstrapping/) ## Decision diff --git a/Standards/scs-0216-v1-requirements-for-testing-cluster-stacks.md b/Standards/scs-0216-v1-requirements-for-testing-cluster-stacks.md index d0b614239..82ea1856e 100644 --- a/Standards/scs-0216-v1-requirements-for-testing-cluster-stacks.md +++ b/Standards/scs-0216-v1-requirements-for-testing-cluster-stacks.md @@ -70,7 +70,7 @@ Two potential approaches for testing cluster stacks are the use of an IaaS provi - Challenges with monitoring and debugging. - Potential downtime and difficulty in running concurrent tests. -### Local Environment (Docker, Kubevirt) +### Local Environment (Docker, KubeVirt) #### Pros diff --git a/Standards/scs-0217-v1-baseline-cluster-security.md b/Standards/scs-0217-v1-baseline-cluster-security.md deleted file mode 100644 index f5dc82688..000000000 --- a/Standards/scs-0217-v1-baseline-cluster-security.md +++ /dev/null @@ -1,143 +0,0 @@ ---- -title: Kubernetes cluster baseline security -type: Standard -status: Draft -track: KaaS ---- - -## Introduction - -Due to the regular changes and updates, there are always new security features to deploy and use in Kubernetes. -Nevertheless, a provider (or even a customer) needs to take action in order to achieve a -hardened, secure cluster due to the myriad of configurations possible. This is especially -the case since Kubernetes ships with insecure features and configurations out of the box, -which will need to be mitigated by an administrator with the proper knowledge. -Hardened, secure Kubernetes clusters are desirable regardless of the possible threat model, -since higher security doesn't necessarily mean higher complexity in this case. - -## Terminology - -| Term | Meaning | -|------|-----------------------------| -| TLS | Transport Layer Security | -| CA | Certificate Authority | -| CSR | Certificate Signing Request | - -## Motivation - -Kubernetes clusters are highly configurable, which also gives rise to different security -problems, if the configuration isn't done properly. -These security risks can potentially be exposed in many different parts of a cluster, e.g. -different APIs, authorization and authentication procedures or even Pod privilege mechanisms. -In order to mitigate these problems, different steps and mechanisms could be used to increase -the security of a Kubernetes setup. - -## Design Considerations - -### External CA - -Kubernetes provides an API to provision TLS certificates that can be signed by a CA. -This CA can be controlled by the cluster provider, which enables much more tight control -over the clusters communication and therefore also better controllable security. - -In order to do this, the CA certificate bundle needs to be added to the trusted certificates -of the server. -To provide a certificate, the following steps need to be undertaken: - -1. 
Create a CSR -2. Send the CSR manifest to the k8s API -3. Approve the CSR -4. Sign CSR with your CA -5. Upload the signed certificate to the server - -This certificate could now be used by a user in a pod in order to provide a trusted certificate. - -It is also possible for the Kubernetes controller manager to provide the signing functionality. -To enable this, `--cluster-signing-cert-file` and `--cluster-signing-key-file` need to be set with -a reference to the CA keypair, which was used in the previous example to sign a CSR. - -### Protected Kubernetes endpoints - -In order to secure a Kubernetes cluster, the protection of endpoints is important. -To do this, different approaches can be taken. - -#### TLS for all internal/API traffic - -It is already expected by Kubernetes that all API communication internally is encrypted with TLS. -Nevertheless, some endpoints of internal components could be/will be exposed without the necessary -encryption, which could lead to weak points in the system. -A list of the default service endpoints can be seen in the following table - -| Protocol | Port Range | Purpose | Notes | -|----------|-------------|-------------------------|-----------------------------------------------------------------------------------------| -| TCP | 6443* | Kubernetes API Server | - | -| TCP | 2379-2380 | etcd server client API | - | -| TCP | 10250 | Kubelet API | - | -| TCP | 10251/10259 | kube-scheduler | 10251 could be insecure before 1.13, after that only the secure port 10259 is available | -| TCP | 10252/10257 | kube-controller-manager | 10252 could be insecure before 1.13, after that only the secure port 10257 is available | -| TCP | 30000-32767 | NodePort Services | Service endpoints, could be HTTP | - -The usage of `readOnlyPort` (enabling a read-only Kubelet API port on 10255) by design neither provides authentication nor authorization. Its usage is strongly discouraged! - -#### Authentication and Authorization - -All API clients should authenticate and authorize in order to be able to access an API or even -specific functions of this API. This is the case for users as well as internal components. - -Most internal clients (like proxies or nodes) are typically authenticated via service accounts or -x509 certificates, which will normally be created automatically during the setup of a cluster. -External users can authenticate via an access pattern of choice, which is typically decided by -the cluster provider. - -Authorization is (normally) done by the Role-Based Access Control (RBAC), which matches a request -by a user with a set of permissions, also called a role. Kubernetes deploys some roles out-of-the-box; -additional roles need to be carefully checked, since some permissions for specific resources allow -modification of other resources. - -This whole process is especially important for the Kubelet, which allows anonymous requests in its -default configuration. This is obviously a security risk, since everybody with access to its endpoint -could manipulate resources that are managed with the Kubelet. - -To disable anonymous requests, the Kubelet should be started with `--anonymous-auth=false`. -Authentication can be provided either through x509 client certificates or API bearer tokens. -How to set up both approaches can be found in the [Kubelet Authentication and Authorization](https://kubernetes.io/docs/reference/access-authn-authz/kubelet-authn-authz/). - -Kubelet authorization is set to `AlwaysAllow` as a default mode. 
This can be quite problematic, -since all authenticated users can do all actions. To mitigate this, it is possible to delegate -authorization to the API server by: - -- enabling the `authorization.k8s.io/v1beta1` API group -- starting the Kubelet with the `--authorization-mode=Webhook` and the `--kubeconfig` flags - -After that, the Kubelet calls the `SubjectAccessReview` API in order to determine the authorization of a request. - -## Decision - -This standard tries to increase security for a Kubernetes cluster in order to provide a -solid baseline setup with regard to security. For this to work, multiple measures need to be undertaken. - -A self-controlled CA SHOULD be used in order to be in control of the TLS certificates, which -enables operators to provide and revoke certificates according to their own requirements. - -All internal endpoints found in the section [TLS for all internal/API traffic] MUST be -encrypted with TLS in order to secure internal traffic. - -The Kubernetes API (kubeAPI) MUST be secured by authenticating and authorizing the users -trying to access its endpoints. How a user is authenticated is up to the provider of the -cluster and/or the wishes of the customer. Authorization MUST be done by providing fine-grained RBAC. -The authentication and authorization steps MUST also be applied to the Kubelet, which in its default configuration -doesn't enable them. A way to do this can be found in the section [Authentication and Authorization]. - -## Related Documents - -- [Managing TLS in a cluster](https://kubernetes.io/docs/tasks/tls/managing-tls-in-a-cluster/) -- [Securing a cluster](https://kubernetes.io/docs/tasks/administer-cluster/securing-a-cluster/) -- [Controlling access](https://kubernetes.io/docs/concepts/security/controlling-access/) -- [Kubernetes Security Checklist](https://kubernetes.io/docs/concepts/security/security-checklist/) -- [Kubelet Authentication and Authorization](https://kubernetes.io/docs/reference/access-authn-authz/kubelet-authn-authz/) -- [Authentication](https://kubernetes.io/docs/reference/access-authn-authz/authentication/) -- [OWASP Kubernetes Security Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/Kubernetes_Security_Cheat_Sheet.html) - -## Conformance Tests - -Conformance Tests will be written in another issue diff --git a/Standards/scs-0217-v1-cluster-hardening.md b/Standards/scs-0217-v1-cluster-hardening.md new file mode 100644 index 000000000..b1a8539dd --- /dev/null +++ b/Standards/scs-0217-v1-cluster-hardening.md @@ -0,0 +1,475 @@ +--- +title: Kubernetes cluster hardening +type: Standard +status: Draft +track: KaaS +--- + +## Introduction + +Due to the regular changes and updates, there are always new security features to deploy and use in Kubernetes. +Nevertheless, a provider (or even a customer) needs to take action in order to achieve a +baseline-secure cluster due to the myriad of configurations possible. This is especially +the case since Kubernetes ships with insecure features and configurations out of the box, +which will need to be mitigated by an administrator with the proper knowledge. +Secure Kubernetes clusters are desirable regardless of the possible threat model, +since higher security doesn't necessarily mean higher complexity in this case. 
+ +## Terminology + +| Term | Meaning | +|------|--------------------------------| +| TLS | Transport Layer Security | +| CA | Certificate Authority | +| JWT | JSON Web Token | +| ABAC | Attribute-based access control | +| RBAC | Role-based access control | + +## Motivation + +Kubernetes clusters are highly configurable, which also gives rise to different security +problems, if the configuration isn't done properly. +These security risks can potentially be exposed in many different parts of a cluster, e.g. +different APIs, authorization and authentication procedures or even Pod privilege mechanisms. +In order to mitigate these problems, different steps and hardening mechanisms could be used +to increase the security of a Kubernetes setup. +Due to the focus of the SCS KaaS standards on the providers, best practices for security +that are more focused on user environments aren't described here, e.g., the possibility for +network traffic control between pods. This could theoretically be set up by a provider, +but isn't very practical for the user, since he would probably need to request changes +regularly in this case. + +## Hardening Kubernetes + +This section is non-authoritative and only describes concepts and design considerations. + +### Regular updates + +Due to the risk associated with running older versions of software, e.g. known security vulnerabilities, +bugs or missing features as well as the difficulty of tracking or identifying attack vectors, +it is advised to first and foremost keep the version of the Kubernetes components up-to-date. +It should be especially important to keep on track with the patch-level [versions of Kubernetes][kubernetes-releases], +since they include bugfixes and security patches, which are also backported to the previous +three minor-level versions, depending on their severity and the feasibility. It is also recommended +to refer to the version skew policy for more details about [component versions][kubernetes-version-skew]. + +### Securing etcd + +The etcd database is the storage for Kubernetes, containing information about cluster workloads, states and secrets. +Gaining access to this critical infrastructure part would enable a bad actor to read the aforementioned information; +write access would be equivalent to administrative access on the Kubernetes cluster and information could be manipulated +while ignoring any restrictions or validations put in place by other Kubernetes components. + +Securing etcd can be done through different or a combination of +many mechanisms, including strong security credentials for the etcd server, the isolation of the etcd servers behind a firewall, separate etcd +instances for components beside the API-server, ACL restrictions for read-write-access to subsets of the keyspace and +a separate CA for etcd communication, which limits the trusted partners of the etcd database to clients with a certificate from this CA. +These strategies will be explained a bit more in-depth in the following subsections. + +#### Strong authentication + +If an etcd instance wasn't secured correctly, it could be possible that a bad actor would try to authenticate against +the database. +It is therefore advised to use strong security credentials (see e.g. [the strong credentials requirements by NIST][strong-credentials]) for +all user accounts on the etcd server as well as the machines running this critical component. 
+This is obviously a fact for all possibly accessible components, but especially true for etcd, since it contains +the complete cluster state. + +#### Multiple etcd instances + +etcd is a critical component that needs to be protected from +bad actors as well as outages. Kubernetes recommends a [five-member cluster](https://kubernetes.io/docs/tasks/administer-cluster/configure-upgrade-etcd/#multi-node-etcd-cluster) for durability and high-availability as well as regular back-ups of the data. +For more information on high-availability, look into the [Kubernetes Node Distribution and Availability Standard](scs-0214-v1-k8s-node-distribution.md). +It would also be possible to use these etcd instances in order to select specific instances +that aren't the current etcd leader for interaction with different components (e.g. Calico), since access to the primary etcd instance could be considered dangerous, because the full keyspace could be viewed without further restrictions (see [here](https://cheatsheetseries.owasp.org/cheatsheets/Kubernetes_Security_Cheat_Sheet.html#limiting-access-to-the-primary-etcd-instance) or [here](https://docs.tigera.io/calico/latest/reference/etcd-rbac/kubernetes-advanced)). +This approach should still be paired with [etcd ACL](#acl-restrictions) to better restrict access. + +#### etcd isolation + +The etcd database should at best be isolated from the rest of a Kubernetes cluster. +Access should only be granted to components that need it, which is in most cases mainly (or only) +the API server. Best practice would be to host etcd on machines separate from the Kubernetes cluster +and block access from machines or networks that don't need access with specific firewall rules. +In most cases, only the API server machines should need access to etcd on ports 2379-2380. + +#### ACL restrictions + +etcd implements access control lists (ACL) and authentication since version 2.1 [1][etcd-auth]. +etcd provides users and roles; users gain permissions through roles. When authentication is enabled, +each request to etcd requires authentication and the transaction is only allowed, if the user has the correct access rights. +etcd can also be launched with `--client-cert-auth=true`, which enables authentication via +the Common Name (CN) field of a client TLS certificate without a password. +This option enables Kubernetes components to authenticate as a user without providing a password, +which is neither possible for Kubernetes components nor planned in future releases. +This method is recommended in order to implement ACL for different Kubernetes components and +not give the Kubernetes API full root access to the etcd instance; instead, a separate user can be created. + +#### TLS communication + +etcd should use TLS for peer- and cluster-communication, so that traffic between different peered etcd instances as well +as the communication with the Kubernetes cluster can be secured. +etcd provides options for all these scenarios, including `--peer-key-file=peer.key` and `--peer-cert-file=peer.cert` +for securing peer communication and the flags `--key-file=k8sclient.key` and `--cert-file=k8sclient.cert` for securing +client communication (and therefore cluster communication). +Additionally, HTTPS should be used as the URL schema. +It is also possible to use a separate CA for the etcd in order to separate and better control access through client +certificates, since etcd by default trusts all the certificates issued by the root CA [2][nsa-cisa]. 
+More information about authentication via TLS is provided in the chapter [ACL restrictions](#acl-restrictions).
+
+### Securing endpoints
+
+Kubernetes provides a well-defined set of ports in its default configuration. These ports are
+used for inter-component communication as well as external access. Due to the widespread availability of information
+about Kubernetes clusters, it is easy for a bad actor to identify a cluster's
+ports and try to attack them. In order to minimize the attack surface, internal ports (and therefore components)
+should not be accessible from external networks, except if there are requirements to enable this behavior.
+
+A good way to restrict access would be a combination of firewalls with port
+blocking and the integration of network separation.
+How this is done is highly dependent on the specific setup of the provider.
+An additional document could be provided in the future to give basic
+guidelines for this task.
+
+A list of the default ports used in Kubernetes as well as the components accessing them can be found below:
+
+#### Control plane nodes
+
+| Ports     | Protocol | Purpose                 | Used by               | Access type        |
+|-----------|----------|-------------------------|-----------------------|--------------------|
+| 6443      | TCP      | API server              | All                   | External, internal |
+| 2379-2380 | TCP      | etcd server             | kube-apiserver, etcd  | Internal           |
+| 10250     | TCP      | Kubelet API             | Self, Control plane   | Internal           |
+| 10255     | TCP      | Read-only Kubelet API   | External applications | External, Internal |
+| 10257     | TCP      | kube-controller-manager | Self                  | Internal           |
+| 10259     | TCP      | kube-scheduler          | Self                  | Internal           |
+
+Hint: `Self` in the `Used by` context means that a resource will access its own port for requests.
+
+#### Worker nodes
+
+| Ports       | Protocol | Purpose               | Used by               | Access type        |
+|-------------|----------|-----------------------|-----------------------|--------------------|
+| 10250       | TCP      | Kubelet API           | Self, Control plane   | Internal           |
+| 10255       | TCP      | Read-only Kubelet API | External applications | External, internal |
+| 30000-32767 | TCP      | NodePort Services     |                       | External           |
+
+### API security, authentication and authorization
+
+In order to secure Kubernetes against bad actors, limiting and securing access to API requests
+is recommended, since these requests are able to control the entire Kubernetes cluster.
+Access control is applied to both human users and Kubernetes service accounts; a request goes through
+several stages after it reaches the API.
+
+1. The Kubernetes API server listens on port 6443 on the first non-localhost network interface by default,
+protected by TLS [3][controlling-access]. The TLS certificate can either be signed with a private CA or based on a public key
+infrastructure with a widely recognized CA behind it.
+2. The authentication step checks the request for correct authentication based on different possible
+authentication modules like password, plain tokens or JWT. Only one of these methods needs to succeed
+in order to allow a request to pass to the next stage.
+3. The authorization step authorizes a request if the user is allowed to carry out a specific operation.
+The request must contain the username of the requester, the requested action and the affected object.
+Kubernetes supports different authorization modules like ABAC, RBAC or Webhooks. Only one of these
+modules needs to approve the request in order for it to be authorized.
+4.
 The last step consists of admission control modules, which can modify or reject requests after accessing
+the object's contents.
+
+#### Authentication
+
+Kubernetes provides different internal authentication mechanisms that can be used depending
+on the requirements of the cluster provider and user. Multiple authentication systems can
+be enabled and the [Kubernetes documentation][kubernetes-auth] recommends using at least two methods,
+including Service Account Tokens and another method. Methods directly provided by Kubernetes include
+the following (a more complete or up-to-date list may be found in the [Kubernetes authentication docs][kubernetes-auth]):
+
+- *Static Token Files*
+
+  This method reads bearer tokens from requests and checks them against a CSV file provided to Kubernetes containing
+  three columns named `token`, `username` and `uid`. These tokens last indefinitely and the list can't be changed
+  without a restart of the API server. This makes this option unsuitable for production clusters.
+
+- *Service Account Tokens*
+
+  A service account is an authenticator that uses signed bearer tokens for request verification.
+  Service accounts can be given to the API server with a file containing PEM-encoded X509 RSA or
+  ECDSA private or public keys that verify the Service Account Tokens.
+  Service Accounts are normally created automatically by the API server and associated with the
+  pods through the `ServiceAccount` admission controller. Tokens are signed JSON Web Tokens
+  that can be used as a Bearer Token or mounted into the pods for API server access.
+  Since Service Account Tokens are mainly used to allow workloads to access the API server,
+  they're not really intended to authenticate users in production clusters.
+
+- *X509 client certificates*
+
+  Client certificate authentication can be enabled by providing a `Certificate Authority`
+  file to the API server via the `--client-ca-file=` option. The file contains one
+  or more CAs that a presented client certificate is validated against.
+  In this case the common subject name is used as the username for the request;
+  additionally, a group membership can be indicated with the certificate's organization field.
+  These certificates are unsuitable for production use, because Kubernetes does not
+  support certificate revocation. This means user credentials can't be modified or
+  revoked without rotating the root CA and re-issuing all cluster certificates.
+
+As outlined, most internal authentication mechanisms of Kubernetes aren't really
+usable in productive environments at the current time. Instead, external authentication
+should be used in order to provide production-ready workflows.
+The Kubernetes documentation lists a few examples of external authenticators, e.g.
+
+- [OpenIDConnect][openidconnect]
+- Bearer Tokens with [Webhook Token Authentication][webhook-token]
+- Request Header Authentication with an [Authenticating Proxy][authenticating-proxy]
+
+All of these examples are useful to set up for an organization or can be used with
+an already in-place solution. More information can be found in their respective
+part of the Kubernetes documentation.
+Most of these are good solutions for productive setups, since they enable easy
+user management, access revocation and things like short-lived access tokens.
+What will be used by your organization depends on the present setup and the use case.
+
+#### Authorization
+
+Authorization is done after the authentication step in order to check the rights
+of a user within the system.
 Kubernetes authorizes API requests with the API server,
+which evaluates requests against all policies in place and then allows or denies these requests.
+By default, a request would be denied.
+
+Kubernetes provides several authorization modes to authorize a request:
+
+- *Node*
+
+  The [Node authorization mode][node-authorization] grants permission to a Kubelet
+  based on the scheduled pods running on it. It allows a Kubelet to perform specific
+  API operations. The goal is to have a minimal set of permissions to ensure
+  the Kubelet can operate correctly.
+  Each Kubelet identifies with credentials belonging to the `system:nodes` group and
+  a username `system:nodes:` against this authorizer.
+
+- *ABAC (Attribute-based access control)*
+
+  ABAC grants access rights based on policies dependent on attributes like
+  user attributes, resource attributes or environment attributes.
+  An example would be the `resource` attribute, which could limit access for a user
+  to only `Pod` resources.
+
+- *RBAC (Role-based access control)*
+
+  RBAC is a method of regulating access to the resources based on the roles of
+  individual users. A user therefore must have the ability to perform a specific set
+  of tasks with a set of resources based on their role.
+  Kubernetes implements `Role`s to accomplish this and binds these with `Role Binding`s
+  to a user in order to specify their permission set.
+
+- *Webhook*
+
+  Webhook authorization uses an HTTP callback to check the authorization of a user
+  against a URL provided for this mode. This externalises the authorization part
+  outside of Kubernetes.
+
+Most organizations and deployments work with RBAC, most often due to organizational or
+customer-owner-relationship-like structures in place.
+Nonetheless, neither ABAC, RBAC nor Webhook authorization can be recommended over the
+other, since this all depends on the use case and required structure of a deployment.
+Using at least one of these modes is recommended.
+
+It is also recommended to enable the Node authorizer in order to limit Kubelet
+permissions to a minimum operational state.
+
+#### Admission Controllers
+
+Admission controllers intercept requests to the Kubernetes API after the
+authentication and authorization steps and can validate and/or mutate the request.
+This step is limited to requests that `create`, `modify` or `delete` objects as well as custom
+verbs; other requests are not blocked.
+Kubernetes provides multiple admission controllers, some of which are enabled by default.
+
+One recommended admission controller is the [`NodeRestriction` controller][node-restriction],
+which limits the `Node` and `Pod` objects a Kubelet is allowed to modify to their own `Node` or
+objects that are bound to them. It also disallows updating or removing taints and prevents changing
+or adding labels with a `node-restriction.kubernetes.io/` prefix.
+Be aware that Kubelets will only be limited by this admission controller if the user credentials
+in the `system:nodes` group begin with a `system:node:` username. Administrators must therefore
+configure their Kubelets correctly if the `NodeRestriction` controller is to be fully functional.
+
+### Kubelet access control
+
+The Kubelet is the node agent that runs on each node. It registers with the API
+server and ensures that pods handed over to it are running and healthy according
+to the specification provided to it.
 The HTTPS endpoint of a Kubelet exposes APIs
+with varying access to sensitive data and also enables operations with varying
+levels of impact, such as the manipulation of node data and containers.
+There is also a read-only HTTP endpoint that was used for monitoring a Kubelet and
+its information. This port was also used by applications like `kubeadm` to check
+the health status of the Kubelet.
+This port is still available, but it is planned to be [removed][ro-port-removal]
+in a future version. At the moment, the port is disabled by default since [Kubernetes 1.10][ro-port-disabled]
+and shortly later also in [`kubeadm`][ro-port-disabled-kubeadm].
+Different sources recommend disabling this port [4][ro-port-s1] [5][ro-port-s2] due to possible
+security risks, but since this standard recommends restricting accessibility of internal ports,
+this port wouldn't be accessible from external networks.
+It is nevertheless recommended to keep this port disabled, since Kubernetes also acknowledged
+its risks and plans to remove it.
+
+By default, the API server does not verify the Kubelet's serving certificate and
+requests to the HTTPS endpoint that are not rejected by other authentication
+methods are treated as anonymous requests with the combination of name `system:anonymous`
+and group `system:unauthenticated`.
+This can be disabled by starting the Kubelet with the flag `--anonymous-auth=false`,
+which returns `401 Unauthorized` for unauthenticated requests.
+It is also possible to enable internal authentication methods for the Kubelet.
+Possibilities include X509 client certificates as well as API bearer tokens to
+authenticate against the Kubelet; details for these methods can be found in the [Kubernetes docs][kubelet-auth].
+
+After a request is authenticated, the authorization for it is checked, with the default
+being `AlwaysAllow`. Requests should at best be authorized depending on their source,
+so differentiation of access makes sense for the Kubelet; not all users should have
+the same access rights. How access can be configured and delegated to the Kubernetes
+API server can be found in the [Kubernetes docs][kubelet-auth]. The process works like the API request
+authorization approach with verbs and resources being used as identifiers in roles and role bindings.
+
+### Pod security policies
+
+Pod security plays a big part in securing a Kubernetes cluster, since bad actors could use pods to gain
+privileged access to the systems underneath. The security risk here is mainly influenced by the capabilities
+and privileges given to a container. It is therefore recommended to apply the principle of "least privilege",
+which should limit the security risk to a minimum.
+
+Kubernetes defines the [*Pod security standards*][pod-security-standards]
+in the form of three policies that try to cover the range of the security spectrum.
+These policies can be found in the following list and define a list of restricted fields that can only be
+changed to a set of allowed values. An up-to-date list of these values can be found [here][pod-security-standards].
+
+- *Privileged*
+
+  Unrestricted policy, providing the widest possible level of permissions.
+  This policy allows for known privilege escalations.
+
+- *Baseline*
+
+  Minimally restrictive policy which prevents known privilege escalations.
+  Allows the default (minimally specified) Pod configuration.
+
+- *Restricted*
+
+  Heavily restricted policy, following current Pod hardening best practices.
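+
+As a rough, non-normative illustration of the *Restricted* policy, a container that passes this
+profile typically sets security context fields like the following (field names follow the upstream
+Pod security standards; the pod and image names are placeholders):
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: restricted-example                 # hypothetical pod name
+spec:
+  containers:
+    - name: app
+      image: registry.example.com/app:1.0  # placeholder image
+      securityContext:
+        runAsNonRoot: true                 # container must not run as root
+        allowPrivilegeEscalation: false    # no privilege escalation allowed
+        capabilities:
+          drop: ["ALL"]                    # drop all Linux capabilities
+        seccompProfile:
+          type: RuntimeDefault             # a seccomp profile must be set explicitly
+```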
+
+Kubernetes also offers the *Pod security* admission controller, which enforces
+the *Pod security standards* on a namespace level during pod creation.
+The admission controller defines the standard to be used with the three levels
+`privileged`, `baseline` and `restricted`. Each namespace can be configured to enforce
+a different control mode, which defines what action the control plane takes
+after a violation of the selected *Pod security* is detected.
+
+- `enforce`
+
+  Policy violations will cause the pod to be rejected.
+
+- `audit`
+
+  Policy violations will trigger the addition of an audit annotation to the event
+  recorded in the audit log, but are otherwise allowed.
+
+- `warn`
+
+  Policy violations will trigger a user-facing warning, but are otherwise allowed.
+
+Be aware that `enforce` is not applied to workload resources, only to the pods created from their template.
+
+### Further measures
+
+While researching this topic, further measures were considered, such as container image verification,
+distroless images, usage of `ImagePolicyWebhook`, network policy enforcement,
+container sandboxing and prevention of kernel module loading.
+Most of these were taken out of the document during writing due to either being the responsibility
+of the cluster's user (and therefore not possible to implement for the provider), being more relevant
+for high security clusters or changing the expected cluster environment too much, so that normally
+expected operations could potentially not work in such a modified cluster.
+These measures will possibly be introduced in a future document about higher security clusters.
+
+## Standard
+
+This standard provides the baseline security requirements for a cluster in the SCS context.
+
+Kubernetes clusters MUST be updated regularly in order to receive bugfixes and security patches.
+For more information refer to the [SCS K8s Version Policy](scs-0210-v2-k8s-version-policy.md),
+which outlines the version update policies of the SCS.
+
+Hardening etcd is important due to it being a critical component inside a Kubernetes cluster.
+etcd SHOULD be isolated from the Kubernetes cluster by being hosted on separate (virtual) machines.
+If this is the case, access to these instances MUST be configured so that only the API server and
+necessary cluster components requiring access can access etcd.
+Communication with etcd MUST be secured with TLS for both peer- and cluster-communication.
+It is RECOMMENDED to use a CA separate from the one used for the Kubernetes cluster for etcd in
+order to better control and issue certificates for clients allowed to access etcd.
+ACL MUST be enabled for etcd, which allows better control of the access rights to specific key sets
+for specific users. Authentication MUST be done via the Common Name (CN) field of the TLS client
+certificates (since normal username-password authentication isn't implemented for Kubernetes components).
+
+Kubernetes' endpoints MUST be secured in order to provide a small attack surface for bad actors.
+It MUST NOT be possible to access Kubernetes ports from outside the internal network hosting the
+Kubernetes cluster except for the ports of the API server (default 6443) and the NodePort Services
+(default 30000-32767). The read-only Kubelet API port (default 10255), which is mostly used for monitoring,
+SHOULD be disabled altogether if it isn't in use, mainly because the port is HTTP-only
+and can deliver sensitive information to the outside.
+Endpoints MUST be secured via HTTPS.
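+
+As a non-normative sketch, the etcd requirements above roughly translate into etcd server flags like
+the following (shown here as a fragment of a static pod manifest; the image version, file paths and
+URLs are placeholders, and the exact deployment mechanism is up to the provider):
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: etcd
+  namespace: kube-system
+spec:
+  containers:
+    - name: etcd
+      image: registry.k8s.io/etcd:3.5.12-0                         # example image/version
+      command:
+        - etcd
+        # TLS for client (cluster) communication, HTTPS only
+        - --cert-file=/etc/etcd/pki/server.crt
+        - --key-file=/etc/etcd/pki/server.key
+        - --listen-client-urls=https://0.0.0.0:2379
+        - --advertise-client-urls=https://etcd-0.example.internal:2379
+        # require client certificates signed by a separate etcd CA
+        - --client-cert-auth=true
+        - --trusted-ca-file=/etc/etcd/pki/etcd-ca.crt
+        # TLS for peer communication between etcd members
+        - --peer-cert-file=/etc/etcd/pki/peer.crt
+        - --peer-key-file=/etc/etcd/pki/peer.key
+        - --peer-client-cert-auth=true
+        - --peer-trusted-ca-file=/etc/etcd/pki/etcd-ca.crt
+```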
+
+Securing Kubernetes via authentication and authorization is another important topic here.
+Authentication is possible through multiple mechanisms, including Kubernetes-provided systems as well as external
+authentication processes.
+A cluster MUST implement at least two methods for authentication. One of these MUST be *Service Account Tokens*, in order
+to provide full functionality to Pods. A second authentication mechanism can be chosen depending on the requirements
+of the provider and/or customer.
+
+Authorization can also be provided through multiple mechanisms.
+A cluster MUST activate at least two authorization methods, one of which MUST be *Node authorization* and another one
+consisting of either ABAC, RBAC or Webhook authorization depending on the required use case.
+We RECOMMEND RBAC due to it fitting most use cases and being very well documented, but your setup might require another solution.
+
+In order to harden Kubelet access control, a Kubelet SHOULD only be accessible internally via HTTPS. This is already the
+case for the Kubelet API, except for the read-only port, which is only available as HTTP. As mentioned earlier, this port
+should be disabled.
+Kubelets MUST disable anonymous request authentication to disallow non-rejected requests to go through as anonymous requests.
+OPTIONALLY, X509 client certificate authentication or API bearer token authentication can be enabled.
+Request authorization for the Kubelet MUST be delegated to the API server via `Webhook` authorization as recommended
+by the [Kubernetes documentation][kubelet-auth].
+Additionally, the `NodeRestriction` admission controller MUST be activated in order to limit interactions between
+different Kubelets by disallowing modification of `Pod` objects if they're not bound to the Kubelet requesting the modification.
+
+Finally, *Pod security standards* in the form of policies MUST be activated for the cluster. The SCS REQUIRES at least
+the *Baseline* policy; the stricter *Restricted* policy CAN also be used.
+The *Pod security* admission controller MUST also be activated in order to enforce these policies on a namespace level.
+We RECOMMEND the `enforce` mode to be used for this admission controller setup.
+
+## Conformance Tests
+
+Conformance Tests will be written within another issue.
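+
+For illustration, the *Pod security* admission requirements above could be fulfilled by labeling
+namespaces along the following lines (a non-normative sketch using the upstream pod-security
+admission labels; the namespace name is a placeholder):
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: customer-workloads                         # hypothetical namespace
+  labels:
+    # reject pods that violate the Baseline profile (Restricted may be enforced instead)
+    pod-security.kubernetes.io/enforce: baseline
+    # additionally warn about violations of the stricter Restricted profile
+    pod-security.kubernetes.io/warn: restricted
+```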
+ +## Related Documents + +- [OWASP Kubernetes Security Sheet](https://cheatsheetseries.owasp.org/cheatsheets/Kubernetes_Security_Cheat_Sheet.html) +- [Kubernetes security concepts](https://kubernetes.io/docs/concepts/security/) +- [Securing a cluster](https://kubernetes.io/docs/tasks/administer-cluster/securing-a-cluster/) +- [Controlling access](https://kubernetes.io/docs/concepts/security/controlling-access/) +- [Pod security standards](https://kubernetes.io/docs/concepts/security/pod-security-standards/) +- [NSA CISA Kubernetes hardening](https://kubernetes.io/blog/2021/10/05/nsa-cisa-kubernetes-hardening-guidance/) +- [Configure etcd](https://kubernetes.io/docs/tasks/administer-cluster/configure-upgrade-etcd/) +- [Google Kubernetes cluster trust](https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-trust) + +[kubernetes-releases]: https://kubernetes.io/releases/ +[kubernetes-version-skew]: https://kubernetes.io/releases/version-skew-policy/ +[strong-credentials]: https://pages.nist.gov/800-63-3/sp800-63b.html +[kubernetes-auth]: https://kubernetes.io/docs/reference/access-authn-authz/authentication/ +[node-authorization]: https://kubernetes.io/docs/reference/access-authn-authz/node/ +[node-restriction]: https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#noderestriction +[kubelet-auth]: https://kubernetes.io/docs/reference/access-authn-authz/kubelet-authn-authz/#kubelet-authorization +[pod-security-standards]: https://kubernetes.io/docs/concepts/security/pod-security-standards/ +[openidconnect]: https://kubernetes.io/docs/reference/access-authn-authz/authentication/#openid-connect-tokens +[webhook-token]: https://kubernetes.io/docs/reference/access-authn-authz/authentication/#webhook-token-authentication +[authenticating-proxy]: https://kubernetes.io/docs/reference/access-authn-authz/authentication/#authenticating-proxy +[controlling-access]: https://kubernetes.io/docs/concepts/security/controlling-access/ + +[ro-port-removal]: https://github.com/kubernetes/kubernetes/issues/12968 +[ro-port-disabled]: https://github.com/kubernetes/kubernetes/pull/59666 +[ro-port-disabled-kubeadm]: https://github.com/kubernetes/kubeadm/issues/732 +[ro-port-s1]: https://www.stigviewer.com/stig/kubernetes/2021-04-14/finding/V-242387 +[ro-port-s2]: https://docs.datadoghq.com/security/default_rules/cis-kubernetes-1.5.1-4.2.4/ +[nsa-cisa]: https://kubernetes.io/blog/2021/10/05/nsa-cisa-kubernetes-hardening-guidance/ +[etcd-auth]: https://etcd.io/docs/v3.3/op-guide/authentication/ diff --git a/Standards/scs-0219-v1-kaas-networking.md b/Standards/scs-0219-v1-kaas-networking.md new file mode 100644 index 000000000..8f35f7925 --- /dev/null +++ b/Standards/scs-0219-v1-kaas-networking.md @@ -0,0 +1,99 @@ +--- +title: KaaS Networking Standard +type: Standard +status: Draft +track: KaaS +--- + +## Introduction + +Kubernetes defines a networking model that needs to be implemented by a separate CNI plugin. +Beyond basic connectivity within the cluster, however, there are many networking features that are specified but optional. +Some of these optional features provide vital functionality, such as the NetworkPolicy API and the Ingress API. + +This standard specifies a minimal set of networking features that users can expect in clusters created by an SCS-compliant KaaS provider. 
+
+## Terminology
+
+The following terms are used throughout this document:
+
+| Term | Meaning |
+|------|---------|
+| KaaS, managed Kubernetes | Kubernetes as a Service, automated on-demand deployment of Kubernetes clusters. |
+| CSP | Cloud Service Provider, the provider of the KaaS infrastructure. |
+| CNI | Container Network Interface, a standardized networking interface for container runtimes. |
+| CNI plugin, networking plugin | Kubernetes bindings for a CNI implementation, translates Kubernetes API concepts into more basic container networking concepts. |
+| network policy | A set of rules to restrict network traffic in a Kubernetes cluster. |
+
+## Motivation
+
+KaaS providers will typically support additional networking functionality beyond basic Kubernetes networking.
+The specific range of features depends on the CNI plugin used, but may also be extended by additional operators.
+Users may expect certain optional functionality, so we should define a baseline feature set that has to be available in an SCS-compliant KaaS cluster.
+
+## Design Considerations
+
+The Kubernetes API can be extended arbitrarily.
+Many CNI plugins will define custom resources to enable functionality that is not covered in the official [API specification](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/).
+Sometimes they will even reuse names from different API groups, such as `NetworkPolicy`, which exists in the basic `networking.k8s.io/v1` API, but also in `projectcalico.org/v3`.
+
+To avoid any ambiguity, we should therefore be explicit about the API groups and versions of resources.
+We should also avoid mandating third-party API extensions, to avoid dependencies on specific third-party software and keep the standard as generic as possible.
+
+### Options considered
+
+#### NetworkPolicy API
+
+Kubernetes network policies are used to restrict network traffic between pods in a cluster, but also between pods and external network resources.
+The policy rules can filter based on port and address ranges, but also on Kubernetes-specific target attributes such as namespaces and labels.
+They must be implemented by the CNI plugin, and though they are widely supported, they are still technically optional, and there are some lightweight networking plugins, such as Flannel, that do not enforce them.
+
+Nonetheless, network policies are widely used and most users will expect them in a managed Kubernetes cluster.
+The wide but varying support among CNI plugins makes them a good target for SCS standardization.
+
+#### Default Network Policies in Namespaces
+
+Basic network policies are namespaced resources, and can only filter traffic to and from pods in their own namespace.
+In a newly created namespace without policies the default behavior will apply, which is to not restrict traffic at all.
+
+It can be desirable to automatically create default network policies in new namespaces, using a policy operator such as Kyverno.
+A CSP could provide such an operator and offer a number of default policies, like blocking connections to other namespaces by default, or blocking access to the OpenStack metadata service.
+
+Any user with permissions to manage their own network policies in a namespace will of course be able to remove or modify any default network policies in that namespace.
+CSP-provided network policies should thus only be viewed as a safety default, and should only be deployed if they are actually beneficial to users.
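+
+For illustration, such a CSP-provided safety default could be a simple deny-all-ingress policy that the
+policy operator creates in every new namespace (a non-normative sketch; the policy and namespace names
+are placeholders):
+
+```yaml
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: default-deny-ingress        # hypothetical name of the CSP-provided default policy
+  namespace: example-namespace      # created automatically in each new namespace
+spec:
+  podSelector: {}                   # selects all pods in the namespace
+  policyTypes:
+    - Ingress
+  # no ingress rules are listed, so all ingress traffic to pods in this namespace is denied
+```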
+ +#### AdminNetworkPolicy API + +An alternative to automatically created default network policies are API extensions that allow cluster-wide networking rules. +Some CNI plugins have implemented such extensions, e.g. Calico's `GlobalNetworkPolicy` and Cilium's `CiliumClusterwideNetworkPolicy`. + +The Kubernetes Network Special Interest Group is currently working on an [official API extension](https://network-policy-api.sigs.k8s.io/api-overview/) to cover this functionality. +This API extension introduces the new `AdminNetworkPolicy` and `BaselineAdminNetworkPolicy` resources, which represent cluster-wide network policies with respectively higher or lower precedence than namespaced network policies. + +This API is also a good candidate for standardization because it consolidates a number of vendor-specific workarounds to limitations of the NetworkPolicy API. +It has not been stabilized yet, so currently we can at most recommend CNI plugins where there is ongoing work to support these features. + +#### Ingress API + +The Ingress API allows the external exposure of HTTP/HTTPS-based services running in the cluster. +Unlike the L3/L4-based LoadBalancer Service type, Ingress provides L7 load balancing, HTTP routing, and TLS termination for services. +This functionality can be provided within the cluster by a pod-based ingress controller such as `ingress-nginx`, that exposes Ingress resources as Services. + +However, there are also Ingress controllers that integrate with underlying infrastructure and may help to reduce overhead. +Examples for this are the Cilium CNI plugin, which comes with built-in Ingress support, and the Octavia Ingress controller, which may be a good choice if OpenStack Octavia is already used to provide L3/L4 load balancing. + +The CSPs that manage the underlying infrastructure can of course make the best choice for such an integrated Ingress controller, so they should be encouraged to do so. +Even with a CSP-provided default Ingress controller present, users will be able to use alternative Ingress controllers by creating a new `IngressClass`, which can then be referenced in Ingress resources. + +## Decision + +CSPs MUST provide a network plugin that fully supports `NetworkPolicy` resources in the API version `networking.k8s.io/v1`. +CSPs SHOULD provide a network plugin that supports or is working on support for the `AdminNetworkPolicy` and `BaselineAdminNetworkPolicy` resources of the `policy.networking.k8s.io` API group, in their latest version, up to `v1`. + +CSPs SHOULD offer the option for a managed, `networking.k8s.io/v1`-compliant Ingress controller and a default `IngressClass` resource for this controller. + +CSPs MAY add default networking restrictions, using either `networking.k8s.io/v1`-compliant `NetworkPolicy` resources with a policy operator, or alternatively any cluster-wide network policy extensions provided by the CNI plugin. + +## Conformance Tests + +Required support for network policies will be tested using the upstream e2e tests via Sonobuoy. 
diff --git a/Standards/scs-0219-w1-kaas-networking.md b/Standards/scs-0219-w1-kaas-networking.md new file mode 100644 index 000000000..3e34948d2 --- /dev/null +++ b/Standards/scs-0219-w1-kaas-networking.md @@ -0,0 +1,27 @@ +--- +title: "KaaS Networking Standard: Implementation Notes" +type: Supplement +track: KaaS +status: Draft +supplements: + - scs-0219-v1-kaas-networking.md +--- +## List of compliant CNI Plugins + +The Kubernetes Network Policy API working group maintains a [list of work-in-progress implementations](https://network-policy-api.sigs.k8s.io/implementations/) of the AdminNetworkPolicy and BaselineAdminNetworkPolicy resources. +Besides their own proof-of-concept implementation of [kube-network-policies](https://github.com/kubernetes-sigs/kube-network-policies), at the time of writing they list the following CNI plugins: + +- [OVN-Kubernetes](https://github.com/ovn-org/ovn-kubernetes/) +- [Antrea](https://github.com/antrea-io/antrea/) +- [KubeOVN](https://github.com/kubeovn/kube-ovn) +- [Calico](https://github.com/projectcalico/calico) +- [Cilium](https://github.com/cilium/cilium) + +All of these plugins also implement the basic NetworkPolicy API, and are therefore compliant both with the standard's requirements and recommendations. + +The CNI plugin [Flannel](https://github.com/flannel-io/flannel) does not support network policies by itself, but can be combined with Calico for policy enforcement. +This configuration is known as [Canal](https://docs.tigera.io/calico/latest/getting-started/kubernetes/flannel/install-for-flannel) and will likely profit from Calico's support for AdminNetworkPolicy. + +There are more CNI plugins that support the NetworkPolicy API, but are not known to work on support of the AdminNetworkPolicy extensions. +As such they are still compliant with the current version of the Standard. +However, these seem to be either vendor-specific, like the [Azure CNI](https://learn.microsoft.com/de-de/azure/aks/configure-azure-cni), or unmaintained, like [Weave](https://github.com/weaveworks/weave). diff --git a/Standards/scs-0300-v1-requirements-for-sso-identity-federation.md b/Standards/scs-0300-v1-requirements-for-sso-identity-federation.md index 65eee33e2..70f3ddc47 100644 --- a/Standards/scs-0300-v1-requirements-for-sso-identity-federation.md +++ b/Standards/scs-0300-v1-requirements-for-sso-identity-federation.md @@ -25,7 +25,7 @@ premises or e.g. as an external 3rd party cloud service. To ease onboarding of customer employees (or e.g. customer contracted 3rd party admin staff) as SCS users, it would be good to be able to consume these external identities in SCS. -For customers this avoids the neccessity to explicitly maintain an additional +For customers this avoids the necessity to explicitly maintain an additional dedicated account in SCS and this also reduces what SCS needs to do with respect to taking care of persisting user account information. @@ -34,7 +34,7 @@ authentication to external identity providers and map those users to roles in SCS that can be used for authorization decisions when users access SCS services. In addition to user identities there we also see the necessity to support the -use of "machine identites" (aka "workload identities" or "service accounts"). +use of "machine identities" (aka "workload identities" or "service accounts"). These will probably be SCS-local accounts and have for example the purpose to grant CaaS workload access to storage resources served by the infrastructure layer. 
Exact architectural details for this are still in active discussion, @@ -50,11 +50,11 @@ authorization. One thing these services have in common is that they are able to use SSO protocols like OAuth 2.0 or OpenID Connect (OIDC) on top of it to delegate authentication. They are service providers (SAML terminology) and can -be relying parties (OIDC terminology) of a protocol compliant identity provider +be relying parties (OIDC terminology) of a protocol-compliant identity provider (IdP). So the idea is to run an SSO IdP as part of SCS to provide a dedicated point -of entry for identites, which the SCS service layers can use as a common +of entry for identities, which the SCS service layers can use as a common interface to consume external user identities. The purpose of this document is to specify what requirements a specific @@ -66,10 +66,10 @@ in the context of SCS. As a central service for identity handling, the IdP service needs to be robust and reliable. -Customers shall be able to access self service, so that +Customers shall be able to access self-service, so that they can make reasonable adjustments e.g. to role mapping. At the time of writing this document it's still undecided -if SCS has the requirement of a dedicated "self service" service +if SCS has the requirement of a dedicated "self-service" service that serves as a frontend to provision and re-configure customer specific data, abstracting e.g. from IdP specific user interface particularities. @@ -77,7 +77,7 @@ user interface particularities. Keycloak is currently being deployed as part of the IaaS reference implementation. Technically this IdP component shall be shifted from the management plane to be run on the basis of a "minimal" Kubernetes (e.g. K3S), -e.g. to make use of the "self healing" and scaling features achievable +e.g. to make use of the "self-healing" and scaling features achievable with that. So one of the considerations is if the solution will work well on a @@ -98,7 +98,7 @@ Quarkus instead of WildFly/JBoss. The project maintains several means of community contributions as listed on the [community section](https://www.keycloak.org/community) -of the project website. It uses [Github issues](https://github.com/keycloak/keycloak/issues) +of the project website. It uses [GitHub issues](https://github.com/keycloak/keycloak/issues) to track development. It offers a REST API for administration and there's a separately maintained @@ -111,7 +111,7 @@ in adopting to protocol standard changes and extensions. This has been observed in the case of logout support (backend and frontend variants) in OIDC. It offers a concept of "Identity Brokering", where Keycloak is not just IdP -but also "client" to other IdPs. This allows daisy chaining of identity +but also "client" to other IdPs. This allows daisy-chaining of identity federation. In this configuration it can work as a point of protocol transition between different supported SSO protocols (SAML, OAuth 2.0, etc.). @@ -122,7 +122,7 @@ e.g.). Keycloak's implementation makes some design decisions that are specific to it and have consequences for clients of the service. E.g. Keycloak has a concept of management "Realms", which have their own specific -set of HTTP API entrypoints, both for administration as well as for IdP +set of HTTP API entrypoints, both for administration and for IdP requests. Commonly Keycloak realms can be used to map them 1:1 to user domains, @@ -145,9 +145,9 @@ for all aspects of its administration interface.
For storage of Keycloak configuration and local user metadata (e.g. from which external IdP a user account originally came from) -Keycloak supports several SQL backends through JDBC. Thus +Keycloak supports several SQL backends through JDBC. Thus, it can be hooked up to a Postgres Database or to a -MariaDB/Galera cluster e.g.. +MariaDB/Galera cluster, for example. As of April 11, 2023, Keycloak joined the CNCF as an incubating project. @@ -157,9 +157,9 @@ Zitadel is a newer implementation of an SSO IdP. It is implemented in Go and under active development and maintained by ZITADEL. The project is open for community [contributions](https://github.com/zitadel/zitadel/blob/main/CONTRIBUTING.md) -to all parts of the eco system. -Feature requests and bugs being tracked on [Github](https://github.com/orgs/zitadel/projects/2/views/5) for development. -Community questions can be asked in the [public chat](https://zitadel.com/chat) or via [Github Discussions](https://github.com/zitadel/zitadel/discussions). +to all parts of the ecosystem. +Feature requests and bugs are being tracked on [GitHub](https://github.com/orgs/zitadel/projects/2/views/5) for development. +Community questions can be asked in the [public chat](https://zitadel.com/chat) or via [GitHub Discussions](https://github.com/zitadel/zitadel/discussions). ZITADEL offers support for the commonly used authentication and authorization protocols such as OIDC, OAuth2, SAML2. It is a compliant and certified OpenID Connect provider with support for various Grant Types for both human users and machine users. Compared to Keycloak SPIs, ZITADEL offers Actions to customize and integrate (e.g. calling external APIs, Webhooks, customizing pre-built workflows, customizing tokens) @@ -175,7 +175,7 @@ in the following areas: - For client services (single set of HTTP API endpoints). - For SCS operators for provisioning customer [organizations](https://zitadel.com/docs/concepts/structure/organizations) - and robust configuraton by using templated client, role and mapping + and robust configuration by using templated client, role and mapping configuration. - For SCS customers for a robust user experience for self servicing. @@ -188,8 +188,8 @@ Managers that receive granted Projects can assign users permissions to use the p for multiple areas of use and configuration. It recently also added support for the [Device Authorization Grant](https://github.com/zitadel/oidc/issues/141), -which, at time of writing, is a feauture that is relevant -for SCS to be able use OpenStack CLI and APIs with federated +which, at time of writing, is a feature that is relevant +for SCS to be able to use OpenStack CLI and APIs with federated identities ([Device Authorization Grant](https://github.com/SovereignCloudStack/issues/issues/221)). Support for consumption of LDAP backends is available since [Zitadel v2.23.0](https://github.com/zitadel/zitadel/releases/tag/v2.23.0) @@ -203,7 +203,7 @@ to use Kubernetes (or similar like Knative) and CockroachDB. At time of writing a PoC "spike" is done to assess and verify the hopes connected with Zitadel in the context of the SCS testbed. -Currently Zitadel is lacking the possibility to easily add custom claims. +Currently, Zitadel is lacking the possibility to easily add custom claims. It supports `urn:zitadel:iam:user:metadata`, but that is more suitable towards Kubernetes and cannot be parsed with the OpenStack mapping mechanism.
[There is work going on](https://github.com/zitadel/zitadel/issues/3997) which @@ -238,7 +238,7 @@ Keycloak currently supports the OAuth 2.0 grants that SCS wants to make use of (e.g. Device Authorization Grant). It is the implementation for which integration is currently documented in OpenStack and implemented in kolla-ansible. SCS currently deploys Keycloak and the IAM team has -most hands on expecience with it, e.g. when it comes to colletaral questions +most hands-on experience with it, e.g. when it comes to collateral questions like how to make TLS and signing certificates available to the IdP that shall be used in federation to external domains. diff --git a/Standards/scs-0301-v1-naming-conventions.md b/Standards/scs-0301-v1-naming-conventions.md index 6540e35e2..bf909e573 100644 --- a/Standards/scs-0301-v1-naming-conventions.md +++ b/Standards/scs-0301-v1-naming-conventions.md @@ -33,10 +33,10 @@ OPTIONAL For naming the customers the suggestion from PS is the following: -A prefix will be use to differenciate domain, project and user in -the openstack environment. The project name is also added as a sufix. +A prefix will be used to differentiate domain, project and user in +the openstack environment. The project name is also added as a suffix. -So the onboaring tool will create the following structure for a new +So the onboarding tool will create the following structure for a new customer onboarded in the system. ```commandline @@ -109,15 +109,15 @@ will be called "Customer A". There should be an OIDC client in each customer realm to allow the federation to the Proxy realm. Currently called OSISM on the testbed. -On the proxy realm, it's needed to add this new customer realm as an idenity provider. During the creation of the identity +On the proxy realm, it is necessary to add this new customer realm as an identity provider. During the creation of the identity provider for "Customer A", the field "Alias" should be set to ``. This will cause the users federated from -realm "Customer A" to the proxy realm to be prefixed to avoid naming colisions, e.g. `d${ALIAS}-${CLAIM.preferred_username}`. +realm "Customer A" to the proxy realm to be prefixed to avoid naming collisions, e.g. `d${ALIAS}-${CLAIM.preferred_username}`. Also, the identity federation should be configured to store the `` from that realm into the users. So it -can be send to Keystone mapping to use it as `gd-member` and `gp--member`. There is +can be sent to Keystone mapping to use it as `gd-member` and `gp--member`. There is also the necessity of a mapper to send the `openstack-default-project`. -Add the aditional mappings for roles and groups as necessary to get the attributes from the customer realm into the OIDC +Add the additional mappings for roles and groups as necessary to get the attributes from the customer realm into the OIDC userinfo that is put into the OIDC to the proxy realm and from there to Keystone. #### _Option 2_ diff --git a/Standards/scs-0302-v1-domain-manager-role.md b/Standards/scs-0302-v1-domain-manager-role.md index 59702b3dc..a418a23b7 100644 --- a/Standards/scs-0302-v1-domain-manager-role.md +++ b/Standards/scs-0302-v1-domain-manager-role.md @@ -1,17 +1,27 @@ --- title: Domain Manager configuration for Keystone type: Standard -status: Draft +status: Stable +stabilized_at: 2024-11-13 track: IAM --- ## Introduction SCS Clouds should provide a way to grant Domain Manager rights to SCS Customers which provides IAM self-service capabilities within an OpenStack domain.
-This is not properly implemented in the default OpenStack configuration and requires specific adjustments to the Keystone identity management configuration. +Such capabilities should enable the SCS customer to manage identity resources within their domain without involving the provider of the cloud. To avoid conflict with the unscoped `admin` role in OpenStack we want to refer to this new persona as "Domain Manager", introducing the `manager` role in the API for domains. -### Glossary +:::info + +The Domain Manager functionality will be a native part of the official OpenStack beginning with release 2024.2 ("Dalmatian"). + +To implement the Domain Manager in SCS clouds using an OpenStack release older than 2024.2, please refer to the supplemental [implementation notes for this standard](https://github.com/SovereignCloudStack/standards/blob/main/Standards/scs-0302-w1-domain-manager-implementation-notes.md). +The implementation notes document describes an alternative implementation that can be used for OpenStack 2024.1 and older releases. + +::: + +## Terminology The following special terms are used throughout this standard document: @@ -31,21 +41,11 @@ The following special terms are used throughout this standard document: [^1]: [OpenStack Documentation: Role-Based Access Control Overview](https://static.opendev.org/docs/patrole/latest/rbac-overview.html) -### Impact - -Applying this standard modifies the API policy configuration of Keystone and introduces a new persona to Keystone to enable IAM self-service for customers within a domain. -Once assigned, this persona allows special Domain Manager users within a domain to manage users, project, groups and role assignments as part of the IAM self-service. - -However, the configuration change introduced by this standard does not automatically assign the Domain Manager persona to any users per default. -Assigning the new persona and granting customers the resulting self-service capabilities is a deliberate action to be taken by the CSP on a per-tenant (i.e. per domain) basis. - -Omitting the provisioning of any Domain Manager users (i.e. not assigning the new persona to any user) will result in an OpenStack cloud that behaves identically to a configuration without the standard applied, making the actual usage of the functionality a CSP's choice and entirely optional. - ## Motivation In the default configuration of Keystone, only users with the `admin` role may manage the IAM resources such as projects, groups and users and their relation through role assignments. -The `admin` role in OpenStack Keystone is not properly scoped when assigned within a domain or project only as due to hard-coded architectural limitations in OpenStack, a user with the `admin` role may escalate their privileges outside of their assigned project or domain boundaries. -Thus, it is not possible to properly give customers a self-service functionality in regards to project, group and user management with the default configuration. +The `admin` role in OpenStack Keystone is not properly scoped when assigned within a domain or project only as due to hard-coded architectural limitations in OpenStack, a user with the `admin` role may escalate their privileges outside their assigned project or domain boundaries. +Thus, it is not possible to properly give customers a self-service functionality in regard to project, group and user management with the default configuration. 
To address this, this standard defines a new Domain Manager persona implemented using a domain-scoped `manager` role in conjunction with appropriate Keystone API policy adjustments to establish a standardized extension to the default Keystone configuration allowing for IAM self-service capabilities for customers within domains. @@ -59,7 +59,7 @@ To address this, this standard defines a new Domain Manager persona implemented ## Design Considerations - the Domain Manager persona MUST support managing projects, groups and users within a specific domain -- the Domain Manager persona MUST be properly scoped to a domain, it MUST NOT gain access to resources outside of its owning domain +- the Domain Manager persona MUST be properly scoped to a domain, it MUST NOT gain access to resources outside its owning domain - the Domain Manager persona MUST NOT be able to manipulate existing roles or create new roles - the Domain Manager persona MUST only be able to assign specific non-administrative\* roles to their managed users where the applicable roles are defined by the CSP - Domain Managers MUST NOT be able to abuse the role assignment functionalities to escalate their own privileges or those of other users beyond the roles defined by the CSP @@ -78,7 +78,7 @@ This results in special permissions being granted to users possessing the role w This poses severe security risks as the proper scoping of the `admin` role is impossible. **Due to this, this approach was discarded early.** -Upstream (OpenStack) is in the process of addressing this across the services but it has not been fully implemented yet, especially for domains[^3]. +Upstream (OpenStack) is in the process of addressing this across the services, but it has not been fully implemented yet, especially for domains[^3]. [^2]: [Launchpad bug: "admin"-ness not properly scoped](https://bugs.launchpad.net/keystone/+bug/968696) @@ -94,180 +94,52 @@ This means that by creating a new role and extending Keystone's API policy confi [^4]: [OpenStack Documentation: Administering Applications that use oslo.policy](https://docs.openstack.org/oslo.policy/latest/admin/index.html) -## Open questions - -### Limitations - -The approach described in this standard imposes the following limitations: +## Decision -1. as a result of the "`identity:list_domains`" rule (see below), Domain Managers are able to see all domains[^5] via "`openstack domain list`" and can inspect the metadata of other domains with "`openstack domain show`" -2. as a result of the "`identity:list_roles`" rule (see below), Domain Managers are able to see all roles via "`openstack role list`" and can inspect the metadata of other roles with "`openstack role show`" +A role named "`manager`" MUST be present in the identity service. -**As a result of points 1 and 2, metadata of all domains and roles will be exposed to all Domain Managers!** +The identity service MUST implement the Domain Manager functionality for this role. +The implementation details depend on the OpenStack Keystone version used. +See the sections below for reference. -If a CSP deems either of these points critical, they may abstain from granting the `"manager"` role to any user in a domain scope, effectively disabling the Domain Manager functionality. See [Impact](#impact). +### For OpenStack Keystone 2024.2 or later -[^5]: see the [corresponding Launchpad bug at Keystone](https://bugs.launchpad.net/keystone/+bug/2041611) +For OpenStack Keystone 2024.2 or later the Domain Manager persona is already integrated natively. 
+To guarantee proper scope protection, the Identity API MUST be configured with "`enforce_scope`" and "`enforce_new_defaults`" enabled for the oslo.policy library. -## Decision +Example entries for the `keystone.conf` configuration file: -A role named "`manager`" is to be created via the Keystone API and the policy adjustments quoted below are to be applied. - -### Policy adjustments - -The following policy has to be applied to Keystone in a verbatim fashion. -The only parts of the policy definitions that may be changed are: - -1. The "`base_*`" definitions to align them to the correct OpenStack defaults matching the OpenStack release of the environment in case those differ from this template. -2. The "`is_domain_managed_role`" definition (see next section below). - -```yaml -# SCS Domain Manager policy configuration - -# Section A: OpenStack base definitons -# The entries beginning with "base_" should be exact copies of the -# default "identity:" definitions for the target OpenStack release. -# They will be extended upon for the manager role below this section. -"base_get_domain": "(role:reader and system_scope:all) or token.domain.id:%(target.domain.id)s or token.project.domain.id:%(target.domain.id)s" -"base_list_domains": "(role:reader and system_scope:all)" -"base_list_roles": "(role:reader and system_scope:all)" -"base_get_role": "(role:reader and system_scope:all)" -"base_list_users": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.domain_id)s)" -"base_get_user": "(role:reader and system_scope:all) or (role:reader and token.domain.id:%(target.user.domain_id)s) or user_id:%(target.user.id)s" -"base_create_user": "(role:admin and system_scope:all) or (role:admin and token.domain.id:%(target.user.domain_id)s)" -"base_update_user": "(role:admin and system_scope:all) or (role:admin and token.domain.id:%(target.user.domain_id)s)" -"base_delete_user": "(role:admin and system_scope:all) or (role:admin and token.domain.id:%(target.user.domain_id)s)" -"base_list_projects": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.domain_id)s)" -"base_get_project": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.project.domain_id)s) or project_id:%(target.project.id)s" -"base_create_project": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.project.domain_id)s)" -"base_update_project": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.project.domain_id)s)" -"base_delete_project": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.project.domain_id)s)" -"base_list_user_projects": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.user.domain_id)s) or user_id:%(target.user.id)s" -"base_check_grant": "(role:reader and system_scope:all) or ((role:reader and domain_id:%(target.user.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:reader and domain_id:%(target.user.domain_id)s and domain_id:%(target.domain.id)s) or (role:reader and domain_id:%(target.group.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:reader and domain_id:%(target.group.domain_id)s and domain_id:%(target.domain.id)s)) and (domain_id:%(target.role.domain_id)s or None:%(target.role.domain_id)s)" -"base_list_grants": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.user.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:reader and domain_id:%(target.user.domain_id)s and domain_id:%(target.domain.id)s) 
or (role:reader and domain_id:%(target.group.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:reader and domain_id:%(target.group.domain_id)s and domain_id:%(target.domain.id)s)" -"base_create_grant": "(role:admin and system_scope:all) or ((role:admin and domain_id:%(target.user.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:admin and domain_id:%(target.user.domain_id)s and domain_id:%(target.domain.id)s) or (role:admin and domain_id:%(target.group.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:admin and domain_id:%(target.group.domain_id)s and domain_id:%(target.domain.id)s)) and (domain_id:%(target.role.domain_id)s or None:%(target.role.domain_id)s)" -"base_revoke_grant": "(role:admin and system_scope:all) or ((role:admin and domain_id:%(target.user.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:admin and domain_id:%(target.user.domain_id)s and domain_id:%(target.domain.id)s) or (role:admin and domain_id:%(target.group.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:admin and domain_id:%(target.group.domain_id)s and domain_id:%(target.domain.id)s)) and (domain_id:%(target.role.domain_id)s or None:%(target.role.domain_id)s)" -"base_list_role_assignments": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.domain_id)s)" -"base_list_groups": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.group.domain_id)s)" -"base_get_group": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.group.domain_id)s)" -"base_create_group": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.group.domain_id)s)" -"base_update_group": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.group.domain_id)s)" -"base_delete_group": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.group.domain_id)s)" -"base_list_groups_for_user": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.user.domain_id)s) or user_id:%(user_id)s" -"base_list_users_in_group": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.group.domain_id)s)" -"base_remove_user_from_group": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.group.domain_id)s and domain_id:%(target.user.domain_id)s)" -"base_check_user_in_group": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.group.domain_id)s and domain_id:%(target.user.domain_id)s)" -"base_add_user_to_group": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.group.domain_id)s and domain_id:%(target.user.domain_id)s)" - -# Section B: Domain Manager Extensions - -# classify domain managers with a special role -"is_domain_manager": "role:manager" - -# specify a rule that whitelists roles which domain admins are permitted -# to assign and revoke within their domain -"is_domain_managed_role": "'member':%(target.role.name)s or 'load-balancer_member':%(target.role.name)s" - -# allow domain admins to retrieve their own domain (does not need changes) -"identity:get_domain": "rule:base_get_domain or rule:admin_required" - -# list_domains is needed for GET /v3/domains?name=... requests -# this is mandatory for things like -# `create user --domain $DOMAIN_NAME $USER_NAME` to correctly discover -# domains by name -"identity:list_domains": "rule:is_domain_manager or rule:base_list_domains or rule:admin_required" - -# list_roles is needed for GET /v3/roles?name=... 
requests -# this is mandatory for things like `role add ... $ROLE_NAME`` to correctly -# discover roles by name -"identity:list_roles": "rule:is_domain_manager or rule:base_list_roles or rule:admin_required" - -# get_role is needed for GET /v3/roles/{role_id} requests -# this is mandatory for the OpenStack SDK to properly process role assignments -# which are issued by role id instead of name -"identity:get_role": "(rule:is_domain_manager and rule:is_domain_managed_role) or rule:base_get_role or rule:admin_required" - -# allow domain admins to manage users within their domain -"identity:list_users": "(rule:is_domain_manager and token.domain.id:%(target.domain_id)s) or rule:base_list_users or rule:admin_required" -"identity:get_user": "(rule:is_domain_manager and token.domain.id:%(target.user.domain_id)s) or rule:base_get_user or rule:admin_required" -"identity:create_user": "(rule:is_domain_manager and token.domain.id:%(target.user.domain_id)s) or rule:base_create_user or rule:admin_required" -"identity:update_user": "(rule:is_domain_manager and token.domain.id:%(target.user.domain_id)s) or rule:base_update_user or rule:admin_required" -"identity:delete_user": "(rule:is_domain_manager and token.domain.id:%(target.user.domain_id)s) or rule:base_delete_user or rule:admin_required" - -# allow domain admins to manage projects within their domain -"identity:list_projects": "(rule:is_domain_manager and token.domain.id:%(target.domain_id)s) or rule:base_list_projects or rule:admin_required" -"identity:get_project": "(rule:is_domain_manager and token.domain.id:%(target.project.domain_id)s) or rule:base_get_project or rule:admin_required" -"identity:create_project": "(rule:is_domain_manager and token.domain.id:%(target.project.domain_id)s) or rule:base_create_project or rule:admin_required" -"identity:update_project": "(rule:is_domain_manager and token.domain.id:%(target.project.domain_id)s) or rule:base_update_project or rule:admin_required" -"identity:delete_project": "(rule:is_domain_manager and token.domain.id:%(target.project.domain_id)s) or rule:base_delete_project or rule:admin_required" -"identity:list_user_projects": "(rule:is_domain_manager and token.domain.id:%(target.user.domain_id)s) or rule:base_list_user_projects or rule:admin_required" - -# allow domain managers to manage role assignments within their domain -# (restricted to specific roles by the 'is_domain_managed_role' rule) -# -# project-level role assignment to user within domain -"is_domain_user_project_grant": "token.domain.id:%(target.user.domain_id)s and token.domain.id:%(target.project.domain_id)s" -# project-level role assignment to group within domain -"is_domain_group_project_grant": "token.domain.id:%(target.group.domain_id)s and token.domain.id:%(target.project.domain_id)s" -# domain-level role assignment to group -"is_domain_level_group_grant": "token.domain.id:%(target.group.domain_id)s and token.domain.id:%(target.domain.id)s" -# domain-level role assignment to user -"is_domain_level_user_grant": "token.domain.id:%(target.user.domain_id)s and token.domain.id:%(target.domain.id)s" -"domain_manager_grant": "rule:is_domain_manager and (rule:is_domain_user_project_grant or rule:is_domain_group_project_grant or rule:is_domain_level_group_grant or rule:is_domain_level_user_grant)" -"identity:check_grant": "rule:domain_manager_grant or rule:base_check_grant or rule:admin_required" -"identity:list_grants": "rule:domain_manager_grant or rule:base_list_grants or rule:admin_required" -"identity:create_grant": 
"(rule:domain_manager_grant and rule:is_domain_managed_role) or rule:base_create_grant or rule:admin_required" -"identity:revoke_grant": "(rule:domain_manager_grant and rule:is_domain_managed_role) or rule:base_revoke_grant or rule:admin_required" -"identity:list_role_assignments": "(rule:is_domain_manager and token.domain.id:%(target.domain_id)s) or rule:base_list_role_assignments or rule:admin_required" - - -# allow domain managers to manage groups within their domain -"identity:list_groups": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s) or (role:reader and system_scope:all) or rule:base_list_groups or rule:admin_required" -"identity:get_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s) or (role:reader and system_scope:all) or rule:base_get_group or rule:admin_required" -"identity:create_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s) or rule:base_create_group or rule:admin_required" -"identity:update_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s) or rule:base_update_group or rule:admin_required" -"identity:delete_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s) or rule:base_delete_group or rule:admin_required" -"identity:list_groups_for_user": "(rule:is_domain_manager and token.domain.id:%(target.user.domain_id)s) or rule:base_list_groups_for_user or rule:admin_required" -"identity:list_users_in_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s) or rule:base_list_users_in_group or rule:admin_required" -"identity:remove_user_from_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s and token.domain.id:%(target.user.domain_id)s) or rule:base_remove_user_from_group or rule:admin_required" -"identity:check_user_in_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s and token.domain.id:%(target.user.domain_id)s) or rule:base_check_user_in_group or rule:admin_required" -"identity:add_user_to_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s and token.domain.id:%(target.user.domain_id)s) or rule:base_add_user_to_group or rule:admin_required" +```ini +[oslo_policy] +enforce_new_defaults = True +enforce_scope = True ``` -Note that the policy file begins with a list of "`base_*`" rule definitions ("Section A"). -These mirror the default policies of recent OpenStack releases. -They are used as a basis for the domain-manager-specific changes which are implemented in "Section B" where they are referenced to via "`or rule:base_*`" accordingly. -The section of "`base_*`" rules is meant for easy maintenance/update of default rules while keeping the domain-manager-specific rules separate. - -> **Note:** -> The "`or rule:admin_required`" appendix to the rule defintions in "Section B" is included for backwards compatibility with environments not yet fully configured for the new secure RBAC standard[^6]. - -[^6]: [OpenStack Technical Committee Governance Documents: Consistent and Secure Default RBAC](https://governance.openstack.org/tc/goals/selected/consistent-and-secure-rbac.html) +The "`is_domain_managed_role`" policy rule MAY be adjusted using a dedicated `policy.yaml` file for the Identity API in order to adjust the set of roles a Domain Manager is able to assign/revoke. +When doing so, the `admin` role MUST NOT be added to this set. 
-#### Specifying manageable roles via "`is_domain_managed_role`" +#### Note about upgrading from SCS Domain Manager to native integration -The "`is_domain_managed_role`" rule of the above policy template may be adjusted according to the requirements of the CSP and infrastructure architecture to specify different or multiple roles as manageable by Domain Managers as long as the policy rule adheres to the following: +In case the Identity API was upgraded from an older version where the policy-based Domain Manager implementation of SCS described in the [implementation notes for this standard](https://github.com/SovereignCloudStack/standards/blob/main/Standards/scs-0302-w1-domain-manager-implementation-notes.md) was still in use, the policies described there MUST be removed. +The only exception to this is the "`is_domain_managed_role`" rule in case any adjustments have been made to that rule and the CSP wants to preserve them. -- the "`is_domain_managed_role`" rule MUST NOT contain the "`admin`" role, neither directly nor transitively -- the "`is_domain_managed_role`" rule MUST define all applicable roles directly, it MUST NOT contain a "`rule:`" reference within itself +### For OpenStack Keystone 2024.1 or below -##### Example: permitting multiple roles +For OpenStack Keystone 2024.1 or below, the Domain Manager functionality MUST be implemented using API policies. +For details, refer to the [implementation notes for this standard](https://github.com/SovereignCloudStack/standards/blob/main/Standards/scs-0302-w1-domain-manager-implementation-notes.md). -The following example permits the "`reader`" role to be assigned/revoked by a Domain Manager in addition to the default "`member`" and "`load-balancer_member`" roles. -Further roles can be appended using the logical `or` directive. +For the release 2024.1 and below, changing the "`enforce_scope`" and "`enforce_new_defaults`" options for the Identity API is not necessary for the Domain Manager implementation. -```yaml -"is_domain_managed_role": "'member':%(target.role.name)s or 'load-balancer_member':%(target.role.name)s or 'reader':%(target.role.name)s" -``` - -**Note regarding the `manager` role** +## Related Documents -When adjusting the "`is_domain_managed_role`" rule a CSP might opt to also include the "`manager`" role itself in the manageable roles, resulting in Domain Managers being able to propagate the Domain Manager capabilities to other users within their domain. -This increases the self-service capabilities of the customer but introduces risks of Domain Managers also being able to revoke this role from themselves or each other (within their domain) in an unintended fashion. +### Upstream contribution spec for the Domain Manager functionality -CSPs have to carefully evaluate whether Domain Manager designation authority should reside solely on their side or be part of the customer self-service scope and decide about adding "`'manager':%(target.role.name)s`" to the rule accordingly. +**Description:** Upstream Identity service specification to introduce the Domain Manager functionality natively in OpenStack Keystone. +After implementing the Domain Manager functionality as described in the [implementation notes for this standard](https://github.com/SovereignCloudStack/standards/blob/main/Standards/scs-0302-w1-domain-manager-implementation-notes.md), the SCS project contributed the functionality to the official OpenStack project. 
+This eventually resulted in the feature being integrated natively in OpenStack Keystone starting with the 2024.2 release. +The specification was the starting point of the contribution. -## Related Documents +**Link:** [OpenStack Identity Specs: Domain Manager Persona for domain-scoped self-service administration](https://specs.openstack.org/openstack/keystone-specs/specs/keystone/2024.1/domain-manager-persona.html) ### "admin"-ness not properly scoped @@ -374,4 +246,4 @@ Rationale: Links / Comments / References: - [SIG IAM meeting protocol entry](https://input.scs.community/2023-scs-sig-iam#Domain-Admin-rights-for-SCS-IaaS-Customers-184) -- [issue commment about decision](https://github.com/SovereignCloudStack/issues/issues/184#issuecomment-1670985934) +- [issue comment about decision](https://github.com/SovereignCloudStack/issues/issues/184#issuecomment-1670985934) diff --git a/Standards/scs-0302-w1-domain-manager-implementation-notes.md b/Standards/scs-0302-w1-domain-manager-implementation-notes.md new file mode 100644 index 000000000..6e2c60298 --- /dev/null +++ b/Standards/scs-0302-w1-domain-manager-implementation-notes.md @@ -0,0 +1,194 @@ +--- +title: Domain Manager implementation notes +type: Supplement +track: IAM +status: Draft +supplements: + - scs-0302-v1-domain-manager-role.md +--- + +## Implementation notes + +:::caution + +If a Keystone release of OpenStack 2024.2 or later is used, **the policy configuration described in this document MUST be removed again** in case it was applied in the past prior to the upgrade. + +::: + +:::info + +The implementation described in this document only applies to Keystone releases prior to the OpenStack release 2024.2 ("Dalmatian"). +This document describes a transitional solution to offer the Domain Manager functionality for SCS clouds based on an OpenStack release earlier than 2024.2. + +Beginning with the 2024.2 release of OpenStack, the Domain Manager persona is integrated natively into Keystone and the implementation described below is unnecessary and might conflict with the native implementation. + +::: + +### Policy adjustments + +The following policy can be applied to Keystone releases older than 2024.2 ("Dalmatian"). +It mimics the Domain Manager persona implemented by Keystone starting with version 2024.2 and makes the functionality available for earlier releases of Keystone. + +The only parts of the policy definitions below that may be changed are: + +1. The "`base_*`" definitions to align them to the correct OpenStack defaults matching the OpenStack release of the environment in case those differ from this template. +2. The "`is_domain_managed_role`" definition (see next section below). + +```yaml +# SCS Domain Manager policy configuration + +# Section A: OpenStack base definitions +# The entries beginning with "base_" should be exact copies of the +# default "identity:" definitions for the target OpenStack release. +# They will be extended upon for the manager role below this section. 
+"base_get_domain": "(role:reader and system_scope:all) or token.domain.id:%(target.domain.id)s or token.project.domain.id:%(target.domain.id)s" +"base_list_domains": "(role:reader and system_scope:all)" +"base_list_roles": "(role:reader and system_scope:all)" +"base_get_role": "(role:reader and system_scope:all)" +"base_list_users": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.domain_id)s)" +"base_get_user": "(role:reader and system_scope:all) or (role:reader and token.domain.id:%(target.user.domain_id)s) or user_id:%(target.user.id)s" +"base_create_user": "(role:admin and system_scope:all) or (role:admin and token.domain.id:%(target.user.domain_id)s)" +"base_update_user": "(role:admin and system_scope:all) or (role:admin and token.domain.id:%(target.user.domain_id)s)" +"base_delete_user": "(role:admin and system_scope:all) or (role:admin and token.domain.id:%(target.user.domain_id)s)" +"base_list_projects": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.domain_id)s)" +"base_get_project": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.project.domain_id)s) or project_id:%(target.project.id)s" +"base_create_project": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.project.domain_id)s)" +"base_update_project": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.project.domain_id)s)" +"base_delete_project": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.project.domain_id)s)" +"base_list_user_projects": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.user.domain_id)s) or user_id:%(target.user.id)s" +"base_check_grant": "(role:reader and system_scope:all) or ((role:reader and domain_id:%(target.user.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:reader and domain_id:%(target.user.domain_id)s and domain_id:%(target.domain.id)s) or (role:reader and domain_id:%(target.group.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:reader and domain_id:%(target.group.domain_id)s and domain_id:%(target.domain.id)s)) and (domain_id:%(target.role.domain_id)s or None:%(target.role.domain_id)s)" +"base_list_grants": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.user.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:reader and domain_id:%(target.user.domain_id)s and domain_id:%(target.domain.id)s) or (role:reader and domain_id:%(target.group.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:reader and domain_id:%(target.group.domain_id)s and domain_id:%(target.domain.id)s)" +"base_create_grant": "(role:admin and system_scope:all) or ((role:admin and domain_id:%(target.user.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:admin and domain_id:%(target.user.domain_id)s and domain_id:%(target.domain.id)s) or (role:admin and domain_id:%(target.group.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:admin and domain_id:%(target.group.domain_id)s and domain_id:%(target.domain.id)s)) and (domain_id:%(target.role.domain_id)s or None:%(target.role.domain_id)s)" +"base_revoke_grant": "(role:admin and system_scope:all) or ((role:admin and domain_id:%(target.user.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:admin and domain_id:%(target.user.domain_id)s and domain_id:%(target.domain.id)s) or (role:admin and domain_id:%(target.group.domain_id)s and domain_id:%(target.project.domain_id)s) or (role:admin 
and domain_id:%(target.group.domain_id)s and domain_id:%(target.domain.id)s)) and (domain_id:%(target.role.domain_id)s or None:%(target.role.domain_id)s)" +"base_list_role_assignments": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.domain_id)s)" +"base_list_groups": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.group.domain_id)s)" +"base_get_group": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.group.domain_id)s)" +"base_create_group": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.group.domain_id)s)" +"base_update_group": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.group.domain_id)s)" +"base_delete_group": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.group.domain_id)s)" +"base_list_groups_for_user": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.user.domain_id)s) or user_id:%(user_id)s" +"base_list_users_in_group": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.group.domain_id)s)" +"base_remove_user_from_group": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.group.domain_id)s and domain_id:%(target.user.domain_id)s)" +"base_check_user_in_group": "(role:reader and system_scope:all) or (role:reader and domain_id:%(target.group.domain_id)s and domain_id:%(target.user.domain_id)s)" +"base_add_user_to_group": "(role:admin and system_scope:all) or (role:admin and domain_id:%(target.group.domain_id)s and domain_id:%(target.user.domain_id)s)" + +# Section B: Domain Manager Extensions + +# classify domain managers with a special role +"is_domain_manager": "role:manager" + +# specify a rule that whitelists roles which domain admins are permitted +# to assign and revoke within their domain +"is_domain_managed_role": "'member':%(target.role.name)s or 'load-balancer_member':%(target.role.name)s" + +# allow domain admins to retrieve their own domain (does not need changes) +"identity:get_domain": "rule:base_get_domain or rule:admin_required" + +# list_domains is needed for GET /v3/domains?name=... requests +# this is mandatory for things like +# `create user --domain $DOMAIN_NAME $USER_NAME` to correctly discover +# domains by name +"identity:list_domains": "rule:is_domain_manager or rule:base_list_domains or rule:admin_required" + +# list_roles is needed for GET /v3/roles?name=... requests +# this is mandatory for things like `role add ... 
$ROLE_NAME`` to correctly +# discover roles by name +"identity:list_roles": "rule:is_domain_manager or rule:base_list_roles or rule:admin_required" + +# get_role is needed for GET /v3/roles/{role_id} requests +# this is mandatory for the OpenStack SDK to properly process role assignments +# which are issued by role id instead of name +"identity:get_role": "(rule:is_domain_manager and rule:is_domain_managed_role) or rule:base_get_role or rule:admin_required" + +# allow domain admins to manage users within their domain +"identity:list_users": "(rule:is_domain_manager and token.domain.id:%(target.domain_id)s) or rule:base_list_users or rule:admin_required" +"identity:get_user": "(rule:is_domain_manager and token.domain.id:%(target.user.domain_id)s) or rule:base_get_user or rule:admin_required" +"identity:create_user": "(rule:is_domain_manager and token.domain.id:%(target.user.domain_id)s) or rule:base_create_user or rule:admin_required" +"identity:update_user": "(rule:is_domain_manager and token.domain.id:%(target.user.domain_id)s) or rule:base_update_user or rule:admin_required" +"identity:delete_user": "(rule:is_domain_manager and token.domain.id:%(target.user.domain_id)s) or rule:base_delete_user or rule:admin_required" + +# allow domain admins to manage projects within their domain +"identity:list_projects": "(rule:is_domain_manager and token.domain.id:%(target.domain_id)s) or rule:base_list_projects or rule:admin_required" +"identity:get_project": "(rule:is_domain_manager and token.domain.id:%(target.project.domain_id)s) or rule:base_get_project or rule:admin_required" +"identity:create_project": "(rule:is_domain_manager and token.domain.id:%(target.project.domain_id)s) or rule:base_create_project or rule:admin_required" +"identity:update_project": "(rule:is_domain_manager and token.domain.id:%(target.project.domain_id)s) or rule:base_update_project or rule:admin_required" +"identity:delete_project": "(rule:is_domain_manager and token.domain.id:%(target.project.domain_id)s) or rule:base_delete_project or rule:admin_required" +"identity:list_user_projects": "(rule:is_domain_manager and token.domain.id:%(target.user.domain_id)s) or rule:base_list_user_projects or rule:admin_required" + +# allow domain managers to manage role assignments within their domain +# (restricted to specific roles by the 'is_domain_managed_role' rule) +# +# project-level role assignment to user within domain +"is_domain_user_project_grant": "token.domain.id:%(target.user.domain_id)s and token.domain.id:%(target.project.domain_id)s" +# project-level role assignment to group within domain +"is_domain_group_project_grant": "token.domain.id:%(target.group.domain_id)s and token.domain.id:%(target.project.domain_id)s" +# domain-level role assignment to group +"is_domain_level_group_grant": "token.domain.id:%(target.group.domain_id)s and token.domain.id:%(target.domain.id)s" +# domain-level role assignment to user +"is_domain_level_user_grant": "token.domain.id:%(target.user.domain_id)s and token.domain.id:%(target.domain.id)s" +"domain_manager_grant": "rule:is_domain_manager and (rule:is_domain_user_project_grant or rule:is_domain_group_project_grant or rule:is_domain_level_group_grant or rule:is_domain_level_user_grant)" +"identity:check_grant": "rule:domain_manager_grant or rule:base_check_grant or rule:admin_required" +"identity:list_grants": "rule:domain_manager_grant or rule:base_list_grants or rule:admin_required" +"identity:create_grant": "(rule:domain_manager_grant and rule:is_domain_managed_role) or 
rule:base_create_grant or rule:admin_required" +"identity:revoke_grant": "(rule:domain_manager_grant and rule:is_domain_managed_role) or rule:base_revoke_grant or rule:admin_required" +"identity:list_role_assignments": "(rule:is_domain_manager and token.domain.id:%(target.domain_id)s) or rule:base_list_role_assignments or rule:admin_required" + +# allow domain managers to manage groups within their domain +"identity:list_groups": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s) or (role:reader and system_scope:all) or rule:base_list_groups or rule:admin_required" +"identity:get_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s) or (role:reader and system_scope:all) or rule:base_get_group or rule:admin_required" +"identity:create_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s) or rule:base_create_group or rule:admin_required" +"identity:update_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s) or rule:base_update_group or rule:admin_required" +"identity:delete_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s) or rule:base_delete_group or rule:admin_required" +"identity:list_groups_for_user": "(rule:is_domain_manager and token.domain.id:%(target.user.domain_id)s) or rule:base_list_groups_for_user or rule:admin_required" +"identity:list_users_in_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s) or rule:base_list_users_in_group or rule:admin_required" +"identity:remove_user_from_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s and token.domain.id:%(target.user.domain_id)s) or rule:base_remove_user_from_group or rule:admin_required" +"identity:check_user_in_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s and token.domain.id:%(target.user.domain_id)s) or rule:base_check_user_in_group or rule:admin_required" +"identity:add_user_to_group": "(rule:is_domain_manager and token.domain.id:%(target.group.domain_id)s and token.domain.id:%(target.user.domain_id)s) or rule:base_add_user_to_group or rule:admin_required" +``` + +Note that the policy file begins with a list of "`base_*`" rule definitions ("Section A"). +These mirror the default policies of recent OpenStack releases. +They are used as a basis for the domain-manager-specific changes which are implemented in "Section B" where they are referenced to via "`or rule:base_*`" accordingly. +The section of "`base_*`" rules is meant for easy maintenance/update of default rules while keeping the domain-manager-specific rules separate. + +> **Note:** +> The "`or rule:admin_required`" appendix to the rule definitions in "Section B" is included for backwards compatibility with environments not yet fully configured for the new secure RBAC standard[^1]. 
+ +[^1]: [OpenStack Technical Committee Governance Documents: Consistent and Secure Default RBAC](https://governance.openstack.org/tc/goals/selected/consistent-and-secure-rbac.html) + +#### Specifying manageable roles via "`is_domain_managed_role`" + +The "`is_domain_managed_role`" rule of the above policy template may be adjusted according to the requirements of the CSP and infrastructure architecture to specify different or multiple roles as manageable by Domain Managers as long as the policy rule adheres to the following: + +- the "`is_domain_managed_role`" rule MUST NOT contain the "`admin`" role, neither directly nor transitively +- the "`is_domain_managed_role`" rule MUST define all applicable roles directly; it MUST NOT contain a "`rule:`" reference within itself + +##### Example: permitting multiple roles + +The following example permits the "`reader`" role to be assigned/revoked by a Domain Manager in addition to the default "`member`" and "`load-balancer_member`" roles. +Further roles can be appended using the logical `or` directive. + +```yaml +"is_domain_managed_role": "'member':%(target.role.name)s or 'load-balancer_member':%(target.role.name)s or 'reader':%(target.role.name)s" +``` + +**Note regarding the `manager` role** + +When adjusting the "`is_domain_managed_role`" rule, a CSP might opt to also include the "`manager`" role itself in the manageable roles, resulting in Domain Managers being able to propagate the Domain Manager capabilities to other users within their domain. +This increases the self-service capabilities of the customer but introduces risks of Domain Managers also being able to revoke this role from themselves or each other (within their domain) in an unintended fashion. + +CSPs have to carefully evaluate whether Domain Manager designation authority should reside solely on their side or be part of the customer self-service scope and decide about adding "`'manager':%(target.role.name)s`" to the rule accordingly. + +### Impact + +Applying this implementation modifies the API policy configuration of Keystone and introduces a new persona to Keystone to enable IAM self-service for customers within a domain. +Once assigned, this persona allows special Domain Manager users within a domain to manage users, projects, groups and role assignments as part of the IAM self-service. + +However, the configuration change introduced by this implementation does not automatically assign the Domain Manager persona to any users by default. +Assigning the new persona and granting customers the resulting self-service capabilities is a deliberate action to be taken by the CSP on a per-tenant (i.e. per domain) basis. + +Omitting the provisioning of any Domain Manager users (i.e. not assigning the new persona to any user) will result in an OpenStack cloud that behaves identically to a configuration without the implementation applied, making the actual usage of the functionality a CSP's choice and entirely optional.
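As a minimal illustration of this deliberate per-domain assignment (the domain name `customer-a` and the user name `alice` are placeholders, not part of this document), a CSP could create the `manager` role and grant the Domain Manager persona with the OpenStack CLI roughly as follows:

```bash
# Create the "manager" role once (skip if the role already exists)
openstack role create manager

# Grant the Domain Manager persona to a customer user,
# scoped to that customer's domain
openstack role add --domain customer-a --user alice manager
```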
+ +#### Security implications + +As a result of the "`identity:list_roles`" rule (see above), Domain Managers are able to see all roles via "`openstack role list`" and can inspect the metadata of any role with "`openstack role show`" diff --git a/Standards/scs-0400-v1-status-page-create-decision.md b/Standards/scs-0400-v1-status-page-create-decision.md index 139a05675..0443cd4b2 100644 --- a/Standards/scs-0400-v1-status-page-create-decision.md +++ b/Standards/scs-0400-v1-status-page-create-decision.md @@ -9,13 +9,13 @@ enhances: status-page-comparison.md ## Introduction Creating and maintaining IT infrastructure is a complex task. -Any kind of consumer (e.g. operators, cutsomers) can +Any kind of consumer (e.g. operators, customers) can be supported by presenting the status of all possible parts of the serving infrastructure. Whether a service is not reachable or the used hardware is having an outage we want the consumers to be easily informed by using a "Status Page" application. The need for a "Status Page" came up early in the SCS project and the requirements a "Status Page" application -has to fulfill were defined and written down on 2022-06-02 as a +has to fulfill were defined and written down on 2022-06-02 as the [MVP-0 epic](https://github.com/SovereignCloudStack/issues/issues/123). The upcoming research on existing solutions came to the conclusion that we want to create a new "Status Page" application. @@ -48,7 +48,7 @@ we pick up an existing project and try to get it in shape for our use case. It w own additional patches. So there will be a reference implementation that will match the requirements we have. -In addition there will be an architecture design documentation. So if the reference +In addition, there will be an architecture design documentation. So if the reference implementation may not fit to you, it will be possible to create your own application. ## Status Page Requirements @@ -60,7 +60,7 @@ implementation may not fit to you, it will be possible to create your own applic - support that components are only visible to a subset of users - implies that there is a role that is read-only - On-Prem use case might be handled by having an authenticating reverse proxy in front -- The status page applicaton should allow for simple and easy theming +- The status page application should allow for simple and easy theming - Page = (Possibly simple) Web-UI @@ -101,15 +101,15 @@ implementation may not fit to you, it will be possible to create your own applic - to minimize the probability of making errors, updating the status of a component should not be hard brainwork - updates can be both machine generated status changes (triggered e.g. by health monitoring) - as well as updates from human operators + and updates from human operators - updating a status should allow the CSP Operator to do that in a fashion that either pushes infos to the subscribers or just updates the status on the status page - updating the status can either be toggling the status of the component or can be accompanied by additional textual information. - When updating a status with textual information the status page application should make it - easy for me as the CSP Operator to do in a way that if different people submit infos over time - they are presented in a similar way (eg. the status page application should guide so that the - resulting infos are presented in a identical way. 
Example: when updating infos of an incident + easy for me as the CSP Operator to do in a way, that if different people submit infos over time, + they are presented in a similar way (e.g. the status page application should guide so that the + resulting infos are presented in an identical way). Example: when updating infos of an incident over time the timeline should automatically be sorted by the status page application so that it does not depend on the Operator whether the newest info is on top or at the bottom. This is typical thing that varies if several people update items @@ -153,7 +153,7 @@ With those requirements in mind the projects that initially were found, were eva | user management | ✅ | ❌ | ❌ | ❌ | ✅ by OIDC | ⁇ through github? | ❌ | | different output format on notification | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | | external hosting | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ looks like you are limited to github | ✅ | -| project healthy | ❌ last commit 17 months | ❌ last commit 3 years | ❌ last commit 5 months | ✅ last commit 2 months | ✅ recent activities | ✅ recent activities | ❌ archived and abondend by the owner | +| project healthy | ❌ last commit 17 months | ❌ last commit 3 years | ❌ last commit 5 months | ✅ last commit 2 months | ✅ recent activities | ✅ recent activities | ❌ archived and abandoned by the owner | | documentation | ✅ API ❌ User Documentation | ❌ | ❌ | ❌ | ✅ | ⁇u | ❌ not reachable anymore | | git based | ❌ | ✅ | ❌ | ✅ | ❌ | ✅ | ⁇ a netlify based installation is able to communicate with github | | project page | [project page](https://cachethq.io/) | [project page](https://github.com/weeblrpress/clearstatus) | [project page](https://www.brotandgames.com/ciao/) | [project page](https://cstate.netlify.app/) | [project page](https://gatus.io/) | [project page](https://github.com/tadhglewis/issue-status) | [project page](https://marquez.co/statusfy) | diff --git a/Standards/scs-0401-v1-status-page-reference-implementation-decision.md b/Standards/scs-0401-v1-status-page-reference-implementation-decision.md index eca9480ae..2f9eb5bdf 100644 --- a/Standards/scs-0401-v1-status-page-reference-implementation-decision.md +++ b/Standards/scs-0401-v1-status-page-reference-implementation-decision.md @@ -7,9 +7,9 @@ track: Ops ## Introduction -For the reference implementation of the status page API defined by the [OpenAPI spec](https://github.com/SovereignCloudStack/status-page-openapi) some decision should be made to which technlogy to be used and why. +For the reference implementation of the status page API defined by the [OpenAPI spec](https://github.com/SovereignCloudStack/status-page-openapi) some decision should be made to which technology to be used and why. -A reference implementation should be of use to most of the intended group, but is not necsessarily applicable for every use case. +A reference implementation should be of use to most of the intended group, but is not necessarily applicable for every use case. ## Motivation @@ -19,9 +19,9 @@ For a reference implementation to be of any use, some common and widely used tec ### Programming Language -The status page application consists of an api server as well as a frontend. For implementing the [api server](https://github.com/SovereignCloudStack/status-page-api), which is generated from the [OpenAPI spec](https://github.com/SovereignCloudStack/status-page-openapi), [Go](https://go.dev/) was chosen, because of maturity and wide spread usage as industry standard. 
Go, in particular, is a modern programming language and is commonly used in network and cloud computing environments. +The status page application consists of an api server as well as a frontend. For implementing the [api server](https://github.com/SovereignCloudStack/status-page-api), which is generated from the [OpenAPI spec](https://github.com/SovereignCloudStack/status-page-openapi), [Go](https://go.dev/) was chosen, because of maturity and widespread usage as industry standard. Go, in particular, is a modern programming language and is commonly used in network and cloud computing environments. ### Database As database, [PostgreSQL](https://www.postgresql.org/) was chosen, since it is a mature, well-known database. PostgreSQL can be run in various environments from small setups to scaled setups. -Furthermore PostgreSQL is a very healthy project with an active community and a solid license. It easily passed the [SCS OSS health check](https://github.com/SovereignCloudStack/standards/blob/main/Drafts/OSS-Health.md). +Furthermore, PostgreSQL is a very healthy project with an active community and a solid license. It easily passed the [SCS OSS health check](https://github.com/SovereignCloudStack/standards/blob/main/Drafts/OSS-Health.md). diff --git a/Standards/scs-0402-v1-status-page-openapi-spec-decision.md b/Standards/scs-0402-v1-status-page-openapi-spec-decision.md index 00ba8a6dd..395825d66 100644 --- a/Standards/scs-0402-v1-status-page-openapi-spec-decision.md +++ b/Standards/scs-0402-v1-status-page-openapi-spec-decision.md @@ -11,7 +11,7 @@ While defining the [OpenAPI spec](https://github.com/SovereignCloudStack/status- ## Requirements -The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in [RFC 2119](https://datatracker.ietf.org/doc/html/rfc2119). +The keywords "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in [RFC 2119](https://datatracker.ietf.org/doc/html/rfc2119). In addition, "FORBIDDEN" is to be interpreted equivalent to "MUST NOT". @@ -35,7 +35,7 @@ UUIDs are used, to ensure uniqueness. Also, they can be visually recognized as i #### Incremental -An `Incremental` is used in combination with other identifiers to identify a sub resource of any kind. `Incremental`s themselves are not globally unique, but unique for every sub resource of an unique resource. +An `Incremental` is used in combination with other identifiers to identify a sub resource of any kind. `Incremental`s themselves are not globally unique, but unique for every sub resource of a unique resource. #### Generation and order @@ -43,11 +43,11 @@ An `Incremental` is used in combination with other identifiers to identify a sub #### SeverityValue -A `SeverityValue` is an unsiged integer ranging from 0 to 100 inclusively. It MUST be utilized by an `Impact` when referenced by a requested `Component` to gauge the severity of the impact on that component. It MUST be added to an `Impact` when refereced by an `Incident`, when its created. While being described as an unsiged integer, implementing this value MAY not require it to be an uint data type in any form, because its range even fits in a signed int8 (byte) data type. +A `SeverityValue` is an unsigned integer ranging from 0 to 100 inclusively. 
It MUST be utilized by an `Impact` when referenced by a requested `Component` to gauge the severity of the impact on that component. It MUST be added to an `Impact` when referenced by an `Incident`, when it's created. While being described as an unsigned integer, implementing this value MAY not require it to be a uint data type in any form, because its range even fits in a signed int8 (byte) data type.
 Each severity value SHOULD be unique, as multiple severities with the same value will be ambiguous.
 
 ### API objects
 
-All objects which are used as payload, either as request or response, are defined by schemas. This centralizes the maintanence of field names and types, for both requests and responses.
+All objects which are used as payload, either as request or response, are defined by schemas. This centralizes the maintenance of field names and types, for both requests and responses.
 
 ### API object fields
 
@@ -62,7 +62,19 @@ Requests to updating operations SHOULD contain the minimum of the changed fields
 
 ### Endpoint naming
 
-The endpoints are named in plural form, even when handeling single objects, to keep uniform paths.
+The endpoints are named in plural form, even when handling single objects, to keep uniform paths.
+
+### Incidents
+
+Incidents are the main information bearers on the status page. They hold most of the data that describes an incident:
+
+- when it happened
+- when it completed or if it is still ongoing
+- which components it affected
+- the stages the incident progressed through
+- all updates related to the resolution of the incident
+
+An incident is considered _active_ while no end time has been set. Incidents whose end time has been set are considered to be _inactive_ or _resolved_.
 
 ### Phase list
 
@@ -105,15 +117,11 @@ Example:
 [
   {
     "displayName": "operational",
-    "value": 25
-  },
-  {
-    "displayName": "maintenance",
-    "value": 50
+    "value": 33
   },
   {
     "displayName": "limited",
-    "value": 75
+    "value": 66
   },
   {
     "displayName": "broken",
@@ -122,30 +130,38 @@ Example:
 ]
 ```
 
+A special severity of type "maintenance" is given the exact value of 0.
+
 This means:
 
-- operational from 0 to 25
-- maintenance from 26 to 50
-- limited from 51 to 75
-- broken from 76 to 100.
+- maintenance at 0
+- operational from 1 to 33
+- limited from 34 to 66
+- broken from 67 to 100.
 
 A value of 100 is the maximum of the severity value.
 
-A severity with the value of 100 MUST always be supplied. This is the highest severity for the system. If no severity with a value of 100 exists, e.g. the highest severity value is set at 90, an `Impact` with a higher `SeverityValue` WILL be considered to be an _unkown_ severity.
+A severity with the value of 100 MUST always be supplied. This is the highest severity for the system. If no severity with a value of 100 exists, e.g. the highest severity value is set at 90, an `Impact` with a higher `SeverityValue` WILL be considered to be an _unknown_ severity.
 
 ### Component impacts
 
 Components list their impacts, which they are affected by, as read only. Only an incident creates an impact on a component. Components MUST only list their currently active impacts.
+
+An optional `at` parameter can be supplied to set a reference time, showing all incidents that were active at that time, even when they are currently inactive.
+
+### Maintenance
+
+Any `impact` that has the reserved `SeverityValue` of 0 is a maintenance time slot. As such, it MUST include a start and end time. However, both are allowed to be set in the future.
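+
+Purely as an illustration, a maintenance time slot announced for a future window could be represented by an impact shaped roughly like the sketch below; the field names here are hypothetical and only the [OpenAPI spec](https://github.com/SovereignCloudStack/status-page-openapi) is authoritative.
+
+```yaml
+# Hypothetical field names -- the authoritative schema is the OpenAPI spec.
+severity: 0                        # reserved SeverityValue for maintenance
+beganAt: "2024-09-01T22:00:00Z"    # start time, may lie in the future
+endedAt: "2024-09-02T02:00:00Z"    # end time, required for maintenance slots
+```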
+ ### Return of `POST` requests -Generally `POST` requests create new resources. These endpoints do not return the new resource, but a unique identifier to the resource e.g. an UUID. +Generally `POST` requests create new resources. These endpoints do not return the new resource, but a unique identifier to the resource e.g. a UUID. In most cases the new resource won't be used directly after creation. Most often list calls are used. If the new resource is used directly, it can be retrieved by the returned identifier. Payloads to POST requests SHALL NOT include ID or `Incremental` typed fields, it lies in the responsibility of the API server to assign IDs and `Incremental`s to objects. -### Return of `PATCH` requestes +### Return of `PATCH` requests Most commonly `PATCH` requests are used to partially or fully change a resource. These requests do not respond with the changed resource, nor an identifier. @@ -159,4 +175,4 @@ The `PUT` requests is most commonly used to update full objects, whereas `PATCH` ### Authentication and authorization -The API spec does not include either authentication (AuthN) nor authorization (AuthZ) of any kind. The API server MUST be secured by an reverse/auth proxy. +The API spec does not include either authentication (AuthN) nor authorization (AuthZ) of any kind. The API server MUST be secured by a reverse/auth proxy. diff --git a/Standards/scs-0403-v1-csp-kaas-observability-stack.md b/Standards/scs-0403-v1-csp-kaas-observability-stack.md index f8d0d3523..5b22881b1 100644 --- a/Standards/scs-0403-v1-csp-kaas-observability-stack.md +++ b/Standards/scs-0403-v1-csp-kaas-observability-stack.md @@ -83,7 +83,7 @@ Use a mix of [kubernetes-mixin alerts](https://github.com/kubernetes-monitoring/ - S3 compatible bucket as a storage for long term metrics is configured - thanos query-frontend is deployed and configured - thanos query is deployed and configured - - thanos reciever is deployed and configured (simple deployment, non HA, without router) + - thanos receiver is deployed and configured (simple deployment, non HA, without router) - thanos ruler is deployed and configured - thanos compactor is deployed and configured - thanos bucket-web is deployed and configured @@ -97,7 +97,7 @@ Use a mix of [kubernetes-mixin alerts](https://github.com/kubernetes-monitoring/ - There exist Dashboards for KaaS Cluster Health - KaaS L0 dashboard counters are working correctly - Dedicated L0 dashboards are deployed for KaaS and for IaaS monitoring layers - - There exist Dashboards for SCS services endpoinds health (BlackBox exporter) + - There exist Dashboards for SCS services endpoints health (BlackBox exporter) - There exist Dashboards for IaaS layer health - Automatic Setup of Exporters for Observability of managed K8s clusters - KaaS service is mocked @@ -117,13 +117,13 @@ Use a mix of [kubernetes-mixin alerts](https://github.com/kubernetes-monitoring/ A survey was conducted to gather the needs and requirements of a CSP when providing Kubernetes as a Service. The results of the Survey (Questions with answers) were the following: 1. 
What is your understanding of a managed Kubernetes Offering:
-   - Hassle-Free Installation and Maintainance (customer viewpoint); Providing Controlplane and worker nodes and responsibility for correct function but agnostic to workload
-   - Day0, 1 and 2 (~planning, provisioning, operations) full lifecyle management or let customer manages some parts of that, depending on customer contract
+   - Hassle-Free Installation and Maintenance (customer viewpoint); Providing control plane and worker nodes and responsibility for correct function but agnostic to workload
+   - Day0, 1 and 2 (~planning, provisioning, operations) full lifecycle management or letting the customer manage some parts of that, depending on the customer contract
 
 2. What Type and Depth of observability is needed
 
-   - CPU, RAM, HDD and Network usage, Health and Function of Cluster Nodes, Controlplane and if desired Customer Workload
+   - CPU, RAM, HDD and Network usage, Health and Function of Cluster Nodes, control plane and if desired Customer Workload
 
-3. Do you have an observabiltiy infrastructure, if yes, how it is built
+3. Do you have an observability infrastructure, if yes, how it is built
 
    - Grafana/Thanos/Prometheus/Loki/Promtail/Alertmanger Stack, i.e. [Example Infrastructure](https://raw.githubusercontent.com/dNationCloud/kubernetes-monitoring-stack/main/thanos-deployment-architecture.svg)
 
 4. Data Must haves
diff --git a/Standards/scs-0410-v1-gnocchi-as-metering-database.md b/Standards/scs-0410-v1-gnocchi-as-metering-database.md
index 848c33df9..9fce901ed 100644
--- a/Standards/scs-0410-v1-gnocchi-as-metering-database.md
+++ b/Standards/scs-0410-v1-gnocchi-as-metering-database.md
@@ -19,7 +19,7 @@ when it is supposed to be used for billing purposes.
 This document discusses
 how such metering data should be stored within the SCS.
-In partiuclar,
+In particular,
 it provides rationale for the choice of Gnocchi
 as time-series database for metering data within SCS.
diff --git a/Standards/scs-XXXX-v1-security-of-iaas-service-software.md b/Standards/scs-XXXX-v1-security-of-iaas-service-software.md
new file mode 100644
index 000000000..94b1200dd
--- /dev/null
+++ b/Standards/scs-XXXX-v1-security-of-iaas-service-software.md
@@ -0,0 +1,138 @@
+---
+title: Standard for the security of IaaS service software
+type: Standard
+status: Draft
+track: IaaS
+---
+
+## Introduction
+
+Software security relies on bug patches and security updates being available for specific versions of the software.
+The services that build the IaaS Layer should be updated on a regular basis, based on updates provided by their respective authors or distributors.
+But older releases or versions of these services may not receive updates anymore.
+Unpatched versions should not be used in deployments because they are a security risk, so this standard defines how CSPs should deal with software versions and security updates.
+
+## Terminology
+
+| Term | Explanation |
+| ---- | ----------- |
+| CSP | Cloud Service Provider, provider managing the OpenStack infrastructure. |
+| SLURP | Skip Level Upgrade Release Process - a process that allows upgrades between two releases while skipping the one in between them. |
+| OSSN | [OpenStack Security Note](https://wiki.openstack.org/wiki/Security_Notes) - security issues from 3rd parties or due to misconfigurations. |
+| OSSA | [OpenStack Security Advisories](https://security.openstack.org/ossalist.html) - security issues and advice for OpenStack. |
+
+## Motivation
+
+On the IaaS Layer, the software that needs to be considered in the scope of this standard is mainly the APIs of the IaaS services.
+There might also be shared libraries and other dependencies that could be considered part of the IaaS Layer.
+In software projects such as OpenStack, which provide the main services and all APIs, the software is modified and receives bug fixes continuously, and new versions are released on a regular basis.
+Older releases will at some point no longer receive updates, because maintaining more and more releases simultaneously requires too much manpower.
+Thus, older versions will eventually stop receiving security updates as well.
+Using versions that no longer receive updates threatens the baseline security of deployments and should be avoided under all circumstances.
+
+## Design Considerations
+
+It would be possible to define a minimum version of IaaS Layer software to avoid security risks.
+The following paragraphs discuss several options for defining a minimum version or otherwise dealing with security patches.
+
+### Options considered
+
+#### Only Allow the current versions of Software
+
+Considering that OpenStack as one provider of IaaS Layer Software has two releases per year, with one SLURP release per year, this option would require CSPs to update their deployment once or twice a year.
+Updating a whole deployment is a lot of work and also requires good life-cycle management.
+Following only the SLURP releases would reduce this work to once per year.
+
+While following new releases closely already provides a deployment with recent bug fixes and new features, it also makes developing standards easier.
+Differences between releases will accumulate eventually and may render older releases non-compliant with the SCS standards at some point.
+
+On the other hand, on the IaaS level there aren't many breaking changes introduced by releases, and most standards will also work with older releases.
+Security updates and bug fixes are also provided by OpenStack for a few older releases with the state `maintained` according to the OpenStack releases overview[^2].
+Additionally, the [SCS reference implementation](https://github.com/SovereignCloudStack/release-notes/blob/main/Release7.md) integrates OpenStack releases after half a year - so at about the time when the next release is published by OpenStack.
+A CSP that wants to use only SLURP releases and waits for the reference implementation to adopt them will already lag over a year (i.e. 2 OpenStack releases) behind the latest release, so this cannot be considered as using the current version of IaaS Layer software.
+Thus this option can be discarded.
+
+#### Allow only maintained versions of Software
+
+While following the newest releases closely could be advised, there are several downsides to requiring this workflow, even if only for SLURP releases.
+Following the SCS reference implementation, for example, would also mean being a little behind the newest OpenStack release.
+But this is not as bad as it may seem, because security-related fixes and bug fixes are backported to older but still `maintained` releases.
+All releases that are still maintained can be looked up on the OpenStack releases page[^2].
+
+Allowing maintained versions would give CSPs a little bit more time to update and test their environments, while still receiving relevant security updates and bug fixes.
+Also, CSPs that want to become SCS-compliant will not have to take on the burden of upgrading their deployments to very recent releases immediately, but can instead test with an existing release before an upgrade and identify where they need to put in additional work to become SCS-compliant.
+
+One problem is that there might be new features, implemented only in the newest versions of the software, which other SCS standards require for SCS compliance.
+In that case, allowing all maintained versions would mean that customers have to wait up to two years before such a feature becomes available in all SCS-compliant deployments.
+In the case of security-relevant features this is not advisable.
+
+#### Standards implicitly define the minimum versions of Software
+
+Instead of requiring a defined minimum software version centrally, it could be derived from the individual standards.
+Whenever a new behavior is wanted, a standard should be created and a reasonable timeframe given to CSPs to adopt a software version that can fulfill the new standard.
+Through the combination of all standards that are in place, the minimum version for the IaaS service software is implicitly given.
+
+This would avoid having conflicting software versions in terms of feature parity, while also allowing older software.
+Using this approach requires additional advice to CSPs to update or implement patches for security issues.
+
+#### Advise CSPs to integrate software updates
+
+As long as maintained versions of software are used, updates with security patches are available and only need to be integrated.
+This can and should be done in a reasonably short timeframe.
+
+But CSPs may even use releases of IaaS software that are no longer maintained by an open source community, or even closed source implementations of the mandatory IaaS APIs.
+Allowing older versions or closed source software would only be acceptable when CSPs assure (e.g. in documentation) that they themselves will patch the software within their deployments.
+Security bug fixes must be implemented and proof of the fix then provided.
+Only under these circumstances may deployments with older or alternative IaaS Layer software be considered compliant.
+
+This option could be taken for granted, but explicitly advising it may encourage CSPs to take a closer look at their life-cycle management and security risk handling.
+And CSPs using OpenStack could even be encouraged to upgrade their deployments.
+
+#### Dependencies of the IaaS Layer Software
+
+While IaaS service software like OpenStack itself is monitored and security issues are announced in OSSNs and OSSAs, these services have lots of dependencies that are not monitored by the same entity.
+When dependencies have security issues, there might be no OSSN or OSSA, so CSPs also need to watch CVEs concerning these dependencies themselves.
+Those dependencies must also be updated in a reasonable timeframe when a security issue is disclosed.
+
+#### What timeframe is needed to fix the issue?
+
+CSPs should be encouraged to fix security issues as fast as possible.
+Some security issues are very easy to exploit, so attacks on deployments will start as soon as the vulnerability is disclosed.
+Other vulnerabilities require more knowledge and time to be exploited.
+The impact of different vulnerabilities will also differ.
+
+So it can be concluded that some security issues need to be fixed immediately, while for others it is acceptable to take some time.
+The BSI already has some guidance[^1] on how fast CSPs should respond.
+From the moment a vulnerability is disclosed, these are the advised reaction times, ranked by the severity of the vulnerability:
+
+1. Critical (CVSS = 9.0 – 10.0): 3 hours
+2. High (CVSS = 7.0 – 8.9): 3 days
+3. Mid (CVSS = 4.0 – 6.9): 1 month
+4. Low (CVSS = 0.1 – 3.9): 3 months
+
+[^1]: [C5 criteria catalog with timeframes for responses on page 70.](https://www.bsi.bund.de/SharedDocs/Downloads/EN/BSI/CloudComputing/ComplianceControlsCatalogue/2020/C5_2020.pdf?__blob=publicationFile&v=3)
+
+This standard will follow this guidance and refer to these timeframes as "reasonable timeframes".
+
+## Standard for a minimum IaaS Layer Software version
+
+If a deployment is affected by a security issue and a maintained[^2] version of OpenStack is used as the implementation of the IaaS Layer software, security patches noted in OSSNs and OSSAs MUST be integrated within a reasonable timeframe according to the severity of the security issue[^1].
+Otherwise, when the deployment is affected by a security issue, the CSP MUST implement security bug fixes themselves within a reasonable timeframe according to the severity of the security issue[^1].
+
+In both cases a notice of the update MUST be sent to the OSBA, so that compliance will not be revoked.
+
+If a deployment uses a dependency of the IaaS service software which is affected by a security issue, this software also MUST be updated with security patches within a reasonable timeframe[^1].
+
+An open SBOM list MAY be used to communicate the current versions of the software and may be used as proof of updates.
+
+[^2]: [OpenStack versions and their current status](https://releases.openstack.org)
+
+## Conformance Tests
+
+If SBOMs are provided, the version numbers of the software could be checked.
+But this is not a requirement, so there cannot be such a test.
+Tests of the integration of security patches themselves are difficult.
+And even if tests for certain security issues are possible, they might be interpreted as an attack.
+This is the reason there will be no conformance test.
+
+Instead, the standard requires that CSPs provide notice of the fixed vulnerabilities themselves.
diff --git a/Standards/scs-XXXX-w1-security-of-iaas-service-software.md b/Standards/scs-XXXX-w1-security-of-iaas-service-software.md
new file mode 100644
index 000000000..3f0b1df8c
--- /dev/null
+++ b/Standards/scs-XXXX-w1-security-of-iaas-service-software.md
@@ -0,0 +1,45 @@
+---
+title: "SCS Standard for the security of IaaS service software: Implementation and Testing Notes"
+type: Supplement
+track: IaaS
+status: Draft
+supplements:
+  - scs-XXXX-v1-security-of-iaas-service-software.md
+---
+
+## Testing or Detecting security updates in software
+
+It is not always possible to automatically test whether the software has the newest security updates.
+This is because software versions may differ, some CSPs might have added downstream code, or they might use other software than the reference implementation.
+Also, vulnerabilities and their fixes differ greatly with respect to testing; some might not be testable while others are.
+Additionally, testing might be perceived as an attack on the infrastructure.
+So this standard will rely on the work and information CSPs must provide.
+There are different cases and procedures that lead to compliance with this standard; they are addressed in the following parts.
+
+### Procedure to become compliant with the security of IaaS service software Standard
+
+This is the procedure for a new deployment that wants to achieve SCS conformance.
+There are two states such a deployment can be in:
+
+1. When a deployment is newly built or installed, it usually uses software which includes all the latest security and bug fixes.
+Such deployments should be considered compliant with the standard.
+
+2. When a CSP wants to make an older deployment compliant with the SCS standards and thus also with this standard, it should be checked whether the running software is up to date and all vulnerabilities are fixed.
+Any updates or upgrades to even newer versions should be done before the SCS compliance for every other standard is checked.
+Afterwards, the CSP may provide information about the used software in an SBOM or should otherwise provide a notice that the deployment has integrated all necessary vulnerability patches.
+
+### Procedure when new vulnerabilities are discovered
+
+Whenever new vulnerabilities are discovered in IaaS service software like OpenStack, there is either an internal discussion ongoing or it is just a smaller issue.
+In the first case, CSPs should have someone following such discussions and may even help prepare and test patches.
+From the moment the vulnerability is disclosed publicly, the risk of it being actively exploited increases greatly.
+So CSPs MUST watch out for announcements such as OSSAs and OSSNs and, when they are affected, update their deployment within the following timeframes according to the severity of the issue:
+
+1. Critical (CVSS = 9.0 – 10.0): 3 hours
+2. High (CVSS = 7.0 – 8.9): 3 days
+3. Mid (CVSS = 4.0 – 6.9): 1 month
+4. Low (CVSS = 0.1 – 3.9): 3 months
+
+Afterwards, CSPs MUST provide a notice to the OSBA that they are not or no longer affected by the vulnerability.
+This can be done either by stating which patches were integrated or by showing configuration that renders the attack impossible.
+Alternatively, a list of the services in use may be provided, when the affected service is not used in that deployment.
diff --git a/Tests/.secret/.gitkeep b/Tests/.secret/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/Tests/Dockerfile b/Tests/Dockerfile
index 7bbe7f571..d44ff4fdd 100644
--- a/Tests/Dockerfile
+++ b/Tests/Dockerfile
@@ -10,4 +10,4 @@ RUN pip3 install -r requirements.txt
 COPY . .
 
 ENTRYPOINT ["./scs-compliance-check.py"]
-CMD ["scs-compatible-iaas.yaml"]
+CMD ["-h"]
diff --git a/Tests/README.md b/Tests/README.md
index c9c1f3624..9103e9348 100644
--- a/Tests/README.md
+++ b/Tests/README.md
@@ -1,9 +1,8 @@
 # Testsuite for SCS standards
 
 The tool `scs-compliance-check.py` parses a
-[compliance definition file](https://github.com/SovereignCloudStack/standards/blob/main/Standards/scs-0003-v1-sovereign-cloud-standards-yaml.md)
-and executes the test executables referenced in there for
-the specified layer (`iaas` or `kaas`).
+[certificate scope specification](https://github.com/SovereignCloudStack/standards/blob/main/Standards/scs-0003-v1-sovereign-cloud-standards-yaml.md)
+and executes the test executables referenced in there.
 
 ## Local execution (Linux, BSD, ...)
 
@@ -16,30 +15,56 @@ python3 -m venv .venv && source .venv/bin/activate
 pip install -r requirements.txt
 ```
 
+At the moment, there are two cloud layers that can be checked: IaaS and KaaS.
+For both types of checks, the exit code indicates success (0) or failure (!= 0). +You can also request a YAML report using the option `-o OUTPUT.yaml` + +### IaaS checks + With a cloud environment configured in your `~/.config/openstack/clouds.yaml` and `secure.yaml`, then run ```shell -./scs-compliance-check.py scs-compatible.yaml iaas --os-cloud CLOUDNAME +./scs-compliance-check.py -s CLOUDNAME -a os_cloud=CLOUDNAME scs-compatible-iaas.yaml ``` Replace `CLOUDNAME` with the name of your cloud environment as specified in `clouds.yaml`. -The exit code indicates success (0) or failure (!= 0). -You can also request a YAML report using the option `-o OUTPUT.yaml` +### KaaS checks + +Given a kubeconfig file `path/to/kubeconfig.yaml`, run + +```shell +./scs-compliance-check.py -v -a kubeconfig=path/to/kubeconfig.yaml -s SUBJECT scs-compatible-kaas.yaml +``` + +Replace `SUBJECT` with an arbitrary, but meaningful subject name. +Also, please note that the check will always use the `current-context` of the kubeconfig and will +fail if it isn't set. ## Usage information (help output) ```text -Usage: scs-compliance-check.py [options] compliance-spec.yaml layer [layer [layer]] -Options: -v/--verbose: More verbose output - -q/--quiet: Don't output anything but errors - -s/--single-layer: Don't perform required checks for dependant layers - -d/--date YYYY-MM-DD: Check standards valid on specified date instead of today - -V/--version VERS: Force version VERS of the standard (instead of deriving from date) - -c/--os-cloud CLOUD: Use specified cloud env (instead of OS_CLOUD env var) - -o/--output path: Generate yaml report of compliance check under given path +Usage: scs-compliance-check.py [options] SPEC_YAML + +Arguments: + SPEC_YAML: yaml file specifying the certificate scope + +Options: + -v/--verbose: More verbose output + -q/--quiet: Don't output anything but errors + --debug: enables DEBUG logging channel + -d/--date YYYY-MM-DD: Check standards valid on specified date instead of today + -V/--version VERS: Force version VERS of the standard (instead of deriving from date) + -s/--subject SUBJECT: Name of the subject (cloud) under test, for the report + -S/--sections SECTION_LIST: comma-separated list of sections to test (default: all sections) + -t/--tests REGEX: regular expression to select individual testcases based on their ids + -o/--output REPORT_PATH: Generate yaml report of compliance check under given path + -C/--critical-only: Only return critical errors in return code + -a/--assign KEY=VALUE: assign variable to be used for the run (as required by yaml file) + +With -C, the return code will be nonzero precisely when the tests couldn't be run to completion. ``` ## Testing in docker containers @@ -52,22 +77,27 @@ docker build --tag scs-compliance-check . ### Run tests in a docker container +You'll have to bind mount your respective config(s), pass required parameters and the specification file. + +For IaaS: + ```shell -docker run -it --env OS_CLOUD=CLOUDNAME -v ~/.config/openstack:/root/.config/openstack:ro scs-compliance-check +docker run -v ~/.config/openstack:/root/.config/openstack:ro scs-compliance-check -a os_cloud=CLOUDNAME -s CLOUDNAME scs-compatible-iaas.yaml ``` -The Docker entrypoint uses [scs-compatible-iaas.yaml](scs-compatible-iaas.yaml) -on the `iaas` layer by default. You can use an alternative spec file by simply -appending it to the above call, e.g. 
+For KaaS: ```shell -docker run -it --env OS_CLOUD=CLOUDNAME -v ~/.config/openstack:/root/.config/openstack:ro scs-compliance-check my-own-certification.yaml iaas +docker run -v /path/to/kubeconfig.yaml:/root/kubeconfig.yaml:ro scs-compliance-check -a kubeconfig=/root/kubeconfig.yaml -s SUBJECT scs-compatible-kaas.yaml ``` +If you want to test against a cluster running on localhost (e.g., kind cluster), replace +`docker run` with `docker run --net=host` in the above invocation. + ### Debugging ```shell -docker run -it --env OS_CLOUD=CLOUDNAME -v ~/.config/openstack:/root/.config/openstack:ro --entrypoint /bin/bash scs-compliance-check +docker run -it -v ~/.config/openstack:/root/.config/openstack:ro --entrypoint /bin/bash scs-compliance-check ``` ## Information for developers diff --git a/Tests/cleanup.py b/Tests/cleanup.py index 1357f0748..7097ff69b 100755 --- a/Tests/cleanup.py +++ b/Tests/cleanup.py @@ -26,15 +26,18 @@ def print_usage(file=sys.stderr): print("""Usage: cleanup.py [options] This tool cleans the cloud environment CLOUD by removing any resources whose name start with PREFIX. Options: - [-c/--os-cloud OS_CLOUD] sets cloud environment (default from OS_CLOUD env) - [-i/--prefix PREFIX] sets prefix (default from PREFIX env) + [-c/--os-cloud OS_CLOUD] sets cloud environment (default from OS_CLOUD env) + [-p/--prefix PREFIX] sets prefix to identify resources (default from PREFIX env) + [-i/--ipaddr addr[,addr]] list of IP addresses to identify ports to delete (def: delete all) + the specified strings will be matched against the start of the addrs """, end='', file=file) class Janitor: - def __init__(self, conn, prefix=""): + def __init__(self, conn, prefix="", ipfilter=()): self.conn = conn self.prefix = prefix + self.ipaddrs = ipfilter def disconnect_routers(self): logger.debug("disconnect routers") @@ -75,14 +78,38 @@ def cleanup_subnets(self): logger.info(subnet.name) self.conn.network.delete_subnet(subnet) + def port_match(self, port): + """Determine whether port is to be cleaned up: + - If it is connected to a VM/LB/...: False + - It it has a name that starts with the prefix: True + - If it has a name not matching the prefix filter: False + - If it has no name and we do not have IP range filters: True + - Otherwise see if one of the specified IP ranges matches + """ + if port.device_owner: + return False + if port.name.startswith(self.prefix): + return True + if port.name: + return False + if not self.ipaddrs: + return True + for fixed_addr in port.fixed_ips: + ip_addr = fixed_addr["ip_address"] + for ipmatch in self.ipaddrs: + if ip_addr.startswith(ipmatch): + logger.debug(f"{ip_addr} matches {ipmatch}") + return True + return False + def cleanup_ports(self): logger.debug("clean up ports") # FIXME: We can't filter for device_owner = '' unfortunately ports = list(self.conn.network.ports(status="DOWN")) for port in ports: - if port.device_owner: + if not self.port_match(port): continue - logger.info(port.id) + logger.info(f"{port.id}: {port.fixed_ips}") self.conn.network.delete_port(port) def cleanup_volumes(self): @@ -148,8 +175,10 @@ def cleanup_floating_ips(self): # Note: FIPs have no name, so we might clean up unrelated # currently unused FIPs here. 
logger.debug("clean up floating ips") - floating_ips = list(self.conn.search_floating_ips(filters={"attached": False})) + floating_ips = list(self.conn.search_floating_ips()) for floating_ip in floating_ips: + if floating_ip["port_id"]: + continue logger.info(floating_ip.floating_ip_address) self.conn.delete_floating_ip(floating_ip.id) @@ -173,12 +202,14 @@ def main(argv): level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S", ) + openstack.enable_logging(debug=False) prefix = os.environ.get("PREFIX", None) cloud = os.environ.get("OS_CLOUD") + ipaddrs = [] try: - opts, args = getopt.gnu_getopt(argv, "c:p:h", ["os-cloud=", "prefix=", "help"]) + opts, args = getopt.gnu_getopt(argv, "c:p:i:h", ["os-cloud=", "prefix=", "ipaddr=", "help", "debug"]) except getopt.GetoptError as exc: logger.critical(f"{exc}") print_usage() @@ -192,6 +223,10 @@ def main(argv): prefix = opt[1] if opt[0] == "-c" or opt[0] == "--os-cloud": cloud = opt[1] + if opt[0] == "-i" or opt[0] == "--ipaddr": + ipaddrs = opt[1].split(",") + if opt[0] == "--debug": + logging.getLogger().setLevel(logging.DEBUG) if prefix is None: # check for None, because supplying --prefix '' shall be permitted @@ -203,7 +238,7 @@ def main(argv): return 1 with openstack.connect(cloud=cloud) as conn: - Janitor(conn, prefix).cleanup() + Janitor(conn, prefix, ipaddrs).cleanup() if __name__ == "__main__": @@ -211,6 +246,6 @@ def main(argv): sys.exit(main(sys.argv[1:])) except SystemExit: raise - except BaseException as exc: - logger.critical(repr(exc)) - sys.exit(1) + except BaseException: + logger.critical("A critical error occurred, see following traceback") + raise diff --git a/Tests/config.toml b/Tests/config.toml new file mode 100644 index 000000000..6b7a5c71f --- /dev/null +++ b/Tests/config.toml @@ -0,0 +1,84 @@ +[secrets] +keyfile = ".secret/keyfile" +tokenfile = ".secret/tokenfile" + + +[presets.default] +scopes = [ + "scs-compatible-iaas", +] +subjects = [ + "gx-scs", +] + + +[presets.all] +scopes = [ + "scs-compatible-iaas", +] +subjects = [ + "gx-scs", + "artcodix", + "pco-prod1", + "pco-prod2", + "pco-prod3", + "pco-prod4", + "poc-kdo", + "poc-wgcloud", + "regio-a", + "scaleup-occ2", + "syseleven-dus2", + "syseleven-ham1", + "wavestack", +] +workers = 4 + + +[presets.kaas-dev] +scopes = [ + "scs-compatible-kaas", +] +subjects = [ + "kind-current", + "kind-current-1", + "kind-current-2", +] +workers = 1 # better restrict this with clusters running on local machine + + +[scopes.scs-compatible-iaas] +spec = "./scs-compatible-iaas.yaml" + + +[scopes.scs-compatible-kaas] +spec = "./scs-compatible-kaas.yaml" + + +# default subject (not a real subject, but used to declare a default mapping) +# (this is the only mapping declaration that supports using Python string interpolation) +[subjects._.mapping] +os_cloud = "{subject}" +subject_root = "{subject}" + + +[subjects._.kubernetes_setup] +clusterspec = "kaas/clusterspec.yaml" + + +[subjects.kind-current.kubernetes_setup] +kube_plugin = "kind" +kube_plugin_config = "kaas/kind_config.yaml" +clusterspec_cluster = "current-k8s-release" + + +[subjects.kind-current-1.kubernetes_setup] +kube_plugin = "kind" +kube_plugin_config = "kaas/kind_config.yaml" +clusterspec_cluster = "current-k8s-release-1" + + +[subjects.kind-current-2.kubernetes_setup] +kube_plugin = "kind" +kube_plugin_config = "kaas/kind_config.yaml" +clusterspec_cluster = "current-k8s-release-2" + diff --git a/Tests/iaas/entropy/entropy-check.py b/Tests/iaas/entropy/entropy-check.py index ac7aa7771..da4e8a7ef 100755 --- 
a/Tests/iaas/entropy/entropy-check.py +++ b/Tests/iaas/entropy/entropy-check.py @@ -12,19 +12,15 @@ WARNING for violations of recommendations, DEBUG for background information and problems that don't hinder the test. """ -from collections import Counter +from collections import Counter, defaultdict import getopt import logging -from operator import attrgetter import os import re import sys -import tempfile import time import warnings -import fabric -import invoke import openstack import openstack.cloud @@ -34,6 +30,7 @@ # prefix ephemeral resources with '_scs-' to rule out any confusion with important resources # (this enables us to automatically dispose of any lingering resources should this script be killed) NETWORK_NAME = "_scs-0101-net" +SUBNET_NAME = "_scs-0101-subnet" ROUTER_NAME = "_scs-0101-router" SERVER_NAME = "_scs-0101-server" SECURITY_GROUP_NAME = "_scs-0101-group" @@ -52,22 +49,31 @@ FLAVOR_OPTIONAL = ("hw_rng:rate_bytes", "hw_rng:rate_period") +TIMEOUT = 5 * 60 # timeout in seconds after which we no longer wait for the VM to complete the run +MARKER = '_scs-test-' +SERVER_USERDATA_GENERIC = """ +#cloud-config +# apt-placeholder +packages: + - rng-tools5 +runcmd: + - echo '_scs-test-entropy-avail'; cat /proc/sys/kernel/random/entropy_avail + - echo '_scs-test-fips-test'; cat /dev/random | rngtest -c 1000 + - echo '_scs-test-rngd'; sudo systemctl status rngd + - echo '_scs-test-virtio-rng'; cat /sys/devices/virtual/misc/hw_random/rng_available; sudo /bin/sh -c 'od -vAn -N2 -tu2 < /dev/hwrng' + - echo '_scs-test-end' +final_message: "_scs-test-end" +""".strip() # we need to set package source on Ubuntu, because the default is not fixed and can lead to Heisenbugs SERVER_USERDATA = { - 'ubuntu': """#cloud-config -apt: + 'ubuntu': SERVER_USERDATA_GENERIC.replace('# apt-placeholder', """apt: primary: - arches: [default] - uri: http://az1.clouds.archive.ubuntu.com/ubuntu/ - security: [] -""", - 'debian': """#cloud-config -apt: + uri: http://az1.clouds.archive.ubuntu.com/ubuntu/"""), + 'debian': SERVER_USERDATA_GENERIC.replace('# apt-placeholder', """apt: primary: - arches: [default] - uri: https://mirror.plusserver.com/debian/debian/ - security: [] -""", + uri: https://mirror.plusserver.com/debian/debian/"""), } @@ -79,22 +85,31 @@ def print_usage(file=sys.stderr): [-c/--os-cloud OS_CLOUD] sets cloud environment (default from OS_CLOUD env) [-d/--debug] enables DEBUG logging channel [-i/--images IMAGE_LIST] sets images to be tested, separated by comma. 
+ [-V/--image-visibility VIS_LIST] filters images by visibility + (default: 'public,community'; use '*' to disable) """, end='', file=file) def check_image_attributes(images, attributes=IMAGE_ATTRIBUTES): - for image in images: - wrong = [f"{key}={value}" for key, value in attributes.items() if image.get(key) != value] - if wrong: - logger.warning(f"Image '{image.name}' missing recommended attributes: {', '.join(wrong)}") + candidates = [ + (image.name, [f"{key}={value}" for key, value in attributes.items() if image.get(key) != value]) + for image in images + ] + # drop those candidates that are fine + offenders = [candidate for candidate in candidates if candidate[1]] + for name, wrong in offenders: + logger.warning(f"Image '{name}' missing recommended attributes: {', '.join(wrong)}") + return not offenders def check_flavor_attributes(flavors, attributes=FLAVOR_ATTRIBUTES, optional=FLAVOR_OPTIONAL): + offenses = 0 for flavor in flavors: extra_specs = flavor['extra_specs'] wrong = [f"{key}={value}" for key, value in attributes.items() if extra_specs.get(key) != value] miss_opt = [key for key in optional if extra_specs.get(key) is None] if wrong: + offenses += 1 message = f"Flavor '{flavor.name}' missing recommended attributes: {', '.join(wrong)}" # only report missing optional attributes if recommended are missing as well # reasoning here is that these optional attributes are merely a hint for implementers @@ -102,77 +117,60 @@ def check_flavor_attributes(flavors, attributes=FLAVOR_ATTRIBUTES, optional=FLAV if miss_opt: message += f"; additionally, missing optional attributes: {', '.join(miss_opt)}" logger.warning(message) + return not offenses -def install_test_requirements(fconn): - # in case we had to patch the apt package sources, wait here for completion - _ = fconn.run('cloud-init status --long --wait', hide=True, warn=True) - # logger.debug(_.stdout) - # the following commands seem to be necessary for CentOS 8, but let's not go there - # because, frankly, that image is ancient - # sudo sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* - # sudo sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* - # Try those commands first that have a high chance of success (Ubuntu seems very common) - commands = ( - # use ; instead of && after update because an error in update is not fatal - # also, on newer systems, it seems we need to install rng-tools5... - ('apt-get', 'apt-get -v && (cat /etc/apt/sources.list ; sudo apt-get update ; sudo apt-get install -y rng-tools5 || sudo apt-get install -y rng-tools)'), - ('dnf', 'sudo dnf install -y rng-tools'), - ('yum', 'sudo yum -y install rng-tools'), - ('pacman', 'sudo pacman -Syu rng-tools'), - ) - for name, cmd in commands: - try: - _ = fconn.run(cmd, hide=True) - except invoke.exceptions.UnexpectedExit as e: - logger.debug(f"Error running '{name}':\n{e.result.stderr.strip()}\n{e.result.stdout.strip()}") - else: - # logger.debug(f"Output running '{name}':\n{_.stderr.strip()}\n{_.stdout.strip()}") - return - logger.debug("No package manager worked; proceeding anyway as rng-utils might be present nonetheless.") +def check_entropy_avail(lines, image_name): + entropy_avail = lines[0].strip() + if entropy_avail != "256": + logger.error( + f"VM '{image_name}' didn't have a fixed amount of entropy available. " + f"Expected 256, got {entropy_avail}." 
+ ) + return False + return True -def check_vm_requirements(fconn, image_name): - try: - entropy_avail = fconn.run('cat /proc/sys/kernel/random/entropy_avail', hide=True).stdout.strip() - if entropy_avail != "256": - logger.error( - f"VM '{image_name}' didn't have a fixed amount of entropy available. " - f"Expected 256, got {entropy_avail}." - ) +def check_rngd(lines, image_name): + if "could not be found" in '\n'.join(lines): + logger.warning(f"VM '{image_name}' doesn't provide the recommended service rngd") + return False + return True + - install_test_requirements(fconn) - fips_data = fconn.run('cat /dev/random | rngtest -c 1000', hide=True, warn=True).stderr +def check_fips_test(lines, image_name): + try: + fips_data = '\n'.join(lines) failure_re = re.search(r'failures:\s\d+', fips_data, flags=re.MULTILINE) if failure_re: fips_failures = failure_re.string[failure_re.regs[0][0]:failure_re.regs[0][1]].split(" ")[1] - if int(fips_failures) > 3: - logger.error( - f"VM '{image_name}' didn't pass the FIPS 140-2 testing. " - f"Expected a maximum of 3 failures, got {fips_failures}." - ) + if int(fips_failures) <= 3: + return True # this is the single 'successful' code path + logger.error( + f"VM '{image_name}' didn't pass the FIPS 140-2 testing. " + f"Expected a maximum of 3 failures, got {fips_failures}." + ) else: logger.error(f"VM '{image_name}': failed to determine fips failures") logger.debug(f"stderr following:\n{fips_data}") except BaseException: logger.critical(f"Couldn't check VM '{image_name}' requirements", exc_info=True) + return False # any unsuccessful path should end up here -def check_vm_recommends(fconn, image, flavor): +def check_virtio_rng(lines, image, flavor): try: - result = fconn.run('sudo systemctl status rngd', hide=True, warn=True) - if "could not be found" in result.stdout or "could not be found" in result.stderr: - logger.warning(f"VM '{image.name}' doesn't provide the recommended service rngd") # Check the existence of the HRNG -- can actually be skipped if the flavor # or the image doesn't have the corresponding attributes anyway! 
if image.hw_rng_model != "virtio" or flavor.extra_specs.get("hw_rng:allowed") != "True": logger.debug("Not looking for virtio-rng because required attributes are missing") - else: - # `cat` can fail with return code 1 if special file does not exist - hw_device = fconn.run('cat /sys/devices/virtual/misc/hw_random/rng_available', hide=True, warn=True).stdout - result = fconn.run("sudo su -c 'od -vAn -N2 -tu2 < /dev/hwrng'", hide=True, warn=True) - if not hw_device.strip() or "No such device" in result.stdout or "No such " in result.stderr: - logger.warning(f"VM '{image.name}' doesn't provide a hardware device.") + return False + # `cat` can fail with return code 1 if special file does not exist + hw_device = lines[0] + if not hw_device.strip() or "No such device" in lines[1]: + logger.warning(f"VM '{image.name}' doesn't provide a hardware device.") + return False + return True except BaseException: logger.critical(f"Couldn't check VM '{image.name}' recommends", exc_info=True) @@ -189,20 +187,10 @@ def __init__(self, conn): def prepare(self): try: - # Create a keypair and save both parts for later usage - self.keypair = self.conn.compute.create_keypair(name=KEYPAIR_NAME) - - self.keyfile = tempfile.NamedTemporaryFile() - self.keyfile.write(self.keypair.private_key.encode("ascii")) - self.keyfile.flush() - - # Create a new security group and give it some simple rules in order to access it via SSH - self.sec_group = self.conn.network.create_security_group( - name=SECURITY_GROUP_NAME - ) - # create network, subnet, router, connect everything self.network = self.conn.create_network(NETWORK_NAME) + # Note: The IP range/cidr here needs to match the one in the pre_cloud.yaml + # playbook calling cleanup.py self.subnet = self.conn.create_subnet( self.network.id, cidr="10.1.0.0/24", @@ -210,9 +198,10 @@ def prepare(self): enable_dhcp=True, allocation_pools=[{ "start": "10.1.0.100", - "end": "10.1.0.200", + "end": "10.1.0.199", }], dns_nameservers=["9.9.9.9"], + name=SUBNET_NAME, ) external_networks = list(self.conn.network.networks(is_router_external=True)) if not external_networks: @@ -228,25 +217,6 @@ def prepare(self): ROUTER_NAME, ext_gateway_net_id=external_gateway_net_id, ) self.conn.add_router_interface(self.router, subnet_id=self.subnet.id) - - _ = self.conn.network.create_security_group_rule( - security_group_id=self.sec_group.id, - direction='ingress', - remote_ip_prefix='0.0.0.0/0', - protocol='icmp', - port_range_max=None, - port_range_min=None, - ethertype='IPv4', - ) - _ = self.conn.network.create_security_group_rule( - security_group_id=self.sec_group.id, - direction='ingress', - remote_ip_prefix='0.0.0.0/0', - protocol='tcp', - port_range_max=22, - port_range_min=22, - ethertype='IPv4', - ) except BaseException: # if `prepare` doesn't go through, we want to revert to a clean state # (in my opinion, the user should only need to call `clean` when `prepare` goes through) @@ -279,24 +249,6 @@ def clean(self): logger.debug(f"The network {self.network.name} couldn't be deleted.", exc_info=True) self.network = None - if self.sec_group is not None: - try: - _ = self.conn.network.delete_security_group(self.sec_group) - except (openstack.cloud.OpenStackCloudException, openstack.cloud.OpenStackCloudUnavailableFeature): - logger.debug(f"The security group {self.sec_group.name} couldn't be deleted.", exc_info=True) - self.sec_group = None - - if self.keyfile is not None: - self.keyfile.close() - self.keyfile = None - - if self.keypair is not None: - try: - _ = 
self.conn.compute.delete_keypair(self.keypair) - except openstack.cloud.OpenStackCloudException: - logger.debug(f"The keypair '{self.keypair.name}' couldn't be deleted.") - self.keypair = None - def __enter__(self): self.prepare() return self @@ -320,20 +272,27 @@ def create_vm(env, all_flavors, image, server_name=SERVER_NAME): # try to pick a frugal flavor flavor = min(flavors, key=lambda flv: flv.vcpus + flv.ram / 3.0 + flv.disk / 10.0) - userdata = next((value for key, value in SERVER_USERDATA.items() if image.name.lower().startswith(key)), None) + userdata = SERVER_USERDATA.get(image.os_distro, SERVER_USERDATA_GENERIC) + logger.debug(f"Using userdata:\n{userdata}") + volume_size = max(image.min_disk, 8) # sometimes, the min_disk property is not set correctly # create a server with the image and the flavor as well as # the previously created keys and security group logger.debug( f"Creating instance of image '{image.name}' using flavor '{flavor.name}' and " - f"{image.min_disk} GiB ephemeral boot volume" + f"{volume_size} GiB ephemeral boot volume" ) + # explicitly set auto_ip=False, we may still get a (totally unnecessary) floating IP assigned server = env.conn.create_server( - server_name, image=image, flavor=flavor, key_name=env.keypair.name, network=env.network, - security_groups=[env.sec_group.id], userdata=userdata, wait=True, timeout=500, auto_ip=True, - boot_from_volume=True, terminate_volume=True, volume_size=image.min_disk, + server_name, image=image, flavor=flavor, userdata=userdata, wait=True, timeout=500, auto_ip=False, + boot_from_volume=True, terminate_volume=True, volume_size=volume_size, network=env.network, ) logger.debug(f"Server '{server_name}' ('{server.id}') has been created") - return server + # next, do an explicit get_server because, beginning with version 3.2.0, the openstacksdk no longer + # sets the interface attributes such as `public_v4` + # I (mbuechse) consider this a bug in openstacksdk; it was introduced with + # https://opendev.org/openstack/openstacksdk/commit/a8adbadf0c4cdf1539019177fb1be08e04d98e82 + # I also consider openstacksdk architecture with the Mixins etc. 
smelly to say the least + return env.conn.get_server(server.id) def delete_vm(conn, server_name=SERVER_NAME): @@ -344,25 +303,6 @@ def delete_vm(conn, server_name=SERVER_NAME): logger.debug(f"The server '{server_name}' couldn't be deleted.", exc_info=True) -def retry(func, exc_type, timeouts=(8, 7, 15, 10, 20, 30, 60)): - if isinstance(exc_type, str): - exc_type = exc_type.split(',') - timeout_iter = iter(timeouts) - # do an initial sleep because func is known fail at first anyway - time.sleep(next(timeout_iter)) - while True: - try: - func() - except Exception as e: - timeout = next(timeout_iter, None) - if timeout is None or e.__class__.__name__ not in exc_type: - raise - logger.debug(f"Caught {e!r} while {func!r}; waiting {timeout} s before retry") - time.sleep(timeout) - else: - break - - class CountingHandler(logging.Handler): def __init__(self, level=logging.NOTSET): super().__init__(level=level) @@ -372,35 +312,96 @@ def handle(self, record): self.bylevel[record.levelno] += 1 +# the following functions are used to map any OpenStack Image to a pair of integers +# used for sorting the images according to fitness for our test +# - debian take precedence over ubuntu +# - higher versions take precedence over lower ones + +# only list stable versions here +DEBIAN_CODENAMES = { + "buster": 10, + "bullseye": 11, + "bookworm": 12, +} + + +def _deduce_sort_debian(os_version, debian_ver=re.compile(r"\d+\Z")): + if debian_ver.match(os_version): + return 2, int(os_version) + return 2, DEBIAN_CODENAMES.get(os_version, 0) + + +def _deduce_sort_ubuntu(os_version, ubuntu_ver=re.compile(r"\d\d\.\d\d\Z")): + if ubuntu_ver.match(os_version): + return 1, int(os_version.replace(".", "")) + return 1, 0 + + +# map lower-case distro name to version deducing function +DISTROS = { + "ubuntu": _deduce_sort_ubuntu, + "debian": _deduce_sort_debian, +} + + +def _deduce_sort(img): + if not img.os_distro or not img.os_version: + return 0, 0 + deducer = DISTROS.get(img.os_distro.strip().lower()) + if deducer is None: + return 0, 0 + return deducer(img.os_version.strip().lower()) + + def select_deb_image(images): - """From a list of OpenStack image objects, select a recent Debian derivative. - - Try Debian first, then Ubuntu. 
- """ - for prefix in ("Debian ", "Ubuntu "): - imgs = sorted( - [img for img in images if img.name.startswith(prefix)], - key=attrgetter("name"), - ) - if imgs: - return imgs[-1] - return None + """From a list of OpenStack image objects, select a recent Debian derivative.""" + return max(images, key=_deduce_sort, default=None) + + +def print_result(check_id, passed): + print(check_id + ": " + ('FAIL', 'PASS')[bool(passed)]) + + +def evaluate_output(lines, image, flavor, marker=MARKER): + # parse lines from console output + # removing any "indent", stuff that looks like '[ 70.439502] cloud-init[513]: ' + section = None + indent = 0 + collected = defaultdict(list) + for line in lines: + idx = line.find(marker) + if idx != -1: + section = line[idx + len(marker):].strip() + if section == 'end': + section = None + indent = idx + continue + if section: + collected[section].append(line[indent:]) + # always check if we have something, because print_result won't do MISS, only PASS/FAIL + if collected['virtio-rng']: + # virtio-rng is not an official test case according to testing notes, + # but for some reason we check it nonetheless (call it informative) + check_virtio_rng(collected['virtio-rng'], image, flavor) + if collected['entropy-avail']: + print_result('entropy-check-entropy-avail', check_entropy_avail(collected['entropy-avail'], image.name)) + if collected['rngd']: + print_result('entropy-check-rngd', check_rngd(collected['rngd'], image.name)) + if collected['fips-test']: + print_result('entropy-check-fips-test', check_fips_test(collected['fips-test'], image.name)) def main(argv): # configure logging, disable verbose library logging logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO) openstack.enable_logging(debug=False) - logging.getLogger("fabric").propagate = False - logging.getLogger("invoke").propagate = False - logging.getLogger("paramiko").propagate = False warnings.filterwarnings("ignore", "search_floating_ips") # count the number of log records per level (used for summary and return code) counting_handler = CountingHandler(level=logging.INFO) logger.addHandler(counting_handler) try: - opts, args = getopt.gnu_getopt(argv, "c:i:hd", ["os-cloud=", "images=", "help", "debug"]) + opts, args = getopt.gnu_getopt(argv, "c:i:hdV:", ["os-cloud=", "images=", "help", "debug", "image-visibility="]) except getopt.GetoptError as exc: logger.critical(f"{exc}") print_usage() @@ -408,6 +409,7 @@ def main(argv): cloud = os.environ.get("OS_CLOUD") image_names = set() + image_visibility = set() for opt in opts: if opt[0] == "-h" or opt[0] == "--help": print_usage() @@ -418,17 +420,32 @@ def main(argv): cloud = opt[1] if opt[0] == "-d" or opt[0] == "--debug": logging.getLogger().setLevel(logging.DEBUG) + if opt[0] == "-V" or opt[0] == "--image-visibility": + image_visibility.update([v.strip() for v in opt[1].split(',')]) if not cloud: logger.critical("You need to have OS_CLOUD set or pass --os-cloud=CLOUD.") return 1 + if not image_visibility: + image_visibility.update(("public", "community")) + try: logger.debug(f"Connecting to cloud '{cloud}'") with openstack.connect(cloud=cloud, timeout=32) as conn: all_images = conn.list_images() all_flavors = conn.list_flavors(get_extra=True) + if '*' not in image_visibility: + logger.debug(f"Images: filter for visibility {', '.join(sorted(image_visibility))}") + all_images = [img for img in all_images if img.visibility in image_visibility] + all_image_names = [f"{img.name} ({img.visibility})" for img in all_images] + 
logger.debug(f"Images: {', '.join(all_image_names) or '(NONE)'}") + + if not all_images: + logger.critical("Can't run this test without image") + return 1 + if image_names: # find images by the names given, BAIL out if some image is missing images = sorted([img for img in all_images if img.name in image_names], key=lambda img: img.name) @@ -443,27 +460,32 @@ def main(argv): logger.debug(f"Selected image: {images[0].name} ({images[0].id})") logger.debug("Checking images and flavors for recommended attributes") - check_image_attributes(all_images) - check_flavor_attributes(all_flavors) + print_result('entropy-check-image-properties', check_image_attributes(all_images)) + print_result('entropy-check-flavor-properties', check_flavor_attributes(all_flavors)) logger.debug("Checking dynamic instance properties") + # Check a VM for services and requirements with TestEnvironment(conn) as env: - # Check a VM for services and requirements for image in images: try: # ugly: create the server inside the try-block because the call # can be interrupted via Ctrl-C, and then the instance will be # started without us knowing its id server = create_vm(env, all_flavors, image) - with fabric.Connection( - host=server.public_v4, - user=image.properties.get('image_original_user') or image.properties.get('standarduser'), - connect_kwargs={"key_filename": env.keyfile.name, "allow_agent": False}, - ) as fconn: - # need to retry because it takes time for sshd to come up - retry(fconn.open, exc_type="NoValidConnectionsError,TimeoutError") - check_vm_recommends(fconn, image, server.flavor) - check_vm_requirements(fconn, image.name) + remainder = TIMEOUT + console = conn.compute.get_server_console_output(server) + while remainder > 0: + if "Failed to run module scripts-user" in console['output']: + raise RuntimeError(f"Failed tests for {server.id}") + if "_scs-test-end" in console['output']: + break + time.sleep(1.0) + remainder -= 1 + console = conn.compute.get_server_console_output(server) + # if the timeout was hit, maybe we won't find everything, but that's okay -- + # these testcases will count as missing, rightly so + logger.debug(f'Finished waiting with timeout remainder: {remainder} s') + evaluate_output(console['output'].splitlines(), image, server.flavor) finally: delete_vm(conn) except BaseException as e: @@ -475,6 +497,9 @@ def main(argv): "Total critical / error / warning: " f"{c[logging.CRITICAL]} / {c[logging.ERROR]} / {c[logging.WARNING]}" ) + # include this one for backwards compatibility + if not c[logging.CRITICAL]: + print("entropy-check: " + ('PASS', 'FAIL')[min(1, c[logging.ERROR])]) return min(127, c[logging.CRITICAL] + c[logging.ERROR]) # cap at 127 due to OS restrictions diff --git a/Tests/iaas/flavor-naming/check_yaml.py b/Tests/iaas/flavor-naming/check_yaml.py index b2ecbe407..3d6738917 100755 --- a/Tests/iaas/flavor-naming/check_yaml.py +++ b/Tests/iaas/flavor-naming/check_yaml.py @@ -17,8 +17,8 @@ from flavor_names import parser_v2, flavorname_to_dict -REQUIRED_FIELDS = ['name-v1', 'name-v2', 'name', 'cpus', 'ram', 'cpu-type'] -DEFAULTS = {'disk0-type': 'network'} +REQUIRED_FIELDS = ['scs:name-v1', 'scs:name-v2', 'name', 'cpus', 'ram', 'scs:cpu-type'] +DEFAULTS = {'scs:disk0-type': 'network'} class Undefined: @@ -47,7 +47,7 @@ def check_spec(self, flavor_spec): self.emit(f"flavor spec missing keys {', '.join(missing)}: {flavor_spec}") return name = flavor_spec['name'] - name_v2 = flavor_spec['name-v2'] + name_v2 = flavor_spec['scs:name-v2'] try: flavorname = parser_v2(name_v2) except 
Exception: diff --git a/Tests/iaas/flavor-naming/check_yaml_test.py b/Tests/iaas/flavor-naming/check_yaml_test.py index 3bb3bc12c..09cb01e62 100644 --- a/Tests/iaas/flavor-naming/check_yaml_test.py +++ b/Tests/iaas/flavor-naming/check_yaml_test.py @@ -17,12 +17,12 @@ BUGGY_YAML_DIR = Path(TEST_ROOT, "testing") EXPECTED_ERRORS = """ -ERROR: flavor 'SCS-1V-4': field 'cpu-type' contradicting name-v2 'SCS-1V-4'; found 'crowded-core', expected 'shared-core' -ERROR: flavor 'SCS-2V-8': field 'name-v1' contradicting name-v2 'SCS-2V-8'; found 'SCS-2V-8', expected 'SCS-2V:8' +ERROR: flavor 'SCS-1V-4': field 'scs:cpu-type' contradicting name-v2 'SCS-1V-4'; found 'crowded-core', expected 'shared-core' +ERROR: flavor 'SCS-2V-8': field 'scs:name-v1' contradicting name-v2 'SCS-2V-8'; found 'SCS-2V-8', expected 'SCS-2V:8' ERROR: flavor 'SCS-4V-16': field 'ram' contradicting name-v2 'SCS-4V-16'; found 12, expected 16.0 ERROR: flavor 'SCS-8V-32': field 'disk' contradicting name-v2 'SCS-8V-32'; found 128, expected undefined ERROR: flavor 'SCS-1V-2': field 'cpus' contradicting name-v2 'SCS-1V-2'; found 2, expected 1 -ERROR: flavor 'SCS-2V-4-20s': field 'disk0-type' contradicting name-v2 'SCS-2V-4-20s'; found 'network', expected 'ssd' +ERROR: flavor 'SCS-2V-4-20s': field 'scs:disk0-type' contradicting name-v2 'SCS-2V-4-20s'; found 'network', expected 'ssd' ERROR: flavor 'SCS-4V-16-100s': field 'disk' contradicting name-v2 'SCS-4V-16-100s'; found 10, expected 100 ERROR: file 'scs-0103-v1-flavors-wrong.yaml': found 7 errors """.strip() diff --git a/Tests/iaas/flavor-naming/cli.py b/Tests/iaas/flavor-naming/cli.py index ff3021e75..796b6a733 100755 --- a/Tests/iaas/flavor-naming/cli.py +++ b/Tests/iaas/flavor-naming/cli.py @@ -6,55 +6,18 @@ import click import yaml -from flavor_names import parser_v1, parser_v2, parser_v3, inputflavor, outputter, flavorname_to_dict, prettyname - - -logger = logging.getLogger(__name__) - - -class ParsingStrategy: - """class to model parsing that accepts multiple versions of the syntax in different ways""" - - def __init__(self, parsers=(), tolerated_parsers=(), invalid_parsers=()): - self.parsers = parsers - self.tolerated_parsers = tolerated_parsers - self.invalid_parsers = invalid_parsers - - def parse(self, namestr): - exc = None - for parser in self.parsers: - try: - return parser(namestr) - except Exception as e: - if exc is None: - exc = e - # at this point, if `self.parsers` is not empty, then `exc` is not `None` - for parser in self.tolerated_parsers: - try: - result = parser(namestr) - except Exception: - pass - else: - logger.warning(f"Name is merely tolerated {parser.vstr}: {namestr}") - return result - for parser in self.invalid_parsers: - try: - result = parser(namestr) - except Exception: - pass - else: - raise ValueError(f"Name is non-tolerable {parser.vstr}") - raise exc +from flavor_names import parser_v1, parser_v2, parser_v3, inputflavor, outputter, flavorname_to_dict, \ + prettyname, ParsingStrategy -VERSIONS = { - 'v1': ParsingStrategy(parsers=(parser_v1, ), invalid_parsers=(parser_v2, )), - 'v1/v2': ParsingStrategy(parsers=(parser_v1, ), tolerated_parsers=(parser_v2, )), - 'v2/v1': ParsingStrategy(parsers=(parser_v2, ), tolerated_parsers=(parser_v1, )), - 'v2': ParsingStrategy(parsers=(parser_v2, ), invalid_parsers=(parser_v1, )), - 'v3': ParsingStrategy(parsers=(parser_v3, ), invalid_parsers=(parser_v1, )), -} -_, VERSIONS['latest'] = max(VERSIONS.items()) +PARSERS = {ps.vstr: ps for ps in [ + ParsingStrategy(vstr='v1', parsers=(parser_v1, ), 
invalid_parsers=(parser_v2, )), + ParsingStrategy(vstr='v1/v2', parsers=(parser_v1, ), tolerated_parsers=(parser_v2, )), + ParsingStrategy(vstr='v2/v1', parsers=(parser_v2, ), tolerated_parsers=(parser_v1, )), + ParsingStrategy(vstr='v2', parsers=(parser_v2, ), invalid_parsers=(parser_v1, )), + ParsingStrategy(vstr='v3', parsers=(parser_v3, ), invalid_parsers=(parser_v1, )), +]} +_, PARSERS['latest'] = max(PARSERS.items()) def noop(*args, **kwargs): @@ -84,7 +47,7 @@ def process_pipeline(rc, *args, **kwargs): @cli.command() -@click.argument('version', type=click.Choice(list(VERSIONS), case_sensitive=False)) +@click.argument('version', type=click.Choice(list(PARSERS), case_sensitive=False)) @click.argument('name', nargs=-1) @click.option('-o', '--output', 'output', type=click.Choice(['none', 'prose', 'yaml']), help='select output format (default: none)') @@ -96,12 +59,12 @@ def parse(cfg, version, name, output='none'): validation. With 'v1/v2', flavor names of both kinds are accepted, but warnings are emitted for v2, and similarly with 'v2/v1', where warnings are emitted for v1. """ - version = VERSIONS.get(version) + parser = PARSERS.get(version) printv = cfg.printv errors = 0 for namestr in name: try: - flavorname = version.parse(namestr) + flavorname = parser(namestr) except ValueError as exc: print(f"{exc}: {namestr}") errors += 1 @@ -109,7 +72,7 @@ def parse(cfg, version, name, output='none'): if flavorname is None: print(f"NOT an SCS flavor: {namestr}") elif output == 'prose': - printv(name, end=': ') + printv(namestr, end=': ') print(f"{prettyname(flavorname)}") elif output == 'yaml': print(yaml.dump(flavorname_to_dict(flavorname), explicit_start=True)) diff --git a/Tests/iaas/flavor-naming/flavor-add-extra-specs.py b/Tests/iaas/flavor-naming/flavor-add-extra-specs.py new file mode 100755 index 000000000..d335e198c --- /dev/null +++ b/Tests/iaas/flavor-naming/flavor-add-extra-specs.py @@ -0,0 +1,326 @@ +#!/usr/bin/env python3 +# vim: set ts=4 sw=4 et: +""" +flavor-add-extra-specs.py + +Cycles through all SCS- openstack flavors and adds properties specified in +scs-0103-v1 . + +Usage: flavor-add-extra-specs.py [options] [FLAVORS] +Options: + -h|--help: Print usage information + -d|--debug: Output verbose debugging info + -q|--quiet: Only output warnings and errors + -A|--all-names: Overwrite scs:name-vN with systematic names (each + name will be kept, but may appear w/another key) + -t|--disk0-type TYPE: Assumes disk TYPE for flavors w/ unspec disk0-type + -p|--cpu-type TYPE: Assumes CPU TYPE for flavors w/o SCS name + -c|--os-cloud CLOUD: Cloud to work on (default: OS_CLOUD env) + -a|--action ACTION: What action to perform: + report: only report what changes would be performed + ask: (default) report, then ask whether to perform + apply: perform changes without asking + +By default, all SCS- flavors are processed; by passing flavor names FLAVORS as +arguments, only those are processed. +You can pass non-SCS FLAVORS and specify --cpu-type to generate SCS names and +set the SCS extra_specs. + +On most clouds, to add properties (extra_specs) to flavors, you need to have +admin power; this program will otherwise report the failed settings. +Add -d|--debug for more verbose output. 
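For illustration, two typical invocations of this script might look as follows; the cloud name `mycloud` and the flavor `my-flavor-large` are placeholders, not names taken from this repository:

```bash
# report only: show which extra_specs would be set or removed, change nothing
./flavor-add-extra-specs.py --os-cloud mycloud --action report

# restrict the run to two flavors; assume shared-core CPUs so that the
# non-SCS flavor also gets an SCS name and the corresponding extra_specs generated
./flavor-add-extra-specs.py -c mycloud -a report -p shared-core SCS-2V-8 my-flavor-large
```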
+
+(c) Kurt Garloff , 6/2024
+(c) Matthias Büchse , 8/2024
+SPDX-License-Identifier: CC-BY-SA-4.0
+"""
+
+import getopt
+import logging
+import os
+import sys
+
+import openstack
+
+from flavor_names import parser_vN, CPUTYPE_KEY, DISKTYPE_KEY, Flavorname, Main, Disk, flavorname_to_dict, \
+    SCS_NAME_PATTERN
+
+
+logger = logging.getLogger(__name__)
+DEFAULTS = {'scs:disk0-type': 'network'}
+
+
+def usage(file=sys.stderr):
+    "Output usage information (help)"
+    print(__doc__.strip(), file=file)
+
+
+def min_max_check(real, claim, valnm, flvnm, extra):
+    """Check whether property valnm real is at least claim.
+    Prints ERROR if lower and returns False
+    Prints WARNING if higher (and returns True)
+    Returns True if no problem detected.
+    For floats, we allow for 1% tolerance in both directions.
+    """
+    # 1% tolerance for floats (RAM)
+    if isinstance(claim, float):
+        chkval = real*1.01
+        chkval2 = real*0.99
+    else:
+        chkval = real
+        chkval2 = real
+    if chkval < claim:
+        logger.error(f"Flavor {flvnm} claims {claim} {valnm}{extra}, but only has {real}. Needs fixing.")
+        return False
+    if chkval2 > claim:
+        logger.warning(f"Flavor {flvnm} claims {claim} {valnm}{extra}, but overdelivers with {real}.")
+    return True
+
+
+def check_std_props(flavor, flvnm, extra=""):
+    """Check consistency of openstack props with parsed SCS name specs
+    Return no of errors found."""
+    errors = 0
+    # vcpus
+    if not min_max_check(flavor.vcpus, flvnm.cpuram.cpus, "CPUs", flavor.name, extra):
+        errors += 1
+    # ram
+    if not min_max_check(flavor.ram, flvnm.cpuram.ram*1024, "MiB RAM", flavor.name, extra):
+        errors += 1
+    # disk
+    disksz = 0
+    if flvnm.disk:
+        disksz = flvnm.disk.disksize
+    if not min_max_check(flavor.disk, disksz, "GiB Disk", flavor.name, extra):
+        errors += 1
+    return errors
+
+
+def generate_flavorname(flavor, cpu_type, disk0_type):
+    """Generate an SCS- v2 name for flavor,
+    using cpu_type (and disk0_type if needed).
+ Returns string.""" + cpuram = Main() + cpuram.cpus = flavor.vcpus + cpuram.cputype = cpu_type + cpuram.ram = int((flavor.ram+12)/512)/2.0 + flavorname = Flavorname(cpuram) + if flavor.disk: + disk = Disk() + disk.disksize = flavor.disk + disk.disktype = disk0_type + flavorname.disk = disk + return flavorname + + +def revert_dict(value, dct, extra=""): + "Return key that matches val, None if no match" + for key, val in dct.items(): + if val == value: + return key + logger.error(f"ERROR: {extra} {value} should be in {dct.items()}") + + +class ActionReport: + @staticmethod + def set_extra_spec(flavor, key, value): + print(f'Flavor {flavor.name}: SET {key}={value}') + + @staticmethod + def del_extra_spec(flavor, key): + print(f'Flavor {flavor.name}: DELETE {key}') + + +class ActionApply: + def __init__(self, compute): + self.compute = compute + + def set_extra_spec(self, flavor, key, value): + logger.info(f'Flavor {flavor.name}: SET {key}={value}') + try: + flavor.update_extra_specs_property(self.compute, key, value) + except openstack.exceptions.SDKException as exc: + logger.error(f"{exc!r} while setting {key}={value} for {flavor.name}") + + def del_extra_spec(self, flavor, key): + logger.info(f'Flavor {flavor.name}: DELETE {key}') + try: + flavor.delete_extra_specs_property(self.compute, key) + except openstack.exceptions.SDKException as exc: + logger.error(f"{exc!r} while deleting {key} for {flavor.name}") + + +class SetCommand: + def __init__(self, flavor, key, value): + self.flavor = flavor + self.key = key + self.value = value + + def apply(self, action): + action.set_extra_spec(self.flavor, self.key, self.value) + + +class DelCommand: + def __init__(self, flavor, key): + self.flavor = flavor + self.key = key + + def apply(self, action): + action.del_extra_spec(self.flavor, self.key) + + +def handle_commands(action, compute, commands): + if not commands: + return + if action in ('ask', 'report'): + action_report = ActionReport() + print(f'Proposing the following {len(commands)} changes to extra_specs:') + for command in commands: + command.apply(action_report) + if action == 'ask': + print('Do you want to apply these changes? 
y/n') + if input() == 'y': + action = 'apply' + else: + print('No changes will be applied.') + if action == 'apply': + action_apply = ActionApply(compute) + for command in commands: + command.apply(action_apply) + + +def main(argv): + action = "ask" # or "report" or "apply" + + errors = 0 + disk0_type = None + cpu_type = None + gen_all_names = False + + cloud = os.environ.get("OS_CLOUD") + try: + opts, flvs = getopt.gnu_getopt(argv, "hdqAt:p:c:a:", + ("help", "debug", "quiet", "all-names", + "disk0-type=", "cpu-type=", "os-cloud=", "action=")) + except getopt.GetoptError as exc: + logger.critical(repr(exc)) + usage() + return 1 + for opt in opts: + if opt[0] == "-h" or opt[0] == "--help": + usage(file=sys.stdout) + return 0 + if opt[0] == "-q" or opt[0] == "--quiet": + logging.getLogger().setLevel(logging.WARNING) + if opt[0] == "-d" or opt[0] == "--debug": + logging.getLogger().setLevel(logging.DEBUG) + if opt[0] == "-A" or opt[0] == "--all-names": + gen_all_names = True + if opt[0] == "-a" or opt[0] == "--action": + action = opt[1].strip().lower() + if opt[0] == "-c" or opt[0] == "--os-cloud": + cloud = opt[1] + if opt[0] == "-t" or opt[0] == "--disk0-type": + disk0_type = opt[1] + if disk0_type not in DISKTYPE_KEY: + disk0_type = revert_dict(disk0_type, DISKTYPE_KEY) + if not disk0_type: + return 2 + if opt[0] == "-p" or opt[0] == "--cpu-type": + cpu_type = opt[1] + if cpu_type not in CPUTYPE_KEY: + cpu_type = revert_dict(cpu_type, CPUTYPE_KEY) + if not cpu_type: + return 2 + + if action not in ('ask', 'report', 'apply'): + logger.error("action needs to be one of ask, report, apply") + usage() + return 4 + + if not cloud: + logger.error("Need to pass -c|--os-cloud|OS_CLOUD env") + usage() + return 3 + + conn = openstack.connect(cloud) + conn.authorize() + + # select relevant flavors: either given via name, or all SCS flavors + predicate = (lambda fn: fn in flvs) if flvs else (lambda fn: fn.startswith('SCS-')) + flavors = [flavor for flavor in conn.compute.flavors() if predicate(flavor.name)] + # This is likely a user error, so make them aware + if len(flavors) < len(flvs): + missing = set(flvs) - set(flavor.name for flavor in flavors) + logger.warning("Flavors not found: " + ", ".join(missing)) + + commands = [] + for flavor in flavors: + extra_names_to_check = [ + (key, value) + for key, value in flavor.extra_specs.items() + if SCS_NAME_PATTERN.match(key) + ] + names_to_check = [('name', flavor.name)] if flavor.name.startswith('SCS-') else [] + names_to_check.extend(extra_names_to_check) + + # syntax check: compute flavorname instances + # sanity check: claims must be true wrt actual flavor + flavornames = {} + for key, name_str in names_to_check: + try: + flavornames[key] = flavorname = parser_vN(name_str) + except ValueError as exc: + logger.error(f"could not parse {key}={name_str}: {exc!r}") + errors += 1 + else: + errors += check_std_props(flavor, flavorname, " by name") + + if not flavornames: + # we need cputype and disktype from user + if not cpu_type: + logger.warning(f"Need to specify cpu-type for generating name for {flavor.name}, skipping") + continue + if flavor.disk and not disk0_type: + logger.warning(f"Need to specify disk0-type for generating name for {flavor.name}, skipping") + continue + flavornames['_generated'] = generate_flavorname(flavor, cpu_type, disk0_type) + + expected = flavorname_to_dict(*flavornames.values(), ctx=flavor.name) + # determine invalid keys (within scs namespace) + # scs:name-vN is always permissible + removals = [ + key + for key in 
flavor.extra_specs + if key.startswith('scs:') and not SCS_NAME_PATTERN.match(key) + if expected.get(key, DEFAULTS.get(key)) is None + ] + logger.debug(f"Flavor {flavor.name}: expected={expected}, removals={removals}") + + for key in removals: + commands.append(DelCommand(flavor, key)) + + # generate or rectify extra_specs + for key, value in expected.items(): + if not key.startswith("scs:"): + continue + if not gen_all_names and key.startswith("scs:name-v") and extra_names_to_check: + continue # do not generate names if names are present + current = flavor.extra_specs.get(key) + if current == value: + continue + if current is None and DEFAULTS.get(key) == value: + continue + if current is not None: + logger.warning(f"{flavor.name}: resetting {key} because {current} != expected {value}") + commands.append(SetCommand(flavor, key, value)) + + handle_commands(action, conn.compute, commands) + logger.info(f"Processed {len(flavors)} flavors, {len(commands)} changes") + return errors + + +if __name__ == "__main__": + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO) + openstack.enable_logging(debug=False) + sys.exit(min(127, main(sys.argv[1:]))) # cap at 127 due to OS restrictions diff --git a/Tests/iaas/flavor-naming/flavor-name-check.py b/Tests/iaas/flavor-naming/flavor-name-check.py index 536372757..e5d395e54 100755 --- a/Tests/iaas/flavor-naming/flavor-name-check.py +++ b/Tests/iaas/flavor-naming/flavor-name-check.py @@ -86,6 +86,9 @@ def main(argv): nm2 = _fnmck.outname(ret2) if nm1 != nm2: print(f"WARNING: {nm1} != {nm2}") + snm = _fnmck.outname(ret.shorten()) + if snm != nm1: + print(f"Shortened name: {snm}") argv = argv[1:] scs = 1 diff --git a/Tests/iaas/flavor-naming/flavor-names-openstack.py b/Tests/iaas/flavor-naming/flavor-names-openstack.py index 19a093495..6e9273d5f 100755 --- a/Tests/iaas/flavor-naming/flavor-names-openstack.py +++ b/Tests/iaas/flavor-naming/flavor-names-openstack.py @@ -218,17 +218,22 @@ def main(argv): } flvSCSRep = { "TotalAmount": len(MSCSFlv) + len(SCSFlv) + len(wrongFlv), - "MandatoryFlavorsPresent": len(MSCSFlv), - "MandatoryFlavorsMissing": len(scsMandatory), } - if v3mode: + # skip the following if no mandatory flavors are given (useful for v3.2 onward) + if len(MSCSFlv) + len(scsMandatory): + flvSCSRep.update({ + "MandatoryFlavorsPresent": len(MSCSFlv), + "MandatoryFlavorsMissing": len(scsMandatory), + }) + # skip the following if no recommended flavors are given (useful for v1, v2, and v3.2 onward) + if len(RSCSFlv) + len(scsRecommended): flvSCSRep.update({ "RecommendedFlavorsPresent": len(RSCSFlv), "RecommendedFlavorsMissing": len(scsRecommended), }) flvSCSRep.update({ - "OptionalFlavorsValid": len(SCSFlv), - "OptionalFlavorsWrong": len(wrongFlv), + "FlavorsValid": len(SCSFlv) + len(MSCSFlv) + len(RSCSFlv), + "FlavorsWrong": len(wrongFlv), "FlavorsWithWarnings": len(warnFlv), }) flvOthRep = { @@ -246,6 +251,7 @@ def main(argv): Report[cloud]["SCSFlavorReport"] = flvSCSList Report[cloud]["OtherFlavorReport"] = flvOthList print(f"{yaml.dump(Report, default_flow_style=False)}") + print("flavor-name-check: " + ('PASS', 'FAIL')[min(1, errors)]) return errors diff --git a/Tests/iaas/flavor-naming/flavor_names.py b/Tests/iaas/flavor-naming/flavor_names.py index 88b51da17..10ca54da6 100644 --- a/Tests/iaas/flavor-naming/flavor_names.py +++ b/Tests/iaas/flavor-naming/flavor_names.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +from collections import defaultdict +import logging import os import os.path import re @@ -9,8 +11,13 @@ import 
yaml +logger = logging.getLogger(__name__) + +SCS_NAME_PATTERN = re.compile(r"scs:name-v\d+\Z") CPUTYPE_KEY = {'L': 'crowded-core', 'V': 'shared-core', 'T': 'dedicated-thread', 'C': 'dedicated-core'} +CPUTYPE_SORT = {'crowded-core': 0, 'shared-core': 1, 'dedicated-thread': 2, 'dedicated-core': 3} DISKTYPE_KEY = {'n': 'network', 'h': 'hdd', 's': 'ssd', 'p': 'nvme'} +DISKTYPE_SORT = {'network': 0, 'hdd': 1, 'ssd': 2, 'nvme': 3} HERE = Path(__file__).parent @@ -155,6 +162,9 @@ class Main: raminsecure = BoolAttr("?no ECC", letter="u") ramoversubscribed = BoolAttr("?RAM Over", letter="o") + def shorten(self): + return self + class Disk: """Class representing the disk part""" @@ -164,6 +174,9 @@ class Disk: disksize = OptIntAttr("#.GB Disk") disktype = TblAttr("Disk type", {'': '(unspecified)', "n": "Networked", "h": "Local HDD", "s": "SSD", "p": "HiPerf NVMe"}) + def shorten(self): + return self + class Hype: """Class repesenting Hypervisor""" @@ -171,6 +184,9 @@ class Hype: component_name = "hype" hype = TblAttr(".Hypervisor", {"kvm": "KVM", "xen": "Xen", "hyv": "Hyper-V", "vmw": "VMware", "bms": "Bare Metal System"}) + def shorten(self): + return None + class HWVirt: """Class repesenting support for hardware virtualization""" @@ -178,6 +194,9 @@ class HWVirt: component_name = "hwvirt" hwvirt = BoolAttr("?HardwareVirt", letter="hwv") + def shorten(self): + return None + class CPUBrand: """Class repesenting CPU brand""" @@ -185,28 +204,58 @@ class CPUBrand: component_name = "cpubrand" cpuvendor = TblAttr("CPU Vendor", {"i": "Intel", "z": "AMD", "a": "ARM", "r": "RISC-V"}) cpugen = DepTblAttr("#.CPU Gen", cpuvendor, { - "i": {None: '(unspecified)', 0: "Unspec/Pre-Skylake", 1: "Skylake", 2: "Cascade Lake", 3: "Ice Lake", 4: "Sapphire Rapids"}, - "z": {None: '(unspecified)', 0: "Unspec/Pre-Zen", 1: "Zen 1", 2: "Zen 2", 3: "Zen 3", 4: "Zen 4"}, - "a": {None: '(unspecified)', 0: "Unspec/Pre-A76", 1: "A76/NeoN1", 2: "A78/X1/NeoV1", 3: "A710/NeoN2"}, + "i": {None: '(unspecified)', 0: "Unspec/Pre-Skylake", 1: "Skylake", 2: "Cascade Lake", 3: "Ice Lake", 4: "Sapphire Rapids", + 5: 'Sierra Forest (E)', 6: 'Granite Rapids (P)'}, + "z": {None: '(unspecified)', 0: "Unspec/Pre-Zen", 1: "Zen 1", 2: "Zen 2", 3: "Zen 3", 4: "Zen 4/4c", 5: "Zen 5/5c"}, + "a": {None: '(unspecified)', 0: "Unspec/Pre-A76", 1: "A76/NeoN1", 2: "A78/X1/NeoV1", 3: "A71x/NeoN2/V2", + 4: "AmpereOne", 5: "A72x/NeoN3/V3"}, "r": {None: '(unspecified)', 0: "Unspec"}, }) perf = TblAttr("Performance", {"": "Std Perf", "h": "High Perf", "hh": "Very High Perf", "hhh": "Very Very High Perf"}) + def __init__(self, cpuvendor="i", cpugen=0, perf=""): + self.cpuvendor = cpuvendor + self.cpugen = cpugen + self.perf = perf + + def shorten(self): + # For non-x86-64, don't strip out CPU brand for short name, as it contains the architecture + if self.cpuvendor in ('i', 'z'): + return None + return CPUBrand(self.cpuvendor) + class GPU: """Class repesenting GPU support""" type = "GPU" component_name = "gpu" gputype = TblAttr("Type", {"g": "vGPU", "G": "Pass-Through GPU"}) - brand = TblAttr("Brand", {"N": "nVidia", "A": "AMD", "I": "Intel"}) + brand = TblAttr("Brand", {"N": "Nvidia", "A": "AMD", "I": "Intel"}) gen = DepTblAttr("Gen", brand, { "N": {'': '(unspecified)', "f": "Fermi", "k": "Kepler", "m": "Maxwell", "p": "Pascal", - "v": "Volta", "t": "Turing", "a": "Ampere", "l": "AdaLovelace"}, - "A": {'': '(unspecified)', "0.4": "GCN4.0/Polaris", "0.5": "GCN5.0/Vega", "1": "RDNA1/Navi1x", "2": "RDNA2/Navi2x", "3": "RDNA3/Navi3x"}, - "I": {'': 
'(unspecified)', "0.9": "Gen9/Skylake", "0.95": "Gen9.5/KabyLake", "1": "Xe1/Gen12.1", "2": "Xe2"}, + "v": "Volta", "t": "Turing", "a": "Ampere", "l": "AdaLovelace", "g": "GraceHopper"}, + "A": {'': '(unspecified)', "0.4": "GCN4.0/Polaris", "0.5": "GCN5.0/Vega", "1": "RDNA1/Navi1x", "2": "C/RDNA2/Navi2x", + "3": "C/RDNA3/Navi3x", "3.5": "C/RDNA3.5", "4": "C/RDNA4"}, + "I": {'': '(unspecified)', "0.9": "Gen9/Skylake", "0.95": "Gen9.5/KabyLake", "1": "Xe1/Gen12.1/DG1", "2": "Xe2/Gen12.2", + "3": "Arc/Gen12.7/DG2"}, }) - cu = OptIntAttr("#.CU/EU/SM") - perf = TblAttr("Performance", {"": "Std Perf", "h": "High Perf", "hh": "Very High Perf", "hhh": "Very Very High Perf"}) + cu = OptIntAttr("#.N:SMs/A:CUs/I:EUs") + perf = TblAttr("Frequency", {"": "Std Freq", "h": "High Freq", "hh": "Very High Freq"}) + vram = OptIntAttr("#.V:GiB VRAM") + vramperf = TblAttr("Bandwidth", {"": "Std BW {<~1GiB/s)", "h": "High BW", "hh": "Very High BW"}) + + def __init__(self, gputype="g", brand="N", gen='', cu=None, perf='', vram=None, vramperf=''): + self.gputype = gputype + self.brand = brand + self.gen = gen + self.cu = cu + self.perf = perf + self.vram = vram + self.vramperf = vramperf + + def shorten(self): + # remove h modifiers + return GPU(gputype=self.gputype, brand=self.brand, gen=self.gen, cu=self.cu, vram=self.vram) class IB: @@ -215,6 +264,9 @@ class IB: component_name = "ib" ib = BoolAttr("?IB") + def shorten(self): + return self + class Flavorname: """A flavor name; merely a bunch of components""" @@ -232,9 +284,15 @@ def __init__( def shorten(self): """return canonically shortened name as recommended in the standard""" - if self.hype is None and self.hwvirt is None and self.cpubrand is None: - return self - return Flavorname(cpuram=self.cpuram, disk=self.disk, gpu=self.gpu, ib=self.ib) + return Flavorname( + cpuram=self.cpuram and self.cpuram.shorten(), + disk=self.disk and self.disk.shorten(), + hype=self.hype and self.hype.shorten(), + hwvirt=self.hwvirt and self.hwvirt.shorten(), + cpubrand=self.cpubrand and self.cpubrand.shorten(), + gpu=self.gpu and self.gpu.shorten(), + ib=self.ib and self.ib.shorten(), + ) class Outputter: @@ -257,7 +315,7 @@ class Outputter: hype = "_%s" hwvirt = "_%?" cpubrand = "_%s%0%s" - gpu = "_%s%s%s%-%s" + gpu = "_%s%s%s%-%s%-%s" ib = "_%?" 
def output_component(self, pattern, component, parts): @@ -320,7 +378,7 @@ class SyntaxV1: hwvirt = re.compile(r"\-(hwv)") # cpubrand needs final lookahead assertion to exclude confusion with _ib extension cpubrand = re.compile(r"\-([izar])([0-9]*)(h*)(?=$|\-)") - gpu = re.compile(r"\-([gG])([NAI])([^:h]*)(?::([0-9]+)|)(h*)") + gpu = re.compile(r"\-([gG])([NAI])([^:h]*)(?::([0-9]+)|)(h*)(?::([0-9]+)|)(h*)") ib = re.compile(r"\-(ib)") @staticmethod @@ -345,7 +403,7 @@ class SyntaxV2: hwvirt = re.compile(r"_(hwv)") # cpubrand needs final lookahead assertion to exclude confusion with _ib extension cpubrand = re.compile(r"_([izar])([0-9]*)(h*)(?=$|_)") - gpu = re.compile(r"_([gG])([NAI])([^\-h]*)(?:\-([0-9]+)|)(h*)") + gpu = re.compile(r"_([gG])([NAI])([^\-h]*)(?:\-([0-9]+)|)(h*)(?:\-([0-9]+)|)(h*)") ib = re.compile(r"_(ib)") @staticmethod @@ -427,6 +485,46 @@ def __call__(self, s: str, pos=0) -> Flavorname: return flavorname +class ParsingStrategy: + """ + Composite parser that accepts multiple versions of the syntax in different ways + + Follows the contract of class `Parser` + """ + + def __init__(self, vstr, parsers=(), tolerated_parsers=(), invalid_parsers=()): + self.vstr = vstr + self.parsers = parsers + self.tolerated_parsers = tolerated_parsers + self.invalid_parsers = invalid_parsers + + def __call__(self, namestr: str) -> Flavorname: + exc = None + for parser in self.parsers: + try: + return parser(namestr) + except Exception as e: + if exc is None: + exc = e + # at this point, if `self.parsers` is not empty, then `exc` is not `None` + for parser in self.tolerated_parsers: + try: + result = parser(namestr) + except Exception: + pass + else: + logger.warning(f"Name is merely tolerated {parser.vstr}: {namestr}") + return result + for parser in self.invalid_parsers: + try: + result = parser(namestr) + except Exception: + pass + else: + raise ValueError(f"Name is non-tolerable {parser.vstr}") + raise exc + + def _convert_user_input(idx, attr, target, val): """auxiliary function that converts user-input string `val` to the target attribute type""" fdesc = attr.name @@ -532,23 +630,49 @@ def __call__(self): parser_v1 = Parser("v1", SyntaxV1) parser_v2 = Parser("v2", SyntaxV2) parser_v3 = Parser("v3", SyntaxV2) # this is the same as parser_v2 except for the vstr +parser_vN = ParsingStrategy(vstr="vN", parsers=(parser_v2, parser_v1)) outname = outputter = Outputter() inputflavor = inputter = Inputter() -def flavorname_to_dict(flavorname: Flavorname) -> dict: - name_v2 = outputter(flavorname) +def flavorname_to_dict(*flavornames: Flavorname, ctx='') -> dict: + if not flavornames: + raise RuntimeError("need to supply at least one Flavorname instance!") + if ctx: + ctx = ctx + ': ' # used for logging warnings + name_collection = set() + collection = defaultdict(set) + for flavorname in flavornames: + collection['cpus'].add(flavorname.cpuram.cpus) + collection['ram'].add(flavorname.cpuram.ram) + collection['scs:cpu-type'].add(CPUTYPE_KEY[flavorname.cpuram.cputype]) + if flavorname.disk: + collection['disk'].add(flavorname.disk.disksize) + collection['nrdisks'].add(flavorname.disk.nrdisks) # this will need some postprocessing + collection['scs:disk0-type'].add(DISKTYPE_KEY[flavorname.disk.disktype or 'n']) + name_v2 = outputter(flavorname) + name_collection.add((SyntaxV1.from_v2(name_v2), "v1")) + name_collection.add((name_v2, "v2")) + short_v2 = outputter(flavorname.shorten()) + # could check whether short_v2 != name_v2, but the set will swallow everything + 
name_collection.add((SyntaxV1.from_v2(short_v2), "v1")) + name_collection.add((short_v2, "v2")) + for key, values in collection.items(): + if len(values) > 1: + logger.warning(f"{ctx}Inconsistent {key}: {', '.join(values)}") result = { - 'cpus': flavorname.cpuram.cpus, - 'cpu-type': CPUTYPE_KEY[flavorname.cpuram.cputype], - 'ram': flavorname.cpuram.ram, - 'name-v1': SyntaxV1.from_v2(name_v2), - 'name-v2': name_v2, + 'cpus': max(collection['cpus']), + 'scs:cpu-type': max(collection['scs:cpu-type'], key=CPUTYPE_SORT.__getitem__), + 'ram': max(collection['ram']), } - if flavorname.disk: - result['disk'] = flavorname.disk.disksize - for i in range(flavorname.disk.nrdisks): - result[f'disk{i}-type'] = DISKTYPE_KEY[flavorname.disk.disktype or 'n'] + if collection['nrdisks']: + result['disk'] = max(collection['disk']) + disktype = max(collection['scs:disk0-type'], key=DISKTYPE_SORT.__getitem__) + for i in range(max(collection['nrdisks'])): + result[f'scs:disk{i}-type'] = disktype + names = [item[0] for item in sorted(name_collection, key=lambda item: (-len(item[0]), item[1]))] + for idx, name in enumerate(names): + result[f'scs:name-v{idx + 1}'] = name return result @@ -610,10 +734,14 @@ def prettyname(flavorname, prefix=""): if flavorname.gpu: stg += "and " + _tbl_out(flavorname.gpu, "gputype") stg += _tbl_out(flavorname.gpu, "brand") - stg += _tbl_out(flavorname.gpu, "perf", True) stg += _tbl_out(flavorname.gpu, "gen", True) if flavorname.gpu.cu is not None: - stg += f"(w/ {flavorname.gpu.cu} CU/EU/SM) " + stg += f"(w/ {flavorname.gpu.cu} {_tbl_out(flavorname.gpu, 'perf', True)}SMs/CUs/EUs" + # Can not specify VRAM without CUs + if flavorname.gpu.vram: + stg += f" and {flavorname.gpu.vram} GiB {_tbl_out(flavorname.gpu, 'vramperf', True)}VRAM) " + else: + stg += ") " # IB if flavorname.ib: stg += "and Infiniband " diff --git a/Tests/iaas/image-metadata/image-md-check.py b/Tests/iaas/image-metadata/image-md-check.py index 0674aa8d4..77830d3a2 100755 --- a/Tests/iaas/image-metadata/image-md-check.py +++ b/Tests/iaas/image-metadata/image-md-check.py @@ -11,13 +11,20 @@ SPDX-License-Identifier: CC-BY-SA-4.0 """ +import calendar +from collections import Counter +import getopt +import logging import os import sys import time -import getopt + import openstack +logger = logging.getLogger(__name__) + + def usage(ret): "Usage information" print("Usage: image-md-check.py [options] [images]") @@ -29,20 +36,15 @@ def usage(ret): print(" -v/--verbose : Be more verbose") print(" -s/--skip-completeness: Don't check whether we have all mandatory images") print(" -h/--help : Print this usage information") - print("If you pass images, only these will be validated, otherwise all (public unless") - print(" -p is specified) images from the catalog will be processed.") + print(" [-V/--image-visibility VIS_LIST] : filters images by visibility") + print(" (default: 'public,community'; use '*' to disable)") + print("If you pass images, only these will be validated, otherwise all images") + print("(filtered according to -p, -V) from the catalog will be processed.") sys.exit(ret) # global options verbose = False -private = False -skip = False -conn = None -if "OS_CLOUD" in os.environ: - cloud = os.environ["OS_CLOUD"] -else: - cloud = None # Image list mand_images = ["Ubuntu 22.04", "Ubuntu 20.04", "Debian 11"] @@ -50,14 +52,32 @@ def usage(ret): rec2_images = ["SLES 15SP4", "RHEL 9", "RHEL 8", "Windows Server 2022", "Windows Server 2019"] sugg_images = ["openSUSE Leap 15.4", "Cirros 0.5.2", "Alpine", "Arch"] +# Just for 
nice formatting of image naming hints -- otherwise we capitalize the 1st letter +OS_LIST = ("CentOS", "AlmaLinux", "Windows Server", "RHEL", "SLES", "openSUSE") +# Auxiliary mapping for `freq2secs` (note that values are rounded up a bit on purpose) +FREQ_TO_SEC = { + "never": 0, + "critical_bug": 0, + "yearly": 365 * 24 * 3600, + "quarterly": 92 * 24 * 3600, + "monthly": 31 * 24 * 3600, + "weekly": 7 * 25 * 3600, + "daily": 25 * 3600, +} +STRICT_FORMATS = ("%Y-%m-%dT%H:%M:%SZ", ) +DATE_FORMATS = STRICT_FORMATS + ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M", "%Y-%m-%d") +MARKER_DATE_FORMATS = ("%Y-%m-%d", "%Y%m%d") +OUTDATED_MARKERS = ("old", "prev") +KIB, MIB, GIB = (1024 ** n for n in (1, 2, 3)) -def get_imagelist(priv): - "Retrieve list of public images (optionally also private images)" - if priv: - imgs = conn.image.images() - else: - imgs = conn.image.images(visibility='public') - return list(map(lambda x: x.name, imgs)) + +def recommended_name(nm, os_list=OS_LIST): + """Return capitalized name""" + for osnm in os_list: + osln = len(osnm) + if nm[:osln].casefold() == osnm.casefold(): + return osnm + nm[osln:] + return nm[0].upper() + nm[1:] class Property: @@ -76,27 +96,26 @@ def is_ok(self, props, warn = ""): if self.name in props: if self.values and not props[self.name] in self.values: if warn: - print(f'Error: Image "{warn}": value "{props[self.name]}" for property ' + print(f'ERROR: Image "{warn}": value "{props[self.name]}" for property ' f'"{self.name}" not allowed', file=sys.stderr) return False + if not props[self.name] and not self.values: + err = "ERROR" + ret = False else: - if not props[self.name] and not self.values: - err = "Error" - ret = False - else: - err = "Warning" - ret = True - if not props[self.name] and (verbose or not self.values) and warn: - print(f'{err}: Image "{warn}": empty value for property "{self.name}" not recommended', - file=sys.stderr) - return ret - elif self.ismand: + err = "WARNING" + ret = True + if not props[self.name] and (verbose or not self.values) and warn: + print(f'{err}: Image "{warn}": empty value for property "{self.name}" not recommended', + file=sys.stderr) + return ret + if self.ismand: if warn: - print(f'Error: Image "{warn}": Mandatory property "{self.name}" is missing', + print(f'ERROR: Image "{warn}": Mandatory property "{self.name}" is missing', file=sys.stderr) return False - elif warn and verbose: - print(f'Info: Image "{warn}": Optional property "{self.name}" is missing') # , file=sys.stderr) + if warn and verbose: + print(f'INFO: Image "{warn}": Optional property "{self.name}" is missing') # , file=sys.stderr) return True @@ -121,80 +140,161 @@ def is_ok(self, props, warn = ""): def is_url(stg): - "Is string stg a URL?" + """Is string stg a URL?""" idx = stg.find("://") - if idx < 0: - return False - if stg[:idx] in ("http", "https", "ftp", "ftps"): - return True - return False + return idx >= 0 and stg[:idx] in ("http", "https", "ftp", "ftps") -def validate_imageMD(imgnm): - "Retrieve image properties and test for compliance with spec" - try: - img = conn.image.find_image(imgnm) - except openstack.exceptions.DuplicateResource as exc: - print(f'Error with duplicate name "{imgnm}": {str(exc)}', file=sys.stderr) +def parse_date(stg, formats=DATE_FORMATS): + """ + Return time in Unix seconds or 0 if stg is not a valid date. 
+ We recognize: %Y-%m-%dT%H:%M:%SZ, %Y-%m-%d %H:%M[:%S], and %Y-%m-%d + """ + bdate = 0 + for fmt in formats: + try: + tmdate = time.strptime(stg, fmt) + bdate = calendar.timegm(tmdate) + break + except ValueError: # as exc: + # print(f'date {stg} does not match {fmt}\n{exc}', file=sys.stderr) + pass + return bdate + + +def freq2secs(stg): + """Convert frequency to seconds (round up a bit), return 0 if not applicable""" + secs = FREQ_TO_SEC.get(stg) + if secs is None: + print(f'ERROR: replace frequency {stg}?', file=sys.stderr) + secs = 0 + return secs + + +def is_outdated(img, bdate): + """return 1 if img (with build/regdate bdate) is outdated, + 2 if it's not hidden or marked, 3 if error""" + max_age = 0 + if "replace_frequency" in img.properties: + max_age = 1.1 * (freq2secs(img.properties["replace_frequency"])) + if not max_age or time.time() <= max_age + bdate: + return 0 + # So we found an outdated image that should have been updated + # (5a1) Check whether we are past the provided_until date + until_str = img.properties["provided_until"] + if until_str in ("none", "notice"): + return 0 + until = parse_date(until_str) + if not until: + return 3 + if time.time() > until: + return 0 + if img.is_hidden: return 1 - if not img: - print(f'Image "{imgnm}" not found' % imgnm, file=sys.stderr) + parts = img.name.rsplit(" ", 1) + marker = parts[1] if len(parts) >= 2 else "" + if marker in OUTDATED_MARKERS or parse_date(marker, formats=MARKER_DATE_FORMATS): return 1 + return 2 + + +def validate_imageMD(img, outd_list): + """Retrieve image properties and test for compliance with spec""" + imgnm = img.name # Now the hard work: Look at properties .... errors = 0 warnings = 0 # (1) recommended os_* and hw_* + # (4) image_build_date, image_original_user, image_source (opt image_description) + # (5) maintained_until, provided_until, uuid_validity, replace_frequency for prop in (*os_props, *arch_props, *hw_props): if not prop.is_ok(img, imgnm): errors += 1 + for prop in (*build_props, *maint_props): + if not prop.is_ok(img.properties, imgnm): + errors += 1 constr_name = f"{img.os_distro} {img.os_version}" # (3) os_hash if img.hash_algo not in ('sha256', 'sha512'): - print(f'Warning: Image "{imgnm}": no valid hash algorithm {img.hash_algo}', file=sys.stderr) + print(f'WARNING: Image "{imgnm}": no valid hash algorithm {img.hash_algo}', file=sys.stderr) # errors += 1 warnings += 1 - - # (4) image_build_date, image_original_user, image_source (opt image_description) - # (5) maintained_until, provided_until, uuid_validity, update_frequency - for prop in (*build_props, *maint_props): - if not prop.is_ok(img.properties, imgnm): - errors += 1 - # TODO: Some more sanity checks: + # Some more sanity checks: # - Dateformat for image_build_date - bdate = time.strptime(img.created_at, "%Y-%m-%dT%H:%M:%SZ") + rdate = parse_date(img.created_at, formats=STRICT_FORMATS) + bdate = rdate if "image_build_date" in img.properties: - try: - bdate = time.strptime(img.properties["image_build_date"][:10], "%Y-%m-%d") - # This never evals to True, but makes bdate used for flake8 - if verbose and False: - print(f'Info: Image "{imgnm}" with build date {bdate}') - except Exception: - print(f'Error: Image "{imgnm}": no valid image_build_date ' + bdate = parse_date(img.properties["image_build_date"]) + if not bdate: + print(f'ERROR: Image "{imgnm}": no valid image_build_date ' f'{img.properties["image_build_date"]}', file=sys.stderr) errors += 1 + bdate = rdate + elif bdate > rdate: + print(f'ERROR: Image "{imgnm}" with build date 
{img.properties["image_build_date"]} after registration date {img.created_at}', + file=sys.stderr) + errors += 1 + if bdate > time.time(): + print(f'ERROR: Image "{imgnm}" has build time in the future: {bdate}') + errors += 1 # - image_source should be a URL if "image_source" not in img.properties: pass # we have already noted this as error, no need to do it again elif img.properties["image_source"] == "private": if verbose: - print(f'Info: Image {imgnm} has image_source set to private', file=sys.stderr) + print(f'INFO: Image {imgnm} has image_source set to private', file=sys.stderr) elif not is_url(img.properties["image_source"]): - print(f'Error: Image "{imgnm}": image_source should be a URL or "private"', file=sys.stderr) + print(f'ERROR: Image "{imgnm}": image_source should be a URL or "private"', file=sys.stderr) errors += 1 # - uuid_validity has a distinct set of options (none, last-X, DATE, notice, forever) + img_uuid_val = img.properties.get("uuid_validity") + if img_uuid_val in (None, "none", "notice", "forever"): + pass + elif img_uuid_val[:5] == "last-" and img_uuid_val[5:].isdecimal(): + pass + elif parse_date(img_uuid_val): + pass + else: + print(f'ERROR: Image "{imgnm}": invalid uuid_validity {img_uuid_val}', file=sys.stderr) + errors += 1 # - hotfix hours (if set!) should be numeric - # (5a) Sanity: Are we actually in violation of update_frequency? + if "hotfix_hours" in img.properties: + if not img.properties["hotfix_hours"].isdecimal(): + print(f'ERROR: Image "{imgnm}" has non-numeric hotfix_hours set', file=sys.stderr) + errors += 1 + # (5a) Sanity: Are we actually in violation of replace_frequency? # This is a bit tricky: We need to disregard images that have been rotated out # - os_hidden = True is a safe sign for this # - A name with a date stamp or old or prev (and a newer exists) + outd = is_outdated(img, bdate) + if outd == 3: + print(f'ERROR: Image "{imgnm}" does not provide a valid provided until date', + file=sys.stderr) + errors += 1 + elif outd == 2: + print(f'WARNING: Image "{imgnm}" seems outdated (acc. to its repl freq) but is not hidden or otherwise marked', + file=sys.stderr) + warnings += 1 + outd_list.append(imgnm) + elif outd: + outd_list.append(imgnm) # (2) sanity min_ram (>=64), min_disk (>= size) + if img.min_ram < 64: + print(f'WARNING: Image "{imgnm}": min_ram == {img.min_ram} MiB < 64 MiB', file=sys.stderr) + warnings += 1 + # errors += 1 + if img.min_disk * GIB < img.size: + print(f'WARNING: Image "{imgnm}": img size == {img.size / MIB:.0f} MiB, but min_disk == {img.min_disk * GIB / MIB:.0f} MiB', + file=sys.stderr) + warnings += 1 + # errors += 1 # (6) tags os:*, managed_by_* - # + # Nothing to do here ... we could do a warning if those are missing ... 
+ # (7) Recommended naming if imgnm[:len(constr_name)].casefold() != constr_name.casefold(): # and verbose - # FIXME: There could be a more clever heuristic for displayed recommended names - rec_name = constr_name[0].upper()+constr_name[1:] - print(f'Warning: Image "{imgnm}" does not start with recommended name "{rec_name}"', + rec_name = recommended_name(constr_name) + print(f'WARNING: Image "{imgnm}" does not start with recommended name "{rec_name}"', file=sys.stderr) warnings += 1 @@ -217,14 +317,44 @@ def report_stdimage_coverage(imgs): return err +def miss_replacement_images(by_name, outd_list): + """Go over list of images to find replacement imgs for outd_list, return the ones that are left missing""" + rem_list = [] + for outd in outd_list: + img = None + shortnm = outd.rsplit(" ", 1)[0].rstrip() + if shortnm != outd: + img = by_name.get(shortnm) + if img is not None: + bdate = 0 + if "build_date" in img.properties: + bdate = parse_date(img.properties["build_date"]) + if not bdate: + bdate = parse_date(img.created_at, formats=STRICT_FORMATS) + if is_outdated(img, bdate): + img = None + if img is None: + rem_list.append(outd) + elif verbose: + print(f'INFO: Image "{img.name}" is a valid replacement for outdated "{outd}"', file=sys.stderr) + return rem_list + + def main(argv): "Main entry point" + # configure logging, disable verbose library logging + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO) + openstack.enable_logging(debug=False) # Option parsing - global verbose, private, skip - global cloud, conn + global verbose + image_visibility = set() + private = False + skip = False + cloud = os.environ.get("OS_CLOUD") + err = 0 try: - opts, args = getopt.gnu_getopt(argv[1:], "phvc:s", - ("private", "help", "os-cloud=", "verbose", "skip-completeness")) + opts, args = getopt.gnu_getopt(argv[1:], "phvc:sV:", + ("private", "help", "os-cloud=", "verbose", "skip-completeness", "image-visibility=")) except getopt.GetoptError: # as exc: print("CRITICAL: Command-line syntax error", file=sys.stderr) usage(1) @@ -232,31 +362,57 @@ def main(argv): if opt[0] == "-h" or opt[0] == "--help": usage(0) elif opt[0] == "-p" or opt[0] == "--private": - private = True + private = True # only keep this for backwards compatibility (we have -V now) elif opt[0] == "-v" or opt[0] == "--verbose": verbose = True + logging.getLogger().setLevel(logging.DEBUG) elif opt[0] == "-s" or opt[0] == "--skip-completeness": skip = True elif opt[0] == "-c" or opt[0] == "--os-cloud": cloud = opt[1] + if opt[0] == "-V" or opt[0] == "--image-visibility": + image_visibility.update([v.strip() for v in opt[1].split(',')]) images = args if not cloud: print("CRITICAL: Need to specify --os-cloud or set OS_CLOUD environment.", file=sys.stderr) usage(1) + if not image_visibility: + image_visibility.update(("public", "community")) + if private: + image_visibility.add("private") try: conn = openstack.connect(cloud=cloud, timeout=24) - # Do work + all_images = list(conn.image.images()) + if '*' not in image_visibility: + logger.debug(f"Images: filter for visibility {', '.join(sorted(image_visibility))}") + all_images = [img for img in all_images if img.visibility in image_visibility] + all_image_names = [f"{img.name} ({img.visibility})" for img in all_images] + logger.debug(f"Images: {', '.join(all_image_names) or '(NONE)'}") + by_name = {img.name: img for img in all_images} + if len(by_name) != len(all_images): + counter = Counter([img.name for img in all_images]) + duplicates = [name for name, count in 
counter.items() if count > 1] + print(f'WARNING: duplicate names detected: {", ".join(duplicates)}', file=sys.stderr) if not images: - images = get_imagelist(private) - err = 0 + images = [img.name for img in all_images] # Analyse image metadata - for image in images: - err += validate_imageMD(image) + outdated_images = [] + for imgnm in images: + err += validate_imageMD(by_name[imgnm], outdated_images) if not skip: err += report_stdimage_coverage(images) - except BaseException as e: - print(f"CRITICAL: {e!r}") - return 1 # just return 1 because `err` need not be assigned yet + if outdated_images: + if verbose: + print(f'INFO: The following outdated images have been detected: {outdated_images}', + file=sys.stderr) + rem_list = miss_replacement_images(by_name, outdated_images) + if rem_list: + print(f'ERROR: Outdated images without replacement: {rem_list}', file=sys.stderr) + err += len(rem_list) + except BaseException as exc: + print(f"CRITICAL: {exc!r}", file=sys.stderr) + return 1 + err + print("image-metadata-check: " + ('PASS', 'FAIL')[min(1, err)]) return err diff --git a/Tests/iaas/key-manager/check-for-key-manager.py b/Tests/iaas/key-manager/check-for-key-manager.py new file mode 100755 index 000000000..dae49acdd --- /dev/null +++ b/Tests/iaas/key-manager/check-for-key-manager.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 +"""Key Manager service checker for scs-0116-v1-key-manager-standard.md + +This script retrieves the endpoint catalog from Keystone using the OpenStack +SDK and checks whether a key manager API endpoint is present. +It then checks whether a user with the maximum of a member role can create secrets. +This will only work after policy adjustments or with the new secure RBAC roles and policies. +The script relies on an OpenStack SDK compatible clouds.yaml file for +authentication with Keystone. +""" + +import argparse +import logging +import os +import sys + +import openstack + +logger = logging.getLogger(__name__) + + +def initialize_logging(): + logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO) + + +def check_for_member_role(conn: openstack.connection.Connection) -> None: + """Checks whether the current user has at maximum privileges of the member role. + + :param conn: connection to an OpenStack cloud. + :returns: boolean, when role with most privileges is member + """ + role_names = set(conn.session.auth.get_access(conn.session).role_names) + if role_names & {"admin", "manager"}: + return False + if "reader" in role_names: + logger.info("User has reader role.") + custom_roles = sorted(role_names - {"reader", "member"}) + if custom_roles: + logger.info(f"User has custom roles {', '.join(custom_roles)}.") + return "member" in role_names + + +def check_presence_of_key_manager(conn: openstack.connection.Connection) -> None: + try: + services = conn.service_catalog + except Exception: + logger.critical("Could not access Catalog endpoint.") + raise + + for svc in services: + svc_type = svc["type"] + if svc_type == "key-manager": + # key-manager is present + # now we want to check whether a user with member role + # can create and access secrets + logger.info("Key Manager is present") + return True + + +def _find_secret(conn: openstack.connection.Connection, secret_name_or_id: str): + """Replacement method for finding secrets. + + Mimicks the behavior of Connection.key_manager.find_secret() + but fixes an issue with the internal implementation raising an + exception due to an unexpected microversion parameter. 
+ """ + secrets = conn.key_manager.secrets() + for s in secrets: + if s.name == secret_name_or_id or s.id == secret_name_or_id: + return s + + +def check_key_manager_permissions(conn: openstack.connection.Connection) -> None: + """ + After checking that the current user only has the member and maybe the + reader role, this method verifies that the user with a member role + has sufficient access to the Key Manager API functionality. + """ + secret_name = "scs-member-role-test-secret" + try: + existing_secret = _find_secret(conn, secret_name) + if existing_secret: + conn.key_manager.delete_secret(existing_secret) + + conn.key_manager.create_secret( + name=secret_name, + payload_content_type="text/plain", + secret_type="opaque", + payload="foo", + ) + try: + new_secret = _find_secret(conn, secret_name) + if not new_secret: + raise ValueError(f"Secret '{secret_name}' was not discoverable by the user") + finally: + conn.key_manager.delete_secret(new_secret) + except openstack.exceptions.ForbiddenException: + logger.debug('exception details', exc_info=True) + logger.error( + "Users with the 'member' role can use Key Manager API: FAIL" + ) + return 1 + logger.info( + "Users with the 'member' role can use Key Manager API: PASS" + ) + + +def main(): + initialize_logging() + parser = argparse.ArgumentParser(description="SCS Mandatory IaaS Service Checker") + parser.add_argument( + "--os-cloud", + type=str, + help="Name of the cloud from clouds.yaml, alternative " + "to the OS_CLOUD environment variable", + ) + parser.add_argument( + "--debug", action="store_true", help="Enable OpenStack SDK debug logging" + ) + args = parser.parse_args() + # @mbuechse: I think this is so much as to be unusable! + # (If necessary, a developer can always uncomment) + # openstack.enable_logging(debug=args.debug) + if args.debug: + logger.setLevel(logging.DEBUG) + + # parse cloud name for lookup in clouds.yaml + cloud = args.os_cloud or os.environ.get("OS_CLOUD", None) + if not cloud: + logger.critical( + "You need to have the OS_CLOUD environment variable set to your cloud " + "name or pass it via --os-cloud" + ) + return 2 + + with openstack.connect(cloud=cloud) as conn: + if not check_for_member_role(conn): + logger.critical("Cannot test key-manager permissions. User has wrong roles") + return 2 + if check_presence_of_key_manager(conn): + return check_key_manager_permissions(conn) + else: + # not an error, because key manager is merely recommended + logger.warning("There is no key-manager endpoint in the cloud.") + + +if __name__ == "__main__": + try: + sys.exit(main() or 0) + except SystemExit as e: + if e.code < 2: + print("key-manager-check: " + ('PASS', 'FAIL')[min(1, e.code)]) + raise + except BaseException: + logger.critical("exception", exc_info=True) + sys.exit(2) diff --git a/Tests/iaas/mandatory-services/README.md b/Tests/iaas/mandatory-services/README.md new file mode 100644 index 000000000..33a66d7f4 --- /dev/null +++ b/Tests/iaas/mandatory-services/README.md @@ -0,0 +1,66 @@ +# Mandatory IaaS Service APIs Test Suite + +## Test Environment Setup + +### Test Execution Environment + +> **NOTE:** The test execution procedure does not require cloud admin rights. + +To execute the test suite a valid cloud configuration for the OpenStack SDK in the shape of "`clouds.yaml`" is mandatory[^1]. 
+**The file is expected to be located in the current working directory where the test script is executed unless configured otherwise.**
+
+[^1]: [OpenStack Documentation: Configuring OpenStack SDK Applications](https://docs.openstack.org/openstacksdk/latest/user/config/configuration.html)
+
+The test execution environment can be located on any system outside of the cloud infrastructure that has OpenStack API access.
+Make sure that the API access is configured properly in "`clouds.yaml`".
+
+It is recommended to use a Python virtual environment[^2].
+Next, install the OpenStack SDK and boto3 required by the test suite:
+
+```bash
+pip3 install openstacksdk
+pip3 install boto3
+```
+
+Within this environment execute the test suite.
+
+[^2]: [Python 3 Documentation: Virtual Environments and Packages](https://docs.python.org/3/tutorial/venv.html)
+
+## Test Execution
+
+The test suite is executed as follows:
+
+```bash
+python3 mandatory-iaas-services.py --os-cloud mycloud
+```
+
+As an alternative to "`--os-cloud`", the "`OS_CLOUD`" environment variable may be specified instead.
+The parameter is used to look up the correct cloud configuration in "`clouds.yaml`".
+For the example command above, this file should contain a `clouds.mycloud` section like this:
+
+```yaml
+---
+clouds:
+  mycloud:
+    auth:
+      auth_url: ...
+      ...
+    ...
+```
+
+If the deployment uses s3 only and does not have the object store endpoint specified in the service catalog, the "`--s3-endpoint`" flag may be used to specify the s3 endpoint.
+In that case the "`--s3-access`" and "`--s3-access-secret`" flags must also be set to give all necessary credentials to the test suite:
+
+```bash
+python3 mandatory-iaas-services.py --os-cloud mycloud2 --s3-endpoint "http://s3-endpoint:9000" --s3-access test-user --s3-access-secret test-user-secret
+```
+
+For any further options consult the output of "`python3 mandatory-iaas-services.py --help`".
+
+### Script Behavior & Test Results
+
+If all tests pass, the script will return with an exit code of `0`.
+
+If any test fails, the script will halt, print the exact error to `stderr` and return with a non-zero exit code.
+
+There is no cleanup done by this test, as it mainly inspects the service catalog; only for the object store does it create a bucket, which is then promptly deleted.
diff --git a/Tests/iaas/mandatory-services/mandatory-iaas-services.py b/Tests/iaas/mandatory-services/mandatory-iaas-services.py
new file mode 100644
index 000000000..ab5cc0a2f
--- /dev/null
+++ b/Tests/iaas/mandatory-services/mandatory-iaas-services.py
@@ -0,0 +1,299 @@
+"""Mandatory APIs checker
+This script retrieves the endpoint catalog from Keystone using the OpenStack
+SDK and checks whether all mandatory API endpoints are present.
+The script relies on an OpenStack SDK compatible clouds.yaml file for
+authentication with Keystone.
+As the s3 endpoint might differ, a missing one will only result in a warning.
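To make the catalog check concrete, here is a minimal sketch of the underlying query; the cloud name `mycloud` is a placeholder and the set of service types mirrors the list used by the script:

```python
import openstack

conn = openstack.connect(cloud="mycloud")  # placeholder clouds.yaml entry
present = {svc["type"] for svc in conn.service_catalog}
mandatory = {"compute", "identity", "image", "network",
             "load-balancer", "placement", "object-store"}
missing = mandatory - present
print("missing mandatory endpoints:", ", ".join(sorted(missing)) or "none")
```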
+""" + +import argparse +import boto3 +from collections import Counter +import logging +import os +import re +import sys +import uuid + +import openstack + + +TESTCONTNAME = "scs-test-container" + +logger = logging.getLogger(__name__) +mandatory_services = ["compute", "identity", "image", "network", + "load-balancer", "placement", "object-store"] +block_storage_service = ["volume", "volumev3", "block-storage"] + + +def connect(cloud_name: str) -> openstack.connection.Connection: + """Create a connection to an OpenStack cloud + :param string cloud_name: + The name of the configuration to load from clouds.yaml. + :returns: openstack.connnection.Connection + """ + return openstack.connect( + cloud=cloud_name, + ) + + +def check_presence_of_mandatory_services(cloud_name: str, s3_credentials=None): + try: + connection = connect(cloud_name) + services = connection.service_catalog + except Exception as e: + print(str(e)) + raise Exception( + f"Connection to cloud '{cloud_name}' was not successfully. " + f"The Catalog endpoint could not be accessed. " + f"Please check your cloud connection and authorization." + ) + + if s3_credentials: + mandatory_services.remove("object-store") + for svc in services: + svc_type = svc['type'] + if svc_type in mandatory_services: + mandatory_services.remove(svc_type) + continue + if svc_type in block_storage_service: + block_storage_service.remove(svc_type) + + bs_service_not_present = 0 + if len(block_storage_service) == 3: + # neither block-storage nor volume nor volumev3 is present + # we must assume, that there is no volume service + logger.error("FAIL: No block-storage (volume) endpoint found.") + mandatory_services.append(block_storage_service[0]) + bs_service_not_present = 1 + if not mandatory_services: + # every mandatory service API had an endpoint + return 0 + bs_service_not_present + else: + # there were multiple mandatory APIs not found + logger.error(f"FAIL: The following endpoints are missing: " + f"{mandatory_services}") + return len(mandatory_services) + bs_service_not_present + + +def list_containers(conn): + "Gets a list of buckets" + return [cont.name for cont in conn.object_store.containers()] + + +def create_container(conn, name): + "Creates a test container" + conn.object_store.create_container(name) + return list_containers(conn) + + +def del_container(conn, name): + "Deletes a test container" + conn.object_store.delete(name) + # return list_containers(conn) + + +def s3_conn(creds, conn=None): + "Return an s3 client conn" + vrfy = True + if conn: + cacert = conn.config.config.get("cacert") + # TODO: Handle self-signed certs (from ca_cert in openstack config) + if cacert: + print("WARNING: Trust all Certificates in S3, " + f"OpenStack uses {cacert}", file=sys.stderr) + vrfy = False + return boto3.resource('s3', aws_access_key_id=creds["AK"], + aws_secret_access_key=creds["SK"], + endpoint_url=creds["HOST"], + verify=vrfy) + + +def list_s3_buckets(s3): + "Get a list of s3 buckets" + return [buck.name for buck in s3.buckets.all()] + + +def create_bucket(s3, name): + "Create an s3 bucket" + # bucket = s3.Bucket(name) + # bucket.create() + s3.create_bucket(Bucket=name) + return list_s3_buckets(s3) + + +def del_bucket(s3, name): + "Delete an s3 bucket" + buck = s3.Bucket(name=name) + buck.delete() + # s3.delete_bucket(Bucket=name) + + +def s3_from_env(creds, fieldnm, env, prefix=""): + "Set creds[fieldnm] to os.environ[env] if set" + if env in os.environ: + creds[fieldnm] = prefix + os.environ[env] + if fieldnm not in creds: + print(f"WARNING: 
s3_creds[{fieldnm}] not set", file=sys.stderr) + + +def s3_from_ostack(creds, conn, endpoint): + "Set creds from openstack swift/keystone" + rgx = re.compile(r"^(https*://[^/]*)/") + match = rgx.match(endpoint) + if match: + creds["HOST"] = match.group(1) + # Use first ec2 cred if one exists + ec2_creds = [cred for cred in conn.identity.credentials() + if cred.type == "ec2"] + if len(ec2_creds): + # FIXME: Assume cloud is not evil + ec2_dict = eval(ec2_creds[0].blob, {"null": None}) + creds["AK"] = ec2_dict["access"] + creds["SK"] = ec2_dict["secret"] + return + # Generate keyid and secret + ak = uuid.uuid4().hex + sk = uuid.uuid4().hex + blob = f'{{"access": "{ak}", "secret": "{sk}"}}' + try: + conn.identity.create_credential(type="ec2", blob=blob, + user_id=conn.current_user_id, + project_id=conn.current_project_id) + creds["AK"] = ak + creds["SK"] = sk + except BaseException as exc: + print(f"WARNING: ec2 creds creation failed: {exc!s}", file=sys.stderr) + # pass + + +def check_for_s3_and_swift(cloud_name: str, s3_credentials=None): + # If we get credentials we assume, that there is no Swift and only test s3 + if s3_credentials: + try: + s3 = s3_conn(s3_credentials) + except Exception as e: + print(str(e)) + logger.error("FAIL: Connection to s3 failed.") + return 1 + s3_buckets = list_s3_buckets(s3) + if not s3_buckets: + s3_buckets = create_bucket(s3, TESTCONTNAME) + assert s3_buckets + if s3_buckets == [TESTCONTNAME]: + del_bucket(s3, TESTCONTNAME) + # everything worked, and we don't need to test for Swift: + print("SUCCESS: S3 exists") + return 0 + # there were no credentials given, so we assume s3 is accessable via + # the service catalog and Swift might exist too + try: + connection = connect(cloud_name) + connection.authorize() + except Exception as e: + print(str(e)) + raise Exception( + f"Connection to cloud '{cloud_name}' was not successfully. " + f"The Catalog endpoint could not be accessed. " + f"Please check your cloud connection and authorization." + ) + s3_creds = {} + try: + endpoint = connection.object_store.get_endpoint() + except Exception as e: + logger.error( + f"FAIL: No object store endpoint found in cloud " + f"'{cloud_name}'. No testing for the s3 service possible. 
" + f"Details: %s", e + ) + return 1 + # Get S3 endpoint (swift) and ec2 creds from OpenStack (keystone) + s3_from_ostack(s3_creds, connection, endpoint) + # Overrides (var names are from libs3, in case you wonder) + s3_from_env(s3_creds, "HOST", "S3_HOSTNAME", "https://") + s3_from_env(s3_creds, "AK", "S3_ACCESS_KEY_ID") + s3_from_env(s3_creds, "SK", "S3_SECRET_ACCESS_KEY") + + s3 = s3_conn(s3_creds, connection) + s3_buckets = list_s3_buckets(s3) + if not s3_buckets: + s3_buckets = create_bucket(s3, TESTCONTNAME) + assert s3_buckets + + # If we got till here, s3 is working, now swift + swift_containers = list_containers(connection) + # if not swift_containers: + # swift_containers = create_container(connection, TESTCONTNAME) + result = 0 + if Counter(s3_buckets) != Counter(swift_containers): + print("WARNING: S3 buckets and Swift Containers differ:\n" + f"S3: {sorted(s3_buckets)}\nSW: {sorted(swift_containers)}") + result = 1 + else: + print("SUCCESS: S3 and Swift exist and agree") + # Clean up + # FIXME: Cleanup created EC2 credential + # if swift_containers == [TESTCONTNAME]: + # del_container(connection, TESTCONTNAME) + # Cleanup created S3 bucket + if s3_buckets == [TESTCONTNAME]: + del_bucket(s3, TESTCONTNAME) + return result + + +def main(): + parser = argparse.ArgumentParser( + description="SCS Mandatory IaaS Service Checker") + parser.add_argument( + "--os-cloud", type=str, + help="Name of the cloud from clouds.yaml, alternative " + "to the OS_CLOUD environment variable" + ) + parser.add_argument( + "--s3-endpoint", type=str, + help="URL to the s3 service." + ) + parser.add_argument( + "--s3-access", type=str, + help="Access Key to connect to the s3 service." + ) + parser.add_argument( + "--s3-access-secret", type=str, + help="Access secret to connect to the s3 service." 
+ ) + parser.add_argument( + "--debug", action="store_true", + help="Enable OpenStack SDK debug logging" + ) + args = parser.parse_args() + openstack.enable_logging(debug=args.debug) + + # parse cloud name for lookup in clouds.yaml + cloud = os.environ.get("OS_CLOUD", None) + if args.os_cloud: + cloud = args.os_cloud + assert cloud, ( + "You need to have the OS_CLOUD environment variable set to your cloud " + "name or pass it via --os-cloud" + ) + + s3_credentials = None + if args.s3_endpoint: + if (not args.s3_access) or (not args.s3_access_secret): + print("WARNING: test for external s3 needs access key and access secret.") + s3_credentials = { + "AK": args.s3_access, + "SK": args.s3_access_secret, + "HOST": args.s3_endpoint + } + elif args.s3_access or args.s3_access_secret: + print("WARNING: access to s3 was given, but no endpoint provided.") + + result = check_presence_of_mandatory_services(cloud, s3_credentials) + result = result + check_for_s3_and_swift(cloud, s3_credentials) + + return result + + +if __name__ == "__main__": + main() diff --git a/Tests/iaas/scs-0103-v1-flavors.yaml b/Tests/iaas/scs-0103-v1-flavors.yaml index 9d23b8527..66c02cdd6 100644 --- a/Tests/iaas/scs-0103-v1-flavors.yaml +++ b/Tests/iaas/scs-0103-v1-flavors.yaml @@ -1,194 +1,194 @@ meta: - name_key: name-v2 + name_key: "scs:name-v2" flavor_groups: - status: mandatory list: - name: SCS-1V-4 cpus: 1 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 4 - name-v1: SCS-1V:4 - name-v2: SCS-1V-4 + "scs:name-v1": SCS-1V:4 + "scs:name-v2": SCS-1V-4 - name: SCS-2V-8 cpus: 2 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 8 - name-v1: SCS-2V:8 - name-v2: SCS-2V-8 + "scs:name-v1": SCS-2V:8 + "scs:name-v2": SCS-2V-8 - name: SCS-4V-16 cpus: 4 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 16 - name-v1: SCS-4V:16 - name-v2: SCS-4V-16 + "scs:name-v1": SCS-4V:16 + "scs:name-v2": SCS-4V-16 - name: SCS-8V-32 cpus: 8 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 32 - name-v1: SCS-8V:32 - name-v2: SCS-8V-32 + "scs:name-v1": SCS-8V:32 + "scs:name-v2": SCS-8V-32 - name: SCS-1V-2 cpus: 1 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 2 - name-v1: SCS-1V:2 - name-v2: SCS-1V-2 + "scs:name-v1": SCS-1V:2 + "scs:name-v2": SCS-1V-2 - name: SCS-2V-4 cpus: 2 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 4 - name-v1: SCS-2V:4 - name-v2: SCS-2V-4 + "scs:name-v1": SCS-2V:4 + "scs:name-v2": SCS-2V-4 - name: SCS-4V-8 cpus: 4 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 8 - name-v1: SCS-4V:8 - name-v2: SCS-4V-8 + "scs:name-v1": SCS-4V:8 + "scs:name-v2": SCS-4V-8 - name: SCS-8V-16 cpus: 8 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 16 - name-v1: SCS-8V:16 - name-v2: SCS-8V-16 + "scs:name-v1": SCS-8V:16 + "scs:name-v2": SCS-8V-16 - name: SCS-16V-32 cpus: 16 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 32 - name-v1: SCS-16V:32 - name-v2: SCS-16V-32 + "scs:name-v1": SCS-16V:32 + "scs:name-v2": SCS-16V-32 - name: SCS-1V-8 cpus: 1 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 8 - name-v1: SCS-1V:8 - name-v2: SCS-1V-8 + "scs:name-v1": SCS-1V:8 + "scs:name-v2": SCS-1V-8 - name: SCS-2V-16 cpus: 2 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 16 - name-v1: SCS-2V:16 - name-v2: SCS-2V-16 + "scs:name-v1": SCS-2V:16 + "scs:name-v2": SCS-2V-16 - name: SCS-4V-32 cpus: 4 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 32 - name-v1: SCS-4V:32 - name-v2: SCS-4V-32 + "scs:name-v1": SCS-4V:32 + "scs:name-v2": SCS-4V-32 - 
name: SCS-1L-1 cpus: 1 - cpu-type: crowded-core + "scs:cpu-type": crowded-core ram: 1 - name-v1: SCS-1L:1 - name-v2: SCS-1L-1 + "scs:name-v1": SCS-1L:1 + "scs:name-v2": SCS-1L-1 - status: mandatory list: - name: SCS-2V-4-20s cpus: 2 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 4 disk: 20 - disk0-type: ssd - name-v1: SCS-2V:4:20s - name-v2: SCS-2V-4-20s + "scs:disk0-type": ssd + "scs:name-v1": SCS-2V:4:20s + "scs:name-v2": SCS-2V-4-20s - name: SCS-4V-16-100s cpus: 4 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 16 disk: 100 - disk0-type: ssd - name-v1: SCS-4V:16:100s - name-v2: SCS-4V-16-100s + "scs:disk0-type": ssd + "scs:name-v1": SCS-4V:16:100s + "scs:name-v2": SCS-4V-16-100s - status: recommended list: - name: SCS-1V-4-10 cpus: 1 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 4 disk: 10 - name-v1: SCS-1V:4:10 - name-v2: SCS-1V-4-10 + "scs:name-v1": SCS-1V:4:10 + "scs:name-v2": SCS-1V-4-10 - name: SCS-2V-8-20 cpus: 2 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 8 disk: 20 - name-v1: SCS-2V:8:20 - name-v2: SCS-2V-8-20 + "scs:name-v1": SCS-2V:8:20 + "scs:name-v2": SCS-2V-8-20 - name: SCS-4V-16-50 cpus: 4 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 16 disk: 50 - name-v1: SCS-4V:16:50 - name-v2: SCS-4V-16-50 + "scs:name-v1": SCS-4V:16:50 + "scs:name-v2": SCS-4V-16-50 - name: SCS-8V-32-100 cpus: 8 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 32 disk: 100 - name-v1: SCS-8V:32:100 - name-v2: SCS-8V-32-100 + "scs:name-v1": SCS-8V:32:100 + "scs:name-v2": SCS-8V-32-100 - name: SCS-1V-2-5 cpus: 1 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 2 disk: 5 - name-v1: SCS-1V:2:5 - name-v2: SCS-1V-2-5 + "scs:name-v1": SCS-1V:2:5 + "scs:name-v2": SCS-1V-2-5 - name: SCS-2V-4-10 cpus: 2 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 4 disk: 10 - name-v1: SCS-2V:4:10 - name-v2: SCS-2V-4-10 + "scs:name-v1": SCS-2V:4:10 + "scs:name-v2": SCS-2V-4-10 - name: SCS-4V-8-20 cpus: 4 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 8 disk: 20 - name-v1: SCS-4V:8:20 - name-v2: SCS-4V-8-20 + "scs:name-v1": SCS-4V:8:20 + "scs:name-v2": SCS-4V-8-20 - name: SCS-8V-16-50 cpus: 8 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 16 disk: 50 - name-v1: SCS-8V:16:50 - name-v2: SCS-8V-16-50 + "scs:name-v1": SCS-8V:16:50 + "scs:name-v2": SCS-8V-16-50 - name: SCS-16V-32-100 cpus: 16 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 32 disk: 100 - name-v1: SCS-16V:32:100 - name-v2: SCS-16V-32-100 + "scs:name-v1": SCS-16V:32:100 + "scs:name-v2": SCS-16V-32-100 - name: SCS-1V-8-20 cpus: 1 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 8 disk: 20 - name-v1: SCS-1V:8:20 - name-v2: SCS-1V-8-20 + "scs:name-v1": SCS-1V:8:20 + "scs:name-v2": SCS-1V-8-20 - name: SCS-2V-16-50 cpus: 2 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 16 disk: 50 - name-v1: SCS-2V:16:50 - name-v2: SCS-2V-16-50 + "scs:name-v1": SCS-2V:16:50 + "scs:name-v2": SCS-2V-16-50 - name: SCS-4V-32-100 cpus: 4 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 32 disk: 100 - name-v1: SCS-4V:32:100 - name-v2: SCS-4V-32-100 + "scs:name-v1": SCS-4V:32:100 + "scs:name-v2": SCS-4V-32-100 - name: SCS-1L-1-5 cpus: 1 - cpu-type: crowded-core + "scs:cpu-type": crowded-core ram: 1 disk: 5 - name-v1: SCS-1L:1:5 - name-v2: SCS-1L-1-5 + "scs:name-v1": SCS-1L:1:5 + "scs:name-v2": SCS-1L-1-5 diff --git a/Tests/iaas/scs-0104-v1-images-v5.yaml b/Tests/iaas/scs-0104-v1-images-v5.yaml new file mode 100644 index 
000000000..180fbfa69 --- /dev/null +++ b/Tests/iaas/scs-0104-v1-images-v5.yaml @@ -0,0 +1,36 @@ +images: +# mandatory +- name: "Ubuntu 24.04" + source: + - https://cloud-images.ubuntu.com/releases/noble/ + - https://cloud-images.ubuntu.com/noble/ + status: mandatory +# recommended +- name: "Debian 12" # stable + source: + - https://cloud.debian.org/images/cloud/bookworm/ + - https://cdimage.debian.org/cdimage/cloud/bookworm/ + status: recommended +- name: "ubuntu-capi-image" + # this name_scheme uses `-` to separate base name "ubuntu-capi-image" from version + # latest openstack-image-manager can be told to use `-` by setting `separator: "-"` on the image + name_scheme: "ubuntu-capi-image-v[0-9]\\.[0-9]+(\\.[0-9]+)?" + source: https://swift.services.a.regiocloud.tech/swift/v1/AUTH_b182637428444b9aa302bb8d5a5a418c/openstack-k8s-capi-images/ubuntu-2204-kube + status: recommended +# optional +- name: "Ubuntu 22.04" + source: + - https://cloud-images.ubuntu.com/releases/jammy/ + - https://cloud-images.ubuntu.com/jammy/ +- name: "Ubuntu 20.04" + source: + - https://cloud-images.ubuntu.com/releases/focal/ + - https://cloud-images.ubuntu.com/focal/ +- name: "Debian 11" # oldstable + source: + - https://cloud.debian.org/images/cloud/bullseye/ + - https://cdimage.debian.org/cdimage/cloud/bullseye/ +- name: "Debian 10" # extended LTS since 2024-07-01, see https://wiki.debian.org/LTS/Extended + source: + - https://cloud.debian.org/images/cloud/buster/ + - https://cdimage.debian.org/cdimage/cloud/buster/ diff --git a/Tests/iaas/scs-0104-v1-images.yaml b/Tests/iaas/scs-0104-v1-images.yaml index 22681e93d..84c5fa333 100644 --- a/Tests/iaas/scs-0104-v1-images.yaml +++ b/Tests/iaas/scs-0104-v1-images.yaml @@ -5,7 +5,7 @@ images: - https://cloud-images.ubuntu.com/jammy/ status: mandatory - name: "ubuntu-capi-image" - name_scheme: "ubuntu-capi-image v[0-9].[0-9]+(.[0-9]+)?" + name_scheme: "ubuntu-capi-image v[0-9]\\.[0-9]+(\\.[0-9]+)?" source: https://swift.services.a.regiocloud.tech/swift/v1/AUTH_b182637428444b9aa302bb8d5a5a418c/openstack-k8s-capi-images/ubuntu-2204-kube status: recommended - name: "Ubuntu 20.04" diff --git a/Tests/iaas/security-groups/default-security-group-rules.py b/Tests/iaas/security-groups/default-security-group-rules.py new file mode 100755 index 000000000..def511956 --- /dev/null +++ b/Tests/iaas/security-groups/default-security-group-rules.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python3 +"""Default Security Group Rules Checker + +This script tests the absence of any ingress default security group rule +except for ingress rules from the same Security Group. Furthermore the +presence of default rules for egress traffic is checked. 
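As a rough illustration of the primary code path, the rules can be listed directly via the SDK; this assumes a Networking API recent enough to expose default security group rules (the script falls back to an alternative method otherwise), and `mycloud` is a placeholder cloud name:

```python
import openstack

conn = openstack.connect(cloud="mycloud")  # placeholder clouds.yaml entry
for rule in conn.network.default_security_group_rules():
    # the checker flags ingress rules that are not limited to the same group
    print(rule["direction"], rule["ethertype"],
          rule.remote_group_id, rule.remote_ip_prefix)
```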
+""" +import argparse +from collections import Counter +import logging +import os +import sys + +import openstack +from openstack.exceptions import ResourceNotFound + +logger = logging.getLogger(__name__) + +SG_NAME = "scs-test-default-sg" +DESCRIPTION = "scs-test-default-sg" + + +def check_default_rules(rules, short=False): + """ + counts all verall ingress rules and egress rules, depending on the requested testing mode + + :param bool short + if short is True, the testing mode is set on short for older OpenStack versions + """ + ingress_rules = egress_rules = 0 + egress_vars = {'IPv4': {}, 'IPv6': {}} + for key, value in egress_vars.items(): + value['default'] = 0 + if not short: + value['custom'] = 0 + if not rules: + logger.info("No default security group rules defined.") + for rule in rules: + direction = rule["direction"] + ethertype = rule["ethertype"] + if direction == "ingress": + if not short: + # we allow ingress from the same security group + # but only for the default security group + if rule.remote_group_id == "PARENT" and not rule["used_in_non_default_sg"]: + continue + ingress_rules += 1 + elif direction == "egress" and ethertype in egress_vars: + egress_rules += 1 + if short: + egress_vars[ethertype]['default'] += 1 + continue + if rule.remote_ip_prefix: + # this rule does not allow traffic to all external ips + continue + # note: these two are not mutually exclusive + if rule["used_in_default_sg"]: + egress_vars[ethertype]['default'] += 1 + if rule["used_in_non_default_sg"]: + egress_vars[ethertype]['custom'] += 1 + # test whether there are no unallowed ingress rules + if ingress_rules: + logger.error(f"Expected no default ingress rules, found {ingress_rules}.") + # test whether all expected egress rules are present + missing = [(key, key2) for key, val in egress_vars.items() for key2, val2 in val.items() if not val2] + if missing: + logger.error( + "Expected rules for egress for IPv4 and IPv6 both for default and custom security groups. " + f"Missing rule types: {', '.join(str(x) for x in missing)}" + ) + logger.info(str({ + "Unallowed Ingress Rules": ingress_rules, + "Egress Rules": egress_rules, + })) + + +def create_security_group(conn, sg_name: str = SG_NAME, description: str = DESCRIPTION): + """Create security group in openstack + + :returns: + ~openstack.network.v2.security_group.SecurityGroup: The new security group or None + """ + sg = conn.network.create_security_group(name=sg_name, description=description) + return sg.id + + +def delete_security_group(conn, sg_id): + conn.network.delete_security_group(sg_id) + # in case of a successful delete finding the sg will throw an exception + try: + conn.network.find_security_group(name_or_id=sg_id) + except ResourceNotFound: + logger.debug(f"Security group {sg_id} was deleted successfully.") + except Exception: + logger.critical(f"Security group {sg_id} was not deleted successfully") + raise + + +def altern_test_rules(connection: openstack.connection.Connection): + sg_id = create_security_group(connection) + try: + sg = connection.network.find_security_group(name_or_id=sg_id) + check_default_rules(sg.security_group_rules, short=True) + finally: + delete_security_group(connection, sg_id) + + +def test_rules(connection: openstack.connection.Connection): + try: + rules = list(connection.network.default_security_group_rules()) + except ResourceNotFound: + logger.info( + "API call failed. OpenStack components might not be up to date. " + "Falling back to old-style test method. 
" + ) + logger.debug("traceback", exc_info=True) + altern_test_rules(connection) + else: + check_default_rules(rules) + + +class CountingHandler(logging.Handler): + def __init__(self, level=logging.NOTSET): + super().__init__(level=level) + self.bylevel = Counter() + + def handle(self, record): + self.bylevel[record.levelno] += 1 + + +def main(): + parser = argparse.ArgumentParser( + description="SCS Default Security Group Rules Checker", + ) + parser.add_argument( + "--os-cloud", + type=str, + help="Name of the cloud from clouds.yaml, alternative " + "to the OS_CLOUD environment variable", + ) + parser.add_argument( + "--debug", action="store_true", help="Enable debug logging", + ) + args = parser.parse_args() + openstack.enable_logging(debug=args.debug) + logging.basicConfig( + format="%(levelname)s: %(message)s", + level=logging.DEBUG if args.debug else logging.INFO, + ) + + # count the number of log records per level (used for summary and return code) + counting_handler = CountingHandler(level=logging.INFO) + logger.addHandler(counting_handler) + + # parse cloud name for lookup in clouds.yaml + cloud = args.os_cloud or os.environ.get("OS_CLOUD", None) + if not cloud: + raise ValueError( + "You need to have the OS_CLOUD environment variable set to your cloud " + "name or pass it via --os-cloud" + ) + + with openstack.connect(cloud) as conn: + test_rules(conn) + + c = counting_handler.bylevel + logger.debug(f"Total critical / error / warning: {c[logging.CRITICAL]} / {c[logging.ERROR]} / {c[logging.WARNING]}") + if not c[logging.CRITICAL]: + print("security-groups-default-rules-check: " + ('PASS', 'FAIL')[min(1, c[logging.ERROR])]) + return min(127, c[logging.CRITICAL] + c[logging.ERROR]) # cap at 127 due to OS restrictions + + +if __name__ == "__main__": + try: + sys.exit(main()) + except SystemExit: + raise + except BaseException as exc: + logging.debug("traceback", exc_info=True) + logging.critical(str(exc)) + sys.exit(1) diff --git a/Tests/iaas/standard-flavors/flavors-openstack.py b/Tests/iaas/standard-flavors/flavors-openstack.py index a90817d07..2680fa822 100755 --- a/Tests/iaas/standard-flavors/flavors-openstack.py +++ b/Tests/iaas/standard-flavors/flavors-openstack.py @@ -27,7 +27,9 @@ logger = logging.getLogger(__name__) -RESERVED_KEYS = ('scs:name-v1', 'scs:name-v2') +# do not enforce this part of the standard, because it doesn't work for the customers +# RESERVED_KEYS = ('scs:name-v1', 'scs:name-v2') +RESERVED_KEYS = () def print_usage(file=sys.stderr): @@ -103,7 +105,6 @@ def main(argv): logger.critical("Flavor definition missing 'flavor_groups' field") name_key = flavor_spec_data['meta']['name_key'] - es_name_key = f"scs:{name_key}" # compute union of all flavor groups, copying group info (mainly "status") to each flavor # check if the spec is complete while we are at it flavor_specs = [] @@ -126,9 +127,9 @@ def main(argv): with openstack.connect(cloud=cloud, timeout=32) as conn: present_flavors = conn.list_flavors(get_extra=True) by_name = { - flavor.extra_specs[es_name_key]: flavor + flavor.extra_specs[name_key]: flavor for flavor in present_flavors - if es_name_key in flavor.extra_specs + if name_key in flavor.extra_specs } by_legacy_name = {flavor.name: flavor for flavor in present_flavors} # for reserved keys, keep track of all flavors that don't have a matching spec @@ -143,10 +144,10 @@ def main(argv): if not flavor: flavor = by_legacy_name.get(flavor_spec[name_key]) if flavor: - logger.warning(f"Flavor '{flavor_spec['name']}' found via name only, missing property 
{es_name_key!r}") + logger.warning(f"Flavor '{flavor_spec['name']}' found via name only, missing property {name_key!r}") else: status = flavor_spec['_group']['status'] - level = {"mandatory": logging.ERROR}.get(status, logging.INFO) + level = {"mandatory": logging.ERROR}.get(status, logging.WARNING) logger.log(level, f"Missing {status} flavor '{flavor_spec['name']}'") continue # this flavor has a matching spec @@ -163,9 +164,9 @@ def main(argv): report = [ f"{key}: {es_value!r} should be {value!r}" for key, value, es_value in [ - (key, value, flavor.extra_specs.get(f"scs:{key}")) + (key, value, flavor.extra_specs.get(key)) for key, value in flavor_spec.items() - if key not in ('_group', 'name', 'cpus', 'ram', 'disk') + if key.startswith("scs:") ] if value != es_value ] @@ -183,6 +184,8 @@ def main(argv): c = counting_handler.bylevel logger.debug(f"Total critical / error / info: {c[logging.CRITICAL]} / {c[logging.ERROR]} / {c[logging.INFO]}") + if not c[logging.CRITICAL]: + print("standard-flavors-check: " + ('PASS', 'FAIL')[min(1, c[logging.ERROR])]) return min(127, c[logging.CRITICAL] + c[logging.ERROR]) # cap at 127 due to OS restrictions diff --git a/Tests/iaas/standard-images/images-openstack.py b/Tests/iaas/standard-images/images-openstack.py index 22182fe80..6f192e5d0 100755 --- a/Tests/iaas/standard-images/images-openstack.py +++ b/Tests/iaas/standard-images/images-openstack.py @@ -40,6 +40,8 @@ def print_usage(file=sys.stderr): Options: [-c/--os-cloud OS_CLOUD] sets cloud environment (default from OS_CLOUD env) [-d/--debug] enables DEBUG logging channel + [-V/--image-visibility VIS_LIST] filters images by visibility + (default: 'public,community'; use '*' to disable) """, end='', file=file) @@ -61,7 +63,7 @@ def main(argv): logger.addHandler(counting_handler) try: - opts, args = getopt.gnu_getopt(argv, "c:hd", ["os-cloud=", "help", "debug"]) + opts, args = getopt.gnu_getopt(argv, "c:hdV:", ["os-cloud=", "help", "debug", "image-visibility="]) except getopt.GetoptError as exc: logger.critical(f"{exc}") print_usage() @@ -74,6 +76,7 @@ def main(argv): yaml_path = args[0] cloud = os.environ.get("OS_CLOUD") + image_visibility = set() for opt in opts: if opt[0] == "-h" or opt[0] == "--help": print_usage() @@ -82,11 +85,25 @@ def main(argv): cloud = opt[1] if opt[0] == "-d" or opt[0] == "--debug": logging.getLogger().setLevel(logging.DEBUG) + if opt[0] == "-V" or opt[0] == "--image-visibility": + image_visibility.update([v.strip() for v in opt[1].split(',')]) if not cloud: logger.critical("You need to have OS_CLOUD set or pass --os-cloud=CLOUD.") return 1 + if not image_visibility: + image_visibility.update(("public", "community")) + + # we only support local files; but we allow specifying the following URLs for the sake of + # better documentation + prefix = next(p for p in ( + 'https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Tests/', + 'https://github.com/SovereignCloudStack/standards/blob/main/Tests/', + '', # sentinel (do not remove!) 
+ ) if yaml_path.startswith(p)) + if prefix: + yaml_path = yaml_path[len(prefix):] try: with open(yaml_path, "rb") as fileobj: image_data = yaml.safe_load(fileobj) @@ -104,11 +121,15 @@ def main(argv): logger.debug(f"Fetching image list from cloud '{cloud}'") with openstack.connect(cloud=cloud, timeout=32) as conn: present_images = conn.list_images(show_all=True) - by_name = { - image.name: image - for image in present_images - } - logger.debug(f"Images present: {', '.join(sorted(by_name))}") + if '*' not in image_visibility: + logger.debug(f"Images: filter for visibility {', '.join(sorted(image_visibility))}") + present_images = [img for img in present_images if img.visibility in image_visibility] + all_image_names = [f"{img.name} ({img.visibility})" for img in present_images] + logger.debug(f"Images: {', '.join(all_image_names) or '(NONE)'}") + by_name = { + image.name: image + for image in present_images + } logger.debug(f"Checking {len(image_specs)} image specs against {len(present_images)} images") for image_spec in image_specs: @@ -136,6 +157,8 @@ def main(argv): c = counting_handler.bylevel logger.debug(f"Total critical / error / warning: {c[logging.CRITICAL]} / {c[logging.ERROR]} / {c[logging.WARNING]}") + if not c[logging.CRITICAL]: + print("standard-images-check: " + ('PASS', 'FAIL')[min(1, c[logging.ERROR])]) return min(127, c[logging.CRITICAL] + c[logging.ERROR]) # cap at 127 due to OS restrictions diff --git a/Tests/iaas/volume-backup/README.md b/Tests/iaas/volume-backup/README.md new file mode 100644 index 000000000..2b6cd4716 --- /dev/null +++ b/Tests/iaas/volume-backup/README.md @@ -0,0 +1,70 @@ +# Volume Backup API Test Suite + +## Test Environment Setup + +### Test Execution Environment + +> **NOTE:** The test execution procedure does not require cloud admin rights. + +To execute the test suite a valid cloud configuration for the OpenStack SDK in the shape of "`clouds.yaml`" is mandatory[^1]. +**The file is expected to be located in the current working directory where the test script is executed unless configured otherwise.** + +[^1]: [OpenStack Documentation: Configuring OpenStack SDK Applications](https://docs.openstack.org/openstacksdk/latest/user/config/configuration.html) + +The test execution environment can be located on any system outside of the cloud infrastructure that has OpenStack API access. +Make sure that the API access is configured properly in "`clouds.yaml`". + +It is recommended to use a Python virtual environment[^2]. +Next, install the OpenStack SDK required by the test suite: + +```bash +pip3 install openstacksdk +``` + +Within this environment execute the test suite. + +[^2]: [Python 3 Documentation: Virtual Environments and Packages](https://docs.python.org/3/tutorial/venv.html) + +## Test Execution + +The test suite is executed as follows: + +```bash +python3 volume-backup-tester.py --os-cloud mycloud +``` + +As an alternative to "`--os-cloud`", the "`OS_CLOUD`" environment variable may be specified instead. +The parameter is used to look up the correct cloud configuration in "`clouds.yaml`". +For the example command above, this file should contain a `clouds.mycloud` section like this: + +```yaml +--- +clouds: + mycloud: + auth: + auth_url: ... + ... + ... 
+``` + +If the test suite fails and leaves test resources behind, the "`--cleanup-only`" flag may be used to delete those resources from the domains: + +```bash +python3 volume-backup-tester.py --os-cloud mycloud --cleanup-only +``` + +For any further options consult the output of "`python3 volume-backup-tester.py --help`". + +### Script Behavior & Test Results + +> **NOTE:** Before any execution of test batches, the script will automatically perform a cleanup of volumes and volume backups matching a special prefix (see the "`--prefix`" flag). +> This cleanup behavior is identical to "`--cleanup-only`". + +The script will print all cleanup actions and passed tests to `stdout`. + +If all tests pass, the script will return with an exit code of `0`. + +If any test fails, the script will halt, print the exact error to `stderr` and return with a non-zero exit code. + +In case of a failed test, cleanup is not performed automatically, allowing for manual inspection of the cloud state for debugging purposes. +Although unnecessary due to automatic cleanup upon next execution, you can manually trigger a cleanup using the "`--cleanup-only`" flag of this script. diff --git a/Tests/iaas/volume-backup/volume-backup-tester.py b/Tests/iaas/volume-backup/volume-backup-tester.py new file mode 100755 index 000000000..bcbb89664 --- /dev/null +++ b/Tests/iaas/volume-backup/volume-backup-tester.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python3 +"""Volume Backup API tester for Block Storage API + +This test script executes basic operations on the Block Storage API centered +around volume backups. Its purpose is to verify that the Volume Backup API is +available and working as expected using simple operations such as creating and +restoring volume backups. + +It verifies that a properly configured backup driver is present to the extent +that aforementioned operations succeed on the API level. It does not by any +means verify that the backup and restore procedures actual handle the data +correctly (it only uses empty volumes and does not look at data for the sake +of simplicity). +""" + +import argparse +import getpass +import logging +import os +import sys +import time +import typing + +import openstack + +# prefix to be included in the names of any Keystone resources created +# used by the cleanup routine to identify resources that can be safely deleted +DEFAULT_PREFIX = "scs-test-" + +# timeout in seconds for resource availability checks +# (e.g. a volume becoming available) +WAIT_TIMEOUT = 60 + + +def wait_for_resource( + get_func: typing.Callable[[str], openstack.resource.Resource], + resource_id: str, + expected_status=("available", ), + timeout=WAIT_TIMEOUT, +) -> None: + seconds_waited = 0 + resource = get_func(resource_id) + while resource is None or resource.status not in expected_status: + time.sleep(1.0) + seconds_waited += 1 + if seconds_waited >= timeout: + raise RuntimeError( + f"Timed out after {seconds_waited} s: waiting for resource {resource_id} " + f"to be in status {expected_status} (current: {resource and resource.status})" + ) + resource = get_func(resource_id) + + +def test_backup(conn: openstack.connection.Connection, + prefix=DEFAULT_PREFIX, timeout=WAIT_TIMEOUT) -> None: + """Execute volume backup tests on the connection + + This will create an empty volume, a backup of that empty volume and then + attempt to restore the backup onto a new volume. + Purpose of these tests is to verify that the volume backup API is working + correctly. 
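Condensed to its essence, the sequence of Block Storage calls exercised looks roughly like this; resource names are placeholders, and the real test additionally waits for each resource to reach the `available` status:

```python
vol = conn.block_storage.create_volume(name="scs-test-volume", size=1)
bak = conn.block_storage.create_backup(name="scs-test-volume-backup", volume_id=vol.id)
conn.block_storage.restore_backup(bak.id, name="scs-test-restored-backup")
```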
+ """ + + # CREATE VOLUME + volume_name = f"{prefix}volume" + logging.info(f"Creating volume '{volume_name}' ...") + volume = conn.block_storage.create_volume(name=volume_name, size=1) + if volume is None: + raise RuntimeError(f"Creation of initial volume '{volume_name}' failed") + volume_id = volume.id + if conn.block_storage.get_volume(volume_id) is None: + raise RuntimeError(f"Retrieving initial volume by ID '{volume_id}' failed") + + logging.info( + f"↳ waiting for volume with ID '{volume_id}' to reach status " + f"'available' ..." + ) + wait_for_resource(conn.block_storage.get_volume, volume_id, timeout=timeout) + logging.info("Create empty volume: PASS") + + # CREATE BACKUP + logging.info("Creating backup from volume ...") + backup = conn.block_storage.create_backup(name=f"{prefix}volume-backup", volume_id=volume_id) + if backup is None: + raise RuntimeError("Backup creation failed") + backup_id = backup.id + if conn.block_storage.get_backup(backup_id) is None: + raise RuntimeError("Retrieving backup by ID failed") + + logging.info(f"↳ waiting for backup '{backup_id}' to become available ...") + wait_for_resource(conn.block_storage.get_backup, backup_id, timeout=timeout) + logging.info("Create backup from volume: PASS") + + # RESTORE BACKUP + restored_volume_name = f"{prefix}restored-backup" + logging.info(f"Restoring backup to volume '{restored_volume_name}' ...") + conn.block_storage.restore_backup(backup_id, name=restored_volume_name) + + logging.info( + f"↳ waiting for restoration target volume '{restored_volume_name}' " + f"to be created ..." + ) + wait_for_resource(conn.block_storage.find_volume, restored_volume_name, timeout=timeout) + # wait for the volume restoration to finish + logging.info( + f"↳ waiting for restoration target volume '{restored_volume_name}' " + f"to reach 'available' status ..." + ) + volume_id = conn.block_storage.find_volume(restored_volume_name).id + wait_for_resource(conn.block_storage.get_volume, volume_id, timeout=timeout) + logging.info("Restore volume from backup: PASS") + + +def cleanup(conn: openstack.connection.Connection, prefix=DEFAULT_PREFIX, + timeout=WAIT_TIMEOUT) -> bool: + """ + Looks up volume and volume backup resources matching the given prefix and + deletes them. + Returns False if there were any errors during cleanup which might leave + resources behind. Otherwise returns True to indicate cleanup success. + """ + + logging.info(f"Performing cleanup for resources with the '{prefix}' prefix ...") + + cleanup_issues = 0 # count failed cleanup operations + backups = conn.block_storage.backups() + for backup in backups: + if not backup.name.startswith(prefix): + continue + try: + # we can only delete if status is available or error, so try and wait + wait_for_resource( + conn.block_storage.get_backup, + backup.id, + expected_status=("available", "error"), + timeout=timeout, + ) + logging.info(f"↳ deleting volume backup '{backup.id}' ...") + conn.block_storage.delete_backup(backup.id) + except openstack.exceptions.ResourceNotFound: + # if the resource has vanished on its own in the meantime ignore it + continue + except Exception as e: + # Most common exception would be a timeout in wait_for_resource. + # We do not need to increment cleanup_issues here since + # any remaining ones will be caught in the next loop down below anyway. 
+ logging.debug("traceback", exc_info=True) + logging.warning(str(e)) + + # wait for all backups to be cleaned up before attempting to remove volumes + seconds_waited = 0 + while len( + # list of all backups whose name starts with the prefix + [b for b in conn.block_storage.backups() if b.name.startswith(prefix)] + ) > 0: + time.sleep(1.0) + seconds_waited += 1 + if seconds_waited >= timeout: + cleanup_issues += 1 + logging.warning( + f"Timeout reached while waiting for all backups with prefix " + f"'{prefix}' to finish deletion during cleanup after " + f"{seconds_waited} seconds" + ) + break + + volumes = conn.block_storage.volumes() + for volume in volumes: + if not volume.name.startswith(prefix): + continue + try: + wait_for_resource( + conn.block_storage.get_volume, + volume.id, + expected_status=("available", "error"), + timeout=timeout, + ) + logging.info(f"↳ deleting volume '{volume.id}' ...") + conn.block_storage.delete_volume(volume.id) + except openstack.exceptions.ResourceNotFound: + # if the resource has vanished on its own in the meantime ignore it + continue + except Exception as e: + logging.debug("traceback", exc_info=True) + logging.warning(str(e)) + cleanup_issues += 1 + + if cleanup_issues: + logging.info( + f"Some resources with the '{prefix}' prefix were not cleaned up!" + ) + + return not cleanup_issues + + +def main(): + parser = argparse.ArgumentParser( + description="SCS Volume Backup API Conformance Checker") + parser.add_argument( + "--os-cloud", type=str, + help="Name of the cloud from clouds.yaml, alternative " + "to the OS_CLOUD environment variable" + ) + parser.add_argument( + "--ask", + help="Ask for password interactively instead of reading it from the " + "clouds.yaml", + action="store_true" + ) + parser.add_argument( + "--debug", action="store_true", + help="Enable OpenStack SDK debug logging" + ) + parser.add_argument( + "--prefix", type=str, + default=DEFAULT_PREFIX, + help=f"OpenStack resource name prefix for all resources to be created " + f"and/or cleaned up by this script within the configured domains " + f"(default: '{DEFAULT_PREFIX}')" + ) + parser.add_argument( + "--timeout", type=int, + default=WAIT_TIMEOUT, + help=f"Timeout in seconds for operations waiting for resources to " + f"become available such as creating volumes and volume backups " + f"(default: '{WAIT_TIMEOUT}')" + ) + parser.add_argument( + "--cleanup-only", action="store_true", + help="Instead of executing tests, cleanup all resources " + "with the prefix specified via '--prefix' (or its default)" + ) + args = parser.parse_args() + openstack.enable_logging(debug=args.debug) + logging.basicConfig( + format="%(levelname)s: %(message)s", + level=logging.DEBUG if args.debug else logging.INFO, + ) + + # parse cloud name for lookup in clouds.yaml + cloud = args.os_cloud or os.environ.get("OS_CLOUD", None) + if not cloud: + raise Exception( + "You need to have the OS_CLOUD environment variable set to your " + "cloud name or pass it via --os-cloud" + ) + password = getpass.getpass("Enter password: ") if args.ask else None + + with openstack.connect(cloud, password=password) as conn: + if not cleanup(conn, prefix=args.prefix, timeout=args.timeout): + raise RuntimeError("Initial cleanup failed") + if args.cleanup_only: + logging.info("Cleanup-only run finished.") + return + try: + test_backup(conn, prefix=args.prefix, timeout=args.timeout) + except BaseException: + print('volume-backup-check: FAIL') + raise + else: + print('volume-backup-check: PASS') + finally: + cleanup(conn, 
prefix=args.prefix, timeout=args.timeout) + + +if __name__ == "__main__": + try: + sys.exit(main()) + except SystemExit: + raise + except BaseException as exc: + logging.debug("traceback", exc_info=True) + logging.critical(str(exc)) + sys.exit(1) diff --git a/Tests/iaas/volume-types/volume-types-check.py b/Tests/iaas/volume-types/volume-types-check.py old mode 100644 new mode 100755 index 444755816..4b1945fb8 --- a/Tests/iaas/volume-types/volume-types-check.py +++ b/Tests/iaas/volume-types/volume-types-check.py @@ -141,6 +141,8 @@ def main(argv): "Total critical / error / warning: " f"{c[logging.CRITICAL]} / {c[logging.ERROR]} / {c[logging.WARNING]}" ) + if not c[logging.CRITICAL]: + print("volume-types-check: " + ('PASS', 'FAIL')[min(1, c[logging.ERROR])]) return min(127, c[logging.CRITICAL] + c[logging.ERROR]) # cap at 127 due to OS restrictions diff --git a/Tests/iam/domain-manager/domain-manager-check.py b/Tests/iam/domain-manager/domain-manager-check.py old mode 100644 new mode 100755 index e56aad884..41040122b --- a/Tests/iam/domain-manager/domain-manager-check.py +++ b/Tests/iam/domain-manager/domain-manager-check.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 """Domain Manager policy configuration checker This script uses the OpenStack SDK to validate the proper implementation diff --git a/Tests/kaas/clusterspec.yaml b/Tests/kaas/clusterspec.yaml new file mode 100644 index 000000000..c8439a89f --- /dev/null +++ b/Tests/kaas/clusterspec.yaml @@ -0,0 +1,11 @@ +# this file specifies all clusters that have to be provisioned for the tests to run +clusters: + current-k8s-release: + branch: "1.31" + kubeconfig: kubeconfig.yaml + current-k8s-release-1: + branch: "1.30" + kubeconfig: kubeconfig.yaml + current-k8s-release-2: + branch: "1.29" + kubeconfig: kubeconfig.yaml diff --git a/Tests/kaas/k8s-node-distribution/check_nodes_test.py b/Tests/kaas/k8s-node-distribution/check_nodes_test.py new file mode 100644 index 000000000..d32edccfb --- /dev/null +++ b/Tests/kaas/k8s-node-distribution/check_nodes_test.py @@ -0,0 +1,63 @@ +""" +Unit tests for node distribution check functions. 
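The unit tests below are plain pytest tests; a hypothetical invocation from the repository root, assuming `pytest` and `PyYAML` are installed in the test environment:

```bash
python3 -m pytest Tests/kaas/k8s-node-distribution/check_nodes_test.py -v
```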
+ +(c) Martin Morgenstern , 4/2024 +(c) Hannes Baum , 5/2024 +SPDX-License-Identifier: CC-BY-SA-4.0 +""" + +from pathlib import Path +import yaml + +import pytest + +from k8s_node_distribution_check import check_nodes + + +HERE = Path(__file__).parent + + +@pytest.fixture +def load_testdata(): + with open(Path(HERE, "testdata", "scenarios.yaml")) as stream: + return yaml.safe_load(stream) + + +@pytest.mark.parametrize("yaml_key", ["success-1", "success-2"]) +def test_success_single_region_warning(yaml_key, caplog, load_testdata): + data = load_testdata[yaml_key] + assert check_nodes(data.values()) == 0 + assert len(caplog.records) == 2 + for record in caplog.records: + assert "no distribution across multiple regions" in record.message + assert record.levelname == "WARNING" + + +def test_not_enough_nodes(caplog, load_testdata): + data = load_testdata["not-enough-nodes"] + assert check_nodes(data.values()) == 2 + assert len(caplog.records) == 1 + assert "cluster only contains a single node" in caplog.records[0].message + assert caplog.records[0].levelname == "ERROR" + + +@pytest.mark.parametrize("yaml_key", ["no-distribution-1", "no-distribution-2"]) +def test_no_distribution(yaml_key, caplog, load_testdata): + data = load_testdata[yaml_key] + with caplog.at_level("ERROR"): + assert check_nodes(data.values()) == 2 + assert len(caplog.records) == 1 + record = caplog.records[0] + assert "distribution of nodes described in the standard couldn't be detected" in record.message + assert record.levelname == "ERROR" + + +def test_missing_label(caplog, load_testdata): + data = load_testdata["missing-labels"] + assert check_nodes(data.values()) == 2 + hostid_missing_records = [ + record for record in caplog.records + if "label for host-ids" in record.message + ] + assert len(hostid_missing_records) == 1 + assert hostid_missing_records[0].levelname == "ERROR" diff --git a/Tests/kaas/k8s-node-distribution/config.yaml.template b/Tests/kaas/k8s-node-distribution/config.yaml.template deleted file mode 100644 index 0f96da24d..000000000 --- a/Tests/kaas/k8s-node-distribution/config.yaml.template +++ /dev/null @@ -1,24 +0,0 @@ -## Configuration file for the K8s Version Recency Test - -logging: - level: INFO - version: 1 - disable_existing_loggers: False - formatters: - k8s-node-distribution-check: - format: "%(levelname)s: %(message)s" - handlers: - console: - class: logging.StreamHandler - formatter: k8s-node-distribution-check - stream: ext://sys.stdout - file: - class: logging.handlers.WatchedFileHandler - formatter: k8s-node-distribution-check - filename: MY-LOG-FILE-NAME.log - root: # Configuring the default (root) logger is highly recommended - handlers: [console] - loggers: - k8s-node-distribution-check: - handlers: [console, file] - propagate: no \ No newline at end of file diff --git a/Tests/kaas/k8s-node-distribution/k8s-node-distribution-check.py b/Tests/kaas/k8s-node-distribution/k8s_node_distribution_check.py similarity index 61% rename from Tests/kaas/k8s-node-distribution/k8s-node-distribution-check.py rename to Tests/kaas/k8s-node-distribution/k8s_node_distribution_check.py index a0c167ff9..efac000d4 100755 --- a/Tests/kaas/k8s-node-distribution/k8s-node-distribution-check.py +++ b/Tests/kaas/k8s-node-distribution/k8s_node_distribution_check.py @@ -28,6 +28,7 @@ node-role.kubernetes.io/control-plane (c) Hannes Baum , 6/2023 +(c) Martin Morgenstern , 4/2024 License: CC-BY-SA 4.0 """ @@ -37,29 +38,17 @@ import logging import logging.config import sys -import yaml - - -logging_config = { - 
"level": "INFO", - "version": 1, - "disable_existing_loggers": False, - "formatters": { - "k8s-node-distribution-check": { - "format": "%(levelname)s: %(message)s" - } - }, - "handlers": { - "console": { - "class": "logging.StreamHandler", - "formatter": "k8s-node-distribution-check", - "stream": "ext://sys.stdout" - } - }, - "root": { - "handlers": ["console"] - } -} + +# It is important to note, that the order of these labels matters for this test. +# Since we want to check if nodes are distributed, we want to do this from bigger +# infrastructure parts to smaller ones. So we first look if nodes are distributed +# across regions, then zones and then hosts. If one of these requirements is fulfilled, +# we don't need to check anymore, since a distribution was already detected. +LABELS = ( + "topology.kubernetes.io/region", + "topology.kubernetes.io/zone", + "topology.scs.community/host-id", +) logger = logging.getLogger(__name__) @@ -76,10 +65,12 @@ class DistributionException(BaseException): """Exception raised if the distribution seems to be not enough""" +class LabelException(BaseException): + """Exception raised if a label isn't set""" + + class Config: - config_path = "./config.yaml" kubeconfig = None - logging = None def print_usage(): @@ -97,7 +88,6 @@ def print_usage(): 2 - No distribution according to the standard could be detected for the nodes available. The following arguments can be set: - -c/--config PATH/TO/CONFIG - Path to the config file of the test script -k/--kubeconfig PATH/TO/KUBECONFIG - Path to the kubeconfig of the server we want to check -h - Output help """) @@ -108,51 +98,27 @@ def parse_arguments(argv): config = Config() try: - opts, args = getopt.gnu_getopt(argv, "c:k:h", ["config", "kubeconfig", "help"]) + opts, args = getopt.gnu_getopt(argv, "k:t:h", ["kubeconfig=", "test=", "help"]) except getopt.GetoptError: raise ConfigException for opt in opts: if opt[0] == "-h" or opt[0] == "--help": raise HelpException - if opt[0] == "-c" or opt[0] == "--config": - config.config_path = opt[1] if opt[0] == "-k" or opt[0] == "--kubeconfig": config.kubeconfig = opt[1] return config -def setup_logging(config_log): - - logging.config.dictConfig(config_log) - loggers = [ - logging.getLogger(name) - for name in logging.root.manager.loggerDict - if not logging.getLogger(name).level - ] - - for log in loggers: - log.setLevel(config_log['level']) - - def initialize_config(config): """Initialize the configuration for the test script""" - try: - with open(config.config_path, "r") as f: - config.logging = yaml.safe_load(f)['logging'] - except OSError: - logger.warning(f"The config file under {config.config_path} couldn't be found, " - f"falling back to the default config.") - finally: - # Setup logging if the config file with the relevant information could be loaded before - # Otherwise, we initialize logging with the included literal - setup_logging(config.logging or logging_config) - if config.kubeconfig is None: raise ConfigException("A kubeconfig needs to be set in order to test a k8s cluster version.") + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO) + return config @@ -176,28 +142,29 @@ async def get_k8s_cluster_labelled_nodes(kubeconfig, interesting_labels): return nodes -def compare_labels(node_list, labels, node_type="master"): +def compare_labels(node_list, node_type="control"): - label_data = {key: list() for key in labels} + label_data = {key: list() for key in LABELS} for node in node_list: - for key in labels: + for key in LABELS: try: 
label_data[key].append(node[key]) except KeyError: - logger.warning(f"The label for {key.split('/')[1]}s don't seem to be set for all nodes.") + raise LabelException(f"The label for {key.split('/')[1]}s doesn't seem to be set for all nodes.") - for label in labels: - if len(label_data[label]) < len(node_list): - logger.warning(f"The label for {label.split('/')[1]}s doesn't seem to be set for all nodes.") + for label in LABELS: if len(set(label_data[label])) <= 1: logger.warning(f"There seems to be no distribution across multiple {label.split('/')[1]}s " "or labels aren't set correctly across nodes.") else: - logger.info(f"The nodes are distributed across {str(len(set(label_data[label])))} {label.split('/')[1]}s.") + logger.info( + f"The {node_type} nodes are distributed across " + f"{str(len(set(label_data[label])))} {label.split('/')[1]}s." + ) return - if node_type == "master": + if node_type == "control": raise DistributionException("The distribution of nodes described in the standard couldn't be detected.") elif node_type == "worker": logger.warning("No node distribution could be detected for the worker nodes. " @@ -205,48 +172,49 @@ def compare_labels(node_list, labels, node_type="master"): return -async def main(argv): - try: - config = initialize_config(parse_arguments(argv)) - except (OSError, ConfigException, HelpException) as e: - if hasattr(e, 'message'): - logger.error(e.message) - print_usage() - return 1 - - # It is important to note, that the order of these labels matters for this test. - # Since we want to check if nodes are distributed, we want to do this from bigger - # infrastructure parts to smaller ones. So we first look if nodes are distributed - # across regions, then zones and then hosts. If one of these requirements is fulfilled, - # we don't need to check anymore, since a distribution was already detected. 
- labels = ( - "topology.kubernetes.io/region", - "topology.kubernetes.io/zone", - "topology.scs.community/host-id", - ) - - nodes = await get_k8s_cluster_labelled_nodes(config.kubeconfig, labels + ("node-role.kubernetes.io/control-plane", )) - +def check_nodes(nodes): if len(nodes) < 2: logger.error("The tested cluster only contains a single node, which can't comply with the standard.") return 2 - labelled_master_nodes = [node for node in nodes if "node-role.kubernetes.io/control-plane" in node] + labelled_control_nodes = [node for node in nodes if "node-role.kubernetes.io/control-plane" in node] try: - if len(labelled_master_nodes) >= 1: + if len(labelled_control_nodes) >= 1: worker_nodes = [node for node in nodes if "node-role.kubernetes.io/control-plane" not in node] # Compare the labels of both types, since we have enough of them with labels - compare_labels(labelled_master_nodes, labels, "master") - compare_labels(worker_nodes, labels, "worker") + compare_labels(labelled_control_nodes, "control") + compare_labels(worker_nodes, "worker") else: - compare_labels(nodes, labels) - except DistributionException as e: + compare_labels(nodes) + except (DistributionException, LabelException) as e: logger.error(str(e)) return 2 return 0 +async def main(argv): + try: + config = initialize_config(parse_arguments(argv)) + except (OSError, ConfigException, HelpException) as e: + logger.critical("%s", e) + print_usage() + return 1 + + try: + nodes = await get_k8s_cluster_labelled_nodes( + config.kubeconfig, + LABELS + ("node-role.kubernetes.io/control-plane", ) + ) + except BaseException as e: + logger.critical("%s", e) + return 1 + + return_code = check_nodes(nodes) + print("node-distribution-check: " + ('PASS', 'FAIL')[min(1, return_code)]) + return return_code + + if __name__ == "__main__": return_code = asyncio.run(main(sys.argv[1:])) sys.exit(return_code) diff --git a/Tests/kaas/k8s-node-distribution/testdata/scenarios.yaml b/Tests/kaas/k8s-node-distribution/testdata/scenarios.yaml new file mode 100644 index 000000000..5cec0118d --- /dev/null +++ b/Tests/kaas/k8s-node-distribution/testdata/scenarios.yaml @@ -0,0 +1,128 @@ +# Success Scenario 1: +# All nodes have distinct host-ids and zones, but share the region. +success-1: + control-0: + node-role.kubernetes.io/control-plane: "" + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + topology.scs.community/host-id: "vm0" + control-1: + node-role.kubernetes.io/control-plane: "" + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone2" + topology.scs.community/host-id: "vm1" + control-2: + node-role.kubernetes.io/control-plane: "" + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone3" + topology.scs.community/host-id: "vm2" + worker-0: + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + topology.scs.community/host-id: "vm3" + worker-1: + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone2" + topology.scs.community/host-id: "vm4" + worker-2: + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone3" + topology.scs.community/host-id: "vm5" + +# Success Scenario 2: +# Nodes share the host-id and region, but are in different zones. 
+success-2: + control-0: + node-role.kubernetes.io/control-plane: "" + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + topology.scs.community/host-id: "vm0" + control-1: + node-role.kubernetes.io/control-plane: "" + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone2" + topology.scs.community/host-id: "vm0" + control-2: + node-role.kubernetes.io/control-plane: "" + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone3" + topology.scs.community/host-id: "vm0" + worker-0: + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + topology.scs.community/host-id: "vm1" + worker-1: + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone2" + topology.scs.community/host-id: "vm1" + worker-2: + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone3" + topology.scs.community/host-id: "vm1" + +# Failure Scenario: +# No distribution detectable because of too few nodes +no-distribution-1: + control-0: + node-role.kubernetes.io/control-plane: "" + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + topology.scs.community/host-id: "vm0" + worker-0: + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + topology.scs.community/host-id: "vm0" + +# Failure Scenario: +# No distribution detectable because all nodes are in the same zone +no-distribution-2: + control-0: + node-role.kubernetes.io/control-plane: "" + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + topology.scs.community/host-id: "vm0" + control-1: + node-role.kubernetes.io/control-plane: "" + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + topology.scs.community/host-id: "vm0" + control-2: + node-role.kubernetes.io/control-plane: "" + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + topology.scs.community/host-id: "vm0" + worker-0: + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + topology.scs.community/host-id: "vm1" + worker-1: + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + topology.scs.community/host-id: "vm1" + worker-2: + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + topology.scs.community/host-id: "vm1" + +# Failure Scenario: +# A host-id label is missing on a control node +missing-labels: + control-0: + node-role.kubernetes.io/control-plane: "" + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + topology.scs.community/host-id: "vm0" + control-1: + node-role.kubernetes.io/control-plane: "" + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + # host-id missing + +# Failure Scenario: +# Not enough nodes available, so no distribution is detectable +not-enough-nodes: + node-0: + node-role.kubernetes.io/control-plane: "" + topology.kubernetes.io/region: "region" + topology.kubernetes.io/zone: "zone1" + topology.scs.community/host-id: "vm0" diff --git a/Tests/kaas/k8s-version-policy/k8s-eol-data.yml b/Tests/kaas/k8s-version-policy/k8s-eol-data.yml index 4efe0634b..3a3d3b2eb 100644 --- a/Tests/kaas/k8s-version-policy/k8s-eol-data.yml +++ b/Tests/kaas/k8s-version-policy/k8s-eol-data.yml @@ -1,5 +1,9 @@ # https://kubernetes.io/releases/patch-releases/#detailed-release-history-for-active-branches +- branch: '1.31' + end-of-life: '2025-10-28' +- branch: '1.30' + end-of-life: '2025-06-28' 
- branch: '1.29' end-of-life: '2025-02-28' - branch: '1.28' diff --git a/Tests/kaas/k8s-version-policy/k8s_version_policy.py b/Tests/kaas/k8s-version-policy/k8s_version_policy.py index 3219f555f..cef272acd 100755 --- a/Tests/kaas/k8s-version-policy/k8s_version_policy.py +++ b/Tests/kaas/k8s-version-policy/k8s_version_policy.py @@ -45,7 +45,7 @@ MINOR_VERSION_CADENCE = timedelta(days=120) -PATCH_VERSION_CADENCE = timedelta(weeks=1) +PATCH_VERSION_CADENCE = timedelta(weeks=2) CVE_VERSION_CADENCE = timedelta(days=2) CVE_SEVERITY = 8 # CRITICAL @@ -517,6 +517,8 @@ async def main(argv): "Total error / warning: " f"{c[logging.ERROR]} / {c[logging.WARNING]}" ) + if not c[logging.CRITICAL]: + print("version-policy-check: " + ('PASS', 'FAIL')[min(1, c[logging.ERROR])]) return min(127, c[logging.ERROR]) # cap at 127 due to OS restrictions diff --git a/Tests/kaas/k8s-version-policy/k8s_version_policy_test.py b/Tests/kaas/k8s-version-policy/k8s_version_policy_test.py index dd65ceb50..dcef89a1b 100644 --- a/Tests/kaas/k8s-version-policy/k8s_version_policy_test.py +++ b/Tests/kaas/k8s-version-policy/k8s_version_policy_test.py @@ -44,8 +44,8 @@ def release_data(): K8S_VERSION = K8sVersion(1, 28, 5) EXPECTED_RECENCIES = { datetime(2024, 1, 17): True, - datetime(2024, 1, 24): True, - datetime(2024, 1, 25): False, + datetime(2024, 1, 31): True, + datetime(2024, 2, 1): False, } diff --git a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/Dockerfile b/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/Dockerfile deleted file mode 100644 index 2519a36b7..000000000 --- a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/Dockerfile +++ /dev/null @@ -1,23 +0,0 @@ -FROM golang:1.17-buster as build - -# Install kubectl -# Note: Latest version may be found on: -# https://aur.archlinux.org/packages/kubectl-bin/ -RUN wget https://storage.googleapis.com/kubernetes-release/release/v1.21.3/bin/linux/amd64/kubectl -O /usr/bin/kubectl && \ - chmod +x /usr/bin/kubectl && \ - apt-get update && \ - apt-get install -y jq - -COPY ./scs_k8s_tests /src/scs_k8s_tests -WORKDIR /src -COPY go.* /src/ -ENV CGO_ENABLED=0 -RUN go mod download - -#see: https://docs.docker.com/build/guide/mounts/ -RUN --mount=type=cache,target=/root/.cache/go-build \ - go test -c -o custom.test ./... 
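Doubling `PATCH_VERSION_CADENCE` to two weeks is what moves the expected recency cut-off in the adjusted test above from 2024-01-24/25 to 2024-01-31/02-01. A minimal sketch of that arithmetic, assuming (as the fixture suggests) that the successor patch 1.28.6 was released on 2024-01-17; the real check lives in `k8s_version_policy.py`:

```python
# Sketch only: reproduces the recency window implied by the updated fixture,
# not the actual implementation in k8s_version_policy.py.
from datetime import datetime, timedelta

PATCH_VERSION_CADENCE = timedelta(weeks=2)
successor_release = datetime(2024, 1, 17)  # assumed release date of 1.28.6


def is_recent(check_date):
    """1.28.5 stays acceptable for two weeks after its successor appears."""
    return check_date - successor_release <= PATCH_VERSION_CADENCE


print(is_recent(datetime(2024, 1, 31)))  # True (exactly two weeks later)
print(is_recent(datetime(2024, 2, 1)))   # False
```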
- -CMD ["bash", "-c", "go tool test2json ./custom.test -test.v"] - - diff --git a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/Makefile b/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/Makefile deleted file mode 100644 index 2202e9c2f..000000000 --- a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/Makefile +++ /dev/null @@ -1,114 +0,0 @@ -# Makefile -# This makefile is for development purpose - -SHELL = /bin/bash -#SED ?= sed - -DOCKERFILE="Dockerfile" -IMAGE_REGISTRY="ghcr.io/sovereigncloudstack/standards" -IMAGE_NAME="scsconformance" -#IMAGE_VERSION_TAG ="v0.1.2" - -KIND_CLUSTER ="testcluster" - -#PLUGIN_NAME="k8s-default-storage-class-plugin-go" -PLUGIN_NAME="plugin" -PLUGIN_FILE="${PLUGIN_NAME}.yaml" - -#~ SONO_WAIT = 1 -#~ SONO_TIMEOUT = 60 - -KUBERNETES_SERVICE_HOST=127.0.0.1 -KUBERNETES_SERVICE_PORT=34743 - - -############################################################################### -## Helpers: ## -############################################################################### - -ifeq ($(IMAGE_VERSION_TAG),) - export TAG=dev -else - export TAG=${IMAGE_VERSION_TAG} -endif - -SONOBUOY_IMAGE = "${IMAGE_REGISTRY}/${IMAGE_NAME}:${TAG}" - -container-init: - @echo "" - @echo "[ContainerImageName] ${SONOBUOY_IMAGE}" - @echo "[SonobuoyPluginFile] ${PLUGIN_FILE}" - @echo "" - - -kind-init: - @echo "" - @echo "[KindCluster] ${KIND_CLUSTER}" - @echo "" - - -############################################################################### -## For develpoment usage: ## -############################################################################### - -dev-prerequests: - @echo "[check-test-setup]" - @kind version - @docker version - @sonobuoy version --short - @go version - - -dev-setup: kind-init - kind create cluster --name ${KIND_CLUSTER} - - -dev-build: container-init - @echo "[build]" - DOCKER_BUILDKIT=1 docker build . -f ${DOCKERFILE} -t ${SONOBUOY_IMAGE} - kind load docker-image --name ${KIND_CLUSTER} ${SONOBUOY_IMAGE} - - -dev-go: - @echo "[go]" - @echo "[KubernetesService] ${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}" - @rm -rf ./build || true - @mkdir ./build - go test -c -o ./build ./... -# go test -c -o ./build ./... 
--args --skip-labels="type=pod-list" -# go tool test2json ./build -test.v - - -dev-run: - @echo "[run-test]" - @echo "sonobuoy run --plugin ${PLUGIN_FILE} --wait=${SONO_WAIT} --timeout=${SONO_TIMEOUT}" -#~ @sonobuoy run --plugin ${PLUGIN_FILE} --wait=${SONO_WAIT} --timeout=${SONO_TIMEOUT} - @sonobuoy run --plugin ${PLUGIN_FILE} - @sonobuoy status - - -dev-result: - @echo "[result]" - #outfile=$(sonobuoy retrieve) && mkdir results && tar -xf ${outfile} -C results - sonobuoy retrieve - sonobuoy results *.tar.gz - mkdir results - tar -xf *.tar.gz -C results - - -dev-clean: - @echo "[clean]" - @sonobuoy delete --all --wait || true - @sonobuoy status || true - @rm -rf *.tar.gz || true - @rm -rf results || true - - - -dev-purge: kind-init dev-clean - @echo "[purge]" - kind delete cluster --name ${KIND_CLUSTER} || true - docker rmi ${SONOBUOY_IMAGE} || true - - -PHONY: dev-prerequests dev-build dev-run dev-result dev-clean dev-clean dev-purge diff --git a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/main_test.go b/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/main_test.go deleted file mode 100644 index 95b2e0482..000000000 --- a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/main_test.go +++ /dev/null @@ -1,108 +0,0 @@ -package scs_k8s_tests - -import ( - "context" - "fmt" - "os" - "testing" - - plugin_helper "github.com/vmware-tanzu/sonobuoy-plugins/plugin-helper" - v1 "k8s.io/api/core/v1" - "sigs.k8s.io/e2e-framework/pkg/env" - "sigs.k8s.io/e2e-framework/pkg/envconf" -) - - -const ( - ProgressReporterCtxKey = "SONOBUOY_PROGRESS_REPORTER" - NamespacePrefixKey = "NS_PREFIX" -) - -var testenv env.Environment - -func TestMain(m *testing.M) { - // Assume we are running in the cluster as a Sonobuoy plugin. - testenv = env.NewInClusterConfig() - - // Specifying a run ID so that multiple runs wouldn't collide. Allow a prefix to be set via env var - // so that a plugin configuration (yaml file) can easily set that without code changes. - nsPrefix := os.Getenv(NamespacePrefixKey) - runID := envconf.RandomName(nsPrefix, 4) - - // Create updateReporter; will also place into context during Setup for use in features. - updateReporter := plugin_helper.NewProgressReporter(0) - - testenv.Setup(func(ctx context.Context, config *envconf.Config) (context.Context, error) { - // Try and create the client; doing it before all the tests allows the tests to assume - // it can be created without error and they can just use config.Client(). - _,err:=config.NewClient() - return context.WithValue(ctx,ProgressReporterCtxKey,updateReporter) ,err - }) - - testenv.BeforeEachTest(func(ctx context.Context, cfg *envconf.Config, t *testing.T) (context.Context, error) { - fmt.Println("BeforeEachTest") - updateReporter.StartTest(t.Name()) - return createNSForTest(ctx, cfg, t, runID) - }) - - testenv.AfterEachTest(func(ctx context.Context, cfg *envconf.Config, t *testing.T) (context.Context, error) { - fmt.Println("AfterEachTest") - updateReporter.StopTest(t.Name(),t.Failed(),t.Skipped(),nil) - return deleteNSForTest(ctx, cfg, t, runID) - }) - - /* - testenv.BeforeEachFeature(func(ctx context.Context, config *envconf.Config, info features.Feature) (context.Context, error) { - // Note that you can also add logic here for before a feature is tested. There may be - // more than one feature in a test. 
- fmt.Println("BeforeEachFeature") - return ctx, nil - }) - - testenv.AfterEachFeature(func(ctx context.Context, config *envconf.Config, info features.Feature) (context.Context, error) { - // Note that you can also add logic here for after a feature is tested. There may be - // more than one feature in a test. - fmt.Println("AfterEachFeature") - return ctx, nil - }) - */ - - testenv.Finish( - // Teardown func: delete kind cluster - func(ctx context.Context, cfg *envconf.Config) (context.Context, error) { - fmt.Println("Finished go test suite") - //~ if err := ???; err != nil{ - //~ return ctx, err - //~ } - return ctx, nil - }, - ) - - os.Exit(testenv.Run(m)) -} - -// CreateNSForTest creates a random namespace with the runID as a prefix. It is stored in the context -// so that the deleteNSForTest routine can look it up and delete it. -func createNSForTest(ctx context.Context, cfg *envconf.Config, t *testing.T, runID string) (context.Context, error) { - ns := envconf.RandomName(runID, 10) - ctx = context.WithValue(ctx, nsKey(t), ns) - - t.Logf("Creating namespace %v for test %v", ns, t.Name()) - nsObj := v1.Namespace{} - nsObj.Name = ns - return ctx, cfg.Client().Resources().Create(ctx, &nsObj) -} - -// DeleteNSForTest looks up the namespace corresponding to the given test and deletes it. -func deleteNSForTest(ctx context.Context, cfg *envconf.Config, t *testing.T, runID string) (context.Context, error) { - ns := fmt.Sprint(ctx.Value(nsKey(t))) - t.Logf("Deleting namespace %v for test %v", ns, t.Name()) - - nsObj := v1.Namespace{} - nsObj.Name = ns - return ctx, cfg.Client().Resources().Delete(ctx, &nsObj) -} - -func nsKey(t *testing.T) string { - return "NS-for-%v" + t.Name() -} diff --git a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/main_test.go.template b/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/main_test.go.template deleted file mode 100644 index 0d3f577a0..000000000 --- a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/main_test.go.template +++ /dev/null @@ -1,107 +0,0 @@ -/* -Copyright 2021 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package k8s_test_template - -import ( - "context" - "fmt" - "os" - "testing" - - plugin_helper "github.com/vmware-tanzu/sonobuoy-plugins/plugin-helper" - v1 "k8s.io/api/core/v1" - "sigs.k8s.io/e2e-framework/pkg/env" - "sigs.k8s.io/e2e-framework/pkg/envconf" -) - - -const ( - ProgressReporterCtxKey = "SONOBUOY_PROGRESS_REPORTER" - NamespacePrefixKey = "NS_PREFIX" -) - -var testenv env.Environment - -func TestMain(m *testing.M) { - // Assume we are running in the cluster as a Sonobuoy plugin. - testenv = env.NewInClusterConfig() - - // Specifying a run ID so that multiple runs wouldn't collide. Allow a prefix to be set via env var - // so that a plugin configuration (yaml file) can easily set that without code changes. 
- nsPrefix := os.Getenv(NamespacePrefixKey) - runID := envconf.RandomName(nsPrefix, 4) - - // Create updateReporter; will also place into context during Setup for use in features. - updateReporter := plugin_helper.NewProgressReporter(0) - - testenv.Setup(func(ctx context.Context, config *envconf.Config) (context.Context, error) { - // Try and create the client; doing it before all the tests allows the tests to assume - // it can be created without error and they can just use config.Client(). - _,err:=config.NewClient() - return context.WithValue(ctx,ProgressReporterCtxKey,updateReporter) ,err - }) - - testenv.BeforeEachTest(func(ctx context.Context, cfg *envconf.Config, t *testing.T) (context.Context, error) { - updateReporter.StartTest(t.Name()) - return createNSForTest(ctx, cfg, t, runID) - }) - testenv.AfterEachTest(func(ctx context.Context, cfg *envconf.Config, t *testing.T) (context.Context, error) { - updateReporter.StopTest(t.Name(),t.Failed(),t.Skipped(),nil) - return deleteNSForTest(ctx, cfg, t, runID) - }) - - /* - testenv.BeforeEachFeature(func(ctx context.Context, config *envconf.Config, info features.Feature) (context.Context, error) { - // Note that you can also add logic here for before a feature is tested. There may be - // more than one feature in a test. - return ctx, nil - }) - testenv.AfterEachFeature(func(ctx context.Context, config *envconf.Config, info features.Feature) (context.Context, error) { - // Note that you can also add logic here for after a feature is tested. There may be - // more than one feature in a test. - return ctx, nil - }) - */ - - os.Exit(testenv.Run(m)) -} - -// CreateNSForTest creates a random namespace with the runID as a prefix. It is stored in the context -// so that the deleteNSForTest routine can look it up and delete it. -func createNSForTest(ctx context.Context, cfg *envconf.Config, t *testing.T, runID string) (context.Context, error) { - ns := envconf.RandomName(runID, 10) - ctx = context.WithValue(ctx, nsKey(t), ns) - - t.Logf("Creating namespace %v for test %v", ns, t.Name()) - nsObj := v1.Namespace{} - nsObj.Name = ns - return ctx, cfg.Client().Resources().Create(ctx, &nsObj) -} - -// DeleteNSForTest looks up the namespace corresponding to the given test and deletes it. -func deleteNSForTest(ctx context.Context, cfg *envconf.Config, t *testing.T, runID string) (context.Context, error) { - ns := fmt.Sprint(ctx.Value(nsKey(t))) - t.Logf("Deleting namespace %v for test %v", ns, t.Name()) - - nsObj := v1.Namespace{} - nsObj.Name = ns - return ctx, cfg.Client().Resources().Delete(ctx, &nsObj) -} - -func nsKey(t *testing.T) string { - return "NS-for-%v" + t.Name() -} diff --git a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/scs_0200_example_test.go b/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/scs_0200_example_test.go deleted file mode 100644 index ee30f453c..000000000 --- a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/scs_0200_example_test.go +++ /dev/null @@ -1,83 +0,0 @@ -/* - Copyright 2021 The Kubernetes Authors. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -package scs_k8s_tests - -import ( - "context" - "testing" - "time" - "fmt" - plugin_helper "github.com/vmware-tanzu/sonobuoy-plugins/plugin-helper" - corev1 "k8s.io/api/core/v1" - "sigs.k8s.io/e2e-framework/pkg/envconf" - "sigs.k8s.io/e2e-framework/pkg/features" -) - - -func Test_scs_0200_sonobuoy_pass(t *testing.T) { - fmt.Println("Test a passing test") - testvar := 5 - if testvar != 5 { - t.Errorf("testvar = %d; want 5", testvar) - } -} - -func Test_scs_0200_sonobuoy_fail(t *testing.T) { - fmt.Println("Test a failing test") - testvar := 5 - if testvar != 3 { - t.Errorf("testvar = %d; want 3", testvar) - } -} - -func Test_scs_0200_sonobuoy_TestListPods(t *testing.T) { - f := features.New("pod list").WithLabel("type", "pod-count").Assess( - "pods from kube-system", - func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { - var pods corev1.PodList - err := cfg.Client().Resources("kube-system").List(context.TODO(), &pods) - if err != nil { - t.Fatal(err) - } - t.Logf("found %d pods", len(pods.Items)) - if len(pods.Items) == 0 { - t.Fatal("no pods in namespace kube-system") - } - return ctx - }) - - testenv.Test(t, f.Feature()) -} - -func Test_scs_0200_sonobuoy_TestListPods_Long(t *testing.T) { - f := features.New("pod list").WithLabel("type", "progress").Assess( - "pods from kube-system", - func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { - var pods corev1.PodList - err := cfg.Client().Resources("kube-system").List(context.TODO(), &pods) - if err != nil { - t.Fatal(err) - } - progressReporterVal := ctx.Value(ProgressReporterCtxKey) - progressReporter:=progressReporterVal.(plugin_helper.ProgressReporter) - for i:=0;i<5;i++{ - time.Sleep(5*time.Second) - progressReporter.SendMessageAsync("Waiting for a long test...") - } - return ctx - }) - - testenv.Test(t, f.Feature()) -} - diff --git a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/scs_0201_example_test.go b/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/scs_0201_example_test.go deleted file mode 100644 index 1771f8058..000000000 --- a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/scs_0201_example_test.go +++ /dev/null @@ -1,80 +0,0 @@ -package scs_k8s_tests - -import ( - "context" - "testing" - "time" - "fmt" - plugin_helper "github.com/vmware-tanzu/sonobuoy-plugins/plugin-helper" - corev1 "k8s.io/api/core/v1" - "sigs.k8s.io/e2e-framework/pkg/envconf" - "sigs.k8s.io/e2e-framework/pkg/features" -) - - -func Test_scs_0201_TestDummyIn(t *testing.T) { - fmt.Println("DEBUG: dummy test") - testvar := 5 - if testvar != 3 { - t.Errorf("testvar = %d; want 3", testvar) - } -} - -func Test_scs_0201_TestListPods(t *testing.T) { - f := features.New("pod list").Assess( - "pods from kube-system", - func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { - var pods corev1.PodList - err := cfg.Client().Resources("kube-system").List(context.TODO(), &pods) - if err != nil { - t.Fatal(err) - } - t.Logf("found %d pods", len(pods.Items)) - if len(pods.Items) == 0 { - t.Fatal("no pods in namespace kube-system") - } - return ctx - }) - - testenv.Test(t, f.Feature()) -} - -func Test_scs_0201_TestListPodsFailing(t *testing.T) { - f := features.New("pod list").Assess( - "pods from kube-test-a", - func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { - var pods corev1.PodList - err := 
cfg.Client().Resources("kube-test-a").List(context.TODO(), &pods) - if err != nil { - t.Fatal(err) - } - t.Logf("found %d pods", len(pods.Items)) - if len(pods.Items) == 0 { - t.Fatal("no pods in namespace kube-test-a") - } - return ctx - }) - - testenv.Test(t, f.Feature()) -} - -func Test_scs_0201_TestLongTest(t *testing.T) { - f := features.New("pod list").Assess( - "pods from kube-system", - func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { - var pods corev1.PodList - err := cfg.Client().Resources("kube-system").List(context.TODO(), &pods) - if err != nil { - t.Fatal(err) - } - progressReporterVal := ctx.Value(ProgressReporterCtxKey) - progressReporter:=progressReporterVal.(plugin_helper.ProgressReporter) - for i:=0;i<5;i++{ - time.Sleep(5*time.Second) - progressReporter.SendMessageAsync("Waiting for a long test...") - } - return ctx - }) - - testenv.Test(t, f.Feature()) -} diff --git a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/template_test.go.template b/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/template_test.go.template deleted file mode 100644 index bf5476938..000000000 --- a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/scs_k8s_tests/template_test.go.template +++ /dev/null @@ -1,66 +0,0 @@ -/* - Copyright 2021 The Kubernetes Authors. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package k8s_test_template - -import ( - "context" - "testing" - "time" - - plugin_helper "github.com/vmware-tanzu/sonobuoy-plugins/plugin-helper" - corev1 "k8s.io/api/core/v1" - "sigs.k8s.io/e2e-framework/pkg/envconf" - "sigs.k8s.io/e2e-framework/pkg/features" -) - - -func TestListPods(t *testing.T) { - f := features.New("pod list").Assess( - "pods from kube-system", - func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { - var pods corev1.PodList - err := cfg.Client().Resources("kube-system").List(context.TODO(), &pods) - if err != nil { - t.Fatal(err) - } - t.Logf("found %d pods", len(pods.Items)) - if len(pods.Items) == 0 { - t.Fatal("no pods in namespace kube-system") - } - return ctx - }) - - testenv.Test(t, f.Feature()) -} - -func TestLongTest(t *testing.T) { - f := features.New("pod list").Assess( - "pods from kube-system", - func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { - var pods corev1.PodList - err := cfg.Client().Resources("kube-system").List(context.TODO(), &pods) - if err != nil { - t.Fatal(err) - } - progressReporterVal := ctx.Value(ProgressReporterCtxKey) - progressReporter:=progressReporterVal.(plugin_helper.ProgressReporter) - for i:=0;i<5;i++{ - time.Sleep(5*time.Second) - progressReporter.SendMessageAsync("Waiting for a long test...") - } - return ctx - }) - - testenv.Test(t, f.Feature()) -} diff --git a/Tests/kaas/kaas-sonobuoy-tests/Dockerfile b/Tests/kaas/kaas-sonobuoy-tests/Dockerfile new file mode 100644 index 000000000..738c3bd55 --- /dev/null +++ b/Tests/kaas/kaas-sonobuoy-tests/Dockerfile @@ -0,0 +1,28 @@ +FROM golang:1.23 + +# Use build arguments to get the correct architecture +ARG TARGETARCH + +# Install kubectl based on the architecture +#See https://github.com/kubernetes-sigs/kubespray/pull/10066 +RUN apt-get update && apt-get install -y wget jq && \ + if [ "$TARGETARCH" = "amd64" ]; then \ + wget https://cdn.dl.k8s.io/release/v1.31.1/bin/linux/amd64/kubectl -O /usr/bin/kubectl; \ + elif [ "$TARGETARCH" = "arm64" ]; then \ + wget https://cdn.dl.k8s.io/release/v1.31.1/bin/linux/arm64/kubectl -O /usr/bin/kubectl; \ + else \ + echo "Unsupported architecture: $TARGETARCH" && exit 1; \ + fi && \ + chmod +x /usr/bin/kubectl + +COPY ./scs_k8s_conformance_tests /src/scs_k8s_conformance_tests +WORKDIR /src +COPY go.* /src/ +ENV CGO_ENABLED=0 +RUN go mod download + +#see: https://docs.docker.com/build/guide/mounts/ +RUN --mount=type=cache,target=/root/.cache/go-build \ + go test -c -o custom.test ./... 
+ +CMD ["bash", "-c", "go tool test2json ./custom.test -test.v"] diff --git a/Tests/kaas/kaas-sonobuoy-tests/Makefile b/Tests/kaas/kaas-sonobuoy-tests/Makefile new file mode 100644 index 000000000..dffc6d7a2 --- /dev/null +++ b/Tests/kaas/kaas-sonobuoy-tests/Makefile @@ -0,0 +1,132 @@ +# Makefile +# This makefile is for development purpose + +############################################################################### +## Setup: ## +############################################################################### +SHELL = /bin/bash + +DOCKERFILE="Dockerfile" +IMAGE_REGISTRY="ghcr.io/sovereigncloudstack/standards" +IMAGE_NAME="scs-kaas-conformance" + +KIND_CLUSTER ="testcluster" + +PLUGIN_NAME="scs-conformance-sonobuoy-plugin" +PLUGIN_FILE="${PLUGIN_NAME}.yaml" +KIND_CONFIG_FILE="kind_config.yaml" + +SONO_WAIT= 10 + +############################################################################### +## Helpers: ## +############################################################################### + +ifeq ($(IMAGE_VERSION_TAG),) + export TAG=dev +else + export TAG=${IMAGE_VERSION_TAG} +endif + +SONOBUOY_IMAGE = "${IMAGE_REGISTRY}/${IMAGE_NAME}:${TAG}" + +container-init: + @echo "" + @echo "[ContainerImageName] ${SONOBUOY_IMAGE}" + @echo "[SonobuoyPluginFile] ${PLUGIN_FILE}" + @echo "" + + +kind-init: + @echo "" + @echo "[KindCluster] ${KIND_CLUSTER}" + @echo "" + +############################################################################### +## For develpoment usage: ## +############################################################################### + +dev-prerequests: + @kind version + @docker version -f json | jq '.Client.Version' + @sonobuoy version --short + @yq --version + @jq --version + @go version + @docker buildx version + + +dev-setup: kind-init + kind create cluster --config ${KIND_CONFIG_FILE} --name ${KIND_CLUSTER} + + +dev-build: container-init + @echo "[Building image...]" + DOCKER_BUILDKIT=1 docker build . -f ${DOCKERFILE} -t ${SONOBUOY_IMAGE} + kind load docker-image --name ${KIND_CLUSTER} ${SONOBUOY_IMAGE} + + +dev-run: + @echo "[Running sonobuoy...]" + @sonobuoy run -p ${PLUGIN_FILE} --wait=${SONO_WAIT} + + +dev-run-background: + @echo "[Running sonobuoy in background...]" + @sonobuoy run -p ${PLUGIN_FILE} + @sonobuoy status + + +dev-result: dev-clean-result + @echo "[Retrieve results...]" + sonobuoy retrieve + @echo "[Extracting results...]" + mkdir results + tar -xf *.tar.gz -C results + cat results/plugins/scs-kaas-conformance/sonobuoy_results.yaml | yq + cat results/plugins/scs-kaas-conformance/results/global/out.json | jq '.Output' + @echo "[Displaying results...]" + sonobuoy results *.tar.gz + + +dev-rerun: dev-clean-sonobuoy dev-build dev-run dev-result + + +test-function: + @echo "only run tests for: $${TESTFUNCTION_CODE}" + DEVELOPMENT_MODE=createcluster go test -run=$${TESTFUNCTION_CODE} ./... || true + +lint: check-golangci-lint + @echo "[Running golangci-lint...]" + @golangci-lint run ./... -v || true + +GOLANGCI_LINT_VERSION ?= v1.61.0 +check-golangci-lint: + @if ! 
[ -x "$$(command -v golangci-lint)" ]; then \ + echo "[golangci-lint not found, installing...]"; \ + go install github.com/golangci/golangci-lint/cmd/golangci-lint@$(GOLANGCI_LINT_VERSION); \ + echo "[golangci-lint installed]"; \ + else \ + echo "[golangci-lint is already installed]"; \ + fi + +dev-clean-result: + @rm -rf *.tar.gz || true + @rm -rf results || true + + +dev-clean-sonobuoy: dev-clean-result + @echo "[Cleanup sonobuoy environment from cluster...]" + @sonobuoy delete --all --wait || true + + +dev-purge: kind-init dev-clean-sonobuoy + @echo "[Purge everthing...]" + @echo "[Deleting kind cluster...]" + kind delete cluster --name ${KIND_CLUSTER} || true + @echo "[Removing docker image...]" + docker rmi ${SONOBUOY_IMAGE} || true + @rm -rf ./build || true + +PHONY: dev-prerequests dev-build dev-run dev-result dev-clean-sonobuoy dev-clean-result dev-purge dev-rerun dev-run-background + diff --git a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/README.md b/Tests/kaas/kaas-sonobuoy-tests/README.md similarity index 80% rename from Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/README.md rename to Tests/kaas/kaas-sonobuoy-tests/README.md index cf5681dac..b5db4532d 100644 --- a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/README.md +++ b/Tests/kaas/kaas-sonobuoy-tests/README.md @@ -34,21 +34,13 @@ For test development with Sonobuoy, [KinD](https://kind.sigs.k8s.io/) is used as make dev-setup ``` -1. Set environment variables - - ```bash - export IMAGE_VERSION_TAG="dev" - export K8S_HOST= - export K8S_PORT= - ``` - -2. Build the image and upload it to the KinD cluster +1. Build the image and upload it to the KinD cluster ```bash make dev-build ``` -3. Execute the Sonobuoy plugin +2. Execute the Sonobuoy plugin ```bash make dev-run @@ -61,7 +53,7 @@ For test development with Sonobuoy, [KinD](https://kind.sigs.k8s.io/) is used as sonobuoy status ``` -4. Retrieve the Results +3. Retrieve the Results Once Sonobuoy is done running the plugin you can retrieve the results as following: @@ -69,7 +61,7 @@ For test development with Sonobuoy, [KinD](https://kind.sigs.k8s.io/) is used as make dev-result ``` -5. Clean the Sonobuoy testcase from the KinD cluster +4. Clean the Sonobuoy testcase from the KinD cluster Cleaning up all Kubernetes resources which were placed on the KinD cluster by sonobuoy @@ -77,10 +69,21 @@ For test development with Sonobuoy, [KinD](https://kind.sigs.k8s.io/) is used as make dev-clean ``` -6. 
Purge everything +These steps can also be carried out in short form using the following command: - Deleting the KinD cluster +```bash +make dev-rerun +``` - ```bash - make dev-purge - ``` +Finnaly to remove the kind cluster simply use: + +```bash +make dev-purge +``` + +## Execution of only certain test functions for development purposes + +```bash +export TESTFUNCTION_CODE= +make test-function +``` diff --git a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/go.mod b/Tests/kaas/kaas-sonobuoy-tests/go.mod similarity index 95% rename from Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/go.mod rename to Tests/kaas/kaas-sonobuoy-tests/go.mod index b1fe960ce..1fe28bda6 100644 --- a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/go.mod +++ b/Tests/kaas/kaas-sonobuoy-tests/go.mod @@ -1,6 +1,6 @@ -module kaas/kaas-sonobuoy-go-example-2 +module kaas/kaas-sonobuoy-tests -go 1.17 +go 1.21 require ( github.com/vmware-tanzu/sonobuoy-plugins/plugin-helper v0.0.0-20211029183731-1d6848b67eec @@ -27,6 +27,7 @@ require ( github.com/satori/go.uuid v1.2.1-0.20181028125025-b2ce2384e17b // indirect github.com/sirupsen/logrus v1.7.0 // indirect github.com/spf13/pflag v1.0.5 // indirect + github.com/vladimirvivien/gexe v0.1.1 // indirect github.com/vmware-tanzu/sonobuoy v1.11.5-prerelease.1.0.20211004145628-b633b4fefcdc // indirect golang.org/x/net v0.23.0 // indirect golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d // indirect diff --git a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/go.sum b/Tests/kaas/kaas-sonobuoy-tests/go.sum similarity index 96% rename from Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/go.sum rename to Tests/kaas/kaas-sonobuoy-tests/go.sum index 7fed0bb8f..5bddac2e0 100644 --- a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/go.sum +++ b/Tests/kaas/kaas-sonobuoy-tests/go.sum @@ -413,7 +413,6 @@ github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= @@ -444,9 +443,6 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= -golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod 
h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -478,8 +474,6 @@ golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.1-0.20200828183125-ce943fd02449/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -511,12 +505,7 @@ golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/ golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210224082022-3d97a244fca7/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= -golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -533,8 +522,6 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -583,21 +570,12 @@ golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210426230700-d19ff857e887/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 
-golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= -golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -607,9 +585,6 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -659,8 +634,6 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= -golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -783,7 +756,6 @@ k8s.io/code-generator v0.21.1/go.mod h1:hUlps5+9QaTrKx+jiM4rmq7YmH8wPOIko64uZCHD k8s.io/component-base v0.21.1/go.mod 
h1:NgzFZ2qu4m1juby4TnrmpR8adRk6ka62YdH5DkIIyKA= k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= k8s.io/gengo v0.0.0-20201214224949-b6c5ce23f027/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= -k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8= k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= diff --git a/Tests/kaas/kaas-sonobuoy-tests/kind_config.yaml b/Tests/kaas/kaas-sonobuoy-tests/kind_config.yaml new file mode 100644 index 000000000..947a9fa8a --- /dev/null +++ b/Tests/kaas/kaas-sonobuoy-tests/kind_config.yaml @@ -0,0 +1,10 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +networking: + apiServerAddress: 127.0.0.1 + apiServerPort: 6443 +nodes: +- role: control-plane +- role: worker +- role: worker +- role: worker diff --git a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/plugin.yaml b/Tests/kaas/kaas-sonobuoy-tests/scs-conformance-sonobuoy-plugin.yaml similarity index 70% rename from Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/plugin.yaml rename to Tests/kaas/kaas-sonobuoy-tests/scs-conformance-sonobuoy-plugin.yaml index c3af316e2..48097d943 100644 --- a/Tests/kaas/kaas-sonobuoy-go-example-e2e-framework/plugin.yaml +++ b/Tests/kaas/kaas-sonobuoy-tests/scs-conformance-sonobuoy-plugin.yaml @@ -1,13 +1,13 @@ sonobuoy-config: driver: Job - plugin-name: scsconformance + plugin-name: scs-kaas-conformance result-format: gojson - description: An easy to start from project for making K8s aware tests. + description: A testsuite for testing the scs conformance of k8s clusters. spec: command: - bash args: ["-c","go tool test2json ./custom.test -test.v | tee ${SONOBUOY_RESULTS_DIR}/out.json ; echo ${SONOBUOY_RESULTS_DIR}/out.json > ${SONOBUOY_RESULTS_DIR}/done"] - image: ghcr.io/sovereigncloudstack/standards/scsconformance:dev + image: ghcr.io/sovereigncloudstack/standards/scs-kaas-conformance:dev env: - name: NS_PREFIX value: custom diff --git a/Tests/kaas/kaas-sonobuoy-tests/scs_k8s_conformance_tests/main_test.go b/Tests/kaas/kaas-sonobuoy-tests/scs_k8s_conformance_tests/main_test.go new file mode 100644 index 000000000..98d305a5b --- /dev/null +++ b/Tests/kaas/kaas-sonobuoy-tests/scs_k8s_conformance_tests/main_test.go @@ -0,0 +1,147 @@ +package scs_k8s_tests + +import ( + "context" + "fmt" + "log" + "os" + "testing" + + plugin_helper "github.com/vmware-tanzu/sonobuoy-plugins/plugin-helper" + v1 "k8s.io/api/core/v1" + "sigs.k8s.io/e2e-framework/pkg/env" + "sigs.k8s.io/e2e-framework/pkg/envconf" + "sigs.k8s.io/e2e-framework/pkg/envfuncs" +) + +// Define a custom type for the context key +type nsContextKey string + +// Define a custom type for context keys +type contextKey string + +const ( + ProgressReporterCtxKey = "SONOBUOY_PROGRESS_REPORTER" + NamespacePrefixKey = "NS_PREFIX" + DevelopmentModeKey = "DEVELOPMENT_MODE" +) + +var testenv env.Environment + +func TestMain(m *testing.M) { + + // Specifying a run ID so that multiple runs wouldn't collide. Allow a prefix to be set via env var + // so that a plugin configuration (yaml file) can easily set that without code changes. + nsPrefix := os.Getenv(NamespacePrefixKey) + runID := envconf.RandomName(nsPrefix, 4) + + // Create updateReporter; will also place into context during Setup for use in features. 
+ updateReporter := plugin_helper.NewProgressReporter(0) + + developmentMode := os.Getenv(DevelopmentModeKey) + log.Printf("Setup test enviornment for: %#v", developmentMode) + + switch KubernetesEnviornment := developmentMode; KubernetesEnviornment { + + case "createcluster": + log.Println("Create kind cluster for test") + testenv = env.New() + kindClusterName := envconf.RandomName("gotestcluster", 16) + //~ namespace := envconf.RandomName("testnamespace", 16) + + testenv.Setup( + envfuncs.CreateKindCluster(kindClusterName), + ) + + testenv.Finish( + //~ envfuncs.DeleteNamespace(namespace), + envfuncs.DestroyKindCluster(kindClusterName), + ) + + case "usecluster": + log.Println("Use existing k8s cluster for the test") + log.Println("Not Yet Implemented") + //~ testenv = env.NewFromFlags() + //~ KubeConfig:= os.Getenv(KUBECONFIGFILE) + //~ testenv = env.NewWithKubeConfig(KubeConfig) + + default: + // Assume we are running in the cluster as a Sonobuoy plugin. + log.Println("Running tests inside k8s cluster") + testenv = env.NewInClusterConfig() + + testenv.Setup(func(ctx context.Context, config *envconf.Config) (context.Context, error) { + // Try and create the client; doing it before all the tests allows the tests to assume + // it can be created without error and they can just use config.Client(). + _, err := config.NewClient() + return context.WithValue(ctx, contextKey(ProgressReporterCtxKey), updateReporter), err + }) + + testenv.Finish( + func(ctx context.Context, cfg *envconf.Config) (context.Context, error) { + log.Println("Finished go test suite") + //~ if err := ???; err != nil{ + //~ return ctx, err + //~ } + return ctx, nil + }, + ) + + } + + testenv.BeforeEachTest(func(ctx context.Context, cfg *envconf.Config, t *testing.T) (context.Context, error) { + fmt.Println("BeforeEachTest") + updateReporter.StartTest(t.Name()) + return createNSForTest(ctx, cfg, t, runID) + }) + + testenv.AfterEachTest(func(ctx context.Context, cfg *envconf.Config, t *testing.T) (context.Context, error) { + fmt.Println("AfterEachTest") + updateReporter.StopTest(t.Name(), t.Failed(), t.Skipped(), nil) + return deleteNSForTest(ctx, cfg, t, runID) + }) + + /* + testenv.BeforeEachFeature(func(ctx context.Context, config *envconf.Config, info features.Feature) (context.Context, error) { + // Note that you can also add logic here for before a feature is tested. There may be + // more than one feature in a test. + fmt.Println("BeforeEachFeature") + return ctx, nil + }) + + testenv.AfterEachFeature(func(ctx context.Context, config *envconf.Config, info features.Feature) (context.Context, error) { + // Note that you can also add logic here for after a feature is tested. There may be + // more than one feature in a test. + fmt.Println("AfterEachFeature") + return ctx, nil + }) + */ + + os.Exit(testenv.Run(m)) +} + +// CreateNSForTest creates a random namespace with the runID as a prefix. It is stored in the context +// so that the deleteNSForTest routine can look it up and delete it. +func createNSForTest(ctx context.Context, cfg *envconf.Config, t *testing.T, runID string) (context.Context, error) { + ns := envconf.RandomName(runID, 10) + ctx = context.WithValue(ctx, nsKey(t), ns) + + t.Logf("Creating namespace %v for test %v", ns, t.Name()) + nsObj := v1.Namespace{} + nsObj.Name = ns + return ctx, cfg.Client().Resources().Create(ctx, &nsObj) +} + +// DeleteNSForTest looks up the namespace corresponding to the given test and deletes it. 
+func deleteNSForTest(ctx context.Context, cfg *envconf.Config, t *testing.T, runID string) (context.Context, error) { + ns := fmt.Sprint(ctx.Value(nsKey(t))) + t.Logf("Deleting namespace %v for test %v", ns, t.Name()) + + nsObj := v1.Namespace{} + nsObj.Name = ns + return ctx, cfg.Client().Resources().Delete(ctx, &nsObj) +} + +func nsKey(t *testing.T) nsContextKey { + return nsContextKey("NS-for-" + t.Name()) +} diff --git a/Tests/kaas/kaas-sonobuoy-tests/scs_k8s_conformance_tests/scs_0200_smoke_test.go b/Tests/kaas/kaas-sonobuoy-tests/scs_k8s_conformance_tests/scs_0200_smoke_test.go new file mode 100644 index 000000000..62ec43e3d --- /dev/null +++ b/Tests/kaas/kaas-sonobuoy-tests/scs_k8s_conformance_tests/scs_0200_smoke_test.go @@ -0,0 +1,15 @@ +package scs_k8s_tests + +import ( + "os" + "testing" +) + +func Test_scs_0200_smoke(t *testing.T) { + // This test ensures that no DevelopmentMode was set + // when using this test-suite productively + developmentMode := os.Getenv(DevelopmentModeKey) + if developmentMode != "" { + t.Errorf("developmentMode is set to = %v; want None", developmentMode) + } +} diff --git a/Tests/kaas/kind_config.yaml b/Tests/kaas/kind_config.yaml new file mode 100644 index 000000000..ead21eb72 --- /dev/null +++ b/Tests/kaas/kind_config.yaml @@ -0,0 +1,5 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: +- role: control-plane +- role: worker diff --git a/Tests/kaas/plugin/README.md b/Tests/kaas/plugin/README.md new file mode 100644 index 000000000..e54cf1864 --- /dev/null +++ b/Tests/kaas/plugin/README.md @@ -0,0 +1,38 @@ +# Plugin for provisioning k8s clusters and performing conformance tests on these clusters + +## Development environment + +### requirements + +* [docker](https://docs.docker.com/engine/install/) +* [kind](https://kind.sigs.k8s.io/docs/user/quick-start/#installation) + +### setup for development + +1. Generate python 3.10 env + + ```bash + sudo apt-get install python3.10-dev + virtualenv -p /usr/bin/python3.10 venv + echo "*" >> venv/.gitignore + source venv/bin/activate + (venv) curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 + (venv) python3.10 -m pip install --upgrade pip + (venv) python3.10 -m pip --version + + ``` + +2. Install dependencies: + + ```bash + (venv) pip install pip-tools + (venv) pip-compile requirements.in + (venv) pip-sync requirements.txt + ``` + +3. Set environment variables and launch the process: + + ```bash + (venv) export CLUSTER_PROVIDER="kind" + (venv) python run.py + ``` diff --git a/Tests/kaas/plugin/interface.py b/Tests/kaas/plugin/interface.py new file mode 100644 index 000000000..f62e3b3e2 --- /dev/null +++ b/Tests/kaas/plugin/interface.py @@ -0,0 +1,54 @@ + + +class KubernetesClusterPlugin(): + """ + An abstract base class for custom Kubernetes cluster provider plugins. + It represents an interface class from which the api provider-specific + plugins must be derived as child classes + + To implement fill the methods `create_cluster` and `delete_cluster` with + api provider-specific functionalities for creating and deleting clusters. + The `create_cluster` method must ensure that the kubeconfigfile is provided + at the position in the file system defined by the parameter + `kubeconfig_filepath` + + - Implement `create_cluster` and `delete_cluster` methods + - Create `__init__(self, config_file)` method to handle api specific + configurations. + + Example: + .. 
code:: python + + from interface import KubernetesClusterPlugin + from apiX_library import cluster_api_class as ClusterAPI + + class PluginX(KubernetesClusterPlugin): + + def __init__(self, config_file): + self.config = config_file + + def create_cluster(self, cluster_name, version, kubeconfig_filepath): + self.cluster = ClusterAPI(name=cluster_name, image=cluster_image, kubeconfig_filepath) + self.cluster.create(self.config) + + def delete_cluster(self, cluster_name): + self.cluster = ClusterAPI(cluster_name) + self.cluster.delete() + .. + """ + + def create_cluster(self, cluster_name, version, kubeconfig_filepath): + """ + This method is to be called to create a k8s cluster + :param: cluster_name: + :param: version: + :param: kubeconfig_filepath: + """ + raise NotImplementedError + + def delete_cluster(self, cluster_name): + """ + This method is to be called in order to unprovision a cluster + :param: cluster_name: + """ + raise NotImplementedError diff --git a/Tests/kaas/plugin/plugin_kind.py b/Tests/kaas/plugin/plugin_kind.py new file mode 100644 index 000000000..26cd3f23d --- /dev/null +++ b/Tests/kaas/plugin/plugin_kind.py @@ -0,0 +1,50 @@ +import logging +import os +import os.path +from pathlib import Path + +from interface import KubernetesClusterPlugin +from pytest_kind import KindCluster + +logger = logging.getLogger(__name__) + + +class PluginKind(KubernetesClusterPlugin): + """ + Plugin to handle the provisioning of kubernetes cluster for + conformance testing purpose with the use of Kind + """ + def __init__(self, config_path): + logger.info("Init PluginKind") + self.config = config_path + logger.debug(self.config) + self.working_directory = os.getcwd() + logger.debug(f"Working from {self.working_directory}") + + def create_cluster(self, cluster_name, version, kubeconfig): + """ + This method is to be called to create a k8s cluster + :param: kubernetes_version: + :return: kubeconfig_filepath + """ + cluster_version = version + if cluster_version == '1.29': + cluster_version = 'v1.29.8' + elif cluster_version == '1.30': + cluster_version = 'v1.30.4' + elif cluster_version == '1.31' or cluster_version == 'default': + cluster_version = 'v1.31.1' + cluster_image = f"kindest/node:{cluster_version}" + kubeconfig_filepath = Path(kubeconfig) + if kubeconfig_filepath is None: + raise ValueError("kubeconfig_filepath is missing") + else: + self.cluster = KindCluster(name=cluster_name, image=cluster_image, kubeconfig=kubeconfig_filepath) + if self.config is None: + self.cluster.create() + else: + self.cluster.create(self.config) + + def delete_cluster(self, cluster_name): + self.cluster = KindCluster(cluster_name) + self.cluster.delete() diff --git a/Tests/kaas/plugin/plugin_static.py b/Tests/kaas/plugin/plugin_static.py new file mode 100644 index 000000000..0bd24707e --- /dev/null +++ b/Tests/kaas/plugin/plugin_static.py @@ -0,0 +1,19 @@ +import shutil + +from interface import KubernetesClusterPlugin + + +class PluginStatic(KubernetesClusterPlugin): + """ + Plugin to handle the provisioning of kubernetes + using a kubeconfig file + """ + + def __init__(self, config_path): + self.kubeconfig_path = config_path + + def create_cluster(self, cluster_name, version, kubeconfig): + shutil.copyfile(self.kubeconfig_path, kubeconfig) + + def delete_cluster(self, cluster_name, version): + pass diff --git a/Tests/kaas/plugin/requirements.in b/Tests/kaas/plugin/requirements.in new file mode 100644 index 000000000..0a60c3c3c --- /dev/null +++ b/Tests/kaas/plugin/requirements.in @@ -0,0 +1,2 @@ 
+pytest-kind +kubernetes diff --git a/Tests/kaas/plugin/requirements.txt b/Tests/kaas/plugin/requirements.txt new file mode 100644 index 000000000..a04a03167 --- /dev/null +++ b/Tests/kaas/plugin/requirements.txt @@ -0,0 +1,60 @@ +# +# This file is autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# pip-compile requirements.in +# +cachetools==5.5.0 + # via google-auth +certifi==2024.8.30 + # via + # kubernetes + # requests +charset-normalizer==3.3.2 + # via requests +google-auth==2.34.0 + # via kubernetes +idna==3.8 + # via requests +kubernetes==30.1.0 + # via -r requirements.in +oauthlib==3.2.2 + # via + # kubernetes + # requests-oauthlib +pyasn1==0.6.0 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.0 + # via google-auth +pykube-ng==23.6.0 + # via pytest-kind +pytest-kind==22.11.1 + # via -r requirements.in +python-dateutil==2.9.0.post0 + # via kubernetes +pyyaml==6.0.2 + # via + # kubernetes + # pykube-ng +requests==2.32.3 + # via + # kubernetes + # pykube-ng + # requests-oauthlib +requests-oauthlib==2.0.0 + # via kubernetes +rsa==4.9 + # via google-auth +six==1.16.0 + # via + # kubernetes + # python-dateutil +urllib3==2.2.2 + # via + # kubernetes + # pykube-ng + # requests +websocket-client==1.8.0 + # via kubernetes diff --git a/Tests/kaas/plugin/run_plugin.py b/Tests/kaas/plugin/run_plugin.py new file mode 100755 index 000000000..7b4084107 --- /dev/null +++ b/Tests/kaas/plugin/run_plugin.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +import logging +import os.path + +import click +import yaml + +from plugin_kind import PluginKind +from plugin_static import PluginStatic + +PLUGIN_LOOKUP = { + "kind": PluginKind, + "static": PluginStatic, +} + + +def init_plugin(plugin_kind, config_path): + plugin_maker = PLUGIN_LOOKUP.get(plugin_kind) + if plugin_maker is None: + raise ValueError(f"unknown plugin '{plugin_kind}'") + return plugin_maker(config_path) + + +def load_spec(clusterspec_path): + with open(clusterspec_path, "rb") as fileobj: + return yaml.load(fileobj, Loader=yaml.SafeLoader) + + +@click.group() +def cli(): + pass + + +@cli.command() +@click.argument('plugin_kind', type=click.Choice(list(PLUGIN_LOOKUP), case_sensitive=False)) +@click.argument('plugin_config', type=click.Path(exists=True, dir_okay=False)) +@click.argument('clusterspec_path', type=click.Path(exists=True, dir_okay=False)) +@click.argument('cluster_id', type=str, default="default") +def create(plugin_kind, plugin_config, clusterspec_path, cluster_id): + clusterspec = load_spec(clusterspec_path)['clusters'] + plugin = init_plugin(plugin_kind, plugin_config) + clusterinfo = clusterspec[cluster_id] + plugin.create_cluster(cluster_id, clusterinfo['branch'], os.path.abspath(clusterinfo['kubeconfig'])) + + +@cli.command() +@click.argument('plugin_kind', type=click.Choice(list(PLUGIN_LOOKUP), case_sensitive=False)) +@click.argument('plugin_config', type=click.Path(exists=True, dir_okay=False)) +@click.argument('clusterspec_path', type=click.Path(exists=True, dir_okay=False)) +@click.argument('cluster_id', type=str, default="default") +def delete(plugin_kind, plugin_config, clusterspec_path, cluster_id): + plugin = init_plugin(plugin_kind, plugin_config) + plugin.delete_cluster(cluster_id) + + +if __name__ == '__main__': + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO) + cli() diff --git a/Tests/requirements.in b/Tests/requirements.in index 128fed5c0..e2113c5b7 100644 --- a/Tests/requirements.in +++ b/Tests/requirements.in @@ -1,8 +1,8 @@ aiohttp click 
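The new `run_plugin.py` above exposes the cluster plugins through a small click CLI; the same helpers can also be driven directly from Python. A short sketch, assuming a clusterspec file with a `default` entry carrying `branch` and `kubeconfig` keys, which is the shape the `create` command reads (file names here are placeholders):

```python
# Hypothetical programmatic use of the helpers defined in run_plugin.py;
# the file names are placeholders, not part of this patch.
import os.path

from run_plugin import init_plugin, load_spec

clusters = load_spec("clusterspec.yaml")["clusters"]  # e.g. {"default": {...}}
info = clusters["default"]

plugin = init_plugin("kind", "kind_config.yaml")      # or "static" plus a kubeconfig
plugin.create_cluster("default", info["branch"], os.path.abspath(info["kubeconfig"]))
# ... run the test suite against info["kubeconfig"] ...
plugin.delete_cluster("default")
```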
-fabric kubernetes_asyncio python-dateutil PyYAML openstacksdk requests +tomli diff --git a/Tests/requirements.txt b/Tests/requirements.txt index 2f3cbc154..bf93aff83 100644 --- a/Tests/requirements.txt +++ b/Tests/requirements.txt @@ -4,47 +4,36 @@ # # pip-compile requirements.in # -aiohttp==3.9.4 +aiohappyeyeballs==2.3.5 + # via aiohttp +aiohttp==3.10.11 # via # -r requirements.in # kubernetes-asyncio aiosignal==1.3.1 # via aiohttp -appdirs==1.4.4 - # via openstacksdk async-timeout==4.0.3 # via aiohttp -attrs==23.2.0 +attrs==24.2.0 # via aiohttp -bcrypt==4.1.2 - # via paramiko -certifi==2024.2.2 +certifi==2024.7.4 # via # kubernetes-asyncio # requests -cffi==1.16.0 - # via - # cryptography - # pynacl +cffi==1.17.0 + # via cryptography charset-normalizer==3.3.2 # via requests click==8.1.7 # via -r requirements.in -cryptography==42.0.4 - # via - # openstacksdk - # paramiko +cryptography==43.0.1 + # via openstacksdk decorator==5.1.1 # via # dogpile-cache - # fabric # openstacksdk -deprecated==1.2.14 - # via fabric -dogpile-cache==1.3.1 +dogpile-cache==1.3.3 # via openstacksdk -fabric==3.2.2 - # via -r requirements.in frozenlist==1.4.1 # via # aiohttp @@ -53,8 +42,6 @@ idna==3.7 # via # requests # yarl -invoke==2.2.0 - # via fabric iso8601==2.1.0 # via # keystoneauth1 @@ -63,48 +50,46 @@ jmespath==1.0.1 # via openstacksdk jsonpatch==1.33 # via openstacksdk -jsonpointer==2.4 +jsonpointer==3.0.0 # via jsonpatch -keystoneauth1==5.5.0 +keystoneauth1==5.7.0 # via openstacksdk -kubernetes-asyncio==29.0.0 +kubernetes-asyncio==30.3.1 # via -r requirements.in multidict==6.0.5 # via # aiohttp # yarl -munch==4.0.0 - # via openstacksdk netifaces==0.11.0 # via openstacksdk -openstacksdk==0.103.0 +openstacksdk==3.3.0 # via -r requirements.in os-service-types==1.7.0 # via # keystoneauth1 # openstacksdk -paramiko==3.4.0 - # via fabric pbr==6.0.0 # via # keystoneauth1 # openstacksdk # os-service-types # stevedore -pycparser==2.21 +platformdirs==4.2.2 + # via openstacksdk +propcache==0.2.0 + # via yarl +pycparser==2.22 # via cffi -pynacl==1.5.0 - # via paramiko -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via # -r requirements.in # kubernetes-asyncio -pyyaml==6.0.1 +pyyaml==6.0.2 # via # -r requirements.in # kubernetes-asyncio # openstacksdk -requests==2.31.0 +requests==2.32.3 # via # -r requirements.in # keystoneauth1 @@ -114,20 +99,17 @@ six==1.16.0 # via # kubernetes-asyncio # python-dateutil -stevedore==5.1.0 +stevedore==5.2.0 # via # dogpile-cache # keystoneauth1 -typing-extensions==4.9.0 +tomli==2.0.1 + # via -r requirements.in +typing-extensions==4.12.2 # via dogpile-cache -urllib3==2.2.1 +urllib3==2.2.2 # via # kubernetes-asyncio # requests -wrapt==1.16.0 - # via deprecated -yarl==1.9.4 +yarl==1.17.2 # via aiohttp - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/Tests/scs-compatible-iaas.yaml b/Tests/scs-compatible-iaas.yaml index becf02c18..5ad119fbf 100644 --- a/Tests/scs-compatible-iaas.yaml +++ b/Tests/scs-compatible-iaas.yaml @@ -1,97 +1,329 @@ -name: SCS Compatible IaaS +name: SCS-compatible IaaS +uuid: 50393e6f-2ae1-4c5c-a62c-3b75f2abef3f url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Tests/scs-compatible-iaas.yaml variables: - os_cloud +modules: + - id: opc-v2020.11 + name: OpenStack Powered Compute v2020.11 + url: https://opendev.org/openinfra/interop/src/branch/master/guidelines/2020.11.json + - id: opc-v2022.11 + name: OpenStack Powered Compute v2022.11 + url: 
https://opendev.org/openinfra/interop/src/branch/master/guidelines/2022.11.json + - id: scs-0100-v1 + name: Flavor naming v1 + url: https://docs.scs.community/standards/scs-0100-v1-flavor-naming + run: + - executable: ./iaas/flavor-naming/flavor-names-openstack.py + args: -c {os_cloud} --v1prefer + testcases: + - id: flavor-name-check + tags: [mandatory] + description: > + Must fulfill all requirements of + + - id: scs-0100-v2 + name: Flavor naming v2 + url: https://docs.scs.community/standards/scs-0100-v2-flavor-naming + run: + - executable: ./iaas/flavor-naming/flavor-names-openstack.py + args: -c {os_cloud} + testcases: + - id: flavor-name-check + tags: [mandatory] + description: > + Must fulfill all requirements of + + - id: scs-0100-v3.0 + name: Flavor naming v3.0 + url: https://docs.scs.community/standards/scs-0100-v3-flavor-naming + run: + - executable: ./iaas/flavor-naming/flavor-names-openstack.py + args: --v3 -c {os_cloud} + # Note: "--v3 --v2plus" would outlaw the v1 flavor names. Don't do this yet. + testcases: + - id: flavor-name-check + tags: [mandatory] + description: > + Must fulfill all requirements of + -- plus the list of mandatory + and recommended flavors found in + - id: scs-0100-v3.1 + name: Flavor naming v3.1 + url: https://docs.scs.community/standards/scs-0100-v3-flavor-naming + run: + - executable: ./iaas/flavor-naming/flavor-names-openstack.py + args: -c {os_cloud} --mand=./iaas/scs-0100-v3-flavors.yaml + # Note: --v2plus would outlaw the v1 flavor names. Don't do this yet. + testcases: + - id: flavor-name-check + tags: [mandatory] + description: > + Must fulfill all requirements of + + - id: scs-0101-v1 + name: Entropy v1 + url: https://docs.scs.community/standards/scs-0101-v1-entropy + run: + - executable: ./iaas/entropy/entropy-check.py + args: -c {os_cloud} -d + testcases: + - id: entropy-check-flavor-properties + tags: [] # don't use this testcase, but list it anyway because the script will output a result + - id: entropy-check-image-properties + tags: [] # don't use this testcase, but list it anyway because the script will output a result + - id: entropy-check-rngd + tags: [] # don't use this testcase, but list it anyway because the script will output a result + - id: entropy-check-entropy-avail + tags: [] # don't use this testcase, but list it anyway because the script will output a result + - id: entropy-check-fips-test + tags: [] # don't use this testcase, but list it anyway because the script will output a result + - id: entropy-check + tags: [mandatory] + description: > + Must fulfill all requirements of + + - id: scs-0101-v1.1 + name: Entropy v1 + url: https://docs.scs.community/standards/scs-0101-v1-entropy + run: + - executable: ./iaas/entropy/entropy-check.py + args: -c {os_cloud} -d + testcases: + - id: entropy-check-flavor-properties + tags: [recommended] + description: > + Must have all flavor properties recommended in + + - id: entropy-check-image-properties + tags: [recommended] + description: > + Must have all image properties recommended in + + - id: entropy-check-rngd + tags: [mandatory] + description: > + Images of the test sample must have the service `rngd`; see + + - id: entropy-check-entropy-avail + tags: [mandatory] + description: > + A test instance must have the correct `entropy_avail`; see + + - id: entropy-check-fips-test + tags: [mandatory] + description: > + A test instance must pass the "FIPS test"; see + + - id: entropy-check + tags: [] # don't use this testcase, but list it anyway because the script will output a result + - 
id: scs-0102-v1 + name: Image metadata v1 + url: https://docs.scs.community/standards/scs-0102-v1-image-metadata + run: + - executable: ./iaas/image-metadata/image-md-check.py + args: -c {os_cloud} -v -s + # skip check of mand/recc/sugg images, for these were never authoritative, and they have been + # superseded by scs-0104-v1 + testcases: + - id: image-metadata-check + tags: [mandatory] + description: > + Must fulfill all requirements of + - id: scs-0103-v1 + name: Standard flavors + url: https://docs.scs.community/standards/scs-0103-v1-standard-flavors + run: + - executable: ./iaas/standard-flavors/flavors-openstack.py + args: -c {os_cloud} -d ./iaas/scs-0103-v1-flavors.yaml + testcases: + - id: standard-flavors-check + tags: [mandatory] + description: > + Must fulfill all requirements of + - id: scs-0104-v1 + name: Standard images + url: https://docs.scs.community/standards/scs-0104-v1-standard-images + parameters: + image_spec: address (URL) of an image-spec (YAML) file + run: + - executable: ./iaas/standard-images/images-openstack.py + args: -c {os_cloud} -d {image_spec} + testcases: + - id: standard-images-check + tags: [mandatory] + description: > + Must fulfill all requirements of + - id: scs-0114-v1 + name: Volume Types + url: https://docs.scs.community/standards/scs-0114-v1-volume-type-standard + run: + - executable: ./iaas/volume-types/volume-types-check.py + args: -c {os_cloud} -d + testcases: + - id: volume-types-check + tags: [mandatory] + description: > + Must fulfill all requirements of + - id: scs-0115-v1 + name: Default rules for security groups + url: https://docs.scs.community/standards/scs-0115-v1-default-rules-for-security-groups + run: + - executable: ./iaas/security-groups/default-security-group-rules.py + args: --os-cloud {os_cloud} --debug + testcases: + - id: security-groups-default-rules-check + tags: [mandatory] + description: > + Must fulfill all requirements of + - id: scs-0116-v1 + name: Key manager + url: https://docs.scs.community/standards/scs-0116-v1-key-manager-standard + run: + - executable: ./iaas/key-manager/check-for-key-manager.py + args: --os-cloud {os_cloud} --debug + testcases: + - id: key-manager-check + tags: [mandatory] + description: > + Must fulfill all requirements of + - id: scs-0117-v1 + name: Volume backup + url: https://docs.scs.community/standards/scs-0117-v1-volume-backup-service + run: + - executable: ./iaas/volume-backup/volume-backup-tester.py + args: --os-cloud {os_cloud} --debug + testcases: + - id: volume-backup-check + tags: [mandatory] + description: > + Must fulfill all requirements of + - id: scs-0121-v1 + name: Availability Zones + url: https://docs.scs.community/standards/scs-0121-v1-Availability-Zones-Standard + testcases: + - id: availability-zones-check + tags: [availability-zones] + description: > + Note: manual check! Must fulfill all requirements of + - id: scs-0302-v1 + name: Domain Manager Role + url: https://docs.scs.community/standards/scs-0302-v1-domain-manager-role + # run: + # - executable: ./iam/domain-manager/domain-manager-check.py + # args: --os-cloud {os_cloud} --debug --domain-config ... + testcases: + - id: domain-manager-check + tags: [domain-manager] + description: > + Note: manual check! 
Must fulfill all requirements of +timeline: + - date: 2024-11-08 + versions: + v5: draft + v4: effective + v3: deprecated + - date: 2024-08-23 + versions: + v5: draft + v4: effective + v3: deprecated + v3-orig: deprecated + - date: 2024-07-31 + versions: + v4: effective + - date: 2024-04-30 + versions: + v4: effective + v3: warn + - date: 2024-02-28 + versions: + v4: effective + v3: effective + - date: 2023-11-30 + versions: + v3: effective + - date: 2023-10-31 + versions: + v3: effective + v2: effective + - date: 2023-06-15 + versions: + v3: effective + v2: effective + v1: effective + - date: 2023-03-23 + versions: + v1: effective + v2: effective + - date: 2021-01-01 + versions: + v1: effective versions: + - version: v5 + include: + - opc-v2022.11 + - scs-0100-v3.1 + - scs-0101-v1 + - scs-0102-v1 + - scs-0103-v1 + - ref: scs-0104-v1 + parameters: + image_spec: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Tests/iaas/scs-0104-v1-images-v5.yaml + - scs-0114-v1 + - scs-0115-v1 + - scs-0116-v1 + - scs-0117-v1 + - scs-0121-v1 + - scs-0302-v1 + targets: + main: mandatory + preview: domain-manager/availability-zones - version: v4 stabilized_at: 2024-02-28 - standards: - - name: OpenStack Powered Compute v2022.11 - url: https://opendev.org/openinfra/interop/src/branch/master/guidelines/2022.11.json - # Unfortunately, no wrapper to run refstack yet, needs to be added - - name: Flavor naming - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0100-v3-flavor-naming.md - checks: - - executable: ./iaas/flavor-naming/flavor-names-openstack.py - args: -c {os_cloud} --mand=./iaas/scs-0100-v3-flavors.yaml - # Note: --v2plus would outlaw the v1 flavor names. Don't do this yet. - id: flavor-name-check - - name: Entropy - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0101-v1-entropy.md - checks: - - executable: ./iaas/entropy/entropy-check.py - args: -c {os_cloud} -d - id: entropy-check - - name: Image metadata - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0102-v1-image-metadata.md - checks: - - executable: ./iaas/image-metadata/image-md-check.py - args: -c {os_cloud} -s -v - id: image-metadata-check - - name: Standard flavors - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0103-v1-standard-flavors.md - checks: - - executable: ./iaas/standard-flavors/flavors-openstack.py - args: -c {os_cloud} -d ./iaas/scs-0103-v1-flavors.yaml - id: standard-flavors-check - - name: Standard images - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0104-v1-standard-images.md - checks: - - executable: ./iaas/standard-images/images-openstack.py - args: -c {os_cloud} -d ./iaas/scs-0104-v1-images.yaml - id: standard-images-check + include: + - opc-v2022.11 + - scs-0100-v3.1 + - scs-0101-v1 + - scs-0102-v1 + - scs-0103-v1 + - ref: scs-0104-v1 + parameters: + image_spec: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Tests/iaas/scs-0104-v1-images.yaml + targets: + main: mandatory - version: v3 + # comment: > + # This is what our documentation wrongly stated as being v3 when we introduced v4. + # What was originally v3 (and what we actually continued to test) can be found as v3-orig. 
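For illustration, a sketch of how the new modules/timeline/versions layout is consumed by the scs_cert_lib helpers introduced further down in this changeset. It lists the testcases that the v5 "main" target marks as mandatory; it assumes being run from the Tests/ directory so that scs_cert_lib.py and the scope file are found.

    import datetime

    import yaml

    from scs_cert_lib import load_spec, annotate_validity, compile_suite

    with open("scs-compatible-iaas.yaml", encoding="UTF-8") as fileobj:
        spec = load_spec(yaml.safe_load(fileobj))

    # validity (draft/effective/warn/deprecated) is derived from the timeline and the check date
    annotate_validity(spec["timeline"], spec["versions"], datetime.date.today())

    version = spec["versions"]["v5"]
    suite = compile_suite(f"{spec['name']} v5 ({version['validity']})", version["include"], None, None)
    # the "main" target of v5 selects all testcases tagged "mandatory"
    mandatory = suite.select("main", version["targets"]["main"]).testcases
    print([testcase["id"] for testcase in mandatory])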
+ stabilized_at: 2024-02-28 + include: + - opc-v2022.11 + - scs-0100-v3.1 + - scs-0102-v1 + targets: + main: mandatory + - version: v3-orig stabilized_at: 2023-06-15 - deprecated_at: 2024-04-30 - standards: - - name: Flavor naming - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0100-v3-flavor-naming.md - checks: - - executable: ./iaas/flavor-naming/flavor-names-openstack.py - args: --v3 -c {os_cloud} - # Note: "--v3 --v2plus" would outlaw the v1 flavor names. Don't do this yet. - id: flavor-name-check - - name: Image metadata - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0102-v1-image-metadata.md - checks: - - executable: ./iaas/image-metadata/image-md-check.py - args: -v -c {os_cloud} - id: image-metadata-check - - name: OpenStack Powered Compute v2022.11 - url: https://opendev.org/openinfra/interop/src/branch/master/guidelines/2022.11.json + include: + - opc-v2022.11 + - scs-0100-v3.0 + - scs-0102-v1 + targets: + main: mandatory - version: v2 stabilized_at: 2023-03-23 - deprecated_at: 2023-11-30 - standards: - - name: Flavor naming - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0100-v2-flavor-naming.md - checks: - - executable: ./iaas/flavor-naming/flavor-names-openstack.py - args: -c {os_cloud} - id: flavor-name-check - - name: Image metadata - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0102-v1-image-metadata.md - checks: - - executable: ./iaas/image-metadata/image-md-check.py - args: -v -c {os_cloud} - id: image-metadata-check - - name: OpenStack Powered Compute v2022.11 - url: https://opendev.org/openinfra/interop/src/branch/master/guidelines/2022.11.json + include: + - opc-v2022.11 + - scs-0100-v2 + - scs-0102-v1 + targets: + main: mandatory - version: v1 stabilized_at: 2021-01-01 - deprecated_at: 2023-10-31 - standards: - - name: Flavor naming - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0100-v1-flavor-naming.md - checks: - - executable: ./iaas/flavor-naming/flavor-names-openstack.py - args: -c {os_cloud} --v1prefer - id: flavor-name-check - - name: Image metadata - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0102-v1-image-metadata.md - checks: - - executable: ./iaas/image-metadata/image-md-check.py - args: -c {os_cloud} -v - id: image-metadata-check - - name: OpenStack Powered Compute v2020.11 - url: https://opendev.org/openinfra/interop/src/branch/master/guidelines/2020.11.json + include: + - opc-v2020.11 + - scs-0100-v1 + - scs-0102-v1 + targets: + main: mandatory diff --git a/Tests/scs-compatible-kaas.yaml b/Tests/scs-compatible-kaas.yaml index 9988e799e..a4010c64e 100644 --- a/Tests/scs-compatible-kaas.yaml +++ b/Tests/scs-compatible-kaas.yaml @@ -1,35 +1,44 @@ -name: SCS Compatible KaaS +name: SCS-compatible KaaS +uuid: 1fffebe6-fd4b-44d3-a36c-fc58b4bb0180 url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Tests/scs-compatible-kaas.yaml variables: - - kubeconfig + - subject_root + # directory containing the kubeconfig file for the subject under test + # (note that we consider each kubernetes branch a test subject of its own) +modules: + - id: cncf-k8s-conformance + name: CNCF Kubernetes conformance + url: https://github.com/cncf/k8s-conformance/tree/master + testcases: + - id: cncf-k8s-conformance + tags: [mandatory] + - id: scs-0210-v2 + name: Kubernetes version policy + url: 
https://docs.scs.community/standards/scs-0210-v2-k8s-version-policy + run: + - executable: ./kaas/k8s-version-policy/k8s_version_policy.py + args: -k {subject_root}/kubeconfig.yaml + testcases: + - id: version-policy-check + tags: [mandatory] + - id: scs-0214-v2 + name: Kubernetes node distribution and availability + url: https://docs.scs.community/standards/scs-0214-v2-k8s-node-distribution + run: + - executable: ./kaas/k8s-node-distribution/k8s_node_distribution_check.py + args: -k {subject_root}/kubeconfig.yaml + testcases: + - id: node-distribution-check + tags: [mandatory] +timeline: + - date: 2024-02-28 + versions: + v1: draft versions: - - version: v2 - standards: - - name: Kubernetes version policy - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0210-v2-k8s-version-policy.md - checks: - - executable: ./kaas/k8s-version-policy/k8s_version_policy.py - args: -k {kubeconfig} - id: version-policy-check - - name: Kubernetes node distribution and availability - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0214-v1-k8s-node-distribution.md - checks: - - executable: ./kaas/k8s-node-distribution/k8s-node-distribution-check.py - args: -k {kubeconfig} - id: node-distribution-check - - name: CNCF Kubernetes conformance - url: https://github.com/cncf/k8s-conformance/tree/master - version: v1 - standards: - - name: Kubernetes version policy - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0210-v2-k8s-version-policy.md - checks: - - executable: ./kaas/k8s-version-policy/k8s_version_policy.py - args: -k {kubeconfig} - id: version-policy-check - - name: Kubernetes node distribution and availability - url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0214-v1-k8s-node-distribution.md - checks: - - executable: ./kaas/k8s-node-distribution/k8s-node-distribution-check.py - args: -k {kubeconfig} - id: node-distribution-check + include: + - cncf-k8s-conformance + - scs-0210-v2 + - scs-0214-v2 + targets: + main: mandatory diff --git a/Tests/scs-compliance-check.py b/Tests/scs-compliance-check.py index 9bb4a5de0..a81c6737e 100755 --- a/Tests/scs-compliance-check.py +++ b/Tests/scs-compliance-check.py @@ -19,40 +19,46 @@ import os import os.path +import uuid +import re import sys import shlex import getopt import datetime import subprocess -from functools import partial +from collections import defaultdict from itertools import chain +import logging import yaml +from scs_cert_lib import load_spec, annotate_validity, compile_suite, TestSuite, TESTCASE_VERDICTS -# valid keywords for various parts of the spec, to be checked using `check_keywords` -KEYWORDS = { - 'spec': ('name', 'url', 'versions', 'prerequisite', 'variables'), - 'version': ('version', 'standards', 'stabilized_at', 'deprecated_at'), - 'standard': ('checks', 'url', 'name', 'condition'), - 'check': ('executable', 'env', 'args', 'condition', 'lifetime', 'id', 'section'), -} + +logger = logging.getLogger(__name__) def usage(file=sys.stdout): """Output usage information""" - print("""Usage: scs-compliance-check.py [options] compliance-spec.yaml -Options: -v/--verbose: More verbose output - -q/--quiet: Don't output anything but errors - -d/--date YYYY-MM-DD: Check standards valid on specified date instead of today - -V/--version VERS: Force version VERS of the standard (instead of deriving from date) - -s/--subject SUBJECT: Name of the subject (cloud) under test, for the report - 
-S/--sections SECTION_LIST: comma-separated list of sections to test (default: all sections) - -o/--output REPORT_PATH: Generate yaml report of compliance check under given path - -C/--critical-only: Only return critical errors in return code - -a/--assign KEY=VALUE: assign variable to be used for the run (as required by yaml file) + print("""Usage: scs-compliance-check.py [options] SPEC_YAML + +Arguments: + SPEC_YAML: yaml file specifying the certificate scope + +Options: + -v/--verbose: More verbose output + -q/--quiet: Don't output anything but errors + --debug: enables DEBUG logging channel + -d/--date YYYY-MM-DD: Check standards valid on specified date instead of today + -V/--version VERS: Force version VERS of the standard (instead of deriving from date) + -s/--subject SUBJECT: Name of the subject (cloud) under test, for the report + -S/--sections SECTION_LIST: comma-separated list of sections to test (default: all sections) + -t/--tests REGEX: regular expression to select individual testcases based on their ids + -o/--output REPORT_PATH: Generate yaml report of compliance check under given path + -C/--critical-only: Only return critical errors in return code + -a/--assign KEY=VALUE: assign variable to be used for the run (as required by yaml file) With -C, the return code will be nonzero precisely when the tests couldn't be run to completion. -""".strip(), file=file) +""", file=file) def run_check_tool(executable, args, env=None, cwd=None): @@ -73,11 +79,6 @@ def run_check_tool(executable, args, env=None, cwd=None): ) -def errcode_to_text(err): - "translate error code to text" - return f"{err} ERRORS" if err else "PASSED" - - class Config: def __init__(self): self.arg0 = None @@ -90,26 +91,31 @@ def __init__(self): self.output = None self.sections = None self.critical_only = False + self.tests = None def apply_argv(self, argv): """Parse options. 
May exit the program.""" try: - opts, args = getopt.gnu_getopt(argv, "hvqd:V:s:o:S:Ca:", ( - "help", "verbose", "quiet", "date=", "version=", - "subject=", "output=", "sections=", "critical-only", "assign", + opts, args = getopt.gnu_getopt(argv, "hvqd:V:s:o:S:Ca:t:", ( + "help", "verbose", "quiet", "date=", "version=", "debug", + "subject=", "output=", "sections=", "critical-only", "assign=", "tests=", )) - except getopt.GetoptError as exc: - print(f"Option error: {exc}", file=sys.stderr) - usage() - sys.exit(1) + except getopt.GetoptError: + usage(file=sys.stderr) + raise for opt in opts: if opt[0] == "-h" or opt[0] == "--help": usage() sys.exit(0) elif opt[0] == "-v" or opt[0] == "--verbose": + if self.verbose: + logger.setLevel(logging.DEBUG) self.verbose = True + elif opt[0] == "--debug": + logger.setLevel(logging.DEBUG) elif opt[0] == "-q" or opt[0] == "--quiet": self.quiet = True + logging.getLogger().setLevel(logging.ERROR) elif opt[0] == "-d" or opt[0] == "--date": self.checkdate = datetime.date.fromisoformat(opt[1]) elif opt[0] == "-V" or opt[0] == "--version": @@ -127,36 +133,18 @@ def apply_argv(self, argv): if key in self.assignment: raise ValueError(f"Double assignment for {key!r}") self.assignment[key] = value + elif opt[0] == "-t" or opt[0] == "--tests": + self.tests = re.compile(opt[1]) else: - print(f"Error: Unknown argument {opt[0]}", file=sys.stderr) - if len(args) < 1: + logger.error(f"Unknown argument {opt[0]}") + if len(args) != 1: usage(file=sys.stderr) - sys.exit(1) + raise RuntimeError("need precisely one argument") self.arg0 = args[0] -def condition_optional(cond, default=False): - """ - check whether condition is in dict cond - - If set to mandatory, return False - - If set to optional, return True - - If set to something else, error out - - If unset, return default - """ - value = cond.get("condition") - value = {None: default, "optional": True, "mandatory": False}.get(value) - if value is None: - print(f"ERROR in spec parsing condition: {cond['condition']}", file=sys.stderr) - value = default - return value - - -def check_keywords(ctx, d): - valid = KEYWORDS[ctx] - invalid = [k for k in d if k not in valid] - if invalid: - print(f"ERROR in spec: {ctx} uses unknown keywords: {','.join(invalid)}", file=sys.stderr) - return len(invalid) +def select_valid(versions: list) -> list: + return [version for version in versions if version['_explicit_validity']] def suppress(*args, **kwargs): @@ -188,153 +176,212 @@ def invoke_check_tool(exe, args, env, cwd): return invokation -def compute_result(num_abort, num_error): - """compute check result given number of abort messages and number of error messages""" - if num_error: - return -1 # equivalent to FAIL - if num_abort: - return 0 # equivalent to DNF - return 1 # equivalent to PASS +def compute_results(stdout): + """pick out test results from stdout lines""" + result = {} + for line in stdout: + parts = line.rsplit(':', 1) + if len(parts) != 2: + continue + value = TESTCASE_VERDICTS.get(parts[1].strip().upper()) + if value is None: + continue + result[parts[0].strip()] = value + return result -def main(argv): - """Entry point for the checker""" - config = Config() - try: - config.apply_argv(argv) - except Exception as exc: - print(f"CRITICAL: {exc}", file=sys.stderr) - return 1 - if not config.subject: - print("You need pass --subject=SUBJECT.", file=sys.stderr) - return 1 - printv = suppress if not config.verbose else partial(print, file=sys.stderr) - printnq = suppress if config.quiet else partial(print, file=sys.stderr) 
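For illustration, the stdout contract behind compute_results above: check scripts emit one line per testcase of the form "<testcase-id>: PASS" or "<testcase-id>: FAIL", and any other line is ignored. The snippet below only mirrors that parsing for two testcase ids taken from the IaaS scope file (the log line is made up); it is not the implementation itself.

    from scs_cert_lib import TESTCASE_VERDICTS   # {'PASS': 1, 'FAIL': -1}

    lines = [
        "INFO: checking flavor names",    # no verdict after the last colon -> ignored
        "flavor-name-check: PASS",
        "entropy-check-rngd: FAIL",
    ]
    results = {}
    for line in lines:
        testcase_id, _, verdict = line.rpartition(":")
        value = TESTCASE_VERDICTS.get(verdict.strip().upper())
        if testcase_id and value is not None:
            results[testcase_id.strip()] = value
    print(results)   # {'flavor-name-check': 1, 'entropy-check-rngd': -1}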
- with open(config.arg0, "r", encoding="UTF-8") as specfile: - spec = yaml.load(specfile, Loader=yaml.SafeLoader) - missing_vars = [v for v in spec.get("variables", ()) if v not in config.assignment] - if missing_vars: - print(f"Missing variable assignments (via -a) for: {', '.join(missing_vars)}") - return 1 - check_cwd = os.path.dirname(config.arg0) or os.getcwd() - allaborts = 0 - allerrors = 0 - critical = 0 - report = { +class CheckRunner: + def __init__(self, cwd, assignment, verbosity=0): + self.cwd = cwd + self.assignment = assignment + self.memo = {} + self.num_abort = 0 + self.num_error = 0 + self.verbosity = verbosity + self.spamminess = 0 + + def run(self, check): + parameters = check.get('parameters') + assignment = {**self.assignment, **parameters} if parameters else self.assignment + args = check.get('args', '').format(**assignment) + env = {key: value.format(**assignment) for key, value in check.get('env', {}).items()} + env_str = " ".join(f"{key}={value}" for key, value in env.items()) + memo_key = f"{env_str} {check['executable']} {args}".strip() + logger.debug(f"running {memo_key!r}...") + invocation = self.memo.get(memo_key) + if invocation is None: + check_env = {**os.environ, **env} + invocation = invoke_check_tool(check["executable"], args, check_env, self.cwd) + invocation = { + 'id': str(uuid.uuid4()), + 'cmd': memo_key, + 'result': 0, # keep this for backwards compatibility + 'results': compute_results(invocation['stdout']), + **invocation + } + if self.verbosity > 1 and invocation["stdout"]: + print("\n".join(invocation["stdout"])) + self.spamminess += 1 + # the following check used to be "> 0", but this is quite verbose... + if invocation['rc'] or self.verbosity > 1 and invocation["stderr"]: + print("\n".join(invocation["stderr"])) + self.spamminess += 1 + self.memo[memo_key] = invocation + logger.debug(f".. 
rc {invocation['rc']}, {invocation['critical']} critical, {invocation['error']} error") + self.num_abort += invocation["critical"] + self.num_error += invocation["error"] + # count failed testcases because they need not be reported redundantly on the error channel + self.num_error + len([value for value in invocation['results'].values() if value < 0]) + return invocation + + def get_invocations(self): + return {invocation['id']: invocation for invocation in self.memo.values()} + + +class ResultBuilder: + def __init__(self, name): + self.name = name + self._raw = defaultdict(list) + + def record(self, id_, **kwargs): + self._raw[id_].append(kwargs) + + def finalize(self, permissible_ids=None): + final = {} + for id_, ls in self._raw.items(): + if permissible_ids is not None and id_ not in permissible_ids: + logger.warning(f"ignoring invalid result id: {id_}") + continue + # just in case: sort by value (worst first) + ls.sort(key=lambda item: item['result']) + winner, runnerups = ls[0], ls[1:] + if runnerups: + logger.warning(f"multiple result values for {id_}") + winner = {**winner, 'runnerups': runnerups} + final[id_] = winner + return final + + +def run_suite(suite: TestSuite, runner: CheckRunner): + """run all checks of `suite` using `runner`, returning results dict via `ResultBuilder`""" + suite.check_sanity() + builder = ResultBuilder(suite.name) + for check in suite.checks: + invocation = runner.run(check) + for id_, value in invocation["results"].items(): + builder.record(id_, result=value, invocation=invocation['id']) + return builder.finalize(permissible_ids=suite.ids) + + +def print_report(subject: str, suite: TestSuite, targets: dict, results: dict, verbose=False): + print(f"{subject} {suite.name}:") + for tname, target_spec in targets.items(): + failed, missing, passed = suite.select(tname, target_spec).eval_buckets(results) + verdict = 'FAIL' if failed else 'TENTATIVE pass' if missing else 'PASS' + summary_parts = [f"{len(passed)} passed"] + if failed: + summary_parts.append(f"{len(failed)} failed") + if missing: + summary_parts.append(f"{len(missing)} missing") + verdict += f" ({', '.join(summary_parts)})" + print(f"- {tname}: {verdict}") + reportcateg = [(failed, 'FAILED'), (missing, 'MISSING')] + if verbose: + reportcateg.append((passed, 'PASSED')) + for offenders, category in reportcateg: + if category == 'MISSING' and suite.partial: + continue # do not report each missing testcase if a filter was used + if not offenders: + continue + print(f" - {category}:") + for testcase in offenders: + print(f" - {testcase['id']}:") + if 'description' in testcase: # used to be `verbose and ...`, but users need the URL! 
+ print(f" > {testcase['description'].strip()}") + + +def create_report(argv, config, spec, versions, invocations): + return { # these fields are essential: "spec": { + "uuid": spec['uuid'], "name": spec['name'], "url": spec['url'], }, "checked_at": datetime.datetime.now(), "reference_date": config.checkdate, "subject": config.subject, - "versions": {}, + "versions": versions, # this field is mostly for debugging: "run": { + "uuid": str(uuid.uuid4()), "argv": argv, "assignment": config.assignment, "sections": config.sections, "forced_version": config.version or None, - "invocations": {}, + "forced_tests": None if config.tests is None else config.tests.pattern, + "invocations": invocations, }, } - check_keywords('spec', spec) - if config.version: - spec["versions"] = [vd for vd in spec["versions"] if vd["version"] == config.version] + + +def main(argv): + """Entry point for the checker""" + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO) + config = Config() + config.apply_argv(argv) + if not config.subject: + raise RuntimeError("You need pass --subject=SUBJECT.") + with open(config.arg0, "r", encoding="UTF-8") as specfile: + spec = load_spec(yaml.load(specfile, Loader=yaml.SafeLoader)) + missing_vars = [v for v in spec.get("variables", ()) if v not in config.assignment] + if missing_vars: + raise RuntimeError(f"Missing variable assignments (via -a) for: {', '.join(missing_vars)}") if "prerequisite" in spec: - print("WARNING: prerequisite not yet implemented!", file=sys.stderr) - vrs = report["versions"] - memo = report["run"]["invocations"] # memoize check tool results - matches = 0 - for vd in spec["versions"]: - check_keywords('version', vd) - stb_date = vd.get("stabilized_at") - dep_date = vd.get("deprecated_at") - futuristic = not stb_date or config.checkdate < stb_date - outdated = dep_date and dep_date < config.checkdate - if outdated and not config.version: - continue - vr = vrs[vd["version"]] = {} - matches += 1 - if config.version and outdated: - print(f"WARNING: Forced version {config.version} outdated", file=sys.stderr) - if config.version and futuristic: - print(f"INFO: Forced version {config.version} not (yet) stable", file=sys.stderr) - printnq(f"Testing {spec['name']} version {vd['version']}") - if "standards" not in vd: - print(f"WARNING: No standards defined yet for {spec['name']} version {vd['version']}", - file=sys.stderr) - seen_ids = set() - errors = 0 - aborts = 0 - for standard in vd.get("standards", ()): - check_keywords('standard', standard) - optional = condition_optional(standard) - printnq("*******************************************************") - printnq(f"Testing {'optional ' * optional}standard {standard['name']} ...") - printnq(f"Reference: {standard['url']} ...") - checks = standard.get("checks", ()) - if not checks: - printnq(f"WARNING: No check tool specified for {standard['name']}", file=sys.stderr) - for check in checks: - check_keywords('check', check) - if 'id' not in check: - raise RuntimeError(f"check descriptor missing id field: {check}") - id_ = check['id'] - if id_ in seen_ids: - raise RuntimeError(f"duplicate id: {id_}") - seen_ids.add(id_) - if 'executable' not in check: - # most probably a manual check - print(f"skipping check '{id_}': no executable given") - continue - section = check.get('section', check.get('lifetime', 'day')) - if config.sections and section not in config.sections: - print(f"skipping check '{id_}': not in selected sections") - continue - args = check.get('args', '').format(**config.assignment) 
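For illustration, a typical invocation of the rewritten checker with the new options, wrapped in a subprocess call; the subject/cloud name "mycloud" and the output path are placeholders, and the command assumes the working directory is Tests/ with a matching entry in clouds.yaml.

    import subprocess
    import sys

    rc = subprocess.call([
        sys.executable, "scs-compliance-check.py",
        "-a", "os_cloud=mycloud",      # assignment for the variable declared by the scope
        "-s", "mycloud",               # subject name recorded in the report
        "-o", "report.yaml",           # also write the YAML report
        "-t", "flavor-name-check",     # optional: only run testcases matching this regex
        "scs-compatible-iaas.yaml",
    ])
    sys.exit(rc)

With -t (or -S), testcases that were filtered out are not listed individually as missing, and the per-target verdict is at best a "TENTATIVE pass".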
- env = {key: value.format(**config.assignment) for key, value in check.get('env', {}).items()} - env_str = " ".join(f"{key}={value}" for key, value in env.items()) - memo_key = f"{env_str} {check['executable']} {args}".strip() - invokation = memo.get(memo_key) - if invokation is None: - check_env = {**os.environ, **env} - invokation = invoke_check_tool(check["executable"], args, check_env, check_cwd) - result = compute_result(invokation["critical"], invokation["error"]) - if result == 1 and invokation['rc']: - print(f"CRITICAL: check {id_} reported neither error nor abort, but had non-zero rc", file=sys.stderr) - critical += 1 - result = 0 - invokation['result'] = result - printv("\n".join(invokation["stdout"])) - printnq("\n".join(invokation["stderr"])) - memo[memo_key] = invokation - abort = invokation["critical"] - error = invokation["error"] - vr[check['id']] = {'result': invokation['result'], 'invocation': memo_key} - printnq(f"... returned {error} errors, {abort} aborts") - if not condition_optional(check, optional): - aborts += abort - errors += error - # NOTE: the following verdict may be tentative, depending on whether - # all tests have been run (which in turn depends on chosen sections); - # the logic to compute the ultimate verdict should be place further downstream, - # namely where the reports are gathered and evaluated - printnq("*******************************************************") - printnq(f"Verdict for subject {config.subject}, {spec['name']}, " - f"version {vd['version']}: {errcode_to_text(aborts + errors)}") - allaborts += aborts - allerrors += errors - if not matches: - print(f"CRITICAL: No valid version found for {config.checkdate}", file=sys.stderr) - critical += 1 - allaborts += critical # note: this is after we put the number into the report, so only for return code + logger.warning("prerequisite not yet implemented!") + annotate_validity(spec['timeline'], spec['versions'], config.checkdate) + if config.version is None: + versions = select_valid(spec['versions'].values()) + else: + versions = [spec['versions'].get(config.version)] + if versions[0] is None: + raise RuntimeError(f"Requested version '{config.version}' not found") + if not versions: + raise RuntimeError(f"No valid version found for {config.checkdate}") + check_cwd = os.path.dirname(config.arg0) or os.getcwd() + runner = CheckRunner(check_cwd, config.assignment, verbosity=config.verbose and 2 or not config.quiet) + version_report = {} + # collect report data as tuples (version, suite, results) before printing them + report_data = [] + for version in versions: + vname = version['version'] + suite = compile_suite( + f"{spec['name']} {vname} ({version['validity']})", + version['include'], + config.sections, + config.tests, + ) + report_data.append((version, suite, run_suite(suite, runner))) + # now report: to console if requested, and likewise for yaml output + if not config.quiet: + # print a horizontal line if we had any script output + if runner.spamminess: + print("********" * 10) # 80 characters + for version, suite, results in report_data: + print_report(config.subject, suite, version['targets'], results, config.verbose) if config.output: - with open(config.output, 'w', encoding='UTF-8') as file: - yaml.safe_dump(report, file, default_flow_style=False, sort_keys=False) - return min(127, allaborts + (0 if config.critical_only else allerrors)) + version_report = {version['version']: results for version, _, results in report_data} + report = create_report(argv, config, spec, version_report, 
runner.get_invocations()) + with open(config.output, 'w', encoding='UTF-8') as fileobj: + yaml.safe_dump(report, fileobj, default_flow_style=False, sort_keys=False, explicit_start=True) + return min(127, runner.num_abort + (0 if config.critical_only else runner.num_error)) if __name__ == "__main__": - sys.exit(main(sys.argv[1:])) + try: + sys.exit(main(sys.argv[1:])) + except SystemExit: + raise + except BaseException as exc: + logger.critical(f"{str(exc) or repr(exc)}") + raise diff --git a/Tests/scs-test-runner.py b/Tests/scs-test-runner.py new file mode 100755 index 000000000..780601e96 --- /dev/null +++ b/Tests/scs-test-runner.py @@ -0,0 +1,293 @@ +#!/usr/bin/env python3 +# vim: set ts=4 sw=4 et: +# +# scs-test-runner.py +# +# (c) Matthias Büchse +# SPDX-License-Identifier: Apache-2.0 +import logging +import os +import os.path +import shutil +import subprocess +import sys +import tempfile +import time + +import click +import tomli + +logger = logging.getLogger(__name__) +MONITOR_URL = "https://compliance.sovereignit.cloud/" + + +def ensure_dir(path): + try: + os.makedirs(path) + except FileExistsError: + pass + + +class Config: + def __init__(self): + self.cwd = os.path.abspath(os.path.dirname(sys.argv[0]) or os.getcwd()) + self.scs_compliance_check = os.path.join(self.cwd, 'scs-compliance-check.py') + self.cleanup_py = os.path.join(self.cwd, 'cleanup.py') + self.run_plugin_py = os.path.join(self.cwd, 'kaas', 'plugin', 'run_plugin.py') + self.ssh_keygen = shutil.which('ssh-keygen') + self.curl = shutil.which('curl') + self.secrets = {} + self.presets = {} + self.scopes = {} + self.subjects = {} + self._auth_token = None + + def load_toml(self, path): + self.cwd = os.path.abspath(os.path.dirname(path)) + with open(path, "rb") as fileobj: + toml_dict = tomli.load(fileobj) + self.scopes.update(toml_dict.get('scopes', {})) + self.subjects.update(toml_dict.get('subjects', {})) + self.presets.update(toml_dict.get('presets', {})) + self.secrets.update(toml_dict.get('secrets', {})) + + @property + def auth_token(self): + if self._auth_token is None: + pass + with open(self.abspath(self.secrets['tokenfile']), "r") as fileobj: + self._auth_token = fileobj.read().strip() + return self._auth_token + + def get_subject_mapping(self, subject): + default_mapping = self.subjects.get('_', {}).get('mapping', {}) + mapping = {key: value.format(subject=subject) for key, value in default_mapping.items()} + mapping.update(self.subjects.get(subject, {}).get('mapping', {})) + return mapping + + def get_kubernetes_setup(self, subject): + default_kubernetes_setup = self.subjects.get('_', {}).get('kubernetes_setup', {}) + kubernetes_setup = dict(default_kubernetes_setup) + kubernetes_setup.update(self.subjects.get(subject, {}).get('kubernetes_setup', {})) + return kubernetes_setup + + def abspath(self, path): + return os.path.join(self.cwd, path) + + def build_check_command(self, scope, subject, output): + # TODO figure out when to supply --debug here (but keep separated from our --debug) + args = [ + sys.executable, self.scs_compliance_check, self.abspath(self.scopes[scope]['spec']), + '--debug', '-C', '-o', output, '-s', subject, + ] + for key, value in self.get_subject_mapping(subject).items(): + args.extend(['-a', f'{key}={value}']) + return {'args': args} + + def build_provision_command(self, subject): + kubernetes_setup = self.get_kubernetes_setup(subject) + subject_root = self.abspath(self.get_subject_mapping(subject).get('subject_root') or '.') + ensure_dir(subject_root) + return { + 'args': [ + 
sys.executable, self.run_plugin_py, + 'create', + kubernetes_setup['kube_plugin'], + self.abspath(kubernetes_setup['kube_plugin_config']), + self.abspath(kubernetes_setup['clusterspec']), + kubernetes_setup['clusterspec_cluster'], + ], + 'cwd': subject_root, + } + + def build_unprovision_command(self, subject): + kubernetes_setup = self.get_kubernetes_setup(subject) + subject_root = self.abspath(self.get_subject_mapping(subject).get('subject_root') or '.') + ensure_dir(subject_root) + return { + 'args': [ + sys.executable, self.run_plugin_py, + 'delete', + kubernetes_setup['kube_plugin'], + self.abspath(kubernetes_setup['kube_plugin_config']), + self.abspath(kubernetes_setup['clusterspec']), + kubernetes_setup['clusterspec_cluster'], + ], + 'cwd': subject_root, + } + + def build_cleanup_command(self, subject): + # TODO figure out when to supply --debug here (but keep separated from our --debug) + return {'args': [ + sys.executable, self.cleanup_py, + '-c', self.get_subject_mapping(subject)['os_cloud'], + '--prefix', '_scs-', + '--ipaddr', '10.1.0.', + '--debug', + ]} + + def build_sign_command(self, target_path): + return {'args': [ + self.ssh_keygen, + '-Y', 'sign', + '-f', self.abspath(self.secrets['keyfile']), + '-n', 'report', + target_path, + ]} + + def build_upload_command(self, target_path, monitor_url): + if not monitor_url.endswith('/'): + monitor_url += '/' + return {'args': [ + self.curl, + '--fail-with-body', + '--data-binary', f'@{target_path}.sig', + '--data-binary', f'@{target_path}', + '-H', 'Content-Type: application/x-signed-yaml', + '-H', f'Authorization: Basic {self.auth_token}', + f'{monitor_url}reports', + ]} + + +@click.group() +@click.option('-d', '--debug', 'debug', is_flag=True) +@click.option('-c', '--config', 'config', type=click.Path(exists=True, dir_okay=False), default='config.toml') +@click.pass_obj +def cli(cfg, debug=False, config=None): + if debug: + logging.getLogger().setLevel(logging.DEBUG) + cfg.load_toml(config) + + +@cli.result_callback() +def process_pipeline(rc, *args, **kwargs): + sys.exit(rc) + + +def _run_commands(commands, num_workers=5): + processes = [] + while commands or processes: + while commands and len(processes) < num_workers: + processes.append(subprocess.Popen(**commands.pop())) + processes[:] = [p for p in processes if p.poll() is None] + time.sleep(0.5) + + +def _concat_files(source_paths, target_path): + with open(target_path, 'wb') as tfileobj: + for path in source_paths: + with open(path, 'rb') as sfileobj: + shutil.copyfileobj(sfileobj, tfileobj) + + +def _move_file(source_path, target_path): + # for Windows people, remove target first, but don't try too hard (Windows is notoriously bad at this) + # this two-stage delete-rename approach does have a tiny (irrelevant) race condition (thx Windows) + try: + os.remove(target_path) + except FileNotFoundError: + pass + os.rename(source_path, target_path) + + +@cli.command() +@click.option('--scope', 'scopes', type=str) +@click.option('--subject', 'subjects', type=str) +@click.option('--preset', 'preset', type=str) +@click.option('--num-workers', 'num_workers', type=int, default=5) +@click.option('--monitor-url', 'monitor_url', type=str, default=MONITOR_URL) +@click.option('-o', '--output', 'report_yaml', type=click.Path(exists=False), default=None) +@click.pass_obj +def run(cfg, scopes, subjects, preset, num_workers, monitor_url, report_yaml): + """ + run compliance tests and upload results to compliance monitor + """ + if not scopes and not subjects and not preset: + preset = 
'default' + if preset: + preset_dict = cfg.presets.get(preset) + if preset_dict is None: + raise KeyError('preset not found') + scopes = preset_dict['scopes'] + subjects = preset_dict['subjects'] + num_workers = preset_dict.get('workers', num_workers) + else: + scopes = [scope.strip() for scope in scopes.split(',')] if scopes else [] + subjects = [subject.strip() for subject in subjects.split(',')] if subjects else [] + if not scopes or not subjects: + raise click.UsageError('both scope(s) and subject(s) must be non-empty') + logger.debug(f'running tests for scope(s) {", ".join(scopes)} and subject(s) {", ".join(subjects)}') + logger.debug(f'monitor url: {monitor_url}, num_workers: {num_workers}, output: {report_yaml}') + with tempfile.TemporaryDirectory(dir=cfg.cwd) as tdirname: + report_yaml_tmp = os.path.join(tdirname, 'report.yaml') + jobs = [(scope, subject) for scope in scopes for subject in subjects] + outputs = [os.path.join(tdirname, f'report-{idx}.yaml') for idx in range(len(jobs))] + commands = [cfg.build_check_command(job[0], job[1], output) for job, output in zip(jobs, outputs)] + _run_commands(commands, num_workers=num_workers) + _concat_files(outputs, report_yaml_tmp) + subprocess.run(**cfg.build_sign_command(report_yaml_tmp)) + subprocess.run(**cfg.build_upload_command(report_yaml_tmp, monitor_url)) + if report_yaml is not None: + _move_file(report_yaml_tmp, report_yaml) + return 0 + + +def _run_command_for_subjects(cfg, subjects, preset, num_workers, command): + if not subjects and not preset: + preset = 'default' + if preset: + preset_dict = cfg.presets.get(preset) + if preset_dict is None: + raise KeyError('preset not found') + subjects = preset_dict['subjects'] + num_workers = preset_dict.get('workers', num_workers) + else: + subjects = [subject.strip() for subject in subjects.split(',')] if subjects else [] + if not subjects: + raise click.UsageError('subject(s) must be non-empty') + logger.debug(f'running {command} for subject(s) {", ".join(subjects)}, num_workers: {num_workers}') + m = getattr(cfg, f'build_{command}_command') + commands = [m(subject) for subject in subjects] + _run_commands(commands, num_workers=num_workers) + return 0 + + +@cli.command() +@click.option('--subject', 'subjects', type=str) +@click.option('--preset', 'preset', type=str) +@click.option('--num-workers', 'num_workers', type=int, default=5) +@click.pass_obj +def cleanup(cfg, subjects, preset, num_workers): + """ + clean up any lingering IaaS resources + """ + return _run_command_for_subjects(cfg, subjects, preset, num_workers, "cleanup") + + +@cli.command() +@click.option('--subject', 'subjects', type=str) +@click.option('--preset', 'preset', type=str) +@click.option('--num-workers', 'num_workers', type=int, default=5) +@click.pass_obj +def provision(cfg, subjects, preset, num_workers): + """ + create k8s clusters + """ + return _run_command_for_subjects(cfg, subjects, preset, num_workers, "provision") + + +@cli.command() +@click.option('--subject', 'subjects', type=str) +@click.option('--preset', 'preset', type=str) +@click.option('--num-workers', 'num_workers', type=int, default=5) +@click.pass_obj +def unprovision(cfg, subjects, preset, num_workers): + """ + clean up k8s clusters + """ + return _run_command_for_subjects(cfg, subjects, preset, num_workers, "unprovision") + + +if __name__ == '__main__': + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO) + cli(obj=Config()) diff --git a/Tests/scs_cert_lib.py b/Tests/scs_cert_lib.py new file mode 100644 index 
000000000..176224124 --- /dev/null +++ b/Tests/scs_cert_lib.py @@ -0,0 +1,239 @@ +#!/usr/bin/env python3 +# vim: set ts=4 sw=4 et: +# +# scs_cert_lib.py +# +# (c) Matthias Büchse +# SPDX-License-Identifier: Apache-2.0 + +from collections import Counter, defaultdict +from datetime import datetime, date, timedelta +import logging +import re + + +logger = logging.getLogger(__name__) + +# valid keywords for various parts of the spec, to be checked using `check_keywords` +KEYWORDS = { + 'spec': ('uuid', 'name', 'url', 'versions', 'prerequisite', 'variables', 'modules', 'timeline'), + 'versions': ('version', 'include', 'targets', 'stabilized_at'), + 'modules': ('id', 'run', 'testcases', 'url', 'name', 'parameters'), + 'run': ('executable', 'env', 'args', 'section'), + 'testcases': ('lifetime', 'id', 'description', 'tags'), + 'include': ('ref', 'parameters'), +} +# The canonical result values are -1, 0, and 1, for FAIL, MISS (or DNF), and PASS, respectively; +# these concrete numbers are important because we do rely on their ordering. Note that MISS/DNF should +# not be reported because it is tantamount to a result being absent. (Therefore the NIL_RESULT default +# value below.) +TESTCASE_VERDICTS = {'PASS': 1, 'FAIL': -1} +NIL_RESULT = {'result': 0} + + +def _check_keywords(ctx, d, keywords=KEYWORDS): + """ + Recursively check `d` (usually a `dict`, but maybe a `list` or a `tuple`) for correctness. + + Returns number of errors. + + Here, correctness means that the dict may only use keywords as given via `keywords`. + """ + valid = keywords.get(ctx) + if valid is None: + return 0 # stop recursion + if isinstance(d, (list, tuple)): + return sum(_check_keywords(ctx, v, keywords=keywords) for v in d) + if not isinstance(d, dict): + return 0 + invalid = [k for k in d if k not in valid] + if invalid: + logger.error(f"{ctx} uses unknown keywords: {','.join(invalid)}") + return len(invalid) + sum(_check_keywords(k, v, keywords=keywords) for k, v in d.items()) + + +def _resolve_spec(spec: dict): + """rewire `spec` so as to make most lookups via name unnecessary, and to find name errors early""" + if isinstance(spec['versions'], dict): + raise RuntimeError('spec dict already in resolved form') + # there are currently two types of objects that are being referenced by name or id + # - modules, referenced by id + # - versions, referenced by name (unfortunately, the field is called "version") + # step 1. build lookups + module_lookup = {module['id']: module for module in spec['modules']} + version_lookup = {version['version']: version for version in spec['versions']} + # step 2. check for duplicates: + if len(module_lookup) != len(spec['modules']): + raise RuntimeError("spec contains duplicate module ids") + if len(version_lookup) != len(spec['versions']): + raise RuntimeError("spec contains duplicate version ids") + # step 3. replace fields 'modules' and 'versions' by respective lookups + spec['modules'] = module_lookup + spec['versions'] = version_lookup + # step 4. resolve references + # step 4a. resolve references to modules in includes + # in this step, we also normalize the include form + for version in spec['versions'].values(): + version['include'] = [ + {'module': module_lookup[inc], 'parameters': {}} if isinstance(inc, str) else + {'module': module_lookup[inc['ref']], 'parameters': inc.get('parameters', {})} + for inc in version['include'] + ] + # step 4b. 
resolve references to versions in timeline + # on second thought, let's not go there: it's a canonical extension map, and it should remain that way. + # however, we still have to look for name errors + for entry in spec['timeline']: + for vname in entry['versions']: + # trigger KeyError + _ = version_lookup[vname] + + +def load_spec(document: dict) -> dict: + """check `document` (usually parsed YAML) and convert for further usage""" + if _check_keywords('spec', document): + # super simple syntax check (recursive) + raise RuntimeError('syntax problems in spec file. bailing') + _resolve_spec(document) + return document + + +def annotate_validity(timeline: list, versions: dict, checkdate: date): + """annotate `versions` with validity info from `timeline` (note that this depends on `checkdate`)""" + validity_lookup = max( + (entry for entry in timeline if entry['date'] <= checkdate), + key=lambda entry: entry['date'], + default={}, + ).get('versions', {}) + for vname, version in versions.items(): + validity = validity_lookup.get(vname) + version['validity'] = validity or 'deprecated' + version['_explicit_validity'] = validity + + +def add_period(dt: datetime, period: str) -> datetime: + """ + Given a `datetime` instance `dt` and a `str` instance `period`, compute the `datetime` when this period + expires, where period is one of: "day", "week", "month", or "quarter". For instance, with a period + of (calendar) "week", this period expires on midnight the next monday after `dt` + 7 days. This + computation is used to implement Regulations 2 and 3 of the standard scs-0004 -- see + + https://docs.scs.community/standards/scs-0004-v1-achieving-certification#regulations + """ + # compute the moment of expiry (so we are valid before that point, but not on that point) + if period is None or period == 'day': # day is default, so use it if period is None + dt += timedelta(days=2) + return datetime(dt.year, dt.month, dt.day) # omit time so as to arrive at midnight + if period == 'week': + dt += timedelta(days=14 - dt.weekday()) + return datetime(dt.year, dt.month, dt.day) # omit time so as to arrive at midnight + if period == 'month': + if dt.month == 11: + return datetime(dt.year + 1, 1, 1) + if dt.month == 12: + return datetime(dt.year + 1, 2, 1) + return datetime(dt.year, dt.month + 2, 1) + if period == 'quarter': + if dt.month >= 10: + return datetime(dt.year + 1, 4, 1) + if dt.month >= 7: + return datetime(dt.year + 1, 1, 1) + if dt.month >= 4: + return datetime(dt.year, 10, 1) + return datetime(dt.year, 7, 1) + + +def parse_selector(selector_str: str) -> list[list[str]]: + # a selector is a list of terms, + # a term is a list of atoms, + # an atom is a string that optionally starts with "!" 
+ return [term_str.strip().split('/') for term_str in selector_str.split()] + + +def test_atom(atom: str, tags: list[str]): + if atom.startswith("!"): + return atom[1:] not in tags + return atom in tags + + +def test_selector(selector: list[list[str]], tags: list[str]): + return all(any(test_atom(atom, tags) for atom in term) for term in selector) + + +def test_selectors(selectors: list[list[list[str]]], tags: list[str]): + return any(test_selector(selector, tags) for selector in selectors) + + +class TestSuite: + def __init__(self, name): + self.name = name + self.checks = [] + self.testcases = [] + self.ids = Counter() + self.partial = False + + def check_sanity(self): + # sanity check: ids must be unique + duplicates = [key for key, value in self.ids.items() if value > 1] + if duplicates: + logger.warning(f"duplicate ids in {self.name}: {', '.join(duplicates)}") + + def include_checks(self, module, parameters, sections=None): + missing_params = set(module.get('parameters', ())) - set(parameters) + if missing_params: + logger.warning(f"module {module['id']}: missing parameters {', '.join(missing_params)}") + return + self.checks.extend( + {**check, 'parameters': parameters} + for check in module.get('run', ()) + if sections is None or check.get('section') in sections + ) + + def include_testcases(self, testcases): + self.testcases.extend(testcases) + self.ids.update(testcase["id"] for testcase in testcases) + + def select(self, name, selectors): + suite = TestSuite(name) + if isinstance(selectors, str): + # convenience: allow callers to supply serialized form (they don't care, rightly so) + selectors = [parse_selector(sel_str) for sel_str in selectors.split(',')] + suite.include_testcases([tc for tc in self.testcases if test_selectors(selectors, tc['tags'])]) + return suite + + def eval_buckets(self, results) -> tuple[list[dict], list[dict], list[dict]]: + """returns lists of (failed, missing, passed) test cases""" + by_value = defaultdict(list) + for testcase in self.testcases: + value = results.get(testcase['id'], NIL_RESULT).get('result', 0) + by_value[value].append(testcase) + return by_value[-1], by_value[0], by_value[1] + + def evaluate(self, results) -> int: + """returns overall result""" + return min([ + results.get(testcase['id'], NIL_RESULT).get('result', 0) + for testcase in self.testcases + ], default=0) + + +def compile_suite(basename: str, include: list, sections: tuple = (), tests: re.Pattern = None) -> TestSuite: + suite = TestSuite(basename) + if sections: + suite.name += f" [sections: {', '.join(sections)}]" + suite.partial = True + if tests: + suite.name += f" [tests: '{tests.pattern}']" + suite.partial = True + for inc in include: + module = inc['module'] + # basic sanity + testcases = module.get('testcases', ()) + checks = module.get('run', ()) + if not testcases or not checks: + logger.info(f"module {module['id']} missing checks or test cases") + # always include all testcases (necessary for assessing partial results) + suite.include_testcases(testcases) + # only add checks if they contain desired testcases + if not tests or any(tests.match(ch['id']) for ch in testcases): + suite.include_checks(module, inc['parameters'], sections=sections) + return suite diff --git a/Tests/testing/scs-0103-v1-flavors-wrong.yaml b/Tests/testing/scs-0103-v1-flavors-wrong.yaml index 60d1c7bc2..7aff89a0f 100644 --- a/Tests/testing/scs-0103-v1-flavors-wrong.yaml +++ b/Tests/testing/scs-0103-v1-flavors-wrong.yaml @@ -5,191 +5,191 @@ flavor_groups: list: - name: SCS-1V-4 cpus: 1 - 
cpu-type: crowded-core # wrong: name suggests shared-core + "scs:cpu-type": crowded-core # wrong: name suggests shared-core ram: 4 - name-v1: SCS-1V:4 - name-v2: SCS-1V-4 + "scs:name-v1": SCS-1V:4 + "scs:name-v2": SCS-1V-4 - name: SCS-2V-8 cpus: 2 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 8 - name-v1: SCS-2V-8 # wrong: not a v1 name - name-v2: SCS-2V-8 + "scs:name-v1": SCS-2V-8 # wrong: not a v1 name + "scs:name-v2": SCS-2V-8 - name: SCS-4V-16 cpus: 4 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 12 # wrong: name suggests 16 - name-v1: SCS-4V:16 - name-v2: SCS-4V-16 + "scs:name-v1": SCS-4V:16 + "scs:name-v2": SCS-4V-16 - name: SCS-8V-32 cpus: 8 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 32 disk: 128 # wrong: no disk in name - name-v1: SCS-8V:32 - name-v2: SCS-8V-32 + "scs:name-v1": SCS-8V:32 + "scs:name-v2": SCS-8V-32 - name: SCS-1V-2 cpus: 2 # wrong: name suggests 1 cpu - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 2 - name-v1: SCS-1V:2 - name-v2: SCS-1V-2 + "scs:name-v1": SCS-1V:2 + "scs:name-v2": SCS-1V-2 - name: SCS-2V-4 cpus: 2 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 4 - name-v1: SCS-2V:4 - name-v2: SCS-2V-4 + "scs:name-v1": SCS-2V:4 + "scs:name-v2": SCS-2V-4 - name: SCS-4V-8 cpus: 4 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 8 - name-v1: SCS-4V:8 - name-v2: SCS-4V-8 + "scs:name-v1": SCS-4V:8 + "scs:name-v2": SCS-4V-8 - name: SCS-8V-16 cpus: 8 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 16 - name-v1: SCS-8V:16 - name-v2: SCS-8V-16 + "scs:name-v1": SCS-8V:16 + "scs:name-v2": SCS-8V-16 - name: SCS-16V-32 cpus: 16 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 32 - name-v1: SCS-16V:32 - name-v2: SCS-16V-32 + "scs:name-v1": SCS-16V:32 + "scs:name-v2": SCS-16V-32 - name: SCS-1V-8 cpus: 1 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 8 - name-v1: SCS-1V:8 - name-v2: SCS-1V-8 + "scs:name-v1": SCS-1V:8 + "scs:name-v2": SCS-1V-8 - name: SCS-2V-16 cpus: 2 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 16 - name-v1: SCS-2V:16 - name-v2: SCS-2V-16 + "scs:name-v1": SCS-2V:16 + "scs:name-v2": SCS-2V-16 - name: SCS-4V-32 cpus: 4 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 32 - name-v1: SCS-4V:32 - name-v2: SCS-4V-32 + "scs:name-v1": SCS-4V:32 + "scs:name-v2": SCS-4V-32 - name: SCS-1L-1 cpus: 1 - cpu-type: crowded-core + "scs:cpu-type": crowded-core ram: 1 - name-v1: SCS-1L:1 - name-v2: SCS-1L-1 + "scs:name-v1": SCS-1L:1 + "scs:name-v2": SCS-1L-1 - status: mandatory list: - name: SCS-2V-4-20s cpus: 2 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 4 disk: 20 # wrong: name suggests disk-type ssd - name-v1: SCS-2V:4:20s - name-v2: SCS-2V-4-20s + "scs:name-v1": SCS-2V:4:20s + "scs:name-v2": SCS-2V-4-20s - name: SCS-4V-16-100s cpus: 4 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 16 disk: 10 # wrong: name suggests 100 - disk0-type: ssd - name-v1: SCS-4V:16:100s - name-v2: SCS-4V-16-100s + "scs:disk0-type": ssd + "scs:name-v1": SCS-4V:16:100s + "scs:name-v2": SCS-4V-16-100s - status: recommended list: - name: SCS-1V-4-10 cpus: 1 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 4 disk: 10 - name-v1: SCS-1V:4:10 - name-v2: SCS-1V-4-10 + "scs:name-v1": SCS-1V:4:10 + "scs:name-v2": SCS-1V-4-10 - name: SCS-2V-8-20 cpus: 2 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 8 disk: 20 - name-v1: SCS-2V:8:20 - name-v2: SCS-2V-8-20 + "scs:name-v1": SCS-2V:8:20 + "scs:name-v2": SCS-2V-8-20 - name: SCS-4V-16-50 cpus: 
4 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 16 disk: 50 - name-v1: SCS-4V:16:50 - name-v2: SCS-4V-16-50 + "scs:name-v1": SCS-4V:16:50 + "scs:name-v2": SCS-4V-16-50 - name: SCS-8V-32-100 cpus: 8 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 32 disk: 100 - name-v1: SCS-8V:32:100 - name-v2: SCS-8V-32-100 + "scs:name-v1": SCS-8V:32:100 + "scs:name-v2": SCS-8V-32-100 - name: SCS-1V-2-5 cpus: 1 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 2 disk: 5 - name-v1: SCS-1V:2:5 - name-v2: SCS-1V-2-5 + "scs:name-v1": SCS-1V:2:5 + "scs:name-v2": SCS-1V-2-5 - name: SCS-2V-4-10 cpus: 2 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 4 disk: 10 - name-v1: SCS-2V:4:10 - name-v2: SCS-2V-4-10 + "scs:name-v1": SCS-2V:4:10 + "scs:name-v2": SCS-2V-4-10 - name: SCS-4V-8-20 cpus: 4 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 8 disk: 20 - name-v1: SCS-4V:8:20 - name-v2: SCS-4V-8-20 + "scs:name-v1": SCS-4V:8:20 + "scs:name-v2": SCS-4V-8-20 - name: SCS-8V-16-50 cpus: 8 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 16 disk: 50 - name-v1: SCS-8V:16:50 - name-v2: SCS-8V-16-50 + "scs:name-v1": SCS-8V:16:50 + "scs:name-v2": SCS-8V-16-50 - name: SCS-16V-32-100 cpus: 16 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 32 disk: 100 - name-v1: SCS-16V:32:100 - name-v2: SCS-16V-32-100 + "scs:name-v1": SCS-16V:32:100 + "scs:name-v2": SCS-16V-32-100 - name: SCS-1V-8-20 cpus: 1 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 8 disk: 20 - name-v1: SCS-1V:8:20 - name-v2: SCS-1V-8-20 + "scs:name-v1": SCS-1V:8:20 + "scs:name-v2": SCS-1V-8-20 - name: SCS-2V-16-50 cpus: 2 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 16 disk: 50 - name-v1: SCS-2V:16:50 - name-v2: SCS-2V-16-50 + "scs:name-v1": SCS-2V:16:50 + "scs:name-v2": SCS-2V-16-50 - name: SCS-4V-32-100 cpus: 4 - cpu-type: shared-core + "scs:cpu-type": shared-core ram: 32 disk: 100 - name-v1: SCS-4V:32:100 - name-v2: SCS-4V-32-100 + "scs:name-v1": SCS-4V:32:100 + "scs:name-v2": SCS-4V-32-100 - name: SCS-1L-1-5 cpus: 1 - cpu-type: crowded-core + "scs:cpu-type": crowded-core ram: 1 disk: 5 - name-v1: SCS-1L:1:5 - name-v2: SCS-1L-1-5 + "scs:name-v1": SCS-1L:1:5 + "scs:name-v2": SCS-1L-1-5 diff --git a/bindep.txt b/bindep.txt new file mode 100644 index 000000000..ee284814f --- /dev/null +++ b/bindep.txt @@ -0,0 +1,2 @@ +curl [!platform:gentoo] +net-misc/curl [platform:gentoo] diff --git a/compliance-monitor/Dockerfile b/compliance-monitor/Dockerfile new file mode 100644 index 000000000..c69837bee --- /dev/null +++ b/compliance-monitor/Dockerfile @@ -0,0 +1,16 @@ +# syntax=docker/dockerfile:1 +FROM python:3.10 +RUN useradd -g users -u 1001 -m -s /bin/bash runuser +USER 1001 +WORKDIR /code +COPY requirements.txt requirements.txt +# for python:3.10-alpine +# RUN \ +# apk add --no-cache postgresql-libs && \ +# apk add --no-cache --virtual .build-deps gcc musl-dev postgresql-dev && \ +# python3 -m pip install -r requirements.txt --no-cache-dir && \ +# apk --purge del .build-deps +RUN python3 -m pip install -r requirements.txt --no-cache-dir +COPY *.py . +EXPOSE 8080 +CMD ["python3", "./monitor.py"] diff --git a/compliance-monitor/README.md b/compliance-monitor/README.md new file mode 100644 index 000000000..b504b6eba --- /dev/null +++ b/compliance-monitor/README.md @@ -0,0 +1,229 @@ +# SCS compliance monitor + +A service with a REST-like interface, backed by Postgresql, to manage data about compliance of subjects +with SCS certificate requirements. 
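+
+For a first impression, the publicly readable compliance overview can be fetched without
+authentication; a sketch, assuming the deployment URL configured in `docker-compose.yml`:
+
+```shell
+curl https://compliance.sovereignit.cloud/page/table
+```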
+
+## Setup
+
+The first two sections have been adapted from
+.
+
+### Python dependencies
+
+This program uses the Python library `psycopg2`. As a [prerequisite for its build and
+installation](https://www.psycopg.org/docs/install.html#build-prerequisites),
+the following command is needed (on distributions based on Debian):
+
+```shell
+sudo apt install build-essential libpq-dev python3-dev
+```
+
+Please ensure you have installed the dependencies
+from `requirements.txt`. We recommend using Python >= 3.10 and installing the
+requirements into a virtualenv as follows:
+
+```shell
+python3 -m venv .venv && source .venv/bin/activate
+pip install -r requirements.txt
+```
+
+### Updating Python dependencies
+
+We manage Python dependencies in two files: `requirements.in` and `requirements.txt`.
+The former is meant to be edited by humans, whereas the latter is
+generated by `pip-compile` and contains an exact listing of _all_ dependencies,
+including transitive ones.
+
+`pip-compile` can be installed via `pip install pip-tools`.
+It needs to be run in two cases:
+
+1. You modified `requirements.in` (e.g., added a new dependency): run
+   `pip-compile requirements.in`.
+2. You want to bump the pinned dependencies: run `pip-compile --upgrade requirements.in`.
+
+Note: The Python version used for running `pip-compile` should be kept consistent; the currently
+used version is documented in the header of `requirements.txt`.
+
+### Postgresql
+
+You need a running Postgresql server. For instance, run it in a container like so:
+
+```shell
+docker run --network=host --rm -v $(pwd)/data:/var/lib/postgresql/data -it --name postgres -e POSTGRES_PASSWORD=mysecretpassword postgres
+```
+
+### Monitor service
+
+Run the service as follows:
+
+```shell
+SCM_DB_PASSWORD=mysecretpassword ./monitor.py --port 8080 --bootstrap bootstrap.yaml
+```
+
+(Alternatively, you may set `SCM_DB_PASSWORD_FILE` to point to a file containing the password.)
+
+The service will automatically create or update the database schema as needed, and it will load any records
+from the given bootstrap file into the database; this file should at least contain credentials for one user,
+because otherwise you won't be able to post new data. See the dedicated section for details.
+
+To use the service in production, it is strongly recommended to set up a reverse proxy with SSL.
+
+## Bootstrap file
+
+This file will be read and the database updated accordingly when the service is started, as well as upon
+receiving the corresponding signal (SIGHUP).
+
+```yaml
+accounts:
+  - subject: admin # usually the subject under test, but this one is a special account
+    api_keys: # needed for basic auth; must be a hash that passlib can verify (e.g., bcrypt or argon2)
+      - "$2b$12$02j2DtlOXdT/MTVmWG60Yu.MNIJOGxRGKvE3DdA5DylCaHXxAY1Om"
+    keys: # needed to check signature of posted material
+      - public_key: "..."
+        public_key_type: "ssh-ed25519"
+        public_key_name: "primary"
+    roles:
+      # anyone (even non-authenticated) can read public details for any subject
+      # any account can read the non-public details of compliance results of their subject
+      # any account can append results for their own subject
+      # - append_any # can append results for any subject
+      - read_any # can read non-public details of compliance results for any subject
+      - admin # can cause reload of the bootstrap file, among other things
+      - approve # can approve non-pass results
+  - subject: gx-scs
+    api_keys: []
+    keys: []
+```
+
+## Endpoints
+
+### POST /reports
+
+Needs to be authenticated (via basic auth).
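+
+For basic auth, the username is the subject name and the password is one of its API keys (cf. the
+bootstrap file above). As a sketch, the `$BASICAUTH` value used in the example below could be built
+like this (hypothetical subject `gx-scs` with a placeholder API key):
+
+```shell
+BASICAUTH=$(printf '%s' 'gx-scs:YOUR_API_KEY' | base64 -w0)
+```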
+ +Needs to specify `Content-Type`, either `application/x-signed-json` or `application/x-signed-yaml`. + +The actual report must be prefixed by an SSH signature, plus an ampersand character. The full body can +be created and sent as follows: + +```shell +ssh-keygen \ + -Y sign -f ~/.ssh/id_ed25519 -n report myreport.yaml +curl \ + --data-binary @myreport.yaml.sig --data-binary @myreport.yaml \ + -H "Content-Type: application/x-signed-yaml" -H "Authorization: Basic $BASICAUTH" \ + http://127.0.0.1:8080/reports +``` + +The tool `curl` will concatenate the contents of the two files with an ampersand in between. + +### GET /reports + +Returns the most recent reports, by default restricted to the authenticated subject and limited to 10 items. + +Needs to be authenticated (via basic auth). + +Supports query parameters: + +- `subject=SUBJECT`: by default, results are restricted to the subject of the authenticated account; + if the account has the role `read_any`, any subject may be specified, or it may be left blank to remove + the restriction; +- `limit=N`: return at most N items (default: 10); +- `skip=N`: skip N items (useful for pagination). + +### GET /results + +Returns the most recent results that are not expired or have been expired for at most 7 days. + +Needs to be authenticated (via basic auth). + +The return value is a _list of objects_ like the following: + +```json + { + "reportuuid": "def374a9-56a9-492c-b113-330d491c58c7", + "subject": "gxscs", + "checked_at": "2024-03-16T14:13:53.857422", + "scopeuuid": "50393e6f-2ae1-4c5c-a62c-3b75f2abef3f", + "version": "v3", + "check": "image-metadata-check", + "result": 1, + "approval": false + } +``` + +Supports query parameters: + +- `approved=APPROVED`: return only results with approval status `APPROVED` (either 0 or 1); + default: no such restriction is applied; +- `limit=N`: return at most N items (default: 10); +- `skip=N`: skip N items (useful for pagination). + +### POST /results + +Sets approval state of given results. + +Needs to be authenticated (via basic auth). + +Needs to specify `Content-Type` as `application/json`. + +The request body is a _list of objects_ like the following: + +```json + { + "reportuuid": "def374a9-56a9-492c-b113-330d491c58c7", + "scopeuuid": "50393e6f-2ae1-4c5c-a62c-3b75f2abef3f", + "version": "v3", + "check": "image-metadata-check", + "approval": true + } +``` + +The final field is the desired state; the other fields are used to determine the result in question +(within one report, version and check uniquely determine a result; the scope is given here as well +in case reports at some point contain multiple scopes). + +### GET /status/{subject} + +Returns the current status of the subject. Use the `Accept` header to select desired content type: + +- `text/html` (default): a snippet of HTML suitable for the end user; +- `image/png`: a PNG image of a badge; +- `application/json`: a short summary in JSON format. + +Query parameters: + +- `scopeuuid` (optional): restrict scope +- `version` (optional): restrict version +- `privileged_view` (optional `0` or `1`, default `0`): request privileged view (see below) + +If the privileged view is requested, then this request needs to be authenticated (via basic auth), +either for the same subject or for some account with role `read_any`. This view will immediately +show any non-pass result, whereas otherwise, such a result needs to be verified manually. + +### GET /metrics/{subject} + +A Prometheus exporter for the status of the subject. 
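+
+As a sketch, assuming the endpoint behaves as documented and using the deployment URL from
+`docker-compose.yml` (subject and API key are placeholders), it could be scraped like this:
+
+```shell
+curl -u gx-scs:YOUR_API_KEY https://compliance.sovereignit.cloud/metrics/gx-scs
+```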
+ +Needs to be authenticated (via basic auth). + +Supports content type `text/plain; version=0.0.4; charset=utf-8` only. + +### GET /{view_type}/table\[_full\] + +Returns the compliance table for all active subjects, where `view_type` can be one of the following: + +- `markdown`: return Markdown fragment (mimetype `text/markdown`) +- `fragment`: return HTML fragment (mimetype `text/html`) +- `page`: return full HTML page (mimetype `text/html`) + +If `table_full` is used, then HTTP basic auth must be performed, and the table will show the +privileged view (i.e., any FAIL will be reported regardless of manual approval). + +### GET /{view_type}/details\[_full\]/{subject}/{scopeuuid} + +Returns compliance details for given subject and scope. + +### GET /{view_type}/scope/{scopeuuid} + +Returns spec overview for the given scope. diff --git a/compliance-monitor/acme/.gitkeep b/compliance-monitor/acme/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/compliance-monitor/bootstrap.yaml b/compliance-monitor/bootstrap.yaml new file mode 100644 index 000000000..8339c422d --- /dev/null +++ b/compliance-monitor/bootstrap.yaml @@ -0,0 +1,68 @@ +accounts: + - subject: admin + api_keys: + - "$2b$12$02j2DtlOXdT/MTVmWG60Yu.MNIJOGxRGKvE3DdA5DylCaHXxAY1Om" + roles: + - read_any + - approve + - admin + - subject: zuul_ci + api_keys: + - "$argon2id$v=19$m=65536,t=3,p=4$1/o/RwihlFIKAaAUolQKAQ$4MAuy6myIaVNofSW9KLlf81/y7WotHCfRl8dxKJ2rjQ" + keys: + - public_key: "AAAAC3NzaC1lZDI1NTE5AAAAIBgla2bdGVztsncJNoAEVdAUiZaOW9Lwvf2yCt5GcI1J" + public_key_type: "ssh-ed25519" + public_key_name: "zuul_ci_sign" + roles: + - append_any + - subject: gx-scs + delegates: + - zuul_ci + keys: + - public_key: "AAAAC3NzaC1lZDI1NTE5AAAAILufk4C7e0eQQIkmUDK8GB2IoiDjYtv6mx2eE8wZ3VWT" + public_key_type: "ssh-ed25519" + public_key_name: "primary" + - subject: artcodix + delegates: + - zuul_ci + - subject: pco-prod1 + delegates: + - zuul_ci + - subject: pco-prod2 + delegates: + - zuul_ci + - subject: pco-prod3 + delegates: + - zuul_ci + - subject: pco-prod4 + delegates: + - zuul_ci + - subject: poc-kdo + delegates: + - zuul_ci + - subject: poc-wgcloud + delegates: + - zuul_ci + - subject: regio-a + delegates: + - zuul_ci + keys: + - public_key: "AAAAC3NzaC1lZDI1NTE5AAAAILufk4C7e0eQQIkmUDK8GB2IoiDjYtv6mx2eE8wZ3VWT" + public_key_type: "ssh-ed25519" + public_key_name: "primary" + - subject: scaleup-occ2 + delegates: + - zuul_ci + - subject: syseleven-dus2 + delegates: + - zuul_ci + - subject: syseleven-ham1 + delegates: + - zuul_ci + - subject: wavestack + delegates: + - zuul_ci + keys: + - public_key: "AAAAC3NzaC1lZDI1NTE5AAAAILufk4C7e0eQQIkmUDK8GB2IoiDjYtv6mx2eE8wZ3VWT" + public_key_type: "ssh-ed25519" + public_key_name: "primary" diff --git a/compliance-monitor/db_password.txt b/compliance-monitor/db_password.txt new file mode 100644 index 000000000..baa1534a9 --- /dev/null +++ b/compliance-monitor/db_password.txt @@ -0,0 +1 @@ +mysecretpassword diff --git a/compliance-monitor/docker-compose.yml b/compliance-monitor/docker-compose.yml new file mode 100644 index 000000000..118290843 --- /dev/null +++ b/compliance-monitor/docker-compose.yml @@ -0,0 +1,60 @@ +version: '3' + +services: + reverse-proxy: + image: traefik:v2.11 + # Enables the web UI and tells Traefik to listen to docker + command: > + --api.insecure=true + --providers.docker + --entryPoints.web.address=:80 + --entryPoints.websecure.address=:443 + --certificatesresolvers.myresolver.acme.email=matthias.buechse@cloudandheat.com + 
--certificatesresolvers.myresolver.acme.storage=/acme/acme.json + --certificatesresolvers.myresolver.acme.tlschallenge=true + # --certificatesresolvers.myresolver.acme.httpchallenge.entrypoint=web + ports: + - "80:80" + - "443:443" + - "127.0.0.1:8080:8080" + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - ./acme:/acme + web: + build: . + ports: + - "127.0.0.1:5000:8080" + environment: + - SCM_DB_HOST=postgres + - SCM_DB_PASSWORD_FILE=/run/secrets/db_password + - SCM_BASE_URL=https://compliance.sovereignit.cloud/ + volumes: + - ../Tests:/Tests + - ./bootstrap.yaml:/code/bootstrap.yaml + - ./templates:/code/templates + labels: + - traefik.http.routers.web.entryPoints=web + - traefik.http.routers.web.rule=Host(`localhost`) && PathPrefix(`/`) + - traefik.http.routers.websecure.entryPoints=websecure + - traefik.http.routers.websecure.rule=Host(`compliance.sovereignit.cloud`) && PathPrefix(`/`) + - traefik.http.routers.websecure.tls=true + - traefik.http.routers.websecure.tls.certresolver=myresolver + secrets: + - db_password + postgres: + image: postgres + volumes: + - $HOME/data:/var/lib/postgresql/data + environment: + - POSTGRES_PASSWORD_FILE=/run/secrets/db_password + ports: + - "127.0.0.1:8001:5432" + secrets: + - db_password + labels: + # do not publish this service publicly + - "traefik.http.routers.postgres.entrypoints=traefik" + +secrets: + db_password: + file: db_password.txt diff --git a/compliance-monitor/monitor.py b/compliance-monitor/monitor.py new file mode 100755 index 000000000..c6dcb2a41 --- /dev/null +++ b/compliance-monitor/monitor.py @@ -0,0 +1,777 @@ +#!/usr/bin/env python3 +# AN IMPORTANT NOTE ON CONCURRENCY: +# This server is based on uvicorn and, as such, is not multi-threaded. +# (It could use multiple processes, but we don't do that yet.) +# Consequently, we don't need to use any measures for thread-safety. +# However, if we do at some point enable the use of multiple processes, +# we should make sure that all processes are "on the same page" with regard +# to basic data such as certificate scopes, templates, and accounts. +# One way to achieve this synchronicity could be to use the Postgres server +# more, however, I hope that more efficient ways are possible. +# Also, it is quite likely that the signal SIGHUP could no longer be used +# to trigger a re-load. In any case, the `uvicorn.run` call would have to be +# fundamentally changed: +# > You must pass the application as an import string to enable 'reload' or 'workers'. 
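+# Note that a reload of certificate scopes, templates, and the bootstrap file can currently be
+# triggered by sending SIGHUP to this process (e.g., `kill -HUP <pid>`); see `reload_static_config`
+# at the bottom of this file.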
+from collections import defaultdict +from datetime import date, datetime, timedelta +from enum import Enum +import json +import logging +import os +import os.path +from shutil import which +import signal +from subprocess import run +from tempfile import NamedTemporaryFile +from typing import Annotated, Optional + +from fastapi import Depends, FastAPI, HTTPException, Request, Response, status +from fastapi.responses import RedirectResponse +from fastapi.security import HTTPBasic, HTTPBasicCredentials +from jinja2 import Environment +from markdown import markdown +from passlib.context import CryptContext +import psycopg2 +from psycopg2.errors import UniqueViolation +from psycopg2.extensions import connection +import ruamel.yaml +import uvicorn + +from sql import ( + db_find_account, db_update_account, db_update_publickey, db_filter_publickeys, db_get_reports, + db_get_keys, db_insert_report, db_get_recent_results2, db_patch_approval2, db_get_report, + db_ensure_schema, db_get_apikeys, db_update_apikey, db_filter_apikeys, db_clear_delegates, + db_find_subjects, db_insert_result2, db_get_relevant_results2, db_add_delegate, +) + + +logger = logging.getLogger(__name__) + + +try: + from scs_cert_lib import load_spec, annotate_validity, compile_suite, add_period +except ImportError: + # the following course of action is not unproblematic because the Tests directory will be + # mounted to the Docker instance, hence it's hard to tell what version we are gonna get; + # however, unlike the reloading of the config, the import only happens once, and at that point + # in time, both monitor.py and scs_cert_lib.py should come from the same git checkout + import sys; sys.path.insert(0, os.path.abspath('../Tests')) # noqa: E702 + from scs_cert_lib import load_spec, annotate_validity, compile_suite, add_period + + +class Settings: + def __init__(self): + self.db_host = os.getenv("SCM_DB_HOST", "localhost") + self.db_user = os.getenv("SCM_DB_USER", "postgres") + password_file_path = os.getenv("SCM_DB_PASSWORD_FILE", None) + if password_file_path: + with open(os.path.abspath(password_file_path), "r") as fileobj: + self.db_password = fileobj.read().strip() + else: + self.db_password = os.getenv("SCM_DB_PASSWORD", "mysecretpassword") + self.base_url = os.getenv("SCM_BASE_URL", '/') + self.bootstrap_path = os.path.abspath("./bootstrap.yaml") + self.template_path = os.path.abspath("./templates") + self.yaml_path = os.path.abspath("../Tests") + + +ROLES = {'read_any': 1, 'append_any': 2, 'admin': 4, 'approve': 8} +# number of days that expired results will be considered in lieu of more recent, but unapproved ones +GRACE_PERIOD_DAYS = 7 +# separator between signature and report data; use something like +# ssh-keygen \ +# -Y sign -f ~/.ssh/id_ed25519 -n report myreport.yaml +# curl \ +# --data-binary @myreport.yaml.sig --data-binary @myreport.yaml \ +# -H "Content-Type: application/yaml" -H "Authorization: Basic ..." \ +# http://127.0.0.1:8080/reports +# to achieve this! 
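+# (note: `post_report` below only accepts Content-Type application/x-signed-yaml or
+# application/x-signed-json, so the Content-Type given in the example above would be rejected)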
+SEP = "-----END SSH SIGNATURE-----\n&" +ASTERISK_LOOKUP = {'effective': '', 'draft': '*', 'warn': '†', 'deprecated': '††'} + + +class ViewType(Enum): + markdown = "markdown" + page = "page" + fragment = "fragment" + + +VIEW_REPORT = { + ViewType.markdown: 'report.md', + ViewType.fragment: 'report.md', + ViewType.page: 'overview.html', +} +VIEW_DETAIL = { + ViewType.markdown: 'details.md', + ViewType.fragment: 'details.md', + ViewType.page: 'overview.html', +} +VIEW_TABLE = { + ViewType.markdown: 'overview.md', + ViewType.fragment: 'overview.md', + ViewType.page: 'overview.html', +} +VIEW_SCOPE = { + ViewType.markdown: 'scope.md', + ViewType.fragment: 'scope.md', + ViewType.page: 'overview.html', +} +REQUIRED_TEMPLATES = tuple(set(fn for view in (VIEW_REPORT, VIEW_DETAIL, VIEW_TABLE, VIEW_SCOPE) for fn in view.values())) + + +# do I hate these globals, but I don't see another way with these frameworks +app = FastAPI() +security = HTTPBasic(realm="Compliance monitor", auto_error=True) # use False for optional login +settings = Settings() +# see https://passlib.readthedocs.io/en/stable/narr/quickstart.html +cryptctx = CryptContext( + schemes=('argon2', 'bcrypt'), + deprecated='auto', +) +env = Environment() # populate this on startup (final section of this file) +templates_map = { + k: None for k in REQUIRED_TEMPLATES +} +_scopes = {} # map scope uuid to `PrecomputedScope` instance + + +class TimestampEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, (date, datetime)): + return str(obj) + # Let the base class default method raise the TypeError + return super().default(obj) + + +def mk_conn(settings=settings): + return psycopg2.connect(host=settings.db_host, user=settings.db_user, password=settings.db_password) + + +def get_conn(settings=settings): + conn = mk_conn(settings=settings) + try: + yield conn + finally: + conn.close() + + +def ssh_validate(keys, signature, data): + # based on https://www.agwa.name/blog/post/ssh_signatures + with NamedTemporaryFile(mode="w") as allowed_signers_file, \ + NamedTemporaryFile(mode="w") as report_sig_file, \ + NamedTemporaryFile(mode="w") as report_file: + allowed_signers_file.write("".join([ + f"mail@csp.eu {publickey_type} {publickey}\n" + for publickey_type, publickey in keys + ])) + allowed_signers_file.flush() + report_sig_file.write(signature) + report_sig_file.flush() + report_file.write(data) + report_file.flush() + report_file.seek(0) + if run([ + which("ssh-keygen"), + "-Y", "verify", "-f", allowed_signers_file.name, "-I", "mail@csp.eu", "-n", "report", + "-s", report_sig_file.name, + ], stdin=report_file).returncode: + raise ValueError + + +def get_current_account( + credentials: Optional[HTTPBasicCredentials], + conn: connection, +) -> Optional[tuple[str, str]]: + """Extract account info from `credentials`. + + Returns `None` if unauthorized, otherwise a tuple `(current_subject, present_roles)`. 
+ """ + if credentials is None: + return + try: + with conn.cursor() as cur: + roles = db_find_account(cur, credentials.username) + api_keys = db_get_apikeys(cur, credentials.username) + match = False + for keyhash in api_keys: + # be sure to check every single one to make timing attacks less likely + match = cryptctx.verify(credentials.password, keyhash) or match + if not match: + raise RuntimeError + return credentials.username, roles + except (KeyError, RuntimeError): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect username or password", + headers={"WWW-Authenticate": f"Basic {security.realm}"}, + ) + + +def import_bootstrap(bootstrap_path, conn): + ryaml = ruamel.yaml.YAML(typ='safe') + with open(bootstrap_path) as fp: + data = ryaml.load(fp) + if not data or not isinstance(data, dict): + return + accounts = data.get('accounts', ()) + subjects = data.get('subjects', {}) + if not accounts and not subjects: + return + with conn.cursor() as cur: + for account in accounts: + roles = sum(ROLES[r] for r in account.get('roles', ())) + accountid = db_update_account(cur, {'subject': account['subject'], 'roles': roles}) + db_clear_delegates(cur, accountid) + for delegate in account.get('delegates', ()): + db_add_delegate(cur, accountid, delegate) + keyids = set(db_update_apikey(cur, accountid, h) for h in account.get("api_keys", ())) + db_filter_apikeys(cur, accountid, lambda keyid, *_: keyid in keyids) + keyids = set(db_update_publickey(cur, accountid, key) for key in account.get("keys", ())) + db_filter_publickeys(cur, accountid, lambda keyid, *_: keyid in keyids) + conn.commit() + + +class PrecomputedVersion: + """Precompute all `TestSuite` instances necessary to evaluate the results of some version""" + def __init__(self, version): + self.name = version['version'] + self.suite = compile_suite(self.name, version['include']) + self.validity = version['validity'] + self.listed = bool(version['_explicit_validity']) + self.targets = { + tname: self.suite.select(tname, target_spec) + for tname, target_spec in version['targets'].items() + } + + def evaluate(self, scenario_results): + """evaluate the results for this version and return the canonical JSON output""" + target_results = {} + for tname, suite in self.targets.items(): + target_results[tname] = { + 'testcases': [testcase['id'] for testcase in suite.testcases], + 'result': suite.evaluate(scenario_results), + } + return { + 'testcases': {tc['id']: tc for tc in self.suite.testcases}, + 'results': scenario_results, + 'result': target_results['main']['result'], + 'targets': target_results, + 'validity': self.validity, + } + + +class PrecomputedScope: + """Precompute all `TestSuite` instances necessary to evaluate the results of some scope""" + def __init__(self, spec): + self.name = spec['name'] + self.spec = spec + self.versions = { + version['version']: PrecomputedVersion(version) + for version in spec['versions'].values() + } + + def evaluate(self, scope_results): + """evaluate the results for this scope and return the canonical JSON output""" + version_results = { + vname: self.versions[vname].evaluate(scenario_results) + for vname, scenario_results in scope_results.items() + } + by_validity = defaultdict(list) + for vname in scope_results: + by_validity[self.versions[vname].validity].append(vname) + # go through worsening validity values until a passing version is found + relevant = [] + best_passed = None + for validity in ('effective', 'warn', 'deprecated'): + vnames = by_validity[validity] + 
relevant.extend(vnames) + if any(version_results[vname]['result'] == 1 for vname in vnames): + best_passed = validity + break + # always include draft (but only at the end) + relevant.extend(by_validity['draft']) + passed = [vname for vname in relevant if version_results[vname]['result'] == 1] + return { + 'name': self.name, + 'versions': version_results, + 'relevant': relevant, + 'passed': passed, + 'passed_str': ', '.join([ + vname + ASTERISK_LOOKUP[self.versions[vname].validity] + for vname in passed + ]), + 'best_passed': best_passed, + } + + def update_lookup(self, target_dict): + """Create entries in a lookup mapping for each testcase that occurs in this scope. + + This mapping from triples (scope uuid, version name, testcase id) to testcase facilitates + evaluating result sets from database queries a great deal, because then just one lookup operation + tells us whether a result row can be associated with any known testcase, and if so, whether the + result is still valid (looking at the testcase's lifetime). + + In the future, the mapping could even be simplified by deriving a unique id from each triple that + could then be stored (redundantly) in a dedicated database column, and the mapping could be from + just one id (instead of a triple) to testcase. + """ + scope_uuid = self.spec['uuid'] + for vname, precomputed_version in self.versions.items(): + listed = precomputed_version.listed + for testcase in precomputed_version.suite.testcases: + # put False if listed is False, else put testcase + target_dict[(scope_uuid, vname, testcase['id'])] = listed and testcase + + +def import_cert_yaml(yaml_path, target_dict): + yaml = ruamel.yaml.YAML(typ='safe') + with open(yaml_path, "r") as fileobj: + spec = load_spec(yaml.load(fileobj.read())) + annotate_validity(spec['timeline'], spec['versions'], date.today()) + target_dict[spec['uuid']] = precomputed_scope = PrecomputedScope(spec) + precomputed_scope.update_lookup(target_dict) + + +def import_cert_yaml_dir(yaml_path, target_dict): + for fn in sorted(os.listdir(yaml_path)): + if fn.startswith('scs-') and fn.endswith('.yaml'): + import_cert_yaml(os.path.join(yaml_path, fn), target_dict) + + +def get_scopes(): + """returns the scopes dict""" + return _scopes + + +def import_templates(template_dir, env, templates): + for fn in os.listdir(template_dir): + if fn.startswith("."): + continue + name = fn.removesuffix('.j2') + if name not in templates: + continue + with open(os.path.join(template_dir, fn), "r") as fileobj: + templates[name] = env.from_string(fileobj.read()) + + +def validate_templates(templates, required_templates=REQUIRED_TEMPLATES): + missing = [key for key in required_templates if not templates.get(key)] + if missing: + raise RuntimeError(f"missing templates: {', '.join(missing)}") + + +async def auth(request: Request, conn: Annotated[connection, Depends(get_conn)]): + return get_current_account(await security(request), conn) + + +def check_role(account: Optional[tuple[str, str]], subject: str = None, roles: int = 0): + """Raise an HTTPException with code 401 if `account` has insufficient permissions. + + The `account` is expected as returned by `get_current_account` -- either `None` if unauthorized, or + a tuple `(current_subject, present_roles)`. + + Here, we assume that the account has full access to its own data, i.e., if `account[0] == subject`. + Otherwise, the account must at least have the roles given, i.e., `roles & account[1] == roles`. 
+ """ + if account is None: + raise HTTPException(status_code=401, detail="Permission denied") + current_subject, present_roles = account + if subject != current_subject and roles & present_roles != roles: + raise HTTPException(status_code=401, detail="Permission denied") + return current_subject + + +@app.get("/") +async def root(): + # we might use the following redirect in the future: + # return RedirectResponse("/pages") + # but keep this silly message for the time being, so as not to expose the work in progress too much + return {"message": "Hello World"} + + +@app.get("/reports") +async def get_reports( + account: Annotated[tuple[str, str], Depends(auth)], + conn: Annotated[connection, Depends(get_conn)], + subject: Optional[str] = None, limit: int = 10, skip: int = 0, +): + if subject is None: + subject, _ = account + else: + check_role(account, subject, ROLES['read_any']) + with conn.cursor() as cur: + return db_get_reports(cur, subject, limit, skip) + + +@app.get("/reports/{report_uuid}") +async def get_report( + account: Annotated[tuple[str, str], Depends(auth)], + conn: Annotated[connection, Depends(get_conn)], + report_uuid: str, +): + with conn.cursor() as cur: + specs = db_get_report(cur, report_uuid) + if not specs: + raise HTTPException(status_code=404) + spec = specs[0] + check_role(account, spec['subject'], ROLES['read_any']) + return Response(content=json.dumps(spec, indent=2), media_type="application/json") + + +@app.post("/reports") +async def post_report( + request: Request, + account: Annotated[tuple[str, str], Depends(auth)], + conn: Annotated[connection, Depends(get_conn)], +): + # TODO this endpoint handles almost all user input, so check thoroughly and generate nice errors! + # check_role call further below because we need the subject from the document + # (we could expect the subject in the path or query and then later only check equality) + content_type = request.headers['content-type'] + if content_type not in ('application/x-signed-yaml', 'application/x-signed-json'): + # see https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/415 + raise HTTPException(status_code=415, detail="Unsupported Media Type") + + auth_subject, _ = account + with conn.cursor() as cur: + keys = db_get_keys(cur, auth_subject) + delegation_subjects = db_find_subjects(cur, auth_subject) + + body = await request.body() + body_text = body.decode("utf-8") + sep = body_text.find(SEP) + if sep < 0: + raise HTTPException(status_code=401, detail="missing signature") + sep += len(SEP) + signature = body_text[:sep - 1] # do away with the ampersand! 
+ body_text = body_text[sep:] + try: + ssh_validate(keys, signature, body_text) + except Exception: + raise HTTPException(status_code=401, detail="verification failed") + + json_texts = [] + if content_type.endswith('-yaml'): + yaml = ruamel.yaml.YAML(typ='safe') + documents = list(yaml.load_all(body_text)) # ruamel.yaml doesn't have API docs: this is a generator + json_texts = [json.dumps(document, cls=TimestampEncoder) for document in documents] + elif content_type.endswith("-json"): + documents = [json.loads(body_text)] + json_texts = [body_text] + else: + # unreachable due to the content-type check at the top + raise AssertionError("branch should never be reached") + + if not documents: + raise HTTPException(status_code=200, detail="empty reports") + + allowed_subjects = {auth_subject} | set(delegation_subjects) + for document in documents: + check_role(account, document['subject'], ROLES['append_any']) + if document['subject'] not in allowed_subjects: + raise HTTPException(status_code=401, detail="delegation problem?") + + with conn.cursor() as cur: + for document, json_text in zip(documents, json_texts): + rundata = document['run'] + uuid, subject, checked_at = rundata['uuid'], document['subject'], document['checked_at'] + scopeuuid = document['spec']['uuid'] + try: + reportid = db_insert_report(cur, uuid, checked_at, subject, json_text) + except UniqueViolation: + raise HTTPException(status_code=409, detail="Conflict: report already present") + for version, vdata in document['versions'].items(): + for check, rdata in vdata.items(): + result = rdata['result'] + approval = 1 == result # pre-approve good result + db_insert_result2(cur, checked_at, subject, scopeuuid, version, check, result, approval, reportid) + conn.commit() + + +def convert_result_rows_to_dict2( + rows, scopes_lookup, grace_period_days=0, scopes=(), subjects=(), include_report=False, +): + """evaluate all versions occurring in query result `rows`, returning canonical JSON representation""" + now = datetime.now() + if grace_period_days: + now -= timedelta(days=grace_period_days) + # collect result per subject/scope/version + preliminary = defaultdict(lambda: defaultdict(lambda: defaultdict(dict))) # subject -> scope -> version + missing = set() + for subject, scope_uuid, version, testcase_id, result, checked_at, report_uuid in rows: + testcase = scopes_lookup.get((scope_uuid, version, testcase_id)) + if not testcase: + # it can be False (testcase is known but version too old) or None (testcase not known) + # only report the latter case + if testcase is None: + missing.add((scope_uuid, version, testcase_id)) + continue + # drop value if too old + expires_at = add_period(checked_at, testcase.get('lifetime')) + if now >= expires_at: + continue + tc_result = dict(result=result, checked_at=checked_at) + if include_report: + tc_result.update(report=report_uuid) + preliminary[subject][scope_uuid][version][testcase_id] = tc_result + if missing: + logger.warning('missing objects: ' + ', '.join(repr(x) for x in missing)) + # make sure the requested subjects and scopes are present (facilitates writing jinja2 templates) + for subject in subjects: + for scope in scopes: + _ = preliminary[subject][scope] + return { + subject: { + scope_uuid: scopes_lookup[scope_uuid].evaluate(scope_result) + for scope_uuid, scope_result in subject_result.items() + } + for subject, subject_result in preliminary.items() + } + + +@app.get("/status") +async def get_status( + request: Request, + account: Annotated[Optional[tuple[str, str]], 
Depends(auth)], + conn: Annotated[connection, Depends(get_conn)], + subject: str = None, scopeuuid: str = None, version: str = None, +): + check_role(account, subject, ROLES['read_any']) + # note: text/html will be the default, but let's start with json to get the logic right + accept = request.headers['accept'] + if 'application/json' not in accept and '*/*' not in accept: + # see https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/406 + raise HTTPException(status_code=406, detail="client needs to accept application/json") + with conn.cursor() as cur: + rows2 = db_get_relevant_results2(cur, subject, scopeuuid, version, approved_only=False) + return convert_result_rows_to_dict2(rows2, get_scopes(), include_report=True) + + +def _build_report_url(base_url, report, *args, **kwargs): + if kwargs.get('download'): + return f"{base_url}reports/{report}" + url = f"{base_url}page/report/{report}" + if len(args) == 2: # version, testcase_id --> add corresponding fragment specifier + url += f"#{args[0]}_{args[1]}" + return url + + +def render_view(view, view_type, detail_page='detail', base_url='/', title=None, **kwargs): + media_type = {ViewType.markdown: 'text/markdown'}.get(view_type, 'text/html') + stage1 = stage2 = view[view_type] + if view_type is ViewType.page: + stage1 = view[ViewType.fragment] + def scope_url(uuid): return f"{base_url}page/scope/{uuid}" # noqa: E306,E704 + def detail_url(subject, scope): return f"{base_url}page/{detail_page}/{subject}/{scope}" # noqa: E306,E704 + def report_url(report, *args, **kwargs): return _build_report_url(base_url, report, *args, **kwargs) # noqa: E306,E704 + fragment = templates_map[stage1].render(detail_url=detail_url, report_url=report_url, scope_url=scope_url, **kwargs) + if view_type != ViewType.markdown and stage1.endswith('.md'): + fragment = markdown(fragment, extensions=['extra']) + if stage1 != stage2: + fragment = templates_map[stage2].render(fragment=fragment, title=title) + return Response(content=fragment, media_type=media_type) + + +@app.get("/{view_type}/report/{report_uuid}") +async def get_report_view( + request: Request, + account: Annotated[Optional[tuple[str, str]], Depends(auth)], + conn: Annotated[connection, Depends(get_conn)], + view_type: ViewType, + report_uuid: str, +): + with conn.cursor() as cur: + specs = db_get_report(cur, report_uuid) + if not specs: + raise HTTPException(status_code=404) + spec = specs[0] + check_role(account, spec['subject'], ROLES['read_any']) + return render_view(VIEW_REPORT, view_type, report=spec, base_url=settings.base_url, title=f'Report {report_uuid}') + + +@app.get("/{view_type}/detail/{subject}/{scopeuuid}") +async def get_detail( + request: Request, + conn: Annotated[connection, Depends(get_conn)], + view_type: ViewType, + subject: str, + scopeuuid: str, +): + with conn.cursor() as cur: + rows2 = db_get_relevant_results2(cur, subject, scopeuuid, approved_only=True) + results2 = convert_result_rows_to_dict2( + rows2, get_scopes(), grace_period_days=GRACE_PERIOD_DAYS, + subjects=(subject, ), scopes=(scopeuuid, ), + ) + return render_view(VIEW_DETAIL, view_type, results=results2, base_url=settings.base_url, title=f'{subject} compliance') + + +@app.get("/{view_type}/detail_full/{subject}/{scopeuuid}") +async def get_detail_full( + request: Request, + account: Annotated[Optional[tuple[str, str]], Depends(auth)], + conn: Annotated[connection, Depends(get_conn)], + view_type: ViewType, + subject: str, + scopeuuid: str, +): + check_role(account, subject, ROLES['read_any']) + with conn.cursor() 
as cur: + rows2 = db_get_relevant_results2(cur, subject, scopeuuid, approved_only=False) + results2 = convert_result_rows_to_dict2( + rows2, get_scopes(), include_report=True, subjects=(subject, ), scopes=(scopeuuid, ), + ) + return render_view(VIEW_DETAIL, view_type, results=results2, base_url=settings.base_url, title=f'{subject} compliance') + + +@app.get("/{view_type}/table") +async def get_table( + request: Request, + conn: Annotated[connection, Depends(get_conn)], + view_type: ViewType, +): + with conn.cursor() as cur: + rows2 = db_get_relevant_results2(cur, approved_only=True) + results2 = convert_result_rows_to_dict2(rows2, get_scopes(), grace_period_days=GRACE_PERIOD_DAYS) + return render_view(VIEW_TABLE, view_type, results=results2, base_url=settings.base_url, title="SCS compliance overview") + + +@app.get("/{view_type}/table_full") +async def get_table_full( + request: Request, + account: Annotated[Optional[tuple[str, str]], Depends(auth)], + conn: Annotated[connection, Depends(get_conn)], + view_type: ViewType, +): + check_role(account, None, ROLES['read_any']) + with conn.cursor() as cur: + rows2 = db_get_relevant_results2(cur, approved_only=False) + results2 = convert_result_rows_to_dict2(rows2, get_scopes()) + return render_view( + VIEW_TABLE, view_type, results=results2, + detail_page='detail_full', base_url=settings.base_url, + title="SCS compliance overview", + ) + + +@app.get("/{view_type}/scope/{scopeuuid}") +async def get_scope( + request: Request, + conn: Annotated[connection, Depends(get_conn)], + view_type: ViewType, + scopeuuid: str, +): + spec = get_scopes()[scopeuuid].spec + versions = spec['versions'] + relevant = sorted([name for name, version in versions.items() if version['_explicit_validity']]) + modules_chart = {} + for name in relevant: + for include in versions[name]['include']: + module_id = include['module']['id'] + row = modules_chart.get(module_id) + if row is None: + row = modules_chart[module_id] = {'module': include['module'], 'columns': {}} + row['columns'][name] = include + rows = sorted(list(modules_chart.values()), key=lambda row: row['module']['id']) + return render_view(VIEW_SCOPE, view_type, spec=spec, relevant=relevant, rows=rows, base_url=settings.base_url, title=spec['name']) + + +@app.get("/pages") +async def get_pages( + request: Request, + conn: Annotated[connection, Depends(get_conn)], +): + return RedirectResponse("/page/table") + + +@app.get("/results") +async def get_results( + request: Request, + account: Annotated[tuple[str, str], Depends(auth)], + conn: Annotated[connection, Depends(get_conn)], + approved: Optional[bool] = None, limit: int = 10, skip: int = 0, +): + """get recent results, potentially filtered by approval status""" + check_role(account, roles=ROLES['read_any']) + with conn.cursor() as cur: + return db_get_recent_results2(cur, approved, limit, skip, max_age_days=GRACE_PERIOD_DAYS) + + +@app.post("/results") +async def post_results( + request: Request, + account: Annotated[tuple[str, str], Depends(auth)], + conn: Annotated[connection, Depends(get_conn)], +): + """post approvals to this endpoint""" + check_role(account, roles=ROLES['approve']) + content_type = request.headers['content-type'] + if content_type not in ('application/json', ): + raise HTTPException(status_code=500, detail="Unsupported content type") + body = await request.body() + document = json.loads(body.decode("utf-8")) + records = [document] if isinstance(document, dict) else document + with conn.cursor() as cur: + for record in records: + 
db_patch_approval2(cur, record) + conn.commit() + + +def pick_filter(results, subject, scope): + """Jinja filter to pick scope results from `results` for given `subject` and `scope`""" + return results.get(subject, {}).get(scope, {}) + + +def summary_filter(scope_results): + """Jinja filter to construct summary from `scope_results`""" + passed_str = scope_results.get('passed_str', '') or '–' + best_passed = scope_results.get('best_passed') + # avoid simple 🟢🔴 (hard to distinguish for color-blind folks) + color = { + 'effective': '✅', + 'warn': '✅', # forgo differentiation here in favor of simplicity (will be apparent in version list) + 'deprecated': '🟧', + }.get(best_passed, '🛑') + return f'{color} {passed_str}' + + +def verdict_filter(value): + """Jinja filter to turn a canonical result value into a written verdict (PASS, MISS, or FAIL)""" + # be fault-tolerant here and turn every non-canonical value into a MISS + return {1: 'PASS', -1: 'FAIL'}.get(value, 'MISS') + + +def verdict_check_filter(value): + """Jinja filter to turn a canonical result value into a symbolic verdict (✔, ⚠, or ✘)""" + # be fault-tolerant here and turn every non-canonical value into a MISS + return {1: '✔', -1: '✘'}.get(value, '⚠') + + +def reload_static_config(*args, do_ensure_schema=False): + # allow arbitrary arguments so it can readily be used as signal handler + logger.info("loading static config") + scopes = {} + import_cert_yaml_dir(settings.yaml_path, scopes) + # import successful: only NOW destructively update global _scopes + _scopes.clear() + _scopes.update(scopes) + import_templates(settings.template_path, env=env, templates=templates_map) + validate_templates(templates=templates_map) + with mk_conn(settings=settings) as conn: + if do_ensure_schema: + db_ensure_schema(conn) + import_bootstrap(settings.bootstrap_path, conn=conn) + + +if __name__ == "__main__": + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO) + env.filters.update( + pick=pick_filter, + summary=summary_filter, + verdict=verdict_filter, + verdict_check=verdict_check_filter, + markdown=markdown, + ) + reload_static_config(do_ensure_schema=True) + signal.signal(signal.SIGHUP, reload_static_config) + uvicorn.run(app, host='0.0.0.0', port=8080, log_level="info", workers=1) diff --git a/compliance-monitor/requirements.in b/compliance-monitor/requirements.in new file mode 100644 index 000000000..2046cb08e --- /dev/null +++ b/compliance-monitor/requirements.in @@ -0,0 +1,9 @@ +argon2_cffi +bcrypt +fastapi +jinja2 +markdown +passlib +psycopg2 +ruamel.yaml +uvicorn[standard] diff --git a/compliance-monitor/requirements.txt b/compliance-monitor/requirements.txt new file mode 100644 index 000000000..1bcb5325a --- /dev/null +++ b/compliance-monitor/requirements.txt @@ -0,0 +1,122 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile requirements.in +# +annotated-types==0.7.0 + # via pydantic +anyio==4.4.0 + # via + # httpx + # starlette + # watchfiles +argon2-cffi==23.1.0 + # via -r requirements.in +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +bcrypt==4.1.3 + # via -r requirements.in +certifi==2024.7.4 + # via + # httpcore + # httpx +cffi==1.16.0 + # via argon2-cffi-bindings +click==8.1.7 + # via + # typer + # uvicorn +dnspython==2.6.1 + # via email-validator +email-validator==2.1.1 + # via fastapi +exceptiongroup==1.2.1 + # via anyio +fastapi==0.111.0 + # via -r requirements.in +fastapi-cli==0.0.4 + # via fastapi +h11==0.14.0 + # via + # httpcore + # uvicorn 
+httpcore==1.0.5 + # via httpx +httptools==0.6.1 + # via uvicorn +httpx==0.27.0 + # via fastapi +idna==3.7 + # via + # anyio + # email-validator + # httpx +jinja2==3.1.4 + # via + # -r requirements.in + # fastapi +markdown==3.7 + # via -r requirements.in +markdown-it-py==3.0.0 + # via rich +markupsafe==2.1.5 + # via jinja2 +mdurl==0.1.2 + # via markdown-it-py +orjson==3.10.4 + # via fastapi +passlib==1.7.4 + # via -r requirements.in +psycopg2==2.9.9 + # via -r requirements.in +pycparser==2.22 + # via cffi +pydantic==2.7.3 + # via fastapi +pydantic-core==2.18.4 + # via pydantic +pygments==2.18.0 + # via rich +python-dotenv==1.0.1 + # via uvicorn +python-multipart==0.0.9 + # via fastapi +pyyaml==6.0.1 + # via uvicorn +rich==13.7.1 + # via typer +ruamel-yaml==0.18.6 + # via -r requirements.in +ruamel-yaml-clib==0.2.8 + # via ruamel-yaml +shellingham==1.5.4 + # via typer +sniffio==1.3.1 + # via + # anyio + # httpx +starlette==0.37.2 + # via fastapi +typer==0.12.3 + # via fastapi-cli +typing-extensions==4.12.2 + # via + # anyio + # fastapi + # pydantic + # pydantic-core + # typer + # uvicorn +ujson==5.10.0 + # via fastapi +uvicorn[standard]==0.30.1 + # via + # -r requirements.in + # fastapi +uvloop==0.19.0 + # via uvicorn +watchfiles==0.22.0 + # via uvicorn +websockets==12.0 + # via uvicorn diff --git a/compliance-monitor/sql.py b/compliance-monitor/sql.py new file mode 100644 index 000000000..b4c2549e0 --- /dev/null +++ b/compliance-monitor/sql.py @@ -0,0 +1,404 @@ +from psycopg2 import sql +from psycopg2.extensions import cursor, connection + +# list schema versions in ascending order +SCHEMA_VERSION_KEY = 'version' +SCHEMA_VERSIONS = ['v1', 'v2', 'v3'] +# use ... (Ellipsis) here to indicate that no default value exists (will lead to error if no value is given) +ACCOUNT_DEFAULTS = {'subject': ..., 'api_key': ..., 'roles': ...} +PUBLIC_KEY_DEFAULTS = {'public_key': ..., 'public_key_type': ..., 'public_key_name': ...} + + +class SchemaVersionError(Exception): + pass + + +def sanitize_record(record, defaults, **kwargs): + sanitized = {key: record.get(key, value) for key, value in defaults.items()} + sanitized.update(**kwargs) + return sanitized + + +def make_where_clause(*filter_clauses): + """join args of type sql.Composable via AND, dropping None, and prepend WHERE if appropriate""" + clause = sql.SQL(' AND ').join(filter(None, filter_clauses)) + return sql.SQL(' WHERE {} ').format(clause) if clause.seq else sql.SQL('') + + +def db_find_account(cur: cursor, subject): + cur.execute(''' + SELECT roles + FROM account + WHERE subject = %s;''', (subject, )) + if not cur.rowcount: + raise KeyError(subject) + roles, = cur.fetchone() + return roles + + +def db_get_apikeys(cur: cursor, subject): + cur.execute(''' + SELECT apikeyhash + FROM apikey + NATURAL JOIN account + WHERE subject = %s;''', (subject, )) + return [row[0] for row in cur.fetchall()] + + +def db_get_keys(cur: cursor, subject): + cur.execute(''' + SELECT keytype, key + FROM publickey + NATURAL JOIN account + WHERE subject = %s;''', (subject, )) + return cur.fetchall() + + +def db_ensure_schema_common(cur: cursor): + # strive to make column names unique across tables so that selects become simple, such as: + # select * from "check" natural join standardentry natural join version natural join scope; + cur.execute(''' + CREATE TABLE IF NOT EXISTS account ( + accountid SERIAL PRIMARY KEY, + subject text UNIQUE, + roles integer + ); + CREATE TABLE IF NOT EXISTS apikey ( + apikeyid SERIAL PRIMARY KEY, + apikeyhash text, + accountid integer 
NOT NULL REFERENCES account ON DELETE CASCADE ON UPDATE CASCADE, + UNIQUE (accountid, apikeyhash) + ); + CREATE TABLE IF NOT EXISTS publickey ( + keyid SERIAL PRIMARY KEY, + key text, + keytype text, + keyname text, + accountid integer NOT NULL REFERENCES account ON DELETE CASCADE ON UPDATE CASCADE, + UNIQUE (accountid, keyname) + ); + CREATE TABLE IF NOT EXISTS report ( + reportid SERIAL PRIMARY KEY, + reportuuid text UNIQUE, + checked_at timestamp, + subject text, + -- scopeid integer NOT NULL REFERENCES scope ON DELETE CASCADE ON UPDATE CASCADE, + -- let's omit the scope here because it is determined via the results, and it + -- is possible that future reports refer to multiple scopes + data jsonb, + rawformat text, + raw bytea + ); + ''') + + +def db_ensure_schema_v2(cur: cursor): + db_ensure_schema_common(cur) + cur.execute(''' + -- make a way simpler version that doesn't put that much background knowledge into the schema + -- therefore let's hope the schema will be more robust against change + CREATE TABLE IF NOT EXISTS result2 ( + resultid SERIAL PRIMARY KEY, + -- some Python code to show how simple it is to fill this from a yaml report + -- (using member access syntax instead of array access syntax for the dict fields) + -- for vname, vres in report.versions.items(): + -- for tcid, tcres in vres.items(): + checked_at timestamp NOT NULL, -- = report.checked_at + subject text NOT NULL, -- = report.subject + scopeuuid text NOT NULL, -- = report.spec.uuid + version text NOT NULL, -- = vname + testcase text NOT NULL, -- = tcid + result int, -- = tcres.result + approval boolean, -- = tcres.result == 1 + -- the following is FYI only, for the most data is literally copied to this table + reportid integer NOT NULL REFERENCES report ON DELETE CASCADE ON UPDATE CASCADE + ); + ''') + + +def db_ensure_schema_v3(cur: cursor): + # v3 mainly extends v2, so we need v2 first + db_ensure_schema_v2(cur) + # We do alter the table "report" by dropping two columns, so these columns may have been created in vain + # if this database never really was on v2, but I hope dropping columns from an empty table is cheap + # enough, because I want to avoid having too many code paths here. We can remove these columns from + # create table once all databases are on v3. 
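+    # The delegation table created below backs the `delegates` entries in bootstrap.yaml: it maps a
+    # delegate account (such as zuul_ci) to the accounts on whose behalf it may post reports
+    # (cf. db_add_delegate, db_find_subjects, and their use in monitor.py).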
+ cur.execute(''' + ALTER TABLE report DROP COLUMN IF EXISTS raw; + ALTER TABLE report DROP COLUMN IF EXISTS rawformat; + DROP TABLE IF EXISTS invocation; -- we forgot this for post-upgrade v2, can be dropped without harm + CREATE TABLE IF NOT EXISTS delegation ( + delegateid integer NOT NULL REFERENCES account ON DELETE CASCADE ON UPDATE CASCADE, + accountid integer NOT NULL REFERENCES account ON DELETE CASCADE ON UPDATE CASCADE, + UNIQUE (delegateid, accountid) + ); + ''') + + +def db_upgrade_data_v1_v2(cur): + # we are going to drop table result, but use delete anyway to have the transaction safety + cur.execute(''' + INSERT INTO result2 (checked_at, subject, scopeuuid, version, testcase, result, approval, reportid) + SELECT + report.checked_at, report.subject, + scope.scopeuuid, version.version, "check".id, + result.result, result.approval, result.reportid + FROM result + NATURAL JOIN report + NATURAL JOIN "check" + NATURAL JOIN version + NATURAL JOIN scope + ; + DELETE FROM result + ;''') + + +def db_post_upgrade_v1_v2(cur: cursor): + cur.execute(''' + DROP TABLE IF EXISTS result; + DROP TABLE IF EXISTS "check"; + DROP TABLE IF EXISTS standardentry; + DROP TABLE IF EXISTS version; + DROP TABLE IF EXISTS scope; + ''') + + +def db_get_schema_version(cur: cursor): + cur.execute('''SELECT value FROM meta WHERE key = %s;''', (SCHEMA_VERSION_KEY, )) + return cur.rowcount and cur.fetchone()[0] or None + + +def db_set_schema_version(cur: cursor, version: str): + cur.execute(''' + UPDATE meta SET value = %s WHERE key = %s + ;''', (version, SCHEMA_VERSION_KEY)) + + +def db_upgrade_schema(conn: connection, cur: cursor): + # the ensure_* and post_upgrade_* functions must be idempotent + # ditto for the data transfer (ideally insert/delete transaction) + # -- then, in case we get interrupted when setting the new version, the upgrade can be repeated + # -- addendum: DDL is transactional with Postgres, so this effort was a bit in vain, but I keep it + # that way just in case we want to use another database at some point + while True: + current = db_get_schema_version(cur) + if current == SCHEMA_VERSIONS[-1]: + break + if current is None: + # this is an empty db, but it also used to be the case with v1 + # I (mbuechse) made sure manually that the value v1 is set on running installations + db_ensure_schema_v3(cur) + db_set_schema_version(cur, 'v3') + conn.commit() + elif current == 'v1': + db_ensure_schema_v2(cur) + db_upgrade_data_v1_v2(cur) + db_set_schema_version(cur, 'v1-v2') + conn.commit() + elif current == 'v1-v2': + db_post_upgrade_v1_v2(cur) + db_set_schema_version(cur, 'v2') + conn.commit() + elif current == 'v2': + db_ensure_schema_v3(cur) + db_set_schema_version(cur, 'v3') + conn.commit() + + +def db_ensure_schema(conn: connection): + with conn.cursor() as cur: + cur.execute(''' + CREATE TABLE IF NOT EXISTS meta ( + key text PRIMARY KEY, + value text NOT NULL + ); + ''') + conn.commit() # apparently, DDL is transactional with Postgres, so be sure to relieve the journal + db_upgrade_schema(conn, cur) + # the following could at some point be more adequate than the call to db_upgrade_schema above + # -- namely, if the service is run as multiple processes and the upgrade must be done in advance --: + # current, expected = db_get_schema_version(cur), SCHEMA_VERSIONS[-1] + # if current != expected: + # raise SchemaVersionError(f"Database schema outdated! 
Expected {expected!r}, got {current!r}") + + +def db_update_account(cur: cursor, record: dict): + sanitized = sanitize_record(record, ACCOUNT_DEFAULTS) + cur.execute(''' + INSERT INTO account (subject, roles) + VALUES (%(subject)s, %(roles)s) + ON CONFLICT (subject) + DO UPDATE + SET roles = EXCLUDED.roles + RETURNING accountid;''', sanitized) + accountid, = cur.fetchone() + return accountid + + +def db_clear_delegates(cur: cursor, accountid): + cur.execute('''DELETE FROM delegation WHERE accountid = %s;''', (accountid, )) + + +def db_add_delegate(cur: cursor, accountid, delegate): + cur.execute(''' + INSERT INTO delegation (accountid, delegateid) + (SELECT %s, accountid + FROM account + WHERE subject = %s) + RETURNING accountid;''', (accountid, delegate)) + + +def db_find_subjects(cur: cursor, delegate): + cur.execute(''' + SELECT a.subject + FROM delegation + JOIN account a ON a.accountid = delegation.accountid + JOIN account b ON b.accountid = delegation.delegateid + WHERE b.subject = %s;''', (delegate, )) + return [row[0] for row in cur.fetchall()] + + +def db_update_apikey(cur: cursor, accountid, apikey_hash): + sanitized = dict(accountid=accountid, apikey_hash=apikey_hash) + cur.execute(''' + INSERT INTO apikey (apikeyhash, accountid) + VALUES (%(apikey_hash)s, %(accountid)s) + ON CONFLICT (accountid, apikeyhash) + DO UPDATE + SET apikeyhash = EXCLUDED.apikeyhash -- changes nothing, but necessary for RETURNING + RETURNING apikeyid;''', sanitized) + apikeyid, = cur.fetchone() + return apikeyid + + +def db_filter_apikeys(cur: cursor, accountid, predicate: callable): + cur.execute('SELECT apikeyid FROM apikey WHERE accountid = %s;', (accountid, )) + removeids = [row[0] for row in cur.fetchall() if not predicate(*row)] + while removeids: + cur.execute('DELETE FROM apikey WHERE apikeyid IN %s', (tuple(removeids[:10]), )) + del removeids[:10] + + +def db_update_publickey(cur: cursor, accountid, record: dict): + sanitized = sanitize_record(record, PUBLIC_KEY_DEFAULTS, accountid=accountid) + cur.execute(''' + INSERT INTO publickey (key, keytype, keyname, accountid) + VALUES (%(public_key)s, %(public_key_type)s, %(public_key_name)s, %(accountid)s) + ON CONFLICT (accountid, keyname) + DO UPDATE + SET key = EXCLUDED.key + , keytype = EXCLUDED.keytype + , keyname = EXCLUDED.keyname + RETURNING keyid;''', sanitized) + keyid, = cur.fetchone() + return keyid + + +def db_filter_publickeys(cur: cursor, accountid, predicate: callable): + cur.execute('SELECT keyid, keyname FROM publickey WHERE accountid = %s;', (accountid, )) + removeids = [row[0] for row in cur.fetchall() if not predicate(*row)] + while removeids: + cur.execute('DELETE FROM publickey WHERE keyid IN %s', (tuple(removeids[:10]), )) + del removeids[:10] + + +def db_get_report(cur: cursor, report_uuid): + cur.execute( + "SELECT data FROM report WHERE reportuuid = %(reportuuid)s;", + {"reportuuid": report_uuid}, + ) + return [row[0] for row in cur.fetchall()] + + +def db_get_reports(cur: cursor, subject, limit, skip): + cur.execute( + sql.SQL("SELECT data FROM report {} LIMIT %(limit)s OFFSET %(skip)s;") + .format(make_where_clause( + None if not subject else sql.SQL('subject = %(subject)s'), + )), + {"subject": subject, "limit": limit, "skip": skip}, + ) + return [row[0] for row in cur.fetchall()] + + +def db_insert_report(cur: cursor, uuid, checked_at, subject, json_text): + # this is an exception in that we don't use a record parameter (it's just not as practical here) + cur.execute(''' + INSERT INTO report (reportuuid, checked_at, 
subject, data) + VALUES (%s, %s, %s, %s) + RETURNING reportid;''', (uuid, checked_at, subject, json_text)) + reportid, = cur.fetchone() + return reportid + + +def db_insert_result2( + cur: cursor, checked_at, subject, scopeuuid, version, testcase, result, approval, reportid +): + # this is an exception in that we don't use a record parameter (it's just not as practical here) + cur.execute(''' + INSERT INTO result2 (checked_at, subject, scopeuuid, version, testcase, result, approval, reportid) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s) + RETURNING resultid;''', (checked_at, subject, scopeuuid, version, testcase, result, approval, reportid)) + resultid, = cur.fetchone() + return resultid + + +def db_get_relevant_results2( + cur: cursor, + subject=None, scopeuuid=None, version=None, approved_only=True, +): + """for each combination of scope/version/check, get the most recent test result that is still valid""" + # find the latest result per subject/scopeuuid/version/checkid for this subject + # DISTINCT ON is a Postgres-specific construct that comes in very handy here :) + cur.execute(sql.SQL(''' + SELECT DISTINCT ON (subject, scopeuuid, version, testcase) + result2.subject, scopeuuid, version, testcase, result, result2.checked_at, report.reportuuid + FROM result2 + JOIN report ON report.reportid = result2.reportid + {filter_condition} + ORDER BY subject, scopeuuid, version, testcase, checked_at DESC; + ''').format( + filter_condition=make_where_clause( + sql.SQL('approval') if approved_only else None, + None if scopeuuid is None else sql.SQL('scopeuuid = %(scopeuuid)s'), + None if version is None else sql.SQL('version = %(version)s'), + None if subject is None else sql.SQL('result2.subject = %(subject)s'), + ), + ), {"subject": subject, "scopeuuid": scopeuuid, "version": version}) + return cur.fetchall() + + +def db_get_recent_results2(cur: cursor, approved, limit, skip, max_age_days=None): + """list recent test results without grouping by scope/version/check""" + columns = ('reportuuid', 'subject', 'checked_at', 'scopeuuid', 'version', 'check', 'result', 'approval') + cur.execute(sql.SQL(''' + SELECT report.reportuuid, result2.subject, result2.checked_at, result2.scopeuuid, result2.version + , result2.testcase, result2.result, result2.approval + FROM result2 + NATURAL JOIN report + {where_clause} + ORDER BY checked_at + LIMIT %(limit)s OFFSET %(skip)s;''').format( + where_clause=make_where_clause( + None if max_age_days is None else sql.SQL( + f"checked_at > NOW() - interval '{max_age_days:d} days'" + ), + None if approved is None else sql.SQL('approval = %(approved)s'), + ), + ), {"limit": limit, "skip": skip, "approved": approved}) + return [{col: val for col, val in zip(columns, row)} for row in cur.fetchall()] + + +def db_patch_approval2(cur: cursor, record): + cur.execute(''' + UPDATE result2 + SET approval = %(approval)s + FROM report + WHERE report.reportuuid = %(reportuuid)s + AND result2.reportid = report.reportid + AND result2.scopeuuid = %(scopeuuid)s + AND result2.version = %(version)s + AND result2.testcase = %(check)s + RETURNING resultid;''', record) + resultid, = cur.fetchone() + return resultid diff --git a/compliance-monitor/templates/details.md.j2 b/compliance-monitor/templates/details.md.j2 new file mode 100644 index 000000000..30136b149 --- /dev/null +++ b/compliance-monitor/templates/details.md.j2 @@ -0,0 +1,39 @@ +{% for subject, subject_result in results.items() -%} +{# omit h1 title here because we can only have one of those, + and the html wrapper template will add 
one anyway -#} +{% for scopeuuid, scope_result in subject_result.items() -%} +## {{ scope_result.name }} + +- [spec overview]({{ scope_url(scopeuuid) }}) + +{% if not scope_result.relevant -%} + +No recent test results available. + +{% endif -%} +{% for version in scope_result.relevant -%} +{%- set version_result = scope_result.versions[version] -%} +### {{ version }} ({{ version_result.validity }}): {{ version_result.result | verdict }} +{% for target, target_result in version_result.targets.items() -%} +#### Target {{ target }}: {{ target_result.result | verdict }} + +| testcase id | result | description | +|---|---|---| +{% for testcase_id in target_result.testcases -%} +{% set testcase = version_result.testcases[testcase_id] -%} +{% set res = version_result.results[testcase_id] if testcase_id in version_result.results else dict(result=0) -%} +| {% if res.result != 1 %}⚠️ {% endif %}{{ testcase.id }} | +{#- #} {% if res.report -%} +[{{ res.result | verdict_check }}]({{ report_url(res.report, version, testcase_id) }}) +{%- else -%} +{{ res.result | verdict_check }} +{%- endif -%} +{% if res.report %} { title="{{ res.report }} ({{ res.checked_at }})" }{% endif %} {# -#} +| {{ testcase.description | trim }} | +{% endfor %} +{% endfor -%} +{% endfor -%} +{% endfor -%} +{% endfor -%} + +{#{results}#} diff --git a/compliance-monitor/templates/overview.html.j2 b/compliance-monitor/templates/overview.html.j2 new file mode 100644 index 000000000..830b94121 --- /dev/null +++ b/compliance-monitor/templates/overview.html.j2 @@ -0,0 +1,18 @@ + + + + +{{ title or 'SCS compliance overview' }} + + + +{% if title %}

{{title}}

+{% endif %}{{fragment}} + diff --git a/compliance-monitor/templates/overview.md.j2 b/compliance-monitor/templates/overview.md.j2 new file mode 100644 index 000000000..77ba6bcc9 --- /dev/null +++ b/compliance-monitor/templates/overview.md.j2 @@ -0,0 +1,48 @@ +{# +we could of course iterate over results etc., but hardcode the table (except the actual results, of course) +for the time being to have the highest degree of control +-#} + +Version numbers are suffixed by a symbol depending on state: * for _draft_, † for _warn_ (soon to be deprecated), and †† for _deprecated_. + +{% set iaas = '50393e6f-2ae1-4c5c-a62c-3b75f2abef3f' -%} +| Name | Description | Operator | [SCS-compatible IaaS](https://docs.scs.community/standards/scs-compatible-iaas/) | HealthMon | +|-------|--------------|-----------|----------------------|:----------:| +| [gx-scs](https://github.com/SovereignCloudStack/docs/blob/main/community/cloud-resources/plusserver-gx-scs.md) | Dev environment provided for SCS & GAIA-X context | plusserver GmbH | +{#- #} [{{ results | pick('gx-scs', iaas) | summary }}]({{ detail_url('gx-scs', iaas) }}) {# -#} +| [HM](https://health.gx-scs.sovereignit.cloud:3000/) | +| [aov.cloud](https://www.aov.de/) | Community cloud for customers | aov IT.Services GmbH | +{#- #} [{{ results | pick('aov', iaas) | summary }}]({{ detail_url('aov', iaas) }}) {# -#} +| [HM](https://health.aov.cloud/) | +| [CNDS](https://cnds.io/) | Public cloud for customers | artcodix GmbH | +{#- #} [{{ results | pick('artcodix', iaas) | summary }}]({{ detail_url('artcodix', iaas) }}) {# -#} +| [HM](https://ohm.muc.cloud.cnds.io/) | +| [pluscloud open](https://www.plusserver.com/en/products/pluscloud-open)
(4 regions) | Public cloud for customers | plusserver GmbH | {# #} +{#- #}prod1: [{{ results | pick('pco-prod1', iaas) | summary }}]({{ detail_url('pco-prod1', iaas) }}){# -#} +
+{#- #}prod2: [{{ results | pick('pco-prod2', iaas) | summary }}]({{ detail_url('pco-prod2', iaas) }}){# -#} +
+{#- #}prod3: [{{ results | pick('pco-prod3', iaas) | summary }}]({{ detail_url('pco-prod3', iaas) }}){# -#} +
+{#- #}prod4: [{{ results | pick('pco-prod4', iaas) | summary }}]({{ detail_url('pco-prod4', iaas) }}) {# -#} +| [HM1](https://health.prod1.plusserver.sovereignit.cloud:3000/d/9ltTEmlnk/openstack-health-monitor2?orgId=1&var-mycloud=plus-pco)
[HM2](https://health.prod1.plusserver.sovereignit.cloud:3000/d/9ltTEmlnk/openstack-health-monitor2?orgId=1&var-mycloud=plus-prod2)
[HM3](https://health.prod1.plusserver.sovereignit.cloud:3000/d/9ltTEmlnk/openstack-health-monitor2?orgId=1&var-mycloud=plus-prod3)
[HM4](https://health.prod1.plusserver.sovereignit.cloud:3000/d/9ltTEmlnk/openstack-health-monitor2?orgId=1&var-mycloud=plus-prod4) | +| PoC KDO | Cloud PoC for FITKO | KDO Service GmbH / OSISM GmbH | +{#- #} [{{ results | pick('poc-kdo', iaas) | summary }}]({{ detail_url('poc-kdo', iaas) }}) {# -#} +| (soon) | +| PoC WG-Cloud OSBA | Cloud PoC for FITKO | Cloud&Heat Technologies GmbH | +{#- #} [{{ results | pick('poc-wgcloud', iaas) | summary }}]({{ detail_url('poc-wgcloud', iaas) }}) {# -#} +| [HM](https://health.poc-wgcloud.osba.sovereignit.cloud:3000/d/9ltTEmlnk/openstack-health-monitor2?var-mycloud=poc-wgcloud&orgId=1) | +| [REGIO.cloud](https://regio.digital) | Public cloud for customers | OSISM GmbH | +{#- #} [{{ results | pick('regio-a', iaas) | summary }}]({{ detail_url('regio-a', iaas) }}) {# -#} +| [HM](https://apimon.services.regio.digital/public-dashboards/17cf094a47404398a5b8e35a4a3968d4?orgId=1&refresh=5m) | +| [ScaleUp Open Cloud](https://www.scaleuptech.com/cloud-hosting/) | Public cloud for customers | ScaleUp Technologies GmbH & Co. KG | +{#- #} [{{ results | pick('scaleup-occ2', iaas) | summary }}]({{ detail_url('scaleup-occ2', iaas) }}) {# -#} +| [HM](https://health.occ2.scaleup.sovereignit.cloud) | +| [syseleven](https://www.syseleven.de/en/products-services/openstack-cloud/)
(2 SCS regions) | Public OpenStack Cloud | SysEleven GmbH | {# #} +{#- #}dus2: [{{ results | pick('syseleven-dus2', iaas) | summary }}]({{ detail_url('syseleven-dus2', iaas) }}){# -#} +
+{#- #}ham1: [{{ results | pick('syseleven-ham1', iaas) | summary }}]({{ detail_url('syseleven-ham1', iaas) }}) {# -#} +| (soon)
(soon) | +| [Wavestack](https://www.noris.de/wavestack-cloud/) | Public cloud for customers | noris network AG/Wavecon GmbH | +{#- #} [{{ results | pick('wavestack', iaas) | summary }}]({{ detail_url('wavestack', iaas) }}) {# -#} +| [HM](https://health.wavestack1.sovereignit.cloud:3000/) | diff --git a/compliance-monitor/templates/report.md.j2 b/compliance-monitor/templates/report.md.j2 new file mode 100644 index 000000000..e46c2e086 --- /dev/null +++ b/compliance-monitor/templates/report.md.j2 @@ -0,0 +1,66 @@ +## General info + +- uuid: [{{ report.run.uuid }}]({{ report_url(report.run.uuid, download=True) }}) +- subject: {{ report.subject }} +- scope: [{{ report.spec.name }}]({{ scope_url(report.spec.uuid) }}) +- checked at: {{ report.checked_at }} + +## Results + +{% for version, version_results in report.versions.items() %}{% if version_results %} +### {{ version }} + +| test case | result | invocation | +|---|---|---| +{% for testcase_id, result_data in version_results.items() -%} +| {{ testcase_id }} {: #{{ version + '_' + testcase_id }} } | {{ result_data.result | verdict_check }} | [{{ result_data.invocation }}](#{{ result_data.invocation }}) | +{% endfor %} +{% endif %}{% endfor %} + +## Run + +### Variable assignment + +| key | value | +|---|---| +{% for key, value in report.run.assignment.items() -%} +| `{{ key }}` | `{{ value }}` | +{% endfor %} + +### Check tool invocations + +{% for invid, invdata in report.run.invocations.items() %} +#### Invocation {{invid}} {: #{{ invid }} } + +- cmd: `{{ invdata.cmd }}` +- rc: {{ invdata.rc }} +- channel summary +{%- for channel in ('critical', 'error', 'warning') %} +{%- if invdata[channel] %} + - **{{ channel }}: {{ invdata[channel] }}** +{%- else %} + - {{ channel }}: – +{%- endif %} +{%- endfor %} +- results +{%- for resultid, result in invdata.results.items() %} + - {{ resultid }}: {{ result | verdict_check }} +{%- endfor %} + +{% if invdata.stdout -%} +
Captured stdout +```text +{{ '\n'.join(invdata.stdout) }} +``` +
+{%- endif %} + +{% if invdata.stderr -%} +
Captured stderr +{%- for line in invdata.stderr %} +
{% if line.split(':', 1)[0].lower() in ('warning', 'error', 'critical') %}{{ '' + line + '' }}{% else %}{{ line }}{% endif %}
+{%- endfor %} +
+{%- endif %} + +{% endfor %} diff --git a/compliance-monitor/templates/scope.md.j2 b/compliance-monitor/templates/scope.md.j2 new file mode 100644 index 000000000..7c46abce6 --- /dev/null +++ b/compliance-monitor/templates/scope.md.j2 @@ -0,0 +1,26 @@ +| Scope versions -> | {% for name in relevant %} {{name}} |{% endfor %} +| :----------------- | {% for name in relevant %} :-- |{% endfor %} +| State | {% for name in relevant %} {{spec.versions[name].validity | capitalize}} |{% endfor %} +| Stabilized at | {% for name in relevant %} {{spec.versions[name].stabilized_at}} |{% endfor %} +| **Modules** | {% for name in relevant %} |{% endfor %} +{% for row in rows -%} +| [{% if row.module.id.startswith('scs-') %}{{row.module.id}}: {% endif %}{{row.module.name}}]({{row.module.url}}) |{% +for name in relevant + %} {% set column = row.columns[name] %}{% + if column + %}X{% + if column.parameters + %} ({% + for key, value in column.parameters.items() + %}{% + if value.startswith("https://") + %}[{{key}}]({{value}}){% + else + %}{{key}}={{value}}{% + endif %}{{ ", " if not loop.last }}){% + endfor %}{% + endif %}{% + endif %} |{% +endfor +%} +{% endfor %} diff --git a/package-lock.json b/package-lock.json index f6e8dbaf7..1fcf557e6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -46,27 +46,15 @@ "node": ">= 8" } }, - "node_modules/aggregate-error": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/aggregate-error/-/aggregate-error-3.1.0.tgz", - "integrity": "sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA==", - "dependencies": { - "clean-stack": "^2.0.0", - "indent-string": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/ansi-escapes": { - "version": "4.3.2", - "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-4.3.2.tgz", - "integrity": "sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-5.0.0.tgz", + "integrity": "sha512-5GFMVX8HqE/TB+FuBJGuO5XG0WrsA6ptUqoODaT/n9mmUaZFkqnBueB4leqGBCmrUHnCnC4PCZTCd0E7QQ83bA==", "dependencies": { - "type-fest": "^0.21.3" + "type-fest": "^1.0.2" }, "engines": { - "node": ">=8" + "node": ">=12" }, "funding": { "url": "https://github.com/sponsors/sindresorhus" @@ -99,29 +87,21 @@ "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" }, - "node_modules/astral-regex": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/astral-regex/-/astral-regex-2.0.0.tgz", - "integrity": "sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==", - "engines": { - "node": ">=8" - } - }, "node_modules/braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", "dependencies": { - "fill-range": "^7.0.1" + "fill-range": "^7.1.1" }, "engines": { "node": ">=8" } }, "node_modules/chalk": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.2.0.tgz", - "integrity": 
"sha512-ree3Gqw/nazQAPuJJEy+avdl7QfZMcUvmHIKgEZkGL+xOBzRvup5Hxo6LHuMceSxOabuJLJm5Yp/92R9eMmMvA==", + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.3.0.tgz", + "integrity": "sha512-dLitG79d+GV1Nb/VYcCDFivJeK1hiukt9QjRNVOsUtTy1rR1YJsmpGGTZ3qJos+uw7WmWF4wUwBd9jxjocFC2w==", "engines": { "node": "^12.17.0 || ^14.13 || >=16.0.0" }, @@ -129,23 +109,18 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, - "node_modules/clean-stack": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/clean-stack/-/clean-stack-2.2.0.tgz", - "integrity": "sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A==", - "engines": { - "node": ">=6" - } - }, "node_modules/cli-cursor": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-3.1.0.tgz", - "integrity": "sha512-I/zHAwsKf9FqGoXM4WWRACob9+SNukZTd94DWF57E4toouRulbCxcUh6RKUEOQlYTHJnzkPMySvPNaaSLNfLZw==", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-4.0.0.tgz", + "integrity": "sha512-VGtlMu3x/4DOtIUwEkRezxUZ2lBacNJCHash0N0WeZDBS+7Ux1dm3XWAgWYxLJFMMdOeXMHXorshEFhbMSGelg==", "dependencies": { - "restore-cursor": "^3.1.0" + "restore-cursor": "^4.0.0" }, "engines": { - "node": ">=8" + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" } }, "node_modules/cli-truncate": { @@ -163,39 +138,23 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" - }, "node_modules/colorette": { - "version": "2.0.19", - "resolved": "https://registry.npmjs.org/colorette/-/colorette-2.0.19.tgz", - "integrity": "sha512-3tlv/dIP7FWvj3BsbHrGLJ6l/oKh1O3TcgBqMn+yyCagOxc23fyzDS6HypQbgxWbkpDnf52p1LuR4eWDQ/K9WQ==" + "version": "2.0.20", + "resolved": "https://registry.npmjs.org/colorette/-/colorette-2.0.20.tgz", + "integrity": "sha512-IfEDxwoWIjkeXL1eXcDiow4UbKjhLdq6/EuSVR9GMN7KVH3r9gQ83e73hsz1Nd1T3ijd5xv1wcWRYO+D6kCI2w==" }, "node_modules/commander": { - "version": "10.0.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-10.0.0.tgz", - "integrity": "sha512-zS5PnTI22FIRM6ylNW8G4Ap0IEOyk62fhLSD0+uHRT9McRCLGpkVNvao4bjimpK/GShynyQkFFxHhwMcETmduA==", + "version": "11.0.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-11.0.0.tgz", + "integrity": "sha512-9HMlXtt/BNoYr8ooyjjNRdIilOTkVJXB+GhxMTtOKwk0R4j4lS4NpjuqmRxroBfnfTSHQIHQB7wryHhXarNjmQ==", "engines": { - "node": ">=14" + "node": ">=16" } }, "node_modules/cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", 
"dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", @@ -253,10 +212,15 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, + "node_modules/eventemitter3": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-5.0.1.tgz", + "integrity": "sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA==" + }, "node_modules/execa": { - "version": "7.1.1", - "resolved": "https://registry.npmjs.org/execa/-/execa-7.1.1.tgz", - "integrity": "sha512-wH0eMf/UXckdUYnO21+HDztteVv05rq2GXksxT4fCGeHkBhw1DROXh40wcjMcRqDOWE7iPJ4n3M7e2+YFP+76Q==", + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/execa/-/execa-7.2.0.tgz", + "integrity": "sha512-UduyVP7TLB5IcAQl+OzLyLcS/l32W/GLg+AhHJ+ow40FOk2U3SAllPwR44v4vmdFwIWqpdwxxpQbF1n5ta9seA==", "dependencies": { "cross-spawn": "^7.0.3", "get-stream": "^6.0.1", @@ -276,9 +240,9 @@ } }, "node_modules/fast-glob": { - "version": "3.2.12", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.2.12.tgz", - "integrity": "sha512-DVj4CQIYYow0BlaelwK1pHl5n5cRSJfM60UA0zK891sVInoPri2Ekj7+e1CT3/3qxXenpI+nBBmQAcJPJgaj4w==", + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.2.tgz", + "integrity": "sha512-oX2ruAFQwf/Orj8m737Y5adxDQO0LAB7/S5MnxCdTNDd4p6BsyIVsv9JQsATbTSq8KHRpLwIHbVlUNatxd+1Ow==", "dependencies": { "@nodelib/fs.stat": "^2.0.2", "@nodelib/fs.walk": "^1.2.3", @@ -291,17 +255,17 @@ } }, "node_modules/fastq": { - "version": "1.15.0", - "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.15.0.tgz", - "integrity": "sha512-wBrocU2LCXXa+lWBt8RoIRD89Fi8OdABODa/kEnyeyjS5aZO5/GNvI5sEINADqP/h8M29UHTHUb53sUu5Ihqdw==", + "version": "1.17.1", + "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.17.1.tgz", + "integrity": "sha512-sRVD3lWVIXWg6By68ZN7vho9a1pQcN/WBFaAAsDDFzlJjvoGx0P8z7V1t72grFJfJhu3YPZBuu25f7Kaw2jN1w==", "dependencies": { "reusify": "^1.0.4" } }, "node_modules/fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", "dependencies": { "to-regex-range": "^5.0.1" }, @@ -372,21 +336,13 @@ } }, "node_modules/ignore": { - "version": "5.2.4", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.2.4.tgz", - "integrity": "sha512-MAb38BcSbH0eHNBxn7ql2NH/kX33OkB3lZ1BNdh7ENeRChHTYsTvWrMubiIAMNS2llXEEgZ1MUOBtXChP3kaFQ==", + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.1.tgz", + "integrity": "sha512-5Fytz/IraMjqpwfd34ke28PTVMjZjJG2MPn5t7OE4eUCUNf8BAa7b5WUS9/Qvr6mwOQS7Mk6vdsMno5he+T8Xw==", "engines": { "node": ">= 4" } }, - "node_modules/indent-string": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/indent-string/-/indent-string-4.0.0.tgz", - "integrity": "sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==", - "engines": { - "node": ">=8" - } - }, "node_modules/is-extglob": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", @@ -458,50 +414,45 @@ } }, "node_modules/lint-staged": { - "version": "13.2.0", - "resolved": "https://registry.npmjs.org/lint-staged/-/lint-staged-13.2.0.tgz", - 
"integrity": "sha512-GbyK5iWinax5Dfw5obm2g2ccUiZXNGtAS4mCbJ0Lv4rq6iEtfBSjOYdcbOtAIFtM114t0vdpViDDetjVTSd8Vw==", - "dependencies": { - "chalk": "5.2.0", - "cli-truncate": "^3.1.0", - "commander": "^10.0.0", - "debug": "^4.3.4", - "execa": "^7.0.0", + "version": "13.3.0", + "resolved": "https://registry.npmjs.org/lint-staged/-/lint-staged-13.3.0.tgz", + "integrity": "sha512-mPRtrYnipYYv1FEE134ufbWpeggNTo+O/UPzngoaKzbzHAthvR55am+8GfHTnqNRQVRRrYQLGW9ZyUoD7DsBHQ==", + "dependencies": { + "chalk": "5.3.0", + "commander": "11.0.0", + "debug": "4.3.4", + "execa": "7.2.0", "lilconfig": "2.1.0", - "listr2": "^5.0.7", - "micromatch": "^4.0.5", - "normalize-path": "^3.0.0", - "object-inspect": "^1.12.3", - "pidtree": "^0.6.0", - "string-argv": "^0.3.1", - "yaml": "^2.2.1" + "listr2": "6.6.1", + "micromatch": "4.0.5", + "pidtree": "0.6.0", + "string-argv": "0.3.2", + "yaml": "2.3.1" }, "bin": { "lint-staged": "bin/lint-staged.js" }, "engines": { - "node": "^14.13.1 || >=16.0.0" + "node": "^16.14.0 || >=18.0.0" }, "funding": { "url": "https://opencollective.com/lint-staged" } }, "node_modules/listr2": { - "version": "5.0.8", - "resolved": "https://registry.npmjs.org/listr2/-/listr2-5.0.8.tgz", - "integrity": "sha512-mC73LitKHj9w6v30nLNGPetZIlfpUniNSsxxrbaPcWOjDb92SHPzJPi/t+v1YC/lxKz/AJ9egOjww0qUuFxBpA==", + "version": "6.6.1", + "resolved": "https://registry.npmjs.org/listr2/-/listr2-6.6.1.tgz", + "integrity": "sha512-+rAXGHh0fkEWdXBmX+L6mmfmXmXvDGEKzkjxO+8mP3+nI/r/CWznVBvsibXdxda9Zz0OW2e2ikphN3OwCT/jSg==", "dependencies": { - "cli-truncate": "^2.1.0", - "colorette": "^2.0.19", - "log-update": "^4.0.0", - "p-map": "^4.0.0", + "cli-truncate": "^3.1.0", + "colorette": "^2.0.20", + "eventemitter3": "^5.0.1", + "log-update": "^5.0.1", "rfdc": "^1.3.0", - "rxjs": "^7.8.0", - "through": "^2.3.8", - "wrap-ansi": "^7.0.0" + "wrap-ansi": "^8.1.0" }, "engines": { - "node": "^14.13.1 || >=16.0.0" + "node": ">=16.0.0" }, "peerDependencies": { "enquirer": ">= 2.3.0 < 3" @@ -512,196 +463,22 @@ } } }, - "node_modules/listr2/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "engines": { - "node": ">=8" - } - }, - "node_modules/listr2/node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/listr2/node_modules/cli-truncate": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/cli-truncate/-/cli-truncate-2.1.0.tgz", - "integrity": "sha512-n8fOixwDD6b/ObinzTrp1ZKFzbgvKZvuz/TvejnLn1aQfC6r52XEx85FmuC+3HI+JM7coBRXUvNqEU2PHVrHpg==", - "dependencies": { - "slice-ansi": "^3.0.0", - "string-width": "^4.2.0" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/listr2/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" - }, - "node_modules/listr2/node_modules/is-fullwidth-code-point": { - 
"version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "engines": { - "node": ">=8" - } - }, - "node_modules/listr2/node_modules/slice-ansi": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-3.0.0.tgz", - "integrity": "sha512-pSyv7bSTC7ig9Dcgbw9AuRNUb5k5V6oDudjZoMBSr13qpLBG7tB+zgCkARjq7xIUgdz5P1Qe8u+rSGdouOOIyQ==", - "dependencies": { - "ansi-styles": "^4.0.0", - "astral-regex": "^2.0.0", - "is-fullwidth-code-point": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/listr2/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/listr2/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/log-update": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/log-update/-/log-update-4.0.0.tgz", - "integrity": "sha512-9fkkDevMefjg0mmzWFBW8YkFP91OrizzkW3diF7CpG+S2EYdy4+TVfGwz1zeF8x7hCx1ovSPTOE9Ngib74qqUg==", - "dependencies": { - "ansi-escapes": "^4.3.0", - "cli-cursor": "^3.1.0", - "slice-ansi": "^4.0.0", - "wrap-ansi": "^6.2.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/log-update/node_modules/ansi-regex": { "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "engines": { - "node": ">=8" - } - }, - "node_modules/log-update/node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "resolved": "https://registry.npmjs.org/log-update/-/log-update-5.0.1.tgz", + "integrity": "sha512-5UtUDQ/6edw4ofyljDNcOVJQ4c7OjDro4h3y8e1GQL5iYElYclVHJ3zeWchylvMaKnDbDilC8irOVyexnA/Slw==", "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/log-update/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" - }, - "node_modules/log-update/node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "engines": { - "node": ">=8" - } - }, - 
"node_modules/log-update/node_modules/slice-ansi": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-4.0.0.tgz", - "integrity": "sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==", - "dependencies": { - "ansi-styles": "^4.0.0", - "astral-regex": "^2.0.0", - "is-fullwidth-code-point": "^3.0.0" + "ansi-escapes": "^5.0.0", + "cli-cursor": "^4.0.0", + "slice-ansi": "^5.0.0", + "strip-ansi": "^7.0.1", + "wrap-ansi": "^8.0.1" }, "engines": { - "node": ">=10" + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" }, "funding": { - "url": "https://github.com/chalk/slice-ansi?sponsor=1" - } - }, - "node_modules/log-update/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/log-update/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/log-update/node_modules/wrap-ansi": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-6.2.0.tgz", - "integrity": "sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==", - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=8" + "url": "https://github.com/sponsors/sindresorhus" } }, "node_modules/markdown-it": { @@ -760,6 +537,14 @@ "markdownlint-cli2": ">=0.0.4" } }, + "node_modules/markdownlint-cli2/node_modules/yaml": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.2.2.tgz", + "integrity": "sha512-CBKFWExMn46Foo4cldiChEzn7S7SRV+wqiluAb6xmueD/fGyRHIhX8m14vVGgeFWjN540nKCNVj6P21eQjgTuA==", + "engines": { + "node": ">= 14" + } + }, "node_modules/markdownlint-micromark": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/markdownlint-micromark/-/markdownlint-micromark-0.1.2.tgz", @@ -769,22 +554,25 @@ } }, "node_modules/markdownlint-rule-helpers": { - "version": "0.18.0", - "resolved": "https://registry.npmjs.org/markdownlint-rule-helpers/-/markdownlint-rule-helpers-0.18.0.tgz", - "integrity": "sha512-UEdWfsoLr8ylXxfh4fzY5P6lExN+7Un7LbfqDXPlq5VLwwEDFdcZ7EMXoaEKNzncBKG/KWrt2sVt7KiCJgPyMQ==", + "version": "0.21.0", + "resolved": "https://registry.npmjs.org/markdownlint-rule-helpers/-/markdownlint-rule-helpers-0.21.0.tgz", + "integrity": "sha512-27WM6H76t79EZjEl3jSabV0ZzXsC5QaSslI/5N1XuXV0mJRA6i3BPMGFrtZUbhlCNgtY6oC9h5JhtpDMv95tKg==", + "dependencies": { + "markdownlint-micromark": "0.1.2" + }, "engines": { - "node": ">=14.18.0" + "node": ">=16" } }, "node_modules/markdownlint-rule-search-replace": { - "version": "1.0.9", - "resolved": "https://registry.npmjs.org/markdownlint-rule-search-replace/-/markdownlint-rule-search-replace-1.0.9.tgz", - "integrity": "sha512-Qrd+wPvCoED1XFR1uL9PdA8ktOGDtCFHPtw7IVzh2TQbLRRHa2MY/moO9jpLj2maexVevIYH9DS0NLWdU3W1Cg==", + "version": "1.2.0", + "resolved": 
"https://registry.npmjs.org/markdownlint-rule-search-replace/-/markdownlint-rule-search-replace-1.2.0.tgz", + "integrity": "sha512-l2eeVjb0ijxO+dO1ZrODcht+qnJ0VuiAAdBx1J8oa2kAugXl3NhxAGjfNuTfEJae5OQbdSGT+NjMczyzBXvWMA==", "dependencies": { - "markdownlint-rule-helpers": "~0.18.0" + "markdownlint-rule-helpers": "0.21.0" }, "engines": { - "node": ">=14" + "node": ">=16" } }, "node_modules/mdurl": { @@ -833,18 +621,10 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, - "node_modules/normalize-path": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", - "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/npm-run-path": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-5.1.0.tgz", - "integrity": "sha512-sJOdmRGrY2sjNTRMbSvluQqg+8X7ZK61yvzBEIDhz4f8z1TZFYABsqjjCBd/0PUNE9M6QDgHJXQkGUEm7Q+l9Q==", + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-5.3.0.tgz", + "integrity": "sha512-ppwTtiJZq0O/ai0z7yfudtBpWIoxM8yE6nHi1X47eFR2EWORqfbu6CnPlNsjeN683eT0qG6H/Pyf9fCcvjnnnQ==", "dependencies": { "path-key": "^4.0.0" }, @@ -866,14 +646,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/object-inspect": { - "version": "1.12.3", - "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.12.3.tgz", - "integrity": "sha512-geUvdk7c+eizMNUDkRpW1wJwgfOiOeHbxBR/hLXK1aT6zmVSO0jsQcs7fj6MGw89jC/cjGfLcNOrtMYtGqm81g==", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, "node_modules/onetime": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/onetime/-/onetime-6.0.0.tgz", @@ -888,20 +660,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/p-map": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/p-map/-/p-map-4.0.0.tgz", - "integrity": "sha512-/bjOqmgETBYB5BoEeGVea8dmvHb2m9GLy1E9W43yeyfP6QQCZGFNa+XRceJEuDB6zqr+gKpIAmlLebMpykw/MQ==", - "dependencies": { - "aggregate-error": "^3.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/path-key": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", @@ -941,9 +699,9 @@ } }, "node_modules/prettier": { - "version": "2.8.5", - "resolved": "https://registry.npmjs.org/prettier/-/prettier-2.8.5.tgz", - "integrity": "sha512-3gzuxrHbKUePRBB4ZeU08VNkUcqEHaUaouNt0m7LGP4Hti/NuB07C7PPTM/LkWqXoJYJn2McEo5+kxPNrtQkLQ==", + "version": "2.8.8", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-2.8.8.tgz", + "integrity": "sha512-tdN8qQGvNjw4CHbY+XXk0JgCXn9QiF21a55rBe5LJAU+kDyC4WQn4+awm2Xfk2lQMk5fKup9XgzTZtGkjBdP9Q==", "bin": { "prettier": "bin-prettier.js" }, @@ -974,15 +732,18 @@ ] }, "node_modules/restore-cursor": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-3.1.0.tgz", - "integrity": "sha512-l+sSefzHpj5qimhFSE5a8nufZYAM3sBSVMAPtYkmC+4EH2anSGaEMXSD0izRQbu9nfyQ9y5JrVmp7E8oZrUjvA==", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-4.0.0.tgz", + "integrity": "sha512-I9fPXU9geO9bHOt9pHHOhOkYerIMsmVaWB0rA2AI9ERh/+x/i7MV5HKBNrg+ljO5eoPVgCcnFuRjJ9uH6I/3eg==", "dependencies": 
{ "onetime": "^5.1.0", "signal-exit": "^3.0.2" }, "engines": { - "node": ">=8" + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" } }, "node_modules/restore-cursor/node_modules/mimic-fn": { @@ -1017,9 +778,9 @@ } }, "node_modules/rfdc": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/rfdc/-/rfdc-1.3.0.tgz", - "integrity": "sha512-V2hovdzFbOi77/WajaSMXk2OLm+xNIeQdMMuB7icj7bk6zi2F8GGAxigcnDFpJHbNyNcgyJDiP+8nOrY5cZGrA==" + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/rfdc/-/rfdc-1.3.1.tgz", + "integrity": "sha512-r5a3l5HzYlIC68TpmYKlxWjmOP6wiPJ1vWv2HeLhNsRZMrCkxeqxiHlQ21oXmQ4F3SiryXBHhAD7JZqvOJjFmg==" }, "node_modules/run-parallel": { "version": "1.2.0", @@ -1043,14 +804,6 @@ "queue-microtask": "^1.2.2" } }, - "node_modules/rxjs": { - "version": "7.8.0", - "resolved": "https://registry.npmjs.org/rxjs/-/rxjs-7.8.0.tgz", - "integrity": "sha512-F2+gxDshqmIub1KdvZkaEfGDwLNpPvk9Fs6LD/MyQxNgMds/WH9OdDDXOmxUZpME+iSK3rQCctkL0DYyytUqMg==", - "dependencies": { - "tslib": "^2.1.0" - } - }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", @@ -1102,9 +855,9 @@ } }, "node_modules/string-argv": { - "version": "0.3.1", - "resolved": "https://registry.npmjs.org/string-argv/-/string-argv-0.3.1.tgz", - "integrity": "sha512-a1uQGz7IyVy9YwhqjZIZu1c8JO8dNIe20xBmSS6qu9kv++k3JGzCVmprbNN5Kn+BgzD5E7YYwg1CcjuJMRNsvg==", + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/string-argv/-/string-argv-0.3.2.tgz", + "integrity": "sha512-aqD2Q0144Z+/RqG52NeHEkZauTAUWJO8c6yTftGJKO3Tja5tUgIfmIl6kExvhtxSDP7fXB6DvzkfMpCd/F3G+Q==", "engines": { "node": ">=0.6.19" } @@ -1126,9 +879,9 @@ } }, "node_modules/strip-ansi": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.0.1.tgz", - "integrity": "sha512-cXNxvT8dFNRVfhVME3JAe98mkXDYN2O1l7jmcwMnOslDeESg1rF/OZMtK0nRAhiari1unG5cD4jG3rapUAkLbw==", + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", + "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", "dependencies": { "ansi-regex": "^6.0.1" }, @@ -1161,11 +914,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/through": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz", - "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==" - }, "node_modules/to-regex-range": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", @@ -1177,15 +925,10 @@ "node": ">=8.0" } }, - "node_modules/tslib": { - "version": "2.5.0", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz", - "integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==" - }, "node_modules/type-fest": { - "version": "0.21.3", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.21.3.tgz", - "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==", + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-1.4.0.tgz", + "integrity": "sha512-yGSza74xk0UG8k+pLh5oeoYirvIiWo5t0/o3zHHAO2tRDiZcxWP7fywNlXhqb6/r6sWvwi+RsyQMWhVLe4BVuA==", "engines": { "node": ">=10" }, @@ -1213,84 +956,25 @@ } }, "node_modules/wrap-ansi": { 
- "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", + "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" + "ansi-styles": "^6.1.0", + "string-width": "^5.0.1", + "strip-ansi": "^7.0.1" }, "engines": { - "node": ">=10" + "node": ">=12" }, "funding": { "url": "https://github.com/chalk/wrap-ansi?sponsor=1" } }, - "node_modules/wrap-ansi/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi/node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/wrap-ansi/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" - }, - "node_modules/wrap-ansi/node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, "node_modules/yaml": { - "version": "2.2.2", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.2.2.tgz", - "integrity": "sha512-CBKFWExMn46Foo4cldiChEzn7S7SRV+wqiluAb6xmueD/fGyRHIhX8m14vVGgeFWjN540nKCNVj6P21eQjgTuA==", + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.1.tgz", + "integrity": "sha512-2eHWfjaoXgTBC2jNM1LRef62VQa0umtvRiDSk6HSzW7RvS5YtkabJrwYLLEKWBc8a5U2PTSCs+dJjUTJdlHsWQ==", "engines": { "node": ">= 14" } diff --git a/playbooks/adr_syntax.yaml b/playbooks/adr_syntax.yaml index a816fcab0..3d10a0292 100644 --- a/playbooks/adr_syntax.yaml +++ b/playbooks/adr_syntax.yaml @@ -3,15 +3,17 @@ hosts: all tasks: - name: 
Run ADR syntax check script - ansible.builtin.shell: | - python3 ~/Tests/chk_adrs.py ~/Standards + ansible.builtin.command: + cmd: python3 Tests/chk_adrs.py Standards + chdir: "{{ ansible_user_dir }}/{{ zuul.project.src_dir }}" register: result changed_when: true failed_when: result.rc != 0 - name: Run test script consistency check script - ansible.builtin.shell: | - python3 ~/Tests/iaas/flavor-naming/check_yaml.py ~/Tests/iaas + ansible.builtin.shell: + cmd: python3 Tests/iaas/flavor-naming/check_yaml.py Tests/iaas + chdir: "{{ ansible_user_dir }}/{{ zuul.project.src_dir }}" register: result changed_when: true failed_when: result.rc != 0 diff --git a/playbooks/clouds.yaml.j2 b/playbooks/clouds.yaml.j2 index 78b329d19..2df1cdbd8 100644 --- a/playbooks/clouds.yaml.j2 +++ b/playbooks/clouds.yaml.j2 @@ -9,6 +9,16 @@ clouds: interface: "public" identity_api_version: 3 auth_type: "v3applicationcredential" + artcodix: + interface: public + identity_api_verion: 3 + auth_type: "v3applicationcredential" + #region_name: "MUC" + auth: + auth_url: https://api.dc1.muc.cloud.cnds.io:5000/ + application_credential_id: "{{ clouds_conf.cnds_ac_id }}" + application_credential_secret: "{{ clouds_conf.cnds_ac_secret }}" + #project_id: 225a7363dab74b69aa1e3f744aced109 pco-prod1: region_name: "prod1" interface: "public" @@ -36,6 +46,34 @@ clouds: application_credential_id: "{{ clouds_conf.pco_prod3_ac_id }}" application_credential_secret: "{{ clouds_conf.pco_prod3_ac_secret }}" auth_type: "v3applicationcredential" + pco-prod4: + region_name: "prod4" + interface: "public" + identity_api_version: 3 + auth: + auth_url: https://prod4.api.pco.get-cloud.io:5000 + application_credential_id: "{{ clouds_conf.pco_prod4_ac_id }}" + application_credential_secret: "{{ clouds_conf.pco_prod4_ac_secret }}" + auth_type: "v3applicationcredential" + poc-kdo: + interface: public + identity_api_verion: 3 + auth_type: "v3applicationcredential" + auth: + auth_url: https://keystone.services.poc-kdo.fitko.sovereignit.cloud + application_credential_id: "{{ clouds_conf.poc_kdo_ac_id }}" + application_credential_secret: "{{ clouds_conf.poc_kdo_ac_secret }}" + region_name: "RegionOne" + poc-wgcloud: + interface: public + identity_api_verion: 3 + auth_type: "v3applicationcredential" + #region_name: default + auth: + auth_url: https://identity.l1a.cloudandheat.com/v3 + application_credential_id: "{{ clouds_conf.poc_wgcloud_ac_id }}" + application_credential_secret: "{{ clouds_conf.poc_wgcloud_ac_secret }}" + #project_id: 9adb8fc81ba345178654cee5cb7f1464 regio-a: region_name: "RegionA" interface: "public" @@ -45,6 +83,34 @@ clouds: application_credential_id: "{{ clouds_conf.regio_a_ac_id }}" application_credential_secret: "{{ clouds_conf.regio_a_ac_secret }}" auth_type: "v3applicationcredential" + scaleup-occ2: + auth_type: v3applicationcredential + auth: + auth_url: https://keystone.occ2.scaleup.cloud + application_credential_id: "{{ clouds_conf.scaleup_occ2_ac_id }}" + application_credential_secret: "{{ clouds_conf.scaleup_occ2_ac_secret }}" + region_name: "RegionOne" + interface: "public" + identity_api_version: 3 + syseleven-dus2: + interface: public + identity_api_verion: 3 + auth_type: "v3applicationcredential" + region_name: dus2 + auth: + auth_url: https://keystone.cloud.syseleven.net:5000/v3 + application_credential_id: "{{ clouds_conf.syseleven_dus2_ac_id }}" + application_credential_secret: "{{ clouds_conf.syseleven_dus2_ac_secret }}" + syseleven-ham1: + interface: public + identity_api_verion: 3 + auth_type: 
"v3applicationcredential" + region_name: ham1 + auth: + auth_url: https://keystone.cloud.syseleven.net:5000/v3 + application_credential_id: s11auth + application_credential_id: "{{ clouds_conf.syseleven_ham1_ac_id }}" + application_credential_secret: "{{ clouds_conf.syseleven_ham1_ac_secret }}" wavestack: interface: "public" identity_api_version: 3 diff --git a/playbooks/compliance_check.yaml b/playbooks/compliance_check.yaml index 203028b55..d74d6e4ef 100644 --- a/playbooks/compliance_check.yaml +++ b/playbooks/compliance_check.yaml @@ -3,30 +3,20 @@ hosts: all tasks: - name: Run compliance script - ansible.builtin.shell: | - python3 ~/Tests/scs-compliance-check.py ~/Tests/scs-compatible-iaas.yaml -s {{ cloud }} -a os_cloud={{ cloud }} -o {{ cloud }}-iaas.yaml -C - register: result + # write report.yaml into the proper directory so it will be transferred back by base job + # -- this then works regardless of VM/pod + ansible.builtin.command: + cmd: > + python3 Tests/scs-test-runner.py --config Tests/config.toml --debug + run --preset {{ preset }} + --output "{{ ansible_user_dir }}/zuul-output/artifacts/report.yaml" + chdir: "{{ ansible_user_dir }}/{{ zuul.project.src_dir }}" changed_when: true - # failed_when: result.rc != 0 - # ^^^ this task does not fail; the only failure would be if the yaml file didn't get produced, - # but then the task "Copy result YAML" would fail anyway - - - ansible.builtin.debug: - msg: "{{ result.stdout }} {{ result.stderr }}" - - - name: Copy result YAML - ansible.builtin.synchronize: - dest: "{{ zuul.executor.log_root }}/{{ cloud }}-iaas.yaml" - mode: pull - src: "{{ cloud }}-iaas.yaml" - verify_host: true - owner: no - group: no - name: Return artifact URL zuul_return: data: zuul: artifacts: - - name: "{{ cloud }}-iaas.yaml" - url: "{{ cloud }}-iaas.yaml" + - name: "report.yaml" + url: "artifacts/report.yaml" diff --git a/playbooks/pre.yaml b/playbooks/pre.yaml index a47102f13..66e81f356 100644 --- a/playbooks/pre.yaml +++ b/playbooks/pre.yaml @@ -4,20 +4,7 @@ roles: - role: ensure-pip # https://zuul-ci.org/docs/zuul-jobs/latest/python-roles.html#role-ensure-pip tasks: - - name: Copy ADRs on the node - ansible.builtin.copy: - src: "../Standards" - dest: "~/" - mode: 0500 - no_log: false - - - name: Copy Tests on the node - ansible.builtin.copy: - src: "../Tests" - dest: "~/" - mode: 0500 - no_log: false - - name: Install dependencies ansible.builtin.pip: - requirements: ~/Tests/requirements.txt + chdir: "{{ ansible_user_dir }}/{{ zuul.project.src_dir }}" + requirements: "Tests/requirements.txt" diff --git a/playbooks/pre_cloud.yaml b/playbooks/pre_cloud.yaml index d163674dc..9a59fa410 100644 --- a/playbooks/pre_cloud.yaml +++ b/playbooks/pre_cloud.yaml @@ -2,7 +2,7 @@ - name: Prepare cloud config and ensure clean env hosts: all roles: - - role: ensure-pip # https://zuul-ci.org/docs/zuul-jobs/latest/python-roles.html#role-ensure-pip + - role: bindep # https://zuul-ci.org/docs/zuul-jobs/latest/general-roles.html#role-bindep tasks: - name: Create cloud config dir ansible.builtin.file: @@ -18,6 +18,30 @@ mode: "0600" no_log: true + - name: Create secrets dir + ansible.builtin.file: + path: "{{ ansible_user_dir }}/{{ zuul.project.src_dir }}/Tests/.secret" + state: directory + recurse: true + mode: "0700" + + - name: Create signing key file + ansible.builtin.copy: + # the secrets are usually stripped of whitespace, but the final newline is essential here + content: "{{ clouds_conf.zuul_ci_signing_key + '\n' }}" + dest: "{{ ansible_user_dir }}/{{ 
zuul.project.src_dir }}/Tests/.secret/keyfile" + mode: "0600" + no_log: true + + - name: Create basic_auth token file + ansible.builtin.copy: + content: "{{ clouds_conf.zuul_ci_basic_auth }}" + dest: "{{ ansible_user_dir }}/{{ zuul.project.src_dir }}/Tests/.secret/tokenfile" + mode: "0600" + no_log: true + - name: Clean up any lingering resources from previous run - ansible.builtin.shell: python3 ~/Tests/cleanup.py -c {{ cloud }} --prefix _scs- + ansible.builtin.command: + cmd: python3 Tests/scs-test-runner.py --config Tests/config.toml --debug cleanup --preset {{ preset }} + chdir: "{{ ansible_user_dir }}/{{ zuul.project.src_dir }}" changed_when: true
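
For reference, the mapping that the `result2` schema comments above describe (`report.versions` → `vname`/`vres`, `vres.items()` → `tcid`/`tcres`, `approval = tcres.result == 1`) translates into roughly the following. This is a minimal sketch, not part of the change set: the `monitor` import path, the `ingest_report` helper name, the DSN, and the exact shape of the parsed report dict are assumptions based on the helpers and templates shown in this diff.

```python
# Sketch only: feed one parsed report YAML into the report/result2 tables using the
# db_insert_report / db_insert_result2 helpers from this diff.
# Import path, DSN, and report layout are assumptions, not confirmed by the change set.
import json

import psycopg2
import yaml

from monitor import db_insert_report, db_insert_result2  # module name assumed


def ingest_report(conn, report: dict):
    """Store the raw report and derive one result2 row per version/testcase."""
    with conn.cursor() as cur:
        reportid = db_insert_report(
            cur,
            report["run"]["uuid"],            # reportuuid
            report["checked_at"],             # checked_at
            report["subject"],                # subject
            json.dumps(report, default=str),  # data (jsonb column); default=str for timestamps
        )
        # mirrors the comment in the result2 schema:
        #   for vname, vres in report.versions.items(): for tcid, tcres in vres.items(): ...
        for vname, vres in report["versions"].items():
            for tcid, tcres in (vres or {}).items():
                db_insert_result2(
                    cur, report["checked_at"], report["subject"],
                    report["spec"]["uuid"], vname, tcid,
                    tcres["result"], tcres["result"] == 1, reportid,
                )
    conn.commit()


if __name__ == "__main__":
    conn = psycopg2.connect("dbname=compliance")  # connection details are an assumption
    with open("report.yaml") as fileobj:
        ingest_report(conn, yaml.safe_load(fileobj))
```

The actual ingestion path in the monitor service may differ (for instance, it may verify the report signature created with the `.secret/keyfile` from `pre_cloud.yaml` before touching the database); the sketch only illustrates the column mapping documented in the schema comments.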