From 0c2460297266494773bd164bd5043b5f6f11f1b4 Mon Sep 17 00:00:00 2001 From: Matthias Dellweg Date: Fri, 13 Sep 2024 14:44:31 +0200 Subject: [PATCH] WIP: ZDU finalize Collection Version sha256 [noissue] --- CHANGES/+sha256_uniqueness.removal | 7 +++ .../datarepair-ansible-collection-sha256.py | 56 ------------------- .../0058_collectionversion_unique_sha256.py | 26 +++++++++ pulp_ansible/app/models.py | 4 +- pulp_ansible/app/serializers.py | 26 ++------- .../test_crud_collection_versions.py | 32 +++++++++++ ..._0057_collection_version_sha256_migrate.py | 6 ++ 7 files changed, 78 insertions(+), 79 deletions(-) create mode 100644 CHANGES/+sha256_uniqueness.removal delete mode 100644 pulp_ansible/app/management/commands/datarepair-ansible-collection-sha256.py create mode 100644 pulp_ansible/app/migrations/0058_collectionversion_unique_sha256.py diff --git a/CHANGES/+sha256_uniqueness.removal b/CHANGES/+sha256_uniqueness.removal new file mode 100644 index 000000000..2d10a4d3b --- /dev/null +++ b/CHANGES/+sha256_uniqueness.removal @@ -0,0 +1,7 @@ +Added the final migration to make the sha256 of the collection version artifact the uniqueness +constraint. This allows users to serve their own interpretation of the content in their private +repositories. +The migration will only succeed if all the content has been adjusted. To account for content that +was not migrated by the migration shipped with 0.22.0, you can run the content repair command +``datarepair-ansible-collection-sha256`` prior to upgrading. +This version removed the content repair command. diff --git a/pulp_ansible/app/management/commands/datarepair-ansible-collection-sha256.py b/pulp_ansible/app/management/commands/datarepair-ansible-collection-sha256.py deleted file mode 100644 index 6ec68208a..000000000 --- a/pulp_ansible/app/management/commands/datarepair-ansible-collection-sha256.py +++ /dev/null @@ -1,56 +0,0 @@ -from gettext import gettext as _ - -from django.core.management import BaseCommand -from django.db import transaction - -from pulp_ansible.app.models import CollectionVersion - - -class Command(BaseCommand): - """ - Django management command to repair ansible collection versions without sha256. - """ - - help = ( - "This script repairs ansible collection versions without sha256 if artifacts are available." - ) - - def add_arguments(self, parser): - """Set up arguments.""" - parser.add_argument( - "--dry-run", - action="store_true", - help=_("Don't modify anything, just collect results."), - ) - - def handle(self, *args, **options): - dry_run = options["dry_run"] - failed_units = 0 - repaired_units = 0 - - unit_qs = CollectionVersion.objects.filter(sha256__isnull=True) - count = unit_qs.count() - print(f"CollectionVersions to repair: {count}") - if count == 0: - return - - for unit in unit_qs.iterator(): - try: - content_artifact = unit.contentartifact_set.get() - artifact = content_artifact.artifact - unit.sha256 = artifact.sha256 - - if not dry_run: - with transaction.atomic(): - unit.save(update_fields=["sha256"]) - except Exception as e: - failed_units += 1 - print( - f"Failed to migrate collection version '{unit.namespace}.{unit.name}' " - f"'{unit.version}': {e}" - ) - else: - repaired_units += 1 - - print(f"Successfully repaired collection versions: {repaired_units}") - print(f"Collection versions failed to repair: {failed_units}") diff --git a/pulp_ansible/app/migrations/0058_collectionversion_unique_sha256.py b/pulp_ansible/app/migrations/0058_collectionversion_unique_sha256.py new file mode 100644 index 000000000..c638a3fbd --- /dev/null +++ b/pulp_ansible/app/migrations/0058_collectionversion_unique_sha256.py @@ -0,0 +1,26 @@ +# Generated by Django 3.2.14 on 2022-07-21 23:05 + +from django.db import migrations, models +from pulpcore.plugin.migrations import RequireVersion + + +class Migration(migrations.Migration): + + dependencies = [ + ('ansible', '0057_collectionversion_sha256_migrate'), + ('core', '0110_apiappstatus'), + ] + + operations = [ + # TODO adjust this version!!! + RequireVersion("ansible", "0.22.0"), + migrations.AlterField( + model_name='collectionversion', + name='sha256', + field=models.CharField(db_index=True, max_length=64, null=False), + ), + migrations.AlterUniqueTogether( + name='collectionversion', + unique_together={('sha256',)}, + ), + ] diff --git a/pulp_ansible/app/models.py b/pulp_ansible/app/models.py index 582ff244f..3274d00c4 100644 --- a/pulp_ansible/app/models.py +++ b/pulp_ansible/app/models.py @@ -180,7 +180,7 @@ class CollectionVersion(Content): namespace = models.CharField(max_length=64, editable=False) repository = models.CharField(default="", blank=True, max_length=2000, editable=False) requires_ansible = models.CharField(null=True, max_length=255) - sha256 = models.CharField(max_length=64, null=True, blank=False) + sha256 = models.CharField(max_length=64, db_index=True, null=False, blank=False) version = models.CharField(max_length=128, db_collation="pulp_ansible_semver") version_major = models.IntegerField() @@ -231,7 +231,7 @@ def __str__(self): class Meta: default_related_name = "%(app_label)s_%(model_name)s" - unique_together = (("namespace", "name", "version"), ("sha256",)) + unique_together = ("sha256",) constraints = [ UniqueConstraint( fields=("collection", "is_highest"), diff --git a/pulp_ansible/app/serializers.py b/pulp_ansible/app/serializers.py index f0a26157c..2c0085c58 100644 --- a/pulp_ansible/app/serializers.py +++ b/pulp_ansible/app/serializers.py @@ -504,10 +504,12 @@ def deferred_validate(self, data): # Call super to ensure that data contains artifact data = super().deferred_validate(data) artifact = data.get("artifact") + + # Some randomly failing tests suggest that this may not be true... + assert not artifact._state.adding + if (sha256 := data.get("sha256")) and sha256 != artifact.sha256: raise ValidationError(_("Expected sha256 did not match uploaded artifact's sha256")) - else: - data["sha256"] = artifact.sha256 collection_info = process_collection_artifact( artifact=artifact, @@ -518,30 +520,12 @@ def deferred_validate(self, data): # repository field clashes collection_info["origin_repository"] = collection_info.pop("repository", None) data.update(collection_info) - # `retrieve` needs artifact, but it won't be in validated_data because of `get_artifacts` - self.context["artifact"] = artifact return data def retrieve(self, validated_data): """Reuse existing CollectionVersion if provided artifact matches.""" - namespace = validated_data["namespace"] - name = validated_data["name"] - version = validated_data["version"] - artifact = self.context["artifact"] - # TODO switch this check to use digest when ColVersion uniqueness constraint is changed - col = CollectionVersion.objects.filter( - namespace=namespace, name=name, version=version - ).first() - if col: - if col._artifacts.get() != artifact: - raise ValidationError( - _("Collection {}.{}-{} already exists with a different artifact").format( - namespace, name, version - ) - ) - - return col + return CollectionVersion.objects.filter(sha256=validated_data["sha256"]).first() def create(self, validated_data): """Final step in creating the CollectionVersion.""" diff --git a/pulp_ansible/tests/functional/api/collection/test_crud_collection_versions.py b/pulp_ansible/tests/functional/api/collection/test_crud_collection_versions.py index 898a21524..e6b9f2219 100644 --- a/pulp_ansible/tests/functional/api/collection/test_crud_collection_versions.py +++ b/pulp_ansible/tests/functional/api/collection/test_crud_collection_versions.py @@ -1,5 +1,8 @@ """Tests related to sync ansible plugin collection content type.""" +import hashlib +from pathlib import Path + import pytest from orionutils.generator import randstr @@ -100,3 +103,32 @@ def test_content_unit_lifecycle(ansible_bindings, build_and_upload_collection, m ansible_bindings.ContentCollectionVersionsApi.create(file=collection_artifact.filename).task ) assert content_unit_href in create_task.created_resources + + +@pytest.mark.parallel +def test_duplicate_collection_key( + ansible_bindings, + ansible_repo_factory, + build_and_upload_collection, + monitor_task, +): + """Create two content units with the same name but different artifacts.""" + repository1 = ansible_repo_factory() + repository2 = ansible_repo_factory() + + attrs = {"namespace": randstr(), "name": "squeezer", "version": "0.0.9"} + collection_artifact1, content_unit_href1 = build_and_upload_collection( + repository1, config=attrs + ) + collection_artifact2, content_unit_href2 = build_and_upload_collection( + repository2, config=attrs + ) + + sha256_1 = hashlib.sha256(Path(collection_artifact1.filename).read_bytes()).hexdigest() + sha256_2 = hashlib.sha256(Path(collection_artifact2.filename).read_bytes()).hexdigest() + assert sha256_1 != sha256_2 + + content_unit_1 = ansible_bindings.ContentCollectionVersionsApi.read(content_unit_href1) + assert content_unit_1.sha256 == sha256_1 + content_unit_2 = ansible_bindings.ContentCollectionVersionsApi.read(content_unit_href2) + assert content_unit_2.sha256 == sha256_2 diff --git a/pulp_ansible/tests/unit/migrations/test_0057_collection_version_sha256_migrate.py b/pulp_ansible/tests/unit/migrations/test_0057_collection_version_sha256_migrate.py index c81f89afe..f1f1419fa 100644 --- a/pulp_ansible/tests/unit/migrations/test_0057_collection_version_sha256_migrate.py +++ b/pulp_ansible/tests/unit/migrations/test_0057_collection_version_sha256_migrate.py @@ -37,3 +37,9 @@ def test_collection_version_sha256_migrate(migrate): cv = CollectionVersion.objects.get(pk=cv.pk) assert cv.sha256 == "SENTINEL" + + apps = migrate([("ansible", "0058_collectionversion_unique_sha256")]) + CollectionVersion = apps.get_model("ansible", "CollectionVersion") + + cv = CollectionVersion.objects.get(pk=cv.pk) + assert cv.sha256 == "SENTINEL"