From a81d49314b8f2b56261ea180decf6cab22b7ab44 Mon Sep 17 00:00:00 2001 From: Matthias Dellweg Date: Wed, 6 Nov 2024 13:28:31 +0100 Subject: [PATCH] WIP: ZDU finalize Collection Version sha256 [noissue] --- CHANGES/1052.feature | 2 + CHANGES/1052.removal | 7 +++ .../datarepair-ansible-collection-sha256.py | 56 ------------------- .../0058_collectionversion_unique_sha256.py | 26 +++++++++ pulp_ansible/app/models.py | 9 +-- pulp_ansible/app/serializers.py | 33 ++++------- pulp_ansible/app/tasks/collections.py | 8 +-- pulp_ansible/app/tasks/upload.py | 4 +- .../test_crud_collection_versions.py | 32 +++++++++++ ..._0057_collection_version_sha256_migrate.py | 6 ++ 10 files changed, 93 insertions(+), 90 deletions(-) create mode 100644 CHANGES/1052.feature create mode 100644 CHANGES/1052.removal delete mode 100644 pulp_ansible/app/management/commands/datarepair-ansible-collection-sha256.py create mode 100644 pulp_ansible/app/migrations/0058_collectionversion_unique_sha256.py diff --git a/CHANGES/1052.feature b/CHANGES/1052.feature new file mode 100644 index 000000000..bcab5c4a3 --- /dev/null +++ b/CHANGES/1052.feature @@ -0,0 +1,2 @@ +CollectionVersion global uniqueness constraint is now its sha256 digest. Repository level uniqueness +is still (namespace, name, version). diff --git a/CHANGES/1052.removal b/CHANGES/1052.removal new file mode 100644 index 000000000..2d10a4d3b --- /dev/null +++ b/CHANGES/1052.removal @@ -0,0 +1,7 @@ +Added the final migration to make the sha256 of the collection version artifact the uniqueness +constraint. This allows users to serve their own interpretation of the content in their private +repositories. +The migration will only succeed if all the content has been adjusted. To account for content that +was not migrated by the migration shipped with 0.22.0, you can run the content repair command +``datarepair-ansible-collection-sha256`` prior to upgrading. +This version removed the content repair command. diff --git a/pulp_ansible/app/management/commands/datarepair-ansible-collection-sha256.py b/pulp_ansible/app/management/commands/datarepair-ansible-collection-sha256.py deleted file mode 100644 index cd3c615ae..000000000 --- a/pulp_ansible/app/management/commands/datarepair-ansible-collection-sha256.py +++ /dev/null @@ -1,56 +0,0 @@ -from gettext import gettext as _ - -from django.core.management import BaseCommand -from django.db import transaction - -from pulp_ansible.app.models import CollectionVersion - - -class Command(BaseCommand): - """ - Django management command to repair ansible collection versions without sha256. - """ - - help = ( - "This script repairs ansible collection versions without sha256 if artifacts are available." 
- ) - - def add_arguments(self, parser): - """Set up arguments.""" - parser.add_argument( - "--dry-run", - action="store_true", - help=_("Don't modify anything, just collect results."), - ) - - def handle(self, *args, **options): - dry_run = options["dry_run"] - failed_units = 0 - repaired_units = 0 - - unit_qs = CollectionVersion.objects.filter(sha256__isnull=True) - count = unit_qs.count() - print(f"CollectionVersions to repair: {count}") - if count == 0: - return - - for unit in unit_qs.prefetch_related("contenartifact_set").iterator(): - try: - content_artifact = unit.contentartifact_set.get() - artifact = content_artifact.artifact - unit.sha256 = artifact.sha256 - - if not dry_run: - with transaction.atomic(): - unit.save(update_fields=["sha256"]) - except Exception as e: - failed_units += 1 - print( - f"Failed to migrate collection version '{unit.namespace}.{unit.name}' " - f"'{unit.version}': {e}" - ) - else: - repaired_units += 1 - - print(f"Successfully repaired collection versions: {repaired_units}") - print(f"Collection versions failed to repair: {failed_units}") diff --git a/pulp_ansible/app/migrations/0058_collectionversion_unique_sha256.py b/pulp_ansible/app/migrations/0058_collectionversion_unique_sha256.py new file mode 100644 index 000000000..c638a3fbd --- /dev/null +++ b/pulp_ansible/app/migrations/0058_collectionversion_unique_sha256.py @@ -0,0 +1,26 @@ +# Generated by Django 3.2.14 on 2022-07-21 23:05 + +from django.db import migrations, models +from pulpcore.plugin.migrations import RequireVersion + + +class Migration(migrations.Migration): + + dependencies = [ + ('ansible', '0057_collectionversion_sha256_migrate'), + ('core', '0110_apiappstatus'), + ] + + operations = [ + # TODO adjust this version!!! + RequireVersion("ansible", "0.22.0"), + migrations.AlterField( + model_name='collectionversion', + name='sha256', + field=models.CharField(db_index=True, max_length=64, null=False), + ), + migrations.AlterUniqueTogether( + name='collectionversion', + unique_together={('sha256',)}, + ), + ] diff --git a/pulp_ansible/app/models.py b/pulp_ansible/app/models.py index d4accf68f..3274d00c4 100644 --- a/pulp_ansible/app/models.py +++ b/pulp_ansible/app/models.py @@ -180,7 +180,7 @@ class CollectionVersion(Content): namespace = models.CharField(max_length=64, editable=False) repository = models.CharField(default="", blank=True, max_length=2000, editable=False) requires_ansible = models.CharField(null=True, max_length=255) - sha256 = models.CharField(max_length=64, null=True, blank=False) + sha256 = models.CharField(max_length=64, db_index=True, null=False, blank=False) version = models.CharField(max_length=128, db_collation="pulp_ansible_semver") version_major = models.IntegerField() @@ -207,11 +207,6 @@ class CollectionVersion(Content): # search_vector is built. search_vector = psql_search.SearchVectorField(default="") - @classmethod - def natural_key_fields(cls): - # This method should be removed when the uniqueness is properly switched to sha256. 
- return cls._meta.unique_together[0] - @hook(BEFORE_SAVE) def calculate_version_parts(self): v = Version(self.version) @@ -236,7 +231,7 @@ def __str__(self): class Meta: default_related_name = "%(app_label)s_%(model_name)s" - unique_together = (("namespace", "name", "version"), ("sha256",)) + unique_together = ("sha256",) constraints = [ UniqueConstraint( fields=("collection", "is_highest"), diff --git a/pulp_ansible/app/serializers.py b/pulp_ansible/app/serializers.py index 0cd9c86f5..6c593bcd7 100644 --- a/pulp_ansible/app/serializers.py +++ b/pulp_ansible/app/serializers.py @@ -505,10 +505,12 @@ def deferred_validate(self, data): # Call super to ensure that data contains artifact data = super().deferred_validate(data) artifact = data.get("artifact") + + # Some randomly failing tests suggest that this may not be true... + assert not artifact._state.adding + if (sha256 := data.get("sha256")) and sha256 != artifact.sha256: raise ValidationError(_("Expected sha256 did not match uploaded artifact's sha256")) - else: - data["sha256"] = artifact.sha256 collection_info = process_collection_artifact( artifact=artifact, @@ -519,35 +521,24 @@ def deferred_validate(self, data): # repository field clashes collection_info["origin_repository"] = collection_info.pop("repository", None) data.update(collection_info) - # `retrieve` needs artifact, but it won't be in validated_data because of `get_artifacts` - self.context["artifact"] = artifact + + # Some randomly failing tests suggest that this may not be true... + assert not data["artifact"]._state.adding return data def retrieve(self, validated_data): """Reuse existing CollectionVersion if provided artifact matches.""" - namespace = validated_data["namespace"] - name = validated_data["name"] - version = validated_data["version"] - artifact = self.context["artifact"] - # TODO switch this check to use digest when ColVersion uniqueness constraint is changed - col = CollectionVersion.objects.filter( - namespace=namespace, name=name, version=version - ).first() - if col: - if col._artifacts.get() != artifact: - raise ValidationError( - _("Collection {}.{}-{} already exists with a different artifact").format( - namespace, name, version - ) - ) - - return col + return CollectionVersion.objects.filter(sha256=validated_data["sha256"]).first() def create(self, validated_data): """Final step in creating the CollectionVersion.""" tags = validated_data.pop("tags") origin_repository = validated_data.pop("origin_repository") + + # Some randomly failing tests suggest that this may not be true... 
+ assert not validated_data["artifact"]._state.adding + # Create CollectionVersion from its metadata and adds to repository if specified content = super().create(validated_data) diff --git a/pulp_ansible/app/tasks/collections.py b/pulp_ansible/app/tasks/collections.py index a6b2b08fc..01141d8dc 100644 --- a/pulp_ansible/app/tasks/collections.py +++ b/pulp_ansible/app/tasks/collections.py @@ -145,7 +145,7 @@ async def declarative_content_from_git_repo(remote, url, git_ref=None, metadata_ version = metadata["metadata"]["version"] else: raise e - collection_version = await sync_to_async(CollectionVersion.objects.get)( + collection_version = await CollectionVersion.objects.aget( namespace=namespace, name=name, version=version ) if artifact is None: @@ -275,7 +275,7 @@ def import_collection( temp_file = PulpTemporaryFile.objects.get(pk=temp_file_pk) filename = CollectionFilename(expected_namespace, expected_name, expected_version) log.info(f"Processing collection from {temp_file.file.name}") - user_facing_logger = logging.getLogger("pulp_ansible.app.tasks.collection.import_collection") + user_facing_logger = logging.getLogger("pulp_ansible.app.tasks.collections.import_collection") try: with temp_file.file.open() as artifact_file: @@ -673,9 +673,7 @@ async def _add_namespace(self, name, namespace_sha): """Adds A Namespace metadata content to the pipeline.""" try: - ns = await sync_to_async(AnsibleNamespaceMetadata.objects.get)( - metadata_sha256=namespace_sha - ) + ns = await AnsibleNamespaceMetadata.objects.aget(metadata_sha256=namespace_sha) await self.put(DeclarativeContent(ns)) return True except AnsibleNamespaceMetadata.DoesNotExist: diff --git a/pulp_ansible/app/tasks/upload.py b/pulp_ansible/app/tasks/upload.py index 0ce543d51..dea90217c 100644 --- a/pulp_ansible/app/tasks/upload.py +++ b/pulp_ansible/app/tasks/upload.py @@ -25,7 +25,9 @@ def process_collection_artifact(artifact, namespace, name, version): # Set up logging for CollectionImport object CollectionImport.objects.get_or_create(task_id=Task.current().pulp_id) - user_facing_logger = logging.getLogger("pulp_ansible.app.tasks.collection.import_collection") + user_facing_logger = logging.getLogger( + "pulp_ansible.app.tasks.upload.process_collection_artifact" + ) artifact_url = reverse("artifacts-detail", args=[artifact.pk]) filename = CollectionFilename(namespace, name, version) diff --git a/pulp_ansible/tests/functional/api/collection/test_crud_collection_versions.py b/pulp_ansible/tests/functional/api/collection/test_crud_collection_versions.py index 3e23b4676..b966f041f 100644 --- a/pulp_ansible/tests/functional/api/collection/test_crud_collection_versions.py +++ b/pulp_ansible/tests/functional/api/collection/test_crud_collection_versions.py @@ -1,5 +1,8 @@ """Tests related to sync ansible plugin collection content type.""" +import hashlib +from pathlib import Path + import pytest from pulp_ansible.tests.functional.utils import randstr @@ -99,3 +102,32 @@ def test_content_unit_lifecycle(ansible_bindings, build_and_upload_collection, m ansible_bindings.ContentCollectionVersionsApi.create(file=collection_artifact.filename).task ) assert content_unit_href in create_task.created_resources + + +@pytest.mark.parallel +def test_duplicate_collection_key( + ansible_bindings, + ansible_repo_factory, + build_and_upload_collection, + monitor_task, +): + """Create two content units with the same name but different artifacts.""" + repository1 = ansible_repo_factory() + repository2 = ansible_repo_factory() + + attrs = {"namespace": randstr(), 
"name": "squeezer", "version": "0.0.9"} + collection_artifact1, content_unit_href1 = build_and_upload_collection( + repository1, config=attrs + ) + collection_artifact2, content_unit_href2 = build_and_upload_collection( + repository2, config=attrs + ) + + sha256_1 = hashlib.sha256(Path(collection_artifact1.filename).read_bytes()).hexdigest() + sha256_2 = hashlib.sha256(Path(collection_artifact2.filename).read_bytes()).hexdigest() + assert sha256_1 != sha256_2 + + content_unit_1 = ansible_bindings.ContentCollectionVersionsApi.read(content_unit_href1) + assert content_unit_1.sha256 == sha256_1 + content_unit_2 = ansible_bindings.ContentCollectionVersionsApi.read(content_unit_href2) + assert content_unit_2.sha256 == sha256_2 diff --git a/pulp_ansible/tests/unit/migrations/test_0057_collection_version_sha256_migrate.py b/pulp_ansible/tests/unit/migrations/test_0057_collection_version_sha256_migrate.py index c81f89afe..f1f1419fa 100644 --- a/pulp_ansible/tests/unit/migrations/test_0057_collection_version_sha256_migrate.py +++ b/pulp_ansible/tests/unit/migrations/test_0057_collection_version_sha256_migrate.py @@ -37,3 +37,9 @@ def test_collection_version_sha256_migrate(migrate): cv = CollectionVersion.objects.get(pk=cv.pk) assert cv.sha256 == "SENTINEL" + + apps = migrate([("ansible", "0058_collectionversion_unique_sha256")]) + CollectionVersion = apps.get_model("ansible", "CollectionVersion") + + cv = CollectionVersion.objects.get(pk=cv.pk) + assert cv.sha256 == "SENTINEL"