Skip to content

Commit

Permalink
WIP: ZDU finalize Collection Version sha256
Browse files Browse the repository at this point in the history
[noissue]
  • Loading branch information
mdellweg committed Nov 11, 2024
1 parent 544e468 commit a81d493
Show file tree
Hide file tree
Showing 10 changed files with 93 additions and 90 deletions.
2 changes: 2 additions & 0 deletions CHANGES/1052.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The CollectionVersion global uniqueness constraint is now its sha256 digest. Repository-level
uniqueness is still (namespace, name, version).
7 changes: 7 additions & 0 deletions CHANGES/1052.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Added the final migration to make the sha256 of the collection version artifact the uniqueness
constraint. This allows users to serve their own interpretation of the content in their private
repositories.
The migration will only succeed if all the content has been adjusted. To account for content that
was not migrated by the migration shipped with 0.22.0, you can run the content repair command
``datarepair-ansible-collection-sha256`` prior to upgrading.
This version removes the content repair command.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Generated by Django 3.2.14 on 2022-07-21 23:05

from django.db import migrations, models
from pulpcore.plugin.migrations import RequireVersion


class Migration(migrations.Migration):
    # Final step of the zero-downtime-upgrade (ZDU) switch: CollectionVersion's
    # global uniqueness moves from (namespace, name, version) to the sha256
    # digest of its artifact.

    dependencies = [
        ('ansible', '0057_collectionversion_sha256_migrate'),
        ('core', '0110_apiappstatus'),
    ]

    operations = [
        # TODO adjust this version!!!
        # Guard: abort the migration unless the installation already ran the
        # 0.22.0 data migration that populated sha256 on existing content;
        # otherwise the NOT NULL / unique constraints below would fail.
        RequireVersion("ansible", "0.22.0"),
        # sha256 becomes mandatory (null=False) and indexed for fast lookup.
        migrations.AlterField(
            model_name='collectionversion',
            name='sha256',
            field=models.CharField(db_index=True, max_length=64, null=False),
        ),
        # Global uniqueness is now the digest alone.
        migrations.AlterUniqueTogether(
            name='collectionversion',
            unique_together={('sha256',)},
        ),
    ]
9 changes: 2 additions & 7 deletions pulp_ansible/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ class CollectionVersion(Content):
namespace = models.CharField(max_length=64, editable=False)
repository = models.CharField(default="", blank=True, max_length=2000, editable=False)
requires_ansible = models.CharField(null=True, max_length=255)
sha256 = models.CharField(max_length=64, null=True, blank=False)
sha256 = models.CharField(max_length=64, db_index=True, null=False, blank=False)

version = models.CharField(max_length=128, db_collation="pulp_ansible_semver")
version_major = models.IntegerField()
Expand All @@ -207,11 +207,6 @@ class CollectionVersion(Content):
# search_vector is built.
search_vector = psql_search.SearchVectorField(default="")

@classmethod
def natural_key_fields(cls):
# This method should be removed when the uniqueness is properly switched to sha256.
return cls._meta.unique_together[0]

@hook(BEFORE_SAVE)
def calculate_version_parts(self):
v = Version(self.version)
Expand All @@ -236,7 +231,7 @@ def __str__(self):

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
unique_together = (("namespace", "name", "version"), ("sha256",))
unique_together = ("sha256",)
constraints = [
UniqueConstraint(
fields=("collection", "is_highest"),
Expand Down
33 changes: 12 additions & 21 deletions pulp_ansible/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,10 +505,12 @@ def deferred_validate(self, data):
# Call super to ensure that data contains artifact
data = super().deferred_validate(data)
artifact = data.get("artifact")

# Some randomly failing tests suggest that this may not be true...
assert not artifact._state.adding

if (sha256 := data.get("sha256")) and sha256 != artifact.sha256:
raise ValidationError(_("Expected sha256 did not match uploaded artifact's sha256"))
else:
data["sha256"] = artifact.sha256

collection_info = process_collection_artifact(
artifact=artifact,
Expand All @@ -519,35 +521,24 @@ def deferred_validate(self, data):
# repository field clashes
collection_info["origin_repository"] = collection_info.pop("repository", None)
data.update(collection_info)
# `retrieve` needs artifact, but it won't be in validated_data because of `get_artifacts`
self.context["artifact"] = artifact

# Some randomly failing tests suggest that this may not be true...
assert not data["artifact"]._state.adding

return data

def retrieve(self, validated_data):
"""Reuse existing CollectionVersion if provided artifact matches."""
namespace = validated_data["namespace"]
name = validated_data["name"]
version = validated_data["version"]
artifact = self.context["artifact"]
# TODO switch this check to use digest when ColVersion uniqueness constraint is changed
col = CollectionVersion.objects.filter(
namespace=namespace, name=name, version=version
).first()
if col:
if col._artifacts.get() != artifact:
raise ValidationError(
_("Collection {}.{}-{} already exists with a different artifact").format(
namespace, name, version
)
)

return col
return CollectionVersion.objects.filter(sha256=validated_data["sha256"]).first()

def create(self, validated_data):
"""Final step in creating the CollectionVersion."""
tags = validated_data.pop("tags")
origin_repository = validated_data.pop("origin_repository")

# Some randomly failing tests suggest that this may not be true...
assert not validated_data["artifact"]._state.adding

# Create CollectionVersion from its metadata and adds to repository if specified
content = super().create(validated_data)

Expand Down
8 changes: 3 additions & 5 deletions pulp_ansible/app/tasks/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ async def declarative_content_from_git_repo(remote, url, git_ref=None, metadata_
version = metadata["metadata"]["version"]
else:
raise e
collection_version = await sync_to_async(CollectionVersion.objects.get)(
collection_version = await CollectionVersion.objects.aget(
namespace=namespace, name=name, version=version
)
if artifact is None:
Expand Down Expand Up @@ -275,7 +275,7 @@ def import_collection(
temp_file = PulpTemporaryFile.objects.get(pk=temp_file_pk)
filename = CollectionFilename(expected_namespace, expected_name, expected_version)
log.info(f"Processing collection from {temp_file.file.name}")
user_facing_logger = logging.getLogger("pulp_ansible.app.tasks.collection.import_collection")
user_facing_logger = logging.getLogger("pulp_ansible.app.tasks.collections.import_collection")

try:
with temp_file.file.open() as artifact_file:
Expand Down Expand Up @@ -673,9 +673,7 @@ async def _add_namespace(self, name, namespace_sha):
"""Adds A Namespace metadata content to the pipeline."""

try:
ns = await sync_to_async(AnsibleNamespaceMetadata.objects.get)(
metadata_sha256=namespace_sha
)
ns = await AnsibleNamespaceMetadata.objects.aget(metadata_sha256=namespace_sha)
await self.put(DeclarativeContent(ns))
return True
except AnsibleNamespaceMetadata.DoesNotExist:
Expand Down
4 changes: 3 additions & 1 deletion pulp_ansible/app/tasks/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ def process_collection_artifact(artifact, namespace, name, version):

# Set up logging for CollectionImport object
CollectionImport.objects.get_or_create(task_id=Task.current().pulp_id)
user_facing_logger = logging.getLogger("pulp_ansible.app.tasks.collection.import_collection")
user_facing_logger = logging.getLogger(
"pulp_ansible.app.tasks.upload.process_collection_artifact"
)

artifact_url = reverse("artifacts-detail", args=[artifact.pk])
filename = CollectionFilename(namespace, name, version)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""Tests related to sync ansible plugin collection content type."""

import hashlib
from pathlib import Path

import pytest

from pulp_ansible.tests.functional.utils import randstr
Expand Down Expand Up @@ -99,3 +102,32 @@ def test_content_unit_lifecycle(ansible_bindings, build_and_upload_collection, m
ansible_bindings.ContentCollectionVersionsApi.create(file=collection_artifact.filename).task
)
assert content_unit_href in create_task.created_resources


@pytest.mark.parallel
def test_duplicate_collection_key(
    ansible_bindings,
    ansible_repo_factory,
    build_and_upload_collection,
    monitor_task,
):
    """Create two content units with the same name but different artifacts."""
    first_repository = ansible_repo_factory()
    second_repository = ansible_repo_factory()

    # Both collections share the repository-level key (namespace, name, version),
    # but each build produces a distinct artifact and therefore a distinct digest.
    shared_key = {"namespace": randstr(), "name": "squeezer", "version": "0.0.9"}
    first_artifact, first_href = build_and_upload_collection(
        first_repository, config=shared_key
    )
    second_artifact, second_href = build_and_upload_collection(
        second_repository, config=shared_key
    )

    first_digest = hashlib.sha256(Path(first_artifact.filename).read_bytes()).hexdigest()
    second_digest = hashlib.sha256(Path(second_artifact.filename).read_bytes()).hexdigest()
    assert first_digest != second_digest

    # Each uploaded unit must report the digest of its own artifact.
    for href, digest in ((first_href, first_digest), (second_href, second_digest)):
        content_unit = ansible_bindings.ContentCollectionVersionsApi.read(href)
        assert content_unit.sha256 == digest
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,9 @@ def test_collection_version_sha256_migrate(migrate):

cv = CollectionVersion.objects.get(pk=cv.pk)
assert cv.sha256 == "SENTINEL"

apps = migrate([("ansible", "0058_collectionversion_unique_sha256")])
CollectionVersion = apps.get_model("ansible", "CollectionVersion")

cv = CollectionVersion.objects.get(pk=cv.pk)
assert cv.sha256 == "SENTINEL"

0 comments on commit a81d493

Please sign in to comment.