From df915b05ce6b3a2dd5e4fd4bba1e698588c328c3 Mon Sep 17 00:00:00 2001 From: Dan LaManna Date: Tue, 15 Oct 2024 12:22:17 -0400 Subject: [PATCH 1/4] Add mypy/django-stubs integration --- isic/settings/base.py | 3 +++ pyproject.toml | 15 +++++++++++++++ setup.py | 7 +++++++ tox.ini | 18 ++++++++++++++---- 4 files changed, 39 insertions(+), 4 deletions(-) diff --git a/isic/settings/base.py b/isic/settings/base.py index 0a4702a6..1523d0dc 100644 --- a/isic/settings/base.py +++ b/isic/settings/base.py @@ -3,9 +3,12 @@ from pathlib import Path from celery.schedules import crontab +import django_stubs_ext from .upstream_base import * # noqa: F403 +django_stubs_ext.monkeypatch() + def _oauth2_pkce_required(client_id): from oauth2_provider.models import get_application_model diff --git a/pyproject.toml b/pyproject.toml index 45f4d0cb..b7855e22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,3 +85,18 @@ force-sort-within-sections = true [tool.ruff.lint.pydocstyle] convention = "pep257" + +[tool.mypy] +disable_error_code = "var-annotated,return" +ignore_missing_imports = true +warn_unused_configs = true +check_untyped_defs = true +plugins = ["mypy_django_plugin.main"] + +[[tool.mypy.overrides]] +module = ["isic.core.pagination"] +ignore_errors = true + +[tool.django-stubs] +django_settings_module = "isic.settings.testing" +strict_settings = false diff --git a/setup.py b/setup.py index b6fb1b9b..204d4dec 100644 --- a/setup.py +++ b/setup.py @@ -63,6 +63,7 @@ "django-object-actions", "django-redis", "django-storages>1.14.2", + "django-stubs-ext", "django-widget-tweaks", "django[argon2]>=5.1,<6", "gdal", @@ -118,5 +119,11 @@ "pytest-random-order", "pytest-repeat", ], + "type": [ + "django-stubs[compatible-mypy]", + # see https://github.com/typeddjango/django-stubs/issues/2405 + "mypy<1.12.0", + "types-requests", + ], }, ) diff --git a/tox.ini b/tox.ini index 53b0434c..ce415c5d 100644 --- a/tox.ini +++ b/tox.ini @@ -33,11 +33,21 @@ commands = djhtml --check {posargs:isic} [testenv:type] +passenv = + DJANGO_CELERY_BROKER_URL + DJANGO_DATABASE_URL + DJANGO_MINIO_STORAGE_ACCESS_KEY + DJANGO_MINIO_STORAGE_ENDPOINT + DJANGO_MINIO_STORAGE_SECRET_KEY + DJANGO_ISIC_ELASTICSEARCH_URI + DJANGO_ISIC_REDIS_URL +extras = + dev + test + type skipsdist = true -skip_install = true -deps = - mypy - django-stubs +allowlist_externals = uv +install_command = uv pip install {opts} {packages} commands = mypy {posargs:.} From d3d54752fb4e8c412798b50832abb78151589ff6 Mon Sep 17 00:00:00 2001 From: Dan LaManna Date: Tue, 15 Oct 2024 12:37:03 -0400 Subject: [PATCH 2/4] Fix/ignore typechecking problems --- isic/auth.py | 4 +- isic/core/api/collection.py | 6 +- isic/core/api/image.py | 4 +- isic/core/api/user.py | 6 +- isic/core/dsl.py | 4 +- .../management/commands/merge-collections.py | 29 --------- .../commands/set_isic_permissions.py | 2 +- isic/core/models/collection.py | 4 +- isic/core/models/image.py | 35 +++++++--- isic/core/permissions.py | 6 +- isic/core/search.py | 17 +++-- isic/core/serializers.py | 12 ++-- isic/core/services/collection/doi.py | 5 +- isic/core/services/collection/image.py | 6 +- isic/core/services/image/__init__.py | 2 + isic/core/tests/test_collection.py | 1 + isic/core/tests/test_metadata_masking_api.py | 1 + isic/core/tests/test_middleware.py | 3 +- isic/core/tests/test_view_image_list.py | 2 +- isic/core/views/collections.py | 10 ++- isic/core/views/images.py | 7 +- isic/core/views/users.py | 2 +- isic/ingest/api.py | 4 ++ isic/ingest/forms.py | 2 + .../management/commands/merge-cohorts.py | 25 -------- .../commands/metadata-files-from-csv.py | 13 ++-- .../commands/migrate_content_disposition.py | 40 ------------ .../commands/revalidate-metadata.py | 10 +-- isic/ingest/models/accession.py | 14 ++-- isic/ingest/models/cohort.py | 3 +- isic/ingest/models/contributor.py | 9 +-- isic/ingest/models/lesion.py | 16 ++++- isic/ingest/services/accession/__init__.py | 2 +- isic/ingest/tests/csv_streams.py | 9 ++- isic/ingest/tests/test_metadata.py | 64 ++++++++++--------- isic/ingest/tests/test_publish.py | 2 +- isic/ingest/utils/metadata.py | 8 +-- isic/ingest/utils/mime.py | 2 +- isic/ingest/views/cohort.py | 4 +- isic/settings/base.py | 16 +++-- isic/settings/upstream_base.py | 2 +- isic/stats/tasks.py | 30 +++++---- isic/stats/tests/test_tasks.py | 9 +-- isic/studies/models.py | 10 +-- isic/studies/tests/test_create_study.py | 1 + isic/types.py | 12 ++++ isic/zip_download/api.py | 28 ++++++-- isic/zip_download/tests/test_zip_download.py | 6 +- 48 files changed, 254 insertions(+), 255 deletions(-) delete mode 100644 isic/core/management/commands/merge-collections.py delete mode 100644 isic/ingest/management/commands/merge-cohorts.py delete mode 100644 isic/ingest/management/commands/migrate_content_disposition.py create mode 100644 isic/types.py diff --git a/isic/auth.py b/isic/auth.py index 38be1052..b3588086 100644 --- a/isic/auth.py +++ b/isic/auth.py @@ -1,3 +1,5 @@ +from collections.abc import Callable + from ninja.security import HttpBearer, django_auth from oauth2_provider.oauth2_backends import get_oauthlib_core @@ -37,6 +39,6 @@ def authenticate(self, request, token): # The lambda _: True is to handle the case where a user doesn't pass any authentication. -allow_any = [django_auth, OAuth2AuthBearer("any"), lambda _: True] +allow_any: list[Callable] = [django_auth, OAuth2AuthBearer("any"), lambda _: True] is_authenticated = [django_auth, OAuth2AuthBearer("is_authenticated")] is_staff = [SessionAuthStaffUser(), OAuth2AuthBearer("is_staff")] diff --git a/isic/core/api/collection.py b/isic/core/api/collection.py index 1fbc4f18..52964358 100644 --- a/isic/core/api/collection.py +++ b/isic/core/api/collection.py @@ -146,7 +146,7 @@ class CollectionLicenseBreakdown(Schema): summary="Retrieve a breakdown of the licenses of the specified collection.", include_in_schema=False, ) -def collection_license_breakdown(request, id: int) -> CollectionLicenseBreakdown: +def collection_license_breakdown(request, id: int) -> dict[str, int]: qs = get_visible_objects(request.user, "core.view_collection") collection = get_object_or_404(qs, id=id) images = get_visible_objects(request.user, "core.view_image", collection.images.distinct()) @@ -179,7 +179,7 @@ def collection_populate_from_search(request, id: int, payload: SearchQueryIn): if collection.locked: return 409, {"error": "Collection is locked"} - if collection.public and payload.to_queryset(request.user).private().exists(): + if collection.public and payload.to_queryset(request.user).private().exists(): # type: ignore[attr-defined] return 409, {"error": "Collection is public and cannot contain private images."} # Pass data instead of validated_data because the celery task is going to revalidate. @@ -195,7 +195,7 @@ def collection_populate_from_search(request, id: int, payload: SearchQueryIn): class IsicIdList(Schema): - isic_ids: conlist(constr(pattern=ISIC_ID_REGEX), max_length=500) + isic_ids: conlist(constr(pattern=ISIC_ID_REGEX), max_length=500) # type: ignore[valid-type] model_config = {"extra": "forbid"} diff --git a/isic/core/api/image.py b/isic/core/api/image.py index 4e6f4bea..8f667726 100644 --- a/isic/core/api/image.py +++ b/isic/core/api/image.py @@ -46,7 +46,7 @@ class Meta: metadata: dict @staticmethod - def resolve_files(image: Image) -> dict: + def resolve_files(image: Image) -> ImageFilesOut: if settings.ISIC_PLACEHOLDER_IMAGES: full_url = f"https://picsum.photos/seed/{image.id}/1000" thumbnail_url = f"https://picsum.photos/seed/{image.id}/256" @@ -71,7 +71,7 @@ def resolve_metadata(image: Image) -> dict: for key, value in image.metadata.items(): try: - metadata[FIELD_REGISTRY[key].type][key] = value + metadata[FIELD_REGISTRY[key].type][key] = value # type: ignore[index] except KeyError: # it's probably a computed field for computed_field in image.accession.computed_fields: diff --git a/isic/core/api/user.py b/isic/core/api/user.py index ec1ffb9a..7a111b6b 100644 --- a/isic/core/api/user.py +++ b/isic/core/api/user.py @@ -1,11 +1,11 @@ from datetime import datetime from django.contrib.auth.models import User -from django.http.request import HttpRequest from django.utils import timezone from ninja import Field, ModelSchema, Router from isic.auth import is_authenticated +from isic.types import AuthenticatedHttpRequest router = Router() @@ -37,12 +37,12 @@ def resolve_full_name(obj: User): include_in_schema=True, auth=is_authenticated, ) -def user_me(request: HttpRequest): +def user_me(request: AuthenticatedHttpRequest): return request.user @router.put("/accept-terms/", include_in_schema=False, auth=is_authenticated) -def accept_terms_of_use(request: HttpRequest): +def accept_terms_of_use(request: AuthenticatedHttpRequest): if not request.user.profile.accepted_terms: request.user.profile.accepted_terms = timezone.now() request.user.profile.save(update_fields=["accepted_terms"]) diff --git a/isic/core/dsl.py b/isic/core/dsl.py index d96febba..879da169 100644 --- a/isic/core/dsl.py +++ b/isic/core/dsl.py @@ -97,6 +97,8 @@ def to_es(self, key: SearchTermKey) -> dict: elif key.field_lookup == "image_type" and self.value == "overview": self.value = "clinical: overview" + term: dict + if self.value == "*": term = {"exists": {"field": key.field_lookup}} elif self.value.startswith("*"): @@ -296,7 +298,7 @@ def make_parser( # noqa: C901 ) -> ParserElement: def make_term_keyword(name): term = Optional("-") + Keyword(name) - if term_converter: + if term_converter is not None: term.add_parse_action(term_converter) return term diff --git a/isic/core/management/commands/merge-collections.py b/isic/core/management/commands/merge-collections.py deleted file mode 100644 index 9c9febcb..00000000 --- a/isic/core/management/commands/merge-collections.py +++ /dev/null @@ -1,29 +0,0 @@ -import sys - -from django.core.exceptions import ValidationError -import djclick as click - -from isic.core.models.collection import Collection -from isic.core.services.collection import collection_merge_magic_collections - - -@click.command() -@click.argument("collection_id", nargs=-1, type=click.INT) -def merge_collections(collection_id): - if len(collection_id) < 2: - click.secho("Must provide at least 2 collection IDs to merge.", color="red", err=True) - sys.exit(1) - - collections = [Collection.objects.get(pk=id_) for id_ in collection_id] - - try: - collection_merge_magic_collections( - dest_collection=collections[0], other_collections=collections[1:] - ) - except ValidationError as e: - click.secho(e.message, color="red", err=True) - sys.exit(1) - else: - click.secho( - f"Merged {len(collections[1:])} collections into {collections[0].name}.", color="green" - ) diff --git a/isic/core/management/commands/set_isic_permissions.py b/isic/core/management/commands/set_isic_permissions.py index 86ab00c5..e10a0723 100644 --- a/isic/core/management/commands/set_isic_permissions.py +++ b/isic/core/management/commands/set_isic_permissions.py @@ -53,7 +53,7 @@ def add_staff_group(): User, ZipUpload, ]: - content_type = ContentType.objects.get_for_model(model) + content_type = ContentType.objects.get_for_model(model) # type: ignore[arg-type] for permission in ["view", "change"]: group.permissions.add( Permission.objects.get( diff --git a/isic/core/models/collection.py b/isic/core/models/collection.py index 8a61fbb5..4195e6f5 100644 --- a/isic/core/models/collection.py +++ b/isic/core/models/collection.py @@ -67,7 +67,7 @@ class Meta(TimeStampedModel.Meta): shares = models.ManyToManyField( User, through="CollectionShare", - through_fields=["collection", "grantee"], + through_fields=("collection", "grantee"), related_name="collection_shares", ) @@ -128,7 +128,7 @@ def shared_with(self): ] def full_clean(self, exclude=None, validate_unique=True): # noqa: FBT002 - if self.pk and self.public and self.images.private().exists(): + if self.pk and self.public and self.images.private().exists(): # type: ignore[attr-defined] raise ValidationError("Can't make collection public, it contains private images.") return super().full_clean(exclude=exclude, validate_unique=validate_unique) diff --git a/isic/core/models/image.py b/isic/core/models/image.py index 19b1112b..c32a6b58 100644 --- a/isic/core/models/image.py +++ b/isic/core/models/image.py @@ -21,12 +21,23 @@ from .isic_id import IsicId -class ImageQuerySet(models.QuerySet): +class ImageQuerySet(models.QuerySet["Image"]): + def public(self): + return self.filter(public=True) + + def private(self): + return self.filter(public=False) + def from_search_query(self, query: str): if query == "": return self return self.filter(parse_query(django_parser, query) or Q()) + +class ImageManager(models.Manager["Image"]): + def get_queryset(self) -> ImageQuerySet: + return ImageQuerySet(self.model, using=self._db) + def with_elasticsearch_properties(self): return self.select_related("accession__cohort").annotate( coll_pks=ArrayAgg("collections", distinct=True, default=[]), @@ -67,9 +78,9 @@ class Meta(CreationSortedTimeStampedModel.Meta): # index is used because public is filtered in every permissions check public = models.BooleanField(default=False, db_index=True) - shares = models.ManyToManyField(User, through="ImageShare", through_fields=["image", "grantee"]) + shares = models.ManyToManyField(User, through="ImageShare", through_fields=("image", "grantee")) - objects = ImageQuerySet.as_manager() + objects = ImageManager() def __str__(self): return self.isic_id @@ -99,18 +110,22 @@ def metadata(self) -> dict: """ image_metadata = deepcopy(self.accession.metadata) - for field in Accession.computed_fields: - if field.input_field_name in image_metadata: - computed_output_fields = field.transformer(image_metadata[field.input_field_name]) + for computed_field in Accession.computed_fields: + if computed_field.input_field_name in image_metadata: + computed_output_fields = computed_field.transformer( + image_metadata[computed_field.input_field_name] + ) if computed_output_fields: image_metadata.update(computed_output_fields) - del image_metadata[field.input_field_name] + del image_metadata[computed_field.input_field_name] - for field in Accession.remapped_internal_fields: - if getattr(self.accession, field.csv_field_name) is not None: - image_metadata[field.csv_field_name] = getattr(self.accession, field.csv_field_name) + for remapped_field in Accession.remapped_internal_fields: + if getattr(self.accession, remapped_field.csv_field_name) is not None: + image_metadata[remapped_field.csv_field_name] = getattr( + self.accession, remapped_field.csv_field_name + ) if "legacy_dx" in image_metadata: image_metadata["diagnosis"] = image_metadata["legacy_dx"] diff --git a/isic/core/permissions.py b/isic/core/permissions.py index 90537d70..a22d9d38 100644 --- a/isic/core/permissions.py +++ b/isic/core/permissions.py @@ -1,3 +1,4 @@ +from functools import wraps from urllib.parse import urlparse import django.apps @@ -12,7 +13,6 @@ from django.db.models.query import QuerySet from django.http.request import HttpRequest from django.shortcuts import get_object_or_404, resolve_url -from django.utils.functional import wraps from ninja.security.session import SessionAuth @@ -34,7 +34,7 @@ def view_staff(user_obj, _=None): return user_obj.is_staff -User.perms_class = UserPermissions +User.perms_class = UserPermissions # type: ignore[attr-defined] ISIC_PERMS_MAP = {} @@ -99,7 +99,7 @@ def _wrapped_view(request, *args, **kwargs): "If model should be looked up from " "string it needs format: 'app_label.ModelClass'" ) - model = apps.get_model(*splitted) + model = apps.get_model(*splitted) # type: ignore[arg-type] elif issubclass(model.__class__, Model | ModelBase | QuerySet): pass else: diff --git a/isic/core/search.py b/isic/core/search.py index 194263e3..9e794db4 100644 --- a/isic/core/search.py +++ b/isic/core/search.py @@ -1,11 +1,11 @@ from copy import deepcopy from functools import lru_cache, partial import logging -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, NotRequired, TypedDict from cachalot.api import cachalot_disabled from django.conf import settings -from django.contrib.auth.models import User +from django.contrib.auth.models import AnonymousUser, User from django.db.models.query import QuerySet from isic_metadata import FIELD_REGISTRY from isic_metadata.fields import FitzpatrickSkinType, ImageTypeEnum @@ -71,7 +71,7 @@ def get_elasticsearch_client() -> "OpenSearch": # TODO: investigate using retryable requests with transport_class RetryOnTimeoutTransport = partial(Transport, retry_on_timeout=True) # noqa: N806 - return OpenSearch(settings.ISIC_ELASTICSEARCH_URI, transport_class=RetryOnTimeoutTransport) + return OpenSearch(settings.ISIC_ELASTICSEARCH_URI, transport_class=RetryOnTimeoutTransport) # type: ignore[arg-type] def maybe_create_index() -> None: @@ -157,7 +157,7 @@ def _prettify_facets(facets: dict[str, Any]) -> dict[str, Any]: # sort the values of image_type buckets by the element in the key field facets["image_type"]["buckets"] = sorted( facets["image_type"]["buckets"], - key=lambda x: ImageTypeEnum(x["key"])._sort_order_, + key=lambda x: ImageTypeEnum(x["key"])._sort_order_, # type: ignore[attr-defined] ) return facets @@ -183,7 +183,10 @@ def facets(query: dict | None = None, collections: list[int] | None = None) -> d body=counts_body, )["aggregations"] - facets_body = { + FacetsBody = TypedDict( # noqa: UP013 + "FacetsBody", {"size": int, "aggs": dict, "query": NotRequired[dict | None]} + ) + facets_body: FacetsBody = { "size": 0, "aggs": deepcopy(DEFAULT_SEARCH_AGGREGATES), } @@ -215,7 +218,7 @@ def facets(query: dict | None = None, collections: list[int] | None = None) -> d def build_elasticsearch_query( - query: dict, user: User, collection_pks: list[int] | None = None + query: dict, user: User | AnonymousUser, collection_pks: list[int] | None = None ) -> dict: """ Build an elasticsearch query from an elasticsearch query body, a user, and collection ids. @@ -237,7 +240,7 @@ def build_elasticsearch_query( else: visible_collection_pks = None - query_dict = {"bool": {"filter": [query]}} if query else {"bool": {}} + query_dict: dict = {"bool": {"filter": [query]}} if query else {"bool": {}} if visible_collection_pks is not None: query_dict["bool"].setdefault("filter", []) diff --git a/isic/core/serializers.py b/isic/core/serializers.py index f21c40c8..61d14ea7 100644 --- a/isic/core/serializers.py +++ b/isic/core/serializers.py @@ -58,14 +58,16 @@ def from_token_representation(cls, token) -> tuple[User, SearchQueryIn]: user = get_object_or_404(User, pk=user) if user else AnonymousUser() return user, cls(query=token["query"], collections=token["collections"]) - def to_queryset(self, user: User, qs: QuerySet[Image] | None = None) -> QuerySet[Image]: - qs = qs if qs is not None else Image._default_manager.all() + def to_queryset( + self, user: User | AnonymousUser, qs: QuerySet[Image] | None = None + ) -> QuerySet[Image]: + qs = qs if qs is not None else cast(QuerySet[Image], Image.objects.all()) if self.query: - qs = qs.from_search_query(self.query) + qs = qs.from_search_query(self.query) # type: ignore[attr-defined] if self.collections: - qs = qs.filter( + qs = qs.filter( # type: ignore[union-attr] collections__in=get_visible_objects( user, "core.view_collection", @@ -75,7 +77,7 @@ def to_queryset(self, user: User, qs: QuerySet[Image] | None = None) -> QuerySet return get_visible_objects(user, "core.view_image", qs).distinct() - def to_es_query(self, user: User) -> dict: + def to_es_query(self, user: User | AnonymousUser) -> dict: es_query: dict | None = None if self.query: # we know it can't be a Q object because we're using es_parser and not django_parser diff --git a/isic/core/services/collection/doi.py b/isic/core/services/collection/doi.py index cadc20ae..d59f0fe2 100644 --- a/isic/core/services/collection/doi.py +++ b/isic/core/services/collection/doi.py @@ -1,5 +1,6 @@ import logging import random +from typing import Any from urllib import parse from django.conf import settings @@ -20,7 +21,7 @@ logger = logging.getLogger(__name__) -def collection_build_doi_preview(*, collection: Collection) -> dict: +def collection_build_doi_preview(*, collection: Collection) -> dict[str, Any]: preview = collection_build_doi( collection=collection, doi_id=f"{settings.ISIC_DATACITE_DOI_PREFIX}/123456" )["data"]["attributes"] @@ -81,7 +82,7 @@ def collection_check_create_doi_allowed(*, user: User, collection: Collection) - raise ValidationError("This collection already has a DOI.") if not collection.public: raise ValidationError("A collection must be public to issue a DOI.") - if collection.images.private().exists(): + if collection.images.private().exists(): # type: ignore[attr-defined] raise ValidationError("This collection contains private images.") if not collection.images.exists(): raise ValidationError("An empty collection cannot be the basis of a DOI.") diff --git a/isic/core/services/collection/image.py b/isic/core/services/collection/image.py index 6919590f..8f09ea69 100644 --- a/isic/core/services/collection/image.py +++ b/isic/core/services/collection/image.py @@ -29,12 +29,12 @@ def collection_add_images( if collection.locked and not ignore_lock: raise ValidationError("Can't add images to locked collection.") - if collection.public and qs.private().exists(): + if collection.public and qs.private().exists(): # type: ignore[union-attr] raise ValidationError("Can't add private images to a public collection.") with transaction.atomic(), cachalot_disabled(): CollectionImageM2M = Collection.images.through # noqa: N806 - for image_batch in itertools.batched(qs.iterator(), 5_000): + for image_batch in itertools.batched(qs.iterator(), 5_000): # type: ignore[union-attr] # ignore_conflicts is necessary to make this method idempotent (consistent with # collection.images.add) ignore_conflicts only ignores primary key, duplicate, and # exclusion constraints. we don't use primary key or exclusion here, so this should @@ -65,7 +65,7 @@ def collection_move_images( if not ignore_lock and (src_collection.locked or dest_collection.locked): raise ValidationError("Can't move images to/from a locked collection.") - if dest_collection.public and src_collection.images.private().exists(): + if dest_collection.public and src_collection.images.private().exists(): # type: ignore[attr-defined] raise ValidationError("Can't move private images to a public collection.") with transaction.atomic(): diff --git a/isic/core/services/image/__init__.py b/isic/core/services/image/__init__.py index 5bb27f39..af081479 100644 --- a/isic/core/services/image/__init__.py +++ b/isic/core/services/image/__init__.py @@ -31,6 +31,8 @@ def image_share( if image: qs = Image.objects.filter(pk=image.pk) + assert qs is not None # noqa: S101 + with transaction.atomic(), cachalot_disabled(): ImageShareM2M = Image.shares.through # noqa: N806 for image_batch in itertools.batched(qs.iterator(), 5_000): diff --git a/isic/core/tests/test_collection.py b/isic/core/tests/test_collection.py index 1d0b16ae..142fd6c6 100644 --- a/isic/core/tests/test_collection.py +++ b/isic/core/tests/test_collection.py @@ -18,6 +18,7 @@ def test_collection_form(authenticated_client, user): ) assert r.status_code == 302 collection = Collection.objects.first() + assert collection assert collection.creator == user assert collection.name == "foo" assert collection.description == "bar" diff --git a/isic/core/tests/test_metadata_masking_api.py b/isic/core/tests/test_metadata_masking_api.py index 4d3f62c3..0463a64d 100644 --- a/isic/core/tests/test_metadata_masking_api.py +++ b/isic/core/tests/test_metadata_masking_api.py @@ -48,6 +48,7 @@ def test_image_csv_rows_exposes_safe_metadata(image_with_maskable_metadata): rows = image_metadata_csv(qs=Image.objects.all()) next(rows) for row in rows: + assert isinstance(row, dict) assert "age" not in row assert "age_approx" in row assert "lesion_id" in row diff --git a/isic/core/tests/test_middleware.py b/isic/core/tests/test_middleware.py index e89d8b9d..ef051dd2 100644 --- a/isic/core/tests/test_middleware.py +++ b/isic/core/tests/test_middleware.py @@ -1,3 +1,4 @@ +from datetime import timedelta import logging from django.urls import reverse @@ -23,7 +24,7 @@ def oauth_token_factory(oauth_app): def f(user): return get_access_token_model().objects.create( user=user, - expires=timezone.now() + timezone.timedelta(seconds=300), + expires=timezone.now() + timedelta(seconds=300), token="some-token", application=oauth_app, ) diff --git a/isic/core/tests/test_view_image_list.py b/isic/core/tests/test_view_image_list.py index e8380ea3..c19db368 100644 --- a/isic/core/tests/test_view_image_list.py +++ b/isic/core/tests/test_view_image_list.py @@ -26,7 +26,7 @@ def test_image_list_metadata_download_view(staff_client, mailoutbox, user, image assert r.status_code == 200 assert len(mailoutbox) == 1 - csv_url = re.search(r"https?://[^\s]+", mailoutbox[0].body).group(0) + csv_url = re.search(r"https?://[^\s]+", mailoutbox[0].body).group(0) # type: ignore[union-attr] r = requests.get(csv_url) assert r.status_code == 200 actual = r.text diff --git a/isic/core/views/collections.py b/isic/core/views/collections.py index 398dbbfe..440938c7 100644 --- a/isic/core/views/collections.py +++ b/isic/core/views/collections.py @@ -1,6 +1,7 @@ from collections.abc import Iterable import csv from datetime import UTC, datetime +from typing import TypedDict from django.contrib import messages from django.contrib.auth.decorators import login_required @@ -13,6 +14,7 @@ from django.shortcuts import get_object_or_404, render from django.template.defaultfilters import slugify from django.urls.base import reverse +from django_stubs_ext import StrOrPromise from isic.core.filters import CollectionFilter from isic.core.forms.collection import CollectionForm @@ -55,7 +57,7 @@ def collection_list(request): collection_counts = ( Collection.images.through.objects.filter( - collection_id__in=page.object_list.values_list("pk", flat=True) + collection_id__in=page.object_list.values_list("pk", flat=True) # type: ignore[attr-defined] ) .values("collection_id") .annotate(num_images=Count("image_id")) @@ -136,6 +138,7 @@ def csv_rows(buffer: Buffer) -> Iterable[bytes]: yield writer.writeheader() for metadata_row in collection_metadata: + assert isinstance(metadata_row, dict) # noqa: S101 yield writer.writerow(metadata_row) current_time = datetime.now(tz=UTC).strftime("%Y-%m-%d") @@ -151,7 +154,10 @@ def csv_rows(buffer: Buffer) -> Iterable[bytes]: @needs_object_permission("core.create_doi", (Collection, "pk", "pk")) def collection_create_doi_(request, pk): collection = get_object_or_404(Collection, pk=pk) - context = {"collection": collection, "error": None} + Context = TypedDict( # noqa: UP013 + "Context", {"collection": Collection, "error": StrOrPromise | None, "preview": dict | None} + ) + context: Context = {"collection": collection, "error": None, "preview": None} if request.method == "POST": try: diff --git a/isic/core/views/images.py b/isic/core/views/images.py index 7254b05b..6864ab93 100644 --- a/isic/core/views/images.py +++ b/isic/core/views/images.py @@ -6,7 +6,7 @@ from django.db.models import Count from django.db.models.query import QuerySet from django.db.models.query_utils import Q -from django.http import HttpRequest, HttpResponse, HttpResponseRedirect +from django.http import HttpResponse, HttpResponseRedirect from django.shortcuts import get_object_or_404, render from django.urls import reverse @@ -15,6 +15,7 @@ from isic.core.permissions import get_visible_objects, needs_object_permission from isic.core.tasks import generate_staff_image_list_metadata_csv from isic.studies.models import Study +from isic.types import AuthenticatedHttpRequest # Show this many related images e.g. other patient images, other lesion images. # Lesions are typically <= 20 images, but patients can be hundreds. @@ -147,12 +148,12 @@ def image_browser(request): @staff_member_required -def staff_image_list_export(request: HttpRequest) -> HttpResponse: +def staff_image_list_export(request: AuthenticatedHttpRequest) -> HttpResponse: return render(request, "core/image_list_export.html") @staff_member_required -def staff_image_list_metadata_download(request: HttpRequest): +def staff_image_list_metadata_download(request: AuthenticatedHttpRequest): generate_staff_image_list_metadata_csv.delay_on_commit(request.user.id) messages.add_message( diff --git a/isic/core/views/users.py b/isic/core/views/users.py index a611bdfb..2ef1e9d9 100644 --- a/isic/core/views/users.py +++ b/isic/core/views/users.py @@ -23,7 +23,7 @@ def user_detail(request, pk): user = get_object_or_404(User.objects.select_related("profile"), pk=pk) ctx = { "user": user, - "email_addresses": user.emailaddress_set.order_by("-primary", "-verified", "email"), + "email_addresses": user.emailaddress_set.order_by("-primary", "-verified", "email"), # type: ignore[attr-defined] } ctx["sections"] = { diff --git a/isic/ingest/api.py b/isic/ingest/api.py index acb8b557..bad66274 100644 --- a/isic/ingest/api.py +++ b/isic/ingest/api.py @@ -110,6 +110,10 @@ def create_accession(request: HttpRequest, payload: AccessionIn): if not request.user.is_staff and not request.user.has_perm("ingest.add_accession", cohort): return 403, {"error": "You do not have permission to add accessions to this cohort."} + assert request.user.is_authenticated # noqa: S101 + # TODO: how to make django-ninja schema aware of S3PlaceholderFile while using str for input? + assert isinstance(payload.original_blob, S3PlaceholderFile) # noqa: S101 + return 201, accession_create( cohort=cohort, creator=request.user, diff --git a/isic/ingest/forms.py b/isic/ingest/forms.py index 41a1a7b0..4cecc8f0 100644 --- a/isic/ingest/forms.py +++ b/isic/ingest/forms.py @@ -77,6 +77,7 @@ class MergeCohortForm(forms.Form): def clean(self): cleaned_data = super().clean() + cleaned_data = cleaned_data if cleaned_data is not None else {} cohort = cleaned_data.get("cohort") cohort_to_merge = cleaned_data.get("cohort_to_merge") @@ -102,6 +103,7 @@ class PublishCohortForm(forms.Form): def clean(self): cleaned_data = super().clean() + assert cleaned_data # noqa: S101 # note that this logic is duplicated in cohort_publish_initialize, this is just # added for easier form validation. diff --git a/isic/ingest/management/commands/merge-cohorts.py b/isic/ingest/management/commands/merge-cohorts.py deleted file mode 100644 index 9dca65fc..00000000 --- a/isic/ingest/management/commands/merge-cohorts.py +++ /dev/null @@ -1,25 +0,0 @@ -import sys - -from django.core.exceptions import ValidationError -import djclick as click - -from isic.ingest.models.cohort import Cohort -from isic.ingest.services.cohort import cohort_merge - - -@click.command() -@click.argument("cohort_id", nargs=-1, type=click.INT) -def merge_cohorts(cohort_id): - if len(cohort_id) < 2: - click.secho("Must provide at least 2 cohort IDs to merge.", color="red", err=True) - sys.exit(1) - - cohorts = [Cohort.objects.get(pk=id_) for id_ in cohort_id] - - try: - cohort_merge(dest_cohort=cohorts[0], other_cohorts=cohorts[1:]) - except ValidationError as e: - click.secho(e.message, color="red", err=True) - sys.exit(1) - else: - click.secho(f"Merged {len(cohorts[1:])} cohorts into {cohorts[0].name}.", color="green") diff --git a/isic/ingest/management/commands/metadata-files-from-csv.py b/isic/ingest/management/commands/metadata-files-from-csv.py index c689ec18..03714849 100644 --- a/isic/ingest/management/commands/metadata-files-from-csv.py +++ b/isic/ingest/management/commands/metadata-files-from-csv.py @@ -7,8 +7,6 @@ from django.contrib.auth.models import User from django.core.files.uploadedfile import SimpleUploadedFile import djclick as click -import numpy as np -import pandas as pd from isic.ingest.models import Accession from isic.ingest.models.metadata_file import MetadataFile @@ -35,17 +33,14 @@ def metadata_files_from_csv(user_id, csv_path, isic_id_column): """ u = User.objects.get(pk=user_id) with Path(csv_path).open() as f: - metadata_csv = pd.read_csv(f, header=0) - - # pydantic expects None for the absence of a value, not NaN - metadata_csv = metadata_csv.replace({np.nan: None}) + reader = csv.DictReader(f) columns_to_drop = {"isic_id", "filename", isic_id_column} cohort_files = defaultdict(list) cohort_columns = defaultdict(set) - for _, (_, row) in enumerate(metadata_csv.iterrows(), start=2): + for row in reader: accession: Accession = Accession.objects.select_related("cohort").get( image__isic_id=row[isic_id_column] ) @@ -65,12 +60,12 @@ def metadata_files_from_csv(user_id, csv_path, isic_id_column): size = blob.tell() blob.seek(0) blob_name = f"cohort_{cohort_id}_metadata.csv" - blob = SimpleUploadedFile(blob_name, blob.getvalue(), "text/csv") + blob_file = SimpleUploadedFile(blob_name, blob.getvalue(), "text/csv") m = MetadataFile.objects.create( creator=u, cohort_id=cohort_id, - blob=blob, + blob=blob_file, blob_size=size, blob_name=blob_name, ) diff --git a/isic/ingest/management/commands/migrate_content_disposition.py b/isic/ingest/management/commands/migrate_content_disposition.py deleted file mode 100644 index bec867bd..00000000 --- a/isic/ingest/management/commands/migrate_content_disposition.py +++ /dev/null @@ -1,40 +0,0 @@ -import boto3 -from django.conf import settings -import djclick as click - -from isic.core.models import Segmentation -from isic.ingest.models import Accession -from isic.studies.models import Markup - -client = boto3.client("s3") - - -def make_attachment(key): - client.copy_object( - Bucket=settings.AWS_STORAGE_BUCKET_NAME, - Key=key, - CopySource=f"{settings.AWS_STORAGE_BUCKET_NAME}/{key}", - ContentDisposition="attachment", - # Content-Disposition is metadata, so it needs to be "replaced" - MetadataDirective="REPLACE", - ) - - -@click.command() -def migrate_content_disposition(): - accessions = Accession.objects.ingested() - with click.progressbar(accessions.iterator(), length=accessions.count()) as bar: - for accession in bar: - make_attachment(accession.blob.name) - make_attachment(accession.original_blob.name) - make_attachment(accession.thumbnail_256.name) - - segmentations = Segmentation.objects.all() - with click.progressbar(segmentations.iterator(), length=segmentations.count()) as bar: - for segmentation in bar: - make_attachment(segmentation.mask.name) - - markups = Markup.objects.all() - with click.progressbar(markups.iterator(), length=markups.count()) as bar: - for markup in bar: - make_attachment(markup.mask.name) diff --git a/isic/ingest/management/commands/revalidate-metadata.py b/isic/ingest/management/commands/revalidate-metadata.py index 10cb7858..f3bf775f 100644 --- a/isic/ingest/management/commands/revalidate-metadata.py +++ b/isic/ingest/management/commands/revalidate-metadata.py @@ -7,17 +7,17 @@ @click.command(help="Revalidate all accession metadata") def revalidate_metadata(): - accessions = Accession.objects.values_list("pk", "metadata") + accessions = Accession.objects.all() num_accessions = accessions.count() num_errors = 0 - with click.progressbar(accessions) as bar: - for pk, metadata in bar: + with click.progressbar(accessions.iterator()) as bar: + for accession in bar: try: - MetadataRow.model_validate(metadata) + MetadataRow.model_validate(accession.metadata) except PydanticValidationError as e: num_errors += 1 - click.echo(pk) + click.echo(accession.pk) click.echo(e.errors()) click.echo(f"{num_errors}/{num_accessions} accessions had problems.") diff --git a/isic/ingest/models/accession.py b/isic/ingest/models/accession.py index f32d1770..52250c9e 100644 --- a/isic/ingest/models/accession.py +++ b/isic/ingest/models/accession.py @@ -53,7 +53,7 @@ class Approx(Transform): lookup_name = "approx" - def as_sql(self, compiler, connection): + def as_sql(self, compiler, connection): # type: ignore[override] lhs, params = compiler.compile(self.lhs) return ( f"((ROUND(((({lhs})::double precision / 5.0))::numeric(1000, 15), 0) * 5))::integer", @@ -163,7 +163,7 @@ class RemappedField: csv_field_name: str relation_name: str internal_id_name: str - model: models.Model + model: type[models.Model] def internal_value(self, obj: models.Model) -> str: return getattr(getattr(obj, self.relation_name), self.internal_id_name) @@ -208,7 +208,7 @@ def diagnosis_split(diagnosis: str | None) -> dict[str, str | None]: parts = diagnosis.split(":") # pad parts out to 5 elements - parts += [None] * (5 - len(parts)) + parts += [None] * (5 - len(parts)) # type: ignore[list-item] return {f"diagnosis_{i + 1}": parts[i] for i in range(5)} @@ -230,7 +230,7 @@ class AccessionBlob: is_cog: bool -class Accession(CreationSortedTimeStampedModel, AccessionMetadata): +class Accession(CreationSortedTimeStampedModel, AccessionMetadata): # type: ignore[django-manager-missing] # the creator is either inherited from the zip creator, or directly attached in the # case of a single shot upload. creator = models.ForeignKey(User, on_delete=models.PROTECT, related_name="accessions") @@ -702,12 +702,12 @@ def generate_thumbnail(self) -> None: img = self._cog_thumbnail() else: with self.blob.open("rb") as blob_stream: - img: PIL.Image.Image = PIL.Image.open(blob_stream) + img = PIL.Image.open(blob_stream) # Load the image so the stream can be closed img.load() # LANCZOS provides the best anti-aliasing - img.thumbnail((256, 256), resample=PIL.Image.LANCZOS) + img.thumbnail((256, 256), resample=PIL.Image.LANCZOS) # type: ignore[attr-defined] with io.BytesIO() as thumbnail_stream: # 75 quality uses ~55% as much space as 90 quality, with only a very slight drop in @@ -788,7 +788,7 @@ def maybe_remap_internal_metadata(metadata: dict) -> bool: or field.internal_value(self) != parsed_field ): mapped = True - value, _ = field.model.objects.get_or_create( + value, _ = field.model.objects.get_or_create( # type: ignore[attr-defined] cohort=self.cohort, **{field.internal_id_name: parsed_field} ) setattr(self, field.relation_name, value) diff --git a/isic/ingest/models/cohort.py b/isic/ingest/models/cohort.py index fbdfe835..88feca08 100644 --- a/isic/ingest/models/cohort.py +++ b/isic/ingest/models/cohort.py @@ -122,8 +122,7 @@ def edit_cohort(user_obj, obj): @staticmethod def add_accession(user_obj: User, obj: Cohort) -> bool: - if obj: - return user_obj.is_authenticated and obj.contributor.owners.contains(user_obj) + return obj and user_obj.is_authenticated and obj.contributor.owners.contains(user_obj) Cohort.perms_class = CohortPermissions diff --git a/isic/ingest/models/contributor.py b/isic/ingest/models/contributor.py index bf671d10..50d3a90e 100644 --- a/isic/ingest/models/contributor.py +++ b/isic/ingest/models/contributor.py @@ -80,10 +80,11 @@ def add_contributor(user_obj, _=None): @staticmethod def add_cohort(user_obj: User, obj: Contributor) -> bool: - if obj: - return user_obj.is_authenticated and ContributorPermissions.view_contributor( - user_obj, obj - ) + return ( + obj + and user_obj.is_authenticated + and ContributorPermissions.view_contributor(user_obj, obj) + ) Contributor.perms_class = ContributorPermissions diff --git a/isic/ingest/models/lesion.py b/isic/ingest/models/lesion.py index 90d6b1f4..10222a1d 100644 --- a/isic/ingest/models/lesion.py +++ b/isic/ingest/models/lesion.py @@ -1,4 +1,5 @@ import secrets +from typing import TypedDict from django.contrib.auth.models import User from django.contrib.postgres.aggregates import BoolAnd @@ -24,7 +25,16 @@ def _default_id(): return lesion_id -class LesionQuerySet(models.QuerySet): +class LesionInfo(TypedDict): + index_image_id: str + + images_count: int + outcome_diagnosis: str + outcome_benign_malignant: str + longitudinally_monitored: bool + + +class LesionQuerySet(models.Manager["Lesion"]): def has_images(self): from isic.ingest.models import Accession @@ -93,7 +103,7 @@ class Lesion(models.Model): cohort = models.ForeignKey("Cohort", on_delete=models.CASCADE, related_name="lesions") private_lesion_id = models.CharField(max_length=255) - objects = LesionQuerySet.as_manager() + objects = LesionQuerySet() class Meta: constraints = [ @@ -151,4 +161,4 @@ def view_lesion(user_obj: User, obj: Lesion) -> bool: return LesionPermissions.view_lesion_list(user_obj).contains(obj) -Lesion.perms_class = LesionPermissions +Lesion.perms_class = LesionPermissions # type: ignore[attr-defined] diff --git a/isic/ingest/services/accession/__init__.py b/isic/ingest/services/accession/__init__.py index bf38cdc3..90445fc1 100644 --- a/isic/ingest/services/accession/__init__.py +++ b/isic/ingest/services/accession/__init__.py @@ -18,7 +18,7 @@ def accession_create( *, creator: User, cohort: Cohort, - original_blob: File, + original_blob: File | str, original_blob_name: str, original_blob_size: int, ) -> Accession: diff --git a/isic/ingest/tests/csv_streams.py b/isic/ingest/tests/csv_streams.py index dd8a535b..958c5b28 100644 --- a/isic/ingest/tests/csv_streams.py +++ b/isic/ingest/tests/csv_streams.py @@ -2,7 +2,6 @@ import csv import io import pathlib -from typing import BinaryIO data_dir = pathlib.Path(__file__).parent / "data" @@ -11,7 +10,7 @@ StreamWriter = codecs.getwriter("utf-8") -def csv_stream_valid() -> BinaryIO: +def csv_stream_valid() -> codecs.StreamWriter: file_stream = StreamWriter(io.BytesIO()) writer = csv.DictWriter(file_stream, fieldnames=["filename", "benign_malignant", "foo"]) writer.writeheader() @@ -19,7 +18,7 @@ def csv_stream_valid() -> BinaryIO: return file_stream -def csv_stream_without_filename_column() -> BinaryIO: +def csv_stream_without_filename_column() -> codecs.StreamWriter: file_stream = StreamWriter(io.BytesIO()) writer = csv.DictWriter(file_stream, fieldnames=["foo"]) writer.writeheader() @@ -27,14 +26,14 @@ def csv_stream_without_filename_column() -> BinaryIO: return file_stream -def csv_stream_bom_filename_column() -> BinaryIO: +def csv_stream_bom_filename_column() -> codecs.StreamWriter: file_stream = StreamWriter(io.BytesIO()) writer = csv.DictWriter(file_stream, fieldnames=["\ufefffilename"]) writer.writeheader() return file_stream -def csv_stream_duplicate_filenames() -> BinaryIO: +def csv_stream_duplicate_filenames() -> codecs.StreamWriter: file_stream = StreamWriter(io.BytesIO()) writer = csv.DictWriter(file_stream, fieldnames=["filename"]) writer.writeheader() diff --git a/isic/ingest/tests/test_metadata.py b/isic/ingest/tests/test_metadata.py index a5526981..520afeee 100644 --- a/isic/ingest/tests/test_metadata.py +++ b/isic/ingest/tests/test_metadata.py @@ -1,8 +1,10 @@ +import codecs import csv from decimal import Decimal import io -from typing import BinaryIO, cast +from typing import cast +from django.contrib.auth.models import User from django.urls.base import reverse from django.utils import timezone import pytest @@ -30,7 +32,7 @@ def imageless_accession(accession_factory): @pytest.fixture() -def csv_stream_diagnosis_sex() -> BinaryIO: +def csv_stream_diagnosis_sex() -> codecs.StreamWriter: file_stream = StreamWriter(io.BytesIO()) writer = csv.DictWriter(file_stream, fieldnames=["filename", "diagnosis", "sex"]) writer.writeheader() @@ -39,7 +41,7 @@ def csv_stream_diagnosis_sex() -> BinaryIO: @pytest.fixture() -def csv_stream_diagnosis_sex_lesion_patient() -> BinaryIO: +def csv_stream_diagnosis_sex_lesion_patient() -> codecs.StreamWriter: file_stream = StreamWriter(io.BytesIO()) writer = csv.DictWriter( file_stream, @@ -59,7 +61,7 @@ def csv_stream_diagnosis_sex_lesion_patient() -> BinaryIO: @pytest.fixture() -def csv_stream_diagnosis_sex_disagreeing_lesion_patient() -> BinaryIO: +def csv_stream_diagnosis_sex_disagreeing_lesion_patient() -> codecs.StreamWriter: # a version that maps the same lesion to a different patient file_stream = StreamWriter(io.BytesIO()) writer = csv.DictWriter( @@ -80,7 +82,7 @@ def csv_stream_diagnosis_sex_disagreeing_lesion_patient() -> BinaryIO: @pytest.fixture() -def csv_stream_benign() -> BinaryIO: +def csv_stream_benign() -> codecs.StreamWriter: file_stream = StreamWriter(io.BytesIO()) writer = csv.DictWriter(file_stream, fieldnames=["filename", "benign_malignant"]) writer.writeheader() @@ -89,7 +91,7 @@ def csv_stream_benign() -> BinaryIO: @pytest.fixture() -def csv_stream_diagnosis_sex_invalid() -> BinaryIO: +def csv_stream_diagnosis_sex_invalid() -> codecs.StreamWriter: file_stream = StreamWriter(io.BytesIO()) writer = csv.DictWriter(file_stream, fieldnames=["filename", "diagnosis", "sex"]) writer.writeheader() @@ -105,7 +107,7 @@ def cohort_with_accession(cohort, accession_factory): @pytest.mark.django_db() -def test_apply_metadata(accession_factory, valid_metadatafile, cohort, user): +def test_apply_metadata(accession_factory, valid_metadatafile, cohort, user) -> None: accession = accession_factory(cohort=cohort, original_blob_name="filename.jpg") update_metadata_task(user.pk, valid_metadatafile.pk) accession.refresh_from_db() @@ -141,7 +143,7 @@ def metadatafile_duplicate_filenames(cohort, metadata_file_factory, csv_stream_d @pytest.mark.django_db() -def test_valid_batch_invalid_row(): +def test_valid_batch_invalid_row() -> None: bad_batch = [ { "image_type": "dermoscopy", # error, should be 'dermoscopic' @@ -156,7 +158,7 @@ def test_valid_batch_invalid_row(): @pytest.mark.django_db() -def test_validate_metadata_step1_ignores_bom(metadatafile_bom_filename_column): +def test_validate_metadata_step1_ignores_bom(metadatafile_bom_filename_column) -> None: problems = validate_csv_format_and_filenames( MetadataFile.to_dict_reader(metadatafile_bom_filename_column.blob.open("rb")), metadatafile_bom_filename_column.cohort, @@ -167,7 +169,7 @@ def test_validate_metadata_step1_ignores_bom(metadatafile_bom_filename_column): @pytest.mark.django_db() def test_validate_metadata_step1_requires_filename_column( metadatafile_without_filename_column, -): +) -> None: problems = validate_csv_format_and_filenames( MetadataFile.to_dict_reader(metadatafile_without_filename_column.blob.open("rb")), metadatafile_without_filename_column.cohort, @@ -179,7 +181,7 @@ def test_validate_metadata_step1_requires_filename_column( @pytest.mark.django_db() def test_validate_metadata_step1_has_duplicate_filenames( metadatafile_duplicate_filenames, -): +) -> None: problems = validate_csv_format_and_filenames( MetadataFile.to_dict_reader(metadatafile_duplicate_filenames.blob.open("rb")), metadatafile_duplicate_filenames.cohort, @@ -191,7 +193,7 @@ def test_validate_metadata_step1_has_duplicate_filenames( @pytest.mark.django_db() def test_apply_metadata_step2( staff_client, cohort_with_accession, csv_stream_diagnosis_sex, metadata_file_factory -): +) -> None: metadatafile = metadata_file_factory( blob__from_func=lambda: csv_stream_diagnosis_sex, cohort=cohort_with_accession ) @@ -212,7 +214,7 @@ def test_apply_metadata_step2_invalid( metadata_file_factory, mocker, django_capture_on_commit_callbacks, -): +) -> None: metadatafile = metadata_file_factory( blob__from_func=lambda: csv_stream_diagnosis_sex_invalid, cohort=cohort_with_accession, @@ -240,7 +242,7 @@ def test_apply_metadata_step2_invalid( @pytest.mark.django_db() -def test_diagnosis_transition(staff_user, accession, image_factory): +def test_diagnosis_transition(staff_user, accession, image_factory) -> None: # this tests that the original diagnosis is preserved even though we're attempting # to change the hierarchical version. from isic_metadata.diagnosis_hierarchical import DiagnosisEnum @@ -282,7 +284,7 @@ def test_apply_metadata_step3( metadata_file_factory, mocker, django_capture_on_commit_callbacks, -): +) -> None: # TODO: refactor this test to split out the first half metadatafile = metadata_file_factory( blob__from_func=lambda: csv_stream_diagnosis_sex, cohort=cohort_with_accession @@ -338,7 +340,7 @@ def test_apply_metadata_step3_full_cohort( metadata_file_factory, mocker, django_capture_on_commit_callbacks, -): +) -> None: metadatafile = metadata_file_factory( blob__from_func=lambda: csv_stream_diagnosis_sex_lesion_patient, cohort=cohort_with_accession, @@ -385,7 +387,7 @@ def test_apply_metadata_step3_full_cohort( @pytest.mark.django_db() -def test_accession_metadata_versions(user, accession): +def test_accession_metadata_versions(user, accession) -> None: accession.update_metadata(user, {"foo": "bar"}) assert accession.metadata_versions.count() == 1 diffs = accession.metadata_versions.differences() @@ -431,7 +433,7 @@ def test_accession_metadata_versions(user, accession): @pytest.mark.django_db() -def test_accession_metadata_versions_remove(user, imageless_accession): +def test_accession_metadata_versions_remove(user, imageless_accession) -> None: imageless_accession.update_metadata(user, {"foo": "bar", "baz": "qux"}) imageless_accession.remove_unstructured_metadata(user, ["nonexistent"]) assert imageless_accession.unstructured_metadata.value == { @@ -442,7 +444,7 @@ def test_accession_metadata_versions_remove(user, imageless_accession): @pytest.mark.django_db() -def test_accession_update_metadata(user, imageless_accession): +def test_accession_update_metadata(user, imageless_accession) -> None: imageless_accession.update_metadata(user, {"sex": "male", "foo": "bar", "baz": "qux"}) assert imageless_accession.unstructured_metadata.value == { "foo": "bar", @@ -453,7 +455,7 @@ def test_accession_update_metadata(user, imageless_accession): @pytest.mark.django_db() -def test_accession_update_metadata_iddx(user, imageless_accession): +def test_accession_update_metadata_iddx(user, imageless_accession) -> None: imageless_accession.update_metadata(user, {"diagnosis": "Nevus"}) assert imageless_accession.metadata == { "diagnosis": "Benign:Benign melanocytic proliferations:Nevus" @@ -462,7 +464,7 @@ def test_accession_update_metadata_iddx(user, imageless_accession): @pytest.mark.django_db() -def test_accession_update_metadata_idempotent(user, imageless_accession): +def test_accession_update_metadata_idempotent(user, imageless_accession) -> None: imageless_accession.update_metadata(user, {"sex": "male", "foo": "bar", "baz": "qux"}) imageless_accession.update_metadata(user, {"sex": "male", "foo": "bar", "baz": "qux"}) # test the case where meta/unstructured are different, but updating wouldn't change anything @@ -476,7 +478,7 @@ def test_accession_update_metadata_idempotent(user, imageless_accession): @pytest.mark.django_db() -def test_accession_remove_unstructured_metadata(user, imageless_accession): +def test_accession_remove_unstructured_metadata(user, imageless_accession) -> None: imageless_accession.update_metadata(user, {"foo": "bar", "baz": "qux"}) imageless_accession.remove_unstructured_metadata(user, ["foo"]) assert imageless_accession.unstructured_metadata.value == {"baz": "qux"} @@ -484,7 +486,7 @@ def test_accession_remove_unstructured_metadata(user, imageless_accession): @pytest.mark.django_db() -def test_accession_remove_metadata(user, imageless_accession): +def test_accession_remove_metadata(user, imageless_accession) -> None: imageless_accession.update_metadata( user, {"diagnosis": "Melanoma Invasive", "benign_malignant": "malignant"} ) @@ -494,7 +496,7 @@ def test_accession_remove_metadata(user, imageless_accession): @pytest.mark.django_db() -def test_accession_remove_metadata_idempotent(user, imageless_accession): +def test_accession_remove_metadata_idempotent(user, imageless_accession) -> None: imageless_accession.update_metadata( user, {"diagnosis": "Melanoma Invasive", "benign_malignant": "malignant"} ) @@ -505,7 +507,7 @@ def test_accession_remove_metadata_idempotent(user, imageless_accession): @pytest.mark.django_db() -def test_accession_remove_unstructured_metadata_idempotent(user, imageless_accession): +def test_accession_remove_unstructured_metadata_idempotent(user, imageless_accession) -> None: imageless_accession.update_metadata(user, {"foo": "bar", "baz": "qux"}) imageless_accession.remove_unstructured_metadata(user, ["foo"]) imageless_accession.remove_unstructured_metadata(user, ["foo"]) @@ -514,7 +516,7 @@ def test_accession_remove_unstructured_metadata_idempotent(user, imageless_acces @pytest.fixture() -def unpublished_accepted_accession(accession_factory, user): +def unpublished_accepted_accession(accession_factory, user: User): accession = accession_factory() accession.update_metadata(user, {"sex": "female"}) accession_review_update_or_create( @@ -525,7 +527,7 @@ def unpublished_accepted_accession(accession_factory, user): @pytest.mark.django_db() @pytest.mark.parametrize("reset_review", [True, False]) -def test_update_metadata_resets_checks(user, unpublished_accepted_accession, reset_review): +def test_update_metadata_resets_checks(user, unpublished_accepted_accession, reset_review) -> None: unpublished_accepted_accession.update_metadata(user, {"sex": "male"}, reset_review=reset_review) unpublished_accepted_accession.refresh_from_db() @@ -536,15 +538,17 @@ def test_update_metadata_resets_checks(user, unpublished_accepted_accession, res @pytest.mark.django_db() -def test_update_unstructured_metadata_does_not_reset_checks(user, unpublished_accepted_accession): +def test_update_unstructured_metadata_does_not_reset_checks( + user, unpublished_accepted_accession +) -> None: unpublished_accepted_accession.update_metadata(user, {"foobar": "baz"}) unpublished_accepted_accession.refresh_from_db() assert unpublished_accepted_accession.reviewed @pytest.mark.django_db() -def test_metadata_version_serializes_decimal(user, accession: Accession): +def test_metadata_version_serializes_decimal(user: User, accession: Accession) -> None: accession.update_metadata(user, {"clin_size_long_diam_mm": 5}) assert accession.clin_size_long_diam_mm == Decimal("5.0") assert accession.metadata_versions.count() == 1 - assert accession.metadata_versions.first().metadata == {"clin_size_long_diam_mm": "5"} + assert accession.metadata_versions.first().metadata == {"clin_size_long_diam_mm": "5"} # type: ignore[union-attr] diff --git a/isic/ingest/tests/test_publish.py b/isic/ingest/tests/test_publish.py index 9171ca79..35248764 100644 --- a/isic/ingest/tests/test_publish.py +++ b/isic/ingest/tests/test_publish.py @@ -41,7 +41,7 @@ def test_publish_cohort( published_images = Image.objects.filter(accession__cohort=publishable_cohort) assert published_images.count() == 1 - assert not published_images.first().public + assert not published_images.first().public # type: ignore[union-attr] assert Collection.objects.count() == 3 publishable_cohort.refresh_from_db() magic_collection = publishable_cohort.collection diff --git a/isic/ingest/utils/metadata.py b/isic/ingest/utils/metadata.py index d0f74bfb..568f3093 100644 --- a/isic/ingest/utils/metadata.py +++ b/isic/ingest/utils/metadata.py @@ -22,7 +22,7 @@ class Meta: class Problem(BaseModel): - message: str | None = None + message: str context: list | None = None type: str | None = "error" @@ -31,13 +31,11 @@ class Problem(BaseModel): ColumnRowErrors = dict[tuple[str, str], list[int]] -def validate_csv_format_and_filenames( - rows: Iterable[dict[str, Any]], cohort: Cohort -) -> list[Problem]: +def validate_csv_format_and_filenames(rows: csv.DictReader, cohort: Cohort) -> list[Problem]: problems = [] filenames = Counter() - if "filename" not in rows.fieldnames: + if not rows.fieldnames or "filename" not in rows.fieldnames: problems.append(Problem(message="Unable to find a filename column in CSV.")) return problems diff --git a/isic/ingest/utils/mime.py b/isic/ingest/utils/mime.py index fe030d44..48b0eebe 100644 --- a/isic/ingest/utils/mime.py +++ b/isic/ingest/utils/mime.py @@ -22,7 +22,7 @@ def guess_mime_type(content: IO[bytes], source_filename: str | None = None) -> s with tempfile.SpooledTemporaryFile() as file_stream: # Copy blob_stream into a SpooledTemporaryFile so it can be used by magic, # which does not accept a file-like object - shutil.copyfileobj(content, file_stream) + shutil.copyfileobj(content, file_stream) # type: ignore[misc] file_stream.seek(0) # Calling .fileno() forces the file to be flushed to disk diff --git a/isic/ingest/views/cohort.py b/isic/ingest/views/cohort.py index b37135e2..80d1cefb 100644 --- a/isic/ingest/views/cohort.py +++ b/isic/ingest/views/cohort.py @@ -41,12 +41,12 @@ def cohort_list(request): Prefetch("cohorts", queryset=Cohort.objects.order_by("attribution", "name")) ).order_by("institution_name") - counts_by_cohort = Accession.objects.values("cohort_id").annotate( + counts_by_cohort_qs = Accession.objects.values("cohort_id").annotate( lesion_count=Count("lesion", distinct=True), patient_count=Count("patient", distinct=True), accession_count=Count("pk"), ) - counts_by_cohort = {row["cohort_id"]: row for row in counts_by_cohort} + counts_by_cohort = {row["cohort_id"]: row for row in counts_by_cohort_qs} rows = [] for contributor in contributors.all(): diff --git a/isic/settings/base.py b/isic/settings/base.py index 1523d0dc..fe663432 100644 --- a/isic/settings/base.py +++ b/isic/settings/base.py @@ -1,6 +1,7 @@ from datetime import timedelta import os from pathlib import Path +from typing import Any from celery.schedules import crontab import django_stubs_ext @@ -138,7 +139,7 @@ def _oauth2_pkce_required(client_id): WSGI_APPLICATION = "isic.wsgi.application" ROOT_URLCONF = "isic.urls" -TEMPLATES[0]["OPTIONS"]["context_processors"] += [ # noqa: F405 +TEMPLATES[0]["OPTIONS"]["context_processors"] += [ # type: ignore[index] # noqa: F405 "isic.core.context_processors.noindex", "isic.core.context_processors.sandbox_banner", "isic.core.context_processors.placeholder_images", @@ -166,8 +167,8 @@ def _oauth2_pkce_required(client_id): ISIC_DATACITE_API_URL = os.environ.get( "DJANGO_ISIC_DATACITE_API_URL", "https://api.test.datacite.org" ) -ISIC_DATACITE_USERNAME = None -ISIC_DATACITE_PASSWORD = None +ISIC_DATACITE_USERNAME: Any = None +ISIC_DATACITE_PASSWORD: Any = None ISIC_GOOGLE_ANALYTICS_PROPERTY_IDS = [ "377090260", # ISIC Home "360152967", # ISIC Gallery @@ -178,6 +179,11 @@ def _oauth2_pkce_required(client_id): "265233311", # ISDIS ] -ISIC_GOOGLE_API_JSON_KEY = None +ISIC_GOOGLE_API_JSON_KEY: str | None = None -CDN_LOG_BUCKET = None +CDN_LOG_BUCKET: Any = None + + +AWS_CLOUDFRONT_KEY: Any +AWS_CLOUDFRONT_KEY_ID: Any +AWS_S3_CUSTOM_DOMAIN: Any diff --git a/isic/settings/upstream_base.py b/isic/settings/upstream_base.py index cd1533ef..c19be8a5 100644 --- a/isic/settings/upstream_base.py +++ b/isic/settings/upstream_base.py @@ -98,7 +98,7 @@ CELERY_TASK_ACKS_ON_FAILURE_OR_TIMEOUT = True CELERY_TASK_REJECT_ON_WORKER_LOST = False CELERY_WORKER_CANCEL_LONG_RUNNING_TASKS_ON_CONNECTION_LOSS = True -CELERY_WORKER_CONCURRENCY = 1 +CELERY_WORKER_CONCURRENCY: int | None = 1 CELERY_WORKER_PREFETCH_MULTIPLIER = 1 # CORS diff --git a/isic/stats/tasks.py b/isic/stats/tasks.py index bff5468e..09170211 100644 --- a/isic/stats/tasks.py +++ b/isic/stats/tasks.py @@ -1,6 +1,7 @@ from collections import defaultdict from collections.abc import Iterable import csv +from dataclasses import dataclass import datetime from datetime import timedelta import gzip @@ -37,6 +38,7 @@ def _get_analytics_client(): from google.analytics.data_v1beta import BetaAnalyticsDataClient from google.oauth2 import service_account + assert settings.ISIC_GOOGLE_API_JSON_KEY # noqa: S101 json_acct_info = json.loads(settings.ISIC_GOOGLE_API_JSON_KEY) credentials = service_account.Credentials.from_service_account_info(json_acct_info) scoped_credentials = credentials.with_scopes( @@ -45,13 +47,19 @@ def _get_analytics_client(): return BetaAnalyticsDataClient(credentials=scoped_credentials) -def _get_google_analytics_report(client, property_id: str) -> dict: +@dataclass +class GoogleAnalyticsReportResult: + num_sessions: int + sessions_per_country: dict[str, int] + + +def _get_google_analytics_report(client, property_id: str) -> GoogleAnalyticsReportResult: from google.analytics.data_v1beta.types import DateRange, Dimension, Metric, RunReportRequest - results = { - "num_sessions": 0, - "sessions_per_country": defaultdict(int), - } + results = GoogleAnalyticsReportResult( + num_sessions=0, + sessions_per_country=defaultdict(int), + ) request = RunReportRequest( property=f"properties/{property_id}", @@ -63,8 +71,8 @@ def _get_google_analytics_report(client, property_id: str) -> dict: for row in response.rows: country_id, sessions = row.dimension_values[0].value, row.metric_values[0].value - results["sessions_per_country"][country_id] += int(sessions) - results["num_sessions"] += int(sessions) + results.sessions_per_country[country_id] += int(sessions) + results.num_sessions += int(sessions) return results @@ -105,8 +113,8 @@ def collect_google_analytics_metrics_task(): for property_id in settings.ISIC_GOOGLE_ANALYTICS_PROPERTY_IDS: results = _get_google_analytics_report(client, property_id) - num_sessions += results["num_sessions"] - for key, value in results["sessions_per_country"].items(): + num_sessions += results.num_sessions + for key, value in results.sessions_per_country.items(): sessions_per_iso_code[key] += value for iso_code, sessions in sessions_per_iso_code.items(): @@ -150,7 +158,7 @@ def _cdn_access_log_records(log_file_bytes: BytesIO) -> Iterable[dict]: ) for row in reader: yield { - "download_time": timezone.datetime.strptime( + "download_time": datetime.datetime.strptime( f"{row['date']} {row['time']}", "%Y-%m-%d %H:%M:%S" ).replace(tzinfo=datetime.UTC), "path": row["cs-uri-stem"].lstrip("/"), @@ -264,5 +272,5 @@ def _process_s3_log_file_task(log_file_bytes: BytesIO): # Ignore duplicate entries, this is necessary because another transaction can be # committed between the time of the earlier check and now. # See https://www.postgresql.org/docs/current/errcodes-appendix.html - if e.__cause__.pgcode != "23505": + if e.__cause__.pgcode != "23505": # type: ignore[union-attr] raise diff --git a/isic/stats/tests/test_tasks.py b/isic/stats/tests/test_tasks.py index b4c1e7b9..9b9caf77 100644 --- a/isic/stats/tests/test_tasks.py +++ b/isic/stats/tests/test_tasks.py @@ -7,6 +7,7 @@ from isic.stats.models import GaMetrics, ImageDownload from isic.stats.tasks import ( + GoogleAnalyticsReportResult, _cdn_access_log_records, collect_google_analytics_metrics_task, collect_image_download_records_task, @@ -26,13 +27,13 @@ def test_collect_google_analytics_task(mocker, settings): mocker.patch("isic.stats.tasks._get_analytics_client", mocker.MagicMock) mocker.patch( "isic.stats.tasks._get_google_analytics_report", - return_value={ - "num_sessions": 10, - "sessions_per_country": { + return_value=GoogleAnalyticsReportResult( + num_sessions=10, + sessions_per_country={ "US": 3, "CA": 5, }, - }, + ), ) collect_google_analytics_metrics_task() diff --git a/isic/studies/models.py b/isic/studies/models.py index 3c58df97..89607e1b 100644 --- a/isic/studies/models.py +++ b/isic/studies/models.py @@ -1,5 +1,7 @@ +from collections.abc import Generator import csv from datetime import timedelta +from typing import Any from cachalot.api import cachalot_disabled from django.contrib.auth.models import User @@ -225,7 +227,7 @@ class StudyPermissions: def view_study_results_list( user_obj: User, qs: QuerySet[Study] | None = None ) -> QuerySet[Study]: - qs: QuerySet[Study] = qs if qs is not None else Study._default_manager.all() + qs = qs if qs is not None else Study.objects.all() # There's duplication of this check in study_detail.html if user_obj.is_staff: @@ -242,7 +244,7 @@ def view_study_results(user_obj, obj): @staticmethod def view_study_list(user_obj: User, qs: QuerySet[Study] | None = None) -> QuerySet[Study]: - qs: QuerySet[Study] = qs if qs is not None else Study._default_manager.all() + qs = qs if qs is not None else Study.objects.all() if user_obj.is_staff: return qs @@ -260,7 +262,7 @@ def view_study_list(user_obj: User, qs: QuerySet[Study] | None = None) -> QueryS @staticmethod def edit_study_list(user_obj: User, qs: QuerySet[Study] | None = None) -> QuerySet[Study]: - qs: QuerySet[Study] = qs if qs is not None else Study._default_manager.all() + qs = qs if qs is not None else Study.objects.all() if user_obj.is_staff: return qs @@ -372,7 +374,7 @@ def annotation_duration(self) -> timedelta | None: class ResponseQuerySet(models.QuerySet): - def for_display(self) -> list: + def for_display(self) -> Generator[dict[str, Any], None, None]: with cachalot_disabled(): for response in ( self.annotate( diff --git a/isic/studies/tests/test_create_study.py b/isic/studies/tests/test_create_study.py index 8708e935..230292f7 100644 --- a/isic/studies/tests/test_create_study.py +++ b/isic/studies/tests/test_create_study.py @@ -41,6 +41,7 @@ def test_create_study( ) assert Study.objects.count() == 1 study = Study.objects.first() + assert study assert study.name == "foobar" assert study.description == "-" assert study.collection == collection diff --git a/isic/types.py b/isic/types.py new file mode 100644 index 00000000..3623f74e --- /dev/null +++ b/isic/types.py @@ -0,0 +1,12 @@ +from typing import Any + +from django.contrib.auth.models import User +from django.http import HttpRequest + + +class AuthenticatedHttpRequest(HttpRequest): + user: User + + +class NinjaAuthHttpRequest(HttpRequest): + auth: Any diff --git a/isic/zip_download/api.py b/isic/zip_download/api.py index aae4df18..5071405c 100644 --- a/isic/zip_download/api.py +++ b/isic/zip_download/api.py @@ -10,6 +10,7 @@ from django.conf import settings from django.contrib.sites.models import Site from django.core.signing import BadSignature, TimestampSigner +from django.db import connection, transaction from django.http.request import HttpRequest from django.http.response import Http404, HttpResponse from django.shortcuts import render @@ -23,6 +24,7 @@ from isic.core.models import CopyrightLicense, Image from isic.core.serializers import SearchQueryIn from isic.core.services import image_metadata_csv +from isic.types import NinjaAuthHttpRequest logger = logging.getLogger(__name__) zip_router = Router() @@ -32,7 +34,7 @@ def get_attributions(attributions: Iterable[str]) -> list[str]: counter = Counter(attributions) # sort by the number of images descending, then the name of the institution ascending - attributions = sorted(counter.most_common(), key=lambda v: (-v[1], v[0])) + attributions = sorted(counter.most_common(), key=lambda v: (-v[1], v[0])) # type: ignore # noqa: PGH003 # push anonymous attributions to the end attributions = sorted(attributions, key=lambda v: 1 if v[0] == "Anonymous" else 0) return [x[0] for x in attributions] @@ -64,7 +66,7 @@ def authenticate(self, request: HttpRequest, key: str | None) -> dict: return token_dict -def zip_api_auth(request: HttpRequest) -> bool: +def zip_api_auth(request: HttpRequest): """ Protects the zip listing endpoint with basic auth. @@ -88,7 +90,7 @@ def create_zip_download_url(request: HttpRequest, payload: SearchQueryIn): return f"{settings.ZIP_DOWNLOAD_SERVICE_URL}/download?zsid={token}" -create_zip_download_url.csrf_exempt = True +create_zip_download_url.csrf_exempt = True # type: ignore[attr-defined] def _cloudfront_signer(message: bytes) -> bytes: @@ -101,15 +103,26 @@ def _cloudfront_signer(message: bytes) -> bytes: @zip_router.get("/file-listing/", include_in_schema=False, auth=zip_api_auth) +@transaction.atomic def zip_file_listing( - request: HttpRequest, + request: NinjaAuthHttpRequest, ): + # use repeatable read to ensure consistent results + cursor = connection.cursor() + cursor.execute("SET TRANSACTION ISOLATION LEVEL REPEATABLE READ") + token = request.auth["token"] user, search = SearchQueryIn.from_token_representation(request.auth) + # ordering isn't necessary for the zipstreamer and can slow down the query considerably qs = search.to_queryset(user, Image.objects.select_related("accession")).order_by() file_count = qs.count() - suggested_filename = f"{qs.first().isic_id}.zip" if file_count == 1 else "ISIC-images.zip" + if file_count == 1: + only_image = qs.first() + assert isinstance(only_image, Image) # noqa: S101 + suggested_filename = f"{only_image.isic_id}.zip" + else: + suggested_filename = "ISIC-images.zip" if settings.ZIP_DOWNLOAD_WILDCARD_URLS: # this is a performance optimization. repeated signing of individual urls @@ -178,7 +191,7 @@ def zip_file_listing( @zip_router.get("/metadata-file/", include_in_schema=False, auth=ZipDownloadTokenAuth()) -def zip_file_metadata_file(request: HttpRequest): +def zip_file_metadata_file(request: NinjaAuthHttpRequest): user, search = SearchQueryIn.from_token_representation(request.auth) qs = search.to_queryset(user, Image.objects.select_related("accession__cohort").distinct()) response = HttpResponse(content_type="text/csv") @@ -188,13 +201,14 @@ def zip_file_metadata_file(request: HttpRequest): writer.writeheader() for metadata_row in metadata_file: + assert isinstance(metadata_row, dict) # noqa: S101 writer.writerow(metadata_row) return response @zip_router.get("/attribution-file/", include_in_schema=False, auth=ZipDownloadTokenAuth()) -def zip_file_attribution_file(request: HttpRequest): +def zip_file_attribution_file(request: NinjaAuthHttpRequest): user, search = SearchQueryIn.from_token_representation(request.auth) qs = search.to_queryset(user, Image.objects.select_related("accession__cohort").distinct()) attributions = get_attributions(qs.values_list("accession__cohort__attribution", flat=True)) diff --git a/isic/zip_download/tests/test_zip_download.py b/isic/zip_download/tests/test_zip_download.py index ecfec263..79a85891 100644 --- a/isic/zip_download/tests/test_zip_download.py +++ b/isic/zip_download/tests/test_zip_download.py @@ -34,7 +34,7 @@ def zip_basic_auth(): } -@pytest.mark.django_db() +@pytest.mark.django_db(transaction=True) @pytest.mark.usefixtures("_random_images_with_licenses") def test_zip_download_licenses(authenticated_client): r = authenticated_client.post( @@ -57,7 +57,7 @@ def test_zip_download_licenses(authenticated_client): assert not any("CC-BY-NC" in result["url"] for result in r.json()["files"]) -@pytest.mark.django_db() +@pytest.mark.django_db(transaction=True) @pytest.mark.usefixtures("_random_images_with_licenses") def test_zip_download_listing(authenticated_client, zip_basic_auth): r = authenticated_client.post( @@ -79,7 +79,7 @@ def test_zip_download_listing(authenticated_client, zip_basic_auth): assert len(r.json()["files"]) == 6, r.json() -@pytest.mark.django_db() +@pytest.mark.django_db(transaction=True) @pytest.mark.usefixtures("_random_images_with_licenses") def test_zip_download_listing_wildcard_urls(authenticated_client, zip_basic_auth, settings, mocker): r = authenticated_client.post( From 4d8269593ec5226e7758cf58434c3dcabb2f84cb Mon Sep 17 00:00:00 2001 From: Dan LaManna Date: Tue, 15 Oct 2024 12:53:01 -0400 Subject: [PATCH 3/4] Replace check with condition for CheckConstraint --- isic/core/migrations/0002_initial.py | 8 ++++---- isic/core/models/collection.py | 2 +- isic/core/models/girder_image.py | 4 ++-- isic/core/models/image.py | 2 +- isic/ingest/migrations/0001_initial.py | 12 ++++++------ .../0003_accession_accession_is_cog_mosaic.py | 2 +- isic/ingest/models/accession.py | 12 ++++++------ isic/ingest/models/lesion.py | 2 +- isic/ingest/models/patient.py | 2 +- isic/ingest/models/zip_upload.py | 2 +- isic/stats/migrations/0001_initial.py | 4 ++-- isic/stats/models.py | 5 +++-- isic/studies/migrations/0001_initial.py | 4 ++-- isic/studies/models.py | 4 ++-- 14 files changed, 33 insertions(+), 32 deletions(-) diff --git a/isic/core/migrations/0002_initial.py b/isic/core/migrations/0002_initial.py index 1aca1f8d..a3111ba1 100644 --- a/isic/core/migrations/0002_initial.py +++ b/isic/core/migrations/0002_initial.py @@ -153,7 +153,7 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name="imageshare", constraint=models.CheckConstraint( - check=models.Q(("grantor", models.F("grantee")), _negated=True), + condition=models.Q(("grantor", models.F("grantee")), _negated=True), name="imageshare_grantor_grantee_diff_check", ), ), @@ -176,7 +176,7 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name="girderimage", constraint=models.CheckConstraint( - check=models.Q( + condition=models.Q( ("status", "unknown"), ("status", "non_image"), ("accession__isnull", False), @@ -188,7 +188,7 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name="girderimage", constraint=models.CheckConstraint( - check=models.Q( + condition=models.Q( ("status", "non_image"), models.Q(("stripped_blob_dm", ""), _negated=True), _connector="OR", @@ -199,7 +199,7 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name="collectionshare", constraint=models.CheckConstraint( - check=models.Q(("grantor", models.F("grantee")), _negated=True), + condition=models.Q(("grantor", models.F("grantee")), _negated=True), name="collectionshare_grantor_grantee_diff_check", ), ), diff --git a/isic/core/models/collection.py b/isic/core/models/collection.py index 4195e6f5..b12627b2 100644 --- a/isic/core/models/collection.py +++ b/isic/core/models/collection.py @@ -139,7 +139,7 @@ class Meta(TimeStampedModel.Meta): constraints = [ CheckConstraint( name="collectionshare_grantor_grantee_diff_check", - check=~Q(grantor=F("grantee")), + condition=~Q(grantor=F("grantee")), ), UniqueConstraint( name="collectionshare_grantor_collection_grantee_unique", diff --git a/isic/core/models/girder_image.py b/isic/core/models/girder_image.py index a7cc2ef9..e3dca28b 100644 --- a/isic/core/models/girder_image.py +++ b/isic/core/models/girder_image.py @@ -88,13 +88,13 @@ class Meta: constraints = [ models.CheckConstraint( name="non_unknown_have_accession", - check=Q(status=GirderImageStatus.UNKNOWN) + condition=Q(status=GirderImageStatus.UNKNOWN) | Q(status=GirderImageStatus.NON_IMAGE) | Q(accession__isnull=False), ), models.CheckConstraint( name="non_non_image_have_stripped_blob_dm", - check=Q(status=GirderImageStatus.NON_IMAGE) | ~Q(stripped_blob_dm=""), + condition=Q(status=GirderImageStatus.NON_IMAGE) | ~Q(stripped_blob_dm=""), ), ] diff --git a/isic/core/models/image.py b/isic/core/models/image.py index c32a6b58..ace90b7a 100644 --- a/isic/core/models/image.py +++ b/isic/core/models/image.py @@ -191,7 +191,7 @@ class Meta(TimeStampedModel.Meta): constraints = [ CheckConstraint( name="imageshare_grantor_grantee_diff_check", - check=~Q(grantor=F("grantee")), + condition=~Q(grantor=F("grantee")), ), models.UniqueConstraint( name="imageshare_grantor_image_grantee_unique", diff --git a/isic/ingest/migrations/0001_initial.py b/isic/ingest/migrations/0001_initial.py index a6c4c918..51b85ec7 100644 --- a/isic/ingest/migrations/0001_initial.py +++ b/isic/ingest/migrations/0001_initial.py @@ -796,7 +796,7 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name="zipupload", constraint=models.CheckConstraint( - check=models.Q( + condition=models.Q( models.Q( ("status", "failed"), models.Q(("fail_reason", ""), _negated=True), @@ -822,7 +822,7 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name="patient", constraint=models.CheckConstraint( - check=models.Q(("id__regex", "^IP_[0-9]{7}$")), + condition=models.Q(("id__regex", "^IP_[0-9]{7}$")), name="patient_id_valid_format", ), ), @@ -835,7 +835,7 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name="lesion", constraint=models.CheckConstraint( - check=models.Q(("id__regex", "^IL_[0-9]{7}$")), + condition=models.Q(("id__regex", "^IL_[0-9]{7}$")), name="lesion_id_valid_format", ), ), @@ -902,14 +902,14 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name="accession", constraint=models.CheckConstraint( - check=models.Q(("original_blob_name", models.F("blob_name")), _negated=True), + condition=models.Q(("original_blob_name", models.F("blob_name")), _negated=True), name="accession_blob_name_not_original_blob_name", ), ), migrations.AddConstraint( model_name="accession", constraint=models.CheckConstraint( - check=models.Q( + condition=models.Q( models.Q( ("blob_size__isnull", False), ("height__isnull", False), @@ -928,7 +928,7 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name="accession", constraint=models.CheckConstraint( - check=models.Q( + condition=models.Q( models.Q( ("concomitant_biopsy", True), ("diagnosis_confirm_type", "histopathology"), diff --git a/isic/ingest/migrations/0003_accession_accession_is_cog_mosaic.py b/isic/ingest/migrations/0003_accession_accession_is_cog_mosaic.py index 9cdfc67f..477b1301 100644 --- a/isic/ingest/migrations/0003_accession_accession_is_cog_mosaic.py +++ b/isic/ingest/migrations/0003_accession_accession_is_cog_mosaic.py @@ -13,7 +13,7 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name="accession", constraint=models.CheckConstraint( - check=models.Q( + condition=models.Q( ("image_type__isnull", True), models.Q( ( diff --git a/isic/ingest/models/accession.py b/isic/ingest/models/accession.py index 52250c9e..e2c2566d 100644 --- a/isic/ingest/models/accession.py +++ b/isic/ingest/models/accession.py @@ -319,12 +319,12 @@ class Meta(CreationSortedTimeStampedModel.Meta): # the original blob name should always be hidden, so blob_name shouldn't be the same CheckConstraint( name="accession_blob_name_not_original_blob_name", - check=~Q(original_blob_name=F("blob_name")), + condition=~Q(original_blob_name=F("blob_name")), ), # require blob_size / width / height for succeeded accessions CheckConstraint( name="accession_succeeded_blob_fields", - check=Q( + condition=Q( status=AccessionStatus.SUCCEEDED, thumbnail_256_size__isnull=False, blob_size__isnull=False, @@ -337,7 +337,7 @@ class Meta(CreationSortedTimeStampedModel.Meta): ), CheckConstraint( name="accession_concomitant_biopsy_diagnosis_confirm_type", - check=Q( + condition=Q( concomitant_biopsy=True, diagnosis_confirm_type="histopathology", ) @@ -372,12 +372,12 @@ class Meta(CreationSortedTimeStampedModel.Meta): # (and is_cog) is set. CheckConstraint( name="accession_is_cog_mosaic", - check=Q(is_cog=False) + condition=Q(is_cog=False) | Q(image_type__isnull=True) | Q(image_type=ImageTypeEnum.rcm_mosaic), ), - CheckConstraint(name="valid_diagnosis", check=Q(diagnosis__in=DiagnosisEnum)), - CheckConstraint(name="valid_legacy_dx", check=Q(legacy_dx__in=LegacyDxEnum)), + CheckConstraint(name="valid_diagnosis", condition=Q(diagnosis__in=DiagnosisEnum)), + CheckConstraint(name="valid_legacy_dx", condition=Q(legacy_dx__in=LegacyDxEnum)), ] indexes = [ diff --git a/isic/ingest/models/lesion.py b/isic/ingest/models/lesion.py index 10222a1d..d7d6326f 100644 --- a/isic/ingest/models/lesion.py +++ b/isic/ingest/models/lesion.py @@ -110,7 +110,7 @@ class Meta: UniqueConstraint(fields=["private_lesion_id", "cohort"], name="unique_lesion"), CheckConstraint( name="lesion_id_valid_format", - check=Q(id__regex=f"^{LESION_ID_REGEX}$"), + condition=Q(id__regex=f"^{LESION_ID_REGEX}$"), ), ] diff --git a/isic/ingest/models/patient.py b/isic/ingest/models/patient.py index ddd1e7c1..fb015d72 100644 --- a/isic/ingest/models/patient.py +++ b/isic/ingest/models/patient.py @@ -32,7 +32,7 @@ class Meta: UniqueConstraint(fields=["private_patient_id", "cohort"], name="unique_patient"), CheckConstraint( name="patient_id_valid_format", - check=Q(id__regex=f"^{PATIENT_ID_REGEX}$"), + condition=Q(id__regex=f"^{PATIENT_ID_REGEX}$"), ), ] diff --git a/isic/ingest/models/zip_upload.py b/isic/ingest/models/zip_upload.py index 8e1b8ecf..4ea746bc 100644 --- a/isic/ingest/models/zip_upload.py +++ b/isic/ingest/models/zip_upload.py @@ -39,7 +39,7 @@ class Meta(CreationSortedTimeStampedModel.Meta): UniqueConstraint(name="zipupload_unique_blob", fields=["blob"], condition=~Q(blob="")), CheckConstraint( name="zipupload_fail_reason_requires_failed_status", - check=(Q(status=ZipUploadStatus.FAILED) & ~Q(fail_reason="")) + condition=(Q(status=ZipUploadStatus.FAILED) & ~Q(fail_reason="")) | ~Q(status=ZipUploadStatus.FAILED), ), ] diff --git a/isic/stats/migrations/0001_initial.py b/isic/stats/migrations/0001_initial.py index 79159cfc..0e1d4959 100644 --- a/isic/stats/migrations/0001_initial.py +++ b/isic/stats/migrations/0001_initial.py @@ -80,14 +80,14 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name="gametrics", constraint=models.CheckConstraint( - check=models.Q(("range_start__lt", models.F("range_end"))), + condition=models.Q(("range_start__lt", models.F("range_end"))), name="range_end_gt_range_start", ), ), migrations.AddConstraint( model_name="imagedownload", constraint=models.CheckConstraint( - check=models.Q(("download_time__lt", models.F("created"))), + condition=models.Q(("download_time__lt", models.F("created"))), name="download_occurred_before_tracking", ), ), diff --git a/isic/stats/models.py b/isic/stats/models.py index 0cb2b626..5c7c90fd 100644 --- a/isic/stats/models.py +++ b/isic/stats/models.py @@ -12,7 +12,7 @@ class GaMetrics(TimeStampedModel): class Meta(TimeStampedModel.Meta): constraints = [ CheckConstraint( - name="range_end_gt_range_start", check=Q(range_start__lt=F("range_end")) + name="range_end_gt_range_start", condition=Q(range_start__lt=F("range_end")) ) ] @@ -35,7 +35,8 @@ class ImageDownload(models.Model): class Meta: constraints = [ CheckConstraint( - name="download_occurred_before_tracking", check=Q(download_time__lt=F("created")) + name="download_occurred_before_tracking", + condition=Q(download_time__lt=F("created")), ), UniqueConstraint(name="unique_request_id", fields=["request_id"]), ] diff --git a/isic/studies/migrations/0001_initial.py b/isic/studies/migrations/0001_initial.py index c7c235b8..8d7b44a8 100644 --- a/isic/studies/migrations/0001_initial.py +++ b/isic/studies/migrations/0001_initial.py @@ -477,7 +477,7 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name="response", constraint=models.CheckConstraint( - check=django.db.models.lookups.Exact( + condition=django.db.models.lookups.Exact( lhs=models.Func( "choice", "value", @@ -504,7 +504,7 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name="annotation", constraint=models.CheckConstraint( - check=models.Q(("start_time__lte", models.F("created"))), + condition=models.Q(("start_time__lte", models.F("created"))), name="annotation_start_time_check", ), ), diff --git a/isic/studies/models.py b/isic/studies/models.py index 89607e1b..41dac2d3 100644 --- a/isic/studies/models.py +++ b/isic/studies/models.py @@ -348,7 +348,7 @@ class Meta: constraints = [ CheckConstraint( name="annotation_start_time_check", - check=Q(start_time__lte=F("created")), + condition=Q(start_time__lte=F("created")), ), ] unique_together = [["study", "task", "image", "annotator"]] @@ -431,7 +431,7 @@ class Meta: constraints = [ CheckConstraint( name="response_choice_or_value_check", - check=Exact( + condition=Exact( lhs=Func( "choice", "value", From 49470d5d63602b6dc3ab63e3ffe7db0fd24f6687 Mon Sep 17 00:00:00 2001 From: Dan LaManna Date: Tue, 15 Oct 2024 14:00:03 -0400 Subject: [PATCH 4/4] Fix/ignore misc warnings --- isic/ingest/forms.py | 3 ++- tox.ini | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/isic/ingest/forms.py b/isic/ingest/forms.py index 4cecc8f0..7e9643c2 100644 --- a/isic/ingest/forms.py +++ b/isic/ingest/forms.py @@ -38,12 +38,13 @@ class Meta: model = Contributor fields = [ "institution_name", - "institution_url", "legal_contact_info", "default_copyright_license", "default_attribution", ] + institution_url = forms.URLField(assume_scheme="http") + class SingleAccessionUploadForm(forms.Form): original_blob = S3FormFileField(model_field_id="ingest.Accession.original_blob", label="Image") diff --git a/tox.ini b/tox.ini index ce415c5d..ba661e61 100644 --- a/tox.ini +++ b/tox.ini @@ -110,3 +110,5 @@ DJANGO_SETTINGS_MODULE = isic.settings.testing addopts = --strict-markers --showlocals filterwarnings = ignore::DeprecationWarning:pkg_resources + ignore:.*Factory._after_postgeneration:DeprecationWarning + ignore:set FASTDEV_STRICT_IF:DeprecationWarning