Skip to content

Commit

Permalink
Merge pull request #950 from ImageMarkup/isic-36-add-iddx
Browse files Browse the repository at this point in the history
  • Loading branch information
danlamanna authored Oct 2, 2024
2 parents 494e881 + 3bea461 commit 97a1be9
Show file tree
Hide file tree
Showing 14 changed files with 230 additions and 65 deletions.
8 changes: 8 additions & 0 deletions isic/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from typing import Final

from django.contrib.auth.models import Group, User
from django.test.client import Client
from isic_metadata.fields import DiagnosisEnum
import pytest
from pytest_factoryboy import register

Expand Down Expand Up @@ -27,6 +30,11 @@

from .factories import ProfileFactory, UserFactory

MELANOMA: Final = (
DiagnosisEnum.malignant_malignant_melanocytic_proliferations_melanoma_melanoma_invasive
)
NEVUS: Final = DiagnosisEnum.benign_benign_melanocytic_proliferations_nevus_nevus_spitz


@pytest.fixture(autouse=True)
def _setup_groups(request):
Expand Down
3 changes: 3 additions & 0 deletions isic/core/dsl.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,9 @@ def convert_term(s, loc, toks):
for field in Accession.remapped_internal_fields:
field_to_lookup_map[field.csv_field_name] = f"accession__{field.relation_name}__id"

# transition code to map old diagnosis searches to the legacy dx
field_to_lookup_map["diagnosis"] = "accession__legacy_dx"

if toks[0] in field_to_lookup_map:
return SearchTermKey(field_to_lookup_map[toks[0]], negate)

Expand Down
4 changes: 4 additions & 0 deletions isic/core/models/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ def metadata(self) -> dict:
if getattr(self.accession, field.csv_field_name) is not None:
image_metadata[field.csv_field_name] = getattr(self.accession, field.csv_field_name)

if "legacy_dx" in image_metadata:
image_metadata["diagnosis"] = image_metadata["legacy_dx"]
del image_metadata["legacy_dx"]

return image_metadata

def to_elasticsearch_document(self, *, body_only=False) -> dict:
Expand Down
21 changes: 18 additions & 3 deletions isic/core/services/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,16 @@ def staff_image_metadata_csv(
if computed_output_fields:
value.update(computed_output_fields)

if "legacy_dx" in value:
value["diagnosis"] = value["legacy_dx"]
del value["legacy_dx"]
else:
value["diagnosis"] = None

yield value


def image_metadata_csv(
def image_metadata_csv( # noqa: C901
*, qs: QuerySet[Image]
) -> Generator[list[str] | dict[str, str | bool | float], None, None]:
"""
Expand All @@ -160,13 +166,18 @@ def image_metadata_csv(
counts = accession_qs.aggregate(**{k: Count(k) for k in Accession.metadata_keys()})
used_metadata_keys = [k for k, v in counts.items() if v > 0]

if "legacy_dx" in used_metadata_keys:
used_metadata_keys.remove("legacy_dx")

for field in Accession.remapped_internal_fields:
if accession_qs.exclude(**{field.relation_name: None}).exists():
used_metadata_keys.append(field.csv_field_name) # noqa: PERF401

for computed_field in Accession.computed_fields:
if computed_field.input_field_name in used_metadata_keys:
used_metadata_keys.remove(computed_field.input_field_name)
if computed_field.input_field_name != "diagnosis":
used_metadata_keys.remove(computed_field.input_field_name)

used_metadata_keys += computed_field.output_field_names

fieldnames = headers + sorted(used_metadata_keys)
Expand Down Expand Up @@ -200,6 +211,10 @@ def image_metadata_csv(
)
if computed_fields:
image.update(computed_fields)
del image[computed_field.input_field_name]

if computed_field.input_field_name != "diagnosis":
del image[computed_field.input_field_name]
else:
image["diagnosis"] = image.pop("legacy_dx")

yield {k: v for k, v in image.items() if k in fieldnames}
42 changes: 21 additions & 21 deletions isic/core/tests/test_dsl.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
("-mel_thick_mm:*", ~Q(accession__mel_thick_mm__isnull=False)),
(
"-diagnosis:* OR diagnosis:foobar",
~Q(accession__diagnosis__isnull=False) | Q(accession__diagnosis="foobar"),
~Q(accession__legacy_dx__isnull=False) | Q(accession__legacy_dx="foobar"),
),
("age_approx:[50 TO *]", ParseException),
("-melanocytic:*", ~Q(accession__melanocytic__isnull=False)),
Expand All @@ -29,7 +29,7 @@
),
(
"-diagnosis:foo*",
~Q(accession__diagnosis__startswith="foo") | Q(accession__diagnosis__isnull=True),
~Q(accession__legacy_dx__startswith="foo") | Q(accession__legacy_dx__isnull=True),
),
(
"-age_approx:[50 TO 70]",
Expand All @@ -38,13 +38,13 @@
),
(
"-diagnosis:foobar OR (diagnosis:foobaz AND (-diagnosis:foo* OR age_approx:50))",
(~Q(accession__diagnosis="foobar") | Q(accession__diagnosis__isnull=True))
(~Q(accession__legacy_dx="foobar") | Q(accession__legacy_dx__isnull=True))
| (
Q(accession__diagnosis="foobaz")
Q(accession__legacy_dx="foobaz")
& (
(
~Q(accession__diagnosis__startswith="foo")
| Q(accession__diagnosis__isnull=True)
~Q(accession__legacy_dx__startswith="foo")
| Q(accession__legacy_dx__isnull=True)
)
| Q(accession__age__approx=50)
)
Expand All @@ -58,27 +58,27 @@
("rcm_case_id:123*", Q(accession__rcm_case__id__startswith="123")),
("rcm_case_id:*123", Q(accession__rcm_case__id__endswith="123")),
('copyright_license:"CC-0"', Q(accession__copyright_license="CC-0")),
("diagnosis:foobar", Q(accession__diagnosis="foobar")),
('diagnosis:"foo bar"', Q(accession__diagnosis="foo bar")),
("diagnosis:foo*", Q(accession__diagnosis__startswith="foo")),
("diagnosis:*foo", Q(accession__diagnosis__endswith="foo")),
('diagnosis:"foobar"', Q(accession__diagnosis="foobar")),
('diagnosis:"foo*"', Q(accession__diagnosis__startswith="foo")),
('diagnosis:"*foo"', Q(accession__diagnosis__endswith="foo")),
("diagnosis:foobar", Q(accession__legacy_dx="foobar")),
('diagnosis:"foo bar"', Q(accession__legacy_dx="foo bar")),
("diagnosis:foo*", Q(accession__legacy_dx__startswith="foo")),
("diagnosis:*foo", Q(accession__legacy_dx__endswith="foo")),
('diagnosis:"foobar"', Q(accession__legacy_dx="foobar")),
('diagnosis:"foo*"', Q(accession__legacy_dx__startswith="foo")),
('diagnosis:"*foo"', Q(accession__legacy_dx__endswith="foo")),
(
"diagnosis:foobar AND diagnosis:foobaz",
Q(accession__diagnosis="foobar") & Q(accession__diagnosis="foobaz"),
Q(accession__legacy_dx="foobar") & Q(accession__legacy_dx="foobaz"),
),
(
"diagnosis:foobar OR diagnosis:foobaz",
Q(accession__diagnosis="foobar") | Q(accession__diagnosis="foobaz"),
Q(accession__legacy_dx="foobar") | Q(accession__legacy_dx="foobaz"),
),
(
"diagnosis:foobar OR (diagnosis:foobaz AND (diagnosis:foo* OR age_approx:50))",
Q(accession__diagnosis="foobar")
Q(accession__legacy_dx="foobar")
| (
Q(accession__diagnosis="foobaz")
& (Q(accession__diagnosis__startswith="foo") | Q(accession__age__approx=50))
Q(accession__legacy_dx="foobaz")
& (Q(accession__legacy_dx__startswith="foo") | Q(accession__age__approx=50))
),
),
("age_approx:50", Q(accession__age__approx=50)),
Expand All @@ -96,11 +96,11 @@
),
(
"diagnosis:foo AND age_approx:[10 TO 12.5] AND diagnosis:bar AND diagnosis:baz",
Q(accession__diagnosis="foo")
Q(accession__legacy_dx="foo")
& Q(accession__age__approx__gte=10)
& Q(accession__age__approx__lte=12.5)
& Q(accession__diagnosis="bar")
& Q(accession__diagnosis="baz"),
& Q(accession__legacy_dx="bar")
& Q(accession__legacy_dx="baz"),
),
(
"mel_thick_mm:[0 TO 0.5]",
Expand Down
21 changes: 18 additions & 3 deletions isic/core/tests/test_metadata_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from isic.core.models.image import Image
from isic.core.services import image_metadata_csv, staff_image_metadata_csv
from isic.ingest.models.accession import Accession


@pytest.fixture()
Expand All @@ -14,7 +15,7 @@ def image_with_metadata(image):
{
"age": 32,
"benign_malignant": "benign",
"diagnosis": "nevus",
"diagnosis": "Nevus",
"patient_id": "supersecretpatientid",
"lesion_id": "supersecretlesionid",
"rcm_case_id": "supersecretrcmcaseid",
Expand All @@ -23,6 +24,10 @@ def image_with_metadata(image):
},
ignore_image_check=True,
)

# TODO: transition code that should be removed when iddx/legacy_dx stuff is removed
Accession.objects.filter(id=image.accession.id).update(legacy_dx="nevus")

return image


Expand All @@ -36,7 +41,12 @@ def test_image_metadata_csv_rows_correct(image_with_metadata):
"attribution": image_with_metadata.accession.cohort.attribution,
"benign_malignant": image_with_metadata.accession.benign_malignant,
"copyright_license": image_with_metadata.accession.copyright_license,
"diagnosis": image_with_metadata.accession.diagnosis,
"diagnosis": "nevus",
"diagnosis_1": "Benign",
"diagnosis_2": "Benign melanocytic proliferations",
"diagnosis_3": "Nevus",
"diagnosis_4": None,
"diagnosis_5": None,
"image_type": image_with_metadata.accession.image_type,
"isic_id": image_with_metadata.isic_id,
"lesion_id": image_with_metadata.accession.lesion_id,
Expand All @@ -58,7 +68,12 @@ def test_staff_image_metadata_csv_rows_correct(image_with_metadata):
"cohort_id": image_with_metadata.accession.cohort_id,
"cohort": image_with_metadata.accession.cohort.name,
"copyright_license": image_with_metadata.accession.copyright_license,
"diagnosis": image_with_metadata.accession.diagnosis,
"diagnosis": "nevus",
"diagnosis_1": "Benign",
"diagnosis_2": "Benign melanocytic proliferations",
"diagnosis_3": "Nevus",
"diagnosis_4": None,
"diagnosis_5": None,
"image_type": image_with_metadata.accession.image_type,
"isic_id": image_with_metadata.isic_id,
"original_filename": image_with_metadata.accession.original_blob_name,
Expand Down
14 changes: 11 additions & 3 deletions isic/core/tests/test_search.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from django.urls import reverse
from isic_metadata.fields import ImageTypeEnum
from isic_metadata.fields import DiagnosisEnum, ImageTypeEnum
import pytest
from pytest_lazy_fixtures import lf

Expand All @@ -20,8 +20,16 @@ def private_searchable_image(image_factory, _search_index):
@pytest.fixture()
def searchable_images(image_factory, _search_index):
images = [
image_factory(public=True, accession__diagnosis="melanoma"),
image_factory(public=False, accession__diagnosis="nevus"),
image_factory(
public=True,
accession__diagnosis=DiagnosisEnum.malignant_malignant_melanocytic_proliferations_melanoma_melanoma_invasive,
accession__legacy_dx="melanoma",
),
image_factory(
public=False,
accession__diagnosis=DiagnosisEnum.benign_benign_melanocytic_proliferations_nevus_nevus_spitz,
accession__legacy_dx="nevus",
),
]
for image in images:
add_to_search_index(image)
Expand Down
14 changes: 12 additions & 2 deletions isic/core/tests/test_view_image_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_image_list_metadata_download_view(staff_client, mailoutbox, user, image
"lesion_id": "foo",
"patient_id": "bar",
"rcm_case_id": "baz",
"diagnosis": "melanoma",
"diagnosis": "Melanoma Invasive",
"image_type": "RCM: macroscopic",
},
ignore_image_check=True,
Expand All @@ -42,6 +42,11 @@ def test_image_list_metadata_download_view(staff_client, mailoutbox, user, image
"age",
"age_approx",
"diagnosis",
"diagnosis_1",
"diagnosis_2",
"diagnosis_3",
"diagnosis_4",
"diagnosis_5",
"image_type",
"private_lesion_id",
"lesion_id",
Expand All @@ -65,7 +70,12 @@ def test_image_list_metadata_download_view(staff_client, mailoutbox, user, image
image.public,
"57",
"55",
"melanoma",
"",
"Malignant",
"Malignant melanocytic proliferations (Melanoma)",
"Melanoma Invasive",
"",
"",
"RCM: macroscopic",
"foo",
image.accession.lesion_id,
Expand Down
17 changes: 17 additions & 0 deletions isic/ingest/migrations/0005_accession_legacy_dx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 5.1.1 on 2024-10-01 18:31

from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
("ingest", "0004_remove_accession_accession_is_cog_mosaic_and_more"),
]

operations = [
migrations.AddField(
model_name="accession",
name="legacy_dx",
field=models.CharField(blank=True, max_length=255, null=True),
),
]
45 changes: 44 additions & 1 deletion isic/ingest/models/accession.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class AccessionMetadata(models.Model):
anatom_site_general = models.CharField(max_length=255, null=True, blank=True)
benign_malignant = models.CharField(max_length=255, null=True, blank=True)
diagnosis = models.CharField(max_length=255, null=True, blank=True)
legacy_dx = models.CharField(max_length=255, null=True, blank=True)
diagnosis_confirm_type = models.CharField(max_length=255, null=True, blank=True)
personal_hx_mm = models.BooleanField(null=True, blank=True)
family_hx_mm = models.BooleanField(null=True, blank=True)
Expand Down Expand Up @@ -190,6 +191,22 @@ class ComputedMetadataField:
es_aggregates: dict


def diagnosis_split(diagnosis: str | None) -> dict[str, str | None]:
if not diagnosis:
return {
"diagnosis_1": None,
"diagnosis_2": None,
"diagnosis_3": None,
"diagnosis_4": None,
"diagnosis_5": None,
}

parts = diagnosis.split(":")
# pad parts out to 5 elements
parts += [None] * (5 - len(parts))
return {f"diagnosis_{i + 1}": parts[i] for i in range(5)}


class AccessionStatus(models.TextChoices):
CREATING = "creating", "Creating"
CREATED = "created", "Created"
Expand Down Expand Up @@ -381,7 +398,7 @@ def __str__(self) -> str:
ComputedMetadataField(
"age",
["age_approx"],
lambda age: None if age is None else {"age_approx": int(round(age / 5.0) * 5)},
lambda age: (None if age is None else {"age_approx": int(round(age / 5.0) * 5)}),
"clinical",
es_mappings={"age_approx": {"type": "integer"}},
es_aggregates={
Expand All @@ -394,6 +411,32 @@ def __str__(self) -> str:
}
},
),
ComputedMetadataField(
"diagnosis",
[
"diagnosis_1",
"diagnosis_2",
"diagnosis_3",
"diagnosis_4",
"diagnosis_5",
],
diagnosis_split,
"clinical",
es_mappings={
"diagnosis_1": {"type": "keyword"},
"diagnosis_2": {"type": "keyword"},
"diagnosis_3": {"type": "keyword"},
"diagnosis_4": {"type": "keyword"},
"diagnosis_5": {"type": "keyword"},
},
es_aggregates={
"diagnosis_1": {"terms": {"field": "diagnosis_1"}},
"diagnosis_2": {"terms": {"field": "diagnosis_2"}},
"diagnosis_3": {"terms": {"field": "diagnosis_3"}},
"diagnosis_4": {"terms": {"field": "diagnosis_4"}},
"diagnosis_5": {"terms": {"field": "diagnosis_5"}},
},
),
]

@property
Expand Down
Loading

0 comments on commit 97a1be9

Please sign in to comment.