diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..569193b --- /dev/null +++ b/.editorconfig @@ -0,0 +1,18 @@ +root = true + +[*] +indent_style = space +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +charset = utf-8 + +[*.py] +indent_size = 4 +max_line_length = 100 + +[{*.yml,*.yaml}] +indent_size = 2 + +[*.ini] +indent_size = 4 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..724024e --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,27 @@ +name: ci +on: + pull_request: + push: + branches: + - master + schedule: + - cron: "0 0 * * *" +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9"] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install tox + run: | + pip install --upgrade pip + pip install tox + - name: Run tests + run: | + tox diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..6f16e2b --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,27 @@ +name: release +on: + release: + types: [published] +jobs: + publish: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + # Tags are needed to compute the current version number + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: "3.x" + - name: Install tox + run: | + pip install --upgrade pip + pip install tox + - name: Publish to PyPI + env: + TWINE_USERNAME: "__token__" + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + TWINE_NON_INTERACTIVE: "true" + run: | + tox -e release diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b6e4761 --- /dev/null +++ b/.gitignore @@ -0,0 +1,129 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C 
extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2e6ce4d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,2 @@
+# isic-metadata
+[![PyPI](https://img.shields.io/pypi/v/isic-metadata)](https://pypi.org/project/isic-metadata/)
diff --git a/isic_metadata/__init__.py b/isic_metadata/__init__.py
new file mode 100644
index 0000000..c01a543
--- /dev/null
+++ b/isic_metadata/__init__.py
@@ -0,0 +1,133 @@
+from __future__ import annotations
+
+from importlib.metadata import PackageNotFoundError, version
+
+from isic_metadata.fields import (
+    AcquisitionDay,
+    AnatomSiteGeneral,
+    BenignMalignant,
+    ClinSizeLongDiamMm,
+    ColorTint,
+    DermoscopicType,
+    Diagnosis,
+    DiagnosisConfirmType,
+    ImageType,
+    LesionId,
+    MelClass,
+    MelMitoticIndex,
+    MelThickMm,
+    MelType,
+    NevusType,
+    PatientId,
+    Sex,
+)
+
+try:
+    __version__ = version('isic-metadata')
+except PackageNotFoundError:
+    # package is not installed
+    pass
+
+
+FIELD_REGISTRY = {}
+
+for field in [
+    'blurry',
+    'hairy',
+    'marker_pen',
+    'personal_hx_mm',
+    'family_hx_mm',
+    'melanocytic',
+    'mel_ulcer',
+]:
+    FIELD_REGISTRY[field] = {
+        'validator': bool,
+        'search': {
+            'key': field,
+            'es_property': {'type': 'boolean'},
+            'es_facet': {'terms': {'field': field}},
+        },
+    }
+
+
+for field, validator in [
+    ('sex', Sex),
+    ('benign_malignant', BenignMalignant),
+    ('diagnosis_confirm_type', DiagnosisConfirmType),
+    ('nevus_type', NevusType),
+    ('image_type', ImageType),
+    ('dermoscopic_type', DermoscopicType),
+    ('mel_type', MelType),
+    ('mel_class', MelClass),
+    ('mel_mitotic_index', MelMitoticIndex),
+    ('anatom_site_general', AnatomSiteGeneral),
+    ('color_tint', ColorTint),
+]:
+    FIELD_REGISTRY[field] = {
+        'validator': validator,
+        'search': {
+            'key': field,
+            'es_property': {'type': 'keyword'},
+            'es_facet': {'terms': {'field': field}},
+        },
+    }
+
+FIELD_REGISTRY.update(
+    {
+        'clin_size_long_diam_mm': {
+            'validator': ClinSizeLongDiamMm,
+            'search': {
+                'key': 'clin_size_long_diam_mm',
+                'es_property': {'type': 'float'},
+                'es_facet': {
+                    'histogram': {
+                        'field': 'clin_size_long_diam_mm',
+                        'interval': 10,
+                        'extended_bounds': {'min': 0, 'max': 100},
+                    }
+                },
+            },
+        },
+        'acquisition_day': {
+            'validator': AcquisitionDay,
+            'search': False,
+        },
+        'diagnosis': {
+            'validator': Diagnosis,
+            'search': {
+                'key': 'diagnosis',
+                'es_property': {'type': 'keyword'},
+                'es_facet': {'terms': {'field': 'diagnosis', 'size': 100}},
+            },
+        },
+        'mel_thick_mm': {
+            'validator': MelThickMm,
+            'search': {
+                'key': 'mel_thick_mm',
+                'es_property': {'type': 'float'},
+                'es_facet': {
+                    'range': {
+                        'field': 'mel_thick_mm',
+                        'ranges': [
+                            {'from': 0.0, 'to': 0.5},
+                            {'from': 0.5, 'to': 1.0},
+                            {'from': 1.0, 'to': 1.5},
+                            {'from': 1.5, 'to': 2.0},
+                            {'from': 2.0, 'to': 2.5},
+                            {'from': 2.5, 'to': 3.0},
+                            {'from': 3.0, 'to': 3.5},
+                            {'from': 3.5, 'to': 4.0},
+                            {'from': 4.0, 'to': 4.5},
+                            {'from': 4.5, 'to': 5.0},
+                            {'from': 5.0},
+                        ],
+                    }
+                },
+            },
+        },
+        'patient_id': {'validator': PatientId, 'search': False},
+        'lesion_id': {'validator': LesionId, 'search': False},
+    }
+)
+
+# TODO: 'age' is a MetadataRow field but has no FIELD_REGISTRY entry yet.
diff --git a/isic_metadata/fields.py b/isic_metadata/fields.py
new file mode 100644
index 0000000..0cd1c6c
--- /dev/null
+++ b/isic_metadata/fields.py
@@ -0,0 +1,352 @@
+from __future__ import annotations
+
+from enum import Enum
+import re
+
+from pydantic.types import constr
+
+
+class BaseStr(str):
+    """Base for custom pydantic field types; subclasses implement ``validate``."""
+
+    @classmethod
+    def __get_validators__(cls):
+        yield cls.validate
+
+    @classmethod
+    def validate(cls, value: str):
+        raise NotImplementedError
+
+
+class ClinSizeLongDiamMm(BaseStr):
+    """A clinical size (long diameter), normalized to millimeters."""
+
+    @classmethod
+    def validate(cls, value) -> float | None:
+        """
+        Parse a clinical size into millimeters.
+
+        Numbers are returned as floats without conversion. Strings must end in ``um``,
+        ``mm`` or ``cm`` and are converted to mm. Other falsy values yield ``None``.
+
+        Raises ``ValueError`` if a string has no recognized unit or no numeric magnitude.
+        """
+        # Numbers are checked before the emptiness test so that 0 is kept rather than being
+        # mistaken for a missing value. bool is an int subclass, but is never a size.
+        if isinstance(value, (int, float)) and not isinstance(value, bool):
+            return float(value)
+
+        if not value:
+            return None
+
+        match = re.match('(.+)(um|mm|cm)$', value)
+
+        if not match:
+            raise ValueError(f'Invalid clinical size of {value}.')
+
+        float_value, units = match.groups()
+        float_value = float(float_value)
+
+        # Convert to mm
+        if units == 'um':
+            float_value *= 1e-3
+        elif units == 'cm':
+            float_value *= 1e1
+
+        return float_value
+
+
+class Age(BaseStr):
+    """An age in whole years, clipped to a maximum of 85."""
+
+    @classmethod
+    def validate(cls, value: str) -> int | None:
+        """
+        Parse an age in years, clipping it to 85.
+
+        ``None`` and the empty string yield ``None``; the special value ``'85+'`` yields 85.
+
+        Raises ``ValueError`` if the value is not an integer or is negative.
+        """
+        # Only treat truly missing values as None; a plain falsy check would discard age 0.
+        if value is None or value == '':
+            return None
+
+        age = 85 if value == '85+' else int(value)
+        if age < 0:
+            raise ValueError(f'Invalid age of: {value}.')
+
+        # clip to 85
+        return min(age, 85)
+
+
+class Sex(BaseStr):
+    @classmethod
+    def validate(cls, value: str) -> str | None:
+        if not value:
+            return None
+
+        if value == 'm':
+            value = 'male'
+        elif value == 'f':
+            value = 'female'
+
+        if value not in ['male', 'female']:
+            raise ValueError(f'Invalid sex of: {value}.')
+
+        return value
+
+
+class BenignMalignantEnum(str, Enum):
+    benign = 'benign'
+    malignant = 'malignant'
+    indeterminate = 'indeterminate'
+    indeterminate_benign = 'indeterminate/benign'
+    indeterminate_malignant = 'indeterminate/malignant'
+
+
+# todo indeterminable
+class BenignMalignant(BaseStr):
+    @classmethod
+    def validate(cls, value: str) -> str | None:
+        if not value:
+            return None
+
+        if value not in BenignMalignantEnum._value2member_map_:
+            raise ValueError(f'Invalid benign/malignant value of {value}.')
+
+        return value
+
+
+class DiagnosisConfirmTypeEnum(str, Enum):
+    histopathology = 'histopathology'
+    serial_imaging_showing_no_change = 'serial imaging showing no change'
+    single_image_expert_consensus = 'single image expert consensus'
+    confocal_microscopy_with_consensus_dermoscopy = 'confocal microscopy with consensus dermoscopy'
+
+
+class DiagnosisConfirmType(BaseStr):
+    @classmethod
+    def validate(cls, value: str) -> str | None:
+        if value not in DiagnosisConfirmTypeEnum._value2member_map_:
+            raise ValueError(f'Invalid diagnosis confirm type of: {value}.')
+        return value
+
+
+class DiagnosisEnum(str, Enum):
+    actinic_keratosis = 'actinic keratosis'
+    adnexal_tumor = 'adnexal tumor'
+    aimp = 'AIMP'
+    angiokeratoma = 'angiokeratoma'
+    angioma = 'angioma'
+    basal_cell_carcinoma = 'basal cell carcinoma'
+    cafe_au_lait_macule = 'cafe-au-lait macule'
+    dermatofibroma = 'dermatofibroma'
+    ephelis = 'ephelis'
+    lentigo_nos = 'lentigo NOS'
+    lentigo_simplex = 'lentigo simplex'
+    lichenoid_keratosis = 'lichenoid keratosis'
+    melanoma = 'melanoma'
+    melanoma_metastasis = 'melanoma metastasis'
+    merkel_cell_carcinoma = 'merkel cell carcinoma'
+    mucosal_melanosis = 'mucosal melanosis'
+    nevus = 'nevus'
+    nevus_spilus = 'nevus spilus'
+    seborrheic_keratosis = 'seborrheic keratosis'
+    solar_lentigo = 'solar lentigo'
+    squamous_cell_carcinoma = 'squamous cell carcinoma'
+    clear_cell_acanthoma = 'clear cell acanthoma'
+    atypical_spitz_tumor = 'atypical spitz tumor'
+    acrochordon = 'acrochordon'
+    angiofibroma_or_fibrous_papule = 'angiofibroma or fibrous papule'
+    neurofibroma = 'neurofibroma'
+    pyogenic_granuloma = 'pyogenic granuloma'
+    scar = 'scar'
+    sebaceous_adenoma = 'sebaceous adenoma'
+    sebaceous_hyperplasia = 'sebaceous hyperplasia'
+    verruca = 'verruca'
+    atypical_melanocytic_proliferation = 'atypical melanocytic proliferation'
+    epidermal_nevus = 'epidermal nevus'
+    pigmented_benign_keratosis = 'pigmented benign keratosis'
+    vascular_lesion = 'vascular lesion'
+    other = 'other'
+
+
+class Diagnosis(BaseStr):
+    @classmethod
+    def validate(cls, value: str) -> str | None:
+        if value not in DiagnosisEnum._value2member_map_:
+            raise ValueError(f'Invalid diagnosis of: {value}.')
+        return value
+
+
+class NevusTypeEnum(str, Enum):
+    blue = 'blue'
+    combined = 'combined'
+    nevus_nos = 'nevus NOS'
+    deep_penetrating = 'deep penetrating'
+    halo = 'halo'
+    persistent_recurrent = 'persistent/recurrent'
+    pigmented_spindle_cell_of_reed = 'pigmented spindle cell of reed'
+    plexiform_spindle_cell = 'plexiform spindle cell'
+    special_site = 'special site'
+    spitz = 'spitz'
+
+
+class NevusType(BaseStr):
+    @classmethod
+    def validate(cls, value: str) -> str | None:
+        if value not in NevusTypeEnum._value2member_map_:
+            raise ValueError(f'Invalid nevus type of: {value}.')
+        return value
+
+
+class ImageTypeEnum(str, Enum):
+    dermoscopic = 'dermoscopic'
+    clinical = 'clinical'
+    overview = 'overview'
+
+
+class ImageType(BaseStr):
+    @classmethod
+    def validate(cls, value: str) -> str | None:
+        if value not in ImageTypeEnum._value2member_map_:
+            raise ValueError(f'Invalid image type of: {value}.')
+        return value
+
+
+class DermoscopicTypeEnum(str, Enum):
+    contact_polarized = 'contact polarized'
+    contact_non_polarized = 'contact non-polarized'
+    non_contact_polarized = 'non-contact polarized'
+
+
+class DermoscopicType(BaseStr):
+    @classmethod
+    def validate(cls, value: str) -> str | None:
+        if value not in DermoscopicTypeEnum._value2member_map_:
+            raise ValueError(f'Invalid dermoscopic type of: {value}.')
+        return value
+
+
+class MelTypeEnum(str, Enum):
+    superficial_spreading_melanoma = 'superficial spreading melanoma'
+    nodular_melanoma = 'nodular melanoma'
+    lentigo_maligna_melanoma = 'lentigo maligna melanoma'
+    acral_lentiginous_melanoma = 'acral lentiginous melanoma'
+    melanoma_nos = 'melanoma NOS'
+
+
+class MelType(BaseStr):
+    @classmethod
+    def validate(cls, value: str) -> str | None:
+        if value not in MelTypeEnum._value2member_map_:
+            raise ValueError(f'Invalid mel type of: {value}.')
+        return value
+
+
+class MelClassEnum(str, Enum):
+    melanoma_in_situ = 'melanoma in situ'
+    invasive_melanoma = 'invasive melanoma'
+    recurrent_persistent_melanoma_in_situ = 'recurrent/persistent melanoma, in situ'
+    recurrent_persistent_melanoma_invasive = 'recurrent/persistent melanoma, invasive'
+    melanoma_nos = 'melanoma NOS'
+
+
+class MelClass(BaseStr):
+    @classmethod
+    def validate(cls, value: str) -> str | None:
+        if value not in MelClassEnum._value2member_map_:
+            raise ValueError(f'Invalid mel class of: {value}.')
+        return value
+
+
+class MelThickMm(BaseStr):
+    """A melanoma thickness in millimeters."""
+
+    _regex = re.compile(
+        r"""
+        (.+?)  # Non-greedy
+        (?:mm)?  # Optional units, non-capturing
+        $
+        """,
+        re.VERBOSE,
+    )
+
+    @classmethod
+    def validate(cls, value) -> float | None:
+        """
+        Parse a melanoma thickness in millimeters.
+
+        Numbers are returned as floats; strings may carry an optional ``mm`` suffix.
+
+        Raises ``ValueError`` if the value cannot be parsed as a number.
+        """
+        # bool is an int subclass, but is never a thickness
+        if isinstance(value, (int, float)) and not isinstance(value, bool):
+            return float(value)
+        # Parse value into floating point component and units
+        result = cls._regex.match(value)
+        if not result:
+            raise ValueError(f'Invalid melanoma thickness of: {value}.')
+
+        return float(result.group(1))
+
+
+class MelMitoticIndexEnum(str, Enum):
+    zero = '0/mm^2'
+    lt_one = '<1/mm^2'
+    one = '1/mm^2'
+    two = '2/mm^2'
+    three = '3/mm^2'
+    four = '4/mm^2'
+    gt_4 = '>4/mm^2'
+
+
+class MelMitoticIndex(BaseStr):
+    @classmethod
+    def validate(cls, value: str) -> str | None:
+        if value not in MelMitoticIndexEnum._value2member_map_:
+            raise ValueError(f'Invalid mel mitotic index of: {value}.')
+        return value
+
+
+class AnatomSiteGeneralEnum(str, Enum):
+    head_neck = 'head/neck'
+    upper_extremity = 'upper extremity'
+    lower_extremity = 'lower extremity'
+    anterior_torso = 'anterior torso'
+    posterior_torso = 'posterior torso'
+    palms_soles = 'palms/soles'
+    lateral_torso = 'lateral torso'
+    oral_genital = 'oral/genital'
+
+
+class AnatomSiteGeneral(BaseStr):
+    @classmethod
+    def validate(cls, value: str) -> str | None:
+        if value not in AnatomSiteGeneralEnum._value2member_map_:
+            raise ValueError(f'Invalid general anatomical site of: {value}.')
+        return value
+
+
+class ColorTintEnum(str, Enum):
+    blue = 'blue'
+    pink = 'pink'
+    none = 'none'
+
+
+class ColorTint(BaseStr):
+    @classmethod
+    def validate(cls, value: str) -> str | None:
+        if value not in ColorTintEnum._value2member_map_:
+            raise ValueError(f'Invalid color tint of: {value}.')
+        return value
+
+
+class AcquisitionDay(int):
+    pass
+
+
+PatientId = constr(regex=r'^IP_[0-9]{7}$')
+LesionId = constr(regex=r'^IL_[0-9]{7}$')
diff --git a/isic_metadata/metadata.py b/isic_metadata/metadata.py
new file mode 100644
index 0000000..855e0bd
--- /dev/null
+++ b/isic_metadata/metadata.py
@@ -0,0 +1,174 @@
+from __future__ import annotations
+
+from typing import Any, Optional
+
+from pydantic import BaseModel, root_validator, validator
+
+from isic_metadata.fields import (
+    Age,
+    AnatomSiteGeneral,
+    BenignMalignant,
+    BenignMalignantEnum,
+    ClinSizeLongDiamMm,
+    ColorTint,
+    DermoscopicType,
+    Diagnosis,
+    DiagnosisConfirmType,
+    DiagnosisEnum,
+    ImageType,
+    ImageTypeEnum,
+    LesionId,
+    MelClass,
+    MelMitoticIndex,
+    MelThickMm,
+    MelType,
+    NevusType,
+    PatientId,
+    Sex,
+)
+
+
+class MetadataRow(BaseModel):
+    """
+    A row of ISIC metadata, with cross-field consistency checks.
+
+    pydantic validates fields in declaration order, so each validator below can only rely on
+    the ``values`` of fields declared above the field it validates.
+    """
+
+    age: Optional[Age]
+    sex: Optional[Sex]
+    benign_malignant: Optional[BenignMalignant]
+    diagnosis: Optional[Diagnosis]
+    diagnosis_confirm_type: Optional[DiagnosisConfirmType]
+    personal_hx_mm: Optional[bool]
+    family_hx_mm: Optional[bool]
+    clin_size_long_diam_mm: Optional[ClinSizeLongDiamMm]
+    melanocytic: Optional[bool]
+    patient_id: Optional[PatientId]
+    lesion_id: Optional[LesionId]
+    acquisition_day: Optional[int]
+    marker_pen: Optional[bool]
+    hairy: Optional[bool]
+    blurry: Optional[bool]
+    nevus_type: Optional[NevusType]
+    image_type: Optional[ImageType]
+    dermoscopic_type: Optional[DermoscopicType]
+    anatom_site_general: Optional[AnatomSiteGeneral]
+    color_tint: Optional[ColorTint]
+    mel_class: Optional[MelClass]
+    mel_mitotic_index: Optional[MelMitoticIndex]
+    mel_thick_mm: Optional[MelThickMm]
+    mel_type: Optional[MelType]
+    mel_ulcer: Optional[bool]
+
+    unstructured: Optional[dict]
+
+    # See https://github.com/samuelcolvin/pydantic/issues/2285 for more detail
+    @root_validator(pre=True)
+    def build_extra(cls, values: dict[str, Any]) -> dict[str, Any]:  # noqa: N805
+        all_required_field_names = {
+            field.alias for field in cls.__fields__.values() if field.alias != 'unstructured'
+        }  # to support alias
+
+        unstructured: dict[str, Any] = {}
+        for field_name in list(values):
+            if field_name not in all_required_field_names:
+                unstructured[field_name] = values.pop(field_name)
+        values['unstructured'] = unstructured
+        return values
+
+    @validator('*', pre=True)
+    @classmethod
+    def strip(cls, v):
+        if isinstance(v, str):
+            v = v.strip()
+        return v
+
+    @validator(
+        'anatom_site_general',
+        'benign_malignant',
+        'clin_size_long_diam_mm',
+        'diagnosis_confirm_type',
+        'mel_mitotic_index',
+        'mel_thick_mm',
+        'sex',
+        pre=True,
+    )
+    @classmethod
+    def lower(cls, v):
+        if isinstance(v, str):
+            v = v.lower()
+        return v
+
+    @validator('diagnosis')
+    @classmethod
+    def validate_no_benign_melanoma(cls, v, values):
+        """Reject a diagnosis that contradicts a known ``benign_malignant`` value."""
+        # pydantic v1 stores None under 'benign_malignant' when it was not provided, so the
+        # value has to be tested rather than the presence of the key.
+        benign_malignant = values.get('benign_malignant')
+        if benign_malignant is None:
+            return v
+
+        if v == 'melanoma' and benign_malignant == 'benign':
+            raise ValueError('A benign melanoma cannot exist.')
+
+        if v == 'nevus' and benign_malignant not in [
+            BenignMalignantEnum.benign,
+            BenignMalignantEnum.indeterminate_benign,
+            BenignMalignantEnum.indeterminate,
+        ]:
+            raise ValueError(f'A {benign_malignant} nevus cannot exist.')
+
+        return v
+
+    @validator('nevus_type')
+    @classmethod
+    def validate_non_nevus_diagnoses(cls, v, values):
+        if (
+            v
+            and values.get('diagnosis')
+            and values['diagnosis'] not in [DiagnosisEnum.nevus, DiagnosisEnum.nevus_spilus]
+        ):
+            raise ValueError(f'Nevus type is inconsistent with {values["diagnosis"]}.')
+        return v
+
+    @validator('mel_class', 'mel_mitotic_index', 'mel_thick_mm', 'mel_type', 'mel_ulcer')
+    @classmethod
+    def validate_melanoma_fields(cls, v, values, config, field):
+        """Reject melanoma-specific fields unless the validated diagnosis is melanoma."""
+        if v and 'diagnosis' in values and values['diagnosis'] != 'melanoma':
+            # Report the field's name; formatting the ModelField itself would embed its repr
+            raise ValueError(f'A non-melanoma {field.name} cannot exist.')
+        return v
+
+    @validator('diagnosis_confirm_type')
+    @classmethod
+    def validate_non_histopathology_diagnoses(cls, v, values):
+        """Check the confirm type against the diagnosis and benign_malignant values."""
+        # pydantic v1 stores None under 'diagnosis' when no diagnosis was provided, so the
+        # value has to be tested rather than the presence of the key.
+        if v is not None and values.get('diagnosis') is None:
+            raise ValueError('Diagnosis confirm type requires a diagnosis.')
+
+        benign_malignant = values.get('benign_malignant')
+        if v != 'histopathology' and benign_malignant in [
+            BenignMalignantEnum.malignant,
+            BenignMalignantEnum.indeterminate_benign,
+            BenignMalignantEnum.indeterminate_malignant,
+            BenignMalignantEnum.indeterminate,
+        ]:
+            raise ValueError(
+                f'A diagnosis that is {benign_malignant} must be confirmed by histopathology.'
+            )
+
+        return v
+
+    @validator('dermoscopic_type')
+    @classmethod
+    def validate_dermoscopic_fields(cls, v, values):
+        if values.get('image_type') != ImageTypeEnum.dermoscopic and v:
+            image_type = values.get('image_type', 'none')
+            raise ValueError(f'Image type {image_type} inconsistent with dermoscopic type {v}.')
+        return v
diff --git a/isic_metadata/utils.py b/isic_metadata/utils.py
new file mode 100644
index 0000000..e6c0ce8
--- /dev/null
+++ b/isic_metadata/utils.py
@@ -0,0 +1,16 @@
+from isic_metadata.metadata import MetadataRow
+
+
+def get_unstructured_columns(df):
+    """
+    Return the sorted names of the columns of ``df`` that are not structured metadata.
+
+    A column is unstructured if it is neither a core column (``filename``, ``isic_id``) nor
+    a field defined on ``MetadataRow``.
+    """
+    structured_columns = set(MetadataRow.__fields__.keys()) - {'unstructured'}
+    core_columns = {'filename', 'isic_id'}
+
+    # Every row shares the frame's columns, so read them once from the frame rather than
+    # iterating over all of the rows. This also reports columns of a frame with no rows.
+    return sorted(set(df.columns) - structured_columns - core_columns)
diff --git a/isic_metadata/validators.py b/isic_metadata/validators.py
new file mode 100644
index 0000000..e69de29
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..298989e
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,23 @@
+[build-system]
+requires = ["setuptools >= 42", "wheel", "setuptools-scm[toml]>=3.4"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools_scm]
+
+[tool.black]
+line-length = 100
+skip-string-normalization = true
+target-version = ["py38"] +exclude='\.eggs|\.git|\.mypy_cache|\.tox|\.venv|_build|buck-out|build|dist' + +[tool.isort] +profile = "black" +line_length = 100 +# Sort by name, don't cluster "from" vs "import" +force_sort_within_sections = true +# Combines "as" imports on the same line +combine_as_imports = true + +[tool.mypy] +ignore_missing_imports = true +show_error_codes = true diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..14157fe --- /dev/null +++ b/setup.py @@ -0,0 +1,36 @@ +from pathlib import Path + +from setuptools import find_packages, setup + +readme_file = Path(__file__).parent / 'README.md' +with readme_file.open() as f: + long_description = f.read() + +setup( + name='isic-metadata', + description='', + long_description=long_description, + long_description_content_type='text/markdown', + license='Apache 2.0', + url='https://github.com/ImageMarkup/isic-metadata', + project_urls={ + 'Bug Reports': 'https://github.com/ImageMarkup/isic-metadata/issues', + 'Source': 'https://github.com/ImageMarkup/isic-metadata', + }, + author='Kitware, Inc.', + author_email='kitware@kitware.com', + keywords='requests', + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python', + ], + python_requires='>=3.8', + install_requires=['pydantic'], + packages=find_packages(), +) diff --git a/tests/test_fields.py b/tests/test_fields.py new file mode 100644 index 0000000..644fe46 --- /dev/null +++ b/tests/test_fields.py @@ -0,0 +1,49 @@ +from hypothesis import given, strategies as st +from pydantic import ValidationError + +from isic_metadata.metadata import MetadataRow + + +def test_melanoma_fields(): + try: + # mel_class can only be set if diagnosis is melanoma + 
def test_no_benign_melanoma():
    """A melanoma diagnosis combined with ``benign_malignant='benign'`` must be rejected."""
    # Imported here so this test does not depend on a module-level pytest import.
    import pytest

    # ``pytest.raises`` fails the test when nothing is raised, unlike a bare try/except,
    # which passes vacuously because its assertions only run inside the handler.
    with pytest.raises(ValidationError) as excinfo:
        MetadataRow(diagnosis='melanoma', benign_malignant='benign')

    errors = excinfo.value.errors()
    assert len(errors) == 1
    assert errors[0]['loc'][0] == 'diagnosis'


@given(age=st.integers(min_value=0).map(str))
def test_age_ceiling(age):
    """Any non-negative integer age given as a string must parse to a value of at most 85."""
    assert MetadataRow(age=age).age <= 85


def test_age_special_case():
    """The literal string ``'85+'`` must parse to 85."""
    assert MetadataRow(age='85+').age == 85


# TODO: add a fuzz test asserting that non-numeric, non-empty age strings (which should
# also cover negative values) raise a ValidationError located at the 'age' field.
TWINE_USERNAME + TWINE_PASSWORD + TWINE_NON_INTERACTIVE +deps = + build + twine +commands = + pyproject-build --sdist --wheel --outdir {envtmpdir} + twine check {envtmpdir}/* + twine upload --skip-existing {envtmpdir}/* + +[flake8] +max-line-length = 100 +show-source = True +ignore = + # closing bracket does not match indentation of opening bracket’s line + E123 + # whitespace before ':' + E203, + # line break before binary operator + W503, + # Missing docstring in * + D10, + +[pytest] +addopts = --strict-markers --showlocals --verbose