diff --git a/isic/ingest/models/accession.py b/isic/ingest/models/accession.py index 99d52ec1..000077a4 100644 --- a/isic/ingest/models/accession.py +++ b/isic/ingest/models/accession.py @@ -295,8 +295,7 @@ def generate_blob(self): try: with self.original_blob.open("rb") as original_blob_stream: blob_mime_type = guess_mime_type(original_blob_stream, self.original_blob_name) - blob_major_mime_type = blob_mime_type.partition("/")[0] - if blob_major_mime_type != "image": + if blob_mime_type.major != "image": raise InvalidBlobError( # noqa: TRY301 f'Blob has a non-image MIME type: "{blob_mime_type}"' ) diff --git a/isic/ingest/tests/test_utils_mime.py b/isic/ingest/tests/test_utils_mime.py index f5086c5f..a97af188 100644 --- a/isic/ingest/tests/test_utils_mime.py +++ b/isic/ingest/tests/test_utils_mime.py @@ -1,6 +1,6 @@ import pathlib -from isic.ingest.utils.mime import guess_mime_type +from isic.ingest.utils.mime import MimeType, guess_mime_type data_dir = pathlib.Path(__file__).parent / "data" @@ -11,7 +11,7 @@ def test_utils_mime_guess_mime_type_consistent(caplog): with file_path.open("rb") as stream: mime_type = guess_mime_type(stream, file_path.name) - assert mime_type == "image/jpeg" + assert mime_type == MimeType("image/jpeg") assert not any("Inconsistent MIME types" in msg for msg in caplog.messages) @@ -21,7 +21,7 @@ def test_utils_mime_guess_mime_type_inconsistent(caplog): with file_path.open("rb") as stream: mime_type = guess_mime_type(stream, "ISIC_0000000.gif") - assert mime_type == "image/jpeg" + assert mime_type == MimeType("image/jpeg") message = next((msg for msg in caplog.messages if "Inconsistent MIME types" in msg), None) assert message assert '"image/gif"' in message diff --git a/isic/ingest/utils/mime.py b/isic/ingest/utils/mime.py index fe030d44..81d364ab 100644 --- a/isic/ingest/utils/mime.py +++ b/isic/ingest/utils/mime.py @@ -1,41 +1,55 @@ +from dataclasses import dataclass import logging import mimetypes import shutil import tempfile from typing import IO -import magic +from magic import Magic logger = logging.getLogger(__name__) -def guess_mime_type(content: IO[bytes], source_filename: str | None = None) -> str: +@dataclass +class MimeType: + major: str + minor: str + + def __init__(self, mime_type: str) -> None: + self.major, _, self.minor = mime_type.partition("/") + + def __str__(self) -> str: + return f"{self.major}/{self.minor}" + + +def guess_mime_type(content: IO[bytes], source_filename: str | None = None) -> MimeType: """ Guess the MIME type of a file, based on its content. An optional `filename` can be provided, to provide extra context for guessing. """ - m = magic.Magic(mime=True) + magic = Magic(mime=True) # This initial seek is just defensive content.seek(0) - with tempfile.SpooledTemporaryFile() as file_stream: - # Copy blob_stream into a SpooledTemporaryFile so it can be used by magic, + with tempfile.TemporaryFile() as file_stream: + # Copy blob_stream into a TemporaryFile so it can be used by magic, # which does not accept a file-like object shutil.copyfileobj(content, file_stream) file_stream.seek(0) - # Calling .fileno() forces the file to be flushed to disk - content_mime_type = m.from_descriptor(file_stream.fileno()) + content_mime_type = MimeType(magic.from_descriptor(file_stream.fileno())) content.seek(0) if source_filename is not None: - source_filename_mime_type = mimetypes.guess_type(source_filename, strict=False)[0] - if source_filename_mime_type is not None and source_filename_mime_type != content_mime_type: - # Right now, do not rely on `filename_mime_type` for the return value, but + source_filename_mime_type = MimeType( + mimetypes.guess_type(source_filename, strict=False)[0] or "application/octet-stream" + ) + if source_filename_mime_type != content_mime_type: + # Right now, do not rely on `source_filename_mime_type` for the return value, but # warn if it's inconsistent with the content. logger.warning( - 'Inconsistent MIME types: content "%s", filename %s "%s"', + 'Inconsistent MIME types: content is "%s", filename "%s" is "%s"', content_mime_type, source_filename, source_filename_mime_type,