-
Notifications
You must be signed in to change notification settings - Fork 5
/
common.py
71 lines (51 loc) · 2.01 KB
/
common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from __future__ import annotations
import hashlib
import io
import logging
from pathlib import Path
from typing import TypedDict
import numpy as np # type: ignore
from PIL import Image # type: ignore
try:
    # Newer Pillow exposes the resampling filters on the Image.Resampling enum.
    ANTIALIAS = Image.Resampling.LANCZOS
except AttributeError:
    # Older Pillow has no Image.Resampling, so fall back to the legacy
    # constant (deprecated in Pillow 9.1 and removed in Pillow 10).
    # https://pillow.readthedocs.io/en/stable/deprecations.html
    ANTIALIAS = Image.ANTIALIAS
from config import Config
class Match(TypedDict):
    """Typed dictionary describing a single match record."""

    # presumably an identifier of the matched item -- confirm against callers
    ord_id: str
    # integer value attached to the match; its exact meaning is not visible
    # in this file -- confirm against callers
    match_sum: int
def get_logger(name: str, log_file_path: str | Path) -> logging.Logger:
    """Return the logger called *name*, writing INFO+ records to a file.

    The logger's level is set to ``logging.INFO`` and a ``FileHandler`` for
    *log_file_path* is attached, formatting records as
    ``"%(asctime)s %(message)s"``.

    ``logging.getLogger`` returns the same object for the same name, so
    calling this function repeatedly used to stack one extra handler per
    call, which duplicated every log line and leaked file handles. A handler
    is now only added when the logger has no ``FileHandler`` for that file.

    Args:
        name: Name of the logger to fetch or create.
        log_file_path: File that log records are appended to.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    import os  # local import: the module does not import os at file level

    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)

    # FileHandler stores its target as an absolute path in ``baseFilename``,
    # so normalise the requested path the same way before comparing.
    target = os.path.abspath(os.fspath(log_file_path))
    already_attached = any(
        isinstance(handler, logging.FileHandler)
        and handler.baseFilename == target
        for handler in logger.handlers
    )
    if not already_attached:
        log_handler = logging.FileHandler(log_file_path)
        log_handler.setFormatter(logging.Formatter("%(asctime)s %(message)s"))
        logger.addHandler(log_handler)

    return logger
def content_md5_hash(content: bytes) -> str:
    """Return the MD5 digest of *content* as a hexadecimal string."""
    digest = hashlib.md5()
    digest.update(content)
    return digest.hexdigest()
def path_to_hash(image_path: str | Path) -> str:
    """Compute the average hash of the image stored at *image_path*.

    The image is opened as a context manager so the underlying file handle
    is released as soon as the hash has been computed, instead of staying
    open until the image object is garbage-collected.

    Args:
        image_path: Location of the image file to fingerprint.

    Returns:
        The result of ``average_hash`` for the image, using
        ``Config.HASH_SIZE`` as the hash size.
    """
    with Image.open(image_path) as img:
        # average_hash reads the pixel data, so it must run while the file
        # is still open.
        return average_hash(img, Config.HASH_SIZE)
def bytes_to_hash(data: bytes) -> str:
    """Compute the average hash of an image given as encoded bytes.

    The bytes are wrapped in an in-memory buffer, opened as an image, and
    passed to ``average_hash`` with ``Config.HASH_SIZE`` as the hash size.
    """
    decoded = Image.open(io.BytesIO(data))
    return average_hash(decoded, Config.HASH_SIZE)
def average_hash(img: Image.Image, hash_size: int) -> str:
    """Fingerprint an image with the average hash algorithm.

    Adapted from ``imagehash.average_hash`` so that the whole library (and
    its scipy dependency) does not have to be installed.

    Background on the approach:
    https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html

    Args:
        img: Image to fingerprint.
        hash_size: Side length of the square the image is shrunk to.

    Returns:
        A string of ``"0"``/``"1"`` characters, one per pixel of the shrunken
        image in row order: ``"1"`` where the pixel is brighter than the
        average, ``"0"`` otherwise.
    """
    # Reduce complexity first: convert to grayscale, then shrink to a
    # hash_size x hash_size thumbnail.
    grayscale = img.convert("L")
    thumbnail = grayscale.resize((hash_size, hash_size), ANTIALIAS)

    # Pixel values range from 0 (black) to 255 (white); compare each one
    # against the mean brightness of the thumbnail.
    pixels = np.asarray(thumbnail)
    above_average = pixels > pixels.mean()

    return "".join("1" if bit else "0" for bit in above_average.ravel())