From 51a19fa46efdb238d0eedfa69b7540f0a1333984 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Bournhonesque?=
Date: Tue, 29 Aug 2023 15:40:02 +0200
Subject: [PATCH] fix: don't generate weight insight when value is suspicious

Solves #302
---
 robotoff/prediction/ocr/product_weight.py     |  8 ++--
 .../prediction/ocr/test_product_weight.py     | 43 +++++++++++++++++++
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/robotoff/prediction/ocr/product_weight.py b/robotoff/prediction/ocr/product_weight.py
index 1d47e9e820..d8f99cb9f6 100644
--- a/robotoff/prediction/ocr/product_weight.py
+++ b/robotoff/prediction/ocr/product_weight.py
@@ -85,15 +85,12 @@ def is_extreme_weight(normalized_value: float, unit: str) -> bool:
         # volumes above 10 l
         return normalized_value >= 10000 or normalized_value <= 10
 
-    raise ValueError("invalid unit: {}, 'g', or 'ml' " "expected".format(unit))
+    raise ValueError(f"invalid unit: {unit}, 'g', or 'ml' expected")
 
 
 def is_suspicious_weight(normalized_value: float, unit: str) -> bool:
     """Return True is the weight is suspicious, i.e is likely wrongly
     detected."""
-    if is_extreme_weight(normalized_value, unit):
-        return True
-
     if normalized_value > 1000:
         # weight value is above 1000 and
         # last digit is not 0
@@ -144,6 +141,9 @@ def process_product_weight(
     text = "{} {}".format(value, unit)
     normalized_value, normalized_unit = normalize_weight(value, unit)
 
+    if is_extreme_weight(normalized_value, normalized_unit):
+        return None
+
     if is_suspicious_weight(normalized_value, normalized_unit):
         # Don't process the prediction automatically if the value
         # is suspicious (very high, low,...)
diff --git a/tests/unit/prediction/ocr/test_product_weight.py b/tests/unit/prediction/ocr/test_product_weight.py
index 57189cb5b5..0e3fa256ce 100644
--- a/tests/unit/prediction/ocr/test_product_weight.py
+++ b/tests/unit/prediction/ocr/test_product_weight.py
@@ -3,11 +3,13 @@
 from robotoff.prediction.ocr.dataclass import OCRRegex
 from robotoff.prediction.ocr.product_weight import (
     PRODUCT_WEIGHT_REGEX,
+    find_product_weight,
     is_extreme_weight,
     is_suspicious_weight,
     is_valid_weight,
     normalize_weight,
 )
+from robotoff.types import Prediction, PredictionType, ServerType
 
 
 @pytest.mark.parametrize(
@@ -122,3 +124,44 @@ def test_is_extreme_weight(value: float, unit: str, expected: bool):
 )
 def test_is_suspicious_weight(value: float, unit: str, expected: bool):
     assert is_suspicious_weight(value, unit) is expected
+
+
+@pytest.mark.parametrize(
+    "text,expected",
+    [
+        ("760094310634\nGE PAPIER\n", []),
+        (
+            "Poids net: 150 g\nIngrédients:",
+            [
+                Prediction(
+                    type=PredictionType.product_weight,
+                    data={
+                        "automatic_processing": True,
+                        "matcher_type": "with_mention",
+                        "normalized_unit": "g",
+                        "normalized_value": 150,
+                        "notify": False,
+                        "priority": 1,
+                        "prompt": "Poids net",
+                        "raw": "Poids net: 150 g",
+                        "unit": "g",
+                        "value": "150",
+                    },
+                    value_tag=None,
+                    value="150 g",
+                    automatic_processing=True,
+                    predictor="regex",
+                    predictor_version="1",
+                    barcode=None,
+                    timestamp=None,
+                    source_image=None,
+                    id=None,
+                    confidence=None,
+                    server_type=ServerType.off,
+                ),
+            ],
+        ),
+    ],
+)
+def test_find_product_weight(text: str, expected: list[Prediction]):
+    assert find_product_weight(text) == expected