Skip to content

Commit

Permalink
fix: don't generate weight insight when value is suspicious
Browse files Browse the repository at this point in the history
Solves #302
  • Loading branch information
raphael0202 committed Aug 29, 2023
1 parent f937442 commit 51a19fa
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 4 deletions.
8 changes: 4 additions & 4 deletions robotoff/prediction/ocr/product_weight.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,12 @@ def is_extreme_weight(normalized_value: float, unit: str) -> bool:
# volumes above 10 l
return normalized_value >= 10000 or normalized_value <= 10

raise ValueError("invalid unit: {}, 'g', or 'ml' " "expected".format(unit))
raise ValueError(f"invalid unit: {unit}, 'g', or 'ml' expected")


def is_suspicious_weight(normalized_value: float, unit: str) -> bool:
"""Return True if the weight is suspicious, i.e. it is likely wrongly
detected."""
if is_extreme_weight(normalized_value, unit):
return True

if normalized_value > 1000:
# weight value is above 1000 and
# last digit is not 0
Expand Down Expand Up @@ -144,6 +141,9 @@ def process_product_weight(
text = "{} {}".format(value, unit)
normalized_value, normalized_unit = normalize_weight(value, unit)

if is_extreme_weight(normalized_value, unit):
return None

if is_suspicious_weight(normalized_value, normalized_unit):
# Don't process the prediction automatically if the value
# is suspicious (very high, low,...)
Expand Down
43 changes: 43 additions & 0 deletions tests/unit/prediction/ocr/test_product_weight.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
from robotoff.prediction.ocr.dataclass import OCRRegex
from robotoff.prediction.ocr.product_weight import (
PRODUCT_WEIGHT_REGEX,
find_product_weight,
is_extreme_weight,
is_suspicious_weight,
is_valid_weight,
normalize_weight,
)
from robotoff.types import Prediction, PredictionType, ServerType


@pytest.mark.parametrize(
Expand Down Expand Up @@ -122,3 +124,44 @@ def test_is_extreme_weight(value: float, unit: str, expected: bool):
)
def test_is_suspicious_weight(value: float, unit: str, expected: bool):
assert is_suspicious_weight(value, unit) is expected


@pytest.mark.parametrize(
    "text,expected",
    [
        # Text with a long digit sequence but no weight mention: no
        # prediction is expected (presumably the regression case for the
        # suspicious-value fix this test accompanies -- TODO confirm).
        ("760094310634\nGE PAPIER\n", []),
        # Weight preceded by an explicit "Poids net" prompt: a single
        # prediction flagged for automatic processing is expected.
        (
            "Poids net: 150 g\nIngrédients:",
            [
                Prediction(
                    type=PredictionType.product_weight,
                    data={
                        "automatic_processing": True,
                        "matcher_type": "with_mention",
                        "normalized_unit": "g",
                        "normalized_value": 150,
                        "notify": False,
                        "priority": 1,
                        "prompt": "Poids net",
                        "raw": "Poids net: 150 g",
                        "unit": "g",
                        "value": "150",
                    },
                    value_tag=None,
                    value="150 g",
                    automatic_processing=True,
                    predictor="regex",
                    predictor_version="1",
                    barcode=None,
                    timestamp=None,
                    source_image=None,
                    id=None,
                    confidence=None,
                    server_type=ServerType.off,
                ),
            ],
        ),
    ],
)
def test_find_product_weight(text: str, expected: list[Prediction]):
    """Check that `find_product_weight` returns the expected predictions.

    Each case pairs a raw text input with the exact list of `Prediction`
    objects that `find_product_weight` must return for it (an empty list
    when no weight should be reported).
    """
    assert find_product_weight(text) == expected

0 comments on commit 51a19fa

Please sign in to comment.