Skip to content

Commit

Permalink
Fix incorrect is_not_null() implementation
Browse files (browse the repository at this point in the history)
  • Loading branch information
kbolashev committed Nov 18, 2024
1 parent f48ae45 commit 1d2c948
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 59 deletions.
4 changes: 3 additions & 1 deletion dagshub/data_engine/model/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -1560,7 +1560,9 @@ def is_not_null(self):
:meta private:
"""
return self.is_null().add_query_op("not")
field = self._get_filtering_field()
value_type = metadataTypeLookupReverse[field.valueType.value]
return self.add_query_op("!isnull", value_type())

def _get_filtering_field(self) -> MetadataFieldSchema:
field_name = self.get_query().filter.column_filter
Expand Down
7 changes: 7 additions & 0 deletions dagshub/data_engine/model/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,13 @@ def compose(self, op: str, other: Optional[Union[str, int, float, "QueryFilterTr
if self._column_filter_node is not None:
# If there was an unfilled query node with a column - put the operand in that node
node = self._column_filter_node
if op.startswith("!"):
# Negation in the operation - prepend a not node before the current node
tree = self._operand_tree
parent_id = tree.parent(node.identifier)
not_node = tree.create_node("not", parent=parent_id)
tree.move_node(node.identifier, not_node.identifier)
op = op[1:]
node.tag = op
node.data.update({"value": other})
elif op == "isnull":
Expand Down
102 changes: 44 additions & 58 deletions tests/data_engine/test_querying.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,24 @@ def test_sequential_querying(ds):
assert queried2.get_query().filter.tree_to_dict() == expected


def test_is_not_null_composition(ds):
    """Check the filter tree built by chaining ``is_not_null()`` onto a prior filter.

    After filtering on ``col1 >= 2`` and then calling ``is_not_null()`` on
    ``col2``, the resulting tree is expected to be an ``and`` of the original
    ``ge`` node and a ``not`` node wrapping an ``isnull`` node for ``col2``.
    """
    add_int_fields(ds, "col1", "col2")
    col1_filtered = ds["col1"] >= 2
    result = col1_filtered["col2"].is_not_null()

    # Build the expected tree bottom-up so each node is readable on its own.
    ge_node = {"ge": {"data": {"field": "col1", "value": 2}}}
    isnull_node = {"isnull": {"data": {"field": "col2", "value": 0}}}
    not_node = {"not": {"children": [isnull_node], "data": None}}
    expected = {
        "and": {
            "children": [ge_node, not_node],
            "data": None,
        }
    }

    actual = result.get_query().filter.tree_to_dict()
    assert actual == expected


def test_composition_string_then_field(ds):
add_int_fields(ds, "col1")
add_int_fields(ds, "col2")
Expand Down Expand Up @@ -566,31 +584,19 @@ def test_basic_datetime_query(ds):
add_datetime_fields(ds, "x")
t = dateutil.parser.parse("2022-04-05T15:30:00.99999+05:30")

ds2 = (ds[ds["x"] > t])
ds2 = ds[ds["x"] > t]

q = ds2.get_query().filter

print(q.tree_to_dict())
print(ds2.serialize_gql_query_input())
expected = {
'gt': {
'data': {
'field': 'x',
'value': t
}
}
}
expected = {"gt": {"data": {"field": "x", "value": t}}}

assert q.tree_to_dict() == expected

expected_serialized = {
'query': {
'filter': {
'key': 'x',
'value': '1649152800999',
'valueType': 'DATETIME',
'comparator': 'GREATER_THAN'
}
"query": {
"filter": {"key": "x", "value": "1649152800999", "valueType": "DATETIME", "comparator": "GREATER_THAN"}
}
}
assert ds2.serialize_gql_query_input() == expected_serialized
Expand All @@ -612,35 +618,22 @@ def test_periodic_datetime_periods(ds, period):

print(q.tree_to_dict())
print(ds2.serialize_gql_query_input())
expected = {
f"{period}": {
'data': {
'field': 'x',
'value': [
'1',
'3'
]
}
}
}
expected = {f"{period}": {"data": {"field": "x", "value": ["1", "3"]}}}

assert q.tree_to_dict() == expected

expected_serialized = {
'timeZone': '+03:00',
'query': {
'filter': {
'key': 'x',
'value': 0,
'valueType': 'DATETIME',
'comparator': 'DATE_TIME_FILTER',
'valueRange': [
'1',
'3'
],
'timeFilter': f"{period.upper()}"
"timeZone": "+03:00",
"query": {
"filter": {
"key": "x",
"value": 0,
"valueType": "DATETIME",
"comparator": "DATE_TIME_FILTER",
"valueRange": ["1", "3"],
"timeFilter": f"{period.upper()}",
}
}
},
}

assert ds2.serialize_gql_query_input() == expected_serialized
Expand All @@ -655,29 +648,22 @@ def test_periodic_datetime_timeofday(ds):

print(q.tree_to_dict())
print(ds2.serialize_gql_query_input())
expected = {
'timeofday': {
'data': {
'field': 'x',
'value': '12:00-13:00'
}
}
}
expected = {"timeofday": {"data": {"field": "x", "value": "12:00-13:00"}}}

assert q.tree_to_dict() == expected

expected_serialized = {
'timeZone': '+03:00',
'query': {
'filter': {
'key': 'x',
'value': '12:00-13:00',
'valueType': 'DATETIME',
'comparator': 'DATE_TIME_FILTER',
'valueRange': 0,
'timeFilter': 'TIMEOFDAY'
"timeZone": "+03:00",
"query": {
"filter": {
"key": "x",
"value": "12:00-13:00",
"valueType": "DATETIME",
"comparator": "DATE_TIME_FILTER",
"valueRange": 0,
"timeFilter": "TIMEOFDAY",
}
}
},
}

assert ds2.serialize_gql_query_input() == expected_serialized

0 comments on commit 1d2c948

Please sign in to comment.