Skip to content

Commit

Permalink
Include the column name in the error message for an unexpected NULL (#…
Browse files Browse the repository at this point in the history
…397)

* Improve error messages to include column name

* Fix imports, add type

* Move column name into BaseQueryContext

* Remove unnecessary None check

* Fix import cycle
  • Loading branch information
angusholder authored Sep 19, 2024
1 parent 58fbbc1 commit b90cdf9
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 24 deletions.
4 changes: 2 additions & 2 deletions clickhouse_connect/datatypes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def _write_column_low_card(self, column: Sequence, dest: bytearray, ctx: InsertC
write_uint64(len(index), dest)
self._write_column_binary(index, dest, ctx)
write_uint64(len(keys), dest)
write_array(array_type(1 << ix_type, False), keys, dest)
write_array(array_type(1 << ix_type, False), keys, dest, ctx)

def _active_null(self, _ctx: QueryContext) -> Any:
return None
Expand Down Expand Up @@ -338,7 +338,7 @@ def _finalize_column(self, column: Sequence, ctx: QueryContext) -> Sequence:
def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, ctx: InsertContext):
if len(column) and self.nullable:
column = [0 if x is None else x for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)

def _active_null(self, ctx: QueryContext):
if ctx.as_pandas and ctx.use_extended_dtypes:
Expand Down
2 changes: 1 addition & 1 deletion clickhouse_connect/datatypes/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
column = [x._ip if x else 0 for x in column]
else:
column = [x._ip for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)

def _active_null(self, ctx: QueryContext):
fmt = self.read_format(ctx)
Expand Down
14 changes: 7 additions & 7 deletions clickhouse_connect/datatypes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,8 @@ def _finalize_column(self, column: Sequence, ctx: QueryContext) -> Sequence:
return np.array(column)
return column

def _write_column_binary(self, column, dest, _ctx):
write_array('B', [1 if x else 0 for x in column], dest)
def _write_column_binary(self, column, dest, ctx):
write_array('B', [1 if x else 0 for x in column], dest, ctx)


class Boolean(Bool):
Expand Down Expand Up @@ -218,15 +218,15 @@ def _read_column_binary(self, source: ByteSource, num_rows: int, ctx: QueryConte
lookup = self._int_map.get
return [lookup(x, None) for x in column]

def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, _ctx):
def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, ctx):
first = self._first_value(column)
if first is None or not isinstance(first, str):
if self.nullable:
column = [0 if not x else x for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)
else:
lookup = self._name_map.get
write_array(self._array_type, [lookup(x, 0) for x in column], dest)
write_array(self._array_type, [lookup(x, 0) for x in column], dest, ctx)


class Enum8(Enum):
Expand Down Expand Up @@ -291,9 +291,9 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
dec = decimal.Decimal
mult = self._mult
if self.nullable:
write_array(self._array_type, [int(dec(str(x)) * mult) if x else 0 for x in column], dest)
write_array(self._array_type, [int(dec(str(x)) * mult) if x else 0 for x in column], dest, ctx)
else:
write_array(self._array_type, [int(dec(str(x)) * mult) for x in column], dest)
write_array(self._array_type, [int(dec(str(x)) * mult) for x in column], dest, ctx)

def _active_null(self, ctx: QueryContext):
if ctx.use_none:
Expand Down
9 changes: 4 additions & 5 deletions clickhouse_connect/datatypes/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from clickhouse_connect.datatypes.base import ClickHouseType, TypeDef
from clickhouse_connect.driver.errors import handle_error
from clickhouse_connect.driver.exceptions import DataError
from clickhouse_connect.driver.insert import InsertContext
from clickhouse_connect.driver.query import QueryContext
from clickhouse_connect.driver.types import ByteSource
Expand Down Expand Up @@ -104,7 +103,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
except UnicodeEncodeError:
b = empty
if len(b) > sz:
raise DataError(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
raise ctx.make_data_error(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
ext(b)
ext(empty[:sz - len(b)])
else:
Expand All @@ -114,19 +113,19 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
except UnicodeEncodeError:
b = empty
if len(b) > sz:
raise DataError(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
raise ctx.make_data_error(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
ext(b)
ext(empty[:sz - len(b)])
elif self.nullable:
for b in column:
if not b:
ext(empty)
elif len(b) != sz:
raise DataError(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
raise ctx.make_data_error(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
else:
ext(b)
else:
for b in column:
if len(b) != sz:
raise DataError(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
raise ctx.make_data_error(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
ext(b)
6 changes: 3 additions & 3 deletions clickhouse_connect/datatypes/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
column = [0 if x is None else (x - esd).days for x in column]
else:
column = [(x - esd).days for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)

def _active_null(self, ctx: QueryContext):
fmt = self.read_format(ctx)
Expand Down Expand Up @@ -136,7 +136,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
column = [int(x.timestamp()) if x else 0 for x in column]
else:
column = [int(x.timestamp()) for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)


class DateTime64(DateTimeBase):
Expand Down Expand Up @@ -213,4 +213,4 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
for x in column]
else:
column = [((int(x.timestamp()) * 1000000 + x.microsecond) * prec) // 1000000 for x in column]
write_array('q', column, dest)
write_array('q', column, dest, ctx)
14 changes: 9 additions & 5 deletions clickhouse_connect/driver/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
import struct
import sys

from typing import Sequence, MutableSequence, Dict, Optional, Union, Generator
from typing import Sequence, MutableSequence, Dict, Optional, Union, Generator, TYPE_CHECKING

from clickhouse_connect.driver.exceptions import ProgrammingError, StreamClosedError, DataError
from clickhouse_connect.driver.exceptions import ProgrammingError, StreamClosedError
from clickhouse_connect.driver.types import Closable

if TYPE_CHECKING:
from clickhouse_connect.driver.insert import InsertContext

# pylint: disable=invalid-name
must_swap = sys.byteorder == 'big'
int_size = array.array('i').itemsize
Expand Down Expand Up @@ -38,12 +41,13 @@ def array_type(size: int, signed: bool):
return code if signed else code.upper()


def write_array(code: str, column: Sequence, dest: MutableSequence):
def write_array(code: str, column: Sequence, dest: MutableSequence, ctx: 'InsertContext'):
"""
Write a column of native Python data matching the array.array code
:param code: Python array.array code matching the column data type
:param column: Column of native Python values
:param dest: Destination byte buffer
:param ctx: The InsertContext
"""
if len(column) and not isinstance(column[0], (int, float)):
if code in ('f', 'F', 'd', 'D'):
Expand All @@ -54,8 +58,8 @@ def write_array(code: str, column: Sequence, dest: MutableSequence):
buff = struct.Struct(f'<{len(column)}{code}')
dest += buff.pack(*column)
except (TypeError, OverflowError, struct.error) as ex:
raise DataError('Unable to create Python array. This is usually caused by trying to insert None ' +
'values into a ClickHouse column that is not Nullable') from ex
raise ctx.make_data_error('Unable to create Python array. This is usually caused by trying to insert None ' +
'values into a ClickHouse column that is not Nullable') from ex


def write_uint64(value: int, dest: MutableSequence):
Expand Down
2 changes: 2 additions & 0 deletions clickhouse_connect/driver/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,10 @@ def __init__(self,
self.use_extended_dtypes = use_extended_dtypes
self._active_col_fmt = None
self._active_col_type_fmts = _empty_map
self._column_name = None

def start_column(self, name: str):
self._column_name = name
self._active_col_fmt = self.col_simple_formats.get(name)
self._active_col_type_fmts = self.col_type_formats.get(name, _empty_map)

Expand Down
5 changes: 4 additions & 1 deletion clickhouse_connect/driver/insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from clickhouse_connect.driver.ctypes import data_conv
from clickhouse_connect.driver.context import BaseQueryContext
from clickhouse_connect.driver.options import np, pd, pd_time_test
from clickhouse_connect.driver.exceptions import ProgrammingError
from clickhouse_connect.driver.exceptions import ProgrammingError, DataError

if TYPE_CHECKING:
from clickhouse_connect.datatypes.base import ClickHouseType
Expand Down Expand Up @@ -198,3 +198,6 @@ def _convert_numpy(self, np_array):
data[ix] = data[ix].tolist()
self.column_oriented = True
return data

def make_data_error(self, error_message: str) -> DataError:
return DataError(f"Failed to write column '{self._column_name}': {error_message}")

0 comments on commit b90cdf9

Please sign in to comment.