Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include the column name in the error message for an unexpected NULL #397

Merged
merged 5 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions clickhouse_connect/datatypes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def _write_column_low_card(self, column: Sequence, dest: bytearray, ctx: InsertC
write_uint64(len(index), dest)
self._write_column_binary(index, dest, ctx)
write_uint64(len(keys), dest)
write_array(array_type(1 << ix_type, False), keys, dest)
write_array(array_type(1 << ix_type, False), keys, dest, ctx)

def _active_null(self, _ctx: QueryContext) -> Any:
return None
Expand Down Expand Up @@ -338,7 +338,7 @@ def _finalize_column(self, column: Sequence, ctx: QueryContext) -> Sequence:
def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, ctx: InsertContext):
if len(column) and self.nullable:
column = [0 if x is None else x for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)

def _active_null(self, ctx: QueryContext):
if ctx.as_pandas and ctx.use_extended_dtypes:
Expand Down
2 changes: 1 addition & 1 deletion clickhouse_connect/datatypes/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
column = [x._ip if x else 0 for x in column]
else:
column = [x._ip for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)

def _active_null(self, ctx: QueryContext):
fmt = self.read_format(ctx)
Expand Down
14 changes: 7 additions & 7 deletions clickhouse_connect/datatypes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,8 @@ def _finalize_column(self, column: Sequence, ctx: QueryContext) -> Sequence:
return np.array(column)
return column

def _write_column_binary(self, column, dest, _ctx):
write_array('B', [1 if x else 0 for x in column], dest)
def _write_column_binary(self, column, dest, ctx):
write_array('B', [1 if x else 0 for x in column], dest, ctx)


class Boolean(Bool):
Expand Down Expand Up @@ -218,15 +218,15 @@ def _read_column_binary(self, source: ByteSource, num_rows: int, ctx: QueryConte
lookup = self._int_map.get
return [lookup(x, None) for x in column]

def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, _ctx):
def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, ctx):
first = self._first_value(column)
if first is None or not isinstance(first, str):
if self.nullable:
column = [0 if not x else x for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)
else:
lookup = self._name_map.get
write_array(self._array_type, [lookup(x, 0) for x in column], dest)
write_array(self._array_type, [lookup(x, 0) for x in column], dest, ctx)


class Enum8(Enum):
Expand Down Expand Up @@ -291,9 +291,9 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
dec = decimal.Decimal
mult = self._mult
if self.nullable:
write_array(self._array_type, [int(dec(str(x)) * mult) if x else 0 for x in column], dest)
write_array(self._array_type, [int(dec(str(x)) * mult) if x else 0 for x in column], dest, ctx)
else:
write_array(self._array_type, [int(dec(str(x)) * mult) for x in column], dest)
write_array(self._array_type, [int(dec(str(x)) * mult) for x in column], dest, ctx)

def _active_null(self, ctx: QueryContext):
if ctx.use_none:
Expand Down
9 changes: 4 additions & 5 deletions clickhouse_connect/datatypes/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from clickhouse_connect.datatypes.base import ClickHouseType, TypeDef
from clickhouse_connect.driver.errors import handle_error
from clickhouse_connect.driver.exceptions import DataError
from clickhouse_connect.driver.insert import InsertContext
from clickhouse_connect.driver.query import QueryContext
from clickhouse_connect.driver.types import ByteSource
Expand Down Expand Up @@ -104,7 +103,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
except UnicodeEncodeError:
b = empty
if len(b) > sz:
raise DataError(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
raise ctx.make_data_error(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
ext(b)
ext(empty[:sz - len(b)])
else:
Expand All @@ -114,19 +113,19 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
except UnicodeEncodeError:
b = empty
if len(b) > sz:
raise DataError(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
raise ctx.make_data_error(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
ext(b)
ext(empty[:sz - len(b)])
elif self.nullable:
for b in column:
if not b:
ext(empty)
elif len(b) != sz:
raise DataError(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
raise ctx.make_data_error(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
else:
ext(b)
else:
for b in column:
if len(b) != sz:
raise DataError(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
raise ctx.make_data_error(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
ext(b)
6 changes: 3 additions & 3 deletions clickhouse_connect/datatypes/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
column = [0 if x is None else (x - esd).days for x in column]
else:
column = [(x - esd).days for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)

def _active_null(self, ctx: QueryContext):
fmt = self.read_format(ctx)
Expand Down Expand Up @@ -136,7 +136,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
column = [int(x.timestamp()) if x else 0 for x in column]
else:
column = [int(x.timestamp()) for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)


class DateTime64(DateTimeBase):
Expand Down Expand Up @@ -213,4 +213,4 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
for x in column]
else:
column = [((int(x.timestamp()) * 1000000 + x.microsecond) * prec) // 1000000 for x in column]
write_array('q', column, dest)
write_array('q', column, dest, ctx)
14 changes: 9 additions & 5 deletions clickhouse_connect/driver/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
import struct
import sys

from typing import Sequence, MutableSequence, Dict, Optional, Union, Generator
from typing import Sequence, MutableSequence, Dict, Optional, Union, Generator, TYPE_CHECKING

from clickhouse_connect.driver.exceptions import ProgrammingError, StreamClosedError, DataError
from clickhouse_connect.driver.exceptions import ProgrammingError, StreamClosedError
from clickhouse_connect.driver.types import Closable

if TYPE_CHECKING:
from clickhouse_connect.driver.insert import InsertContext

# pylint: disable=invalid-name
must_swap = sys.byteorder == 'big'
int_size = array.array('i').itemsize
Expand Down Expand Up @@ -38,12 +41,13 @@ def array_type(size: int, signed: bool):
return code if signed else code.upper()


def write_array(code: str, column: Sequence, dest: MutableSequence):
def write_array(code: str, column: Sequence, dest: MutableSequence, ctx: 'InsertContext'):
"""
Write a column of native Python data matching the array.array code
:param code: Python array.array code matching the column data type
:param column: Column of native Python values
:param dest: Destination byte buffer
:param ctx: The InsertContext
"""
if len(column) and not isinstance(column[0], (int, float)):
if code in ('f', 'F', 'd', 'D'):
Expand All @@ -54,8 +58,8 @@ def write_array(code: str, column: Sequence, dest: MutableSequence):
buff = struct.Struct(f'<{len(column)}{code}')
dest += buff.pack(*column)
except (TypeError, OverflowError, struct.error) as ex:
raise DataError('Unable to create Python array. This is usually caused by trying to insert None ' +
'values into a ClickHouse column that is not Nullable') from ex
raise ctx.make_data_error('Unable to create Python array. This is usually caused by trying to insert None ' +
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was the only error I really needed improving, but I modified the other places where DataError could be raised too, in string.py

'values into a ClickHouse column that is not Nullable') from ex


def write_uint64(value: int, dest: MutableSequence):
Expand Down
2 changes: 2 additions & 0 deletions clickhouse_connect/driver/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,10 @@ def __init__(self,
self.use_extended_dtypes = use_extended_dtypes
self._active_col_fmt = None
self._active_col_type_fmts = _empty_map
self._column_name = None

def start_column(self, name: str):
self._column_name = name
self._active_col_fmt = self.col_simple_formats.get(name)
self._active_col_type_fmts = self.col_type_formats.get(name, _empty_map)

Expand Down
5 changes: 4 additions & 1 deletion clickhouse_connect/driver/insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from clickhouse_connect.driver.ctypes import data_conv
from clickhouse_connect.driver.context import BaseQueryContext
from clickhouse_connect.driver.options import np, pd, pd_time_test
from clickhouse_connect.driver.exceptions import ProgrammingError
from clickhouse_connect.driver.exceptions import ProgrammingError, DataError

if TYPE_CHECKING:
from clickhouse_connect.datatypes.base import ClickHouseType
Expand Down Expand Up @@ -198,3 +198,6 @@ def _convert_numpy(self, np_array):
data[ix] = data[ix].tolist()
self.column_oriented = True
return data

def make_data_error(self, error_message: str) -> DataError:
return DataError(f"Failed to write column '{self._column_name}': {error_message}")