From 875e0476dc45ff837eb64df718c37c0a494085e5 Mon Sep 17 00:00:00 2001 From: Conor McCarthy Date: Fri, 5 Jul 2024 13:16:17 +0100 Subject: [PATCH] MR for 2.5.2 feature additions --- docs/getting-started/q_magic_command.ipynb | 225 ++++++++++++--------- docs/release-notes/changelog.md | 112 ++++++++++ src/pykx/_wrappers.pyx | 6 +- src/pykx/embedded_q.py | 6 +- src/pykx/lib/read.q | 2 +- src/pykx/pandas_api/pandas_indexing.py | 11 +- src/pykx/pandas_api/pandas_meta.py | 17 +- src/pykx/pykx.q | 6 +- src/pykx/wrappers.py | 14 +- tests/qcumber_tests/extensions.quke | 5 + tests/test_pandas_api.py | 54 +++++ tests/test_pykx.py | 7 + tests/test_q.py | 2 + tests/test_system.py | 12 ++ tests/test_toq.py | 4 +- tests/test_wrappers.py | 43 +++- 16 files changed, 403 insertions(+), 123 deletions(-) diff --git a/docs/getting-started/q_magic_command.ipynb b/docs/getting-started/q_magic_command.ipynb index 7a0c430..9d5bc49 100644 --- a/docs/getting-started/q_magic_command.ipynb +++ b/docs/getting-started/q_magic_command.ipynb @@ -1,8 +1,31 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "5c1e9b66", + "metadata": {}, + "source": [ + "# Jupyter q Magic Command Notebook\n", + "\n", + "_The purpose of this notebook is to exemplify how to use the q Magic command in a Jupyter notebook._\n", + "\n", + "\n", + "The Jupyter q magic command in PyKX allows you to execute q code within a Jupyter notebook. It provides seamless integration with the q programming language.\n", + "\n", + "This example Notebook has the following sections:\n", + "\n", + "1. [Import PyKX](#1-import-pykx)\n", + "1. [Create the external q process](#2-create-the-external-q-process)\n", + "1. [Execute against Embedded q](#3-execute-against-embedded-q)\n", + "1. [SQL interface](#4-sql-interface)\n", + "1. [q namespaces](#5-q-namespaces)\n", + "1. 
[(Advanced) q over IPC](#6-advanced-q-over-ipc)" + ] + }, { "cell_type": "code", "execution_count": null, + "id": "2f25482a", "metadata": { "tags": [ "hide_code" @@ -15,6 +38,16 @@ "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation." ] }, + { + "cell_type": "markdown", + "id": "688b9ed0", + "metadata": {}, + "source": [ + "## 1. Import PyKX\n", + "\n", + "To run this example, first import the PyKX library:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -30,12 +63,9 @@ "id": "9c520c21", "metadata": {}, "source": [ - "#### Create the external q process\n", - "To run this example, the Python code in the following cell will do the equivalent to executing the following in a terminal:\n", + "## 2. Create the external q process\n", "\n", - "```\n", - "$ q -p 5001\n", - "```" + "You can run an external q process by using the following Python code:" ] }, { @@ -58,13 +88,27 @@ " raise kx.QError('Unable to create q process on port 5000')" ] }, + { + "cell_type": "markdown", + "id": "1b318ba2", + "metadata": {}, + "source": [ + "\n", + "Or execute this command in a terminal:\n", + "\n", + "```sh\n", + "$ q -p 5000\n", + "```\n" + ] + }, { "cell_type": "markdown", "id": "bc7219fb", "metadata": {}, "source": [ - "#### Executing against Embedded q\n", - "A cell beginning with `%%q` will execute q within `PyKX`'s `EmbeddedQ` module." + "## 3. 
Execute against Embedded q\n", + "\n", + "To execute q code within PyKX's `EmbeddedQ` module, type `%%q` at the beginning of the cell:" ] }, { @@ -83,23 +127,18 @@ "id": "89ec26e4", "metadata": {}, "source": [ - "#### Execution options\n", - "\n", - "Execution options can also be included after `%%q`.\n", + "After `%%q` you can further add two execution options:\n", "\n", - "Here is the list of currently supported execution options.\n", - "\n", - "```\n", - "--debug: prints the q backtrace before raising a QError\n", - " if the cell errors\n", - "--display: calls display rather than the default print\n", - " on returned objects\n", - "```\n" + "| **Execution option** | **Description** |\n", + "|---------------|----------------------------------------------------|\n", + "| --debug | Prints the q backtrace before raising a QError if the cell gives an error.|\n", + "| --display | Calls display rather than the default print on returned objects.|" ] }, { "cell_type": "code", "execution_count": null, + "id": "f9ed8310", "metadata": {}, "outputs": [], "source": [ @@ -110,6 +149,7 @@ { "cell_type": "code", "execution_count": null, + "id": "daa9a196", "metadata": {}, "outputs": [], "source": [ @@ -119,156 +159,155 @@ }, { "cell_type": "markdown", - "id": "89ec26e4", + "id": "2905895e", "metadata": {}, "source": [ - "#### Executing against an external q process over IPC\n", + "## 4. 
SQL interface\n", "\n", - "Connection information can also be included after the `%%q` to connect to a remote `q` process over\n", - "IPC.\n", + "The `s)` syntax runs SQL queries against local tables within the `q` process.\n", "\n", - "Here is the list of currently supported connection parameters.\n", - "If they specify a type a second value is expected to follow them to be used as the parameter.\n", - "If no type follows them they can be used as a stand alone flag.\n", - "\n", - "```\n", - "--host: A string object denoting the host to connect to\n", - "--port: An int object denoting the port to connect over\n", - "--user: A str object denoting the username to use when connecting\n", - "--password: A str object denoting the password to use when connecting\n", - "--timeout: A float object denoting the time in seconds before the query\n", - " times out, defaults to no timeout\n", - "--nolarge: Disable messages over 2GB being sent / received\n", - "--tls: Use a tls connection\n", - "--unix: Use a unix connection\n", - "--reconnection_attempts: An int object denoting how many\n", - " reconnection attempts to make\n", - "--noctx: Disable the context interface\n", - "```\n", - "\n", - "Connect to a q server running on `localhost` at port `5001` as `user` using password `password`\n", - "and disable the context interface." + "Note: To use the SQL interface, first you need to load the `s.k_` library." 
] }, { "cell_type": "code", "execution_count": null, - "id": "1faca1e1", - "metadata": {}, + "id": "56220bb5", + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "%%q --host localhost --port 5000 --user user --pass password --noctx\n", - "til 10" + "%%q\n", + "\\l s.k_\n", + "tab:([]a:1000?1000; b:1000?500.0; c:1000?`AAPL`MSFT`GOOG);\n", + "s) select * from tab where a>500 and b<250.0 limit 5" ] }, { "cell_type": "markdown", - "id": "f046ebb6", + "id": "da906296", "metadata": {}, "source": [ - "All connection arguments are optional with the exception of the `--port` argument. If `--host` is not provided `localhost` will be used as the default host." + "## 5. q namespaces\n", + "\n", + "You can use `q` namespaces, and switch between them with `\\d`.\n", + "\n", + "Note: The namespace is reset back to the base namespace `.` between cells." ] }, { "cell_type": "code", "execution_count": null, - "id": "615d7d2e", + "id": "502af937", "metadata": {}, "outputs": [], "source": [ - "%%q --port 5000\n", - "tab:([]a:1000?1000; b:1000?500.0; c:1000?`AAPL`MSFT`GOOG);" - ] - }, - { - "cell_type": "markdown", - "id": "d756f342", - "metadata": {}, - "source": [ - "It is possible to execute `q` code spanning multiple lines." + "%%q\n", + "\\d .example\n", + "f: {[x] til x};" ] }, { "cell_type": "code", "execution_count": null, - "id": "c739a80a", - "metadata": { - "scrolled": true - }, + "id": "58d0c7c9", + "metadata": {}, "outputs": [], "source": [ - "%%q --port 5000\n", - "afunc: {[x; y]\n", - " x + y \n", - " };\n", - "afunc[0; 1]\n", - "afunc[2; 3]" + "%%q\n", + "\\d\n", + ".example.f[10]" ] }, { "cell_type": "markdown", - "id": "2905895e", + "id": "52ca850e", "metadata": {}, "source": [ - "#### Using the SQL interface\n", - "The `s)` syntax to run SQL queries against local tables within the `q` process.\n", + "## 6. (Advanced) q over IPC\n", + "\n", + "After `%%q` you can include connection information, if you wish to connect to a remote `q` process over IPC. 
\n", "\n", - "Note: The `s.k_` library must be loaded first to use the SQL interface" + "The list of supported connection parameters is below. The rule is:\n", + "\n", + "- If they have a type, it must be followed by a second value/parameter.\n", + "- If there's no type after them, you can use them as a standalone flag.\n", + "\n", + "| **Parameter**                        | **Object type and description**|\n", + "|-----------------------|-----------------------------------------------|\n", + "|--host | (string) The host to connect to. |\n", + "|--port | (integer) The port to connect over. |\n", + "|--user | (string) The username to use when connecting. |\n", + "|--password | (string) The password to use when connecting. |\n", + "|--timeout | (float) The time in seconds before the query times out. Defaults to no timeout.|\n", + "|--nolarge | Disable messages over 2GB being sent / received. |\n", + "|--tls | Use a tls connection. |\n", + "|--unix | Use a unix connection. |\n", + "|--reconnection_attempts| (integer) How many reconnection attempts to make.|\n", + "|--noctx | Disable the context interface. |\n", + "\n", + "Connect to a q server running on `localhost` at port `5000` as `user` using password `password`\n", + "and disable the context interface." ] }, { "cell_type": "code", "execution_count": null, - "id": "56220bb5", - "metadata": { - "scrolled": true - }, + "id": "a282e069", + "metadata": {}, "outputs": [], "source": [ - "%%q --port 5000\n", - "\\l s.k_\n", - "s) select * from tab where a>500 and b<250.0 limit 5" + "%%q --host localhost --port 5000 --user user --pass password --noctx\n", + "til 10" ] }, { "cell_type": "markdown", - "id": "da906296", + "id": "a1fe3b8e", "metadata": {}, "source": [ - "#### Using namespaces\n", - "You can also use `q` namespaces, and switch between them using `\\d`.\n", - "\n", - "Note: The namespace is reset back to the base namespace `.` between cells." 
+ "All connection arguments are optional, except the `--port` argument. If `--host` is not provided `localhost` is the default host." ] }, { "cell_type": "code", "execution_count": null, - "id": "502af937", + "id": "18d8416b", "metadata": {}, "outputs": [], "source": [ "%%q --port 5000\n", - "\\d .example\n", - "f: {[x] til x};" + "tab:([]a:1000?1000; b:1000?500.0; c:1000?`AAPL`MSFT`GOOG);" + ] + }, + { + "cell_type": "markdown", + "id": "e143c382", + "metadata": {}, + "source": [ + "Note that it's possible to execute `q` code spanning multiple lines:" ] }, { "cell_type": "code", "execution_count": null, - "id": "58d0c7c9", + "id": "ccb197e8", "metadata": {}, "outputs": [], "source": [ "%%q --port 5000\n", - "\\d\n", - ".example.f[10]" + "afunc: {[x; y]\n", + " x + y \n", + " };\n", + "afunc[0; 1]\n", + "afunc[2; 3]" ] }, { "cell_type": "code", "execution_count": null, - "id": "334d0ac5", + "id": "c12a7d38", "metadata": {}, "outputs": [], "source": [ diff --git a/docs/release-notes/changelog.md b/docs/release-notes/changelog.md index 2b827cb..ad3894c 100644 --- a/docs/release-notes/changelog.md +++ b/docs/release-notes/changelog.md @@ -8,6 +8,118 @@ Currently PyKX is not compatible with Pandas 2.2.0 or above as it introduced breaking changes which cause data to be cast to the incorrect type. 
+## PyKX 2.5.2 + +#### Release Date + +2024-07-05 + +### Fixes and Improvements + +- Converting PyKX generic lists using the keyword parameter `raw=True` would previously return incorrect results, the values received being the memory address of the individual elements of the list. This has now been resolved + + === "Behaviour prior to change" + + ```python + >>> a = kx.q('(1; 3.4f; `asad; "asd")') + >>> a.np(raw=True) + array([3012581664, 30547, 3012579792, 30547], dtype=uint64) + ``` + + === "Behaviour post change" + + ```python + >>> a = kx.q('(1; 3.4f; `asad; "asd")') + >>> a.np(raw=True) + array([1, 3.4, b'asad', b'asd'], dtype=object) + ``` + +- Converting PyKX generic lists using the keyword parameter `raw=True` when explicitly required previously would error indicating that the keyword argument was not supplied. This has been resolved with the parameter now appropriately passed to all items + + === "Behaviour prior to change" + + The errors below are truncated for readability + + ```python + >>> kx.q("(1;2;3;`a;2024.01.01T12:00:00)").py(raw=True) + TypeError: The q datetime type is deprecated, and can only be accessed .. + >>> kx.q("(1;2;3;`a;2024.01.01T12:00:00)").np(raw=True) + TypeError: The q datetime type is deprecated, and can only be accessed .. + >>> kx.q("(1;2;3;`a;2024.01.01T12:00:00)").pd(raw=True) + TypeError: The q datetime type is deprecated, and can only be accessed .. + ``` + + === "Behaviour post change" + + ```python + >>> kx.q("(1;2;3;`a;2024.01.01T12:00:00)").py(raw=True) + [1, 2, 3, b'a', 8766.5] + >>> kx.q("(1;2;3;`a;2024.01.01T12:00:00)").np(raw=True) + array([1, 2, 3, b'a', 8766.5], dtype=object) + >>> kx.q("(1;2;3;`a;2024.01.01T12:00:00)").pd(raw=True) + 0 1 + 1 2 + 2 3 + 3 b'a' + 4 8766.5 + ``` + +- Use of `get` method on `kx.Table` with a `str` input will now raise a [`FutureWarning`](https://docs.python.org/3/library/exceptions.html#FutureWarning) indicating that the return type of this method will change with release 3.0.0. 
Currently this function returns a `kx.Table` with a single column; in version 3.0.0 it will return a list/vector containing the content of the column to better align with the Pandas API approach. + + ```python + >>> import pykx as kx + >>> tab = kx.Table(data={'x': [1, 2, 3], 'y': [2, 3, 4]}) + >>> tab.get('x') + /usr/python/3.12/lib/python3.12/site-packages/pykx/pandas_api/pandas_indexing.py:42: FutureWarning: + + Single column retrieval using 'get' method will return a vector/list object in release 3.0+ + To access the vector/list directly use table['column_name'] + warnings.warn("Single column retrieval using 'get' method will return a vector/list object " + pykx.Table(pykx.q(' + x + - + 1 + 2 + 3 + ')) + >>> tab['x'] + pykx.LongVector(pykx.q('1 2 3')) + ``` + + +- Fix to issue where use of `kx.SymbolAtom` with `__getitem__` method on `kx.Table` objects would return a table rather than a vector/list. The return now mirrors the expected return which matches `str` type inputs + + === "Behaviour prior to change" + + ```python + >>> import pykx as kx + >>> tab = kx.Table(data={'x': [1, 2, 3], 'y': ['a', 'b', 'c']}) + >>> tab['x'] + pykx.LongVector(pykx.q('1 2 3')) + >>> tab[kx.SymbolAtom('x')] + pykx.Table(pykx.q(' + x + - + 1 + 2 + 3 + ')) + ``` + + === "Behaviour post change" + + ```python + >>> import pykx as kx + >>> tab = kx.Table(data={'x': [1, 2, 3], 'y': ['a', 'b', 'c']}) + >>> tab['x'] + pykx.LongVector(pykx.q('1 2 3')) + >>> tab[kx.SymbolAtom('x')] + pykx.LongVector(pykx.q('1 2 3')) + ``` + +- Reworked `Table.std()` method to better handle edge cases relating to mixed columns and nulls. Now matching Pandas results. This addresses issues raised [here](https://github.com/KxSystems/pykx/issues/28). +- Fix to issue where loading PyKX on Windows from 2.5.0 could result in a user's working directory being changed to `site-packages/pykx`. 
+ ## PyKX 2.5.1 #### Release Date diff --git a/src/pykx/_wrappers.pyx b/src/pykx/_wrappers.pyx index 517c607..7534c19 100644 --- a/src/pykx/_wrappers.pyx +++ b/src/pykx/_wrappers.pyx @@ -296,7 +296,7 @@ def guid_atom_py(self, bint raw, bint has_nulls, bint stdlib): return UUID(bytes=(_k(self).G0)[:16]) -def list_np(self, bint raw, bint has_nulls): +def list_np(self, bint raw, bint has_nulls, bint raw_repr): cdef uintptr_t[:] addrs, razed_addrs cdef Py_ssize_t i cdef long long n = (self._addr).n @@ -335,9 +335,9 @@ def list_np(self, bint raw, bint has_nulls): arr = np.empty(n, dtype=object) for i in range(n): if 10 == ((addrs[i])).t: - arr[i] = wrappers._rich_convert(k_from_addr(wrappers.CharVector, addrs[i], True)) + arr[i] = wrappers._rich_convert(k_from_addr(wrappers.CharVector, addrs[i], True), raw=raw_repr) else: - arr[i] = wrappers._rich_convert(factory(addrs[i], True), stdlib=False) + arr[i] = wrappers._rich_convert(factory(addrs[i], True), stdlib=False, raw=raw_repr) return arr diff --git a/src/pykx/embedded_q.py b/src/pykx/embedded_q.py index f962125..86b6d81 100644 --- a/src/pykx/embedded_q.py +++ b/src/pykx/embedded_q.py @@ -126,8 +126,10 @@ def __init__(self): # noqa code += ''' .pykx.util.loadfile:{[folder;file] cache:system"cd"; - res:.[{system"cd ",x;res:system"l ",y;(0b;res)}; - (folder;file); + system"cd ",folder; + folder:system"cd"; + res:@[{res:system"l ",x;(0b;res)}; + file; {(1b;x)} ]; if[folder~system"cd";system"cd ",cache]; diff --git a/src/pykx/lib/read.q b/src/pykx/lib/read.q index 398b992..dd490f7 100644 --- a/src/pykx/lib/read.q +++ b/src/pykx/lib/read.q @@ -1,4 +1,4 @@ -.pykx.util.loadfile[;"csvutil.q"]{x sv (-1 _ x vs y)}[$[.z.o~`w64;"\\";"/"]; (value{})6]; +.pykx.util.loadfile[;"csvutil.q"]{x sv (-1 _ x vs y)}[$[.z.o like "w*";"\\";"/"]; (value{})6]; system"d .read"; diff --git a/src/pykx/pandas_api/pandas_indexing.py b/src/pykx/pandas_api/pandas_indexing.py index 7b9f82e..4961e72 100644 --- a/src/pykx/pandas_api/pandas_indexing.py +++ 
b/src/pykx/pandas_api/pandas_indexing.py @@ -13,6 +13,9 @@ def _init(_q): def _get(tab, key, default, cols_check=True): idxs = None _init_tab = None + single_col = False + if isinstance(key, SymbolAtom) or isinstance(key, str): + single_col = True if 'Keyed' in str(type(tab)): keys, idxs = key _init_tab = tab @@ -33,6 +36,11 @@ def _get(tab, key, default, cols_check=True): return tab if isinstance(key, SymbolAtom): key = key.py() + if single_col: + warnings.warn("\n\tSingle column retrieval using 'get' method will return a vector/list " + "object in release 3.0+\n\t" + "To access the vector/list directly use table['column_name']", + FutureWarning) if key in q('{key flip 0#x}', tab).py(): tab = q(f'{{([] {key}: x[y])}}', tab, key) return tab @@ -203,7 +211,6 @@ def _loc(tab, loc): # noqa return _iloc(tab, loc) if (((isinstance(loc, list) and (isinstance(loc[0], str) or isinstance(loc[0], SymbolAtom))) or isinstance(loc, SymbolVector) - or isinstance(loc, SymbolAtom) or (isinstance(loc, List) and q('{-11h~type x 0}', loc))) or ('Keyed' in str(type(tab)) and type(loc) is str) ): @@ -238,7 +245,7 @@ def _loc(tab, loc): # noqa if 'Keyed' in str(type(tab)): return q('{(count keys x)!((0!x) each where y)}', tab, loc) return q('{x where y}', tab, loc) - if isinstance(loc, str): + if isinstance(loc, str) or isinstance(loc, SymbolAtom): if q('{not x in cols y}', loc, tab): raise QError(f'Attempted to retrieve inaccessible column: {loc}') return q('{x[enlist each y]}', tab, loc) diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py index 67dbfc6..be3fdd2 100644 --- a/src/pykx/pandas_api/pandas_meta.py +++ b/src/pykx/pandas_api/pandas_meta.py @@ -204,14 +204,15 @@ def std(self, axis: int = 0, ddof: int = 1, numeric_only: bool = False): if ddof == len(tab): return q('{x!count[x]#0n}', axis_keys) - return q(''' - {[tab;axis;ddof;axis_keys] - tab:$[0~axis;(::);flip] value flip tab; - d:$[0~ddof;dev; - 1~ddof;sdev; - {[ddof;x] avg sqrt (sum 
xexp[x-avg x;2]) % count[x]-ddof}ddof]; - axis_keys!d each tab - }''', tab, axis, ddof, axis_keys) + return q( + '''{[tab;axis;ddof;axis_keys] + tab:$[0~axis;(::);flip] value flip 9h$tab; + d:$[0~ddof;dev; + 1~ddof;sdev; + {sqrt (n*var y*c>0)%c:0|(neg x)+n:sum not null y}ddof]; + axis_keys!d each tab + }''', tab, axis, ddof, axis_keys + ) @api_return def median(self, axis: int = 0, numeric_only: bool = False): diff --git a/src/pykx/pykx.q b/src/pykx/pykx.q index 45593a5..4ba2ee6 100644 --- a/src/pykx/pykx.q +++ b/src/pykx/pykx.q @@ -47,7 +47,9 @@ util.startup:.Q.opt .z.x // to allow loading of files at folder locations containing spaces util.loadfile:{[folder;file] cache:system"cd"; - res:.[{system"cd ",x;res:system"l ",y;(0b;res)};(folder;file);{(1b;x)}]; + system"cd ",folder; + folder:system"cd"; + res:@[{res:system"l ",x;(0b;res)};file;{(1b;x)}]; if[folder~system"cd";system"cd ",cache]; $[res[0];'res[1];res[1]] } @@ -1735,7 +1737,7 @@ loadExtension:{[ext] if[not 10h=type ext;'"Extension provided must be of type string"]; if[not ext in listExtensions[];'"Extension provided '",ext,"' not available"]; .[util.loadfile; - (pykxDir,"/extensions/";ext,".q"); + (pykxDir,"/extensions";ext,".q"); {'x," raised when attempting to load extension"} ]; } diff --git a/src/pykx/wrappers.py b/src/pykx/wrappers.py index af4e55e..734ef41 100644 --- a/src/pykx/wrappers.py +++ b/src/pykx/wrappers.py @@ -229,12 +229,12 @@ def _key_preprocess(key, n, slice=False): return(key) -def _rich_convert(x: 'K', stdlib: bool = True): +def _rich_convert(x: 'K', stdlib: bool = True, raw=False): if stdlib: - return x.py(stdlib=stdlib) + return x.py(stdlib=stdlib, raw=raw) if isinstance(x, Mapping): - return x.pd() - return x.np() + return x.pd(raw=raw) + return x.np(raw=raw) def _null_gen(x): @@ -2014,11 +2014,11 @@ def has_infs(self) -> bool: return any(x.is_inf if x.is_atom else False for x in self) def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True): - 
return [_rich_convert(x, stdlib) for x in self] + return [_rich_convert(x, stdlib, raw) for x in self] def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None): """Provides a Numpy representation of the list.""" - return _wrappers.list_np(self, False, has_nulls) + return _wrappers.list_np(self, False, has_nulls, raw) class NumericVector(Vector): @@ -4398,7 +4398,7 @@ class Foreign(Atom): def __reduce__(self): raise TypeError('Unable to serialize pykx.Foreign objects') - def py(self, stdlib=None): + def py(self, stdlib=None, raw=None): """Turns the pointer stored within the Foreign back into a Python Object. Note: The resulting object is a reference to the same memory location as the initial object. diff --git a/tests/qcumber_tests/extensions.quke b/tests/qcumber_tests/extensions.quke index 6917cc7..4bd3c9e 100644 --- a/tests/qcumber_tests/extensions.quke +++ b/tests/qcumber_tests/extensions.quke @@ -11,7 +11,12 @@ feature General Extensions feature Dashboards Extension before + .test.cd:system"cd"; + + should Not change working directory on load + expect to be in same dir after load .pykx.loadExtension["dashboards"]; + .qu.compare[.test.cd;system"cd"] should Fail to run dashboards functionality under various conditions expect to fail when running a function using an invalid function definition diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py index 75d3941..78b0d73 100644 --- a/tests/test_pandas_api.py +++ b/tests/test_pandas_api.py @@ -107,6 +107,7 @@ def test_df_get(kx, q): 'x': [x for x in range(10)], 'y': [10 - x for x in range(10)] }) + assert df.get(['y', 'z']).py() == df[['y', 'z']].py() assert df.get(['x', 'y']).py() == df[['x', 'y']].py() assert df.get('r') is None @@ -212,6 +213,9 @@ def test_df_getitem(kx, q): 'z': ['a', 'a'] } ) + assert all(df[kx.SymbolAtom('x')] == df['x']) + assert all(df[kx.SymbolVector(['x', 'y'])] == df[['x', 'y']]) + assert all(df[kx.SymbolVector(['x'])] == df[['x']]) def test_df_loc_set(kx, q): @@ 
-2525,6 +2529,56 @@ def test_std(kx, q): q_m = tab.std(axis=1) +def test_std_extended(kx, q): + + df_full = pd.DataFrame( + { + # important to note that this is m*n array where m!=n + 'a': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + 'b': [10, 30, 20, 4, 9.5, 2.445, 999, 302, 11.11, 6], # mixed types + 'c': [3, 4, 5, np.NaN, 6, np.NaN, 2, np.NaN, 9, 5], # NaN types included + 'd': ['foo', 'bar', 'foobar', 'fizz', 'buzz', 'fizzbuzz', 'test', '123', 'test123', ''], # noqa: E501 + }) + numdf = df_full[['a', 'b', 'c']] # numerical columns only + + df = numdf + tab = kx.toq(df) + + # Testing flipped axes + p_res = df.std(axis=1) + q_res = tab.std(axis=1) + for c in range(len(q.cols(tab))): + assert np.isclose(p_res[c], q_res[q('{`$string x}', c)].py(), atol=1e-20) + + # Testing higher ddof + p_res = df.std(ddof=7) + q_res = tab.std(ddof=7) + for c in q.key(q_res).py(): + assert np.isclose(p_res[c], q_res[c].py(), atol=1e-20) or \ + (np.isnan(p_res[c]) and np.isnan(q_res[c].py())) + + # Testing both + p_res = df.std(axis=1, ddof=3) + q_res = tab.std(axis=1, ddof=3) + for c in q.key(q_res).py(): + assert np.isclose(p_res[int(c)], q_res[c].py(), atol=1e-20) or \ + (np.isnan(p_res[int(c)]) and np.isnan(q_res[c].py())) + + # Testing with full df + df = df_full + tab = kx.toq(df) + + p_res = df.std(numeric_only=True) + q_res = tab.std(numeric_only=True) + for c in q.key(q_res).py(): + assert np.isclose(p_res[c], q_res[c].py(), atol=1e-20) + + p_res = df.std(axis=1, numeric_only=True) + q_res = tab.std(axis=1, numeric_only=True) + for c in q.key(q_res).py(): + assert np.isclose(p_res[int(c)], q_res[c].py(), atol=1e-20) + + def test_merge_qjoin(kx): tab1 = kx.Table(data={'k': ['foo', 'bar', 'baz'], 'v': [1, 2, 3]}) tab2 = kx.Table(data={'k': ['foo', 'bar', 'baz'], 'v': [4, 5, 6]}) diff --git a/tests/test_pykx.py b/tests/test_pykx.py index 271eec1..0a6326e 100644 --- a/tests/test_pykx.py +++ b/tests/test_pykx.py @@ -363,3 +363,10 @@ def test_PYKX_Q_LIB_LOCATION(): def test_subnormals(kx): 
import numpy as np assert '5e-324' == str(np.finfo(np.float64).smallest_subnormal + 0.) + + +@pytest.mark.isolate +def test_import_cd(): + cd = os.getcwd() + import pykx as kx # noqa: F401 + assert cd == os.getcwd() diff --git a/tests/test_q.py b/tests/test_q.py index 018b73f..19b4617 100644 --- a/tests/test_q.py +++ b/tests/test_q.py @@ -286,9 +286,11 @@ def test_load_spacefile(tmp_path): os.makedirs(test_location, exist_ok=True) with open(test_location/'file.q', 'w') as f: f.write('.pykx_test.tmp.variable:1b') + cd = os.getcwd() import pykx as kx kx.q('{.pykx.util.loadfile[1_string x;y]}', test_location, b'file.q') assert kx.q('.pykx_test.tmp.variable') + assert cd == os.getcwd() @pytest.mark.isolate diff --git a/tests/test_system.py b/tests/test_system.py index 620d583..549d1a0 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -1,4 +1,5 @@ import os +from platform import system # Do not import pykx here - use the `kx` fixture instead! import pytest @@ -219,11 +220,22 @@ def test_system_space_load(tmp_path): assert kx.q('.pykx_test.system.variable') assert cache_dir == os.getcwd() + kx.q('.pykx_test.system.variable:0b') + if system() == 'Windows': + file_location = test_location/'..\\test directory\\\\\\load_file.q' + else: + file_location = test_location/'../test directory///load_file.q' + kx.q.system.load(file_location) + assert kx.q('.pykx_test.system.variable') + assert cache_dir == os.getcwd() + test_splay = test_location/'splay/' kx.q('{x set ([]10?1f;10?1f)}', test_splay) def test_load_splay(test_splay): + cd = os.getcwd() loaded = kx.q.system.load(test_splay) + assert cd == os.getcwd() assert loaded.py() == 'splay' assert isinstance(kx.q['splay'], kx.Table) kx.q('delete splay from `.') diff --git a/tests/test_toq.py b/tests/test_toq.py index a90b52a..37fa0bd 100644 --- a/tests/test_toq.py +++ b/tests/test_toq.py @@ -1303,8 +1303,8 @@ def test_null_roundtrip(kx): t = kx.q('flip ({`$.Q.t x} each ty)!{enlist nulls[x]} each til count ty') for 
col in t: assert ( - kx.q('{x 0}', kx.q.value(kx.q.flip(t[col]))) - == kx.toq(kx.q.value(kx.q.flip(t[col])).np(), handle_nulls=True) + kx.q('{x 0}', t[col]) + == kx.toq(t[col].np(), handle_nulls=True) ).all() assert (t == kx.toq(t.pd(), handle_nulls=True)).all().all() diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py index f5a511f..d0361cb 100644 --- a/tests/test_wrappers.py +++ b/tests/test_wrappers.py @@ -1565,6 +1565,44 @@ def test_contains(self, q): def test_empty_vector(self, q): assert q('0h$()').np().dtype == object + def test_raw_conversions(self, q, kx): + qarray = q("(1;2;3;`a;2024.01.01T12:00:00)") + with pytest.raises(TypeError) as err: + qarray.np() + assert 'The q datetime type is deprecated' in str(err.value) + + pyarray = qarray.py(raw=True) + nparray = qarray.np(raw=True) + pdarray = qarray.pd(raw=True) + + for arr in [pyarray, nparray, pdarray]: + qarr = kx.toq(arr) + assert isinstance(qarr, kx.List) + assert isinstance(qarr[4], kx.FloatAtom) + qarr[3] = q('`$', qarr[3]) + qarr[4] = q('"z"$', qarr[4]) + assert (qarray == qarr).all() + + nestarr = q(''' + ((1;2;"a";2024.01.01T12:00:00); + (2;3;"b";2024.01.01T12:00:00); + (3;4;"c";2024.01.01T12:00:00)) + ''') + + with pytest.raises(TypeError) as err: + nestarr.py() + assert 'The q datetime type is deprecated' in str(err.value) + + pynest = nestarr.py(raw=True) + npnest = nestarr.np(raw=True) + pdnest = nestarr.pd(raw=True) + + for arr in [pynest, npnest, pdnest]: + qnest = kx.toq(arr) + assert isinstance(qnest, kx.List) + assert isinstance(q('{x[;3]}', qnest), kx.FloatVector) + assert q('{x[;3]~"z"$y[;3]}', nestarr, qnest) + # NaN is tricky to compare, so we generate GUID vectors until we get one whose complex form has no # NaNs in it. 
@@ -4298,11 +4336,10 @@ def test_pyarrow_pandas_table_roundtrip(kx): tab2 = kx.toq(tab.pd(as_arrow=True)) for x in tab.keys(): - assert isinstance(tab2[x], type(tab[x])) if x == 'dset_1_tab12': - assert all([x < 1000 for x in (tab[x]._values - tab2[x]._values).np()[0].astype(int)]) # noqa + assert all([x < 1000 for x in (tab[x] - tab2[x]).np().astype(int)]) # noqa else: - assert (tab[x]._values == tab2[x]._values).all() + assert (tab[x] == tab2[x]).all() @pytest.mark.unlicensed