diff --git a/MANIFEST.in b/MANIFEST.in index 47efeaa..9809842 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,6 +5,7 @@ include setup.py graft src/pykx/include graft src/pykx/lib +graft src/pykx/extensions recursive-include src/pykx *.py *.pxd *.pyx *.c *.so *.k recursive-include tests *.py diff --git a/README.md b/README.md index 9343fd0..29610de 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# PyKX +# PyKX ## Introduction @@ -92,18 +92,26 @@ KX only officially supports versions of PyKX built by KX, i.e. versions of PyKX PyKX depends on the following third-party Python packages: -- `pandas~=1.2` -- `numpy~=1.22` +- `pandas>=1.2, < 2.2.0` +- `numpy~=1.22; python_version<'3.11'` +- `numpy~=1.23.2; python_version=='3.11'` +- `pytz>=2022.1` +- `toml~=0.10.2` They are installed automatically by `pip` when PyKX is installed. PyKX also has an optional Python dependency of `pyarrow>=3.0.0`, which can be included by installing the `pyarrow` extra, e.g. `pip install pykx[pyarrow]` +When using PyKX with KX Dashboards users will be required to install `ast2json~=0.3` this can be installed using the `dashboards` extra, e.g. `pip install pykx[dashboards]` + +When using PyKX Beta features users will be required to install `dill>=0.2.0` this can be installed using the `beta` extra, e.g. `pip install pykx[beta]` + **Warning:** Trying to use the `pa` conversion methods of `pykx.K` objects or the `pykx.toq.from_arrow` method when PyArrow is not installed (or could not be imported without error) will raise a `pykx.PyArrowUnavailable` exception. `pyarrow` is supported Python 3.8-3.10 but remains in Beta for Python 3.11. #### Optional Non-Python Dependencies - `libssl` for TLS on [IPC connections](docs/api/ipc.md). +- `libpthread` on Linux/MacOS when using the `PYKX_THREADING` environment variable. 
#### Windows Dependencies diff --git a/conda-recipe/conda_build_config.yaml b/conda-recipe/conda_build_config.yaml new file mode 100644 index 0000000..87fed87 --- /dev/null +++ b/conda-recipe/conda_build_config.yaml @@ -0,0 +1,5 @@ +python: + - 3.8 + - 3.9 + - 3.10 + - 3.11 diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index bcdb2bc..a781dfc 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -27,7 +27,7 @@ requirements: run: - python - numpy>=1.22 - - pandas>=1.2 + - pandas>=1.2, <2.2.0 - pytz>=2022.1 - toml>=0.10.2 diff --git a/docs/api/db.md b/docs/api/db.md new file mode 100644 index 0000000..074a383 --- /dev/null +++ b/docs/api/db.md @@ -0,0 +1,10 @@ +# Database interaction and management functionality + +::: pykx.db + rendering: + show_root_heading: false + options: + show_root_heading: false + members_order: source + members: + - DB diff --git a/docs/api/pykx-q-data/type_conversions.md b/docs/api/pykx-q-data/type_conversions.md index 27f289e..6feb022 100644 --- a/docs/api/pykx-q-data/type_conversions.md +++ b/docs/api/pykx-q-data/type_conversions.md @@ -322,7 +322,7 @@ True Calling `.py()` on a `pykx.LongVector` will return a list of python int objects. ```Python - >>>> pykx.LongVector([1, 2.5]).py() + >>> pykx.LongVector([1, 2.5]).py() [1, 2] ``` @@ -342,7 +342,7 @@ True Calling `.py()` on a `pykx.RealAtom` will return a python float object. ```Python - >>>> pykx.RealAtom(2.5).py() + >>> pykx.RealAtom(2.5).py() 2.5 ``` @@ -383,7 +383,7 @@ True Calling `.py()` on a `pykx.FloatAtom` will return a python float object. 
```Python - >>>> pykx.FloatAtom(2.5).py() + >>> pykx.FloatAtom(2.5).py() 2.5 ``` diff --git a/docs/api/remote.md b/docs/api/remote.md new file mode 100644 index 0000000..2101520 --- /dev/null +++ b/docs/api/remote.md @@ -0,0 +1,11 @@ +# Remote Python Execution Functionality + +::: pykx.remote + rendering: + show_root_heading: false + options: + show_root_heading: false + members_order: source + members: + - session + - function diff --git a/docs/api/system.md b/docs/api/system.md new file mode 100644 index 0000000..8a9ed5f --- /dev/null +++ b/docs/api/system.md @@ -0,0 +1,3 @@ +# System Command Wrappers + +::: pykx.system diff --git a/docs/beta-features/db-management.md b/docs/beta-features/db-management.md new file mode 100644 index 0000000..5d0fdcf --- /dev/null +++ b/docs/beta-features/db-management.md @@ -0,0 +1,247 @@ +# Database Management + +!!! Warning + + This module is a Beta Feature and is subject to change. To enable this functionality for testing please follow the configuration instructions [here](../user-guide/configuration.md) setting `PYKX_BETA_FEATURES='true'` + +## Introduction + +The term Database Management as used here, refers to creating and maintaining [partitioned kdb+ databases](https://code.kx.com/q/kb/partition/). Go to [Q for Mortals](https://code.kx.com/q4m3/14_Introduction_to_Kdb+/#143-partitioned-tables) for more in-depth information about partitioned databases in kdb+. + +A kdb+ database consists of one or more tables saved on-disk, where they are split into separate folders called partitions. These partitions are most often based on a temporal field within the dataset, such as date or month. Each table within the database must follow the same partition structure. + +We recommend using partitioned databases when the volume of data being handled exceeds ~100 million records. + +## Functional walkthrough + +This walkthrough will demonstrate the following steps: + +1. Creating a database from a historical dataset. +1. 
Adding a new partition to the database. +1. Managing the on-disk database by: + 1. Renaming a table and column + 1. Creating a copy of a column to the database + 1. Applying a Python function to a column of the database + 1. Updating the data type of a column +1. Adding a new table to the most recent partition of the database. + +All integrations with the `Database Management` functionality are facilitated through use of the `pykx.DB` class. To follow along with the example outlined below you can use the [companion notebook](../examples/db-management.ipynb). This uses a more complex table but runs the same commands. For full information on the functions available you can reference the [API section](../api/db.md). + +### Creating a database + +Create a dataset containing time-series data with multiple dates, and columns of various types: + +```python +>>> import pykx as kx +>>> from datetime import date +>>> N = 100000 +>>> dataset = kx.Table(data={ +... 'date': kx.random.random(N, [date(2020, 1, 1), date(2020, 1, 2)]), +... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']), +... 'price': kx.random.random(N, 10.0) +... }) +``` + +Initialise the `DB` class. The expected input is the file path where you intend to save the partitioned database and its associated tables. + +```python +>>> db = kx.DB(path = 'db') +``` + +Create the database using the `date` column as the partition, and add `dataset` as a table called `trade_data` within it. + +```python +>>> db.create(dataset, 'trade_data', 'date', by_field = 'sym', sym_enum = 'symcol') +Writing Database Partition 2020.01.01 to table trade_data +Writing Database Partition 2020.01.02 to table trade_data +``` + +This now exists as a table and is saved to disk. + +```python +>>> db.tables +['trade_data'] +``` + +When a table is saved, an attribute is added to the `db` class for it. 
For our newly generated table, this is `db.trade_data` + +```python +>>> db.trade_data +pykx.PartitionedTable(pykx.q(' +date sym price +------------------------- +2020.01.01 AAPL 7.055037 +2020.01.01 AAPL 3.907669 +2020.01.01 AAPL 2.20948 +2020.01.01 AAPL 7.839242 +2020.01.01 AAPL 0.8549648 +.. +') +``` + +### Adding a new partition to the database + +Once a table has been generated, you can add more partitions to the database through reuse of the `create` method. In this case we are adding the new partition `2020.01.03` to the database. + +```python +>>> N = 10000 +>>> dataset = kx.Table(data={ +... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']), +... 'price': kx.random.random(N, 10.0) +... }) +>>> db.create(dataset, 'trade_data', date(2020, 1, 3), by_field = 'sym', sym_enum = 'symcol') +Writing Database Partition 2020.01.03 to table trade_data +``` + +### Managing the database + +This section covers updating the contents of a database. We will continue using the table created in the [Creating a database](#creating-a-database) section above. + +The name of a table can be updated using the `rename_table` method. Below, we are updating the table `trade_data` to be called `trade`. + +```python +>>> db.rename_table('trade_data', 'trade') +2023.12.08 09:54:22 renaming :/tmp/db/2020.01.01/trade_data to :/tmp/db/2020.01.01/trade +2023.12.08 09:54:22 renaming :/tmp/db/2020.01.02/trade_data to :/tmp/db/2020.01.02/trade +2023.12.08 09:54:22 renaming :/tmp/db/2020.01.03/trade_data to :/tmp/db/2020.01.03/trade +``` + +During the rename process, the attribute in the `db` class is also updated. + +```python +>>> db.trade +pykx.PartitionedTable(pykx.q(' +date sym price +------------------------- +2020.01.01 AAPL 7.055037 +2020.01.01 AAPL 3.907669 +2020.01.01 AAPL 2.20948 +2020.01.01 AAPL 7.839242 +2020.01.01 AAPL 0.8549648 +.. +') +``` + +Renaming a column in a table is achieved using the `rename_column` method. 
For example, let's update the `sym` column in the `trade` table to be called `ticker`. + +```python +>>> db.rename_column('trade', 'sym', 'ticker') +2023.12.08 10:06:27 renaming sym to ticker in `:/tmp/db/2020.01.01/trade +2023.12.08 10:06:27 renaming sym to ticker in `:/tmp/db/2020.01.02/trade +2023.12.08 10:06:27 renaming sym to ticker in `:/tmp/db/2020.01.03/trade +``` + +To safely apply a function to modify the `price` column within the database, first create a copy of the column. + +```python +>>> db.copy_column('trade', 'price', 'price_copy') +2023.12.08 10:14:54 copying price to price_copy in `:/tmp/db/2020.01.01/trade +2023.12.08 10:14:54 copying price to price_copy in `:/tmp/db/2020.01.02/trade +2023.12.08 10:14:54 copying price to price_copy in `:/tmp/db/2020.01.03/trade +``` + +You can now apply a function to the copied column without the risk of losing the original data. Below we are modifying the copied column by multiplying the contents by 2. + +```python +>>> db.apply_function('trade', 'price_copy', lambda x: 2*x) +2023.12.08 10:18:18 resaving column price_copy (type 9) in `:/tmp/db/2020.01.01/trade +2023.12.08 10:18:18 resaving column price_copy (type 9) in `:/tmp/db/2020.01.02/trade +2023.12.08 10:18:18 resaving column price_copy (type 9) in `:/tmp/db/2020.01.03/trade +>>> db.trade +pykx.PartitionedTable(pykx.q(' +date ticker price price_copy +-------------------------------------- +2020.01.01 AAPL 7.055037 14.11007 +2020.01.01 AAPL 3.907669 7.815337 +2020.01.01 AAPL 2.20948 4.418959 +2020.01.01 AAPL 7.839242 15.67848 +2020.01.01 AAPL 0.8549648 1.70993 +.. +') +``` + +Once you are happy with the new values within the `price_copy` column, you can safely delete the `price` column, then rename the `price_copy` column to be called `price`. 
+ +```python +>>> db.delete_column('trade', 'price') +2023.12.08 10:20:02 deleting column price from `:/tmp/db/2020.01.01/trade +2023.12.08 10:20:02 deleting column price from `:/tmp/db/2020.01.02/trade +2023.12.08 10:20:02 deleting column price from `:/tmp/db/2020.01.03/trade +>>> db.rename_column('trade', 'price_copy', 'price') +2023.12.08 10:06:27 renaming price_copy to price in `:/tmp/db/2020.01.01/trade +2023.12.08 10:06:27 renaming price_copy to price in `:/tmp/db/2020.01.02/trade +2023.12.08 10:06:27 renaming price_copy to price in `:/tmp/db/2020.01.03/trade +>>> db.trade +pykx.PartitionedTable(pykx.q(' +date ticker price +-------------------------- +2020.01.01 AAPL 14.11007 +2020.01.01 AAPL 7.815337 +2020.01.01 AAPL 4.418959 +2020.01.01 AAPL 15.67848 +2020.01.01 AAPL 1.70993 +.. +') +``` + +To convert the data type of a column, you can use the `set_column_type` method. Currently the `price` column is the type `FloatAtom`. We will update this to be a type `RealAtom`. + +```python +>>> db.set_column_type('trade', 'price', kx.RealAtom) +2023.12.08 10:28:28 resaving column price (type 8) in `:/tmp/db/2020.01.01/trade +2023.12.08 10:28:28 resaving column price (type 8) in `:/tmp/db/2020.01.02/trade +2023.12.08 10:28:28 resaving column price (type 8) in `:/tmp/db/2020.01.03/trade +``` + +### Adding a new table to the database + +Now that you have successfully set up one table, you may want to add a second table named `quotes`. In this example, the `quotes` table only contains data for `2020.01.03`. We follow the same method as before and create the `quotes` table using the `create` method + +```python +>>> quotes = kx.Table(data={ +... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']), +... 'open': kx.random.random(N, 10.0), +... 'high': kx.random.random(N, 10.0), +... 'low': kx.random.random(N, 10.0), +... 'close': kx.random.random(N, 10.0) +... 
}) +>>> db.create(quotes, 'quotes', date(2020, 1, 3), by_field = 'sym', sym_enum = 'symcol') +Writing Database Partition 2020-01-03 to table quotes +``` + +As mentioned in the introduction, all tables within a database must contain the same partition structure. To ensure the new table can be accessed, the `quotes` table needs to exist in every partition within the database, even if there is no data for that partition. This is called backfilling data. For the partitions where the `quotes` table is missing, we use the `fill_database` method. + +```python +>>> db.fill_database() +Successfully filled missing tables to partition: :/tmp/db/2020.01.01 +Successfully filled missing tables to partition: :/tmp/db/2020.01.02 +``` + +Now that the database has resolved the missing tables within the partitions, we can view the new `quotes` table: + +```python +>>> db.quotes +pykx.PartitionedTable(pykx.q(' +date sym open high low close +------------------------------------------------------- +2020.01.03 AAPL 7.456644 7.217498 5.012176 3.623649 +2020.01.03 AAPL 6.127973 0.4229592 7.450608 5.651364 +2020.01.03 AAPL 8.147475 4.459108 3.493555 5.78803 +2020.01.03 AAPL 5.812028 7.81659 5.395469 8.424176 +2020.01.03 AAPL 8.519148 1.18101 6.684017 8.376375 +.. +')) +``` + +Finally, to view the amount of saved data you can count the number of rows per partition using `partition_count`: + +```python +>>> db.partition_count() +pykx.Dictionary(pykx.q(' + | quotes trade +----------| ------------- +2020.01.01| 0 49859 +2020.01.02| 0 50141 +2020.01.03| 10000 10000 +')) +``` diff --git a/docs/beta-features/index.md b/docs/beta-features/index.md new file mode 100644 index 0000000..ca6fecc --- /dev/null +++ b/docs/beta-features/index.md @@ -0,0 +1,52 @@ +# Beta Features + +## What is a Beta Feature? 
+ +As used commonly within software development, "Beta Features" within PyKX describe features which have completed an initial development process phase and are being released in an opt-in manner to users of PyKX wishing to test these features. These features are not intended to be for production use while in beta and are subject to change prior to release as full features. Usage of these features will not affect the default behaviour of the library outside of the scope of the new functionality being added. + +Feedback on Beta Feature development is incredibly helpful and helps to determine when these features are promoted to fully supported production features. If you run into any issues while making use of these features, please raise an issue on the PyKX GitHub [here](https://github.com/KxSystems/pykx/issues). + +## How do I enable Beta Features? + +Within PyKX, beta features are enabled through the use of a configuration/environment variable `PYKX_BETA_FEATURES`. Within a Python session users can set this prior to importing PyKX as shown below. Note that when enabled you will be able to see what features are in beta through access of `kx.beta_features`: + +```python +>>> import os +>>> os.environ['PYKX_BETA_FEATURES'] = 'True' +>>> import pykx as kx +>>> kx.beta_features +['Database Management', 'Remote Functions'] +``` + +Alternatively you can set beta features to be available at all times by adding `PYKX_BETA_FEATURES` to your `.pykx-config` file as outlined [here](../user-guide/configuration.md#configuration-file). An example of a configuration making use of this is as follows: + +```bash +[default] +PYKX_KEEP_LOCAL_TIMES='true' + +[beta] +PYKX_BETA_FEATURES='true' +``` + +## What Beta Features are available? 
+ +As mentioned above, the list of available features to a user is contained within the `beta_features` property. Users with these features available can access this information as follows within a Python session: + +```python +>>> import pykx as kx +>>> kx.beta_features +['Database Management', 'Remote Functions'] +``` + +The following are the currently available beta features: + +- [Database Management](db-management.md) provides users with the ability to create, load and maintain databases and their associated tables including but not limited to: + + - Database table creation and renaming. + - Enumeration of in-memory tables against on-disk sym file. + - Column listing, addition, reordering, renaming, copying, function application and deletion on-disk. + - Attribute setting and removal. + - Addition of missing tables to partitions within a database. + +- [Remote Functions](remote-functions.md) let you define functions in Python which interact directly with kdb+ data on a q process. These functions can seamlessly integrate into existing Python infrastructures and also benefit systems that use q processes over Python for performance reasons or as part of legacy applications. +- [PyKX Threading](threading.md) provides users with the ability to call into `EmbeddedQ` from multithreaded Python programs and allows any thread to modify global state safely. diff --git a/docs/beta-features/remote-functions.md b/docs/beta-features/remote-functions.md new file mode 100644 index 0000000..41145f8 --- /dev/null +++ b/docs/beta-features/remote-functions.md @@ -0,0 +1,132 @@ +# Remote Function Execution + +!!! Warning + + This module is a Beta Feature and is subject to change. 
To enable this functionality for testing please follow the configuration instructions [here](../user-guide/configuration.md) setting `PYKX_BETA_FEATURES='true'` + +## Introduction + +Remote Functions let you define Python functions within your Python environment which can interact with kdb+ data on a q process. Once defined, these functions are registered to a [remote session object](../api/remote.md) along with any Python dependencies which need to be imported. The [remote session object](../api/remote.md) establishes and manages the remote connection to the kdb+/q server. + +To execute kdb+/q functions using PyKX, please see [PyKX under q](../pykx-under-q/intro.html). + +## Requirements and limitations + +To run this functionality, the kdb+/q server you connect to must have the ability to load PyKX under q. It is your responsibility to ensure the version and existence of Python library dependencies are correct in your kdb+/q environment at runtime. + +Users must additionally ensure that they have all Python requirements installed on the client server. In particular, `dill>=0.2` is required for this functionality. + +It can be installed using the following command: + +```bash +pip install pykx[beta] +``` + +## Functional walkthrough + +This walkthrough will demonstrate the following steps: + +1. Initialize a q/kdb+ server loading PyKX under q on a specified port. +1. Import PyKX and generate a remote session object which denotes the process against which the Python functions will be executed. +1. Define a number of Python functions which will be executed on the remote q/kdb+ server. + +### Initializing a q/kdb+ server with PyKX under q + +This step ensures you have a q process running with PyKX under q, as well as having a kdb+ table available to query. If you have this already, proceed to the next step. + +Ensure that you have q installed. 
If you do not have this installed please follow the guide provided [here](https://code.kx.com/q/learn/install/), retrieving your license following the instructions provided [here](https://kx.com/kdb-insights-personal-edition-license-download). + +Install PyKX under q using the following command. + +```bash +python -c "import pykx;pykx.install_into_QHOME()" +``` + +Start the q process to which you will execute your functions. + +```bash +q pykx.q -p 5050 +``` + +Create a table which you will use within your Python analytics defined below. + +```q +q)N:1000 +q)tab:([]sym:N?`AAPL`MSFT`GOOG`FDP;price:100+N?100f;size:10+N?100) +``` + +Set a requirement for users to provide a username/password if you wish to add security to your q process. + +```q +.z.pw:{[u;p]$[(u~`user)&p~`password;1b;0b]} +``` + +### Import PyKX and create a session + +Create a session object from a Python environment of your choice, which establishes and manages the remote connection to the kdb+/q server. + +```python +>>> import os +>>> os.environ['PYKX_BETA_FEATURES'] = 'true' +>>> from pykx.remote import session +>>> remote_session = session() +>>> remote_session.create(host='localhost', port=5050, username='user', password='password') +``` + +### Defining and Executing Python functions using a session + +Tag the Python functions you want to run on the remote server using the `kx.remote.function` decorator. This registers the functions on the `remote_session` object you have just created. + +=== "Single Argument Function" + + ```python + >>> from pykx.remote import function + >>> @function(remote_session) + ... def single_arg_function(x): + ... return x+10 + >>> single_arg_function(10) + pykx.LongAtom(pykx.q('20')) + ``` + +=== "Multi Argument Function" + + ```python + >>> from pykx.remote import function + >>> @function(remote_session) + ... def multi_arg_function(x, y): + ... 
return x+y + >>> multi_arg_function(10, 20) + pykx.LongAtom(pykx.q('30')) + ``` + +Add any Python libraries which need to be available when executing the function(s) you have just defined. You can achieve this in two ways: + +1. Using `session.add_library` to import required libraries before defining your function +1. Importing libraries within the body of the function being executed + +Both examples can be seen below + +=== "Library addition functionality" + + ```python + >>> remote_session.add_library('numpy', 'pykx') + >>> @function(remote_session) + ... def dependent_function(x, y, z): + ... return pykx.q.mavg(4, numpy.linspace(x, y, z)) + >>> dependent_function(0, 10, 10) + pykx.FloatVector(pykx.q('0 0.5555556 1.111111 2.222222 3...')) + ``` + +=== "Defining imports within function body" + + ```python + >>> @function(remote_session) + ... def dependent_function(x, y, z): + ... import pykx as kx + ... import numpy as np + ... return kx.q.mavg(4, np.linspace(x, y, z)) + >>> dependent_function(0, 10, 10) + pykx.FloatVector(pykx.q('0 0.5555556 1.111111 2.222222 3...')) + ``` + +While both are valid, we suggest using `add_library` as it allows for pre-checking of the libraries prior to definition of the function and will be expanded over time to include additional validation. diff --git a/docs/beta-features/threading.md b/docs/beta-features/threading.md new file mode 100644 index 0000000..eaefb4e --- /dev/null +++ b/docs/beta-features/threading.md @@ -0,0 +1,64 @@ +# Multi-Threaded Execution + +!!! Warning + + This module is a Beta Feature and is subject to change. To enable this functionality for testing please follow the configuration instructions [here](../user-guide/configuration.md) setting `PYKX_BETA_FEATURES='true'` and `PYKX_THREADING='true'`. 
+ +## Introduction + +One major limitation of `EmbeddedQ` when using Python with multi-threading is that only the main +thread (the thread that imports PyKX and loads `libq`) is allowed to modify state within `EmbeddedQ`. +However, if you wanted to use one of Python's multi-threading libraries, whether that is the `threading` +library, `asyncio` or any other library that allows Python to utilise multiple threads at once, +and have those threads modify state in some way, whether that be to upsert a row to a global table, +open `QConnection` instances or any other use case that requires the threads to modify state, you +would not be able to do that by default in PyKX. + +This beta feature allows these use cases to become possible by spawning a background thread that all +calls into `EmbeddedQ` will be run on. This background thread is created at the `C` level using +`libpthread` with lightweight future objects to ensure the lowest overhead possible for passing +calls onto a secondary thread. This allows multi-threaded programs to modify state within the spawned +threads safely, without losing out on performance. + + +!!! Note + + While using `PyKX Threading` it is not possible to also use the functionality within `pykx.q`; + it is also not possible to have q call back into Python. + +## How to enable + +This beta feature requires an extra opt-in step. While the overhead for offloading calls onto a secondary +thread is low, there will always be a cost to forcing a thread context switch to process a call into +`EmbeddedQ`. Therefore you will need to enable both the `PYKX_BETA_FEATURES` environment variable as +well as the `PYKX_THREADING` environment variable. + +!!! Warning + + Because using `PyKX Threading` spawns a background thread to run all queries to `EmbeddedQ`, you + must ensure that you call `kx.shutdown_thread()` at the end of your script to ensure that this + background thread is properly shut down at the end. 
If you fail to do this, the background thread will + be left running after the script is finished. The best way to ensure this always happens is to start + a main function for your script within a `try` - `finally` block. + + +```Python +import os +import asyncio +os.environ['PYKX_THREADING'] = '1' +os.environ['PYKX_BETA_FEATURES'] = '1' +import pykx as kx + +def main(): # Your script's entry point + ... + +if __name__ == '__main__': + try: + main() + finally: + kx.shutdown_thread() # This will be called if the script completes normally or errors early +``` + +## More complete examples + +More examples showing this functionality in use can be found [here](../examples/threaded_execution/threading.md). diff --git a/docs/contributors.md b/docs/contributors.md new file mode 100644 index 0000000..e44d1d7 --- /dev/null +++ b/docs/contributors.md @@ -0,0 +1,23 @@ +# Contributors + +The aim of this page is to include a list of the contributors to our project both internal and external to KX. If you wish to contribute to the project please open a pull request to our project [here](https://github.com/KxSystems/pykx/pulls). 
+ +## Internal Development Team (Current and Past) + +- [Conor McCarthy](https://github.com/cmccarthy1) +- [Kian Shepherd](https://github.com/kshepherdkx) +- [Rian Ó Cuinneagáin](https://github.com/rianoc-kx) +- [Reuben Taylor](https://github.com/roobxyz) +- [Bruno Le Hyaric](https://github.com/bu2) +- [Will Da Silva](https://github.com/WillDaSilva) +- [Matt Maynes](https://github.com/mattmaynes) +- [Tim Thornton](https://github.com/igorii) +- Siobhán Stevenson +- Andy McDonald +- Sean Foden + +## External Contributors + +- [neutropolis](https://github.com/neutropolis) +- [nipsn](https://github.com/nipsn) +- [marcosvm13](https://github.com/marcosvm13) diff --git a/docs/examples/db-management.ipynb b/docs/examples/db-management.ipynb new file mode 100644 index 0000000..b58935d --- /dev/null +++ b/docs/examples/db-management.ipynb @@ -0,0 +1,2728 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "015ba887", + "metadata": {}, + "source": [ + "# Introduction\n", + "\n", + "This notebook provides a walkthrough of some of the functionality available for users looking to create and maintain large databases using PyKX.\n", + "\n", + "In particular, this notebook refers to creating and maintaining [partitioned kdb+ databases](https://code.kx.com/q/kb/partition/). Go to [Q for Mortals](https://code.kx.com/q4m3/14_Introduction_to_Kdb+/#143-partitioned-tables) for more in-depth information about partitioned databases in kdb+.\n", + "\n", + "You can download this walkthrough as a `.ipynb` notebook file using the following link.", + "\n", + "This walkthrough provides examples of the following tasks:\n", + "\n", + "1. Creating a database from a historical dataset\n", + "1. Adding a new partition to the database\n", + "1. Managing the on-disk database by:\n", + " 1. Renaming a table and column\n", + " 2. Creating a copy of a column to the database\n", + " 3. Applying a Python function to a column of the database\n", + " 4. Updating the data type of a column\n", + "1. 
Adding a new table to the most recent partition of the database\n", + "\n", + "For full information on the functions available you can reference the [API section](https://code.kx.com/pykx/api/db.html).\n", + "\n", + "---\n", + "\n", + "## Initial setup\n", + "\n", + "Import all required libraries and create a temporary directory which will be used to store the database we create for this walkthrough" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "0afee62a", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['PYKX_BETA_FEATURES'] = 'true'\n", + "\n", + "import pykx as kx\n", + "from datetime import date\n", + "import tempfile" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "64c18054", + "metadata": {}, + "outputs": [], + "source": [ + "tempdir = tempfile.TemporaryDirectory()" + ] + }, + { + "cell_type": "markdown", + "id": "2e91160e", + "metadata": {}, + "source": [ + "Database interactions are facilitated through use of the `pykx.DB` class. All methods/attributes used in this notebook are contained within this class. \n", + "\n", + "Initialise the `DB` class to start. The expected input is the file path where you intend to save the partitioned database and its associated tables. In this case we're going to use the temporary directory we just created. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "90d9eac3", + "metadata": {}, + "outputs": [], + "source": [ + "db = kx.DB(path = tempdir.name + '/db')" + ] + }, + { + "cell_type": "markdown", + "id": "143e0886", + "metadata": {}, + "source": [ + "For details on any methods contained within this class, you can use the `help` method. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0e817132", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on method create in module pykx.db:\n", + "\n", + "create(table, table_name, partition, *, by_field=None, sym_enum=None, log=True) method of pykx.db.DB instance\n", + " Create an on-disk partitioned table within a kdb+ database from a supplied\n", + " `pykx.Table` object. Once generated this table will be accessible\n", + " as an attribute of the `DB` class or a sub attribute of `DB.table`.\n", + " \n", + " Parameters:\n", + " table: The `pykx.Table` object which is to be persisted to disk\n", + " table_name: The name with which the table will be persisted and accessible\n", + " once loaded and available as a `pykx.PartitionedTable`\n", + " partition: The name of the column which is to be used to partition the data if\n", + " supplied as a `str` or if supplied as non string object this will be used as\n", + " the partition to which all data is persisted\n", + " by_field: A field of the table to be used as a by column, this column will be\n", + " the second column in the table (the first being the virtual column determined\n", + " by the partitioning column)\n", + " sym_enum: The name of the symbol enumeration table to be associated with the table\n", + " log: Print information about status of partitioned datab\n", + " \n", + " Returns:\n", + " A `None` object on successful invocation, the database class will be\n", + " updated to contain attributes associated with the available created table\n", + " \n", + " Examples:\n", + " \n", + " Generate a partitioned table from a table containing multiple partitions\n", + " \n", + " ```python\n", + " >>> import pykx as kx\n", + " >>> db = kx.DB(path = 'newDB')\n", + " >>> N = 1000\n", + " >>> qtab = kx.Table(data = {\n", + " ... 'date': kx.q.asc(kx.random.random(N, kx.q('2020.01 2020.02 2020.03'))),\n", + " ... 
'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),\n", + " ... 'price': kx.random.random(N, 10.0),\n", + " ... 'size': kx.random.random(N, 100)\n", + " ... })\n", + " >>> db.create(qtab, 'stocks', 'date', by_field = 'sym', sym_enum = 'symbols')\n", + " >>> db.tables\n", + " ['stocks']\n", + " >>> db.stocks\n", + " pykx.PartitionedTable(pykx.q('\n", + " month sym price size\n", + " ---------------------------\n", + " 2020.01 AAPL 7.979004 85\n", + " 2020.01 AAPL 5.931866 55\n", + " 2020.01 AAPL 5.255477 49\n", + " 2020.01 AAPL 8.15255 74\n", + " 2020.01 AAPL 4.771067 80\n", + " ..\n", + " '))\n", + " ```\n", + " \n", + " Add a table as a partition to an on-disk database, in the example below we are adding\n", + " a partition to the table generated above\n", + " \n", + " ```python\n", + " >>> import pykx as kx\n", + " >>> db = kx.DB(path = 'newDB')\n", + " >>> N = 333\n", + " >>> qtab = kx.Table(data = {\n", + " ... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),\n", + " ... 'price': kx.random.random(N, 10.0),\n", + " ... 'size': kx.random.random(N, 100)\n", + " ... 
})\n", + " >>> db.create(qtab, 'stocks', kx.q('2020.04'), by_field = 'sym', sym_enum = 'symbols')\n", + " >>> db.tables\n", + " ['stocks']\n", + " >>> db.stocks\n", + " pykx.PartitionedTable(pykx.q('\n", + " month sym price size\n", + " ---------------------------\n", + " 2020.01 AAPL 7.979004 85\n", + " 2020.01 AAPL 5.931866 55\n", + " 2020.01 AAPL 5.255477 49\n", + " 2020.01 AAPL 8.15255 74\n", + " 2020.01 AAPL 4.771067 80\n", + " ..\n", + " '))\n", + " ```\n", + "\n" + ] + } + ], + "source": [ + "help(db.create)" + ] + }, + { + "cell_type": "markdown", + "id": "607599f8", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "id": "3516ab83", + "metadata": {}, + "source": [ + "## Create the sample dataset\n", + "\n", + "Create a dataset called `trades` containing time-series data spanning multiple dates, and columns of various types:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "686441cc", + "metadata": {}, + "outputs": [], + "source": [ + "N = 1000000\n", + "trades = kx.Table(data={\n", + " 'date': kx.random.random(N, [date(2020, 1, 1), date(2020, 1, 2)]),\n", + " 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),\n", + " 'price': kx.random.random(N, 10.0),\n", + " 'size': kx.random.random(N, 1000)\n", + "})" + ] + }, + { + "cell_type": "markdown", + "id": "d0529e7c", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "id": "0738729d", + "metadata": {}, + "source": [ + "## Create the database" + ] + }, + { + "cell_type": "markdown", + "id": "0fb4659b", + "metadata": {}, + "source": [ + "Create the database using the `date` column as the partition, and add `trades` as a table called `trade_data` within it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "db8b9a04", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing Database Partition 2020.01.01 to table trade_data\n", + "Writing Database Partition 2020.01.02 to table trade_data\n" + ] + } + ], + "source": [ + "db.create(trades, 'trade_data', 'date')" + ] + }, + { + "cell_type": "markdown", + "id": "ad2fa6f9", + "metadata": {}, + "source": [ + "This now exists as a table and is saved to disk." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "82796fbc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['trade_data']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.tables" + ] + }, + { + "cell_type": "markdown", + "id": "c0ecec19", + "metadata": {}, + "source": [ + "When a table is saved, an attribute is added to the `db` class for it. For our newly generated table, this is `db.trade_data`" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "29606b7a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datesympricesize
02020.01.01MSFT7.079266800
12020.01.01AAPL1.82432165
22020.01.01MSFT2.408259292
32020.01.01GOOG1.6754387
42020.01.01AAPL8.311168183
52020.01.01AAPL2.208693989
62020.01.01MSFT6.068126567
72020.01.01AAPL4.918926794
82020.01.01AAPL9.33186939
92020.01.01AAPL1.142611507
102020.01.01AAPL2.685874581
112020.01.01AAPL3.483591163
122020.01.01AAPL0.4422525466
132020.01.01MSFT7.406654976
142020.01.01MSFT2.493871171
152020.01.01AAPL9.24208828
162020.01.01MSFT0.3954522747
172020.01.01MSFT0.3441191512
182020.01.01GOOG9.662762998
192020.01.01AAPL9.601674812
202020.01.01AAPL4.969858910
212020.01.01GOOG1.048204830
222020.01.01GOOG0.9817644595
...............
9999992020.01.02GOOG1.470716636
\n", + "

1,000,000 rows × 4 columns

" + ], + "text/plain": [ + "pykx.PartitionedTable(pykx.q('\n", + "date sym price size\n", + "------------------------------\n", + "2020.01.01 MSFT 7.079266 800 \n", + "2020.01.01 AAPL 1.824321 65 \n", + "2020.01.01 MSFT 2.408259 292 \n", + "2020.01.01 GOOG 1.675438 7 \n", + "2020.01.01 AAPL 8.311168 183 \n", + "2020.01.01 AAPL 2.208693 989 \n", + "2020.01.01 MSFT 6.068126 567 \n", + "2020.01.01 AAPL 4.918926 794 \n", + "2020.01.01 AAPL 9.331869 39 \n", + "2020.01.01 AAPL 1.142611 507 \n", + "2020.01.01 AAPL 2.685874 581 \n", + "2020.01.01 AAPL 3.483591 163 \n", + "2020.01.01 AAPL 0.4422525 466 \n", + "2020.01.01 MSFT 7.406654 976 \n", + "2020.01.01 MSFT 2.493871 171 \n", + "2020.01.01 AAPL 9.242088 28 \n", + "2020.01.01 MSFT 0.3954522 747 \n", + "2020.01.01 MSFT 0.3441191 512 \n", + "2020.01.01 GOOG 9.662762 998 \n", + "2020.01.01 AAPL 9.601674 812 \n", + "..\n", + "'))" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.trade_data" + ] + }, + { + "cell_type": "markdown", + "id": "5ed4224e", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "id": "889dfb46", + "metadata": {}, + "source": [ + "## Add a new partition to the database\n", + "\n", + "Once a table has been generated, you can add more partitions to the database through reuse of the `create` method. In this case we are adding the new partition `2020.01.03` to the database." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7cce4947", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing Database Partition 2020-01-03 to table trade_data\n" + ] + } + ], + "source": [ + "N = 10000\n", + "new_day = kx.Table(data={\n", + " 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),\n", + " 'price': kx.random.random(N, 10.0),\n", + " 'size': kx.random.random(N, 100)\n", + "})\n", + "db.create(new_day, 'trade_data', date(2020, 1, 3))" + ] + }, + { + "cell_type": "markdown", + "id": "e24ecc1d", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "id": "09f0bd28", + "metadata": {}, + "source": [ + "## Manage the database\n", + "\n", + "This section covers updating the contents of a database. The examples below demonstrate a number of common tasks that would be completed regularly when updating a database.\n", + "\n", + "The name of a table can be updated using the `rename_table` method. Below, we are updating the table `trade_data` to be called `trades`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ae9d244b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023.12.15 16:14:22 renaming :/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.01/trade_data to :/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.01/trades\n", + "2023.12.15 16:14:22 renaming :/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.02/trade_data to :/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.02/trades\n", + "2023.12.15 16:14:22 renaming :/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.03/trade_data to :/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.03/trades\n" + ] + } + ], + "source": [ + "db.rename_table('trade_data', 'trades')" + ] + }, + { + "cell_type": "markdown", + "id": "5edc2eba", + "metadata": {}, + "source": [ + "During the rename process, the attribute in the `db` class is also updated. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "00eaf253", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datesympricesize
02020.01.01MSFT7.079266800
12020.01.01AAPL1.82432165
22020.01.01MSFT2.408259292
32020.01.01GOOG1.6754387
42020.01.01AAPL8.311168183
52020.01.01AAPL2.208693989
62020.01.01MSFT6.068126567
72020.01.01AAPL4.918926794
82020.01.01AAPL9.33186939
92020.01.01AAPL1.142611507
102020.01.01AAPL2.685874581
112020.01.01AAPL3.483591163
122020.01.01AAPL0.4422525466
132020.01.01MSFT7.406654976
142020.01.01MSFT2.493871171
152020.01.01AAPL9.24208828
162020.01.01MSFT0.3954522747
172020.01.01MSFT0.3441191512
182020.01.01GOOG9.662762998
192020.01.01AAPL9.601674812
202020.01.01AAPL4.969858910
212020.01.01GOOG1.048204830
222020.01.01GOOG0.9817644595
...............
10099992020.01.03AAPL9.75038799
\n", + "

1,010,000 rows × 4 columns

" + ], + "text/plain": [ + "pykx.PartitionedTable(pykx.q('\n", + "date sym price size\n", + "------------------------------\n", + "2020.01.01 MSFT 7.079266 800 \n", + "2020.01.01 AAPL 1.824321 65 \n", + "2020.01.01 MSFT 2.408259 292 \n", + "2020.01.01 GOOG 1.675438 7 \n", + "2020.01.01 AAPL 8.311168 183 \n", + "2020.01.01 AAPL 2.208693 989 \n", + "2020.01.01 MSFT 6.068126 567 \n", + "2020.01.01 AAPL 4.918926 794 \n", + "2020.01.01 AAPL 9.331869 39 \n", + "2020.01.01 AAPL 1.142611 507 \n", + "2020.01.01 AAPL 2.685874 581 \n", + "2020.01.01 AAPL 3.483591 163 \n", + "2020.01.01 AAPL 0.4422525 466 \n", + "2020.01.01 MSFT 7.406654 976 \n", + "2020.01.01 MSFT 2.493871 171 \n", + "2020.01.01 AAPL 9.242088 28 \n", + "2020.01.01 MSFT 0.3954522 747 \n", + "2020.01.01 MSFT 0.3441191 512 \n", + "2020.01.01 GOOG 9.662762 998 \n", + "2020.01.01 AAPL 9.601674 812 \n", + "..\n", + "'))" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.trades" + ] + }, + { + "cell_type": "markdown", + "id": "4c44fab2", + "metadata": {}, + "source": [ + "Renaming a column in a table is achieved using the `rename_column` method. For example, let's update the `sym` column in the `trades` table to be called `ticker`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "1c52d0b0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023.12.15 16:14:25 renaming sym to ticker in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.01/trades\n", + "2023.12.15 16:14:25 renaming sym to ticker in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.02/trades\n", + "2023.12.15 16:14:25 renaming sym to ticker in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.03/trades\n" + ] + } + ], + "source": [ + "db.rename_column('trades', 'sym', 'ticker')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b03c5c17", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datetickerpricesize
02020.01.01MSFT7.079266800
12020.01.01AAPL1.82432165
22020.01.01MSFT2.408259292
32020.01.01GOOG1.6754387
42020.01.01AAPL8.311168183
52020.01.01AAPL2.208693989
62020.01.01MSFT6.068126567
72020.01.01AAPL4.918926794
82020.01.01AAPL9.33186939
92020.01.01AAPL1.142611507
102020.01.01AAPL2.685874581
112020.01.01AAPL3.483591163
122020.01.01AAPL0.4422525466
132020.01.01MSFT7.406654976
142020.01.01MSFT2.493871171
152020.01.01AAPL9.24208828
162020.01.01MSFT0.3954522747
172020.01.01MSFT0.3441191512
182020.01.01GOOG9.662762998
192020.01.01AAPL9.601674812
202020.01.01AAPL4.969858910
212020.01.01GOOG1.048204830
222020.01.01GOOG0.9817644595
...............
10099992020.01.03AAPL9.75038799
\n", + "

1,010,000 rows × 4 columns

" + ], + "text/plain": [ + "pykx.PartitionedTable(pykx.q('\n", + "date ticker price size\n", + "--------------------------------\n", + "2020.01.01 MSFT 7.079266 800 \n", + "2020.01.01 AAPL 1.824321 65 \n", + "2020.01.01 MSFT 2.408259 292 \n", + "2020.01.01 GOOG 1.675438 7 \n", + "2020.01.01 AAPL 8.311168 183 \n", + "2020.01.01 AAPL 2.208693 989 \n", + "2020.01.01 MSFT 6.068126 567 \n", + "2020.01.01 AAPL 4.918926 794 \n", + "2020.01.01 AAPL 9.331869 39 \n", + "2020.01.01 AAPL 1.142611 507 \n", + "2020.01.01 AAPL 2.685874 581 \n", + "2020.01.01 AAPL 3.483591 163 \n", + "2020.01.01 AAPL 0.4422525 466 \n", + "2020.01.01 MSFT 7.406654 976 \n", + "2020.01.01 MSFT 2.493871 171 \n", + "2020.01.01 AAPL 9.242088 28 \n", + "2020.01.01 MSFT 0.3954522 747 \n", + "2020.01.01 MSFT 0.3441191 512 \n", + "2020.01.01 GOOG 9.662762 998 \n", + "2020.01.01 AAPL 9.601674 812 \n", + "..\n", + "'))" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.trades" + ] + }, + { + "cell_type": "markdown", + "id": "148207eb", + "metadata": {}, + "source": [ + "To safely apply a function to modify the `price` column within the database, first create a copy of the column." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f7d2f116", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023.12.15 16:14:29 copying price to price_copy in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.01/trades\n", + "2023.12.15 16:14:29 copying price to price_copy in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.02/trades\n", + "2023.12.15 16:14:29 copying price to price_copy in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.03/trades\n" + ] + } + ], + "source": [ + "db.copy_column('trades', 'price', 'price_copy')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "9bad2096", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datetickerpricesizeprice_copy
02020.01.01MSFT7.0792668007.079266
12020.01.01AAPL1.824321651.824321
22020.01.01MSFT2.4082592922.408259
32020.01.01GOOG1.67543871.675438
42020.01.01AAPL8.3111681838.311168
52020.01.01AAPL2.2086939892.208693
62020.01.01MSFT6.0681265676.068126
72020.01.01AAPL4.9189267944.918926
82020.01.01AAPL9.331869399.331869
92020.01.01AAPL1.1426115071.142611
102020.01.01AAPL2.6858745812.685874
112020.01.01AAPL3.4835911633.483591
122020.01.01AAPL0.44225254660.4422525
132020.01.01MSFT7.4066549767.406654
142020.01.01MSFT2.4938711712.493871
152020.01.01AAPL9.242088289.242088
162020.01.01MSFT0.39545227470.3954522
172020.01.01MSFT0.34411915120.3441191
182020.01.01GOOG9.6627629989.662762
192020.01.01AAPL9.6016748129.601674
202020.01.01AAPL4.9698589104.969858
212020.01.01GOOG1.0482048301.048204
222020.01.01GOOG0.98176445950.9817644
..................
10099992020.01.03AAPL9.750387999.750387
\n", + "

1,010,000 rows × 4 columns

" + ], + "text/plain": [ + "pykx.PartitionedTable(pykx.q('\n", + "date ticker price size price_copy\n", + "-------------------------------------------\n", + "2020.01.01 MSFT 7.079266 800 7.079266 \n", + "2020.01.01 AAPL 1.824321 65 1.824321 \n", + "2020.01.01 MSFT 2.408259 292 2.408259 \n", + "2020.01.01 GOOG 1.675438 7 1.675438 \n", + "2020.01.01 AAPL 8.311168 183 8.311168 \n", + "2020.01.01 AAPL 2.208693 989 2.208693 \n", + "2020.01.01 MSFT 6.068126 567 6.068126 \n", + "2020.01.01 AAPL 4.918926 794 4.918926 \n", + "2020.01.01 AAPL 9.331869 39 9.331869 \n", + "2020.01.01 AAPL 1.142611 507 1.142611 \n", + "2020.01.01 AAPL 2.685874 581 2.685874 \n", + "2020.01.01 AAPL 3.483591 163 3.483591 \n", + "2020.01.01 AAPL 0.4422525 466 0.4422525 \n", + "2020.01.01 MSFT 7.406654 976 7.406654 \n", + "2020.01.01 MSFT 2.493871 171 2.493871 \n", + "2020.01.01 AAPL 9.242088 28 9.242088 \n", + "2020.01.01 MSFT 0.3954522 747 0.3954522 \n", + "2020.01.01 MSFT 0.3441191 512 0.3441191 \n", + "2020.01.01 GOOG 9.662762 998 9.662762 \n", + "2020.01.01 AAPL 9.601674 812 9.601674 \n", + "..\n", + "'))" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.trades" + ] + }, + { + "cell_type": "markdown", + "id": "3c63e2bb", + "metadata": {}, + "source": [ + "You can now apply a function to the copied column without the risk of losing the original data. Below we are modifying the copied column by multiplying the contents by 2." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "483a3b48", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023.12.15 16:14:31 resaving column price_copy (type 9) in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.01/trades\n", + "2023.12.15 16:14:31 resaving column price_copy (type 9) in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.02/trades\n", + "2023.12.15 16:14:31 resaving column price_copy (type 9) in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.03/trades\n" + ] + } + ], + "source": [ + "db.apply_function('trades', 'price_copy', kx.q('{2*x}'))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "e5285600", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datetickerpricesizeprice_copy
02020.01.01MSFT7.07926680014.15853
12020.01.01AAPL1.824321653.648642
22020.01.01MSFT2.4082592924.816519
32020.01.01GOOG1.67543873.350875
42020.01.01AAPL8.31116818316.62234
52020.01.01AAPL2.2086939894.417385
62020.01.01MSFT6.06812656712.13625
72020.01.01AAPL4.9189267949.837851
82020.01.01AAPL9.3318693918.66374
92020.01.01AAPL1.1426115072.285222
102020.01.01AAPL2.6858745815.371748
112020.01.01AAPL3.4835911636.967183
122020.01.01AAPL0.44225254660.8845049
132020.01.01MSFT7.40665497614.81331
142020.01.01MSFT2.4938711714.987742
152020.01.01AAPL9.2420882818.48418
162020.01.01MSFT0.39545227470.7909045
172020.01.01MSFT0.34411915120.6882382
182020.01.01GOOG9.66276299819.32552
192020.01.01AAPL9.60167481219.20335
202020.01.01AAPL4.9698589109.939716
212020.01.01GOOG1.0482048302.096408
222020.01.01GOOG0.98176445951.963529
..................
10099992020.01.03AAPL9.7503879919.50077
\n", + "

1,010,000 rows × 4 columns

" + ], + "text/plain": [ + "pykx.PartitionedTable(pykx.q('\n", + "date ticker price size price_copy\n", + "-------------------------------------------\n", + "2020.01.01 MSFT 7.079266 800 14.15853 \n", + "2020.01.01 AAPL 1.824321 65 3.648642 \n", + "2020.01.01 MSFT 2.408259 292 4.816519 \n", + "2020.01.01 GOOG 1.675438 7 3.350875 \n", + "2020.01.01 AAPL 8.311168 183 16.62234 \n", + "2020.01.01 AAPL 2.208693 989 4.417385 \n", + "2020.01.01 MSFT 6.068126 567 12.13625 \n", + "2020.01.01 AAPL 4.918926 794 9.837851 \n", + "2020.01.01 AAPL 9.331869 39 18.66374 \n", + "2020.01.01 AAPL 1.142611 507 2.285222 \n", + "2020.01.01 AAPL 2.685874 581 5.371748 \n", + "2020.01.01 AAPL 3.483591 163 6.967183 \n", + "2020.01.01 AAPL 0.4422525 466 0.8845049 \n", + "2020.01.01 MSFT 7.406654 976 14.81331 \n", + "2020.01.01 MSFT 2.493871 171 4.987742 \n", + "2020.01.01 AAPL 9.242088 28 18.48418 \n", + "2020.01.01 MSFT 0.3954522 747 0.7909045 \n", + "2020.01.01 MSFT 0.3441191 512 0.6882382 \n", + "2020.01.01 GOOG 9.662762 998 19.32552 \n", + "2020.01.01 AAPL 9.601674 812 19.20335 \n", + "..\n", + "'))" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.trades" + ] + }, + { + "cell_type": "markdown", + "id": "a7db5560", + "metadata": {}, + "source": [ + "Once you are happy with the new values within the `price_copy` column, you can safely delete the `price` column, then rename the `price_copy` column to be called `price`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "fbb0fe94", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023.12.15 16:14:33 deleting column price from `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.01/trades\n", + "2023.12.15 16:14:33 deleting column price from `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.02/trades\n", + "2023.12.15 16:14:33 deleting column price from `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.03/trades\n", + "2023.12.15 16:14:33 renaming price_copy to price in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.01/trades\n", + "2023.12.15 16:14:33 renaming price_copy to price in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.02/trades\n", + "2023.12.15 16:14:33 renaming price_copy to price in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.03/trades\n" + ] + } + ], + "source": [ + "db.delete_column('trades', 'price')\n", + "db.rename_column('trades', 'price_copy', 'price')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "2810b08f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datetickersizeprice
02020.01.01MSFT80014.15853
12020.01.01AAPL653.648642
22020.01.01MSFT2924.816519
32020.01.01GOOG73.350875
42020.01.01AAPL18316.62234
52020.01.01AAPL9894.417385
62020.01.01MSFT56712.13625
72020.01.01AAPL7949.837851
82020.01.01AAPL3918.66374
92020.01.01AAPL5072.285222
102020.01.01AAPL5815.371748
112020.01.01AAPL1636.967183
122020.01.01AAPL4660.8845049
132020.01.01MSFT97614.81331
142020.01.01MSFT1714.987742
152020.01.01AAPL2818.48418
162020.01.01MSFT7470.7909045
172020.01.01MSFT5120.6882382
182020.01.01GOOG99819.32552
192020.01.01AAPL81219.20335
202020.01.01AAPL9109.939716
212020.01.01GOOG8302.096408
222020.01.01GOOG5951.963529
...............
10099992020.01.03AAPL9919.50077
\n", + "

1,010,000 rows × 4 columns

" + ], + "text/plain": [ + "pykx.PartitionedTable(pykx.q('\n", + "date ticker size price \n", + "--------------------------------\n", + "2020.01.01 MSFT 800 14.15853 \n", + "2020.01.01 AAPL 65 3.648642 \n", + "2020.01.01 MSFT 292 4.816519 \n", + "2020.01.01 GOOG 7 3.350875 \n", + "2020.01.01 AAPL 183 16.62234 \n", + "2020.01.01 AAPL 989 4.417385 \n", + "2020.01.01 MSFT 567 12.13625 \n", + "2020.01.01 AAPL 794 9.837851 \n", + "2020.01.01 AAPL 39 18.66374 \n", + "2020.01.01 AAPL 507 2.285222 \n", + "2020.01.01 AAPL 581 5.371748 \n", + "2020.01.01 AAPL 163 6.967183 \n", + "2020.01.01 AAPL 466 0.8845049\n", + "2020.01.01 MSFT 976 14.81331 \n", + "2020.01.01 MSFT 171 4.987742 \n", + "2020.01.01 AAPL 28 18.48418 \n", + "2020.01.01 MSFT 747 0.7909045\n", + "2020.01.01 MSFT 512 0.6882382\n", + "2020.01.01 GOOG 998 19.32552 \n", + "2020.01.01 AAPL 812 19.20335 \n", + "..\n", + "'))" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.trades" + ] + }, + { + "cell_type": "markdown", + "id": "119a373b", + "metadata": {}, + "source": [ + "To convert the data type of a column, you can use the `set_column_type` method. Before we do that, we can look at the metadata information for the table using the `meta` method. \n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "45f01b75", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tfa
c
date\"d\"
ticker\"s\"
size\"j\"
price\"f\"
" + ], + "text/plain": [ + "pykx.KeyedTable(pykx.q('\n", + "c | t f a\n", + "------| -----\n", + "date | d \n", + "ticker| s \n", + "size | j \n", + "price | f \n", + "'))" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kx.q.meta(db.trades)" + ] + }, + { + "cell_type": "markdown", + "id": "ffad39b1", + "metadata": {}, + "source": [ + "Currently the `size` column is the type `LongAtom`. We will update this to be a type `ShortAtom`." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "3706ad43", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023.12.15 16:20:03 resaving column size (type 5) in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.01/trades\n", + "2023.12.15 16:20:03 resaving column size (type 5) in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.02/trades\n", + "2023.12.15 16:20:03 resaving column size (type 5) in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.03/trades\n" + ] + } + ], + "source": [ + "db.set_column_type('trades', 'size', kx.ShortAtom)" + ] + }, + { + "cell_type": "markdown", + "id": "319317bf", + "metadata": {}, + "source": [ + "Now let's apply the `grouped` attribute to the `ticker` column. For more information on attributes in kdb+, please refer to the Q for Mortals [Attributes section](https://code.kx.com/q4m3/8_Tables/#88-attributes)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "fd550ac7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023.12.15 16:20:04 resaving column ticker (type 20) in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.01/trades\n", + "2023.12.15 16:20:04 resaving column ticker (type 20) in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.02/trades\n", + "2023.12.15 16:20:04 resaving column ticker (type 20) in `:/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.03/trades\n" + ] + } + ], + "source": [ + "db.set_column_attribute('trades', 'ticker', 'grouped')" + ] + }, + { + "cell_type": "markdown", + "id": "95e9a5a9", + "metadata": {}, + "source": [ + "Let's revisit the metadata of the table to ensure they have been applied correctly." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "debf733d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tfa
c
date\"d\"
ticker\"s\"g
size\"h\"
price\"f\"
" + ], + "text/plain": [ + "pykx.KeyedTable(pykx.q('\n", + "c | t f a\n", + "------| -----\n", + "date | d \n", + "ticker| s g\n", + "size | h \n", + "price | f \n", + "'))" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kx.q.meta(db.trades)" + ] + }, + { + "cell_type": "markdown", + "id": "e75b07ae", + "metadata": {}, + "source": [ + "## Onboarding your next table\n", + "\n", + "Now that you have successfully set up one table, you may want to add a second table. We follow the same method as before and create the `quotes` table using the `create` method. In this example, the `quotes` table only contains data for `2020.01.03`." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "b04c2f77", + "metadata": {}, + "outputs": [], + "source": [ + "quotes = kx.Table(data={\n", + " 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']),\n", + " 'open': kx.random.random(N, 10.0),\n", + " 'high': kx.random.random(N, 10.0),\n", + " 'low': kx.random.random(N, 10.0),\n", + " 'close': kx.random.random(N, 10.0)\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "6914a50e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing Database Partition 2020-01-03 to table quotes\n" + ] + } + ], + "source": [ + "db.create(quotes, 'quotes', date(2020, 1, 3), by_field = 'sym')" + ] + }, + { + "cell_type": "markdown", + "id": "87670793", + "metadata": {}, + "source": [ + "All tables within a database must contain the same partition structure. To ensure the new table can be accessed, the `quotes` table needs to exist in every partition within the database, even if there is no data for that partition. This is called backfilling data. For the partitions where the `quotes` table is missing, we use the `fill_database` method. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "e6f873e0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Successfully filled missing tables to partition: :/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.02\n", + "Successfully filled missing tables to partition: :/var/folders/l8/t7s11kcs02x3dchm9_m48mq80000gn/T/tmp2ts68edc/db/2020.01.01\n" + ] + } + ], + "source": [ + "db.fill_database()" + ] + }, + { + "cell_type": "markdown", + "id": "e41e8589", + "metadata": {}, + "source": [ + "Now that the database has resolved the missing tables within the partitions, we can view the new `quotes` table" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "b3be6075", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datesymopenhighlowclose
02020.01.03AAPL8.2040260.91152013.9168649.813545
12020.01.03AAPL8.0927546.0195780.085131372.825277
22020.01.03AAPL1.4250438.8817194.2854617.820761
32020.01.03AAPL7.1727363.339855.9994033.010211
42020.01.03AAPL2.9741851.5593722.763565.182052
52020.01.03AAPL3.2007597.4850887.9288136.437041
62020.01.03AAPL7.7495995.5594440.33004049.424896
72020.01.03AAPL4.8859614.6774328.2883184.366883
82020.01.03AAPL7.4128915.0821899.2140367.900838
92020.01.03AAPL6.6258479.7921396.2088189.195079
102020.01.03AAPL2.0757975.3403210.40387090.7533655
112020.01.03AAPL4.7976428.3733174.981566.299731
122020.01.03AAPL0.86887651.9676163.3495734.094004
132020.01.03AAPL2.6841430.057673528.8781742.166685
142020.01.03AAPL3.1810934.6861130.89676137.39341
152020.01.03AAPL3.6302680.45638092.890256.428857
162020.01.03AAPL7.3424699.2984047.0985091.698009
172020.01.03AAPL1.2931448.1258347.2141845.946857
182020.01.03AAPL8.0513221.4461929.4361854.824975
192020.01.03AAPL1.0187811.2994011.181810.6091787
202020.01.03AAPL4.0029094.1157725.0362111.680549
212020.01.03AAPL0.98641044.750850.51407352.468647
222020.01.03AAPL8.3885616.1704051.0671532.034476
.....................
99992020.01.03MSFT2.8328181.4661713.4575455.985203
\n", + "

10,000 rows × 6 columns

" + ], + "text/plain": [ + "pykx.PartitionedTable(pykx.q('\n", + "date sym open high low close \n", + "---------------------------------------------------------\n", + "2020.01.03 AAPL 8.204026 0.9115201 3.916864 9.813545 \n", + "2020.01.03 AAPL 8.092754 6.019578 0.08513137 2.825277 \n", + "2020.01.03 AAPL 1.425043 8.881719 4.285461 7.820761 \n", + "2020.01.03 AAPL 7.172736 3.33985 5.999403 3.010211 \n", + "2020.01.03 AAPL 2.974185 1.559372 2.76356 5.182052 \n", + "2020.01.03 AAPL 3.200759 7.485088 7.928813 6.437041 \n", + "2020.01.03 AAPL 7.749599 5.559444 0.3300404 9.424896 \n", + "2020.01.03 AAPL 4.885961 4.677432 8.288318 4.366883 \n", + "2020.01.03 AAPL 7.412891 5.082189 9.214036 7.900838 \n", + "2020.01.03 AAPL 6.625847 9.792139 6.208818 9.195079 \n", + "2020.01.03 AAPL 2.075797 5.340321 0.4038709 0.7533655\n", + "2020.01.03 AAPL 4.797642 8.373317 4.98156 6.299731 \n", + "2020.01.03 AAPL 0.8688765 1.967616 3.349573 4.094004 \n", + "2020.01.03 AAPL 2.684143 0.05767352 8.878174 2.166685 \n", + "2020.01.03 AAPL 3.181093 4.686113 0.8967613 7.39341 \n", + "2020.01.03 AAPL 3.630268 0.4563809 2.89025 6.428857 \n", + "2020.01.03 AAPL 7.342469 9.298404 7.098509 1.698009 \n", + "2020.01.03 AAPL 1.293144 8.125834 7.214184 5.946857 \n", + "2020.01.03 AAPL 8.051322 1.446192 9.436185 4.824975 \n", + "2020.01.03 AAPL 1.018781 1.299401 1.18181 0.6091787\n", + "..\n", + "'))" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.quotes" + ] + }, + { + "cell_type": "markdown", + "id": "43366fab", + "metadata": {}, + "source": [ + "Finally, to view the amount of saved data you can count the number of rows per partition using `partition_count`" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "78b45d91", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
quotestrades
2020.01.010500425
2020.01.020499575
2020.01.031000010000
" + ], + "text/plain": [ + "pykx.Dictionary(pykx.q('\n", + " | quotes trades\n", + "----------| -------------\n", + "2020.01.01| 0 500425\n", + "2020.01.02| 0 499575\n", + "2020.01.03| 10000 10000 \n", + "'))" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.partition_count()" + ] + }, + { + "cell_type": "markdown", + "id": "b03cfb4b", + "metadata": {}, + "source": [ + "## Cleanup temporary database created" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "f3883344", + "metadata": {}, + "outputs": [], + "source": [ + "tempdir.cleanup()" + ] + }, + { + "cell_type": "markdown", + "id": "90049e04", + "metadata": {}, + "source": [ + "---" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/subscriber/archive.zip b/docs/examples/subscriber/archive.zip index 0e8323e..655bb7d 100644 Binary files a/docs/examples/subscriber/archive.zip and b/docs/examples/subscriber/archive.zip differ diff --git a/docs/examples/subscriber/readme.md b/docs/examples/subscriber/readme.md index f6cc736..77e3178 100644 --- a/docs/examples/subscriber/readme.md +++ b/docs/examples/subscriber/readme.md @@ -45,7 +45,7 @@ q process is started. 
```bash // run the subscriber which will automatically connect $ python subscriber.py -===== Initital Table ===== +===== Initial Table ===== a b --- 4 8 @@ -58,7 +58,7 @@ a b 2 1 1 8 8 5 -===== Initital Table ===== +===== Initial Table ===== ``` diff --git a/docs/examples/subscriber/subscriber.py b/docs/examples/subscriber/subscriber.py index 5ab019a..e187d34 100644 --- a/docs/examples/subscriber/subscriber.py +++ b/docs/examples/subscriber/subscriber.py @@ -30,9 +30,9 @@ async def main_loop(q): async def main(): global table async with kx.RawQConnection(port=5001) as q: - print('===== Initital Table =====') + print('===== Initial Table =====') print(table) - print('===== Initital Table =====') + print('===== Initial Table =====') # Set the variable py_server on the q process pointing towards this processes IPC connection # We use neg to ensure the messages are sent async so no reply is expected from this process await q('py_server: neg .z.w') diff --git a/docs/examples/subscriber/subscriber_async.py b/docs/examples/subscriber/subscriber_async.py index 4db8388..30b628d 100644 --- a/docs/examples/subscriber/subscriber_async.py +++ b/docs/examples/subscriber/subscriber_async.py @@ -25,9 +25,9 @@ async def main_loop(q): async def main(): global table async with kx.RawQConnection(port=5001, event_loop=asyncio.get_event_loop()) as q: - print('===== Initital Table =====') + print('===== Initial Table =====') print(table) - print('===== Initital Table =====') + print('===== Initial Table =====') # Set the variable py_server on the q process pointing towards this processes IPC connection # We use neg to ensure the messages are sent async so no reply is expected from this process await q('py_server: neg .z.w') diff --git a/docs/examples/threaded_execution/archive.zip b/docs/examples/threaded_execution/archive.zip index 9d31432..6e58739 100644 Binary files a/docs/examples/threaded_execution/archive.zip and b/docs/examples/threaded_execution/archive.zip differ diff --git 
a/docs/examples/threaded_execution/asyncio_threading.py b/docs/examples/threaded_execution/asyncio_threading.py new file mode 100644 index 0000000..9d96f06 --- /dev/null +++ b/docs/examples/threaded_execution/asyncio_threading.py @@ -0,0 +1,57 @@ +import os +import asyncio +os.environ['PYKX_THREADING'] = '1' +os.environ['PYKX_BETA_FEATURES'] = '1' + +import pykx as kx + + +table = kx.q('table: ([] a: 10?10; b: 10?10)') + + +def assert_result(res): + # assert message from q process has the correct schema to be appended to the table + return type(res) is kx.LongVector and len(res) == 2 + + +async def upsert_threaded(q, calls): + counter = calls + while True: + result = await q.poll_recv_async() + if assert_result(result): + kx.q.upsert('table', result) + result = None + counter -= 1 + if counter <= 0: + break + + +async def main(): + N = 20 + calls = 1000 + conns = [await kx.RawQConnection(port=5001, event_loop=asyncio.get_event_loop()) for _ in range(N)] # noqa + main_q_con = kx.SyncQConnection(port=5001) + print('===== Initial Table =====') + print(kx.q('table')) + print('===== Initial Table =====') + # Set the variable py_server on the q process pointing towards this processes IPC connection + # We use neg to ensure the messages are sent async so no reply is expected from this process + [await conns[i](f'py_server{i}: neg .z.w') for i in range(N)] + query = 'send_data: {' + for i in range(N): + query += f'py_server{i}[2?100];' + query = query[:-1] + '}' + + await conns[0](query) + + tasks = [asyncio.create_task(upsert_threaded(conns[i], calls)) for i in range(N)] + main_q_con(f'do[{calls}; send_data[]]', wait=False) + [await t for t in tasks] + print(kx.q('table')) + + +if __name__ == '__main__': + try: + asyncio.run(main()) + finally: + kx.shutdown_thread() diff --git a/docs/examples/threaded_execution/threading.md b/docs/examples/threaded_execution/threading.md new file mode 100644 index 0000000..c3dbef4 --- /dev/null +++ 
b/docs/examples/threaded_execution/threading.md @@ -0,0 +1,96 @@ +# PyKX Calling into q from multiple threads + +The purpose of this example is to provide a quickstart for setting up a python process using `PyKX` +to call into `EmbeddedQ` from multiple threads. + +To follow along with this example please feel free to download this +zip archive that contains a copy of the python scripts and this +writeup. + +## Quickstart + +This example creates a python process that creates multiple tasks/threads that subscribe to a `q` +process over IPC and upon receiving a new row upsert it to a local table. There are 2 scripts +included: `asyncio_threading.py` and `threads.py`, the first uses asyncio tasks running on +separate threads and the second example uses the python `threading` library directly to spawn +threads. + + +### Running the example + +```bash +$ python asyncio_threading.py +// or +$ python threads.py +``` + +### Outcome + +The initial table will be printed upon starting the program, once all the threads/tasks have +upserted all of the rows they have received to the table the final table will be printed. + +``` +$ python asyncio_threading.py +===== Initial Table ===== +a b +--- +4 8 +9 1 +2 9 +7 5 +0 4 +1 6 +9 6 +2 1 +1 8 +8 5 +===== Initial Table ===== +a b +----- +4 8 +9 1 +2 9 +7 5 +0 4 +1 6 +9 6 +2 1 +1 8 +8 5 +7 63 +11 13 +80 89 +43 50 +96 35 +35 83 +28 31 +96 12 +83 16 +77 33 +.. +``` + +### Important Note on usage + +Since using `PYKX_THREADING` creates a background thread to run the calls into `q`, the +background thread must be shut down when finished. The easiest way to ensure this is done is by using +a `try` - `finally` block around the entrypoint to your script. This will ensure that even in the +event of an error the background thread will still be shut down correctly so python can exit. + +``` +import os +os.environ['PYKX_THREADING'] = '1' +os.environ['PYKX_BETA_FEATURES'] = '1' +import pykx as kx + +def main(): + ... 
+ + +if __name__ == '__main__': + try: + main() + finally: + # Must shutdown the background thread to properly exit + kx.shutdown_thread() +``` diff --git a/docs/examples/threaded_execution/threads.py b/docs/examples/threaded_execution/threads.py new file mode 100644 index 0000000..f93689e --- /dev/null +++ b/docs/examples/threaded_execution/threads.py @@ -0,0 +1,60 @@ +import os +import asyncio +from threading import Thread +os.environ['PYKX_THREADING'] = '1' +os.environ['PYKX_BETA_FEATURES'] = '1' + +import pykx as kx + + +table = kx.q('table: ([] a: 10?10; b: 10?10)') + + +def assert_result(res): + # assert message from q process has the correct schema to be appended to the table + return type(res) is kx.LongVector and len(res) == 2 + + +def upsert_threaded(q, calls): + counter = calls + while True: + result = q.poll_recv() + if result is not None and assert_result(result): + kx.q.upsert('table', result) + result = None + counter -= 1 + if counter <= 0: + break + return 0 + + +async def main(): + N = 20 + calls = 1000 + conns = [await kx.RawQConnection(port=5001, event_loop=asyncio.get_event_loop()) for _ in range(N)] # noqa + main_q_con = kx.SyncQConnection(port=5001) + print('===== Initial Table =====') + print(kx.q('table')) + print('===== Initial Table =====') + # Set the variable py_server on the q process pointing towards this processes IPC connection + # We use neg to ensure the messages are sent async so no reply is expected from this process + [await conns[i](f'py_server{i}: neg .z.w') for i in range(N)] + query = 'send_data: {' + for i in range(N): + query += f'py_server{i}[2?100];' + query = query[:-1] + '}' + + await conns[0](query) + + tasks = [Thread(target=upsert_threaded, args=[conns[i], calls]) for i in range(N)] + [t.start() for t in tasks] + main_q_con(f'do[{calls}; send_data[]]', wait=False) + [t.join() for t in tasks] + print(kx.q('table')) + + +if __name__ == '__main__': + try: + asyncio.run(main()) + finally: + kx.shutdown_thread() diff 
--git a/docs/extras/known_issues.md b/docs/extras/known_issues.md index c60cbf6..b16d656 100644 --- a/docs/extras/known_issues.md +++ b/docs/extras/known_issues.md @@ -9,3 +9,14 @@ use a `MonthVector` or a `DayVector`. In the scenario that it is not possible to determine the expected type a warning will be raised and the `DayVector` type will be used as a default. +- `None` and `pykx.Identity(pykx.q('::'))` do not pass through to single argument Python functions set under q. See [here](../pykx-under-q/known_issues.md#default-parameter). + + ``` + >>> def func(n=2): + ... return n + ... + >>> kx.q('func', None) + pykx.LongAtom(pykx.q('2')) + >>> kx.q('func', kx.q('::')) + pykx.LongAtom(pykx.q('2')) + ``` diff --git a/docs/faq.md b/docs/faq.md index 977c223..b639e74 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -1,4 +1,9 @@ -# FAQ +# FAQs + +## Known Issues + +* [PyKX known issues](extras/known_issues.md) +* [PyKX under q known issues](pykx-under-q/known_issues.md) ## How to work around the `'cores` licensing error? @@ -11,22 +16,52 @@ This error indicates your license is limited to a given number of cores but PyKX tried to use more cores than the license allows. 
- On Linux you can use `taskset` to limit the number of cores used by the python process and likewise PyKX and EmbeddedQ: -``` -# Example to limit python to the 4 first cores on a 8 cores CPU -$ taskset -c 0-3 python -``` + + ```bash + # Example to limit python to the 4 first cores on a 8 cores CPU + $ taskset -c 0-3 python + ``` - You can also do this in python before importing PyKX (Linux only): -``` ->>> import os ->>> os.sched_setaffinity(0, [0, 1, 2, 3]) ->>> import pykx as kx ->>> kx.q('til 10') -pykx.LongVector(pykx.q('0 1 2 3 4 5 6 7 8 9')) -``` + + ```bash + >>> import os + >>> os.sched_setaffinity(0, [0, 1, 2, 3]) + >>> import pykx as kx + >>> kx.q('til 10') + pykx.LongVector(pykx.q('0 1 2 3 4 5 6 7 8 9')) + ``` - On Windows you can use the `start` command with its `/affinity` argument (see: `> help start`): + + ```bat + > start /affinity f python + ``` + + (above, `0xf = 00001111b`, so the python process will only use the four cores for which the mask bits are equal to 1) + +## How does PyKX determine the license that is used? + +The following outlines the paths searched for when loading PyKX + +1. Search for `kx.lic`, `kc.lic` and `k4.lic` license files in this order within the following locations + 1. Current working directory + 1. Location defined by environment variable `QLIC` if set + 1. Location defined by environment variable `QHOME` if set +2. If a license is not found use the following environment variables (if they are set) to install and make use of a license + 1. `KDB_LICENSE_B64` pointing to a base64 encoded version of a `kc.lic` license + 1. `KDB_K4LICENSE_B64` pointing to a base64 encoded version of a `k4.lic` license +3. If a license has not been located according to the above search you will be guided to install a license following a prompt based license installation walkthrough. + +## Can I use PyKX in a subprocess? + +Yes, however doing so requires some considerations. 
To ensure that PyKX is initialized in a clean environment it is suggested that the creation of subprocesses reliant on PyKX should be done within a code block making use of the `kx.PyKXReimport` functionality as follows: + +```python +import pykx as kx +import subprocess +with kx.PyKXReimport(): + subprocess.Popen(['python', 'file.py']) # Run Python with a file that imports PyKX ``` -> start /affinity f python -``` -(above, 0xf = 00001111b, so the python process will only use the four cores for which the mask bits are equal to 1) + +Failure to use this functionality can result in segmentation faults as noted in the troubleshooting guide [here](troubleshooting.md). For more information on the `PyKXReimport` functionality see its API documentation [here](api/reimporting.md). diff --git a/docs/getting-started/PyKX Introduction Notebook.ipynb b/docs/getting-started/PyKX Introduction Notebook.ipynb new file mode 100644 index 0000000..1623b7e --- /dev/null +++ b/docs/getting-started/PyKX Introduction Notebook.ipynb @@ -0,0 +1,1096 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PyKX Introduction Notebook\n", + "\n", + "The purpose of this notebook is to provide an introduction to the capabilities and functionality made available to you with PyKX.\n", + "\n", + "To follow along please download this notebook using the following 'link.'\n", + "\n", + "This Notebook is broken into the following sections\n", + "\n", + "1. [How to import PyKX](#How-to-import-Pykx)\n", + "1. [The basic data structures of PyKX](#The-basic-data-structures-of-PyKX)\n", + "1. [Accessing and creating PyKX objects](#Accessing-and-creating-PyKX-objects)\n", + "1. 
[Running analytics on objects in PyKX](#Running-analytics-on-objects-in-PyKX)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Welcome to PyKX!\n", + "\n", + "PyKX is a Python library built and maintained for interfacing seamlessly with the world's fastest time-series database technology kdb+ and its underlying vector programming language q.\n", + "\n", + "Its aim is to provide you and all Python data engineers and data scientists with an interface to efficiently apply analytics on large volumes of on-disk and in-memory data, in a fraction of the time of competitor libraries.\n", + "\n", + "## How to import PyKX\n", + "\n", + "To access PyKX and its functions import it in your Python code as follows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": ["hide_code"] + }, + "outputs": [], + "source": [ + "import os\n", + "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + }, + "outputs": [], + "source": [ + "import pykx as kx\n", + "kx.q.system.console_size = [10, 80]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The shortening of the import name to `kx` is done for readability of code that uses PyKX and is the intended standard for the library. As such we recommend that you always use `import pykx as kx` when using the library.\n", + "\n", + "Below we load additional libraries used through this notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The basic data structures of PyKX\n", + "\n", + "Central to your interaction with PyKX are the various data types that are supported by the library. Fundamentally, PyKX is built atop a fully featured functional programming language `q` which provides small footprint data structures that can be used in analytic calculations and the creation of highly performant databases. The types we show below are generated from Python equivalent types but, as you will see through this notebook, they can also be created in other ways.\n", + "\n", + "In this section we will describe the basic elements which you will come in contact with as you traverse the library and explain why/how they are different.\n", + "\n", + "### PyKX Atomic Types\n", + "\n", + "In PyKX an atom denotes a single irreducible value of a specific type, for example you may come across `pykx.FloatAtom` or `pykx.DateAtom` objects, which may have been generated as follows from an equivalent Pythonic representation. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.FloatAtom(1.0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import date\n", + "kx.DateAtom(date(2020, 1, 1))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### PyKX Vector Types\n", + "\n", + "Similar to atoms, vectors are a data structure composed of a collection of multiple elements of a single specified type. 
These objects in PyKX along with lists described below form the basis for the majority of the other important data structures that you will encounter including dictionaries and tables.\n", + "\n", + "Typed vector objects provide significant benefits when it comes to the applications of analytics over Python lists for example. Similar to NumPy, PyKX gains from the underlying speed of its analytic engine when operating on these strictly typed objects.\n", + "\n", + "Vector type objects are always 1-D and as such are/can be indexed along a single axis.\n", + "\n", + "In the following example we are creating PyKX vectors from common Python equivalent `numpy` and `pandas` objects." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.IntVector(np.array([1, 2, 3, 4], dtype=np.int32))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.toq(pd.Series([1, 2, 3, 4]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### PyKX Lists\n", + "\n", + "A `List` in PyKX can loosely be described as an untyped vector object. Unlike vectors which are optimised for the performance of analytics, lists are more commonly used for storing reference information or matrix data.\n", + "\n", + "Unlike vector objects which are by definition 1-D in shape, lists can be ragged N-Dimensional objects. This makes them useful for the storage of some complex data structures but limits their performance when dealing with data-access/data modification tasks." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.List([[1, 2, 3], [1.0, 1.1, 1.2], ['a', 'b', 'c']])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### PyKX Dictionaries\n", + "\n", + "A dictionary in PyKX is defined as a mapping between a direct key-value mapping, the list of keys and values to which they are associated must have the same count. While it can be considered as a key-value pair, it is physically stored as a pair of lists." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(kx.Dictionary({'x': [1, 2, 3], 'x1': np.array([1, 2, 3])}))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### PyKX Tables\n", + "\n", + "Tables in PyKX are a first-class typed entity which live in memory. They can be fundamentally described as a collection of named columns implemented as a dictionary. This mapping construct means that tables in PyKX are column-oriented which makes analytic operations on specified columns much faster than would be the case for a relational database equivalent.\n", + "\n", + "Tables in PyKX come in many forms but the key table types are as follows\n", + "\n", + "- `pykx.Table` \n", + "- `pykx.KeyedTable`\n", + "- `pykx.SplayedTable`\n", + "- `pykx.PartitionedTable`\n", + "\n", + "In this section we will deal only with the first two of these which constitute specifically the in-memory data table types. 
As will be discussed in later sections `Splayed` and `Partitioned` tables are memory-mapped on-disk data structures, these are derivations of the `pykx.Table` and `pykx.KeyedTable` type objects.\n", + "\n", + "#### `pykx.Table`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(kx.Table([[1, 2, 'a'], [2, 3, 'b'], [3, 4, 'c']], columns = ['col1', 'col2', 'col3']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(kx.Table(data = {'col1': [1, 2, 3], 'col2': [2 , 3, 4], 'col3': ['a', 'b', 'c']}))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `pykx.KeyedTable`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.Table(data = {'x': [1, 2, 3], 'x1': [2, 3, 4], 'x2': ['a', 'b', 'c']}).set_index(['x'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Other Data Types\n", + "\n", + "The above types outline the majority of the important type structures in PyKX but there are many others which you will encounter as you use the library, below we have outlined some of the important ones that you will run into through the rest of this notebook.\n", + "\n", + "#### `pykx.Lambda`\n", + "\n", + "A `pykx.Lambda` is the most basic kind of function within PyKX. They take between 0 and 8 parameters and are the building blocks for most analytics written by users when interacting with data from PyKX." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pykx_lambda = kx.q('{x+y}')\n", + "type(pykx_lambda)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pykx_lambda(1, 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `pykx.Projection`\n", + "\n", + "Similar to [functools.partial](https://docs.python.org/3/library/functools.html#functools.partial), functions in PyKX can have some of their parameters fixed in advance, resulting in a new function, which is called a projection. When this projection is called, the fixed parameters are no longer required, and cannot be provided.\n", + "\n", + "If the original function had `n` total parameters, and it had `m` provided, the result would be a function (projection) that requires a user to input `n-m` parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "projection = kx.q('{x+y}')(1)\n", + "projection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "projection(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Accessing and creating PyKX objects\n", + "\n", + "Now that we have seen some of the PyKX object types that you will encounter, practically speaking how will they be created in real-world scenarios?\n", + "\n", + "### Creating PyKX objects from Pythonic data types\n", + "\n", + "One of the most common ways that PyKX data is generated is through conversions from equivalent Pythonic data types. 
PyKX natively supports conversions to and from the following common Python data formats.\n", + "\n", + "- Python\n", + "- Numpy\n", + "- Pandas\n", + "- PyArrow\n", + "\n", + "In each of the above cases generation of PyKX objects is facilitated through the use of the `kx.toq` PyKX function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pydict = {'a': [1, 2, 3], 'b': ['a', 'b', 'c'], 'c': 2}\n", + "kx.toq(pydict)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nparray = np.array([1, 2, 3, 4], dtype = np.int32)\n", + "kx.toq(nparray)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pdframe = pd.DataFrame(data = {'a':[1, 2, 3], 'b': ['a', 'b', 'c']})\n", + "kx.toq(pdframe)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Random data generation\n", + "\n", + "PyKX provides users with a module for the creation of random data of user specified PyKX types or their equivalent Python types. 
The creation of random data is useful in prototyping analytics and is used extensively within our documentation when creating test examples.\n", + "\n", + "As a first example you can generate a list of 1,000,000 random floating point values between 0 and 1 as follows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.random.random(1000000, 1.0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If instead you wish to choose values randomly from a list, this can be facilitated by using the list as the second argument to your function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.random.random(5, [kx.LongAtom(1), ['a', 'b', 'c'], np.array([1.1, 1.2, 1.3])])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Random data does not only come in 1-Dimensional forms however and modifications to the first argument to be a list allow you to create multi-Dimensional PyKX Lists. The below examples are additionally using a PyKX trick where nulls/infinities can be used to generate random data across the full allowable range" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.random.random([2, 5], kx.GUIDAtom.null)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.random.random([2, 3, 4], kx.IntAtom.inf)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, users can set the seed for the random data generation explicitly allowing users to have consistency over the generated objects. 
This can be completed globally or for individual function calls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.random.seed(10)\n", + "kx.random.random(10, 2.0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.random.random(10, 2.0, seed = 10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Running q code to generate data\n", + "\n", + "As mentioned in the introduction PyKX provides an entrypoint to the vector programming language q, as such users of PyKX can execute q code directly via PyKX within a Python session. This is facilitated through use of calls to `kx.q`.\n", + "\n", + "Create some q data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.q('0 1 2 3 4')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.q('([idx:desc til 5]col1:til 5;col2:5?1f;col3:5?`2)')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Apply arguments to a user specified function `x+y`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.q('{x+y}', kx.LongAtom(1), kx.LongAtom(2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read data from a CSV file\n", + "\n", + "A lot of data that you run into for data analysis tasks comes in the form of CSV files, PyKX similar to Pandas provides a CSV reader called via `kx.q.read.csv`, in the following cell we will create a CSV to be read in using PyKX" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import csv\n", + "\n", + "with open('pykx.csv', 'w', newline='') as file:\n", + " writer = csv.writer(file)\n", + " field = [\"name\", \"age\", \"height\", 
\"country\"]\n", + " \n", + " writer.writerow(field)\n", + " writer.writerow([\"Oladele Damilola\", \"40\", \"180.0\", \"Nigeria\"])\n", + " writer.writerow([\"Alina Hricko\", \"23\", \"179.2\", \"Ukraine\"])\n", + " writer.writerow([\"Isabel Walter\", \"50\", \"179.5\", \"United Kingdom\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.q.read.csv('pykx.csv', types = {'age': kx.LongAtom, 'country': kx.SymbolAtom})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.remove('pykx.csv')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Querying external Processes via IPC\n", + "\n", + "One of the most common usage patterns in organisations with access to data in kdb+/q you will encounter is to query this data from an external server process infrastructure. In the example below we assume that you have q installed in addition to PyKX, see [here](https://kx.com/kdb-insights-personal-edition-license-download/) to install q alongside the license access for PyKX.\n", + "\n", + "First we set up a q/kdb+ server setting it on port 5000 and populating it with some data in the form of a table `tab`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import subprocess\n", + "import time\n", + "\n", + "try:\n", + " proc = subprocess.Popen(\n", + " ('q', '-p', '5000'),\n", + " stdin=subprocess.PIPE,\n", + " stdout=subprocess.DEVNULL,\n", + " stderr=subprocess.DEVNULL,\n", + " )\n", + " time.sleep(2)\n", + "except:\n", + " raise kx.QError('Unable to create q process on port 5000')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once a q process is available you can establish a connection to it for synchronous query execution as follows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "conn = kx.SyncQConnection(port = 5000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can now run q commands against the q server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "conn('tab:([]col1:100?`a`b`c;col2:100?1f;col3:100?0Ng)')\n", + "conn('select from tab where col1=`a')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Or use the PyKX query API" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "conn.qsql.select('tab', where=['col1=`a', 'col2<0.3'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Or use PyKX's context interface to run SQL server side if it's available to you" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "conn('\\l s.k_')\n", + "conn.sql('SELECT * FROM tab where col2>=0.5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally the q server used for this demonstration can be shut down" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "proc.stdin.close()\n", + "proc.kill()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Running analytics on objects in PyKX\n", + "\n", + "Like many Python libraries including Numpy and Pandas PyKX provides a number of ways that it's data can be used with analytics defined internal to the library and which you have self generated.\n", + "\n", + "### Using in-built methods on PyKX Vectors\n", + "\n", + "When you are interacting with PyKX Vectors you may wish to gain insights into these objects through the application of basic analytics such as calculation of the 
`mean`/`median`/`mode` of the vector" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "q_vector = kx.random.random(1000, 10.0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "q_vector.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "q_vector.max()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above is useful for basic analysis but will not be sufficient for more bespoke analytics on these vectors, to allow you more control over the analytics run you can also use the `apply` method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def bespoke_function(x, y):\n", + " return x*y\n", + "\n", + "q_vector.apply(bespoke_function, 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using in-built methods on PyKX Tables\n", + "\n", + "In addition to the vector processing capabilities of PyKX your ability to operate on Tabular structures is also important. 
Highlighted in greater depth within the Pandas-Like API documentation [here](../user-guide/advanced/Pandas_API.ipynb) these methods allow you to apply functions and gain insights into your data in a way that is familiar.\n", + "\n", + "In the below example you will use combinations of the most commonly used elements of this Table API operating on the following table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "N = 1000000\n", + "example_table = kx.Table(data = {\n", + " 'sym' : kx.random.random(N, ['a', 'b', 'c']),\n", + " 'col1' : kx.random.random(N, 10.0),\n", + " 'col2' : kx.random.random(N, 20)\n", + " }\n", + ")\n", + "example_table" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can search for and filter data within your tables using `loc` similarly to how this is achieved by Pandas as follows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "example_table.loc[example_table['sym'] == 'a']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This behavior is also incorporated when retrieving data from a table through the `__getitem__` method as you can see here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "example_table[example_table['sym'] == 'b']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can additionally set the index columns of the table, when dealing with PyKX tables this converts the table from a `pykx.Table` object to a `pykx.KeyedTable` object" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "example_table.set_index('sym')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Additional to basic data manipulation such as index setting you also get access to analytic 
capabilities such as the application of basic data manipulation operations such as `mean` and `median` as demonstrated here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print('mean:')\n", + "print(example_table.mean(numeric_only = True))\n", + "\n", + "print('median:')\n", + "print(example_table.median(numeric_only = True))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can make use of the `groupby` method which groups the PyKX tabular data which can then be used for analytic application.\n", + "\n", + "In your first example let's start by grouping the dataset based on the `sym` column and then calculating the `mean` for each column based on their `sym`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "example_table.groupby('sym').mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As an extension to the above groupby you can now consider a more complex example which is making use of `numpy` to run some calculations on the PyKX data, you will see later that this can be simplified further in this specific use-case" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def apply_func(x):\n", + " nparray = x.np()\n", + " return np.sqrt(nparray).mean()\n", + "\n", + "example_table.groupby('sym').apply(apply_func)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Time-series specific joining of data can be completed using `merge_asof` joins. 
In this example a number of tables with temporal information are created, namely a `trades` and a `quotes` table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trades = kx.Table(data={\n", + " \"time\": [\n", + " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.030\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.041\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.049\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.072\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.075\")\n", + " ],\n", + " \"ticker\": [\n", + " \"GOOG\",\n", + " \"MSFT\",\n", + " \"MSFT\",\n", + " \"MSFT\",\n", + " \"GOOG\",\n", + " \"AAPL\",\n", + " \"GOOG\",\n", + " \"MSFT\"\n", + " ],\n", + " \"bid\": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],\n", + " \"ask\": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03]\n", + "})\n", + "quotes = kx.Table(data={\n", + " \"time\": [\n", + " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.038\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.048\")\n", + " ],\n", + " \"ticker\": [\"MSFT\", \"MSFT\", \"GOOG\", \"GOOG\", \"AAPL\"],\n", + " \"price\": [51.95, 51.95, 720.77, 720.92, 98.0],\n", + " \"quantity\": [75, 155, 100, 100, 100]\n", + "})\n", + "\n", + "print('trades:')\n", + "display(trades)\n", + "print('quotes:')\n", + "display(quotes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When applying the asof join you can additionally use named arguments to ensure that it is possible to make a distinction between the tables that the columns originate from. 
In this case suffixing with `_trades` and `_quotes`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trades.merge_asof(quotes, on='time', suffixes=('_trades', '_quotes'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using PyKX/q native functions\n", + "\n", + "While use of the Pandas-Like API and methods provided off PyKX Vectors provides an effective method of applying analytics on PyKX data the most efficient and performant way you can run analytics on your data is through the use of the PyKX/q primitives which are available through the `kx.q` module.\n", + "\n", + "These include functionality for the calculation of moving averages, application of asof/window joins, column reversal etc. A full list of the available functions and some examples of their usage can be found [here](../api/pykx-execution/q.md).\n", + "\n", + "Here are a few examples of usage of how you can use these functions, broken into sections for convenience\n", + "\n", + "#### Mathematical functions\n", + "\n", + "##### mavg\n", + "\n", + "Calculate a series of average values across a list using a rolling window" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.q.mavg(10, kx.random.random(10000, 2.0))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### cor\n", + "\n", + "Calculate the correlation between two lists" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.q.cor([1, 2, 3], [2, 3, 4])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.q.cor(kx.random.random(100, 1.0), kx.random.random(100, 1.0))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### prds\n", + "\n", + "Calculate the cumulative product across a supplied list" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.q.prds([1, 2, 3, 4, 5])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Iteration functions\n", + "\n", + "##### each\n", + "\n", + "Supplied both as a standalone primitive and as a method for PyKX Lambdas `each` allows you to pass individual elements of a PyKX object to a function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.q.each(kx.q('{prd x}'), kx.random.random([5, 5], 10.0, seed=10))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.q('{prd x}').each(kx.random.random([5, 5], 10.0, seed=10))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Table functions\n", + "\n", + "##### meta\n", + "\n", + "Retrieval of metadata information about a table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "qtab = kx.Table(data = {\n", + " 'x' : kx.random.random(1000, ['a', 'b', 'c']).grouped(),\n", + " 'y' : kx.random.random(1000, 1.0),\n", + " 'z' : kx.random.random(1000, kx.TimestampAtom.inf)\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.q.meta(qtab)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### xasc\n", + "\n", + "Sort the contents of a specified column in ascending order" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kx.q.xasc('z', qtab)" + ] + } + ], + "metadata": { + "file_extension": ".py()", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": 
".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "mimetype": "text/x-python", + "name": "python", + "npconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/getting-started/installing.md b/docs/getting-started/installing.md index f044603..effd713 100644 --- a/docs/getting-started/installing.md +++ b/docs/getting-started/installing.md @@ -131,12 +131,26 @@ The following steps outline the process by which a user can gain access to and i ### License installation using environment variables +To provide environment specific flexibility there are two methods by which users can install a license using environment variables. In both cases this method is flexible to the installation of both `kc.lic` and `k4.lic` versions of a license. + +#### Using a supplied license file directly + 1. Visit https://kx.com/kdb-insights-personal-edition-license-download/ or https://kx.com/kdb-insights-commercial-evaluation-license-download/ and fill in the attached form following the instructions provided. 2. On receipt of an email from KX providing access to your license download the license file and save to a secure location on your computer. 3. Set an environment variable on your computer pointing to the folder containing the license file (instructions for setting environment variables on PyKX supported operating systems can be found [here](https://chlee.co/how-to-setup-environment-variables-for-windows-mac-and-linux/). * Variable Name: `QLIC` * Variable Value: `/user/path/to/folder` +#### Using the base64 encoded license content + +1. Visit https://kx.com/kdb-insights-personal-edition-license-download/ or https://kx.com/kdb-insights-commercial-evaluation-license-download/ and fill in the attached form following the instructions provided. +2. 
On receipt of an email from KX providing access to your license copy the base64 encoded contents of your license provided in plain-text within the email. +3. Set an environment variable `KDB_LICENSE_B64` on your computer with the value copied in step 2 (instructions for setting environment variables on PyKX supported operating systems can be found [here](https://chlee.co/how-to-setup-environment-variables-for-windows-mac-and-linux/)). + * Variable Name: `KDB_LICENSE_B64` + * Variable Value: `` + +If looking to make use of a `k4.lic` you can do so by setting the base64 encoded content of your file as the environment variable `KDB_K4LICENSE_B64`. + ## Supported Environments KX only officially supports versions of PyKX built by KX, i.e. versions of PyKX installed from wheel files. Support for user-built installations of PyKX (e.g. built from the source distribution) is only provided on a best-effort basis. Currently, PyKX provides wheels for the following environments: @@ -149,27 +163,47 @@ KX only officially supports versions of PyKX built by KX, i.e. versions of PyKX ### Python Dependencies +#### Required Python dependencies + PyKX depends on the following third-party Python packages: -- `pandas>=1.2` -- `numpy~=1.22` +- `numpy~=1.20; python_version=='3.7'` +- `numpy~=1.22; python_version<'3.11', python_version>'3.7'` +- `numpy~=1.23.2; python_version>='3.11'` +- `pandas>=1.2, < 2.2.0` - `pytz>=2022.1` - `toml~=0.10.2` They are installed automatically by `pip` when PyKX is installed. -### Optional Python Dependencies +The following provides a breakdown of how these libraries are used within PyKX + +- [Numpy](https://pypi.org/project/numpy) is used by PyKX when converting data from PyKX objects to numpy equivalent array/recarray style objects, additionally low level integration allowing direct calls to numpy functions such as `numpy.max` with PyKX objects relies on the numpy Python API. 
+- [Pandas](https://pypi.org/project/pandas) is used by PyKX when converting PyKX data to Pandas Series/DataFrame equivalent objects, additionally when converting data to PyArrow data formats as supported by the optional dependencies below Pandas is used as an intermediary data format. +- [pytz](https://pypi.org/project/pytz/) is used by PyKX when converting data with timezone information to PyKX objects in order to ensure that the timezone offsets are accurately applied. +- [toml](https://pypi.org/project/toml/) is used by PyKX for configuration parsing, in particular when users make use of `.pykx-config` files for configuration management as outlined [here](../user-guide/configuration.md). + + +#### Optional Python Dependencies - `pyarrow>=3.0.0`, which can be included by installing the `pyarrow` extra, e.g. `pip install pykx[pyarrow]`. -- `find-libpython~=0.2`, which can be included by installing the `debug` extra, e.g. `pip install pykx[debug]`. This dependency can be used to help find `libpython` in the scenario that `pykx.q` fails to find it. +- `find-libpython~=0.2`, which can be included by installing the `debug` extra, e.g. `pip install pykx[debug]`. +- `ast2json~=0.3`, which is required for KX Dashboards Direct integration and can be installed with the `dashboards` extra, e.g. `pip install pykx[dashboards]` +- `dill>=0.2`, which is required for the Beta feature `Remote Functions` can be installed via pip with the `beta` extra, e.g. `pip install pykx[beta]` !!! Warning Trying to use the `pa` conversion methods of `pykx.K` objects or the `pykx.toq.from_arrow` method when PyArrow is not installed (or could not be imported without error) will raise a `pykx.PyArrowUnavailable` exception. `pyarrow` is supported Python 3.8-3.10 but remains in Beta for Python 3.11. 
+The following provides a breakdown of how these libraries are used within PyKX + +- [PyArrow](https://pypi.org/project/pyarrow) is used by PyKX for the conversion of PyKX objects to and from their PyArrow equivalent table/array objects. +- [find-libpython](https://pypi.org/project/find-libpython) can be used by developers using PyKX to source the `libpython.{so|dll|dylib}` file required by [PyKX under q](../pykx-under-q/intro.md). + ### Optional Non-Python Dependencies - `libssl` for TLS on [IPC connections](../api/ipc.md). +- `libpthread` on Linux/MacOS when using the `PYKX_THREADING` environment variable. ### Windows Dependencies diff --git a/docs/getting-started/q_magic_command.ipynb b/docs/getting-started/q_magic_command.ipynb index 9411f67..aa2c079 100644 --- a/docs/getting-started/q_magic_command.ipynb +++ b/docs/getting-started/q_magic_command.ipynb @@ -1,5 +1,17 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": ["hide_code"] + }, + "outputs": [], + "source": [ + "import os\n", + "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation.\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -7,8 +19,6 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation.\n", "import pykx as kx" ] }, @@ -61,12 +71,24 @@ ] }, { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`print` is the default method called on returned objects. 
To use `display` you can instead pass `--display`" - ] - }, + "cell_type": "markdown", + "id": "89ec26e4", + "metadata": {}, + "source": [ + "#### Execution options\n", + "\n", + "Execution options can also be included after `%%q`.\n", + "\n", + "Here is the list of currently supported execution options.\n", + "\n", + "```\n", + "--debug: prints the q backtrace before raising a QError\n", + " if the cell errors\n", + "--display: calls display rather than the default print\n", + " on returned objects\n", + "```\n" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index 233b9f4..aebc930 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -50,26 +50,6 @@ x x1 ')) ``` -### Creation of PyKX objects using q - -Generation of PyKX objects using q can be completed through calling `kx.q` - -```python ->>> kx.q('10 20 30') -pykx.LongVector(pykx.q('10 20 30')) - ->>> kx.q('([]5?1f;5?`4;5?0Ng)') -pykx.Table(pykx.q(' -x x1 x2 ---------------------------------------------------- -0.439081 ncej 8c6b8b64-6815-6084-0a3e-178401251b68 -0.5759051 jogn 5ae7962d-49f2-404d-5aec-f7c8abbae288 -0.5919004 ciha 5a580fb6-656b-5e69-d445-417ebfe71994 -0.8481567 hkpb ddb87915-b672-2c32-a6cf-296061671e9d -0.389056 aeaj 580d8c87-e557-0db1-3a19-cb3a44d623b1 -')) -``` - ### Creation of PyKX objects from Python data types Generation of PyKX objects from Python, Numpy, Pandas and PyArrow objects can be completed as follows using the `kx.toq` method. 
@@ -113,6 +93,26 @@ col1 col2 ')) ``` +### Creation of PyKX objects using q + +Generation of PyKX objects using q can be completed through calling `kx.q` + +```python +>>> kx.q('10 20 30') +pykx.LongVector(pykx.q('10 20 30')) + +>>> kx.q('([]5?1f;5?`4;5?0Ng)') +pykx.Table(pykx.q(' +x x1 x2 +--------------------------------------------------- +0.439081 ncej 8c6b8b64-6815-6084-0a3e-178401251b68 +0.5759051 jogn 5ae7962d-49f2-404d-5aec-f7c8abbae288 +0.5919004 ciha 5a580fb6-656b-5e69-d445-417ebfe71994 +0.8481567 hkpb ddb87915-b672-2c32-a6cf-296061671e9d +0.389056 aeaj 580d8c87-e557-0db1-3a19-cb3a44d623b1 +')) +``` + ## Interacting with PyKX Objects PyKX objects can be interacted with in a variety of ways, through indexing using Pythonic syntax, passing PyKX objects to q/numpy functions, querying via SQL/qSQL syntax or through the use of q functionality via the context interface. Each of these is described in more depth throughout this documentation but examples of each are provided here @@ -121,7 +121,7 @@ PyKX objects can be interacted with in a variety of ways, through indexing using * Create a PyKX list and interact with the list using indexing and slices. 
```python - >>> qarray = kx.toq([random() for _ in range(10)], kx.FloatVector) + >>> qarray = kx.random.random(10, 1.0) >>> qarray pykx.FloatVector(pykx.q('0.391543 0.08123546 0.9367503 0.2782122 0.2392341 0.1508133 0.1567317 0.9785 ..')) >>> qarray[1] @@ -143,11 +143,12 @@ PyKX objects can be interacted with in a variety of ways, through indexing using * Create a PyKX table and manipulate using Pythonic syntax ```python + >>> N = 100 >>> qtable = kx.Table( data={ - 'x': [random() for _ in range(100)], - 'x1': [random() * 5 for _ in range(100)], - 'x2': [['a', 'b', 'c'][randint(0, 2)] for _ in range(100)] + 'x': kx.random.random(N, 1.0), + 'x1': 5 * kx.random.random(N, 1.0), + 'x2': kx.random.random(N, ['a', 'b', 'c']) } ) >>> qtable @@ -216,16 +217,16 @@ PyKX objects can be interacted with in a variety of ways, through indexing using * Pass a PyKX array objects to a Numpy functions ```python - >>> qarray1 = kx.toq([random() for _ in range(10)], kx.FloatVector) + >>> qarray1 = kx.random.random(10, 1.0) >>> qarray1 pykx.FloatVector(pykx.q('0.7880561 0.9677446 0.9325539 0.6501981 0.4837422 0.5338642 0.5156039 0.31358..')) - >>> qarray2 = kx.toq([random() for _ in range(10)], kx.FloatVector) + >>> qarray2 = kx.random.random(10, 1.0) >>> qarray2 pykx.FloatVector(pykx.q('0.04164985 0.6417901 0.1608836 0.691249 0.4832847 0.6339534 0.4614883 0.06373..')) >>> np.max(qarray1) 0.9677445779088885 - >>> np.sum(kx.toq([randint(0, 10) for _ in range(10)])) + >>> np.sum(kx.random.random(10, 10)) 43 >>> np.add(qarray1, qarray2) pykx.FloatVector(pykx.q('0.8297059 1.609535 1.093438 1.341447 0.9670269 1.167818 0.9770923 0.3773123 1..')) @@ -234,11 +235,12 @@ PyKX objects can be interacted with in a variety of ways, through indexing using * Query using SQL/qSQL ```python + >>> N = 100 >>> qtable = kx.Table( data={ - 'x': [['a', 'b', 'c'][randint(0, 2)] for _ in range(100)] - 'x1': [random() for _ in range(100)], - 'x2': [random() * 5 for _ in range(100)], + 'x': kx.random.random(N, 
['a', 'b', 'c']), + 'x1': kx.random.random(N, 1.0), + 'x2': 5 * kx.random.random(N, 1.0), } ) >>> qtable[0:5] @@ -310,6 +312,9 @@ Objects generated via the PyKX library can be converted where reasonable to `Pyt * Convert PyKX objects to Numpy ```python + >>> import numpy as np + >>> random = np.random.random + >>> randint = np.random.randint >>> qvec = kx.q('10?5') >>> qvec.np() array([0, 2, 4, 1, 2, 1, 0, 1, 0, 1]) @@ -360,7 +365,7 @@ Objects generated via the PyKX library can be converted where reasonable to `Pyt * Convert PyKX objects to PyArrow ```python - >>> qvec = kx.q('10?5') + >>> qvec = kx.random.random(10, 5) >>> qvec.pa() [ @@ -388,5 +393,5 @@ Objects generated via the PyKX library can be converted where reasonable to `Pyt ## Next steps -- [Interface Overview Notebook](interface_overview.ipynb) +- [Interface Overview Notebook](PyKX%20Introduction%20Notebook.ipynb#ipc-communication) - [PyKX User Guide](../user-guide/index.md) diff --git a/docs/pykx-under-q/api.md b/docs/pykx-under-q/api.md index 23d0b3f..2abee19 100644 --- a/docs/pykx-under-q/api.md +++ b/docs/pykx-under-q/api.md @@ -131,6 +131,13 @@ x x1 x2 -------------------------------------------------- 0.439081 49f2404d-5aec-f7c8-abba-e2885a580fb6 mil 0.5759051 656b5e69-d445-417e-bfe7-1994ddb87915 igf + +// Enter PyKX console setting Python objects using PyKX +q).pykx.console[] +>>> a = list(range(5)) +>>> quit() +q).pykx.eval["a"]` +0 1 2 3 4 ``` ## `.pykx.debugInfo` @@ -559,6 +566,35 @@ q).pykx.repr til 5 "0 1 2 3 4" ``` +## `.pykx.safeReimport` + + +_Isolated execution of a q function which relies on importing PyKX_ + +```q +.pykx.safeReimport[qFunction] +``` + +**Parameters:** + +name | type | description +-------------|------------|------------- +`qFunction` | `function` | A function which is to be run following unsetting of PyKX environment variables and prior to their reset + +**Returns:** + +type | description +-------|------------ +`any` | On successful execution this function will return 
the result of the executed function + +**Example:** + +```q +q)\l pykx.q +q).pykx.safeReimport[{system"python -c 'import pykx as kx'";til 5}] +0 1 2 3 4 +``` + ## `.pykx.set` @@ -713,12 +749,14 @@ type | description | [Python](https://docs.python.org/3/library/datatypes.html) | `"py", "python", "Python"` | [PyArrow](https://arrow.apache.org/docs/python/index.html) | `"pa", "pyarrow", "PyArrow"` | [K](../api/pykx-q-data/type_conversions.md) | `"k", "q"` | + raw | `"raw"` | + default | `"default"` | ```q -// Default value on startup is "np" +// Default value on startup is "default" q).pykx.util.defaultConv -"np" +"default" // Set default value to Pandas q).pykx.setdefault["Pandas"] @@ -726,6 +764,45 @@ q).pykx.util.defaultConv "pd" ``` +## `.pykx.todefault` + + +_Tag a q object to indicate it should use the PyKX default conversion when called in Python_ + +```q +.pykx.todefault[qObject] +``` + +**Parameters:** + +name | type | description | +----------|---------|-------------| +`qObject` | `any` | A q object which is to be converted to a default form in Python. | + +**Return:** + +type | description +-------------|------------ +`projection` | A projection which is used to indicate that once the q object is passed to Python for evaluation it should be treated as a default object. | + +!!! Note + The `todefault` conversion is used to match embedPy conversion logic, in particular it converts q lists to Python lists when dealing with contiguous datatypes rather than to nested single value array types. 
Additionally it converts q tables to Pandas DataFrames + +```q +// Denote that a q object once passed to Python should be managed as a default object +// in this case a q list is converted to numpy +q).pykx.todefault til 10 +enlist[`..numpy;;][0 1 2 3 4 5 6 7 8 9] + +// Pass a q list to Python treating the Python object as PyKX default +q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.todefault (til 10;til 10) + + +// Pass a q Table to Python by default treating the Python table as a Pandas DataFrame +q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.todefault ([]til 10;til 10) + +``` + ## `.pykx.tok` diff --git a/docs/pykx-under-q/intro.md b/docs/pykx-under-q/intro.md index 94e4dee..2a6568d 100644 --- a/docs/pykx-under-q/intro.md +++ b/docs/pykx-under-q/intro.md @@ -165,13 +165,13 @@ Foreign objects can be stored in variables just like any other q datatype, or as Foreign objects cannot be directly operated on in q. Instead, Python objects are typically represented as PyKX objects, which wrap the underlying foreign objects. This provides the ability to get and set attributes, index, call or convert the underlying foreign object to a q object. -Use `.pykx.wrap` to create an PyKX object from a foreign object. +Use `.pykx.wrap` to create a PyKX object from a foreign object. ```q q)x foreign q)p:.pykx.wrap x -q)p /how an PyKX object looks +q)p /how a PyKX object looks {[f;x].pykx.util.pykx[f;x]}[foreign]enlist ``` @@ -187,7 +187,7 @@ function | argument | example ### Converting data -Given `obj`, an PyKX object representing Python data, we can get the underlying data (as foreign or q) using +Given `obj`, a PyKX object representing Python data, we can get the underlying data (as foreign or q) using ```q obj`. / get data as foreign @@ -214,7 +214,7 @@ There is one important exception to this. 
When calling Python functions, methods ### Getting attributes and properties -Given `obj`, an PyKX object representing a Python object, we can get an attribute or property directly using +Given `obj`, a PyKX object representing a Python object, we can get an attribute or property directly using ```q obj`:attr / equivalent to obj.attr in Python @@ -248,7 +248,7 @@ q)obj[`:y]` ### Setting attributes and properties -Given `obj`, an PyKX object representing a Python object, we can set an attribute or property directly using +Given `obj`, a PyKX object representing a Python object, we can set an attribute or property directly using ```q obj[:;`:attr;val] / equivalent to obj.attr=val in Python @@ -271,7 +271,7 @@ q)obj[`:y]` ### Indexing -Given `lst`, an PyKX object representing an indexable container object in Python, we can access the element at index `i` using +Given `lst`, a PyKX object representing an indexable container object in Python, we can access the element at index `i` using ```q lst[@;i] / equivalent to lst[i] in Python @@ -307,7 +307,7 @@ q)lst` ### Getting methods -Given `obj`, an PyKX object representing a Python object, we can access a method directly using +Given `obj`, a PyKX object representing a Python object, we can access a method directly using ```q obj`:method / equivalent to obj.method in Python @@ -334,7 +334,7 @@ Using the function API, PyKX objects can be called directly (returning PyKX obje Users explicitly specify the return type as q or foreign, the default is as a PyKX object. 
-Given `func`, an `PyKX` object representing a callable Python function or method, we can carry out the following operations: +Given `func`, a `PyKX` object representing a callable Python function or method, we can carry out the following operations: ```q func / func is callable by default (returning PyKX) @@ -432,7 +432,7 @@ q)oarg:.pykx.eval"10" q)oarg` 10 q)ofunc:.pykx.eval["lambda x:2+x";<] -q)ofunc[1]` +q)ofunc[1] 3 q)ofunc oarg 12 @@ -691,7 +691,7 @@ func(None) #call with argument None !!! warning "PyKX function called with `::` calls Python with no arguments" - Although `::` in q corresponds to `None` in Python, if an PyKX function is called with `::` as its only argument, the corresponding Python function will be called with _no_ arguments. + Although `::` in q corresponds to `None` in Python, if a PyKX function is called with `::` as its only argument, the corresponding Python function will be called with _no_ arguments. To call a Python function with `None` as its sole argument, retrieve `None` as a foreign object in q and pass that as the argument. diff --git a/docs/pykx-under-q/known_issues.md b/docs/pykx-under-q/known_issues.md new file mode 100644 index 0000000..d463fcb --- /dev/null +++ b/docs/pykx-under-q/known_issues.md @@ -0,0 +1,131 @@ +# PyKX under q known issues + +PyKX aims to make q and Python operate as seamlessly as possible together. +However due to differences in the languages there are some special cases to be aware of when using the interface. + +## Passing special values to PyKX objects + +PyKX under q uses certain special values to control how objects are returned/converted. When you need to pass these special values as parameters some specific steps must be followed. + +### Return control values `<`, `>`, and `*` + +Using the [PyKX function API](intro.md#pykx-function-api), PyKX objects can be called directly (returning PyKX objects) or declared callable returning q or `foreign` data. 
+ +Users explicitly specify the return type as q or foreign, the default is as a PyKX object. + +Given `func`, a PyKX object representing a callable Python function or method, we can carry out the following operations: + +```q +func / func is callable by default (returning PyKX) +func arg / call func(arg) (returning PyKX) +func[*] / declare func callable (returning PyKX) +func[*]arg / call func(arg) (returning PyKX) +func[*;arg] / equivalent +func[<] / declare func callable (returning q) +func[<]arg / call func(arg) (returning q) +func[<;arg] / equivalent +func[>] / declare func callable (returning foreign) +func[>]arg / call func(arg) (returning foreign) +func[>;arg] / equivalent +``` + +**Chaining operations** Returning another PyKX object from a function or method call, allows users to chain together sequences of operations. +We can also chain these operations together with calls to `.pykx.import`, `.pykx.get` and `.pykx.eval`. + +Due to this usage of `<`, `>`, and `*` as control characters passing them as arguments to functions must be managed more carefully. + +```q +func // Avoid passing the function without specifying a return type if you need to pass *,<,> as possible arguments +func arg // Avoid passing the argument without specifying a return type if you need to pass *,<,> as possible arguments +``` + +Do attach a return type to the function as you define it: + +```q +q)f:.pykx.eval["lambda x: x";<] // Specify < to return output as q object +q)f[*] // *,<,> can now be passed as arguments successfully +* +``` + +### Conversion control values `` ` `` and `` `. `` + +When [converting data](intro.md#converting-data), given a PyKX object `obj` representing Python data, we can get the underlying data (as foreign or q) using: + +```q +obj`. / get data as foreign +obj` / get data as q +``` + +For example: + +```q +q)x:.pykx.eval"(1,2,3)" +q)x +{[f;x].pykx.util.pykx[f;x]}[foreign]enlist +q)x`. +foreign +q)x` +1 2 3 +``` + +Due to this usage of `` ` `` and `` `. 
`` as control characters passing them as arguments to functions must be managed more carefully: + +For example: + +```q +q).pykx.eval["lambda x: x"][`]` +'Provided foreign object is not a Python object +``` + +To avoid this you can define the return type using `<` or `>` in advance: + +```q +q).pykx.eval["lambda x: x";<][`] +` +``` + +Or wrap the input in `.pykx.tok`: + +```q +q).pykx.eval["lambda x: x"][.pykx.tok[`]]` +` +``` + +### Default parameter `::` + +In q, functions take between 1-8 parameters. This differs from Python. + +When one calls a q function with empty brackets `[]` a default value is still passed. +This value is `::` the generic null. + +```q +q)(::)~{x}[] //Showing x parameter receives the generic null :: +1b +``` + +Due to this difference with Python, using `::` as an argument to PyKX functions has some difficulties: + +```q +q)f:.pykx.eval["lambda x: x";<] +q)f[::] // Python cannot tell the difference between f[] and f[::] as they resolve to the same input +'TypeError("() missing 1 required positional argument: 'x'") + [0] f[::] +``` + +You can avoid this by wrapping the input in `.pykx.tok`: + +```q +q)(::)~f[.pykx.tok[::]] +1b +``` + +Note Python functions with 0 parameters run without issue as they ignore the passed `(::)`: + +```q +p)def noparam():return 7 +q)f:.pykx.get[`noparam;<] +q)f[] +7 +q)f[::] / equivalent +7 +``` \ No newline at end of file diff --git a/docs/pykx-under-q/upgrade.md b/docs/pykx-under-q/upgrade.md index a9d1967..f451454 100644 --- a/docs/pykx-under-q/upgrade.md +++ b/docs/pykx-under-q/upgrade.md @@ -30,18 +30,6 @@ EmbedPy does not allow users to discern between q string and symbol types when c 1b ``` - -### Python object type support - -EmbedPy contains a fundamental limitation with respect to the data formats that are supported when converting between q and Python. Namely that all q objects when passed to Python functions use the analogous Python/Numpy representation. 
This limitation means that a user of embedPy must handle their own data conversions when handling Pandas or PyArrow objects. - -PyKX natively supports data conversions from q to Python, Numpy, Pandas and PyArrow and as such can support workflows which previously required users to manually control these conversions, for example: - -```q -q).pykx.print .pykx.eval["lambda x:type(x)"] .pykx.topd ([]10?1f) - -``` - ## Functionality mapping The following table describes the function mapping from PyKX to embedPy for various elements of the supported functionality within embedPy, where a mapping supported this will be explicitly noted. Where workarounds exist these are additionally noted. @@ -70,3 +58,83 @@ The following table describes the function mapping from PyKX to embedPy for vari | Convert a q object to a Python foreign object | Unsupported | `.p.q2py` | | Create a Python closure using a q function | Unsupported | `.p.closure` | | Create a Python generator using a q function | Unsupported | `.p.generator` | + +## PyKX under q benefits over embedPy + +PyKX under q provides a number of key functional benefits over embedPy alone when considering the generation of workloads that integrate Python and q code. The following are the key functional/feature updates which provide differentiation between the two libraries: + +1. Flexibility in supported data formats and conversions +2. Python code interoperability +3. Access to PyKX in its Python first modality + +### Flexibility in supported data formats and conversions + +EmbedPy contains a fundamental limitation with respect to the data formats that are supported when converting between q and Python. Namely that all q objects when passed to Python functions use the analogous Python/NumPy representation. This limitation means that a user of embedPy who requires data to be in a Pandas/PyArrow format needs to handle these conversions manually. 
+ +As PyKX supports Python, NumPy, Pandas and PyArrow data formats this improves the flexibility of workflows that can be supported, for example PyKX will by default convert q tables to Pandas DataFrames when passed to a Python function as follows + +```q +q).pykx.eval["lambda x:type(x)"] ([]10?1f;10?1f) + +``` + +In addition to this a number of helper functions are provided to allow users to selectively choose the target data formats which are used when passing to multivariable functions, for example + +```q +q).pykx.eval["lambda x, y:print(type(x), type(y))"][.pykx.tonp ([]10?1f);.pykx.topd til 10]; + +``` + +This flexibility makes integration with custom libraries easier to manage. + +### Python interoperability + +For users that are working to tightly integrate their Python code and q code, prototyping Python functions for use within embedPy could be difficult. Users are required when defining their functions either to provide them as a string with appropriate tab/indent usage to `.p.e` as follows + +```q +q).p.e"def func(x):\n\treturn x+1" +q)pyfunc:.pykx.get[`func;<] +q)pyfunc[2] +3 +``` + +Alternatively users could create a `.py`/`.p` file and access their functions using ```.pykx.import[`file_name]``` or `\l file_name.p` respectively. + +While these solutions provide a method of integrating your Python code they are not intuitive to a user versed both in Python and q. + +PyKX provides a function `.pykx.console` which allows users within a q session to run a Python "console" to generate their functions/variables for use within their q code. The following example uses PyKX 2.3.0. + +```q +q).pykx.console[] +>>> def func(x): +... return x+1 +... +>>> quit() +q)pyfunc:.pykx.get[`func;<] +q)pyfunc[2] +3 +``` + +This change allows users to iterate development of their analytics faster than when operating with embedPy. 
+ +### Access to PyKX in its Python first modality + +Following on from the Python interoperability section above access to PyKX itself as a Python module provides significant flexibility to users when developing analytics for use within a q session. + +With embedPy when q/kdb+ data is passed to Python for the purposes of completing "Python first" analysis there is a requirement that that analysis fully uses Python libraries that are available to a user and cannot get performance benefits from having access to q/kdb+. + +Take for example a case where a user wishes to run a Python function which queries a table available in their q process using SQL and calculates the mean value for all numeric columns. + +```q +q)tab:([]100?`a`b`c;100?1f;100?1f;100?0Ng) +q).pykx.console[] +>>> import pykx as kx +>>> def pyfunction(x): +... qtab = kx.q.sql('SELECT * from tab where x=$1', x) +... return qtab.mean(numeric_only=True) +>>> quit() +q)pyfunc:.pykx.get[`pyfunction;<] +q)pyfunc `a +x1| 0.5592623 +x2| 0.486176 +``` diff --git a/docs/release-notes/changelog.md b/docs/release-notes/changelog.md index 53073e8..59d61e6 100644 --- a/docs/release-notes/changelog.md +++ b/docs/release-notes/changelog.md @@ -4,15 +4,426 @@ The changelog presented here outlines changes to PyKX when operating within a Python environment specifically, if you require changelogs associated with PyKX operating under a q environment see [here](./underq-changelog.md). -## PyKX 2.2.0 +!!! Warning + + Currently PyKX is not compatible with Pandas 2.2.0 or above as it introduced breaking changes which cause data to be cast to the incorrect type. + +## PyKX 2.3.1 #### Release Date -2023-11-09 +2024-02-07 + +### Fixes and Improvements + +- Python functions saved to q would error if passed `''` or `'.'`. These now pass without issue. + + === "Behavior prior to change" + + ```python + >>> def func(n=2): + ... return n + ... 
+ >>> kx.q['func']= func + >>> kx.q('func', '') + Traceback (most recent call last): + File "", line 1, in + File "/home/rocuinneagain/.local/lib/python3.10/site-packages/pykx/embedded_q.py", line 227, in __call__ + return factory(result, False) + File "pykx/_wrappers.pyx", line 493, in pykx._wrappers._factory + File "pykx/_wrappers.pyx", line 486, in pykx._wrappers.factory + pykx.exceptions.QError: Provided foreign object is not a Python object + >>> kx.q('func', '.') + Traceback (most recent call last): + File "", line 1, in + File "/home/rocuinneagain/.local/lib/python3.10/site-packages/pykx/embedded_q.py", line 227, in __call__ + return factory(result, False) + File "pykx/_wrappers.pyx", line 493, in pykx._wrappers._factory + File "pykx/_wrappers.pyx", line 486, in pykx._wrappers.factory + pykx.exceptions.QError: rank + ``` + + === "Behavior post change" + + ```python + >>> def func(n=2): + ... return n + ... + >>> kx.q['func']= func + >>> kx.q('func', '') + pykx.SymbolAtom(pykx.q('`')) + >>> kx.q('func', '.') + pykx.SymbolAtom(pykx.q('`.')) + ``` + +- Changed `Table.rename()` to ignore any `columns` values that are of the wrong type instead of throwing an unhelpful error. + + === "Behavior prior to change" + + ```python + >>> key_tab.rename({0:'PolicyID'}, axis = 1) + ValueError('nyi') + ``` + + === "Behavior post change" + + ```python + >>> key_tab.rename({0:'PolicyID'}, axis = 1) + pykx.KeyedTable(pykx.q(' + idx| x y + ---| --- + 0 | 0 3 + 1 | 1 2 + 2 | 2 1 + ')) + ``` + +- Improved upon the quality of `Table.rename()` error messages and documentation on the function. +- PyKX would error with `_get_config_value() missing 1 required positional argument: 'default'` on import if a license was not found since 2.3.0. Now correctly opens the license walkthrough. +- Pandas 2.2.0 introduced breaking changes which affect PyKX. PyKX dependencies have been updated to `pandas>=1.2, < 2.2.0` until these are resolved. 
Data casting behavior leads to an unexpected datatype being returned: + + === "Behavior with Pandas <2.2.0" + + ```python + >>> pd.Series([1, pd.NA, 3], dtype=pd.Int64Dtype()).to_numpy() + array([1, , 3], dtype=object) + + >>> kx.K(pd.Series([1, pd.NA, 3], dtype=pd.Int64Dtype())) + pykx.LongVector(pykx.q('1 0N 3')) + ``` + + === "Behavior with Pandas >=2.2.0" + + ```python + >>> pd.Series([1, pd.NA, 3], dtype=pd.Int64Dtype()).to_numpy() + array([ 1., nan, 3.]) + + >>> kx.K(pd.Series([1, pd.NA, 3], dtype=pd.Int64Dtype())) + pykx.FloatVector(pykx.q('1 -9.223372e+18 3')) + ``` + +- `df.select_dtypes()` updated to now accept `kx.*Atom` values for `include`/`exclude` params. Use of `kx.CharVector` will return error. +- To align with other areas of PyKX the `upsert` and `insert` methods for PyKX tables and keyed tables now support the keyword argument `inplace`, this change will deprecate usage of `replace_self` with the next major release of PyKX. + +### Beta Features + +- Addition of the concept of `Remote Function` execution to PyKX, this allows users, from a Python session to define Python functions which will be executed on a remote q/kdb+ server running PyKX under q. The intention with this feature is to allow onboarding of Python first operations within existing or q/kdb+ first infrastructures + + ```python + >>> from pykx.remote import function, session + >>> remote_session = session() + >>> remote_session.create('localhost', 5050) + >>> @function(remote_session) + ... def func(x): + ... return x+1 + >>> func(2) # Functionality run on q server + pykx.LongAtom(pykx.q('3')) + >>> remote_session.clear() + ``` + +## PyKX 2.3.0 + +#### Release Date + +2024-01-22 + +### Additions + +- PyKX now supports the use of `KDB_LICENSE_B64` or `KDB_K4LICENSE_B64` configuration values/environment variables to define the content of a `kc.lic` or `k4.lic` license respectively if no license is found on initial usage of PyKX. 
+- Shortcut provided for access to current date, time and timestamp information using `'today'` and `'now'`. + + ```python + >>> kx.DateAtom('today') + pykx.DateAtom(pykx.q('2024.01.05')) + >>> kx.TimeAtom('now') + pykx.TimeAtom(pykx.q('16:15:32.724')) + >>> kx.TimestampAtom('now') + pykx.TimestampAtom(pykx.q('2024.01.05T16:15:42.926631000')) + ``` + +- Addition of support for `inplace` updates of PyKX tables modified using qsql select/update/delete operations on in-memory data. Application of `inplace` modifications is not supported for direct application on Partitioned/Splayed tables. + + ```python + >>> N = 1000 + >>> qtab = kx.Table(data={'x': kx.random.random(N, 1.0, seed=10)}) + >>> qtab + pykx.Table(pykx.q(' + x + ----------- + 0.0891041 + 0.8345194 + 0.3621949 + 0.999934 + 0.3837986 + .. + ')) + >>> kx.q.qsql.select(qtab, where = ['x>0.5'], inplace=True) + pykx.Table(pykx.q(' + x + ----------- + 0.8345194 + 0.999934 + 0.8619188 + 0.7517286 + 0.6348263 + .. + ')) + >>> qtab + pykx.Table(pykx.q(' + x + ----------- + 0.8345194 + 0.999934 + 0.8619188 + 0.7517286 + 0.6348263 + .. + ')) + ``` + +- Addition of `reset_index`, `add_suffix`, `add_prefix`, `count`, `skew` and `std` functionality to Pandas Like API + - See [here](../user-guide/advanced/Pandas_API.ipynb) for details of supported keyword arguments, limitations and examples. +- `%%q` Jupyter Notebook magic adds `--debug` option which prints the q backtrace if the cell execution fails. +- Release 2.3.0 adds to PyKX the concept of Beta features, these features are available to users through setting the configuration/environment variable `PYKX_BETA_FEATURES`. For more information on Beta features see further documentation [here](../beta-features/index.md) + +### Fixes and Improvements + +- `%%q` Jupyter Notebook magic now returns all outputs up to and including an error when thrown. Previously only the error was returned. +- `%%q` Jupyter Notebook magic ignores accidental whitespace in execution options. 
Below example no longer fails with `Received unknown argument` error: + + ```python + %%q --port 5000 + ``` + +- In cases where PyKX IPC sockets read data from unexpected publishers it could raise an `IndexError`. PyKX will now provide a more verbose error indicating that an unexpected message has been received, the bytes processed and requests a reproducible example to be provided if possible. +- Update to table column retrieval logic to error when a user attempts to access a non-existent column with a queried table. + + === "Behavior prior to change" + + ```python + >>> tab = kx.Table(data = {'a': [1, 2, 3]}) + >>> tab['c'] + pykx.LongVector(pykx.q('`long$()')) + ``` + + === "Behavior post change" + + ```python + >>> tab = kx.Table(data = {'a': [1, 2, 3]}) + >>> tab['c'] + .. + QError: Attempted to retrieve inaccessible column: c + ``` + +- Improved error message for conversion failures. +- Fixes an issue where a user would receive a length error when attempting to apply `min`, `max`, `prod` and `sum` functions on `pykx.KeyedTable` objects. + +### Beta Features + +- Database Management functionality has been added for the creation, loading and maintenance of PyKX Partitioned Databases. A full worked example of this functionality can be found [here](../examples/db-management.ipynb) along with full API documentation which includes examples of each function [here](../api/db.md). The API includes but is not limited to the following: + + - Database table creation and renaming. + - Enumeration of in-memory tables against on-disk sym file. + - Column listing, addition, reordering, renaming, copying, function application and deletion on-disk. + - Attribute setting and removal. + - Addition of missing tables from partitions within a database. + +- Added `PYKX_THREADING` environment variable that allows [multithreaded programs](../beta-features/threading.md) to modify state when calling into python on secondary threads. 
Note: This behaviour is only supported on Linux / MacOS. + + !!! Note + + When using `PYKX_THREADING` you must ensure you call `kx.shutdown_thread()` at the end of the script to ensure the background thread is properly closed. + +## PyKX 2.2.3 + +#### Release Date + +2024-01-11 + +### Fixes and Improvements + +- PyKX now raises an error appropriately when failing to locate `msvcr100.dll` when loading on Windows. +- Config values now default to `False` when not set rather than `None`. +- Resolved issue where both `PYKX_NO_SIGNAL` and `PYKX_NO_SIGINT` needed to be set to take effect. Now correctly accepts either. +- Reduced signal handling list to only `SIGINT` and `SIGTERM`. The inclusion of `SIGSEGV` since 2.2.1 could cause segfaults with compressed enum files. +- Updated q libraries to 2024.01.09 + +!!! Note + + PyKX 2.2.3 is currently not available for Mac x86 for all Python versions, additionally it is unavailable for Mac ARM on Python 3.7. Updated builds will be provided once available. + +## PyKX 2.2.2 !!! Warning - PyKX 2.2.0 presently does not include a Python 3.11 release for MacOS x86 and Linux x86 architectures, this will be rectified in an upcoming patch release. + Please skip this release and use 2.2.3 or newer. This is due to potential segfaults when reading compressed files. + +#### Release Date + +2023-12-12 + +### Fixes and Improvements + +- Conversions between `UUID` and `pykx.GUID` types could produce invalid results under various conditions in both licensed and unlicensed mode. +- A regression in 2.2.1 resulted in `SIGINT` signals being incorrectly treated as `SIGTERM` style signals, PyKX now resets all signals overwritten by PyKX to their values prior to import. +- Indexing regression in 2.2.1 causing hangs for certain inputs such as `tbl[::-1]` has been resolved. + +## PyKX 2.2.1 + +!!! Warning + + Please skip this release and use 2.2.3 or newer. This is due to potential segfaults when reading compressed files. 
+ +#### Release Date + +2023-11-30 + +### Fixes and Improvements + +- Some messages to `stdout` were not being captured when redirecting. Now all are captured. +- Deprecation of internally used environment variable `UNDER_PYTHON` which has been replaced by `PYKX_UNDER_PYTHON` to align with other internally used environment variables. +- Fix `Unknown default conversion type` error when `PYKX_DEFAULT_CONVERSION` is set to `k` +- Numpy dependency for Python 3.11 corrected to `numpy~=1.23.2` +- `pykx.q.qsql.select` and `pykx.q.qsql.exec` statements no longer use `get` calls for table retrieval unnecessarily when operating locally or via IPC. +- Null integral values in table keys will no longer convert the underlying vectors to floats when converting from a `pykx.KeyedTable` to `pandas.DataFrame` + + === "Behaviour prior to change" + + ```python + >>> kx.q('`col1 xkey ([] col1: (1j; 2j; 0Nj); col2:(1j; 2j; 0Nj); col3:`a`b`c)').pd() + col2 col3 + col1 + 1.0 1 a + 2.0 2 b + 0.0 -- c + ``` + + === "Behaviour post change" + + ```python + >>> kx.q('`col1 xkey ([] col1: (1j; 2j; 0Nj); col2:(1j; 2j; 0Nj); col3:`a`b`c)').pd() + col2 col3 + col1 + 1 1 a + 2 2 b + -- -- c + ``` + + !!! Warning + + For multi-keyed PyKX tables converted to Pandas the appropriate round-trip behaviour is supported however due to limitations in Pandas displaying of these as masked arrays is not supported as below + + ```python + >>> kx.q('`col1`col2 xkey ([] col1: (1j; 2j; 0Nj); col2:(1j; 2j; 0Nj); col3:`a`b`c)').pd() + col3 + col1 col2 + 1 1 a + 2 2 b + -9223372036854775808 -9223372036854775808 c + ``` + +- Fix to issue where providing `SIGTERM` signals to Python processes running PyKX would not result in the Python process being terminated. +- Addition of deprecation warning for environmental configuration option `PYKX_NO_SIGINT` which is to be replaced by `PYKX_NO_SIGNAL`. 
This is used when users require no signal handling logic overwrites and now covers `SIGTERM`, `SIGINT`, `SIGABRT` signals amongst others. +- Use of `pykx.q.system.variables` no longer prepends leading `.` to supplied string allowing users to get the variables associated with dictionary like namespaces. + + === "Behaviour prior to change" + + ```python + >>> kx.q('.test.a:1;.test.b:2') + >>> kx.q('test.c:3;test.d:4') + >>> kx.q.system.variables('.test') + pykx.SymbolVector(pykx.q('`s#`a`b')) + >>> kx.q.system.variables('test') + pykx.SymbolVector(pykx.q('`s#`a`b')) + ``` + + === "Behaviour post change" + + ```python + >>> kx.q('.test.a:1;.test.b:2') + >>> kx.q('test.c:3;test.d:4') + >>> kx.q.system.variables('.test') + pykx.SymbolVector(pykx.q('`s#`a`b')) + >>> kx.q.system.variables('test') + pykx.SymbolVector(pykx.q('`s#`c`d')) + ``` + +- q dictionaries with tables as keys were being incorrectly wrapped as `pykx.KeyedTable`. Now corrected to `pykx.Dictionary`: + + === "Behavior prior to change" + + ```python + >>> type(pykx.q('([] a:1 2 3;b:2 3 4)!enlist each 1 2 3')) + + ``` + + === "Behavior post change" + + ```python + >>> type(pykx.q('([] a:1 2 3;b:2 3 4)!enlist each 1 2 3')) + + ``` +- Added consistent conversion of `datetime.time` objects + + === "Behavior prior to change" + + ```q + q).pykx.pyexec"from datetime import time" + q).pykx.eval["time(11, 34, 56)"]` + foreign + ``` + + ```python + >>> kx.toq(time(11, 34, 56)) + Traceback (most recent call last): + File "", line 1, in + File "pykx/toq.pyx", line 2641, in pykx.toq.ToqModule.__call__ + File "pykx/toq.pyx", line 270, in pykx.toq._default_converter + TypeError: Cannot convert 'datetime.time(11, 34, 56)' to K object + ``` + + === "Behavior post change" + + ```q + q).pykx.pyexec"from datetime import time" + q).pykx.eval["time(11, 34, 56)"]` + 0D11:34:56.000000000 + ``` + + ```python + >>> kx.toq(time(11, 34, 56)) + pykx.TimespanAtom(pykx.q('0D11:34:56.000000000')) + ``` + +- Fixed null value for 
`TimestampVector` returning `NoneType` instead of `pykx.wrappers.TimestampAtom` for `.py()` method + + === "Before Null Change" + + ```python + >>> for x in kx.q('0Np,.z.p').py(): + ... print(type (x)) + + + ``` + + === "After Null Change" + + ```python + >>> for x in kx.q('0Np,.z.p').py(): + ... print(type (x)) + + + ``` + +### Upgrade considerations + +- If dependent on the environment variable `UNDER_PYTHON` please upgrade your code to use `PYKX_UNDER_PYTHON` + +## PyKX 2.2.0 + +#### Release Date + +2023-11-09 ### Additions @@ -204,6 +615,32 @@ - Addition of `poll_recv_async` to `RawQConnection` objects to support asynchronous polling. +- Addition of negative slicing to `list` , `vector` and `table` objects + + ```python + >>> import pykx as kx + >>> qlist = kx.q('("a";2;3.3;`four)') + >>> qlist[-3:] + pykx.List(pykx.q(' + 2 + 3.3 + `four + ')) + + >>> vector = kx.q('til 5') + >>> vector[:-1] + pykx.LongVector(pykx.q('0 1 2 3')) + + >>> table = kx.q('([] a:1 2 3; b:4 5 6; c:7 8 9)') + >>> table[-2:] + pykx.Table(pykx.q(' + a b c + ----- + 2 5 8 + 3 6 9 + ')) + ``` + ### Fixes and Improvements - Fix to allow users to use Python functions when operating on a `pykx.GroupbyTable` with an `apply` function @@ -399,7 +836,7 @@ the following reads a CSV file and specifies the types of the three columns name ``` - Notebooks will HTML print tables and dictionaries through the addition of `_repr_html_`. Previous `q` style output is still available using `print`. -- Added [`serialize` and `deserialize`](../api/serialize.html) as base methods to assist with the serialization of `K` objects for manual use over IPC. +- Added [`serialize` and `deserialize`](../api/serialize.md) as base methods to assist with the serialization of `K` objects for manual use over IPC. - Added support for `pandas` version `2.0`. !!! Warning "Pandas 2.0 has deprecated the `datetime64[D/M]` types." 
@@ -674,7 +1111,7 @@ the following reads a CSV file and specifies the types of the three columns name ### Additions - Added `to_local_folder` kwarg to `install_into_QHOME` to enable use of `pykx.q` without write access to `QHOME`. -- Added [an example](../examples/threaded_execution/README.md) that shows how to use `EmbeddedQ` in a multithreaded context where the threads need to modify global state. +- Added [an example](../examples/threaded_execution/threading.md) that shows how to use `EmbeddedQ` in a multithreaded context where the threads need to modify global state. - Added [PYKX_NO_SIGINT](../user-guide/configuration.md#environment-variables) environment variable. ### Fixes and Improvements diff --git a/docs/release-notes/underq-changelog.md b/docs/release-notes/underq-changelog.md index c30b87f..f248ca3 100644 --- a/docs/release-notes/underq-changelog.md +++ b/docs/release-notes/underq-changelog.md @@ -6,8 +6,172 @@ This changelog provides updates from PyKX 2.0.0 and above, for information relat The changelog presented here outlines changes to PyKX when operating within a q environment specifically, if you require changelogs associated with PyKX operating within a Python environment see [here](./changelog.md). +## PyKX 2.3.1 + +#### Release Date + +2024-02-07 + +### Fixes and Improvements + +- `.pykx.eval` is now variadic allowing an optional second parameter to be passed to define return type. Previously would error with `rank`. 
+ + === "Behavior prior to change" + + ```q + q).pykx.eval["lambda x: x";<] 7 + 'rank + [0] .pykx.eval["lambda x: x";<] 7 + ``` + + === "Behavior post change" + + ```q + q).pykx.eval["lambda x: x";<] 7 + 7 + ``` + +- Wraps which have a return type assigned using `<` or `>` are now considered wraps and can be unwrapped: + + === "Behavior prior to change" + + ```q + q).pykx.util.isw .pykx.eval["lambda x: x"][<] + 0b + q).pykx.unwrap .pykx.eval["lambda x: x"][<] + {$[type[x]in 104 105 112h;util.foreignToq unwrap x;x]}.[code[foreign]]`.pykx.util.parseArgsenlist + ``` + + === "Behavior post change" + + ```q + q).pykx.util.isw .pykx.eval["lambda x: x"][<] + 1b + q).pykx.unwrap .pykx.eval["lambda x: x"][<] + foreign + ``` + +- `.pykx.qcallable` and `.pykx.pycallable` can now convert wraps which already have return types assigned: + + === "Behavior prior to change" + + ```q + q).pykx.qcallable[.pykx.eval["lambda x: x"][<]]` + 'Could not convert provided function to callable with q return + q).pykx.print .pykx.pycallable[.pykx.eval["lambda x: x"][>]] + 'Could not convert provided function to callable with Python return + ``` + + === "Behavior post change" + + ```q + q).pykx.qcallable[.pykx.eval["lambda x: x"][<]]`test + `test + q).pykx.print .pykx.wrap .pykx.pycallable[.pykx.eval["lambda x: x"][>]]`test + test + ``` + +## PyKX 2.3.0 + +#### Release Date + +2024-01-22 + +### Fixes and Improvements + +- A bug was fixed when using `.pykx.console`, it is now possible to access python variables set using the console with `.pykx.(eval|pyexec|pyeval)` functions. 
+ + === "Behavior prior to change" + + ```q + q) .pykx.console[] + >>> a = 10 + >>> quit() + q) .pykx.eval["a"]` + 'NameError("name 'a' is not defined") + [1] /.../q/pykx.q:968: .pykx.eval:{wrap pyeval x} + ``` + + === "Behavior post change" + + ```q + q) .pykx.console[] + >>> a = 10 + >>> quit() + q) .pykx.eval["a"]` + 10 + ``` + +## PyKX 2.2.2 + +#### Release Date + +2023-12-07 + +### Fixes and Improvements + +- When loaded in a q process loading `pykx.q` would not allow `Ctrl+C` (SIGINT) interruption. + +## PyKX 2.2.1 + +#### Release Date + +2023-11-30 + +### Fixes and Improvements + +- `.pykx.print` was using `repr` representation for some objects. Now consistently calls `print`. +- `.pykx.safeReimport` now resets environment variables correctly before throwing any error raised by the function supplied to it. +- Wrapped Python objects being supplied as arguments to functions were being converted according to `.pykx.util.defaultConv`. Now are left untouched: + + === "Behavior prior to change" + + ```q + q)\l pykx.q + q)np:.pykx.import `numpy; + q)r:np[`:random.rand;1;2]; + q).pykx.print r + array([[0.03720163, 0.72012121]]) + q).pykx.print .pykx.eval["lambda x: x"] r + array([array([0.03720163, 0.72012121])], dtype=object) + q).pykx.setdefault"py" + q).pykx.print .pykx.eval["lambda x: x"] r + [[0.037201634310417564, 0.7201212148535847]] + ``` + + === "Behavior post change" + + ```q + q).pykx.print r + array([[0.59110368, 0.52612429]]) + q).pykx.print .pykx.eval["lambda x: x"] r + array([[0.59110368, 0.52612429]]) + q).pykx.setdefault"py" + q).pykx.print .pykx.eval["lambda x: x"] r + array([[0.59110368, 0.52612429]]) + ``` +- q hsym will convert correctly to `pathlib.PosixPath` rather than `str`: + + === "Behavior prior to change" + + ```q + q).pykx.eval["lambda x: print(type(x))"] `:/path/to/somewhere; + + ``` + + === "Behavior post change" + + ```q + q).pykx.eval["lambda x: print(type(x))"] `:/path/to/somewhere; + + ``` + ## PyKX 2.2.0 +#### Release Date + 
+2023-11-09 + ### Additions - Addition of `PYKX_EXECUTABLE` environment/configuration variable to allow control of which Python executable is used under q. @@ -63,6 +227,10 @@ This changelog provides updates from PyKX 2.0.0 and above, for information relat ## PyKX 2.1.0 +#### Release Date + +2023-10-09 + ### Fixes and Improvements - Update to default conversion logic for q objects passed to PyKX functions to more closely match embedPy based conversion expectations.For version <=2.0 conversions of KX lists would produce N Dimensional Numpy arrays of singular type. This results in issues when applying to many analytic libraries which rely on lists of lists rather than singular N Dimensional arrays. Additionally q tables and keyed tables would be converted to Numpy recarrays, these are now converted to Pandas DataFrames. To maintain previous behavior please set the following environment variable `PYKX_DEFAULT_CONVERSION="np"`. diff --git a/docs/roadmap.md b/docs/roadmap.md index 65e3496..45e84b3 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -16,7 +16,7 @@ If you need a feature that's not included in this list please let us know by rai - Database management functionality allowing for Pythonic persistence and management of on-disk kdb+ Databases (Beta) - Improvements to multi-threaded PyKX efficiency, reducing per-call overhead for running PyKX on separate threads -- Configurable initialisation logic in the absense of a license. Thus allowing users who have their own workflows for license access to modify the instructions for their users. +- Configurable initialisation logic in the absence of a license. Thus allowing users who have their own workflows for license access to modify the instructions for their users. 
- Addition of `cast` keyword when inserting/upserting data into a table reducing mismatch issues ## Future @@ -26,4 +26,4 @@ If you need a feature that's not included in this list please let us know by rai - Continued additions of Pandas-like functionality on PyKX Table objects - Performance improvements through enhanced usage of Cython - Real-time/Streaming functionality utilities -- Data pre-processing and statitics modules for operation on PyKX tables and vector objects +- Data pre-processing and statistics modules for operation on PyKX tables and vector objects diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index c7118c7..7aaf644 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -22,7 +22,7 @@ Once you have access to your license you can install the license following the w ### Initialization failing with a 'embedq' error -Failure to initialize PyKX while raising an error `embedq` indicates that the license you are attempting to use for PyKX in [licensed modality](modes.md) does not have the sufficient feature flags necessary to run PyKX. To access a license which does allow for running PyKX in this modality please following the instructions [here](#accessing-a-license-valid-for-pykx) to get a new license with appropriate feature flags. +Failure to initialize PyKX while raising an error `embedq` indicates that the license you are attempting to use for PyKX in [licensed modality](user-guide/advanced/modes.md) does not have the sufficient feature flags necessary to run PyKX. To access a license which does allow for running PyKX in this modality please follow the instructions [here](#accessing-a-license-valid-for-pykx) to get a new license with appropriate feature flags. ### Initialization failing with a 'kc.lic' error @@ -156,3 +156,18 @@ The following section outlines how a user can get access to a verbose set of env which q: /usr/local/anaconda3/bin/q q info: ``` + +## Issues running PyKX in a subprocess? 
+ +Internally PyKX makes use of a number of variables/environment variables which are persisted within the Python/q process which imports PyKX. Due to how Python subprocesses work with respect to inheriting environment variables, users who attempt to spawn a subprocess dependent on PyKX will run into a Segmentation Fault. + +To avoid this, subprocesses should be spawned while making use of the `kx.PyKXReimport` functionality as follows: + +```python +import pykx as kx +import subprocess +with kx.PyKXReimport(): + subprocess.Popen(['python', 'file.py']) # Run Python with a file that imports PyKX +``` + +For more information on the `PyKXReimport` functionality see its API documentation [here](api/reimporting.md). diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb index 56c8d38..ee56ed9 100644 --- a/docs/user-guide/advanced/Pandas_API.ipynb +++ b/docs/user-guide/advanced/Pandas_API.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "2d0c8656", + "id": "dfa26ef1", "metadata": {}, "source": [ "# Pandas API\n", @@ -12,24 +12,37 @@ "\n", "This demonstration will outline the following\n", "\n", - "1. [Metadata properties](#metadata)\n", - "2. [Indexing operations](#indexing)\n", - "3. [Reindexing operations](#reindexing)\n", - "4. [Merging tables](#merging)\n", - "5. [Computations](#computations)\n", - "6. [Setting Indexes](#setting-indexes)" + "1. [Constructing Tables](#Constructing-Tables)\n", + "2. [Metadata](#Metadata)\n", + "3. [Querying and Data Interrogation](#Querying-and-Data-Interrogation)\n", + "4. [Data Joins/Merging](#Data-Joins/Merging)\n", + "5. [Analytic Functionality](#Analytic-functionality)\n", + "6. 
[Data Preprocessing](#Data-Preprocessing)" ] }, { "cell_type": "code", "execution_count": null, - "id": "17f28b87", - "metadata": {}, + "id": "5b2f27e1", + "metadata": { + "tags": [ + "hide_code" + ] + }, "outputs": [], "source": [ "import os\n", "os.environ['IGNORE_QHOME'] = '1' # Ignore symlinking PyKX q libraries to QHOME \n", - "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation.\n", + "os.environ['PYKX_Q_LOADED_MARKER'] = '' # Only used here for running Notebook under mkdocs-jupyter during document generation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "356b337c", + "metadata": {}, + "outputs": [], + "source": [ "import pykx as kx\n", "import numpy as np\n", "import pandas as pd\n", @@ -38,7 +51,7 @@ }, { "cell_type": "markdown", - "id": "774122a0", + "id": "b5c9b878", "metadata": {}, "source": [ "## Constructing Tables" @@ -46,7 +59,7 @@ }, { "cell_type": "markdown", - "id": "0fd8910c", + "id": "15884a6f", "metadata": {}, "source": [ "### Table\n", @@ -75,7 +88,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9a748c27", + "id": "a3d8e590", "metadata": {}, "outputs": [], "source": [ @@ -84,7 +97,7 @@ }, { "cell_type": "markdown", - "id": "231a5e28", + "id": "1967dbd6", "metadata": {}, "source": [ "Create a Table from an array like object." @@ -93,7 +106,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7e43d716", + "id": "b8c67d04", "metadata": {}, "outputs": [], "source": [ @@ -102,7 +115,7 @@ }, { "cell_type": "markdown", - "id": "1e426cda", + "id": "b59c678b", "metadata": {}, "source": [ "Create a Table from an array like object and provide names for the columns to use." 
@@ -111,7 +124,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2b3c2edf", + "id": "6469f77e", "metadata": {}, "outputs": [], "source": [ @@ -120,7 +133,7 @@ }, { "cell_type": "markdown", - "id": "be094191", + "id": "a3074cc5", "metadata": {}, "source": [ "### Keyed Table\n", @@ -150,7 +163,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d93e73d3", + "id": "03162ab2", "metadata": {}, "outputs": [], "source": [ @@ -159,7 +172,7 @@ }, { "cell_type": "markdown", - "id": "119c2e1f", + "id": "eda04de8", "metadata": {}, "source": [ "Create a keyed table from a list of rows." @@ -168,7 +181,7 @@ { "cell_type": "code", "execution_count": null, - "id": "959fcd3d", + "id": "de9fcc81", "metadata": {}, "outputs": [], "source": [ @@ -177,7 +190,7 @@ }, { "cell_type": "markdown", - "id": "9d83854e", + "id": "ab5393c3", "metadata": {}, "source": [ "Create a keyed table from a list of rows and provide names for the resulting columns." @@ -186,7 +199,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4b2c6989", + "id": "576e4254", "metadata": {}, "outputs": [], "source": [ @@ -195,7 +208,7 @@ }, { "cell_type": "markdown", - "id": "356b29d8", + "id": "cca4e246", "metadata": {}, "source": [ "Create a keyed table with a specified index column." 
@@ -204,7 +217,7 @@ { "cell_type": "code", "execution_count": null, - "id": "acbe339c", + "id": "a29d1521", "metadata": {}, "outputs": [], "source": [ @@ -213,7 +226,7 @@ }, { "cell_type": "markdown", - "id": "95a04686", + "id": "73bf284f", "metadata": {}, "source": [ "## Metadata" @@ -222,7 +235,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a52fdc82", + "id": "4b363f07", "metadata": {}, "outputs": [], "source": [ @@ -233,7 +246,7 @@ }, { "cell_type": "markdown", - "id": "280baf05", + "id": "40155b78", "metadata": {}, "source": [ "### Table.columns\n", @@ -244,7 +257,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a2ee3fad", + "id": "e8a0395e", "metadata": {}, "outputs": [], "source": [ @@ -253,7 +266,7 @@ }, { "cell_type": "markdown", - "id": "40da029e", + "id": "13516f56", "metadata": {}, "source": [ "### Table.dtypes\n", @@ -264,7 +277,7 @@ { "cell_type": "code", "execution_count": null, - "id": "70bd32d2", + "id": "5a312477", "metadata": {}, "outputs": [], "source": [ @@ -273,7 +286,7 @@ }, { "cell_type": "markdown", - "id": "00e49e84", + "id": "10124c07", "metadata": {}, "source": [ "### Table.empty\n", @@ -284,7 +297,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9dc49e08", + "id": "751fc442", "metadata": {}, "outputs": [], "source": [ @@ -293,7 +306,7 @@ }, { "cell_type": "markdown", - "id": "c00e46ef", + "id": "c973fb82", "metadata": {}, "source": [ "### Table.ndim\n", @@ -304,7 +317,7 @@ { "cell_type": "code", "execution_count": null, - "id": "db113636", + "id": "ee6b55a0", "metadata": {}, "outputs": [], "source": [ @@ -313,7 +326,7 @@ }, { "cell_type": "markdown", - "id": "5ea4b315", + "id": "07ac8e54", "metadata": {}, "source": [ "### Table.shape\n", @@ -324,7 +337,7 @@ { "cell_type": "code", "execution_count": null, - "id": "78125654", + "id": "8d6f890c", "metadata": {}, "outputs": [], "source": [ @@ -333,7 +346,7 @@ }, { "cell_type": "markdown", - "id": "1e3f85a5", + "id": "654129cc", "metadata": {}, 
"source": [ "### Table.size\n", @@ -344,10 +357,8 @@ { "cell_type": "code", "execution_count": null, - "id": "c77c5bc7", - "metadata": { - "scrolled": false - }, + "id": "0e621250", + "metadata": {}, "outputs": [], "source": [ "tab.size" @@ -355,473 +366,312 @@ }, { "cell_type": "markdown", - "id": "2be2ece3", + "id": "8e210a91", "metadata": {}, "source": [ - "### Table.mean()\n", - "\n", - "```\n", - "Table.mean(axis=0, numeric_only=False)\n", - "```\n", - "\n", - "Get the mean of values across the requested axis.\n", - "\n", - "**Parameters:**\n", - "\n", - "| Name | Type | Description | Default |\n", - "| :----------: | :--: | :--------------------------------------------------------- | :-----: |\n", - "| axis | int | The axis to calculate mean across 0 is columns, 1 is rows. | 0 |\n", - "| numeric_only | bool | Include only columns / rows with numeric data. | False |\n", - "\n", - "**Returns:**\n", - "\n", - "| Type | Description |\n", - "| :--------: | :--------------------------------------------------------------------------------------------- |\n", - "| Dictionary | The mean across each row / column with the key corresponding to the row number or column name. 
|" + "## Querying and Data Interrogation" ] }, { - "cell_type": "markdown", - "id": "cb8c5ef8", + "cell_type": "code", + "execution_count": null, + "id": "603d5534", "metadata": {}, + "outputs": [], "source": [ - "**Examples:**\n", - "\n", - "Calculate the mean across the columns of a table" + "# The examples in this section will use this example table filled with random data\n", + "kx.q('N: 1000')\n", + "tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')\n", + "tab" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "0c3e5d76", + "cell_type": "markdown", + "id": "9bd3dada", "metadata": {}, - "outputs": [], "source": [ - "tab = kx.Table(data=\n", - " {\n", - " 'a': [1, 2, 2, 4],\n", - " 'b': [1, 2, 6, 7],\n", - " 'c': [7, 8, 9, 10],\n", - " 'd': [7, 11, 14, 14]\n", - " }\n", - ")\n", - "tab" + "### Table.all()\n", + "\n", + "```\n", + "Table.all(axis=0, bool_only=False, skipna=True)\n", + "```\n", + "\n", + "Returns whether or not all values across the given axis have a `truthy` value.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate `all` across 0 is columns, 1 is rows. | 0 |\n", + "| bool_only | bool | Only use columns of the table that are boolean types. | False |\n", + "| skipna | bool | Ignore any null values along the axis. | True |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Dictionary | A dictionary where the key represents the column name / row number and the values are the result of calling `all` on that column / row. 
|" ] }, { "cell_type": "code", "execution_count": null, - "id": "9986a550", + "id": "95aa447d", "metadata": {}, "outputs": [], "source": [ - "tab.mean()" + "tab.all()" ] }, { "cell_type": "markdown", - "id": "24ac0b99", + "id": "4ac12eb0", "metadata": {}, "source": [ - "Calculate the mean across the rows of a table" + "### Table.any()\n", + "\n", + "```\n", + "Table.any(axis=0, bool_only=False, skipna=True)\n", + "```\n", + "\n", + "Returns whether or not any values across the given axis have a `truthy` value.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate `any` across 0 is columns, 1 is rows. | 0 |\n", + "| bool_only | bool | Only use columns of the table that are boolean types. | False |\n", + "| skipna | bool | Ignore any null values along the axis. | True |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Dictionary | A dictionary where the key represents the column name / row number and the values are the result of calling `any` on that column / row. 
|" ] }, { "cell_type": "code", "execution_count": null, - "id": "41f6f669", + "id": "a43aabc4", "metadata": {}, "outputs": [], "source": [ - "tab.mean(axis=1)" + "tab.any()" ] }, { "cell_type": "markdown", - "id": "7bf853c5", + "id": "81a8e19f", "metadata": {}, "source": [ - "### Table.median()\n", + "### Table.at[]\n", "\n", "```\n", - "Table.median(axis=0, numeric_only=False)\n", + "Table.at[row, col]\n", "```\n", "\n", - "Get the median of values across the requested axis.\n", - "\n", - "**Parameters:**\n", - "\n", - "| Name | Type | Description | Default |\n", - "| :----------: | :--: | :----------------------------------------------------------- | :-----: |\n", - "| axis | int | The axis to calculate median across 0 is columns, 1 is rows. | 0 |\n", - "| numeric_only | bool | Include only columns / rows with numeric data. | False |\n", + "Access a single value for a row / column pair.\n", "\n", - "**Returns:**\n", + "Similar to `loc[]`, in that both provide label-based lookups. Use `at` if you only need to get or set a single value.\n", "\n", - "| Type | Description |\n", - "| :--------: | :----------------------------------------------------------------------------------------------- |\n", - "| Dictionary | The median across each row / column with the key corresponding to the row number or column name. |" + "The `at` property can be used for both assignment and retrieval of values at a given row and column." ] }, { "cell_type": "markdown", - "id": "98da458a", + "id": "44a37aff", "metadata": {}, "source": [ "**Examples:**\n", "\n", - "Calculate the median across the columns of a table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bff5ac07", - "metadata": {}, - "outputs": [], - "source": [ - "tab = kx.Table(data=\n", - " {\n", - " 'a': [1, 2, 2, 4],\n", - " 'b': [1, 2, 6, 7],\n", - " 'c': [7, 8, 9, 10],\n", - " 'd': [7, 11, 14, 14]\n", - " }\n", - ")\n", - "tab" + "Get the value of the `z` column in the 997th row." 
] }, { "cell_type": "code", "execution_count": null, - "id": "579c8b33", + "id": "618fe622", "metadata": {}, "outputs": [], "source": [ - "tab.median()" + "tab.at[997, 'z']" ] }, { "cell_type": "markdown", - "id": "f6698350", + "id": "23203909", "metadata": {}, "source": [ - "Calculate the median across the rows of a table" + "Reassign the value of the `z` column in the 997th row to `3.14159`." ] }, { "cell_type": "code", "execution_count": null, - "id": "5664bd93", - "metadata": { - "scrolled": false - }, + "id": "978d991d", + "metadata": {}, "outputs": [], "source": [ - "tab.median(axis=1)" + "tab.at[997, 'z'] = 3.14159\n", + "tab.at[997, 'z']" ] }, { "cell_type": "markdown", - "id": "33af56bb", + "id": "3d62cbbc", "metadata": {}, "source": [ - "### Table.mode()\n", + "### Table.get()\n", "\n", "```\n", - "Table.mode(axis=0, numeric_only=False, dropna=True)\n", + "Table.get(key, default=None)\n", "```\n", "\n", - "Get the mode of values across the requested axis.\n", + "Get a column or columns from a table by key, if the key does not exist return the default value.\n", "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", - "| :----------: | :--: | :------------------------------------------------------------ | :-----: |\n", - "| axis | int | The axis to calculate mode across 0 is columns, 1 is rows. | 0 |\n", - "| numeric_only | bool | Include only columns / rows with numeric data. | False |\n", - "| dropna | bool | Remove null values from the data before calculating the mode. | True |\n", + "| Name | Type | Description | Default |\n", + "| :-----: | :--------------------: | :------------------------------------------------------ | :--------: |\n", + "| key | Union[str, list[str]] | The column name or list of names to get from the table. | _required_ |\n", + "| default | int | The default value if the key is not found. 
| None |\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :--------: | :------------------------------------------------------------------------------------------------ |\n", - "| Table | The mode across each row / column with the column corresponding to the row number or column name. |" + "| Type | Description |\n", + "| :---------------: | :------------------------------------------------------------------- |\n", + "| Union[Table, Any] | A table containing only the columns requested or the default value. |" ] }, { "cell_type": "markdown", - "id": "4201c9af", + "id": "00c06637", "metadata": {}, "source": [ "**Examples:**\n", "\n", - "Calculate the mode across the columns of a table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b4bfe36c", - "metadata": {}, - "outputs": [], - "source": [ - "tab = kx.Table(data=\n", - " {\n", - " 'a': [1, 2, 2, 4],\n", - " 'b': [1, 2, 6, 7],\n", - " 'c': [7, 8, 9, 10],\n", - " 'd': [7, 11, 14, 14]\n", - " }\n", - ")\n", - "tab" + "Get the `y` column from the table." ] }, { "cell_type": "code", "execution_count": null, - "id": "e1a7eeb1", + "id": "f950cc1e", "metadata": { "scrolled": true }, "outputs": [], "source": [ - "tab.mode()" + "tab.get('y')" ] }, { "cell_type": "markdown", - "id": "6a47af49", + "id": "78608b1c", "metadata": {}, "source": [ - "Calculate the median across the rows of a table" + "Get the `y` and `z` columns from the table." ] }, { "cell_type": "code", "execution_count": null, - "id": "130081ce", + "id": "02d4d586", "metadata": { - "scrolled": false + "scrolled": true }, "outputs": [], "source": [ - "tab.mode(axis=1)" + "tab.get(['y', 'z'])" ] }, { "cell_type": "markdown", - "id": "29dffe0d", + "id": "2a2186aa", "metadata": {}, "source": [ - "Calculate the mode across columns and keep null values." + "Attempt to get the `q` column from the table and receive none as that column does not exist." 
] }, { "cell_type": "code", "execution_count": null, - "id": "53a8251a", - "metadata": { - "scrolled": true - }, + "id": "a88ef7dc", + "metadata": {}, "outputs": [], "source": [ - "tab = kx.Table(data=\n", - " {\n", - " 'x': [0, 1, 2, 3, 4, 5, 6, 7, np.NaN, np.NaN],\n", - " 'y': [10, 11, 12, 13, 14, 15, 16, 17, 18, np.NaN],\n", - " 'z': ['a', 'b', 'c', 'd', 'd', 'e', 'e', 'f', 'g', 'h']\n", - " }\n", - ")\n", - "tab" + "print(tab.get('q'))" + ] + }, + { + "cell_type": "markdown", + "id": "ea3dc01a", + "metadata": {}, + "source": [ + "Attempt to get the `q` column from the table and receive the default value `not found` as that column does not exist." ] }, { "cell_type": "code", "execution_count": null, - "id": "f8558148", + "id": "2f3abc92", "metadata": {}, "outputs": [], "source": [ - "tab.mode(dropna=False)" + "tab.get('q', 'not found')" ] }, { "cell_type": "markdown", - "id": "f5c66579", + "id": "b2195cfe", "metadata": {}, "source": [ - "### Table.std()\n", + "### Table.head()\n", "\n", "```\n", - "Table.std(axis=0, skipna=True, numeric_only=False, ddof=0)\n", + "Table.head(n=5)\n", "```\n", "\n", - "Return sample standard deviation over requested axis. Normalized by N-1 by default. This can be changed using the ddof argument.\n", - "\n", + "Get the first n rows from a table.\n", "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", - "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", - "| axis | int | The axis to calculate the sum across 0 is columns, 1 is rows. | 0 |\n", - "| skipna | bool | not yet implemented | True |\n", - "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", - "| ddof | int | Delta Degrees of Freedom. The divisor used in calculations is N - ddof, where N represents the number of elements. 
| 1 |\n", + "| Name | Type | Description | Default |\n", + "| :--: | :--: | :---------------------------- | :-----: |\n", + "| n | int | The number of rows to return. | 5 |\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :----------------: | :------------------------------------------------------------------- |\n", - "| Table | The std across each row / column with the key corresponding to the row number or column name. |" + "| Type | Description |\n", + "| :---: | :------------------------------- |\n", + "| Table | The first `n` rows of the table. |" ] }, { "cell_type": "markdown", - "id": "c2767afd", + "id": "18a0ca1e", "metadata": {}, "source": [ "**Examples:**\n", "\n", - "Calculate the std across the columns of a table" + "Return the first 5 rows of the table." ] }, { "cell_type": "code", "execution_count": null, - "id": "87b94fd0", + "id": "5120ce1c", "metadata": {}, "outputs": [], "source": [ - "tab = kx.Table(data=\n", - " {\n", - " 'a': [1, 2, 2, 4],\n", - " 'b': [1, 2, 6, 7],\n", - " 'c': [7, 8, 9, 10],\n", - " 'd': [7, 11, 14, 14]\n", - " }\n", - ")\n", - "tab" + "tab.head()" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "3e54d557", - "metadata": {}, - "outputs": [], - "source": [ - "tab.std()" - ] - }, - { - "cell_type": "markdown", - "id": "14950833", - "metadata": {}, - "source": [ - "Calculate the std across the rows of a table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f19161ed", - "metadata": {}, - "outputs": [], - "source": [ - "tab.std(axis=1)" - ] - }, - { - "cell_type": "markdown", - "id": "a8ea5a38", - "metadata": {}, - "source": [ - "Calculate std accross columns with ddof=0:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6361dcb7", - "metadata": {}, - "outputs": [], - "source": [ - "tab.std(ddof=0)" - ] - }, - { - "cell_type": "markdown", - "id": "7e2813b4", - "metadata": {}, - "source": [ - "## Indexing" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "id": "77ab64ab", - "metadata": {}, - "outputs": [], - "source": [ - "# The examples in this section will use this example table filled with random data\n", - "kx.q('N: 1000')\n", - "tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')\n", - "tab" - ] - }, - { - "cell_type": "markdown", - "id": "69313988", - "metadata": {}, - "source": [ - "### Table.head()\n", - "\n", - "```\n", - "Table.head(n=5)\n", - "```\n", - "\n", - "Get the first n rows from a table.\n", - "\n", - "**Parameters:**\n", - "\n", - "| Name | Type | Description | Default |\n", - "| :--: | :--: | :---------------------------- | :-----: |\n", - "| n | int | The number of rows to return. | 5 |\n", - "\n", - "**Returns:**\n", - "\n", - "| Type | Description |\n", - "| :---: | :------------------------------- |\n", - "| Table | The first `n` rows of the table. |" - ] - }, - { - "cell_type": "markdown", - "id": "edf33458", - "metadata": {}, - "source": [ - "**Examples:**\n", - "\n", - "Return the first 5 rows of the table." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "916fcf4d", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "tab.head()" - ] - }, - { - "cell_type": "markdown", - "id": "cb58279a", + "cell_type": "markdown", + "id": "08f158a8", "metadata": {}, "source": [ "Return the first 10 rows of the table." 
@@ -830,7 +680,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bf32db40", + "id": "de9c2842", "metadata": {}, "outputs": [], "source": [ @@ -839,233 +689,134 @@ }, { "cell_type": "markdown", - "id": "a5c4a5e9", - "metadata": {}, - "source": [ - "### Table.tail()\n", - "\n", - "```\n", - "Table.tail(n=5)\n", - "```\n", - "\n", - "Get the last n rows from a table.\n", - "\n", - "**Parameters:**\n", - "\n", - "| Name | Type | Description | Default |\n", - "| :--: | :--: | :---------------------------- | :-----: |\n", - "| n | int | The number of rows to return. | 5 |\n", - "\n", - "**Returns:**\n", - "\n", - "| Type | Description |\n", - "| :---: | :------------------------------- |\n", - "| Table | The last `n` rows of the table. |" - ] - }, - { - "cell_type": "markdown", - "id": "4e3fee46", - "metadata": {}, - "source": [ - "**Examples:**\n", - "\n", - "Return the last 5 rows of the table." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a0d34e0b", - "metadata": {}, - "outputs": [], - "source": [ - "tab.tail()" - ] - }, - { - "cell_type": "markdown", - "id": "e223e705", - "metadata": {}, - "source": [ - "Return the last 10 rows of the table." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4edae0c3", - "metadata": {}, - "outputs": [], - "source": [ - "tab.tail(10)" - ] - }, - { - "cell_type": "markdown", - "id": "c87325f8", + "id": "d1c370e4", "metadata": {}, "source": [ - "### Table.get()\n", + "### Table.iloc[]\n", "\n", "```\n", - "Table.get(key, default=None)\n", + "Table.iloc[:, :]\n", "```\n", "\n", - "Get a column or columns from a table by key, if the key does not exist return the default value.\n", + "Purely integer-location based indexing for selection by position.\n", "\n", - "**Parameters:**\n", + "`iloc` is primarily integer position based (from 0 to length-1 of the axis), but may also be used with a BooleanVector.\n", "\n", - "| Name | Type | Description | Default |\n", - "| :-----: | :--------------------: | :------------------------------------------------------ | :--------: |\n", - "| key | Union[str, list[str]] | The column name or list of names to get from the table. | _required_ |\n", - "| default | int | The default value if the key is not found. | None |\n", + "Allowed inputs are:\n", + "- An integer, e.g. 5.\n", + "- A list or array of integers, e.g. [4, 3, 0].\n", + "- A slice object with ints, e.g. 1:7.\n", + "- A BooleanVector.\n", + "- A callable function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above). This is useful in method chains, when you don’t have a reference to the calling object, but would like to base your selection on some value.\n", + "- A tuple of row and column indexes. The tuple elements consist of one of the above inputs, e.g. (0, 1).\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :---------------: | :------------------------------------------------------------------- |\n", - "| Union[Table, Any] | A table contatining only the columns requested or the default value. 
|" + "| Type | Description |\n", + "| :---: | :----------------------------------------------------- |\n", + "| Table | A table containing only the columns / rows requested. |" ] }, { "cell_type": "markdown", - "id": "7c96cd34", + "id": "07e31d96", "metadata": {}, "source": [ "**Examples:**\n", "\n", - "Get the `y` column from the table." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f64d914", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "tab.get('y')" - ] - }, - { - "cell_type": "markdown", - "id": "88ee5698", - "metadata": {}, - "source": [ - "Get the `y` and `z` columns from the table." + "Get the second row from a table." ] }, { "cell_type": "code", "execution_count": null, - "id": "daef6ce6", + "id": "f8108853", "metadata": { "scrolled": true }, "outputs": [], "source": [ - "tab.get(['y', 'z'])" + "tab.iloc[1]" ] }, { "cell_type": "markdown", - "id": "26a53f6d", + "id": "30c429f4", "metadata": {}, "source": [ - "Attempt to get the `q` column from the table and recieve none as that column does not exist." + "Get the first 5 rows from a table." ] }, { "cell_type": "code", "execution_count": null, - "id": "3856084d", + "id": "2f817967", "metadata": {}, "outputs": [], "source": [ - "print(tab.get('q'))" + "tab.iloc[:5]" ] }, { "cell_type": "markdown", - "id": "91932d32", + "id": "2eb41e47", "metadata": {}, "source": [ - "Attempt to get the `q` column from the table and recieve the default value `not found` as that column does not exist." + "Get all rows of the table where the `y` column is equal to `AAPL`." 
] }, { "cell_type": "code", "execution_count": null, - "id": "7d2a2bcf", - "metadata": {}, + "id": "69e14007", + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "tab.get('q', 'not found')" - ] - }, - { - "cell_type": "markdown", - "id": "9e831e14", - "metadata": {}, - "source": [ - "### Table.at[]\n", - "\n", - "```\n", - "Table.at[row, col]\n", - "```\n", - "\n", - "Access a single value for a row / column pair.\n", - "\n", - "Similar to `loc[]`, in that both provide label-based lookups. Use `at` if you only need to get or set a single value.\n", - "\n", - "The `at` property can be used for both assignment and retrieval of values at a given row and column." + "tab.iloc[tab['y'] == 'AAPL']" ] }, { "cell_type": "markdown", - "id": "97519657", + "id": "7861f193", "metadata": {}, "source": [ - "**Examples:**\n", - "\n", - "Get the value of the `z` column in the 997th row." + "Get all rows of the table where the `y` column is equal to `AAPL`, and only return the `y`, `z` and `w` columns." ] }, { "cell_type": "code", "execution_count": null, - "id": "9cd275bf", + "id": "323cc0f8", "metadata": {}, "outputs": [], "source": [ - "tab.at[997, 'z']" + "tab.iloc[tab['y'] == 'AAPL', ['y', 'z', 'w']]" ] }, { "cell_type": "markdown", - "id": "1fd39083", + "id": "9de566f3", "metadata": {}, "source": [ - "Reassign the value of the `z` column in the 997th row to `3.14159`." + "Replace all null values in the column `v` with the value `-100`." 
] }, { "cell_type": "code", "execution_count": null, - "id": "814fa8e0", + "id": "be66947d", "metadata": {}, "outputs": [], "source": [ - "tab.at[997, 'z'] = 3.14159\n", - "tab.at[997, 'z']" + "tab.iloc[tab['v'] == kx.q('0N'), 'v'] = -100\n", + "tab" ] }, { "cell_type": "markdown", - "id": "7815e8c3", + "id": "ed37aa73", "metadata": {}, "source": [ "### Table.loc[]\n", @@ -1096,12 +847,12 @@ "\n", "| Type | Description |\n", "| :---: | :----------------------------------------------------- |\n", - "| Table | A table contatining only the columns / rows requested. |" + "| Table | A table containing only the columns / rows requested. |" ] }, { "cell_type": "markdown", - "id": "5ee06186", + "id": "c68e21f1", "metadata": {}, "source": [ "**Examples:**\n", @@ -1112,7 +863,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12fc6807", + "id": "e46092cc", "metadata": { "scrolled": true }, @@ -1123,7 +874,7 @@ }, { "cell_type": "markdown", - "id": "97206dd7", + "id": "9e136f10", "metadata": {}, "source": [ "Get all rows of the table where the value in the `z` column is greater than `250.0`" @@ -1132,7 +883,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a6c9add0", + "id": "52d2f0fe", "metadata": {}, "outputs": [], "source": [ @@ -1141,7 +892,7 @@ }, { "cell_type": "markdown", - "id": "a32aca6b", + "id": "52c058a6", "metadata": {}, "source": [ "Replace all null values in the column `v` with the value `-100`." @@ -1150,7 +901,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c1ad3a23", + "id": "960f1933", "metadata": { "scrolled": true }, @@ -1162,7 +913,7 @@ }, { "cell_type": "markdown", - "id": "447b9fd2", + "id": "9b262eca", "metadata": {}, "source": [ "Replace all locations in column `v` where the value is `-100` with a null." 
@@ -1171,7 +922,7 @@ { "cell_type": "code", "execution_count": null, - "id": "31ea02c9", + "id": "f4c974c7", "metadata": {}, "outputs": [], "source": [ @@ -1181,7 +932,7 @@ }, { "cell_type": "markdown", - "id": "ac4c5e4b", + "id": "ddc94e12", "metadata": {}, "source": [ "Usage of the `loc` functionality under the hood additionally allows users to set columns within a table for single or multiple columns. Data passed for this can be q/Python." @@ -1190,7 +941,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f378ba4a", + "id": "f9d06838", "metadata": {}, "outputs": [], "source": [ @@ -1200,7 +951,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0f2936b9", + "id": "1505d9bb", "metadata": {}, "outputs": [], "source": [ @@ -1209,2120 +960,2688 @@ }, { "cell_type": "markdown", - "id": "a3368987", + "id": "05765a04", "metadata": {}, "source": [ - "### Table.iloc[]\n", + "### Table.sample()\n", "\n", "```\n", - "Table.iloc[:, :]\n", + "Table.sample(n, frac, replace, weights, random_state, axis, ignore_index)\n", "```\n", "\n", - "Purely integer-location based indexing for selection by position.\n", + "Sample random data from the table.\n", "\n", - "`iloc` is primarily integer position based (from 0 to length-1 of the axis), but may also be used with a BooleanVector.\n", + "**Parameters:**\n", "\n", - "Allowed inputs are:\n", - "- An integer, e.g. 5.\n", - "- A list or array of integers, e.g. [4, 3, 0].\n", - "- A slice object with ints, e.g. 1:7.\n", - "- A BooleanVector.\n", - "- A callable function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above). This is useful in method chains, when you don’t have a reference to the calling object, but would like to base your selection on some value.\n", - "- A tuple of row and column indexes. The tuple elements consist of one of the above inputs, e.g. 
(0, 1).\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :---: | :----------------------------------------------------------------- | :-----: |\n", + "| n | int | Number of rows to return. Cannot be used with `frac`. Default is 1 if `frac` is None. | None |\n", + "| frac | float | Fraction of the rows to return. Cannot be used with `n`. | None |\n", + "| replace | bool | Whether or not it should be possible to sample the same row twice. | False |\n", + "| weights | None | Not yet implemented. | None |\n", + "| random_state | None | Not yet implemented. | None |\n", + "| axis | None | Not yet implemented. | None |\n", + "| ignore_index | bool | Not yet implemented. | False |\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :---: | :----------------------------------------------------- |\n", - "| Table | A table contatining only the columns / rows requested. |" - ] - }, - { - "cell_type": "markdown", - "id": "0ef4d8cf", - "metadata": {}, - "source": [ - "**Examples:**\n", - "\n", - "Get the second row from a table." + "| Type | Description |\n", + "| :---: | :----------------------------------------------------------------- |\n", + "| Table | A table containing the randomly sampled rows. |" ] }, { "cell_type": "code", "execution_count": null, - "id": "683ab48b", + "id": "8b4a10be", "metadata": { "scrolled": true }, "outputs": [], "source": [ - "tab.iloc[1]" + "# The examples in this section will use this example table filled with random data\n", + "kx.q('N: 1000')\n", + "tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')\n", + "tab.head()" ] }, { "cell_type": "markdown", - "id": "e71bebdb", + "id": "970c8ea4", "metadata": {}, "source": [ - "Get the first 5 rows from a table." + "**Examples:**\n", + "\n", + "Sample 10 Rows."
] }, { "cell_type": "code", "execution_count": null, - "id": "a13730fd", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "tab.iloc[:5]" - ] - }, - { - "cell_type": "markdown", - "id": "60f892e0", + "id": "9dde77b1", "metadata": {}, - "source": [ - "Get all rows of the table where the `y` column is equal to `AAPL`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d7afdf65", - "metadata": { - "scrolled": true - }, "outputs": [], "source": [ - "tab.iloc[tab['y'] == 'AAPL']" + "tab.sample(n=10)" ] }, { "cell_type": "markdown", - "id": "8b3b9279", + "id": "1d14afe9", "metadata": {}, "source": [ - "Get all rows of the table where the `y` column is equal to `AAPL`, and only return the `y`, `z` and `w` columns." + "Sample 10% of the rows." ] }, { "cell_type": "code", "execution_count": null, - "id": "a0d9f08d", + "id": "32772c46", "metadata": {}, "outputs": [], "source": [ - "tab.iloc[tab['y'] == 'AAPL', ['y', 'z', 'w']]" + "tab.sample(frac=0.1)" ] }, { "cell_type": "markdown", - "id": "045bc156", + "id": "82a7a79d", "metadata": {}, "source": [ - "Replace all null values in the column `v` with the value `-100`." + "Sample 10% of the rows and allow the same row to be sampled twice." 
] }, { "cell_type": "code", "execution_count": null, - "id": "7e21c163", + "id": "4c96839b", "metadata": {}, "outputs": [], "source": [ - "tab.iloc[tab['v'] == kx.q('0N'), 'v'] = -100\n", - "tab" + "tab.sample(frac=0.1, replace=True)" ] }, { "cell_type": "markdown", - "id": "76021266", - "metadata": {}, + "id": "82b501a6", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ - "### Table.pop()\n", + "### Table.select_dtypes()\n", "\n", "```\n", - "Table.pop(item)\n", + "Table.select_dtypes(include=None, exclude=None)\n", "```\n", "\n", - "Remove a column or columns from a table by column name and return the column after it has been removed.\n", + "Return a subset of the DataFrame’s columns based on the column dtypes.\n", "\n", - "**Parameters:**\n", + "Allowed inputs for `include`/`exclude` are:\n", + "- A single dtype or string.\n", + "- A list of dtypes or strings.\n", + "- Inputs given for `include` and `exclude` cannot overlap.\n", "\n", - "| Name | Type | Description | Default |\n", - "| :-----: | :--------------------: | :------------------------------------------------------ | :--------: |\n", - "| item | Union[str, list[str]] | The column name or list of names to pop from the table. | _required_ |\n", + "The dtype `kx.CharVector` will return an error. Use `kx.CharAtom` for a column of single chars.\n", + "Both `kx.*Atom` and `kx.*Vector` will be taken to mean a column containing a single item per row of type `*`. `kx.List` will include/exclude any columns containing mixed list data (including string columns).\n", + " \n", + "**Parameters:**\n", "\n", + "| Name | Type | Description | Default |\n", + "| :-----: | :--------------: | :----------------------------------------------: | :-----: |\n", + "| include | Union[List, str] | A selection of dtypes or strings to be included. | None |\n", + "| exclude | Union[List, str] | A selection of dtypes or strings to be excluded. 
| None |\n", + " \n", + "At least one of these parameters must be supplied.\n", + " \n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :---: | :----------------------------------------------------------------- |\n", - "| Table | A table contatining only the columns removed from the input table. |" + "| Type | Description |\n", + "| :-------: | :----------------------------------------------------------------------------------------------: |\n", + "| Dataframe | The subset of the frame including the dtypes in `include` and excluding the dtypes in `exclude`. |" ] }, { "cell_type": "markdown", - "id": "e5fdfbd3", + "id": "0570165c", "metadata": {}, "source": [ "**Examples:**\n", "\n", - "Remove the `v` column from the table and return it." + "The examples in the section will use the example table." ] }, { "cell_type": "code", "execution_count": null, - "id": "7a960191", - "metadata": { - "scrolled": true - }, + "id": "74ade8d1", + "metadata": {}, "outputs": [], "source": [ - "display(tab.head())\n", - "print('\\n\\nPop the `v` column out of the table')\n", - "display(tab.pop(\"v\"))\n", - "print('\\n\\nUpdated Table')\n", - "display(tab.head())" + "df = kx.q('([] c1:`a`b`c; c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)')" ] }, { "cell_type": "markdown", - "id": "35062560", + "id": "b889d7c7", "metadata": {}, "source": [ - "Remove the `z` and `w` columns from the table and return them." 
+ "Exclude columns containing symbols" ] }, { "cell_type": "code", "execution_count": null, - "id": "a46189b2", - "metadata": { - "scrolled": false - }, + "id": "e8a792da", + "metadata": {}, "outputs": [], "source": [ - "display(tab.head())\n", - "print('\\n\\nPop the `z` and `w` columns out of the table')\n", - "display(tab.pop([\"z\", \"w\"]).head())\n", - "print('\\n\\nUpdated Table')\n", - "display(tab.head())" + "df.select_dtypes(exclude = kx.SymbolVector)" ] }, { "cell_type": "markdown", - "id": "f71b6917", + "id": "c87f28c4", "metadata": {}, "source": [ - "## Reindexing" + "Include a list of column types" ] }, { "cell_type": "code", "execution_count": null, - "id": "a2b1a198", - "metadata": { - "scrolled": true - }, + "id": "ac2af334", + "metadata": {}, "outputs": [], "source": [ - "# The examples in this section will use this example table filled with random data\n", - "kx.q('N: 1000')\n", - "tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')\n", - "tab.head()" + "df.select_dtypes(include = [kx.ShortVector, kx.LongVector])" ] }, { "cell_type": "markdown", - "id": "f5a7ac0e", + "id": "ede98735", "metadata": {}, "source": [ - "### Table.drop()\n", + "### Table.tail()\n", "\n", "```\n", - "Table.drop(item, axis=0)\n", + "Table.tail(n=5)\n", "```\n", "\n", - "Remove either columns or rows from a table and return the resulting Table object.\n", + "Get the last n rows from a table.\n", "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", - "| :-----: | :--------------------: | :---------------------------------------------------------- | :--------: |\n", - "| item | Union[str, list[str]] | The column name(s) or row number(s) to drop from the table. | _required_ |\n", - "| axis | int | The column name or list of names to pop from the table. 
| 0 |\n", + "| Name | Type | Description | Default |\n", + "| :--: | :--: | :---------------------------- | :-----: |\n", + "| n | int | The number of rows to return. | 5 |\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :---: | :----------------------------------------------------------------- |\n", - "| Table | A table with the given column(s) / row(s) removed. |" + "| Type | Description |\n", + "| :---: | :------------------------------- |\n", + "| Table | The last `n` rows of the table. |" ] }, { "cell_type": "markdown", - "id": "008a2e74", + "id": "a7b6bd44", "metadata": {}, "source": [ "**Examples:**\n", "\n", - "Drop rows from a table." + "Return the last 5 rows of the table." ] }, { "cell_type": "code", "execution_count": null, - "id": "0f74d3f2", + "id": "d1f5f644", "metadata": {}, "outputs": [], "source": [ - "tab.drop([0, 2, 4, 6, 8, 10]).head()" + "tab.tail()" ] }, { "cell_type": "markdown", - "id": "cb4e82aa", + "id": "181a4d86", "metadata": {}, "source": [ - "Drop columns from a table." + "Return the last 10 rows of the table." 
] }, { "cell_type": "code", "execution_count": null, - "id": "57ad6a64", + "id": "c8a0bb7b", "metadata": {}, "outputs": [], "source": [ - "tab.drop('y', axis=1).head()" + "tab.tail(10)" ] }, { "cell_type": "markdown", - "id": "90db87b0", + "id": "29b0e773", "metadata": {}, "source": [ - "### Table.drop_duplicates()\n", + "## Data Joins/Merging" + ] + }, + { + "cell_type": "markdown", + "id": "666a7621", + "metadata": {}, + "source": [ + "### Table.merge()\n", "\n", "```\n", - "Table.drop_duplicates()\n", + "Table.merge(\n", + " right,\n", + " how='inner',\n", + " on=None,\n", + " left_on=None,\n", + " right_on=None,\n", + " left_index=False,\n", + " right_index=False,\n", + " sort=False,\n", + " suffixes=('_x', '_y'),\n", + " copy=True,\n", + " validate=None,\n", + " q_join=False\n", + ")\n", "```\n", "\n", - "Remove either columns or rows from a table and return the resulting Table object.\n", + "Merge Table or KeyedTable objects with a database-style join.\n", + "\n", + "The join is done on columns or keys. If joining columns on columns, the Table key will be ignored. Otherwise if joining keys on keys or keys on a column or columns, the index will be passed on. When performing a cross merge, no column specifications to merge on are allowed.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :---------: | :--: | :-------------------------------------------------------------------------------- | :-----: |\n", + "| right | Union[Table/KeyedTable] | The object to merge with. | _required_ |\n", + "| how | str | The type of join to be used. One of {‘left’, ‘right’, ‘outer’, ‘inner’, ‘cross’}. | ‘inner’ |\n", + "| on | str | The column name to join on. | None |\n", + "| left_on | str | The column name in the left table to join on. | None |\n", + "| right_on | str | The column name in the right table to join on. | None |\n", + "| left_index | bool | Use the index of the left Table. 
| False |\n", + "| right_index | bool | Use the index of the right Table. | False |\n", + "| sort | bool | Sort the join keys of the resulting table. | False |\n", + "| suffixes | Tuple(str, str) | The suffixes appended to overlapping column names from the left and right tables respectively. | ('\\_x', '\\_y') |\n", + "| copy | bool | If False avoid copies and modify the input table. | True |\n", + "| validate | str | If specified checks if merge matches specified type.
- “one_to_one” or “1:1”: check if merge keys are unique in both left and right datasets.
- “one_to_many” or “1:m”: check if merge keys are unique in left dataset.
- “many_to_one” or “m:1”: check if merge keys are unique in right dataset.
- “many_to_many” or “m:m”: allowed, but does not result in checks.
| None |\n", + "| q_join | bool | If True perform native q joins instead of the pandas SQL-like joins. More documentation around these joins can be found [here.](https://code.kx.com/q/basics/joins/) | False |\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :---: | :--------------------------------------- |\n", - "| Table | A table with all duplicate rows removed. |" + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------ |\n", + "| Table / KeyedTable | The resulting table-like object after the join has been performed. |" ] }, { "cell_type": "markdown", - "id": "3af33f03", + "id": "61d1567a", "metadata": {}, "source": [ "**Examples:**\n", "\n", - "Create a table with duplicates for the example" + "Merge tab1 and tab2 on the lkey and rkey columns. The value columns have the default suffixes, \\_x and \\_y, appended." ] }, { "cell_type": "code", "execution_count": null, - "id": "af182307", - "metadata": {}, + "id": "8a9acd51", + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "tab2 = kx.q('([] 100?`AAPL`GOOG`MSFT; 100?3)')\n", - "tab2" + "tab1 = kx.Table(data={'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})\n", + "tab2 = kx.Table(data={'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})\n", + "tab1.merge(tab2, left_on='lkey', right_on='rkey')" ] }, { "cell_type": "markdown", - "id": "48143d51", + "id": "e004bf64", "metadata": {}, "source": [ - "Drop all duplicate rows from the table." + "Merge tab1 and tab2 on the lkey and rkey columns using a native q inner join. The value columns have the default suffixes, \\_x and \\_y, appended."
] }, { "cell_type": "code", "execution_count": null, - "id": "eeff16e7", + "id": "07df7437", "metadata": {}, "outputs": [], "source": [ - "tab2.drop_duplicates()" - ] - }, - { - "cell_type": "markdown", - "id": "6d71c8c0", - "metadata": {}, - "source": [ - "### Table.rename()\n", - "\n", - "```\n", - "Table.rename(columns)\n", - "```\n", - "\n", - "Rename columns in a table and return the resulting Table object.\n", - "\n", - "**Parameters:**\n", - "\n", - "| Name | Type | Description | Default |\n", - "| :-----: | :-------------: | :------------------------------------------------------------------ | :--------: |\n", - "| item | dict[str, str] | A dictonary of column name to new column name to use when renaming. | _required_ |\n", - "\n", - "**Returns:**\n", - "\n", - "| Type | Description |\n", - "| :---: | :----------------------------------------------------------------- |\n", - "| Table | A table with the given column(s) renamed. |" + "tab1.merge(tab2, left_on='lkey', right_on='rkey', q_join=True)" ] }, { "cell_type": "markdown", - "id": "73260da1", + "id": "7350d9db", "metadata": {}, "source": [ - "**Examples:**\n", - "\n", - "The inital table we will be renaming columns on." + "Merge tab1 and tab2 with specified left and right suffixes appended to any overlapping columns." ] }, { "cell_type": "code", "execution_count": null, - "id": "3cc68fa6", + "id": "23685dcb", "metadata": {}, "outputs": [], "source": [ - "tab.head()" + "tab1.merge(tab2, left_on='lkey', right_on='rkey', suffixes=('_left', '_right'))" ] }, { "cell_type": "markdown", - "id": "eef94948", + "id": "3b2c65d4", "metadata": {}, "source": [ - "Rename column `y` to `symbol` and `z` to `price`." + "Merge tab1 and tab2 but raise an exception if the Tables have any overlapping columns." 
] }, { "cell_type": "code", "execution_count": null, - "id": "d5e76248", - "metadata": {}, + "id": "b5d16312", + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "tab.rename(columns={'y': 'symbol', 'z': 'price'}).head()" + "try:\n", + " tab1.merge(tab2, left_on='lkey', right_on='rkey', suffixes=(False, False))\n", + "except BaseException as e:\n", + " print(f'Caught Error: {e}')" ] }, { - "cell_type": "markdown", - "id": "05124590", + "cell_type": "code", + "execution_count": null, + "id": "793df3f3", "metadata": {}, + "outputs": [], "source": [ - "### Table.sample()\n", - "\n", - "```\n", - "Table.sample(n, frac, replace, weights, random_state, axis, ignore_index)\n", - "```\n", - "\n", - "Sample random data from the table.\n", - "\n", - "**Parameters:**\n", - "\n", - "| Name | Type | Description | Default |\n", - "| :----------: | :---: | :----------------------------------------------------------------- | :-----: |\n", - "| n | int | Number of rows to return. Cannot be used with `frac`. Default is 1 if `frac` is None. | None |\n", - "| frac | float | Fraction of the rows to return. Cannot be used with `n`. | None |\n", - "| replace | bool | Whether or not it should be possible to sample the same row twice. | False |\n", - "| weights | None | Not yet implemented. | None |\n", - "| random_state | None | Not yet implemented. | None |\n", - "| axis | None | Not yet implemented. | None |\n", - "| ignore_index | bool | Not yet implemented. | False |\n", - "\n", - "**Returns:**\n", - "\n", - "| Type | Description |\n", - "| :---: | :----------------------------------------------------------------- |\n", - "| Table | A table with the given column(s) renamed. |" + "tab1 = kx.Table(data={'a': ['foo', 'bar'], 'b': [1, 2]})\n", + "tab2 = kx.Table(data={'a': ['foo', 'baz'], 'c': [3, 4]})" ] }, { "cell_type": "markdown", - "id": "e8f78917", + "id": "d58a52a3", "metadata": {}, "source": [ - "**Examples:**\n", - "\n", - "Sample 10 Rows." 
+ "Merge tab1 and tab2 on the `a` column using an inner join." ] }, { "cell_type": "code", "execution_count": null, - "id": "d88ab348", - "metadata": {}, + "id": "1180e6f4", + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "tab.sample(n=10)" + "tab1.merge(tab2, how='inner', on='a')" ] }, { "cell_type": "markdown", - "id": "78e03554", + "id": "b14e36da", "metadata": {}, "source": [ - "Sample 10% of the rows." + "Merge tab1 and tab2 on the `a` column using a left join." ] }, { "cell_type": "code", "execution_count": null, - "id": "8585d62e", + "id": "4b0098da", "metadata": {}, "outputs": [], "source": [ - "tab.sample(frac=0.1)" + "tab1.merge(tab2, how='left', on='a')" ] }, { "cell_type": "markdown", - "id": "c77712d3", + "id": "00d0ad6a", "metadata": {}, "source": [ - "Sample 10% of the rows and allow the same row to be sampled twice." + "Merge tab1 and tab2 using a cross join." ] }, { "cell_type": "code", "execution_count": null, - "id": "b138f770", - "metadata": {}, + "id": "b55be868", + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "tab.sample(frac=0.1, replace=True)" + "tab1 = kx.Table(data={'left': ['foo', 'bar']})\n", + "tab2 = kx.Table(data={'right': [7, 8]})\n", + "tab1.merge(tab2, how='cross')" ] }, { "cell_type": "markdown", - "id": "6f6f5672", + "id": "7583c015", "metadata": {}, "source": [ - "### Table.select_dtypes()\n", + "### Table.merge_asof()\n", "\n", "```\n", - "Table.select_dtypes(include=None, exclude=None)\n", + "Table.merge_asof(\n", + " right,\n", + " on=None,\n", + " left_on=None,\n", + " right_on=None,\n", + " left_index=False,\n", + " right_index=False,\n", + " by=None,\n", + " left_by=None,\n", + " right_by=None,\n", + " suffixes=('_x', '_y'),\n", + " tolerance=None,\n", + " allow_exact_matches=True,\n", + " direction='backward'\n", + "\n", + ")\n", "```\n", "\n", - "Return a subset of the DataFrame’s columns based on the column dtypes.\n", - " \n", + "Merge Table or KeyedTable objects with a 
database-style join.\n", + "\n", + "The join is done on columns or keys. If joining columns on columns, the Table key will be ignored. Otherwise if joining keys on keys or keys on a column or columns, the index will be passed on. When performing a cross merge, no column specifications to merge on are allowed.\n", + "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", - "| :-----: | :--------------: | :----------------------------------------------: | :-----: |\n", - "| include | Union[List, str] | A selection of dtypes or strings to be included. | None |\n", - "| exclude | Union[List, str] | A selection of dtypes or strings to be excluded. | None |\n", - " \n", - "At least one of these parameters must be supplied.\n", - " \n", + "| Name | Type | Description | Default |\n", + "| :---------: | :--: | :-------------------------------------------------------------------------------- | :-----: |\n", + "| right | Union[Table/KeyedTable] | The object to merge with. | _required_ |\n", + "| how | str | The type of join to be used. One of {‘left’, ‘right’, ‘outer’, ‘inner’, ‘cross’}. | ‘inner’ |\n", + "| on | str | The column name to join on. | None |\n", + "| left_on | str | The column name in the left table to join on. | None |\n", + "| right_on | str | The column name in the right table to join on. | None |\n", + "| left_index | bool | Use the index of the left Table. | False |\n", + "| right_index | bool | Use the index of the right Table. | False |\n", + "| by | str | Not yet implemented. | None |\n", + "| left_by | str | Field names to match on in the left table. | None |\n", + "| right_by | str | Field names to match on in the right table. | None |\n", + "| suffixes | Tuple(str, str) | The number of rows to return. | ('\\_x', '\\_y') |\n", + "| tolerance | Any | Not yet implemented. | None |\n", + "| allow_exact_matches | bool | Not yet implemented. | True |\n", + "| direction | str | Not yet implemented. 
| 'backward' |\n", + "\n", + "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :-------: | :----------------------------------------------------------------------------------------------: |\n", - "| Dataframe | The subset of the frame including the dtypes in `include` and excluding the dtypes in `exclude`. |" + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------ |\n", + "| Table / KeyedTable | The resulting table-like object after the join has been performed. |" ] }, { "cell_type": "markdown", - "id": "6a703c57", + "id": "908499df", "metadata": {}, "source": [ "**Examples:**\n", "\n", - "The examples in the section will use the example table." + "Perform a simple asof join on two tables." ] }, { "cell_type": "code", "execution_count": null, - "id": "5e9734f7", + "id": "e660e496", "metadata": {}, "outputs": [], "source": [ - "df = kx.q('([] c1:`a`b`c; c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)')" + "left = kx.Table(data={\"a\": [1, 5, 10], \"left_val\": [\"a\", \"b\", \"c\"]})\n", + "right = kx.Table(data={\"a\": [1, 2, 3, 6, 7], \"right_val\": [1, 2, 3, 6, 7]})\n", + "left" ] }, { - "cell_type": "markdown", - "id": "42d9ffa6", + "cell_type": "code", + "execution_count": null, + "id": "e456e4ad", "metadata": {}, + "outputs": [], "source": [ - "Exclude columns contatining symbols" + "right" ] }, { "cell_type": "code", "execution_count": null, - "id": "3d934cf0", + "id": "d4616f6d", "metadata": {}, "outputs": [], "source": [ - "df.select_dtypes(exclude = kx.SymbolVector)" + "left.merge_asof(right)" ] }, { "cell_type": "markdown", - "id": "e4302f7d", + "id": "496d5a72", "metadata": {}, "source": [ - "Include a list of column types" + "Perform an asof join on two tables but first merge them on the by column."
] }, { "cell_type": "code", "execution_count": null, - "id": "f698f5f0", + "id": "3f0fcc13", "metadata": {}, "outputs": [], "source": [ - "df.select_dtypes(include = [kx.ShortVector, kx.LongVector])" - ] - }, - { - "cell_type": "markdown", - "id": "5590d1ca", - "metadata": {}, - "source": [ - "### Table.astype()\n", - "\n", - "```\n", - "Table.astype(dtype, copy=True, errors='raise')\n", - "```\n", - "\n", - "Cast a column/columns of the Dataframes object to a specified `dtype`.\n", - "\n", - "**Parameters:**\n", - "\n", - "| Name | Type | Description | Default |\n", - "| :-: | :-: | :-: | :-: |\n", - "| dtype | data type, or dict of column name -> data type | Use a PyKx wrapper data type or Python type to cast all columns to the same type. Alternatively, use {col: dtype, …}, where col is a column label and dtype is PyKx wrapper data type to cast one or more of the DataFrame’s columns to column-specific types. | |\n", - "| copy | Boolean | Default of True, False not implemented | True |\n", - "| errors | {‘raise’, ‘ignore’} | If passed anything other than 'raise', it will return the dataframe | 'raise' |\n", - "\n", - "\n", - "**Returns:**\n", - "\n", - "| Type | Description |\n", - "| :-: | :-: |\n", - "| Dataframe | The dataframe with columns casted according to passed dtypes |" + "trades = kx.Table(data={\n", + " \"time\": [\n", + " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.030\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.041\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.049\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.072\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.075\")\n", + " ],\n", + " \"ticker\": [\n", + " \"GOOG\",\n", + " \"MSFT\",\n", + " \"MSFT\",\n", + " \"MSFT\",\n", + " \"GOOG\",\n", + " \"AAPL\",\n", + " \"GOOG\",\n", + " \"MSFT\"\n", + " ],\n", + " \"bid\": [720.50, 51.95, 51.97, 51.99, 720.50, 
97.99, 720.50, 52.01],\n", + " \"ask\": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03]\n", + "})\n", + "quotes = kx.Table(data={\n", + " \"time\": [\n", + " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.038\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", + " pd.Timestamp(\"2016-05-25 13:30:00.048\")\n", + " ],\n", + " \"ticker\": [\"MSFT\", \"MSFT\", \"GOOG\", \"GOOG\", \"AAPL\"],\n", + " \"price\": [51.95, 51.95, 720.77, 720.92, 98.0],\n", + " \"quantity\": [75, 155, 100, 100, 100]\n", + "})\n", + "trades" ] }, { - "cell_type": "markdown", - "id": "f9ca98d2", + "cell_type": "code", + "execution_count": null, + "id": "b7259913", "metadata": {}, + "outputs": [], "source": [ - "**Examples:**\n", - "\n", - "The examples in the section will use the example table." + "quotes" ] }, { "cell_type": "code", "execution_count": null, - "id": "831836c8", + "id": "32e41b85", "metadata": {}, "outputs": [], "source": [ - "df = kx.q('([] c1:1 2 3i; c2:1 2 3j; c3:1 2 3h; c4:1 2 3i)')" + "trades.merge_asof(quotes, on=\"time\")" ] }, { "cell_type": "markdown", - "id": "0bf0d78f", + "id": "04e022a9", "metadata": {}, "source": [ - "Cast all columns to dtype LongVector" + "## Analytic functionality" ] }, { "cell_type": "code", "execution_count": null, - "id": "6833400a", + "id": "c167fdc9", "metadata": {}, "outputs": [], "source": [ - "df.astype(kx.LongVector)" + "# All the examples in this section will use this example table.\n", + "kx.q('N: 100')\n", + "tab = kx.q('([] sym: N?`AAPL`GOOG`MSFT; price: 250f - N?500f; traded: 100 - N?200; hold: N?0b)')\n", + "tab" ] }, { "cell_type": "markdown", - "id": "7a2bfcd3", + "id": "be074715", "metadata": {}, "source": [ - "Casting as specified in the dcitionary supplied with given dtype per column" + "### Table.abs()\n", + "\n", + "```\n", + "Table.abs(numeric_only=False)\n", + "```\n", + "\n", + "Take the absolute value of each 
element in the table. This will raise an error if there are columns that contain data that have no absolute value.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| numeric_only | bool | Only use columns of the table that can be converted to an absolute value. | False |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Table / KeyedTable | The resulting table like object with only positive numerical values. |" ] }, { "cell_type": "code", "execution_count": null, - "id": "872db9aa", - "metadata": {}, + "id": "52f27400", + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "df.astype({'c1':kx.LongVector, 'c2':'kx.ShortVector'})" + "tab.abs(numeric_only=True)" ] }, { "cell_type": "markdown", - "id": "ef3b4225", + "id": "85d42035", "metadata": {}, "source": [ - "The next example will use this table" + "### Table.count()\n", + "\n", + "```\n", + "Table.count(axis=0, numeric_only=False)\n", + "```\n", + "\n", + "Returns the count of non null values across the given axis.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to count elements across 1 is columns, 0 is rows. | 0 |\n", + "| numeric_only | bool | Only use columns of the table that are of a numeric data type. 
| False |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `count` on that column / row. |" ] }, { "cell_type": "code", "execution_count": null, - "id": "6a20abdd", + "id": "a53125cb", "metadata": {}, "outputs": [], "source": [ - "df = kx.q('([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j; c4:(\"abc\";\"def\";\"ghi\");c5:\"abc\";c6:(1 2 3;4 5 6;7 8 9))')" + "tab.count()" ] }, { "cell_type": "markdown", - "id": "908fa4ea", + "id": "77a5a83f", "metadata": {}, "source": [ - "Casting char and string columns to symbol columns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5ea7fe9e", + "### Table.max()\n", + "\n", + "```\n", + "Table.max(axis=0, skipna=True, numeric_only=False)\n", + "```\n", + "\n", + "Returns the maximum value across the given axis.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate the maximum across 0 is columns, 1 is rows. | 0 |\n", + "| skipna | bool | Ignore any null values along the axis. | True |\n", + "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Dictionary | A dictionary where the key represents the column name / row number and the values are the result of calling `max` on that column / row. 
|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5aea50f5", "metadata": {}, "outputs": [], "source": [ - "df.astype({'c4':kx.SymbolVector, 'c5':kx.SymbolVector})" + "tab.max()" ] }, { "cell_type": "markdown", - "id": "0f8813a0", + "id": "71dab7ac", "metadata": {}, "source": [ - "### Table.add_prefix()\n", + "### Table.min()\n", "\n", "```\n", - "Table.add_prefix(columns)\n", + "Table.min(axis=0, skipna=True, numeric_only=False)\n", "```\n", "\n", - "Rename columns adding a prefix in a table and return the resulting Table object.\n", + "Returns the minimum value across the given axis.\n", "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", - "| :-----: | :-------------: | :------------------------------------------------------------------ | :--------: |\n", - "| prefix | str | The string that will be concatenated with the name of the columns | _required_ |\n", - "| axis | int | Axis to add prefix on. | 0 |\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate the minimum across 0 is columns, 1 is rows. | 0 |\n", + "| skipna | bool | Ignore any null values along the axis. | True |\n", + "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :---: | :----------------------------------------------------------------- |\n", - "| Table | A table with the given column(s) renamed adding a prefix. 
|" - ] - }, - { - "cell_type": "markdown", - "id": "9186ed86", - "metadata": {}, - "source": [ - "**Examples:**\n", - "\n", - "The initial table to which a prefix will be added to its columns" + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Dictionary | A dictionary where the key represents the column name / row number and the values are the result of calling `min` on that column / row. |" ] }, { "cell_type": "code", "execution_count": null, - "id": "5f20131b", + "id": "9f13e8a7", "metadata": {}, "outputs": [], "source": [ - "tab.head()" + "tab.min()" ] }, { "cell_type": "markdown", - "id": "73c2b08f", + "id": "1bf3da2a", "metadata": {}, "source": [ - "Add \"col_\" to table columns:" + "### Table.sum()\n", + "\n", + "```\n", + "Table.sum(axis=0, skipna=True, numeric_only=False, min_count=0)\n", + "```\n", + "\n", + "Returns the sum of all values across the given axis.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate the sum across 0 is columns, 1 is rows. | 0 |\n", + "| skipna | bool | Ignore any null values along the axis. | True |\n", + "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", + "| min_count | int | If not set to 0 if there are less then `min_count` values across the axis a null value will be returned | 0 |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Dictionary | A dictionary where the key represents the column name / row number and the values are the result of calling `sum` on that column / row. 
|" ] }, { "cell_type": "code", "execution_count": null, - "id": "926c8295", + "id": "09975a7a", "metadata": {}, "outputs": [], "source": [ - "tab.add_prefix(prefix=\"col_\").head()" + "tab.sum()" ] }, { "cell_type": "markdown", - "id": "0a4abc8c", + "id": "97920009", "metadata": {}, "source": [ - "### Table.add_suffix()\n", + "### Table.mean()\n", "\n", "```\n", - "Table.add_suffix(columns)\n", + "Table.mean(axis=0, numeric_only=False)\n", "```\n", "\n", - "Rename columns adding a suffix in a table and return the resulting Table object.\n", + "Get the mean of values across the requested axis.\n", "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", - "| :-----: | :-------------: | :------------------------------------------------------------------ | :--------: |\n", - "| suffix | str | The string that will be concatenated with the name of the columns | _required_ |\n", - "| axis | int | Axis to add suffix on. | 0 |\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :--------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate mean across 0 is columns, 1 is rows. | 0 |\n", + "| numeric_only | bool | Include only columns / rows with numeric data. | False |\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :---: | :----------------------------------------------------------------- |\n", - "| Table | A table with the given column(s) renamed adding a suffix. |" + "| Type | Description |\n", + "| :--------: | :--------------------------------------------------------------------------------------------- |\n", + "| Dictionary | The mean across each row / column with the key corresponding to the row number or column name. 
|" ] }, { "cell_type": "markdown", - "id": "c22262b8", + "id": "dee2e8cc", "metadata": {}, "source": [ "**Examples:**\n", "\n", - "The initial table to which a suffix will be added to its columns" + "Calculate the mean across the columns of a table" ] }, { "cell_type": "code", "execution_count": null, - "id": "55c1f504", + "id": "9d4c8a22", "metadata": {}, "outputs": [], "source": [ - "tab.head()" + "tab = kx.Table(data=\n", + " {\n", + " 'a': [1, 2, 2, 4],\n", + " 'b': [1, 2, 6, 7],\n", + " 'c': [7, 8, 9, 10],\n", + " 'd': [7, 11, 14, 14]\n", + " }\n", + ")\n", + "tab" ] }, { - "cell_type": "markdown", - "id": "b4687851", + "cell_type": "code", + "execution_count": null, + "id": "d02c4cfd", "metadata": {}, + "outputs": [], "source": [ - "Add \"_col\" to table columns:" + "tab.mean()" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "e00d0f5c", + "cell_type": "markdown", + "id": "c6feb4ea", "metadata": {}, - "outputs": [], "source": [ - "tab.add_suffix(suffix=\"_col\").head()" + "Calculate the mean across the rows of a table" ] }, { - "cell_type": "markdown", - "id": "718584f8", + "cell_type": "code", + "execution_count": null, + "id": "506a6867", "metadata": {}, + "outputs": [], "source": [ - "## Merging" + "tab.mean(axis=1)" ] }, { "cell_type": "markdown", - "id": "ef401426", + "id": "cd714c1b", "metadata": {}, "source": [ - "### Table.merge()\n", + "### Table.median()\n", "\n", "```\n", - "Table.merge(\n", - " right,\n", - " how='inner',\n", - " on=None,\n", - " left_on=None,\n", - " right_on=None,\n", - " left_index=False,\n", - " right_index=False,\n", - " sort=False,\n", - " suffixes=('_x', '_y'),\n", - " copy=True,\n", - " validate=None,\n", - " q_join=False\n", - ")\n", + "Table.median(axis=0, numeric_only=False)\n", "```\n", "\n", - "Merge Table or KeyedTable objects with a database-style join.\n", - "\n", - "The join is done on columns or indexes. If joining columns on columns, the DataFrame indexes will be ignored. 
Otherwise if joining indexes on indexes or indexes on a column or columns, the index will be passed on. When performing a cross merge, no column specifications to merge on are allowed.\n", + "Get the median of values across the requested axis.\n", "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", - "| :---------: | :--: | :-------------------------------------------------------------------------------- | :-----: |\n", - "| right | Union[Table/KeyedTable] | The object to merge with. | _required_ |\n", - "| how | str | The type of join to be used. One of {‘left’, ‘right’, ‘outer’, ‘inner’, ‘cross’}. | ‘inner’ |\n", - "| on | str | The column name to join on. | None |\n", - "| left_on | str | The column name in the left table to join on. | None |\n", - "| right_on | str | The column name in the right table to join on. | None |\n", - "| left_index | bool | Use the index of the left Table. | False |\n", - "| right_index | bool | Use the index of the right Table. | False |\n", - "| sort | bool | Sort the join keys of the resulting table. | False |\n", - "| suffixes | Tuple(str, str) | The number of rows to return. | ('\\_x', '\\_y') |\n", - "| copy | bool | If False avoid copies and modify the input table. | None |\n", - "| validate | str | If specified checks if merge matches specified type.
- “one_to_one” or “1:1”: check if merge keys are unique in both left and right datasets.
- “one_to_many” or “1:m”: check if merge keys are unique in left dataset.
- “many_to_one” or “m:1”: check if merge keys are unique in right dataset.
- “many_to_many” or “m:m”: allowed, but does not result in checks.
| None |\n", - "| q_join | bool | If True perform native q joins instead of the pandas SQL like joins. More documentation around these joins can be found [here.](https://code.kx.com/q/basics/joins/) | False |\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :----------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate median across 0 is columns, 1 is rows. | 0 |\n", + "| numeric_only | bool | Include only columns / rows with numeric data. | False |\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :----------------: | :------------------------------------------------------------------ |\n", - "| Table / KeyedTable | The resulting table like object after the join has been preformed. |" + "| Type | Description |\n", + "| :--------: | :----------------------------------------------------------------------------------------------- |\n", + "| Dictionary | The median across each row / column with the key corresponding to the row number or column name. |" ] }, { "cell_type": "markdown", - "id": "9e613e3c", + "id": "00d44518", "metadata": {}, "source": [ "**Examples:**\n", "\n", - "Merge tab1 and tab2 on the lkey and rkey columns. The value columns have the default suffixes, \\_x and \\_y, appended." + "Calculate the median across the columns of a table" ] }, { "cell_type": "code", "execution_count": null, - "id": "a3b0ec9f", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "tab1 = kx.Table(data={'lkey': ['foo', 'bar', 'baz', 'foo'], 'value': [1, 2, 3, 5]})\n", - "tab2 = kx.Table(data={'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8]})\n", - "tab1.merge(tab2, left_on='lkey', right_on='rkey')" - ] - }, - { - "cell_type": "markdown", - "id": "6e32596c", + "id": "df20ecfc", "metadata": {}, + "outputs": [], "source": [ - "Merge tab1 and tab2 on the lkey and rkey columns using a native q inner join. 
The value columns have the default suffixes, \\_x and \\_y, appended." + "tab = kx.Table(data=\n", + " {\n", + " 'a': [1, 2, 2, 4],\n", + " 'b': [1, 2, 6, 7],\n", + " 'c': [7, 8, 9, 10],\n", + " 'd': [7, 11, 14, 14]\n", + " }\n", + ")\n", + "tab" ] }, { "cell_type": "code", "execution_count": null, - "id": "8ea253c9", + "id": "6e9dc5be", "metadata": {}, "outputs": [], "source": [ - "tab1.merge(tab2, left_on='lkey', right_on='rkey', q_join=True)" + "tab.median()" ] }, { "cell_type": "markdown", - "id": "2d9240b3", + "id": "585d9d01", "metadata": {}, "source": [ - "Merge tab1 and tab2 with specified left and right suffixes appended to any overlapping columns." + "Calculate the median across the rows of a table" ] }, { "cell_type": "code", "execution_count": null, - "id": "64425a1d", + "id": "6ccf50df", "metadata": {}, "outputs": [], "source": [ - "tab1.merge(tab2, left_on='lkey', right_on='rkey', suffixes=('_left', '_right'))" + "tab.median(axis=1)" ] }, { "cell_type": "markdown", - "id": "e749c7e0", + "id": "aeec2045", "metadata": {}, "source": [ - "Merge tab1 and tab2 but raise an exception if the Tables have any overlapping columns." + "### Table.mode()\n", + "\n", + "```\n", + "Table.mode(axis=0, numeric_only=False, dropna=True)\n", + "```\n", + "\n", + "Get the mode of values across the requested axis.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------ | :-----: |\n", + "| axis | int | The axis to calculate mode across 0 is columns, 1 is rows. | 0 |\n", + "| numeric_only | bool | Include only columns / rows with numeric data. | False |\n", + "| dropna | bool | Remove null values from the data before calculating the mode. 
| True |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :--------: | :------------------------------------------------------------------------------------------------ |\n", + "| Table | The mode across each row / column with the column corresponding to the row number or column name. |" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "a938230d", - "metadata": { - "scrolled": true - }, - "outputs": [], + "cell_type": "markdown", + "id": "c52ffed8", + "metadata": {}, "source": [ - "try:\n", - " tab1.merge(tab2, left_on='lkey', right_on='rkey', suffixes=(False, False))\n", - "except BaseException as e:\n", - " print(f'Caught Error: {e}')" + "**Examples:**\n", + "\n", + "Calculate the mode across the columns of a table" ] }, { "cell_type": "code", "execution_count": null, - "id": "b1d99a31", + "id": "786fe3b6", "metadata": {}, "outputs": [], "source": [ - "tab1 = kx.Table(data={'a': ['foo', 'bar'], 'b': [1, 2]})\n", - "tab2 = kx.Table(data={'a': ['foo', 'baz'], 'c': [3, 4]})" - ] - }, - { - "cell_type": "markdown", - "id": "385c0465", - "metadata": {}, - "source": [ - "Merge tab1 and tab2 on the `a` column using an inner join." 
+ "tab = kx.Table(data=\n", + " {\n", + " 'a': [1, 2, 2, 4],\n", + " 'b': [1, 2, 6, 7],\n", + " 'c': [7, 8, 9, 10],\n", + " 'd': [7, 11, 14, 14]\n", + " }\n", + ")\n", + "tab" ] }, { "cell_type": "code", "execution_count": null, - "id": "7431a148", + "id": "58909ffa", "metadata": { "scrolled": true }, "outputs": [], "source": [ - "tab1.merge(tab2, how='inner', on='a')" + "tab.mode()" + ] + }, + { + "cell_type": "markdown", + "id": "7d437b70", + "metadata": {}, + "source": [ + "Calculate the mode across the rows of a table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cfa17533", + "metadata": {}, + "outputs": [], + "source": [ + "tab.mode(axis=1)" + ] + }, + { + "cell_type": "markdown", + "id": "4c270df3", + "metadata": {}, + "source": [ + "Calculate the mode across columns and keep null values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80afc141", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "tab = kx.Table(data=\n", + " {\n", + " 'x': [0, 1, 2, 3, 4, 5, 6, 7, np.NaN, np.NaN],\n", + " 'y': [10, 11, 12, 13, 14, 15, 16, 17, 18, np.NaN],\n", + " 'z': ['a', 'b', 'c', 'd', 'd', 'e', 'e', 'f', 'g', 'h']\n", + " }\n", + ")\n", + "tab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e3300f5", + "metadata": {}, + "outputs": [], + "source": [ + "tab.mode(dropna=False)" + ] + }, + { + "cell_type": "markdown", + "id": "4117c73f", + "metadata": {}, + "source": [ + "### Table.prod()\n", + "\n", + "```\n", + "Table.prod(axis=0, skipna=True, numeric_only=False, min_count=0)\n", + "```\n", + "\n", + "Returns the product of all values across the given axis.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate the product across 0 is columns, 1 is rows. 
| 0 |\n", + "| skipna | bool | Ignore any null values along the axis. | True |\n", + "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", + "| min_count | int | If not set to 0 if there are less then `min_count` values across the axis a null value will be returned | 0 |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Dictionary | A dictionary where the key represents the column name / row number and the values are the result of calling `prd` on that column / row. |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6c64b75", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# This example will use a smaller version of the above table\n", + "# as the result of calculating the product quickly goes over the integer limits.\n", + "kx.q('N: 10')\n", + "tab = kx.q('([] sym: N?`AAPL`GOOG`MSFT; price: 2.5f - N?5f; traded: 10 - N?20; hold: N?0b)')\n", + "tab[tab['traded'] == 0, 'traded'] = 1\n", + "tab[tab['price'] == 0, 'price'] = 1.0\n", + "tab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "540297e2", + "metadata": {}, + "outputs": [], + "source": [ + "tab.prod(numeric_only=True)" + ] + }, + { + "cell_type": "markdown", + "id": "c777923e", + "metadata": {}, + "source": [ + "### Table.skew()\n", + "\n", + "```\n", + "Table.skew(axis=0, skipna=True, numeric_only=False)\n", + "```\n", + "\n", + "Returns the skewness of all values across the given axis.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate the skewness across 0 is columns, 1 is rows. | 0 |\n", + "| skipna | bool | Ignore any null values along the axis. 
| True |\n", + "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", + "\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Dictionary | A dictionary where the key represents the column name / row number and the values are the result of calling `skew` on that column / row. |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc109f0f", + "metadata": {}, + "outputs": [], + "source": [ + "tab.skew(numeric_only=True)" + ] + }, + { + "cell_type": "markdown", + "id": "22940e03", + "metadata": {}, + "source": [ + "### Table.std()\n", + "\n", + "```\n", + "Table.std(axis=0, skipna=True, numeric_only=False, ddof=1)\n", + "```\n", + "\n", + "Return sample standard deviation over requested axis. Normalized by N-1 by default. This can be changed using the ddof argument.\n", + "\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate the standard deviation across 0 is columns, 1 is rows. | 0 |\n", + "| skipna | bool | Not yet implemented. | True |\n", + "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", + "| ddof | int | Delta Degrees of Freedom. The divisor used in calculations is N - ddof, where N represents the number of elements. | 1 |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Table | The std across each row / column with the key corresponding to the row number or column name. 
|" + ] + }, + { + "cell_type": "markdown", + "id": "292f9c39", + "metadata": {}, + "source": [ + "**Examples:**\n", + "\n", + "Calculate the std across the columns of a table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2df159e", + "metadata": {}, + "outputs": [], + "source": [ + "tab = kx.Table(data=\n", + " {\n", + " 'a': [1, 2, 2, 4],\n", + " 'b': [1, 2, 6, 7],\n", + " 'c': [7, 8, 9, 10],\n", + " 'd': [7, 11, 14, 14]\n", + " }\n", + ")\n", + "tab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63d45751", + "metadata": {}, + "outputs": [], + "source": [ + "tab.std()" + ] + }, + { + "cell_type": "markdown", + "id": "2e9705de", + "metadata": {}, + "source": [ + "Calculate the std across the rows of a table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8edf71a4", + "metadata": {}, + "outputs": [], + "source": [ + "tab.std(axis=1)" + ] + }, + { + "cell_type": "markdown", + "id": "1ef61cd5", + "metadata": {}, + "source": [ + "Calculate std across columns with ddof=0:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f66fe87", + "metadata": {}, + "outputs": [], + "source": [ + "tab.std(ddof=0)" + ] + }, + { + "cell_type": "markdown", + "id": "c80d90ae", + "metadata": {}, + "source": [ + "## Group By" + ] + }, + { + "cell_type": "markdown", + "id": "2e1d05d5", + "metadata": {}, + "source": [ + "### Table.groupby()\n", + "\n", + "```\n", + "Table.groupby(\n", + " by=None,\n", + " axis=0,\n", + " level=None,\n", + " as_index=True,\n", + " sort=True,\n", + " group_keys=True,\n", + " observed=False,\n", + " dropna=True\n", + ")\n", + "```\n", + "\n", + "Group data based on like values within columns to easily apply operations on groups.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :--------------: | :--: | :-------------------------------------------------------------------------- | :------: |\n", + "| by | 
Union[Symbol/SymbolVector/int/list] | The column name(s) or column index(es) to group the data on. | None |\n", + "| axis | int | Not Yet Implemented. | 0 |\n", + "| level | Union[Symbol/SymbolVector/int/list] | The column name(s) or column index(es) to group the data on. | None | \n", + "| as_index | bool | Return the table with groups as the key column. | True |\n", + "| sort | bool | Sort the resulting table based off the key. | True |\n", + "| group_keys | bool | Not Yet Implemented. | True | \n", + "| observed | bool | Not Yet Implemented. | False |\n", + "| dropna | bool | Drop groups where the group is null. | True | \n", + "\n", + "Either `by` or `level` can be used to specify the columns to group on, using both will raise an error.\n", + "\n", + "Using an integer or list of integers is only possible when calling `groupby` on a `KeyedTable` object.\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------: | :---------------------------------------------- |\n", + "| GroupbyTable | The resulting table after the grouping is done. |\n", + "\n", + "**Examples:**\n", + "\n", + "Example Table." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0454f7d", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "tab = kx.Table(data={\n", + " 'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'],\n", + " 'Max Speed': [380., 370., 24., 26.],\n", + " 'Max Altitude': [570., 555., 275., 300.]\n", + "})\n", + "\n", + "tab" ] }, { "cell_type": "markdown", - "id": "230a7666", + "id": "55b6b4e0", "metadata": {}, "source": [ - "Merge tab1 and tab2 on the `a` column using a left join." + "Group on the `Animal` column and calculate the mean of the resulting `Max Speed` and `Max Altitude` columns." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30c55810", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "tab.groupby(kx.SymbolVector(['Animal'])).mean()" + ] + }, + { + "cell_type": "markdown", + "id": "0e62a99f", + "metadata": {}, + "source": [ + "Example table with multiple columns to group on." ] }, { "cell_type": "code", "execution_count": null, - "id": "04b96b08", + "id": "0ceddbbf", "metadata": {}, "outputs": [], "source": [ - "tab1.merge(tab2, how='left', on='a')" + "tab = kx.q('2!', kx.Table(\n", + " data={\n", + " 'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot', 'Parrot'],\n", + " 'Type': ['Captive', 'Wild', 'Captive', 'Wild', 'Wild'],\n", + " 'Max Speed': [390., 350., 30., 20., 25.]\n", + " }\n", + "))\n", + "tab" ] }, { "cell_type": "markdown", - "id": "d991656c", + "id": "7e43e1bc", "metadata": {}, "source": [ - "Merge tab1 and tab2 using a cross join." + "Group on multiple columns using their indexes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c281e305", + "metadata": {}, + "outputs": [], + "source": [ + "tab.groupby(level=[0, 1]).mean()" + ] + }, + { + "cell_type": "markdown", + "id": "e5d04220", + "metadata": {}, + "source": [ + "Example table with Nulls." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae67684c", + "metadata": {}, + "outputs": [], + "source": [ + "tab = kx.Table(\n", + " [\n", + " [\"a\", 12, 12],\n", + " [kx.q('`'), 12.3, 33.],\n", + " [\"b\", 12.3, 123],\n", + " [\"a\", 1, 1]\n", + " ],\n", + " columns=[\"a\", \"b\", \"c\"]\n", + ")\n", + "tab" + ] + }, + { + "cell_type": "markdown", + "id": "512021d7", + "metadata": {}, + "source": [ + "Group on column `a` and keep null groups." 
] }, { "cell_type": "code", "execution_count": null, - "id": "09886503", + "id": "a09a6d3a", "metadata": { "scrolled": true }, "outputs": [], "source": [ - "tab1 = kx.Table(data={'left': ['foo', 'bar']})\n", - "tab2 = kx.Table(data={'right': [7, 8]})\n", - "tab1.merge(tab2, how='cross')" + "tab.groupby('a', dropna=False).sum()" ] }, { "cell_type": "markdown", - "id": "b2f4aff1", + "id": "4ca2006b", "metadata": {}, "source": [ - "### Table.merge_asof()\n", + "Group on column `a` keeping null groups and not using the groups as an index column." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "caa2576e", + "metadata": {}, + "outputs": [], + "source": [ + "tab.groupby('a', dropna=False, as_index=False).sum()" + ] + }, + { + "cell_type": "markdown", + "id": "660b3c92", + "metadata": {}, + "source": [ + "## Apply\n", + "\n", + "### Table.apply()\n", "\n", "```\n", - "Table.merge_asof(\n", - " right,\n", - " on=None,\n", - " left_on=None,\n", - " right_on=None,\n", - " left_index=False,\n", - " right_index=False,\n", - " by=None,\n", - " left_by=None,\n", - " right_by=None,\n", - " suffixes=('_x', '_y'),\n", - " tolerance=None,\n", - " allow_exact_matches=True,\n", - " direction='backward'\n", + "Table.apply(\n", + " func,\n", + " *args,\n", + " axis=0,\n", + " raw=None,\n", + " result_type=None,\n", + " **kwargs\n", + ")\n", + "```\n", + "\n", + "Apply a function along an axis of the DataFrame.\n", + "\n", + "Objects passed to a function are passed as kx list objects.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :--------------: | :---------------------------------: | :-------------------------------------------------------------------------- | :------: |\n", + "| func | function | Function to apply to each column or row. | |\n", + "| `*args` | any | Positional arguments to pass to `func` in addition to the kx list. 
| |\n", + "| axis | int | The axis along which the function is applied, `0` applies function to each column, `1` applies function to each row. | 0 | \n", + "| raw | bool | Not yet implemented. | None |\n", + "| result_type | str | Not yet implemented. | None |\n", + "| `**kwargs` | dict | Additional keyword arguments to pass as keywords to `func`, this argument is not implemented in the case `func` is a kx callable function. | None | \n", + "\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :-----------------------: | :---------------------------------------------- |\n", + "| List, Dictionary or Table | Result of applying `func` along the given axis of the `kx.Table`. |\n", + "\n", + "**Examples:**\n", + "\n", + "Example Table." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d746cddb", + "metadata": {}, + "outputs": [], + "source": [ + "tab = kx.Table([[4, 9]] * 3, columns=['A', 'B'])\n", + "\n", + "tab" + ] + }, + { + "cell_type": "markdown", + "id": "54c09d0c", + "metadata": {}, + "source": [ + "Apply square root on each item within a column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8bbcac7", + "metadata": {}, + "outputs": [], + "source": [ + "tab.apply(kx.q.sqrt)" + ] + }, + { + "cell_type": "markdown", + "id": "09a61483", + "metadata": {}, + "source": [ + "Apply a reducing function sum on either axis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84b92b9b", + "metadata": {}, + "outputs": [], + "source": [ + "tab.apply(kx.q.sum)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "169d8ed3", + "metadata": {}, + "outputs": [], + "source": [ + "tab.apply(lambda x: sum(x), axis=1)" + ] + }, + { + "cell_type": "markdown", + "id": "ed4d720c", + "metadata": {}, + "source": [ + "## Aggregate\n", + "\n", + "### Table.agg()\n", "\n", + "```\n", + "Table.agg(\n", + " func,\n", + " axis=0,\n", + " *args,\n", + " **kwargs\n", ")\n", "```\n", 
"\n", - "Merge Table or KeyedTable objects with a database-style join.\n", + "Aggregate data using one or more operations over a specified axis\n", "\n", - "The join is done on columns or indexes. If joining columns on columns, the DataFrame indexes will be ignored. Otherwise if joining indexes on indexes or indexes on a column or columns, the index will be passed on. When performing a cross merge, no column specifications to merge on are allowed.\n", + "Objects passed to a function are passed as kx vector/list objects.\n", "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", - "| :---------: | :--: | :-------------------------------------------------------------------------------- | :-----: |\n", - "| right | Union[Table/KeyedTable] | The object to merge with. | _required_ |\n", - "| how | str | The type of join to be used. One of {‘left’, ‘right’, ‘outer’, ‘inner’, ‘cross’}. | ‘inner’ |\n", - "| on | str | The column name to join on. | None |\n", - "| left_on | str | The column name in the left table to join on. | None |\n", - "| right_on | str | The column name in the right table to join on. | None |\n", - "| left_index | bool | Use the index of the left Table. | False |\n", - "| right_index | bool | Use the index of the right Table. | False |\n", - "| by | str | Not yet implemented. | None |\n", - "| left_by | str | Field names to match on in the left table. | None |\n", - "| right_by | str | Field names to match on in the right table. | None |\n", - "| suffixes | Tuple(str, str) | The number of rows to return. | ('\\_x', '\\_y') |\n", - "| tolerance | Any | Not yet implemented. | None |\n", - "| allow_exact_matches | bool | Not yet implemented. | True |\n", - "| direction | str | Not yet implemented. 
| 'backward' |\n", + "| Name | Type | Description | Default |\n", + "| :--------------: | :---------------------------------: | :-------------------------------------------------------------------------- | :------: |\n", + "| func | function, str, list or dict | Function to use for aggregating the data. If a function, this must either work when passed a `Table` or when passed to `Table.apply`.<br><br>Accepted combinations are:<ul><li>function</li><li>string function name</li><li>list of functions and/or function names, e.g. `[kx.q.sum, 'mean']`</li><li>dict of axis labels -> functions or function names</li></ul> | |\n", + "| `*args` | any | Positional arguments to pass to `func` in addition to the kx list. | |\n", + "| axis | int | The axis along which the function is applied, `0` applies function to each column, at present row based application is not supported. | 0 | \n", + "| `**kwargs` | dict | Additional keyword arguments to pass as keywords to `func`, this argument is not implemented in the case `func` is a kx callable function. | None | \n", "\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :----------------: | :------------------------------------------------------------------ |\n", - "| Table / KeyedTable | The resulting table like object after the join has been preformed. |" + "| Type | Description |\n", + "| :-----------------------: | :---------------------------------------------- |\n", + "| List, Dictionary or Table | Result of applying `func` along the given axis of the `kx.Table`. |\n", + "\n", + "**Examples:**\n", + "\n", + "Example Table." ] }, { - "cell_type": "markdown", - "id": "fc696ccf", + "cell_type": "code", + "execution_count": null, + "id": "2696cf42", "metadata": {}, + "outputs": [], "source": [ - "**Examples:**\n", + "tab = kx.Table([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9],\n", + " [kx.FloatAtom.null, kx.FloatAtom.null, kx.FloatAtom.null]],\n", + " columns=['A', 'B', 'C'])\n", "\n", - "Perform a simple asof join on two tables." 
+ "tab" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "6cb634e0", + "cell_type": "markdown", + "id": "3f90677b", "metadata": {}, - "outputs": [], "source": [ - "left = kx.Table(data={\"a\": [1, 5, 10], \"left_val\": [\"a\", \"b\", \"c\"]})\n", - "right = kx.Table(data={\"a\": [1, 2, 3, 6, 7], \"right_val\": [1, 2, 3, 6, 7]})\n", - "left" + "Aggregate a list of functions over rows" ] }, { "cell_type": "code", "execution_count": null, - "id": "81b10932", + "id": "861e5787", "metadata": {}, "outputs": [], "source": [ - "right" + "tab.agg(['sum', 'min'])" + ] + }, + { + "cell_type": "markdown", + "id": "ccdaee01", + "metadata": {}, + "source": [ + "Perform an aggregation using a user specified function" ] }, { "cell_type": "code", "execution_count": null, - "id": "411d19d2", + "id": "b5f9f25b", "metadata": {}, "outputs": [], "source": [ - "left.merge_asof(right)" + "import statistics\n", + "def mode(x):\n", + " return statistics.mode(x)\n", + "tab.agg(mode)" ] }, { "cell_type": "markdown", - "id": "324d24ec", + "id": "667d9961", "metadata": {}, "source": [ - "Perform a asof join on two tables but first merge them on the by column." 
+ "Apply an aggregation supplying column specification for supplied function" ] }, { "cell_type": "code", "execution_count": null, - "id": "d805fa5c", + "id": "60845603", "metadata": {}, "outputs": [], "source": [ - "trades = kx.Table(data={\n", - " \"time\": [\n", - " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.030\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.041\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.049\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.072\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.075\")\n", - " ],\n", - " \"ticker\": [\n", - " \"GOOG\",\n", - " \"MSFT\",\n", - " \"MSFT\",\n", - " \"MSFT\",\n", - " \"GOOG\",\n", - " \"AAPL\",\n", - " \"GOOG\",\n", - " \"MSFT\"\n", - " ],\n", - " \"bid\": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],\n", - " \"ask\": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03]\n", - "})\n", - "quotes = kx.Table(data={\n", - " \"time\": [\n", - " pd.Timestamp(\"2016-05-25 13:30:00.023\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.038\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.048\"),\n", - " pd.Timestamp(\"2016-05-25 13:30:00.048\")\n", - " ],\n", - " \"ticker\": [\"MSFT\", \"MSFT\", \"GOOG\", \"GOOG\", \"AAPL\"],\n", - " \"price\": [51.95, 51.95, 720.77, 720.92, 98.0],\n", - " \"quantity\": [75, 155, 100, 100, 100]\n", - "})\n", - "trades" + "tab.agg({'A': 'max', 'B': mode})" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "665d0e74", + "cell_type": "markdown", + "id": "256f5496", "metadata": {}, - "outputs": [], "source": [ - "quotes" + "## Data Preprocessing" + ] + }, + { + "cell_type": "markdown", + "id": "976e633c", + "metadata": {}, + "source": [ + "### Table.add_prefix()\n", + "\n", + "```\n", + "Table.add_prefix(columns)\n", + "```\n", + "\n", + "Rename columns adding 
a prefix in a table and return the resulting Table object.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :-----: | :-------------: | :------------------------------------------------------------------ | :--------: |\n", + "| prefix | str | The string that will be concatenated with the name of the columns | _required_ |\n", + "| axis | int | Axis to add prefix on. | 0 |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :---: | :----------------------------------------------------------------- |\n", + "| Table | A table with the given column(s) renamed adding a prefix. |" + ] + }, + { + "cell_type": "markdown", + "id": "77ff0376", + "metadata": {}, + "source": [ + "**Examples:**\n", + "\n", + "The initial table to which a prefix will be added to its columns" ] }, { "cell_type": "code", "execution_count": null, - "id": "9398ab6a", + "id": "c71b39c6", "metadata": {}, "outputs": [], "source": [ - "trades.merge_asof(quotes, on=\"time\")" + "tab.head()" ] }, { "cell_type": "markdown", - "id": "acca5289", + "id": "8b6968da", "metadata": {}, "source": [ - "## Computations" + "Add \"col_\" to table columns:" ] }, { "cell_type": "code", "execution_count": null, - "id": "852b5f34", + "id": "aa98ca46", "metadata": {}, "outputs": [], "source": [ - "# All the examples in this section will use this example table.\n", - "kx.q('N: 100')\n", - "tab = kx.q('([] sym: N?`AAPL`GOOG`MSFT; price: 250f - N?500f; traded: 100 - N?200; hold: N?0b)')\n", - "tab" + "tab.add_prefix(prefix=\"col_\").head()" ] }, { "cell_type": "markdown", - "id": "93a50ee2", + "id": "5f87eeba", "metadata": {}, "source": [ - "### Table.abs()\n", + "### Table.add_suffix()\n", "\n", "```\n", - "Table.abs(numeric_only=False)\n", + "Table.add_suffix(columns)\n", "```\n", "\n", - "Take the absolute value of each element in the table. 
Will raise an error if there are columns that contain data that have no absolute value.\n", + "Rename columns adding a suffix in a table and return the resulting Table object.\n", "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", - "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", - "| numeric_only | bool | Only use columns of the table that can be converted to an absolute value. | False |\n", + "| Name | Type | Description | Default |\n", + "| :-----: | :-------------: | :------------------------------------------------------------------ | :--------: |\n", + "| suffix | str | The string that will be concatenated with the name of the columns | _required_ |\n", + "| axis | int | Axis to add suffix on. | 0 |\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :----------------: | :------------------------------------------------------------------- |\n", - "| Table / KeyedTable | The resulting table like object with only positive numerical values. |" + "| Type | Description |\n", + "| :---: | :----------------------------------------------------------------- |\n", + "| Table | A table with the given column(s) renamed adding a suffix. 
|" + ] + }, + { + "cell_type": "markdown", + "id": "dc449e82", + "metadata": {}, + "source": [ + "**Examples:**\n", + "\n", + "The initial table to which a suffix will be added to its columns" ] }, { "cell_type": "code", "execution_count": null, - "id": "7da5d72b", - "metadata": { - "scrolled": true - }, + "id": "4471a14b", + "metadata": {}, "outputs": [], "source": [ - "tab.abs(numeric_only=True)" + "tab.head()" ] }, { "cell_type": "markdown", - "id": "cbcdf84e", + "id": "b01dfa6c", "metadata": {}, "source": [ - "### Table.all()\n", - "\n", - "```\n", - "Table.all(axis=0, bool_only=False, skipna=True)\n", - "```\n", - "\n", - "Returns whether or not all values across the given axis have a `truthy` value.\n", - "\n", - "**Parameters:**\n", - "\n", - "| Name | Type | Description | Default |\n", - "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", - "| axis | int | The axis to calculate `all` across 0 is columns, 1 is rows. | 0 |\n", - "| bool_only | bool | Only use columns of the table that are boolean types. | False |\n", - "| skipna | bool | Ignore any null values along the axis. | True |\n", - "\n", - "**Returns:**\n", - "\n", - "| Type | Description |\n", - "| :----------------: | :------------------------------------------------------------------- |\n", - "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `all` on that column / row. 
|" + "Add \"_col\" to table columns:" ] }, { "cell_type": "code", "execution_count": null, - "id": "7d0b0cd3", + "id": "c7c46631", "metadata": {}, "outputs": [], "source": [ - "tab.all()" + "tab.add_suffix(suffix=\"_col\").head()" ] }, { "cell_type": "markdown", - "id": "aa02cf1c", + "id": "d56eeae9", "metadata": {}, "source": [ - "### Table.any()\n", + "### Table.astype()\n", "\n", "```\n", - "Table.any(axis=0, bool_only=False, skipna=True)\n", + "Table.astype(dtype, copy=True, errors='raise')\n", "```\n", "\n", - "Returns whether or not any values across the given axis have a `truthy` value.\n", + "Cast a column/columns of the Dataframes object to a specified `dtype`.\n", "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", - "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", - "| axis | int | The axis to calculate `any` across 0 is columns, 1 is rows. | 0 |\n", - "| bool_only | bool | Only use columns of the table that are boolean types. | False |\n", - "| skipna | bool | Ignore any null values along the axis. | True |\n", + "| Name | Type | Description | Default |\n", + "| :-: | :-: | :-: | :-: |\n", + "| dtype | data type, or dict of column name -> data type | Use a PyKx wrapper data type or Python type to cast all columns to the same type. Alternatively, use {col: dtype, …}, where col is a column label and dtype is PyKx wrapper data type to cast one or more of the DataFrame’s columns to column-specific types. 
| |\n", + "| copy | Boolean | Default of True, False not implemented | True |\n", + "| errors | {‘raise’, ‘ignore’} | If passed anything other than 'raise', it will return the dataframe | 'raise' |\n", + "\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :----------------: | :------------------------------------------------------------------- |\n", - "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `any` on that column / row. |" + "| Type | Description |\n", + "| :-: | :-: |\n", + "| Dataframe | The dataframe with columns casted according to passed dtypes |" + ] + }, + { + "cell_type": "markdown", + "id": "5d27ccde", + "metadata": {}, + "source": [ + "**Examples:**\n", + "\n", + "The examples in the section will use the example table." ] }, { "cell_type": "code", "execution_count": null, - "id": "a4806993", + "id": "63d18dce", "metadata": {}, "outputs": [], "source": [ - "tab.any()" + "df = kx.q('([] c1:1 2 3i; c2:1 2 3j; c3:1 2 3h; c4:1 2 3i)')" ] }, { "cell_type": "markdown", - "id": "a3c3fccd", + "id": "4e6fad4f", "metadata": {}, "source": [ - "### Table.max()\n", - "\n", - "```\n", - "Table.max(axis=0, skipna=True, numeric_only=False)\n", - "```\n", - "\n", - "Returns the maximum value across the given axis.\n", - "\n", - "**Parameters:**\n", - "\n", - "| Name | Type | Description | Default |\n", - "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", - "| axis | int | The axis to calculate the maximum across 0 is columns, 1 is rows. | 0 |\n", - "| skipna | bool | Ignore any null values along the axis. | True |\n", - "| numeric_only | bool | Only use columns of the table that are of a numeric data type. 
| False |\n", - "\n", - "**Returns:**\n", - "\n", - "| Type | Description |\n", - "| :----------------: | :------------------------------------------------------------------- |\n", - "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `max` on that column / row. |" + "Cast all columns to dtype LongVector" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ef76c1e", + "metadata": {}, + "outputs": [], + "source": [ + "df.astype(kx.LongVector)" + ] + }, + { + "cell_type": "markdown", + "id": "1846286e", + "metadata": {}, + "source": [ + "Casting as specified in the dictionary supplied with given dtype per column" ] }, { "cell_type": "code", "execution_count": null, - "id": "8e9abf02", + "id": "a4cc4bb7", "metadata": {}, "outputs": [], "source": [ - "tab.max()" + "df.astype({'c1':kx.LongVector, 'c2':'kx.ShortVector'})" ] }, { "cell_type": "markdown", - "id": "301ab2c2", + "id": "c77a5800", "metadata": {}, "source": [ - "### Table.min()\n", - "\n", - "```\n", - "Table.min(axis=0, skipna=True, numeric_only=False)\n", - "```\n", - "\n", - "Returns the minimum value across the given axis.\n", - "\n", - "**Parameters:**\n", - "\n", - "| Name | Type | Description | Default |\n", - "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", - "| axis | int | The axis to calculate the minimum across 0 is columns, 1 is rows. | 0 |\n", - "| skipna | bool | Ignore any null values along the axis. | True |\n", - "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", - "\n", - "**Returns:**\n", - "\n", - "| Type | Description |\n", - "| :----------------: | :------------------------------------------------------------------- |\n", - "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `min` on that column / row. 
|" + "The next example will use this table" ] }, { "cell_type": "code", "execution_count": null, - "id": "c1255ac7", + "id": "78b91d9f", "metadata": {}, "outputs": [], "source": [ - "tab.min()" + "df = kx.q('([] c1:3#.z.p; c2:`abc`def`ghi; c3:1 2 3j; c4:(\"abc\";\"def\";\"ghi\");c5:\"abc\";c6:(1 2 3;4 5 6;7 8 9))')" ] }, { "cell_type": "markdown", - "id": "a389f7aa", + "id": "e89a0596", "metadata": {}, "source": [ - "### Table.sum()\n", - "\n", - "```\n", - "Table.sum(axis=0, skipna=True, numeric_only=False, min_count=0)\n", - "```\n", - "\n", - "Returns the sum of all values across the given axis.\n", - "\n", - "**Parameters:**\n", - "\n", - "| Name | Type | Description | Default |\n", - "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", - "| axis | int | The axis to calculate the sum across 0 is columns, 1 is rows. | 0 |\n", - "| skipna | bool | Ignore any null values along the axis. | True |\n", - "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", - "| min_count | int | If not set to 0 if there are less then `min_count` values across the axis a null value will be returned | 0 |\n", - "\n", - "**Returns:**\n", - "\n", - "| Type | Description |\n", - "| :----------------: | :------------------------------------------------------------------- |\n", - "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `sum` on that column / row. 
|" + "Casting char and string columns to symbol columns" ] }, { "cell_type": "code", "execution_count": null, - "id": "af638f53", + "id": "599dca72", "metadata": {}, "outputs": [], "source": [ - "tab.sum()" + "df.astype({'c4':kx.SymbolVector, 'c5':kx.SymbolVector})" ] }, { "cell_type": "markdown", - "id": "9bf62b1a", + "id": "92ab62d2", "metadata": {}, "source": [ - "### Table.prod()\n", + "### Table.drop()\n", "\n", "```\n", - "Table.prod(axis=0, skipna=True, numeric_only=False, min_count=0)\n", + "Table.drop(item, axis=0)\n", "```\n", "\n", - "Returns the product of all values across the given axis.\n", + "Remove either columns or rows from a table and return the resulting Table object.\n", "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", - "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", - "| axis | int | The axis to calculate the product across 0 is columns, 1 is rows. | 0 |\n", - "| skipna | bool | Ignore any null values along the axis. | True |\n", - "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", - "| min_count | int | If not set to 0 if there are less then `min_count` values across the axis a null value will be returned | 0 |\n", + "| Name | Type | Description | Default |\n", + "| :-----: | :--------------------: | :---------------------------------------------------------- | :--------: |\n", + "| item | Union[str, list[str]] | The column name(s) or row number(s) to drop from the table. | _required_ |\n", + "| axis | int | The axis to drop along: `0` drops rows, `1` drops columns. | 0 |\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :----------------: | :------------------------------------------------------------------- |\n", - "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `prd` on that column / row. 
|" + "| Type | Description |\n", + "| :---: | :----------------------------------------------------------------- |\n", + "| Table | A table with the given column(s) / row(s) removed. |" + ] + }, + { + "cell_type": "markdown", + "id": "756e1611", + "metadata": {}, + "source": [ + "**Examples:**\n", + "\n", + "Drop rows from a table." ] }, { "cell_type": "code", "execution_count": null, - "id": "0ddad367", - "metadata": { - "scrolled": true - }, + "id": "60fb2684", + "metadata": {}, "outputs": [], "source": [ - "# This example will use a smaller version of the above table\n", - "# as the result of calculating the product quickly goes over the integer limits.\n", - "kx.q('N: 10')\n", - "tab = kx.q('([] sym: N?`AAPL`GOOG`MSFT; price: 2.5f - N?5f; traded: 10 - N?20; hold: N?0b)')\n", - "tab[tab['traded'] == 0, 'traded'] = 1\n", - "tab[tab['price'] == 0, 'price'] = 1.0\n", - "tab" + "# The examples in this section will use this example table filled with random data\n", + "kx.q('N: 1000')\n", + "tab = kx.q('([] x: til N; y: N?`AAPL`GOOG`MSFT; z: N?500f; w: N?1000; v: N?(0N 0 50 100 200 250))')\n", + "tab.head()" ] }, { "cell_type": "code", "execution_count": null, - "id": "151411e2", + "id": "bc0db439", "metadata": {}, "outputs": [], "source": [ - "tab.prod(numeric_only=True)" + "tab.drop([0, 2, 4, 6, 8, 10]).head()" ] }, { "cell_type": "markdown", - "id": "c87d4f95", + "id": "b6b79c9b", "metadata": {}, "source": [ - "### Table.count()\n", - "\n", - "```\n", - "Table.count(axis=0, numeric_only=False)\n", - "```\n", - "\n", - "Returns the count of non null values across the given axis.\n", - "\n", - "**Parameters:**\n", - "\n", - "| Name | Type | Description | Default |\n", - "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", - "| axis | int | The axis to count elements across 1 is columns, 0 is rows. | 0 |\n", - "| numeric_only | bool | Only use columns of the table that are of a numeric data type. 
| False |\n", - "\n", - "**Returns:**\n", - "\n", - "| Type | Description |\n", - "| :----------------: | :------------------------------------------------------------------- |\n", - "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `count` on that column / row. |" + "Drop columns from a table." ] }, { "cell_type": "code", "execution_count": null, - "id": "6520c195", + "id": "41eb79c1", "metadata": {}, "outputs": [], "source": [ - "tab.count()" + "tab.drop('y', axis=1).head()" ] }, { "cell_type": "markdown", - "id": "ce85797d", + "id": "e34706ea", "metadata": {}, "source": [ - "### Table.skew()\n", + "### Table.drop_duplicates()\n", "\n", "```\n", - "Table.skew(axis=0, skipna=True, numeric_only=False)\n", + "Table.drop_duplicates()\n", "```\n", "\n", - "Returns the skewness of all values across the given axis.\n", - "\n", - "**Parameters:**\n", - "\n", - "| Name | Type | Description | Default |\n", - "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", - "| axis | int | The axis to calculate the skewness across 0 is columns, 1 is rows. | 0 |\n", - "| skipna | bool | Ignore any null values along the axis. | True |\n", - "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", - "\n", + "Remove duplicate rows from a table and return the resulting Table object.\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :----------------: | :------------------------------------------------------------------- |\n", - "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `skew` on that column / row. |" + "| Type | Description |\n", + "| :---: | :--------------------------------------- |\n", + "| Table | A table with all duplicate rows removed. 
|" + ] + }, + { + "cell_type": "markdown", + "id": "e9e064d1", + "metadata": {}, + "source": [ + "**Examples:**\n", + "\n", + "Create a table with duplicates for the example" ] }, { "cell_type": "code", "execution_count": null, - "id": "3fb5dce1", + "id": "7c8be915", "metadata": {}, "outputs": [], "source": [ - "tab.skew(numeric_only=True)" + "tab2 = kx.q('([] 100?`AAPL`GOOG`MSFT; 100?3)')\n", + "tab2" ] }, { "cell_type": "markdown", - "id": "499025cb", + "id": "4af0c99d", + "metadata": {}, + "source": [ + "Drop all duplicate rows from the table." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f6ec5c7", "metadata": {}, + "outputs": [], "source": [ - "## Setting Indexes" + "tab2.drop_duplicates()" ] }, { "cell_type": "markdown", - "id": "4dc576e8", + "id": "77282b77", "metadata": {}, "source": [ - "### Table.set_index()\n", + "### Table.pop()\n", "\n", "```\n", - "Table.set_index(\n", - " keys,\n", - " drop=True,\n", - " append=False,\n", - " inplace=False,\n", - " verify_integrity=False,\n", - ")\n", + "Table.pop(item)\n", "```\n", "\n", - "Add index/indexes to a Table/KeyedTable.\n", + "Remove a column or columns from a table by column name and return the column after it has been removed.\n", "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", - "| :--------------: | :--: | :----------------------------------------------------------------------- | :--------: |\n", - "| keys | Union[Symbol/SymbolVector/Table] | The key(s) or data to key on | _required_ |\n", - "| drop | bool | Not Yet Implemented | True |\n", - "| append | bool | Whether to append columns to existing index. 
| False | \n", - "| inplace | bool | Not Yet Implemented | False |\n", - "| verify_integrity | bool | Check the new index for duplicates | False | \n", + "| Name | Type | Description | Default |\n", + "| :-----: | :--------------------: | :------------------------------------------------------ | :--------: |\n", + "| item | Union[str, list[str]] | The column name or list of names to pop from the table. | _required_ |\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :--------: | :--------------------------------------------- |\n", - "| KeyedTable | The resulting table after the index is applied |\n", - "\n", + "| Type | Description |\n", + "| :---: | :----------------------------------------------------------------- |\n", + "| Table | A table containing only the columns removed from the input table. |" + ] + }, + { + "cell_type": "markdown", + "id": "6846f6a1", + "metadata": {}, + "source": [ "**Examples:**\n", "\n", - "Adding indexes:" + "Remove the `v` column from the table and return it." ] }, { "cell_type": "code", "execution_count": null, - "id": "42a288f7", - "metadata": {}, + "id": "40ab2931", + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "kx.q('N: 10')\n", - "tab = kx.q('([] sym: N?`AAPL`GOOG`MSFT; price: 2.5f - N?5f; traded: N?0 1; hold: N?01b)')" + "display(tab.head())\n", + "print('\\n\\nPop the `v` column out of the table')\n", + "display(tab.pop(\"v\"))\n", + "print('\\n\\nUpdated Table')\n", + "display(tab.head())" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "f744959e", + "cell_type": "markdown", + "id": "45aca79f", "metadata": {}, - "outputs": [], "source": [ - "#Setting a single index\n", - "tab.set_index('sym')" + "Remove the `z` and `w` columns from the table and return them." 
] }, { "cell_type": "code", "execution_count": null, - "id": "00c31275", + "id": "2f381911", "metadata": {}, "outputs": [], "source": [ - "#Setting multipe indexes\n", - "tab.set_index(['sym', 'traded'])" + "display(tab.head())\n", + "print('\\n\\nPop the `z` and `w` columns out of the table')\n", + "display(tab.pop([\"z\", \"w\"]).head())\n", + "print('\\n\\nUpdated Table')\n", + "display(tab.head())" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "858bbeb2", + "cell_type": "markdown", + "id": "2f4954bb", "metadata": {}, - "outputs": [], "source": [ - "#Pass a table as index (lengths must match)\n", - "status = kx.q('{select movement from ungroup select movement:`down`up 0<=deltas price by sym from x}',tab)\n", - "tab.set_index(status)" + "### Table.rename()\n", + "\n", + "```\n", + "Table.rename(labels=None, index=None, columns=None, axis=None, copy=None, inplace=False, level=None, errors='ignore')\n", + "```\n", + "\n", + "Rename columns in a table and return the resulting Table object.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :------: | :----: | :------------------------------------------------------------------------------------------------------------------| :---:|\n", + "| labels | dict | A dictionary of either new index or column names to new names to be used in conjunction with the _axis_ parameter. | None |\n", + "| columns | dict | A dictionary of column name to new column name to use when renaming. | None |\n", + "| index | dict | A dictionary of index to new index name to use when renaming keyed tables. | None |\n", + "| axis | {0 or 'index', 1 or 'columns'} | Designating the axis to be renamed by the _labels_ dictionary. | None |\n", + "| copy | None | Not yet implemented. | None |\n", + "| inplace | bool | Not yet implemented. | None |\n", + "| level | None | Not yet implemented. | None |\n", + "| errors | string | Not yet implemented. 
| None |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :---: | :----------------------------------------------------------------- |\n", + "| Table | A table with the given columns or indices renamed. |" ] }, { "cell_type": "markdown", - "id": "450c30ee", + "id": "ddd7f1f2", "metadata": {}, "source": [ - "Appending:" + "**Examples:**\n", + "\n", + "The initial table we will be renaming columns on and a keyed table to rename the index on." ] }, { "cell_type": "code", "execution_count": null, - "id": "b475c811", + "id": "d844c2c3", + "metadata": {}, + "outputs": [], + "source": [ + "tab.head()\n", + "key_tab = kx.KeyedTable(data=tab) " + ] + }, + { + "cell_type": "markdown", + "id": "9b819386", "metadata": {}, - "outputs": [], "source": [ - "#Default is false - previous index 'sym' deleted and replaced by 'hold'\n", - "tab.set_index('sym').set_index('hold')" + "Rename column `x` to `index` and `y` to `symbol` using the `columns` keyword." ] }, { "cell_type": "code", "execution_count": null, - "id": "0fb2c59c", + "id": "e352c9ba", "metadata": {}, "outputs": [], "source": [ - "#append= True will retain 'sym' index and add 'hold' as second index\n", - "tab.set_index('sym').set_index('hold', append= True)" + "tab.rename(columns={'x': 'index', 'y': 'symbol'}).head()" ] }, { "cell_type": "markdown", - "id": "887ffb99", + "id": "4f9e2895-a82a-4f8e-ae2c-d3f898ece131", "metadata": {}, "source": [ - "Verify Integrity:" + "Rename column `x` to `index` and `y` to `symbol` by setting the `axis` keyword." 
] }, { "cell_type": "code", "execution_count": null, - "id": "49367c46", + "id": "16ae0555-9d92-4642-9671-03a2790216c8", "metadata": {}, "outputs": [], "source": [ - "#Will allow duplicates in index:\n", - "tab.set_index('sym')" + "tab.rename({'x': 'index', 'y': 'symbol'}, axis = 1).head()" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "7bb2aaf0", + "cell_type": "markdown", + "id": "70e2735a-b582-47f7-9557-5f64f2238e89", "metadata": {}, - "outputs": [], "source": [ - "#Will error as 'sym' has duplicates\n", - "try:\n", - " tab.set_index('sym', verify_integrity= True)\n", - "except kx.QError as e:\n", - " print(f'Caught Error: {e}')" + "Rename index of a keyed table by using literal `index` as the `axis` parameter." ] }, { - "cell_type": "markdown", - "id": "7e415861", + "cell_type": "code", + "execution_count": null, + "id": "7b2bcbd6-32ef-4988-ac81-3de73222face", "metadata": {}, + "outputs": [], "source": [ - "## Group By" + "key_tab.rename({0:\"a\", 1:\"b\"}, axis = 'index').head()" ] }, { "cell_type": "markdown", - "id": "8b2d72fb", + "id": "b85d53ba", "metadata": {}, "source": [ - "### Table.groupby()\n", + "### Table.reset_index()\n", "\n", "```\n", - "Table.groupby(\n", - " by=None,\n", - " axis=0,\n", - " level=None,\n", - " as_index=True,\n", - " sort=True,\n", - " group_keys=True,\n", - " observed=False,\n", - " dropna=True\n", - ")\n", + "Table.reset_index(levels, *,\n", + " drop=False, inplace=False,\n", + " col_level=0, col_fill='',\n", + " allow_duplicates=False,\n", + " names=None)\n", "```\n", "\n", - "Group data based on like values within columns to easily apply operations on groups.\n", + "Reset the keys/index of a keyed PyKX table. 
This can be used to remove/unset one or more keys within a table.\n", "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", - "| :--------------: | :--: | :-------------------------------------------------------------------------- | :------: |\n", - "| by | Union[Symbol/SymbolVector/int/list] | The column name(s) or column index(es) to group the data on. | None |\n", - "| axis | int | Not Yet Implemented. | 0 |\n", - "| level | Union[Symbol/SymbolVector/int/list] | The column name(s) or column index(es) to group the data on. | None | \n", - "| as_index | bool | Return the table with groups as the key column. | True |\n", - "| sort | bool | Sort the resulting table based off the key. | True |\n", - "| group_keys | bool | Not Yet Implemented. | True | \n", - "| observed | bool | Not Yet Implemented. | False |\n", - "| dropna | bool | Drop groups where the group is null. | True | \n", - "\n", - "Either `by` or `level` can be used to specify the columns to group on, using both will raise an error.\n", + "| Name | Type | Description | Default |\n", + "| :-: | :-: | :-: | :-: |\n", + "| level | int, str or list | The name/indices of the keys to be reset within the table. | None |\n", + "| drop | Boolean | Should remaining key columns be removed from the table post index resetting. | False |\n", + "| inplace | Boolean | Not Yet Implemented | False |\n", + "| col_level | int or str | Not Yet Implemented | 0 |\n", + "| col_fill | object | Not Yet Implemented | '' |\n", + "| allow_duplicates | Boolean | Can duplicate columns be created | False |\n", + "| names | str or list | Not Yet Implemented | None |\n", "\n", - "Using and integer or list of integers is only possible when calling `groupby` on a `KeyedTable` object.\n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :----------: | :---------------------------------------------- |\n", - "| GroupbyTable | The resulting table after the grouping is done. 
|\n", + "| Type | Description |\n", + "| :-: | :-: |\n", + "| Dataframe | The dataframe with table updated following index reset request |\n", "\n", "**Examples:**\n", "\n", - "Example Table." + "Generate data to be used for index resetting " ] }, { "cell_type": "code", "execution_count": null, - "id": "0789d3f4", - "metadata": { - "scrolled": true - }, + "id": "a723d14d", + "metadata": {}, "outputs": [], "source": [ - "tab = kx.Table(data={\n", - " 'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'],\n", - " 'Max Speed': [380., 370., 24., 26.],\n", - " 'Max Altitude': [570., 555., 275., 300.]\n", - "})\n", - "\n", - "tab" + "N = 1000\n", + "qtab = kx.Table(data = {\n", + " 'x0': kx.random.random(N, ['a', 'b', 'c']),\n", + " 'x1': kx.random.random(N, ['d', 'e', 'f']),\n", + " 'x2': kx.random.random(N, ['g', 'h', 'i']),\n", + " 'y0': kx.random.random(N, 10.0),\n", + " 'y1': kx.random.random(N, 10.0),\n", + " 'y2': kx.random.random(N, kx.GUIDAtom.null)\n", + " }).set_index(['x0', 'x1', 'x2'])\n", + "qtab" ] }, { "cell_type": "markdown", - "id": "8baae3c9", + "id": "089ad779", "metadata": {}, "source": [ - "Group on the `Animal` column and calculate the mean of the resulting `Max Speed` and `Max Altitude` columns." + "Resetting the index of the table will result in original index columns being added to the table directly" ] }, { "cell_type": "code", "execution_count": null, - "id": "734cb6ff", - "metadata": { - "scrolled": true - }, + "id": "4662c138", + "metadata": {}, "outputs": [], "source": [ - "tab.groupby(kx.SymbolVector(['Animal'])).mean()" + "qtab.reset_index()" ] }, { "cell_type": "markdown", - "id": "b3b759af", + "id": "4e019e54", "metadata": {}, "source": [ - "Example table with multiple columns to group on." 
+ "Reset the index adding a specified named column to the table" ] }, { "cell_type": "code", "execution_count": null, - "id": "7966c28c", + "id": "a990ea29", "metadata": {}, "outputs": [], "source": [ - "tab = kx.q('2!', kx.Table(\n", - " data={\n", - " 'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot', 'Parrot'],\n", - " 'Type': ['Captive', 'Wild', 'Captive', 'Wild', 'Wild'],\n", - " 'Max Speed': [390., 350., 30., 20., 25.]\n", - " }\n", - "))\n", - "tab" + "qtab.reset_index('x0')" ] }, { "cell_type": "markdown", - "id": "e3ab5b1f", + "id": "f186c5fb", "metadata": {}, "source": [ - "Group on multiple columns using thier indexes." + "Reset the index using multiple named columns" ] }, { "cell_type": "code", "execution_count": null, - "id": "c01d3cc9", + "id": "9c62edc4", "metadata": {}, "outputs": [], "source": [ - "tab.groupby(level=[0, 1]).mean()" + "qtab.reset_index(['x0', 'x1'])" ] }, { "cell_type": "markdown", - "id": "d46304f0", + "id": "c6f54a5c", "metadata": {}, "source": [ - "Example table with Nulls." + "Reset the index specifying the column `number` which is to be added to the table" ] }, { "cell_type": "code", "execution_count": null, - "id": "dc222240", + "id": "c52367f4", "metadata": {}, "outputs": [], "source": [ - "tab = kx.Table(\n", - " [\n", - " [\"a\", 12, 12],\n", - " [kx.q('`'), 12.3, 33.],\n", - " [\"b\", 12.3, 123],\n", - " [\"a\", 1, 1]\n", - " ],\n", - " columns=[\"a\", \"b\", \"c\"]\n", - ")\n", - "tab" + "qtab.reset_index(0)" ] }, { "cell_type": "markdown", - "id": "4c38e902", + "id": "ee76fa24", "metadata": {}, "source": [ - "Group on column `a` and keep null groups." 
+ "Reset the index specifying multiple numbered columns" ] }, { "cell_type": "code", "execution_count": null, - "id": "833e4a92", - "metadata": { - "scrolled": true - }, + "id": "0cf6b213", + "metadata": {}, "outputs": [], "source": [ - "tab.groupby('a', dropna=False).sum()" + "qtab.reset_index([0, 2])" ] }, { "cell_type": "markdown", - "id": "c26a98ff", + "id": "7fc928a5", "metadata": {}, "source": [ - "Group on column `a` keeping null groups and not using the groups as an index column." + "Drop index columns from table" ] }, { "cell_type": "code", "execution_count": null, - "id": "bb5d1bac", + "id": "8596e5a1", "metadata": {}, "outputs": [], "source": [ - "tab.groupby('a', dropna=False, as_index=False).sum()" + "qtab.reset_index(drop=True)" ] }, { "cell_type": "markdown", - "id": "af8fad39", + "id": "e95b57dd", "metadata": {}, "source": [ - "## Apply\n", - "\n", - "### Table.apply()\n", + "Drop specified key columns from table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dde1ee77", + "metadata": {}, + "outputs": [], + "source": [ + "qtab.reset_index('x0', drop=True)" + ] + }, + { + "cell_type": "markdown", + "id": "8e19ddeb", + "metadata": {}, + "source": [ + "### Table.set_index()\n", "\n", "```\n", - "Table.apply(\n", - " func,\n", - " *args,\n", - " axis=0,\n", - " raw=None,\n", - " result_type=None,\n", - " **kwargs\n", + "Table.set_index(\n", + " keys,\n", + " drop=True,\n", + " append=False,\n", + " inplace=False,\n", + " verify_integrity=False,\n", ")\n", "```\n", "\n", - "Apply a function along an axis of the DataFrame.\n", - "\n", - "Objects passed to a function are passed as kx list objects.\n", + "Add index/indexes to a Table/KeyedTable.\n", "\n", "**Parameters:**\n", "\n", - "| Name | Type | Description | Default |\n", - "| :--------------: | :---------------------------------: | :-------------------------------------------------------------------------- | :------: |\n", - "| func | function | Function to apply to each 
column or row. | |\n", - "| `*args` | any | Positional arguments to pass to `func` in addition to the kx list. | |\n", - "| axis | int | The axis along which the function is applied, `0` applies function to each column, `1` applied function to each row. | 0 | \n", - "| raw | bool | Not yet implemented. | None |\n", - "| result_type | str | Not yet implemented. | None |\n", - "| `**kwargs` | dict | Additional keyword arguments to pass as keywords to `func`, this argument is not implemented in the case `func` is a kx callable function. | None | \n", - "\n", + "| Name | Type | Description | Default |\n", + "| :--------------: | :--: | :----------------------------------------------------------------------- | :--------: |\n", + "| keys | Union[Symbol/SymbolVector/Table] | The key(s) or data to key on | _required_ |\n", + "| drop | bool | Not Yet Implemented | True |\n", + "| append | bool | Whether to append columns to existing index. | False | \n", + "| inplace | bool | Not Yet Implemented | False |\n", + "| verify_integrity | bool | Check the new index for duplicates | False | \n", "\n", "**Returns:**\n", "\n", - "| Type | Description |\n", - "| :-----------------------: | :---------------------------------------------- |\n", - "| List, Dictionary or Table | Result of applying `func` along the giveen axis of the `kx.Table`. |\n", + "| Type | Description |\n", + "| :--------: | :--------------------------------------------- |\n", + "| KeyedTable | The resulting table after the index is applied |\n", "\n", "**Examples:**\n", "\n", - "Example Table." 
+ "Adding indexes:" ] }, { "cell_type": "code", "execution_count": null, - "id": "02f41281", + "id": "6ede4322", "metadata": {}, "outputs": [], "source": [ - "tab = kx.Table([[4, 9]] * 3, columns=['A', 'B'])\n", - "\n", - "tab" + "kx.q('N: 10')\n", + "tab = kx.q('([] sym: N?`AAPL`GOOG`MSFT; price: 2.5f - N?5f; traded: N?0 1; hold: N?01b)')" ] }, { - "cell_type": "markdown", - "id": "cf555661", + "cell_type": "code", + "execution_count": null, + "id": "f6708166", "metadata": {}, + "outputs": [], "source": [ - "Apply square root on each item within a column" + "#Setting a single index\n", + "tab.set_index('sym')" ] }, { "cell_type": "code", "execution_count": null, - "id": "173acc13", + "id": "abf46438", "metadata": {}, "outputs": [], "source": [ - "tab.apply(kx.q.sqrt)" + "#Setting multipe indexes\n", + "tab.set_index(['sym', 'traded'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "567ff8e9", + "metadata": {}, + "outputs": [], + "source": [ + "#Pass a table as index (lengths must match)\n", + "status = kx.q('{select movement from ungroup select movement:`down`up 0<=deltas price by sym from x}',tab)\n", + "tab.set_index(status)" ] }, { "cell_type": "markdown", - "id": "a00dda0c", + "id": "fb24895d", "metadata": {}, "source": [ - "Apply a reducing function sum on either axis" + "Appending:" ] }, { "cell_type": "code", "execution_count": null, - "id": "4936ea30", + "id": "d080737f", "metadata": {}, "outputs": [], "source": [ - "tab.apply(kx.q.sum)" + "#Default is false - previous index 'sym' deleted and replaced by 'hold'\n", + "tab.set_index('sym').set_index('hold')" ] }, { "cell_type": "code", "execution_count": null, - "id": "5df4a3ac", + "id": "c33e779e", "metadata": {}, "outputs": [], "source": [ - "tab.apply(lambda x: sum(x), axis=1)" + "#append= True will retain 'sym' index and add 'hold' as second index\n", + "tab.set_index('sym').set_index('hold', append= True)" ] }, { "cell_type": "markdown", - "id": "8da6da7c", + "id": "c7eab4a6", 
"metadata": {}, "source": [ - "## Aggregate\n", - "\n", - "### Table.agg()\n", - "\n", - "```\n", - "Table.agg(\n", - " func,\n", - " axis=0,\n", - " *args,\n", - " **kwargs\n", - ")\n", - "```\n", - "\n", - "Aggregate data using one or more operations over a specified axis\n", - "\n", - "Objects passed to a function are passed as kx vector/list objects.\n", - "\n", - "**Parameters:**\n", - "\n", - "| Name | Type | Description | Default |\n", - "| :--------------: | :---------------------------------: | :-------------------------------------------------------------------------- | :------: |\n", - "| func | function, str, list or dict | Function to use for aggregating the data. If a function this must either work when passed a `Table` or when passed to `Table.apply`

      Accepted combinations are:
      • function
      • string function name
      • list of functions and/or function names, e.g. `[kx.q.sum, 'mean']`
      • dict of axis labels -> functions or function names
        • | |\n", - "| `*args` | any | Positional arguments to pass to `func` in addition to the kx list. | |\n", - "| axis | int | The axis along which the function is applied, `0` applies function to each column, at present row based application is not supported. | 0 | \n", - "| `**kwargs` | dict | Additional keyword arguments to pass as keywords to `func`, this argument is not implemented in the case `func` is a kx callable function. | None | \n", - "\n", - "\n", - "**Returns:**\n", - "\n", - "| Type | Description |\n", - "| :-----------------------: | :---------------------------------------------- |\n", - "| List, Dictionary or Table | Result of applying `func` along the giveen axis of the `kx.Table`. |\n", - "\n", - "**Examples:**\n", - "\n", - "Example Table." + "Verify Integrity:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98fc7587", + "metadata": {}, + "outputs": [], + "source": [ + "#Will allow duplicates in index:\n", + "tab.set_index('sym')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b17c1a22", + "metadata": {}, + "outputs": [], + "source": [ + "#Will error as 'sym' has duplicates\n", + "try:\n", + " tab.set_index('sym', verify_integrity= True)\n", + "except kx.QError as e:\n", + " print(f'Caught Error: {e}')" ] } ], @@ -3342,7 +3661,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/docs/user-guide/advanced/attributes.md b/docs/user-guide/advanced/attributes.md index ce35522..d4d1c50 100644 --- a/docs/user-guide/advanced/attributes.md +++ b/docs/user-guide/advanced/attributes.md @@ -31,7 +31,10 @@ pykx.LongVector(pykx.q('`s#0 1 2 3 4 5 6 7 8 9')) Applying the unique attribute to the first column of the table. ```Python ->>> a = kx.q('([] a: til 10; b: `a`b`c`d`e`f`g`h`i`j)') +>>> a = kx.Table(data = { +... 'a': kx.q.til(5), +... 'b': ['a', 'b', 'c', 'd', 'e'] +... 
}) >>> kx.q.meta(a) pykx.KeyedTable(pykx.q(' c| t f a @@ -52,7 +55,10 @@ b| s Applying the grouped attribute to a specified column of a table. ```Python ->>> a = kx.q('([] a: til 10; b: `a`a`b`b`c`c`d`d`e`e)') +>>> a = kx.Table(data = { +... 'a': kx.q.til(5), +... 'b': ['a', 'a', 'b', 'b', 'b'] +... }) >>> kx.q.meta(a) pykx.KeyedTable(pykx.q(' c| t f a @@ -73,7 +79,10 @@ b| s g Applying the parted attribute to multiple columns on a table. ```Python ->>> a = kx.q('([] a: til 10; b: `a`a`b`b`c`c`d`d`e`e)') +>>> a = kx.Table(data = { +... 'a': kx.q.til(5), +... 'b': ['a', 'a', 'b', 'b', 'b'] +... }) >>> kx.q.meta(a) pykx.KeyedTable(pykx.q(' c| t f a diff --git a/docs/user-guide/advanced/database.md b/docs/user-guide/advanced/database.md new file mode 100644 index 0000000..26fbb4f --- /dev/null +++ b/docs/user-guide/advanced/database.md @@ -0,0 +1,7 @@ +# Databases + +!!! Warning + + This module is a Beta Feature and is subject to change. To enable this functionality for testing, please follow the configuration instructions [here](../../user-guide/configuration.md), setting `PYKX_BETA_FEATURES='true'`. + +This documentation is to be added once all beta functionality for database management has been added. In particular, it should cover what a database is and what forms of tables can be managed using the `db` module. 
diff --git a/docs/user-guide/advanced/ipc.md b/docs/user-guide/advanced/ipc.md index 5ecc89f..4be6a67 100644 --- a/docs/user-guide/advanced/ipc.md +++ b/docs/user-guide/advanced/ipc.md @@ -21,7 +21,7 @@ used to replace the functionality of [`qPython`](https://github.com/exxeleron/qP ```python # Licensed mode with pykx.SyncQConnection('localhost', 5001) as q: - result = q('til 10') + result = q.til(10) print(result) print(result.py()) @@ -32,7 +32,7 @@ with pykx.SyncQConnection('localhost', 5001) as q: ```python # Unlicensed mode with pykx.SyncQConnection('localhost', 5001) as q: - result = q('til 10') + result = q.til(10) print(result) print(result.py()) @@ -50,7 +50,7 @@ ensure that the connection instance is properly closed automatically when leavin Manually creating a `QConnection` ```python -q = pykx.SyncQConnection('localhost' 5001) # Directly instantiate a QConnection instance +q = pykx.SyncQConnection('localhost', 5001) # Directly instantiate a QConnection instance q(...) # Make some queries q.close() # Must manually ensure it is closed when no longer needed ``` diff --git a/docs/user-guide/advanced/pandas_breakdown.md b/docs/user-guide/advanced/pandas_breakdown.md new file mode 100644 index 0000000..1e1b602 --- /dev/null +++ b/docs/user-guide/advanced/pandas_breakdown.md @@ -0,0 +1,86 @@ +# Pandas Like API for PyKX Tables + +The aim of this page is to demonstrate PyKX functionality that aligns with the approach and principles of the Pandas API for DataFrame interactions. Not all operations supported by Pandas are covered, only the operations on PyKX tables that follow Pandas API conventions. In particular, it focuses on areas where PyKX/q has the potential to provide a performance advantage over the use of Pandas. This advantage may be in the memory footprint of the operations and/or in the execution time of the analytic. + +A full breakdown of the available functionality and examples of its use can be found [here](Pandas_API.ipynb). 
+ +## Covered sections of the Pandas API + +Coverage in this instance refers to functionality covered by the PyKX API for Tables which has equivalent functionality to the methods and attributes supported by the Pandas DataFrame API. This does not cover the functionality supported by Pandas for interactions with Series objects or for reading/writing CSV/JSON files etc. + + +If there's any functionality you would like to see added to this library, please open an issue [here](https://github.com/KxSystems/pykx/issues) or open a pull request [here](https://github.com/KxSystems/pykx/pulls). + +### Property/metadata type information + +| DataFrame Properties | PyKX Supported? | PyKX API Documentation Link | Additional Information | +|----------------------|-----------------|-----------------------------|------------------------| +| [columns](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.columns.html) | :material-check: | [link](Pandas_API.ipynb#tablecolumns) | | +| [dtypes](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.dtypes.html) | :material-check: | [link](https://code.kx.com/pykx/2.2/user-guide/advanced/Pandas_API.html#tabledtypes) | | +| [empty](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.empty.html) | :material-check: | [link](https://code.kx.com/pykx/2.2/user-guide/advanced/Pandas_API.html#tableempty) | | +| [ndim](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.ndim.html) | :material-check: | [link](https://code.kx.com/pykx/2.2/user-guide/advanced/Pandas_API.html#tablendim) | | +| [shape](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.shape.html) | :material-check: | [link](https://code.kx.com/pykx/2.2/user-guide/advanced/Pandas_API.html#tableshape) | | +| [size](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.size.html) | :material-check: | [link](https://code.kx.com/pykx/2.2/user-guide/advanced/Pandas_API.html#tablesize) | | + +### Analytic functionality + 
+| DataFrame Method | PyKX Supported? | PyKX API Documentation Link | Additional Information | +|----------------------|-----------------|-----------------------------|------------------------| +| [abs](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.abs.html) | :material-check: | [link](Pandas_API.ipynb#tableabs) | | +| [agg](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.agg.html) | :material-check: | [link](Pandas_API.ipynb#tableagg) | | +| [apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html) | :material-check: | [link](Pandas_API.ipynb#tableapply) | | +| [groupby](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) | :material-check: | [link](Pandas_API.ipynb#tablegroupby) | | +| [max](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.max.html) | :material-check: | [link](Pandas_API.ipynb#tablemax) | | +| [mean](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.mean.html) | :material-check: | [link](Pandas_API.ipynb#tablemean) | | +| [median](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.median.html) | :material-check: | [link](Pandas_API.ipynb#tablemedian) | | +| [min](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.min.html) | :material-check: | [link](Pandas_API.ipynb#tablemin) | | +| [mode](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.mode.html) | :material-check: | [link](Pandas_API.ipynb#tablemode) | | +| [sum](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sum.html) | :material-check: | [link](Pandas_API.ipynb#tablesum) | | +| [skew](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.skew.html) | :material-check: | [link](Pandas_API.ipynb#tableskew) | | +| [std](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.std.html) | :material-check: | [link](Pandas_API.ipynb#tablestd) | | +| [prod](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.prod.html) | 
:material-check: | [link](Pandas_API.ipynb#tableprod) | | + +### Querying and data interrogation + +| DataFrame Method | PyKX Supported? | PyKX API Documentation Link | Additional Information | +|----------------------|-----------------|-----------------------------|------------------------| +| [all](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.all.html) | :material-check: | [link](Pandas_API.ipynb#tableall) | | +| [any](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.any.html) | :material-check: | [link](Pandas_API.ipynb#tableany) | | +| [at](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.at.html) | :material-check: | [link](Pandas_API.ipynb#tableat) | | +| [count](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.count.html) | :material-check: | [link](Pandas_API.ipynb#tablecount) | | +| [get](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.get.html) | :material-check: | [link](Pandas_API.ipynb#tableget) | | +| [head](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.head.html) | :material-check: | [link](Pandas_API.ipynb#tablehead) | | +| [iloc](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iloc.html) | :material-check: | [link](Pandas_API.ipynb#tableiloc) | | +| [loc](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.loc.html) | :material-check: | [link](Pandas_API.ipynb#tableloc) | | +| [sample](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sample.html) | :material-check: | [link](Pandas_API.ipynb#tablesample) | | +| [select_dtypes](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.select_dtypes.html) | :material-check: | [link](Pandas_API.ipynb#tableselect_dtypes) | | +| [tail](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.tail.html) | :material-check: | [link](Pandas_API.ipynb#tabletail) | | + +### Data Preprocessing + +| DataFrame Method | PyKX Supported? 
| PyKX API Documentation Link | Additional Information | +|----------------------|-----------------|-----------------------------|------------------------| +| [add_prefix](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.add_prefix.html) | :material-check: | [link](Pandas_API.ipynb#tableadd_prefix) | | +| [add_suffix](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.add_suffix.html) | :material-check: | [link](Pandas_API.ipynb#tableadd_suffix) | | +| [astype](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.astype.html) | :material-check: | [link](Pandas_API.ipynb#tableastype) | | +| [drop](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop.html) | :material-check: | [link](Pandas_API.ipynb#tabledrop) | | +| [drop_duplicates](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html) | :material-check: | [link](Pandas_API.ipynb#tabledrop_duplicates) | | +| [pop](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.pop.html) | :material-check: | [link](Pandas_API.ipynb#tablepop) | | +| [rename](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rename.html) | :material-check: | [link](Pandas_API.ipynb#tablerename) | | +| [set_index](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.set_index.html) | :material-check: | [link](Pandas_API.ipynb#tableset_index) | | + +### Data Joins/Merge + +| DataFrame Method | PyKX Supported? | PyKX API Documentation Link | Additional Information | +|----------------------|-----------------|-----------------------------|------------------------| +| [merge](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html) | :material-check: | [link](Pandas_API.ipynb#tablemerge) | | +| [merge_asof](https://pandas.pydata.org/docs/reference/api/pandas.merge_asof.html) | :material-check: | [link](Pandas_API.ipynb#tablemerge_asof) | | + +### Unsupported Functionality + +| DataFrame Methods | PyKX Supported? 
| PyKX API Documentation Link | Additional Information | +|----------------------|------------------|-----------------------------|------------------------| +| `*from*` | :material-close: | | Functionality for the creation of PyKX Tables from alternative data sources is not supported at this time. | +| `*plot*` | :material-close: | | Functionality for the plotting of columns/tables is not supported at this time. | +| `*sparse*` | :material-close: | | Functionality for sparse-data interactions is not supported at this time. | +| `to_*` | :material-close: | | Functionality for the conversion/persistence of PyKX Tables to other formats is not supported at this time. | diff --git a/docs/user-guide/configuration.md b/docs/user-guide/configuration.md index 5a59770..2852400 100644 --- a/docs/user-guide/configuration.md +++ b/docs/user-guide/configuration.md @@ -16,6 +16,9 @@ PYKX_KEEP_LOCAL_TIMES="true" [test] PYKX_GC="true" PYKX_RELEASE_GIL="true" + +[beta] +PYKX_BETA_FEATURES="true" ``` On import of PyKX the file `.pykx-config` will be searched for according to the following path ordering, the first location containing a `.pykx-config` file will be used for definition of the : @@ -67,34 +70,46 @@ The options can be used to tune PyKX behavior at run time. 
These variables need The following variables can be used to enable or disable advanced features of PyKX across all modes of operation: -| Option | Default | Values | Description | Status | -|---------------------------------|---------|-----------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------| -| `PYKX_IGNORE_QHOME` | `False` | `1` or `true` | When loading PyKX on a machine with an existing q installation (and the environment variable `QHOME` set to the installation folder), PyKX will look within this directory for q scripts their dependencies. It will then symlink these files to make them available to load under PyKX. This variable instructs PyKX to not perform this symlinking. | | -| `PYKX_KEEP_LOCAL_TIMES` | `False` | `1` or `true` | When converting a Python datetime object to q, PyKX will translate the Python datetime into UTC before the conversion. This variable instructs PyKX to convert the Python datetime using the local time zone. | | -| `PYKX_ALLOCATOR` | `False` | `1` or `true` | When converting a Numpy array to q, PyKX implements a full data copy in order to translate the Numpy array to q representation in memory. When this is set PyKX implements [NEP-49](https://numpy.org/neps/nep-0049.html) which allows q to handle memory allocation of all Numpy arrays so they can be converted more efficiently to q. This avoids the need to resort to a copy where possible. 
| | -| `PYKX_GC` | `False` | `1` or `true` | When PYKX_ALLOCATOR is enabled, PyKX can trigger q garbage collector when Numpy arrays allocated by PyKX are deallocated. This variable enables this behavior which will release q memory to the OS following deallocation of the Numpy array at the cost of a small overhead. | | -| `PYKX_LOAD_PYARROW_UNSAFE` | `False` | `1` or `true` | By default, PyKX uses a subprocess to import pyarrow as it can result in a crash when the version of pyarrow is incompatible. This variable will trigger a normal import of pyarrow and importing PyKX should be slightly faster. | | -| `PYKX_MAX_ERROR_LENGTH` | `256` | size in characters | By default, PyKX reports IPC connection errors with a message buffer of size 256 characters. This allows the length of these error messages to be modified reducing the chance of excessive error messages polluting logs. | | -| `PYKX_NOQCE` | `False` | `1` or `true` | On Linux, PyKX comes with q Cloud Edition features from Insights Core (https://code.kx.com/insights/1.2/core/). This variable allows a user to skip the loading of q Cloud Edition functionality, saving some time when importing PyKX but removing access to possibly supported additional functionality. | | -| `PYKX_Q_LIB_LOCATION` | `UNSET` | Path to a directory containing q libraries necessary for loading PyKX | See [here](../release-notes/changelog.md#pykx-131) for detailed information. This allows a user to centralise the q libraries, `q.k`, `read.q`, `libq.so` etc to a managed location within their environment which is decentralised from the Python installation. This is required for some enterprise use-cases. | | -| `PYKX_RELEASE_GIL` | `False` | `1` or `true` | When PYKX_RELEASE_GIL is enabled the Python Global Interpreter Lock will not be held when calling into q. 
| | -| `PYKX_Q_LOCK` | `False` | `1` or `true` | When PYKX_Q_LOCK is enabled a re-entrant lock is added around calls into q, this lock will stop multiple threads from calling into q at the same time. This allows embedded q to be thread safe even when using PYKX_RELEASE_GIL. | | -| `PYKX_DEBUG_INSIGHTS_LIBRARIES` | `False` | `1` or `true` | If the insights libraries failed to load this variable can be used to print out the full error output for debugging purposes. | | -| `PYKX_UNLICENSED` | `False` | `1` or `true` | Set PyKX to make use of the library in `unlicensed` mode at all times. | | -| `PYKX_LICENSED` | `False` | `1` or `true` | Set PyKX to make use of the library in `licensed` mode at all times. | | -| `IGNORE_QHOME` | `True` | `1` or `true` | When loading PyKX on a machine with an existing q installation (and the environment variable `QHOME` set to the installation folder), PyKX will look within this directory for q scripts their dependencies. It will then symlink these files to make them available to load under PyKX. This variable instructs PyKX to not perform this symlinking. | `DEPRECATED`, please use `PYKX_IGNORE_QHOME` | -| `KEEP_LOCAL_TIMES` | `False` | `1` or `true` | When converting a Python datetime object to q, PyKX will translate the Python datetime into UTC before the conversion. This variable instructs PyKX to convert the Python datetime using the local time zone. 
| `DEPRECATED`, please use `PYKX_KEEP_LOCAL_TIMES` | +| Option | Default | Values | Description | Status | +|---------------------------------|---------|-----------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------| +| `PYKX_BETA_FEATURES` | `False` | `1` or `true` | Enable all Beta features supplied with PyKX allowing users to test and prototype code slated for later releases. | | +| `PYKX_IGNORE_QHOME` | `False` | `1` or `true` | When loading PyKX on a machine with an existing q installation (and the environment variable `QHOME` set to the installation folder), PyKX will look within this directory for q scripts their dependencies. It will then symlink these files to make them available to load under PyKX. This variable instructs PyKX to not perform this symlinking. | | +| `PYKX_KEEP_LOCAL_TIMES` | `False` | `1` or `true` | When converting a Python datetime object to q, PyKX will translate the Python datetime into UTC before the conversion. This variable instructs PyKX to convert the Python datetime using the local time zone. | | +| `PYKX_ALLOCATOR` | `False` | `1` or `true` | When converting a Numpy array to q, PyKX implements a full data copy in order to translate the Numpy array to q representation in memory. When this is set PyKX implements [NEP-49](https://numpy.org/neps/nep-0049.html) which allows q to handle memory allocation of all Numpy arrays so they can be converted more efficiently to q. This avoids the need to resort to a copy where possible. 
| | +| `PYKX_GC` | `False` | `1` or `true` | When PYKX_ALLOCATOR is enabled, PyKX can trigger q garbage collector when Numpy arrays allocated by PyKX are deallocated. This variable enables this behavior which will release q memory to the OS following deallocation of the Numpy array at the cost of a small overhead. | | +| `PYKX_LOAD_PYARROW_UNSAFE` | `False` | `1` or `true` | By default, PyKX uses a subprocess to import pyarrow as it can result in a crash when the version of pyarrow is incompatible. This variable will trigger a normal import of pyarrow and importing PyKX should be slightly faster. | | +| `PYKX_MAX_ERROR_LENGTH` | `256` | size in characters | By default, PyKX reports IPC connection errors with a message buffer of size 256 characters. This allows the length of these error messages to be modified reducing the chance of excessive error messages polluting logs. | | +| `PYKX_NOQCE` | `False` | `1` or `true` | On Linux, PyKX comes with q Cloud Edition features from Insights Core (https://code.kx.com/insights/1.2/core/). This variable allows a user to skip the loading of q Cloud Edition functionality, saving some time when importing PyKX but removing access to possibly supported additional functionality. | | +| `PYKX_Q_LIB_LOCATION` | `UNSET` | Path to a directory containing q libraries necessary for loading PyKX | See [here](../release-notes/changelog.md#pykx-131) for detailed information. This allows a user to centralise the q libraries, `q.k`, `read.q`, `libq.so` etc to a managed location within their environment which is decentralised from the Python installation. This is required for some enterprise use-cases. | | +| `PYKX_RELEASE_GIL` | `False` | `1` or `true` | When PYKX_RELEASE_GIL is enabled the Python Global Interpreter Lock will not be held when calling into q. 
| | +| `PYKX_Q_LOCK` | `False` | `1` or `true` | When PYKX_Q_LOCK is enabled a re-entrant lock is added around calls into q, this lock will stop multiple threads from calling into q at the same time. This allows embedded q to be thread safe even when using PYKX_RELEASE_GIL. | | +| `PYKX_DEBUG_INSIGHTS_LIBRARIES` | `False` | `1` or `true` | If the insights libraries failed to load this variable can be used to print out the full error output for debugging purposes. | | +| `PYKX_UNLICENSED` | `False` | `1` or `true` | Set PyKX to make use of the library in `unlicensed` mode at all times. | | +| `PYKX_LICENSED` | `False` | `1` or `true` | Set PyKX to make use of the library in `licensed` mode at all times. | | +| `PYKX_THREADING` | `False` | `1` or `true` | When importing PyKX start EmbeddedQ within a background thread. This allows calls into q from any thread to modify state, this environment variable is only supported for licensed users. | | +| `PYKX_SKIP_SIGNAL_OVERWRITE` | `False` | `1` or `true` | Skip overwriting of [signal](https://docs.python.org/3/library/signal.html) definitions by PyKX, these are presently overwritten by default to reset Pythonic default definitions which are reset by PyKX on initialisation in licensed modality. | | +| `PYKX_NO_SIGNAL` | `False` | `1` or `true` | Skip overwriting of [signal](https://docs.python.org/3/library/signal.html) definitions by PyKX, these are presently overwritten by default to reset Pythonic default definitions which are reset by PyKX on initialisation in licensed modality. | | +| `PYKX_NO_SIGINT` | `False` | `1` or `true` | Avoid setting `signal.signal(signal.SIGINT)` once PyKX is loaded, these are presently set to the Python default values once PyKX is loaded to ensure that PyKX licensed modality does not block their use by Python. 
| `DEPRECATED`, please use `PYKX_NO_SIGNAL` | +| `IGNORE_QHOME` | `True` | `1` or `true` | When loading PyKX on a machine with an existing q installation (and the environment variable `QHOME` set to the installation folder), PyKX will look within this directory for q scripts their dependencies. It will then symlink these files to make them available to load under PyKX. This variable instructs PyKX to not perform this symlinking. | `DEPRECATED`, please use `PYKX_IGNORE_QHOME` | +| `KEEP_LOCAL_TIMES` | `False` | `1` or `true` | When converting a Python datetime object to q, PyKX will translate the Python datetime into UTC before the conversion. This variable instructs PyKX to convert the Python datetime using the local time zone. | `DEPRECATED`, please use `PYKX_KEEP_LOCAL_TIMES` | The variables below can be used to set the environment for q (embedded in PyKX, in licensed mode): -| Variable | Values | Description | -|----------|----------|-------------| -| `QARGS` | See link | Command-line flags to pass to q, see [here](https://code.kx.com/q/basics/cmdline/) for more information. | -| `QHOME` | Path to the users q installation folder | See [here](https://code.kx.com/q/learn/install/#step-5-edit-your-profile) for more information. | -| `QLIC` | Path to the folder where the q license should be found | See [here](https://code.kx.com/q/learn/install/#step-5-edit-your-profile) for more information. | +| Variable | Values | Description | +|----------|----------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------| +| `QARGS` | See link | Command-line flags to pass to q, see [here](https://code.kx.com/q/basics/cmdline/) for more information. | +| `QHOME` | Path to the users q installation folder | See [here](https://code.kx.com/q/learn/install/#step-5-edit-your-profile) for more information. 
| +| `QLIC` | Path to the folder where the q license should be found | See [here](https://code.kx.com/q/learn/install/#step-5-edit-your-profile) for more information. | | `QINIT` | Path to an additional `*.q` file loaded after `PyKX` has initialized | See [here](https://code.kx.com/q4m3/14_Introduction_to_Kdb%2B/#1481-the-environment-variables) for more information. | +The following variables can be set in configuration or as environment variables to define the `kc.lic` or `k4.lic` license used by PyKX if no license is found + +| Variable | Description | +|---------------------|------------------------------------------------------------------------------------------------| +| `KDB_LICENSE_B64` | This should contain the base-64 encoded contents of a valid `kc.lic` file with `pykx` enabled. | +| `KDB_K4LICENSE_B64` | This should contain the base-64 encoded contents of a valid `k4.lic` file with `pykx` enabled. | + #### PyKX QARGS Supported Additions When using PyKX users can use the following values when defining `QARGS` to modify the behaviour of PyKX at initialisation when running within a Linux environment. 
diff --git a/docs/user-guide/fundamentals/creating.md b/docs/user-guide/fundamentals/creating.md index e7bf8bb..679b5cb 100644 --- a/docs/user-guide/fundamentals/creating.md +++ b/docs/user-guide/fundamentals/creating.md @@ -206,6 +206,17 @@ AAPL 68.98055 94 ')) ``` +Additionally for retrieval of current temporal information users can make calls to the `date`, `time` and `timestamp` type objects respectively as follows + +```python +>>> kx.DateAtom('today') +pykx.DateAtom(pykx.q('2024.01.05')) +>>> kx.TimeAtom('now') +pykx.TimeAtom(pykx.q('16:22:12.178')) +>>> kx.TimestampAtom('now') +pykx.TimestampAtom(pykx.q('2024.01.05T16:22:21.012631000')) +``` + ### Evaluating q code using `kx.q` For users more familiar with q it is possible to evaluate q code to generate PyKX objects, this can be done as follows @@ -313,7 +324,10 @@ dtype: int64 4 ] >>> ->>> qtab = kx.q('([]5?1f;5?1f)') +>>> qtab = kx.Table(data={ +... 'x': kx.random.random(5, 1.0), +... 'x1': kx.random.random(5, 1.0), +... }) >>> qtab pykx.Table(pykx.q(' x x1 @@ -348,9 +362,9 @@ x1: double ```python >>> import pykx as kx - >>> qtime = kx.q('first 1?0p') + >>> qtime = kx.TimestampAtom('now') >>> qtime - pykx.TimestampAtom(pykx.q('2001.08.17D03:16:23.736627552')) + pykx.TimestampAtom(pykx.q('2024.01.05D03:16:23.736627552')) >>> kx.toq(qtime.py()) - pykx.TimestampAtom(pykx.q('2001.08.17D03:16:23.736627000')) + pykx.TimestampAtom(pykx.q('2024.01.05D03:16:23.736627000')) ``` diff --git a/docs/user-guide/fundamentals/indexing.md b/docs/user-guide/fundamentals/indexing.md index ccb0910..e157b00 100644 --- a/docs/user-guide/fundamentals/indexing.md +++ b/docs/user-guide/fundamentals/indexing.md @@ -51,7 +51,7 @@ IndexError: index out of range N Dimensional list vectors can also be manipulated using single element indexing as follows ```python ->>> x = kx.q('4 4#16?1f') +>>> x = kx.random.random([4, 4], 1.0) >>> x pykx.List(pykx.q(' 0.5294808 0.6916099 0.2296615 0.6919531 @@ -76,7 +76,7 @@ pykx.LongVector(pykx.q('5 6 
7 8 9')) >>> x[:8:2] pykx.LongVector(pykx.q('0 2 4 6')) ->>> x = kx.q('4 4#16?1f') +>>> x = kx.random.random([4, 4], 1.0) >>> x[:2] pykx.List(pykx.q(' 0.1477547 0.274227 0.5635053 0.883823 @@ -92,6 +92,35 @@ pykx.CharVector(pykx.q('"defg"')) pykx.CharVector(pykx.q('"ace"')) ``` +Negative slicing works in a similar way and can be used for `list`, `vector` and `table` objects too. + +```python +>>> list = kx.q('("a"; 2; 3.3; `four)') +>>> list[-3:] +pykx.List(pykx.q(' +2 +3.3 +`four +')) + +>>> vector = kx.q.til(5) +>>> vector[:-1] +pykx.LongVector(pykx.q('0 1 2 3')) + +>>> table = kx.Table(data = { +... 'a': [1, 2, 3], +... 'b': [4, 5, 6], +... 'c': [7, 8, 9], +... }) +>>> table[-2:] +pykx.Table(pykx.q(' +a b c +----- +2 5 8 +3 6 9 +')) +``` + ## Assigning and Adding Values to Vectors/Lists Vector assignment in PyKX operates similarly to that provided by Numpy and operations supported on basic Python lists. As with the previous sections this functionality supports both individual element assignment and slice assignment as follows: @@ -114,7 +143,7 @@ pykx.LongVector(pykx.q('0 0 0 0 0 5 6 7 8 10')) ??? Note "N-Dimensional vector element assignment not supported" ```python - >>> x = kx.q('4 4#16?1f') + >>> x = kx.random.random([4, 4], 1.0) >>> x pykx.List(pykx.q(' 0.3927524 0.5170911 0.5159796 0.4066642 diff --git a/docs/user-guide/fundamentals/nulls_and_infinities.md b/docs/user-guide/fundamentals/nulls_and_infinities.md index f74fd48..eb5a988 100644 --- a/docs/user-guide/fundamentals/nulls_and_infinities.md +++ b/docs/user-guide/fundamentals/nulls_and_infinities.md @@ -120,7 +120,7 @@ Vectors with the q types `short`, `int`, and `long` can be converted to Python i Real vectors use the standard `NaN` and `inf` values, and so are handled by q, Python, Numpy, Pandas, and PyArrow in the same way with no special handling. 
-Temporal vectors use `NaT` to represent null values in Numpy and Pandas, `None` to represent them in pure Python, and PyArrow represents null temporal values like it does for any other data type: by masking it out using the array metadata. +Temporal vectors use `NaT` to represent null values in Numpy and Pandas, null values are left as `pykx.K` objects in pure Python, and PyArrow represents null temporal values like it does for any other data type: by masking it out using the array metadata. When converting a table from q to Python with one of the methods above, each column will be transformed as an independent vector as described above. @@ -158,6 +158,131 @@ x x1 ')) ``` +An important example which illustrates some of the limitations of Pandas DataFrames when displaying masked arrays in index columns can be seen as follows. + +In the example below we convert a keyed table with a single key column containing nulls to Pandas; as expected, the null mask is applied appropriately on conversion. + +```python +>>> keytab = kx.q.xkey('x', +... kx.Table(data = { +... 'x': kx.q('1 2 0N'), +... 'x1': kx.q('1 0N 2'), +... 'x3': [1, 2, 3]}) +... ) +>>> keytab +pykx.KeyedTable(pykx.q(' +x| x1 x3 +-| ----- +1| 1 1 +2| 2 + | 2 3 +')) +>>> keytab.pd() + x1 x3 +x + 1 1 1 + 2 -- 2 +-- 2 3 +``` + +However, when displaying with multi-index columns the mask behaviour is not adhered to; this can be seen as follows. + +```python +>>> keytab = kx.q.xkey(['x', 'x1'], +... kx.Table(data = { +... 'x': kx.q('1 2 0N'), +... 'x1': kx.q('1 0N 2'), +... 'x3': [1, 2, 3]}) +... ) +>>> keytab +pykx.KeyedTable(pykx.q(' +x x1| x3 +----| -- +1 1 | 1 +2 | 2 + 2 | 3 +')) +>>> keytab.pd() + x3 +x x1 + 1 1 1 + 2 -9223372036854775808 2 +-9223372036854775808 2 3 +``` + +To illustrate that this is a limitation of Pandas rather than PyKX, consider the following. + +```python +>>> tab = kx.Table(data = { +... 'x': kx.q('1 2 0N'), +... 'x1': kx.q('1 0N 2'), +... 
'x3': [1, 2, 3]}) +>>> tab +pykx.Table(pykx.q(' +x x1 x3 +------- +1 1 1 +2 2 + 2 3 +')) +>>> df = tab.pd() +>>> df + x x1 x3 +0 1 1 1 +1 2 -- 2 +2 -- 2 3 +>>> df.set_index(['x']) + x1 x3 +x + 1 1 1 + 2 -- 2 +-- 2 3 +>>> df.set_index(['x', 'x1']) + x3 +x x1 + 1 1 1 + 2 -9223372036854775808 2 +-9223372036854775808 2 3 +``` + +In addition to the above inconsistency with Pandas, you may also run into issues with the visual representation of masked arrays when displayed in Pandas DataFrames containing large numbers of rows; for example, consider the following case. + +```python +>>> t = kx.q('([] time:.z.p;a:til 1000;b:9,999#0N)') +>>> t.pd() + time a b +0 2023-06-12 01:25:48.178532806 0 9 +1 2023-06-12 01:25:48.178532806 1 -9223372036854775808 +2 2023-06-12 01:25:48.178532806 2 -9223372036854775808 +3 2023-06-12 01:25:48.178532806 3 -9223372036854775808 +4 2023-06-12 01:25:48.178532806 4 -9223372036854775808 +.. ... ... ... +995 2023-06-12 01:25:48.178532806 995 -9223372036854775808 +996 2023-06-12 01:25:48.178532806 996 -9223372036854775808 +997 2023-06-12 01:25:48.178532806 997 -9223372036854775808 +998 2023-06-12 01:25:48.178532806 998 -9223372036854775808 +999 2023-06-12 01:25:48.178532806 999 -9223372036854775808 + +[1000 rows x 3 columns] +``` + +While `-9223372036854775808` does represent an underlying PyKX null value, it is visually distracting for display purposes. To display the DataFrame with the masked values you must set the Pandas `display.max_rows` option to be at least the length of the specified table; the effect of this can be seen as follows. + +```python +>>> import pandas as pd +>>> t = kx.q('([] time:.z.p;a:til 1000;b:9,999#0N)') +>>> pd.set_option('display.max_rows', 1000) +>>> t.pd() + time a b +0 2023-11-26 22:16:05.885992 0 9 +1 2023-11-26 22:16:05.885992 1 -- +2 2023-11-26 22:16:05.885992 2 -- +3 2023-11-26 22:16:05.885992 3 -- +4 2023-11-26 22:16:05.885992 4 -- +5 2023-11-26 22:16:05.885992 5 -- +.. 
+``` + For more information on masked Numpy arrays and interactions with null representation data in Pandas see the following links - [Numpy masked arrays](https://numpy.org/doc/stable/reference/maskedarray.generic.html#filling-in-the-missing-data) diff --git a/docs/user-guide/fundamentals/querying.md b/docs/user-guide/fundamentals/querying.md index 49241bf..ee02eda 100644 --- a/docs/user-guide/fundamentals/querying.md +++ b/docs/user-guide/fundamentals/querying.md @@ -40,27 +40,55 @@ instance. ```python # select from table object ->>> pykx.q.qsql.select(qtab, columns={'maxCol2': 'max col2'}, by={'col1': 'col1'}) +>>> kx.q.qsql.select(qtab, columns={'maxCol2': 'max col2'}, by={'col1': 'col1'}) # or by name ->>> pykx.q.qsql.select('qtab', columns={'maxCol2': 'max col2'}, by={'col1': 'col1'}) +>>> kx.q.qsql.select('qtab', columns={'maxCol2': 'max col2'}, by={'col1': 'col1'}) ``` Or you can use this to run a functional `qSQL` execute. ```python ->>> pykx.q.qsql.exec(qtab, columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'}, by={'col1': 'col1'}) +>>> kx.q.qsql.exec(qtab, columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'}, by={'col1': 'col1'}) ``` You can also update rows within tables using `qSQL` for example. ```python ->>> pykx.q.qsql.update(qtab, {'eye': ['blue']}, where='hair=`fair') +>>> kx.q.qsql.update(qtab, {'eye': ['blue']}, where='hair=`fair') ``` You can also delete rows of a table based on vairious conditions using `qSQL`. ```python ->>> pykx.q.qsql.delete('qtab', where=['hair=`fair', 'age=28']) +>>> kx.q.qsql.delete('qtab', where=['hair=`fair', 'age=28']) +``` + +When operating on in-memory tables, updates can be persisted for `select`, `update` and `delete` calls. For example, using the `inplace` keyword for `select` statements. 
+ +```python +>>> qtab = kx.Table(data = {'a': [1, 2, 3], 'b': ['a', 'b', 'c']}) +>>> qtab +pykx.Table(pykx.q(' +a b +--- +1 a +2 b +3 c +')) +>>> kx.q.qsql.select(qtab, where=['a in 1 2'], inplace=True) +pykx.Table(pykx.q(' +a b +--- +1 a +2 b +')) +>>> qtab # Query has been persisted +pykx.Table(pykx.q(' +a b +--- +1 a +2 b +')) ``` ### ANSI SQL API @@ -86,9 +114,10 @@ Finally, you can prepare a `SQL` query and then when it is used later the types match in order for the query to run. ```Python ->>> p = q.sql.prepare('select * from trades where date = $1 and price < $2', +>>> import pykx as kx +>>> p = kx.q.sql.prepare('select * from trades where date = $1 and price < $2', kx.DateAtom, kx.FloatAtom ) ->>> q.sql.execute(p, date(2022, 1, 2), 500.0) +>>> kx.q.sql.execute(p, date(2022, 1, 2), 500.0) ``` diff --git a/examples/subscriber/readme.md b/examples/subscriber/readme.md index 6c605a6..71876fc 100644 --- a/examples/subscriber/readme.md +++ b/examples/subscriber/readme.md @@ -40,7 +40,7 @@ q process is started. 
```bash // run the subscriber which will automatically connect $ python subscriber.py -===== Initital Table ===== +===== Initial Table ===== a b --- 4 8 @@ -53,7 +53,7 @@ a b 2 1 1 8 8 5 -===== Initital Table ===== +===== Initial Table ===== ``` diff --git a/examples/subscriber/subscriber.py b/examples/subscriber/subscriber.py index 5ab019a..e187d34 100644 --- a/examples/subscriber/subscriber.py +++ b/examples/subscriber/subscriber.py @@ -30,9 +30,9 @@ async def main_loop(q): async def main(): global table async with kx.RawQConnection(port=5001) as q: - print('===== Initital Table =====') + print('===== Initial Table =====') print(table) - print('===== Initital Table =====') + print('===== Initial Table =====') # Set the variable py_server on the q process pointing towards this processes IPC connection # We use neg to ensure the messages are sent async so no reply is expected from this process await q('py_server: neg .z.w') diff --git a/mkdocs.yml b/mkdocs.yml index b88ea41..2981bf7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -83,6 +83,9 @@ plugins: include: ["*.ipynb"] include_source: True # Let users download the Jupyter notebook to use interactively allow_errors: False + remove_tag_config: + remove_input_tags: + - hide_code - exclude: glob: - internal/* @@ -121,9 +124,11 @@ plugins: skip_files: # Skip files entirely - comparisons.md # Skipped due to false positives - performance.md # Skipped due to false positives - - getting-started/interface_overview.ipynb + - "getting-started/PyKX Introduction Notebook.ipynb" - getting-started/q_magic_command.ipynb - user-guide/advanced/Pandas_API.ipynb + - getting-started/PyKX Introduction Notebook.ipynb + - examples/db-management.ipynb theme: @@ -161,14 +166,14 @@ nav: - kdb+ and q: 'https://code.kx.com/q' - kdb Insights: "https://code.kx.com/insights/core" - kdb Insights Enterprise: "https://code.kx.com/insights/platform/" - - Licensing : "https://code.kx.com/insights/licensing/licensing/" + - KDB.AI: 
"https://code.kx.com/kdbai/" - PyKX: - index.md - Getting Started: - What is PyKX?: getting-started/what_is_pykx.md - Installation: getting-started/installing.md - Quickstart guide: getting-started/quickstart.md - - Notebook Example: getting-started/interface_overview.ipynb + - PyKX Introduction Notebook: "getting-started/PyKX Introduction Notebook.ipynb" - Jupyter q Magic Command: getting-started/q_magic_command.ipynb - User Guide: - Introduction: user-guide/index.md @@ -182,14 +187,15 @@ nav: - Handling nulls and infinities: user-guide/fundamentals/nulls_and_infinities.md - Advanced usage and performance considerations: - Communicating via IPC: user-guide/advanced/ipc.md + - Database interactions: user-guide/advanced/database.md - Using q functions in a Pythonic way: user-guide/advanced/context_interface.md - Modes of operation: user-guide/advanced/modes.md - Numpy integration: user-guide/advanced/numpy.md - Serialization and de-serialization: user-guide/advanced/serialization.md - Performance considerations: user-guide/advanced/performance.md - Interface limitations: user-guide/advanced/limitations.md - - Pandas API: user-guide/advanced/Pandas_API.ipynb - Attributes: user-guide/advanced/attributes.md + - Pandas Like API Coverage: user-guide/advanced/pandas_breakdown.md - API: - Code execution: - PyKX native functions: api/pykx-execution/q.md @@ -201,30 +207,40 @@ nav: - PyKX type wrappers: api/pykx-q-data/wrappers.md - PyKX to Pythonic data type mapping: api/pykx-q-data/type_conversions.md - Registering Custom Conversions: api/pykx-q-data/register.md + - Pandas Like API: user-guide/advanced/Pandas_API.ipynb - License management: api/license.md - Random data generation: api/random.md - Querying: api/query.md + - Database Interactions: api/db.md + - Remote Python Execution: api/remote.md - IPC: api/ipc.md - PyKX Exceptions: api/exceptions.md - Schema generation: api/schema.md + - System Command Wrappers: api/system.md - File loading and saving: - Writing PyKX data 
to disk: api/pykx-save-load/write.md - Reading PyKX data from disk: api/pykx-save-load/read.md - Reimporter module: api/reimporting.md - Serialization: api/serialize.md + - Beta Features: + - Introduction: beta-features/index.md + - Database Management: beta-features/db-management.md + - Remote Function Execution: beta-features/remote.md + - Multithreading: beta-features/threading.md + - Python interfacing within q: + - Overview: pykx-under-q/intro.md + - API: pykx-under-q/api.md + - Upgrading from embedPy: pykx-under-q/upgrade.md + - Known Issues: pykx-under-q/known_issues.md - Examples: - Subscriber: examples/subscriber/readme.md - Compression and Encryption: examples/compress_and_encrypt/readme.md - IPC: examples/ipc/README.md - PyKX as a Server: examples/server/server.md - - Multithreaded Execution: examples/threaded_execution/README.md + - Multithreaded Execution: examples/threaded_execution/threading.md - Extras: - Comparisons against other Python/q interfaces: extras/comparisons.md - Known issues: extras/known_issues.md - - Python interfacing within q: - - Overview: pykx-under-q/intro.md - - API: pykx-under-q/api.md - - Upgrading from embedPy: pykx-under-q/upgrade.md - Release notes: - PyKX: release-notes/changelog.md - PyKX under q: release-notes/underq-changelog.md @@ -233,3 +249,7 @@ nav: - Frequently Asked Questions (FAQ): faq.md - Support: support.md - License: license.md + - Contributors: contributors.md + - APIs: "https://code.kx.com/insights/api/index.html" + - Licensing : "https://code.kx.com/insights/licensing/licensing/" + - Help: "https://code.kx.com/home/support/" diff --git a/pyproject.toml b/pyproject.toml index e321ca0..9049736 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,9 +48,12 @@ classifiers = [ "Typing :: Typed", ] dependencies = [ - "numpy~=1.22; python_version!='3.7'", "numpy~=1.20; python_version=='3.7'", - "pandas>=1.2", + "numpy~=1.22; python_version=='3.8'", + "numpy~=1.22; python_version=='3.9'", + "numpy~=1.22; 
python_version=='3.10'", + "numpy~=1.23; python_version>='3.11'", + "pandas>=1.2, < 2.2.0", "pytz>=2022.1", "toml~=0.10.2", ] @@ -92,6 +95,12 @@ pyarrow = [ # TODO: Fix pyarrow support for python 3.11 "pyarrow>=3.0.0, <10.0.0; python_version!='3.11'", ] +dashboards = [ + "ast2json~=0.3", +] +beta = [ + "dill>=0.2.0", +] test = [ "coverage[toml]==6.3.2", "Cython~=3.0.0", @@ -103,6 +112,7 @@ test = [ "pytest-monitor==1.6.5; sys_platform!='darwin'", "pytest-randomly==3.11.0", "pytest-xdist==2.5.0", + "pytest-order==1.1.0", "psutil==5.9.5", "pytest-timeout>=2.0.0" ] @@ -118,8 +128,11 @@ changelog = "https://code.kx.com/pykx/changelog.html" [build-system] requires = [ "Cython~=3.0.0", - "numpy~=1.22, <1.23; python_version!='3.7'", # Use the highest patch version of numpy 1.22.x, this will still support a user using numpy version 1.22.0 "numpy~=1.20.0; python_version=='3.7'", # Use numpy version 1.20.x for building the python 3.7 wheel + "numpy~=1.22, <1.23; python_version=='3.8'", # Use the highest patch version of numpy 1.22.x, this will still support a user using numpy version 1.22.0 + "numpy~=1.22, <1.23; python_version=='3.9'", # Use the highest patch version of numpy 1.22.x, this will still support a user using numpy version 1.22.0 + "numpy~=1.22, <1.23; python_version=='3.10'", # Use the highest patch version of numpy 1.22.x, this will still support a user using numpy version 1.22.0 + "numpy~=1.23.2, <1.24; python_version=='3.11'", "setuptools==60.9.3", "setuptools-scm[toml]~=6.0.1", "tomli>=2.0.1", @@ -172,6 +185,7 @@ exclude = [ "docs/internal", "vcpkg", "tests/win_tests/*", + "scripts/*", "_version.py", ] show-source = "True" @@ -184,6 +198,7 @@ per-file-ignores = [ "__init__.py:F401,F403" # Allow unused and * imports in __init__ files; this lets us expose definitions ] ignore = [ + "C901", # function is too complex "D107", # missing docstring in __init__ "E124", # closing bracket does not match visual indentation; Google's Python style guide is incompatible 
with this "E225", # missing whitespace around operator (It thinks the * used for pointers is an operator) diff --git a/setup.py b/setup.py index 37c87c4..9013469 100755 --- a/setup.py +++ b/setup.py @@ -80,7 +80,13 @@ def run(self): self.build_q_c_extensions() super().run() - def build_q_c_extension(self, compiler, lib, lib_ext): + def build_q_c_extension(self, compiler, lib, lib_ext, library=None): + libs = [ + 'dl', + *windows_libraries, + ] + if library is not None: + libs.extend(library) return compiler.link_shared_object( objects=compiler.compile( sources=[str(src_dir/f'{lib}.c')], @@ -100,10 +106,7 @@ def build_q_c_extension(self, compiler, lib, lib_ext): ], ), output_filename=str(Path(self.build_lib)/'pykx'/f'{lib}.{lib_ext}'), - libraries=[ - 'dl', - *windows_libraries, - ], + libraries=libs, library_dirs=[ str(src_dir/'lib'/q_lib_dir_name), *windows_library_dirs, @@ -123,6 +126,8 @@ def build_q_c_extensions(self): lib_ext = 'dll' if system == 'Windows' else 'so' self.build_q_c_extension(compiler, 'pykx', lib_ext) self.build_q_c_extension(compiler, 'pykxq', lib_ext) + if system != 'Windows': + self.build_q_c_extension(compiler, '_tcore', lib_ext, library=['pthread']) class clean(default_clean): diff --git a/src/pykx/.gitignore b/src/pykx/.gitignore index 9d85b4c..8c93224 100644 --- a/src/pykx/.gitignore +++ b/src/pykx/.gitignore @@ -6,5 +6,6 @@ !pykx.c !pykxq.c !numpy_conversions.c +!_tcore.c *.html _version.py diff --git a/src/pykx/__init__.py b/src/pykx/__init__.py index 8ffdbb7..0f8632a 100644 --- a/src/pykx/__init__.py +++ b/src/pykx/__init__.py @@ -20,6 +20,11 @@ else: # nocov pass + +# List of beta features available in the current PyKX version +beta_features = [] + + from . import reimporter # Importing core initializes q if in licensed mode, and loads the q C API symbols. This should # happen early on so that if the qinit check is currently happening then no time is wasted. 
@@ -36,7 +41,7 @@ from warnings import warn from weakref import proxy -from .config import k_allocator, licensed, no_sigint, pykx_platlib_dir +from .config import k_allocator, licensed, no_pykx_signal, no_sigint, pykx_platlib_dir, under_q from . import util if platform.system() == 'Windows': # nocov @@ -44,6 +49,17 @@ if platform.python_version_tuple()[:2] >= ('3', '8'): os.add_dll_directory(pykx_platlib_dir) +# Cache initialised signal values prior to PyKX loading +_signal_list = [ + 'signal.SIGINT', + 'signal.SIGTERM', +] + +_signal_dict = {} + +for i in _signal_list: + _signal_dict[i] = signal.getsignal(eval(i)) + def _first_resolved_path(possible_paths: List[Union[str, Path]]) -> Path: """Returns the resolved version of the first path that exists.""" @@ -273,6 +289,7 @@ def paths(self, paths: List[Union[str, Path]]): from ._ipc import _init as _ipc_init _ipc_init(q) +from .db import DB from .ipc import AsyncQConnection, QConnection, QFuture, RawQConnection, SecureQConnection, SyncQConnection # noqa from .config import qargs, qhome, qlic from .wrappers import * @@ -292,6 +309,12 @@ def paths(self, paths: List[Union[str, Path]]): from .random import _init as _random_init _random_init(q) +from .db import _init as _db_init +_db_init(q) + +from .remote import _init as _remote_init +_remote_init(q) + if k_allocator: from . import _numpy as _pykx_numpy_cext @@ -402,6 +425,8 @@ def deactivate_numpy_allocator(): # Not running under IPython/Jupyter... 
pass +shutdown_thread = core.shutdown_thread + if licensed: days_to_expiry = q('"D"$', q.z.l[1]) - q.z.D if days_to_expiry < 10: @@ -439,16 +464,18 @@ def deactivate_numpy_allocator(): 'config', 'util', 'q', + 'shutdown_thread', 'PyKXReimport', *exceptions.__all__, *wrappers.__all__, ]) -if not no_sigint: - try: - signal.signal(signal.SIGINT, signal.default_int_handler) - except Exception: - logging.exception('Failed to set SIGINT handler...') +if (not no_sigint) or (not no_pykx_signal): + for k, v in _signal_dict.items(): + try: + signal.signal(eval(k), v) + except Exception: + pass def __dir__(): diff --git a/src/pykx/_tcore.c b/src/pykx/_tcore.c new file mode 100644 index 0000000..feda97c --- /dev/null +++ b/src/pykx/_tcore.c @@ -0,0 +1,604 @@ +#define KXVER 3 +#define PY_SSIZE_T_CLEAN +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "k.h" + + +struct QFuture { + bool done; + K res; +}; + +bool is_done(struct QFuture* fut) { + if (fut->done) + return true; + return false; +} + +struct QCall { + struct QFuture* fut; + int handle; + bool is_dot; + const char* query; + int argc; + K arg0; + K arg1; + K arg2; + K arg3; + K arg4; + K arg5; + K arg6; + K arg7; +}; + +struct QCallNode { + struct QCall* call; + struct QCallNode* next; +}; + +static struct QCallNode* calls_head; +static struct QCallNode* calls_tail; +static void* _q_handle; +static pthread_t q_thread; +static int qinit_rc; +static pthread_mutex_t head_mutex; +static pthread_mutex_t cond_mutex; +static pthread_cond_t cond; +static pthread_mutex_t init_mutex; +static pthread_cond_t init; +static bool kill_thread; + + +int (*_qinit)(int, char**, char*, char*, char*); + +static K (*__ee)(K x); +K _ee(K x) { + return __ee(x); +} + +static K (*__b9)(int x, K k); +K _b9(int x, K k){ + return __b9(x, k); +} + +static K (*__d9)(K x); +K _d9(K x) { + return __d9(x); +} + +static int (*__dj)(int date); +int _dj(int date) { + return __dj(date); +} + +static K 
(*__dl)(void* f, long long n); +K _dl(void* f, long long n) { + return __dl(f, n); +} + +static K (*__dot)(K x, K y); +K _dot_internal(K x, K y) { + return __ee(__dot(x, y)); +} +K _dot(K x, K y) { + struct QFuture* fut = malloc(sizeof(struct QFuture)); + fut->done = false; + fut->res = (K)0; + struct QCall* call = malloc(sizeof(struct QCall)); + call->fut = fut; + call->handle = 0; + call->is_dot = true; + call->query = NULL; + call->argc = 2; + call->arg0 = x; + call->arg1 = y; + call->arg2 = NULL; + call->arg3 = NULL; + call->arg4 = NULL; + call->arg5 = NULL; + call->arg6 = NULL; + call->arg7 = NULL; + struct QCallNode* call_node = malloc(sizeof(struct QCallNode)); + call_node->next = NULL; + call_node->call = call; + pthread_mutex_lock(&head_mutex); + if (calls_head == NULL) { + calls_head = call_node; + calls_tail = call_node; + } else { + calls_tail->next = call_node; + calls_tail = call_node; + } + pthread_mutex_unlock(&head_mutex); + while (1 == 1) { + pthread_mutex_lock(&cond_mutex); + pthread_cond_signal(&cond); + pthread_mutex_unlock(&cond_mutex); + if (is_done(fut)) { + pthread_mutex_lock(&head_mutex); + free(call_node); + free(call); + K res = fut->res; + free(fut); + pthread_mutex_unlock(&head_mutex); + return res; + } + } + return (K)0; +} + +static K (*__ja)(K* x, void* y); +K _ja(K* x, void* y) { + return __ja(x, y); +} + +static K (*__jk)(K* x, K y); +K _jk(K* x, K y) { + return __jk(x, y); +} + +static K (*__js)(K* x, char* s); +K _js(K* x, char* s) { + return __js(x, s); +} + +static K (*__jv)(K* x, K y); +K _jv(K* x, K y) { + return __jv(x, y); +} + +static K (*__k)(int handle, const char* s, ...); +K _k_internal(int handle, const char* s, int argc, K arg0, K arg1, K arg2, K arg3, K arg4, K arg5, K arg6, K arg7) { + switch (argc) { + case 0: return __ee(__k(handle, s, NULL)); + case 1: return __ee(__k(handle, s, arg0, NULL)); + case 2: return __ee(__k(handle, s, arg0, arg1, NULL)); + case 3: return __ee(__k(handle, s, arg0, arg1, arg2, NULL)); 
+        case 4: return __ee(__k(handle, s, arg0, arg1, arg2, arg3, NULL));
+        case 5: return __ee(__k(handle, s, arg0, arg1, arg2, arg3, arg4, NULL));
+        case 6: return __ee(__k(handle, s, arg0, arg1, arg2, arg3, arg4, arg5, NULL));
+        case 7: return __ee(__k(handle, s, arg0, arg1, arg2, arg3, arg4, arg5, arg6, NULL));
+        case 8: return __ee(__k(handle, s, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, NULL));
+    }
+    return (K)0;
+}
+K _k(int handle, const char* s, ...) {  // Queue a k() request for the worker loop and block until its future is done; takes up to 8 K args, NULL-terminated.
+    va_list argp;
+    va_start(argp, s);  // va_start must name the last fixed parameter; passing the literal 8 is undefined behaviour
+    K qargs[8] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
+    int qargc = 0;
+    while (qargc < 8) {  // bounded: with 8 non-NULL args the old unbounded loop stored the terminator in qargs[8]
+        qargs[qargc] = va_arg(argp, K);
+        if (!qargs[qargc])
+            break;
+        qargc++;
+    }
+    va_end(argp);
+
+    struct QFuture* fut = malloc(sizeof(struct QFuture));
+    fut->done = false;
+    fut->res = (K)0;
+    struct QCall* call = malloc(sizeof(struct QCall));
+    call->fut = fut;
+    call->handle = handle;
+    call->is_dot = false;
+    call->query = s;
+    call->argc = qargc;
+    call->arg0 = qargs[0];
+    call->arg1 = qargs[1];
+    call->arg2 = qargs[2];
+    call->arg3 = qargs[3];
+    call->arg4 = qargs[4];
+    call->arg5 = qargs[5];
+    call->arg6 = qargs[6];
+    call->arg7 = qargs[7];
+    struct QCallNode* call_node = malloc(sizeof(struct QCallNode));
+    call_node->next = NULL;
+    call_node->call = call;
+
+    pthread_mutex_lock(&head_mutex);  // append the request to the shared call list
+    if (calls_head == NULL) {
+        calls_head = call_node;
+        calls_tail = call_node;
+    } else {
+        calls_tail->next = call_node;
+        calls_tail = call_node;
+    }
+    pthread_mutex_unlock(&head_mutex);
+    while (1 == 1) {  // keep signalling `cond` until the future is flagged done
+        pthread_mutex_lock(&cond_mutex);
+        pthread_cond_signal(&cond);
+        pthread_mutex_unlock(&cond_mutex);
+        if (is_done(fut)) {
+            pthread_mutex_lock(&head_mutex);  // free the request under the list lock
+            free(call_node);
+            free(call);
+            K res = fut->res;
+            free(fut);
+            pthread_mutex_unlock(&head_mutex);
+            return res;
+        }
+    }
+    return (K)0;
+}
+
+static K (*__ka)(int t);
+K _ka(int t) {
+    return __ka(t);
+}
+
+static K (*__kb)(int x);
+K _kb(int x) {
+    return __kb(x);
+}
+
+static K (*__kc)(int x);
+K _kc(int x) {
+
return __kc(x); +} + +static void (*__kclose)(int x); +void _kclose(int x) { + __kclose(x); +} + +static K (*__kd)(int x); +K _kd(int x) { + return __kd(x); +} + +static K (*__ke)(double x); +K _ke(double x) { + return __ke(x); +} + +static K (*__kf)(double x); +K _kf(double x) { + return __kf(x); +} + +static K (*__kg)(int x); +K _kg(int x) { + return __kg(x); +} + +static K (*__kh)(int x); +K _kh(int x) { + return __kh(x); +} + +static int (*__khpunc)(char* v, int w, char* x, int y, int z); +int _khpunc(char* v, int w, char* x, int y, int z) { + return __khpunc(v, w, x, y, z); +} + +static K (*__ki)(int x); +K _ki(int x) { + return __ki(x); +} + +static K (*__kj)(long long x); +K _kj(long long x){ + return __kj(x); +} + +static K (*__knk)(int n, ...); +K _knk(int n, ...) { + void** args = (void**)malloc(sizeof(void*) * n); + + va_list argp; + va_start(argp, n); + for (int i = 0; i < n; i++) { + args[i] = va_arg(argp, void*); + } + va_end(argp); + K res = (K)0; + switch (n) { + case 1: + res = __knk(n, args[0]); + break; + case 2: + res = __knk(n, args[0], args[1]); + break; + case 3: + res = __knk(n, args[0], args[1], args[2]); + break; + case 4: + res = __knk(n, args[0], args[1], args[2], args[3]); + break; + // TODO: We only use knk(2, ...) internally but there may be a point where we need more. 
+ default: + free(args); + return res; + } + + free(args); + return res; +} + +static K (*__knt)(long long n, K x); +K _knt(long long n, K x) { + return __knt(n, x); +} + +static K (*__kp)(char* x); +K _kp(char* x) { + return __kp(x); +} + + +static K (*__kpn)(char* x, long long n); +K _kpn(char* x, long long n) { + return __kpn(x, n); +} + +static K (*__krr)(const char* s); +K _krr(const char* s) { + return __krr(s); +} + + +static K (*__ks)(char* x); +K _ks(char* x) { + return __ks(x); +} + +static K (*__kt)(int x); +K _kt(int x) { + return __kt(x); +} + +static K (*__ktd)(K x); +K _ktd(K x) { + return __ktd(x); +} + +static K (*__ktj)(short _type, long long x); +K _ktj(short _type, long long x) { + return __ktj(_type, x); +} + +static K (*__ktn)(int _type, long long length); +K _ktn(int _type, long long length) { + return __ktn(_type, length); +} + +static K (*__ku)(U x); +K _ku(U x) { + return __ku(x); +} + +static K (*__kz)(double x); +K _kz(double x) { + return __kz(x); +} + +static void (*__m9)(); +void _m9() { + return __m9(); +} + +static int (*__okx)(K x); +int _okx(K x) { + return __okx(x); +} + +static K (*__orr)(const char* x); +K _orr(const char* x) { + return __orr(x); +} + +static void (*__r0)(K k); +void _r0(K k){ + __r0(k); +} + +static K (*__r1)(K k); +K _r1(K k) { + return __r1(k); +} + +static void (*__sd0)(int d); +void _sd0(int d) { + return __sd0(d); +} + +static void (*__sd0x)(int d, int f); +void _sd0x(int d, int f) { + return __sd0x(d, f); +} + +static K (*__sd1)(int d, void* (*f)(int)); +K _sd1(int d, void* (*f)(int)) { + return __sd1(d, f); +} + +static char* (*__sn)(char* s, long long n); +char* _sn(char* s, long long n) { + return __sn(s, n); +} + +static char* (*__ss)(char* s); +char* _ss(char* s) { + return __ss(s); +} + +static K (*__sslInfo)(K x); +K _sslInfo(K x) { + return __sslInfo(x); +} + +static K (*__vak)(int x, const char* s, va_list l); +K _vak(int x, const char* s, va_list l) { + return __vak(x, s, l); +} + +static K 
(*__vaknk)(int x, va_list l);
+K _vaknk(int x, va_list l) {
+    return __vaknk(x, l);
+}
+
+static int (*__ver)();
+int _ver() {
+    return __ver();
+}
+
+static K (*__xD)(K x, K y);
+K _xD(K x, K y) {
+    return __xD(x, y);
+}
+
+static K (*__xT)(K x);
+K _xT(K x) {
+    return __xT(x);
+}
+
+static int (*__ymd)(int year, int month, int day);
+int _ymd(int year, int month, int day) {
+    return __ymd(year, month, day);
+}
+
+int (*_qinit)(int, char**, char*, char*, char*);
+
+struct QInit {
+    int argc;
+    char** argv;
+    char* qhome;
+    char* qlic;
+    char* qqq;
+};
+
+void* q_thread_init(void* _qini) {  // Thread entry: run qinit with the QInit arguments, publish its result, then serve queued calls until kill_thread is set.
+    struct QInit* qini = (struct QInit*)_qini;
+    qinit_rc = _qinit(qini->argc, qini->argv, qini->qhome, qini->qlic, qini->qqq);
+    pthread_mutex_lock(&init_mutex);  // wake the waiter on `init` now that qinit_rc is set
+    pthread_cond_signal(&init);
+    pthread_mutex_unlock(&init_mutex);
+    while (1 == 1) {
+        pthread_mutex_lock(&cond_mutex);
+        while (calls_head == NULL && kill_thread == false) {  // sleep until there is work or a shutdown request
+            pthread_cond_wait(&cond, &cond_mutex);
+        }
+        pthread_mutex_unlock(&cond_mutex);
+        pthread_mutex_lock(&head_mutex);
+        if (kill_thread)
+            break;  // leaves the loop with head_mutex still held; it is released just below
+        if (calls_head != NULL) {
+            struct QCall* call = calls_head->call;
+            if (call->is_dot) {
+                K res = _dot_internal(call->arg0, call->arg1);
+                call->fut->res = res;
+                call->fut->done = true;
+            } else {
+                K res = _k_internal(call->handle, call->query, call->argc, call->arg0, call->arg1, call->arg2, call->arg3, call->arg4, call->arg5, call->arg6, call->arg7);
+                call->fut->res = res;
+                call->fut->done = true;
+            }
+            calls_head = calls_head->next;  // pop the served request
+        }
+        pthread_mutex_unlock(&head_mutex);
+    }
+    pthread_mutex_unlock(&head_mutex);  // the only way out of the loop is the break above; unlock so a repeated shutdown_thread() or a late caller cannot deadlock
+    return NULL;  // returning from the start routine terminates the thread, as the former pthread_exit(0) did
+}
+
+void shutdown_thread() {  // Set kill_thread and wake the worker so it leaves its loop.
+    pthread_mutex_lock(&head_mutex);
+    kill_thread = true;
+    pthread_mutex_unlock(&head_mutex);
+    pthread_mutex_lock(&cond_mutex);
+    pthread_cond_signal(&cond);
+    pthread_mutex_unlock(&cond_mutex);
+}
+
+int q_init(int argc, char** argv, char* qhome, char* qlic, char* qqq) {
+    calls_head = NULL;
+    calls_tail = NULL;
+    kill_thread = false;
+
pthread_mutex_init(&head_mutex, NULL); + pthread_mutex_init(&cond_mutex, NULL); + pthread_cond_init(&cond, NULL); + pthread_mutex_init(&init_mutex, NULL); + pthread_cond_init(&init, NULL); + struct QInit* qini = malloc(sizeof(struct QInit)); + qini->argc = argc; + qini->argv = argv; + qini->qhome = qhome; + qini->qlic = qlic; + qini->qqq = qqq; // TODO: ADD COMMENT + qinit_rc = -256; + int rc = pthread_create(&q_thread, NULL, q_thread_init, (void*)qini); + + pthread_mutex_lock(&init_mutex); + while (qinit_rc == -256) { + pthread_cond_wait(&init, &init_mutex); + } + pthread_mutex_unlock(&init_mutex); + return qinit_rc; +} + +void sym_init(char* libq_path) { + _q_handle = dlopen(libq_path, RTLD_NOW | RTLD_GLOBAL); + + _qinit = dlsym(_q_handle, "qinit"); + __b9 = dlsym(_q_handle, "b9"); + __d9 = dlsym(_q_handle, "d9"); + __dj = dlsym(_q_handle, "dj"); + __dl = dlsym(_q_handle, "dl"); + __dot = dlsym(_q_handle, "dot"); + __ee = dlsym(_q_handle, "ee"); + __ja = dlsym(_q_handle, "ja"); + __jk = dlsym(_q_handle, "jk"); + __js = dlsym(_q_handle, "js"); + __jv = dlsym(_q_handle, "jv"); + __k = dlsym(_q_handle, "k"); + __ka = dlsym(_q_handle, "ka"); + __kb = dlsym(_q_handle, "kb"); + __kc = dlsym(_q_handle, "kc"); + __kclose = dlsym(_q_handle, "kclose"); + __kd = dlsym(_q_handle, "kd"); + __ke = dlsym(_q_handle, "ke"); + __kf = dlsym(_q_handle, "kf"); + __kg = dlsym(_q_handle, "kg"); + __kh = dlsym(_q_handle, "kh"); + __khpunc = dlsym(_q_handle, "khpunc"); + __ki = dlsym(_q_handle, "ki"); + __kj = dlsym(_q_handle, "kj"); + __knk = dlsym(_q_handle, "knk"); + __knt = dlsym(_q_handle, "knt"); + __kp = dlsym(_q_handle, "kp"); + __kpn = dlsym(_q_handle, "kpn"); + __krr = dlsym(_q_handle, "krr"); + __ks = dlsym(_q_handle, "ks"); + __kt = dlsym(_q_handle, "kt"); + __ktd = dlsym(_q_handle, "ktd"); + __ktj = dlsym(_q_handle, "ktj"); + __ktn = dlsym(_q_handle, "ktn"); + __ku = dlsym(_q_handle, "ku"); + __kz = dlsym(_q_handle, "kz"); + __m9 = dlsym(_q_handle, "m9"); + __okx = 
dlsym(_q_handle, "okx"); + __orr = dlsym(_q_handle, "orr"); + __r0 = dlsym(_q_handle, "r0"); + __r1 = dlsym(_q_handle, "r1"); + __sd0 = dlsym(_q_handle, "sd0"); + __sd0x = dlsym(_q_handle, "sd0x"); + __sd1 = dlsym(_q_handle, "sd1"); + __sn = dlsym(_q_handle, "sn"); + __ss = dlsym(_q_handle, "ss"); + __sslInfo = dlsym(_q_handle, "sslInfo"); + __vak = dlsym(_q_handle, "vak"); + __vaknk = dlsym(_q_handle, "vaknk"); + __ver = dlsym(_q_handle, "ver"); + __xD = dlsym(_q_handle, "xD"); + __xT = dlsym(_q_handle, "xT"); + __ymd = dlsym(_q_handle, "ymd"); +} diff --git a/src/pykx/_wrappers.pxd b/src/pykx/_wrappers.pxd index 3c5d326..0c21dae 100644 --- a/src/pykx/_wrappers.pxd +++ b/src/pykx/_wrappers.pxd @@ -12,5 +12,4 @@ cdef extern from 'numpy/arrayobject.h': cpdef deserialize(x) -cdef complex UUID_to_complex(u: UUID) except * cdef factory(uintptr_t addr, bint incref, bint err_preamble=*) diff --git a/src/pykx/_wrappers.pyx b/src/pykx/_wrappers.pyx index 310be65..517c607 100644 --- a/src/pykx/_wrappers.pyx +++ b/src/pykx/_wrappers.pyx @@ -76,18 +76,6 @@ cdef inline uint64_t byteswap64(uint64_t x): return x -cdef object complex_to_UUID(np.complex128_t c): - return UUID(int=(int((&c.real)[0]) << 64) | (&c.imag)[0]) - - -cdef complex UUID_to_complex(u: UUID) except *: - cdef uint64_t upper_bits = (u.int & (-1 ^ 0xFFFFFFFFFFFFFFFF)) >> 64 - cdef uint64_t lower_bits = u.int & 0xFFFFFFFFFFFFFFFF - upper_bits = byteswap64(upper_bits) - lower_bits = byteswap64(lower_bits) - return complex((&upper_bits)[0], (&lower_bits)[0]) - - # A cdef class is used to store the reference in order to guarantee r0 is called cdef class _K: cdef core.K k @@ -195,7 +183,6 @@ cpdef k_unpickle(x): # We pickle to a Numpy array instead of bytes to benefit from Numpy's highly performant pickling. 
cpdef k_pickle(x): - # Call b9 with mode 3: unenumerate & allow serialization of GUIDs, timespan and timestamp cdef core.K k_serialized = core.b9(6, x._addr) serialized = factory(k_serialized, False) cdef np.npy_intp n = k_serialized.n @@ -207,7 +194,6 @@ cpdef k_pickle(x): cpdef k_hash(x): - # Call b9 with mode 2: unenumerate & allow serialization of timespan and timestamp cdef core.K serialized = core.b9(6, x._addr) return hash(PyBytes_FromStringAndSize(serialized.G0, serialized.n)) @@ -307,8 +293,7 @@ def vector_unlicensed_getitem(self, ssize_t index): def guid_atom_py(self, bint raw, bint has_nulls, bint stdlib): if raw: return np.asarray(_k(self).G0)[0] - return complex_to_UUID(np.asarray( - _k(self).G0).byteswap()[0]) + return UUID(bytes=(_k(self).G0)[:16]) def list_np(self, bint raw, bint has_nulls): @@ -477,7 +462,8 @@ cdef inline object select_wrapper(core.K k): # XXX: it's possible to have a dictionary that is not a keyed table, but still uses tables # as its keys key_ktype = (k.G0)[0].t - wrapper = wrappers.KeyedTable if key_ktype == 98 else wrappers.Dictionary + value_ktype = (k.G0)[1].t + wrapper = wrappers.KeyedTable if key_ktype == 98 and value_ktype == 98 else wrappers.Dictionary return wrapper diff --git a/src/pykx/cast.py b/src/pykx/cast.py index 2dd9dae..6b2daf9 100644 --- a/src/pykx/cast.py +++ b/src/pykx/cast.py @@ -75,6 +75,15 @@ def cast_to_python_date(x): raise _cast_TypeError(x, type(x), datetime.date) +def cast_to_python_time(x): + if type(x) is datetime.datetime: + return x.time() + if type(x) is np.datetime64: + return x.astype(datetime.time).time() + else: + raise _cast_TypeError(x, type(x), datetime.time) + + def cast_to_python_datetime(x): if type(x) is datetime.date: return datetime.datetime.combine(x, datetime.datetime.min.time()) @@ -115,6 +124,7 @@ def cast_to_python_timedelta(x): __all__ = [ 'cast_numpy_ndarray_to_dtype', 'cast_to_python_date', + 'cast_to_python_time', 'cast_to_python_datetime', 'cast_to_python_float', 
'cast_to_python_int', diff --git a/src/pykx/config.py b/src/pykx/config.py index 9dd7414..63af5d1 100644 --- a/src/pykx/config.py +++ b/src/pykx/config.py @@ -12,7 +12,7 @@ import toml import pandas as pd -from .exceptions import PyKXWarning +from .exceptions import PyKXWarning, QError system = platform.system() @@ -26,6 +26,7 @@ q_lib_dir_name = 'm64arm' if 'Linux' in system and ('arm' in platform.machine() or 'aarch64' in platform.machine()): q_lib_dir_name = 'l64arm' +tcore_path_location = bytes(Path(__file__).parent.resolve(strict=True) / '_tcore.so') # Profile information for user defined config @@ -44,7 +45,7 @@ def _get_config_value(param, default): return os.getenv(param, default) -def _is_enabled(param, cmdflag=None, deprecated=False): +def _is_enabled(param, cmdflag=False, deprecated=False): env_config = _get_config_value(param, '').lower() in ('1', 'true') if deprecated and env_config: warn('The environment variable ' + param + ' is deprecated.\n' @@ -115,6 +116,17 @@ def _is_set(envvar): qargs = tuple(shlex.split(_get_config_value('QARGS', ''))) +def _license_install_B64(license, license_type): + try: + lic = base64.b64decode(license) + except base64.binascii.Error: + raise Exception('Invalid license copy provided, ' + 'please ensure you have copied the license information correctly') + + with open(qlic/license_type, 'wb') as binary_file: + binary_file.write(lic) + + def _license_install(intro=None, return_value=False): # noqa: modes_url = "https://code.kx.com/pykx/user-guide/advanced/modes.html" lic_url = "https://kx.com/kdb-insights-personal-edition-license-download" @@ -166,22 +178,15 @@ def _license_install(intro=None, return_value=False): # noqa: raise Exception(f'Download location provided {download_location} does not exist.') shutil.copy(download_location, qlic) - print('\nPyKX license successfully installed!\n') + print('\nPyKX license successfully installed. 
Restart Python for this to take effect.\n')  # noqa: E501
         elif install_type == '2':
 
             license = input('\nPlease provide your activation key (base64 encoded string) '
                             'provided with your welcome email : ').strip()
 
-            try:
-                lic = base64.b64decode(license)
-            except base64.binascii.Error:
-                raise Exception('Invalid license copy provided, '
-                                'please ensure you have copied the license information correctly')
+            _license_install_B64(license, 'kc.lic')
 
-            with open(qlic/'kc.lic', 'wb') as binary_file:
-                binary_file.write(lic)
-
-            print('PyKX license successfully installed!\n')
+            print('\nPyKX license successfully installed. Restart Python for this to take effect.\n')  # noqa: E501
         elif install_type == '3':
             if return_value:
                 return False
@@ -196,13 +201,25 @@ def _license_install(intro=None, return_value=False): # noqa:
 if any(i in qargs for i in _arglist) or _licenvset or not hasattr(sys, 'ps1'):  # noqa: C901
     pass
 elif not license_located:
-    _license_install()
+    kc_b64 = _get_config_value('KDB_LICENSE_B64', None)
+    k4_b64 = _get_config_value('KDB_K4LICENSE_B64', None)
+    if kc_b64 is not None:
+        _license_install_B64(kc_b64, 'kc.lic')
+    elif k4_b64 is not None:
+        _license_install_B64(k4_b64, 'k4.lic')
+    else:
+        _license_install()
 
 licensed = False
 
 under_q = _is_enabled('PYKX_UNDER_Q')
 qlib_location = Path(_get_config_value('PYKX_Q_LIB_LOCATION', pykx_dir/'lib'))
-no_sigint = _is_enabled('PYKX_NO_SIGINT')
+pykx_threading = _is_enabled('PYKX_THREADING')
+if pykx_threading and platform.system() == 'Windows':  # warn only when threading was actually requested
+    pykx_threading = False
+    warn('PYKX_THREADING is only supported on Linux / MacOS, it has been disabled.')
+no_sigint = _is_enabled('PYKX_NO_SIGINT', deprecated=True)
+no_pykx_signal = _is_enabled('PYKX_NO_SIGNAL')
 
 if _is_enabled('PYKX_ENABLE_PANDAS_API', '--pandas-api'):
     warn('Usage of PYKX_ENABLE_PANDAS_API configuration variable was removed in '
@@ -227,6 +244,7 @@ def _license_install(intro=None, return_value=False): # noqa:
 use_q_lock = _get_config_value('PYKX_Q_LOCK', False)
 skip_under_q =
_is_enabled('SKIP_UNDERQ', '--skip-under-q') or _is_enabled('PYKX_SKIP_UNDERQ') no_qce = _is_enabled('PYKX_NOQCE', '--no-qce') +beta_features = _is_enabled('PYKX_BETA_FEATURES', '--beta') load_pyarrow_unsafe = _is_enabled('PYKX_LOAD_PYARROW_UNSAFE', '--load-pyarrow-unsafe') pandas_2 = pd.__version__.split('.')[0] == '2' @@ -254,6 +272,14 @@ def _set_keep_local_times(keep_local_times_): keep_local_times = keep_local_times_ +def _check_beta(feature_name, *, status=beta_features): + if status: + return None + raise QError(f'Attempting to use a beta feature "{feature_name}' + '", please set configuration flag PYKX_BETA_FEATURES=true ' + 'to run these operations') + + __all__ = [ 'system', 'q_lib_dir_name', diff --git a/src/pykx/console.py b/src/pykx/console.py index c5bbc79..6f91345 100644 --- a/src/pykx/console.py +++ b/src/pykx/console.py @@ -54,15 +54,15 @@ def __call__(self): self._eval_and_print(code) -class PyConsole(InteractiveConsole): +class PyConsole: def __init__(self): - super().__init__(globals()) - self.push('import sys') - self.push('quit = sys.exit') - self.push('exit = sys.exit') + self.console = InteractiveConsole(globals()) + self.console.push('import sys') + self.console.push('quit = sys.exit') + self.console.push('exit = sys.exit') def interact(self, banner=None, exitmsg=None): try: - super().interact(banner=banner, exitmsg=exitmsg) + self.console.interact(banner=banner, exitmsg=exitmsg) except SystemExit: _pykx_helpers.clean_errors() diff --git a/src/pykx/core.pxd b/src/pykx/core.pxd index 58b674d..3af8dee 100644 --- a/src/pykx/core.pxd +++ b/src/pykx/core.pxd @@ -57,6 +57,7 @@ cdef float* (*kE)(K x) cdef double* (*kF)(K x) cdef K* (*kK)(K x) +cdef void (*_shutdown_thread)() cdef K (*b9)(int mode, K x) cdef K (*d9)(K x) cdef int (*dj)(int date) diff --git a/src/pykx/core.pyx b/src/pykx/core.pyx index 0ddff16..2a999cb 100644 --- a/src/pykx/core.pyx +++ b/src/pykx/core.pyx @@ -1,12 +1,15 @@ +from cython import NULL import os, platform from pathlib 
import Path +from platform import system from threading import RLock from typing import List, Tuple import re import sys +from . import beta_features from .util import num_available_cores -from .config import _is_enabled, _license_install +from .config import tcore_path_location, _is_enabled, _license_install, pykx_threading, _check_beta def _normalize_qargs(user_args: List[str]) -> Tuple[bytes]: @@ -58,6 +61,7 @@ cdef int _qinit(int (*qinit)(int, char**, char*, char*, char*), qhome_str: str, cdef char* _libq_path +cdef char* _tcore_path cdef void* _q_handle @@ -165,6 +169,9 @@ cdef inline uintptr_t _keval(const char* code, K k1, K k2, K k3, K k4, K k5, K k # with nogil ensures the gil is dropped during the call into k with nogil: return knogil( k, code, k1, k2, k3, k4, k5, k6, k7, k8) + if pykx_threading: + with nogil: + return k(handle, code, k1, k2, k3, k4, k5, k6, k7, k8, NULL) return k(handle, code, k1, k2, k3, k4, k5, k6, k7, k8, NULL) except BaseException as err: raise err @@ -237,105 +244,135 @@ def _link_qhome(): pass # Skip subdirectories of $QHOME that don't exist. 
update_marker.touch() +cdef void (*init_syms)(char* x) -if under_q: # nocov - if '--unlicensed' in qargs: # nocov - warn("The '--unlicensed' flag has no effect when running under a q process", # nocov - PyKXWarning) # nocov - _q_handle = dlopen(NULL, RTLD_NOW | RTLD_GLOBAL) # nocov - licensed = True # nocov -else: - # To make Cython happy, we indirectly assign Python values to `_libq_path` - if '--unlicensed' in qargs or _is_enabled('PYKX_UNLICENSED', '--unlicensed'): - _libq_path_py = bytes(find_core_lib('e')) - _libq_path = _libq_path_py - _q_handle = dlopen(_libq_path, RTLD_NOW | RTLD_GLOBAL) - licensed = False +if not pykx_threading: + if under_q: # nocov + if '--unlicensed' in qargs: # nocov + warn("The '--unlicensed' flag has no effect when running under a q process", # nocov + PyKXWarning) # nocov + _q_handle = dlopen(NULL, RTLD_NOW | RTLD_GLOBAL) # nocov + licensed = True # nocov else: - if platform.system() == 'Windows': # nocov - from ctypes.util import find_library # nocov - if find_library("msvcr100.dll") is None: # nocov - msvcrMSG = "Needed dependency msvcr100.dll missing. See: https://code.kx.com/pykx/getting-started/installing.html" # nocov - if '--licensed' in qargs or _is_enabled('PYKX_LICENSED', --licensed): # nocov - raise PyKXException(msvcrMSG) # nocov - else: # nocov - warn(msvcrMSG, PyKXWarning) # nocov - _core_q_lib_path = find_core_lib('q') - licensed = True - if not _is_enabled('PYKX_UNSAFE_LOAD', '--unsafeload'): - _qinit_check_proc = subprocess.run( - (str(Path(sys.executable).as_posix()), '-c', 'import pykx'), - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - env={ - **os.environ, - 'PYKX_QINIT_CHECK': ';'.join(( - str(_core_q_lib_path), - str(pykx_lib_dir if ignore_qhome is None else qhome), - str(qlic), - # Use the env var directly because `config.qargs` has already split the args. 
- os.environ.get('QARGS', ''), - )), - } - ) - _qinit_output = ' ' + ' '.join(_qinit_check_proc.stdout.strip().splitlines(True)) - _license_message = False - if _qinit_check_proc.returncode: # Fallback to unlicensed mode - if _qinit_output != ' ': - _capout_msg = f'Captured output from initialization attempt:\n{_qinit_output}' - else: - _capout_msg = '' # nocov - this can only occur under extremely weird circumstances. - if hasattr(sys, 'ps1'): - if re.compile('exp').search(_capout_msg): - _exp_license = 'Your PyKX license has now expired.\n\n'\ - f'{_capout_msg}\n\n'\ - 'Would you like to renew your license? [Y/n]: ' - _license_message = _license_install(_exp_license, True) - elif re.compile('embedq').search(_capout_msg): - _ce_license = 'You appear to be using a non kdb Insights license.\n\n'\ - f'{_capout_msg}\n\n'\ - 'Running PyKX in the absence of a kdb Insights license '\ - 'has reduced functionality.\nWould you like to install '\ - 'a kdb Insights personal license? [Y/n]: ' - _license_message = _license_install(_ce_license, True) - elif re.compile('upd').search(_capout_msg): - _upd_license = 'Your installed license is out of date for this version'\ - ' of PyKX and must be updated.\n\n'\ - f'{_capout_msg}\n\n'\ - 'Would you like to install an updated kdb '\ - 'Insights personal license? 
[Y/n]: ' - _license_message = _license_install(_upd_license, True) - if (not _license_message) and _qinit_check_proc.returncode: - if '--licensed' in qargs or _is_enabled('PYKX_LICENSED', '--licensed'): - raise PyKXException(f'Failed to initialize embedded q.{_capout_msg}') - else: - warn(f'Failed to initialize PyKX successfully with the following error: {_capout_msg}', PyKXWarning) - _libq_path_py = bytes(find_core_lib('e')) - _libq_path = _libq_path_py - _q_handle = dlopen(_libq_path, RTLD_NOW | RTLD_GLOBAL) - licensed = False - if licensed: # Start in licensed mode - if 'QHOME' in os.environ and not ignore_qhome: - # Only link the user's QHOME to PyKX's QHOME if the user actually set $QHOME. - # Note that `pykx.qhome` has a default value of `./q`, as that is the behavior - # employed by q. - try: - _link_qhome() - except BaseException: - warn('Failed to link user QHOME directory contents to allow access to PyKX.\n' - 'To suppress this warning please set the configuration option "PYKX_IGNORE_QHOME" as outlined at:\n' - 'https://code.kx.com/pykx/user-guide/configuration.html') - _libq_path_py = bytes(_core_q_lib_path) + # To make Cython happy, we indirectly assign Python values to `_libq_path` + if '--unlicensed' in qargs or _is_enabled('PYKX_UNLICENSED', '--unlicensed'): + _libq_path_py = bytes(find_core_lib('e')) _libq_path = _libq_path_py _q_handle = dlopen(_libq_path, RTLD_NOW | RTLD_GLOBAL) - qinit = dlsym(_q_handle, 'qinit') - qinit_return_code = _qinit(qinit, str(qhome if ignore_qhome else pykx_lib_dir), str(qlic), list(qargs)) - if qinit_return_code: # nocov - dlclose(_q_handle) # nocov - licensed = False # nocov - raise PyKXException( # nocov - f'Non-zero qinit return code {qinit_return_code} despite successful pre-check') # nocov + licensed = False + else: + if platform.system() == 'Windows': # nocov + from ctypes.util import find_library # nocov + if find_library("msvcr100.dll") is None: # nocov + msvcrMSG = "Needed dependency msvcr100.dll missing. 
See: https://code.kx.com/pykx/getting-started/installing.html" # nocov + if '--licensed' in qargs or _is_enabled('PYKX_LICENSED', '--licensed'): # nocov + raise PyKXException(msvcrMSG) # nocov + else: # nocov + warn(msvcrMSG, PyKXWarning) # nocov + _core_q_lib_path = find_core_lib('q') + licensed = True + if not _is_enabled('PYKX_UNSAFE_LOAD', '--unsafeload'): + _qinit_check_proc = subprocess.run( + (str(Path(sys.executable).as_posix()), '-c', 'import pykx'), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + env={ + **os.environ, + 'PYKX_QINIT_CHECK': ';'.join(( + str(_core_q_lib_path), + str(pykx_lib_dir if ignore_qhome is None else qhome), + str(qlic), + # Use the env var directly because `config.qargs` has already split the args. + os.environ.get('QARGS', ''), + )), + } + ) + _qinit_output = ' ' + ' '.join(_qinit_check_proc.stdout.strip().splitlines(True)) + _license_message = False + if _qinit_check_proc.returncode: # Fallback to unlicensed mode + if _qinit_output != ' ': + _capout_msg = f'Captured output from initialization attempt:\n{_qinit_output}' + else: + _capout_msg = '' # nocov - this can only occur under extremely weird circumstances. + if hasattr(sys, 'ps1'): + if re.compile('exp').search(_capout_msg): + _exp_license = 'Your PyKX license has now expired.\n\n'\ + f'{_capout_msg}\n\n'\ + 'Would you like to renew your license? [Y/n]: ' + _license_message = _license_install(_exp_license, True) + elif re.compile('embedq').search(_capout_msg): + _ce_license = 'You appear to be using a non kdb Insights license.\n\n'\ + f'{_capout_msg}\n\n'\ + 'Running PyKX in the absence of a kdb Insights license '\ + 'has reduced functionality.\nWould you like to install '\ + 'a kdb Insights personal license? 
[Y/n]: ' + _license_message = _license_install(_ce_license, True) + elif re.compile('upd').search(_capout_msg): + _upd_license = 'Your installed license is out of date for this version'\ + ' of PyKX and must be updated.\n\n'\ + f'{_capout_msg}\n\n'\ + 'Would you like to install an updated kdb '\ + 'Insights personal license? [Y/n]: ' + _license_message = _license_install(_upd_license, True) + if (not _license_message) and _qinit_check_proc.returncode: + if '--licensed' in qargs or _is_enabled('PYKX_LICENSED', '--licensed'): + raise PyKXException(f'Failed to initialize embedded q.{_capout_msg}') + else: + warn(f'Failed to initialize PyKX successfully with the following error: {_capout_msg}', PyKXWarning) + _libq_path_py = bytes(find_core_lib('e')) + _libq_path = _libq_path_py + _q_handle = dlopen(_libq_path, RTLD_NOW | RTLD_GLOBAL) + licensed = False + if licensed: # Start in licensed mode + if 'QHOME' in os.environ and not ignore_qhome: + # Only link the user's QHOME to PyKX's QHOME if the user actually set $QHOME. + # Note that `pykx.qhome` has a default value of `./q`, as that is the behavior + # employed by q. 
+ try: + _link_qhome() + except BaseException: + warn('Failed to link user QHOME directory contents to allow access to PyKX.\n' + 'To suppress this warning please set the configuration option "PYKX_IGNORE_QHOME" as outlined at:\n' + 'https://code.kx.com/pykx/user-guide/configuration.html') + _libq_path_py = bytes(_core_q_lib_path) + _libq_path = _libq_path_py + _q_handle = dlopen(_libq_path, RTLD_NOW | RTLD_GLOBAL) + qinit = dlsym(_q_handle, 'qinit') + qinit_return_code = _qinit(qinit, str(qhome if ignore_qhome else pykx_lib_dir), str(qlic), list(qargs)) + if qinit_return_code: # nocov + dlclose(_q_handle) # nocov + licensed = False # nocov + raise PyKXException( # nocov + f'Non-zero qinit return code {qinit_return_code} despite successful pre-check') # nocov +else: + _check_beta('PYKX Threading') + beta_features.append('PyKX Threading') + _libq_path_py = bytes(str(find_core_lib('q')), 'utf-8') + _tcore_path = tcore_path_location + _libq_path = _libq_path_py + _q_handle = dlopen(_tcore_path, RTLD_NOW | RTLD_GLOBAL) + + init_syms = dlsym(_q_handle, 'sym_init') + init_syms(_libq_path) + qinit = dlsym(_q_handle, 'q_init') + + qinit_return_code = _qinit(qinit, str(qhome if ignore_qhome else pykx_lib_dir), str(qlic), list(qargs)) + if qinit_return_code: # nocov + dlclose(_q_handle) # nocov + licensed = False # nocov + if qinit_return_code == 1: # nocov + raise PyKXException( # nocov + f'qinit failed because of an invalid license file, please ensure you have a valid' + 'q license installed before using PYKX_THREADING.' + ) # nocov + else: # nocov + raise PyKXException( # nocov + f'Non-zero qinit return code {qinit_return_code}, failed to initialize ' + 'PYKX_THREADING.' 
+ ) # nocov + os.environ['QHOME'] = str(qhome if ignore_qhome else pykx_lib_dir) + licensed = True _set_licensed(licensed) @@ -343,74 +380,83 @@ if k_gc and not licensed: raise PyKXException('Early garbage collection requires a valid q license.') - -kG = dlsym(_q_handle, 'kG') -kC = dlsym(_q_handle, 'kC') -kU = dlsym(_q_handle, 'kU') -kS = dlsym(_q_handle, 'kS') -kH = dlsym(_q_handle, 'kH') -kI = dlsym(_q_handle, 'kI') -kJ = dlsym(_q_handle, 'kJ') -kE = dlsym(_q_handle, 'kE') -kF = dlsym(_q_handle, 'kF') -kK = dlsym(_q_handle, 'kK') - -b9 = dlsym(_q_handle, 'b9') -d9 = dlsym(_q_handle, 'd9') -dj = dlsym(_q_handle, 'dj') -dl = dlsym(_q_handle, 'dl') -dot = dlsym(_q_handle, 'dot') -ee = dlsym(_q_handle, 'ee') -ja = dlsym(_q_handle, 'ja') -jk = dlsym(_q_handle, 'jk') -js = dlsym(_q_handle, 'js') -jv = dlsym(_q_handle, 'jv') -k = dlsym(_q_handle, 'k') +sym_name = lambda x: bytes('_' + x, 'utf-8') if pykx_threading else bytes(x, 'utf-8') + +if not pykx_threading: + kG = dlsym(_q_handle, 'kG') + kC = dlsym(_q_handle, 'kC') + kU = dlsym(_q_handle, 'kU') + kS = dlsym(_q_handle, 'kS') + kH = dlsym(_q_handle, 'kH') + kI = dlsym(_q_handle, 'kI') + kJ = dlsym(_q_handle, 'kJ') + kE = dlsym(_q_handle, 'kE') + kF = dlsym(_q_handle, 'kF') + kK = dlsym(_q_handle, 'kK') + +_shutdown_thread = dlsym(_q_handle, 'shutdown_thread') + +cpdef shutdown_thread(): + if pykx_threading: + _shutdown_thread() + + +b9 = dlsym(_q_handle, sym_name('b9')) +d9 = dlsym(_q_handle, sym_name('d9')) +dj = dlsym(_q_handle, sym_name('dj')) +dl = dlsym(_q_handle, sym_name('dl')) +dot = dlsym(_q_handle, sym_name('dot')) +ee = dlsym(_q_handle, sym_name('ee')) +ja = dlsym(_q_handle, sym_name('ja')) +jk = dlsym(_q_handle, sym_name('jk')) +js = dlsym(_q_handle, sym_name('js')) +jv = dlsym(_q_handle, sym_name('jv')) +k = dlsym(_q_handle, sym_name('k')) cdef extern from 'include/foreign.h': K k_wrapper(void* x, char* code, void* a1, void* a2, void* a3, void* a4, void* a5, void* a6, void* a7, void* a8) nogil knogil = 
k_wrapper -ka = dlsym(_q_handle, 'ka') -kb = dlsym(_q_handle, 'kb') -kc = dlsym(_q_handle, 'kc') -kclose = dlsym(_q_handle, 'kclose') -kd = dlsym(_q_handle, 'kd') -ke = dlsym(_q_handle, 'ke') -kf = dlsym(_q_handle, 'kf') -kg = dlsym(_q_handle, 'kg') -kh = dlsym(_q_handle, 'kh') -khpunc = dlsym(_q_handle, 'khpunc') -ki = dlsym(_q_handle, 'ki') -kj = dlsym(_q_handle, 'kj') -knk = dlsym(_q_handle, 'knk') -knt = dlsym(_q_handle, 'knt') -kp = dlsym(_q_handle, 'kp') -kpn = dlsym(_q_handle, 'kpn') -krr = dlsym(_q_handle, 'krr') -ks = dlsym(_q_handle, 'ks') -kt = dlsym(_q_handle, 'kt') -ktd = dlsym(_q_handle, 'ktd') -ktj = dlsym(_q_handle, 'ktj') -ktn = dlsym(_q_handle, 'ktn') -ku = dlsym(_q_handle, 'ku') -kz = dlsym(_q_handle, 'kz') -m9 = dlsym(_q_handle, 'm9') -okx = dlsym(_q_handle, 'okx') -orr = dlsym(_q_handle, 'orr') -r0 = dlsym(_q_handle, 'r0') -r1 = dlsym(_q_handle, 'r1') -sd0 = dlsym(_q_handle, 'sd0') -sd0x = dlsym(_q_handle, 'sd0x') -sd1 = dlsym(_q_handle, 'sd1') -sd1 = dlsym(_q_handle, 'sd1') -sn = dlsym(_q_handle, 'sn') -ss = dlsym(_q_handle, 'ss') -sslInfo = dlsym(_q_handle, 'sslInfo') -vak = dlsym(_q_handle, 'vak') -vaknk = dlsym(_q_handle, 'vaknk') -ver = dlsym(_q_handle, 'ver') -xD = dlsym(_q_handle, 'xD') -xT = dlsym(_q_handle, 'xT') -ymd = dlsym(_q_handle, 'ymd') +ka = dlsym(_q_handle, sym_name('ka')) +kb = dlsym(_q_handle, sym_name('kb')) +kc = dlsym(_q_handle, sym_name('kc')) +kclose = dlsym(_q_handle, sym_name('kclose')) +kd = dlsym(_q_handle, sym_name('kd')) +ke = dlsym(_q_handle, sym_name('ke')) +kf = dlsym(_q_handle, sym_name('kf')) +kg = dlsym(_q_handle, sym_name('kg')) +kh = dlsym(_q_handle, sym_name('kh')) +khpunc = dlsym(_q_handle, sym_name('khpunc')) +ki = dlsym(_q_handle, sym_name('ki')) +kj = dlsym(_q_handle, sym_name('kj')) +knk = dlsym(_q_handle, sym_name('knk')) +knt = dlsym(_q_handle, sym_name('knt')) +kp = dlsym(_q_handle, sym_name('kp')) +kpn = dlsym(_q_handle, sym_name('kpn')) +krr = dlsym(_q_handle, sym_name('krr')) +ks = 
dlsym(_q_handle, sym_name('ks')) +kt = dlsym(_q_handle, sym_name('kt')) +ktd = dlsym(_q_handle, sym_name('ktd')) +ktj = dlsym(_q_handle, sym_name('ktj')) +ktn = dlsym(_q_handle, sym_name('ktn')) +ku = dlsym(_q_handle, sym_name('ku')) +kz = dlsym(_q_handle, sym_name('kz')) +m9 = dlsym(_q_handle, sym_name('m9')) +okx = dlsym(_q_handle, sym_name('okx')) +orr = dlsym(_q_handle, sym_name('orr')) +r0 = dlsym(_q_handle, sym_name('r0')) +r1 = dlsym(_q_handle, sym_name('r1')) +sd0 = dlsym(_q_handle, sym_name('sd0')) +sd0x = dlsym(_q_handle, sym_name('sd0x')) +sd1 = dlsym(_q_handle, sym_name('sd1')) +sd1 = dlsym(_q_handle, sym_name('sd1')) +sn = dlsym(_q_handle, sym_name('sn')) +ss = dlsym(_q_handle, sym_name('ss')) +sslInfo = dlsym(_q_handle, sym_name('sslInfo')) +vak = dlsym(_q_handle, sym_name('vak')) +vaknk = dlsym(_q_handle, sym_name('vaknk')) +ver = dlsym(_q_handle, sym_name('ver')) +xD = dlsym(_q_handle, sym_name('xD')) +xT = dlsym(_q_handle, sym_name('xT')) +ymd = dlsym(_q_handle, sym_name('ymd')) _r0_ptr = int(r0) _k_ptr = int(k) diff --git a/src/pykx/db.py b/src/pykx/db.py new file mode 100644 index 0000000..2eb77be --- /dev/null +++ b/src/pykx/db.py @@ -0,0 +1,1120 @@ +"""Functionality for the interaction with and management of databases. + +!!! Warning + + This functionality is provided in it's present form as a BETA + Feature and is subject to change. To enable this functionality + for testing please following configuration instructions + [here](../user-guide/configuration.md) setting `PYKX_BETA_FEATURES='true'` +""" + +from .exceptions import QError +from . import wrappers as k +from . 
import beta_features +from .config import _check_beta + +import os +from pathlib import Path +from typing import Union +from warnings import warn + +__all__ = [ + 'DB', +] + +beta_features.append('Database Management') + + +def _init(_q): + global q + q = _q + + +def __dir__(): + return __all__ + + +def _check_loading(cls, table, err_msg): + if not cls.loaded: + raise QError("No database referenced/loaded") + if table is not None: + if table not in cls.tables: + raise QError(err_msg + " not possible as specified table not available") + + +def _check_column(cls, table, column): + table_cols = cls.list_columns(table) + if column not in table_cols: + raise QError("Specified column '" + column + "' not present in table '" + table + "'") + + +def _check_table(cls, table): + if not k.PartitionedTable == type(getattr(cls.table, table)): # noqa: E721 + raise QError("Application of Database Management functionality only " + "supported for Partitioned Databases") + + +_ktype_to_conversion = { + k.GUIDAtom: "guid", + k.BooleanAtom: "boolean", + k.ByteAtom: "byte", + k.ShortAtom: "short", + k.IntAtom: "int", + k.LongAtom: "long", + k.RealAtom: "real", + k.FloatAtom: "float", + k.CharAtom: "char", + k.SymbolAtom: "symbol", + k.TimestampAtom: "timestamp", + k.MonthAtom: "month", + k.DateAtom: "date", + k.DatetimeAtom: "datetime", + k.TimespanAtom: "timespan", + k.MinuteAtom: "minute", + k.SecondAtom: "second", + k.TimeAtom: "time", +} + +_func_mapping = { + 'dpt': '{[d;p;f;t;s] .Q.dpt[d;p;t]}', + 'dpft': '{[d;p;f;t;s] .Q.dpft[d;p;f;t]}', + 'dpfs': '{[d;p;f;t;s] .Q.dpfs[d;p;f;s]}', + 'dpfts': '{[d;p;f;t;s] .Q.dpfts[d;p;f;t;s]}' +} + + +class _TABLES: + pass + + +class DB(_TABLES): + """Singleton class used for the management of kdb+ Databases""" + _instance = None + path = None + tables = None + table = _TABLES + loaded = False + + def __new__(cls, *, path=None): + if cls._instance is None: + cls._instance = super(DB, cls).__new__(cls) + return cls._instance + + def 
__init__(self, *, path=None): + _check_beta('Database Management') + if path is not None: + try: + self.load(path) + except BaseException: + self.path = Path(os.path.abspath(path)) + pass + + def create(self, table, table_name, partition, *, # noqa: C901 + by_field=None, sym_enum=None, log=True): + """ + Create an on-disk partitioned table within a kdb+ database from a supplied + `pykx.Table` object. Once generated this table will be accessible + as an attribute of the `DB` class or a sub attribute of `DB.table`. + + Parameters: + table: The `pykx.Table` object which is to be persisted to disk + table_name: The name with which the table will be persisted and accessible + once loaded and available as a `pykx.PartitionedTable` + partition: The name of the column which is to be used to partition the data if + supplied as a `str` or if supplied as a non-string object this will be used as + the partition to which all data is persisted + by_field: A field of the table to be used as a by column, this column will be + the second column in the table (the first being the virtual column determined + by the partitioning column) + sym_enum: The name of the symbol enumeration table to be associated with the table + log: Print information about status of partitioned database + + Returns: + A `None` object on successful invocation, the database class will be + updated to contain attributes associated with the available created table + + Examples: + + Generate a partitioned table from a table containing multiple partitions + + ```python + >>> import pykx as kx + >>> db = kx.DB(path = 'newDB') + >>> N = 1000 + >>> qtab = kx.Table(data = { + ... 'date': kx.q.asc(kx.random.random(N, kx.q('2020.01 2020.02 2020.03m'))), + ... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']), + ... 'price': kx.random.random(N, 10.0), + ... 'size': kx.random.random(N, 100) + ... 
}) + >>> db.create(qtab, 'stocks', 'date', by_field = 'sym', sym_enum = 'symbols') + >>> db.tables + ['stocks'] + >>> db.stocks + pykx.PartitionedTable(pykx.q(' + month sym price size + --------------------------- + 2020.01 AAPL 7.979004 85 + 2020.01 AAPL 5.931866 55 + 2020.01 AAPL 5.255477 49 + 2020.01 AAPL 8.15255 74 + 2020.01 AAPL 4.771067 80 + .. + ')) + ``` + + Add a table as a partition to an on-disk database, in the example below we are adding + a partition to the table generated above + + ```python + >>> import pykx as kx + >>> db = kx.DB(path = 'newDB') + >>> N = 333 + >>> qtab = kx.Table(data = { + ... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']), + ... 'price': kx.random.random(N, 10.0), + ... 'size': kx.random.random(N, 100) + ... }) + >>> db.create(qtab, 'stocks', kx.q('2020.04'), by_field = 'sym', sym_enum = 'symbols') + >>> db.tables + ['stocks'] + >>> db.stocks + pykx.PartitionedTable(pykx.q(' + month sym price size + --------------------------- + 2020.01 AAPL 7.979004 85 + 2020.01 AAPL 5.931866 55 + 2020.01 AAPL 5.255477 49 + 2020.01 AAPL 8.15255 74 + 2020.01 AAPL 4.771067 80 + .. 
+ ')) + ``` + """ + save_dir = self.path + func_name = 'dpfts' + if type(table) != k.Table: + raise QError('Supplied table must be of type pykx.Table') + if by_field is None: + func_name = func_name.replace('f', '') + if sym_enum is None: + func_name = func_name.replace('s', '') + qfunc = q(_func_mapping[func_name]) + try: + if type(partition) == str: + if partition not in table.columns: + raise QError(f'Partition column {partition} not in supplied table') + if type(table[partition]).t not in [5, 6, 7, 13, 14]: + raise QError(f'Unsupported type: {type(table[partition])} ' + 'not supported for table partitioning') + parts = q.distinct(table[partition]) + for i in parts: + if log: + print(f'Writing Database Partition {i} to table {table_name}') + q[table_name] = q('{?[x;enlist y;0b;()]}', table, [q('='), partition, i]) + q[table_name] = q('{![x;();0b;enlist y]}', q[table_name], partition) + qfunc(save_dir, i, by_field, table_name, sym_enum) + else: + q[table_name] = table + if log: + print(f'Writing Database Partition {partition} to table {table_name}') + qfunc(save_dir, partition, by_field, table_name, sym_enum) + except QError as err: + q('{![`.;();0b;enlist x]}', table_name) + raise QError(err) + q('{![`.;();0b;enlist x]}', table_name) + self.load(self.path, overwrite=True) + return None + + def load(self, path: Union[Path, str], *, overwrite=False): + """ + Load the tables associated with a kdb+ Database, once loaded a table + is accessible as an attribute of the `DB` class or a sub attribute + of `DB.table`. Note that can alternatively be called when providing a path + on initialisation of the DB class. 
+ + Parameters: + path: The file system path at which your database is located + overwrite: Should loading of the database overwrite any currently + loaded databases + + Returns: + A `None` object on successful invocation, the database class will be + updated to contain attributes associated with available tables + + Examples: + + Load an on-disk database + + ```python + >>> import pykx as kx + >>> db = kx.DB() + >>> db.load('testData') + >>> db.tables + ['testData'] + >>> db.testData + pykx.PartitionedTable(pykx.q(' + month sym time price size + --------------------------------------- + 2020.01 FDP 00:00:00.004 90.94738 12 + 2020.01 FDP 00:00:00.005 33.81127 15 + 2020.01 FDP 00:00:00.027 88.89853 16 + 2020.01 FDP 00:00:00.035 78.33244 9 + 2020.01 JPM 00:00:00.055 68.65177 1 + .. + ')) + >>> db.table.testData + pykx.PartitionedTable(pykx.q(' + month sym time price size + --------------------------------------- + 2020.01 FDP 00:00:00.004 90.94738 12 + 2020.01 FDP 00:00:00.005 33.81127 15 + 2020.01 FDP 00:00:00.027 88.89853 16 + 2020.01 FDP 00:00:00.035 78.33244 9 + 2020.01 JPM 00:00:00.055 68.65177 1 + .. + ')) + ``` + + Load an on-disk database when initialising the class + + ```python + >>> import pykx as kx + >>> db = kx.DB(path = 'testData') + >>> db.tables + ['testData'] + >>> db.testData + pykx.PartitionedTable(pykx.q(' + month sym time price size + --------------------------------------- + 2020.01 FDP 00:00:00.004 90.94738 12 + 2020.01 FDP 00:00:00.005 33.81127 15 + 2020.01 FDP 00:00:00.027 88.89853 16 + 2020.01 FDP 00:00:00.035 78.33244 9 + 2020.01 JPM 00:00:00.055 68.65177 1 + .. + ')) + >>> db.table.testData + pykx.PartitionedTable(pykx.q(' + month sym time price size + --------------------------------------- + 2020.01 FDP 00:00:00.004 90.94738 12 + 2020.01 FDP 00:00:00.005 33.81127 15 + 2020.01 FDP 00:00:00.027 88.89853 16 + 2020.01 FDP 00:00:00.035 78.33244 9 + 2020.01 JPM 00:00:00.055 68.65177 1 + .. 
+ ')) + ``` + """ + load_path = Path(os.path.abspath(path)) + if not overwrite and self.path == load_path: + raise QError("Attempting to reload existing database. Please pass " + "the keyword overwrite=True to complete database reload") + if not overwrite and self.loaded: + raise QError("Only one kdb+ database can be loaded within a process. " + "Please use the 'overwrite' keyword to load a new database.") + if not load_path.is_dir(): + if load_path.is_file(): + err_info = 'Provided path is a file' + else: + err_info = 'Unable to find object at specified path' + raise QError('Loading of kdb+ databases can only be completed on folders: ' + err_info) + preloaded = self.tables + q(''' + {[dbpath] + @[system"l ",; + 1_string dbpath; + {'"Failed to load Database with error: ",x} + ] + } + ''', load_path) + self.path = load_path + self.loaded = True + tables = q.tables() + self.tables = tables.py() + for i in q('except', self.tables, preloaded).py(): + if hasattr(self, i): + warn(f'A database table "{i}" would overwrite one of the pykx.DB() methods, please access your table via the table attribute') # noqa: E501 + else: + setattr(self, i, q[i]) + setattr(self.table, i, q[i]) + return None + + def _reload(self): + _check_loading(self, None, None) + return self.load(self.path, overwrite=True) + + def rename_column(self, table, original_name, new_name): + """ + Rename a column within a loaded kdb+ Database + + Parameters: + table: The name of the table within which a column is to be renamed + original_name: Name of the column which is to be renamed + new_name: Column name which will be used as the new column name + + Returns: + A `None` object on successful invocation, the database class will be + updated and column rename actioned. 
+ + Examples: + + Rename the column of a table + + ```python + >>> import pykx as kx + >>> db = kx.DB() + >>> db.load('testDB') + >>> db.tables + ['testTable'] + >>> db.list_columns('testTable') + ['month', 'sym', 'time', 'price', 'size'] + >>> db.rename_column('testTable', 'sym', 'symbol') + >>> db.testTable + pykx.PartitionedTable(pykx.q(' + month symbol time price size + --------------------------------------- + 2020.01 FDP 00:00:00.004 90.94738 12 + 2020.01 FDP 00:00:00.005 33.81127 15 + 2020.01 FDP 00:00:00.027 88.89853 16 + 2020.01 FDP 00:00:00.035 78.33244 9 + 2020.01 JPM 00:00:00.055 68.65177 1 + .. + ')) + ``` + """ + _check_loading(self, table, 'Column rename') + _check_table(self, table) + _check_column(self, table, original_name) + q.dbmaint.renamecol(self.path, table, original_name, new_name) + self._reload() + return None + + def delete_column(self, table, column): + """ + Delete the column of a loaded kdb+ Database + + Parameters: + table: The name of the table within which a column is to be deleted + column: Column which is to be deleted from the database + + Returns: + A `None` object on successful invocation, the database class will be + updated and specified column deleted + + Examples: + + Delete the column of a table + + ```python + >>> import pykx as kx + >>> db = kx.DB() + >>> db.load('testDB') + >>> db.tables + ['testTable'] + >>> db.list_columns('testTable') + ['month', 'sym', 'time', 'price', 'size'] + >>> db.delete_column('testTable', 'size') + >>> db.testTable + pykx.PartitionedTable(pykx.q(' + month symbol time price + ------------------------------------- + 2020.01 FDP 00:00:00.004 90.94738 + 2020.01 FDP 00:00:00.005 33.81127 + 2020.01 FDP 00:00:00.027 88.89853 + 2020.01 FDP 00:00:00.035 78.33244 + 2020.01 JPM 00:00:00.055 68.65177 + .. 
+ ')) + ``` + """ + _check_loading(self, table, 'Column deletion') + _check_table(self, table) + _check_column(self, table, column) + q.dbmaint.deletecol(self.path, table, column) + self._reload() + return None + + def rename_table(self, original_name, new_name): + """ + Rename a table within a loaded kdb+ Database + + Parameters: + original_name: The name of the table which is to be renamed + new_name: Updated table name + + Returns: + A `None` object on successful invocation, the database class will be + updated, original table name deleted from q memory and new table + accessible + + Examples: + + Rename a database table + + ```python + >>> import pykx as kx + >>> db = kx.DB() + >>> db.load('testDB') + >>> db.tables + ['testTable'] + >>> db.rename_table('testTable', 'updated_table') + >>> db.tables + ['updated_table'] + ``` + """ + _check_loading(self, original_name, 'Table rename') + _check_table(self, original_name) + q.dbmaint.rentable(self.path, original_name, new_name) + # Remove the original table, without this it persists as an accessible table + q('{![`.;();0b;enlist x]`}', original_name) + self._reload() + return None + + def list_columns(self, table): + """ + List the columns of a table within a loaded kdb+ Database + + Parameters: + table: The name of the table whose columns are listed + + Returns: + A list of strings defining the columns of a table + + Examples: + + List the columns of a table in a database + + ```python + >>> import pykx as kx + >>> db = kx.DB() + >>> db.load('testDB') + >>> db.tables + ['testTable'] + >>> db.list_columns('testTable') + ['month', 'sym', 'time', 'price', 'size'] + ``` + """ + _check_loading(self, table, 'Column listing') + _check_table(self, table) + return q.dbmaint.listcols(self.path, table).py() + + def add_column(self, table, column_name, default_value): + """ + Add a column to a table within a loaded kdb+ Database + + Parameters: + table: The name of the table to which a column is to be added + column_name: Name 
of the column to be added + default_value: The default value to be used for all existing partitions + + Returns: + A `None` object on successful invocation, the database class will be + updated and the new column available for use/access + + Examples: + + Add a column to a table within a partitioned database + + ```python + >>> import pykx as kx + >>> db = kx.DB() + >>> db.load('testDB') + >>> db.tables + ['testTable'] + >>> db.list_columns('testTable') + ['month', 'sym', 'time', 'price', 'size'] + >>> db.add_column('testTable', 'test', kx.IntAtom.null) + >>> db.list_columns('testTable') + ['month', 'sym', 'time', 'price', 'size', 'test'] + ``` + """ + _check_loading(self, table, 'Column addition') + _check_table(self, table) + q.dbmaint.addcol(self.path, table, column_name, default_value) + self._reload() + return(None) + + def find_column(self, table, column_name): + """ + Functionality for finding a column across partitions within a loaded kdb+ Database + + Parameters: + table: The name of the table within which columns are to be found + column_name: The name of the column to be found within a table + + Returns: + A `None` object on successful invocation printing search status per partition, + if a column does not exist in a specified partition an error will be raised + and the logs will indicate which partitions did not have the specified column. 
+ + Examples: + + Find a column that exists + + ```python + >>> import pykx as kx + >>> db = kx.DB() + >>> db.load('testDB') + >>> db.tables + ['testTable'] + >>> db.list_columns('testTable') + ['month', 'sym', 'time', 'price', 'size'] + >>> db.find_column('testTable', 'price') + 2023.11.10 16:48:57 column price (type 0) in `:/usr/pykx/db/2015.01.01/testTable + 2023.11.10 16:48:57 column price (type 0) in `:/usr/pykx/db/2015.01.02/testTable + ``` + + Attempt to find a column that does not exist + + ```python + >>> import pykx as kx + >>> db = kx.DB() + >>> db.load('testDB') + >>> db.tables + ['testTable'] + >>> db.list_columns('testTable') + ['month', 'sym', 'time', 'price', 'size'] + >>> db.find_column('testTable', 'side') + 2023.11.10 16:49:02 column side *NOT*FOUND* in `:/usr/pykx/db/2015.01.01/testTable + 2023.11.10 16:49:02 column side *NOT*FOUND* in `:/usr/pykx/db/2015.01.02/testTable + Traceback (most recent call last): + ... + pykx.exceptions.QError: Requested column not found in all partitions, see log output above + ``` + """ + _check_loading(self, table, 'Finding columns') + _check_table(self, table) + return q.dbmaint.findcol(self.path, table, column_name).py() + + def reorder_columns(self, table, new_order): + """ + Reorder the columns of a persisted kdb+ database + + Parameters: + table: The name of the table within which columns will be rearranged + new_order: The ordering of the columns following update + + Returns: + A `None` object on successfully updating the columns of the database + + Examples: + + Update the order of columns for a persisted kdb+ database + + ```python + >>> import pykx as kx + >>> db = kx.DB() + >>> db.load('testDB') + >>> db.tables + ['testTable'] + >>> col_list = db.list_columns('testTable') + >>> col_list + ['month', 'sym', 'time', 'price', 'size'] + >>> col_list.reverse() + >>> col_list + ['size', 'price', 'time', 'sym', 'month'] + >>> db.reorder_columns('testTable', col_list) + 2023.11.13 17:56:17 reordering columns in 
`:/usr/pykx/2015.01.01/testTable + 2023.11.13 17:56:17 reordering columns in `:/usr/pykx/2015.01.02/testTable + ['month', 'sym', 'time', 'price', 'size'] + ``` + """ + _check_loading(self, table, 'Column reordering') + _check_table(self, table) + q.dbmaint.reordercols(self.path, table, new_order) + return None + + def set_column_attribute(self, table, column_name, new_attribute): + """ + Set an attribute associated with a column for an on-disk database + + Parameters: + table: The name of the table within which an attribute will be set + column_name: Name of the column to which the attribute will be applied + new_attribute: The attribute which is to be applied, this can be one of + 'sorted'/'s', 'partitioned'/'p', 'unique'/'u' or 'grouped'/'g'. + + Returns: + A `None` object on successfully setting the attribute for a column + + Examples: + + Add an attribute to a column of a persisted database + + ```python + >>> import pykx as kx + >>> db = kx.DB() + >>> db.load('testDB') + >>> db.tables + ['testTable'] + >>> kx.q.meta(db.testTable) + pykx.KeyedTable(pykx.q(' + c | t f a + ----| ----- + date| d + test| j + p | f + sym | s + ')) + >>> db.set_column_attribute('testTable', 'sym', 'grouped') + >>> kx.q.meta(db.testTable) + pykx.KeyedTable(pykx.q(' + c | t f a + ----| ----- + date| d + test| j + p | f + sym | s g + ')) + ``` + """ + _check_loading(self, table, 'Attribute setting') + _check_table(self, table) + _check_column(self, table, column_name) + if new_attribute not in ['s', 'g', 'p', 'u', 'sorted', + 'grouped', 'partitioned', 'unique']: + raise QError("new_attribute must be one of " + "'s', 'g', 'p', 'u', 'sorted', 'grouped' or 'unique'") + if new_attribute not in ['s', 'g', 'p', 'u']: + new_attribute = {'sorted': 's', + 'grouped': 'g', + 'partitioned': 'p', + 'unique': 'u'}[new_attribute] + q.dbmaint.setattrcol(self.path, table, column_name, new_attribute) + return None + + def set_column_type(self, table, column_name, new_type): + """ + Convert/set the type
of a column to a specified type + + Parameters: + table: The name of the table within which a column is to be converted + column_name: Name of the column which is to be converted + new_type: PyKX type to which a column is to be converted + + Returns: + A `None` object on successfully updating the type of the column + + Examples: + + Convert the type of a column within a database table + + ```python + >>> import pykx as kx + >>> db = kx.DB() + >>> db.load('testDB') + >>> db.tables + ['testTable'] + >>> kx.q.meta(db.testTable) + pykx.KeyedTable(pykx.q(' + c | t f a + ----| ----- + date| d + test| j + p | f + sym | s + ')) + >>> db.set_column_type('testTable', 'test', kx.FloatAtom) + >>> kx.q.meta(db.testTable) + pykx.KeyedTable(pykx.q(' + c | t f a + ----| ----- + date| d + test| f + p | f + sym | s + ')) + ``` + """ + _check_loading(self, table, 'Column casting') + _check_table(self, table) + _check_column(self, table, column_name) + if new_type not in _ktype_to_conversion: + raise QError("Unable to find user specified conversion type: " + str(new_type)) + col_type = _ktype_to_conversion[new_type] + try: + q.dbmaint.castcol(self.path, table, column_name, col_type) + except QError as err: + if str(err) == 'type': + raise QError("Unable to convert specified column '" + column_name + "' to type: " + str(new_type)) # noqa: E501 + raise QError(err) + self._reload() + return None + + def clear_column_attribute(self, table, column_name): + """ + Clear an attribute associated with a column of an on-disk database + + Parameters: + table: The name of the table within which the attribute of a column will be removed + column_name: Name of the column from which an attribute will be removed + + Returns: + A `None` object on successful removal of the attribute of a column + + Examples: + + Remove an attribute of a column of a persisted database + + ```python + >>> import pykx as kx + >>> db = kx.DB() + >>> db.load('testDB') + >>> db.tables + ['testTable'] + >>> 
kx.q.meta(db.testTable) + pykx.KeyedTable(pykx.q(' + c | t f a + ----| ----- + date| d + test| j + p | f + sym | s g + ')) + >>> db.clear_column_attribute('testTable', 'sym') + >>> kx.q.meta(db.testTable) + pykx.KeyedTable(pykx.q(' + c | t f a + ----| ----- + date| d + test| j + p | f + sym | s + ')) + ``` + """ + _check_loading(self, table, 'Attribute clearing') + _check_table(self, table) + _check_column(self, table, column_name) + q.dbmaint.clearattrcol(self.path, table, column_name) + return None + + def copy_column(self, table, original_column, new_column): + """ + Create a copy of a column within a table + + Parameters: + table: Name of the table + original_column: Name of the column to be copied + new_column: Name of the copied column + + Returns: + A `None` object on successful column copy, reloading the + database following column copy + + Examples: + + Copy a column within a kdb+ database + + ```python + >>> import pykx as kx + >>> db = kx.DB() + >>> db.load('testDB') + >>> db.list_columns('testTable') + ['month', 'sym', 'time', 'price', 'size'] + >>> db.copy_column('testTable', 'size', 'dup_size') + ['month', 'sym', 'time', 'price', 'size', 'dup_size'] + ``` + """ + _check_loading(self, table, 'Column copying') + _check_table(self, table) + _check_column(self, table, original_column) + q.dbmaint.copycol(self.path, table, original_column, new_column) + self._reload() + return None + + def apply_function(self, table, column_name, function): + """ + Apply a function per partition on a column of a persisted kdb+ database + + Parameters: + table: Name of the table + column_name: Name of the column on which the function is to be applied + function: Callable function to be applied on a column vector per column + + Returns: + A `None` object on successful application of a function to the column + and the reloading of the database + + Examples: + + Apply a q function to a specified column per partition + + ```python + >>> import pykx as kx + >>> db = kx.DB() + 
>>> db.load('testDB') + >>> db.testTable + pykx.PartitionedTable(pykx.q(' + month symbol time price + ------------------------------------- + 2020.01 FDP 00:00:00.004 90.94738 + 2020.01 FDP 00:00:00.005 33.81127 + 2020.01 FDP 00:00:00.027 88.89853 + 2020.01 FDP 00:00:00.035 78.33244 + 2020.01 JPM 00:00:00.055 68.65177 + .. + ')) + >>> db.apply_function('testTable', 'price', kx.q('2*')) + >>> db.testTable + pykx.PartitionedTable(pykx.q(' + month symbol time price + ------------------------------------- + 2020.01 FDP 00:00:00.004 181.8948 + 2020.01 FDP 00:00:00.005 67.62254 + 2020.01 FDP 00:00:00.027 177.7971 + 2020.01 FDP 00:00:00.035 156.6649 + 2020.01 JPM 00:00:00.055 137.3035 + .. + ')) + ``` + + Apply a Python function to the content of a specified column per partition + + ```python + >>> import pykx as kx + >>> db = kx.DB() + >>> db.load('testDB') + >>> db.testTable + pykx.PartitionedTable(pykx.q(' + month symbol time price + ------------------------------------- + 2020.01 FDP 00:00:00.004 90.94738 + 2020.01 FDP 00:00:00.005 33.81127 + 2020.01 FDP 00:00:00.027 88.89853 + 2020.01 FDP 00:00:00.035 78.33244 + 2020.01 JPM 00:00:00.055 68.65177 + .. + ')) + >>> db.apply_function('testTable', 'price', lambda x:2*x.np()) + >>> db.testTable + pykx.PartitionedTable(pykx.q(' + month symbol time price + ------------------------------------- + 2020.01 FDP 00:00:00.004 181.8948 + 2020.01 FDP 00:00:00.005 67.62254 + 2020.01 FDP 00:00:00.027 177.7971 + 2020.01 FDP 00:00:00.035 156.6649 + 2020.01 JPM 00:00:00.055 137.3035 + .. 
+ ')) + ``` + """ + _check_loading(self, table, 'Function application') + _check_table(self, table) + _check_column(self, table, column_name) + if not callable(function): + raise RuntimeError("Provided 'function' is not callable") + q.dbmaint.fncol(self.path, table, column_name, function) + self._reload() + return None + + def fill_database(self): + """ + Fill missing tables from partitions within a database using the + most recent partition as a template, this will report the + partitions but not the tables which are being filled. + + Returns: + A `None` object on successful filling of missing tables in + partitioned database + + Examples: + + Fill missing tables from a database + + ```python + >>> import pykx as kx + >>> db = kx.DB(path = 'newDB') + >>> db.fill_database() + Successfully filled missing tables to partition: :/usr/newDB/2020.04 + Successfully filled missing tables to partition: :/usr/newDB/2020.03 + Successfully filled missing tables to partition: :/usr/newDB/2020.02 + Successfully filled missing tables to partition: :/usr/newDB/2020.01 + ``` + """ + fill_parts = [] + try: + fill_parts = q.raze(q.Q.chk(self.path)).py() + except QError as err: + if 'No such file or directory' in str(err): + raise QError("Unable to apply database filling due to write permission issues") # noqa: E501 + raise QError(err) + if 0>> import pykx as kx + >>> db = kx.DB(path = 'newDB') + >>> db.partition_count() + pykx.Dictionary(pykx.q(' + | trades quotes + -------| ------------- + 2020.01| 334 0 + 2020.02| 324 0 + 2020.03| 342 1000 + ')) + ``` + + Copy a column within a kdb+ database + + ```python + >>> import pykx as kx + >>> db = kx.DB(path = 'newDB') + >>> db.partition_count(sub_view = kx.q('2020.01 2020.02m')) + pykx.Dictionary(pykx.q(' + | trades quotes + -------| ------------- + 2020.01| 334 0 + 2020.02| 324 0 + ')) + ``` + """ + qtables = self.tables + if subview==None: # noqa: E711 + q.Q.view() + else: + q.Q.view(subview) + for i in qtables: + 
q.Q.cn(getattr(self.table, i)) + res = q('.Q.pv!flip .Q.pn') + q.Q.view() + return res + + def subview(self, view=None): + """ + Specify the subview to be used when querying a partitioned table + + Parameters: + view: A list of partition values which will serve as a filter + for all queries against any partitioned table within the + database. If view is supplied as `None` this will reset + the query view to all partitions + + Returns: + A `None` object on successful setting of the view state + + Examples: + + Set the subview range to include only `2020.02` and `2020.03` + + ```python + >>> import pykx as kx + >>> db = kx.DB(path = 'newDB') + >>> db.subview(kx.q('2020.02 2020.03m')) + >>> kx.q.qsql.select(db.trades, 'month') + pykx.Table(pykx.q(' + month + ------- + 2020.02 + 2020.03 + ')) + ``` + + Reset the database subview to include a fully specified range + + ```python + >>> import pykx as kx + >>> db = kx.DB(path = 'newDB') + >>> db.subview() + >>> kx.q.qsql.select(db.trades, 'month') + pykx.Table(pykx.q(' + month + ------- + 2020.01 + 2020.02 + 2020.03 + 2020.04 + 2020.05 + ')) + ``` + """ + if view==None: # noqa: E711 + q.Q.view() + else: + q.Q.view(view) + return None + + def enumerate(self, table, *, sym_file=None): + """ + Perform an enumeration on a user specified table against the + current sym files associated with the database + + Parameters: + table: The `pykx.Table` object which is to be persisted to disk + and which is to undergo enumeration + sym_file: The name of the sym file contained in the database folder + (the `path` of this `DB` object) against which enumeration will be completed + + Returns: + The supplied table with enumeration applied + + Examples: + + Enumerate the symbol columns of a table without specifying the `sym` file + + ```python + >>> import pykx as kx + >>> db = kx.DB(path = 'newDB') + >>> N = 1000 + >>> tab = kx.Table(data = { + ...
'x': kx.random.random(N, ['a', 'b', 'c']), + ... 'x1': kx.random.random(N, 1.0), + ... 'x2': kx.random.random(N, 10) + ... }) + >>> tab = db.enumerate(tab) + >>> tab['x'] + pykx.EnumVector(pykx.q('`sym$`a`b`a`c`b..')) + ``` + + Enumerate the symbol columns of a table specifying the `sym` file used + + ```python + >>> import pykx as kx + >>> db = kx.DB(path = 'newDB') + >>> N = 1000 + >>> tab = kx.Table(data = { + ... 'x': kx.random.random(N, ['a', 'b', 'c']), + ... 'x1': kx.random.random(N, 1.0), + ... 'x2': kx.random.random(N, 10) + ... }) + >>> tab = db.enumerate(tab, sym_file = 'mysym') + >>> tab['x'] + pykx.EnumVector(pykx.q('`mysym$`a`b`a`c`b..')) + ``` + """ + load_path = Path(os.path.abspath(self.path)) + if sym_file is None: + return q.Q.en(load_path, table) + else: + return q.Q.ens(load_path, table, sym_file) diff --git a/src/pykx/embedded_q.py b/src/pykx/embedded_q.py index 9303dec..2ec67ac 100644 --- a/src/pykx/embedded_q.py +++ b/src/pykx/embedded_q.py @@ -11,7 +11,7 @@ from . import toq from . import wrappers from .
import schema -from .config import find_core_lib, licensed, no_qce, pykx_dir, qargs, skip_under_q +from .config import find_core_lib, licensed, no_qce, pykx_dir, pykx_threading, qargs, skip_under_q from .core import keval as _keval from .exceptions import FutureCancelled, LicenseException, NoResults, PyKXException, PyKXWarning, QError # noqa from ._wrappers import _factory as factory @@ -124,13 +124,16 @@ def __init__(self): # noqa if not no_qce: code += f'if[not `comkxic in key `;system"l {kxic_path}"];' if os.getenv('PYKX_UNDER_Q') is None: - code += 'setenv[`UNDER_PYTHON;"true"];' - code += f'2:[`$"{pykx_qlib_path}";(`k_pykx_init; 1)][`$"{find_core_lib("q").as_posix()}"];' # noqa: E501 + os.environ['PYKX_UNDER_PYTHON'] = 'true' + code += 'setenv[`PYKX_UNDER_PYTHON;"true"];' + code += f'2:[`$"{pykx_qlib_path}";(`k_pykx_init; 2)][`$"{find_core_lib("q").as_posix()}";{"1b" if pykx_threading else "0b"}];' # noqa: E501 code += f'`.pykx.i.pyfunc set (`$"{pykx_qlib_path}") 2: (`k_pyfunc; 2);' code += f'`.pykx.modpow set {{((`$"{pykx_qlib_path}") 2: (`k_modpow; 3))["j"$x;"j"$y;$[z~(::);(::);"j"$z]]}};' # noqa: E501 else: - code += f'2:[`$"{pykx_qlib_path}q";(`k_pykx_init; 1)][`$"{find_core_lib("q").as_posix()}"];' # noqa: E501 + code += f'2:[`$"{pykx_qlib_path}q";(`k_pykx_init; 2)][`$"{find_core_lib("q").as_posix()}";{"1b" if pykx_threading else "0b"}];' # noqa: E501 code += f'`.pykx.modpow set {{((`$"{pykx_qlib_path}q") 2: (`k_modpow; 3))["j"$x;"j"$y;$[z~(::);(::);"j"$z]]}};' # noqa: E501 + if pykx_threading: + warn('pykx.q is not supported when using PYKX_THREADING.') code += '@[get;`.pykx.i.kxic.loadfailed;{()!()}]' kxic_loadfailed = self._call(code, debug=False).py() if (not no_qce) and ('--no-sql' not in qargs): diff --git a/src/pykx/extensions/dashboards.q b/src/pykx/extensions/dashboards.q new file mode 100644 index 0000000..b7395f4 --- /dev/null +++ b/src/pykx/extensions/dashboards.q @@ -0,0 +1,129 @@ +// dash.q - PyKX functionality for integration with KX 
Dashboards Integration + +\d .pykx + +// @private +// @desc +// Generate the Python function to retrieve the Python object +dash.util.lib:{x!{@[{.pykx.pyexec x;1b};"import ",string x;0b]} each x}`ast2json`ast + +// @private +// @kind function +// @name .pykx.dash.available +// @category api +// @overview +// _Function to denote if all Python libraries required for dashboards are available_ +dash.available:{all dash.util.lib} + +// @private +// @desc +// Generate Python functions for parsing functions from strings +if[dash.available[]; + .pykx.pyexec"def _pykx_func_parse(py_code):\n", + " parse_info = ast2json.ast2json(ast.parse(py_code))\n", + " idx = next((i for i, item in enumerate(parse_info['body']) if item['_type'] == 'FunctionDef'), None)\n", + " if idx == None:\n", + " raise Exception('No function definition found')\n", + " return parse_info['body'][idx]['name']"; + ] + +// @private +// @kind function +// @name .pykx.dash.util.getFunction +// @category api +// @overview +// _Functionality for the generation of a Python function to be called from code_ +// +// ```q +// .pykx.dash.util.getFunction[pycode] +// ``` +// **Parameters:** +// +// name | type | description | +// ---------------|----------|--------------------------------------------------------------| +// `pycode` | `string` | The Python code this is to be executed for use as a function | +// +// **Returns:** +// +// type | description | +// --------------|-------------| +// `composition` | A wrapped foreign Python object associated with the specified code +// +// **Example:** +// +// ```q +// q).pykx.dash.util.getFunction["def func(x):\n\treturn 1"] +// {[f;x].pykx.util.pykx[f;x]}[foreign]enlist +// ``` +dash.util.getFunction:{[pyCode] + if[not dash.available[]; + '"Required libraries for PyKX Dashboards integration not found" + ]; + funcName:@[.pykx.get[`$"_pykx_func_parse";<]; + .pykx.topy pyCode; + {[err]'err," raised when attempting to retrieve function definition"} + ]; + @[.pykx.pyexec; + 
pyCode; + {[err]'err," raised when executing supplied Python code"} + ]; + .pykx.get funcName + } + +// @private +// @kind function +// @name .pykx.dash.runFunction +// @category api +// @overview +// _Generate and execute a callable Python function using supplied arguments_ +// +// ```q +// .pykx.dash.runFunction[pycode;args] +// ``` +// **Parameters:** +// +// name | type | description | +// ---------|----------|------------------------------------------------------------------------| +// `pycode` | `string` | The Python code this is to be executed for use as a function | +// `args` | `list` | A mixed/generic list of arguments to be used when calling the function | +// +// **Returns:** +// +// type | description | +// -------|------------------------------------------------------------------------| +// `list` | The list of argument names associated with the user specified function | +// +// **Example:** +// +// Single argument function usage: +// +// ```q +// q).pykx.dash.runFunction["def func(x):\n\treturn x";enlist ([]5?1f;5?1f)] +// x x1 +// ------------------- +// 0.9945242 0.6298664 +// 0.7930745 0.5638081 +// 0.2073435 0.3664924 +// 0.4677034 0.9240405 +// 0.4126605 0.5420167 +// ``` +// +// Multiple argument function usage: +// +// ```q +// q).pykx.dash.runFunction["def func(x, y):\n\treturn x*y";(2;5)] +// 10 +// ``` +// +// Function using Python dependencies: +// +// ```q +// q).pykx.dash.runFunction["import numpy as np\n\ndef func(x):\n\treturn np.linspace(0, x.py(), 5)";enlist 10] +// 0 2.5 5 7.5 10 +// ``` +dash.runFunction:{[pyCode;args] + cache:util.defaultConv; + .pykx.setdefault["k"]; + return:.[{.pykx.dash.util.getFunction[x][<] . 
(),y};(pyCode;args);{.pykx.util.defaultConv:x;'y}[cache]]; + util.defaultConv:cache; + return} diff --git a/src/pykx/include/py.h b/src/pykx/include/py.h index 9c87daa..e2f77b3 100644 --- a/src/pykx/include/py.h +++ b/src/pykx/include/py.h @@ -153,6 +153,7 @@ typedef struct{ X(P,PyObject_Call,(P,P,P))\ X(P,PyObject_CallObject,(P,P))\ X(int,PyObject_HasAttr,(P,P))\ + X(int,PyObject_HasAttrString,(P,char*))\ X(P,PyObject_GetAttr,(P,P))\ X(int,PyObject_SetAttrString,(P,char*,P))\ X(char*,PyUnicode_AsUTF8,(P))\ @@ -160,6 +161,7 @@ typedef struct{ X(P,PyRun_String,(char*,int,P,P))\ X(P,PyImport_Import,(P))\ X(int,Py_IsInitialized,())\ + X(int,PySys_WriteStdout,(char*,...))\ //https://docs.scipy.org/doc/numpy/reference/c-api.html https://github.com/numpy/numpy/blob/master/numpy/core/code_generators/numpy_api.py #undef PyCFunction_New diff --git a/src/pykx/ipc.py b/src/pykx/ipc.py index a9f9113..f4fd86e 100644 --- a/src/pykx/ipc.py +++ b/src/pykx/ipc.py @@ -19,7 +19,7 @@ has extra functionality around manually polling the send an receive message queues. For more examples of usage of the IPC interface you can look at the -[`interface overview`](../getting-started/interface_overview.ipynb#ipc-communication). +[`interface overview`](../getting-started/PyKX%20Introduction%20Notebook.ipynb#ipc-communication). """ from enum import Enum @@ -338,10 +338,10 @@ def __init__(self, authorization. Refer to [ssl documentation](https://code.kx.com/q/kb/ssl/) for more information. - Note: The `timeout` argument may not always be enforced when making succesive querys. + Note: The `timeout` argument may not always be enforced when making successive queries. When making successive queries if one query times out the next query will wait until a - response has been recieved from the previous query before starting the timer for its own - timeout. This can be avioded by using a seperate `QConnection` instance for each query. 
+ response has been received from the previous query before starting the timer for its own + timeout. This can be avoided by using a separate `QConnection` instance for each query. Note: When querying `KX Insights` the `no_ctx=True` keyword argument must be used. @@ -614,6 +614,14 @@ def _recv_socket(self, sock): if len(chunks) == 0: self.close() raise RuntimeError("Attempted to use a closed IPC connection") + elif len(chunks) <8: + self.close() + raise RuntimeError("PyKX attempted to process a message containing less than " + "the expected number of bytes, connection closed." + f"\nReturned bytes: {chunks}.\n" + "If you have a reproducible use-case please raise an " + "issue at https://github.com/kxsystems/pykx/issues with " + "the use-case provided.") # The last 5 bytes of the header contain the size and the first byte contains information # about whether the message is encoded in big-endian or little-endian form @@ -698,7 +706,7 @@ def file_execute( conn.file_execute('file.q') ``` - Connect to a q process using an asyncronous QConnection at IP address 127.0.0.1, + Connect to a q process using an asynchronous QConnection at IP address 127.0.0.1, on port 5000 and execute a file based on absolute path. ```python @@ -773,10 +781,10 @@ def __init__(self, authorization. Refer to [ssl documentation](https://code.kx.com/q/kb/ssl/) for more information. - Note: The `timeout` argument may not always be enforced when making succesive querys. + Note: The `timeout` argument may not always be enforced when making successive queries. When making successive queries if one query times out the next query will wait until a - response has been recieved from the previous query before starting the timer for its own - timeout. This can be avioded by using a seperate `SyncQConnection` instance for each + response has been received from the previous query before starting the timer for its own + timeout. This can be avoided by using a separate `SyncQConnection` instance for each query. 
Note: When querying `KX Insights` the `no_ctx=True` keyword argument must be used. @@ -1001,10 +1009,10 @@ def __init__(self, authorization. Refer to [ssl documentation](https://code.kx.com/q/kb/ssl/) for more information. - Note: The `timeout` argument may not always be enforced when making succesive querys. + Note: The `timeout` argument may not always be enforced when making successive queries. When making successive queries if one query times out the next query will wait until a - response has been recieved from the previous query before starting the timer for its own - timeout. This can be avioded by using a seperate `QConnection` instance for each query. + response has been received from the previous query before starting the timer for its own + timeout. This can be avoided by using a separate `QConnection` instance for each query. Note: When querying `KX Insights` the `no_ctx=True` keyword argument must be used. @@ -1425,7 +1433,7 @@ def __init__(self, Note: The `timeout` argument may not always be enforced when making successive queries. When making successive queries if one query times out the next query will wait until a - response has been recieved from the previous query before starting the timer for its own + response has been received from the previous query before starting the timer for its own timeout. This can be avoided by using a separate `QConnection` instance for each query. Note: The overhead of calling `clean_open_connections` is large. @@ -1738,6 +1746,14 @@ def _recv_socket_server(self, sock): # noqa tot_bytes += 8 if len(chunks) == 0: return + elif len(chunks) <8: + self.close() + raise RuntimeError("PyKX attempted to process a message containing less than " + "the expected minimum number of bytes, connection closed." 
+ f"\nReturned bytes: {chunks}.\n" + "If you have a reproducible use-case please raise an " + "issue at https://github.com/kxsystems/pykx/issues with " + "the use-case provided.") # The last 5 bytes of the header contain the size and the first byte contains # information about whether the message is encoded in big-endian or little-endian form @@ -1888,7 +1904,7 @@ def poll_recv(self, amount: int = 1): over the timeout limit. Examples: -2 + ```python q = await pykx.RawQConnection(host='localhost', port=5002) ``` @@ -2043,10 +2059,10 @@ def __init__(self, authorization. Refer to [ssl documentation](https://code.kx.com/q/kb/ssl/) for more information. - Note: The `timeout` argument may not always be enforced when making succesive querys. + Note: The `timeout` argument may not always be enforced when making successive queries. When making successive queries if one query times out the next query will wait until a - response has been recieved from the previous query before starting the timer for its own - timeout. This can be avioded by using a seperate `SecureQConnection` instance for each + response has been received from the previous query before starting the timer for its own + timeout. This can be avoided by using a separate `SecureQConnection` instance for each query. Note: When querying `KX Insights` the `no_ctx=True` keyword argument must be used. diff --git a/src/pykx/lib/dbmaint.q b/src/pykx/lib/dbmaint.q new file mode 100644 index 0000000..78fbc9e --- /dev/null +++ b/src/pykx/lib/dbmaint.q @@ -0,0 +1,151 @@ +/ kdb+ partitioned database maintenance +\d .os +WIN:.z.o in`w32`w64 +pth:{p:$[10h=type x;x;string x];if[WIN;p[where"/"=p]:"\\"];(":"=first p)_ p} +cpy:{system$[WIN;"copy /v /z ";"cp "],pth[x]," ",pth y} +del:{system$[WIN;"del ";"rm "],pth x} +ren:{system$[WIN;"move ";"mv "],pth[x]," ",pth y} +here:{hsym`$system$[WIN;"cd";"pwd"]} +\d . 
+ +\d .dbmaint +add1col:{[tabledir;colname;defaultvalue] + if[not colname in ac:allcols tabledir; + stdout"adding column ",(string colname)," (type ",(string type defaultvalue),") to `",string tabledir; + num:count get(`)sv tabledir,first ac; + .[(`)sv tabledir,colname;();:;num#defaultvalue]; + @[tabledir;`.d;,;colname]]} + +allcols:{[tabledir]get tabledir,`.d} + +allpaths:{[dbdir;table] + files:key dbdir; + if[any files like"par.txt";:raze allpaths[;table]each hsym each`$read0(`)sv dbdir,`par.txt]; + files@:where files like"[0-9]*";(`)sv'dbdir,'files,'table} + +copy1col:{[tabledir;oldcol;newcol] + if[(oldcol in ac)and not newcol in ac:allcols tabledir; + stdout"copying ",(string oldcol)," to ",(string newcol)," in `",string tabledir; + .os.cpy[(`)sv tabledir,oldcol;(`)sv tabledir,newcol];@[tabledir;`.d;,;newcol]]} + +delete1col:{[tabledir;col] + if[col in ac:allcols tabledir; + stdout"deleting column ",(string col)," from `",string tabledir; + .os.del[(`)sv tabledir,col];@[tabledir;`.d;:;ac except col]]} + +/ +enum:{[tabledir;val] + if[not 11=abs type val;:val]; + .[p;();,;u@:iasc u@:where not(u:distinct enlist val)in v:$[type key p:(`)sv tabledir,`sym;get p;0#`]];`sym!(v,u)?val} +\ + +enum:{[tabledir;val]if[not 11=abs type val;:val];.Q.dd[tabledir;`sym]?val} + + +find1col:{[tabledir;col] + $[col in allcols tabledir; + [stdout"column ",string[col]," (type ",(string first"i"$read1((`)sv tabledir,col;8;1)),") in `",string tabledir;1b]; + [stdout"column ",string[col]," *NOT*FOUND* in `",string tabledir;0b]]} + +fix1table:{[tabledir;goodpartition;goodpartitioncols] + if[count missing:goodpartitioncols except allcols tabledir; + stdout"fixing table `",string tabledir;{add1col[x;z;0#get y,z]}[tabledir;goodpartition]each missing]} + +fn1col:{[tabledir;col;fn] + if[col in allcols tabledir; + oldattr:-2!oldvalue:get p:tabledir,col; + newattr:-2!newvalue:fn oldvalue; + if[$[not oldattr~newattr;1b;not oldvalue~newvalue]; + stdout"resaving column ",(string col)," (type 
",(string type newvalue),") in `",string tabledir; + oldvalue:0;.[(`)sv p;();:;newvalue]]]} + +reordercols0:{[tabledir;neworder] + if[not((count ac)=count neworder)or all neworder in ac:allcols tabledir;'`order]; + stdout"reordering columns in `",string tabledir; + @[tabledir;`.d;:;neworder]} + +rename1col:{[tabledir;oldname;newname] + if[(oldname in ac)and not newname in ac:allcols tabledir; + stdout"renaming ",(string oldname)," to ",(string newname)," in `",string tabledir; + .os.ren[` sv tabledir,oldname;` sv tabledir,newname];@[tabledir;`.d;:;.[ac;where ac=oldname;:;newname]]]} + +ren1table:{[old;new]stdout"renaming ",(string old)," to ",string new;.os.ren[old;new];} + +add1table:{[dbdir;tablename;table] + stdout"adding ",string tablename; + @[tablename;`;:;.Q.en[dbdir]0#table];} + +stdout:{-1 raze[" "sv string`date`second$.z.P]," ",x;} +validcolname:{(not x in `i,.Q.res,key`.q)and x = .Q.id x} + +////////////////////////////////////////////////////////////////////////////////////////////////////////// +// * public + +thisdb:`:. / if functions are to be run within the database instance then use (`:.) 
as dbdir + +addcol:{[dbdir;table;colname;defaultvalue] / addcol[`:/data/taq;`trade;`noo;0h] + if[not validcolname colname;'(`)sv colname,`invalid.colname]; + add1col[;colname;enum[dbdir;defaultvalue]]each allpaths[dbdir;table];} + +castcol:{[dbdir;table;col;newtype] / castcol[thisdb;`trade;`size;`short] + fncol[dbdir;table;col;newtype$]} + +clearattrcol:{[dbdir;table;col] / clearattr[thisdb;`trade;`sym] + setattrcol[dbdir;table;col;(`)]} + +copycol:{[dbdir;table;oldcol;newcol] / copycol[`:/k4/data/taq;`trade;`size;`size2] + if[not validcolname newcol;'(`)sv newcol,`invalid.newname]; + copy1col[;oldcol;newcol]each allpaths[dbdir;table];} + +deletecol:{[dbdir;table;col] / deletecol[`:/k4/data/taq;`trade;`iz] + delete1col[;col]each allpaths[dbdir;table];} + +findcol:{[dbdir;table;col] / findcol[`:/k4/data/taq;`trade;`iz] + fndcols:find1col[;col]each allpaths[dbdir;table]; + if[not any fndcols;'"Requested column not found in all partitions, see log output above"]} + +/ adds missing columns, but DOESN'T delete extra columns - do that manually +fixtable:{[dbdir;table;goodpartition] / fixtable[`:/k4/data/taq;`trade;`:/data/taq/2005.02.19] + fix1table[;goodpartition;allcols goodpartition]each allpaths[dbdir;table]except goodpartition;} + +fncol:{[dbdir;table;col;fn] / fncol[thisdb;`trade;`price;2*] + fn1col[;col;fn]each allpaths[dbdir;table];} + +listcols:{[dbdir;table] / listcols[`:/k4/data/taq;`trade] + allcols first allpaths[dbdir;table]} + +renamecol:{[dbdir;table;oldname;newname] / renamecol[`:/k4/data/taq;`trade;`woz;`iz] + if[not validcolname newname;'` sv newname,`invalid.newname]; + rename1col[;oldname;newname]each allpaths[dbdir;table];} + +reordercols:{[dbdir;table;neworder] / reordercols[`:/k4/data/taq;`trade;reverse cols trade] + reordercols0[;neworder]each allpaths[dbdir;table];} + +setattrcol:{[dbdir;table;col;newattr] / setattr[thisdb;`trade;`sym;`g] / `s `p `u + fncol[dbdir;table;col;newattr#]} + +addtable:{[dbdir;tablename;table] / 
addtable[`:.;`trade;([]price...)] + add1table[dbdir;;table]each allpaths[dbdir;tablename];} + +rentable:{[dbdir;old;new] / rentable[`:.;`trade;`transactions] + ren1table'[allpaths[dbdir;old];allpaths[dbdir;new]];} + +\d . +\ +test with https://github.com/KxSystems/kdb/blob/master/tq.q (sample taq database) + +if making changes to current database you need to reload (\l .) to make modifications visible + +if the database you've been modifying is a tick database don't forget to adjust the schema (tick/???.q) to reflect your changes to the data + + +.dbmaint.addcol[`:.;`trade;`num;10] +.dbmaint.addcol[`:.;`trade;`F;`test] +.dbmaint.delete1col[`:./2000.10.02/trade;`F] +.dbmaint.fixtable[`:.;`trade;`:./2000.10.03/trade] +.dbmaint.reordercols[`:.;`quote;except[2 rotate cols quote;`date]] +.dbmaint.clearattrcol[`:.;`trade;`sym] +.dbmaint.setattrcol[`:.;`trade;`sym;`p] +.dbmaint.castcol[`:.;`trade;`time;`second] +.dbmaint.renamecol[`:.;`trade;`price;`PRICE] +`PRICE`size .dbmaint.renamecol[`:.;`trade]'`p`s diff --git a/src/pykx/lib/l64/libq.so b/src/pykx/lib/l64/libq.so index 36f9e86..2213c2e 100755 Binary files a/src/pykx/lib/l64/libq.so and b/src/pykx/lib/l64/libq.so differ diff --git a/src/pykx/lib/l64arm/libq.so b/src/pykx/lib/l64arm/libq.so index d49f27c..e4c4fd0 100755 Binary files a/src/pykx/lib/l64arm/libq.so and b/src/pykx/lib/l64arm/libq.so differ diff --git a/src/pykx/lib/w64/q.dll b/src/pykx/lib/w64/q.dll index 2e7152a..84cadb8 100644 Binary files a/src/pykx/lib/w64/q.dll and b/src/pykx/lib/w64/q.dll differ diff --git a/src/pykx/lib/w64/q.lib b/src/pykx/lib/w64/q.lib index 566f05e..c0db9fa 100644 Binary files a/src/pykx/lib/w64/q.lib and b/src/pykx/lib/w64/q.lib differ diff --git a/src/pykx/nbextension.py b/src/pykx/nbextension.py index 2ae9fc1..e2f7c18 100644 --- a/src/pykx/nbextension.py +++ b/src/pykx/nbextension.py @@ -13,6 +13,7 @@ def q(instructions, code): # noqa unix = False no_ctx = False displayRet = False + debug = False if len(instructions)>0: 
instructions = instructions.split(' ') @@ -64,6 +65,13 @@ def q(instructions, code): # noqa displayRet = True instructions.pop(0) continue + elif instructions[0] == '--debug': + debug = True + instructions.pop(0) + continue + elif instructions[0] == '': + instructions.pop(0) + continue else: raise kx.QError( f'Received unknown argument "{instructions[0]}" in %%q magic command' @@ -98,18 +106,35 @@ def q(instructions, code): # noqa _q = kx.q code = [kx.CharVector(x) for x in code.split('\n')][:-1] ret = _q( - "{[ld;code;file] {x where not (::)~/:x} value (@';\"q\";enlist[file],/:value(ld;code))}", + '''{[ld;code;file] + res:1_ {x,enlist `err`res`trc!$[any x`err; + (1b;(::);(::)); + .Q.trp[{(0b;(@) . ("q";x);(::))};y;{(1b;x;.Q.sbt y)}]]} over + enlist[enlist `err`res`trc!(0b;(::);(::))],enlist[file],/:value(ld;code); + select from res where not (::)~/:res} + ''', ld, code, b'jupyter_cell.q' ) if not kx.licensed: ret = ret.py() - for r in ret: - display(r) if displayRet else print(r) + for i in range(len(ret['res'])): + if ret['err'][i]: + if debug: + print(ret['trc'][i].decode()) + raise kx.QError(ret['res'][i].decode()) + else: + display(ret['res'][i]) if displayRet else print(ret['res'][i]) else: for i in range(len(ret)): - display(_q('{x y}', ret, i)) if displayRet else print(_q('{x y}', ret, i)) + r = _q('@', ret, i) + if r['err']: + if debug: + print(r['trc']) + raise kx.QError(r['res'].py().decode()) + else: + display(r['res']) if displayRet else print(r['res']) if issubclass(type(_q), kx.QConnection): _q.close() diff --git a/src/pykx/pandas_api/__init__.py b/src/pykx/pandas_api/__init__.py index 1213df5..83358b9 100644 --- a/src/pykx/pandas_api/__init__.py +++ b/src/pykx/pandas_api/__init__.py @@ -71,6 +71,7 @@ def return_val(*args, **kwargs): from .pandas_indexing import _init as _index_init, PandasIndexing, PandasReindexing, TableLoc from .pandas_merge import _init as _merge_init, GTable_init, PandasGroupBy, PandasMerge from .pandas_set_index import _init as 
_set_index_init, PandasSetIndex +from .pandas_reset_index import _init as _reset_index_init, PandasResetIndex from .pandas_apply import _init as _apply_init, PandasApply @@ -83,10 +84,12 @@ def _init(_q): _merge_init(q) _set_index_init(q) _apply_init(q) + _reset_index_init(q) class PandasAPI(PandasApply, PandasMeta, PandasIndexing, PandasReindexing, - PandasConversions, PandasMerge, PandasSetIndex, PandasGroupBy): + PandasConversions, PandasMerge, PandasSetIndex, PandasGroupBy, + PandasResetIndex): """PandasAPI mixin class""" replace_self = False prev_locs = {} diff --git a/src/pykx/pandas_api/pandas_conversions.py b/src/pykx/pandas_api/pandas_conversions.py index 46d0f73..870c356 100644 --- a/src/pykx/pandas_api/pandas_conversions.py +++ b/src/pykx/pandas_api/pandas_conversions.py @@ -301,7 +301,11 @@ def select_dtypes(self, include=None, exclude=None): include_type_nums = [kx_type_to_type_number[x] for x in [x for x in kx_type_to_type_number.keys() for y in include if x in str(y)]] - + if 10 in include_type_nums: + raise Exception("'CharVector' not supported." + " Use 'CharAtom' for columns of char atoms." + " 'kx.List' will include any columns containing" + " mixed list data.") # Run for exclude if exclude is not None: if not isinstance(exclude, list): @@ -311,7 +315,11 @@ def select_dtypes(self, include=None, exclude=None): exclude_type_nums = [kx_type_to_type_number[x] for x in [x for x in kx_type_to_type_number.keys() for y in exclude if x in str(y)]] - + if 10 in exclude_type_nums: + raise Exception("'CharVector' not supported." + " Use 'CharAtom' for columns of char atoms." 
+ " 'kx.List' will exclude any columns containing" + " mixed list data.") # Check no overlapping values if include is not None and exclude is not None: if any([x in exclude for x in include]): @@ -320,14 +328,14 @@ def select_dtypes(self, include=None, exclude=None): # Run if include is not None if include is not None: table_out = q('''{[qtab;inc] tCols:cols qtab; - inc:5h$inc; + inc:abs 5h$inc; bList:value (type each flip 0#qtab) in inc; colList:tCols where bList; ?[qtab; (); 0b; colList!colList]}''', self, include_type_nums) # noqa else: table_out = q('''{[qtab;exc] tCols:cols qtab; - exc:5h$exc; + exc:abs 5h$exc; bList:value (type each flip 0#qtab) in exc; colList:tCols where not bList; ?[qtab; (); 0b; colList!colList] }''', diff --git a/src/pykx/pandas_api/pandas_indexing.py b/src/pykx/pandas_api/pandas_indexing.py index e3dfeb0..2d43c84 100644 --- a/src/pykx/pandas_api/pandas_indexing.py +++ b/src/pykx/pandas_api/pandas_indexing.py @@ -8,7 +8,7 @@ def _init(_q): q = _q -def _get(tab, key, default): +def _get(tab, key, default, cols_check=True): idxs = None _init_tab = None if 'Keyed' in str(type(tab)): @@ -18,6 +18,10 @@ def _get(tab, key, default): if 0 in idxs: keys = keys[1:] key = keys + if cols_check: + if q('{not all x in cols y}', key, tab): + colstr = str(q('{((),x) except cols y}', key, tab).py()) + raise QError(f'Attempted to retrieve inaccessible columns: {colstr}') if isinstance(key, list) or isinstance(key, SymbolVector): if not all([x in tab._keys for x in key]): return default @@ -131,28 +135,12 @@ def _parse_cols(tab, cols): return cols -def _parse_indexes_slice(tab, loc): - step = loc.step if loc.step is not None else 1 - start = loc.start if loc.start is not None else (0 if step > 0 else len(tab) - 1) - stop = loc.stop if loc.stop is not None else (len(tab) if step > 0 else -1) - if step > 0 and stop < 0: - stop = stop + len(tab) - idxs = [] - idx = start - while True: - idxs.append(idx) - idx += step - if (start < stop and idx >= stop) or 
(start > stop and idx <= stop) or start == stop: - break - return idxs - - def _parse_indexes(tab, loc): if callable(loc): loc = loc(tab) - types = [list, ShortVector, IntVector, LongVector] + types = [list, ShortVector, IntVector, LongVector, range] if isinstance(loc, slice): - loc = _parse_indexes_slice(tab, loc) + loc = range(len(tab))[loc] if ((isinstance(loc, list) and isinstance(loc[0], bool)) or isinstance(loc, BooleanVector))\ and len(loc) == len(tab): idxs = [] @@ -248,6 +236,9 @@ def _loc(tab, loc): # noqa if 'Keyed' in str(type(tab)): return q('{(count keys x)!((0!x) each where y)}', tab, loc) return q('{x where y}', tab, loc) + if isinstance(loc, str): + if q('{not x in cols y}', loc, tab): + raise QError(f'Attempted to retrieve inaccessible column: {loc}') return q('{x[enlist each y]}', tab, loc) @@ -310,16 +301,25 @@ def _drop_columns(tab, labels, errors=True): def _rename_index(tab, labels): if "Keyed" in str(type(tab)): + for x in list(labels.keys()): + if type(x) is not int: + labels.pop(x) return q('''{ idx:first flip key x; idx:@[(count idx;1)#idx;idx?raze key y;y]; ([] idx)!value x}''', tab, labels) # noqa else: - return ValueError('nyi') + return ValueError(f"""Only pykx.KeyedTable objects can + have indexes renamed. 
Received: {type(tab)}""") def _rename_columns(tab, labels): + for x in list(labels.keys()): + if type(labels[x]) is not str: + raise ValueError('pykx.Table column names can only be of type pykx.SymbolAtom') + if type(x) is not str: + labels.pop(x) if "Keyed" in str(type(tab)): return q('''{ c:cols value x; @@ -349,7 +349,7 @@ def pop(self, col_name: str): @api_return def get(self, key, default=None): """Get items from table based on key, if key is not found default is returned.""" - return _get(self, key, default) + return _get(self, key, default, cols_check=False) @property def at(self): @@ -426,10 +426,12 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False, ignore_index def rename(self, labels=None, index=None, columns=None, axis=0, copy=None, inplace=False, level=None, errors='ignore'): - + if ("Keyed" not in str(type(self)) and columns is None + and ((axis == 'index' or axis == 0) or (index is not None))): + raise ValueError("Can only rename index of a KeyedTable") if labels is None and index is None and columns is None: raise ValueError("must pass an index to rename") - elif axis != 0 and (index is not None or columns is not None): + elif axis !=0 and (index is not None or columns is not None): raise ValueError("Cannot specify both 'axis' and any of 'index' or 'columns'") if (columns is not None or axis==1) and level is not None: @@ -440,13 +442,15 @@ def rename(self, labels=None, index=None, columns=None, axis=0, t = self if labels is not None: - if axis == 0: - t = _rename_index(t, labels) - elif axis == 1: + if axis == 1 or axis == 'columns': t = _rename_columns(t, labels) + elif axis == 0 or axis == 'index': + t = _rename_index(t, labels) else: raise ValueError(f'No axis named {axis}') else: + if (index or columns) is None: + raise ValueError("No columns or indices specified or set") if index is not None: t = _rename_index(t, index) if columns is not None: @@ -462,7 +466,10 @@ def add_suffix(self, suffix, axis=0): (c!`$string[c],\\:string 
s) xcol t }''', suffix, t) elif axis == 0: - raise ValueError('nyi') + if 'Keyed' in str(type(t)): + raise ValueError('nyi') + else: + return q('{[s;t] c:cols t; (c!`$string[c],\\:string s) xcol t}', suffix, t) else: raise ValueError(f'No axis named {axis}') return t @@ -475,6 +482,10 @@ def add_prefix(self, prefix, axis=0): (c!`$string[s],/:string[c]) xcol t }''', prefix, t) elif axis == 0: + if 'Keyed' in str(type(t)): + raise ValueError('nyi') + else: + return q('{[s;t] c:cols t; (c!`$string[s],/:string[c]) xcol t}', prefix, t) raise ValueError('nyi') else: raise ValueError(f'No axis named {axis}') diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py index 4ee270a..659a26b 100644 --- a/src/pykx/pandas_api/pandas_meta.py +++ b/src/pykx/pandas_api/pandas_meta.py @@ -196,6 +196,16 @@ def median(self, axis: int = 0, numeric_only: bool = False): tab ) + @convert_result + def skew(self, axis=0, skipna=True, numeric_only=False): + res, cols = preparse_computations(self, axis, skipna, numeric_only) + return (q( + '''{[row] + m:{(sum (x - avg x) xexp y) % count x}; + g1:{[m;x]m:m[x]; m[3] % m[2] xexp 3%2}[m]; + (g1 each row) * {sqrt[n * n-1] % neg[2] + n:count x} each row + }''', res), cols) + @api_return def mode(self, axis: int = 0, numeric_only: bool = False, dropna: bool = True): tab = self @@ -268,16 +278,6 @@ def prod(self, axis=0, skipna=True, numeric_only=False, min_count=0): min_count ), cols) - @convert_result - def skew(self, axis=0, skipna=True, numeric_only=False): - res, cols = preparse_computations(self, axis, skipna, numeric_only) - return (q( - '''{[row] - m:{(sum (x - avg x) xexp y) % count x}; - g1:{[m;x]m:m[x]; m[3] % m[2] xexp 3%2}[m]; - (g1 each row) * {sqrt[n * n-1] % neg[2] + n:count x} each row - }''', res), cols) - @convert_result def sum(self, axis=0, skipna=True, numeric_only=False, min_count=0): res, cols = preparse_computations(self, axis, skipna, numeric_only) diff --git 
a/src/pykx/pandas_api/pandas_reset_index.py b/src/pykx/pandas_api/pandas_reset_index.py new file mode 100644 index 0000000..cb10a10 --- /dev/null +++ b/src/pykx/pandas_api/pandas_reset_index.py @@ -0,0 +1,72 @@ +from . import api_return +from ..exceptions import QError + + +def _init(_q): + global q + q = _q + + +class PandasResetIndex: + @api_return + def reset_index(self, levels=None, *, drop=False, inplace=False, + col_level=0, col_fill='', allow_duplicates=False, + names=None): + """Reset keys/index of a PyKX Table""" + if col_level != 0: + raise QError("'col_level' not presently supported") + if col_fill != '': + raise QError("'col_fill' not presently supported") + if names is not None: + raise QError("'names' not presently supported") + if 'Keyed' not in str(type(self)): + return self + if not allow_duplicates: + if q('{any cols[key x] in cols value x}', self).py(): + raise QError('Cannot reset index due to duplicate column names') + if drop and levels is None: + return q.value(self) + if levels is not None: + intlist = False + strlist = False + if drop: + drop_keys = q('{.[{(y _ key x)!value x};(x;y);{[x;y]value x}[x]]}') + if isinstance(levels, list): + strlist = all(isinstance(n, str) for n in levels) + intlist = all(isinstance(n, int) for n in levels) + if isinstance(levels, str) or strlist: + q(''' + { + if[any locs:not ((),x) in cols[key y]; + '"Key(s) ",(", " sv string ((),x) where locs)," not found" + ] + } + ''', + levels, + self) + if drop: + res = q('{x[y;(),z]}', drop_keys, self, levels) + else: + res = q('{(cols[key x] except y) xkey x}', self, levels) + elif isinstance(levels, int) or intlist: + q(''' + { + if[any locs:((),x)>ckeys:count cols key y; + '"Key level(s) ", + (", " sv string((),x)where locs), + " out of range ", + string ckeys + ] + } + ''', + levels, + self) + if drop: + res = q('{x[y;(),cols[key y]z]]}', drop_keys, self, levels) # noqa: E501 + else: + res = q('{(cols[key x] except cols[key x]y) xkey x}', self, levels) + else: + 
raise TypeError("Unsupported type provided for 'levels'") + else: + res = q('0!', self) + return res diff --git a/src/pykx/pykx.c b/src/pykx/pykx.c index d27a1e5..f71a1ff 100644 --- a/src/pykx/pykx.c +++ b/src/pykx/pykx.c @@ -26,6 +26,7 @@ static K (*kp_ptr)(char*); static K (*k_ptr)(int, char*, ...); void* q_lib; +static PyObject* sys; static PyObject* builtins; static PyObject* toq_module; static PyObject* toq; @@ -48,9 +49,12 @@ static PyObject* error_preamble; int pykx_flag = -1; int init_ptrs = 0; +bool pykx_threading = false; -EXPORT K k_pykx_init(K k_q_lib_path) { +EXPORT K k_pykx_init(K k_q_lib_path, K _pykx_threading) { + if (_pykx_threading->g) + pykx_threading = true; q_lib = dlopen(k_q_lib_path->s, RTLD_NOW | RTLD_GLOBAL); r0_ptr = (void (*)(K))dlsym(q_lib, "r0"); r1_ptr = (K (*)(K))dlsym(q_lib, "r1"); @@ -67,6 +71,7 @@ EXPORT K k_pykx_init(K k_q_lib_path) { PyGILState_STATE gstate; gstate = PyGILState_Ensure(); + sys = PyModule_GetDict(PyImport_ImportModule("sys")); builtins = PyModule_GetDict(PyImport_ImportModule("builtins")); toq_module = PyModule_GetDict(PyImport_AddModule("pykx.toq")); toq = PyDict_GetItemString(toq_module, "toq"); @@ -112,6 +117,13 @@ static int check_py_foreign(K x){return x->t==112 && x->n==2 && *kK(x)==(K)py_de EXPORT K k_check_python(K x){return kb(check_py_foreign(x));} +void flush_stdout() { + PyObject* out = PyDict_GetItemString(sys, "stdout"); + if ( PyObject_HasAttrString(out, "flush") ) { + PyObject_CallMethod(out, "flush", NULL); + } +} + K k_py_error() { if (!PyErr_Occurred()) return (K)0; @@ -150,6 +162,8 @@ K k_py_error() { static PyObject* k_to_py_cast(K x, K typenum, K israw) { + if (pykx_threading) + return Py_None; PyGILState_STATE gstate; gstate = PyGILState_Ensure(); if (x->t == 112) { @@ -178,6 +192,8 @@ static PyObject* k_to_py_cast(K x, K typenum, K israw) { static PyObject* k_to_py_list(K x) { + if (pykx_threading) + return Py_None; PyGILState_STATE gstate; gstate = PyGILState_Ensure(); if (x->t == 112) { 
@@ -198,6 +214,8 @@ static PyObject* k_to_py_list(K x) { EXPORT K k_to_py_foreign(K x, K typenum, K israw) { K k; + if (pykx_threading) + return raise_k_error("pykx.q is not supported when using PYKX_THREADING"); PyGILState_STATE gstate; gstate = PyGILState_Ensure(); PyObject* p = k_to_py_cast(x, typenum, israw); @@ -267,6 +285,8 @@ void construct_args_kwargs(PyObject* params, PyObject** args, PyObject** kwargs, EXPORT K k_pyfunc(K k_guid_string, K k_args) { + if (pykx_threading) + return raise_k_error("pykx.q is not supported when using PYKX_THREADING"); PyGILState_STATE gstate; gstate = PyGILState_Ensure(); K k = (K)0; // the K object which will be returned @@ -353,6 +373,8 @@ EXPORT K k_pyfunc(K k_guid_string, K k_args) { // k_eval_or_exec == 0 -> eval the code string // k_eval_or_exec == 1 -> exec the code string EXPORT K k_pyrun(K k_ret, K k_eval_or_exec, K as_foreign, K k_code_string) { + if (pykx_threading) + return raise_k_error("pykx.q is not supported when using PYKX_THREADING"); PyGILState_STATE gstate; gstate = PyGILState_Ensure(); K k; @@ -381,21 +403,25 @@ EXPORT K k_pyrun(K k_ret, K k_eval_or_exec, K as_foreign, K k_code_string) { if (!k_ret->g) { if ((k = k_py_error())) { + flush_stdout(); Py_XDECREF(py_ret); PyGILState_Release(gstate); return k; } else Py_XDECREF(py_ret); + flush_stdout(); PyGILState_Release(gstate); return (K)0; } if ((k = k_py_error())) { + flush_stdout(); Py_XDECREF(py_ret); PyGILState_Release(gstate); return k; } if (as_foreign->g) { k = (K)create_foreign(py_ret); + flush_stdout(); Py_XDECREF(py_ret); PyGILState_Release(gstate); return k; @@ -403,6 +429,7 @@ EXPORT K k_pyrun(K k_ret, K k_eval_or_exec, K as_foreign, K k_code_string) { PyObject* py_k_ret = PyObject_CallFunctionObjArgs(toq, py_ret, NULL); Py_XDECREF(py_ret); if ((k = k_py_error())) { + flush_stdout(); Py_XDECREF(py_k_ret); PyGILState_Release(gstate); return k; @@ -411,6 +438,7 @@ EXPORT K k_pyrun(K k_ret, K k_eval_or_exec, K as_foreign, K k_code_string) { 
Py_XDECREF(py_k_ret); k = (K)PyLong_AsLongLong(py_addr); Py_XDECREF(py_addr); + flush_stdout(); PyGILState_Release(gstate); return k; } @@ -489,6 +517,8 @@ EXPORT K k_modpow(K k_base, K k_exp, K k_mod_arg) { EXPORT K foreign_to_q(K f) { + if (pykx_threading) + return raise_k_error("pykx.q is not supported when using PYKX_THREADING"); if (f->t != 112) return raise_k_error("Expected foreign object for call to .pykx.toq"); if (!check_py_foreign(f)) @@ -530,6 +560,8 @@ EXPORT K foreign_to_q(K f) { EXPORT K repr(K as_repr, K f) { + if (pykx_threading) + return raise_k_error("pykx.q is not supported when using PYKX_THREADING"); K k; if (f->t != 112) { if (as_repr->g){ @@ -551,22 +583,28 @@ EXPORT K repr(K as_repr, K f) { Py_XDECREF(repr); if (!as_repr->g) { const char *bytes = PyBytes_AS_STRING(str); - printf("%s\n", bytes); + PySys_WriteStdout("%s\n", bytes); + flush_stdout(); + PyGILState_Release(gstate); Py_XDECREF(str); return (K)0; } if ((k = k_py_error())) { + flush_stdout(); PyGILState_Release(gstate); Py_XDECREF(str); return k; } const char *chars = PyBytes_AS_STRING(str); + flush_stdout(); PyGILState_Release(gstate); return kp_ptr(chars); } EXPORT K get_attr(K f, K attr) { + if (pykx_threading) + return raise_k_error("pykx.q is not supported when using PYKX_THREADING"); K k; if (f->t != 112) { if (f->t == 105) { @@ -593,6 +631,8 @@ EXPORT K get_attr(K f, K attr) { EXPORT K get_global(K attr) { + if (pykx_threading) + return raise_k_error("pykx.q is not supported when using PYKX_THREADING"); K k; if (attr->t != -11) { return raise_k_error("Expected a SymbolAtom for the attribute to get in .pykx.get"); @@ -617,6 +657,8 @@ EXPORT K get_global(K attr) { EXPORT K set_global(K attr, K val) { + if (pykx_threading) + return raise_k_error("pykx.q is not supported when using PYKX_THREADING"); K k; int gstate = PyGILState_Ensure(); @@ -641,6 +683,8 @@ EXPORT K set_global(K attr, K val) { EXPORT K set_attr(K f, K attr, K val) { + if (pykx_threading) + return 
raise_k_error("pykx.q is not supported when using PYKX_THREADING"); if (f->t != 112) { if (f->t == 105) { return raise_k_error("Expected foreign object for call to .pykx.setattr, try unwrapping the foreign object with `."); @@ -669,6 +713,8 @@ EXPORT K set_attr(K f, K attr, K val) { } EXPORT K import(K module) { + if (pykx_threading) + return raise_k_error("pykx.q is not supported when using PYKX_THREADING"); K k; K res; if (module->t != -11) @@ -688,6 +734,8 @@ EXPORT K import(K module) { EXPORT K call_func(K f, K has_no_args, K args, K kwargs) { + if (pykx_threading) + return raise_k_error("pykx.q is not supported when using PYKX_THREADING"); K k; PyObject* pyf = NULL; @@ -744,6 +792,7 @@ EXPORT K call_func(K f, K has_no_args, K args, K kwargs) { if ((k = k_py_error())) { if (pyres) Py_XDECREF(pyres); + flush_stdout(); PyGILState_Release(gstate); return k; } @@ -754,6 +803,7 @@ EXPORT K call_func(K f, K has_no_args, K args, K kwargs) { res = create_foreign(pyres); Py_XDECREF(pyres); + flush_stdout(); PyGILState_Release(gstate); return res; } diff --git a/src/pykx/pykx.q b/src/pykx/pykx.q index 4aae73e..2edd4bb 100644 --- a/src/pykx/pykx.q +++ b/src/pykx/pykx.q @@ -57,8 +57,13 @@ k)c:{'[y;x]}/|: // @desc Compose using enlist for generation of variadic functions k)ce:{'[y;x]}/enlist,|: +// @desc Print a message warning that "UNDER_PYTHON" is deprecated +if[not ""~getenv`UNDER_PYTHON; + -1"WARN: Environment variable 'UNDER_PYTHON' is deprecated, if set locally update to use 'PYKX_UNDER_PYTHON'"; + ] + // @desc Make use of `pykx.so` logic when running under Python -if["true"~getenv`UNDER_PYTHON; +if["true"~getenv`PYKX_UNDER_PYTHON; util.load:2:[hsym`$pykxDir,"/pykx";] ]; @@ -117,19 +122,6 @@ util.CFunctions:flip `qname`cname`args!flip ( // Load defined C functions to PyKX for later use {.Q.dd[`.pykx;x`qname]set util.load x`cname`args}each util.CFunctions; -// @private -// @desc -// Set default conversion type for K objects. 
-// This is set to numpy by default to facilitate migration from numpy -util.defaultConv:$[ - ""~util.conversion:getenv`PYKX_DEFAULT_CONVERSION; - "default"; - $[util.conversion in ("py";"np";"pd";"pa";"k";"raw";"default"); - util.conversion; - '"Unknown default conversion type" - ] - ] - // @private // @desc Convert q/Python objects to Pythonic foreigns util.toPython:{wrap util.pyForeign[$[type[x]in 104 105 112h;wrap[unwrap x]`;x];y;z]} @@ -159,7 +151,8 @@ util.isconv:{any(util.ispy;util.isnp;util.ispd;util.ispa;util.isk;util.israw)@\: // @private // @desc Convert a supplied argument to the specified python object type util.convertArg:{ - $[util.isconv x; + $[util.isw x;x; + util.isconv x; .z.s[(x; 1; 0b)]; $[not util.isconv x 0;util.toPython . x; util.ispy x 0; [.z.s[(x[0][::][1]; 1; x[2])]]; @@ -176,7 +169,8 @@ util.convertArg:{ // @private // @desc Convert a supplied argument to the default q -> Python type util.toDefault:{ - $[util.isconv x;(::); + $[util.isw x;(::); + util.isconv x;(::); "py" ~ util.defaultConv;topy; "np" ~ util.defaultConv;tonp; "pd" ~ util.defaultConv;topd; @@ -235,7 +229,16 @@ util.wf:{[f;x].pykx.util.pykx[f;x]} // @desc // Functionality used for checking if an supplied // argument is a Python foreign or wrapped object -util.isw:{$[105=type x;.pykx.util.wf~$[104 105h~t:type each u:get x;:.z.s last u;104h~first t;first value first u;0b];0b]} +util.isw:{ + if[not 105h~type x;:0b]; + $[.pykx.util.wf~$[104 105h~t:type each u:get x; + :.z.s last u; + 104h~first t; + first value first u; + 0b];:1b; + 101 105h~t;:(::)~first u; + 100 105h~t;:.pykx.toq~first u; + 0b]} // @private // @desc Functionality for management of keywords/keyword dictionaries etc. 
@@ -282,11 +285,6 @@ util.parseArgs:{ (hasargs; arglist; kwargs) }; - -// @private -// @desc Wrap a supplied foreign object function -util.wfunc:{[f;x]r:wrap f x 0;$[count x:1_x;.[;x];]r} - // ----------------------- // User Callable Functions // ----------------------- @@ -348,7 +346,7 @@ util.wfunc:{[f;x]r:wrap f x 0;$[count x:1_x;.[;x];]r} // // name | type | description // -----------|--------|------------ -// `argList` | `list` | List of opsitional arguments +// `argList` | `list` | List of optional arguments // // **Return:** // @@ -642,7 +640,7 @@ toraw: {x y}(`..raw;;) // @name .pykx.todefault // @category api // @overview -// _Tag a q object to be indicate a raw conversion when called in Python_ +// _Tag a q object to indicate it should use the PyKX default conversion when called in Python_ // // ```q // .pykx.todefault[qObject] @@ -658,26 +656,24 @@ toraw: {x y}(`..raw;;) // // type | description // -------------|------------ -// `projection` | A projection which is used to indicate that once the q object is passed to Python for evaluation is should be treated as a raw object. | +// `projection` | A projection which is used to indicate that once the q object is passed to Python for evaluation is should be treated as a default object. | // // !!! Note // The `todefault` conversion is used to match embedPy conversion logic, in particular it converts q lists to Python lists when dealing with contiguous datatypes rather than to nested single value array types. 
Additionally it converts q tables to Pandas DataFrames // // ```q -// // Denote that a q object once passed to Python should be managed as a Numpy object -// q).pykx.toraw til 10 -// enlist[`..raw;;][0 1 2 3 4 5 6 7 8 9] -// -// // Pass a q object to Python with default conversions and return type -// q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.tonp (til 10;til 10) -// +// // Denote that a q object once passed to Python should be managed as a default object +// // in this case a q list is converted to numpy +// q).pykx.todefault til 10 +// enlist[`..numpy;;][0 1 2 3 4 5 6 7 8 9] // -// // Pass a q object to Python treating the Python object as a raw Object +// // Pass a q list to Python treating the Python object as PyKX default // q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.todefault (til 10;til 10) // // -// // Pass a q Table to Python treating the Python table as a Pandas DataFrame -// q) +// // Pass a q Table to Python by default treating the Python table as a Pandas DataFrame +// q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.todefault ([]til 10;til 10) +// // ``` todefault:{$[0h=type x;toraw x;$[99h~type x;all 98h=type each(key x;value x);0b]|98h=type x;topd x;tonp x]} @@ -753,7 +749,13 @@ wrap:ce util.wf@ // q).pykx.unwrap a // foreign // ``` -unwrap:{$[util.isw x;$[104 105h~type each u:get x;(last u)`.;x`.];x]} +unwrap:{ + c:last get last get first get last@; + $[util.isw x;t:type each u:get x;:x]; + if[(101 105h~t) and (::)~first u;:c u]; + if[(100 105h~t) and .pykx.toq~first u;:c u]; + if[104 105h~t;:(last u)`.]; + x`.} // @kind function @@ -794,9 +796,9 @@ unwrap:{$[util.isw x;$[104 105h~type each u:get x;(last u)`.;x`.];x]} // // // ```q -// // Default value on startup is "np" +// // Default value on startup is "default" // q).pykx.util.defaultConv -// "np" +// "default" // // // Set default value to Pandas // q).pykx.setdefault["Pandas"] @@ -973,7 +975,7 @@ pyexec:util.pyrun[0b; 1b; 0b] // q).pykx.eval["lambda x: x + 1"][5]` // 6 
// ``` -.pykx.eval:{wrap pyeval x} +.pykx.eval:ce util.wfunc pyeval // @kind function // @name .pykx.qeval @@ -1169,11 +1171,10 @@ repr :{$[type[x]in 104 105 112h;util.repr[1b] unwrap x;.Q.s x]} // 4 0.711172 False // ``` print:{ - $[type[x]in 104 105 112h; - $[util.isconv x; - .pykx.eval["lambda x:print(x)"]x; - util.repr[0b] unwrap x - ]; + $[type[x]in 104 105 112h ; + $[any(util.isw;util.isconv)@\:x; + .pykx.eval["lambda x:print(x)"]x; + show x]; show x ]; } @@ -1459,7 +1460,7 @@ getattr; // Note this function is loaded directly from C // q).pykx.print setCallable // [1 2] // ``` -pycallable:{$[util.isw x;x(>);util.isf x;wrap[x](>);'"Could not convert provided function to callable with Python return"]} +pycallable:{$[util.isw x;wrap[unwrap[x]](>);util.isf x;wrap[x](>);'"Could not convert provided function to callable with Python return"]} // @kind function // @name .pykx.qcallable @@ -1492,14 +1493,44 @@ pycallable:{$[util.isw x;x(>);util.isf x;wrap[x](>);'"Could not convert provided // q).pykx.print setCallable // [1 2] // ``` -qcallable :{$[util.isw x;x(<);util.isf x;wrap[x](<);'"Could not convert provided function to callable with q return"]} +qcallable:{$[util.isw x;wrap[unwrap[x]](<);util.isf x;wrap[x](<);'"Could not convert provided function to callable with q return"]} +// @kind function +// @name .pykx.safeReimport +// @category api +// @overview +// _Isolated execution of a q function which relies on importing PyKX_ +// +// ```q +// .pykx.safeReimport[qFunction] +// ``` +// +// **Parameters:** +// +// name | type | description +// -------------|------------|------------- +// `qFunction` | `function` | A function which is to be run following unsetting of PyKX environment variables and prior to their reset +// +// **Returns:** +// +// type | description +// -------|------------ +// `any` | On successful execution this function will return the result of the executed function +// +// **Example:** +// +// ```q +// q)\l pykx.q +// 
q).pykx.safeReimport[{system"python -c 'import pykx as kx'";til 5}] +// 0 1 2 3 4 +// ``` safeReimport:{[x] pyexec["pykx_internal_reimporter = pykx.PyKXReimport()"]; envlist:(`PYKX_DEFAULT_CONVERSION; `PYKX_UNDER_Q; `SKIP_UNDERQ; `PYKX_SKIP_UNDERQ; + `PYKX_UNDER_PYTHON; `UNDER_PYTHON; `PYKX_LOADED_UNDER_Q; `PYKX_Q_LOADED_MARKER; @@ -1508,11 +1539,11 @@ safeReimport:{[x] envvals:getenv each envlist; .pykx.eval["pykx_internal_reimporter.reset()"]; - r: x[]; + r: @[{(0b;x y)}x;(::);{(1b;x)}]; pyexec["del pykx_internal_reimporter"]; setenv'[envlist;envvals]; - r + $[r 0;';::] r 1 } // @kind function @@ -1587,8 +1618,15 @@ debugInfo:{ // -------------------------------------------------- // 0.439081 49f2404d-5aec-f7c8-abba-e2885a580fb6 mil // 0.5759051 656b5e69-d445-417e-bfe7-1994ddb87915 igf +// +// // Enter PyKX console setting Python objects using PyKX +// q).pykx.console[] +// >>> a = list(range(5)) +// >>> quit() +// q).pykx.eval["a"]` +// 0 1 2 3 4 // ``` -console:{pyexec"pykx.console.PyConsole().interact(banner='', exitmsg='')"}; +console:{pyexec"from code import InteractiveConsole\n__pykx_console__ = InteractiveConsole(globals())\n__pykx_console__.push('import sys')\n__pykx_console__.push('quit = sys.exit')\n__pykx_console__.push('exit = sys.exit')\ntry:\n line = __pykx_console__.interact(banner='', exitmsg='')\nexcept SystemExit:\n pykx._pykx_helpers.clean_errors()"} // @private // @desc @@ -1596,11 +1634,85 @@ console:{pyexec"pykx.console.PyConsole().interact(banner='', exitmsg='')"}; // or when using the following syntax `p)` within a q session .p.e:{.pykx.pyexec x} +// @private +// @desc +// Set default conversion type for K objects. 
+setdefault {$[""~c:getenv`PYKX_DEFAULT_CONVERSION;"default";c]}[]; + // @private // @desc // Finalise loading of PyKX functionality setting environment variables // needed to ensure loading PyKX multiple times does not result in unexpected errors finalise[]; +// @private +// @kind function +// @name .pykx.listExtensions +// @category api +// @overview +// _List all q scripts in the extensions directory which can be loaded_ +// +// ```q +// .pykx.listExtensions[] +// ``` +// +// **Returns:** +// +// type | description +// -------|------------ +// `list` | A list of strings denoting the available extensions in your version of PyKX +// +// **Example:** +// +// ```q +// q)\l pykx.q +// q).pykx.listExtensions[] +// "dashboards" +// ``` +listExtensions:{-2 _/:lst where like[;"*.q"]lst:string key hsym`$pykxDir,"/extensions/"} + +// @private +// @kind function +// @name .pykx.loadExtension +// @category api +// @overview +// _Loading of a PyKX extension_ +// +// ```q +// .pykx.loadExtension[ext] +// ``` +// +// **Parameters:** +// +// name | type | description +// -------|----------|------------- +// `ext` | `string` | The name of the extension which is to be loaded +// +// **Returns:** +// +// type | description +// -------|------------ +// `null` | On successful execution this function will load the extension and return null +// +// **Example:** +// +// ```q +// q)\l pykx.q +// q)`dash in key `.pykx +// 0b +// q).pykx.listExtensions[] +// "dashboards" +// q)`dash in key `.pykx +// 1b +// ``` +loadExtension:{[ext] + if[not 10h=type ext;'"Extension provided must be of type string"]; + if[not ext in listExtensions[];'"Extension provided '",ext,"' not available"]; + @[system"l ",; + pykxDir,"/extensions/",ext,".q"; + {'x," raised when attempting to load extension"} + ]; + } + // @desc Restore context used at initialization of script system"d ",string .pykx.util.prevCtx; diff --git a/src/pykx/pykx_init.q_ b/src/pykx/pykx_init.q_ index 6a5eb00..7db2989 100644 Binary files 
a/src/pykx/pykx_init.q_ and b/src/pykx/pykx_init.q_ differ diff --git a/src/pykx/pykxq.c b/src/pykx/pykxq.c index 8c2c4ad..4d32806 100644 --- a/src/pykx/pykxq.c +++ b/src/pykx/pykxq.c @@ -17,6 +17,7 @@ void* q_lib; +static P sys; static P builtins; static P toq_module; static P toq; @@ -39,6 +40,7 @@ static P M, errfmt; static void** N; int pykx_flag = -1; +bool pykx_threading = false; // Equivalent to starting Python with the `-S` flag. Allows us to edit some global config variables // before `site.main()` is called. @@ -65,10 +67,13 @@ static int check_py_foreign(K x){return x->t==112 && x->n==2 && *kK(x)==(K)py_de EXPORT K k_check_python(K x){return kb(check_py_foreign(x));} -EXPORT K k_pykx_init(K k_q_lib_path) { +EXPORT K k_pykx_init(K k_q_lib_pat, K _pykx_threading) { + if (_pykx_threading->g) + pykx_threading = true; PyGILState_STATE gstate; gstate = PyGILState_Ensure(); + sys = PyModule_GetDict(PyImport_ImportModule("sys")); builtins = PyModule_GetDict(PyImport_ImportModule("builtins")); toq_module = PyModule_GetDict(PyImport_AddModule("pykx.toq")); toq = PyDict_GetItemString(toq_module, "toq"); @@ -146,9 +151,15 @@ static K create_foreign(P p) { return x; } +void flush_stdout() { + P out = PyDict_GetItemString(sys, "stdout"); + if ( PyObject_HasAttrString(out, "flush") ) { + PyObject_CallMethod(out, "flush", NULL); + } +} + K k_py_error() { if (!PyErr_Occurred()) return (K)0; - P ex_type; P ex_value; P ex_traceback; @@ -276,14 +287,17 @@ EXPORT K k_pyrun(K k_ret, K k_eval_or_exec, K as_foreign, K k_code_string) { if (!k_ret->g) { if ((k = k_py_error())) { + flush_stdout(); Py_XDECREF(py_ret); PyGILState_Release(gstate); return k; } else Py_XDECREF(py_ret); + flush_stdout(); PyGILState_Release(gstate); return (K)0; } if ((k = k_py_error())) { + flush_stdout(); Py_XDECREF(py_ret); PyGILState_Release(gstate); return k; @@ -291,6 +305,7 @@ EXPORT K k_pyrun(K k_ret, K k_eval_or_exec, K as_foreign, K k_code_string) { if (as_foreign->g) { k = 
(K)create_foreign(py_ret); + flush_stdout(); Py_XDECREF(py_ret); PyGILState_Release(gstate); return k; @@ -298,6 +313,7 @@ EXPORT K k_pyrun(K k_ret, K k_eval_or_exec, K as_foreign, K k_code_string) { P py_k_ret = PyObject_CallFunctionObjArgs(toq, py_ret, NULL); Py_XDECREF(py_ret); if ((k = k_py_error())) { + flush_stdout(); Py_XDECREF(py_k_ret); PyGILState_Release(gstate); return k; @@ -306,6 +322,7 @@ EXPORT K k_pyrun(K k_ret, K k_eval_or_exec, K as_foreign, K k_code_string) { Py_XDECREF(py_k_ret); k = (K)PyLong_AsLongLong(py_addr); Py_XDECREF(py_addr); + flush_stdout(); PyGILState_Release(gstate); return k; } @@ -451,15 +468,19 @@ EXPORT K repr(K as_repr, K f) { Py_XDECREF(repr); if (!as_repr->g) { const char *bytes = PyBytes_AS_STRING(str); - printf("%s\n", bytes); + PySys_WriteStdout("%s\n", bytes); + flush_stdout(); + PyGILState_Release(gstate); Py_XDECREF(str); return (K)0; } if ((k = k_py_error())) { + flush_stdout(); PyGILState_Release(gstate); Py_XDECREF(str); return k; } + flush_stdout(); const char *chars = PyBytes_AS_STRING(str); PyGILState_Release(gstate); return kp(chars); @@ -644,6 +665,7 @@ EXPORT K call_func(K f, K has_no_args, K args, K kwargs) { if ((k = k_py_error())) { if (pyres) Py_XDECREF(pyres); + flush_stdout(); PyGILState_Release(gstate); return k; } @@ -654,6 +676,7 @@ EXPORT K call_func(K f, K has_no_args, K args, K kwargs) { res = create_foreign(pyres); Py_XDECREF(pyres); + flush_stdout(); PyGILState_Release(gstate); return res; } diff --git a/src/pykx/query.py b/src/pykx/query.py index a2cdc7c..4edb733 100644 --- a/src/pykx/query.py +++ b/src/pykx/query.py @@ -4,10 +4,12 @@ from random import choices from string import ascii_letters from typing import Any, Dict, List, Optional, Union +import warnings from . import Q from . 
import wrappers as k -from .exceptions import PyKXException +from .ipc import QFuture +from .exceptions import PyKXException, QError __all__ = [ @@ -62,7 +64,8 @@ def select(self, table: Union[k.Table, str], columns: Optional[Union[Dict[str, str], k.Dictionary]] = None, where: Optional[Union[List[str], str, k.SymbolAtom, k.SymbolVector]] = None, - by: Optional[Union[Dict[str, str], k.Dictionary]] = None + by: Optional[Union[Dict[str, str], k.Dictionary]] = None, + inplace: bool = False, ) -> k.K: """Apply a q style select statement on tables defined within the process. @@ -78,6 +81,9 @@ def select(self, appropriate aggregations are to be applied. by: A dictionary mapping the names to be assigned to the produced columns and the columns whose results are used to construct the groups of the by clause. + inplace: Whether the result of an update is to be persisted. This operates for tables + referenced by name in q memory or general table objects + https://code.kx.com/q/basics/qsql/#result-and-side-effects. Examples: @@ -115,7 +121,7 @@ def select(self, pykx.q.qsql.select(qtab, columns={'avgCol2': 'avg col2', 'minCol4': 'min col4'}, by={'col1': 'col1'}, where='col3=0b') ``` """ # noqa: E501 - return self._seud(table, 'select', columns, where, by) + return self._seud(table, 'select', columns, where, by, inplace=inplace) def exec(self, table: Union[k.Table, str], @@ -201,7 +207,8 @@ def update(self, columns: Optional[Union[Dict[str, str], k.Dictionary]] = None, where: Optional[Union[List[str], str, k.SymbolAtom, k.SymbolVector]] = None, by: Optional[Union[Dict[str, str], k.Dictionary]] = None, - modify: bool = False + modify: bool = False, + inplace: bool = False, ) -> k.K: """ Apply a q style update statement on tables defined within the process. @@ -219,8 +226,12 @@ def update(self, appropriate aggregations are to be applied. 
by: A dictionary mapping the names to be assigned to the produced columns and the columns whose results are used to construct the groups of the by clause. - modify: Whether the result of a delete is to be saved. This holds when `table` is the - name of a table in q memory, as outlined at: + modify: `Deprecated`, please use `inplace` instead. Whether the result of an update + is to be saved. This operates for tables referenced by name in q memory or + general table objects + https://code.kx.com/q/basics/qsql/#result-and-side-effects. + inplace: Whether the result of an update is to be persisted. This operates for tables + referenced by name in q memory or general table objects https://code.kx.com/q/basics/qsql/#result-and-side-effects. Examples: @@ -268,17 +279,18 @@ def update(self, Apply an update grouping based on a by phrase and persist the result using the modify keyword ```python - pykx.q.qsql.update('byqtab', columns={'weight': 'avg weight'}, by={'city': 'city'}, modify=True) + pykx.q.qsql.update('byqtab', columns={'weight': 'avg weight'}, by={'city': 'city'}, inplace=True) pykx.q['byqtab'] ``` """ # noqa: E501 - return self._seud(table, 'update', columns, where, by, modify) + return self._seud(table, 'update', columns, where, by, modify, inplace) def delete(self, table: Union[k.Table, str], columns: Optional[Union[List[str], k.SymbolVector]] = None, where: Optional[Union[List[str], str, k.SymbolAtom, k.SymbolVector]] = None, - modify: bool = False + modify: bool = False, + inplace: bool = False, ) -> k.K: """ Apply a q style delete statement on tables defined within the process. @@ -292,8 +304,12 @@ def delete(self, columns: Denotes the columns to be deleted from a table. where: Conditional filtering used to select subsets of the data which are to be deleted from the table. - modify: Whether the result of a delete is to be saved. 
This holds when `table` is the - name of a table in q memory, as outlined at: + modify: `Deprecated`, please use `inplace` instead. Whether the result of a delete + is to be saved. This holds when `table` is the name of a table in q memory, + as outlined at: + https://code.kx.com/q/basics/qsql/#result-and-side-effects. + inplace: Whether the result of an update is to be persisted. This operates for tables + referenced by name in q memory or general table objects https://code.kx.com/q/basics/qsql/#result-and-side-effects. Examples: @@ -337,11 +353,22 @@ def delete(self, if columns is not None and where is not None: raise TypeError("'where' and 'columns' clauses cannot be used simultaneously in a " "delete statement") - return self._seud(table, 'delete', columns, where, None, modify) + return self._seud(table, 'delete', columns, where, None, modify, inplace) + + def _seud(self, table, query_type, columns=None, where=None, by=None, modify=False, inplace=False) -> k.K: # noqa: C901, E501 + if modify and inplace: + raise RuntimeError("Attempting to use both 'modify' and 'inplace' keywords, please use only 'inplace'") # noqa: E501 + + if modify: + warnings.warn("The 'modify' keyword is now deprecated please use 'inplace'") + inplace = modify - def _seud(self, table, query_type, columns=None, where=None, by=None, modify=False) -> k.K: if not isinstance(table, str): table = k.K(table) + + if isinstance(table, (k.SplayedTable, k.PartitionedTable)) and inplace: + raise QError("Application of 'inplace' updates not " + "supported for splayed/partitioned tables") select_clause = self._generate_clause(columns, 'columns', query_type) by_clause = self._generate_clause(by, 'by', query_type) where_clause = self._generate_clause(where, 'where', query_type) @@ -351,27 +378,38 @@ def _seud(self, table, query_type, columns=None, where=None, by=None, modify=Fal raise TypeError("'table' object provided was not a K tabular object or an " "object which could be converted to an appropriate " 
"representation") - if modify: - raise TypeError("'modify' argument can only be used when 'table' is saved as a " - "named object in q memory") randstring = ''.join(choices(ascii_letters, k=32)) self.randstring = randstring table_name = f'.pykx.i._{randstring}' self._q[table_name] = table + original_table = table table = table_name elif not isinstance(table, str): raise TypeError("'table' must be a an object which is convertible to a K object " "or a string denoting an item in q memory") query_char = '!' if query_type in ('delete', 'update') else '?' - table_code = f'`$"{table}"' if modify else f'get`$"{table}"' + if (not inplace and query_type in ('delete', 'update')): + table_code = f'get`$"{table}"' + else: + table_code = f'`$"{table}"' try: - return self._q( + res = self._q( f'{query_char}[{table_code};;;]', where_clause, by_clause, select_clause, wait=True, ) + if inplace and isinstance(original_table, k.K): + if query_type in ('delete', 'update'): + res = self._q[table_name] + if isinstance(res, QFuture): + raise QError("'inplace' not supported with asyncronous query") + if type(original_table) != type(res): + raise QError('Returned data format does not match input type, ' + 'cannot perform inplace operation') + original_table.__dict__.update(res.__dict__) + return res finally: if isinstance(original_table, k.K): self._q._call(f'![`.pykx.i;();0b;enlist[`$"_{randstring}"]]', wait=True) @@ -441,7 +479,7 @@ def _generate_clause_where(self, clause_value) -> k.List: class SQL: - """Wrapper around the KX Insights Core ANSI SQL interface. + """Wrapper around the [KX Insights Core ANSI SQL](https://code.kx.com/insights/core/sql.html) interface. 
Lots of examples within this interface use a table named trades, an example of this table is diff --git a/src/pykx/register.py b/src/pykx/register.py index 1d609ab..b268f99 100644 --- a/src/pykx/register.py +++ b/src/pykx/register.py @@ -1,5 +1,4 @@ """Functionality for the registration of conversion functions between PyKX and Python""" - from .toq import _converter_from_python_type diff --git a/src/pykx/reimporter.py b/src/pykx/reimporter.py index 7a3e90c..e7d710a 100644 --- a/src/pykx/reimporter.py +++ b/src/pykx/reimporter.py @@ -36,6 +36,7 @@ def __init__(self): 'SKIP_UNDERQ', 'PYKX_SKIP_UNDERQ', 'UNDER_PYTHON', + 'PYKX_UNDER_PYTHON', 'PYKX_Q_LOADED_MARKER', 'PYKX_LOADED_UNDER_Q', 'QHOME', diff --git a/src/pykx/remote.py b/src/pykx/remote.py new file mode 100644 index 0000000..050b065 --- /dev/null +++ b/src/pykx/remote.py @@ -0,0 +1,239 @@ +""" +Functionality for the generation and management of remote Python function +execution. + +!!! Warning + + This functionality is provided in it's present form as a BETA + Feature and is subject to change. To enable this functionality + for testing please following configuration instructions + [here](../user-guide/configuration.md) setting `PYKX_BETA_FEATURES='true'` +""" +import inspect +from typing import Union + +from . import beta_features +from .config import _check_beta +from .ipc import SyncQConnection + + +try: + import dill + import_success = True +except BaseException: + import_success = False + + +__all__ = [ + 'session', + 'function' +] + +beta_features.append('Remote Functions') + + +def _init(_q): + global q + q = _q + + +def __dir__(): + return __all__ + + +class session(): + """ + A session refers to a connection to a remote kdb+/q process against which + users are defining/registering Python Functions which will return results + to a Python session. 
+ """ + def __init__(self): + _check_beta('Remote Functions') + if not import_success: + raise ImportError("Failed to load Python package: 'dill'," + " please install dependency using 'pip install pykx[beta]'") + self.valid = False + self._libraries = [] + self._session = None + + def add_library(self, *args): + """ + Add a list of Python libraries which will be imported prior to definition + of a remote Python function, this allows users for example to import numpy + and use it as a defined library within a remote function. + + Parameters: + *args: A list of strings denoting the packages which are to be imported + for use by a remote function. + + Returns: + Returns a `None` type object on successful invocation. + + Example: + + ```python + >>> from pykx.remote import session + >>> remote_session = session() + >>> remote_session.add_library('numpy', 'pandas') + ``` + """ + if self._session is None: + raise Exception("Unable to add packages in the absence of a session") + for i in args: + if not isinstance(i, str): + raise Exception(f'Supplied library argument {i} is not a str like object, ' + f'supplied object is of type: {type(i)}') + self._libraries.append(i) + + def create(self, + host: Union[str, bytes] = 'localhost', + port: int = None, + *, + username: Union[str, bytes] = '', + password: Union[str, bytes] = '', + timeout: float = 0.0, + large_messages: bool = True, + tls: bool = False): + """ + Populate a session for use when generating a function for remote execution. This + session will be backed by a SyncQConnection instance, note that only one session + can be associated with a given instance of a `session` class. + + Parameters: + host: The host name to which a connection is to be established. + port: The port to which a connection is to be established. + username: Username for q connection authorization. + password: Password for q connection authorization. + timeout: Timeout for blocking socket operations in seconds. 
If set to `0`, the socket + will be non-blocking. + large_messages: Whether support for messages >2GB should be enabled. + tls: Whether TLS should be used. + + Returns: + Returns a `None` type object on successful connection creation + + Example: + + - Connect to a q session on localhost at port 5050 + + ```python + >>> from pykx.remote import session + >>> remote_session = session() + >>> remote_session.create(port = 5050) + ``` + + - Connect to a user-password protected q session at a defined port + + ```python + >>> from pykx.remote import session + >>> remote_session = session() + >>> remote_session.create_session(port=5001, username='username', password='password') + ``` + """ + if self._session is not None: + raise Exception("Active session in progress") + self._session = SyncQConnection(host, port, + username=username, + password=password, + timeout=timeout, + large_messages=large_messages, + tls=tls, + no_ctx=True) + + def clear(self): + """ + Reset/clear the session and libraries associated with a defined session information + + Example: + + ```python + >>> from pykx.remote import session + >>> remote_session = session() + >>> remote_session.create(port = 5050) + >>> remote_session.add_library('numpy') + >>> {'session': session._session, 'libraries': session._libraries} + {'session': pykx.QConnection(port=5001), 'libraries': ['numpy']} + >>> remote_session.clear() + >>> {'session': session._session, 'libraries': session._libraries} + {'session': None, 'libraries': []} + ``` + """ + self._session = None + self._libraries = [] + + +def function(remote_session, *args): + """ + This decorator allows users to tag functions which will be executed + on a remote server defined by a `kx.remote.session` instance. 
+ + Parameters: + remote_session: Valid `kx.remote.session` object used to interact with external q process + *args: When invoked the decorated function will be passed supplied arguments + + Returns: + When invoked the decorated function will return the result as a PyKX object to the + calling process + + Examples: + + - Call a basic decorated function on a remote process + + ```python + >>> from pykx.remote import session, function + >>> remote_session = session() + >>> session.create(port = 5050) + >>> @function(session) + ... def func(x): + ... return x+1 + >>> func(1) + pykx.LongAtom(pykx.q('2')) + ``` + + - Apply a function making use of a named library + + ```python + >>> from pykx.remote import session, function + >>> remote_session = session() + >>> session.create(port = 5050) + >>> session.add_library('numpy') + >>> @function(session) + ... def func(start, stop, count): + ... return numpy.linspace(start, stop, count) + >>> func(0, 10, 5) + pykx.FloatVector(pykx.q('0 2.5 5 7.5 10')) + ``` + """ + def inner_decorator(func): + def pykx_func(*args, _function=func): + _check_beta('Remote Functions') + if not isinstance(remote_session, session): + raise Exception("Supplied remote_session instance must " + "be a kx.remote.session object") + if remote_session._session is None: + raise Exception("User session must be generated using " + "the 'create_session' function") + if not remote_session.valid: + pykx_loaded = remote_session._session('`pykx in key `') + if not pykx_loaded: + print("PyKX not loaded on remote server, attempting to load PyKX") + remote_session._session("@[system\"l \",;\"pykx.q\";" + "{'\"Failed to load PyKX with error: \",x}]") + remote_session.valid = True + if remote_session._libraries is not None: + for i in remote_session._libraries: + remote_session._session('{x:string x;' + ' @[.pykx.pyexec;' + '"import ",x;{\'"Failed to load package: ",' + 'x," with: ",y}[x]]}', + i) + try: + src = dill.source.getsource(_function) + except BaseException: 
+ src = inspect.getsource(_function) + return remote_session._session('{.pykx.pyexec "\n" sv 1_"\n" vs x; .pykx.get[y;<] . z}', + bytes(src, 'UTF-8'), + _function.__name__, + list(args)) + return pykx_func + return inner_decorator diff --git a/src/pykx/system.py b/src/pykx/system.py index f3b9ddf..8cd44ba 100644 --- a/src/pykx/system.py +++ b/src/pykx/system.py @@ -377,16 +377,15 @@ def variables(self, ns=None): kx.q.system.variables() ``` - Get the variables defined within the `.foo` namespace, note the leading `.` may be ommited. + Get the variables associated with a q namespace/dictionary ``` + kx.q.system.variables('.foo') kx.q.system.variables('foo') ``` """ if ns is not None: ns = str(ns) - if ns[0] != '.': - ns = '.' + ns return self._q._call(f'\\v {ns}', wait=True) return self._q._call('\\v', wait=True) diff --git a/src/pykx/toq.pyx b/src/pykx/toq.pyx index 5d72169..baab0bb 100644 --- a/src/pykx/toq.pyx +++ b/src/pykx/toq.pyx @@ -81,7 +81,7 @@ from cpython.ref cimport Py_INCREF cimport numpy as cnp from pykx cimport core -from pykx._wrappers cimport factory, UUID_to_complex +from pykx._wrappers cimport factory import datetime from ctypes import CDLL @@ -115,6 +115,7 @@ __all__ = [ 'from_bytes', 'from_callable', 'from_datetime_date', + 'from_datetime_time', 'from_datetime_datetime', 'from_datetime_timedelta', 'from_dict', @@ -239,8 +240,7 @@ def _conversion_TypeError(x, input_type, output_type): if output_type is None: output_type = 'K object' x_repr = repr(x) if isinstance(x, str) else f"'{x!r}'" - return TypeError(f"Cannot convert {input_type} {x_repr} to {output_type}") - + return TypeError(f"Cannot convert {input_type} {x_repr} to {output_type}. 
See pykx.register to register custom conversions.") KType = Union[k.K, int] @@ -773,18 +773,10 @@ def from_uuid_UUID(x: UUID, if ktype is not None and not ktype is k.GUIDAtom: raise _conversion_TypeError(x, repr('uuid.UUID'), ktype) - u = x.int - u = (u & 0x0000000000000000FFFFFFFFFFFFFFFF) << 64 | (u & 0xFFFFFFFFFFFFFFFF0000000000000000) >> 64 # noqa - u = (u & 0x00000000FFFFFFFF00000000FFFFFFFF) << 32 | (u & 0xFFFFFFFF00000000FFFFFFFF00000000) >> 32 # noqa - u = (u & 0x0000FFFF0000FFFF0000FFFF0000FFFF) << 16 | (u & 0xFFFF0000FFFF0000FFFF0000FFFF0000) >> 16 # noqa - u = (u & 0x00FF00FF00FF00FF00FF00FF00FF00FF) << 8 | (u & 0xFF00FF00FF00FF00FF00FF00FF00FF00) >> 8 # noqa - cdef uint64_t upper_bits = (u & (-1 ^ 0xFFFFFFFFFFFFFFFF)) >> 64 - cdef uint64_t lower_bits = u & 0xFFFFFFFFFFFFFFFF - cdef uint64_t data[2] - data[0] = lower_bits - data[1] = upper_bits + u = x.bytes cdef core.U guid - guid.g = data + for i in range(len(u)): + guid.g[i] = u[i] return factory(core.ku(guid), False) @@ -1275,6 +1267,7 @@ def from_numpy_ndarray(x: np.ndarray, cdef long long n = x.size cdef core.K kx = NULL + cdef core.U guid cdef bytes as_bytes cdef uintptr_t data cdef long int i @@ -1282,7 +1275,10 @@ def from_numpy_ndarray(x: np.ndarray, if ktype is k.GUIDVector and x.dtype == object: kx = core.ktn(ktype.t, n) for i in range(n): - (kx.G0)[i] = UUID_to_complex(x[i]) + guid_bytes = x[i].bytes + for j in range(len(guid_bytes)): + guid.g[j] = guid_bytes[j] + (kx.G0)[i] = guid return factory(kx, False) elif ktype is k.SymbolVector: @@ -1831,6 +1827,18 @@ def from_datetime_date(x: Any, handle_nulls=handle_nulls) +def from_datetime_time(x: Any, + ktype: Optional[KType] = None, + *, + cast: bool = False, + handle_nulls: bool = False, +) -> k.TemporalFixedAtom: + if (cast is None or cast) and type(x) is not datetime.time: + x = cast_to_python_time(x) + + return k.toq(datetime.datetime.combine(datetime.date.min, x) - datetime.datetime.min) + + def from_datetime_datetime(x: Any, ktype: 
Optional[KType] = None, *, @@ -2394,7 +2402,7 @@ def from_callable(x: Callable, params = list(signature(x).parameters.values()) if len(params) > 8: raise ValueError('Too many parameters - q functions cannot have more than 8 parameters') - return q('{@[;`] .pykx.wrap[x]::}', k.Foreign(x)) + return q('{.pykx.wrap[x][<]}', k.Foreign(x)) cdef extern from 'include/foreign.h': @@ -2474,6 +2482,7 @@ _converter_from_ktype = { k.TimestampAtom: from_datetime_datetime, k.MonthAtom: from_datetime_datetime, k.DateAtom: from_datetime_date, + k.DatetimeAtom: from_datetime_datetime, k.TimespanAtom: from_datetime_timedelta, k.MinuteAtom: from_datetime_timedelta, @@ -2545,6 +2554,7 @@ _converter_from_python_type = { UUID: from_uuid_UUID, datetime.date: from_datetime_date, + datetime.time: from_datetime_time, datetime.datetime: from_datetime_datetime, datetime.timedelta: from_datetime_timedelta, np.datetime64: from_numpy_datetime64, diff --git a/src/pykx/util.py b/src/pykx/util.py index 8158cf9..766f07d 100644 --- a/src/pykx/util.py +++ b/src/pykx/util.py @@ -323,14 +323,15 @@ def env_information(): 'PYKX_DEFAULT_CONVERSION', 'PYKX_SKIP_UNDERQ', 'PYKX_UNSET_GLOBALS', 'PYKX_DEBUG_INSIGHTS_LIBRARIES', 'PYKX_EXECUTABLE', 'PYKX_PYTHON_LIB_PATH', 'PYKX_PYTHON_BASE_PATH', 'PYKX_PYTHON_HOME_PATH', 'PYKX_DIR', - 'PYKX_UNLICENSED', 'PYKX_LICENSED' + 'PYKX_UNLICENSED', 'PYKX_LICENSED', 'PYKX_BETA_FEATURES', 'PYKX_NO_SIGNAL' ] for x in envs: env_info += f"{x}: {os.getenv(x, '')}\n" env_info += '\n**** PyKX Deprecated Environment Variables ****\n' - deps = ['SKIP_UNDERQ', 'UNSET_PYKX_GLOBALS', 'KEEP_LOCAL_TIMES', 'IGNORE_QHOME'] + deps = ['SKIP_UNDERQ', 'UNSET_PYKX_GLOBALS', 'KEEP_LOCAL_TIMES', 'IGNORE_QHOME', + 'UNDER_PYTHON', 'PYKX_NO_SIGINT'] for x in deps: env_info += f"{x}: {os.getenv(x, '')}\n" diff --git a/src/pykx/wrappers.py b/src/pykx/wrappers.py index 89ab3a4..566e19a 100644 --- a/src/pykx/wrappers.py +++ b/src/pykx/wrappers.py @@ -209,18 +209,24 @@ def _idx_to_k(key, n): return key 
if isinstance(key, Integral): # replace negative index with equivalent positive index + key = _key_preprocess(key, n) + elif isinstance(key, slice): + key = range(n)[key] + return K(key) + + +def _key_preprocess(key, n, slice=False): + if key is not None: if key < 0: key = n + key - if key >= n or key < 0: + if (key >= n or key < 0) and not slice: raise IndexError('index out of range') - elif isinstance(key, slice) and key.stop is None: - # ensure slices have a stop set - # TODO: Ensure this produces index k objects from slices whose behavior matches indexing - # Python lists (KXI-9723). - key = slice(key.start, - n if (key.step or 1) > 0 else -1, - key.step) - return K(key) + elif slice: + if key < 0: + key = 0 + if key > n: + key = n + return(key) def _rich_convert(x: 'K', stdlib: bool = True): @@ -671,6 +677,13 @@ class TimeAtom(TemporalSpanAtom): _np_type = 'ms' _np_dtype = 'timedelta64[ms]' + # TODO: `cast` should be set to False at the next major release (KXI-12945) + def __new__(cls, x: Any, *, cast: bool = None, **kwargs): + if (type(x) == str) and x == 'now': # noqa: E721 + if licensed: + return q('.z.T') + return toq(x, ktype=None if cls is K else cls, cast=cast) # TODO: 'strict' and 'cast' flags + def _prototype(self=None): return TimeAtom(np.timedelta64(59789214, 'ms')) @@ -821,6 +834,13 @@ class DateAtom(TemporalFixedAtom): _epoch_offset = DATE_OFFSET _np_dtype = 'datetime64[D]' + # TODO: `cast` should be set to False at the next major release (KXI-12945) + def __new__(cls, x: Any, *, cast: bool = None, **kwargs): + if (type(x) == str) and x == 'today': # noqa: E721 + if licensed: + return q('.z.D') + return toq(x, ktype=None if cls is K else cls, cast=cast) # TODO: 'strict' and 'cast' flags + def _prototype(self=None): return DateAtom(np.datetime64('1972-05-31', 'D')) @@ -879,6 +899,13 @@ class TimestampAtom(TemporalFixedAtom): _epoch_offset = TIMESTAMP_OFFSET _np_dtype = 'datetime64[ns]' + # TODO: `cast` should be set to False at the next major 
release (KXI-12945) + def __new__(cls, x: Any, *, cast: bool = None, **kwargs): + if (type(x) == str) and x == 'now': # noqa: E721 + if licensed: + return q('.z.P') + return toq(x, ktype=None if cls is K else cls, cast=cast) # TODO: 'strict' and 'cast' flags + def _prototype(self=None): return TimestampAtom(datetime(2150, 10, 22, 20, 31, 15, 70713)) @@ -2317,7 +2344,6 @@ def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None): array = array.view(self._np_type) else: array = array.astype(self._np_type, copy=False) - if raw: has_nulls = False if has_nulls is None or has_nulls: @@ -2327,7 +2353,6 @@ def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None): if has_nulls: is_fixed = isinstance(self, TemporalFixedVector) array[nulls] = np.datetime64('NaT') if is_fixed else np.timedelta64('NaT') - return array @@ -2409,7 +2434,14 @@ def py(self, else: return [x.replace(tzinfo=tzinfo) for x in self.np().astype('datetime64[us]').astype(datetime).tolist()] - return self.np().astype('datetime64[us]').astype(datetime).tolist() + converted_vector=self.np().astype('datetime64[us]').astype(datetime).tolist() + null_pos=[] + for x in converted_vector: + if x is None: + null_pos.append(converted_vector.index(x)) + for i in null_pos: + converted_vector[i]=q('0Np') + return converted_vector class MonthVector(TemporalFixedVector): @@ -2687,6 +2719,9 @@ def ungroup(self): return q.ungroup(self) def __getitem__(self, key): + n = len(self) + if isinstance(key, Integral): + key = _key_preprocess(key, n) res = self.loc[key] if isinstance(res, List) and len(res) == 1: res = q('{raze x}', res) @@ -2741,7 +2776,8 @@ def insert( row: Union[list, List], match_schema: bool = False, test_insert: bool = False, - replace_self: bool = True + replace_self: bool = True, + inplace: bool = True ): """Helper function around `q`'s `insert` function which inserts a row or multiple rows into a q Table object. 
@@ -2752,7 +2788,10 @@ def insert( test_insert: Causes the function to modify a small local copy of the table and return the modified example, this can only be used with embedded q and will not modify the source tables contents. - replace_self: Causes the underlying Table python object to update itself with the + replace_self: `Deprecated` please use `inplace` keyword. + Causes the underlying Table python object to update itself with the + resulting Table after the insert. + inplace: Causes the underlying Table python object to update itself with the resulting Table after the insert. Returns: @@ -2774,7 +2813,10 @@ def insert( q['.pykx.i.itab'] = self q.insert('.pykx.i.itab', row, match_schema, test_insert) res = q('.pykx.i.itab') - if replace_self: + if not replace_self: + warnings.warn("Keyword 'replace_self' is deprecated please use 'inplace'", + DeprecationWarning) + if replace_self and inplace: self.__dict__.update(res.__dict__) q('delete itab from `.pykx.i') return res @@ -2784,7 +2826,8 @@ def upsert( row: Union[list, List], match_schema: bool = False, test_insert: bool = False, - replace_self: bool = True + replace_self: bool = True, + inplace: bool = True ): """Helper function around `q`'s `upsert` function which inserts a row or multiple rows into a q Table object. @@ -2795,7 +2838,10 @@ def upsert( test_insert: Causes the function to modify a small local copy of the table and return the modified example, this can only be used with embedded q and will not modify the source tables contents. - replace_self: Causes the underlying Table python object to update itself with the + replace_self: `Deprecated` please use `inplace` keyword. + Causes the underlying Table python object to update itself with the + resulting Table after the upsert. + inplace: Causes the underlying Table python object to update itself with the resulting Table after the upsert. 
Returns: @@ -2815,7 +2861,10 @@ def upsert( ``` """ res = q.upsert(self, row, match_schema, test_insert) - if replace_self: + if not replace_self: + warnings.warn("Keyword 'replace_self' is deprecated please use 'inplace'", + DeprecationWarning) + if replace_self and inplace: self.__dict__.update(res.__dict__) return res @@ -3326,7 +3375,7 @@ def pd(self, *, raw: bool = False, has_nulls: Optional[bool] = None): df = pd.DataFrame(columns=kk.py() + vk.py()) df = df.set_index(kk.py()) return df - idx = [np.stack(kvg(i).np(raw=raw, has_nulls=has_nulls)).reshape(-1) + idx = [kvg(i).np(raw=raw, has_nulls=has_nulls).reshape(-1) for i in range(len(kk))] cols = [vvg(i).np(raw=raw, has_nulls=has_nulls) for i in range(len(vk))] @@ -3366,7 +3415,8 @@ def insert( row: Union[list, List], match_schema: bool = False, test_insert: bool = False, - replace_self: bool = True + replace_self: bool = True, + inplace: bool = True ): """Helper function around `q`'s `insert` function which inserts a row or multiple rows into a q Table object. @@ -3377,7 +3427,10 @@ def insert( test_insert: Causes the function to modify a small local copy of the table and return the modified example, this can only be used with embedded q and will not modify the source tables contents. - replace_self: Causes the underlying Table python object to update itself with the + replace_self: `Deprecated` please use `inplace` keyword. + Causes the underlying Table python object to update itself with the + resulting Table after the insert. + inplace: Causes the underlying Table python object to update itself with the resulting Table after the insert. 
Returns: @@ -3399,7 +3452,10 @@ def insert( q['.pykx.i.itab'] = self q.insert('.pykx.i.itab', row, match_schema, test_insert) res = q('.pykx.i.itab') - if replace_self: + if not replace_self: + warnings.warn("Keyword 'replace_self' is deprecated please use 'inplace'", + DeprecationWarning) + if replace_self and inplace: self.__dict__.update(res.__dict__) q('delete itab from `.pykx.i') return res @@ -3409,7 +3465,8 @@ def upsert( row: Union[list, List], match_schema: bool = False, test_insert: bool = False, - replace_self: bool = True + replace_self: bool = True, + inplace: bool = True ): """Helper function around `q`'s `upsert` function which inserts a row or multiple rows into a q Table object. @@ -3421,8 +3478,11 @@ def upsert( test_insert: Causes the function to modify a small local copy of the table and return the modified example, this can only be used with embedded q and will not modify the source tables contents. - replace_self: Causes the underlying Table python object to update itself with the - resulting Table after the upsert. + replace_self: `Deprecated` please use `inplace` keyword. + Causes the underlying Table python object to update itself with the + resulting Table after the insert. + inplace: Causes the underlying Table python object to update itself with the + resulting Table after the insert. Returns: The resulting table after the given row has been upserted. 
@@ -3441,7 +3501,10 @@ def upsert( ``` """ res = q.upsert(self, row, match_schema, test_insert) - if replace_self: + if not replace_self: + warnings.warn("Keyword 'replace_self' is deprecated please use 'inplace'", + DeprecationWarning) + if replace_self and inplace: self.__dict__.update(res.__dict__) return res diff --git a/tests/conftest.py b/tests/conftest.py index a8b8738..3a87a16 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -45,6 +45,7 @@ def kx(request): os.environ['QARGS'] = f'-S {seed} --testflag {request.param}' os.environ['PYKX_RELEASE_GIL'] = '1' os.environ['PYKX_Q_LOCK'] = '-1' + os.environ['PYKX_BETA_FEATURES'] = 'true' if os.getenv('CI') and system() == 'Linux' and randint(0, 1) % 2 == 0: os.environ['PYKX_Q_LIB_LOCATION'] = '/specified_q_lib_path' import pykx as kx diff --git a/tests/qcumber_tests/callables.quke b/tests/qcumber_tests/callables.quke index 5a702c3..13afe96 100644 --- a/tests/qcumber_tests/callables.quke +++ b/tests/qcumber_tests/callables.quke @@ -11,6 +11,19 @@ feature .pykx.pycallable 1 2~.pykx.wrap[result]` ) + expect to return a foreign when calling a Python function using a wrapped object and contain result + result:.pykx.pycallable[wrapArange[<]][1;3]; + all( + .pykx.util.isf result; + 1 2~.pykx.wrap[result]` + ) + + expect to return a foreign when calling a Python function using a wrapped object and contain result + result:.pykx.pycallable[wrapArange[>]][1;3]; + all( + .pykx.util.isf result; + 1 2~.pykx.wrap[result]` + ) expect to return a foreign when calling a Python function using foreign and contain result result:.pykx.pycallable[foreignArange][1;3]; all( @@ -31,6 +44,14 @@ feature .pykx.qcallable expect to return a q object when calling a Python function using a wrapped object result:.pykx.qcallable[wrapArange][1;3]; .qu.compare[1 2;result] + + expect to return a q object when calling a Python function using a wrapped object + result:.pykx.qcallable[wrapArange[<]][1;3]; + .qu.compare[1 2;result] + + expect to 
return a q object when calling a Python function using a wrapped object + result:.pykx.qcallable[wrapArange[>]][1;3]; + .qu.compare[1 2;result] expect to return a q object when calling a Python function using foreign result:.pykx.qcallable[foreignArange][1;3]; diff --git a/tests/qcumber_tests/conversions.quke b/tests/qcumber_tests/conversions.quke index 2666690..ebec9bb 100644 --- a/tests/qcumber_tests/conversions.quke +++ b/tests/qcumber_tests/conversions.quke @@ -77,3 +77,44 @@ feature default conversions expect failure if default conversion unsupported err:@[.pykx.setdefault;"unsupported";{x}]; err like "unknown conversion type: unsupported" +feature time conversions + should return as pykx timespan objects + expect timespan object returned when time passed + .pykx.pyexec"from datetime import time"; + .qu.compare[.pykx.eval["time(11, 34, 56)"]`;0D11:34:56.000000000]; + expect timespan object returned when timedelta passed + .pykx.pyexec"import numpy as np"; + .qu.compare[.pykx.eval["np.timedelta64(1, 'D') + np.timedelta64(9, 'h')"]`;1D09:00:00.000000000] +feature guid conversions + should roundtrip guids correctly + expect same guid atom returned + .pykx.pyexec"from uuid import UUID"; + .qu.compare[.pykx.eval["UUID('db712ca2-81b1-0080-95dd-7bdb502da77d')"]`;"G"$"db712ca2-81b1-0080-95dd-7bdb502da77d"]; + expect same guid list returned + .qu.compare[.pykx.eval["[UUID('db712ca2-81b1-0080-95dd-7bdb502da77d')]"]`;enlist"G"$"db712ca2-81b1-0080-95dd-7bdb502da77d"]; + expect same guid list returned from numpy + .pykx.pyexec"import numpy as np"; + .qu.compare[.pykx.eval["np.array([UUID('db712ca2-81b1-0080-95dd-7bdb502da77d')], dtype=object)"]`;enlist "G"$"db712ca2-81b1-0080-95dd-7bdb502da77d"] + expect same guid atom returned + .qu.compare[.pykx.eval["UUID('7ff0bbb9-ee32-42fe-9631-8c5a09a155a2')"]`;"G"$"7ff0bbb9-ee32-42fe-9631-8c5a09a155a2"]; + expect same guid list returned + 
.qu.compare[.pykx.eval["[UUID('7ff0bbb9-ee32-42fe-9631-8c5a09a155a2')]"]`;enlist"G"$"7ff0bbb9-ee32-42fe-9631-8c5a09a155a2"]; + expect same guid list returned from numpy + .qu.compare[.pykx.eval["np.array([UUID('7ff0bbb9-ee32-42fe-9631-8c5a09a155a2')], dtype=object)"]`;enlist"G"$"7ff0bbb9-ee32-42fe-9631-8c5a09a155a2"] + expect guid atom to python + .qu.compare[.pykx.eval["lambda x: str.encode(str(x))"]["G"$"db712ca2-81b1-0080-95dd-7bdb502da77d"]`;"db712ca2-81b1-0080-95dd-7bdb502da77d"]; + expect guid list to python + .qu.compare[.pykx.eval["lambda x: str.encode(str(x))"][enlist "G"$"db712ca2-81b1-0080-95dd-7bdb502da77d"]`;"[UUID('db712ca2-81b1-0080-95dd-7bdb502da77d')]"]; + expect guid atom to python + .qu.compare[.pykx.eval["lambda x: str.encode(str(x))"]["G"$"7ff0bbb9-ee32-42fe-9631-8c5a09a155a2"]`;"7ff0bbb9-ee32-42fe-9631-8c5a09a155a2"]; + expect guid list to python + .qu.compare[.pykx.eval["lambda x: str.encode(str(x))"][enlist "G"$"7ff0bbb9-ee32-42fe-9631-8c5a09a155a2"]`;"[UUID('7ff0bbb9-ee32-42fe-9631-8c5a09a155a2')]"]; + expect guid atom round trip + .qu.compare[.pykx.eval["lambda x: x"]["G"$"db712ca2-81b1-0080-95dd-7bdb502da77d"]`;"G"$"db712ca2-81b1-0080-95dd-7bdb502da77d"]; + expect guid list round trip + .qu.compare[.pykx.eval["lambda x: x"][enlist "G"$"db712ca2-81b1-0080-95dd-7bdb502da77d"]`;enlist "G"$"db712ca2-81b1-0080-95dd-7bdb502da77d"]; + expect guid atom round trip + .qu.compare[.pykx.eval["lambda x: x"]["G"$"7ff0bbb9-ee32-42fe-9631-8c5a09a155a2"]`;"G"$"7ff0bbb9-ee32-42fe-9631-8c5a09a155a2"]; + expect guid list round trip + .qu.compare[.pykx.eval["lambda x: x"][enlist "G"$"7ff0bbb9-ee32-42fe-9631-8c5a09a155a2"]`;enlist "G"$"7ff0bbb9-ee32-42fe-9631-8c5a09a155a2"]; + diff --git a/tests/qcumber_tests/extensions.quke b/tests/qcumber_tests/extensions.quke new file mode 100644 index 0000000..6917cc7 --- /dev/null +++ b/tests/qcumber_tests/extensions.quke @@ -0,0 +1,43 @@ +feature General Extensions + should Test loading and listing extensions + expect 
fail to load extension if supplied argument is invalid + @[.pykx.loadExtension;`test;like[;"Extension provided must be of type string"]] + + expect fail to load an extension with an appropriate error + @[.pykx.loadExtension;"noext";like[;"Extension provided 'noext' not available"]] + + expect listing of extensions to operate appropriately + "dashboards" in .pykx.listExtensions[] + +feature Dashboards Extension + before + .pykx.loadExtension["dashboards"]; + + should Fail to run dashboards functionality under various conditions + expect to fail when running a function using an invalid function definition + .[.pykx.dash.runFunction; + ("def func";1); + like[;"SyntaxError('invalid syntax',*"]] + + expect to fail when loading a Python script with invalid content + .[.pykx.dash.runFunction; + ("def func(x):\n\treturn x+1\n\n1+'a'";1); + like[;"TypeError(\"unsupported operand type(s) for +*"]] + + expect to fail when a function definition can't be found + .[.pykx.dash.runFunction; + ("1+1";1); + like[;"Exception('No function definition found')*"]] + + should Correctly execute Python functions supplied as + expect single argument function to operate as expected + res:.pykx.dash.runFunction["def func(x):\n\treturn x+1";1]; + res~2 + + expect multi argument function to operate as expected + res:.pykx.dash.runFunction["def func(x, y):\n\treturn x*y";2 10]; + res~20 + + expect function with python dependencies to work appropriately + res:.pykx.dash.runFunction["import numpy as np\n\ndef func(x):\n\treturn np.linspace(0, x.py(), 5)";10]; + res~0 2.5 5 7.5 10 diff --git a/tests/qcumber_tests/pykx.quke b/tests/qcumber_tests/pykx.quke index 73641f3..30ae7c8 100644 --- a/tests/qcumber_tests/pykx.quke +++ b/tests/qcumber_tests/pykx.quke @@ -32,7 +32,7 @@ feature .pykx.eval expect an error to be raised if a Path object is supplied as a parameter when not suitable system"c 2000 2000"; - @[.pykx.eval"lambda x:x+1";`:test;{x like "*TypeError('can only*"}] + @[.pykx.eval"lambda 
x:x+1";`:test;{x like "*TypeError(\"unsupported operand type(s) for +: 'PosixPath' and 'int'\")"}] should error appropriately if supplied an incorrect type expect to error if input type is non string @@ -41,13 +41,52 @@ feature .pykx.eval expect to evaluate correctly if supplied a char atom .qu.compare[-7h; type .pykx.eval["1"]`] + should not convert wraps passed to functions + expect not convert wraps passed to functions + np:.pykx.import `numpy; + r:np[`:random.rand;1;2]; + r~.pykx.eval["lambda x: x"] r; feature .pykx.qeval should return the correct values and types expect to return a 2 .qu.compare[2; .pykx.qeval["1+1"]] expect to return a composition .qu.compare[105h; type .pykx.qeval["lambda x, y: x + y"]] - expect to return 2 + expect to return 2 .qu.compare[2; .pykx.qeval["lambda x, y: x + y"][1;1]] expect to return 2 .qu.compare[2; .pykx.qeval["lambda x, y: x + y"] . (1;1)] + expect ` + .qu.compare[`;.pykx.eval["lambda x: x"][<]`] + expect `. + .qu.compare[`.;.pykx.eval["lambda x: x"][<]`.] + expect ` + .qu.compare[`;.pykx.toq .pykx.eval["lambda x: x"][>]`] + expect `. + .qu.compare[`.;.pykx.toq .pykx.eval["lambda x: x"][>]`.] 
+ +feature No segfault on compressed + before + (`:compressed16383;17;1;0) set til 16383 + should No segfault + expect No segfault + .qu.compare[desc til 16383 ;desc get `:compressed16383] + after + hdel `:compressed16383 + +feature Typed wraps are wraps and can be unwrapped + should be wraps + expect 1b + .pykx.util.isw .pykx.eval["lambda x: x"] + expect 1b + .pykx.util.isw .pykx.eval["lambda x: x";<] + expect 1b + .pykx.util.isw .pykx.eval["lambda x: x";>] + + should be possible to unwrap + expect 1b + .pykx.util.isf .pykx.unwrap .pykx.eval["lambda x: x"] + expect 1b + .pykx.util.isf .pykx.unwrap .pykx.eval["lambda x: x";<] + expect 1b + .pykx.util.isf .pykx.unwrap .pykx.eval["lambda x: x";>] diff --git a/tests/qcumber_tests/reimport.quke b/tests/qcumber_tests/reimport.quke new file mode 100644 index 0000000..99e7c29 --- /dev/null +++ b/tests/qcumber_tests/reimport.quke @@ -0,0 +1,19 @@ +feature pykx.q + should not error + expect not error + .qu.compare[(::); system"l pykx.q"] + +feature .pykx.safeReimport + should function safely + expect versions to match + .qu.compare[.pykx.version[];.pykx.safeReimport[{[x;y]first system x," -c 'import pykx as kx;print(kx.__version__)'"}[getenv`PYKX_EXECUTABLE]]] + expect reset env vars on success + PYKX_UNDER_Q:getenv`PYKX_UNDER_Q; + .pykx.safeReimport {1+1}; + .qu.compare[PYKX_UNDER_Q;getenv`PYKX_UNDER_Q]; + expect signal error and reset env vars on failure + PYKX_UNDER_Q:getenv`PYKX_UNDER_Q; + err:@[.pykx.safeReimport;{1+`};{x}]; + .qu.compare["type";err]; + .qu.compare[PYKX_UNDER_Q;getenv`PYKX_UNDER_Q]; + diff --git a/tests/qscripts/test_stdout_stderr.q b/tests/qscripts/test_stdout_stderr.q new file mode 100644 index 0000000..beeffdb --- /dev/null +++ b/tests/qscripts/test_stdout_stderr.q @@ -0,0 +1,27 @@ +if[not `pykx in key `;system"l pykx.q"]; +-1"stdouterrtest,1,q stdout"; +-2"stdouterrtest,2,q stderr"; +.pykx.pyexec"import sys" +.pykx.eval"print('stdouterrtest,3,eval print')"; +.pykx.eval"print('stdouterrtest,4,eval 
print file=sys.stdout', file=sys.stdout)"; +.pykx.eval"print('stdouterrtest,5,eval print file=sys.stderr', file=sys.stderr)"; +.pykx.pyexec"print('stdouterrtest,6,pyexec print')"; +.pykx.pyexec"print('stdouterrtest,7,pyexec print file=sys.stdout', file=sys.stdout)"; +.pykx.pyexec"print('stdouterrtest,8,pyexec print file=sys.stderr', file=sys.stderr)"; +-1 "stdouterrtest,9,",.pykx.repr .pykx.eval"'.pykx.repr'"; +.pykx.print .pykx.eval"'stdouterrtest,10,.pykx.print'"; +.pykx.print .pykx.tonp .pykx.eval"'stdouterrtest,11,.pykx.print .pykx.tonp .pykx.eval'"; +.pykx.eval["lambda x:print(x)"] `$"stdouterrtest,12,.pykx.eval[\"lambda x:print(x)\"]"; +.pykx.eval["(lambda x:print(x))('stdouterrtest,13,inside a lambda')"] ; +.pykx.eval["print('stdouterrtest,14,not in a lambda')"] ; +.pykx.eval"print('stdouterrtest,15,eval print')"; +.pykx.print .pykx.tonp .pykx.eval"'stdouterrtest,16,.pykx.print .pykx.tonp .pykx.eval'"; +.pykx.eval["lambda x:print(x)"][`$"stdouterrtest,17,.pykx.eval[\"lambda x:print(x)\"]"]`; +.pykx.pyexec"def func(x):\n print(x)"; +func:.pykx.get`func; +func `$"stdouterrtest,18,python function"; +func[`$"stdouterrtest,19,python function`"]`; +.pykx.pyexec["(lambda x:print(x))('stdouterrtest,20,inside a lambda')"] ; +.pykx.pyexec["print('stdouterrtest,21,not in a lambda')"] ; +.pykx.pyexec"print('stdouterrtest,22,eval print')"; +exit 0 \ No newline at end of file diff --git a/tests/test_cloud_edition.py b/tests/test_cloud_edition.py index a62cd84..3d819c6 100644 --- a/tests/test_cloud_edition.py +++ b/tests/test_cloud_edition.py @@ -124,6 +124,10 @@ def test_kurl_aws_minio_small(q, q_port): assert q('{x like first y`domain}', test_url, e) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) @kurl_q_server_init def test_kurl_aws_minio_large(q, q_port): """Test AWS Min.io authentication with large passwords""" diff --git a/tests/test_db.py b/tests/test_db.py new file mode 100644 index 0000000..0cc7744 
--- /dev/null +++ b/tests/test_db.py @@ -0,0 +1,317 @@ +import os +import shutil +import warnings + +# Do not import pykx here - use the `kx` fixture instead! +import pytest + + +@pytest.mark.order(1) +def test_creation(kx): + db = kx.DB(path='db') + tab = kx.Table(data={ + 'date': kx.q('2015.01.01 2015.01.01 2015.01.02 2015.01.02'), + 'ti': kx.q('09:30:00 09:31:00 09:30:00 09:31:00'), + 'p': kx.q('101 102 101.5 102.5'), + 'sz': kx.q('100 200 150 210'), + 'sym': kx.q('`a`b`b`c') + }) + db.create(tab, 't', 'date', by_field='sym', sym_enum='sym') + assert db.tables == ['t'] + + +@pytest.mark.order(2) +def test_create_errors(kx): + db = kx.DB(path='err_db') + tab = kx.Table(data={ + 'date': kx.q('2015.01.01 2015.01.01 2015.01.02 2015.01.02'), + 'ti': kx.q('09:30:00 09:31:00 09:30:00 09:31:00'), + 'p': kx.q('101 102 101.5 102.5'), + 'sz': kx.q('100 200 150 210'), + 'sym': kx.q('`a`b`b`c') + }) + with pytest.raises(kx.QError) as err: + db.create(1, 't', 'ti', by_field='sym', sym_enum='sym') + assert 'Supplied table must be' in str(err.value) + with pytest.raises(kx.QError) as err: + db.create(tab, 't', 'p', by_field='sym', sym_enum='sym') + assert 'Unsupported type:' in str(err.value) + with pytest.raises(kx.QError) as err: + db.create(tab, 't', 'no_col', by_field='sym', sym_enum='sym') + assert 'Partition column no_col not in supplied' in str(err.value) + + +@pytest.mark.order(3) +def test_load_1(kx): + db = kx.db.DB() + assert db.tables is None + db.load('db') + assert db.tables == ['t'] + assert type(db.t) == kx.PartitionedTable # noqa: E721 + with pytest.raises(kx.QError) as err: + db.load('../db') + assert 'Attempting to reload existing' in str(err.value) + with pytest.raises(kx.QError) as err: + db.load('test') + assert 'Only one kdb+ database' in str(err.value) + with pytest.raises(kx.QError) as err: + db.load('../pyproject.toml', overwrite=True) + assert 'Provided path is a file' in str(err.value) + with pytest.raises(kx.QError) as err: + db.load('doesNotExist', 
overwrite=True) + assert 'Unable to find object at specified path' in str(err.value) + + +@pytest.mark.order(4) +def test_load_2(kx): + db = kx.DB(path='db') + assert db.tables == ['t'] + assert type(db.t) == kx.PartitionedTable # noqa: E721 + + +@pytest.mark.order(5) +def test_list(kx): + db = kx.DB() + db.load('db') + print(db.tables) + db_cols = db.list_columns('t') + assert db_cols == ['sym', 'ti', 'p', 'sz'] + with pytest.raises(kx.QError) as err: + db.list_columns('no_tab') + assert 'Column listing not possible' in str(err.value) + + +@pytest.mark.order(6) +def test_column_add(kx): + db = kx.DB() + db.load('db') + assert ['sym', 'ti', 'p', 'sz'] == db.list_columns('t') + db.add_column('t', 'vol', kx.IntAtom.null) + db_cols = db.list_columns('t') + assert ['sym', 'ti', 'p', 'sz', 'vol'] == db_cols + + +@pytest.mark.order(7) +def test_column_reorder(kx): + db = kx.DB() + db.load('db') + db.reorder_columns('t', ['vol', 'sym', 'sz', 'p', 'ti']) + assert ['vol', 'sym', 'sz', 'p', 'ti'] == db.list_columns('t') + + +@pytest.mark.order(8) +def test_column_rename(kx): + db = kx.DB() + db.load('db') + db.rename_column('t', 'p', 'price') + assert ['vol', 'sym', 'sz', 'price', 'ti'] == db.list_columns('t') + with pytest.raises(kx.QError) as err: + db.rename_column('t', 'no_col', 'upd') + assert "Specified column 'no_col'" in str(err.value) + + +@pytest.mark.order(9) +def test_column_delete(kx): + db = kx.DB() + db.load('db') + db.delete_column('t', 'vol') + assert ['sym', 'sz', 'price', 'ti']== db.list_columns('t') + with pytest.raises(kx.QError) as err: + db.delete_column('t', 'no_col') + assert "Specified column 'no_col'" in str(err.value) + + +@pytest.mark.order(10) +def test_column_find(kx): + db = kx.DB() + db.load('db') + assert None == db.find_column('t', 'price') # noqa: E711 + with pytest.raises(kx.QError) as err: + db.find_column('t', 'no_col') + assert 'Requested column not found' in str(err.value) + + +@pytest.mark.order(11) +def test_column_set_attr(kx): + 
db = kx.DB() + db.load('db') + assert 'g' not in kx.q.qsql.exec(kx.q.meta(db.t), columns='a') + db.set_column_attribute('t', 'sym', 'grouped') + assert 'g' in kx.q.qsql.exec(kx.q.meta(db.t), columns='a') + with pytest.raises(kx.QError) as err: + db.set_column_attribute('t', 'no_col', 'unique') + assert "Specified column 'no_col'" in str(err.value) + + +@pytest.mark.order(12) +def test_column_clear_attr(kx): + db = kx.DB() + db.load('db') + assert 'g' in kx.q.qsql.exec(kx.q.meta(db.t), columns='a') + db.clear_column_attribute('t', 'sym') + assert 'g' not in kx.q.qsql.exec(kx.q.meta(db.t), columns='a') + with pytest.raises(kx.QError) as err: + db.clear_column_attribute('t', 'no_col') + assert "Specified column 'no_col'" in str(err.value) + + +@pytest.mark.order(13) +def test_column_set_type(kx): + db = kx.DB() + db.load('db') + assert b'f' in kx.q.qsql.exec(kx.q.meta(db.t), columns='t').py() + db.set_column_type('t', 'price', kx.LongAtom) + assert b'f' not in kx.q.qsql.exec(kx.q.meta(db.t), columns='t').py() + with pytest.raises(kx.QError) as err: + db.set_column_type('t', 'price', kx.GUIDAtom) + assert "to type: " in str(err.value) + with pytest.raises(kx.QError) as err: + db.set_column_attribute('t', 'no_col', kx.GUIDAtom) + assert "Specified column 'no_col'" in str(err.value) + + +@pytest.mark.order(14) +def test_column_copy(kx): + db = kx.DB() + db.load('db') + assert ['sym', 'sz', 'price', 'ti'] == db.list_columns('t') + db.copy_column('t', 'sz', 'size') + assert ['sym', 'sz', 'price', 'ti', 'size'] == db.list_columns('t') + assert all(kx.q.qsql.select(db.t, 'sz')['sz'] == kx.q.qsql.select(db.t, 'size')['size']) # noqa: E501 + with pytest.raises(kx.QError) as err: + db.copy_column('t', 'no_col', 'new_name') + assert "Specified column 'no_col'" in str(err.value) + + +@pytest.mark.order(15) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) +def test_column_apply(kx): + db = kx.DB() + db.load('db') + 
assert all([100, 200, 150, 210] == kx.q.qsql.select(db.t, 'size')['size']) + db.apply_function('t', 'size', kx.q('2*')) + assert all([200, 400, 300, 420] == kx.q.qsql.select(db.t, 'size')['size']) + db.apply_function('t', 'size', lambda x: x.np()/2) + assert all([100, 200, 150, 210] == kx.q.qsql.select(db.t, 'size')['size']) + with pytest.raises(RuntimeError) as err: + db.apply_function('t', 'size', 2) + assert "Provided 'function' is not callable" in str(err.value) + + +@pytest.mark.order(16) +def test_table_rename(kx): + db = kx.DB() + db.load('db') + assert db.tables == ['t'] + db.rename_table('t', 'trades') + assert db.tables == ['trades'] + assert type(db.trades) == kx.PartitionedTable # noqa: E721 + + +@pytest.mark.order(17) +def test_db_fill(kx): + db = kx.DB(path='db') + assert db.tables == ['trades'] + qtab = kx.Table(data={ + 'col1': kx.random.random(1000, 10.0), + 'col2': kx.random.random(1000, 10) + }) + db.create(qtab, 'newtab', kx.q('2015.01.02')) + with pytest.raises(kx.QError) as err: + db.partition_count() + assert '2015.01.01/newtab. 
OS reports: No such file or directory' in str(err.value) + db.fill_database() + parts = db.partition_count() + all(kx.q.qsql.exec(parts.values(), 'newtab') == [0, 1000]) + + +@pytest.mark.order(18) +def test_load_warning(kx): + kx.q('`:./db/2015.01.01/table/ set .Q.en[`:./db;]([] ti:09:30:00 09:31:00; p:101 102f; sz:100 200; sym:`a`b)') # noqa: E501 + kx.q('`:./db/2015.01.02/table/ set .Q.en[`:./db;]([] ti:09:30:00 09:31:00; p:101.5 102.5; sz:150 210;sym:`b`c)') # noqa: E501 + db = kx.db.DB() + assert db.tables is None + with warnings.catch_warnings(record=True) as w: + db.load('db') + assert 'A database table "table" would overwrite' in str(w[-1].message) + assert type(db.table) != kx.PartitionedTable # noqa: E721 + assert type(db.table.table) == kx.PartitionedTable # noqa: E721 + + +def test_enumerate(kx): + tab = kx.Table(data={ + 'date': kx.q('2015.01.01 2015.01.01 2015.01.02 2015.01.02'), + 'ti': kx.q('09:30:00 09:31:00 09:30:00 09:31:00'), + 'p': kx.q('101 102 101.5 102.5'), + 'sz': kx.q('100 200 150 210'), + 'sym': kx.q('`a`b`b`c') + }) + db = kx.DB(path='db') + entab = db.enumerate(tab) + assert 20 == entab['sym'].t + assert 'sym' == kx.q.key(entab['sym']) + assert type(kx.q.value(entab['sym'])) == kx.SymbolVector # noqa: E721 + entab1 = db.enumerate(tab, sym_file='mysym') + assert 20 == entab1['sym'].t + assert 'mysym' == kx.q.key(entab1['sym']) + assert type(kx.q.value(entab1['sym'])) == kx.SymbolVector # noqa: E721 + + +def test_partition_count(kx): + db = kx.DB(path='db') + fullview = db.partition_count() + assert type(fullview) == kx.Dictionary # noqa: E721 + assert 2 == len(fullview) + subview = db.partition_count(subview=kx.q('2015.01.02')) + assert type(subview) == kx.Dictionary # noqa: E721 + assert 1 == len(subview) + + +def test_subview(kx): + db = kx.DB(path='db') + db.subview([kx.q('2015.01.01')]) + qtab = kx.q.qsql.select(db.trades) + assert type(qtab) == kx.Table # noqa: E721 + assert 2 == len(qtab) + db.subview() + qtab = 
kx.q.qsql.select(db.trades) + assert type(qtab) == kx.Table # noqa: E721 + assert 4 == len(qtab) + + +@pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) +def test_beta(): + import pykx as kx + with pytest.raises(kx.QError) as err: + kx.DB() + assert 'Attempting to use a beta feature "Data' in str(err.value) + + +def test_splay(kx): + os.mkdir('splay') + kx.q['tab'] = kx.Table(data={ + 'x': kx.random.random(100, ['a', 'b', 'c']), + 'x1': kx.random.random(100, 1.0), + 'x2': kx.random.random(100, 10) + }) + kx.q('`:./splay/tab set .Q.en[`:./splay;tab]') + db = kx.DB() + db.load('splay') + assert type(db.tab) == kx.Table # noqa: E721 + with pytest.raises(kx.QError) as err: + db.rename_column('tab', 'x', 'x3') + shutil.rmtree('../splay') + assert 'Application of Database Management functionality' in str(err.value) + + +@pytest.mark.order(-1) +def test_cleanup(kx): + shutil.rmtree('db') + assert True diff --git a/tests/test_license.py b/tests/test_license.py index 0f8df89..2dd6045 100644 --- a/tests/test_license.py +++ b/tests/test_license.py @@ -17,6 +17,10 @@ def test_initialization_using_unlicensed_mode(tmp_path, q): assert 2 == kx.toq(2).py() +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_fallback_to_unlicensed_mode_error(tmp_path): os.environ['QLIC'] = os.environ['QHOME'] = str(tmp_path.absolute()) os.environ['QARGS'] = '--licensed' @@ -25,6 +29,10 @@ def test_fallback_to_unlicensed_mode_error(tmp_path): import pykx # noqa: F401 +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_unlicensed_signup(tmp_path, monkeypatch): os.environ['QLIC'] = os.environ['QHOME'] = str(tmp_path.absolute()) inputs = iter(['N']) @@ -34,6 +42,10 @@ def test_unlicensed_signup(tmp_path, monkeypatch): assert not kx.licensed +@pytest.mark.skipif( + 
os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_invalid_lic_continue(tmp_path, monkeypatch): os.environ['QLIC'] = os.environ['QHOME'] = str(tmp_path.absolute()) inputs = iter(['F']) @@ -44,6 +56,10 @@ def test_invalid_lic_continue(tmp_path, monkeypatch): assert str(e) == 'Invalid input provided please try again' +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_licensed_signup_no_file(tmp_path, monkeypatch): os.environ['QLIC'] = os.environ['QHOME'] = str(tmp_path.absolute()) inputs = iter(['Y', 'n', '1', '/test/test.blah']) @@ -54,6 +70,10 @@ def test_licensed_signup_no_file(tmp_path, monkeypatch): assert str(e) == "Download location provided /test/test.blah does not exist." +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_licensed_signup_invalid_b64(tmp_path, monkeypatch): os.environ['QLIC'] = os.environ['QHOME'] = str(tmp_path.absolute()) inputs = iter(['Y', 'n', '2', 'data:image/png;test']) @@ -66,6 +86,10 @@ def test_licensed_signup_invalid_b64(tmp_path, monkeypatch): assert str(e) == err_msg +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_licensed_success_file(monkeypatch): qhome_path = os.environ['QHOME'] os.unsetenv('QLIC') @@ -78,6 +102,10 @@ def test_licensed_success_file(monkeypatch): assert [0, 1, 2, 3, 4] == kx.q.til(5).py() +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_licensed_success_b64(monkeypatch): qhome_path = os.environ['QHOME'] os.unsetenv('QLIC') @@ -92,6 +120,19 @@ def test_licensed_success_b64(monkeypatch): assert [0, 1, 2, 3, 4] == kx.q.til(5).py() +def test_envvar_init(): + qhome_path = os.environ['QHOME'] + os.unsetenv('QLIC') + os.unsetenv('QHOME') + with open(qhome_path + '/kc.lic', 'rb') as f: + 
license_content = base64.encodebytes(f.read()) + os.environ['KDB_LICENSE_B64'] = license_content.decode('utf-8') + + import pykx as kx + assert kx.licensed + assert [0, 1, 2, 3, 4] == kx.q.til(5).py() + + @pytest.mark.parametrize( argnames='QARGS', argvalues=[ @@ -136,10 +177,18 @@ def test_check_license_format(kx): assert str(e) == 'Unsupported option provided for format parameter' +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_check_license_success_file(kx): assert kx.license.check(os.environ['QHOME'] + '/kc.lic') +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_check_license_success_b64(kx): with open(os.environ['QHOME'] + '/kc.lic', 'rb') as f: license = base64.encodebytes(f.read()) diff --git a/tests/test_pandas_agg.py b/tests/test_pandas_agg.py index d82c7e4..7077f8a 100644 --- a/tests/test_pandas_agg.py +++ b/tests/test_pandas_agg.py @@ -1,10 +1,16 @@ """Tests for the Pandas API agg functionality""" +import os + import pytest import statistics +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_single_func(q, kx): tab = q('([] til 10; 1)') gtab = q('([]sym:`a`a`a`b`b;x:1 2 2 3 3)') @@ -28,6 +34,10 @@ def mode(x): assert q('{x~y}', mode_gtab, mode_gdf) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_list_funcs(q, kx): tab = q('([] til 10; 1)') gtab = q('([]sym:`a`a`a`b`b;x:1 2 2 3 3)') @@ -48,6 +58,10 @@ def mode(x): assert q('{x~y}', lst_gdf['x']['mode'], lst_gtab['mode'].values()['x']) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_dict_funcs(q, kx): tab = q('([] til 10; 1)') dict_str = tab.agg({'x': 'min', 'x1': 'max'}) diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py index 
4ea8076..6cae5a8 100644 --- a/tests/test_pandas_api.py +++ b/tests/test_pandas_api.py @@ -1,6 +1,7 @@ """Tests for the Pandas API.""" import sys +import os import numpy as np import pandas as pd @@ -234,6 +235,16 @@ def test_df_loc_set(kx, q): df.loc[df['z'] == 'a', 'y', 'z'] = 99 +def test_df_loc_err(kx, q): + df = kx.Table(data={'a': [1, 2, 3]}) + with pytest.raises(kx.QError) as err: + df['b'] + assert 'inaccessible column: b' in str(err.value) + with pytest.raises(kx.QError) as err: + df[['a', 'b', 'c']] + assert "inaccessible columns: ['b', 'c']" in str(err.value) + + def test_df_set_cols(kx, q): qtab = q('([]til 10;10?1f;10?100)') df = qtab @@ -1134,6 +1145,27 @@ def test_df_select_dtypes(kx, q): exclude=[kx.SymbolVector]).py(), q('([] c2:1 2 3h; c3:1 2 3j; c4:1 2 3i)').py() ) + assert check_result_and_type( + kx, + df.select_dtypes(include=[kx.ShortAtom, kx.LongAtom]).py(), + q('([] c2:1 2 3h; c3:1 2 3j)').py() + ) + assert check_result_and_type( + kx, + df.select_dtypes(exclude='kx.LongAtom').py(), + q('([] c1:`a`b`c; c2:1 2 3h; c4:1 2 3i)').py() + ) + df = q('([] c1:"abc";c2:(1 2 3;4 5 6;7 8 9);c3:("abc";"abc";"abc"))') + assert check_result_and_type( + kx, + df.select_dtypes(exclude='kx.List').py(), + q('([] c1:"abc")').py() + ) + assert check_result_and_type( + kx, + df.select_dtypes(include='kx.List').py(), + q('([] c2:(1 2 3;4 5 6;7 8 9);c3:("abc";"abc";"abc"))').py() + ) def test_df_select_dtypes_errors(kx, q): @@ -1145,6 +1177,16 @@ def test_df_select_dtypes_errors(kx, q): " have overlapping elements"): df.select_dtypes(include='kx.LongVector', exclude='kx.LongVector') + with pytest.raises(Exception, match=r"'CharVector' not supported." + " Use 'CharAtom' for columns of char atoms." + " 'kx.List' will include any columns containing" + " mixed list data."): + df.select_dtypes(include='kx.CharVector') + with pytest.raises(Exception, match=r"'CharVector' not supported." + " Use 'CharAtom' for columns of char atoms." 
+ " 'kx.List' will exclude any columns containing" + " mixed list data."): + df.select_dtypes(exclude='kx.CharVector') def test_df_drop(kx, q): @@ -1375,11 +1417,34 @@ def test_df_rename(kx, q): # assert(q('{x~y}', rez, kt.pd().rename(idx))) # {x~y}=1b because of some q attribute assert(all(rez.pd().eq(kt.pd().rename(idx)))) + idx = {0: 'foo', 5: 'bar'} + rez = kt.rename(idx, axis=0) + # assert(q('{x~y}', rez, kt.pd().rename(idx))) # {x~y}=1b because of some q attribute + assert(all(rez.pd().eq(kt.pd().rename(idx)))) + idx = {0: 'foo', 5: 'bar'} rez = kt.rename(index=idx) # assert(q('{x~y}', rez, kt.pd().rename(index=idx))) # {x~y}=1b because of some q attribute assert(all(rez.pd().eq(kt.pd().rename(index=idx)))) + tab = kx.q('([] Policy: 1 2 3)') + + rez = tab.rename({'Policy': 'PolicyID'}, axis=1) + assert all(tab.pd().rename({'Policy': 'PolicyID'}, axis=1).eq(rez.pd())) + + tab = kx.KeyedTable(data=tab) + + idx = {'A': 0, 0: 'a', 'B': 'b'} + rez = tab.rename(idx) + assert all(tab.pd().rename(idx).eq(rez.pd())) + + rez = tab.rename({2: 'B'}) + assert all(tab.pd().rename({2: 'B'}).eq(rez.pd())) + + with pytest.raises(ValueError): + idx = {'A': 0, 0: 'a', 'B': 'b'} + tab.rename(columns=idx) + with pytest.raises(ValueError): t.rename() @@ -1937,6 +2002,10 @@ def test_pandas_groupby_errors(kx, q): tab.groupby(level=[0, 4]) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_pandas_groupby(kx, q): df = pd.DataFrame( { @@ -2067,15 +2136,15 @@ def test_df_add_prefix(kx, q): q_add_prefix = t.add_prefix("col_", axis=1) - assert(q('~', q_add_prefix, t.pd().add_prefix("col_", axis=1))) + assert(q('~', q_add_prefix, t.pd().add_prefix("col_"))) kt = kx.q('([idx:til 5] til 5; 5?5; 5?1f; (5;5)#100?" 
")') q_add_prefix = kt.add_prefix("col_", axis=1) - assert(q('~', q_add_prefix, kt.pd().add_prefix("col_", axis=1))) + assert(q('~', q_add_prefix, kt.pd().add_prefix("col_"))) with pytest.raises(ValueError) as err: - t.add_prefix("col_", axis=0) + t.set_index('x').add_prefix("col_", axis=0) assert 'nyi' in str(err) with pytest.raises(ValueError) as err: @@ -2086,17 +2155,17 @@ def test_df_add_prefix(kx, q): def test_df_add_suffix(kx, q): t = q('([] til 5; 5?5; 5?1f; (5;5)#100?" ")') - q_add_suffix = t.add_suffix("_col", axis=1) + q_add_suffix = t.add_suffix("_col") - assert(q('~', q_add_suffix, t.pd().add_suffix("_col", axis=1))) + assert(q('~', q_add_suffix, t.pd().add_suffix("_col"))) kt = kx.q('([idx:til 5] til 5; 5?5; 5?1f; (5;5)#100?" ")') q_add_suffix = kt.add_suffix("_col", axis=1) - assert(q('~', q_add_suffix, kt.pd().add_suffix("_col", axis=1))) + assert(q('~', q_add_suffix, kt.pd().add_suffix("_col"))) with pytest.raises(ValueError) as err: - t.add_suffix("_col", axis=0) + t.set_index('x').add_suffix("_col", axis=0) assert 'nyi' in str(err) with pytest.raises(ValueError) as err: diff --git a/tests/test_pandas_apply.py b/tests/test_pandas_apply.py index 2680b86..9b39f9f 100644 --- a/tests/test_pandas_apply.py +++ b/tests/test_pandas_apply.py @@ -1,5 +1,7 @@ """Tests for the Pandas API apply functionality""" +import os + import numpy as np import pytest @@ -72,6 +74,10 @@ def test_sum_axis_1_col_2(q, kx): assert all(sum_data == q('1+til 10')) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_py_add_axis_0_cols_1(q, kx): tab = q('([] til 10)') @@ -84,6 +90,10 @@ def add_1(x): assert all(add_data == kx.toq(tab.pd().apply(add_1))) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_py_add_axis_0_cols_2(q, kx): tab = q('([] til 10; 1)') @@ -96,6 +106,10 @@ def add_1(x): assert q('{all raze x}', add_data.values() == 
q('(1+til 10;10#2)')) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_py_add_axis_1_cols_1(q, kx): tab = q('([] til 10)') @@ -107,6 +121,10 @@ def add_1(x): assert all(add_data == kx.toq(tab.pd().apply(add_1, axis=1))) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_py_add_axis_1_cols_2(q, kx): tab = q('([] til 10; 1)') @@ -119,6 +137,10 @@ def add_1(x): assert q('{all raze x}', add_data.values() == q('(1+til 10;10#2)')) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_py_sum_axis_0_cols_1(q, kx): tab = q('([] til 10)') sum_data = tab.apply(np.sum) @@ -128,6 +150,10 @@ def test_py_sum_axis_0_cols_1(q, kx): assert all(sum_data == kx.toq(tab.pd().apply(np.sum))) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_py_sum_axis_1_cols_1(q, kx): tab = q('([] til 10)') sum_data = tab.apply(np.sum, axis=1) @@ -136,6 +162,10 @@ def test_py_sum_axis_1_cols_1(q, kx): assert all(sum_data == kx.toq(tab.pd().apply(np.sum, axis=1))) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_py_sum_axis_0_cols_2(q, kx): tab = q('([] til 10; 1)') sum_data = tab.apply(np.sum) @@ -145,6 +175,10 @@ def test_py_sum_axis_0_cols_2(q, kx): assert all(sum_data == kx.toq(tab.pd().apply(np.sum))) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_py_sum_axis_1_cols_2(q, kx): tab = q('([] til 10; 1)') sum_data = tab.apply(np.sum, axis=1) @@ -153,6 +187,10 @@ def test_py_sum_axis_1_cols_2(q, kx): assert all(sum_data == kx.toq(tab.pd().apply(np.sum, axis=1))) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def 
test_py_args(q, kx): tab = q('([] til 10; 1)') diff --git a/tests/test_pykx.py b/tests/test_pykx.py index 553ab55..06f38f0 100644 --- a/tests/test_pykx.py +++ b/tests/test_pykx.py @@ -67,6 +67,10 @@ def test_qinit_startup(): @pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_unlicensed_env(): os.environ['PYKX_UNLICENSED'] = 'true' import pykx as kx @@ -75,16 +79,24 @@ def test_unlicensed_env(): @pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_qinit_qq_startup(): # PyKX would not initialise appropriately if q.q exists in QHOME containing a show statement shutil.copy('tests/qinit.q', os.environ['QHOME']+'/q.q') import pykx as kx - os.remove(os.environ['QHOME']+'/q.q') + try_clean(os.environ['QHOME']+'/q.q') assert kx.q('2 + 2') == 4 @disposable_env_only @pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_QHOME_symlinks(): # This logic to get QHOME is copied from `pykx.config`, since we can't use `pykx.qhome` until # after PyKX has been imported, but that would ruin the test. 
@@ -198,6 +210,10 @@ def test_top_level_attributes(kx): @pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_q_lock_error_instant(): os.environ['PYKX_RELEASE_GIL'] = '1' os.environ['PYKX_Q_LOCK'] = '0' @@ -222,6 +238,40 @@ def test_pykx_safe_reimport(): assert output.split('\n')[-1] == "0 1 2 3 4 5 6 7 8 9" +@pytest.mark.isolate +def test_pykx_sigkill(): + returncode = subprocess.run( + (str(Path(sys.executable).as_posix()), '-c', 'import pykx as kx; import os; import signal; pid = os.getpid(); os.kill(pid, signal.SIGKILL)'), # noqa: E501 + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ).returncode + assert returncode == -9 + + +@pytest.mark.isolate +@pytest.mark.xfail(reason='Local testing shows appropriate behaviour') +def test_pykx_sigint(): + output = subprocess.run( + (str(Path(sys.executable).as_posix()), '-c', 'import pykx as kx; import os; import signal; pid = os.getpid(); os.kill(pid, signal.SIGINT)'), # noqa: E501 + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ).stdout.strip() + assert output.split('\n')[-1] == 'KeyboardInterrupt' + + +@pytest.mark.isolate +def test_pykx_sigterm(): + returncode = subprocess.run( + (str(Path(sys.executable).as_posix()), '-c', 'import pykx as kx; import os; import signal; pid = os.getpid(); os.kill(pid, signal.SIGTERM)'), # noqa: E501 + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ).returncode + assert returncode == -15 + + @pytest.mark.isolate def test_pykx_star(): output = subprocess.run( @@ -231,3 +281,62 @@ def test_pykx_star(): text=True, ).stdout.strip() assert output.split('\n')[-1] == "0 1 2 3 4 5 6 7 8 9" + + +@pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) +def test_pykx_stdout_stderr(): + output = subprocess.run( + (str(Path(sys.executable).as_posix()), '-c', + 'import 
pykx;pykx.q(\'\\l tests/qscripts/test_stdout_stderr.q\')'), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ).stdout.strip() + output = output.split('\n') + output = [x for x in output if x[:13] == 'stdouterrtest'] + assert (lambda x: ([int(i.split(',')[1]) for i in x]))(output) == list(range(1, 23)) + + +@pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) +def test_pykx_stdout_stderr_under_q(): + subprocess.run( + (str(Path(sys.executable).as_posix()), '-c', + 'import pykx;pykx.install_into_QHOME()'), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + output = subprocess.run( + ('q', 'tests/qscripts/test_stdout_stderr.q', '-q'), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ).stdout.strip() + output = output.split('\n') + output = [x for x in output if x[:13] == 'stdouterrtest'] + assert (lambda x: ([int(i.split(',')[1]) for i in x]))(output) == list(range(1, 23)) + + +@pytest.mark.isolate +def test_compressed_enum_segfault(kx): + import tempfile + temp_dir = tempfile.TemporaryDirectory() + kx.q('{`base set hsym x}', temp_dir.name) + kx.q('.z.zd:17 1 0') + kx.q('n:10000') + kx.q('trade:([] sym:`$string til n)') + kx.q('.Q.dpft[base;;`sym;`trade] each 2020.01.01 + til 100') + kx.q('system"l ",1_ string base') + kx.q('select from trade') + + +def test_is_enabled(kx): + assert kx.config.ignore_qhome is False diff --git a/tests/test_q.py b/tests/test_q.py index 2f41d1d..5975913 100644 --- a/tests/test_q.py +++ b/tests/test_q.py @@ -55,6 +55,20 @@ def test_setitem(q, kx): q['views'] = 'views' # check element of the .q namespace +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) +def test_setitem_func(kx): + def func(n=2): + return n + + kx.q['func']= func + assert 1 == kx.q('func', 1) + assert '' == kx.q('func', '') + assert '.' 
== kx.q('func', '.') + + @pytest.mark.ipc def test_delitem(q, kx): key = 'test_key' @@ -125,6 +139,10 @@ def test_get_q_singleton_from_class(kx, q): @pytest.mark.unlicensed(unlicensed_only=True) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_unlicensed_call(kx): with pytest.raises(kx.LicenseException, match=f"(?i)run q code via '{kx.q!r}'"): kx.q('{[xn] xn-((xn*xn)-2)%2*xn}\\[1.5]') @@ -152,6 +170,10 @@ def test_large_vector(q): assert q('sum', v).py() == 225179981032980480 +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_path_arguments(q): # KXI-30172: Projections of PyKX functions don't support Path a = q("{[f;x] f x}")(lambda x: x)(Path('test')) diff --git a/tests/test_q_foreign.py b/tests/test_q_foreign.py index da3d171..e484590 100644 --- a/tests/test_q_foreign.py +++ b/tests/test_q_foreign.py @@ -1,5 +1,6 @@ import pytest +import os import sys from platform import system @@ -35,6 +36,10 @@ def __init__(self, x, y): @pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_foreign_refcount_under_q(): if system() != 'Windows': import pykx as kx @@ -129,6 +134,10 @@ def generator(i): @pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_foreign_under_q(): import pykx as kx if system() != 'Windows': @@ -157,6 +166,10 @@ def __init__(self, x): @pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_foreign_class_under_q(): if system() != 'Windows': import pykx as kx @@ -196,6 +209,10 @@ def __repr__(self): @pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def 
test_foreign_functions_under_q(): if system() != 'Windows': import pykx as kx @@ -241,6 +258,10 @@ def args_kwargs(*args, **kwargs): @pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_foreign_setattr_under_q(kx, q): if system() != 'Windows': import pykx as kx @@ -262,6 +283,10 @@ def __init__(self, x): @pytest.mark.isolate +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_foreign_setattr_arrow_under_q(q, kx, pa): if system() != 'Windows': import pykx as kx diff --git a/tests/test_query.py b/tests/test_query.py index f39eb5e..270141c 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -1,9 +1,11 @@ +from tempfile import TemporaryDirectory + # Do not import pykx here - use the `kx` fixture instead! import pytest @pytest.mark.ipc -def test_select(q): +def test_select(kx, q): # python q object based on memory location qtab = q('([]col1:100?`a`b`c;col2:100?1f;col3:100?5)') qbool = q('100?0b') @@ -36,6 +38,45 @@ def test_select(q): ).py() with pytest.raises(TypeError): q.qsql.select([1, 2, 3]).py() + with pytest.raises(kx.QError) as err: + q.qsql.select(qtab, + columns={'col2': 'col2', 'col3': 'col3'}, + where=['col3<0.5', 'col2>0.7'], + by={'col1': 'col1'}, + inplace=True + ).py() + assert 'Returned data format does not match' in str(err) + q.qsql.select(qtab, + columns={'col2': 'col2', 'col3': 'col3'}, + where=['col3<0.5', 'col2>0.7'], + inplace=True + ) + assert q('select col2, col3 from qtab where col3<0.5,col2>0.7').py() ==\ + qtab.py() + + +def test_partitioned_query(kx, q): + with TemporaryDirectory() as tmp_dir: + db = kx.DB(path=tmp_dir) + N = 1000 + qtab = kx.Table(data={ + 'date': kx.q.asc(kx.random.random(N, kx.q('2020.01.01 2020.01.02 2020.01.03'))), + 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']), + 'price': kx.random.random(N, 10.0), + 'size': kx.random.random(N, 100) + }) + 
db.create(qtab, 'qtable', 'date') + with pytest.raises(kx.QError) as err: + kx.q.qsql.select(db.qtable, where=['sym=`AAPL'], inplace=True) + assert "Application of 'inplace' updates not supported" in str(err) + + with pytest.raises(kx.QError) as err: + kx.q.qsql.delete(db.qtable, where=['sym=`AAPL'], inplace=True) + assert "Application of 'inplace' updates not supported" in str(err) + + with pytest.raises(kx.QError) as err: + kx.q.qsql.update(db.qtable, where=['sym=`AAPL'], inplace=True) + assert "Application of 'inplace' updates not supported" in str(err) @pytest.mark.asyncio @@ -81,6 +122,14 @@ async def test_select_async(kx, q_port): where=['col3<0.5', 'col2>0.7'] )).py() + with pytest.raises(kx.QError) as err: + await (q.qsql.select(qtab, + columns={'col2': 'col2', 'col3': 'col3'}, + where=['col3<0.5', 'col2>0.7'], + inplace=True + )) + assert "'inplace' not supported" in str(err) + @pytest.mark.ipc def test_exec(q): @@ -162,13 +211,17 @@ def test_update(q): q['byqtab'] = byqtab assert q('update avg weight by city from byqtab').py() ==\ q.qsql.update(byqtab, {'weight': 'avg weight'}, by={'city': 'city'}).py() - q.qsql.update('byqtab', columns={'weight': 'avg weight'}, by={'city': 'city'}, modify=True) - with pytest.raises(TypeError): - q.qsql.update(byqtab, columns={'weight': 'avg weight'}, by={'city': 'city'}, modify=True) + with pytest.raises(TypeError): - q.qsql.update([1, 2, 3], columns={'weight': 'avg weight'}, by={'city': 'city'}, modify=True) - assert q['byqtab'].py() == \ - q.qsql.update(byqtab, {'weight': 'avg weight'}, by={'city': 'city'}).py() + q.qsql.update([1, 2, 3], columns={'weight': 'avg weight'}, by={'city': 'city'}, inplace=True) # noqa: E501 + + q.qsql.update('byqtab', columns={'weight': 'max weight'}, by={'city': 'city'}, inplace=True) + q.qsql.update(byqtab, columns={'weight': 'max weight'}, by={'city': 'city'}, inplace=True) + assert q['byqtab'].py() == byqtab.py() + + with pytest.raises(RuntimeError) as err: + q.qsql.update(qtab, 
columns={'newcol': 'weight'}, modify=True, inplace=True) + assert 'Attempting to use both' in str(err) @pytest.mark.asyncio @@ -192,19 +245,19 @@ async def test_update_async(kx, q_port): 'color:100?`red`green`blue;weight:0.5*100?20;' 'city:100?`london`paris`rome)') q['byqtab'] = byqtab - assert (await q('update avg weight by city from byqtab')).py() ==\ - (await q.qsql.update(byqtab, {'weight': 'avg weight'}, by={'city': 'city'})).py() + assert (await q('update max weight by city from byqtab')).py() ==\ + (await q.qsql.update(byqtab, {'weight': 'max weight'}, by={'city': 'city'})).py() await q.qsql.update('byqtab', - columns={'weight': 'avg weight'}, + columns={'weight': 'max weight'}, by={'city': 'city'}, - modify=True) + inplace=True) with pytest.raises(TypeError): await q.qsql.update(byqtab, - columns={'weight': 'avg weight'}, + columns={'weight': 'max weight'}, by={'city': 'city'}, - modify=True) + inplace=True) assert q['byqtab'].py() == \ - (await q.qsql.update(byqtab, {'weight': 'avg weight'}, by={'city': 'city'})).py() + (await q.qsql.update(byqtab, {'weight': 'max weight'}, by={'city': 'city'})).py() @pytest.mark.ipc @@ -221,14 +274,13 @@ def test_delete(q): assert q('delete from qtab where hair=`fair').py() == \ q.qsql.delete(qtab, where='hair=`fair').py() assert q('delete from qtab where qbool').py() - q.qsql.delete('q-tab', where='hair=`fair', modify=True) - with pytest.raises(TypeError): - q.qsql.delete(qtab, where='hair=`fair', modify=True) + q.qsql.delete('q-tab', where='hair=`fair', inplace=True) + q.qsql.delete(qtab, where='hair=`fair', inplace=True) + assert q['q-tab'].py() == qtab.py() with pytest.raises(TypeError): q.qsql.delete('q-tab', where='hair=`fair', columns=['age']) with pytest.raises(TypeError): q.qsql.delete([1, 2, 3], where='hair=`fair', columns=['age']) - assert q['q-tab'].py() == q('delete from qtab where hair=`fair').py() @pytest.mark.asyncio @@ -249,9 +301,9 @@ async def test_delete_async(kx, q_port): assert (await q('delete from 
qtab where hair=`fair')).py() == \ (await q.qsql.delete(qtab, where='hair=`fair')).py() assert (await q('delete from qtab where qbool')).py() - await q.qsql.delete('q-tab', where='hair=`fair', modify=True) + await q.qsql.delete('q-tab', where='hair=`fair', inplace=True) with pytest.raises(TypeError): - await q.qsql.delete(qtab, where='hair=`fair', modify=True) + await q.qsql.delete(qtab, where='hair=`fair', inplace=True) with pytest.raises(TypeError): await q.qsql.delete('q-tab', where='hair=`fair', columns=['age']) assert q['q-tab'].py() == (await q('delete from qtab where hair=`fair')).py() @@ -310,7 +362,10 @@ def test_table_insert_method(q): qtab = q('([] a: 1 2 3 4 5; b: 1.0 2.0 3.0 4.0 5.0; c: `a`b`c`d`e)') q_inserted_tab = q('([] a: 1 2 3 4 5 6; b: 1.0 2.0 3.0 4.0 5.0 6.0; c: `a`b`c`d`e`f)') - assert qtab.insert([6, 6.0, 'f'], replace_self=False).py() == q_inserted_tab.py() + with pytest.warns(DeprecationWarning, + match=r"Keyword 'replace_self' is deprecated please use 'inplace'"): + assert qtab.insert([6, 6.0, 'f'], replace_self=False).py() == q_inserted_tab.py() + assert qtab.insert([6, 6.0, 'f'], inplace=False).py() == q_inserted_tab.py() assert qtab.py() != q_inserted_tab.py() qtab.insert([6, 6.0, 'f']) @@ -322,7 +377,10 @@ def test_table_upsert_method(q): qtab = q('([] a: 1 2 3 4 5; b: 1.0 2.0 3.0 4.0 5.0; c: `a`b`c`d`e)') q_inserted_tab = q('([] a: 1 2 3 4 5 6; b: 1.0 2.0 3.0 4.0 5.0 6.0; c: `a`b`c`d`e`f)') - assert qtab.upsert([6, 6.0, 'f'], replace_self=False).py() == q_inserted_tab.py() + with pytest.warns(DeprecationWarning, + match=r"Keyword 'replace_self' is deprecated please use 'inplace'"): + assert qtab.upsert([6, 6.0, 'f'], replace_self=False).py() == q_inserted_tab.py() + assert qtab.upsert([6, 6.0, 'f'], inplace=False).py() == q_inserted_tab.py() assert qtab.py() != q_inserted_tab.py() qtab.upsert([6, 6.0, 'f']) @@ -334,7 +392,10 @@ def test_keyed_table_insert_method(q): qtab = q('([a: 1 2 3 4 5] b: 1.0 2.0 3.0 4.0 5.0; c: `a`b`c`d`e)') 
q_inserted_tab = q('([a: 1 2 3 4 5 6] b: 1.0 2.0 3.0 4.0 5.0 6.0; c: `a`b`c`d`e`f)') - assert qtab.insert([6, 6.0, 'f'], replace_self=False).py() == q_inserted_tab.py() + with pytest.warns(DeprecationWarning, + match=r"Keyword 'replace_self' is deprecated please use 'inplace'"): + assert qtab.insert([6, 6.0, 'f'], replace_self=False).py() == q_inserted_tab.py() + assert qtab.insert([6, 6.0, 'f'], inplace=False).py() == q_inserted_tab.py() assert qtab.py() != q_inserted_tab.py() qtab.insert([6, 6.0, 'f']) @@ -346,7 +407,10 @@ def test_keyed_table_upsert_method(q): qtab = q('([a: 1 2 3 4 5] b: 1.0 2.0 3.0 4.0 5.0; c: `a`b`c`d`e)') q_inserted_tab = q('([a: 1 2 3 4 5 6] b: 1.0 2.0 3.0 4.0 5.0 6.0; c: `a`b`c`d`e`f)') - assert qtab.upsert([6, 6.0, 'f'], replace_self=False).py() == q_inserted_tab.py() + with pytest.warns(DeprecationWarning, + match=r"Keyword 'replace_self' is deprecated please use 'inplace'"): + assert qtab.upsert([6, 6.0, 'f'], replace_self=False).py() == q_inserted_tab.py() + assert qtab.upsert([6, 6.0, 'f'], inplace=False).py() == q_inserted_tab.py() assert qtab.py() != q_inserted_tab.py() qtab.upsert([6, 6.0, 'f']) diff --git a/tests/test_register.py b/tests/test_register.py index 67faf65..2920dc1 100644 --- a/tests/test_register.py +++ b/tests/test_register.py @@ -6,7 +6,9 @@ def test_register_py_toq(q, kx): with pytest.raises(TypeError) as err_info: kx.toq(complex(1, 2)) - assert str(err_info.value) == "Cannot convert '(1+2j)' to K object" + assert str(err_info.value) == ( + "Cannot convert '(1+2j)' to K object." + " See pykx.register to register custom conversions.") def complex_toq(data): return kx.toq([data.real, data.imag]) diff --git a/tests/test_reimport.py b/tests/test_reimport.py new file mode 100644 index 0000000..08e1d46 --- /dev/null +++ b/tests/test_reimport.py @@ -0,0 +1,15 @@ +# Do not import pykx here - use the `kx` fixture instead! 
+import subprocess + + +def test_reimport(kx): + with kx.PyKXReimport(): + ret = subprocess.run(["python", "-c", "import pykx"]) + assert 0 == ret.returncode + + +def test_reimport_kdefault(kx): + with kx.PyKXReimport(): + ret = subprocess.run( + ["python", "-c", "import os;os.environ['PYKX_DEFAULT_CONVERSION']='k';import pykx"]) + assert 0 == ret.returncode diff --git a/tests/test_remote.py b/tests/test_remote.py new file mode 100644 index 0000000..b825d06 --- /dev/null +++ b/tests/test_remote.py @@ -0,0 +1,96 @@ +# Do not import pykx here - use the `kx` fixture instead! +import pytest + + +def test_session_create_clear(kx, q_port): + session = kx.remote.session() + assert session._session is None + session.create(port=q_port) + assert isinstance(session._session, kx.SyncQConnection) + session.clear() + assert session._session is None + + +def test_library_add_clear(kx, q_port): + session = kx.remote.session() + session.create(port=q_port) + assert session._libraries == [] + session.add_library('numpy', 'pandas') + assert session._libraries == ['numpy', 'pandas'] + session.clear() + assert session._libraries == [] + + +def test_session_errors(kx, q_port): + session = kx.remote.session() + with pytest.raises(Exception) as err: + session.add_library('numpy') + assert 'Unable to add packages in the absence' in str(err.value) + session.create(port=q_port) + with pytest.raises(Exception) as err: + session.create(port=q_port) + assert 'Active session in progress' in str(err.value) + + +@pytest.mark.xfail(reason="KXI-36200", strict=False) +@pytest.mark.unlicensed +def test_remote_exec(kx, q_port): + session = kx.remote.session() + session.create(port=q_port) + + @kx.remote.function(session) + def func(x): + return x+1 + assert kx.q('2') == func(1) + + +@pytest.mark.xfail(reason="KXI-36200", strict=False) +@pytest.mark.unlicensed +def test_remote_library_exec(kx, q_port): + session = kx.remote.session() + session.create(port=q_port) + session.add_library('pykx') + + 
@kx.remote.function(session) + def pykx_func(x, y): + return pykx.q.til(x) + y # noqa: F821 + assert kx.q('5+til 5') == pykx_func(5, 5) + + +@pytest.mark.xfail(reason="KXI-36200", strict=False) +@pytest.mark.unlicensed +def test_exec_failures(kx, q_port): + @kx.remote.function(10) + def test_func(x): + return x+1 + with pytest.raises(Exception) as err: + test_func(1) + assert 'Supplied remote_session instance must be' in str(err.value) + + session = kx.remote.session() + + @kx.remote.function(session) + def test_func(x): + return x+1 + with pytest.raises(Exception) as err: + test_func(2) + assert "User session must be generated using the 'create_session'" in str(err.value) + + session = kx.remote.session() + session.create(port=q_port) + + @kx.remote.function(session) + def test_func(x): + return numpy.array([x.py()]) # noqa: F821 + with pytest.raises(kx.exceptions.QError) as err: + test_func(10) + assert "name 'numpy' is not defined" in str(err.value) + + session.add_library('undefined') + + @kx.remote.function(session) + def test_func(x): + return x+1 + with pytest.raises(kx.exception.QError) as err: + test_func(1) + assert "Failed to load package: undefined" in str(err.value) diff --git a/tests/test_reset_index.py b/tests/test_reset_index.py new file mode 100644 index 0000000..4d166d1 --- /dev/null +++ b/tests/test_reset_index.py @@ -0,0 +1,67 @@ + +"""Tests for the Pandas API set_index.""" +import pytest + + +def test_reset_index_single(q): + df = q('([] x: til 10; y: 10 - til 10; z: 10?`a`b`c)') + assert q('~', df, df.set_index('x').reset_index()) + + +def test_reset_index_multi(q): + df = q('([] x: til 10; y: 10 - til 10; z: 10?`a`b`c)') + assert q('~', df, df.set_index(['x', 'y']).reset_index()) + + +def test_reset_int(q): + df = q('([x: til 10; y: 10 - til 10]z: 10?`a`b`c)') + pddf = df.pd() + assert q('~', df.reset_index(0), pddf.reset_index(0)) + assert q('~', df.reset_index(1), pddf.reset_index(1)) + assert q('~', df.reset_index([0, 1]), 
pddf.reset_index([0, 1])) + + +def test_reset_str(q): + df = q('([x: til 10; y: 10 - til 10]z: 10?`a`b`c)') + pddf = df.pd() + assert q('~', df.reset_index('x'), pddf.reset_index('x')) + assert q('~', df.reset_index('y'), pddf.reset_index('y')) + assert q('~', df.reset_index(['x', 'y']), pddf.reset_index(['x', 'y'])) + + +def test_reset_drop(q): + df = q('([x: til 10; y: 10 - til 10]z: 10?`a`b`c)') + pddf = df.pd() + assert q('~', df.reset_index('x', drop=True), pddf.reset_index('x', drop=True)) + assert q('~', df.reset_index('y', drop=True), pddf.reset_index('y', drop=True)) + assert q('~', df.reset_index(['x', 'y'], drop=True), pddf.reset_index(['x', 'y'], drop=True)) + + +def test_reset_duplicates(kx, q): + df = q('([til 10;10?1f];10?1f;10?1f)') + assert q('~', df.reset_index(allow_duplicates=True), q('0!', df)) + with pytest.raises(kx.QError) as err: + df.reset_index() + assert 'Cannot reset index' in str(err) + + +def test_reset_errors(kx, q): + df = q('([til 10;10?1f];10?1f;10?1f)') + with pytest.raises(kx.QError) as err: + df.reset_index(col_level=1) + assert "'col_level' not presently" in str(err) + with pytest.raises(kx.QError) as err: + df.reset_index(col_fill=1) + assert "'col_fill' not presently" in str(err) + with pytest.raises(kx.QError) as err: + df.reset_index(names=['a', 'b']) + assert "'names' not presently" in str(err) + with pytest.raises(TypeError) as err: + df.reset_index(levels=1.0, allow_duplicates=True) + assert "Unsupported type provided for 'levels'" in str(err) + with pytest.raises(kx.QError) as err: + df.reset_index('missing_col', allow_duplicates=True) + assert "Key(s) missing_col not found" in str(err) + with pytest.raises(kx.QError) as err: + df.reset_index(10, allow_duplicates=True) + assert 'out of range' in str(err) diff --git a/tests/test_system.py b/tests/test_system.py index 6f2da19..b9d47cb 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -18,6 +18,10 @@ def test_qargs_s_flag(num_threads): @pytest.mark.isolate 
+@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_qargs_s_flag_missing(): os.environ['QARGS'] = '--licensed -s' with pytest.raises(Exception, match='ValueError: Missing argument for'): @@ -26,6 +30,10 @@ def test_qargs_s_flag_missing(): @pytest.mark.isolate @pytest.mark.parametrize('num_threads', (1.5, 1.0, 'hmmm')) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_qargs_s_flag_invalid(num_threads): os.environ['QARGS'] = f'--licensed -s {num_threads}' with pytest.raises(Exception, match='ValueError: Invalid argument for'): @@ -107,7 +115,8 @@ def test_system_variables(): assert all(kx.q.system.variables() == kx.q('enlist `a')) print(kx.q.system.variables('.pykx')) assert all(kx.q.system.variables('.pykx') == kx.q('`debug`i`pykxDir`pykxExecutable`util')) - assert all(kx.q.system.variables('pykx') == kx.q('`debug`i`pykxDir`pykxExecutable`util')) + kx.q('pykx.a:til 10;pykx.b:20') + assert all(kx.q.system.variables('pykx') == kx.q('`a`b')) @pytest.mark.isolate @@ -300,8 +309,10 @@ def test_system_variables_ipc(q_port): assert all(q.system.variables() == q('enlist `a')) q('.pykx.i: 5') q('.pykx.pykxDir: til 10') + q('pykx.a: til 10') + q('pykx.b: til 10') assert all(q.system.variables('.pykx') == q('`i`pykxDir')) - assert all(q.system.variables('pykx') == q('`i`pykxDir')) + assert all(q.system.variables('pykx') == q('`a`b')) @pytest.mark.isolate diff --git a/tests/test_toq.py b/tests/test_toq.py index f61fccc..5d744ee 100644 --- a/tests/test_toq.py +++ b/tests/test_toq.py @@ -1,10 +1,10 @@ -from datetime import date, datetime, timedelta +from datetime import date, datetime, time, timedelta from functools import partial import math import os from pathlib import Path from sys import getrefcount -from uuid import uuid4 +from uuid import UUID, uuid4 # Do not import Pandas, PyArrow, or PyKX here - use the pd/pa/kx fixtures instead! 
import numpy as np @@ -43,6 +43,10 @@ def q_atom_types(kx): @pytest.mark.unlicensed(unlicensed_only=True) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_adapt_k_unlicensed_error(kx): a = kx.toq(1.5) with pytest.raises(kx.LicenseException): @@ -291,6 +295,23 @@ def test_from_datetime_date(kx): kx.DatetimeAtom(d) +@pytest.mark.unlicensed +def test_from_datetime_time(kx): + t = time(15, 55, 23) + t_np = np.datetime64('2005-02-25T03:30') + + kd = kx.K(t) + assert isinstance(kd, kx.TimespanAtom) + assert kd.py() == timedelta(seconds=57323) + + kd_np = kx.toq.from_datetime_time(t_np, cast=True) + assert isinstance(kd_np, kx.TimespanAtom) + assert kd_np.py() == timedelta(seconds=12600) + + with pytest.raises(TypeError): + kx.TimeAtom(t_np) + + @pytest.mark.unlicensed def test_from_datetime_datetime(kx): d = datetime(2020, 9, 8, 7, 6, 5, 4) @@ -394,6 +415,79 @@ def test_from_UUID(kx): if kx.licensed: assert str(kx.K(u)) == str(u) + u = UUID('db712ca2-81b1-0080-95dd-7bdb502da77d') + assert kx.K(u).py() == u + if kx.licensed: + assert str(kx.K(u)) == str(u) + assert kx.toq(u, kx.GUIDAtom).py() == kx.GUIDAtom(u).py() == u + if kx.licensed: + assert str(kx.K(u)) == str(u) + + u = UUID('7ff0bbb9-ee32-42fe-9631-8c5a09a155a2') + assert kx.K(u).py() == u + if kx.licensed: + assert str(kx.K(u)) == str(u) + assert kx.toq(u, kx.GUIDAtom).py() == kx.GUIDAtom(u).py() == u + if kx.licensed: + assert str(kx.K(u)) == str(u) + + +@pytest.mark.unlicensed +def test_from_UUID_list(kx): + u = [UUID('db712ca2-81b1-0080-95dd-7bdb502da77d')] + assert kx.K(u).py() == u + if kx.licensed: + assert str(kx.K(u[0])) == str(u[0]) + assert kx.toq(u, kx.GUIDVector).py() == kx.GUIDVector(u).py() == u + if kx.licensed: + assert str(kx.K(u[0])) == str(u[0]) + + u = [UUID('7ff0bbb9-ee32-42fe-9631-8c5a09a155a2')] + assert kx.K(u).py() == u + if kx.licensed: + assert str(kx.K(u[0])) == str(u[0]) + assert kx.toq(u, kx.GUIDVector).py() == 
kx.GUIDVector(u).py() == u + if kx.licensed: + assert str(kx.K(u[0])) == str(u[0]) + + +@pytest.mark.unlicensed +def test_from_UUID_np_array(kx): + u = np.array([UUID('db712ca2-81b1-0080-95dd-7bdb502da77d')], dtype=object) + assert kx.K(u).py() == u + if kx.licensed: + assert str(kx.K(u[0])) == str(u[0]) + assert kx.toq(u, kx.GUIDVector).py() == kx.GUIDVector(u).py() == u + if kx.licensed: + assert str(kx.K(u[0])) == str(u[0]) + + u = np.array([UUID('7ff0bbb9-ee32-42fe-9631-8c5a09a155a2')], dtype=object) + assert kx.K(u).py() == u + if kx.licensed: + assert str(kx.K(u[0])) == str(u[0]) + assert kx.toq(u, kx.GUIDVector).py() == kx.GUIDVector(u).py() == u + if kx.licensed: + assert str(kx.K(u[0])) == str(u[0]) + + +@pytest.mark.unlicensed +def test_to_UUID_np_array(kx): + u = np.array([UUID('db712ca2-81b1-0080-95dd-7bdb502da77d')], dtype=object) + assert kx.K(u).np() == u + if kx.licensed: + assert str(kx.K(u[0])) == str(u[0]) + assert kx.toq(u, kx.GUIDVector).np() == kx.GUIDVector(u).np() == u + if kx.licensed: + assert str(kx.K(u[0])) == str(u[0]) + + u = np.array([UUID('7ff0bbb9-ee32-42fe-9631-8c5a09a155a2')], dtype=object) + assert kx.K(u).np() == u + if kx.licensed: + assert str(kx.K(u[0])) == str(u[0]) + assert kx.toq(u, kx.GUIDVector).np() == kx.GUIDVector(u).np() == u + if kx.licensed: + assert str(kx.K(u[0])) == str(u[0]) + @pytest.mark.unlicensed def test_from_tuple(kx): @@ -896,6 +990,11 @@ def test_from_pandas_series(kx, pd): @pytest.mark.nep49 +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) +@pytest.mark.xfail(reason="KXI-33749", strict=False) def test_from_pandas_series_licensed(q, kx): float_vector = q('100?1f') assert all(float_vector == kx.K(float_vector.pd())) @@ -1037,7 +1136,8 @@ def test_toq_pa_tabular_ktype(q, kx, pa): @pytest.mark.unlicensed -def test_toq_dict_error(q, kx): +@pytest.mark.xfail(reason="Windows test execution fails intermittently with license error", strict=False) # 
noqa: E501 +def test_toq_dict_error(q, kx, pa): pdSeries = q('1 2 3').pd() with pytest.raises(TypeError, match=r"'ktype' .*"): kx.toq(pdSeries, {'x': kx.LongVector}) diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py index 5e4e4ac..0047c40 100644 --- a/tests/test_wrappers.py +++ b/tests/test_wrappers.py @@ -7,10 +7,12 @@ import gc import math from operator import index +import os import pickle from platform import python_implementation from textwrap import dedent from uuid import UUID +import itertools # Do not import Pandas, PyArrow, or PyKX here - use the pd/pa/kx fixtures instead! import numpy as np @@ -18,7 +20,6 @@ import pytz from packaging import version - pypy = python_implementation() == 'PyPy' @@ -64,6 +65,10 @@ def test_pykx_q_get(kx, q): @pytest.mark.embedded +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) def test_pykx_q_getattr(kx, q): q("af:.pykx.eval\"type('Car', (object,), {'speed': 200, 'color': 'red'})\"") q('af[`:speed]`') @@ -158,6 +163,10 @@ def test_str(self, q): assert str(q('()')) == '' @pytest.mark.unlicensed(unlicensed_only=True) + @pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' + ) def test_repr_str_unlicensed(self, kx): x = kx.K([0, None, '', float('NaN')]) assert str(x) == repr(x) @@ -233,17 +242,204 @@ def test_equality(self, q): assert q('5') != None # noqa: E711 assert not q('5') == None # noqa: E711 + def test_slicing(self, q, kx): + test_vector = q('1 2 3') + assert test_vector[0].py() == test_vector.py()[0] + assert test_vector[-1].py() == test_vector.py()[-1] + assert test_vector[1:].py() == test_vector.py()[1:] + assert test_vector[-1:].py() == test_vector.py()[-1:] + assert test_vector[-2:-1].py() == test_vector.py()[-2:-1] + assert test_vector[-5:].py() == test_vector.py()[-5:] + assert test_vector[:-1].py() == test_vector.py()[:-1] + assert test_vector[:-3].py() == test_vector.py()[:-3] + 
assert test_vector[::1].py() == test_vector.py()[::1] + assert test_vector[::2].py() == test_vector.py()[::2] + assert test_vector[-1:5:2].py() == test_vector.py()[-1:5:2] + assert test_vector[::-1].py() == test_vector.py()[::-1] + with pytest.raises(ValueError) as err: + test_vector[::0] + assert 'slice step cannot be zero' in str(err) + + test_list = q('(1 2 3; 4 5 6)') + assert test_list[0].py() == test_list.py()[0] + assert test_list[-1].py() == test_list.py()[-1] + assert test_list[:6].py() == test_list.py()[:6] + with pytest.raises(ValueError) as err: + test_list[::0] + assert 'slice step cannot be zero' in str(err) + + test_table = q('([] a:1 2 3)') + assert all(test_table[1:].pd() == test_table.pd()[1:].reset_index(drop=True)) + assert all(test_table[-1:].pd() == test_table.pd()[-1:].reset_index(drop=True)) + with pytest.raises(ValueError) as err: + test_table[::0] + assert 'slice step cannot be zero' in str(err) + + test_table2 = q('([] a:1 2 3; b:4 5 6; c:7 8 9)') + assert all(test_table2[2:].pd() == test_table2.pd()[2:].reset_index(drop=True)) + assert all(test_table2[-2:].pd() == test_table2.pd()[-2:].reset_index(drop=True)) + assert all(test_table2[-3:].pd() == test_table2.pd()[-3:].reset_index(drop=True)) + assert test_table2[6:] == test_table2[10:] + assert all(test_table2[-4:].pd() == test_table2.pd()[-4:].reset_index(drop=True)) + assert all(test_table2[:4].pd() == test_table2.pd()[:4].reset_index(drop=True)) + assert all(test_table2[::1].pd() == test_table2.pd()[::1].reset_index(drop=True)) + assert all(test_table2[::2].pd() == test_table2.pd()[::2].reset_index(drop=True)) + assert all(test_table2[-1:5:2].pd() == test_table2.pd()[-1:5:2].reset_index(drop=True)) + assert all(test_table2[::-1].pd() == test_table2.pd()[::-1].reset_index(drop=True)) + assert test_table2[:-9] == q('sublist', 0, test_table2) + assert all(test_table2[:-1].pd() == test_table2.pd()[:-1]) + + empty_vector = q('`long$()') + assert empty_vector[1:] == empty_vector + assert 
empty_vector[-1:] == empty_vector + assert empty_vector[:1] == empty_vector + assert empty_vector[:-1] == empty_vector + + list_of_empties = q('(();();())') + assert (list_of_empties[1:] == q('(();())')).all() + assert (list_of_empties[-1:] == q('enlist ()')).all() + assert (list_of_empties[:1] == q('enlist ()')).all() + assert (list_of_empties[:-1] == q('(();())')).all() + + empty_table = q('([] a:(); b:(); c:())') + assert empty_table[1:] == empty_table + assert empty_table[-1:] == empty_table + assert empty_table[:1] == empty_table + assert empty_table[:-1] == empty_table + + vector_of_one = q('enlist 1') + assert vector_of_one[1:] == kx.LongVector(q('`long$()')) + assert (vector_of_one[:1] == q('enlist 1')).all() + assert (vector_of_one[:1] == q('enlist 1')).all() + assert vector_of_one[:-1] == kx.LongVector(q('`long$()')) + + list_of_two = q('(1; 2.0)') + assert (list_of_two[1:] == q('enlist 2f')).all() + assert (list_of_two[:1] == q('enlist 1')).all() + assert (list_of_two[-1:] == q('enlist 2f')).all() + assert (list_of_two[:-1] == q('enlist 1')).all() + + def test_vector_indexing(self, q): # noqa: C901 + vector = q('til 3') + vectorpy = vector.py() + indexList = [-3, -2, -1, 0, 1, 2, 3, None] + listOfLists = [indexList, indexList, indexList] + comboList = list(itertools.product(*listOfLists)) + + for i in comboList: + s = slice(*i) + try: + q = vector[s] + qNoqNulls = [None if i.is_null else i.py() for i in q] + qErr = False + except Exception as ex: + qEx = ex + qErr = True + try: + p = vectorpy[s] + pErr = False + except Exception as ex: + pEx = ex + pErr = True + if not qErr and not pErr: + if not qNoqNulls == p: + print(s, qNoqNulls, p) + raise AssertionError + elif qErr and not pErr: + print(s, qEx, p) + raise AssertionError + elif not qErr and pErr: + print(s, q, pEx) + raise AssertionError + elif qErr and pErr: + if not qErr == pErr: + print(s, qEx, pEx) + raise AssertionError + else: + print(s) + raise AssertionError + + def test_list_indexing(self, 
q): # noqa: C901 + vector = q('(1i;2f;3j)') + vectorpy = vector.py() + indexList = [-3, -2, -1, 0, 1, 2, 3, None] + listOfLists = [indexList, indexList, indexList] + comboList = list(itertools.product(*listOfLists)) + + for i in comboList: + s = slice(*i) + try: + q = vector[s] + qNoqNulls = [None if i.is_null else i.py() for i in q] + qErr = False + except Exception as ex: + qEx = ex + qErr = True + try: + p = vectorpy[s] + pErr = False + except Exception as ex: + pEx = ex + pErr = True + if not qErr and not pErr: + if not qNoqNulls == p: + print(s, qNoqNulls, p) + raise AssertionError + elif qErr and not pErr: + print(s, qEx, p) + raise AssertionError + elif not qErr and pErr: + print(s, q, pEx) + raise AssertionError + elif qErr and pErr: + if not qErr == pErr: + print(s, qEx, pEx) + raise AssertionError + else: + print(s) + raise AssertionError + + def test_table_indexing(self, q): # noqa: C901 + tab = q('([] a:1 2 3; b:4 5 6; c:7 8 9)') + tabpd = tab.pd() + indexList = [-3, -2, -1, 0, 1, 2, 3, None] + listOfLists = [indexList, indexList, indexList] + comboList = list(itertools.product(*listOfLists)) + + for i in comboList: + s = slice(*i) + try: + q = tab[s].pd() + qErr = False + except Exception as ex: + qEx = ex + qErr = True + try: + p = tabpd[s].reset_index(drop=True) + pErr = False + except Exception as ex: + pEx = ex + pErr = True + if not qErr and not pErr: + if len(q) != len(p) or not all(q == p): + print(s, q, p) + raise AssertionError + elif qErr and not pErr: + print(s, qEx, p) + raise AssertionError + elif not qErr and pErr: + print(s, q, pEx) + raise AssertionError + elif qErr and pErr: + if not qErr == pErr: + print(s, qEx, pEx) + raise AssertionError + else: + print(s) + raise AssertionError -class Test_Atom: - def test_char_atom(self, kx, q): - atom = kx.CharAtom('a') - assert 1 == len(atom) - assert atom[0].py() == b'a' - with pytest.raises(IndexError): - atom[1] - with pytest.raises(IndexError): - atom[-1] +class Test_Atom: def 
test_boolean_atom(self, q): t, f = q('1b'), q('0b') assert t == True # noqa @@ -309,6 +505,10 @@ def test_inf_fail(self, kx): assert 'Retrieval of infinite values' in str(err) @pytest.mark.unlicensed(unlicensed_only=True) + @pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' + ) def test_null_inf_unlic(self, kx): qtypes = [kx.ByteAtom, kx.GUIDAtom, kx.ShortAtom, kx.IntAtom, kx.LongAtom, kx.RealAtom, @@ -1267,6 +1467,10 @@ def f(type_code, zero): for type_code, zero in types: f(type_code, zero) + def test_np_timestampvector_nulls(self, q, kx): + assert kx.q('0Np').py() is None + assert kx.q('enlist 0Np').py() == [kx.TimestampAtom(kx.q('0Np'))] + class Test_List: v = '(0b;"G"$"00000000-0000-0000-0000-000000000001";0x02;3h;4i;5j;6e;7f)' @@ -1741,28 +1945,6 @@ def test_empty_vector(self, q): assert q('"p"$()').np().dtype == np.dtype('datetime64[ns]') assert q('"p"$()').np(raw=True).dtype == np.int64 - def test_extracting_date_and_time(self, kx): - ts = kx.q('2023.10.25D16:42:01.292070013') - assert ts.date == kx.DateAtom(kx.q('2023.10.25')) - assert ts.time == kx.TimeAtom(kx.q('16:42:01.292')) - assert ts.year == kx.IntAtom(kx.q('2023i')) - assert ts.month == kx.IntAtom(kx.q('10i')) - assert ts.day == kx.IntAtom(kx.q('25i')) - assert ts.hour == kx.IntAtom(kx.q('16i')) - assert ts.minute == kx.IntAtom(kx.q('42i')) - assert ts.second == kx.IntAtom(kx.q('1i')) - - ts_2 = kx.q('2018.11.09D12:21:08.456123789') - tsv = kx.q('enlist', ts, ts_2) - assert (tsv.date == kx.DateVector(kx.q('2023.10.25 2018.11.09'))).all() - assert (tsv.time == kx.TimeVector(kx.q('16:42:01.292 12:21:08.456'))).all() - assert (tsv.year == kx.IntVector(kx.q('2023 2018i'))).all() - assert (tsv.month == kx.IntVector(kx.q('10 11i'))).all() - assert (tsv.day == kx.IntVector(kx.q('25 9i'))).all() - assert (tsv.hour == kx.IntVector(kx.q('16 12i'))).all() - assert (tsv.minute == kx.IntVector(kx.q('42 21i'))).all() - assert (tsv.second == 
kx.IntVector(kx.q('1 8i'))).all() - class Test_MonthVector: q_vec_str = '2006.04 1947.10 1876.04 2170.01m' @@ -2512,6 +2694,7 @@ def test_nested_keyed_dict(self, q): class Test_KeyedTable: kt = '([k1:100+til 3] x:til 3; y:`singly`keyed`table)' mkt = '([k1:`a`b`a;k2:100+til 3] x:til 3; y:`multi`keyed`table)' + mkt_mask = '([col1:0 1 0N]col2: 0 1 0N;col3: 1 2 3)' def test_bool(self, q): assert q(self.kt).any() @@ -2559,6 +2742,16 @@ def test_pd(self, q): assert kt_pd['y'][102] == 'table' assert b'pykx' not in pickle.dumps(kt_pd) + def test_mask_keyed_pd(self, q, kx): + mkt_mask_q = q(self.mkt_mask) + mkt_mask_pd = mkt_mask_q.pd() + mkt_mask_multi_q = mkt_mask_q.set_index(['col2'], append=True) + mkt_mask_multi_pd = mkt_mask_multi_q.pd() + assert isinstance(kx.toq(mkt_mask_pd.index), kx.LongVector) + assert isinstance(kx.toq(mkt_mask_pd['col2']), kx.LongVector) + assert all(kx.q('0!', kx.toq(mkt_mask_pd)) == kx.q('0!', mkt_mask_q)) + assert all(kx.q('0!', kx.toq(mkt_mask_multi_pd)) == kx.q('0!', mkt_mask_multi_q)) + def test_multi_keyed_pd(self, q): mkt_pd = q(self.mkt).pd() assert mkt_pd['x'][('a', 100)] == 0 @@ -2778,6 +2971,10 @@ def test_call(self, q): assert 1024 == q.pykx.modpow(2, 10, None) @pytest.mark.unlicensed(unlicensed_only=True) + @pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' + ) def test_call_unlicensed(self, kx, q_port): q = kx.QConnection(port=q_port) funcs = ( @@ -2937,6 +3134,10 @@ def test_dotted_adverbs(self, q): == q(',').vs(q('"ab"'), q('"XY"'))) assert q(',').vs(q('"ab"'), q('"XY"')).py() == [b'aXY', b'bXY'] + @pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' + ) def test_nested_error(self, kx, q): try: q('{x[y;z]}', lambda x, y: x.py() + y.py(), 'sym', 2) @@ -3429,6 +3630,10 @@ def test_attributes_keyed_table(kx, q): tab.parted(['x', 'x1']) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not 
supported with PYKX_THREADING' +) def test_apply_vector(q, kx): longvec = q('til 10') assert (longvec.apply(lambda x: x+1) == q('1+til 10')).all() @@ -3459,6 +3664,14 @@ def func(x): longvec.apply(q('{x+y}'), y=1) +def test_magic_dates_times(kx): + assert kx.q('.z.D') == kx.DateAtom('today') + curr_time = kx.q('.z.T') + assert curr_time <= kx.TimeAtom('now') + curr_tstamp = kx.q('.z.P') + assert curr_tstamp <= kx.TimestampAtom('now') + + def checkHTML(tab): html = tab._repr_html_() return (html.count(''), html.count(''), html.count(''))