Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pyo3-polars 0.15; matching Polars 0.41.0 #84

Merged
merged 1 commit into from
Jun 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ members = [
]

[workspace.dependencies]
polars = { version = "0.40.0", default-features = false }
polars-core = { version = "0.40.0", default-features = false }
polars-ffi = { version = "0.40.0", default-features = false }
polars-plan = { version = "0.40.0", default-feautres = false }
polars-lazy = { version = "0.40.0", default-features = false }
polars = { version = "0.41.0", default-features = false }
polars-core = { version = "0.41.0", default-features = false }
polars-ffi = { version = "0.41.0", default-features = false }
polars-plan = { version = "0.41.0", default-feautres = false }
polars-lazy = { version = "0.41.0", default-features = false }
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@ Documentation for this functionality may also be found in the [Polars User Guide
This is new functionality and should be preferred over `2.`, as it
circumvents the GIL and is the way we intend to support extending polars.


Parallelism and optimizations are managed by the default polars runtime. That runtime will call into the plugin function.
The plugin functions are compiled separately.

We can therefore keep polars more lean and maybe add support for a `polars-distance`, `polars-geo`, `polars-ml`, etc.
We can therefore keep polars more lean and maybe add support for a `polars-distance`, `polars-geo`, `polars-ml`, etc.
Those can then have specialized expressions and don't have to worry as much about code bloat, as they can be optionally installed.

The idea is that you define an expression in another Rust crate with a proc_macro `polars_expr`.
Expand Down Expand Up @@ -75,6 +74,7 @@ def pig_latinnify(expr: IntoExpr, capitalize: bool = False) -> pl.Expr:
kwargs={"capitalize": capitalize},
)
```

Compile/ship and then it is ready to use:

```python
Expand All @@ -90,7 +90,9 @@ out = df.with_columns(
pig_latin = language.pig_latinnify("names")
)
```

Alternatively, you can [register a custom namespace](https://docs.pola.rs/py-polars/html/reference/api/polars.api.register_expr_namespace.html#polars.api.register_expr_namespace), which enables you to write:

```python
out = df.with_columns(
pig_latin = pl.col("names").language.pig_latinnify()
Expand Down
2 changes: 1 addition & 1 deletion example/derive_expression/expression_lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ crate-type = ["cdylib"]
polars = { workspace = true, features = ["fmt", "dtype-date", "timezones"], default-features = false }
pyo3 = { version = "0.21", features = ["abi3-py38"] }
pyo3-polars = { version = "*", path = "../../../pyo3-polars", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
rayon = "1.7.0"
serde = { version = "1", features = ["derive"] }

[target.'cfg(target_os = "linux")'.dependencies]
jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location
from polars.plugins import register_plugin_function
from pathlib import Path

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)


def is_leap_year(expr: IntoExpr) -> pl.Expr:
expr = parse_into_expr(expr)
return expr.register_plugin(
lib=lib,
symbol="is_leap_year",
return register_plugin_function(
plugin_path=Path(__file__).parent,
args=[expr],
function_name="is_leap_year",
is_elementwise=True,
)

Expand All @@ -20,6 +20,8 @@ def is_leap_year(expr: IntoExpr) -> pl.Expr:
# purposes.
def change_time_zone(expr: IntoExpr, tz: str = "Europe/Amsterdam") -> pl.Expr:
expr = parse_into_expr(expr)
return expr.register_plugin(
lib=lib, symbol="change_time_zone", is_elementwise=True, kwargs={"tz": tz}
return register_plugin_function(
plugin_path=Path(__file__).parent,
args=[expr],
function_name="change_time_zone", is_elementwise=True, kwargs={"tz": tz}
)
33 changes: 16 additions & 17 deletions example/derive_expression/expression_lib/expression_lib/dist.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,27 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location
from polars.plugins import register_plugin_function
from pathlib import Path

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)

from expression_lib.utils import parse_into_expr

def hamming_distance(expr: IntoExpr, other: IntoExpr) -> pl.Expr:
expr = parse_into_expr(expr)
return expr.register_plugin(
lib=lib,
args=[other],
symbol="hamming_distance",
return register_plugin_function(
plugin_path=Path(__file__).parent,
args=[expr, other],
function_name="hamming_distance",
is_elementwise=True,
)


def jaccard_similarity(expr: IntoExpr, other: IntoExpr) -> pl.Expr:
expr = parse_into_expr(expr)
return expr.register_plugin(
lib=lib,
args=[other],
symbol="jaccard_similarity",
return register_plugin_function(
plugin_path=Path(__file__).parent,
args=[expr, other],
function_name="jaccard_similarity",
is_elementwise=True,
)

Expand All @@ -34,10 +33,10 @@ def haversine(
end_long: IntoExpr,
) -> pl.Expr:
start_lat = parse_into_expr(start_lat)
return start_lat.register_plugin(
lib=lib,
args=[start_long, end_lat, end_long],
symbol="haversine",
return register_plugin_function(
plugin_path=Path(__file__).parent,
args=[start_lat, start_long, end_lat, end_long],
function_name="haversine",
is_elementwise=True,
cast_to_supertypes=True,
cast_to_supertype=True,
)
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location
from polars.plugins import register_plugin_function
from pathlib import Path

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)


def pig_latinnify(expr: IntoExpr, capitalize: bool = False) -> pl.Expr:
expr = parse_into_expr(expr)
return expr.register_plugin(
lib=lib,
symbol="pig_latinnify",
return register_plugin_function(
plugin_path=Path(__file__).parent,
args=[expr],
function_name="pig_latinnify",
is_elementwise=True,
kwargs={"capitalize": capitalize},
)
Expand All @@ -28,15 +29,15 @@ def append_args(
This example shows how arguments other than `Series` can be used.
"""
expr = parse_into_expr(expr)
return expr.register_plugin(
lib=lib,
args=[],
return register_plugin_function(
plugin_path=Path(__file__).parent,
args=[expr],
kwargs={
"float_arg": float_arg,
"integer_arg": integer_arg,
"string_arg": string_arg,
"boolean_arg": boolean_arg,
},
symbol="append_kwargs",
function_name="append_kwargs",
is_elementwise=True,
)
12 changes: 6 additions & 6 deletions example/derive_expression/expression_lib/expression_lib/panic.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location
from polars.plugins import register_plugin_function

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)
from pathlib import Path


def panic(expr: IntoExpr) -> pl.Expr:
expr = parse_into_expr(expr)
return expr.register_plugin(
lib=lib,
symbol="panic",
return register_plugin_function(
plugin_path=Path(__file__).parent,
args=[expr],
function_name="panic",
)
4 changes: 2 additions & 2 deletions example/derive_expression/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,13 @@
string_arg="example",
)
)
except pl.ComputeError as e:
except pl.exceptions.ComputeError as e:
assert "the plugin failed with message" in str(e)


try:
out.with_columns(pl.col("names").panic.panic())
except pl.ComputeError as e:
except pl.exceptions.ComputeError as e:
assert "the plugin panicked" in str(e)

print("finished")
2 changes: 1 addition & 1 deletion pyo3-polars-derive/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pyo3-polars-derive"
version = "0.8.0"
version = "0.9.0"
edition = "2021"
license = "MIT"
readme = "README.md"
Expand Down
4 changes: 2 additions & 2 deletions pyo3-polars/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pyo3-polars"
version = "0.14.0"
version = "0.15.0"
edition = "2021"
license = "MIT"
readme = "../README.md"
Expand All @@ -17,7 +17,7 @@ polars-ffi = { workspace = true, optional = true }
polars-lazy = { workspace = true, optional = true }
polars-plan = { workspace = true, optional = true }
pyo3 = "0.21.0"
pyo3-polars-derive = { version = "0.8.0", path = "../pyo3-polars-derive", optional = true }
pyo3-polars-derive = { version = "0.9.0", path = "../pyo3-polars-derive", optional = true }
serde = { version = "1", optional = true }
serde-pickle = { version = "1", optional = true }
thiserror = "1"
Expand Down
8 changes: 8 additions & 0 deletions pyo3-polars/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ impl std::convert::From<PyPolarsErr> for PyErr {
PolarsError::StringCacheMismatch(err) => {
StringCacheMismatchError::new_err(err.to_string())
}
PolarsError::SQLInterface(err) => {
SQLInterface::new_err(err.to_string())
},
PolarsError::SQLSyntax(err) => {
SQLSyntax::new_err(err.to_string())
}
PolarsError::Context { error, .. } => convert(error),
}
}
Expand Down Expand Up @@ -67,3 +73,5 @@ create_exception!(exceptions, ShapeError, PyException);
create_exception!(exceptions, SchemaError, PyException);
create_exception!(exceptions, DuplicateError, PyException);
create_exception!(exceptions, StringCacheMismatchError, PyException);
create_exception!(exceptions, SQLInterface, PyException);
create_exception!(exceptions, SQLSyntax, PyException);
2 changes: 1 addition & 1 deletion pyo3-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ use pyo3::ffi::Py_uintptr_t;
use pyo3::prelude::*;

#[cfg(feature = "lazy")]
use {polars_lazy::frame::LazyFrame, polars_plan::logical_plan::DslPlan};
use {polars_lazy::frame::LazyFrame, polars_plan::plans::DslPlan};

#[repr(transparent)]
#[derive(Debug, Clone)]
Expand Down
Loading