diff --git a/omniscidb/ConfigBuilder/ConfigBuilder.cpp b/omniscidb/ConfigBuilder/ConfigBuilder.cpp index 78054e9ff..869240e36 100644 --- a/omniscidb/ConfigBuilder/ConfigBuilder.cpp +++ b/omniscidb/ConfigBuilder/ConfigBuilder.cpp @@ -574,6 +574,12 @@ bool ConfigBuilder::parseCommandLineArgs(int argc, ->default_value(config_->debug.enable_automatic_ir_metadata) ->implicit_value(true), "Enable automatic IR metadata (debug builds only)."); + opt_desc.add_options()( + "enable-gpu-code-compilation-cache", + po::value(&config_->debug.enable_gpu_code_compilation_cache) + ->default_value(config_->debug.enable_gpu_code_compilation_cache) + ->implicit_value(true), + "Enable GPU compilation code caching."); // storage opt_desc.add_options()( diff --git a/omniscidb/QueryEngine/NativeCodegen.cpp b/omniscidb/QueryEngine/NativeCodegen.cpp index 46d1c6b2f..ed5685dd5 100644 --- a/omniscidb/QueryEngine/NativeCodegen.cpp +++ b/omniscidb/QueryEngine/NativeCodegen.cpp @@ -385,7 +385,7 @@ std::shared_ptr Executor::optimizeAndCodegenGPU( auto key = get_code_cache_key(query_func, cgen_state_.get()); auto cached_code = Executor::gpu_code_accessor->get_value(key); - if (cached_code) { + if (config_->debug.enable_gpu_code_compilation_cache && cached_code) { return cached_code; } diff --git a/omniscidb/Shared/Config.h b/omniscidb/Shared/Config.h index 6524885ef..ebe500111 100644 --- a/omniscidb/Shared/Config.h +++ b/omniscidb/Shared/Config.h @@ -171,6 +171,7 @@ struct DebugConfig { std::string build_ra_cache = ""; std::string use_ra_cache = ""; bool enable_automatic_ir_metadata = true; + bool enable_gpu_code_compilation_cache = true; std::string log_dir = "hdk_log"; }; diff --git a/python/pyhdk/hdk.py b/python/pyhdk/hdk.py index b5931ef46..a57ec900e 100644 --- a/python/pyhdk/hdk.py +++ b/python/pyhdk/hdk.py @@ -1750,7 +1750,7 @@ def schema(self): """ pass - def run(self): + def run(self, **kwargs): """ Run query with the current node as a query root node. @@ -2624,6 +2624,7 @@ def ntile(self, tile_count): def init(**kwargs): if init._instance is None: init._instance = HDK(**kwargs) + # TODO: kwargs are ignored here return init._instance diff --git a/python/tests/conftest.py b/python/tests/conftest.py new file mode 100644 index 000000000..5e396306f --- /dev/null +++ b/python/tests/conftest.py @@ -0,0 +1,38 @@ +# +# Copyright 2022 Intel Corporation. +# +# SPDX-License-Identifier: Apache-2.0 + + +from pytest import fixture +from dataclasses import dataclass + + +def pytest_addoption(parser): + parser.addoption( + "--device", action="store", default="CPU", help="Choose device type (CPU/GPU)." + ) + + +@dataclass +class ExecutionConfig: + enable_heterogeneous: bool + device_type: str + + +cpu_cfg = ExecutionConfig(False, "CPU") +gpu_cfg = ExecutionConfig(False, "GPU") +het_cfg = ExecutionConfig(True, "AUTO") + + +def get_execution_config(device_type: str): + if device_type.lower() == "cpu": + return cpu_cfg + if device_type.lower() == "gpu": + return gpu_cfg + raise ValueError("Unsupported exeuction config: " + str(device_type)) + + +@fixture +def exe_cfg(request): + return get_execution_config(request.config.getoption("--device")) diff --git a/python/tests/test_pyhdk_api.py b/python/tests/test_pyhdk_api.py index bcff6db8f..ad7facfb4 100644 --- a/python/tests/test_pyhdk_api.py +++ b/python/tests/test_pyhdk_api.py @@ -152,7 +152,7 @@ def test_import_parquet(self, create_table, glob): class TestBuilder(BaseTest): - def test_scan(self): + def test_scan(self, exe_cfg): hdk = pyhdk.init() table_name = "table_test_scan" hdk.import_pydict({"a": [1, 2, 3], "b": [1.1, 2.2, 3.3]}, table_name) @@ -165,7 +165,7 @@ def test_scan(self): _ = hdk.scan(1) hdk.drop_table(table_name) - def test_drop_table(self): + def test_drop_table(self, exe_cfg): hdk = pyhdk.init() table_name = "table_test_drop_table" @@ -184,7 +184,7 @@ def test_drop_table(self): with pytest.raises(TypeError) as e: hdk.drop_table(1) - def test_type_from_str(self): + def test_type_from_str(self, exe_cfg): hdk = pyhdk.init() assert str(hdk.type("int")) == "INT64" assert str(hdk.type("fp")) == "FP64" @@ -192,7 +192,7 @@ def test_type_from_str(self): with pytest.raises(TypeError) as e: hdk.type(1) - def test_cst(self): + def test_cst(self, exe_cfg): hdk = pyhdk.init() self.check_cst(hdk.cst(None), "NULL", "NULLT") self.check_cst(hdk.cst(None, "int"), "NULL", "INT64") @@ -245,7 +245,7 @@ def test_cst(self): with pytest.raises(RuntimeError) as e: hdk.cst([]) - def test_ref(self): + def test_ref(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict({"a": [1, 2, 3], "b": [1.1, 2.2, 3.3]}) self.check_ref(ht.ref(0), 0) @@ -269,7 +269,7 @@ def test_ref(self): hdk.drop_table(ht) - def test_proj(self): + def test_proj(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict({"a": [1, 2, 3], "b": [1.1, 2.2, 3.3]}) proj = ht.proj( @@ -284,7 +284,7 @@ def test_proj(self): a=ht.ref("b"), ) check_res( - proj.run(), + proj.run(device_type=exe_cfg.device_type), { "a_1": [1, 2, 3], "b": [1.1, 2.2, 3.3], @@ -302,7 +302,7 @@ def test_proj(self): hdk.drop_table(ht) - def test_sort(self): + def test_sort(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict( { @@ -312,7 +312,7 @@ def test_sort(self): ) check_res( - ht.sort("a", -1).run(), + ht.sort("a", -1).run(device_type=exe_cfg.device_type), { "a": [1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, "null"], "b": [1.0, 1.0, 2.0, 2.0, "null", 1.0, 1.0, 2.0, 2.0, 2.0], @@ -320,7 +320,9 @@ def test_sort(self): ) check_res( - ht.sort(("a", "desc"), (ht.ref("b"), "asc")).run(), + ht.sort(("a", "desc"), (ht.ref("b"), "asc")).run( + device_type=exe_cfg.device_type + ), { "a": [2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, "null"], "b": [1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, "null", 2.0], @@ -328,7 +330,9 @@ def test_sort(self): ) check_res( - ht.sort(("a", "desc", "first"), ht.ref("b")).run(), + ht.sort(("a", "desc", "first"), ht.ref("b")).run( + device_type=exe_cfg.device_type + ), { "a": ["null", 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0], "b": [2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, "null"], @@ -336,7 +340,9 @@ def test_sort(self): ) check_res( - ht.sort(("a", "desc"), fields={"b": "asc"}).run(), + ht.sort(("a", "desc"), fields={"b": "asc"}).run( + device_type=exe_cfg.device_type + ), { "a": [2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, "null"], "b": [1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, "null", 2.0], @@ -344,7 +350,9 @@ def test_sort(self): ) check_res( - ht.sort(fields={"b": ("asc", "first")}, a="desc").run(), + ht.sort(fields={"b": ("asc", "first")}, a="desc").run( + device_type=exe_cfg.device_type + ), { "a": [1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, "null"], "b": ["null", 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0], @@ -352,7 +360,9 @@ def test_sort(self): ) check_res( - ht.sort(b=("asc", "first"), a=("desc", "first")).run(), + ht.sort(b=("asc", "first"), a=("desc", "first")).run( + device_type=exe_cfg.device_type + ), { "a": [1.0, 2.0, 2.0, 1.0, 1.0, "null", 2.0, 2.0, 1.0, 1.0], "b": ["null", 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0], @@ -362,7 +372,7 @@ def test_sort(self): # TODO: test offset when result set conversion to arrow is fixed # and respects offset option. check_res( - ht.sort("a", "b", limit=5, offset=0).run(), + ht.sort("a", "b", limit=5, offset=0).run(device_type=exe_cfg.device_type), {"a": [1, 1, 1, 1, 1], "b": [1.0, 1.0, 2.0, 2.0, "null"]}, ) @@ -423,7 +433,7 @@ def test_sort(self): hdk.drop_table(ht) - def test_agg(self): + def test_agg(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict( { @@ -436,7 +446,7 @@ def test_agg(self): check_res( ht.agg(["a", -2], "sum(c)", ht.ref("c").min(), hdk.count()) .sort("a", "b") - .run(), + .run(device_type=exe_cfg.device_type), { "a": [1, 1, 2, 2], "b": [1, 2, 1, 2], @@ -454,7 +464,7 @@ def test_agg(self): cv=ht.ref("c").avg(), ) .sort("a") - .run(), + .run(device_type=exe_cfg.device_type), { "a": [1, 2], "bc": [5, 5], @@ -465,7 +475,9 @@ def test_agg(self): ) check_res( - ht.agg(ht.ref("b"), cd=ht.ref("a").count(True)).sort("b").run(), + ht.agg(ht.ref("b"), cd=ht.ref("a").count(True)) + .sort("b") + .run(device_type=exe_cfg.device_type), {"b": [1, 2], "cd": [2, 2]}, ) @@ -477,12 +489,14 @@ def test_agg(self): a3=ht.ref("c").approx_quantile(1), ) .sort("b") - .run(), + .run(device_type=exe_cfg.device_type), {"b": [1, 2], "a1": [1, 6], "a2": [3, 8], "a3": [5, 10]}, ) check_res( - ht.agg("b", s1="stddev(a)", s2=ht["c"].stddev()).sort("b").run(), + ht.agg("b", s1="stddev(a)", s2=ht["c"].stddev()) + .sort("b") + .run(device_type=exe_cfg.device_type), {"b": [1, 2], "s1": [0.547723, 0.547723], "s2": [1.581139, 1.581139]}, ) @@ -518,27 +532,33 @@ def test_agg(self): hdk.drop_table(ht) hdk.drop_table(ht1) - def test_filter(self): + def test_filter(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict( {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "b": [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]} ) check_res( - ht.filter(ht["a"] > 5).run(), {"a": [6, 7, 8, 9, 10], "b": [5, 4, 3, 2, 1]} + ht.filter(ht["a"] > 5).run(device_type=exe_cfg.device_type), + {"a": [6, 7, 8, 9, 10], "b": [5, 4, 3, 2, 1]}, ) - check_res(ht.filter(ht["a"] >= 6, ht["b"] > 4).run(), {"a": [6], "b": [5]}) + check_res( + ht.filter(ht["a"] >= 6, ht["b"] > 4).run(device_type=exe_cfg.device_type), + {"a": [6], "b": [5]}, + ) check_res( - ht.filter((ht["a"] < 2).logical_or(ht["b"] <= 2)).run(), + ht.filter((ht["a"] < 2).logical_or(ht["b"] <= 2)).run( + device_type=exe_cfg.device_type + ), {"a": [1, 9, 10], "b": [10, 2, 1]}, ) check_res( ht.filter( (ht["a"] == 2).logical_or(ht["a"] == 3).logical_and(ht["b"] != 9) - ).run(), + ).run(device_type=exe_cfg.device_type), {"a": [3], "b": [8]}, ) @@ -546,9 +566,17 @@ def test_filter(self): ht = hdk.import_pydict({"a": [1, 2, None, None, 5], "b": [10, 9, 8, 7, 6]}) - check_res(ht.filter(ht["a"].is_null()).proj("b").run(), {"b": [8, 7]}) + check_res( + ht.filter(ht["a"].is_null()).proj("b").run(device_type=exe_cfg.device_type), + {"b": [8, 7]}, + ) - check_res(ht.filter(ht["a"].is_not_null()).proj("b").run(), {"b": [10, 9, 6]}) + check_res( + ht.filter(ht["a"].is_not_null()) + .proj("b") + .run(device_type=exe_cfg.device_type), + {"b": [10, 9, 6]}, + ) with pytest.raises(TypeError): ht.filter("a") @@ -557,7 +585,7 @@ def test_filter(self): hdk.drop_table(ht) - def test_join(self): + def test_join(self, exe_cfg): hdk = pyhdk.init() ht1 = hdk.import_pydict( {"a": [1, 2, 3, 4, 5], "b": [5, 4, 3, 2, 1], "x": [1.1, 2.2, 3.3, 4.4, 5.5]} @@ -566,10 +594,13 @@ def test_join(self): {"a": [1, 2, 3, 4, 5], "b": [1, 2, 3, 4, 5], "y": [5.5, 4.4, 3.3, 2.2, 1.1]} ) - check_res(ht1.join(ht2).run(), {"a": [3], "b": [3], "x": [3.3], "y": [3.3]}) + check_res( + ht1.join(ht2).run(device_type=exe_cfg.device_type), + {"a": [3], "b": [3], "x": [3.3], "y": [3.3]}, + ) check_res( - ht1.join(ht2, how="left").run(), + ht1.join(ht2, how="left").run(device_type=exe_cfg.device_type), { "a": [1, 2, 3, 4, 5], "b": [5, 4, 3, 2, 1], @@ -579,7 +610,7 @@ def test_join(self): ) check_res( - ht1.join(ht2, "a").run(), + ht1.join(ht2, "a").run(device_type=exe_cfg.device_type), { "a": [1, 2, 3, 4, 5], "b": [5, 4, 3, 2, 1], @@ -590,12 +621,12 @@ def test_join(self): ) check_res( - ht1.join(ht2, ["a", "b"]).run(), + ht1.join(ht2, ["a", "b"]).run(device_type=exe_cfg.device_type), {"a": [3], "b": [3], "x": [3.3], "y": [3.3]}, ) check_res( - ht1.join(ht2, "a", "b").run(), + ht1.join(ht2, "a", "b").run(device_type=exe_cfg.device_type), { "a": [1, 2, 3, 4, 5], "b": [5, 4, 3, 2, 1], @@ -606,12 +637,14 @@ def test_join(self): ) check_res( - ht1.join(ht2, ["a", "b"], ["b", "a"]).run(), + ht1.join(ht2, ["a", "b"], ["b", "a"]).run(device_type=exe_cfg.device_type), {"a": [3], "b": [3], "x": [3.3], "y": [3.3]}, ) check_res( - ht1.join(ht2, cond=ht1["a"].eq(ht2["b"])).run(), + ht1.join(ht2, cond=ht1["a"].eq(ht2["b"])).run( + device_type=exe_cfg.device_type + ), { "a": [1, 2, 3, 4, 5], "b": [5, 4, 3, 2, 1], @@ -642,18 +675,26 @@ def test_join(self): hdk.drop_table(ht1) hdk.drop_table(ht2) - def test_math_ops(self): + def test_math_ops(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict( {"a": [1, 2, 3, 4, 5], "b": [5, 4, 3, 2, 1], "x": [1.1, 2.2, 3.3, 4.4, 5.5]} ) - check_res(ht.proj(ht["a"].uminus()).run(), {"expr_1": [-1, -2, -3, -4, -5]}) + check_res( + ht.proj(ht["a"].uminus()).run(device_type=exe_cfg.device_type), + {"expr_1": [-1, -2, -3, -4, -5]}, + ) - check_res(ht.proj(-ht["a"]).run(), {"expr_1": [-1, -2, -3, -4, -5]}) + check_res( + ht.proj(-ht["a"]).run(device_type=exe_cfg.device_type), + {"expr_1": [-1, -2, -3, -4, -5]}, + ) check_res( - ht.proj(a1=ht["a"] + ht["b"], a2=ht["a"] + 1, a3=ht["a"] + 1.5).run(), + ht.proj(a1=ht["a"] + ht["b"], a2=ht["a"] + 1, a3=ht["a"] + 1.5).run( + device_type=exe_cfg.device_type + ), { "a1": [6, 6, 6, 6, 6], "a2": [2, 3, 4, 5, 6], @@ -662,7 +703,9 @@ def test_math_ops(self): ) check_res( - ht.proj(a1=ht["a"] - ht["b"], a2=ht["a"] - 1, a3=ht["a"] - 1.5).run(), + ht.proj(a1=ht["a"] - ht["b"], a2=ht["a"] - 1, a3=ht["a"] - 1.5).run( + device_type=exe_cfg.device_type + ), { "a1": [-4, -2, 0, 2, 4], "a2": [0, 1, 2, 3, 4], @@ -671,7 +714,9 @@ def test_math_ops(self): ) check_res( - ht.proj(a1=ht["a"] * ht["b"], a2=ht["a"] * 2, a3=ht["a"] * 1.5).run(), + ht.proj(a1=ht["a"] * ht["b"], a2=ht["a"] * 2, a3=ht["a"] * 1.5).run( + device_type=exe_cfg.device_type + ), { "a1": [5, 8, 9, 8, 5], "a2": [2, 4, 6, 8, 10], @@ -680,7 +725,9 @@ def test_math_ops(self): ) check_res( - ht.proj(a1=ht["a"] / ht["b"], a2=ht["a"] / 2, a3=ht["a"] / 2.0).run(), + ht.proj(a1=ht["a"] / ht["b"], a2=ht["a"] / 2, a3=ht["a"] / 2.0).run( + device_type=exe_cfg.device_type + ), { "a1": [0.2, 0.5, 1.0, 2.0, 5.0], "a2": [0.5, 1.0, 1.5, 2.0, 2.5], @@ -689,7 +736,9 @@ def test_math_ops(self): ) check_res( - ht.proj(a1=ht["a"] // ht["b"], a2=ht["a"] // 2, a3=ht["x"] // 2.0).run(), + ht.proj(a1=ht["a"] // ht["b"], a2=ht["a"] // 2, a3=ht["x"] // 2.0).run( + device_type=exe_cfg.device_type + ), { "a1": [0, 0, 1, 2, 5], "a2": [0, 1, 1, 2, 2], @@ -703,7 +752,7 @@ def test_math_ops(self): a2=ht["a"].div(2), a3=ht["a"].div(2.0), a4=ht["x"].div(2.0), - ).run(), + ).run(device_type=exe_cfg.device_type), { "a1": [0, 0, 1, 2, 5], "a2": [0, 1, 1, 2, 2], @@ -713,31 +762,35 @@ def test_math_ops(self): ) check_res( - ht.proj(a1=ht["a"] % ht["b"], a2=ht["a"] % 2).run(), + ht.proj(a1=ht["a"] % ht["b"], a2=ht["a"] % 2).run( + device_type=exe_cfg.device_type + ), {"a1": [1, 2, 0, 0, 0], "a2": [1, 0, 1, 0, 1]}, ) hdk.drop_table(ht) - def test_cast(self): + def test_cast(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict({"a": [1, 2, 3, 4, 5], "b": [1.1, 2.2, 3.3, 4.4, 5.5]}) check_res( - ht.proj(c1=ht["a"].cast("fp64"), c2=ht["b"].cast("int")).run(), + ht.proj(c1=ht["a"].cast("fp64"), c2=ht["b"].cast("int")).run( + device_type=exe_cfg.device_type + ), {"c1": [1.0, 2.0, 3.0, 4.0, 5.0], "c2": [1, 2, 3, 4, 6]}, ) check_res( ht.proj( c1=hdk.cst("1970-01-01 01:00:00").cast("timestamp[ms]").cast("int") - ).run(), + ).run(device_type=exe_cfg.device_type), {"c1": [3600000, 3600000, 3600000, 3600000, 3600000]}, ) hdk.drop_table(ht) - def test_extract(self): + def test_extract(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict( { @@ -752,7 +805,7 @@ def test_extract(self): r1=ht["a"].extract("year"), r2=ht["a"].extract("month"), r3=ht["a"].extract("day"), - ).run(), + ).run(device_type=exe_cfg.device_type), {"r1": [2023, 2022, 2021], "r2": [2, 3, 4], "r3": [7, 8, 9]}, ) @@ -761,7 +814,7 @@ def test_extract(self): hdk.drop_table(ht) - def test_date_add(self): + def test_date_add(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict( { @@ -778,7 +831,7 @@ def test_date_add(self): d2=ht["a"].add(1, "month"), d3=ht["a"].sub(ht["b"], "day"), d4=ht["a"].sub(1, "hour"), - ).run(), + ).run(device_type=exe_cfg.device_type), { "d1": [ pandas.Timestamp("2024-02-07 00:00:00"), @@ -818,26 +871,29 @@ def test_date_add(self): hdk.drop_table(ht) - def test_unnest(self): + def test_unnest(self, exe_cfg): hdk = pyhdk.init() ht = hdk.create_table("test1", [("a", "array(int)")]) hdk.import_pydict({"a": [[1, 2], [1, 2, 3, 4]]}, ht) check_res( - ht.proj(a=ht["a"].unnest()).agg(["a"], "count").sort("a").run(), + ht.proj(a=ht["a"].unnest()) + .agg(["a"], "count") + .sort("a") + .run(device_type=exe_cfg.device_type), {"a": [1, 2, 3, 4], "count": [2, 2, 1, 1]}, ) hdk.drop_table(ht) - def test_pow(self): + def test_pow(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict({"a": [1, 2, 3, 4, 5], "b": [1.0, 2.0, 3.0, 4.0, 5.0]}) check_res( - ht.proj( - a1=ht["a"].pow(2), a2=ht["a"].pow(ht["b"]), b=ht["b"].pow(2.0) - ).run(), + ht.proj(a1=ht["a"].pow(2), a2=ht["a"].pow(ht["b"]), b=ht["b"].pow(2.0)).run( + device_type=exe_cfg.device_type + ), { "a1": [1.0, 4.0, 9.0, 16.0, 25.0], "a2": [1.0, 4.0, 27.0, 256.0, 3125.0], @@ -847,53 +903,57 @@ def test_pow(self): hdk.drop_table(ht) - def test_at(self): + def test_at(self, exe_cfg): hdk = pyhdk.init() ht = hdk.create_table("test1", [("a", "array(int)"), ("b", "int")]) hdk.import_pydict({"a": [[1, 2], [2, 3, 4]], "b": [2, 3]}, ht) check_res( - ht.proj(a1=ht["a"].at(1), a2=ht["a"][ht["b"]], a3=ht["a"].at(-1)).run(), + ht.proj(a1=ht["a"].at(1), a2=ht["a"][ht["b"]], a3=ht["a"].at(-1)).run( + device_type=exe_cfg.device_type + ), {"a1": [1, 2], "a2": [2, 4], "a3": ["null", "null"]}, ) hdk.drop_table(ht) - def test_run_on_res(self): + def test_run_on_res(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict({"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]}) - res1 = ht.proj("b", "a").run() + res1 = ht.proj("b", "a").run(device_type=exe_cfg.device_type) check_res(res1, {"b": [10, 20, 30, 40, 50], "a": [1, 2, 3, 4, 5]}) - res2 = res1.agg(["a"], "count").run() + res2 = res1.agg(["a"], "count").run(device_type=exe_cfg.device_type) check_res(res2, {"a": [1, 2, 3, 4, 5], "count": [1, 1, 1, 1, 1]}) - res3 = res2.join(res1, "count", "a").run() + res3 = res2.join(res1, "count", "a").run(device_type=exe_cfg.device_type) check_res( res3, {"a": [1, 2, 3, 4, 5], "count": [1, 1, 1, 1, 1], "b": [10, 10, 10, 10, 10]}, ) - res4 = res2.run() + res4 = res2.run(device_type=exe_cfg.device_type) check_res(res4, {"a": [1, 2, 3, 4, 5], "count": [1, 1, 1, 1, 1]}) - res5 = res4.filter(res4.ref("a") > res4["count"]).run() + res5 = res4.filter(res4.ref("a") > res4["count"]).run( + device_type=exe_cfg.device_type + ) check_res(res5, {"a": [2, 3, 4, 5], "count": [1, 1, 1, 1]}) - res6 = res5.run() + res6 = res5.run(device_type=exe_cfg.device_type) check_res(res6, {"a": [2, 3, 4, 5], "count": [1, 1, 1, 1]}) assert res6.is_scan assert res6.size == 2 check_schema(res6.schema, {"a": "INT64", "count": "INT32[NN]"}) - res7 = res6.proj("rowid", "a", "count").run() + res7 = res6.proj("rowid", "a", "count").run(device_type=exe_cfg.device_type) check_res( res7, {"rowid": [0, 1, 2, 3], "a": [2, 3, 4, 5], "count": [1, 1, 1, 1]} ) - def test_row(self): + def test_row(self, exe_cfg): hdk = pyhdk.init() ht = hdk.create_table( "test1", [("a", "int"), ("b", "fp64"), ("c", "text"), ("d", "array(int)")] @@ -908,7 +968,7 @@ def test_row(self): ht, ) - res = ht.proj("d", "c", "b", "a").run() + res = ht.proj("d", "c", "b", "a").run(device_type=exe_cfg.device_type) row1 = res.row(0) assert row1 == [[1, None, 3], "str1", 1.1, 1] row2 = res.row(1) @@ -918,27 +978,27 @@ def test_row(self): hdk.drop_table(ht) - def test_shape(self): + def test_shape(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict({"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]}) assert ht.shape == (5, 2) hdk.import_pydict({"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]}, ht) assert ht.shape == (10, 2) - res1 = ht.filter(ht["a"] > 3).run() + res1 = ht.filter(ht["a"] > 3).run(device_type=exe_cfg.device_type) assert res1.shape == (4, 2) - res2 = res1.proj("a").run() + res2 = res1.proj("a").run(device_type=exe_cfg.device_type) assert res2.shape == (4, 1) with pytest.raises(RuntimeError): res2.proj(0).shape - def test_head(self): + def test_head(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict({"a": [1, 2, 3, 4, 5]}) - res = ht.run() + res = ht.run(device_type=exe_cfg.device_type) res1 = res.head(10) check_res(res1, {"a": [1, 2, 3, 4, 5]}) res2 = res.head(3) @@ -948,11 +1008,11 @@ def test_head(self): hdk.drop_table(ht) - def test_tail(self): + def test_tail(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict({"a": [1, 2, 3, 4, 5]}) - res = ht.run() + res = ht.run(device_type=exe_cfg.device_type) res1 = res.tail(10) check_res(res1, {"a": [1, 2, 3, 4, 5]}) res2 = res.tail(3) @@ -1064,16 +1124,19 @@ def test_last_value(self): class TestSql(BaseTest): - def test_no_alias(self): + def test_no_alias(self, exe_cfg): hdk = pyhdk.init() ht = hdk.import_pydict({"a": [1, 2, 3, 4, 5], "b": [5, 4, 3, 2, 1]}) check_res( - hdk.sql(f"SELECT a, b FROM {ht.table_name};"), + hdk.sql( + f"SELECT a, b FROM {ht.table_name};", + query_opts={"device_type": exe_cfg.device_type}, + ), {"a": [1, 2, 3, 4, 5], "b": [5, 4, 3, 2, 1]}, ) - def test_alias(self): + def test_alias(self, exe_cfg): hdk = pyhdk.init() ht1 = hdk.import_pydict( {"a": [1, 2, 3, 4, 5], "b": [5, 4, 3, 2, 1], "x": [1.1, 2.2, 3.3, 4.4, 5.5]} @@ -1083,11 +1146,19 @@ def test_alias(self): ) check_res( - hdk.sql("SELECT a, b FROM t1;", t1=ht1), + hdk.sql( + "SELECT a, b FROM t1;", + t1=ht1, + query_opts={"device_type": exe_cfg.device_type}, + ), {"a": [1, 2, 3, 4, 5], "b": [5, 4, 3, 2, 1]}, ) check_res( - hdk.sql("SELECT a, b FROM t1;", t1=ht2), + hdk.sql( + "SELECT a, b FROM t1;", + t1=ht2, + query_opts={"device_type": exe_cfg.device_type}, + ), {"a": [1, 2, 3, 4, 5], "b": [1, 2, 3, 4, 5]}, ) @@ -1096,6 +1167,7 @@ def test_alias(self): "SELECT t1.a, t1.b, t1.x, t2.y FROM t1, t2 WHERE t1.b = t2.a ORDER BY t1.a;", t1=ht1, t2=ht2, + query_opts={"device_type": exe_cfg.device_type}, ), { "a": [1, 2, 3, 4, 5], @@ -1105,19 +1177,30 @@ def test_alias(self): }, ) - def test_run_on_res(self): + def test_run_on_res(self, exe_cfg): hdk = pyhdk.init() ht1 = hdk.import_pydict( {"a": [1, 2, 3, 4, 5], "b": [5, 4, 3, 2, 1], "x": [1.1, 2.2, 3.3, 4.4, 5.5]} ) - res1 = hdk.sql("SELECT a, b FROM t1;", t1=ht1) + res1 = hdk.sql( + "SELECT a, b FROM t1;", + t1=ht1, + query_opts={"device_type": exe_cfg.device_type}, + ) check_res(res1, {"a": [1, 2, 3, 4, 5], "b": [5, 4, 3, 2, 1]}) - res2 = hdk.sql("SELECT b + 1 as b, a - 1 as a FROM t1;", t1=res1) + res2 = hdk.sql( + "SELECT b + 1 as b, a - 1 as a FROM t1;", + t1=res1, + query_opts={"device_type": exe_cfg.device_type}, + ) check_res(res2, {"b": [6, 5, 4, 3, 2], "a": [0, 1, 2, 3, 4]}) - res3 = hdk.sql(f"SELECT b - 1 as b, a + 1 as a FROM {res1.table_name};") + res3 = hdk.sql( + f"SELECT b - 1 as b, a + 1 as a FROM {res1.table_name};", + query_opts={"device_type": exe_cfg.device_type}, + ) check_res(res3, {"b": [4, 3, 2, 1, 0], "a": [2, 3, 4, 5, 6]}) @@ -1160,9 +1243,9 @@ def check_taxi_q4_res(res): class TestTaxiSql(BaseTaxiTest): - def test_taxi_over_csv_modular(self): + def test_taxi_over_csv_modular(self, exe_cfg): # Initialize HDK components - config = pyhdk.buildConfig() + config = pyhdk.buildConfig(enable_gpu_code_compilation_cache=False) storage = pyhdk.storage.ArrowStorage(1, config) data_mgr = pyhdk.storage.DataMgr(config) data_mgr.registerDataProvider(storage) @@ -1179,7 +1262,7 @@ def test_taxi_over_csv_modular(self): "SELECT cab_type, count(*) as cnt FROM trips GROUP BY cab_type;" ) rel_alg_executor = pyhdk.sql.RelAlgExecutor(executor, storage, data_mgr, ra) - res = rel_alg_executor.execute() + res = rel_alg_executor.execute(device_type=exe_cfg.device_type) self.check_taxi_q1_res(res) # Run Taxi Q2 SQL query @@ -1190,7 +1273,7 @@ def test_taxi_over_csv_modular(self): ORDER BY passenger_count;""" ) rel_alg_executor = pyhdk.sql.RelAlgExecutor(executor, storage, data_mgr, ra) - res = rel_alg_executor.execute() + res = rel_alg_executor.execute(device_type=exe_cfg.device_type) self.check_taxi_q2_res(res) # Run Taxi Q3 SQL query @@ -1201,7 +1284,7 @@ def test_taxi_over_csv_modular(self): ORDER BY passenger_count;""" ) rel_alg_executor = pyhdk.sql.RelAlgExecutor(executor, storage, data_mgr, ra) - res = rel_alg_executor.execute() + res = rel_alg_executor.execute(device_type=exe_cfg.device_type) self.check_taxi_q3_res(res) # Run Taxi Q4 SQL query @@ -1216,12 +1299,12 @@ def test_taxi_over_csv_modular(self): ORDER BY pickup_year, cnt desc;""" ) rel_alg_executor = pyhdk.sql.RelAlgExecutor(executor, storage, data_mgr, ra) - res = rel_alg_executor.execute() + res = rel_alg_executor.execute(device_type=exe_cfg.device_type) self.check_taxi_q4_res(res) storage.dropTable("trips") - def test_taxi_over_csv_explicit_instance(self): + def test_taxi_over_csv_explicit_instance(self, exe_cfg): # Initialize HDK components wrapped into a single object hdk = pyhdk.init() @@ -1233,7 +1316,10 @@ def test_taxi_over_csv_explicit_instance(self): ) # Run Taxi Q1 SQL query - res = hdk.sql("SELECT cab_type, count(*) as cnt FROM trips GROUP BY cab_type;") + res = hdk.sql( + "SELECT cab_type, count(*) as cnt FROM trips GROUP BY cab_type;", + query_opts={"device_type": exe_cfg.device_type}, + ) self.check_taxi_q1_res(res) # Run Taxi Q2 SQL query @@ -1241,7 +1327,8 @@ def test_taxi_over_csv_explicit_instance(self): """SELECT passenger_count, AVG(total_amount) as total_amount_avg FROM trips GROUP BY passenger_count - ORDER BY passenger_count;""" + ORDER BY passenger_count;""", + query_opts={"device_type": exe_cfg.device_type}, ) self.check_taxi_q2_res(res) @@ -1250,7 +1337,8 @@ def test_taxi_over_csv_explicit_instance(self): """SELECT passenger_count, extract(year from pickup_datetime) AS pickup_year, count(*) as cnt FROM trips GROUP BY passenger_count, pickup_year - ORDER BY passenger_count;""" + ORDER BY passenger_count;""", + query_opts={"device_type": exe_cfg.device_type}, ) self.check_taxi_q3_res(res) @@ -1263,13 +1351,14 @@ def test_taxi_over_csv_explicit_instance(self): count(*) AS cnt FROM trips GROUP BY passenger_count, pickup_year, distance - ORDER BY pickup_year, cnt desc;""" + ORDER BY pickup_year, cnt desc;""", + query_opts={"device_type": exe_cfg.device_type}, ) self.check_taxi_q4_res(res) hdk.drop_table("trips") - def test_taxi_over_csv_explicit_aliases(self): + def test_taxi_over_csv_explicit_aliases(self, exe_cfg): # Initialize HDK components hidden from users hdk = pyhdk.init() @@ -1285,6 +1374,7 @@ def test_taxi_over_csv_explicit_aliases(self): res = hdk.sql( "SELECT cab_type, count(*) as cnt FROM trips GROUP BY cab_type;", trips=trips, + query_opts={"device_type": exe_cfg.device_type}, ) self.check_taxi_q1_res(res) @@ -1295,6 +1385,7 @@ def test_taxi_over_csv_explicit_aliases(self): GROUP BY passenger_count ORDER BY passenger_count;""", trips=trips, + query_opts={"device_type": exe_cfg.device_type}, ) self.check_taxi_q2_res(res) @@ -1305,6 +1396,7 @@ def test_taxi_over_csv_explicit_aliases(self): GROUP BY passenger_count, pickup_year ORDER BY passenger_count;""", trips=trips, + query_opts={"device_type": exe_cfg.device_type}, ) self.check_taxi_q3_res(res) @@ -1319,13 +1411,14 @@ def test_taxi_over_csv_explicit_aliases(self): GROUP BY passenger_count, pickup_year, distance ORDER BY pickup_year, cnt desc;""", trips=trips, + query_opts={"device_type": exe_cfg.device_type}, ) self.check_taxi_q4_res(res) hdk.drop_table(trips) @pytest.mark.skip(reason="unimplemented concept") - def test_taxi_over_csv_multistep(self): + def test_taxi_over_csv_multistep(self, exe_cfg): # Initialize HDK components hidden from users hdk = pyhdk.init() @@ -1339,6 +1432,7 @@ def test_taxi_over_csv_multistep(self): """SELECT passenger_count, extract(year from pickup_datetime) AS pickup_year FROM trips""", trips=trips, + query_opts={"device_type": exe_cfg.device_type}, ) res = hdk.sql( """SELECT passenger_count, pickup_year, count(*) as cnt @@ -1346,6 +1440,7 @@ def test_taxi_over_csv_multistep(self): GROUP BY passenger_count, pickup_year ORDER BY passenger_count;""", trips=tmp, + query_opts={"device_type": exe_cfg.device_type}, ) self.check_taxi_q3_res(res) @@ -1357,23 +1452,29 @@ def test_taxi_over_csv_multistep(self): cast(trip_distance as int) AS distance FROM trips;""", trips=trips, + query_opts={"device_type": exe_cfg.device_type}, ) tmp = hdk.sql( """SELECT passenger_count, pickup_year, distance, count(*) AS cnt FROM trips GROUP BY passenger_count, pickup_year, distance;""", trips=tmp, + query_opts={"device_type": exe_cfg.device_type}, + ) + res = hdk.sql( + "SELET * FROM trips ORDER BY pickup_year, cnt desc;", + trips=tmp, + query_opts={"device_type": exe_cfg.device_type}, ) - res = hdk.sql("SELET * FROM trips ORDER BY pickup_year, cnt desc;", trips=tmp) self.check_taxi_q4_res(res) hdk.drop_table(trips) class TestTaxiIR(BaseTaxiTest): - def test_taxi_over_csv_modular(self): + def test_taxi_over_csv_modular(self, exe_cfg): # Initialize HDK components - config = pyhdk.buildConfig() + config = pyhdk.buildConfig(enable_gpu_code_compilation_cache=False) storage = pyhdk.storage.ArrowStorage(1, config) data_mgr = pyhdk.storage.DataMgr(config) data_mgr.registerDataProvider(storage) @@ -1390,7 +1491,7 @@ def test_taxi_over_csv_modular(self): rel_alg_executor = pyhdk.sql.RelAlgExecutor( executor, storage, data_mgr, dag=dag ) - res = rel_alg_executor.execute() + res = rel_alg_executor.execute(device_type=exe_cfg.device_type) self.check_taxi_q1_res(res) # Run Taxi Q2 IR query @@ -1404,7 +1505,7 @@ def test_taxi_over_csv_modular(self): rel_alg_executor = pyhdk.sql.RelAlgExecutor( executor, storage, data_mgr, dag=dag ) - res = rel_alg_executor.execute() + res = rel_alg_executor.execute(device_type=exe_cfg.device_type) self.check_taxi_q2_res(res) # Run Taxi Q3 IR query @@ -1422,7 +1523,7 @@ def test_taxi_over_csv_modular(self): rel_alg_executor = pyhdk.sql.RelAlgExecutor( executor, storage, data_mgr, dag=dag ) - res = rel_alg_executor.execute() + res = rel_alg_executor.execute(device_type=exe_cfg.device_type) self.check_taxi_q3_res(res) # Run Taxi Q4 IR query @@ -1441,12 +1542,12 @@ def test_taxi_over_csv_modular(self): rel_alg_executor = pyhdk.sql.RelAlgExecutor( executor, storage, data_mgr, dag=dag ) - res = rel_alg_executor.execute() + res = rel_alg_executor.execute(device_type=exe_cfg.device_type) self.check_taxi_q4_res(res) storage.dropTable("trips") - def test_taxi_over_csv_explicit_instance(self): + def test_taxi_over_csv_explicit_instance(self, exe_cfg): # Initialize HDK components wrapped into a single object hdk = pyhdk.init() @@ -1456,7 +1557,11 @@ def test_taxi_over_csv_explicit_instance(self): ) # Run Taxi Q1 IR query - res = hdk.scan("trips").agg("cab_type", "count").run() + res = ( + hdk.scan("trips") + .agg("cab_type", "count") + .run(device_type=exe_cfg.device_type) + ) self.check_taxi_q1_res(res) # Run Taxi Q2 IR query @@ -1464,7 +1569,7 @@ def test_taxi_over_csv_explicit_instance(self): hdk.scan("trips") .agg("passenger_count", "avg(total_amount)") .sort("passenger_count") - .run() + .run(device_type=exe_cfg.device_type) ) self.check_taxi_q2_res(res) @@ -1476,7 +1581,7 @@ def test_taxi_over_csv_explicit_instance(self): ) .agg(["passenger_count", "pickup_year"], "count") .sort("passenger_count") - .run() + .run(device_type=exe_cfg.device_type) ) self.check_taxi_q3_res(res) @@ -1490,13 +1595,13 @@ def test_taxi_over_csv_explicit_instance(self): ) .agg(["passenger_count", "pickup_year", "distance"], "count") .sort(("pickup_year", "asc"), ("count", "desc")) - .run() + .run(device_type=exe_cfg.device_type) ) self.check_taxi_q4_res(res) hdk.drop_table(trips) - def test_taxi_over_csv_implicit_scan(self): + def test_taxi_over_csv_implicit_scan(self, exe_cfg): # Initialize HDK components hidden from users hdk = pyhdk.init() @@ -1508,14 +1613,14 @@ def test_taxi_over_csv_implicit_scan(self): ) # Run Taxi Q1 IR query - res = trips.agg("cab_type", "count").run() + res = trips.agg("cab_type", "count").run(device_type=exe_cfg.device_type) self.check_taxi_q1_res(res) # Run Taxi Q2 IR query res = ( trips.agg("passenger_count", "avg(total_amount)") .sort("passenger_count") - .run() + .run(device_type=exe_cfg.device_type) ) self.check_taxi_q2_res(res) @@ -1526,7 +1631,7 @@ def test_taxi_over_csv_implicit_scan(self): ) .agg(["passenger_count", "pickup_year"], "count") .sort("passenger_count") - .run() + .run(device_type=exe_cfg.device_type) ) self.check_taxi_q3_res(res) @@ -1539,14 +1644,14 @@ def test_taxi_over_csv_implicit_scan(self): ) .agg([0, 1, 2], "count") .sort(("pickup_year", "asc"), ("count", "desc")) - .run() + .run(device_type=exe_cfg.device_type) ) self.check_taxi_q4_res(res) hdk.drop_table(trips) @pytest.mark.skip(reason="unimplemented concept") - def test_run_query_on_results(self): + def test_run_query_on_results(self, exe_cfg): # Initialize HDK components hidden from users hdk = pyhdk.init() @@ -1556,10 +1661,12 @@ def test_run_query_on_results(self): ) # Run a part of Taxi Q2 IR query - res = trips.agg("passenger_count", "avg(total_amount)").run() + res = trips.agg("passenger_count", "avg(total_amount)").run( + device_type=exe_cfg.device_type + ) # Now sort it to get the final result # Can we make it without transforming to Arrow with the following import to ArrowStorage? - res = res.sort("passenger_count").run() + res = res.sort("passenger_count").run(device_type=exe_cfg.device_type) self.check_taxi_q2_res(res) hdk.drop_table(trips)