From 5d109d11f5f94e88ef1f9ad9885838539c4086a2 Mon Sep 17 00:00:00 2001 From: blaginin Date: Sat, 16 Nov 2024 17:04:12 +0000 Subject: [PATCH 01/17] Coerce Array inner types --- .../expr-common/src/type_coercion/binary.rs | 52 +++++++++++++++---- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index c32b4951db44..7a4a6c0a8b98 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1138,27 +1138,44 @@ fn numeric_string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { + Arc::new(Arc::unwrap_or_clone(Arc::clone($LHS_TYPE)).with_data_type( + comparison_coercion($LHS_TYPE.data_type(), $RHS_TYPE.data_type())?, + )) + }; +} + /// Coercion rules for list types. fn list_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { use arrow::datatypes::DataType::*; match (lhs_type, rhs_type) { - (List(_), List(_)) => Some(lhs_type.clone()), - (LargeList(_), List(_)) => Some(lhs_type.clone()), - (List(_), LargeList(_)) => Some(rhs_type.clone()), - (LargeList(_), LargeList(_)) => Some(lhs_type.clone()), - (List(_), FixedSizeList(_, _)) => Some(lhs_type.clone()), - (FixedSizeList(_, _), List(_)) => Some(rhs_type.clone()), + (List(lhs_field), List(rhs_field)) + | (List(lhs_field), FixedSizeList(rhs_field, _)) + | (FixedSizeList(lhs_field, _), List(rhs_field)) => { + Some(List(coerce_list_children!(lhs_field, rhs_field))) + } + + (LargeList(lhs_field), List(rhs_field)) + | (List(lhs_field), LargeList(rhs_field)) + | (LargeList(lhs_field), LargeList(rhs_field)) + | (LargeList(lhs_field), FixedSizeList(rhs_field, _)) + | (FixedSizeList(lhs_field, _), LargeList(rhs_field)) => { + Some(LargeList(coerce_list_children!(lhs_field, rhs_field))) + } + // Coerce to the left side FixedSizeList type if the list lengths are the same, // otherwise coerce to list with the left type for dynamic length - (FixedSizeList(lf, ls), FixedSizeList(_, rs)) => { + (FixedSizeList(lhs_field, ls), FixedSizeList(rhs_field, rs)) => { if ls == rs { - Some(lhs_type.clone()) + Some(FixedSizeList( + coerce_list_children!(lhs_field, rhs_field), + *rs, + )) } else { - Some(List(Arc::clone(lf))) + Some(List(Arc::clone(lhs_field))) } } - (LargeList(_), FixedSizeList(_, _)) => Some(lhs_type.clone()), - (FixedSizeList(_, _), LargeList(_)) => Some(rhs_type.clone()), _ => None, } } @@ -2105,6 +2122,19 @@ mod tests { DataType::List(Arc::clone(&inner_field)) ); + // Negative test: inner_timestamp_field and inner_field are not compatible because their inner types are not compatible + let inner_timestamp_field = Arc::new(Field::new( + "item", + DataType::Timestamp(TimeUnit::Microsecond, None), + true, + )); + let result_type = get_input_types( + &DataType::List(Arc::clone(&inner_field)), + &Operator::Eq, + &DataType::List(Arc::clone(&inner_timestamp_field)), + ); + assert!(result_type.is_err()); + // TODO add other data type Ok(()) } From 9ddeda1daf5a1b0f25a58ae49d0ca8731cd40761 Mon Sep 17 00:00:00 2001 From: blaginin Date: Sat, 16 Nov 2024 17:41:34 +0000 Subject: [PATCH 02/17] =?UTF-8?q?`coerce=5Flist=5Fchildren`=20macro=20?= =?UTF-8?q?=E2=86=92=20fn?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../expr-common/src/type_coercion/binary.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 7a4a6c0a8b98..0f94a083d793 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1138,12 +1138,13 @@ fn numeric_string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { - Arc::new(Arc::unwrap_or_clone(Arc::clone($LHS_TYPE)).with_data_type( - comparison_coercion($LHS_TYPE.data_type(), $RHS_TYPE.data_type())?, - )) - }; +fn coerce_list_children(lhs_field: &FieldRef, rhs_field: &FieldRef) -> Option { + Some(Arc::new( + Arc::unwrap_or_clone(Arc::clone(lhs_field)).with_data_type(comparison_coercion( + lhs_field.data_type(), + rhs_field.data_type(), + )?), + )) } /// Coercion rules for list types. @@ -1153,7 +1154,7 @@ fn list_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { (List(lhs_field), List(rhs_field)) | (List(lhs_field), FixedSizeList(rhs_field, _)) | (FixedSizeList(lhs_field, _), List(rhs_field)) => { - Some(List(coerce_list_children!(lhs_field, rhs_field))) + Some(List(coerce_list_children(lhs_field, rhs_field)?)) } (LargeList(lhs_field), List(rhs_field)) @@ -1161,7 +1162,7 @@ fn list_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { | (LargeList(lhs_field), LargeList(rhs_field)) | (LargeList(lhs_field), FixedSizeList(rhs_field, _)) | (FixedSizeList(lhs_field, _), LargeList(rhs_field)) => { - Some(LargeList(coerce_list_children!(lhs_field, rhs_field))) + Some(LargeList(coerce_list_children(lhs_field, rhs_field)?)) } // Coerce to the left side FixedSizeList type if the list lengths are the same, @@ -1169,7 +1170,7 @@ fn list_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { (FixedSizeList(lhs_field, ls), FixedSizeList(rhs_field, rs)) => { if ls == rs { Some(FixedSizeList( - coerce_list_children!(lhs_field, rhs_field), + coerce_list_children(lhs_field, rhs_field)?, *rs, )) } else { From f353e3f71b07d980df05d81f5716f39d78014525 Mon Sep 17 00:00:00 2001 From: blaginin Date: Sat, 16 Nov 2024 17:47:56 +0000 Subject: [PATCH 03/17] Rearrange & simplify patterns --- .../expr-common/src/type_coercion/binary.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 0f94a083d793..a559ced1cf42 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1151,18 +1151,17 @@ fn coerce_list_children(lhs_field: &FieldRef, rhs_field: &FieldRef) -> Option Option { use arrow::datatypes::DataType::*; match (lhs_type, rhs_type) { - (List(lhs_field), List(rhs_field)) - | (List(lhs_field), FixedSizeList(rhs_field, _)) - | (FixedSizeList(lhs_field, _), List(rhs_field)) => { - Some(List(coerce_list_children(lhs_field, rhs_field)?)) + ( + LargeList(lhs_field), + List(rhs_field) | LargeList(rhs_field) | FixedSizeList(rhs_field, _), + ) + | (FixedSizeList(lhs_field, _) | List(lhs_field), LargeList(rhs_field)) => { + Some(LargeList(coerce_list_children(lhs_field, rhs_field)?)) } - (LargeList(lhs_field), List(rhs_field)) - | (List(lhs_field), LargeList(rhs_field)) - | (LargeList(lhs_field), LargeList(rhs_field)) - | (LargeList(lhs_field), FixedSizeList(rhs_field, _)) - | (FixedSizeList(lhs_field, _), LargeList(rhs_field)) => { - Some(LargeList(coerce_list_children(lhs_field, rhs_field)?)) + (List(lhs_field), List(rhs_field) | FixedSizeList(rhs_field, _)) + | (FixedSizeList(lhs_field, _), List(rhs_field)) => { + Some(List(coerce_list_children(lhs_field, rhs_field)?)) } // Coerce to the left side FixedSizeList type if the list lengths are the same, From 22414e3cb1413cbc3a2b8c7dd2b86aa1fc7c87d7 Mon Sep 17 00:00:00 2001 From: blaginin Date: Sat, 16 Nov 2024 17:48:51 +0000 Subject: [PATCH 04/17] Add casting for `FixedSizeList` --- datafusion/expr-common/src/type_coercion/binary.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index a559ced1cf42..fb0bf7089680 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1173,7 +1173,7 @@ fn list_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { *rs, )) } else { - Some(List(Arc::clone(lhs_field))) + Some(List(coerce_list_children(lhs_field, rhs_field)?)) } } _ => None, From 3c9a7689c1d6ca8b7d04496b0c7e86c66b4154d5 Mon Sep 17 00:00:00 2001 From: blaginin Date: Sat, 16 Nov 2024 17:54:12 +0000 Subject: [PATCH 05/17] Add sql logic test --- datafusion/sqllogictest/test_files/union.slt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index fb7afdda2ea8..ae6d74db10a3 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -761,3 +761,11 @@ SELECT NULL WHERE FALSE; ---- 0.5 1 + +# Test Union of List Types. Issue: https://github.com/apache/datafusion/issues/12291 +query error +SELECT make_array(2) x UNION ALL SELECT make_array(now()) x; +---- +DataFusion error: type_coercion +caused by +Error during planning: Incompatible inputs for Union: Previous inputs were of type List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), but got incompatible type List(Field { name: "item", data_type: Timestamp(Nanosecond, Some("+00:00")), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) on column 'x' From 6e0c56b1fdf4d09d88642c72a1b7d866614c7fc8 Mon Sep 17 00:00:00 2001 From: blaginin Date: Sat, 16 Nov 2024 18:06:56 +0000 Subject: [PATCH 06/17] Fix test --- datafusion/sqllogictest/test_files/union.slt | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index ae6d74db10a3..7c33e9377103 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -763,9 +763,5 @@ SELECT NULL WHERE FALSE; 1 # Test Union of List Types. Issue: https://github.com/apache/datafusion/issues/12291 -query error -SELECT make_array(2) x UNION ALL SELECT make_array(now()) x; ----- -DataFusion error: type_coercion -caused by -Error during planning: Incompatible inputs for Union: Previous inputs were of type List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), but got incompatible type List(Field { name: "item", data_type: Timestamp(Nanosecond, Some("+00:00")), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) on column 'x' +query error DataFusion error: type_coercion +SELECT make_array(2) x UNION ALL SELECT make_array(now()) x; \ No newline at end of file From d169abcedf979602cbd689a332a576ffcc2ad4bf Mon Sep 17 00:00:00 2001 From: blaginin Date: Sat, 16 Nov 2024 18:44:00 +0000 Subject: [PATCH 07/17] Expand error --- datafusion/sqllogictest/test_files/union.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index 7c33e9377103..3e7dcf0f2d53 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -763,5 +763,5 @@ SELECT NULL WHERE FALSE; 1 # Test Union of List Types. Issue: https://github.com/apache/datafusion/issues/12291 -query error DataFusion error: type_coercion +query error DataFusion error: type_coercion\ncaused by\nError during planning: Incompatible inputs for Union: Previous inputs were of type List(.*), but got incompatible type List(.*) on column 'x' SELECT make_array(2) x UNION ALL SELECT make_array(now()) x; \ No newline at end of file From baf99cbeb08d9c30cb5cebb36de5878b5f57a878 Mon Sep 17 00:00:00 2001 From: blaginin Date: Sun, 17 Nov 2024 14:02:15 +0000 Subject: [PATCH 08/17] Switch to `Arc::clone` --- datafusion/expr-common/src/type_coercion/binary.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index fb0bf7089680..5a375ede8a8d 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1139,12 +1139,9 @@ fn numeric_string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { - Some(Arc::new( - Arc::unwrap_or_clone(Arc::clone(lhs_field)).with_data_type(comparison_coercion( - lhs_field.data_type(), - rhs_field.data_type(), - )?), - )) + Some(Arc::new((**lhs_field).clone().with_data_type( + comparison_coercion(lhs_field.data_type(), rhs_field.data_type())?, + ))) } /// Coercion rules for list types. From 639f2361a8d8b519bdb97375abd40ca254a67176 Mon Sep 17 00:00:00 2001 From: blaginin Date: Sun, 17 Nov 2024 20:02:19 +0000 Subject: [PATCH 09/17] OR nullable-s --- .../expr-common/src/type_coercion/binary.rs | 27 ++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 5a375ede8a8d..2bae7b205826 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1139,9 +1139,15 @@ fn numeric_string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { - Some(Arc::new((**lhs_field).clone().with_data_type( - comparison_coercion(lhs_field.data_type(), rhs_field.data_type())?, - ))) + Some(Arc::new( + (**lhs_field) + .clone() + .with_data_type(comparison_coercion( + lhs_field.data_type(), + rhs_field.data_type(), + )?) + .with_nullable(lhs_field.is_nullable() || rhs_field.is_nullable()), + )) } /// Coercion rules for list types. @@ -1152,7 +1158,7 @@ fn list_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { LargeList(lhs_field), List(rhs_field) | LargeList(rhs_field) | FixedSizeList(rhs_field, _), ) - | (FixedSizeList(lhs_field, _) | List(lhs_field), LargeList(rhs_field)) => { + | (List(lhs_field) | FixedSizeList(lhs_field, _), LargeList(rhs_field)) => { Some(LargeList(coerce_list_children(lhs_field, rhs_field)?)) } @@ -2136,6 +2142,19 @@ mod tests { Ok(()) } + #[test] + fn test_list_coercion() { + let lhs_type = DataType::List(Arc::new(Field::new("lhs", DataType::Int8, false))); + + let rhs_type = DataType::List(Arc::new(Field::new("rhs", DataType::Int64, true))); + + let coerced_type = list_coercion(&lhs_type, &rhs_type).unwrap(); + assert_eq!( + coerced_type, + DataType::List(Arc::new(Field::new("lhs", DataType::Int64, true))) + ); // nullable because the RHS is nullable + } + #[test] fn test_type_coercion_logical_op() -> Result<()> { test_coercion_binary_rule!( From 2691f8089a54ac31385a3df6104dcde147f2bc0b Mon Sep 17 00:00:00 2001 From: blaginin Date: Sun, 17 Nov 2024 20:07:45 +0000 Subject: [PATCH 10/17] Add successful test --- datafusion/sqllogictest/test_files/union.slt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index 3e7dcf0f2d53..9a8ab221d09c 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -763,5 +763,11 @@ SELECT NULL WHERE FALSE; 1 # Test Union of List Types. Issue: https://github.com/apache/datafusion/issues/12291 -query error DataFusion error: type_coercion\ncaused by\nError during planning: Incompatible inputs for Union: Previous inputs were of type List(.*), but got incompatible type List(.*) on column 'x' -SELECT make_array(2) x UNION ALL SELECT make_array(now()) x; \ No newline at end of file +query error DataFusion error: type_coercion\ncaused by\nError during planning: Incompatible inputs for Union: Previous inputs were of type List(.*), but got incompatible type List(.*) on column 'x' +SELECT make_array(2) x UNION ALL SELECT make_array(now()) x; + +query ? +select make_array(arrow_cast(2, 'UInt8')) x UNION ALL SELECT make_array(arrow_cast(-2, 'Int8')) x; +---- +[-2] +[2] \ No newline at end of file From 1181abc6fa327ad0d7c3fee9130aba66c0d79a5b Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 19 Nov 2024 16:35:30 +0000 Subject: [PATCH 11/17] Switch to `type_union_resolution` --- datafusion/expr-common/src/type_coercion/binary.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 2bae7b205826..3bbc374a5a71 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1139,13 +1139,11 @@ fn numeric_string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { + let data_types = vec![lhs_field.data_type().clone(), rhs_field.data_type().clone()]; Some(Arc::new( (**lhs_field) .clone() - .with_data_type(comparison_coercion( - lhs_field.data_type(), - rhs_field.data_type(), - )?) + .with_data_type(type_union_resolution(&data_types)?) .with_nullable(lhs_field.is_nullable() || rhs_field.is_nullable()), )) } From 306e7fa1de4b787e4455d70bf516893bcc0ab275 Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 19 Nov 2024 16:42:01 +0000 Subject: [PATCH 12/17] Add a note on `coerce_list_children` --- datafusion/expr-common/src/type_coercion/binary.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 3bbc374a5a71..b42483f1ba02 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1138,6 +1138,7 @@ fn numeric_string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { let data_types = vec![lhs_field.data_type().clone(), rhs_field.data_type().clone()]; Some(Arc::new( From 18ee2136884b279a42aa516f057c7a8d3fd53f84 Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 19 Nov 2024 18:21:16 +0000 Subject: [PATCH 13/17] Handle different list types inside `type_union_resolution_coercion` --- datafusion/expr-common/src/type_coercion/binary.rs | 6 +----- datafusion/sqllogictest/test_files/union.slt | 8 +++++++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index b42483f1ba02..1623f92eed86 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -480,11 +480,6 @@ fn type_union_resolution_coercion( let new_value_type = type_union_resolution_coercion(value_type, other_type); new_value_type.map(|t| DataType::Dictionary(index_type.clone(), Box::new(t))) } - (DataType::List(lhs), DataType::List(rhs)) => { - let new_item_type = - type_union_resolution_coercion(lhs.data_type(), rhs.data_type()); - new_item_type.map(|t| DataType::List(Arc::new(Field::new("item", t, true)))) - } (DataType::Struct(lhs), DataType::Struct(rhs)) => { if lhs.len() != rhs.len() { return None; @@ -529,6 +524,7 @@ fn type_union_resolution_coercion( // Numeric coercion is the same as comparison coercion, both find the narrowest type // that can accommodate both types binary_numeric_coercion(lhs_type, rhs_type) + .or_else(|| list_coercion(lhs_type, rhs_type)) .or_else(|| temporal_coercion_nonstrict_timezone(lhs_type, rhs_type)) .or_else(|| string_coercion(lhs_type, rhs_type)) .or_else(|| numeric_string_coercion(lhs_type, rhs_type)) diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index 9a8ab221d09c..b5e82f613a46 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -770,4 +770,10 @@ query ? select make_array(arrow_cast(2, 'UInt8')) x UNION ALL SELECT make_array(arrow_cast(-2, 'Int8')) x; ---- [-2] -[2] \ No newline at end of file +[2] + +query ? +select make_array(make_array(1)) x UNION ALL SELECT make_array(arrow_cast(make_array(-1), 'LargeList(Int8)')) x; +---- +[[-1]] +[[1]] From cb72340c83735c1050ee26dc8ea460b2ff5450f2 Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 19 Nov 2024 18:53:58 +0000 Subject: [PATCH 14/17] Add a `make_array` test with different array types --- datafusion/sqllogictest/test_files/array.slt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 1e60699a1f65..763caa88e7cb 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1155,6 +1155,10 @@ select column1, column5 from arrays_values_without_nulls; [21, 22, 23, 24, 25, 26, 27, 28, 29, 30] [6, 7] [31, 32, 33, 34, 35, 26, 37, 38, 39, 40] [8, 9] +# make array with arrays of different types +query ? +select make_array(make_array(1), arrow_cast(make_array(-1), 'LargeList(Int8)')) + query ??? select make_array(column1), make_array(column1, column5), From 91b468d0f337a73c65021c50ce889890cde9bccf Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 19 Nov 2024 19:19:08 +0000 Subject: [PATCH 15/17] Add the test value --- datafusion/functions-nested/src/make_array.rs | 3 +-- datafusion/sqllogictest/test_files/array.slt | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-nested/src/make_array.rs b/datafusion/functions-nested/src/make_array.rs index de67b0ae3874..c84b6f010968 100644 --- a/datafusion/functions-nested/src/make_array.rs +++ b/datafusion/functions-nested/src/make_array.rs @@ -26,7 +26,7 @@ use arrow_array::{ new_null_array, Array, ArrayRef, GenericListArray, NullArray, OffsetSizeTrait, }; use arrow_buffer::OffsetBuffer; -use arrow_schema::DataType::{LargeList, List, Null}; +use arrow_schema::DataType::{List, Null}; use arrow_schema::{DataType, Field}; use datafusion_common::{plan_err, utils::array_into_list_array_nullable, Result}; use datafusion_expr::binary::{ @@ -198,7 +198,6 @@ pub(crate) fn make_array_inner(arrays: &[ArrayRef]) -> Result { let array = new_null_array(&DataType::Int64, length); Ok(Arc::new(array_into_list_array_nullable(array))) } - LargeList(..) => array_array::(arrays, data_type), _ => array_array::(arrays, data_type), } } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 763caa88e7cb..a25ef40bbca6 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1158,6 +1158,8 @@ select column1, column5 from arrays_values_without_nulls; # make array with arrays of different types query ? select make_array(make_array(1), arrow_cast(make_array(-1), 'LargeList(Int8)')) +---- +[[1], [-1]] query ??? select make_array(column1), From a3052be9ea134209797277f38667ff7120440d62 Mon Sep 17 00:00:00 2001 From: Dmitrii Blaginin Date: Wed, 20 Nov 2024 17:38:24 +0000 Subject: [PATCH 16/17] Update datafusion/sqllogictest/test_files/array.slt Co-authored-by: Jay Zhan --- datafusion/sqllogictest/test_files/array.slt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index a25ef40bbca6..e6676d683f91 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1161,6 +1161,12 @@ select make_array(make_array(1), arrow_cast(make_array(-1), 'LargeList(Int8)')) ---- [[1], [-1]] +query T +select arrow_typeof(make_array(make_array(1), arrow_cast(make_array(-1), 'LargeList(Int8)'))); +---- +List(Field { name: "item", data_type: LargeList(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) + + query ??? select make_array(column1), make_array(column1, column5), From 1692229da03788aae2675320b49b90e0959cbe49 Mon Sep 17 00:00:00 2001 From: blaginin Date: Wed, 20 Nov 2024 17:42:54 +0000 Subject: [PATCH 17/17] Reorder match --- .../expr-common/src/type_coercion/binary.rs | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 1623f92eed86..bff74252df7b 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1149,19 +1149,6 @@ fn coerce_list_children(lhs_field: &FieldRef, rhs_field: &FieldRef) -> Option Option { use arrow::datatypes::DataType::*; match (lhs_type, rhs_type) { - ( - LargeList(lhs_field), - List(rhs_field) | LargeList(rhs_field) | FixedSizeList(rhs_field, _), - ) - | (List(lhs_field) | FixedSizeList(lhs_field, _), LargeList(rhs_field)) => { - Some(LargeList(coerce_list_children(lhs_field, rhs_field)?)) - } - - (List(lhs_field), List(rhs_field) | FixedSizeList(rhs_field, _)) - | (FixedSizeList(lhs_field, _), List(rhs_field)) => { - Some(List(coerce_list_children(lhs_field, rhs_field)?)) - } - // Coerce to the left side FixedSizeList type if the list lengths are the same, // otherwise coerce to list with the left type for dynamic length (FixedSizeList(lhs_field, ls), FixedSizeList(rhs_field, rs)) => { @@ -1174,6 +1161,19 @@ fn list_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { Some(List(coerce_list_children(lhs_field, rhs_field)?)) } } + // LargeList on any side + ( + LargeList(lhs_field), + List(rhs_field) | LargeList(rhs_field) | FixedSizeList(rhs_field, _), + ) + | (List(lhs_field) | FixedSizeList(lhs_field, _), LargeList(rhs_field)) => { + Some(LargeList(coerce_list_children(lhs_field, rhs_field)?)) + } + // Lists on both sides + (List(lhs_field), List(rhs_field) | FixedSizeList(rhs_field, _)) + | (FixedSizeList(lhs_field, _), List(rhs_field)) => { + Some(List(coerce_list_children(lhs_field, rhs_field)?)) + } _ => None, } }