Commit

#176: add decimal format cast in hive (#256)
* #176: add decimal format cast in hive
AnastasiiaSergienko authored Sep 3, 2019
1 parent 8fee28c commit 811feae
Showing 13 changed files with 227 additions and 85 deletions.
11 changes: 11 additions & 0 deletions doc/dialects/hive.md
@@ -164,3 +164,14 @@ See also:

The dialect was tested with the Cloudera Hive JDBC driver available on the [Cloudera downloads page](http://www.cloudera.com/downloads). The driver is also available directly from [Simba technologies](http://www.simba.com/), who developed the driver.
We tested with the JDBC 4.1 driver.

## Type Mappings

- `DECIMAL` with precision > 36 is cast to `VARCHAR` to prevent a loss of precision.

If you want a `DECIMAL` type to be returned instead, set the property `HIVE_CAST_NUMBER_TO_DECIMAL_WITH_PRECISION_AND_SCALE`:

`HIVE_CAST_NUMBER_TO_DECIMAL_WITH_PRECISION_AND_SCALE='36,20'`

This will cast `DECIMAL` with precision > 36 and `DECIMAL` without a specified precision to `DECIMAL(36,20)`.
Keep in mind that this will yield errors if the data in the Hive database does not fit into the specified DECIMAL type.
@@ -6,7 +6,7 @@
import java.sql.SQLException;
import java.util.*;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.util.regex.*;

import com.exasol.ExaMetadata;
import com.exasol.adapter.AdapterException;
@@ -266,4 +266,19 @@ protected void checkImportPropertyConsistency(final String importFromProperty, f
}
}
}

protected void validateCastNumberToDecimalProperty(final String castNumberToDecimalProperty)
throws PropertyValidationException {
if (this.properties.containsKey(castNumberToDecimalProperty)) {
final Pattern pattern = Pattern.compile("\\s*(\\d+)\\s*,\\s*(\\d+)\\s*");
final String precisionAndScale = this.properties.get(castNumberToDecimalProperty);
final Matcher matcher = pattern.matcher(precisionAndScale);
if (!matcher.matches()) {
throw new PropertyValidationException("Unable to parse adapter property " + castNumberToDecimalProperty
+ " value \"" + precisionAndScale
+ "\" into a number's precision and scale. The required format is \"<precision>,<scale>\", where "
+ "both are integer numbers.");
}
}
}
}
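
The value format this validation accepts is a comma-separated precision and scale, optionally surrounded by whitespace. Below is a minimal standalone sketch of which values pass and which fail; it is illustrative only and not part of the commit, the class name is made up, and the pattern string is the one used in the method above.

```java
import java.util.regex.Pattern;

// Illustrative sketch of the value format accepted by validateCastNumberToDecimalProperty.
public class CastPropertyFormatSketch {
    private static final Pattern PRECISION_AND_SCALE = Pattern.compile("\\s*(\\d+)\\s*,\\s*(\\d+)\\s*");

    public static void main(final String[] args) {
        System.out.println(PRECISION_AND_SCALE.matcher("36,20").matches());     // true: valid value
        System.out.println(PRECISION_AND_SCALE.matcher(" 36 , 20 ").matches()); // true: surrounding whitespace is tolerated
        System.out.println(PRECISION_AND_SCALE.matcher("36.20").matches());     // false: a comma is required, not a dot
        System.out.println(PRECISION_AND_SCALE.matcher("36").matches());        // false: the scale is missing
    }
}
```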
@@ -0,0 +1,49 @@
package com.exasol.adapter.dialects.hive;

import com.exasol.adapter.*;
import com.exasol.adapter.dialects.*;
import com.exasol.adapter.jdbc.*;
import com.exasol.adapter.metadata.*;

import java.sql.*;

import static com.exasol.adapter.dialects.hive.HiveProperties.HIVE_CAST_NUMBER_TO_DECIMAL_PROPERTY;

/**
* This class implements Hive-specific reading of column metadata.
*/
public class HiveColumnMetadataReader extends BaseColumnMetadataReader {

/**
* Create a new instance of the {@link HiveColumnMetadataReader}.
*
* @param connection connection to the remote data source
* @param properties user-defined adapter properties
* @param identifierConverter converter between source and Exasol identifiers
*/
public HiveColumnMetadataReader(final Connection connection, final AdapterProperties properties,
final IdentifierConverter identifierConverter) {
super(connection, properties, identifierConverter);
}

@Override
public DataType mapJdbcType(final JdbcTypeDescription jdbcTypeDescription) {
if (jdbcTypeDescription.getJdbcType() == Types.DECIMAL) {
return mapDecimal(jdbcTypeDescription);
} else {
return super.mapJdbcType(jdbcTypeDescription);
}
}

protected DataType mapDecimal(final JdbcTypeDescription jdbcTypeDescription) {
final int jdbcPrecision = jdbcTypeDescription.getPrecisionOrSize();
final int scale = jdbcTypeDescription.getDecimalScale();
if (jdbcPrecision <= DataType.MAX_EXASOL_DECIMAL_PRECISION) {
return DataType.createDecimal(jdbcPrecision, scale);
} else if (this.properties.containsKey(HIVE_CAST_NUMBER_TO_DECIMAL_PROPERTY)) {
return getNumberTypeFromProperty(HIVE_CAST_NUMBER_TO_DECIMAL_PROPERTY);
} else {
return DataType.createMaximumSizeVarChar(DataType.ExaCharset.UTF8);
}
}
}
@@ -29,7 +29,7 @@ protected IdentifierConverter createIdentifierConverter() {

@Override
protected ColumnMetadataReader createColumnMetadataReader() {
return new BaseColumnMetadataReader(this.connection, this.properties, this.identifierConverter);
return new HiveColumnMetadataReader(this.connection, this.properties, this.identifierConverter);
}

@Override
@@ -0,0 +1,12 @@
package com.exasol.adapter.dialects.hive;

/**
* This class contains HIVE-specific adapter properties.
*/
public final class HiveProperties {
public static final String HIVE_CAST_NUMBER_TO_DECIMAL_PROPERTY = "HIVE_CAST_NUMBER_TO_DECIMAL_WITH_PRECISION_AND_SCALE";

private HiveProperties() {
// prevent instantiation
}
}
@@ -6,6 +6,8 @@
import static com.exasol.adapter.capabilities.MainCapability.*;
import static com.exasol.adapter.capabilities.PredicateCapability.*;
import static com.exasol.adapter.capabilities.ScalarFunctionCapability.*;
import static com.exasol.adapter.dialects.hive.HiveProperties.HIVE_CAST_NUMBER_TO_DECIMAL_PROPERTY;
import static com.exasol.adapter.dialects.oracle.OracleProperties.*;

import java.sql.Connection;
import java.util.*;
@@ -27,7 +29,7 @@ public class HiveSqlDialect extends AbstractSqlDialect {
private static final List<String> SUPPORTED_PROPERTIES = Arrays.asList(SQL_DIALECT_PROPERTY,
CONNECTION_NAME_PROPERTY, CONNECTION_STRING_PROPERTY, USERNAME_PROPERTY, PASSWORD_PROPERTY,
CATALOG_NAME_PROPERTY, SCHEMA_NAME_PROPERTY, TABLE_FILTER_PROPERTY, EXCLUDED_CAPABILITIES_PROPERTY,
DEBUG_ADDRESS_PROPERTY, LOG_LEVEL_PROPERTY);
DEBUG_ADDRESS_PROPERTY, LOG_LEVEL_PROPERTY, HIVE_CAST_NUMBER_TO_DECIMAL_PROPERTY);

private static Capabilities createCapabilityList() {
return Capabilities.builder()
@@ -130,6 +132,12 @@ protected RemoteMetadataReader createRemoteMetadataReader() {
return new HiveMetadataReader(this.connection, this.properties);
}

@Override
public void validateProperties() throws PropertyValidationException {
super.validateProperties();
validateCastNumberToDecimalProperty(HIVE_CAST_NUMBER_TO_DECIMAL_PROPERTY);
}

@Override
protected QueryRewriter createQueryRewriter() {
return new BaseQueryRewriter(this, this.remoteMetadataReader, this.connection);
@@ -4,8 +4,6 @@

import java.sql.Connection;
import java.sql.Types;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.exasol.adapter.AdapterProperties;
import com.exasol.adapter.BinaryColumnHandling;
@@ -86,28 +84,13 @@ private DataType workAroundNumberWithoutScaleAndPrecision() {

private DataType getOracleNumberTargetType() {
if (this.properties.containsKey(ORACLE_CAST_NUMBER_TO_DECIMAL_PROPERTY)) {
return getOracleNumberTypeFromProperty();
return getNumberTypeFromProperty(ORACLE_CAST_NUMBER_TO_DECIMAL_PROPERTY);
} else {
return DataType.createMaximumSizeVarChar(DataType.ExaCharset.UTF8);
}
}

private DataType getOracleNumberTypeFromProperty() {
final Pattern pattern = Pattern.compile("\\s*(\\d+)\\s*,\\s*(\\d+)\\s*");
final String oraclePrecisionAndScale = this.properties.get(ORACLE_CAST_NUMBER_TO_DECIMAL_PROPERTY);
final Matcher matcher = pattern.matcher(oraclePrecisionAndScale);
if (matcher.matches()) {
final int precision = Integer.parseInt(matcher.group(1));
final int scale = Integer.parseInt(matcher.group(2));
return DataType.createDecimal(precision, scale);
} else {
throw new IllegalArgumentException("Unable to parse adapter property "
+ ORACLE_CAST_NUMBER_TO_DECIMAL_PROPERTY + " value \"" + oraclePrecisionAndScale
+ " into a number precision and scale. The required format is \"<precision>.<scale>\", where both are integer numbers.");
}
}

public DataType mapBlobType() {
private DataType mapBlobType() {
if (this.properties.getBinaryColumnHandling() == BinaryColumnHandling.IGNORE) {
return DataType.createUnsupported();
} else {
@@ -10,8 +10,6 @@

import java.sql.Connection;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import com.exasol.adapter.AdapterProperties;
@@ -168,25 +166,11 @@ public void validateProperties() throws PropertyValidationException {
super.validateProperties();
checkImportPropertyConsistency(ORACLE_IMPORT_PROPERTY, ORACLE_CONNECTION_NAME_PROPERTY);
validateBooleanProperty(ORACLE_IMPORT_PROPERTY);
validateCastNumberToDecimalProperty();
validateCastNumberToDecimalProperty(ORACLE_CAST_NUMBER_TO_DECIMAL_PROPERTY);
}

@Override
public List<String> getSupportedProperties() {
return SUPPORTED_PROPERTIES;
}

private void validateCastNumberToDecimalProperty() throws PropertyValidationException {
if (this.properties.containsKey(ORACLE_CAST_NUMBER_TO_DECIMAL_PROPERTY)) {
final Pattern pattern = Pattern.compile("\\s*(\\d+)\\s*,\\s*(\\d+)\\s*");
final String oraclePrecisionAndScale = this.properties.get(ORACLE_CAST_NUMBER_TO_DECIMAL_PROPERTY);
final Matcher matcher = pattern.matcher(oraclePrecisionAndScale);
if (!matcher.matches()) {
throw new PropertyValidationException("Unable to parse adapter property "
+ ORACLE_CAST_NUMBER_TO_DECIMAL_PROPERTY + " value \"" + oraclePrecisionAndScale
+ " into a number precison and scale. The required format is \"<precsion>.<scale>\", where "
+ "both are integer numbers.");
}
}
}
}
@@ -6,6 +6,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import java.util.regex.*;

import com.exasol.adapter.AdapterProperties;
import com.exasol.adapter.dialects.IdentifierConverter;
@@ -336,4 +337,19 @@ protected DataType mapJdbcTypeNumericToDecimalWithFallbackToDouble(final JdbcTyp
return DataType.createDouble();
}
}

protected DataType getNumberTypeFromProperty(final String property) {
final Pattern pattern = Pattern.compile("\\s*(\\d+)\\s*,\\s*(\\d+)\\s*");
final String precisionAndScale = this.properties.get(property);
final Matcher matcher = pattern.matcher(precisionAndScale);
if (matcher.matches()) {
final int precision = Integer.parseInt(matcher.group(1));
final int scale = Integer.parseInt(matcher.group(2));
return DataType.createDecimal(precision, scale);
} else {
throw new IllegalArgumentException("Unable to parse adapter property " + property + " value \""
+ precisionAndScale
+ "\" into a number's precision and scale. The required format is \"<precision>,<scale>\", where both are integer numbers.");
}
}
}
@@ -0,0 +1,37 @@
package com.exasol.adapter.dialects.hive;

import static org.hamcrest.CoreMatchers.*;
import static org.hamcrest.MatcherAssert.*;

import java.sql.*;

import org.junit.jupiter.api.*;

import com.exasol.adapter.*;
import com.exasol.adapter.dialects.*;
import com.exasol.adapter.metadata.*;

class HiveColumnMetadataReaderTest {
private HiveColumnMetadataReader columnMetadataReader;

@BeforeEach
void beforeEach() {
this.columnMetadataReader = new HiveColumnMetadataReader(null, AdapterProperties.emptyProperties(),
BaseIdentifierConverter.createDefault());
}

@Test
void mapDecimalReturnDecimal() {
final JdbcTypeDescription typeDescription = new JdbcTypeDescription(Types.DECIMAL, 0,
DataType.MAX_EXASOL_DECIMAL_PRECISION, 10, "DECIMAL");
assertThat(columnMetadataReader.mapJdbcType(typeDescription), equalTo(DataType.createDecimal(36, 0)));
}

@Test
void mapDecimalReturnVarchar() {
final JdbcTypeDescription typeDescription = new JdbcTypeDescription(Types.DECIMAL, 0,
DataType.MAX_EXASOL_DECIMAL_PRECISION + 1, 10, "DECIMAL");
assertThat(columnMetadataReader.mapJdbcType(typeDescription),
equalTo(DataType.createMaximumSizeVarChar(DataType.ExaCharset.UTF8)));
}
}
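
The two tests above only cover the behaviour without the cast property. Below is a hedged sketch of an additional test method that could sit in the class above and exercise the property-driven path; it is not part of the commit, the test name is hypothetical, and it assumes `AdapterProperties` can be constructed from a plain `Map<String, String>`.

```java
// Hypothetical additional test for HiveColumnMetadataReaderTest, not part of this commit.
@Test
void mapDecimalWithCastPropertyReturnsConfiguredDecimal() {
    // Assumption: AdapterProperties accepts a raw property map.
    final java.util.Map<String, String> rawProperties = new java.util.HashMap<>();
    rawProperties.put("HIVE_CAST_NUMBER_TO_DECIMAL_WITH_PRECISION_AND_SCALE", "36,2");
    final HiveColumnMetadataReader reader = new HiveColumnMetadataReader(null,
            new AdapterProperties(rawProperties), BaseIdentifierConverter.createDefault());
    // DECIMAL(38,17) exceeds Exasol's maximum precision of 36 ...
    final JdbcTypeDescription typeDescription = new JdbcTypeDescription(Types.DECIMAL, 17,
            DataType.MAX_EXASOL_DECIMAL_PRECISION + 2, 10, "DECIMAL");
    // ... so with the property set it is narrowed to DECIMAL(36,2) instead of falling back to VARCHAR.
    assertThat(reader.mapJdbcType(typeDescription), equalTo(DataType.createDecimal(36, 2)));
}
```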
@@ -5,12 +5,13 @@
import static org.junit.jupiter.api.Assertions.assertEquals;

import java.io.FileNotFoundException;
import java.math.BigDecimal;
import java.math.*;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;

import org.junit.Assume;
import com.exasol.adapter.dialects.oracle.*;
import org.junit.*;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
@@ -26,6 +27,7 @@
@ExtendWith(IntegrationTestConfigurationCondition.class)
public class HiveSqlDialectIT extends AbstractIntegrationTest {
private static final String VIRTUAL_SCHEMA = "VS_HIVE";
private static final String VIRTUAL_SCHEMA_JDBC_NUMBER_TO_DECIMAL = "VS_HIVE_JDBC_NUMBER_TO_DECIMAL";
private static final String HIVE_SCHEMA = "default";
private static final boolean IS_LOCAL = false;
private static final String HIVE_CONNECTION = "HIVE_CONNECTION";
@@ -34,13 +36,17 @@ public class HiveSqlDialectIT extends AbstractIntegrationTest {
static void beforeAll() throws FileNotFoundException, SQLException, ClassNotFoundException {
Assume.assumeTrue(getConfig().hiveTestsRequested());
setConnection(connectToExa());

createTestSchema();

createHiveJDBCAdapter();
createHiveConnection();
createVirtualSchema(VIRTUAL_SCHEMA, HiveSqlDialect.NAME, "", HIVE_SCHEMA, HIVE_CONNECTION, "", "",
"ADAPTER.JDBC_ADAPTER", "", IS_LOCAL, getConfig().debugAddress(), "", null, "");
// create JDBC virtual schema with special DECIMAL handling
createVirtualSchema(VIRTUAL_SCHEMA_JDBC_NUMBER_TO_DECIMAL, HiveSqlDialect.NAME, "", HIVE_SCHEMA,
HIVE_CONNECTION, "", "",
// "ADAPTER.JDBC_ORACLE_DEBUG",
"ADAPTER.JDBC_ADAPTER", "", IS_LOCAL, getConfig().debugAddress(), "",
"hive_cast_number_to_decimal_with_precision_and_scale='36,2'", "");
}

private static void createTestSchema() throws SQLException, ClassNotFoundException, FileNotFoundException {
@@ -59,6 +65,12 @@ private static void createTestSchema() throws SQLException, ClassNotFoundExcepti
stmt.execute("truncate table t2");
stmt.execute("insert into t2 values (2,'bbb'), (3,'ccc')");

stmt.execute(
"create table decimal_cast(decimal_col1 decimal(12, 6), decimal_col2 decimal(36, 16), decimal_col3 decimal(38, 17))");
stmt.execute("truncate table decimal_cast");
stmt.execute(
"insert into decimal_cast values (123456.12345671, 123456789.011111111111111, 1234444444444444444.5555555555555555555555550)");

stmt.execute(
"CREATE TABLE ALL_HIVE_DATA_TYPES(ARRAYCOL ARRAY<string>, BIGINTEGER BIGINT, BOOLCOLUMN BOOLEAN, CHARCOLUMN CHAR(1), DECIMALCOL DECIMAL(10,0), DOUBLECOL DOUBLE, FLOATCOL FLOAT, INTCOL INT, MAPCOL MAP<string,int>, SMALLINTEGER SMALLINT, STRINGCOL STRING, STRUCTCOL struct<a : int, b : int>, TIMESTAMPCOL TIMESTAMP, TINYINTEGER TINYINT, VARCHARCOL VARCHAR(10), BINARYCOL BINARY, DATECOL DATE)");
stmt.execute("truncate table ALL_HIVE_DATA_TYPES");
@@ -352,4 +364,23 @@ private static void createHiveJDBCAdapter() throws SQLException, FileNotFoundExc
private static void createHiveConnection() throws SQLException, FileNotFoundException {
createConnection(HIVE_CONNECTION, getConfig().getHiveDockerJdbcConnectionString(), "", "");
}

@Test
void testNumberBeyondExasolPrecisionMaxValueToDecimalColumnTypes() throws SQLException {
final ResultSet resultSet = executeQuery(
"SELECT COLUMN_NAME, COLUMN_TYPE FROM EXA_DBA_COLUMNS WHERE COLUMN_SCHEMA = '"
+ VIRTUAL_SCHEMA_JDBC_NUMBER_TO_DECIMAL
+ "' AND COLUMN_TABLE='DECIMAL_CAST' ORDER BY COLUMN_ORDINAL_POSITION");
assertNextRow(resultSet, "DECIMAL_COL1", "DECIMAL(12,6)");
assertNextRow(resultSet, "DECIMAL_COL2", "DECIMAL(36,16)");
assertNextRow(resultSet, "DECIMAL_COL3", "DECIMAL(36,2)");
}

@Test
void testNumberBeyondExasolPrecisionMaxValueToDecimal() throws SQLException {
final String query = "SELECT * FROM " + VIRTUAL_SCHEMA_JDBC_NUMBER_TO_DECIMAL + ".decimal_cast";
final ResultSet resultSet = executeQuery(query);
assertNextRow(resultSet, new BigDecimal("123456.123457"), new BigDecimal("123456789.0111111111111110"),
new BigDecimal("1234444444444444444.55"));
}
}