-
Notifications
You must be signed in to change notification settings - Fork 67
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added function
RST_AsFormat()
to change raster format / driver in-situ
- Loading branch information
Showing
14 changed files
with
377 additions
and
4 deletions.
There are no files selected for viewing
37 changes: 37 additions & 0 deletions
37
...ala/com/databricks/labs/mosaic/core/raster/operator/RasterTranslate/TranslateFormat.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package com.databricks.labs.mosaic.core.raster.operator.RasterTranslate | ||
|
||
import com.databricks.labs.mosaic.core.raster.api.GDAL | ||
import com.databricks.labs.mosaic.core.raster.gdal.MosaicRasterGDAL | ||
import com.databricks.labs.mosaic.core.raster.operator.gdal.GDALTranslate | ||
import com.databricks.labs.mosaic.utils.PathUtils | ||
|
||
/** Raster operator that rewrites a raster into a different format via gdal_translate. */
object TranslateFormat {

    /**
      * Converts a raster to a different format / driver.
      *
      * The raster's current write options are copied with the requested
      * format and with the extension that GDAL.getExtension reports for it.
      * The output path is obtained from PathUtils.createTmpFilePath using
      * that extension, and the conversion itself is delegated to
      * GDALTranslate.executeTranslate.
      *
      * @param raster
      *   The raster to convert.
      * @param newFormat
      *   The new format of the raster (presumably a GDAL driver short name
      *   such as "GTiff" - confirm against GDAL.getExtension).
      * @return
      *   A MosaicRasterGDAL object, as returned by the translate call.
      */
    def update(
        raster: MosaicRasterGDAL,
        newFormat: String
    ): MosaicRasterGDAL = {
        val outOptions = raster.getWriteOptions.copy(format = newFormat, extension = GDAL.getExtension(newFormat))
        val resultFileName = PathUtils.createTmpFilePath(outOptions.extension)

        // No extra flags are passed on the command itself; format and
        // extension travel with outOptions.
        GDALTranslate.executeTranslate(
          resultFileName,
          raster,
          command = "gdal_translate",
          outOptions
        )
    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
65 changes: 65 additions & 0 deletions
65
src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_AsFormat.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package com.databricks.labs.mosaic.expressions.raster | ||
|
||
import com.databricks.labs.mosaic.core.raster.api.GDAL | ||
import com.databricks.labs.mosaic.core.raster.operator.RasterTranslate.TranslateFormat | ||
import com.databricks.labs.mosaic.core.types.RasterTileType | ||
import com.databricks.labs.mosaic.core.types.model.MosaicRasterTile | ||
import com.databricks.labs.mosaic.expressions.base.{GenericExpressionFactory, WithExpressionInfo} | ||
import com.databricks.labs.mosaic.expressions.raster.base.Raster1ArgExpression | ||
import com.databricks.labs.mosaic.functions.MosaicExpressionConfig | ||
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder | ||
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback | ||
import org.apache.spark.sql.catalyst.expressions.{Expression, NullIntolerant} | ||
import org.apache.spark.sql.types.DataType | ||
import org.apache.spark.unsafe.types.UTF8String | ||
|
||
/**
  * Expression that returns the input raster tile in a different underlying
  * format / driver.
  *
  * @param tileExpr
  *   The expression for the raster tile.
  * @param newFormat
  *   The expression for the target format; evaluated to a UTF8String.
  * @param expressionConfig
  *   Additional arguments for the expression (expressionConfigs).
  */
case class RST_AsFormat(
    tileExpr: Expression,
    newFormat: Expression,
    expressionConfig: MosaicExpressionConfig
) extends Raster1ArgExpression[RST_AsFormat](
      tileExpr,
      newFormat,
      returnsRaster = true,
      expressionConfig
    )
      with NullIntolerant
      with CodegenFallback {

    /** Enables GDAL for this config, then derives the raster tile type from the input tile expression. */
    override def dataType: DataType = {
        GDAL.enable(expressionConfig)
        RasterTileType(expressionConfig.getCellIdType, tileExpr, expressionConfig.isRasterUseCheckpoint)
    }

    /**
      * Changes the format / driver of the tile's raster.
      *
      * @param tile
      *   The tile whose raster is converted.
      * @param arg1
      *   The target format, expected to be a UTF8String.
      * @return
      *   The input tile unchanged when its raster's driver short name
      *   already equals the target format; otherwise a copy of the tile
      *   holding the translated raster, with its driver set to the target
      *   format.
      */
    override def rasterTransform(tile: MosaicRasterTile, arg1: Any): Any = {
        // Named differently from the constructor parameter `newFormat`
        // (an Expression) to avoid shadowing it.
        val targetFormat = arg1.asInstanceOf[UTF8String].toString
        if (tile.getRaster.driverShortName.getOrElse("") == targetFormat) {
            // Already in the requested format: skip the translate.
            tile
        } else {
            val translated = TranslateFormat.update(tile.getRaster, targetFormat)
            tile.copy(raster = translated).setDriver(targetFormat)
        }
    }

}
|
||
/** Expression info required for the expression registration for spark SQL. */
object RST_AsFormat extends WithExpressionInfo {

    /** SQL function name under which the expression is registered. */
    override def name: String = "rst_asformat"

    /** Usage line; lists both arguments, matching the two-argument builder below. */
    override def usage: String = "_FUNC_(expr1, expr2) - Returns a raster tile in a different underlying format"

    /** Example invocation shown in the function description. */
    override def example: String =
        """
          | Examples:
          | > SELECT _FUNC_(tile, 'GTiff')
          | {index_id, updated_raster, parentPath, driver}
          | """.stripMargin

    /** Builds the function from two child expressions (tile, new format) plus the expression config. */
    override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = {
        GenericExpressionFactory.getBaseBuilder[RST_AsFormat](2, expressionConfig)
    }

}
51 changes: 51 additions & 0 deletions
51
src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Format.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
package com.databricks.labs.mosaic.expressions.raster | ||
|
||
import com.databricks.labs.mosaic.core.types.model.MosaicRasterTile | ||
import com.databricks.labs.mosaic.expressions.base.{GenericExpressionFactory, WithExpressionInfo} | ||
import com.databricks.labs.mosaic.expressions.raster.base.RasterExpression | ||
import com.databricks.labs.mosaic.functions.MosaicExpressionConfig | ||
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder | ||
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback | ||
import org.apache.spark.sql.catalyst.expressions.{Expression, NullIntolerant} | ||
import org.apache.spark.sql.types.{DataType, StringType} | ||
import org.apache.spark.unsafe.types.UTF8String | ||
|
||
/**
  * Expression that reports the driver of a raster tile as a string.
  *
  * @param tileExpr
  *   The expression for the raster tile.
  * @param expressionConfig
  *   Additional arguments for the expression (expressionConfigs).
  */
case class RST_Format(
    tileExpr: Expression,
    expressionConfig: MosaicExpressionConfig
) extends RasterExpression[RST_Format](
      tileExpr,
      returnsRaster = false,
      expressionConfig
    )
      with NullIntolerant
      with CodegenFallback {

    /** The result is a plain string column. */
    override def dataType: DataType = StringType

    /**
      * Reads the tile's driver and wraps it as a UTF8String.
      *
      * @param tile
      *   The tile to inspect.
      * @return
      *   The value of tile.getDriver as a UTF8String.
      */
    override def rasterTransform(tile: MosaicRasterTile): Any = {
        val driverName = tile.getDriver
        UTF8String.fromString(driverName)
    }

}
|
||
/** Registration metadata (name, usage, example, builder) for the spark SQL function. */
object RST_Format extends WithExpressionInfo {

    /** SQL function name under which the expression is registered. */
    override def name: String = "rst_format"

    /** One-line usage text for the function description. */
    override def usage: String = "_FUNC_(expr1) - Returns the driver used to read the raster"

    /** Example invocation and sample output for the function description. */
    override def example: String =
        """
          | Examples:
          | > SELECT _FUNC_(tile)
          | 'GTiff'
          | """.stripMargin

    /** Builds the function from a single child expression (the tile) plus the expression config. */
    override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder =
        GenericExpressionFactory.getBaseBuilder[RST_Format](1, expressionConfig)

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
54 changes: 54 additions & 0 deletions
54
src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_AsFormatBehaviours.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
package com.databricks.labs.mosaic.expressions.raster | ||
|
||
import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI | ||
import com.databricks.labs.mosaic.core.index.IndexSystem | ||
import com.databricks.labs.mosaic.functions.MosaicContext | ||
import com.databricks.labs.mosaic.test.mocks.filePath | ||
import com.databricks.labs.mosaic.{MOSAIC_RASTER_READ_IN_MEMORY, MOSAIC_RASTER_READ_STRATEGY} | ||
import org.apache.spark.sql.QueryTest | ||
import org.apache.spark.sql.functions.lit | ||
import org.scalatest.matchers.should.Matchers._ | ||
|
||
/** Shared behaviour checks for the rst_asformat expression. */
trait RST_AsFormatBehaviours extends QueryTest {

    /**
      * Loads the NetCDF test rasters, selects one subdataset, converts it to
      * GTiff through both the DataFrame API and SQL, and checks that
      * rst_format reports the new format.
      *
      * @param indexSystem
      *   The index system used to build the MosaicContext.
      * @param geometryAPI
      *   The geometry API used to build the MosaicContext.
      */
    // noinspection MapGetGet
    def behavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = {
        spark.sparkContext.setLogLevel("ERROR")
        val mc = MosaicContext.build(indexSystem, geometryAPI)
        mc.register()
        val sc = spark
        import mc.functions._
        import sc.implicits._

        val subDataset = "t2m"

        val rastersInMemory = spark.read
            .format("gdal")
            .option(MOSAIC_RASTER_READ_STRATEGY, MOSAIC_RASTER_READ_IN_MEMORY)
            .load(filePath("/binary/netcdf-ECMWF/"))
            .withColumn("tile", rst_getsubdataset($"tile", lit(subDataset)))

        val newFormat = "GTiff"

        val df = rastersInMemory
            .withColumn("updated_tile", rst_asformat($"tile", lit(newFormat)))
            .select(rst_format($"updated_tile").as("new_type"))

        rastersInMemory
            .createOrReplaceTempView("source")

        // SQL registration check: the function must resolve by name.
        noException should be thrownBy spark.sql(s"""
                                                    |select rst_asformat(tile, '$newFormat') from source
                                                    |""".stripMargin)

        // DataFrame API check: exercises rst_asformat (the function under
        // test) rather than rst_updatetype, which expects a data type name.
        noException should be thrownBy rastersInMemory
            .withColumn("tile", rst_asformat($"tile", lit(newFormat)))
            .select("tile")

        // Forces evaluation of the conversion and reads back the reported driver.
        val result = df.first.getString(0)

        result shouldBe newFormat

    }

}
32 changes: 32 additions & 0 deletions
32
src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_AsFormatTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package com.databricks.labs.mosaic.expressions.raster | ||
|
||
import com.databricks.labs.mosaic.core.geometry.api.JTS | ||
import com.databricks.labs.mosaic.core.index.H3IndexSystem | ||
import org.apache.spark.sql.QueryTest | ||
import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode | ||
import org.apache.spark.sql.internal.SQLConf | ||
import org.apache.spark.sql.test.SharedSparkSessionGDAL | ||
|
||
import scala.util.Try | ||
|
||
/** Runs the RST_AsFormat behaviours with codegen disabled. */
class RST_AsFormatTest extends QueryTest with SharedSparkSessionGDAL with RST_AsFormatBehaviours {

    // Wraps a test body so that whole-stage codegen is off and the codegen
    // factory runs in NO_CODEGEN mode.
    private val noCodegen =
        withSQLConf(
          SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false",
          SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString
        ) _

    // Hotfix for SharedSparkSession afterAll cleanup.
    override def afterAll(): Unit = Try(super.afterAll())

    // These tests are not index system nor geometry API specific.
    // Only testing one pairing is sufficient.
    test("Testing RST_AsFormat with manual GDAL registration (H3, JTS).") {
        noCodegen {
            // Skipped (not failed) on non-Linux hosts.
            assume(System.getProperty("os.name") == "Linux")
            behavior(H3IndexSystem, JTS)
        }
    }

}
|
Oops, something went wrong.