Skip to content

Commit

Permalink
ogr2ogr: fix 'ogr2ogr out.parquet in.gpkg/fgb/parquet -t_srs {srs_def…
Browse files Browse the repository at this point in the history
…}' optimized code path (3.10.0 regression)

Since 3.10.0, reprojecting from GPKG/FlatGeobuf/Parquet uses the
Arrow-optimized code path. However, the way the temporary array was handled
was incorrect when writing to Parquet/Arrow ...

Workaround: add --config OGR2OGR_USE_ARROW_API=NO
  • Loading branch information
rouault committed Nov 17, 2024
1 parent 9b5cc40 commit e69c6df
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 13 deletions.
48 changes: 35 additions & 13 deletions apps/ogr2ogr_lib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5935,10 +5935,42 @@ bool LayerTranslator::TranslateArrow(
const auto nArrayLength = array.length;

// Coordinate reprojection
const void *backupGeomArrayBuffers2 = nullptr;
if (m_bTransform)
{
struct GeomArrayReleaser
{
const void *origin_buffers_2 = nullptr;
void (*origin_release)(struct ArrowArray *) = nullptr;
void *origin_private_data = nullptr;

static void init(struct ArrowArray *psGeomArray)
{
GeomArrayReleaser *releaser = new GeomArrayReleaser();
CPLAssert(psGeomArray->n_buffers >= 3);
releaser->origin_buffers_2 = psGeomArray->buffers[2];
releaser->origin_private_data = psGeomArray->private_data;
releaser->origin_release = psGeomArray->release;
psGeomArray->release = GeomArrayReleaser::release;
psGeomArray->private_data = releaser;
}

static void release(struct ArrowArray *psGeomArray)
{
GeomArrayReleaser *releaser =
static_cast<GeomArrayReleaser *>(
psGeomArray->private_data);
psGeomArray->buffers[2] = releaser->origin_buffers_2;
psGeomArray->private_data = releaser->origin_private_data;
psGeomArray->release = releaser->origin_release;
if (psGeomArray->release)
psGeomArray->release(psGeomArray);
delete releaser;
}
};

auto *psGeomArray = array.children[iArrowGeomFieldIndex];
GeomArrayReleaser::init(psGeomArray);

GByte *pabyWKB = static_cast<GByte *>(
const_cast<void *>(psGeomArray->buffers[2]));
const uint32_t *panOffsets =
Expand All @@ -5958,7 +5990,6 @@ bool LayerTranslator::TranslateArrow(
break;
}
memcpy(abyModifiedWKB.data(), pabyWKB, panOffsets[nArrayLength]);
backupGeomArrayBuffers2 = psGeomArray->buffers[2];
psGeomArray->buffers[2] = abyModifiedWKB.data();

std::atomic<bool> atomicRet{true};
Expand Down Expand Up @@ -6030,7 +6061,6 @@ bool LayerTranslator::TranslateArrow(
bRet = atomicRet;
if (!bRet)
{
psGeomArray->buffers[2] = backupGeomArrayBuffers2;
if (array.release)
array.release(&array);
break;
Expand All @@ -6041,24 +6071,16 @@ bool LayerTranslator::TranslateArrow(
const bool bWriteOK = psInfo->m_poDstLayer->WriteArrowBatch(
&schema, &array, aosOptionsWriteArrowBatch.List());

if (backupGeomArrayBuffers2)
{
auto *psGeomArray = array.children[iArrowGeomFieldIndex];
psGeomArray->buffers[2] = backupGeomArrayBuffers2;
}
if (array.release)
array.release(&array);

if (!bWriteOK)
{
CPLError(CE_Failure, CPLE_AppDefined, "WriteArrowBatch() failed");
if (array.release)
array.release(&array);
bRet = false;
break;
}

if (array.release)
array.release(&array);

/* Report progress */
if (pfnProgress)
{
Expand Down
18 changes: 18 additions & 0 deletions autotest/ogr/ogr_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -4150,3 +4150,21 @@ def test_ogr_parquet_IsArrowSchemaSupported_arrow_15_types(
success, error_msg = dst_lyr.IsArrowSchemaSupported(schema)
assert not success
assert error_msg == expected_error_msg


###############################################################################


def test_ogr_parquet_ogr2ogr_reprojection(tmp_vsimem):
    """Translate poly.parquet to a new Parquet file while reprojecting it
    from EPSG:32632 to EPSG:4326, then check the extent of the output layer.
    """

    dst_path = str(tmp_vsimem / "test.parquet")

    # Source and target SRS are both given explicitly to VectorTranslate().
    gdal.VectorTranslate(
        dst_path,
        "data/parquet/poly.parquet",
        srcSRS="EPSG:32632",
        dstSRS="EPSG:4326",
    )

    with ogr.Open(dst_path) as out_ds:
        out_lyr = out_ds.GetLayer(0)
        # Expected (minx, maxx, miny, maxy) of the reprojected layer.
        assert out_lyr.GetExtent() == pytest.approx(
            (8.73380363499761, 8.774681944824946, 43.01833481785084, 43.04292637071279)
        )

0 comments on commit e69c6df

Please sign in to comment.