Skip to content

Commit

Permalink
Add spatial presence matrices (#221)
Browse files Browse the repository at this point in the history
This adds dataframes for creating a join table between the scenes and observations/variables. New methods are added to the `ExperimentAxisQuery` class for getting an Arrow array of scene names that relate to the obs or var in the query.

---------

Co-authored-by: Aaron Wolen <[email protected]>
  • Loading branch information
jp-dark and aaronwolen authored Sep 26, 2024
1 parent b53cd2b commit 73ccc06
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 0 deletions.
11 changes: 11 additions & 0 deletions python-spec/src/somacore/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,17 @@ class Experiment(
spatial = _mixin.item[_SceneColl]() # TODO: Discuss the name of this element.
"""A collection of named spatial scenes."""

obs_spatial_presence = _mixin.item[_DF]()
"""A dataframe that stores the presence of obs in the spatial scenes.
This provides a join table for the obs ``soma_joinid`` and the scene names used in
the ``spatial`` collection. This dataframe must contain index columns ``soma_joinid``
and ``scene_id``. The ``scene_id`` column must have type ``string``. The
dataframe must contain a ``boolean`` column ``data``. The values of ``data`` are
``True`` if the obs ``soma_joinid`` is contained in the scene
``scene_id`` and ``False`` otherwise.
"""

def axis_query(
self,
measurement_name: str,
Expand Down
11 changes: 11 additions & 0 deletions python-spec/src/somacore/measurement.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,14 @@ class Measurement(
This is indexed by ``[varid_1, varid_2]``.
"""

var_spatial_presence = _mixin.item[_DF]()
"""A dataframe that stores the presence of var in the spatial scenes.
This provides a join table for the var ``soma_joinid`` and the scene names used in
the ``spatial`` collection. This dataframe must contain index columns ``soma_joinid``
and ``scene_id``. The ``scene_id`` column must have type ``string``. The
dataframe must contain a ``boolean`` column ``data``. The values of ``data`` are
``True`` if the var with varid ``soma_joinid`` is contained in scene with name
``scene_id`` and ``False`` otherwise.
"""
46 changes: 46 additions & 0 deletions python-spec/src/somacore/query/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import numpy.typing as npt
import pandas as pd
import pyarrow as pa
import pyarrow.compute as pacomp
from scipy import sparse
from typing_extensions import Literal, Protocol, Self, TypedDict

Expand Down Expand Up @@ -267,6 +268,48 @@ def varm(self, layer: str) -> data.SparseRead:
"""
return self._axism_inner(_Axis.VAR, layer)

def obs_scene_ids(self) -> pa.Array:
    """Collect the distinct scene ids in which this query's obs are present.

    The experiment's ``obs_spatial_presence`` dataframe is read for the obs
    joinids selected by this query, keeping only rows whose ``data`` value is
    non-zero, and the unique values of the ``scene_id`` column are returned.

    Returns:
        The unique ``scene_id`` values of the matching rows.

    Raises:
        KeyError: If looking up ``obs_spatial_presence`` raises ``KeyError``.
        TypeError: If ``obs_spatial_presence`` is not a dataframe.

    Lifecycle: experimental
    """
    try:
        presence = self.experiment.obs_spatial_presence
    except KeyError as missing:
        raise KeyError("Missing obs_scene") from missing

    if isinstance(presence, data.DataFrame):
        # Restrict the first coordinate to this query's obs joinids and leave
        # the second coordinate unrestricted.
        obs_joinids = _Axis.OBS.getattr_from(self._joinids)
        reader = presence.read(
            coords=(obs_joinids, slice(None)),
            result_order=options.ResultOrder.COLUMN_MAJOR,
            value_filter="data != 0",
        )
        scene_column = reader.concat()["scene_id"]
        return pacomp.unique(scene_column)

    raise TypeError("obs_scene must be a dataframe.")

def var_scene_ids(self) -> pa.Array:
    """Return a pyarrow array with scene ids that contain var from this
    query.

    The measurement's ``var_spatial_presence`` dataframe is read for the var
    joinids selected by this query, keeping only rows whose ``data`` value is
    non-zero, and the unique values of the ``scene_id`` column are returned.

    Returns:
        The unique ``scene_id`` values of the matching rows.

    Raises:
        KeyError: If looking up ``var_spatial_presence`` raises ``KeyError``.
        TypeError: If ``var_spatial_presence`` is not a dataframe.

    Lifecycle: experimental
    """
    try:
        var_scene = self._ms.var_spatial_presence
    except KeyError as ke:
        raise KeyError("Missing var_scene") from ke
    if not isinstance(var_scene, data.DataFrame):
        raise TypeError("var_scene must be a dataframe.")

    # The presence table is keyed by *var* joinids, so the first coordinate
    # must come from the VAR axis of this query (using the OBS axis here would
    # look up var rows with obs joinids). The second coordinate is left
    # unrestricted.
    full_table = var_scene.read(
        coords=(_Axis.VAR.getattr_from(self._joinids), slice(None)),
        result_order=options.ResultOrder.COLUMN_MAJOR,
        value_filter="data != 0",
    ).concat()

    return pacomp.unique(full_table["scene_id"])

def to_anndata(
self,
X_name: str,
Expand Down Expand Up @@ -826,6 +869,9 @@ def obs(self) -> data.DataFrame: ...
@property
def context(self) -> Optional[base_types.ContextBase]: ...

# Protocol member: a dataframe exposed under the name ``obs_spatial_presence``.
# NOTE(review): presumably the obs/scene join table that ``obs_scene_ids``
# reads via ``self.experiment`` — confirm against the implementing class.
@property
def obs_spatial_presence(self) -> data.DataFrame: ...


class _HasObsVar(Protocol[_T_co]):
"""Something which has an ``obs`` and ``var`` field.
Expand Down

0 comments on commit 73ccc06

Please sign in to comment.