From 6a993b7e6016574fe2f6cb93ac8889a4777d40e1 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 19 Feb 2024 03:05:39 -0800 Subject: [PATCH] Lots of changes regarding single-cell data Rename CellTypeLabelling to CellTypeAssignment and perform some related renaming. Add basic VOs for cell type assignment, protocol, etc. Add an explicit dependency on mtj since we use it for parsing MatrixMarket formats. --- gemma-core/pom.xml | 8 + .../matrix/BulkExpressionDataMatrix.java | 50 +++++- .../DoubleSingleCellExpressionDataMatrix.java | 135 +++++++++++++++++ .../matrix/EmptyExpressionMatrix.java | 9 +- .../matrix/ExpressionDataBooleanMatrix.java | 20 +-- .../matrix/ExpressionDataDoubleMatrix.java | 20 +-- .../matrix/ExpressionDataIntegerMatrix.java | 13 +- .../matrix/ExpressionDataMatrix.java | 89 ++++------- .../ExpressionDataMatrixColumnSort.java | 6 +- .../matrix/ExpressionDataStringMatrix.java | 13 +- .../SingleCellExpressionDataMatrix.java | 9 ++ .../singleCell/MexSingleCellDataLoader.java | 28 ++-- .../singleCell/SingleCellDataLoader.java | 15 +- .../java/ubic/gemma/core/util/ListUtils.java | 49 +++++- .../model/analysis/AnalysisValueObject.java | 13 ++ .../CellTypeAssignmentValueObject.java | 50 ++++++ .../model/analysis/ProtocolValueObject.java | 26 ++++ .../gemma/model/common/protocol/Protocol.java | 19 ++- .../quantitationtype/QuantitationType.java | 88 ++--------- .../bioAssayData/CellTypeAssignment.java | 75 +++++++++ .../bioAssayData/CellTypeLabelling.java | 57 ------- .../bioAssayData/SingleCellDimension.java | 28 ++-- .../SingleCellDimensionValueObject.java | 63 ++++++++ .../SingleCellExpressionDataVector.java | 3 +- .../designElement/CompositeSequence.java | 8 + .../ExpressionExperimentValueObject.java | 16 ++ .../experiment/ExpressionExperimentDao.java | 32 ++-- .../ExpressionExperimentDaoImpl.java | 54 +++++-- ...SingleCellExpressionExperimentService.java | 21 ++- ...leCellExpressionExperimentServiceImpl.java | 63 ++++---- 
.../persistence/util/ByteArrayUtils.java | 26 ++++ .../gemma/model/analysis/Analysis.hbm.xml | 20 +-- .../model/analysis/Investigation.hbm.xml | 2 +- .../bioAssayData/SingleCellDimension.hbm.xml | 4 +- ...exSingleCellDataLoaderPersistenceTest.java | 104 +++++++++++++ .../MexSingleCellDataLoaderTest.java | 50 ++---- .../expression/singleCell/MexTestUtils.java | 40 +++++ .../ExpressionExperimentDaoTest.java | 63 +++++++- ...leCellExpressionExperimentServiceTest.java | 142 ++++-------------- 39 files changed, 1008 insertions(+), 523 deletions(-) create mode 100644 gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DoubleSingleCellExpressionDataMatrix.java create mode 100644 gemma-core/src/main/java/ubic/gemma/model/analysis/CellTypeAssignmentValueObject.java create mode 100644 gemma-core/src/main/java/ubic/gemma/model/analysis/ProtocolValueObject.java create mode 100644 gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeAssignment.java delete mode 100644 gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java create mode 100644 gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimensionValueObject.java create mode 100644 gemma-core/src/main/java/ubic/gemma/persistence/util/ByteArrayUtils.java create mode 100644 gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderPersistenceTest.java create mode 100644 gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexTestUtils.java diff --git a/gemma-core/pom.xml b/gemma-core/pom.xml index 6d2caaa3ae..2a7f5d4d56 100644 --- a/gemma-core/pom.xml +++ b/gemma-core/pom.xml @@ -316,6 +316,14 @@ 4.2.2.GA + + + + com.googlecode.matrix-toolkits-java + mtj + 1.0.4 + + diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java index 
b3fd1e6350..1a655dd7e0 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java @@ -49,6 +49,18 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix { */ Collection getQuantitationTypes(); + /** + * @return a {@link BioAssayDimension} that covers all the biomaterials in this matrix. + * @throws IllegalStateException if there isn't a single bioassaydimension that encapsulates all the biomaterials + * used in the experiment. + */ + BioAssayDimension getBestBioAssayDimension(); + + /** + * @return true if any values are null or NaN (for Doubles); all other values are considered non-missing. + */ + boolean hasMissingValues(); + /** * Access a single value of the matrix. Note that because there can be multiple bioassays per column and multiple * designelements per row, it is possible for this method to retrieve a data that does not come from the bioassay @@ -69,6 +81,13 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix { */ T[][] get( List designElements, List bioAssays ); + /** + * Access the entire matrix. + * + * @return T[][] + */ + T[][] getRawMatrix(); + /** * Access a single column of the matrix. * @@ -85,6 +104,21 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix { */ T[][] getColumns( List bioAssays ); + + /** + * @return list of elements representing the row 'labels'. + */ + List getRowElements(); + + /** + * Number of columns that use the given design element. Useful if the matrix includes data from more than one array + * design. + * + * @param el el + * @return int + */ + int columns( CompositeSequence el ); + /** * @param index i * @return BioMaterial. 
Note that if this represents a subsetted data set, the BioMaterial may be a lightweight @@ -98,13 +132,6 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix { */ int getColumnIndex( BioMaterial bioMaterial ); - /** - * @return The bioassaydimension that covers all the biomaterials in this matrix. - * @throws IllegalStateException if there isn't a single bioassaydimension that encapsulates all the biomaterials - * used in the experiment. - */ - BioAssayDimension getBestBioAssayDimension(); - /** * Produce a BioAssayDimension representing the matrix columns for a specific row. The designelement argument is * needed because a matrix can combine data from multiple array designs, each of which will generate its own @@ -122,4 +149,13 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix { * used in the study. */ Collection getBioAssaysForColumn( int index ); + + /** + * Set a value in the matrix, by index + * + * @param row row + * @param column col + * @param value val + */ + void set( int row, int column, T value ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DoubleSingleCellExpressionDataMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DoubleSingleCellExpressionDataMatrix.java new file mode 100644 index 0000000000..1beae14df6 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DoubleSingleCellExpressionDataMatrix.java @@ -0,0 +1,135 @@ +package ubic.gemma.core.datastructure.matrix; + +import no.uib.cipr.matrix.sparse.CompRowMatrix; +import org.springframework.util.Assert; +import ubic.gemma.model.common.quantitationtype.QuantitationType; +import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; +import ubic.gemma.model.expression.designElement.CompositeSequence; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import 
ubic.gemma.persistence.util.ByteArrayUtils; + +import java.util.*; + +/** + * @author poirigui + */ +public class DoubleSingleCellExpressionDataMatrix implements SingleCellExpressionDataMatrix { + + private static final Comparator designElementComparator = Comparator.comparing( CompositeSequence::getName ) + .thenComparing( CompositeSequence::getId ); + + private final ExpressionExperiment expressionExperiment; + private final QuantitationType quantitationType; + private final SingleCellDimension singleCellDimension; + private final CompRowMatrix matrix; + private final List designElements; + + public DoubleSingleCellExpressionDataMatrix( Collection vectors ) { + Assert.isTrue( !vectors.isEmpty(), "At least one vector must be supplied. Use EmptyExpressionDataMatrix for empty data matrices instead." ); + Assert.isTrue( vectors.stream().map( SingleCellExpressionDataVector::getQuantitationType ).distinct().count() == 1, + "All vectors must have the same quantitation type." ); + Assert.isTrue( vectors.stream().map( SingleCellExpressionDataVector::getSingleCellDimension ).distinct().count() == 1, + "All vectors must have the same single-cell dimension." 
); + SingleCellExpressionDataVector vector = vectors.iterator().next(); + expressionExperiment = vector.getExpressionExperiment(); + quantitationType = vector.getQuantitationType(); + singleCellDimension = vector.getSingleCellDimension(); + // sort vectors by CS + List sortedVectors = new ArrayList<>( vectors ); + sortedVectors.sort( Comparator.comparing( SingleCellExpressionDataVector::getDesignElement, designElementComparator ) ); + int rows = sortedVectors.size(); + int i = 0; + int[][] nz = new int[rows][]; + for ( SingleCellExpressionDataVector v : sortedVectors ) { + nz[i++] = v.getDataIndices(); + } + matrix = new CompRowMatrix( rows, singleCellDimension.getNumberOfCells(), nz ); + designElements = new ArrayList<>( sortedVectors.size() ); + i = 0; + for ( SingleCellExpressionDataVector v : sortedVectors ) { + designElements.add( v.getDesignElement() ); + double[] row = ByteArrayUtils.byteArrayToDoubles( v.getData() ); + int[] indices = v.getDataIndices(); + for ( int j = 0; j < row.length; j++ ) { + matrix.set( i, indices[j], row[j] ); + } + i++; + } + } + + @Override + public ExpressionExperiment getExpressionExperiment() { + return expressionExperiment; + } + + @Override + public int columns() { + return matrix.numColumns(); + } + + @Override + public Double get( int row, int column ) { + return matrix.get( row, column ); + } + + @Override + public Double[] getColumn( int column ) { + Double[] vec = new Double[matrix.numRows()]; + for ( int j = 0; j < matrix.numRows(); j++ ) { + vec[j] = matrix.get( j, column ); + } + return vec; + } + + @Override + public List getDesignElements() { + return designElements; + } + + @Override + public CompositeSequence getDesignElementForRow( int index ) { + return designElements.get( index ); + } + + @Override + public Double[] getRow( CompositeSequence designElement ) { + int ix = getRowIndex( designElement ); + if ( ix == -1 ) { + return null; + } + return getRow( ix ); + } + + @Override + public Double[] getRow( int 
index ) { + Double[] vec = new Double[matrix.numColumns()]; + int[] rowptr = matrix.getRowPointers(); + int[] colind = matrix.getColumnIndices(); + double[] data = matrix.getData(); + for ( int i = rowptr[index]; i < rowptr[index + 1]; i++ ) { + vec[colind[i]] = data[i]; + } + return vec; + } + + @Override + public int getRowIndex( CompositeSequence designElement ) { + return Math.max( Collections.binarySearch( designElements, designElement, designElementComparator ), -1 ); + } + + @Override + public int rows() { + return matrix.numRows(); + } + + @Override + public QuantitationType getQuantitationType() { + return quantitationType; + } + + @Override + public SingleCellDimension getSingleCellDimension() { + return singleCellDimension; + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/EmptyExpressionMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/EmptyExpressionMatrix.java index 24325c7b28..1c6c012809 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/EmptyExpressionMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/EmptyExpressionMatrix.java @@ -84,7 +84,7 @@ public Object[] getColumn( BioAssay bioAssay ) { } @Override - public Object[] getColumn( Integer column ) { + public Object[] getColumn( int column ) { throw new UnsupportedOperationException(); } @@ -104,12 +104,7 @@ public Object[] getRow( CompositeSequence designElement ) { } @Override - public Object[] getRow( Integer index ) { - throw new UnsupportedOperationException(); - } - - @Override - public Object[][] getRows( List designElements ) { + public Object[] getRow( int index ) { throw new UnsupportedOperationException(); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataBooleanMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataBooleanMatrix.java index 17b5e8e762..967785b019 100644 --- 
a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataBooleanMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataBooleanMatrix.java @@ -91,7 +91,7 @@ public Boolean[] getColumn( BioAssay bioAssay ) { } @Override - public Boolean[] getColumn( Integer index ) { + public Boolean[] getColumn( int index ) { ObjectMatrix1D rawResult = this.matrix.viewColumn( index ); Boolean[] res = new Boolean[rawResult.size()]; int i = 0; @@ -135,26 +135,10 @@ public Boolean[] getRow( CompositeSequence designElement ) { } @Override - public Boolean[] getRow( Integer index ) { + public Boolean[] getRow( int index ) { return matrix.getRow( index ); } - @Override - public Boolean[][] getRows( List designElements ) { - if ( designElements == null ) { - return null; - } - - Boolean[][] result = new Boolean[designElements.size()][]; - int i = 0; - for ( CompositeSequence element : designElements ) { - Boolean[] rowResult = this.getRow( element ); - result[i] = rowResult; - i++; - } - return result; - } - @Override public boolean hasMissingValues() { for ( int i = 0; i < matrix.rows(); i++ ) { diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataDoubleMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataDoubleMatrix.java index 2f7615a815..8762372430 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataDoubleMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataDoubleMatrix.java @@ -314,7 +314,7 @@ public Double[] getColumn( BioAssay bioAssay ) { } @Override - public Double[] getColumn( Integer index ) { + public Double[] getColumn( int index ) { double[] rawResult = this.matrix.getColumn( index ); assert rawResult != null; Double[] result = new Double[rawResult.length]; @@ -350,27 +350,11 @@ public Double[] getRow( CompositeSequence designElement ) { } @Override - public Double[] 
getRow( Integer index ) { + public Double[] getRow( int index ) { double[] rawRow = matrix.getRow( index ); return ArrayUtils.toObject( rawRow ); } - @Override - public Double[][] getRows( List designElements ) { - if ( designElements == null ) { - return null; - } - - Double[][] result = new Double[designElements.size()][]; - int i = 0; - for ( CompositeSequence element : designElements ) { - Double[] rowResult = this.getRow( element ); - result[i] = rowResult; - i++; - } - return result; - } - @Override public boolean hasMissingValues() { for ( int i = 0; i < matrix.rows(); i++ ) { diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataIntegerMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataIntegerMatrix.java index 258ef2fc27..a6dc073b9e 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataIntegerMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataIntegerMatrix.java @@ -70,7 +70,7 @@ public Integer[] getColumn( BioAssay bioAssay ) { } @Override - public Integer[] getColumn( Integer index ) { + public Integer[] getColumn( int index ) { return this.matrix.getColumn( index ); } @@ -98,19 +98,10 @@ public Integer[] getRow( CompositeSequence designElement ) { } @Override - public Integer[] getRow( Integer index ) { + public Integer[] getRow( int index ) { return this.matrix.getRow( index ); } - @Override - public Integer[][] getRows( List designElements ) { - Integer[][] res = new Integer[this.rows()][]; - for ( int i = 0; i < designElements.size(); i++ ) { - res[i] = this.matrix.getRow( this.getRowIndex( designElements.get( i ) ) ); - } - return res; - } - @Override public boolean hasMissingValues() { for ( int i = 0; i < matrix.rows(); i++ ) { diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrix.java 
b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrix.java index 3f9715f2f3..17c61bf95b 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrix.java @@ -21,13 +21,18 @@ import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import javax.annotation.Nullable; import java.util.List; /** * Represents a matrix of data from an {@link ExpressionExperiment}. + *

+ * The rows of this matrix represent design elements. * * @author pavlidis * @author keshav + * @see BulkExpressionDataMatrix + * @see SingleCellExpressionDataMatrix */ public interface ExpressionDataMatrix { @@ -37,103 +42,63 @@ public interface ExpressionDataMatrix { ExpressionExperiment getExpressionExperiment(); /** - * Total number of columns. - * - * @return int + * Obtain all the design elements in this data matrix. */ - int columns(); + List getDesignElements(); /** - * Number of columns that use the given design element. Useful if the matrix includes data from more than one array - * design. + * Return a design element for a given index. * - * @param el el - * @return int + * @throws IndexOutOfBoundsException if the supplied index is not within zero and {@link #rows()} */ - int columns( CompositeSequence el ); + CompositeSequence getDesignElementForRow( int index ); /** - * Access a single value of the matrix. This is generally the easiest way to do it. - * - * @param row row - * @param column col - * @return t + * Obtain the total number of columns. */ - T get( int row, int column ); + int columns(); /** * Access a single column of the matrix. * * @param column index * @return T[] + * @throws IndexOutOfBoundsException if the supplied index is not within zero and {@link #columns()} */ - T[] getColumn( Integer column ); - - /** - * Obtain all the design elements in this data matrix. - */ - List getDesignElements(); + T[] getColumn( int column ); /** - * @param index i - * @return cs + * @return int */ - CompositeSequence getDesignElementForRow( int index ); + int rows(); /** - * Access the entire matrix. + * Access a single row of the matrix, by index. A complete row is returned. * - * @return T[][] + * @param index i + * @return t[] + * @throws IndexOutOfBoundsException if the supplied index is not within zero and {@link #rows()} */ - T[][] getRawMatrix(); + T[] getRow( int index ); /** * Return a row that 'came from' the given design element. 
* * @param designElement de - * @return t + * @return the corresponding row or null if the design element is not found in the matrix */ + @Nullable T[] getRow( CompositeSequence designElement ); /** - * Access a single row of the matrix, by index. A complete row is returned. - * - * @param index i - * @return t[] + * @return the index for the given design element, or -1 if not found */ - T[] getRow( Integer index ); - - /** - * @return list of elements representing the row 'labels'. - */ - List getRowElements(); - int getRowIndex( CompositeSequence designElement ); /** - * Access a submatrix + * Access a single value of the matrix by row and column. * - * @param designElements de - * @return T[][] - */ - T[][] getRows( List designElements ); - - /** - * @return true if any values are null or NaN (for Doubles); all other values are considered non-missing. + * @throws IndexOutOfBoundsException if either the row or column is outside the matrix bounds */ - boolean hasMissingValues(); - - /** - * @return int - */ - int rows(); - - /** - * Set a value in the matrix, by index - * - * @param row row - * @param column col - * @param value val - */ - void set( int row, int column, T value ); + T get( int row, int column ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrixColumnSort.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrixColumnSort.java index 2333fad2f5..7c0ff3ecbf 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrixColumnSort.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrixColumnSort.java @@ -204,7 +204,7 @@ public static DoubleMatrix orderByExperimentalDesign( DoubleMat * @param mat matrix * @return bio materials */ - public static List orderByExperimentalDesign( ExpressionDataMatrix mat ) { + public static List orderByExperimentalDesign( BulkExpressionDataMatrix mat ) { List start = 
ExpressionDataMatrixColumnSort.getBms( mat ); List ordered = ExpressionDataMatrixColumnSort.orderByExperimentalDesign( start, null ); @@ -466,7 +466,7 @@ private static LinkedHashMap> chunkOnFactor( Expe /** * Get all biomaterials for a matrix. */ - private static List getBms( ExpressionDataMatrix mat ) { + private static List getBms( BulkExpressionDataMatrix mat ) { List result = new ArrayList<>(); for ( int i = 0; i < mat.columns(); i++ ) { result.add( mat.getBioMaterialForColumn( i ) ); @@ -476,6 +476,7 @@ private static List getBms( ExpressionDataMatrix mat ) { /** * Get all (non-constant) factors used by the passed biomaterials + * * @param bms biomaterials * @return factors relevant to these biomaterials, ignoring those which have constant values. */ @@ -574,6 +575,7 @@ private static List orderByFactor( ExperimentalFactor ef, Map

* Any batch factor is used last (we sort by batch only within the most granular factor's levels) *

+ * * @param start biomaterials to sort * @param factors sorted list of factors to define sort order for biomaterials, cannot be null */ diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataStringMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataStringMatrix.java index 111409dc2e..5bbadb48d9 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataStringMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataStringMatrix.java @@ -94,7 +94,7 @@ public String[] getColumn( BioAssay bioAssay ) { } @Override - public String[] getColumn( Integer index ) { + public String[] getColumn( int index ) { return this.matrix.getColumn( index ); } @@ -122,19 +122,10 @@ public String[] getRow( CompositeSequence designElement ) { } @Override - public String[] getRow( Integer index ) { + public String[] getRow( int index ) { return matrix.getRow( index ); } - @Override - public String[][] getRows( List designElements ) { - String[][] res = new String[this.rows()][]; - for ( int i = 0; i < designElements.size(); i++ ) { - res[i] = this.matrix.getRow( this.getRowIndex( designElements.get( i ) ) ); - } - return res; - } - @Override public boolean hasMissingValues() { for ( int i = 0; i < matrix.rows(); i++ ) { diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellExpressionDataMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellExpressionDataMatrix.java index 2821fa3329..c9118be112 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellExpressionDataMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellExpressionDataMatrix.java @@ -24,4 +24,13 @@ public interface SingleCellExpressionDataMatrix extends ExpressionDataMatrix< * Return the single-cell dimension for this matrix. 
*/ SingleCellDimension getSingleCellDimension(); + + /** + * {@inheritDoc} + *

+ * Important note: Retrieving a column is a {@code O(n log m)} operation where {@code n} is the number of + * vectors and {@code m} is the number of cells. Always favour row-oriented operations when possible. + */ + @Override + T[] getColumn( int column ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoader.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoader.java index eb59585463..327385f10e 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoader.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoader.java @@ -8,9 +8,8 @@ import org.springframework.util.Assert; import ubic.basecode.io.ByteArrayConverter; import ubic.gemma.model.common.quantitationtype.*; -import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; -import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; import ubic.gemma.model.expression.designElement.CompositeSequence; @@ -25,8 +24,6 @@ import java.util.stream.Stream; import java.util.zip.GZIPInputStream; -import static java.util.function.Function.identity; - /** * Load single cell data from 10X Genomics MEX format. * @@ -59,13 +56,17 @@ public MexSingleCellDataLoader( List sampleNames, List barcodeFile && barcodeFiles.size() == genesFiles.size() && genesFiles.size() == matrixFiles.size(), "There must be exactly the same number of each type of files." 
); - this.sampleNames = sampleNames; + this.sampleNames = Collections.unmodifiableList( sampleNames ); this.barcodeFiles = barcodeFiles; this.genesFiles = genesFiles; this.matrixFiles = matrixFiles; this.numberOfSamples = barcodeFiles.size(); } + public List getSampleNames() { + return sampleNames; + } + @Override public SingleCellDimension getSingleCellDimension( Collection bioAssays ) throws IOException { SingleCellDimension scd = new SingleCellDimension(); @@ -106,15 +107,12 @@ public Set getQuantitationTypes() { * MEX does not provide cell type labels. */ @Override - public Optional getCellTypeLabelling() { + public Optional getCellTypeLabelling() { return Optional.empty(); } @Override - public Stream loadVectors( ArrayDesign platform, SingleCellDimension scd, QuantitationType quantitationType ) throws IOException { - Map probeByName = platform.getCompositeSequences().stream() - .collect( Collectors.toMap( CompositeSequence::getName, identity() ) ); - + public Stream loadVectors( Map elementsMapping, SingleCellDimension scd, QuantitationType quantitationType ) throws IOException { // location of a given element in individual matrices Map elementsToSampleMatrixRow = new HashMap<>(); ArrayList matrices = new ArrayList<>( numberOfSamples ); @@ -129,9 +127,9 @@ public Stream loadVectors( ArrayDesign platform, String[] pieces = s.split( "\t", 3 ); String geneId = pieces[0]; String geneSymbol = pieces[1]; - CompositeSequence probe = probeByName.get( geneId ); + CompositeSequence probe = elementsMapping.get( geneId ); if ( probe == null && allowMappingProbeNamesToGeneSymbols ) { - probe = probeByName.get( geneSymbol ); + probe = elementsMapping.get( geneSymbol ); } if ( probe == null ) { missingElements.add( geneId ); @@ -149,11 +147,11 @@ public Stream loadVectors( ArrayDesign platform, } if ( missingElements.size() == elements.size() ) { - throw new IllegalArgumentException( "None of the elements of " + platform + " match genes from " + genesFile + "." 
); + throw new IllegalArgumentException( "None of the elements matched genes from " + genesFile + "." ); } else if ( missingElements.size() > 10 ) { - log.warn( String.format( "%s does not have elements for %d/%d genes from %s.", platform, missingElements.size(), elements.size(), genesFile ) ); + log.warn( String.format( "The supplied mapping does not have elements for %d/%d genes from %s.", missingElements.size(), elements.size(), genesFile ) ); } else if ( !missingElements.isEmpty() ) { - log.warn( String.format( "%s does not have elements for the following genes: %s from %s.", platform, + log.warn( String.format( "The supplied mapping does not have elements for the following genes: %s from %s.", missingElements.stream().sorted().collect( Collectors.joining( ", " ) ), genesFile ) ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/SingleCellDataLoader.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/SingleCellDataLoader.java index 61591ecb9d..cc32470874 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/SingleCellDataLoader.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/SingleCellDataLoader.java @@ -1,14 +1,15 @@ package ubic.gemma.core.loader.expression.singleCell; import ubic.gemma.model.common.quantitationtype.QuantitationType; -import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; -import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; +import ubic.gemma.model.expression.designElement.CompositeSequence; import java.io.IOException; import java.util.Collection; +import java.util.Map; import java.util.Optional; import java.util.Set; import 
java.util.stream.Stream; @@ -40,18 +41,18 @@ public interface SingleCellDataLoader { /** * Load single-cell type labelling present in the data. */ - Optional getCellTypeLabelling() throws IOException; + Optional getCellTypeLabelling() throws IOException; /** * Produces a stream of single-cell expression data vectors for the given {@link QuantitationType}. - *

- * Make sure to close the stream when done, preferably using a try-with-resource block. * - * @param platform a platform to use when mapping vectors to probes/genes + * @param elementsMapping a mapping of element names used in the dataset to {@link CompositeSequence} * @param dimension a dimension to use for creating vectors, may be loaded from the single-cell data with * {@link #getSingleCellDimension(Collection)} * @param quantitationType a quantitation type to extract from the data for, may be loaded from the single-cell data * with {@link #getQuantitationTypes()} + * @return a stream of single-cell expression data vectors that must be closed when done, preferably using a + * try-with-resource block. */ - Stream loadVectors( ArrayDesign platform, SingleCellDimension dimension, QuantitationType quantitationType ) throws IOException; + Stream loadVectors( Map elementsMapping, SingleCellDimension dimension, QuantitationType quantitationType ) throws IOException; } diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java b/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java index 8caf21bf72..27fb81e20f 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java @@ -11,6 +11,7 @@ /** * Utilities and algorithms for {@link List}. + * * @author poirigui */ public class ListUtils { @@ -31,6 +32,7 @@ public static Map indexOfElements( List list ) { /** * Get a case-insensitive mapping of string elements to their first occurrence in a {@link List}. + * * @see #indexOfElements(List) */ public static Map indexOfCaseInsensitiveStringElements( List list ) { @@ -49,25 +51,57 @@ private static void fillMap( Map element2position, List list } } + /** + * Get an element of a sparse array. 
+ * + * @param array + * @param indices + * @param index + * @param defaultValue + * @param + * @return + */ + public static T getSparseArrayElement( T[] array, int[] indices, int numberOfElements, int index, T defaultValue ) { + Assert.isTrue( array.length == indices.length, + String.format( "Invalid size for sparse array, it must contain %d indices.", array.length ) ); + // special case for dense array + if ( indices.length == numberOfElements ) { + return array[index]; + } + if ( index < 0 ) { + // FIXME: add support for negative indexing + throw new IndexOutOfBoundsException( "Negative indexing of sparse range arrays is not allowed." ); + } + if ( index >= numberOfElements ) { + throw new IndexOutOfBoundsException( "The index exceeds the upper bound of the array." ); + } + int offset = binarySearch( indices, index ); + if ( offset < 0 ) { + return defaultValue; + } + return array[offset]; + } + /** * Get an element of a sparse range array. + * * @param array collection of elements applying for the ranges * @param offsets starting offsets of the ranges * @param numberOfElements the size of the original array * @param index a position to retrieve - * @throws ArrayIndexOutOfBoundsException if the index is out of bounds - * @throws IllegalArgumentException if the array and offsets do not have the same size + * @throws IndexOutOfBoundsException if the requested index is out of bounds + * @throws IllegalArgumentException if the array is empty or its size differs from offsets * @see #validateSparseRangeArray(List, int[], int) */ - public static T getSparseRangeArrayElement( List array, int[] offsets, int numberOfElements, int index ) { + public static T getSparseRangeArrayElement( List array, int[] offsets, int numberOfElements, int index ) throws IllegalArgumentException, IndexOutOfBoundsException { Assert.isTrue( array.size() == offsets.length, - String.format( "Invalid size for offsets array, it must contain %d indices.", array.size() ) ); + String.format( "Invalid 
size for sparse range array, it must contain %d indices.", array.size() ) ); if ( index < 0 ) { // FIXME: add support for negative indexing - throw new ArrayIndexOutOfBoundsException( "Negative indexing of sparse range arrays is not allowed." ); + throw new IndexOutOfBoundsException( "Negative indexing of sparse range arrays is not allowed." ); } if ( index >= numberOfElements ) { - throw new ArrayIndexOutOfBoundsException( "The index exceeds the upper bound of the array." ); + throw new IndexOutOfBoundsException( "The index exceeds the upper bound of the array." ); } int offset = binarySearch( offsets, index ); if ( offset < 0 ) { @@ -78,12 +112,15 @@ public static T getSparseRangeArrayElement( List array, int[] offsets, in /** * Validate a sparse range array. + * * @param array collection of elements applying for the ranges * @param offsets starting offsets of the ranges * @param numberOfElements the size of the original array * @throws IllegalArgumentException if the sparse range array is invalid */ public static void validateSparseRangeArray( List array, int[] offsets, int numberOfElements ) throws IllegalArgumentException { + Assert.isTrue( numberOfElements == 0 || !array.isEmpty(), + "A non-empty sparse range array must have at least one element." ); Assert.isTrue( array.size() == offsets.length, "There must be as many offsets as entries in the corresponding array." 
); int k = 0; diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/AnalysisValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/analysis/AnalysisValueObject.java index 41dc9bac31..b6de5faf67 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/analysis/AnalysisValueObject.java +++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/AnalysisValueObject.java @@ -4,11 +4,24 @@ public abstract class AnalysisValueObject extends IdentifiableValueObject { + private ProtocolValueObject protocol; + protected AnalysisValueObject() { super(); } protected AnalysisValueObject( T analysis ) { super( analysis ); + if ( analysis.getProtocol() != null ) { + this.protocol = new ProtocolValueObject( analysis.getProtocol() ); + } + } + + public ProtocolValueObject getProtocol() { + return protocol; + } + + public void setProtocol( ProtocolValueObject protocol ) { + this.protocol = protocol; } } diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/CellTypeAssignmentValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/analysis/CellTypeAssignmentValueObject.java new file mode 100644 index 0000000000..6e8e887dd3 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/CellTypeAssignmentValueObject.java @@ -0,0 +1,50 @@ +package ubic.gemma.model.analysis; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.extern.apachecommons.CommonsLog; +import ubic.gemma.model.common.description.CharacteristicValueObject; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; + +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * @author poirigui + */ +@Data +@EqualsAndHashCode(callSuper = true) +@CommonsLog +public class CellTypeAssignmentValueObject extends AnalysisValueObject { + + /** + * A list of IDs, one-per-cell, that refers to one of the cell type labels in {@link #cellTypes}. + *

+ * {@code null} is used to indicate an unknown cell type. + */ + private List cellTypeIds; + + /** + * A set of cell types that are assigned to individual cells. + */ + private Set cellTypes; + + public CellTypeAssignmentValueObject( CellTypeAssignment cellTypeAssignment ) { + super( cellTypeAssignment ); + try { + cellTypeIds = Arrays.stream( cellTypeAssignment.getCellTypeIndices() ) + .mapToObj( cellTypeAssignment::getCellType ) + .map( characteristic -> characteristic != null ? characteristic.getId() : null ) + .collect( Collectors.toList() ); + } catch ( IndexOutOfBoundsException e ) { + // this may happen because getCellType() can fail if the data we have is incorrect, but we don't want to + // break the VO serialization which would break the REST API. + log.warn( "Cell type IDs is invalid for " + cellTypeAssignment + "." ); + } + cellTypes = cellTypeAssignment.getCellTypes().stream() + .map( CharacteristicValueObject::new ) + .collect( Collectors.toSet() ); + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/ProtocolValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/analysis/ProtocolValueObject.java new file mode 100644 index 0000000000..a821658ef7 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/ProtocolValueObject.java @@ -0,0 +1,26 @@ +package ubic.gemma.model.analysis; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import ubic.gemma.model.IdentifiableValueObject; +import ubic.gemma.model.common.description.CharacteristicValueObject; +import ubic.gemma.model.common.protocol.Protocol; + +import java.util.Set; + +@Data +@EqualsAndHashCode(callSuper = true) +public class ProtocolValueObject extends IdentifiableValueObject { + + private String name; + + private String description; + + private Set characteristics; + + public ProtocolValueObject( Protocol protocol ) { + super( protocol ); + this.name = protocol.getName(); + this.description = protocol.getDescription(); + } +} diff --git 
a/gemma-core/src/main/java/ubic/gemma/model/common/protocol/Protocol.java b/gemma-core/src/main/java/ubic/gemma/model/common/protocol/Protocol.java index 781665b209..12ef59c785 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/protocol/Protocol.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/protocol/Protocol.java @@ -1,8 +1,8 @@ /* * The Gemma project. - * + * * Copyright (c) 2006-2012 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -20,13 +20,28 @@ import gemma.gsec.model.Securable; import ubic.gemma.model.common.AbstractDescribable; +import ubic.gemma.model.common.description.Characteristic; import java.io.Serializable; +import java.util.Set; public class Protocol extends AbstractDescribable implements Securable, Serializable { private static final long serialVersionUID = -1902891452989019766L; + /** + * Characteristics describing the protocol. 
+ */ + private Set characteristics; + + public Set getCharacteristics() { + return characteristics; + } + + public void setCharacteristics( Set characteristics ) { + this.characteristics = characteristics; + } + public static final class Factory { public static Protocol newInstance() { diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/quantitationtype/QuantitationType.java b/gemma-core/src/main/java/ubic/gemma/model/common/quantitationtype/QuantitationType.java index ab1b243ed5..7b130d5b66 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/quantitationtype/QuantitationType.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/quantitationtype/QuantitationType.java @@ -214,84 +214,27 @@ public boolean equals( Object object ) { return false; } final QuantitationType that = ( QuantitationType ) object; - if ( that.getId() != null && this.getId() != null ) { - return Objects.equals( that.getId(), this.getId() ); - } - - if ( that.getName() != null && this.getName() != null && !this.getName().equals( that.getName() ) ) { - return false; - } - - if ( this.getScale() != null && that.getScale() != null && !this.getScale().equals( that.getScale() ) ) { - return false; - } - - if ( this.getIsPreferred() != that.getIsPreferred() ) { - return false; - } - - if ( this.getIsRatio() != that.getIsRatio() ) { - return false; - } - - if ( this.getIsNormalized() != that.getIsNormalized() ) { - return false; - } - - if ( this.getIsBackground() != that.getIsBackground() ) { - return false; + return getId().equals( that.getId() ); } - - if ( this.getIsBackgroundSubtracted() != that.getIsBackgroundSubtracted() ) { - return false; - } - - if ( this.getGeneralType() != null && that.getGeneralType() != null && !this.getGeneralType() - .equals( that.getGeneralType() ) ) { - return false; - } - - //noinspection SimplifiableIfStatement // Better readability - if ( this.getRepresentation() != null && that.getRepresentation() != null && !this.getRepresentation() - 
.equals( that.getRepresentation() ) ) { - return false; - } - - return this.getType() == null || that.getRepresentation() == null || this.getType().equals( that.getType() ); + return Objects.equals( getName(), that.getName() ) + && Objects.equals( scale, that.scale ) + && Objects.equals( isPreferred, that.isPreferred ) + && Objects.equals( isRatio, that.isRatio ) + && Objects.equals( isNormalized, that.isNormalized ) + && Objects.equals( isBackground, that.isBackground ) + && Objects.equals( isBackgroundSubtracted, that.isBackgroundSubtracted ) + && Objects.equals( isBatchCorrected, that.isBatchCorrected ) + && Objects.equals( type, that.type ) + && Objects.equals( generalType, that.generalType ) + && Objects.equals( representation, that.representation ) + && Objects.equals( isRecomputedFromRawData, that.isRecomputedFromRawData ); } @Override public int hashCode() { - int hashCode = 0; - hashCode = 29 * hashCode + ( this.getId() == null ? this.computeHashCode() : this.getId().hashCode() ); - return hashCode; - } - - private int computeHashCode() { - int hashCode = 0; - if ( this.getName() != null ) { - hashCode = hashCode + this.getName().hashCode(); - } - if ( this.getType() != null ) { - hashCode = hashCode + this.getType().hashCode(); - } - if ( this.getRepresentation() != null ) { - hashCode = hashCode + this.getRepresentation().hashCode(); - } - if ( this.getGeneralType() != null ) { - hashCode = hashCode + this.getGeneralType().hashCode(); - } - if ( this.getScale() != null ) { - hashCode = hashCode + this.getScale().hashCode(); - } - hashCode += Boolean.hashCode( this.getIsBackground() ); - hashCode += Boolean.hashCode( this.getIsBackgroundSubtracted() ); - hashCode += Boolean.hashCode( this.getIsNormalized() ); - hashCode += Boolean.hashCode( this.getIsPreferred() ); - hashCode += Boolean.hashCode( this.getIsRatio() ); - - return hashCode; + return Objects.hash( getName(), type, representation, generalType, scale, isBackground, isBackgroundSubtracted, + 
isNormalized, isPreferred, isBatchCorrected, isRatio, isRecomputedFromRawData ); } @Override @@ -350,6 +293,7 @@ public static QuantitationType newInstance( QuantitationType quantitationType ) result.isBackground = quantitationType.isBackground; result.isBackgroundSubtracted = quantitationType.isBackgroundSubtracted; result.isBatchCorrected = quantitationType.isBatchCorrected; + result.isRecomputedFromRawData = quantitationType.isRecomputedFromRawData; return result; } diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeAssignment.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeAssignment.java new file mode 100644 index 0000000000..14e6d9f3f4 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeAssignment.java @@ -0,0 +1,75 @@ +package ubic.gemma.model.expression.bioAssayData; + +import lombok.Getter; +import lombok.Setter; +import ubic.gemma.model.analysis.Analysis; +import ubic.gemma.model.common.description.Characteristic; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +/** + * Represents the labelling of cell types. + */ +@Getter +@Setter +public class CellTypeAssignment extends Analysis { + + /** + * A special indicator for {@link #cellTypeIndices} when the cell type is unknown. + */ + public static final int UNKNOWN_CELL_TYPE = -1; + + /** + * Indicate if this labelling is the preferred one. + */ + private boolean preferred; + + /** + * Cell types assignment to individual cells from the {@link #cellTypes} collections. + *

+ * The value {@code -1} is used to indicate an unknown cell type. + */ + private int[] cellTypeIndices; + + /** + * List of cell types. + */ + private List cellTypes = new ArrayList<>(); + + /** + * Number of cell types. + *

+ * This must always be equal to number of elements of {@link #cellTypes}. + */ + private Integer numberOfCellTypes; + + /** + * Obtain the type assignment of a given cell. + * + * @return the type assignment of a given cell, or null if the type was assigned to {@link #UNKNOWN_CELL_TYPE}. + * @throws IndexOutOfBoundsException if the cell index is out of range or if the value is outside the range of {@link #cellTypes} + */ + @Nullable + public Characteristic getCellType( int cellIndex ) throws IndexOutOfBoundsException { + int i = cellTypeIndices[cellIndex]; + if ( i == UNKNOWN_CELL_TYPE ) { + return null; + } else { + return cellTypes.get( i ); + } + } + + @Override + public int hashCode() { + return Objects.hash( Arrays.hashCode( cellTypeIndices ), cellTypes ); + } + + @Override + public boolean equals( Object object ) { + return super.equals( object ); + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java deleted file mode 100644 index b67bf182e0..0000000000 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java +++ /dev/null @@ -1,57 +0,0 @@ -package ubic.gemma.model.expression.bioAssayData; - -import lombok.Getter; -import lombok.Setter; -import org.springframework.util.Assert; -import ubic.gemma.model.analysis.Analysis; -import ubic.gemma.model.common.description.Characteristic; - -import java.util.Arrays; -import java.util.List; -import java.util.Objects; - -/** - * Represents the labelling of cell types. - */ -@Getter -@Setter -public class CellTypeLabelling extends Analysis { - - /** - * Indicate if this labelling is the preferred one. - */ - private boolean preferred; - - /** - * Cell types assignment to individual cells from the {@link #cellTypeLabels} collections. - */ - private int[] cellTypes; - - /** - * Cell type labels.
- */ - private List cellTypeLabels; - - /** - * Number of distinct cell types. - *

- * This must always be equal to number of distinct elements of {@link #cellTypeLabels}. - */ - private Integer numberOfCellTypeLabels; - - public Characteristic getCellTypeLabel( int index ) { - Assert.notNull( cellTypes, "No cell types have been assigned." ); - Assert.notNull( cellTypeLabels, "No cell labels exist." ); - return cellTypeLabels.get( cellTypes[index] ); - } - - @Override - public int hashCode() { - return Objects.hash( Arrays.hashCode( cellTypes ), cellTypeLabels ); - } - - @Override - public boolean equals( Object object ) { - return super.equals( object ); - } -} diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java index 5e3dba7915..3c172c0c4c 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java @@ -11,6 +11,12 @@ import static ubic.gemma.core.util.ListUtils.getSparseRangeArrayElement; +/** + * Represents a single-cell dimension, holding shared information for a set of {@link SingleCellExpressionDataVector}. + * + * @author poirigui + * @see SingleCellExpressionDataVector + */ @Getter @Setter public class SingleCellDimension implements Identifiable { @@ -29,28 +35,28 @@ public class SingleCellDimension implements Identifiable { /** * Number of cells. *

- * This should always be equal to the size of {@link #cellIds}. + * This must always be equal to the size of {@link #cellIds}. */ private int numberOfCells = 0; /** * Set of cell types assignment to individual cells. This is empty if no cell types have been assigned and should - * always contain a preferred labelling as per {@link CellTypeLabelling#preferred} if non-empty. + * always contain a preferred labelling as per {@link CellTypeAssignment#isPreferred()} if non-empty. */ - private Set cellTypeLabellings = new HashSet<>(); + private Set cellTypeAssignments = new HashSet<>(); /** - * List of bioassays that each cell belongs to. + * List of {@link BioAssay}s applicable to the cells. *

- * The {@link BioAssay} {@code bioAssays[sampleIndex]} applies to all the cells in the interval {@code [bioAssaysOffset[sampleIndex], bioAssaysOffset[sampleIndex+1][}. - * To find the bioassay type of a given cell, use {@link #getBioAssay(int)}. + * The {@link BioAssay} in {@code bioAssays[sampleIndex]} applies to all the cells in the interval {@code [bioAssaysOffset[sampleIndex], bioAssaysOffset[sampleIndex+1][}. + * To find the bioassay of a given cell, use {@link #getBioAssay(int)}. */ private List bioAssays = new ArrayList<>(); /** * Offsets of the bioassays. *

- * This always contain {@code bioAssays.size()} elements. + * This must always contain {@code bioAssays.size()} elements. *

* This is stored in the database using {@link ByteArrayType}. */ @@ -60,8 +66,10 @@ public class SingleCellDimension implements Identifiable { * Obtain the {@link BioAssay} for a given cell position. * * @param cellIndex the cell position in {@link #cellIds} + * @throws IllegalArgumentException if the sparse range array is invalid as per {@link ubic.gemma.core.util.ListUtils#getSparseRangeArrayElement(List, int[], int, int)} + * @throws IndexOutOfBoundsException if the index is out of bounds */ - public BioAssay getBioAssay( int cellIndex ) { + public BioAssay getBioAssay( int cellIndex ) throws IndexOutOfBoundsException { return getSparseRangeArrayElement( bioAssays, bioAssaysOffset, cellIds.size(), cellIndex ); } @@ -71,11 +79,13 @@ public BioAssay getBioAssay( int cellIndex ) { * @param sampleIndex the sample position in {@link #bioAssays} */ public List getCellIdsBySample( int sampleIndex ) { - return cellIds.subList( bioAssaysOffset[sampleIndex], bioAssaysOffset[sampleIndex] + getNumberOfCellsBySample( sampleIndex ) ); + return Collections.unmodifiableList( cellIds.subList( bioAssaysOffset[sampleIndex], bioAssaysOffset[sampleIndex] + getNumberOfCellsBySample( sampleIndex ) ) ); } /** * Obtain the number for cells for the given sample. + *

+ * This is more efficient than looking up the size of {@link #getCellIdsBySample(int)}. * * @param sampleIndex the sample position in {@link #bioAssays} */ diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimensionValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimensionValueObject.java new file mode 100644 index 0000000000..d5277ecdac --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimensionValueObject.java @@ -0,0 +1,63 @@ +package ubic.gemma.model.expression.bioAssayData; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.extern.apachecommons.CommonsLog; +import ubic.gemma.model.IdentifiableValueObject; +import ubic.gemma.model.analysis.CellTypeAssignmentValueObject; +import ubic.gemma.model.expression.bioAssay.BioAssay; +import ubic.gemma.model.expression.bioAssay.BioAssayValueObject; +import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.List; + +/** + * Value object for a single-cell dimension. + *

+ * {@link BioAssay}s are unpacked into a list of IDs. This is suitable because this object is displayed in the context + * of an {@link ExpressionExperimentValueObject} and its associated {@link BioAssayValueObject}. + * + * @author poirigui + */ +@Data +@EqualsAndHashCode(callSuper = true) +@CommonsLog +public class SingleCellDimensionValueObject extends IdentifiableValueObject { + + /** + * Cell identifiers. + */ + private List cellIds; + + /** + * A list of {@link ubic.gemma.model.expression.bioAssay.BioAssay} IDs that are applicable to the cells. + */ + private List bioAssayIds; + + /** + * The preferred cell type assignment. + */ + @Nullable + private CellTypeAssignmentValueObject cellTypeAssignment; + + /** + * @param cellTypeAssignment a featured cell type assignment from {@link SingleCellDimension#getCellTypeAssignments()} + */ + public SingleCellDimensionValueObject( SingleCellDimension singleCellDimension, @Nullable CellTypeAssignment cellTypeAssignment ) { + super( singleCellDimension ); + this.cellIds = singleCellDimension.getCellIds(); + this.bioAssayIds = new ArrayList<>( singleCellDimension.getCellIds().size() ); + try { + for ( int i = 0; i < singleCellDimension.getCellIds().size(); i++ ) { + this.bioAssayIds.add( singleCellDimension.getBioAssay( i ).getId() ); + } + } catch ( IllegalArgumentException | IndexOutOfBoundsException e ) { + log.warn( "The bioassays sparse range array is invalid for " + singleCellDimension, e ); + } + if ( cellTypeAssignment != null ) { + this.cellTypeAssignment = new CellTypeAssignmentValueObject( cellTypeAssignment ); + } + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellExpressionDataVector.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellExpressionDataVector.java index 8278286721..a704f16122 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellExpressionDataVector.java +++ 
b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellExpressionDataVector.java @@ -8,11 +8,12 @@ import java.util.Objects; /** - * An expression data vector that contains data at the resolution of a single cell. + * An expression data vector that contains data at the resolution of individual cells. *

* This is achieved by storing cell metadata such as IDs and cell types in a {@link SingleCellDimension} that is shared * among all vectors of a given {@link ubic.gemma.model.expression.experiment.ExpressionExperiment} and individual * non-zero cell expression in a sparse data structure similar to the rows of a CSR matrix. + * * @author poirigui */ @Getter diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/designElement/CompositeSequence.java b/gemma-core/src/main/java/ubic/gemma/model/expression/designElement/CompositeSequence.java index a09ddcd534..5c091ee13d 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/designElement/CompositeSequence.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/designElement/CompositeSequence.java @@ -133,6 +133,14 @@ public static CompositeSequence newInstance( String name, ArrayDesign ad ) { cs.setArrayDesign( ad ); return cs; } + + public static CompositeSequence newInstance( String name, ArrayDesign ad, BioSequence bioSequence ) { + CompositeSequence cs = new CompositeSequence(); + cs.setName( name ); + cs.setArrayDesign( ad ); + cs.setBiologicalCharacteristic( bioSequence ); + return cs; + } } } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java index 046b57a557..0a4077b4cb 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java @@ -13,6 +13,9 @@ import org.hibernate.Hibernate; import ubic.gemma.model.annotations.GemmaWebOnly; import ubic.gemma.model.common.auditAndSecurity.curation.AbstractCuratableValueObject; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; +import 
ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.bioAssayData.SingleCellDimensionValueObject; import ubic.gemma.model.genome.TaxonValueObject; import ubic.gemma.persistence.util.EntityUtils; @@ -74,6 +77,12 @@ public class ExpressionExperimentValueObject extends AbstractCuratableValueObjec private String technologyType; + /** + * The single-cell dimension of the preferred single-cell vectors. + */ + @Nullable + private SingleCellDimensionValueObject singleCellDimension; + /** * Required when using the class as a spring bean. */ @@ -142,6 +151,11 @@ public ExpressionExperimentValueObject( ExpressionExperiment ee ) { } } + public ExpressionExperimentValueObject( ExpressionExperiment ee, SingleCellDimension singleCellDimension, CellTypeAssignment cellTypeAssignment ) { + this( ee ); + this.singleCellDimension = new SingleCellDimensionValueObject( singleCellDimension, cellTypeAssignment ); + } + /** * Creates a new {@link ExpressionExperiment} value object with additional information about ownership. 
*/ @@ -169,6 +183,7 @@ protected ExpressionExperimentValueObject( ExpressionExperimentValueObject vo ) this.accession = vo.getAccession(); this.batchConfound = vo.getBatchConfound(); this.batchEffect = vo.getBatchEffect(); + this.batchEffectStatistics = vo.getBatchEffectStatistics(); this.externalDatabase = vo.getExternalDatabase(); this.externalUri = vo.getExternalUri(); this.metadata = vo.getMetadata(); @@ -186,6 +201,7 @@ protected ExpressionExperimentValueObject( ExpressionExperimentValueObject vo ) this.isShared = vo.getIsShared(); this.geeq = vo.getGeeq(); this.suitableForDEA = vo.getSuitableForDEA(); + this.singleCellDimension = vo.getSingleCellDimension(); } /** diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java index bf253e1820..fb5dc6f9f5 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java @@ -10,10 +10,7 @@ import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.arrayDesign.TechnologyType; import ubic.gemma.model.expression.bioAssay.BioAssay; -import ubic.gemma.model.expression.bioAssayData.BioAssayDimension; -import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; -import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation; -import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.bioAssayData.*; import ubic.gemma.model.expression.biomaterial.BioMaterial; import ubic.gemma.model.expression.experiment.*; import ubic.gemma.model.genome.Gene; @@ -105,6 +102,7 @@ public interface ExpressionExperimentDao * Obtain the dataset usage frequency by technology type for the given dataset IDs. *

* Note: No ACL filtering is performed. + * * @see #getTechnologyTypeUsageFrequency() */ Map getTechnologyTypeUsageFrequency( Collection eeIds ); @@ -123,6 +121,7 @@ public interface ExpressionExperimentDao * Obtain dataset usage frequency by platform currently for the given dataset IDs. *

* Note: no ACL filtering is performed. Only administrator can see troubled platforms. + * * @see #getArrayDesignsUsageFrequency(int) */ Map getArrayDesignsUsageFrequency( Collection eeIds, int maxResults ); @@ -142,6 +141,7 @@ public interface ExpressionExperimentDao * Obtain dataset usage frequency by platform currently for the given dataset IDs. *

* Note: no ACL filtering is performed. Only administrators can see troubled platforms. + * * @see #getOriginalPlatformsUsageFrequency(int) */ Map getOriginalPlatformsUsageFrequency( Collection eeIds, int maxResults ); @@ -214,11 +214,11 @@ Map> getSampleRemovalEvents( * Special method for front-end access. This is partly redundant with {@link #loadValueObjects(Filters, Sort, int, int)}; * however, it fills in more information, returns ExpressionExperimentDetailsValueObject * - * @param ids only list specific ids, or null to ignore - * @param taxon only list EEs in the specified taxon, or null to ignore - * @param sort the field to order the results by. - * @param offset offset - * @param limit maximum number of results to return + * @param ids only list specific ids, or null to ignore + * @param taxon only list EEs in the specified taxon, or null to ignore + * @param sort the field to order the results by. + * @param offset offset + * @param limit maximum number of results to return * @return a list of EE details VOs representing experiments matching the given arguments. */ Slice loadDetailsValueObjects( @Nullable Collection ids, @Nullable Taxon taxon, @Nullable Sort sort, int offset, int limit ); @@ -313,22 +313,28 @@ Map> getSampleRemovalEvents( void deleteSingleCellDimension( ExpressionExperiment ee, SingleCellDimension singleCellDimension ); - List getCellTypeLabellings( ExpressionExperiment ee ); + List getCellTypeLabellings( ExpressionExperiment ee ); /** * Obtain the preferred labelling of the preferred single-cell vectors. + * * @throws org.springframework.dao.IncorrectResultSizeDataAccessException if there are multiple preferred cell-type - * labellings + * labellings */ @Nullable - CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ); + CellTypeAssignment getPreferredCellTypeLabelling( ExpressionExperiment ee ); /** * Add the given cell type labelling to the single-cell dimension. *

* If the new labelling is preferred, any existing one is marked as non-preferred. */ - void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension singleCellDimension, CellTypeLabelling cellTypeLabelling ); + void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension singleCellDimension, CellTypeAssignment cellTypeAssignment ); List getCellTypes( ExpressionExperiment ee ); + + /** + * Obtain a set of single-cell data vectors for the given quantitation type. + */ + List getSingleCellDataVectors( ExpressionExperiment expressionExperiment, QuantitationType quantitationType ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index ecc8156b9b..a1703e4669 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -44,10 +44,7 @@ import ubic.gemma.model.expression.arrayDesign.ArrayDesignValueObject; import ubic.gemma.model.expression.arrayDesign.TechnologyType; import ubic.gemma.model.expression.bioAssay.BioAssay; -import ubic.gemma.model.expression.bioAssayData.BioAssayDimension; -import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; -import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation; -import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.bioAssayData.*; import ubic.gemma.model.expression.biomaterial.BioMaterial; import ubic.gemma.model.expression.experiment.*; import ubic.gemma.model.genome.Gene; @@ -841,6 +838,7 @@ public Map getAnnotationsUsageFrequency( @Nullable Collect *

* FIXME: There's a bug in Hibernate that that prevents it from producing proper tuples the excluded URIs and * retained term URIs + * * @param column column holding the URI to be excluded * @param labelColumn column holding the label (only used if excludeFreeText or excludeUncategorized is true, * then we will check if the label is non-null to cover some edge cases) @@ -1749,6 +1747,7 @@ protected ExpressionExperimentValueObject doLoadValueObject( ExpressionExperimen @Override protected void postProcessValueObjects( List results ) { populateArrayDesignCount( results ); + populateSingleCellMetadata( results ); } @Override @@ -1967,7 +1966,7 @@ public void deleteSingleCellDimension( ExpressionExperiment ee, SingleCellDimens } @Override - public List getCellTypeLabellings( ExpressionExperiment ee ) { + public List getCellTypeLabellings( ExpressionExperiment ee ) { //noinspection unchecked return getSessionFactory().getCurrentSession() .createQuery( "select distinct ctl from SingleCellExpressionDataVector scedv " @@ -1980,8 +1979,8 @@ public List getCellTypeLabellings( ExpressionExperiment ee ) @Nullable @Override - public CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ) { - return ( CellTypeLabelling ) getSessionFactory().getCurrentSession() + public CellTypeAssignment getPreferredCellTypeLabelling( ExpressionExperiment ee ) { + return ( CellTypeAssignment ) getSessionFactory().getCurrentSession() .createQuery( "select distinct ctl from SingleCellExpressionDataVector scedv " + "join scedv.singleCellDimension scd " + "join scd.cellTypeLabellings ctl " @@ -1991,9 +1990,9 @@ public CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee } @Override - public void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeLabelling labelling ) { + public void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeAssignment labelling ) { if ( labelling.isPreferred() ) { - 
for ( CellTypeLabelling l : dimension.getCellTypeLabellings() ) { + for ( CellTypeAssignment l : dimension.getCellTypeAssignments() ) { if ( l.isPreferred() ) { log.info( "Marking existing cell type labelling as non-preferred, a new preferred labelling will be added." ); l.setPreferred( false ); @@ -2002,7 +2001,7 @@ public void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension d } } getSessionFactory().getCurrentSession().persist( labelling ); - dimension.getCellTypeLabellings().add( labelling ); + dimension.getCellTypeAssignments().add( labelling ); } @Override @@ -2018,6 +2017,17 @@ public List getCellTypes( ExpressionExperiment ee ) { .list(); } + @Override + public List getSingleCellDataVectors( ExpressionExperiment expressionExperiment, QuantitationType quantitationType ) { + //noinspection unchecked + return getSessionFactory().getCurrentSession() + .createQuery( "select scedv from SingleCellExpressionDataVector scedv " + + "where scedv.expressionExperiment = :ee and scedv.quantitationType = :qt" ) + .setParameter( "ee", expressionExperiment ) + .setParameter( "qt", quantitationType ) + .list(); + } + @Override protected Query getFilteringQuery( @Nullable Filters filters, @Nullable Sort sort ) { // the constants for aliases are messing with the inspector @@ -2272,4 +2282,28 @@ private void populateArrayDesignCount( Collection eevos ) { + //noinspection unchecked + List results = getSessionFactory().getCurrentSession() + .createQuery( "select scedv.expressionExperiment.id, scd, cta from ExpressionExperiment ee " + + "join ee.singleCellExpressionDataVectors scedv " + + "join scedv.quantitationType qt " + + "join scedv.singleCellDimension scd " + + "left join scd.cellTypeAssignments cta " + + "where scedv.expressionExperiment.id in :ees " + + "and qt.isPreferred = true and ( cta is null or cta.preferred = true ) " + + "group by scedv.expressionExperiment" ) + .setParameterList( "ees", EntityUtils.getIds( eevos ) ) + .list(); + if ( 
!results.isEmpty() ) { + Map voById = EntityUtils.getIdMap( eevos ); + for ( Object[] row : results ) { + Long id = ( Long ) row[0]; + SingleCellDimension scd = ( SingleCellDimension ) row[1]; + CellTypeAssignment cta = ( CellTypeAssignment ) row[2]; + voById.get( id ).setSingleCellDimension( new SingleCellDimensionValueObject( scd, cta ) ); + } + } + } } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java index 0388b076ed..8d91cdc2c9 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java @@ -1,10 +1,11 @@ package ubic.gemma.persistence.service.expression.experiment; import org.springframework.security.access.annotation.Secured; +import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix; import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.protocol.Protocol; import ubic.gemma.model.common.quantitationtype.QuantitationType; -import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; import ubic.gemma.model.expression.experiment.ExperimentalFactor; @@ -16,6 +17,12 @@ public interface SingleCellExpressionExperimentService { + /** + * Obtain a single-cell expression data matrix for the given quantitation type. 
+ */ + @Secured({ "GROUP_USER", "ACL_SECURABLE_EDIT" }) + SingleCellExpressionDataMatrix getSingleCellExpressionDataMatrix( ExpressionExperiment expressionExperiment, QuantitationType quantitationType ); + /** * Add single-cell data vectors. */ @@ -44,12 +51,13 @@ void replaceSingleCellDataVectors( ExpressionExperiment ee, QuantitationType qua /** * Relabel the cell types of an existing set of single-cell vectors. + * * @param newCellTypeLabels the new cell types labels, must match the number of cells * @param labellingProtocol the protocol used to generate the new labelling, or null if unknown * @return a new, preferred cell type labelling */ @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) - CellTypeLabelling relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List newCellTypeLabels, @Nullable Protocol labellingProtocol, @Nullable String description ); + CellTypeAssignment relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List newCellTypeLabels, @Nullable Protocol labellingProtocol, @Nullable String description ); /** * Remove the given cell type labelling. @@ -57,20 +65,20 @@ void replaceSingleCellDataVectors( ExpressionExperiment ee, QuantitationType qua * If the cell type labelling is preferred and applies the the preferred vectors as per {@link #getPreferredCellTypeLabelling(ExpressionExperiment)}, the cell type factor will be removed. */ @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) - void removeCellTypeLabels( ExpressionExperiment ee, SingleCellDimension scd, CellTypeLabelling cellTypeLabelling ); + void removeCellTypeLabels( ExpressionExperiment ee, SingleCellDimension scd, CellTypeAssignment cellTypeAssignment ); /** * Obtain all the cell type labellings from all single-cell vectors. 
*/ @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) - List getCellTypeLabellings( ExpressionExperiment ee ); + List getCellTypeLabellings( ExpressionExperiment ee ); /** * Obtain the preferred cell type labelling from the preferred single-cell vectors. */ @Nullable @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) - CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ); + CellTypeAssignment getPreferredCellTypeLabelling( ExpressionExperiment ee ); /** * Obtain the cell types of a given single-cell dataset. @@ -85,9 +93,10 @@ void replaceSingleCellDataVectors( ExpressionExperiment ee, QuantitationType qua *

* Analyses involving the factor are removed and samples mentioning the factor values are updated as per * {@link ExperimentalFactorService#remove(ExperimentalFactor)}. + * * @return the created cell type factor * @throws IllegalStateException if the dataset does not have a preferred cell type labelling for its preferred set - * of single-cell vectors + * of single-cell vectors */ @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java index 55def3d502..8cbb99fa50 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java @@ -6,6 +6,8 @@ import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import org.springframework.util.Assert; +import ubic.gemma.core.datastructure.matrix.DoubleSingleCellExpressionDataMatrix; +import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix; import ubic.gemma.model.common.auditAndSecurity.eventType.DataAddedEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.DataRemovedEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.DataReplacedEvent; @@ -16,7 +18,7 @@ import ubic.gemma.model.common.quantitationtype.PrimitiveType; import ubic.gemma.model.common.quantitationtype.QuantitationType; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; -import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; import 
ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; import ubic.gemma.model.expression.designElement.CompositeSequence; @@ -52,6 +54,12 @@ public class SingleCellExpressionExperimentServiceImpl implements SingleCellExpr @Deprecated private SessionFactory sessionFactory; + @Override + @Transactional(readOnly = true) + public SingleCellExpressionDataMatrix getSingleCellExpressionDataMatrix( ExpressionExperiment expressionExperiment, QuantitationType quantitationType ) { + return new DoubleSingleCellExpressionDataMatrix( expressionExperimentDao.getSingleCellDataVectors( expressionExperiment, quantitationType ) ); + } + @Override @Transactional public void addSingleCellDataVectors( ExpressionExperiment ee, QuantitationType quantitationType, Collection vectors ) { @@ -88,7 +96,7 @@ public void addSingleCellDataVectors( ExpressionExperiment ee, QuantitationType ee.getQuantitationTypes().add( quantitationType ); expressionExperimentDao.update( ee ); // will take care of creating vectors if ( quantitationType.getIsPreferred() && scdCreated ) { - CellTypeLabelling preferredLabelling = scd.getCellTypeLabellings().stream().filter( CellTypeLabelling::isPreferred ).findFirst().orElse( null ); + CellTypeAssignment preferredLabelling = scd.getCellTypeAssignments().stream().filter( CellTypeAssignment::isPreferred ).findFirst().orElse( null ); if ( preferredLabelling != null ) { log.info( "New single-cell preferred vectors were added, recreating the cell type factor." 
); recreateCellTypeFactor( ee, preferredLabelling ); @@ -135,7 +143,7 @@ public void replaceSingleCellDataVectors( ExpressionExperiment ee, QuantitationT int numVectorsAdded = ee.getSingleCellExpressionDataVectors().size() - ( previousSize - numVectorsRemoved ); expressionExperimentDao.update( ee ); if ( quantitationType.getIsPreferred() && scdCreated ) { - CellTypeLabelling preferredLabelling = scd.getCellTypeLabellings().stream().filter( CellTypeLabelling::isPreferred ).findFirst().orElse( null ); + CellTypeAssignment preferredLabelling = scd.getCellTypeAssignments().stream().filter( CellTypeAssignment::isPreferred ).findFirst().orElse( null ); if ( preferredLabelling != null ) { log.info( "Preferred single-cell vectors were replaced, recreating the cell type factor." ); recreateCellTypeFactor( ee, preferredLabelling ); @@ -213,7 +221,8 @@ public void removeSingleCellDataVectors( ExpressionExperiment ee, QuantitationTy /** * Remove the given single-cell vectors and their corresponding single-cell dimension if necessary. - * @param ee the experiment to remove the vectors from. + * + * @param ee the experiment to remove the vectors from. * @param additionalVectors additional vectors to check if the single-cell dimension is still in use (i.e. vectors that are in the process of being added). */ private void removeSingleCellVectorsAndDimensionIfNecessary( ExpressionExperiment ee, @@ -254,11 +263,11 @@ public List getSingleCellDimensions( ExpressionExperiment e @Override @Transactional - public CellTypeLabelling relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List newCellTypeLabels, Protocol protocol, String description ) { + public CellTypeAssignment relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List newCellTypeLabels, @Nullable Protocol protocol, @Nullable String description ) { Assert.notNull( ee.getId(), "Dataset must be persistent." 
); Assert.notNull( dimension.getId(), "Single-cell dimension must be persistent." ); Assert.isTrue( ee.getBioAssays().containsAll( dimension.getBioAssays() ), "Single-cell dimension does not belong to the dataset." ); - CellTypeLabelling labelling = new CellTypeLabelling(); + CellTypeAssignment labelling = new CellTypeAssignment(); labelling.setPreferred( true ); labelling.setProtocol( protocol ); labelling.setDescription( description ); @@ -267,11 +276,11 @@ public CellTypeLabelling relabelCellTypes( ExpressionExperiment ee, SingleCellDi for ( int i = 0; i < ct.length; i++ ) { ct[i] = Collections.binarySearch( labels, newCellTypeLabels.get( i ) ); } - labelling.setCellTypes( ct ); - labelling.setCellTypeLabels( labels.stream() + labelling.setCellTypeIndices( ct ); + labelling.setCellTypes( labels.stream() .map( l -> Characteristic.Factory.newInstance( Categories.CELL_TYPE, l, null ) ) .collect( Collectors.toList() ) ); - labelling.setNumberOfCellTypeLabels( labels.size() ); + labelling.setNumberOfCellTypes( labels.size() ); expressionExperimentDao.addCellTypeLabelling( ee, dimension, labelling ); validateSingleCellDimension( ee, dimension ); log.info( "Relabelled single-cell vectors for " + ee + " with: " + labelling ); @@ -287,14 +296,14 @@ public CellTypeLabelling relabelCellTypes( ExpressionExperiment ee, SingleCellDi @Override @Transactional - public void removeCellTypeLabels( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeLabelling cellTypeLabelling ) { + public void removeCellTypeLabels( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeAssignment cellTypeAssignment ) { Assert.notNull( ee.getId(), "Dataset must be persistent." ); Assert.notNull( dimension.getId(), "Single-cell dimension must be persistent." ); Assert.isTrue( ee.getBioAssays().containsAll( dimension.getBioAssays() ), "Single-cell dimension does not belong to the dataset." 
); - Assert.isTrue( dimension.getCellTypeLabellings().contains( cellTypeLabelling ), + Assert.isTrue( dimension.getCellTypeAssignments().contains( cellTypeAssignment ), "The supplied labelling does not belong to the dimension." ); - boolean alsoRemoveFactor = cellTypeLabelling.equals( getPreferredCellTypeLabelling( ee ) ); - dimension.getCellTypeLabellings().remove( cellTypeLabelling ); + boolean alsoRemoveFactor = cellTypeAssignment.equals( getPreferredCellTypeLabelling( ee ) ); + dimension.getCellTypeAssignments().remove( cellTypeAssignment ); if ( alsoRemoveFactor ) { log.info( "The preferred cell type labels have been removed, removing the cell type factor..." ); removeCellTypeFactorIfExists( ee ); @@ -303,13 +312,13 @@ public void removeCellTypeLabels( ExpressionExperiment ee, SingleCellDimension d @Override @Transactional(readOnly = true) - public List getCellTypeLabellings( ExpressionExperiment ee ) { + public List getCellTypeLabellings( ExpressionExperiment ee ) { return expressionExperimentDao.getCellTypeLabellings( ee ); } @Override @Transactional(readOnly = true) - public CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ) { + public CellTypeAssignment getPreferredCellTypeLabelling( ExpressionExperiment ee ) { return expressionExperimentDao.getPreferredCellTypeLabelling( ee ); } @@ -331,21 +340,21 @@ private void validateSingleCellDimension( ExpressionExperiment ee, SingleCellDim } Assert.isTrue( scbad.getCellIds().size() == scbad.getNumberOfCells(), "The number of cell IDs must match the number of cells." ); - Assert.isTrue( scbad.getCellTypeLabellings().stream().filter( CellTypeLabelling::isPreferred ).count() <= 1, + Assert.isTrue( scbad.getCellTypeAssignments().stream().filter( CellTypeAssignment::isPreferred ).count() <= 1, "There must be at most one preferred cell type labelling." 
); - for ( CellTypeLabelling labelling : scbad.getCellTypeLabellings() ) { - Assert.notNull( labelling.getNumberOfCellTypeLabels() ); - Assert.notNull( labelling.getCellTypeLabels() ); - Assert.isTrue( labelling.getCellTypes().length == scbad.getCellIds().size(), + for ( CellTypeAssignment labelling : scbad.getCellTypeAssignments() ) { + Assert.notNull( labelling.getNumberOfCellTypes() ); + Assert.notNull( labelling.getCellTypes() ); + Assert.isTrue( labelling.getCellTypeIndices().length == scbad.getCellIds().size(), "The number of cell types must match the number of cell IDs." ); - int numberOfCellTypeLabels = labelling.getCellTypeLabels().size(); + int numberOfCellTypeLabels = labelling.getCellTypes().size(); Assert.isTrue( numberOfCellTypeLabels > 0, "There must be at least one cell type label declared in the cellTypeLabels collection." ); - Assert.isTrue( labelling.getCellTypeLabels().stream().distinct().count() == labelling.getCellTypeLabels().size(), + Assert.isTrue( labelling.getCellTypes().stream().distinct().count() == labelling.getCellTypes().size(), "Cell type labels must be unique." ); - Assert.isTrue( numberOfCellTypeLabels == labelling.getNumberOfCellTypeLabels(), + Assert.isTrue( numberOfCellTypeLabels == labelling.getNumberOfCellTypes(), "The number of cell types must match the number of values the cellTypeLabels collection." 
); - for ( int k : labelling.getCellTypes() ) { + for ( int k : labelling.getCellTypeIndices() ) { Assert.isTrue( k >= 0 && k < numberOfCellTypeLabels, String.format( "Cell type vector values must be within the [%d, %d[ range.", 0, numberOfCellTypeLabels ) ); } @@ -359,12 +368,12 @@ private void validateSingleCellDimension( ExpressionExperiment ee, SingleCellDim @Override @Transactional public ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee ) { - CellTypeLabelling ctl = getPreferredCellTypeLabelling( ee ); + CellTypeAssignment ctl = getPreferredCellTypeLabelling( ee ); Assert.notNull( ctl, "There must be a preferred cell type labelling for " + ee + " to update the cell type factor." ); return recreateCellTypeFactor( ee, ctl ); } - private ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee, CellTypeLabelling ctl ) { + private ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee, CellTypeAssignment ctl ) { removeCellTypeFactorIfExists( ee ); // create a new cell type factor ExperimentalFactor cellTypeFactor = ExperimentalFactor.Factory.newInstance(); @@ -372,7 +381,7 @@ private ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee, Cell cellTypeFactor.setCategory( Characteristic.Factory.newInstance( Categories.CELL_TYPE ) ); cellTypeFactor.setExperimentalDesign( ee.getExperimentalDesign() ); ee.getExperimentalDesign().getExperimentalFactors().add( cellTypeFactor ); - for ( Characteristic ct : ctl.getCellTypeLabels() ) { + for ( Characteristic ct : ctl.getCellTypes() ) { FactorValue fv = new FactorValue(); Statement s = new Statement(); s.setCategory( ct.getCategory() ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/ByteArrayUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/ByteArrayUtils.java new file mode 100644 index 0000000000..f405986ded --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/ByteArrayUtils.java @@ -0,0 +1,26 @@ +package 
ubic.gemma.persistence.util; + +import ubic.basecode.io.ByteArrayConverter; + +/** + * Utilities for working with byte arrays. + * + * @author poirigui + * @see ByteArrayConverter + */ +public class ByteArrayUtils { + + private static final ByteArrayConverter byteArrayConverter = new ByteArrayConverter(); + + public static byte[] doubleArrayToBytes( Double[] data ) { + return byteArrayConverter.doubleArrayToBytes( data ); + } + + public static byte[] doubleArrayToBytes( double[] data ) { + return byteArrayConverter.doubleArrayToBytes( data ); + } + + public static double[] byteArrayToDoubles( byte[] bytes ) { + return byteArrayConverter.byteArrayToDoubles( bytes ); + } +} diff --git a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml index 51f001613f..d403b35bb9 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml @@ -142,27 +142,27 @@ - - - + + int - - - + + + + - + - - + + diff --git a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Investigation.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Investigation.hbm.xml index 8073ab7bc5..e77178bfdd 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Investigation.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Investigation.hbm.xml @@ -89,7 +89,7 @@ fetch="select"> - + diff --git a/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml index f673b92e86..be7fd31712 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml @@ -20,11 +20,11 @@ - + - + 
diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderPersistenceTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderPersistenceTest.java new file mode 100644 index 0000000000..1aa3395a55 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderPersistenceTest.java @@ -0,0 +1,104 @@ +package ubic.gemma.core.loader.expression.singleCell; + +import org.hibernate.SessionFactory; +import org.junit.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.io.ClassPathResource; +import org.springframework.test.context.ContextConfiguration; +import ubic.gemma.core.util.test.BaseDatabaseTest; +import ubic.gemma.model.common.quantitationtype.QuantitationType; +import ubic.gemma.model.expression.arrayDesign.ArrayDesign; +import ubic.gemma.model.expression.bioAssay.BioAssay; +import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; +import ubic.gemma.model.expression.biomaterial.BioMaterial; +import ubic.gemma.model.expression.designElement.CompositeSequence; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.model.genome.Taxon; +import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; +import ubic.gemma.persistence.service.expression.experiment.*; +import ubic.gemma.persistence.util.TestComponent; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.zip.GZIPInputStream; + +import static org.mockito.Mockito.mock; +import static 
ubic.gemma.core.loader.expression.singleCell.MexTestUtils.createLoaderForResourceDir; + +/** + * Load and persist single-cell data stored in the MEX format. + */ +@ContextConfiguration +public class MexSingleCellDataLoaderPersistenceTest extends BaseDatabaseTest { + + @Configuration + @TestComponent + static class MexSingleCellDataLoaderPersistenceTestContextConfiguration extends BaseDatabaseTestContextConfiguration { + @Bean + public SingleCellExpressionExperimentService singleCellExpressionExperimentService() { + return new SingleCellExpressionExperimentServiceImpl(); + } + + @Bean + public ExpressionExperimentDao expressionExperimentDao( SessionFactory sessionFactory ) { + return new ExpressionExperimentDaoImpl( sessionFactory ); + } + + @Bean + public ExperimentalFactorService experimentalFactorService() { + return mock(); + } + + @Bean + public AuditTrailService auditTrailService() { + return mock(); + } + } + + @Autowired + private SingleCellExpressionExperimentService singleCellExpressionExperimentService; + + @Test + public void test() throws IOException { + MexSingleCellDataLoader loader = createLoaderForResourceDir( "/data/loader/expression/singleCell/GSE224438" ); + + Taxon taxon = new Taxon(); + sessionFactory.getCurrentSession().persist( taxon ); + ArrayDesign platform = new ArrayDesign(); + platform.setPrimaryTaxon( taxon ); + Map elementsMapping; + ClassPathResource cpr = new ClassPathResource( "data/loader/expression/singleCell/GSE224438/GSM7022367_1_features.tsv.gz" ); + try ( BufferedReader br = new BufferedReader( new InputStreamReader( new GZIPInputStream( cpr.getInputStream() ) ) ) ) { + elementsMapping = br.lines() + .map( line -> line.split( "\t", 2 )[0] ) + .collect( Collectors.toMap( s -> s, name -> CompositeSequence.Factory.newInstance( name, platform ) ) ); + } + platform.getCompositeSequences().addAll( elementsMapping.values() ); + sessionFactory.getCurrentSession().persist( platform ); + ExpressionExperiment ee = new 
ExpressionExperiment(); + + for ( String sampleName : loader.getSampleNames() ) { + BioMaterial bm = BioMaterial.Factory.newInstance( sampleName, taxon ); + sessionFactory.getCurrentSession().persist( bm ); + BioAssay ba = BioAssay.Factory.newInstance( sampleName, platform, bm ); + bm.getBioAssaysUsedIn().add( ba ); + ee.getBioAssays().add( ba ); + } + + sessionFactory.getCurrentSession().persist( ee ); + sessionFactory.getCurrentSession().flush(); + SingleCellDimension dimension = loader.getSingleCellDimension( ee.getBioAssays() ); + QuantitationType qt = loader.getQuantitationTypes().iterator().next(); + sessionFactory.getCurrentSession().persist( qt ); + try ( Stream stream = loader.loadVectors( elementsMapping, dimension, qt ) ) { + singleCellExpressionExperimentService.addSingleCellDataVectors( ee, qt, stream.collect( Collectors.toList() ) ); + } + } +} diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderTest.java index 832a3699bb..3453f0dbd6 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderTest.java @@ -2,12 +2,9 @@ import org.junit.Test; import org.springframework.core.io.ClassPathResource; -import org.springframework.core.io.Resource; -import org.springframework.core.io.support.PathMatchingResourcePatternResolver; import ubic.basecode.io.ByteArrayConverter; import ubic.gemma.model.common.quantitationtype.PrimitiveType; import ubic.gemma.model.common.quantitationtype.QuantitationType; -import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import 
ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; @@ -17,54 +14,33 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; -import java.nio.file.Path; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; import static org.assertj.core.api.Assertions.assertThat; +import static ubic.gemma.core.loader.expression.singleCell.MexTestUtils.createLoaderForResourceDir; -public class MexCellDataLoaderTest { +public class MexSingleCellDataLoaderTest { private static final ByteArrayConverter byteArrayConverter = new ByteArrayConverter(); @Test public void test() throws IOException { - ArrayDesign platform = ArrayDesign.Factory.newInstance( "GPL12311", null ); - - // consider the first file as a platform! + // consider the first file for mapping to elements + Map elementsMapping; ClassPathResource cpr = new ClassPathResource( "data/loader/expression/singleCell/GSE224438/GSM7022367_1_features.tsv.gz" ); try ( BufferedReader br = new BufferedReader( new InputStreamReader( new GZIPInputStream( cpr.getInputStream() ) ) ) ) { - br.lines().forEach( line -> platform.getCompositeSequences().add( CompositeSequence.Factory.newInstance( line.split( "\t", 2 )[0] ) ) ); + elementsMapping = br.lines() + .map( line -> line.split( "\t", 2 )[0] ) + .collect( Collectors.toMap( s -> s, CompositeSequence.Factory::newInstance ) ); } - List sampleNames = new ArrayList<>(); - List barcodeFiles = new ArrayList<>(); - List geneFiles = new ArrayList<>(); - List matrixFiles = new ArrayList<>(); - PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver(); - Resource[] resources = resolver.getResources( "data/loader/expression/singleCell/GSE224438/*" ); - Map> f = Arrays.stream( resources ) - .collect( Collectors.groupingBy( r -> r.getFilename().split( "_", 2 )[0], Collectors.toList() ) ); - f = 
new TreeMap<>( f ); - for ( Map.Entry> entry : f.entrySet() ) { - String sampleName = entry.getKey(); - Resource barcodeFile = entry.getValue().stream() - .filter( p -> p.getFilename().endsWith( "barcodes.tsv.gz" ) ) - .findFirst() - .orElse( null ); - Resource geneFile = entry.getValue().stream().filter( p -> p.getFilename().endsWith( "features.tsv.gz" ) ).findFirst().orElse( null ); - Resource matrixFile = entry.getValue().stream().filter( p -> p.getFilename().endsWith( "matrix.mtx.gz" ) ).findFirst().orElse( null ); - if ( barcodeFile != null && geneFile != null && matrixFile != null ) { - sampleNames.add( sampleName ); - barcodeFiles.add( barcodeFile.getFile().toPath() ); - geneFiles.add( geneFile.getFile().toPath() ); - matrixFiles.add( matrixFile.getFile().toPath() ); - } - } - MexCellDataLoader loader = new MexCellDataLoader( sampleNames, barcodeFiles, geneFiles, matrixFiles ); + MexSingleCellDataLoader loader = createLoaderForResourceDir( "data/loader/expression/singleCell/GSE224438" ); ArrayList bas = new ArrayList<>(); - for ( String sampleName : sampleNames ) { + for ( String sampleName : loader.getSampleNames() ) { bas.add( BioAssay.Factory.newInstance( sampleName, null, BioMaterial.Factory.newInstance( sampleName ) ) ); } assertThat( loader.getCellTypeLabelling() ).isEmpty(); @@ -79,7 +55,7 @@ public void test() throws IOException { assertThat( dimension.getNumberOfCellsBySample( 9 ) ).isEqualTo( 1000 ); assertThat( dimension.getBioAssaysOffset() ) .containsExactly( 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000 ); - List vectors = loader.loadVectors( platform, dimension, qt ).collect( Collectors.toList() ); + List vectors = loader.loadVectors( elementsMapping, dimension, qt ).collect( Collectors.toList() ); assertThat( vectors ) .hasSize( 1000 ) .allSatisfy( v -> { diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexTestUtils.java 
b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexTestUtils.java new file mode 100644 index 0000000000..828536f389 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexTestUtils.java @@ -0,0 +1,40 @@ +package ubic.gemma.core.loader.expression.singleCell; + +import org.springframework.core.io.Resource; +import org.springframework.core.io.support.PathMatchingResourcePatternResolver; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.*; +import java.util.stream.Collectors; + +public class MexTestUtils { + + public static MexSingleCellDataLoader createLoaderForResourceDir( String resourceDir ) throws IOException { + List sampleNames = new ArrayList<>(); + List barcodeFiles = new ArrayList<>(); + List geneFiles = new ArrayList<>(); + List matrixFiles = new ArrayList<>(); + PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver(); + Resource[] resources = resolver.getResources( resourceDir + "/*" ); + Map> f = Arrays.stream( resources ) + .collect( Collectors.groupingBy( r -> r.getFilename().split( "_", 2 )[0], Collectors.toList() ) ); + f = new TreeMap<>( f ); + for ( Map.Entry> entry : f.entrySet() ) { + String sampleName = entry.getKey(); + Resource barcodeFile = entry.getValue().stream() + .filter( p -> p.getFilename().endsWith( "barcodes.tsv.gz" ) ) + .findFirst() + .orElse( null ); + Resource geneFile = entry.getValue().stream().filter( p -> p.getFilename().endsWith( "features.tsv.gz" ) ).findFirst().orElse( null ); + Resource matrixFile = entry.getValue().stream().filter( p -> p.getFilename().endsWith( "matrix.mtx.gz" ) ).findFirst().orElse( null ); + if ( barcodeFile != null && geneFile != null && matrixFile != null ) { + sampleNames.add( sampleName ); + barcodeFiles.add( barcodeFile.getFile().toPath() ); + geneFiles.add( geneFile.getFile().toPath() ); + matrixFiles.add( matrixFile.getFile().toPath() ); + } + } + return new 
MexSingleCellDataLoader( sampleNames, barcodeFiles, geneFiles, matrixFiles ); + } +} diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java index 38ba47506b..6fd0e86bce 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java @@ -15,14 +15,17 @@ import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestExecutionListeners; import ubic.gemma.core.util.test.BaseDatabaseTest; +import ubic.gemma.model.common.description.Categories; import ubic.gemma.model.common.description.Characteristic; +import ubic.gemma.model.common.quantitationtype.*; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; -import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector; -import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector; +import ubic.gemma.model.expression.bioAssayData.*; import ubic.gemma.model.expression.biomaterial.BioMaterial; +import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExperimentalDesign; import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; import ubic.gemma.model.genome.Taxon; import ubic.gemma.persistence.util.*; @@ -386,6 +389,62 @@ public void testRemoveExperimentWithSharedBioMaterial() { assertTrue( bm.getBioAssaysUsedIn().contains( ba2 ) ); } + @Test + @WithMockUser + public void testLoadValueObjectWithSingleCellData() { + Taxon taxon = new Taxon(); + sessionFactory.getCurrentSession().persist( taxon ); + 
ArrayDesign ad = new ArrayDesign(); + ad.setPrimaryTaxon( taxon ); + sessionFactory.getCurrentSession().persist( ad ); + CompositeSequence cs = new CompositeSequence(); + cs.setArrayDesign( ad ); + sessionFactory.getCurrentSession().persist( cs ); + BioMaterial bm = new BioMaterial(); + bm.setSourceTaxon( taxon ); + sessionFactory.getCurrentSession().persist( bm ); + ExpressionExperiment ee = new ExpressionExperiment(); + BioAssay ba = new BioAssay(); + ba.setArrayDesignUsed( ad ); + ba.setSampleUsed( bm ); + ee.getBioAssays().add( ba ); + SingleCellDimension scd = new SingleCellDimension(); + scd.setCellIds( Arrays.asList( "A", "B", "C" ) ); + scd.getBioAssays().add( ba ); + scd.setBioAssaysOffset( new int[] { 0 } ); + CellTypeAssignment cta = new CellTypeAssignment(); + cta.setCellTypeIndices( new int[] { 0, 1, 1, 0 } ); + cta.setCellTypes( Arrays.asList( Characteristic.Factory.newInstance( Categories.CELL_TYPE, "X", null ), + Characteristic.Factory.newInstance( Categories.CELL_TYPE, "Y", null ) ) ); + cta.setPreferred( true ); + cta.setNumberOfCellTypes( 0 ); + scd.getCellTypeAssignments().add( cta ); + sessionFactory.getCurrentSession().persist( scd ); + QuantitationType qt = new QuantitationType(); + qt.setGeneralType( GeneralType.QUANTITATIVE ); + qt.setType( StandardQuantitationType.COUNT ); + qt.setRepresentation( PrimitiveType.DOUBLE ); + qt.setScale( ScaleType.COUNT ); + qt.setIsPreferred( true ); + ee.getQuantitationTypes().add( qt ); + SingleCellExpressionDataVector vector = new SingleCellExpressionDataVector(); + vector.setData( ByteArrayUtils.doubleArrayToBytes( new double[] { 1.0, 2.0, 1.0, 2.0 } ) ); + vector.setDataIndices( new int[] { 0, 1, 2, 4 } ); + vector.setExpressionExperiment( ee ); + vector.setDesignElement( cs ); + vector.setQuantitationType( qt ); + vector.setSingleCellDimension( scd ); + ee.getSingleCellExpressionDataVectors().add( vector ); + sessionFactory.getCurrentSession().persist( ee ); + 
sessionFactory.getCurrentSession().flush(); + ExpressionExperimentValueObject eevo = expressionExperimentDao.loadValueObject( ee ); + assertNotNull( eevo ); + assertNotNull( eevo.getSingleCellDimension() ); + assertEquals( Arrays.asList( "A", "B", "C" ), eevo.getSingleCellDimension().getCellIds() ); + assertNotNull( eevo.getSingleCellDimension().getCellTypeAssignment() ); + assertEquals( Arrays.asList( 1L, 2L, 2L, 1L ), eevo.getSingleCellDimension().getCellTypeAssignment().getCellTypeIds() ); + } + private ExpressionExperiment reload( ExpressionExperiment e ) { sessionFactory.getCurrentSession().flush(); sessionFactory.getCurrentSession().evict( e ); diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java index 79728e4a46..ea0b27727b 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java @@ -1,6 +1,5 @@ package ubic.gemma.persistence.service.expression.experiment; -import gemma.gsec.SecurityService; import org.apache.commons.lang3.RandomStringUtils; import org.hibernate.NonUniqueResultException; import org.hibernate.SessionFactory; @@ -10,11 +9,8 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import org.springframework.security.access.AccessDecisionManager; import org.springframework.test.context.ContextConfiguration; -import ubic.gemma.core.analysis.preprocess.svd.SVDService; -import ubic.gemma.core.ontology.OntologyService; -import ubic.gemma.core.search.SearchService; +import 
ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix; import ubic.gemma.core.util.test.BaseDatabaseTest; import ubic.gemma.model.common.auditAndSecurity.eventType.DataAddedEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.DataRemovedEvent; @@ -24,7 +20,7 @@ import ubic.gemma.model.common.quantitationtype.*; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; -import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; import ubic.gemma.model.expression.biomaterial.BioMaterial; @@ -33,14 +29,7 @@ import ubic.gemma.model.expression.experiment.ExperimentalFactor; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.genome.Taxon; -import ubic.gemma.persistence.service.analysis.expression.coexpression.CoexpressionAnalysisService; -import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; -import ubic.gemma.persistence.service.analysis.expression.pca.PrincipalComponentAnalysisService; -import ubic.gemma.persistence.service.analysis.expression.sampleCoexpression.SampleCoexpressionAnalysisService; -import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; -import ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService; -import ubic.gemma.persistence.service.expression.bioAssayData.BioAssayDimensionService; import ubic.gemma.persistence.util.TestComponent; import java.util.Arrays; @@ -76,94 +65,14 @@ public ExpressionExperimentDao expressionExperimentDao( SessionFactory sessionFa return new ExpressionExperimentDaoImpl( sessionFactory ); } - @Bean - public 
AuditEventService auditEventService() { - return mock( AuditEventService.class ); - } - - @Bean - public AuditTrailService auditTrailService() { - return mock( AuditTrailService.class ); - } - - @Bean - public BioAssayDimensionService bioAssayDimensionService() { - return mock( BioAssayDimensionService.class ); - } - - @Bean - public DifferentialExpressionAnalysisService differentialExpressionAnalysisService() { - return mock( DifferentialExpressionAnalysisService.class ); - } - - @Bean - public ExpressionExperimentSetService expressionExperimentSetService() { - return mock( ExpressionExperimentSetService.class ); - } - - @Bean - public ExpressionExperimentSubSetService expressionExperimentSubSetService() { - return mock( ExpressionExperimentSubSetService.class ); - } - @Bean public ExperimentalFactorService experimentalFactorService() { - return mock( ExperimentalFactorService.class ); - } - - @Bean - public FactorValueService factorValueService() { - return mock( FactorValueService.class ); - } - - @Bean - public OntologyService ontologyService() { - return mock( OntologyService.class ); - } - - @Bean - public PrincipalComponentAnalysisService principalComponentAnalysisService() { - return mock( PrincipalComponentAnalysisService.class ); + return mock(); } @Bean - public QuantitationTypeService quantitationTypeService() { - return mock( QuantitationTypeService.class ); - } - - @Bean - public SearchService searchService() { - return mock( SearchService.class ); - } - - @Bean - public SecurityService securityService() { - return mock( SecurityService.class ); - } - - @Bean - public SVDService svdService() { - return mock( SVDService.class ); - } - - @Bean - public CoexpressionAnalysisService coexpressionAnalysisService() { - return mock( CoexpressionAnalysisService.class ); - } - - @Bean - public SampleCoexpressionAnalysisService sampleCoexpressionAnalysisService() { - return mock( SampleCoexpressionAnalysisService.class ); - } - - @Bean - public 
BlacklistedEntityService blacklistedEntityService() { - return mock( BlacklistedEntityService.class ); - } - - @Bean - public AccessDecisionManager accessDecisionManager() { - return mock( AccessDecisionManager.class ); + public AuditTrailService auditTrailService() { + return mock(); } } @@ -176,6 +85,9 @@ public AccessDecisionManager accessDecisionManager() { @Autowired private ExpressionExperimentDao expressionExperimentDao; + @Autowired + private ExperimentalFactorService experimentalFactorService; + private ArrayDesign ad; private ExpressionExperiment ee; @@ -210,6 +122,19 @@ public void resetMocks() { reset( auditTrailService ); } + @Test + public void testGetSingleCellDataMatrix() { + Collection vectors = createSingleCellVectors( true ); + QuantitationType qt = vectors.iterator().next().getQuantitationType(); + SingleCellDimension scd = vectors.iterator().next().getSingleCellDimension(); + scExpressionExperimentService.addSingleCellDataVectors( ee, qt, vectors ); + SingleCellExpressionDataMatrix matrix = scExpressionExperimentService.getSingleCellExpressionDataMatrix( ee, qt ); + assertThat( matrix.getQuantitationType() ).isEqualTo( qt ); + assertThat( matrix.getSingleCellDimension() ).isEqualTo( scd ); + assertThat( matrix.columns() ).isEqualTo( 100 ); + assertThat( matrix.rows() ).isEqualTo( 100 ); + } + @Test public void testAddSingleCellDataVectors() { Collection vectors = createSingleCellVectors( true ); @@ -225,8 +150,8 @@ public void testAddSingleCellDataVectors() { assertThat( scExpressionExperimentService.getSingleCellDimensions( ee ) ) .hasSize( 1 ) .allSatisfy( scd -> { - assertThat( scd.getCellTypeLabellings().iterator().next().getCellTypeLabel( 0 ).getValue() ).isEqualTo( "A" ); - assertThat( scd.getCellTypeLabellings().iterator().next().getCellTypeLabel( 50 ).getValue() ).isEqualTo( "B" ); + assertThat( scd.getCellTypeAssignments().iterator().next().getCellType( 0 ).getValue() ).isEqualTo( "A" ); + assertThat( 
scd.getCellTypeAssignments().iterator().next().getCellType( 50 ).getValue() ).isEqualTo( "B" ); } ); Collection vectors2 = createSingleCellVectors( true ); @@ -356,14 +281,14 @@ public void testRelabelCellTypes() { for ( int i = 0; i < ct.length; i++ ) { ct[i] = i < 75 ? "A" : "B"; } - CellTypeLabelling newLabelling = scExpressionExperimentService.relabelCellTypes( ee, scd, Arrays.asList( ct ), null, null ); + CellTypeAssignment newLabelling = scExpressionExperimentService.relabelCellTypes( ee, scd, Arrays.asList( ct ), null, null ); assertThat( newLabelling ).satisfies( l -> { assertThat( l.getId() ).isNotNull(); assertThat( l.isPreferred() ).isTrue(); } ); assertThat( ee.getSingleCellExpressionDataVectors() ) .hasSize( 10 ) - .allSatisfy( v -> assertThat( v.getSingleCellDimension().getCellTypeLabellings() ).contains( newLabelling ) ); + .allSatisfy( v -> assertThat( v.getSingleCellDimension().getCellTypeAssignments() ).contains( newLabelling ) ); assertThat( scExpressionExperimentService.getCellTypeLabellings( ee ) ) .hasSize( 1 ) .contains( newLabelling ); @@ -403,9 +328,6 @@ public void testGetPreferredCellTypeLabellingWhenNonUnique() { .isInstanceOf( NonUniqueResultException.class ); } - @Autowired - private ExperimentalFactorService experimentalFactorService; - @Test public void testRecreateCellTypeFactor() { when( experimentalFactorService.create( any( ExperimentalFactor.class ) ) ).thenAnswer( a -> a.getArgument( 0 ) ); @@ -426,14 +348,14 @@ private SingleCellDimension createSingleCellDimension() { for ( int i = 0; i < ct.length; i++ ) { ct[i] = i < 50 ? 
0 : 1; } - CellTypeLabelling labelling = new CellTypeLabelling(); + CellTypeAssignment labelling = new CellTypeAssignment(); labelling.setPreferred( true ); - labelling.setCellTypes( ct ); - labelling.setCellTypeLabels( Arrays.asList( + labelling.setCellTypeIndices( ct ); + labelling.setCellTypes( Arrays.asList( Characteristic.Factory.newInstance( Categories.CELL_TYPE, "A", null ), Characteristic.Factory.newInstance( Categories.CELL_TYPE, "B", null ) ) ); - labelling.setNumberOfCellTypeLabels( 2 ); - scd.getCellTypeLabellings().add( labelling ); + labelling.setNumberOfCellTypes( 2 ); + scd.getCellTypeAssignments().add( labelling ); scd.getBioAssays().addAll( ee.getBioAssays() ); scd.setBioAssaysOffset( new int[] { 0, 25, 50, 75 } ); return scd; @@ -473,8 +395,8 @@ private Collection createSingleCellVectors( Sing v.setSingleCellDimension( scd ); v.setQuantitationType( qt ); v.setData( new byte[8 * 100] ); - int[] ix = new int[8 * 100]; - for ( int i = 0; i < 800; i++ ) { + int[] ix = new int[100]; + for ( int i = 0; i < 100; i++ ) { ix[i] = i; } v.setDataIndices( ix );