Skip to content

Commit

Permalink
make sure reference and alternate is always upper case
Browse files Browse the repository at this point in the history
  • Loading branch information
nitin-ebi committed Oct 16, 2023
1 parent f280604 commit b23a290
Show file tree
Hide file tree
Showing 12 changed files with 52 additions and 20 deletions.
6 changes: 4 additions & 2 deletions src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ public Variant(String chromosome, int start, int end, String reference, String a
this.chromosome = chromosome;
this.start = start;
this.end = end;
reference = Objects.nonNull(reference) ? reference.toUpperCase() : null;
alternate = Objects.nonNull(alternate) ? alternate.toUpperCase() : null;
this.reference = (reference != null) ? reference : "";
this.alternate = (alternate != null) ? alternate : "";

Expand Down Expand Up @@ -229,7 +231,7 @@ public String getReference() {
}

/**
 * Sets the reference allele and refreshes the cached variant length.
 *
 * <p>The allele is stored in upper case. A {@code null} argument is stored as the empty
 * string, which is the same mapping the constructor applies to its {@code reference} argument.
 *
 * @param reference reference allele in any letter case; may be {@code null}
 */
public void setReference(String reference) {
    // Locale.ROOT keeps the case mapping independent of the JVM default locale
    // (with a Turkish default locale, "i".toUpperCase() yields a dotted capital I).
    this.reference = (reference != null) ? reference.toUpperCase(java.util.Locale.ROOT) : "";
    // Length is derived from the values actually stored on this object.
    this.length = Math.max(this.reference.length(), alternate.length());
}

Expand All @@ -238,7 +240,7 @@ public String getAlternate() {
}

/**
 * Sets the alternate allele and refreshes the cached variant length.
 *
 * <p>The allele is stored in upper case. A {@code null} argument is stored as the empty
 * string, which is the same mapping the constructor applies to its {@code alternate} argument.
 *
 * @param alternate alternate allele in any letter case; may be {@code null}
 */
public void setAlternate(String alternate) {
    // Locale.ROOT keeps the case mapping independent of the JVM default locale
    // (with a Turkish default locale, "i".toUpperCase() yields a dotted capital I).
    this.alternate = (alternate != null) ? alternate.toUpperCase(java.util.Locale.ROOT) : "";
    // Length is derived from the values actually stored on this object.
    this.length = Math.max(reference.length(), this.alternate.length());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;

/**
* Entry that associates a variant and a file in a variant archive. It contains
Expand Down Expand Up @@ -82,7 +83,10 @@ public VariantSourceEntry(String fileId, String studyId) {
public VariantSourceEntry(String fileId, String studyId, String[] secondaryAlternates, String format) {
this.fileId = fileId;
this.studyId = studyId;
this.secondaryAlternates = secondaryAlternates;
this.secondaryAlternates = Arrays.stream(secondaryAlternates)
.map(a->a.toUpperCase())
.collect(Collectors.toList())
.toArray(new String[0]);
this.format = format;

this.samplesData = new ArrayList<>();
Expand Down Expand Up @@ -111,7 +115,10 @@ public String[] getSecondaryAlternates() {
}

/**
 * Replaces the secondary alternate alleles with an upper-cased copy of the given array.
 *
 * <p>The caller's array is never modified or retained; a new array is stored instead.
 *
 * @param secondaryAlternates alleles in any letter case; {@code null} clears the field, and
 *                            {@code null} elements are kept as {@code null}
 */
public void setSecondaryAlternates(String[] secondaryAlternates) {
    if (secondaryAlternates == null) {
        this.secondaryAlternates = null;
        return;
    }
    String[] normalised = new String[secondaryAlternates.length];
    for (int i = 0; i < secondaryAlternates.length; i++) {
        String allele = secondaryAlternates[i];
        // Locale.ROOT keeps the case mapping independent of the JVM default locale.
        normalised[i] = (allele == null) ? null : allele.toUpperCase(java.util.Locale.ROOT);
    }
    this.secondaryAlternates = normalised;
}

public String getFormat() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ public VariantStats(String chromosome, int position, String referenceAllele, Str
Variant.VariantType variantType, float maf, float mgf, String mafAllele, String mgfGenotype,
int numMissingAlleles, int numMissingGenotypes, int numMendelErrors, float percentCasesDominant,
float percentControlsDominant, float percentCasesRecessive, float percentControlsRecessive) {
this.refAllele = referenceAllele;
this.altAllele = alternateAlleles;
this.refAllele = Objects.nonNull(referenceAllele) ? referenceAllele.toUpperCase(): null;
this.altAllele = Objects.nonNull(alternateAlleles) ? alternateAlleles.toUpperCase(): null;
this.variantType = variantType;

this.maf = maf;
Expand Down Expand Up @@ -134,15 +134,15 @@ public String getRefAllele() {
}

/**
 * Sets the reference allele, stored in upper case.
 *
 * @param refAllele reference allele in any letter case; {@code null} is stored as {@code null}
 */
public void setRefAllele(String refAllele) {
    // Locale.ROOT keeps the case mapping independent of the JVM default locale.
    this.refAllele = (refAllele == null) ? null : refAllele.toUpperCase(java.util.Locale.ROOT);
}

/**
 * Returns the alternate allele currently stored on this object.
 *
 * @return the stored alternate allele; may be {@code null}, since the setter stores
 *         {@code null} unchanged
 */
public String getAltAllele() {
    return this.altAllele;
}

/**
 * Sets the alternate allele, stored in upper case.
 *
 * @param altAllele alternate allele in any letter case; {@code null} is stored as {@code null}
 */
public void setAltAllele(String altAllele) {
    // Locale.ROOT keeps the case mapping independent of the JVM default locale.
    this.altAllele = (altAllele == null) ? null : altAllele.toUpperCase(java.util.Locale.ROOT);
}

public Variant.VariantType getVariantType() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import java.util.Collections;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;

/**
Expand Down Expand Up @@ -83,6 +84,8 @@ public Annotation(String chromosome, int start, int end, String referenceAllele,
this.vepVersion = vepVersion;
this.vepCacheVersion = vepCacheVersion;

referenceAllele = Objects.nonNull(referenceAllele) ? referenceAllele.toUpperCase() : null;
alternativeAllele = Objects.nonNull(alternativeAllele) ? alternativeAllele.toUpperCase() : null;
this.id = buildAnnotationId(chromosome, start, referenceAllele, alternativeAllele, vepVersion, vepCacheVersion);
this.xrefs = new HashSet<>();
this.consequenceTypes = new HashSet<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
Expand Down Expand Up @@ -116,6 +117,8 @@ public class VariantDocument {
public VariantDocument(Variant.VariantType variantType, String chromosome, int start, int end, int length,
String reference, String alternate, Map<String, Set<String>> hgvs, Set<String> ids,
Set<VariantSourceEntryMongo> variantSources) {
reference = Objects.nonNull(reference) ? reference.toUpperCase() : null;
alternate = Objects.nonNull(alternate) ? alternate.toUpperCase() : null;
this.id = buildVariantId(chromosome, start, reference, alternate);
this.variantType = variantType;
this.chromosome = chromosome;
Expand All @@ -139,6 +142,8 @@ public VariantDocument(Variant.VariantType variantType, String chromosome, int s
public VariantDocument(Variant.VariantType variantType, String chromosome, int start, int end, int length,
String reference, String alternate, Set<HgvsMongo> hgvs, Set<String> ids,
Set<VariantSourceEntryMongo> variantSources) {
reference = Objects.nonNull(reference) ? reference.toUpperCase() : null;
alternate = Objects.nonNull(alternate) ? alternate.toUpperCase() : null;
this.id = buildVariantId(chromosome, start, reference, alternate);
this.variantType = variantType;
this.chromosome = chromosome;
Expand All @@ -160,6 +165,8 @@ public VariantDocument(Variant.VariantType variantType, String chromosome, int s
}

public static String buildVariantId(String chromosome, int start, String reference, String alternate) {
reference = reference.toUpperCase();
alternate = alternate.toUpperCase();
StringBuilder builder = new StringBuilder(chromosome);
builder.append("_");
builder.append(start);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.ALTERNATE_FIELD;
Expand Down Expand Up @@ -79,6 +80,8 @@ public class SimplifiedVariant {

public SimplifiedVariant(Variant.VariantType variantType, String chromosome, int start, int end, int length,
String reference, String alternate, Map<String, Set<String>> hgvs) {
reference = Objects.nonNull(reference) ? reference.toUpperCase() : null;
alternate = Objects.nonNull(alternate) ? alternate.toUpperCase() : null;
this.id = buildVariantId(chromosome, start, reference, alternate);
this.variantType = variantType;
this.chromosome = chromosome;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;

/**
* Mongo database representation of Variant Source entry.
Expand Down Expand Up @@ -73,8 +75,10 @@ public VariantSourceEntryMongo(String fileId, String studyId, String[] alternate
this.fileId = fileId;
this.studyId = studyId;
if (alternates != null && alternates.length > 0) {
this.alternates = new String[alternates.length];
System.arraycopy(alternates, 0, this.alternates, 0, alternates.length);
this.alternates = Arrays.stream(alternates)
.map(a->a.toUpperCase())
.collect(Collectors.toList())
.toArray(new String[0]);
}
attrs = buildAttributes(attributes);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import java.util.Arrays;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;

public class AccessionReportLineMapper extends VariantVcfFactory implements LineMapper<Variant> {
Expand All @@ -37,7 +38,7 @@ public Variant mapLine(String line, int lineNumber) {
String chromosome = fields[0];
int position = Integer.parseInt(fields[1]);
String reference = getReference(fields);
String alternateAllele = fields[4];
String alternateAllele = Objects.nonNull(fields[4]) ? fields[4].toUpperCase() : null ;

VariantCoreFields keyFields = getVariantCoreKeyFields(chromosome, position, reference, alternateAllele);
Variant variant = new Variant(chromosome, (int) keyFields.getStart(), (int) keyFields.getEnd(), keyFields.getReference(), keyFields.getAlternate());
Expand All @@ -48,7 +49,7 @@ public Variant mapLine(String line, int lineNumber) {
}

/**
 * Extracts the reference allele from the split columns of a report line.
 *
 * @param fields columns of the line; index 3 is read as the reference allele
 * @return the empty string when the column is {@code "."}, otherwise the column in upper case
 */
private String getReference(String[] fields) {
    String rawReference = fields[3];
    if (rawReference.equals(".")) {
        return "";
    }
    // Single return path: normalise case with Locale.ROOT so the result does not
    // depend on the JVM default locale.
    return rawReference.toUpperCase(java.util.Locale.ROOT);
}

private Set<String> getIds(String[] fields) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,8 @@ private Map<String,String> parseVariant(String variantString, String coordinates
parsedVariant.put("chromosome", leftVariantFields[0]);
}
parsedVariant.put("start", leftVariantFields[leftVariantFields.length-2]);
parsedVariant.put("reference", leftVariantFields[leftVariantFields.length-1]);
parsedVariant.put("alternative", variantFields[1]);
parsedVariant.put("reference", leftVariantFields[leftVariantFields.length-1].toUpperCase());
parsedVariant.put("alternative", variantFields[1].toUpperCase());
} catch (ArrayIndexOutOfBoundsException e) {
logger.error("Unexpected variant format for column 1: "+variantString);
throw e;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;

import static java.lang.Math.max;

Expand Down Expand Up @@ -121,11 +122,14 @@ private Set<String> getIds(String[] fields) {
}

/**
 * Extracts the reference allele from the split columns of a VCF line.
 *
 * @param fields columns of the line; index 3 is read as the reference allele
 * @return the empty string when the column is {@code "."}, otherwise the column in upper case
 */
private String getReference(String[] fields) {
    String rawReference = fields[3];
    if (rawReference.equals(".")) {
        return "";
    }
    // Single return path: normalise case with Locale.ROOT so the result does not
    // depend on the JVM default locale.
    return rawReference.toUpperCase(java.util.Locale.ROOT);
}

/**
 * Splits the alternate-allele column of a VCF line into individual upper-cased alleles.
 *
 * @param fields     columns of the line; index 4 is read as a comma-separated allele list
 * @param chromosome not used by this method
 * @param position   not used by this method
 * @param reference  not used by this method
 * @return a new array holding each comma-separated allele in upper case
 */
private String[] getAlternateAlleles(String[] fields, String chromosome, int position, String reference) {
    String[] alleles = fields[4].split(",");
    for (int i = 0; i < alleles.length; i++) {
        // Locale.ROOT keeps the case mapping independent of the JVM default locale,
        // e.g. "<ins>" must become "<INS>" even under a Turkish default locale.
        alleles[i] = alleles[i].toUpperCase(java.util.Locale.ROOT);
    }
    return alleles;
}

private float getQuality(String[] fields) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ public EnsemblVariant(String chromosome, long start, long end, String reference,
this.chromosome = chromosome;
this.start = start;
this.end = end;
this.reference = reference;
this.alternate = alternate;
this.reference = reference.toUpperCase();
this.alternate = alternate.toUpperCase();
transformToEnsemblFormat();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import uk.ac.ebi.eva.pipeline.Application;

import java.util.Map;
import java.util.Objects;

/**
* Statistics related to a set of samples for a given variant.
Expand Down Expand Up @@ -122,15 +123,15 @@ public String getReference() {
}

/**
 * Sets the reference allele, stored in upper case.
 *
 * @param reference reference allele in any letter case; {@code null} is stored as {@code null}
 */
void setReference(String reference) {
    // Locale.ROOT keeps the case mapping independent of the JVM default locale.
    this.reference = (reference == null) ? null : reference.toUpperCase(java.util.Locale.ROOT);
}

/**
 * Returns the alternate allele currently stored on this object.
 *
 * @return the stored alternate allele; may be {@code null}, since the setter stores
 *         {@code null} unchanged
 */
public String getAlternate() {
    return this.alternate;
}

/**
 * Sets the alternate allele, stored in upper case.
 *
 * @param alternate alternate allele in any letter case; {@code null} is stored as {@code null}
 */
void setAlternate(String alternate) {
    // Locale.ROOT keeps the case mapping independent of the JVM default locale.
    this.alternate = (alternate == null) ? null : alternate.toUpperCase(java.util.Locale.ROOT);
}

public String getCohortId() {
Expand Down

0 comments on commit b23a290

Please sign in to comment.