Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#54 - CNV off latest master #887

Merged
merged 5 commits into from
Sep 13, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions analysis/views/views_karyomapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ def get_variant_lookup_and_scatter_data(karyomapping_bins):
x = []
text = []
for variant_id, chrom, position, ref, alt in variant_data:

variant_string = Variant.format_tuple(chrom, position, ref, alt)
end = Variant.calculate_end(position, ref, alt)
variant_string = Variant.format_tuple(chrom, position, end, ref, alt)
variant_id_lookup[variant_string] = variant_id
x.append(position)
text.append(variant_string)
Expand Down
13 changes: 11 additions & 2 deletions annotation/annotation_version_querysets.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from django.db.models.query_utils import Q

from annotation.models import AnnotationVersion, VariantAnnotation
from annotation.models import AnnotationVersion, VariantAnnotation, VariantAnnotationPipelineType
from library.django_utils.django_queryset_sql_transformer import get_queryset_with_transformer_hook
from snpdb.models import Variant

Expand Down Expand Up @@ -43,13 +43,22 @@ def get_queryset_for_annotation_version(klass, annotation_version):
return qs


def get_unannotated_variants_qs(annotation_version, min_variant_id=None, max_variant_id=None):
def get_unannotated_variants_qs(annotation_version, pipeline_type=None, min_variant_id=None, max_variant_id=None):
# Explicitly join to version partition so other version annotations don't count
qs = get_variant_queryset_for_annotation_version(annotation_version)
q_filters = VariantAnnotation.VARIANT_ANNOTATION_Q + \
[Variant.get_contigs_q(annotation_version.genome_build),
Q(variantannotation__isnull=True)] # Not annotated

q_symbolic = Q(locus__ref__seq__contains='<') | Q(alt__seq__contains='<')
if pipeline_type:
if pipeline_type == VariantAnnotationPipelineType.STANDARD:
q_filters.append(~q_symbolic)
elif pipeline_type == VariantAnnotationPipelineType.CNV:
q_filters.append(q_symbolic)
else:
raise ValueError(f"Unrecognised {pipeline_type=}")

if min_variant_id:
q_filters.append(Q(pk__gte=min_variant_id))
if max_variant_id:
Expand Down
9 changes: 7 additions & 2 deletions annotation/grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from django.db.models import QuerySet, ExpressionWrapper, F, fields

from annotation.models import VariantAnnotationVersion, AnnotationRun, AnnotationStatus
from annotation.models import VariantAnnotationVersion, AnnotationRun, AnnotationStatus, VariantAnnotationPipelineType
from genes.models_enums import AnnotationConsortium
from library.jqgrid.jqgrid_user_row_config import JqGridUserRowConfig
from snpdb.models.models_genome import GenomeBuild
Expand All @@ -17,6 +17,10 @@ class AnnotationRunColumns(DatatableConfig):
def status(row: Dict[str, Any]):
return AnnotationStatus(row["status"]).label

@staticmethod
def pipeline_type(row: Dict[str, Any]):
    """Render the row's stored pipeline type code as its display label."""
    code = row["pipeline_type"]
    return VariantAnnotationPipelineType(code).label

@staticmethod
def format_timedelta(cell: CellData):
delta: timedelta = cell.value
Expand All @@ -39,9 +43,10 @@ def __init__(self, request):
self.rich_columns = [
RichColumn(key="id", label='ID', orderable=True, client_renderer='idRenderer'),
RichColumn(key="status", orderable=True, renderer=self.status),
RichColumn(key="pipeline_type", orderable=True, renderer=self.pipeline_type),
RichColumn(key="annotation_range_lock__version__genome_build__name", label='Build', orderable=True),
RichColumn(key="annotation_range_lock__version__id", label='Version', orderable=True),
RichColumn(key="annotation_range_lock__count", label='Var Count', orderable=True),
RichColumn(key="dump_count", label='VCF Count', orderable=True),
RichColumn(key="vep_skipped_count", label="VEP Skipped", orderable=True),
RichColumn(key="annotation_range_lock__min_variant__id", label="Min Var", orderable=True),
RichColumn(key="annotation_range_lock__max_variant__id", label="Max Var", orderable=True),
Expand Down
4 changes: 3 additions & 1 deletion annotation/management/commands/vep_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from django.conf import settings
from django.core.management.base import BaseCommand

from annotation.models import VariantAnnotationPipelineType
from annotation.vep_annotation import run_vep
from snpdb.models.models_genome import GenomeBuild

Expand Down Expand Up @@ -47,7 +48,8 @@ def handle(self, *args, **options):

output_filename = os.path.join(output_dir, f"{base_name}.{vep_suffix}.vcf.gz")
return_code, std_out, std_err = run_vep(vcf_filename, output_filename,
genome_build, genome_build.annotation_consortium)
genome_build, genome_build.annotation_consortium,
VariantAnnotationPipelineType.STANDARD)
if return_code != 0:
logging.info(std_out)
logging.error(std_err)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Generated by Django 4.2.2 on 2023-08-29 06:08

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Schema changes allowing more than one AnnotationRun per range lock.

    Adds ``dump_count`` and ``pipeline_type`` to ``annotationrun``, alters
    ``annotation_range_lock`` to a nullable ForeignKey, and makes the pair
    (annotation_range_lock, pipeline_type) unique together.
    """

    dependencies = [
        ('annotation', '0075_clinvarrecord_date_clinvar_created_and_more'),
    ]

    operations = [
        migrations.AlterModelOptions(name='annotationrun', options={}),
        # New column: nullable count associated with the dump step
        migrations.AddField(
            model_name='annotationrun',
            name='dump_count',
            field=models.IntegerField(null=True),
        ),
        # New column: existing rows receive the default 'S'
        migrations.AddField(
            model_name='annotationrun',
            name='pipeline_type',
            field=models.CharField(
                choices=[
                    ('S', 'Standard Short Variant'),
                    ('C', 'CNV'),
                ],
                default='S',
                max_length=1,
            ),
        ),
        migrations.AlterField(
            model_name='annotationrun',
            name='annotation_range_lock',
            field=models.ForeignKey(
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                to='annotation.annotationrangelock',
            ),
        ),
        # One run per (range lock, pipeline type)
        migrations.AlterUniqueTogether(
            name='annotationrun',
            unique_together={('annotation_range_lock', 'pipeline_type')},
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Generated by Django 4.2.2 on 2023-08-29 06:08

from django.db import migrations
from django.db.models import F
from django.utils import timezone


def _one_off_create_empty_cnv_annotation_runs(apps, _schema_editor):
    """Backfill ``dump_count`` and create an empty, finished CNV run per standard run.

    For every existing standard-pipeline AnnotationRun:
      * ``dump_count`` is set to ``annotated_count`` (nothing was skipped), and
      * a CNV-pipeline AnnotationRun is created on the same annotation range lock,
        marked finished with zero dump/annotated counts.

    Args:
        apps: historical app registry supplied by ``migrations.RunPython``.
        _schema_editor: unused.
    """
    AnnotationRun = apps.get_model("annotation", "AnnotationRun")

    # Literal copies of the enum codes (VariantAnnotationPipelineType / AnnotationStatus),
    # kept here rather than imported from the models' enums
    VARIANT_ANNOTATION_PIPELINE_TYPE_STANDARD = "S"
    VARIANT_ANNOTATION_PIPELINE_TYPE_CNV = "C"
    ANNOTATION_STATUS_FINISHED = 'F'

    now = timezone.now()
    ar_qs = AnnotationRun.objects.filter(pipeline_type=VARIANT_ANNOTATION_PIPELINE_TYPE_STANDARD)
    # Update existing to have dump set to annotated (didn't skip)
    ar_qs.update(dump_count=F("annotated_count"))

    # Iterate the STANDARD runs: filtering on CNV here would match nothing (no CNV runs
    # exist yet) and so create nothing. Using the raw FK id avoids a query per row.
    records = [
        AnnotationRun(
            status=ANNOTATION_STATUS_FINISHED,
            annotation_range_lock_id=ar.annotation_range_lock_id,
            pipeline_type=VARIANT_ANNOTATION_PIPELINE_TYPE_CNV,
            dump_start=now,
            dump_end=now,
            annotation_start=now,
            annotation_end=now,
            upload_start=now,
            upload_end=now,
            upload_attempts=0,
            dump_count=0,
            annotated_count=0,
        )
        for ar in ar_qs
    ]

    AnnotationRun.objects.bulk_create(records)


class Migration(migrations.Migration):
    """Data migration that runs ``_one_off_create_empty_cnv_annotation_runs``."""

    dependencies = [
        ('annotation', '0076_alter_annotationrun_options_annotationrun_dump_count_and_more'),
    ]

    # No reverse_code is passed to RunPython, so this migration cannot be unapplied
    operations = [
        migrations.RunPython(code=_one_off_create_empty_cnv_annotation_runs),
    ]
12 changes: 12 additions & 0 deletions annotation/migrations/0078_merge_20230912_1032.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Generated by Django 4.1.4 on 2023-09-12 01:02

from django.db import migrations


class Migration(migrations.Migration):
    """Merge migration: joins the two listed annotation migrations, no operations."""

    # Both parents must be applied before anything that depends on this merge
    dependencies = [
        ("annotation", "0076_rename_rcvs_clinvarrecordcollection_urls"),
        ("annotation", "0077_one_off_create_empty_cnv_annotation_runs"),
    ]

    operations = []
44 changes: 32 additions & 12 deletions annotation/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
from annotation.models.models_citations import Citation, CitationFetchRequest, CitationFetchResponse
from annotation.models.models_enums import AnnotationStatus, \
VariantClass, ColumnAnnotationCategory, VEPPlugin, VEPCustom, ClinVarReviewStatus, VEPSkippedReason, \
ManualVariantEntryType, HumanProteinAtlasAbundance, EssentialGeneCRISPR, EssentialGeneCRISPR2, EssentialGeneGeneTrap
ManualVariantEntryType, HumanProteinAtlasAbundance, EssentialGeneCRISPR, EssentialGeneCRISPR2, \
EssentialGeneGeneTrap, VariantAnnotationPipelineType
from annotation.utils.clinvar_constants import CLINVAR_REVIEW_EXPERT_PANEL_STARS_VALUE
from genes.models import GeneSymbol, Gene, TranscriptVersion, Transcript, GeneAnnotationRelease
from genes.models_enums import AnnotationConsortium
Expand Down Expand Up @@ -685,7 +686,9 @@ def __str__(self):

class AnnotationRun(TimeStampedModel):
status = models.CharField(max_length=1, choices=AnnotationStatus.choices, default=AnnotationStatus.CREATED)
annotation_range_lock = models.OneToOneField(AnnotationRangeLock, null=True, on_delete=CASCADE)
annotation_range_lock = models.ForeignKey(AnnotationRangeLock, null=True, on_delete=CASCADE)
pipeline_type = models.CharField(max_length=1, choices=VariantAnnotationPipelineType.choices,
default=VariantAnnotationPipelineType.STANDARD)
# task_id is used as a lock to prevent multiple Celery jobs from executing same job
task_id = models.CharField(max_length=36, null=True)
dump_start = models.DateTimeField(null=True)
Expand All @@ -704,9 +707,13 @@ class AnnotationRun(TimeStampedModel):
vep_warnings = models.TextField(null=True)
vcf_dump_filename = models.TextField(null=True)
vcf_annotated_filename = models.TextField(null=True)
dump_count = models.IntegerField(null=True)
annotated_count = models.IntegerField(null=True)
celery_task_logs = models.JSONField(null=False, default=dict) # Key=task_id, so we keep logs from multiple runs

class Meta:
unique_together = ('annotation_range_lock', 'pipeline_type')

@property
def variant_annotation_version(self):
return self.annotation_range_lock.version
Expand All @@ -724,14 +731,17 @@ def get_status(self):
status = AnnotationStatus.DUMP_STARTED
if self.dump_end:
status = AnnotationStatus.DUMP_COMPLETED
if self.annotation_start:
status = AnnotationStatus.ANNOTATION_STARTED
if self.annotation_end:
status = AnnotationStatus.ANNOTATION_COMPLETED
if self.upload_start:
status = AnnotationStatus.UPLOAD_STARTED
if self.upload_end:
if self.dump_count == 0:
status = AnnotationStatus.FINISHED
else:
if self.annotation_start:
status = AnnotationStatus.ANNOTATION_STARTED
if self.annotation_end:
status = AnnotationStatus.ANNOTATION_COMPLETED
if self.upload_start:
status = AnnotationStatus.UPLOAD_STARTED
if self.upload_end:
status = AnnotationStatus.FINISHED
return status

@property
Expand All @@ -750,6 +760,15 @@ def delete_related_objects(self):
qs = get_queryset_for_annotation_version(klass, annotation_version)
qs.filter(annotation_run=self).delete()

def get_dump_filename(self) -> str:
    """Return the path of this run's VCF dump file.

    The file lives in ``settings.ANNOTATION_VCF_DUMP_DIR`` and is named
    ``dump_<pk>_<type>.vcf``, where ``<type>`` is "standard" or "cnv". Any other
    pipeline type falls back to its string form.
    """
    pipeline = self.pipeline_type
    if pipeline == VariantAnnotationPipelineType.STANDARD:
        suffix = "standard"
    elif pipeline == VariantAnnotationPipelineType.CNV:
        suffix = "cnv"
    else:
        suffix = str(pipeline)

    dump_dir = settings.ANNOTATION_VCF_DUMP_DIR
    return os.path.join(dump_dir, f"dump_{self.pk}_{suffix}.vcf")

def delete(self, using=None, keep_parents=False):
    """Delete this run, calling ``delete_related_objects`` first."""
    # Related objects are removed explicitly before the row itself is deleted
    self.delete_related_objects()
    super().delete(using=using, keep_parents=keep_parents)
Expand All @@ -760,7 +779,7 @@ def set_task_log(self, key, value):
task_log[key] = value

def __str__(self):
return f"AnnotationRun: {localtime(self.modified)} ({self.status})"
return f"AnnotationRun: {self.pk}/{self.get_pipeline_type_display()}: ({self.status})"


class AbstractVariantAnnotation(models.Model):
Expand All @@ -781,7 +800,8 @@ class AbstractVariantAnnotation(models.Model):
# The best way to see how these map to VEP fields is via the annotation details page
amino_acids = models.TextField(null=True, blank=True)
cadd_phred = models.FloatField(null=True, blank=True)
canonical = models.BooleanField(null=True, blank=True) # TODO: This doesn't need to be nullable (default=False)
# TODO: This doesn't need to be nullable (default=False) - but will be slow. Change with next schema change
canonical = models.BooleanField(null=True, blank=True)
nmd_escaping_variant = models.BooleanField(null=True, blank=True)
codons = models.TextField(null=True, blank=True)
consequence = models.TextField(null=True, blank=True)
Expand Down Expand Up @@ -983,7 +1003,7 @@ class VariantAnnotation(AbstractVariantAnnotation):
# List of filters to describe variants that can be annotated
VARIANT_ANNOTATION_Q = [
Variant.get_no_reference_q(),
~Q(alt__seq__in=['.', '*', "<DEL>"]), # Exclude non-standard variants
~Q(alt__seq__in=['.', '*']), # Exclude non-standard variants
]

@cached_property
Expand Down
6 changes: 6 additions & 0 deletions annotation/models/models_enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ class ClinGenClassification(models.TextChoices):
DISPUTED = 'P', 'Disputed'


class VariantAnnotationPipelineType(models.TextChoices):
    """Annotation pipeline type: standard (short variant) or CNV.

    Values are single-character codes; labels are the display names.
    """
    STANDARD = ("S", "Standard Short Variant")
    CNV = ("C", "CNV")


class VariantClass(models.TextChoices):
""" https://asia.ensembl.org/info/genome/variation/prediction/classification.html#classes """

Expand Down
Loading
Loading