SACGF · TheMadBug · Sep 13, 2023 · Sep 12, 2023 · Sep 12, 2023 · Sep 12, 2023
diff --git a/analysis/views/views_karyomapping.py b/analysis/views/views_karyomapping.py
@@ -77,8 +77,8 @@ def get_variant_lookup_and_scatter_data(karyomapping_bins):
         x = []
         text = []
         for variant_id, chrom, position, ref, alt in variant_data:
-
-            variant_string = Variant.format_tuple(chrom, position, ref, alt)
+            end = Variant.calculate_end(position, ref, alt)
+            variant_string = Variant.format_tuple(chrom, position, end, ref, alt)
             variant_id_lookup[variant_string] = variant_id
             x.append(position)
             text.append(variant_string)

diff --git a/annotation/annotation_version_querysets.py b/annotation/annotation_version_querysets.py
@@ -15,7 +15,7 @@
 
 from django.db.models.query_utils import Q
 
-from annotation.models import AnnotationVersion, VariantAnnotation
+from annotation.models import AnnotationVersion, VariantAnnotation, VariantAnnotationPipelineType
 from library.django_utils.django_queryset_sql_transformer import get_queryset_with_transformer_hook
 from snpdb.models import Variant
 
@@ -43,13 +43,22 @@ def get_queryset_for_annotation_version(klass, annotation_version):
     return qs
 
 
-def get_unannotated_variants_qs(annotation_version, min_variant_id=None, max_variant_id=None):
+def get_unannotated_variants_qs(annotation_version, pipeline_type=None, min_variant_id=None, max_variant_id=None):
     # Explicitly join to version partition so other version annotations don't count
     qs = get_variant_queryset_for_annotation_version(annotation_version)
     q_filters = VariantAnnotation.VARIANT_ANNOTATION_Q + \
         [Variant.get_contigs_q(annotation_version.genome_build),
          Q(variantannotation__isnull=True)]  # Not annotated
 
+    q_symbolic = Q(locus__ref__seq__contains='<') | Q(alt__seq__contains='<')
+    if pipeline_type:
+        if pipeline_type == VariantAnnotationPipelineType.STANDARD:
+            q_filters.append(~q_symbolic)
+        elif pipeline_type == VariantAnnotationPipelineType.CNV:
+            q_filters.append(q_symbolic)
+        else:
+            raise ValueError(f"Unrecognised {pipeline_type=}")
+
     if min_variant_id:
         q_filters.append(Q(pk__gte=min_variant_id))
     if max_variant_id:

diff --git a/annotation/grids.py b/annotation/grids.py
@@ -4,7 +4,7 @@
 
 from django.db.models import QuerySet, ExpressionWrapper, F, fields
 
-from annotation.models import VariantAnnotationVersion, AnnotationRun, AnnotationStatus
+from annotation.models import VariantAnnotationVersion, AnnotationRun, AnnotationStatus, VariantAnnotationPipelineType
 from genes.models_enums import AnnotationConsortium
 from library.jqgrid.jqgrid_user_row_config import JqGridUserRowConfig
 from snpdb.models.models_genome import GenomeBuild
@@ -17,6 +17,10 @@ class AnnotationRunColumns(DatatableConfig):
     def status(row: Dict[str, Any]):
         return AnnotationStatus(row["status"]).label
 
+    @staticmethod
+    def pipeline_type(row: Dict[str, Any]):  # column renderer: row's pipeline_type value -> enum label
+        return VariantAnnotationPipelineType(row["pipeline_type"]).label
+
     @staticmethod
     def format_timedelta(cell: CellData):
         delta: timedelta = cell.value
@@ -39,9 +43,10 @@ def __init__(self, request):
         self.rich_columns = [
             RichColumn(key="id", label='ID', orderable=True, client_renderer='idRenderer'),
             RichColumn(key="status", orderable=True, renderer=self.status),
+            RichColumn(key="pipeline_type", orderable=True, renderer=self.pipeline_type),
             RichColumn(key="annotation_range_lock__version__genome_build__name", label='Build', orderable=True),
             RichColumn(key="annotation_range_lock__version__id", label='Version', orderable=True),
-            RichColumn(key="annotation_range_lock__count", label='Var Count', orderable=True),
+            RichColumn(key="dump_count", label='VCF Count', orderable=True),
             RichColumn(key="vep_skipped_count", label="VEP Skipped", orderable=True),
             RichColumn(key="annotation_range_lock__min_variant__id", label="Min Var", orderable=True),
             RichColumn(key="annotation_range_lock__max_variant__id", label="Max Var", orderable=True),

diff --git a/annotation/management/commands/vep_run.py b/annotation/management/commands/vep_run.py
@@ -9,6 +9,7 @@
 from django.conf import settings
 from django.core.management.base import BaseCommand
 
+from annotation.models import VariantAnnotationPipelineType
 from annotation.vep_annotation import run_vep
 from snpdb.models.models_genome import GenomeBuild
 
@@ -47,7 +48,8 @@ def handle(self, *args, **options):
 
         output_filename = os.path.join(output_dir, f"{base_name}.{vep_suffix}.vcf.gz")
         return_code, std_out, std_err = run_vep(vcf_filename, output_filename,
-                                                genome_build, genome_build.annotation_consortium)
+                                                genome_build, genome_build.annotation_consortium,
+                                                VariantAnnotationPipelineType.STANDARD)
         if return_code != 0:
             logging.info(std_out)
             logging.error(std_err)

diff --git a/annotation/migrations/0076_alter_annotationrun_options_annotationrun_dump_count_and_more.py b/annotation/migrations/0076_alter_annotationrun_options_annotationrun_dump_count_and_more.py
@@ -0,0 +1,37 @@
+# Generated by Django 4.2.2 on 2023-08-29 06:08
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    # AnnotationRun: adds dump_count + pipeline_type, annotation_range_lock becomes a ForeignKey, unique per (lock, type)
+    dependencies = [
+        ('annotation', '0075_clinvarrecord_date_clinvar_created_and_more'),
+    ]
+
+    operations = [
+        migrations.AlterModelOptions(
+            name='annotationrun',
+            options={},
+        ),
+        migrations.AddField(
+            model_name='annotationrun',
+            name='dump_count',
+            field=models.IntegerField(null=True),
+        ),
+        migrations.AddField(
+            model_name='annotationrun',
+            name='pipeline_type',
+            field=models.CharField(choices=[('S', 'Standard Short Variant'), ('C', 'CNV')], default='S', max_length=1),
+        ),
+        migrations.AlterField(
+            model_name='annotationrun',
+            name='annotation_range_lock',
+            field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='annotation.annotationrangelock'),
+        ),
+        migrations.AlterUniqueTogether(
+            name='annotationrun',
+            unique_together={('annotation_range_lock', 'pipeline_type')},
+        ),
+    ]
diff --git a/annotation/migrations/0077_one_off_create_empty_cnv_annotation_runs.py b/annotation/migrations/0077_one_off_create_empty_cnv_annotation_runs.py
@@ -0,0 +1,51 @@
+# Generated by Django 4.2.2 on 2023-08-29 06:08
+
+from django.db import migrations
+from django.db.models import F
+from django.utils import timezone
+
+
+def _one_off_create_empty_cnv_annotation_runs(apps, _schema_editor):
+    """ Backfill dump_count, then add a FINISHED zero-count CNV run per existing standard run """
+    AnnotationRun = apps.get_model("annotation", "AnnotationRun")
+    # Hard-coded copies of VariantAnnotationPipelineType / AnnotationStatus values
+    VARIANT_ANNOTATION_PIPELINE_TYPE_STANDARD = "S"
+    VARIANT_ANNOTATION_PIPELINE_TYPE_CNV = "C"
+    ANNOTATION_STATUS_FINISHED = 'F'
+
+    now = timezone.now()
+    records = []
+    ar_qs = AnnotationRun.objects.filter(pipeline_type=VARIANT_ANNOTATION_PIPELINE_TYPE_STANDARD)
+    # Update existing to have dump set to annotated (didn't skip)
+    ar_qs.update(dump_count=F("annotated_count"))
+
+    # Loop over the STANDARD runs: a CNV filter matches no rows here, so nothing would be created
+    for ar in ar_qs:
+        cnv_ar = AnnotationRun(
+            status=ANNOTATION_STATUS_FINISHED,
+            annotation_range_lock=ar.annotation_range_lock,
+            pipeline_type=VARIANT_ANNOTATION_PIPELINE_TYPE_CNV,
+            dump_start=now,
+            dump_end=now,
+            annotation_start=now,
+            annotation_end=now,
+            upload_start=now,
+            upload_end=now,
+            upload_attempts=0,
+            dump_count=0,
+            annotated_count=0,
+        )
+        records.append(cnv_ar)
+
+    AnnotationRun.objects.bulk_create(records)
+
+
+class Migration(migrations.Migration):
+    # Data-only migration: runs _one_off_create_empty_cnv_annotation_runs (no reverse_code is supplied)
+    dependencies = [
+        ('annotation', '0076_alter_annotationrun_options_annotationrun_dump_count_and_more'),
+    ]
+
+    operations = [
+        migrations.RunPython(_one_off_create_empty_cnv_annotation_runs),
+    ]
diff --git a/annotation/migrations/0078_merge_20230912_1032.py b/annotation/migrations/0078_merge_20230912_1032.py
@@ -0,0 +1,12 @@
+# Generated by Django 4.1.4 on 2023-09-12 01:02
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("annotation", "0076_rename_rcvs_clinvarrecordcollection_urls"),
+        ("annotation", "0077_one_off_create_empty_cnv_annotation_runs"),
+    ]
+
+    operations = []  # merge migration: no schema/data changes, only joins the two dependency branches
diff --git a/annotation/models/models.py b/annotation/models/models.py
@@ -26,7 +26,8 @@
 from annotation.models.models_citations import Citation, CitationFetchRequest, CitationFetchResponse
 from annotation.models.models_enums import AnnotationStatus, \
     VariantClass, ColumnAnnotationCategory, VEPPlugin, VEPCustom, ClinVarReviewStatus, VEPSkippedReason, \
-    ManualVariantEntryType, HumanProteinAtlasAbundance, EssentialGeneCRISPR, EssentialGeneCRISPR2, EssentialGeneGeneTrap
+    ManualVariantEntryType, HumanProteinAtlasAbundance, EssentialGeneCRISPR, EssentialGeneCRISPR2, \
+    EssentialGeneGeneTrap, VariantAnnotationPipelineType
 from annotation.utils.clinvar_constants import CLINVAR_REVIEW_EXPERT_PANEL_STARS_VALUE
 from genes.models import GeneSymbol, Gene, TranscriptVersion, Transcript, GeneAnnotationRelease
 from genes.models_enums import AnnotationConsortium
@@ -685,7 +686,9 @@ def __str__(self):
 
 class AnnotationRun(TimeStampedModel):
     status = models.CharField(max_length=1, choices=AnnotationStatus.choices, default=AnnotationStatus.CREATED)
-    annotation_range_lock = models.OneToOneField(AnnotationRangeLock, null=True, on_delete=CASCADE)
+    annotation_range_lock = models.ForeignKey(AnnotationRangeLock, null=True, on_delete=CASCADE)
+    pipeline_type = models.CharField(max_length=1, choices=VariantAnnotationPipelineType.choices,
+                                     default=VariantAnnotationPipelineType.STANDARD)
     # task_id is used as a lock to prevent multiple Celery jobs from executing same job
     task_id = models.CharField(max_length=36, null=True)
     dump_start = models.DateTimeField(null=True)
@@ -704,9 +707,13 @@ class AnnotationRun(TimeStampedModel):
     vep_warnings = models.TextField(null=True)
     vcf_dump_filename = models.TextField(null=True)
     vcf_annotated_filename = models.TextField(null=True)
+    dump_count = models.IntegerField(null=True)
     annotated_count = models.IntegerField(null=True)
     celery_task_logs = models.JSONField(null=False, default=dict)  # Key=task_id, so we keep logs from multiple runs
 
+    class Meta:
+        unique_together = ('annotation_range_lock', 'pipeline_type')
+
     @property
     def variant_annotation_version(self):
         return self.annotation_range_lock.version
@@ -724,14 +731,17 @@ def get_status(self):
                 status = AnnotationStatus.DUMP_STARTED
             if self.dump_end:
                 status = AnnotationStatus.DUMP_COMPLETED
-            if self.annotation_start:
-                status = AnnotationStatus.ANNOTATION_STARTED
-            if self.annotation_end:
-                status = AnnotationStatus.ANNOTATION_COMPLETED
-            if self.upload_start:
-                status = AnnotationStatus.UPLOAD_STARTED
-            if self.upload_end:
+            if self.dump_count == 0:
                 status = AnnotationStatus.FINISHED
+            else:
+                if self.annotation_start:
+                    status = AnnotationStatus.ANNOTATION_STARTED
+                if self.annotation_end:
+                    status = AnnotationStatus.ANNOTATION_COMPLETED
+                if self.upload_start:
+                    status = AnnotationStatus.UPLOAD_STARTED
+                if self.upload_end:
+                    status = AnnotationStatus.FINISHED
         return status
 
     @property
@@ -750,6 +760,15 @@ def delete_related_objects(self):
             qs = get_queryset_for_annotation_version(klass, annotation_version)
             qs.filter(annotation_run=self).delete()
 
+    def get_dump_filename(self) -> str:
+        """ Path of this run's dump VCF inside settings.ANNOTATION_VCF_DUMP_DIR """
+        type_desc = str(self.pipeline_type)  # fallback when the pipeline type has no short name
+        if self.pipeline_type == VariantAnnotationPipelineType.STANDARD:
+            type_desc = "standard"
+        elif self.pipeline_type == VariantAnnotationPipelineType.CNV:
+            type_desc = "cnv"
+        return os.path.join(settings.ANNOTATION_VCF_DUMP_DIR, f"dump_{self.pk}_{type_desc}.vcf")
+
     def delete(self, using=None, keep_parents=False):
         self.delete_related_objects()
         super().delete(using=using, keep_parents=keep_parents)
@@ -760,7 +779,7 @@ def set_task_log(self, key, value):
         task_log[key] = value
 
     def __str__(self):
-        return f"AnnotationRun: {localtime(self.modified)} ({self.status})"
+        return f"AnnotationRun: {self.pk}/{self.get_pipeline_type_display()}: ({self.status})"
 
 
 class AbstractVariantAnnotation(models.Model):
@@ -781,7 +800,8 @@ class AbstractVariantAnnotation(models.Model):
     # The best way to see how these map to VEP fields is via the annotation details page
     amino_acids = models.TextField(null=True, blank=True)
     cadd_phred = models.FloatField(null=True, blank=True)
-    canonical = models.BooleanField(null=True, blank=True)  # TODO: This doesn't need to be nullable (default=False)
+    # TODO: This doesn't need to be nullable (default=False) - but will be slow. Change with next schema change
+    canonical = models.BooleanField(null=True, blank=True)
     nmd_escaping_variant = models.BooleanField(null=True, blank=True)
     codons = models.TextField(null=True, blank=True)
     consequence = models.TextField(null=True, blank=True)
@@ -983,7 +1003,7 @@ class VariantAnnotation(AbstractVariantAnnotation):
     # List of filters to describe variants that can be annotated
     VARIANT_ANNOTATION_Q = [
         Variant.get_no_reference_q(),
-        ~Q(alt__seq__in=['.', '*', "<DEL>"]),  # Exclude non-standard variants
+        ~Q(alt__seq__in=['.', '*']),  # Exclude non-standard variants
     ]
 
     @cached_property

diff --git a/annotation/models/models_enums.py b/annotation/models/models_enums.py
@@ -65,6 +65,12 @@ class ClinGenClassification(models.TextChoices):
     DISPUTED = 'P', 'Disputed'
 
 
+class VariantAnnotationPipelineType(models.TextChoices):
+    """ Which annotation pipeline a run uses: STANDARD (short variants) or CNV (symbolic '<...>' alleles) """
+    STANDARD = "S", "Standard Short Variant"
+    CNV = "C", "CNV"
+
+
 class VariantClass(models.TextChoices):
     """ https://asia.ensembl.org/info/genome/variation/prediction/classification.html#classes """