Skip to content

Commit

Permalink
#3544 remove non ascii characters from classifications (#949)
Browse files Browse the repository at this point in the history
* #3544 remove non ascii characters from classifications

* #3544 management script for previous commit

* #3544 modified code to replace U+00A0.
  • Loading branch information
Bharath-kandula authored Nov 19, 2024
1 parent 24dd47d commit 75ecb26
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 0 deletions.
39 changes: 39 additions & 0 deletions classification/management/commands/remove_invisible_characters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import json

from django.core.management.base import BaseCommand
import re
from classification.models import Classification
from library.guardian_utils import admin_bot


def has_invisible_characters(text):
    """Look for a no-break space (U+00A0) in *text*.

    Returns the ``re.Match`` object for the first occurrence, or ``None``
    when the character is absent, so the result can be used directly in a
    truthiness test.
    """
    return re.compile("\u00a0").search(text)


def ensure_string(data):
    """Return a text representation of *data* suitable for character searches.

    * ``str`` is returned unchanged.
    * ``dict`` / ``list`` are serialised to JSON with non-ASCII characters
      kept as-is.
    * Anything else goes through ``str()``.
    """
    if isinstance(data, str):
        return data
    if isinstance(data, (dict, list)):
        # json.dumps escapes non-ASCII characters by default, which would
        # turn a real U+00A0 into a six-character ASCII escape sequence and
        # hide it from a search for the literal character. Keep the
        # characters themselves in the output.
        return json.dumps(data, ensure_ascii=False)
    return str(data)


class Command(BaseCommand):
    """Revalidate every classification whose evidence contains U+00A0.

    Each classification's ``evidence`` mapping is scanned; when any
    sub-value contains a no-break space the record is revalidated as the
    admin bot user.
    NOTE(review): the actual character replacement is presumably performed
    during revalidation - confirm against ``Classification.revalidate``.
    """

    def handle(self, *args, **options):
        """Scan all classifications and revalidate the ones that match."""
        user = admin_bot()

        for classification in Classification.objects.all():
            evidence = classification.evidence
            if not isinstance(evidence, dict):
                # Nothing to scan (assumes evidence is normally a dict of
                # dicts - TODO confirm against the model definition).
                continue

            needs_revalidation = False
            for key, value in evidence.items():
                if not isinstance(value, dict):
                    # Skip unexpected entries rather than aborting the
                    # whole run on one malformed record.
                    continue
                for sub_value in value.values():
                    if has_invisible_characters(ensure_string(sub_value)):
                        self.stdout.write(
                            f"match found in {classification.id} record, in: {key}")
                        needs_revalidation = True

            # Revalidate once per record, and only after the scan has
            # finished, so the evidence is not touched while it is still
            # being iterated and the work is not repeated per match.
            if needs_revalidation:
                classification.revalidate(user=user)

        self.stdout.write(
            self.style.SUCCESS('Invisible characters removed from evidence field for all classifications.'))
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from django.db import migrations

from manual.operations.manual_operations import ManualOperation


class Migration(migrations.Migration):
    """Register the ``remove_invisible_characters`` management command as a manual operation.

    NOTE(review): presumably ``ManualOperation`` records the task for an
    operator to run rather than executing it inline - confirm in
    ``manual.operations.manual_operations``.
    """

    dependencies = [
        ("classification", "0114_classification_withdraw_reason"),
    ]

    operations = [
        ManualOperation(
            task_id=ManualOperation.task_id_manage(["remove_invisible_characters"]),
        ),
    ]
7 changes: 7 additions & 0 deletions classification/models/classification.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import copy
import json
import re
import uuid
from collections import Counter, namedtuple
Expand All @@ -25,6 +26,7 @@
from django.urls.base import reverse
from django_extensions.db.models import TimeStampedModel
from guardian.shortcuts import assign_perm, get_objects_for_user
from unidecode import unidecode

from annotation.models.models import AnnotationVersion, VariantAnnotationVersion, VariantAnnotation
from annotation.regexes import db_ref_regexes, DbRegexes
Expand Down Expand Up @@ -1131,6 +1133,11 @@ def process_entry(self, cell: VCDataCell, source: str):
e_key = cell.e_key
note = cell.note

if value and '\u00a0' in value:
value = value.replace('\u00a0', ' ')
if note and '\u00a0' in note:
note = note.replace('\u00a0', ' ')

if self.lab.external:
cell.validate = False

Expand Down

0 comments on commit 75ecb26

Please sign in to comment.