Skip to content

Commit

Permalink
modify storage structure, implement file autoremoval #83 (#86)
Browse files Browse the repository at this point in the history
Co-authored-by: David Greenwood <[email protected]>
  • Loading branch information
fqrious and himynamesdave authored Nov 16, 2024
1 parent f2fe9d7 commit 5fd6d7d
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 41 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ uritemplate==4.1.1
urllib3==2.2.2
vine==5.1.0
wcwidth==0.2.13
django-cleanup==9.0.0
stix2arango @ https://github.com/muchdogesec/stix2arango/releases/download/main-2024-11-16/stix2arango-0.0.2-py3-none-any.whl
file2txt @ https://github.com/muchdogesec/file2txt/releases/download/main-2024-11-16/file2txt-0.0.1b2-py3-none-any.whl
txt2stix @ https://github.com/muchdogesec/txt2stix/releases/download/main-2024-11-13/txt2stix-0.0.1b5-py3-none-any.whl
Expand Down
24 changes: 14 additions & 10 deletions stixify/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
'dogesec_commons.objects.app.ArangoObjectsViewApp',
'django.contrib.postgres',
'stixify.web',
'django_cleanup.apps.CleanupConfig',
]

MIDDLEWARE = [
Expand Down Expand Up @@ -120,19 +121,22 @@
}

# Optional remote storage configuration: when the USE_S3_STORAGE environment
# variable equals "1", the "default" and "staticfiles" entries of STORAGES are
# set to the storages.backends.s3.S3Storage backend, configured from the R2_*
# environment variables.
# NOTE(review): this span looks like a rendered diff whose +/- markers and
# indentation were stripped, so two variants of the block are interleaved.
# Presumably the shared `options` dict and the explicit STORAGES["staticfiles"]
# dict are the added lines, while the inline "OPTIONS" dict and the
# copy.deepcopy-based staticfiles assignment are the removed ones -- confirm
# against the commit before relying on either.
if os.getenv("USE_S3_STORAGE") == "1":
# Variant with a shared options dict. os.environ[...] raises KeyError when a
# variable is unset; uploads use the 'files' location.
options = {
"bucket_name": os.environ["R2_BUCKET_NAME"],
"endpoint_url": os.environ["R2_ENDPOINT_URL"],
"access_key": os.environ["R2_ACCESS_KEY"],
"secret_key": os.environ["R2_SECRET_KEY"],
'custom_domain': os.environ["R2_CUSTOM_DOMAIN"],
'location': 'files',
}
STORAGES["default"] = {
"BACKEND": "storages.backends.s3.S3Storage",
# Variant with an inline options dict; here uploads use the 'media' location.
"OPTIONS": {
"bucket_name": os.environ["R2_BUCKET_NAME"],
"endpoint_url": os.environ["R2_ENDPOINT_URL"],
"access_key": os.environ["R2_ACCESS_KEY"],
"secret_key": os.environ["R2_SECRET_KEY"],
'custom_domain': os.environ["R2_CUSTOM_DOMAIN"],
'location': 'media',
},
"OPTIONS": options,
}
# Static files reuse the same connection options but override only the
# 'location' key ('django/staticfiles').
STORAGES["staticfiles"] = {
"BACKEND": "storages.backends.s3.S3Storage",
"OPTIONS": {**options, 'location':'django/staticfiles'},
}
# Other variant: deep-copy the default entry, then change its location to
# 'staticfiles'.
STORAGES["staticfiles"] = copy.deepcopy(STORAGES["default"])
STORAGES["staticfiles"]["OPTIONS"]['location'] = 'staticfiles'

# Password validation
# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators
Expand Down
38 changes: 7 additions & 31 deletions stixify/web/models.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,12 @@
import logging
import os
import sys
from typing import Iterable
from django.conf import settings
from django.db.models.signals import post_delete, pre_save
from django.db.models.signals import post_delete
from django.dispatch import receiver
from django.db import models
from django.contrib.postgres.fields import ArrayField
import uuid, typing
from django.utils.text import slugify
from urllib.parse import urlparse
from functools import partial
import txt2stix.common
import txt2stix, txt2stix.extractions
from django.core.exceptions import ValidationError
from django.db.models import F, CharField, Value
from django.db.models.functions import Concat
from datetime import datetime, timezone
from django.core.files.uploadedfile import InMemoryUploadedFile
import stix2
Expand Down Expand Up @@ -104,10 +95,8 @@ def save(self, *args, **kwargs):

# Builds the relative storage path for an uploaded file. It is passed as
# `upload_to` to the FileField/ImageField declarations of File and FileImage.
#
# Parameters:
#   instance: the File or FileImage model instance being saved.
#   filename: the name of the uploaded file.
# Returns: a path string joined with os.path.join.
#
# NOTE(review): this span looks like a rendered diff with +/- markers and
# indentation stripped, so two versions of the body are interleaved (hence the
# two return statements). Presumably the `id`-based lines are the removed
# version and the `instance.identity` lines the added one -- confirm against
# the commit.
def upload_to_func(instance: 'File|FileImage', filename):
if isinstance(instance, FileImage):
# Version 1: key the path on the owning File's id -> <id>/files/<filename>.
# A FileImage reaches its File through the `report` foreign key.
id = instance.report.id
else:
id = instance.id
return os.path.join(str(id), 'files', filename)
# Version 2: swap a FileImage for its parent File, then build
# <identity['id']>/<report_id>/<filename>. On File, `report_id` is
# 'report--' followed by the File id.
instance = instance.report
return os.path.join(str(instance.identity['id']), str(instance.report_id), filename)

def validate_file(file: InMemoryUploadedFile, mode: str):
_, ext = os.path.splitext(file.name)
Expand All @@ -118,14 +107,14 @@ def validate_file(file: InMemoryUploadedFile, mode: str):

class File(CommonSTIXProps):
id = models.UUIDField(unique=True, max_length=64, primary_key=True, default=uuid.uuid4)
file = models.FileField(upload_to=upload_to_func, help_text="Full path to the file to be converted. Must match a supported file type: `application/pdf`, `application/msword`, `application/vnd.openxmlformats-officedocument.wordprocessingml.document`, `application/vnd.ms-powerpoint`, `application/vnd.openxmlformats-officedocument.presentationml.presentation`, `text/html`, `text/csv`, `image/jpg`, `image/jpeg`, `image/png`, `image/webp`. The filetype must be supported by the `mode` used or you will receive an error.")
file = models.FileField(max_length=1024, upload_to=upload_to_func, help_text="Full path to the file to be converted. Must match a supported file type: `application/pdf`, `application/msword`, `application/vnd.openxmlformats-officedocument.wordprocessingml.document`, `application/vnd.ms-powerpoint`, `application/vnd.openxmlformats-officedocument.presentationml.presentation`, `text/html`, `text/csv`, `image/jpg`, `image/jpeg`, `image/png`, `image/webp`. The filetype must be supported by the `mode` used or you will receive an error.")
profile = models.ForeignKey(Profile, on_delete=models.PROTECT)
dossiers = models.ManyToManyField(Dossier, related_name="files", help_text="The Dossier ID(s) you want to add the generated Report for this File to.")
mimetype = models.CharField(max_length=512)
mode = models.CharField(max_length=256, help_text="How the File should be processed. Generally the `mode` should match the filetype of `file` selected. Except for HTML documents where you can use `html` mode (processes entirety of HTML page) and `html_article` mode (where only the article on the page will be processed).")
markdown_file = models.FileField(upload_to=upload_to_func, null=True)
markdown_file = models.FileField(max_length=256, upload_to=upload_to_func, null=True)
summary = models.CharField(max_length=65536, null=True, default=None)

@property
def report_id(self):
return 'report--'+str(self.id)
Expand All @@ -137,19 +126,6 @@ def report_id(self, value):
def clean(self) -> None:
validate_file(self.file, self.mode)
return super().clean()

# post_delete signal handler for File: deletes the stored file of every related
# FileImage and the File's own stored file, then tries to delete each parent
# path of the File's stored name.
# NOTE(review): the commit header reports 31 deletions in this file and adds
# django-cleanup to the requirements, so this handler is presumably one of the
# removed pieces -- confirm against the commit. Indentation was stripped, so
# whether the `while` loop sits inside or after the `for` loop cannot be
# determined from this view.
@receiver(post_delete, sender=File)
def remove_files_on_delete(sender, instance: File, **kwargs):
filename = instance.file.name
# Related image files first, the File's own file last.
files = [f.file for f in instance.images.all()] + [instance.file]
for f in files:
# save=False: delete the stored file without saving the model instance.
f.delete(save=False)
# Drop the last '/'-separated segment on each pass until the name is empty,
# asking the storage backend to delete each resulting parent path
# (presumably to clean up now-empty directories -- verify).
while filename:
filename = "/".join(filename.split('/')[:-1])
try:
instance.file.storage.delete(filename)
except Exception as e:
# Best-effort cleanup: failures are only logged at debug level.
logging.debug(e)

@receiver(post_delete, sender=File)
def remove_reports_on_delete(sender, instance: File, **kwargs):
Expand All @@ -159,7 +135,7 @@ def remove_reports_on_delete(sender, instance: File, **kwargs):

# An image attached to a File; reachable from the File through the `images`
# related name.
# NOTE(review): this span looks like a rendered diff with +/- markers stripped.
# The two `file` declarations differ only by `max_length=256` and are
# presumably the removed and added versions of the same field -- confirm
# against the commit.
class FileImage(models.Model):
# Owning File; on_delete=models.CASCADE removes the image rows with the File.
report = models.ForeignKey(File, related_name='images', on_delete=models.CASCADE)
# Stored image; its storage path is computed by upload_to_func.
file = models.ImageField(upload_to=upload_to_func)
file = models.ImageField(upload_to=upload_to_func, max_length=256)
name = models.CharField(max_length=256)


Expand Down

0 comments on commit 5fd6d7d

Please sign in to comment.