diff --git a/README.md b/README.md index 3dc39ba..e4a0856 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ If you are using Intezer's Dynamic Execution module, then set the service timeou * **polling_period_in_seconds**: This integer is the time to wait between status checks for the current analysis. * **analysis_timeout_in_seconds**: This integer is the time to wait for an analysis to complete. * **try_to_download_every_file**: This is a flag used for indicating if we want to attempt to download every available file, despite receiving an error on a previous attempt. -* **download_subfiles**: This is a flag used for indicating if we want to download sub files. Users may want to set this to `false` because extracted [files that are downloaded count against your quota](https://support.intezer.com/hc/en-us/articles/360021366619-How-is-Your-Analysis-Quota-Calculated-). +* **download_subfiles**: This is a flag used for indicating if we want to download sub files. Users may want to set this to `false` because extracted [files that are downloaded count against your quota](https://docs.intezer.com/docs/quota-consumption). * **min_malware_genes**: This is the minimum number of "malware" genes found in the "Family Details" for us to set the verdict of the analysis to malicious. * **score_administration_tools**: This is a flag used for indicating if we want to score files marked as "administration tools" as suspicious. If set to `false`, then no file with this designation will score based on this. * **use_black_box_verdicts**: This is a flag used for indicating if we want to use the verdict that the Intezer assigns an analysis based on their proprietary algorithm for verdicts. If not, we will rely on gene counts. 
diff --git a/intezer.py b/intezer.py index 9e00eb2..eac486a 100644 --- a/intezer.py +++ b/intezer.py @@ -1,9 +1,12 @@ +import os + from datetime import datetime from enum import Enum from http import HTTPStatus from time import sleep, time from typing import Any, Dict, List, Optional, Set +from assemblyline.common import forge from assemblyline.common.exceptions import NonRecoverableError from assemblyline.common.str_utils import truncate from assemblyline.odm.models.ontology.results.process import Process as ProcessModel @@ -494,6 +497,9 @@ def __init__(self, config: Optional[Dict] = None) -> None: super().__init__(config) self.log.debug("Initializing the Intezer service...") self.client: Optional[ALIntezerApi] = None + self.privileged = os.environ.get("PRIVILEGED", "false").lower() + if self.privileged == "true": + self.filestore = forge.get_filestore() def start(self) -> None: global global_safelist @@ -971,9 +977,25 @@ def _handle_subanalyses( if self.config.get("download_subfiles", True): if can_we_download_files or self.config.get("try_to_download_every_file", False): - file_was_downloaded = self.client.download_file_by_sha256(sub_sha256, self.working_directory) + + file_was_downloaded = False + path = f"{os.path.join(self.working_directory, sub_sha256)}.sample" + + if self.privileged == "true": + # Attempt to download from AL4 filestore first. + # This prevents unnecessary hits against the user's Intezer quota. + if self.filestore.exists(sub_sha256): + self.filestore.download(sub_sha256, path) + if os.path.exists(path): + self.log.debug(f"Downloaded file from filestore: {sub_sha256}") + file_was_downloaded = True + + # If the file was not downloaded via the filestore, attempt to download from Intezer. 
+ if not file_was_downloaded: + file_was_downloaded = self.client.download_file_by_sha256(sub_sha256, self.working_directory) + self.log.debug(f"Downloaded file from Intezer: {sub_sha256}") + if file_was_downloaded: - path = f"{self.working_directory}/{sub_sha256}.sample" try: request.add_extracted( path, diff --git a/service_manifest.yml b/service_manifest.yml index 9c5c49b..121fec5 100644 --- a/service_manifest.yml +++ b/service_manifest.yml @@ -111,4 +111,4 @@ docker_config: allow_internet_access: true image: ${REGISTRY}cccs/assemblyline-service-intezer:$SERVICE_TAG cpu_cores: 0.5 - ram_mb: 256 + ram_mb: 512