Condor development branch #6

Open · wants to merge 8 commits into base: main
Binary file added HHbbgg_flow/__pycache__/__init__.cpython-311.pyc
Binary file not shown.
Binary file added HHbbgg_flow/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
1 change: 0 additions & 1 deletion HHbbgg_flow/analysis_manager/__init__.py
@@ -1 +0,0 @@
from .run_analysis import main as analysis_runner
10 changes: 10 additions & 0 deletions HHbbgg_flow/analysis_manager/analysis.py
@@ -0,0 +1,10 @@
import os, sys
from HHbbgg_flow.ttH_killer import process_ttH_vars, process_ttH_sideband
import logging
logger = logging.getLogger(__name__)

def run_analysis(args):
    if args.ttH_vars_bool:
        process_ttH_vars(args.ttH_vars_config, args.out_pq_size, args.output_dir_path)
    elif args.ttH_sideband_bool:
        process_ttH_sideband(args.ttH_sideband_config, args.out_pq_size)
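
For context, run_analysis expects an argparse-style namespace carrying exactly the attributes it accesses above. A minimal sketch of a parser that would produce such a namespace is shown below; the flag names and defaults are illustrative, and the actual parser lives in scripts/run_analysis.py:

import argparse

from HHbbgg_flow.analysis_manager.analysis import run_analysis

def build_parser():
    # Hypothetical flag names mirroring the attributes used in run_analysis();
    # the real definitions live in scripts/run_analysis.py.
    parser = argparse.ArgumentParser(description="HHbbggFlow analysis runner (sketch)")
    parser.add_argument("--ttH-vars", dest="ttH_vars_bool", action="store_true")
    parser.add_argument("--ttH-vars-config", dest="ttH_vars_config", default=None)
    parser.add_argument("--ttH-sideband", dest="ttH_sideband_bool", action="store_true")
    parser.add_argument("--ttH-sideband-config", dest="ttH_sideband_config", default=None)
    parser.add_argument("--out-pq-size", dest="out_pq_size", type=int, default=1)
    parser.add_argument("--output-dir-path", dest="output_dir_path", default=".")
    return parser

# Example: run_analysis(build_parser().parse_args(["--ttH-vars", "--ttH-vars-config", "cfg.json"]))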
Empty file added HHbbgg_flow/condor/__init__.py
Empty file.
31 changes: 31 additions & 0 deletions HHbbgg_flow/condor/exe_template.sh
@@ -0,0 +1,31 @@
#!/bin/bash

# Set proxy
export X509_USER_PROXY=$PWD/GRID_PROXY

# >>> conda initialize >>>
# !! Contents within this block are managed by 'conda init' !!
__conda_setup="$('/usr/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
if [ $? -eq 0 ]; then
    eval "$__conda_setup"
else
    if [ -f "/usr/etc/profile.d/conda.sh" ]; then
        . "/usr/etc/profile.d/conda.sh"
    else
        export PATH="/usr/bin:$PATH"
    fi
fi
unset __conda_setup

mkdir -p HHbbgg # dir to unpack the conda env into
xrdcp root://cmseos.fnal.gov//store/user/idutta/HiggsDNA_env_vars_Run3/HHbbgg_conda_env.tar.gz .
tar -xf HHbbgg_conda_env.tar.gz -C HHbbgg
ls -lrth HHbbgg/*/*
source HHbbgg/bin/activate

tar -xf hhbbgg-flow.tar.gz # unpack the HHbbggFlow repo contents
ls -lrth
python run_analysis.py

#for i in *.parquet; do xrdcp -f $i root://cmseos.fnal.gov//store/user/idutta/HiggsDNA/EOS_OUTPUT_DIR/SAMPLE/JOB/$i; rm -rf *.parquet; done
#for i in *summary*; do xrdcp -f $i root://cmseos.fnal.gov//store/user/idutta/HiggsDNA/EOS_OUTPUT_DIR/SAMPLE/JOB/$i; rm -rf *summary*; done
110 changes: 110 additions & 0 deletions HHbbgg_flow/condor/submit_condor_jobs.py
@@ -0,0 +1,110 @@
import json
import os, sys
import math
import subprocess
import shutil, fileinput

# Define the maximum number of files to process per job (as defined in `run_analysis.py`)
FILES_PER_JOB = 5 # Adjust this value as needed


# Get user proxy
status, proxy_info = subprocess.getstatusoutput("voms-proxy-info")
proxy_info = proxy_info.split('\n')
for line in proxy_info:
    if "path" in line:
        user_proxy = line.split(":")[-1].strip()


xrd_analysis_tarfile = "/uscms/home/idutta/nobackup/HiggsDNA_central_Run3/HHbbggFlow/hhbbgg-flow.tar.gz"
python_file = "/uscms/home/idutta/nobackup/HiggsDNA_central_Run3/HHbbggFlow/scripts/run_analysis.py"
executable = "exe_template.sh"
req_memory = "4096"
req_disk = "10000"
req_ncpus = "1"

# Path to the original SMsamples.json file
samples_file = '../metadata/samples/SMsamples.json'
samples_to_run = {
    "Run3_2022postEE": {
        "ttHToGG": ["nominal"]
    },
    "Run3_2022preEE": {
        "ttHToGG": ["nominal"]
    }
}
# Load the JSON data
with open(samples_file, 'r') as f:
    samples_data = json.load(f)

# Create output directories for job scripts and JSON files
output_dir = 'job_samples/'
os.makedirs(output_dir, exist_ok=True)
exe_dir = 'job_executables/'
os.makedirs(exe_dir, exist_ok=True)
log_dir='job_logs/'
os.makedirs(log_dir, exist_ok=True)

# Prepare job list
job_list = []

# Split the samples into smaller JSON files
for era, sample_names in samples_to_run.items():
    for sname, s_types in sample_names.items():
        for stype in s_types:
            files = samples_data['data_eras'][era]['samples'][sname]['files'][stype]
            num_jobs = math.ceil(len(files) / FILES_PER_JOB)

            for job_index in range(num_jobs):
                # Calculate file range for this job
                start_index = job_index * FILES_PER_JOB
                end_index = start_index + FILES_PER_JOB
                job_files = files[start_index:end_index]

                # Create new JSON structure
                job_sample_info = {
                    sname: {
                        'xs': samples_data['data_eras'][era]['samples'][sname]['xs'],
                        'bf': samples_data['data_eras'][era]['samples'][sname]['bf'],
                        'files': {
                            stype: job_files
                        },
                    }
                }

                # Write to a new JSON file
                job_json_file = os.path.join(output_dir, f'{era}_{sname}_{stype}_job_{job_index}.json')
                with open(job_json_file, 'w') as jf:
                    json.dump(job_sample_info, jf, indent=4)

                # Add to job list
                job_list.append(job_json_file)
                output_file = os.path.join(log_dir, f'{era}_{sname}_{stype}_job_{job_index}.out')
                error_file = os.path.join(log_dir, f'{era}_{sname}_{stype}_job_{job_index}.err')
                log_file = os.path.join(log_dir, f'{era}_{sname}_{stype}_job_{job_index}.log')

                # Create a JDL submission file for this job from the template
                job_jdl_file = os.path.join(exe_dir, f'{era}_{sname}_{stype}_job_{job_index}.jdl')
                shutil.copyfile("submit_template.txt", job_jdl_file)
                for line in fileinput.FileInput(job_jdl_file, inplace=1):
                    line = line.replace("GRID_PROXY", user_proxy)
                    line = line.replace("XRD_ANALYSIS_TARFILE", xrd_analysis_tarfile)
                    line = line.replace("CONFIG_FILE", job_json_file)
                    line = line.replace("PYTHON_FILE", python_file)
                    line = line.replace("EXECUTABLE", executable)
                    line = line.replace("OUTPUT", output_file)
                    line = line.replace("ERROR", error_file)
                    line = line.replace("LOG", log_file)
                    line = line.replace("REQ_MEMORY", req_memory)
                    line = line.replace("REQ_DISK", req_disk)
                    line = line.replace("REQ_NCPUS", req_ncpus)
                    print(line.rstrip())

                submitCommand = "condor_submit " + job_jdl_file
                print(submitCommand)
                os.system(submitCommand)

# Write job list to file
with open('sample_jobs.txt', 'w') as jf:
    for job_file in job_list:
        jf.write(f"HHbbgg_flow/condor/{job_file}\n")
15 changes: 15 additions & 0 deletions HHbbgg_flow/condor/submit_template.txt
@@ -0,0 +1,15 @@
universe = vanilla
should_transfer_files = Yes
+SingularityImage = "/cvmfs/unpacked.cern.ch/registry.hub.docker.com/coffeateam/coffea-dask:latest"

transfer_input_files = GRID_PROXY, XRD_ANALYSIS_TARFILE, CONFIG_FILE, PYTHON_FILE
executable = EXECUTABLE
output = OUTPUT
error = ERROR
log = LOG

RequestMemory = REQ_MEMORY
RequestDisk = REQ_DISK
RequestCpus = REQ_NCPUS

queue 1