Condor development branch #6

Open · wants to merge 8 commits into base: main
Binary file added HHbbgg_flow/__pycache__/__init__.cpython-311.pyc
Binary file not shown.
Binary file added HHbbgg_flow/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
1 change: 0 additions & 1 deletion HHbbgg_flow/analysis_manager/__init__.py
@@ -1 +0,0 @@
from .run_analysis import main as analysis_runner
10 changes: 10 additions & 0 deletions HHbbgg_flow/analysis_manager/analysis.py
@@ -0,0 +1,10 @@
import os, sys
from HHbbgg_flow.ttH_killer import process_ttH_vars, process_ttH_sideband
import logging
logger = logging.getLogger(__name__)

def run_analysis(args):
    if args.ttH_vars_bool:
        process_ttH_vars(args.ttH_vars_config, args.out_pq_size, args.output_dir_path)
    elif args.ttH_sideband_bool:
        process_ttH_sideband(args.ttH_sideband_config, args.out_pq_size)
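
For context, run_analysis expects an argparse-style namespace carrying exactly the attributes it accesses above. A minimal sketch of a parser that would produce such a namespace is shown below; the flag names and defaults are illustrative, and the actual parser lives in scripts/run_analysis.py:

import argparse

from HHbbgg_flow.analysis_manager.analysis import run_analysis

def build_parser():
    # Hypothetical flag names mirroring the attributes used in run_analysis();
    # the real definitions live in scripts/run_analysis.py.
    parser = argparse.ArgumentParser(description="HHbbggFlow analysis runner (sketch)")
    parser.add_argument("--ttH-vars", dest="ttH_vars_bool", action="store_true")
    parser.add_argument("--ttH-vars-config", dest="ttH_vars_config", default=None)
    parser.add_argument("--ttH-sideband", dest="ttH_sideband_bool", action="store_true")
    parser.add_argument("--ttH-sideband-config", dest="ttH_sideband_config", default=None)
    parser.add_argument("--out-pq-size", dest="out_pq_size", type=int, default=1)
    parser.add_argument("--output-dir-path", dest="output_dir_path", default=".")
    return parser

# Example: run_analysis(build_parser().parse_args(["--ttH-vars", "--ttH-vars-config", "cfg.json"]))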
Empty file added HHbbgg_flow/condor/__init__.py
Empty file.
31 changes: 31 additions & 0 deletions HHbbgg_flow/condor/exe_template.sh
@@ -0,0 +1,31 @@
#!/bin/bash

# Set proxy
export X509_USER_PROXY=$PWD/GRID_PROXY

# >>> conda initialize >>>
# !! Contents within this block are managed by 'conda init' !!
__conda_setup="$('/usr/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
if [ $? -eq 0 ]; then
    eval "$__conda_setup"
else
    if [ -f "/usr/etc/profile.d/conda.sh" ]; then
        . "/usr/etc/profile.d/conda.sh"
    else
        export PATH="/usr/bin:$PATH"
    fi
fi
unset __conda_setup

mkdir -p HHbbgg # dir to unpack the conda env into
xrdcp root://cmseos.fnal.gov//store/user/idutta/HiggsDNA_env_vars_Run3/HHbbgg_conda_env.tar.gz .
tar -xf HHbbgg_conda_env.tar.gz -C HHbbgg
ls -lrth HHbbgg/*/*
source HHbbgg/bin/activate

tar -xf hhbbgg-flow.tar.gz # unpack the HHbbggFlow repo contents
ls -lrth
python run_analysis.py

#for i in *.parquet; do xrdcp -f $i root://cmseos.fnal.gov//store/user/idutta/HiggsDNA/EOS_OUTPUT_DIR/SAMPLE/JOB/$i; rm -rf *.parquet; done
#for i in *summary*; do xrdcp -f $i root://cmseos.fnal.gov//store/user/idutta/HiggsDNA/EOS_OUTPUT_DIR/SAMPLE/JOB/$i; rm -rf *summary*; done
110 changes: 110 additions & 0 deletions HHbbgg_flow/condor/submit_condor_jobs.py
@@ -0,0 +1,110 @@
import json
import os, sys
import math
import subprocess
import shutil, fileinput

# Define the maximum number of files to process per job (as defined in `run_analysis.py`)
FILES_PER_JOB = 5 # Adjust this value as needed


# Get user proxy
status, proxy_info = subprocess.getstatusoutput("voms-proxy-info")
proxy_info = proxy_info.split('\n')
for line in proxy_info:
    if "path" in line:
        user_proxy = line.split(":")[-1].strip()


xrd_analysis_tarfile = "/uscms/home/idutta/nobackup/HiggsDNA_central_Run3/HHbbggFlow/hhbbgg-flow.tar.gz"
python_file = "/uscms/home/idutta/nobackup/HiggsDNA_central_Run3/HHbbggFlow/scripts/run_analysis.py"
executable = "exe_template.sh"
req_memory = "4096"
req_disk = "10000"
req_ncpus = "1"

# Path to the original SMsamples.json file
samples_file = '../metadata/samples/SMsamples.json'
samples_to_run = {
    "Run3_2022postEE": {
        "ttHToGG": ["nominal"]
    },
    "Run3_2022preEE": {
        "ttHToGG": ["nominal"]
    }
}
# Load the JSON data
with open(samples_file, 'r') as f:
    samples_data = json.load(f)

# Create output directories for job scripts and JSON files
output_dir = 'job_samples/'
os.makedirs(output_dir, exist_ok=True)
exe_dir = 'job_executables/'
os.makedirs(exe_dir, exist_ok=True)
log_dir='job_logs/'
os.makedirs(log_dir, exist_ok=True)

# Prepare job list
job_list = []

# Split the samples into smaller JSON files
for era, sample_names in samples_to_run.items():
    for sname, s_types in sample_names.items():
        for stype in s_types:
            files = samples_data['data_eras'][era]['samples'][sname]['files'][stype]
            num_jobs = math.ceil(len(files) / FILES_PER_JOB)

            for job_index in range(num_jobs):
                # Calculate file range for this job
                start_index = job_index * FILES_PER_JOB
                end_index = start_index + FILES_PER_JOB
                job_files = files[start_index:end_index]

                # Create new JSON structure
                job_sample_info = {
                    sname: {
                        'xs': samples_data['data_eras'][era]['samples'][sname]['xs'],
                        'bf': samples_data['data_eras'][era]['samples'][sname]['bf'],
                        'files': {
                            stype: job_files
                        },
                    }
                }

                # Write to a new JSON file
                job_json_file = os.path.join(output_dir, f'{era}_{sname}_{stype}_job_{job_index}.json')
                with open(job_json_file, 'w') as jf:
                    json.dump(job_sample_info, jf, indent=4)

                # Add to job list
                job_list.append(job_json_file)
                output_file = os.path.join(log_dir, f'{era}_{sname}_{stype}_job_{job_index}.out')
                error_file = os.path.join(log_dir, f'{era}_{sname}_{stype}_job_{job_index}.err')
                log_file = os.path.join(log_dir, f'{era}_{sname}_{stype}_job_{job_index}.log')

                # Create a JDL submission file for this job from the template
                job_jdl_file = os.path.join(exe_dir, f'{era}_{sname}_{stype}_job_{job_index}.jdl')
                shutil.copyfile("submit_template.txt", job_jdl_file)
                for line in fileinput.FileInput(job_jdl_file, inplace=1):
                    line = line.replace("GRID_PROXY", user_proxy)
                    line = line.replace("XRD_ANALYSIS_TARFILE", xrd_analysis_tarfile)
                    line = line.replace("CONFIG_FILE", job_json_file)
                    line = line.replace("PYTHON_FILE", python_file)
                    line = line.replace("EXECUTABLE", executable)
                    line = line.replace("OUTPUT", output_file)
                    line = line.replace("ERROR", error_file)
                    line = line.replace("LOG", log_file)
                    line = line.replace("REQ_MEMORY", req_memory)
                    line = line.replace("REQ_DISK", req_disk)
                    line = line.replace("REQ_NCPUS", req_ncpus)
                    print(line.rstrip())

                submitCommand = "condor_submit " + job_jdl_file
                print(submitCommand)
                os.system(submitCommand)

# Write job list to file
with open('sample_jobs.txt', 'w') as jf:
    for job_file in job_list:
        jf.write(f"HHbbgg_flow/condor/{job_file}\n")
15 changes: 15 additions & 0 deletions HHbbgg_flow/condor/submit_template.txt
@@ -0,0 +1,15 @@
universe = vanilla
should_transfer_files = Yes
+SingularityImage = "/cvmfs/unpacked.cern.ch/registry.hub.docker.com/coffeateam/coffea-dask:latest"

transfer_input_files = GRID_PROXY, XRD_ANALYSIS_TARFILE, CONFIG_FILE, PYTHON_FILE
executable = EXECUTABLE
output = OUTPUT
error = ERROR
log = LOG

RequestMemory = REQ_MEMORY
RequestDisk = REQ_DISK
RequestCpus = REQ_NCPUS

queue 1