Skip to content

Commit

Permalink
Performance improvements when initializing simulations - merge with l…
Browse files Browse the repository at this point in the history
…egacy-devel (#322)

* added tqdm bars for multi_proc

* Progress bar for multi proc mode

* checkpoint

* changed deepcopy -> copy

* added pbar for init and end of the sim

* removed more deepcopies

* remove unused requirements

* change requirements to allow for more up to date  versions

* fix run_id bug

* add cadCAD-tweaked info

* reactivate multi-cpu processing

* reactivate multi-cpu processing

* Update README.md

* Update README.md

* get policy input refactors

* get policy input refactors

* improve get_policy_input perf by 2x

* improve get_policy_input perf by 2x

* small refactors

* refactor type hints to py>3.9

* rm fn dependency

* v 0.4.29

* use context manager for process pool

* rm temp testing files

* CHANGELOG

* refactors for fixing

* some refactors, P&P model not properly working

* sync

* fixes

* rm p&p .py

* revert to old README

* fix runs / subsets not being init properly

* fix N being 1 unit above the expected

* change hints for 3.9 compat

* fix py3.9 compat

* fix op breaking py=3.9
  • Loading branch information
danlessa authored Dec 15, 2023
1 parent 31979d4 commit 803c8fb
Show file tree
Hide file tree
Showing 24 changed files with 12,720 additions and 12,570 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,6 @@ simulations/validation
simulations/poc/local/
simulations/regression_tests/poc
simulations/regression_tests/poc_configs
dist/*
testing/tests/import_cadCAD.nbconvert.ipynb
testing/tests/cadCAD_memory_address.json
15 changes: 12 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@
### February 15, 2023
* **Fixes:**
- Package has been cleaned-up for working with Python 3.10
### 0.4.29.1

#### Changes
- Parallel executor uses the context manager handling the Process Pool lifetime

### 0.4.29

- Merged repo with the `cadCAD_tweaked`, which includes performance improvements
- Python 3.10 compatible
### September 28, 2021
#### New Features:
* **ver. ≥ `0.4.28`:**
Expand Down Expand Up @@ -217,16 +226,16 @@ as a 2 dimensional `list`
+----+------------+-----------+----+---------------------+-----+---------+----------+
```

* **Flattened Configuration List:** The `cadCAD.configs` (System Model Configuration) `list` has been **temporarily**
* **Flattened Configuration list:** The `cadCAD.configs` (System Model Configuration) `list` has been **temporarily**
flattened to contain single run `cadCAD.configuration.Configuration` objects to both fault-tolerant simulation and
elastic workloads. This functionality will be restored in a subsequent release by a class that returns
`cadCAD.configs`'s original representation in ver. `0.3.1`.
* The conversion utilities have been provided to restore its original representation of configurations with
runs >= 1
* [System Configuration Conversions](documentation/System_Configuration.md)
* Configuration as List of Configuration Objects (as in ver. `0.3.1`)
* Configuration as list of Configuration Objects (as in ver. `0.3.1`)
* New: System Configuration as a Pandas DataFrame
* New: System Configuration as List of Dictionaries
* New: System Configuration as list of Dictionaries

###### Examples:
* Notes:
Expand Down
27 changes: 10 additions & 17 deletions cadCAD/configuration/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,19 +90,12 @@ def append_model(
sim_config = t[0]
sim_config['subset_id'] = subset_id
sim_config['subset_window'] = self.subset_window
N = sim_config['N']
if N > 1:
for n in range(N):
sim_config['simulation_id'] = self.simulation_id
sim_config['run_id'] = n
sim_config['N'] = 1
new_sim_configs.append(deepcopy(sim_config))
del sim_config
else:
N: int = sim_config['N']
for n in range(0, N):
sim_config['simulation_id'] = self.simulation_id
sim_config['run_id'] = 0
new_sim_configs.append(deepcopy(sim_config))

sim_config['run_id'] = n
sim_config['N'] = 1
new_sim_configs.append((sim_config.copy()))
sim_cnt_local += 1

if model_id == None:
Expand All @@ -128,7 +121,7 @@ def append_model(
if run_id >= max_runs:
sim_config['N'] = run_id - (max_runs - 1)

self.exp_window = deepcopy(self.exp_window)
self.exp_window = self.exp_window.copy()
config = Configuration(
exp_creation_ts=self.exp_creation_ts,

Expand Down Expand Up @@ -192,19 +185,19 @@ def __init__(self, policy_id: Dict[str, int] = {'identity': 0}) -> None:
def p_identity(self, var_dict, sub_step, sL, s, **kwargs):
return self.beh_id_return_val

def policy_identity(self, k: str) -> Callable:
def policy_identity(self, k: str) -> callable:
return self.p_identity

def no_state_identity(self, var_dict, sub_step, sL, s, _input, **kwargs):
return None

def state_identity(self, k: str) -> Callable:
def state_identity(self, k: str) -> callable:
return lambda var_dict, sub_step, sL, s, _input, **kwargs: (k, s[k])

# state_identity = cloudpickle.dumps(state_identity)

def apply_identity_funcs(self,
identity: Callable,
identity: callable,
df: DataFrame,
cols: List[str]) -> DataFrame:
"""
Expand Down Expand Up @@ -239,7 +232,7 @@ def create_matrix_field(self, partial_state_updates, key: str) -> DataFrame:
return pd.DataFrame({'empty': []})

def generate_config(self, initial_state, partial_state_updates, exo_proc
) -> List[Tuple[List[Callable], List[Callable]]]:
) -> List[tuple[list[callable], List[callable]]]:

def no_update_handler(bdf, sdf):
if (bdf.empty == False) and (sdf.empty == True):
Expand Down
12 changes: 6 additions & 6 deletions cadCAD/configuration/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import pandas as pd # type: ignore
from datetime import datetime, timedelta
from collections import Counter
from copy import deepcopy
from functools import reduce
from funcy import curry # type: ignore
from cadCAD.types import *
Expand Down Expand Up @@ -144,7 +143,7 @@ def trigger(end_substep, y, f):
def env_trigger(end_substep):
def trigger(end_substep, trigger_field, trigger_vals, funct_list):
def env_update(state_dict, sweep_dict, target_value):
state_dict_copy = deepcopy(state_dict)
state_dict_copy = state_dict.copy()
# Use supstep to simulate current sysMetrics
if state_dict_copy['substep'] == end_substep:
state_dict_copy['timestep'] = state_dict_copy['timestep'] + 1
Expand Down Expand Up @@ -189,6 +188,7 @@ def config_sim(config_dict: ConfigurationDict):
return config_dict
elif (1 in param_values_length_set):
return [{**config_dict, "M": M}

for M in flatten_tabulated_dict(tabulate_dict(params))]
else:
raise Exception('When sweeping, `M` list lengths should either be 1 and/or equal. ')
Expand Down Expand Up @@ -259,8 +259,8 @@ def sweep_partial_states(_type, in_config):
for partial_state, state_dict in filtered_partial_states.items():
for state, state_funcs in state_dict.items():
for f in state_funcs:
config = deepcopy(in_config)
config.partial_state_update_blocks[partial_state][_type][state] = f
config = in_config.copy()
config.partial_state_updates[partial_state][_type][state] = f
configs.append(config)
del config
else:
Expand All @@ -275,8 +275,8 @@ def sweep_states(state_type, states, in_config):
if len(filtered_states) > 0:
for state, state_funcs in filtered_states.items():
for f in state_funcs:
config = deepcopy(in_config)
exploded_states = deepcopy(states)
config = in_config.copy()
exploded_states = states.copy()
exploded_states[state] = f
if state_type == 'exogenous':
config.exogenous_states = exploded_states
Expand Down
2 changes: 1 addition & 1 deletion cadCAD/configuration/utils/depreciationHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def sanitize_config(config):


def sanitize_partial_state_updates(partial_state_updates):
new_partial_state_updates = deepcopy(partial_state_updates)
new_partial_state_updates = partial_state_updates.copy()
def rename_keys(d):
if 'behaviors' in d:
d['policies'] = d.pop('behaviors')
Expand Down
19 changes: 12 additions & 7 deletions cadCAD/engine/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from time import time
from typing import Callable, Dict, List, Any, Tuple, Union
from tqdm.auto import tqdm

from cadCAD.utils import flatten
from cadCAD.utils.execution import print_exec_info
Expand All @@ -9,10 +10,10 @@
from cadCAD.engine.execution import single_proc_exec, parallelize_simulations, local_simulations
from cadCAD.types import *

VarDictType = Dict[str, List[Any]]
StatesListsType = List[Dict[str, Any]]
ConfigsType = List[Tuple[List[Callable], List[Callable]]]
EnvProcessesType = Dict[str, Callable]
VarDictType = Dict[str, List[object]]
StatesListsType = List[dict[str, object]]
ConfigsType = List[tuple[list[callable], List[callable]]]
EnvProcessesType = Dict[str, callable]


class ExecutionMode:
Expand Down Expand Up @@ -79,7 +80,7 @@ def __init__(self,
self.configs = configs
self.empty_return = empty_return

def execute(self) -> Tuple[Any, Any, Dict[str, Any]]:
def execute(self) -> Tuple[object, object, Dict[str, object]]:
if self.empty_return is True:
return [], [], []

Expand All @@ -99,7 +100,9 @@ def execute(self) -> Tuple[Any, Any, Dict[str, Any]]:
print_exec_info(self.exec_context, configs_as_objs(self.configs))

t1 = time()
for x in self.configs:
for x in tqdm(self.configs,
total=len(self.configs),
desc="Initializing configurations"):
sessions.append(
{
'user_id': x.user_id, 'experiment_id': x.experiment_id, 'session_id': x.session_id,
Expand Down Expand Up @@ -145,7 +148,9 @@ def get_final_results(simulations: List[StateHistory],
sessions: List[SessionDict],
remote_threshold: int):
flat_timesteps, tensor_fields = [], []
for sim_result, psu, ep in list(zip(simulations, psus, eps)):
for sim_result, psu, ep in tqdm(list(zip(simulations, psus, eps)),
total=len(simulations),
desc='Flattening results'):
flat_timesteps.append(flatten(sim_result))
tensor_fields.append(create_tensor_field(psu, ep))

Expand Down
21 changes: 9 additions & 12 deletions cadCAD/engine/execution.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from typing import Callable, Dict, List, Any, Tuple
from pathos.multiprocessing import ProcessPool as PPool # type: ignore
from pathos.multiprocessing import ProcessPool # type: ignore
from collections import Counter
from cadCAD.types import *
from cadCAD.utils import flatten

VarDictType = Dict[str, List[Any]]
StatesListsType = List[Dict[str, Any]]
ConfigsType = List[Tuple[List[Callable], List[Callable]]]
EnvProcessesType = Dict[str, Callable]
VarDictType = Dict[str, List[object]]
StatesListsType = List[dict[str, object]]
ConfigsType = List[tuple[list[callable], List[callable]]]
EnvProcessesType = Dict[str, callable]


def single_proc_exec(
Expand Down Expand Up @@ -96,13 +96,10 @@ def parallelize_simulations(

def process_executor(params):
if len_configs_structs > 1:
pp = PPool(processes=len_configs_structs)
results = pp.map(
lambda t: t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], configured_n, additional_objs), params
)
pp.close()
pp.join()
pp.clear()
with ProcessPool(processes=len_configs_structs) as pp:
results = pp.map(
lambda t: t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], configured_n), params
)
else:
t = params[0]
results = t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], configured_n)
Expand Down
Loading

0 comments on commit 803c8fb

Please sign in to comment.