Performance improvements when initializing simulations - merge with l…

…egacy-devel (#322) * added tqdm bars for multi_proc * Progress bar for multi proc mode * checkpoint * changed deepcopy -> copy * added pbar for init and end of the sim * removed more deepcopies * remove unused requirements * change requirements to allow for more up to date versions * fix run_id bug * add cadCAD-tweaked info * reactivate multi-cpu processing * reactivate multi-cpu processing * Update README.md * Update README.md * get policy input refactors * get policy input refactors * improve get_policy_input perf by 2x * improve get_policy_input perf by 2x * small refactors * refactor type hints to py>3.9 * rm fn dependency * v 0.4.29 * use context manager for process pool * rm temp testing files * CHANGELOG * refactors for fixing * some refactors, P&P model not properly working * sync * fixes * rm p&p .py * revert to old README * fix runs / subsets not being init properly * fix N being 1 unit above the expected * change hints for 3.9 compat * fix py3.9 compat * fix op breaking py=3.9
cadCAD-org · Dec 15, 2023 · 803c8fb · 803c8fb
1 parent 31979d4
commit 803c8fb
Show file tree

Hide file tree

Showing 24 changed files with 12,720 additions and 12,570 deletions.
diff --git a/.gitignore b/.gitignore
@@ -39,3 +39,6 @@ simulations/validation
 simulations/poc/local/
 simulations/regression_tests/poc
 simulations/regression_tests/poc_configs
+dist/*
+testing/tests/import_cadCAD.nbconvert.ipynb
+testing/tests/cadCAD_memory_address.json
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,15 @@
 ### February 15, 2023
 * **Fixes:** 
     - Package has been cleaned-up for working with Python 3.10
+### 0.4.29.1
+
+#### Changes
+- Parallel executor uses the context manager handling the Process Pool lifetime
+
+### 0.4.29
+
+- Merged repo with the `cadCAD_tweaked`, which includes performance improvements
+- Python 3.10 compatible
 ### September 28, 2021
 #### New Features:
 * **ver. ≥ `0.4.28`:**
@@ -217,16 +226,16 @@ as a 2 dimensional `list`
     +----+------------+-----------+----+---------------------+-----+---------+----------+
     ```
 
-* **Flattened Configuration List:** The `cadCAD.configs` (System Model Configuration) `list` has been **temporarily** 
+* **Flattened Configuration list:** The `cadCAD.configs` (System Model Configuration) `list` has been **temporarily** 
 flattened to contain single run `cadCAD.configuration.Configuration` objects to both fault-tolerant simulation and 
 elastic workloads. This functionality will be restored in a subsequent release by a class that returns 
 `cadCAD.configs`'s original representation in ver. `0.3.1`.
     * The conversion utilities have been provided to restore its original representation of configurations with 
     runs >= 1
         * [System Configuration Conversions](documentation/System_Configuration.md)
-            * Configuration as List of Configuration Objects (as in ver. `0.3.1`) 
+            * Configuration as list of Configuration Objects (as in ver. `0.3.1`) 
             * New: System Configuration as a Pandas DataFrame
-            * New: System Configuration as List of Dictionaries
+            * New: System Configuration as list of Dictionaries
 
 ###### Examples:
 * Notes:

diff --git a/cadCAD/configuration/__init__.py b/cadCAD/configuration/__init__.py
@@ -90,19 +90,12 @@ def append_model(
             sim_config = t[0]
             sim_config['subset_id'] = subset_id
             sim_config['subset_window'] = self.subset_window
-            N = sim_config['N']
-            if N > 1:
-                for n in range(N):
-                    sim_config['simulation_id'] = self.simulation_id
-                    sim_config['run_id'] = n
-                    sim_config['N'] = 1
-                    new_sim_configs.append(deepcopy(sim_config))
-                del sim_config
-            else:
+            N: int = sim_config['N']
+            for n in range(0, N):
                 sim_config['simulation_id'] = self.simulation_id
-                sim_config['run_id'] = 0
-                new_sim_configs.append(deepcopy(sim_config))
-
+                sim_config['run_id'] = n
+                sim_config['N'] = 1
+                new_sim_configs.append((sim_config.copy()))
             sim_cnt_local += 1
 
         if model_id == None:
@@ -128,7 +121,7 @@ def append_model(
                 if run_id >= max_runs:
                     sim_config['N'] = run_id - (max_runs - 1)
 
-            self.exp_window = deepcopy(self.exp_window)
+            self.exp_window = self.exp_window.copy()
             config = Configuration(
                 exp_creation_ts=self.exp_creation_ts,
 
@@ -192,19 +185,19 @@ def __init__(self, policy_id: Dict[str, int] = {'identity': 0}) -> None:
     def p_identity(self, var_dict, sub_step, sL, s, **kwargs):
         return self.beh_id_return_val
 
-    def policy_identity(self, k: str) -> Callable:
+    def policy_identity(self, k: str) -> callable:
         return self.p_identity
 
     def no_state_identity(self, var_dict, sub_step, sL, s, _input, **kwargs):
         return None
 
-    def state_identity(self, k: str) -> Callable:
+    def state_identity(self, k: str) -> callable:
         return lambda var_dict, sub_step, sL, s, _input, **kwargs: (k, s[k])
 
     # state_identity = cloudpickle.dumps(state_identity)
 
     def apply_identity_funcs(self,
-                             identity: Callable,
+                             identity: callable,
                              df: DataFrame,
                              cols: List[str]) -> DataFrame:
         """
@@ -239,7 +232,7 @@ def create_matrix_field(self, partial_state_updates, key: str) -> DataFrame:
             return pd.DataFrame({'empty': []})
 
     def generate_config(self, initial_state, partial_state_updates, exo_proc
-                       ) -> List[Tuple[List[Callable], List[Callable]]]:
+                       ) -> List[tuple[list[callable], List[callable]]]:
 
         def no_update_handler(bdf, sdf):
             if (bdf.empty == False) and (sdf.empty == True):

diff --git a/cadCAD/configuration/utils/__init__.py b/cadCAD/configuration/utils/__init__.py
@@ -1,7 +1,6 @@
 import pandas as pd # type: ignore
 from datetime import datetime, timedelta
 from collections import Counter
-from copy import deepcopy
 from functools import reduce
 from funcy import curry # type: ignore
 from cadCAD.types import *
@@ -144,7 +143,7 @@ def trigger(end_substep, y, f):
 def env_trigger(end_substep):
     def trigger(end_substep, trigger_field, trigger_vals, funct_list):
         def env_update(state_dict, sweep_dict, target_value):
-            state_dict_copy = deepcopy(state_dict)
+            state_dict_copy = state_dict.copy()
             # Use supstep to simulate current sysMetrics
             if state_dict_copy['substep'] == end_substep:
                 state_dict_copy['timestep'] = state_dict_copy['timestep'] + 1
@@ -189,6 +188,7 @@ def config_sim(config_dict: ConfigurationDict):
                 return config_dict
             elif (1 in param_values_length_set):
                 return [{**config_dict, "M": M} 
+
                         for M in flatten_tabulated_dict(tabulate_dict(params))]
             else:
                 raise Exception('When sweeping, `M` list lengths should either be 1 and/or equal. ')
@@ -259,8 +259,8 @@ def sweep_partial_states(_type, in_config):
         for partial_state, state_dict in filtered_partial_states.items():
             for state, state_funcs in state_dict.items():
                 for f in state_funcs:
-                    config = deepcopy(in_config)
-                    config.partial_state_update_blocks[partial_state][_type][state] = f
+                    config = in_config.copy()
+                    config.partial_state_updates[partial_state][_type][state] = f
                     configs.append(config)
                     del config
     else:
@@ -275,8 +275,8 @@ def sweep_states(state_type, states, in_config):
     if len(filtered_states) > 0:
         for state, state_funcs in filtered_states.items():
             for f in state_funcs:
-                config = deepcopy(in_config)
-                exploded_states = deepcopy(states)
+                config = in_config.copy()
+                exploded_states = states.copy()
                 exploded_states[state] = f
                 if state_type == 'exogenous':
                     config.exogenous_states = exploded_states

diff --git a/cadCAD/configuration/utils/depreciationHandler.py b/cadCAD/configuration/utils/depreciationHandler.py
@@ -15,7 +15,7 @@ def sanitize_config(config):
 
 
 def sanitize_partial_state_updates(partial_state_updates):
-    new_partial_state_updates = deepcopy(partial_state_updates)
+    new_partial_state_updates = partial_state_updates.copy()
     def rename_keys(d):
         if 'behaviors' in d:
             d['policies'] = d.pop('behaviors')

diff --git a/cadCAD/engine/__init__.py b/cadCAD/engine/__init__.py
@@ -1,5 +1,6 @@
 from time import time
 from typing import Callable, Dict, List, Any, Tuple, Union
+from tqdm.auto import tqdm
 
 from cadCAD.utils import flatten
 from cadCAD.utils.execution import print_exec_info
@@ -9,10 +10,10 @@
 from cadCAD.engine.execution import single_proc_exec, parallelize_simulations, local_simulations
 from cadCAD.types import *
 
-VarDictType = Dict[str, List[Any]]
-StatesListsType = List[Dict[str, Any]]
-ConfigsType = List[Tuple[List[Callable], List[Callable]]]
-EnvProcessesType = Dict[str, Callable]
+VarDictType = Dict[str, List[object]]
+StatesListsType = List[dict[str, object]]
+ConfigsType = List[tuple[list[callable], List[callable]]]
+EnvProcessesType = Dict[str, callable]
 
 
 class ExecutionMode:
@@ -79,7 +80,7 @@ def __init__(self,
         self.configs = configs
         self.empty_return = empty_return
 
-    def execute(self) -> Tuple[Any, Any, Dict[str, Any]]:
+    def execute(self) -> Tuple[object, object, Dict[str, object]]:
         if self.empty_return is True:
             return [], [], []
 
@@ -99,7 +100,9 @@ def execute(self) -> Tuple[Any, Any, Dict[str, Any]]:
         print_exec_info(self.exec_context, configs_as_objs(self.configs))
 
         t1 = time()
-        for x in self.configs:
+        for x in tqdm(self.configs,
+                      total=len(self.configs),
+                      desc="Initializing configurations"):
             sessions.append(
                 {
                     'user_id': x.user_id, 'experiment_id': x.experiment_id, 'session_id': x.session_id,
@@ -145,7 +148,9 @@ def get_final_results(simulations: List[StateHistory],
                               sessions: List[SessionDict],
                               remote_threshold: int):
             flat_timesteps, tensor_fields = [], []
-            for sim_result, psu, ep in list(zip(simulations, psus, eps)):
+            for sim_result, psu, ep in tqdm(list(zip(simulations, psus, eps)),
+                                            total=len(simulations),
+                                            desc='Flattening results'):
                 flat_timesteps.append(flatten(sim_result))
                 tensor_fields.append(create_tensor_field(psu, ep))
 

diff --git a/cadCAD/engine/execution.py b/cadCAD/engine/execution.py
@@ -1,13 +1,13 @@
 from typing import Callable, Dict, List, Any, Tuple
-from pathos.multiprocessing import ProcessPool as PPool # type: ignore
+from pathos.multiprocessing import ProcessPool # type: ignore
 from collections import Counter
 from cadCAD.types import *
 from cadCAD.utils import flatten
 
-VarDictType = Dict[str, List[Any]]
-StatesListsType = List[Dict[str, Any]]
-ConfigsType = List[Tuple[List[Callable], List[Callable]]]
-EnvProcessesType = Dict[str, Callable]
+VarDictType = Dict[str, List[object]]
+StatesListsType = List[dict[str, object]]
+ConfigsType = List[tuple[list[callable], List[callable]]]
+EnvProcessesType = Dict[str, callable]
 
 
 def single_proc_exec(
@@ -96,13 +96,10 @@ def parallelize_simulations(
 
     def process_executor(params):
         if len_configs_structs > 1:
-            pp = PPool(processes=len_configs_structs)
-            results = pp.map(
-                lambda t: t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], configured_n, additional_objs), params
-            )
-            pp.close()
-            pp.join()
-            pp.clear()
+            with ProcessPool(processes=len_configs_structs) as pp:
+                results = pp.map(
+                    lambda t: t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], configured_n), params
+                )
         else:
             t = params[0]
             results = t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], configured_n)