From f22f2e756991b1b92b5e2d9c6e1b6bc3851f854f Mon Sep 17 00:00:00 2001 From: Roman Andriushchenko Date: Fri, 8 Dec 2023 13:50:59 +0100 Subject: [PATCH 1/5] move some stuff to the QuotientContainer --- paynt/quotient/mdp_family.py | 24 +-------------- paynt/quotient/quotient.py | 52 +++++++++++++++----------------- paynt/synthesizer/policy_tree.py | 2 +- 3 files changed, 27 insertions(+), 51 deletions(-) diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py index d861996d8..d93f8c136 100644 --- a/paynt/quotient/mdp_family.py +++ b/paynt/quotient/mdp_family.py @@ -56,17 +56,6 @@ def map_state_to_available_actions(state_action_choices): state_to_actions.append(available_actions) return state_to_actions - @staticmethod - def compute_choice_destinations(mdp): - choice_destinations = [] - for choice in range(mdp.nr_choices): - destinations = [] - for entry in mdp.transition_matrix.get_row(choice): - destinations.append(entry.column) - choice_destinations.append(destinations) - return choice_destinations - - def __init__(self, quotient_mdp, coloring, specification): super().__init__(quotient_mdp = quotient_mdp, coloring = coloring, specification = specification) @@ -80,14 +69,11 @@ def __init__(self, quotient_mdp, coloring, specification): self.state_action_choices = None # for each state of the quotient, a list of available actions self.state_to_actions = None - # for each choice of the quotient, a list of its state-destinations - self.choice_destinations = None self.action_labels,self.choice_to_action = MdpFamilyQuotientContainer.extract_choice_labels(self.quotient_mdp) self.state_action_choices = MdpFamilyQuotientContainer.map_state_action_to_choices( self.quotient_mdp, self.num_actions, self.choice_to_action) self.state_to_actions = MdpFamilyQuotientContainer.map_state_to_available_actions(self.state_action_choices) - self.choice_destinations = MdpFamilyQuotientContainer.compute_choice_destinations(self.quotient_mdp) @property @@ -120,15 +106,7 @@ def keep_reachable_choices(self, choice_mask, mdp=None, choice_destinations=None - def choices_to_hole_selection(self, choice_mask): - hole_selection = [set() for hole_index in self.design_space.hole_indices] - for choice in choice_mask: - choice_options = self.coloring.action_to_hole_options[choice] - for hole_index,option in choice_options.items(): - hole_selection[hole_index].add(option) - hole_selection = [list(options) for options in hole_selection] - return hole_selection - + def empty_policy(self): return [None] * self.quotient_mdp.nr_states diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py index 43bfdf642..59cbf400d 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -21,6 +21,16 @@ class QuotientContainer: # if True, hole scores in the state will be multiplied with the number of expected visits of this state compute_expected_visits = True + @staticmethod + def compute_choice_destinations(mdp): + choice_destinations = [] + for choice in range(mdp.nr_choices): + destinations = [] + for entry in mdp.transition_matrix.get_row(choice): + destinations.append(entry.column) + choice_destinations.append(destinations) + return choice_destinations + def __init__(self, quotient_mdp = None, coloring = None, specification = None): @@ -36,6 +46,9 @@ def __init__(self, quotient_mdp = None, coloring = None, self.subsystem_builder_options.build_state_mapping = True self.subsystem_builder_options.build_action_mapping = True + # for each choice of the quotient, a list of its state-destinations + self.choice_destinations = QuotientContainer.compute_choice_destinations(self.quotient_mdp) + # (optional) counter of discarded assignments self.discarded = None @@ -133,45 +146,30 @@ def build_chain(self, family): return DTMC(dtmc,self,state_map,choice_map) - def scheduler_selection(self, mdp, scheduler): - ''' Get hole options involved in the scheduler selection. ''' - assert scheduler.memoryless and scheduler.deterministic - - # construct DTMC that corresponds to this scheduler and filter reachable states/choices - choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices) - dtmc,_,choice_map = self.restrict_mdp(mdp.model, choices) - choices = [ choice_map[state] for state in range(dtmc.nr_states) ] - - # map relevant choices to hole options - selection = [set() for hole_index in mdp.design_space.hole_indices] + def choices_to_hole_selection(self, choices): + hole_selection = [set() for hole_index in self.design_space.hole_indices] for choice in choices: - global_choice = mdp.quotient_choice_map[choice] - choice_options = self.coloring.action_to_hole_options[global_choice] + choice_options = self.coloring.action_to_hole_options[choice] for hole_index,option in choice_options.items(): - selection[hole_index].add(option) - selection = [list(options) for options in selection] + hole_selection[hole_index].add(option) + hole_selection = [list(options) for options in hole_selection] + return hole_selection - return selection - def scheduler_selection_with_coloring(self, mdp, scheduler, coloring): + def scheduler_selection(self, mdp, scheduler, coloring=None): ''' Get hole options involved in the scheduler selection. ''' assert scheduler.memoryless and scheduler.deterministic + + if coloring is None: + coloring = self.coloring # construct DTMC that corresponds to this scheduler and filter reachable states/choices choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices) dtmc,_,choice_map = self.restrict_mdp(mdp.model, choices) choices = [ choice_map[state] for state in range(dtmc.nr_states) ] - # map relevant choices to hole options - selection = [set() for hole_index in mdp.design_space.hole_indices] - for choice in choices: - global_choice = mdp.quotient_choice_map[choice] - choice_options = coloring.action_to_hole_options[global_choice] - for hole_index,option in choice_options.items(): - selection[hole_index].add(option) - selection = [list(options) for options in selection] - - return selection + hole_selection = self.choices_to_hole_selection(choices) + return hole_selection @staticmethod diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index 4e3423b29..6e71e2f9f 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -691,7 +691,7 @@ def synthesize_policy_for_family(self, family, prop, all_sat=False, iteration_li sat_mdp_policies[index] = policy current_results.append(primary_result) - selection = self.quotient.scheduler_selection_with_coloring(current_action_family.mdp, primary_result.result.scheduler, self.action_coloring) + selection = self.quotient.scheduler_selection(current_action_family.mdp, primary_result.result.scheduler, self.action_coloring) self.update_scores(score_lists, selection) scores = {hole:len(score_list) for hole, score_list in score_lists.items()} From ca30c0a526fc4b5806bab7610ee4a23e075d2351 Mon Sep 17 00:00:00 2001 From: Roman Andriushchenko Date: Fri, 8 Dec 2023 14:31:18 +0100 Subject: [PATCH 2/5] avoid building DTMC to compute reachable choices of the scheduler --- paynt/quotient/mdp_family.py | 20 +++++------ paynt/quotient/quotient.py | 57 ++++++++++++++++++++++++++------ paynt/synthesizer/policy_tree.py | 8 ++--- 3 files changed, 57 insertions(+), 28 deletions(-) diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py index d93f8c136..c42df0c7e 100644 --- a/paynt/quotient/mdp_family.py +++ b/paynt/quotient/mdp_family.py @@ -56,6 +56,7 @@ def map_state_to_available_actions(state_action_choices): state_to_actions.append(available_actions) return state_to_actions + def __init__(self, quotient_mdp, coloring, specification): super().__init__(quotient_mdp = quotient_mdp, coloring = coloring, specification = specification) @@ -103,23 +104,18 @@ def keep_reachable_choices(self, choice_mask, mdp=None, choice_destinations=None state_visited[dst] = True state_queue.append(dst) return choice_mask_reachable - - - + def empty_policy(self): - return [None] * self.quotient_mdp.nr_states + return self.empty_scheduler() def scheduler_to_policy(self, scheduler, mdp): + state_to_choice = self.scheduler_to_state_to_choice(mdp,scheduler) policy = self.empty_policy() - nci = mdp.model.nondeterministic_choice_indices.copy() - for state in range(mdp.model.nr_states): - state_choice = scheduler.get_choice(state).get_deterministic_choice() - choice = nci[state] + state_choice - quotient_choice = mdp.quotient_choice_map[choice] - action = self.choice_to_action[quotient_choice] - quotient_state = mdp.quotient_state_map[state] - policy[quotient_state] = action + for state in range(self.quotient_mdp.nr_states): + choice = state_to_choice[state] + if choice is not None: + policy[state] = self.choice_to_action[choice] return policy diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py index 59cbf400d..073993115 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -146,7 +146,49 @@ def build_chain(self, family): return DTMC(dtmc,self,state_map,choice_map) - def choices_to_hole_selection(self, choices): + def empty_scheduler(self): + return [None] * self.quotient_mdp.nr_states + + def keep_reachable_choices_of_scheduler(self, state_to_choice): + state_to_choice_reachable = self.empty_scheduler() + state_visited = [False]*self.quotient_mdp.nr_states + initial_state = list(self.quotient_mdp.initial_states)[0] + state_visited[initial_state] = True + state_queue = [initial_state] + while state_queue: + state = state_queue.pop() + choice = state_to_choice[state] + state_to_choice_reachable[state] = choice + for dst in self.choice_destinations[choice]: + if not state_visited[dst]: + state_visited[dst] = True + state_queue.append(dst) + return state_to_choice_reachable + + def scheduler_to_state_to_choice(self, mdp, scheduler, keep_reachable_choices=True): + assert scheduler.memoryless and scheduler.deterministic + state_to_choice = self.empty_scheduler() + nci = mdp.model.nondeterministic_choice_indices.copy() + for state in range(mdp.model.nr_states): + choice = nci[state] + scheduler.get_choice(state).get_deterministic_choice() + quotient_choice = mdp.quotient_choice_map[choice] + quotient_state = mdp.quotient_state_map[state] + state_to_choice[quotient_state] = quotient_choice + if keep_reachable_choices: + state_to_choice = self.keep_reachable_choices_of_scheduler(state_to_choice) + return state_to_choice + + def state_to_choice_to_choices(self, state_to_choice): + choices = stormpy.BitVector(self.quotient_mdp.nr_choices,False) + for choice in state_to_choice: + if choice is not None: + choices.set(choice,True) + return choices + + + def choices_to_hole_selection(self, choices, coloring=None): + if coloring is None: + coloring = self.coloring hole_selection = [set() for hole_index in self.design_space.hole_indices] for choice in choices: choice_options = self.coloring.action_to_hole_options[choice] @@ -159,16 +201,9 @@ def choices_to_hole_selection(self, choices): def scheduler_selection(self, mdp, scheduler, coloring=None): ''' Get hole options involved in the scheduler selection. ''' assert scheduler.memoryless and scheduler.deterministic - - if coloring is None: - coloring = self.coloring - - # construct DTMC that corresponds to this scheduler and filter reachable states/choices - choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices) - dtmc,_,choice_map = self.restrict_mdp(mdp.model, choices) - choices = [ choice_map[state] for state in range(dtmc.nr_states) ] - - hole_selection = self.choices_to_hole_selection(choices) + state_to_choice = self.scheduler_to_state_to_choice(mdp, scheduler, keep_reachable=True) + choices = self.state_to_choice_to_choices(state_to_choice) + hole_selection = self.choices_to_hole_selection(choices,coloring) return hole_selection diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index 6e71e2f9f..61b384d84 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -483,18 +483,16 @@ def verify_family(self, family, game_solver, prop, reference_policy=None): if SynthesizerPolicyTree.use_optimistic_splitting: scheduler_choices = game_solver.solution_reachable_choices state_values = game_solver.solution_state_values + scheduler_choices = self.quotient.keep_reachable_choices(scheduler_choices) else: - scheduler_choices = stormpy.BitVector(self.quotient.quotient_mdp.nr_choices,False) - choices = primary_primary_result.result.scheduler.compute_action_support(family.mdp.model.nondeterministic_choice_indices) - for choice in choices: - scheduler_choices.set(family.mdp.quotient_choice_map[choice],True) + state_to_choice = self.quotient.scheduler_to_state_to_choice(family.mdp, primary_primary_result.result.scheduler) + scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice) state_values = [0] * self.quotient.quotient_mdp.nr_states for state in range(family.mdp.states): quotient_state = family.mdp.quotient_state_map[state] state_values[quotient_state] = primary_primary_result.result.at(state) # map reachable scheduler choices to hole options - scheduler_choices = self.quotient.keep_reachable_choices(scheduler_choices) hole_selection = self.quotient.choices_to_hole_selection(scheduler_choices) if False: From ecb6fb8428b160c0e2f67c741c9e9f2197b17644 Mon Sep 17 00:00:00 2001 From: Roman Andriushchenko Date: Fri, 8 Dec 2023 15:01:23 +0100 Subject: [PATCH 3/5] replace keep_reachable_choices --- paynt/quotient/mdp_family.py | 25 ------------------------- paynt/synthesizer/policy_tree.py | 6 +++--- 2 files changed, 3 insertions(+), 28 deletions(-) diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py index c42df0c7e..6068e1d05 100644 --- a/paynt/quotient/mdp_family.py +++ b/paynt/quotient/mdp_family.py @@ -81,31 +81,6 @@ def __init__(self, quotient_mdp, coloring, specification): def num_actions(self): return len(self.action_labels) - - def keep_reachable_choices(self, choice_mask, mdp=None, choice_destinations=None): - if mdp is None: - mdp = self.quotient_mdp - if choice_destinations is None: - choice_destinations = self.choice_destinations - state_visited = [False]*mdp.nr_states - initial_state = list(mdp.initial_states)[0] - state_visited[initial_state] = True - state_queue = [initial_state] - tm = mdp.transition_matrix - choice_mask_reachable = stormpy.BitVector(mdp.nr_choices,False) - while state_queue: - state = state_queue.pop() - for choice in mdp.transition_matrix.get_rows_for_group(state): - if not choice_mask[choice]: - continue - choice_mask_reachable.set(choice,True) - for dst in choice_destinations[choice]: - if not state_visited[dst]: - state_visited[dst] = True - state_queue.append(dst) - return choice_mask_reachable - - def empty_policy(self): return self.empty_scheduler() diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index 61b384d84..8945e366e 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -481,18 +481,18 @@ def verify_family(self, family, game_solver, prop, reference_policy=None): # undecided: choose scheduler choices to be used for splitting if SynthesizerPolicyTree.use_optimistic_splitting: - scheduler_choices = game_solver.solution_reachable_choices state_values = game_solver.solution_state_values - scheduler_choices = self.quotient.keep_reachable_choices(scheduler_choices) + state_to_choice = game_solver.solution_state_to_quotient_choice + state_to_choice = self.quotient.keep_reachable_choices_of_scheduler(state_to_choice) else: state_to_choice = self.quotient.scheduler_to_state_to_choice(family.mdp, primary_primary_result.result.scheduler) - scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice) state_values = [0] * self.quotient.quotient_mdp.nr_states for state in range(family.mdp.states): quotient_state = family.mdp.quotient_state_map[state] state_values[quotient_state] = primary_primary_result.result.at(state) # map reachable scheduler choices to hole options + scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice) hole_selection = self.quotient.choices_to_hole_selection(scheduler_choices) if False: From 1021d242bd6a5bc01cd02239c658732fb6252924 Mon Sep 17 00:00:00 2001 From: Roman Andriushchenko Date: Fri, 8 Dec 2023 15:13:53 +0100 Subject: [PATCH 4/5] fix not using optional coloring --- paynt/quotient/coloring.py | 12 +++++++++++- paynt/quotient/quotient.py | 18 ++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/paynt/quotient/coloring.py b/paynt/quotient/coloring.py index 721d3d88c..93e77ebce 100644 --- a/paynt/quotient/coloring.py +++ b/paynt/quotient/coloring.py @@ -56,7 +56,7 @@ def __init__(self, mdp, holes, action_to_hole_options): @property def num_choices(self): return len(self.action_to_hole_options) - + def select_actions(self, family): ''' Select non-default actions relevant in the provided design space. @@ -112,3 +112,13 @@ def select_actions(self, family): selected_actions_bv = stormpy.synthesis.construct_selection(self.default_actions, selected_actions) return hole_selected_actions,selected_actions,selected_actions_bv + + + def choices_to_hole_selection(self, choices): + hole_selection = [set() for hole_index in self.holes.hole_indices] + for choice in choices: + choice_options = self.action_to_hole_options[choice] + for hole_index,option in choice_options.items(): + hole_selection[hole_index].add(option) + hole_selection = [list(options) for options in hole_selection] + return hole_selection diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py index 073993115..c5e5321ac 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -185,25 +185,15 @@ def state_to_choice_to_choices(self, state_to_choice): choices.set(choice,True) return choices - - def choices_to_hole_selection(self, choices, coloring=None): - if coloring is None: - coloring = self.coloring - hole_selection = [set() for hole_index in self.design_space.hole_indices] - for choice in choices: - choice_options = self.coloring.action_to_hole_options[choice] - for hole_index,option in choice_options.items(): - hole_selection[hole_index].add(option) - hole_selection = [list(options) for options in hole_selection] - return hole_selection - def scheduler_selection(self, mdp, scheduler, coloring=None): ''' Get hole options involved in the scheduler selection. ''' assert scheduler.memoryless and scheduler.deterministic - state_to_choice = self.scheduler_to_state_to_choice(mdp, scheduler, keep_reachable=True) + state_to_choice = self.scheduler_to_state_to_choice(mdp, scheduler) choices = self.state_to_choice_to_choices(state_to_choice) - hole_selection = self.choices_to_hole_selection(choices,coloring) + if coloring is None: + coloring = self.coloring + hole_selection = coloring.choices_to_hole_selection(choices) return hole_selection From c301db9291fe19f9739d5740996fdd80bb1b4055 Mon Sep 17 00:00:00 2001 From: Roman Andriushchenko Date: Fri, 8 Dec 2023 15:18:44 +0100 Subject: [PATCH 5/5] whatever --- paynt/synthesizer/policy_tree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index 8945e366e..6e25a425d 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -493,7 +493,7 @@ def verify_family(self, family, game_solver, prop, reference_policy=None): # map reachable scheduler choices to hole options scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice) - hole_selection = self.quotient.choices_to_hole_selection(scheduler_choices) + hole_selection = self.quotient.coloring.choices_to_hole_selection(scheduler_choices) if False: # sanity check