From 301c19ae00d25e483c08835c2226866d52d326ec Mon Sep 17 00:00:00 2001 From: Roman Andriushchenko Date: Thu, 30 Nov 2023 13:36:43 +0100 Subject: [PATCH] fix skipping zero choice values for scheduler scoring --- paynt/quotient/mdp_family.py | 2 ++ paynt/quotient/quotient.py | 23 ++++++++++------------- paynt/synthesizer/policy_tree.py | 1 + 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py index 84a555fce..97a853771 100644 --- a/paynt/quotient/mdp_family.py +++ b/paynt/quotient/mdp_family.py @@ -119,6 +119,8 @@ def choices_to_hole_selection(self, choice_mask): for choice in choice_mask: choice_options = self.coloring.action_to_hole_options[choice] for hole_index,option in choice_options.items(): + if hole_index == 0: + print(0, option) hole_selection[hole_index].add(option) hole_selection = [list(options) for options in hole_selection] return hole_selection diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py index 946bc3d18..9308a8de1 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -144,9 +144,7 @@ def choice_values(self, mdp, prop, state_values): ''' Get choice values after model checking MDP against a property. Value of choice c: s -> s' is computed as - ev(s) * [ rew(c) + P(s,c,s') * mc(s') ], where - - ev(s) is the expected number of visits of state s in DTMC induced by - the primary scheduler + rew(c) + sum_s' [ P(s,c,s') * mc(s') ], where - rew(c) is the reward associated with choice (c) - P(s,c,s') is the probability of transitioning from s to s' under action c - mc(s') is the model checking result in state s' @@ -215,7 +213,6 @@ def estimate_scheduler_difference(self, mdp, inconsistent_assignments, choice_va # for each hole, compute its difference sum and a number of affected states hole_difference_sum = {hole_index: 0 for hole_index in inconsistent_assignments} hole_states_affected = {hole_index: 0 for hole_index in inconsistent_assignments} - tm = mdp.model.transition_matrix for state in range(mdp.states): @@ -223,11 +220,9 @@ def estimate_scheduler_difference(self, mdp, inconsistent_assignments, choice_va hole_min = [None] * num_holes hole_max = [None] * num_holes - for choice in tm.get_rows_for_group(state): + for choice in mdp.model.transition_matrix.get_rows_for_group(state): value = choice_values[choice] - if value == 0: - continue choice_global = mdp.quotient_choice_map[choice] if self.coloring.default_actions.get(choice_global): continue @@ -258,6 +253,11 @@ def estimate_scheduler_difference(self, mdp, inconsistent_assignments, choice_va hole_difference_sum[hole_index] += difference hole_states_affected[hole_index] += 1 + print(inconsistent_assignments) + print(hole_states_affected) + for hole_index in inconsistent_assignments: + assert hole_states_affected[hole_index] > 0 + # aggregate inconsistent_differences = { hole_index: (hole_difference_sum[hole_index] / hole_states_affected[hole_index]) @@ -270,20 +270,17 @@ def estimate_scheduler_difference(self, mdp, inconsistent_assignments, choice_va def scheduler_selection_quantitative(self, mdp, prop, result): ''' Get hole options involved in the scheduler selection. - Use numeric values to filter spurious inconsistencies. ''' - - scheduler = result.scheduler - # get qualitative scheduler selection, filter inconsistent assignments - selection = self.scheduler_selection(mdp, scheduler) + selection = self.scheduler_selection(mdp, result.scheduler) inconsistent_assignments = {hole_index:options for hole_index,options in enumerate(selection) if len(options) > 1 } + print(inconsistent_assignments) if len(inconsistent_assignments) == 0: return selection,None,None,None # extract choice values, compute expected visits and estimate scheduler difference choice_values = self.choice_values(mdp.model, prop, result.get_values()) - choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices) + choices = result.scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices) expected_visits = self.expected_visits(mdp.model, prop, choices) inconsistent_differences = self.estimate_scheduler_difference(mdp, inconsistent_assignments, choice_values, expected_visits) diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index b53992350..01032d59f 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -410,6 +410,7 @@ def choose_splitter_round_robin(self, family, prop, scheduler_choices, state_val def choose_splitter(self, family, prop, scheduler_choices, state_values, hole_selection): splitter = None + print(hole_selection) inconsistent_assignments = {hole_index:options for hole_index,options in enumerate(hole_selection) if len(options) > 1} if len(inconsistent_assignments)==0: for hole_index,hole in enumerate(family):