Skip to content

Commit

Permalink
fix skipping zero choice values for scheduler scoring
Browse files Browse the repository at this point in the history
  • Loading branch information
Roman Andriushchenko committed Nov 30, 2023
1 parent 01b0bc0 commit 301c19a
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 13 deletions.
2 changes: 2 additions & 0 deletions paynt/quotient/mdp_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ def choices_to_hole_selection(self, choice_mask):
for choice in choice_mask:
choice_options = self.coloring.action_to_hole_options[choice]
for hole_index,option in choice_options.items():
if hole_index == 0:
print(0, option)
hole_selection[hole_index].add(option)
hole_selection = [list(options) for options in hole_selection]
return hole_selection
Expand Down
23 changes: 10 additions & 13 deletions paynt/quotient/quotient.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,7 @@ def choice_values(self, mdp, prop, state_values):
'''
Get choice values after model checking MDP against a property.
Value of choice c: s -> s' is computed as
ev(s) * [ rew(c) + P(s,c,s') * mc(s') ], where
- ev(s) is the expected number of visits of state s in DTMC induced by
the primary scheduler
rew(c) + sum_s' [ P(s,c,s') * mc(s') ], where
- rew(c) is the reward associated with choice (c)
- P(s,c,s') is the probability of transitioning from s to s' under action c
- mc(s') is the model checking result in state s'
Expand Down Expand Up @@ -215,19 +213,16 @@ def estimate_scheduler_difference(self, mdp, inconsistent_assignments, choice_va
# for each hole, compute its difference sum and a number of affected states
hole_difference_sum = {hole_index: 0 for hole_index in inconsistent_assignments}
hole_states_affected = {hole_index: 0 for hole_index in inconsistent_assignments}
tm = mdp.model.transition_matrix

for state in range(mdp.states):

# for this state, compute for each inconsistent hole the difference in choice values between respective options
hole_min = [None] * num_holes
hole_max = [None] * num_holes

for choice in tm.get_rows_for_group(state):
for choice in mdp.model.transition_matrix.get_rows_for_group(state):

value = choice_values[choice]
if value == 0:
continue
choice_global = mdp.quotient_choice_map[choice]
if self.coloring.default_actions.get(choice_global):
continue
Expand Down Expand Up @@ -258,6 +253,11 @@ def estimate_scheduler_difference(self, mdp, inconsistent_assignments, choice_va
hole_difference_sum[hole_index] += difference
hole_states_affected[hole_index] += 1

print(inconsistent_assignments)
print(hole_states_affected)
for hole_index in inconsistent_assignments:
assert hole_states_affected[hole_index] > 0

# aggregate
inconsistent_differences = {
hole_index: (hole_difference_sum[hole_index] / hole_states_affected[hole_index])
Expand All @@ -270,20 +270,17 @@ def estimate_scheduler_difference(self, mdp, inconsistent_assignments, choice_va
def scheduler_selection_quantitative(self, mdp, prop, result):
'''
Get hole options involved in the scheduler selection.
Use numeric values to filter spurious inconsistencies.
'''

scheduler = result.scheduler

# get qualitative scheduler selection, filter inconsistent assignments
selection = self.scheduler_selection(mdp, scheduler)
selection = self.scheduler_selection(mdp, result.scheduler)
inconsistent_assignments = {hole_index:options for hole_index,options in enumerate(selection) if len(options) > 1 }
print(inconsistent_assignments)
if len(inconsistent_assignments) == 0:
return selection,None,None,None

# extract choice values, compute expected visits and estimate scheduler difference
choice_values = self.choice_values(mdp.model, prop, result.get_values())
choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices)
choices = result.scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices)
expected_visits = self.expected_visits(mdp.model, prop, choices)
inconsistent_differences = self.estimate_scheduler_difference(mdp, inconsistent_assignments, choice_values, expected_visits)

Expand Down
1 change: 1 addition & 0 deletions paynt/synthesizer/policy_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,7 @@ def choose_splitter_round_robin(self, family, prop, scheduler_choices, state_val

def choose_splitter(self, family, prop, scheduler_choices, state_values, hole_selection):
splitter = None
print(hole_selection)
inconsistent_assignments = {hole_index:options for hole_index,options in enumerate(hole_selection) if len(options) > 1}
if len(inconsistent_assignments)==0:
for hole_index,hole in enumerate(family):
Expand Down

0 comments on commit 301c19a

Please sign in to comment.