From 301c19ae00d25e483c08835c2226866d52d326ec Mon Sep 17 00:00:00 2001
From: Roman Andriushchenko <iandri@fit.vut.cz>
Date: Thu, 30 Nov 2023 13:36:43 +0100
Subject: [PATCH] fix skipping zero choice values for scheduler scoring

---
 paynt/quotient/mdp_family.py     |  2 ++
 paynt/quotient/quotient.py       | 23 ++++++++++-------------
 paynt/synthesizer/policy_tree.py |  1 +
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py
index 84a555fce..97a853771 100644
--- a/paynt/quotient/mdp_family.py
+++ b/paynt/quotient/mdp_family.py
@@ -119,6 +119,8 @@ def choices_to_hole_selection(self, choice_mask):
         for choice in choice_mask:
             choice_options = self.coloring.action_to_hole_options[choice]
             for hole_index,option in choice_options.items():
+                if hole_index == 0:
+                    print(0, option)
                 hole_selection[hole_index].add(option)
         hole_selection = [list(options) for options in hole_selection]
         return hole_selection
diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py
index 946bc3d18..9308a8de1 100644
--- a/paynt/quotient/quotient.py
+++ b/paynt/quotient/quotient.py
@@ -144,9 +144,7 @@ def choice_values(self, mdp, prop, state_values):
         '''
         Get choice values after model checking MDP against a property.
         Value of choice c: s -> s' is computed as
-        ev(s) * [ rew(c) + P(s,c,s') * mc(s') ], where
-        - ev(s) is the expected number of visits of state s in DTMC induced by
-          the primary scheduler
+        rew(c) + sum_s' [ P(s,c,s') * mc(s') ], where
         - rew(c) is the reward associated with choice (c)
         - P(s,c,s') is the probability of transitioning from s to s' under action c
         - mc(s') is the model checking result in state s'
@@ -215,7 +213,6 @@ def estimate_scheduler_difference(self, mdp, inconsistent_assignments, choice_va
         # for each hole, compute its difference sum and a number of affected states
         hole_difference_sum = {hole_index: 0 for hole_index in inconsistent_assignments}
         hole_states_affected = {hole_index: 0 for hole_index in inconsistent_assignments}
-        tm = mdp.model.transition_matrix
         
         for state in range(mdp.states):
 
@@ -223,11 +220,9 @@ def estimate_scheduler_difference(self, mdp, inconsistent_assignments, choice_va
             hole_min = [None] * num_holes
             hole_max = [None] * num_holes
             
-            for choice in tm.get_rows_for_group(state):
+            for choice in mdp.model.transition_matrix.get_rows_for_group(state):
                 
                 value = choice_values[choice]
-                if value == 0:
-                    continue
                 choice_global = mdp.quotient_choice_map[choice]
                 if self.coloring.default_actions.get(choice_global):
                     continue
@@ -258,6 +253,11 @@ def estimate_scheduler_difference(self, mdp, inconsistent_assignments, choice_va
                 hole_difference_sum[hole_index] += difference
                 hole_states_affected[hole_index] += 1
 
+        print(inconsistent_assignments)
+        print(hole_states_affected)
+        for hole_index in inconsistent_assignments:
+            assert hole_states_affected[hole_index] > 0
+        
         # aggregate
         inconsistent_differences = {
             hole_index: (hole_difference_sum[hole_index] / hole_states_affected[hole_index])
@@ -270,20 +270,17 @@ def estimate_scheduler_difference(self, mdp, inconsistent_assignments, choice_va
     def scheduler_selection_quantitative(self, mdp, prop, result):
         '''
         Get hole options involved in the scheduler selection.
-        Use numeric values to filter spurious inconsistencies.
         '''
-
-        scheduler = result.scheduler
-
         # get qualitative scheduler selection, filter inconsistent assignments
-        selection = self.scheduler_selection(mdp, scheduler)
+        selection = self.scheduler_selection(mdp, result.scheduler)
         inconsistent_assignments = {hole_index:options for hole_index,options in enumerate(selection) if len(options) > 1 }
+        print(inconsistent_assignments)
         if len(inconsistent_assignments) == 0:
             return selection,None,None,None
         
         # extract choice values, compute expected visits and estimate scheduler difference
         choice_values = self.choice_values(mdp.model, prop, result.get_values())
-        choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices)
+        choices = result.scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices)
         expected_visits = self.expected_visits(mdp.model, prop, choices)
         inconsistent_differences = self.estimate_scheduler_difference(mdp, inconsistent_assignments, choice_values, expected_visits)
 
diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
index b53992350..01032d59f 100644
--- a/paynt/synthesizer/policy_tree.py
+++ b/paynt/synthesizer/policy_tree.py
@@ -410,6 +410,7 @@ def choose_splitter_round_robin(self, family, prop, scheduler_choices, state_val
 
     def choose_splitter(self, family, prop, scheduler_choices, state_values, hole_selection):
         splitter = None
+        print(hole_selection)
         inconsistent_assignments = {hole_index:options for hole_index,options in enumerate(hole_selection) if len(options) > 1}
         if len(inconsistent_assignments)==0:
             for hole_index,hole in enumerate(family):