From f22f2e756991b1b92b5e2d9c6e1b6bc3851f854f Mon Sep 17 00:00:00 2001
From: Roman Andriushchenko <iandri@fit.vut.cz>
Date: Fri, 8 Dec 2023 13:50:59 +0100
Subject: [PATCH 1/5] move some stuff to the QuotientContainer

---
 paynt/quotient/mdp_family.py     | 24 +--------------
 paynt/quotient/quotient.py       | 52 +++++++++++++++-----------------
 paynt/synthesizer/policy_tree.py |  2 +-
 3 files changed, 27 insertions(+), 51 deletions(-)

diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py
index d861996d8..d93f8c136 100644
--- a/paynt/quotient/mdp_family.py
+++ b/paynt/quotient/mdp_family.py
@@ -56,17 +56,6 @@ def map_state_to_available_actions(state_action_choices):
             state_to_actions.append(available_actions)
         return state_to_actions
 
-    @staticmethod
-    def compute_choice_destinations(mdp):
-        choice_destinations = []
-        for choice in range(mdp.nr_choices):
-            destinations = []
-            for entry in mdp.transition_matrix.get_row(choice):
-                destinations.append(entry.column)
-            choice_destinations.append(destinations)
-        return choice_destinations
-
-        
     def __init__(self, quotient_mdp, coloring, specification):
         super().__init__(quotient_mdp = quotient_mdp, coloring = coloring, specification = specification)
 
@@ -80,14 +69,11 @@ def __init__(self, quotient_mdp, coloring, specification):
         self.state_action_choices = None
         # for each state of the quotient, a list of available actions
         self.state_to_actions = None
-        # for each choice of the quotient, a list of its state-destinations
-        self.choice_destinations = None
 
         self.action_labels,self.choice_to_action = MdpFamilyQuotientContainer.extract_choice_labels(self.quotient_mdp)
         self.state_action_choices = MdpFamilyQuotientContainer.map_state_action_to_choices(
             self.quotient_mdp, self.num_actions, self.choice_to_action)
         self.state_to_actions = MdpFamilyQuotientContainer.map_state_to_available_actions(self.state_action_choices)
-        self.choice_destinations = MdpFamilyQuotientContainer.compute_choice_destinations(self.quotient_mdp)
     
     
     @property
@@ -120,15 +106,7 @@ def keep_reachable_choices(self, choice_mask, mdp=None, choice_destinations=None
     
     
     
-    def choices_to_hole_selection(self, choice_mask):
-        hole_selection = [set() for hole_index in self.design_space.hole_indices]
-        for choice in choice_mask:
-            choice_options = self.coloring.action_to_hole_options[choice]
-            for hole_index,option in choice_options.items():
-                hole_selection[hole_index].add(option)
-        hole_selection = [list(options) for options in hole_selection]
-        return hole_selection
-
+    
     def empty_policy(self):
         return [None] * self.quotient_mdp.nr_states
 
diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py
index 43bfdf642..59cbf400d 100644
--- a/paynt/quotient/quotient.py
+++ b/paynt/quotient/quotient.py
@@ -21,6 +21,16 @@ class QuotientContainer:
     # if True, hole scores in the state will be multiplied with the number of expected visits of this state
     compute_expected_visits = True
 
+    @staticmethod
+    def compute_choice_destinations(mdp):
+        choice_destinations = []
+        for choice in range(mdp.nr_choices):
+            destinations = []
+            for entry in mdp.transition_matrix.get_row(choice):
+                destinations.append(entry.column)
+            choice_destinations.append(destinations)
+        return choice_destinations
+
     
     def __init__(self, quotient_mdp = None, coloring = None,
         specification = None):
@@ -36,6 +46,9 @@ def __init__(self, quotient_mdp = None, coloring = None,
         self.subsystem_builder_options.build_state_mapping = True
         self.subsystem_builder_options.build_action_mapping = True
 
+        # for each choice of the quotient, a list of its state-destinations
+        self.choice_destinations = QuotientContainer.compute_choice_destinations(self.quotient_mdp)
+
         # (optional) counter of discarded assignments
         self.discarded = None
 
@@ -133,45 +146,30 @@ def build_chain(self, family):
         return DTMC(dtmc,self,state_map,choice_map)
 
     
-    def scheduler_selection(self, mdp, scheduler):
-        ''' Get hole options involved in the scheduler selection. '''
-        assert scheduler.memoryless and scheduler.deterministic
-        
-        # construct DTMC that corresponds to this scheduler and filter reachable states/choices
-        choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices)
-        dtmc,_,choice_map = self.restrict_mdp(mdp.model, choices)
-        choices = [ choice_map[state] for state in range(dtmc.nr_states) ]
-        
-        # map relevant choices to hole options
-        selection = [set() for hole_index in mdp.design_space.hole_indices]
+    def choices_to_hole_selection(self, choices):
+        hole_selection = [set() for hole_index in self.design_space.hole_indices]
         for choice in choices:
-            global_choice = mdp.quotient_choice_map[choice]
-            choice_options = self.coloring.action_to_hole_options[global_choice]
+            choice_options = self.coloring.action_to_hole_options[choice]
             for hole_index,option in choice_options.items():
-                selection[hole_index].add(option)
-        selection = [list(options) for options in selection]
+                hole_selection[hole_index].add(option)
+        hole_selection = [list(options) for options in hole_selection]
+        return hole_selection
 
-        return selection
     
-    def scheduler_selection_with_coloring(self, mdp, scheduler, coloring):
+    def scheduler_selection(self, mdp, scheduler, coloring=None):
         ''' Get hole options involved in the scheduler selection. '''
         assert scheduler.memoryless and scheduler.deterministic
+
+        if coloring is None:
+            coloring = self.coloring
         
         # construct DTMC that corresponds to this scheduler and filter reachable states/choices
         choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices)
         dtmc,_,choice_map = self.restrict_mdp(mdp.model, choices)
         choices = [ choice_map[state] for state in range(dtmc.nr_states) ]
         
-        # map relevant choices to hole options
-        selection = [set() for hole_index in mdp.design_space.hole_indices]
-        for choice in choices:
-            global_choice = mdp.quotient_choice_map[choice]
-            choice_options = coloring.action_to_hole_options[global_choice]
-            for hole_index,option in choice_options.items():
-                selection[hole_index].add(option)
-        selection = [list(options) for options in selection]
-
-        return selection
+        hole_selection = self.choices_to_hole_selection(choices)
+        return hole_selection
 
     
     @staticmethod
diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
index 4e3423b29..6e71e2f9f 100644
--- a/paynt/synthesizer/policy_tree.py
+++ b/paynt/synthesizer/policy_tree.py
@@ -691,7 +691,7 @@ def synthesize_policy_for_family(self, family, prop, all_sat=False, iteration_li
                     sat_mdp_policies[index] = policy
 
                 current_results.append(primary_result)
-                selection = self.quotient.scheduler_selection_with_coloring(current_action_family.mdp, primary_result.result.scheduler, self.action_coloring)
+                selection = self.quotient.scheduler_selection(current_action_family.mdp, primary_result.result.scheduler, self.action_coloring)
                 self.update_scores(score_lists, selection)
 
                 scores = {hole:len(score_list) for hole, score_list in score_lists.items()}

From ca30c0a526fc4b5806bab7610ee4a23e075d2351 Mon Sep 17 00:00:00 2001
From: Roman Andriushchenko <iandri@fit.vut.cz>
Date: Fri, 8 Dec 2023 14:31:18 +0100
Subject: [PATCH 2/5] avoid building DTMC to compute reachable choices of the
 scheduler

---
 paynt/quotient/mdp_family.py     | 20 +++++------
 paynt/quotient/quotient.py       | 57 ++++++++++++++++++++++++++------
 paynt/synthesizer/policy_tree.py |  8 ++---
 3 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py
index d93f8c136..c42df0c7e 100644
--- a/paynt/quotient/mdp_family.py
+++ b/paynt/quotient/mdp_family.py
@@ -56,6 +56,7 @@ def map_state_to_available_actions(state_action_choices):
             state_to_actions.append(available_actions)
         return state_to_actions
 
+    
     def __init__(self, quotient_mdp, coloring, specification):
         super().__init__(quotient_mdp = quotient_mdp, coloring = coloring, specification = specification)
 
@@ -103,23 +104,18 @@ def keep_reachable_choices(self, choice_mask, mdp=None, choice_destinations=None
                         state_visited[dst] = True
                         state_queue.append(dst)
         return choice_mask_reachable
-    
-    
-    
+
     
     def empty_policy(self):
-        return [None] * self.quotient_mdp.nr_states
+        return self.empty_scheduler()
 
     def scheduler_to_policy(self, scheduler, mdp):            
+        state_to_choice = self.scheduler_to_state_to_choice(mdp,scheduler)
         policy = self.empty_policy()
-        nci = mdp.model.nondeterministic_choice_indices.copy()
-        for state in range(mdp.model.nr_states):
-            state_choice = scheduler.get_choice(state).get_deterministic_choice()
-            choice = nci[state] + state_choice
-            quotient_choice = mdp.quotient_choice_map[choice]
-            action = self.choice_to_action[quotient_choice]
-            quotient_state = mdp.quotient_state_map[state]
-            policy[quotient_state] = action
+        for state in range(self.quotient_mdp.nr_states):
+            choice = state_to_choice[state]
+            if choice is not None:
+                policy[state] = self.choice_to_action[choice]
         return policy
 
 
diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py
index 59cbf400d..073993115 100644
--- a/paynt/quotient/quotient.py
+++ b/paynt/quotient/quotient.py
@@ -146,7 +146,49 @@ def build_chain(self, family):
         return DTMC(dtmc,self,state_map,choice_map)
 
     
-    def choices_to_hole_selection(self, choices):
+    def empty_scheduler(self):
+        return [None] * self.quotient_mdp.nr_states
+
+    def keep_reachable_choices_of_scheduler(self, state_to_choice):
+        state_to_choice_reachable = self.empty_scheduler()
+        state_visited = [False]*self.quotient_mdp.nr_states
+        initial_state = list(self.quotient_mdp.initial_states)[0]
+        state_visited[initial_state] = True
+        state_queue = [initial_state]
+        while state_queue:
+            state = state_queue.pop()
+            choice = state_to_choice[state]
+            state_to_choice_reachable[state] = choice
+            for dst in self.choice_destinations[choice]:
+                if not state_visited[dst]:
+                    state_visited[dst] = True
+                    state_queue.append(dst)
+        return state_to_choice_reachable
+
+    def scheduler_to_state_to_choice(self, mdp, scheduler, keep_reachable_choices=True):
+        assert scheduler.memoryless and scheduler.deterministic
+        state_to_choice = self.empty_scheduler()
+        nci = mdp.model.nondeterministic_choice_indices.copy()
+        for state in range(mdp.model.nr_states):
+            choice = nci[state] + scheduler.get_choice(state).get_deterministic_choice()
+            quotient_choice = mdp.quotient_choice_map[choice]
+            quotient_state = mdp.quotient_state_map[state]
+            state_to_choice[quotient_state] = quotient_choice
+        if keep_reachable_choices:
+            state_to_choice = self.keep_reachable_choices_of_scheduler(state_to_choice)
+        return state_to_choice
+
+    def state_to_choice_to_choices(self, state_to_choice):
+        choices = stormpy.BitVector(self.quotient_mdp.nr_choices,False)
+        for choice in state_to_choice:
+            if choice is not None:
+                choices.set(choice,True)
+        return choices
+
+
+    def choices_to_hole_selection(self, choices, coloring=None):
+        if coloring is None:
+            coloring = self.coloring
         hole_selection = [set() for hole_index in self.design_space.hole_indices]
         for choice in choices:
             choice_options = self.coloring.action_to_hole_options[choice]
@@ -159,16 +201,9 @@ def choices_to_hole_selection(self, choices):
     def scheduler_selection(self, mdp, scheduler, coloring=None):
         ''' Get hole options involved in the scheduler selection. '''
         assert scheduler.memoryless and scheduler.deterministic
-
-        if coloring is None:
-            coloring = self.coloring
-        
-        # construct DTMC that corresponds to this scheduler and filter reachable states/choices
-        choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices)
-        dtmc,_,choice_map = self.restrict_mdp(mdp.model, choices)
-        choices = [ choice_map[state] for state in range(dtmc.nr_states) ]
-        
-        hole_selection = self.choices_to_hole_selection(choices)
+        state_to_choice = self.scheduler_to_state_to_choice(mdp, scheduler, keep_reachable=True)
+        choices = self.state_to_choice_to_choices(state_to_choice)
+        hole_selection = self.choices_to_hole_selection(choices,coloring)
         return hole_selection
 
     
diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
index 6e71e2f9f..61b384d84 100644
--- a/paynt/synthesizer/policy_tree.py
+++ b/paynt/synthesizer/policy_tree.py
@@ -483,18 +483,16 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):
         if SynthesizerPolicyTree.use_optimistic_splitting:
             scheduler_choices = game_solver.solution_reachable_choices
             state_values = game_solver.solution_state_values
+            scheduler_choices = self.quotient.keep_reachable_choices(scheduler_choices)
         else:
-            scheduler_choices = stormpy.BitVector(self.quotient.quotient_mdp.nr_choices,False)
-            choices = primary_primary_result.result.scheduler.compute_action_support(family.mdp.model.nondeterministic_choice_indices)
-            for choice in choices:
-                scheduler_choices.set(family.mdp.quotient_choice_map[choice],True)
+            state_to_choice = self.quotient.scheduler_to_state_to_choice(family.mdp, primary_primary_result.result.scheduler)
+            scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice)
             state_values = [0] * self.quotient.quotient_mdp.nr_states
             for state in range(family.mdp.states):
                 quotient_state = family.mdp.quotient_state_map[state]
                 state_values[quotient_state] = primary_primary_result.result.at(state)
 
         # map reachable scheduler choices to hole options
-        scheduler_choices = self.quotient.keep_reachable_choices(scheduler_choices)
         hole_selection = self.quotient.choices_to_hole_selection(scheduler_choices)
         
         if False:

From ecb6fb8428b160c0e2f67c741c9e9f2197b17644 Mon Sep 17 00:00:00 2001
From: Roman Andriushchenko <iandri@fit.vut.cz>
Date: Fri, 8 Dec 2023 15:01:23 +0100
Subject: [PATCH 3/5] replace keep_reachable_choices

---
 paynt/quotient/mdp_family.py     | 25 -------------------------
 paynt/synthesizer/policy_tree.py |  6 +++---
 2 files changed, 3 insertions(+), 28 deletions(-)

diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py
index c42df0c7e..6068e1d05 100644
--- a/paynt/quotient/mdp_family.py
+++ b/paynt/quotient/mdp_family.py
@@ -81,31 +81,6 @@ def __init__(self, quotient_mdp, coloring, specification):
     def num_actions(self):
         return len(self.action_labels)
 
-
-    def keep_reachable_choices(self, choice_mask, mdp=None, choice_destinations=None):
-        if mdp is None:
-            mdp = self.quotient_mdp
-        if choice_destinations is None:
-            choice_destinations = self.choice_destinations
-        state_visited = [False]*mdp.nr_states
-        initial_state = list(mdp.initial_states)[0]
-        state_visited[initial_state] = True
-        state_queue = [initial_state]
-        tm = mdp.transition_matrix
-        choice_mask_reachable = stormpy.BitVector(mdp.nr_choices,False)
-        while state_queue:
-            state = state_queue.pop()
-            for choice in mdp.transition_matrix.get_rows_for_group(state):
-                if not choice_mask[choice]:
-                    continue
-                choice_mask_reachable.set(choice,True)
-                for dst in choice_destinations[choice]:
-                    if not state_visited[dst]:
-                        state_visited[dst] = True
-                        state_queue.append(dst)
-        return choice_mask_reachable
-
-    
     def empty_policy(self):
         return self.empty_scheduler()
 
diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
index 61b384d84..8945e366e 100644
--- a/paynt/synthesizer/policy_tree.py
+++ b/paynt/synthesizer/policy_tree.py
@@ -481,18 +481,18 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):
     
         # undecided: choose scheduler choices to be used for splitting
         if SynthesizerPolicyTree.use_optimistic_splitting:
-            scheduler_choices = game_solver.solution_reachable_choices
             state_values = game_solver.solution_state_values
-            scheduler_choices = self.quotient.keep_reachable_choices(scheduler_choices)
+            state_to_choice = game_solver.solution_state_to_quotient_choice
+            state_to_choice = self.quotient.keep_reachable_choices_of_scheduler(state_to_choice)
         else:
             state_to_choice = self.quotient.scheduler_to_state_to_choice(family.mdp, primary_primary_result.result.scheduler)
-            scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice)
             state_values = [0] * self.quotient.quotient_mdp.nr_states
             for state in range(family.mdp.states):
                 quotient_state = family.mdp.quotient_state_map[state]
                 state_values[quotient_state] = primary_primary_result.result.at(state)
 
         # map reachable scheduler choices to hole options
+        scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice)
         hole_selection = self.quotient.choices_to_hole_selection(scheduler_choices)
         
         if False:

From 1021d242bd6a5bc01cd02239c658732fb6252924 Mon Sep 17 00:00:00 2001
From: Roman Andriushchenko <iandri@fit.vut.cz>
Date: Fri, 8 Dec 2023 15:13:53 +0100
Subject: [PATCH 4/5] fix not using optional coloring

---
 paynt/quotient/coloring.py | 12 +++++++++++-
 paynt/quotient/quotient.py | 18 ++++--------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/paynt/quotient/coloring.py b/paynt/quotient/coloring.py
index 721d3d88c..93e77ebce 100644
--- a/paynt/quotient/coloring.py
+++ b/paynt/quotient/coloring.py
@@ -56,7 +56,7 @@ def __init__(self, mdp, holes, action_to_hole_options):
     @property
     def num_choices(self):
         return len(self.action_to_hole_options)
-        
+
     def select_actions(self, family):
         '''
         Select non-default actions relevant in the provided design space.
@@ -112,3 +112,13 @@ def select_actions(self, family):
         selected_actions_bv = stormpy.synthesis.construct_selection(self.default_actions, selected_actions)
         
         return hole_selected_actions,selected_actions,selected_actions_bv
+
+    
+    def choices_to_hole_selection(self, choices):
+        hole_selection = [set() for hole_index in self.holes.hole_indices]
+        for choice in choices:
+            choice_options = self.action_to_hole_options[choice]
+            for hole_index,option in choice_options.items():
+                hole_selection[hole_index].add(option)
+        hole_selection = [list(options) for options in hole_selection]
+        return hole_selection
diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py
index 073993115..c5e5321ac 100644
--- a/paynt/quotient/quotient.py
+++ b/paynt/quotient/quotient.py
@@ -185,25 +185,15 @@ def state_to_choice_to_choices(self, state_to_choice):
                 choices.set(choice,True)
         return choices
 
-
-    def choices_to_hole_selection(self, choices, coloring=None):
-        if coloring is None:
-            coloring = self.coloring
-        hole_selection = [set() for hole_index in self.design_space.hole_indices]
-        for choice in choices:
-            choice_options = self.coloring.action_to_hole_options[choice]
-            for hole_index,option in choice_options.items():
-                hole_selection[hole_index].add(option)
-        hole_selection = [list(options) for options in hole_selection]
-        return hole_selection
-
     
     def scheduler_selection(self, mdp, scheduler, coloring=None):
         ''' Get hole options involved in the scheduler selection. '''
         assert scheduler.memoryless and scheduler.deterministic
-        state_to_choice = self.scheduler_to_state_to_choice(mdp, scheduler, keep_reachable=True)
+        state_to_choice = self.scheduler_to_state_to_choice(mdp, scheduler)
         choices = self.state_to_choice_to_choices(state_to_choice)
-        hole_selection = self.choices_to_hole_selection(choices,coloring)
+        if coloring is None:
+            coloring = self.coloring
+        hole_selection = coloring.choices_to_hole_selection(choices)
         return hole_selection
 
     

From c301db9291fe19f9739d5740996fdd80bb1b4055 Mon Sep 17 00:00:00 2001
From: Roman Andriushchenko <iandri@fit.vut.cz>
Date: Fri, 8 Dec 2023 15:18:44 +0100
Subject: [PATCH 5/5] whatever

---
 paynt/synthesizer/policy_tree.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
index 8945e366e..6e25a425d 100644
--- a/paynt/synthesizer/policy_tree.py
+++ b/paynt/synthesizer/policy_tree.py
@@ -493,7 +493,7 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):
 
         # map reachable scheduler choices to hole options
         scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice)
-        hole_selection = self.quotient.choices_to_hole_selection(scheduler_choices)
+        hole_selection = self.quotient.coloring.choices_to_hole_selection(scheduler_choices)
         
         if False:
             # sanity check