fix keeping reachable choices for pessimistic splitting

randriu · Jan 17, 2024 · 2b29aa3 · 2b29aa3
1 parent 9e53703
commit 2b29aa3
Show file tree

Hide file tree

Showing 3 changed files with 22 additions and 22 deletions.
diff --git a/paynt/cli.py b/paynt/cli.py
@@ -118,7 +118,7 @@ def setup_logger(log_path = None):
 
 @click.option("--mdp-split-wrt-mdp", is_flag=True, default=False,
     help="# if set, MDP abstraction scheduler will be used for splitting, otherwise game abstraction scheduler will be used")
-@click.option("--mdp-discard-unreachable-actions", is_flag=True, default=False,
+@click.option("--mdp-discard-unreachable-choices", is_flag=True, default=False,
     help="# if set, unreachable choices will be discarded from the splitting scheduler")
 @click.option("--mdp-use-randomized-abstraction", is_flag=True, default=False,
     help="# if set, randomized abstraction guess-and-verify will be used instead of game abstraction;" +
@@ -142,7 +142,7 @@ def paynt_run(
     use_storm_cutoffs, unfold_strategy_storm,
     export_fsc_storm, export_fsc_paynt, export_evaluation,
     all_in_one,
-    mdp_split_wrt_mdp, mdp_discard_unreachable_actions, mdp_use_randomized_abstraction,
+    mdp_split_wrt_mdp, mdp_discard_unreachable_choices, mdp_use_randomized_abstraction,
     ce_generator,
     profiling
 ):
@@ -161,7 +161,7 @@ def paynt_run(
     paynt.quotient.pomdp.PomdpQuotient.posterior_aware = posterior_aware
 
     paynt.synthesizer.policy_tree.SynthesizerPolicyTree.split_wrt_mdp_scheduler = mdp_split_wrt_mdp
-    paynt.synthesizer.policy_tree.SynthesizerPolicyTree.discard_unreachable_actions = mdp_discard_unreachable_actions
+    paynt.synthesizer.policy_tree.SynthesizerPolicyTree.discard_unreachable_choices = mdp_discard_unreachable_choices
     paynt.synthesizer.policy_tree.SynthesizerPolicyTree.use_randomized_abstraction = mdp_use_randomized_abstraction
 
     storm_control = None

diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py
@@ -127,7 +127,7 @@ def build_assignment(self, family):
     def empty_scheduler(self):
         return [None] * self.quotient_mdp.nr_states
 
-    def keep_reachable_choices_of_scheduler(self, state_to_choice):
+    def discard_unreachable_choices(self, state_to_choice):
         state_to_choice_reachable = self.empty_scheduler()
         state_visited = [False]*self.quotient_mdp.nr_states
         initial_state = list(self.quotient_mdp.initial_states)[0]
@@ -143,15 +143,15 @@ def keep_reachable_choices_of_scheduler(self, state_to_choice):
                     state_queue.append(dst)
         return state_to_choice_reachable
 
-    def scheduler_to_state_to_choice(self, mdp, scheduler, keep_reachable_choices=True):
+    def scheduler_to_state_to_choice(self, mdp, scheduler, discard_unreachable_choices=True):
         state_to_quotient_choice = payntbind.synthesis.schedulerToStateToGlobalChoice(scheduler, mdp.model, mdp.quotient_choice_map)
         state_to_choice = self.empty_scheduler()
         for state in range(mdp.model.nr_states):
             quotient_choice = state_to_quotient_choice[state]
             quotient_state = mdp.quotient_state_map[state]
             state_to_choice[quotient_state] = quotient_choice
-        if keep_reachable_choices:
-            state_to_choice = self.keep_reachable_choices_of_scheduler(state_to_choice)
+        if discard_unreachable_choices:
+            state_to_choice = self.discard_unreachable_choices(state_to_choice)
         return state_to_choice
 
     def state_to_choice_to_choices(self, state_to_choice):

diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
@@ -485,7 +485,7 @@ class SynthesizerPolicyTree(paynt.synthesizer.synthesizer.Synthesizer):
     # if True, MDP abstraction scheduler will be used for splitting, otherwise game abstraction scheduler will be used
     split_wrt_mdp_scheduler = False
     # if True, unreachable choices will be discarded from the splitting scheduler
-    discard_unreachable_actions = False
+    discard_unreachable_choices = False
     # if True, randomized abstraction guess-and-verify will be used instead of game abstraction
     use_randomized_abstraction = False
 
@@ -572,31 +572,31 @@ def try_randomized_abstraction(self, family, prop):
 
         # apply policy and check if it is SAT for all MDPs in the family
         policy_sat = self.verify_policy(family, prop, policy)
-
         return policy,policy_sat
 
-    def state_to_choice_to_scheduler(self, state_to_choice):
-        # uncomment this to use only reachable choices of the game scheduler
-        if SynthesizerPolicyTree.discard_unreachable_actions:
-            state_to_choice = self.quotient.keep_reachable_choices_of_scheduler(state_to_choice)
+    def state_to_choice_to_hole_selection(self, state_to_choice):
+        if SynthesizerPolicyTree.discard_unreachable_choices:
+            state_to_choice = self.quotient.discard_unreachable_choices(state_to_choice)
         scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice)
         hole_selection = self.quotient.coloring.collectHoleOptions(scheduler_choices)
         return scheduler_choices,hole_selection
 
     def parse_game_scheduler(self, game_solver):
-        state_values = game_solver.solution_state_values
         state_to_choice = game_solver.solution_state_to_quotient_choice.copy()
-        scheduler_choices,hole_selection = self.state_to_choice_to_scheduler(state_to_choice)
-        return scheduler_choices,state_values,hole_selection
+        scheduler_choices,hole_selection = self.state_to_choice_to_hole_selection(state_to_choice)
+        state_values = game_solver.solution_state_values
+        return scheduler_choices,hole_selection,state_values
 
     def parse_mdp_scheduler(self, family, mdp_result):
-        state_to_choice = self.quotient.scheduler_to_state_to_choice(family.mdp, mdp_result.result.scheduler)
-        scheduler_choices,hole_selection = self.state_to_choice_to_scheduler(state_to_choice)
+        state_to_choice = self.quotient.scheduler_to_state_to_choice(
+            family.mdp, mdp_result.result.scheduler, discard_unreachable_choices=False
+        )
+        scheduler_choices,hole_selection = self.state_to_choice_to_hole_selection(state_to_choice)
         state_values = [0] * self.quotient.quotient_mdp.nr_states
         for state in range(family.mdp.states):
             quotient_state = family.mdp.quotient_state_map[state]
             state_values[quotient_state] = mdp_result.result.at(state)
-        return scheduler_choices,state_values,hole_selection
+        return scheduler_choices,hole_selection,state_values
 
 
     def verify_family(self, family, game_solver, prop):
@@ -638,9 +638,9 @@ def verify_family(self, family, game_solver, prop):
 
         # undecided: choose scheduler choices to be used for splitting
         if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler):
-            scheduler_choices,state_values,hole_selection = self.parse_game_scheduler(game_solver)
+            scheduler_choices,hole_selection,state_values = self.parse_game_scheduler(game_solver)
         else:
-            scheduler_choices,state_values,hole_selection = self.parse_mdp_scheduler(family, mdp_result)
+            scheduler_choices,hole_selection,state_values = self.parse_mdp_scheduler(family, mdp_result)
 
         splitter = self.choose_splitter(family,prop,scheduler_choices,state_values,hole_selection)
         mdp_family_result.splitter = splitter
@@ -723,7 +723,7 @@ def split(self, family, prop, hole_selection, splitter, policy):
             subfamily.candidate_policy = None
             subfamilies.append(subfamily)
 
-        if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler) and not SynthesizerPolicyTree.discard_unreachable_actions:
+        if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler) and not SynthesizerPolicyTree.discard_unreachable_choices:
             self.assign_candidate_policy(subfamilies, hole_selection, splitter, policy)
 
         return suboptions,subfamilies