Skip to content

Commit

Permalink
fix keeping reachable choices for pessimistic splitting
Browse files Browse the repository at this point in the history
  • Loading branch information
Roman Andriushchenko committed Jan 17, 2024
1 parent 9e53703 commit 2b29aa3
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 22 deletions.
6 changes: 3 additions & 3 deletions paynt/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def setup_logger(log_path = None):

@click.option("--mdp-split-wrt-mdp", is_flag=True, default=False,
help="# if set, MDP abstraction scheduler will be used for splitting, otherwise game abstraction scheduler will be used")
@click.option("--mdp-discard-unreachable-actions", is_flag=True, default=False,
@click.option("--mdp-discard-unreachable-choices", is_flag=True, default=False,
help="# if set, unreachable choices will be discarded from the splitting scheduler")
@click.option("--mdp-use-randomized-abstraction", is_flag=True, default=False,
help="# if set, randomized abstraction guess-and-verify will be used instead of game abstraction;" +
Expand All @@ -142,7 +142,7 @@ def paynt_run(
use_storm_cutoffs, unfold_strategy_storm,
export_fsc_storm, export_fsc_paynt, export_evaluation,
all_in_one,
mdp_split_wrt_mdp, mdp_discard_unreachable_actions, mdp_use_randomized_abstraction,
mdp_split_wrt_mdp, mdp_discard_unreachable_choices, mdp_use_randomized_abstraction,
ce_generator,
profiling
):
Expand All @@ -161,7 +161,7 @@ def paynt_run(
paynt.quotient.pomdp.PomdpQuotient.posterior_aware = posterior_aware

paynt.synthesizer.policy_tree.SynthesizerPolicyTree.split_wrt_mdp_scheduler = mdp_split_wrt_mdp
paynt.synthesizer.policy_tree.SynthesizerPolicyTree.discard_unreachable_actions = mdp_discard_unreachable_actions
paynt.synthesizer.policy_tree.SynthesizerPolicyTree.discard_unreachable_choices = mdp_discard_unreachable_choices
paynt.synthesizer.policy_tree.SynthesizerPolicyTree.use_randomized_abstraction = mdp_use_randomized_abstraction

storm_control = None
Expand Down
8 changes: 4 additions & 4 deletions paynt/quotient/quotient.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def build_assignment(self, family):
def empty_scheduler(self):
return [None] * self.quotient_mdp.nr_states

def keep_reachable_choices_of_scheduler(self, state_to_choice):
def discard_unreachable_choices(self, state_to_choice):
state_to_choice_reachable = self.empty_scheduler()
state_visited = [False]*self.quotient_mdp.nr_states
initial_state = list(self.quotient_mdp.initial_states)[0]
Expand All @@ -143,15 +143,15 @@ def keep_reachable_choices_of_scheduler(self, state_to_choice):
state_queue.append(dst)
return state_to_choice_reachable

def scheduler_to_state_to_choice(self, mdp, scheduler, keep_reachable_choices=True):
def scheduler_to_state_to_choice(self, mdp, scheduler, discard_unreachable_choices=True):
state_to_quotient_choice = payntbind.synthesis.schedulerToStateToGlobalChoice(scheduler, mdp.model, mdp.quotient_choice_map)
state_to_choice = self.empty_scheduler()
for state in range(mdp.model.nr_states):
quotient_choice = state_to_quotient_choice[state]
quotient_state = mdp.quotient_state_map[state]
state_to_choice[quotient_state] = quotient_choice
if keep_reachable_choices:
state_to_choice = self.keep_reachable_choices_of_scheduler(state_to_choice)
if discard_unreachable_choices:
state_to_choice = self.discard_unreachable_choices(state_to_choice)
return state_to_choice

def state_to_choice_to_choices(self, state_to_choice):
Expand Down
30 changes: 15 additions & 15 deletions paynt/synthesizer/policy_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ class SynthesizerPolicyTree(paynt.synthesizer.synthesizer.Synthesizer):
# if True, MDP abstraction scheduler will be used for splitting, otherwise game abstraction scheduler will be used
split_wrt_mdp_scheduler = False
# if True, unreachable choices will be discarded from the splitting scheduler
discard_unreachable_actions = False
discard_unreachable_choices = False
# if True, randomized abstraction guess-and-verify will be used instead of game abstraction
use_randomized_abstraction = False

Expand Down Expand Up @@ -572,31 +572,31 @@ def try_randomized_abstraction(self, family, prop):

# apply policy and check if it is SAT for all MDPs in the family
policy_sat = self.verify_policy(family, prop, policy)

return policy,policy_sat

def state_to_choice_to_scheduler(self, state_to_choice):
# uncomment this to use only reachable choices of the game scheduler
if SynthesizerPolicyTree.discard_unreachable_actions:
state_to_choice = self.quotient.keep_reachable_choices_of_scheduler(state_to_choice)
def state_to_choice_to_hole_selection(self, state_to_choice):
if SynthesizerPolicyTree.discard_unreachable_choices:
state_to_choice = self.quotient.discard_unreachable_choices(state_to_choice)
scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice)
hole_selection = self.quotient.coloring.collectHoleOptions(scheduler_choices)
return scheduler_choices,hole_selection

def parse_game_scheduler(self, game_solver):
state_values = game_solver.solution_state_values
state_to_choice = game_solver.solution_state_to_quotient_choice.copy()
scheduler_choices,hole_selection = self.state_to_choice_to_scheduler(state_to_choice)
return scheduler_choices,state_values,hole_selection
scheduler_choices,hole_selection = self.state_to_choice_to_hole_selection(state_to_choice)
state_values = game_solver.solution_state_values
return scheduler_choices,hole_selection,state_values

def parse_mdp_scheduler(self, family, mdp_result):
state_to_choice = self.quotient.scheduler_to_state_to_choice(family.mdp, mdp_result.result.scheduler)
scheduler_choices,hole_selection = self.state_to_choice_to_scheduler(state_to_choice)
state_to_choice = self.quotient.scheduler_to_state_to_choice(
family.mdp, mdp_result.result.scheduler, discard_unreachable_choices=False
)
scheduler_choices,hole_selection = self.state_to_choice_to_hole_selection(state_to_choice)
state_values = [0] * self.quotient.quotient_mdp.nr_states
for state in range(family.mdp.states):
quotient_state = family.mdp.quotient_state_map[state]
state_values[quotient_state] = mdp_result.result.at(state)
return scheduler_choices,state_values,hole_selection
return scheduler_choices,hole_selection,state_values


def verify_family(self, family, game_solver, prop):
Expand Down Expand Up @@ -638,9 +638,9 @@ def verify_family(self, family, game_solver, prop):

# undecided: choose scheduler choices to be used for splitting
if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler):
scheduler_choices,state_values,hole_selection = self.parse_game_scheduler(game_solver)
scheduler_choices,hole_selection,state_values = self.parse_game_scheduler(game_solver)
else:
scheduler_choices,state_values,hole_selection = self.parse_mdp_scheduler(family, mdp_result)
scheduler_choices,hole_selection,state_values = self.parse_mdp_scheduler(family, mdp_result)

splitter = self.choose_splitter(family,prop,scheduler_choices,state_values,hole_selection)
mdp_family_result.splitter = splitter
Expand Down Expand Up @@ -723,7 +723,7 @@ def split(self, family, prop, hole_selection, splitter, policy):
subfamily.candidate_policy = None
subfamilies.append(subfamily)

if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler) and not SynthesizerPolicyTree.discard_unreachable_actions:
if not (SynthesizerPolicyTree.use_randomized_abstraction or SynthesizerPolicyTree.split_wrt_mdp_scheduler) and not SynthesizerPolicyTree.discard_unreachable_choices:
self.assign_candidate_policy(subfamilies, hole_selection, splitter, policy)

return suboptions,subfamilies
Expand Down

0 comments on commit 2b29aa3

Please sign in to comment.