Skip to content

Commit

Permalink
force investigating the reachable part of a scheduler
Browse files Browse the repository at this point in the history
  • Loading branch information
Roman Andriushchenko committed Dec 8, 2023
1 parent 951e73d commit 6520ddb
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 97 deletions.
65 changes: 7 additions & 58 deletions paynt/quotient/mdp_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,7 @@ def map_state_to_available_actions(state_action_choices):
state_to_actions.append(available_actions)
return state_to_actions

@staticmethod
def compute_choice_destinations(mdp):
choice_destinations = []
for choice in range(mdp.nr_choices):
destinations = []
for entry in mdp.transition_matrix.get_row(choice):
destinations.append(entry.column)
choice_destinations.append(destinations)
return choice_destinations



def __init__(self, quotient_mdp, coloring, specification):
super().__init__(quotient_mdp = quotient_mdp, coloring = coloring, specification = specification)

Expand All @@ -80,68 +70,27 @@ def __init__(self, quotient_mdp, coloring, specification):
self.state_action_choices = None
# for each state of the quotient, a list of available actions
self.state_to_actions = None
# for each choice of the quotient, a list of its state-destinations
self.choice_destinations = None

self.action_labels,self.choice_to_action = MdpFamilyQuotientContainer.extract_choice_labels(self.quotient_mdp)
self.state_action_choices = MdpFamilyQuotientContainer.map_state_action_to_choices(
self.quotient_mdp, self.num_actions, self.choice_to_action)
self.state_to_actions = MdpFamilyQuotientContainer.map_state_to_available_actions(self.state_action_choices)
self.choice_destinations = MdpFamilyQuotientContainer.compute_choice_destinations(self.quotient_mdp)


@property
def num_actions(self):
return len(self.action_labels)


def keep_reachable_choices(self, choice_mask, mdp=None, choice_destinations=None):
if mdp is None:
mdp = self.quotient_mdp
if choice_destinations is None:
choice_destinations = self.choice_destinations
state_visited = [False]*mdp.nr_states
initial_state = list(mdp.initial_states)[0]
state_visited[initial_state] = True
state_queue = [initial_state]
tm = mdp.transition_matrix
choice_mask_reachable = stormpy.BitVector(mdp.nr_choices,False)
while state_queue:
state = state_queue.pop()
for choice in mdp.transition_matrix.get_rows_for_group(state):
if not choice_mask[choice]:
continue
choice_mask_reachable.set(choice,True)
for dst in choice_destinations[choice]:
if not state_visited[dst]:
state_visited[dst] = True
state_queue.append(dst)
return choice_mask_reachable



def choices_to_hole_selection(self, choice_mask):
hole_selection = [set() for hole_index in self.design_space.hole_indices]
for choice in choice_mask:
choice_options = self.coloring.action_to_hole_options[choice]
for hole_index,option in choice_options.items():
hole_selection[hole_index].add(option)
hole_selection = [list(options) for options in hole_selection]
return hole_selection

def empty_policy(self):
return [None] * self.quotient_mdp.nr_states
return self.empty_scheduler()

def scheduler_to_policy(self, scheduler, mdp):
state_to_choice = self.scheduler_to_state_to_choice(mdp,scheduler)
policy = self.empty_policy()
nci = mdp.model.nondeterministic_choice_indices.copy()
for state in range(mdp.model.nr_states):
state_choice = scheduler.get_choice(state).get_deterministic_choice()
choice = nci[state] + state_choice
quotient_choice = mdp.quotient_choice_map[choice]
action = self.choice_to_action[quotient_choice]
quotient_state = mdp.quotient_state_map[state]
policy[quotient_state] = action
for state in range(self.quotient_mdp.nr_states):
choice = state_to_choice[state]
if choice is not None:
policy[state] = self.choice_to_action[choice]
return policy


Expand Down
97 changes: 65 additions & 32 deletions paynt/quotient/quotient.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@ class QuotientContainer:
# if True, hole scores in the state will be multiplied with the number of expected visits of this state
compute_expected_visits = True

@staticmethod
def compute_choice_destinations(mdp):
choice_destinations = []
for choice in range(mdp.nr_choices):
destinations = []
for entry in mdp.transition_matrix.get_row(choice):
destinations.append(entry.column)
choice_destinations.append(destinations)
return choice_destinations


def __init__(self, quotient_mdp = None, coloring = None,
specification = None):
Expand All @@ -36,6 +46,9 @@ def __init__(self, quotient_mdp = None, coloring = None,
self.subsystem_builder_options.build_state_mapping = True
self.subsystem_builder_options.build_action_mapping = True

# for each choice of the quotient, a list of its state-destinations
self.choice_destinations = QuotientContainer.compute_choice_destinations(self.quotient_mdp)

# (optional) counter of discarded assignments
self.discarded = None

Expand Down Expand Up @@ -133,45 +146,65 @@ def build_chain(self, family):
return DTMC(dtmc,self,state_map,choice_map)


def scheduler_selection(self, mdp, scheduler):
''' Get hole options involved in the scheduler selection. '''
def empty_scheduler(self):
return [None] * self.quotient_mdp.nr_states

def keep_reachable_choices_of_scheduler(self, state_to_choice):
state_to_choice_reachable = self.empty_scheduler()
state_visited = [False]*self.quotient_mdp.nr_states
initial_state = list(self.quotient_mdp.initial_states)[0]
state_visited[initial_state] = True
state_queue = [initial_state]
while state_queue:
state = state_queue.pop()
choice = state_to_choice[state]
state_to_choice_reachable[state] = choice
for dst in self.choice_destinations[choice]:
if not state_visited[dst]:
state_visited[dst] = True
state_queue.append(dst)
return state_to_choice_reachable

def scheduler_to_state_to_choice(self, mdp, scheduler, keep_reachable_choices=True):
assert scheduler.memoryless and scheduler.deterministic

# construct DTMC that corresponds to this scheduler and filter reachable states/choices
choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices)
dtmc,_,choice_map = self.restrict_mdp(mdp.model, choices)
choices = [ choice_map[state] for state in range(dtmc.nr_states) ]

# map relevant choices to hole options
selection = [set() for hole_index in mdp.design_space.hole_indices]
state_to_choice = self.empty_scheduler()
nci = mdp.model.nondeterministic_choice_indices.copy()
for state in range(mdp.model.nr_states):
choice = nci[state] + scheduler.get_choice(state).get_deterministic_choice()
quotient_choice = mdp.quotient_choice_map[choice]
quotient_state = mdp.quotient_state_map[state]
state_to_choice[quotient_state] = quotient_choice
if keep_reachable_choices:
state_to_choice = self.keep_reachable_choices_of_scheduler(state_to_choice)
return state_to_choice

def state_to_choice_to_choices(self, state_to_choice):
choices = stormpy.BitVector(self.quotient_mdp.nr_choices,False)
for choice in state_to_choice:
if choice is not None:
choices.set(choice,True)
return choices


def choices_to_hole_selection(self, choices, coloring=None):
if coloring is None:
coloring = self.coloring
hole_selection = [set() for hole_index in self.design_space.hole_indices]
for choice in choices:
global_choice = mdp.quotient_choice_map[choice]
choice_options = self.coloring.action_to_hole_options[global_choice]
choice_options = self.coloring.action_to_hole_options[choice]
for hole_index,option in choice_options.items():
selection[hole_index].add(option)
selection = [list(options) for options in selection]
hole_selection[hole_index].add(option)
hole_selection = [list(options) for options in hole_selection]
return hole_selection

return selection

def scheduler_selection_with_coloring(self, mdp, scheduler, coloring):
def scheduler_selection(self, mdp, scheduler, coloring=None):
''' Get hole options involved in the scheduler selection. '''
assert scheduler.memoryless and scheduler.deterministic

# construct DTMC that corresponds to this scheduler and filter reachable states/choices
choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices)
dtmc,_,choice_map = self.restrict_mdp(mdp.model, choices)
choices = [ choice_map[state] for state in range(dtmc.nr_states) ]

# map relevant choices to hole options
selection = [set() for hole_index in mdp.design_space.hole_indices]
for choice in choices:
global_choice = mdp.quotient_choice_map[choice]
choice_options = coloring.action_to_hole_options[global_choice]
for hole_index,option in choice_options.items():
selection[hole_index].add(option)
selection = [list(options) for options in selection]

return selection
state_to_choice = self.scheduler_to_state_to_choice(mdp, scheduler, keep_reachable=True)
choices = self.state_to_choice_to_choices(state_to_choice)
hole_selection = self.choices_to_hole_selection(choices,coloring)
return hole_selection


@staticmethod
Expand Down
12 changes: 5 additions & 7 deletions paynt/synthesizer/policy_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,20 +481,18 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):

# undecided: choose scheduler choices to be used for splitting
if SynthesizerPolicyTree.use_optimistic_splitting:
scheduler_choices = game_solver.solution_reachable_choices
state_values = game_solver.solution_state_values
state_to_choice = game_solver.solution_state_to_quotient_choice
state_to_choice = self.quotient.keep_reachable_choices_of_scheduler(state_to_choice)
else:
scheduler_choices = stormpy.BitVector(self.quotient.quotient_mdp.nr_choices,False)
choices = primary_primary_result.result.scheduler.compute_action_support(family.mdp.model.nondeterministic_choice_indices)
for choice in choices:
scheduler_choices.set(family.mdp.quotient_choice_map[choice],True)
state_to_choice = self.quotient.scheduler_to_state_to_choice(family.mdp, primary_primary_result.result.scheduler)
state_values = [0] * self.quotient.quotient_mdp.nr_states
for state in range(family.mdp.states):
quotient_state = family.mdp.quotient_state_map[state]
state_values[quotient_state] = primary_primary_result.result.at(state)

# map reachable scheduler choices to hole options
scheduler_choices = self.quotient.keep_reachable_choices(scheduler_choices)
scheduler_choices = self.quotient.state_to_choice_to_choices(state_to_choice)
hole_selection = self.quotient.choices_to_hole_selection(scheduler_choices)

if False:
Expand Down Expand Up @@ -691,7 +689,7 @@ def synthesize_policy_for_family(self, family, prop, all_sat=False, iteration_li
sat_mdp_policies[index] = policy

current_results.append(primary_result)
selection = self.quotient.scheduler_selection_with_coloring(current_action_family.mdp, primary_result.result.scheduler, self.action_coloring)
selection = self.quotient.scheduler_selection(current_action_family.mdp, primary_result.result.scheduler, self.action_coloring)
self.update_scores(score_lists, selection)

scores = {hole:len(score_list) for hole, score_list in score_lists.items()}
Expand Down

0 comments on commit 6520ddb

Please sign in to comment.