build randomized abstraction in Paynt; double-check MDP (non-)determinism

Roman Andriushchenko committed Dec 4, 2023
1 parent 341a98b commit 53f12a8
Showing 4 changed files with 272 additions and 144 deletions.
173 changes: 126 additions & 47 deletions paynt/quotient/mdp_family.py
@@ -1,19 +1,18 @@
import stormpy
import stormpy.synthesis

import paynt.quotient.holes
import paynt.quotient.models
import paynt.quotient.quotient
import paynt.verification.property_result

import collections

import logging
logger = logging.getLogger(__name__)


class MdpFamilyQuotientContainer(paynt.quotient.quotient.QuotientContainer):

@classmethod
def extract_choice_labels(cls, mdp):
@staticmethod
def extract_choice_labels(mdp):
'''
:param mdp having a canonical choice labeling (exactly 1 label for each choice)
:return a list of action labels
@@ -27,57 +26,123 @@ def extract_choice_labels(cls, mdp):
label_to_action = {label:index for index,label in enumerate(action_labels)}

choice_to_action = [None] * mdp.nr_choices
state_to_actions = []
tm = mdp.transition_matrix
for state in range(mdp.nr_states):
state_choice_label_indices = set()
for choice in range(tm.get_row_group_start(state),tm.get_row_group_end(state)):
for choice in mdp.transition_matrix.get_rows_for_group(state):
label = list(mdp.choice_labeling.get_labels_of_choice(choice))[0]
action = label_to_action[label]
choice_to_action[choice] = action
state_choice_label_indices.add(action)
state_to_actions.append(list(state_choice_label_indices))

return action_labels,choice_to_action,state_to_actions
return action_labels,choice_to_action
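
For orientation, a sketch of the shapes this helper returns, on a hypothetical MDP with three choices labeled a, b, a (the concrete values are illustrative, not taken from this commit):

# hypothetical output of extract_choice_labels for choice labels [a, b, a]
action_labels = ['a', 'b']      # action index 0 is 'a', index 1 is 'b'
choice_to_action = [0, 1, 0]    # choice i carries action_labels[choice_to_action[i]]
# note: state_to_actions is no longer returned here; it is derived later
# from state_action_choices via map_state_to_available_actions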


def __init__(self, quotient_mdp, coloring, specification):
super().__init__(quotient_mdp = quotient_mdp, coloring = coloring, specification = specification)
self.design_space = paynt.quotient.holes.DesignSpace(coloring.holes)

self.action_labels,self.choice_to_action,self.state_to_actions = \
MdpFamilyQuotientContainer.extract_choice_labels(self.quotient_mdp)

self.choice_destinations = self.compute_choice_destinations(self.quotient_mdp)
self.state_action_choices = self.compute_state_action_choices(self.quotient_mdp, self.num_actions, self.choice_to_action)


@property
def num_actions(self):
return len(self.action_labels)


def compute_state_action_choices(self, mdp, num_actions, choice_to_action):
@staticmethod
def map_state_action_to_choices(mdp, num_actions, choice_to_action):
state_action_choices = []
tm = mdp.transition_matrix
for state in range(mdp.nr_states):
action_choices = [[] for action in range(num_actions)]
for choice in range(tm.get_row_group_start(state),tm.get_row_group_end(state)):
for choice in mdp.transition_matrix.get_rows_for_group(state):
action = choice_to_action[choice]
action_choices[action].append(choice)
state_action_choices.append(action_choices)
return state_action_choices
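
A hedged sketch of the nested list this helper builds, continuing the invented three-choice example above:

# state_action_choices[state][action] lists that state's choices carrying
# the given action; an empty list means the action is unavailable there
state_action_choices = [
    [[0], [1]],   # state 0: action 0 via choice 0, action 1 via choice 1
    [[2], []],    # state 1: only action 0 is available (via choice 2)
]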

def compute_choice_destinations(self, mdp):
@staticmethod
def map_state_to_available_actions(state_action_choices):
state_to_actions = []
for state,action_choices in enumerate(state_action_choices):
available_actions = []
for action,choices in enumerate(action_choices):
if choices:
available_actions.append(action)
state_to_actions.append(available_actions)
return state_to_actions
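
The same projection can also be written as a comprehension; a minimal, self-contained equivalent assuming the invented state_action_choices layout above:

state_action_choices = [
    [[0], [1]],
    [[2], []],
]
# keep the indices of actions whose choice list is non-empty
state_to_actions = [
    [action for action, choices in enumerate(action_choices) if choices]
    for action_choices in state_action_choices
]
assert state_to_actions == [[0, 1], [0]]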

@staticmethod
def compute_choice_destinations(mdp):
choice_destinations = []
tm = mdp.transition_matrix
for choice in range(mdp.nr_choices):
destinations = []
for entry in tm.get_row(choice):
for entry in mdp.transition_matrix.get_row(choice):
destinations.append(entry.column)
choice_destinations.append(destinations)
return choice_destinations
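
Continuing the invented example, the per-choice destination lists have this shape:

# choice_destinations[choice] lists the successor states of that choice
choice_destinations = [[0, 1], [1], [0]]  # e.g. choice 0 branches to states 0 and 1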

@staticmethod
def randomize_action_variant(mdp, state_action_choices):
'''
Given an MDP with multiple variants of the same action, create an MDP in which the selection among these variants is uniformly randomized.
'''

components = stormpy.storage.SparseModelComponents()
# copy state labeling
state_labeling = stormpy.storage.StateLabeling(mdp.nr_states)
for label in mdp.labeling.get_labels():
state_labeling.add_label(label)
state_labeling.set_states(label, mdp.labeling.get_states(label))
components.state_labeling = state_labeling

# build transition matrix and reward models
reward_vectors = {name:[] for name in mdp.reward_models}
builder = stormpy.storage.SparseMatrixBuilder(has_custom_row_grouping=True)
tm = mdp.transition_matrix
num_rows = 0
choice_to_action = []
for state in range(mdp.nr_states):
builder.new_row_group(num_rows)
for action,choices in enumerate(state_action_choices[state]):
if not choices:
continue

# new choice
choice_to_action.append(action)

# handle transition matrix
num_choices = len(choices)
dst_prob = collections.defaultdict(int)
for choice in choices:
for entry in tm.get_row(choice):
dst_prob[entry.column] += entry.value()/num_choices

for dst,prob in dst_prob.items():
builder.add_next_value(num_rows,dst,prob)
num_rows += 1

# handle reward models
for name,reward_model in mdp.reward_models.items():
reward_value = 0
for choice in choices:
reward_value += reward_model.get_state_action_reward(choice)
reward_value /= num_choices
reward_vectors[name].append(reward_value)

components.transition_matrix = builder.build()
components.reward_models = {}
for name,state_action_reward in reward_vectors.items():
components.reward_models[name] = stormpy.SparseRewardModel(optional_state_action_reward_vector=state_action_reward)
model = stormpy.storage.SparseMdp(components)

return model,choice_to_action
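
The core of randomize_action_variant is uniform averaging over the variants of each action. A self-contained sketch of that arithmetic on plain dictionaries, with invented rows and rewards and no stormpy involved:

import collections

# two variants of one action in some state, as {destination: probability} rows
variant_rows = [{0: 0.5, 1: 0.5}, {1: 1.0}]
variant_rewards = [2.0, 4.0]

num_choices = len(variant_rows)
dst_prob = collections.defaultdict(float)
for row in variant_rows:                    # merge rows with weight 1/num_choices
    for dst, prob in row.items():
        dst_prob[dst] += prob / num_choices

avg_reward = sum(variant_rewards) / num_choices

assert dict(dst_prob) == {0: 0.25, 1: 0.75}
assert avg_reward == 3.0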



def __init__(self, quotient_mdp, coloring, specification):
super().__init__(quotient_mdp = quotient_mdp, coloring = coloring, specification = specification)

self.design_space = paynt.quotient.holes.DesignSpace(coloring.holes)

self.action_labels,self.choice_to_action = MdpFamilyQuotientContainer.extract_choice_labels(self.quotient_mdp)
self.state_action_choices = MdpFamilyQuotientContainer.map_state_action_to_choices(
self.quotient_mdp, self.num_actions, self.choice_to_action)
self.state_to_actions = MdpFamilyQuotientContainer.map_state_to_available_actions(self.state_action_choices)
self.choice_destinations = MdpFamilyQuotientContainer.compute_choice_destinations(self.quotient_mdp)

self.randomized_abstraction = MdpFamilyQuotientContainer.randomize_action_variant(
self.quotient_mdp, self.state_action_choices)


@property
def num_actions(self):
return len(self.action_labels)


def keep_reachable_choices(self, choice_mask, mdp=None, choice_destinations=None):
if mdp is None:
@@ -92,7 +157,7 @@ def keep_reachable_choices(self, choice_mask, mdp=None, choice_destinations=None
choice_mask_reachable = stormpy.BitVector(mdp.nr_choices,False)
while state_queue:
state = state_queue.pop()
for choice in range(tm.get_row_group_start(state),tm.get_row_group_end(state)):
for choice in mdp.transition_matrix.get_rows_for_group(state):
if not choice_mask[choice]:
continue
choice_mask_reachable.set(choice,True)
@@ -103,16 +168,6 @@ def keep_reachable_choices(self, choice_mask, mdp=None, choice_destinations=None
return choice_mask_reachable
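
For intuition, a hedged miniature of the forward reachability this method performs, on hand-rolled lists (the choice/state layout is invented, and bit vectors are replaced by plain sets):

# keep only masked choices whose source state is reachable from state 0
choice_mask = [True, False, True, True]
state_of_choice = [0, 0, 1, 2]            # source state of each choice
choice_destinations = [[1], [2], [2], [0]]

reachable, queue, kept = {0}, [0], set()
while queue:
    state = queue.pop()
    for choice, src in enumerate(state_of_choice):
        if src != state or not choice_mask[choice]:
            continue
        kept.add(choice)
        for dst in choice_destinations[choice]:
            if dst not in reachable:
                reachable.add(dst)
                queue.append(dst)

assert kept == {0, 2, 3}                  # choice 1 is masked out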


def choices_to_policy(self, choice_mask):
policy = [self.num_actions] * self.quotient_mdp.nr_states
tm = self.quotient_mdp.transition_matrix
for state in range(self.quotient_mdp.nr_states):
for choice in range(tm.get_row_group_start(state),tm.get_row_group_end(state)):
if choice_mask[choice]:
assert policy[state] == self.num_actions
policy[state] = self.choice_to_action[choice]
return policy


def choices_to_hole_selection(self, choice_mask):
hole_selection = [set() for hole_index in self.design_space.hole_indices]
@@ -123,6 +178,9 @@ def choices_to_hole_selection(self, choice_mask):
hole_selection = [list(options) for options in hole_selection]
return hole_selection

def empty_policy(self):
return [self.num_actions] * self.quotient_mdp.nr_states


def fix_policy_for_family(self, family, policy):
'''
@@ -164,10 +222,31 @@ def apply_policy_to_family(self, family, policy):
return self.build_from_choice_mask(choice_mask)



def assert_mdp_is_deterministic(self, mdp, family):
if mdp.is_deterministic:
return

logger.error(f"applied policy to a singleton family {family} and obtained MDP with nondeterminism")
for state in range(mdp.model.nr_states):

choices = mdp.model.transition_matrix.get_rows_for_group(state)
if len(choices)>1:
quotient_state = mdp.quotient_state_map[state]
quotient_choices = [mdp.quotient_choice_map[choice] for choice in choices]
state_str = self.quotient_mdp.state_valuations.get_string(quotient_state)
state_str = state_str.replace(" ","")
state_str = state_str.replace("\t","")
actions_str = [self.action_labels[self.choice_to_action[choice]] for choice in quotient_choices]
logger.error(f"the following state {state_str} has multiple actions {actions_str}")
logger.error("aborting...")
exit(1)
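
A minimal sketch of the determinism check being performed, on an invented row-group layout:

# row_groups[state] lists the choice indices of that state; after applying
# a policy to a singleton family, every state should keep exactly one choice
row_groups = [[0], [1, 2], [3]]
nondeterministic = [s for s, choices in enumerate(row_groups) if len(choices) > 1]
assert nondeterministic == [1]            # state 1 still has two choices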


def build_game_abstraction_solver(self, prop):
target_label = str(prop.formula.subformula.subformula)
solver = stormpy.synthesis.GameAbstractionSolver(
self.quotient_mdp, len(self.action_labels), self.choice_to_action, target_label
)
return solver


21 changes: 4 additions & 17 deletions paynt/quotient/pomdp_family.py
@@ -137,41 +137,28 @@ def __init__(self, model, quotient, quotient_state_map, quotient_choice_map):
self.state_action_to_local_choice.append(action_to_local_choice)


class PomdpFamilyQuotientContainer(paynt.quotient.quotient.QuotientContainer):
class PomdpFamilyQuotientContainer(paynt.quotient.mdp_family.MdpFamilyQuotientContainer):

def __init__(self, quotient_mdp, coloring, specification, obs_evaluator):
super().__init__(quotient_mdp = quotient_mdp, coloring = coloring, specification = specification)
self.obs_evaluator = obs_evaluator
self.design_space = paynt.quotient.holes.DesignSpace(coloring.holes)

# a list of action labels
self.action_labels = None
# for each choice, an index of its label in self.action_labels
self.choice_to_action = None
# for each observation, a list of actions (indices) available
self.observation_to_actions = None

# POMDP manager used for unfolding the memory model into the quotient POMDP
self.product_pomdp_fsc = None

self.action_labels,self.choice_to_action,state_to_actions = \
paynt.quotient.mdp_family.MdpFamilyQuotientContainer.extract_choice_labels(self.quotient_mdp)

# identify actions available at each observation
self.observation_to_actions = [None] * self.num_observations
for state,state_actions in enumerate(state_to_actions):
for state,available_actions in enumerate(self.state_to_actions):
obs = self.state_to_observation[state]
if self.observation_to_actions[obs] is not None:
assert self.observation_to_actions[obs] == state_actions,\
assert self.observation_to_actions[obs] == available_actions,\
f"two states in observation class {obs} differ in available actions"
continue
self.observation_to_actions[obs] = state_actions
self.observation_to_actions[obs] = available_actions
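
A self-contained rerun of this loop body on invented inputs, illustrating the consistency requirement it enforces: states sharing an observation class must offer identical action sets.

state_to_observation = [0, 0, 1]          # states 0 and 1 share observation 0
state_to_actions = [[0, 1], [0, 1], [0]]

observation_to_actions = [None, None]
for state, available_actions in enumerate(state_to_actions):
    obs = state_to_observation[state]
    if observation_to_actions[obs] is not None:
        assert observation_to_actions[obs] == available_actions
        continue
    observation_to_actions[obs] = available_actions

assert observation_to_actions == [[0, 1], [0]]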


@property
def num_actions(self):
return len(self.action_labels)

@property
def num_observations(self):
return self.obs_evaluator.num_obs_classes
