Skip to content

Commit

Permalink
add don't care action for MDP DT synthesis
Browse files Browse the repository at this point in the history
  • Loading branch information
Roman Andriushchenko committed Sep 13, 2024
1 parent 431bacb commit 940cad5
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 5 deletions.
4 changes: 2 additions & 2 deletions paynt/parser/jani.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,8 @@ def construct_edge(self, edge, substitution = None):
for templ_edge_dest in edge.template_edge.destinations:
assignments = templ_edge_dest.assignments.clone()
if substitution is not None:
# assignments.substitute(substitution, substitute_transcendental_numbers=True)
assignments.substitute(substitution) # legacy version
assignments.substitute(substitution, substitute_transcendental_numbers=True)
# assignments.substitute(substitution) # legacy version
templ_edge.add_destination(stormpy.storage.JaniTemplateEdgeDestination(assignments))

new_edge = stormpy.storage.JaniEdge(
Expand Down
2 changes: 2 additions & 0 deletions paynt/quotient/mdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ def __init__(self, mdp, specification):
super().__init__(specification=specification)
updated = payntbind.synthesis.restoreActionsInAbsorbingStates(mdp)
if updated is not None: mdp = updated
mdp = payntbind.synthesis.addDontCareAction(mdp)

self.quotient_mdp = mdp
self.choice_destinations = payntbind.synthesis.computeChoiceDestinations(mdp)
self.action_labels,self.choice_to_action = payntbind.synthesis.extractActionLabels(mdp)
Expand Down
5 changes: 3 additions & 2 deletions paynt/synthesizer/decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def verify_hole_selection(self, family, hole_selection):
family.analysis_result.improving_assignment = assignment
family.analysis_result.improving_value = assignment_value
family.analysis_result.can_improve = True
self.update_optimum(family)

family_value = family.analysis_result.optimality_result.primary.value
if(abs(family_value-assignment_value) < 1e-3):
Expand Down Expand Up @@ -175,8 +176,8 @@ def run(self, optimum_threshold=None, export_evaluation=None):
self.synthesize_tree(depth)
if self.global_resource_limit_reached():
break
if self.best_assignment is not None:
# if self.best_assignment_value is not None and abs( (self.best_assignment_value-mc_result.value)/mc_result.value ) <1e-2:
# if self.best_assignment is not None:
if self.best_assignment_value is not None and abs( (self.best_assignment_value-mc_result.value)/mc_result.value ) <1e-2:
break

logger.info(f"the optimal scheduler has value: {mc_result}")
Expand Down
3 changes: 2 additions & 1 deletion payntbind/src/synthesis/quotient/ColoringSmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,8 @@ BitVector ColoringSmt<ValueType>::selectCompatibleChoices(Family const& subfamil
if(not any_choice_enabled) {
if(subfamily.isAssignment()) {
// STORM_LOG_WARN("Hole assignment does not induce a DTMC, enabling first action...");
uint64_t choice = row_groups[state];
// uint64_t choice = row_groups[state]; // pick the first choice
uint64_t choice = row_groups[state+1]-1; // pick the last choice executing the random choice
selection.set(choice,true);
for(uint64_t dst: choice_destinations[choice]) {
if(not state_reached[dst]) {
Expand Down
1 change: 1 addition & 0 deletions payntbind/src/synthesis/translation/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ void bindings_translation(py::module& m) {
m.def("extractActionLabels", &synthesis::extractActionLabels<double>);
m.def("enableAllActions", py::overload_cast<storm::models::sparse::Model<double> const&>(&synthesis::enableAllActions<double>));
m.def("restoreActionsInAbsorbingStates", &synthesis::restoreActionsInAbsorbingStates<double>);
m.def("addDontCareAction", &synthesis::addDontCareAction<double>);

py::class_<synthesis::SubPomdpBuilder<double>, std::shared_ptr<synthesis::SubPomdpBuilder<double>>>(m, "SubPomdpBuilder")
.def(py::init<storm::models::sparse::Pomdp<double> const&>())
Expand Down
88 changes: 88 additions & 0 deletions payntbind/src/synthesis/translation/choiceTransformation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "src/synthesis/translation/componentTranslations.h"

#include <storm/exceptions/InvalidModelException.h>
#include <storm/exceptions/UnexpectedException.h>
#include <storm/exceptions/InvalidArgumentException.h>
#include <storm/models/sparse/Pomdp.h>
#include <storm/utility/builder.h>
Expand Down Expand Up @@ -323,6 +324,91 @@ std::shared_ptr<storm::models::sparse::Model<ValueType>> restoreActionsInAbsorbi
return synthesis::removeAction(*model_absorbing_enabled, NO_ACTION_LABEL, absorbing_states);
}

template<typename ValueType>
std::shared_ptr<storm::models::sparse::Model<ValueType>> addDontCareAction(
storm::models::sparse::Model<ValueType> const& model
) {
auto [action_labels,choice_to_action] = synthesis::extractActionLabels<ValueType>(model);
std::string DONT_CARE_ACTION_LABEL = "__random__";
auto it = std::find(action_labels.begin(),action_labels.end(),DONT_CARE_ACTION_LABEL);
STORM_LOG_THROW(it == action_labels.end(), storm::exceptions::UnexpectedException,
"label " << DONT_CARE_ACTION_LABEL << " is already defined");
uint64_t num_actions = action_labels.size();
uint64_t num_states = model.getNumberOfStates();
uint64_t num_choices = model.getNumberOfChoices();

// for each action, find any choice that corresponds to this action
// we compute this to easily build choice labeling later
std::vector<uint64_t> action_reference_choice(num_actions);
for(uint64_t choice = 0; choice < num_choices; ++choice) {
action_reference_choice[choice_to_action[choice]] = choice;
}

// translate choices
std::vector<uint64_t> translated_to_original_choice;
std::vector<uint64_t> row_groups_new;
for(uint64_t state = 0; state < num_states; ++state) {
row_groups_new.push_back(translated_to_original_choice.size());
// copy existing choices
for(uint64_t choice: model.getTransitionMatrix().getRowGroupIndices(state)) {
translated_to_original_choice.push_back(choice);
}
// add don't care action
translated_to_original_choice.push_back(num_choices);
}
row_groups_new.push_back(translated_to_original_choice.size());
uint64_t num_translated_choices = translated_to_original_choice.size();
storm::storage::BitVector translated_choice_mask(num_translated_choices,false);
for(uint64_t translated_choice = 0; translated_choice < num_translated_choices; ++translated_choice) {
translated_choice_mask.set(translated_choice, translated_to_original_choice[translated_choice] < num_choices);
}

// build components
storm::storage::sparse::ModelComponents<ValueType> components = componentsFromModel(model);
components.choiceOrigins.reset();
auto choiceLabeling = synthesis::translateChoiceLabeling<ValueType>(model,translated_to_original_choice,translated_choice_mask);
choiceLabeling.addLabel(DONT_CARE_ACTION_LABEL, ~translated_choice_mask);
components.choiceLabeling = choiceLabeling;
storm::storage::SparseMatrixBuilder<ValueType> builder(num_translated_choices, num_states, 0, true, true, num_states);
for(uint64_t state = 0; state < num_states; ++state) {
builder.newRowGroup(row_groups_new[state]);
// copy existing choices
std::map<uint64_t,ValueType> dont_care_transitions;
uint64_t new_translated_choice = row_groups_new[state+1]-1;
uint64_t state_num_choices = new_translated_choice-row_groups_new[state];
for(uint64_t translated_choice = row_groups_new[state]; translated_choice < new_translated_choice; ++translated_choice) {
uint64_t choice = translated_to_original_choice[translated_choice];
for(auto entry: model.getTransitionMatrix().getRow(choice)) {
uint64_t dst = entry.getColumn();
ValueType prob = entry.getValue();
builder.addNextValue(translated_choice, dst, prob);
dont_care_transitions[dst] += prob/state_num_choices;
}
}
// add don't care action
for(auto [dst,prob]: dont_care_transitions) {
builder.addNextValue(new_translated_choice,dst,prob);
}
}
components.transitionMatrix = builder.build();
auto rewardModels = synthesis::translateRewardModels(model,translated_to_original_choice,translated_choice_mask);
for(auto & [name,reward_model]: rewardModels) {
std::cout << "processing " << name << std::endl;
std::vector<ValueType> & choice_reward = reward_model.getStateActionRewardVector();
ValueType reward_sum = 0;
for(uint64_t state = 0; state < num_states; ++state) {
uint64_t new_translated_choice = row_groups_new[state+1]-1;
uint64_t state_num_choices = new_translated_choice-row_groups_new[state];
for(uint64_t translated_choice = row_groups_new[state]; translated_choice < new_translated_choice; ++translated_choice) {
reward_sum += choice_reward[translated_choice];
}
choice_reward[new_translated_choice] = reward_sum / state_num_choices;
}
}
components.rewardModels = rewardModels;
return storm::utility::builder::buildModelFromComponents<ValueType,storm::models::sparse::StandardRewardModel<ValueType>>(model.getType(),std::move(components));
}

template std::vector<std::vector<uint64_t>> computeChoiceDestinations<double>(
storm::models::sparse::Model<double> const& model);
template std::pair<std::vector<std::string>,std::vector<uint64_t>> extractActionLabels<double>(
Expand All @@ -343,5 +429,7 @@ template std::shared_ptr<storm::models::sparse::Model<double>> removeAction<doub
storm::storage::BitVector const& state_mask);
template std::shared_ptr<storm::models::sparse::Model<double>> restoreActionsInAbsorbingStates<double>(
storm::models::sparse::Model<double> const& model);
template std::shared_ptr<storm::models::sparse::Model<double>> addDontCareAction<double>(
storm::models::sparse::Model<double> const& model);

}
8 changes: 8 additions & 0 deletions payntbind/src/synthesis/translation/choiceTransformation.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,4 +100,12 @@ std::shared_ptr<storm::models::sparse::Model<ValueType>> restoreActionsInAbsorbi
storm::models::sparse::Model<ValueType> const& model
);

/**
* To every state of an MDP add an explicit action that executes a random choice between available actions.
*/
template<typename ValueType>
std::shared_ptr<storm::models::sparse::Model<ValueType>> addDontCareAction(
storm::models::sparse::Model<ValueType> const& model
);

}

0 comments on commit 940cad5

Please sign in to comment.