add don't care action for MDP DT synthesis

randriu · Sep 13, 2024 · 940cad5 · 940cad5
1 parent 431bacb
commit 940cad5
Show file tree

Hide file tree

Showing 7 changed files with 106 additions and 5 deletions.
diff --git a/paynt/parser/jani.py b/paynt/parser/jani.py
@@ -219,8 +219,8 @@ def construct_edge(self, edge, substitution = None):
         for templ_edge_dest in edge.template_edge.destinations:
             assignments = templ_edge_dest.assignments.clone()
             if substitution is not None:
-                # assignments.substitute(substitution, substitute_transcendental_numbers=True)
-                assignments.substitute(substitution) # legacy version
+                assignments.substitute(substitution, substitute_transcendental_numbers=True)
+                # assignments.substitute(substitution) # legacy version
             templ_edge.add_destination(stormpy.storage.JaniTemplateEdgeDestination(assignments))
 
         new_edge = stormpy.storage.JaniEdge(

diff --git a/paynt/quotient/mdp.py b/paynt/quotient/mdp.py
@@ -165,6 +165,8 @@ def __init__(self, mdp, specification):
         super().__init__(specification=specification)
         updated = payntbind.synthesis.restoreActionsInAbsorbingStates(mdp)
         if updated is not None: mdp = updated
+        mdp = payntbind.synthesis.addDontCareAction(mdp)
+
         self.quotient_mdp = mdp
         self.choice_destinations = payntbind.synthesis.computeChoiceDestinations(mdp)
         self.action_labels,self.choice_to_action = payntbind.synthesis.extractActionLabels(mdp)

diff --git a/paynt/synthesizer/decision_tree.py b/paynt/synthesizer/decision_tree.py
@@ -41,6 +41,7 @@ def verify_hole_selection(self, family, hole_selection):
             family.analysis_result.improving_assignment = assignment
             family.analysis_result.improving_value = assignment_value
             family.analysis_result.can_improve = True
+            self.update_optimum(family)
 
             family_value = family.analysis_result.optimality_result.primary.value
             if(abs(family_value-assignment_value) < 1e-3):
@@ -175,8 +176,8 @@ def run(self, optimum_threshold=None, export_evaluation=None):
                 self.synthesize_tree(depth)
                 if self.global_resource_limit_reached():
                     break
-                if self.best_assignment is not None:
-                # if self.best_assignment_value is not None and abs( (self.best_assignment_value-mc_result.value)/mc_result.value ) <1e-2:
+                # if self.best_assignment is not None:
+                if self.best_assignment_value is not None and abs( (self.best_assignment_value-mc_result.value)/mc_result.value ) <1e-2:
                     break
 
         logger.info(f"the optimal scheduler has value: {mc_result}")

diff --git a/payntbind/src/synthesis/quotient/ColoringSmt.cpp b/payntbind/src/synthesis/quotient/ColoringSmt.cpp
@@ -318,7 +318,8 @@ BitVector ColoringSmt<ValueType>::selectCompatibleChoices(Family const& subfamil
         if(not any_choice_enabled) {
             if(subfamily.isAssignment()) {
                 // STORM_LOG_WARN("Hole assignment does not induce a DTMC, enabling first action...");
-                uint64_t choice = row_groups[state];
+                // uint64_t choice = row_groups[state]; // pick the first choice
+                uint64_t choice = row_groups[state+1]-1; // pick the last choice executing the random choice
                 selection.set(choice,true);
                 for(uint64_t dst: choice_destinations[choice]) {
                     if(not state_reached[dst]) {

diff --git a/payntbind/src/synthesis/translation/bindings.cpp b/payntbind/src/synthesis/translation/bindings.cpp
@@ -16,6 +16,7 @@ void bindings_translation(py::module& m) {
     m.def("extractActionLabels", &synthesis::extractActionLabels<double>);
     m.def("enableAllActions", py::overload_cast<storm::models::sparse::Model<double> const&>(&synthesis::enableAllActions<double>));
     m.def("restoreActionsInAbsorbingStates", &synthesis::restoreActionsInAbsorbingStates<double>);
+    m.def("addDontCareAction", &synthesis::addDontCareAction<double>);
 
     py::class_<synthesis::SubPomdpBuilder<double>, std::shared_ptr<synthesis::SubPomdpBuilder<double>>>(m, "SubPomdpBuilder")
         .def(py::init<storm::models::sparse::Pomdp<double> const&>())

diff --git a/payntbind/src/synthesis/translation/choiceTransformation.cpp b/payntbind/src/synthesis/translation/choiceTransformation.cpp
@@ -3,6 +3,7 @@
 #include "src/synthesis/translation/componentTranslations.h"
 
 #include <storm/exceptions/InvalidModelException.h>
+#include <storm/exceptions/UnexpectedException.h>
 #include <storm/exceptions/InvalidArgumentException.h>
 #include <storm/models/sparse/Pomdp.h>
 #include <storm/utility/builder.h>
@@ -323,6 +324,91 @@ std::shared_ptr<storm::models::sparse::Model<ValueType>> restoreActionsInAbsorbi
     return synthesis::removeAction(*model_absorbing_enabled, NO_ACTION_LABEL, absorbing_states);
 }
 
+template<typename ValueType>
+std::shared_ptr<storm::models::sparse::Model<ValueType>> addDontCareAction(
+    storm::models::sparse::Model<ValueType> const& model
+) {
+    auto [action_labels,choice_to_action] = synthesis::extractActionLabels<ValueType>(model);
+    std::string DONT_CARE_ACTION_LABEL = "__random__";
+    auto it = std::find(action_labels.begin(),action_labels.end(),DONT_CARE_ACTION_LABEL);
+    STORM_LOG_THROW(it == action_labels.end(), storm::exceptions::UnexpectedException,
+        "label " << DONT_CARE_ACTION_LABEL << " is already defined");
+    uint64_t num_actions = action_labels.size();
+    uint64_t num_states = model.getNumberOfStates();
+    uint64_t num_choices = model.getNumberOfChoices();
+
+    // for each action, find any choice that corresponds to this action
+    // we compute this to easily build choice labeling later
+    std::vector<uint64_t> action_reference_choice(num_actions);
+    for(uint64_t choice = 0; choice < num_choices; ++choice) {
+        action_reference_choice[choice_to_action[choice]] = choice;
+    }
+
+    // translate choices
+    std::vector<uint64_t> translated_to_original_choice;
+    std::vector<uint64_t> row_groups_new;
+    for(uint64_t state = 0; state < num_states; ++state) {
+        row_groups_new.push_back(translated_to_original_choice.size());
+        // copy existing choices
+        for(uint64_t choice: model.getTransitionMatrix().getRowGroupIndices(state)) {
+            translated_to_original_choice.push_back(choice);
+        }
+        // add don't care action
+        translated_to_original_choice.push_back(num_choices);
+    }
+    row_groups_new.push_back(translated_to_original_choice.size());
+    uint64_t num_translated_choices = translated_to_original_choice.size();
+    storm::storage::BitVector translated_choice_mask(num_translated_choices,false);
+    for(uint64_t translated_choice = 0; translated_choice < num_translated_choices; ++translated_choice) {
+        translated_choice_mask.set(translated_choice, translated_to_original_choice[translated_choice] < num_choices);
+    }
+
+    // build components
+    storm::storage::sparse::ModelComponents<ValueType> components = componentsFromModel(model);
+    components.choiceOrigins.reset();
+    auto choiceLabeling = synthesis::translateChoiceLabeling<ValueType>(model,translated_to_original_choice,translated_choice_mask);
+    choiceLabeling.addLabel(DONT_CARE_ACTION_LABEL, ~translated_choice_mask);
+    components.choiceLabeling = choiceLabeling;
+    storm::storage::SparseMatrixBuilder<ValueType> builder(num_translated_choices, num_states, 0, true, true, num_states);
+    for(uint64_t state = 0; state < num_states; ++state) {
+        builder.newRowGroup(row_groups_new[state]);
+        // copy existing choices
+        std::map<uint64_t,ValueType> dont_care_transitions;
+        uint64_t new_translated_choice = row_groups_new[state+1]-1;
+        uint64_t state_num_choices = new_translated_choice-row_groups_new[state];
+        for(uint64_t translated_choice = row_groups_new[state]; translated_choice < new_translated_choice; ++translated_choice) {
+            uint64_t choice = translated_to_original_choice[translated_choice];
+            for(auto entry: model.getTransitionMatrix().getRow(choice)) {
+                uint64_t dst = entry.getColumn();
+                ValueType prob = entry.getValue();
+                builder.addNextValue(translated_choice, dst, prob);
+                dont_care_transitions[dst] += prob/state_num_choices;
+            }
+        }
+        // add don't care action
+        for(auto [dst,prob]: dont_care_transitions) {
+            builder.addNextValue(new_translated_choice,dst,prob);
+        }
+    }
+    components.transitionMatrix =  builder.build();
+    auto rewardModels = synthesis::translateRewardModels(model,translated_to_original_choice,translated_choice_mask);
+    for(auto & [name,reward_model]: rewardModels) {
+        std::cout << "processing " << name  << std::endl;
+        std::vector<ValueType> & choice_reward = reward_model.getStateActionRewardVector();
+        ValueType reward_sum = 0;
+        for(uint64_t state = 0; state < num_states; ++state) {
+            uint64_t new_translated_choice = row_groups_new[state+1]-1;
+            uint64_t state_num_choices = new_translated_choice-row_groups_new[state];
+            for(uint64_t translated_choice = row_groups_new[state]; translated_choice < new_translated_choice; ++translated_choice) {
+                reward_sum += choice_reward[translated_choice];
+            }
+            choice_reward[new_translated_choice] = reward_sum / state_num_choices;
+        }
+    }
+    components.rewardModels = rewardModels;
+    return storm::utility::builder::buildModelFromComponents<ValueType,storm::models::sparse::StandardRewardModel<ValueType>>(model.getType(),std::move(components));
+}
+
 template std::vector<std::vector<uint64_t>> computeChoiceDestinations<double>(
     storm::models::sparse::Model<double> const& model);
 template std::pair<std::vector<std::string>,std::vector<uint64_t>> extractActionLabels<double>(
@@ -343,5 +429,7 @@ template std::shared_ptr<storm::models::sparse::Model<double>> removeAction<doub
     storm::storage::BitVector const& state_mask);
 template std::shared_ptr<storm::models::sparse::Model<double>> restoreActionsInAbsorbingStates<double>(
     storm::models::sparse::Model<double> const& model);
+template std::shared_ptr<storm::models::sparse::Model<double>> addDontCareAction<double>(
+    storm::models::sparse::Model<double> const& model);
 
 }
diff --git a/payntbind/src/synthesis/translation/choiceTransformation.h b/payntbind/src/synthesis/translation/choiceTransformation.h
@@ -100,4 +100,12 @@ std::shared_ptr<storm::models::sparse::Model<ValueType>> restoreActionsInAbsorbi
     storm::models::sparse::Model<ValueType> const& model
 );
 
+/**
+ * To every state of an MDP add an explicit action that executes a random choice between available actions.
+ */
+template<typename ValueType>
+std::shared_ptr<storm::models::sparse::Model<ValueType>> addDontCareAction(
+    storm::models::sparse::Model<ValueType> const& model
+);
+
 }