diff --git a/.gitignore b/.gitignore
index 1082f85b0..fbcaf8f23 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
 __pycache__/
 *.egg-info/
-prerequisites/
+prerequisites/
\ No newline at end of file
diff --git a/README.md b/README.md
index 3f65127a7..49cd63dc0 100644
--- a/README.md
+++ b/README.md
@@ -75,13 +75,20 @@ Options associated with the synthesis of finite-state controllers (FSCs) for a P
 - ``--pomdp-memory-size INTEGER``: implicit memory size for POMDP FSCs [default: 1]
 - ``--fsc-synthesis``: enables incremental synthesis of FSCs for a POMDP using iterative exploration of k-FSCs
 - ``--posterior-aware``: enables the synthesis of posterior-aware FSCs
+- ``--native-discount``: runs synthesis with discounted model checking instead of performing the model transformation
 
-SAYNT [6] and Storm associated options (pomdp-api branch of Storm and Stormpy are needed):
+SAYNT [6] and New SAYNT [CAV23SI] basic options:
+- ``--saynt``: runs SAYNT with default options (15min timeout; to adjust it, see the advanced SAYNT options below)
+- ``--new-saynt``: runs SAYNT while computing multiple cut-off FSCs (15min timeout)
+
+Advanced SAYNT [6] and Storm associated options:
 - ``--storm-pomdp``: enables the use of Storm features; this flag is necessary for the other options in this section to work
 - ``--iterative-storm INTEGER INTEGER INTEGER``: runs the SAYNT algorithm; the parameters represent the overall timeout, PAYNT timeout and Storm timeout, respectively. The recommended parameters for a 15-minute runtime are 900 60 10
 - ``--get-storm-result INTEGER``: runs PAYNT for the specified number of seconds and then runs Storm using the computed FSC at cut-offs
 - ``--storm-options [cutoff|clip2|clip4|overapp|5mil|10mil|20mil|refine]``: sets the options for Storm [default: ``cutoff``]
 - ``--prune-storm``: if enabled, Storm results are used to prune the family of FSCs
+- ``--saynt-threads INTEGER``: specifies the number of cut-off FSCs SAYNT should compute in one iteration; if set to 0, the number is chosen automatically based on the belief exploration
+- ``--saynt-overapprox``: enables the use of overapproximations in choosing the beliefs from which FSCs should be computed
 - ``--unfold-strategy-storm [paynt|storm|cutoff]``: sets how the memory is unfolded [default: ``storm``]
 - ``--use-storm-cutoffs``: if enabled, the actions from cut-offs are considered in the prioritization and unfolding
 - ``--export-fsc-paynt PATH``: stores the best found FSC from PAYNT to the specified file
@@ -91,18 +98,19 @@ Other options:
 - ``--help``: shows the help message of PAYNT and aborts
 - ``--export [drn|pomdp]``: exports the model to *.drn/*.pomdp and aborts
 - ``--incomplete-search``: uses incomplete search during synthesis
+- ``--alpha-vector-analysis PATH``: takes an alpha vector set from the specified file and performs unrolling on the given POMDP using these alpha vectors to analyse their quality
 
 Here are various PAYNT calls:
 ```shell
-python3 paynt.py --project models/cav21/maze --props hard.props
-python3 paynt.py --project models/cav21/maze --props hard.props --method hybrid
-python3 paynt.py --project models/pomdp/uai/grid-avoid-4-0
-python3 paynt.py --project models/pomdp/uai/grid-avoid-4-0 --pomdp-memory-size 2
-python3 paynt.py --project models/pomdp/uai/grid-avoid-4-0 --pomdp-memory-size 5 --method ar_multicore
-timeout 10s python3 paynt.py --project models/pomdp/uai/grid-avoid-4-0 --fsc-synthesis
-python3 paynt.py --project models/pomdp/storm-integration/4x3-95 --fsc-synthesis --storm-pomdp --iterative-storm 180 60 10
-python3 paynt.py --project models/pomdp/storm-integration/rocks-12 --fsc-synthesis --storm-pomdp --get-storm-result 0
+python3 paynt.py --project models/archive/cav21-paynt/maze --props hard.props
+python3 paynt.py --project models/archive/cav21-paynt/maze --props hard.props --method hybrid
+python3 paynt.py --project models/archive/uai22-pomdp/grid-avoid-4-0
+python3 paynt.py --project models/archive/uai22-pomdp/grid-avoid-4-0 --pomdp-memory-size 2
+python3 paynt.py --project models/archive/uai22-pomdp/grid-avoid-4-0 --pomdp-memory-size 5 --method ar_multicore
+timeout 10s python3 paynt.py --project models/archive/uai22-pomdp/grid-avoid-4-0 --fsc-synthesis
+python3 paynt.py --project models/archive/cav23-saynt/4x3-95 --saynt --iterative-storm 180 60 10
+python3 paynt.py --project models/archive/cav23-saynt/rocks-12 --fsc-synthesis --storm-pomdp --get-storm-result 0
 ```
 
 The Python environment can be deactivated by running
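A note on the two new convenience flags before the model files: as the `paynt/cli.py` hunk later in this diff shows, `--saynt` and `--new-saynt` only pre-set the advanced options listed above. A minimal standalone sketch of that expansion (`expand_saynt_flags` is an illustrative name, not an actual PAYNT function):

```python
# Sketch of the expansion performed in paynt_run() (see the paynt/cli.py hunk below);
# expand_saynt_flags is an illustrative name, not part of the PAYNT codebase.
def expand_saynt_flags(saynt, new_saynt, iterative_storm=None, saynt_threads=None):
    fsc_synthesis = storm_pomdp = False
    if saynt:
        fsc_synthesis = storm_pomdp = True
        if iterative_storm is None:
            iterative_storm = (900, 60, 10)  # overall / PAYNT / Storm timeouts (seconds)
    elif new_saynt:
        fsc_synthesis = storm_pomdp = True
        if iterative_storm is None:
            iterative_storm = (900, 90, 2)
        if saynt_threads is None:
            saynt_threads = 0  # 0 = pick the number of cut-off FSCs dynamically
    return fsc_synthesis, storm_pomdp, iterative_storm, saynt_threads

assert expand_saynt_flags(saynt=True, new_saynt=False)[2] == (900, 60, 10)
```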
diff --git a/models/archive/cav23-saynt/long-traces/sketch.pomdp b/models/archive/cav23-saynt/long-traces/sketch.pomdp new file mode 100644 index 000000000..c79021f72 --- /dev/null +++ b/models/archive/cav23-saynt/long-traces/sketch.pomdp @@ -0,0 +1,851 @@ +# auto-generated from PRISM program + +discount: 0.95 +values: reward +states: 207 +actions: 3 +observations: 5 + +start: +1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +# transition matrix + +T: 0 : 0 : 1 1.0 +T: 1 : 0 : 2 1.0 +T: 2 : 0 : 1 1.0 +T: 0 : 1 : 3 1.0 +T: 1 : 1 : 0 1.0 +T: 2 : 1 : 3 1.0 +T: 0 : 2 : 4 1.0 +T: 1 : 2 : 5 1.0 +T: 2 : 2 : 4 1.0 +T: 0 : 3 : 6 1.0 +T: 1 : 3 : 1 1.0 +T: 2 : 3 : 6 1.0 +T: 0 : 4 : 7 1.0 +T: 1 : 4 : 2 1.0 +T: 2 : 4 : 7 1.0 +T: 0 : 5 : 1 1.0 +T: 1 : 5 : 2 1.0 +T: 2 : 5 : 1 1.0 +T: 0 : 6 : 8 1.0 +T: 1 : 6 : 3 1.0 +T: 2 : 6 : 8 1.0 +T: 0 : 7 : 9 1.0 +T: 1 : 7 : 4 1.0 +T: 2 : 7 : 9 1.0 +T: 0 : 8 : 10 1.0 +T: 1 : 8 : 6 1.0 +T: 2 : 8 : 10 1.0 +T: 0 : 9 : 11 1.0 +T: 1 : 9 : 7 1.0 +T: 2 : 9 : 11 1.0 +T: 0 : 10 : 12 1.0 +T: 1 : 10 : 8 1.0 +T: 2 : 10 : 12 1.0 +T: 0 : 11 : 13 1.0 +T: 1 : 11 : 9 1.0 +T: 2 : 11 : 13 1.0 +T: 0 : 12 : 14 1.0 +T: 1 : 12 : 10 1.0 +T: 2 : 12 : 14 1.0 +T: 0 : 13 : 15 1.0 +T: 1 : 13 : 11 1.0 +T: 2 : 13 : 15 1.0 +T: 0 : 14 : 16 1.0 +T: 1 : 14 : 12 1.0 +T: 2 : 14 : 16 1.0 +T: 0 : 15 : 17 1.0 +T: 1 : 15 : 13 1.0 +T: 2 : 15 : 17 1.0 +T: 0 : 16 : 18 1.0 +T: 1 : 16 : 14 1.0 +T: 2 : 16 : 18 1.0 +T: 0 : 17 : 19 1.0 +T: 1 : 17 : 15 1.0 +T: 2 : 17 : 19 1.0 +T: 0 : 18 : 20 1.0 +T: 1 : 18 : 16 1.0 +T: 2 : 18 : 20 1.0 +T: 0 : 19 : 21 1.0 +T: 1 : 19 : 17 1.0 +T: 2 : 19 : 21 1.0 +T: 0 : 20 : 22 1.0 +T: 1 : 20 : 18 1.0 +T: 2 : 20 : 23 0.1 +T: 2 : 20 : 24 0.9 +T: 0 : 21 : 25 1.0 +T: 1 : 21 : 19 1.0 +T: 2 : 21 : 25 1.0 +T: 0 : 22 : 26 1.0 +T: 1 : 22 : 20 1.0 +T: 2 : 22 : 26 1.0 +T: 0 : 23 : 206 1.0 +T: 1 : 23 : 206 1.0 +T: 2 : 23 : 206 1.0 +T: 0 : 24 : 24 1.0 +T: 1 : 24 : 24 1.0 +T: 2 : 24 : 24 1.0 +T: 0 : 25 : 27 1.0 +T: 1 : 25 : 21 1.0 +T: 2 : 25 : 27 1.0 +T: 0 : 26 : 28 1.0 +T: 1 : 26 : 22 1.0 +T: 2 : 26 : 28 1.0 +T: 0 : 27 : 29 1.0 +T: 1 : 27 : 25 1.0 +T: 2 : 27 : 29 1.0 +T: 0 : 28 : 30 1.0 +T: 1 : 28 : 26 1.0 +T: 2 : 28 : 30 1.0 +T: 0 : 29 : 31 1.0 +T: 1 : 29 : 27 1.0 +T: 2 : 29 : 31 1.0 +T: 0 : 30 : 32 1.0 +T: 1 : 30 : 28 1.0 +T: 2 : 30 : 32 1.0
+T: 0 : 31 : 33 1.0 +T: 1 : 31 : 29 1.0 +T: 2 : 31 : 33 1.0 +T: 0 : 32 : 34 1.0 +T: 1 : 32 : 30 1.0 +T: 2 : 32 : 34 1.0 +T: 0 : 33 : 35 1.0 +T: 1 : 33 : 31 1.0 +T: 2 : 33 : 35 1.0 +T: 0 : 34 : 36 1.0 +T: 1 : 34 : 32 1.0 +T: 2 : 34 : 36 1.0 +T: 0 : 35 : 37 1.0 +T: 1 : 35 : 33 1.0 +T: 2 : 35 : 37 1.0 +T: 0 : 36 : 38 1.0 +T: 1 : 36 : 34 1.0 +T: 2 : 36 : 38 1.0 +T: 0 : 37 : 39 1.0 +T: 1 : 37 : 35 1.0 +T: 2 : 37 : 39 1.0 +T: 0 : 38 : 40 1.0 +T: 1 : 38 : 36 1.0 +T: 2 : 38 : 40 1.0 +T: 0 : 39 : 41 1.0 +T: 1 : 39 : 37 1.0 +T: 2 : 39 : 41 1.0 +T: 0 : 40 : 42 1.0 +T: 1 : 40 : 38 1.0 +T: 2 : 40 : 42 1.0 +T: 0 : 41 : 43 1.0 +T: 1 : 41 : 39 1.0 +T: 2 : 41 : 43 1.0 +T: 0 : 42 : 44 1.0 +T: 1 : 42 : 40 1.0 +T: 2 : 42 : 44 1.0 +T: 0 : 43 : 45 1.0 +T: 1 : 43 : 41 1.0 +T: 2 : 43 : 45 1.0 +T: 0 : 44 : 46 1.0 +T: 1 : 44 : 42 1.0 +T: 2 : 44 : 46 1.0 +T: 0 : 45 : 47 1.0 +T: 1 : 45 : 43 1.0 +T: 2 : 45 : 47 1.0 +T: 0 : 46 : 48 1.0 +T: 1 : 46 : 44 1.0 +T: 2 : 46 : 48 1.0 +T: 0 : 47 : 49 1.0 +T: 1 : 47 : 45 1.0 +T: 2 : 47 : 49 1.0 +T: 0 : 48 : 50 1.0 +T: 1 : 48 : 46 1.0 +T: 2 : 48 : 50 1.0 +T: 0 : 49 : 51 1.0 +T: 1 : 49 : 47 1.0 +T: 2 : 49 : 51 1.0 +T: 0 : 50 : 52 1.0 +T: 1 : 50 : 48 1.0 +T: 2 : 50 : 52 1.0 +T: 0 : 51 : 53 1.0 +T: 1 : 51 : 49 1.0 +T: 2 : 51 : 53 1.0 +T: 0 : 52 : 54 1.0 +T: 1 : 52 : 50 1.0 +T: 2 : 52 : 54 1.0 +T: 0 : 53 : 55 1.0 +T: 1 : 53 : 51 1.0 +T: 2 : 53 : 55 1.0 +T: 0 : 54 : 56 1.0 +T: 1 : 54 : 52 1.0 +T: 2 : 54 : 56 1.0 +T: 0 : 55 : 57 1.0 +T: 1 : 55 : 53 1.0 +T: 2 : 55 : 57 1.0 +T: 0 : 56 : 58 1.0 +T: 1 : 56 : 54 1.0 +T: 2 : 56 : 58 1.0 +T: 0 : 57 : 59 1.0 +T: 1 : 57 : 55 1.0 +T: 2 : 57 : 59 1.0 +T: 0 : 58 : 60 1.0 +T: 1 : 58 : 56 1.0 +T: 2 : 58 : 60 1.0 +T: 0 : 59 : 61 1.0 +T: 1 : 59 : 57 1.0 +T: 2 : 59 : 61 1.0 +T: 0 : 60 : 62 1.0 +T: 1 : 60 : 58 1.0 +T: 2 : 60 : 62 1.0 +T: 0 : 61 : 63 1.0 +T: 1 : 61 : 59 1.0 +T: 2 : 61 : 63 1.0 +T: 0 : 62 : 64 1.0 +T: 1 : 62 : 60 1.0 +T: 2 : 62 : 64 1.0 +T: 0 : 63 : 65 1.0 +T: 1 : 63 : 61 1.0 +T: 2 : 63 : 65 1.0 +T: 0 : 64 : 66 1.0 +T: 1 : 64 : 62 1.0 +T: 2 : 64 : 66 1.0 +T: 0 : 65 : 67 1.0 +T: 1 : 65 : 63 1.0 +T: 2 : 65 : 67 1.0 +T: 0 : 66 : 68 1.0 +T: 1 : 66 : 64 1.0 +T: 2 : 66 : 68 1.0 +T: 0 : 67 : 69 1.0 +T: 1 : 67 : 65 1.0 +T: 2 : 67 : 69 1.0 +T: 0 : 68 : 70 1.0 +T: 1 : 68 : 66 1.0 +T: 2 : 68 : 70 1.0 +T: 0 : 69 : 71 1.0 +T: 1 : 69 : 67 1.0 +T: 2 : 69 : 71 1.0 +T: 0 : 70 : 72 1.0 +T: 1 : 70 : 68 1.0 +T: 2 : 70 : 72 1.0 +T: 0 : 71 : 73 1.0 +T: 1 : 71 : 69 1.0 +T: 2 : 71 : 73 1.0 +T: 0 : 72 : 74 1.0 +T: 1 : 72 : 70 1.0 +T: 2 : 72 : 74 1.0 +T: 0 : 73 : 75 1.0 +T: 1 : 73 : 71 1.0 +T: 2 : 73 : 75 1.0 +T: 0 : 74 : 76 1.0 +T: 1 : 74 : 72 1.0 +T: 2 : 74 : 76 1.0 +T: 0 : 75 : 77 1.0 +T: 1 : 75 : 73 1.0 +T: 2 : 75 : 77 1.0 +T: 0 : 76 : 78 1.0 +T: 1 : 76 : 74 1.0 +T: 2 : 76 : 78 1.0 +T: 0 : 77 : 79 1.0 +T: 1 : 77 : 75 1.0 +T: 2 : 77 : 79 1.0 +T: 0 : 78 : 80 1.0 +T: 1 : 78 : 76 1.0 +T: 2 : 78 : 80 1.0 +T: 0 : 79 : 81 1.0 +T: 1 : 79 : 77 1.0 +T: 2 : 79 : 81 1.0 +T: 0 : 80 : 82 1.0 +T: 1 : 80 : 78 1.0 +T: 2 : 80 : 82 1.0 +T: 0 : 81 : 83 1.0 +T: 1 : 81 : 79 1.0 +T: 2 : 81 : 83 1.0 +T: 0 : 82 : 84 1.0 +T: 1 : 82 : 80 1.0 +T: 2 : 82 : 84 1.0 +T: 0 : 83 : 85 1.0 +T: 1 : 83 : 81 1.0 +T: 2 : 83 : 85 1.0 +T: 0 : 84 : 86 1.0 +T: 1 : 84 : 82 1.0 +T: 2 : 84 : 86 1.0 +T: 0 : 85 : 87 1.0 +T: 1 : 85 : 83 1.0 +T: 2 : 85 : 87 1.0 +T: 0 : 86 : 88 1.0 +T: 1 : 86 : 84 1.0 +T: 2 : 86 : 88 1.0 +T: 0 : 87 : 89 1.0 +T: 1 : 87 : 85 1.0 +T: 2 : 87 : 89 1.0 +T: 0 : 88 : 90 1.0 +T: 1 : 88 : 86 1.0 +T: 2 : 88 : 90 1.0 +T: 0 : 89 : 91 1.0 +T: 1 : 89 : 87 1.0 +T: 2 : 89 : 91 1.0 +T: 0 : 90 : 
92 1.0 +T: 1 : 90 : 88 1.0 +T: 2 : 90 : 92 1.0 +T: 0 : 91 : 93 1.0 +T: 1 : 91 : 89 1.0 +T: 2 : 91 : 93 1.0 +T: 0 : 92 : 94 1.0 +T: 1 : 92 : 90 1.0 +T: 2 : 92 : 94 1.0 +T: 0 : 93 : 95 1.0 +T: 1 : 93 : 91 1.0 +T: 2 : 93 : 95 1.0 +T: 0 : 94 : 96 1.0 +T: 1 : 94 : 92 1.0 +T: 2 : 94 : 96 1.0 +T: 0 : 95 : 97 1.0 +T: 1 : 95 : 93 1.0 +T: 2 : 95 : 97 1.0 +T: 0 : 96 : 98 1.0 +T: 1 : 96 : 94 1.0 +T: 2 : 96 : 98 1.0 +T: 0 : 97 : 99 1.0 +T: 1 : 97 : 95 1.0 +T: 2 : 97 : 99 1.0 +T: 0 : 98 : 100 1.0 +T: 1 : 98 : 96 1.0 +T: 2 : 98 : 100 1.0 +T: 0 : 99 : 101 1.0 +T: 1 : 99 : 97 1.0 +T: 2 : 99 : 101 1.0 +T: 0 : 100 : 102 1.0 +T: 1 : 100 : 98 1.0 +T: 2 : 100 : 102 1.0 +T: 0 : 101 : 103 1.0 +T: 1 : 101 : 99 1.0 +T: 2 : 101 : 103 1.0 +T: 0 : 102 : 104 1.0 +T: 1 : 102 : 100 1.0 +T: 2 : 102 : 104 1.0 +T: 0 : 103 : 105 1.0 +T: 1 : 103 : 101 1.0 +T: 2 : 103 : 105 1.0 +T: 0 : 104 : 106 1.0 +T: 1 : 104 : 102 1.0 +T: 2 : 104 : 106 1.0 +T: 0 : 105 : 107 1.0 +T: 1 : 105 : 103 1.0 +T: 2 : 105 : 107 1.0 +T: 0 : 106 : 108 1.0 +T: 1 : 106 : 104 1.0 +T: 2 : 106 : 108 1.0 +T: 0 : 107 : 109 1.0 +T: 1 : 107 : 105 1.0 +T: 2 : 107 : 109 1.0 +T: 0 : 108 : 110 1.0 +T: 1 : 108 : 106 1.0 +T: 2 : 108 : 110 1.0 +T: 0 : 109 : 111 1.0 +T: 1 : 109 : 107 1.0 +T: 2 : 109 : 111 1.0 +T: 0 : 110 : 112 1.0 +T: 1 : 110 : 108 1.0 +T: 2 : 110 : 112 1.0 +T: 0 : 111 : 113 1.0 +T: 1 : 111 : 109 1.0 +T: 2 : 111 : 113 1.0 +T: 0 : 112 : 114 1.0 +T: 1 : 112 : 110 1.0 +T: 2 : 112 : 114 1.0 +T: 0 : 113 : 115 1.0 +T: 1 : 113 : 111 1.0 +T: 2 : 113 : 115 1.0 +T: 0 : 114 : 116 1.0 +T: 1 : 114 : 112 1.0 +T: 2 : 114 : 116 1.0 +T: 0 : 115 : 117 1.0 +T: 1 : 115 : 113 1.0 +T: 2 : 115 : 117 1.0 +T: 0 : 116 : 118 1.0 +T: 1 : 116 : 114 1.0 +T: 2 : 116 : 118 1.0 +T: 0 : 117 : 119 1.0 +T: 1 : 117 : 115 1.0 +T: 2 : 117 : 119 1.0 +T: 0 : 118 : 120 1.0 +T: 1 : 118 : 116 1.0 +T: 2 : 118 : 120 1.0 +T: 0 : 119 : 121 1.0 +T: 1 : 119 : 117 1.0 +T: 2 : 119 : 121 1.0 +T: 0 : 120 : 122 1.0 +T: 1 : 120 : 118 1.0 +T: 2 : 120 : 122 1.0 +T: 0 : 121 : 123 1.0 +T: 1 : 121 : 119 1.0 +T: 2 : 121 : 123 1.0 +T: 0 : 122 : 124 1.0 +T: 1 : 122 : 120 1.0 +T: 2 : 122 : 124 1.0 +T: 0 : 123 : 125 1.0 +T: 1 : 123 : 121 1.0 +T: 2 : 123 : 125 1.0 +T: 0 : 124 : 126 1.0 +T: 1 : 124 : 122 1.0 +T: 2 : 124 : 126 1.0 +T: 0 : 125 : 127 1.0 +T: 1 : 125 : 123 1.0 +T: 2 : 125 : 127 1.0 +T: 0 : 126 : 128 1.0 +T: 1 : 126 : 124 1.0 +T: 2 : 126 : 128 1.0 +T: 0 : 127 : 129 1.0 +T: 1 : 127 : 125 1.0 +T: 2 : 127 : 129 1.0 +T: 0 : 128 : 130 1.0 +T: 1 : 128 : 126 1.0 +T: 2 : 128 : 130 1.0 +T: 0 : 129 : 131 1.0 +T: 1 : 129 : 127 1.0 +T: 2 : 129 : 131 1.0 +T: 0 : 130 : 132 1.0 +T: 1 : 130 : 128 1.0 +T: 2 : 130 : 132 1.0 +T: 0 : 131 : 133 1.0 +T: 1 : 131 : 129 1.0 +T: 2 : 131 : 133 1.0 +T: 0 : 132 : 134 1.0 +T: 1 : 132 : 130 1.0 +T: 2 : 132 : 134 1.0 +T: 0 : 133 : 135 1.0 +T: 1 : 133 : 131 1.0 +T: 2 : 133 : 135 1.0 +T: 0 : 134 : 136 1.0 +T: 1 : 134 : 132 1.0 +T: 2 : 134 : 136 1.0 +T: 0 : 135 : 137 1.0 +T: 1 : 135 : 133 1.0 +T: 2 : 135 : 137 1.0 +T: 0 : 136 : 138 1.0 +T: 1 : 136 : 134 1.0 +T: 2 : 136 : 138 1.0 +T: 0 : 137 : 139 1.0 +T: 1 : 137 : 135 1.0 +T: 2 : 137 : 139 1.0 +T: 0 : 138 : 140 1.0 +T: 1 : 138 : 136 1.0 +T: 2 : 138 : 140 1.0 +T: 0 : 139 : 141 1.0 +T: 1 : 139 : 137 1.0 +T: 2 : 139 : 141 1.0 +T: 0 : 140 : 142 1.0 +T: 1 : 140 : 138 1.0 +T: 2 : 140 : 142 1.0 +T: 0 : 141 : 143 1.0 +T: 1 : 141 : 139 1.0 +T: 2 : 141 : 143 1.0 +T: 0 : 142 : 144 1.0 +T: 1 : 142 : 140 1.0 +T: 2 : 142 : 144 1.0 +T: 0 : 143 : 145 1.0 +T: 1 : 143 : 141 1.0 +T: 2 : 143 : 145 1.0 +T: 0 : 144 : 146 1.0 +T: 1 : 144 : 142 1.0 +T: 2 : 144 : 146 
1.0 +T: 0 : 145 : 147 1.0 +T: 1 : 145 : 143 1.0 +T: 2 : 145 : 147 1.0 +T: 0 : 146 : 148 1.0 +T: 1 : 146 : 144 1.0 +T: 2 : 146 : 148 1.0 +T: 0 : 147 : 149 1.0 +T: 1 : 147 : 145 1.0 +T: 2 : 147 : 149 1.0 +T: 0 : 148 : 150 1.0 +T: 1 : 148 : 146 1.0 +T: 2 : 148 : 150 1.0 +T: 0 : 149 : 151 1.0 +T: 1 : 149 : 147 1.0 +T: 2 : 149 : 151 1.0 +T: 0 : 150 : 152 1.0 +T: 1 : 150 : 148 1.0 +T: 2 : 150 : 152 1.0 +T: 0 : 151 : 153 1.0 +T: 1 : 151 : 149 1.0 +T: 2 : 151 : 153 1.0 +T: 0 : 152 : 154 1.0 +T: 1 : 152 : 150 1.0 +T: 2 : 152 : 154 1.0 +T: 0 : 153 : 155 1.0 +T: 1 : 153 : 151 1.0 +T: 2 : 153 : 155 1.0 +T: 0 : 154 : 156 1.0 +T: 1 : 154 : 152 1.0 +T: 2 : 154 : 156 1.0 +T: 0 : 155 : 157 1.0 +T: 1 : 155 : 153 1.0 +T: 2 : 155 : 157 1.0 +T: 0 : 156 : 158 1.0 +T: 1 : 156 : 154 1.0 +T: 2 : 156 : 158 1.0 +T: 0 : 157 : 159 1.0 +T: 1 : 157 : 155 1.0 +T: 2 : 157 : 159 1.0 +T: 0 : 158 : 160 1.0 +T: 1 : 158 : 156 1.0 +T: 2 : 158 : 160 1.0 +T: 0 : 159 : 161 1.0 +T: 1 : 159 : 157 1.0 +T: 2 : 159 : 161 1.0 +T: 0 : 160 : 162 1.0 +T: 1 : 160 : 158 1.0 +T: 2 : 160 : 162 1.0 +T: 0 : 161 : 163 1.0 +T: 1 : 161 : 159 1.0 +T: 2 : 161 : 163 1.0 +T: 0 : 162 : 164 1.0 +T: 1 : 162 : 160 1.0 +T: 2 : 162 : 164 1.0 +T: 0 : 163 : 165 1.0 +T: 1 : 163 : 161 1.0 +T: 2 : 163 : 165 1.0 +T: 0 : 164 : 166 1.0 +T: 1 : 164 : 162 1.0 +T: 2 : 164 : 166 1.0 +T: 0 : 165 : 167 1.0 +T: 1 : 165 : 163 1.0 +T: 2 : 165 : 167 1.0 +T: 0 : 166 : 168 1.0 +T: 1 : 166 : 164 1.0 +T: 2 : 166 : 168 1.0 +T: 0 : 167 : 169 1.0 +T: 1 : 167 : 165 1.0 +T: 2 : 167 : 169 1.0 +T: 0 : 168 : 170 1.0 +T: 1 : 168 : 166 1.0 +T: 2 : 168 : 170 1.0 +T: 0 : 169 : 171 1.0 +T: 1 : 169 : 167 1.0 +T: 2 : 169 : 171 1.0 +T: 0 : 170 : 172 1.0 +T: 1 : 170 : 168 1.0 +T: 2 : 170 : 172 1.0 +T: 0 : 171 : 173 1.0 +T: 1 : 171 : 169 1.0 +T: 2 : 171 : 173 1.0 +T: 0 : 172 : 174 1.0 +T: 1 : 172 : 170 1.0 +T: 2 : 172 : 174 1.0 +T: 0 : 173 : 175 1.0 +T: 1 : 173 : 171 1.0 +T: 2 : 173 : 175 1.0 +T: 0 : 174 : 176 1.0 +T: 1 : 174 : 172 1.0 +T: 2 : 174 : 176 1.0 +T: 0 : 175 : 177 1.0 +T: 1 : 175 : 173 1.0 +T: 2 : 175 : 177 1.0 +T: 0 : 176 : 178 1.0 +T: 1 : 176 : 174 1.0 +T: 2 : 176 : 178 1.0 +T: 0 : 177 : 179 1.0 +T: 1 : 177 : 175 1.0 +T: 2 : 177 : 179 1.0 +T: 0 : 178 : 180 1.0 +T: 1 : 178 : 176 1.0 +T: 2 : 178 : 180 1.0 +T: 0 : 179 : 181 1.0 +T: 1 : 179 : 177 1.0 +T: 2 : 179 : 181 1.0 +T: 0 : 180 : 182 1.0 +T: 1 : 180 : 178 1.0 +T: 2 : 180 : 182 1.0 +T: 0 : 181 : 183 1.0 +T: 1 : 181 : 179 1.0 +T: 2 : 181 : 183 1.0 +T: 0 : 182 : 184 1.0 +T: 1 : 182 : 180 1.0 +T: 2 : 182 : 184 1.0 +T: 0 : 183 : 185 1.0 +T: 1 : 183 : 181 1.0 +T: 2 : 183 : 185 1.0 +T: 0 : 184 : 186 1.0 +T: 1 : 184 : 182 1.0 +T: 2 : 184 : 186 1.0 +T: 0 : 185 : 187 1.0 +T: 1 : 185 : 183 1.0 +T: 2 : 185 : 187 1.0 +T: 0 : 186 : 188 1.0 +T: 1 : 186 : 184 1.0 +T: 2 : 186 : 188 1.0 +T: 0 : 187 : 189 1.0 +T: 1 : 187 : 185 1.0 +T: 2 : 187 : 189 1.0 +T: 0 : 188 : 190 1.0 +T: 1 : 188 : 186 1.0 +T: 2 : 188 : 190 1.0 +T: 0 : 189 : 191 1.0 +T: 1 : 189 : 187 1.0 +T: 2 : 189 : 191 1.0 +T: 0 : 190 : 192 1.0 +T: 1 : 190 : 188 1.0 +T: 2 : 190 : 192 1.0 +T: 0 : 191 : 193 1.0 +T: 1 : 191 : 189 1.0 +T: 2 : 191 : 193 1.0 +T: 0 : 192 : 194 1.0 +T: 1 : 192 : 190 1.0 +T: 2 : 192 : 194 1.0 +T: 0 : 193 : 195 1.0 +T: 1 : 193 : 191 1.0 +T: 2 : 193 : 195 1.0 +T: 0 : 194 : 196 1.0 +T: 1 : 194 : 192 1.0 +T: 2 : 194 : 196 1.0 +T: 0 : 195 : 197 1.0 +T: 1 : 195 : 193 1.0 +T: 2 : 195 : 197 1.0 +T: 0 : 196 : 198 1.0 +T: 1 : 196 : 194 1.0 +T: 2 : 196 : 198 1.0 +T: 0 : 197 : 199 1.0 +T: 1 : 197 : 195 1.0 +T: 2 : 197 : 199 1.0 +T: 0 : 198 : 200 1.0 +T: 1 : 198 : 196 1.0 +T: 2 : 
198 : 200 1.0 +T: 0 : 199 : 201 1.0 +T: 1 : 199 : 197 1.0 +T: 2 : 199 : 201 1.0 +T: 0 : 200 : 202 1.0 +T: 1 : 200 : 198 1.0 +T: 2 : 200 : 202 1.0 +T: 0 : 201 : 203 1.0 +T: 1 : 201 : 199 1.0 +T: 2 : 201 : 203 1.0 +T: 0 : 202 : 24 1.0 +T: 1 : 202 : 200 1.0 +T: 2 : 202 : 24 1.0 +T: 0 : 203 : 204 1.0 +T: 1 : 203 : 201 1.0 +T: 2 : 203 : 204 0.5 +T: 2 : 203 : 205 0.5 +T: 0 : 204 : 204 1.0 +T: 1 : 204 : 204 1.0 +T: 2 : 204 : 204 1.0 +T: 0 : 205 : 206 1.0 +T: 1 : 205 : 206 1.0 +T: 2 : 205 : 206 1.0 +T: 0 : 206 : 206 1.0 +T: 1 : 206 : 206 1.0 +T: 2 : 206 : 206 1.0 + +# observations + +O: * : 0 : 1 1.0 +O: * : 1 : 0 1.0 +O: * : 2 : 0 1.0 +O: * : 3 : 0 1.0 +O: * : 4 : 0 1.0 +O: * : 5 : 1 1.0 +O: * : 6 : 0 1.0 +O: * : 7 : 0 1.0 +O: * : 8 : 0 1.0 +O: * : 9 : 0 1.0 +O: * : 10 : 0 1.0 +O: * : 11 : 0 1.0 +O: * : 12 : 0 1.0 +O: * : 13 : 0 1.0 +O: * : 14 : 0 1.0 +O: * : 15 : 0 1.0 +O: * : 16 : 0 1.0 +O: * : 17 : 0 1.0 +O: * : 18 : 0 1.0 +O: * : 19 : 0 1.0 +O: * : 20 : 2 1.0 +O: * : 21 : 0 1.0 +O: * : 22 : 0 1.0 +O: * : 23 : 3 1.0 +O: * : 24 : 4 1.0 +O: * : 25 : 0 1.0 +O: * : 26 : 0 1.0 +O: * : 27 : 0 1.0 +O: * : 28 : 0 1.0 +O: * : 29 : 0 1.0 +O: * : 30 : 0 1.0 +O: * : 31 : 0 1.0 +O: * : 32 : 0 1.0 +O: * : 33 : 0 1.0 +O: * : 34 : 0 1.0 +O: * : 35 : 0 1.0 +O: * : 36 : 0 1.0 +O: * : 37 : 0 1.0 +O: * : 38 : 0 1.0 +O: * : 39 : 0 1.0 +O: * : 40 : 0 1.0 +O: * : 41 : 0 1.0 +O: * : 42 : 0 1.0 +O: * : 43 : 0 1.0 +O: * : 44 : 0 1.0 +O: * : 45 : 0 1.0 +O: * : 46 : 0 1.0 +O: * : 47 : 0 1.0 +O: * : 48 : 0 1.0 +O: * : 49 : 0 1.0 +O: * : 50 : 0 1.0 +O: * : 51 : 0 1.0 +O: * : 52 : 0 1.0 +O: * : 53 : 0 1.0 +O: * : 54 : 0 1.0 +O: * : 55 : 0 1.0 +O: * : 56 : 0 1.0 +O: * : 57 : 0 1.0 +O: * : 58 : 0 1.0 +O: * : 59 : 0 1.0 +O: * : 60 : 0 1.0 +O: * : 61 : 0 1.0 +O: * : 62 : 0 1.0 +O: * : 63 : 0 1.0 +O: * : 64 : 0 1.0 +O: * : 65 : 0 1.0 +O: * : 66 : 0 1.0 +O: * : 67 : 0 1.0 +O: * : 68 : 0 1.0 +O: * : 69 : 0 1.0 +O: * : 70 : 0 1.0 +O: * : 71 : 0 1.0 +O: * : 72 : 0 1.0 +O: * : 73 : 0 1.0 +O: * : 74 : 0 1.0 +O: * : 75 : 0 1.0 +O: * : 76 : 0 1.0 +O: * : 77 : 0 1.0 +O: * : 78 : 0 1.0 +O: * : 79 : 0 1.0 +O: * : 80 : 0 1.0 +O: * : 81 : 0 1.0 +O: * : 82 : 0 1.0 +O: * : 83 : 0 1.0 +O: * : 84 : 0 1.0 +O: * : 85 : 0 1.0 +O: * : 86 : 0 1.0 +O: * : 87 : 0 1.0 +O: * : 88 : 0 1.0 +O: * : 89 : 0 1.0 +O: * : 90 : 0 1.0 +O: * : 91 : 0 1.0 +O: * : 92 : 0 1.0 +O: * : 93 : 0 1.0 +O: * : 94 : 0 1.0 +O: * : 95 : 0 1.0 +O: * : 96 : 0 1.0 +O: * : 97 : 0 1.0 +O: * : 98 : 0 1.0 +O: * : 99 : 0 1.0 +O: * : 100 : 0 1.0 +O: * : 101 : 0 1.0 +O: * : 102 : 0 1.0 +O: * : 103 : 0 1.0 +O: * : 104 : 0 1.0 +O: * : 105 : 0 1.0 +O: * : 106 : 0 1.0 +O: * : 107 : 0 1.0 +O: * : 108 : 0 1.0 +O: * : 109 : 0 1.0 +O: * : 110 : 0 1.0 +O: * : 111 : 0 1.0 +O: * : 112 : 0 1.0 +O: * : 113 : 0 1.0 +O: * : 114 : 0 1.0 +O: * : 115 : 0 1.0 +O: * : 116 : 0 1.0 +O: * : 117 : 0 1.0 +O: * : 118 : 0 1.0 +O: * : 119 : 0 1.0 +O: * : 120 : 0 1.0 +O: * : 121 : 0 1.0 +O: * : 122 : 0 1.0 +O: * : 123 : 0 1.0 +O: * : 124 : 0 1.0 +O: * : 125 : 0 1.0 +O: * : 126 : 0 1.0 +O: * : 127 : 0 1.0 +O: * : 128 : 0 1.0 +O: * : 129 : 0 1.0 +O: * : 130 : 0 1.0 +O: * : 131 : 0 1.0 +O: * : 132 : 0 1.0 +O: * : 133 : 0 1.0 +O: * : 134 : 0 1.0 +O: * : 135 : 0 1.0 +O: * : 136 : 0 1.0 +O: * : 137 : 0 1.0 +O: * : 138 : 0 1.0 +O: * : 139 : 0 1.0 +O: * : 140 : 0 1.0 +O: * : 141 : 0 1.0 +O: * : 142 : 0 1.0 +O: * : 143 : 0 1.0 +O: * : 144 : 0 1.0 +O: * : 145 : 0 1.0 +O: * : 146 : 0 1.0 +O: * : 147 : 0 1.0 +O: * : 148 : 0 1.0 +O: * : 149 : 0 1.0 +O: * : 150 : 0 1.0 +O: * : 151 : 0 1.0 +O: * : 152 : 0 1.0 +O: * : 153 : 0 1.0 
+O: * : 154 : 0 1.0 +O: * : 155 : 0 1.0 +O: * : 156 : 0 1.0 +O: * : 157 : 0 1.0 +O: * : 158 : 0 1.0 +O: * : 159 : 0 1.0 +O: * : 160 : 0 1.0 +O: * : 161 : 0 1.0 +O: * : 162 : 0 1.0 +O: * : 163 : 0 1.0 +O: * : 164 : 0 1.0 +O: * : 165 : 0 1.0 +O: * : 166 : 0 1.0 +O: * : 167 : 0 1.0 +O: * : 168 : 0 1.0 +O: * : 169 : 0 1.0 +O: * : 170 : 0 1.0 +O: * : 171 : 0 1.0 +O: * : 172 : 0 1.0 +O: * : 173 : 0 1.0 +O: * : 174 : 0 1.0 +O: * : 175 : 0 1.0 +O: * : 176 : 0 1.0 +O: * : 177 : 0 1.0 +O: * : 178 : 0 1.0 +O: * : 179 : 0 1.0 +O: * : 180 : 0 1.0 +O: * : 181 : 0 1.0 +O: * : 182 : 0 1.0 +O: * : 183 : 0 1.0 +O: * : 184 : 0 1.0 +O: * : 185 : 0 1.0 +O: * : 186 : 0 1.0 +O: * : 187 : 0 1.0 +O: * : 188 : 0 1.0 +O: * : 189 : 0 1.0 +O: * : 190 : 0 1.0 +O: * : 191 : 0 1.0 +O: * : 192 : 0 1.0 +O: * : 193 : 0 1.0 +O: * : 194 : 0 1.0 +O: * : 195 : 0 1.0 +O: * : 196 : 0 1.0 +O: * : 197 : 0 1.0 +O: * : 198 : 0 1.0 +O: * : 199 : 0 1.0 +O: * : 200 : 0 1.0 +O: * : 201 : 0 1.0 +O: * : 202 : 0 1.0 +O: * : 203 : 2 1.0 +O: * : 204 : 4 1.0 +O: * : 205 : 3 1.0 +O: * : 206 : 0 1.0 + +# rewards from reachability + +R: * : * : 23 : * 1.0 +R: * : * : 205 : * 1.0 diff --git a/models/archive/cav23-saynt/long-traces/sketch.props b/models/archive/cav23-saynt/long-traces/sketch.props new file mode 100644 index 000000000..58121ac47 --- /dev/null +++ b/models/archive/cav23-saynt/long-traces/sketch.props @@ -0,0 +1 @@ +Pmax=? [F goal] \ No newline at end of file diff --git a/models/archive/cav23-saynt/long-traces/sketch.templ b/models/archive/cav23-saynt/long-traces/sketch.templ new file mode 100644 index 000000000..fd015ed18 --- /dev/null +++ b/models/archive/cav23-saynt/long-traces/sketch.templ @@ -0,0 +1,30 @@ +pomdp + +formula switch_state = x=0; +formula finish_state = (x=10 & y=0) | (x=100 & y=1); +formula goal = y=2; +formula fail = x=101; + +observable "switch_state" = switch_state; +observable "finish_state" = finish_state; +observable "goal" = goal; +observable "fail" = fail; + +module main + + x : [0..101] init 0; + y : [0..3] init 0; + + + [switch_0] x=0 -> 1: (y'=0)&(x'=1); + [switch_1] x=0 -> 1: (y'=1)&(x'=1); + + [up] x>0 & x<101 -> 1:(x'=min(x+1,101)); + [down] x>0 & x<101 -> 1:(x'=max(x-1,0)); + + [finish] (x=10 & y=0) -> 0.1:(y'=2) + 0.9:(x'=101); + [finish] (x=100 & y=1) -> 0.5:(y'=2) + 0.5:(x'=101); + + [loop] x=101 -> 1: true; + +endmodule diff --git a/models/archive/cav23-saynt/problem-saynt/sketch.props b/models/archive/cav23-saynt/problem-saynt/sketch.props new file mode 100644 index 000000000..edd96150b --- /dev/null +++ b/models/archive/cav23-saynt/problem-saynt/sketch.props @@ -0,0 +1 @@ +R{"steps"}min=? 
[F goal] \ No newline at end of file diff --git a/models/archive/cav23-saynt/problem-saynt/sketch.templ b/models/archive/cav23-saynt/problem-saynt/sketch.templ new file mode 100644 index 000000000..955a02bb5 --- /dev/null +++ b/models/archive/cav23-saynt/problem-saynt/sketch.templ @@ -0,0 +1,132 @@ +pomdp + +formula goal = x=1 & y=3; + +const int x1 = 1; +const int x2 = 2; +const int x3 = 3; +const int y1 = 1; +const int y2 = 2; +const int y3 = 3; + +observable "in" = (x=1 & y=1 & pos11=0) | (x=2 & y=1 & pos21=0) | (x=3 & y=1 & pos31=0) | (x=3 & y=2 & pos32=0) | (x=3 & y=3 & pos33=0) | (x=2 & y=3 & pos23=0); +observable "switch" = (x=1 & y=1 & (pos11=1 | pos11=2 | pos11=4)) | (x=2 & y=1 & (pos21=1 | pos21=2 | pos21=4)) | (x=3 & y=1 & (pos31=1 | pos31=2 | pos31=4)) | (x=3 & y=2 & (pos32=1 | pos32=2 | pos32=4)) | (x=3 & y=3 & (pos33=1 | pos33=2 | pos33=4)) | (x=2 & y=3 & (pos23=1 | pos23=2 | pos23=4)); +observable "out" = (x=1 & y=1 & pos11=3) | (x=2 & y=1 & pos21=3) | (x=3 & y=1 & pos31=3) | (x=3 & y=2 & pos32=3) | (x=3 & y=3 & pos33=3) | (x=2 & y=3 & pos23=3); +observable "goal" = goal; + +// can go in this direction +formula u = x=3 & y<3; +formula r = x<3 & (y=1 | y=3); +formula d = x=3 & y>1; +formula l = x>1 & (y=1 | y=3); + +// updates of coordinates (if possible) +formula yu = u ? (y+1) : y; +formula xr = r ? (x+1) : x; +formula yd = d ? (y-1) : y; +formula xl = l ? (x-1) : x; + +const double switch_rate = 0.5; +const double exit_rate = 0.99; + + +module grid + x : [1..3] init 1; + y : [1..3] init 1; + + [up] (x=1 & y=1 & pos11=3) | (x=2 & y=1 & pos21=3) | (x=3 & y=1 & pos31=3) | (x=3 & y=2 & pos32=3) | (x=3 & y=3 & pos33=3) | (x=2 & y=3 & pos23=3) -> (y'=yu); + [right] (x=1 & y=1 & pos11=3) | (x=2 & y=1 & pos21=3) | (x=3 & y=1 & pos31=3) | (x=3 & y=2 & pos32=3) | (x=3 & y=3 & pos33=3) | (x=2 & y=3 & pos23=3) -> (x'=xr); + [down] (x=1 & y=1 & pos11=3) | (x=2 & y=1 & pos21=3) | (x=3 & y=1 & pos31=3) | (x=3 & y=2 & pos32=3) | (x=3 & y=3 & pos33=3) | (x=2 & y=3 & pos23=3) -> (y'=yd); + [left] (x=1 & y=1 & pos11=3) | (x=2 & y=1 & pos21=3) | (x=3 & y=1 & pos31=3) | (x=3 & y=2 & pos32=3) | (x=3 & y=3 & pos33=3) | (x=2 & y=3 & pos23=3) -> (x'=xl); + +endmodule + + + +module cells + pos11 : [0..4] init 0; + pos21 : [0..4] init 0; + pos31 : [0..4] init 0; + pos32 : [0..4] init 0; + pos33 : [0..4] init 0; + pos23 : [0..4] init 0; + + + [up] true -> (pos11'=0) & (pos21'=0) & (pos31'=0) & (pos32'=0) & (pos33'=0) & (pos23'=0); + [right] true -> (pos11'=0) & (pos21'=0) & (pos31'=0) & (pos32'=0) & (pos33'=0) & (pos23'=0); + [down] true -> (pos11'=0) & (pos21'=0) & (pos31'=0) & (pos32'=0) & (pos33'=0) & (pos23'=0); + [left] true -> (pos11'=0) & (pos21'=0) & (pos31'=0) & (pos32'=0) & (pos33'=0) & (pos23'=0); + + + [in] x=x1 & y=y1 & pos11=0 -> 0.5: (pos11'=1) + 0.5: (pos11'=2); + + [l] x=x1 & y=y1 & pos11=1 -> switch_rate: (pos11'=4) + 1-switch_rate: (pos11'=1); + [r] x=x1 & y=y1 & pos11=1 -> switch_rate: (pos11'=1) + 1-switch_rate: (pos11'=2); + [l] x=x1 & y=y1 & pos11=2 -> switch_rate: (pos11'=2) + 1-switch_rate: (pos11'=1); + [r] x=x1 & y=y1 & pos11=2 -> switch_rate: (pos11'=4) + 1-switch_rate: (pos11'=2); + [l] x=x1 & y=y1 & pos11=4 -> exit_rate: (pos11'=3) + 1-exit_rate: (pos11'=0); + [r] x=x1 & y=y1 & pos11=4 -> exit_rate: (pos11'=3) + 1-exit_rate: (pos11'=0); + + + [in] x=x2 & y=y1 & pos21=0 -> 0.5: (pos21'=1) + 0.5: (pos21'=2); + + [l] x=x2 & y=y1 & pos21=1 -> switch_rate: (pos21'=4) + 1-switch_rate: (pos21'=1); + [r] x=x2 & y=y1 & pos21=1 -> switch_rate: (pos21'=1) + 1-switch_rate: 
(pos21'=2); + [l] x=x2 & y=y1 & pos21=2 -> switch_rate: (pos21'=2) + 1-switch_rate: (pos21'=1); + [r] x=x2 & y=y1 & pos21=2 -> switch_rate: (pos21'=4) + 1-switch_rate: (pos21'=2); + [l] x=x2 & y=y1 & pos21=4 -> exit_rate: (pos21'=3) + 1-exit_rate: (pos21'=0); + [r] x=x2 & y=y1 & pos21=4 -> exit_rate: (pos21'=3) + 1-exit_rate: (pos21'=0); + + + + [in] x=x3 & y=y1 & pos31=0 -> 0.5: (pos31'=1) + 0.5: (pos31'=2); + + [l] x=x3 & y=y1 & pos31=1 -> switch_rate: (pos31'=4) + 1-switch_rate: (pos31'=1); + [r] x=x3 & y=y1 & pos31=1 -> switch_rate: (pos31'=1) + 1-switch_rate: (pos31'=2); + [l] x=x3 & y=y1 & pos31=2 -> switch_rate: (pos31'=2) + 1-switch_rate: (pos31'=1); + [r] x=x3 & y=y1 & pos31=2 -> switch_rate: (pos31'=4) + 1-switch_rate: (pos31'=2); + [l] x=x3 & y=y1 & pos31=4 -> exit_rate: (pos31'=3) + 1-exit_rate: (pos31'=0); + [r] x=x3 & y=y1 & pos31=4 -> exit_rate: (pos31'=3) + 1-exit_rate: (pos31'=0); + + + + [in] x=x3 & y=y2 & pos32=0 -> 0.5: (pos32'=1) + 0.5: (pos32'=2); + + [l] x=x3 & y=y2 & pos32=1 -> switch_rate: (pos32'=4) + 1-switch_rate: (pos32'=1); + [r] x=x3 & y=y2 & pos32=1 -> switch_rate: (pos32'=1) + 1-switch_rate: (pos32'=2); + [l] x=x3 & y=y2 & pos32=2 -> switch_rate: (pos32'=2) + 1-switch_rate: (pos32'=1); + [r] x=x3 & y=y2 & pos32=2 -> switch_rate: (pos32'=4) + 1-switch_rate: (pos32'=2); + [l] x=x3 & y=y2 & pos32=4 -> exit_rate: (pos32'=3) + 1-exit_rate: (pos32'=0); + [r] x=x3 & y=y2 & pos32=4 -> exit_rate: (pos32'=3) + 1-exit_rate: (pos32'=0); + + + + [in] x=x3 & y=y3 & pos33=0 -> 0.5: (pos33'=1) + 0.5: (pos33'=2); + + [l] x=x3 & y=y3 & pos33=1 -> switch_rate: (pos33'=4) + 1-switch_rate: (pos33'=1); + [r] x=x3 & y=y3 & pos33=1 -> switch_rate: (pos33'=1) + 1-switch_rate: (pos33'=2); + [l] x=x3 & y=y3 & pos33=2 -> switch_rate: (pos33'=2) + 1-switch_rate: (pos33'=1); + [r] x=x3 & y=y3 & pos33=2 -> switch_rate: (pos33'=4) + 1-switch_rate: (pos33'=2); + [l] x=x3 & y=y3 & pos33=4 -> exit_rate: (pos33'=3) + 1-exit_rate: (pos33'=0); + [r] x=x3 & y=y3 & pos33=4 -> exit_rate: (pos33'=3) + 1-exit_rate: (pos33'=0); + + + + [in] x=x2 & y=y3 & pos23=0 -> 0.5: (pos23'=1) + 0.5: (pos23'=2); + + [l] x=x2 & y=y3 & pos23=1 -> switch_rate: (pos23'=4) + 1-switch_rate: (pos23'=1); + [r] x=x2 & y=y3 & pos23=1 -> switch_rate: (pos23'=1) + 1-switch_rate: (pos23'=2); + [l] x=x2 & y=y3 & pos23=2 -> switch_rate: (pos23'=2) + 1-switch_rate: (pos23'=1); + [r] x=x2 & y=y3 & pos23=2 -> switch_rate: (pos23'=4) + 1-switch_rate: (pos23'=2); + [l] x=x2 & y=y3 & pos23=4 -> exit_rate: (pos23'=3) + 1-exit_rate: (pos23'=0); + [r] x=x2 & y=y3 & pos23=4 -> exit_rate: (pos23'=3) + 1-exit_rate: (pos23'=0); + +endmodule + + +rewards "steps" + [up] true : 1; + [right] true : 1; + [down] true : 1; + [left] true : 1; +endrewards \ No newline at end of file diff --git a/models/cassandra/native_discount_reward.props b/models/cassandra/native_discount_reward.props new file mode 100644 index 000000000..6fae5d2e7 --- /dev/null +++ b/models/cassandra/native_discount_reward.props @@ -0,0 +1 @@ +R{"reward"}max=? 
[C{0.95}] \ No newline at end of file
diff --git a/paynt/cli.py b/paynt/cli.py
index 92dda7354..50aca7de0 100644
--- a/paynt/cli.py
+++ b/paynt/cli.py
@@ -10,6 +10,8 @@
 import paynt.synthesizer.synthesizer
 import paynt.synthesizer.synthesizer_cegis
 import paynt.synthesizer.policy_tree
+import paynt.parser.alpha_vector_parser
+import paynt.verification.alpha_vector_verification
 
 import click
 import sys
@@ -27,6 +29,9 @@ def setup_logger(log_path = None):
     root.setLevel(logging.DEBUG)
     # root.setLevel(logging.INFO)
 
+    # disable all logging
+    # logging.disable(logging.CRITICAL)
+
     # formatter = logging.Formatter('%(asctime)s %(threadName)s - %(name)s - %(levelname)s - %(message)s')
     formatter = logging.Formatter('%(asctime)s - %(filename)s - %(message)s')
@@ -89,6 +94,12 @@
     type=click.Choice(["cutoff", "clip2", "clip4", "small", "refine", "overapp", "2mil", "5mil", "10mil", "20mil", "30mil", "50mil"]), show_default=True,
     help="run Storm using pre-defined settings and use the result to enhance PAYNT. Can only be used together with --storm-pomdp flag")
+@click.option(
+    "--storm-exploration-heuristic",
+    default="bfs",
+    type=click.Choice(["bfs", "gap", "reachability", "uncertainty"]),
+    show_default=True,
+    help="chooses Storm's belief exploration heuristic. Can only be used together with --storm-pomdp flag")
 @click.option("--iterative-storm", nargs=3, type=int, show_default=True, default=None,
     help="runs the iterative PAYNT/Storm integration. Arguments timeout, paynt_timeout, storm_timeout. Can only be used together with --storm-pomdp flag")
 @click.option("--get-storm-result", default=None, type=int,
@@ -103,6 +114,14 @@ def setup_logger(log_path = None):
     type=click.Choice(["storm", "paynt", "cutoff"]), show_default=True,
     help="specify memory unfold strategy. Can only be used together with --storm-pomdp flag")
+@click.option("--saynt-threads", default=None, type=int,
+    help="run SAYNT with FSCs for non-initial beliefs; 0 selects a dynamic number of different beliefs, N > 0 sets the number of different beliefs")
+@click.option("--saynt-overapprox", is_flag=True, default=False,
+    help="run Storm to obtain belief value overapproximations that can be used to better choose the beliefs from which FSCs should be computed")
+@click.option("--saynt", is_flag=True, default=False,
+    help="run default SAYNT")
+@click.option("--new-saynt", is_flag=True, default=False,
+    help="run SAYNT with multiple cut-off FSCs")
 @click.option("--export-fsc-storm", type=click.Path(), default=None,
     help="path to output file for SAYNT belief FSC")
@@ -130,6 +149,13 @@ def setup_logger(log_path = None):
 @click.option(
     "--constraint-bound", type=click.FLOAT, help="bound for creating constrained POMDP for cassandra models",
 )
+@click.option(
+    "--native-discount", is_flag=True, default=False,
+    help="if set, the MDP discount model checking engine is used (expects Cassandra models)"
+)
+
+@click.option("--alpha-vector-analysis", type=click.Path(), default=None,
+    help="filename containing an alpha vector policy")
 @click.option(
     "--ce-generator", type=click.Choice(["dtmc", "mdp"]), default="dtmc", show_default=True,
@@ -144,12 +170,13 @@ def paynt_run(
     method, incomplete_search, disable_expected_visits,
     fsc_synthesis, pomdp_memory_size, posterior_aware,
-    storm_pomdp, iterative_storm, get_storm_result, storm_options, prune_storm,
-    use_storm_cutoffs, unfold_strategy_storm,
+    storm_pomdp, iterative_storm, get_storm_result, storm_options, storm_exploration_heuristic, prune_storm,
+    use_storm_cutoffs, unfold_strategy_storm, saynt_threads, saynt_overapprox, saynt, new_saynt,
     export_fsc_storm, export_fsc_paynt, export_evaluation,
     all_in_one, all_in_one_maxmem,
     mdp_split_wrt_mdp, mdp_discard_unreachable_choices, mdp_use_randomized_abstraction,
-    constraint_bound,
+    constraint_bound, native_discount,
+    alpha_vector_analysis,
     ce_generator,
     profiling
 ):
@@ -171,24 +198,45 @@ def paynt_run(
     paynt.synthesizer.policy_tree.SynthesizerPolicyTree.discard_unreachable_choices = mdp_discard_unreachable_choices
     paynt.synthesizer.policy_tree.SynthesizerPolicyTree.use_randomized_abstraction = mdp_use_randomized_abstraction
 
+    # QoL change for calling SAYNT; TODO: discuss the default values, because this is what we use for the 15min timeout
+    if saynt:
+        fsc_synthesis = True
+        storm_pomdp = True
+        if iterative_storm is None:
+            iterative_storm = (900, 60, 10)
+    elif new_saynt:
+        fsc_synthesis = True
+        storm_pomdp = True
+        if iterative_storm is None:
+            iterative_storm = (900, 90, 2)
+        if saynt_threads is None:
+            saynt_threads = 0
+
     storm_control = None
     if storm_pomdp:
         storm_control = paynt.quotient.storm_pomdp_control.StormPOMDPControl()
         storm_control.set_options(
-            storm_options, get_storm_result, iterative_storm, use_storm_cutoffs,
-            unfold_strategy_storm, prune_storm, export_fsc_storm, export_fsc_paynt
+            storm_options, get_storm_result, iterative_storm, use_storm_cutoffs,
+            unfold_strategy_storm, prune_storm, export_fsc_storm, export_fsc_paynt, saynt_threads, saynt_overapprox,
+            storm_exploration_heuristic
         )
 
     sketch_path = os.path.join(project, sketch)
     properties_path = os.path.join(project, props)
-    if all_in_one is None:
-        quotient = paynt.parser.sketch.Sketch.load_sketch(sketch_path, properties_path, export, relative_error, precision, constraint_bound)
-        synthesizer = paynt.synthesizer.synthesizer.Synthesizer.choose_synthesizer(quotient, method, fsc_synthesis, storm_control)
-        synthesizer.run(optimum_threshold, export_evaluation)
-    else:
+    if alpha_vector_analysis is not None:
+        quotient = paynt.parser.sketch.Sketch.load_sketch(sketch_path, properties_path, export, relative_error, precision, constraint_bound, native_discount)
+        assert isinstance(quotient, paynt.quotient.pomdp.PomdpQuotient), "expected POMDP input for alpha vector analysis"
+        alpha_vector_set = paynt.parser.alpha_vector_parser.AlphaVectorParser.parse_sarsop_xml(alpha_vector_analysis)
+        alpha_vector_verifier = paynt.verification.alpha_vector_verification.AlphaVectorVerification(quotient)
+        alpha_vector_verifier.verify_alpha_vectors(alpha_vector_set)
+    elif all_in_one is not None:
         all_in_one_program, specification, family = paynt.parser.sketch.Sketch.load_sketch_as_all_in_one(sketch_path, properties_path)
         all_in_one_analysis = paynt.synthesizer.all_in_one.AllInOne(all_in_one_program, specification, all_in_one, all_in_one_maxmem, family)
         all_in_one_analysis.run()
+    else:
+        quotient = paynt.parser.sketch.Sketch.load_sketch(sketch_path, properties_path, export, relative_error, precision, constraint_bound, native_discount)
+        synthesizer = paynt.synthesizer.synthesizer.Synthesizer.choose_synthesizer(quotient, method, fsc_synthesis, storm_control)
+        synthesizer.run(optimum_threshold, export_evaluation)
 
     if profiling:
         profiler.disable()
         print_profiler_stats(profiler)
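The new `paynt/parser/alpha_vector_parser.py` below reads a SARSOP-style XML policy: the first child of the root carries a `vectorLength` attribute and contains one element per alpha vector, with an `action` attribute and space-separated values ending in a trailing space, which is why the parser drops the last split token. A self-contained sketch of a document in that shape; the element names are illustrative assumptions, only the structure matters to the parser:

```python
# Hedged sketch: a tiny document in the shape parse_sarsop_xml() below expects.
# The parser itself uses ET.parse(filename); ET.fromstring is used here only to
# keep the example self-contained. Element names are illustrative.
import xml.etree.ElementTree as ET

example = """<Policy>
  <AlphaVectorPolicy vectorLength="3">
    <Vector action="0">0.0 1.5 2.5 </Vector>
    <Vector action="2">4.0 0.5 1.0 </Vector>
  </AlphaVectorPolicy>
</Policy>"""

root = ET.fromstring(example)
policy = root[0]  # first child carries vectorLength and the vectors
vectors = [[float(x) for x in v.text.split(' ')[:-1]] for v in policy]
actions = [int(v.attrib['action']) for v in policy]
assert vectors == [[0.0, 1.5, 2.5], [4.0, 0.5, 1.0]] and actions == [0, 2]
```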
diff --git a/paynt/parser/alpha_vector_parser.py b/paynt/parser/alpha_vector_parser.py
new file mode 100644
index 000000000..00de37c0a
--- /dev/null
+++ b/paynt/parser/alpha_vector_parser.py
@@ -0,0 +1,45 @@
+import xml.etree.ElementTree as ET
+
+import logging
+logger = logging.getLogger(__name__)
+
+
+# class for a POMDP policy represented as a set of alpha vectors
+# the alpha_vectors property is a list of lists of floats, where each inner list has a length equal to the number of states in the POMDP
+# alpha_vector_actions is a list of ints, where the values in the list represent the chosen action
+class AlphaVectorSet:
+
+    def __init__(self, alpha_vectors, alpha_vector_actions):
+        self.alpha_vectors = alpha_vectors
+        self.alpha_vector_actions = alpha_vector_actions
+        assert len(self.alpha_vectors) == len(self.alpha_vector_actions), "alpha vector count and the number of their corresponding actions should match"
+
+
+# currently supports only the output of the SARSOP C++ implementation; this can be extended to support the Julia POMDP format in the future
+class AlphaVectorParser:
+
+    def __init__(self):
+        pass
+
+    @classmethod
+    def parse_sarsop_xml(cls, filename):
+        logger.info("parsing alpha vectors from SARSOP XML format")
+        tree = ET.parse(filename)
+        root = tree.getroot()
+
+        alpha_vector_policy = root[0]
+        vector_length = int(alpha_vector_policy.attrib['vectorLength'])
+
+        alpha_vectors = []
+        alpha_vector_actions = []
+
+        for alpha_vector in alpha_vector_policy:
+            parsed_vector = [float(x) for x in alpha_vector.text.split(' ')[:-1]]
+            assert vector_length == len(parsed_vector), "the length of the vector differs from the expected length"
+            parsed_action = int(alpha_vector.attrib['action'])
+            alpha_vectors.append(parsed_vector)
+            alpha_vector_actions.append(parsed_action)
+
+        return AlphaVectorSet(alpha_vectors, alpha_vector_actions)
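The `pomdp_parser.py` changes below make the pomdp-solve export discount-safe: target states are redirected to one extra absorbing state (index `num_states`) so that no further reward is collected after the goal, and every state is padded to `max_num_choices` actions by repeating its first action's distribution. A toy, self-contained illustration of the same idea (`pad_and_add_sink` is illustrative, not the exporter itself):

```python
# Toy illustration of the sink-and-padding scheme used by the exporter below:
# redirect target states to a fresh absorbing sink, and pad every state to the
# same number of actions by repeating the first action's distribution.
def pad_and_add_sink(transitions, targets, max_actions):
    # transitions: {state: [ {successor: probability, ...}, ... one dict per action ]}
    sink = max(transitions) + 1
    out = {}
    for state, actions in transitions.items():
        if state in targets:
            out[state] = [{sink: 1.0}] * max_actions   # target: everything goes to the sink
        else:
            out[state] = actions + [actions[0]] * (max_actions - len(actions))
    out[sink] = [{sink: 1.0}] * max_actions            # the sink loops forever
    return out

model = pad_and_add_sink({0: [{1: 1.0}, {0: 0.5, 1: 0.5}], 1: [{1: 1.0}]},
                         targets={1}, max_actions=2)
assert model[1] == [{2: 1.0}, {2: 1.0}] and model[2][0] == {2: 1.0}
```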
diff --git a/paynt/parser/pomdp_parser.py b/paynt/parser/pomdp_parser.py
index a813814f1..237aecf13 100644
--- a/paynt/parser/pomdp_parser.py
+++ b/paynt/parser/pomdp_parser.py
@@ -46,14 +46,20 @@ def read_pomdp_solve(cls, sketch_path):
 
     @classmethod
-    def write_model_in_pomdp_solve_format(cls, pomdp, output_path, property_path):
+    def write_model_in_pomdp_solve_format(cls, pomdp, specification, output_path, property_path):
 
         num_states = pomdp.nr_states
-        num_choices =
pomdp.nr_choices num_obs = pomdp.nr_observations max_num_choices = max([pomdp.get_nr_available_actions(state) for state in range(num_states)]) + main_property = specification.all_properties()[0] + property_minimizing = main_property.minimizing + property_probability_op = main_property.property.raw_formula.is_probability_operator + property_reward_name = None + if not property_probability_op and main_property.property.raw_formula.has_reward_name: + property_reward_name = main_property.property.raw_formula.reward_name + # adding one additional state after target is reached so that no more reward can be collected desc = """\ # auto-generated from PRISM program @@ -63,68 +69,128 @@ def write_model_in_pomdp_solve_format(cls, pomdp, output_path, property_path): actions: {} observations: {} -""".format(num_states,max_num_choices,num_obs) +""".format(num_states+1,max_num_choices,num_obs) # initial state state_init = pomdp.initial_states[0] - initial_distr = [1 if state == state_init else 0 for state in range(num_states)] + initial_distr = [1 if state == state_init else 0 for state in range(num_states+1)] initial_distr = [str(x) for x in initial_distr] initial_distr = ' '.join(initial_distr) desc += f"start:\n{initial_distr}\n\n" + # get target states + target_label = main_property.get_target_label() + target_states = pomdp.labeling.get_states(target_label) + # transition matrix desc += "# transition matrix\n\n" tm = pomdp.transition_matrix for state in range(num_states): - action_index = 0 - group_start = tm.get_row_group_start(state) - group_end = tm.get_row_group_end(state) - trivial_action = group_end == group_start + 1 - for row_index in range(group_start, group_end): - for entry in tm.get_row(row_index): - action_index_str = action_index if not trivial_action else "*" - desc += f"T: {action_index_str} : {state} : {entry.column} {entry.value()}\n" - action_index += 1 + if state in target_states: + action_index = 0 + while action_index < max_num_choices: + desc += f"T: {action_index} : {state} : {num_states} 1.0\n" + action_index += 1 + else: + action_index = 0 + group_start = tm.get_row_group_start(state) + group_end = tm.get_row_group_end(state) + first_action_columns = [] + first_action_values = [] + for row_index in range(group_start, group_end): + for entry in tm.get_row(row_index): + desc += f"T: {action_index} : {state} : {entry.column} {entry.value()}\n" + if action_index == 0: + first_action_columns.append(entry.column) + first_action_values.append(entry.value()) + action_index += 1 + while action_index < max_num_choices: + for index in range(len(first_action_columns)): + desc += f"T: {action_index} : {state} : {first_action_columns[index]} {first_action_values[index]}\n" + action_index += 1 + # add self loops for the new 'after target' state + action_index = 0 + while action_index < max_num_choices: + desc += f"T: {action_index} : {num_states} : {num_states} 1.0\n" + action_index += 1 # observations desc += "\n# observations\n\n" for state in range(num_states): - desc += f"O: * : {state} : {pomdp.observations[state]} 1\n" + desc += f"O: * : {state} : {pomdp.observations[state]} 1.0\n" + # give some observation to the new state + desc += f"O: * : {num_states} : 0 1.0\n" # rewards - if len(pomdp.reward_models) > 0: + if not property_probability_op: # rewards desc += "\n# rewards\n\n" # assuming a single reward model - rewards = next(iter(pomdp.reward_models.values())) + rewards = pomdp.reward_models[property_reward_name] if property_reward_name is not None else 
list(pomdp.reward_models.values())[0] # convert rewards to state-based state_rewards = [] if rewards.has_state_rewards: state_rewards = list(rewards.state_rewards) + # elif rewards.has_state_action_rewards: + # state_action_rewards = list(rewards.state_action_rewards) + # print(state_action_rewards) + # for state in range(num_states): + # group_start = tm.get_row_group_start(state) + # state_rewards.append(state_action_rewards[group_start]) + # else: + # raise TypeError("unknown reward type") + + # print state-based rewards + for state in range(num_states): + rew = state_rewards[state] + if rew != 0: + if property_minimizing: + desc += f"R: * : {state} : * : * {rew*-1}\n" + else: + desc += f"R: * : {state} : * : * {rew}\n" elif rewards.has_state_action_rewards: state_action_rewards = list(rewards.state_action_rewards) + state_action_index = 0 for state in range(num_states): + action_index = 0 group_start = tm.get_row_group_start(state) - state_rewards.append(state_action_rewards[group_start]) - else: - raise TypeError("unknown reward type") - - # print state-based rewards - for state in range(num_states): - rew = state_rewards[state] - if rew != 0: - desc += f"R: * : {state} : * : * {rew}\n" + group_end = tm.get_row_group_end(state) + first_action_reward = None + for row_index in range(group_start, group_end): + if state_action_rewards[state_action_index] != 0: + if property_minimizing: + desc += f"R: {action_index} : {state} : * : * {state_action_rewards[state_action_index]*-1}\n" + if first_action_reward is None: + first_action_reward = state_action_rewards[state_action_index]*-1 + else: + desc += f"R: {action_index} : {state} : * : * {state_action_rewards[state_action_index]}\n" + if first_action_reward is None: + first_action_reward = state_action_rewards[state_action_index] + action_index += 1 + state_action_index += 1 + if first_action_reward is not None: + while action_index < max_num_choices: + desc += f"R: {action_index} : {state} : * : * {first_action_reward}\n" + action_index += 1 + else: + desc += "\n# rewards from reachability\n\n" + for state in target_states: + if property_minimizing: + desc += f"R: * : * : {state} : * -1.0\n" + else: + desc += f"R: * : * : {state} : * 1.0\n" + # ready to print logger.info("Writing POMDP in pomdp-solve format to {} ...".format(output_path)) with open(output_path, 'w') as f: f.write(desc) - logger.info("Writing default discount property to {} ...".format(property_path)) - with open(property_path, 'w') as f: - f.write('R{"rew0"}max=? [F "target"]') + # logger.info("Writing default discount property to {} ...".format(property_path)) + # with open(property_path, 'w') as f: + # f.write('R{"rew0"}max=? 
[F "target"]') logger.info("Write OK, aborting ...") exit(0) diff --git a/paynt/parser/sketch.py b/paynt/parser/sketch.py index 6fbb8d1b6..33ef6caa6 100644 --- a/paynt/parser/sketch.py +++ b/paynt/parser/sketch.py @@ -57,7 +57,7 @@ class Sketch: @classmethod def load_sketch(cls, sketch_path, properties_path, - export=None, relative_error=0, precision=1e-4, constraint_bound=None): + export=None, relative_error=0, precision=1e-4, constraint_bound=None, native_discount=False): prism = None explicit_quotient = None @@ -100,9 +100,17 @@ def load_sketch(cls, sketch_path, properties_path, if decpomdp_manager is None: raise SyntaxError logger.info("applying discount factor transformation...") - decpomdp_manager.apply_discount_factor_transformation() + if not native_discount: + decpomdp_manager.apply_discount_factor_transformation() explicit_quotient = decpomdp_manager.construct_pomdp() - if constraint_bound is not None: + if native_discount: + paynt.quotient.quotient.Quotient.discounted_expected_visits = decpomdp_manager.discount_factor + optimality = paynt.verification.property.construct_discount_property( + decpomdp_manager.reward_model_name, + decpomdp_manager.reward_minimizing, + decpomdp_manager.discount_factor) + specification = paynt.verification.property.Specification([optimality]) + elif constraint_bound is not None: specification = PrismParser.parse_specification(properties_path, relative_error) else: optimality = paynt.verification.property.construct_reward_property( @@ -116,6 +124,10 @@ def load_sketch(cls, sketch_path, properties_path, assert filetype is not None, "unknow format of input file" logger.info("sketch parsing OK") + + if specification.has_optimality: + optimality_subformula = specification.optimality.formula.subformula + paynt.quotient.models.Mdp.native_cassandra = (optimality_subformula.is_discounted_total_reward_formula or optimality_subformula.is_discounted_cumulative_reward_formula) paynt.quotient.models.Mdp.initialize(specification) paynt.verification.property.Property.initialize() @@ -131,7 +143,7 @@ def load_sketch(cls, sketch_path, properties_path, logger.info(f"found the following specification {specification}") if export is not None: - Sketch.export(export, sketch_path, jani_unfolder, explicit_quotient) + Sketch.export(export, sketch_path, jani_unfolder, explicit_quotient, specification) logger.info("export OK, aborting...") exit(0) @@ -192,7 +204,9 @@ def load_sketch_as_all_in_one(cls, sketch_path, properties_path): return prism, specification, family @classmethod - def export(cls, export, sketch_path, jani_unfolder, explicit_quotient): + def export(cls, export, sketch_path, jani_unfolder, explicit_quotient, specification=None): + if (explicit_quotient.is_nondeterministic_model and explicit_quotient.is_partially_observable): + explicit_quotient = stormpy.pomdp.make_canonic(explicit_quotient) if export == "jani": assert jani_unfolder is not None, "jani unfolder was not used" output_path = substitute_suffix(sketch_path, '.', 'jani') @@ -205,7 +219,7 @@ def export(cls, export, sketch_path, jani_unfolder, explicit_quotient): "cannot '--export pomdp' with non-POMDP sketches" output_path = substitute_suffix(sketch_path, '.', 'pomdp') property_path = substitute_suffix(sketch_path, '/', 'props.pomdp') - PomdpParser.write_model_in_pomdp_solve_format(explicit_quotient, output_path, property_path) + PomdpParser.write_model_in_pomdp_solve_format(explicit_quotient, specification, output_path, property_path) @classmethod diff --git a/paynt/quotient/models.py 
diff --git a/paynt/quotient/models.py b/paynt/quotient/models.py
index ff68fa0b1..6063d697e 100644
--- a/paynt/quotient/models.py
+++ b/paynt/quotient/models.py
@@ -11,6 +11,9 @@ class Mdp:
     # options for the construction of chains
     builder_options = None
 
+    # flag used when PAYNT calls discounted value iteration on Cassandra models
+    native_cassandra = False
+
     @classmethod
     def initialize(cls, specification):
         # builder options
@@ -58,7 +61,12 @@ def initial_state(self):
     def model_check_property(self, prop, alt=False):
         formula = prop.formula if not alt else prop.formula_alt
         result = paynt.verification.property.Property.model_check(self.model,formula)
-        value = result.at(self.initial_state)
+        if not self.native_cassandra:
+            value = result.at(self.initial_state)
+        else:
+            value = 0
+            for entry in self.model.transition_matrix.get_row(0):
+                value += entry.value() * result.at(entry.column)
         return paynt.verification.property_result.PropertyResult(prop, result, value)
 
 class SubMdp(Mdp):
diff --git a/paynt/quotient/pomdp.py b/paynt/quotient/pomdp.py
index f0f9a29ae..fb132fd21 100644
--- a/paynt/quotient/pomdp.py
+++ b/paynt/quotient/pomdp.py
@@ -395,7 +395,7 @@ def estimate_scheduler_difference(self, mdp, quotient_choice_map, inconsistent_a
             source_state = choice_to_state[choice_0]
             source_state_visits = expected_visits[source_state]
-            # assert source_state_visits != 0
+            assert source_state_visits != math.inf, f"state visits for state {source_state} is inf"
             if source_state_visits == 0:
                 continue
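The `quotient.py` hunk below computes expected visit counts (EVTs) on a discount-transformed DTMC whenever `discounted_expected_visits < 1`, and, for maximizing properties, replaces infinite visit counts with a large finite surrogate (ten times the largest finite count). The point of discounting here is that visit counts of recurrent states become finite; a tiny numerical check of that fact:

```python
# Why discounting makes expected visit counts finite: for a state revisited at
# every step, the discounted count sum_t gamma^t converges to 1/(1 - gamma),
# whereas the undiscounted count diverges (the math.inf case the surrounding
# code guards against).
gamma = 0.95
truncated_series = sum(gamma**t for t in range(10_000))  # truncated geometric series
assert abs(truncated_series - 1 / (1 - gamma)) < 1e-6
```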
diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py
index 73f461dc3..8f3967055 100644
--- a/paynt/quotient/quotient.py
+++ b/paynt/quotient/quotient.py
@@ -16,6 +16,9 @@ class Quotient:
     # if True, expected visits will not be computed for hole scoring
     disable_expected_visits = False
 
+    # if the value is <1, the DTMC is transformed to a discounted version using this value as the discount factor before computing EVTs
+    discounted_expected_visits = 1
+
     @staticmethod
     def make_vector_defined(vector):
         vector_noinf = [ value if value != math.inf else 0 for value in vector]
@@ -203,13 +206,22 @@ def compute_expected_visits(self, mdp, prop, choices):
         # extract DTMC induced by this MDP-scheduler
         sub_mdp,state_map,_ = self.restrict_mdp(mdp, choices)
         dtmc = Quotient.mdp_to_dtmc(sub_mdp)
-        dtmc_visits = paynt.verification.property.Property.compute_expected_visits(dtmc)
+
+        if self.discounted_expected_visits < 1:
+            # compute discounted visits
+            discounted_dtmc = payntbind.synthesis.apply_discount_transformation_to_dtmc(dtmc, self.discounted_expected_visits)
+            dtmc_visits = paynt.verification.property.Property.compute_expected_visits(discounted_dtmc)
+            dtmc_visits = dtmc_visits[:-1] # remove expected visits for sink state
+        else:
+            # compute visits
+            dtmc_visits = paynt.verification.property.Property.compute_expected_visits(dtmc)
 
         # handle infinity- and zero-visits
         if prop.minimizing:
             dtmc_visits = Quotient.make_vector_defined(dtmc_visits)
         else:
-            dtmc_visits = [ value if value != math.inf else 0 for value in dtmc_visits]
+            max_without_inf = max([value for value in dtmc_visits if value != math.inf])
+            dtmc_visits = [ value if value != math.inf else max_without_inf*10 for value in dtmc_visits]
 
         # map vector of expected visits onto the state space of the quotient MDP
         expected_visits = [0] * mdp.nr_states
@@ -334,7 +346,6 @@ def discard(self, mdp, hole_assignments, core_suboptions, other_suboptions, inco
 
         return reduced_design_space, suboptions
 
-
     def split(self, family, incomplete_search):
 
         mdp = family.mdp
@@ -373,7 +384,6 @@ def split(self, family, incomplete_search):
 
         return design_subspaces
 
-
     def double_check_assignment(self, assignment):
         '''
         Double-check whether this assignment truly improves optimum.
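The `storm_pomdp_control.py` changes below coordinate per-belief PAYNT worker threads with the main SAYNT loop via `Queue` messages such as "timeout" (see `create_thread_control`, which posts to `synthesizer.interactive_queue`). A minimal, self-contained sketch of that message-driven worker pattern; all names here are illustrative, not the actual PAYNT API:

```python
# Minimal sketch (illustrative names) of the message-driven worker pattern the
# control class below relies on: a worker thread blocks on its queue and reacts
# to control messages ("timeout" = pause and report, "stop" = terminate).
from queue import Queue
from threading import Thread

def worker(queue, results):
    while True:
        msg = queue.get()          # blocks until the controller posts a message
        if msg == "timeout":       # pause request: report current progress
            results.append("paused")
        elif msg == "stop":
            return

q, results = Queue(), []
t = Thread(target=worker, args=(q, results))
t.start()
q.put("timeout")
q.put("stop")
t.join()
assert results == ["paused"]
```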
diff --git a/paynt/quotient/storm_pomdp_control.py b/paynt/quotient/storm_pomdp_control.py
index 23d0e2823..f4fd24fca 100644
--- a/paynt/quotient/storm_pomdp_control.py
+++ b/paynt/quotient/storm_pomdp_control.py
@@ -1,6 +1,7 @@
 import stormpy
 import stormpy.pomdp
 import payntbind
+import paynt.quotient.pomdp
 
 import paynt.utils.profiler
@@ -8,6 +9,9 @@
 from threading import Thread
 from time import sleep
 
+import math
+
+from queue import Queue
 
 import logging
 logger = logging.getLogger(__name__)
@@ -49,6 +53,13 @@ class StormPOMDPControl:
     paynt_timeout = None
     storm_timeout = None
 
+    enhanced_saynt = None
+    enhanced_saynt_threads = []
+    storm_fsc_usage = {}
+    total_fsc_used = 0
+    use_uniform_obs_beliefs = True
+    dynamic_thread_timeout = False
+
     storm_terminated = False
 
     s_queue = None
@@ -57,12 +68,14 @@ class StormPOMDPControl:
     export_fsc_storm = None
     export_fsc_paynt = None
 
+
     def __init__(self):
         pass
 
     def set_options(self,
-        storm_options, get_storm_result, iterative_storm, use_storm_cutoffs,
-        unfold_strategy_storm, prune_storm, export_fsc_storm, export_fsc_paynt
+        storm_options="cutoff", get_storm_result=None, iterative_storm=None, use_storm_cutoffs=False,
+        unfold_strategy_storm="storm", prune_storm=False, export_fsc_storm=None, export_fsc_paynt=None, enhanced_saynt=None,
+        saynt_overapprox=False, storm_exploration_heuristic="bfs"
     ):
         self.storm_options = storm_options
         if get_storm_result is not None:
@@ -73,6 +86,12 @@ def set_options(self,
         self.unfold_strategy_storm = unfold_strategy_storm
         self.export_fsc_storm = export_fsc_storm
         self.export_fsc_paynt = export_fsc_paynt
+        self.enhanced_saynt = enhanced_saynt
+        self.saynt_overapprox = saynt_overapprox
+        self.storm_exploration_heuristic = storm_exploration_heuristic
+
+        if self.saynt_overapprox:
+            self.dynamic_thread_timeout = True
 
         self.incomplete_exploration = False
         if prune_storm:
@@ -85,6 +104,23 @@ def set_options(self,
         elif unfold_strategy_storm == "cutoff":
             self.unfold_cutoff = True
 
+    # create a copy of the storm control with the same settings
+    def copy(self):
+        copy_storm_control = StormPOMDPControl()
+        copy_storm_control.storm_options = self.storm_options
+        copy_storm_control.use_cutoffs = self.use_cutoffs
+        copy_storm_control.unfold_strategy_storm = self.unfold_strategy_storm
+        copy_storm_control.incomplete_exploration = self.incomplete_exploration
+        copy_storm_control.unfold_storm = self.unfold_storm
+        copy_storm_control.unfold_cutoff = self.unfold_cutoff
+        copy_storm_control.iteration_timeout = self.iteration_timeout
+        copy_storm_control.storm_exploration_heuristic = self.storm_exploration_heuristic
+
+        return copy_storm_control
+
+
     def get_storm_result(self):
         self.run_storm_analysis()
         self.parse_results(self.quotient)
@@ -148,11 +184,11 @@ def run_storm_analysis(self):
                 #print(result.lower_bound)
             else:
                 print(f'-----------Storm----------- \
-                \nValue = {value} | Time elapsed = {round(storm_timer.read(),1)}s | FSC size = {size}\nFSC (dot) = {result.induced_mc_from_scheduler.to_dot()}\n', flush=True)
+                \nValue = {value} | Time elapsed = {round(storm_timer.read(),1)}s | FSC size = {size}\n', flush=True)
             exit()
 
         print(f'-----------Storm----------- \
-        \nValue = {value} | Time elapsed = {round(storm_timer.read(),1)}s | FSC size = {size}\nFSC (dot) = {result.induced_mc_from_scheduler.to_dot()}\n', flush=True)
+        \nValue = {value} | Time elapsed = {round(storm_timer.read(),1)}s | FSC size = {size}\n', flush=True)
 
         self.latest_storm_result = result
         if self.quotient.specification.optimality.minimizing:
@@ -167,9 +203,9 @@ def interactive_storm_setup(self):
         belmc = stormpy.pomdp.BeliefExplorationModelCheckerDouble(self.pomdp, options)
 
     # start interactive belief model checker, this function is called only once to start the storm thread. To resume Storm computation 'interactive_storm_resume' is used
-    def interactive_storm_start(self, storm_timeout):
+    def interactive_storm_start(self, storm_timeout, enhanced=False):
         self.storm_thread = Thread(target=self.interactive_run, args=(belmc,))
-        control_thread = Thread(target=self.interactive_control, args=(belmc, True, storm_timeout,))
+        control_thread = Thread(target=self.interactive_control, args=(belmc, True, storm_timeout, enhanced))
 
         logger.info("Interactive Storm started")
         control_thread.start()
@@ -180,8 +216,8 @@ def interactive_storm_start(self, storm_timeout):
         self.belief_explorer = belmc.get_interactive_belief_explorer()
 
     # resume interactive belief model checker, should be called only after belief model checker was previously started
-    def interactive_storm_resume(self, storm_timeout):
-        control_thread = Thread(target=self.interactive_control, args=(belmc, False, storm_timeout,))
+    def interactive_storm_resume(self, storm_timeout, enhanced=False):
+        control_thread = Thread(target=self.interactive_control, args=(belmc, False, storm_timeout, enhanced))
 
         logger.info("Interactive Storm resumed")
         control_thread.start()
@@ -225,7 +261,7 @@ def interactive_run(self, belmc):
         logger.info("Storm POMDP analysis completed")
 
     # ensures correct execution of one loop of Storm exploration
-    def interactive_control(self, belmc, start, storm_timeout):
+    def interactive_control(self, belmc, start, storm_timeout, enhanced=False):
         if belmc.has_converged():
             logger.info("Storm already terminated.")
             return
@@ -233,20 +269,23 @@ def interactive_control(self, belmc, start, storm_timeout):
         # Update cut-off FSC values provided by PAYNT
         if not start:
             logger.info("Updating FSC values in Storm")
-            self.belief_explorer.set_fsc_values(self.paynt_export)
+            self.belief_explorer.set_fsc_values(self.paynt_export, 0)
             belmc.continue_unfolding()
 
         # wait for Storm to start exploring
         while not belmc.is_exploring():
             if belmc.has_converged():
                 break
-            sleep(1)
+            sleep(0.01)
 
         sleep(storm_timeout)
         if self.storm_terminated:
             return
         logger.info("Pausing Storm")
-        belmc.pause_unfolding()
+        if enhanced:
+            belmc.pause_unfolding_for_cut_off_values()
+        else:
+            belmc.pause_unfolding()
 
         # wait for the result to be constructed from the explored belief MDP
         while not belmc.is_result_ready():
@@ -254,6 +293,10 @@ def interactive_control(self, belmc, start, storm_timeout):
 
         result = belmc.get_interactive_result()
 
+        if enhanced:
+            self.beliefs = belmc.get_beliefs_from_exchange()
+            self.belief_overapp_values = belmc.get_exchange_overapproximation_map()
+
         value = result.upper_bound if self.quotient.specification.optimality.minimizing else result.lower_bound
         size = self.get_belief_controller_size(result, self.paynt_fsc_size)
@@ -277,8 +320,27 @@
 
     ########
     # Different options for Storm below (would be nice to make this more succinct)
 
+    def get_base_options(self, options):
+        if paynt.quotient.models.Mdp.native_cassandra:
+            options.recompute_initial_value_without_discounting = True
+
options.exploration_heuristic = self.get_exploration_heuristic() + return options + + + def get_exploration_heuristic(self): + if self.storm_exploration_heuristic == "bfs": + return stormpy.pomdp.BeliefExplorationHeuristic.BreadthFirst + if self.storm_exploration_heuristic == "gap": + return stormpy.pomdp.BeliefExplorationHeuristic.GapPrio + if self.storm_exploration_heuristic == "reachability": + return stormpy.pomdp.BeliefExplorationHeuristic.ProbabilityPrio + if self.storm_exploration_heuristic == "uncertainty": + return stormpy.pomdp.BeliefExplorationHeuristic.ExcessUncertainty + + def get_cutoff_options(self, belief_states=100000): options = stormpy.pomdp.BeliefExplorationModelCheckerOptionsDouble(False, True) + options = self.get_base_options(options) options.use_state_elimination_cutoff = False options.size_threshold_init = belief_states options.use_clipping = False @@ -286,6 +348,7 @@ def get_cutoff_options(self, belief_states=100000): def get_overapp_options(self, belief_states=20000000): options = stormpy.pomdp.BeliefExplorationModelCheckerOptionsDouble(True, False) + options = self.get_base_options(options) options.use_state_elimination_cutoff = False options.size_threshold_init = belief_states options.use_clipping = False @@ -293,6 +356,7 @@ def get_overapp_options(self, belief_states=20000000): def get_refine_options(self, step_limit=0): options = stormpy.pomdp.BeliefExplorationModelCheckerOptionsDouble(False, True) + options = self.get_base_options(options) options.use_state_elimination_cutoff = False options.size_threshold_init = 0 options.size_threshold_factor = 2 @@ -306,6 +370,7 @@ def get_refine_options(self, step_limit=0): def get_clip2_options(self): options = stormpy.pomdp.BeliefExplorationModelCheckerOptionsDouble(False, True) + options = self.get_base_options(options) options.use_state_elimination_cutoff = False options.size_threshold_init = 0 options.use_clipping = True @@ -315,6 +380,7 @@ def get_clip2_options(self): def get_clip4_options(self): options = stormpy.pomdp.BeliefExplorationModelCheckerOptionsDouble(False, True) + options = self.get_base_options(options) options.use_state_elimination_cutoff = False options.size_threshold_init = 0 options.use_clipping = True @@ -323,9 +389,11 @@ def get_clip4_options(self): return options def get_interactive_options(self): - options = stormpy.pomdp.BeliefExplorationModelCheckerOptionsDouble(False, True) + options = stormpy.pomdp.BeliefExplorationModelCheckerOptionsDouble(self.saynt_overapprox, True) + options = self.get_base_options(options) options.use_state_elimination_cutoff = False options.size_threshold_init = 0 + options.resolution_init = 2 options.skip_heuristic_schedulers = False options.interactive_unfolding = True options.gap_threshold_init = 0 @@ -358,6 +426,61 @@ def storm_pomdp_analysis(model, formulas): return result + + + def create_thread_control(self, belief_info, belief_type, obs_option=True): + if belief_type == "obs": + if obs_option: + obs_states = [] + for state in range(self.quotient.pomdp.nr_states): + if self.quotient.pomdp.observations[state] == belief_info: + obs_states.append(state) + prob = 1/len(obs_states) + initial_belief = {x:prob for x in obs_states} + else: + initial_belief = self.average_belief_data[belief_info] + elif belief_type == "sup": + states = list(belief_info) + prob = 1/len(states) + initial_belief = {x:prob for x in states} + elif belief_type == "belief": + initial_belief = self.beliefs[belief_info] + elif belief_type == "custom": + initial_belief = belief_info + else: + 
assert False, "wrong belief type" + + sub_pomdp = self.sub_pomdp_builder.start_from_belief(initial_belief) + sub_pomdp_quotient = paynt.quotient.pomdp.PomdpQuotient(sub_pomdp, self.quotient.specification.copy()) + + sub_pomdp_storm_control = self.copy() + + sub_pomdp_synthesizer = paynt.synthesizer.synthesizer_pomdp.SynthesizerPOMDP(sub_pomdp_quotient, "ar", sub_pomdp_storm_control) + sub_pomdp_synthesizer.main_synthesizer = False + + sub_pomdp_thread = Thread(target=sub_pomdp_synthesizer.strategy_iterative_storm, args=(True, False)) + + sub_pomdp_states_to_full = self.sub_pomdp_builder.state_sub_to_full + + if belief_type == "obs": + belief_thread_type_label = "obs_" + str(belief_info) + elif belief_type == "belief": + belief_thread_type_label = "belief_" + str(belief_info) + else: + belief_thread_type_label = "custom" + + belief_thread_data = {"synthesizer": sub_pomdp_synthesizer, "thread": sub_pomdp_thread, "state_map": sub_pomdp_states_to_full, "active": True, "type": belief_thread_type_label} + + self.enhanced_saynt_threads.append(belief_thread_data) + + # create index for FSC in Storm + thread_index = self.belief_explorer.add_fsc_values([]) + assert thread_index == len(self.enhanced_saynt_threads), "Newly created thread and its index in Storm are not matching" + + self.enhanced_saynt_threads[thread_index-1]["thread"].start() + self.enhanced_saynt_threads[thread_index-1]["synthesizer"].interactive_queue.put("timeout") + + # parse the current Storm and PAYNT results if they are available def parse_results(self, quotient): if self.latest_storm_result is not None: @@ -381,6 +504,8 @@ def parse_storm_result(self, quotient): result = {x:[] for x in range(quotient.observations)} result_no_cutoffs = {x:[] for x in range(quotient.observations)} + + self.storm_fsc_usage = {} for state in self.latest_storm_result.induced_mc_from_scheduler.states: # TODO what if there were no labels in the model? 
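The next hunk generalizes the parsing of cut-off states: each cut-off state of Storm's induced Markov chain now carries a `finite_mem_<k>` label identifying the FSC that was plugged in there, where `k = 0` denotes the main PAYNT FSC and `k > 0` the FSC of enhanced-SAYNT thread `k-1`. A minimal sketch of this bookkeeping in isolation (the helper name `count_fsc_usage` is illustrative, not part of the patch):

```python
# Illustrative sketch: tally which FSC Storm used at each cut-off state,
# assuming cut-off states carry labels of the form 'finite_mem_<k>'.
def count_fsc_usage(induced_mc_states):
    usage = {}  # fsc_index -> number of cut-off states using that FSC
    for state in induced_mc_states:
        for label in state.labels:
            if 'finite_mem' in label:
                fsc_index = int(label.split('_')[-1])  # 0 = main FSC, k > 0 = thread k-1
                usage[fsc_index] = usage.get(fsc_index, 0) + 1
                break
    return usage
```

These per-index counts are exactly what `storm_fsc_usage` accumulates below, with `total_fsc_used` as their sum.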
@@ -426,15 +551,24 @@ def parse_storm_result(self, quotient): # parse cut-off states else: - if 'finite_mem' in state.labels and not finite_mem: - finite_mem = True - self.parse_paynt_result(self.quotient) - for obs, actions in self.result_dict_paynt.items(): - for action in actions: - if action not in result_no_cutoffs[obs]: - result_no_cutoffs[obs].append(action) - if action not in result[obs]: - result[obs].append(action) + for label in state.labels: + if 'finite_mem' in label: + fsc_index = int(label.split('_')[-1]) + if fsc_index not in self.storm_fsc_usage.keys(): + self.storm_fsc_usage[fsc_index] = 1 + if fsc_index == 0: + finite_mem_dict = self.result_dict_paynt + else: + finite_mem_dict = self.enhanced_saynt_threads[fsc_index-1]["synthesizer"].storm_control.result_dict_paynt + for obs, actions in finite_mem_dict.items(): + for action in actions: + if action not in result_no_cutoffs[obs]: + result_no_cutoffs[obs].append(action) + if action not in result[obs]: + result[obs].append(action) + else: + self.storm_fsc_usage[fsc_index] += 1 + break else: if len(cutoff_epxloration) == 0: continue @@ -471,7 +605,158 @@ def parse_storm_result(self, quotient): del result_no_cutoffs[obs] self.result_dict = result - self.result_dict_no_cutoffs = result_no_cutoffs + self.result_dict_no_cutoffs = result_no_cutoffs + self.total_fsc_used = sum(list(self.storm_fsc_usage.values())) + + + def parse_belief_data(self): + obs_dict = {} + support_dict = {} + total_obs_belief_dict = {} + + for belief in self.beliefs.values(): + belief_obs = self.quotient.pomdp.get_observation(list(belief.keys())[0]) + if belief_obs not in obs_dict.keys(): + obs_dict[belief_obs] = 1 + else: + obs_dict[belief_obs] += 1 + + if belief_obs not in total_obs_belief_dict.keys(): + total_obs_belief_dict[belief_obs] = {} + for state, probability in belief.items(): + total_obs_belief_dict[belief_obs][state] = probability + else: + for state, probability in belief.items(): + if state not in total_obs_belief_dict[belief_obs].keys(): + total_obs_belief_dict[belief_obs][state] = probability + else: + total_obs_belief_dict[belief_obs][state] += probability + + support = tuple(list(belief.keys())) + if support not in support_dict.keys(): + support_dict[support] = 1 + else: + support_dict[support] += 1 + + average_obs_belief_dict = {} + + for obs, total_dict in total_obs_belief_dict.items(): + total_value = sum(list(total_dict.values())) + average_obs_belief_dict[obs] = {} + for state, state_total_val in total_dict.items(): + average_obs_belief_dict[obs][state] = state_total_val / total_value + + percent_1 = round(len(self.beliefs)/100) + + sorted_obs = sorted(obs_dict, key=obs_dict.get) + sorted_support = sorted(support_dict, key=support_dict.get) + + main_observation_list = [obs for obs in sorted_obs if obs_dict[obs] > percent_1] + main_support_list = [sup for sup in sorted_support if support_dict[sup] > percent_1] + + residue_observation_list = [obs for obs in sorted_obs if obs not in main_observation_list] + residue_support_list = [sup for sup in sorted_support if sup not in main_support_list] + + self.main_obs_belief_data = main_observation_list + self.main_support_belief_data = main_support_list + self.residue_obs_belief_data = residue_observation_list + self.average_belief_data = average_obs_belief_dict + + + def compute_belief_value(self, belief, obs, fsc_values=[]): + best_value = None + + for values in fsc_values: + if obs >= len(values): + continue + for index, mem_values in enumerate(values[obs]): + vl = 0 + for state, prob in 
belief.items(): + if not state in mem_values.keys(): + break + vl += prob * mem_values[state] + else: + if best_value is None: + best_value = vl + elif (self.quotient.specification.optimality.minimizing and best_value > vl) or (not(self.quotient.specification.optimality.minimizing) and best_value < vl): + best_value = vl + + return best_value + + + def overapp_belief_value_analysis(self, number_of_beliefs): + + export_values = [x["synthesizer"].storm_control.paynt_export for x in self.enhanced_saynt_threads] + export_values.append(self.paynt_export) + + analysed_beliefs = 0 + + obs_differences = {} + obs_count = {} + belief_values_dif = {} + + for belief_id, belief in self.beliefs.items(): + if belief_id in self.belief_overapp_values.keys(): + analysed_beliefs += 1 + belief_obs = self.quotient.pomdp.get_observation(list(belief.keys())[0]) + belief_value = self.compute_belief_value(belief, belief_obs, export_values) + if belief_value is None: + continue + belief_values_dif[belief_id] = abs(self.belief_overapp_values[belief_id] - belief_value) + + if belief_obs not in obs_differences.keys(): + obs_count[belief_obs] = 1 + obs_differences[belief_obs] = abs(self.belief_overapp_values[belief_id] - belief_value) + else: + obs_count[belief_obs] += 1 + obs_differences[belief_obs] += abs(self.belief_overapp_values[belief_id] - belief_value) + + obs_values = {obs_key:obs_dif/obs_count[obs_key] for obs_key, obs_dif in obs_differences.items()} + sorted_obs_values = {k: v for k, v in sorted(obs_values.items(), key=lambda item: item[1], reverse=True)} + + sorted_belief_values_dif = {k: v for k, v in sorted(belief_values_dif.items(), key=lambda item: item[1], reverse=True)} + + obs_to_activate = [] + beliefs_to_activate = [] + + if number_of_beliefs == 0: + number_of_threads_to_activate = len(self.main_obs_belief_data) + if number_of_threads_to_activate <= 4: + number_of_threads_to_activate = 5 + else: + number_of_threads_to_activate = number_of_beliefs - 1 + + for obs in sorted_obs_values.keys(): + obs_to_activate.append(obs) + number_of_threads_to_activate -= 1 + if number_of_threads_to_activate <= 2: + break + + added_belief_obs = [self.quotient.pomdp.get_observation(list(self.beliefs[list(sorted_belief_values_dif.keys())[0]].keys())[0])] + beliefs_to_activate.append(list(sorted_belief_values_dif.keys())[0]) + number_of_threads_to_activate -= 1 + + for belief_id, _ in sorted_belief_values_dif.items(): + if number_of_threads_to_activate > 1 and belief_id not in beliefs_to_activate: + added_belief_obs.append(self.quotient.pomdp.get_observation(list(self.beliefs[belief_id].keys())[0])) + beliefs_to_activate.append(belief_id) + number_of_threads_to_activate -= 1 + continue + if self.quotient.pomdp.get_observation(list(self.beliefs[belief_id].keys())[0]) not in added_belief_obs: + beliefs_to_activate.append(belief_id) + break + + self.activate_threads(obs_to_activate, beliefs_to_activate) + + print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX") + print(sorted_obs_values) + my_iter = 0 + for belief_id, diff in sorted_belief_values_dif.items(): + print(belief_id, self.quotient.pomdp.get_observation(list(self.beliefs[belief_id].keys())[0]), diff) + my_iter += 1 + if my_iter == 20: + break + print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX") # help function for cut-off parsing, returns list of actions for given choice_string @@ -507,7 +792,10 @@ def parse_paynt_result(self, quotient): if name.startswith('M'): continue name = name.strip('A()') - obs = name.split(',')[0] + if not paynt.quotient.pomdp.PomdpQuotient.posterior_aware: + 
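# the observation label is the first component of the hole name for posterior-unaware FSCs and the second for posterior-aware ones +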
obs = name.split(',')[0] + else: + obs = name.split(',')[1] observation = self.quotient.observation_labels.index(obs) option = self.latest_paynt_result.hole_options(hole)[0] @@ -617,6 +905,39 @@ def is_memory_needed(self): memory_needed = True break return memory_needed + + def deactivate_threads(self): + for thread in self.enhanced_saynt_threads: + thread["active"] = False + + def activate_threads(self, obs_threads, belief_threads): + logger.info(f"activating observation threads: {obs_threads}") + + for obs in obs_threads: + for thread in self.enhanced_saynt_threads: + thread_type = thread["type"].split('_') + thread_val = thread_type[1] + thread_type = thread_type[0] + if thread_type == "obs" and int(thread_val) == obs: + thread["active"] = True + break + else: + self.create_thread_control(obs, "obs", self.use_uniform_obs_beliefs) + + logger.info(f"activating belief threads: {belief_threads}") + + for belief in belief_threads: + for thread in self.enhanced_saynt_threads: + thread_type = thread["type"].split('_') + thread_val = thread_type[1] + thread_type = thread_type[0] + if thread_type == "belief" and int(thread_val) == belief: + thread["active"] = True + break + else: + self.create_thread_control(belief, "belief", self.use_uniform_obs_beliefs) + + # update all of the important data structures according to the current results def update_data(self): @@ -674,8 +995,6 @@ def get_belief_controller_size(self, storm_result, paynt_fsc_size=None): for state in belief_mc.states: if 'cutoff' not in state.labels: non_frontier_states += 1 - elif 'finite_mem' in state.labels and not uses_fsc: - uses_fsc = True else: for label in state.labels: if 'sched_' in label: @@ -683,11 +1002,18 @@ def get_belief_controller_size(self, storm_result, paynt_fsc_size=None): if int(scheduler_index) in used_randomized_schedulers: continue used_randomized_schedulers.append(int(scheduler_index)) + if 'finite_mem' in label: + uses_fsc = True if uses_fsc: # Compute the size of FSC if paynt_fsc_size: fsc_size = paynt_fsc_size + for fsc_index in self.storm_fsc_usage.keys(): + if fsc_index == 0: + continue + fsc_size += self.enhanced_saynt_threads[fsc_index-1]["synthesizer"].storm_control.paynt_fsc_size + for index in used_randomized_schedulers: observation_actions = {x:[] for x in range(self.quotient.observations)} @@ -701,6 +1027,6 @@ def get_belief_controller_size(self, storm_result, paynt_fsc_size=None): observation_actions[observation].append(action) randomized_schedulers_size += sum(list([len(support) for support in observation_actions.values()])) * 3 - result_size = non_frontier_states + belief_mc.nr_transitions + fsc_size + randomized_schedulers_size + result_size = non_frontier_states + 2*belief_mc.nr_transitions + fsc_size + randomized_schedulers_size return result_size diff --git a/paynt/synthesizer/statistic.py b/paynt/synthesizer/statistic.py index 89090e5b9..1760b7b8e 100644 --- a/paynt/synthesizer/statistic.py +++ b/paynt/synthesizer/statistic.py @@ -152,6 +152,7 @@ def status(self): def print_status(self): if not self.synthesis_timer.read() > self.status_horizon: return + print(self.status(), flush=True) self.status_horizon = self.synthesis_timer.read() + Statistic.status_period_seconds diff --git a/paynt/synthesizer/synthesizer_ar_storm.py b/paynt/synthesizer/synthesizer_ar_storm.py index fb25afdaf..0e1aa7b0a 100644 --- a/paynt/synthesizer/synthesizer_ar_storm.py +++ b/paynt/synthesizer/synthesizer_ar_storm.py @@ -16,18 +16,20 @@ class SynthesizerARStorm(Synthesizer): # family exploration order: True = 
DFS, False = BFS exploration_order_dfs = True - # buffer containing subfamilies to be checked after the main restricted family - subfamilies_buffer = None - - main_family = None - # if True, Storm over-approximation will be run to help with family pruning storm_pruning = False - storm_control = None - s_queue = None - saynt_timer = None + def __init__(self, quotient, subfamilies_buffer=None, main_family=None, storm_control=None, s_queue=None, saynt_timer=None, main_thread=True): + super().__init__(quotient) + # buffer containing subfamilies to be checked after the main restricted family + self.subfamilies_buffer = subfamilies_buffer + self.main_family = main_family + self.storm_control = storm_control + self.s_queue = s_queue + self.saynt_timer = saynt_timer + self.main_thread = main_thread + @property def method_name(self): @@ -80,7 +82,7 @@ def verify_family(self, family): family.analysis_result = res if family.analysis_result.improving_value is not None: - if self.saynt_timer is not None: + if self.saynt_timer is not None and self.main_thread: print(f'-----------PAYNT----------- \ \nValue = {family.analysis_result.improving_value} | Time elapsed = {round(self.saynt_timer.read(),1)}s | FSC size = {self.quotient.policy_size(family.analysis_result.improving_assignment)}\n', flush=True) if self.storm_control.export_fsc_paynt is not None: @@ -134,13 +136,14 @@ def synthesize_one(self, family): self.storm_control.paynt_export = self.quotient.extract_policy(satisfying_assignment) self.storm_control.paynt_bounds = self.quotient.specification.optimality.optimum self.storm_control.paynt_fsc_size = self.quotient.policy_size(self.storm_control.latest_paynt_result) + self.storm_control.parse_paynt_result(self.quotient) self.storm_control.update_data() logger.info("Pausing synthesis") self.s_queue.get() self.stat.synthesis_timer.stop() # check for the signal that PAYNT can be resumed or terminated while self.s_queue.empty(): - sleep(1) + sleep(0.5) status = self.s_queue.get() if status == "resume": logger.info("Resuming synthesis") diff --git a/paynt/synthesizer/synthesizer_pomdp.py b/paynt/synthesizer/synthesizer_pomdp.py index fdc891496..ca9f10918 100644 --- a/paynt/synthesizer/synthesizer_pomdp.py +++ b/paynt/synthesizer/synthesizer_pomdp.py @@ -1,4 +1,5 @@ import stormpy +import payntbind from .statistic import Statistic import paynt.synthesizer.synthesizer_ar @@ -40,6 +41,9 @@ def __init__(self, quotient, method, storm_control): self.synthesizer = SynthesizerHybrid self.total_iters = 0 + self.storm_control = None + self.main_synthesizer = True + if storm_control is not None: self.use_storm = True self.storm_control = storm_control @@ -48,16 +52,22 @@ def __init__(self, quotient, method, storm_control): self.storm_control.spec_formulas = self.quotient.specification.stormpy_formulae() self.synthesis_terminate = False self.synthesizer = SynthesizerARStorm # SAYNT only works with abstraction refinement + self.saynt_timer = Timer() + self.interactive_queue = Queue() if self.storm_control.iteration_timeout is not None: - self.saynt_timer = Timer() - self.synthesizer.saynt_timer = self.saynt_timer self.storm_control.saynt_timer = self.saynt_timer + if self.storm_control.enhanced_saynt is not None: + self.storm_control.sub_pomdp_builder = payntbind.synthesis.SubPomdpBuilder(self.quotient.pomdp) - def synthesize(self, family, print_stats=True): - synthesizer = self.synthesizer(self.quotient) + def synthesize(self, family, print_stats=True, main_family=None, subfamilies=None): + if self.storm_control is 
not None: + synthesizer = self.synthesizer(self.quotient, subfamilies_buffer=subfamilies, main_family=main_family, storm_control=self.storm_control, s_queue=self.interactive_queue, saynt_timer=self.saynt_timer, main_thread=self.main_synthesizer) + else: + synthesizer = self.synthesizer(self.quotient) family.constraint_indices = self.quotient.design_space.constraint_indices assignment = synthesizer.synthesize(family, keep_optimum=True, print_stats=print_stats) - self.total_iters += synthesizer.stat.iterations_mdp + if synthesizer.stat.iterations_mdp is not None: + self.total_iters += synthesizer.stat.iterations_mdp return assignment # iterative strategy using Storm analysis to enhance the synthesis @@ -67,8 +77,6 @@ def strategy_iterative_storm(self, unfold_imperfect_only, unfold_storm=True): ''' mem_size = paynt.quotient.pomdp.PomdpQuotient.initial_memory_size - self.synthesizer.storm_control = self.storm_control - while True: # for x in range(2): @@ -139,10 +147,7 @@ def strategy_iterative_storm(self, unfold_imperfect_only, unfold_storm=True): main_family = family subfamilies = [] - self.synthesizer.subfamilies_buffer = subfamilies - self.synthesizer.main_family = main_family - - assignment = self.synthesize(family) + assignment = self.synthesize(family, subfamilies=subfamilies, main_family=main_family) if assignment is not None: self.storm_control.latest_paynt_result = assignment @@ -152,6 +157,7 @@ def strategy_iterative_storm(self, unfold_imperfect_only, unfold_storm=True): self.storm_control.update_data() + logger.info(f"Terminate: {self.synthesis_terminate}") if self.synthesis_terminate: break @@ -161,8 +167,6 @@ def strategy_iterative_storm(self, unfold_imperfect_only, unfold_storm=True): # main SAYNT loop def iterative_storm_loop(self, timeout, paynt_timeout, storm_timeout, iteration_limit=0): - self.interactive_queue = Queue() - self.synthesizer.s_queue = self.interactive_queue self.storm_control.interactive_storm_setup() iteration = 1 paynt_thread = Thread(target=self.strategy_iterative_storm, args=(True, self.storm_control.unfold_storm)) @@ -217,10 +221,176 @@ def iterative_storm_loop(self, timeout, paynt_timeout, storm_timeout, iteration_ self.saynt_timer.stop() + + # enhanced SAYNT loop + def enhanced_iterative_storm_loop(self, timeout, paynt_timeout, storm_timeout, number_of_beliefs=0, iteration_limit=0): + self.storm_control.interactive_storm_setup() + iteration = 1 + paynt_thread = Thread(target=self.strategy_iterative_storm, args=(True, self.storm_control.unfold_storm)) + + iteration_timeout = time.time() + timeout + + self.saynt_timer.start() + + while True: + if iteration == 1: + self.storm_control.interactive_storm_start(storm_timeout, True) + self.storm_control.parse_belief_data() + + self.storm_control.belief_explorer.add_fsc_values(self.storm_control.paynt_export) + + if False: + obs = self.quotient.observation_labels.index("[goal=0\t& in=1\t& out=0\t& switch=0]") + obs_states = [] + + for state in self.quotient.pomdp.states: + state_obs = self.quotient.pomdp.get_observation(state) + if state_obs == obs: + obs_states.append(state.id) + + for state in obs_states: + self.storm_control.create_thread_control({state: 1}, "custom", self.storm_control.use_uniform_obs_beliefs) + + else: + if number_of_beliefs == 0: + beliefs_remaining = len(self.storm_control.main_obs_belief_data) + # beliefs_remaining = len(self.storm_control.main_support_belief_data) + else: + beliefs_remaining = number_of_beliefs - 1 + if beliefs_remaining != 0: + for index, belief_type_data in 
enumerate([self.storm_control.main_obs_belief_data, self.storm_control.residue_obs_belief_data]): + # for index, belief_type_data in enumerate([self.storm_control.main_support_belief_data]): + index_type = "obs" if index in [0,1] else "sup" + # index_type = "sup" + for obs_or_sup in belief_type_data: + self.storm_control.create_thread_control(obs_or_sup, index_type, self.storm_control.use_uniform_obs_beliefs) + beliefs_remaining -= 1 + if beliefs_remaining == 0: + break + if beliefs_remaining == 0: + break + + number_of_threads = len(self.storm_control.enhanced_saynt_threads) + logger.info(f"{number_of_threads} threads for enhanced SAYNT created") + active_beliefs = len([True for x in self.storm_control.enhanced_saynt_threads if x["active"]]) + 1 + logger.info(f"number of currently active threads: {active_beliefs}") + logger.info(f"new synthesis time per thread: {paynt_timeout/active_beliefs}s") + + paynt_thread.start() + + logger.info("Timeout for PAYNT started") + time.sleep(paynt_timeout/active_beliefs) + self.interactive_queue.put("timeout") + + while not self.interactive_queue.empty(): + time.sleep(0.5) + + self.storm_control.belief_explorer.set_fsc_values(self.storm_control.paynt_export, 0) + + for index, belief_thread in enumerate(self.storm_control.enhanced_saynt_threads): + if belief_thread["active"]: + logger.info(f'starting synthesis for {belief_thread["type"]}') + belief_thread["synthesizer"].interactive_queue.put("resume") + time.sleep(paynt_timeout/active_beliefs) + belief_thread["synthesizer"].interactive_queue.put("timeout") + while not belief_thread["synthesizer"].interactive_queue.empty(): + time.sleep(0.5) + + export_full = [] + for mem_export in belief_thread["synthesizer"].storm_control.paynt_export: + one_memory = [] + for val_export in mem_export: + full_pomdp_values = {belief_thread["state_map"][mem_export]:val for mem_export, val in val_export.items()} + one_memory.append(full_pomdp_values) + export_full.append(one_memory) + + self.storm_control.belief_explorer.set_fsc_values(export_full, index+1) + else: + self.storm_control.interactive_storm_resume(storm_timeout) + self.storm_control.parse_belief_data() + + if self.storm_control.dynamic_thread_timeout: + if self.storm_control.saynt_overapprox: + self.storm_control.deactivate_threads() + self.storm_control.overapp_belief_value_analysis(number_of_beliefs) + else: + # TODO some other dynamic timeout method + pass + + logger.info(f"{len(self.storm_control.enhanced_saynt_threads) - number_of_threads} new threads for enhanced SAYNT created") + logger.info(f"total number of created threads: {len(self.storm_control.enhanced_saynt_threads)}") + number_of_threads = len(self.storm_control.enhanced_saynt_threads) + active_beliefs = len([True for x in self.storm_control.enhanced_saynt_threads if x["active"]]) + 1 + logger.info(f"number of currently active threads: {active_beliefs}") + logger.info(f"new synthesis time per thread: {paynt_timeout/active_beliefs}s") + + self.interactive_queue.put("resume") + time.sleep(paynt_timeout/active_beliefs) + self.interactive_queue.put("timeout") + + while not self.interactive_queue.empty(): + time.sleep(0.5) + + for index, belief_thread in enumerate(self.storm_control.enhanced_saynt_threads): + if belief_thread["active"]: + logger.info(f'starting synthesis for {belief_thread["type"]}') + belief_thread["synthesizer"].interactive_queue.put("resume") + time.sleep(paynt_timeout/active_beliefs) + belief_thread["synthesizer"].interactive_queue.put("timeout") + while not 
belief_thread["synthesizer"].interactive_queue.empty(): + time.sleep(0.5) + + export_full = [] + for mem_export in belief_thread["synthesizer"].storm_control.paynt_export: + one_memory = [] + for val_export in mem_export: + full_pomdp_values = {belief_thread["state_map"][mem_export]:val for mem_export, val in val_export.items()} + one_memory.append(full_pomdp_values) + export_full.append(one_memory) + + self.storm_control.belief_explorer.set_fsc_values(export_full, index+1) + + # compute sizes of controllers + self.storm_control.belief_controller_size = self.storm_control.get_belief_controller_size(self.storm_control.latest_storm_result, self.storm_control.paynt_fsc_size) + + print("\n------------------------------------\n") + print("PAYNT results: ") + print(self.storm_control.paynt_bounds) + print("controller size: {}".format(self.storm_control.paynt_fsc_size)) + + print() + + print("Storm results: ") + print(self.storm_control.storm_bounds) + print("controller size: {}".format(self.storm_control.belief_controller_size)) + print("\n------------------------------------\n") + + print(f"used FSCs: {self.storm_control.storm_fsc_usage}") + print(f"FSCs used count: {self.storm_control.total_fsc_used}") + + if time.time() > iteration_timeout or iteration == iteration_limit: + break + + iteration += 1 + + for belief_thread in self.storm_control.enhanced_saynt_threads: + belief_thread["synthesizer"].interactive_queue.put("terminate") + belief_thread["synthesizer"].synthesis_terminate = True + belief_thread["thread"].join() + + self.interactive_queue.put("terminate") + self.synthesis_terminate = True + paynt_thread.join() + + self.storm_control.interactive_storm_terminate() + + self.saynt_timer.stop() + + + + # run PAYNT POMDP synthesis with a given timeout def run_synthesis_timeout(self, timeout): - self.interactive_queue = Queue() - self.synthesizer.s_queue = self.interactive_queue paynt_thread = Thread(target=self.strategy_iterative_storm, args=(True, False)) iteration_timeout = time.time() + timeout paynt_thread.start() @@ -229,7 +399,7 @@ def run_synthesis_timeout(self, timeout): if time.time() > iteration_timeout: break - time.sleep(1) + time.sleep(0.1) self.interactive_queue.put("pause") self.interactive_queue.put("terminate") @@ -244,8 +414,6 @@ def strategy_storm(self, unfold_imperfect_only, unfold_storm=True): ''' mem_size = paynt.quotient.pomdp.PomdpQuotient.initial_memory_size - self.synthesizer.storm_control = self.storm_control - while True: # for x in range(2): @@ -318,10 +486,7 @@ def strategy_storm(self, unfold_imperfect_only, unfold_storm=True): main_family = family subfamilies = [] - self.synthesizer.subfamilies_buffer = subfamilies - self.synthesizer.main_family = main_family - - assignment = self.synthesize(family) + assignment = self.synthesize(family, subfamilies=subfamilies, main_family=main_family) if assignment is not None: self.storm_control.latest_paynt_result = assignment @@ -336,7 +501,7 @@ def strategy_storm(self, unfold_imperfect_only, unfold_storm=True): #break - def strategy_iterative(self, unfold_imperfect_only): + def strategy_iterative(self, unfold_imperfect_only, max_memory=None): ''' @param unfold_imperfect_only if True, only imperfect observations will be unfolded ''' @@ -360,6 +525,9 @@ def strategy_iterative(self, unfold_imperfect_only): # break mem_size += 1 + if max_memory is not None and mem_size > max_memory: + break + #break def solve_mdp(self, family): @@ -580,16 +748,23 @@ def run(self, optimum_threshold=None, export_evaluation=None): # choose the 
synthesis strategy: if self.use_storm: logger.info("Storm POMDP option enabled") - logger.info("Storm settings: iterative - {}, get_storm_result - {}, storm_options - {}, prune_storm - {}, unfold_strategy - {}, use_storm_cutoffs - {}".format( + logger.info("Storm settings: iterative - {}, get_storm_result - {}, storm_options - {}, prune_storm - {}, unfold_strategy - {}, use_storm_cutoffs - {}, enhanced_saynt - {}, saynt_overapprox - {}".format( (self.storm_control.iteration_timeout, self.storm_control.paynt_timeout, self.storm_control.storm_timeout), self.storm_control.get_result, - self.storm_control.storm_options, self.storm_control.incomplete_exploration, (self.storm_control.unfold_storm, self.storm_control.unfold_cutoff), self.storm_control.use_cutoffs + self.storm_control.storm_options, self.storm_control.incomplete_exploration, (self.storm_control.unfold_storm, self.storm_control.unfold_cutoff), self.storm_control.use_cutoffs, self.storm_control.enhanced_saynt, self.storm_control.saynt_overapprox )) # start SAYNT if self.storm_control.iteration_timeout is not None: - self.iterative_storm_loop(timeout=self.storm_control.iteration_timeout, - paynt_timeout=self.storm_control.paynt_timeout, - storm_timeout=self.storm_control.storm_timeout, - iteration_limit=0) + if self.storm_control.enhanced_saynt is None: + self.iterative_storm_loop(timeout=self.storm_control.iteration_timeout, + paynt_timeout=self.storm_control.paynt_timeout, + storm_timeout=self.storm_control.storm_timeout, + iteration_limit=0) + else: + self.enhanced_iterative_storm_loop(timeout=self.storm_control.iteration_timeout, + paynt_timeout=self.storm_control.paynt_timeout, + storm_timeout=self.storm_control.storm_timeout, + number_of_beliefs=self.storm_control.enhanced_saynt, + iteration_limit=0) # run PAYNT for a time given by 'self.storm_control.get_result' and then run Storm using the best computed FSC at cut-offs elif self.storm_control.get_result is not None: if self.storm_control.get_result: diff --git a/paynt/verification/alpha_vector_verification.py b/paynt/verification/alpha_vector_verification.py new file mode 100644 index 000000000..aa2ae5006 --- /dev/null +++ b/paynt/verification/alpha_vector_verification.py @@ -0,0 +1,29 @@ + +import payntbind +import stormpy +from paynt.parser.alpha_vector_parser import AlphaVectorSet + +import logging +logger = logging.getLogger(__name__) + + +class AlphaVectorVerification: + + def __init__(self, pomdp_quotient): + self.quotient = pomdp_quotient + + # calls belief unrolling in Storm with the given set of alpha vectors and verifies them with respect to the undiscounted property + # this verification assumes the alpha vectors were obtained on a cassandra pomdp generated from the model using the --export pomdp functionality + def verify_alpha_vectors(self, alpha_vector_set): + + number_of_belief_threshold = 1000000 # number of beliefs to be unfolded before cut-offs are applied + dummy_value = 20 # for reward specifications BeliefMCExplorer uses this value instead of Storm cut-offs, can be useful when getting inf/-inf + + alpha_vector_bind = payntbind.synthesis.AlphaVectorsSet(alpha_vector_set.alpha_vectors, alpha_vector_set.alpha_vector_actions) + + alpha_vector_explorer = payntbind.synthesis.BeliefMCExplorer(self.quotient.pomdp, number_of_belief_threshold) + + result = alpha_vector_explorer.check_alpha_vectors(self.quotient.specification.stormpy_formulae()[0], alpha_vector_bind) + + logger.info(f"Alpha vector verified value: {result.upper_bound}") + logger.info(f"explored MC 
number of states: {result.induced_mc_from_scheduler.nr_states}") \ No newline at end of file diff --git a/paynt/verification/property.py b/paynt/verification/property.py index d4bf303bf..c8074f218 100644 --- a/paynt/verification/property.py +++ b/paynt/verification/property.py @@ -25,6 +25,14 @@ def construct_reward_property(reward_name, minimizing, target_label): optimality = OptimalityProperty(formula, 0) return optimality +# discounted reward property supported by Storm +def construct_discount_property(reward_name, minimizing, discount_factor): + direction = "min" if minimizing else "max" + formula_str = 'R{"' + reward_name + '"}' + direction + '=? [C{' + str(discount_factor) + '}]' + formula = stormpy.parse_properties_without_context(formula_str)[0] + optimality = OptimalityProperty(formula, 0) + return optimality + class Property: ''' Wrapper over a stormpy property. ''' diff --git a/payntbind/src/synthesis/helpers.cpp b/payntbind/src/synthesis/helpers.cpp index 8040944c3..5a69a1df8 100644 --- a/payntbind/src/synthesis/helpers.cpp +++ b/payntbind/src/synthesis/helpers.cpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include namespace synthesis { @@ -42,6 +44,52 @@ void removeRewardModel(storm::models::sparse::Model<ValueType> & model, std::string const& reward_name) model.removeRewardModel(reward_name); } +// applies discount model transformation to a given DTMC +// currently used for discounted expected visits +template<typename ValueType> +std::shared_ptr<storm::models::sparse::Dtmc<ValueType>> applyDiscountTransformationToDtmc(storm::models::sparse::Dtmc<ValueType> &model, double discount_factor) { + storm::storage::sparse::ModelComponents<ValueType> components; + + auto dtmcNumberOfStates = model.getNumberOfStates(); + + // labeling + storm::models::sparse::StateLabeling stateLabeling(dtmcNumberOfStates+1); + storm::storage::BitVector init_flags = model.getInitialStates(); + init_flags.resize(dtmcNumberOfStates+1, false); + stateLabeling.addLabel("init", std::move(init_flags)); + storm::storage::BitVector discount_flag(dtmcNumberOfStates+1, false); + discount_flag.set(dtmcNumberOfStates, true); + stateLabeling.addLabel("discount_sink", std::move(discount_flag)); + components.stateLabeling = stateLabeling; + + // transition matrix + storm::storage::SparseMatrix<ValueType> const& transitionMatrix = model.getTransitionMatrix(); + storm::storage::SparseMatrixBuilder<ValueType> builder; + for (uint_fast64_t state = 0; state < dtmcNumberOfStates; state++) { + // this function was created for cassandra models where we don't want to discount the first transition + // maybe it's not needed?
TODO + if (state == 0) { + for (auto entry: transitionMatrix.getRow(state)) { + builder.addNextValue(state, entry.getColumn(), entry.getValue()); + } + builder.addNextValue(state, dtmcNumberOfStates, 0); + } + else { + for (auto entry: transitionMatrix.getRow(state)) { + builder.addNextValue(state, entry.getColumn(), entry.getValue() * discount_factor); + } + builder.addNextValue(state, dtmcNumberOfStates, 1-discount_factor); + } + } + for (uint_fast64_t state = 0; state < dtmcNumberOfStates; state++) { + builder.addNextValue(dtmcNumberOfStates, state, 0); + } + builder.addNextValue(dtmcNumberOfStates, dtmcNumberOfStates, 1); + components.transitionMatrix = builder.build(); + + return std::make_shared>(std::move(components)); +} + } @@ -59,6 +107,8 @@ void define_helpers(py::module& m) { m.def("transform_until_to_eventually", &synthesis::transformUntilToEventually, py::arg("formula")); m.def("remove_reward_model", &synthesis::removeRewardModel, py::arg("model"), py::arg("reward_name")); + m.def("apply_discount_transformation_to_dtmc", &synthesis::applyDiscountTransformationToDtmc, py::arg("model"), py::arg("discount_factor")); + m.def("multiply_with_vector", [] (storm::storage::SparseMatrix matrix,std::vector vector) { std::vector result(matrix.getRowCount()); matrix.multiplyWithVector(vector, result); diff --git a/payntbind/src/synthesis/pomdp/BeliefMCExplorer.cpp b/payntbind/src/synthesis/pomdp/BeliefMCExplorer.cpp new file mode 100644 index 000000000..bb1fcc5c2 --- /dev/null +++ b/payntbind/src/synthesis/pomdp/BeliefMCExplorer.cpp @@ -0,0 +1,297 @@ +// Most of the code taken from Alex Bork + +#include "BeliefMCExplorer.h" +#include +#include +#include +#include +#include +#include + +namespace synthesis { + + template + BeliefMCExplorer::BeliefMCExplorer(std::shared_ptr pomdp, uint64_t const& sizeThreshold, double const& dummyCutoffValue, bool trivialCutOff) + : inputPomdp(pomdp) { + this->precision = 1e-12; + this->sizeThreshold = sizeThreshold; + this->dummyCutoffValue = dummyCutoffValue; + this->trivialCutOff = trivialCutOff; + } + + template + typename BeliefMCExplorer::Result BeliefMCExplorer::checkAlphaVectors(storm::logic::Formula const& formula, AlphaVectorSet const& alphaVectorSet) { + storm::Environment env; + return checkAlphaVectors(formula, alphaVectorSet, env); + } + + template + typename BeliefMCExplorer::Result BeliefMCExplorer::checkAlphaVectors(storm::logic::Formula const& formula, AlphaVectorSet const& alphaVectorSet, storm::Environment const& env) { + STORM_PRINT_AND_LOG("Start checking the MC induced by the alpha vector policy...\n") + auto formulaInfo = storm::pomdp::analysis::getFormulaInformation(pomdp(), formula); + std::optional rewardModelName; + std::set targetObservations; + if (formulaInfo.isNonNestedReachabilityProbability() || formulaInfo.isNonNestedExpectedRewardFormula()) { + if (formulaInfo.getTargetStates().observationClosed) { + targetObservations = formulaInfo.getTargetStates().observations; + } else { + storm::transformer::MakeStateSetObservationClosed obsCloser(inputPomdp); + std::tie(preprocessedPomdp, targetObservations) = obsCloser.transform(formulaInfo.getTargetStates().states); + } + if (formulaInfo.isNonNestedReachabilityProbability()) { + if (!formulaInfo.getSinkStates().empty()) { + storm::storage::sparse::ModelComponents components; + components.stateLabeling = pomdp().getStateLabeling(); + components.rewardModels = pomdp().getRewardModels(); + auto matrix = pomdp().getTransitionMatrix(); + 
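// make every choice of a sink state a self-loop so the sink states remain absorbing in the preprocessed POMDP +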
matrix.makeRowGroupsAbsorbing(formulaInfo.getSinkStates().states); + components.transitionMatrix = matrix; + components.observabilityClasses = pomdp().getObservations(); + if(pomdp().hasChoiceLabeling()){ + components.choiceLabeling = pomdp().getChoiceLabeling(); + } + if(pomdp().hasObservationValuations()){ + components.observationValuations = pomdp().getObservationValuations(); + } + preprocessedPomdp = std::make_shared(std::move(components), true); + auto reachableFromSinkStates = storm::utility::graph::getReachableStates(pomdp().getTransitionMatrix(), formulaInfo.getSinkStates().states, formulaInfo.getSinkStates().states, ~formulaInfo.getSinkStates().states); + reachableFromSinkStates &= ~formulaInfo.getSinkStates().states; + STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported"); + } + } else { + // Expected reward formula! + rewardModelName = formulaInfo.getRewardModelName(); + } + } else { + STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Unsupported formula '" << formula << "'."); + } + + // Compute bound using the underlying MDP and a simple policy + auto underlyingMdp = std::make_shared>(pomdp().getTransitionMatrix(), pomdp().getStateLabeling(), pomdp().getRewardModels()); + auto resultPtr = storm::api::verifyWithSparseEngine(underlyingMdp, storm::api::createTask(formula.asSharedPointer(), false)); + STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained."); + STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); + std::vector fullyObservableResult = std::move(resultPtr->template asExplicitQuantitativeCheckResult().getValueVector()); + + if(formulaInfo.minimize()){ + pomdpValueBounds.trivialPomdpValueBounds.lower.push_back(fullyObservableResult); + } else { + pomdpValueBounds.trivialPomdpValueBounds.upper.push_back(fullyObservableResult); + } + + // For cut-offs, we need to provide values. 
To not skew result too much, we always take the first enabled action + storm::storage::Scheduler pomdpScheduler(pomdp().getNumberOfStates()); + for(uint64_t state = 0; state < pomdp().getNumberOfStates(); ++state) { + pomdpScheduler.setChoice(0, state); + } + + // Model check the DTMC resulting from the policy + auto scheduledModel = underlyingMdp->applyScheduler(pomdpScheduler, false); + resultPtr = storm::api::verifyWithSparseEngine(scheduledModel, storm::api::createTask(formula.asSharedPointer(), false)); + STORM_LOG_THROW(resultPtr, storm::exceptions::UnexpectedException, "No check result obtained."); + STORM_LOG_THROW(resultPtr->isExplicitQuantitativeCheckResult(), storm::exceptions::UnexpectedException, "Unexpected Check result Type"); + auto cutoffVec = resultPtr->template asExplicitQuantitativeCheckResult().getValueVector(); + if(formulaInfo.minimize()){ + pomdpValueBounds.trivialPomdpValueBounds.upper.push_back(cutoffVec); + } else { + pomdpValueBounds.trivialPomdpValueBounds.lower.push_back(cutoffVec); + } + + auto manager = std::make_shared(pomdp(), storm::utility::convertNumber(precision), BeliefManagerType::TriangulationMode::Static); + if (rewardModelName) { + manager->setRewardModel(rewardModelName); + } + + auto explorer = std::make_shared(manager, pomdpValueBounds.trivialPomdpValueBounds); + exploreMC(targetObservations, formulaInfo.minimize(), rewardModelName.has_value(), manager, explorer, cutoffVec, alphaVectorSet, env); + + STORM_LOG_ASSERT(explorer->hasComputedValues(), "Values for MC were not computed"); + + double mcValue = explorer->getComputedValueAtInitialState(); + MCExplorationResult.updateUpperBound(mcValue); + MCExplorationResult.updateLowerBound(mcValue); + MCExplorationResult.schedulerAsMarkovChain = explorer->getExploredMdp(); + + return MCExplorationResult; + } + + + + template + bool BeliefMCExplorer::exploreMC(const std::set &targetObservations, bool min, bool computeRewards, std::shared_ptr &beliefManager, std::shared_ptr &beliefExplorer, std::vector const &cutoffVec, AlphaVectorSet const& alphaVectorSet, storm::Environment const& env){ + if (computeRewards) { + beliefExplorer->startNewExploration(storm::utility::zero()); + } else { + beliefExplorer->startNewExploration(storm::utility::one(), storm::utility::zero()); + } + + bool useDummyValues = false; + if (dummyCutoffValue != std::numeric_limits::infinity()) { + useDummyValues = true; + } + + //TODO use timelimit + bool fixPoint = true; + bool timeLimitExceeded = false; + bool stopExploration = false; + //TODO stopping criterion + while (beliefExplorer->hasUnexploredState()) { + if (false) { + STORM_LOG_INFO("Exploration time limit exceeded."); + timeLimitExceeded = true; + } + + uint64_t currId = beliefExplorer->exploreNextState(); + // std::cout << "exploring " << currId << std::endl; + // std::cout << "{"; + // for (auto const &pointEntry : beliefManager->getBeliefAsMap(currId)) { + // std::cout << pointEntry.first << " : " << pointEntry.second << ", "; + // } + // std::cout << "}" << std::endl; + + + if (timeLimitExceeded) { + fixPoint = false; + } + if (targetObservations.count(beliefManager->getBeliefObservation(currId)) != 0) { + // std::cout << "adding target" << std::endl; + beliefExplorer->setCurrentStateIsTarget(); + beliefExplorer->addSelfloopTransition(); + beliefExplorer->addChoiceLabelToCurrentState(0, "loop"); + } else { + if (timeLimitExceeded || beliefExplorer->getCurrentNumberOfMdpStates() >= sizeThreshold /*&& !statistics.beliefMdpDetectedToBeFinite*/) { + stopExploration = true; 
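+ // keep the state in the model but do not expand it; the cut-off branch below assigns its value instead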
+ beliefExplorer->setCurrentStateIsTruncated(); + } + if(!stopExploration) { + auto numberOfLocalChoices = beliefManager->getBeliefNumberOfChoices(currId); + // if (numberOfLocalChoices == 1) { + // auto successors = beliefManager->expand(currId, 0); + // for (auto const &successor : successors) { + // bool added = beliefExplorer->addTransitionToBelief(0, successor.first, successor.second, stopExploration); + // STORM_LOG_ASSERT(added, "transition was supposed to be added"); + // } + // continue; + // } + // determine the best action from the alpha vectors + auto chosenLocalActionIndex = getBestActionInBelief(currId, beliefManager, beliefExplorer, alphaVectorSet); + // std::cout << "chosen action index " << chosenLocalActionIndex << std::endl; + // if action is not in the model, choose the first action + if(chosenLocalActionIndex >= numberOfLocalChoices){ + // std::cout << "changing action to 0" << std::endl; + // if we are computing rewards apply cut-offs, otherwise add self-loop + // if (computeRewards) { + // auto cutOffValue = beliefManager->getWeightedSum(currId, cutoffVec); + // beliefExplorer->addTransitionsToExtraStates(0, storm::utility::one()); + // if (useDummyValues) { + // beliefExplorer->addRewardToCurrentState(0, dummyCutoffValue); + // } else { + // beliefExplorer->addRewardToCurrentState(0, cutOffValue); + // } + // if(pomdp().hasChoiceLabeling()){ + // beliefExplorer->addChoiceLabelToCurrentState(0, "cutoff"); + // } + // } else { + // beliefExplorer->addSelfloopTransition(); + // beliefExplorer->addChoiceLabelToCurrentState(0, "loop"); + // } + chosenLocalActionIndex = 0; + } + // Add successor transitions for the chosen action + auto truncationProbability = storm::utility::zero(); + auto truncationValueBound = storm::utility::zero();\ + auto successors = beliefManager->expand(currId, chosenLocalActionIndex); + for (auto const &successor : successors) { + // std::cout << "adding successor " << successor.first << " with probability " << successor.second << std::endl; + bool added = beliefExplorer->addTransitionToBelief(0, successor.first, successor.second, stopExploration); + if (!added) { + STORM_LOG_ASSERT(stopExploration, "Didn't add a transition although exploration shouldn't be stopped."); + // We did not explore this successor state. Get a bound on the "missing" value + truncationProbability += successor.second; + truncationValueBound += successor.second * (min ? beliefExplorer->computeUpperValueBoundAtBelief(successor.first) + : beliefExplorer->computeLowerValueBoundAtBelief(successor.first)); + } + } + if (computeRewards) { + // The truncationValueBound will be added on top of the reward introduced by the current belief state. 
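+ // i.e., the expected immediate reward of the chosen action in this belief plus the value bound for the truncated successors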
+ beliefExplorer->addRewardToCurrentState(0, beliefManager->getBeliefActionReward(currId, chosenLocalActionIndex) + truncationValueBound); + } + } else { + //When we stop, we apply simple cut-offs + auto cutOffValue = beliefManager->getWeightedSum(currId, cutoffVec); + if (computeRewards) { + beliefExplorer->addTransitionsToExtraStates(0, storm::utility::one()); + if (useDummyValues) { + beliefExplorer->addRewardToCurrentState(0, dummyCutoffValue); + } else { + beliefExplorer->addRewardToCurrentState(0, cutOffValue); + } + } else { + if (trivialCutOff) { + cutOffValue = 1; + } + beliefExplorer->addTransitionsToExtraStates(0, cutOffValue,storm::utility::one() - cutOffValue); + } + if(pomdp().hasChoiceLabeling()){ + beliefExplorer->addChoiceLabelToCurrentState(0, "cutoff"); + } + } + } + if (storm::utility::resources::isTerminate()) { + break; + } + } + + if (storm::utility::resources::isTerminate()) { + return false; + } + + beliefExplorer->finishExploration(); + STORM_PRINT_AND_LOG("Finished exploring Alpha Vector Policy induced Markov chain.\n Start analysis...\n"); + + beliefExplorer->computeValuesOfExploredMdp(env, min ? storm::solver::OptimizationDirection::Minimize : storm::solver::OptimizationDirection::Maximize); + // don't overwrite statistics of a previous, successful computation + return fixPoint; + } + + + + template + uint64_t BeliefMCExplorer::getBestActionInBelief(uint64_t beliefId, std::shared_ptr &beliefManager, std::shared_ptr &beliefExplorer, AlphaVectorSet const& alphaVectorSet) { + uint64_t bestAlphaIndex = 0; + double bestAlphaValue = 0; + auto belief = beliefManager->getBeliefAsMap(beliefId); + for (auto const &pointEntry : belief) { + if (alphaVectorSet.alphaVectors.at(0).size() <= pointEntry.first) { + return 0; + } + bestAlphaValue += alphaVectorSet.alphaVectors.at(0).at(pointEntry.first) * pointEntry.second; + } + for (uint64_t alphaIndex = 1; alphaIndex < alphaVectorSet.alphaVectors.size(); alphaIndex++ ) { + double alphaValue = 0; + for (auto const &pointEntry : belief) { + alphaValue += alphaVectorSet.alphaVectors.at(alphaIndex).at(pointEntry.first) * pointEntry.second; + } + + if (alphaValue > bestAlphaValue) { + bestAlphaValue = alphaValue; + bestAlphaIndex = alphaIndex; + } + } + return alphaVectorSet.alphaVectorActions.at(bestAlphaIndex); + } + + + + template + PomdpModelType const& BeliefMCExplorer::pomdp() const { + if (preprocessedPomdp) { + return *preprocessedPomdp; + } else { + return *inputPomdp; + } + } + + + template class BeliefMCExplorer>; +} \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp/BeliefMCExplorer.h b/payntbind/src/synthesis/pomdp/BeliefMCExplorer.h new file mode 100644 index 000000000..13373afa1 --- /dev/null +++ b/payntbind/src/synthesis/pomdp/BeliefMCExplorer.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace synthesis { + + class AlphaVectorSet { + using AlphaVector = std::vector; + + public: + + AlphaVectorSet(std::vector const& alphaVectors, std::vector const& alphaVectorActions) { + this->alphaVectors = alphaVectors; + this->alphaVectorActions = alphaVectorActions; + } + + std::vector alphaVectors; + std::vector alphaVectorActions; + }; + + template + class BeliefMCExplorer { + + typedef storm::storage::BeliefManager BeliefManagerType; + typedef storm::builder::BeliefMdpExplorer ExplorerType; + typedef typename PomdpModelType::ValueType PomdpValueType; + typedef typename 
storm::pomdp::modelchecker::BeliefExplorationPomdpModelChecker<PomdpModelType>::Result Result; + + public: + + BeliefMCExplorer(std::shared_ptr<PomdpModelType> pomdp, uint64_t const& sizeThreshold=1000000, double const& dummyCutoffValue=std::numeric_limits<double>::infinity(), bool trivialCutOff=false); + + Result checkAlphaVectors(storm::logic::Formula const& formula, AlphaVectorSet const& alphaVectorSet); + + Result checkAlphaVectors(storm::logic::Formula const& formula, AlphaVectorSet const& alphaVectorSet, storm::Environment const& env); + + PomdpModelType const& pomdp() const; + + private: + + bool exploreMC(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards, std::shared_ptr<BeliefManagerType>& beliefManager, std::shared_ptr<ExplorerType>& beliefExplorer, std::vector<PomdpValueType> const &cutoffVec, AlphaVectorSet const& alphaVectorSet, + storm::Environment const& env); + + uint64_t getBestActionInBelief(uint64_t beliefId, std::shared_ptr<BeliefManagerType> &beliefManager, std::shared_ptr<ExplorerType> &beliefExplorer, AlphaVectorSet const& alphaVectorSet); + + std::shared_ptr<PomdpModelType> inputPomdp; + std::shared_ptr<PomdpModelType> preprocessedPomdp; + + double precision; + uint64_t sizeThreshold; + double dummyCutoffValue; + bool trivialCutOff; + + Result MCExplorationResult = Result(-storm::utility::infinity(), storm::utility::infinity()); + + storm::pomdp::modelchecker::POMDPValueBounds<PomdpValueType> pomdpValueBounds; + + }; +} \ No newline at end of file diff --git a/payntbind/src/synthesis/pomdp/bindings.cpp b/payntbind/src/synthesis/pomdp/bindings.cpp index e2191d85d..94542b889 100644 --- a/payntbind/src/synthesis/pomdp/bindings.cpp +++ b/payntbind/src/synthesis/pomdp/bindings.cpp @@ -2,6 +2,7 @@ #include "PomdpManager.h" #include "PomdpManagerAposteriori.h" +#include "BeliefMCExplorer.h" void bindings_pomdp(py::module& m) { @@ -39,5 +40,15 @@ void bindings_pomdp(py::module& m) { .def_property_readonly("update_holes", [](synthesis::PomdpManagerAposteriori& manager) {return manager.update_holes;}) ; + py::class_<synthesis::BeliefMCExplorer<storm::models::sparse::Pomdp<double>>, std::shared_ptr<synthesis::BeliefMCExplorer<storm::models::sparse::Pomdp<double>>>>(m, "BeliefMCExplorer") + .def(py::init<std::shared_ptr<storm::models::sparse::Pomdp<double>>, uint64_t, double, bool>(), py::arg("pomdp"), py::arg("size_threshold") = 1000000, py::arg("dummy_cutoff_value") = std::numeric_limits<double>::infinity(), py::arg("trivial_cutoffs") = false) + .def("check_alpha_vectors", py::overload_cast<storm::logic::Formula const&, synthesis::AlphaVectorSet const&>(&synthesis::BeliefMCExplorer<storm::models::sparse::Pomdp<double>>::checkAlphaVectors), py::arg("formula"), py::arg("alpha_vector_set")) + ; + + py::class_<synthesis::AlphaVectorSet>(m, "AlphaVectorsSet", "Alpha vectors class") + .def(py::init<std::vector<std::vector<double>> const&, std::vector<uint64_t> const&>()) + .def_property_readonly("alpha_vectors", [](synthesis::AlphaVectorSet alphaVectorsSet) {return alphaVectorsSet.alphaVectors;}) + ; + }
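The binding `check_alpha_vectors` exposed above drives the policy evaluation implemented in `BeliefMCExplorer`: `getBestActionInBelief` scores every alpha vector by its dot product with the current belief and plays the action attached to the best-scoring vector. A minimal Python sketch of that lookup (function and argument names are illustrative; the belief is a state-to-probability dict as returned by `getBeliefAsMap`):

```python
# Illustrative sketch of alpha-vector action selection (not part of the patch).
# belief: dict mapping POMDP state -> probability
# alpha_vectors[i]: per-state values; alpha_vector_actions[i]: associated action
def best_action(belief, alpha_vectors, alpha_vector_actions):
    def score(alpha):
        # expected value of the alpha vector under the belief
        return sum(prob * alpha[state] for state, prob in belief.items())
    best_index = max(range(len(alpha_vectors)), key=lambda i: score(alpha_vectors[i]))
    return alpha_vector_actions[best_index]
```

As in the C++ code, a fallback is still needed when the selected action does not exist among the belief's local choices; the explorer above falls back to local action 0 in that case.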