Merge pull request #15 from epatrizio/abs-inter

Abstract interpretation
epatrizio · Apr 28, 2023 · a41d59c · a41d59c
2 parents eb4e071 + 2d21341
commit a41d59c
Show file tree

Hide file tree

Showing 32 changed files with 1,031 additions and 9 deletions.
diff --git a/Makefile b/Makefile
@@ -15,8 +15,17 @@ all:
 	ocamlc -c parser.ml;
 	ocamlc -c lexer.ml;
 	ocamlc -c typer.ml;
+	ocamlc -c domain.ml;
+	ocamlc -c domain_value.ml;
+	ocamlc -c domain_non_rel.ml;
+	ocamlc -c domain_concrete.ml;
+	ocamlc -c domain_constant.ml;
+	ocamlc -c domain_interval.ml;
+	ocamlc -c abstract_interpreter.ml;
 	ocamlc -c main.ml;
-	ocamlc -o $(EXE) utils.cmo ast.cmo compiler.cmo lexer.cmo parser.cmo typer.cmo main.cmo
+	ocamlc -o $(EXE) utils.cmo ast.cmo compiler.cmo lexer.cmo parser.cmo typer.cmo \
+	domain.cmo domain_value.cmo domain_non_rel.cmo domain_concrete.cmo domain_constant.cmo \
+	domain_interval.cmo abstract_interpreter.cmo main.cmo
 
 clean:
 	rm -rf *.cmo *.cmi lexer.ml parser.ml parser.mli $(EXE)
@@ -27,6 +36,9 @@ clean_bc:
 check_grammar:
 	menhir --list-errors parser.mly
 
+help:
+	@./$(EXE) --help
+
 compile:
 	@./$(EXE) tests/$(S)
 
@@ -36,6 +48,15 @@ compile_debug:
 compile_no_typing:
 	@./$(EXE) --no-typing tests/$(S)
 
+abs_inter_concrete:
+	@./$(EXE) --abs-inter-concrete tests/$(S)
+
+abs_inter_constant:
+	@./$(EXE) --abs-inter-constant tests/$(S)
+
+abs_inter_interval:
+	@./$(EXE) --abs-inter-interval tests/$(S)
+
 vm:
 	@$(VM) tests/build/bc_$(S)
 

diff --git a/README.md b/README.md
@@ -2,7 +2,8 @@
 
 Here is a compiler producing bytecode for a simplified implementation of the OCaml Virtual Machine, called Mini-ZAM.
 
-*This project is part of the Advanced Compilation course of my professional training at Sorbonne University (2022).*
+*This project is part of the Advanced Compilation course of my professional training at
+Sorbonne University (2022 2023).*
 
 ## Technical stack / choices
 
@@ -71,3 +72,19 @@ S=t0.txt make compile_debug
 ```
 
 Then, you need a Mini-ZAM to interpret the generated bytecode.
+
+## Abstract interpretation
+
+Here is a short introduction to [abstract interpretation](https://en.wikipedia.org/wiki/Abstract_interpretation),
+which is an advanced **static program analysis** technique. For example
+**Abstract interpretation** allows to detect possible division by 0, or memory overflows.
+
+This implementation (a simple adaptation of a lesson on this pedagogical compiler),
+represents the very beginning of a complete research field in computer science.
+It allows to have a first idea and to see how it can be written.
+
+I only consider integer values on 3 different analyses:
+
+* Concrete analysis: `S=tai1-0.txt make abs_inter_concrete`
+* Constants analysis: `S=tai1-0.txt make abs_inter_constant`
+* Intervals analysis: `S=tai1-0.txt make abs_inter_interval`
diff --git a/abstract_interpreter.ml b/abstract_interpreter.ml
@@ -0,0 +1,67 @@
+(* Abstract interpretation - interpreter by induction *)
+
+open Ast
+open Domain
+
+module type INTERPRETER =
+sig
+  val analyse_prog: prog -> unit
+end
+
+module Interprete(D : DOMAIN) =
+(struct
+
+  type t = D.t
+
+  let filter (a : t) (e : expr) (r : bool) : t =
+    let rec expr_handle a (*e,_*) e r =
+      match e with
+      | Eunop (_, Tbool, Unot, e) -> expr_handle a e (not r)
+      | Ebinop (_, Tbool, Band, e1, e2) -> 
+          (if r then D.meet else D.join) (expr_handle a e1 r) (expr_handle a e2 r)
+      | Ebinop (_, Tbool, Bor, e1, e2) -> 
+          (if r then D.join else D.meet) (expr_handle a e1 r) (expr_handle a e2 r)
+      | Ebinop (_, _, Beq, e1, e2) -> D.compare a e1 (if r then Beq else Bneq) e2
+      | Ebinop (_, _, Bneq, e1, e2) -> D.compare a e1 (if r then Bneq else Beq) e2
+      | Ebinop (_, _, Blt, e1, e2) -> D.compare a e1 (if r then Blt else Bge) e2
+      | Ebinop (_, _, Bgt, e1, e2) -> D.compare a e1 (if r then Bgt else Ble) e2
+      | Ebinop (_, _, Ble, e1, e2) -> D.compare a e1 (if r then Ble else Bgt) e2
+      | Ebinop (_, _, Bge, e1, e2) -> D.compare a e1 (if r then Bge else Blt) e2
+      | Ecst (_, Tbool, Cbool b) -> if b = r then a else D.bottom ()
+      | _ -> D.bottom ()
+    in
+    expr_handle a e r
+
+  let rec eval_stmt (a : t) (s : stmt) : t =
+    let r = match s with
+    | Sassign (_, (Tint, v_name), e, s) -> eval_stmt (D.assign a v_name e) s
+    | Srefassign (_, (Tint, v_name), e) -> D.assign a v_name e
+    | Sprint_ai ((l,_), (Tint, v_name)) -> D.print l.pos_lnum a v_name; a
+    | Sprintall_ai (l,_) -> D.print_all l.pos_lnum a; a
+    | Sblock (Bstmt s) -> eval_stmt a s
+    | Sblock (Bseq_l (s, b)) -> eval_stmt (eval_stmt a s) (Sblock b)
+    | Sblock (Bseq_r (b, s)) -> eval_stmt (eval_stmt a (Sblock b)) s
+    | Sif (_, e, s1, s2) -> 
+        let t = eval_stmt (filter a e true) s1 in
+        let f = eval_stmt (filter a e false) s2 in
+          D.join t f
+    | Swhile (_, e, b) ->
+        let rec fix (f:(t -> t -> t) -> t -> t) (op:t -> t -> t) (x:t) (i:int) : t =
+          let fx = f op x in
+            if D.subset fx x then fx
+            else fix f (if i < 3 then D.join else D.widen) fx (i+1) (* widening after 3 iterations *)
+        in
+        let f op x = op a (eval_stmt (filter x e true) (Sblock b)) in
+        let inv = fix f D.join a 0 in
+          filter inv e false
+    | Sfor (l,s1,e,s2,b) ->
+        let a1 = eval_stmt a s1 in
+          eval_stmt a1 (Swhile (l, e, (Bseq_r (b, s2))))
+    | _ -> D.bottom ()
+    in
+      r
+
+  let analyse_prog (stmt : prog) : unit =
+    let _ = eval_stmt (D.init()) stmt in ()
+
+end : INTERPRETER)
diff --git a/ast.ml b/ast.ml
@@ -35,6 +35,7 @@ type expr =
   | Earray of loc * typ * expr list
   | Eaget of loc * typ * ident * expr
   | Easize of loc * typ * ident
+  | Erand of loc * typ * expr * expr
 
 type stmt =
   | Sassign of loc * ident * expr * stmt
@@ -45,6 +46,8 @@ type stmt =
   | Swhile of loc * expr * block
   | Sfor of loc * stmt * expr * stmt * block
   | Sprint of expr
+  | Sprint_ai of loc * ident
+  | Sprintall_ai of loc
   | Sexit
   | Sskip
 

diff --git a/compiler.ml b/compiler.ml
@@ -46,7 +46,8 @@ let rec compile_expr ?(label = "") e env k li =
       compile_stmt (Sassign (loc, (ty1,tmp), e, Sif (loc, Ebinop (loc, ty1, Bge, (Eident (loc,ty2,(ty2,tmp))), (Easize (loc,Tint,(ty2,i)))), Sexit, Sskip))) env li @ 
         compile_expr e env k li @ ["PUSH"] @ compile_expr (Eident (loc,ty2,(ty2,i))) env (k+1) li @  ["GETVECTITEM"] @ li
   | Easize (loc,_,(typ,i)) -> compile_expr (Eident (loc,typ,(typ,i))) env k li @ ["VECTLENGTH"] @ li
-
+  | Erand (loc,typ,e1,e2) -> 
+      (compile_expr e2 env k li) @ ["PUSH"] @ (compile_expr e1 env (k+1) li) @ ["PRIM rand -- TODO"]
   and compile_stmt ?(label = "") s env li =
   match s with
   | Sassign(loc,(_,i),e,s) ->
@@ -64,6 +65,8 @@ let rec compile_expr ?(label = "") e env k li =
     compile_expr e env 0 li @ labeled_inst ~label:("wcond"^sct) ("BRANCHIFNOT wdone"^sct) @ compile_block b env li @ compile_expr e env 0 li @ ["BRANCH wcond" ^ sct] @ labeled_inst ~label:("wdone"^sct) ""
   | Sfor (loc,s1,e,s2,b) -> compile_stmt s1 env li @ compile_stmt (Swhile (loc,e, Bseq_r (b,s2))) env li
   | Sprint e -> (compile_expr ~label:label e env 0 li) @ ["PRIM print"]
+  | Sprint_ai _ -> [] (* only for abstract interpretation statement *)
+  | Sprintall_ai _ -> [] (* "" *)
   | Sexit -> labeled_inst ~label:label ("STOP") @ li
   | Sskip -> labeled_inst ~label:label ("CONST 0") @ li
 

diff --git a/domain.ml b/domain.ml
@@ -0,0 +1,47 @@
+(* Abstract interpretation - Domains signature (abstract or concrete) *)
+
+open Ast
+
+module type DOMAIN =
+  sig
+
+    (* abstract elements type *)
+    type t
+
+    (* initial empty environment *)
+    val init: unit -> t
+
+    (* empty set *)
+    val bottom: unit -> t
+
+    (* add a variable in environment *)
+    val add_var: t -> string -> t
+
+    (* remove a variable in environment *)
+    val del_var: t -> string -> t
+
+    (* assign an integer expression to a variable *)
+    val assign: t -> string -> expr -> t
+
+    (* filter environments *)
+    val compare: t -> expr -> binop -> expr -> t
+
+    (* abstract join *)
+    val join: t -> t -> t
+
+    (* abstract intersection *)
+    val meet: t -> t -> t
+
+    (* abstract widening *)
+    val widen: t -> t -> t
+
+    (* abstract element is included in another one *)
+    val subset: t -> t -> bool
+
+    (* abstract element is the empty set *)
+    val is_bottom: t -> bool
+
+    val print: int -> t -> string -> unit
+    val print_all: int -> t -> unit
+
+  end
diff --git a/domain_concrete.ml b/domain_concrete.ml
@@ -0,0 +1,149 @@
+(* Abstract interpretation - Concrete execution domain *)
+
+open Ast
+open Domain
+
+open Format
+
+module Concrete = (struct
+
+  (* variables (strings) in a map *)
+  module Env = Map.Make(String)
+
+  (* environment : we consider integer value *)
+  type env = int Env.t
+
+  (* environments set *)
+  module EnvSet =
+    Set.Make
+      (struct
+        type t = env
+        let compare = Env.compare Int.compare
+      end)
+
+  (* an element is a set of possible environments *)
+  type t = EnvSet.t
+
+  (* a set of integer values *)
+  module ValSet = Set.Make
+    (struct
+      type t = int
+      let compare = Int.compare
+    end)
+
+  let int_map (f : int -> int) (s : ValSet.t) : ValSet.t =
+    ValSet.fold (fun x acc -> ValSet.add (f x) acc) s ValSet.empty
+
+  let int2_map (f: int -> int -> int) (s1 : ValSet.t) (s2 : ValSet.t) : ValSet.t =
+    ValSet.fold
+      (fun x1 acc ->
+        ValSet.fold
+          (fun x2 acc -> ValSet.add (f x1 x2) acc) s2 acc
+      ) s1 ValSet.empty
+
+  let rec eval_expr (e : expr) (m : env) : ValSet.t =
+    match e with
+    | Ecst (_, Tint, Cint c) -> ValSet.singleton c
+    | Eident (_, Tint, (Tint, var)) -> ValSet.singleton (Env.find var m)
+    | Eref (_, Tint, e) -> eval_expr e m
+    | Ederef (_, Tint, (Tint, var)) -> ValSet.singleton (Env.find var m)
+    | Ebinop (_, Tint, Badd, e1, e2) ->
+        let v1 = eval_expr e1 m and v2 = eval_expr e2 m in
+          int2_map (fun x y -> x + y) v1 v2
+    | Ebinop (_, Tint, Bsub, e1, e2) ->
+        let v1 = eval_expr e1 m and v2 = eval_expr e2 m in
+          int2_map (fun x y -> x - y) v1 v2
+    | Ebinop (_, Tint, Bmul, e1, e2) ->
+        let v1 = eval_expr e1 m and v2 = eval_expr e2 m in
+          int2_map (fun x y -> x * y) v1 v2
+    | Ebinop (_, Tint, Bdiv, e1, e2) ->
+        let v1 = eval_expr e1 m and v2 = eval_expr e2 m in
+          let v2 = ValSet.remove 0 v2 in int2_map (fun x y -> x / y) v1 v2
+    | Erand (_, Tint, Ecst (_, Tint, Cint i1), Ecst (_, Tint, Cint i2)) ->
+        let rec rand_set v set =
+          if v > i2 then set
+          else rand_set (v+1) (ValSet.add v set)
+        in
+        rand_set i1 ValSet.empty
+    | _ -> ValSet.empty
+
+  let eval_compare (e1 : expr) (bop : binop) (e2 : expr) (m : env) : bool =
+    let f =
+      match bop with
+      | Beq -> Int.equal
+      | Bneq -> fun x y -> not (Int.equal x y)
+      | Blt -> fun x y -> x < y
+      | Ble -> fun x y -> x <= y
+      | Bgt -> fun x y -> x > y
+      | Bge -> fun x y -> x >= y
+      | _ -> fun _ _ -> false   (* todo *)
+    in
+    let s1 = eval_expr e1 m and s2 = eval_expr e2 m in
+    ValSet.fold
+      (fun v1 acc ->
+        ValSet.fold
+          (fun v2 acc ->
+            (acc || (f v1 v2))
+          ) s2 acc
+      ) s1 false
+
+  let env_set_map f m =
+    EnvSet.fold (fun x acc -> EnvSet.add (f x) acc) m EnvSet.empty
+
+  let init () = EnvSet.singleton Env.empty
+
+  let bottom () = EnvSet.empty
+
+  let add_var m v = env_set_map (Env.add v Int.zero) m
+
+  let del_var m v = env_set_map (Env.remove v) m
+
+  let assign m var e =
+    EnvSet.fold
+      (fun env acc ->
+        let s = eval_expr e env in
+        ValSet.fold
+          (fun v acc ->
+            EnvSet.add (Env.add var v env) acc
+          ) s acc
+      ) m EnvSet.empty
+
+  let compare m e1 op e2 =
+    EnvSet.filter (fun env -> eval_compare e1 op e2 env) m
+
+  let join m1 m2 = EnvSet.union m1 m2
+
+  let widen = join
+
+  let meet m1 m2 = EnvSet.inter m1 m2
+
+  let subset m1 m2 = EnvSet.subset m1 m2
+
+  let is_bottom m = EnvSet.is_empty m
+
+  let print lnum m var =
+    eprintf "line %d: " lnum;
+    eprintf "{ ";
+    EnvSet.iter
+      (fun env ->
+        eprintf "[";
+        eprintf "%s=%s" var (Int.to_string (Env.find var env));
+        eprintf "]; "
+      ) m;
+    eprintf "}@."
+
+  let print_all lnum m =
+    eprintf "line %d: " lnum;
+    eprintf "{ ";
+    EnvSet.iter
+      (fun env ->
+        eprintf "[";
+        Env.iter
+          (fun var v ->
+            eprintf "%s=%s;" var (Int.to_string v)
+          ) env;
+        eprintf "]; "
+      ) m;
+      eprintf "}@."
+
+end: DOMAIN)