Designing a Theorem Prover, Part 7

Environment Link to heading

Our unification algorithm maintains the environment which is defined like this:
\(\{(?a_1, t_1), \dots, (?a_k, t_k) \}\), where \(\forall i: a_i \neq t_i\)

We will evaluate each term in this env and replace meta-variable \(?a_i\) with \(t_i\) when necessary. When we encounter an \(?a_i\) we will do the replacement and work with \(t_i\) recursively. This process will eventually stop, as the occurs check prevents loops such as \({(?b, f (?a)), (?a, g(?b))}\).

In code we’ll represent environment as Base.Map.t

(* unification.ml *)
module Env = struct
  open Base

  type t = (string, Term.t, String.comparator_witness) Map.t

  let empty = Map.empty (module String)
  let add env (var, term) = Map.add_exn env ~key:var ~data:term
  let find env var = Map.find env var
end

Here is the interface of our Unification module:

(* unification.mli *)

(** Module representing the environment for unification.

    The environment is used to avoid repeatedly substituting meta-variables
    throughout the equations every time a meta-variable is assigned. Instead,
    it maintains a mapping of meta-variable -> term assignments and resolves
    terms on-the-fly.

    Keeping environment allows us to ensure that cycles are
    prevented using occurs check which disallows assignments like [(?b, f(?a)), (?a, g(?b))]. *)
module Env : sig
  type t
  (** Type representing the environment,
      which maps meta-variables to terms. *)

  val empty : t
  (** Empty environment with no variable assignments. *)

  val add : t -> string * Term.t -> t
  (** Adds an assignment [var] -> [term] to the environment [env]. *)

  val find : t -> string -> Term.t option
  (** Looks up the assignment for [var] in [env]. *)
end

val unify : Env.t -> Formula.t * Formula.t -> (Env.t, string) result
(**
  Attempts to unify two atomic formulas [f1] and [f2] under the environment [env].

  This function only handles atomic formulas, which are predicates of the form
  [P(t1, ..., tn)] and [Q(u1, ..., un)].

  - If the predicates [p1] and [p2] of [f1] and [f2] are the same, it delegates
    to [unify_terms] to unify their terms [ts] and [us].
  - If the predicates [p1] and [p2] differ, it returns an error indicating
    the mismatch.
  - If either [f1] or [f2] is not an atomic formula, it returns an error
    indicating that only predicates are supported.

  @param env The current environment for unification.
  @param f1 The first atomic formula to unify.
  @param f2 The second atomic formula to unify.
  @return The updated env if unification succeeds, or an error message if it fails.

  @raise Error if the predicates differ or if non-atomic formulas are provided.

  Example:
  {[
    let env = [] in
    let f1 = Formula.Pred ("P", [Term.Var "x"; Term.Const 1]) in
    let f2 = Formula.Pred ("P", [Term.Const 2; Term.Const 1]) in
    match unify env (f1, f2) with
    | Ok new_env -> (* Unification succeeded, work with new_env *)
    | Error msg -> (* Handle unification failure *)
  ]}
*)

We’ll do the \(?a \mapsto term\) (meta-variable -> term) replacement by calling function
chase_var: Env.t -> Term.t -> Term.t:

(* unification.ml *)

(** Resolves the given term (if it is a variable)
    by repeatedly replacing it with its assignment in the environment [env].

    For example, if [env] contains mappings ?a -> ?b and ?b -> ?c,
    then [chase_var env (Term.Var "a")] will return [Term.Var "c"].

    @param env The environment mapping variables to terms.
    @param term The term to resolve.
    @return The resolved term, or the input term if no resolution is possible. *)
let rec chase_var env = function
  | Term.Var var ->
      let term = Env.find env var in
      Option.fold ~none:(Term.Var var) ~some:(chase_var env) term
  | term -> term

Here are a few inline tests to demonstrate how chase_var works:

(* unification.ml *)

let%test "chase_var: nonexistent variable" =
  let env = Env.empty in
  let var = Term.Var "x" in
  chase_var env var = var

let%test "chase_var: basic variable resolution" =
  let env = Env.add Env.empty ("x", Term.Function ("f", [])) in
  let term = chase_var env (Term.Var "x") in
  term = Term.Function ("f", [])

let%test "chase_var: chained variable resolution" =
  let env = Env.add Env.empty ("x", Term.Function ("f", [])) in
  let env = Env.add env ("y", Term.Var "x") in
  let term = chase_var env (Term.Var "y") in
  term = Term.Function ("f", [])

Our main unification function has the following type signature:
val unify : Env.t -> Formula.t * Formula.t -> (Env.t, string) result

(* unification.ml *)

let rec unify env (f1, f2) =
  match (f1, f2) with
  | Formula.Pred (p1, ts), Formula.Pred (p2, us) ->
      if p1 = p2 then unify_terms env (ts, us)
      else
        Error
          (Printf.sprintf "Can not unify different predicates %s and %s" p1 p2)
  | _, _ ->
      Error
        (Printf.sprintf
           "Only atomic formulas (predicates) are supported. Given: %s and %s"
           (Formula.to_string f1) (Formula.to_string f2))

As you can see If the predicates p1 and p2 of f1 and f2 are the same, we call the unify_terms to unify their terms ts and us. Here is how unify_terms looks like:
val unify_terms : Env.t -> Term.t list * Term.t list -> (Env.t, string) result

(* unification.ml *)

(* ... *)
and unify_terms env (ts, us) =
  let open Base.Result.Let_syntax in
  match (ts, us) with
  | [], [] -> Ok env
  | t :: ts, u :: us ->
      let%bind env' = unify_term env (t, u) in
      unify_terms env' (ts, us)
  | _, _ -> Error "Lists of terms have different length"

The unify_terms function unifies (recursively) two lists of terms (ts and us) within a given env:

If both lists are empty, then unification is successful and it returns the current env.
If the lists have the same length, it processes each pair of terms by calling the unify_term and updating the environment as it goes.
If the lists have different lengths, it returns an error.

At the beginning we open the Base.Result.Let_syntax. That’s because we’re using ppx_let (monadic let-bindings) to simplify things a bit by propagating errors during unification.

Next comes the unify_term function:
val unify_term : Env.t -> Term.t * Term.t -> (Env.t, string) result

(* unification.ml *)

(* ... *)
and unify_term env (t, u) =
  match (t, u) with
  | Term.Var v, t -> unify_var env (chase_var env (Term.Var v), chase_var env t)
  | t, Term.Var v -> unify_var env (chase_var env t, chase_var env (Term.Var v))
  | Term.Param (a, _), Param (b, _) ->
      if a = b then Ok env
      else
        Error
          (Printf.sprintf "Can not unify different parameters %s and %s" a b)
  | Term.Function (f, ts), Term.Function (g, us) ->
      if f = g then unify_terms env (ts, us)
      else
        Error (Printf.sprintf "Can not unify different functions %s and %s" f g)
  | t, u ->
      Error
        (Printf.sprintf "Can not unify terms %s and %s" (Term.to_string t)
           (Term.to_string u))

It attempts to unify two terms, t and u using the given env. Here’s how it does that:

If one term is a meta-variable (Term.Var) and the other is any term, it resolves the meta-variable using chase_var and delegates to unify_var for unification.
If both terms are parameters (Term.Param), it compares their names. If the names are the same, then unification succeeds (and we leave the env as is), otherwise they can not be unified so we return an error.
If both terms are functions (Term.Function), it checks if their names match. When names match, it recursively unifies their argument lists using unify_terms, otherwise it returns an error.
For any other combination of terms, unification fails, returning an error with a detailed message.

We already discussed the chase_var function, so now let’s have a look at the unify_var that has the following signature:

val unify_var : Env.t -> Term.t * Term.t -> (Env.t, string) result

(* unification.ml *)

(* ... *)
and unify_var env (var, term) =
  match (var, term) with
  | Var _, t when var = t -> Ok env
  | Var v, t ->
      if occurs_in env v t then
        Error (Printf.sprintf "Occurs check failed for variable: %s" v)
      else Ok (Env.add env (v, t))
  | t, u -> unify_term env (t, u)

It unifies a meta-variable term (var) with another term (term) within the given environment (env):

If the meta-variable and the term are the same, unification is trivial, and it returns the current environment unchanged.
If the meta-variable occurs within the term (directly or indirectly, e.g., through references in the environment), it returns an error, as unification would create a cyclic dependency.
If the meta-variable does not occur in the term, it adds the meta-variable -> term mapping to the environment
For any other combination of terms, it delegates to unify_term

Occurs check:

(* unification.ml *)

(** Returns [true] if [var] occurs in the given term. *)
let rec occurs_in env var = function
  | Term.Var var' ->
      if var' = var then true
      else
        let term = Env.find env var in
        Option.fold ~none:false ~some:(occurs_in env var) term
  | Term.Param (_, vars) ->
      let terms = List.map (fun s -> Term.Var s) vars in
      occurs_in_any_of env var terms
  | Term.Function (_, terms) -> occurs_in_any_of env var terms
  | _ -> false

(** Returns [true] if [var] occurs in any of the given [terms]. *)
and occurs_in_any_of env var terms =
  let term = List.find_opt (fun term -> occurs_in env var term) terms in
  Option.is_some term

Here are some tests to illustrate the use cases:

(* unification.ml *)

let%test "occurs_in: variable does not occur" =
  let env = Env.empty in
  let term = Term.Function ("f", [ Term.Param ("p", [ "y" ]); Term.Var "z" ]) in
  occurs_in env "x" term = false

let%test "occurs_in: variable occurs directly" =
  let env = Env.empty in
  let term = Term.Function ("f", [ Term.Var "x"; Term.Var "y" ]) in
  occurs_in env "x" term

let%test "occurs_in: variable occurs in nested Function" =
  let env = Env.empty in
  let term =
    Term.Function ("f", [ Term.Function ("g", [ Term.Var "x" ]); Term.Var "y" ])
  in
  occurs_in env "x" term

Finally, we want to have tests for the top-level unify function:

(* unification.ml *)

let%test "unify: successful predicate unification" =
  (* P(x, p(a)) = P(f(), p(a))
     expected: x -> f() *)
  let env = Env.empty in
  let f1 = Formula.Pred ("P", [ Term.Var "x"; Term.Param ("p", [ "a" ]) ]) in
  let f2 =
    Formula.Pred ("P", [ Term.Function ("f", []); Term.Param ("p", [ "a" ]) ])
  in
  match unify env (f1, f2) with
  | Ok env' ->
      let resolved = chase_var env' (Term.Var "x") in
      resolved = Term.Function ("f", [])
  | Error _ -> false

let%test "unify: mismatched predicate names" =
  (* P(x) != Q(f())
     expected: Error *)
  let env = Env.empty in
  let f1 = Formula.Pred ("P", [ Term.Var "x" ]) in
  let f2 = Formula.Pred ("Q", [ Term.Function ("f", []) ]) in
  match unify env (f1, f2) with Ok _ -> false | Error _ -> true

let%test "unify: occurs check failure" =
  (* P(x) = P(f(x))
     expected: Error "Occurs check failed" *)
  let env = Env.empty in
  let f1 = Formula.Pred ("P", [ Term.Var "x" ]) in
  let f2 = Formula.Pred ("P", [ Term.Function ("f", [ Term.Var "x" ]) ]) in
  match unify env (f1, f2) with
  | Ok _ -> false
  | Error msg -> String.starts_with msg ~prefix:"Occurs check failed"

let%test "unify: multiple variables in deep structure" =
  (* P(f(x, g(y, z)), h(w)) = P(f(a, g(b, c)), h(d))
     expected: x -> a, y -> b, z -> c, w -> d *)
  let env = Env.empty in
  let f1 =
    Formula.Pred
      ( "P",
        [
          Term.Function
            ( "f",
              [
                Term.Var "x"; Term.Function ("g", [ Term.Var "y"; Term.Var "z" ]);
              ] );
          Term.Function ("h", [ Term.Var "w" ]);
        ] )
  in
  let f2 =
    Formula.Pred
      ( "P",
        [
          Term.Function
            ( "f",
              [
                Term.Param ("a", []);
                Term.Function
                  ("g", [ Term.Param ("b", []); Term.Param ("c", []) ]);
              ] );
          Term.Function ("h", [ Term.Param ("d", []) ]);
        ] )
  in
  match unify env (f1, f2) with
  | Ok env' ->
      let x = chase_var env' (Term.Var "x") in
      let y = chase_var env' (Term.Var "y") in
      let z = chase_var env' (Term.Var "z") in
      let w = chase_var env' (Term.Var "w") in
      x = Term.Param ("a", [])
      && y = Term.Param ("b", [])
      && z = Term.Param ("c", [])
      && w = Term.Param ("d", [])
  | Error _ -> false

That’s it for today. We got a working unification. I’m not very happy with these inline tests, so before moving forward, I’ll probably rewrite them using alcotest.

Till next time!