src/HOL/Tools/ATP/atp_problem.ML
author blanchet
Sun, 22 May 2011 14:49:35 +0200
changeset 43778 cabb3a947894
parent 43626 8ea9c6fa8b53
child 43780 0134d6650092
permissions -rw-r--r--
reorganized ATP formats a little bit
     1 (*  Title:      HOL/Tools/ATP/atp_problem.ML
     2     Author:     Jia Meng, Cambridge University Computer Laboratory and NICTA
     3     Author:     Jasmin Blanchette, TU Muenchen
     4 
     5 Abstract representation of ATP problems and TPTP syntax.
     6 *)
     7 
     8 signature ATP_PROBLEM =
     9 sig
    10   datatype 'a fo_term = ATerm of 'a * 'a fo_term list
    11   datatype quantifier = AForall | AExists
    12   datatype connective = ANot | AAnd | AOr | AImplies | AIf | AIff | ANotIff
    13   datatype ('a, 'b, 'c) formula =
    14     AQuant of quantifier * ('a * 'b option) list * ('a, 'b, 'c) formula |
    15     AConn of connective * ('a, 'b, 'c) formula list |
    16     AAtom of 'c
    17 
    18   datatype format = UEQ | FOF | TFF
    19   datatype formula_kind = Axiom | Definition | Lemma | Hypothesis | Conjecture
    20   datatype 'a problem_line =
    21     Decl of string * 'a * 'a list * 'a |
    22     Formula of string * formula_kind * ('a, 'a, 'a fo_term) formula
    23                * string fo_term option * string fo_term option
    24   type 'a problem = (string * 'a problem_line list) list
    25 
    26 (* official TPTP syntax *)
    27   val tptp_special_prefix : string
    28   val tptp_false : string
    29   val tptp_true : string
    30   val tptp_tff_type_of_types : string
    31   val tptp_tff_bool_type : string
    32   val tptp_tff_individual_type : string
    33   val timestamp : unit -> string
    34   val hashw : word * word -> word
    35   val hashw_string : string * word -> word
    36   val is_atp_variable : string -> bool
    37   val tptp_strings_for_atp_problem : format -> string problem -> string list
    38   val nice_atp_problem :
    39     bool -> ('a * (string * string) problem_line list) list
    40     -> ('a * string problem_line list) list
    41        * (string Symtab.table * string Symtab.table) option
    42 end;
    43 
    44 structure ATP_Problem : ATP_PROBLEM =
    45 struct
    46 
    47 (** ATP problem **)
    48 
    49 datatype 'a fo_term = ATerm of 'a * 'a fo_term list
    50 datatype quantifier = AForall | AExists
    51 datatype connective = ANot | AAnd | AOr | AImplies | AIf | AIff | ANotIff
    52 datatype ('a, 'b, 'c) formula =
    53   AQuant of quantifier * ('a * 'b option) list * ('a, 'b, 'c) formula |
    54   AConn of connective * ('a, 'b, 'c) formula list |
    55   AAtom of 'c
    56 
    57 datatype format = UEQ | FOF | TFF
    58 datatype formula_kind = Axiom | Definition | Lemma | Hypothesis | Conjecture
    59 datatype 'a problem_line =
    60   Decl of string * 'a * 'a list * 'a |
    61   Formula of string * formula_kind * ('a, 'a, 'a fo_term) formula
    62              * string fo_term option * string fo_term option
    63 type 'a problem = (string * 'a problem_line list) list
    64 
    65 (* official TPTP syntax *)
    66 val tptp_special_prefix = "$"
    67 val tptp_false = "$false"
    68 val tptp_true = "$true"
    69 val tptp_tff_type_of_types = "$tType"
    70 val tptp_tff_bool_type = "$o"
    71 val tptp_tff_individual_type = "$i"
    72 
    73 val timestamp = Date.fmt "%Y-%m-%d %H:%M:%S" o Date.fromTimeLocal o Time.now
    74 
    75 (* This hash function is recommended in Compilers: Principles, Techniques, and
    76    Tools, by Aho, Sethi, and Ullman. The "hashpjw" function, which they
    77    particularly recommend, triggers a bug in versions of Poly/ML up to 4.2.0. *)
    78 fun hashw (u, w) = Word.+ (u, Word.* (0w65599, w))
    79 fun hashw_char (c, w) = hashw (Word.fromInt (Char.ord c), w)
    80 fun hashw_string (s : string, w) = CharVector.foldl hashw_char w s
    81 
    82 fun string_for_kind Axiom = "axiom"
    83   | string_for_kind Definition = "definition"
    84   | string_for_kind Lemma = "lemma"
    85   | string_for_kind Hypothesis = "hypothesis"
    86   | string_for_kind Conjecture = "conjecture"
    87 
    88 fun string_for_term (ATerm (s, [])) = s
    89   | string_for_term (ATerm ("equal", ts)) =
    90     space_implode " = " (map string_for_term ts)
    91   | string_for_term (ATerm ("[]", ts)) =
    92     (* used for lists in the optional "source" field of a derivation *)
    93     "[" ^ commas (map string_for_term ts) ^ "]"
    94   | string_for_term (ATerm (s, ts)) =
    95     s ^ "(" ^ commas (map string_for_term ts) ^ ")"
    96 fun string_for_quantifier AForall = "!"
    97   | string_for_quantifier AExists = "?"
    98 fun string_for_connective ANot = "~"
    99   | string_for_connective AAnd = "&"
   100   | string_for_connective AOr = "|"
   101   | string_for_connective AImplies = "=>"
   102   | string_for_connective AIf = "<="
   103   | string_for_connective AIff = "<=>"
   104   | string_for_connective ANotIff = "<~>"
   105 fun string_for_bound_var TFF (s, ty) =
   106     s ^ " : " ^ (ty |> the_default tptp_tff_individual_type)
   107   | string_for_bound_var _ (s, _) = s
   108 fun string_for_formula format (AQuant (q, xs, phi)) =
   109     "(" ^ string_for_quantifier q ^
   110     "[" ^ commas (map (string_for_bound_var format) xs) ^ "] : " ^
   111     string_for_formula format phi ^ ")"
   112   | string_for_formula _ (AConn (ANot, [AAtom (ATerm ("equal", ts))])) =
   113     space_implode " != " (map string_for_term ts)
   114   | string_for_formula format (AConn (c, [phi])) =
   115     "(" ^ string_for_connective c ^ " " ^ string_for_formula format phi ^ ")"
   116   | string_for_formula format (AConn (c, phis)) =
   117     "(" ^ space_implode (" " ^ string_for_connective c ^ " ")
   118                         (map (string_for_formula format) phis) ^ ")"
   119   | string_for_formula _ (AAtom tm) = string_for_term tm
   120 
   121 fun string_for_symbol_type [] res_ty = res_ty
   122   | string_for_symbol_type [arg_ty] res_ty = arg_ty ^ " > " ^ res_ty
   123   | string_for_symbol_type arg_tys res_ty =
   124     string_for_symbol_type ["(" ^ space_implode " * " arg_tys ^ ")"] res_ty
   125 
   126 val default_source =
   127   ATerm ("inference", ATerm ("isabelle", []) :: replicate 2 (ATerm ("[]", [])))
   128 
   129 fun string_for_problem_line _ (Decl (ident, sym, arg_tys, res_ty)) =
   130     "tff(" ^ ident ^ ", type,\n    " ^ sym ^ " : " ^
   131     string_for_symbol_type arg_tys res_ty ^ ").\n"
   132   | string_for_problem_line format
   133                             (Formula (ident, kind, phi, source, useful_info)) =
   134     (case format of UEQ => "cnf" | FOF => "fof" | TFF => "tff") ^
   135     "(" ^ ident ^ ", " ^ string_for_kind kind ^ ",\n    (" ^
   136     string_for_formula format phi ^ ")" ^
   137     (case (source, useful_info) of
   138        (NONE, NONE) => ""
   139      | (SOME tm, NONE) => ", " ^ string_for_term tm
   140      | (_, SOME tm) =>
   141        ", " ^ string_for_term (source |> the_default default_source) ^
   142        ", " ^ string_for_term tm) ^ ").\n"
   143 fun tptp_strings_for_atp_problem format problem =
   144   "% This file was generated by Isabelle (most likely Sledgehammer)\n\
   145   \% " ^ timestamp () ^ "\n" ::
   146   maps (fn (_, []) => []
   147          | (heading, lines) =>
   148            "\n% " ^ heading ^ " (" ^ string_of_int (length lines) ^ ")\n" ::
   149            map (string_for_problem_line format) lines)
   150        problem
   151 
   152 fun is_atp_variable s = Char.isUpper (String.sub (s, 0))
   153 
   154 
   155 (** Nice names **)
   156 
   157 fun empty_name_pool readable_names =
   158   if readable_names then SOME (Symtab.empty, Symtab.empty) else NONE
   159 
   160 fun pool_fold f xs z = pair z #> fold_rev (fn x => uncurry (f x)) xs
   161 fun pool_map f xs =
   162   pool_fold (fn x => fn ys => fn pool => f x pool |>> (fn y => y :: ys)) xs []
   163 
   164 val no_qualifiers =
   165   let
   166     fun skip [] = []
   167       | skip (#"." :: cs) = skip cs
   168       | skip (c :: cs) = if Char.isAlphaNum c then skip cs else c :: keep cs
   169     and keep [] = []
   170       | keep (#"." :: cs) = skip cs
   171       | keep (c :: cs) = c :: keep cs
   172   in String.explode #> rev #> keep #> rev #> String.implode end
   173 
   174 (* Long names can slow down the ATPs. *)
   175 val max_readable_name_size = 20
   176 
   177 (* "op" is also reserved, to avoid the unreadable "op_1", "op_2", etc., in the
   178    problem files. "equal" is reserved by some ATPs. "eq" is reserved to ensure
   179    that "HOL.eq" is correctly mapped to equality. *)
   180 val reserved_nice_names = ["op", "equal", "eq"]
   181 fun readable_name full_name s =
   182   if s = full_name then
   183     s
   184   else
   185     s |> no_qualifiers
   186       |> Name.desymbolize (Char.isUpper (String.sub (full_name, 0)))
   187          (* SNARK doesn't like sort (type) names that end with digits. We make
   188             an effort to avoid this here. *)
   189       |> (fn s => if Char.isDigit (String.sub (s, size s - 1)) then s ^ "_"
   190                   else s)
   191       |> (fn s =>
   192              if size s > max_readable_name_size then
   193                String.substring (s, 0, max_readable_name_size div 2 - 4) ^
   194                Word.toString (hashw_string (full_name, 0w0)) ^
   195                String.extract (s, size s - max_readable_name_size div 2 + 4,
   196                                NONE)
   197              else
   198                s)
   199       |> (fn s => if member (op =) reserved_nice_names s then full_name else s)
   200 
   201 fun nice_name (full_name, _) NONE = (full_name, NONE)
   202   | nice_name (full_name, desired_name) (SOME the_pool) =
   203     if String.isPrefix "$" full_name then
   204       (full_name, SOME the_pool)
   205     else case Symtab.lookup (fst the_pool) full_name of
   206       SOME nice_name => (nice_name, SOME the_pool)
   207     | NONE =>
   208       let
   209         val nice_prefix = readable_name full_name desired_name
   210         fun add j =
   211           let
   212             (* The trailing "_" is for SNARK (cf. comment above). *)
   213             val nice_name =
   214               nice_prefix ^ (if j = 0 then "" else "_" ^ string_of_int j ^ "_")
   215           in
   216             case Symtab.lookup (snd the_pool) nice_name of
   217               SOME full_name' =>
   218               if full_name = full_name' then (nice_name, the_pool)
   219               else add (j + 1)
   220             | NONE =>
   221               (nice_name,
   222                (Symtab.update_new (full_name, nice_name) (fst the_pool),
   223                 Symtab.update_new (nice_name, full_name) (snd the_pool)))
   224           end
   225       in add 0 |> apsnd SOME end
   226 
   227 fun nice_term (ATerm (name, ts)) =
   228   nice_name name ##>> pool_map nice_term ts #>> ATerm
   229 fun nice_formula (AQuant (q, xs, phi)) =
   230     pool_map nice_name (map fst xs)
   231     ##>> pool_map (fn NONE => pair NONE
   232                     | SOME ty => nice_name ty #>> SOME) (map snd xs)
   233     ##>> nice_formula phi
   234     #>> (fn ((ss, ts), phi) => AQuant (q, ss ~~ ts, phi))
   235   | nice_formula (AConn (c, phis)) =
   236     pool_map nice_formula phis #>> curry AConn c
   237   | nice_formula (AAtom tm) = nice_term tm #>> AAtom
   238 fun nice_problem_line (Decl (ident, sym, arg_tys, res_ty)) =
   239     nice_name sym
   240     ##>> pool_map nice_name arg_tys
   241     ##>> nice_name res_ty
   242     #>> (fn ((sym, arg_tys), res_ty) => Decl (ident, sym, arg_tys, res_ty))
   243   | nice_problem_line (Formula (ident, kind, phi, source, useful_info)) =
   244     nice_formula phi
   245     #>> (fn phi => Formula (ident, kind, phi, source, useful_info))
   246 fun nice_problem problem =
   247   pool_map (fn (heading, lines) =>
   248                pool_map nice_problem_line lines #>> pair heading) problem
   249 fun nice_atp_problem readable_names problem =
   250   nice_problem problem (empty_name_pool readable_names)
   251 
   252 end;