src/HOL/Tools/ATP/atp_problem.ML
author blanchet
Sun, 22 May 2011 14:49:35 +0200
changeset 43778 cabb3a947894
parent 43626 8ea9c6fa8b53
child 43780 0134d6650092
permissions -rw-r--r--
reorganized ATP formats a little bit
blanchet@38293
     1
(*  Title:      HOL/Tools/ATP/atp_problem.ML
blanchet@38261
     2
    Author:     Jia Meng, Cambridge University Computer Laboratory and NICTA
blanchet@37509
     3
    Author:     Jasmin Blanchette, TU Muenchen
blanchet@37509
     4
blanchet@39692
     5
Abstract representation of ATP problems and TPTP syntax.
blanchet@37509
     6
*)
blanchet@37509
     7
blanchet@38253
     8
(* Interface of the abstract ATP problem representation and its TPTP
   printer. *)
signature ATP_PROBLEM =
sig
  (* First-order term: a head symbol applied to a list of argument terms. *)
  datatype 'a fo_term = ATerm of 'a * 'a fo_term list
  datatype quantifier = AForall | AExists
  datatype connective = ANot | AAnd | AOr | AImplies | AIf | AIff | ANotIff
  (* 'a: names of bound variables, 'b: their optional types, 'c: atoms. *)
  datatype ('a, 'b, 'c) formula =
    AQuant of quantifier * ('a * 'b option) list * ('a, 'b, 'c) formula |
    AConn of connective * ('a, 'b, 'c) formula list |
    AAtom of 'c

  (* Output dialect; printed as "cnf", "fof" and "tff", respectively. *)
  datatype format = UEQ | FOF | TFF
  datatype formula_kind = Axiom | Definition | Lemma | Hypothesis | Conjecture
  datatype 'a problem_line =
    (* identifier, declared symbol, argument types, result type *)
    Decl of string * 'a * 'a list * 'a |
    (* identifier, kind, formula, optional source, optional useful info *)
    Formula of string * formula_kind * ('a, 'a, 'a fo_term) formula
               * string fo_term option * string fo_term option
  (* A problem is a list of sections, each a heading with its lines. *)
  type 'a problem = (string * 'a problem_line list) list

  (* official TPTP syntax *)
  val tptp_special_prefix : string
  val tptp_false : string
  val tptp_true : string
  val tptp_tff_type_of_types : string
  val tptp_tff_bool_type : string
  val tptp_tff_individual_type : string

  (* Current local time formatted as "%Y-%m-%d %H:%M:%S". *)
  val timestamp : unit -> string
  (* hashw (u, w) = u + 65599 * w, in word arithmetic. *)
  val hashw : word * word -> word
  (* Folds "hashw" over the characters of the string, starting from the given
     word. *)
  val hashw_string : string * word -> word
  (* Tests whether the first character of the name is an uppercase letter. *)
  val is_atp_variable : string -> bool
  (* Renders a problem as a list of strings: a generated-file header followed,
     for each nonempty section, by a heading comment and its lines. *)
  val tptp_strings_for_atp_problem : format -> string problem -> string list
  (* Replaces each (full name, desired name) pair by a single name. If the
     Boolean is true, readable names are chosen and the resulting pool (full
     name -> nice name, nice name -> full name) is returned; otherwise the
     full names are kept and no pool is returned. *)
  val nice_atp_problem :
    bool -> ('a * (string * string) problem_line list) list
    -> ('a * string problem_line list) list
       * (string Symtab.table * string Symtab.table) option
end;
blanchet@37509
    43
blanchet@38253
    44
structure ATP_Problem : ATP_PROBLEM =
blanchet@37509
    45
struct
blanchet@37509
    46
blanchet@37643
    47
(** ATP problem **)
blanchet@37643
    48
blanchet@38198
    49
(* First-order term: a head symbol applied to a (possibly empty) list of
   argument terms. *)
datatype 'a fo_term = ATerm of 'a * 'a fo_term list

datatype quantifier = AForall | AExists

datatype connective =
    ANot
  | AAnd
  | AOr
  | AImplies
  | AIf
  | AIff
  | ANotIff

(* 'a: names of bound variables, 'b: their optional types, 'c: atoms. *)
datatype ('a, 'b, 'c) formula =
    AQuant of quantifier * ('a * 'b option) list * ('a, 'b, 'c) formula
  | AConn of connective * ('a, 'b, 'c) formula list
  | AAtom of 'c

datatype format = UEQ | FOF | TFF

datatype formula_kind =
    Axiom
  | Definition
  | Lemma
  | Hypothesis
  | Conjecture

(* Decl: identifier, symbol, argument types, result type.
   Formula: identifier, kind, formula, optional source, optional useful
   info. *)
datatype 'a problem_line =
    Decl of string * 'a * 'a list * 'a
  | Formula of string * formula_kind * ('a, 'a, 'a fo_term) formula
               * string fo_term option * string fo_term option

(* A problem is a list of sections, each a heading with its lines. *)
type 'a problem = (string * 'a problem_line list) list
blanchet@37643
    64
blanchet@43587
    65
(* official TPTP syntax *)

(* prefix shared by the special symbols below *)
val tptp_special_prefix : string = "$"
(* propositional constants *)
val tptp_false : string = "$false"
val tptp_true : string = "$true"
(* type names used in TFF output *)
val tptp_tff_type_of_types : string = "$tType"
val tptp_tff_bool_type : string = "$o"
val tptp_tff_individual_type : string = "$i"
blanchet@43587
    72
blanchet@38253
    73
(* Current local time, formatted as year-month-day hour:minute:second. *)
fun timestamp () =
  Date.fmt "%Y-%m-%d %H:%M:%S" (Date.fromTimeLocal (Time.now ()))
blanchet@38253
    74
blanchet@43438
    75
(* Multiplicative hash with the constant 65599, as recommended in "Compilers:
   Principles, Techniques, and Tools" by Aho, Sethi, and Ullman. The "hashpjw"
   function, which they particularly recommend, is not used because it
   triggers a bug in versions of Poly/ML up to 4.2.0. *)
fun hashw (u, w) = Word.+ (Word.* (w, 0w65599), u)

(* Mixes the character code of "c" into the running hash "w". *)
fun hashw_char (c, w) = hashw (Word.fromInt (Char.ord c), w)

(* Hashes the characters of "s" from left to right, starting from "w". *)
fun hashw_string (s : string, w) =
  List.foldl hashw_char w (String.explode s)
blanchet@43438
    81
blanchet@38854
    82
(* Name printed for a formula kind in a problem line. *)
fun string_for_kind kind =
  case kind of
    Axiom => "axiom"
  | Definition => "definition"
  | Lemma => "lemma"
  | Hypothesis => "hypothesis"
  | Conjecture => "conjecture"
blanchet@38854
    87
blanchet@38198
    88
(* Prints a term. A symbol without arguments is printed as is; "equal" is
   printed infix as " = "; "[]" is printed as a bracketed list (used for lists
   in the optional "source" field of a derivation); anything else is printed
   in the form head(arg, ...). *)
fun string_for_term (ATerm (head, args)) =
  let
    val printed_args = map string_for_term args
  in
    case (head, args) of
      (_, []) => head
    | ("equal", _) => space_implode " = " printed_args
    | ("[]", _) => "[" ^ commas printed_args ^ "]"
    | _ => head ^ "(" ^ commas printed_args ^ ")"
  end
blanchet@38198
    96
(* Symbol printed for a quantifier. *)
fun string_for_quantifier q =
  case q of
    AForall => "!"
  | AExists => "?"
blanchet@38198
    98
(* Symbol printed for a connective. *)
fun string_for_connective c =
  case c of
    ANot => "~"
  | AAnd => "&"
  | AOr => "|"
  | AImplies => "=>"
  | AIf => "<="
  | AIff => "<=>"
  | ANotIff => "<~>"
blanchet@43778
   105
(* Prints a bound variable. For TFF the variable is followed by its type,
   falling back on the individual type if none is given; for the other formats
   only the name is printed. *)
fun string_for_bound_var format (s, ty) =
  case format of
    TFF => s ^ " : " ^ the_default tptp_tff_individual_type ty
  | _ => s
blanchet@43587
   108
(* Prints a formula. Quantified and connected formulas are parenthesized; a
   negated "equal" atom is printed infix with " != ". *)
fun string_for_formula format phi =
  let
    val str = string_for_formula format
  in
    case phi of
      AQuant (q, xs, body) =>
      "(" ^ string_for_quantifier q ^
      "[" ^ commas (map (string_for_bound_var format) xs) ^ "] : " ^
      str body ^ ")"
    | AConn (ANot, [AAtom (ATerm ("equal", ts))]) =>
      space_implode " != " (map string_for_term ts)
    | AConn (c, [sub]) =>
      "(" ^ string_for_connective c ^ " " ^ str sub ^ ")"
    | AConn (c, subs) =>
      let val sep = " " ^ string_for_connective c ^ " " in
        "(" ^ space_implode sep (map str subs) ^ ")"
      end
    | AAtom tm => string_for_term tm
  end
blanchet@38198
   120
blanchet@43399
   121
(* Prints the type of a symbol from its argument types and result type:
   no arguments give just the result type, one argument gives "arg > res",
   and several arguments give "(arg1 * ... * argn) > res". *)
fun string_for_symbol_type arg_tys res_ty =
  case arg_tys of
    [] => res_ty
  | [arg_ty] => arg_ty ^ " > " ^ res_ty
  | _ => "(" ^ space_implode " * " arg_tys ^ ")" ^ " > " ^ res_ty
blanchet@43399
   125
blanchet@43510
   126
(* Source term used when useful info is present but no source is given:
   "inference" applied to "isabelle" and two empty lists. *)
val default_source =
  let val empty_list = ATerm ("[]", []) in
    ATerm ("inference", [ATerm ("isabelle", []), empty_list, empty_list])
  end
blanchet@43510
   128
blanchet@43580
   129
(* Prints one problem line. Declarations are always printed as "tff" type
   declarations. Formulas are introduced by the keyword of the given format
   and are followed by the optional source and useful-info terms; when only
   the useful info is present, "default_source" fills the source position. *)
fun string_for_problem_line format line =
  case line of
    Decl (ident, sym, arg_tys, res_ty) =>
    "tff(" ^ ident ^ ", type,\n    " ^ sym ^ " : " ^
    string_for_symbol_type arg_tys res_ty ^ ").\n"
  | Formula (ident, kind, phi, source, useful_info) =>
    let
      val keyword =
        case format of
          UEQ => "cnf"
        | FOF => "fof"
        | TFF => "tff"
      val annotations =
        case (source, useful_info) of
          (NONE, NONE) => ""
        | (SOME tm, NONE) => ", " ^ string_for_term tm
        | (_, SOME tm) =>
          ", " ^ string_for_term (the_default default_source source) ^
          ", " ^ string_for_term tm
    in
      keyword ^ "(" ^ ident ^ ", " ^ string_for_kind kind ^ ",\n    (" ^
      string_for_formula format phi ^ ")" ^ annotations ^ ").\n"
    end
blanchet@43580
   143
(* Renders a problem as a list of strings: first a comment header carrying a
   timestamp, then, for each section that has at least one line, a heading
   comment with the number of lines followed by the printed lines. Empty
   sections are omitted. *)
fun tptp_strings_for_atp_problem format problem =
  let
    val header =
      "% This file was generated by Isabelle (most likely Sledgehammer)\n\
      \% " ^ timestamp () ^ "\n"
    fun section (_, []) = []
      | section (heading, lines) =
        ("\n% " ^ heading ^ " (" ^ string_of_int (length lines) ^ ")\n") ::
        map (string_for_problem_line format) lines
  in header :: maps section problem end
blanchet@37643
   151
blanchet@39692
   152
(* Tells whether "s" starts with an uppercase letter, which is how ATP
   variables are recognized. The empty string is not a variable; it is checked
   explicitly because String.sub would raise Subscript on it. *)
fun is_atp_variable s = s <> "" andalso Char.isUpper (String.sub (s, 0))
blanchet@38251
   153
blanchet@37643
   154
blanchet@37643
   155
(** Nice names **)
blanchet@37643
   156
blanchet@37624
   157
(* Initial name pool: a pair of empty tables if readable names are wanted,
   no pool at all otherwise. *)
fun empty_name_pool true = SOME (Symtab.empty, Symtab.empty)
  | empty_name_pool false = NONE
blanchet@37624
   159
blanchet@37624
   160
(* Folds "f" over "xs" from right to left (fold_rev), threading both an
   accumulator (initially "z") and the name pool. *)
fun pool_fold f xs z pool =
  fold_rev (fn x => fn (acc, pool') => f x acc pool') xs (z, pool)

(* Applies the pool-threading function "f" to every element of "xs" and
   collects the results in the original order. *)
fun pool_map f xs =
  pool_fold (fn x => fn ys => fn pool =>
                let val (y, pool') = f x pool in (y :: ys, pool') end)
            xs []
blanchet@37624
   163
blanchet@43088
   164
(* Strips dot-separated qualifiers from a name. The name is scanned from its
   last character backwards: characters are kept up to the first "."; after a
   "." dots and alphanumeric characters are dropped until some other character
   is met, from which point characters are kept again until the next ".". *)
val no_qualifiers =
  let
    (* The Boolean tells whether the scan is currently keeping characters. *)
    fun scan _ [] = []
      | scan _ (#"." :: cs) = scan false cs
      | scan true (c :: cs) = c :: scan true cs
      | scan false (c :: cs) =
        if Char.isAlphaNum c then scan false cs else c :: scan true cs
  in String.implode o rev o scan true o rev o String.explode end
blanchet@43088
   173
blanchet@43626
   174
(* Long names can slow down the ATPs, so readable names longer than this are
   shortened. *)
val max_readable_name_size : int = 20

(* Names never used as nice names. "op" is reserved to avoid the unreadable
   "op_1", "op_2", etc., in the problem files. "equal" is reserved by some
   ATPs. "eq" is reserved to ensure that "HOL.eq" is correctly mapped to
   equality. *)
val reserved_nice_names : string list = ["op", "equal", "eq"]
blanchet@37624
   181
(* Derives a readable name from the desired name "s". If "s" is already the
   full name it is returned unchanged. Otherwise qualifiers are stripped, the
   name is desymbolized (upper case iff the full name starts with an uppercase
   letter), a trailing digit is protected by "_", overlong names are
   shortened around a hash of the full name, and reserved names are replaced
   by the full name. *)
fun readable_name full_name s =
  if s = full_name then
    s
  else
    let
      val upper = Char.isUpper (String.sub (full_name, 0))
      (* SNARK doesn't like sort (type) names that end with digits. We make
         an effort to avoid this here. *)
      fun avoid_trailing_digit name =
        if Char.isDigit (String.sub (name, size name - 1)) then name ^ "_"
        else name
      (* Keeps the first and last "max_readable_name_size div 2 - 4"
         characters and puts the hash of the full name in between. *)
      fun shorten name =
        if size name <= max_readable_name_size then
          name
        else
          let val kept = max_readable_name_size div 2 - 4 in
            String.substring (name, 0, kept) ^
            Word.toString (hashw_string (full_name, 0w0)) ^
            String.extract (name, size name - kept, NONE)
          end
      fun avoid_reserved name =
        if member (op =) reserved_nice_names name then full_name else name
    in
      s |> no_qualifiers
        |> Name.desymbolize upper
        |> avoid_trailing_digit
        |> shorten
        |> avoid_reserved
    end
blanchet@37624
   200
blanchet@37624
   201
(* Maps a (full name, desired name) pair to the name to print, threading the
   optional name pool. Without a pool the full name is used. With a pool,
   names starting with the TPTP special prefix are left alone, names already
   in the pool are looked up, and new names get a fresh readable name that is
   recorded in both tables of the pool (full name -> nice name and nice name
   -> full name). *)
fun nice_name (full_name, _) NONE = (full_name, NONE)
  | nice_name (full_name, desired_name) (SOME the_pool) =
    if String.isPrefix tptp_special_prefix full_name then
      (full_name, SOME the_pool)
    else
      case Symtab.lookup (fst the_pool) full_name of
        SOME nice => (nice, SOME the_pool)
      | NONE =>
        let
          val nice_prefix = readable_name full_name desired_name
          (* Tries the prefix itself, then the prefix with the suffixes "_1_",
             "_2_", etc., until a name is found that is unused or already
             stands for this full name. *)
          fun add j =
            let
              (* The trailing "_" is for SNARK, which doesn't like names that
                 end with digits. *)
              val candidate =
                nice_prefix ^
                (if j = 0 then "" else "_" ^ string_of_int j ^ "_")
            in
              case Symtab.lookup (snd the_pool) candidate of
                SOME full_name' =>
                if full_name = full_name' then (candidate, the_pool)
                else add (j + 1)
              | NONE =>
                (candidate,
                 (Symtab.update_new (full_name, candidate) (fst the_pool),
                  Symtab.update_new (candidate, full_name) (snd the_pool)))
            end
        in add 0 |> apsnd SOME end
blanchet@37624
   226
blanchet@38198
   227
(* Renames the head symbol of a term, then its arguments, threading the name
   pool. *)
fun nice_term (ATerm (name, ts)) pool =
  let
    val (name', pool') = nice_name name pool
    val (ts', pool'') = pool_map nice_term ts pool'
  in (ATerm (name', ts'), pool'') end
blanchet@38198
   229
(* Renames all names in a formula, threading the name pool. For a quantifier
   the bound variables are handled first, then their optional types, then the
   body. *)
fun nice_formula phi pool =
  let
    fun nice_opt_type NONE p = (NONE, p)
      | nice_opt_type (SOME ty) p =
        let val (ty', p') = nice_name ty p in (SOME ty', p') end
  in
    case phi of
      AQuant (q, xs, body) =>
      let
        val (ss, pool1) = pool_map nice_name (map fst xs) pool
        val (ts, pool2) = pool_map nice_opt_type (map snd xs) pool1
        val (body', pool3) = nice_formula body pool2
      in (AQuant (q, ss ~~ ts, body'), pool3) end
    | AConn (c, phis) =>
      let val (phis', pool') = pool_map nice_formula phis pool in
        (AConn (c, phis'), pool')
      end
    | AAtom tm =>
      let val (tm', pool') = nice_term tm pool in (AAtom tm', pool') end
  end
blanchet@43414
   238
(* Renames the names in a problem line, threading the name pool. In a
   declaration the symbol, the argument types and the result type are renamed
   in that order; in a formula line only the formula is renamed. Identifiers,
   kinds, sources and useful info are left untouched. *)
fun nice_problem_line (Decl (ident, sym, arg_tys, res_ty)) pool =
    let
      val (sym', pool1) = nice_name sym pool
      val (arg_tys', pool2) = pool_map nice_name arg_tys pool1
      val (res_ty', pool3) = nice_name res_ty pool2
    in (Decl (ident, sym', arg_tys', res_ty'), pool3) end
  | nice_problem_line (Formula (ident, kind, phi, source, useful_info)) pool =
    let val (phi', pool') = nice_formula phi pool in
      (Formula (ident, kind, phi', source, useful_info), pool')
    end
blanchet@38171
   246
(* Renames the names in every line of every section of a problem, keeping the
   section headings; the result is a function of the name pool. *)
fun nice_problem problem =
  let
    fun nice_section (heading, lines) pool =
      let val (lines', pool') = pool_map nice_problem_line lines pool in
        ((heading, lines'), pool')
      end
  in pool_map nice_section problem end
blanchet@39692
   249
(* Renames a whole problem starting from the initial pool determined by
   "readable_names"; returns the renamed problem and the final pool. *)
fun nice_atp_problem readable_names problem =
  empty_name_pool readable_names |> nice_problem problem
blanchet@37509
   251
blanchet@37509
   252
end;