src/HOL/Tools/ATP/atp_proof.ML
author blanchet
Thu, 16 Sep 2010 13:52:17 +0200
changeset 39695 c6b21584f336
parent 39694 acb25e9cf6fb
child 39697 b505208f435d
permissions -rw-r--r--
merge constructors
     1 (*  Title:      HOL/Tools/ATP/atp_proof.ML
     2     Author:     Lawrence C. Paulson, Cambridge University Computer Laboratory
     3     Author:     Claire Quigley, Cambridge University Computer Laboratory
     4     Author:     Jasmin Blanchette, TU Muenchen
     5 
     6 Abstract representation of ATP proofs and TSTP/Vampire/SPASS syntax.
     7 *)
     8 
     9 signature ATP_PROOF =
    10 sig
    11   type 'a fo_term = 'a ATP_Problem.fo_term
    12   type 'a uniform_formula = 'a ATP_Problem.uniform_formula
    13 
    14   type step_name = string * string option
    15 
    16   datatype 'a step =
    17     Definition of step_name * 'a * 'a |
    18     Inference of step_name * 'a * step_name list
    19 
    20   type 'a proof = 'a uniform_formula step list
    21 
    22   val is_same_step : step_name * step_name -> bool
    23   val atp_proof_from_tstplike_string : string -> string proof
    24   val map_term_names_in_atp_proof :
    25     (string -> string) -> string proof -> string proof
    26   val nasty_atp_proof : string Symtab.table -> string proof -> string proof
    27 end;
    28 
    29 structure ATP_Proof : ATP_PROOF =
    30 struct
    31 
    32 (*### FIXME: DUPLICATED FROM SLEDGEHAMMER_UTIL *)
    33 fun strip_spaces_in_list _ [] = []
    34   | strip_spaces_in_list _ [c1] = if Char.isSpace c1 then [] else [str c1]
    35   | strip_spaces_in_list is_evil [c1, c2] =
    36     strip_spaces_in_list is_evil [c1] @ strip_spaces_in_list is_evil [c2]
    37   | strip_spaces_in_list is_evil (c1 :: c2 :: c3 :: cs) =
    38     if Char.isSpace c1 then
    39       strip_spaces_in_list is_evil (c2 :: c3 :: cs)
    40     else if Char.isSpace c2 then
    41       if Char.isSpace c3 then
    42         strip_spaces_in_list is_evil (c1 :: c3 :: cs)
    43       else
    44         str c1 :: (if forall is_evil [c1, c3] then [" "] else []) @
    45         strip_spaces_in_list is_evil (c3 :: cs)
    46     else
    47       str c1 :: strip_spaces_in_list is_evil (c2 :: c3 :: cs)
    48 fun strip_spaces is_evil =
    49   implode o strip_spaces_in_list is_evil o String.explode
    50 
    51 fun is_ident_char c = Char.isAlphaNum c orelse c = #"_"
    52 val strip_spaces_except_between_ident_chars = strip_spaces is_ident_char
    53 
    54 open ATP_Problem
    55 
    56 fun mk_anot (AConn (ANot, [phi])) = phi
    57   | mk_anot phi = AConn (ANot, [phi])
    58 fun mk_aconn c (phi1, phi2) = AConn (c, [phi1, phi2])
    59 
    60 type step_name = string * string option
    61 
    62 fun is_same_step p = p |> pairself fst |> op =
    63 
    64 fun step_name_ord p =
    65   let val q = pairself fst p in
    66     (* The "unprefix" part is to cope with remote Vampire's output. The proper
    67        solution would be to perform a topological sort, e.g. using the nice
    68        "Graph" functor. *)
    69     case pairself (Int.fromString o perhaps (try (unprefix "f"))) q of
    70       (NONE, NONE) => string_ord q
    71     | (NONE, SOME _) => LESS
    72     | (SOME _, NONE) => GREATER
    73     | (SOME i, SOME j) => int_ord (i, j)
    74   end
    75 
    76 datatype 'a step =
    77   Definition of step_name * 'a * 'a |
    78   Inference of step_name * 'a * step_name list
    79 
    80 type 'a proof = 'a uniform_formula step list
    81 
    82 fun step_name (Definition (name, _, _)) = name
    83   | step_name (Inference (name, _, _)) = name
    84 
    85 (**** PARSING OF TSTP FORMAT ****)
    86 
    87 (*Strings enclosed in single quotes, e.g. filenames*)
    88 val scan_general_id =
    89   $$ "'" |-- Scan.repeat (~$$ "'") --| $$ "'" >> implode
    90   || Scan.repeat ($$ "$") -- Scan.many1 Symbol.is_letdig
    91      >> (fn (ss1, ss2) => implode ss1 ^ implode ss2)
    92 
    93 (* Generalized first-order terms, which include file names, numbers, etc. *)
    94 fun parse_annotation strict x =
    95   ((scan_general_id ::: Scan.repeat ($$ " " |-- scan_general_id)
    96       >> (strict ? filter (is_some o Int.fromString)))
    97    -- Scan.optional (parse_annotation strict) [] >> op @
    98    || $$ "(" |-- parse_annotations strict --| $$ ")"
    99    || $$ "[" |-- parse_annotations strict --| $$ "]") x
   100 and parse_annotations strict x =
   101   (Scan.optional (parse_annotation strict
   102                   ::: Scan.repeat ($$ "," |-- parse_annotation strict)) []
   103    >> flat) x
   104 
   105 (* Vampire proof lines sometimes contain needless information such as "(0:3)",
   106    which can be hard to disambiguate from function application in an LL(1)
   107    parser. As a workaround, we extend the TPTP term syntax with such detritus
   108    and ignore it. *)
   109 fun parse_vampire_detritus x =
   110   (scan_general_id |-- $$ ":" --| scan_general_id >> K []) x
   111 
   112 fun parse_term x =
   113   (scan_general_id
   114     -- Scan.optional ($$ "(" |-- (parse_vampire_detritus || parse_terms)
   115                       --| $$ ")") []
   116     --| Scan.optional ($$ "(" |-- parse_vampire_detritus --| $$ ")") []
   117    >> ATerm) x
   118 and parse_terms x = (parse_term ::: Scan.repeat ($$ "," |-- parse_term)) x
   119 
   120 val parse_atom =
   121   parse_term -- Scan.option (Scan.option ($$ "!") --| $$ "=" -- parse_term)
   122   >> (fn (u1, NONE) => AAtom u1
   123        | (u1, SOME (NONE, u2)) => AAtom (ATerm ("c_equal", [u1, u2]))
   124        | (u1, SOME (SOME _, u2)) =>
   125          mk_anot (AAtom (ATerm ("c_equal", [u1, u2]))))
   126 
   127 fun fo_term_head (ATerm (s, _)) = s
   128 
   129 (* TPTP formulas are fully parenthesized, so we don't need to worry about
   130    operator precedence. *)
   131 fun parse_formula x =
   132   (($$ "(" |-- parse_formula --| $$ ")"
   133     || ($$ "!" >> K AForall || $$ "?" >> K AExists)
   134        --| $$ "[" -- parse_terms --| $$ "]" --| $$ ":" -- parse_formula
   135        >> (fn ((q, ts), phi) => AQuant (q, map fo_term_head ts, phi))
   136     || $$ "~" |-- parse_formula >> mk_anot
   137     || parse_atom)
   138    -- Scan.option ((Scan.this_string "=>" >> K AImplies
   139                     || Scan.this_string "<=>" >> K AIff
   140                     || Scan.this_string "<~>" >> K ANotIff
   141                     || Scan.this_string "<=" >> K AIf
   142                     || $$ "|" >> K AOr || $$ "&" >> K AAnd)
   143                    -- parse_formula)
   144    >> (fn (phi1, NONE) => phi1
   145         | (phi1, SOME (c, phi2)) => mk_aconn c (phi1, phi2))) x
   146 
   147 val parse_tstp_extra_arguments =
   148   Scan.optional ($$ "," |-- parse_annotation false
   149                  --| Scan.option ($$ "," |-- parse_annotations false)) []
   150 
   151 (* Syntax: (fof|cnf)\(<num>, <formula_role>, <formula> <extra_arguments>\).
   152    The <num> could be an identifier, but we assume integers. *)
   153 val parse_tstp_line =
   154   ((Scan.this_string "fof" || Scan.this_string "cnf") -- $$ "(")
   155     |-- scan_general_id --| $$ "," -- Symbol.scan_id --| $$ ","
   156     -- parse_formula -- parse_tstp_extra_arguments --| $$ ")" --| $$ "."
   157    >> (fn (((num, role), phi), deps) =>
   158           let
   159             val (name, deps) =
   160               case deps of
   161                 ["file", _, s] => ((num, SOME s), [])
   162               | _ => ((num, NONE), deps)
   163           in
   164             case role of
   165               "definition" =>
   166               (case phi of
   167                  AConn (AIff, [phi1 as AAtom _, phi2]) =>
   168                  Definition (name, phi1, phi2)
   169                | AAtom (ATerm ("c_equal", _)) =>
   170                  (* Vampire's equality proxy axiom *)
   171                  Inference (name, phi, map (rpair NONE) deps)
   172                | _ => raise Fail "malformed definition")
   173             | _ => Inference (name, phi, map (rpair NONE) deps)
   174           end)
   175 
   176 (**** PARSING OF VAMPIRE OUTPUT ****)
   177 
   178 (* Syntax: <num>. <formula> <annotation> *)
   179 val parse_vampire_line =
   180   scan_general_id --| $$ "." -- parse_formula -- parse_annotation true
   181   >> (fn ((num, phi), deps) =>
   182          Inference ((num, NONE), phi, map (rpair NONE) deps))
   183 
   184 (**** PARSING OF SPASS OUTPUT ****)
   185 
   186 (* SPASS returns clause references of the form "x.y". We ignore "y", whose role
   187    is not clear anyway. *)
   188 val parse_dot_name = scan_general_id --| $$ "." --| scan_general_id
   189 
   190 val parse_spass_annotations =
   191   Scan.optional ($$ ":" |-- Scan.repeat (parse_dot_name
   192                                          --| Scan.option ($$ ","))) []
   193 
   194 (* It is not clear why some literals are followed by sequences of stars and/or
   195    pluses. We ignore them. *)
   196 val parse_decorated_atom =
   197   parse_atom --| Scan.repeat ($$ "*" || $$ "+" || $$ " ")
   198 
   199 fun mk_horn ([], []) = AAtom (ATerm ("c_False", []))
   200   | mk_horn ([], pos_lits) = foldr1 (mk_aconn AOr) pos_lits
   201   | mk_horn (neg_lits, []) = mk_anot (foldr1 (mk_aconn AAnd) neg_lits)
   202   | mk_horn (neg_lits, pos_lits) =
   203     mk_aconn AImplies (foldr1 (mk_aconn AAnd) neg_lits,
   204                        foldr1 (mk_aconn AOr) pos_lits)
   205 
   206 val parse_horn_clause =
   207   Scan.repeat parse_decorated_atom --| $$ "|" --| $$ "|"
   208     -- Scan.repeat parse_decorated_atom --| $$ "-" --| $$ ">"
   209     -- Scan.repeat parse_decorated_atom
   210   >> (mk_horn o apfst (op @))
   211 
   212 (* Syntax: <num>[0:<inference><annotations>]
   213    <atoms> || <atoms> -> <atoms>. *)
   214 val parse_spass_line =
   215   scan_general_id --| $$ "[" --| $$ "0" --| $$ ":" --| Symbol.scan_id
   216     -- parse_spass_annotations --| $$ "]" -- parse_horn_clause --| $$ "."
   217   >> (fn ((num, deps), u) => Inference ((num, NONE), u, map (rpair NONE) deps))
   218 
   219 val parse_line = parse_tstp_line || parse_vampire_line || parse_spass_line
   220 val parse_proof =
   221   fst o Scan.finite Symbol.stopper
   222             (Scan.error (!! (fn _ => raise Fail "unrecognized ATP output")
   223                             (Scan.repeat1 parse_line)))
   224   o explode o strip_spaces_except_between_ident_chars (*### FIXME: why isn't strip_spaces enough?*)
   225 
   226 fun clean_up_dependency seen dep = find_first (curry is_same_step dep) seen
   227 fun clean_up_dependencies _ [] = []
   228   | clean_up_dependencies seen ((step as Definition (name, _, _)) :: steps) =
   229     step :: clean_up_dependencies (name :: seen) steps
   230   | clean_up_dependencies seen (Inference (name, u, deps) :: steps) =
   231     Inference (name, u, map_filter (clean_up_dependency seen) deps) ::
   232     clean_up_dependencies (name :: seen) steps
   233 
   234 val atp_proof_from_tstplike_string =
   235   suffix "$" (* the $ sign acts as a sentinel (FIXME: needed?) *)
   236   #> parse_proof
   237   #> sort (step_name_ord o pairself step_name)
   238   #> clean_up_dependencies []
   239 
   240 fun map_term_names_in_term f (ATerm (s, ts)) =
   241   ATerm (f s, map (map_term_names_in_term f) ts)
   242 fun map_term_names_in_formula f (AQuant (q, xs, phi)) =
   243     AQuant (q, xs, map_term_names_in_formula f phi)
   244   | map_term_names_in_formula f (AConn (c, phis)) =
   245     AConn (c, map (map_term_names_in_formula f) phis)
   246   | map_term_names_in_formula f (AAtom t) = AAtom (map_term_names_in_term f t)
   247 fun map_term_names_in_step f (Definition (name, phi1, phi2)) =
   248     Definition (name, map_term_names_in_formula f phi1,
   249                 map_term_names_in_formula f phi2)
   250   | map_term_names_in_step f (Inference (name, phi, deps)) =
   251     Inference (name, map_term_names_in_formula f phi, deps)
   252 fun map_term_names_in_atp_proof f = map (map_term_names_in_step f)
   253 
   254 fun nasty_name pool s = s |> Symtab.lookup pool |> the_default s
   255 fun nasty_atp_proof pool =
   256   if Symtab.is_empty pool then I
   257   else map_term_names_in_atp_proof (nasty_name pool)
   258 
   259 end;