src/HOL/Tools/ATP/atp_proof.ML
author blanchet
Sun, 01 May 2011 18:37:24 +0200
changeset 43402 a462dbaa584f
parent 43397 46d485f8d144
child 43407 a513730db7b0
permissions -rw-r--r--
added more rudimentary type support to Sledgehammer's ATP encoding
     1 (*  Title:      HOL/Tools/ATP/atp_proof.ML
     2     Author:     Lawrence C. Paulson, Cambridge University Computer Laboratory
     3     Author:     Claire Quigley, Cambridge University Computer Laboratory
     4     Author:     Jasmin Blanchette, TU Muenchen
     5 
     6 Abstract representation of ATP proofs and TSTP/Vampire/SPASS syntax.
     7 *)
     8 
     9 signature ATP_PROOF =
    10 sig
    11   type 'a fo_term = 'a ATP_Problem.fo_term
    12   type ('a, 'b, 'c) formula = ('a, 'b, 'c) ATP_Problem.formula
    13 
    14   datatype failure =
    15     Unprovable | IncompleteUnprovable | ProofMissing | UnsoundProof |
    16     CantConnect | TimedOut | OutOfResources | SpassTooOld | VampireTooOld |
    17     NoPerl | NoLibwwwPerl | NoRealZ3 | MalformedInput | MalformedOutput |
    18     Interrupted | Crashed | InternalError | UnknownError of string
    19 
    20   type step_name = string * string option
    21 
    22   datatype 'a step =
    23     Definition of step_name * 'a * 'a |
    24     Inference of step_name * 'a * step_name list
    25 
    26   type 'a proof = ('a, 'a, 'a fo_term) formula step list
    27 
    28   val strip_spaces : (char -> bool) -> string -> string
    29   val short_output : bool -> string -> string
    30   val string_for_failure : failure -> string
    31   val extract_important_message : string -> string
    32   val extract_known_failure :
    33     (failure * string) list -> string -> failure option
    34   val extract_tstplike_proof_and_outcome :
    35     bool -> bool -> bool -> int -> (string * string) list
    36     -> (failure * string) list -> string -> string * failure option
    37   val is_same_step : step_name * step_name -> bool
    38   val atp_proof_from_tstplike_proof : string -> string proof
    39   val map_term_names_in_atp_proof :
    40     (string -> string) -> string proof -> string proof
    41   val nasty_atp_proof : string Symtab.table -> string proof -> string proof
    42 end;
    43 
    44 structure ATP_Proof : ATP_PROOF =
    45 struct
    46 
    47 open ATP_Problem
    48 
    49 datatype failure =
    50   Unprovable | IncompleteUnprovable | ProofMissing | UnsoundProof |
    51   CantConnect | TimedOut | OutOfResources | SpassTooOld | VampireTooOld |
    52   NoPerl | NoLibwwwPerl | NoRealZ3 | MalformedInput | MalformedOutput |
    53   Interrupted | Crashed | InternalError | UnknownError of string
    54 
    55 fun strip_spaces_in_list _ [] = []
    56   | strip_spaces_in_list _ [c1] = if Char.isSpace c1 then [] else [str c1]
    57   | strip_spaces_in_list is_evil [c1, c2] =
    58     strip_spaces_in_list is_evil [c1] @ strip_spaces_in_list is_evil [c2]
    59   | strip_spaces_in_list is_evil (c1 :: c2 :: c3 :: cs) =
    60     if Char.isSpace c1 then
    61       strip_spaces_in_list is_evil (c2 :: c3 :: cs)
    62     else if Char.isSpace c2 then
    63       if Char.isSpace c3 then
    64         strip_spaces_in_list is_evil (c1 :: c3 :: cs)
    65       else
    66         str c1 :: (if forall is_evil [c1, c3] then [" "] else []) @
    67         strip_spaces_in_list is_evil (c3 :: cs)
    68     else
    69       str c1 :: strip_spaces_in_list is_evil (c2 :: c3 :: cs)
    70 fun strip_spaces is_evil =
    71   implode o strip_spaces_in_list is_evil o String.explode
    72 
    73 fun is_ident_char c = Char.isAlphaNum c orelse c = #"_"
    74 val strip_spaces_except_between_ident_chars = strip_spaces is_ident_char
    75 
    76 fun elide_string threshold s =
    77   if size s > threshold then
    78     String.extract (s, 0, SOME (threshold div 2 - 5)) ^ " ...... " ^
    79     String.extract (s, size s - (threshold + 1) div 2 + 6, NONE)
    80   else
    81     s
    82 fun short_output verbose output =
    83   if verbose then
    84     if output = "" then "No details available" else elide_string 1000 output
    85   else
    86     ""
    87 
    88 val missing_message_tail =
    89   " appears to be missing. You will need to install it if you want to invoke \
    90   \remote provers."
    91 
    92 fun string_for_failure Unprovable =
    93     "The problem is unprovable."
    94   | string_for_failure IncompleteUnprovable =
    95     "The prover gave up."
    96   | string_for_failure ProofMissing =
    97     "The prover claims the conjecture is a theorem but did not provide a proof."
    98   | string_for_failure UnsoundProof =
    99     "The prover found a type-unsound proof. (Or, very unlikely, your axioms \
   100     \are inconsistent.)"
   101   | string_for_failure CantConnect = "Cannot connect to remote server."
   102   | string_for_failure TimedOut = "Timed out."
   103   | string_for_failure OutOfResources = "The prover ran out of resources."
   104   | string_for_failure SpassTooOld =
   105     "Isabelle requires a more recent version of SPASS with support for the \
   106     \TPTP syntax. To install it, download and extract the package \
   107     \\"http://isabelle.in.tum.de/dist/contrib/spass-3.7.tar.gz\" and add the \
   108     \\"spass-3.7\" directory's absolute path to " ^
   109     Path.print (Path.expand (Path.appends
   110                (Path.variable "ISABELLE_HOME_USER" ::
   111                 map Path.basic ["etc", "components"]))) ^
   112     " on a line of its own."
   113   | string_for_failure VampireTooOld =
   114     "Isabelle requires a more recent version of Vampire. To install it, follow \
   115     \the instructions from the Sledgehammer manual (\"isabelle doc\
   116     \ sledgehammer\")."
   117   | string_for_failure NoPerl = "Perl" ^ missing_message_tail
   118   | string_for_failure NoLibwwwPerl =
   119     "The Perl module \"libwww-perl\"" ^ missing_message_tail
   120   | string_for_failure NoRealZ3 =
   121     "The environment variable \"Z3_REAL_SOLVER\" must be set to Z3's full path."
   122   | string_for_failure MalformedInput =
   123     "The generated problem is malformed. Please report this to the Isabelle \
   124     \developers."
   125   | string_for_failure MalformedOutput = "The prover output is malformed."
   126   | string_for_failure Crashed = "The prover crashed."
   127   | string_for_failure InternalError = "An internal prover error occurred."
   128   | string_for_failure (UnknownError string) =
   129     "A prover error occurred" ^
   130     (if string = "" then ". (Pass the \"verbose\" option for details.)"
   131      else ":\n" ^ string)
   132 
   133 fun extract_delimited (begin_delim, end_delim) output =
   134   output |> first_field begin_delim |> the |> snd
   135          |> first_field end_delim |> the |> fst
   136          |> first_field "\n" |> the |> snd
   137   handle Option.Option => ""
   138 
   139 val tstp_important_message_delims =
   140   ("% SZS start RequiredInformation", "% SZS end RequiredInformation")
   141 
   142 fun extract_important_message output =
   143   case extract_delimited tstp_important_message_delims output of
   144     "" => ""
   145   | s => s |> space_explode "\n" |> filter_out (curry (op =) "")
   146            |> map (perhaps (try (unprefix "%")))
   147            |> map (perhaps (try (unprefix " ")))
   148            |> space_implode "\n " |> quote
   149 
   150 (* Splits by the first possible of a list of delimiters. *)
   151 fun extract_tstplike_proof delims output =
   152   case pairself (find_first (fn s => String.isSubstring s output))
   153                 (ListPair.unzip delims) of
   154     (SOME begin_delim, SOME end_delim) =>
   155     extract_delimited (begin_delim, end_delim) output
   156   | _ => ""
   157 
   158 fun extract_known_failure known_failures output =
   159   known_failures
   160   |> find_first (fn (_, pattern) => String.isSubstring pattern output)
   161   |> Option.map fst
   162 
   163 fun extract_tstplike_proof_and_outcome debug verbose complete res_code
   164                                        proof_delims known_failures output =
   165   case extract_known_failure known_failures output of
   166     NONE =>
   167     (case extract_tstplike_proof proof_delims output of
   168        "" =>
   169        ("", SOME (if res_code = 0 andalso (not debug orelse output = "") then
   170                     ProofMissing
   171                   else
   172                     UnknownError (short_output verbose output)))
   173      | tstplike_proof =>
   174        if res_code = 0 then (tstplike_proof, NONE)
   175        else ("", SOME (UnknownError (short_output verbose output))))
   176   | SOME failure =>
   177     ("", SOME (if failure = IncompleteUnprovable andalso complete then
   178                  Unprovable
   179                else
   180                  failure))
   181 
   182 fun mk_anot (AConn (ANot, [phi])) = phi
   183   | mk_anot phi = AConn (ANot, [phi])
   184 fun mk_aconn c (phi1, phi2) = AConn (c, [phi1, phi2])
   185 
   186 type step_name = string * string option
   187 
   188 fun is_same_step p = p |> pairself fst |> op =
   189 
   190 fun step_name_ord p =
   191   let val q = pairself fst p in
   192     (* The "unprefix" part is to cope with remote Vampire's output. The proper
   193        solution would be to perform a topological sort, e.g. using the nice
   194        "Graph" functor. *)
   195     case pairself (Int.fromString o perhaps (try (unprefix "f"))) q of
   196       (NONE, NONE) => string_ord q
   197     | (NONE, SOME _) => LESS
   198     | (SOME _, NONE) => GREATER
   199     | (SOME i, SOME j) => int_ord (i, j)
   200   end
   201 
   202 datatype 'a step =
   203   Definition of step_name * 'a * 'a |
   204   Inference of step_name * 'a * step_name list
   205 
   206 type 'a proof = ('a, 'a, 'a fo_term) formula step list
   207 
   208 fun step_name (Definition (name, _, _)) = name
   209   | step_name (Inference (name, _, _)) = name
   210 
   211 (**** PARSING OF TSTP FORMAT ****)
   212 
   213 (*Strings enclosed in single quotes, e.g. filenames*)
   214 val scan_general_id =
   215   $$ "'" |-- Scan.repeat (~$$ "'") --| $$ "'" >> implode
   216   || Scan.repeat ($$ "$") -- Scan.many1 Symbol.is_letdig
   217      >> (fn (ss1, ss2) => implode ss1 ^ implode ss2)
   218 
   219 (* Generalized first-order terms, which include file names, numbers, etc. *)
   220 fun parse_annotation strict x =
   221   ((scan_general_id ::: Scan.repeat ($$ " " |-- scan_general_id)
   222       >> (strict ? filter (is_some o Int.fromString)))
   223    -- Scan.optional (parse_annotation strict) [] >> op @
   224    || $$ "(" |-- parse_annotations strict --| $$ ")"
   225    || $$ "[" |-- parse_annotations strict --| $$ "]") x
   226 and parse_annotations strict x =
   227   (Scan.optional (parse_annotation strict
   228                   ::: Scan.repeat ($$ "," |-- parse_annotation strict)) []
   229    >> flat) x
   230 
   231 (* Vampire proof lines sometimes contain needless information such as "(0:3)",
   232    which can be hard to disambiguate from function application in an LL(1)
   233    parser. As a workaround, we extend the TPTP term syntax with such detritus
   234    and ignore it. *)
   235 fun parse_vampire_detritus x =
   236   (scan_general_id |-- $$ ":" --| scan_general_id >> K []) x
   237 
   238 fun parse_term x =
   239   (scan_general_id
   240      -- Scan.optional ($$ "(" |-- (parse_vampire_detritus || parse_terms)
   241                        --| $$ ")") []
   242      --| Scan.optional ($$ "(" |-- parse_vampire_detritus --| $$ ")") []
   243    >> ATerm) x
   244 and parse_terms x = (parse_term ::: Scan.repeat ($$ "," |-- parse_term)) x
   245 
   246 fun parse_atom x =
   247   (parse_term -- Scan.option (Scan.option ($$ "!") --| $$ "=" -- parse_term)
   248    >> (fn (u1, NONE) => AAtom u1
   249         | (u1, SOME (NONE, u2)) => AAtom (ATerm ("c_equal", [u1, u2]))
   250         | (u1, SOME (SOME _, u2)) =>
   251           mk_anot (AAtom (ATerm ("c_equal", [u1, u2]))))) x
   252 
   253 fun fo_term_head (ATerm (s, _)) = s
   254 
   255 (* TPTP formulas are fully parenthesized, so we don't need to worry about
   256    operator precedence. *)
   257 fun parse_formula x =
   258   (($$ "(" |-- parse_formula --| $$ ")"
   259     || ($$ "!" >> K AForall || $$ "?" >> K AExists)
   260        --| $$ "[" -- parse_terms --| $$ "]" --| $$ ":" -- parse_formula
   261        >> (fn ((q, ts), phi) =>
   262               (* FIXME: TFF *)
   263               AQuant (q, map (rpair NONE o fo_term_head) ts, phi))
   264     || $$ "~" |-- parse_formula >> mk_anot
   265     || parse_atom)
   266    -- Scan.option ((Scan.this_string "=>" >> K AImplies
   267                     || Scan.this_string "<=>" >> K AIff
   268                     || Scan.this_string "<~>" >> K ANotIff
   269                     || Scan.this_string "<=" >> K AIf
   270                     || $$ "|" >> K AOr || $$ "&" >> K AAnd)
   271                    -- parse_formula)
   272    >> (fn (phi1, NONE) => phi1
   273         | (phi1, SOME (c, phi2)) => mk_aconn c (phi1, phi2))) x
   274 
   275 val parse_tstp_extra_arguments =
   276   Scan.optional ($$ "," |-- parse_annotation false
   277                  --| Scan.option ($$ "," |-- parse_annotations false)) []
   278 
   279 val vampire_unknown_fact = "unknown"
   280 
   281 (* Syntax: (cnf|fof|tff)\(<num>, <formula_role>, <formula> <extra_arguments>\).
   282    The <num> could be an identifier, but we assume integers. *)
   283 val parse_tstp_line =
   284   ((Scan.this_string "cnf" || Scan.this_string "fof" || Scan.this_string "tff")
   285       -- $$ "(")
   286     |-- scan_general_id --| $$ "," -- Symbol.scan_id --| $$ ","
   287     -- parse_formula -- parse_tstp_extra_arguments --| $$ ")" --| $$ "."
   288    >> (fn (((num, role), phi), deps) =>
   289           let
   290             val (name, deps) =
   291               case deps of
   292                 ["file", _, s] =>
   293                 ((num, if s = vampire_unknown_fact then NONE else SOME s), [])
   294               | _ => ((num, NONE), deps)
   295           in
   296             case role of
   297               "definition" =>
   298               (case phi of
   299                  AConn (AIff, [phi1 as AAtom _, phi2]) =>
   300                  Definition (name, phi1, phi2)
   301                | AAtom (ATerm ("c_equal", _)) =>
   302                  (* Vampire's equality proxy axiom *)
   303                  Inference (name, phi, map (rpair NONE) deps)
   304                | _ => raise Fail "malformed definition")
   305             | _ => Inference (name, phi, map (rpair NONE) deps)
   306           end)
   307 
   308 (**** PARSING OF VAMPIRE OUTPUT ****)
   309 
   310 val parse_vampire_braced_stuff =
   311   $$ "{" -- Scan.repeat (scan_general_id --| Scan.option ($$ ",")) -- $$ "}"
   312 val parse_vampire_parenthesized_detritus =
   313   $$ "(" |-- parse_vampire_detritus --| $$ ")"
   314 
   315 (* Syntax: <num>. <formula> <annotation> *)
   316 val parse_vampire_line =
   317   scan_general_id --| $$ "." -- parse_formula
   318     --| Scan.option parse_vampire_braced_stuff
   319     --| Scan.option parse_vampire_parenthesized_detritus
   320     -- parse_annotation true
   321   >> (fn ((num, phi), deps) =>
   322          Inference ((num, NONE), phi, map (rpair NONE) deps))
   323 
   324 (**** PARSING OF SPASS OUTPUT ****)
   325 
   326 (* SPASS returns clause references of the form "x.y". We ignore "y", whose role
   327    is not clear anyway. *)
   328 val parse_dot_name = scan_general_id --| $$ "." --| scan_general_id
   329 
   330 val parse_spass_annotations =
   331   Scan.optional ($$ ":" |-- Scan.repeat (parse_dot_name
   332                                          --| Scan.option ($$ ","))) []
   333 
   334 (* It is not clear why some literals are followed by sequences of stars and/or
   335    pluses. We ignore them. *)
   336 fun parse_decorated_atom x =
   337   (parse_atom --| Scan.repeat ($$ "*" || $$ "+" || $$ " ")) x
   338 
   339 fun mk_horn ([], []) = AAtom (ATerm ("c_False", []))
   340   | mk_horn ([], pos_lits) = foldr1 (mk_aconn AOr) pos_lits
   341   | mk_horn (neg_lits, []) = mk_anot (foldr1 (mk_aconn AAnd) neg_lits)
   342   | mk_horn (neg_lits, pos_lits) =
   343     mk_aconn AImplies (foldr1 (mk_aconn AAnd) neg_lits,
   344                        foldr1 (mk_aconn AOr) pos_lits)
   345 
   346 fun parse_horn_clause x =
   347   (Scan.repeat parse_decorated_atom --| $$ "|" --| $$ "|"
   348      -- Scan.repeat parse_decorated_atom --| $$ "-" --| $$ ">"
   349      -- Scan.repeat parse_decorated_atom
   350    >> (mk_horn o apfst (op @))) x
   351 
   352 (* Syntax: <num>[0:<inference><annotations>]
   353    <atoms> || <atoms> -> <atoms>. *)
   354 fun parse_spass_line x =
   355   (scan_general_id --| $$ "[" --| $$ "0" --| $$ ":" --| Symbol.scan_id
   356      -- parse_spass_annotations --| $$ "]" -- parse_horn_clause --| $$ "."
   357    >> (fn ((num, deps), u) =>
   358           Inference ((num, NONE), u, map (rpair NONE) deps))) x
   359 
   360 fun parse_line x = (parse_tstp_line || parse_vampire_line || parse_spass_line) x
   361 val parse_proof =
   362   fst o Scan.finite Symbol.stopper
   363             (Scan.error (!! (fn _ => raise Fail "unrecognized ATP output")
   364                             (Scan.repeat1 parse_line)))
   365   o raw_explode o strip_spaces_except_between_ident_chars
   366 
   367 fun clean_up_dependency seen dep = find_first (curry is_same_step dep) seen
   368 fun clean_up_dependencies _ [] = []
   369   | clean_up_dependencies seen ((step as Definition (name, _, _)) :: steps) =
   370     step :: clean_up_dependencies (name :: seen) steps
   371   | clean_up_dependencies seen (Inference (name, u, deps) :: steps) =
   372     Inference (name, u, map_filter (clean_up_dependency seen) deps) ::
   373     clean_up_dependencies (name :: seen) steps
   374 
   375 fun atp_proof_from_tstplike_proof "" = []
   376   | atp_proof_from_tstplike_proof s =
   377     s ^ "$" (* the $ sign acts as a sentinel (FIXME: needed?) *)
   378     |> parse_proof
   379     |> sort (step_name_ord o pairself step_name)
   380     |> clean_up_dependencies []
   381 
   382 fun map_term_names_in_term f (ATerm (s, ts)) =
   383   ATerm (f s, map (map_term_names_in_term f) ts)
   384 fun map_term_names_in_formula f (AQuant (q, xs, phi)) =
   385     AQuant (q, xs, map_term_names_in_formula f phi)
   386   | map_term_names_in_formula f (AConn (c, phis)) =
   387     AConn (c, map (map_term_names_in_formula f) phis)
   388   | map_term_names_in_formula f (AAtom t) = AAtom (map_term_names_in_term f t)
   389 fun map_term_names_in_step f (Definition (name, phi1, phi2)) =
   390     Definition (name, map_term_names_in_formula f phi1,
   391                 map_term_names_in_formula f phi2)
   392   | map_term_names_in_step f (Inference (name, phi, deps)) =
   393     Inference (name, map_term_names_in_formula f phi, deps)
   394 fun map_term_names_in_atp_proof f = map (map_term_names_in_step f)
   395 
   396 fun nasty_name pool s = s |> Symtab.lookup pool |> the_default s
   397 fun nasty_atp_proof pool =
   398   if Symtab.is_empty pool then I
   399   else map_term_names_in_atp_proof (nasty_name pool)
   400 
   401 end;