1 (* Title: HOL/Tools/ATP/atp_proof.ML
2 Author: Lawrence C. Paulson, Cambridge University Computer Laboratory
3 Author: Claire Quigley, Cambridge University Computer Laboratory
4 Author: Jasmin Blanchette, TU Muenchen
6 Abstract representation of ATP proofs and TSTP/SPASS syntax.
(* NOTE(review): this is the interior of the ATP_PROOF signature; its header,
   its end, and several specification lines are not visible in this listing. *)
(* Abbreviations for the term, formula, and problem types of ATP_Problem. *)
11 type 'a fo_term = 'a ATP_Problem.fo_term
12 type ('a, 'b, 'c) formula = ('a, 'b, 'c) ATP_Problem.formula
13 type 'a problem = 'a ATP_Problem.problem
(* Raised by the proof parser when a proof text or a step is not recognized. *)
15 exception UNRECOGNIZED_ATP_PROOF of unit
(* NOTE(review): only three constructors of the failure datatype are visible
   here; its header and remaining constructors are not shown. *)
19 IncompleteUnprovable |
22 UnsoundProof of bool * string list |
37 UnknownError of string
(* A step name is a string paired with an optional second string. *)
39 type step_name = string * string option
(* NOTE(review): the header of the step datatype is not visible here. *)
42 Definition of step_name * 'a * 'a |
43 Inference of step_name * 'a * step_name list
(* A proof is a list of steps over formulas. *)
45 type 'a proof = ('a, 'a, 'a fo_term) formula step list
(* strip_spaces: the bool enables comment skipping; the character predicate
   decides where a single space must be kept between two characters. *)
47 val strip_spaces : bool -> (char -> bool) -> string -> string
48 val short_output : bool -> string -> string
49 val string_for_failure : failure -> string
50 val extract_important_message : string -> string
51 val extract_known_failure :
52 (failure * string) list -> string -> failure option
53 val extract_tstplike_proof_and_outcome :
54 bool -> bool -> int -> (string * string) list -> (failure * string) list
55 -> string -> string * failure option
(* Two step names denote the same step when their first components agree. *)
56 val is_same_atp_step : step_name -> step_name -> bool
57 val scan_general_id : string list -> string * string list
(* NOTE(review): the "val" line that the next type belongs to is not visible. *)
59 string list -> (string, 'a, string fo_term) formula * string list
60 val atp_proof_from_tstplike_proof : string problem -> string -> string proof
61 val clean_up_atp_proof_dependencies : string proof -> string proof
62 val map_term_names_in_atp_proof :
63 (string -> string) -> string proof -> string proof
(* Renames term names through the given table; the identity for an empty table. *)
64 val nasty_atp_proof : string Symtab.table -> string proof -> string proof
67 structure ATP_Proof : ATP_PROOF =
(* Raised when a proof text cannot be parsed or a step has an unexpected
   shape. *)
72 exception UNRECOGNIZED_ATP_PROOF of unit
(* NOTE(review): fragment of the failure datatype; its header and the other
   constructors used by string_for_failure are not visible in this listing. *)
76 IncompleteUnprovable |
79 UnsoundProof of bool * string list |
94 UnknownError of string
(* Character-list workers behind strip_spaces.

   strip_c_style_comment discards characters up to and including the closing
   star-slash marker, then resumes strip_spaces_in_list with comment skipping
   switched on. An unterminated comment consumes the rest of the input.

   strip_spaces_in_list returns a list of one-character strings (plus the
   occasional " "):
   - when comment skipping is on, a percent sign drops characters up to the
     next newline (presumably the newline itself is kept, depending on what
     chop_while returns -- confirm), and a slash-star pair hands over to
     strip_c_style_comment;
   - a leading space character is dropped;
   - for a non-space c1 followed by a space c2: if c3 is also a space, c2 is
     dropped and scanning restarts from c1; the visible line after that emits
     c1 and, if both c1 and c3 satisfy is_evil, a single " " in place of c2.

   NOTE(review): the "else" keywords separating the last three branches are
   not visible in this listing. *)
96 fun strip_c_style_comment _ [] = []
97 | strip_c_style_comment is_evil (#"*" :: #"/" :: cs) =
98 strip_spaces_in_list true is_evil cs
99 | strip_c_style_comment is_evil (_ :: cs) = strip_c_style_comment is_evil cs
100 and strip_spaces_in_list _ _ [] = []
101 | strip_spaces_in_list true is_evil (#"%" :: cs) =
102 strip_spaces_in_list true is_evil
103 (cs |> chop_while (not_equal #"\n") |> snd)
104 | strip_spaces_in_list true is_evil (#"/" :: #"*" :: cs) =
105 strip_c_style_comment is_evil cs
106 | strip_spaces_in_list _ _ [c1] = if Char.isSpace c1 then [] else [str c1]
107 | strip_spaces_in_list skip_comments is_evil [c1, c2] =
108 strip_spaces_in_list skip_comments is_evil [c1] @
109 strip_spaces_in_list skip_comments is_evil [c2]
110 | strip_spaces_in_list skip_comments is_evil (c1 :: c2 :: c3 :: cs) =
111 if Char.isSpace c1 then
112 strip_spaces_in_list skip_comments is_evil (c2 :: c3 :: cs)
113 else if Char.isSpace c2 then
114 if Char.isSpace c3 then
115 strip_spaces_in_list skip_comments is_evil (c1 :: c3 :: cs)
117 str c1 :: (if forall is_evil [c1, c3] then [" "] else []) @
118 strip_spaces_in_list skip_comments is_evil (c3 :: cs)
120 str c1 :: strip_spaces_in_list skip_comments is_evil (c2 :: c3 :: cs)
(* String-level front end of strip_spaces_in_list: the input is exploded into
   characters, processed by strip_spaces_in_list with the given comment flag
   and character predicate, and the resulting pieces are joined back into a
   string. *)
fun strip_spaces skip_comments is_evil =
  fn text =>
    implode (strip_spaces_in_list skip_comments is_evil (String.explode text))
(* True for letters, digits, and the underscore. *)
fun is_ident_char c = c = #"_" orelse Char.isAlphaNum c
(* strip_spaces with comment skipping enabled and is_ident_char as the
   predicate that decides where a separating space is kept. *)
fun strip_spaces_except_between_ident_chars text =
  strip_spaces true is_ident_char text
(* Shortens a string that is longer than threshold: keeps the first
   "threshold div 2 - 5" characters and the tail starting at index
   "size s - (threshold + 1) div 2 + 6", joined by " ...... ".
   NOTE(review): the branch for strings within the threshold is not visible in
   this listing. Also note that a threshold below 10 makes the prefix length
   negative, which String.extract rejects -- confirm callers never do that. *)
127 fun elide_string threshold s =
128 if size s > threshold then
129 String.extract (s, 0, SOME (threshold div 2 - 5)) ^ " ...... " ^
130 String.extract (s, size s - (threshold + 1) div 2 + 6, NONE)
(* Turns raw prover output into a short message. In the visible branch an
   empty output becomes "No details available" and any other output is passed
   through elide_string with threshold 1000.
   NOTE(review): the lines that consult the verbose flag are not visible in
   this listing. *)
133 fun short_output verbose output =
135 if output = "" then "No details available" else elide_string 1000 output
(* Message suffix appended to the name of a missing component by the NoPerl
   and NoLibwwwPerl cases of string_for_failure.
   NOTE(review): the string literal continues on a line that is not visible in
   this listing. *)
139 val missing_message_tail =
140 " appears to be missing. You will need to install it if you want to invoke \
(* Builds the optional "involving ..." fragment of an unsoundness message:
   empty for an empty list, otherwise the quoted, comma-separated items
   followed by a trailing space. *)
fun involving ss =
  if null ss then "" else "involving " ^ commas_quote ss ^ " "
(* Maps each failure constructor to the user-facing message shown for it.
   UnsoundProof carries a flag selecting between two messages (false: suggests
   the "full_types" option; true: suggests reporting to the developers) and a
   list of names rendered through "involving". UnknownError carries the text of
   the error; the visible branch handles the empty text.
   NOTE(review): several continuation lines of the string literals, and the
   end of the UnknownError case, are not visible in this listing. *)
146 fun string_for_failure Unprovable = "The problem is unprovable."
147 | string_for_failure IncompleteUnprovable = "The prover gave up."
148 | string_for_failure ProofMissing =
149 "The prover claims the conjecture is a theorem but did not provide a proof."
150 | string_for_failure ProofIncomplete =
151 "The prover claims the conjecture is a theorem but provided an incomplete \
153 | string_for_failure (UnsoundProof (false, ss)) =
154 "The prover found a type-unsound proof " ^ involving ss ^
155 "(or, less likely, your axioms are inconsistent). Try passing the \
156 \\"full_types\" option to Sledgehammer to avoid such spurious proofs."
157 | string_for_failure (UnsoundProof (true, ss)) =
158 "The prover found a type-unsound proof " ^ involving ss ^
159 "even though a supposedly type-sound encoding was used (or, less likely, \
160 \your axioms are inconsistent). You might want to report this to the \
161 \Isabelle developers."
162 | string_for_failure CantConnect = "Cannot connect to remote server."
163 | string_for_failure TimedOut = "Timed out."
164 | string_for_failure Inappropriate =
165 "The problem lies outside the prover's scope."
166 | string_for_failure OutOfResources = "The prover ran out of resources."
167 | string_for_failure SpassTooOld =
168 "Isabelle requires a more recent version of SPASS with support for the \
169 \TPTP syntax. To install it, download and extract the package \
170 \\"http://isabelle.in.tum.de/dist/contrib/spass-3.7.tar.gz\" and add the \
171 \\"spass-3.7\" directory's absolute path to " ^
172 Path.print (Path.expand (Path.appends
173 (Path.variable "ISABELLE_HOME_USER" ::
174 map Path.basic ["etc", "components"]))) ^
175 " on a line of its own."
176 | string_for_failure VampireTooOld =
177 "Isabelle requires a more recent version of Vampire. To install it, follow \
178 \the instructions from the Sledgehammer manual (\"isabelle doc\
180 | string_for_failure NoPerl = "Perl" ^ missing_message_tail
181 | string_for_failure NoLibwwwPerl =
182 "The Perl module \"libwww-perl\"" ^ missing_message_tail
183 | string_for_failure NoRealZ3 =
184 "The environment variable \"Z3_REAL_SOLVER\" must be set to Z3's full path."
185 | string_for_failure MalformedInput =
186 "The generated problem is malformed. Please report this to the Isabelle \
188 | string_for_failure MalformedOutput = "The prover output is malformed."
189 | string_for_failure Crashed = "The prover crashed."
190 | string_for_failure InternalError = "An internal prover error occurred."
191 | string_for_failure (UnknownError string) =
192 "A prover error occurred" ^
193 (if string = "" then ". (Pass the \"verbose\" option for details.)"
(* Returns the part of output that lies between begin_delim and end_delim,
   minus everything up to the first newline of that part. Each step relies on
   first_field (presumably splitting at the first occurrence of its separator
   into a before/after pair -- confirm); if any of the three separators is not
   found, "the" raises Option.Option and the result is "". *)
fun extract_delimited (begin_delim, end_delim) output =
  let
    val after_begin = snd (the (first_field begin_delim output))
    val before_end = fst (the (first_field end_delim after_begin))
  in
    snd (the (first_field "\n" before_end))
  end
  handle Option.Option => ""
(* Begin/end markers used by extract_important_message to locate the
   "RequiredInformation" section of the prover output. *)
val tstp_important_message_delims =
  let
    val opening = "% SZS start RequiredInformation"
    val closing = "% SZS end RequiredInformation"
  in
    (opening, closing)
  end
(* Extracts the section delimited by tstp_important_message_delims from the
   prover output. In the visible branch the section is split into lines, empty
   lines are dropped, a leading "%" and then a leading space are removed from
   each line where present, and the lines are rejoined and quoted.
   NOTE(review): the first case of the match is not visible in this listing. *)
205 fun extract_important_message output =
206 case extract_delimited tstp_important_message_delims output of
208 | s => s |> space_explode "\n" |> filter_out (curry (op =) "")
209 |> map (perhaps (try (unprefix "%")))
210 |> map (perhaps (try (unprefix " ")))
211 |> space_implode "\n " |> quote
213 (* Extracts the text between the first begin delimiter and the first end delimiter, among those listed, that occur in the output. *)
(* delims is a list of (begin, end) delimiter pairs. The begin delimiters and
   the end delimiters are searched independently: the first begin delimiter
   that occurs in output and the first end delimiter that occurs in output are
   handed to extract_delimited, so they need not come from the same pair.
   NOTE(review): the fallback case of the match is not visible in this
   listing. *)
214 fun extract_tstplike_proof delims output =
215 case pairself (find_first (fn s => String.isSubstring s output))
216 (ListPair.unzip delims) of
217 (SOME begin_delim, SOME end_delim) =>
218 extract_delimited (begin_delim, end_delim) output
(* Looks for the first (failure, pattern) entry whose pattern occurs as a
   substring of output.
   NOTE(review): the lines before and after the find_first step are not
   visible in this listing. *)
221 fun extract_known_failure known_failures output =
223 |> find_first (fn (_, pattern) => String.isSubstring pattern output)
(* Combines proof extraction and failure detection into a pair of proof text
   and optional failure:
   - a detected ProofIncomplete always wins and discards any proof text;
   - with no proof text and a known failure, that failure is reported, where
     IncompleteUnprovable is upgraded to Unprovable when "complete" is set;
   - a visible branch reports ProofMissing when res_code is 0 and the output
     is empty, and otherwise UnknownError with the shortened output;
   - any other case returns the proof text with no failure.
   NOTE(review): two lines in the middle of the match are not visible in this
   listing. *)
226 fun extract_tstplike_proof_and_outcome verbose complete res_code proof_delims
227 known_failures output =
228 case (extract_tstplike_proof proof_delims output,
229 extract_known_failure known_failures output) of
230 (_, SOME ProofIncomplete) => ("", SOME ProofIncomplete)
231 | ("", SOME failure) =>
232 ("", SOME (if failure = IncompleteUnprovable andalso complete then Unprovable
235 ("", SOME (if res_code = 0 andalso output = "" then ProofMissing
236 else UnknownError (short_output verbose output)))
237 | (tstplike_proof, _) => (tstplike_proof, NONE)
(* A step name: the first component is the name compared by is_same_atp_step
   and step_name_ord; the second component is optional extra information. *)
239 type step_name = string * string option
(* Two step names refer to the same step when their first components are
   equal; the second components are ignored. *)
fun is_same_atp_step step1 step2 =
  case (step1, step2) of
    ((id1, _), (id2, _)) => id1 = id2
(* Prefix that step_name_ord strips from a step name, where present, before
   trying to read the name as an integer. *)
243 val vampire_fact_prefix = "f"
(* Ordering on pairs of step names based on their first components. Each name
   is read as an integer after an optional vampire_fact_prefix is removed.
   Two numeric names are compared as integers, two non-numeric names as
   strings, and a non-numeric name sorts before a numeric one.
   NOTE(review): the end of the inner comment and the closing "end" are not
   visible in this listing. *)
245 fun step_name_ord p =
246 let val q = pairself fst p in
247 (* The "unprefix" part is to cope with remote Vampire's output. The proper
248 solution would be to perform a topological sort, e.g. using the nice
250 case pairself (Int.fromString
251 o perhaps (try (unprefix vampire_fact_prefix))) q of
252 (NONE, NONE) => string_ord q
253 | (NONE, SOME _) => LESS
254 | (SOME _, NONE) => GREATER
255 | (SOME i, SOME j) => int_ord (i, j)
(* NOTE(review): the header of the step datatype is not visible in this
   listing. A Definition carries a name and two values; an Inference carries a
   name, one value, and the names of the steps it depends on. *)
259 Definition of step_name * 'a * 'a |
260 Inference of step_name * 'a * step_name list
(* A proof is a list of steps whose payloads are formulas. *)
262 type 'a proof = ('a, 'a, 'a fo_term) formula step list
(* Projects the name out of a proof step, whatever its kind. *)
fun step_name step =
  case step of
    Definition (name, _, _) => name
  | Inference (name, _, _) => name
267 (**** PARSING OF TSTP FORMAT ****)
269 (* FIXME: temporary hack *)
(* Rewrites a step name by splitting it at the dots. In the visible branch the
   first field is mapped to a letter ("0" to "X", "1" to "Y", anything else to
   "Z" followed by the field), the second field is zero-padded to at least two
   characters, and the remaining fields are appended.
   NOTE(review): the pattern that binds a, b, c, d and the fallback case are
   not visible in this listing. *)
270 fun repair_waldmeister_step_name s =
271 case space_explode "." s of
273 (case a of "0" => "X" | "1" => "Y" | _ => "Z" ^ a) ^
274 (if size b = 1 then "0" else "") ^ b ^ c ^ d
(* Scans a general identifier. Two forms are accepted, tried in this order:
   - a string enclosed in single quotes (e.g., a file name), whose contents
     are passed through repair_waldmeister_step_name;
   - any number of "$" signs followed by at least one symbol accepted by
     Symbol.is_letdig. *)
val scan_general_id =
  let
    val quoted =
      $$ "'" |-- Scan.repeat (~$$ "'") --| $$ "'"
      >> implode >> repair_waldmeister_step_name
    val plain =
      Scan.repeat ($$ "$") -- Scan.many1 Symbol.is_letdig
      >> (fn (dollars, letdigs) => implode dollars ^ implode letdigs)
  in
    quoted || plain
  end
284 (* Generalized first-order terms, which include file names, numbers, etc. *)
(* Mutually recursive parsers for annotations, which yield lists of strings.
   parse_annotation accepts either a run of general identifiers separated by
   single spaces, optionally followed by a further annotation whose result is
   appended, or a parenthesized or bracketed parse_annotations.
   parse_annotations accepts a possibly empty, comma-separated sequence of
   annotations.
   NOTE(review): the last line of parse_annotations is not visible in this
   listing. *)
285 fun parse_annotation x =
286 ((scan_general_id ::: Scan.repeat ($$ " " |-- scan_general_id))
287 -- Scan.optional parse_annotation [] >> op @
288 || $$ "(" |-- parse_annotations --| $$ ")"
289 || $$ "[" |-- parse_annotations --| $$ "]") x
290 and parse_annotations x =
291 (Scan.optional (parse_annotation
292 ::: Scan.repeat ($$ "," |-- parse_annotation)) []
(* Applies f to the arguments one at a time, from left to right, wrapping each
   application in an ATerm headed by tptp_app. With no arguments, f itself is
   returned. *)
fun list_app (f, []) = f
  | list_app (f, arg :: args) = list_app (ATerm (tptp_app, [f, arg]), args)
298 (* We ignore TFF and THF types for now. *)
(* Mutually recursive term parsers.
   NOTE(review): only parse_type_stuff has its header visible; the header
   lines of the other members of this group, and a few body lines, are not
   visible in this listing. Judging from the references inside the bodies the
   members are presumably parse_arg, parse_app, parse_term, and parse_terms --
   confirm against the full file.

   parse_type_stuff consumes any number of type annotations, each introduced
   by tptp_has_type or tptp_fun_type, so that they are skipped. *)
299 fun parse_type_stuff x =
300 Scan.repeat (($$ tptp_has_type || $$ tptp_fun_type) |-- parse_arg) x
(* An argument: a parenthesized term, or a general identifier optionally
   followed by a parenthesized argument list. *)
302 ($$ "(" |-- parse_term --| $$ ")" --| parse_type_stuff
303 || scan_general_id --| parse_type_stuff
304 -- Scan.optional ($$ "(" |-- parse_terms --| $$ ")") []
(* A chain of arguments joined by tptp_app, combined with list_app. *)
307 (parse_arg -- Scan.repeat ($$ tptp_app |-- parse_arg) >> list_app) x
(* An application optionally followed by an equality or a negated equality;
   equalities become terms headed by "equal", negated ones are wrapped in
   tptp_not. *)
309 (parse_app -- Scan.option (Scan.option ($$ tptp_not_infix) --| $$ tptp_equal
311 >> (fn (u1, NONE) => u1
312 | (u1, SOME (NONE, u2)) => ATerm ("equal", [u1, u2])
313 | (u1, SOME (SOME _, u2)) =>
314 ATerm (tptp_not, [ATerm ("equal", [u1, u2])]))) x
(* A nonempty comma-separated list of terms. *)
316 (parse_term ::: Scan.repeat ($$ "," |-- parse_term)) x
318 (* TODO: Avoid duplication with "parse_term" above. *)
(* NOTE(review): the header line of this definition is not visible in this
   listing; it is presumably parse_atom, which parse_decorated_atom refers to.
   Parses a term optionally followed by an equality or a negated equality and
   wraps the result as an atom; a negated equality is wrapped with mk_anot. *)
320 (parse_term -- Scan.option (Scan.option ($$ tptp_not_infix) --| $$ tptp_equal
322 >> (fn (u1, NONE) => AAtom u1
323 | (u1, SOME (NONE, u2)) => AAtom (ATerm ("equal", [u1, u2]))
324 | (u1, SOME (SOME _, u2)) =>
325 mk_anot (AAtom (ATerm ("equal", [u1, u2]))))) x
(* Returns the head symbol of a first-order term, dropping its arguments. *)
fun fo_term_head tm =
  case tm of
    ATerm (head, _) => head
329 (* TPTP formulas are fully parenthesized, so we don't need to worry about
330 operator precedence. *)
(* Mutually recursive formula parsers.
   parse_literal counts the leading tptp_not signs, parses a parenthesized
   formula or a quantified formula (further alternatives are not visible), and
   negates the result once when the count is odd.
   parse_formula parses a left operand optionally followed by one binary
   connective and a right operand, which are combined with mk_aconn.
   parse_quantified_formula parses a quantifier, a bracketed list of terms, a
   colon, and a literal; only the head symbols of the listed terms are kept as
   bound names, each paired with NONE.
   NOTE(review): several lines of parse_literal and parse_formula are not
   visible in this listing. *)
331 fun parse_literal x =
332 ((Scan.repeat ($$ tptp_not) >> length)
333 -- ($$ "(" |-- parse_formula --| $$ ")"
334 || parse_quantified_formula
336 >> (fn (n, phi) => phi |> n mod 2 = 1 ? mk_anot)) x
337 and parse_formula x =
339 -- Scan.option ((Scan.this_string tptp_implies >> K AImplies
340 || Scan.this_string tptp_iff >> K AIff
341 || Scan.this_string tptp_not_iff >> K ANotIff
342 || Scan.this_string tptp_if >> K AIf
343 || $$ tptp_or >> K AOr
344 || $$ tptp_and >> K AAnd)
346 >> (fn (phi1, NONE) => phi1
347 | (phi1, SOME (c, phi2)) => mk_aconn c phi1 phi2)) x
348 and parse_quantified_formula x =
349 (($$ tptp_forall >> K AForall || $$ tptp_exists >> K AExists)
350 --| $$ "[" -- parse_terms --| $$ "]" --| $$ ":" -- parse_literal
351 >> (fn ((q, ts), phi) =>
352 (* We ignore TFF and THF types for now. *)
353 AQuant (q, map (rpair NONE o fo_term_head) ts, phi))) x
(* Fallback used when a formula cannot be parsed: skips over its symbols and
   returns a dummy atom with an empty name together with the remaining input.
   The local function skip tracks the bracket nesting depth; at depth 0 it
   stops in front of a comma, a closing parenthesis, or a closing bracket.
   NOTE(review): the "let" line and the first clause of skip are not visible
   in this listing. *)
355 fun skip_formula ss =
358 | skip 0 (ss as "," :: _) = ss
359 | skip 0 (ss as ")" :: _) = ss
360 | skip 0 (ss as "]" :: _) = ss
361 | skip n ("(" :: ss) = skip (n + 1) ss
362 | skip n ("[" :: ss) = skip (n + 1) ss
363 | skip n ("]" :: ss) = skip (n - 1) ss
364 | skip n (")" :: ss) = skip (n - 1) ss
365 | skip n (_ :: ss) = skip n ss
366 in (AAtom (ATerm ("", [])), skip 0 ss) end
(* Parses the optional trailing arguments of a TSTP line: a comma and one
   annotation, optionally followed by another comma and further annotations.
   Only the first annotation is returned; the rest is parsed and discarded.
   When nothing matches, the result is the empty list. *)
val parse_tstp_extra_arguments =
  let
    val first_annotation = $$ "," |-- parse_annotation
    val ignored_rest = $$ "," |-- parse_annotations
  in
    Scan.optional (first_annotation --| Scan.option ignored_rest) []
  end
(* Special names that parse_tstp_line compares against when resolving the
   origin of a formula. *)
372 val vampire_unknown_fact = "unknown"
373 val waldmeister_conjecture = "conjecture_1"
(* Prefix that parse_tstp_line removes from a fact name where present. *)
375 val tofof_fact_prefix = "fof_"
(* Decides whether two terms are the same up to a renaming of variables.
   subst is an association list of name pairs that the local function
   do_term_pair extends while it walks both terms; NONE signals a mismatch
   and is propagated. In the visible code a name already bound in subst must
   map to the same partner, a new pair is added only if the second name is not
   yet used as a partner, and non-variable heads must be equal and have the
   same number of arguments. The result is true when a substitution survives.
   NOTE(review): several lines of do_term_pair are not visible in this
   listing. *)
377 fun is_same_term subst tm1 tm2 =
379 fun do_term_pair _ NONE = NONE
380 | do_term_pair (ATerm (s1, tm1), ATerm (s2, tm2)) (SOME subst) =
381 case pairself is_atp_variable (s1, s2) of
383 (case AList.lookup (op =) subst s1 of
384 SOME s2' => if s2' = s2 then SOME subst else NONE
386 if null (AList.find (op =) subst s2) then SOME ((s1, s2) :: subst)
389 if s1 = s2 andalso length tm1 = length tm2 then
390 SOME subst |> fold do_term_pair (tm1 ~~ tm2)
394 in SOME subst |> do_term_pair (tm1, tm2) |> is_some end
(* Structural comparison of two formulas under the variable pairing subst.
   - Quantified formulas must use the same quantifier and bind the same number
     of variables; the bound names are paired up and put in front of subst for
     the comparison of the bodies.
   - Connective formulas must use the same connective and have the same number
     of operands, which are compared pairwise under the same subst.
   - An atom headed by "equal" with two arguments matches the other atom
     either as written or with its two arguments swapped.
   - Other atoms are compared with is_same_term.
   - Formulas of different kinds are never the same. *)
fun is_same_formula subst phi1 phi2 =
  case (phi1, phi2) of
    (AQuant (q1, xs1, body1), AQuant (q2, xs2, body2)) =>
      q1 = q2 andalso length xs1 = length xs2 andalso
      let
        val bound_pairs = map fst xs1 ~~ map fst xs2
      in
        is_same_formula (bound_pairs @ subst) body1 body2
      end
  | (AConn (c1, phis1), AConn (c2, phis2)) =>
      c1 = c2 andalso length phis1 = length phis2 andalso
      forall (fn (psi1, psi2) => is_same_formula subst psi1 psi2)
             (phis1 ~~ phis2)
  | (AAtom (ATerm ("equal", [tm11, tm12])), AAtom tm2) =>
      (* try the equation as written first, then with its sides swapped *)
      exists (fn args => is_same_term subst (ATerm ("equal", args)) tm2)
             [[tm11, tm12], [tm12, tm11]]
  | (AAtom tm1, AAtom tm2) => is_same_term subst tm1 tm2
  | _ => false
(* Returns the identifier of a problem line when that line is a Formula whose
   formula is the same as phi (per is_same_formula, starting from an empty
   variable pairing); any other line yields NONE. *)
fun matching_formula_line_identifier phi line =
  case line of
    Formula (ident, _, phi', _, _) =>
      if is_same_formula [] phi phi' then SOME ident else NONE
  | _ => NONE
(* Collects the second components of the problem's entries into one list of
   lines and gathers the identifiers of the lines that match phi.
   NOTE(review): the call site in parse_tstp_line pairs the result with a step
   number, which suggests an optional identifier is returned; a final step
   that picks one match is presumably on a line not visible in this listing --
   confirm. *)
412 fun find_formula_in_problem problem phi =
413 problem |> maps snd |> map_filter (matching_formula_line_identifier phi)
416 (* Syntax: (cnf|fof|tff|thf)\(<num>, <formula_role>,
417 <formula> <extra_arguments>\).
418 The <num> could be an identifier, but we assume integers. *)
(* Parses one TSTP proof line: one of the cnf/fof/tff/thf keywords, an opening
   parenthesis, a general identifier, a role identifier, a formula (falling
   back to skip_formula when parse_formula fails), the optional extra
   arguments, and a closing parenthesis.
   In the visible result-building code, a two-element annotation starting with
   "file" makes the step name carry the identifier found for the formula in
   the problem and clears the dependencies; other annotations leave the name
   without extra information. An equivalence whose left side is an atom
   becomes a Definition, an atom headed by "equal" becomes an Inference, any
   other formula in that branch raises UNRECOGNIZED_ATP_PROOF, and the final
   branch always builds an Inference.
   NOTE(review): many lines of this function, including the case heads that
   select between these branches, are not visible in this listing. *)
419 fun parse_tstp_line problem =
420 ((Scan.this_string tptp_cnf || Scan.this_string tptp_fof
421 || Scan.this_string tptp_tff || Scan.this_string tptp_thf) -- $$ "(")
422 |-- scan_general_id --| $$ "," -- Symbol.scan_id --| $$ ","
423 -- (parse_formula || skip_formula) -- parse_tstp_extra_arguments --| $$ ")"
425 >> (fn (((num, role), phi), deps) =>
428 (* Waldmeister isn't exactly helping. *)
432 if s = vampire_unknown_fact then
434 else if s = waldmeister_conjecture then
435 find_formula_in_problem problem (mk_anot phi)
437 SOME (s |> perhaps (try (unprefix tofof_fact_prefix)))),
439 | ["file", _] => ((num, find_formula_in_problem problem phi), [])
440 | _ => ((num, NONE), deps)
445 AConn (AIff, [phi1 as AAtom _, phi2]) =>
446 Definition (name, phi1, phi2)
447 | AAtom (ATerm ("equal", _)) =>
448 (* Vampire's equality proxy axiom *)
449 Inference (name, phi, map (rpair NONE) deps)
450 | _ => raise UNRECOGNIZED_ATP_PROOF ())
451 | _ => Inference (name, phi, map (rpair NONE) deps)
454 (**** PARSING OF SPASS OUTPUT ****)
(* SPASS refers to clauses as "x.y". Both parts are scanned as general
   identifiers, but only "x" is returned; "y", whose role is unclear, is
   dropped. *)
val parse_dot_name =
  let
    val ignored_suffix = $$ "." --| scan_general_id
  in
    scan_general_id --| ignored_suffix
  end
(* Parses the optional dependency list of a SPASS line: a colon followed by
   any number of dot names, each optionally followed by a comma. Without a
   leading colon the result is the empty list. *)
val parse_spass_annotations =
  let
    val dependency = parse_dot_name --| Scan.option ($$ ",")
    val dependencies = $$ ":" |-- Scan.repeat dependency
  in
    Scan.optional dependencies []
  end
(* Parses an atom and then discards any stars, pluses, and spaces that follow
   it. It is not clear why SPASS decorates some literals this way, so the
   decorations are ignored. *)
fun parse_decorated_atom x =
  let
    val decoration = $$ "*" || $$ "+" || $$ " "
  in
    (parse_atom --| Scan.repeat decoration) x
  end
(* Builds a formula from the negative and positive literals of a clause:
   - no literals at all: the atom "c_False";
   - only positive literals: their disjunction;
   - only negative literals: the negation of their conjunction;
   - both: the conjunction of the negative literals implies the disjunction of
     the positive ones. *)
fun mk_horn (neg_lits, pos_lits) =
  let
    fun join conn = foldr1 (uncurry (mk_aconn conn))
  in
    case (null neg_lits, null pos_lits) of
      (true, true) => AAtom (ATerm ("c_False", []))
    | (true, false) => join AOr pos_lits
    | (false, true) => mk_anot (join AAnd neg_lits)
    | (false, false) =>
        mk_aconn AImplies (join AAnd neg_lits) (join AOr pos_lits)
  end
(* Parses a SPASS clause of the form "atoms || atoms -> atoms". The atoms of
   the first two groups are concatenated and passed to mk_horn as the negative
   literals; the atoms after the arrow are the positive literals. *)
fun parse_horn_clause x =
  let
    val atoms = Scan.repeat parse_decorated_atom
    val bars = $$ "|" -- $$ "|"
    val arrow = $$ "-" -- $$ ">"
    fun to_formula ((lits1, lits2), pos_lits) =
      mk_horn (lits1 @ lits2, pos_lits)
  in
    (atoms --| bars -- atoms --| arrow -- atoms >> to_formula) x
  end
(* Syntax: <num>[0:<inference><annotations>]
   <atoms> || <atoms> -> <atoms>.
   The inference name is scanned and dropped. The result is an Inference step
   named by <num> whose dependencies are the annotated clause numbers; every
   step name gets NONE as its second component. *)
fun parse_spass_line x =
  let
    fun mk_step ((num, deps), clause) =
      Inference ((num, NONE), clause, map (fn dep => (dep, NONE)) deps)
  in
    (scan_general_id --| $$ "[" --| $$ "0" --| $$ ":" --| Symbol.scan_id
     -- parse_spass_annotations --| $$ "]" -- parse_horn_clause --| $$ "."
     >> mk_step) x
  end
(* Parses one proof line, trying the TSTP format first and the SPASS format
   second. *)
fun parse_line problem =
  let
    val tstp_line = parse_tstp_line problem
  in
    tstp_line || parse_spass_line
  end
(* Parses a whole proof text: the string is first stripped of spaces and
   comments, then one or more lines are parsed with parse_line. A failure of
   the line parser raises UNRECOGNIZED_ATP_PROOF.
   NOTE(review): the step between space stripping and scanning, and the final
   line of the pipeline, are not visible in this listing. *)
491 fun parse_proof problem s =
492 s |> strip_spaces_except_between_ident_chars
494 |> Scan.finite Symbol.stopper
495 (Scan.error (!! (fn _ => raise UNRECOGNIZED_ATP_PROOF ())
496 (Scan.repeat1 (parse_line problem))))
(* Converts a TSTP-like proof text into a proof sorted by step name (see
   step_name_ord). The empty text yields the empty proof without parsing. *)
fun atp_proof_from_tstplike_proof problem s =
  if s = "" then
    []
  else
    let
      (* the $ sign acts as a sentinel (FIXME: needed?) *)
      val steps = parse_proof problem (s ^ "$")
    in
      sort (fn steps_pair => step_name_ord (pairself step_name steps_pair))
           steps
    end
(* Walks the proof from front to back while collecting the names of the steps
   already visited in seen. Definitions are kept unchanged. For an Inference,
   each dependency is looked up among the seen names with is_same_atp_step;
   dependencies without a match are dropped, and matched ones are replaced by
   the seen name.
   NOTE(review): the lines that rebuild the Inference around the filtered
   dependency list are not visible in this listing. *)
505 fun clean_up_dependencies _ [] = []
506 | clean_up_dependencies seen ((step as Definition (name, _, _)) :: steps) =
507 step :: clean_up_dependencies (name :: seen) steps
508 | clean_up_dependencies seen (Inference (name, u, deps) :: steps) =
510 map_filter (fn dep => find_first (is_same_atp_step dep) seen)
512 clean_up_dependencies (name :: seen) steps
(* Entry point for clean_up_dependencies, starting with no steps seen. *)
fun clean_up_atp_proof_dependencies proof = clean_up_dependencies [] proof
(* Applies f to the head symbol of a term and, recursively, to the head
   symbols of all its arguments. *)
fun map_term_names_in_term f tm =
  case tm of
    ATerm (head, args) =>
      ATerm (f head, map (fn arg => map_term_names_in_term f arg) args)
(* Applies f to every term name that occurs in the atoms of a formula.
   Quantifiers, bound variable lists, and connectives are left as they are. *)
fun map_term_names_in_formula f phi =
  let
    val rename = map_term_names_in_formula f
  in
    case phi of
      AQuant (q, xs, body) => AQuant (q, xs, rename body)
    | AConn (c, phis) => AConn (c, map rename phis)
    | AAtom t => AAtom (map_term_names_in_term f t)
  end
(* Applies f to the term names in the formulas of a proof step. Step names and
   dependencies are not touched. *)
fun map_term_names_in_step f step =
  let
    val rename = map_term_names_in_formula f
  in
    case step of
      Definition (name, phi1, phi2) =>
        Definition (name, rename phi1, rename phi2)
    | Inference (name, phi, deps) => Inference (name, rename phi, deps)
  end
(* Applies f to the term names of every step of a proof. *)
fun map_term_names_in_atp_proof f =
  fn proof => map (fn step => map_term_names_in_step f step) proof
(* Looks s up in the table; returns the associated string when there is one
   and s itself otherwise. *)
fun nasty_name pool s =
  case Symtab.lookup pool s of
    SOME replacement => replacement
  | NONE => s
(* Renames the term names of a proof through the given table (see
   nasty_name). With an empty table the proof is returned unchanged. *)
fun nasty_atp_proof pool =
  if not (Symtab.is_empty pool) then
    map_term_names_in_atp_proof (nasty_name pool)
  else
    I