1 (* Title: HOL/Tools/ATP/atp_proof.ML
2 Author: Lawrence C. Paulson, Cambridge University Computer Laboratory
3 Author: Claire Quigley, Cambridge University Computer Laboratory
4 Author: Jasmin Blanchette, TU Muenchen
6 Abstract representation of ATP proofs and TSTP/SPASS syntax.
11 type ('a, 'b) ho_term = ('a, 'b) ATP_Problem.ho_term
12 type ('a, 'b, 'c) formula = ('a, 'b, 'c) ATP_Problem.formula
13 type 'a problem = 'a ATP_Problem.problem
15 exception UNRECOGNIZED_ATP_PROOF of unit
22 UnsoundProof of bool option * string list | (* FIXME: doesn't belong here *)
36 UnknownError of string
38 type step_name = string * string list option
41 Definition of step_name * 'a * 'a |
42 Inference of step_name * 'a * step_name list
44 type 'a proof = ('a, 'a, ('a, 'a) ho_term) formula step list
46 val short_output : bool -> string -> string
47 val string_for_failure : failure -> string
48 val extract_important_message : string -> string
49 val extract_known_failure :
50 (failure * string) list -> string -> failure option
51 val extract_tstplike_proof_and_outcome :
52 bool -> bool -> (string * string) list -> (failure * string) list -> string
53 -> string * failure option
54 val is_same_atp_step : step_name -> step_name -> bool
55 val scan_general_id : string list -> string * string list
57 string list -> (string, 'a, (string, 'a) ho_term) formula * string list
58 val atp_proof_from_tstplike_proof :
59 string problem -> string -> string -> string proof
60 val clean_up_atp_proof_dependencies : string proof -> string proof
61 val map_term_names_in_atp_proof :
62 (string -> string) -> string proof -> string proof
63 val nasty_atp_proof : string Symtab.table -> string proof -> string proof
66 structure ATP_Proof : ATP_PROOF =
72 exception UNRECOGNIZED_ATP_PROOF of unit
79 UnsoundProof of bool option * string list |
93 UnknownError of string
(* An identifier character is an underscore or anything accepted by
   "Char.isAlphaNum". *)
fun is_ident_char #"_" = true
  | is_ident_char c = Char.isAlphaNum c
(* "strip_spaces" specialized with the flag "true" and with "is_ident_char" as
   the character predicate. *)
fun strip_spaces_except_between_ident_chars s =
  strip_spaces true is_ident_char s
98 fun elide_string threshold s =
99 if size s > threshold then
100 String.extract (s, 0, SOME (threshold div 2 - 5)) ^ " ...... " ^
101 String.extract (s, size s - (threshold + 1) div 2 + 6, NONE)
104 fun short_output verbose output =
106 if output = "" then "No details available" else elide_string 1000 output
110 val missing_message_tail =
111 " appears to be missing. You will need to install it if you want to invoke \
114 fun involving [] = ""
116 "involving " ^ space_implode " " (Try.serial_commas "and" (map quote ss)) ^
119 fun string_for_failure Unprovable = "The problem is unprovable."
120 | string_for_failure GaveUp = "The prover gave up."
121 | string_for_failure ProofMissing =
122 "The prover claims the conjecture is a theorem but did not provide a proof."
123 | string_for_failure ProofIncomplete =
124 "The prover claims the conjecture is a theorem but provided an incomplete \
126 | string_for_failure (UnsoundProof (NONE, ss)) =
127 "The prover found a type-unsound proof " ^ involving ss ^
128 "(or, less likely, your axioms are inconsistent). Specify a sound type \
129 \encoding or omit the \"type_enc\" option."
130 | string_for_failure (UnsoundProof (SOME false, ss)) =
131 "The prover found a type-unsound proof " ^ involving ss ^
132 "(or, less likely, your axioms are inconsistent). Try passing the \
133 \\"sound\" option to Sledgehammer to avoid such spurious proofs."
134 | string_for_failure (UnsoundProof (SOME true, ss)) =
135 "The prover found a type-unsound proof " ^ involving ss ^
136 "even though a supposedly type-sound encoding was used (or, less likely, \
137 \your axioms are inconsistent). Please report this to the Isabelle \
139 | string_for_failure CantConnect = "Cannot connect to remote server."
140 | string_for_failure TimedOut = "Timed out."
141 | string_for_failure Inappropriate =
142 "The problem lies outside the prover's scope."
143 | string_for_failure OutOfResources = "The prover ran out of resources."
144 | string_for_failure SpassTooOld =
145 "Isabelle requires a more recent version of SPASS with support for the \
146 \TPTP syntax. To install it, download and extract the package \
147 \\"http://isabelle.in.tum.de/dist/contrib/spass-3.7.tar.gz\" and add the \
148 \\"spass-3.7\" directory's absolute path to " ^
149 Path.print (Path.expand (Path.explode "$ISABELLE_HOME_USER/etc/components")) ^
150 " on a line of its own."
151 | string_for_failure VampireTooOld =
152 "Isabelle requires a more recent version of Vampire. To install it, follow \
153 \the instructions from the Sledgehammer manual (\"isabelle doc\
155 | string_for_failure NoPerl = "Perl" ^ missing_message_tail
156 | string_for_failure NoLibwwwPerl =
157 "The Perl module \"libwww-perl\"" ^ missing_message_tail
158 | string_for_failure MalformedInput =
159 "The generated problem is malformed. Please report this to the Isabelle \
161 | string_for_failure MalformedOutput = "The prover output is malformed."
162 | string_for_failure Interrupted = "The prover was interrupted."
163 | string_for_failure Crashed = "The prover crashed."
164 | string_for_failure InternalError = "An internal prover error occurred."
165 | string_for_failure (UnknownError string) =
166 "A prover error occurred" ^
167 (if string = "" then ". (Pass the \"verbose\" option for details.)"
(* Returns the part of "output" found after "begin_delim" and before the next
   "end_delim", with everything up to and including the first newline of that
   part dropped. Yields "" as soon as one of the three separators is missing. *)
fun extract_delimited (begin_delim, end_delim) output =
  case first_field begin_delim output of
    NONE => ""
  | SOME (_, after_begin) =>
    (case first_field end_delim after_begin of
       NONE => ""
     | SOME (before_end, _) =>
       (case first_field "\n" before_end of
          NONE => ""
        | SOME (_, body) => body))
(* Begin/end markers of the "RequiredInformation" section of a prover's
   output. *)
val tstp_important_message_delims =
  let
    val start_marker = "% SZS start RequiredInformation"
    val end_marker = "% SZS end RequiredInformation"
  in (start_marker, end_marker) end
179 fun extract_important_message output =
180 case extract_delimited tstp_important_message_delims output of
182 | s => s |> space_explode "\n" |> filter_out (curry (op =) "")
183 |> map (perhaps (try (unprefix "%")))
184 |> map (perhaps (try (unprefix " ")))
185 |> space_implode "\n " |> quote
187 (* Splits by the first possible of a list of delimiters. *)
188 fun extract_tstplike_proof delims output =
189 case pairself (find_first (fn s => String.isSubstring s output))
190 (ListPair.unzip delims) of
191 (SOME begin_delim, SOME end_delim) =>
192 extract_delimited (begin_delim, end_delim) output
195 fun extract_known_failure known_failures output =
197 |> find_first (fn (_, pattern) => String.isSubstring pattern output)
200 fun extract_tstplike_proof_and_outcome verbose complete proof_delims
201 known_failures output =
202 case (extract_tstplike_proof proof_delims output,
203 extract_known_failure known_failures output) of
204 (_, SOME ProofIncomplete) => ("", SOME ProofIncomplete)
205 | ("", SOME ProofMissing) => ("", NONE)
206 | ("", SOME failure) =>
207 ("", SOME (if failure = GaveUp andalso complete then Unprovable
209 | ("", NONE) => ("", SOME (UnknownError (short_output verbose output)))
210 | (tstplike_proof, _) => (tstplike_proof, NONE)
212 type step_name = string * string list option
(* Two step names denote the same step iff their first components coincide; the
   second components are ignored. *)
fun is_same_atp_step (id1, _) (id2, _) = (op =) (id1, id2)
216 val vampire_fact_prefix = "f"
218 fun step_name_ord p =
219 let val q = pairself fst p in
220 (* The "unprefix" part is to cope with remote Vampire's output. The proper
221 solution would be to perform a topological sort, e.g. using the nice
223 case pairself (Int.fromString
224 o perhaps (try (unprefix vampire_fact_prefix))) q of
225 (NONE, NONE) => string_ord q
226 | (NONE, SOME _) => LESS
227 | (SOME _, NONE) => GREATER
228 | (SOME i, SOME j) => int_ord (i, j)
232 Definition of step_name * 'a * 'a |
233 Inference of step_name * 'a * step_name list
235 type 'a proof = ('a, 'a, ('a, 'a) ho_term) formula step list
(* Projects the name (first component) out of a proof step, whichever
   constructor was used to build it. *)
fun step_name step =
  case step of
    Definition (name, _, _) => name
  | Inference (name, _, _) => name
240 (**** PARSING OF TSTP FORMAT ****)
242 (* FIXME: temporary hack *)
243 fun repair_waldmeister_step_name s =
244 case space_explode "." s of
246 (case a of "0" => "X" | "1" => "Y" | _ => "Z" ^ a) ^
247 (if size b = 1 then "0" else "") ^ b ^ c ^ d
(* Scans a general identifier. Two shapes are accepted:
   - a string enclosed in single quotes (e.g., a file name), whose contents are
     passed through "repair_waldmeister_step_name";
   - a possibly empty run of "$" signs followed by at least one symbol accepted
     by "Symbol.is_letdig". *)
val scan_general_id =
  let
    val scan_quoted =
      ($$ "'" |-- Scan.repeat (~$$ "'") --| $$ "'")
      >> (fn body => repair_waldmeister_step_name (implode body))
    val scan_plain =
      (Scan.repeat ($$ "$") -- Scan.many1 Symbol.is_letdig)
      >> (fn (dollars, rest) => implode (dollars @ rest))
  in scan_quoted || scan_plain end
257 (* Generalized first-order terms, which include file names, numbers, etc. *)
258 fun parse_annotation x =
259 ((scan_general_id ::: Scan.repeat ($$ " " |-- scan_general_id))
260 -- Scan.optional parse_annotation [] >> op @
261 || $$ "(" |-- parse_annotations --| $$ ")"
262 || $$ "[" |-- parse_annotations --| $$ "]") x
263 and parse_annotations x =
264 (Scan.optional (parse_annotation
265 ::: Scan.repeat ($$ "," |-- parse_annotation)) []
(* Applies "f" to the arguments one at a time, from left to right, wrapping
   each application in an "ATerm" headed by "tptp_app". With no arguments, "f"
   is returned unchanged. *)
fun list_app (f, []) = f
  | list_app (f, arg :: args) = list_app (ATerm (tptp_app, [f, arg]), args)
271 (* We ignore TFF and THF types for now. *)
272 fun parse_type_stuff x =
273 Scan.repeat (($$ tptp_has_type || $$ tptp_fun_type) |-- parse_arg) x
275 ($$ "(" |-- parse_term --| $$ ")" --| parse_type_stuff
276 || scan_general_id --| parse_type_stuff
277 -- Scan.optional ($$ "(" |-- parse_terms --| $$ ")") []
280 (parse_arg -- Scan.repeat ($$ tptp_app |-- parse_arg) >> list_app) x
282 (parse_app -- Scan.option (Scan.option ($$ tptp_not_infix) --| $$ tptp_equal
284 >> (fn (u1, NONE) => u1
285 | (u1, SOME (NONE, u2)) => ATerm ("equal", [u1, u2])
286 | (u1, SOME (SOME _, u2)) =>
287 ATerm (tptp_not, [ATerm ("equal", [u1, u2])]))) x
289 (parse_term ::: Scan.repeat ($$ "," |-- parse_term)) x
291 (* TODO: Avoid duplication with "parse_term" above. *)
293 (parse_term -- Scan.option (Scan.option ($$ tptp_not_infix) --| $$ tptp_equal
295 >> (fn (u1, NONE) => AAtom u1
296 | (u1, SOME (NONE, u2)) => AAtom (ATerm ("equal", [u1, u2]))
297 | (u1, SOME (SOME _, u2)) =>
298 mk_anot (AAtom (ATerm ("equal", [u1, u2]))))) x
(* Returns the head symbol of an "ATerm", discarding its arguments. *)
fun ho_term_head tm =
  case tm of
    ATerm (head, _) => head
302 (* TPTP formulas are fully parenthesized, so we don't need to worry about
303 operator precedence. *)
304 fun parse_literal x =
305 ((Scan.repeat ($$ tptp_not) >> length)
306 -- ($$ "(" |-- parse_formula --| $$ ")"
307 || parse_quantified_formula
309 >> (fn (n, phi) => phi |> n mod 2 = 1 ? mk_anot)) x
310 and parse_formula x =
312 -- Scan.option ((Scan.this_string tptp_implies
313 || Scan.this_string tptp_iff
314 || Scan.this_string tptp_not_iff
315 || Scan.this_string tptp_if
317 || $$ tptp_and) -- parse_formula)
318 >> (fn (phi1, NONE) => phi1
319 | (phi1, SOME (c, phi2)) =>
320 if c = tptp_implies then mk_aconn AImplies phi1 phi2
321 else if c = tptp_iff then mk_aconn AIff phi1 phi2
322 else if c = tptp_not_iff then mk_anot (mk_aconn AIff phi1 phi2)
323 else if c = tptp_if then mk_aconn AImplies phi2 phi1
324 else if c = tptp_or then mk_aconn AOr phi1 phi2
325 else if c = tptp_and then mk_aconn AAnd phi1 phi2
326 else raise Fail ("impossible connective " ^ quote c))) x
327 and parse_quantified_formula x =
328 (($$ tptp_forall >> K AForall || $$ tptp_exists >> K AExists)
329 --| $$ "[" -- parse_terms --| $$ "]" --| $$ ":" -- parse_literal
330 >> (fn ((q, ts), phi) =>
331 (* We ignore TFF and THF types for now. *)
332 AQuant (q, map (rpair NONE o ho_term_head) ts, phi))) x
334 fun skip_formula ss =
337 | skip 0 (ss as "," :: _) = ss
338 | skip 0 (ss as ")" :: _) = ss
339 | skip 0 (ss as "]" :: _) = ss
340 | skip n ("(" :: ss) = skip (n + 1) ss
341 | skip n ("[" :: ss) = skip (n + 1) ss
342 | skip n ("]" :: ss) = skip (n - 1) ss
343 | skip n (")" :: ss) = skip (n - 1) ss
344 | skip n (_ :: ss) = skip n ss
345 in (AAtom (ATerm ("", [])), skip 0 ss) end
(* Optional trailing arguments of a TSTP line: a comma followed by one
   annotation, itself optionally followed by a comma and further annotations.
   Only the first annotation is kept; the result is [] if nothing is there. *)
val parse_tstp_extra_arguments =
  let
    val first_annotation = $$ "," |-- parse_annotation
    val ignored_annotations = Scan.option ($$ "," |-- parse_annotations)
  in Scan.optional (first_annotation --| ignored_annotations) [] end
351 val vampire_unknown_fact = "unknown"
352 val waldmeister_conjecture = "conjecture_1"
354 val tofof_fact_prefix = "fof_"
356 fun is_same_term subst tm1 tm2 =
358 fun do_term_pair _ NONE = NONE
359 | do_term_pair (ATerm (s1, tm1), ATerm (s2, tm2)) (SOME subst) =
360 case pairself is_tptp_variable (s1, s2) of
362 (case AList.lookup (op =) subst s1 of
363 SOME s2' => if s2' = s2 then SOME subst else NONE
365 if null (AList.find (op =) subst s2) then SOME ((s1, s2) :: subst)
368 if s1 = s2 andalso length tm1 = length tm2 then
369 SOME subst |> fold do_term_pair (tm1 ~~ tm2)
373 in SOME subst |> do_term_pair (tm1, tm2) |> is_some end
(* Structural comparison of two formulas under the association list "subst".
   - Quantified formulas must agree on the quantifier and on the number of bound
     variables; the bound names are paired up and prepended to "subst" before
     the bodies are compared.
   - Connective formulas must agree on the connective and on the number of
     subformulas, which are then compared pairwise.
   - An "equal" atom on the left is tried in both orientations.
   - Any other pair of atoms is delegated to "is_same_term".
   - Formulas of different shapes are never the same. *)
fun is_same_formula subst phi1 phi2 =
  case (phi1, phi2) of
    (AQuant (q1, xs1, body1), AQuant (q2, xs2, body2)) =>
    q1 = q2 andalso length xs1 = length xs2 andalso
    is_same_formula ((map fst xs1 ~~ map fst xs2) @ subst) body1 body2
  | (AConn (c1, phis1), AConn (c2, phis2)) =>
    c1 = c2 andalso length phis1 = length phis2 andalso
    forall (fn (psi1, psi2) => is_same_formula subst psi1 psi2)
           (phis1 ~~ phis2)
  | (AAtom (tm1 as ATerm ("equal", [lhs, rhs])), AAtom tm2) =>
    is_same_term subst tm1 tm2 orelse
    is_same_term subst (ATerm ("equal", [rhs, lhs])) tm2
  | (AAtom tm1, AAtom tm2) => is_same_term subst tm1 tm2
  | _ => false
(* Returns the identifier of a "Formula" problem line whose formula is the same
   as "phi" (checked by "is_same_formula" with an empty substitution); NONE for
   a non-matching formula or for any other kind of line. *)
fun matching_formula_line_identifier phi line =
  case line of
    Formula (ident, _, phi', _, _) =>
    if is_same_formula [] phi phi' then SOME ident else NONE
  | _ => NONE
391 fun find_formula_in_problem problem phi =
392 problem |> maps snd |> map_filter (matching_formula_line_identifier phi)
395 (* Syntax: (cnf|fof|tff|thf)\(<num>, <formula_role>,
396 <formula> <extra_arguments>\).
397 The <num> could be an identifier, but we assume integers. *)
398 fun parse_tstp_line problem =
399 ((Scan.this_string tptp_cnf || Scan.this_string tptp_fof
400 || Scan.this_string tptp_tff || Scan.this_string tptp_thf) -- $$ "(")
401 |-- scan_general_id --| $$ "," -- Symbol.scan_id --| $$ ","
402 -- (parse_formula || skip_formula) -- parse_tstp_extra_arguments --| $$ ")"
404 >> (fn (((num, role), phi), deps) =>
407 (* Waldmeister isn't exactly helping. *)
411 if s = vampire_unknown_fact then
413 else if s = waldmeister_conjecture then
414 find_formula_in_problem problem (mk_anot phi)
416 SOME [s |> perhaps (try (unprefix tofof_fact_prefix))]),
418 | ["file", _] => ((num, find_formula_in_problem problem phi), [])
419 | _ => ((num, NONE), deps)
424 AConn (AIff, [phi1 as AAtom _, phi2]) =>
425 Definition (name, phi1, phi2)
426 | AAtom (ATerm ("equal", _)) =>
427 (* Vampire's equality proxy axiom *)
428 Inference (name, phi, map (rpair NONE) deps)
429 | _ => raise UNRECOGNIZED_ATP_PROOF ())
430 | _ => Inference (name, phi, map (rpair NONE) deps)
433 (**** PARSING OF SPASS OUTPUT ****)
(* SPASS clause references have the form "x.y". Only "x" is returned; "y" is
   scanned and thrown away, since its role is not clear anyway. *)
fun parse_dot_name x =
  (scan_general_id --| $$ "." --| scan_general_id) x
(* Optional ":"-introduced list of dotted clause references, each optionally
   followed by a comma. Yields [] when there is no ":". *)
val parse_spass_annotations =
  let val scan_reference = parse_dot_name --| Scan.option ($$ ",")
  in Scan.optional ($$ ":" |-- Scan.repeat scan_reference) [] end
(* Some literals are followed by sequences of stars and/or pluses whose purpose
   is not clear. They are skipped, together with any spaces, after the atom. *)
fun parse_decorated_atom x =
  let val decoration = $$ "*" || $$ "+" || $$ " "
  in (parse_atom --| Scan.repeat decoration) x end
(* Builds a formula from the negative and positive literals of a clause:
   - no literals at all: the atom "c_False";
   - only positive literals: their disjunction;
   - only negative literals: the negation of their conjunction;
   - both: the conjunction of the negative literals implies the disjunction of
     the positive ones. *)
fun mk_horn (neg_lits, pos_lits) =
  let
    fun conj lits = foldr1 (uncurry (mk_aconn AAnd)) lits
    fun disj lits = foldr1 (uncurry (mk_aconn AOr)) lits
  in
    case (null neg_lits, null pos_lits) of
      (true, true) => AAtom (ATerm ("c_False", []))
    | (true, false) => disj pos_lits
    | (false, true) => mk_anot (conj neg_lits)
    | (false, false) => mk_aconn AImplies (conj neg_lits) (disj pos_lits)
  end
(* Parses "<atoms> || <atoms> -> <atoms>". The two atom lists to the left of the
   arrow are concatenated and passed to "mk_horn" as its first component; the
   atoms to the right of the arrow form its second component. *)
fun parse_horn_clause x =
  let
    fun atoms y = Scan.repeat parse_decorated_atom y
    fun build ((lits1, lits2), pos_lits) = mk_horn (lits1 @ lits2, pos_lits)
  in
    (atoms --| $$ "|" --| $$ "|" -- atoms --| $$ "-" --| $$ ">" -- atoms
     >> build) x
  end
461 fun resolve_spass_num spass_names num =
462 case Int.fromString num of
463 SOME j => if j > 0 andalso j <= Vector.length spass_names then
464 SOME (Vector.sub (spass_names, j - 1))
(* Syntax: <num>[0:<inference><annotations>]
   <atoms> || <atoms> -> <atoms>.
   The result is an "Inference" step. Its name pairs <num> with whatever
   "resolve_spass_num" returns for it, and each dependency is paired with its
   own resolution in the same way. The scanned inference identifier is
   discarded. *)
fun parse_spass_line spass_names x =
  let
    val resolve = resolve_spass_num spass_names
    fun mk_step ((num, deps), clause) =
      Inference ((num, resolve num), clause,
                 map (fn dep => (dep, resolve dep)) deps)
    val header =
      scan_general_id --| $$ "[" --| $$ "0" --| $$ ":" --| Symbol.scan_id
      -- parse_spass_annotations --| $$ "]"
  in (header -- parse_horn_clause --| $$ "." >> mk_step) x end
(* A proof line is tried as a TSTP line first, then as a SPASS line. *)
fun parse_line problem spass_names =
  let
    val tstp_parser = parse_tstp_line problem
    val spass_parser = parse_spass_line spass_names
  in tstp_parser || spass_parser end
480 fun parse_proof problem spass_names tstp =
481 tstp |> strip_spaces_except_between_ident_chars
483 |> Scan.finite Symbol.stopper
484 (Scan.error (!! (fn _ => raise UNRECOGNIZED_ATP_PROOF ())
485 (Scan.repeat1 (parse_line problem spass_names))))
488 (** SPASS's FLOTTER hack **)
490 (* This is a hack required for keeping track of facts after they have been
491 clausified by SPASS's FLOTTER preprocessor. The "ATP/scripts/spass" script is
492 also part of this hack. *)
494 val set_ClauseFormulaRelationN = "set_ClauseFormulaRelation"
496 fun extract_clause_sequence output =
498 val tokens_of = String.tokens (not o Char.isAlphaNum)
499 fun extract_num ("clause" :: (ss as _ :: _)) = Int.fromString (List.last ss)
500 | extract_num _ = NONE
501 in output |> split_lines |> map_filter (extract_num o tokens_of) end
(* Tells whether the first character of "s" is a decimal digit. The empty string
   has no first character and is rejected outright, since "String.sub (s, 0)"
   would raise "Subscript" for it. *)
fun is_head_digit s = s <> "" andalso Char.isDigit (String.sub (s, 0))
(* Scans one or more symbols satisfying "is_head_digit" and converts their
   concatenation to an integer. *)
val scan_integer =
  Scan.many1 is_head_digit
  >> (fn digits => the (Int.fromString (implode digits)))
(* Parses "(<integer>,<id>,...,<id>)" with an optional trailing comma and
   returns the integer paired with the nonempty list of identifiers. *)
val parse_clause_formula_pair =
  let
    val clause_num = $$ "(" |-- scan_integer --| $$ ","
    val formula_names =
      Symbol.scan_id ::: Scan.repeat ($$ "," |-- Symbol.scan_id)
  in clause_num -- formula_names --| $$ ")" --| Scan.option ($$ ",") end
(* Parses the "set_ClauseFormulaRelation" keyword, an opening parenthesis, and
   then as many clause-formula pairs as possible. *)
val parse_clause_formula_relation =
  let val header = Scan.this_string set_ClauseFormulaRelationN |-- $$ "("
  in header |-- Scan.repeat parse_clause_formula_pair end
513 val extract_clause_formula_relation =
514 Substring.full #> Substring.position set_ClauseFormulaRelationN
515 #> snd #> Substring.position "." #> fst #> Substring.string
516 #> raw_explode #> filter_out Symbol.is_blank #> parse_clause_formula_relation
519 fun extract_spass_name_vector output =
520 (if String.isSubstring set_ClauseFormulaRelationN output then
522 val num_seq = extract_clause_sequence output
523 val name_map = extract_clause_formula_relation output
524 val name_seq = num_seq |> map (these o AList.lookup (op =) name_map)
(* Turns the textual proof "tstp" into a list of proof steps sorted by step
   name. An empty proof text yields the empty proof without any parsing.
   Otherwise a "$" sentinel is appended to the text before it is parsed, and the
   SPASS name vector is extracted from "output". *)
fun atp_proof_from_tstplike_proof problem output tstp =
  if tstp = "" then
    []
  else
    let
      val spass_names = extract_spass_name_vector output
      (* the $ sign acts as a sentinel (FIXME: needed?) *)
      val steps = parse_proof problem spass_names (tstp ^ "$")
    in sort (step_name_ord o pairself step_name) steps end
536 fun clean_up_dependencies _ [] = []
537 | clean_up_dependencies seen ((step as Definition (name, _, _)) :: steps) =
538 step :: clean_up_dependencies (name :: seen) steps
539 | clean_up_dependencies seen (Inference (name, u, deps) :: steps) =
541 map_filter (fn dep => find_first (is_same_atp_step dep) seen)
543 clean_up_dependencies (name :: seen) steps
(* Entry point: runs "clean_up_dependencies" over the whole proof, starting
   with an empty list of already seen step names. *)
fun clean_up_atp_proof_dependencies steps =
  let val nothing_seen = []
  in clean_up_dependencies nothing_seen steps end
(* Applies "f" to the head symbol of a term and, recursively, to the head
   symbols of all its arguments. *)
fun map_term_names_in_term f tm =
  case tm of
    ATerm (name, args) => ATerm (f name, map (map_term_names_in_term f) args)
(* Applies "f" to every term name occurring in the atoms of a formula.
   Quantifiers, bound variable lists, and connectives are left as they are. *)
fun map_term_names_in_formula f phi =
  case phi of
    AQuant (q, xs, body) => AQuant (q, xs, map_term_names_in_formula f body)
  | AConn (c, phis) => AConn (c, map (map_term_names_in_formula f) phis)
  | AAtom tm => AAtom (map_term_names_in_term f tm)
(* Applies "f" to the term names in the formula(s) carried by a proof step. The
   step name and the dependencies are not touched. *)
fun map_term_names_in_step f step =
  let
    val rename = map_term_names_in_formula f
  in
    case step of
      Definition (name, phi1, phi2) =>
      Definition (name, rename phi1, rename phi2)
    | Inference (name, phi, deps) => Inference (name, rename phi, deps)
  end
(* Applies "f" to the term names of every step of a proof. *)
fun map_term_names_in_atp_proof f proof =
  map (fn step => map_term_names_in_step f step) proof
(* Looks "s" up in "pool"; names without an entry are returned unchanged. *)
fun nasty_name pool s =
  case Symtab.lookup pool s of
    SOME s' => s'
  | NONE => s
(* Renames the term names of a proof according to "pool". With an empty pool
   the identity function is returned, so the proof is not traversed at all. *)
fun nasty_atp_proof pool =
  case Symtab.is_empty pool of
    true => I
  | false => map_term_names_in_atp_proof (nasty_name pool)