1 (* Title: HOL/Tools/ATP/atp_proof.ML
2 Author: Lawrence C. Paulson, Cambridge University Computer Laboratory
3 Author: Claire Quigley, Cambridge University Computer Laboratory
4 Author: Jasmin Blanchette, TU Muenchen
6 Abstract representation of ATP proofs and TSTP/Vampire/SPASS syntax.
(* Interface items of the ATP proof module.
   NOTE(review): the embedded numbering jumps from 6 to 11, so the line that
   opens the enclosing signature is presumably missing from this view. *)
(* Abbreviations for the term and formula types of ATP_Problem. *)
11 type 'a fo_term = 'a ATP_Problem.fo_term
12 type ('a, 'b, 'c) formula = ('a, 'b, 'c) ATP_Problem.formula
(* Failure outcomes. "UnknownError" is the only constructor that carries a
   payload (a string).
   NOTE(review): numbering skips 13-14; the line introducing this datatype is
   presumably missing from this view. *)
15 Unprovable | IncompleteUnprovable | ProofMissing | UnsoundProof |
16 CantConnect | TimedOut | OutOfResources | SpassTooOld | VampireTooOld |
17 NoPerl | NoLibwwwPerl | NoRealZ3 | MalformedInput | MalformedOutput |
18 Interrupted | Crashed | InternalError | UnknownError of string
(* A step name is a string together with an optional second string. *)
20 type step_name = string * string option
(* Proof step constructors: a definition holds a name and two 'a values; an
   inference holds a name, one 'a value and the names of the steps it refers to.
   NOTE(review): numbering skips 21-22; the datatype header is presumably
   missing from this view. *)
23 Definition of step_name * 'a * 'a |
24 Inference of step_name * 'a * step_name list
(* A proof is a list of steps whose payloads are formulas. *)
26 type 'a proof = ('a, 'a, 'a fo_term) formula step list
(* Exported operations. *)
28 val strip_spaces : (char -> bool) -> string -> string
29 val short_output : bool -> string -> string
30 val string_for_failure : failure -> string
31 val extract_important_message : string -> string
32 val extract_known_failure :
33 (failure * string) list -> string -> failure option
34 val extract_tstplike_proof_and_outcome :
35 bool -> bool -> bool -> int -> (string * string) list
36 -> (failure * string) list -> string -> string * failure option
37 val is_same_step : step_name * step_name -> bool
38 val atp_proof_from_tstplike_proof : string -> string proof
39 val map_term_names_in_atp_proof :
40 (string -> string) -> string proof -> string proof
41 val nasty_atp_proof : string Symtab.table -> string proof -> string proof
(* Implementation of the ATP_PROOF signature. *)
44 structure ATP_Proof : ATP_PROOF =
(* Failure constructors, spelled exactly as in the signature.
   NOTE(review): numbering skips 45-49; the structure opener and the line
   introducing this datatype are presumably missing from this view. *)
50 Unprovable | IncompleteUnprovable | ProofMissing | UnsoundProof |
51 CantConnect | TimedOut | OutOfResources | SpassTooOld | VampireTooOld |
52 NoPerl | NoLibwwwPerl | NoRealZ3 | MalformedInput | MalformedOutput |
53 Interrupted | Crashed | InternalError | UnknownError of string
(* Turns a list of characters into a list of one-character strings with
   whitespace removed. A single " " is emitted in place of a whitespace
   character only when the characters on both sides of it satisfy is_evil.
   NOTE(review): the embedded numbering skips 65 and 68; the two "else"
   keywords of the nested conditionals are presumably missing from this view. *)
55 fun strip_spaces_in_list _ [] = []
(* One or two characters left: handle each character on its own. *)
56 | strip_spaces_in_list _ [c1] = if Char.isSpace c1 then [] else [str c1]
57 | strip_spaces_in_list is_evil [c1, c2] =
58 strip_spaces_in_list is_evil [c1] @ strip_spaces_in_list is_evil [c2]
59 | strip_spaces_in_list is_evil (c1 :: c2 :: c3 :: cs) =
(* Whitespace at the front of the window is dropped. *)
60 if Char.isSpace c1 then
61 strip_spaces_in_list is_evil (c2 :: c3 :: cs)
62 else if Char.isSpace c2 then
(* Two whitespace characters in a row: drop the first and look again. *)
63 if Char.isSpace c3 then
64 strip_spaces_in_list is_evil (c1 :: c3 :: cs)
(* Exactly one whitespace character between c1 and c3. *)
66 str c1 :: (if forall is_evil [c1, c3] then [" "] else []) @
67 strip_spaces_in_list is_evil (c3 :: cs)
(* c1 is followed by a non-whitespace character: emit c1 and continue. *)
69 str c1 :: strip_spaces_in_list is_evil (c2 :: c3 :: cs)
(* String-level wrapper around strip_spaces_in_list: explodes the string into
   characters, filters them, and concatenates the resulting pieces. *)
fun strip_spaces is_evil s =
  s |> String.explode
    |> strip_spaces_in_list is_evil
    |> implode
(* True for an underscore or any alphanumeric character. *)
fun is_ident_char c = (c = #"_") orelse Char.isAlphaNum c
(* strip_spaces specialised to is_ident_char as the "is_evil" predicate. *)
fun strip_spaces_except_between_ident_chars s = strip_spaces is_ident_char s
(* Shortens a long string by cutting out its middle. When s is longer than
   threshold, the result is the first (threshold div 2 - 5) characters of s,
   the separator " ...... ", and the tail of s starting at index
   size s - (threshold + 1) div 2 + 6.
   NOTE(review): numbering skips 80-81; the "else" branch is presumably missing
   from this view (it most likely returns s unchanged -- confirm). *)
76 fun elide_string threshold s =
77 if size s > threshold then
78 String.extract (s, 0, SOME (threshold div 2 - 5)) ^ " ...... " ^
79 String.extract (s, size s - (threshold + 1) div 2 + 6, NONE)
(* Produces a displayable version of prover output. The visible branch yields
   "No details available" for empty output and otherwise elides the output to
   a threshold of 1000 via elide_string.
   NOTE(review): numbering skips 83 and 85-86, so how "verbose" selects between
   branches is not visible here -- confirm against the full file. *)
82 fun short_output verbose output =
84 if output = "" then "No details available" else elide_string 1000 output
(* Common ending for "X appears to be missing" messages; string_for_failure
   appends it to the name of the missing tool.
   NOTE(review): the literal continues past the trailing backslash onto a line
   (numbered 90) that is missing from this view. *)
88 val missing_message_tail =
89 " appears to be missing. You will need to install it if you want to invoke \
(* Maps each failure outcome to a user-facing message.
   NOTE(review): the embedded numbering skips 95, 100, 116, 124 and 131, so the
   message for IncompleteUnprovable, several string continuations and the
   "else" part of the UnknownError clause are missing from this view. Those
   lines are left exactly as found.
   Fix: the failure datatype declares an "Interrupted" constructor, but this
   function had no clause for it (the neighbouring clauses are on consecutive
   lines 125-127), so the match was non-exhaustive and would raise Match for
   Interrupted. A clause is added below, in constructor order. *)
92 fun string_for_failure Unprovable =
93 "The problem is unprovable."
94 | string_for_failure IncompleteUnprovable =
96 | string_for_failure ProofMissing =
97 "The prover claims the conjecture is a theorem but did not provide a proof."
98 | string_for_failure UnsoundProof =
99 "The prover found a type-unsound proof. (Or, very unlikely, your axioms \
101 | string_for_failure CantConnect = "Cannot connect to remote server."
102 | string_for_failure TimedOut = "Timed out."
103 | string_for_failure OutOfResources = "The prover ran out of resources."
104 | string_for_failure SpassTooOld =
105 "Isabelle requires a more recent version of SPASS with support for the \
106 \TPTP syntax. To install it, download and extract the package \
107 \\"http://isabelle.in.tum.de/dist/contrib/spass-3.7.tar.gz\" and add the \
108 \\"spass-3.7\" directory's absolute path to " ^
109 Path.print (Path.expand (Path.appends
110 (Path.variable "ISABELLE_HOME_USER" ::
111 map Path.basic ["etc", "components"]))) ^
112 " on a line of its own."
113 | string_for_failure VampireTooOld =
114 "Isabelle requires a more recent version of Vampire. To install it, follow \
115 \the instructions from the Sledgehammer manual (\"isabelle doc\
117 | string_for_failure NoPerl = "Perl" ^ missing_message_tail
118 | string_for_failure NoLibwwwPerl =
119 "The Perl module \"libwww-perl\"" ^ missing_message_tail
120 | string_for_failure NoRealZ3 =
121 "The environment variable \"Z3_REAL_SOLVER\" must be set to Z3's full path."
122 | string_for_failure MalformedInput =
123 "The generated problem is malformed. Please report this to the Isabelle \
125 | string_for_failure MalformedOutput = "The prover output is malformed."
(* Added clause: previously unhandled constructor. *)
| string_for_failure Interrupted = "The prover was interrupted."
126 | string_for_failure Crashed = "The prover crashed."
127 | string_for_failure InternalError = "An internal prover error occurred."
(* An empty payload means no details were captured; point the user at the
   "verbose" option instead. *)
128 | string_for_failure (UnknownError string) =
129 "A prover error occurred" ^
130 (if string = "" then ". (Pass the \"verbose\" option for details.)"
(* Returns the text enclosed by begin_delim and end_delim in output, minus the
   remainder of the line on which begin_delim occurs. Yields "" when the begin
   delimiter, the end delimiter, or a newline after the begin delimiter cannot
   be found. *)
fun extract_delimited (begin_delim, end_delim) output =
  case first_field begin_delim output of
    NONE => ""
  | SOME (_, after_begin) =>
    (case first_field end_delim after_begin of
       NONE => ""
     | SOME (enclosed, _) =>
       (* Discard whatever trails the begin delimiter on its own line. *)
       (case first_field "\n" enclosed of
          NONE => ""
        | SOME (_, body) => body))
(* Begin/end markers that enclose the "RequiredInformation" section of prover
   output. *)
val tstp_important_message_delims =
  let
    val start_marker = "% SZS start RequiredInformation"
    val end_marker = "% SZS end RequiredInformation"
  in
    (start_marker, end_marker)
  end
(* Extracts the section delimited by tstp_important_message_delims and formats
   it for display: the text is split into lines, empty lines are dropped, one
   leading "%" and then one leading " " are removed from each line where
   present, and the lines are re-joined with "\n " and quoted.
   NOTE(review): numbering skips 144; the first case arm is missing from this
   view (presumably it handles the empty extraction -- confirm). *)
142 fun extract_important_message output =
143 case extract_delimited tstp_important_message_delims output of
145 | s => s |> space_explode "\n" |> filter_out (curry (op =) "")
146 |> map (perhaps (try (unprefix "%")))
147 |> map (perhaps (try (unprefix " ")))
148 |> space_implode "\n " |> quote
150 (* Splits by the first possible of a list of delimiters. *)
(* delims is a list of (begin, end) delimiter pairs. The list is unzipped and
   the first begin delimiter occurring in output and the first end delimiter
   occurring in output are looked up separately; when both exist, the text
   between them is returned via extract_delimited.
   NOTE(review): because the two lookups are independent, the chosen begin and
   end delimiters need not come from the same pair -- confirm this is intended.
   NOTE(review): numbering skips 156; the fallback case arm is missing from
   this view. *)
151 fun extract_tstplike_proof delims output =
152 case pairself (find_first (fn s => String.isSubstring s output))
153 (ListPair.unzip delims) of
154 (SOME begin_delim, SOME end_delim) =>
155 extract_delimited (begin_delim, end_delim) output
(* Looks for the first (failure, pattern) entry whose pattern occurs as a
   substring of output.
   NOTE(review): numbering skips 159 and 161; the start of the pipeline and the
   step that turns the found pair into the result are missing from this view
   (presumably it starts from known_failures and projects the failure --
   confirm). *)
158 fun extract_known_failure known_failures output =
160 |> find_first (fn (_, pattern) => String.isSubstring pattern output)
(* Combines failure detection and proof extraction into a
   (proof text, failure option) result. It dispatches on
   extract_known_failure; on one visible path a proof extracted with
   res_code = 0 is returned with NONE, while a non-zero res_code yields ""
   with an UnknownError carrying the shortened output.
   NOTE(review): numbering skips 166, 168, 170-171, 173, 176 and 178 onward,
   so the case patterns and several result expressions are missing from this
   view; the roles of "debug" and "complete" are only partly visible -- confirm
   against the full file. *)
163 fun extract_tstplike_proof_and_outcome debug verbose complete res_code
164 proof_delims known_failures output =
165 case extract_known_failure known_failures output of
167 (case extract_tstplike_proof proof_delims output of
169 ("", SOME (if res_code = 0 andalso (not debug orelse output = "") then
172 UnknownError (short_output verbose output)))
174 if res_code = 0 then (tstplike_proof, NONE)
175 else ("", SOME (UnknownError (short_output verbose output))))
177 ("", SOME (if failure = IncompleteUnprovable andalso complete then
(* Negates a formula. Negating an already negated formula removes the
   existing negation rather than stacking a second one. *)
fun mk_anot phi =
  case phi of
    AConn (ANot, [inner]) => inner
  | _ => AConn (ANot, [phi])
(* Builds a binary connective node from connective c and a pair of operands. *)
fun mk_aconn conn (left, right) = AConn (conn, left :: [right])
(* A step name: a string plus an optional second string. is_same_step and
   step_name_ord look only at the first component. *)
186 type step_name = string * string option
(* Two step names denote the same step when their first components are equal;
   the second components are ignored. *)
fun is_same_step ((id1, _), (id2, _)) = (id1 = id2)
(* Ordering on step names that looks only at their first components. Each
   component has an optional leading "f" removed and is then read as an
   integer. Two non-numeric names are compared as strings, a non-numeric name
   sorts before a numeric one, and two numeric names are compared as integers.
   NOTE(review): numbering skips 194 and 200; the end of the inner comment and
   the closing "end" of the let are missing from this view. *)
190 fun step_name_ord p =
191 let val q = pairself fst p in
192 (* The "unprefix" part is to cope with remote Vampire's output. The proper
193 solution would be to perform a topological sort, e.g. using the nice
195 case pairself (Int.fromString o perhaps (try (unprefix "f"))) q of
196 (NONE, NONE) => string_ord q
197 | (NONE, SOME _) => LESS
198 | (SOME _, NONE) => GREATER
199 | (SOME i, SOME j) => int_ord (i, j)
(* Proof step constructors, matching the signature: a definition holds a name
   and two 'a values; an inference holds a name, one 'a value and the names of
   the steps it refers to.
   NOTE(review): numbering skips 201-202; the datatype header is presumably
   missing from this view. *)
203 Definition of step_name * 'a * 'a |
204 Inference of step_name * 'a * step_name list
(* A proof is a list of steps whose payloads are formulas. *)
206 type 'a proof = ('a, 'a, 'a fo_term) formula step list
(* Projects the name out of a proof step, whichever constructor built it. *)
fun step_name step =
  case step of
    Definition (name, _, _) => name
  | Inference (name, _, _) => name
211 (**** PARSING OF TSTP FORMAT ****)
(* Scans a general identifier. Two forms are accepted:
   - a string enclosed in single quotes (e.g. a file name), returned without
     the quotes;
   - any number of "$" symbols followed by at least one letter or digit,
     returned as their concatenation. *)
val scan_general_id =
  let
    val quote_mark = $$ "'"
    val scan_quoted =
      (quote_mark |-- Scan.repeat (~$$ "'") --| quote_mark) >> implode
    val scan_plain =
      (Scan.repeat ($$ "$") -- Scan.many1 Symbol.is_letdig)
      >> (fn (dollars, letdigs) => implode (dollars @ letdigs))
  in
    scan_quoted || scan_plain
  end
219 (* Generalized first-order terms, which include file names, numbers, etc. *)
(* parse_annotation reads either a space-separated run of general identifiers,
   optionally followed by a further annotation whose result is appended, or a
   parenthesized or bracketed annotation list. When strict is true, only the
   identifiers that Int.fromString accepts are kept.
   parse_annotations reads an optional comma-separated list of annotations.
   NOTE(review): numbering skips 229; the final line of parse_annotations
   (the result mapping and the application to x) is missing from this view. *)
220 fun parse_annotation strict x =
221 ((scan_general_id ::: Scan.repeat ($$ " " |-- scan_general_id)
222 >> (strict ? filter (is_some o Int.fromString)))
223 -- Scan.optional (parse_annotation strict) [] >> op @
224 || $$ "(" |-- parse_annotations strict --| $$ ")"
225 || $$ "[" |-- parse_annotations strict --| $$ "]") x
226 and parse_annotations strict x =
227 (Scan.optional (parse_annotation strict
228 ::: Scan.repeat ($$ "," |-- parse_annotation strict)) []
(* Vampire proof lines sometimes contain needless information such as "(0:3)",
   which can be hard to disambiguate from function application in an LL(1)
   parser. As a workaround, this scanner recognizes the "<id>:<id>" part of
   such detritus and yields an empty list for it. *)
fun parse_vampire_detritus x =
  let
    val colon_pair = (scan_general_id |-- $$ ":") --| scan_general_id
  in
    (colon_pair >> (fn _ => [])) x
  end
(* Fragments of the mutually recursive term parsers.
   NOTE(review): numbering skips 237-239, 241 and 243, so the header of the
   first parser (presumably parse_term) and parts of its body are missing from
   this view. The visible pieces show an optional parenthesized part that is
   either Vampire detritus or an argument list, optionally followed by one
   more parenthesized piece of detritus, which is discarded. *)
240 -- Scan.optional ($$ "(" |-- (parse_vampire_detritus || parse_terms)
242 --| Scan.optional ($$ "(" |-- parse_vampire_detritus --| $$ ")") []
(* A non-empty, comma-separated list of terms. *)
244 and parse_terms x = (parse_term ::: Scan.repeat ($$ "," |-- parse_term)) x
(* Body of the atom parser: a term, optionally followed by "=" or "!=" and a
   second term. A lone term becomes an atom; an equation becomes an atom over
   "c_equal"; a disequation is the negation of that atom.
   NOTE(review): numbering skips 245-246; the "fun ... x =" header (presumably
   parse_atom, which parse_decorated_atom calls) is missing from this view. *)
247 (parse_term -- Scan.option (Scan.option ($$ "!") --| $$ "=" -- parse_term)
248 >> (fn (u1, NONE) => AAtom u1
249 | (u1, SOME (NONE, u2)) => AAtom (ATerm ("c_equal", [u1, u2]))
250 | (u1, SOME (SOME _, u2)) =>
251 mk_anot (AAtom (ATerm ("c_equal", [u1, u2]))))) x
(* Returns the head symbol of a first-order term, discarding its arguments. *)
fun fo_term_head term =
  case term of
    ATerm (head, _) => head
255 (* TPTP formulas are fully parenthesized, so we don't need to worry about
256 operator precedence. *)
(* Parses a formula: a parenthesized formula, a quantified formula
   ("!" or "?" followed by a bracketed variable list, ":" and a body), a
   negation ("~"), or (presumably, on a missing line) an atom; the result may
   be followed by a binary connective and a second formula. Quantified
   variables are recorded by head symbol only, paired with NONE.
   "<=>" is tried before "<=", which matters because "<=" is a prefix of it.
   NOTE(review): numbering skips 262, 265 and 271; part of the quantifier
   handler, the last alternative of the first group, and the right operand of
   the connective are missing from this view. *)
257 fun parse_formula x =
258 (($$ "(" |-- parse_formula --| $$ ")"
259 || ($$ "!" >> K AForall || $$ "?" >> K AExists)
260 --| $$ "[" -- parse_terms --| $$ "]" --| $$ ":" -- parse_formula
261 >> (fn ((q, ts), phi) =>
263 AQuant (q, map (rpair NONE o fo_term_head) ts, phi))
264 || $$ "~" |-- parse_formula >> mk_anot
266 -- Scan.option ((Scan.this_string "=>" >> K AImplies
267 || Scan.this_string "<=>" >> K AIff
268 || Scan.this_string "<~>" >> K ANotIff
269 || Scan.this_string "<=" >> K AIf
270 || $$ "|" >> K AOr || $$ "&" >> K AAnd)
272 >> (fn (phi1, NONE) => phi1
273 | (phi1, SOME (c, phi2)) => mk_aconn c (phi1, phi2))) x
(* Parses the optional trailing arguments of a TSTP line: a comma and one
   annotation (whose result is kept), optionally followed by a comma and a
   further annotation list (whose result is dropped). Yields [] when no extra
   arguments are present. Both parts are parsed non-strictly. *)
val parse_tstp_extra_arguments =
  let
    val comma = $$ ","
    val kept_annotation = comma |-- parse_annotation false
    val dropped_annotations = Scan.option (comma |-- parse_annotations false)
  in
    Scan.optional (kept_annotation --| dropped_annotations) []
  end
(* Name compared against in parse_tstp_line; a match is recorded as NONE
   instead of SOME name. *)
val vampire_unknown_fact : string = "unknown"
281 (* Syntax: (cnf|fof|tff)\(<num>, <formula_role>, <formula> <extra_arguments>\).
282 The <num> could be an identifier, but we assume integers. *)
(* Parses one TSTP proof line into a step. The visible fragments show that a
   formula of the shape "atom <=> formula" becomes a Definition, an atom over
   "c_equal" is kept as an Inference (Vampire's equality proxy axiom), any
   other shape in that branch raises Fail "malformed definition", and the
   default is an Inference whose dependencies are paired with NONE.
   NOTE(review): numbering skips 285, 289-292, 295-298 and 306-307, so the
   opening parenthesis scan, the dispatch on the dependencies and on the role,
   and the end of the handler are missing from this view -- confirm details
   against the full file. *)
283 val parse_tstp_line =
284 ((Scan.this_string "cnf" || Scan.this_string "fof" || Scan.this_string "tff")
286 |-- scan_general_id --| $$ "," -- Symbol.scan_id --| $$ ","
287 -- parse_formula -- parse_tstp_extra_arguments --| $$ ")" --| $$ "."
288 >> (fn (((num, role), phi), deps) =>
293 ((num, if s = vampire_unknown_fact then NONE else SOME s), [])
294 | _ => ((num, NONE), deps)
299 AConn (AIff, [phi1 as AAtom _, phi2]) =>
300 Definition (name, phi1, phi2)
301 | AAtom (ATerm ("c_equal", _)) =>
302 (* Vampire's equality proxy axiom *)
303 Inference (name, phi, map (rpair NONE) deps)
304 | _ => raise Fail "malformed definition")
305 | _ => Inference (name, phi, map (rpair NONE) deps)
308 (**** PARSING OF VAMPIRE OUTPUT ****)
(* Scans a brace-enclosed sequence of general identifiers, each optionally
   followed by a comma. Callers discard the result. *)
val parse_vampire_braced_stuff =
  let
    val item = scan_general_id --| Scan.option ($$ ",")
    val items = Scan.repeat item
  in
    ($$ "{" -- items) -- $$ "}"
  end
(* Vampire detritus wrapped in parentheses, e.g. "(0:3)". *)
val parse_vampire_parenthesized_detritus =
  let
    val open_paren = $$ "("
    val close_paren = $$ ")"
  in
    (open_paren |-- parse_vampire_detritus) --| close_paren
  end
(* Syntax: <num>. <formula> <annotation>
   Optional braced stuff and optional parenthesized detritus between the
   formula and the annotation are skipped. The annotation is parsed strictly,
   and each resulting dependency is paired with NONE. *)
val parse_vampire_line =
  let
    val step_number = scan_general_id --| $$ "."
    val numbered_formula = step_number -- parse_formula
    val without_noise =
      (numbered_formula --| Scan.option parse_vampire_braced_stuff)
      --| Scan.option parse_vampire_parenthesized_detritus
    fun mk_step ((num, phi), deps) =
      Inference ((num, NONE), phi, map (fn dep => (dep, NONE)) deps)
  in
    (without_noise -- parse_annotation true) >> mk_step
  end
324 (**** PARSING OF SPASS OUTPUT ****)
(* SPASS returns clause references of the form "x.y". Only "x" is returned;
   "y", whose role is not clear anyway, is scanned and dropped. *)
val parse_dot_name =
  scan_general_id --| ($$ "." -- scan_general_id)
(* Parses the optional ":"-introduced list of clause references in a SPASS
   line; each reference may be followed by a comma. Yields [] when the list is
   absent. *)
val parse_spass_annotations =
  let
    val reference = parse_dot_name --| Scan.option ($$ ",")
    val references = $$ ":" |-- Scan.repeat reference
  in
    Scan.optional references []
  end
(* It is not clear why some literals are followed by sequences of stars and/or
   pluses. Any run of "*", "+" and " " after an atom is skipped. *)
fun parse_decorated_atom x =
  let
    val decoration = $$ "*" || $$ "+" || $$ " "
  in
    (parse_atom --| Scan.repeat decoration) x
  end
(* Builds a formula from negative and positive literal lists:
   - both empty: the atom "c_False";
   - only positive literals: their disjunction;
   - only negative literals: the negation of their conjunction;
   - both present: conjunction of the negatives implies disjunction of the
     positives. *)
fun mk_horn (neg_lits, pos_lits) =
  let
    fun conj lits = foldr1 (mk_aconn AAnd) lits
    fun disj lits = foldr1 (mk_aconn AOr) lits
  in
    case (null neg_lits, null pos_lits) of
      (true, true) => AAtom (ATerm ("c_False", []))
    | (true, false) => disj pos_lits
    | (false, true) => mk_anot (conj neg_lits)
    | (false, false) => mk_aconn AImplies (conj neg_lits, disj pos_lits)
  end
(* Parses "<atoms> || <atoms> -> <atoms>". The two atom lists before the
   arrow are concatenated and passed to mk_horn as the negative literals; the
   list after the arrow supplies the positive literals. *)
fun parse_horn_clause x =
  let
    val atoms = Scan.repeat parse_decorated_atom
    val double_bar = $$ "|" -- $$ "|"
    val arrow = $$ "-" -- $$ ">"
    fun to_formula ((lits1, lits2), pos_lits) =
      mk_horn (lits1 @ lits2, pos_lits)
  in
    (((((atoms --| double_bar) -- atoms) --| arrow) -- atoms) >> to_formula) x
  end
(* Syntax: <num>[0:<inference><annotations>]
   <atoms> || <atoms> -> <atoms>.
   The inference name is scanned and dropped; the annotations become the
   step's dependencies, each paired with NONE. *)
fun parse_spass_line x =
  let
    val inference_tag = $$ "[" -- $$ "0" -- $$ ":" -- Symbol.scan_id
    val header =
      (scan_general_id --| inference_tag) -- parse_spass_annotations --| $$ "]"
    fun mk_step ((num, deps), u) =
      Inference ((num, NONE), u, map (fn dep => (dep, NONE)) deps)
  in
    (((header -- parse_horn_clause) --| $$ ".") >> mk_step) x
  end
(* Parses one proof line, trying the TSTP, Vampire and SPASS formats in that
   order. *)
fun parse_line x =
  let
    val native_formats = parse_vampire_line || parse_spass_line
  in
    (parse_tstp_line || native_formats) x
  end
(* Body of the whole-proof parser, read right to left: the input string has
   its whitespace stripped (except between identifier characters), is exploded
   with raw_explode, and is parsed as one or more proof lines. The "!!"
   handler raises Fail "unrecognized ATP output", and only the first component
   of the scanner's result is kept.
   NOTE(review): numbering skips 361; the "val ... =" header (presumably
   parse_proof) is missing from this view. *)
362 fst o Scan.finite Symbol.stopper
363 (Scan.error (!! (fn _ => raise Fail "unrecognized ATP output")
364 (Scan.repeat1 parse_line)))
365 o raw_explode o strip_spaces_except_between_ident_chars
(* Looks up dep among the names seen so far, comparing with is_same_step, and
   returns the first matching seen name if there is one. *)
fun clean_up_dependency seen dep =
  find_first (fn seen_name => is_same_step (dep, seen_name)) seen
(* Walks the steps in order, replacing each inference's dependencies by the
   matching names of earlier steps and dropping dependencies that match none.
   Definitions pass through unchanged. Every step's name is added to the
   "seen" list before the next step is processed. *)
fun clean_up_dependencies seen steps =
  let
    fun loop _ acc [] = rev acc
      | loop seen acc (step :: rest) =
        (case step of
           Definition (name, _, _) =>
             loop (name :: seen) (step :: acc) rest
         | Inference (name, u, deps) =>
             let
               val kept = map_filter (clean_up_dependency seen) deps
             in
               loop (name :: seen) (Inference (name, u, kept) :: acc) rest
             end)
  in
    loop seen [] steps
  end
(* Converts TSTP-like proof text into a list of proof steps. The empty string
   yields the empty proof. Otherwise a "$" is appended to the text, the steps
   are sorted by step_name_ord on their names, and their dependencies are
   cleaned up starting from an empty "seen" list.
   NOTE(review): numbering skips 378; the pipeline stage between appending the
   sentinel and sorting (presumably the parser) is missing from this view. *)
375 fun atp_proof_from_tstplike_proof "" = []
376 | atp_proof_from_tstplike_proof s =
377 s ^ "$" (* the $ sign acts as a sentinel (FIXME: needed?) *)
379 |> sort (step_name_ord o pairself step_name)
380 |> clean_up_dependencies []
(* Applies f to the head symbol of a term and, recursively, to the head
   symbols of all its subterms. *)
fun map_term_names_in_term f term =
  let
    fun rename (ATerm (name, args)) = ATerm (f name, map rename args)
  in
    rename term
  end
(* Applies f to every term name inside a formula. Quantifiers, their bound
   variable lists and connectives are left as they are; only atoms are
   rewritten. *)
fun map_term_names_in_formula f formula =
  let
    fun go (AQuant (q, xs, body)) = AQuant (q, xs, go body)
      | go (AConn (c, operands)) = AConn (c, map go operands)
      | go (AAtom t) = AAtom (map_term_names_in_term f t)
  in
    go formula
  end
(* Applies f to every term name in the formula(s) of a proof step. Step names
   and dependencies are untouched. *)
fun map_term_names_in_step f step =
  let
    val rename = map_term_names_in_formula f
  in
    case step of
      Definition (name, phi1, phi2) =>
        Definition (name, rename phi1, rename phi2)
    | Inference (name, phi, deps) =>
        Inference (name, rename phi, deps)
  end
(* Applies f to every term name in every step of a proof. *)
fun map_term_names_in_atp_proof f proof =
  map (fn step => map_term_names_in_step f step) proof
(* Looks s up in the pool; returns the associated name when there is one and
   s itself otherwise. *)
fun nasty_name pool s =
  case Symtab.lookup pool s of
    SOME replacement => replacement
  | NONE => s
(* Replaces term names in a proof according to the pool. An empty pool leaves
   the proof unchanged without traversing it. *)
fun nasty_atp_proof pool proof =
  if Symtab.is_empty pool then proof
  else map_term_names_in_atp_proof (nasty_name pool) proof