1 (* Title: HOL/Tools/Sledgehammer/sledgehammer_proof_reconstruct.ML
2 Author: Lawrence C. Paulson, Cambridge University Computer Laboratory
3 Author: Claire Quigley, Cambridge University Computer Laboratory
4 Author: Jasmin Blanchette, TU Muenchen
6 Transfer of proofs from external provers.
9 signature SLEDGEHAMMER_PROOF_RECONSTRUCT =
11 type minimize_command = string list -> string
13 val metis_line: bool -> int -> int -> string list -> string
15 bool * minimize_command * string * string vector * thm * int
16 -> string * string list
18 string Symtab.table * bool * int * Proof.context * int list list
19 -> bool * minimize_command * string * string vector * thm * int
20 -> string * string list
22 bool -> string Symtab.table * bool * int * Proof.context * int list list
23 -> bool * minimize_command * string * string vector * thm * int
24 -> string * string list
27 structure Sledgehammer_Proof_Reconstruct : SLEDGEHAMMER_PROOF_RECONSTRUCT =
32 open Sledgehammer_Util
33 open Sledgehammer_Fact_Filter
34 open Sledgehammer_Translate
36 type minimize_command = string list -> string
38 (* Simple simplifications to ensure that sort annotations don't leave a trail of
40 fun s_not @{const False} = @{const True}
41 | s_not @{const True} = @{const False}
42 | s_not (@{const Not} $ t) = t
43 | s_not t = @{const Not} $ t
44 fun s_conj (@{const True}, t2) = t2
45 | s_conj (t1, @{const True}) = t1
46 | s_conj p = HOLogic.mk_conj p
47 fun s_disj (@{const False}, t2) = t2
48 | s_disj (t1, @{const False}) = t1
49 | s_disj p = HOLogic.mk_disj p
50 fun s_imp (@{const True}, t2) = t2
51 | s_imp (t1, @{const False}) = s_not t1
52 | s_imp p = HOLogic.mk_imp p
53 fun s_iff (@{const True}, t2) = t2
54 | s_iff (t1, @{const True}) = t1
55 | s_iff (t1, t2) = HOLogic.eq_const HOLogic.boolT $ t1 $ t2
57 fun mk_anot (AConn (ANot, [phi])) = phi
58 | mk_anot phi = AConn (ANot, [phi])
59 fun mk_aconn c (phi1, phi2) = AConn (c, [phi1, phi2])
61 fun index_in_shape x = find_index (exists (curry (op =) x))
62 fun is_axiom_number axiom_names num =
63 num > 0 andalso num <= Vector.length axiom_names andalso
64 Vector.sub (axiom_names, num - 1) <> ""
65 fun is_conjecture_number conjecture_shape num =
66 index_in_shape num conjecture_shape >= 0
68 fun negate_term (Const (@{const_name All}, T) $ Abs (s, T', t')) =
69 Const (@{const_name Ex}, T) $ Abs (s, T', negate_term t')
70 | negate_term (Const (@{const_name Ex}, T) $ Abs (s, T', t')) =
71 Const (@{const_name All}, T) $ Abs (s, T', negate_term t')
72 | negate_term (@{const "op -->"} $ t1 $ t2) =
73 @{const "op &"} $ t1 $ negate_term t2
74 | negate_term (@{const "op &"} $ t1 $ t2) =
75 @{const "op |"} $ negate_term t1 $ negate_term t2
76 | negate_term (@{const "op |"} $ t1 $ t2) =
77 @{const "op &"} $ negate_term t1 $ negate_term t2
78 | negate_term (@{const Not} $ t) = t
79 | negate_term t = @{const Not} $ t
81 datatype ('a, 'b, 'c, 'd, 'e) raw_step =
82 Definition of 'a * 'b * 'c |
83 Inference of 'a * 'd * 'e list
85 fun raw_step_number (Definition (num, _, _)) = num
86 | raw_step_number (Inference (num, _, _)) = num
88 (**** PARSING OF TSTP FORMAT ****)
90 (*Strings enclosed in single quotes, e.g. filenames*)
91 val scan_quoted = $$ "'" |-- Scan.repeat (~$$ "'") --| $$ "'" >> implode;
93 val scan_dollar_name =
94 Scan.repeat ($$ "$") -- Symbol.scan_id >> (fn (ss, s) => implode ss ^ s)
96 fun repair_name _ "$true" = "c_True"
97 | repair_name _ "$false" = "c_False"
98 | repair_name _ "$$e" = "c_equal" (* seen in Vampire proofs *)
99 | repair_name _ "equal" = "c_equal" (* needed by SPASS? *)
100 | repair_name pool s =
101 case Symtab.lookup pool s of
104 if String.isPrefix "sQ" s andalso String.isSuffix "_eqProxy" s then
105 "c_equal" (* seen in Vampire proofs *)
108 (* Generalized first-order terms, which include file names, numbers, etc. *)
109 val parse_potential_integer =
110 (scan_dollar_name || scan_quoted) >> K NONE
111 || scan_integer >> SOME
112 fun parse_annotation x =
113 ((parse_potential_integer ::: Scan.repeat ($$ " " |-- parse_potential_integer)
114 >> map_filter I) -- Scan.optional parse_annotation []
115 >> uncurry (union (op =))
116 || $$ "(" |-- parse_annotations --| $$ ")"
117 || $$ "[" |-- parse_annotations --| $$ "]") x
118 and parse_annotations x =
119 (Scan.optional (parse_annotation
120 ::: Scan.repeat ($$ "," |-- parse_annotation)) []
121 >> (fn numss => fold (union (op =)) numss [])) x
123 (* Vampire proof lines sometimes contain needless information such as "(0:3)",
124 which can be hard to disambiguate from function application in an LL(1)
125 parser. As a workaround, we extend the TPTP term syntax with such detritus
127 fun parse_vampire_detritus x =
128 (scan_integer |-- $$ ":" --| scan_integer >> K []) x
130 fun parse_term pool x =
131 ((scan_dollar_name >> repair_name pool)
132 -- Scan.optional ($$ "(" |-- (parse_vampire_detritus || parse_terms pool)
134 --| Scan.optional ($$ "(" |-- parse_vampire_detritus --| $$ ")") []
136 and parse_terms pool x =
137 (parse_term pool ::: Scan.repeat ($$ "," |-- parse_term pool)) x
139 fun parse_atom pool =
140 parse_term pool -- Scan.option (Scan.option ($$ "!") --| $$ "="
142 >> (fn (u1, NONE) => AAtom u1
143 | (u1, SOME (NONE, u2)) => AAtom (ATerm ("c_equal", [u1, u2]))
144 | (u1, SOME (SOME _, u2)) =>
145 mk_anot (AAtom (ATerm ("c_equal", [u1, u2]))))
147 fun fo_term_head (ATerm (s, _)) = s
149 (* TPTP formulas are fully parenthesized, so we don't need to worry about
150 operator precedence. *)
151 fun parse_formula pool x =
152 (($$ "(" |-- parse_formula pool --| $$ ")"
153 || ($$ "!" >> K AForall || $$ "?" >> K AExists)
154 --| $$ "[" -- parse_terms pool --| $$ "]" --| $$ ":"
155 -- parse_formula pool
156 >> (fn ((q, ts), phi) => AQuant (q, map fo_term_head ts, phi))
157 || $$ "~" |-- parse_formula pool >> mk_anot
159 -- Scan.option ((Scan.this_string "=>" >> K AImplies
160 || Scan.this_string "<=>" >> K AIff
161 || Scan.this_string "<~>" >> K ANotIff
162 || Scan.this_string "<=" >> K AIf
163 || $$ "|" >> K AOr || $$ "&" >> K AAnd)
164 -- parse_formula pool)
165 >> (fn (phi1, NONE) => phi1
166 | (phi1, SOME (c, phi2)) => mk_aconn c (phi1, phi2))) x
168 val parse_tstp_extra_arguments =
169 Scan.optional ($$ "," |-- parse_annotation
170 --| Scan.option ($$ "," |-- parse_annotations)) []
172 (* Syntax: (fof|cnf)\(<num>, <formula_role>, <formula> <extra_arguments>\).
173 The <num> could be an identifier, but we assume integers. *)
174 fun parse_tstp_line pool =
175 ((Scan.this_string "fof" || Scan.this_string "cnf") -- $$ "(")
176 |-- scan_integer --| $$ "," -- Symbol.scan_id --| $$ ","
177 -- parse_formula pool -- parse_tstp_extra_arguments --| $$ ")" --| $$ "."
178 >> (fn (((num, role), phi), deps) =>
182 AConn (AIff, [phi1 as AAtom _, phi2]) =>
183 Definition (num, phi1, phi2)
184 | AAtom (ATerm ("c_equal", _)) =>
185 Inference (num, phi, deps) (* Vampire's equality proxy axiom *)
186 | _ => raise Fail "malformed definition")
187 | _ => Inference (num, phi, deps))
189 (**** PARSING OF VAMPIRE OUTPUT ****)
191 (* Syntax: <num>. <formula> <annotation> *)
192 fun parse_vampire_line pool =
193 scan_integer --| $$ "." -- parse_formula pool -- parse_annotation
194 >> (fn ((num, phi), deps) => Inference (num, phi, deps))
196 (**** PARSING OF SPASS OUTPUT ****)
198 (* SPASS returns clause references of the form "x.y". We ignore "y", whose role
199 is not clear anyway. *)
200 val parse_dot_name = scan_integer --| $$ "." --| scan_integer
202 val parse_spass_annotations =
203 Scan.optional ($$ ":" |-- Scan.repeat (parse_dot_name
204 --| Scan.option ($$ ","))) []
206 (* It is not clear why some literals are followed by sequences of stars and/or
207 pluses. We ignore them. *)
208 fun parse_decorated_atom pool =
209 parse_atom pool --| Scan.repeat ($$ "*" || $$ "+" || $$ " ")
211 fun mk_horn ([], []) = AAtom (ATerm ("c_False", []))
212 | mk_horn ([], pos_lits) = foldr1 (mk_aconn AOr) pos_lits
213 | mk_horn (neg_lits, []) = mk_anot (foldr1 (mk_aconn AAnd) neg_lits)
214 | mk_horn (neg_lits, pos_lits) =
215 mk_aconn AImplies (foldr1 (mk_aconn AAnd) neg_lits,
216 foldr1 (mk_aconn AOr) pos_lits)
218 fun parse_horn_clause pool =
219 Scan.repeat (parse_decorated_atom pool) --| $$ "|" --| $$ "|"
220 -- Scan.repeat (parse_decorated_atom pool) --| $$ "-" --| $$ ">"
221 -- Scan.repeat (parse_decorated_atom pool)
222 >> (mk_horn o apfst (op @))
224 (* Syntax: <num>[0:<inference><annotations>]
225 <atoms> || <atoms> -> <atoms>. *)
226 fun parse_spass_line pool =
227 scan_integer --| $$ "[" --| $$ "0" --| $$ ":" --| Symbol.scan_id
228 -- parse_spass_annotations --| $$ "]" -- parse_horn_clause pool --| $$ "."
229 >> (fn ((num, deps), u) => Inference (num, u, deps))
231 fun parse_line pool =
232 parse_tstp_line pool || parse_vampire_line pool || parse_spass_line pool
233 fun parse_lines pool = Scan.repeat1 (parse_line pool)
234 fun parse_proof pool =
235 fst o Scan.finite Symbol.stopper
236 (Scan.error (!! (fn _ => raise Fail "unrecognized ATP output")
238 o explode o strip_spaces
240 (**** INTERPRETATION OF TSTP SYNTAX TREES ****)
242 exception FO_TERM of string fo_term list
243 exception FORMULA of (string, string fo_term) formula list
244 exception SAME of unit
246 (* Type variables are given the basic sort "HOL.type". Some will later be
247 constrained by information from type literals, or by type inference. *)
248 fun type_from_fo_term tfrees (u as ATerm (a, us)) =
249 let val Ts = map (type_from_fo_term tfrees) us in
250 case strip_prefix_and_undo_ascii type_const_prefix a of
251 SOME b => Type (invert_const b, Ts)
253 if not (null us) then
254 raise FO_TERM [u] (* only "tconst"s have type arguments *)
255 else case strip_prefix_and_undo_ascii tfree_prefix a of
257 let val s = "'" ^ b in
258 TFree (s, AList.lookup (op =) tfrees s |> the_default HOLogic.typeS)
261 case strip_prefix_and_undo_ascii tvar_prefix a of
262 SOME b => TVar (("'" ^ b, 0), HOLogic.typeS)
264 (* Variable from the ATP, say "X1" *)
265 Type_Infer.param 0 (a, HOLogic.typeS)
268 (* Type class literal applied to a type. Returns triple of polarity, class,
270 fun type_constraint_from_term pos tfrees (u as ATerm (a, us)) =
271 case (strip_prefix_and_undo_ascii class_prefix a,
272 map (type_from_fo_term tfrees) us) of
273 (SOME b, [T]) => (pos, b, T)
274 | _ => raise FO_TERM [u]
276 (** Accumulate type constraints in a formula: negative type literals **)
277 fun add_var (key, z) = Vartab.map_default (key, []) (cons z)
278 fun add_type_constraint (false, cl, TFree (a ,_)) = add_var ((a, ~1), cl)
279 | add_type_constraint (false, cl, TVar (ix, _)) = add_var (ix, cl)
280 | add_type_constraint _ = I
282 fun fix_atp_variable_name f s =
284 fun subscript_name s n = s ^ nat_subscript n
285 val s = String.map f s
287 case space_explode "_" s of
288 [_] => (case take_suffix Char.isDigit (String.explode s) of
289 (cs1 as _ :: _, cs2 as _ :: _) =>
290 subscript_name (String.implode cs1)
291 (the (Int.fromString (String.implode cs2)))
293 | [s1, s2] => (case Int.fromString s2 of
294 SOME n => subscript_name s1 n
299 (* First-order translation. No types are known for variables. "HOLogic.typeT"
300 should allow them to be inferred. *)
301 fun raw_term_from_pred thy full_types tfrees =
303 fun aux opt_T extra_us u =
305 ATerm ("hBOOL", [u1]) => aux (SOME @{typ bool}) [] u1
306 | ATerm ("hAPP", [u1, u2]) => aux opt_T (u2 :: extra_us) u1
308 if a = type_wrapper_name then
311 aux (SOME (type_from_fo_term tfrees typ_u)) extra_us term_u
312 | _ => raise FO_TERM us
313 else case strip_prefix_and_undo_ascii const_prefix a of
315 list_comb (Const (@{const_name "op ="}, HOLogic.typeT),
316 map (aux NONE []) us)
319 val c = invert_const b
320 val num_type_args = num_type_args thy c
321 val (type_us, term_us) =
322 chop (if full_types then 0 else num_type_args) us
323 (* Extra args from "hAPP" come after any arguments given directly to
325 val term_ts = map (aux NONE []) term_us
326 val extra_ts = map (aux NONE []) extra_us
328 Const (c, if full_types then
330 SOME T => map fastype_of term_ts ---> T
332 if num_type_args = 0 then
333 Sign.const_instance thy (c, [])
335 raise Fail ("no type information for " ^ quote c)
337 Sign.const_instance thy (c,
338 map (type_from_fo_term tfrees) type_us))
339 in list_comb (t, term_ts @ extra_ts) end
340 | NONE => (* a free or schematic variable *)
342 val ts = map (aux NONE []) (us @ extra_us)
343 val T = map fastype_of ts ---> HOLogic.typeT
345 case strip_prefix_and_undo_ascii fixed_var_prefix a of
346 SOME b => Free (b, T)
348 case strip_prefix_and_undo_ascii schematic_var_prefix a of
349 SOME b => Var ((b, 0), T)
351 if is_tptp_variable a then
352 Var ((fix_atp_variable_name Char.toLower a, 0), T)
354 (* Skolem constants? *)
355 Var ((fix_atp_variable_name Char.toUpper a, 0), T)
356 in list_comb (t, ts) end
357 in aux (SOME HOLogic.boolT) [] end
359 fun term_from_pred thy full_types tfrees pos (u as ATerm (s, _)) =
360 if String.isPrefix class_prefix s then
361 add_type_constraint (type_constraint_from_term pos tfrees u)
362 #> pair @{const True}
364 pair (raw_term_from_pred thy full_types tfrees u)
366 val combinator_table =
367 [(@{const_name COMBI}, @{thm COMBI_def_raw}),
368 (@{const_name COMBK}, @{thm COMBK_def_raw}),
369 (@{const_name COMBB}, @{thm COMBB_def_raw}),
370 (@{const_name COMBC}, @{thm COMBC_def_raw}),
371 (@{const_name COMBS}, @{thm COMBS_def_raw})]
373 fun uncombine_term (t1 $ t2) = betapply (pairself uncombine_term (t1, t2))
374 | uncombine_term (Abs (s, T, t')) = Abs (s, T, uncombine_term t')
375 | uncombine_term (t as Const (x as (s, _))) =
376 (case AList.lookup (op =) combinator_table s of
377 SOME thm => thm |> prop_of |> specialize_type @{theory} x |> Logic.dest_equals |> snd
379 | uncombine_term t = t
381 (* Update schematic type variables with detected sort constraints. It's not
382 totally clear when this code is necessary. *)
383 fun repair_tvar_sorts (t, tvar_tab) =
385 fun do_type (Type (a, Ts)) = Type (a, map do_type Ts)
386 | do_type (TVar (xi, s)) =
387 TVar (xi, the_default s (Vartab.lookup tvar_tab xi))
388 | do_type (TFree z) = TFree z
389 fun do_term (Const (a, T)) = Const (a, do_type T)
390 | do_term (Free (a, T)) = Free (a, do_type T)
391 | do_term (Var (xi, T)) = Var (xi, do_type T)
392 | do_term (t as Bound _) = t
393 | do_term (Abs (a, T, t)) = Abs (a, do_type T, do_term t)
394 | do_term (t1 $ t2) = do_term t1 $ do_term t2
395 in t |> not (Vartab.is_empty tvar_tab) ? do_term end
397 fun quantify_over_free quant_s free_s body_t =
398 case Term.add_frees body_t [] |> filter (curry (op =) free_s o fst) of
400 | frees as (_, free_T) :: _ =>
401 Abs (free_s, free_T, fold (curry abstract_over) (map Free frees) body_t)
403 (* Interpret an ATP formula as a HOL term, extracting sort constraints as they
404 appear in the formula. *)
405 fun prop_from_formula thy full_types tfrees phi =
407 fun do_formula pos phi =
409 AQuant (_, [], phi) => do_formula pos phi
410 | AQuant (q, x :: xs, phi') =>
411 do_formula pos (AQuant (q, xs, phi'))
412 #>> quantify_over_free (case q of
413 AForall => @{const_name All}
414 | AExists => @{const_name Ex}) x
415 | AConn (ANot, [phi']) => do_formula (not pos) phi' #>> s_not
416 | AConn (c, [phi1, phi2]) =>
417 do_formula (pos |> c = AImplies ? not) phi1
418 ##>> do_formula pos phi2
423 | AIf => s_imp o swap
425 | ANotIff => s_not o s_iff)
426 | AAtom tm => term_from_pred thy full_types tfrees pos tm
427 | _ => raise FORMULA [phi]
428 in repair_tvar_sorts (do_formula true phi Vartab.empty) end
430 fun check_formula ctxt =
431 Type_Infer.constrain HOLogic.boolT
432 #> Syntax.check_term (ProofContext.set_mode ProofContext.mode_schematic ctxt)
435 (**** Translation of TSTP files to Isar Proofs ****)
437 fun unvarify_term (Var ((s, 0), T)) = Free (s, T)
438 | unvarify_term t = raise TERM ("unvarify_term: non-Var", [t])
440 fun decode_line full_types tfrees (Definition (num, phi1, phi2)) ctxt =
442 val thy = ProofContext.theory_of ctxt
443 val t1 = prop_from_formula thy full_types tfrees phi1
444 val vars = snd (strip_comb t1)
445 val frees = map unvarify_term vars
446 val unvarify_args = subst_atomic (vars ~~ frees)
447 val t2 = prop_from_formula thy full_types tfrees phi2
449 HOLogic.eq_const HOLogic.typeT $ t1 $ t2
450 |> unvarify_args |> uncombine_term |> check_formula ctxt
453 (Definition (num, t1, t2),
454 fold Variable.declare_term (maps OldTerm.term_frees [t1, t2]) ctxt)
456 | decode_line full_types tfrees (Inference (num, u, deps)) ctxt =
458 val thy = ProofContext.theory_of ctxt
459 val t = u |> prop_from_formula thy full_types tfrees
460 |> uncombine_term |> check_formula ctxt
462 (Inference (num, t, deps),
463 fold Variable.declare_term (OldTerm.term_frees t) ctxt)
465 fun decode_lines ctxt full_types tfrees lines =
466 fst (fold_map (decode_line full_types tfrees) lines ctxt)
468 fun is_same_inference _ (Definition _) = false
469 | is_same_inference t (Inference (_, t', _)) = t aconv t'
471 (* No "real" literals means only type information (tfree_tcs, clsrel, or
473 val is_only_type_information = curry (op aconv) HOLogic.true_const
475 fun replace_one_dep (old, new) dep = if dep = old then new else [dep]
476 fun replace_deps_in_line _ (line as Definition _) = line
477 | replace_deps_in_line p (Inference (num, t, deps)) =
478 Inference (num, t, fold (union (op =) o replace_one_dep p) deps [])
480 (* Discard axioms; consolidate adjacent lines that prove the same formula, since
481 they differ only in type information.*)
482 fun add_line _ _ (line as Definition _) lines = line :: lines
483 | add_line conjecture_shape axiom_names (Inference (num, t, [])) lines =
484 (* No dependencies: axiom, conjecture, or (for Vampire) internal axioms or
486 if is_axiom_number axiom_names num then
487 (* Axioms are not proof lines. *)
488 if is_only_type_information t then
489 map (replace_deps_in_line (num, [])) lines
490 (* Is there a repetition? If so, replace later line by earlier one. *)
491 else case take_prefix (not o is_same_inference t) lines of
492 (_, []) => lines (*no repetition of proof line*)
493 | (pre, Inference (num', _, _) :: post) =>
494 pre @ map (replace_deps_in_line (num', [num])) post
495 else if is_conjecture_number conjecture_shape num then
496 Inference (num, negate_term t, []) :: lines
498 map (replace_deps_in_line (num, [])) lines
499 | add_line _ _ (Inference (num, t, deps)) lines =
500 (* Type information will be deleted later; skip repetition test. *)
501 if is_only_type_information t then
502 Inference (num, t, deps) :: lines
503 (* Is there a repetition? If so, replace later line by earlier one. *)
504 else case take_prefix (not o is_same_inference t) lines of
505 (* FIXME: Doesn't this code risk conflating proofs involving different
507 (_, []) => Inference (num, t, deps) :: lines
508 | (pre, Inference (num', t', _) :: post) =>
509 Inference (num, t', deps) ::
510 pre @ map (replace_deps_in_line (num', [num])) post
512 (* Recursively delete empty lines (type information) from the proof. *)
513 fun add_nontrivial_line (Inference (num, t, [])) lines =
514 if is_only_type_information t then delete_dep num lines
515 else Inference (num, t, []) :: lines
516 | add_nontrivial_line line lines = line :: lines
517 and delete_dep num lines =
518 fold_rev add_nontrivial_line (map (replace_deps_in_line (num, [])) lines) []
520 (* ATPs sometimes reuse free variable names in the strangest ways. Removing
521 offending lines often does the trick. *)
522 fun is_bad_free frees (Free x) = not (member (op =) frees x)
523 | is_bad_free _ _ = false
525 (* Vampire is keen on producing these. *)
526 fun is_trivial_formula (@{const Not} $ (Const (@{const_name "op ="}, _)
527 $ t1 $ t2)) = (t1 aconv t2)
528 | is_trivial_formula _ = false
530 fun add_desired_line _ _ _ _ (line as Definition (num, _, _)) (j, lines) =
531 (j, line :: map (replace_deps_in_line (num, [])) lines)
532 | add_desired_line isar_shrink_factor conjecture_shape axiom_names frees
533 (Inference (num, t, deps)) (j, lines) =
535 if is_axiom_number axiom_names num orelse
536 is_conjecture_number conjecture_shape num orelse
537 (not (is_only_type_information t) andalso
538 null (Term.add_tvars t []) andalso
539 not (exists_subterm (is_bad_free frees) t) andalso
540 not (is_trivial_formula t) andalso
541 (null lines orelse (* last line must be kept *)
542 (length deps >= 2 andalso j mod isar_shrink_factor = 0))) then
543 Inference (num, t, deps) :: lines (* keep line *)
545 map (replace_deps_in_line (num, deps)) lines) (* drop line *)
547 (** EXTRACTING LEMMAS **)
549 (* A list consisting of the first number in each line is returned. For TSTP,
550 interesting lines have the form "fof(108, axiom, ...)", where the number
551 (108) is extracted. For SPASS, lines have the form "108[0:Inp] ...", where
552 the first number (108) is extracted. For Vampire, we look for
553 "108. ... [input]". *)
554 fun used_facts_in_atp_proof axiom_names atp_proof =
557 let val j = Int.fromString num |> the_default ~1 in
558 if is_axiom_number axiom_names j then
559 SOME (Vector.sub (axiom_names, j - 1))
564 String.tokens (fn c => not (Char.isAlphaNum c) andalso c <> #"_")
565 val thm_name_list = Vector.foldr (op ::) [] axiom_names
566 fun do_line ("fof" :: num :: "axiom" :: (rest as _ :: _)) =
567 (case strip_prefix_and_undo_ascii axiom_prefix (List.last rest) of
569 if member (op =) rest "file" then SOME name else axiom_name num
570 | NONE => axiom_name num)
571 | do_line (num :: "0" :: "Inp" :: _) = axiom_name num
572 | do_line (num :: rest) =
573 (case List.last rest of "input" => axiom_name num | _ => NONE)
575 in atp_proof |> split_lines |> map_filter (do_line o tokens_of) end
578 val no_label = ("", ~1)
581 val assum_prefix = "A"
582 val fact_prefix = "F"
584 fun string_for_label (s, num) = s ^ string_of_int num
586 fun metis_using [] = ""
588 "using " ^ space_implode " " (map string_for_label ls) ^ " "
589 fun metis_apply _ 1 = "by "
590 | metis_apply 1 _ = "apply "
591 | metis_apply i _ = "prefer " ^ string_of_int i ^ " apply "
592 fun metis_name full_types = if full_types then "metisFT" else "metis"
593 fun metis_call full_types [] = metis_name full_types
594 | metis_call full_types ss =
595 "(" ^ metis_name full_types ^ " " ^ space_implode " " ss ^ ")"
596 fun metis_command full_types i n (ls, ss) =
597 metis_using ls ^ metis_apply i n ^ metis_call full_types ss
598 fun metis_line full_types i n ss =
599 "Try this command: " ^
600 Markup.markup Markup.sendback (metis_command full_types i n ([], ss)) ^ ".\n"
601 fun minimize_line _ [] = ""
602 | minimize_line minimize_command facts =
603 case minimize_command facts of
606 "To minimize the number of lemmas, try this: " ^
607 Markup.markup Markup.sendback command ^ ".\n"
609 val unprefix_chained = perhaps (try (unprefix chained_prefix))
611 fun used_facts axiom_names =
612 used_facts_in_atp_proof axiom_names
613 #> List.partition (String.isPrefix chained_prefix)
614 #>> map (unprefix chained_prefix)
615 #> pairself (sort_distinct string_ord)
617 fun metis_proof_text (full_types, minimize_command, atp_proof, axiom_names,
620 val (chained_lemmas, other_lemmas) = used_facts axiom_names atp_proof
621 val lemmas = other_lemmas @ chained_lemmas
622 val n = Logic.count_prems (prop_of goal)
624 (metis_line full_types i n other_lemmas ^
625 minimize_line minimize_command lemmas, lemmas)
628 (** Isar proof construction and manipulation **)
630 fun merge_fact_sets (ls1, ss1) (ls2, ss2) =
631 (union (op =) ls1 ls2, union (op =) ss1 ss2)
633 type label = string * int
634 type facts = label list * string list
636 datatype qualifier = Show | Then | Moreover | Ultimately
639 Fix of (string * typ) list |
641 Assume of label * term |
642 Have of qualifier list * label * term * byline
645 CaseSplit of step list list * facts
647 fun smart_case_split [] facts = ByMetis facts
648 | smart_case_split proofs facts = CaseSplit (proofs, facts)
650 fun add_fact_from_dep axiom_names num =
651 if is_axiom_number axiom_names num then
652 apsnd (insert (op =) (Vector.sub (axiom_names, num - 1)))
654 apfst (insert (op =) (raw_prefix, num))
656 fun forall_of v t = HOLogic.all_const (fastype_of v) $ lambda v t
657 fun forall_vars t = fold_rev forall_of (map Var (Term.add_vars t [])) t
659 fun step_for_line _ _ (Definition (_, t1, t2)) = Let (t1, t2)
660 | step_for_line _ _ (Inference (num, t, [])) = Assume ((raw_prefix, num), t)
661 | step_for_line axiom_names j (Inference (num, t, deps)) =
662 Have (if j = 1 then [Show] else [], (raw_prefix, num),
664 ByMetis (fold (add_fact_from_dep axiom_names) deps ([], [])))
666 fun proof_from_atp_proof pool ctxt full_types tfrees isar_shrink_factor
667 atp_proof conjecture_shape axiom_names params frees =
670 atp_proof ^ "$" (* the $ sign acts as a sentinel (FIXME: needed?) *)
672 |> sort (int_ord o pairself raw_step_number)
673 |> decode_lines ctxt full_types tfrees
674 |> rpair [] |-> fold_rev (add_line conjecture_shape axiom_names)
675 |> rpair [] |-> fold_rev add_nontrivial_line
676 |> rpair (0, []) |-> fold_rev (add_desired_line isar_shrink_factor
677 conjecture_shape axiom_names frees)
680 (if null params then [] else [Fix params]) @
681 map2 (step_for_line axiom_names) (length lines downto 1) lines
684 (* When redirecting proofs, we keep information about the labels seen so far in
685 the "backpatches" data structure. The first component indicates which facts
686 should be associated with forthcoming proof steps. The second component is a
687 pair ("assum_ls", "drop_ls"), where "assum_ls" are the labels that should
688 become assumptions and "drop_ls" are the labels that should be dropped in a
690 type backpatches = (label * facts) list * (label list * label list)
692 fun used_labels_of_step (Have (_, _, _, by)) =
694 ByMetis (ls, _) => ls
695 | CaseSplit (proofs, (ls, _)) =>
696 fold (union (op =) o used_labels_of) proofs ls)
697 | used_labels_of_step _ = []
698 and used_labels_of proof = fold (union (op =) o used_labels_of_step) proof []
700 fun new_labels_of_step (Fix _) = []
701 | new_labels_of_step (Let _) = []
702 | new_labels_of_step (Assume (l, _)) = [l]
703 | new_labels_of_step (Have (_, l, _, _)) = [l]
704 val new_labels_of = maps new_labels_of_step
709 | aux proof_tail (proofs as (proof1 :: _)) =
710 if exists null proofs then
712 else if forall (curry (op =) (hd proof1) o hd) (tl proofs) then
713 aux (hd proof1 :: proof_tail) (map tl proofs)
714 else case hd proof1 of
715 Have ([], l, t, _) => (* FIXME: should we really ignore the "by"? *)
716 if forall (fn Have ([], l', t', _) :: _ => (l, t) = (l', t')
717 | _ => false) (tl proofs) andalso
718 not (exists (member (op =) (maps new_labels_of proofs))
719 (used_labels_of proof_tail)) then
720 SOME (l, t, map rev proofs, proof_tail)
724 in aux [] o map rev end
726 fun case_split_qualifiers proofs =
727 case length proofs of
732 fun redirect_proof conjecture_shape hyp_ts concl_t proof =
734 (* The first pass outputs those steps that are independent of the negated
735 conjecture. The second pass flips the proof by contradiction to obtain a
736 direct proof, introducing case splits when an inference depends on
737 several facts that depend on the negated conjecture. *)
739 nth hyp_ts (index_in_shape num conjecture_shape)
741 raise Fail ("Cannot find hypothesis " ^ Int.toString num)
742 val concl_ls = map (pair raw_prefix) (List.last conjecture_shape)
743 val canonicalize_labels =
744 map (fn l => if member (op =) concl_ls l then hd concl_ls else l)
746 fun first_pass ([], contra) = ([], contra)
747 | first_pass ((step as Fix _) :: proof, contra) =
748 first_pass (proof, contra) |>> cons step
749 | first_pass ((step as Let _) :: proof, contra) =
750 first_pass (proof, contra) |>> cons step
751 | first_pass ((step as Assume (l as (_, num), _)) :: proof, contra) =
752 if member (op =) concl_ls l then
753 first_pass (proof, contra ||> l = hd concl_ls ? cons step)
755 first_pass (proof, contra) |>> cons (Assume (l, find_hyp num))
756 | first_pass (Have (qs, l, t, ByMetis (ls, ss)) :: proof, contra) =
758 val ls = canonicalize_labels ls
759 val step = Have (qs, l, t, ByMetis (ls, ss))
761 if exists (member (op =) (fst contra)) ls then
762 first_pass (proof, contra |>> cons l ||> cons step)
764 first_pass (proof, contra) |>> cons step
766 | first_pass _ = raise Fail "malformed proof"
767 val (proof_top, (contra_ls, contra_proof)) =
768 first_pass (proof, (concl_ls, []))
769 val backpatch_label = the_default ([], []) oo AList.lookup (op =) o fst
770 fun backpatch_labels patches ls =
771 fold merge_fact_sets (map (backpatch_label patches) ls) ([], [])
772 fun second_pass end_qs ([], assums, patches) =
773 ([Have (end_qs, no_label, concl_t,
774 ByMetis (backpatch_labels patches (map snd assums)))], patches)
775 | second_pass end_qs (Assume (l, t) :: proof, assums, patches) =
776 second_pass end_qs (proof, (t, l) :: assums, patches)
777 | second_pass end_qs (Have (qs, l, t, ByMetis (ls, ss)) :: proof, assums,
779 if member (op =) (snd (snd patches)) l andalso
780 not (member (op =) (fst (snd patches)) l) andalso
781 not (AList.defined (op =) (fst patches) l) then
782 second_pass end_qs (proof, assums, patches ||> apsnd (append ls))
784 (case List.partition (member (op =) contra_ls) ls of
785 ([contra_l], co_ls) =>
786 if member (op =) qs Show then
787 second_pass end_qs (proof, assums,
788 patches |>> cons (contra_l, (co_ls, ss)))
792 patches |>> cons (contra_l, (l :: co_ls, ss)))
793 |>> cons (if member (op =) (fst (snd patches)) l then
794 Assume (l, negate_term t)
796 Have (qs, l, negate_term t,
797 ByMetis (backpatch_label patches l)))
798 | (contra_ls as _ :: _, co_ls) =>
803 if member (op =) concl_ls l then
807 val drop_ls = filter (curry (op <>) l) contra_ls
811 patches ||> apfst (insert (op =) l)
812 ||> apsnd (union (op =) drop_ls))
815 val (assumes, facts) =
816 if member (op =) (fst (snd patches)) l then
817 ([Assume (l, negate_term t)], (l :: co_ls, ss))
821 (case join_proofs proofs of
822 SOME (l, t, proofs, proof_tail) =>
823 Have (case_split_qualifiers proofs @
824 (if null proof_tail then end_qs else []), l, t,
825 smart_case_split proofs facts) :: proof_tail
827 [Have (case_split_qualifiers proofs @ end_qs, no_label,
828 concl_t, smart_case_split proofs facts)],
832 | _ => raise Fail "malformed proof")
833 | second_pass _ _ = raise Fail "malformed proof"
835 second_pass [Show] (contra_proof, [], ([], ([], []))) |> fst
836 in proof_top @ proof_bottom end
838 val kill_duplicate_assumptions_in_proof =
840 fun relabel_facts subst =
841 apfst (map (fn l => AList.lookup (op =) subst l |> the_default l))
842 fun do_step (step as Assume (l, t)) (proof, subst, assums) =
843 (case AList.lookup (op aconv) assums t of
844 SOME l' => (proof, (l, l') :: subst, assums)
845 | NONE => (step :: proof, subst, (t, l) :: assums))
846 | do_step (Have (qs, l, t, by)) (proof, subst, assums) =
849 ByMetis facts => ByMetis (relabel_facts subst facts)
850 | CaseSplit (proofs, facts) =>
851 CaseSplit (map do_proof proofs, relabel_facts subst facts)) ::
852 proof, subst, assums)
853 | do_step step (proof, subst, assums) = (step :: proof, subst, assums)
854 and do_proof proof = fold do_step proof ([], [], []) |> #1 |> rev
857 val then_chain_proof =
860 | aux _ ((step as Assume (l, _)) :: proof) = step :: aux l proof
861 | aux l' (Have (qs, l, t, by) :: proof) =
864 Have (if member (op =) ls l' then
866 ByMetis (filter_out (curry (op =) l') ls, ss))
868 (qs, l, t, ByMetis (ls, ss)))
869 | CaseSplit (proofs, facts) =>
870 Have (qs, l, t, CaseSplit (map (aux no_label) proofs, facts))) ::
872 | aux _ (step :: proof) = step :: aux no_label proof
875 fun kill_useless_labels_in_proof proof =
877 val used_ls = used_labels_of proof
878 fun do_label l = if member (op =) used_ls l then l else no_label
879 fun do_step (Assume (l, t)) = Assume (do_label l, t)
880 | do_step (Have (qs, l, t, by)) =
881 Have (qs, do_label l, t,
883 CaseSplit (proofs, facts) =>
884 CaseSplit (map (map do_step) proofs, facts)
886 | do_step step = step
887 in map do_step proof end
889 fun prefix_for_depth n = replicate_string (n + 1)
893 fun aux _ _ _ [] = []
894 | aux subst depth (next_assum, next_fact) (Assume (l, t) :: proof) =
896 Assume (l, t) :: aux subst depth (next_assum, next_fact) proof
898 let val l' = (prefix_for_depth depth assum_prefix, next_assum) in
900 aux ((l, l') :: subst) depth (next_assum + 1, next_fact) proof
902 | aux subst depth (next_assum, next_fact) (Have (qs, l, t, by) :: proof) =
904 val (l', subst, next_fact) =
906 (l, subst, next_fact)
909 val l' = (prefix_for_depth depth fact_prefix, next_fact)
910 in (l', (l, l') :: subst, next_fact + 1) end
913 case AList.lookup (op =) subst l of
915 | NONE => raise Fail ("unknown label " ^
916 quote (string_for_label l))))
919 ByMetis facts => ByMetis (relabel_facts facts)
920 | CaseSplit (proofs, facts) =>
921 CaseSplit (map (aux subst (depth + 1) (1, 1)) proofs,
924 Have (qs, l', t, by) ::
925 aux subst depth (next_assum, next_fact) proof
927 | aux subst depth nextp (step :: proof) =
928 step :: aux subst depth nextp proof
929 in aux [] 0 (1, 1) end
931 fun string_for_proof ctxt full_types i n =
933 fun fix_print_mode f x =
934 setmp_CRITICAL show_no_free_types true
935 (setmp_CRITICAL show_types true
936 (Print_Mode.setmp (filter (curry (op =) Symbol.xsymbolsN)
937 (print_mode_value ())) f)) x
938 fun do_indent ind = replicate_string (ind * indent_size) " "
940 maybe_quote s ^ " :: " ^
941 maybe_quote (fix_print_mode (Syntax.string_of_typ ctxt) T)
942 fun do_label l = if l = no_label then "" else string_for_label l ^ ": "
944 (if member (op =) qs Moreover then "moreover " else "") ^
945 (if member (op =) qs Ultimately then "ultimately " else "") ^
946 (if member (op =) qs Then then
947 if member (op =) qs Show then "thus" else "hence"
949 if member (op =) qs Show then "show" else "have")
950 val do_term = maybe_quote o fix_print_mode (Syntax.string_of_term ctxt)
951 fun do_facts (ls, ss) =
953 val ls = ls |> sort_distinct (prod_ord string_ord int_ord)
954 val ss = ss |> map unprefix_chained |> sort_distinct string_ord
955 in metis_command full_types 1 1 (ls, ss) end
956 and do_step ind (Fix xs) =
957 do_indent ind ^ "fix " ^ space_implode " and " (map do_free xs) ^ "\n"
958 | do_step ind (Let (t1, t2)) =
959 do_indent ind ^ "let " ^ do_term t1 ^ " = " ^ do_term t2 ^ "\n"
960 | do_step ind (Assume (l, t)) =
961 do_indent ind ^ "assume " ^ do_label l ^ do_term t ^ "\n"
962 | do_step ind (Have (qs, l, t, ByMetis facts)) =
963 do_indent ind ^ do_have qs ^ " " ^
964 do_label l ^ do_term t ^ " " ^ do_facts facts ^ "\n"
965 | do_step ind (Have (qs, l, t, CaseSplit (proofs, facts))) =
966 space_implode (do_indent ind ^ "moreover\n")
967 (map (do_block ind) proofs) ^
968 do_indent ind ^ do_have qs ^ " " ^ do_label l ^ do_term t ^ " " ^
969 do_facts facts ^ "\n"
970 and do_steps prefix suffix ind steps =
971 let val s = implode (map (do_step ind) steps) in
972 replicate_string (ind * indent_size - size prefix) " " ^ prefix ^
973 String.extract (s, ind * indent_size,
974 SOME (size s - ind * indent_size - 1)) ^
977 and do_block ind proof = do_steps "{ " " }" (ind + 1) proof
978 (* One-step proofs are pointless; better use the Metis one-liner
980 and do_proof [Have (_, _, _, ByMetis _)] = ""
982 (if i <> 1 then "prefer " ^ string_of_int i ^ "\n" else "") ^
983 do_indent 0 ^ "proof -\n" ^
984 do_steps "" "" 1 proof ^
985 do_indent 0 ^ (if n <> 1 then "next" else "qed") ^ "\n"
988 fun isar_proof_text (pool, debug, isar_shrink_factor, ctxt, conjecture_shape)
989 (other_params as (full_types, _, atp_proof, axiom_names,
992 val (params, hyp_ts, concl_t) = strip_subgoal goal i
993 val frees = fold Term.add_frees (concl_t :: hyp_ts) []
994 val tfrees = fold Term.add_tfrees (concl_t :: hyp_ts) []
995 val n = Logic.count_prems (prop_of goal)
996 val (one_line_proof, lemma_names) = metis_proof_text other_params
997 fun isar_proof_for () =
998 case proof_from_atp_proof pool ctxt full_types tfrees isar_shrink_factor
999 atp_proof conjecture_shape axiom_names params
1001 |> redirect_proof conjecture_shape hyp_ts concl_t
1002 |> kill_duplicate_assumptions_in_proof
1004 |> kill_useless_labels_in_proof
1006 |> string_for_proof ctxt full_types i n of
1007 "" => "No structured proof available.\n"
1008 | proof => "\nStructured proof:\n" ^ Markup.markup Markup.sendback proof
1013 try isar_proof_for ()
1014 |> the_default "Warning: The Isar proof construction failed.\n"
1015 in (one_line_proof ^ isar_proof, lemma_names) end
1017 fun proof_text isar_proof isar_params other_params =
1018 (if isar_proof then isar_proof_text isar_params else metis_proof_text)