1 (* Title: HOL/Tools/Sledgehammer/sledgehammer_proof_reconstruct.ML
2 Author: Lawrence C. Paulson, Cambridge University Computer Laboratory
3 Author: Claire Quigley, Cambridge University Computer Laboratory
4 Author: Jasmin Blanchette, TU Muenchen
6 Transfer of proofs from external provers.
9 signature SLEDGEHAMMER_PROOF_RECONSTRUCT =
11 type locality = Sledgehammer_Fact_Filter.locality
12 type minimize_command = string list -> string
15 bool * minimize_command * string * (string * locality) vector * thm * int
16 -> string * (string * locality) list
18 string Symtab.table * bool * int * Proof.context * int list list
19 -> bool * minimize_command * string * (string * locality) vector * thm * int
20 -> string * (string * locality) list
22 bool -> string Symtab.table * bool * int * Proof.context * int list list
23 -> bool * minimize_command * string * (string * locality) vector * thm * int
24 -> string * (string * locality) list
27 structure Sledgehammer_Proof_Reconstruct : SLEDGEHAMMER_PROOF_RECONSTRUCT =
32 open Sledgehammer_Util
33 open Sledgehammer_Fact_Filter
34 open Sledgehammer_Translate
36 type minimize_command = string list -> string
38 (* Simple simplifications to ensure that sort annotations don't leave a trail of
40 fun s_not @{const False} = @{const True}
41 | s_not @{const True} = @{const False}
42 | s_not (@{const Not} $ t) = t
43 | s_not t = @{const Not} $ t
44 fun s_conj (@{const True}, t2) = t2
45 | s_conj (t1, @{const True}) = t1
46 | s_conj p = HOLogic.mk_conj p
47 fun s_disj (@{const False}, t2) = t2
48 | s_disj (t1, @{const False}) = t1
49 | s_disj p = HOLogic.mk_disj p
50 fun s_imp (@{const True}, t2) = t2
51 | s_imp (t1, @{const False}) = s_not t1
52 | s_imp p = HOLogic.mk_imp p
53 fun s_iff (@{const True}, t2) = t2
54 | s_iff (t1, @{const True}) = t1
55 | s_iff (t1, t2) = HOLogic.eq_const HOLogic.boolT $ t1 $ t2
57 fun mk_anot (AConn (ANot, [phi])) = phi
58 | mk_anot phi = AConn (ANot, [phi])
59 fun mk_aconn c (phi1, phi2) = AConn (c, [phi1, phi2])
61 fun index_in_shape x = find_index (exists (curry (op =) x))
62 fun is_axiom_number axiom_names num =
63 num > 0 andalso num <= Vector.length axiom_names andalso
64 fst (Vector.sub (axiom_names, num - 1)) <> ""
65 fun is_conjecture_number conjecture_shape num =
66 index_in_shape num conjecture_shape >= 0
68 fun negate_term (Const (@{const_name All}, T) $ Abs (s, T', t')) =
69 Const (@{const_name Ex}, T) $ Abs (s, T', negate_term t')
70 | negate_term (Const (@{const_name Ex}, T) $ Abs (s, T', t')) =
71 Const (@{const_name All}, T) $ Abs (s, T', negate_term t')
72 | negate_term (@{const "op -->"} $ t1 $ t2) =
73 @{const "op &"} $ t1 $ negate_term t2
74 | negate_term (@{const "op &"} $ t1 $ t2) =
75 @{const "op |"} $ negate_term t1 $ negate_term t2
76 | negate_term (@{const "op |"} $ t1 $ t2) =
77 @{const "op &"} $ negate_term t1 $ negate_term t2
78 | negate_term (@{const Not} $ t) = t
79 | negate_term t = @{const Not} $ t
81 datatype ('a, 'b, 'c, 'd, 'e) raw_step =
82 Definition of 'a * 'b * 'c |
83 Inference of 'a * 'd * 'e list
85 fun raw_step_number (Definition (num, _, _)) = num
86 | raw_step_number (Inference (num, _, _)) = num
88 (**** PARSING OF TSTP FORMAT ****)
90 (*Strings enclosed in single quotes, e.g. filenames*)
91 val scan_quoted = $$ "'" |-- Scan.repeat (~$$ "'") --| $$ "'" >> implode;
93 val scan_dollar_name =
94 Scan.repeat ($$ "$") -- Symbol.scan_id >> (fn (ss, s) => implode ss ^ s)
96 fun repair_name _ "$true" = "c_True"
97 | repair_name _ "$false" = "c_False"
98 | repair_name _ "$$e" = "c_equal" (* seen in Vampire proofs *)
99 | repair_name _ "equal" = "c_equal" (* needed by SPASS? *)
100 | repair_name pool s =
101 case Symtab.lookup pool s of
104 if String.isPrefix "sQ" s andalso String.isSuffix "_eqProxy" s then
105 "c_equal" (* seen in Vampire proofs *)
108 (* Generalized first-order terms, which include file names, numbers, etc. *)
109 val parse_potential_integer =
110 (scan_dollar_name || scan_quoted) >> K NONE
111 || scan_integer >> SOME
112 fun parse_annotation x =
113 ((parse_potential_integer ::: Scan.repeat ($$ " " |-- parse_potential_integer)
114 >> map_filter I) -- Scan.optional parse_annotation []
115 >> uncurry (union (op =))
116 || $$ "(" |-- parse_annotations --| $$ ")"
117 || $$ "[" |-- parse_annotations --| $$ "]") x
118 and parse_annotations x =
119 (Scan.optional (parse_annotation
120 ::: Scan.repeat ($$ "," |-- parse_annotation)) []
121 >> (fn numss => fold (union (op =)) numss [])) x
123 (* Vampire proof lines sometimes contain needless information such as "(0:3)",
124 which can be hard to disambiguate from function application in an LL(1)
125 parser. As a workaround, we extend the TPTP term syntax with such detritus
127 fun parse_vampire_detritus x =
128 (scan_integer |-- $$ ":" --| scan_integer >> K []) x
130 fun parse_term pool x =
131 ((scan_dollar_name >> repair_name pool)
132 -- Scan.optional ($$ "(" |-- (parse_vampire_detritus || parse_terms pool)
134 --| Scan.optional ($$ "(" |-- parse_vampire_detritus --| $$ ")") []
136 and parse_terms pool x =
137 (parse_term pool ::: Scan.repeat ($$ "," |-- parse_term pool)) x
139 fun parse_atom pool =
140 parse_term pool -- Scan.option (Scan.option ($$ "!") --| $$ "="
142 >> (fn (u1, NONE) => AAtom u1
143 | (u1, SOME (NONE, u2)) => AAtom (ATerm ("c_equal", [u1, u2]))
144 | (u1, SOME (SOME _, u2)) =>
145 mk_anot (AAtom (ATerm ("c_equal", [u1, u2]))))
147 fun fo_term_head (ATerm (s, _)) = s
149 (* TPTP formulas are fully parenthesized, so we don't need to worry about
150 operator precedence. *)
151 fun parse_formula pool x =
152 (($$ "(" |-- parse_formula pool --| $$ ")"
153 || ($$ "!" >> K AForall || $$ "?" >> K AExists)
154 --| $$ "[" -- parse_terms pool --| $$ "]" --| $$ ":"
155 -- parse_formula pool
156 >> (fn ((q, ts), phi) => AQuant (q, map fo_term_head ts, phi))
157 || $$ "~" |-- parse_formula pool >> mk_anot
159 -- Scan.option ((Scan.this_string "=>" >> K AImplies
160 || Scan.this_string "<=>" >> K AIff
161 || Scan.this_string "<~>" >> K ANotIff
162 || Scan.this_string "<=" >> K AIf
163 || $$ "|" >> K AOr || $$ "&" >> K AAnd)
164 -- parse_formula pool)
165 >> (fn (phi1, NONE) => phi1
166 | (phi1, SOME (c, phi2)) => mk_aconn c (phi1, phi2))) x
168 val parse_tstp_extra_arguments =
169 Scan.optional ($$ "," |-- parse_annotation
170 --| Scan.option ($$ "," |-- parse_annotations)) []
172 (* Syntax: (fof|cnf)\(<num>, <formula_role>, <formula> <extra_arguments>\).
173 The <num> could be an identifier, but we assume integers. *)
174 fun parse_tstp_line pool =
175 ((Scan.this_string "fof" || Scan.this_string "cnf") -- $$ "(")
176 |-- scan_integer --| $$ "," -- Symbol.scan_id --| $$ ","
177 -- parse_formula pool -- parse_tstp_extra_arguments --| $$ ")" --| $$ "."
178 >> (fn (((num, role), phi), deps) =>
182 AConn (AIff, [phi1 as AAtom _, phi2]) =>
183 Definition (num, phi1, phi2)
184 | AAtom (ATerm ("c_equal", _)) =>
185 Inference (num, phi, deps) (* Vampire's equality proxy axiom *)
186 | _ => raise Fail "malformed definition")
187 | _ => Inference (num, phi, deps))
189 (**** PARSING OF VAMPIRE OUTPUT ****)
191 (* Syntax: <num>. <formula> <annotation> *)
192 fun parse_vampire_line pool =
193 scan_integer --| $$ "." -- parse_formula pool -- parse_annotation
194 >> (fn ((num, phi), deps) => Inference (num, phi, deps))
196 (**** PARSING OF SPASS OUTPUT ****)
198 (* SPASS returns clause references of the form "x.y". We ignore "y", whose role
199 is not clear anyway. *)
200 val parse_dot_name = scan_integer --| $$ "." --| scan_integer
202 val parse_spass_annotations =
203 Scan.optional ($$ ":" |-- Scan.repeat (parse_dot_name
204 --| Scan.option ($$ ","))) []
206 (* It is not clear why some literals are followed by sequences of stars and/or
207 pluses. We ignore them. *)
208 fun parse_decorated_atom pool =
209 parse_atom pool --| Scan.repeat ($$ "*" || $$ "+" || $$ " ")
211 fun mk_horn ([], []) = AAtom (ATerm ("c_False", []))
212 | mk_horn ([], pos_lits) = foldr1 (mk_aconn AOr) pos_lits
213 | mk_horn (neg_lits, []) = mk_anot (foldr1 (mk_aconn AAnd) neg_lits)
214 | mk_horn (neg_lits, pos_lits) =
215 mk_aconn AImplies (foldr1 (mk_aconn AAnd) neg_lits,
216 foldr1 (mk_aconn AOr) pos_lits)
218 fun parse_horn_clause pool =
219 Scan.repeat (parse_decorated_atom pool) --| $$ "|" --| $$ "|"
220 -- Scan.repeat (parse_decorated_atom pool) --| $$ "-" --| $$ ">"
221 -- Scan.repeat (parse_decorated_atom pool)
222 >> (mk_horn o apfst (op @))
224 (* Syntax: <num>[0:<inference><annotations>]
225 <atoms> || <atoms> -> <atoms>. *)
226 fun parse_spass_line pool =
227 scan_integer --| $$ "[" --| $$ "0" --| $$ ":" --| Symbol.scan_id
228 -- parse_spass_annotations --| $$ "]" -- parse_horn_clause pool --| $$ "."
229 >> (fn ((num, deps), u) => Inference (num, u, deps))
231 fun parse_line pool =
232 parse_tstp_line pool || parse_vampire_line pool || parse_spass_line pool
233 fun parse_lines pool = Scan.repeat1 (parse_line pool)
234 fun parse_proof pool =
235 fst o Scan.finite Symbol.stopper
236 (Scan.error (!! (fn _ => raise Fail "unrecognized ATP output")
238 o explode o strip_spaces_except_between_ident_chars
240 (**** INTERPRETATION OF TSTP SYNTAX TREES ****)
242 exception FO_TERM of string fo_term list
243 exception FORMULA of (string, string fo_term) formula list
244 exception SAME of unit
246 (* Type variables are given the basic sort "HOL.type". Some will later be
247 constrained by information from type literals, or by type inference. *)
248 fun type_from_fo_term tfrees (u as ATerm (a, us)) =
249 let val Ts = map (type_from_fo_term tfrees) us in
250 case strip_prefix_and_unascii type_const_prefix a of
251 SOME b => Type (invert_const b, Ts)
253 if not (null us) then
254 raise FO_TERM [u] (* only "tconst"s have type arguments *)
255 else case strip_prefix_and_unascii tfree_prefix a of
257 let val s = "'" ^ b in
258 TFree (s, AList.lookup (op =) tfrees s |> the_default HOLogic.typeS)
261 case strip_prefix_and_unascii tvar_prefix a of
262 SOME b => TVar (("'" ^ b, 0), HOLogic.typeS)
264 (* Variable from the ATP, say "X1" *)
265 Type_Infer.param 0 (a, HOLogic.typeS)
268 (* Type class literal applied to a type. Returns triple of polarity, class,
270 fun type_constraint_from_term pos tfrees (u as ATerm (a, us)) =
271 case (strip_prefix_and_unascii class_prefix a,
272 map (type_from_fo_term tfrees) us) of
273 (SOME b, [T]) => (pos, b, T)
274 | _ => raise FO_TERM [u]
276 (** Accumulate type constraints in a formula: negative type literals **)
277 fun add_var (key, z) = Vartab.map_default (key, []) (cons z)
278 fun add_type_constraint (false, cl, TFree (a ,_)) = add_var ((a, ~1), cl)
279 | add_type_constraint (false, cl, TVar (ix, _)) = add_var (ix, cl)
280 | add_type_constraint _ = I
282 fun repair_atp_variable_name f s =
284 fun subscript_name s n = s ^ nat_subscript n
285 val s = String.map f s
287 case space_explode "_" s of
288 [_] => (case take_suffix Char.isDigit (String.explode s) of
289 (cs1 as _ :: _, cs2 as _ :: _) =>
290 subscript_name (String.implode cs1)
291 (the (Int.fromString (String.implode cs2)))
293 | [s1, s2] => (case Int.fromString s2 of
294 SOME n => subscript_name s1 n
299 (* First-order translation. No types are known for variables. "HOLogic.typeT"
300 should allow them to be inferred. *)
301 fun raw_term_from_pred thy full_types tfrees =
303 fun aux opt_T extra_us u =
305 ATerm ("hBOOL", [u1]) => aux (SOME @{typ bool}) [] u1
306 | ATerm ("hAPP", [u1, u2]) => aux opt_T (u2 :: extra_us) u1
308 if a = type_wrapper_name then
311 aux (SOME (type_from_fo_term tfrees typ_u)) extra_us term_u
312 | _ => raise FO_TERM us
313 else case strip_prefix_and_unascii const_prefix a of
315 list_comb (Const (@{const_name "op ="}, HOLogic.typeT),
316 map (aux NONE []) us)
319 val c = invert_const b
320 val num_type_args = num_type_args thy c
321 val (type_us, term_us) =
322 chop (if full_types then 0 else num_type_args) us
323 (* Extra args from "hAPP" come after any arguments given directly to
325 val term_ts = map (aux NONE []) term_us
326 val extra_ts = map (aux NONE []) extra_us
328 Const (c, if full_types then
330 SOME T => map fastype_of term_ts ---> T
332 if num_type_args = 0 then
333 Sign.const_instance thy (c, [])
335 raise Fail ("no type information for " ^ quote c)
337 Sign.const_instance thy (c,
338 map (type_from_fo_term tfrees) type_us))
339 in list_comb (t, term_ts @ extra_ts) end
340 | NONE => (* a free or schematic variable *)
342 val ts = map (aux NONE []) (us @ extra_us)
343 val T = map fastype_of ts ---> HOLogic.typeT
345 case strip_prefix_and_unascii fixed_var_prefix a of
346 SOME b => Free (b, T)
348 case strip_prefix_and_unascii schematic_var_prefix a of
349 SOME b => Var ((b, 0), T)
351 if is_tptp_variable a then
352 Var ((repair_atp_variable_name Char.toLower a, 0), T)
354 (* Skolem constants? *)
355 Var ((repair_atp_variable_name Char.toUpper a, 0), T)
356 in list_comb (t, ts) end
357 in aux (SOME HOLogic.boolT) [] end
359 fun term_from_pred thy full_types tfrees pos (u as ATerm (s, _)) =
360 if String.isPrefix class_prefix s then
361 add_type_constraint (type_constraint_from_term pos tfrees u)
362 #> pair @{const True}
364 pair (raw_term_from_pred thy full_types tfrees u)
366 val combinator_table =
367 [(@{const_name COMBI}, @{thm COMBI_def_raw}),
368 (@{const_name COMBK}, @{thm COMBK_def_raw}),
369 (@{const_name COMBB}, @{thm COMBB_def_raw}),
370 (@{const_name COMBC}, @{thm COMBC_def_raw}),
371 (@{const_name COMBS}, @{thm COMBS_def_raw})]
373 fun uncombine_term (t1 $ t2) = betapply (pairself uncombine_term (t1, t2))
374 | uncombine_term (Abs (s, T, t')) = Abs (s, T, uncombine_term t')
375 | uncombine_term (t as Const (x as (s, _))) =
376 (case AList.lookup (op =) combinator_table s of
377 SOME thm => thm |> prop_of |> specialize_type @{theory} x |> Logic.dest_equals |> snd
379 | uncombine_term t = t
381 (* Update schematic type variables with detected sort constraints. It's not
382 totally clear when this code is necessary. *)
383 fun repair_tvar_sorts (t, tvar_tab) =
385 fun do_type (Type (a, Ts)) = Type (a, map do_type Ts)
386 | do_type (TVar (xi, s)) =
387 TVar (xi, the_default s (Vartab.lookup tvar_tab xi))
388 | do_type (TFree z) = TFree z
389 fun do_term (Const (a, T)) = Const (a, do_type T)
390 | do_term (Free (a, T)) = Free (a, do_type T)
391 | do_term (Var (xi, T)) = Var (xi, do_type T)
392 | do_term (t as Bound _) = t
393 | do_term (Abs (a, T, t)) = Abs (a, do_type T, do_term t)
394 | do_term (t1 $ t2) = do_term t1 $ do_term t2
395 in t |> not (Vartab.is_empty tvar_tab) ? do_term end
397 fun quantify_over_free quant_s free_s body_t =
398 case Term.add_frees body_t [] |> filter (curry (op =) free_s o fst) of
400 | frees as (_, free_T) :: _ =>
401 Abs (free_s, free_T, fold (curry abstract_over) (map Free frees) body_t)
403 (* Interpret an ATP formula as a HOL term, extracting sort constraints as they
404 appear in the formula. *)
405 fun prop_from_formula thy full_types tfrees phi =
407 fun do_formula pos phi =
409 AQuant (_, [], phi) => do_formula pos phi
410 | AQuant (q, x :: xs, phi') =>
411 do_formula pos (AQuant (q, xs, phi'))
412 #>> quantify_over_free (case q of
413 AForall => @{const_name All}
414 | AExists => @{const_name Ex})
415 (repair_atp_variable_name Char.toLower x)
416 | AConn (ANot, [phi']) => do_formula (not pos) phi' #>> s_not
417 | AConn (c, [phi1, phi2]) =>
418 do_formula (pos |> c = AImplies ? not) phi1
419 ##>> do_formula pos phi2
424 | AIf => s_imp o swap
426 | ANotIff => s_not o s_iff)
427 | AAtom tm => term_from_pred thy full_types tfrees pos tm
428 | _ => raise FORMULA [phi]
429 in repair_tvar_sorts (do_formula true phi Vartab.empty) end
431 fun check_formula ctxt =
432 Type_Infer.constrain HOLogic.boolT
433 #> Syntax.check_term (ProofContext.set_mode ProofContext.mode_schematic ctxt)
436 (**** Translation of TSTP files to Isar Proofs ****)
438 fun unvarify_term (Var ((s, 0), T)) = Free (s, T)
439 | unvarify_term t = raise TERM ("unvarify_term: non-Var", [t])
441 fun decode_line full_types tfrees (Definition (num, phi1, phi2)) ctxt =
443 val thy = ProofContext.theory_of ctxt
444 val t1 = prop_from_formula thy full_types tfrees phi1
445 val vars = snd (strip_comb t1)
446 val frees = map unvarify_term vars
447 val unvarify_args = subst_atomic (vars ~~ frees)
448 val t2 = prop_from_formula thy full_types tfrees phi2
450 HOLogic.eq_const HOLogic.typeT $ t1 $ t2
451 |> unvarify_args |> uncombine_term |> check_formula ctxt
454 (Definition (num, t1, t2),
455 fold Variable.declare_term (maps OldTerm.term_frees [t1, t2]) ctxt)
457 | decode_line full_types tfrees (Inference (num, u, deps)) ctxt =
459 val thy = ProofContext.theory_of ctxt
460 val t = u |> prop_from_formula thy full_types tfrees
461 |> uncombine_term |> check_formula ctxt
463 (Inference (num, t, deps),
464 fold Variable.declare_term (OldTerm.term_frees t) ctxt)
466 fun decode_lines ctxt full_types tfrees lines =
467 fst (fold_map (decode_line full_types tfrees) lines ctxt)
469 fun is_same_inference _ (Definition _) = false
470 | is_same_inference t (Inference (_, t', _)) = t aconv t'
472 (* No "real" literals means only type information (tfree_tcs, clsrel, or
474 val is_only_type_information = curry (op aconv) HOLogic.true_const
476 fun replace_one_dep (old, new) dep = if dep = old then new else [dep]
477 fun replace_deps_in_line _ (line as Definition _) = line
478 | replace_deps_in_line p (Inference (num, t, deps)) =
479 Inference (num, t, fold (union (op =) o replace_one_dep p) deps [])
481 (* Discard axioms; consolidate adjacent lines that prove the same formula, since
482 they differ only in type information.*)
483 fun add_line _ _ (line as Definition _) lines = line :: lines
484 | add_line conjecture_shape axiom_names (Inference (num, t, [])) lines =
485 (* No dependencies: axiom, conjecture, or (for Vampire) internal axioms or
487 if is_axiom_number axiom_names num then
488 (* Axioms are not proof lines. *)
489 if is_only_type_information t then
490 map (replace_deps_in_line (num, [])) lines
491 (* Is there a repetition? If so, replace later line by earlier one. *)
492 else case take_prefix (not o is_same_inference t) lines of
493 (_, []) => lines (*no repetition of proof line*)
494 | (pre, Inference (num', _, _) :: post) =>
495 pre @ map (replace_deps_in_line (num', [num])) post
496 else if is_conjecture_number conjecture_shape num then
497 Inference (num, negate_term t, []) :: lines
499 map (replace_deps_in_line (num, [])) lines
500 | add_line _ _ (Inference (num, t, deps)) lines =
501 (* Type information will be deleted later; skip repetition test. *)
502 if is_only_type_information t then
503 Inference (num, t, deps) :: lines
504 (* Is there a repetition? If so, replace later line by earlier one. *)
505 else case take_prefix (not o is_same_inference t) lines of
506 (* FIXME: Doesn't this code risk conflating proofs involving different
508 (_, []) => Inference (num, t, deps) :: lines
509 | (pre, Inference (num', t', _) :: post) =>
510 Inference (num, t', deps) ::
511 pre @ map (replace_deps_in_line (num', [num])) post
513 (* Recursively delete empty lines (type information) from the proof. *)
514 fun add_nontrivial_line (Inference (num, t, [])) lines =
515 if is_only_type_information t then delete_dep num lines
516 else Inference (num, t, []) :: lines
517 | add_nontrivial_line line lines = line :: lines
518 and delete_dep num lines =
519 fold_rev add_nontrivial_line (map (replace_deps_in_line (num, [])) lines) []
521 (* ATPs sometimes reuse free variable names in the strangest ways. Removing
522 offending lines often does the trick. *)
523 fun is_bad_free frees (Free x) = not (member (op =) frees x)
524 | is_bad_free _ _ = false
526 (* Vampire is keen on producing these. *)
527 fun is_trivial_formula (@{const Not} $ (Const (@{const_name "op ="}, _)
528 $ t1 $ t2)) = (t1 aconv t2)
529 | is_trivial_formula _ = false
531 fun add_desired_line _ _ _ _ (line as Definition (num, _, _)) (j, lines) =
532 (j, line :: map (replace_deps_in_line (num, [])) lines)
533 | add_desired_line isar_shrink_factor conjecture_shape axiom_names frees
534 (Inference (num, t, deps)) (j, lines) =
536 if is_axiom_number axiom_names num orelse
537 is_conjecture_number conjecture_shape num orelse
538 (not (is_only_type_information t) andalso
539 null (Term.add_tvars t []) andalso
540 not (exists_subterm (is_bad_free frees) t) andalso
541 not (is_trivial_formula t) andalso
542 (null lines orelse (* last line must be kept *)
543 (length deps >= 2 andalso j mod isar_shrink_factor = 0))) then
544 Inference (num, t, deps) :: lines (* keep line *)
546 map (replace_deps_in_line (num, deps)) lines) (* drop line *)
548 (** EXTRACTING LEMMAS **)
550 (* Like "split_line", but ignores "\n" that follow a comma (as in SNARK's
552 val split_proof_lines =
555 | aux line [] = [implode (rev line)]
556 | aux line ("," :: "\n" :: rest) = aux ("," :: line) rest
557 | aux line ("\n" :: rest) = aux line [] @ aux [] rest
558 | aux line (s :: rest) = aux (s :: line) rest
559 in aux [] o explode end
561 (* A list consisting of the first number in each line is returned. For TSTP,
562 interesting lines have the form "fof(108, axiom, ...)", where the number
563 (108) is extracted. For SPASS, lines have the form "108[0:Inp] ...", where
564 the first number (108) is extracted. For Vampire, we look for
565 "108. ... [input]". *)
566 fun used_facts_in_atp_proof axiom_names atp_proof =
568 fun axiom_name_at_index num =
569 let val j = Int.fromString num |> the_default ~1 in
570 if is_axiom_number axiom_names j then
571 SOME (Vector.sub (axiom_names, j - 1))
576 String.tokens (fn c => not (Char.isAlphaNum c) andalso c <> #"_")
577 fun do_line (tag :: num :: "axiom" :: (rest as _ :: _)) =
578 if tag = "cnf" orelse tag = "fof" then
579 (case strip_prefix_and_unascii axiom_prefix (List.last rest) of
581 if member (op =) rest "file" then
582 SOME (name, find_first_in_vector axiom_names name General)
584 axiom_name_at_index num
585 | NONE => axiom_name_at_index num)
588 | do_line (num :: "0" :: "Inp" :: _) = axiom_name_at_index num
589 | do_line (num :: rest) =
590 (case List.last rest of "input" => axiom_name_at_index num | _ => NONE)
592 in atp_proof |> split_proof_lines |> map_filter (do_line o tokens_of) end
595 val no_label = ("", ~1)
598 val assum_prefix = "A"
599 val fact_prefix = "F"
601 fun string_for_label (s, num) = s ^ string_of_int num
603 fun metis_using [] = ""
605 "using " ^ space_implode " " (map string_for_label ls) ^ " "
606 fun metis_apply _ 1 = "by "
607 | metis_apply 1 _ = "apply "
608 | metis_apply i _ = "prefer " ^ string_of_int i ^ " apply "
609 fun metis_name full_types = if full_types then "metisFT" else "metis"
610 fun metis_call full_types [] = metis_name full_types
611 | metis_call full_types ss =
612 "(" ^ metis_name full_types ^ " " ^ space_implode " " ss ^ ")"
613 fun metis_command full_types i n (ls, ss) =
614 metis_using ls ^ metis_apply i n ^ metis_call full_types ss
615 fun metis_line full_types i n ss =
616 "Try this command: " ^
617 Markup.markup Markup.sendback (metis_command full_types i n ([], ss)) ^ "."
618 fun minimize_line _ [] = ""
619 | minimize_line minimize_command ss =
620 case minimize_command ss of
623 "\nTo minimize the number of lemmas, try this: " ^
624 Markup.markup Markup.sendback command ^ "."
626 fun used_facts axiom_names =
627 used_facts_in_atp_proof axiom_names
628 #> List.partition (curry (op =) Chained o snd)
629 #> pairself (sort_distinct (string_ord o pairself fst))
631 fun metis_proof_text (full_types, minimize_command, atp_proof, axiom_names,
634 val (chained_lemmas, other_lemmas) = used_facts axiom_names atp_proof
635 val n = Logic.count_prems (prop_of goal)
637 (metis_line full_types i n (map fst other_lemmas) ^
638 minimize_line minimize_command (map fst (other_lemmas @ chained_lemmas)),
639 other_lemmas @ chained_lemmas)
642 (** Isar proof construction and manipulation **)
644 fun merge_fact_sets (ls1, ss1) (ls2, ss2) =
645 (union (op =) ls1 ls2, union (op =) ss1 ss2)
647 type label = string * int
648 type facts = label list * string list
650 datatype qualifier = Show | Then | Moreover | Ultimately
653 Fix of (string * typ) list |
655 Assume of label * term |
656 Have of qualifier list * label * term * byline
659 CaseSplit of step list list * facts
661 fun smart_case_split [] facts = ByMetis facts
662 | smart_case_split proofs facts = CaseSplit (proofs, facts)
664 fun add_fact_from_dep axiom_names num =
665 if is_axiom_number axiom_names num then
666 apsnd (insert (op =) (fst (Vector.sub (axiom_names, num - 1))))
668 apfst (insert (op =) (raw_prefix, num))
670 fun forall_of v t = HOLogic.all_const (fastype_of v) $ lambda v t
671 fun forall_vars t = fold_rev forall_of (map Var (Term.add_vars t [])) t
673 fun step_for_line _ _ (Definition (_, t1, t2)) = Let (t1, t2)
674 | step_for_line _ _ (Inference (num, t, [])) = Assume ((raw_prefix, num), t)
675 | step_for_line axiom_names j (Inference (num, t, deps)) =
676 Have (if j = 1 then [Show] else [], (raw_prefix, num),
678 ByMetis (fold (add_fact_from_dep axiom_names) deps ([], [])))
680 fun proof_from_atp_proof pool ctxt full_types tfrees isar_shrink_factor
681 atp_proof conjecture_shape axiom_names params frees =
684 atp_proof ^ "$" (* the $ sign acts as a sentinel (FIXME: needed?) *)
686 |> sort (int_ord o pairself raw_step_number)
687 |> decode_lines ctxt full_types tfrees
688 |> rpair [] |-> fold_rev (add_line conjecture_shape axiom_names)
689 |> rpair [] |-> fold_rev add_nontrivial_line
690 |> rpair (0, []) |-> fold_rev (add_desired_line isar_shrink_factor
691 conjecture_shape axiom_names frees)
694 (if null params then [] else [Fix params]) @
695 map2 (step_for_line axiom_names) (length lines downto 1) lines
698 (* When redirecting proofs, we keep information about the labels seen so far in
699 the "backpatches" data structure. The first component indicates which facts
700 should be associated with forthcoming proof steps. The second component is a
701 pair ("assum_ls", "drop_ls"), where "assum_ls" are the labels that should
702 become assumptions and "drop_ls" are the labels that should be dropped in a
704 type backpatches = (label * facts) list * (label list * label list)
706 fun used_labels_of_step (Have (_, _, _, by)) =
708 ByMetis (ls, _) => ls
709 | CaseSplit (proofs, (ls, _)) =>
710 fold (union (op =) o used_labels_of) proofs ls)
711 | used_labels_of_step _ = []
712 and used_labels_of proof = fold (union (op =) o used_labels_of_step) proof []
714 fun new_labels_of_step (Fix _) = []
715 | new_labels_of_step (Let _) = []
716 | new_labels_of_step (Assume (l, _)) = [l]
717 | new_labels_of_step (Have (_, l, _, _)) = [l]
718 val new_labels_of = maps new_labels_of_step
723 | aux proof_tail (proofs as (proof1 :: _)) =
724 if exists null proofs then
726 else if forall (curry (op =) (hd proof1) o hd) (tl proofs) then
727 aux (hd proof1 :: proof_tail) (map tl proofs)
728 else case hd proof1 of
729 Have ([], l, t, _) => (* FIXME: should we really ignore the "by"? *)
730 if forall (fn Have ([], l', t', _) :: _ => (l, t) = (l', t')
731 | _ => false) (tl proofs) andalso
732 not (exists (member (op =) (maps new_labels_of proofs))
733 (used_labels_of proof_tail)) then
734 SOME (l, t, map rev proofs, proof_tail)
738 in aux [] o map rev end
740 fun case_split_qualifiers proofs =
741 case length proofs of
746 fun redirect_proof conjecture_shape hyp_ts concl_t proof =
748 (* The first pass outputs those steps that are independent of the negated
749 conjecture. The second pass flips the proof by contradiction to obtain a
750 direct proof, introducing case splits when an inference depends on
751 several facts that depend on the negated conjecture. *)
753 nth hyp_ts (index_in_shape num conjecture_shape)
755 raise Fail ("Cannot find hypothesis " ^ Int.toString num)
756 val concl_ls = map (pair raw_prefix) (List.last conjecture_shape)
757 val canonicalize_labels =
758 map (fn l => if member (op =) concl_ls l then hd concl_ls else l)
760 fun first_pass ([], contra) = ([], contra)
761 | first_pass ((step as Fix _) :: proof, contra) =
762 first_pass (proof, contra) |>> cons step
763 | first_pass ((step as Let _) :: proof, contra) =
764 first_pass (proof, contra) |>> cons step
765 | first_pass ((step as Assume (l as (_, num), _)) :: proof, contra) =
766 if member (op =) concl_ls l then
767 first_pass (proof, contra ||> l = hd concl_ls ? cons step)
769 first_pass (proof, contra) |>> cons (Assume (l, find_hyp num))
770 | first_pass (Have (qs, l, t, ByMetis (ls, ss)) :: proof, contra) =
772 val ls = canonicalize_labels ls
773 val step = Have (qs, l, t, ByMetis (ls, ss))
775 if exists (member (op =) (fst contra)) ls then
776 first_pass (proof, contra |>> cons l ||> cons step)
778 first_pass (proof, contra) |>> cons step
780 | first_pass _ = raise Fail "malformed proof"
781 val (proof_top, (contra_ls, contra_proof)) =
782 first_pass (proof, (concl_ls, []))
783 val backpatch_label = the_default ([], []) oo AList.lookup (op =) o fst
784 fun backpatch_labels patches ls =
785 fold merge_fact_sets (map (backpatch_label patches) ls) ([], [])
786 fun second_pass end_qs ([], assums, patches) =
787 ([Have (end_qs, no_label, concl_t,
788 ByMetis (backpatch_labels patches (map snd assums)))], patches)
789 | second_pass end_qs (Assume (l, t) :: proof, assums, patches) =
790 second_pass end_qs (proof, (t, l) :: assums, patches)
791 | second_pass end_qs (Have (qs, l, t, ByMetis (ls, ss)) :: proof, assums,
793 if member (op =) (snd (snd patches)) l andalso
794 not (member (op =) (fst (snd patches)) l) andalso
795 not (AList.defined (op =) (fst patches) l) then
796 second_pass end_qs (proof, assums, patches ||> apsnd (append ls))
798 (case List.partition (member (op =) contra_ls) ls of
799 ([contra_l], co_ls) =>
800 if member (op =) qs Show then
801 second_pass end_qs (proof, assums,
802 patches |>> cons (contra_l, (co_ls, ss)))
806 patches |>> cons (contra_l, (l :: co_ls, ss)))
807 |>> cons (if member (op =) (fst (snd patches)) l then
808 Assume (l, negate_term t)
810 Have (qs, l, negate_term t,
811 ByMetis (backpatch_label patches l)))
812 | (contra_ls as _ :: _, co_ls) =>
817 if member (op =) concl_ls l then
821 val drop_ls = filter (curry (op <>) l) contra_ls
825 patches ||> apfst (insert (op =) l)
826 ||> apsnd (union (op =) drop_ls))
829 val (assumes, facts) =
830 if member (op =) (fst (snd patches)) l then
831 ([Assume (l, negate_term t)], (l :: co_ls, ss))
835 (case join_proofs proofs of
836 SOME (l, t, proofs, proof_tail) =>
837 Have (case_split_qualifiers proofs @
838 (if null proof_tail then end_qs else []), l, t,
839 smart_case_split proofs facts) :: proof_tail
841 [Have (case_split_qualifiers proofs @ end_qs, no_label,
842 concl_t, smart_case_split proofs facts)],
846 | _ => raise Fail "malformed proof")
847 | second_pass _ _ = raise Fail "malformed proof"
849 second_pass [Show] (contra_proof, [], ([], ([], []))) |> fst
850 in proof_top @ proof_bottom end
852 (* FIXME: Still needed? Probably not. *)
853 val kill_duplicate_assumptions_in_proof =
855 fun relabel_facts subst =
856 apfst (map (fn l => AList.lookup (op =) subst l |> the_default l))
857 fun do_step (step as Assume (l, t)) (proof, subst, assums) =
858 (case AList.lookup (op aconv) assums t of
859 SOME l' => (proof, (l, l') :: subst, assums)
860 | NONE => (step :: proof, subst, (t, l) :: assums))
861 | do_step (Have (qs, l, t, by)) (proof, subst, assums) =
864 ByMetis facts => ByMetis (relabel_facts subst facts)
865 | CaseSplit (proofs, facts) =>
866 CaseSplit (map do_proof proofs, relabel_facts subst facts)) ::
867 proof, subst, assums)
868 | do_step step (proof, subst, assums) = (step :: proof, subst, assums)
869 and do_proof proof = fold do_step proof ([], [], []) |> #1 |> rev
872 val then_chain_proof =
875 | aux _ ((step as Assume (l, _)) :: proof) = step :: aux l proof
876 | aux l' (Have (qs, l, t, by) :: proof) =
879 Have (if member (op =) ls l' then
881 ByMetis (filter_out (curry (op =) l') ls, ss))
883 (qs, l, t, ByMetis (ls, ss)))
884 | CaseSplit (proofs, facts) =>
885 Have (qs, l, t, CaseSplit (map (aux no_label) proofs, facts))) ::
887 | aux _ (step :: proof) = step :: aux no_label proof
890 fun kill_useless_labels_in_proof proof =
892 val used_ls = used_labels_of proof
893 fun do_label l = if member (op =) used_ls l then l else no_label
894 fun do_step (Assume (l, t)) = Assume (do_label l, t)
895 | do_step (Have (qs, l, t, by)) =
896 Have (qs, do_label l, t,
898 CaseSplit (proofs, facts) =>
899 CaseSplit (map (map do_step) proofs, facts)
901 | do_step step = step
902 in map do_step proof end
904 fun prefix_for_depth n = replicate_string (n + 1)
908 fun aux _ _ _ [] = []
909 | aux subst depth (next_assum, next_fact) (Assume (l, t) :: proof) =
911 Assume (l, t) :: aux subst depth (next_assum, next_fact) proof
913 let val l' = (prefix_for_depth depth assum_prefix, next_assum) in
915 aux ((l, l') :: subst) depth (next_assum + 1, next_fact) proof
917 | aux subst depth (next_assum, next_fact) (Have (qs, l, t, by) :: proof) =
919 val (l', subst, next_fact) =
921 (l, subst, next_fact)
924 val l' = (prefix_for_depth depth fact_prefix, next_fact)
925 in (l', (l, l') :: subst, next_fact + 1) end
928 case AList.lookup (op =) subst l of
930 | NONE => raise Fail ("unknown label " ^
931 quote (string_for_label l))))
934 ByMetis facts => ByMetis (relabel_facts facts)
935 | CaseSplit (proofs, facts) =>
936 CaseSplit (map (aux subst (depth + 1) (1, 1)) proofs,
939 Have (qs, l', t, by) ::
940 aux subst depth (next_assum, next_fact) proof
942 | aux subst depth nextp (step :: proof) =
943 step :: aux subst depth nextp proof
944 in aux [] 0 (1, 1) end
946 fun string_for_proof ctxt full_types i n =
948 fun fix_print_mode f x =
949 setmp_CRITICAL show_no_free_types true
950 (setmp_CRITICAL show_types true
951 (Print_Mode.setmp (filter (curry (op =) Symbol.xsymbolsN)
952 (print_mode_value ())) f)) x
953 fun do_indent ind = replicate_string (ind * indent_size) " "
955 maybe_quote s ^ " :: " ^
956 maybe_quote (fix_print_mode (Syntax.string_of_typ ctxt) T)
957 fun do_label l = if l = no_label then "" else string_for_label l ^ ": "
959 (if member (op =) qs Moreover then "moreover " else "") ^
960 (if member (op =) qs Ultimately then "ultimately " else "") ^
961 (if member (op =) qs Then then
962 if member (op =) qs Show then "thus" else "hence"
964 if member (op =) qs Show then "show" else "have")
965 val do_term = maybe_quote o fix_print_mode (Syntax.string_of_term ctxt)
966 fun do_facts (ls, ss) =
967 metis_command full_types 1 1
968 (ls |> sort_distinct (prod_ord string_ord int_ord),
969 ss |> sort_distinct string_ord)
970 and do_step ind (Fix xs) =
971 do_indent ind ^ "fix " ^ space_implode " and " (map do_free xs) ^ "\n"
972 | do_step ind (Let (t1, t2)) =
973 do_indent ind ^ "let " ^ do_term t1 ^ " = " ^ do_term t2 ^ "\n"
974 | do_step ind (Assume (l, t)) =
975 do_indent ind ^ "assume " ^ do_label l ^ do_term t ^ "\n"
976 | do_step ind (Have (qs, l, t, ByMetis facts)) =
977 do_indent ind ^ do_have qs ^ " " ^
978 do_label l ^ do_term t ^ " " ^ do_facts facts ^ "\n"
979 | do_step ind (Have (qs, l, t, CaseSplit (proofs, facts))) =
980 space_implode (do_indent ind ^ "moreover\n")
981 (map (do_block ind) proofs) ^
982 do_indent ind ^ do_have qs ^ " " ^ do_label l ^ do_term t ^ " " ^
983 do_facts facts ^ "\n"
984 and do_steps prefix suffix ind steps =
985 let val s = implode (map (do_step ind) steps) in
986 replicate_string (ind * indent_size - size prefix) " " ^ prefix ^
987 String.extract (s, ind * indent_size,
988 SOME (size s - ind * indent_size - 1)) ^
991 and do_block ind proof = do_steps "{ " " }" (ind + 1) proof
992 (* One-step proofs are pointless; better use the Metis one-liner
994 and do_proof [Have (_, _, _, ByMetis _)] = ""
996 (if i <> 1 then "prefer " ^ string_of_int i ^ "\n" else "") ^
997 do_indent 0 ^ "proof -\n" ^
998 do_steps "" "" 1 proof ^
999 do_indent 0 ^ (if n <> 1 then "next" else "qed")
1002 fun isar_proof_text (pool, debug, isar_shrink_factor, ctxt, conjecture_shape)
1003 (other_params as (full_types, _, atp_proof, axiom_names,
1006 val (params, hyp_ts, concl_t) = strip_subgoal goal i
1007 val frees = fold Term.add_frees (concl_t :: hyp_ts) []
1008 val tfrees = fold Term.add_tfrees (concl_t :: hyp_ts) []
1009 val n = Logic.count_prems (prop_of goal)
1010 val (one_line_proof, lemma_names) = metis_proof_text other_params
1011 fun isar_proof_for () =
1012 case proof_from_atp_proof pool ctxt full_types tfrees isar_shrink_factor
1013 atp_proof conjecture_shape axiom_names params
1015 |> redirect_proof conjecture_shape hyp_ts concl_t
1016 |> kill_duplicate_assumptions_in_proof
1018 |> kill_useless_labels_in_proof
1020 |> string_for_proof ctxt full_types i n of
1021 "" => "\nNo structured proof available."
1022 | proof => "\n\nStructured proof:\n" ^ Markup.markup Markup.sendback proof
1027 try isar_proof_for ()
1028 |> the_default "\nWarning: The Isar proof construction failed."
1029 in (one_line_proof ^ isar_proof, lemma_names) end
1031 fun proof_text isar_proof isar_params other_params =
1032 (if isar_proof then isar_proof_text isar_params else metis_proof_text)