fix bug in the SPASS Flotter hack, when a conjecture FOF is translated to several CNF clauses
1 (* Title: HOL/Tools/Sledgehammer/sledgehammer_proof_reconstruct.ML
2 Author: Lawrence C. Paulson, Cambridge University Computer Laboratory
3 Author: Claire Quigley, Cambridge University Computer Laboratory
4 Author: Jasmin Blanchette, TU Muenchen
6 Transfer of proofs from external provers.
9 signature SLEDGEHAMMER_PROOF_RECONSTRUCT =
11 type minimize_command = string list -> string
13 val axiom_prefix : string
14 val conjecture_prefix : string
15 val arity_clause_prefix : string
16 val tfrees_name : string
17 val metis_line: bool -> int -> int -> string list -> string
19 bool * minimize_command * string * string vector * thm * int
20 -> string * string list
22 string Symtab.table * bool * int * Proof.context * int list list
23 -> bool * minimize_command * string * string vector * thm * int
24 -> string * string list
26 bool -> string Symtab.table * bool * int * Proof.context * int list list
27 -> bool * minimize_command * string * string vector * thm * int
28 -> string * string list
31 structure Sledgehammer_Proof_Reconstruct : SLEDGEHAMMER_PROOF_RECONSTRUCT =
36 open Sledgehammer_Util
37 open Sledgehammer_Fact_Filter
39 type minimize_command = string list -> string
41 val axiom_prefix = "ax_"
42 val conjecture_prefix = "conj_"
43 val arity_clause_prefix = "clsarity_"
44 val tfrees_name = "tfrees"
46 (* Simple simplifications to ensure that sort annotations don't leave a trail of
48 fun s_not @{const False} = @{const True}
49 | s_not @{const True} = @{const False}
50 | s_not (@{const Not} $ t) = t
51 | s_not t = @{const Not} $ t
52 fun s_conj (@{const True}, t2) = t2
53 | s_conj (t1, @{const True}) = t1
54 | s_conj p = HOLogic.mk_conj p
55 fun s_disj (@{const False}, t2) = t2
56 | s_disj (t1, @{const False}) = t1
57 | s_disj p = HOLogic.mk_disj p
58 fun s_imp (@{const True}, t2) = t2
59 | s_imp (t1, @{const False}) = s_not t1
60 | s_imp p = HOLogic.mk_imp p
61 fun s_iff (@{const True}, t2) = t2
62 | s_iff (t1, @{const True}) = t1
63 | s_iff (t1, t2) = HOLogic.eq_const HOLogic.boolT $ t1 $ t2
65 fun mk_anot (AConn (ANot, [phi])) = phi
66 | mk_anot phi = AConn (ANot, [phi])
67 fun mk_aconn c (phi1, phi2) = AConn (c, [phi1, phi2])
69 val index_in_shape : int -> int list list -> int =
70 find_index o exists o curry (op =)
71 fun is_axiom_clause_number thm_names num =
72 num > 0 andalso num <= Vector.length thm_names andalso
73 Vector.sub (thm_names, num - 1) <> ""
74 fun is_conjecture_clause_number conjecture_shape num =
75 index_in_shape num conjecture_shape >= 0
77 fun negate_term (Const (@{const_name All}, T) $ Abs (s, T', t')) =
78 Const (@{const_name Ex}, T) $ Abs (s, T', negate_term t')
79 | negate_term (Const (@{const_name Ex}, T) $ Abs (s, T', t')) =
80 Const (@{const_name All}, T) $ Abs (s, T', negate_term t')
81 | negate_term (@{const "op -->"} $ t1 $ t2) =
82 @{const "op &"} $ t1 $ negate_term t2
83 | negate_term (@{const "op &"} $ t1 $ t2) =
84 @{const "op |"} $ negate_term t1 $ negate_term t2
85 | negate_term (@{const "op |"} $ t1 $ t2) =
86 @{const "op &"} $ negate_term t1 $ negate_term t2
87 | negate_term (@{const Not} $ t) = t
88 | negate_term t = @{const Not} $ t
90 datatype ('a, 'b, 'c, 'd, 'e) raw_step =
91 Definition of 'a * 'b * 'c |
92 Inference of 'a * 'd * 'e list
94 fun raw_step_number (Definition (num, _, _)) = num
95 | raw_step_number (Inference (num, _, _)) = num
97 (**** PARSING OF TSTP FORMAT ****)
99 (*Strings enclosed in single quotes, e.g. filenames*)
100 val scan_quoted = $$ "'" |-- Scan.repeat (~$$ "'") --| $$ "'" >> implode;
102 val scan_dollar_name =
103 Scan.repeat ($$ "$") -- Symbol.scan_id >> (fn (ss, s) => implode ss ^ s)
105 fun repair_name _ "$true" = "c_True"
106 | repair_name _ "$false" = "c_False"
107 | repair_name _ "$$e" = "c_equal" (* seen in Vampire proofs *)
108 | repair_name _ "equal" = "c_equal" (* needed by SPASS? *)
109 | repair_name pool s =
110 case Symtab.lookup pool s of
113 if String.isPrefix "sQ" s andalso String.isSuffix "_eqProxy" s then
114 "c_equal" (* seen in Vampire proofs *)
117 (* Generalized first-order terms, which include file names, numbers, etc. *)
118 val parse_potential_integer =
119 (scan_dollar_name || scan_quoted) >> K NONE
120 || scan_integer >> SOME
121 fun parse_annotation x =
122 ((parse_potential_integer ::: Scan.repeat ($$ " " |-- parse_potential_integer)
123 >> map_filter I) -- Scan.optional parse_annotation []
124 >> uncurry (union (op =))
125 || $$ "(" |-- parse_annotations --| $$ ")"
126 || $$ "[" |-- parse_annotations --| $$ "]") x
127 and parse_annotations x =
128 (Scan.optional (parse_annotation
129 ::: Scan.repeat ($$ "," |-- parse_annotation)) []
130 >> (fn numss => fold (union (op =)) numss [])) x
132 (* Vampire proof lines sometimes contain needless information such as "(0:3)",
133 which can be hard to disambiguate from function application in an LL(1)
134 parser. As a workaround, we extend the TPTP term syntax with such detritus
136 val parse_vampire_detritus = scan_integer |-- $$ ":" --| scan_integer >> K []
138 fun parse_term pool x =
139 ((scan_dollar_name >> repair_name pool)
140 -- Scan.optional ($$ "(" |-- (parse_vampire_detritus || parse_terms pool)
142 --| Scan.optional ($$ "(" |-- parse_vampire_detritus --| $$ ")") []
144 and parse_terms pool x =
145 (parse_term pool ::: Scan.repeat ($$ "," |-- parse_term pool)) x
147 fun parse_atom pool =
148 parse_term pool -- Scan.option (Scan.option ($$ "!") --| $$ "="
150 >> (fn (u1, NONE) => AAtom u1
151 | (u1, SOME (NONE, u2)) => AAtom (ATerm ("c_equal", [u1, u2]))
152 | (u1, SOME (SOME _, u2)) =>
153 mk_anot (AAtom (ATerm ("c_equal", [u1, u2]))))
155 fun fo_term_head (ATerm (s, _)) = s
157 (* TPTP formulas are fully parenthesized, so we don't need to worry about
158 operator precedence. *)
159 fun parse_formula pool x =
160 (($$ "(" |-- parse_formula pool --| $$ ")"
161 || ($$ "!" >> K AForall || $$ "?" >> K AExists)
162 --| $$ "[" -- parse_terms pool --| $$ "]" --| $$ ":"
163 -- parse_formula pool
164 >> (fn ((q, ts), phi) => AQuant (q, map fo_term_head ts, phi))
165 || $$ "~" |-- parse_formula pool >> mk_anot
167 -- Scan.option ((Scan.this_string "=>" >> K AImplies
168 || Scan.this_string "<=>" >> K AIff
169 || Scan.this_string "<~>" >> K ANotIff
170 || Scan.this_string "<=" >> K AIf
171 || $$ "|" >> K AOr || $$ "&" >> K AAnd)
172 -- parse_formula pool)
173 >> (fn (phi1, NONE) => phi1
174 | (phi1, SOME (c, phi2)) => mk_aconn c (phi1, phi2))) x
176 val parse_tstp_extra_arguments =
177 Scan.optional ($$ "," |-- parse_annotation
178 --| Scan.option ($$ "," |-- parse_annotations)) []
180 (* Syntax: (fof|cnf)\(<num>, <formula_role>, <formula> <extra_arguments>\).
181 The <num> could be an identifier, but we assume integers. *)
182 fun parse_tstp_line pool =
183 ((Scan.this_string "fof" || Scan.this_string "cnf") -- $$ "(")
184 |-- scan_integer --| $$ "," -- Symbol.scan_id --| $$ ","
185 -- parse_formula pool -- parse_tstp_extra_arguments --| $$ ")" --| $$ "."
186 >> (fn (((num, role), phi), deps) =>
190 AConn (AIff, [phi1 as AAtom _, phi2]) =>
191 Definition (num, phi1, phi2)
192 | AAtom (ATerm ("c_equal", _)) =>
193 Inference (num, phi, deps) (* Vampire's equality proxy axiom *)
194 | _ => raise Fail "malformed definition")
195 | _ => Inference (num, phi, deps))
197 (**** PARSING OF VAMPIRE OUTPUT ****)
199 (* Syntax: <num>. <formula> <annotation> *)
200 fun parse_vampire_line pool =
201 scan_integer --| $$ "." -- parse_formula pool -- parse_annotation
202 >> (fn ((num, phi), deps) => Inference (num, phi, deps))
204 (**** PARSING OF SPASS OUTPUT ****)
206 (* SPASS returns clause references of the form "x.y". We ignore "y", whose role
207 is not clear anyway. *)
208 val parse_dot_name = scan_integer --| $$ "." --| scan_integer
210 val parse_spass_annotations =
211 Scan.optional ($$ ":" |-- Scan.repeat (parse_dot_name
212 --| Scan.option ($$ ","))) []
214 (* It is not clear why some literals are followed by sequences of stars and/or
215 pluses. We ignore them. *)
216 fun parse_decorated_atom pool =
217 parse_atom pool --| Scan.repeat ($$ "*" || $$ "+" || $$ " ")
219 fun mk_horn ([], []) = AAtom (ATerm ("c_False", []))
220 | mk_horn ([], pos_lits) = foldr1 (mk_aconn AOr) pos_lits
221 | mk_horn (neg_lits, []) = mk_anot (foldr1 (mk_aconn AAnd) neg_lits)
222 | mk_horn (neg_lits, pos_lits) =
223 mk_aconn AImplies (foldr1 (mk_aconn AAnd) neg_lits,
224 foldr1 (mk_aconn AOr) pos_lits)
226 fun parse_horn_clause pool =
227 Scan.repeat (parse_decorated_atom pool) --| $$ "|" --| $$ "|"
228 -- Scan.repeat (parse_decorated_atom pool) --| $$ "-" --| $$ ">"
229 -- Scan.repeat (parse_decorated_atom pool)
230 >> (mk_horn o apfst (op @))
232 (* Syntax: <num>[0:<inference><annotations>]
233 <atoms> || <atoms> -> <atoms>. *)
234 fun parse_spass_line pool =
235 scan_integer --| $$ "[" --| $$ "0" --| $$ ":" --| Symbol.scan_id
236 -- parse_spass_annotations --| $$ "]" -- parse_horn_clause pool --| $$ "."
237 >> (fn ((num, deps), u) => Inference (num, u, deps))
239 fun parse_line pool =
240 parse_tstp_line pool || parse_vampire_line pool || parse_spass_line pool
241 fun parse_lines pool = Scan.repeat1 (parse_line pool)
242 fun parse_proof pool =
243 fst o Scan.finite Symbol.stopper
244 (Scan.error (!! (fn _ => raise Fail "unrecognized ATP output")
246 o explode o strip_spaces
248 (**** INTERPRETATION OF TSTP SYNTAX TREES ****)
250 exception FO_TERM of string fo_term list
251 exception FORMULA of (string, string fo_term) formula list
252 exception SAME of unit
254 (* Type variables are given the basic sort "HOL.type". Some will later be
255 constrained by information from type literals, or by type inference. *)
256 fun type_from_fo_term tfrees (u as ATerm (a, us)) =
257 let val Ts = map (type_from_fo_term tfrees) us in
258 case strip_prefix_and_undo_ascii type_const_prefix a of
259 SOME b => Type (invert_const b, Ts)
261 if not (null us) then
262 raise FO_TERM [u] (* only "tconst"s have type arguments *)
263 else case strip_prefix_and_undo_ascii tfree_prefix a of
265 let val s = "'" ^ b in
266 TFree (s, AList.lookup (op =) tfrees s |> the_default HOLogic.typeS)
269 case strip_prefix_and_undo_ascii tvar_prefix a of
270 SOME b => TVar (("'" ^ b, 0), HOLogic.typeS)
272 (* Variable from the ATP, say "X1" *)
273 Type_Infer.param 0 (a, HOLogic.typeS)
276 (* Type class literal applied to a type. Returns triple of polarity, class,
278 fun type_constraint_from_term pos tfrees (u as ATerm (a, us)) =
279 case (strip_prefix_and_undo_ascii class_prefix a,
280 map (type_from_fo_term tfrees) us) of
281 (SOME b, [T]) => (pos, b, T)
282 | _ => raise FO_TERM [u]
284 (** Accumulate type constraints in a clause: negative type literals **)
285 fun add_var (key, z) = Vartab.map_default (key, []) (cons z)
286 fun add_type_constraint (false, cl, TFree (a ,_)) = add_var ((a, ~1), cl)
287 | add_type_constraint (false, cl, TVar (ix, _)) = add_var (ix, cl)
288 | add_type_constraint _ = I
290 fun fix_atp_variable_name s =
292 fun subscript_name s n = s ^ nat_subscript n
293 val s = String.map Char.toLower s
295 case space_explode "_" s of
296 [_] => (case take_suffix Char.isDigit (String.explode s) of
297 (cs1 as _ :: _, cs2 as _ :: _) =>
298 subscript_name (String.implode cs1)
299 (the (Int.fromString (String.implode cs2)))
301 | [s1, s2] => (case Int.fromString s2 of
302 SOME n => subscript_name s1 n
307 (* First-order translation. No types are known for variables. "HOLogic.typeT"
308 should allow them to be inferred. *)
309 fun raw_term_from_pred thy full_types tfrees =
311 fun aux opt_T extra_us u =
313 ATerm ("hBOOL", [u1]) => aux (SOME @{typ bool}) [] u1
314 | ATerm ("hAPP", [u1, u2]) => aux opt_T (u2 :: extra_us) u1
316 if a = type_wrapper_name then
319 aux (SOME (type_from_fo_term tfrees typ_u)) extra_us term_u
320 | _ => raise FO_TERM us
321 else case strip_prefix_and_undo_ascii const_prefix a of
323 list_comb (Const (@{const_name "op ="}, HOLogic.typeT),
324 map (aux NONE []) us)
327 val c = invert_const b
328 val num_type_args = num_type_args thy c
329 val (type_us, term_us) =
330 chop (if full_types then 0 else num_type_args) us
331 (* Extra args from "hAPP" come after any arguments given directly to
333 val term_ts = map (aux NONE []) term_us
334 val extra_ts = map (aux NONE []) extra_us
336 Const (c, if full_types then
338 SOME T => map fastype_of term_ts ---> T
340 if num_type_args = 0 then
341 Sign.const_instance thy (c, [])
343 raise Fail ("no type information for " ^ quote c)
345 Sign.const_instance thy (c,
346 map (type_from_fo_term tfrees) type_us))
347 in list_comb (t, term_ts @ extra_ts) end
348 | NONE => (* a free or schematic variable *)
350 val ts = map (aux NONE []) (us @ extra_us)
351 val T = map fastype_of ts ---> HOLogic.typeT
353 case strip_prefix_and_undo_ascii fixed_var_prefix a of
354 SOME b => Free (b, T)
356 case strip_prefix_and_undo_ascii schematic_var_prefix a of
357 SOME b => Var ((b, 0), T)
359 if is_tptp_variable a then
360 Var ((fix_atp_variable_name a, 0), T)
362 raise Fail ("Unexpected constant: " ^ quote a)
363 in list_comb (t, ts) end
364 in aux (SOME HOLogic.boolT) [] end
366 fun term_from_pred thy full_types tfrees pos (u as ATerm (s, _)) =
367 if String.isPrefix class_prefix s then
368 add_type_constraint (type_constraint_from_term pos tfrees u)
369 #> pair @{const True}
371 pair (raw_term_from_pred thy full_types tfrees u)
373 val combinator_table =
374 [(@{const_name COMBI}, @{thm COMBI_def_raw}),
375 (@{const_name COMBK}, @{thm COMBK_def_raw}),
376 (@{const_name COMBB}, @{thm COMBB_def_raw}),
377 (@{const_name COMBC}, @{thm COMBC_def_raw}),
378 (@{const_name COMBS}, @{thm COMBS_def_raw})]
380 fun uncombine_term (t1 $ t2) = betapply (pairself uncombine_term (t1, t2))
381 | uncombine_term (Abs (s, T, t')) = Abs (s, T, uncombine_term t')
382 | uncombine_term (t as Const (x as (s, _))) =
383 (case AList.lookup (op =) combinator_table s of
384 SOME thm => thm |> prop_of |> specialize_type @{theory} x |> Logic.dest_equals |> snd
386 | uncombine_term t = t
388 (* Update schematic type variables with detected sort constraints. It's not
389 totally clear when this code is necessary. *)
390 fun repair_tvar_sorts (t, tvar_tab) =
392 fun do_type (Type (a, Ts)) = Type (a, map do_type Ts)
393 | do_type (TVar (xi, s)) =
394 TVar (xi, the_default s (Vartab.lookup tvar_tab xi))
395 | do_type (TFree z) = TFree z
396 fun do_term (Const (a, T)) = Const (a, do_type T)
397 | do_term (Free (a, T)) = Free (a, do_type T)
398 | do_term (Var (xi, T)) = Var (xi, do_type T)
399 | do_term (t as Bound _) = t
400 | do_term (Abs (a, T, t)) = Abs (a, do_type T, do_term t)
401 | do_term (t1 $ t2) = do_term t1 $ do_term t2
402 in t |> not (Vartab.is_empty tvar_tab) ? do_term end
404 fun quantify_over_free quant_s free_s body_t =
405 case Term.add_frees body_t [] |> filter (curry (op =) free_s o fst) of
407 | frees as (_, free_T) :: _ =>
408 Abs (free_s, free_T, fold (curry abstract_over) (map Free frees) body_t)
410 (* Interpret a list of syntax trees as a clause, extracting sort constraints
411 as they appear in the formula. *)
412 fun prop_from_formula thy full_types tfrees phi =
414 fun do_formula pos phi =
416 AQuant (_, [], phi) => do_formula pos phi
417 | AQuant (q, x :: xs, phi') =>
418 do_formula pos (AQuant (q, xs, phi'))
419 #>> quantify_over_free (case q of
420 AForall => @{const_name All}
421 | AExists => @{const_name Ex}) x
422 | AConn (ANot, [phi']) => do_formula (not pos) phi' #>> s_not
423 | AConn (c, [phi1, phi2]) =>
424 do_formula (pos |> c = AImplies ? not) phi1
425 ##>> do_formula pos phi2
430 | AIf => s_imp o swap
432 | ANotIff => s_not o s_iff)
433 | AAtom tm => term_from_pred thy full_types tfrees pos tm
434 | _ => raise FORMULA [phi]
435 in repair_tvar_sorts (do_formula true phi Vartab.empty) end
437 fun check_formula ctxt =
438 Type_Infer.constrain HOLogic.boolT
439 #> Syntax.check_term (ProofContext.set_mode ProofContext.mode_schematic ctxt)
442 (**** Translation of TSTP files to Isar Proofs ****)
444 fun unvarify_term (Var ((s, 0), T)) = Free (s, T)
445 | unvarify_term t = raise TERM ("unvarify_term: non-Var", [t])
447 fun decode_line full_types tfrees (Definition (num, phi1, phi2)) ctxt =
449 val thy = ProofContext.theory_of ctxt
450 val t1 = prop_from_formula thy full_types tfrees phi1
451 val vars = snd (strip_comb t1)
452 val frees = map unvarify_term vars
453 val unvarify_args = subst_atomic (vars ~~ frees)
454 val t2 = prop_from_formula thy full_types tfrees phi2
456 HOLogic.eq_const HOLogic.typeT $ t1 $ t2
457 |> unvarify_args |> uncombine_term |> check_formula ctxt
460 (Definition (num, t1, t2),
461 fold Variable.declare_term (maps OldTerm.term_frees [t1, t2]) ctxt)
463 | decode_line full_types tfrees (Inference (num, u, deps)) ctxt =
465 val thy = ProofContext.theory_of ctxt
466 val t = u |> prop_from_formula thy full_types tfrees
467 |> uncombine_term |> check_formula ctxt
469 (Inference (num, t, deps),
470 fold Variable.declare_term (OldTerm.term_frees t) ctxt)
472 fun decode_lines ctxt full_types tfrees lines =
473 fst (fold_map (decode_line full_types tfrees) lines ctxt)
475 fun is_same_inference _ (Definition _) = false
476 | is_same_inference t (Inference (_, t', _)) = t aconv t'
478 (* No "real" literals means only type information (tfree_tcs, clsrel, or
480 val is_only_type_information = curry (op aconv) HOLogic.true_const
482 fun replace_one_dep (old, new) dep = if dep = old then new else [dep]
483 fun replace_deps_in_line _ (line as Definition _) = line
484 | replace_deps_in_line p (Inference (num, t, deps)) =
485 Inference (num, t, fold (union (op =) o replace_one_dep p) deps [])
487 (*Discard axioms; consolidate adjacent lines that prove the same clause, since they differ
488 only in type information.*)
489 fun add_line _ _ (line as Definition _) lines = line :: lines
490 | add_line conjecture_shape thm_names (Inference (num, t, [])) lines =
491 (* No dependencies: axiom, conjecture clause, or internal axioms or
492 definitions (Vampire). *)
493 if is_axiom_clause_number thm_names num then
494 (* Axioms are not proof lines. *)
495 if is_only_type_information t then
496 map (replace_deps_in_line (num, [])) lines
497 (* Is there a repetition? If so, replace later line by earlier one. *)
498 else case take_prefix (not o is_same_inference t) lines of
499 (_, []) => lines (*no repetition of proof line*)
500 | (pre, Inference (num', _, _) :: post) =>
501 pre @ map (replace_deps_in_line (num', [num])) post
502 else if is_conjecture_clause_number conjecture_shape num then
503 Inference (num, t, []) :: lines
505 map (replace_deps_in_line (num, [])) lines
506 | add_line _ _ (Inference (num, t, deps)) lines =
507 (* Type information will be deleted later; skip repetition test. *)
508 if is_only_type_information t then
509 Inference (num, t, deps) :: lines
510 (* Is there a repetition? If so, replace later line by earlier one. *)
511 else case take_prefix (not o is_same_inference t) lines of
512 (* FIXME: Doesn't this code risk conflating proofs involving different
514 (_, []) => Inference (num, t, deps) :: lines
515 | (pre, Inference (num', t', _) :: post) =>
516 Inference (num, t', deps) ::
517 pre @ map (replace_deps_in_line (num', [num])) post
519 (* Recursively delete empty lines (type information) from the proof. *)
520 fun add_nontrivial_line (Inference (num, t, [])) lines =
521 if is_only_type_information t then delete_dep num lines
522 else Inference (num, t, []) :: lines
523 | add_nontrivial_line line lines = line :: lines
524 and delete_dep num lines =
525 fold_rev add_nontrivial_line (map (replace_deps_in_line (num, [])) lines) []
527 (* ATPs sometimes reuse free variable names in the strangest ways. Removing
528 offending lines often does the trick. *)
529 fun is_bad_free frees (Free x) = not (member (op =) frees x)
530 | is_bad_free _ _ = false
532 (* Vampire is keen on producing these. *)
533 fun is_trivial_formula (@{const Not} $ (Const (@{const_name "op ="}, _)
534 $ t1 $ t2)) = (t1 aconv t2)
535 | is_trivial_formula _ = false
537 fun add_desired_line _ _ _ _ (line as Definition (num, _, _)) (j, lines) =
538 (j, line :: map (replace_deps_in_line (num, [])) lines)
539 | add_desired_line isar_shrink_factor conjecture_shape thm_names frees
540 (Inference (num, t, deps)) (j, lines) =
542 if is_axiom_clause_number thm_names num orelse
543 is_conjecture_clause_number conjecture_shape num orelse
544 (not (is_only_type_information t) andalso
545 null (Term.add_tvars t []) andalso
546 not (exists_subterm (is_bad_free frees) t) andalso
547 not (is_trivial_formula t) andalso
548 (null lines orelse (* last line must be kept *)
549 (length deps >= 2 andalso j mod isar_shrink_factor = 0))) then
550 Inference (num, t, deps) :: lines (* keep line *)
552 map (replace_deps_in_line (num, deps)) lines) (* drop line *)
554 (** EXTRACTING LEMMAS **)
556 (* A list consisting of the first number in each line is returned. For TSTP,
557 interesting lines have the form "fof(108, axiom, ...)", where the number
558 (108) is extracted. For SPASS, lines have the form "108[0:Inp] ...", where
559 the first number (108) is extracted. For Vampire, we look for
560 "108. ... [input]". *)
561 fun used_facts_in_atp_proof thm_names atp_proof =
564 let val j = Int.fromString num |> the_default ~1 in
565 if is_axiom_clause_number thm_names j then
566 SOME (Vector.sub (thm_names, j - 1))
571 String.tokens (fn c => not (Char.isAlphaNum c) andalso c <> #"_")
572 val thm_name_list = Vector.foldr (op ::) [] thm_names
573 fun do_line ("fof" :: num :: "axiom" :: (rest as _ :: _)) =
574 (case strip_prefix_and_undo_ascii axiom_prefix (List.last rest) of
576 if member (op =) rest "file" then SOME name else axiom_name num
577 | NONE => axiom_name num)
578 | do_line (num :: "0" :: "Inp" :: _) = axiom_name num
579 | do_line (num :: rest) =
580 (case List.last rest of "input" => axiom_name num | _ => NONE)
582 in atp_proof |> split_lines |> map_filter (do_line o tokens_of) end
585 val no_label = ("", ~1)
588 val assum_prefix = "A"
589 val fact_prefix = "F"
591 fun string_for_label (s, num) = s ^ string_of_int num
593 fun metis_using [] = ""
595 "using " ^ space_implode " " (map string_for_label ls) ^ " "
596 fun metis_apply _ 1 = "by "
597 | metis_apply 1 _ = "apply "
598 | metis_apply i _ = "prefer " ^ string_of_int i ^ " apply "
599 fun metis_name full_types = if full_types then "metisFT" else "metis"
600 fun metis_call full_types [] = metis_name full_types
601 | metis_call full_types ss =
602 "(" ^ metis_name full_types ^ " " ^ space_implode " " ss ^ ")"
603 fun metis_command full_types i n (ls, ss) =
604 metis_using ls ^ metis_apply i n ^ metis_call full_types ss
605 fun metis_line full_types i n ss =
606 "Try this command: " ^
607 Markup.markup Markup.sendback (metis_command full_types i n ([], ss)) ^ ".\n"
608 fun minimize_line _ [] = ""
609 | minimize_line minimize_command facts =
610 case minimize_command facts of
613 "To minimize the number of lemmas, try this: " ^
614 Markup.markup Markup.sendback command ^ ".\n"
616 val unprefix_chained = perhaps (try (unprefix chained_prefix))
618 fun used_facts thm_names =
619 used_facts_in_atp_proof thm_names
620 #> List.partition (String.isPrefix chained_prefix)
621 #>> map (unprefix chained_prefix)
622 #> pairself (sort_distinct string_ord)
624 fun metis_proof_text (full_types, minimize_command, atp_proof, thm_names, goal,
627 val (chained_lemmas, other_lemmas) = used_facts thm_names atp_proof
628 val lemmas = other_lemmas @ chained_lemmas
629 val n = Logic.count_prems (prop_of goal)
631 (metis_line full_types i n other_lemmas ^
632 minimize_line minimize_command lemmas, lemmas)
635 (** Isar proof construction and manipulation **)
637 fun merge_fact_sets (ls1, ss1) (ls2, ss2) =
638 (union (op =) ls1 ls2, union (op =) ss1 ss2)
640 type label = string * int
641 type facts = label list * string list
643 datatype qualifier = Show | Then | Moreover | Ultimately
646 Fix of (string * typ) list |
648 Assume of label * term |
649 Have of qualifier list * label * term * byline
652 CaseSplit of step list list * facts
654 fun smart_case_split [] facts = ByMetis facts
655 | smart_case_split proofs facts = CaseSplit (proofs, facts)
657 fun add_fact_from_dep thm_names num =
658 if is_axiom_clause_number thm_names num then
659 apsnd (insert (op =) (Vector.sub (thm_names, num - 1)))
661 apfst (insert (op =) (raw_prefix, num))
663 fun forall_of v t = HOLogic.all_const (fastype_of v) $ lambda v t
664 fun forall_vars t = fold_rev forall_of (map Var (Term.add_vars t [])) t
666 fun step_for_line _ _ (Definition (_, t1, t2)) = Let (t1, t2)
667 | step_for_line _ _ (Inference (num, t, [])) = Assume ((raw_prefix, num), t)
668 | step_for_line thm_names j (Inference (num, t, deps)) =
669 Have (if j = 1 then [Show] else [], (raw_prefix, num),
671 ByMetis (fold (add_fact_from_dep thm_names) deps ([], [])))
673 fun proof_from_atp_proof pool ctxt full_types tfrees isar_shrink_factor
674 atp_proof conjecture_shape thm_names params frees =
677 atp_proof ^ "$" (* the $ sign acts as a sentinel (FIXME: needed?) *)
679 |> sort (int_ord o pairself raw_step_number)
680 |> decode_lines ctxt full_types tfrees
681 |> rpair [] |-> fold_rev (add_line conjecture_shape thm_names)
682 |> rpair [] |-> fold_rev add_nontrivial_line
683 |> rpair (0, []) |-> fold_rev (add_desired_line isar_shrink_factor
684 conjecture_shape thm_names frees)
687 (if null params then [] else [Fix params]) @
688 map2 (step_for_line thm_names) (length lines downto 1) lines
691 (* When redirecting proofs, we keep information about the labels seen so far in
692 the "backpatches" data structure. The first component indicates which facts
693 should be associated with forthcoming proof steps. The second component is a
694 pair ("assum_ls", "drop_ls"), where "assum_ls" are the labels that should
695 become assumptions and "drop_ls" are the labels that should be dropped in a
697 type backpatches = (label * facts) list * (label list * label list)
699 fun used_labels_of_step (Have (_, _, _, by)) =
701 ByMetis (ls, _) => ls
702 | CaseSplit (proofs, (ls, _)) =>
703 fold (union (op =) o used_labels_of) proofs ls)
704 | used_labels_of_step _ = []
705 and used_labels_of proof = fold (union (op =) o used_labels_of_step) proof []
707 fun new_labels_of_step (Fix _) = []
708 | new_labels_of_step (Let _) = []
709 | new_labels_of_step (Assume (l, _)) = [l]
710 | new_labels_of_step (Have (_, l, _, _)) = [l]
711 val new_labels_of = maps new_labels_of_step
716 | aux proof_tail (proofs as (proof1 :: _)) =
717 if exists null proofs then
719 else if forall (curry (op =) (hd proof1) o hd) (tl proofs) then
720 aux (hd proof1 :: proof_tail) (map tl proofs)
721 else case hd proof1 of
722 Have ([], l, t, _) => (* FIXME: should we really ignore the "by"? *)
723 if forall (fn Have ([], l', t', _) :: _ => (l, t) = (l', t')
724 | _ => false) (tl proofs) andalso
725 not (exists (member (op =) (maps new_labels_of proofs))
726 (used_labels_of proof_tail)) then
727 SOME (l, t, map rev proofs, proof_tail)
731 in aux [] o map rev end
733 fun case_split_qualifiers proofs =
734 case length proofs of
739 fun redirect_proof conjecture_shape hyp_ts concl_t proof =
741 (* The first pass outputs those steps that are independent of the negated
742 conjecture. The second pass flips the proof by contradiction to obtain a
743 direct proof, introducing case splits when an inference depends on
744 several facts that depend on the negated conjecture. *)
746 nth hyp_ts (index_in_shape num conjecture_shape)
748 raise Fail ("Cannot find hypothesis " ^ Int.toString num)
749 val concl_ls = map (pair raw_prefix) (List.last conjecture_shape)
750 val canonicalize_labels =
751 map (fn l => if member (op =) concl_ls l then hd concl_ls else l)
753 fun first_pass ([], contra) = ([], contra)
754 | first_pass ((step as Fix _) :: proof, contra) =
755 first_pass (proof, contra) |>> cons step
756 | first_pass ((step as Let _) :: proof, contra) =
757 first_pass (proof, contra) |>> cons step
758 | first_pass ((step as Assume (l as (_, num), _)) :: proof, contra) =
759 if member (op =) concl_ls l then
760 first_pass (proof, contra ||> l = hd concl_ls ? cons step)
762 first_pass (proof, contra) |>> cons (Assume (l, find_hyp num))
763 | first_pass (Have (qs, l, t, ByMetis (ls, ss)) :: proof, contra) =
765 val ls = canonicalize_labels ls
766 val step = Have (qs, l, t, ByMetis (ls, ss))
768 if exists (member (op =) (fst contra)) ls then
769 first_pass (proof, contra |>> cons l ||> cons step)
771 first_pass (proof, contra) |>> cons step
773 | first_pass _ = raise Fail "malformed proof"
774 val (proof_top, (contra_ls, contra_proof)) =
775 first_pass (proof, (concl_ls, []))
776 val backpatch_label = the_default ([], []) oo AList.lookup (op =) o fst
777 fun backpatch_labels patches ls =
778 fold merge_fact_sets (map (backpatch_label patches) ls) ([], [])
779 fun second_pass end_qs ([], assums, patches) =
780 ([Have (end_qs, no_label, concl_t,
781 ByMetis (backpatch_labels patches (map snd assums)))], patches)
782 | second_pass end_qs (Assume (l, t) :: proof, assums, patches) =
783 second_pass end_qs (proof, (t, l) :: assums, patches)
784 | second_pass end_qs (Have (qs, l, t, ByMetis (ls, ss)) :: proof, assums,
786 if member (op =) (snd (snd patches)) l andalso
787 not (member (op =) (fst (snd patches)) l) andalso
788 not (AList.defined (op =) (fst patches) l) then
789 second_pass end_qs (proof, assums, patches ||> apsnd (append ls))
791 (case List.partition (member (op =) contra_ls) ls of
792 ([contra_l], co_ls) =>
793 if member (op =) qs Show then
794 second_pass end_qs (proof, assums,
795 patches |>> cons (contra_l, (co_ls, ss)))
799 patches |>> cons (contra_l, (l :: co_ls, ss)))
800 |>> cons (if member (op =) (fst (snd patches)) l then
801 Assume (l, negate_term t)
803 Have (qs, l, negate_term t,
804 ByMetis (backpatch_label patches l)))
805 | (contra_ls as _ :: _, co_ls) =>
810 if member (op =) concl_ls l then
814 val drop_ls = filter (curry (op <>) l) contra_ls
818 patches ||> apfst (insert (op =) l)
819 ||> apsnd (union (op =) drop_ls))
822 val (assumes, facts) =
823 if member (op =) (fst (snd patches)) l then
824 ([Assume (l, negate_term t)], (l :: co_ls, ss))
828 (case join_proofs proofs of
829 SOME (l, t, proofs, proof_tail) =>
830 Have (case_split_qualifiers proofs @
831 (if null proof_tail then end_qs else []), l, t,
832 smart_case_split proofs facts) :: proof_tail
834 [Have (case_split_qualifiers proofs @ end_qs, no_label,
835 concl_t, smart_case_split proofs facts)],
839 | _ => raise Fail "malformed proof")
840 | second_pass _ _ = raise Fail "malformed proof"
842 second_pass [Show] (contra_proof, [], ([], ([], []))) |> fst
843 in proof_top @ proof_bottom end
845 val kill_duplicate_assumptions_in_proof =
847 fun relabel_facts subst =
848 apfst (map (fn l => AList.lookup (op =) subst l |> the_default l))
849 fun do_step (step as Assume (l, t)) (proof, subst, assums) =
850 (case AList.lookup (op aconv) assums t of
851 SOME l' => (proof, (l, l') :: subst, assums)
852 | NONE => (step :: proof, subst, (t, l) :: assums))
853 | do_step (Have (qs, l, t, by)) (proof, subst, assums) =
856 ByMetis facts => ByMetis (relabel_facts subst facts)
857 | CaseSplit (proofs, facts) =>
858 CaseSplit (map do_proof proofs, relabel_facts subst facts)) ::
859 proof, subst, assums)
860 | do_step step (proof, subst, assums) = (step :: proof, subst, assums)
861 and do_proof proof = fold do_step proof ([], [], []) |> #1 |> rev
864 val then_chain_proof =
867 | aux _ ((step as Assume (l, _)) :: proof) = step :: aux l proof
868 | aux l' (Have (qs, l, t, by) :: proof) =
871 Have (if member (op =) ls l' then
873 ByMetis (filter_out (curry (op =) l') ls, ss))
875 (qs, l, t, ByMetis (ls, ss)))
876 | CaseSplit (proofs, facts) =>
877 Have (qs, l, t, CaseSplit (map (aux no_label) proofs, facts))) ::
879 | aux _ (step :: proof) = step :: aux no_label proof
882 fun kill_useless_labels_in_proof proof =
884 val used_ls = used_labels_of proof
885 fun do_label l = if member (op =) used_ls l then l else no_label
886 fun do_step (Assume (l, t)) = Assume (do_label l, t)
887 | do_step (Have (qs, l, t, by)) =
888 Have (qs, do_label l, t,
890 CaseSplit (proofs, facts) =>
891 CaseSplit (map (map do_step) proofs, facts)
893 | do_step step = step
894 in map do_step proof end
896 fun prefix_for_depth n = replicate_string (n + 1)
900 fun aux _ _ _ [] = []
901 | aux subst depth (next_assum, next_fact) (Assume (l, t) :: proof) =
903 Assume (l, t) :: aux subst depth (next_assum, next_fact) proof
905 let val l' = (prefix_for_depth depth assum_prefix, next_assum) in
907 aux ((l, l') :: subst) depth (next_assum + 1, next_fact) proof
909 | aux subst depth (next_assum, next_fact) (Have (qs, l, t, by) :: proof) =
911 val (l', subst, next_fact) =
913 (l, subst, next_fact)
916 val l' = (prefix_for_depth depth fact_prefix, next_fact)
917 in (l', (l, l') :: subst, next_fact + 1) end
920 case AList.lookup (op =) subst l of
922 | NONE => raise Fail ("unknown label " ^
923 quote (string_for_label l))))
926 ByMetis facts => ByMetis (relabel_facts facts)
927 | CaseSplit (proofs, facts) =>
928 CaseSplit (map (aux subst (depth + 1) (1, 1)) proofs,
931 Have (qs, l', t, by) ::
932 aux subst depth (next_assum, next_fact) proof
934 | aux subst depth nextp (step :: proof) =
935 step :: aux subst depth nextp proof
936 in aux [] 0 (1, 1) end
938 fun string_for_proof ctxt full_types i n =
940 fun fix_print_mode f x =
941 setmp_CRITICAL show_no_free_types true
942 (setmp_CRITICAL show_types true
943 (Print_Mode.setmp (filter (curry (op =) Symbol.xsymbolsN)
944 (print_mode_value ())) f)) x
945 fun do_indent ind = replicate_string (ind * indent_size) " "
947 maybe_quote s ^ " :: " ^
948 maybe_quote (fix_print_mode (Syntax.string_of_typ ctxt) T)
949 fun do_label l = if l = no_label then "" else string_for_label l ^ ": "
951 (if member (op =) qs Moreover then "moreover " else "") ^
952 (if member (op =) qs Ultimately then "ultimately " else "") ^
953 (if member (op =) qs Then then
954 if member (op =) qs Show then "thus" else "hence"
956 if member (op =) qs Show then "show" else "have")
957 val do_term = maybe_quote o fix_print_mode (Syntax.string_of_term ctxt)
958 fun do_facts (ls, ss) =
960 val ls = ls |> sort_distinct (prod_ord string_ord int_ord)
961 val ss = ss |> map unprefix_chained |> sort_distinct string_ord
962 in metis_command full_types 1 1 (ls, ss) end
963 and do_step ind (Fix xs) =
964 do_indent ind ^ "fix " ^ space_implode " and " (map do_free xs) ^ "\n"
965 | do_step ind (Let (t1, t2)) =
966 do_indent ind ^ "let " ^ do_term t1 ^ " = " ^ do_term t2 ^ "\n"
967 | do_step ind (Assume (l, t)) =
968 do_indent ind ^ "assume " ^ do_label l ^ do_term t ^ "\n"
969 | do_step ind (Have (qs, l, t, ByMetis facts)) =
970 do_indent ind ^ do_have qs ^ " " ^
971 do_label l ^ do_term t ^ " " ^ do_facts facts ^ "\n"
972 | do_step ind (Have (qs, l, t, CaseSplit (proofs, facts))) =
973 space_implode (do_indent ind ^ "moreover\n")
974 (map (do_block ind) proofs) ^
975 do_indent ind ^ do_have qs ^ " " ^ do_label l ^ do_term t ^ " " ^
976 do_facts facts ^ "\n"
977 and do_steps prefix suffix ind steps =
978 let val s = implode (map (do_step ind) steps) in
979 replicate_string (ind * indent_size - size prefix) " " ^ prefix ^
980 String.extract (s, ind * indent_size,
981 SOME (size s - ind * indent_size - 1)) ^
984 and do_block ind proof = do_steps "{ " " }" (ind + 1) proof
985 (* One-step proofs are pointless; better use the Metis one-liner
987 and do_proof [Have (_, _, _, ByMetis _)] = ""
989 (if i <> 1 then "prefer " ^ string_of_int i ^ "\n" else "") ^
990 do_indent 0 ^ "proof -\n" ^
991 do_steps "" "" 1 proof ^
992 do_indent 0 ^ (if n <> 1 then "next" else "qed") ^ "\n"
995 fun isar_proof_text (pool, debug, isar_shrink_factor, ctxt, conjecture_shape)
996 (other_params as (full_types, _, atp_proof, thm_names, goal,
999 val (params, hyp_ts, concl_t) = strip_subgoal goal i
1000 val frees = fold Term.add_frees (concl_t :: hyp_ts) []
1001 val tfrees = fold Term.add_tfrees (concl_t :: hyp_ts) []
1002 val n = Logic.count_prems (prop_of goal)
1003 val (one_line_proof, lemma_names) = metis_proof_text other_params
1004 fun isar_proof_for () =
1005 case proof_from_atp_proof pool ctxt full_types tfrees isar_shrink_factor
1006 atp_proof conjecture_shape thm_names params
1008 |> redirect_proof conjecture_shape hyp_ts concl_t
1009 |> kill_duplicate_assumptions_in_proof
1011 |> kill_useless_labels_in_proof
1013 |> string_for_proof ctxt full_types i n of
1014 "" => "No structured proof available.\n"
1015 | proof => "\nStructured proof:\n" ^ Markup.markup Markup.sendback proof
1020 try isar_proof_for ()
1021 |> the_default "Warning: The Isar proof construction failed.\n"
1022 in (one_line_proof ^ isar_proof, lemma_names) end
1024 fun proof_text isar_proof isar_params other_params =
1025 (if isar_proof then isar_proof_text isar_params else metis_proof_text)