make relevance filter work in term of a "max_relevant" option + use Vampire SOS;
"max_relevant" is more reliable than "max_relevant_per_iter";
also made sure that the option is monotone -- larger values should lead to more axioms -- which wasn't always the case before;
SOS for Vampire makes a difference of about 3% (i.e. 3% more proofs are found)
1.1 --- a/src/HOL/Tools/ATP/atp_systems.ML Wed Aug 25 09:42:28 2010 +0200
1.2 +++ b/src/HOL/Tools/ATP/atp_systems.ML Wed Aug 25 17:49:52 2010 +0200
1.3 @@ -19,7 +19,7 @@
1.4 has_incomplete_mode: bool,
1.5 proof_delims: (string * string) list,
1.6 known_failures: (failure * string) list,
1.7 - default_max_relevant_per_iter: int,
1.8 + default_max_relevant: int,
1.9 default_theory_relevant: bool,
1.10 explicit_forall: bool,
1.11 use_conjecture_for_hypotheses: bool}
1.12 @@ -52,7 +52,7 @@
1.13 has_incomplete_mode: bool,
1.14 proof_delims: (string * string) list,
1.15 known_failures: (failure * string) list,
1.16 - default_max_relevant_per_iter: int,
1.17 + default_max_relevant: int,
1.18 default_theory_relevant: bool,
1.19 explicit_forall: bool,
1.20 use_conjecture_for_hypotheses: bool}
1.21 @@ -159,7 +159,7 @@
1.22 "# Cannot determine problem status within resource limit"),
1.23 (OutOfResources, "SZS status: ResourceOut"),
1.24 (OutOfResources, "SZS status ResourceOut")],
1.25 - default_max_relevant_per_iter = 80 (* FUDGE *),
1.26 + default_max_relevant = 500 (* FUDGE *),
1.27 default_theory_relevant = false,
1.28 explicit_forall = false,
1.29 use_conjecture_for_hypotheses = true}
1.30 @@ -186,7 +186,7 @@
1.31 (MalformedInput, "Undefined symbol"),
1.32 (MalformedInput, "Free Variable"),
1.33 (SpassTooOld, "tptp2dfg")],
1.34 - default_max_relevant_per_iter = 40 (* FUDGE *),
1.35 + default_max_relevant = 350 (* FUDGE *),
1.36 default_theory_relevant = true,
1.37 explicit_forall = true,
1.38 use_conjecture_for_hypotheses = true}
1.39 @@ -199,10 +199,11 @@
1.40 val vampire_config : prover_config =
1.41 {exec = ("VAMPIRE_HOME", "vampire"),
1.42 required_execs = [],
1.43 - arguments = fn _ => fn timeout =>
1.44 - "--mode casc -t " ^ string_of_int (to_secs 0 timeout) ^
1.45 - " --thanks Andrei --input_file",
1.46 - has_incomplete_mode = false,
1.47 + arguments = fn complete => fn timeout =>
1.48 + ("--mode casc -t " ^ string_of_int (to_secs 0 timeout) ^
1.49 + " --thanks Andrei --input_file")
1.50 + |> not complete ? prefix "--sos on ",
1.51 + has_incomplete_mode = true,
1.52 proof_delims =
1.53 [("=========== Refutation ==========",
1.54 "======= End of refutation ======="),
1.55 @@ -215,7 +216,7 @@
1.56 (Unprovable, "Satisfiability detected"),
1.57 (Unprovable, "Termination reason: Satisfiable"),
1.58 (VampireTooOld, "not a valid option")],
1.59 - default_max_relevant_per_iter = 45 (* FUDGE *),
1.60 + default_max_relevant = 400 (* FUDGE *),
1.61 default_theory_relevant = false,
1.62 explicit_forall = false,
1.63 use_conjecture_for_hypotheses = true}
1.64 @@ -255,7 +256,7 @@
1.65 | SOME sys => sys
1.66
1.67 fun remote_config system_name system_versions proof_delims known_failures
1.68 - default_max_relevant_per_iter default_theory_relevant
1.69 + default_max_relevant default_theory_relevant
1.70 use_conjecture_for_hypotheses : prover_config =
1.71 {exec = ("ISABELLE_ATP", "scripts/remote_atp"),
1.72 required_execs = [],
1.73 @@ -267,26 +268,26 @@
1.74 known_failures =
1.75 known_failures @ known_perl_failures @
1.76 [(TimedOut, "says Timeout")],
1.77 - default_max_relevant_per_iter = default_max_relevant_per_iter,
1.78 + default_max_relevant = default_max_relevant,
1.79 default_theory_relevant = default_theory_relevant,
1.80 explicit_forall = true,
1.81 use_conjecture_for_hypotheses = use_conjecture_for_hypotheses}
1.82
1.83 fun remotify_config system_name system_versions
1.84 - ({proof_delims, known_failures, default_max_relevant_per_iter,
1.85 + ({proof_delims, known_failures, default_max_relevant,
1.86 default_theory_relevant, use_conjecture_for_hypotheses, ...}
1.87 : prover_config) : prover_config =
1.88 remote_config system_name system_versions proof_delims known_failures
1.89 - default_max_relevant_per_iter default_theory_relevant
1.90 + default_max_relevant default_theory_relevant
1.91 use_conjecture_for_hypotheses
1.92
1.93 val remotify_name = prefix "remote_"
1.94 fun remote_prover name system_name system_versions proof_delims known_failures
1.95 - default_max_relevant_per_iter default_theory_relevant
1.96 + default_max_relevant default_theory_relevant
1.97 use_conjecture_for_hypotheses =
1.98 (remotify_name name,
1.99 remote_config system_name system_versions proof_delims known_failures
1.100 - default_max_relevant_per_iter default_theory_relevant
1.101 + default_max_relevant default_theory_relevant
1.102 use_conjecture_for_hypotheses)
1.103 fun remotify_prover (name, config) system_name system_versions =
1.104 (remotify_name name, remotify_config system_name system_versions config)
1.105 @@ -294,11 +295,11 @@
1.106 val remote_e = remotify_prover e "EP" ["1.0", "1.1", "1.2"]
1.107 val remote_vampire = remotify_prover vampire "Vampire" ["9.9", "0.6", "1.0"]
1.108 val remote_sine_e =
1.109 - remote_prover "sine_e" "SInE" [] []
1.110 - [(Unprovable, "says Unknown")] 150 (* FUDGE *) false true
1.111 + remote_prover "sine_e" "SInE" [] [] [(Unprovable, "says Unknown")]
1.112 + 1000 (* FUDGE *) false true
1.113 val remote_snark =
1.114 remote_prover "snark" "SNARK---" [] [("refutation.", "end_refutation.")] []
1.115 - 50 (* FUDGE *) false true
1.116 + 350 (* FUDGE *) false true
1.117
1.118 (* Setup *)
1.119
2.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer.ML Wed Aug 25 09:42:28 2010 +0200
2.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer.ML Wed Aug 25 17:49:52 2010 +0200
2.3 @@ -19,8 +19,8 @@
2.4 full_types: bool,
2.5 explicit_apply: bool,
2.6 relevance_threshold: real,
2.7 - relevance_decay: real,
2.8 - max_relevant_per_iter: int option,
2.9 + relevance_decay: real option,
2.10 + max_relevant: int option,
2.11 theory_relevant: bool option,
2.12 isar_proof: bool,
2.13 isar_shrink_factor: int,
2.14 @@ -87,8 +87,8 @@
2.15 full_types: bool,
2.16 explicit_apply: bool,
2.17 relevance_threshold: real,
2.18 - relevance_decay: real,
2.19 - max_relevant_per_iter: int option,
2.20 + relevance_decay: real option,
2.21 + max_relevant: int option,
2.22 theory_relevant: bool option,
2.23 isar_proof: bool,
2.24 isar_shrink_factor: int,
2.25 @@ -187,7 +187,7 @@
2.26 val seq = extract_clause_sequence output
2.27 val name_map = extract_clause_formula_relation output
2.28 fun renumber_conjecture j =
2.29 - conjecture_prefix ^ Int.toString (j - j0)
2.30 + conjecture_prefix ^ string_of_int (j - j0)
2.31 |> AList.find (fn (s, ss) => member (op =) ss s) name_map
2.32 |> map (fn s => find_index (curry (op =) s) seq + 1)
2.33 fun name_for_number j =
2.34 @@ -210,12 +210,11 @@
2.35
2.36 fun prover_fun atp_name
2.37 {exec, required_execs, arguments, has_incomplete_mode, proof_delims,
2.38 - known_failures, default_max_relevant_per_iter, default_theory_relevant,
2.39 + known_failures, default_max_relevant, default_theory_relevant,
2.40 explicit_forall, use_conjecture_for_hypotheses}
2.41 ({debug, verbose, overlord, full_types, explicit_apply,
2.42 - relevance_threshold, relevance_decay,
2.43 - max_relevant_per_iter, theory_relevant, isar_proof,
2.44 - isar_shrink_factor, timeout, ...} : params)
2.45 + relevance_threshold, relevance_decay, max_relevant, theory_relevant,
2.46 + isar_proof, isar_shrink_factor, timeout, ...} : params)
2.47 minimize_command
2.48 ({subgoal, goal, relevance_override, axioms} : problem) =
2.49 let
2.50 @@ -223,7 +222,7 @@
2.51 val thy = ProofContext.theory_of ctxt
2.52 val (params, hyp_ts, concl_t) = strip_subgoal th subgoal
2.53
2.54 - fun print s = (priority s; if debug then tracing s else ())
2.55 + val print = priority
2.56 fun print_v f = () |> verbose ? print o f
2.57 fun print_d f = () |> debug ? print o f
2.58
2.59 @@ -232,8 +231,7 @@
2.60 SOME axioms => axioms
2.61 | NONE =>
2.62 (relevant_facts full_types relevance_threshold relevance_decay
2.63 - (the_default default_max_relevant_per_iter
2.64 - max_relevant_per_iter)
2.65 + (the_default default_max_relevant max_relevant)
2.66 (the_default default_theory_relevant theory_relevant)
2.67 relevance_override goal hyp_ts concl_t
2.68 |> tap ((fn n => print_v (fn () =>
2.69 @@ -257,6 +255,7 @@
2.70 else error ("No such directory: " ^ the_dest_dir ^ ".")
2.71 end;
2.72
2.73 + val measure_run_time = verbose orelse Config.get ctxt measure_runtime
2.74 val command = Path.explode (getenv (fst exec) ^ "/" ^ snd exec)
2.75 (* write out problem file and call prover *)
2.76 fun command_line complete timeout probfile =
2.77 @@ -264,10 +263,8 @@
2.78 val core = File.shell_path command ^ " " ^ arguments complete timeout ^
2.79 " " ^ File.shell_path probfile
2.80 in
2.81 - (if Config.get ctxt measure_runtime then
2.82 - "TIMEFORMAT='%3U'; { time " ^ core ^ " ; }"
2.83 - else
2.84 - "exec " ^ core) ^ " 2>&1"
2.85 + (if measure_run_time then "TIMEFORMAT='%3U'; { time " ^ core ^ " ; }"
2.86 + else "exec " ^ core) ^ " 2>&1"
2.87 end
2.88 fun split_time s =
2.89 let
2.90 @@ -296,8 +293,7 @@
2.91 prefix ("% " ^ command ^ "\n% " ^ timestamp () ^ "\n")
2.92 else
2.93 I)
2.94 - |>> (if Config.get ctxt measure_runtime then split_time
2.95 - else rpair 0)
2.96 + |>> (if measure_run_time then split_time else rpair 0)
2.97 val (proof, outcome) =
2.98 extract_proof_and_outcome complete res_code proof_delims
2.99 known_failures output
2.100 @@ -354,6 +350,11 @@
2.101 proof_text isar_proof
2.102 (pool, debug, isar_shrink_factor, ctxt, conjecture_shape)
2.103 (full_types, minimize_command, proof, axiom_names, th, subgoal)
2.104 + |>> (fn message =>
2.105 + message ^ (if verbose then
2.106 + "\nATP CPU time: " ^ string_of_int msecs ^ " ms."
2.107 + else
2.108 + ""))
2.109 | SOME failure => (string_for_failure failure, [])
2.110 in
2.111 {outcome = outcome, message = message, pool = pool,
3.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_fact_filter.ML Wed Aug 25 09:42:28 2010 +0200
3.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_fact_filter.ML Wed Aug 25 17:49:52 2010 +0200
3.3 @@ -11,11 +11,11 @@
3.4 only: bool}
3.5
3.6 val trace : bool Unsynchronized.ref
3.7 - val name_thms_pair_from_ref :
3.8 + val name_thm_pairs_from_ref :
3.9 Proof.context -> unit Symtab.table -> thm list -> Facts.ref
3.10 - -> (unit -> string * bool) * thm list
3.11 + -> ((unit -> string * bool) * (bool * thm)) list
3.12 val relevant_facts :
3.13 - bool -> real -> real -> int -> bool -> relevance_override
3.14 + bool -> real -> real option -> int -> bool -> relevance_override
3.15 -> Proof.context * (thm list * 'a) -> term list -> term
3.16 -> ((string * bool) * thm) list
3.17 end;
3.18 @@ -37,13 +37,22 @@
3.19
3.20 val sledgehammer_prefix = "Sledgehammer" ^ Long_Name.separator
3.21
3.22 -fun name_thms_pair_from_ref ctxt reserved chained_ths xref =
3.23 - let val ths = ProofContext.get_fact ctxt xref in
3.24 - (fn () => let
3.25 - val name = Facts.string_of_ref xref
3.26 - val name = name |> Symtab.defined reserved name ? quote
3.27 - val chained = forall (member Thm.eq_thm chained_ths) ths
3.28 - in (name, chained) end, ths)
3.29 +fun repair_name reserved multi j name =
3.30 + (name |> Symtab.defined reserved name ? quote) ^
3.31 + (if multi then "(" ^ Int.toString j ^ ")" else "")
3.32 +
3.33 +fun name_thm_pairs_from_ref ctxt reserved chained_ths xref =
3.34 + let
3.35 + val ths = ProofContext.get_fact ctxt xref
3.36 + val name = Facts.string_of_ref xref
3.37 + val multi = length ths > 1
3.38 + in
3.39 + fold (fn th => fn (j, rest) =>
3.40 + (j + 1, (fn () => (repair_name reserved multi j name,
3.41 + member Thm.eq_thm chained_ths th),
3.42 + (multi, th)) :: rest))
3.43 + ths (1, [])
3.44 + |> snd
3.45 end
3.46
3.47 (***************************************************************)
3.48 @@ -53,30 +62,44 @@
3.49 (*** constants with types ***)
3.50
3.51 (*An abstraction of Isabelle types*)
3.52 -datatype const_typ = CTVar | CType of string * const_typ list
3.53 +datatype pseudotype = PVar | PType of string * pseudotype list
3.54 +
3.55 +fun string_for_pseudotype PVar = "?"
3.56 + | string_for_pseudotype (PType (s, Ts)) =
3.57 + (case Ts of
3.58 + [] => ""
3.59 + | [T] => string_for_pseudotype T
3.60 + | Ts => string_for_pseudotypes Ts ^ " ") ^ s
3.61 +and string_for_pseudotypes Ts =
3.62 + "(" ^ commas (map string_for_pseudotype Ts) ^ ")"
3.63
3.64 (*Is the second type an instance of the first one?*)
3.65 -fun match_type (CType(con1,args1)) (CType(con2,args2)) =
3.66 - con1=con2 andalso match_types args1 args2
3.67 - | match_type CTVar _ = true
3.68 - | match_type _ CTVar = false
3.69 -and match_types [] [] = true
3.70 - | match_types (a1::as1) (a2::as2) = match_type a1 a2 andalso match_types as1 as2;
3.71 +fun match_pseudotype (PType (a, T), PType (b, U)) =
3.72 + a = b andalso match_pseudotypes (T, U)
3.73 + | match_pseudotype (PVar, _) = true
3.74 + | match_pseudotype (_, PVar) = false
3.75 +and match_pseudotypes ([], []) = true
3.76 + | match_pseudotypes (T :: Ts, U :: Us) =
3.77 + match_pseudotype (T, U) andalso match_pseudotypes (Ts, Us)
3.78
3.79 (*Is there a unifiable constant?*)
3.80 -fun const_mem const_tab (c, c_typ) =
3.81 - exists (match_types c_typ) (these (Symtab.lookup const_tab c))
3.82 +fun pseudoconst_mem f const_tab (c, c_typ) =
3.83 + exists (curry (match_pseudotypes o f) c_typ)
3.84 + (these (Symtab.lookup const_tab c))
3.85
3.86 -(*Maps a "real" type to a const_typ*)
3.87 -fun const_typ_of (Type (c,typs)) = CType (c, map const_typ_of typs)
3.88 - | const_typ_of (TFree _) = CTVar
3.89 - | const_typ_of (TVar _) = CTVar
3.90 +fun pseudotype_for (Type (c,typs)) = PType (c, map pseudotype_for typs)
3.91 + | pseudotype_for (TFree _) = PVar
3.92 + | pseudotype_for (TVar _) = PVar
3.93 +(* Pairs a constant with the list of its type instantiations. *)
3.94 +fun pseudoconst_for thy (c, T) =
3.95 + (c, map pseudotype_for (Sign.const_typargs thy (c, T)))
3.96 + handle TYPE _ => (c, []) (* Variable (locale constant): monomorphic *)
3.97
3.98 -(*Pairs a constant with the list of its type instantiations (using const_typ)*)
3.99 -fun const_with_typ thy (c,typ) =
3.100 - let val tvars = Sign.const_typargs thy (c,typ) in
3.101 - (c, map const_typ_of tvars) end
3.102 - handle TYPE _ => (c, []) (*Variable (locale constant): monomorphic*)
3.103 +fun string_for_pseudoconst (s, []) = s
3.104 + | string_for_pseudoconst (s, Ts) = s ^ string_for_pseudotypes Ts
3.105 +fun string_for_super_pseudoconst (s, [[]]) = s
3.106 + | string_for_super_pseudoconst (s, Tss) =
3.107 + s ^ "{" ^ commas (map string_for_pseudotypes Tss) ^ "}"
3.108
3.109 (*Add a const/type pair to the table, but a [] entry means a standard connective,
3.110 which we ignore.*)
3.111 @@ -86,7 +109,7 @@
3.112
3.113 fun is_formula_type T = (T = HOLogic.boolT orelse T = propT)
3.114
3.115 -val fresh_prefix = "Sledgehammer.FRESH."
3.116 +val fresh_prefix = "Sledgehammer.skolem."
3.117 val flip = Option.map not
3.118 (* These are typically simplified away by "Meson.presimplify". *)
3.119 val boring_consts =
3.120 @@ -99,7 +122,7 @@
3.121 introduce a fresh constant to simulate the effect of Skolemization. *)
3.122 fun do_term t =
3.123 case t of
3.124 - Const x => add_const_to_table (const_with_typ thy x)
3.125 + Const x => add_const_to_table (pseudoconst_for thy x)
3.126 | Free (s, _) => add_const_to_table (s, [])
3.127 | t1 $ t2 => fold do_term [t1, t2]
3.128 | Abs (_, _, t') => do_term t'
3.129 @@ -166,23 +189,23 @@
3.130
3.131 (* A two-dimensional symbol table counts frequencies of constants. It's keyed
3.132 first by constant name and second by its list of type instantiations. For the
3.133 - latter, we need a linear ordering on "const_typ list". *)
3.134 + latter, we need a linear ordering on "pseudotype list". *)
3.135
3.136 -fun const_typ_ord p =
3.137 +fun pseudotype_ord p =
3.138 case p of
3.139 - (CTVar, CTVar) => EQUAL
3.140 - | (CTVar, CType _) => LESS
3.141 - | (CType _, CTVar) => GREATER
3.142 - | (CType q1, CType q2) =>
3.143 - prod_ord fast_string_ord (dict_ord const_typ_ord) (q1, q2)
3.144 + (PVar, PVar) => EQUAL
3.145 + | (PVar, PType _) => LESS
3.146 + | (PType _, PVar) => GREATER
3.147 + | (PType q1, PType q2) =>
3.148 + prod_ord fast_string_ord (dict_ord pseudotype_ord) (q1, q2)
3.149
3.150 structure CTtab =
3.151 - Table(type key = const_typ list val ord = dict_ord const_typ_ord)
3.152 + Table(type key = pseudotype list val ord = dict_ord pseudotype_ord)
3.153
3.154 fun count_axiom_consts theory_relevant thy (_, th) =
3.155 let
3.156 fun do_const (a, T) =
3.157 - let val (c, cts) = const_with_typ thy (a, T) in
3.158 + let val (c, cts) = pseudoconst_for thy (a, T) in
3.159 (* Two-dimensional table update. Constant maps to types maps to
3.160 count. *)
3.161 CTtab.map_default (cts, 0) (Integer.add 1)
3.162 @@ -199,8 +222,8 @@
3.163 (**** Actual Filtering Code ****)
3.164
3.165 (*The frequency of a constant is the sum of those of all instances of its type.*)
3.166 -fun const_frequency const_tab (c, cts) =
3.167 - CTtab.fold (fn (cts', m) => match_types cts cts' ? Integer.add m)
3.168 +fun pseudoconst_freq match const_tab (c, cts) =
3.169 + CTtab.fold (fn (cts', m) => match (cts, cts') ? Integer.add m)
3.170 (the (Symtab.lookup const_tab c)) 0
3.171 handle Option.Option => 0
3.172
3.173 @@ -214,29 +237,22 @@
3.174 fun irrel_log (x : real) = Math.ln (x + 19.0) / 6.4
3.175
3.176 (* Computes a constant's weight, as determined by its frequency. *)
3.177 -val rel_const_weight = rel_log o real oo const_frequency
3.178 -val irrel_const_weight = irrel_log o real oo const_frequency
3.179 -(* fun irrel_const_weight _ _ = 1.0 FIXME: OLD CODE *)
3.180 +val rel_weight = rel_log o real oo pseudoconst_freq match_pseudotypes
3.181 +val irrel_weight =
3.182 + irrel_log o real oo pseudoconst_freq (match_pseudotypes o swap)
3.183 +(* fun irrel_weight _ _ = 1.0 FIXME: OLD CODE *)
3.184
3.185 fun axiom_weight const_tab relevant_consts axiom_consts =
3.186 - let
3.187 - val (rel, irrel) = List.partition (const_mem relevant_consts) axiom_consts
3.188 - val rel_weight = fold (curry Real.+ o rel_const_weight const_tab) rel 0.0
3.189 - val irrel_weight = fold (curry Real.+ o irrel_const_weight const_tab) irrel 0.0
3.190 - val res = rel_weight / (rel_weight + irrel_weight)
3.191 - in if Real.isFinite res then res else 0.0 end
3.192 -
3.193 -(* OLD CODE:
3.194 -(*Relevant constants are weighted according to frequency,
3.195 - but irrelevant constants are simply counted. Otherwise, Skolem functions,
3.196 - which are rare, would harm a formula's chances of being picked.*)
3.197 -fun axiom_weight const_tab relevant_consts axiom_consts =
3.198 - let
3.199 - val rel = filter (const_mem relevant_consts) axiom_consts
3.200 - val rel_weight = fold (curry Real.+ o rel_const_weight const_tab) rel 0.0
3.201 - val res = rel_weight / (rel_weight + real (length axiom_consts - length rel))
3.202 - in if Real.isFinite res then res else 0.0 end
3.203 -*)
3.204 + case axiom_consts |> List.partition (pseudoconst_mem I relevant_consts)
3.205 + ||> filter_out (pseudoconst_mem swap relevant_consts) of
3.206 + ([], []) => 0.0
3.207 + | (_, []) => 1.0
3.208 + | (rel, irrel) =>
3.209 + let
3.210 + val rel_weight = fold (curry Real.+ o rel_weight const_tab) rel 0.0
3.211 + val irrel_weight = fold (curry Real.+ o irrel_weight const_tab) irrel 0.0
3.212 + val res = rel_weight / (rel_weight + irrel_weight)
3.213 + in if Real.isFinite res then res else 0.0 end
3.214
3.215 fun consts_of_term thy t =
3.216 Symtab.fold (fn (x, ys) => fold (fn y => cons (x, y)) ys)
3.217 @@ -247,83 +263,82 @@
3.218 |> consts_of_term thy)
3.219
3.220 type annotated_thm =
3.221 - ((unit -> string * bool) * thm) * (string * const_typ list) list
3.222 + ((unit -> string * bool) * thm) * (string * pseudotype list) list
3.223
3.224 -(*For a reverse sort, putting the largest values first.*)
3.225 -fun compare_pairs ((_, w1), (_, w2)) = Real.compare (w2, w1)
3.226 +fun rev_compare_pairs ((_, w1), (_, w2)) = Real.compare (w2, w1)
3.227
3.228 -(* Limit the number of new facts, to prevent runaway acceptance. *)
3.229 -fun take_best max_relevant_per_iter (new_pairs : (annotated_thm * real) list) =
3.230 - let val nnew = length new_pairs in
3.231 - if nnew <= max_relevant_per_iter then
3.232 - (map #1 new_pairs, [])
3.233 - else
3.234 - let
3.235 - val new_pairs = sort compare_pairs new_pairs
3.236 - val accepted = List.take (new_pairs, max_relevant_per_iter)
3.237 - in
3.238 - trace_msg (fn () => ("Number of candidates, " ^ Int.toString nnew ^
3.239 - ", exceeds the limit of " ^ Int.toString max_relevant_per_iter));
3.240 - trace_msg (fn () => ("Effective pass mark: " ^ Real.toString (#2 (List.last accepted))));
3.241 - trace_msg (fn () => "Actually passed: " ^
3.242 - space_implode ", " (map (fst o (fn f => f ()) o fst o fst o fst) accepted));
3.243 - (map #1 accepted, List.drop (new_pairs, max_relevant_per_iter))
3.244 - end
3.245 - end;
3.246 +fun take_best max (new_pairs : (annotated_thm * real) list) =
3.247 + let
3.248 + val ((perfect, more_perfect), imperfect) =
3.249 + new_pairs |> List.partition (fn (_, w) => w > 0.99999)
3.250 + |>> chop (max - 1) ||> sort rev_compare_pairs
3.251 + val (accepted, rejected) =
3.252 + case more_perfect @ imperfect of
3.253 + [] => (perfect, [])
3.254 + | (q :: qs) => (q :: perfect, qs)
3.255 + in
3.256 + trace_msg (fn () => "Number of candidates: " ^
3.257 + string_of_int (length new_pairs));
3.258 + trace_msg (fn () => "Effective threshold: " ^
3.259 + Real.toString (#2 (hd accepted)));
3.260 + trace_msg (fn () => "Actually passed: " ^
3.261 + (accepted |> map (fn (((name, _), _), weight) =>
3.262 + fst (name ()) ^ " [" ^ Real.toString weight ^ "]")
3.263 + |> commas));
3.264 + (map #1 accepted, rejected)
3.265 + end
3.266
3.267 val threshold_divisor = 2.0
3.268 val ridiculous_threshold = 0.1
3.269
3.270 -fun relevance_filter ctxt relevance_threshold relevance_decay
3.271 - max_relevant_per_iter theory_relevant
3.272 - ({add, del, ...} : relevance_override) axioms goal_ts =
3.273 +fun relevance_filter ctxt relevance_threshold relevance_decay max_relevant
3.274 + theory_relevant ({add, del, ...} : relevance_override)
3.275 + axioms goal_ts =
3.276 let
3.277 val thy = ProofContext.theory_of ctxt
3.278 val const_tab = fold (count_axiom_consts theory_relevant thy) axioms
3.279 Symtab.empty
3.280 - val goal_const_tab = get_consts thy (SOME false) goal_ts
3.281 - val _ =
3.282 - trace_msg (fn () => "Initial constants: " ^
3.283 - commas (goal_const_tab |> Symtab.dest
3.284 - |> filter (curry (op <>) [] o snd)
3.285 - |> map fst))
3.286 val add_thms = maps (ProofContext.get_fact ctxt) add
3.287 val del_thms = maps (ProofContext.get_fact ctxt) del
3.288 - fun iter j threshold rel_const_tab =
3.289 + fun iter j max threshold rel_const_tab rest =
3.290 let
3.291 + fun game_over rejects =
3.292 + if j = 0 andalso threshold >= ridiculous_threshold then
3.293 + (* First iteration? Try again. *)
3.294 + iter 0 max (threshold / threshold_divisor) rel_const_tab rejects
3.295 + else
3.296 + (* Add "add:" facts. *)
3.297 + if null add_thms then
3.298 + []
3.299 + else
3.300 + map_filter (fn ((p as (_, th), _), _) =>
3.301 + if member Thm.eq_thm add_thms th then SOME p
3.302 + else NONE) rejects
3.303 fun relevant ([], rejects) [] =
3.304 - (* Nothing was added this iteration. *)
3.305 - if j = 0 andalso threshold >= ridiculous_threshold then
3.306 - (* First iteration? Try again. *)
3.307 - iter 0 (threshold / threshold_divisor) rel_const_tab
3.308 - (map (apsnd SOME) rejects)
3.309 - else
3.310 - (* Add "add:" facts. *)
3.311 - if null add_thms then
3.312 - []
3.313 - else
3.314 - map_filter (fn ((p as (_, th), _), _) =>
3.315 - if member Thm.eq_thm add_thms th then SOME p
3.316 - else NONE) rejects
3.317 + (* Nothing has been added this iteration. *)
3.318 + game_over (map (apsnd SOME) rejects)
3.319 | relevant (new_pairs, rejects) [] =
3.320 let
3.321 - val (new_rels, more_rejects) =
3.322 - take_best max_relevant_per_iter new_pairs
3.323 + val (new_rels, more_rejects) = take_best max new_pairs
3.324 val rel_const_tab' =
3.325 rel_const_tab |> fold add_const_to_table (maps snd new_rels)
3.326 - fun is_dirty c =
3.327 - const_mem rel_const_tab' c andalso
3.328 - not (const_mem rel_const_tab c)
3.329 + fun is_dirty (c, _) =
3.330 + Symtab.lookup rel_const_tab' c <> Symtab.lookup rel_const_tab c
3.331 val rejects =
3.332 more_rejects @ rejects
3.333 |> map (fn (ax as (_, consts), old_weight) =>
3.334 (ax, if exists is_dirty consts then NONE
3.335 else SOME old_weight))
3.336 val threshold = threshold + (1.0 - threshold) * relevance_decay
3.337 + val max = max - length new_rels
3.338 in
3.339 - trace_msg (fn () => "relevant this iteration: " ^
3.340 - Int.toString (length new_rels));
3.341 - map #1 new_rels @ iter (j + 1) threshold rel_const_tab' rejects
3.342 + trace_msg (fn () => "New or updated constants: " ^
3.343 + commas (rel_const_tab' |> Symtab.dest
3.344 + |> subtract (op =) (Symtab.dest rel_const_tab)
3.345 + |> map string_for_super_pseudoconst));
3.346 + map #1 new_rels @
3.347 + (if max = 0 then game_over rejects
3.348 + else iter (j + 1) max threshold rel_const_tab' rejects)
3.349 end
3.350 | relevant (new_rels, rejects)
3.351 (((ax as ((name, th), axiom_consts)), cached_weight)
3.352 @@ -335,26 +350,29 @@
3.353 | NONE => axiom_weight const_tab rel_const_tab axiom_consts
3.354 in
3.355 if weight >= threshold then
3.356 - (trace_msg (fn () =>
3.357 - fst (name ()) ^ " passes: " ^ Real.toString weight
3.358 - ^ " consts: " ^ commas (map fst axiom_consts));
3.359 - relevant ((ax, weight) :: new_rels, rejects) rest)
3.360 + relevant ((ax, weight) :: new_rels, rejects) rest
3.361 else
3.362 relevant (new_rels, (ax, weight) :: rejects) rest
3.363 end
3.364 in
3.365 - trace_msg (fn () => "relevant_facts, current threshold: " ^
3.366 - Real.toString threshold);
3.367 - relevant ([], [])
3.368 + trace_msg (fn () =>
3.369 + "ITERATION " ^ string_of_int j ^ ": current threshold: " ^
3.370 + Real.toString threshold ^ ", constants: " ^
3.371 + commas (rel_const_tab |> Symtab.dest
3.372 + |> filter (curry (op <>) [] o snd)
3.373 + |> map string_for_super_pseudoconst));
3.374 + relevant ([], []) rest
3.375 end
3.376 in
3.377 axioms |> filter_out (member Thm.eq_thm del_thms o snd)
3.378 |> map (rpair NONE o pair_consts_axiom theory_relevant thy)
3.379 - |> iter 0 relevance_threshold goal_const_tab
3.380 + |> iter 0 max_relevant relevance_threshold
3.381 + (get_consts thy (SOME false) goal_ts)
3.382 |> tap (fn res => trace_msg (fn () =>
3.383 "Total relevant: " ^ Int.toString (length res)))
3.384 end
3.385
3.386 +
3.387 (***************************************************************)
3.388 (* Retrieving and filtering lemmas *)
3.389 (***************************************************************)
3.390 @@ -547,14 +565,7 @@
3.391 val name2 = Name_Space.extern full_space name0
3.392 in
3.393 case find_first check_thms [name1, name2, name0] of
3.394 - SOME name =>
3.395 - let
3.396 - val name =
3.397 - name |> Symtab.defined reserved name ? quote
3.398 - in
3.399 - if multi then name ^ "(" ^ Int.toString j ^ ")"
3.400 - else name
3.401 - end
3.402 + SOME name => repair_name reserved multi j name
3.403 | NONE => ""
3.404 end, is_chained th), (multi, th)) :: rest)) ths
3.405 #> snd
3.406 @@ -567,25 +578,26 @@
3.407 (* The single-name theorems go after the multiple-name ones, so that single
3.408 names are preferred when both are available. *)
3.409 fun name_thm_pairs ctxt respect_no_atp =
3.410 - List.partition (fst o snd) #> op @
3.411 - #> map (apsnd snd)
3.412 + List.partition (fst o snd) #> op @ #> map (apsnd snd)
3.413 #> respect_no_atp ? filter_out (No_ATPs.member ctxt o snd)
3.414
3.415 (***************************************************************)
3.416 (* ATP invocation methods setup *)
3.417 (***************************************************************)
3.418
3.419 -fun relevant_facts full_types relevance_threshold relevance_decay
3.420 - max_relevant_per_iter theory_relevant
3.421 - (relevance_override as {add, del, only})
3.422 +fun relevant_facts full_types relevance_threshold relevance_decay max_relevant
3.423 + theory_relevant (relevance_override as {add, del, only})
3.424 (ctxt, (chained_ths, _)) hyp_ts concl_t =
3.425 let
3.426 + val relevance_decay =
3.427 + case relevance_decay of
3.428 + SOME x => x
3.429 + | NONE => 0.35 / Math.ln (Real.fromInt (max_relevant + 1))
3.430 val add_thms = maps (ProofContext.get_fact ctxt) add
3.431 val reserved = reserved_isar_keyword_table ()
3.432 val axioms =
3.433 (if only then
3.434 - maps ((fn (n, ths) => map (pair n o pair false) ths)
3.435 - o name_thms_pair_from_ref ctxt reserved chained_ths) add
3.436 + maps (name_thm_pairs_from_ref ctxt reserved chained_ths) add
3.437 else
3.438 all_name_thms_pairs ctxt reserved full_types add_thms chained_ths)
3.439 |> name_thm_pairs ctxt (respect_no_atp andalso not only)
3.440 @@ -598,11 +610,10 @@
3.441 else if relevance_threshold < 0.0 then
3.442 axioms
3.443 else
3.444 - relevance_filter ctxt relevance_threshold relevance_decay
3.445 - max_relevant_per_iter theory_relevant relevance_override
3.446 - axioms (concl_t :: hyp_ts))
3.447 - |> map (apfst (fn f => f ()))
3.448 - |> sort_wrt (fst o fst)
3.449 + relevance_filter ctxt relevance_threshold relevance_decay max_relevant
3.450 + theory_relevant relevance_override axioms
3.451 + (concl_t :: hyp_ts))
3.452 + |> map (apfst (fn f => f ())) |> sort_wrt (fst o fst)
3.453 end
3.454
3.455 end;
4.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_fact_minimize.ML Wed Aug 25 09:42:28 2010 +0200
4.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_fact_minimize.ML Wed Aug 25 17:49:52 2010 +0200
4.3 @@ -40,22 +40,20 @@
4.4 "")
4.5 end
4.6
4.7 -fun test_theorems ({debug, verbose, overlord, atps, full_types,
4.8 - relevance_threshold, relevance_decay, isar_proof,
4.9 +fun test_theorems ({debug, verbose, overlord, atps, full_types, isar_proof,
4.10 isar_shrink_factor, ...} : params)
4.11 (prover : prover) explicit_apply timeout subgoal state
4.12 - name_thms_pairs =
4.13 + axioms =
4.14 let
4.15 val _ =
4.16 - priority ("Testing " ^ n_theorems (map fst name_thms_pairs) ^ "...")
4.17 + priority ("Testing " ^ n_theorems (map fst axioms) ^ "...")
4.18 val params =
4.19 {debug = debug, verbose = verbose, overlord = overlord, atps = atps,
4.20 full_types = full_types, explicit_apply = explicit_apply,
4.21 - relevance_threshold = relevance_threshold,
4.22 - relevance_decay = relevance_decay, max_relevant_per_iter = NONE,
4.23 + relevance_threshold = 1.1, relevance_decay = NONE, max_relevant = NONE,
4.24 theory_relevant = NONE, isar_proof = isar_proof,
4.25 isar_shrink_factor = isar_shrink_factor, timeout = timeout}
4.26 - val axioms = maps (fn (n, ths) => map (pair n) ths) name_thms_pairs
4.27 + val axioms = maps (fn (n, ths) => map (pair n) ths) axioms
4.28 val {context = ctxt, facts, goal} = Proof.goal state
4.29 val problem =
4.30 {subgoal = subgoal, goal = (ctxt, (facts, goal)),
4.31 @@ -65,7 +63,7 @@
4.32 in
4.33 priority (case outcome of
4.34 NONE =>
4.35 - if length used_thm_names = length name_thms_pairs then
4.36 + if length used_thm_names = length axioms then
4.37 "Found proof."
4.38 else
4.39 "Found proof with " ^ n_theorems used_thm_names ^ "."
4.40 @@ -91,10 +89,9 @@
4.41 val fudge_msecs = 1000
4.42
4.43 fun minimize_theorems {atps = [], ...} _ _ _ _ = error "No ATP is set."
4.44 - | minimize_theorems
4.45 - (params as {debug, atps = atp :: _, full_types, isar_proof,
4.46 - isar_shrink_factor, timeout, ...})
4.47 - i n state name_thms_pairs =
4.48 + | minimize_theorems (params as {debug, atps = atp :: _, full_types,
4.49 + isar_proof, isar_shrink_factor, timeout, ...})
4.50 + i n state axioms =
4.51 let
4.52 val thy = Proof.theory_of state
4.53 val prover = get_prover_fun thy atp
4.54 @@ -104,13 +101,12 @@
4.55 val (_, hyp_ts, concl_t) = strip_subgoal goal i
4.56 val explicit_apply =
4.57 not (forall (Meson.is_fol_term thy)
4.58 - (concl_t :: hyp_ts @
4.59 - maps (map prop_of o snd) name_thms_pairs))
4.60 + (concl_t :: hyp_ts @ maps (map prop_of o snd) axioms))
4.61 fun do_test timeout =
4.62 test_theorems params prover explicit_apply timeout i state
4.63 val timer = Timer.startRealTimer ()
4.64 in
4.65 - (case do_test timeout name_thms_pairs of
4.66 + (case do_test timeout axioms of
4.67 result as {outcome = NONE, pool, used_thm_names,
4.68 conjecture_shape, ...} =>
4.69 let
4.70 @@ -120,7 +116,7 @@
4.71 |> Time.fromMilliseconds
4.72 val (min_thms, {proof, axiom_names, ...}) =
4.73 sublinear_minimize (do_test new_timeout)
4.74 - (filter_used_facts used_thm_names name_thms_pairs) ([], result)
4.75 + (filter_used_facts used_thm_names axioms) ([], result)
4.76 val n = length min_thms
4.77 val _ = priority (cat_lines
4.78 ["Minimized: " ^ string_of_int n ^ " theorem" ^ plural_s n] ^
4.79 @@ -152,15 +148,14 @@
4.80 val ctxt = Proof.context_of state
4.81 val reserved = reserved_isar_keyword_table ()
4.82 val chained_ths = #facts (Proof.goal state)
4.83 - val name_thms_pairs =
4.84 - map (apfst (fn f => f ())
4.85 - o name_thms_pair_from_ref ctxt reserved chained_ths) refs
4.86 + val axioms =
4.87 + maps (map (fn (name, (_, th)) => (name (), [th]))
4.88 + o name_thm_pairs_from_ref ctxt reserved chained_ths) refs
4.89 in
4.90 case subgoal_count state of
4.91 0 => priority "No subgoal!"
4.92 | n =>
4.93 - (kill_atps ();
4.94 - priority (#2 (minimize_theorems params i n state name_thms_pairs)))
4.95 + (kill_atps (); priority (#2 (minimize_theorems params i n state axioms)))
4.96 end
4.97
4.98 end;
5.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_isar.ML Wed Aug 25 09:42:28 2010 +0200
5.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_isar.ML Wed Aug 25 17:49:52 2010 +0200
5.3 @@ -68,8 +68,8 @@
5.4 ("overlord", "false"),
5.5 ("explicit_apply", "false"),
5.6 ("relevance_threshold", "40"),
5.7 - ("relevance_decay", "31"),
5.8 - ("max_relevant_per_iter", "smart"),
5.9 + ("relevance_decay", "smart"),
5.10 + ("max_relevant", "smart"),
5.11 ("theory_relevant", "smart"),
5.12 ("isar_proof", "false"),
5.13 ("isar_shrink_factor", "1")]
5.14 @@ -169,8 +169,9 @@
5.15 val relevance_threshold =
5.16 0.01 * Real.fromInt (lookup_int "relevance_threshold")
5.17 val relevance_decay =
5.18 - 0.01 * Real.fromInt (lookup_int "relevance_decay")
5.19 - val max_relevant_per_iter = lookup_int_option "max_relevant_per_iter"
5.20 + lookup_int_option "relevance_decay"
5.21 + |> Option.map (fn n => 0.01 * Real.fromInt n)
5.22 + val max_relevant = lookup_int_option "max_relevant"
5.23 val theory_relevant = lookup_bool_option "theory_relevant"
5.24 val isar_proof = lookup_bool "isar_proof"
5.25 val isar_shrink_factor = Int.max (1, lookup_int "isar_shrink_factor")
5.26 @@ -179,8 +180,7 @@
5.27 {debug = debug, verbose = verbose, overlord = overlord, atps = atps,
5.28 full_types = full_types, explicit_apply = explicit_apply,
5.29 relevance_threshold = relevance_threshold,
5.30 - relevance_decay = relevance_decay,
5.31 - max_relevant_per_iter = max_relevant_per_iter,
5.32 + relevance_decay = relevance_decay, max_relevant = max_relevant,
5.33 theory_relevant = theory_relevant, isar_proof = isar_proof,
5.34 isar_shrink_factor = isar_shrink_factor, timeout = timeout}
5.35 end