# HG changeset patch # User blanchet # Date 1342815585 -7200 # Node ID d4b7c7be311673ce1fcf15721c63ffc8756a3495 # Parent 2b5ad61e2ccc95f27c887ebcec9bf26a64e9f328 renamed ML files diff -r 2b5ad61e2ccc -r d4b7c7be3116 src/HOL/IsaMakefile --- a/src/HOL/IsaMakefile Fri Jul 20 22:19:45 2012 +0200 +++ b/src/HOL/IsaMakefile Fri Jul 20 22:19:45 2012 +0200 @@ -371,8 +371,8 @@ Tools/semiring_normalizer.ML \ Tools/Sledgehammer/async_manager.ML \ Tools/Sledgehammer/sledgehammer_fact.ML \ - Tools/Sledgehammer/sledgehammer_filter_iter.ML \ - Tools/Sledgehammer/sledgehammer_filter_mash.ML \ + Tools/Sledgehammer/sledgehammer_mash.ML \ + Tools/Sledgehammer/sledgehammer_mepo.ML \ Tools/Sledgehammer/sledgehammer_minimize.ML \ Tools/Sledgehammer/sledgehammer_isar.ML \ Tools/Sledgehammer/sledgehammer_provers.ML \ diff -r 2b5ad61e2ccc -r d4b7c7be3116 src/HOL/Sledgehammer.thy --- a/src/HOL/Sledgehammer.thy Fri Jul 20 22:19:45 2012 +0200 +++ b/src/HOL/Sledgehammer.thy Fri Jul 20 22:19:45 2012 +0200 @@ -14,8 +14,8 @@ "Tools/Sledgehammer/sledgehammer_fact.ML" "Tools/Sledgehammer/sledgehammer_provers.ML" "Tools/Sledgehammer/sledgehammer_minimize.ML" - "Tools/Sledgehammer/sledgehammer_filter_iter.ML" - "Tools/Sledgehammer/sledgehammer_filter_mash.ML" + "Tools/Sledgehammer/sledgehammer_mepo.ML" + "Tools/Sledgehammer/sledgehammer_mash.ML" "Tools/Sledgehammer/sledgehammer_run.ML" "Tools/Sledgehammer/sledgehammer_isar.ML" begin diff -r 2b5ad61e2ccc -r d4b7c7be3116 src/HOL/Tools/Sledgehammer/sledgehammer_filter_iter.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_filter_iter.ML Fri Jul 20 22:19:45 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,537 +0,0 @@ -(* Title: HOL/Tools/Sledgehammer/sledgehammer_filter_iter.ML - Author: Jia Meng, Cambridge University Computer Laboratory and NICTA - Author: Jasmin Blanchette, TU Muenchen - -Sledgehammer's iterative relevance filter. -*) - -signature SLEDGEHAMMER_FILTER_ITER = -sig - type stature = ATP_Problem_Generate.stature - type fact = Sledgehammer_Fact.fact - type params = Sledgehammer_Provers.params - type relevance_fudge = Sledgehammer_Provers.relevance_fudge - - val trace : bool Config.T - val pseudo_abs_name : string - val pseudo_skolem_prefix : string - val const_names_in_fact : - theory -> (string * typ -> term list -> bool * term list) -> term - -> string list - val iterative_relevant_facts : - Proof.context -> params -> string -> int -> relevance_fudge option - -> term list -> term -> fact list -> fact list -end; - -structure Sledgehammer_Filter_Iter : SLEDGEHAMMER_FILTER_ITER = -struct - -open ATP_Problem_Generate -open Sledgehammer_Fact -open Sledgehammer_Provers - -val trace = - Attrib.setup_config_bool @{binding sledgehammer_filter_iter_trace} (K false) -fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else () - -val sledgehammer_prefix = "Sledgehammer" ^ Long_Name.separator -val pseudo_abs_name = sledgehammer_prefix ^ "abs" -val pseudo_skolem_prefix = sledgehammer_prefix ^ "sko" -val theory_const_suffix = Long_Name.separator ^ " 1" - -fun order_of_type (Type (@{type_name fun}, [T1, T2])) = - Int.max (order_of_type T1 + 1, order_of_type T2) - | order_of_type (Type (_, Ts)) = fold (Integer.max o order_of_type) Ts 0 - | order_of_type _ = 0 - -(* An abstraction of Isabelle types and first-order terms *) -datatype pattern = PVar | PApp of string * pattern list -datatype ptype = PType of int * pattern list - -fun string_for_pattern PVar = "_" - | string_for_pattern (PApp (s, ps)) = - if null ps then s else s ^ string_for_patterns ps -and string_for_patterns ps = "(" ^ commas (map string_for_pattern ps) ^ ")" -fun string_for_ptype (PType (_, ps)) = string_for_patterns ps - -(*Is the second type an instance of the first one?*) -fun match_pattern (PVar, _) = true - | match_pattern (PApp _, PVar) = false - | match_pattern (PApp (s, ps), PApp (t, qs)) = - s = t andalso match_patterns (ps, qs) -and match_patterns (_, []) = true - | match_patterns ([], _) = false - | match_patterns (p :: ps, q :: qs) = - match_pattern (p, q) andalso match_patterns (ps, qs) -fun match_ptype (PType (_, ps), PType (_, qs)) = match_patterns (ps, qs) - -(* Is there a unifiable constant? *) -fun pconst_mem f consts (s, ps) = - exists (curry (match_ptype o f) ps) - (map snd (filter (curry (op =) s o fst) consts)) -fun pconst_hyper_mem f const_tab (s, ps) = - exists (curry (match_ptype o f) ps) (these (Symtab.lookup const_tab s)) - -fun pattern_for_type (Type (s, Ts)) = PApp (s, map pattern_for_type Ts) - | pattern_for_type (TFree (s, _)) = PApp (s, []) - | pattern_for_type (TVar _) = PVar - -(* Pairs a constant with the list of its type instantiations. *) -fun ptype thy const x = - (if const then map pattern_for_type (these (try (Sign.const_typargs thy) x)) - else []) -fun rich_ptype thy const (s, T) = - PType (order_of_type T, ptype thy const (s, T)) -fun rich_pconst thy const (s, T) = (s, rich_ptype thy const (s, T)) - -fun string_for_hyper_pconst (s, ps) = - s ^ "{" ^ commas (map string_for_ptype ps) ^ "}" - -(* Add a pconstant to the table, but a [] entry means a standard - connective, which we ignore.*) -fun add_pconst_to_table also_skolem (s, p) = - if (not also_skolem andalso String.isPrefix pseudo_skolem_prefix s) then I - else Symtab.map_default (s, [p]) (insert (op =) p) - -(* Set constants tend to pull in too many irrelevant facts. We limit the damage - by treating them more or less as if they were built-in but add their - axiomatization at the end. *) -val set_consts = [@{const_name Collect}, @{const_name Set.member}] -val set_thms = @{thms Collect_mem_eq mem_Collect_eq Collect_cong} - -fun add_pconsts_in_term thy is_built_in_const also_skolems pos = - let - val flip = Option.map not - (* We include free variables, as well as constants, to handle locales. For - each quantifiers that must necessarily be skolemized by the automatic - prover, we introduce a fresh constant to simulate the effect of - Skolemization. *) - fun do_const const ext_arg (x as (s, _)) ts = - let val (built_in, ts) = is_built_in_const x ts in - if member (op =) set_consts s then - fold (do_term ext_arg) ts - else - (not built_in - ? add_pconst_to_table also_skolems (rich_pconst thy const x)) - #> fold (do_term false) ts - end - and do_term ext_arg t = - case strip_comb t of - (Const x, ts) => do_const true ext_arg x ts - | (Free x, ts) => do_const false ext_arg x ts - | (Abs (_, T, t'), ts) => - ((null ts andalso not ext_arg) - (* Since lambdas on the right-hand side of equalities are usually - extensionalized later by "abs_extensionalize_term", we don't - penalize them here. *) - ? add_pconst_to_table true (pseudo_abs_name, - PType (order_of_type T + 1, []))) - #> fold (do_term false) (t' :: ts) - | (_, ts) => fold (do_term false) ts - fun do_quantifier will_surely_be_skolemized abs_T body_t = - do_formula pos body_t - #> (if also_skolems andalso will_surely_be_skolemized then - add_pconst_to_table true (pseudo_skolem_prefix ^ serial_string (), - PType (order_of_type abs_T, [])) - else - I) - and do_term_or_formula ext_arg T = - if T = HOLogic.boolT then do_formula NONE else do_term ext_arg - and do_formula pos t = - case t of - Const (@{const_name all}, _) $ Abs (_, T, t') => - do_quantifier (pos = SOME false) T t' - | @{const "==>"} $ t1 $ t2 => - do_formula (flip pos) t1 #> do_formula pos t2 - | Const (@{const_name "=="}, Type (_, [T, _])) $ t1 $ t2 => - do_term_or_formula false T t1 #> do_term_or_formula true T t2 - | @{const Trueprop} $ t1 => do_formula pos t1 - | @{const False} => I - | @{const True} => I - | @{const Not} $ t1 => do_formula (flip pos) t1 - | Const (@{const_name All}, _) $ Abs (_, T, t') => - do_quantifier (pos = SOME false) T t' - | Const (@{const_name Ex}, _) $ Abs (_, T, t') => - do_quantifier (pos = SOME true) T t' - | @{const HOL.conj} $ t1 $ t2 => fold (do_formula pos) [t1, t2] - | @{const HOL.disj} $ t1 $ t2 => fold (do_formula pos) [t1, t2] - | @{const HOL.implies} $ t1 $ t2 => - do_formula (flip pos) t1 #> do_formula pos t2 - | Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2 => - do_term_or_formula false T t1 #> do_term_or_formula true T t2 - | Const (@{const_name If}, Type (_, [_, Type (_, [T, _])])) - $ t1 $ t2 $ t3 => - do_formula NONE t1 #> fold (do_term_or_formula false T) [t2, t3] - | Const (@{const_name Ex1}, _) $ Abs (_, T, t') => - do_quantifier (is_some pos) T t' - | Const (@{const_name Ball}, _) $ t1 $ Abs (_, T, t') => - do_quantifier (pos = SOME false) T - (HOLogic.mk_imp (incr_boundvars 1 t1 $ Bound 0, t')) - | Const (@{const_name Bex}, _) $ t1 $ Abs (_, T, t') => - do_quantifier (pos = SOME true) T - (HOLogic.mk_conj (incr_boundvars 1 t1 $ Bound 0, t')) - | (t0 as Const (_, @{typ bool})) $ t1 => - do_term false t0 #> do_formula pos t1 (* theory constant *) - | _ => do_term false t - in do_formula pos end - -fun pconsts_in_fact thy is_built_in_const t = - Symtab.fold (fn (s, pss) => fold (cons o pair s) pss) - (Symtab.empty |> add_pconsts_in_term thy is_built_in_const true - (SOME true) t) [] - -val const_names_in_fact = map fst ooo pconsts_in_fact - -(* Inserts a dummy "constant" referring to the theory name, so that relevance - takes the given theory into account. *) -fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...} - : relevance_fudge) thy_name t = - if exists (curry (op <) 0.0) [theory_const_rel_weight, - theory_const_irrel_weight] then - Const (thy_name ^ theory_const_suffix, @{typ bool}) $ t - else - t - -fun theory_const_prop_of fudge th = - theory_constify fudge (Context.theory_name (theory_of_thm th)) (prop_of th) - -fun pair_consts_fact thy is_built_in_const fudge fact = - case fact |> snd |> theory_const_prop_of fudge - |> pconsts_in_fact thy is_built_in_const of - [] => NONE - | consts => SOME ((fact, consts), NONE) - -(* A two-dimensional symbol table counts frequencies of constants. It's keyed - first by constant name and second by its list of type instantiations. For the - latter, we need a linear ordering on "pattern list". *) - -fun pattern_ord p = - case p of - (PVar, PVar) => EQUAL - | (PVar, PApp _) => LESS - | (PApp _, PVar) => GREATER - | (PApp q1, PApp q2) => - prod_ord fast_string_ord (dict_ord pattern_ord) (q1, q2) -fun ptype_ord (PType p, PType q) = - prod_ord (dict_ord pattern_ord) int_ord (swap p, swap q) - -structure PType_Tab = Table(type key = ptype val ord = ptype_ord) - -fun count_fact_consts thy fudge = - let - fun do_const const (s, T) ts = - (* Two-dimensional table update. Constant maps to types maps to count. *) - PType_Tab.map_default (rich_ptype thy const (s, T), 0) (Integer.add 1) - |> Symtab.map_default (s, PType_Tab.empty) - #> fold do_term ts - and do_term t = - case strip_comb t of - (Const x, ts) => do_const true x ts - | (Free x, ts) => do_const false x ts - | (Abs (_, _, t'), ts) => fold do_term (t' :: ts) - | (_, ts) => fold do_term ts - in do_term o theory_const_prop_of fudge o snd end - -fun pow_int _ 0 = 1.0 - | pow_int x 1 = x - | pow_int x n = if n > 0 then x * pow_int x (n - 1) else pow_int x (n + 1) / x - -(*The frequency of a constant is the sum of those of all instances of its type.*) -fun pconst_freq match const_tab (c, ps) = - PType_Tab.fold (fn (qs, m) => match (ps, qs) ? Integer.add m) - (the (Symtab.lookup const_tab c)) 0 - - -(* A surprising number of theorems contain only a few significant constants. - These include all induction rules, and other general theorems. *) - -(* "log" seems best in practice. A constant function of one ignores the constant - frequencies. Rare constants give more points if they are relevant than less - rare ones. *) -fun rel_weight_for _ freq = 1.0 + 2.0 / Math.ln (Real.fromInt freq + 1.0) - -(* Irrelevant constants are treated differently. We associate lower penalties to - very rare constants and very common ones -- the former because they can't - lead to the inclusion of too many new facts, and the latter because they are - so common as to be of little interest. *) -fun irrel_weight_for ({worse_irrel_freq, higher_order_irrel_weight, ...} - : relevance_fudge) order freq = - let val (k, x) = worse_irrel_freq |> `Real.ceil in - (if freq < k then Math.ln (Real.fromInt (freq + 1)) / Math.ln x - else rel_weight_for order freq / rel_weight_for order k) - * pow_int higher_order_irrel_weight (order - 1) - end - -fun multiplier_for_const_name local_const_multiplier s = - if String.isSubstring "." s then 1.0 else local_const_multiplier - -(* Computes a constant's weight, as determined by its frequency. *) -fun generic_pconst_weight local_const_multiplier abs_weight skolem_weight - theory_const_weight chained_const_weight weight_for f - const_tab chained_const_tab (c as (s, PType (m, _))) = - if s = pseudo_abs_name then - abs_weight - else if String.isPrefix pseudo_skolem_prefix s then - skolem_weight - else if String.isSuffix theory_const_suffix s then - theory_const_weight - else - multiplier_for_const_name local_const_multiplier s - * weight_for m (pconst_freq (match_ptype o f) const_tab c) - |> (if chained_const_weight < 1.0 andalso - pconst_hyper_mem I chained_const_tab c then - curry (op *) chained_const_weight - else - I) - -fun rel_pconst_weight ({local_const_multiplier, abs_rel_weight, - theory_const_rel_weight, ...} : relevance_fudge) - const_tab = - generic_pconst_weight local_const_multiplier abs_rel_weight 0.0 - theory_const_rel_weight 0.0 rel_weight_for I const_tab - Symtab.empty - -fun irrel_pconst_weight (fudge as {local_const_multiplier, abs_irrel_weight, - skolem_irrel_weight, - theory_const_irrel_weight, - chained_const_irrel_weight, ...}) - const_tab chained_const_tab = - generic_pconst_weight local_const_multiplier abs_irrel_weight - skolem_irrel_weight theory_const_irrel_weight - chained_const_irrel_weight (irrel_weight_for fudge) swap - const_tab chained_const_tab - -fun stature_bonus ({intro_bonus, ...} : relevance_fudge) (_, Intro) = - intro_bonus - | stature_bonus {elim_bonus, ...} (_, Elim) = elim_bonus - | stature_bonus {simp_bonus, ...} (_, Simp) = simp_bonus - | stature_bonus {local_bonus, ...} (Local, _) = local_bonus - | stature_bonus {assum_bonus, ...} (Assum, _) = assum_bonus - | stature_bonus {chained_bonus, ...} (Chained, _) = chained_bonus - | stature_bonus _ _ = 0.0 - -fun is_odd_const_name s = - s = pseudo_abs_name orelse String.isPrefix pseudo_skolem_prefix s orelse - String.isSuffix theory_const_suffix s - -fun fact_weight fudge stature const_tab relevant_consts chained_consts - fact_consts = - case fact_consts |> List.partition (pconst_hyper_mem I relevant_consts) - ||> filter_out (pconst_hyper_mem swap relevant_consts) of - ([], _) => 0.0 - | (rel, irrel) => - if forall (forall (is_odd_const_name o fst)) [rel, irrel] then - 0.0 - else - let - val irrel = irrel |> filter_out (pconst_mem swap rel) - val rel_weight = - 0.0 |> fold (curry (op +) o rel_pconst_weight fudge const_tab) rel - val irrel_weight = - ~ (stature_bonus fudge stature) - |> fold (curry (op +) - o irrel_pconst_weight fudge const_tab chained_consts) irrel - val res = rel_weight / (rel_weight + irrel_weight) - in if Real.isFinite res then res else 0.0 end - -fun take_most_relevant ctxt max_facts remaining_max - ({max_imperfect, max_imperfect_exp, ...} : relevance_fudge) - (candidates : ((fact * (string * ptype) list) * real) list) = - let - val max_imperfect = - Real.ceil (Math.pow (max_imperfect, - Math.pow (Real.fromInt remaining_max - / Real.fromInt max_facts, max_imperfect_exp))) - val (perfect, imperfect) = - candidates |> sort (Real.compare o swap o pairself snd) - |> take_prefix (fn (_, w) => w > 0.99999) - val ((accepts, more_rejects), rejects) = - chop max_imperfect imperfect |>> append perfect |>> chop remaining_max - in - trace_msg ctxt (fn () => - "Actually passed (" ^ string_of_int (length accepts) ^ " of " ^ - string_of_int (length candidates) ^ "): " ^ - (accepts |> map (fn ((((name, _), _), _), weight) => - name () ^ " [" ^ Real.toString weight ^ "]") - |> commas)); - (accepts, more_rejects @ rejects) - end - -fun if_empty_replace_with_scope thy is_built_in_const facts sc tab = - if Symtab.is_empty tab then - Symtab.empty - |> fold (add_pconsts_in_term thy is_built_in_const false (SOME false)) - (map_filter (fn ((_, (sc', _)), th) => - if sc' = sc then SOME (prop_of th) else NONE) facts) - else - tab - -fun consider_arities is_built_in_const th = - let - fun aux _ _ NONE = NONE - | aux t args (SOME tab) = - case t of - t1 $ t2 => SOME tab |> aux t1 (t2 :: args) |> aux t2 [] - | Const (x as (s, _)) => - (if is_built_in_const x args |> fst then - SOME tab - else case Symtab.lookup tab s of - NONE => SOME (Symtab.update (s, length args) tab) - | SOME n => if n = length args then SOME tab else NONE) - | _ => SOME tab - in aux (prop_of th) [] end - -(* FIXME: This is currently only useful for polymorphic type encodings. *) -fun could_benefit_from_ext is_built_in_const facts = - fold (consider_arities is_built_in_const o snd) facts (SOME Symtab.empty) - |> is_none - -(* High enough so that it isn't wrongly considered as very relevant (e.g., for E - weights), but low enough so that it is unlikely to be truncated away if few - facts are included. *) -val special_fact_index = 75 - -fun relevance_filter ctxt thres0 decay max_facts is_built_in_const - (fudge as {threshold_divisor, ridiculous_threshold, ...}) facts hyp_ts - concl_t = - let - val thy = Proof_Context.theory_of ctxt - val const_tab = fold (count_fact_consts thy fudge) facts Symtab.empty - val add_pconsts = add_pconsts_in_term thy is_built_in_const false o SOME - val chained_ts = - facts |> map_filter (fn ((_, (Chained, _)), th) => SOME (prop_of th) - | _ => NONE) - val chained_const_tab = Symtab.empty |> fold (add_pconsts true) chained_ts - val goal_const_tab = - Symtab.empty |> fold (add_pconsts true) hyp_ts - |> add_pconsts false concl_t - |> (fn tab => if Symtab.is_empty tab then chained_const_tab else tab) - |> fold (if_empty_replace_with_scope thy is_built_in_const facts) - [Chained, Assum, Local] - fun iter j remaining_max thres rel_const_tab hopeless hopeful = - let - fun relevant [] _ [] = - (* Nothing has been added this iteration. *) - if j = 0 andalso thres >= ridiculous_threshold then - (* First iteration? Try again. *) - iter 0 max_facts (thres / threshold_divisor) rel_const_tab - hopeless hopeful - else - [] - | relevant candidates rejects [] = - let - val (accepts, more_rejects) = - take_most_relevant ctxt max_facts remaining_max fudge candidates - val rel_const_tab' = - rel_const_tab - |> fold (add_pconst_to_table false) (maps (snd o fst) accepts) - fun is_dirty (c, _) = - Symtab.lookup rel_const_tab' c <> Symtab.lookup rel_const_tab c - val (hopeful_rejects, hopeless_rejects) = - (rejects @ hopeless, ([], [])) - |-> fold (fn (ax as (_, consts), old_weight) => - if exists is_dirty consts then - apfst (cons (ax, NONE)) - else - apsnd (cons (ax, old_weight))) - |>> append (more_rejects - |> map (fn (ax as (_, consts), old_weight) => - (ax, if exists is_dirty consts then NONE - else SOME old_weight))) - val thres = - 1.0 - (1.0 - thres) - * Math.pow (decay, Real.fromInt (length accepts)) - val remaining_max = remaining_max - length accepts - in - trace_msg ctxt (fn () => "New or updated constants: " ^ - commas (rel_const_tab' |> Symtab.dest - |> subtract (op =) (rel_const_tab |> Symtab.dest) - |> map string_for_hyper_pconst)); - map (fst o fst) accepts @ - (if remaining_max = 0 then - [] - else - iter (j + 1) remaining_max thres rel_const_tab' - hopeless_rejects hopeful_rejects) - end - | relevant candidates rejects - (((ax as (((_, stature), _), fact_consts)), cached_weight) - :: hopeful) = - let - val weight = - case cached_weight of - SOME w => w - | NONE => fact_weight fudge stature const_tab rel_const_tab - chained_const_tab fact_consts - in - if weight >= thres then - relevant ((ax, weight) :: candidates) rejects hopeful - else - relevant candidates ((ax, weight) :: rejects) hopeful - end - in - trace_msg ctxt (fn () => - "ITERATION " ^ string_of_int j ^ ": current threshold: " ^ - Real.toString thres ^ ", constants: " ^ - commas (rel_const_tab |> Symtab.dest - |> filter (curry (op <>) [] o snd) - |> map string_for_hyper_pconst)); - relevant [] [] hopeful - end - fun uses_const s t = - fold_aterms (curry (fn (Const (s', _), false) => s' = s | (_, b) => b)) t - false - fun uses_const_anywhere accepts s = - exists (uses_const s o prop_of o snd) accepts orelse - exists (uses_const s) (concl_t :: hyp_ts) - fun add_set_const_thms accepts = - exists (uses_const_anywhere accepts) set_consts ? append set_thms - fun insert_into_facts accepts [] = accepts - | insert_into_facts accepts ths = - let - val add = facts |> filter (member Thm.eq_thm_prop ths o snd) - val (bef, after) = - accepts |> filter_out (member Thm.eq_thm_prop ths o snd) - |> take (max_facts - length add) - |> chop special_fact_index - in bef @ add @ after end - fun insert_special_facts accepts = - (* FIXME: get rid of "ext" here once it is treated as a helper *) - [] |> could_benefit_from_ext is_built_in_const accepts ? cons @{thm ext} - |> add_set_const_thms accepts - |> insert_into_facts accepts - in - facts |> map_filter (pair_consts_fact thy is_built_in_const fudge) - |> iter 0 max_facts thres0 goal_const_tab [] - |> insert_special_facts - |> tap (fn accepts => trace_msg ctxt (fn () => - "Total relevant: " ^ string_of_int (length accepts))) - end - -fun iterative_relevant_facts ctxt - ({fact_thresholds = (thres0, thres1), ...} : params) prover - max_facts fudge hyp_ts concl_t facts = - let - val thy = Proof_Context.theory_of ctxt - val is_built_in_const = - Sledgehammer_Provers.is_built_in_const_for_prover ctxt prover - val fudge = - case fudge of - SOME fudge => fudge - | NONE => Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover - val decay = Math.pow ((1.0 - thres1) / (1.0 - thres0), - 1.0 / Real.fromInt (max_facts + 1)) - in - trace_msg ctxt (fn () => "Considering " ^ string_of_int (length facts) ^ - " facts"); - (if thres1 < 0.0 then - facts - else if thres0 > 1.0 orelse thres0 > thres1 then - [] - else - relevance_filter ctxt thres0 decay max_facts is_built_in_const fudge - facts hyp_ts - (concl_t |> theory_constify fudge (Context.theory_name thy))) - end - -end; diff -r 2b5ad61e2ccc -r d4b7c7be3116 src/HOL/Tools/Sledgehammer/sledgehammer_filter_mash.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_filter_mash.ML Fri Jul 20 22:19:45 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,691 +0,0 @@ -(* Title: HOL/Tools/Sledgehammer/sledgehammer_filter_mash.ML - Author: Jasmin Blanchette, TU Muenchen - -Sledgehammer's machine-learning-based relevance filter (MaSh). -*) - -signature SLEDGEHAMMER_FILTER_MASH = -sig - type status = ATP_Problem_Generate.status - type stature = ATP_Problem_Generate.stature - type fact = Sledgehammer_Fact.fact - type fact_override = Sledgehammer_Fact.fact_override - type params = Sledgehammer_Provers.params - type relevance_fudge = Sledgehammer_Provers.relevance_fudge - type prover_result = Sledgehammer_Provers.prover_result - - val trace : bool Config.T - val MaShN : string - val mepoN : string - val mashN : string - val meshN : string - val fact_filters : string list - val escape_meta : string -> string - val escape_metas : string list -> string - val unescape_meta : string -> string - val unescape_metas : string -> string list - val extract_query : string -> string * string list - val nickname_of : thm -> string - val suggested_facts : string list -> ('a * thm) list -> ('a * thm) list - val mesh_facts : - int -> (('a * thm) list * ('a * thm) list) list -> ('a * thm) list - val is_likely_tautology_or_too_meta : thm -> bool - val theory_ord : theory * theory -> order - val thm_ord : thm * thm -> order - val features_of : - Proof.context -> string -> theory -> status -> term list -> string list - val isabelle_dependencies_of : unit Symtab.table -> thm -> string list - val goal_of_thm : theory -> thm -> thm - val run_prover_for_mash : - Proof.context -> params -> string -> fact list -> thm -> prover_result - val mash_CLEAR : Proof.context -> unit - val mash_INIT : - Proof.context -> bool - -> (string * string list * string list * string list) list -> unit - val mash_ADD : - Proof.context -> bool - -> (string * string list * string list * string list) list -> unit - val mash_QUERY : - Proof.context -> bool -> int -> string list * string list -> string list - val mash_unlearn : Proof.context -> unit - val mash_could_suggest_facts : unit -> bool - val mash_can_suggest_facts : Proof.context -> bool - val mash_suggest_facts : - Proof.context -> params -> string -> int -> term list -> term - -> ('a * thm) list -> ('a * thm) list * ('a * thm) list - val mash_learn_thy : - Proof.context -> params -> theory -> Time.time -> fact list -> string - val mash_learn_proof : - Proof.context -> params -> term -> ('a * thm) list -> thm list -> unit - val relevant_facts : - Proof.context -> params -> string -> int -> fact_override -> term list - -> term -> fact list -> fact list - val kill_learners : unit -> unit - val running_learners : unit -> unit -end; - -structure Sledgehammer_Filter_MaSh : SLEDGEHAMMER_FILTER_MASH = -struct - -open ATP_Util -open ATP_Problem_Generate -open Sledgehammer_Util -open Sledgehammer_Fact -open Sledgehammer_Filter_Iter -open Sledgehammer_Provers -open Sledgehammer_Minimize - -val trace = - Attrib.setup_config_bool @{binding sledgehammer_filter_mash_trace} (K false) -fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else () - -val MaShN = "MaSh" - -val mepoN = "mepo" -val mashN = "mash" -val meshN = "mesh" - -val fact_filters = [meshN, mepoN, mashN] - -fun mash_home () = getenv "MASH_HOME" -fun mash_state_dir () = - getenv "ISABELLE_HOME_USER" ^ "/mash" - |> tap (Isabelle_System.mkdir o Path.explode) -fun mash_state_path () = mash_state_dir () ^ "/state" |> Path.explode - - -(*** Isabelle helpers ***) - -fun meta_char c = - if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse - c = #")" orelse c = #"," then - String.str c - else - (* fixed width, in case more digits follow *) - "\\" ^ stringN_of_int 3 (Char.ord c) - -fun unmeta_chars accum [] = String.implode (rev accum) - | unmeta_chars accum (#"\\" :: d1 :: d2 :: d3 :: cs) = - (case Int.fromString (String.implode [d1, d2, d3]) of - SOME n => unmeta_chars (Char.chr n :: accum) cs - | NONE => "" (* error *)) - | unmeta_chars _ (#"\\" :: _) = "" (* error *) - | unmeta_chars accum (c :: cs) = unmeta_chars (c :: accum) cs - -val escape_meta = String.translate meta_char -val escape_metas = map escape_meta #> space_implode " " -val unescape_meta = String.explode #> unmeta_chars [] -val unescape_metas = - space_explode " " #> filter_out (curry (op =) "") #> map unescape_meta - -fun extract_query line = - case space_explode ":" line of - [goal_name, suggs] => (unescape_meta goal_name, unescape_metas suggs) - | _ => ("", []) - -fun parent_of_local_thm th = - let - val thy = th |> Thm.theory_of_thm - val facts = thy |> Global_Theory.facts_of - val space = facts |> Facts.space_of - fun id_of s = #id (Name_Space.the_entry space s) - fun max_id (s', _) (s, id) = - let val id' = id_of s' in if id > id' then (s, id) else (s', id') end - in ("", ~1) |> Facts.fold_static max_id facts |> fst end - -val local_prefix = "local" ^ Long_Name.separator - -fun nickname_of th = - let val hint = Thm.get_name_hint th in - (* FIXME: There must be a better way to detect local facts. *) - case try (unprefix local_prefix) hint of - SOME suff => - parent_of_local_thm th ^ Long_Name.separator ^ Long_Name.separator ^ suff - | NONE => hint - end - -fun suggested_facts suggs facts = - let - fun add_fact (fact as (_, th)) = Symtab.default (nickname_of th, fact) - val tab = Symtab.empty |> fold add_fact facts - in map_filter (Symtab.lookup tab) suggs end - -(* Ad hoc score function roughly based on Blanchette's Ringberg 2011 data. *) -fun score x = Math.pow (1.5, 15.5 - 0.05 * Real.fromInt x) + 15.0 - -fun sum_sq_avg [] = 0 - | sum_sq_avg xs = - Real.ceil (100000.0 * fold (curry (op +) o score) xs 0.0) div length xs - -fun mesh_facts max_facts [(selected, unknown)] = - take max_facts selected @ take (max_facts - length selected) unknown - | mesh_facts max_facts mess = - let - val mess = mess |> map (apfst (`length)) - val fact_eq = Thm.eq_thm o pairself snd - fun score_in fact ((sel_len, sels), unks) = - case find_index (curry fact_eq fact) sels of - ~1 => (case find_index (curry fact_eq fact) unks of - ~1 => SOME sel_len - | _ => NONE) - | j => SOME j - fun score_of fact = mess |> map_filter (score_in fact) |> sum_sq_avg - val facts = fold (union fact_eq o take max_facts o snd o fst) mess [] - in - facts |> map (`score_of) |> sort (int_ord o swap o pairself fst) - |> map snd |> take max_facts - end - -val thy_feature_prefix = "y_" - -val thy_feature_name_of = prefix thy_feature_prefix -val const_name_of = prefix const_prefix -val type_name_of = prefix type_const_prefix -val class_name_of = prefix class_prefix - -fun is_likely_tautology_or_too_meta th = - let - val is_boring_const = member (op =) atp_widely_irrelevant_consts - fun is_boring_bool t = - not (exists_Const (not o is_boring_const o fst) t) orelse - exists_type (exists_subtype (curry (op =) @{typ prop})) t - fun is_boring_prop (@{const Trueprop} $ t) = is_boring_bool t - | is_boring_prop (@{const "==>"} $ t $ u) = - is_boring_prop t andalso is_boring_prop u - | is_boring_prop (Const (@{const_name all}, _) $ (Abs (_, _, t)) $ u) = - is_boring_prop t andalso is_boring_prop u - | is_boring_prop (Const (@{const_name "=="}, _) $ t $ u) = - is_boring_bool t andalso is_boring_bool u - | is_boring_prop _ = true - in - is_boring_prop (prop_of th) andalso not (Thm.eq_thm_prop (@{thm ext}, th)) - end - -fun theory_ord p = - if Theory.eq_thy p then - EQUAL - else if Theory.subthy p then - LESS - else if Theory.subthy (swap p) then - GREATER - else case int_ord (pairself (length o Theory.ancestors_of) p) of - EQUAL => string_ord (pairself Context.theory_name p) - | order => order - -val thm_ord = theory_ord o pairself theory_of_thm - -val bad_types = [@{type_name prop}, @{type_name bool}, @{type_name fun}] - -fun interesting_terms_types_and_classes ctxt prover term_max_depth - type_max_depth ts = - let - fun is_bad_const (x as (s, _)) args = - member (op =) atp_logical_consts s orelse - fst (is_built_in_const_for_prover ctxt prover x args) - fun add_classes @{sort type} = I - | add_classes S = union (op =) (map class_name_of S) - fun do_add_type (Type (s, Ts)) = - (not (member (op =) bad_types s) ? insert (op =) (type_name_of s)) - #> fold do_add_type Ts - | do_add_type (TFree (_, S)) = add_classes S - | do_add_type (TVar (_, S)) = add_classes S - fun add_type T = type_max_depth >= 0 ? do_add_type T - fun mk_app s args = - if member (op <>) args "" then s ^ "(" ^ space_implode "," args ^ ")" - else s - fun patternify ~1 _ = "" - | patternify depth t = - case strip_comb t of - (Const (s, _), args) => - mk_app (const_name_of s) (map (patternify (depth - 1)) args) - | _ => "" - fun add_term_patterns ~1 _ = I - | add_term_patterns depth t = - insert (op =) (patternify depth t) - #> add_term_patterns (depth - 1) t - val add_term = add_term_patterns term_max_depth - fun add_patterns t = - let val (head, args) = strip_comb t in - (case head of - Const (x as (_, T)) => - not (is_bad_const x args) ? (add_term t #> add_type T) - | Free (_, T) => add_type T - | Var (_, T) => add_type T - | Abs (_, T, body) => add_type T #> add_patterns body - | _ => I) - #> fold add_patterns args - end - in [] |> fold add_patterns ts end - -fun is_exists (s, _) = (s = @{const_name Ex} orelse s = @{const_name Ex1}) - -val term_max_depth = 1 -val type_max_depth = 1 - -(* TODO: Generate type classes for types? *) -fun features_of ctxt prover thy status ts = - thy_feature_name_of (Context.theory_name thy) :: - interesting_terms_types_and_classes ctxt prover term_max_depth type_max_depth - ts - |> forall is_lambda_free ts ? cons "no_lams" - |> forall (not o exists_Const is_exists) ts ? cons "no_skos" - |> (case status of - General => I - | Induction => cons "induction" - | Intro => cons "intro" - | Inductive => cons "inductive" - | Elim => cons "elim" - | Simp => cons "simp" - | Def => cons "def") - -fun isabelle_dependencies_of all_facts = thms_in_proof (SOME all_facts) - -val freezeT = Type.legacy_freeze_type - -fun freeze (t $ u) = freeze t $ freeze u - | freeze (Abs (s, T, t)) = Abs (s, freezeT T, freeze t) - | freeze (Var ((s, _), T)) = Free (s, freezeT T) - | freeze (Const (s, T)) = Const (s, freezeT T) - | freeze (Free (s, T)) = Free (s, freezeT T) - | freeze t = t - -fun goal_of_thm thy = prop_of #> freeze #> cterm_of thy #> Goal.init - -fun run_prover_for_mash ctxt params prover facts goal = - let - val problem = - {state = Proof.init ctxt, goal = goal, subgoal = 1, subgoal_count = 1, - facts = facts |> map (apfst (apfst (fn name => name ()))) - |> map Untranslated_Fact} - val prover = get_minimizing_prover ctxt Normal (K ()) prover - in prover params (K (K (K ""))) problem end - - -(*** Low-level communication with MaSh ***) - -fun write_file (xs, f) file = - let val path = Path.explode file in - File.write path ""; - xs |> chunk_list 500 - |> List.app (File.append path o space_implode "" o map f) - end - -fun mash_info overlord = - if overlord then (getenv "ISABELLE_HOME_USER", "") - else (getenv "ISABELLE_TMP", serial_string ()) - -fun run_mash ctxt overlord (temp_dir, serial) core = - let - val log_file = temp_dir ^ "/mash_log" ^ serial - val err_file = temp_dir ^ "/mash_err" ^ serial - val command = - mash_home () ^ "/mash.py --quiet --outputDir " ^ mash_state_dir () ^ - " --log " ^ log_file ^ " " ^ core ^ " 2>&1 > " ^ err_file - in - trace_msg ctxt (fn () => "Running " ^ command); - write_file ([], K "") log_file; - write_file ([], K "") err_file; - Isabelle_System.bash command; - if overlord then () - else (map (File.rm o Path.explode) [log_file, err_file]; ()); - trace_msg ctxt (K "Done") - end - -(* TODO: Eliminate code once "mash.py" handles sequences of ADD commands as fast - as a single INIT. *) -fun run_mash_init ctxt overlord write_access write_feats write_deps = - let - val info as (temp_dir, serial) = mash_info overlord - val in_dir = temp_dir ^ "/mash_init" ^ serial - val in_dir_path = in_dir |> Path.explode |> tap Isabelle_System.mkdir - in - write_file write_access (in_dir ^ "/mash_accessibility"); - write_file write_feats (in_dir ^ "/mash_features"); - write_file write_deps (in_dir ^ "/mash_dependencies"); - run_mash ctxt overlord info ("--init --inputDir " ^ in_dir); - (* FIXME: temporary hack *) - if overlord then () - else (Isabelle_System.bash ("rm -r -f " ^ File.shell_path in_dir_path); ()) - end - -fun run_mash_commands ctxt overlord save max_suggs write_cmds read_suggs = - let - val info as (temp_dir, serial) = mash_info overlord - val sugg_file = temp_dir ^ "/mash_suggs" ^ serial - val cmd_file = temp_dir ^ "/mash_commands" ^ serial - in - write_file ([], K "") sugg_file; - write_file write_cmds cmd_file; - run_mash ctxt overlord info - ("--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^ - " --numberOfPredictions " ^ string_of_int max_suggs ^ - (if save then " --saveModel" else "")); - read_suggs (fn () => File.read_lines (Path.explode sugg_file)) - |> tap (fn _ => - if overlord then () - else (map (File.rm o Path.explode) [sugg_file, cmd_file]; ())) - end - -fun str_of_update (name, parents, feats, deps) = - "! " ^ escape_meta name ^ ": " ^ escape_metas parents ^ "; " ^ - escape_metas feats ^ "; " ^ escape_metas deps ^ "\n" - -fun str_of_query (parents, feats) = - "? " ^ escape_metas parents ^ "; " ^ escape_metas feats - -fun init_str_of_update get (upd as (name, _, _, _)) = - escape_meta name ^ ": " ^ escape_metas (get upd) ^ "\n" - -fun mash_CLEAR ctxt = - let val path = mash_state_dir () |> Path.explode in - trace_msg ctxt (K "MaSh CLEAR"); - File.fold_dir (fn file => fn () => - File.rm (Path.append path (Path.basic file))) - path () - end - -fun mash_INIT ctxt _ [] = mash_CLEAR ctxt - | mash_INIT ctxt overlord upds = - (trace_msg ctxt (fn () => "MaSh INIT " ^ - elide_string 1000 (space_implode " " (map #1 upds))); - run_mash_init ctxt overlord (upds, init_str_of_update #2) - (upds, init_str_of_update #3) (upds, init_str_of_update #4)) - -fun mash_ADD _ _ [] = () - | mash_ADD ctxt overlord upds = - (trace_msg ctxt (fn () => "MaSh ADD " ^ - elide_string 1000 (space_implode " " (map #1 upds))); - run_mash_commands ctxt overlord true 0 (upds, str_of_update) (K ())) - -fun mash_QUERY ctxt overlord max_suggs (query as (_, feats)) = - (trace_msg ctxt (fn () => "MaSh QUERY " ^ space_implode " " feats); - run_mash_commands ctxt overlord false max_suggs - ([query], str_of_query) - (fn suggs => snd (extract_query (List.last (suggs ())))) - handle List.Empty => []) - - -(*** High-level communication with MaSh ***) - -fun try_graph ctxt when def f = - f () - handle Graph.CYCLES (cycle :: _) => - (trace_msg ctxt (fn () => - "Cycle involving " ^ commas cycle ^ " when " ^ when); def) - | Graph.UNDEF name => - (trace_msg ctxt (fn () => - "Unknown fact " ^ quote name ^ " when " ^ when); def) - -type mash_state = - {thys : bool Symtab.table, - fact_graph : unit Graph.T} - -val empty_state = {thys = Symtab.empty, fact_graph = Graph.empty} - -local - -fun mash_load _ (state as (true, _)) = state - | mash_load ctxt _ = - let val path = mash_state_path () in - (true, - case try File.read_lines path of - SOME (comp_thys :: incomp_thys :: fact_lines) => - let - fun add_thy comp thy = Symtab.update (thy, comp) - fun add_edge_to name parent = - Graph.default_node (parent, ()) - #> Graph.add_edge (parent, name) - fun add_fact_line line = - case extract_query line of - ("", _) => I (* shouldn't happen *) - | (name, parents) => - Graph.default_node (name, ()) - #> fold (add_edge_to name) parents - val thys = - Symtab.empty |> fold (add_thy true) (unescape_metas comp_thys) - |> fold (add_thy false) (unescape_metas incomp_thys) - val fact_graph = - try_graph ctxt "loading state" Graph.empty (fn () => - Graph.empty |> fold add_fact_line fact_lines) - in {thys = thys, fact_graph = fact_graph} end - | _ => empty_state) - end - -fun mash_save ({thys, fact_graph, ...} : mash_state) = - let - val path = mash_state_path () - val thys = Symtab.dest thys - val line_for_thys = escape_metas o AList.find (op =) thys - fun fact_line_for name parents = - escape_meta name ^ ": " ^ escape_metas parents - val append_fact = File.append path o suffix "\n" oo fact_line_for - in - File.write path (line_for_thys true ^ "\n" ^ line_for_thys false ^ "\n"); - Graph.fold (fn (name, ((), (parents, _))) => fn () => - append_fact name (Graph.Keys.dest parents)) - fact_graph () - end - -val global_state = - Synchronized.var "Sledgehammer_Filter_MaSh.global_state" (false, empty_state) - -in - -fun mash_map ctxt f = - Synchronized.change global_state (mash_load ctxt ##> (f #> tap mash_save)) - -fun mash_get ctxt = - Synchronized.change_result global_state (mash_load ctxt #> `snd) - -fun mash_unlearn ctxt = - Synchronized.change global_state (fn _ => - (mash_CLEAR ctxt; File.write (mash_state_path ()) ""; - (true, empty_state))) - -end - -fun mash_could_suggest_facts () = mash_home () <> "" -fun mash_can_suggest_facts ctxt = - not (Graph.is_empty (#fact_graph (mash_get ctxt))) - -fun parents_wrt_facts facts fact_graph = - let - val facts = [] |> fold (cons o nickname_of o snd) facts - val tab = Symtab.empty |> fold (fn name => Symtab.update (name, ())) facts - fun insert_not_seen seen name = - not (member (op =) seen name) ? insert (op =) name - fun parents_of _ parents [] = parents - | parents_of seen parents (name :: names) = - if Symtab.defined tab name then - parents_of (name :: seen) (name :: parents) names - else - parents_of (name :: seen) parents - (Graph.Keys.fold (insert_not_seen seen) - (Graph.imm_preds fact_graph name) names) - in parents_of [] [] (Graph.maximals fact_graph) end - -(* Generate more suggestions than requested, because some might be thrown out - later for various reasons and "meshing" gives better results with some - slack. *) -fun max_suggs_of max_facts = max_facts + Int.min (200, max_facts) - -fun is_fact_in_graph fact_graph (_, th) = - can (Graph.get_node fact_graph) (nickname_of th) - -fun mash_suggest_facts ctxt ({overlord, ...} : params) prover max_facts hyp_ts - concl_t facts = - let - val thy = Proof_Context.theory_of ctxt - val fact_graph = #fact_graph (mash_get ctxt) - val parents = parents_wrt_facts facts fact_graph - val feats = features_of ctxt prover thy General (concl_t :: hyp_ts) - val suggs = - if Graph.is_empty fact_graph then [] - else mash_QUERY ctxt overlord (max_suggs_of max_facts) (parents, feats) - val selected = facts |> suggested_facts suggs - val unknown = facts |> filter_out (is_fact_in_graph fact_graph) - in (selected, unknown) end - -fun add_thys_for thy = - let fun add comp thy = Symtab.update (Context.theory_name thy, comp) in - add false thy #> fold (add true) (Theory.ancestors_of thy) - end - -fun update_fact_graph ctxt (name, parents, feats, deps) (upds, graph) = - let - fun maybe_add_from from (accum as (parents, graph)) = - try_graph ctxt "updating graph" accum (fn () => - (from :: parents, Graph.add_edge_acyclic (from, name) graph)) - val graph = graph |> Graph.default_node (name, ()) - val (parents, graph) = ([], graph) |> fold maybe_add_from parents - val (deps, graph) = ([], graph) |> fold maybe_add_from deps - in ((name, parents, feats, deps) :: upds, graph) end - -val pass1_learn_timeout_factor = 0.5 - -(* Too many dependencies is a sign that a decision procedure is at work. There - isn't much too learn from such proofs. *) -val max_dependencies = 10 - -(* The timeout is understood in a very slack fashion. *) -fun mash_learn_thy ctxt ({provers, verbose, overlord, ...} : params) thy timeout - facts = - let - val timer = Timer.startRealTimer () - val prover = hd provers - fun timed_out frac = - Time.> (Timer.checkRealTimer timer, time_mult frac timeout) - val {fact_graph, ...} = mash_get ctxt - val new_facts = - facts |> filter_out (is_fact_in_graph fact_graph) - |> sort (thm_ord o pairself snd) - in - if null new_facts then - "" - else - let - val ths = facts |> map snd - val all_names = - ths |> filter_out is_likely_tautology_or_too_meta - |> map (rpair () o nickname_of) - |> Symtab.make - fun trim_deps deps = if length deps > max_dependencies then [] else deps - fun do_fact _ (accum as (_, true)) = accum - | do_fact ((_, (_, status)), th) ((parents, upds), false) = - let - val name = nickname_of th - val feats = - features_of ctxt prover (theory_of_thm th) status [prop_of th] - val deps = isabelle_dependencies_of all_names th |> trim_deps - val upd = (name, parents, feats, deps) - in (([name], upd :: upds), timed_out pass1_learn_timeout_factor) end - val parents = parents_wrt_facts facts fact_graph - val ((_, upds), _) = - ((parents, []), false) |> fold do_fact new_facts |>> apsnd rev - val n = length upds - fun trans {thys, fact_graph} = - let - val mash_INIT_or_ADD = - if Graph.is_empty fact_graph then mash_INIT else mash_ADD - val (upds, fact_graph) = - ([], fact_graph) |> fold (update_fact_graph ctxt) upds - in - (mash_INIT_or_ADD ctxt overlord (rev upds); - {thys = thys |> add_thys_for thy, - fact_graph = fact_graph}) - end - in - mash_map ctxt trans; - if verbose then - "Processed " ^ string_of_int n ^ " proof" ^ plural_s n ^ - (if verbose then - " in " ^ string_from_time (Timer.checkRealTimer timer) - else - "") ^ "." - else - "" - end - end - -fun mash_learn_proof ctxt ({provers, overlord, ...} : params) t facts used_ths = - let - val thy = Proof_Context.theory_of ctxt - val prover = hd provers - val name = ATP_Util.timestamp () ^ " " ^ serial_string () (* fresh enough *) - val feats = features_of ctxt prover thy General [t] - val deps = used_ths |> map nickname_of - in - mash_map ctxt (fn {thys, fact_graph} => - let - val parents = parents_wrt_facts facts fact_graph - val upds = [(name, parents, feats, deps)] - val (upds, fact_graph) = - ([], fact_graph) |> fold (update_fact_graph ctxt) upds - in - mash_ADD ctxt overlord upds; - {thys = thys, fact_graph = fact_graph} - end) - end - -(* The threshold should be large enough so that MaSh doesn't kick in for Auto - Sledgehammer and Try. *) -val min_secs_for_learning = 15 -val learn_timeout_factor = 2.0 - -fun relevant_facts ctxt (params as {learn, fact_filter, timeout, ...}) prover - max_facts ({add, only, ...} : fact_override) hyp_ts concl_t facts = - if not (subset (op =) (the_list fact_filter, fact_filters)) then - error ("Unknown fact filter: " ^ quote (the fact_filter) ^ ".") - else if only then - facts - else if max_facts <= 0 orelse null facts then - [] - else - let - val thy = Proof_Context.theory_of ctxt - fun maybe_learn () = - if not learn orelse Async_Manager.has_running_threads MaShN then - () - else if Time.toSeconds timeout >= min_secs_for_learning then - let - val soft_timeout = time_mult learn_timeout_factor timeout - val hard_timeout = time_mult 4.0 soft_timeout - val birth_time = Time.now () - val death_time = Time.+ (birth_time, hard_timeout) - val desc = ("machine learner for Sledgehammer", "") - in - Async_Manager.launch MaShN birth_time death_time desc - (fn () => - (true, mash_learn_thy ctxt params thy soft_timeout facts)) - end - else - () - val fact_filter = - case fact_filter of - SOME ff => (() |> ff <> mepoN ? maybe_learn; ff) - | NONE => - if mash_can_suggest_facts ctxt then (maybe_learn (); meshN) - else if mash_could_suggest_facts () then (maybe_learn (); mepoN) - else mepoN - val add_ths = Attrib.eval_thms ctxt add - fun prepend_facts ths accepts = - ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @ - (accepts |> filter_out (member Thm.eq_thm_prop ths o snd))) - |> take max_facts - fun iter () = - iterative_relevant_facts ctxt params prover max_facts NONE hyp_ts - concl_t facts - fun mash () = - mash_suggest_facts ctxt params prover max_facts hyp_ts concl_t facts - val mess = - [] |> (if fact_filter <> mashN then cons (iter (), []) else I) - |> (if fact_filter <> mepoN then cons (mash ()) else I) - in - mesh_facts max_facts mess - |> not (null add_ths) ? prepend_facts add_ths - end - -fun kill_learners () = Async_Manager.kill_threads MaShN "learner" -fun running_learners () = Async_Manager.running_threads MaShN "learner" - -end; diff -r 2b5ad61e2ccc -r d4b7c7be3116 src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML Fri Jul 20 22:19:45 2012 +0200 @@ -0,0 +1,691 @@ +(* Title: HOL/Tools/Sledgehammer/sledgehammer_mash.ML + Author: Jasmin Blanchette, TU Muenchen + +Sledgehammer's machine-learning-based relevance filter (MaSh). +*) + +signature SLEDGEHAMMER_FILTER_MASH = +sig + type status = ATP_Problem_Generate.status + type stature = ATP_Problem_Generate.stature + type fact = Sledgehammer_Fact.fact + type fact_override = Sledgehammer_Fact.fact_override + type params = Sledgehammer_Provers.params + type relevance_fudge = Sledgehammer_Provers.relevance_fudge + type prover_result = Sledgehammer_Provers.prover_result + + val trace : bool Config.T + val MaShN : string + val mepoN : string + val mashN : string + val meshN : string + val fact_filters : string list + val escape_meta : string -> string + val escape_metas : string list -> string + val unescape_meta : string -> string + val unescape_metas : string -> string list + val extract_query : string -> string * string list + val nickname_of : thm -> string + val suggested_facts : string list -> ('a * thm) list -> ('a * thm) list + val mesh_facts : + int -> (('a * thm) list * ('a * thm) list) list -> ('a * thm) list + val is_likely_tautology_or_too_meta : thm -> bool + val theory_ord : theory * theory -> order + val thm_ord : thm * thm -> order + val features_of : + Proof.context -> string -> theory -> status -> term list -> string list + val isabelle_dependencies_of : unit Symtab.table -> thm -> string list + val goal_of_thm : theory -> thm -> thm + val run_prover_for_mash : + Proof.context -> params -> string -> fact list -> thm -> prover_result + val mash_CLEAR : Proof.context -> unit + val mash_INIT : + Proof.context -> bool + -> (string * string list * string list * string list) list -> unit + val mash_ADD : + Proof.context -> bool + -> (string * string list * string list * string list) list -> unit + val mash_QUERY : + Proof.context -> bool -> int -> string list * string list -> string list + val mash_unlearn : Proof.context -> unit + val mash_could_suggest_facts : unit -> bool + val mash_can_suggest_facts : Proof.context -> bool + val mash_suggest_facts : + Proof.context -> params -> string -> int -> term list -> term + -> ('a * thm) list -> ('a * thm) list * ('a * thm) list + val mash_learn_thy : + Proof.context -> params -> theory -> Time.time -> fact list -> string + val mash_learn_proof : + Proof.context -> params -> term -> ('a * thm) list -> thm list -> unit + val relevant_facts : + Proof.context -> params -> string -> int -> fact_override -> term list + -> term -> fact list -> fact list + val kill_learners : unit -> unit + val running_learners : unit -> unit +end; + +structure Sledgehammer_Filter_MaSh : SLEDGEHAMMER_FILTER_MASH = +struct + +open ATP_Util +open ATP_Problem_Generate +open Sledgehammer_Util +open Sledgehammer_Fact +open Sledgehammer_Filter_Iter +open Sledgehammer_Provers +open Sledgehammer_Minimize + +val trace = + Attrib.setup_config_bool @{binding sledgehammer_mash_trace} (K false) +fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else () + +val MaShN = "MaSh" + +val mepoN = "mepo" +val mashN = "mash" +val meshN = "mesh" + +val fact_filters = [meshN, mepoN, mashN] + +fun mash_home () = getenv "MASH_HOME" +fun mash_state_dir () = + getenv "ISABELLE_HOME_USER" ^ "/mash" + |> tap (Isabelle_System.mkdir o Path.explode) +fun mash_state_path () = mash_state_dir () ^ "/state" |> Path.explode + + +(*** Isabelle helpers ***) + +fun meta_char c = + if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse + c = #")" orelse c = #"," then + String.str c + else + (* fixed width, in case more digits follow *) + "\\" ^ stringN_of_int 3 (Char.ord c) + +fun unmeta_chars accum [] = String.implode (rev accum) + | unmeta_chars accum (#"\\" :: d1 :: d2 :: d3 :: cs) = + (case Int.fromString (String.implode [d1, d2, d3]) of + SOME n => unmeta_chars (Char.chr n :: accum) cs + | NONE => "" (* error *)) + | unmeta_chars _ (#"\\" :: _) = "" (* error *) + | unmeta_chars accum (c :: cs) = unmeta_chars (c :: accum) cs + +val escape_meta = String.translate meta_char +val escape_metas = map escape_meta #> space_implode " " +val unescape_meta = String.explode #> unmeta_chars [] +val unescape_metas = + space_explode " " #> filter_out (curry (op =) "") #> map unescape_meta + +fun extract_query line = + case space_explode ":" line of + [goal_name, suggs] => (unescape_meta goal_name, unescape_metas suggs) + | _ => ("", []) + +fun parent_of_local_thm th = + let + val thy = th |> Thm.theory_of_thm + val facts = thy |> Global_Theory.facts_of + val space = facts |> Facts.space_of + fun id_of s = #id (Name_Space.the_entry space s) + fun max_id (s', _) (s, id) = + let val id' = id_of s' in if id > id' then (s, id) else (s', id') end + in ("", ~1) |> Facts.fold_static max_id facts |> fst end + +val local_prefix = "local" ^ Long_Name.separator + +fun nickname_of th = + let val hint = Thm.get_name_hint th in + (* FIXME: There must be a better way to detect local facts. *) + case try (unprefix local_prefix) hint of + SOME suff => + parent_of_local_thm th ^ Long_Name.separator ^ Long_Name.separator ^ suff + | NONE => hint + end + +fun suggested_facts suggs facts = + let + fun add_fact (fact as (_, th)) = Symtab.default (nickname_of th, fact) + val tab = Symtab.empty |> fold add_fact facts + in map_filter (Symtab.lookup tab) suggs end + +(* Ad hoc score function roughly based on Blanchette's Ringberg 2011 data. *) +fun score x = Math.pow (1.5, 15.5 - 0.05 * Real.fromInt x) + 15.0 + +fun sum_sq_avg [] = 0 + | sum_sq_avg xs = + Real.ceil (100000.0 * fold (curry (op +) o score) xs 0.0) div length xs + +fun mesh_facts max_facts [(selected, unknown)] = + take max_facts selected @ take (max_facts - length selected) unknown + | mesh_facts max_facts mess = + let + val mess = mess |> map (apfst (`length)) + val fact_eq = Thm.eq_thm o pairself snd + fun score_in fact ((sel_len, sels), unks) = + case find_index (curry fact_eq fact) sels of + ~1 => (case find_index (curry fact_eq fact) unks of + ~1 => SOME sel_len + | _ => NONE) + | j => SOME j + fun score_of fact = mess |> map_filter (score_in fact) |> sum_sq_avg + val facts = fold (union fact_eq o take max_facts o snd o fst) mess [] + in + facts |> map (`score_of) |> sort (int_ord o swap o pairself fst) + |> map snd |> take max_facts + end + +val thy_feature_prefix = "y_" + +val thy_feature_name_of = prefix thy_feature_prefix +val const_name_of = prefix const_prefix +val type_name_of = prefix type_const_prefix +val class_name_of = prefix class_prefix + +fun is_likely_tautology_or_too_meta th = + let + val is_boring_const = member (op =) atp_widely_irrelevant_consts + fun is_boring_bool t = + not (exists_Const (not o is_boring_const o fst) t) orelse + exists_type (exists_subtype (curry (op =) @{typ prop})) t + fun is_boring_prop (@{const Trueprop} $ t) = is_boring_bool t + | is_boring_prop (@{const "==>"} $ t $ u) = + is_boring_prop t andalso is_boring_prop u + | is_boring_prop (Const (@{const_name all}, _) $ (Abs (_, _, t)) $ u) = + is_boring_prop t andalso is_boring_prop u + | is_boring_prop (Const (@{const_name "=="}, _) $ t $ u) = + is_boring_bool t andalso is_boring_bool u + | is_boring_prop _ = true + in + is_boring_prop (prop_of th) andalso not (Thm.eq_thm_prop (@{thm ext}, th)) + end + +fun theory_ord p = + if Theory.eq_thy p then + EQUAL + else if Theory.subthy p then + LESS + else if Theory.subthy (swap p) then + GREATER + else case int_ord (pairself (length o Theory.ancestors_of) p) of + EQUAL => string_ord (pairself Context.theory_name p) + | order => order + +val thm_ord = theory_ord o pairself theory_of_thm + +val bad_types = [@{type_name prop}, @{type_name bool}, @{type_name fun}] + +fun interesting_terms_types_and_classes ctxt prover term_max_depth + type_max_depth ts = + let + fun is_bad_const (x as (s, _)) args = + member (op =) atp_logical_consts s orelse + fst (is_built_in_const_for_prover ctxt prover x args) + fun add_classes @{sort type} = I + | add_classes S = union (op =) (map class_name_of S) + fun do_add_type (Type (s, Ts)) = + (not (member (op =) bad_types s) ? insert (op =) (type_name_of s)) + #> fold do_add_type Ts + | do_add_type (TFree (_, S)) = add_classes S + | do_add_type (TVar (_, S)) = add_classes S + fun add_type T = type_max_depth >= 0 ? do_add_type T + fun mk_app s args = + if member (op <>) args "" then s ^ "(" ^ space_implode "," args ^ ")" + else s + fun patternify ~1 _ = "" + | patternify depth t = + case strip_comb t of + (Const (s, _), args) => + mk_app (const_name_of s) (map (patternify (depth - 1)) args) + | _ => "" + fun add_term_patterns ~1 _ = I + | add_term_patterns depth t = + insert (op =) (patternify depth t) + #> add_term_patterns (depth - 1) t + val add_term = add_term_patterns term_max_depth + fun add_patterns t = + let val (head, args) = strip_comb t in + (case head of + Const (x as (_, T)) => + not (is_bad_const x args) ? (add_term t #> add_type T) + | Free (_, T) => add_type T + | Var (_, T) => add_type T + | Abs (_, T, body) => add_type T #> add_patterns body + | _ => I) + #> fold add_patterns args + end + in [] |> fold add_patterns ts end + +fun is_exists (s, _) = (s = @{const_name Ex} orelse s = @{const_name Ex1}) + +val term_max_depth = 1 +val type_max_depth = 1 + +(* TODO: Generate type classes for types? *) +fun features_of ctxt prover thy status ts = + thy_feature_name_of (Context.theory_name thy) :: + interesting_terms_types_and_classes ctxt prover term_max_depth type_max_depth + ts + |> forall is_lambda_free ts ? cons "no_lams" + |> forall (not o exists_Const is_exists) ts ? cons "no_skos" + |> (case status of + General => I + | Induction => cons "induction" + | Intro => cons "intro" + | Inductive => cons "inductive" + | Elim => cons "elim" + | Simp => cons "simp" + | Def => cons "def") + +fun isabelle_dependencies_of all_facts = thms_in_proof (SOME all_facts) + +val freezeT = Type.legacy_freeze_type + +fun freeze (t $ u) = freeze t $ freeze u + | freeze (Abs (s, T, t)) = Abs (s, freezeT T, freeze t) + | freeze (Var ((s, _), T)) = Free (s, freezeT T) + | freeze (Const (s, T)) = Const (s, freezeT T) + | freeze (Free (s, T)) = Free (s, freezeT T) + | freeze t = t + +fun goal_of_thm thy = prop_of #> freeze #> cterm_of thy #> Goal.init + +fun run_prover_for_mash ctxt params prover facts goal = + let + val problem = + {state = Proof.init ctxt, goal = goal, subgoal = 1, subgoal_count = 1, + facts = facts |> map (apfst (apfst (fn name => name ()))) + |> map Untranslated_Fact} + val prover = get_minimizing_prover ctxt Normal (K ()) prover + in prover params (K (K (K ""))) problem end + + +(*** Low-level communication with MaSh ***) + +fun write_file (xs, f) file = + let val path = Path.explode file in + File.write path ""; + xs |> chunk_list 500 + |> List.app (File.append path o space_implode "" o map f) + end + +fun mash_info overlord = + if overlord then (getenv "ISABELLE_HOME_USER", "") + else (getenv "ISABELLE_TMP", serial_string ()) + +fun run_mash ctxt overlord (temp_dir, serial) core = + let + val log_file = temp_dir ^ "/mash_log" ^ serial + val err_file = temp_dir ^ "/mash_err" ^ serial + val command = + mash_home () ^ "/mash.py --quiet --outputDir " ^ mash_state_dir () ^ + " --log " ^ log_file ^ " " ^ core ^ " 2>&1 > " ^ err_file + in + trace_msg ctxt (fn () => "Running " ^ command); + write_file ([], K "") log_file; + write_file ([], K "") err_file; + Isabelle_System.bash command; + if overlord then () + else (map (File.rm o Path.explode) [log_file, err_file]; ()); + trace_msg ctxt (K "Done") + end + +(* TODO: Eliminate code once "mash.py" handles sequences of ADD commands as fast + as a single INIT. *) +fun run_mash_init ctxt overlord write_access write_feats write_deps = + let + val info as (temp_dir, serial) = mash_info overlord + val in_dir = temp_dir ^ "/mash_init" ^ serial + val in_dir_path = in_dir |> Path.explode |> tap Isabelle_System.mkdir + in + write_file write_access (in_dir ^ "/mash_accessibility"); + write_file write_feats (in_dir ^ "/mash_features"); + write_file write_deps (in_dir ^ "/mash_dependencies"); + run_mash ctxt overlord info ("--init --inputDir " ^ in_dir); + (* FIXME: temporary hack *) + if overlord then () + else (Isabelle_System.bash ("rm -r -f " ^ File.shell_path in_dir_path); ()) + end + +fun run_mash_commands ctxt overlord save max_suggs write_cmds read_suggs = + let + val info as (temp_dir, serial) = mash_info overlord + val sugg_file = temp_dir ^ "/mash_suggs" ^ serial + val cmd_file = temp_dir ^ "/mash_commands" ^ serial + in + write_file ([], K "") sugg_file; + write_file write_cmds cmd_file; + run_mash ctxt overlord info + ("--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^ + " --numberOfPredictions " ^ string_of_int max_suggs ^ + (if save then " --saveModel" else "")); + read_suggs (fn () => File.read_lines (Path.explode sugg_file)) + |> tap (fn _ => + if overlord then () + else (map (File.rm o Path.explode) [sugg_file, cmd_file]; ())) + end + +fun str_of_update (name, parents, feats, deps) = + "! " ^ escape_meta name ^ ": " ^ escape_metas parents ^ "; " ^ + escape_metas feats ^ "; " ^ escape_metas deps ^ "\n" + +fun str_of_query (parents, feats) = + "? " ^ escape_metas parents ^ "; " ^ escape_metas feats + +fun init_str_of_update get (upd as (name, _, _, _)) = + escape_meta name ^ ": " ^ escape_metas (get upd) ^ "\n" + +fun mash_CLEAR ctxt = + let val path = mash_state_dir () |> Path.explode in + trace_msg ctxt (K "MaSh CLEAR"); + File.fold_dir (fn file => fn () => + File.rm (Path.append path (Path.basic file))) + path () + end + +fun mash_INIT ctxt _ [] = mash_CLEAR ctxt + | mash_INIT ctxt overlord upds = + (trace_msg ctxt (fn () => "MaSh INIT " ^ + elide_string 1000 (space_implode " " (map #1 upds))); + run_mash_init ctxt overlord (upds, init_str_of_update #2) + (upds, init_str_of_update #3) (upds, init_str_of_update #4)) + +fun mash_ADD _ _ [] = () + | mash_ADD ctxt overlord upds = + (trace_msg ctxt (fn () => "MaSh ADD " ^ + elide_string 1000 (space_implode " " (map #1 upds))); + run_mash_commands ctxt overlord true 0 (upds, str_of_update) (K ())) + +fun mash_QUERY ctxt overlord max_suggs (query as (_, feats)) = + (trace_msg ctxt (fn () => "MaSh QUERY " ^ space_implode " " feats); + run_mash_commands ctxt overlord false max_suggs + ([query], str_of_query) + (fn suggs => snd (extract_query (List.last (suggs ())))) + handle List.Empty => []) + + +(*** High-level communication with MaSh ***) + +fun try_graph ctxt when def f = + f () + handle Graph.CYCLES (cycle :: _) => + (trace_msg ctxt (fn () => + "Cycle involving " ^ commas cycle ^ " when " ^ when); def) + | Graph.UNDEF name => + (trace_msg ctxt (fn () => + "Unknown fact " ^ quote name ^ " when " ^ when); def) + +type mash_state = + {thys : bool Symtab.table, + fact_graph : unit Graph.T} + +val empty_state = {thys = Symtab.empty, fact_graph = Graph.empty} + +local + +fun mash_load _ (state as (true, _)) = state + | mash_load ctxt _ = + let val path = mash_state_path () in + (true, + case try File.read_lines path of + SOME (comp_thys :: incomp_thys :: fact_lines) => + let + fun add_thy comp thy = Symtab.update (thy, comp) + fun add_edge_to name parent = + Graph.default_node (parent, ()) + #> Graph.add_edge (parent, name) + fun add_fact_line line = + case extract_query line of + ("", _) => I (* shouldn't happen *) + | (name, parents) => + Graph.default_node (name, ()) + #> fold (add_edge_to name) parents + val thys = + Symtab.empty |> fold (add_thy true) (unescape_metas comp_thys) + |> fold (add_thy false) (unescape_metas incomp_thys) + val fact_graph = + try_graph ctxt "loading state" Graph.empty (fn () => + Graph.empty |> fold add_fact_line fact_lines) + in {thys = thys, fact_graph = fact_graph} end + | _ => empty_state) + end + +fun mash_save ({thys, fact_graph, ...} : mash_state) = + let + val path = mash_state_path () + val thys = Symtab.dest thys + val line_for_thys = escape_metas o AList.find (op =) thys + fun fact_line_for name parents = + escape_meta name ^ ": " ^ escape_metas parents + val append_fact = File.append path o suffix "\n" oo fact_line_for + in + File.write path (line_for_thys true ^ "\n" ^ line_for_thys false ^ "\n"); + Graph.fold (fn (name, ((), (parents, _))) => fn () => + append_fact name (Graph.Keys.dest parents)) + fact_graph () + end + +val global_state = + Synchronized.var "Sledgehammer_Filter_MaSh.global_state" (false, empty_state) + +in + +fun mash_map ctxt f = + Synchronized.change global_state (mash_load ctxt ##> (f #> tap mash_save)) + +fun mash_get ctxt = + Synchronized.change_result global_state (mash_load ctxt #> `snd) + +fun mash_unlearn ctxt = + Synchronized.change global_state (fn _ => + (mash_CLEAR ctxt; File.write (mash_state_path ()) ""; + (true, empty_state))) + +end + +fun mash_could_suggest_facts () = mash_home () <> "" +fun mash_can_suggest_facts ctxt = + not (Graph.is_empty (#fact_graph (mash_get ctxt))) + +fun parents_wrt_facts facts fact_graph = + let + val facts = [] |> fold (cons o nickname_of o snd) facts + val tab = Symtab.empty |> fold (fn name => Symtab.update (name, ())) facts + fun insert_not_seen seen name = + not (member (op =) seen name) ? insert (op =) name + fun parents_of _ parents [] = parents + | parents_of seen parents (name :: names) = + if Symtab.defined tab name then + parents_of (name :: seen) (name :: parents) names + else + parents_of (name :: seen) parents + (Graph.Keys.fold (insert_not_seen seen) + (Graph.imm_preds fact_graph name) names) + in parents_of [] [] (Graph.maximals fact_graph) end + +(* Generate more suggestions than requested, because some might be thrown out + later for various reasons and "meshing" gives better results with some + slack. *) +fun max_suggs_of max_facts = max_facts + Int.min (200, max_facts) + +fun is_fact_in_graph fact_graph (_, th) = + can (Graph.get_node fact_graph) (nickname_of th) + +fun mash_suggest_facts ctxt ({overlord, ...} : params) prover max_facts hyp_ts + concl_t facts = + let + val thy = Proof_Context.theory_of ctxt + val fact_graph = #fact_graph (mash_get ctxt) + val parents = parents_wrt_facts facts fact_graph + val feats = features_of ctxt prover thy General (concl_t :: hyp_ts) + val suggs = + if Graph.is_empty fact_graph then [] + else mash_QUERY ctxt overlord (max_suggs_of max_facts) (parents, feats) + val selected = facts |> suggested_facts suggs + val unknown = facts |> filter_out (is_fact_in_graph fact_graph) + in (selected, unknown) end + +fun add_thys_for thy = + let fun add comp thy = Symtab.update (Context.theory_name thy, comp) in + add false thy #> fold (add true) (Theory.ancestors_of thy) + end + +fun update_fact_graph ctxt (name, parents, feats, deps) (upds, graph) = + let + fun maybe_add_from from (accum as (parents, graph)) = + try_graph ctxt "updating graph" accum (fn () => + (from :: parents, Graph.add_edge_acyclic (from, name) graph)) + val graph = graph |> Graph.default_node (name, ()) + val (parents, graph) = ([], graph) |> fold maybe_add_from parents + val (deps, graph) = ([], graph) |> fold maybe_add_from deps + in ((name, parents, feats, deps) :: upds, graph) end + +val pass1_learn_timeout_factor = 0.5 + +(* Too many dependencies is a sign that a decision procedure is at work. There + isn't much too learn from such proofs. *) +val max_dependencies = 10 + +(* The timeout is understood in a very slack fashion. *) +fun mash_learn_thy ctxt ({provers, verbose, overlord, ...} : params) thy timeout + facts = + let + val timer = Timer.startRealTimer () + val prover = hd provers + fun timed_out frac = + Time.> (Timer.checkRealTimer timer, time_mult frac timeout) + val {fact_graph, ...} = mash_get ctxt + val new_facts = + facts |> filter_out (is_fact_in_graph fact_graph) + |> sort (thm_ord o pairself snd) + in + if null new_facts then + "" + else + let + val ths = facts |> map snd + val all_names = + ths |> filter_out is_likely_tautology_or_too_meta + |> map (rpair () o nickname_of) + |> Symtab.make + fun trim_deps deps = if length deps > max_dependencies then [] else deps + fun do_fact _ (accum as (_, true)) = accum + | do_fact ((_, (_, status)), th) ((parents, upds), false) = + let + val name = nickname_of th + val feats = + features_of ctxt prover (theory_of_thm th) status [prop_of th] + val deps = isabelle_dependencies_of all_names th |> trim_deps + val upd = (name, parents, feats, deps) + in (([name], upd :: upds), timed_out pass1_learn_timeout_factor) end + val parents = parents_wrt_facts facts fact_graph + val ((_, upds), _) = + ((parents, []), false) |> fold do_fact new_facts |>> apsnd rev + val n = length upds + fun trans {thys, fact_graph} = + let + val mash_INIT_or_ADD = + if Graph.is_empty fact_graph then mash_INIT else mash_ADD + val (upds, fact_graph) = + ([], fact_graph) |> fold (update_fact_graph ctxt) upds + in + (mash_INIT_or_ADD ctxt overlord (rev upds); + {thys = thys |> add_thys_for thy, + fact_graph = fact_graph}) + end + in + mash_map ctxt trans; + if verbose then + "Processed " ^ string_of_int n ^ " proof" ^ plural_s n ^ + (if verbose then + " in " ^ string_from_time (Timer.checkRealTimer timer) + else + "") ^ "." + else + "" + end + end + +fun mash_learn_proof ctxt ({provers, overlord, ...} : params) t facts used_ths = + let + val thy = Proof_Context.theory_of ctxt + val prover = hd provers + val name = ATP_Util.timestamp () ^ " " ^ serial_string () (* fresh enough *) + val feats = features_of ctxt prover thy General [t] + val deps = used_ths |> map nickname_of + in + mash_map ctxt (fn {thys, fact_graph} => + let + val parents = parents_wrt_facts facts fact_graph + val upds = [(name, parents, feats, deps)] + val (upds, fact_graph) = + ([], fact_graph) |> fold (update_fact_graph ctxt) upds + in + mash_ADD ctxt overlord upds; + {thys = thys, fact_graph = fact_graph} + end) + end + +(* The threshold should be large enough so that MaSh doesn't kick in for Auto + Sledgehammer and Try. *) +val min_secs_for_learning = 15 +val learn_timeout_factor = 2.0 + +fun relevant_facts ctxt (params as {learn, fact_filter, timeout, ...}) prover + max_facts ({add, only, ...} : fact_override) hyp_ts concl_t facts = + if not (subset (op =) (the_list fact_filter, fact_filters)) then + error ("Unknown fact filter: " ^ quote (the fact_filter) ^ ".") + else if only then + facts + else if max_facts <= 0 orelse null facts then + [] + else + let + val thy = Proof_Context.theory_of ctxt + fun maybe_learn () = + if not learn orelse Async_Manager.has_running_threads MaShN then + () + else if Time.toSeconds timeout >= min_secs_for_learning then + let + val soft_timeout = time_mult learn_timeout_factor timeout + val hard_timeout = time_mult 4.0 soft_timeout + val birth_time = Time.now () + val death_time = Time.+ (birth_time, hard_timeout) + val desc = ("machine learner for Sledgehammer", "") + in + Async_Manager.launch MaShN birth_time death_time desc + (fn () => + (true, mash_learn_thy ctxt params thy soft_timeout facts)) + end + else + () + val fact_filter = + case fact_filter of + SOME ff => (() |> ff <> mepoN ? maybe_learn; ff) + | NONE => + if mash_can_suggest_facts ctxt then (maybe_learn (); meshN) + else if mash_could_suggest_facts () then (maybe_learn (); mepoN) + else mepoN + val add_ths = Attrib.eval_thms ctxt add + fun prepend_facts ths accepts = + ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @ + (accepts |> filter_out (member Thm.eq_thm_prop ths o snd))) + |> take max_facts + fun iter () = + iterative_relevant_facts ctxt params prover max_facts NONE hyp_ts + concl_t facts + fun mash () = + mash_suggest_facts ctxt params prover max_facts hyp_ts concl_t facts + val mess = + [] |> (if fact_filter <> mashN then cons (iter (), []) else I) + |> (if fact_filter <> mepoN then cons (mash ()) else I) + in + mesh_facts max_facts mess + |> not (null add_ths) ? prepend_facts add_ths + end + +fun kill_learners () = Async_Manager.kill_threads MaShN "learner" +fun running_learners () = Async_Manager.running_threads MaShN "learner" + +end; diff -r 2b5ad61e2ccc -r d4b7c7be3116 src/HOL/Tools/Sledgehammer/sledgehammer_mepo.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mepo.ML Fri Jul 20 22:19:45 2012 +0200 @@ -0,0 +1,537 @@ +(* Title: HOL/Tools/Sledgehammer/sledgehammer_mepo.ML + Author: Jia Meng, Cambridge University Computer Laboratory and NICTA + Author: Jasmin Blanchette, TU Muenchen + +Sledgehammer's iterative relevance filter (MePo = Meng-Paulson). +*) + +signature SLEDGEHAMMER_FILTER_ITER = +sig + type stature = ATP_Problem_Generate.stature + type fact = Sledgehammer_Fact.fact + type params = Sledgehammer_Provers.params + type relevance_fudge = Sledgehammer_Provers.relevance_fudge + + val trace : bool Config.T + val pseudo_abs_name : string + val pseudo_skolem_prefix : string + val const_names_in_fact : + theory -> (string * typ -> term list -> bool * term list) -> term + -> string list + val iterative_relevant_facts : + Proof.context -> params -> string -> int -> relevance_fudge option + -> term list -> term -> fact list -> fact list +end; + +structure Sledgehammer_Filter_Iter : SLEDGEHAMMER_FILTER_ITER = +struct + +open ATP_Problem_Generate +open Sledgehammer_Fact +open Sledgehammer_Provers + +val trace = + Attrib.setup_config_bool @{binding sledgehammer_filter_iter_trace} (K false) +fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else () + +val sledgehammer_prefix = "Sledgehammer" ^ Long_Name.separator +val pseudo_abs_name = sledgehammer_prefix ^ "abs" +val pseudo_skolem_prefix = sledgehammer_prefix ^ "sko" +val theory_const_suffix = Long_Name.separator ^ " 1" + +fun order_of_type (Type (@{type_name fun}, [T1, T2])) = + Int.max (order_of_type T1 + 1, order_of_type T2) + | order_of_type (Type (_, Ts)) = fold (Integer.max o order_of_type) Ts 0 + | order_of_type _ = 0 + +(* An abstraction of Isabelle types and first-order terms *) +datatype pattern = PVar | PApp of string * pattern list +datatype ptype = PType of int * pattern list + +fun string_for_pattern PVar = "_" + | string_for_pattern (PApp (s, ps)) = + if null ps then s else s ^ string_for_patterns ps +and string_for_patterns ps = "(" ^ commas (map string_for_pattern ps) ^ ")" +fun string_for_ptype (PType (_, ps)) = string_for_patterns ps + +(*Is the second type an instance of the first one?*) +fun match_pattern (PVar, _) = true + | match_pattern (PApp _, PVar) = false + | match_pattern (PApp (s, ps), PApp (t, qs)) = + s = t andalso match_patterns (ps, qs) +and match_patterns (_, []) = true + | match_patterns ([], _) = false + | match_patterns (p :: ps, q :: qs) = + match_pattern (p, q) andalso match_patterns (ps, qs) +fun match_ptype (PType (_, ps), PType (_, qs)) = match_patterns (ps, qs) + +(* Is there a unifiable constant? *) +fun pconst_mem f consts (s, ps) = + exists (curry (match_ptype o f) ps) + (map snd (filter (curry (op =) s o fst) consts)) +fun pconst_hyper_mem f const_tab (s, ps) = + exists (curry (match_ptype o f) ps) (these (Symtab.lookup const_tab s)) + +fun pattern_for_type (Type (s, Ts)) = PApp (s, map pattern_for_type Ts) + | pattern_for_type (TFree (s, _)) = PApp (s, []) + | pattern_for_type (TVar _) = PVar + +(* Pairs a constant with the list of its type instantiations. *) +fun ptype thy const x = + (if const then map pattern_for_type (these (try (Sign.const_typargs thy) x)) + else []) +fun rich_ptype thy const (s, T) = + PType (order_of_type T, ptype thy const (s, T)) +fun rich_pconst thy const (s, T) = (s, rich_ptype thy const (s, T)) + +fun string_for_hyper_pconst (s, ps) = + s ^ "{" ^ commas (map string_for_ptype ps) ^ "}" + +(* Add a pconstant to the table, but a [] entry means a standard + connective, which we ignore.*) +fun add_pconst_to_table also_skolem (s, p) = + if (not also_skolem andalso String.isPrefix pseudo_skolem_prefix s) then I + else Symtab.map_default (s, [p]) (insert (op =) p) + +(* Set constants tend to pull in too many irrelevant facts. We limit the damage + by treating them more or less as if they were built-in but add their + axiomatization at the end. *) +val set_consts = [@{const_name Collect}, @{const_name Set.member}] +val set_thms = @{thms Collect_mem_eq mem_Collect_eq Collect_cong} + +fun add_pconsts_in_term thy is_built_in_const also_skolems pos = + let + val flip = Option.map not + (* We include free variables, as well as constants, to handle locales. For + each quantifiers that must necessarily be skolemized by the automatic + prover, we introduce a fresh constant to simulate the effect of + Skolemization. *) + fun do_const const ext_arg (x as (s, _)) ts = + let val (built_in, ts) = is_built_in_const x ts in + if member (op =) set_consts s then + fold (do_term ext_arg) ts + else + (not built_in + ? add_pconst_to_table also_skolems (rich_pconst thy const x)) + #> fold (do_term false) ts + end + and do_term ext_arg t = + case strip_comb t of + (Const x, ts) => do_const true ext_arg x ts + | (Free x, ts) => do_const false ext_arg x ts + | (Abs (_, T, t'), ts) => + ((null ts andalso not ext_arg) + (* Since lambdas on the right-hand side of equalities are usually + extensionalized later by "abs_extensionalize_term", we don't + penalize them here. *) + ? add_pconst_to_table true (pseudo_abs_name, + PType (order_of_type T + 1, []))) + #> fold (do_term false) (t' :: ts) + | (_, ts) => fold (do_term false) ts + fun do_quantifier will_surely_be_skolemized abs_T body_t = + do_formula pos body_t + #> (if also_skolems andalso will_surely_be_skolemized then + add_pconst_to_table true (pseudo_skolem_prefix ^ serial_string (), + PType (order_of_type abs_T, [])) + else + I) + and do_term_or_formula ext_arg T = + if T = HOLogic.boolT then do_formula NONE else do_term ext_arg + and do_formula pos t = + case t of + Const (@{const_name all}, _) $ Abs (_, T, t') => + do_quantifier (pos = SOME false) T t' + | @{const "==>"} $ t1 $ t2 => + do_formula (flip pos) t1 #> do_formula pos t2 + | Const (@{const_name "=="}, Type (_, [T, _])) $ t1 $ t2 => + do_term_or_formula false T t1 #> do_term_or_formula true T t2 + | @{const Trueprop} $ t1 => do_formula pos t1 + | @{const False} => I + | @{const True} => I + | @{const Not} $ t1 => do_formula (flip pos) t1 + | Const (@{const_name All}, _) $ Abs (_, T, t') => + do_quantifier (pos = SOME false) T t' + | Const (@{const_name Ex}, _) $ Abs (_, T, t') => + do_quantifier (pos = SOME true) T t' + | @{const HOL.conj} $ t1 $ t2 => fold (do_formula pos) [t1, t2] + | @{const HOL.disj} $ t1 $ t2 => fold (do_formula pos) [t1, t2] + | @{const HOL.implies} $ t1 $ t2 => + do_formula (flip pos) t1 #> do_formula pos t2 + | Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2 => + do_term_or_formula false T t1 #> do_term_or_formula true T t2 + | Const (@{const_name If}, Type (_, [_, Type (_, [T, _])])) + $ t1 $ t2 $ t3 => + do_formula NONE t1 #> fold (do_term_or_formula false T) [t2, t3] + | Const (@{const_name Ex1}, _) $ Abs (_, T, t') => + do_quantifier (is_some pos) T t' + | Const (@{const_name Ball}, _) $ t1 $ Abs (_, T, t') => + do_quantifier (pos = SOME false) T + (HOLogic.mk_imp (incr_boundvars 1 t1 $ Bound 0, t')) + | Const (@{const_name Bex}, _) $ t1 $ Abs (_, T, t') => + do_quantifier (pos = SOME true) T + (HOLogic.mk_conj (incr_boundvars 1 t1 $ Bound 0, t')) + | (t0 as Const (_, @{typ bool})) $ t1 => + do_term false t0 #> do_formula pos t1 (* theory constant *) + | _ => do_term false t + in do_formula pos end + +fun pconsts_in_fact thy is_built_in_const t = + Symtab.fold (fn (s, pss) => fold (cons o pair s) pss) + (Symtab.empty |> add_pconsts_in_term thy is_built_in_const true + (SOME true) t) [] + +val const_names_in_fact = map fst ooo pconsts_in_fact + +(* Inserts a dummy "constant" referring to the theory name, so that relevance + takes the given theory into account. *) +fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...} + : relevance_fudge) thy_name t = + if exists (curry (op <) 0.0) [theory_const_rel_weight, + theory_const_irrel_weight] then + Const (thy_name ^ theory_const_suffix, @{typ bool}) $ t + else + t + +fun theory_const_prop_of fudge th = + theory_constify fudge (Context.theory_name (theory_of_thm th)) (prop_of th) + +fun pair_consts_fact thy is_built_in_const fudge fact = + case fact |> snd |> theory_const_prop_of fudge + |> pconsts_in_fact thy is_built_in_const of + [] => NONE + | consts => SOME ((fact, consts), NONE) + +(* A two-dimensional symbol table counts frequencies of constants. It's keyed + first by constant name and second by its list of type instantiations. For the + latter, we need a linear ordering on "pattern list". *) + +fun pattern_ord p = + case p of + (PVar, PVar) => EQUAL + | (PVar, PApp _) => LESS + | (PApp _, PVar) => GREATER + | (PApp q1, PApp q2) => + prod_ord fast_string_ord (dict_ord pattern_ord) (q1, q2) +fun ptype_ord (PType p, PType q) = + prod_ord (dict_ord pattern_ord) int_ord (swap p, swap q) + +structure PType_Tab = Table(type key = ptype val ord = ptype_ord) + +fun count_fact_consts thy fudge = + let + fun do_const const (s, T) ts = + (* Two-dimensional table update. Constant maps to types maps to count. *) + PType_Tab.map_default (rich_ptype thy const (s, T), 0) (Integer.add 1) + |> Symtab.map_default (s, PType_Tab.empty) + #> fold do_term ts + and do_term t = + case strip_comb t of + (Const x, ts) => do_const true x ts + | (Free x, ts) => do_const false x ts + | (Abs (_, _, t'), ts) => fold do_term (t' :: ts) + | (_, ts) => fold do_term ts + in do_term o theory_const_prop_of fudge o snd end + +fun pow_int _ 0 = 1.0 + | pow_int x 1 = x + | pow_int x n = if n > 0 then x * pow_int x (n - 1) else pow_int x (n + 1) / x + +(*The frequency of a constant is the sum of those of all instances of its type.*) +fun pconst_freq match const_tab (c, ps) = + PType_Tab.fold (fn (qs, m) => match (ps, qs) ? Integer.add m) + (the (Symtab.lookup const_tab c)) 0 + + +(* A surprising number of theorems contain only a few significant constants. + These include all induction rules, and other general theorems. *) + +(* "log" seems best in practice. A constant function of one ignores the constant + frequencies. Rare constants give more points if they are relevant than less + rare ones. *) +fun rel_weight_for _ freq = 1.0 + 2.0 / Math.ln (Real.fromInt freq + 1.0) + +(* Irrelevant constants are treated differently. We associate lower penalties to + very rare constants and very common ones -- the former because they can't + lead to the inclusion of too many new facts, and the latter because they are + so common as to be of little interest. *) +fun irrel_weight_for ({worse_irrel_freq, higher_order_irrel_weight, ...} + : relevance_fudge) order freq = + let val (k, x) = worse_irrel_freq |> `Real.ceil in + (if freq < k then Math.ln (Real.fromInt (freq + 1)) / Math.ln x + else rel_weight_for order freq / rel_weight_for order k) + * pow_int higher_order_irrel_weight (order - 1) + end + +fun multiplier_for_const_name local_const_multiplier s = + if String.isSubstring "." s then 1.0 else local_const_multiplier + +(* Computes a constant's weight, as determined by its frequency. *) +fun generic_pconst_weight local_const_multiplier abs_weight skolem_weight + theory_const_weight chained_const_weight weight_for f + const_tab chained_const_tab (c as (s, PType (m, _))) = + if s = pseudo_abs_name then + abs_weight + else if String.isPrefix pseudo_skolem_prefix s then + skolem_weight + else if String.isSuffix theory_const_suffix s then + theory_const_weight + else + multiplier_for_const_name local_const_multiplier s + * weight_for m (pconst_freq (match_ptype o f) const_tab c) + |> (if chained_const_weight < 1.0 andalso + pconst_hyper_mem I chained_const_tab c then + curry (op *) chained_const_weight + else + I) + +fun rel_pconst_weight ({local_const_multiplier, abs_rel_weight, + theory_const_rel_weight, ...} : relevance_fudge) + const_tab = + generic_pconst_weight local_const_multiplier abs_rel_weight 0.0 + theory_const_rel_weight 0.0 rel_weight_for I const_tab + Symtab.empty + +fun irrel_pconst_weight (fudge as {local_const_multiplier, abs_irrel_weight, + skolem_irrel_weight, + theory_const_irrel_weight, + chained_const_irrel_weight, ...}) + const_tab chained_const_tab = + generic_pconst_weight local_const_multiplier abs_irrel_weight + skolem_irrel_weight theory_const_irrel_weight + chained_const_irrel_weight (irrel_weight_for fudge) swap + const_tab chained_const_tab + +fun stature_bonus ({intro_bonus, ...} : relevance_fudge) (_, Intro) = + intro_bonus + | stature_bonus {elim_bonus, ...} (_, Elim) = elim_bonus + | stature_bonus {simp_bonus, ...} (_, Simp) = simp_bonus + | stature_bonus {local_bonus, ...} (Local, _) = local_bonus + | stature_bonus {assum_bonus, ...} (Assum, _) = assum_bonus + | stature_bonus {chained_bonus, ...} (Chained, _) = chained_bonus + | stature_bonus _ _ = 0.0 + +fun is_odd_const_name s = + s = pseudo_abs_name orelse String.isPrefix pseudo_skolem_prefix s orelse + String.isSuffix theory_const_suffix s + +fun fact_weight fudge stature const_tab relevant_consts chained_consts + fact_consts = + case fact_consts |> List.partition (pconst_hyper_mem I relevant_consts) + ||> filter_out (pconst_hyper_mem swap relevant_consts) of + ([], _) => 0.0 + | (rel, irrel) => + if forall (forall (is_odd_const_name o fst)) [rel, irrel] then + 0.0 + else + let + val irrel = irrel |> filter_out (pconst_mem swap rel) + val rel_weight = + 0.0 |> fold (curry (op +) o rel_pconst_weight fudge const_tab) rel + val irrel_weight = + ~ (stature_bonus fudge stature) + |> fold (curry (op +) + o irrel_pconst_weight fudge const_tab chained_consts) irrel + val res = rel_weight / (rel_weight + irrel_weight) + in if Real.isFinite res then res else 0.0 end + +fun take_most_relevant ctxt max_facts remaining_max + ({max_imperfect, max_imperfect_exp, ...} : relevance_fudge) + (candidates : ((fact * (string * ptype) list) * real) list) = + let + val max_imperfect = + Real.ceil (Math.pow (max_imperfect, + Math.pow (Real.fromInt remaining_max + / Real.fromInt max_facts, max_imperfect_exp))) + val (perfect, imperfect) = + candidates |> sort (Real.compare o swap o pairself snd) + |> take_prefix (fn (_, w) => w > 0.99999) + val ((accepts, more_rejects), rejects) = + chop max_imperfect imperfect |>> append perfect |>> chop remaining_max + in + trace_msg ctxt (fn () => + "Actually passed (" ^ string_of_int (length accepts) ^ " of " ^ + string_of_int (length candidates) ^ "): " ^ + (accepts |> map (fn ((((name, _), _), _), weight) => + name () ^ " [" ^ Real.toString weight ^ "]") + |> commas)); + (accepts, more_rejects @ rejects) + end + +fun if_empty_replace_with_scope thy is_built_in_const facts sc tab = + if Symtab.is_empty tab then + Symtab.empty + |> fold (add_pconsts_in_term thy is_built_in_const false (SOME false)) + (map_filter (fn ((_, (sc', _)), th) => + if sc' = sc then SOME (prop_of th) else NONE) facts) + else + tab + +fun consider_arities is_built_in_const th = + let + fun aux _ _ NONE = NONE + | aux t args (SOME tab) = + case t of + t1 $ t2 => SOME tab |> aux t1 (t2 :: args) |> aux t2 [] + | Const (x as (s, _)) => + (if is_built_in_const x args |> fst then + SOME tab + else case Symtab.lookup tab s of + NONE => SOME (Symtab.update (s, length args) tab) + | SOME n => if n = length args then SOME tab else NONE) + | _ => SOME tab + in aux (prop_of th) [] end + +(* FIXME: This is currently only useful for polymorphic type encodings. *) +fun could_benefit_from_ext is_built_in_const facts = + fold (consider_arities is_built_in_const o snd) facts (SOME Symtab.empty) + |> is_none + +(* High enough so that it isn't wrongly considered as very relevant (e.g., for E + weights), but low enough so that it is unlikely to be truncated away if few + facts are included. *) +val special_fact_index = 75 + +fun relevance_filter ctxt thres0 decay max_facts is_built_in_const + (fudge as {threshold_divisor, ridiculous_threshold, ...}) facts hyp_ts + concl_t = + let + val thy = Proof_Context.theory_of ctxt + val const_tab = fold (count_fact_consts thy fudge) facts Symtab.empty + val add_pconsts = add_pconsts_in_term thy is_built_in_const false o SOME + val chained_ts = + facts |> map_filter (fn ((_, (Chained, _)), th) => SOME (prop_of th) + | _ => NONE) + val chained_const_tab = Symtab.empty |> fold (add_pconsts true) chained_ts + val goal_const_tab = + Symtab.empty |> fold (add_pconsts true) hyp_ts + |> add_pconsts false concl_t + |> (fn tab => if Symtab.is_empty tab then chained_const_tab else tab) + |> fold (if_empty_replace_with_scope thy is_built_in_const facts) + [Chained, Assum, Local] + fun iter j remaining_max thres rel_const_tab hopeless hopeful = + let + fun relevant [] _ [] = + (* Nothing has been added this iteration. *) + if j = 0 andalso thres >= ridiculous_threshold then + (* First iteration? Try again. *) + iter 0 max_facts (thres / threshold_divisor) rel_const_tab + hopeless hopeful + else + [] + | relevant candidates rejects [] = + let + val (accepts, more_rejects) = + take_most_relevant ctxt max_facts remaining_max fudge candidates + val rel_const_tab' = + rel_const_tab + |> fold (add_pconst_to_table false) (maps (snd o fst) accepts) + fun is_dirty (c, _) = + Symtab.lookup rel_const_tab' c <> Symtab.lookup rel_const_tab c + val (hopeful_rejects, hopeless_rejects) = + (rejects @ hopeless, ([], [])) + |-> fold (fn (ax as (_, consts), old_weight) => + if exists is_dirty consts then + apfst (cons (ax, NONE)) + else + apsnd (cons (ax, old_weight))) + |>> append (more_rejects + |> map (fn (ax as (_, consts), old_weight) => + (ax, if exists is_dirty consts then NONE + else SOME old_weight))) + val thres = + 1.0 - (1.0 - thres) + * Math.pow (decay, Real.fromInt (length accepts)) + val remaining_max = remaining_max - length accepts + in + trace_msg ctxt (fn () => "New or updated constants: " ^ + commas (rel_const_tab' |> Symtab.dest + |> subtract (op =) (rel_const_tab |> Symtab.dest) + |> map string_for_hyper_pconst)); + map (fst o fst) accepts @ + (if remaining_max = 0 then + [] + else + iter (j + 1) remaining_max thres rel_const_tab' + hopeless_rejects hopeful_rejects) + end + | relevant candidates rejects + (((ax as (((_, stature), _), fact_consts)), cached_weight) + :: hopeful) = + let + val weight = + case cached_weight of + SOME w => w + | NONE => fact_weight fudge stature const_tab rel_const_tab + chained_const_tab fact_consts + in + if weight >= thres then + relevant ((ax, weight) :: candidates) rejects hopeful + else + relevant candidates ((ax, weight) :: rejects) hopeful + end + in + trace_msg ctxt (fn () => + "ITERATION " ^ string_of_int j ^ ": current threshold: " ^ + Real.toString thres ^ ", constants: " ^ + commas (rel_const_tab |> Symtab.dest + |> filter (curry (op <>) [] o snd) + |> map string_for_hyper_pconst)); + relevant [] [] hopeful + end + fun uses_const s t = + fold_aterms (curry (fn (Const (s', _), false) => s' = s | (_, b) => b)) t + false + fun uses_const_anywhere accepts s = + exists (uses_const s o prop_of o snd) accepts orelse + exists (uses_const s) (concl_t :: hyp_ts) + fun add_set_const_thms accepts = + exists (uses_const_anywhere accepts) set_consts ? append set_thms + fun insert_into_facts accepts [] = accepts + | insert_into_facts accepts ths = + let + val add = facts |> filter (member Thm.eq_thm_prop ths o snd) + val (bef, after) = + accepts |> filter_out (member Thm.eq_thm_prop ths o snd) + |> take (max_facts - length add) + |> chop special_fact_index + in bef @ add @ after end + fun insert_special_facts accepts = + (* FIXME: get rid of "ext" here once it is treated as a helper *) + [] |> could_benefit_from_ext is_built_in_const accepts ? cons @{thm ext} + |> add_set_const_thms accepts + |> insert_into_facts accepts + in + facts |> map_filter (pair_consts_fact thy is_built_in_const fudge) + |> iter 0 max_facts thres0 goal_const_tab [] + |> insert_special_facts + |> tap (fn accepts => trace_msg ctxt (fn () => + "Total relevant: " ^ string_of_int (length accepts))) + end + +fun iterative_relevant_facts ctxt + ({fact_thresholds = (thres0, thres1), ...} : params) prover + max_facts fudge hyp_ts concl_t facts = + let + val thy = Proof_Context.theory_of ctxt + val is_built_in_const = + Sledgehammer_Provers.is_built_in_const_for_prover ctxt prover + val fudge = + case fudge of + SOME fudge => fudge + | NONE => Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover + val decay = Math.pow ((1.0 - thres1) / (1.0 - thres0), + 1.0 / Real.fromInt (max_facts + 1)) + in + trace_msg ctxt (fn () => "Considering " ^ string_of_int (length facts) ^ + " facts"); + (if thres1 < 0.0 then + facts + else if thres0 > 1.0 orelse thres0 > thres1 then + [] + else + relevance_filter ctxt thres0 decay max_facts is_built_in_const fudge + facts hyp_ts + (concl_t |> theory_constify fudge (Context.theory_name thy))) + end + +end;