renamed ML files
authorblanchet
Fri, 20 Jul 2012 22:19:45 +0200
changeset 49395d4b7c7be3116
parent 49394 2b5ad61e2ccc
child 49396 1b7d798460bb
renamed ML files
src/HOL/IsaMakefile
src/HOL/Sledgehammer.thy
src/HOL/Tools/Sledgehammer/sledgehammer_filter_iter.ML
src/HOL/Tools/Sledgehammer/sledgehammer_filter_mash.ML
src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
src/HOL/Tools/Sledgehammer/sledgehammer_mepo.ML
     1.1 --- a/src/HOL/IsaMakefile	Fri Jul 20 22:19:45 2012 +0200
     1.2 +++ b/src/HOL/IsaMakefile	Fri Jul 20 22:19:45 2012 +0200
     1.3 @@ -371,8 +371,8 @@
     1.4    Tools/semiring_normalizer.ML \
     1.5    Tools/Sledgehammer/async_manager.ML \
     1.6    Tools/Sledgehammer/sledgehammer_fact.ML \
     1.7 -  Tools/Sledgehammer/sledgehammer_filter_iter.ML \
     1.8 -  Tools/Sledgehammer/sledgehammer_filter_mash.ML \
     1.9 +  Tools/Sledgehammer/sledgehammer_mash.ML \
    1.10 +  Tools/Sledgehammer/sledgehammer_mepo.ML \
    1.11    Tools/Sledgehammer/sledgehammer_minimize.ML \
    1.12    Tools/Sledgehammer/sledgehammer_isar.ML \
    1.13    Tools/Sledgehammer/sledgehammer_provers.ML \
     2.1 --- a/src/HOL/Sledgehammer.thy	Fri Jul 20 22:19:45 2012 +0200
     2.2 +++ b/src/HOL/Sledgehammer.thy	Fri Jul 20 22:19:45 2012 +0200
     2.3 @@ -14,8 +14,8 @@
     2.4       "Tools/Sledgehammer/sledgehammer_fact.ML"
     2.5       "Tools/Sledgehammer/sledgehammer_provers.ML"
     2.6       "Tools/Sledgehammer/sledgehammer_minimize.ML"
     2.7 -     "Tools/Sledgehammer/sledgehammer_filter_iter.ML"
     2.8 -     "Tools/Sledgehammer/sledgehammer_filter_mash.ML"
     2.9 +     "Tools/Sledgehammer/sledgehammer_mepo.ML"
    2.10 +     "Tools/Sledgehammer/sledgehammer_mash.ML"
    2.11       "Tools/Sledgehammer/sledgehammer_run.ML"
    2.12       "Tools/Sledgehammer/sledgehammer_isar.ML"
    2.13  begin
     3.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_filter_iter.ML	Fri Jul 20 22:19:45 2012 +0200
     3.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.3 @@ -1,537 +0,0 @@
     3.4 -(*  Title:      HOL/Tools/Sledgehammer/sledgehammer_filter_iter.ML
     3.5 -    Author:     Jia Meng, Cambridge University Computer Laboratory and NICTA
     3.6 -    Author:     Jasmin Blanchette, TU Muenchen
     3.7 -
     3.8 -Sledgehammer's iterative relevance filter.
     3.9 -*)
    3.10 -
    3.11 -signature SLEDGEHAMMER_FILTER_ITER =
    3.12 -sig
    3.13 -  type stature = ATP_Problem_Generate.stature
    3.14 -  type fact = Sledgehammer_Fact.fact
    3.15 -  type params = Sledgehammer_Provers.params
    3.16 -  type relevance_fudge = Sledgehammer_Provers.relevance_fudge
    3.17 -
    3.18 -  val trace : bool Config.T
    3.19 -  val pseudo_abs_name : string
    3.20 -  val pseudo_skolem_prefix : string
    3.21 -  val const_names_in_fact :
    3.22 -    theory -> (string * typ -> term list -> bool * term list) -> term
    3.23 -    -> string list
    3.24 -  val iterative_relevant_facts :
    3.25 -    Proof.context -> params -> string -> int -> relevance_fudge option
    3.26 -    -> term list -> term -> fact list -> fact list
    3.27 -end;
    3.28 -
    3.29 -structure Sledgehammer_Filter_Iter : SLEDGEHAMMER_FILTER_ITER =
    3.30 -struct
    3.31 -
    3.32 -open ATP_Problem_Generate
    3.33 -open Sledgehammer_Fact
    3.34 -open Sledgehammer_Provers
    3.35 -
    3.36 -val trace =
    3.37 -  Attrib.setup_config_bool @{binding sledgehammer_filter_iter_trace} (K false)
    3.38 -fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
    3.39 -
    3.40 -val sledgehammer_prefix = "Sledgehammer" ^ Long_Name.separator
    3.41 -val pseudo_abs_name = sledgehammer_prefix ^ "abs"
    3.42 -val pseudo_skolem_prefix = sledgehammer_prefix ^ "sko"
    3.43 -val theory_const_suffix = Long_Name.separator ^ " 1"
    3.44 -
    3.45 -fun order_of_type (Type (@{type_name fun}, [T1, T2])) =
    3.46 -    Int.max (order_of_type T1 + 1, order_of_type T2)
    3.47 -  | order_of_type (Type (_, Ts)) = fold (Integer.max o order_of_type) Ts 0
    3.48 -  | order_of_type _ = 0
    3.49 -
    3.50 -(* An abstraction of Isabelle types and first-order terms *)
    3.51 -datatype pattern = PVar | PApp of string * pattern list
    3.52 -datatype ptype = PType of int * pattern list
    3.53 -
    3.54 -fun string_for_pattern PVar = "_"
    3.55 -  | string_for_pattern (PApp (s, ps)) =
    3.56 -    if null ps then s else s ^ string_for_patterns ps
    3.57 -and string_for_patterns ps = "(" ^ commas (map string_for_pattern ps) ^ ")"
    3.58 -fun string_for_ptype (PType (_, ps)) = string_for_patterns ps
    3.59 -
    3.60 -(*Is the second type an instance of the first one?*)
    3.61 -fun match_pattern (PVar, _) = true
    3.62 -  | match_pattern (PApp _, PVar) = false
    3.63 -  | match_pattern (PApp (s, ps), PApp (t, qs)) =
    3.64 -    s = t andalso match_patterns (ps, qs)
    3.65 -and match_patterns (_, []) = true
    3.66 -  | match_patterns ([], _) = false
    3.67 -  | match_patterns (p :: ps, q :: qs) =
    3.68 -    match_pattern (p, q) andalso match_patterns (ps, qs)
    3.69 -fun match_ptype (PType (_, ps), PType (_, qs)) = match_patterns (ps, qs)
    3.70 -
    3.71 -(* Is there a unifiable constant? *)
    3.72 -fun pconst_mem f consts (s, ps) =
    3.73 -  exists (curry (match_ptype o f) ps)
    3.74 -         (map snd (filter (curry (op =) s o fst) consts))
    3.75 -fun pconst_hyper_mem f const_tab (s, ps) =
    3.76 -  exists (curry (match_ptype o f) ps) (these (Symtab.lookup const_tab s))
    3.77 -
    3.78 -fun pattern_for_type (Type (s, Ts)) = PApp (s, map pattern_for_type Ts)
    3.79 -  | pattern_for_type (TFree (s, _)) = PApp (s, [])
    3.80 -  | pattern_for_type (TVar _) = PVar
    3.81 -
    3.82 -(* Pairs a constant with the list of its type instantiations. *)
    3.83 -fun ptype thy const x =
    3.84 -  (if const then map pattern_for_type (these (try (Sign.const_typargs thy) x))
    3.85 -   else [])
    3.86 -fun rich_ptype thy const (s, T) =
    3.87 -  PType (order_of_type T, ptype thy const (s, T))
    3.88 -fun rich_pconst thy const (s, T) = (s, rich_ptype thy const (s, T))
    3.89 -
    3.90 -fun string_for_hyper_pconst (s, ps) =
    3.91 -  s ^ "{" ^ commas (map string_for_ptype ps) ^ "}"
    3.92 -
    3.93 -(* Add a pconstant to the table, but a [] entry means a standard
    3.94 -   connective, which we ignore.*)
    3.95 -fun add_pconst_to_table also_skolem (s, p) =
    3.96 -  if (not also_skolem andalso String.isPrefix pseudo_skolem_prefix s) then I
    3.97 -  else Symtab.map_default (s, [p]) (insert (op =) p)
    3.98 -
    3.99 -(* Set constants tend to pull in too many irrelevant facts. We limit the damage
   3.100 -   by treating them more or less as if they were built-in but add their
   3.101 -   axiomatization at the end. *)
   3.102 -val set_consts = [@{const_name Collect}, @{const_name Set.member}]
   3.103 -val set_thms = @{thms Collect_mem_eq mem_Collect_eq Collect_cong}
   3.104 -
   3.105 -fun add_pconsts_in_term thy is_built_in_const also_skolems pos =
   3.106 -  let
   3.107 -    val flip = Option.map not
   3.108 -    (* We include free variables, as well as constants, to handle locales. For
   3.109 -       each quantifiers that must necessarily be skolemized by the automatic
   3.110 -       prover, we introduce a fresh constant to simulate the effect of
   3.111 -       Skolemization. *)
   3.112 -    fun do_const const ext_arg (x as (s, _)) ts =
   3.113 -      let val (built_in, ts) = is_built_in_const x ts in
   3.114 -        if member (op =) set_consts s then
   3.115 -          fold (do_term ext_arg) ts
   3.116 -        else
   3.117 -          (not built_in
   3.118 -           ? add_pconst_to_table also_skolems (rich_pconst thy const x))
   3.119 -          #> fold (do_term false) ts
   3.120 -      end
   3.121 -    and do_term ext_arg t =
   3.122 -      case strip_comb t of
   3.123 -        (Const x, ts) => do_const true ext_arg x ts
   3.124 -      | (Free x, ts) => do_const false ext_arg x ts
   3.125 -      | (Abs (_, T, t'), ts) =>
   3.126 -        ((null ts andalso not ext_arg)
   3.127 -         (* Since lambdas on the right-hand side of equalities are usually
   3.128 -            extensionalized later by "abs_extensionalize_term", we don't
   3.129 -            penalize them here. *)
   3.130 -         ? add_pconst_to_table true (pseudo_abs_name,
   3.131 -                                     PType (order_of_type T + 1, [])))
   3.132 -        #> fold (do_term false) (t' :: ts)
   3.133 -      | (_, ts) => fold (do_term false) ts
   3.134 -    fun do_quantifier will_surely_be_skolemized abs_T body_t =
   3.135 -      do_formula pos body_t
   3.136 -      #> (if also_skolems andalso will_surely_be_skolemized then
   3.137 -            add_pconst_to_table true (pseudo_skolem_prefix ^ serial_string (),
   3.138 -                                      PType (order_of_type abs_T, []))
   3.139 -          else
   3.140 -            I)
   3.141 -    and do_term_or_formula ext_arg T =
   3.142 -      if T = HOLogic.boolT then do_formula NONE else do_term ext_arg
   3.143 -    and do_formula pos t =
   3.144 -      case t of
   3.145 -        Const (@{const_name all}, _) $ Abs (_, T, t') =>
   3.146 -        do_quantifier (pos = SOME false) T t'
   3.147 -      | @{const "==>"} $ t1 $ t2 =>
   3.148 -        do_formula (flip pos) t1 #> do_formula pos t2
   3.149 -      | Const (@{const_name "=="}, Type (_, [T, _])) $ t1 $ t2 =>
   3.150 -        do_term_or_formula false T t1 #> do_term_or_formula true T t2
   3.151 -      | @{const Trueprop} $ t1 => do_formula pos t1
   3.152 -      | @{const False} => I
   3.153 -      | @{const True} => I
   3.154 -      | @{const Not} $ t1 => do_formula (flip pos) t1
   3.155 -      | Const (@{const_name All}, _) $ Abs (_, T, t') =>
   3.156 -        do_quantifier (pos = SOME false) T t'
   3.157 -      | Const (@{const_name Ex}, _) $ Abs (_, T, t') =>
   3.158 -        do_quantifier (pos = SOME true) T t'
   3.159 -      | @{const HOL.conj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   3.160 -      | @{const HOL.disj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   3.161 -      | @{const HOL.implies} $ t1 $ t2 =>
   3.162 -        do_formula (flip pos) t1 #> do_formula pos t2
   3.163 -      | Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2 =>
   3.164 -        do_term_or_formula false T t1 #> do_term_or_formula true T t2
   3.165 -      | Const (@{const_name If}, Type (_, [_, Type (_, [T, _])]))
   3.166 -        $ t1 $ t2 $ t3 =>
   3.167 -        do_formula NONE t1 #> fold (do_term_or_formula false T) [t2, t3]
   3.168 -      | Const (@{const_name Ex1}, _) $ Abs (_, T, t') =>
   3.169 -        do_quantifier (is_some pos) T t'
   3.170 -      | Const (@{const_name Ball}, _) $ t1 $ Abs (_, T, t') =>
   3.171 -        do_quantifier (pos = SOME false) T
   3.172 -                      (HOLogic.mk_imp (incr_boundvars 1 t1 $ Bound 0, t'))
   3.173 -      | Const (@{const_name Bex}, _) $ t1 $ Abs (_, T, t') =>
   3.174 -        do_quantifier (pos = SOME true) T
   3.175 -                      (HOLogic.mk_conj (incr_boundvars 1 t1 $ Bound 0, t'))
   3.176 -      | (t0 as Const (_, @{typ bool})) $ t1 =>
   3.177 -        do_term false t0 #> do_formula pos t1  (* theory constant *)
   3.178 -      | _ => do_term false t
   3.179 -  in do_formula pos end
   3.180 -
   3.181 -fun pconsts_in_fact thy is_built_in_const t =
   3.182 -  Symtab.fold (fn (s, pss) => fold (cons o pair s) pss)
   3.183 -              (Symtab.empty |> add_pconsts_in_term thy is_built_in_const true
   3.184 -                                                   (SOME true) t) []
   3.185 -
   3.186 -val const_names_in_fact = map fst ooo pconsts_in_fact
   3.187 -
   3.188 -(* Inserts a dummy "constant" referring to the theory name, so that relevance
   3.189 -   takes the given theory into account. *)
   3.190 -fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...}
   3.191 -                     : relevance_fudge) thy_name t =
   3.192 -  if exists (curry (op <) 0.0) [theory_const_rel_weight,
   3.193 -                                theory_const_irrel_weight] then
   3.194 -    Const (thy_name ^ theory_const_suffix, @{typ bool}) $ t
   3.195 -  else
   3.196 -    t
   3.197 -
   3.198 -fun theory_const_prop_of fudge th =
   3.199 -  theory_constify fudge (Context.theory_name (theory_of_thm th)) (prop_of th)
   3.200 -
   3.201 -fun pair_consts_fact thy is_built_in_const fudge fact =
   3.202 -  case fact |> snd |> theory_const_prop_of fudge
   3.203 -            |> pconsts_in_fact thy is_built_in_const of
   3.204 -    [] => NONE
   3.205 -  | consts => SOME ((fact, consts), NONE)
   3.206 -
   3.207 -(* A two-dimensional symbol table counts frequencies of constants. It's keyed
   3.208 -   first by constant name and second by its list of type instantiations. For the
   3.209 -   latter, we need a linear ordering on "pattern list". *)
   3.210 -
   3.211 -fun pattern_ord p =
   3.212 -  case p of
   3.213 -    (PVar, PVar) => EQUAL
   3.214 -  | (PVar, PApp _) => LESS
   3.215 -  | (PApp _, PVar) => GREATER
   3.216 -  | (PApp q1, PApp q2) =>
   3.217 -    prod_ord fast_string_ord (dict_ord pattern_ord) (q1, q2)
   3.218 -fun ptype_ord (PType p, PType q) =
   3.219 -  prod_ord (dict_ord pattern_ord) int_ord (swap p, swap q)
   3.220 -
   3.221 -structure PType_Tab = Table(type key = ptype val ord = ptype_ord)
   3.222 -
   3.223 -fun count_fact_consts thy fudge =
   3.224 -  let
   3.225 -    fun do_const const (s, T) ts =
   3.226 -      (* Two-dimensional table update. Constant maps to types maps to count. *)
   3.227 -      PType_Tab.map_default (rich_ptype thy const (s, T), 0) (Integer.add 1)
   3.228 -      |> Symtab.map_default (s, PType_Tab.empty)
   3.229 -      #> fold do_term ts
   3.230 -    and do_term t =
   3.231 -      case strip_comb t of
   3.232 -        (Const x, ts) => do_const true x ts
   3.233 -      | (Free x, ts) => do_const false x ts
   3.234 -      | (Abs (_, _, t'), ts) => fold do_term (t' :: ts)
   3.235 -      | (_, ts) => fold do_term ts
   3.236 -  in do_term o theory_const_prop_of fudge o snd end
   3.237 -
   3.238 -fun pow_int _ 0 = 1.0
   3.239 -  | pow_int x 1 = x
   3.240 -  | pow_int x n = if n > 0 then x * pow_int x (n - 1) else pow_int x (n + 1) / x
   3.241 -
   3.242 -(*The frequency of a constant is the sum of those of all instances of its type.*)
   3.243 -fun pconst_freq match const_tab (c, ps) =
   3.244 -  PType_Tab.fold (fn (qs, m) => match (ps, qs) ? Integer.add m)
   3.245 -                 (the (Symtab.lookup const_tab c)) 0
   3.246 -
   3.247 -
   3.248 -(* A surprising number of theorems contain only a few significant constants.
   3.249 -   These include all induction rules, and other general theorems. *)
   3.250 -
   3.251 -(* "log" seems best in practice. A constant function of one ignores the constant
   3.252 -   frequencies. Rare constants give more points if they are relevant than less
   3.253 -   rare ones. *)
   3.254 -fun rel_weight_for _ freq = 1.0 + 2.0 / Math.ln (Real.fromInt freq + 1.0)
   3.255 -
   3.256 -(* Irrelevant constants are treated differently. We associate lower penalties to
   3.257 -   very rare constants and very common ones -- the former because they can't
   3.258 -   lead to the inclusion of too many new facts, and the latter because they are
   3.259 -   so common as to be of little interest. *)
   3.260 -fun irrel_weight_for ({worse_irrel_freq, higher_order_irrel_weight, ...}
   3.261 -                      : relevance_fudge) order freq =
   3.262 -  let val (k, x) = worse_irrel_freq |> `Real.ceil in
   3.263 -    (if freq < k then Math.ln (Real.fromInt (freq + 1)) / Math.ln x
   3.264 -     else rel_weight_for order freq / rel_weight_for order k)
   3.265 -    * pow_int higher_order_irrel_weight (order - 1)
   3.266 -  end
   3.267 -
   3.268 -fun multiplier_for_const_name local_const_multiplier s =
   3.269 -  if String.isSubstring "." s then 1.0 else local_const_multiplier
   3.270 -
   3.271 -(* Computes a constant's weight, as determined by its frequency. *)
   3.272 -fun generic_pconst_weight local_const_multiplier abs_weight skolem_weight
   3.273 -                          theory_const_weight chained_const_weight weight_for f
   3.274 -                          const_tab chained_const_tab (c as (s, PType (m, _))) =
   3.275 -  if s = pseudo_abs_name then
   3.276 -    abs_weight
   3.277 -  else if String.isPrefix pseudo_skolem_prefix s then
   3.278 -    skolem_weight
   3.279 -  else if String.isSuffix theory_const_suffix s then
   3.280 -    theory_const_weight
   3.281 -  else
   3.282 -    multiplier_for_const_name local_const_multiplier s
   3.283 -    * weight_for m (pconst_freq (match_ptype o f) const_tab c)
   3.284 -    |> (if chained_const_weight < 1.0 andalso
   3.285 -           pconst_hyper_mem I chained_const_tab c then
   3.286 -          curry (op *) chained_const_weight
   3.287 -        else
   3.288 -          I)
   3.289 -
   3.290 -fun rel_pconst_weight ({local_const_multiplier, abs_rel_weight,
   3.291 -                        theory_const_rel_weight, ...} : relevance_fudge)
   3.292 -                      const_tab =
   3.293 -  generic_pconst_weight local_const_multiplier abs_rel_weight 0.0
   3.294 -                        theory_const_rel_weight 0.0 rel_weight_for I const_tab
   3.295 -                        Symtab.empty
   3.296 -
   3.297 -fun irrel_pconst_weight (fudge as {local_const_multiplier, abs_irrel_weight,
   3.298 -                                   skolem_irrel_weight,
   3.299 -                                   theory_const_irrel_weight,
   3.300 -                                   chained_const_irrel_weight, ...})
   3.301 -                        const_tab chained_const_tab =
   3.302 -  generic_pconst_weight local_const_multiplier abs_irrel_weight
   3.303 -                        skolem_irrel_weight theory_const_irrel_weight
   3.304 -                        chained_const_irrel_weight (irrel_weight_for fudge) swap
   3.305 -                        const_tab chained_const_tab
   3.306 -
   3.307 -fun stature_bonus ({intro_bonus, ...} : relevance_fudge) (_, Intro) =
   3.308 -    intro_bonus
   3.309 -  | stature_bonus {elim_bonus, ...} (_, Elim) = elim_bonus
   3.310 -  | stature_bonus {simp_bonus, ...} (_, Simp) = simp_bonus
   3.311 -  | stature_bonus {local_bonus, ...} (Local, _) = local_bonus
   3.312 -  | stature_bonus {assum_bonus, ...} (Assum, _) = assum_bonus
   3.313 -  | stature_bonus {chained_bonus, ...} (Chained, _) = chained_bonus
   3.314 -  | stature_bonus _ _ = 0.0
   3.315 -
   3.316 -fun is_odd_const_name s =
   3.317 -  s = pseudo_abs_name orelse String.isPrefix pseudo_skolem_prefix s orelse
   3.318 -  String.isSuffix theory_const_suffix s
   3.319 -
   3.320 -fun fact_weight fudge stature const_tab relevant_consts chained_consts
   3.321 -                fact_consts =
   3.322 -  case fact_consts |> List.partition (pconst_hyper_mem I relevant_consts)
   3.323 -                   ||> filter_out (pconst_hyper_mem swap relevant_consts) of
   3.324 -    ([], _) => 0.0
   3.325 -  | (rel, irrel) =>
   3.326 -    if forall (forall (is_odd_const_name o fst)) [rel, irrel] then
   3.327 -      0.0
   3.328 -    else
   3.329 -      let
   3.330 -        val irrel = irrel |> filter_out (pconst_mem swap rel)
   3.331 -        val rel_weight =
   3.332 -          0.0 |> fold (curry (op +) o rel_pconst_weight fudge const_tab) rel
   3.333 -        val irrel_weight =
   3.334 -          ~ (stature_bonus fudge stature)
   3.335 -          |> fold (curry (op +)
   3.336 -                   o irrel_pconst_weight fudge const_tab chained_consts) irrel
   3.337 -        val res = rel_weight / (rel_weight + irrel_weight)
   3.338 -      in if Real.isFinite res then res else 0.0 end
   3.339 -
   3.340 -fun take_most_relevant ctxt max_facts remaining_max
   3.341 -        ({max_imperfect, max_imperfect_exp, ...} : relevance_fudge)
   3.342 -        (candidates : ((fact * (string * ptype) list) * real) list) =
   3.343 -  let
   3.344 -    val max_imperfect =
   3.345 -      Real.ceil (Math.pow (max_imperfect,
   3.346 -                    Math.pow (Real.fromInt remaining_max
   3.347 -                              / Real.fromInt max_facts, max_imperfect_exp)))
   3.348 -    val (perfect, imperfect) =
   3.349 -      candidates |> sort (Real.compare o swap o pairself snd)
   3.350 -                 |> take_prefix (fn (_, w) => w > 0.99999)
   3.351 -    val ((accepts, more_rejects), rejects) =
   3.352 -      chop max_imperfect imperfect |>> append perfect |>> chop remaining_max
   3.353 -  in
   3.354 -    trace_msg ctxt (fn () =>
   3.355 -        "Actually passed (" ^ string_of_int (length accepts) ^ " of " ^
   3.356 -        string_of_int (length candidates) ^ "): " ^
   3.357 -        (accepts |> map (fn ((((name, _), _), _), weight) =>
   3.358 -                            name () ^ " [" ^ Real.toString weight ^ "]")
   3.359 -                 |> commas));
   3.360 -    (accepts, more_rejects @ rejects)
   3.361 -  end
   3.362 -
   3.363 -fun if_empty_replace_with_scope thy is_built_in_const facts sc tab =
   3.364 -  if Symtab.is_empty tab then
   3.365 -    Symtab.empty
   3.366 -    |> fold (add_pconsts_in_term thy is_built_in_const false (SOME false))
   3.367 -            (map_filter (fn ((_, (sc', _)), th) =>
   3.368 -                            if sc' = sc then SOME (prop_of th) else NONE) facts)
   3.369 -  else
   3.370 -    tab
   3.371 -
   3.372 -fun consider_arities is_built_in_const th =
   3.373 -  let
   3.374 -    fun aux _ _ NONE = NONE
   3.375 -      | aux t args (SOME tab) =
   3.376 -        case t of
   3.377 -          t1 $ t2 => SOME tab |> aux t1 (t2 :: args) |> aux t2 []
   3.378 -        | Const (x as (s, _)) =>
   3.379 -          (if is_built_in_const x args |> fst then
   3.380 -             SOME tab
   3.381 -           else case Symtab.lookup tab s of
   3.382 -             NONE => SOME (Symtab.update (s, length args) tab)
   3.383 -           | SOME n => if n = length args then SOME tab else NONE)
   3.384 -        | _ => SOME tab
   3.385 -  in aux (prop_of th) [] end
   3.386 -
   3.387 -(* FIXME: This is currently only useful for polymorphic type encodings. *)
   3.388 -fun could_benefit_from_ext is_built_in_const facts =
   3.389 -  fold (consider_arities is_built_in_const o snd) facts (SOME Symtab.empty)
   3.390 -  |> is_none
   3.391 -
   3.392 -(* High enough so that it isn't wrongly considered as very relevant (e.g., for E
   3.393 -   weights), but low enough so that it is unlikely to be truncated away if few
   3.394 -   facts are included. *)
   3.395 -val special_fact_index = 75
   3.396 -
   3.397 -fun relevance_filter ctxt thres0 decay max_facts is_built_in_const
   3.398 -        (fudge as {threshold_divisor, ridiculous_threshold, ...}) facts hyp_ts
   3.399 -        concl_t =
   3.400 -  let
   3.401 -    val thy = Proof_Context.theory_of ctxt
   3.402 -    val const_tab = fold (count_fact_consts thy fudge) facts Symtab.empty
   3.403 -    val add_pconsts = add_pconsts_in_term thy is_built_in_const false o SOME
   3.404 -    val chained_ts =
   3.405 -      facts |> map_filter (fn ((_, (Chained, _)), th) => SOME (prop_of th)
   3.406 -                            | _ => NONE)
   3.407 -    val chained_const_tab = Symtab.empty |> fold (add_pconsts true) chained_ts
   3.408 -    val goal_const_tab =
   3.409 -      Symtab.empty |> fold (add_pconsts true) hyp_ts
   3.410 -                   |> add_pconsts false concl_t
   3.411 -      |> (fn tab => if Symtab.is_empty tab then chained_const_tab else tab)
   3.412 -      |> fold (if_empty_replace_with_scope thy is_built_in_const facts)
   3.413 -              [Chained, Assum, Local]
   3.414 -    fun iter j remaining_max thres rel_const_tab hopeless hopeful =
   3.415 -      let
   3.416 -        fun relevant [] _ [] =
   3.417 -            (* Nothing has been added this iteration. *)
   3.418 -            if j = 0 andalso thres >= ridiculous_threshold then
   3.419 -              (* First iteration? Try again. *)
   3.420 -              iter 0 max_facts (thres / threshold_divisor) rel_const_tab
   3.421 -                   hopeless hopeful
   3.422 -            else
   3.423 -              []
   3.424 -          | relevant candidates rejects [] =
   3.425 -            let
   3.426 -              val (accepts, more_rejects) =
   3.427 -                take_most_relevant ctxt max_facts remaining_max fudge candidates
   3.428 -              val rel_const_tab' =
   3.429 -                rel_const_tab
   3.430 -                |> fold (add_pconst_to_table false) (maps (snd o fst) accepts)
   3.431 -              fun is_dirty (c, _) =
   3.432 -                Symtab.lookup rel_const_tab' c <> Symtab.lookup rel_const_tab c
   3.433 -              val (hopeful_rejects, hopeless_rejects) =
   3.434 -                 (rejects @ hopeless, ([], []))
   3.435 -                 |-> fold (fn (ax as (_, consts), old_weight) =>
   3.436 -                              if exists is_dirty consts then
   3.437 -                                apfst (cons (ax, NONE))
   3.438 -                              else
   3.439 -                                apsnd (cons (ax, old_weight)))
   3.440 -                 |>> append (more_rejects
   3.441 -                             |> map (fn (ax as (_, consts), old_weight) =>
   3.442 -                                        (ax, if exists is_dirty consts then NONE
   3.443 -                                             else SOME old_weight)))
   3.444 -              val thres =
   3.445 -                1.0 - (1.0 - thres)
   3.446 -                      * Math.pow (decay, Real.fromInt (length accepts))
   3.447 -              val remaining_max = remaining_max - length accepts
   3.448 -            in
   3.449 -              trace_msg ctxt (fn () => "New or updated constants: " ^
   3.450 -                  commas (rel_const_tab' |> Symtab.dest
   3.451 -                          |> subtract (op =) (rel_const_tab |> Symtab.dest)
   3.452 -                          |> map string_for_hyper_pconst));
   3.453 -              map (fst o fst) accepts @
   3.454 -              (if remaining_max = 0 then
   3.455 -                 []
   3.456 -               else
   3.457 -                 iter (j + 1) remaining_max thres rel_const_tab'
   3.458 -                      hopeless_rejects hopeful_rejects)
   3.459 -            end
   3.460 -          | relevant candidates rejects
   3.461 -                     (((ax as (((_, stature), _), fact_consts)), cached_weight)
   3.462 -                      :: hopeful) =
   3.463 -            let
   3.464 -              val weight =
   3.465 -                case cached_weight of
   3.466 -                  SOME w => w
   3.467 -                | NONE => fact_weight fudge stature const_tab rel_const_tab
   3.468 -                                      chained_const_tab fact_consts
   3.469 -            in
   3.470 -              if weight >= thres then
   3.471 -                relevant ((ax, weight) :: candidates) rejects hopeful
   3.472 -              else
   3.473 -                relevant candidates ((ax, weight) :: rejects) hopeful
   3.474 -            end
   3.475 -        in
   3.476 -          trace_msg ctxt (fn () =>
   3.477 -              "ITERATION " ^ string_of_int j ^ ": current threshold: " ^
   3.478 -              Real.toString thres ^ ", constants: " ^
   3.479 -              commas (rel_const_tab |> Symtab.dest
   3.480 -                      |> filter (curry (op <>) [] o snd)
   3.481 -                      |> map string_for_hyper_pconst));
   3.482 -          relevant [] [] hopeful
   3.483 -        end
   3.484 -    fun uses_const s t =
   3.485 -      fold_aterms (curry (fn (Const (s', _), false) => s' = s | (_, b) => b)) t
   3.486 -                  false
   3.487 -    fun uses_const_anywhere accepts s =
   3.488 -      exists (uses_const s o prop_of o snd) accepts orelse
   3.489 -      exists (uses_const s) (concl_t :: hyp_ts)
   3.490 -    fun add_set_const_thms accepts =
   3.491 -      exists (uses_const_anywhere accepts) set_consts ? append set_thms
   3.492 -    fun insert_into_facts accepts [] = accepts
   3.493 -      | insert_into_facts accepts ths =
   3.494 -        let
   3.495 -          val add = facts |> filter (member Thm.eq_thm_prop ths o snd)
   3.496 -          val (bef, after) =
   3.497 -            accepts |> filter_out (member Thm.eq_thm_prop ths o snd)
   3.498 -                    |> take (max_facts - length add)
   3.499 -                    |> chop special_fact_index
   3.500 -        in bef @ add @ after end
   3.501 -    fun insert_special_facts accepts =
   3.502 -       (* FIXME: get rid of "ext" here once it is treated as a helper *)
   3.503 -       [] |> could_benefit_from_ext is_built_in_const accepts ? cons @{thm ext}
   3.504 -          |> add_set_const_thms accepts
   3.505 -          |> insert_into_facts accepts
   3.506 -  in
   3.507 -    facts |> map_filter (pair_consts_fact thy is_built_in_const fudge)
   3.508 -          |> iter 0 max_facts thres0 goal_const_tab []
   3.509 -          |> insert_special_facts
   3.510 -          |> tap (fn accepts => trace_msg ctxt (fn () =>
   3.511 -                      "Total relevant: " ^ string_of_int (length accepts)))
   3.512 -  end
   3.513 -
   3.514 -fun iterative_relevant_facts ctxt
   3.515 -        ({fact_thresholds = (thres0, thres1), ...} : params) prover
   3.516 -        max_facts fudge hyp_ts concl_t facts =
   3.517 -  let
   3.518 -    val thy = Proof_Context.theory_of ctxt
   3.519 -    val is_built_in_const =
   3.520 -      Sledgehammer_Provers.is_built_in_const_for_prover ctxt prover
   3.521 -    val fudge =
   3.522 -      case fudge of
   3.523 -        SOME fudge => fudge
   3.524 -      | NONE => Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover
   3.525 -    val decay = Math.pow ((1.0 - thres1) / (1.0 - thres0),
   3.526 -                          1.0 / Real.fromInt (max_facts + 1))
   3.527 -  in
   3.528 -    trace_msg ctxt (fn () => "Considering " ^ string_of_int (length facts) ^
   3.529 -                             " facts");
   3.530 -    (if thres1 < 0.0 then
   3.531 -       facts
   3.532 -     else if thres0 > 1.0 orelse thres0 > thres1 then
   3.533 -       []
   3.534 -     else
   3.535 -       relevance_filter ctxt thres0 decay max_facts is_built_in_const fudge
   3.536 -           facts hyp_ts
   3.537 -           (concl_t |> theory_constify fudge (Context.theory_name thy)))
   3.538 -  end
   3.539 -
   3.540 -end;
     4.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_filter_mash.ML	Fri Jul 20 22:19:45 2012 +0200
     4.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.3 @@ -1,691 +0,0 @@
     4.4 -(*  Title:      HOL/Tools/Sledgehammer/sledgehammer_filter_mash.ML
     4.5 -    Author:     Jasmin Blanchette, TU Muenchen
     4.6 -
     4.7 -Sledgehammer's machine-learning-based relevance filter (MaSh).
     4.8 -*)
     4.9 -
    4.10 -signature SLEDGEHAMMER_FILTER_MASH =
    4.11 -sig
    4.12 -  type status = ATP_Problem_Generate.status
    4.13 -  type stature = ATP_Problem_Generate.stature
    4.14 -  type fact = Sledgehammer_Fact.fact
    4.15 -  type fact_override = Sledgehammer_Fact.fact_override
    4.16 -  type params = Sledgehammer_Provers.params
    4.17 -  type relevance_fudge = Sledgehammer_Provers.relevance_fudge
    4.18 -  type prover_result = Sledgehammer_Provers.prover_result
    4.19 -
    4.20 -  val trace : bool Config.T
    4.21 -  val MaShN : string
    4.22 -  val mepoN : string
    4.23 -  val mashN : string
    4.24 -  val meshN : string
    4.25 -  val fact_filters : string list
    4.26 -  val escape_meta : string -> string
    4.27 -  val escape_metas : string list -> string
    4.28 -  val unescape_meta : string -> string
    4.29 -  val unescape_metas : string -> string list
    4.30 -  val extract_query : string -> string * string list
    4.31 -  val nickname_of : thm -> string
    4.32 -  val suggested_facts : string list -> ('a * thm) list -> ('a * thm) list
    4.33 -  val mesh_facts :
    4.34 -    int -> (('a * thm) list * ('a * thm) list) list -> ('a * thm) list
    4.35 -  val is_likely_tautology_or_too_meta : thm -> bool
    4.36 -  val theory_ord : theory * theory -> order
    4.37 -  val thm_ord : thm * thm -> order
    4.38 -  val features_of :
    4.39 -    Proof.context -> string -> theory -> status -> term list -> string list
    4.40 -  val isabelle_dependencies_of : unit Symtab.table -> thm -> string list
    4.41 -  val goal_of_thm : theory -> thm -> thm
    4.42 -  val run_prover_for_mash :
    4.43 -    Proof.context -> params -> string -> fact list -> thm -> prover_result
    4.44 -  val mash_CLEAR : Proof.context -> unit
    4.45 -  val mash_INIT :
    4.46 -    Proof.context -> bool
    4.47 -    -> (string * string list * string list * string list) list -> unit
    4.48 -  val mash_ADD :
    4.49 -    Proof.context -> bool
    4.50 -    -> (string * string list * string list * string list) list -> unit
    4.51 -  val mash_QUERY :
    4.52 -    Proof.context -> bool -> int -> string list * string list -> string list
    4.53 -  val mash_unlearn : Proof.context -> unit
    4.54 -  val mash_could_suggest_facts : unit -> bool
    4.55 -  val mash_can_suggest_facts : Proof.context -> bool
    4.56 -  val mash_suggest_facts :
    4.57 -    Proof.context -> params -> string -> int -> term list -> term
    4.58 -    -> ('a * thm) list -> ('a * thm) list * ('a * thm) list
    4.59 -  val mash_learn_thy :
    4.60 -    Proof.context -> params -> theory -> Time.time -> fact list -> string
    4.61 -  val mash_learn_proof :
    4.62 -    Proof.context -> params -> term -> ('a * thm) list -> thm list -> unit
    4.63 -  val relevant_facts :
    4.64 -    Proof.context -> params -> string -> int -> fact_override -> term list
    4.65 -    -> term -> fact list -> fact list
    4.66 -  val kill_learners : unit -> unit
    4.67 -  val running_learners : unit -> unit
    4.68 -end;
    4.69 -
    4.70 -structure Sledgehammer_Filter_MaSh : SLEDGEHAMMER_FILTER_MASH =
    4.71 -struct
    4.72 -
    4.73 -open ATP_Util
    4.74 -open ATP_Problem_Generate
    4.75 -open Sledgehammer_Util
    4.76 -open Sledgehammer_Fact
    4.77 -open Sledgehammer_Filter_Iter
    4.78 -open Sledgehammer_Provers
    4.79 -open Sledgehammer_Minimize
    4.80 -
    4.81 -val trace =
    4.82 -  Attrib.setup_config_bool @{binding sledgehammer_filter_mash_trace} (K false)
    4.83 -fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
    4.84 -
    4.85 -val MaShN = "MaSh"
    4.86 -
    4.87 -val mepoN = "mepo"
    4.88 -val mashN = "mash"
    4.89 -val meshN = "mesh"
    4.90 -
    4.91 -val fact_filters = [meshN, mepoN, mashN]
    4.92 -
    4.93 -fun mash_home () = getenv "MASH_HOME"
    4.94 -fun mash_state_dir () =
    4.95 -  getenv "ISABELLE_HOME_USER" ^ "/mash"
    4.96 -  |> tap (Isabelle_System.mkdir o Path.explode)
    4.97 -fun mash_state_path () = mash_state_dir () ^ "/state" |> Path.explode
    4.98 -
    4.99 -
   4.100 -(*** Isabelle helpers ***)
   4.101 -
   4.102 -fun meta_char c =
   4.103 -  if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse
   4.104 -     c = #")" orelse c = #"," then
   4.105 -    String.str c
   4.106 -  else
   4.107 -    (* fixed width, in case more digits follow *)
   4.108 -    "\\" ^ stringN_of_int 3 (Char.ord c)
   4.109 -
   4.110 -fun unmeta_chars accum [] = String.implode (rev accum)
   4.111 -  | unmeta_chars accum (#"\\" :: d1 :: d2 :: d3 :: cs) =
   4.112 -    (case Int.fromString (String.implode [d1, d2, d3]) of
   4.113 -       SOME n => unmeta_chars (Char.chr n :: accum) cs
   4.114 -     | NONE => "" (* error *))
   4.115 -  | unmeta_chars _ (#"\\" :: _) = "" (* error *)
   4.116 -  | unmeta_chars accum (c :: cs) = unmeta_chars (c :: accum) cs
   4.117 -
   4.118 -val escape_meta = String.translate meta_char
   4.119 -val escape_metas = map escape_meta #> space_implode " "
   4.120 -val unescape_meta = String.explode #> unmeta_chars []
   4.121 -val unescape_metas =
   4.122 -  space_explode " " #> filter_out (curry (op =) "") #> map unescape_meta
   4.123 -
   4.124 -fun extract_query line =
   4.125 -  case space_explode ":" line of
   4.126 -    [goal_name, suggs] => (unescape_meta goal_name, unescape_metas suggs)
   4.127 -  | _ => ("", [])
   4.128 -
   4.129 -fun parent_of_local_thm th =
   4.130 -  let
   4.131 -    val thy = th |> Thm.theory_of_thm
   4.132 -    val facts = thy |> Global_Theory.facts_of
   4.133 -    val space = facts |> Facts.space_of
   4.134 -    fun id_of s = #id (Name_Space.the_entry space s)
   4.135 -    fun max_id (s', _) (s, id) =
   4.136 -      let val id' = id_of s' in if id > id' then (s, id) else (s', id') end
   4.137 -  in ("", ~1) |> Facts.fold_static max_id facts |> fst end
   4.138 -
   4.139 -val local_prefix = "local" ^ Long_Name.separator
   4.140 -
   4.141 -fun nickname_of th =
   4.142 -  let val hint = Thm.get_name_hint th in
   4.143 -    (* FIXME: There must be a better way to detect local facts. *)
   4.144 -    case try (unprefix local_prefix) hint of
   4.145 -      SOME suff =>
   4.146 -      parent_of_local_thm th ^ Long_Name.separator ^ Long_Name.separator ^ suff
   4.147 -    | NONE => hint
   4.148 -  end
   4.149 -
   4.150 -fun suggested_facts suggs facts =
   4.151 -  let
   4.152 -    fun add_fact (fact as (_, th)) = Symtab.default (nickname_of th, fact)
   4.153 -    val tab = Symtab.empty |> fold add_fact facts
   4.154 -  in map_filter (Symtab.lookup tab) suggs end
   4.155 -
   4.156 -(* Ad hoc score function roughly based on Blanchette's Ringberg 2011 data. *)
   4.157 -fun score x = Math.pow (1.5, 15.5 - 0.05 * Real.fromInt x) + 15.0
   4.158 -
   4.159 -fun sum_sq_avg [] = 0
   4.160 -  | sum_sq_avg xs =
   4.161 -    Real.ceil (100000.0 * fold (curry (op +) o score) xs 0.0) div length xs
   4.162 -
   4.163 -fun mesh_facts max_facts [(selected, unknown)] =
   4.164 -    take max_facts selected @ take (max_facts - length selected) unknown
   4.165 -  | mesh_facts max_facts mess =
   4.166 -    let
   4.167 -      val mess = mess |> map (apfst (`length))
   4.168 -      val fact_eq = Thm.eq_thm o pairself snd
   4.169 -      fun score_in fact ((sel_len, sels), unks) =
   4.170 -        case find_index (curry fact_eq fact) sels of
   4.171 -          ~1 => (case find_index (curry fact_eq fact) unks of
   4.172 -                   ~1 => SOME sel_len
   4.173 -                 | _ => NONE)
   4.174 -        | j => SOME j
   4.175 -      fun score_of fact = mess |> map_filter (score_in fact) |> sum_sq_avg
   4.176 -      val facts = fold (union fact_eq o take max_facts o snd o fst) mess []
   4.177 -    in
   4.178 -      facts |> map (`score_of) |> sort (int_ord o swap o pairself fst)
   4.179 -            |> map snd |> take max_facts
   4.180 -    end
   4.181 -
   4.182 -val thy_feature_prefix = "y_"
   4.183 -
   4.184 -val thy_feature_name_of = prefix thy_feature_prefix
   4.185 -val const_name_of = prefix const_prefix
   4.186 -val type_name_of = prefix type_const_prefix
   4.187 -val class_name_of = prefix class_prefix
   4.188 -
   4.189 -fun is_likely_tautology_or_too_meta th =
   4.190 -  let
   4.191 -    val is_boring_const = member (op =) atp_widely_irrelevant_consts
   4.192 -    fun is_boring_bool t =
   4.193 -      not (exists_Const (not o is_boring_const o fst) t) orelse
   4.194 -      exists_type (exists_subtype (curry (op =) @{typ prop})) t
   4.195 -    fun is_boring_prop (@{const Trueprop} $ t) = is_boring_bool t
   4.196 -      | is_boring_prop (@{const "==>"} $ t $ u) =
   4.197 -        is_boring_prop t andalso is_boring_prop u
   4.198 -      | is_boring_prop (Const (@{const_name all}, _) $ (Abs (_, _, t)) $ u) =
   4.199 -        is_boring_prop t andalso is_boring_prop u
   4.200 -      | is_boring_prop (Const (@{const_name "=="}, _) $ t $ u) =
   4.201 -        is_boring_bool t andalso is_boring_bool u
   4.202 -      | is_boring_prop _ = true
   4.203 -  in
   4.204 -    is_boring_prop (prop_of th) andalso not (Thm.eq_thm_prop (@{thm ext}, th))
   4.205 -  end
   4.206 -
   4.207 -fun theory_ord p =
   4.208 -  if Theory.eq_thy p then
   4.209 -    EQUAL
   4.210 -  else if Theory.subthy p then
   4.211 -    LESS
   4.212 -  else if Theory.subthy (swap p) then
   4.213 -    GREATER
   4.214 -  else case int_ord (pairself (length o Theory.ancestors_of) p) of
   4.215 -    EQUAL => string_ord (pairself Context.theory_name p)
   4.216 -  | order => order
   4.217 -
   4.218 -val thm_ord = theory_ord o pairself theory_of_thm
   4.219 -
   4.220 -val bad_types = [@{type_name prop}, @{type_name bool}, @{type_name fun}]
   4.221 -
   4.222 -fun interesting_terms_types_and_classes ctxt prover term_max_depth
   4.223 -                                        type_max_depth ts =
   4.224 -  let
   4.225 -    fun is_bad_const (x as (s, _)) args =
   4.226 -      member (op =) atp_logical_consts s orelse
   4.227 -      fst (is_built_in_const_for_prover ctxt prover x args)
   4.228 -    fun add_classes @{sort type} = I
   4.229 -      | add_classes S = union (op =) (map class_name_of S)
   4.230 -    fun do_add_type (Type (s, Ts)) =
   4.231 -        (not (member (op =) bad_types s) ? insert (op =) (type_name_of s))
   4.232 -        #> fold do_add_type Ts
   4.233 -      | do_add_type (TFree (_, S)) = add_classes S
   4.234 -      | do_add_type (TVar (_, S)) = add_classes S
   4.235 -    fun add_type T = type_max_depth >= 0 ? do_add_type T
   4.236 -    fun mk_app s args =
   4.237 -      if member (op <>) args "" then s ^ "(" ^ space_implode "," args ^ ")"
   4.238 -      else s
   4.239 -    fun patternify ~1 _ = ""
   4.240 -      | patternify depth t =
   4.241 -        case strip_comb t of
   4.242 -          (Const (s, _), args) =>
   4.243 -          mk_app (const_name_of s) (map (patternify (depth - 1)) args)
   4.244 -        | _ => ""
   4.245 -    fun add_term_patterns ~1 _ = I
   4.246 -      | add_term_patterns depth t =
   4.247 -        insert (op =) (patternify depth t)
   4.248 -        #> add_term_patterns (depth - 1) t
   4.249 -    val add_term = add_term_patterns term_max_depth
   4.250 -    fun add_patterns t =
   4.251 -      let val (head, args) = strip_comb t in
   4.252 -        (case head of
   4.253 -           Const (x as (_, T)) =>
   4.254 -           not (is_bad_const x args) ? (add_term t #> add_type T)
   4.255 -         | Free (_, T) => add_type T
   4.256 -         | Var (_, T) => add_type T
   4.257 -         | Abs (_, T, body) => add_type T #> add_patterns body
   4.258 -         | _ => I)
   4.259 -        #> fold add_patterns args
   4.260 -      end
   4.261 -  in [] |> fold add_patterns ts end
   4.262 -
   4.263 -fun is_exists (s, _) = (s = @{const_name Ex} orelse s = @{const_name Ex1})
   4.264 -
   4.265 -val term_max_depth = 1
   4.266 -val type_max_depth = 1
   4.267 -
   4.268 -(* TODO: Generate type classes for types? *)
   4.269 -fun features_of ctxt prover thy status ts =
   4.270 -  thy_feature_name_of (Context.theory_name thy) ::
   4.271 -  interesting_terms_types_and_classes ctxt prover term_max_depth type_max_depth
   4.272 -                                      ts
   4.273 -  |> forall is_lambda_free ts ? cons "no_lams"
   4.274 -  |> forall (not o exists_Const is_exists) ts ? cons "no_skos"
   4.275 -  |> (case status of
   4.276 -        General => I
   4.277 -      | Induction => cons "induction"
   4.278 -      | Intro => cons "intro"
   4.279 -      | Inductive => cons "inductive"
   4.280 -      | Elim => cons "elim"
   4.281 -      | Simp => cons "simp"
   4.282 -      | Def => cons "def")
   4.283 -
   4.284 -fun isabelle_dependencies_of all_facts = thms_in_proof (SOME all_facts)
   4.285 -
   4.286 -val freezeT = Type.legacy_freeze_type
   4.287 -
   4.288 -fun freeze (t $ u) = freeze t $ freeze u
   4.289 -  | freeze (Abs (s, T, t)) = Abs (s, freezeT T, freeze t)
   4.290 -  | freeze (Var ((s, _), T)) = Free (s, freezeT T)
   4.291 -  | freeze (Const (s, T)) = Const (s, freezeT T)
   4.292 -  | freeze (Free (s, T)) = Free (s, freezeT T)
   4.293 -  | freeze t = t
   4.294 -
   4.295 -fun goal_of_thm thy = prop_of #> freeze #> cterm_of thy #> Goal.init
   4.296 -
   4.297 -fun run_prover_for_mash ctxt params prover facts goal =
   4.298 -  let
   4.299 -    val problem =
   4.300 -      {state = Proof.init ctxt, goal = goal, subgoal = 1, subgoal_count = 1,
   4.301 -       facts = facts |> map (apfst (apfst (fn name => name ())))
   4.302 -                     |> map Untranslated_Fact}
   4.303 -    val prover = get_minimizing_prover ctxt Normal (K ()) prover
   4.304 -  in prover params (K (K (K ""))) problem end
   4.305 -
   4.306 -
   4.307 -(*** Low-level communication with MaSh ***)
   4.308 -
   4.309 -fun write_file (xs, f) file =
   4.310 -  let val path = Path.explode file in
   4.311 -    File.write path "";
   4.312 -    xs |> chunk_list 500
   4.313 -       |> List.app (File.append path o space_implode "" o map f)
   4.314 -  end
   4.315 -
   4.316 -fun mash_info overlord =
   4.317 -  if overlord then (getenv "ISABELLE_HOME_USER", "")
   4.318 -  else (getenv "ISABELLE_TMP", serial_string ())
   4.319 -
   4.320 -fun run_mash ctxt overlord (temp_dir, serial) core =
   4.321 -  let
   4.322 -    val log_file = temp_dir ^ "/mash_log" ^ serial
   4.323 -    val err_file = temp_dir ^ "/mash_err" ^ serial
   4.324 -    val command =
   4.325 -      mash_home () ^ "/mash.py --quiet --outputDir " ^ mash_state_dir () ^
   4.326 -      " --log " ^ log_file ^ " " ^ core ^ " 2>&1 > " ^ err_file
   4.327 -  in
   4.328 -    trace_msg ctxt (fn () => "Running " ^ command);
   4.329 -    write_file ([], K "") log_file;
   4.330 -    write_file ([], K "") err_file;
   4.331 -    Isabelle_System.bash command;
   4.332 -    if overlord then ()
   4.333 -    else (map (File.rm o Path.explode) [log_file, err_file]; ());
   4.334 -    trace_msg ctxt (K "Done")
   4.335 -  end
   4.336 -
   4.337 -(* TODO: Eliminate code once "mash.py" handles sequences of ADD commands as fast
   4.338 -   as a single INIT. *)
   4.339 -fun run_mash_init ctxt overlord write_access write_feats write_deps =
   4.340 -  let
   4.341 -    val info as (temp_dir, serial) = mash_info overlord
   4.342 -    val in_dir = temp_dir ^ "/mash_init" ^ serial
   4.343 -    val in_dir_path = in_dir |> Path.explode |> tap Isabelle_System.mkdir
   4.344 -  in
   4.345 -    write_file write_access (in_dir ^ "/mash_accessibility");
   4.346 -    write_file write_feats (in_dir ^ "/mash_features");
   4.347 -    write_file write_deps (in_dir ^ "/mash_dependencies");
   4.348 -    run_mash ctxt overlord info ("--init --inputDir " ^ in_dir);
   4.349 -    (* FIXME: temporary hack *)
   4.350 -    if overlord then ()
   4.351 -    else (Isabelle_System.bash ("rm -r -f " ^ File.shell_path in_dir_path); ())
   4.352 -  end
   4.353 -
   4.354 -fun run_mash_commands ctxt overlord save max_suggs write_cmds read_suggs =
   4.355 -  let
   4.356 -    val info as (temp_dir, serial) = mash_info overlord
   4.357 -    val sugg_file = temp_dir ^ "/mash_suggs" ^ serial
   4.358 -    val cmd_file = temp_dir ^ "/mash_commands" ^ serial
   4.359 -  in
   4.360 -    write_file ([], K "") sugg_file;
   4.361 -    write_file write_cmds cmd_file;
   4.362 -    run_mash ctxt overlord info
   4.363 -             ("--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^
   4.364 -              " --numberOfPredictions " ^ string_of_int max_suggs ^
   4.365 -              (if save then " --saveModel" else ""));
   4.366 -    read_suggs (fn () => File.read_lines (Path.explode sugg_file))
   4.367 -    |> tap (fn _ =>
   4.368 -               if overlord then ()
   4.369 -               else (map (File.rm o Path.explode) [sugg_file, cmd_file]; ()))
   4.370 -  end
   4.371 -
   4.372 -fun str_of_update (name, parents, feats, deps) =
   4.373 -  "! " ^ escape_meta name ^ ": " ^ escape_metas parents ^ "; " ^
   4.374 -  escape_metas feats ^ "; " ^ escape_metas deps ^ "\n"
   4.375 -
   4.376 -fun str_of_query (parents, feats) =
   4.377 -  "? " ^ escape_metas parents ^ "; " ^ escape_metas feats
   4.378 -
   4.379 -fun init_str_of_update get (upd as (name, _, _, _)) =
   4.380 -  escape_meta name ^ ": " ^ escape_metas (get upd) ^ "\n"
   4.381 -
   4.382 -fun mash_CLEAR ctxt =
   4.383 -  let val path = mash_state_dir () |> Path.explode in
   4.384 -    trace_msg ctxt (K "MaSh CLEAR");
   4.385 -    File.fold_dir (fn file => fn () =>
   4.386 -                      File.rm (Path.append path (Path.basic file)))
   4.387 -                  path ()
   4.388 -  end
   4.389 -
   4.390 -fun mash_INIT ctxt _ [] = mash_CLEAR ctxt
   4.391 -  | mash_INIT ctxt overlord upds =
   4.392 -    (trace_msg ctxt (fn () => "MaSh INIT " ^
   4.393 -         elide_string 1000 (space_implode " " (map #1 upds)));
   4.394 -     run_mash_init ctxt overlord (upds, init_str_of_update #2)
   4.395 -                   (upds, init_str_of_update #3) (upds, init_str_of_update #4))
   4.396 -
   4.397 -fun mash_ADD _ _ [] = ()
   4.398 -  | mash_ADD ctxt overlord upds =
   4.399 -    (trace_msg ctxt (fn () => "MaSh ADD " ^
   4.400 -         elide_string 1000 (space_implode " " (map #1 upds)));
   4.401 -     run_mash_commands ctxt overlord true 0 (upds, str_of_update) (K ()))
   4.402 -
   4.403 -fun mash_QUERY ctxt overlord max_suggs (query as (_, feats)) =
   4.404 -  (trace_msg ctxt (fn () => "MaSh QUERY " ^ space_implode " " feats);
   4.405 -   run_mash_commands ctxt overlord false max_suggs
   4.406 -       ([query], str_of_query)
   4.407 -       (fn suggs => snd (extract_query (List.last (suggs ()))))
   4.408 -   handle List.Empty => [])
   4.409 -
   4.410 -
   4.411 -(*** High-level communication with MaSh ***)
   4.412 -
   4.413 -fun try_graph ctxt when def f =
   4.414 -  f ()
   4.415 -  handle Graph.CYCLES (cycle :: _) =>
   4.416 -         (trace_msg ctxt (fn () =>
   4.417 -              "Cycle involving " ^ commas cycle ^ " when " ^ when); def)
   4.418 -       | Graph.UNDEF name =>
   4.419 -         (trace_msg ctxt (fn () =>
   4.420 -              "Unknown fact " ^ quote name ^ " when " ^ when); def)
   4.421 -
   4.422 -type mash_state =
   4.423 -  {thys : bool Symtab.table,
   4.424 -   fact_graph : unit Graph.T}
   4.425 -
   4.426 -val empty_state = {thys = Symtab.empty, fact_graph = Graph.empty}
   4.427 -
   4.428 -local
   4.429 -
   4.430 -fun mash_load _ (state as (true, _)) = state
   4.431 -  | mash_load ctxt _ =
   4.432 -    let val path = mash_state_path () in
   4.433 -      (true,
   4.434 -       case try File.read_lines path of
   4.435 -         SOME (comp_thys :: incomp_thys :: fact_lines) =>
   4.436 -         let
   4.437 -           fun add_thy comp thy = Symtab.update (thy, comp)
   4.438 -           fun add_edge_to name parent =
   4.439 -             Graph.default_node (parent, ())
   4.440 -             #> Graph.add_edge (parent, name)
   4.441 -           fun add_fact_line line =
   4.442 -             case extract_query line of
   4.443 -               ("", _) => I (* shouldn't happen *)
   4.444 -             | (name, parents) =>
   4.445 -               Graph.default_node (name, ())
   4.446 -               #> fold (add_edge_to name) parents
   4.447 -           val thys =
   4.448 -             Symtab.empty |> fold (add_thy true) (unescape_metas comp_thys)
   4.449 -                          |> fold (add_thy false) (unescape_metas incomp_thys)
   4.450 -           val fact_graph =
   4.451 -             try_graph ctxt "loading state" Graph.empty (fn () =>
   4.452 -                 Graph.empty |> fold add_fact_line fact_lines)
   4.453 -         in {thys = thys, fact_graph = fact_graph} end
   4.454 -       | _ => empty_state)
   4.455 -    end
   4.456 -
   4.457 -fun mash_save ({thys, fact_graph, ...} : mash_state) =
   4.458 -  let
   4.459 -    val path = mash_state_path ()
   4.460 -    val thys = Symtab.dest thys
   4.461 -    val line_for_thys = escape_metas o AList.find (op =) thys
   4.462 -    fun fact_line_for name parents =
   4.463 -      escape_meta name ^ ": " ^ escape_metas parents
   4.464 -    val append_fact = File.append path o suffix "\n" oo fact_line_for
   4.465 -  in
   4.466 -    File.write path (line_for_thys true ^ "\n" ^ line_for_thys false ^ "\n");
   4.467 -    Graph.fold (fn (name, ((), (parents, _))) => fn () =>
   4.468 -                   append_fact name (Graph.Keys.dest parents))
   4.469 -        fact_graph ()
   4.470 -  end
   4.471 -
   4.472 -val global_state =
   4.473 -  Synchronized.var "Sledgehammer_Filter_MaSh.global_state" (false, empty_state)
   4.474 -
   4.475 -in
   4.476 -
   4.477 -fun mash_map ctxt f =
   4.478 -  Synchronized.change global_state (mash_load ctxt ##> (f #> tap mash_save))
   4.479 -
   4.480 -fun mash_get ctxt =
   4.481 -  Synchronized.change_result global_state (mash_load ctxt #> `snd)
   4.482 -
   4.483 -fun mash_unlearn ctxt =
   4.484 -  Synchronized.change global_state (fn _ =>
   4.485 -      (mash_CLEAR ctxt; File.write (mash_state_path ()) "";
   4.486 -       (true, empty_state)))
   4.487 -
   4.488 -end
   4.489 -
   4.490 -fun mash_could_suggest_facts () = mash_home () <> ""
   4.491 -fun mash_can_suggest_facts ctxt =
   4.492 -  not (Graph.is_empty (#fact_graph (mash_get ctxt)))
   4.493 -
   4.494 -fun parents_wrt_facts facts fact_graph =
   4.495 -  let
   4.496 -    val facts = [] |> fold (cons o nickname_of o snd) facts
   4.497 -    val tab = Symtab.empty |> fold (fn name => Symtab.update (name, ())) facts
   4.498 -    fun insert_not_seen seen name =
   4.499 -      not (member (op =) seen name) ? insert (op =) name
   4.500 -    fun parents_of _ parents [] = parents
   4.501 -      | parents_of seen parents (name :: names) =
   4.502 -        if Symtab.defined tab name then
   4.503 -          parents_of (name :: seen) (name :: parents) names
   4.504 -        else
   4.505 -          parents_of (name :: seen) parents
   4.506 -                     (Graph.Keys.fold (insert_not_seen seen)
   4.507 -                                      (Graph.imm_preds fact_graph name) names)
   4.508 -  in parents_of [] [] (Graph.maximals fact_graph) end
   4.509 -
   4.510 -(* Generate more suggestions than requested, because some might be thrown out
   4.511 -   later for various reasons and "meshing" gives better results with some
   4.512 -   slack. *)
   4.513 -fun max_suggs_of max_facts = max_facts + Int.min (200, max_facts)
   4.514 -
   4.515 -fun is_fact_in_graph fact_graph (_, th) =
   4.516 -  can (Graph.get_node fact_graph) (nickname_of th)
   4.517 -
   4.518 -fun mash_suggest_facts ctxt ({overlord, ...} : params) prover max_facts hyp_ts
   4.519 -                       concl_t facts =
   4.520 -  let
   4.521 -    val thy = Proof_Context.theory_of ctxt
   4.522 -    val fact_graph = #fact_graph (mash_get ctxt)
   4.523 -    val parents = parents_wrt_facts facts fact_graph
   4.524 -    val feats = features_of ctxt prover thy General (concl_t :: hyp_ts)
   4.525 -    val suggs =
   4.526 -      if Graph.is_empty fact_graph then []
   4.527 -      else mash_QUERY ctxt overlord (max_suggs_of max_facts) (parents, feats)
   4.528 -    val selected = facts |> suggested_facts suggs
   4.529 -    val unknown = facts |> filter_out (is_fact_in_graph fact_graph)
   4.530 -  in (selected, unknown) end
   4.531 -
   4.532 -fun add_thys_for thy =
   4.533 -  let fun add comp thy = Symtab.update (Context.theory_name thy, comp) in
   4.534 -    add false thy #> fold (add true) (Theory.ancestors_of thy)
   4.535 -  end
   4.536 -
   4.537 -fun update_fact_graph ctxt (name, parents, feats, deps) (upds, graph) =
   4.538 -  let
   4.539 -    fun maybe_add_from from (accum as (parents, graph)) =
   4.540 -      try_graph ctxt "updating graph" accum (fn () =>
   4.541 -          (from :: parents, Graph.add_edge_acyclic (from, name) graph))
   4.542 -    val graph = graph |> Graph.default_node (name, ())
   4.543 -    val (parents, graph) = ([], graph) |> fold maybe_add_from parents
   4.544 -    val (deps, graph) = ([], graph) |> fold maybe_add_from deps
   4.545 -  in ((name, parents, feats, deps) :: upds, graph) end
   4.546 -
   4.547 -val pass1_learn_timeout_factor = 0.5
   4.548 -
   4.549 -(* Too many dependencies is a sign that a decision procedure is at work. There
   4.550 -   isn't much too learn from such proofs. *)
   4.551 -val max_dependencies = 10
   4.552 -
   4.553 -(* The timeout is understood in a very slack fashion. *)
   4.554 -fun mash_learn_thy ctxt ({provers, verbose, overlord, ...} : params) thy timeout
   4.555 -                   facts =
   4.556 -  let
   4.557 -    val timer = Timer.startRealTimer ()
   4.558 -    val prover = hd provers
   4.559 -    fun timed_out frac =
   4.560 -      Time.> (Timer.checkRealTimer timer, time_mult frac timeout)
   4.561 -    val {fact_graph, ...} = mash_get ctxt
   4.562 -    val new_facts =
   4.563 -      facts |> filter_out (is_fact_in_graph fact_graph)
   4.564 -            |> sort (thm_ord o pairself snd)
   4.565 -  in
   4.566 -    if null new_facts then
   4.567 -      ""
   4.568 -    else
   4.569 -      let
   4.570 -        val ths = facts |> map snd
   4.571 -        val all_names =
   4.572 -          ths |> filter_out is_likely_tautology_or_too_meta
   4.573 -              |> map (rpair () o nickname_of)
   4.574 -              |> Symtab.make
   4.575 -        fun trim_deps deps = if length deps > max_dependencies then [] else deps
   4.576 -        fun do_fact _ (accum as (_, true)) = accum
   4.577 -          | do_fact ((_, (_, status)), th) ((parents, upds), false) =
   4.578 -            let
   4.579 -              val name = nickname_of th
   4.580 -              val feats =
   4.581 -                features_of ctxt prover (theory_of_thm th) status [prop_of th]
   4.582 -              val deps = isabelle_dependencies_of all_names th |> trim_deps
   4.583 -              val upd = (name, parents, feats, deps)
   4.584 -            in (([name], upd :: upds), timed_out pass1_learn_timeout_factor) end
   4.585 -        val parents = parents_wrt_facts facts fact_graph
   4.586 -        val ((_, upds), _) =
   4.587 -          ((parents, []), false) |> fold do_fact new_facts |>> apsnd rev
   4.588 -        val n = length upds
   4.589 -        fun trans {thys, fact_graph} =
   4.590 -          let
   4.591 -            val mash_INIT_or_ADD =
   4.592 -              if Graph.is_empty fact_graph then mash_INIT else mash_ADD
   4.593 -            val (upds, fact_graph) =
   4.594 -              ([], fact_graph) |> fold (update_fact_graph ctxt) upds
   4.595 -          in
   4.596 -            (mash_INIT_or_ADD ctxt overlord (rev upds);
   4.597 -             {thys = thys |> add_thys_for thy,
   4.598 -              fact_graph = fact_graph})
   4.599 -          end
   4.600 -      in
   4.601 -        mash_map ctxt trans;
   4.602 -        if verbose then
   4.603 -          "Processed " ^ string_of_int n ^ " proof" ^ plural_s n ^
   4.604 -          (if verbose then
   4.605 -             " in " ^ string_from_time (Timer.checkRealTimer timer)
   4.606 -           else
   4.607 -             "") ^ "."
   4.608 -        else
   4.609 -          ""
   4.610 -      end
   4.611 -  end
   4.612 -
   4.613 -fun mash_learn_proof ctxt ({provers, overlord, ...} : params) t facts used_ths =
   4.614 -  let
   4.615 -    val thy = Proof_Context.theory_of ctxt
   4.616 -    val prover = hd provers
   4.617 -    val name = ATP_Util.timestamp () ^ " " ^ serial_string () (* fresh enough *)
   4.618 -    val feats = features_of ctxt prover thy General [t]
   4.619 -    val deps = used_ths |> map nickname_of
   4.620 -  in
   4.621 -    mash_map ctxt (fn {thys, fact_graph} =>
   4.622 -        let
   4.623 -          val parents = parents_wrt_facts facts fact_graph
   4.624 -          val upds = [(name, parents, feats, deps)]
   4.625 -          val (upds, fact_graph) =
   4.626 -            ([], fact_graph) |> fold (update_fact_graph ctxt) upds
   4.627 -        in
   4.628 -          mash_ADD ctxt overlord upds;
   4.629 -          {thys = thys, fact_graph = fact_graph}
   4.630 -        end)
   4.631 -  end
   4.632 -
   4.633 -(* The threshold should be large enough so that MaSh doesn't kick in for Auto
   4.634 -   Sledgehammer and Try. *)
   4.635 -val min_secs_for_learning = 15
   4.636 -val learn_timeout_factor = 2.0
   4.637 -
   4.638 -fun relevant_facts ctxt (params as {learn, fact_filter, timeout, ...}) prover
   4.639 -        max_facts ({add, only, ...} : fact_override) hyp_ts concl_t facts =
   4.640 -  if not (subset (op =) (the_list fact_filter, fact_filters)) then
   4.641 -    error ("Unknown fact filter: " ^ quote (the fact_filter) ^ ".")
   4.642 -  else if only then
   4.643 -    facts
   4.644 -  else if max_facts <= 0 orelse null facts then
   4.645 -    []
   4.646 -  else
   4.647 -    let
   4.648 -      val thy = Proof_Context.theory_of ctxt
   4.649 -      fun maybe_learn () =
   4.650 -        if not learn orelse Async_Manager.has_running_threads MaShN then
   4.651 -          ()
   4.652 -        else if Time.toSeconds timeout >= min_secs_for_learning then
   4.653 -          let
   4.654 -            val soft_timeout = time_mult learn_timeout_factor timeout
   4.655 -            val hard_timeout = time_mult 4.0 soft_timeout
   4.656 -            val birth_time = Time.now ()
   4.657 -            val death_time = Time.+ (birth_time, hard_timeout)
   4.658 -            val desc = ("machine learner for Sledgehammer", "")
   4.659 -          in
   4.660 -            Async_Manager.launch MaShN birth_time death_time desc
   4.661 -                (fn () =>
   4.662 -                    (true, mash_learn_thy ctxt params thy soft_timeout facts))
   4.663 -          end
   4.664 -        else
   4.665 -          ()
   4.666 -      val fact_filter =
   4.667 -        case fact_filter of
   4.668 -          SOME ff => (() |> ff <> mepoN ? maybe_learn; ff)
   4.669 -        | NONE =>
   4.670 -          if mash_can_suggest_facts ctxt then (maybe_learn (); meshN)
   4.671 -          else if mash_could_suggest_facts () then (maybe_learn (); mepoN)
   4.672 -          else mepoN
   4.673 -      val add_ths = Attrib.eval_thms ctxt add
   4.674 -      fun prepend_facts ths accepts =
   4.675 -        ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @
   4.676 -         (accepts |> filter_out (member Thm.eq_thm_prop ths o snd)))
   4.677 -        |> take max_facts
   4.678 -      fun iter () =
   4.679 -        iterative_relevant_facts ctxt params prover max_facts NONE hyp_ts
   4.680 -                                 concl_t facts
   4.681 -      fun mash () =
   4.682 -        mash_suggest_facts ctxt params prover max_facts hyp_ts concl_t facts
   4.683 -      val mess =
   4.684 -        [] |> (if fact_filter <> mashN then cons (iter (), []) else I)
   4.685 -           |> (if fact_filter <> mepoN then cons (mash ()) else I)
   4.686 -    in
   4.687 -      mesh_facts max_facts mess
   4.688 -      |> not (null add_ths) ? prepend_facts add_ths
   4.689 -    end
   4.690 -
   4.691 -fun kill_learners () = Async_Manager.kill_threads MaShN "learner"
   4.692 -fun running_learners () = Async_Manager.running_threads MaShN "learner"
   4.693 -
   4.694 -end;
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Fri Jul 20 22:19:45 2012 +0200
     5.3 @@ -0,0 +1,691 @@
     5.4 +(*  Title:      HOL/Tools/Sledgehammer/sledgehammer_mash.ML
     5.5 +    Author:     Jasmin Blanchette, TU Muenchen
     5.6 +
     5.7 +Sledgehammer's machine-learning-based relevance filter (MaSh).
     5.8 +*)
     5.9 +
    5.10 +signature SLEDGEHAMMER_FILTER_MASH =
    5.11 +sig
    5.12 +  type status = ATP_Problem_Generate.status
    5.13 +  type stature = ATP_Problem_Generate.stature
    5.14 +  type fact = Sledgehammer_Fact.fact
    5.15 +  type fact_override = Sledgehammer_Fact.fact_override
    5.16 +  type params = Sledgehammer_Provers.params
    5.17 +  type relevance_fudge = Sledgehammer_Provers.relevance_fudge
    5.18 +  type prover_result = Sledgehammer_Provers.prover_result
    5.19 +
    5.20 +  val trace : bool Config.T
    5.21 +  val MaShN : string
    5.22 +  val mepoN : string
    5.23 +  val mashN : string
    5.24 +  val meshN : string
    5.25 +  val fact_filters : string list
    5.26 +  val escape_meta : string -> string
    5.27 +  val escape_metas : string list -> string
    5.28 +  val unescape_meta : string -> string
    5.29 +  val unescape_metas : string -> string list
    5.30 +  val extract_query : string -> string * string list
    5.31 +  val nickname_of : thm -> string
    5.32 +  val suggested_facts : string list -> ('a * thm) list -> ('a * thm) list
    5.33 +  val mesh_facts :
    5.34 +    int -> (('a * thm) list * ('a * thm) list) list -> ('a * thm) list
    5.35 +  val is_likely_tautology_or_too_meta : thm -> bool
    5.36 +  val theory_ord : theory * theory -> order
    5.37 +  val thm_ord : thm * thm -> order
    5.38 +  val features_of :
    5.39 +    Proof.context -> string -> theory -> status -> term list -> string list
    5.40 +  val isabelle_dependencies_of : unit Symtab.table -> thm -> string list
    5.41 +  val goal_of_thm : theory -> thm -> thm
    5.42 +  val run_prover_for_mash :
    5.43 +    Proof.context -> params -> string -> fact list -> thm -> prover_result
    5.44 +  val mash_CLEAR : Proof.context -> unit
    5.45 +  val mash_INIT :
    5.46 +    Proof.context -> bool
    5.47 +    -> (string * string list * string list * string list) list -> unit
    5.48 +  val mash_ADD :
    5.49 +    Proof.context -> bool
    5.50 +    -> (string * string list * string list * string list) list -> unit
    5.51 +  val mash_QUERY :
    5.52 +    Proof.context -> bool -> int -> string list * string list -> string list
    5.53 +  val mash_unlearn : Proof.context -> unit
    5.54 +  val mash_could_suggest_facts : unit -> bool
    5.55 +  val mash_can_suggest_facts : Proof.context -> bool
    5.56 +  val mash_suggest_facts :
    5.57 +    Proof.context -> params -> string -> int -> term list -> term
    5.58 +    -> ('a * thm) list -> ('a * thm) list * ('a * thm) list
    5.59 +  val mash_learn_thy :
    5.60 +    Proof.context -> params -> theory -> Time.time -> fact list -> string
    5.61 +  val mash_learn_proof :
    5.62 +    Proof.context -> params -> term -> ('a * thm) list -> thm list -> unit
    5.63 +  val relevant_facts :
    5.64 +    Proof.context -> params -> string -> int -> fact_override -> term list
    5.65 +    -> term -> fact list -> fact list
    5.66 +  val kill_learners : unit -> unit
    5.67 +  val running_learners : unit -> unit
    5.68 +end;
    5.69 +
    5.70 +structure Sledgehammer_Filter_MaSh : SLEDGEHAMMER_FILTER_MASH =
    5.71 +struct
    5.72 +
    5.73 +open ATP_Util
    5.74 +open ATP_Problem_Generate
    5.75 +open Sledgehammer_Util
    5.76 +open Sledgehammer_Fact
    5.77 +open Sledgehammer_Filter_Iter
    5.78 +open Sledgehammer_Provers
    5.79 +open Sledgehammer_Minimize
    5.80 +
    5.81 +val trace =
    5.82 +  Attrib.setup_config_bool @{binding sledgehammer_mash_trace} (K false)
    5.83 +fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
    5.84 +
    5.85 +val MaShN = "MaSh"
    5.86 +
    5.87 +val mepoN = "mepo"
    5.88 +val mashN = "mash"
    5.89 +val meshN = "mesh"
    5.90 +
    5.91 +val fact_filters = [meshN, mepoN, mashN]
    5.92 +
    5.93 +fun mash_home () = getenv "MASH_HOME"
    5.94 +fun mash_state_dir () =
    5.95 +  getenv "ISABELLE_HOME_USER" ^ "/mash"
    5.96 +  |> tap (Isabelle_System.mkdir o Path.explode)
    5.97 +fun mash_state_path () = mash_state_dir () ^ "/state" |> Path.explode
    5.98 +
    5.99 +
   5.100 +(*** Isabelle helpers ***)
   5.101 +
   5.102 +fun meta_char c =
   5.103 +  if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse
   5.104 +     c = #")" orelse c = #"," then
   5.105 +    String.str c
   5.106 +  else
   5.107 +    (* fixed width, in case more digits follow *)
   5.108 +    "\\" ^ stringN_of_int 3 (Char.ord c)
   5.109 +
   5.110 +fun unmeta_chars accum [] = String.implode (rev accum)
   5.111 +  | unmeta_chars accum (#"\\" :: d1 :: d2 :: d3 :: cs) =
   5.112 +    (case Int.fromString (String.implode [d1, d2, d3]) of
   5.113 +       SOME n => unmeta_chars (Char.chr n :: accum) cs
   5.114 +     | NONE => "" (* error *))
   5.115 +  | unmeta_chars _ (#"\\" :: _) = "" (* error *)
   5.116 +  | unmeta_chars accum (c :: cs) = unmeta_chars (c :: accum) cs
   5.117 +
   5.118 +val escape_meta = String.translate meta_char
   5.119 +val escape_metas = map escape_meta #> space_implode " "
   5.120 +val unescape_meta = String.explode #> unmeta_chars []
   5.121 +val unescape_metas =
   5.122 +  space_explode " " #> filter_out (curry (op =) "") #> map unescape_meta
   5.123 +
   5.124 +fun extract_query line =
   5.125 +  case space_explode ":" line of
   5.126 +    [goal_name, suggs] => (unescape_meta goal_name, unescape_metas suggs)
   5.127 +  | _ => ("", [])
   5.128 +
   5.129 +fun parent_of_local_thm th =
   5.130 +  let
   5.131 +    val thy = th |> Thm.theory_of_thm
   5.132 +    val facts = thy |> Global_Theory.facts_of
   5.133 +    val space = facts |> Facts.space_of
   5.134 +    fun id_of s = #id (Name_Space.the_entry space s)
   5.135 +    fun max_id (s', _) (s, id) =
   5.136 +      let val id' = id_of s' in if id > id' then (s, id) else (s', id') end
   5.137 +  in ("", ~1) |> Facts.fold_static max_id facts |> fst end
   5.138 +
   5.139 +val local_prefix = "local" ^ Long_Name.separator
   5.140 +
   5.141 +fun nickname_of th =
   5.142 +  let val hint = Thm.get_name_hint th in
   5.143 +    (* FIXME: There must be a better way to detect local facts. *)
   5.144 +    case try (unprefix local_prefix) hint of
   5.145 +      SOME suff =>
   5.146 +      parent_of_local_thm th ^ Long_Name.separator ^ Long_Name.separator ^ suff
   5.147 +    | NONE => hint
   5.148 +  end
   5.149 +
   5.150 +fun suggested_facts suggs facts =
   5.151 +  let
   5.152 +    fun add_fact (fact as (_, th)) = Symtab.default (nickname_of th, fact)
   5.153 +    val tab = Symtab.empty |> fold add_fact facts
   5.154 +  in map_filter (Symtab.lookup tab) suggs end
   5.155 +
   5.156 +(* Ad hoc score function roughly based on Blanchette's Ringberg 2011 data. *)
   5.157 +fun score x = Math.pow (1.5, 15.5 - 0.05 * Real.fromInt x) + 15.0
   5.158 +
   5.159 +fun sum_sq_avg [] = 0
   5.160 +  | sum_sq_avg xs =
   5.161 +    Real.ceil (100000.0 * fold (curry (op +) o score) xs 0.0) div length xs
   5.162 +
   5.163 +fun mesh_facts max_facts [(selected, unknown)] =
   5.164 +    take max_facts selected @ take (max_facts - length selected) unknown
   5.165 +  | mesh_facts max_facts mess =
   5.166 +    let
   5.167 +      val mess = mess |> map (apfst (`length))
   5.168 +      val fact_eq = Thm.eq_thm o pairself snd
   5.169 +      fun score_in fact ((sel_len, sels), unks) =
   5.170 +        case find_index (curry fact_eq fact) sels of
   5.171 +          ~1 => (case find_index (curry fact_eq fact) unks of
   5.172 +                   ~1 => SOME sel_len
   5.173 +                 | _ => NONE)
   5.174 +        | j => SOME j
   5.175 +      fun score_of fact = mess |> map_filter (score_in fact) |> sum_sq_avg
   5.176 +      val facts = fold (union fact_eq o take max_facts o snd o fst) mess []
   5.177 +    in
   5.178 +      facts |> map (`score_of) |> sort (int_ord o swap o pairself fst)
   5.179 +            |> map snd |> take max_facts
   5.180 +    end
   5.181 +
   5.182 +val thy_feature_prefix = "y_"
   5.183 +
   5.184 +val thy_feature_name_of = prefix thy_feature_prefix
   5.185 +val const_name_of = prefix const_prefix
   5.186 +val type_name_of = prefix type_const_prefix
   5.187 +val class_name_of = prefix class_prefix
   5.188 +
   5.189 +fun is_likely_tautology_or_too_meta th =
   5.190 +  let
   5.191 +    val is_boring_const = member (op =) atp_widely_irrelevant_consts
   5.192 +    fun is_boring_bool t =
   5.193 +      not (exists_Const (not o is_boring_const o fst) t) orelse
   5.194 +      exists_type (exists_subtype (curry (op =) @{typ prop})) t
   5.195 +    fun is_boring_prop (@{const Trueprop} $ t) = is_boring_bool t
   5.196 +      | is_boring_prop (@{const "==>"} $ t $ u) =
   5.197 +        is_boring_prop t andalso is_boring_prop u
   5.198 +      | is_boring_prop (Const (@{const_name all}, _) $ (Abs (_, _, t)) $ u) =
   5.199 +        is_boring_prop t andalso is_boring_prop u
   5.200 +      | is_boring_prop (Const (@{const_name "=="}, _) $ t $ u) =
   5.201 +        is_boring_bool t andalso is_boring_bool u
   5.202 +      | is_boring_prop _ = true
   5.203 +  in
   5.204 +    is_boring_prop (prop_of th) andalso not (Thm.eq_thm_prop (@{thm ext}, th))
   5.205 +  end
   5.206 +
   5.207 +fun theory_ord p =
   5.208 +  if Theory.eq_thy p then
   5.209 +    EQUAL
   5.210 +  else if Theory.subthy p then
   5.211 +    LESS
   5.212 +  else if Theory.subthy (swap p) then
   5.213 +    GREATER
   5.214 +  else case int_ord (pairself (length o Theory.ancestors_of) p) of
   5.215 +    EQUAL => string_ord (pairself Context.theory_name p)
   5.216 +  | order => order
   5.217 +
   5.218 +val thm_ord = theory_ord o pairself theory_of_thm
   5.219 +
   5.220 +val bad_types = [@{type_name prop}, @{type_name bool}, @{type_name fun}]
   5.221 +
   5.222 +fun interesting_terms_types_and_classes ctxt prover term_max_depth
   5.223 +                                        type_max_depth ts =
   5.224 +  let
   5.225 +    fun is_bad_const (x as (s, _)) args =
   5.226 +      member (op =) atp_logical_consts s orelse
   5.227 +      fst (is_built_in_const_for_prover ctxt prover x args)
   5.228 +    fun add_classes @{sort type} = I
   5.229 +      | add_classes S = union (op =) (map class_name_of S)
   5.230 +    fun do_add_type (Type (s, Ts)) =
   5.231 +        (not (member (op =) bad_types s) ? insert (op =) (type_name_of s))
   5.232 +        #> fold do_add_type Ts
   5.233 +      | do_add_type (TFree (_, S)) = add_classes S
   5.234 +      | do_add_type (TVar (_, S)) = add_classes S
   5.235 +    fun add_type T = type_max_depth >= 0 ? do_add_type T
   5.236 +    fun mk_app s args =
   5.237 +      if member (op <>) args "" then s ^ "(" ^ space_implode "," args ^ ")"
   5.238 +      else s
   5.239 +    fun patternify ~1 _ = ""
   5.240 +      | patternify depth t =
   5.241 +        case strip_comb t of
   5.242 +          (Const (s, _), args) =>
   5.243 +          mk_app (const_name_of s) (map (patternify (depth - 1)) args)
   5.244 +        | _ => ""
   5.245 +    fun add_term_patterns ~1 _ = I
   5.246 +      | add_term_patterns depth t =
   5.247 +        insert (op =) (patternify depth t)
   5.248 +        #> add_term_patterns (depth - 1) t
   5.249 +    val add_term = add_term_patterns term_max_depth
   5.250 +    fun add_patterns t =
   5.251 +      let val (head, args) = strip_comb t in
   5.252 +        (case head of
   5.253 +           Const (x as (_, T)) =>
   5.254 +           not (is_bad_const x args) ? (add_term t #> add_type T)
   5.255 +         | Free (_, T) => add_type T
   5.256 +         | Var (_, T) => add_type T
   5.257 +         | Abs (_, T, body) => add_type T #> add_patterns body
   5.258 +         | _ => I)
   5.259 +        #> fold add_patterns args
   5.260 +      end
   5.261 +  in [] |> fold add_patterns ts end
   5.262 +
   5.263 +fun is_exists (s, _) = (s = @{const_name Ex} orelse s = @{const_name Ex1})
   5.264 +
   5.265 +val term_max_depth = 1
   5.266 +val type_max_depth = 1
   5.267 +
   5.268 +(* TODO: Generate type classes for types? *)
   5.269 +fun features_of ctxt prover thy status ts =
   5.270 +  thy_feature_name_of (Context.theory_name thy) ::
   5.271 +  interesting_terms_types_and_classes ctxt prover term_max_depth type_max_depth
   5.272 +                                      ts
   5.273 +  |> forall is_lambda_free ts ? cons "no_lams"
   5.274 +  |> forall (not o exists_Const is_exists) ts ? cons "no_skos"
   5.275 +  |> (case status of
   5.276 +        General => I
   5.277 +      | Induction => cons "induction"
   5.278 +      | Intro => cons "intro"
   5.279 +      | Inductive => cons "inductive"
   5.280 +      | Elim => cons "elim"
   5.281 +      | Simp => cons "simp"
   5.282 +      | Def => cons "def")
   5.283 +
   5.284 +fun isabelle_dependencies_of all_facts = thms_in_proof (SOME all_facts)
   5.285 +
   5.286 +val freezeT = Type.legacy_freeze_type
   5.287 +
   5.288 +fun freeze (t $ u) = freeze t $ freeze u
   5.289 +  | freeze (Abs (s, T, t)) = Abs (s, freezeT T, freeze t)
   5.290 +  | freeze (Var ((s, _), T)) = Free (s, freezeT T)
   5.291 +  | freeze (Const (s, T)) = Const (s, freezeT T)
   5.292 +  | freeze (Free (s, T)) = Free (s, freezeT T)
   5.293 +  | freeze t = t
   5.294 +
   5.295 +fun goal_of_thm thy = prop_of #> freeze #> cterm_of thy #> Goal.init
   5.296 +
   5.297 +fun run_prover_for_mash ctxt params prover facts goal =
   5.298 +  let
   5.299 +    val problem =
   5.300 +      {state = Proof.init ctxt, goal = goal, subgoal = 1, subgoal_count = 1,
   5.301 +       facts = facts |> map (apfst (apfst (fn name => name ())))
   5.302 +                     |> map Untranslated_Fact}
   5.303 +    val prover = get_minimizing_prover ctxt Normal (K ()) prover
   5.304 +  in prover params (K (K (K ""))) problem end
   5.305 +
   5.306 +
   5.307 +(*** Low-level communication with MaSh ***)
   5.308 +
   5.309 +fun write_file (xs, f) file =
   5.310 +  let val path = Path.explode file in
   5.311 +    File.write path "";
   5.312 +    xs |> chunk_list 500
   5.313 +       |> List.app (File.append path o space_implode "" o map f)
   5.314 +  end
   5.315 +
   5.316 +fun mash_info overlord =
   5.317 +  if overlord then (getenv "ISABELLE_HOME_USER", "")
   5.318 +  else (getenv "ISABELLE_TMP", serial_string ())
   5.319 +
   5.320 +fun run_mash ctxt overlord (temp_dir, serial) core =
   5.321 +  let
   5.322 +    val log_file = temp_dir ^ "/mash_log" ^ serial
   5.323 +    val err_file = temp_dir ^ "/mash_err" ^ serial
   5.324 +    val command =
   5.325 +      mash_home () ^ "/mash.py --quiet --outputDir " ^ mash_state_dir () ^
   5.326 +      " --log " ^ log_file ^ " " ^ core ^ " 2>&1 > " ^ err_file
   5.327 +  in
   5.328 +    trace_msg ctxt (fn () => "Running " ^ command);
   5.329 +    write_file ([], K "") log_file;
   5.330 +    write_file ([], K "") err_file;
   5.331 +    Isabelle_System.bash command;
   5.332 +    if overlord then ()
   5.333 +    else (map (File.rm o Path.explode) [log_file, err_file]; ());
   5.334 +    trace_msg ctxt (K "Done")
   5.335 +  end
   5.336 +
   5.337 +(* TODO: Eliminate code once "mash.py" handles sequences of ADD commands as fast
   5.338 +   as a single INIT. *)
   5.339 +fun run_mash_init ctxt overlord write_access write_feats write_deps =
   5.340 +  let
   5.341 +    val info as (temp_dir, serial) = mash_info overlord
   5.342 +    val in_dir = temp_dir ^ "/mash_init" ^ serial
   5.343 +    val in_dir_path = in_dir |> Path.explode |> tap Isabelle_System.mkdir
   5.344 +  in
   5.345 +    write_file write_access (in_dir ^ "/mash_accessibility");
   5.346 +    write_file write_feats (in_dir ^ "/mash_features");
   5.347 +    write_file write_deps (in_dir ^ "/mash_dependencies");
   5.348 +    run_mash ctxt overlord info ("--init --inputDir " ^ in_dir);
   5.349 +    (* FIXME: temporary hack *)
   5.350 +    if overlord then ()
   5.351 +    else (Isabelle_System.bash ("rm -r -f " ^ File.shell_path in_dir_path); ())
   5.352 +  end
   5.353 +
   5.354 +fun run_mash_commands ctxt overlord save max_suggs write_cmds read_suggs =
   5.355 +  let
   5.356 +    val info as (temp_dir, serial) = mash_info overlord
   5.357 +    val sugg_file = temp_dir ^ "/mash_suggs" ^ serial
   5.358 +    val cmd_file = temp_dir ^ "/mash_commands" ^ serial
   5.359 +  in
   5.360 +    write_file ([], K "") sugg_file;
   5.361 +    write_file write_cmds cmd_file;
   5.362 +    run_mash ctxt overlord info
   5.363 +             ("--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^
   5.364 +              " --numberOfPredictions " ^ string_of_int max_suggs ^
   5.365 +              (if save then " --saveModel" else ""));
   5.366 +    read_suggs (fn () => File.read_lines (Path.explode sugg_file))
   5.367 +    |> tap (fn _ =>
   5.368 +               if overlord then ()
   5.369 +               else (map (File.rm o Path.explode) [sugg_file, cmd_file]; ()))
   5.370 +  end
   5.371 +
   5.372 +fun str_of_update (name, parents, feats, deps) =
   5.373 +  "! " ^ escape_meta name ^ ": " ^ escape_metas parents ^ "; " ^
   5.374 +  escape_metas feats ^ "; " ^ escape_metas deps ^ "\n"
   5.375 +
   5.376 +fun str_of_query (parents, feats) =
   5.377 +  "? " ^ escape_metas parents ^ "; " ^ escape_metas feats
   5.378 +
   5.379 +fun init_str_of_update get (upd as (name, _, _, _)) =
   5.380 +  escape_meta name ^ ": " ^ escape_metas (get upd) ^ "\n"
   5.381 +
   5.382 +fun mash_CLEAR ctxt =
   5.383 +  let val path = mash_state_dir () |> Path.explode in
   5.384 +    trace_msg ctxt (K "MaSh CLEAR");
   5.385 +    File.fold_dir (fn file => fn () =>
   5.386 +                      File.rm (Path.append path (Path.basic file)))
   5.387 +                  path ()
   5.388 +  end
   5.389 +
   5.390 +fun mash_INIT ctxt _ [] = mash_CLEAR ctxt
   5.391 +  | mash_INIT ctxt overlord upds =
   5.392 +    (trace_msg ctxt (fn () => "MaSh INIT " ^
   5.393 +         elide_string 1000 (space_implode " " (map #1 upds)));
   5.394 +     run_mash_init ctxt overlord (upds, init_str_of_update #2)
   5.395 +                   (upds, init_str_of_update #3) (upds, init_str_of_update #4))
   5.396 +
   5.397 +fun mash_ADD _ _ [] = ()
   5.398 +  | mash_ADD ctxt overlord upds =
   5.399 +    (trace_msg ctxt (fn () => "MaSh ADD " ^
   5.400 +         elide_string 1000 (space_implode " " (map #1 upds)));
   5.401 +     run_mash_commands ctxt overlord true 0 (upds, str_of_update) (K ()))
   5.402 +
   5.403 +fun mash_QUERY ctxt overlord max_suggs (query as (_, feats)) =
   5.404 +  (trace_msg ctxt (fn () => "MaSh QUERY " ^ space_implode " " feats);
   5.405 +   run_mash_commands ctxt overlord false max_suggs
   5.406 +       ([query], str_of_query)
   5.407 +       (fn suggs => snd (extract_query (List.last (suggs ()))))
   5.408 +   handle List.Empty => [])
   5.409 +
   5.410 +
   5.411 +(*** High-level communication with MaSh ***)
   5.412 +
   5.413 +fun try_graph ctxt when def f =
   5.414 +  f ()
   5.415 +  handle Graph.CYCLES (cycle :: _) =>
   5.416 +         (trace_msg ctxt (fn () =>
   5.417 +              "Cycle involving " ^ commas cycle ^ " when " ^ when); def)
   5.418 +       | Graph.UNDEF name =>
   5.419 +         (trace_msg ctxt (fn () =>
   5.420 +              "Unknown fact " ^ quote name ^ " when " ^ when); def)
   5.421 +
   5.422 +type mash_state =
   5.423 +  {thys : bool Symtab.table,
   5.424 +   fact_graph : unit Graph.T}
   5.425 +
   5.426 +val empty_state = {thys = Symtab.empty, fact_graph = Graph.empty}
   5.427 +
   5.428 +local
   5.429 +
   5.430 +fun mash_load _ (state as (true, _)) = state
   5.431 +  | mash_load ctxt _ =
   5.432 +    let val path = mash_state_path () in
   5.433 +      (true,
   5.434 +       case try File.read_lines path of
   5.435 +         SOME (comp_thys :: incomp_thys :: fact_lines) =>
   5.436 +         let
   5.437 +           fun add_thy comp thy = Symtab.update (thy, comp)
   5.438 +           fun add_edge_to name parent =
   5.439 +             Graph.default_node (parent, ())
   5.440 +             #> Graph.add_edge (parent, name)
   5.441 +           fun add_fact_line line =
   5.442 +             case extract_query line of
   5.443 +               ("", _) => I (* shouldn't happen *)
   5.444 +             | (name, parents) =>
   5.445 +               Graph.default_node (name, ())
   5.446 +               #> fold (add_edge_to name) parents
   5.447 +           val thys =
   5.448 +             Symtab.empty |> fold (add_thy true) (unescape_metas comp_thys)
   5.449 +                          |> fold (add_thy false) (unescape_metas incomp_thys)
   5.450 +           val fact_graph =
   5.451 +             try_graph ctxt "loading state" Graph.empty (fn () =>
   5.452 +                 Graph.empty |> fold add_fact_line fact_lines)
   5.453 +         in {thys = thys, fact_graph = fact_graph} end
   5.454 +       | _ => empty_state)
   5.455 +    end
   5.456 +
   5.457 +fun mash_save ({thys, fact_graph, ...} : mash_state) =
   5.458 +  let
   5.459 +    val path = mash_state_path ()
   5.460 +    val thys = Symtab.dest thys
   5.461 +    val line_for_thys = escape_metas o AList.find (op =) thys
   5.462 +    fun fact_line_for name parents =
   5.463 +      escape_meta name ^ ": " ^ escape_metas parents
   5.464 +    val append_fact = File.append path o suffix "\n" oo fact_line_for
   5.465 +  in
   5.466 +    File.write path (line_for_thys true ^ "\n" ^ line_for_thys false ^ "\n");
   5.467 +    Graph.fold (fn (name, ((), (parents, _))) => fn () =>
   5.468 +                   append_fact name (Graph.Keys.dest parents))
   5.469 +        fact_graph ()
   5.470 +  end
   5.471 +
   5.472 +val global_state =
   5.473 +  Synchronized.var "Sledgehammer_Filter_MaSh.global_state" (false, empty_state)
   5.474 +
   5.475 +in
   5.476 +
   5.477 +fun mash_map ctxt f =
   5.478 +  Synchronized.change global_state (mash_load ctxt ##> (f #> tap mash_save))
   5.479 +
   5.480 +fun mash_get ctxt =
   5.481 +  Synchronized.change_result global_state (mash_load ctxt #> `snd)
   5.482 +
   5.483 +fun mash_unlearn ctxt =
   5.484 +  Synchronized.change global_state (fn _ =>
   5.485 +      (mash_CLEAR ctxt; File.write (mash_state_path ()) "";
   5.486 +       (true, empty_state)))
   5.487 +
   5.488 +end
   5.489 +
   5.490 +fun mash_could_suggest_facts () = mash_home () <> ""
   5.491 +fun mash_can_suggest_facts ctxt =
   5.492 +  not (Graph.is_empty (#fact_graph (mash_get ctxt)))
   5.493 +
   5.494 +fun parents_wrt_facts facts fact_graph =
   5.495 +  let
   5.496 +    val facts = [] |> fold (cons o nickname_of o snd) facts
   5.497 +    val tab = Symtab.empty |> fold (fn name => Symtab.update (name, ())) facts
   5.498 +    fun insert_not_seen seen name =
   5.499 +      not (member (op =) seen name) ? insert (op =) name
   5.500 +    fun parents_of _ parents [] = parents
   5.501 +      | parents_of seen parents (name :: names) =
   5.502 +        if Symtab.defined tab name then
   5.503 +          parents_of (name :: seen) (name :: parents) names
   5.504 +        else
   5.505 +          parents_of (name :: seen) parents
   5.506 +                     (Graph.Keys.fold (insert_not_seen seen)
   5.507 +                                      (Graph.imm_preds fact_graph name) names)
   5.508 +  in parents_of [] [] (Graph.maximals fact_graph) end
   5.509 +
   5.510 +(* Generate more suggestions than requested, because some might be thrown out
   5.511 +   later for various reasons and "meshing" gives better results with some
   5.512 +   slack. *)
   5.513 +fun max_suggs_of max_facts = max_facts + Int.min (200, max_facts)
   5.514 +
   5.515 +fun is_fact_in_graph fact_graph (_, th) =
   5.516 +  can (Graph.get_node fact_graph) (nickname_of th)
   5.517 +
   5.518 +fun mash_suggest_facts ctxt ({overlord, ...} : params) prover max_facts hyp_ts
   5.519 +                       concl_t facts =
   5.520 +  let
   5.521 +    val thy = Proof_Context.theory_of ctxt
   5.522 +    val fact_graph = #fact_graph (mash_get ctxt)
   5.523 +    val parents = parents_wrt_facts facts fact_graph
   5.524 +    val feats = features_of ctxt prover thy General (concl_t :: hyp_ts)
   5.525 +    val suggs =
   5.526 +      if Graph.is_empty fact_graph then []
   5.527 +      else mash_QUERY ctxt overlord (max_suggs_of max_facts) (parents, feats)
   5.528 +    val selected = facts |> suggested_facts suggs
   5.529 +    val unknown = facts |> filter_out (is_fact_in_graph fact_graph)
   5.530 +  in (selected, unknown) end
   5.531 +
   5.532 +fun add_thys_for thy =
   5.533 +  let fun add comp thy = Symtab.update (Context.theory_name thy, comp) in
   5.534 +    add false thy #> fold (add true) (Theory.ancestors_of thy)
   5.535 +  end
   5.536 +
   5.537 +fun update_fact_graph ctxt (name, parents, feats, deps) (upds, graph) =
   5.538 +  let
   5.539 +    fun maybe_add_from from (accum as (parents, graph)) =
   5.540 +      try_graph ctxt "updating graph" accum (fn () =>
   5.541 +          (from :: parents, Graph.add_edge_acyclic (from, name) graph))
   5.542 +    val graph = graph |> Graph.default_node (name, ())
   5.543 +    val (parents, graph) = ([], graph) |> fold maybe_add_from parents
   5.544 +    val (deps, graph) = ([], graph) |> fold maybe_add_from deps
   5.545 +  in ((name, parents, feats, deps) :: upds, graph) end
   5.546 +
   5.547 +val pass1_learn_timeout_factor = 0.5
   5.548 +
   5.549 +(* Too many dependencies is a sign that a decision procedure is at work. There
   5.550 +   isn't much too learn from such proofs. *)
   5.551 +val max_dependencies = 10
   5.552 +
   5.553 +(* The timeout is understood in a very slack fashion. *)
   5.554 +fun mash_learn_thy ctxt ({provers, verbose, overlord, ...} : params) thy timeout
   5.555 +                   facts =
   5.556 +  let
   5.557 +    val timer = Timer.startRealTimer ()
   5.558 +    val prover = hd provers
   5.559 +    fun timed_out frac =
   5.560 +      Time.> (Timer.checkRealTimer timer, time_mult frac timeout)
   5.561 +    val {fact_graph, ...} = mash_get ctxt
   5.562 +    val new_facts =
   5.563 +      facts |> filter_out (is_fact_in_graph fact_graph)
   5.564 +            |> sort (thm_ord o pairself snd)
   5.565 +  in
   5.566 +    if null new_facts then
   5.567 +      ""
   5.568 +    else
   5.569 +      let
   5.570 +        val ths = facts |> map snd
   5.571 +        val all_names =
   5.572 +          ths |> filter_out is_likely_tautology_or_too_meta
   5.573 +              |> map (rpair () o nickname_of)
   5.574 +              |> Symtab.make
   5.575 +        fun trim_deps deps = if length deps > max_dependencies then [] else deps
   5.576 +        fun do_fact _ (accum as (_, true)) = accum
   5.577 +          | do_fact ((_, (_, status)), th) ((parents, upds), false) =
   5.578 +            let
   5.579 +              val name = nickname_of th
   5.580 +              val feats =
   5.581 +                features_of ctxt prover (theory_of_thm th) status [prop_of th]
   5.582 +              val deps = isabelle_dependencies_of all_names th |> trim_deps
   5.583 +              val upd = (name, parents, feats, deps)
   5.584 +            in (([name], upd :: upds), timed_out pass1_learn_timeout_factor) end
   5.585 +        val parents = parents_wrt_facts facts fact_graph
   5.586 +        val ((_, upds), _) =
   5.587 +          ((parents, []), false) |> fold do_fact new_facts |>> apsnd rev
   5.588 +        val n = length upds
   5.589 +        fun trans {thys, fact_graph} =
   5.590 +          let
   5.591 +            val mash_INIT_or_ADD =
   5.592 +              if Graph.is_empty fact_graph then mash_INIT else mash_ADD
   5.593 +            val (upds, fact_graph) =
   5.594 +              ([], fact_graph) |> fold (update_fact_graph ctxt) upds
   5.595 +          in
   5.596 +            (mash_INIT_or_ADD ctxt overlord (rev upds);
   5.597 +             {thys = thys |> add_thys_for thy,
   5.598 +              fact_graph = fact_graph})
   5.599 +          end
   5.600 +      in
   5.601 +        mash_map ctxt trans;
   5.602 +        if verbose then
   5.603 +          "Processed " ^ string_of_int n ^ " proof" ^ plural_s n ^
   5.604 +          (if verbose then
   5.605 +             " in " ^ string_from_time (Timer.checkRealTimer timer)
   5.606 +           else
   5.607 +             "") ^ "."
   5.608 +        else
   5.609 +          ""
   5.610 +      end
   5.611 +  end
   5.612 +
   5.613 +fun mash_learn_proof ctxt ({provers, overlord, ...} : params) t facts used_ths =
   5.614 +  let
   5.615 +    val thy = Proof_Context.theory_of ctxt
   5.616 +    val prover = hd provers
   5.617 +    val name = ATP_Util.timestamp () ^ " " ^ serial_string () (* fresh enough *)
   5.618 +    val feats = features_of ctxt prover thy General [t]
   5.619 +    val deps = used_ths |> map nickname_of
   5.620 +  in
   5.621 +    mash_map ctxt (fn {thys, fact_graph} =>
   5.622 +        let
   5.623 +          val parents = parents_wrt_facts facts fact_graph
   5.624 +          val upds = [(name, parents, feats, deps)]
   5.625 +          val (upds, fact_graph) =
   5.626 +            ([], fact_graph) |> fold (update_fact_graph ctxt) upds
   5.627 +        in
   5.628 +          mash_ADD ctxt overlord upds;
   5.629 +          {thys = thys, fact_graph = fact_graph}
   5.630 +        end)
   5.631 +  end
   5.632 +
   5.633 +(* The threshold should be large enough so that MaSh doesn't kick in for Auto
   5.634 +   Sledgehammer and Try. *)
   5.635 +val min_secs_for_learning = 15
   5.636 +val learn_timeout_factor = 2.0
   5.637 +
   5.638 +fun relevant_facts ctxt (params as {learn, fact_filter, timeout, ...}) prover
   5.639 +        max_facts ({add, only, ...} : fact_override) hyp_ts concl_t facts =
   5.640 +  if not (subset (op =) (the_list fact_filter, fact_filters)) then
   5.641 +    error ("Unknown fact filter: " ^ quote (the fact_filter) ^ ".")
   5.642 +  else if only then
   5.643 +    facts
   5.644 +  else if max_facts <= 0 orelse null facts then
   5.645 +    []
   5.646 +  else
   5.647 +    let
   5.648 +      val thy = Proof_Context.theory_of ctxt
   5.649 +      fun maybe_learn () =
   5.650 +        if not learn orelse Async_Manager.has_running_threads MaShN then
   5.651 +          ()
   5.652 +        else if Time.toSeconds timeout >= min_secs_for_learning then
   5.653 +          let
   5.654 +            val soft_timeout = time_mult learn_timeout_factor timeout
   5.655 +            val hard_timeout = time_mult 4.0 soft_timeout
   5.656 +            val birth_time = Time.now ()
   5.657 +            val death_time = Time.+ (birth_time, hard_timeout)
   5.658 +            val desc = ("machine learner for Sledgehammer", "")
   5.659 +          in
   5.660 +            Async_Manager.launch MaShN birth_time death_time desc
   5.661 +                (fn () =>
   5.662 +                    (true, mash_learn_thy ctxt params thy soft_timeout facts))
   5.663 +          end
   5.664 +        else
   5.665 +          ()
   5.666 +      val fact_filter =
   5.667 +        case fact_filter of
   5.668 +          SOME ff => (() |> ff <> mepoN ? maybe_learn; ff)
   5.669 +        | NONE =>
   5.670 +          if mash_can_suggest_facts ctxt then (maybe_learn (); meshN)
   5.671 +          else if mash_could_suggest_facts () then (maybe_learn (); mepoN)
   5.672 +          else mepoN
   5.673 +      val add_ths = Attrib.eval_thms ctxt add
   5.674 +      fun prepend_facts ths accepts =
   5.675 +        ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @
   5.676 +         (accepts |> filter_out (member Thm.eq_thm_prop ths o snd)))
   5.677 +        |> take max_facts
   5.678 +      fun iter () =
   5.679 +        iterative_relevant_facts ctxt params prover max_facts NONE hyp_ts
   5.680 +                                 concl_t facts
   5.681 +      fun mash () =
   5.682 +        mash_suggest_facts ctxt params prover max_facts hyp_ts concl_t facts
   5.683 +      val mess =
   5.684 +        [] |> (if fact_filter <> mashN then cons (iter (), []) else I)
   5.685 +           |> (if fact_filter <> mepoN then cons (mash ()) else I)
   5.686 +    in
   5.687 +      mesh_facts max_facts mess
   5.688 +      |> not (null add_ths) ? prepend_facts add_ths
   5.689 +    end
   5.690 +
   5.691 +fun kill_learners () = Async_Manager.kill_threads MaShN "learner"
   5.692 +fun running_learners () = Async_Manager.running_threads MaShN "learner"
   5.693 +
   5.694 +end;
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mepo.ML	Fri Jul 20 22:19:45 2012 +0200
     6.3 @@ -0,0 +1,537 @@
     6.4 +(*  Title:      HOL/Tools/Sledgehammer/sledgehammer_mepo.ML
     6.5 +    Author:     Jia Meng, Cambridge University Computer Laboratory and NICTA
     6.6 +    Author:     Jasmin Blanchette, TU Muenchen
     6.7 +
     6.8 +Sledgehammer's iterative relevance filter (MePo = Meng-Paulson).
     6.9 +*)
    6.10 +
    6.11 +signature SLEDGEHAMMER_FILTER_ITER =
    6.12 +sig
    6.13 +  type stature = ATP_Problem_Generate.stature
    6.14 +  type fact = Sledgehammer_Fact.fact
    6.15 +  type params = Sledgehammer_Provers.params
    6.16 +  type relevance_fudge = Sledgehammer_Provers.relevance_fudge
    6.17 +
    6.18 +  val trace : bool Config.T
    6.19 +  val pseudo_abs_name : string
    6.20 +  val pseudo_skolem_prefix : string
    6.21 +  val const_names_in_fact :
    6.22 +    theory -> (string * typ -> term list -> bool * term list) -> term
    6.23 +    -> string list
    6.24 +  val iterative_relevant_facts :
    6.25 +    Proof.context -> params -> string -> int -> relevance_fudge option
    6.26 +    -> term list -> term -> fact list -> fact list
    6.27 +end;
    6.28 +
    6.29 +structure Sledgehammer_Filter_Iter : SLEDGEHAMMER_FILTER_ITER =
    6.30 +struct
    6.31 +
    6.32 +open ATP_Problem_Generate
    6.33 +open Sledgehammer_Fact
    6.34 +open Sledgehammer_Provers
    6.35 +
    6.36 +val trace =
    6.37 +  Attrib.setup_config_bool @{binding sledgehammer_filter_iter_trace} (K false)
    6.38 +fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
    6.39 +
    6.40 +val sledgehammer_prefix = "Sledgehammer" ^ Long_Name.separator
    6.41 +val pseudo_abs_name = sledgehammer_prefix ^ "abs"
    6.42 +val pseudo_skolem_prefix = sledgehammer_prefix ^ "sko"
    6.43 +val theory_const_suffix = Long_Name.separator ^ " 1"
    6.44 +
    6.45 +fun order_of_type (Type (@{type_name fun}, [T1, T2])) =
    6.46 +    Int.max (order_of_type T1 + 1, order_of_type T2)
    6.47 +  | order_of_type (Type (_, Ts)) = fold (Integer.max o order_of_type) Ts 0
    6.48 +  | order_of_type _ = 0
    6.49 +
    6.50 +(* An abstraction of Isabelle types and first-order terms *)
    6.51 +datatype pattern = PVar | PApp of string * pattern list
    6.52 +datatype ptype = PType of int * pattern list
    6.53 +
    6.54 +fun string_for_pattern PVar = "_"
    6.55 +  | string_for_pattern (PApp (s, ps)) =
    6.56 +    if null ps then s else s ^ string_for_patterns ps
    6.57 +and string_for_patterns ps = "(" ^ commas (map string_for_pattern ps) ^ ")"
    6.58 +fun string_for_ptype (PType (_, ps)) = string_for_patterns ps
    6.59 +
    6.60 +(*Is the second type an instance of the first one?*)
    6.61 +fun match_pattern (PVar, _) = true
    6.62 +  | match_pattern (PApp _, PVar) = false
    6.63 +  | match_pattern (PApp (s, ps), PApp (t, qs)) =
    6.64 +    s = t andalso match_patterns (ps, qs)
    6.65 +and match_patterns (_, []) = true
    6.66 +  | match_patterns ([], _) = false
    6.67 +  | match_patterns (p :: ps, q :: qs) =
    6.68 +    match_pattern (p, q) andalso match_patterns (ps, qs)
    6.69 +fun match_ptype (PType (_, ps), PType (_, qs)) = match_patterns (ps, qs)
    6.70 +
    6.71 +(* Is there a unifiable constant? *)
    6.72 +fun pconst_mem f consts (s, ps) =
    6.73 +  exists (curry (match_ptype o f) ps)
    6.74 +         (map snd (filter (curry (op =) s o fst) consts))
    6.75 +fun pconst_hyper_mem f const_tab (s, ps) =
    6.76 +  exists (curry (match_ptype o f) ps) (these (Symtab.lookup const_tab s))
    6.77 +
    6.78 +fun pattern_for_type (Type (s, Ts)) = PApp (s, map pattern_for_type Ts)
    6.79 +  | pattern_for_type (TFree (s, _)) = PApp (s, [])
    6.80 +  | pattern_for_type (TVar _) = PVar
    6.81 +
    6.82 +(* Pairs a constant with the list of its type instantiations. *)
    6.83 +fun ptype thy const x =
    6.84 +  (if const then map pattern_for_type (these (try (Sign.const_typargs thy) x))
    6.85 +   else [])
    6.86 +fun rich_ptype thy const (s, T) =
    6.87 +  PType (order_of_type T, ptype thy const (s, T))
    6.88 +fun rich_pconst thy const (s, T) = (s, rich_ptype thy const (s, T))
    6.89 +
    6.90 +fun string_for_hyper_pconst (s, ps) =
    6.91 +  s ^ "{" ^ commas (map string_for_ptype ps) ^ "}"
    6.92 +
    6.93 +(* Add a pconstant to the table, but a [] entry means a standard
    6.94 +   connective, which we ignore.*)
    6.95 +fun add_pconst_to_table also_skolem (s, p) =
    6.96 +  if (not also_skolem andalso String.isPrefix pseudo_skolem_prefix s) then I
    6.97 +  else Symtab.map_default (s, [p]) (insert (op =) p)
    6.98 +
    6.99 +(* Set constants tend to pull in too many irrelevant facts. We limit the damage
   6.100 +   by treating them more or less as if they were built-in but add their
   6.101 +   axiomatization at the end. *)
   6.102 +val set_consts = [@{const_name Collect}, @{const_name Set.member}]
   6.103 +val set_thms = @{thms Collect_mem_eq mem_Collect_eq Collect_cong}
   6.104 +
   6.105 +fun add_pconsts_in_term thy is_built_in_const also_skolems pos =
   6.106 +  let
   6.107 +    val flip = Option.map not
   6.108 +    (* We include free variables, as well as constants, to handle locales. For
   6.109 +       each quantifiers that must necessarily be skolemized by the automatic
   6.110 +       prover, we introduce a fresh constant to simulate the effect of
   6.111 +       Skolemization. *)
   6.112 +    fun do_const const ext_arg (x as (s, _)) ts =
   6.113 +      let val (built_in, ts) = is_built_in_const x ts in
   6.114 +        if member (op =) set_consts s then
   6.115 +          fold (do_term ext_arg) ts
   6.116 +        else
   6.117 +          (not built_in
   6.118 +           ? add_pconst_to_table also_skolems (rich_pconst thy const x))
   6.119 +          #> fold (do_term false) ts
   6.120 +      end
   6.121 +    and do_term ext_arg t =
   6.122 +      case strip_comb t of
   6.123 +        (Const x, ts) => do_const true ext_arg x ts
   6.124 +      | (Free x, ts) => do_const false ext_arg x ts
   6.125 +      | (Abs (_, T, t'), ts) =>
   6.126 +        ((null ts andalso not ext_arg)
   6.127 +         (* Since lambdas on the right-hand side of equalities are usually
   6.128 +            extensionalized later by "abs_extensionalize_term", we don't
   6.129 +            penalize them here. *)
   6.130 +         ? add_pconst_to_table true (pseudo_abs_name,
   6.131 +                                     PType (order_of_type T + 1, [])))
   6.132 +        #> fold (do_term false) (t' :: ts)
   6.133 +      | (_, ts) => fold (do_term false) ts
   6.134 +    fun do_quantifier will_surely_be_skolemized abs_T body_t =
   6.135 +      do_formula pos body_t
   6.136 +      #> (if also_skolems andalso will_surely_be_skolemized then
   6.137 +            add_pconst_to_table true (pseudo_skolem_prefix ^ serial_string (),
   6.138 +                                      PType (order_of_type abs_T, []))
   6.139 +          else
   6.140 +            I)
   6.141 +    and do_term_or_formula ext_arg T =
   6.142 +      if T = HOLogic.boolT then do_formula NONE else do_term ext_arg
   6.143 +    and do_formula pos t =
   6.144 +      case t of
   6.145 +        Const (@{const_name all}, _) $ Abs (_, T, t') =>
   6.146 +        do_quantifier (pos = SOME false) T t'
   6.147 +      | @{const "==>"} $ t1 $ t2 =>
   6.148 +        do_formula (flip pos) t1 #> do_formula pos t2
   6.149 +      | Const (@{const_name "=="}, Type (_, [T, _])) $ t1 $ t2 =>
   6.150 +        do_term_or_formula false T t1 #> do_term_or_formula true T t2
   6.151 +      | @{const Trueprop} $ t1 => do_formula pos t1
   6.152 +      | @{const False} => I
   6.153 +      | @{const True} => I
   6.154 +      | @{const Not} $ t1 => do_formula (flip pos) t1
   6.155 +      | Const (@{const_name All}, _) $ Abs (_, T, t') =>
   6.156 +        do_quantifier (pos = SOME false) T t'
   6.157 +      | Const (@{const_name Ex}, _) $ Abs (_, T, t') =>
   6.158 +        do_quantifier (pos = SOME true) T t'
   6.159 +      | @{const HOL.conj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   6.160 +      | @{const HOL.disj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   6.161 +      | @{const HOL.implies} $ t1 $ t2 =>
   6.162 +        do_formula (flip pos) t1 #> do_formula pos t2
   6.163 +      | Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2 =>
   6.164 +        do_term_or_formula false T t1 #> do_term_or_formula true T t2
   6.165 +      | Const (@{const_name If}, Type (_, [_, Type (_, [T, _])]))
   6.166 +        $ t1 $ t2 $ t3 =>
   6.167 +        do_formula NONE t1 #> fold (do_term_or_formula false T) [t2, t3]
   6.168 +      | Const (@{const_name Ex1}, _) $ Abs (_, T, t') =>
   6.169 +        do_quantifier (is_some pos) T t'
   6.170 +      | Const (@{const_name Ball}, _) $ t1 $ Abs (_, T, t') =>
   6.171 +        do_quantifier (pos = SOME false) T
   6.172 +                      (HOLogic.mk_imp (incr_boundvars 1 t1 $ Bound 0, t'))
   6.173 +      | Const (@{const_name Bex}, _) $ t1 $ Abs (_, T, t') =>
   6.174 +        do_quantifier (pos = SOME true) T
   6.175 +                      (HOLogic.mk_conj (incr_boundvars 1 t1 $ Bound 0, t'))
   6.176 +      | (t0 as Const (_, @{typ bool})) $ t1 =>
   6.177 +        do_term false t0 #> do_formula pos t1  (* theory constant *)
   6.178 +      | _ => do_term false t
   6.179 +  in do_formula pos end
   6.180 +
   6.181 +fun pconsts_in_fact thy is_built_in_const t =
   6.182 +  Symtab.fold (fn (s, pss) => fold (cons o pair s) pss)
   6.183 +              (Symtab.empty |> add_pconsts_in_term thy is_built_in_const true
   6.184 +                                                   (SOME true) t) []
   6.185 +
   6.186 +val const_names_in_fact = map fst ooo pconsts_in_fact
   6.187 +
   6.188 +(* Inserts a dummy "constant" referring to the theory name, so that relevance
   6.189 +   takes the given theory into account. *)
   6.190 +fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...}
   6.191 +                     : relevance_fudge) thy_name t =
   6.192 +  if exists (curry (op <) 0.0) [theory_const_rel_weight,
   6.193 +                                theory_const_irrel_weight] then
   6.194 +    Const (thy_name ^ theory_const_suffix, @{typ bool}) $ t
   6.195 +  else
   6.196 +    t
   6.197 +
   6.198 +fun theory_const_prop_of fudge th =
   6.199 +  theory_constify fudge (Context.theory_name (theory_of_thm th)) (prop_of th)
   6.200 +
   6.201 +fun pair_consts_fact thy is_built_in_const fudge fact =
   6.202 +  case fact |> snd |> theory_const_prop_of fudge
   6.203 +            |> pconsts_in_fact thy is_built_in_const of
   6.204 +    [] => NONE
   6.205 +  | consts => SOME ((fact, consts), NONE)
   6.206 +
   6.207 +(* A two-dimensional symbol table counts frequencies of constants. It's keyed
   6.208 +   first by constant name and second by its list of type instantiations. For the
   6.209 +   latter, we need a linear ordering on "pattern list". *)
   6.210 +
   6.211 +fun pattern_ord p =
   6.212 +  case p of
   6.213 +    (PVar, PVar) => EQUAL
   6.214 +  | (PVar, PApp _) => LESS
   6.215 +  | (PApp _, PVar) => GREATER
   6.216 +  | (PApp q1, PApp q2) =>
   6.217 +    prod_ord fast_string_ord (dict_ord pattern_ord) (q1, q2)
   6.218 +fun ptype_ord (PType p, PType q) =
   6.219 +  prod_ord (dict_ord pattern_ord) int_ord (swap p, swap q)
   6.220 +
   6.221 +structure PType_Tab = Table(type key = ptype val ord = ptype_ord)
   6.222 +
   6.223 +fun count_fact_consts thy fudge =
   6.224 +  let
   6.225 +    fun do_const const (s, T) ts =
   6.226 +      (* Two-dimensional table update. Constant maps to types maps to count. *)
   6.227 +      PType_Tab.map_default (rich_ptype thy const (s, T), 0) (Integer.add 1)
   6.228 +      |> Symtab.map_default (s, PType_Tab.empty)
   6.229 +      #> fold do_term ts
   6.230 +    and do_term t =
   6.231 +      case strip_comb t of
   6.232 +        (Const x, ts) => do_const true x ts
   6.233 +      | (Free x, ts) => do_const false x ts
   6.234 +      | (Abs (_, _, t'), ts) => fold do_term (t' :: ts)
   6.235 +      | (_, ts) => fold do_term ts
   6.236 +  in do_term o theory_const_prop_of fudge o snd end
   6.237 +
   6.238 +fun pow_int _ 0 = 1.0
   6.239 +  | pow_int x 1 = x
   6.240 +  | pow_int x n = if n > 0 then x * pow_int x (n - 1) else pow_int x (n + 1) / x
   6.241 +
   6.242 +(*The frequency of a constant is the sum of those of all instances of its type.*)
   6.243 +fun pconst_freq match const_tab (c, ps) =
   6.244 +  PType_Tab.fold (fn (qs, m) => match (ps, qs) ? Integer.add m)
   6.245 +                 (the (Symtab.lookup const_tab c)) 0
   6.246 +
   6.247 +
   6.248 +(* A surprising number of theorems contain only a few significant constants.
   6.249 +   These include all induction rules, and other general theorems. *)
   6.250 +
   6.251 +(* "log" seems best in practice. A constant function of one ignores the constant
   6.252 +   frequencies. Rare constants give more points if they are relevant than less
   6.253 +   rare ones. *)
   6.254 +fun rel_weight_for _ freq = 1.0 + 2.0 / Math.ln (Real.fromInt freq + 1.0)
   6.255 +
   6.256 +(* Irrelevant constants are treated differently. We associate lower penalties to
   6.257 +   very rare constants and very common ones -- the former because they can't
   6.258 +   lead to the inclusion of too many new facts, and the latter because they are
   6.259 +   so common as to be of little interest. *)
   6.260 +fun irrel_weight_for ({worse_irrel_freq, higher_order_irrel_weight, ...}
   6.261 +                      : relevance_fudge) order freq =
   6.262 +  let val (k, x) = worse_irrel_freq |> `Real.ceil in
   6.263 +    (if freq < k then Math.ln (Real.fromInt (freq + 1)) / Math.ln x
   6.264 +     else rel_weight_for order freq / rel_weight_for order k)
   6.265 +    * pow_int higher_order_irrel_weight (order - 1)
   6.266 +  end
   6.267 +
   6.268 +fun multiplier_for_const_name local_const_multiplier s =
   6.269 +  if String.isSubstring "." s then 1.0 else local_const_multiplier
   6.270 +
   6.271 +(* Computes a constant's weight, as determined by its frequency. *)
   6.272 +fun generic_pconst_weight local_const_multiplier abs_weight skolem_weight
   6.273 +                          theory_const_weight chained_const_weight weight_for f
   6.274 +                          const_tab chained_const_tab (c as (s, PType (m, _))) =
   6.275 +  if s = pseudo_abs_name then
   6.276 +    abs_weight
   6.277 +  else if String.isPrefix pseudo_skolem_prefix s then
   6.278 +    skolem_weight
   6.279 +  else if String.isSuffix theory_const_suffix s then
   6.280 +    theory_const_weight
   6.281 +  else
   6.282 +    multiplier_for_const_name local_const_multiplier s
   6.283 +    * weight_for m (pconst_freq (match_ptype o f) const_tab c)
   6.284 +    |> (if chained_const_weight < 1.0 andalso
   6.285 +           pconst_hyper_mem I chained_const_tab c then
   6.286 +          curry (op *) chained_const_weight
   6.287 +        else
   6.288 +          I)
   6.289 +
   6.290 +fun rel_pconst_weight ({local_const_multiplier, abs_rel_weight,
   6.291 +                        theory_const_rel_weight, ...} : relevance_fudge)
   6.292 +                      const_tab =
   6.293 +  generic_pconst_weight local_const_multiplier abs_rel_weight 0.0
   6.294 +                        theory_const_rel_weight 0.0 rel_weight_for I const_tab
   6.295 +                        Symtab.empty
   6.296 +
   6.297 +fun irrel_pconst_weight (fudge as {local_const_multiplier, abs_irrel_weight,
   6.298 +                                   skolem_irrel_weight,
   6.299 +                                   theory_const_irrel_weight,
   6.300 +                                   chained_const_irrel_weight, ...})
   6.301 +                        const_tab chained_const_tab =
   6.302 +  generic_pconst_weight local_const_multiplier abs_irrel_weight
   6.303 +                        skolem_irrel_weight theory_const_irrel_weight
   6.304 +                        chained_const_irrel_weight (irrel_weight_for fudge) swap
   6.305 +                        const_tab chained_const_tab
   6.306 +
   6.307 +fun stature_bonus ({intro_bonus, ...} : relevance_fudge) (_, Intro) =
   6.308 +    intro_bonus
   6.309 +  | stature_bonus {elim_bonus, ...} (_, Elim) = elim_bonus
   6.310 +  | stature_bonus {simp_bonus, ...} (_, Simp) = simp_bonus
   6.311 +  | stature_bonus {local_bonus, ...} (Local, _) = local_bonus
   6.312 +  | stature_bonus {assum_bonus, ...} (Assum, _) = assum_bonus
   6.313 +  | stature_bonus {chained_bonus, ...} (Chained, _) = chained_bonus
   6.314 +  | stature_bonus _ _ = 0.0
   6.315 +
   6.316 +fun is_odd_const_name s =
   6.317 +  s = pseudo_abs_name orelse String.isPrefix pseudo_skolem_prefix s orelse
   6.318 +  String.isSuffix theory_const_suffix s
   6.319 +
   6.320 +fun fact_weight fudge stature const_tab relevant_consts chained_consts
   6.321 +                fact_consts =
   6.322 +  case fact_consts |> List.partition (pconst_hyper_mem I relevant_consts)
   6.323 +                   ||> filter_out (pconst_hyper_mem swap relevant_consts) of
   6.324 +    ([], _) => 0.0
   6.325 +  | (rel, irrel) =>
   6.326 +    if forall (forall (is_odd_const_name o fst)) [rel, irrel] then
   6.327 +      0.0
   6.328 +    else
   6.329 +      let
   6.330 +        val irrel = irrel |> filter_out (pconst_mem swap rel)
   6.331 +        val rel_weight =
   6.332 +          0.0 |> fold (curry (op +) o rel_pconst_weight fudge const_tab) rel
   6.333 +        val irrel_weight =
   6.334 +          ~ (stature_bonus fudge stature)
   6.335 +          |> fold (curry (op +)
   6.336 +                   o irrel_pconst_weight fudge const_tab chained_consts) irrel
   6.337 +        val res = rel_weight / (rel_weight + irrel_weight)
   6.338 +      in if Real.isFinite res then res else 0.0 end
   6.339 +
   6.340 +fun take_most_relevant ctxt max_facts remaining_max
   6.341 +        ({max_imperfect, max_imperfect_exp, ...} : relevance_fudge)
   6.342 +        (candidates : ((fact * (string * ptype) list) * real) list) =
   6.343 +  let
   6.344 +    val max_imperfect =
   6.345 +      Real.ceil (Math.pow (max_imperfect,
   6.346 +                    Math.pow (Real.fromInt remaining_max
   6.347 +                              / Real.fromInt max_facts, max_imperfect_exp)))
   6.348 +    val (perfect, imperfect) =
   6.349 +      candidates |> sort (Real.compare o swap o pairself snd)
   6.350 +                 |> take_prefix (fn (_, w) => w > 0.99999)
   6.351 +    val ((accepts, more_rejects), rejects) =
   6.352 +      chop max_imperfect imperfect |>> append perfect |>> chop remaining_max
   6.353 +  in
   6.354 +    trace_msg ctxt (fn () =>
   6.355 +        "Actually passed (" ^ string_of_int (length accepts) ^ " of " ^
   6.356 +        string_of_int (length candidates) ^ "): " ^
   6.357 +        (accepts |> map (fn ((((name, _), _), _), weight) =>
   6.358 +                            name () ^ " [" ^ Real.toString weight ^ "]")
   6.359 +                 |> commas));
   6.360 +    (accepts, more_rejects @ rejects)
   6.361 +  end
   6.362 +
   6.363 +fun if_empty_replace_with_scope thy is_built_in_const facts sc tab =
   6.364 +  if Symtab.is_empty tab then
   6.365 +    Symtab.empty
   6.366 +    |> fold (add_pconsts_in_term thy is_built_in_const false (SOME false))
   6.367 +            (map_filter (fn ((_, (sc', _)), th) =>
   6.368 +                            if sc' = sc then SOME (prop_of th) else NONE) facts)
   6.369 +  else
   6.370 +    tab
   6.371 +
   6.372 +fun consider_arities is_built_in_const th =
   6.373 +  let
   6.374 +    fun aux _ _ NONE = NONE
   6.375 +      | aux t args (SOME tab) =
   6.376 +        case t of
   6.377 +          t1 $ t2 => SOME tab |> aux t1 (t2 :: args) |> aux t2 []
   6.378 +        | Const (x as (s, _)) =>
   6.379 +          (if is_built_in_const x args |> fst then
   6.380 +             SOME tab
   6.381 +           else case Symtab.lookup tab s of
   6.382 +             NONE => SOME (Symtab.update (s, length args) tab)
   6.383 +           | SOME n => if n = length args then SOME tab else NONE)
   6.384 +        | _ => SOME tab
   6.385 +  in aux (prop_of th) [] end
   6.386 +
   6.387 +(* FIXME: This is currently only useful for polymorphic type encodings. *)
   6.388 +fun could_benefit_from_ext is_built_in_const facts =
   6.389 +  fold (consider_arities is_built_in_const o snd) facts (SOME Symtab.empty)
   6.390 +  |> is_none
   6.391 +
   6.392 +(* High enough so that it isn't wrongly considered as very relevant (e.g., for E
   6.393 +   weights), but low enough so that it is unlikely to be truncated away if few
   6.394 +   facts are included. *)
   6.395 +val special_fact_index = 75
   6.396 +
   6.397 +fun relevance_filter ctxt thres0 decay max_facts is_built_in_const
   6.398 +        (fudge as {threshold_divisor, ridiculous_threshold, ...}) facts hyp_ts
   6.399 +        concl_t =
   6.400 +  let
   6.401 +    val thy = Proof_Context.theory_of ctxt
   6.402 +    val const_tab = fold (count_fact_consts thy fudge) facts Symtab.empty
   6.403 +    val add_pconsts = add_pconsts_in_term thy is_built_in_const false o SOME
   6.404 +    val chained_ts =
   6.405 +      facts |> map_filter (fn ((_, (Chained, _)), th) => SOME (prop_of th)
   6.406 +                            | _ => NONE)
   6.407 +    val chained_const_tab = Symtab.empty |> fold (add_pconsts true) chained_ts
   6.408 +    val goal_const_tab =
   6.409 +      Symtab.empty |> fold (add_pconsts true) hyp_ts
   6.410 +                   |> add_pconsts false concl_t
   6.411 +      |> (fn tab => if Symtab.is_empty tab then chained_const_tab else tab)
   6.412 +      |> fold (if_empty_replace_with_scope thy is_built_in_const facts)
   6.413 +              [Chained, Assum, Local]
   6.414 +    fun iter j remaining_max thres rel_const_tab hopeless hopeful =
   6.415 +      let
   6.416 +        fun relevant [] _ [] =
   6.417 +            (* Nothing has been added this iteration. *)
   6.418 +            if j = 0 andalso thres >= ridiculous_threshold then
   6.419 +              (* First iteration? Try again. *)
   6.420 +              iter 0 max_facts (thres / threshold_divisor) rel_const_tab
   6.421 +                   hopeless hopeful
   6.422 +            else
   6.423 +              []
   6.424 +          | relevant candidates rejects [] =
   6.425 +            let
   6.426 +              val (accepts, more_rejects) =
   6.427 +                take_most_relevant ctxt max_facts remaining_max fudge candidates
   6.428 +              val rel_const_tab' =
   6.429 +                rel_const_tab
   6.430 +                |> fold (add_pconst_to_table false) (maps (snd o fst) accepts)
   6.431 +              fun is_dirty (c, _) =
   6.432 +                Symtab.lookup rel_const_tab' c <> Symtab.lookup rel_const_tab c
   6.433 +              val (hopeful_rejects, hopeless_rejects) =
   6.434 +                 (rejects @ hopeless, ([], []))
   6.435 +                 |-> fold (fn (ax as (_, consts), old_weight) =>
   6.436 +                              if exists is_dirty consts then
   6.437 +                                apfst (cons (ax, NONE))
   6.438 +                              else
   6.439 +                                apsnd (cons (ax, old_weight)))
   6.440 +                 |>> append (more_rejects
   6.441 +                             |> map (fn (ax as (_, consts), old_weight) =>
   6.442 +                                        (ax, if exists is_dirty consts then NONE
   6.443 +                                             else SOME old_weight)))
   6.444 +              val thres =
   6.445 +                1.0 - (1.0 - thres)
   6.446 +                      * Math.pow (decay, Real.fromInt (length accepts))
   6.447 +              val remaining_max = remaining_max - length accepts
   6.448 +            in
   6.449 +              trace_msg ctxt (fn () => "New or updated constants: " ^
   6.450 +                  commas (rel_const_tab' |> Symtab.dest
   6.451 +                          |> subtract (op =) (rel_const_tab |> Symtab.dest)
   6.452 +                          |> map string_for_hyper_pconst));
   6.453 +              map (fst o fst) accepts @
   6.454 +              (if remaining_max = 0 then
   6.455 +                 []
   6.456 +               else
   6.457 +                 iter (j + 1) remaining_max thres rel_const_tab'
   6.458 +                      hopeless_rejects hopeful_rejects)
   6.459 +            end
   6.460 +          | relevant candidates rejects
   6.461 +                     (((ax as (((_, stature), _), fact_consts)), cached_weight)
   6.462 +                      :: hopeful) =
   6.463 +            let
   6.464 +              val weight =
   6.465 +                case cached_weight of
   6.466 +                  SOME w => w
   6.467 +                | NONE => fact_weight fudge stature const_tab rel_const_tab
   6.468 +                                      chained_const_tab fact_consts
   6.469 +            in
   6.470 +              if weight >= thres then
   6.471 +                relevant ((ax, weight) :: candidates) rejects hopeful
   6.472 +              else
   6.473 +                relevant candidates ((ax, weight) :: rejects) hopeful
   6.474 +            end
   6.475 +        in
   6.476 +          trace_msg ctxt (fn () =>
   6.477 +              "ITERATION " ^ string_of_int j ^ ": current threshold: " ^
   6.478 +              Real.toString thres ^ ", constants: " ^
   6.479 +              commas (rel_const_tab |> Symtab.dest
   6.480 +                      |> filter (curry (op <>) [] o snd)
   6.481 +                      |> map string_for_hyper_pconst));
   6.482 +          relevant [] [] hopeful
   6.483 +        end
   6.484 +    fun uses_const s t =
   6.485 +      fold_aterms (curry (fn (Const (s', _), false) => s' = s | (_, b) => b)) t
   6.486 +                  false
   6.487 +    fun uses_const_anywhere accepts s =
   6.488 +      exists (uses_const s o prop_of o snd) accepts orelse
   6.489 +      exists (uses_const s) (concl_t :: hyp_ts)
   6.490 +    fun add_set_const_thms accepts =
   6.491 +      exists (uses_const_anywhere accepts) set_consts ? append set_thms
   6.492 +    fun insert_into_facts accepts [] = accepts
   6.493 +      | insert_into_facts accepts ths =
   6.494 +        let
   6.495 +          val add = facts |> filter (member Thm.eq_thm_prop ths o snd)
   6.496 +          val (bef, after) =
   6.497 +            accepts |> filter_out (member Thm.eq_thm_prop ths o snd)
   6.498 +                    |> take (max_facts - length add)
   6.499 +                    |> chop special_fact_index
   6.500 +        in bef @ add @ after end
   6.501 +    fun insert_special_facts accepts =
   6.502 +       (* FIXME: get rid of "ext" here once it is treated as a helper *)
   6.503 +       [] |> could_benefit_from_ext is_built_in_const accepts ? cons @{thm ext}
   6.504 +          |> add_set_const_thms accepts
   6.505 +          |> insert_into_facts accepts
   6.506 +  in
   6.507 +    facts |> map_filter (pair_consts_fact thy is_built_in_const fudge)
   6.508 +          |> iter 0 max_facts thres0 goal_const_tab []
   6.509 +          |> insert_special_facts
   6.510 +          |> tap (fn accepts => trace_msg ctxt (fn () =>
   6.511 +                      "Total relevant: " ^ string_of_int (length accepts)))
   6.512 +  end
   6.513 +
   6.514 +fun iterative_relevant_facts ctxt
   6.515 +        ({fact_thresholds = (thres0, thres1), ...} : params) prover
   6.516 +        max_facts fudge hyp_ts concl_t facts =
   6.517 +  let
   6.518 +    val thy = Proof_Context.theory_of ctxt
   6.519 +    val is_built_in_const =
   6.520 +      Sledgehammer_Provers.is_built_in_const_for_prover ctxt prover
   6.521 +    val fudge =
   6.522 +      case fudge of
   6.523 +        SOME fudge => fudge
   6.524 +      | NONE => Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover
   6.525 +    val decay = Math.pow ((1.0 - thres1) / (1.0 - thres0),
   6.526 +                          1.0 / Real.fromInt (max_facts + 1))
   6.527 +  in
   6.528 +    trace_msg ctxt (fn () => "Considering " ^ string_of_int (length facts) ^
   6.529 +                             " facts");
   6.530 +    (if thres1 < 0.0 then
   6.531 +       facts
   6.532 +     else if thres0 > 1.0 orelse thres0 > thres1 then
   6.533 +       []
   6.534 +     else
   6.535 +       relevance_filter ctxt thres0 decay max_facts is_built_in_const fudge
   6.536 +           facts hyp_ts
   6.537 +           (concl_t |> theory_constify fudge (Context.theory_name thy)))
   6.538 +  end
   6.539 +
   6.540 +end;