src/HOL/Tools/Sledgehammer/sledgehammer_filter.ML
author blanchet
Thu, 12 May 2011 15:29:19 +0200
changeset 43600 1d375de437e9
parent 43597 86683865278d
child 43603 2a9dcff63b80
permissions -rw-r--r--
fine-tuned the relevance filter, so that equations of the form "c = (%x. _)" and constants occurring in chained facts are not unduely penalized
     1 (*  Title:      HOL/Tools/Sledgehammer/sledgehammer_filter.ML
     2     Author:     Jia Meng, Cambridge University Computer Laboratory and NICTA
     3     Author:     Jasmin Blanchette, TU Muenchen
     4 
     5 Sledgehammer's relevance filter.
     6 *)
     7 
     8 signature SLEDGEHAMMER_FILTER =
     9 sig
    10   datatype locality = General | Intro | Elim | Simp | Local | Assum | Chained
    11 
    12   type relevance_fudge =
    13     {allow_ext : bool,
    14      local_const_multiplier : real,
    15      worse_irrel_freq : real,
    16      higher_order_irrel_weight : real,
    17      abs_rel_weight : real,
    18      abs_irrel_weight : real,
    19      skolem_irrel_weight : real,
    20      theory_const_rel_weight : real,
    21      theory_const_irrel_weight : real,
    22      chained_const_irrel_weight : real,
    23      intro_bonus : real,
    24      elim_bonus : real,
    25      simp_bonus : real,
    26      local_bonus : real,
    27      assum_bonus : real,
    28      chained_bonus : real,
    29      max_imperfect : real,
    30      max_imperfect_exp : real,
    31      threshold_divisor : real,
    32      ridiculous_threshold : real}
    33 
    34   type relevance_override =
    35     {add : (Facts.ref * Attrib.src list) list,
    36      del : (Facts.ref * Attrib.src list) list,
    37      only : bool}
    38 
    39   val trace : bool Config.T
    40   val new_monomorphizer : bool Config.T
    41   val ignore_no_atp : bool Config.T
    42   val instantiate_inducts : bool Config.T
    43   val is_locality_global : locality -> bool
    44   val fact_from_ref :
    45     Proof.context -> unit Symtab.table -> thm list
    46     -> Facts.ref * Attrib.src list -> ((string * locality) * thm) list
    47   val all_facts :
    48     Proof.context -> 'a Symtab.table -> bool -> thm list -> thm list
    49     -> (((unit -> string) * locality) * (bool * thm)) list
    50   val const_names_in_fact :
    51     theory -> (string * typ -> term list -> bool * term list) -> term
    52     -> string list
    53   val relevant_facts :
    54     Proof.context -> real * real -> int
    55     -> (string * typ -> term list -> bool * term list) -> relevance_fudge
    56     -> relevance_override -> thm list -> term list -> term
    57     -> ((string * locality) * thm) list
    58 end;
    59 
    60 structure Sledgehammer_Filter : SLEDGEHAMMER_FILTER =
    61 struct
    62 
    63 open Sledgehammer_Util
    64 
    65 val trace =
    66   Attrib.setup_config_bool @{binding sledgehammer_filter_trace} (K false)
    67 fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
    68 
    69 (* experimental features *)
    70 val new_monomorphizer =
    71   Attrib.setup_config_bool @{binding sledgehammer_new_monomorphizer} (K false)
    72 val ignore_no_atp =
    73   Attrib.setup_config_bool @{binding sledgehammer_ignore_no_atp} (K false)
    74 val instantiate_inducts =
    75   Attrib.setup_config_bool @{binding sledgehammer_instantiate_inducts} (K false)
    76 
    77 datatype locality = General | Intro | Elim | Simp | Local | Assum | Chained
    78 
    79 (* (quasi-)underapproximation of the truth *)
    80 fun is_locality_global Local = false
    81   | is_locality_global Assum = false
    82   | is_locality_global Chained = false
    83   | is_locality_global _ = true
    84 
    85 type relevance_fudge =
    86   {allow_ext : bool,
    87    local_const_multiplier : real,
    88    worse_irrel_freq : real,
    89    higher_order_irrel_weight : real,
    90    abs_rel_weight : real,
    91    abs_irrel_weight : real,
    92    skolem_irrel_weight : real,
    93    theory_const_rel_weight : real,
    94    theory_const_irrel_weight : real,
    95    chained_const_irrel_weight : real,
    96    intro_bonus : real,
    97    elim_bonus : real,
    98    simp_bonus : real,
    99    local_bonus : real,
   100    assum_bonus : real,
   101    chained_bonus : real,
   102    max_imperfect : real,
   103    max_imperfect_exp : real,
   104    threshold_divisor : real,
   105    ridiculous_threshold : real}
   106 
   107 type relevance_override =
   108   {add : (Facts.ref * Attrib.src list) list,
   109    del : (Facts.ref * Attrib.src list) list,
   110    only : bool}
   111 
   112 val sledgehammer_prefix = "Sledgehammer" ^ Long_Name.separator
   113 val abs_name = sledgehammer_prefix ^ "abs"
   114 val skolem_prefix = sledgehammer_prefix ^ "sko"
   115 val theory_const_suffix = Long_Name.separator ^ " 1"
   116 
   117 fun needs_quoting reserved s =
   118   Symtab.defined reserved s orelse
   119   exists (not o Lexicon.is_identifier) (Long_Name.explode s)
   120 
   121 fun make_name reserved multi j name =
   122   (name |> needs_quoting reserved name ? quote) ^
   123   (if multi then "(" ^ string_of_int j ^ ")" else "")
   124 
   125 fun explode_interval _ (Facts.FromTo (i, j)) = i upto j
   126   | explode_interval max (Facts.From i) = i upto i + max - 1
   127   | explode_interval _ (Facts.Single i) = [i]
   128 
   129 val backquote =
   130   raw_explode #> map (fn "`" => "\\`" | s => s) #> implode #> enclose "`" "`"
   131 fun fact_from_ref ctxt reserved chained_ths (xthm as (xref, args)) =
   132   let
   133     val ths = Attrib.eval_thms ctxt [xthm]
   134     val bracket =
   135       map (enclose "[" "]" o Pretty.str_of o Args.pretty_src ctxt) args
   136       |> implode
   137     fun nth_name j =
   138       case xref of
   139         Facts.Fact s => backquote s ^ bracket
   140       | Facts.Named (("", _), _) => "[" ^ bracket ^ "]"
   141       | Facts.Named ((name, _), NONE) =>
   142         make_name reserved (length ths > 1) (j + 1) name ^ bracket
   143       | Facts.Named ((name, _), SOME intervals) =>
   144         make_name reserved true
   145                  (nth (maps (explode_interval (length ths)) intervals) j) name ^
   146         bracket
   147   in
   148     (ths, (0, []))
   149     |-> fold (fn th => fn (j, rest) =>
   150                  (j + 1, ((nth_name j,
   151                           if member Thm.eq_thm chained_ths th then Chained
   152                           else General), th) :: rest))
   153     |> snd
   154   end
   155 
   156 (* This is a terrible hack. Free variables are sometimes code as "M__" when they
   157    are displayed as "M" and we want to avoid clashes with these. But sometimes
   158    it's even worse: "Ma__" encodes "M". So we simply reserve all prefixes of all
   159    free variables. In the worse case scenario, where the fact won't be resolved
   160    correctly, the user can fix it manually, e.g., by naming the fact in
   161    question. Ideally we would need nothing of it, but backticks just don't work
   162    with schematic variables. *)
   163 fun all_prefixes_of s =
   164   map (fn i => String.extract (s, 0, SOME i)) (1 upto size s - 1)
   165 fun close_form t =
   166   (t, [] |> Term.add_free_names t |> maps all_prefixes_of)
   167   |> fold (fn ((s, i), T) => fn (t', taken) =>
   168               let val s' = Name.variant taken s in
   169                 ((if fastype_of t' = HOLogic.boolT then HOLogic.all_const
   170                   else Term.all) T
   171                  $ Abs (s', T, abstract_over (Var ((s, i), T), t')),
   172                  s' :: taken)
   173               end)
   174           (Term.add_vars t [] |> sort_wrt (fst o fst))
   175   |> fst
   176 
   177 fun string_for_term ctxt t =
   178   Print_Mode.setmp (filter (curry (op =) Symbol.xsymbolsN)
   179                    (print_mode_value ())) (Syntax.string_of_term ctxt) t
   180   |> String.translate (fn c => if Char.isPrint c then str c else "")
   181   |> simplify_spaces
   182 
   183 (** Structural induction rules **)
   184 
   185 fun struct_induct_rule_on th =
   186   case Logic.strip_horn (prop_of th) of
   187     (prems, @{const Trueprop}
   188             $ ((p as Var ((p_name, 0), _)) $ (a as Var (_, ind_T)))) =>
   189     if not (is_TVar ind_T) andalso length prems > 1 andalso
   190        exists (exists_subterm (curry (op aconv) p)) prems andalso
   191        not (exists (exists_subterm (curry (op aconv) a)) prems) then
   192       SOME (p_name, ind_T)
   193     else
   194       NONE
   195   | _ => NONE
   196 
   197 fun instantiate_induct_rule ctxt concl_prop p_name ((name, loc), (multi, th))
   198                             ind_x =
   199   let
   200     fun varify_noninducts (t as Free (s, T)) =
   201         if (s, T) = ind_x orelse can dest_funT T then t else Var ((s, 0), T)
   202       | varify_noninducts t = t
   203     val p_inst =
   204       concl_prop |> map_aterms varify_noninducts |> close_form
   205                  |> lambda (Free ind_x)
   206                  |> string_for_term ctxt
   207   in
   208     ((fn () => name () ^ "[where " ^ p_name ^ " = " ^ quote p_inst ^ "]", loc),
   209      (multi, th |> read_instantiate ctxt [((p_name, 0), p_inst)]))
   210   end
   211 
   212 fun type_match thy (T1, T2) =
   213   (Sign.typ_match thy (T2, T1) Vartab.empty; true)
   214   handle Type.TYPE_MATCH => false
   215 
   216 fun instantiate_if_induct_rule ctxt stmt stmt_xs (ax as (_, (_, th))) =
   217   case struct_induct_rule_on th of
   218     SOME (p_name, ind_T) =>
   219     let val thy = Proof_Context.theory_of ctxt in
   220       stmt_xs |> filter (fn (_, T) => type_match thy (T, ind_T))
   221               |> map_filter (try (instantiate_induct_rule ctxt stmt p_name ax))
   222     end
   223   | NONE => [ax]
   224 
   225 (***************************************************************)
   226 (* Relevance Filtering                                         *)
   227 (***************************************************************)
   228 
   229 (*** constants with types ***)
   230 
   231 fun order_of_type (Type (@{type_name fun}, [T1, @{typ bool}])) =
   232     order_of_type T1 (* cheat: pretend sets are first-order *)
   233   | order_of_type (Type (@{type_name fun}, [T1, T2])) =
   234     Int.max (order_of_type T1 + 1, order_of_type T2)
   235   | order_of_type (Type (_, Ts)) = fold (Integer.max o order_of_type) Ts 0
   236   | order_of_type _ = 0
   237 
   238 (* An abstraction of Isabelle types and first-order terms *)
   239 datatype pattern = PVar | PApp of string * pattern list
   240 datatype ptype = PType of int * pattern list
   241 
   242 fun string_for_pattern PVar = "_"
   243   | string_for_pattern (PApp (s, ps)) =
   244     if null ps then s else s ^ string_for_patterns ps
   245 and string_for_patterns ps = "(" ^ commas (map string_for_pattern ps) ^ ")"
   246 fun string_for_ptype (PType (_, ps)) = string_for_patterns ps
   247 
   248 (*Is the second type an instance of the first one?*)
   249 fun match_pattern (PVar, _) = true
   250   | match_pattern (PApp _, PVar) = false
   251   | match_pattern (PApp (s, ps), PApp (t, qs)) =
   252     s = t andalso match_patterns (ps, qs)
   253 and match_patterns (_, []) = true
   254   | match_patterns ([], _) = false
   255   | match_patterns (p :: ps, q :: qs) =
   256     match_pattern (p, q) andalso match_patterns (ps, qs)
   257 fun match_ptype (PType (_, ps), PType (_, qs)) = match_patterns (ps, qs)
   258 
   259 (* Is there a unifiable constant? *)
   260 fun pconst_mem f consts (s, ps) =
   261   exists (curry (match_ptype o f) ps)
   262          (map snd (filter (curry (op =) s o fst) consts))
   263 fun pconst_hyper_mem f const_tab (s, ps) =
   264   exists (curry (match_ptype o f) ps) (these (Symtab.lookup const_tab s))
   265 
   266 fun pattern_for_type (Type (s, Ts)) = PApp (s, map pattern_for_type Ts)
   267   | pattern_for_type (TFree (s, _)) = PApp (s, [])
   268   | pattern_for_type (TVar _) = PVar
   269 
   270 (* Pairs a constant with the list of its type instantiations. *)
   271 fun ptype thy const x =
   272   (if const then map pattern_for_type (these (try (Sign.const_typargs thy) x))
   273    else [])
   274 fun rich_ptype thy const (s, T) =
   275   PType (order_of_type T, ptype thy const (s, T))
   276 fun rich_pconst thy const (s, T) = (s, rich_ptype thy const (s, T))
   277 
   278 fun string_for_hyper_pconst (s, ps) =
   279   s ^ "{" ^ commas (map string_for_ptype ps) ^ "}"
   280 
   281 (* Add a pconstant to the table, but a [] entry means a standard
   282    connective, which we ignore.*)
   283 fun add_pconst_to_table also_skolem (s, p) =
   284   if (not also_skolem andalso String.isPrefix skolem_prefix s) then I
   285   else Symtab.map_default (s, [p]) (insert (op =) p)
   286 
   287 fun add_pconsts_in_term thy is_built_in_const also_skolems pos =
   288   let
   289     val flip = Option.map not
   290     (* We include free variables, as well as constants, to handle locales. For
   291        each quantifiers that must necessarily be skolemized by the automatic
   292        prover, we introduce a fresh constant to simulate the effect of
   293        Skolemization. *)
   294     fun do_const const x ts =
   295       let val (built_in, ts) = is_built_in_const x ts in
   296         (not built_in
   297          ? add_pconst_to_table also_skolems (rich_pconst thy const x))
   298         #> fold (do_term false) ts
   299       end
   300     and do_term eq_arg t =
   301       case strip_comb t of
   302         (Const x, ts) => do_const true x ts
   303       | (Free x, ts) => do_const false x ts
   304       | (Abs (_, T, t'), ts) =>
   305         ((null ts andalso not eq_arg)
   306          (* Since lambdas in equalities are usually extensionalized later by
   307             "extensionalize_term", we don't penalize them here. *)
   308          ? add_pconst_to_table true (abs_name, PType (order_of_type T + 1, [])))
   309         #> fold (do_term false) (t' :: ts)
   310       | (_, ts) => fold (do_term false) ts
   311     fun do_quantifier will_surely_be_skolemized abs_T body_t =
   312       do_formula pos body_t
   313       #> (if also_skolems andalso will_surely_be_skolemized then
   314             add_pconst_to_table true
   315                 (gensym skolem_prefix, PType (order_of_type abs_T, []))
   316           else
   317             I)
   318     and do_term_or_formula eq_arg T =
   319       if T = HOLogic.boolT then do_formula NONE else do_term eq_arg
   320     and do_formula pos t =
   321       case t of
   322         Const (@{const_name all}, _) $ Abs (_, T, t') =>
   323         do_quantifier (pos = SOME false) T t'
   324       | @{const "==>"} $ t1 $ t2 =>
   325         do_formula (flip pos) t1 #> do_formula pos t2
   326       | Const (@{const_name "=="}, Type (_, [T, _])) $ t1 $ t2 =>
   327         fold (do_term_or_formula true T) [t1, t2]
   328       | @{const Trueprop} $ t1 => do_formula pos t1
   329       | @{const False} => I
   330       | @{const True} => I
   331       | @{const Not} $ t1 => do_formula (flip pos) t1
   332       | Const (@{const_name All}, _) $ Abs (_, T, t') =>
   333         do_quantifier (pos = SOME false) T t'
   334       | Const (@{const_name Ex}, _) $ Abs (_, T, t') =>
   335         do_quantifier (pos = SOME true) T t'
   336       | @{const HOL.conj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   337       | @{const HOL.disj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   338       | @{const HOL.implies} $ t1 $ t2 =>
   339         do_formula (flip pos) t1 #> do_formula pos t2
   340       | Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2 =>
   341         fold (do_term_or_formula true T) [t1, t2]
   342       | Const (@{const_name If}, Type (_, [_, Type (_, [T, _])]))
   343         $ t1 $ t2 $ t3 =>
   344         do_formula NONE t1 #> fold (do_term_or_formula false T) [t2, t3]
   345       | Const (@{const_name Ex1}, _) $ Abs (_, T, t') =>
   346         do_quantifier (is_some pos) T t'
   347       | Const (@{const_name Ball}, _) $ t1 $ Abs (_, T, t') =>
   348         do_quantifier (pos = SOME false) T
   349                       (HOLogic.mk_imp (incr_boundvars 1 t1 $ Bound 0, t'))
   350       | Const (@{const_name Bex}, _) $ t1 $ Abs (_, T, t') =>
   351         do_quantifier (pos = SOME true) T
   352                       (HOLogic.mk_conj (incr_boundvars 1 t1 $ Bound 0, t'))
   353       | (t0 as Const (_, @{typ bool})) $ t1 =>
   354         do_term false t0 #> do_formula pos t1  (* theory constant *)
   355       | _ => do_term false t
   356   in do_formula pos end
   357 
   358 (*Inserts a dummy "constant" referring to the theory name, so that relevance
   359   takes the given theory into account.*)
   360 fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...}
   361                      : relevance_fudge) thy_name t =
   362   if exists (curry (op <) 0.0) [theory_const_rel_weight,
   363                                 theory_const_irrel_weight] then
   364     Const (thy_name ^ theory_const_suffix, @{typ bool}) $ t
   365   else
   366     t
   367 
   368 fun theory_const_prop_of fudge th =
   369   theory_constify fudge (Context.theory_name (theory_of_thm th)) (prop_of th)
   370 
   371 (**** Constant / Type Frequencies ****)
   372 
   373 (* A two-dimensional symbol table counts frequencies of constants. It's keyed
   374    first by constant name and second by its list of type instantiations. For the
   375    latter, we need a linear ordering on "pattern list". *)
   376 
   377 fun pattern_ord p =
   378   case p of
   379     (PVar, PVar) => EQUAL
   380   | (PVar, PApp _) => LESS
   381   | (PApp _, PVar) => GREATER
   382   | (PApp q1, PApp q2) =>
   383     prod_ord fast_string_ord (dict_ord pattern_ord) (q1, q2)
   384 fun ptype_ord (PType p, PType q) =
   385   prod_ord (dict_ord pattern_ord) int_ord (swap p, swap q)
   386 
   387 structure PType_Tab = Table(type key = ptype val ord = ptype_ord)
   388 
   389 fun count_fact_consts thy fudge =
   390   let
   391     fun do_const const (s, T) ts =
   392       (* Two-dimensional table update. Constant maps to types maps to count. *)
   393       PType_Tab.map_default (rich_ptype thy const (s, T), 0) (Integer.add 1)
   394       |> Symtab.map_default (s, PType_Tab.empty)
   395       #> fold do_term ts
   396     and do_term t =
   397       case strip_comb t of
   398         (Const x, ts) => do_const true x ts
   399       | (Free x, ts) => do_const false x ts
   400       | (Abs (_, _, t'), ts) => fold do_term (t' :: ts)
   401       | (_, ts) => fold do_term ts
   402   in do_term o theory_const_prop_of fudge o snd end
   403 
   404 
   405 (**** Actual Filtering Code ****)
   406 
   407 fun pow_int _ 0 = 1.0
   408   | pow_int x 1 = x
   409   | pow_int x n = if n > 0 then x * pow_int x (n - 1) else pow_int x (n + 1) / x
   410 
   411 (*The frequency of a constant is the sum of those of all instances of its type.*)
   412 fun pconst_freq match const_tab (c, ps) =
   413   PType_Tab.fold (fn (qs, m) => match (ps, qs) ? Integer.add m)
   414                  (the (Symtab.lookup const_tab c)) 0
   415 
   416 
   417 (* A surprising number of theorems contain only a few significant constants.
   418    These include all induction rules, and other general theorems. *)
   419 
   420 (* "log" seems best in practice. A constant function of one ignores the constant
   421    frequencies. Rare constants give more points if they are relevant than less
   422    rare ones. *)
   423 fun rel_weight_for _ freq = 1.0 + 2.0 / Math.ln (Real.fromInt freq + 1.0)
   424 
   425 (* Irrelevant constants are treated differently. We associate lower penalties to
   426    very rare constants and very common ones -- the former because they can't
   427    lead to the inclusion of too many new facts, and the latter because they are
   428    so common as to be of little interest. *)
   429 fun irrel_weight_for ({worse_irrel_freq, higher_order_irrel_weight, ...}
   430                       : relevance_fudge) order freq =
   431   let val (k, x) = worse_irrel_freq |> `Real.ceil in
   432     (if freq < k then Math.ln (Real.fromInt (freq + 1)) / Math.ln x
   433      else rel_weight_for order freq / rel_weight_for order k)
   434     * pow_int higher_order_irrel_weight (order - 1)
   435   end
   436 
   437 fun multiplier_for_const_name local_const_multiplier s =
   438   if String.isSubstring "." s then 1.0 else local_const_multiplier
   439 
   440 (* Computes a constant's weight, as determined by its frequency. *)
   441 fun generic_pconst_weight local_const_multiplier abs_weight skolem_weight
   442                           theory_const_weight chained_const_weight weight_for f
   443                           const_tab chained_const_tab (c as (s, PType (m, _))) =
   444   if s = abs_name then
   445     abs_weight
   446   else if String.isPrefix skolem_prefix s then
   447     skolem_weight
   448   else if String.isSuffix theory_const_suffix s then
   449     theory_const_weight
   450   else
   451     multiplier_for_const_name local_const_multiplier s
   452     * weight_for m (pconst_freq (match_ptype o f) const_tab c)
   453     |> (if chained_const_weight < 1.0 andalso
   454            pconst_hyper_mem I chained_const_tab c then
   455           curry (op *) chained_const_weight
   456         else
   457           I)
   458 
   459 fun rel_pconst_weight ({local_const_multiplier, abs_rel_weight,
   460                         theory_const_rel_weight, ...} : relevance_fudge)
   461                       const_tab =
   462   generic_pconst_weight local_const_multiplier abs_rel_weight 0.0
   463                         theory_const_rel_weight 0.0 rel_weight_for I const_tab
   464                         Symtab.empty
   465 
   466 fun irrel_pconst_weight (fudge as {local_const_multiplier, abs_irrel_weight,
   467                                    skolem_irrel_weight,
   468                                    theory_const_irrel_weight,
   469                                    chained_const_irrel_weight, ...})
   470                         const_tab chained_const_tab =
   471   generic_pconst_weight local_const_multiplier abs_irrel_weight
   472                         skolem_irrel_weight theory_const_irrel_weight
   473                         chained_const_irrel_weight (irrel_weight_for fudge) swap
   474                         const_tab chained_const_tab
   475 
   476 fun locality_bonus (_ : relevance_fudge) General = 0.0
   477   | locality_bonus {intro_bonus, ...} Intro = intro_bonus
   478   | locality_bonus {elim_bonus, ...} Elim = elim_bonus
   479   | locality_bonus {simp_bonus, ...} Simp = simp_bonus
   480   | locality_bonus {local_bonus, ...} Local = local_bonus
   481   | locality_bonus {assum_bonus, ...} Assum = assum_bonus
   482   | locality_bonus {chained_bonus, ...} Chained = chained_bonus
   483 
   484 fun is_odd_const_name s =
   485   s = abs_name orelse String.isPrefix skolem_prefix s orelse
   486   String.isSuffix theory_const_suffix s
   487 
   488 fun fact_weight fudge loc const_tab relevant_consts chained_consts fact_consts =
   489   case fact_consts |> List.partition (pconst_hyper_mem I relevant_consts)
   490                    ||> filter_out (pconst_hyper_mem swap relevant_consts) of
   491     ([], _) => 0.0
   492   | (rel, irrel) =>
   493     if forall (forall (is_odd_const_name o fst)) [rel, irrel] then
   494       0.0
   495     else
   496       let
   497         val irrel = irrel |> filter_out (pconst_mem swap rel)
   498         val rel_weight =
   499           0.0 |> fold (curry (op +) o rel_pconst_weight fudge const_tab) rel
   500         val irrel_weight =
   501           ~ (locality_bonus fudge loc)
   502           |> fold (curry (op +)
   503                    o irrel_pconst_weight fudge const_tab chained_consts) irrel
   504         val res = rel_weight / (rel_weight + irrel_weight)
   505       in if Real.isFinite res then res else 0.0 end
   506 
   507 fun pconsts_in_fact thy is_built_in_const t =
   508   Symtab.fold (fn (s, pss) => fold (cons o pair s) pss)
   509               (Symtab.empty |> add_pconsts_in_term thy is_built_in_const true
   510                                                    (SOME true) t) []
   511 
   512 fun pair_consts_fact thy is_built_in_const fudge fact =
   513   case fact |> snd |> theory_const_prop_of fudge
   514             |> pconsts_in_fact thy is_built_in_const of
   515     [] => NONE
   516   | consts => SOME ((fact, consts), NONE)
   517 
   518 val const_names_in_fact = map fst ooo pconsts_in_fact
   519 
   520 type annotated_thm =
   521   (((unit -> string) * locality) * thm) * (string * ptype) list
   522 
   523 fun take_most_relevant ctxt max_relevant remaining_max
   524         ({max_imperfect, max_imperfect_exp, ...} : relevance_fudge)
   525         (candidates : (annotated_thm * real) list) =
   526   let
   527     val max_imperfect =
   528       Real.ceil (Math.pow (max_imperfect,
   529                     Math.pow (Real.fromInt remaining_max
   530                               / Real.fromInt max_relevant, max_imperfect_exp)))
   531     val (perfect, imperfect) =
   532       candidates |> sort (Real.compare o swap o pairself snd)
   533                  |> take_prefix (fn (_, w) => w > 0.99999)
   534     val ((accepts, more_rejects), rejects) =
   535       chop max_imperfect imperfect |>> append perfect |>> chop remaining_max
   536   in
   537     trace_msg ctxt (fn () =>
   538         "Actually passed (" ^ string_of_int (length accepts) ^ " of " ^
   539         string_of_int (length candidates) ^ "): " ^
   540         (accepts |> map (fn ((((name, _), _), _), weight) =>
   541                             name () ^ " [" ^ Real.toString weight ^ "]")
   542                  |> commas));
   543     (accepts, more_rejects @ rejects)
   544   end
   545 
   546 fun if_empty_replace_with_locality thy is_built_in_const facts loc tab =
   547   if Symtab.is_empty tab then
   548     Symtab.empty
   549     |> fold (add_pconsts_in_term thy is_built_in_const false (SOME false))
   550             (map_filter (fn ((_, loc'), th) =>
   551                             if loc' = loc then SOME (prop_of th) else NONE)
   552                         facts)
   553   else
   554     tab
   555 
   556 fun consider_arities is_built_in_const th =
   557   let
   558     fun aux _ _ NONE = NONE
   559       | aux t args (SOME tab) =
   560         case t of
   561           t1 $ t2 => SOME tab |> aux t1 (t2 :: args) |> aux t2 []
   562         | Const (x as (s, _)) =>
   563           (if is_built_in_const x args |> fst then
   564              SOME tab
   565            else case Symtab.lookup tab s of
   566              NONE => SOME (Symtab.update (s, length args) tab)
   567            | SOME n => if n = length args then SOME tab else NONE)
   568         | _ => SOME tab
   569   in aux (prop_of th) [] end
   570 
   571 (* FIXME: This is currently only useful for polymorphic type systems. *)
   572 fun could_benefit_from_ext is_built_in_const facts =
   573   fold (consider_arities is_built_in_const o snd) facts (SOME Symtab.empty)
   574   |> is_none
   575 
   576 fun relevance_filter ctxt threshold0 decay max_relevant is_built_in_const
   577         (fudge as {threshold_divisor, ridiculous_threshold, ...})
   578         ({add, del, ...} : relevance_override) facts chained_ts hyp_ts concl_t =
   579   let
   580     val thy = Proof_Context.theory_of ctxt
   581     val const_tab = fold (count_fact_consts thy fudge) facts Symtab.empty
   582     val add_pconsts = add_pconsts_in_term thy is_built_in_const false o SOME
   583     val chained_const_tab = Symtab.empty |> fold (add_pconsts true) chained_ts
   584     val goal_const_tab =
   585       Symtab.empty |> fold (add_pconsts true) hyp_ts
   586                    |> add_pconsts false concl_t
   587       |> (fn tab => if Symtab.is_empty tab then chained_const_tab else tab)
   588       |> fold (if_empty_replace_with_locality thy is_built_in_const facts)
   589               [Chained, Assum, Local]
   590     val add_ths = Attrib.eval_thms ctxt add
   591     val del_ths = Attrib.eval_thms ctxt del
   592     val facts = facts |> filter_out (member Thm.eq_thm del_ths o snd)
   593     fun iter j remaining_max threshold rel_const_tab hopeless hopeful =
   594       let
   595         fun relevant [] _ [] =
   596             (* Nothing has been added this iteration. *)
   597             if j = 0 andalso threshold >= ridiculous_threshold then
   598               (* First iteration? Try again. *)
   599               iter 0 max_relevant (threshold / threshold_divisor) rel_const_tab
   600                    hopeless hopeful
   601             else
   602               []
   603           | relevant candidates rejects [] =
   604             let
   605               val (accepts, more_rejects) =
   606                 take_most_relevant ctxt max_relevant remaining_max fudge
   607                                    candidates
   608               val rel_const_tab' =
   609                 rel_const_tab
   610                 |> fold (add_pconst_to_table false) (maps (snd o fst) accepts)
   611               fun is_dirty (c, _) =
   612                 Symtab.lookup rel_const_tab' c <> Symtab.lookup rel_const_tab c
   613               val (hopeful_rejects, hopeless_rejects) =
   614                  (rejects @ hopeless, ([], []))
   615                  |-> fold (fn (ax as (_, consts), old_weight) =>
   616                               if exists is_dirty consts then
   617                                 apfst (cons (ax, NONE))
   618                               else
   619                                 apsnd (cons (ax, old_weight)))
   620                  |>> append (more_rejects
   621                              |> map (fn (ax as (_, consts), old_weight) =>
   622                                         (ax, if exists is_dirty consts then NONE
   623                                              else SOME old_weight)))
   624               val threshold =
   625                 1.0 - (1.0 - threshold)
   626                       * Math.pow (decay, Real.fromInt (length accepts))
   627               val remaining_max = remaining_max - length accepts
   628             in
   629               trace_msg ctxt (fn () => "New or updated constants: " ^
   630                   commas (rel_const_tab' |> Symtab.dest
   631                           |> subtract (op =) (rel_const_tab |> Symtab.dest)
   632                           |> map string_for_hyper_pconst));
   633               map (fst o fst) accepts @
   634               (if remaining_max = 0 then
   635                  []
   636                else
   637                  iter (j + 1) remaining_max threshold rel_const_tab'
   638                       hopeless_rejects hopeful_rejects)
   639             end
   640           | relevant candidates rejects
   641                      (((ax as (((_, loc), _), fact_consts)), cached_weight)
   642                       :: hopeful) =
   643             let
   644               val weight =
   645                 case cached_weight of
   646                   SOME w => w
   647                 | NONE => fact_weight fudge loc const_tab rel_const_tab
   648                                       chained_const_tab fact_consts
   649             in
   650               if weight >= threshold then
   651                 relevant ((ax, weight) :: candidates) rejects hopeful
   652               else
   653                 relevant candidates ((ax, weight) :: rejects) hopeful
   654             end
   655         in
   656           trace_msg ctxt (fn () =>
   657               "ITERATION " ^ string_of_int j ^ ": current threshold: " ^
   658               Real.toString threshold ^ ", constants: " ^
   659               commas (rel_const_tab |> Symtab.dest
   660                       |> filter (curry (op <>) [] o snd)
   661                       |> map string_for_hyper_pconst));
   662           relevant [] [] hopeful
   663         end
   664     fun add_facts ths accepts =
   665       (facts |> filter (member Thm.eq_thm ths o snd)) @
   666       (accepts |> filter_out (member Thm.eq_thm ths o snd))
   667       |> take max_relevant
   668   in
   669     facts |> map_filter (pair_consts_fact thy is_built_in_const fudge)
   670           |> iter 0 max_relevant threshold0 goal_const_tab []
   671           |> not (null add_ths) ? add_facts add_ths
   672           |> (fn accepts =>
   673                  accepts |> could_benefit_from_ext is_built_in_const accepts
   674                             ? add_facts @{thms ext})
   675           |> tap (fn accepts => trace_msg ctxt (fn () =>
   676                       "Total relevant: " ^ string_of_int (length accepts)))
   677   end
   678 
   679 
   680 (***************************************************************)
   681 (* Retrieving and filtering lemmas                             *)
   682 (***************************************************************)
   683 
   684 (*** retrieve lemmas and filter them ***)
   685 
   686 (*Reject theorems with names like "List.filter.filter_list_def" or
   687   "Accessible_Part.acc.defs", as these are definitions arising from packages.*)
   688 fun is_package_def a =
   689   let val names = Long_Name.explode a in
   690     (length names > 2 andalso not (hd names = "local") andalso
   691      String.isSuffix "_def" a) orelse String.isSuffix "_defs" a
   692   end
   693 
   694 fun mk_fact_table g f xs =
   695   fold (Termtab.update o `(g o prop_of o f)) xs Termtab.empty
   696 fun uniquify xs = Termtab.fold (cons o snd) (mk_fact_table I snd xs) []
   697 
   698 (* FIXME: put other record thms here, or declare as "no_atp" *)
   699 fun multi_base_blacklist ctxt =
   700   ["defs", "select_defs", "update_defs", "split", "splits", "split_asm",
   701    "cases", "ext_cases", "eq.simps", "eq.refl", "nchotomy", "case_cong",
   702    "weak_case_cong"]
   703   |> not (Config.get ctxt instantiate_inducts) ? append ["induct", "inducts"]
   704   |> map (prefix ".")
   705 
   706 val max_lambda_nesting = 3
   707 
   708 fun term_has_too_many_lambdas max (t1 $ t2) =
   709     exists (term_has_too_many_lambdas max) [t1, t2]
   710   | term_has_too_many_lambdas max (Abs (_, _, t)) =
   711     max = 0 orelse term_has_too_many_lambdas (max - 1) t
   712   | term_has_too_many_lambdas _ _ = false
   713 
   714 (* Don't count nested lambdas at the level of formulas, since they are
   715    quantifiers. *)
   716 fun formula_has_too_many_lambdas Ts (Abs (_, T, t)) =
   717     formula_has_too_many_lambdas (T :: Ts) t
   718   | formula_has_too_many_lambdas Ts t =
   719     if member (op =) [HOLogic.boolT, propT] (fastype_of1 (Ts, t)) then
   720       exists (formula_has_too_many_lambdas Ts) (#2 (strip_comb t))
   721     else
   722       term_has_too_many_lambdas max_lambda_nesting t
   723 
   724 (* The max apply depth of any "metis" call in "Metis_Examples" (on 2007-10-31)
   725    was 11. *)
   726 val max_apply_depth = 15
   727 
   728 fun apply_depth (f $ t) = Int.max (apply_depth f, apply_depth t + 1)
   729   | apply_depth (Abs (_, _, t)) = apply_depth t
   730   | apply_depth _ = 0
   731 
   732 fun is_formula_too_complex t =
   733   apply_depth t > max_apply_depth orelse formula_has_too_many_lambdas [] t
   734 
   735 (* FIXME: Extend to "Meson" and "Metis" *)
   736 val exists_sledgehammer_const =
   737   exists_Const (fn (s, _) => String.isPrefix sledgehammer_prefix s)
   738 
   739 (* FIXME: make more reliable *)
   740 val exists_low_level_class_const =
   741   exists_Const (fn (s, _) =>
   742      String.isSubstring (Long_Name.separator ^ "class" ^ Long_Name.separator) s)
   743 
   744 fun is_metastrange_theorem th =
   745   case head_of (concl_of th) of
   746       Const (a, _) => (a <> @{const_name Trueprop} andalso
   747                        a <> @{const_name "=="})
   748     | _ => false
   749 
   750 fun is_that_fact th =
   751   String.isSuffix (Long_Name.separator ^ Obtain.thatN) (Thm.get_name_hint th)
   752   andalso exists_subterm (fn Free (s, _) => s = Name.skolem Auto_Bind.thesisN
   753                            | _ => false) (prop_of th)
   754 
   755 val type_has_top_sort =
   756   exists_subtype (fn TFree (_, []) => true | TVar (_, []) => true | _ => false)
   757 
   758 (**** Predicates to detect unwanted facts (prolific or likely to cause
   759       unsoundness) ****)
   760 
   761 fun is_theorem_bad_for_atps thm =
   762   let val t = prop_of thm in
   763     is_formula_too_complex t orelse exists_type type_has_top_sort t orelse
   764     exists_sledgehammer_const t orelse exists_low_level_class_const t orelse
   765     is_metastrange_theorem thm orelse is_that_fact thm
   766   end
   767 
   768 fun meta_equify (@{const Trueprop}
   769                  $ (Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2)) =
   770     Const (@{const_name "=="}, T --> T --> @{typ prop}) $ t1 $ t2
   771   | meta_equify t = t
   772 
   773 val normalize_simp_prop =
   774   meta_equify
   775   #> map_aterms (fn Var ((s, _), T) => Var ((s, 0), T) | t => t)
   776   #> map_types (map_type_tvar (fn ((s, _), S) => TVar ((s, 0), S)))
   777 
   778 fun clasimpset_rules_of ctxt =
   779   let
   780     val {safeIs, safeEs, hazIs, hazEs, ...} = ctxt |> claset_of |> rep_cs
   781     val intros = safeIs @ hazIs
   782     val elims = map Classical.classical_rule (safeEs @ hazEs)
   783     val simps = ctxt |> simpset_of |> dest_ss |> #simps
   784   in
   785     (mk_fact_table I I intros,
   786      mk_fact_table I I elims,
   787      mk_fact_table normalize_simp_prop snd simps)
   788   end
   789 
   790 fun all_facts ctxt reserved really_all add_ths chained_ths =
   791   let
   792     val thy = Proof_Context.theory_of ctxt
   793     val global_facts = Global_Theory.facts_of thy
   794     val local_facts = Proof_Context.facts_of ctxt
   795     val named_locals = local_facts |> Facts.dest_static []
   796     val assms = Assumption.all_assms_of ctxt
   797     fun is_assum th = exists (fn ct => prop_of th aconv term_of ct) assms
   798     val is_chained = member Thm.eq_thm chained_ths
   799     val (intros, elims, simps) = clasimpset_rules_of ctxt
   800     fun locality_of_theorem global th =
   801       if is_chained th then
   802         Chained
   803       else if global then
   804         let val t = prop_of th in
   805           if Termtab.defined intros t then Intro
   806           else if Termtab.defined elims t then Elim
   807           else if Termtab.defined simps (normalize_simp_prop t) then Simp
   808           else General
   809         end
   810       else
   811         if is_assum th then Assum else Local
   812     fun is_good_unnamed_local th =
   813       not (Thm.has_name_hint th) andalso
   814       forall (fn (_, ths) => not (member Thm.eq_thm ths th)) named_locals
   815     val unnamed_locals =
   816       union Thm.eq_thm (Facts.props local_facts) chained_ths
   817       |> filter is_good_unnamed_local |> map (pair "" o single)
   818     val full_space =
   819       Name_Space.merge (Facts.space_of global_facts, Facts.space_of local_facts)
   820     fun add_facts global foldx facts =
   821       foldx (fn (name0, ths) =>
   822         if not really_all andalso name0 <> "" andalso
   823            forall (not o member Thm.eq_thm add_ths) ths andalso
   824            (Facts.is_concealed facts name0 orelse
   825             (not (Config.get ctxt ignore_no_atp) andalso
   826              is_package_def name0) orelse
   827             exists (fn s => String.isSuffix s name0)
   828                    (multi_base_blacklist ctxt) orelse
   829             String.isSuffix "_def_raw" (* FIXME: crude hack *) name0) then
   830           I
   831         else
   832           let
   833             val multi = length ths > 1
   834             val backquote_thm =
   835               backquote o string_for_term ctxt o close_form o prop_of
   836             fun check_thms a =
   837               case try (Proof_Context.get_thms ctxt) a of
   838                 NONE => false
   839               | SOME ths' => Thm.eq_thms (ths, ths')
   840           in
   841             pair 1
   842             #> fold (fn th => fn (j, rest) =>
   843                         (j + 1,
   844                          if is_theorem_bad_for_atps th andalso
   845                             not (member Thm.eq_thm add_ths th) then
   846                            rest
   847                          else
   848                            (((fn () =>
   849                                  if name0 = "" then
   850                                    th |> backquote_thm
   851                                  else
   852                                    [Facts.extern ctxt facts name0,
   853                                     Name_Space.extern ctxt full_space name0,
   854                                     name0]
   855                                    |> find_first check_thms
   856                                    |> (fn SOME name =>
   857                                           make_name reserved multi j name
   858                                         | NONE => "")),
   859                               locality_of_theorem global th),
   860                               (multi, th)) :: rest)) ths
   861             #> snd
   862           end)
   863   in
   864     [] |> add_facts false fold local_facts (unnamed_locals @ named_locals)
   865        |> add_facts true Facts.fold_static global_facts global_facts
   866   end
   867 
   868 (* The single-name theorems go after the multiple-name ones, so that single
   869    names are preferred when both are available. *)
   870 fun rearrange_facts ctxt only =
   871   List.partition (fst o snd) #> op @ #> map (apsnd snd)
   872   #> (not (Config.get ctxt ignore_no_atp) andalso not only)
   873      ? filter_out (No_ATPs.member ctxt o snd)
   874 
   875 fun external_frees t =
   876   [] |> Term.add_frees t |> filter_out (can Name.dest_internal o fst)
   877 
   878 fun relevant_facts ctxt (threshold0, threshold1) max_relevant is_built_in_const
   879                    fudge (override as {add, only, ...}) chained_ths hyp_ts
   880                    concl_t =
   881   let
   882     val thy = Proof_Context.theory_of ctxt
   883     val decay = Math.pow ((1.0 - threshold1) / (1.0 - threshold0),
   884                           1.0 / Real.fromInt (max_relevant + 1))
   885     val add_ths = Attrib.eval_thms ctxt add
   886     val reserved = reserved_isar_keyword_table ()
   887     val ind_stmt =
   888       Logic.list_implies (hyp_ts |> filter_out (null o external_frees), concl_t)
   889       |> Object_Logic.atomize_term thy
   890     val ind_stmt_xs = external_frees ind_stmt
   891     val facts =
   892       (if only then
   893          maps (map (fn ((name, loc), th) => ((K name, loc), (true, th)))
   894                o fact_from_ref ctxt reserved chained_ths) add
   895        else
   896          all_facts ctxt reserved false add_ths chained_ths)
   897       |> Config.get ctxt instantiate_inducts
   898          ? maps (instantiate_if_induct_rule ctxt ind_stmt ind_stmt_xs)
   899       |> rearrange_facts ctxt only
   900       |> uniquify
   901   in
   902     trace_msg ctxt (fn () => "Considering " ^ string_of_int (length facts) ^
   903                              " facts");
   904     (if only orelse threshold1 < 0.0 then
   905        facts
   906      else if threshold0 > 1.0 orelse threshold0 > threshold1 orelse
   907              max_relevant = 0 then
   908        []
   909      else
   910        relevance_filter ctxt threshold0 decay max_relevant is_built_in_const
   911            fudge override facts (chained_ths |> map prop_of) hyp_ts
   912            (concl_t |> theory_constify fudge (Context.theory_name thy)))
   913     |> map (apfst (apfst (fn f => f ())))
   914   end
   915 
   916 end;