src/HOL/Tools/Sledgehammer/sledgehammer_filter.ML
author wenzelm
Sat, 17 Mar 2012 09:51:18 +0100
changeset 47850 80123a220219
parent 47601 6256e064f8fa
child 47893 8eac39af4ec0
permissions -rw-r--r--
'definition' no longer exports the foundational "raw_def";
     1 (*  Title:      HOL/Tools/Sledgehammer/sledgehammer_filter.ML
     2     Author:     Jia Meng, Cambridge University Computer Laboratory and NICTA
     3     Author:     Jasmin Blanchette, TU Muenchen
     4 
     5 Sledgehammer's relevance filter.
     6 *)
     7 
     8 signature SLEDGEHAMMER_FILTER =
     9 sig
    10   type status = ATP_Problem_Generate.status
    11   type stature = ATP_Problem_Generate.stature
    12 
    13   type relevance_fudge =
    14     {local_const_multiplier : real,
    15      worse_irrel_freq : real,
    16      higher_order_irrel_weight : real,
    17      abs_rel_weight : real,
    18      abs_irrel_weight : real,
    19      skolem_irrel_weight : real,
    20      theory_const_rel_weight : real,
    21      theory_const_irrel_weight : real,
    22      chained_const_irrel_weight : real,
    23      intro_bonus : real,
    24      elim_bonus : real,
    25      simp_bonus : real,
    26      local_bonus : real,
    27      assum_bonus : real,
    28      chained_bonus : real,
    29      max_imperfect : real,
    30      max_imperfect_exp : real,
    31      threshold_divisor : real,
    32      ridiculous_threshold : real}
    33 
    34   type relevance_override =
    35     {add : (Facts.ref * Attrib.src list) list,
    36      del : (Facts.ref * Attrib.src list) list,
    37      only : bool}
    38 
    39   val trace : bool Config.T
    40   val ignore_no_atp : bool Config.T
    41   val instantiate_inducts : bool Config.T
    42   val no_relevance_override : relevance_override
    43   val const_names_in_fact :
    44     theory -> (string * typ -> term list -> bool * term list) -> term
    45     -> string list
    46   val clasimpset_rule_table_of : Proof.context -> status Termtab.table
    47   val fact_from_ref :
    48     Proof.context -> unit Symtab.table -> thm list -> status Termtab.table
    49     -> Facts.ref * Attrib.src list -> ((string * stature) * thm) list
    50   val all_facts :
    51     Proof.context -> bool -> 'a Symtab.table -> bool -> thm list
    52     -> thm list -> status Termtab.table -> (((unit -> string) * stature) * thm) list
    53   val nearly_all_facts :
    54     Proof.context -> bool -> relevance_override -> thm list -> term list -> term
    55     -> (((unit -> string) * stature) * thm) list
    56   val relevant_facts :
    57     Proof.context -> real * real -> int
    58     -> (string * typ -> term list -> bool * term list) -> relevance_fudge
    59     -> relevance_override -> thm list -> term list -> term
    60     -> (((unit -> string) * stature) * thm) list
    61     -> ((string * stature) * thm) list
    62 end;
    63 
    64 structure Sledgehammer_Filter : SLEDGEHAMMER_FILTER =
    65 struct
    66 
    67 open ATP_Problem_Generate
    68 open Metis_Tactic
    69 open Sledgehammer_Util
    70 
    71 val trace =
    72   Attrib.setup_config_bool @{binding sledgehammer_filter_trace} (K false)
    73 fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
    74 
    75 (* experimental features *)
    76 val ignore_no_atp =
    77   Attrib.setup_config_bool @{binding sledgehammer_ignore_no_atp} (K false)
    78 val instantiate_inducts =
    79   Attrib.setup_config_bool @{binding sledgehammer_instantiate_inducts} (K false)
    80 
    81 type relevance_fudge =
    82   {local_const_multiplier : real,
    83    worse_irrel_freq : real,
    84    higher_order_irrel_weight : real,
    85    abs_rel_weight : real,
    86    abs_irrel_weight : real,
    87    skolem_irrel_weight : real,
    88    theory_const_rel_weight : real,
    89    theory_const_irrel_weight : real,
    90    chained_const_irrel_weight : real,
    91    intro_bonus : real,
    92    elim_bonus : real,
    93    simp_bonus : real,
    94    local_bonus : real,
    95    assum_bonus : real,
    96    chained_bonus : real,
    97    max_imperfect : real,
    98    max_imperfect_exp : real,
    99    threshold_divisor : real,
   100    ridiculous_threshold : real}
   101 
   102 type relevance_override =
   103   {add : (Facts.ref * Attrib.src list) list,
   104    del : (Facts.ref * Attrib.src list) list,
   105    only : bool}
   106 
   107 val no_relevance_override = {add = [], del = [], only = false}
   108 
   109 val sledgehammer_prefix = "Sledgehammer" ^ Long_Name.separator
   110 val abs_name = sledgehammer_prefix ^ "abs"
   111 val skolem_prefix = sledgehammer_prefix ^ "sko"
   112 val theory_const_suffix = Long_Name.separator ^ " 1"
   113 
   114 (* unfolding these can yield really huge terms *)
   115 val risky_spec_eqs = @{thms Bit0_def Bit1_def}
   116 
   117 fun clasimpset_rule_table_of ctxt =
   118   let
   119     val thy = Proof_Context.theory_of ctxt
   120     val atomize = HOLogic.mk_Trueprop o Object_Logic.atomize_term thy
   121     fun add stature normalizers get_th =
   122       fold (fn rule =>
   123                let
   124                  val th = rule |> get_th
   125                  val t =
   126                    th |> Thm.maxidx_of th > 0 ? zero_var_indexes |> prop_of
   127                in
   128                  fold (fn normalize => Termtab.update (normalize t, stature))
   129                       (I :: normalizers)
   130                end)
   131 (*
   132     TODO: "intro" and "elim" rules are not exploited yet by SPASS (or any other
   133     prover). Reintroduce this code when it becomes useful. It costs about 50 ms
   134     per Sledgehammer invocation.
   135 
   136     val {safeIs, safeEs, hazIs, hazEs, ...} =
   137       ctxt |> claset_of |> Classical.rep_cs
   138     val intros = Item_Net.content safeIs @ Item_Net.content hazIs
   139     val elims =
   140       Item_Net.content safeEs @ Item_Net.content hazEs
   141       |> map Classical.classical_rule
   142 *)
   143     val simps = ctxt |> simpset_of |> dest_ss |> #simps
   144     val spec_eqs =
   145       ctxt |> Spec_Rules.get
   146            |> filter (curry (op =) Spec_Rules.Equational o fst)
   147            |> maps (snd o snd)
   148            |> filter_out (member Thm.eq_thm_prop risky_spec_eqs)
   149   in
   150     Termtab.empty |> add Simp [atomize] snd simps
   151                   |> add Spec_Eq [] I spec_eqs
   152 (*
   153                   |> add Elim [] I elims
   154                   |> add Intro [] I intros
   155 *)
   156   end
   157 
   158 fun needs_quoting reserved s =
   159   Symtab.defined reserved s orelse
   160   exists (not o Lexicon.is_identifier) (Long_Name.explode s)
   161 
   162 fun make_name reserved multi j name =
   163   (name |> needs_quoting reserved name ? quote) ^
   164   (if multi then "(" ^ string_of_int j ^ ")" else "")
   165 
   166 fun explode_interval _ (Facts.FromTo (i, j)) = i upto j
   167   | explode_interval max (Facts.From i) = i upto i + max - 1
   168   | explode_interval _ (Facts.Single i) = [i]
   169 
   170 val backquote =
   171   raw_explode #> map (fn "`" => "\\`" | s => s) #> implode #> enclose "`" "`"
   172 
   173 fun is_assum assms th = exists (fn ct => prop_of th aconv term_of ct) assms
   174 fun is_chained chained_ths = member Thm.eq_thm_prop chained_ths
   175 
   176 fun scope_of_thm global assms chained_ths th =
   177   if is_chained chained_ths th then Chained
   178   else if global then Global
   179   else if is_assum assms th then Assum
   180   else Local
   181 
   182 fun status_of_thm css_table name th =
   183   (* FIXME: use structured name *)
   184   if String.isSubstring ".induct" name orelse
   185      String.isSubstring ".inducts" name then
   186     Induct
   187   else case Termtab.lookup css_table (prop_of th) of
   188     SOME status => status
   189   | NONE => General
   190 
   191 fun stature_of_thm global assms chained_ths css_table name th =
   192   (scope_of_thm global assms chained_ths th, status_of_thm css_table name th)
   193 
   194 fun fact_from_ref ctxt reserved chained_ths css_table (xthm as (xref, args)) =
   195   let
   196     val ths = Attrib.eval_thms ctxt [xthm]
   197     val bracket =
   198       map (enclose "[" "]" o Pretty.str_of o Args.pretty_src ctxt) args
   199       |> implode
   200     fun nth_name j =
   201       case xref of
   202         Facts.Fact s => backquote s ^ bracket
   203       | Facts.Named (("", _), _) => "[" ^ bracket ^ "]"
   204       | Facts.Named ((name, _), NONE) =>
   205         make_name reserved (length ths > 1) (j + 1) name ^ bracket
   206       | Facts.Named ((name, _), SOME intervals) =>
   207         make_name reserved true
   208                  (nth (maps (explode_interval (length ths)) intervals) j) name ^
   209         bracket
   210   in
   211     (ths, (0, []))
   212     |-> fold (fn th => fn (j, rest) =>
   213                  let val name = nth_name j in
   214                    (j + 1, ((name, stature_of_thm false [] chained_ths
   215                                              css_table name th), th) :: rest)
   216                  end)
   217     |> snd
   218   end
   219 
   220 (* This is a terrible hack. Free variables are sometimes coded as "M__" when
   221    they are displayed as "M" and we want to avoid clashes with these. But
   222    sometimes it's even worse: "Ma__" encodes "M". So we simply reserve all
   223    prefixes of all free variables. In the worse case scenario, where the fact
   224    won't be resolved correctly, the user can fix it manually, e.g., by naming
   225    the fact in question. Ideally we would need nothing of it, but backticks
   226    simply don't work with schematic variables. *)
   227 fun all_prefixes_of s =
   228   map (fn i => String.extract (s, 0, SOME i)) (1 upto size s - 1)
   229 fun close_form t =
   230   (t, [] |> Term.add_free_names t |> maps all_prefixes_of)
   231   |> fold (fn ((s, i), T) => fn (t', taken) =>
   232               let val s' = singleton (Name.variant_list taken) s in
   233                 ((if fastype_of t' = HOLogic.boolT then HOLogic.all_const
   234                   else Logic.all_const) T
   235                  $ Abs (s', T, abstract_over (Var ((s, i), T), t')),
   236                  s' :: taken)
   237               end)
   238           (Term.add_vars t [] |> sort_wrt (fst o fst))
   239   |> fst
   240 
   241 fun string_for_term ctxt t =
   242   Print_Mode.setmp (filter (curry (op =) Symbol.xsymbolsN)
   243                    (print_mode_value ())) (Syntax.string_of_term ctxt) t
   244   |> String.translate (fn c => if Char.isPrint c then str c else "")
   245   |> simplify_spaces
   246 
   247 (** Structural induction rules **)
   248 
   249 fun struct_induct_rule_on th =
   250   case Logic.strip_horn (prop_of th) of
   251     (prems, @{const Trueprop}
   252             $ ((p as Var ((p_name, 0), _)) $ (a as Var (_, ind_T)))) =>
   253     if not (is_TVar ind_T) andalso length prems > 1 andalso
   254        exists (exists_subterm (curry (op aconv) p)) prems andalso
   255        not (exists (exists_subterm (curry (op aconv) a)) prems) then
   256       SOME (p_name, ind_T)
   257     else
   258       NONE
   259   | _ => NONE
   260 
   261 fun instantiate_induct_rule ctxt concl_prop p_name ((name, stature), th) ind_x =
   262   let
   263     fun varify_noninducts (t as Free (s, T)) =
   264         if (s, T) = ind_x orelse can dest_funT T then t else Var ((s, 0), T)
   265       | varify_noninducts t = t
   266     val p_inst =
   267       concl_prop |> map_aterms varify_noninducts |> close_form
   268                  |> lambda (Free ind_x)
   269                  |> string_for_term ctxt
   270   in
   271     ((fn () => name () ^ "[where " ^ p_name ^ " = " ^ quote p_inst ^ "]",
   272       stature), th |> read_instantiate ctxt [((p_name, 0), p_inst)])
   273   end
   274 
   275 fun type_match thy (T1, T2) =
   276   (Sign.typ_match thy (T2, T1) Vartab.empty; true)
   277   handle Type.TYPE_MATCH => false
   278 
   279 fun instantiate_if_induct_rule ctxt stmt stmt_xs (ax as (_, th)) =
   280   case struct_induct_rule_on th of
   281     SOME (p_name, ind_T) =>
   282     let val thy = Proof_Context.theory_of ctxt in
   283       stmt_xs |> filter (fn (_, T) => type_match thy (T, ind_T))
   284               |> map_filter (try (instantiate_induct_rule ctxt stmt p_name ax))
   285     end
   286   | NONE => [ax]
   287 
   288 (***************************************************************)
   289 (* Relevance Filtering                                         *)
   290 (***************************************************************)
   291 
   292 (*** constants with types ***)
   293 
   294 fun order_of_type (Type (@{type_name fun}, [T1, T2])) =
   295     Int.max (order_of_type T1 + 1, order_of_type T2)
   296   | order_of_type (Type (_, Ts)) = fold (Integer.max o order_of_type) Ts 0
   297   | order_of_type _ = 0
   298 
   299 (* An abstraction of Isabelle types and first-order terms *)
   300 datatype pattern = PVar | PApp of string * pattern list
   301 datatype ptype = PType of int * pattern list
   302 
   303 fun string_for_pattern PVar = "_"
   304   | string_for_pattern (PApp (s, ps)) =
   305     if null ps then s else s ^ string_for_patterns ps
   306 and string_for_patterns ps = "(" ^ commas (map string_for_pattern ps) ^ ")"
   307 fun string_for_ptype (PType (_, ps)) = string_for_patterns ps
   308 
   309 (*Is the second type an instance of the first one?*)
   310 fun match_pattern (PVar, _) = true
   311   | match_pattern (PApp _, PVar) = false
   312   | match_pattern (PApp (s, ps), PApp (t, qs)) =
   313     s = t andalso match_patterns (ps, qs)
   314 and match_patterns (_, []) = true
   315   | match_patterns ([], _) = false
   316   | match_patterns (p :: ps, q :: qs) =
   317     match_pattern (p, q) andalso match_patterns (ps, qs)
   318 fun match_ptype (PType (_, ps), PType (_, qs)) = match_patterns (ps, qs)
   319 
   320 (* Is there a unifiable constant? *)
   321 fun pconst_mem f consts (s, ps) =
   322   exists (curry (match_ptype o f) ps)
   323          (map snd (filter (curry (op =) s o fst) consts))
   324 fun pconst_hyper_mem f const_tab (s, ps) =
   325   exists (curry (match_ptype o f) ps) (these (Symtab.lookup const_tab s))
   326 
   327 fun pattern_for_type (Type (s, Ts)) = PApp (s, map pattern_for_type Ts)
   328   | pattern_for_type (TFree (s, _)) = PApp (s, [])
   329   | pattern_for_type (TVar _) = PVar
   330 
   331 (* Pairs a constant with the list of its type instantiations. *)
   332 fun ptype thy const x =
   333   (if const then map pattern_for_type (these (try (Sign.const_typargs thy) x))
   334    else [])
   335 fun rich_ptype thy const (s, T) =
   336   PType (order_of_type T, ptype thy const (s, T))
   337 fun rich_pconst thy const (s, T) = (s, rich_ptype thy const (s, T))
   338 
   339 fun string_for_hyper_pconst (s, ps) =
   340   s ^ "{" ^ commas (map string_for_ptype ps) ^ "}"
   341 
   342 (* Add a pconstant to the table, but a [] entry means a standard
   343    connective, which we ignore.*)
   344 fun add_pconst_to_table also_skolem (s, p) =
   345   if (not also_skolem andalso String.isPrefix skolem_prefix s) then I
   346   else Symtab.map_default (s, [p]) (insert (op =) p)
   347 
   348 fun add_pconsts_in_term thy is_built_in_const also_skolems pos =
   349   let
   350     val flip = Option.map not
   351     (* We include free variables, as well as constants, to handle locales. For
   352        each quantifiers that must necessarily be skolemized by the automatic
   353        prover, we introduce a fresh constant to simulate the effect of
   354        Skolemization. *)
   355     fun do_const const x ts =
   356       let val (built_in, ts) = is_built_in_const x ts in
   357         (not built_in
   358          ? add_pconst_to_table also_skolems (rich_pconst thy const x))
   359         #> fold (do_term false) ts
   360       end
   361     and do_term ext_arg t =
   362       case strip_comb t of
   363         (Const x, ts) => do_const true x ts
   364       | (Free x, ts) => do_const false x ts
   365       | (Abs (_, T, t'), ts) =>
   366         ((null ts andalso not ext_arg)
   367          (* Since lambdas on the right-hand side of equalities are usually
   368             extensionalized later by "extensionalize_term", we don't penalize
   369             them here. *)
   370          ? add_pconst_to_table true (abs_name, PType (order_of_type T + 1, [])))
   371         #> fold (do_term false) (t' :: ts)
   372       | (_, ts) => fold (do_term false) ts
   373     fun do_quantifier will_surely_be_skolemized abs_T body_t =
   374       do_formula pos body_t
   375       #> (if also_skolems andalso will_surely_be_skolemized then
   376             add_pconst_to_table true
   377                 (legacy_gensym skolem_prefix, PType (order_of_type abs_T, []))
   378           else
   379             I)
   380     and do_term_or_formula ext_arg T =
   381       if T = HOLogic.boolT then do_formula NONE else do_term ext_arg
   382     and do_formula pos t =
   383       case t of
   384         Const (@{const_name all}, _) $ Abs (_, T, t') =>
   385         do_quantifier (pos = SOME false) T t'
   386       | @{const "==>"} $ t1 $ t2 =>
   387         do_formula (flip pos) t1 #> do_formula pos t2
   388       | Const (@{const_name "=="}, Type (_, [T, _])) $ t1 $ t2 =>
   389         do_term_or_formula false T t1 #> do_term_or_formula true T t2
   390       | @{const Trueprop} $ t1 => do_formula pos t1
   391       | @{const False} => I
   392       | @{const True} => I
   393       | @{const Not} $ t1 => do_formula (flip pos) t1
   394       | Const (@{const_name All}, _) $ Abs (_, T, t') =>
   395         do_quantifier (pos = SOME false) T t'
   396       | Const (@{const_name Ex}, _) $ Abs (_, T, t') =>
   397         do_quantifier (pos = SOME true) T t'
   398       | @{const HOL.conj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   399       | @{const HOL.disj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   400       | @{const HOL.implies} $ t1 $ t2 =>
   401         do_formula (flip pos) t1 #> do_formula pos t2
   402       | Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2 =>
   403         do_term_or_formula false T t1 #> do_term_or_formula true T t2
   404       | Const (@{const_name If}, Type (_, [_, Type (_, [T, _])]))
   405         $ t1 $ t2 $ t3 =>
   406         do_formula NONE t1 #> fold (do_term_or_formula false T) [t2, t3]
   407       | Const (@{const_name Ex1}, _) $ Abs (_, T, t') =>
   408         do_quantifier (is_some pos) T t'
   409       | Const (@{const_name Ball}, _) $ t1 $ Abs (_, T, t') =>
   410         do_quantifier (pos = SOME false) T
   411                       (HOLogic.mk_imp (incr_boundvars 1 t1 $ Bound 0, t'))
   412       | Const (@{const_name Bex}, _) $ t1 $ Abs (_, T, t') =>
   413         do_quantifier (pos = SOME true) T
   414                       (HOLogic.mk_conj (incr_boundvars 1 t1 $ Bound 0, t'))
   415       | (t0 as Const (_, @{typ bool})) $ t1 =>
   416         do_term false t0 #> do_formula pos t1  (* theory constant *)
   417       | _ => do_term false t
   418   in do_formula pos end
   419 
   420 (*Inserts a dummy "constant" referring to the theory name, so that relevance
   421   takes the given theory into account.*)
   422 fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...}
   423                      : relevance_fudge) thy_name t =
   424   if exists (curry (op <) 0.0) [theory_const_rel_weight,
   425                                 theory_const_irrel_weight] then
   426     Const (thy_name ^ theory_const_suffix, @{typ bool}) $ t
   427   else
   428     t
   429 
   430 fun theory_const_prop_of fudge th =
   431   theory_constify fudge (Context.theory_name (theory_of_thm th)) (prop_of th)
   432 
   433 (**** Constant / Type Frequencies ****)
   434 
   435 (* A two-dimensional symbol table counts frequencies of constants. It's keyed
   436    first by constant name and second by its list of type instantiations. For the
   437    latter, we need a linear ordering on "pattern list". *)
   438 
   439 fun pattern_ord p =
   440   case p of
   441     (PVar, PVar) => EQUAL
   442   | (PVar, PApp _) => LESS
   443   | (PApp _, PVar) => GREATER
   444   | (PApp q1, PApp q2) =>
   445     prod_ord fast_string_ord (dict_ord pattern_ord) (q1, q2)
   446 fun ptype_ord (PType p, PType q) =
   447   prod_ord (dict_ord pattern_ord) int_ord (swap p, swap q)
   448 
   449 structure PType_Tab = Table(type key = ptype val ord = ptype_ord)
   450 
   451 fun count_fact_consts thy fudge =
   452   let
   453     fun do_const const (s, T) ts =
   454       (* Two-dimensional table update. Constant maps to types maps to count. *)
   455       PType_Tab.map_default (rich_ptype thy const (s, T), 0) (Integer.add 1)
   456       |> Symtab.map_default (s, PType_Tab.empty)
   457       #> fold do_term ts
   458     and do_term t =
   459       case strip_comb t of
   460         (Const x, ts) => do_const true x ts
   461       | (Free x, ts) => do_const false x ts
   462       | (Abs (_, _, t'), ts) => fold do_term (t' :: ts)
   463       | (_, ts) => fold do_term ts
   464   in do_term o theory_const_prop_of fudge o snd end
   465 
   466 
   467 (**** Actual Filtering Code ****)
   468 
   469 fun pow_int _ 0 = 1.0
   470   | pow_int x 1 = x
   471   | pow_int x n = if n > 0 then x * pow_int x (n - 1) else pow_int x (n + 1) / x
   472 
   473 (*The frequency of a constant is the sum of those of all instances of its type.*)
   474 fun pconst_freq match const_tab (c, ps) =
   475   PType_Tab.fold (fn (qs, m) => match (ps, qs) ? Integer.add m)
   476                  (the (Symtab.lookup const_tab c)) 0
   477 
   478 
   479 (* A surprising number of theorems contain only a few significant constants.
   480    These include all induction rules, and other general theorems. *)
   481 
   482 (* "log" seems best in practice. A constant function of one ignores the constant
   483    frequencies. Rare constants give more points if they are relevant than less
   484    rare ones. *)
   485 fun rel_weight_for _ freq = 1.0 + 2.0 / Math.ln (Real.fromInt freq + 1.0)
   486 
   487 (* Irrelevant constants are treated differently. We associate lower penalties to
   488    very rare constants and very common ones -- the former because they can't
   489    lead to the inclusion of too many new facts, and the latter because they are
   490    so common as to be of little interest. *)
   491 fun irrel_weight_for ({worse_irrel_freq, higher_order_irrel_weight, ...}
   492                       : relevance_fudge) order freq =
   493   let val (k, x) = worse_irrel_freq |> `Real.ceil in
   494     (if freq < k then Math.ln (Real.fromInt (freq + 1)) / Math.ln x
   495      else rel_weight_for order freq / rel_weight_for order k)
   496     * pow_int higher_order_irrel_weight (order - 1)
   497   end
   498 
   499 fun multiplier_for_const_name local_const_multiplier s =
   500   if String.isSubstring "." s then 1.0 else local_const_multiplier
   501 
   502 (* Computes a constant's weight, as determined by its frequency. *)
   503 fun generic_pconst_weight local_const_multiplier abs_weight skolem_weight
   504                           theory_const_weight chained_const_weight weight_for f
   505                           const_tab chained_const_tab (c as (s, PType (m, _))) =
   506   if s = abs_name then
   507     abs_weight
   508   else if String.isPrefix skolem_prefix s then
   509     skolem_weight
   510   else if String.isSuffix theory_const_suffix s then
   511     theory_const_weight
   512   else
   513     multiplier_for_const_name local_const_multiplier s
   514     * weight_for m (pconst_freq (match_ptype o f) const_tab c)
   515     |> (if chained_const_weight < 1.0 andalso
   516            pconst_hyper_mem I chained_const_tab c then
   517           curry (op *) chained_const_weight
   518         else
   519           I)
   520 
   521 fun rel_pconst_weight ({local_const_multiplier, abs_rel_weight,
   522                         theory_const_rel_weight, ...} : relevance_fudge)
   523                       const_tab =
   524   generic_pconst_weight local_const_multiplier abs_rel_weight 0.0
   525                         theory_const_rel_weight 0.0 rel_weight_for I const_tab
   526                         Symtab.empty
   527 
   528 fun irrel_pconst_weight (fudge as {local_const_multiplier, abs_irrel_weight,
   529                                    skolem_irrel_weight,
   530                                    theory_const_irrel_weight,
   531                                    chained_const_irrel_weight, ...})
   532                         const_tab chained_const_tab =
   533   generic_pconst_weight local_const_multiplier abs_irrel_weight
   534                         skolem_irrel_weight theory_const_irrel_weight
   535                         chained_const_irrel_weight (irrel_weight_for fudge) swap
   536                         const_tab chained_const_tab
   537 
   538 fun stature_bonus ({intro_bonus, ...} : relevance_fudge) (_, Intro) =
   539     intro_bonus
   540   | stature_bonus {elim_bonus, ...} (_, Elim) = elim_bonus
   541   | stature_bonus {simp_bonus, ...} (_, Simp) = simp_bonus
   542   | stature_bonus {local_bonus, ...} (Local, _) = local_bonus
   543   | stature_bonus {assum_bonus, ...} (Assum, _) = assum_bonus
   544   | stature_bonus {chained_bonus, ...} (Chained, _) = chained_bonus
   545   | stature_bonus _ _ = 0.0
   546 
   547 fun is_odd_const_name s =
   548   s = abs_name orelse String.isPrefix skolem_prefix s orelse
   549   String.isSuffix theory_const_suffix s
   550 
   551 fun fact_weight fudge stature const_tab relevant_consts chained_consts
   552                 fact_consts =
   553   case fact_consts |> List.partition (pconst_hyper_mem I relevant_consts)
   554                    ||> filter_out (pconst_hyper_mem swap relevant_consts) of
   555     ([], _) => 0.0
   556   | (rel, irrel) =>
   557     if forall (forall (is_odd_const_name o fst)) [rel, irrel] then
   558       0.0
   559     else
   560       let
   561         val irrel = irrel |> filter_out (pconst_mem swap rel)
   562         val rel_weight =
   563           0.0 |> fold (curry (op +) o rel_pconst_weight fudge const_tab) rel
   564         val irrel_weight =
   565           ~ (stature_bonus fudge stature)
   566           |> fold (curry (op +)
   567                    o irrel_pconst_weight fudge const_tab chained_consts) irrel
   568         val res = rel_weight / (rel_weight + irrel_weight)
   569       in if Real.isFinite res then res else 0.0 end
   570 
   571 fun pconsts_in_fact thy is_built_in_const t =
   572   Symtab.fold (fn (s, pss) => fold (cons o pair s) pss)
   573               (Symtab.empty |> add_pconsts_in_term thy is_built_in_const true
   574                                                    (SOME true) t) []
   575 
   576 fun pair_consts_fact thy is_built_in_const fudge fact =
   577   case fact |> snd |> theory_const_prop_of fudge
   578             |> pconsts_in_fact thy is_built_in_const of
   579     [] => NONE
   580   | consts => SOME ((fact, consts), NONE)
   581 
   582 val const_names_in_fact = map fst ooo pconsts_in_fact
   583 
   584 type annotated_thm =
   585   (((unit -> string) * stature) * thm) * (string * ptype) list
   586 
   587 fun take_most_relevant ctxt max_relevant remaining_max
   588         ({max_imperfect, max_imperfect_exp, ...} : relevance_fudge)
   589         (candidates : (annotated_thm * real) list) =
   590   let
   591     val max_imperfect =
   592       Real.ceil (Math.pow (max_imperfect,
   593                     Math.pow (Real.fromInt remaining_max
   594                               / Real.fromInt max_relevant, max_imperfect_exp)))
   595     val (perfect, imperfect) =
   596       candidates |> sort (Real.compare o swap o pairself snd)
   597                  |> take_prefix (fn (_, w) => w > 0.99999)
   598     val ((accepts, more_rejects), rejects) =
   599       chop max_imperfect imperfect |>> append perfect |>> chop remaining_max
   600   in
   601     trace_msg ctxt (fn () =>
   602         "Actually passed (" ^ string_of_int (length accepts) ^ " of " ^
   603         string_of_int (length candidates) ^ "): " ^
   604         (accepts |> map (fn ((((name, _), _), _), weight) =>
   605                             name () ^ " [" ^ Real.toString weight ^ "]")
   606                  |> commas));
   607     (accepts, more_rejects @ rejects)
   608   end
   609 
   610 fun if_empty_replace_with_scope thy is_built_in_const facts sc tab =
   611   if Symtab.is_empty tab then
   612     Symtab.empty
   613     |> fold (add_pconsts_in_term thy is_built_in_const false (SOME false))
   614             (map_filter (fn ((_, (sc', _)), th) =>
   615                             if sc' = sc then SOME (prop_of th) else NONE) facts)
   616   else
   617     tab
   618 
   619 fun consider_arities is_built_in_const th =
   620   let
   621     fun aux _ _ NONE = NONE
   622       | aux t args (SOME tab) =
   623         case t of
   624           t1 $ t2 => SOME tab |> aux t1 (t2 :: args) |> aux t2 []
   625         | Const (x as (s, _)) =>
   626           (if is_built_in_const x args |> fst then
   627              SOME tab
   628            else case Symtab.lookup tab s of
   629              NONE => SOME (Symtab.update (s, length args) tab)
   630            | SOME n => if n = length args then SOME tab else NONE)
   631         | _ => SOME tab
   632   in aux (prop_of th) [] end
   633 
   634 (* FIXME: This is currently only useful for polymorphic type encodings. *)
   635 fun could_benefit_from_ext is_built_in_const facts =
   636   fold (consider_arities is_built_in_const o snd) facts (SOME Symtab.empty)
   637   |> is_none
   638 
   639 (* High enough so that it isn't wrongly considered as very relevant (e.g., for E
   640    weights), but low enough so that it is unlikely to be truncated away if few
   641    facts are included. *)
   642 val special_fact_index = 75
   643 
   644 fun relevance_filter ctxt threshold0 decay max_relevant is_built_in_const
   645         (fudge as {threshold_divisor, ridiculous_threshold, ...})
   646         ({add, del, ...} : relevance_override) facts chained_ts hyp_ts concl_t =
   647   let
   648     val thy = Proof_Context.theory_of ctxt
   649     val const_tab = fold (count_fact_consts thy fudge) facts Symtab.empty
   650     val add_pconsts = add_pconsts_in_term thy is_built_in_const false o SOME
   651     val chained_const_tab = Symtab.empty |> fold (add_pconsts true) chained_ts
   652     val goal_const_tab =
   653       Symtab.empty |> fold (add_pconsts true) hyp_ts
   654                    |> add_pconsts false concl_t
   655       |> (fn tab => if Symtab.is_empty tab then chained_const_tab else tab)
   656       |> fold (if_empty_replace_with_scope thy is_built_in_const facts)
   657               [Chained, Assum, Local]
   658     val add_ths = Attrib.eval_thms ctxt add
   659     val del_ths = Attrib.eval_thms ctxt del
   660     val facts = facts |> filter_out (member Thm.eq_thm_prop del_ths o snd)
   661     fun iter j remaining_max threshold rel_const_tab hopeless hopeful =
   662       let
   663         fun relevant [] _ [] =
   664             (* Nothing has been added this iteration. *)
   665             if j = 0 andalso threshold >= ridiculous_threshold then
   666               (* First iteration? Try again. *)
   667               iter 0 max_relevant (threshold / threshold_divisor) rel_const_tab
   668                    hopeless hopeful
   669             else
   670               []
   671           | relevant candidates rejects [] =
   672             let
   673               val (accepts, more_rejects) =
   674                 take_most_relevant ctxt max_relevant remaining_max fudge
   675                                    candidates
   676               val rel_const_tab' =
   677                 rel_const_tab
   678                 |> fold (add_pconst_to_table false) (maps (snd o fst) accepts)
   679               fun is_dirty (c, _) =
   680                 Symtab.lookup rel_const_tab' c <> Symtab.lookup rel_const_tab c
   681               val (hopeful_rejects, hopeless_rejects) =
   682                  (rejects @ hopeless, ([], []))
   683                  |-> fold (fn (ax as (_, consts), old_weight) =>
   684                               if exists is_dirty consts then
   685                                 apfst (cons (ax, NONE))
   686                               else
   687                                 apsnd (cons (ax, old_weight)))
   688                  |>> append (more_rejects
   689                              |> map (fn (ax as (_, consts), old_weight) =>
   690                                         (ax, if exists is_dirty consts then NONE
   691                                              else SOME old_weight)))
   692               val threshold =
   693                 1.0 - (1.0 - threshold)
   694                       * Math.pow (decay, Real.fromInt (length accepts))
   695               val remaining_max = remaining_max - length accepts
   696             in
   697               trace_msg ctxt (fn () => "New or updated constants: " ^
   698                   commas (rel_const_tab' |> Symtab.dest
   699                           |> subtract (op =) (rel_const_tab |> Symtab.dest)
   700                           |> map string_for_hyper_pconst));
   701               map (fst o fst) accepts @
   702               (if remaining_max = 0 then
   703                  []
   704                else
   705                  iter (j + 1) remaining_max threshold rel_const_tab'
   706                       hopeless_rejects hopeful_rejects)
   707             end
   708           | relevant candidates rejects
   709                      (((ax as (((_, stature), _), fact_consts)), cached_weight)
   710                       :: hopeful) =
   711             let
   712               val weight =
   713                 case cached_weight of
   714                   SOME w => w
   715                 | NONE => fact_weight fudge stature const_tab rel_const_tab
   716                                       chained_const_tab fact_consts
   717             in
   718               if weight >= threshold then
   719                 relevant ((ax, weight) :: candidates) rejects hopeful
   720               else
   721                 relevant candidates ((ax, weight) :: rejects) hopeful
   722             end
   723         in
   724           trace_msg ctxt (fn () =>
   725               "ITERATION " ^ string_of_int j ^ ": current threshold: " ^
   726               Real.toString threshold ^ ", constants: " ^
   727               commas (rel_const_tab |> Symtab.dest
   728                       |> filter (curry (op <>) [] o snd)
   729                       |> map string_for_hyper_pconst));
   730           relevant [] [] hopeful
   731         end
   732     fun prepend_facts ths accepts =
   733       ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @
   734        (accepts |> filter_out (member Thm.eq_thm_prop ths o snd)))
   735       |> take max_relevant
   736     fun insert_into_facts accepts [] = accepts
   737       | insert_into_facts accepts ths =
   738         let
   739           val add = facts |> filter (member Thm.eq_thm_prop ths o snd)
   740           val (bef, after) =
   741             accepts |> filter_out (member Thm.eq_thm_prop ths o snd)
   742                     |> take (max_relevant - length add)
   743                     |> chop special_fact_index
   744         in bef @ add @ after end
   745     fun insert_special_facts accepts =
   746        [] |> could_benefit_from_ext is_built_in_const accepts ? cons @{thm ext}
   747           |> insert_into_facts accepts
   748   in
   749     facts |> map_filter (pair_consts_fact thy is_built_in_const fudge)
   750           |> iter 0 max_relevant threshold0 goal_const_tab []
   751           |> not (null add_ths) ? prepend_facts add_ths
   752           |> insert_special_facts
   753           |> tap (fn accepts => trace_msg ctxt (fn () =>
   754                       "Total relevant: " ^ string_of_int (length accepts)))
   755   end
   756 
   757 
   758 (***************************************************************)
   759 (* Retrieving and filtering lemmas                             *)
   760 (***************************************************************)
   761 
   762 (*** retrieve lemmas and filter them ***)
   763 
   764 (*Reject theorems with names like "List.filter.filter_list_def" or
   765   "Accessible_Part.acc.defs", as these are definitions arising from packages.*)
   766 fun is_package_def a =
   767   let val names = Long_Name.explode a in
   768     (length names > 2 andalso not (hd names = "local") andalso
   769      String.isSuffix "_def" a) orelse String.isSuffix "_defs" a
   770   end
   771 
   772 fun uniquify xs =
   773   Termtab.fold (cons o snd)
   774                (fold (Termtab.update o `(prop_of o snd)) xs Termtab.empty) []
   775 
   776 (* FIXME: put other record thms here, or declare as "no_atp" *)
   777 fun multi_base_blacklist ctxt ho_atp =
   778   ["defs", "select_defs", "update_defs", "split", "splits", "split_asm",
   779    "cases", "ext_cases", "eq.simps", "eq.refl", "nchotomy", "case_cong",
   780    "weak_case_cong"]
   781   |> not (ho_atp orelse (Config.get ctxt instantiate_inducts)) ?
   782         append ["induct", "inducts"]
   783   |> map (prefix ".")
   784 
   785 val max_lambda_nesting = 3 (*only applies if not ho_atp*)
   786 
   787 fun term_has_too_many_lambdas max (t1 $ t2) =
   788     exists (term_has_too_many_lambdas max) [t1, t2]
   789   | term_has_too_many_lambdas max (Abs (_, _, t)) =
   790     max = 0 orelse term_has_too_many_lambdas (max - 1) t
   791   | term_has_too_many_lambdas _ _ = false
   792 
   793 (* Don't count nested lambdas at the level of formulas, since they are
   794    quantifiers. *)
   795 fun formula_has_too_many_lambdas true _ _ = false (*i.e. ho_atp*)
   796   | formula_has_too_many_lambdas _ Ts (Abs (_, T, t)) =
   797       formula_has_too_many_lambdas false (T :: Ts) t
   798   | formula_has_too_many_lambdas _ Ts t =
   799     if member (op =) [HOLogic.boolT, propT] (fastype_of1 (Ts, t)) then
   800       exists (formula_has_too_many_lambdas false Ts) (#2 (strip_comb t))
   801     else
   802       term_has_too_many_lambdas max_lambda_nesting t
   803 
   804 (* The max apply depth of any "metis" call in "Metis_Examples" (on 2007-10-31)
   805    was 11. *)
   806 val max_apply_depth = 15
   807 
   808 fun apply_depth (f $ t) = Int.max (apply_depth f, apply_depth t + 1)
   809   | apply_depth (Abs (_, _, t)) = apply_depth t
   810   | apply_depth _ = 0
   811 
   812 fun is_formula_too_complex ho_atp t =
   813   apply_depth t > max_apply_depth orelse formula_has_too_many_lambdas ho_atp [] t
   814 
   815 (* FIXME: Extend to "Meson" and "Metis" *)
   816 val exists_sledgehammer_const =
   817   exists_Const (fn (s, _) => String.isPrefix sledgehammer_prefix s)
   818 
   819 (* FIXME: make more reliable *)
   820 val exists_low_level_class_const =
   821   exists_Const (fn (s, _) =>
   822      String.isSubstring (Long_Name.separator ^ "class" ^ Long_Name.separator) s)
   823 
   824 fun is_metastrange_theorem th =
   825   case head_of (concl_of th) of
   826     Const (s, _) => (s <> @{const_name Trueprop} andalso
   827                      s <> @{const_name "=="})
   828   | _ => false
   829 
   830 fun is_that_fact th =
   831   String.isSuffix (Long_Name.separator ^ Obtain.thatN) (Thm.get_name_hint th)
   832   andalso exists_subterm (fn Free (s, _) => s = Name.skolem Auto_Bind.thesisN
   833                            | _ => false) (prop_of th)
   834 
   835 (**** Predicates to detect unwanted facts (prolific or likely to cause
   836       unsoundness) ****)
   837 
   838 fun is_theorem_bad_for_atps ho_atp exporter thm =
   839   is_metastrange_theorem thm orelse
   840   (not exporter andalso
   841    let val t = prop_of thm in
   842      is_formula_too_complex ho_atp t orelse exists_type type_has_top_sort t orelse
   843      exists_sledgehammer_const t orelse exists_low_level_class_const t orelse
   844      is_that_fact thm
   845    end)
   846 
   847 fun all_facts ctxt ho_atp reserved exporter add_ths chained_ths css_table =
   848   let
   849     val thy = Proof_Context.theory_of ctxt
   850     val global_facts = Global_Theory.facts_of thy
   851     val local_facts = Proof_Context.facts_of ctxt
   852     val named_locals = local_facts |> Facts.dest_static []
   853     val assms = Assumption.all_assms_of ctxt
   854     fun is_good_unnamed_local th =
   855       not (Thm.has_name_hint th) andalso
   856       forall (fn (_, ths) => not (member Thm.eq_thm_prop ths th)) named_locals
   857     val unnamed_locals =
   858       union Thm.eq_thm_prop (Facts.props local_facts) chained_ths
   859       |> filter is_good_unnamed_local |> map (pair "" o single)
   860     val full_space =
   861       Name_Space.merge (Facts.space_of global_facts, Facts.space_of local_facts)
   862     fun add_facts global foldx facts =
   863       foldx (fn (name0, ths) =>
   864         if not exporter andalso name0 <> "" andalso
   865            forall (not o member Thm.eq_thm_prop add_ths) ths andalso
   866            (Facts.is_concealed facts name0 orelse
   867             (not (Config.get ctxt ignore_no_atp) andalso
   868              is_package_def name0) orelse
   869             exists (fn s => String.isSuffix s name0)
   870                    (multi_base_blacklist ctxt ho_atp)) then
   871           I
   872         else
   873           let
   874             val multi = length ths > 1
   875             val backquote_thm =
   876               backquote o string_for_term ctxt o close_form o prop_of
   877             fun check_thms a =
   878               case try (Proof_Context.get_thms ctxt) a of
   879                 NONE => false
   880               | SOME ths' => eq_list Thm.eq_thm_prop (ths, ths')
   881           in
   882             pair 1
   883             #> fold (fn th => fn (j, (multis, unis)) =>
   884                         (j + 1,
   885                          if not (member Thm.eq_thm_prop add_ths th) andalso
   886                             is_theorem_bad_for_atps ho_atp exporter th then
   887                            (multis, unis)
   888                          else
   889                            let
   890                              val new =
   891                                (((fn () =>
   892                                  if name0 = "" then
   893                                    th |> backquote_thm
   894                                  else
   895                                    [Facts.extern ctxt facts name0,
   896                                     Name_Space.extern ctxt full_space name0,
   897                                     name0]
   898                                    |> find_first check_thms
   899                                    |> (fn SOME name =>
   900                                           make_name reserved multi j name
   901                                         | NONE => "")),
   902                                 stature_of_thm global assms chained_ths
   903                                                css_table name0 th), th)
   904                            in
   905                              if multi then (new :: multis, unis)
   906                              else (multis, new :: unis)
   907                            end)) ths
   908             #> snd
   909           end)
   910   in
   911     (* The single-name theorems go after the multiple-name ones, so that single
   912        names are preferred when both are available. *)
   913     ([], []) |> add_facts false fold local_facts (unnamed_locals @ named_locals)
   914              |> add_facts true Facts.fold_static global_facts global_facts
   915              |> op @
   916   end
   917 
   918 fun external_frees t =
   919   [] |> Term.add_frees t |> filter_out (can Name.dest_internal o fst)
   920 
   921 fun nearly_all_facts ctxt ho_atp ({add, only, ...} : relevance_override)
   922                      chained_ths hyp_ts concl_t =
   923   if only andalso null add then
   924     []
   925   else
   926     let
   927       val thy = Proof_Context.theory_of ctxt
   928       val reserved = reserved_isar_keyword_table ()
   929       val add_ths = Attrib.eval_thms ctxt add
   930       val ind_stmt =
   931         (hyp_ts |> filter_out (null o external_frees), concl_t)
   932         |> Logic.list_implies |> Object_Logic.atomize_term thy
   933       val ind_stmt_xs = external_frees ind_stmt
   934       val css_table = clasimpset_rule_table_of ctxt
   935     in
   936       (if only then
   937          maps (map (fn ((name, stature), th) => ((K name, stature), th))
   938                o fact_from_ref ctxt reserved chained_ths css_table) add
   939        else
   940          all_facts ctxt ho_atp reserved false add_ths chained_ths css_table)
   941       |> Config.get ctxt instantiate_inducts
   942          ? maps (instantiate_if_induct_rule ctxt ind_stmt ind_stmt_xs)
   943       |> (not (Config.get ctxt ignore_no_atp) andalso not only)
   944          ? filter_out (No_ATPs.member ctxt o snd)
   945       |> uniquify
   946     end
   947 
   948 fun relevant_facts ctxt (threshold0, threshold1) max_relevant is_built_in_const
   949                    fudge (override as {only, ...}) chained_ths hyp_ts concl_t
   950                    facts =
   951   let
   952     val thy = Proof_Context.theory_of ctxt
   953     val decay = Math.pow ((1.0 - threshold1) / (1.0 - threshold0),
   954                           1.0 / Real.fromInt (max_relevant + 1))
   955   in
   956     trace_msg ctxt (fn () => "Considering " ^ string_of_int (length facts) ^
   957                              " facts");
   958     (if only orelse threshold1 < 0.0 then
   959        facts
   960      else if threshold0 > 1.0 orelse threshold0 > threshold1 orelse
   961              max_relevant = 0 then
   962        []
   963      else
   964        relevance_filter ctxt threshold0 decay max_relevant is_built_in_const
   965            fudge override facts (chained_ths |> map prop_of) hyp_ts
   966            (concl_t |> theory_constify fudge (Context.theory_name thy)))
   967     |> map (apfst (apfst (fn f => f ())))
   968   end
   969 
   970 end;