src/HOL/Tools/Sledgehammer/sledgehammer_filter.ML
author blanchet
Mon, 02 Jan 2012 14:32:20 +0100
changeset 46944 b2594cc862d7
parent 46853 989b1eede03c
child 47088 7b19666f0e3d
permissions -rw-r--r--
removed special handling for set constants in relevance filter
     1 (*  Title:      HOL/Tools/Sledgehammer/sledgehammer_filter.ML
     2     Author:     Jia Meng, Cambridge University Computer Laboratory and NICTA
     3     Author:     Jasmin Blanchette, TU Muenchen
     4 
     5 Sledgehammer's relevance filter.
     6 *)
     7 
     8 signature SLEDGEHAMMER_FILTER =
     9 sig
    10   type locality = ATP_Translate.locality
    11 
    12   type relevance_fudge =
    13     {local_const_multiplier : real,
    14      worse_irrel_freq : real,
    15      higher_order_irrel_weight : real,
    16      abs_rel_weight : real,
    17      abs_irrel_weight : real,
    18      skolem_irrel_weight : real,
    19      theory_const_rel_weight : real,
    20      theory_const_irrel_weight : real,
    21      chained_const_irrel_weight : real,
    22      intro_bonus : real,
    23      elim_bonus : real,
    24      simp_bonus : real,
    25      local_bonus : real,
    26      assum_bonus : real,
    27      chained_bonus : real,
    28      max_imperfect : real,
    29      max_imperfect_exp : real,
    30      threshold_divisor : real,
    31      ridiculous_threshold : real}
    32 
    33   type relevance_override =
    34     {add : (Facts.ref * Attrib.src list) list,
    35      del : (Facts.ref * Attrib.src list) list,
    36      only : bool}
    37 
    38   val trace : bool Config.T
    39   val ignore_no_atp : bool Config.T
    40   val instantiate_inducts : bool Config.T
    41   val no_relevance_override : relevance_override
    42   val const_names_in_fact :
    43     theory -> (string * typ -> term list -> bool * term list) -> term
    44     -> string list
    45   val fact_from_ref :
    46     Proof.context -> unit Symtab.table -> thm list
    47     -> Facts.ref * Attrib.src list -> ((string * locality) * thm) list
    48   val all_facts :
    49     Proof.context -> bool -> 'a Symtab.table -> bool -> thm list -> thm list
    50     -> (((unit -> string) * locality) * thm) list
    51   val nearly_all_facts :
    52     Proof.context -> bool -> relevance_override -> thm list -> term list -> term
    53     -> (((unit -> string) * locality) * thm) list
    54   val relevant_facts :
    55     Proof.context -> real * real -> int
    56     -> (string * typ -> term list -> bool * term list) -> relevance_fudge
    57     -> relevance_override -> thm list -> term list -> term
    58     -> (((unit -> string) * locality) * thm) list
    59     -> ((string * locality) * thm) list
    60 end;
    61 
    62 structure Sledgehammer_Filter : SLEDGEHAMMER_FILTER =
    63 struct
    64 
    65 open ATP_Translate
    66 open Metis_Tactic
    67 open Sledgehammer_Util
    68 
    69 val trace =
    70   Attrib.setup_config_bool @{binding sledgehammer_filter_trace} (K false)
    71 fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
    72 
    73 (* experimental features *)
    74 val ignore_no_atp =
    75   Attrib.setup_config_bool @{binding sledgehammer_ignore_no_atp} (K false)
    76 val instantiate_inducts =
    77   Attrib.setup_config_bool @{binding sledgehammer_instantiate_inducts} (K false)
    78 
    79 type relevance_fudge =
    80   {local_const_multiplier : real,
    81    worse_irrel_freq : real,
    82    higher_order_irrel_weight : real,
    83    abs_rel_weight : real,
    84    abs_irrel_weight : real,
    85    skolem_irrel_weight : real,
    86    theory_const_rel_weight : real,
    87    theory_const_irrel_weight : real,
    88    chained_const_irrel_weight : real,
    89    intro_bonus : real,
    90    elim_bonus : real,
    91    simp_bonus : real,
    92    local_bonus : real,
    93    assum_bonus : real,
    94    chained_bonus : real,
    95    max_imperfect : real,
    96    max_imperfect_exp : real,
    97    threshold_divisor : real,
    98    ridiculous_threshold : real}
    99 
   100 type relevance_override =
   101   {add : (Facts.ref * Attrib.src list) list,
   102    del : (Facts.ref * Attrib.src list) list,
   103    only : bool}
   104 
   105 val no_relevance_override = {add = [], del = [], only = false}
   106 
   107 val sledgehammer_prefix = "Sledgehammer" ^ Long_Name.separator
   108 val abs_name = sledgehammer_prefix ^ "abs"
   109 val skolem_prefix = sledgehammer_prefix ^ "sko"
   110 val theory_const_suffix = Long_Name.separator ^ " 1"
   111 
   112 fun needs_quoting reserved s =
   113   Symtab.defined reserved s orelse
   114   exists (not o Lexicon.is_identifier) (Long_Name.explode s)
   115 
   116 fun make_name reserved multi j name =
   117   (name |> needs_quoting reserved name ? quote) ^
   118   (if multi then "(" ^ string_of_int j ^ ")" else "")
   119 
   120 fun explode_interval _ (Facts.FromTo (i, j)) = i upto j
   121   | explode_interval max (Facts.From i) = i upto i + max - 1
   122   | explode_interval _ (Facts.Single i) = [i]
   123 
   124 val backquote =
   125   raw_explode #> map (fn "`" => "\\`" | s => s) #> implode #> enclose "`" "`"
   126 fun fact_from_ref ctxt reserved chained_ths (xthm as (xref, args)) =
   127   let
   128     val ths = Attrib.eval_thms ctxt [xthm]
   129     val bracket =
   130       map (enclose "[" "]" o Pretty.str_of o Args.pretty_src ctxt) args
   131       |> implode
   132     fun nth_name j =
   133       case xref of
   134         Facts.Fact s => backquote s ^ bracket
   135       | Facts.Named (("", _), _) => "[" ^ bracket ^ "]"
   136       | Facts.Named ((name, _), NONE) =>
   137         make_name reserved (length ths > 1) (j + 1) name ^ bracket
   138       | Facts.Named ((name, _), SOME intervals) =>
   139         make_name reserved true
   140                  (nth (maps (explode_interval (length ths)) intervals) j) name ^
   141         bracket
   142   in
   143     (ths, (0, []))
   144     |-> fold (fn th => fn (j, rest) =>
   145                  (j + 1,
   146                   ((nth_name j,
   147                     if member Thm.eq_thm_prop chained_ths th then Chained
   148                     else General), th) :: rest))
   149     |> snd
   150   end
   151 
   152 (* This is a terrible hack. Free variables are sometimes coded as "M__" when
   153    they are displayed as "M" and we want to avoid clashes with these. But
   154    sometimes it's even worse: "Ma__" encodes "M". So we simply reserve all
   155    prefixes of all free variables. In the worse case scenario, where the fact
   156    won't be resolved correctly, the user can fix it manually, e.g., by naming
   157    the fact in question. Ideally we would need nothing of it, but backticks
   158    simply don't work with schematic variables. *)
   159 fun all_prefixes_of s =
   160   map (fn i => String.extract (s, 0, SOME i)) (1 upto size s - 1)
   161 fun close_form t =
   162   (t, [] |> Term.add_free_names t |> maps all_prefixes_of)
   163   |> fold (fn ((s, i), T) => fn (t', taken) =>
   164               let val s' = singleton (Name.variant_list taken) s in
   165                 ((if fastype_of t' = HOLogic.boolT then HOLogic.all_const
   166                   else Term.all) T
   167                  $ Abs (s', T, abstract_over (Var ((s, i), T), t')),
   168                  s' :: taken)
   169               end)
   170           (Term.add_vars t [] |> sort_wrt (fst o fst))
   171   |> fst
   172 
   173 fun string_for_term ctxt t =
   174   Print_Mode.setmp (filter (curry (op =) Symbol.xsymbolsN)
   175                    (print_mode_value ())) (Syntax.string_of_term ctxt) t
   176   |> String.translate (fn c => if Char.isPrint c then str c else "")
   177   |> simplify_spaces
   178 
   179 (** Structural induction rules **)
   180 
   181 fun struct_induct_rule_on th =
   182   case Logic.strip_horn (prop_of th) of
   183     (prems, @{const Trueprop}
   184             $ ((p as Var ((p_name, 0), _)) $ (a as Var (_, ind_T)))) =>
   185     if not (is_TVar ind_T) andalso length prems > 1 andalso
   186        exists (exists_subterm (curry (op aconv) p)) prems andalso
   187        not (exists (exists_subterm (curry (op aconv) a)) prems) then
   188       SOME (p_name, ind_T)
   189     else
   190       NONE
   191   | _ => NONE
   192 
   193 fun instantiate_induct_rule ctxt concl_prop p_name ((name, loc), th) ind_x =
   194   let
   195     fun varify_noninducts (t as Free (s, T)) =
   196         if (s, T) = ind_x orelse can dest_funT T then t else Var ((s, 0), T)
   197       | varify_noninducts t = t
   198     val p_inst =
   199       concl_prop |> map_aterms varify_noninducts |> close_form
   200                  |> lambda (Free ind_x)
   201                  |> string_for_term ctxt
   202   in
   203     ((fn () => name () ^ "[where " ^ p_name ^ " = " ^ quote p_inst ^ "]", loc),
   204      th |> read_instantiate ctxt [((p_name, 0), p_inst)])
   205   end
   206 
   207 fun type_match thy (T1, T2) =
   208   (Sign.typ_match thy (T2, T1) Vartab.empty; true)
   209   handle Type.TYPE_MATCH => false
   210 
   211 fun instantiate_if_induct_rule ctxt stmt stmt_xs (ax as (_, th)) =
   212   case struct_induct_rule_on th of
   213     SOME (p_name, ind_T) =>
   214     let val thy = Proof_Context.theory_of ctxt in
   215       stmt_xs |> filter (fn (_, T) => type_match thy (T, ind_T))
   216               |> map_filter (try (instantiate_induct_rule ctxt stmt p_name ax))
   217     end
   218   | NONE => [ax]
   219 
   220 (***************************************************************)
   221 (* Relevance Filtering                                         *)
   222 (***************************************************************)
   223 
   224 (*** constants with types ***)
   225 
   226 fun order_of_type (Type (@{type_name fun}, [T1, T2])) =
   227     Int.max (order_of_type T1 + 1, order_of_type T2)
   228   | order_of_type (Type (_, Ts)) = fold (Integer.max o order_of_type) Ts 0
   229   | order_of_type _ = 0
   230 
   231 (* An abstraction of Isabelle types and first-order terms *)
   232 datatype pattern = PVar | PApp of string * pattern list
   233 datatype ptype = PType of int * pattern list
   234 
   235 fun string_for_pattern PVar = "_"
   236   | string_for_pattern (PApp (s, ps)) =
   237     if null ps then s else s ^ string_for_patterns ps
   238 and string_for_patterns ps = "(" ^ commas (map string_for_pattern ps) ^ ")"
   239 fun string_for_ptype (PType (_, ps)) = string_for_patterns ps
   240 
   241 (*Is the second type an instance of the first one?*)
   242 fun match_pattern (PVar, _) = true
   243   | match_pattern (PApp _, PVar) = false
   244   | match_pattern (PApp (s, ps), PApp (t, qs)) =
   245     s = t andalso match_patterns (ps, qs)
   246 and match_patterns (_, []) = true
   247   | match_patterns ([], _) = false
   248   | match_patterns (p :: ps, q :: qs) =
   249     match_pattern (p, q) andalso match_patterns (ps, qs)
   250 fun match_ptype (PType (_, ps), PType (_, qs)) = match_patterns (ps, qs)
   251 
   252 (* Is there a unifiable constant? *)
   253 fun pconst_mem f consts (s, ps) =
   254   exists (curry (match_ptype o f) ps)
   255          (map snd (filter (curry (op =) s o fst) consts))
   256 fun pconst_hyper_mem f const_tab (s, ps) =
   257   exists (curry (match_ptype o f) ps) (these (Symtab.lookup const_tab s))
   258 
   259 fun pattern_for_type (Type (s, Ts)) = PApp (s, map pattern_for_type Ts)
   260   | pattern_for_type (TFree (s, _)) = PApp (s, [])
   261   | pattern_for_type (TVar _) = PVar
   262 
   263 (* Pairs a constant with the list of its type instantiations. *)
   264 fun ptype thy const x =
   265   (if const then map pattern_for_type (these (try (Sign.const_typargs thy) x))
   266    else [])
   267 fun rich_ptype thy const (s, T) =
   268   PType (order_of_type T, ptype thy const (s, T))
   269 fun rich_pconst thy const (s, T) = (s, rich_ptype thy const (s, T))
   270 
   271 fun string_for_hyper_pconst (s, ps) =
   272   s ^ "{" ^ commas (map string_for_ptype ps) ^ "}"
   273 
   274 (* Add a pconstant to the table, but a [] entry means a standard
   275    connective, which we ignore.*)
   276 fun add_pconst_to_table also_skolem (s, p) =
   277   if (not also_skolem andalso String.isPrefix skolem_prefix s) then I
   278   else Symtab.map_default (s, [p]) (insert (op =) p)
   279 
   280 fun add_pconsts_in_term thy is_built_in_const also_skolems pos =
   281   let
   282     val flip = Option.map not
   283     (* We include free variables, as well as constants, to handle locales. For
   284        each quantifiers that must necessarily be skolemized by the automatic
   285        prover, we introduce a fresh constant to simulate the effect of
   286        Skolemization. *)
   287     fun do_const const x ts =
   288       let val (built_in, ts) = is_built_in_const x ts in
   289         (not built_in
   290          ? add_pconst_to_table also_skolems (rich_pconst thy const x))
   291         #> fold (do_term false) ts
   292       end
   293     and do_term ext_arg t =
   294       case strip_comb t of
   295         (Const x, ts) => do_const true x ts
   296       | (Free x, ts) => do_const false x ts
   297       | (Abs (_, T, t'), ts) =>
   298         ((null ts andalso not ext_arg)
   299          (* Since lambdas on the right-hand side of equalities are usually
   300             extensionalized later by "extensionalize_term", we don't penalize
   301             them here. *)
   302          ? add_pconst_to_table true (abs_name, PType (order_of_type T + 1, [])))
   303         #> fold (do_term false) (t' :: ts)
   304       | (_, ts) => fold (do_term false) ts
   305     fun do_quantifier will_surely_be_skolemized abs_T body_t =
   306       do_formula pos body_t
   307       #> (if also_skolems andalso will_surely_be_skolemized then
   308             add_pconst_to_table true
   309                 (legacy_gensym skolem_prefix, PType (order_of_type abs_T, []))
   310           else
   311             I)
   312     and do_term_or_formula ext_arg T =
   313       if T = HOLogic.boolT then do_formula NONE else do_term ext_arg
   314     and do_formula pos t =
   315       case t of
   316         Const (@{const_name all}, _) $ Abs (_, T, t') =>
   317         do_quantifier (pos = SOME false) T t'
   318       | @{const "==>"} $ t1 $ t2 =>
   319         do_formula (flip pos) t1 #> do_formula pos t2
   320       | Const (@{const_name "=="}, Type (_, [T, _])) $ t1 $ t2 =>
   321         do_term_or_formula false T t1 #> do_term_or_formula true T t2
   322       | @{const Trueprop} $ t1 => do_formula pos t1
   323       | @{const False} => I
   324       | @{const True} => I
   325       | @{const Not} $ t1 => do_formula (flip pos) t1
   326       | Const (@{const_name All}, _) $ Abs (_, T, t') =>
   327         do_quantifier (pos = SOME false) T t'
   328       | Const (@{const_name Ex}, _) $ Abs (_, T, t') =>
   329         do_quantifier (pos = SOME true) T t'
   330       | @{const HOL.conj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   331       | @{const HOL.disj} $ t1 $ t2 => fold (do_formula pos) [t1, t2]
   332       | @{const HOL.implies} $ t1 $ t2 =>
   333         do_formula (flip pos) t1 #> do_formula pos t2
   334       | Const (@{const_name HOL.eq}, Type (_, [T, _])) $ t1 $ t2 =>
   335         do_term_or_formula false T t1 #> do_term_or_formula true T t2
   336       | Const (@{const_name If}, Type (_, [_, Type (_, [T, _])]))
   337         $ t1 $ t2 $ t3 =>
   338         do_formula NONE t1 #> fold (do_term_or_formula false T) [t2, t3]
   339       | Const (@{const_name Ex1}, _) $ Abs (_, T, t') =>
   340         do_quantifier (is_some pos) T t'
   341       | Const (@{const_name Ball}, _) $ t1 $ Abs (_, T, t') =>
   342         do_quantifier (pos = SOME false) T
   343                       (HOLogic.mk_imp (incr_boundvars 1 t1 $ Bound 0, t'))
   344       | Const (@{const_name Bex}, _) $ t1 $ Abs (_, T, t') =>
   345         do_quantifier (pos = SOME true) T
   346                       (HOLogic.mk_conj (incr_boundvars 1 t1 $ Bound 0, t'))
   347       | (t0 as Const (_, @{typ bool})) $ t1 =>
   348         do_term false t0 #> do_formula pos t1  (* theory constant *)
   349       | _ => do_term false t
   350   in do_formula pos end
   351 
   352 (*Inserts a dummy "constant" referring to the theory name, so that relevance
   353   takes the given theory into account.*)
   354 fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...}
   355                      : relevance_fudge) thy_name t =
   356   if exists (curry (op <) 0.0) [theory_const_rel_weight,
   357                                 theory_const_irrel_weight] then
   358     Const (thy_name ^ theory_const_suffix, @{typ bool}) $ t
   359   else
   360     t
   361 
   362 fun theory_const_prop_of fudge th =
   363   theory_constify fudge (Context.theory_name (theory_of_thm th)) (prop_of th)
   364 
   365 (**** Constant / Type Frequencies ****)
   366 
   367 (* A two-dimensional symbol table counts frequencies of constants. It's keyed
   368    first by constant name and second by its list of type instantiations. For the
   369    latter, we need a linear ordering on "pattern list". *)
   370 
   371 fun pattern_ord p =
   372   case p of
   373     (PVar, PVar) => EQUAL
   374   | (PVar, PApp _) => LESS
   375   | (PApp _, PVar) => GREATER
   376   | (PApp q1, PApp q2) =>
   377     prod_ord fast_string_ord (dict_ord pattern_ord) (q1, q2)
   378 fun ptype_ord (PType p, PType q) =
   379   prod_ord (dict_ord pattern_ord) int_ord (swap p, swap q)
   380 
   381 structure PType_Tab = Table(type key = ptype val ord = ptype_ord)
   382 
   383 fun count_fact_consts thy fudge =
   384   let
   385     fun do_const const (s, T) ts =
   386       (* Two-dimensional table update. Constant maps to types maps to count. *)
   387       PType_Tab.map_default (rich_ptype thy const (s, T), 0) (Integer.add 1)
   388       |> Symtab.map_default (s, PType_Tab.empty)
   389       #> fold do_term ts
   390     and do_term t =
   391       case strip_comb t of
   392         (Const x, ts) => do_const true x ts
   393       | (Free x, ts) => do_const false x ts
   394       | (Abs (_, _, t'), ts) => fold do_term (t' :: ts)
   395       | (_, ts) => fold do_term ts
   396   in do_term o theory_const_prop_of fudge o snd end
   397 
   398 
   399 (**** Actual Filtering Code ****)
   400 
   401 fun pow_int _ 0 = 1.0
   402   | pow_int x 1 = x
   403   | pow_int x n = if n > 0 then x * pow_int x (n - 1) else pow_int x (n + 1) / x
   404 
   405 (*The frequency of a constant is the sum of those of all instances of its type.*)
   406 fun pconst_freq match const_tab (c, ps) =
   407   PType_Tab.fold (fn (qs, m) => match (ps, qs) ? Integer.add m)
   408                  (the (Symtab.lookup const_tab c)) 0
   409 
   410 
   411 (* A surprising number of theorems contain only a few significant constants.
   412    These include all induction rules, and other general theorems. *)
   413 
   414 (* "log" seems best in practice. A constant function of one ignores the constant
   415    frequencies. Rare constants give more points if they are relevant than less
   416    rare ones. *)
   417 fun rel_weight_for _ freq = 1.0 + 2.0 / Math.ln (Real.fromInt freq + 1.0)
   418 
   419 (* Irrelevant constants are treated differently. We associate lower penalties to
   420    very rare constants and very common ones -- the former because they can't
   421    lead to the inclusion of too many new facts, and the latter because they are
   422    so common as to be of little interest. *)
   423 fun irrel_weight_for ({worse_irrel_freq, higher_order_irrel_weight, ...}
   424                       : relevance_fudge) order freq =
   425   let val (k, x) = worse_irrel_freq |> `Real.ceil in
   426     (if freq < k then Math.ln (Real.fromInt (freq + 1)) / Math.ln x
   427      else rel_weight_for order freq / rel_weight_for order k)
   428     * pow_int higher_order_irrel_weight (order - 1)
   429   end
   430 
   431 fun multiplier_for_const_name local_const_multiplier s =
   432   if String.isSubstring "." s then 1.0 else local_const_multiplier
   433 
   434 (* Computes a constant's weight, as determined by its frequency. *)
   435 fun generic_pconst_weight local_const_multiplier abs_weight skolem_weight
   436                           theory_const_weight chained_const_weight weight_for f
   437                           const_tab chained_const_tab (c as (s, PType (m, _))) =
   438   if s = abs_name then
   439     abs_weight
   440   else if String.isPrefix skolem_prefix s then
   441     skolem_weight
   442   else if String.isSuffix theory_const_suffix s then
   443     theory_const_weight
   444   else
   445     multiplier_for_const_name local_const_multiplier s
   446     * weight_for m (pconst_freq (match_ptype o f) const_tab c)
   447     |> (if chained_const_weight < 1.0 andalso
   448            pconst_hyper_mem I chained_const_tab c then
   449           curry (op *) chained_const_weight
   450         else
   451           I)
   452 
   453 fun rel_pconst_weight ({local_const_multiplier, abs_rel_weight,
   454                         theory_const_rel_weight, ...} : relevance_fudge)
   455                       const_tab =
   456   generic_pconst_weight local_const_multiplier abs_rel_weight 0.0
   457                         theory_const_rel_weight 0.0 rel_weight_for I const_tab
   458                         Symtab.empty
   459 
   460 fun irrel_pconst_weight (fudge as {local_const_multiplier, abs_irrel_weight,
   461                                    skolem_irrel_weight,
   462                                    theory_const_irrel_weight,
   463                                    chained_const_irrel_weight, ...})
   464                         const_tab chained_const_tab =
   465   generic_pconst_weight local_const_multiplier abs_irrel_weight
   466                         skolem_irrel_weight theory_const_irrel_weight
   467                         chained_const_irrel_weight (irrel_weight_for fudge) swap
   468                         const_tab chained_const_tab
   469 
   470 fun locality_bonus ({intro_bonus, ...} : relevance_fudge) Intro = intro_bonus
   471   | locality_bonus {elim_bonus, ...} Elim = elim_bonus
   472   | locality_bonus {simp_bonus, ...} Simp = simp_bonus
   473   | locality_bonus {local_bonus, ...} Local = local_bonus
   474   | locality_bonus {assum_bonus, ...} Assum = assum_bonus
   475   | locality_bonus {chained_bonus, ...} Chained = chained_bonus
   476   | locality_bonus _ _ = 0.0
   477 
   478 fun is_odd_const_name s =
   479   s = abs_name orelse String.isPrefix skolem_prefix s orelse
   480   String.isSuffix theory_const_suffix s
   481 
   482 fun fact_weight fudge loc const_tab relevant_consts chained_consts fact_consts =
   483   case fact_consts |> List.partition (pconst_hyper_mem I relevant_consts)
   484                    ||> filter_out (pconst_hyper_mem swap relevant_consts) of
   485     ([], _) => 0.0
   486   | (rel, irrel) =>
   487     if forall (forall (is_odd_const_name o fst)) [rel, irrel] then
   488       0.0
   489     else
   490       let
   491         val irrel = irrel |> filter_out (pconst_mem swap rel)
   492         val rel_weight =
   493           0.0 |> fold (curry (op +) o rel_pconst_weight fudge const_tab) rel
   494         val irrel_weight =
   495           ~ (locality_bonus fudge loc)
   496           |> fold (curry (op +)
   497                    o irrel_pconst_weight fudge const_tab chained_consts) irrel
   498         val res = rel_weight / (rel_weight + irrel_weight)
   499       in if Real.isFinite res then res else 0.0 end
   500 
   501 fun pconsts_in_fact thy is_built_in_const t =
   502   Symtab.fold (fn (s, pss) => fold (cons o pair s) pss)
   503               (Symtab.empty |> add_pconsts_in_term thy is_built_in_const true
   504                                                    (SOME true) t) []
   505 
   506 fun pair_consts_fact thy is_built_in_const fudge fact =
   507   case fact |> snd |> theory_const_prop_of fudge
   508             |> pconsts_in_fact thy is_built_in_const of
   509     [] => NONE
   510   | consts => SOME ((fact, consts), NONE)
   511 
   512 val const_names_in_fact = map fst ooo pconsts_in_fact
   513 
   514 type annotated_thm =
   515   (((unit -> string) * locality) * thm) * (string * ptype) list
   516 
   517 fun take_most_relevant ctxt max_relevant remaining_max
   518         ({max_imperfect, max_imperfect_exp, ...} : relevance_fudge)
   519         (candidates : (annotated_thm * real) list) =
   520   let
   521     val max_imperfect =
   522       Real.ceil (Math.pow (max_imperfect,
   523                     Math.pow (Real.fromInt remaining_max
   524                               / Real.fromInt max_relevant, max_imperfect_exp)))
   525     val (perfect, imperfect) =
   526       candidates |> sort (Real.compare o swap o pairself snd)
   527                  |> take_prefix (fn (_, w) => w > 0.99999)
   528     val ((accepts, more_rejects), rejects) =
   529       chop max_imperfect imperfect |>> append perfect |>> chop remaining_max
   530   in
   531     trace_msg ctxt (fn () =>
   532         "Actually passed (" ^ string_of_int (length accepts) ^ " of " ^
   533         string_of_int (length candidates) ^ "): " ^
   534         (accepts |> map (fn ((((name, _), _), _), weight) =>
   535                             name () ^ " [" ^ Real.toString weight ^ "]")
   536                  |> commas));
   537     (accepts, more_rejects @ rejects)
   538   end
   539 
   540 fun if_empty_replace_with_locality thy is_built_in_const facts loc tab =
   541   if Symtab.is_empty tab then
   542     Symtab.empty
   543     |> fold (add_pconsts_in_term thy is_built_in_const false (SOME false))
   544             (map_filter (fn ((_, loc'), th) =>
   545                             if loc' = loc then SOME (prop_of th) else NONE)
   546                         facts)
   547   else
   548     tab
   549 
   550 fun consider_arities is_built_in_const th =
   551   let
   552     fun aux _ _ NONE = NONE
   553       | aux t args (SOME tab) =
   554         case t of
   555           t1 $ t2 => SOME tab |> aux t1 (t2 :: args) |> aux t2 []
   556         | Const (x as (s, _)) =>
   557           (if is_built_in_const x args |> fst then
   558              SOME tab
   559            else case Symtab.lookup tab s of
   560              NONE => SOME (Symtab.update (s, length args) tab)
   561            | SOME n => if n = length args then SOME tab else NONE)
   562         | _ => SOME tab
   563   in aux (prop_of th) [] end
   564 
   565 (* FIXME: This is currently only useful for polymorphic type encodings. *)
   566 fun could_benefit_from_ext is_built_in_const facts =
   567   fold (consider_arities is_built_in_const o snd) facts (SOME Symtab.empty)
   568   |> is_none
   569 
   570 (* High enough so that it isn't wrongly considered as very relevant (e.g., for E
   571    weights), but low enough so that it is unlikely to be truncated away if few
   572    facts are included. *)
   573 val special_fact_index = 75
   574 
   575 fun relevance_filter ctxt threshold0 decay max_relevant is_built_in_const
   576         (fudge as {threshold_divisor, ridiculous_threshold, ...})
   577         ({add, del, ...} : relevance_override) facts chained_ts hyp_ts concl_t =
   578   let
   579     val thy = Proof_Context.theory_of ctxt
   580     val const_tab = fold (count_fact_consts thy fudge) facts Symtab.empty
   581     val add_pconsts = add_pconsts_in_term thy is_built_in_const false o SOME
   582     val chained_const_tab = Symtab.empty |> fold (add_pconsts true) chained_ts
   583     val goal_const_tab =
   584       Symtab.empty |> fold (add_pconsts true) hyp_ts
   585                    |> add_pconsts false concl_t
   586       |> (fn tab => if Symtab.is_empty tab then chained_const_tab else tab)
   587       |> fold (if_empty_replace_with_locality thy is_built_in_const facts)
   588               [Chained, Assum, Local]
   589     val add_ths = Attrib.eval_thms ctxt add
   590     val del_ths = Attrib.eval_thms ctxt del
   591     val facts = facts |> filter_out (member Thm.eq_thm_prop del_ths o snd)
   592     fun iter j remaining_max threshold rel_const_tab hopeless hopeful =
   593       let
   594         fun relevant [] _ [] =
   595             (* Nothing has been added this iteration. *)
   596             if j = 0 andalso threshold >= ridiculous_threshold then
   597               (* First iteration? Try again. *)
   598               iter 0 max_relevant (threshold / threshold_divisor) rel_const_tab
   599                    hopeless hopeful
   600             else
   601               []
   602           | relevant candidates rejects [] =
   603             let
   604               val (accepts, more_rejects) =
   605                 take_most_relevant ctxt max_relevant remaining_max fudge
   606                                    candidates
   607               val rel_const_tab' =
   608                 rel_const_tab
   609                 |> fold (add_pconst_to_table false) (maps (snd o fst) accepts)
   610               fun is_dirty (c, _) =
   611                 Symtab.lookup rel_const_tab' c <> Symtab.lookup rel_const_tab c
   612               val (hopeful_rejects, hopeless_rejects) =
   613                  (rejects @ hopeless, ([], []))
   614                  |-> fold (fn (ax as (_, consts), old_weight) =>
   615                               if exists is_dirty consts then
   616                                 apfst (cons (ax, NONE))
   617                               else
   618                                 apsnd (cons (ax, old_weight)))
   619                  |>> append (more_rejects
   620                              |> map (fn (ax as (_, consts), old_weight) =>
   621                                         (ax, if exists is_dirty consts then NONE
   622                                              else SOME old_weight)))
   623               val threshold =
   624                 1.0 - (1.0 - threshold)
   625                       * Math.pow (decay, Real.fromInt (length accepts))
   626               val remaining_max = remaining_max - length accepts
   627             in
   628               trace_msg ctxt (fn () => "New or updated constants: " ^
   629                   commas (rel_const_tab' |> Symtab.dest
   630                           |> subtract (op =) (rel_const_tab |> Symtab.dest)
   631                           |> map string_for_hyper_pconst));
   632               map (fst o fst) accepts @
   633               (if remaining_max = 0 then
   634                  []
   635                else
   636                  iter (j + 1) remaining_max threshold rel_const_tab'
   637                       hopeless_rejects hopeful_rejects)
   638             end
   639           | relevant candidates rejects
   640                      (((ax as (((_, loc), _), fact_consts)), cached_weight)
   641                       :: hopeful) =
   642             let
   643               val weight =
   644                 case cached_weight of
   645                   SOME w => w
   646                 | NONE => fact_weight fudge loc const_tab rel_const_tab
   647                                       chained_const_tab fact_consts
   648             in
   649               if weight >= threshold then
   650                 relevant ((ax, weight) :: candidates) rejects hopeful
   651               else
   652                 relevant candidates ((ax, weight) :: rejects) hopeful
   653             end
   654         in
   655           trace_msg ctxt (fn () =>
   656               "ITERATION " ^ string_of_int j ^ ": current threshold: " ^
   657               Real.toString threshold ^ ", constants: " ^
   658               commas (rel_const_tab |> Symtab.dest
   659                       |> filter (curry (op <>) [] o snd)
   660                       |> map string_for_hyper_pconst));
   661           relevant [] [] hopeful
   662         end
   663     fun prepend_facts ths accepts =
   664       ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @
   665        (accepts |> filter_out (member Thm.eq_thm_prop ths o snd)))
   666       |> take max_relevant
   667     fun insert_into_facts accepts [] = accepts
   668       | insert_into_facts accepts ths =
   669         let
   670           val add = facts |> filter (member Thm.eq_thm_prop ths o snd)
   671           val (bef, after) =
   672             accepts |> filter_out (member Thm.eq_thm_prop ths o snd)
   673                     |> take (max_relevant - length add)
   674                     |> chop special_fact_index
   675         in bef @ add @ after end
   676     fun insert_special_facts accepts =
   677        [] |> could_benefit_from_ext is_built_in_const accepts ? cons @{thm ext}
   678           |> insert_into_facts accepts
   679   in
   680     facts |> map_filter (pair_consts_fact thy is_built_in_const fudge)
   681           |> iter 0 max_relevant threshold0 goal_const_tab []
   682           |> not (null add_ths) ? prepend_facts add_ths
   683           |> insert_special_facts
   684           |> tap (fn accepts => trace_msg ctxt (fn () =>
   685                       "Total relevant: " ^ string_of_int (length accepts)))
   686   end
   687 
   688 
   689 (***************************************************************)
   690 (* Retrieving and filtering lemmas                             *)
   691 (***************************************************************)
   692 
   693 (*** retrieve lemmas and filter them ***)
   694 
   695 (*Reject theorems with names like "List.filter.filter_list_def" or
   696   "Accessible_Part.acc.defs", as these are definitions arising from packages.*)
   697 fun is_package_def a =
   698   let val names = Long_Name.explode a in
   699     (length names > 2 andalso not (hd names = "local") andalso
   700      String.isSuffix "_def" a) orelse String.isSuffix "_defs" a
   701   end
   702 
   703 fun uniquify xs =
   704   Termtab.fold (cons o snd)
   705                (fold (Termtab.update o `(prop_of o snd)) xs Termtab.empty) []
   706 
   707 (* FIXME: put other record thms here, or declare as "no_atp" *)
   708 fun multi_base_blacklist ctxt ho_atp =
   709   ["defs", "select_defs", "update_defs", "split", "splits", "split_asm",
   710    "cases", "ext_cases", "eq.simps", "eq.refl", "nchotomy", "case_cong",
   711    "weak_case_cong"]
   712   |> not (ho_atp orelse (Config.get ctxt instantiate_inducts)) ?
   713         append ["induct", "inducts"]
   714   |> map (prefix ".")
   715 
   716 val max_lambda_nesting = 3 (*only applies if not ho_atp*)
   717 
   718 fun term_has_too_many_lambdas max (t1 $ t2) =
   719     exists (term_has_too_many_lambdas max) [t1, t2]
   720   | term_has_too_many_lambdas max (Abs (_, _, t)) =
   721     max = 0 orelse term_has_too_many_lambdas (max - 1) t
   722   | term_has_too_many_lambdas _ _ = false
   723 
   724 (* Don't count nested lambdas at the level of formulas, since they are
   725    quantifiers. *)
   726 fun formula_has_too_many_lambdas true _ _ = false (*i.e. ho_atp*)
   727   | formula_has_too_many_lambdas _ Ts (Abs (_, T, t)) =
   728       formula_has_too_many_lambdas false (T :: Ts) t
   729   | formula_has_too_many_lambdas _ Ts t =
   730     if member (op =) [HOLogic.boolT, propT] (fastype_of1 (Ts, t)) then
   731       exists (formula_has_too_many_lambdas false Ts) (#2 (strip_comb t))
   732     else
   733       term_has_too_many_lambdas max_lambda_nesting t
   734 
   735 (* The max apply depth of any "metis" call in "Metis_Examples" (on 2007-10-31)
   736    was 11. *)
   737 val max_apply_depth = 15
   738 
   739 fun apply_depth (f $ t) = Int.max (apply_depth f, apply_depth t + 1)
   740   | apply_depth (Abs (_, _, t)) = apply_depth t
   741   | apply_depth _ = 0
   742 
   743 fun is_formula_too_complex ho_atp t =
   744   apply_depth t > max_apply_depth orelse formula_has_too_many_lambdas ho_atp [] t
   745 
   746 (* FIXME: Extend to "Meson" and "Metis" *)
   747 val exists_sledgehammer_const =
   748   exists_Const (fn (s, _) => String.isPrefix sledgehammer_prefix s)
   749 
   750 (* FIXME: make more reliable *)
   751 val exists_low_level_class_const =
   752   exists_Const (fn (s, _) =>
   753      String.isSubstring (Long_Name.separator ^ "class" ^ Long_Name.separator) s)
   754 
   755 fun is_metastrange_theorem th =
   756   case head_of (concl_of th) of
   757     Const (s, _) => (s <> @{const_name Trueprop} andalso
   758                      s <> @{const_name "=="})
   759   | _ => false
   760 
   761 fun is_that_fact th =
   762   String.isSuffix (Long_Name.separator ^ Obtain.thatN) (Thm.get_name_hint th)
   763   andalso exists_subterm (fn Free (s, _) => s = Name.skolem Auto_Bind.thesisN
   764                            | _ => false) (prop_of th)
   765 
   766 (**** Predicates to detect unwanted facts (prolific or likely to cause
   767       unsoundness) ****)
   768 
   769 fun is_theorem_bad_for_atps ho_atp exporter thm =
   770   is_metastrange_theorem thm orelse
   771   (not exporter andalso
   772    let val t = prop_of thm in
   773      is_formula_too_complex ho_atp t orelse exists_type type_has_top_sort t orelse
   774      exists_sledgehammer_const t orelse exists_low_level_class_const t orelse
   775      is_that_fact thm
   776    end)
   777 
   778 val crude_zero_vars =
   779   map_aterms (fn Var ((s, _), T) => Var ((s, 0), T) | t => t)
   780   #> map_types (map_type_tvar (fn ((s, _), S) => TVar ((s, 0), S)))
   781 
   782 fun clasimpset_rule_table_of ctxt =
   783   let
   784     val thy = Proof_Context.theory_of ctxt
   785     val atomize = HOLogic.mk_Trueprop o Object_Logic.atomize_term thy
   786     fun add loc g f =
   787       fold (Termtab.update o rpair loc o g o crude_zero_vars o prop_of o f)
   788     val {safeIs, safeEs, hazIs, hazEs, ...} =
   789       ctxt |> claset_of |> Classical.rep_cs
   790     val intros = Item_Net.content safeIs @ Item_Net.content hazIs
   791     val elims =
   792       Item_Net.content safeEs @ Item_Net.content hazEs
   793       |> map Classical.classical_rule
   794     val simps = ctxt |> simpset_of |> dest_ss |> #simps
   795   in
   796     Termtab.empty |> add Intro I I intros
   797                   |> add Elim I I elims
   798                   |> add Simp I snd simps
   799                   |> add Simp atomize snd simps
   800   end
   801 
   802 fun all_facts ctxt ho_atp reserved exporter add_ths chained_ths =
   803   let
   804     val thy = Proof_Context.theory_of ctxt
   805     val global_facts = Global_Theory.facts_of thy
   806     val local_facts = Proof_Context.facts_of ctxt
   807     val named_locals = local_facts |> Facts.dest_static []
   808     val assms = Assumption.all_assms_of ctxt
   809     fun is_assum th = exists (fn ct => prop_of th aconv term_of ct) assms
   810     val is_chained = member Thm.eq_thm_prop chained_ths
   811     val clasimpset_table = clasimpset_rule_table_of ctxt
   812     fun locality_of_theorem global name th =
   813       if String.isSubstring ".induct" name orelse (*FIXME: use structured name*)
   814          String.isSubstring ".inducts" name then
   815         Induction
   816       else if is_chained th then
   817         Chained
   818       else if global then
   819         case Termtab.lookup clasimpset_table (prop_of th) of
   820           SOME loc => loc
   821         | NONE => General
   822       else if is_assum th then
   823         Assum
   824       else
   825         Local
   826     fun is_good_unnamed_local th =
   827       not (Thm.has_name_hint th) andalso
   828       forall (fn (_, ths) => not (member Thm.eq_thm_prop ths th)) named_locals
   829     val unnamed_locals =
   830       union Thm.eq_thm_prop (Facts.props local_facts) chained_ths
   831       |> filter is_good_unnamed_local |> map (pair "" o single)
   832     val full_space =
   833       Name_Space.merge (Facts.space_of global_facts, Facts.space_of local_facts)
   834     fun add_facts global foldx facts =
   835       foldx (fn (name0, ths) =>
   836         if not exporter andalso name0 <> "" andalso
   837            forall (not o member Thm.eq_thm_prop add_ths) ths andalso
   838            (Facts.is_concealed facts name0 orelse
   839             (not (Config.get ctxt ignore_no_atp) andalso
   840              is_package_def name0) orelse
   841             exists (fn s => String.isSuffix s name0)
   842                    (multi_base_blacklist ctxt ho_atp) orelse
   843             String.isSuffix "_def_raw" (* FIXME: crude hack *) name0) then
   844           I
   845         else
   846           let
   847             val multi = length ths > 1
   848             val backquote_thm =
   849               backquote o string_for_term ctxt o close_form o prop_of
   850             fun check_thms a =
   851               case try (Proof_Context.get_thms ctxt) a of
   852                 NONE => false
   853               | SOME ths' => eq_list Thm.eq_thm_prop (ths, ths')
   854           in
   855             pair 1
   856             #> fold (fn th => fn (j, (multis, unis)) =>
   857                         (j + 1,
   858                          if not (member Thm.eq_thm_prop add_ths th) andalso
   859                             is_theorem_bad_for_atps ho_atp exporter th then
   860                            (multis, unis)
   861                          else
   862                            let
   863                              val new =
   864                                (((fn () =>
   865                                  if name0 = "" then
   866                                    th |> backquote_thm
   867                                  else
   868                                    [Facts.extern ctxt facts name0,
   869                                     Name_Space.extern ctxt full_space name0,
   870                                     name0]
   871                                    |> find_first check_thms
   872                                    |> (fn SOME name =>
   873                                           make_name reserved multi j name
   874                                         | NONE => "")),
   875                                 locality_of_theorem global name0 th), th)
   876                            in
   877                              if multi then (new :: multis, unis)
   878                              else (multis, new :: unis)
   879                            end)) ths
   880             #> snd
   881           end)
   882   in
   883     (* The single-name theorems go after the multiple-name ones, so that single
   884        names are preferred when both are available. *)
   885     ([], []) |> add_facts false fold local_facts (unnamed_locals @ named_locals)
   886              |> add_facts true Facts.fold_static global_facts global_facts
   887              |> op @
   888   end
   889 
   890 fun external_frees t =
   891   [] |> Term.add_frees t |> filter_out (can Name.dest_internal o fst)
   892 
   893 fun nearly_all_facts ctxt ho_atp ({add, only, ...} : relevance_override)
   894                      chained_ths hyp_ts concl_t =
   895   let
   896     val thy = Proof_Context.theory_of ctxt
   897     val reserved = reserved_isar_keyword_table ()
   898     val add_ths = Attrib.eval_thms ctxt add
   899     val ind_stmt =
   900       Logic.list_implies (hyp_ts |> filter_out (null o external_frees), concl_t)
   901       |> Object_Logic.atomize_term thy
   902     val ind_stmt_xs = external_frees ind_stmt
   903   in
   904     (if only then
   905        maps (map (fn ((name, loc), th) => ((K name, loc), th))
   906              o fact_from_ref ctxt reserved chained_ths) add
   907      else
   908        all_facts ctxt ho_atp reserved false add_ths chained_ths)
   909     |> Config.get ctxt instantiate_inducts
   910        ? maps (instantiate_if_induct_rule ctxt ind_stmt ind_stmt_xs)
   911     |> (not (Config.get ctxt ignore_no_atp) andalso not only)
   912        ? filter_out (No_ATPs.member ctxt o snd)
   913     |> uniquify
   914   end
   915 
   916 fun relevant_facts ctxt (threshold0, threshold1) max_relevant is_built_in_const
   917                    fudge (override as {only, ...}) chained_ths hyp_ts concl_t
   918                    facts =
   919   let
   920     val thy = Proof_Context.theory_of ctxt
   921     val decay = Math.pow ((1.0 - threshold1) / (1.0 - threshold0),
   922                           1.0 / Real.fromInt (max_relevant + 1))
   923   in
   924     trace_msg ctxt (fn () => "Considering " ^ string_of_int (length facts) ^
   925                              " facts");
   926     (if only orelse threshold1 < 0.0 then
   927        facts
   928      else if threshold0 > 1.0 orelse threshold0 > threshold1 orelse
   929              max_relevant = 0 then
   930        []
   931      else
   932        relevance_filter ctxt threshold0 decay max_relevant is_built_in_const
   933            fudge override facts (chained_ths |> map prop_of) hyp_ts
   934            (concl_t |> theory_constify fudge (Context.theory_name thy)))
   935     |> map (apfst (apfst (fn f => f ())))
   936   end
   937 
   938 end;