weight MaSh constants by frequency
authorblanchet
Wed, 21 Aug 2013 14:54:25 +0200
changeset 5426460801776d8af
parent 54263 f4d2c64c7aa8
child 54265 ea1b62ed5a54
weight MaSh constants by frequency
src/HOL/TPTP/mash_export.ML
src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
src/HOL/Tools/Sledgehammer/sledgehammer_mepo.ML
     1.1 --- a/src/HOL/TPTP/mash_export.ML	Wed Aug 21 13:48:25 2013 +0200
     1.2 +++ b/src/HOL/TPTP/mash_export.ML	Wed Aug 21 14:54:25 2013 +0200
     1.3 @@ -79,9 +79,11 @@
     1.4        let
     1.5          val name = nickname_of_thm th
     1.6          val feats =
     1.7 -          features_of ctxt prover (theory_of_thm th) stature [prop_of th]
     1.8 +          features_of ctxt prover (theory_of_thm th) 0 Symtab.empty stature
     1.9 +                      [prop_of th]
    1.10 +          |> map fst
    1.11          val s =
    1.12 -          encode_str name ^ ": " ^ encode_features (sort_wrt fst feats) ^ "\n"
    1.13 +          encode_str name ^ ": " ^ encode_strs (sort string_ord feats) ^ "\n"
    1.14        in File.append path s end
    1.15    in List.app do_fact facts end
    1.16  
    1.17 @@ -152,15 +154,19 @@
    1.18      val path = file_name |> Path.explode
    1.19      val facts = all_facts ctxt
    1.20      val (new_facts, old_facts) = facts |> List.partition (has_thys thys o snd)
    1.21 +    val num_old_facts = length old_facts
    1.22      val name_tabs = build_name_tables nickname_of_thm facts
    1.23 -    fun do_fact (j, ((name, (parents, ((_, stature), th))), prevs)) =
    1.24 +    fun do_fact (j,
    1.25 +            (((name, (parents, ((_, stature), th))), prevs), const_tab)) =
    1.26        if in_range range j then
    1.27          let
    1.28            val _ = tracing ("Fact " ^ string_of_int j ^ ": " ^ name)
    1.29 +          val isar_deps = isar_dependencies_of name_tabs th
    1.30 +          val do_query = not (is_bad_query ctxt ho_atp step j th isar_deps)
    1.31            val feats =
    1.32 -            features_of ctxt prover (theory_of_thm th) stature [prop_of th]
    1.33 +            features_of ctxt prover (theory_of_thm th) (num_old_facts + j)
    1.34 +                        const_tab stature [prop_of th]
    1.35              |> sort_wrt fst
    1.36 -          val isar_deps = isar_dependencies_of name_tabs th
    1.37            val access_facts =
    1.38              (if linearize then take (j - 1) new_facts
    1.39               else new_facts |> filter_accessible_from th) @ old_facts
    1.40 @@ -169,12 +175,12 @@
    1.41                                    (SOME isar_deps)
    1.42            val parents = if linearize then prevs else parents
    1.43            val query =
    1.44 -            if is_bad_query ctxt ho_atp step j th isar_deps then
    1.45 -              ""
    1.46 -            else
    1.47 +            if do_query then
    1.48                "? " ^ string_of_int max_suggs ^ " # " ^
    1.49                encode_str name ^ ": " ^ encode_strs parents ^ "; " ^
    1.50                encode_features feats ^ "\n"
    1.51 +            else
    1.52 +              ""
    1.53            val update =
    1.54              "! " ^ encode_str name ^ ": " ^ encode_strs parents ^ "; " ^
    1.55              encode_strs (map fst feats) ^ "; " ^ marker ^ " " ^
    1.56 @@ -187,8 +193,14 @@
    1.57                  |> map (`(nickname_of_thm o snd o snd))
    1.58      val hd_prevs =
    1.59        map (nickname_of_thm o snd) (the_list (try List.last old_facts))
    1.60 -    val prevss = fst (split_last (hd_prevs :: map (single o fst) new_facts))
    1.61 -    val lines = Par_List.map do_fact (tag_list 1 (new_facts ~~ prevss))
    1.62 +    val prevss = hd_prevs :: map (single o fst) new_facts |> split_last |> fst
    1.63 +    val hd_const_tabs =
    1.64 +      fold (add_const_counts o prop_of o snd) old_facts Symtab.empty
    1.65 +    val (const_tabs, _) =
    1.66 +      fold_map (`I oo add_const_counts o prop_of o snd o snd o snd) new_facts
    1.67 +               hd_const_tabs
    1.68 +    val lines =
    1.69 +      Par_List.map do_fact (tag_list 1 (new_facts ~~ prevss ~~ const_tabs))
    1.70    in File.write_list path lines end
    1.71  
    1.72  fun generate_isar_commands ctxt prover =
     2.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Wed Aug 21 13:48:25 2013 +0200
     2.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Wed Aug 21 14:54:25 2013 +0200
     2.3 @@ -62,8 +62,8 @@
     2.4    val run_prover_for_mash :
     2.5      Proof.context -> params -> string -> fact list -> thm -> prover_result
     2.6    val features_of :
     2.7 -    Proof.context -> string -> theory -> stature -> term list
     2.8 -    -> (string * real) list
     2.9 +    Proof.context -> string -> theory -> int -> int Symtab.table -> stature
    2.10 +    -> term list -> (string * real) list
    2.11    val trim_dependencies : string list -> string list option
    2.12    val isar_dependencies_of :
    2.13      string Symtab.table * string Symtab.table -> thm -> string list
    2.14 @@ -78,6 +78,7 @@
    2.15    val find_mash_suggestions :
    2.16      Proof.context -> int -> string list -> ('b * thm) list -> ('b * thm) list
    2.17      -> ('b * thm) list -> ('b * thm) list * ('b * thm) list
    2.18 +  val add_const_counts : term -> int Symtab.table -> int Symtab.table
    2.19    val mash_suggested_facts :
    2.20      Proof.context -> params -> string -> int -> term list -> term
    2.21      -> raw_fact list -> fact list * fact list
    2.22 @@ -517,6 +518,15 @@
    2.23  val lams_feature = ("lams", 2.0 (* FUDGE *))
    2.24  val skos_feature = ("skos", 2.0 (* FUDGE *))
    2.25  
    2.26 +fun weighted_const_feature_of num_facts const_tab const_s s =
    2.27 +  ("c" ^ s,
    2.28 +   if num_facts = 0 then
    2.29 +     0.0
    2.30 +   else
    2.31 +     let val count = Symtab.lookup const_tab const_s |> the_default 0 in
    2.32 +       16.0 + (Real.fromInt num_facts / Real.fromInt count)
    2.33 +     end)
    2.34 +
    2.35  (* The following "crude" functions should be progressively phased out, since
    2.36     they create visibility edges that do not exist in Isabelle, resulting in
    2.37     failed lookups later on. *)
    2.38 @@ -588,7 +598,8 @@
    2.39  
    2.40  val max_pat_breadth = 10
    2.41  
    2.42 -fun term_features_of ctxt prover thy_name term_max_depth type_max_depth ts =
    2.43 +fun term_features_of ctxt prover thy_name num_facts const_tab term_max_depth
    2.44 +                     type_max_depth ts =
    2.45    let
    2.46      val thy = Proof_Context.theory_of ctxt
    2.47  
    2.48 @@ -657,9 +668,10 @@
    2.49      fun add_term Ts feature_of = add_term_pats Ts feature_of term_max_depth
    2.50      fun add_subterms Ts t =
    2.51        case strip_comb t of
    2.52 -        (Const (x as (_, T)), args) =>
    2.53 +        (Const (x as (s, T)), args) =>
    2.54          let val (built_in, args) = is_built_in x args in
    2.55 -          (not built_in ? add_term Ts const_feature_of t)
    2.56 +          (not built_in
    2.57 +             ? add_term Ts (weighted_const_feature_of num_facts const_tab s) t)
    2.58            #> add_subtypes T
    2.59            #> fold (add_subterms Ts) args
    2.60          end
    2.61 @@ -678,10 +690,11 @@
    2.62  val type_max_depth = 2
    2.63  
    2.64  (* TODO: Generate type classes for types? *)
    2.65 -fun features_of ctxt prover thy (scope, status) ts =
    2.66 +fun features_of ctxt prover thy num_facts const_tab (scope, status) ts =
    2.67    let val thy_name = Context.theory_name thy in
    2.68      thy_feature_of thy_name ::
    2.69 -    term_features_of ctxt prover thy_name term_max_depth type_max_depth ts
    2.70 +    term_features_of ctxt prover thy_name num_facts const_tab term_max_depth
    2.71 +                     type_max_depth ts
    2.72      |> status <> General ? cons (status_feature_of status)
    2.73      |> scope <> Global ? cons local_feature
    2.74      |> exists (not o is_lambda_free) ts ? cons lams_feature
    2.75 @@ -922,22 +935,26 @@
    2.76                  [unknown_chained, proximity]
    2.77      in (mesh_facts (Thm.eq_thm_prop o pairself snd) max_facts mess, unknown) end
    2.78  
    2.79 -val max_learn_on_query = 500
    2.80 +fun add_const_counts t =
    2.81 +  fold (fn s => Symtab.map_default (s, ~1) (Integer.add 1))
    2.82 +       (Term.add_const_names t [])
    2.83  
    2.84 -fun mash_suggested_facts ctxt ({overlord, learn, ...} : params) prover max_facts
    2.85 -                         hyp_ts concl_t facts =
    2.86 +fun mash_suggested_facts ctxt ({overlord, ...} : params) prover max_facts hyp_ts
    2.87 +                         concl_t facts =
    2.88    let
    2.89      val thy = Proof_Context.theory_of ctxt
    2.90      val chained = facts |> filter (fn ((_, (scope, _)), _) => scope = Chained)
    2.91 +    val const_tab = fold (add_const_counts o prop_of o snd) facts Symtab.empty
    2.92      val (access_G, suggs) =
    2.93 -      peek_state ctxt (fn {access_G, num_known_facts, ...} =>
    2.94 +      peek_state ctxt (fn {access_G, ...} =>
    2.95            if Graph.is_empty access_G then
    2.96              (access_G, [])
    2.97            else
    2.98              let
    2.99                val parents = maximal_wrt_access_graph access_G facts
   2.100                val feats =
   2.101 -                features_of ctxt prover thy (Local, General) (concl_t :: hyp_ts)
   2.102 +                features_of ctxt prover thy (length facts) const_tab
   2.103 +                            (Local, General) (concl_t :: hyp_ts)
   2.104                val hints =
   2.105                  chained |> filter (is_fact_in_graph access_G o snd)
   2.106                          |> map (nickname_of_thm o snd)
   2.107 @@ -989,7 +1006,9 @@
   2.108        let
   2.109          val thy = Proof_Context.theory_of ctxt
   2.110          val name = freshish_name ()
   2.111 -        val feats = features_of ctxt prover thy (Local, General) [t] |> map fst
   2.112 +        val feats =
   2.113 +          features_of ctxt prover thy 0 Symtab.empty (Local, General) [t]
   2.114 +          |> map fst
   2.115        in
   2.116          peek_state ctxt (fn {access_G, ...} =>
   2.117              let
   2.118 @@ -1086,7 +1105,8 @@
   2.119              let
   2.120                val name = nickname_of_thm th
   2.121                val feats =
   2.122 -                features_of ctxt prover (theory_of_thm th) stature [prop_of th]
   2.123 +                features_of ctxt prover (theory_of_thm th) 0 Symtab.empty
   2.124 +                            stature [prop_of th]
   2.125                  |> map fst
   2.126                val deps = deps_of status th |> these
   2.127                val n = n |> not (null deps) ? Integer.add 1
     3.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_mepo.ML	Wed Aug 21 13:48:25 2013 +0200
     3.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mepo.ML	Wed Aug 21 14:54:25 2013 +0200
     3.3 @@ -16,9 +16,6 @@
     3.4    val trace : bool Config.T
     3.5    val pseudo_abs_name : string
     3.6    val pseudo_skolem_prefix : string
     3.7 -  val const_names_in_fact :
     3.8 -    theory -> (string * typ -> term list -> bool * term list) -> term
     3.9 -    -> string list
    3.10    val mepo_suggested_facts :
    3.11      Proof.context -> params -> string -> int -> relevance_fudge option
    3.12      -> term list -> term -> raw_fact list -> fact list
    3.13 @@ -182,8 +179,6 @@
    3.14                (Symtab.empty |> add_pconsts_in_term thy is_built_in_const true
    3.15                                                     (SOME true) t) []
    3.16  
    3.17 -val const_names_in_fact = map fst ooo pconsts_in_fact
    3.18 -
    3.19  (* Inserts a dummy "constant" referring to the theory name, so that relevance
    3.20     takes the given theory into account. *)
    3.21  fun theory_constify ({theory_const_rel_weight, theory_const_irrel_weight, ...}