always remove duplicates in meshing + use weights for Naive Bayes
authorblanchet
Wed, 28 May 2014 03:10:30 +0200
changeset 58438e4074b91b2a6
parent 58437 001ec97c3e59
child 58439 80b7c07e7a73
always remove duplicates in meshing + use weights for Naive Bayes
src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
     1.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Tue May 27 17:48:11 2014 +0200
     1.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Wed May 28 03:10:30 2014 +0200
     1.3 @@ -526,8 +526,7 @@
     1.4  
     1.5      val num_visible_facts = length visible_facts
     1.6      val get_deps = curry Vector.sub deps_vec
     1.7 -    val syms = map_filter (fn (feat, weight) =>
     1.8 -      Option.map (rpair weight) (Symtab.lookup feat_tab feat)) feats
     1.9 +    val syms = map (apfst (the_default ~1 o Symtab.lookup feat_tab)) feats
    1.10    in
    1.11      trace_msg ctxt (fn () => "MaSh_SML " ^ (if engine = MaSh_SML_kNN then "kNN" else "NB") ^
    1.12        " query " ^ encode_features feats ^ " from {" ^
    1.13 @@ -757,8 +756,8 @@
    1.14    | normalize_scores max_facts xs =
    1.15      map (apsnd (curry Real.* (1.0 / avg (map snd (take max_facts xs))))) xs
    1.16  
    1.17 -fun mesh_facts _ max_facts [(_, (sels, unks))] =
    1.18 -    map fst (take max_facts sels) @ take (max_facts - length sels) unks
    1.19 +fun mesh_facts fact_eq max_facts [(_, (sels, unks))] =
    1.20 +    distinct fact_eq (map fst (take max_facts sels) @ take (max_facts - length sels) unks)
    1.21    | mesh_facts fact_eq max_facts mess =
    1.22      let
    1.23        val mess = mess |> map (apsnd (apfst (normalize_scores max_facts)))
    1.24 @@ -1074,8 +1073,7 @@
    1.25    let
    1.26      fun insert_parent new parents =
    1.27        let val parents = parents |> filter_out (fn p => thm_less_eq (p, new)) in
    1.28 -        parents |> forall (fn p => not (thm_less_eq (new, p))) parents
    1.29 -                   ? cons new
    1.30 +        parents |> forall (fn p => not (thm_less_eq (new, p))) parents ? cons new
    1.31        end
    1.32  
    1.33      fun rechunk seen (rest as th' :: ths) =
    1.34 @@ -1204,9 +1202,9 @@
    1.35      val chained = facts |> filter (fn ((_, (scope, _)), _) => scope = Chained)
    1.36      val num_facts = length facts
    1.37  
    1.38 -    (* Weights are currently ignored by SML naive Bayes and appear to hurt kNN more than they
    1.39 -       help. *)
    1.40 -    val const_tab = Symtab.empty |> engine = MaSh_Py ? fold (add_const_counts o prop_of o snd) facts
    1.41 +    (* Weights appear to hurt kNN more than they help. *)
    1.42 +    val const_tab = Symtab.empty |> engine <> MaSh_SML_kNN
    1.43 +      ? fold (add_const_counts o prop_of o snd) facts
    1.44  
    1.45      fun fact_has_right_theory (_, th) =
    1.46        thy_name = Context.theory_name (theory_of_thm th)