src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
changeset 49421 b002cc16aa99
parent 49419 0a261b4aa093
child 49422 47fe0ca12fc2
     1.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Fri Jul 20 22:19:46 2012 +0200
     1.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Fri Jul 20 22:19:46 2012 +0200
     1.3 @@ -28,12 +28,12 @@
     1.4    val escape_metas : string list -> string
     1.5    val unescape_meta : string -> string
     1.6    val unescape_metas : string -> string list
     1.7 -  val extract_query : string -> string * string list
     1.8 +  val extract_query : string -> string * (string * real) list
     1.9    val nickname_of : thm -> string
    1.10 -  val suggested_facts : string list -> ('a * thm) list -> ('a * thm) list
    1.11 +  val suggested_facts :
    1.12 +    (string * 'a) list -> ('b * thm) list -> (('b * thm) * 'a) list
    1.13    val mesh_facts :
    1.14 -    int -> (('a * thm) list * ('a * thm) list) list -> ('a * thm) list
    1.15 -  val is_likely_tautology_or_too_meta : thm -> bool
    1.16 +    int -> ((('a * thm) * real) list * ('a * thm) list) list -> ('a * thm) list
    1.17    val theory_ord : theory * theory -> order
    1.18    val thm_ord : thm * thm -> order
    1.19    val goal_of_thm : theory -> thm -> thm
    1.20 @@ -52,13 +52,14 @@
    1.21    val mash_REPROVE :
    1.22      Proof.context -> bool -> (string * string list) list -> unit
    1.23    val mash_QUERY :
    1.24 -    Proof.context -> bool -> int -> string list * string list -> string list
    1.25 +    Proof.context -> bool -> int -> string list * string list
    1.26 +    -> (string * real) list
    1.27    val mash_unlearn : Proof.context -> unit
    1.28    val mash_could_suggest_facts : unit -> bool
    1.29    val mash_can_suggest_facts : Proof.context -> bool
    1.30 -  val mash_suggest_facts :
    1.31 +  val mash_suggested_facts :
    1.32      Proof.context -> params -> string -> int -> term list -> term
    1.33 -    -> ('a * thm) list -> ('a * thm) list * ('a * thm) list
    1.34 +    -> ('a * thm) list -> (('a * thm) * real) list * ('a * thm) list
    1.35    val mash_learn_proof :
    1.36      Proof.context -> params -> string -> term -> ('a * thm) list -> thm list
    1.37      -> unit
    1.38 @@ -132,9 +133,22 @@
    1.39  val unescape_metas =
    1.40    space_explode " " #> filter_out (curry (op =) "") #> map unescape_meta
    1.41  
    1.42 +fun extract_node line = (* parse a "name:parents" line into (name, list of parent names) *)
    1.43 +  case space_explode ":" line of
    1.44 +    [name, parents] => (unescape_meta name, unescape_metas parents)
    1.45 +  | _ => ("", []) (* line does not split into exactly two ":"-separated fields: empty name *)
    1.46 +
    1.47 +fun extract_suggestion sugg = (* parse one "name=weight" token into SOME (name, weight) *)
    1.48 +  case space_explode "=" sugg of
    1.49 +    [name, weight] =>
    1.50 +    SOME (unescape_meta name, Real.fromString weight |> the_default 0.0) (* weight is 0.0 when Real.fromString yields NONE *)
    1.51 +  | _ => NONE (* token does not split into exactly two "="-separated fields *)
    1.52 +
    1.53  fun extract_query line =
    1.54    case space_explode ":" line of
    1.55 -    [goal_name, suggs] => (unescape_meta goal_name, unescape_metas suggs)
    1.56 +    [goal, suggs] =>
    1.57 +    (unescape_meta goal,
    1.58 +     map_filter extract_suggestion (space_explode " " suggs)) (* tokens for which extract_suggestion gives NONE are dropped *)
    1.59    | _ => ("", [])
    1.60  
    1.61  fun parent_of_local_thm th =
    1.62 @@ -165,31 +179,35 @@
    1.63    let
    1.64      fun add_fact (fact as (_, th)) = Symtab.default (nickname_of th, fact)
    1.65      val tab = Symtab.empty |> fold add_fact facts
    1.66 -  in map_filter (Symtab.lookup tab) suggs end
    1.67 +    fun find_sugg (name, weight) =
    1.68 +      Symtab.lookup tab name |> Option.map (rpair weight)
    1.69 +  in map_filter find_sugg suggs end
    1.70  
    1.71 -(* Ad hoc score function roughly based on Blanchette's Ringberg 2011 data. *)
    1.72 -fun score x = Math.pow (1.5, 15.5 - 0.05 * Real.fromInt x) + 15.0
    1.73 +fun sum_avg [] = 0 (* no scores at all: integer weight 0 *)
    1.74 +  | sum_avg xs = Real.ceil (100000.0 * fold (curry (op +)) xs 0.0) div length xs (* ceiling of 100000 * sum, then integer division by the number of scores *)
    1.75  
    1.76 -fun sum_sq_avg [] = 0
    1.77 -  | sum_sq_avg xs =
    1.78 -    Real.ceil (100000.0 * fold (curry (op +) o score) xs 0.0) div length xs
    1.79 +fun normalize_scores [] = []
    1.80 +  | normalize_scores ((fact, score) :: tail) = (* rescale all scores by 1.0 / (score of the head), so the head gets 1.0 *)
    1.81 +    (fact, 1.0) :: map (apsnd (curry Real.* (1.0 / score))) tail (* NOTE(review): a head score of 0.0 makes 1.0 / score infinite -- confirm callers never pass one *)
    1.82  
    1.83 -fun mesh_facts max_facts [(selected, unknown)] =
    1.84 -    take max_facts selected @ take (max_facts - length selected) unknown
    1.85 +fun mesh_facts max_facts [(sels, unks)] = (* a single filter: nothing to mesh, just strip the weights *)
    1.86 +    map fst (take max_facts sels) @ take (max_facts - length sels) unks
    1.87    | mesh_facts max_facts mess =
    1.88      let
    1.89 -      val mess = mess |> map (apfst (`length))
    1.90 +      val mess = mess |> map (apfst (normalize_scores #> `length)) (* each entry becomes ((length, normalized sels), unks) *)
    1.91        val fact_eq = Thm.eq_thm o pairself snd
    1.92 +      fun score_at sels = try (nth sels) #> Option.map snd (* score stored at an index; NONE if the index is out of range *)
    1.93        fun score_in fact ((sel_len, sels), unks) =
    1.94 -        case find_index (curry fact_eq fact) sels of
    1.95 +        case find_index (curry fact_eq fact o fst) sels of
   1.96            ~1 => (case find_index (curry fact_eq fact) unks of
   1.97 -                   ~1 => SOME sel_len
   1.98 +                   ~1 => score_at sels (sel_len - 1) (* not selected and not in unks: use the last listed score; index sel_len is past the end and always gave NONE *)
   1.99                   | _ => NONE)
   1.100 -        | j => SOME j
   1.101 -      fun score_of fact = mess |> map_filter (score_in fact) |> sum_sq_avg
   1.102 -      val facts = fold (union fact_eq o take max_facts o snd o fst) mess []
   1.103 +        | rank => score_at sels rank
   1.104 +      fun weight_of fact = mess |> map_filter (score_in fact) |> sum_avg (* entries that yield NONE do not take part in the average *)
   1.105 +      val facts =
   1.106 +        fold (union fact_eq o map fst o take max_facts o snd o fst) mess []
   1.107      in
   1.108 -      facts |> map (`score_of) |> sort (int_ord o swap o pairself fst)
   1.109 +      facts |> map (`weight_of) |> sort (int_ord o swap o pairself fst) (* swap: descending by weight *)
   1.110              |> map snd |> take max_facts
   1.111      end
   1.112  
   1.113 @@ -198,24 +216,6 @@
   1.114  val type_name_of = prefix "t"
   1.115  val class_name_of = prefix "s"
   1.116  
   1.117 -fun is_likely_tautology_or_too_meta th =
   1.118 -  let
   1.119 -    val is_boring_const = member (op =) atp_widely_irrelevant_consts
   1.120 -    fun is_boring_bool t =
   1.121 -      not (exists_Const (not o is_boring_const o fst) t) orelse
   1.122 -      exists_type (exists_subtype (curry (op =) @{typ prop})) t
   1.123 -    fun is_boring_prop (@{const Trueprop} $ t) = is_boring_bool t
   1.124 -      | is_boring_prop (@{const "==>"} $ t $ u) =
   1.125 -        is_boring_prop t andalso is_boring_prop u
   1.126 -      | is_boring_prop (Const (@{const_name all}, _) $ (Abs (_, _, t)) $ u) =
   1.127 -        is_boring_prop t andalso is_boring_prop u
   1.128 -      | is_boring_prop (Const (@{const_name "=="}, _) $ t $ u) =
   1.129 -        is_boring_bool t andalso is_boring_bool u
   1.130 -      | is_boring_prop _ = true
   1.131 -  in
   1.132 -    is_boring_prop (prop_of th) andalso not (Thm.eq_thm_prop (@{thm ext}, th))
   1.133 -  end
   1.134 -
   1.135  fun theory_ord p =
   1.136    if Theory.eq_thy p then
   1.137      EQUAL
   1.138 @@ -280,10 +280,11 @@
   1.139            if is_bad_const x args then ""
   1.140            else mk_app (const_name_of s) (map (patternify (depth - 1)) args)
   1.141          | _ => ""
   1.142 +    fun add_pattern depth t =
   1.143 +      case patternify depth t of "" => I | s => insert (op =) s
   1.144      fun add_term_patterns ~1 _ = I
   1.145        | add_term_patterns depth t =
   1.146 -        insert (op =) (patternify depth t)
   1.147 -        #> add_term_patterns (depth - 1) t
   1.148 +        add_pattern depth t #> add_term_patterns (depth - 1) t
   1.149      val add_term = add_term_patterns term_max_depth
   1.150      fun add_patterns t =
   1.151        let val (head, args) = strip_comb t in
   1.152 @@ -327,8 +328,8 @@
   1.153  fun trim_dependencies deps =
   1.154    if length deps <= max_dependencies then SOME deps else NONE
   1.155  
   1.156 -fun isar_dependencies_of all_facts =
   1.157 -  thms_in_proof (SOME all_facts) #> trim_dependencies
   1.158 +fun isar_dependencies_of all_names = (* parameter renamed only; body is unchanged apart from the name *)
   1.159 +  thms_in_proof (SOME all_names) #> trim_dependencies (* trim_dependencies gives NONE when there are more than max_dependencies entries *)
   1.160  
   1.161  fun atp_dependencies_of ctxt (params as {verbose, max_facts, ...}) prover
   1.162                          auto_level facts all_names th =
   1.163 @@ -349,7 +350,7 @@
   1.164            SOME ((name, status), th) => accum @ [((name, status), th)]
   1.165          | NONE => accum (* shouldn't happen *)
   1.166        val facts =
   1.167 -        facts |> iterative_relevant_facts ctxt params prover
   1.168 +        facts |> mepo_suggested_facts ctxt params prover
   1.169                       (max_facts |> the_default atp_dependency_default_max_fact)
   1.170                       NONE hyp_ts concl_t
   1.171                |> fold (add_isar_dep facts) (these isar_deps)
   1.172 @@ -432,7 +433,7 @@
   1.173    "p " ^ escape_meta name ^ ": " ^ escape_metas deps ^ "\n"
   1.174  
   1.175  fun str_of_query (parents, feats) =
   1.176 -  "? " ^ escape_metas parents ^ "; " ^ escape_metas feats
   1.177 +  "? " ^ escape_metas parents ^ "; " ^ escape_metas feats ^ "\n" (* query text now ends with a newline *)
   1.178  
   1.179  fun mash_CLEAR ctxt =
   1.180    let val path = mash_model_dir () |> Path.explode in
   1.181 @@ -487,6 +488,13 @@
   1.182                  "Internal error when " ^ when ^ ":\n" ^
   1.183                  ML_Compiler.exn_message exn); def)
   1.184  
   1.185 +fun graph_info G = (* one-line summary of graph G: node, edge, minimal and maximal counts *)
   1.186 +  string_of_int (length (Graph.keys G)) ^ " node(s), " ^
   1.187 +  string_of_int (fold (Integer.add o length o snd) (Graph.dest G) 0) ^ (* sums the length of the second component of every Graph.dest entry *)
   1.188 +  " edge(s), " ^
   1.189 +  string_of_int (length (Graph.minimals G)) ^ " minimal, " ^
   1.190 +  string_of_int (length (Graph.maximals G)) ^ " maximal"
   1.191 +
   1.192  type mash_state = {fact_G : unit Graph.T}
   1.193  
   1.194  val empty_state = {fact_G = Graph.empty}
   1.195 @@ -500,26 +508,27 @@
   1.196      let val path = mash_state_path () in
   1.197        (true,
   1.198         case try File.read_lines path of
   1.199 -         SOME (version' :: fact_lines) =>
   1.200 +         SOME (version' :: node_lines) =>
   1.201           let
   1.202             fun add_edge_to name parent =
   1.203 -             Graph.default_node (parent, ())
   1.204 -             #> Graph.add_edge (parent, name)
   1.205 -           fun add_fact_line line =
   1.206 -             case extract_query line of
   1.207 +             Graph.default_node (parent, ()) #> Graph.add_edge (parent, name)
   1.208 +           fun add_node line =
   1.209 +             case extract_node line of
   1.210                 ("", _) => I (* shouldn't happen *)
   1.211               | (name, parents) =>
   1.212 -               Graph.default_node (name, ())
   1.213 -               #> fold (add_edge_to name) parents
   1.214 +               Graph.default_node (name, ()) #> fold (add_edge_to name) parents
   1.215             val fact_G =
   1.216               try_graph ctxt "loading state" Graph.empty (fn () =>
   1.217 -                 Graph.empty |> version' = version
   1.218 -                                ? fold add_fact_line fact_lines)
   1.219 -         in {fact_G = fact_G} end
   1.220 +                 Graph.empty |> version' = version ? fold add_node node_lines)
   1.221 +         in
   1.222 +           trace_msg ctxt (fn () =>
   1.223 +               "Loaded fact graph (" ^ graph_info fact_G ^ ")");
   1.224 +           {fact_G = fact_G}
   1.225 +         end
   1.226         | _ => empty_state)
   1.227      end
   1.228  
   1.229 -fun save {fact_G} =
   1.230 +fun save ctxt {fact_G} =
   1.231    let
   1.232      val path = mash_state_path ()
   1.233      fun fact_line_for name parents =
   1.234 @@ -529,7 +538,8 @@
   1.235        append_fact name (Graph.Keys.dest parents)
   1.236    in
   1.237      File.write path (version ^ "\n");
   1.238 -    Graph.fold append_entry fact_G ()
   1.239 +    Graph.fold append_entry fact_G ();
   1.240 +    trace_msg ctxt (fn () => "Saved fact graph (" ^ graph_info fact_G ^ ")")
   1.241    end
   1.242  
   1.243  val global_state =
   1.244 @@ -538,7 +548,7 @@
   1.245  in
   1.246  
   1.247  fun mash_map ctxt f =
   1.248 -  Synchronized.change global_state (load ctxt ##> (f #> tap save))
   1.249 +  Synchronized.change global_state (load ctxt ##> (f #> tap (save ctxt))) (* save takes ctxt, which it passes to trace_msg *)
   1.250  
   1.251  fun mash_get ctxt =
   1.252    Synchronized.change_result global_state (load ctxt #> `snd)
   1.253 @@ -567,9 +577,11 @@
   1.254          if Symtab.defined tab name then
   1.255            let
   1.256              val new = (Graph.all_preds fact_G [name], name)
   1.257 -            fun not_ancestor (_, x) (yp, _) = not (member (op =) yp x)
   1.258 -            val maxs = maxs |> filter (fn max => not_ancestor max new)
   1.259 -            val maxs = maxs |> forall (not_ancestor new) maxs ? cons new
   1.260 +            fun is_ancestor (_, x) (yp, _) = member (op =) yp x
   1.261 +            val maxs = maxs |> filter (fn max => not (is_ancestor max new))
   1.262 +            val maxs =
   1.263 +              if exists (is_ancestor new) maxs then maxs
   1.264 +              else new :: filter_out (fn max => is_ancestor max new) maxs
   1.265            in find_maxes (name :: seen) maxs names end
   1.266          else
   1.267            find_maxes (name :: seen) maxs
   1.268 @@ -585,8 +597,8 @@
   1.269  fun is_fact_in_graph fact_G (_, th) =
   1.270    can (Graph.get_node fact_G) (nickname_of th)
   1.271  
   1.272 -fun mash_suggest_facts ctxt ({overlord, ...} : params) prover max_facts hyp_ts
   1.273 -                       concl_t facts =
   1.274 +fun mash_suggested_facts ctxt ({overlord, ...} : params) prover max_facts hyp_ts
   1.275 +                         concl_t facts =
   1.276    let
   1.277      val thy = Proof_Context.theory_of ctxt
   1.278      val fact_G = #fact_G (mash_get ctxt)
   1.279 @@ -756,13 +768,13 @@
   1.280              n
   1.281            else
   1.282              let
   1.283 -              fun score_of (_, th) =
   1.284 +              fun priority_of (_, th) =
   1.285                  random_range 0 (1000 * max_dependencies)
   1.286                  - 500 * (th |> isar_dependencies_of all_names
   1.287                              |> Option.map length
   1.288                              |> the_default max_dependencies)
   1.289                val old_facts =
   1.290 -                old_facts |> map (`score_of)
   1.291 +                old_facts |> map (`priority_of)
   1.292                            |> sort (int_ord o pairself fst)
   1.293                            |> map snd
   1.294                val (reps, (n, _, _)) =
   1.295 @@ -850,13 +862,14 @@
   1.296          ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @
   1.297           (accepts |> filter_out (member Thm.eq_thm_prop ths o snd)))
   1.298          |> take max_facts
   1.299 -      fun iter () =
   1.300 -        iterative_relevant_facts ctxt params prover max_facts NONE hyp_ts
   1.301 -                                 concl_t facts
   1.302 +      fun mepo () =
   1.303 +        facts |> mepo_suggested_facts ctxt params prover max_facts NONE hyp_ts
   1.304 +                                      concl_t
   1.305 +              |> weight_mepo_facts
   1.306        fun mash () =
   1.307 -        mash_suggest_facts ctxt params prover max_facts hyp_ts concl_t facts
   1.308 +        mash_suggested_facts ctxt params prover max_facts hyp_ts concl_t facts
   1.309        val mess =
   1.310 -        [] |> (if fact_filter <> mashN then cons (iter (), []) else I)
   1.311 +        [] |> (if fact_filter <> mashN then cons (mepo (), []) else I)
   1.312             |> (if fact_filter <> mepoN then cons (mash ()) else I)
   1.313      in
   1.314        mesh_facts max_facts mess