src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
author blanchet
Fri, 20 Jul 2012 22:19:46 +0200
changeset 49409 82fc8c956cdc
parent 49407 ca998fa08cd9
child 49410 85a7fb65507a
permissions -rw-r--r--
fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
     1 (*  Title:      HOL/Tools/Sledgehammer/sledgehammer_mash.ML
     2     Author:     Jasmin Blanchette, TU Muenchen
     3 
     4 Sledgehammer's machine-learning-based relevance filter (MaSh).
     5 *)
     6 
     7 signature SLEDGEHAMMER_MASH =
     8 sig
     9   type stature = ATP_Problem_Generate.stature
    10   type fact = Sledgehammer_Fact.fact
    11   type fact_override = Sledgehammer_Fact.fact_override
    12   type params = Sledgehammer_Provers.params
    13   type relevance_fudge = Sledgehammer_Provers.relevance_fudge
    14   type prover_result = Sledgehammer_Provers.prover_result
    15 
    16   val trace : bool Config.T
    17   val MaShN : string
    18   val mepoN : string
    19   val mashN : string
    20   val meshN : string
    21   val unlearnN : string
    22   val learn_isarN : string
    23   val learn_atpN : string
    24   val relearn_isarN : string
    25   val relearn_atpN : string
    26   val fact_filters : string list
    27   val escape_meta : string -> string
    28   val escape_metas : string list -> string
    29   val unescape_meta : string -> string
    30   val unescape_metas : string -> string list
    31   val extract_query : string -> string * string list
    32   val nickname_of : thm -> string
    33   val suggested_facts : string list -> ('a * thm) list -> ('a * thm) list
    34   val mesh_facts :
    35     int -> (('a * thm) list * ('a * thm) list) list -> ('a * thm) list
    36   val is_likely_tautology_or_too_meta : thm -> bool
    37   val theory_ord : theory * theory -> order
    38   val thm_ord : thm * thm -> order
    39   val goal_of_thm : theory -> thm -> thm
    40   val run_prover_for_mash :
    41     Proof.context -> params -> string -> fact list -> thm -> prover_result
    42   val features_of :
    43     Proof.context -> string -> theory -> stature -> term list -> string list
    44   val isar_dependencies_of : unit Symtab.table -> thm -> string list
    45   val atp_dependencies_of :
    46     Proof.context -> params -> string -> bool -> fact list -> unit Symtab.table
    47     -> thm -> string list
    48   val mash_CLEAR : Proof.context -> unit
    49   val mash_ADD :
    50     Proof.context -> bool
    51     -> (string * string list * string list * string list) list -> unit
    52   val mash_QUERY :
    53     Proof.context -> bool -> int -> string list * string list -> string list
    54   val mash_unlearn : Proof.context -> unit
    55   val mash_could_suggest_facts : unit -> bool
    56   val mash_can_suggest_facts : Proof.context -> bool
    57   val mash_suggest_facts :
    58     Proof.context -> params -> string -> int -> term list -> term
    59     -> ('a * thm) list -> ('a * thm) list * ('a * thm) list
    60   val mash_learn_proof :
    61     Proof.context -> params -> string -> term -> ('a * thm) list -> thm list
    62     -> unit
    63   val mash_learn_facts :
    64     Proof.context -> params -> string -> bool -> bool -> Time.time -> fact list
    65     -> string
    66   val mash_learn : Proof.context -> params -> bool -> unit
    67   val relevant_facts :
    68     Proof.context -> params -> string -> int -> fact_override -> term list
    69     -> term -> fact list -> fact list
    70   val kill_learners : unit -> unit
    71   val running_learners : unit -> unit
    72 end;
    73 
    74 structure Sledgehammer_MaSh : SLEDGEHAMMER_MASH =
    75 struct
    76 
    77 open ATP_Util
    78 open ATP_Problem_Generate
    79 open Sledgehammer_Util
    80 open Sledgehammer_Fact
    81 open Sledgehammer_Provers
    82 open Sledgehammer_Minimize
    83 open Sledgehammer_MePo
    84 
    85 val trace =
    86   Attrib.setup_config_bool @{binding sledgehammer_mash_trace} (K false)
    87 fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
    88 
    89 val MaShN = "MaSh"
    90 
    91 val mepoN = "mepo"
    92 val mashN = "mash"
    93 val meshN = "mesh"
    94 
    95 val fact_filters = [meshN, mepoN, mashN]
    96 
    97 val unlearnN = "unlearn"
    98 val learn_isarN = "learn_isar"
    99 val learn_atpN = "learn_atp"
   100 val relearn_isarN = "relearn_isar"
   101 val relearn_atpN = "relearn_atp"
   102 
   103 fun mash_home () = getenv "MASH_HOME"
   104 fun mash_model_dir () =
   105   getenv "ISABELLE_HOME_USER" ^ "/mash"
   106   |> tap (Isabelle_System.mkdir o Path.explode)
   107 val mash_state_dir = mash_model_dir
   108 fun mash_state_path () = mash_state_dir () ^ "/state" |> Path.explode
   109 
   110 
   111 (*** Isabelle helpers ***)
   112 
   113 fun meta_char c =
   114   if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse
   115      c = #")" orelse c = #"," then
   116     String.str c
   117   else
   118     (* fixed width, in case more digits follow *)
   119     "$" ^ stringN_of_int 3 (Char.ord c)
   120 
   121 fun unmeta_chars accum [] = String.implode (rev accum)
   122   | unmeta_chars accum (#"$" :: d1 :: d2 :: d3 :: cs) =
   123     (case Int.fromString (String.implode [d1, d2, d3]) of
   124        SOME n => unmeta_chars (Char.chr n :: accum) cs
   125      | NONE => "" (* error *))
   126   | unmeta_chars _ (#"$" :: _) = "" (* error *)
   127   | unmeta_chars accum (c :: cs) = unmeta_chars (c :: accum) cs
   128 
   129 val escape_meta = String.translate meta_char
   130 val escape_metas = map escape_meta #> space_implode " "
   131 val unescape_meta = String.explode #> unmeta_chars []
   132 val unescape_metas =
   133   space_explode " " #> filter_out (curry (op =) "") #> map unescape_meta
   134 
   135 fun extract_query line =
   136   case space_explode ":" line of
   137     [goal_name, suggs] => (unescape_meta goal_name, unescape_metas suggs)
   138   | _ => ("", [])
   139 
   140 fun parent_of_local_thm th =
   141   let
   142     val thy = th |> Thm.theory_of_thm
   143     val facts = thy |> Global_Theory.facts_of
   144     val space = facts |> Facts.space_of
   145     fun id_of s = #id (Name_Space.the_entry space s)
   146     fun max_id (s', _) (s, id) =
   147       let val id' = id_of s' in if id > id' then (s, id) else (s', id') end
   148   in ("", ~1) |> Facts.fold_static max_id facts |> fst end
   149 
   150 val local_prefix = "local" ^ Long_Name.separator
   151 
   152 fun nickname_of th =
   153   if Thm.has_name_hint th then
   154     let val hint = Thm.get_name_hint th in
   155       (* FIXME: There must be a better way to detect local facts. *)
   156       case try (unprefix local_prefix) hint of
   157         SOME suf =>
   158         parent_of_local_thm th ^ Long_Name.separator ^ Long_Name.separator ^ suf
   159       | NONE => hint
   160     end
   161   else
   162     backquote_thm th
   163 
   164 fun suggested_facts suggs facts =
   165   let
   166     fun add_fact (fact as (_, th)) = Symtab.default (nickname_of th, fact)
   167     val tab = Symtab.empty |> fold add_fact facts
   168   in map_filter (Symtab.lookup tab) suggs end
   169 
   170 (* Ad hoc score function roughly based on Blanchette's Ringberg 2011 data. *)
   171 fun score x = Math.pow (1.5, 15.5 - 0.05 * Real.fromInt x) + 15.0
   172 
   173 fun sum_sq_avg [] = 0
   174   | sum_sq_avg xs =
   175     Real.ceil (100000.0 * fold (curry (op +) o score) xs 0.0) div length xs
   176 
   177 fun mesh_facts max_facts [(selected, unknown)] =
   178     take max_facts selected @ take (max_facts - length selected) unknown
   179   | mesh_facts max_facts mess =
   180     let
   181       val mess = mess |> map (apfst (`length))
   182       val fact_eq = Thm.eq_thm o pairself snd
   183       fun score_in fact ((sel_len, sels), unks) =
   184         case find_index (curry fact_eq fact) sels of
   185           ~1 => (case find_index (curry fact_eq fact) unks of
   186                    ~1 => SOME sel_len
   187                  | _ => NONE)
   188         | j => SOME j
   189       fun score_of fact = mess |> map_filter (score_in fact) |> sum_sq_avg
   190       val facts = fold (union fact_eq o take max_facts o snd o fst) mess []
   191     in
   192       facts |> map (`score_of) |> sort (int_ord o swap o pairself fst)
   193             |> map snd |> take max_facts
   194     end
   195 
   196 val thy_feature_prefix = "y_"
   197 
   198 val thy_feature_name_of = prefix thy_feature_prefix
   199 val const_name_of = prefix const_prefix
   200 val type_name_of = prefix type_const_prefix
   201 val class_name_of = prefix class_prefix
   202 
   203 fun is_likely_tautology_or_too_meta th =
   204   let
   205     val is_boring_const = member (op =) atp_widely_irrelevant_consts
   206     fun is_boring_bool t =
   207       not (exists_Const (not o is_boring_const o fst) t) orelse
   208       exists_type (exists_subtype (curry (op =) @{typ prop})) t
   209     fun is_boring_prop (@{const Trueprop} $ t) = is_boring_bool t
   210       | is_boring_prop (@{const "==>"} $ t $ u) =
   211         is_boring_prop t andalso is_boring_prop u
   212       | is_boring_prop (Const (@{const_name all}, _) $ (Abs (_, _, t)) $ u) =
   213         is_boring_prop t andalso is_boring_prop u
   214       | is_boring_prop (Const (@{const_name "=="}, _) $ t $ u) =
   215         is_boring_bool t andalso is_boring_bool u
   216       | is_boring_prop _ = true
   217   in
   218     is_boring_prop (prop_of th) andalso not (Thm.eq_thm_prop (@{thm ext}, th))
   219   end
   220 
   221 fun theory_ord p =
   222   if Theory.eq_thy p then
   223     EQUAL
   224   else if Theory.subthy p then
   225     LESS
   226   else if Theory.subthy (swap p) then
   227     GREATER
   228   else case int_ord (pairself (length o Theory.ancestors_of) p) of
   229     EQUAL => string_ord (pairself Context.theory_name p)
   230   | order => order
   231 
   232 val thm_ord = theory_ord o pairself theory_of_thm
   233 
   234 val freezeT = Type.legacy_freeze_type
   235 
   236 fun freeze (t $ u) = freeze t $ freeze u
   237   | freeze (Abs (s, T, t)) = Abs (s, freezeT T, freeze t)
   238   | freeze (Var ((s, _), T)) = Free (s, freezeT T)
   239   | freeze (Const (s, T)) = Const (s, freezeT T)
   240   | freeze (Free (s, T)) = Free (s, freezeT T)
   241   | freeze t = t
   242 
   243 fun goal_of_thm thy = prop_of #> freeze #> cterm_of thy #> Goal.init
   244 
   245 fun run_prover_for_mash ctxt params prover facts goal =
   246   let
   247     val problem =
   248       {state = Proof.init ctxt, goal = goal, subgoal = 1, subgoal_count = 1,
   249        facts = facts |> map (apfst (apfst (fn name => name ())))
   250                      |> map Untranslated_Fact}
   251   in
   252     get_minimizing_prover ctxt MaSh (K ()) prover params (K (K (K "")))
   253                           problem
   254   end
   255 
   256 val bad_types = [@{type_name prop}, @{type_name bool}, @{type_name fun}]
   257 
   258 fun interesting_terms_types_and_classes ctxt prover term_max_depth
   259                                         type_max_depth ts =
   260   let
   261     fun is_bad_const (x as (s, _)) args =
   262       member (op =) atp_logical_consts s orelse
   263       fst (is_built_in_const_for_prover ctxt prover x args)
   264     fun add_classes @{sort type} = I
   265       | add_classes S = union (op =) (map class_name_of S)
   266     fun do_add_type (Type (s, Ts)) =
   267         (not (member (op =) bad_types s) ? insert (op =) (type_name_of s))
   268         #> fold do_add_type Ts
   269       | do_add_type (TFree (_, S)) = add_classes S
   270       | do_add_type (TVar (_, S)) = add_classes S
   271     fun add_type T = type_max_depth >= 0 ? do_add_type T
   272     fun mk_app s args =
   273       if member (op <>) args "" then s ^ "(" ^ space_implode "," args ^ ")"
   274       else s
   275     fun patternify ~1 _ = ""
   276       | patternify depth t =
   277         case strip_comb t of
   278           (Const (s, _), args) =>
   279           mk_app (const_name_of s) (map (patternify (depth - 1)) args)
   280         | _ => ""
   281     fun add_term_patterns ~1 _ = I
   282       | add_term_patterns depth t =
   283         insert (op =) (patternify depth t)
   284         #> add_term_patterns (depth - 1) t
   285     val add_term = add_term_patterns term_max_depth
   286     fun add_patterns t =
   287       let val (head, args) = strip_comb t in
   288         (case head of
   289            Const (x as (_, T)) =>
   290            not (is_bad_const x args) ? (add_term t #> add_type T)
   291          | Free (_, T) => add_type T
   292          | Var (_, T) => add_type T
   293          | Abs (_, T, body) => add_type T #> add_patterns body
   294          | _ => I)
   295         #> fold add_patterns args
   296       end
   297   in [] |> fold add_patterns ts end
   298 
   299 fun is_exists (s, _) = (s = @{const_name Ex} orelse s = @{const_name Ex1})
   300 
   301 val term_max_depth = 1
   302 val type_max_depth = 1
   303 
   304 (* TODO: Generate type classes for types? *)
   305 fun features_of ctxt prover thy (scope, status) ts =
   306   thy_feature_name_of (Context.theory_name thy) ::
   307   interesting_terms_types_and_classes ctxt prover term_max_depth type_max_depth
   308                                       ts
   309   |> forall is_lambda_free ts ? cons "no_lams"
   310   |> forall (not o exists_Const is_exists) ts ? cons "no_skos"
   311   |> scope <> Global ? cons "local"
   312   |> (case status of
   313         General => I
   314       | Induction => cons "induction"
   315       | Intro => cons "intro"
   316       | Inductive => cons "inductive"
   317       | Elim => cons "elim"
   318       | Simp => cons "simp"
   319       | Def => cons "def")
   320 
   321 fun isar_dependencies_of all_facts = thms_in_proof (SOME all_facts)
   322 
   323 val atp_dep_default_max_fact = 50
   324 
   325 fun atp_dependencies_of ctxt (params as {verbose, max_facts, ...}) prover auto
   326                         facts all_names th =
   327   case isar_dependencies_of all_names th of
   328     [] => []
   329   | isar_deps =>
   330     let
   331       val thy = Proof_Context.theory_of ctxt
   332       val goal = goal_of_thm thy th
   333       val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal 1
   334       val facts = facts |> filter (fn (_, th') => thm_ord (th', th) = LESS)
   335       fun fix_name ((_, stature), th) = ((fn () => nickname_of th, stature), th)
   336       fun is_dep dep (_, th) = nickname_of th = dep
   337       fun add_isar_dep facts dep accum =
   338         if exists (is_dep dep) accum then
   339           accum
   340         else case find_first (is_dep dep) facts of
   341           SOME ((name, status), th) => accum @ [((name, status), th)]
   342         | NONE => accum (* shouldn't happen *)
   343       val facts =
   344         facts |> iterative_relevant_facts ctxt params prover
   345                      (max_facts |> the_default atp_dep_default_max_fact) NONE
   346                      hyp_ts concl_t
   347               |> fold (add_isar_dep facts) isar_deps
   348               |> map fix_name
   349     in
   350       if verbose andalso not auto then
   351         let val num_facts = length facts in
   352           "MaSh: " ^ quote prover ^ " on " ^ quote (nickname_of th) ^
   353           " with " ^ string_of_int num_facts ^ " fact" ^ plural_s num_facts ^
   354           "."
   355           |> Output.urgent_message
   356         end
   357       else
   358         ();
   359       case run_prover_for_mash ctxt params prover facts goal of
   360         {outcome = NONE, used_facts, ...} =>
   361         (if verbose andalso not auto then
   362            let val num_facts = length used_facts in
   363              "Found proof with " ^ string_of_int num_facts ^ " fact" ^
   364              plural_s num_facts ^ "."
   365              |> Output.urgent_message
   366            end
   367          else
   368            ();
   369          used_facts |> map fst)
   370       | _ => isar_deps
   371     end
   372 
   373 
   374 (*** Low-level communication with MaSh ***)
   375 
   376 (* more friendly than "try o File.rm" for those who keep the files open in their
   377    text editor *)
   378 fun wipe_out file = File.write file ""
   379 
   380 fun write_file (xs, f) file =
   381   let val path = Path.explode file in
   382     wipe_out path;
   383     xs |> chunk_list 500
   384        |> List.app (File.append path o space_implode "" o map f)
   385   end
   386 
   387 fun run_mash_tool ctxt overlord save max_suggs write_cmds read_suggs =
   388   let
   389     val (temp_dir, serial) =
   390       if overlord then (getenv "ISABELLE_HOME_USER", "")
   391       else (getenv "ISABELLE_TMP", serial_string ())
   392     val log_file = if overlord then temp_dir ^ "/mash_log" else "/dev/null"
   393     val err_file = temp_dir ^ "/mash_err" ^ serial
   394     val sugg_file = temp_dir ^ "/mash_suggs" ^ serial
   395     val cmd_file = temp_dir ^ "/mash_commands" ^ serial
   396     val core =
   397       "--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^
   398       " --numberOfPredictions " ^ string_of_int max_suggs ^
   399       (if save then " --saveModel" else "")
   400     val command =
   401       mash_home () ^ "/mash --quiet --outputDir " ^ mash_model_dir () ^
   402       " --log " ^ log_file ^ " " ^ core ^ " >& " ^ err_file
   403   in
   404     write_file ([], K "") sugg_file;
   405     write_file write_cmds cmd_file;
   406     trace_msg ctxt (fn () => "Running " ^ command);
   407     Isabelle_System.bash command;
   408     read_suggs (fn () => try File.read_lines (Path.explode sugg_file) |> these)
   409     |> tap (fn _ => trace_msg ctxt (fn () =>
   410            case try File.read (Path.explode err_file) of
   411              NONE => "Done"
   412            | SOME "" => "Done"
   413            | SOME s => "Error: " ^ elide_string 1000 s))
   414     |> not overlord
   415        ? tap (fn _ => List.app (wipe_out o Path.explode)
   416                                [err_file, sugg_file, cmd_file])
   417   end
   418 
   419 fun str_of_update (name, parents, feats, deps) =
   420   "! " ^ escape_meta name ^ ": " ^ escape_metas parents ^ "; " ^
   421   escape_metas feats ^ "; " ^ escape_metas deps ^ "\n"
   422 
   423 fun str_of_query (parents, feats) =
   424   "? " ^ escape_metas parents ^ "; " ^ escape_metas feats
   425 
   426 fun mash_CLEAR ctxt =
   427   let val path = mash_model_dir () |> Path.explode in
   428     trace_msg ctxt (K "MaSh CLEAR");
   429     File.fold_dir (fn file => fn _ =>
   430                       try File.rm (Path.append path (Path.basic file)))
   431                   path NONE;
   432     ()
   433   end
   434 
   435 fun mash_ADD _ _ [] = ()
   436   | mash_ADD ctxt overlord upds =
   437     (trace_msg ctxt (fn () => "MaSh ADD " ^
   438          elide_string 1000 (space_implode " " (map #1 upds)));
   439      run_mash_tool ctxt overlord true 0 (upds, str_of_update) (K ()))
   440 
   441 fun mash_QUERY ctxt overlord max_suggs (query as (_, feats)) =
   442   (trace_msg ctxt (fn () => "MaSh QUERY " ^ space_implode " " feats);
   443    run_mash_tool ctxt overlord false max_suggs
   444        ([query], str_of_query)
   445        (fn suggs =>
   446            case suggs () of
   447              [] => []
   448            | suggs => snd (extract_query (List.last suggs)))
   449    handle List.Empty => [])
   450 
   451 
   452 (*** High-level communication with MaSh ***)
   453 
   454 fun try_graph ctxt when def f =
   455   f ()
   456   handle Graph.CYCLES (cycle :: _) =>
   457          (trace_msg ctxt (fn () =>
   458               "Cycle involving " ^ commas cycle ^ " when " ^ when); def)
   459        | Graph.DUP name =>
   460          (trace_msg ctxt (fn () =>
   461               "Duplicate fact " ^ quote name ^ " when " ^ when); def)
   462        | Graph.UNDEF name =>
   463          (trace_msg ctxt (fn () =>
   464               "Unknown fact " ^ quote name ^ " when " ^ when); def)
   465        | exn =>
   466          if Exn.is_interrupt exn then
   467            reraise exn
   468          else
   469            (trace_msg ctxt (fn () =>
   470                 "Internal error when " ^ when ^ ":\n" ^
   471                 ML_Compiler.exn_message exn); def)
   472 
   473 type mash_state = {fact_graph : unit Graph.T}
   474 
   475 val empty_state = {fact_graph = Graph.empty}
   476 
   477 local
   478 
   479 val version = "*** MaSh 0.0 ***"
   480 
   481 fun load _ (state as (true, _)) = state
   482   | load ctxt _ =
   483     let val path = mash_state_path () in
   484       (true,
   485        case try File.read_lines path of
   486          SOME (version' :: fact_lines) =>
   487          let
   488            fun add_edge_to name parent =
   489              Graph.default_node (parent, ())
   490              #> Graph.add_edge (parent, name)
   491            fun add_fact_line line =
   492              case extract_query line of
   493                ("", _) => I (* shouldn't happen *)
   494              | (name, parents) =>
   495                Graph.default_node (name, ())
   496                #> fold (add_edge_to name) parents
   497            val fact_graph =
   498              try_graph ctxt "loading state" Graph.empty (fn () =>
   499                  Graph.empty |> version' = version
   500                                 ? fold add_fact_line fact_lines)
   501          in {fact_graph = fact_graph} end
   502        | _ => empty_state)
   503     end
   504 
   505 fun save {fact_graph} =
   506   let
   507     val path = mash_state_path ()
   508     fun fact_line_for name parents =
   509       escape_meta name ^ ": " ^ escape_metas parents
   510     val append_fact = File.append path o suffix "\n" oo fact_line_for
   511   in
   512     File.write path (version ^ "\n");
   513     Graph.fold (fn (name, ((), (parents, _))) => fn () =>
   514                    append_fact name (Graph.Keys.dest parents))
   515         fact_graph ()
   516   end
   517 
   518 val global_state =
   519   Synchronized.var "Sledgehammer_MaSh.global_state" (false, empty_state)
   520 
   521 in
   522 
   523 fun mash_map ctxt f =
   524   Synchronized.change global_state (load ctxt ##> (f #> tap save))
   525 
   526 fun mash_get ctxt =
   527   Synchronized.change_result global_state (load ctxt #> `snd)
   528 
   529 fun mash_unlearn ctxt =
   530   Synchronized.change global_state (fn _ =>
   531       (mash_CLEAR ctxt; wipe_out (mash_state_path ()); (true, empty_state)))
   532 
   533 end
   534 
   535 fun mash_could_suggest_facts () = mash_home () <> ""
   536 fun mash_can_suggest_facts ctxt =
   537   not (Graph.is_empty (#fact_graph (mash_get ctxt)))
   538 
   539 fun parents_wrt_facts facts fact_graph =
   540   let
   541     val facts = [] |> fold (cons o nickname_of o snd) facts
   542     val tab = Symtab.empty |> fold (fn name => Symtab.update (name, ())) facts
   543     fun insert_not_seen seen name =
   544       not (member (op =) seen name) ? insert (op =) name
   545     fun parents_of _ parents [] = parents
   546       | parents_of seen parents (name :: names) =
   547         if Symtab.defined tab name then
   548           parents_of (name :: seen) (name :: parents) names
   549         else
   550           parents_of (name :: seen) parents
   551                      (Graph.Keys.fold (insert_not_seen seen)
   552                                       (Graph.imm_preds fact_graph name) names)
   553   in parents_of [] [] (Graph.maximals fact_graph) end
   554 
   555 (* Generate more suggestions than requested, because some might be thrown out
   556    later for various reasons and "meshing" gives better results with some
   557    slack. *)
   558 fun max_suggs_of max_facts = max_facts + Int.min (200, max_facts)
   559 
   560 fun is_fact_in_graph fact_graph (_, th) =
   561   can (Graph.get_node fact_graph) (nickname_of th)
   562 
   563 fun mash_suggest_facts ctxt ({overlord, ...} : params) prover max_facts hyp_ts
   564                        concl_t facts =
   565   let
   566     val thy = Proof_Context.theory_of ctxt
   567     val fact_graph = #fact_graph (mash_get ctxt)
   568     val parents = parents_wrt_facts facts fact_graph
   569     val feats = features_of ctxt prover thy (Local, General) (concl_t :: hyp_ts)
   570     val suggs =
   571       if Graph.is_empty fact_graph then []
   572       else mash_QUERY ctxt overlord (max_suggs_of max_facts) (parents, feats)
   573     val selected = facts |> suggested_facts suggs
   574     val unknown = facts |> filter_out (is_fact_in_graph fact_graph)
   575   in (selected, unknown) end
   576 
   577 fun update_fact_graph ctxt (name, parents, feats, deps) (upds, graph) =
   578   let
   579     fun maybe_add_from from (accum as (parents, graph)) =
   580       try_graph ctxt "updating graph" accum (fn () =>
   581           (from :: parents, Graph.add_edge_acyclic (from, name) graph))
   582     val graph = graph |> Graph.default_node (name, ())
   583     val (parents, graph) = ([], graph) |> fold maybe_add_from parents
   584     val (deps, graph) = ([], graph) |> fold maybe_add_from deps
   585   in ((name, parents, feats, deps) :: upds, graph) end
   586 
   587 val learn_timeout_slack = 2.0
   588 
   589 fun launch_thread timeout task =
   590   let
   591     val hard_timeout = time_mult learn_timeout_slack timeout
   592     val birth_time = Time.now ()
   593     val death_time = Time.+ (birth_time, hard_timeout)
   594     val desc = ("machine learner for Sledgehammer", "")
   595   in Async_Manager.launch MaShN birth_time death_time desc task end
   596 
   597 fun mash_learn_proof ctxt ({overlord, timeout, ...} : params) prover t facts
   598                      used_ths =
   599   if is_smt_prover ctxt prover then
   600     ()
   601   else
   602     launch_thread timeout
   603         (fn () =>
   604             let
   605               val thy = Proof_Context.theory_of ctxt
   606               val name = timestamp () ^ " " ^ serial_string () (* freshish *)
   607               val feats = features_of ctxt prover thy (Local, General) [t]
   608               val deps = used_ths |> map nickname_of
   609             in
   610               mash_map ctxt (fn {fact_graph} =>
   611                   let
   612                     val parents = parents_wrt_facts facts fact_graph
   613                     val upds = [(name, parents, feats, deps)]
   614                     val (upds, fact_graph) =
   615                       ([], fact_graph) |> fold (update_fact_graph ctxt) upds
   616                   in
   617                     mash_ADD ctxt overlord upds; {fact_graph = fact_graph}
   618                   end);
   619               (true, "")
   620             end)
   621 
   622 fun sendback sub =
   623   Markup.markup Isabelle_Markup.sendback (sledgehammerN ^ " " ^ sub)
   624 
   625 (* Too many dependencies is a sign that a decision procedure is at work. There
   626    isn't much too learn from such proofs. *)
   627 val max_dependencies = 10
   628 val commit_timeout = seconds 30.0
   629 
   630 (* The timeout is understood in a very slack fashion. *)
   631 fun mash_learn_facts ctxt (params as {debug, verbose, overlord, timeout, ...})
   632                      prover auto atp learn_timeout facts =
   633   let
   634     val timer = Timer.startRealTimer ()
   635     fun next_commit_time () =
   636       Time.+ (Timer.checkRealTimer timer, commit_timeout)
   637     val {fact_graph} = mash_get ctxt
   638     val new_facts =
   639       facts |> filter_out (is_fact_in_graph fact_graph)
   640             |> sort (thm_ord o pairself snd)
   641     val num_new_facts = length new_facts
   642   in
   643     (if not auto then
   644        "MaShing" ^
   645        (if not auto then
   646           " " ^ string_of_int num_new_facts ^ " fact" ^
   647           plural_s num_new_facts ^
   648           (if atp then " (ATP timeout: " ^ string_from_time timeout ^ ")"
   649            else "")
   650         else
   651           "") ^ "..."
   652      else
   653        "")
   654     |> Output.urgent_message;
   655     if null new_facts then
   656       if not auto then
   657         "Nothing to learn.\n\nHint: Try " ^ sendback relearn_isarN ^ " or " ^
   658         sendback relearn_atpN ^ "  to learn from scratch."
   659       else
   660         ""
   661     else
   662       let
   663         val ths = facts |> map snd
   664         val all_names =
   665           ths |> filter_out is_likely_tautology_or_too_meta
   666               |> map (rpair () o nickname_of)
   667               |> Symtab.make
   668         fun do_commit [] state = state
   669           | do_commit upds {fact_graph} =
   670             let
   671               val (upds, fact_graph) =
   672                 ([], fact_graph) |> fold (update_fact_graph ctxt) upds
   673             in mash_ADD ctxt overlord (rev upds); {fact_graph = fact_graph} end
   674         fun trim_deps deps = if length deps > max_dependencies then [] else deps
   675         fun commit last upds =
   676           (if debug andalso not auto then Output.urgent_message "Committing..."
   677            else ();
   678            mash_map ctxt (do_commit (rev upds));
   679            if not last andalso not auto then
   680              let val num_upds = length upds in
   681                "Processed " ^ string_of_int num_upds ^ " fact" ^
   682                plural_s num_upds ^ " in the last " ^
   683                string_from_time commit_timeout ^ "."
   684                |> Output.urgent_message
   685              end
   686            else
   687              ())
   688         fun do_fact _ (accum as (_, (_, _, _, true))) = accum
   689           | do_fact ((_, stature), th)
   690                     (upds, (parents, n, next_commit, false)) =
   691             let
   692               val name = nickname_of th
   693               val feats =
   694                 features_of ctxt prover (theory_of_thm th) stature [prop_of th]
   695               val deps =
   696                 (if atp then atp_dependencies_of ctxt params prover auto facts
   697                  else isar_dependencies_of) all_names th
   698                 |> trim_deps
   699               val n = n |> not (null deps) ? Integer.add 1
   700               val upds = (name, parents, feats, deps) :: upds
   701               val (upds, next_commit) =
   702                 if Time.> (Timer.checkRealTimer timer, next_commit) then
   703                   (commit false upds; ([], next_commit_time ()))
   704                 else
   705                   (upds, next_commit)
   706               val timed_out =
   707                 Time.> (Timer.checkRealTimer timer, learn_timeout)
   708             in (upds, ([name], n, next_commit, timed_out)) end
   709         val parents = parents_wrt_facts facts fact_graph
   710         val (upds, (_, n, _, _)) =
   711           ([], (parents, 0, next_commit_time (), false))
   712           |> fold do_fact new_facts
   713       in
   714         commit true upds;
   715         if verbose orelse not auto then
   716           "Learned " ^ string_of_int n ^ " nontrivial proof" ^ plural_s n ^
   717           (if verbose then
   718              " in " ^ string_from_time (Timer.checkRealTimer timer)
   719            else
   720              "") ^ "."
   721         else
   722           ""
   723       end
   724   end
   725 
   726 fun mash_learn ctxt (params as {provers, ...}) atp =
   727   let
   728     val css_table = Sledgehammer_Fact.clasimpset_rule_table_of ctxt
   729     val facts = all_facts_of ctxt css_table
   730   in
   731      mash_learn_facts ctxt params (hd provers) false atp infinite_timeout facts
   732      |> Output.urgent_message
   733   end
   734 
   735 (* The threshold should be large enough so that MaSh doesn't kick in for Auto
   736    Sledgehammer and Try. *)
   737 val min_secs_for_learning = 15
   738 
   739 fun relevant_facts ctxt (params as {learn, fact_filter, timeout, ...}) prover
   740         max_facts ({add, only, ...} : fact_override) hyp_ts concl_t facts =
   741   if not (subset (op =) (the_list fact_filter, fact_filters)) then
   742     error ("Unknown fact filter: " ^ quote (the fact_filter) ^ ".")
   743   else if only then
   744     facts
   745   else if max_facts <= 0 orelse null facts then
   746     []
   747   else
   748     let
   749       fun maybe_learn () =
   750         if learn andalso not (Async_Manager.has_running_threads MaShN) andalso
   751            Time.toSeconds timeout >= min_secs_for_learning then
   752           let val timeout = time_mult learn_timeout_slack timeout in
   753             launch_thread timeout
   754                 (fn () => (true, mash_learn_facts ctxt params prover true false
   755                                                   timeout facts))
   756           end
   757         else
   758           ()
   759       val fact_filter =
   760         case fact_filter of
   761           SOME ff => (() |> ff <> mepoN ? maybe_learn; ff)
   762         | NONE =>
   763           if is_smt_prover ctxt prover then mepoN
   764           else if mash_can_suggest_facts ctxt then (maybe_learn (); meshN)
   765           else if mash_could_suggest_facts () then (maybe_learn (); mepoN)
   766           else mepoN
   767       val add_ths = Attrib.eval_thms ctxt add
   768       fun prepend_facts ths accepts =
   769         ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @
   770          (accepts |> filter_out (member Thm.eq_thm_prop ths o snd)))
   771         |> take max_facts
   772       fun iter () =
   773         iterative_relevant_facts ctxt params prover max_facts NONE hyp_ts
   774                                  concl_t facts
   775       fun mash () =
   776         mash_suggest_facts ctxt params prover max_facts hyp_ts concl_t facts
   777       val mess =
   778         [] |> (if fact_filter <> mashN then cons (iter (), []) else I)
   779            |> (if fact_filter <> mepoN then cons (mash ()) else I)
   780     in
   781       mesh_facts max_facts mess
   782       |> not (null add_ths) ? prepend_facts add_ths
   783     end
   784 
   785 fun kill_learners () = Async_Manager.kill_threads MaShN "learner"
   786 fun running_learners () = Async_Manager.running_threads MaShN "learner"
   787 
   788 end;