src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
author blanchet
Fri, 20 Jul 2012 22:19:46 +0200
changeset 49411 dd82d190c2af
parent 49410 85a7fb65507a
child 49413 b86450f5b5cb
permissions -rw-r--r--
name tuning
     1 (*  Title:      HOL/Tools/Sledgehammer/sledgehammer_mash.ML
     2     Author:     Jasmin Blanchette, TU Muenchen
     3 
     4 Sledgehammer's machine-learning-based relevance filter (MaSh).
     5 *)
     6 
     7 signature SLEDGEHAMMER_MASH =
     8 sig
     9   type stature = ATP_Problem_Generate.stature
    10   type fact = Sledgehammer_Fact.fact
    11   type fact_override = Sledgehammer_Fact.fact_override
    12   type params = Sledgehammer_Provers.params
    13   type relevance_fudge = Sledgehammer_Provers.relevance_fudge
    14   type prover_result = Sledgehammer_Provers.prover_result
    15 
    16   val trace : bool Config.T
    17   val MaShN : string
    18   val mepoN : string
    19   val mashN : string
    20   val meshN : string
    21   val unlearnN : string
    22   val learn_isarN : string
    23   val learn_atpN : string
    24   val relearn_isarN : string
    25   val relearn_atpN : string
    26   val fact_filters : string list
    27   val escape_meta : string -> string
    28   val escape_metas : string list -> string
    29   val unescape_meta : string -> string
    30   val unescape_metas : string -> string list
    31   val extract_query : string -> string * string list
    32   val nickname_of : thm -> string
    33   val suggested_facts : string list -> ('a * thm) list -> ('a * thm) list
    34   val mesh_facts :
    35     int -> (('a * thm) list * ('a * thm) list) list -> ('a * thm) list
    36   val is_likely_tautology_or_too_meta : thm -> bool
    37   val theory_ord : theory * theory -> order
    38   val thm_ord : thm * thm -> order
    39   val goal_of_thm : theory -> thm -> thm
    40   val run_prover_for_mash :
    41     Proof.context -> params -> string -> fact list -> thm -> prover_result
    42   val features_of :
    43     Proof.context -> string -> theory -> stature -> term list -> string list
    44   val isar_dependencies_of : unit Symtab.table -> thm -> string list
    45   val atp_dependencies_of :
    46     Proof.context -> params -> string -> bool -> fact list -> unit Symtab.table
    47     -> thm -> string list
    48   val mash_CLEAR : Proof.context -> unit
    49   val mash_ADD :
    50     Proof.context -> bool
    51     -> (string * string list * string list * string list) list -> unit
    52   val mash_QUERY :
    53     Proof.context -> bool -> int -> string list * string list -> string list
    54   val mash_unlearn : Proof.context -> unit
    55   val mash_could_suggest_facts : unit -> bool
    56   val mash_can_suggest_facts : Proof.context -> bool
    57   val mash_suggest_facts :
    58     Proof.context -> params -> string -> int -> term list -> term
    59     -> ('a * thm) list -> ('a * thm) list * ('a * thm) list
    60   val mash_learn_proof :
    61     Proof.context -> params -> string -> term -> ('a * thm) list -> thm list
    62     -> unit
    63   val mash_learn_facts :
    64     Proof.context -> params -> string -> bool -> bool -> Time.time -> fact list
    65     -> string
    66   val mash_learn :
    67     Proof.context -> params -> fact_override -> thm list -> bool -> unit
    68   val relevant_facts :
    69     Proof.context -> params -> string -> int -> fact_override -> term list
    70     -> term -> fact list -> fact list
    71   val kill_learners : unit -> unit
    72   val running_learners : unit -> unit
    73 end;
    74 
    75 structure Sledgehammer_MaSh : SLEDGEHAMMER_MASH =
    76 struct
    77 
    78 open ATP_Util
    79 open ATP_Problem_Generate
    80 open Sledgehammer_Util
    81 open Sledgehammer_Fact
    82 open Sledgehammer_Provers
    83 open Sledgehammer_Minimize
    84 open Sledgehammer_MePo
    85 
    86 val trace =
    87   Attrib.setup_config_bool @{binding sledgehammer_mash_trace} (K false)
    88 fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
    89 
    90 val MaShN = "MaSh"
    91 
    92 val mepoN = "mepo"
    93 val mashN = "mash"
    94 val meshN = "mesh"
    95 
    96 val fact_filters = [meshN, mepoN, mashN]
    97 
    98 val unlearnN = "unlearn"
    99 val learn_isarN = "learn_isar"
   100 val learn_atpN = "learn_atp"
   101 val relearn_isarN = "relearn_isar"
   102 val relearn_atpN = "relearn_atp"
   103 
   104 fun mash_home () = getenv "MASH_HOME"
   105 fun mash_model_dir () =
   106   getenv "ISABELLE_HOME_USER" ^ "/mash"
   107   |> tap (Isabelle_System.mkdir o Path.explode)
   108 val mash_state_dir = mash_model_dir
   109 fun mash_state_path () = mash_state_dir () ^ "/state" |> Path.explode
   110 
   111 
   112 (*** Isabelle helpers ***)
   113 
   114 fun meta_char c =
   115   if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse
   116      c = #")" orelse c = #"," then
   117     String.str c
   118   else
   119     (* fixed width, in case more digits follow *)
   120     "%" ^ stringN_of_int 3 (Char.ord c)
   121 
   122 fun unmeta_chars accum [] = String.implode (rev accum)
   123   | unmeta_chars accum (#"%" :: d1 :: d2 :: d3 :: cs) =
   124     (case Int.fromString (String.implode [d1, d2, d3]) of
   125        SOME n => unmeta_chars (Char.chr n :: accum) cs
   126      | NONE => "" (* error *))
   127   | unmeta_chars _ (#"%" :: _) = "" (* error *)
   128   | unmeta_chars accum (c :: cs) = unmeta_chars (c :: accum) cs
   129 
   130 val escape_meta = String.translate meta_char
   131 val escape_metas = map escape_meta #> space_implode " "
   132 val unescape_meta = String.explode #> unmeta_chars []
   133 val unescape_metas =
   134   space_explode " " #> filter_out (curry (op =) "") #> map unescape_meta
   135 
   136 fun extract_query line =
   137   case space_explode ":" line of
   138     [goal_name, suggs] => (unescape_meta goal_name, unescape_metas suggs)
   139   | _ => ("", [])
   140 
   141 fun parent_of_local_thm th =
   142   let
   143     val thy = th |> Thm.theory_of_thm
   144     val facts = thy |> Global_Theory.facts_of
   145     val space = facts |> Facts.space_of
   146     fun id_of s = #id (Name_Space.the_entry space s)
   147     fun max_id (s', _) (s, id) =
   148       let val id' = id_of s' in if id > id' then (s, id) else (s', id') end
   149   in ("", ~1) |> Facts.fold_static max_id facts |> fst end
   150 
   151 val local_prefix = "local" ^ Long_Name.separator
   152 
   153 fun nickname_of th =
   154   if Thm.has_name_hint th then
   155     let val hint = Thm.get_name_hint th in
   156       (* FIXME: There must be a better way to detect local facts. *)
   157       case try (unprefix local_prefix) hint of
   158         SOME suf =>
   159         parent_of_local_thm th ^ Long_Name.separator ^ Long_Name.separator ^ suf
   160       | NONE => hint
   161     end
   162   else
   163     backquote_thm th
   164 
   165 fun suggested_facts suggs facts =
   166   let
   167     fun add_fact (fact as (_, th)) = Symtab.default (nickname_of th, fact)
   168     val tab = Symtab.empty |> fold add_fact facts
   169   in map_filter (Symtab.lookup tab) suggs end
   170 
   171 (* Ad hoc score function roughly based on Blanchette's Ringberg 2011 data. *)
   172 fun score x = Math.pow (1.5, 15.5 - 0.05 * Real.fromInt x) + 15.0
   173 
   174 fun sum_sq_avg [] = 0
   175   | sum_sq_avg xs =
   176     Real.ceil (100000.0 * fold (curry (op +) o score) xs 0.0) div length xs
   177 
   178 fun mesh_facts max_facts [(selected, unknown)] =
   179     take max_facts selected @ take (max_facts - length selected) unknown
   180   | mesh_facts max_facts mess =
   181     let
   182       val mess = mess |> map (apfst (`length))
   183       val fact_eq = Thm.eq_thm o pairself snd
   184       fun score_in fact ((sel_len, sels), unks) =
   185         case find_index (curry fact_eq fact) sels of
   186           ~1 => (case find_index (curry fact_eq fact) unks of
   187                    ~1 => SOME sel_len
   188                  | _ => NONE)
   189         | j => SOME j
   190       fun score_of fact = mess |> map_filter (score_in fact) |> sum_sq_avg
   191       val facts = fold (union fact_eq o take max_facts o snd o fst) mess []
   192     in
   193       facts |> map (`score_of) |> sort (int_ord o swap o pairself fst)
   194             |> map snd |> take max_facts
   195     end
   196 
   197 val thy_feature_name_of = prefix "y"
   198 val const_name_of = prefix "c"
   199 val type_name_of = prefix "t"
   200 val class_name_of = prefix "s"
   201 
   202 fun is_likely_tautology_or_too_meta th =
   203   let
   204     val is_boring_const = member (op =) atp_widely_irrelevant_consts
   205     fun is_boring_bool t =
   206       not (exists_Const (not o is_boring_const o fst) t) orelse
   207       exists_type (exists_subtype (curry (op =) @{typ prop})) t
   208     fun is_boring_prop (@{const Trueprop} $ t) = is_boring_bool t
   209       | is_boring_prop (@{const "==>"} $ t $ u) =
   210         is_boring_prop t andalso is_boring_prop u
   211       | is_boring_prop (Const (@{const_name all}, _) $ (Abs (_, _, t)) $ u) =
   212         is_boring_prop t andalso is_boring_prop u
   213       | is_boring_prop (Const (@{const_name "=="}, _) $ t $ u) =
   214         is_boring_bool t andalso is_boring_bool u
   215       | is_boring_prop _ = true
   216   in
   217     is_boring_prop (prop_of th) andalso not (Thm.eq_thm_prop (@{thm ext}, th))
   218   end
   219 
   220 fun theory_ord p =
   221   if Theory.eq_thy p then
   222     EQUAL
   223   else if Theory.subthy p then
   224     LESS
   225   else if Theory.subthy (swap p) then
   226     GREATER
   227   else case int_ord (pairself (length o Theory.ancestors_of) p) of
   228     EQUAL => string_ord (pairself Context.theory_name p)
   229   | order => order
   230 
   231 val thm_ord = theory_ord o pairself theory_of_thm
   232 
   233 val freezeT = Type.legacy_freeze_type
   234 
   235 fun freeze (t $ u) = freeze t $ freeze u
   236   | freeze (Abs (s, T, t)) = Abs (s, freezeT T, freeze t)
   237   | freeze (Var ((s, _), T)) = Free (s, freezeT T)
   238   | freeze (Const (s, T)) = Const (s, freezeT T)
   239   | freeze (Free (s, T)) = Free (s, freezeT T)
   240   | freeze t = t
   241 
   242 fun goal_of_thm thy = prop_of #> freeze #> cterm_of thy #> Goal.init
   243 
   244 fun run_prover_for_mash ctxt params prover facts goal =
   245   let
   246     val problem =
   247       {state = Proof.init ctxt, goal = goal, subgoal = 1, subgoal_count = 1,
   248        facts = facts |> map (apfst (apfst (fn name => name ())))
   249                      |> map Untranslated_Fact}
   250   in
   251     get_minimizing_prover ctxt MaSh (K ()) prover params (K (K (K "")))
   252                           problem
   253   end
   254 
   255 val bad_types = [@{type_name prop}, @{type_name bool}, @{type_name fun}]
   256 
   257 fun interesting_terms_types_and_classes ctxt prover term_max_depth
   258                                         type_max_depth ts =
   259   let
   260     fun is_bad_const (x as (s, _)) args =
   261       member (op =) atp_logical_consts s orelse
   262       fst (is_built_in_const_for_prover ctxt prover x args)
   263     fun add_classes @{sort type} = I
   264       | add_classes S = union (op =) (map class_name_of S)
   265     fun do_add_type (Type (s, Ts)) =
   266         (not (member (op =) bad_types s) ? insert (op =) (type_name_of s))
   267         #> fold do_add_type Ts
   268       | do_add_type (TFree (_, S)) = add_classes S
   269       | do_add_type (TVar (_, S)) = add_classes S
   270     fun add_type T = type_max_depth >= 0 ? do_add_type T
   271     fun mk_app s args =
   272       if member (op <>) args "" then s ^ "(" ^ space_implode "," args ^ ")"
   273       else s
   274     fun patternify ~1 _ = ""
   275       | patternify depth t =
   276         case strip_comb t of
   277           (Const (s, _), args) =>
   278           mk_app (const_name_of s) (map (patternify (depth - 1)) args)
   279         | _ => ""
   280     fun add_term_patterns ~1 _ = I
   281       | add_term_patterns depth t =
   282         insert (op =) (patternify depth t)
   283         #> add_term_patterns (depth - 1) t
   284     val add_term = add_term_patterns term_max_depth
   285     fun add_patterns t =
   286       let val (head, args) = strip_comb t in
   287         (case head of
   288            Const (x as (_, T)) =>
   289            not (is_bad_const x args) ? (add_term t #> add_type T)
   290          | Free (_, T) => add_type T
   291          | Var (_, T) => add_type T
   292          | Abs (_, T, body) => add_type T #> add_patterns body
   293          | _ => I)
   294         #> fold add_patterns args
   295       end
   296   in [] |> fold add_patterns ts end
   297 
   298 fun is_exists (s, _) = (s = @{const_name Ex} orelse s = @{const_name Ex1})
   299 
   300 val term_max_depth = 1
   301 val type_max_depth = 1
   302 
   303 (* TODO: Generate type classes for types? *)
   304 fun features_of ctxt prover thy (scope, status) ts =
   305   thy_feature_name_of (Context.theory_name thy) ::
   306   interesting_terms_types_and_classes ctxt prover term_max_depth type_max_depth
   307                                       ts
   308   |> forall is_lambda_free ts ? cons "no_lams"
   309   |> forall (not o exists_Const is_exists) ts ? cons "no_skos"
   310   |> scope <> Global ? cons "local"
   311   |> (case status of
   312         General => I
   313       | Induction => cons "induction"
   314       | Intro => cons "intro"
   315       | Inductive => cons "inductive"
   316       | Elim => cons "elim"
   317       | Simp => cons "simp"
   318       | Def => cons "def")
   319 
   320 fun isar_dependencies_of all_facts = thms_in_proof (SOME all_facts)
   321 
   322 val atp_dep_default_max_fact = 50
   323 
   324 fun atp_dependencies_of ctxt (params as {verbose, max_facts, ...}) prover auto
   325                         facts all_names th =
   326   case isar_dependencies_of all_names th of
   327     [] => []
   328   | isar_deps =>
   329     let
   330       val thy = Proof_Context.theory_of ctxt
   331       val goal = goal_of_thm thy th
   332       val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal 1
   333       val facts = facts |> filter (fn (_, th') => thm_ord (th', th) = LESS)
   334       fun fix_name ((_, stature), th) = ((fn () => nickname_of th, stature), th)
   335       fun is_dep dep (_, th) = nickname_of th = dep
   336       fun add_isar_dep facts dep accum =
   337         if exists (is_dep dep) accum then
   338           accum
   339         else case find_first (is_dep dep) facts of
   340           SOME ((name, status), th) => accum @ [((name, status), th)]
   341         | NONE => accum (* shouldn't happen *)
   342       val facts =
   343         facts |> iterative_relevant_facts ctxt params prover
   344                      (max_facts |> the_default atp_dep_default_max_fact) NONE
   345                      hyp_ts concl_t
   346               |> fold (add_isar_dep facts) isar_deps
   347               |> map fix_name
   348     in
   349       if verbose andalso not auto then
   350         let val num_facts = length facts in
   351           "MaSh: " ^ quote prover ^ " on " ^ quote (nickname_of th) ^
   352           " with " ^ string_of_int num_facts ^ " fact" ^ plural_s num_facts ^
   353           "."
   354           |> Output.urgent_message
   355         end
   356       else
   357         ();
   358       case run_prover_for_mash ctxt params prover facts goal of
   359         {outcome = NONE, used_facts, ...} =>
   360         (if verbose andalso not auto then
   361            let val num_facts = length used_facts in
   362              "Found proof with " ^ string_of_int num_facts ^ " fact" ^
   363              plural_s num_facts ^ "."
   364              |> Output.urgent_message
   365            end
   366          else
   367            ();
   368          used_facts |> map fst)
   369       | _ => isar_deps
   370     end
   371 
   372 
   373 (*** Low-level communication with MaSh ***)
   374 
   375 (* more friendly than "try o File.rm" for those who keep the files open in their
   376    text editor *)
   377 fun wipe_out file = File.write file ""
   378 
   379 fun write_file (xs, f) file =
   380   let val path = Path.explode file in
   381     wipe_out path;
   382     xs |> chunk_list 500
   383        |> List.app (File.append path o space_implode "" o map f)
   384   end
   385 
   386 fun run_mash_tool ctxt overlord save max_suggs write_cmds read_suggs =
   387   let
   388     val (temp_dir, serial) =
   389       if overlord then (getenv "ISABELLE_HOME_USER", "")
   390       else (getenv "ISABELLE_TMP", serial_string ())
   391     val log_file = if overlord then temp_dir ^ "/mash_log" else "/dev/null"
   392     val err_file = temp_dir ^ "/mash_err" ^ serial
   393     val sugg_file = temp_dir ^ "/mash_suggs" ^ serial
   394     val cmd_file = temp_dir ^ "/mash_commands" ^ serial
   395     val core =
   396       "--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^
   397       " --numberOfPredictions " ^ string_of_int max_suggs ^
   398       (if save then " --saveModel" else "")
   399     val command =
   400       mash_home () ^ "/mash --quiet --outputDir " ^ mash_model_dir () ^
   401       " --log " ^ log_file ^ " " ^ core ^ " >& " ^ err_file
   402   in
   403     write_file ([], K "") sugg_file;
   404     write_file write_cmds cmd_file;
   405     trace_msg ctxt (fn () => "Running " ^ command);
   406     Isabelle_System.bash command;
   407     read_suggs (fn () => try File.read_lines (Path.explode sugg_file) |> these)
   408     |> tap (fn _ => trace_msg ctxt (fn () =>
   409            case try File.read (Path.explode err_file) of
   410              NONE => "Done"
   411            | SOME "" => "Done"
   412            | SOME s => "Error: " ^ elide_string 1000 s))
   413     |> not overlord
   414        ? tap (fn _ => List.app (wipe_out o Path.explode)
   415                                [err_file, sugg_file, cmd_file])
   416   end
   417 
   418 fun str_of_update (name, parents, feats, deps) =
   419   "! " ^ escape_meta name ^ ": " ^ escape_metas parents ^ "; " ^
   420   escape_metas feats ^ "; " ^ escape_metas deps ^ "\n"
   421 
   422 fun str_of_query (parents, feats) =
   423   "? " ^ escape_metas parents ^ "; " ^ escape_metas feats
   424 
   425 fun mash_CLEAR ctxt =
   426   let val path = mash_model_dir () |> Path.explode in
   427     trace_msg ctxt (K "MaSh CLEAR");
   428     File.fold_dir (fn file => fn _ =>
   429                       try File.rm (Path.append path (Path.basic file)))
   430                   path NONE;
   431     ()
   432   end
   433 
   434 fun mash_ADD _ _ [] = ()
   435   | mash_ADD ctxt overlord upds =
   436     (trace_msg ctxt (fn () => "MaSh ADD " ^
   437          elide_string 1000 (space_implode " " (map #1 upds)));
   438      run_mash_tool ctxt overlord true 0 (upds, str_of_update) (K ()))
   439 
   440 fun mash_QUERY ctxt overlord max_suggs (query as (_, feats)) =
   441   (trace_msg ctxt (fn () => "MaSh QUERY " ^ space_implode " " feats);
   442    run_mash_tool ctxt overlord false max_suggs
   443        ([query], str_of_query)
   444        (fn suggs =>
   445            case suggs () of
   446              [] => []
   447            | suggs => snd (extract_query (List.last suggs)))
   448    handle List.Empty => [])
   449 
   450 
   451 (*** High-level communication with MaSh ***)
   452 
   453 fun try_graph ctxt when def f =
   454   f ()
   455   handle Graph.CYCLES (cycle :: _) =>
   456          (trace_msg ctxt (fn () =>
   457               "Cycle involving " ^ commas cycle ^ " when " ^ when); def)
   458        | Graph.DUP name =>
   459          (trace_msg ctxt (fn () =>
   460               "Duplicate fact " ^ quote name ^ " when " ^ when); def)
   461        | Graph.UNDEF name =>
   462          (trace_msg ctxt (fn () =>
   463               "Unknown fact " ^ quote name ^ " when " ^ when); def)
   464        | exn =>
   465          if Exn.is_interrupt exn then
   466            reraise exn
   467          else
   468            (trace_msg ctxt (fn () =>
   469                 "Internal error when " ^ when ^ ":\n" ^
   470                 ML_Compiler.exn_message exn); def)
   471 
   472 type mash_state = {fact_graph : unit Graph.T}
   473 
   474 val empty_state = {fact_graph = Graph.empty}
   475 
   476 local
   477 
   478 val version = "*** MaSh 0.0 ***"
   479 
   480 fun load _ (state as (true, _)) = state
   481   | load ctxt _ =
   482     let val path = mash_state_path () in
   483       (true,
   484        case try File.read_lines path of
   485          SOME (version' :: fact_lines) =>
   486          let
   487            fun add_edge_to name parent =
   488              Graph.default_node (parent, ())
   489              #> Graph.add_edge (parent, name)
   490            fun add_fact_line line =
   491              case extract_query line of
   492                ("", _) => I (* shouldn't happen *)
   493              | (name, parents) =>
   494                Graph.default_node (name, ())
   495                #> fold (add_edge_to name) parents
   496            val fact_graph =
   497              try_graph ctxt "loading state" Graph.empty (fn () =>
   498                  Graph.empty |> version' = version
   499                                 ? fold add_fact_line fact_lines)
   500          in {fact_graph = fact_graph} end
   501        | _ => empty_state)
   502     end
   503 
   504 fun save {fact_graph} =
   505   let
   506     val path = mash_state_path ()
   507     fun fact_line_for name parents =
   508       escape_meta name ^ ": " ^ escape_metas parents
   509     val append_fact = File.append path o suffix "\n" oo fact_line_for
   510   in
   511     File.write path (version ^ "\n");
   512     Graph.fold (fn (name, ((), (parents, _))) => fn () =>
   513                    append_fact name (Graph.Keys.dest parents))
   514         fact_graph ()
   515   end
   516 
   517 val global_state =
   518   Synchronized.var "Sledgehammer_MaSh.global_state" (false, empty_state)
   519 
   520 in
   521 
   522 fun mash_map ctxt f =
   523   Synchronized.change global_state (load ctxt ##> (f #> tap save))
   524 
   525 fun mash_get ctxt =
   526   Synchronized.change_result global_state (load ctxt #> `snd)
   527 
   528 fun mash_unlearn ctxt =
   529   Synchronized.change global_state (fn _ =>
   530       (mash_CLEAR ctxt; wipe_out (mash_state_path ()); (true, empty_state)))
   531 
   532 end
   533 
   534 fun mash_could_suggest_facts () = mash_home () <> ""
   535 fun mash_can_suggest_facts ctxt =
   536   not (Graph.is_empty (#fact_graph (mash_get ctxt)))
   537 
   538 fun parents_wrt_facts facts fact_graph =
   539   let
   540     val facts = [] |> fold (cons o nickname_of o snd) facts
   541     val tab = Symtab.empty |> fold (fn name => Symtab.update (name, ())) facts
   542     fun insert_not_seen seen name =
   543       not (member (op =) seen name) ? insert (op =) name
   544     fun parents_of _ parents [] = parents
   545       | parents_of seen parents (name :: names) =
   546         if Symtab.defined tab name then
   547           parents_of (name :: seen) (name :: parents) names
   548         else
   549           parents_of (name :: seen) parents
   550                      (Graph.Keys.fold (insert_not_seen seen)
   551                                       (Graph.imm_preds fact_graph name) names)
   552   in parents_of [] [] (Graph.maximals fact_graph) end
   553 
   554 (* Generate more suggestions than requested, because some might be thrown out
   555    later for various reasons and "meshing" gives better results with some
   556    slack. *)
   557 fun max_suggs_of max_facts = max_facts + Int.min (200, max_facts)
   558 
   559 fun is_fact_in_graph fact_graph (_, th) =
   560   can (Graph.get_node fact_graph) (nickname_of th)
   561 
   562 fun mash_suggest_facts ctxt ({overlord, ...} : params) prover max_facts hyp_ts
   563                        concl_t facts =
   564   let
   565     val thy = Proof_Context.theory_of ctxt
   566     val fact_graph = #fact_graph (mash_get ctxt)
   567     val parents = parents_wrt_facts facts fact_graph
   568     val feats = features_of ctxt prover thy (Local, General) (concl_t :: hyp_ts)
   569     val suggs =
   570       if Graph.is_empty fact_graph then []
   571       else mash_QUERY ctxt overlord (max_suggs_of max_facts) (parents, feats)
   572     val selected = facts |> suggested_facts suggs
   573     val unknown = facts |> filter_out (is_fact_in_graph fact_graph)
   574   in (selected, unknown) end
   575 
   576 fun update_fact_graph ctxt (name, parents, feats, deps) (upds, graph) =
   577   let
   578     fun maybe_add_from from (accum as (parents, graph)) =
   579       try_graph ctxt "updating graph" accum (fn () =>
   580           (from :: parents, Graph.add_edge_acyclic (from, name) graph))
   581     val graph = graph |> Graph.default_node (name, ())
   582     val (parents, graph) = ([], graph) |> fold maybe_add_from parents
   583     val (deps, graph) = ([], graph) |> fold maybe_add_from deps
   584   in ((name, parents, feats, deps) :: upds, graph) end
   585 
   586 val learn_timeout_slack = 2.0
   587 
   588 fun launch_thread timeout task =
   589   let
   590     val hard_timeout = time_mult learn_timeout_slack timeout
   591     val birth_time = Time.now ()
   592     val death_time = Time.+ (birth_time, hard_timeout)
   593     val desc = ("machine learner for Sledgehammer", "")
   594   in Async_Manager.launch MaShN birth_time death_time desc task end
   595 
   596 fun mash_learn_proof ctxt ({overlord, timeout, ...} : params) prover t facts
   597                      used_ths =
   598   if is_smt_prover ctxt prover then
   599     ()
   600   else
   601     launch_thread timeout
   602         (fn () =>
   603             let
   604               val thy = Proof_Context.theory_of ctxt
   605               val name = timestamp () ^ " " ^ serial_string () (* freshish *)
   606               val feats = features_of ctxt prover thy (Local, General) [t]
   607               val deps = used_ths |> map nickname_of
   608             in
   609               mash_map ctxt (fn {fact_graph} =>
   610                   let
   611                     val parents = parents_wrt_facts facts fact_graph
   612                     val upds = [(name, parents, feats, deps)]
   613                     val (upds, fact_graph) =
   614                       ([], fact_graph) |> fold (update_fact_graph ctxt) upds
   615                   in
   616                     mash_ADD ctxt overlord upds; {fact_graph = fact_graph}
   617                   end);
   618               (true, "")
   619             end)
   620 
   621 fun sendback sub =
   622   Markup.markup Isabelle_Markup.sendback (sledgehammerN ^ " " ^ sub)
   623 
   624 (* Too many dependencies is a sign that a decision procedure is at work. There
   625    isn't much too learn from such proofs. *)
   626 val max_dependencies = 10
   627 val commit_timeout = seconds 30.0
   628 
   629 (* The timeout is understood in a very slack fashion. *)
   630 fun mash_learn_facts ctxt (params as {debug, verbose, overlord, timeout, ...})
   631                      prover auto atp learn_timeout facts =
   632   let
   633     val timer = Timer.startRealTimer ()
   634     fun next_commit_time () =
   635       Time.+ (Timer.checkRealTimer timer, commit_timeout)
   636     val {fact_graph} = mash_get ctxt
   637     val new_facts =
   638       facts |> filter_out (is_fact_in_graph fact_graph)
   639             |> sort (thm_ord o pairself snd)
   640     val num_new_facts = length new_facts
   641   in
   642     (if not auto then
   643        "MaShing" ^
   644        (if not auto then
   645           " " ^ string_of_int num_new_facts ^ " fact" ^
   646           plural_s num_new_facts ^
   647           (if atp then " (ATP timeout: " ^ string_from_time timeout ^ ")"
   648            else "")
   649         else
   650           "") ^ "..."
   651      else
   652        "")
   653     |> Output.urgent_message;
   654     if null new_facts then
   655       if not auto then
   656         "Nothing to learn.\n\nHint: Try " ^ sendback relearn_isarN ^ " or " ^
   657         sendback relearn_atpN ^ " to learn from scratch."
   658       else
   659         ""
   660     else
   661       let
   662         val ths = facts |> map snd
   663         val all_names =
   664           ths |> filter_out is_likely_tautology_or_too_meta
   665               |> map (rpair () o nickname_of)
   666               |> Symtab.make
   667         fun do_commit [] state = state
   668           | do_commit upds {fact_graph} =
   669             let
   670               val (upds, fact_graph) =
   671                 ([], fact_graph) |> fold (update_fact_graph ctxt) upds
   672             in mash_ADD ctxt overlord (rev upds); {fact_graph = fact_graph} end
   673         fun trim_deps deps = if length deps > max_dependencies then [] else deps
   674         fun commit last upds =
   675           (if debug andalso not auto then Output.urgent_message "Committing..."
   676            else ();
   677            mash_map ctxt (do_commit (rev upds));
   678            if not last andalso not auto then
   679              let val num_upds = length upds in
   680                "Processed " ^ string_of_int num_upds ^ " fact" ^
   681                plural_s num_upds ^ " in the last " ^
   682                string_from_time commit_timeout ^ "."
   683                |> Output.urgent_message
   684              end
   685            else
   686              ())
   687         fun do_fact _ (accum as (_, (_, _, _, true))) = accum
   688           | do_fact ((_, stature), th)
   689                     (upds, (parents, n, next_commit, false)) =
   690             let
   691               val name = nickname_of th
   692               val feats =
   693                 features_of ctxt prover (theory_of_thm th) stature [prop_of th]
   694               val deps =
   695                 (if atp then atp_dependencies_of ctxt params prover auto facts
   696                  else isar_dependencies_of) all_names th
   697                 |> trim_deps
   698               val n = n |> not (null deps) ? Integer.add 1
   699               val upds = (name, parents, feats, deps) :: upds
   700               val (upds, next_commit) =
   701                 if Time.> (Timer.checkRealTimer timer, next_commit) then
   702                   (commit false upds; ([], next_commit_time ()))
   703                 else
   704                   (upds, next_commit)
   705               val timed_out =
   706                 Time.> (Timer.checkRealTimer timer, learn_timeout)
   707             in (upds, ([name], n, next_commit, timed_out)) end
   708         val parents = parents_wrt_facts facts fact_graph
   709         val (upds, (_, n, _, _)) =
   710           ([], (parents, 0, next_commit_time (), false))
   711           |> fold do_fact new_facts
   712       in
   713         commit true upds;
   714         if verbose orelse not auto then
   715           "Learned " ^ string_of_int n ^ " nontrivial proof" ^ plural_s n ^
   716           (if verbose then
   717              " in " ^ string_from_time (Timer.checkRealTimer timer)
   718            else
   719              "") ^ "."
   720         else
   721           ""
   722       end
   723   end
   724 
   725 fun mash_learn ctxt (params as {provers, ...}) fact_override chained atp =
   726   let
   727     val css = Sledgehammer_Fact.clasimpset_rule_table_of ctxt
   728     val ctxt = ctxt |> Config.put instantiate_inducts false
   729     val facts =
   730       nearly_all_facts ctxt false fact_override Symtab.empty css chained []
   731                        @{prop True}
   732   in
   733      mash_learn_facts ctxt params (hd provers) false atp infinite_timeout facts
   734      |> Output.urgent_message
   735   end
   736 
   737 (* The threshold should be large enough so that MaSh doesn't kick in for Auto
   738    Sledgehammer and Try. *)
   739 val min_secs_for_learning = 15
   740 
   741 fun relevant_facts ctxt (params as {learn, fact_filter, timeout, ...}) prover
   742         max_facts ({add, only, ...} : fact_override) hyp_ts concl_t facts =
   743   if not (subset (op =) (the_list fact_filter, fact_filters)) then
   744     error ("Unknown fact filter: " ^ quote (the fact_filter) ^ ".")
   745   else if only then
   746     facts
   747   else if max_facts <= 0 orelse null facts then
   748     []
   749   else
   750     let
   751       fun maybe_learn () =
   752         if learn andalso not (Async_Manager.has_running_threads MaShN) andalso
   753            Time.toSeconds timeout >= min_secs_for_learning then
   754           let val timeout = time_mult learn_timeout_slack timeout in
   755             launch_thread timeout
   756                 (fn () => (true, mash_learn_facts ctxt params prover true false
   757                                                   timeout facts))
   758           end
   759         else
   760           ()
   761       val fact_filter =
   762         case fact_filter of
   763           SOME ff => (() |> ff <> mepoN ? maybe_learn; ff)
   764         | NONE =>
   765           if is_smt_prover ctxt prover then mepoN
   766           else if mash_can_suggest_facts ctxt then (maybe_learn (); meshN)
   767           else if mash_could_suggest_facts () then (maybe_learn (); mepoN)
   768           else mepoN
   769       val add_ths = Attrib.eval_thms ctxt add
   770       fun prepend_facts ths accepts =
   771         ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @
   772          (accepts |> filter_out (member Thm.eq_thm_prop ths o snd)))
   773         |> take max_facts
   774       fun iter () =
   775         iterative_relevant_facts ctxt params prover max_facts NONE hyp_ts
   776                                  concl_t facts
   777       fun mash () =
   778         mash_suggest_facts ctxt params prover max_facts hyp_ts concl_t facts
   779       val mess =
   780         [] |> (if fact_filter <> mashN then cons (iter (), []) else I)
   781            |> (if fact_filter <> mepoN then cons (mash ()) else I)
   782     in
   783       mesh_facts max_facts mess
   784       |> not (null add_ths) ? prepend_facts add_ths
   785     end
   786 
   787 fun kill_learners () = Async_Manager.kill_threads MaShN "learner"
   788 fun running_learners () = Async_Manager.running_threads MaShN "learner"
   789 
   790 end;