src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
author blanchet
Fri, 20 Jul 2012 22:19:46 +0200
changeset 49420 7682bc885e8a
parent 49419 0a261b4aa093
child 49421 b002cc16aa99
permissions -rw-r--r--
use CVC3 and Yices by default if they are available and there are enough cores
     1 (*  Title:      HOL/Tools/Sledgehammer/sledgehammer_mash.ML
     2     Author:     Jasmin Blanchette, TU Muenchen
     3 
     4 Sledgehammer's machine-learning-based relevance filter (MaSh).
     5 *)
     6 
     7 signature SLEDGEHAMMER_MASH =
     8 sig
     9   type stature = ATP_Problem_Generate.stature
    10   type fact = Sledgehammer_Fact.fact
    11   type fact_override = Sledgehammer_Fact.fact_override
    12   type params = Sledgehammer_Provers.params
    13   type relevance_fudge = Sledgehammer_Provers.relevance_fudge
    14   type prover_result = Sledgehammer_Provers.prover_result
    15 
    16   val trace : bool Config.T
    17   val MaShN : string
    18   val mepoN : string
    19   val mashN : string
    20   val meshN : string
    21   val unlearnN : string
    22   val learn_isarN : string
    23   val learn_atpN : string
    24   val relearn_isarN : string
    25   val relearn_atpN : string
    26   val fact_filters : string list
    27   val escape_meta : string -> string
    28   val escape_metas : string list -> string
    29   val unescape_meta : string -> string
    30   val unescape_metas : string -> string list
    31   val extract_query : string -> string * string list
    32   val nickname_of : thm -> string
    33   val suggested_facts : string list -> ('a * thm) list -> ('a * thm) list
    34   val mesh_facts :
    35     int -> (('a * thm) list * ('a * thm) list) list -> ('a * thm) list
    36   val is_likely_tautology_or_too_meta : thm -> bool
    37   val theory_ord : theory * theory -> order
    38   val thm_ord : thm * thm -> order
    39   val goal_of_thm : theory -> thm -> thm
    40   val run_prover_for_mash :
    41     Proof.context -> params -> string -> fact list -> thm -> prover_result
    42   val features_of :
    43     Proof.context -> string -> theory -> stature -> term list -> string list
    44   val isar_dependencies_of : unit Symtab.table -> thm -> string list option
    45   val atp_dependencies_of :
    46     Proof.context -> params -> string -> int -> fact list -> unit Symtab.table
    47     -> thm -> string list option
    48   val mash_CLEAR : Proof.context -> unit
    49   val mash_ADD :
    50     Proof.context -> bool
    51     -> (string * string list * string list * string list) list -> unit
    52   val mash_REPROVE :
    53     Proof.context -> bool -> (string * string list) list -> unit
    54   val mash_QUERY :
    55     Proof.context -> bool -> int -> string list * string list -> string list
    56   val mash_unlearn : Proof.context -> unit
    57   val mash_could_suggest_facts : unit -> bool
    58   val mash_can_suggest_facts : Proof.context -> bool
    59   val mash_suggest_facts :
    60     Proof.context -> params -> string -> int -> term list -> term
    61     -> ('a * thm) list -> ('a * thm) list * ('a * thm) list
    62   val mash_learn_proof :
    63     Proof.context -> params -> string -> term -> ('a * thm) list -> thm list
    64     -> unit
    65   val mash_learn :
    66     Proof.context -> params -> fact_override -> thm list -> bool -> unit
    67   val relevant_facts :
    68     Proof.context -> params -> string -> int -> fact_override -> term list
    69     -> term -> fact list -> fact list
    70   val kill_learners : unit -> unit
    71   val running_learners : unit -> unit
    72 end;
    73 
    74 structure Sledgehammer_MaSh : SLEDGEHAMMER_MASH =
    75 struct
    76 
    77 open ATP_Util
    78 open ATP_Problem_Generate
    79 open Sledgehammer_Util
    80 open Sledgehammer_Fact
    81 open Sledgehammer_Provers
    82 open Sledgehammer_Minimize
    83 open Sledgehammer_MePo
    84 
    85 val trace =
    86   Attrib.setup_config_bool @{binding sledgehammer_mash_trace} (K false)
    87 fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
    88 
    89 val MaShN = "MaSh"
    90 
    91 val mepoN = "mepo"
    92 val mashN = "mash"
    93 val meshN = "mesh"
    94 
    95 val fact_filters = [meshN, mepoN, mashN]
    96 
    97 val unlearnN = "unlearn"
    98 val learn_isarN = "learn_isar"
    99 val learn_atpN = "learn_atp"
   100 val relearn_isarN = "relearn_isar"
   101 val relearn_atpN = "relearn_atp"
   102 
   103 fun mash_home () = getenv "MASH_HOME"
   104 fun mash_model_dir () =
   105   getenv "ISABELLE_HOME_USER" ^ "/mash"
   106   |> tap (Isabelle_System.mkdir o Path.explode)
   107 val mash_state_dir = mash_model_dir
   108 fun mash_state_path () = mash_state_dir () ^ "/state" |> Path.explode
   109 
   110 
   111 (*** Isabelle helpers ***)
   112 
   113 fun meta_char c =
   114   if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse
   115      c = #")" orelse c = #"," then
   116     String.str c
   117   else
   118     (* fixed width, in case more digits follow *)
   119     "%" ^ stringN_of_int 3 (Char.ord c)
   120 
   121 fun unmeta_chars accum [] = String.implode (rev accum)
   122   | unmeta_chars accum (#"%" :: d1 :: d2 :: d3 :: cs) =
   123     (case Int.fromString (String.implode [d1, d2, d3]) of
   124        SOME n => unmeta_chars (Char.chr n :: accum) cs
   125      | NONE => "" (* error *))
   126   | unmeta_chars _ (#"%" :: _) = "" (* error *)
   127   | unmeta_chars accum (c :: cs) = unmeta_chars (c :: accum) cs
   128 
   129 val escape_meta = String.translate meta_char
   130 val escape_metas = map escape_meta #> space_implode " "
   131 val unescape_meta = String.explode #> unmeta_chars []
   132 val unescape_metas =
   133   space_explode " " #> filter_out (curry (op =) "") #> map unescape_meta
   134 
   135 fun extract_query line =
   136   case space_explode ":" line of
   137     [goal_name, suggs] => (unescape_meta goal_name, unescape_metas suggs)
   138   | _ => ("", [])
   139 
   140 fun parent_of_local_thm th =
   141   let
   142     val thy = th |> Thm.theory_of_thm
   143     val facts = thy |> Global_Theory.facts_of
   144     val space = facts |> Facts.space_of
   145     fun id_of s = #id (Name_Space.the_entry space s)
   146     fun max_id (s', _) (s, id) =
   147       let val id' = id_of s' in if id > id' then (s, id) else (s', id') end
   148   in ("", ~1) |> Facts.fold_static max_id facts |> fst end
   149 
   150 val local_prefix = "local" ^ Long_Name.separator
   151 
   152 fun nickname_of th =
   153   if Thm.has_name_hint th then
   154     let val hint = Thm.get_name_hint th in
   155       (* FIXME: There must be a better way to detect local facts. *)
   156       case try (unprefix local_prefix) hint of
   157         SOME suf =>
   158         parent_of_local_thm th ^ Long_Name.separator ^ Long_Name.separator ^ suf
   159       | NONE => hint
   160     end
   161   else
   162     backquote_thm th
   163 
   164 fun suggested_facts suggs facts =
   165   let
   166     fun add_fact (fact as (_, th)) = Symtab.default (nickname_of th, fact)
   167     val tab = Symtab.empty |> fold add_fact facts
   168   in map_filter (Symtab.lookup tab) suggs end
   169 
   170 (* Ad hoc score function roughly based on Blanchette's Ringberg 2011 data. *)
   171 fun score x = Math.pow (1.5, 15.5 - 0.05 * Real.fromInt x) + 15.0
   172 
   173 fun sum_sq_avg [] = 0
   174   | sum_sq_avg xs =
   175     Real.ceil (100000.0 * fold (curry (op +) o score) xs 0.0) div length xs
   176 
   177 fun mesh_facts max_facts [(selected, unknown)] =
   178     take max_facts selected @ take (max_facts - length selected) unknown
   179   | mesh_facts max_facts mess =
   180     let
   181       val mess = mess |> map (apfst (`length))
   182       val fact_eq = Thm.eq_thm o pairself snd
   183       fun score_in fact ((sel_len, sels), unks) =
   184         case find_index (curry fact_eq fact) sels of
   185           ~1 => (case find_index (curry fact_eq fact) unks of
   186                    ~1 => SOME sel_len
   187                  | _ => NONE)
   188         | j => SOME j
   189       fun score_of fact = mess |> map_filter (score_in fact) |> sum_sq_avg
   190       val facts = fold (union fact_eq o take max_facts o snd o fst) mess []
   191     in
   192       facts |> map (`score_of) |> sort (int_ord o swap o pairself fst)
   193             |> map snd |> take max_facts
   194     end
   195 
   196 val thy_feature_name_of = prefix "y"
   197 val const_name_of = prefix "c"
   198 val type_name_of = prefix "t"
   199 val class_name_of = prefix "s"
   200 
   201 fun is_likely_tautology_or_too_meta th =
   202   let
   203     val is_boring_const = member (op =) atp_widely_irrelevant_consts
   204     fun is_boring_bool t =
   205       not (exists_Const (not o is_boring_const o fst) t) orelse
   206       exists_type (exists_subtype (curry (op =) @{typ prop})) t
   207     fun is_boring_prop (@{const Trueprop} $ t) = is_boring_bool t
   208       | is_boring_prop (@{const "==>"} $ t $ u) =
   209         is_boring_prop t andalso is_boring_prop u
   210       | is_boring_prop (Const (@{const_name all}, _) $ (Abs (_, _, t)) $ u) =
   211         is_boring_prop t andalso is_boring_prop u
   212       | is_boring_prop (Const (@{const_name "=="}, _) $ t $ u) =
   213         is_boring_bool t andalso is_boring_bool u
   214       | is_boring_prop _ = true
   215   in
   216     is_boring_prop (prop_of th) andalso not (Thm.eq_thm_prop (@{thm ext}, th))
   217   end
   218 
   219 fun theory_ord p =
   220   if Theory.eq_thy p then
   221     EQUAL
   222   else if Theory.subthy p then
   223     LESS
   224   else if Theory.subthy (swap p) then
   225     GREATER
   226   else case int_ord (pairself (length o Theory.ancestors_of) p) of
   227     EQUAL => string_ord (pairself Context.theory_name p)
   228   | order => order
   229 
   230 val thm_ord = theory_ord o pairself theory_of_thm
   231 
   232 val freezeT = Type.legacy_freeze_type
   233 
   234 fun freeze (t $ u) = freeze t $ freeze u
   235   | freeze (Abs (s, T, t)) = Abs (s, freezeT T, freeze t)
   236   | freeze (Var ((s, _), T)) = Free (s, freezeT T)
   237   | freeze (Const (s, T)) = Const (s, freezeT T)
   238   | freeze (Free (s, T)) = Free (s, freezeT T)
   239   | freeze t = t
   240 
   241 fun goal_of_thm thy = prop_of #> freeze #> cterm_of thy #> Goal.init
   242 
   243 fun run_prover_for_mash ctxt params prover facts goal =
   244   let
   245     val problem =
   246       {state = Proof.init ctxt, goal = goal, subgoal = 1, subgoal_count = 1,
   247        facts = facts |> map (apfst (apfst (fn name => name ())))
   248                      |> map Untranslated_Fact}
   249   in
   250     get_minimizing_prover ctxt MaSh (K (K ())) prover params (K (K (K "")))
   251                           problem
   252   end
   253 
   254 val bad_types = [@{type_name prop}, @{type_name bool}, @{type_name fun}]
   255 
   256 val logical_consts =
   257   [@{const_name prop}, @{const_name Pure.conjunction}] @ atp_logical_consts
   258 
   259 fun interesting_terms_types_and_classes ctxt prover term_max_depth
   260                                         type_max_depth ts =
   261   let
   262     fun is_bad_const (x as (s, _)) args =
   263       member (op =) logical_consts s orelse
   264       fst (is_built_in_const_for_prover ctxt prover x args)
   265     fun add_classes @{sort type} = I
   266       | add_classes S = union (op =) (map class_name_of S)
   267     fun do_add_type (Type (s, Ts)) =
   268         (not (member (op =) bad_types s) ? insert (op =) (type_name_of s))
   269         #> fold do_add_type Ts
   270       | do_add_type (TFree (_, S)) = add_classes S
   271       | do_add_type (TVar (_, S)) = add_classes S
   272     fun add_type T = type_max_depth >= 0 ? do_add_type T
   273     fun mk_app s args =
   274       if member (op <>) args "" then s ^ "(" ^ space_implode "," args ^ ")"
   275       else s
   276     fun patternify ~1 _ = ""
   277       | patternify depth t =
   278         case strip_comb t of
   279           (Const (x as (s, _)), args) =>
   280           if is_bad_const x args then ""
   281           else mk_app (const_name_of s) (map (patternify (depth - 1)) args)
   282         | _ => ""
   283     fun add_term_patterns ~1 _ = I
   284       | add_term_patterns depth t =
   285         insert (op =) (patternify depth t)
   286         #> add_term_patterns (depth - 1) t
   287     val add_term = add_term_patterns term_max_depth
   288     fun add_patterns t =
   289       let val (head, args) = strip_comb t in
   290         (case head of
   291            Const (_, T) => add_term t #> add_type T
   292          | Free (_, T) => add_type T
   293          | Var (_, T) => add_type T
   294          | Abs (_, T, body) => add_type T #> add_patterns body
   295          | _ => I)
   296         #> fold add_patterns args
   297       end
   298   in [] |> fold add_patterns ts end
   299 
   300 fun is_exists (s, _) = (s = @{const_name Ex} orelse s = @{const_name Ex1})
   301 
   302 val term_max_depth = 1
   303 val type_max_depth = 1
   304 
   305 (* TODO: Generate type classes for types? *)
   306 fun features_of ctxt prover thy (scope, status) ts =
   307   thy_feature_name_of (Context.theory_name thy) ::
   308   interesting_terms_types_and_classes ctxt prover term_max_depth type_max_depth
   309                                       ts
   310   |> forall is_lambda_free ts ? cons "no_lams"
   311   |> forall (not o exists_Const is_exists) ts ? cons "no_skos"
   312   |> scope <> Global ? cons "local"
   313   |> (case status of
   314         General => I
   315       | Induction => cons "induction"
   316       | Intro => cons "intro"
   317       | Inductive => cons "inductive"
   318       | Elim => cons "elim"
   319       | Simp => cons "simp"
   320       | Def => cons "def")
   321 
   322 (* Too many dependencies is a sign that a decision procedure is at work. There
   323    isn't much too learn from such proofs. *)
   324 val max_dependencies = 10
   325 val atp_dependency_default_max_fact = 50
   326 
   327 fun trim_dependencies deps =
   328   if length deps <= max_dependencies then SOME deps else NONE
   329 
   330 fun isar_dependencies_of all_facts =
   331   thms_in_proof (SOME all_facts) #> trim_dependencies
   332 
   333 fun atp_dependencies_of ctxt (params as {verbose, max_facts, ...}) prover
   334                         auto_level facts all_names th =
   335   case isar_dependencies_of all_names th of
   336     SOME [] => NONE
   337   | isar_deps =>
   338     let
   339       val thy = Proof_Context.theory_of ctxt
   340       val goal = goal_of_thm thy th
   341       val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal 1
   342       val facts = facts |> filter (fn (_, th') => thm_ord (th', th) = LESS)
   343       fun fix_name ((_, stature), th) = ((fn () => nickname_of th, stature), th)
   344       fun is_dep dep (_, th) = nickname_of th = dep
   345       fun add_isar_dep facts dep accum =
   346         if exists (is_dep dep) accum then
   347           accum
   348         else case find_first (is_dep dep) facts of
   349           SOME ((name, status), th) => accum @ [((name, status), th)]
   350         | NONE => accum (* shouldn't happen *)
   351       val facts =
   352         facts |> iterative_relevant_facts ctxt params prover
   353                      (max_facts |> the_default atp_dependency_default_max_fact)
   354                      NONE hyp_ts concl_t
   355               |> fold (add_isar_dep facts) (these isar_deps)
   356               |> map fix_name
   357     in
   358       if verbose andalso auto_level = 0 then
   359         let val num_facts = length facts in
   360           "MaSh: " ^ quote prover ^ " on " ^ quote (nickname_of th) ^
   361           " with " ^ string_of_int num_facts ^ " fact" ^ plural_s num_facts ^
   362           "."
   363           |> Output.urgent_message
   364         end
   365       else
   366         ();
   367       case run_prover_for_mash ctxt params prover facts goal of
   368         {outcome = NONE, used_facts, ...} =>
   369         (if verbose andalso auto_level = 0 then
   370            let val num_facts = length used_facts in
   371              "Found proof with " ^ string_of_int num_facts ^ " fact" ^
   372              plural_s num_facts ^ "."
   373              |> Output.urgent_message
   374            end
   375          else
   376            ();
   377          used_facts |> map fst |> trim_dependencies)
   378       | _ => NONE
   379     end
   380 
   381 
   382 (*** Low-level communication with MaSh ***)
   383 
   384 (* more friendly than "try o File.rm" for those who keep the files open in their
   385    text editor *)
   386 fun wipe_out file = File.write file ""
   387 
   388 fun write_file (xs, f) file =
   389   let val path = Path.explode file in
   390     wipe_out path;
   391     xs |> chunk_list 500
   392        |> List.app (File.append path o space_implode "" o map f)
   393   end
   394 
   395 fun run_mash_tool ctxt overlord save max_suggs write_cmds read_suggs =
   396   let
   397     val (temp_dir, serial) =
   398       if overlord then (getenv "ISABELLE_HOME_USER", "")
   399       else (getenv "ISABELLE_TMP", serial_string ())
   400     val log_file = if overlord then temp_dir ^ "/mash_log" else "/dev/null"
   401     val err_file = temp_dir ^ "/mash_err" ^ serial
   402     val sugg_file = temp_dir ^ "/mash_suggs" ^ serial
   403     val cmd_file = temp_dir ^ "/mash_commands" ^ serial
   404     val core =
   405       "--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^
   406       " --numberOfPredictions " ^ string_of_int max_suggs ^
   407       (if save then " --saveModel" else "")
   408     val command =
   409       mash_home () ^ "/mash --quiet --outputDir " ^ mash_model_dir () ^
   410       " --log " ^ log_file ^ " " ^ core ^ " >& " ^ err_file
   411   in
   412     write_file ([], K "") sugg_file;
   413     write_file write_cmds cmd_file;
   414     trace_msg ctxt (fn () => "Running " ^ command);
   415     Isabelle_System.bash command;
   416     read_suggs (fn () => try File.read_lines (Path.explode sugg_file) |> these)
   417     |> tap (fn _ => trace_msg ctxt (fn () =>
   418            case try File.read (Path.explode err_file) of
   419              NONE => "Done"
   420            | SOME "" => "Done"
   421            | SOME s => "Error: " ^ elide_string 1000 s))
   422     |> not overlord
   423        ? tap (fn _ => List.app (wipe_out o Path.explode)
   424                                [err_file, sugg_file, cmd_file])
   425   end
   426 
   427 fun str_of_add (name, parents, feats, deps) =
   428   "! " ^ escape_meta name ^ ": " ^ escape_metas parents ^ "; " ^
   429   escape_metas feats ^ "; " ^ escape_metas deps ^ "\n"
   430 
   431 fun str_of_reprove (name, deps) =
   432   "p " ^ escape_meta name ^ ": " ^ escape_metas deps ^ "\n"
   433 
   434 fun str_of_query (parents, feats) =
   435   "? " ^ escape_metas parents ^ "; " ^ escape_metas feats
   436 
   437 fun mash_CLEAR ctxt =
   438   let val path = mash_model_dir () |> Path.explode in
   439     trace_msg ctxt (K "MaSh CLEAR");
   440     File.fold_dir (fn file => fn _ =>
   441                       try File.rm (Path.append path (Path.basic file)))
   442                   path NONE;
   443     ()
   444   end
   445 
   446 fun mash_ADD _ _ [] = ()
   447   | mash_ADD ctxt overlord adds =
   448     (trace_msg ctxt (fn () => "MaSh ADD " ^
   449          elide_string 1000 (space_implode " " (map #1 adds)));
   450      run_mash_tool ctxt overlord true 0 (adds, str_of_add) (K ()))
   451 
   452 fun mash_REPROVE _ _ [] = ()
   453   | mash_REPROVE ctxt overlord reps =
   454     (trace_msg ctxt (fn () => "MaSh REPROVE " ^
   455          elide_string 1000 (space_implode " " (map #1 reps)));
   456      run_mash_tool ctxt overlord true 0 (reps, str_of_reprove) (K ()))
   457 
   458 fun mash_QUERY ctxt overlord max_suggs (query as (_, feats)) =
   459   (trace_msg ctxt (fn () => "MaSh QUERY " ^ space_implode " " feats);
   460    run_mash_tool ctxt overlord false max_suggs
   461        ([query], str_of_query)
   462        (fn suggs =>
   463            case suggs () of
   464              [] => []
   465            | suggs => snd (extract_query (List.last suggs)))
   466    handle List.Empty => [])
   467 
   468 
   469 (*** High-level communication with MaSh ***)
   470 
   471 fun try_graph ctxt when def f =
   472   f ()
   473   handle Graph.CYCLES (cycle :: _) =>
   474          (trace_msg ctxt (fn () =>
   475               "Cycle involving " ^ commas cycle ^ " when " ^ when); def)
   476        | Graph.DUP name =>
   477          (trace_msg ctxt (fn () =>
   478               "Duplicate fact " ^ quote name ^ " when " ^ when); def)
   479        | Graph.UNDEF name =>
   480          (trace_msg ctxt (fn () =>
   481               "Unknown fact " ^ quote name ^ " when " ^ when); def)
   482        | exn =>
   483          if Exn.is_interrupt exn then
   484            reraise exn
   485          else
   486            (trace_msg ctxt (fn () =>
   487                 "Internal error when " ^ when ^ ":\n" ^
   488                 ML_Compiler.exn_message exn); def)
   489 
   490 type mash_state = {fact_G : unit Graph.T}
   491 
   492 val empty_state = {fact_G = Graph.empty}
   493 
   494 local
   495 
   496 val version = "*** MaSh 0.0 ***"
   497 
   498 fun load _ (state as (true, _)) = state
   499   | load ctxt _ =
   500     let val path = mash_state_path () in
   501       (true,
   502        case try File.read_lines path of
   503          SOME (version' :: fact_lines) =>
   504          let
   505            fun add_edge_to name parent =
   506              Graph.default_node (parent, ())
   507              #> Graph.add_edge (parent, name)
   508            fun add_fact_line line =
   509              case extract_query line of
   510                ("", _) => I (* shouldn't happen *)
   511              | (name, parents) =>
   512                Graph.default_node (name, ())
   513                #> fold (add_edge_to name) parents
   514            val fact_G =
   515              try_graph ctxt "loading state" Graph.empty (fn () =>
   516                  Graph.empty |> version' = version
   517                                 ? fold add_fact_line fact_lines)
   518          in {fact_G = fact_G} end
   519        | _ => empty_state)
   520     end
   521 
   522 fun save {fact_G} =
   523   let
   524     val path = mash_state_path ()
   525     fun fact_line_for name parents =
   526       escape_meta name ^ ": " ^ escape_metas parents
   527     val append_fact = File.append path o suffix "\n" oo fact_line_for
   528     fun append_entry (name, ((), (parents, _))) () =
   529       append_fact name (Graph.Keys.dest parents)
   530   in
   531     File.write path (version ^ "\n");
   532     Graph.fold append_entry fact_G ()
   533   end
   534 
   535 val global_state =
   536   Synchronized.var "Sledgehammer_MaSh.global_state" (false, empty_state)
   537 
   538 in
   539 
   540 fun mash_map ctxt f =
   541   Synchronized.change global_state (load ctxt ##> (f #> tap save))
   542 
   543 fun mash_get ctxt =
   544   Synchronized.change_result global_state (load ctxt #> `snd)
   545 
   546 fun mash_unlearn ctxt =
   547   Synchronized.change global_state (fn _ =>
   548       (mash_CLEAR ctxt; wipe_out (mash_state_path ()); (true, empty_state)))
   549 
   550 end
   551 
   552 fun mash_could_suggest_facts () = mash_home () <> ""
   553 fun mash_can_suggest_facts ctxt = not (Graph.is_empty (#fact_G (mash_get ctxt)))
   554 
   555 fun queue_of xs = Queue.empty |> fold Queue.enqueue xs
   556 
   557 fun max_facts_in_graph fact_G facts =
   558   let
   559     val facts = [] |> fold (cons o nickname_of o snd) facts
   560     val tab = Symtab.empty |> fold (fn name => Symtab.update (name, ())) facts
   561     fun enqueue_new seen name =
   562       not (member (op =) seen name) ? Queue.enqueue name
   563     fun find_maxes seen maxs names =
   564       case try Queue.dequeue names of
   565         NONE => map snd maxs
   566       | SOME (name, names) =>
   567         if Symtab.defined tab name then
   568           let
   569             val new = (Graph.all_preds fact_G [name], name)
   570             fun not_ancestor (_, x) (yp, _) = not (member (op =) yp x)
   571             val maxs = maxs |> filter (fn max => not_ancestor max new)
   572             val maxs = maxs |> forall (not_ancestor new) maxs ? cons new
   573           in find_maxes (name :: seen) maxs names end
   574         else
   575           find_maxes (name :: seen) maxs
   576                      (Graph.Keys.fold (enqueue_new seen)
   577                                       (Graph.imm_preds fact_G name) names)
   578   in find_maxes [] [] (queue_of (Graph.maximals fact_G)) end
   579 
   580 (* Generate more suggestions than requested, because some might be thrown out
   581    later for various reasons and "meshing" gives better results with some
   582    slack. *)
   583 fun max_suggs_of max_facts = max_facts + Int.min (200, max_facts)
   584 
   585 fun is_fact_in_graph fact_G (_, th) =
   586   can (Graph.get_node fact_G) (nickname_of th)
   587 
   588 fun mash_suggest_facts ctxt ({overlord, ...} : params) prover max_facts hyp_ts
   589                        concl_t facts =
   590   let
   591     val thy = Proof_Context.theory_of ctxt
   592     val fact_G = #fact_G (mash_get ctxt)
   593     val parents = max_facts_in_graph fact_G facts
   594     val feats = features_of ctxt prover thy (Local, General) (concl_t :: hyp_ts)
   595     val suggs =
   596       if Graph.is_empty fact_G then []
   597       else mash_QUERY ctxt overlord (max_suggs_of max_facts) (parents, feats)
   598     val selected = facts |> suggested_facts suggs
   599     val unknown = facts |> filter_out (is_fact_in_graph fact_G)
   600   in (selected, unknown) end
   601 
   602 fun add_to_fact_graph ctxt (name, parents, feats, deps) (adds, graph) =
   603   let
   604     fun maybe_add_from from (accum as (parents, graph)) =
   605       try_graph ctxt "updating graph" accum (fn () =>
   606           (from :: parents, Graph.add_edge_acyclic (from, name) graph))
   607     val graph = graph |> Graph.default_node (name, ())
   608     val (parents, graph) = ([], graph) |> fold maybe_add_from parents
   609     val (deps, graph) = ([], graph) |> fold maybe_add_from deps
   610   in ((name, parents, feats, deps) :: adds, graph) end
   611 
   612 val learn_timeout_slack = 2.0
   613 
   614 fun launch_thread timeout task =
   615   let
   616     val hard_timeout = time_mult learn_timeout_slack timeout
   617     val birth_time = Time.now ()
   618     val death_time = Time.+ (birth_time, hard_timeout)
   619     val desc = ("machine learner for Sledgehammer", "")
   620   in Async_Manager.launch MaShN birth_time death_time desc task end
   621 
   622 fun freshish_name () =
   623   Date.fmt ".%Y_%m_%d_%H_%M_%S__" (Date.fromTimeLocal (Time.now ())) ^
   624   serial_string ()
   625 
   626 fun mash_learn_proof ctxt ({overlord, timeout, ...} : params) prover t facts
   627                      used_ths =
   628   if is_smt_prover ctxt prover then
   629     ()
   630   else
   631     launch_thread timeout (fn () =>
   632         let
   633           val thy = Proof_Context.theory_of ctxt
   634           val name = freshish_name ()
   635           val feats = features_of ctxt prover thy (Local, General) [t]
   636           val deps = used_ths |> map nickname_of
   637           val {fact_G} = mash_get ctxt
   638           val parents = max_facts_in_graph fact_G facts
   639         in
   640           mash_ADD ctxt overlord [(name, parents, feats, deps)]; (true, "")
   641         end)
   642 
   643 fun sendback sub =
   644   Markup.markup Isabelle_Markup.sendback (sledgehammerN ^ " " ^ sub)
   645 
   646 val commit_timeout = seconds 30.0
   647 
   648 (* The timeout is understood in a very slack fashion. *)
   649 fun mash_learn_facts ctxt (params as {debug, verbose, overlord, ...}) prover
   650                      auto_level atp learn_timeout facts =
   651   let
   652     val timer = Timer.startRealTimer ()
   653     fun next_commit_time () =
   654       Time.+ (Timer.checkRealTimer timer, commit_timeout)
   655     val {fact_G} = mash_get ctxt
   656     val (old_facts, new_facts) =
   657       facts |> List.partition (is_fact_in_graph fact_G)
   658             ||> sort (thm_ord o pairself snd)
   659   in
   660     if null new_facts andalso (not atp orelse null old_facts) then
   661       if auto_level < 2 then
   662         "No new " ^ (if atp then "ATP" else "Isar") ^ " proofs to learn." ^
   663         (if auto_level = 0 andalso not atp then
   664            "\n\nHint: Try " ^ sendback learn_atpN ^ " to learn from ATP proofs."
   665          else
   666            "")
   667       else
   668         ""
   669     else
   670       let
   671         val all_names =
   672           facts |> map snd
   673                 |> filter_out is_likely_tautology_or_too_meta
   674                 |> map (rpair () o nickname_of)
   675                 |> Symtab.make
   676         val deps_of =
   677           if atp then
   678             atp_dependencies_of ctxt params prover auto_level facts all_names
   679           else
   680             isar_dependencies_of all_names
   681         fun do_commit [] [] state = state
   682           | do_commit adds reps {fact_G} =
   683             let
   684               val (adds, fact_G) =
   685                 ([], fact_G) |> fold (add_to_fact_graph ctxt) adds
   686             in
   687               mash_ADD ctxt overlord (rev adds);
   688               mash_REPROVE ctxt overlord reps;
   689               {fact_G = fact_G}
   690             end
   691         fun commit last adds reps =
   692           (if debug andalso auto_level = 0 then
   693              Output.urgent_message "Committing..."
   694            else
   695              ();
   696            mash_map ctxt (do_commit (rev adds) reps);
   697            if not last andalso auto_level = 0 then
   698              let val num_proofs = length adds + length reps in
   699                "Learned " ^ string_of_int num_proofs ^ " " ^
   700                (if atp then "ATP" else "Isar") ^ " proof" ^
   701                plural_s num_proofs ^ " in the last " ^
   702                string_from_time commit_timeout ^ "."
   703                |> Output.urgent_message
   704              end
   705            else
   706              ())
   707         fun learn_new_fact _ (accum as (_, (_, _, _, true))) = accum
   708           | learn_new_fact ((_, stature), th)
   709                            (adds, (parents, n, next_commit, _)) =
   710             let
   711               val name = nickname_of th
   712               val feats =
   713                 features_of ctxt prover (theory_of_thm th) stature [prop_of th]
   714               val deps = deps_of th |> these
   715               val n = n |> not (null deps) ? Integer.add 1
   716               val adds = (name, parents, feats, deps) :: adds
   717               val (adds, next_commit) =
   718                 if Time.> (Timer.checkRealTimer timer, next_commit) then
   719                   (commit false adds []; ([], next_commit_time ()))
   720                 else
   721                   (adds, next_commit)
   722               val timed_out = Time.> (Timer.checkRealTimer timer, learn_timeout)
   723             in (adds, ([name], n, next_commit, timed_out)) end
   724         val n =
   725           if null new_facts then
   726             0
   727           else
   728             let
   729               val last_th = new_facts |> List.last |> snd
   730               (* crude approximation *)
   731               val ancestors =
   732                 old_facts
   733                 |> filter (fn (_, th) => thm_ord (th, last_th) <> GREATER)
   734               val parents = max_facts_in_graph fact_G ancestors
   735               val (adds, (_, n, _, _)) =
   736                 ([], (parents, 0, next_commit_time (), false))
   737                 |> fold learn_new_fact new_facts
   738             in commit true adds []; n end
   739         fun relearn_old_fact _ (accum as (_, (_, _, true))) = accum
   740           | relearn_old_fact (_, th) (reps, (n, next_commit, _)) =
   741             let
   742               val name = nickname_of th
   743               val (n, reps) =
   744                 case deps_of th of
   745                   SOME deps => (n + 1, (name, deps) :: reps)
   746                 | NONE => (n, reps)
   747               val (reps, next_commit) =
   748                 if Time.> (Timer.checkRealTimer timer, next_commit) then
   749                   (commit false [] reps; ([], next_commit_time ()))
   750                 else
   751                   (reps, next_commit)
   752               val timed_out = Time.> (Timer.checkRealTimer timer, learn_timeout)
   753             in (reps, (n, next_commit, timed_out)) end
   754         val n =
   755           if null old_facts then
   756             n
   757           else
   758             let
   759               fun score_of (_, th) =
   760                 random_range 0 (1000 * max_dependencies)
   761                 - 500 * (th |> isar_dependencies_of all_names
   762                             |> Option.map length
   763                             |> the_default max_dependencies)
   764               val old_facts =
   765                 old_facts |> map (`score_of)
   766                           |> sort (int_ord o pairself fst)
   767                           |> map snd
   768               val (reps, (n, _, _)) =
   769                 ([], (n, next_commit_time (), false))
   770                 |> fold relearn_old_fact old_facts
   771             in commit true [] reps; n end
   772       in
   773         if verbose orelse auto_level < 2 then
   774           "Learned " ^ string_of_int n ^ " nontrivial " ^
   775           (if atp then "ATP" else "Isar") ^ " proof" ^ plural_s n ^
   776           (if verbose then
   777              " in " ^ string_from_time (Timer.checkRealTimer timer)
   778            else
   779              "") ^ "."
   780         else
   781           ""
   782       end
   783   end
   784 
   785 fun mash_learn ctxt (params as {provers, timeout, ...}) fact_override chained
   786                atp =
   787   let
   788     val css = Sledgehammer_Fact.clasimpset_rule_table_of ctxt
   789     val ctxt = ctxt |> Config.put instantiate_inducts false
   790     val facts =
   791       nearly_all_facts ctxt false fact_override Symtab.empty css chained []
   792                        @{prop True}
   793     val num_facts = length facts
   794     val prover = hd provers
   795     fun learn auto_level atp =
   796       mash_learn_facts ctxt params prover auto_level atp infinite_timeout facts
   797       |> Output.urgent_message
   798   in
   799     (if atp then
   800        ("MaShing through " ^ string_of_int num_facts ^ " fact" ^
   801         plural_s num_facts ^ " for ATP proofs (" ^ quote prover ^ " timeout: " ^
   802         string_from_time timeout ^ ").\n\nCollecting Isar proofs first..."
   803         |> Output.urgent_message;
   804         learn 1 false;
   805         "Now collecting ATP proofs. This may take several hours. You can \
   806         \safely stop the learning process at any point."
   807         |> Output.urgent_message;
   808         learn 0 true)
   809      else
   810        ("MaShing through " ^ string_of_int num_facts ^ " fact" ^
   811         plural_s num_facts ^ " for Isar proofs..."
   812         |> Output.urgent_message;
   813         learn 0 false))
   814   end
   815 
   816 (* The threshold should be large enough so that MaSh doesn't kick in for Auto
   817    Sledgehammer and Try. *)
   818 val min_secs_for_learning = 15
   819 
   820 fun relevant_facts ctxt (params as {learn, fact_filter, timeout, ...}) prover
   821         max_facts ({add, only, ...} : fact_override) hyp_ts concl_t facts =
   822   if not (subset (op =) (the_list fact_filter, fact_filters)) then
   823     error ("Unknown fact filter: " ^ quote (the fact_filter) ^ ".")
   824   else if only then
   825     facts
   826   else if max_facts <= 0 orelse null facts then
   827     []
   828   else
   829     let
   830       fun maybe_learn () =
   831         if learn andalso not (Async_Manager.has_running_threads MaShN) andalso
   832            Time.toSeconds timeout >= min_secs_for_learning then
   833           let val timeout = time_mult learn_timeout_slack timeout in
   834             launch_thread timeout
   835                 (fn () => (true, mash_learn_facts ctxt params prover 2 false
   836                                                   timeout facts))
   837           end
   838         else
   839           ()
   840       val fact_filter =
   841         case fact_filter of
   842           SOME ff => (() |> ff <> mepoN ? maybe_learn; ff)
   843         | NONE =>
   844           if is_smt_prover ctxt prover then mepoN
   845           else if mash_can_suggest_facts ctxt then (maybe_learn (); meshN)
   846           else if mash_could_suggest_facts () then (maybe_learn (); mepoN)
   847           else mepoN
   848       val add_ths = Attrib.eval_thms ctxt add
   849       fun prepend_facts ths accepts =
   850         ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @
   851          (accepts |> filter_out (member Thm.eq_thm_prop ths o snd)))
   852         |> take max_facts
   853       fun iter () =
   854         iterative_relevant_facts ctxt params prover max_facts NONE hyp_ts
   855                                  concl_t facts
   856       fun mash () =
   857         mash_suggest_facts ctxt params prover max_facts hyp_ts concl_t facts
   858       val mess =
   859         [] |> (if fact_filter <> mashN then cons (iter (), []) else I)
   860            |> (if fact_filter <> mepoN then cons (mash ()) else I)
   861     in
   862       mesh_facts max_facts mess
   863       |> not (null add_ths) ? prepend_facts add_ths
   864     end
   865 
   866 fun kill_learners () = Async_Manager.kill_threads MaShN "learner"
   867 fun running_learners () = Async_Manager.running_threads MaShN "learner"
   868 
   869 end;