src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
author blanchet
Mon, 23 Jul 2012 15:32:30 +0200
changeset 49453 3e45c98fe127
parent 49451 72a31418ff8d
child 49454 67a6bcbd3587
permissions -rw-r--r--
distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
     1 (*  Title:      HOL/Tools/Sledgehammer/sledgehammer_mash.ML
     2     Author:     Jasmin Blanchette, TU Muenchen
     3 
     4 Sledgehammer's machine-learning-based relevance filter (MaSh).
     5 *)
     6 
     7 signature SLEDGEHAMMER_MASH =
     8 sig
     9   type stature = ATP_Problem_Generate.stature
    10   type fact = Sledgehammer_Fact.fact
    11   type fact_override = Sledgehammer_Fact.fact_override
    12   type params = Sledgehammer_Provers.params
    13   type relevance_fudge = Sledgehammer_Provers.relevance_fudge
    14   type prover_result = Sledgehammer_Provers.prover_result
    15 
    16   val trace : bool Config.T
    17   val MaShN : string
    18   val mepoN : string
    19   val mashN : string
    20   val meshN : string
    21   val unlearnN : string
    22   val learn_isarN : string
    23   val learn_atpN : string
    24   val relearn_isarN : string
    25   val relearn_atpN : string
    26   val fact_filters : string list
    27   val escape_meta : string -> string
    28   val escape_metas : string list -> string
    29   val unescape_meta : string -> string
    30   val unescape_metas : string -> string list
    31   val extract_query : string -> string * (string * real) list
    32   val nickname_of : thm -> string
    33   val suggested_facts :
    34     (string * 'a) list -> ('b * thm) list -> (('b * thm) * 'a) list
    35   val mesh_facts :
    36     int -> ((('a * thm) * real) list * ('a * thm) list) list -> ('a * thm) list
    37   val theory_ord : theory * theory -> order
    38   val thm_ord : thm * thm -> order
    39   val goal_of_thm : theory -> thm -> thm
    40   val run_prover_for_mash :
    41     Proof.context -> params -> string -> fact list -> thm -> prover_result
    42   val features_of :
    43     Proof.context -> string -> theory -> stature -> term list -> string list
    44   val isar_dependencies_of : unit Symtab.table -> thm -> string list option
    45   val atp_dependencies_of :
    46     Proof.context -> params -> string -> int -> fact list -> unit Symtab.table
    47     -> thm -> string list option
    48   val mash_CLEAR : Proof.context -> unit
    49   val mash_ADD :
    50     Proof.context -> bool
    51     -> (string * string list * string list * string list) list -> unit
    52   val mash_REPROVE :
    53     Proof.context -> bool -> (string * string list) list -> unit
    54   val mash_QUERY :
    55     Proof.context -> bool -> int -> string list * string list
    56     -> (string * real) list
    57   val mash_unlearn : Proof.context -> unit
    58   val mash_could_suggest_facts : unit -> bool
    59   val mash_can_suggest_facts : Proof.context -> bool
    60   val mash_suggested_facts :
    61     Proof.context -> params -> string -> int -> term list -> term
    62     -> fact list -> (fact * real) list * fact list
    63   val mash_learn_proof :
    64     Proof.context -> params -> string -> term -> ('a * thm) list -> thm list
    65     -> unit
    66   val mash_learn :
    67     Proof.context -> params -> fact_override -> thm list -> bool -> unit
    68   val relevant_facts :
    69     Proof.context -> params -> string -> int -> fact_override -> term list
    70     -> term -> fact list -> fact list
    71   val kill_learners : unit -> unit
    72   val running_learners : unit -> unit
    73 end;
    74 
    75 structure Sledgehammer_MaSh : SLEDGEHAMMER_MASH =
    76 struct
    77 
    78 open ATP_Util
    79 open ATP_Problem_Generate
    80 open Sledgehammer_Util
    81 open Sledgehammer_Fact
    82 open Sledgehammer_Provers
    83 open Sledgehammer_Minimize
    84 open Sledgehammer_MePo
    85 
    86 val trace =
    87   Attrib.setup_config_bool @{binding sledgehammer_mash_trace} (K false)
    88 fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
    89 
    90 val MaShN = "MaSh"
    91 
    92 val mepoN = "mepo"
    93 val mashN = "mash"
    94 val meshN = "mesh"
    95 
    96 val fact_filters = [meshN, mepoN, mashN]
    97 
    98 val unlearnN = "unlearn"
    99 val learn_isarN = "learn_isar"
   100 val learn_atpN = "learn_atp"
   101 val relearn_isarN = "relearn_isar"
   102 val relearn_atpN = "relearn_atp"
   103 
   104 fun mash_home () = getenv "MASH_HOME"
   105 fun mash_model_dir () =
   106   getenv "ISABELLE_HOME_USER" ^ "/mash"
   107   |> tap (Isabelle_System.mkdir o Path.explode)
   108 val mash_state_dir = mash_model_dir
   109 fun mash_state_file () = mash_state_dir () ^ "/state"
   110 
   111 
   112 (*** Isabelle helpers ***)
   113 
   114 fun meta_char c =
   115   if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse
   116      c = #")" orelse c = #"," then
   117     String.str c
   118   else
   119     (* fixed width, in case more digits follow *)
   120     "%" ^ stringN_of_int 3 (Char.ord c)
   121 
   122 fun unmeta_chars accum [] = String.implode (rev accum)
   123   | unmeta_chars accum (#"%" :: d1 :: d2 :: d3 :: cs) =
   124     (case Int.fromString (String.implode [d1, d2, d3]) of
   125        SOME n => unmeta_chars (Char.chr n :: accum) cs
   126      | NONE => "" (* error *))
   127   | unmeta_chars _ (#"%" :: _) = "" (* error *)
   128   | unmeta_chars accum (c :: cs) = unmeta_chars (c :: accum) cs
   129 
   130 val escape_meta = String.translate meta_char
   131 val escape_metas = map escape_meta #> space_implode " "
   132 val unescape_meta = String.explode #> unmeta_chars []
   133 val unescape_metas =
   134   space_explode " " #> filter_out (curry (op =) "") #> map unescape_meta
   135 
   136 fun extract_node line =
   137   case space_explode ":" line of
   138     [name, parents] => (unescape_meta name, unescape_metas parents)
   139   | _ => ("", [])
   140 
   141 fun extract_suggestion sugg =
   142   case space_explode "=" sugg of
   143     [name, weight] =>
   144     SOME (unescape_meta name, Real.fromString weight |> the_default 0.0)
   145   | _ => NONE
   146 
   147 fun extract_query line =
   148   case space_explode ":" line of
   149     [goal, suggs] =>
   150     (unescape_meta goal,
   151      map_filter extract_suggestion (space_explode " " suggs))
   152   | _ => ("", [])
   153 
   154 fun parent_of_local_thm th =
   155   let
   156     val thy = th |> Thm.theory_of_thm
   157     val facts = thy |> Global_Theory.facts_of
   158     val space = facts |> Facts.space_of
   159     fun id_of s = #id (Name_Space.the_entry space s)
   160     fun max_id (s', _) (s, id) =
   161       let val id' = id_of s' in if id > id' then (s, id) else (s', id') end
   162   in ("", ~1) |> Facts.fold_static max_id facts |> fst end
   163 
   164 val local_prefix = "local" ^ Long_Name.separator
   165 
   166 fun nickname_of th =
   167   if Thm.has_name_hint th then
   168     let val hint = Thm.get_name_hint th in
   169       (* FIXME: There must be a better way to detect local facts. *)
   170       case try (unprefix local_prefix) hint of
   171         SOME suf =>
   172         parent_of_local_thm th ^ Long_Name.separator ^ Long_Name.separator ^ suf
   173       | NONE => hint
   174     end
   175   else
   176     backquote_thm th
   177 
   178 fun suggested_facts suggs facts =
   179   let
   180     fun add_fact (fact as (_, th)) = Symtab.default (nickname_of th, fact)
   181     val tab = Symtab.empty |> fold add_fact facts
   182     fun find_sugg (name, weight) =
   183       Symtab.lookup tab name |> Option.map (rpair weight)
   184   in map_filter find_sugg suggs end
   185 
   186 fun sum_avg [] = 0
   187   | sum_avg xs =
   188     Real.ceil (100000000.0 * fold (curry (op +)) xs 0.0) div length xs
   189 
   190 fun normalize_scores [] = []
   191   | normalize_scores ((fact, score) :: tail) =
   192     (fact, 1.0) :: map (apsnd (curry Real.* (1.0 / score))) tail
   193 
   194 fun mesh_facts max_facts [(sels, unks)] =
   195     map fst (take max_facts sels) @ take (max_facts - length sels) unks
   196   | mesh_facts max_facts mess =
   197     let
   198       val mess = mess |> map (apfst (normalize_scores #> `length))
   199       val fact_eq = Thm.eq_thm o pairself snd
   200       fun score_at sels = try (nth sels) #> Option.map snd
   201       fun score_in fact ((sel_len, sels), unks) =
   202         case find_index (curry fact_eq fact o fst) sels of
   203           ~1 => (case find_index (curry fact_eq fact) unks of
   204                    ~1 => score_at sels sel_len
   205                  | _ => NONE)
   206         | rank => score_at sels rank
   207       fun weight_of fact = mess |> map_filter (score_in fact) |> sum_avg
   208       val facts =
   209         fold (union fact_eq o map fst o take max_facts o snd o fst) mess []
   210     in
   211       facts |> map (`weight_of) |> sort (int_ord o swap o pairself fst)
   212             |> map snd |> take max_facts
   213     end
   214 
   215 val thy_feature_name_of = prefix "y"
   216 val const_name_of = prefix "c"
   217 val type_name_of = prefix "t"
   218 val class_name_of = prefix "s"
   219 
   220 fun theory_ord p =
   221   if Theory.eq_thy p then
   222     EQUAL
   223   else if Theory.subthy p then
   224     LESS
   225   else if Theory.subthy (swap p) then
   226     GREATER
   227   else case int_ord (pairself (length o Theory.ancestors_of) p) of
   228     EQUAL => string_ord (pairself Context.theory_name p)
   229   | order => order
   230 
   231 val thm_ord = theory_ord o pairself theory_of_thm
   232 
   233 val freezeT = Type.legacy_freeze_type
   234 
   235 fun freeze (t $ u) = freeze t $ freeze u
   236   | freeze (Abs (s, T, t)) = Abs (s, freezeT T, freeze t)
   237   | freeze (Var ((s, _), T)) = Free (s, freezeT T)
   238   | freeze (Const (s, T)) = Const (s, freezeT T)
   239   | freeze (Free (s, T)) = Free (s, freezeT T)
   240   | freeze t = t
   241 
   242 fun goal_of_thm thy = prop_of #> freeze #> cterm_of thy #> Goal.init
   243 
   244 fun run_prover_for_mash ctxt params prover facts goal =
   245   let
   246     val problem =
   247       {state = Proof.init ctxt, goal = goal, subgoal = 1, subgoal_count = 1,
   248        facts = facts |> map (apfst (apfst (fn name => name ())))
   249                      |> map Untranslated_Fact}
   250   in
   251     get_minimizing_prover ctxt MaSh (K (K ())) prover params (K (K (K "")))
   252                           problem
   253   end
   254 
   255 val bad_types = [@{type_name prop}, @{type_name bool}, @{type_name fun}]
   256 
   257 val logical_consts =
   258   [@{const_name prop}, @{const_name Pure.conjunction}] @ atp_logical_consts
   259 
   260 fun interesting_terms_types_and_classes ctxt prover term_max_depth
   261                                         type_max_depth ts =
   262   let
   263     fun is_bad_const (x as (s, _)) args =
   264       member (op =) logical_consts s orelse
   265       fst (is_built_in_const_for_prover ctxt prover x args)
   266     fun add_classes @{sort type} = I
   267       | add_classes S = union (op =) (map class_name_of S)
   268     fun do_add_type (Type (s, Ts)) =
   269         (not (member (op =) bad_types s) ? insert (op =) (type_name_of s))
   270         #> fold do_add_type Ts
   271       | do_add_type (TFree (_, S)) = add_classes S
   272       | do_add_type (TVar (_, S)) = add_classes S
   273     fun add_type T = type_max_depth >= 0 ? do_add_type T
   274     fun mk_app s args =
   275       if member (op <>) args "" then s ^ "(" ^ space_implode "," args ^ ")"
   276       else s
   277     fun patternify ~1 _ = ""
   278       | patternify depth t =
   279         case strip_comb t of
   280           (Const (x as (s, _)), args) =>
   281           if is_bad_const x args then ""
   282           else mk_app (const_name_of s) (map (patternify (depth - 1)) args)
   283         | _ => ""
   284     fun add_pattern depth t =
   285       case patternify depth t of "" => I | s => insert (op =) s
   286     fun add_term_patterns ~1 _ = I
   287       | add_term_patterns depth t =
   288         add_pattern depth t #> add_term_patterns (depth - 1) t
   289     val add_term = add_term_patterns term_max_depth
   290     fun add_patterns t =
   291       let val (head, args) = strip_comb t in
   292         (case head of
   293            Const (_, T) => add_term t #> add_type T
   294          | Free (_, T) => add_type T
   295          | Var (_, T) => add_type T
   296          | Abs (_, T, body) => add_type T #> add_patterns body
   297          | _ => I)
   298         #> fold add_patterns args
   299       end
   300   in [] |> fold add_patterns ts end
   301 
   302 fun is_exists (s, _) = (s = @{const_name Ex} orelse s = @{const_name Ex1})
   303 
   304 val term_max_depth = 1
   305 val type_max_depth = 1
   306 
   307 (* TODO: Generate type classes for types? *)
   308 fun features_of ctxt prover thy (scope, status) ts =
   309   thy_feature_name_of (Context.theory_name thy) ::
   310   interesting_terms_types_and_classes ctxt prover term_max_depth type_max_depth
   311                                       ts
   312   |> forall is_lambda_free ts ? cons "no_lams"
   313   |> forall (not o exists_Const is_exists) ts ? cons "no_skos"
   314   |> scope <> Global ? cons "local"
   315   |> (case string_of_status status of "" => I | s => cons s)
   316 
   317 (* Too many dependencies is a sign that a decision procedure is at work. There
   318    isn't much too learn from such proofs. *)
   319 val max_dependencies = 15
   320 val atp_dependency_default_max_fact = 50
   321 
   322 (* "type_definition_xxx" facts are characterized by their use of "CollectI". *)
   323 val typedef_sig = [@{thm CollectI} |> nickname_of]
   324 
   325 (* "Rep_xxx_inject", "Abs_xxx_inverse", etc., are derived using these facts. *)
   326 val typedef_ths =
   327   @{thms type_definition.Abs_inverse type_definition.Rep_inverse
   328          type_definition.Rep type_definition.Rep_inject
   329          type_definition.Abs_inject type_definition.Rep_cases
   330          type_definition.Abs_cases type_definition.Rep_induct
   331          type_definition.Abs_induct type_definition.Rep_range
   332          type_definition.Abs_image}
   333   |> map nickname_of
   334 
   335 fun trim_dependencies deps =
   336   if length deps > max_dependencies orelse deps = typedef_sig orelse
   337      exists (member (op =) typedef_ths) deps then
   338     NONE
   339   else
   340     SOME deps
   341 
   342 fun isar_dependencies_of all_names =
   343   thms_in_proof (SOME all_names) #> trim_dependencies
   344 
   345 fun atp_dependencies_of ctxt (params as {verbose, max_facts, ...}) prover
   346                         auto_level facts all_names th =
   347   case isar_dependencies_of all_names th of
   348     SOME [] => NONE
   349   | isar_deps =>
   350     let
   351       val thy = Proof_Context.theory_of ctxt
   352       val goal = goal_of_thm thy th
   353       val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal 1
   354       val facts = facts |> filter (fn (_, th') => thm_ord (th', th) = LESS)
   355       fun fix_name ((_, stature), th) = ((fn () => nickname_of th, stature), th)
   356       fun is_dep dep (_, th) = nickname_of th = dep
   357       fun add_isar_dep facts dep accum =
   358         if exists (is_dep dep) accum then
   359           accum
   360         else case find_first (is_dep dep) facts of
   361           SOME ((name, status), th) => accum @ [((name, status), th)]
   362         | NONE => accum (* shouldn't happen *)
   363       val facts =
   364         facts |> mepo_suggested_facts ctxt params prover
   365                      (max_facts |> the_default atp_dependency_default_max_fact)
   366                      NONE hyp_ts concl_t
   367               |> fold (add_isar_dep facts) (these isar_deps)
   368               |> map fix_name
   369     in
   370       if verbose andalso auto_level = 0 then
   371         let val num_facts = length facts in
   372           "MaSh: " ^ quote prover ^ " on " ^ quote (nickname_of th) ^
   373           " with " ^ string_of_int num_facts ^ " fact" ^ plural_s num_facts ^
   374           "."
   375           |> Output.urgent_message
   376         end
   377       else
   378         ();
   379       case run_prover_for_mash ctxt params prover facts goal of
   380         {outcome = NONE, used_facts, ...} =>
   381         (if verbose andalso auto_level = 0 then
   382            let val num_facts = length used_facts in
   383              "Found proof with " ^ string_of_int num_facts ^ " fact" ^
   384              plural_s num_facts ^ "."
   385              |> Output.urgent_message
   386            end
   387          else
   388            ();
   389          used_facts |> map fst |> trim_dependencies)
   390       | _ => NONE
   391     end
   392 
   393 
   394 (*** Low-level communication with MaSh ***)
   395 
   396 (* more friendly than "try o File.rm" for those who keep the files open in their
   397    text editor *)
   398 fun wipe_out_file file = File.write (Path.explode file) ""
   399 
   400 fun write_file heading (xs, f) file =
   401   let val path = Path.explode file in
   402     File.write path heading;
   403     xs |> chunk_list 500
   404        |> List.app (File.append path o space_implode "" o map f)
   405   end
   406 
   407 fun run_mash_tool ctxt overlord save max_suggs write_cmds read_suggs =
   408   let
   409     val (temp_dir, serial) =
   410       if overlord then (getenv "ISABELLE_HOME_USER", "")
   411       else (getenv "ISABELLE_TMP", serial_string ())
   412     val log_file = if overlord then temp_dir ^ "/mash_log" else "/dev/null"
   413     val err_file = temp_dir ^ "/mash_err" ^ serial
   414     val sugg_file = temp_dir ^ "/mash_suggs" ^ serial
   415     val cmd_file = temp_dir ^ "/mash_commands" ^ serial
   416     val core =
   417       "--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^
   418       " --numberOfPredictions " ^ string_of_int max_suggs ^
   419       (if save then " --saveModel" else "")
   420     val command =
   421       mash_home () ^ "/mash --quiet --outputDir " ^ mash_model_dir () ^
   422       " --log " ^ log_file ^ " " ^ core ^ " >& " ^ err_file
   423   in
   424     write_file "" ([], K "") sugg_file;
   425     write_file "" write_cmds cmd_file;
   426     trace_msg ctxt (fn () => "Running " ^ command);
   427     Isabelle_System.bash command;
   428     read_suggs (fn () => try File.read_lines (Path.explode sugg_file) |> these)
   429     |> tap (fn _ => trace_msg ctxt (fn () =>
   430            case try File.read (Path.explode err_file) of
   431              NONE => "Done"
   432            | SOME "" => "Done"
   433            | SOME s => "Error: " ^ elide_string 1000 s))
   434     |> not overlord
   435        ? tap (fn _ => List.app wipe_out_file [err_file, sugg_file, cmd_file])
   436   end
   437 
   438 fun str_of_add (name, parents, feats, deps) =
   439   "! " ^ escape_meta name ^ ": " ^ escape_metas parents ^ "; " ^
   440   escape_metas feats ^ "; " ^ escape_metas deps ^ "\n"
   441 
   442 fun str_of_reprove (name, deps) =
   443   "p " ^ escape_meta name ^ ": " ^ escape_metas deps ^ "\n"
   444 
   445 fun str_of_query (parents, feats) =
   446   "? " ^ escape_metas parents ^ "; " ^ escape_metas feats ^ "\n"
   447 
   448 fun mash_CLEAR ctxt =
   449   let val path = mash_model_dir () |> Path.explode in
   450     trace_msg ctxt (K "MaSh CLEAR");
   451     File.fold_dir (fn file => fn _ =>
   452                       try File.rm (Path.append path (Path.basic file)))
   453                   path NONE;
   454     ()
   455   end
   456 
   457 fun mash_ADD _ _ [] = ()
   458   | mash_ADD ctxt overlord adds =
   459     (trace_msg ctxt (fn () => "MaSh ADD " ^
   460          elide_string 1000 (space_implode " " (map #1 adds)));
   461      run_mash_tool ctxt overlord true 0 (adds, str_of_add) (K ()))
   462 
   463 fun mash_REPROVE _ _ [] = ()
   464   | mash_REPROVE ctxt overlord reps =
   465     (trace_msg ctxt (fn () => "MaSh REPROVE " ^
   466          elide_string 1000 (space_implode " " (map #1 reps)));
   467      run_mash_tool ctxt overlord true 0 (reps, str_of_reprove) (K ()))
   468 
   469 fun mash_QUERY ctxt overlord max_suggs (query as (_, feats)) =
   470   (trace_msg ctxt (fn () => "MaSh QUERY " ^ space_implode " " feats);
   471    run_mash_tool ctxt overlord false max_suggs
   472        ([query], str_of_query)
   473        (fn suggs =>
   474            case suggs () of
   475              [] => []
   476            | suggs => snd (extract_query (List.last suggs)))
   477    handle List.Empty => [])
   478 
   479 
   480 (*** High-level communication with MaSh ***)
   481 
   482 fun try_graph ctxt when def f =
   483   f ()
   484   handle Graph.CYCLES (cycle :: _) =>
   485          (trace_msg ctxt (fn () =>
   486               "Cycle involving " ^ commas cycle ^ " when " ^ when); def)
   487        | Graph.DUP name =>
   488          (trace_msg ctxt (fn () =>
   489               "Duplicate fact " ^ quote name ^ " when " ^ when); def)
   490        | Graph.UNDEF name =>
   491          (trace_msg ctxt (fn () =>
   492               "Unknown fact " ^ quote name ^ " when " ^ when); def)
   493        | exn =>
   494          if Exn.is_interrupt exn then
   495            reraise exn
   496          else
   497            (trace_msg ctxt (fn () =>
   498                 "Internal error when " ^ when ^ ":\n" ^
   499                 ML_Compiler.exn_message exn); def)
   500 
   501 fun graph_info G =
   502   string_of_int (length (Graph.keys G)) ^ " node(s), " ^
   503   string_of_int (fold (Integer.add o length o snd) (Graph.dest G) 0) ^
   504   " edge(s), " ^
   505   string_of_int (length (Graph.minimals G)) ^ " minimal, " ^
   506   string_of_int (length (Graph.maximals G)) ^ " maximal"
   507 
   508 type mash_state = {fact_G : unit Graph.T}
   509 
   510 val empty_state = {fact_G = Graph.empty}
   511 
   512 local
   513 
   514 val version = "*** MaSh 0.0 ***"
   515 
   516 fun load _ (state as (true, _)) = state
   517   | load ctxt _ =
   518     let val path = mash_state_file () |> Path.explode in
   519       (true,
   520        case try File.read_lines path of
   521          SOME (version' :: node_lines) =>
   522          let
   523            fun add_edge_to name parent =
   524              Graph.default_node (parent, ()) #> Graph.add_edge (parent, name)
   525            fun add_node line =
   526              case extract_node line of
   527                ("", _) => I (* shouldn't happen *)
   528              | (name, parents) =>
   529                Graph.default_node (name, ()) #> fold (add_edge_to name) parents
   530            val fact_G =
   531              try_graph ctxt "loading state" Graph.empty (fn () =>
   532                  Graph.empty |> version' = version ? fold add_node node_lines)
   533          in
   534            trace_msg ctxt (fn () =>
   535                "Loaded fact graph (" ^ graph_info fact_G ^ ")");
   536            {fact_G = fact_G}
   537          end
   538        | _ => empty_state)
   539     end
   540 
   541 fun save ctxt {fact_G} =
   542   let
   543     fun str_of_entry (name, parents) =
   544       escape_meta name ^ ": " ^ escape_metas parents ^ "\n"
   545     fun append_entry (name, ((), (parents, _))) =
   546       cons (name, Graph.Keys.dest parents)
   547     val entries = [] |> Graph.fold append_entry fact_G
   548   in
   549     write_file (version ^ "\n") (entries, str_of_entry) (mash_state_file ());
   550     trace_msg ctxt (fn () => "Saved fact graph (" ^ graph_info fact_G ^ ")")
   551   end
   552 
   553 val global_state =
   554   Synchronized.var "Sledgehammer_MaSh.global_state" (false, empty_state)
   555 
   556 in
   557 
   558 fun mash_map ctxt f =
   559   Synchronized.change global_state (load ctxt ##> (f #> tap (save ctxt)))
   560 
   561 fun mash_peek ctxt f =
   562   Synchronized.change_result global_state (load ctxt #> `snd #>> f)
   563 
   564 fun mash_get ctxt =
   565   Synchronized.change_result global_state (load ctxt #> `snd)
   566 
   567 fun mash_unlearn ctxt =
   568   Synchronized.change global_state (fn _ =>
   569       (mash_CLEAR ctxt;
   570        wipe_out_file (mash_state_file ());
   571        (true, empty_state)))
   572 
   573 end
   574 
   575 fun mash_could_suggest_facts () = mash_home () <> ""
   576 fun mash_can_suggest_facts ctxt = not (Graph.is_empty (#fact_G (mash_get ctxt)))
   577 
   578 fun num_keys keys = Graph.Keys.fold (K (Integer.add 1)) keys 0
   579 
   580 fun maximal_in_graph fact_G facts =
   581   let
   582     val facts = [] |> fold (cons o nickname_of o snd) facts
   583     val tab = Symtab.empty |> fold (fn name => Symtab.default (name, ())) facts
   584     fun insert_new seen name =
   585       not (Symtab.defined seen name) ? insert (op =) name
   586     fun find_maxes _ (maxs, []) = map snd maxs
   587       | find_maxes seen (maxs, new :: news) =
   588         find_maxes
   589             (seen |> num_keys (Graph.imm_succs fact_G new) > 1
   590                      ? Symtab.default (new, ()))
   591             (if Symtab.defined tab new then
   592                let
   593                  val newp = Graph.all_preds fact_G [new]
   594                  fun is_ancestor x yp = member (op =) yp x
   595                  val maxs =
   596                    maxs |> filter (fn (_, max) => not (is_ancestor max newp))
   597                in
   598                  if exists (is_ancestor new o fst) maxs then
   599                    (maxs, news)
   600                  else
   601                    ((newp, new)
   602                     :: filter_out (fn (_, max) => is_ancestor max newp) maxs,
   603                     news)
   604                end
   605              else
   606                (maxs, Graph.Keys.fold (insert_new seen)
   607                                       (Graph.imm_preds fact_G new) news))
   608   in find_maxes Symtab.empty ([], Graph.maximals fact_G) end
   609 
   610 (* Generate more suggestions than requested, because some might be thrown out
   611    later for various reasons and "meshing" gives better results with some
   612    slack. *)
   613 fun max_suggs_of max_facts = max_facts + Int.min (50, max_facts)
   614 
   615 fun is_fact_in_graph fact_G (_, th) =
   616   can (Graph.get_node fact_G) (nickname_of th)
   617 
   618 fun interleave [] ys = ys
   619   | interleave xs [] = xs
   620   | interleave (x :: xs) (y :: ys) = x :: y :: interleave xs ys
   621 
   622 fun mash_suggested_facts ctxt ({overlord, ...} : params) prover max_facts hyp_ts
   623                          concl_t facts =
   624   let
   625     val thy = Proof_Context.theory_of ctxt
   626     val (fact_G, suggs) =
   627       mash_peek ctxt (fn {fact_G} =>
   628           if Graph.is_empty fact_G then
   629             (fact_G, [])
   630           else
   631             let
   632               val parents = maximal_in_graph fact_G facts
   633               val feats =
   634                 features_of ctxt prover thy (Local, General) (concl_t :: hyp_ts)
   635             in
   636               (fact_G, mash_QUERY ctxt overlord (max_suggs_of max_facts)
   637                                   (parents, feats))
   638             end)
   639     val sels =
   640       facts |> suggested_facts suggs
   641             (* The weights currently returned by "mash.py" are too extreme to
   642                make any sense. *)
   643             |> map fst
   644     val (unk_global, unk_local) =
   645       facts |> filter_out (is_fact_in_graph fact_G)
   646             |> List.partition (fn ((_, (loc, _)), _) => loc = Global)
   647   in (interleave unk_local sels |> weight_mepo_facts, unk_global) end
   648 
   649 fun add_to_fact_graph ctxt (name, parents, feats, deps) (adds, graph) =
   650   let
   651     fun maybe_add_from from (accum as (parents, graph)) =
   652       try_graph ctxt "updating graph" accum (fn () =>
   653           (from :: parents, Graph.add_edge_acyclic (from, name) graph))
   654     val graph = graph |> Graph.default_node (name, ())
   655     val (parents, graph) = ([], graph) |> fold maybe_add_from parents
   656     val (deps, _) = ([], graph) |> fold maybe_add_from deps
   657   in ((name, parents, feats, deps) :: adds, graph) end
   658 
   659 val learn_timeout_slack = 2.0
   660 
   661 fun launch_thread timeout task =
   662   let
   663     val hard_timeout = time_mult learn_timeout_slack timeout
   664     val birth_time = Time.now ()
   665     val death_time = Time.+ (birth_time, hard_timeout)
   666     val desc = ("machine learner for Sledgehammer", "")
   667   in Async_Manager.launch MaShN birth_time death_time desc task end
   668 
   669 fun freshish_name () =
   670   Date.fmt ".%Y_%m_%d_%H_%M_%S__" (Date.fromTimeLocal (Time.now ())) ^
   671   serial_string ()
   672 
   673 fun mash_learn_proof ctxt ({overlord, timeout, ...} : params) prover t facts
   674                      used_ths =
   675   if is_smt_prover ctxt prover then
   676     ()
   677   else
   678     launch_thread timeout (fn () =>
   679         let
   680           val thy = Proof_Context.theory_of ctxt
   681           val name = freshish_name ()
   682           val feats = features_of ctxt prover thy (Local, General) [t]
   683           val deps = used_ths |> map nickname_of
   684         in
   685           mash_peek ctxt (fn {fact_G} =>
   686               let val parents = maximal_in_graph fact_G facts in
   687                 mash_ADD ctxt overlord [(name, parents, feats, deps)]
   688               end);
   689           (true, "")
   690         end)
   691 
   692 fun sendback sub =
   693   Markup.markup Isabelle_Markup.sendback (sledgehammerN ^ " " ^ sub)
   694 
   695 val commit_timeout = seconds 30.0
   696 
   697 (* The timeout is understood in a very slack fashion. *)
   698 fun mash_learn_facts ctxt (params as {debug, verbose, overlord, ...}) prover
   699                      auto_level atp learn_timeout facts =
   700   let
   701     val timer = Timer.startRealTimer ()
   702     fun next_commit_time () =
   703       Time.+ (Timer.checkRealTimer timer, commit_timeout)
   704     val {fact_G} = mash_get ctxt
   705     val (old_facts, new_facts) =
   706       facts |> List.partition (is_fact_in_graph fact_G)
   707             ||> sort (thm_ord o pairself snd)
   708   in
   709     if null new_facts andalso (not atp orelse null old_facts) then
   710       if auto_level < 2 then
   711         "No new " ^ (if atp then "ATP" else "Isar") ^ " proofs to learn." ^
   712         (if auto_level = 0 andalso not atp then
   713            "\n\nHint: Try " ^ sendback learn_atpN ^ " to learn from ATP proofs."
   714          else
   715            "")
   716       else
   717         ""
   718     else
   719       let
   720         val all_names =
   721           facts |> map snd |> map (rpair () o nickname_of) |> Symtab.make
   722         val deps_of =
   723           if atp then
   724             atp_dependencies_of ctxt params prover auto_level facts all_names
   725           else
   726             isar_dependencies_of all_names
   727         fun do_commit [] [] state = state
   728           | do_commit adds reps {fact_G} =
   729             let
   730               val (adds, fact_G) =
   731                 ([], fact_G) |> fold (add_to_fact_graph ctxt) adds
   732             in
   733               mash_ADD ctxt overlord (rev adds);
   734               mash_REPROVE ctxt overlord reps;
   735               {fact_G = fact_G}
   736             end
   737         fun commit last adds reps =
   738           (if debug andalso auto_level = 0 then
   739              Output.urgent_message "Committing..."
   740            else
   741              ();
   742            mash_map ctxt (do_commit (rev adds) reps);
   743            if not last andalso auto_level = 0 then
   744              let val num_proofs = length adds + length reps in
   745                "Learned " ^ string_of_int num_proofs ^ " " ^
   746                (if atp then "ATP" else "Isar") ^ " proof" ^
   747                plural_s num_proofs ^ " in the last " ^
   748                string_from_time commit_timeout ^ "."
   749                |> Output.urgent_message
   750              end
   751            else
   752              ())
   753         fun learn_new_fact _ (accum as (_, (_, _, _, true))) = accum
   754           | learn_new_fact ((_, stature), th)
   755                            (adds, (parents, n, next_commit, _)) =
   756             let
   757               val name = nickname_of th
   758               val feats =
   759                 features_of ctxt prover (theory_of_thm th) stature [prop_of th]
   760               val deps = deps_of th |> these
   761               val n = n |> not (null deps) ? Integer.add 1
   762               val adds = (name, parents, feats, deps) :: adds
   763               val (adds, next_commit) =
   764                 if Time.> (Timer.checkRealTimer timer, next_commit) then
   765                   (commit false adds []; ([], next_commit_time ()))
   766                 else
   767                   (adds, next_commit)
   768               val timed_out = Time.> (Timer.checkRealTimer timer, learn_timeout)
   769             in (adds, ([name], n, next_commit, timed_out)) end
   770         val n =
   771           if null new_facts then
   772             0
   773           else
   774             let
   775               val last_th = new_facts |> List.last |> snd
   776               (* crude approximation *)
   777               val ancestors =
   778                 old_facts
   779                 |> filter (fn (_, th) => thm_ord (th, last_th) <> GREATER)
   780               val parents = maximal_in_graph fact_G ancestors
   781               val (adds, (_, n, _, _)) =
   782                 ([], (parents, 0, next_commit_time (), false))
   783                 |> fold learn_new_fact new_facts
   784             in commit true adds []; n end
   785         fun relearn_old_fact _ (accum as (_, (_, _, true))) = accum
   786           | relearn_old_fact (_, th) (reps, (n, next_commit, _)) =
   787             let
   788               val name = nickname_of th
   789               val (n, reps) =
   790                 case deps_of th of
   791                   SOME deps => (n + 1, (name, deps) :: reps)
   792                 | NONE => (n, reps)
   793               val (reps, next_commit) =
   794                 if Time.> (Timer.checkRealTimer timer, next_commit) then
   795                   (commit false [] reps; ([], next_commit_time ()))
   796                 else
   797                   (reps, next_commit)
   798               val timed_out = Time.> (Timer.checkRealTimer timer, learn_timeout)
   799             in (reps, (n, next_commit, timed_out)) end
   800         val n =
   801           if not atp orelse null old_facts then
   802             n
   803           else
   804             let
   805               fun priority_of (_, th) =
   806                 random_range 0 (1000 * max_dependencies)
   807                 - 500 * (th |> isar_dependencies_of all_names
   808                             |> Option.map length
   809                             |> the_default max_dependencies)
   810               val old_facts =
   811                 old_facts |> map (`priority_of)
   812                           |> sort (int_ord o pairself fst)
   813                           |> map snd
   814               val (reps, (n, _, _)) =
   815                 ([], (n, next_commit_time (), false))
   816                 |> fold relearn_old_fact old_facts
   817             in commit true [] reps; n end
   818       in
   819         if verbose orelse auto_level < 2 then
   820           "Learned " ^ string_of_int n ^ " nontrivial " ^
   821           (if atp then "ATP" else "Isar") ^ " proof" ^ plural_s n ^
   822           (if verbose then
   823              " in " ^ string_from_time (Timer.checkRealTimer timer)
   824            else
   825              "") ^ "."
   826         else
   827           ""
   828       end
   829   end
   830 
   831 fun mash_learn ctxt (params as {provers, timeout, ...}) fact_override chained
   832                atp =
   833   let
   834     val css = Sledgehammer_Fact.clasimpset_rule_table_of ctxt
   835     val ctxt = ctxt |> Config.put instantiate_inducts false
   836     val facts =
   837       nearly_all_facts ctxt false fact_override Symtab.empty css chained []
   838                        @{prop True}
   839     val num_facts = length facts
   840     val prover = hd provers
   841     fun learn auto_level atp =
   842       mash_learn_facts ctxt params prover auto_level atp infinite_timeout facts
   843       |> Output.urgent_message
   844   in
   845     (if atp then
   846        ("MaShing through " ^ string_of_int num_facts ^ " fact" ^
   847         plural_s num_facts ^ " for ATP proofs (" ^ quote prover ^ " timeout: " ^
   848         string_from_time timeout ^ ").\n\nCollecting Isar proofs first..."
   849         |> Output.urgent_message;
   850         learn 1 false;
   851         "Now collecting ATP proofs. This may take several hours. You can \
   852         \safely stop the learning process at any point."
   853         |> Output.urgent_message;
   854         learn 0 true)
   855      else
   856        ("MaShing through " ^ string_of_int num_facts ^ " fact" ^
   857         plural_s num_facts ^ " for Isar proofs..."
   858         |> Output.urgent_message;
   859         learn 0 false))
   860   end
   861 
   862 (* The threshold should be large enough so that MaSh doesn't kick in for Auto
   863    Sledgehammer and Try. *)
   864 val min_secs_for_learning = 15
   865 
   866 fun relevant_facts ctxt (params as {learn, fact_filter, timeout, ...}) prover
   867         max_facts ({add, only, ...} : fact_override) hyp_ts concl_t facts =
   868   if not (subset (op =) (the_list fact_filter, fact_filters)) then
   869     error ("Unknown fact filter: " ^ quote (the fact_filter) ^ ".")
   870   else if only then
   871     facts
   872   else if max_facts <= 0 orelse null facts then
   873     []
   874   else
   875     let
   876       fun maybe_learn () =
   877         if learn andalso not (Async_Manager.has_running_threads MaShN) andalso
   878            Time.toSeconds timeout >= min_secs_for_learning then
   879           let val timeout = time_mult learn_timeout_slack timeout in
   880             launch_thread timeout
   881                 (fn () => (true, mash_learn_facts ctxt params prover 2 false
   882                                                   timeout facts))
   883           end
   884         else
   885           ()
   886       val fact_filter =
   887         case fact_filter of
   888           SOME ff => (() |> ff <> mepoN ? maybe_learn; ff)
   889         | NONE =>
   890           if is_smt_prover ctxt prover then
   891             mepoN
   892           else if mash_could_suggest_facts () then
   893             (maybe_learn ();
   894              if mash_can_suggest_facts ctxt then meshN else mepoN)
   895           else
   896             mepoN
   897       val add_ths = Attrib.eval_thms ctxt add
   898       fun prepend_facts ths accepts =
   899         ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @
   900          (accepts |> filter_out (member Thm.eq_thm_prop ths o snd)))
   901         |> take max_facts
   902       fun mepo () =
   903         facts |> mepo_suggested_facts ctxt params prover max_facts NONE hyp_ts
   904                                       concl_t
   905               |> weight_mepo_facts
   906       fun mash () =
   907         mash_suggested_facts ctxt params prover max_facts hyp_ts concl_t facts
   908       val mess =
   909         [] |> (if fact_filter <> mashN then cons (mepo (), []) else I)
   910            |> (if fact_filter <> mepoN then cons (mash ()) else I)
   911     in
   912       mesh_facts max_facts mess
   913       |> not (null add_ths) ? prepend_facts add_ths
   914     end
   915 
   916 fun kill_learners () = Async_Manager.kill_threads MaShN "learner"
   917 fun running_learners () = Async_Manager.running_threads MaShN "learner"
   918 
   919 end;