optimize parent computation in MaSh + remove temporary files
authorblanchet
Wed, 18 Jul 2012 08:44:05 +0200
changeset 49347271a4a6af734
parent 49346 f190a6dbb29b
child 49348 2250197977dc
optimize parent computation in MaSh + remove temporary files
src/HOL/Tools/Sledgehammer/sledgehammer_fact.ML
src/HOL/Tools/Sledgehammer/sledgehammer_filter_mash.ML
src/HOL/Tools/Sledgehammer/sledgehammer_isar.ML
     1.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_fact.ML	Wed Jul 18 08:44:04 2012 +0200
     1.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_fact.ML	Wed Jul 18 08:44:05 2012 +0200
     1.3 @@ -439,9 +439,9 @@
     1.4             all_facts ctxt ho_atp reserved add chained_ths css_table
     1.5             |> filter_out (member Thm.eq_thm_prop del o snd)
     1.6             |> maybe_filter_no_atps ctxt
     1.7 +           |> uniquify
     1.8           end)
     1.9        |> maybe_instantiate_inducts ctxt hyp_ts concl_t
    1.10 -      |> uniquify
    1.11      end
    1.12  
    1.13  end;
     2.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_filter_mash.ML	Wed Jul 18 08:44:04 2012 +0200
     2.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_filter_mash.ML	Wed Jul 18 08:44:05 2012 +0200
     2.3 @@ -37,7 +37,7 @@
     2.4    val goal_of_thm : theory -> thm -> thm
     2.5    val run_prover_for_mash :
     2.6      Proof.context -> params -> string -> fact list -> thm -> prover_result
     2.7 -  val mash_RESET : Proof.context -> unit
     2.8 +  val mash_CLEAR : Proof.context -> unit
     2.9    val mash_INIT :
    2.10      Proof.context -> bool
    2.11      -> (string * string list * string list * string list) list -> unit
    2.12 @@ -46,7 +46,7 @@
    2.13      -> (string * string list * string list * string list) list -> unit
    2.14    val mash_QUERY :
    2.15      Proof.context -> bool -> int -> string list * string list -> string list
    2.16 -  val mash_reset : Proof.context -> unit
    2.17 +  val mash_unlearn : Proof.context -> unit
    2.18    val mash_could_suggest_facts : unit -> bool
    2.19    val mash_can_suggest_facts : Proof.context -> bool
    2.20    val mash_suggest_facts :
    2.21 @@ -245,8 +245,8 @@
    2.22    thy_feature_name_of (Context.theory_name thy) ::
    2.23    interesting_terms_types_and_classes ctxt prover term_max_depth type_max_depth
    2.24                                        ts
    2.25 -  |> exists (not o is_lambda_free) ts ? cons "lambdas"
    2.26 -  |> exists (exists_Const is_exists) ts ? cons "skolems"
    2.27 +  |> forall is_lambda_free ts ? cons "no_lams"
    2.28 +  |> forall (not o exists_Const is_exists) ts ? cons "no_skos"
    2.29    |> (case status of
    2.30          General => I
    2.31        | Induction => cons "induction"
    2.32 @@ -292,7 +292,7 @@
    2.33    if overlord then (getenv "ISABELLE_HOME_USER", "")
    2.34    else (getenv "ISABELLE_TMP", serial_string ())
    2.35  
    2.36 -fun run_mash ctxt (temp_dir, serial) core =
    2.37 +fun run_mash ctxt overlord (temp_dir, serial) core =
    2.38    let
    2.39      val log_file = temp_dir ^ "/mash_log" ^ serial
    2.40      val err_file = temp_dir ^ "/mash_err" ^ serial
    2.41 @@ -303,19 +303,27 @@
    2.42      trace_msg ctxt (fn () => "Running " ^ command);
    2.43      write_file ([], K "") log_file;
    2.44      write_file ([], K "") err_file;
    2.45 -    Isabelle_System.bash command; ()
    2.46 +    Isabelle_System.bash command;
    2.47 +    if overlord then ()
    2.48 +    else (map (File.rm o Path.explode) [log_file, err_file]; ());
    2.49 +    trace_msg ctxt (K "Done")
    2.50    end
    2.51  
    2.52 +(* TODO: Eliminate code once "mash.py" handles sequences of ADD commands as fast
    2.53 +   as a single INIT. *)
    2.54  fun run_mash_init ctxt overlord write_access write_feats write_deps =
    2.55    let
    2.56      val info as (temp_dir, serial) = mash_info overlord
    2.57      val in_dir = temp_dir ^ "/mash_init" ^ serial
    2.58 -                 |> tap (Isabelle_System.mkdir o Path.explode)
    2.59 +    val in_dir_path = in_dir |> Path.explode |> tap Isabelle_System.mkdir
    2.60    in
    2.61      write_file write_access (in_dir ^ "/mash_accessibility");
    2.62      write_file write_feats (in_dir ^ "/mash_features");
    2.63      write_file write_deps (in_dir ^ "/mash_dependencies");
    2.64 -    run_mash ctxt info ("--init --inputDir " ^ in_dir)
    2.65 +    run_mash ctxt overlord info ("--init --inputDir " ^ in_dir);
    2.66 +    (* FIXME: temporary hack *)
    2.67 +    if overlord then ()
    2.68 +    else (Isabelle_System.bash ("rm -r -f " ^ File.shell_path in_dir_path); ())
    2.69    end
    2.70  
    2.71  fun run_mash_commands ctxt overlord save max_suggs write_cmds read_suggs =
    2.72 @@ -326,11 +334,14 @@
    2.73    in
    2.74      write_file ([], K "") sugg_file;
    2.75      write_file write_cmds cmd_file;
    2.76 -    run_mash ctxt info
    2.77 +    run_mash ctxt overlord info
    2.78               ("--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^
    2.79                " --numberOfPredictions " ^ string_of_int max_suggs ^
    2.80                (if save then " --saveModel" else ""));
    2.81      read_suggs (fn () => File.read_lines (Path.explode sugg_file))
    2.82 +    |> tap (fn _ =>
    2.83 +               if overlord then ()
    2.84 +               else (map (File.rm o Path.explode) [sugg_file, cmd_file]; ()))
    2.85    end
    2.86  
    2.87  fun str_of_update (name, parents, feats, deps) =
    2.88 @@ -343,15 +354,15 @@
    2.89  fun init_str_of_update get (upd as (name, _, _, _)) =
    2.90    escape_meta name ^ ": " ^ escape_metas (get upd) ^ "\n"
    2.91  
    2.92 -fun mash_RESET ctxt =
    2.93 +fun mash_CLEAR ctxt =
    2.94    let val path = mash_state_dir () |> Path.explode in
    2.95 -    trace_msg ctxt (K "MaSh RESET");
    2.96 +    trace_msg ctxt (K "MaSh CLEAR");
    2.97      File.fold_dir (fn file => fn () =>
    2.98                        File.rm (Path.append path (Path.basic file)))
    2.99                    path ()
   2.100    end
   2.101  
   2.102 -fun mash_INIT ctxt _ [] = mash_RESET ctxt
   2.103 +fun mash_INIT ctxt _ [] = mash_CLEAR ctxt
   2.104    | mash_INIT ctxt overlord upds =
   2.105      (trace_msg ctxt (fn () => "MaSh INIT " ^
   2.106           elide_string 1000 (space_implode " " (map #1 upds)));
   2.107 @@ -444,9 +455,9 @@
   2.108  fun mash_get ctxt =
   2.109    Synchronized.change_result global_state (mash_load ctxt #> `snd)
   2.110  
   2.111 -fun mash_reset ctxt =
   2.112 +fun mash_unlearn ctxt =
   2.113    Synchronized.change global_state (fn _ =>
   2.114 -      (mash_RESET ctxt; File.write (mash_state_path ()) "";
   2.115 +      (mash_CLEAR ctxt; File.write (mash_state_path ()) "";
   2.116         (true, empty_state)))
   2.117  
   2.118  end
   2.119 @@ -455,29 +466,20 @@
   2.120  fun mash_can_suggest_facts ctxt =
   2.121    not (Graph.is_empty (#fact_graph (mash_get ctxt)))
   2.122  
   2.123 -fun parents_wrt_facts ctxt facts fact_graph =
   2.124 +fun parents_wrt_facts facts fact_graph =
   2.125    let
   2.126      val facts = [] |> fold (cons o Thm.get_name_hint o snd) facts
   2.127      val tab = Symtab.empty |> fold (fn name => Symtab.update (name, ())) facts
   2.128 -    val maxs = Graph.maximals fact_graph
   2.129 -  in
   2.130 -    if forall (Symtab.defined tab) maxs then
   2.131 -      maxs
   2.132 -    else
   2.133 -      let
   2.134 -        val graph_facts = Symtab.make (map (rpair ()) (Graph.keys fact_graph))
   2.135 -        val ancestors =
   2.136 -          try_graph ctxt "when computing ancestor facts" [] (fn () =>
   2.137 -              facts |> filter (Symtab.defined graph_facts)
   2.138 -                    |> Graph.all_preds fact_graph)
   2.139 -        val ancestors =
   2.140 -          Symtab.empty |> fold (fn name => Symtab.update (name, ())) ancestors
   2.141 -      in
   2.142 -        try_graph ctxt "when computing parent facts" [] (fn () =>
   2.143 -            fact_graph |> Graph.restrict (Symtab.defined ancestors)
   2.144 -                       |> Graph.maximals)
   2.145 -      end
   2.146 -  end
   2.147 +    fun insert_not_parent parents name =
   2.148 +      not (member (op =) parents name) ? insert (op =) name
   2.149 +    fun parents_of parents [] = parents
   2.150 +      | parents_of parents (name :: names) =
   2.151 +        if Symtab.defined tab name then
   2.152 +          parents_of (name :: parents) names
   2.153 +        else
   2.154 +          parents_of parents (Graph.Keys.fold (insert_not_parent parents)
   2.155 +                                  (Graph.imm_preds fact_graph name) names)
   2.156 +  in parents_of [] (Graph.maximals fact_graph) end
   2.157  
   2.158  (* Generate more suggestions than requested, because some might be thrown out
   2.159     later for various reasons and "meshing" gives better results with some
   2.160 @@ -490,23 +492,15 @@
   2.161  fun mash_suggest_facts ctxt ({overlord, ...} : params) prover max_facts hyp_ts
   2.162                         concl_t facts =
   2.163    let
   2.164 -val timer = Timer.startRealTimer ()
   2.165      val thy = Proof_Context.theory_of ctxt
   2.166 -val _ = tracing ("A1: " ^ PolyML.makestring (Timer.checkRealTimer timer)) (*###*)
   2.167      val fact_graph = #fact_graph (mash_get ctxt)
   2.168 -val _ = tracing ("A2: " ^ PolyML.makestring (Timer.checkRealTimer timer)) (*###*)
   2.169 -    val parents = parents_wrt_facts ctxt facts fact_graph
   2.170 -val _ = tracing ("A3: " ^ PolyML.makestring (Timer.checkRealTimer timer)) (*###*)
   2.171 +    val parents = parents_wrt_facts facts fact_graph
   2.172      val feats = features_of ctxt prover thy General (concl_t :: hyp_ts)
   2.173 -val _ = tracing ("A4: " ^ PolyML.makestring (Timer.checkRealTimer timer)) (*###*)
   2.174      val suggs =
   2.175        if Graph.is_empty fact_graph then []
   2.176        else mash_QUERY ctxt overlord (max_suggs_of max_facts) (parents, feats)
   2.177 -val _ = tracing ("A5: " ^ PolyML.makestring (Timer.checkRealTimer timer)) (*###*)
   2.178      val selected = facts |> suggested_facts suggs
   2.179 -val _ = tracing ("A6: " ^ PolyML.makestring (Timer.checkRealTimer timer)) (*###*)
   2.180      val unknown = facts |> filter_out (is_fact_in_graph fact_graph)
   2.181 -val _ = tracing ("END: " ^ PolyML.makestring (Timer.checkRealTimer timer)) (*###*)
   2.182    in (selected, unknown) end
   2.183  
   2.184  fun add_thys_for thy =
   2.185 @@ -526,6 +520,10 @@
   2.186  
   2.187  val pass1_learn_timeout_factor = 0.5
   2.188  
   2.189 +(* Too many dependencies is a sign that a decision procedure is at work. There
   2.190 +   isn't much too learn from such proofs. *)
   2.191 +val max_dependencies = 10
   2.192 +
   2.193  (* The timeout is understood in a very slack fashion. *)
   2.194  fun mash_learn_thy ctxt ({provers, verbose, overlord, ...} : params) thy timeout
   2.195                     facts =
   2.196 @@ -548,15 +546,17 @@
   2.197            ths |> filter_out is_likely_tautology_or_too_meta
   2.198                |> map (rpair () o Thm.get_name_hint)
   2.199                |> Symtab.make
   2.200 +        fun trim_deps deps = if length deps > max_dependencies then [] else deps
   2.201          fun do_fact _ (accum as (_, true)) = accum
   2.202            | do_fact ((_, (_, status)), th) ((parents, upds), false) =
   2.203              let
   2.204                val name = Thm.get_name_hint th
   2.205 -              val feats = features_of ctxt prover thy status [prop_of th]
   2.206 -              val deps = isabelle_dependencies_of all_names th
   2.207 +              val feats =
   2.208 +                features_of ctxt prover (theory_of_thm th) status [prop_of th]
   2.209 +              val deps = isabelle_dependencies_of all_names th |> trim_deps
   2.210                val upd = (name, parents, feats, deps)
   2.211              in (([name], upd :: upds), timed_out pass1_learn_timeout_factor) end
   2.212 -        val parents = parents_wrt_facts ctxt facts fact_graph
   2.213 +        val parents = parents_wrt_facts facts fact_graph
   2.214          val ((_, upds), _) =
   2.215            ((parents, []), false) |> fold do_fact new_facts |>> apsnd rev
   2.216          val n = length upds
   2.217 @@ -594,7 +594,7 @@
   2.218    in
   2.219      mash_map ctxt (fn {thys, fact_graph} =>
   2.220          let
   2.221 -          val parents = parents_wrt_facts ctxt facts fact_graph
   2.222 +          val parents = parents_wrt_facts facts fact_graph
   2.223            val upds = [(name, parents, feats, deps)]
   2.224            val (upds, fact_graph) =
   2.225              ([], fact_graph) |> fold (update_fact_graph ctxt) upds
     3.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_isar.ML	Wed Jul 18 08:44:04 2012 +0200
     3.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_isar.ML	Wed Jul 18 08:44:05 2012 +0200
     3.3 @@ -388,7 +388,7 @@
     3.4      else if subcommand = running_learnersN then
     3.5        running_learners ()
     3.6      else if subcommand = unlearnN then
     3.7 -      mash_reset ctxt
     3.8 +      mash_unlearn ctxt
     3.9      else if subcommand = refresh_tptpN then
    3.10        refresh_systems_on_tptp ()
    3.11      else