relearn ATP proofs
authorblanchet
Fri, 20 Jul 2012 22:19:46 +0200
changeset 494190a261b4aa093
parent 49418 1f214c653c80
child 49420 7682bc885e8a
relearn ATP proofs
src/HOL/TPTP/mash_eval.ML
src/HOL/TPTP/mash_export.ML
src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
     1.1 --- a/src/HOL/TPTP/mash_eval.ML	Fri Jul 20 22:19:46 2012 +0200
     1.2 +++ b/src/HOL/TPTP/mash_eval.ML	Fri Jul 20 22:19:46 2012 +0200
     1.3 @@ -75,7 +75,7 @@
     1.4            | NONE => error ("No fact called \"" ^ name ^ "\"")
     1.5          val goal = goal_of_thm thy th
     1.6          val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal 1
     1.7 -        val isar_deps = isar_dependencies_of all_names th
     1.8 +        val isar_deps = isar_dependencies_of all_names th |> these
     1.9          val facts = facts |> filter (fn (_, th') => thm_ord (th', th) = LESS)
    1.10          val mepo_facts =
    1.11            Sledgehammer_MePo.iterative_relevant_facts ctxt params prover
     2.1 --- a/src/HOL/TPTP/mash_export.ML	Fri Jul 20 22:19:46 2012 +0200
     2.2 +++ b/src/HOL/TPTP/mash_export.ML	Fri Jul 20 22:19:46 2012 +0200
     2.3 @@ -102,7 +102,7 @@
     2.4      fun do_thm th =
     2.5        let
     2.6          val name = nickname_of th
     2.7 -        val deps = isar_dependencies_of all_names th
     2.8 +        val deps = isar_dependencies_of all_names th |> these
     2.9          val s = escape_meta name ^ ": " ^ escape_metas deps ^ "\n"
    2.10        in File.append path s end
    2.11    in List.app do_thm ths end
    2.12 @@ -122,7 +122,9 @@
    2.13        let
    2.14          val name = nickname_of th
    2.15          val deps =
    2.16 -          atp_dependencies_of ctxt params prover false facts all_names th
    2.17 +          case atp_dependencies_of ctxt params prover 0 facts all_names th of
    2.18 +            SOME deps => deps
    2.19 +          | NONE => isar_dependencies_of all_names th |> these
    2.20          val s = escape_meta name ^ ": " ^ escape_metas deps ^ "\n"
    2.21        in File.append path s end
    2.22    in List.app do_thm ths end
    2.23 @@ -142,7 +144,7 @@
    2.24        let
    2.25          val name = nickname_of th
    2.26          val feats = features_of ctxt prover thy stature [prop_of th]
    2.27 -        val deps = isar_dependencies_of all_names th
    2.28 +        val deps = isar_dependencies_of all_names th |> these
    2.29          val kind = Thm.legacy_get_kind th
    2.30          val core = escape_metas prevs ^ "; " ^ escape_metas feats
    2.31          val query = if kind <> "" then "? " ^ core ^ "\n" else ""
     3.1 --- a/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Fri Jul 20 22:19:46 2012 +0200
     3.2 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML	Fri Jul 20 22:19:46 2012 +0200
     3.3 @@ -41,14 +41,16 @@
     3.4      Proof.context -> params -> string -> fact list -> thm -> prover_result
     3.5    val features_of :
     3.6      Proof.context -> string -> theory -> stature -> term list -> string list
     3.7 -  val isar_dependencies_of : unit Symtab.table -> thm -> string list
     3.8 +  val isar_dependencies_of : unit Symtab.table -> thm -> string list option
     3.9    val atp_dependencies_of :
    3.10 -    Proof.context -> params -> string -> bool -> fact list -> unit Symtab.table
    3.11 -    -> thm -> string list
    3.12 +    Proof.context -> params -> string -> int -> fact list -> unit Symtab.table
    3.13 +    -> thm -> string list option
    3.14    val mash_CLEAR : Proof.context -> unit
    3.15    val mash_ADD :
    3.16      Proof.context -> bool
    3.17      -> (string * string list * string list * string list) list -> unit
    3.18 +  val mash_REPROVE :
    3.19 +    Proof.context -> bool -> (string * string list) list -> unit
    3.20    val mash_QUERY :
    3.21      Proof.context -> bool -> int -> string list * string list -> string list
    3.22    val mash_unlearn : Proof.context -> unit
    3.23 @@ -60,9 +62,6 @@
    3.24    val mash_learn_proof :
    3.25      Proof.context -> params -> string -> term -> ('a * thm) list -> thm list
    3.26      -> unit
    3.27 -  val mash_learn_facts :
    3.28 -    Proof.context -> params -> string -> bool -> bool -> Time.time -> fact list
    3.29 -    -> string
    3.30    val mash_learn :
    3.31      Proof.context -> params -> fact_override -> thm list -> bool -> unit
    3.32    val relevant_facts :
    3.33 @@ -320,14 +319,21 @@
    3.34        | Simp => cons "simp"
    3.35        | Def => cons "def")
    3.36  
    3.37 -fun isar_dependencies_of all_facts = thms_in_proof (SOME all_facts)
    3.38 +(* Too many dependencies is a sign that a decision procedure is at work. There
    3.39 +   isn't much too learn from such proofs. *)
    3.40 +val max_dependencies = 10
    3.41 +val atp_dependency_default_max_fact = 50
    3.42  
    3.43 -val atp_dep_default_max_fact = 50
    3.44 +fun trim_dependencies deps =
    3.45 +  if length deps <= max_dependencies then SOME deps else NONE
    3.46  
    3.47 -fun atp_dependencies_of ctxt (params as {verbose, max_facts, ...}) prover auto
    3.48 -                        facts all_names th =
    3.49 +fun isar_dependencies_of all_facts =
    3.50 +  thms_in_proof (SOME all_facts) #> trim_dependencies
    3.51 +
    3.52 +fun atp_dependencies_of ctxt (params as {verbose, max_facts, ...}) prover
    3.53 +                        auto_level facts all_names th =
    3.54    case isar_dependencies_of all_names th of
    3.55 -    [] => []
    3.56 +    SOME [] => NONE
    3.57    | isar_deps =>
    3.58      let
    3.59        val thy = Proof_Context.theory_of ctxt
    3.60 @@ -344,12 +350,12 @@
    3.61          | NONE => accum (* shouldn't happen *)
    3.62        val facts =
    3.63          facts |> iterative_relevant_facts ctxt params prover
    3.64 -                     (max_facts |> the_default atp_dep_default_max_fact) NONE
    3.65 -                     hyp_ts concl_t
    3.66 -              |> fold (add_isar_dep facts) isar_deps
    3.67 +                     (max_facts |> the_default atp_dependency_default_max_fact)
    3.68 +                     NONE hyp_ts concl_t
    3.69 +              |> fold (add_isar_dep facts) (these isar_deps)
    3.70                |> map fix_name
    3.71      in
    3.72 -      if verbose andalso not auto then
    3.73 +      if verbose andalso auto_level = 0 then
    3.74          let val num_facts = length facts in
    3.75            "MaSh: " ^ quote prover ^ " on " ^ quote (nickname_of th) ^
    3.76            " with " ^ string_of_int num_facts ^ " fact" ^ plural_s num_facts ^
    3.77 @@ -360,7 +366,7 @@
    3.78          ();
    3.79        case run_prover_for_mash ctxt params prover facts goal of
    3.80          {outcome = NONE, used_facts, ...} =>
    3.81 -        (if verbose andalso not auto then
    3.82 +        (if verbose andalso auto_level = 0 then
    3.83             let val num_facts = length used_facts in
    3.84               "Found proof with " ^ string_of_int num_facts ^ " fact" ^
    3.85               plural_s num_facts ^ "."
    3.86 @@ -368,8 +374,8 @@
    3.87             end
    3.88           else
    3.89             ();
    3.90 -         used_facts |> map fst)
    3.91 -      | _ => isar_deps
    3.92 +         used_facts |> map fst |> trim_dependencies)
    3.93 +      | _ => NONE
    3.94      end
    3.95  
    3.96  
    3.97 @@ -418,10 +424,13 @@
    3.98                                 [err_file, sugg_file, cmd_file])
    3.99    end
   3.100  
   3.101 -fun str_of_update (name, parents, feats, deps) =
   3.102 +fun str_of_add (name, parents, feats, deps) =
   3.103    "! " ^ escape_meta name ^ ": " ^ escape_metas parents ^ "; " ^
   3.104    escape_metas feats ^ "; " ^ escape_metas deps ^ "\n"
   3.105  
   3.106 +fun str_of_reprove (name, deps) =
   3.107 +  "p " ^ escape_meta name ^ ": " ^ escape_metas deps ^ "\n"
   3.108 +
   3.109  fun str_of_query (parents, feats) =
   3.110    "? " ^ escape_metas parents ^ "; " ^ escape_metas feats
   3.111  
   3.112 @@ -435,10 +444,16 @@
   3.113    end
   3.114  
   3.115  fun mash_ADD _ _ [] = ()
   3.116 -  | mash_ADD ctxt overlord upds =
   3.117 +  | mash_ADD ctxt overlord adds =
   3.118      (trace_msg ctxt (fn () => "MaSh ADD " ^
   3.119 -         elide_string 1000 (space_implode " " (map #1 upds)));
   3.120 -     run_mash_tool ctxt overlord true 0 (upds, str_of_update) (K ()))
   3.121 +         elide_string 1000 (space_implode " " (map #1 adds)));
   3.122 +     run_mash_tool ctxt overlord true 0 (adds, str_of_add) (K ()))
   3.123 +
   3.124 +fun mash_REPROVE _ _ [] = ()
   3.125 +  | mash_REPROVE ctxt overlord reps =
   3.126 +    (trace_msg ctxt (fn () => "MaSh REPROVE " ^
   3.127 +         elide_string 1000 (space_implode " " (map #1 reps)));
   3.128 +     run_mash_tool ctxt overlord true 0 (reps, str_of_reprove) (K ()))
   3.129  
   3.130  fun mash_QUERY ctxt overlord max_suggs (query as (_, feats)) =
   3.131    (trace_msg ctxt (fn () => "MaSh QUERY " ^ space_implode " " feats);
   3.132 @@ -584,7 +599,7 @@
   3.133      val unknown = facts |> filter_out (is_fact_in_graph fact_G)
   3.134    in (selected, unknown) end
   3.135  
   3.136 -fun update_fact_graph ctxt (name, parents, feats, deps) (upds, graph) =
   3.137 +fun add_to_fact_graph ctxt (name, parents, feats, deps) (adds, graph) =
   3.138    let
   3.139      fun maybe_add_from from (accum as (parents, graph)) =
   3.140        try_graph ctxt "updating graph" accum (fn () =>
   3.141 @@ -592,7 +607,7 @@
   3.142      val graph = graph |> Graph.default_node (name, ())
   3.143      val (parents, graph) = ([], graph) |> fold maybe_add_from parents
   3.144      val (deps, graph) = ([], graph) |> fold maybe_add_from deps
   3.145 -  in ((name, parents, feats, deps) :: upds, graph) end
   3.146 +  in ((name, parents, feats, deps) :: adds, graph) end
   3.147  
   3.148  val learn_timeout_slack = 2.0
   3.149  
   3.150 @@ -628,14 +643,11 @@
   3.151  fun sendback sub =
   3.152    Markup.markup Isabelle_Markup.sendback (sledgehammerN ^ " " ^ sub)
   3.153  
   3.154 -(* Too many dependencies is a sign that a decision procedure is at work. There
   3.155 -   isn't much too learn from such proofs. *)
   3.156 -val max_dependencies = 10
   3.157  val commit_timeout = seconds 30.0
   3.158  
   3.159  (* The timeout is understood in a very slack fashion. *)
   3.160 -fun mash_learn_facts ctxt (params as {debug, verbose, overlord, timeout, ...})
   3.161 -                     prover auto atp learn_timeout facts =
   3.162 +fun mash_learn_facts ctxt (params as {debug, verbose, overlord, ...}) prover
   3.163 +                     auto_level atp learn_timeout facts =
   3.164    let
   3.165      val timer = Timer.startRealTimer ()
   3.166      fun next_commit_time () =
   3.167 @@ -644,86 +656,123 @@
   3.168      val (old_facts, new_facts) =
   3.169        facts |> List.partition (is_fact_in_graph fact_G)
   3.170              ||> sort (thm_ord o pairself snd)
   3.171 -    val num_new_facts = length new_facts
   3.172    in
   3.173 -    (if not auto then
   3.174 -       "MaShing" ^
   3.175 -       (if not auto then
   3.176 -          " " ^ string_of_int num_new_facts ^ " fact" ^
   3.177 -          plural_s num_new_facts ^
   3.178 -          (if atp then " (ATP timeout: " ^ string_from_time timeout ^ ")"
   3.179 -           else "")
   3.180 -        else
   3.181 -          "") ^ "..."
   3.182 -     else
   3.183 -       "")
   3.184 -    |> Output.urgent_message;
   3.185 -    if num_new_facts = 0 then
   3.186 -      if not auto then
   3.187 -        "Nothing to learn.\n\nHint: Try " ^ sendback relearn_isarN ^ " or " ^
   3.188 -        sendback relearn_atpN ^ " to learn from scratch."
   3.189 +    if null new_facts andalso (not atp orelse null old_facts) then
   3.190 +      if auto_level < 2 then
   3.191 +        "No new " ^ (if atp then "ATP" else "Isar") ^ " proofs to learn." ^
   3.192 +        (if auto_level = 0 andalso not atp then
   3.193 +           "\n\nHint: Try " ^ sendback learn_atpN ^ " to learn from ATP proofs."
   3.194 +         else
   3.195 +           "")
   3.196        else
   3.197          ""
   3.198      else
   3.199        let
   3.200 -        val last_th = new_facts |> List.last |> snd
   3.201 -        (* crude approximation *)
   3.202 -        val ancestors =
   3.203 -          old_facts |> filter (fn (_, th) => thm_ord (th, last_th) <> GREATER)
   3.204          val all_names =
   3.205            facts |> map snd
   3.206                  |> filter_out is_likely_tautology_or_too_meta
   3.207                  |> map (rpair () o nickname_of)
   3.208                  |> Symtab.make
   3.209 -        fun do_commit [] state = state
   3.210 -          | do_commit upds {fact_G} =
   3.211 +        val deps_of =
   3.212 +          if atp then
   3.213 +            atp_dependencies_of ctxt params prover auto_level facts all_names
   3.214 +          else
   3.215 +            isar_dependencies_of all_names
   3.216 +        fun do_commit [] [] state = state
   3.217 +          | do_commit adds reps {fact_G} =
   3.218              let
   3.219 -              val (upds, fact_G) =
   3.220 -                ([], fact_G) |> fold (update_fact_graph ctxt) upds
   3.221 -            in mash_ADD ctxt overlord (rev upds); {fact_G = fact_G} end
   3.222 -        fun trim_deps deps = if length deps > max_dependencies then [] else deps
   3.223 -        fun commit last upds =
   3.224 -          (if debug andalso not auto then Output.urgent_message "Committing..."
   3.225 -           else ();
   3.226 -           mash_map ctxt (do_commit (rev upds));
   3.227 -           if not last andalso not auto then
   3.228 -             let val num_upds = length upds in
   3.229 -               "Processed " ^ string_of_int num_upds ^ " fact" ^
   3.230 -               plural_s num_upds ^ " in the last " ^
   3.231 +              val (adds, fact_G) =
   3.232 +                ([], fact_G) |> fold (add_to_fact_graph ctxt) adds
   3.233 +            in
   3.234 +              mash_ADD ctxt overlord (rev adds);
   3.235 +              mash_REPROVE ctxt overlord reps;
   3.236 +              {fact_G = fact_G}
   3.237 +            end
   3.238 +        fun commit last adds reps =
   3.239 +          (if debug andalso auto_level = 0 then
   3.240 +             Output.urgent_message "Committing..."
   3.241 +           else
   3.242 +             ();
   3.243 +           mash_map ctxt (do_commit (rev adds) reps);
   3.244 +           if not last andalso auto_level = 0 then
   3.245 +             let val num_proofs = length adds + length reps in
   3.246 +               "Learned " ^ string_of_int num_proofs ^ " " ^
   3.247 +               (if atp then "ATP" else "Isar") ^ " proof" ^
   3.248 +               plural_s num_proofs ^ " in the last " ^
   3.249                 string_from_time commit_timeout ^ "."
   3.250                 |> Output.urgent_message
   3.251               end
   3.252             else
   3.253               ())
   3.254 -        fun do_fact _ (accum as (_, (_, _, _, true))) = accum
   3.255 -          | do_fact ((_, stature), th)
   3.256 -                    (upds, (parents, n, next_commit, false)) =
   3.257 +        fun learn_new_fact _ (accum as (_, (_, _, _, true))) = accum
   3.258 +          | learn_new_fact ((_, stature), th)
   3.259 +                           (adds, (parents, n, next_commit, _)) =
   3.260              let
   3.261                val name = nickname_of th
   3.262                val feats =
   3.263                  features_of ctxt prover (theory_of_thm th) stature [prop_of th]
   3.264 -              val deps =
   3.265 -                (if atp then atp_dependencies_of ctxt params prover auto facts
   3.266 -                 else isar_dependencies_of) all_names th
   3.267 -                |> trim_deps
   3.268 +              val deps = deps_of th |> these
   3.269                val n = n |> not (null deps) ? Integer.add 1
   3.270 -              val upds = (name, parents, feats, deps) :: upds
   3.271 -              val (upds, next_commit) =
   3.272 +              val adds = (name, parents, feats, deps) :: adds
   3.273 +              val (adds, next_commit) =
   3.274                  if Time.> (Timer.checkRealTimer timer, next_commit) then
   3.275 -                  (commit false upds; ([], next_commit_time ()))
   3.276 +                  (commit false adds []; ([], next_commit_time ()))
   3.277                  else
   3.278 -                  (upds, next_commit)
   3.279 -              val timed_out =
   3.280 -                Time.> (Timer.checkRealTimer timer, learn_timeout)
   3.281 -            in (upds, ([name], n, next_commit, timed_out)) end
   3.282 -        val parents = max_facts_in_graph fact_G ancestors
   3.283 -        val (upds, (_, n, _, _)) =
   3.284 -          ([], (parents, 0, next_commit_time (), false))
   3.285 -          |> fold do_fact new_facts
   3.286 +                  (adds, next_commit)
   3.287 +              val timed_out = Time.> (Timer.checkRealTimer timer, learn_timeout)
   3.288 +            in (adds, ([name], n, next_commit, timed_out)) end
   3.289 +        val n =
   3.290 +          if null new_facts then
   3.291 +            0
   3.292 +          else
   3.293 +            let
   3.294 +              val last_th = new_facts |> List.last |> snd
   3.295 +              (* crude approximation *)
   3.296 +              val ancestors =
   3.297 +                old_facts
   3.298 +                |> filter (fn (_, th) => thm_ord (th, last_th) <> GREATER)
   3.299 +              val parents = max_facts_in_graph fact_G ancestors
   3.300 +              val (adds, (_, n, _, _)) =
   3.301 +                ([], (parents, 0, next_commit_time (), false))
   3.302 +                |> fold learn_new_fact new_facts
   3.303 +            in commit true adds []; n end
   3.304 +        fun relearn_old_fact _ (accum as (_, (_, _, true))) = accum
   3.305 +          | relearn_old_fact (_, th) (reps, (n, next_commit, _)) =
   3.306 +            let
   3.307 +              val name = nickname_of th
   3.308 +              val (n, reps) =
   3.309 +                case deps_of th of
   3.310 +                  SOME deps => (n + 1, (name, deps) :: reps)
   3.311 +                | NONE => (n, reps)
   3.312 +              val (reps, next_commit) =
   3.313 +                if Time.> (Timer.checkRealTimer timer, next_commit) then
   3.314 +                  (commit false [] reps; ([], next_commit_time ()))
   3.315 +                else
   3.316 +                  (reps, next_commit)
   3.317 +              val timed_out = Time.> (Timer.checkRealTimer timer, learn_timeout)
   3.318 +            in (reps, (n, next_commit, timed_out)) end
   3.319 +        val n =
   3.320 +          if null old_facts then
   3.321 +            n
   3.322 +          else
   3.323 +            let
   3.324 +              fun score_of (_, th) =
   3.325 +                random_range 0 (1000 * max_dependencies)
   3.326 +                - 500 * (th |> isar_dependencies_of all_names
   3.327 +                            |> Option.map length
   3.328 +                            |> the_default max_dependencies)
   3.329 +              val old_facts =
   3.330 +                old_facts |> map (`score_of)
   3.331 +                          |> sort (int_ord o pairself fst)
   3.332 +                          |> map snd
   3.333 +              val (reps, (n, _, _)) =
   3.334 +                ([], (n, next_commit_time (), false))
   3.335 +                |> fold relearn_old_fact old_facts
   3.336 +            in commit true [] reps; n end
   3.337        in
   3.338 -        commit true upds;
   3.339 -        if verbose orelse not auto then
   3.340 -          "Learned " ^ string_of_int n ^ " nontrivial proof" ^ plural_s n ^
   3.341 +        if verbose orelse auto_level < 2 then
   3.342 +          "Learned " ^ string_of_int n ^ " nontrivial " ^
   3.343 +          (if atp then "ATP" else "Isar") ^ " proof" ^ plural_s n ^
   3.344            (if verbose then
   3.345               " in " ^ string_from_time (Timer.checkRealTimer timer)
   3.346             else
   3.347 @@ -733,16 +782,35 @@
   3.348        end
   3.349    end
   3.350  
   3.351 -fun mash_learn ctxt (params as {provers, ...}) fact_override chained atp =
   3.352 +fun mash_learn ctxt (params as {provers, timeout, ...}) fact_override chained
   3.353 +               atp =
   3.354    let
   3.355      val css = Sledgehammer_Fact.clasimpset_rule_table_of ctxt
   3.356      val ctxt = ctxt |> Config.put instantiate_inducts false
   3.357      val facts =
   3.358        nearly_all_facts ctxt false fact_override Symtab.empty css chained []
   3.359                         @{prop True}
   3.360 +    val num_facts = length facts
   3.361 +    val prover = hd provers
   3.362 +    fun learn auto_level atp =
   3.363 +      mash_learn_facts ctxt params prover auto_level atp infinite_timeout facts
   3.364 +      |> Output.urgent_message
   3.365    in
   3.366 -     mash_learn_facts ctxt params (hd provers) false atp infinite_timeout facts
   3.367 -     |> Output.urgent_message
   3.368 +    (if atp then
   3.369 +       ("MaShing through " ^ string_of_int num_facts ^ " fact" ^
   3.370 +        plural_s num_facts ^ " for ATP proofs (" ^ quote prover ^ " timeout: " ^
   3.371 +        string_from_time timeout ^ ").\n\nCollecting Isar proofs first..."
   3.372 +        |> Output.urgent_message;
   3.373 +        learn 1 false;
   3.374 +        "Now collecting ATP proofs. This may take several hours. You can \
   3.375 +        \safely stop the learning process at any point."
   3.376 +        |> Output.urgent_message;
   3.377 +        learn 0 true)
   3.378 +     else
   3.379 +       ("MaShing through " ^ string_of_int num_facts ^ " fact" ^
   3.380 +        plural_s num_facts ^ " for Isar proofs..."
   3.381 +        |> Output.urgent_message;
   3.382 +        learn 0 false))
   3.383    end
   3.384  
   3.385  (* The threshold should be large enough so that MaSh doesn't kick in for Auto
   3.386 @@ -764,7 +832,7 @@
   3.387             Time.toSeconds timeout >= min_secs_for_learning then
   3.388            let val timeout = time_mult learn_timeout_slack timeout in
   3.389              launch_thread timeout
   3.390 -                (fn () => (true, mash_learn_facts ctxt params prover true false
   3.391 +                (fn () => (true, mash_learn_facts ctxt params prover 2 false
   3.392                                                    timeout facts))
   3.393            end
   3.394          else