blanchet@49395: (* Title: HOL/Tools/Sledgehammer/sledgehammer_mash.ML blanchet@49263: Author: Jasmin Blanchette, TU Muenchen blanchet@49263: blanchet@49263: Sledgehammer's machine-learning-based relevance filter (MaSh). blanchet@49263: *) blanchet@49263: blanchet@49396: signature SLEDGEHAMMER_MASH = blanchet@49263: sig blanchet@49266: type stature = ATP_Problem_Generate.stature blanchet@49311: type fact = Sledgehammer_Fact.fact blanchet@49311: type fact_override = Sledgehammer_Fact.fact_override blanchet@49266: type params = Sledgehammer_Provers.params blanchet@49303: type relevance_fudge = Sledgehammer_Provers.relevance_fudge blanchet@49266: type prover_result = Sledgehammer_Provers.prover_result blanchet@49266: blanchet@49323: val trace : bool Config.T blanchet@49334: val MaShN : string blanchet@49394: val mepoN : string blanchet@49394: val mashN : string blanchet@49329: val meshN : string blanchet@49407: val unlearnN : string blanchet@49407: val learn_isarN : string blanchet@49407: val learn_atpN : string blanchet@49407: val relearn_isarN : string blanchet@49407: val relearn_atpN : string blanchet@49329: val fact_filters : string list blanchet@49318: val escape_meta : string -> string blanchet@49318: val escape_metas : string list -> string blanchet@49323: val unescape_meta : string -> string blanchet@49323: val unescape_metas : string -> string list blanchet@49421: val extract_query : string -> string * (string * real) list blanchet@49393: val nickname_of : thm -> string blanchet@49421: val suggested_facts : blanchet@49421: (string * 'a) list -> ('b * thm) list -> (('b * thm) * 'a) list blanchet@49336: val mesh_facts : blanchet@49421: int -> ((('a * thm) * real) list * ('a * thm) list) list -> ('a * thm) list blanchet@49266: val theory_ord : theory * theory -> order blanchet@49266: val thm_ord : thm * thm -> order blanchet@49266: val goal_of_thm : theory -> thm -> thm blanchet@49336: val run_prover_for_mash : blanchet@49333: Proof.context -> params -> string -> fact list -> thm -> prover_result blanchet@49407: val features_of : blanchet@49407: Proof.context -> string -> theory -> stature -> term list -> string list blanchet@49419: val isar_dependencies_of : unit Symtab.table -> thm -> string list option blanchet@49407: val atp_dependencies_of : blanchet@49419: Proof.context -> params -> string -> int -> fact list -> unit Symtab.table blanchet@49419: -> thm -> string list option blanchet@49347: val mash_CLEAR : Proof.context -> unit blanchet@49323: val mash_ADD : blanchet@49331: Proof.context -> bool blanchet@49331: -> (string * string list * string list * string list) list -> unit blanchet@49419: val mash_REPROVE : blanchet@49419: Proof.context -> bool -> (string * string list) list -> unit blanchet@49331: val mash_QUERY : blanchet@49421: Proof.context -> bool -> int -> string list * string list blanchet@49421: -> (string * real) list blanchet@49347: val mash_unlearn : Proof.context -> unit blanchet@49333: val mash_could_suggest_facts : unit -> bool blanchet@49336: val mash_can_suggest_facts : Proof.context -> bool blanchet@49421: val mash_suggested_facts : blanchet@49336: Proof.context -> params -> string -> int -> term list -> term blanchet@49421: -> ('a * thm) list -> (('a * thm) * real) list * ('a * thm) list blanchet@49398: val mash_learn_proof : blanchet@49399: Proof.context -> params -> string -> term -> ('a * thm) list -> thm list blanchet@49399: -> unit blanchet@49410: val mash_learn : blanchet@49410: Proof.context -> params -> fact_override -> thm list -> bool -> unit blanchet@49303: val relevant_facts : blanchet@49307: Proof.context -> params -> string -> int -> fact_override -> term list blanchet@49311: -> term -> fact list -> fact list blanchet@49334: val kill_learners : unit -> unit blanchet@49334: val running_learners : unit -> unit blanchet@49263: end; blanchet@49263: blanchet@49396: structure Sledgehammer_MaSh : SLEDGEHAMMER_MASH = blanchet@49263: struct blanchet@49264: blanchet@49266: open ATP_Util blanchet@49266: open ATP_Problem_Generate blanchet@49266: open Sledgehammer_Util blanchet@49266: open Sledgehammer_Fact blanchet@49266: open Sledgehammer_Provers blanchet@49333: open Sledgehammer_Minimize blanchet@49396: open Sledgehammer_MePo blanchet@49266: blanchet@49323: val trace = blanchet@49395: Attrib.setup_config_bool @{binding sledgehammer_mash_trace} (K false) blanchet@49323: fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else () blanchet@49323: blanchet@49334: val MaShN = "MaSh" blanchet@49334: blanchet@49394: val mepoN = "mepo" blanchet@49394: val mashN = "mash" blanchet@49329: val meshN = "mesh" blanchet@49329: blanchet@49394: val fact_filters = [meshN, mepoN, mashN] blanchet@49329: blanchet@49407: val unlearnN = "unlearn" blanchet@49407: val learn_isarN = "learn_isar" blanchet@49407: val learn_atpN = "learn_atp" blanchet@49407: val relearn_isarN = "relearn_isar" blanchet@49407: val relearn_atpN = "relearn_atp" blanchet@49407: blanchet@49329: fun mash_home () = getenv "MASH_HOME" blanchet@49409: fun mash_model_dir () = blanchet@49324: getenv "ISABELLE_HOME_USER" ^ "/mash" blanchet@49331: |> tap (Isabelle_System.mkdir o Path.explode) blanchet@49409: val mash_state_dir = mash_model_dir blanchet@49329: fun mash_state_path () = mash_state_dir () ^ "/state" |> Path.explode blanchet@49266: blanchet@49345: blanchet@49266: (*** Isabelle helpers ***) blanchet@49266: blanchet@49323: fun meta_char c = blanchet@49266: if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse blanchet@49266: c = #")" orelse c = #"," then blanchet@49266: String.str c blanchet@49266: else blanchet@49266: (* fixed width, in case more digits follow *) blanchet@49410: "%" ^ stringN_of_int 3 (Char.ord c) blanchet@49266: blanchet@49323: fun unmeta_chars accum [] = String.implode (rev accum) blanchet@49410: | unmeta_chars accum (#"%" :: d1 :: d2 :: d3 :: cs) = blanchet@49323: (case Int.fromString (String.implode [d1, d2, d3]) of blanchet@49323: SOME n => unmeta_chars (Char.chr n :: accum) cs blanchet@49323: | NONE => "" (* error *)) blanchet@49410: | unmeta_chars _ (#"%" :: _) = "" (* error *) blanchet@49323: | unmeta_chars accum (c :: cs) = unmeta_chars (c :: accum) cs blanchet@49323: blanchet@49323: val escape_meta = String.translate meta_char blanchet@49318: val escape_metas = map escape_meta #> space_implode " " blanchet@49330: val unescape_meta = String.explode #> unmeta_chars [] blanchet@49330: val unescape_metas = blanchet@49330: space_explode " " #> filter_out (curry (op =) "") #> map unescape_meta blanchet@49266: blanchet@49421: fun extract_node line = blanchet@49421: case space_explode ":" line of blanchet@49421: [name, parents] => (unescape_meta name, unescape_metas parents) blanchet@49421: | _ => ("", []) blanchet@49421: blanchet@49421: fun extract_suggestion sugg = blanchet@49421: case space_explode "=" sugg of blanchet@49421: [name, weight] => blanchet@49421: SOME (unescape_meta name, Real.fromString weight |> the_default 0.0) blanchet@49421: | _ => NONE blanchet@49421: blanchet@49326: fun extract_query line = blanchet@49326: case space_explode ":" line of blanchet@49421: [goal, suggs] => blanchet@49421: (unescape_meta goal, blanchet@49421: map_filter extract_suggestion (space_explode " " suggs)) blanchet@49327: | _ => ("", []) blanchet@49326: blanchet@49393: fun parent_of_local_thm th = blanchet@49393: let blanchet@49393: val thy = th |> Thm.theory_of_thm blanchet@49393: val facts = thy |> Global_Theory.facts_of blanchet@49393: val space = facts |> Facts.space_of blanchet@49393: fun id_of s = #id (Name_Space.the_entry space s) blanchet@49393: fun max_id (s', _) (s, id) = blanchet@49393: let val id' = id_of s' in if id > id' then (s, id) else (s', id') end blanchet@49393: in ("", ~1) |> Facts.fold_static max_id facts |> fst end blanchet@49393: blanchet@49393: val local_prefix = "local" ^ Long_Name.separator blanchet@49393: blanchet@49393: fun nickname_of th = blanchet@49409: if Thm.has_name_hint th then blanchet@49409: let val hint = Thm.get_name_hint th in blanchet@49409: (* FIXME: There must be a better way to detect local facts. *) blanchet@49409: case try (unprefix local_prefix) hint of blanchet@49409: SOME suf => blanchet@49409: parent_of_local_thm th ^ Long_Name.separator ^ Long_Name.separator ^ suf blanchet@49409: | NONE => hint blanchet@49409: end blanchet@49409: else blanchet@49409: backquote_thm th blanchet@49393: blanchet@49345: fun suggested_facts suggs facts = blanchet@49345: let blanchet@49393: fun add_fact (fact as (_, th)) = Symtab.default (nickname_of th, fact) blanchet@49345: val tab = Symtab.empty |> fold add_fact facts blanchet@49421: fun find_sugg (name, weight) = blanchet@49421: Symtab.lookup tab name |> Option.map (rpair weight) blanchet@49421: in map_filter find_sugg suggs end blanchet@49326: blanchet@49421: fun sum_avg [] = 0 blanchet@49422: | sum_avg xs = blanchet@49422: Real.ceil (100000000.0 * fold (curry (op +)) xs 0.0) div length xs blanchet@49343: blanchet@49421: fun normalize_scores [] = [] blanchet@49421: | normalize_scores ((fact, score) :: tail) = blanchet@49421: (fact, 1.0) :: map (apsnd (curry Real.* (1.0 / score))) tail blanchet@49328: blanchet@49421: fun mesh_facts max_facts [(sels, unks)] = blanchet@49421: map fst (take max_facts sels) @ take (max_facts - length sels) unks blanchet@49329: | mesh_facts max_facts mess = blanchet@49329: let blanchet@49421: val mess = mess |> map (apfst (normalize_scores #> `length)) blanchet@49329: val fact_eq = Thm.eq_thm o pairself snd blanchet@49421: fun score_at sels = try (nth sels) #> Option.map snd blanchet@49335: fun score_in fact ((sel_len, sels), unks) = blanchet@49421: case find_index (curry fact_eq fact o fst) sels of blanchet@49335: ~1 => (case find_index (curry fact_eq fact) unks of blanchet@49421: ~1 => score_at sels sel_len blanchet@49335: | _ => NONE) blanchet@49421: | rank => score_at sels rank blanchet@49421: fun weight_of fact = mess |> map_filter (score_in fact) |> sum_avg blanchet@49421: val facts = blanchet@49421: fold (union fact_eq o map fst o take max_facts o snd o fst) mess [] blanchet@49329: in blanchet@49421: facts |> map (`weight_of) |> sort (int_ord o swap o pairself fst) blanchet@49343: |> map snd |> take max_facts blanchet@49329: end blanchet@49327: blanchet@49410: val thy_feature_name_of = prefix "y" blanchet@49410: val const_name_of = prefix "c" blanchet@49410: val type_name_of = prefix "t" blanchet@49410: val class_name_of = prefix "s" blanchet@49266: blanchet@49339: fun theory_ord p = blanchet@49339: if Theory.eq_thy p then blanchet@49339: EQUAL blanchet@49339: else if Theory.subthy p then blanchet@49339: LESS blanchet@49339: else if Theory.subthy (swap p) then blanchet@49339: GREATER blanchet@49339: else case int_ord (pairself (length o Theory.ancestors_of) p) of blanchet@49339: EQUAL => string_ord (pairself Context.theory_name p) blanchet@49339: | order => order blanchet@49339: blanchet@49339: val thm_ord = theory_ord o pairself theory_of_thm blanchet@49339: blanchet@49407: val freezeT = Type.legacy_freeze_type blanchet@49407: blanchet@49407: fun freeze (t $ u) = freeze t $ freeze u blanchet@49407: | freeze (Abs (s, T, t)) = Abs (s, freezeT T, freeze t) blanchet@49407: | freeze (Var ((s, _), T)) = Free (s, freezeT T) blanchet@49407: | freeze (Const (s, T)) = Const (s, freezeT T) blanchet@49407: | freeze (Free (s, T)) = Free (s, freezeT T) blanchet@49407: | freeze t = t blanchet@49407: blanchet@49407: fun goal_of_thm thy = prop_of #> freeze #> cterm_of thy #> Goal.init blanchet@49407: blanchet@49407: fun run_prover_for_mash ctxt params prover facts goal = blanchet@49407: let blanchet@49407: val problem = blanchet@49407: {state = Proof.init ctxt, goal = goal, subgoal = 1, subgoal_count = 1, blanchet@49407: facts = facts |> map (apfst (apfst (fn name => name ()))) blanchet@49407: |> map Untranslated_Fact} blanchet@49407: in blanchet@49414: get_minimizing_prover ctxt MaSh (K (K ())) prover params (K (K (K ""))) blanchet@49407: problem blanchet@49407: end blanchet@49407: blanchet@49341: val bad_types = [@{type_name prop}, @{type_name bool}, @{type_name fun}] blanchet@49341: blanchet@49413: val logical_consts = blanchet@49413: [@{const_name prop}, @{const_name Pure.conjunction}] @ atp_logical_consts blanchet@49413: blanchet@49333: fun interesting_terms_types_and_classes ctxt prover term_max_depth blanchet@49333: type_max_depth ts = blanchet@49266: let blanchet@49333: fun is_bad_const (x as (s, _)) args = blanchet@49413: member (op =) logical_consts s orelse blanchet@49333: fst (is_built_in_const_for_prover ctxt prover x args) blanchet@49319: fun add_classes @{sort type} = I blanchet@49319: | add_classes S = union (op =) (map class_name_of S) blanchet@49266: fun do_add_type (Type (s, Ts)) = blanchet@49266: (not (member (op =) bad_types s) ? insert (op =) (type_name_of s)) blanchet@49266: #> fold do_add_type Ts blanchet@49319: | do_add_type (TFree (_, S)) = add_classes S blanchet@49319: | do_add_type (TVar (_, S)) = add_classes S blanchet@49266: fun add_type T = type_max_depth >= 0 ? do_add_type T blanchet@49266: fun mk_app s args = blanchet@49266: if member (op <>) args "" then s ^ "(" ^ space_implode "," args ^ ")" blanchet@49266: else s blanchet@49266: fun patternify ~1 _ = "" blanchet@49266: | patternify depth t = blanchet@49266: case strip_comb t of blanchet@49413: (Const (x as (s, _)), args) => blanchet@49413: if is_bad_const x args then "" blanchet@49413: else mk_app (const_name_of s) (map (patternify (depth - 1)) args) blanchet@49266: | _ => "" blanchet@49421: fun add_pattern depth t = blanchet@49421: case patternify depth t of "" => I | s => insert (op =) s blanchet@49266: fun add_term_patterns ~1 _ = I blanchet@49266: | add_term_patterns depth t = blanchet@49421: add_pattern depth t #> add_term_patterns (depth - 1) t blanchet@49266: val add_term = add_term_patterns term_max_depth blanchet@49266: fun add_patterns t = blanchet@49266: let val (head, args) = strip_comb t in blanchet@49266: (case head of blanchet@49413: Const (_, T) => add_term t #> add_type T blanchet@49266: | Free (_, T) => add_type T blanchet@49266: | Var (_, T) => add_type T blanchet@49266: | Abs (_, T, body) => add_type T #> add_patterns body blanchet@49266: | _ => I) blanchet@49266: #> fold add_patterns args blanchet@49266: end blanchet@49341: in [] |> fold add_patterns ts end blanchet@49266: blanchet@49266: fun is_exists (s, _) = (s = @{const_name Ex} orelse s = @{const_name Ex1}) blanchet@49266: blanchet@49312: val term_max_depth = 1 blanchet@49312: val type_max_depth = 1 blanchet@49266: blanchet@49266: (* TODO: Generate type classes for types? *) blanchet@49400: fun features_of ctxt prover thy (scope, status) ts = blanchet@49318: thy_feature_name_of (Context.theory_name thy) :: blanchet@49333: interesting_terms_types_and_classes ctxt prover term_max_depth type_max_depth blanchet@49333: ts blanchet@49347: |> forall is_lambda_free ts ? cons "no_lams" blanchet@49347: |> forall (not o exists_Const is_exists) ts ? cons "no_skos" blanchet@49400: |> scope <> Global ? cons "local" blanchet@49317: |> (case status of blanchet@49317: General => I blanchet@49317: | Induction => cons "induction" blanchet@49317: | Intro => cons "intro" blanchet@49317: | Inductive => cons "inductive" blanchet@49317: | Elim => cons "elim" blanchet@49317: | Simp => cons "simp" blanchet@49317: | Def => cons "def") blanchet@49266: blanchet@49419: (* Too many dependencies is a sign that a decision procedure is at work. There blanchet@49419: isn't much too learn from such proofs. *) blanchet@49419: val max_dependencies = 10 blanchet@49419: val atp_dependency_default_max_fact = 50 blanchet@49266: blanchet@49419: fun trim_dependencies deps = blanchet@49419: if length deps <= max_dependencies then SOME deps else NONE blanchet@49266: blanchet@49421: fun isar_dependencies_of all_names = blanchet@49421: thms_in_proof (SOME all_names) #> trim_dependencies blanchet@49419: blanchet@49419: fun atp_dependencies_of ctxt (params as {verbose, max_facts, ...}) prover blanchet@49419: auto_level facts all_names th = blanchet@49407: case isar_dependencies_of all_names th of blanchet@49419: SOME [] => NONE blanchet@49407: | isar_deps => blanchet@49407: let blanchet@49407: val thy = Proof_Context.theory_of ctxt blanchet@49407: val goal = goal_of_thm thy th blanchet@49407: val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal 1 blanchet@49407: val facts = facts |> filter (fn (_, th') => thm_ord (th', th) = LESS) blanchet@49407: fun fix_name ((_, stature), th) = ((fn () => nickname_of th, stature), th) blanchet@49407: fun is_dep dep (_, th) = nickname_of th = dep blanchet@49407: fun add_isar_dep facts dep accum = blanchet@49407: if exists (is_dep dep) accum then blanchet@49407: accum blanchet@49407: else case find_first (is_dep dep) facts of blanchet@49407: SOME ((name, status), th) => accum @ [((name, status), th)] blanchet@49407: | NONE => accum (* shouldn't happen *) blanchet@49407: val facts = blanchet@49421: facts |> mepo_suggested_facts ctxt params prover blanchet@49419: (max_facts |> the_default atp_dependency_default_max_fact) blanchet@49419: NONE hyp_ts concl_t blanchet@49419: |> fold (add_isar_dep facts) (these isar_deps) blanchet@49407: |> map fix_name blanchet@49407: in blanchet@49419: if verbose andalso auto_level = 0 then blanchet@49407: let val num_facts = length facts in blanchet@49407: "MaSh: " ^ quote prover ^ " on " ^ quote (nickname_of th) ^ blanchet@49407: " with " ^ string_of_int num_facts ^ " fact" ^ plural_s num_facts ^ blanchet@49407: "." blanchet@49407: |> Output.urgent_message blanchet@49407: end blanchet@49407: else blanchet@49407: (); blanchet@49407: case run_prover_for_mash ctxt params prover facts goal of blanchet@49407: {outcome = NONE, used_facts, ...} => blanchet@49419: (if verbose andalso auto_level = 0 then blanchet@49407: let val num_facts = length used_facts in blanchet@49407: "Found proof with " ^ string_of_int num_facts ^ " fact" ^ blanchet@49407: plural_s num_facts ^ "." blanchet@49407: |> Output.urgent_message blanchet@49407: end blanchet@49407: else blanchet@49407: (); blanchet@49419: used_facts |> map fst |> trim_dependencies) blanchet@49419: | _ => NONE blanchet@49407: end blanchet@49266: blanchet@49266: blanchet@49317: (*** Low-level communication with MaSh ***) blanchet@49317: blanchet@49409: (* more friendly than "try o File.rm" for those who keep the files open in their blanchet@49409: text editor *) blanchet@49409: fun wipe_out file = File.write file "" blanchet@49409: blanchet@49338: fun write_file (xs, f) file = blanchet@49333: let val path = Path.explode file in blanchet@49409: wipe_out path; blanchet@49338: xs |> chunk_list 500 blanchet@49338: |> List.app (File.append path o space_implode "" o map f) blanchet@49333: end blanchet@49331: blanchet@49409: fun run_mash_tool ctxt overlord save max_suggs write_cmds read_suggs = blanchet@49326: let blanchet@49409: val (temp_dir, serial) = blanchet@49409: if overlord then (getenv "ISABELLE_HOME_USER", "") blanchet@49409: else (getenv "ISABELLE_TMP", serial_string ()) blanchet@49409: val log_file = if overlord then temp_dir ^ "/mash_log" else "/dev/null" blanchet@49331: val err_file = temp_dir ^ "/mash_err" ^ serial blanchet@49333: val sugg_file = temp_dir ^ "/mash_suggs" ^ serial blanchet@49331: val cmd_file = temp_dir ^ "/mash_commands" ^ serial blanchet@49409: val core = blanchet@49409: "--inputFile " ^ cmd_file ^ " --predictions " ^ sugg_file ^ blanchet@49409: " --numberOfPredictions " ^ string_of_int max_suggs ^ blanchet@49409: (if save then " --saveModel" else "") blanchet@49409: val command = blanchet@49409: mash_home () ^ "/mash --quiet --outputDir " ^ mash_model_dir () ^ blanchet@49409: " --log " ^ log_file ^ " " ^ core ^ " >& " ^ err_file blanchet@49331: in blanchet@49338: write_file ([], K "") sugg_file; blanchet@49331: write_file write_cmds cmd_file; blanchet@49409: trace_msg ctxt (fn () => "Running " ^ command); blanchet@49409: Isabelle_System.bash command; blanchet@49409: read_suggs (fn () => try File.read_lines (Path.explode sugg_file) |> these) blanchet@49409: |> tap (fn _ => trace_msg ctxt (fn () => blanchet@49409: case try File.read (Path.explode err_file) of blanchet@49409: NONE => "Done" blanchet@49409: | SOME "" => "Done" blanchet@49409: | SOME s => "Error: " ^ elide_string 1000 s)) blanchet@49409: |> not overlord blanchet@49409: ? tap (fn _ => List.app (wipe_out o Path.explode) blanchet@49409: [err_file, sugg_file, cmd_file]) blanchet@49331: end blanchet@49331: blanchet@49419: fun str_of_add (name, parents, feats, deps) = blanchet@49331: "! " ^ escape_meta name ^ ": " ^ escape_metas parents ^ "; " ^ blanchet@49326: escape_metas feats ^ "; " ^ escape_metas deps ^ "\n" blanchet@49326: blanchet@49419: fun str_of_reprove (name, deps) = blanchet@49419: "p " ^ escape_meta name ^ ": " ^ escape_metas deps ^ "\n" blanchet@49419: blanchet@49331: fun str_of_query (parents, feats) = blanchet@49421: "? " ^ escape_metas parents ^ "; " ^ escape_metas feats ^ "\n" blanchet@49331: blanchet@49347: fun mash_CLEAR ctxt = blanchet@49409: let val path = mash_model_dir () |> Path.explode in blanchet@49347: trace_msg ctxt (K "MaSh CLEAR"); blanchet@49409: File.fold_dir (fn file => fn _ => blanchet@49409: try File.rm (Path.append path (Path.basic file))) blanchet@49409: path NONE; blanchet@49409: () blanchet@49324: end blanchet@49317: blanchet@49331: fun mash_ADD _ _ [] = () blanchet@49419: | mash_ADD ctxt overlord adds = blanchet@49331: (trace_msg ctxt (fn () => "MaSh ADD " ^ blanchet@49419: elide_string 1000 (space_implode " " (map #1 adds))); blanchet@49419: run_mash_tool ctxt overlord true 0 (adds, str_of_add) (K ())) blanchet@49419: blanchet@49419: fun mash_REPROVE _ _ [] = () blanchet@49419: | mash_REPROVE ctxt overlord reps = blanchet@49419: (trace_msg ctxt (fn () => "MaSh REPROVE " ^ blanchet@49419: elide_string 1000 (space_implode " " (map #1 reps))); blanchet@49419: run_mash_tool ctxt overlord true 0 (reps, str_of_reprove) (K ())) blanchet@49317: blanchet@49333: fun mash_QUERY ctxt overlord max_suggs (query as (_, feats)) = blanchet@49329: (trace_msg ctxt (fn () => "MaSh QUERY " ^ space_implode " " feats); blanchet@49409: run_mash_tool ctxt overlord false max_suggs blanchet@49338: ([query], str_of_query) blanchet@49409: (fn suggs => blanchet@49409: case suggs () of blanchet@49409: [] => [] blanchet@49409: | suggs => snd (extract_query (List.last suggs))) blanchet@49326: handle List.Empty => []) blanchet@49317: blanchet@49317: blanchet@49266: (*** High-level communication with MaSh ***) blanchet@49266: blanchet@49336: fun try_graph ctxt when def f = blanchet@49336: f () blanchet@49336: handle Graph.CYCLES (cycle :: _) => blanchet@49336: (trace_msg ctxt (fn () => blanchet@49336: "Cycle involving " ^ commas cycle ^ " when " ^ when); def) blanchet@49407: | Graph.DUP name => blanchet@49407: (trace_msg ctxt (fn () => blanchet@49407: "Duplicate fact " ^ quote name ^ " when " ^ when); def) blanchet@49336: | Graph.UNDEF name => blanchet@49336: (trace_msg ctxt (fn () => blanchet@49336: "Unknown fact " ^ quote name ^ " when " ^ when); def) blanchet@49407: | exn => blanchet@49407: if Exn.is_interrupt exn then blanchet@49407: reraise exn blanchet@49407: else blanchet@49407: (trace_msg ctxt (fn () => blanchet@49407: "Internal error when " ^ when ^ ":\n" ^ blanchet@49407: ML_Compiler.exn_message exn); def) blanchet@49336: blanchet@49421: fun graph_info G = blanchet@49421: string_of_int (length (Graph.keys G)) ^ " node(s), " ^ blanchet@49421: string_of_int (fold (Integer.add o length o snd) (Graph.dest G) 0) ^ blanchet@49421: " edge(s), " ^ blanchet@49421: string_of_int (length (Graph.minimals G)) ^ " minimal, " ^ blanchet@49421: string_of_int (length (Graph.maximals G)) ^ " maximal" blanchet@49421: blanchet@49415: type mash_state = {fact_G : unit Graph.T} blanchet@49264: blanchet@49415: val empty_state = {fact_G = Graph.empty} blanchet@49316: blanchet@49316: local blanchet@49316: blanchet@49405: val version = "*** MaSh 0.0 ***" blanchet@49405: blanchet@49405: fun load _ (state as (true, _)) = state blanchet@49405: | load ctxt _ = blanchet@49324: let val path = mash_state_path () in blanchet@49317: (true, blanchet@49317: case try File.read_lines path of blanchet@49421: SOME (version' :: node_lines) => blanchet@49317: let blanchet@49337: fun add_edge_to name parent = blanchet@49421: Graph.default_node (parent, ()) #> Graph.add_edge (parent, name) blanchet@49421: fun add_node line = blanchet@49421: case extract_node line of blanchet@49331: ("", _) => I (* shouldn't happen *) blanchet@49331: | (name, parents) => blanchet@49421: Graph.default_node (name, ()) #> fold (add_edge_to name) parents blanchet@49415: val fact_G = blanchet@49337: try_graph ctxt "loading state" Graph.empty (fn () => blanchet@49421: Graph.empty |> version' = version ? fold add_node node_lines) blanchet@49421: in blanchet@49421: trace_msg ctxt (fn () => blanchet@49421: "Loaded fact graph (" ^ graph_info fact_G ^ ")"); blanchet@49421: {fact_G = fact_G} blanchet@49421: end blanchet@49319: | _ => empty_state) blanchet@49317: end blanchet@49316: blanchet@49421: fun save ctxt {fact_G} = blanchet@49316: let blanchet@49324: val path = mash_state_path () blanchet@49333: fun fact_line_for name parents = blanchet@49333: escape_meta name ^ ": " ^ escape_metas parents blanchet@49331: val append_fact = File.append path o suffix "\n" oo fact_line_for blanchet@49415: fun append_entry (name, ((), (parents, _))) () = blanchet@49415: append_fact name (Graph.Keys.dest parents) blanchet@49316: in blanchet@49405: File.write path (version ^ "\n"); blanchet@49421: Graph.fold append_entry fact_G (); blanchet@49421: trace_msg ctxt (fn () => "Saved fact graph (" ^ graph_info fact_G ^ ")") blanchet@49316: end blanchet@49316: blanchet@49317: val global_state = blanchet@49396: Synchronized.var "Sledgehammer_MaSh.global_state" (false, empty_state) blanchet@49316: blanchet@49316: in blanchet@49316: blanchet@49336: fun mash_map ctxt f = blanchet@49421: Synchronized.change global_state (load ctxt ##> (f #> tap (save ctxt))) blanchet@49316: blanchet@49449: fun mash_peek ctxt f = blanchet@49449: Synchronized.change_result global_state (load ctxt #> `snd #>> f) blanchet@49449: blanchet@49336: fun mash_get ctxt = blanchet@49405: Synchronized.change_result global_state (load ctxt #> `snd) blanchet@49317: blanchet@49347: fun mash_unlearn ctxt = blanchet@49317: Synchronized.change global_state (fn _ => blanchet@49409: (mash_CLEAR ctxt; wipe_out (mash_state_path ()); (true, empty_state))) blanchet@49316: blanchet@49316: end blanchet@49316: blanchet@49333: fun mash_could_suggest_facts () = mash_home () <> "" blanchet@49415: fun mash_can_suggest_facts ctxt = not (Graph.is_empty (#fact_G (mash_get ctxt))) blanchet@49264: blanchet@49422: fun num_keys keys = Graph.Keys.fold (K (Integer.add 1)) keys 0 blanchet@49415: blanchet@49422: fun maximal_in_graph fact_G facts = blanchet@49331: let blanchet@49393: val facts = [] |> fold (cons o nickname_of o snd) facts blanchet@49422: val tab = Symtab.empty |> fold (fn name => Symtab.default (name, ())) facts blanchet@49422: fun insert_new seen name = blanchet@49422: not (Symtab.defined seen name) ? insert (op =) name blanchet@49422: fun find_maxes _ (maxs, []) = map snd maxs blanchet@49422: | find_maxes seen (maxs, new :: news) = blanchet@49422: find_maxes blanchet@49422: (seen |> num_keys (Graph.imm_succs fact_G new) > 1 blanchet@49422: ? Symtab.default (new, ())) blanchet@49422: (if Symtab.defined tab new then blanchet@49422: let blanchet@49422: val newp = Graph.all_preds fact_G [new] blanchet@49422: fun is_ancestor x yp = member (op =) yp x blanchet@49422: val maxs = blanchet@49422: maxs |> filter (fn (_, max) => not (is_ancestor max newp)) blanchet@49422: in blanchet@49422: if exists (is_ancestor new o fst) maxs then blanchet@49422: (maxs, news) blanchet@49422: else blanchet@49422: ((newp, new) blanchet@49422: :: filter_out (fn (_, max) => is_ancestor max newp) maxs, blanchet@49422: news) blanchet@49422: end blanchet@49422: else blanchet@49422: (maxs, Graph.Keys.fold (insert_new seen) blanchet@49422: (Graph.imm_preds fact_G new) news)) blanchet@49422: in find_maxes Symtab.empty ([], Graph.maximals fact_G) end blanchet@49331: blanchet@49333: (* Generate more suggestions than requested, because some might be thrown out blanchet@49333: later for various reasons and "meshing" gives better results with some blanchet@49333: slack. *) blanchet@49449: fun max_suggs_of max_facts = max_facts + Int.min (50, max_facts) blanchet@49333: blanchet@49415: fun is_fact_in_graph fact_G (_, th) = blanchet@49415: can (Graph.get_node fact_G) (nickname_of th) blanchet@49335: blanchet@49421: fun mash_suggested_facts ctxt ({overlord, ...} : params) prover max_facts hyp_ts blanchet@49421: concl_t facts = blanchet@49316: let blanchet@49317: val thy = Proof_Context.theory_of ctxt blanchet@49449: val (fact_G, suggs) = blanchet@49449: mash_peek ctxt (fn {fact_G} => blanchet@49449: if Graph.is_empty fact_G then blanchet@49449: (fact_G, []) blanchet@49449: else blanchet@49449: let blanchet@49449: val parents = maximal_in_graph fact_G facts blanchet@49449: val feats = blanchet@49449: features_of ctxt prover thy (Local, General) (concl_t :: hyp_ts) blanchet@49449: in blanchet@49449: (fact_G, mash_QUERY ctxt overlord (max_suggs_of max_facts) blanchet@49449: (parents, feats)) blanchet@49449: end) blanchet@49423: val selected = blanchet@49423: facts |> suggested_facts suggs blanchet@49423: (* The weights currently returned by "mash.py" are too extreme to blanchet@49423: make any sense. *) blanchet@49423: |> map fst |> weight_mepo_facts blanchet@49415: val unknown = facts |> filter_out (is_fact_in_graph fact_G) blanchet@49335: in (selected, unknown) end blanchet@49264: blanchet@49419: fun add_to_fact_graph ctxt (name, parents, feats, deps) (adds, graph) = blanchet@49331: let blanchet@49331: fun maybe_add_from from (accum as (parents, graph)) = blanchet@49336: try_graph ctxt "updating graph" accum (fn () => blanchet@49336: (from :: parents, Graph.add_edge_acyclic (from, name) graph)) blanchet@49336: val graph = graph |> Graph.default_node (name, ()) blanchet@49331: val (parents, graph) = ([], graph) |> fold maybe_add_from parents blanchet@49422: val (deps, _) = ([], graph) |> fold maybe_add_from deps blanchet@49419: in ((name, parents, feats, deps) :: adds, graph) end blanchet@49321: blanchet@49399: val learn_timeout_slack = 2.0 blanchet@49333: blanchet@49399: fun launch_thread timeout task = blanchet@49398: let blanchet@49399: val hard_timeout = time_mult learn_timeout_slack timeout blanchet@49399: val birth_time = Time.now () blanchet@49399: val death_time = Time.+ (birth_time, hard_timeout) blanchet@49399: val desc = ("machine learner for Sledgehammer", "") blanchet@49399: in Async_Manager.launch MaShN birth_time death_time desc task end blanchet@49399: blanchet@49415: fun freshish_name () = blanchet@49415: Date.fmt ".%Y_%m_%d_%H_%M_%S__" (Date.fromTimeLocal (Time.now ())) ^ blanchet@49415: serial_string () blanchet@49415: blanchet@49399: fun mash_learn_proof ctxt ({overlord, timeout, ...} : params) prover t facts blanchet@49399: used_ths = blanchet@49399: if is_smt_prover ctxt prover then blanchet@49399: () blanchet@49399: else blanchet@49418: launch_thread timeout (fn () => blanchet@49418: let blanchet@49418: val thy = Proof_Context.theory_of ctxt blanchet@49418: val name = freshish_name () blanchet@49418: val feats = features_of ctxt prover thy (Local, General) [t] blanchet@49418: val deps = used_ths |> map nickname_of blanchet@49418: in blanchet@49449: mash_peek ctxt (fn {fact_G} => blanchet@49449: let val parents = maximal_in_graph fact_G facts in blanchet@49449: mash_ADD ctxt overlord [(name, parents, feats, deps)] blanchet@49449: end); blanchet@49449: (true, "") blanchet@49418: end) blanchet@49398: blanchet@49407: fun sendback sub = blanchet@49407: Markup.markup Isabelle_Markup.sendback (sledgehammerN ^ " " ^ sub) blanchet@49407: blanchet@49407: val commit_timeout = seconds 30.0 blanchet@49347: blanchet@49333: (* The timeout is understood in a very slack fashion. *) blanchet@49419: fun mash_learn_facts ctxt (params as {debug, verbose, overlord, ...}) prover blanchet@49419: auto_level atp learn_timeout facts = blanchet@49319: let blanchet@49333: val timer = Timer.startRealTimer () blanchet@49407: fun next_commit_time () = blanchet@49407: Time.+ (Timer.checkRealTimer timer, commit_timeout) blanchet@49415: val {fact_G} = mash_get ctxt blanchet@49415: val (old_facts, new_facts) = blanchet@49415: facts |> List.partition (is_fact_in_graph fact_G) blanchet@49415: ||> sort (thm_ord o pairself snd) blanchet@49323: in blanchet@49419: if null new_facts andalso (not atp orelse null old_facts) then blanchet@49419: if auto_level < 2 then blanchet@49419: "No new " ^ (if atp then "ATP" else "Isar") ^ " proofs to learn." ^ blanchet@49419: (if auto_level = 0 andalso not atp then blanchet@49419: "\n\nHint: Try " ^ sendback learn_atpN ^ " to learn from ATP proofs." blanchet@49419: else blanchet@49419: "") blanchet@49407: else blanchet@49407: "" blanchet@49323: else blanchet@49319: let blanchet@49330: val all_names = blanchet@49415: facts |> map snd blanchet@49415: |> filter_out is_likely_tautology_or_too_meta blanchet@49415: |> map (rpair () o nickname_of) blanchet@49415: |> Symtab.make blanchet@49419: val deps_of = blanchet@49419: if atp then blanchet@49419: atp_dependencies_of ctxt params prover auto_level facts all_names blanchet@49419: else blanchet@49419: isar_dependencies_of all_names blanchet@49419: fun do_commit [] [] state = state blanchet@49419: | do_commit adds reps {fact_G} = blanchet@49407: let blanchet@49419: val (adds, fact_G) = blanchet@49419: ([], fact_G) |> fold (add_to_fact_graph ctxt) adds blanchet@49419: in blanchet@49419: mash_ADD ctxt overlord (rev adds); blanchet@49419: mash_REPROVE ctxt overlord reps; blanchet@49419: {fact_G = fact_G} blanchet@49419: end blanchet@49419: fun commit last adds reps = blanchet@49419: (if debug andalso auto_level = 0 then blanchet@49419: Output.urgent_message "Committing..." blanchet@49419: else blanchet@49419: (); blanchet@49419: mash_map ctxt (do_commit (rev adds) reps); blanchet@49419: if not last andalso auto_level = 0 then blanchet@49419: let val num_proofs = length adds + length reps in blanchet@49419: "Learned " ^ string_of_int num_proofs ^ " " ^ blanchet@49419: (if atp then "ATP" else "Isar") ^ " proof" ^ blanchet@49419: plural_s num_proofs ^ " in the last " ^ blanchet@49407: string_from_time commit_timeout ^ "." blanchet@49407: |> Output.urgent_message blanchet@49407: end blanchet@49407: else blanchet@49407: ()) blanchet@49419: fun learn_new_fact _ (accum as (_, (_, _, _, true))) = accum blanchet@49419: | learn_new_fact ((_, stature), th) blanchet@49419: (adds, (parents, n, next_commit, _)) = blanchet@49333: let blanchet@49393: val name = nickname_of th blanchet@49347: val feats = blanchet@49400: features_of ctxt prover (theory_of_thm th) stature [prop_of th] blanchet@49419: val deps = deps_of th |> these blanchet@49409: val n = n |> not (null deps) ? Integer.add 1 blanchet@49419: val adds = (name, parents, feats, deps) :: adds blanchet@49419: val (adds, next_commit) = blanchet@49407: if Time.> (Timer.checkRealTimer timer, next_commit) then blanchet@49419: (commit false adds []; ([], next_commit_time ())) blanchet@49407: else blanchet@49419: (adds, next_commit) blanchet@49419: val timed_out = Time.> (Timer.checkRealTimer timer, learn_timeout) blanchet@49419: in (adds, ([name], n, next_commit, timed_out)) end blanchet@49419: val n = blanchet@49419: if null new_facts then blanchet@49419: 0 blanchet@49419: else blanchet@49419: let blanchet@49419: val last_th = new_facts |> List.last |> snd blanchet@49419: (* crude approximation *) blanchet@49419: val ancestors = blanchet@49419: old_facts blanchet@49419: |> filter (fn (_, th) => thm_ord (th, last_th) <> GREATER) blanchet@49422: val parents = maximal_in_graph fact_G ancestors blanchet@49419: val (adds, (_, n, _, _)) = blanchet@49419: ([], (parents, 0, next_commit_time (), false)) blanchet@49419: |> fold learn_new_fact new_facts blanchet@49419: in commit true adds []; n end blanchet@49419: fun relearn_old_fact _ (accum as (_, (_, _, true))) = accum blanchet@49419: | relearn_old_fact (_, th) (reps, (n, next_commit, _)) = blanchet@49419: let blanchet@49419: val name = nickname_of th blanchet@49419: val (n, reps) = blanchet@49419: case deps_of th of blanchet@49419: SOME deps => (n + 1, (name, deps) :: reps) blanchet@49419: | NONE => (n, reps) blanchet@49419: val (reps, next_commit) = blanchet@49419: if Time.> (Timer.checkRealTimer timer, next_commit) then blanchet@49419: (commit false [] reps; ([], next_commit_time ())) blanchet@49419: else blanchet@49419: (reps, next_commit) blanchet@49419: val timed_out = Time.> (Timer.checkRealTimer timer, learn_timeout) blanchet@49419: in (reps, (n, next_commit, timed_out)) end blanchet@49419: val n = blanchet@49448: if not atp orelse null old_facts then blanchet@49419: n blanchet@49419: else blanchet@49419: let blanchet@49421: fun priority_of (_, th) = blanchet@49419: random_range 0 (1000 * max_dependencies) blanchet@49419: - 500 * (th |> isar_dependencies_of all_names blanchet@49419: |> Option.map length blanchet@49419: |> the_default max_dependencies) blanchet@49419: val old_facts = blanchet@49421: old_facts |> map (`priority_of) blanchet@49419: |> sort (int_ord o pairself fst) blanchet@49419: |> map snd blanchet@49419: val (reps, (n, _, _)) = blanchet@49419: ([], (n, next_commit_time (), false)) blanchet@49419: |> fold relearn_old_fact old_facts blanchet@49419: in commit true [] reps; n end blanchet@49333: in blanchet@49419: if verbose orelse auto_level < 2 then blanchet@49419: "Learned " ^ string_of_int n ^ " nontrivial " ^ blanchet@49419: (if atp then "ATP" else "Isar") ^ " proof" ^ plural_s n ^ blanchet@49334: (if verbose then blanchet@49334: " in " ^ string_from_time (Timer.checkRealTimer timer) blanchet@49334: else blanchet@49334: "") ^ "." blanchet@49334: else blanchet@49334: "" blanchet@49333: end blanchet@49323: end blanchet@49319: blanchet@49419: fun mash_learn ctxt (params as {provers, timeout, ...}) fact_override chained blanchet@49419: atp = blanchet@49331: let blanchet@49411: val css = Sledgehammer_Fact.clasimpset_rule_table_of ctxt blanchet@49410: val ctxt = ctxt |> Config.put instantiate_inducts false blanchet@49410: val facts = blanchet@49411: nearly_all_facts ctxt false fact_override Symtab.empty css chained [] blanchet@49411: @{prop True} blanchet@49419: val num_facts = length facts blanchet@49419: val prover = hd provers blanchet@49419: fun learn auto_level atp = blanchet@49419: mash_learn_facts ctxt params prover auto_level atp infinite_timeout facts blanchet@49419: |> Output.urgent_message blanchet@49331: in blanchet@49419: (if atp then blanchet@49419: ("MaShing through " ^ string_of_int num_facts ^ " fact" ^ blanchet@49419: plural_s num_facts ^ " for ATP proofs (" ^ quote prover ^ " timeout: " ^ blanchet@49419: string_from_time timeout ^ ").\n\nCollecting Isar proofs first..." blanchet@49419: |> Output.urgent_message; blanchet@49419: learn 1 false; blanchet@49419: "Now collecting ATP proofs. This may take several hours. You can \ blanchet@49419: \safely stop the learning process at any point." blanchet@49419: |> Output.urgent_message; blanchet@49419: learn 0 true) blanchet@49419: else blanchet@49419: ("MaShing through " ^ string_of_int num_facts ^ " fact" ^ blanchet@49419: plural_s num_facts ^ " for Isar proofs..." blanchet@49419: |> Output.urgent_message; blanchet@49419: learn 0 false)) blanchet@49331: end blanchet@49264: blanchet@49333: (* The threshold should be large enough so that MaSh doesn't kick in for Auto blanchet@49333: Sledgehammer and Try. *) blanchet@49333: val min_secs_for_learning = 15 blanchet@49333: blanchet@49336: fun relevant_facts ctxt (params as {learn, fact_filter, timeout, ...}) prover blanchet@49336: max_facts ({add, only, ...} : fact_override) hyp_ts concl_t facts = blanchet@49329: if not (subset (op =) (the_list fact_filter, fact_filters)) then blanchet@49329: error ("Unknown fact filter: " ^ quote (the fact_filter) ^ ".") blanchet@49329: else if only then blanchet@49304: facts blanchet@49336: else if max_facts <= 0 orelse null facts then blanchet@49303: [] blanchet@49303: else blanchet@49303: let blanchet@49342: fun maybe_learn () = blanchet@49399: if learn andalso not (Async_Manager.has_running_threads MaShN) andalso blanchet@49399: Time.toSeconds timeout >= min_secs_for_learning then blanchet@49399: let val timeout = time_mult learn_timeout_slack timeout in blanchet@49399: launch_thread timeout blanchet@49419: (fn () => (true, mash_learn_facts ctxt params prover 2 false blanchet@49407: timeout facts)) blanchet@49334: end blanchet@49333: else blanchet@49333: () blanchet@49329: val fact_filter = blanchet@49329: case fact_filter of blanchet@49394: SOME ff => (() |> ff <> mepoN ? maybe_learn; ff) blanchet@49333: | NONE => blanchet@49422: if is_smt_prover ctxt prover then blanchet@49422: mepoN blanchet@49422: else if mash_could_suggest_facts () then blanchet@49422: (maybe_learn (); blanchet@49422: if mash_can_suggest_facts ctxt then meshN else mepoN) blanchet@49422: else blanchet@49422: mepoN blanchet@49303: val add_ths = Attrib.eval_thms ctxt add blanchet@49307: fun prepend_facts ths accepts = blanchet@49303: ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @ blanchet@49307: (accepts |> filter_out (member Thm.eq_thm_prop ths o snd))) blanchet@49308: |> take max_facts blanchet@49421: fun mepo () = blanchet@49421: facts |> mepo_suggested_facts ctxt params prover max_facts NONE hyp_ts blanchet@49421: concl_t blanchet@49421: |> weight_mepo_facts blanchet@49329: fun mash () = blanchet@49421: mash_suggested_facts ctxt params prover max_facts hyp_ts concl_t facts blanchet@49329: val mess = blanchet@49421: [] |> (if fact_filter <> mashN then cons (mepo (), []) else I) blanchet@49394: |> (if fact_filter <> mepoN then cons (mash ()) else I) blanchet@49303: in blanchet@49328: mesh_facts max_facts mess blanchet@49303: |> not (null add_ths) ? prepend_facts add_ths blanchet@49303: end blanchet@49303: blanchet@49334: fun kill_learners () = Async_Manager.kill_threads MaShN "learner" blanchet@49334: fun running_learners () = Async_Manager.running_threads MaShN "learner" blanchet@49334: blanchet@49263: end;