# HG changeset patch # User sultana # Date 1334443937 -3600 # Node ID 3fabf352243e5dce675479f2b77746fb74fa8f87 # Parent 92d1c566ebbf8e8d8ada69bc5b8538ac61259ada renamed mirabelle Tools directory to Actions, to make consistent with 'usage' description; diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/IsaMakefile --- a/src/HOL/IsaMakefile Sat Apr 14 23:34:18 2012 +0200 +++ b/src/HOL/IsaMakefile Sat Apr 14 23:52:17 2012 +0100 @@ -1319,16 +1319,16 @@ HOL-Mirabelle: HOL $(LOG)/HOL-Mirabelle.gz -$(LOG)/HOL-Mirabelle.gz: $(OUT)/HOL Mirabelle/Mirabelle_Test.thy \ - Mirabelle/Mirabelle.thy Mirabelle/Tools/mirabelle.ML \ - Mirabelle/ROOT.ML Mirabelle/Tools/mirabelle_arith.ML \ - Mirabelle/Tools/mirabelle_metis.ML \ - Mirabelle/Tools/mirabelle_quickcheck.ML \ - Mirabelle/Tools/mirabelle_refute.ML \ - Mirabelle/Tools/mirabelle_sledgehammer.ML \ - Mirabelle/Tools/mirabelle_sledgehammer_filter.ML \ - ex/sledgehammer_tactics.ML Mirabelle/lib/Tools/mirabelle \ - Mirabelle/lib/scripts/mirabelle.pl Library/FrechetDeriv.thy \ +$(LOG)/HOL-Mirabelle.gz: $(OUT)/HOL Mirabelle/Mirabelle_Test.thy \ + Mirabelle/Mirabelle.thy Mirabelle/Actions/mirabelle.ML \ + Mirabelle/ROOT.ML Mirabelle/Actions/mirabelle_arith.ML \ + Mirabelle/Actions/mirabelle_metis.ML \ + Mirabelle/Actions/mirabelle_quickcheck.ML \ + Mirabelle/Actions/mirabelle_refute.ML \ + Mirabelle/Actions/mirabelle_sledgehammer.ML \ + Mirabelle/Actions/mirabelle_sledgehammer_filter.ML \ + ex/sledgehammer_tactics.ML Mirabelle/lib/Tools/mirabelle \ + Mirabelle/lib/scripts/mirabelle.pl Library/FrechetDeriv.thy \ Library/Inner_Product.thy @$(ISABELLE_TOOL) usedir $(OUT)/HOL Mirabelle @cd Library; $(ISABELLE_TOOL) mirabelle -q arith Inner_Product.thy # some arbitrary small test case diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Actions/mirabelle.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Mirabelle/Actions/mirabelle.ML Sat Apr 14 23:52:17 2012 +0100 @@ -0,0 +1,213 @@ +(* Title: HOL/Mirabelle/Actions/mirabelle.ML + Author: Jasmin Blanchette and Sascha Boehme, TU Munich +*) + +signature MIRABELLE = +sig + (*configuration*) + val logfile : string Config.T + val timeout : int Config.T + val start_line : int Config.T + val end_line : int Config.T + + (*core*) + type init_action = int -> theory -> theory + type done_args = {last: Toplevel.state, log: string -> unit} + type done_action = int -> done_args -> unit + type run_args = {pre: Proof.state, post: Toplevel.state option, + timeout: Time.time, log: string -> unit, pos: Position.T, name: string} + type run_action = int -> run_args -> unit + type action = init_action * run_action * done_action + val catch : (int -> string) -> run_action -> run_action + val catch_result : (int -> string) -> 'a -> (int -> run_args -> 'a) -> + int -> run_args -> 'a + val register : action -> theory -> theory + val step_hook : Toplevel.transition -> Toplevel.state -> Toplevel.state -> + unit + + (*utility functions*) + val can_apply : Time.time -> (Proof.context -> int -> tactic) -> + Proof.state -> bool + val theorems_in_proof_term : thm -> thm list + val theorems_of_sucessful_proof : Toplevel.state option -> thm list + val get_setting : (string * string) list -> string * string -> string + val get_int_setting : (string * string) list -> string * int -> int + val cpu_time : ('a -> 'b) -> 'a -> 'b * int +end + + + +structure Mirabelle : MIRABELLE = +struct + +(* Mirabelle configuration *) + +val logfile = Attrib.setup_config_string @{binding mirabelle_logfile} (K "") +val timeout = Attrib.setup_config_int @{binding mirabelle_timeout} (K 30) +val start_line = Attrib.setup_config_int @{binding mirabelle_start_line} (K 0) +val end_line = Attrib.setup_config_int @{binding mirabelle_end_line} (K ~1) + + +(* Mirabelle core *) + +type init_action = int -> theory -> theory +type done_args = {last: Toplevel.state, log: string -> unit} +type done_action = int -> done_args -> unit +type run_args = {pre: Proof.state, post: Toplevel.state option, + timeout: Time.time, log: string -> unit, pos: Position.T, name: string} +type run_action = int -> run_args -> unit +type action = init_action * run_action * done_action + +structure Actions = Theory_Data +( + type T = (int * run_action * done_action) list + val empty = [] + val extend = I + fun merge data = Library.merge (K true) data (* FIXME potential data loss because of (K true) *) +) + + +fun log_exn log tag id e = log (tag id ^ "exception:\n" ^ General.exnMessage e) + +fun catch tag f id (st as {log, ...}: run_args) = (f id st; ()) + handle exn => + if Exn.is_interrupt exn then reraise exn else (log_exn log tag id exn; ()) + +fun catch_result tag d f id (st as {log, ...}: run_args) = f id st + handle exn => + if Exn.is_interrupt exn then reraise exn else (log_exn log tag id exn; d) + +fun register (init, run, done) thy = + let val id = length (Actions.get thy) + 1 + in + thy + |> init id + |> Actions.map (cons (id, run, done)) + end + +local + +fun log thy s = + let fun append_to n = if n = "" then K () else File.append (Path.explode n) + in append_to (Config.get_global thy logfile) (s ^ "\n") end + (* FIXME: with multithreading and parallel proofs enabled, we might need to + encapsulate this inside a critical section *) + +fun log_sep thy = log thy "------------------" + +fun apply_actions thy pos name info (pre, post, time) actions = + let + fun apply f = f {pre=pre, post=post, timeout=time, log=log thy, pos=pos, name=name} + fun run (id, run, _) = (apply (run id); log_sep thy) + in (log thy info; log_sep thy; List.app run actions) end + +fun in_range _ _ NONE = true + | in_range l r (SOME i) = (l <= i andalso (r < 0 orelse i <= r)) + +fun only_within_range thy pos f x = + let val l = Config.get_global thy start_line and r = Config.get_global thy end_line + in if in_range l r (Position.line_of pos) then f x else () end + +in + +fun run_actions tr pre post = + let + val thy = Proof.theory_of pre + val pos = Toplevel.pos_of tr + val name = Toplevel.name_of tr + val st = (pre, post, Time.fromSeconds (Config.get_global thy timeout)) + + val str0 = string_of_int o the_default 0 + val loc = str0 (Position.line_of pos) ^ ":" ^ str0 (Position.offset_of pos) + val info = "\n\nat " ^ loc ^ " (" ^ name ^ "):" + in + only_within_range thy pos (apply_actions thy pos name info st) (Actions.get thy) + end + +fun done_actions st = + let + val thy = Toplevel.theory_of st + val _ = log thy "\n\n"; + in + thy + |> Actions.get + |> List.app (fn (id, _, done) => done id {last=st, log=log thy}) + end + +end + +val whitelist = ["apply", "by", "proof"] + +fun step_hook tr pre post = + (* FIXME: might require wrapping into "interruptible" *) + if can (Proof.assert_backward o Toplevel.proof_of) pre andalso + member (op =) whitelist (Toplevel.name_of tr) + then run_actions tr (Toplevel.proof_of pre) (SOME post) + else if not (Toplevel.is_toplevel pre) andalso Toplevel.is_toplevel post + then done_actions pre + else () (* FIXME: add theory_hook here *) + + + +(* Mirabelle utility functions *) + +fun can_apply time tac st = + let + val {context = ctxt, facts, goal} = Proof.goal st + val full_tac = HEADGOAL (Method.insert_tac facts THEN' tac ctxt) + in + (case try (TimeLimit.timeLimit time (Seq.pull o full_tac)) goal of + SOME (SOME _) => true + | _ => false) + end + +local + +fun fold_body_thms f = + let + fun app n (PBody {thms, ...}) = thms |> fold (fn (i, (name, prop, body)) => + fn (x, seen) => + if Inttab.defined seen i then (x, seen) + else + let + val body' = Future.join body + val (x', seen') = app (n + (if name = "" then 0 else 1)) body' + (x, Inttab.update (i, ()) seen) + in (x' |> n = 0 ? f (name, prop, body'), seen') end) + in fn bodies => fn x => #1 (fold (app 0) bodies (x, Inttab.empty)) end + +in + +fun theorems_in_proof_term thm = + let + val all_thms = Global_Theory.all_thms_of (Thm.theory_of_thm thm) + fun collect (s, _, _) = if s <> "" then insert (op =) s else I + fun member_of xs (x, y) = if member (op =) xs x then SOME y else NONE + fun resolve_thms names = map_filter (member_of names) all_thms + in + resolve_thms (fold_body_thms collect [Thm.proof_body_of thm] []) + end + +end + +fun theorems_of_sucessful_proof state = + (case state of + NONE => [] + | SOME st => + if not (Toplevel.is_proof st) then [] + else theorems_in_proof_term (#goal (Proof.goal (Toplevel.proof_of st)))) + +fun get_setting settings (key, default) = + the_default default (AList.lookup (op =) settings key) + +fun get_int_setting settings (key, default) = + (case Option.map Int.fromString (AList.lookup (op =) settings key) of + SOME (SOME i) => i + | SOME NONE => error ("bad option: " ^ key) + | NONE => default) + +fun cpu_time f x = + let val ({cpu, ...}, y) = Timing.timing f x + in (y, Time.toMilliseconds cpu) end + +end diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Actions/mirabelle_arith.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Mirabelle/Actions/mirabelle_arith.ML Sat Apr 14 23:52:17 2012 +0100 @@ -0,0 +1,21 @@ +(* Title: HOL/Mirabelle/Actions/mirabelle_arith.ML + Author: Jasmin Blanchette and Sascha Boehme, TU Munich +*) + +structure Mirabelle_Arith : MIRABELLE_ACTION = +struct + +fun arith_tag id = "#" ^ string_of_int id ^ " arith: " + +fun init _ = I +fun done _ _ = () + +fun run id ({pre, timeout, log, ...}: Mirabelle.run_args) = + if Mirabelle.can_apply timeout Arith_Data.arith_tac pre + then log (arith_tag id ^ "succeeded") + else () + handle TimeLimit.TimeOut => log (arith_tag id ^ "timeout") + +fun invoke _ = Mirabelle.register (init, Mirabelle.catch arith_tag run, done) + +end diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Actions/mirabelle_metis.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Mirabelle/Actions/mirabelle_metis.ML Sat Apr 14 23:52:17 2012 +0100 @@ -0,0 +1,35 @@ +(* Title: HOL/Mirabelle/Actions/mirabelle_metis.ML + Author: Jasmin Blanchette and Sascha Boehme, TU Munich +*) + +structure Mirabelle_Metis : MIRABELLE_ACTION = +struct + +fun metis_tag id = "#" ^ string_of_int id ^ " metis: " + +fun init _ = I +fun done _ _ = () + +fun run id ({pre, post, timeout, log, ...}: Mirabelle.run_args) = + let + val thms = Mirabelle.theorems_of_sucessful_proof post + val names = map Thm.get_name_hint thms + val add_info = if null names then I else suffix (":\n" ^ commas names) + + val facts = Facts.props (Proof_Context.facts_of (Proof.context_of pre)) + + fun metis ctxt = + Metis_Tactic.metis_tac [] ATP_Problem_Generate.liftingN ctxt + (thms @ facts) + in + (if Mirabelle.can_apply timeout metis pre then "succeeded" else "failed") + |> prefix (metis_tag id) + |> add_info + |> log + end + handle TimeLimit.TimeOut => log (metis_tag id ^ "timeout") + | ERROR msg => log (metis_tag id ^ "error: " ^ msg) + +fun invoke _ = Mirabelle.register (init, Mirabelle.catch metis_tag run, done) + +end diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Actions/mirabelle_quickcheck.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Mirabelle/Actions/mirabelle_quickcheck.ML Sat Apr 14 23:52:17 2012 +0100 @@ -0,0 +1,28 @@ +(* Title: HOL/Mirabelle/Actions/mirabelle_quickcheck.ML + Author: Jasmin Blanchette and Sascha Boehme, TU Munich +*) + +structure Mirabelle_Quickcheck : MIRABELLE_ACTION = +struct + +fun qc_tag id = "#" ^ string_of_int id ^ " quickcheck: " + +fun init _ = I +fun done _ _ = () + +fun run args id ({pre, timeout, log, ...}: Mirabelle.run_args) = + let + val has_valid_key = member (op =) ["iterations", "size", "generator"] o fst + val quickcheck = Quickcheck.quickcheck (map (apsnd single) (filter has_valid_key args)) 1 + in + (case TimeLimit.timeLimit timeout quickcheck pre of + NONE => log (qc_tag id ^ "no counterexample") + | SOME _ => log (qc_tag id ^ "counterexample found")) + end + handle TimeLimit.TimeOut => log (qc_tag id ^ "timeout") + | ERROR msg => log (qc_tag id ^ "error: " ^ msg) + +fun invoke args = + Mirabelle.register (init, Mirabelle.catch qc_tag (run args), done) + +end diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Actions/mirabelle_refute.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Mirabelle/Actions/mirabelle_refute.ML Sat Apr 14 23:52:17 2012 +0100 @@ -0,0 +1,39 @@ +(* Title: HOL/Mirabelle/Actions/mirabelle_refute.ML + Author: Jasmin Blanchette and Sascha Boehme, TU Munich +*) + +structure Mirabelle_Refute : MIRABELLE_ACTION = +struct + + +(* FIXME: +fun refute_action args timeout {pre=st, ...} = + let + val subgoal = 1 + val thy = Proof.theory_of st + val thm = #goal (Proof.raw_goal st) + + val refute = Refute.refute_goal thy args thm + val _ = TimeLimit.timeLimit timeout refute subgoal + in + val writ_log = Substring.full (the (Symtab.lookup tab "writeln")) + val warn_log = Substring.full (the (Symtab.lookup tab "warning")) + + val r = + if Substring.isSubstring "model found" writ_log + then + if Substring.isSubstring "spurious" warn_log + then SOME "potential counterexample" + else SOME "real counterexample (bug?)" + else + if Substring.isSubstring "time limit" writ_log + then SOME "no counterexample (timeout)" + else if Substring.isSubstring "Search terminated" writ_log + then SOME "no counterexample (normal termination)" + else SOME "no counterexample (unknown)" + in r end +*) + +fun invoke args = I (*Mirabelle.register ("refute", refute_action args)*) + +end diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Actions/mirabelle_sledgehammer.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Mirabelle/Actions/mirabelle_sledgehammer.ML Sat Apr 14 23:52:17 2012 +0100 @@ -0,0 +1,746 @@ +(* Title: HOL/Mirabelle/Actions/mirabelle_sledgehammer.ML + Author: Jasmin Blanchette and Sascha Boehme and Tobias Nipkow, TU Munich +*) + +structure Mirabelle_Sledgehammer : MIRABELLE_ACTION = +struct + +val proverK = "prover" +val prover_timeoutK = "prover_timeout" +val keepK = "keep" +val type_encK = "type_enc" +val strictK = "strict" +val sliceK = "slice" +val lam_transK = "lam_trans" +val uncurried_aliasesK = "uncurried_aliases" +val e_selection_heuristicK = "e_selection_heuristic" +val term_orderK = "term_order" +val force_sosK = "force_sos" +val max_relevantK = "max_relevant" +val max_callsK = "max_calls" +val minimizeK = "minimize" (*refers to minimization attempted by Mirabelle*) +val minimize_timeoutK = "minimize_timeout" +val metis_ftK = "metis_ft" +val reconstructorK = "reconstructor" +val preplay_timeoutK = "preplay_timeout" +val sh_minimizeK = "sh_minimize" (*refers to minimizer run within Sledgehammer*) +val max_new_mono_instancesK = "max_new_mono_instances" +val max_mono_itersK = "max_mono_iters" +val check_trivialK = "check_trivial" (*false by default*) + +fun sh_tag id = "#" ^ string_of_int id ^ " sledgehammer: " +fun minimize_tag id = "#" ^ string_of_int id ^ " minimize (sledgehammer): " +fun reconstructor_tag reconstructor id = + "#" ^ string_of_int id ^ " " ^ (!reconstructor) ^ " (sledgehammer): " + +val separator = "-----" + +val preplay_timeout_default = "4" +(*FIXME sensible to have Mirabelle-level Sledgehammer defaults?*) + +(*If a key is present in args then augment a list with its pair*) +(*This is used to avoid fixing default values at the Mirabelle level, and + instead use the default values of the tool (Sledgehammer in this case).*) +fun available_parameter args key label list = + let + val value = AList.lookup (op =) args key + in if is_some value then (label, the value) :: list else list end + + +datatype sh_data = ShData of { + calls: int, + success: int, + nontriv_calls: int, + nontriv_success: int, + lemmas: int, + max_lems: int, + time_isa: int, + time_prover: int, + time_prover_fail: int} + +datatype re_data = ReData of { + calls: int, + success: int, + nontriv_calls: int, + nontriv_success: int, + proofs: int, + time: int, + timeout: int, + lemmas: int * int * int, + posns: (Position.T * bool) list + } + +datatype min_data = MinData of { + succs: int, + ab_ratios: int + } + +fun make_sh_data + (calls,success,nontriv_calls,nontriv_success,lemmas,max_lems,time_isa, + time_prover,time_prover_fail) = + ShData{calls=calls, success=success, nontriv_calls=nontriv_calls, + nontriv_success=nontriv_success, lemmas=lemmas, max_lems=max_lems, + time_isa=time_isa, time_prover=time_prover, + time_prover_fail=time_prover_fail} + +fun make_min_data (succs, ab_ratios) = + MinData{succs=succs, ab_ratios=ab_ratios} + +fun make_re_data (calls,success,nontriv_calls,nontriv_success,proofs,time, + timeout,lemmas,posns) = + ReData{calls=calls, success=success, nontriv_calls=nontriv_calls, + nontriv_success=nontriv_success, proofs=proofs, time=time, + timeout=timeout, lemmas=lemmas, posns=posns} + +val empty_sh_data = make_sh_data (0, 0, 0, 0, 0, 0, 0, 0, 0) +val empty_min_data = make_min_data (0, 0) +val empty_re_data = make_re_data (0, 0, 0, 0, 0, 0, 0, (0,0,0), []) + +fun tuple_of_sh_data (ShData {calls, success, nontriv_calls, nontriv_success, + lemmas, max_lems, time_isa, + time_prover, time_prover_fail}) = (calls, success, nontriv_calls, + nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail) + +fun tuple_of_min_data (MinData {succs, ab_ratios}) = (succs, ab_ratios) + +fun tuple_of_re_data (ReData {calls, success, nontriv_calls, nontriv_success, + proofs, time, timeout, lemmas, posns}) = (calls, success, nontriv_calls, + nontriv_success, proofs, time, timeout, lemmas, posns) + + +datatype reconstructor_mode = + Unminimized | Minimized | UnminimizedFT | MinimizedFT + +datatype data = Data of { + sh: sh_data, + min: min_data, + re_u: re_data, (* reconstructor with unminimized set of lemmas *) + re_m: re_data, (* reconstructor with minimized set of lemmas *) + re_uft: re_data, (* reconstructor with unminimized set of lemmas and fully-typed *) + re_mft: re_data, (* reconstructor with minimized set of lemmas and fully-typed *) + mini: bool (* with minimization *) + } + +fun make_data (sh, min, re_u, re_m, re_uft, re_mft, mini) = + Data {sh=sh, min=min, re_u=re_u, re_m=re_m, re_uft=re_uft, re_mft=re_mft, + mini=mini} + +val empty_data = make_data (empty_sh_data, empty_min_data, + empty_re_data, empty_re_data, empty_re_data, empty_re_data, false) + +fun map_sh_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) = + let val sh' = make_sh_data (f (tuple_of_sh_data sh)) + in make_data (sh', min, re_u, re_m, re_uft, re_mft, mini) end + +fun map_min_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) = + let val min' = make_min_data (f (tuple_of_min_data min)) + in make_data (sh, min', re_u, re_m, re_uft, re_mft, mini) end + +fun map_re_data f m (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) = + let + fun map_me g Unminimized (u, m, uft, mft) = (g u, m, uft, mft) + | map_me g Minimized (u, m, uft, mft) = (u, g m, uft, mft) + | map_me g UnminimizedFT (u, m, uft, mft) = (u, m, g uft, mft) + | map_me g MinimizedFT (u, m, uft, mft) = (u, m, uft, g mft) + + val f' = make_re_data o f o tuple_of_re_data + + val (re_u', re_m', re_uft', re_mft') = + map_me f' m (re_u, re_m, re_uft, re_mft) + in make_data (sh, min, re_u', re_m', re_uft', re_mft', mini) end + +fun set_mini mini (Data {sh, min, re_u, re_m, re_uft, re_mft, ...}) = + make_data (sh, min, re_u, re_m, re_uft, re_mft, mini) + +fun inc_max (n:int) (s,sos,m) = (s+n, sos + n*n, Int.max(m,n)); + +val inc_sh_calls = map_sh_data + (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail) + => (calls + 1, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail)) + +val inc_sh_success = map_sh_data + (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail) + => (calls, success + 1, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)) + +val inc_sh_nontriv_calls = map_sh_data + (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail) + => (calls, success, nontriv_calls + 1, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail)) + +val inc_sh_nontriv_success = map_sh_data + (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail) + => (calls, success, nontriv_calls, nontriv_success + 1, lemmas,max_lems, time_isa, time_prover, time_prover_fail)) + +fun inc_sh_lemmas n = map_sh_data + (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail) + => (calls,success,nontriv_calls, nontriv_success, lemmas+n,max_lems,time_isa,time_prover,time_prover_fail)) + +fun inc_sh_max_lems n = map_sh_data + (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail) + => (calls,success,nontriv_calls, nontriv_success, lemmas,Int.max(max_lems,n),time_isa,time_prover,time_prover_fail)) + +fun inc_sh_time_isa t = map_sh_data + (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail) + => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa + t,time_prover,time_prover_fail)) + +fun inc_sh_time_prover t = map_sh_data + (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail) + => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover + t,time_prover_fail)) + +fun inc_sh_time_prover_fail t = map_sh_data + (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail) + => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail + t)) + +val inc_min_succs = map_min_data + (fn (succs,ab_ratios) => (succs+1, ab_ratios)) + +fun inc_min_ab_ratios r = map_min_data + (fn (succs, ab_ratios) => (succs, ab_ratios+r)) + +val inc_reconstructor_calls = map_re_data + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) + => (calls + 1, success, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas,posns)) + +val inc_reconstructor_success = map_re_data + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) + => (calls, success + 1, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas,posns)) + +val inc_reconstructor_nontriv_calls = map_re_data + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) + => (calls, success, nontriv_calls + 1, nontriv_success, proofs, time, timeout, lemmas,posns)) + +val inc_reconstructor_nontriv_success = map_re_data + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) + => (calls, success, nontriv_calls, nontriv_success + 1, proofs, time, timeout, lemmas,posns)) + +val inc_reconstructor_proofs = map_re_data + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) + => (calls, success, nontriv_calls, nontriv_success, proofs + 1, time, timeout, lemmas,posns)) + +fun inc_reconstructor_time m t = map_re_data + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) + => (calls, success, nontriv_calls, nontriv_success, proofs, time + t, timeout, lemmas,posns)) m + +val inc_reconstructor_timeout = map_re_data + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) + => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout + 1, lemmas,posns)) + +fun inc_reconstructor_lemmas m n = map_re_data + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) + => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout, inc_max n lemmas, posns)) m + +fun inc_reconstructor_posns m pos = map_re_data + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) + => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas, pos::posns)) m + +val str0 = string_of_int o the_default 0 + +local + +val str = string_of_int +val str3 = Real.fmt (StringCvt.FIX (SOME 3)) +fun percentage a b = string_of_int (a * 100 div b) +fun time t = Real.fromInt t / 1000.0 +fun avg_time t n = + if n > 0 then (Real.fromInt t / 1000.0) / Real.fromInt n else 0.0 + +fun log_sh_data log + (calls, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail) = + (log ("Total number of sledgehammer calls: " ^ str calls); + log ("Number of successful sledgehammer calls: " ^ str success); + log ("Number of sledgehammer lemmas: " ^ str lemmas); + log ("Max number of sledgehammer lemmas: " ^ str max_lems); + log ("Success rate: " ^ percentage success calls ^ "%"); + log ("Total number of nontrivial sledgehammer calls: " ^ str nontriv_calls); + log ("Number of successful nontrivial sledgehammer calls: " ^ str nontriv_success); + log ("Total time for sledgehammer calls (Isabelle): " ^ str3 (time time_isa)); + log ("Total time for successful sledgehammer calls (ATP): " ^ str3 (time time_prover)); + log ("Total time for failed sledgehammer calls (ATP): " ^ str3 (time time_prover_fail)); + log ("Average time for sledgehammer calls (Isabelle): " ^ + str3 (avg_time time_isa calls)); + log ("Average time for successful sledgehammer calls (ATP): " ^ + str3 (avg_time time_prover success)); + log ("Average time for failed sledgehammer calls (ATP): " ^ + str3 (avg_time time_prover_fail (calls - success))) + ) + +fun str_of_pos (pos, triv) = + str0 (Position.line_of pos) (* ^ ":" ^ str0 (Position.offset_of pos) *) ^ + (if triv then "[T]" else "") + +fun log_re_data log tag sh_calls (re_calls, re_success, re_nontriv_calls, + re_nontriv_success, re_proofs, re_time, re_timeout, + (lemmas, lems_sos, lems_max), re_posns) = + (log ("Total number of " ^ tag ^ "reconstructor calls: " ^ str re_calls); + log ("Number of successful " ^ tag ^ "reconstructor calls: " ^ str re_success ^ + " (proof: " ^ str re_proofs ^ ")"); + log ("Number of " ^ tag ^ "reconstructor timeouts: " ^ str re_timeout); + log ("Success rate: " ^ percentage re_success sh_calls ^ "%"); + log ("Total number of nontrivial " ^ tag ^ "reconstructor calls: " ^ str re_nontriv_calls); + log ("Number of successful nontrivial " ^ tag ^ "reconstructor calls: " ^ str re_nontriv_success ^ + " (proof: " ^ str re_proofs ^ ")"); + log ("Number of successful " ^ tag ^ "reconstructor lemmas: " ^ str lemmas); + log ("SOS of successful " ^ tag ^ "reconstructor lemmas: " ^ str lems_sos); + log ("Max number of successful " ^ tag ^ "reconstructor lemmas: " ^ str lems_max); + log ("Total time for successful " ^ tag ^ "reconstructor calls: " ^ str3 (time re_time)); + log ("Average time for successful " ^ tag ^ "reconstructor calls: " ^ + str3 (avg_time re_time re_success)); + if tag="" + then log ("Proved: " ^ space_implode " " (map str_of_pos re_posns)) + else () + ) + +fun log_min_data log (succs, ab_ratios) = + (log ("Number of successful minimizations: " ^ string_of_int succs); + log ("After/before ratios: " ^ string_of_int ab_ratios) + ) + +in + +fun log_data id log (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) = + let + val ShData {calls=sh_calls, ...} = sh + + fun app_if (ReData {calls, ...}) f = if calls > 0 then f () else () + fun log_re tag m = + log_re_data log tag sh_calls (tuple_of_re_data m) + fun log_reconstructor (tag1, m1) (tag2, m2) = app_if m1 (fn () => + (log_re tag1 m1; log ""; app_if m2 (fn () => log_re tag2 m2))) + in + if sh_calls > 0 + then + (log ("\n\n\nReport #" ^ string_of_int id ^ ":\n"); + log_sh_data log (tuple_of_sh_data sh); + log ""; + if not mini + then log_reconstructor ("", re_u) ("fully-typed ", re_uft) + else + app_if re_u (fn () => + (log_reconstructor ("unminimized ", re_u) ("unminimized fully-typed ", re_uft); + log ""; + app_if re_m (fn () => + (log_min_data log (tuple_of_min_data min); log ""; + log_reconstructor ("", re_m) ("fully-typed ", re_mft)))))) + else () + end + +end + + +(* Warning: we implicitly assume single-threaded execution here! *) +val data = Unsynchronized.ref ([] : (int * data) list) + +fun init id thy = (Unsynchronized.change data (cons (id, empty_data)); thy) +fun done id ({log, ...}: Mirabelle.done_args) = + AList.lookup (op =) (!data) id + |> Option.map (log_data id log) + |> K () + +fun change_data id f = (Unsynchronized.change data (AList.map_entry (op =) id f); ()) + + +fun get_prover ctxt args = + let + fun default_prover_name () = + hd (#provers (Sledgehammer_Isar.default_params ctxt [])) + handle List.Empty => error "No ATP available." + fun get_prover name = + (name, Sledgehammer_Run.get_minimizing_prover ctxt + Sledgehammer_Provers.Normal name) + in + (case AList.lookup (op =) args proverK of + SOME name => get_prover name + | NONE => get_prover (default_prover_name ())) + end + +type stature = ATP_Problem_Generate.stature + +(* hack *) +fun reconstructor_from_msg args msg = + (case AList.lookup (op =) args reconstructorK of + SOME name => name + | NONE => + if String.isSubstring "metis (" msg then + msg |> Substring.full + |> Substring.position "metis (" + |> snd |> Substring.position ")" + |> fst |> Substring.string + |> suffix ")" + else if String.isSubstring "metis" msg then + "metis" + else + "smt") + +local + +datatype sh_result = + SH_OK of int * int * (string * stature) list | + SH_FAIL of int * int | + SH_ERROR + +fun run_sh prover_name prover type_enc strict max_relevant slice lam_trans + uncurried_aliases e_selection_heuristic term_order force_sos + hard_timeout timeout preplay_timeout sh_minimizeLST + max_new_mono_instancesLST max_mono_itersLST dir pos st = + let + val {context = ctxt, facts = chained_ths, goal} = Proof.goal st + val i = 1 + fun set_file_name (SOME dir) = + Config.put Sledgehammer_Provers.dest_dir dir + #> Config.put Sledgehammer_Provers.problem_prefix + ("prob_" ^ str0 (Position.line_of pos) ^ "__") + #> Config.put SMT_Config.debug_files + (dir ^ "/" ^ Name.desymbolize false (ATP_Util.timestamp ()) ^ "_" + ^ serial_string ()) + | set_file_name NONE = I + val st' = + st + |> Proof.map_context + (set_file_name dir + #> (Option.map (Config.put ATP_Systems.e_selection_heuristic) + e_selection_heuristic |> the_default I) + #> (Option.map (Config.put ATP_Systems.term_order) + term_order |> the_default I) + #> (Option.map (Config.put ATP_Systems.force_sos) + force_sos |> the_default I)) + val params as {relevance_thresholds, max_relevant, slice, ...} = + Sledgehammer_Isar.default_params ctxt + ([("verbose", "true"), + ("type_enc", type_enc), + ("strict", strict), + ("lam_trans", lam_trans |> the_default "smart"), + ("uncurried_aliases", uncurried_aliases |> the_default "smart"), + ("max_relevant", max_relevant), + ("slice", slice), + ("timeout", string_of_int timeout), + ("preplay_timeout", preplay_timeout)] + |> sh_minimizeLST (*don't confuse the two minimization flags*) + |> max_new_mono_instancesLST + |> max_mono_itersLST) + val default_max_relevant = + Sledgehammer_Provers.default_max_relevant_for_prover ctxt slice + prover_name + val is_appropriate_prop = + Sledgehammer_Provers.is_appropriate_prop_for_prover ctxt prover_name + val is_built_in_const = + Sledgehammer_Provers.is_built_in_const_for_prover ctxt prover_name + val relevance_fudge = + Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover_name + val relevance_override = {add = [], del = [], only = false} + val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal i + val time_limit = + (case hard_timeout of + NONE => I + | SOME secs => TimeLimit.timeLimit (Time.fromSeconds secs)) + fun failed failure = + ({outcome = SOME failure, used_facts = [], run_time = Time.zeroTime, + preplay = + K (ATP_Proof_Reconstruct.Failed_to_Play Sledgehammer_Provers.plain_metis), + message = K "", message_tail = ""}, ~1) + val ({outcome, used_facts, run_time, preplay, message, message_tail} + : Sledgehammer_Provers.prover_result, + time_isa) = time_limit (Mirabelle.cpu_time (fn () => + let + val _ = if is_appropriate_prop concl_t then () + else raise Fail "inappropriate" + val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover_name + val facts = + Sledgehammer_Filter.nearly_all_facts ctxt ho_atp relevance_override + chained_ths hyp_ts concl_t + |> filter (is_appropriate_prop o prop_of o snd) + |> Sledgehammer_Filter.relevant_facts ctxt relevance_thresholds + (the_default default_max_relevant max_relevant) + is_built_in_const relevance_fudge relevance_override + chained_ths hyp_ts concl_t + val problem = + {state = st', goal = goal, subgoal = i, + subgoal_count = Sledgehammer_Util.subgoal_count st, + facts = facts |> map Sledgehammer_Provers.Untranslated_Fact, + smt_filter = NONE} + in prover params (K (K (K ""))) problem end)) () + handle TimeLimit.TimeOut => failed ATP_Proof.TimedOut + | Fail "inappropriate" => failed ATP_Proof.Inappropriate + val time_prover = run_time |> Time.toMilliseconds + val msg = message (preplay ()) ^ message_tail + in + case outcome of + NONE => (msg, SH_OK (time_isa, time_prover, used_facts)) + | SOME _ => (msg, SH_FAIL (time_isa, time_prover)) + end + handle ERROR msg => ("error: " ^ msg, SH_ERROR) + +fun thms_of_name ctxt name = + let + val lex = Keyword.get_lexicons + val get = maps (Proof_Context.get_fact ctxt o fst) + in + Source.of_string name + |> Symbol.source + |> Token.source {do_recover=SOME false} lex Position.start + |> Token.source_proper + |> Source.source Token.stopper (Parse_Spec.xthms1 >> get) NONE + |> Source.exhaust + end + +in + +fun run_sledgehammer trivial args reconstructor named_thms id + ({pre=st, log, pos, ...}: Mirabelle.run_args) = + let + val triv_str = if trivial then "[T] " else "" + val _ = change_data id inc_sh_calls + val _ = if trivial then () else change_data id inc_sh_nontriv_calls + val (prover_name, prover) = get_prover (Proof.context_of st) args + val type_enc = AList.lookup (op =) args type_encK |> the_default "smart" + val strict = AList.lookup (op =) args strictK |> the_default "false" + val max_relevant = AList.lookup (op =) args max_relevantK |> the_default "smart" + val slice = AList.lookup (op =) args sliceK |> the_default "true" + val lam_trans = AList.lookup (op =) args lam_transK + val uncurried_aliases = AList.lookup (op =) args uncurried_aliasesK + val e_selection_heuristic = AList.lookup (op =) args e_selection_heuristicK + val term_order = AList.lookup (op =) args term_orderK + val force_sos = AList.lookup (op =) args force_sosK + |> Option.map (curry (op <>) "false") + val dir = AList.lookup (op =) args keepK + val timeout = Mirabelle.get_int_setting args (prover_timeoutK, 30) + (* always use a hard timeout, but give some slack so that the automatic + minimizer has a chance to do its magic *) + val preplay_timeout = AList.lookup (op =) args preplay_timeoutK + |> the_default preplay_timeout_default + val sh_minimizeLST = available_parameter args sh_minimizeK "minimize" + val max_new_mono_instancesLST = + available_parameter args max_new_mono_instancesK max_new_mono_instancesK + val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK + val hard_timeout = SOME (2 * timeout) + val (msg, result) = + run_sh prover_name prover type_enc strict max_relevant slice lam_trans + uncurried_aliases e_selection_heuristic term_order force_sos + hard_timeout timeout preplay_timeout sh_minimizeLST + max_new_mono_instancesLST max_mono_itersLST dir pos st + in + case result of + SH_OK (time_isa, time_prover, names) => + let + fun get_thms (name, stature) = + try (thms_of_name (Proof.context_of st)) name + |> Option.map (pair (name, stature)) + in + change_data id inc_sh_success; + if trivial then () else change_data id inc_sh_nontriv_success; + change_data id (inc_sh_lemmas (length names)); + change_data id (inc_sh_max_lems (length names)); + change_data id (inc_sh_time_isa time_isa); + change_data id (inc_sh_time_prover time_prover); + reconstructor := reconstructor_from_msg args msg; + named_thms := SOME (map_filter get_thms names); + log (sh_tag id ^ triv_str ^ "succeeded (" ^ string_of_int time_isa ^ "+" ^ + string_of_int time_prover ^ ") [" ^ prover_name ^ "]:\n" ^ msg) + end + | SH_FAIL (time_isa, time_prover) => + let + val _ = change_data id (inc_sh_time_isa time_isa) + val _ = change_data id (inc_sh_time_prover_fail time_prover) + in log (sh_tag id ^ triv_str ^ "failed: " ^ msg) end + | SH_ERROR => log (sh_tag id ^ "failed: " ^ msg) + end + +end + +fun run_minimize args reconstructor named_thms id + ({pre=st, log, ...}: Mirabelle.run_args) = + let + val ctxt = Proof.context_of st + val n0 = length (these (!named_thms)) + val (prover_name, _) = get_prover ctxt args + val type_enc = AList.lookup (op =) args type_encK |> the_default "smart" + val strict = AList.lookup (op =) args strictK |> the_default "false" + val timeout = + AList.lookup (op =) args minimize_timeoutK + |> Option.map (fst o read_int o raw_explode) (* FIXME Symbol.explode (?) *) + |> the_default 5 + val preplay_timeout = AList.lookup (op =) args preplay_timeoutK + |> the_default preplay_timeout_default + val sh_minimizeLST = available_parameter args sh_minimizeK "minimize" + val max_new_mono_instancesLST = + available_parameter args max_new_mono_instancesK max_new_mono_instancesK + val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK + val params = Sledgehammer_Isar.default_params ctxt + ([("provers", prover_name), + ("verbose", "true"), + ("type_enc", type_enc), + ("strict", strict), + ("timeout", string_of_int timeout), + ("preplay_timeout", preplay_timeout)] + |> sh_minimizeLST (*don't confuse the two minimization flags*) + |> max_new_mono_instancesLST + |> max_mono_itersLST) + val minimize = + Sledgehammer_Minimize.minimize_facts prover_name params + true 1 (Sledgehammer_Util.subgoal_count st) + val _ = log separator + val (used_facts, (preplay, message, message_tail)) = + minimize st (these (!named_thms)) + val msg = message (preplay ()) ^ message_tail + in + case used_facts of + SOME named_thms' => + (change_data id inc_min_succs; + change_data id (inc_min_ab_ratios ((100 * length named_thms') div n0)); + if length named_thms' = n0 + then log (minimize_tag id ^ "already minimal") + else (reconstructor := reconstructor_from_msg args msg; + named_thms := SOME named_thms'; + log (minimize_tag id ^ "succeeded:\n" ^ msg)) + ) + | NONE => log (minimize_tag id ^ "failed: " ^ msg) + end + +fun override_params prover type_enc timeout = + [("provers", prover), + ("max_relevant", "0"), + ("type_enc", type_enc), + ("strict", "true"), + ("slice", "false"), + ("timeout", timeout |> Time.toSeconds |> string_of_int)] + +fun run_reconstructor trivial full m name reconstructor named_thms id + ({pre=st, timeout, log, pos, ...}: Mirabelle.run_args) = + let + fun do_reconstructor named_thms ctxt = + let + val ref_of_str = + suffix ";" #> Outer_Syntax.scan Position.none #> Parse_Spec.xthm + #> fst + val thms = named_thms |> maps snd + val facts = named_thms |> map (ref_of_str o fst o fst) + val relevance_override = {add = facts, del = [], only = true} + fun my_timeout time_slice = + timeout |> Time.toReal |> curry Real.* time_slice |> Time.fromReal + fun sledge_tac time_slice prover type_enc = + Sledgehammer_Tactics.sledgehammer_as_oracle_tac ctxt + (override_params prover type_enc (my_timeout time_slice)) + relevance_override + in + if !reconstructor = "sledgehammer_tac" then + sledge_tac 0.2 ATP_Systems.z3_tptpN "mono_native" + ORELSE' sledge_tac 0.2 ATP_Systems.eN "mono_guards??" + ORELSE' sledge_tac 0.2 ATP_Systems.vampireN "mono_guards??" + ORELSE' sledge_tac 0.2 ATP_Systems.spassN "poly_tags" + ORELSE' Metis_Tactic.metis_tac [] ATP_Problem_Generate.combsN + ctxt thms + else if !reconstructor = "smt" then + SMT_Solver.smt_tac ctxt thms + else if full then + Metis_Tactic.metis_tac [ATP_Proof_Reconstruct.full_typesN] + ATP_Proof_Reconstruct.metis_default_lam_trans ctxt thms + else if String.isPrefix "metis (" (!reconstructor) then + let + val (type_encs, lam_trans) = + !reconstructor + |> Outer_Syntax.scan Position.start + |> filter Token.is_proper |> tl + |> Metis_Tactic.parse_metis_options |> fst + |>> the_default [ATP_Proof_Reconstruct.partial_typesN] + ||> the_default ATP_Proof_Reconstruct.metis_default_lam_trans + in Metis_Tactic.metis_tac type_encs lam_trans ctxt thms end + else if !reconstructor = "metis" then + Metis_Tactic.metis_tac [] ATP_Proof_Reconstruct.metis_default_lam_trans ctxt + thms + else + K all_tac + end + fun apply_reconstructor named_thms = + Mirabelle.can_apply timeout (do_reconstructor named_thms) st + + fun with_time (false, t) = "failed (" ^ string_of_int t ^ ")" + | with_time (true, t) = (change_data id (inc_reconstructor_success m); + if trivial then () + else change_data id (inc_reconstructor_nontriv_success m); + change_data id (inc_reconstructor_lemmas m (length named_thms)); + change_data id (inc_reconstructor_time m t); + change_data id (inc_reconstructor_posns m (pos, trivial)); + if name = "proof" then change_data id (inc_reconstructor_proofs m) + else (); + "succeeded (" ^ string_of_int t ^ ")") + fun timed_reconstructor named_thms = + (with_time (Mirabelle.cpu_time apply_reconstructor named_thms), true) + handle TimeLimit.TimeOut => (change_data id (inc_reconstructor_timeout m); + ("timeout", false)) + | ERROR msg => ("error: " ^ msg, false) + + val _ = log separator + val _ = change_data id (inc_reconstructor_calls m) + val _ = if trivial then () + else change_data id (inc_reconstructor_nontriv_calls m) + in + named_thms + |> timed_reconstructor + |>> log o prefix (reconstructor_tag reconstructor id) + |> snd + end + +val try_timeout = seconds 5.0 + +(* crude hack *) +val num_sledgehammer_calls = Unsynchronized.ref 0 + +fun sledgehammer_action args id (st as {pre, name, ...}: Mirabelle.run_args) = + let val goal = Thm.major_prem_of (#goal (Proof.goal pre)) in + if can Logic.dest_conjunction goal orelse can Logic.dest_equals goal + then () else + let + val max_calls = + AList.lookup (op =) args max_callsK |> the_default "10000000" + |> Int.fromString |> the + val _ = num_sledgehammer_calls := !num_sledgehammer_calls + 1; + in + if !num_sledgehammer_calls > max_calls then () + else + let + val reconstructor = Unsynchronized.ref "" + val named_thms = + Unsynchronized.ref (NONE : ((string * stature) * thm list) list option) + val minimize = AList.defined (op =) args minimizeK + val metis_ft = AList.defined (op =) args metis_ftK + val trivial = + if AList.lookup (op =) args check_trivialK |> the_default "false" + |> Bool.fromString |> the then + Try0.try0 (SOME try_timeout) ([], [], [], []) pre + handle TimeLimit.TimeOut => false + else false + fun apply_reconstructor m1 m2 = + if metis_ft + then + if not (Mirabelle.catch_result (reconstructor_tag reconstructor) false + (run_reconstructor trivial false m1 name reconstructor + (these (!named_thms))) id st) + then + (Mirabelle.catch_result (reconstructor_tag reconstructor) false + (run_reconstructor trivial true m2 name reconstructor + (these (!named_thms))) id st; ()) + else () + else + (Mirabelle.catch_result (reconstructor_tag reconstructor) false + (run_reconstructor trivial false m1 name reconstructor + (these (!named_thms))) id st; ()) + in + change_data id (set_mini minimize); + Mirabelle.catch sh_tag (run_sledgehammer trivial args reconstructor + named_thms) id st; + if is_some (!named_thms) + then + (apply_reconstructor Unminimized UnminimizedFT; + if minimize andalso not (null (these (!named_thms))) + then + (Mirabelle.catch minimize_tag + (run_minimize args reconstructor named_thms) id st; + apply_reconstructor Minimized MinimizedFT) + else ()) + else () + end + end + end + +fun invoke args = + Mirabelle.register (init, sledgehammer_action args, done) + +end diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Actions/mirabelle_sledgehammer_filter.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Mirabelle/Actions/mirabelle_sledgehammer_filter.ML Sat Apr 14 23:52:17 2012 +0100 @@ -0,0 +1,198 @@ +(* Title: HOL/Mirabelle/Actions/mirabelle_sledgehammer_filter.ML + Author: Jasmin Blanchette, TU Munich +*) + +structure Mirabelle_Sledgehammer_Filter : MIRABELLE_ACTION = +struct + +fun get args name default_value = + case AList.lookup (op =) args name of + SOME value => the (Real.fromString value) + | NONE => default_value + +fun extract_relevance_fudge args + {local_const_multiplier, worse_irrel_freq, higher_order_irrel_weight, + abs_rel_weight, abs_irrel_weight, skolem_irrel_weight, + theory_const_rel_weight, theory_const_irrel_weight, + chained_const_irrel_weight, intro_bonus, elim_bonus, simp_bonus, + local_bonus, assum_bonus, chained_bonus, max_imperfect, max_imperfect_exp, + threshold_divisor, ridiculous_threshold} = + {local_const_multiplier = + get args "local_const_multiplier" local_const_multiplier, + worse_irrel_freq = get args "worse_irrel_freq" worse_irrel_freq, + higher_order_irrel_weight = + get args "higher_order_irrel_weight" higher_order_irrel_weight, + abs_rel_weight = get args "abs_rel_weight" abs_rel_weight, + abs_irrel_weight = get args "abs_irrel_weight" abs_irrel_weight, + skolem_irrel_weight = get args "skolem_irrel_weight" skolem_irrel_weight, + theory_const_rel_weight = + get args "theory_const_rel_weight" theory_const_rel_weight, + theory_const_irrel_weight = + get args "theory_const_irrel_weight" theory_const_irrel_weight, + chained_const_irrel_weight = + get args "chained_const_irrel_weight" chained_const_irrel_weight, + intro_bonus = get args "intro_bonus" intro_bonus, + elim_bonus = get args "elim_bonus" elim_bonus, + simp_bonus = get args "simp_bonus" simp_bonus, + local_bonus = get args "local_bonus" local_bonus, + assum_bonus = get args "assum_bonus" assum_bonus, + chained_bonus = get args "chained_bonus" chained_bonus, + max_imperfect = get args "max_imperfect" max_imperfect, + max_imperfect_exp = get args "max_imperfect_exp" max_imperfect_exp, + threshold_divisor = get args "threshold_divisor" threshold_divisor, + ridiculous_threshold = get args "ridiculous_threshold" ridiculous_threshold} + +structure Prooftab = + Table(type key = int * int val ord = prod_ord int_ord int_ord) + +val proof_table = Unsynchronized.ref (Prooftab.empty: string list list Prooftab.table) + +val num_successes = Unsynchronized.ref ([] : (int * int) list) +val num_failures = Unsynchronized.ref ([] : (int * int) list) +val num_found_proofs = Unsynchronized.ref ([] : (int * int) list) +val num_lost_proofs = Unsynchronized.ref ([] : (int * int) list) +val num_found_facts = Unsynchronized.ref ([] : (int * int) list) +val num_lost_facts = Unsynchronized.ref ([] : (int * int) list) + +fun get id c = the_default 0 (AList.lookup (op =) (!c) id) +fun add id c n = + c := (case AList.lookup (op =) (!c) id of + SOME m => AList.update (op =) (id, m + n) (!c) + | NONE => (id, n) :: !c) + +fun init proof_file _ thy = + let + fun do_line line = + case line |> space_explode ":" of + [line_num, offset, proof] => + SOME (pairself (the o Int.fromString) (line_num, offset), + proof |> space_explode " " |> filter_out (curry (op =) "")) + | _ => NONE + val proofs = File.read (Path.explode proof_file) + val proof_tab = + proofs |> space_explode "\n" + |> map_filter do_line + |> AList.coalesce (op =) + |> Prooftab.make + in proof_table := proof_tab; thy end + +fun percentage a b = if b = 0 then "N/A" else string_of_int (a * 100 div b) +fun percentage_alt a b = percentage a (a + b) + +fun done id ({log, ...} : Mirabelle.done_args) = + if get id num_successes + get id num_failures > 0 then + (log ""; + log ("Number of overall successes: " ^ + string_of_int (get id num_successes)); + log ("Number of overall failures: " ^ string_of_int (get id num_failures)); + log ("Overall success rate: " ^ + percentage_alt (get id num_successes) (get id num_failures) ^ "%"); + log ("Number of found proofs: " ^ string_of_int (get id num_found_proofs)); + log ("Number of lost proofs: " ^ string_of_int (get id num_lost_proofs)); + log ("Proof found rate: " ^ + percentage_alt (get id num_found_proofs) (get id num_lost_proofs) ^ + "%"); + log ("Number of found facts: " ^ string_of_int (get id num_found_facts)); + log ("Number of lost facts: " ^ string_of_int (get id num_lost_facts)); + log ("Fact found rate: " ^ + percentage_alt (get id num_found_facts) (get id num_lost_facts) ^ + "%")) + else + () + +val default_prover = ATP_Systems.eN (* arbitrary ATP *) + +fun with_index (i, s) = s ^ "@" ^ string_of_int i + +fun action args id ({pre, pos, log, ...} : Mirabelle.run_args) = + case (Position.line_of pos, Position.offset_of pos) of + (SOME line_num, SOME offset) => + (case Prooftab.lookup (!proof_table) (line_num, offset) of + SOME proofs => + let + val {context = ctxt, facts = chained_ths, goal} = Proof.goal pre + val prover = AList.lookup (op =) args "prover" + |> the_default default_prover + val {relevance_thresholds, max_relevant, slice, ...} = + Sledgehammer_Isar.default_params ctxt args + val default_max_relevant = + Sledgehammer_Provers.default_max_relevant_for_prover ctxt slice + prover + val is_appropriate_prop = + Sledgehammer_Provers.is_appropriate_prop_for_prover ctxt + default_prover + val is_built_in_const = + Sledgehammer_Provers.is_built_in_const_for_prover ctxt default_prover + val relevance_fudge = + extract_relevance_fudge args + (Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover) + val relevance_override = {add = [], del = [], only = false} + val subgoal = 1 + val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal subgoal + val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover + val facts = + Sledgehammer_Filter.nearly_all_facts ctxt ho_atp relevance_override + chained_ths hyp_ts concl_t + |> filter (is_appropriate_prop o prop_of o snd) + |> Sledgehammer_Filter.relevant_facts ctxt relevance_thresholds + (the_default default_max_relevant max_relevant) + is_built_in_const relevance_fudge relevance_override + chained_ths hyp_ts concl_t + |> map (fst o fst) + val (found_facts, lost_facts) = + flat proofs |> sort_distinct string_ord + |> map (fn fact => (find_index (curry (op =) fact) facts, fact)) + |> List.partition (curry (op <=) 0 o fst) + |>> sort (prod_ord int_ord string_ord) ||> map snd + val found_proofs = filter (forall (member (op =) facts)) proofs + val n = length found_proofs + val _ = + if n = 0 then + (add id num_failures 1; log "Failure") + else + (add id num_successes 1; + add id num_found_proofs n; + log ("Success (" ^ string_of_int n ^ " of " ^ + string_of_int (length proofs) ^ " proofs)")) + val _ = add id num_lost_proofs (length proofs - n) + val _ = add id num_found_facts (length found_facts) + val _ = add id num_lost_facts (length lost_facts) + val _ = + if null found_facts then + () + else + let + val found_weight = + Real.fromInt (fold (fn (n, _) => + Integer.add (n * n)) found_facts 0) + / Real.fromInt (length found_facts) + |> Math.sqrt |> Real.ceil + in + log ("Found facts (among " ^ string_of_int (length facts) ^ + ", weight " ^ string_of_int found_weight ^ "): " ^ + commas (map with_index found_facts)) + end + val _ = if null lost_facts then + () + else + log ("Lost facts (among " ^ string_of_int (length facts) ^ + "): " ^ commas lost_facts) + in () end + | NONE => log "No known proof") + | _ => () + +val proof_fileK = "proof_file" + +fun invoke args = + let + val (pf_args, other_args) = + args |> List.partition (curry (op =) proof_fileK o fst) + val proof_file = case pf_args of + [] => error "No \"proof_file\" specified" + | (_, s) :: _ => s + in Mirabelle.register (init proof_file, action other_args, done) end + +end; + +(* Workaround to keep the "mirabelle.pl" script happy *) +structure Mirabelle_Sledgehammer_filter = Mirabelle_Sledgehammer_Filter; diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Mirabelle.thy --- a/src/HOL/Mirabelle/Mirabelle.thy Sat Apr 14 23:34:18 2012 +0200 +++ b/src/HOL/Mirabelle/Mirabelle.thy Sat Apr 14 23:52:17 2012 +0100 @@ -4,7 +4,7 @@ theory Mirabelle imports Sledgehammer -uses "Tools/mirabelle.ML" +uses "Actions/mirabelle.ML" "../ex/sledgehammer_tactics.ML" begin diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Mirabelle_Test.thy --- a/src/HOL/Mirabelle/Mirabelle_Test.thy Sat Apr 14 23:34:18 2012 +0200 +++ b/src/HOL/Mirabelle/Mirabelle_Test.thy Sat Apr 14 23:52:17 2012 +0100 @@ -7,12 +7,12 @@ theory Mirabelle_Test imports Main Mirabelle uses - "Tools/mirabelle_arith.ML" - "Tools/mirabelle_metis.ML" - "Tools/mirabelle_quickcheck.ML" - "Tools/mirabelle_refute.ML" - "Tools/mirabelle_sledgehammer.ML" - "Tools/mirabelle_sledgehammer_filter.ML" + "Actions/mirabelle_arith.ML" + "Actions/mirabelle_metis.ML" + "Actions/mirabelle_quickcheck.ML" + "Actions/mirabelle_refute.ML" + "Actions/mirabelle_sledgehammer.ML" + "Actions/mirabelle_sledgehammer_filter.ML" begin text {* diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Tools/mirabelle.ML --- a/src/HOL/Mirabelle/Tools/mirabelle.ML Sat Apr 14 23:34:18 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,213 +0,0 @@ -(* Title: HOL/Mirabelle/Tools/mirabelle.ML - Author: Jasmin Blanchette and Sascha Boehme, TU Munich -*) - -signature MIRABELLE = -sig - (*configuration*) - val logfile : string Config.T - val timeout : int Config.T - val start_line : int Config.T - val end_line : int Config.T - - (*core*) - type init_action = int -> theory -> theory - type done_args = {last: Toplevel.state, log: string -> unit} - type done_action = int -> done_args -> unit - type run_args = {pre: Proof.state, post: Toplevel.state option, - timeout: Time.time, log: string -> unit, pos: Position.T, name: string} - type run_action = int -> run_args -> unit - type action = init_action * run_action * done_action - val catch : (int -> string) -> run_action -> run_action - val catch_result : (int -> string) -> 'a -> (int -> run_args -> 'a) -> - int -> run_args -> 'a - val register : action -> theory -> theory - val step_hook : Toplevel.transition -> Toplevel.state -> Toplevel.state -> - unit - - (*utility functions*) - val can_apply : Time.time -> (Proof.context -> int -> tactic) -> - Proof.state -> bool - val theorems_in_proof_term : thm -> thm list - val theorems_of_sucessful_proof : Toplevel.state option -> thm list - val get_setting : (string * string) list -> string * string -> string - val get_int_setting : (string * string) list -> string * int -> int - val cpu_time : ('a -> 'b) -> 'a -> 'b * int -end - - - -structure Mirabelle : MIRABELLE = -struct - -(* Mirabelle configuration *) - -val logfile = Attrib.setup_config_string @{binding mirabelle_logfile} (K "") -val timeout = Attrib.setup_config_int @{binding mirabelle_timeout} (K 30) -val start_line = Attrib.setup_config_int @{binding mirabelle_start_line} (K 0) -val end_line = Attrib.setup_config_int @{binding mirabelle_end_line} (K ~1) - - -(* Mirabelle core *) - -type init_action = int -> theory -> theory -type done_args = {last: Toplevel.state, log: string -> unit} -type done_action = int -> done_args -> unit -type run_args = {pre: Proof.state, post: Toplevel.state option, - timeout: Time.time, log: string -> unit, pos: Position.T, name: string} -type run_action = int -> run_args -> unit -type action = init_action * run_action * done_action - -structure Actions = Theory_Data -( - type T = (int * run_action * done_action) list - val empty = [] - val extend = I - fun merge data = Library.merge (K true) data (* FIXME potential data loss because of (K true) *) -) - - -fun log_exn log tag id e = log (tag id ^ "exception:\n" ^ General.exnMessage e) - -fun catch tag f id (st as {log, ...}: run_args) = (f id st; ()) - handle exn => - if Exn.is_interrupt exn then reraise exn else (log_exn log tag id exn; ()) - -fun catch_result tag d f id (st as {log, ...}: run_args) = f id st - handle exn => - if Exn.is_interrupt exn then reraise exn else (log_exn log tag id exn; d) - -fun register (init, run, done) thy = - let val id = length (Actions.get thy) + 1 - in - thy - |> init id - |> Actions.map (cons (id, run, done)) - end - -local - -fun log thy s = - let fun append_to n = if n = "" then K () else File.append (Path.explode n) - in append_to (Config.get_global thy logfile) (s ^ "\n") end - (* FIXME: with multithreading and parallel proofs enabled, we might need to - encapsulate this inside a critical section *) - -fun log_sep thy = log thy "------------------" - -fun apply_actions thy pos name info (pre, post, time) actions = - let - fun apply f = f {pre=pre, post=post, timeout=time, log=log thy, pos=pos, name=name} - fun run (id, run, _) = (apply (run id); log_sep thy) - in (log thy info; log_sep thy; List.app run actions) end - -fun in_range _ _ NONE = true - | in_range l r (SOME i) = (l <= i andalso (r < 0 orelse i <= r)) - -fun only_within_range thy pos f x = - let val l = Config.get_global thy start_line and r = Config.get_global thy end_line - in if in_range l r (Position.line_of pos) then f x else () end - -in - -fun run_actions tr pre post = - let - val thy = Proof.theory_of pre - val pos = Toplevel.pos_of tr - val name = Toplevel.name_of tr - val st = (pre, post, Time.fromSeconds (Config.get_global thy timeout)) - - val str0 = string_of_int o the_default 0 - val loc = str0 (Position.line_of pos) ^ ":" ^ str0 (Position.offset_of pos) - val info = "\n\nat " ^ loc ^ " (" ^ name ^ "):" - in - only_within_range thy pos (apply_actions thy pos name info st) (Actions.get thy) - end - -fun done_actions st = - let - val thy = Toplevel.theory_of st - val _ = log thy "\n\n"; - in - thy - |> Actions.get - |> List.app (fn (id, _, done) => done id {last=st, log=log thy}) - end - -end - -val whitelist = ["apply", "by", "proof"] - -fun step_hook tr pre post = - (* FIXME: might require wrapping into "interruptible" *) - if can (Proof.assert_backward o Toplevel.proof_of) pre andalso - member (op =) whitelist (Toplevel.name_of tr) - then run_actions tr (Toplevel.proof_of pre) (SOME post) - else if not (Toplevel.is_toplevel pre) andalso Toplevel.is_toplevel post - then done_actions pre - else () (* FIXME: add theory_hook here *) - - - -(* Mirabelle utility functions *) - -fun can_apply time tac st = - let - val {context = ctxt, facts, goal} = Proof.goal st - val full_tac = HEADGOAL (Method.insert_tac facts THEN' tac ctxt) - in - (case try (TimeLimit.timeLimit time (Seq.pull o full_tac)) goal of - SOME (SOME _) => true - | _ => false) - end - -local - -fun fold_body_thms f = - let - fun app n (PBody {thms, ...}) = thms |> fold (fn (i, (name, prop, body)) => - fn (x, seen) => - if Inttab.defined seen i then (x, seen) - else - let - val body' = Future.join body - val (x', seen') = app (n + (if name = "" then 0 else 1)) body' - (x, Inttab.update (i, ()) seen) - in (x' |> n = 0 ? f (name, prop, body'), seen') end) - in fn bodies => fn x => #1 (fold (app 0) bodies (x, Inttab.empty)) end - -in - -fun theorems_in_proof_term thm = - let - val all_thms = Global_Theory.all_thms_of (Thm.theory_of_thm thm) - fun collect (s, _, _) = if s <> "" then insert (op =) s else I - fun member_of xs (x, y) = if member (op =) xs x then SOME y else NONE - fun resolve_thms names = map_filter (member_of names) all_thms - in - resolve_thms (fold_body_thms collect [Thm.proof_body_of thm] []) - end - -end - -fun theorems_of_sucessful_proof state = - (case state of - NONE => [] - | SOME st => - if not (Toplevel.is_proof st) then [] - else theorems_in_proof_term (#goal (Proof.goal (Toplevel.proof_of st)))) - -fun get_setting settings (key, default) = - the_default default (AList.lookup (op =) settings key) - -fun get_int_setting settings (key, default) = - (case Option.map Int.fromString (AList.lookup (op =) settings key) of - SOME (SOME i) => i - | SOME NONE => error ("bad option: " ^ key) - | NONE => default) - -fun cpu_time f x = - let val ({cpu, ...}, y) = Timing.timing f x - in (y, Time.toMilliseconds cpu) end - -end diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Tools/mirabelle_arith.ML --- a/src/HOL/Mirabelle/Tools/mirabelle_arith.ML Sat Apr 14 23:34:18 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -(* Title: HOL/Mirabelle/Tools/mirabelle_arith.ML - Author: Jasmin Blanchette and Sascha Boehme, TU Munich -*) - -structure Mirabelle_Arith : MIRABELLE_ACTION = -struct - -fun arith_tag id = "#" ^ string_of_int id ^ " arith: " - -fun init _ = I -fun done _ _ = () - -fun run id ({pre, timeout, log, ...}: Mirabelle.run_args) = - if Mirabelle.can_apply timeout Arith_Data.arith_tac pre - then log (arith_tag id ^ "succeeded") - else () - handle TimeLimit.TimeOut => log (arith_tag id ^ "timeout") - -fun invoke _ = Mirabelle.register (init, Mirabelle.catch arith_tag run, done) - -end diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Tools/mirabelle_metis.ML --- a/src/HOL/Mirabelle/Tools/mirabelle_metis.ML Sat Apr 14 23:34:18 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,35 +0,0 @@ -(* Title: HOL/Mirabelle/Tools/mirabelle_metis.ML - Author: Jasmin Blanchette and Sascha Boehme, TU Munich -*) - -structure Mirabelle_Metis : MIRABELLE_ACTION = -struct - -fun metis_tag id = "#" ^ string_of_int id ^ " metis: " - -fun init _ = I -fun done _ _ = () - -fun run id ({pre, post, timeout, log, ...}: Mirabelle.run_args) = - let - val thms = Mirabelle.theorems_of_sucessful_proof post - val names = map Thm.get_name_hint thms - val add_info = if null names then I else suffix (":\n" ^ commas names) - - val facts = Facts.props (Proof_Context.facts_of (Proof.context_of pre)) - - fun metis ctxt = - Metis_Tactic.metis_tac [] ATP_Problem_Generate.liftingN ctxt - (thms @ facts) - in - (if Mirabelle.can_apply timeout metis pre then "succeeded" else "failed") - |> prefix (metis_tag id) - |> add_info - |> log - end - handle TimeLimit.TimeOut => log (metis_tag id ^ "timeout") - | ERROR msg => log (metis_tag id ^ "error: " ^ msg) - -fun invoke _ = Mirabelle.register (init, Mirabelle.catch metis_tag run, done) - -end diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Tools/mirabelle_quickcheck.ML --- a/src/HOL/Mirabelle/Tools/mirabelle_quickcheck.ML Sat Apr 14 23:34:18 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -(* Title: HOL/Mirabelle/Tools/mirabelle_quickcheck.ML - Author: Jasmin Blanchette and Sascha Boehme, TU Munich -*) - -structure Mirabelle_Quickcheck : MIRABELLE_ACTION = -struct - -fun qc_tag id = "#" ^ string_of_int id ^ " quickcheck: " - -fun init _ = I -fun done _ _ = () - -fun run args id ({pre, timeout, log, ...}: Mirabelle.run_args) = - let - val has_valid_key = member (op =) ["iterations", "size", "generator"] o fst - val quickcheck = Quickcheck.quickcheck (map (apsnd single) (filter has_valid_key args)) 1 - in - (case TimeLimit.timeLimit timeout quickcheck pre of - NONE => log (qc_tag id ^ "no counterexample") - | SOME _ => log (qc_tag id ^ "counterexample found")) - end - handle TimeLimit.TimeOut => log (qc_tag id ^ "timeout") - | ERROR msg => log (qc_tag id ^ "error: " ^ msg) - -fun invoke args = - Mirabelle.register (init, Mirabelle.catch qc_tag (run args), done) - -end diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Tools/mirabelle_refute.ML --- a/src/HOL/Mirabelle/Tools/mirabelle_refute.ML Sat Apr 14 23:34:18 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -(* Title: HOL/Mirabelle/Tools/mirabelle_refute.ML - Author: Jasmin Blanchette and Sascha Boehme, TU Munich -*) - -structure Mirabelle_Refute : MIRABELLE_ACTION = -struct - - -(* FIXME: -fun refute_action args timeout {pre=st, ...} = - let - val subgoal = 1 - val thy = Proof.theory_of st - val thm = #goal (Proof.raw_goal st) - - val refute = Refute.refute_goal thy args thm - val _ = TimeLimit.timeLimit timeout refute subgoal - in - val writ_log = Substring.full (the (Symtab.lookup tab "writeln")) - val warn_log = Substring.full (the (Symtab.lookup tab "warning")) - - val r = - if Substring.isSubstring "model found" writ_log - then - if Substring.isSubstring "spurious" warn_log - then SOME "potential counterexample" - else SOME "real counterexample (bug?)" - else - if Substring.isSubstring "time limit" writ_log - then SOME "no counterexample (timeout)" - else if Substring.isSubstring "Search terminated" writ_log - then SOME "no counterexample (normal termination)" - else SOME "no counterexample (unknown)" - in r end -*) - -fun invoke args = I (*Mirabelle.register ("refute", refute_action args)*) - -end diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML --- a/src/HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML Sat Apr 14 23:34:18 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,746 +0,0 @@ -(* Title: HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML - Author: Jasmin Blanchette and Sascha Boehme and Tobias Nipkow, TU Munich -*) - -structure Mirabelle_Sledgehammer : MIRABELLE_ACTION = -struct - -val proverK = "prover" -val prover_timeoutK = "prover_timeout" -val keepK = "keep" -val type_encK = "type_enc" -val strictK = "strict" -val sliceK = "slice" -val lam_transK = "lam_trans" -val uncurried_aliasesK = "uncurried_aliases" -val e_selection_heuristicK = "e_selection_heuristic" -val term_orderK = "term_order" -val force_sosK = "force_sos" -val max_relevantK = "max_relevant" -val max_callsK = "max_calls" -val minimizeK = "minimize" (*refers to minimization attempted by Mirabelle*) -val minimize_timeoutK = "minimize_timeout" -val metis_ftK = "metis_ft" -val reconstructorK = "reconstructor" -val preplay_timeoutK = "preplay_timeout" -val sh_minimizeK = "sh_minimize" (*refers to minimizer run within Sledgehammer*) -val max_new_mono_instancesK = "max_new_mono_instances" -val max_mono_itersK = "max_mono_iters" -val check_trivialK = "check_trivial" (*false by default*) - -fun sh_tag id = "#" ^ string_of_int id ^ " sledgehammer: " -fun minimize_tag id = "#" ^ string_of_int id ^ " minimize (sledgehammer): " -fun reconstructor_tag reconstructor id = - "#" ^ string_of_int id ^ " " ^ (!reconstructor) ^ " (sledgehammer): " - -val separator = "-----" - -val preplay_timeout_default = "4" -(*FIXME sensible to have Mirabelle-level Sledgehammer defaults?*) - -(*If a key is present in args then augment a list with its pair*) -(*This is used to avoid fixing default values at the Mirabelle level, and - instead use the default values of the tool (Sledgehammer in this case).*) -fun available_parameter args key label list = - let - val value = AList.lookup (op =) args key - in if is_some value then (label, the value) :: list else list end - - -datatype sh_data = ShData of { - calls: int, - success: int, - nontriv_calls: int, - nontriv_success: int, - lemmas: int, - max_lems: int, - time_isa: int, - time_prover: int, - time_prover_fail: int} - -datatype re_data = ReData of { - calls: int, - success: int, - nontriv_calls: int, - nontriv_success: int, - proofs: int, - time: int, - timeout: int, - lemmas: int * int * int, - posns: (Position.T * bool) list - } - -datatype min_data = MinData of { - succs: int, - ab_ratios: int - } - -fun make_sh_data - (calls,success,nontriv_calls,nontriv_success,lemmas,max_lems,time_isa, - time_prover,time_prover_fail) = - ShData{calls=calls, success=success, nontriv_calls=nontriv_calls, - nontriv_success=nontriv_success, lemmas=lemmas, max_lems=max_lems, - time_isa=time_isa, time_prover=time_prover, - time_prover_fail=time_prover_fail} - -fun make_min_data (succs, ab_ratios) = - MinData{succs=succs, ab_ratios=ab_ratios} - -fun make_re_data (calls,success,nontriv_calls,nontriv_success,proofs,time, - timeout,lemmas,posns) = - ReData{calls=calls, success=success, nontriv_calls=nontriv_calls, - nontriv_success=nontriv_success, proofs=proofs, time=time, - timeout=timeout, lemmas=lemmas, posns=posns} - -val empty_sh_data = make_sh_data (0, 0, 0, 0, 0, 0, 0, 0, 0) -val empty_min_data = make_min_data (0, 0) -val empty_re_data = make_re_data (0, 0, 0, 0, 0, 0, 0, (0,0,0), []) - -fun tuple_of_sh_data (ShData {calls, success, nontriv_calls, nontriv_success, - lemmas, max_lems, time_isa, - time_prover, time_prover_fail}) = (calls, success, nontriv_calls, - nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail) - -fun tuple_of_min_data (MinData {succs, ab_ratios}) = (succs, ab_ratios) - -fun tuple_of_re_data (ReData {calls, success, nontriv_calls, nontriv_success, - proofs, time, timeout, lemmas, posns}) = (calls, success, nontriv_calls, - nontriv_success, proofs, time, timeout, lemmas, posns) - - -datatype reconstructor_mode = - Unminimized | Minimized | UnminimizedFT | MinimizedFT - -datatype data = Data of { - sh: sh_data, - min: min_data, - re_u: re_data, (* reconstructor with unminimized set of lemmas *) - re_m: re_data, (* reconstructor with minimized set of lemmas *) - re_uft: re_data, (* reconstructor with unminimized set of lemmas and fully-typed *) - re_mft: re_data, (* reconstructor with minimized set of lemmas and fully-typed *) - mini: bool (* with minimization *) - } - -fun make_data (sh, min, re_u, re_m, re_uft, re_mft, mini) = - Data {sh=sh, min=min, re_u=re_u, re_m=re_m, re_uft=re_uft, re_mft=re_mft, - mini=mini} - -val empty_data = make_data (empty_sh_data, empty_min_data, - empty_re_data, empty_re_data, empty_re_data, empty_re_data, false) - -fun map_sh_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) = - let val sh' = make_sh_data (f (tuple_of_sh_data sh)) - in make_data (sh', min, re_u, re_m, re_uft, re_mft, mini) end - -fun map_min_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) = - let val min' = make_min_data (f (tuple_of_min_data min)) - in make_data (sh, min', re_u, re_m, re_uft, re_mft, mini) end - -fun map_re_data f m (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) = - let - fun map_me g Unminimized (u, m, uft, mft) = (g u, m, uft, mft) - | map_me g Minimized (u, m, uft, mft) = (u, g m, uft, mft) - | map_me g UnminimizedFT (u, m, uft, mft) = (u, m, g uft, mft) - | map_me g MinimizedFT (u, m, uft, mft) = (u, m, uft, g mft) - - val f' = make_re_data o f o tuple_of_re_data - - val (re_u', re_m', re_uft', re_mft') = - map_me f' m (re_u, re_m, re_uft, re_mft) - in make_data (sh, min, re_u', re_m', re_uft', re_mft', mini) end - -fun set_mini mini (Data {sh, min, re_u, re_m, re_uft, re_mft, ...}) = - make_data (sh, min, re_u, re_m, re_uft, re_mft, mini) - -fun inc_max (n:int) (s,sos,m) = (s+n, sos + n*n, Int.max(m,n)); - -val inc_sh_calls = map_sh_data - (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail) - => (calls + 1, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail)) - -val inc_sh_success = map_sh_data - (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail) - => (calls, success + 1, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)) - -val inc_sh_nontriv_calls = map_sh_data - (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail) - => (calls, success, nontriv_calls + 1, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail)) - -val inc_sh_nontriv_success = map_sh_data - (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail) - => (calls, success, nontriv_calls, nontriv_success + 1, lemmas,max_lems, time_isa, time_prover, time_prover_fail)) - -fun inc_sh_lemmas n = map_sh_data - (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail) - => (calls,success,nontriv_calls, nontriv_success, lemmas+n,max_lems,time_isa,time_prover,time_prover_fail)) - -fun inc_sh_max_lems n = map_sh_data - (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail) - => (calls,success,nontriv_calls, nontriv_success, lemmas,Int.max(max_lems,n),time_isa,time_prover,time_prover_fail)) - -fun inc_sh_time_isa t = map_sh_data - (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail) - => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa + t,time_prover,time_prover_fail)) - -fun inc_sh_time_prover t = map_sh_data - (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail) - => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover + t,time_prover_fail)) - -fun inc_sh_time_prover_fail t = map_sh_data - (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail) - => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail + t)) - -val inc_min_succs = map_min_data - (fn (succs,ab_ratios) => (succs+1, ab_ratios)) - -fun inc_min_ab_ratios r = map_min_data - (fn (succs, ab_ratios) => (succs, ab_ratios+r)) - -val inc_reconstructor_calls = map_re_data - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) - => (calls + 1, success, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas,posns)) - -val inc_reconstructor_success = map_re_data - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) - => (calls, success + 1, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas,posns)) - -val inc_reconstructor_nontriv_calls = map_re_data - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) - => (calls, success, nontriv_calls + 1, nontriv_success, proofs, time, timeout, lemmas,posns)) - -val inc_reconstructor_nontriv_success = map_re_data - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) - => (calls, success, nontriv_calls, nontriv_success + 1, proofs, time, timeout, lemmas,posns)) - -val inc_reconstructor_proofs = map_re_data - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) - => (calls, success, nontriv_calls, nontriv_success, proofs + 1, time, timeout, lemmas,posns)) - -fun inc_reconstructor_time m t = map_re_data - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) - => (calls, success, nontriv_calls, nontriv_success, proofs, time + t, timeout, lemmas,posns)) m - -val inc_reconstructor_timeout = map_re_data - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) - => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout + 1, lemmas,posns)) - -fun inc_reconstructor_lemmas m n = map_re_data - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) - => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout, inc_max n lemmas, posns)) m - -fun inc_reconstructor_posns m pos = map_re_data - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns) - => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas, pos::posns)) m - -val str0 = string_of_int o the_default 0 - -local - -val str = string_of_int -val str3 = Real.fmt (StringCvt.FIX (SOME 3)) -fun percentage a b = string_of_int (a * 100 div b) -fun time t = Real.fromInt t / 1000.0 -fun avg_time t n = - if n > 0 then (Real.fromInt t / 1000.0) / Real.fromInt n else 0.0 - -fun log_sh_data log - (calls, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail) = - (log ("Total number of sledgehammer calls: " ^ str calls); - log ("Number of successful sledgehammer calls: " ^ str success); - log ("Number of sledgehammer lemmas: " ^ str lemmas); - log ("Max number of sledgehammer lemmas: " ^ str max_lems); - log ("Success rate: " ^ percentage success calls ^ "%"); - log ("Total number of nontrivial sledgehammer calls: " ^ str nontriv_calls); - log ("Number of successful nontrivial sledgehammer calls: " ^ str nontriv_success); - log ("Total time for sledgehammer calls (Isabelle): " ^ str3 (time time_isa)); - log ("Total time for successful sledgehammer calls (ATP): " ^ str3 (time time_prover)); - log ("Total time for failed sledgehammer calls (ATP): " ^ str3 (time time_prover_fail)); - log ("Average time for sledgehammer calls (Isabelle): " ^ - str3 (avg_time time_isa calls)); - log ("Average time for successful sledgehammer calls (ATP): " ^ - str3 (avg_time time_prover success)); - log ("Average time for failed sledgehammer calls (ATP): " ^ - str3 (avg_time time_prover_fail (calls - success))) - ) - -fun str_of_pos (pos, triv) = - str0 (Position.line_of pos) (* ^ ":" ^ str0 (Position.offset_of pos) *) ^ - (if triv then "[T]" else "") - -fun log_re_data log tag sh_calls (re_calls, re_success, re_nontriv_calls, - re_nontriv_success, re_proofs, re_time, re_timeout, - (lemmas, lems_sos, lems_max), re_posns) = - (log ("Total number of " ^ tag ^ "reconstructor calls: " ^ str re_calls); - log ("Number of successful " ^ tag ^ "reconstructor calls: " ^ str re_success ^ - " (proof: " ^ str re_proofs ^ ")"); - log ("Number of " ^ tag ^ "reconstructor timeouts: " ^ str re_timeout); - log ("Success rate: " ^ percentage re_success sh_calls ^ "%"); - log ("Total number of nontrivial " ^ tag ^ "reconstructor calls: " ^ str re_nontriv_calls); - log ("Number of successful nontrivial " ^ tag ^ "reconstructor calls: " ^ str re_nontriv_success ^ - " (proof: " ^ str re_proofs ^ ")"); - log ("Number of successful " ^ tag ^ "reconstructor lemmas: " ^ str lemmas); - log ("SOS of successful " ^ tag ^ "reconstructor lemmas: " ^ str lems_sos); - log ("Max number of successful " ^ tag ^ "reconstructor lemmas: " ^ str lems_max); - log ("Total time for successful " ^ tag ^ "reconstructor calls: " ^ str3 (time re_time)); - log ("Average time for successful " ^ tag ^ "reconstructor calls: " ^ - str3 (avg_time re_time re_success)); - if tag="" - then log ("Proved: " ^ space_implode " " (map str_of_pos re_posns)) - else () - ) - -fun log_min_data log (succs, ab_ratios) = - (log ("Number of successful minimizations: " ^ string_of_int succs); - log ("After/before ratios: " ^ string_of_int ab_ratios) - ) - -in - -fun log_data id log (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) = - let - val ShData {calls=sh_calls, ...} = sh - - fun app_if (ReData {calls, ...}) f = if calls > 0 then f () else () - fun log_re tag m = - log_re_data log tag sh_calls (tuple_of_re_data m) - fun log_reconstructor (tag1, m1) (tag2, m2) = app_if m1 (fn () => - (log_re tag1 m1; log ""; app_if m2 (fn () => log_re tag2 m2))) - in - if sh_calls > 0 - then - (log ("\n\n\nReport #" ^ string_of_int id ^ ":\n"); - log_sh_data log (tuple_of_sh_data sh); - log ""; - if not mini - then log_reconstructor ("", re_u) ("fully-typed ", re_uft) - else - app_if re_u (fn () => - (log_reconstructor ("unminimized ", re_u) ("unminimized fully-typed ", re_uft); - log ""; - app_if re_m (fn () => - (log_min_data log (tuple_of_min_data min); log ""; - log_reconstructor ("", re_m) ("fully-typed ", re_mft)))))) - else () - end - -end - - -(* Warning: we implicitly assume single-threaded execution here! *) -val data = Unsynchronized.ref ([] : (int * data) list) - -fun init id thy = (Unsynchronized.change data (cons (id, empty_data)); thy) -fun done id ({log, ...}: Mirabelle.done_args) = - AList.lookup (op =) (!data) id - |> Option.map (log_data id log) - |> K () - -fun change_data id f = (Unsynchronized.change data (AList.map_entry (op =) id f); ()) - - -fun get_prover ctxt args = - let - fun default_prover_name () = - hd (#provers (Sledgehammer_Isar.default_params ctxt [])) - handle List.Empty => error "No ATP available." - fun get_prover name = - (name, Sledgehammer_Run.get_minimizing_prover ctxt - Sledgehammer_Provers.Normal name) - in - (case AList.lookup (op =) args proverK of - SOME name => get_prover name - | NONE => get_prover (default_prover_name ())) - end - -type stature = ATP_Problem_Generate.stature - -(* hack *) -fun reconstructor_from_msg args msg = - (case AList.lookup (op =) args reconstructorK of - SOME name => name - | NONE => - if String.isSubstring "metis (" msg then - msg |> Substring.full - |> Substring.position "metis (" - |> snd |> Substring.position ")" - |> fst |> Substring.string - |> suffix ")" - else if String.isSubstring "metis" msg then - "metis" - else - "smt") - -local - -datatype sh_result = - SH_OK of int * int * (string * stature) list | - SH_FAIL of int * int | - SH_ERROR - -fun run_sh prover_name prover type_enc strict max_relevant slice lam_trans - uncurried_aliases e_selection_heuristic term_order force_sos - hard_timeout timeout preplay_timeout sh_minimizeLST - max_new_mono_instancesLST max_mono_itersLST dir pos st = - let - val {context = ctxt, facts = chained_ths, goal} = Proof.goal st - val i = 1 - fun set_file_name (SOME dir) = - Config.put Sledgehammer_Provers.dest_dir dir - #> Config.put Sledgehammer_Provers.problem_prefix - ("prob_" ^ str0 (Position.line_of pos) ^ "__") - #> Config.put SMT_Config.debug_files - (dir ^ "/" ^ Name.desymbolize false (ATP_Util.timestamp ()) ^ "_" - ^ serial_string ()) - | set_file_name NONE = I - val st' = - st - |> Proof.map_context - (set_file_name dir - #> (Option.map (Config.put ATP_Systems.e_selection_heuristic) - e_selection_heuristic |> the_default I) - #> (Option.map (Config.put ATP_Systems.term_order) - term_order |> the_default I) - #> (Option.map (Config.put ATP_Systems.force_sos) - force_sos |> the_default I)) - val params as {relevance_thresholds, max_relevant, slice, ...} = - Sledgehammer_Isar.default_params ctxt - ([("verbose", "true"), - ("type_enc", type_enc), - ("strict", strict), - ("lam_trans", lam_trans |> the_default "smart"), - ("uncurried_aliases", uncurried_aliases |> the_default "smart"), - ("max_relevant", max_relevant), - ("slice", slice), - ("timeout", string_of_int timeout), - ("preplay_timeout", preplay_timeout)] - |> sh_minimizeLST (*don't confuse the two minimization flags*) - |> max_new_mono_instancesLST - |> max_mono_itersLST) - val default_max_relevant = - Sledgehammer_Provers.default_max_relevant_for_prover ctxt slice - prover_name - val is_appropriate_prop = - Sledgehammer_Provers.is_appropriate_prop_for_prover ctxt prover_name - val is_built_in_const = - Sledgehammer_Provers.is_built_in_const_for_prover ctxt prover_name - val relevance_fudge = - Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover_name - val relevance_override = {add = [], del = [], only = false} - val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal i - val time_limit = - (case hard_timeout of - NONE => I - | SOME secs => TimeLimit.timeLimit (Time.fromSeconds secs)) - fun failed failure = - ({outcome = SOME failure, used_facts = [], run_time = Time.zeroTime, - preplay = - K (ATP_Proof_Reconstruct.Failed_to_Play Sledgehammer_Provers.plain_metis), - message = K "", message_tail = ""}, ~1) - val ({outcome, used_facts, run_time, preplay, message, message_tail} - : Sledgehammer_Provers.prover_result, - time_isa) = time_limit (Mirabelle.cpu_time (fn () => - let - val _ = if is_appropriate_prop concl_t then () - else raise Fail "inappropriate" - val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover_name - val facts = - Sledgehammer_Filter.nearly_all_facts ctxt ho_atp relevance_override - chained_ths hyp_ts concl_t - |> filter (is_appropriate_prop o prop_of o snd) - |> Sledgehammer_Filter.relevant_facts ctxt relevance_thresholds - (the_default default_max_relevant max_relevant) - is_built_in_const relevance_fudge relevance_override - chained_ths hyp_ts concl_t - val problem = - {state = st', goal = goal, subgoal = i, - subgoal_count = Sledgehammer_Util.subgoal_count st, - facts = facts |> map Sledgehammer_Provers.Untranslated_Fact, - smt_filter = NONE} - in prover params (K (K (K ""))) problem end)) () - handle TimeLimit.TimeOut => failed ATP_Proof.TimedOut - | Fail "inappropriate" => failed ATP_Proof.Inappropriate - val time_prover = run_time |> Time.toMilliseconds - val msg = message (preplay ()) ^ message_tail - in - case outcome of - NONE => (msg, SH_OK (time_isa, time_prover, used_facts)) - | SOME _ => (msg, SH_FAIL (time_isa, time_prover)) - end - handle ERROR msg => ("error: " ^ msg, SH_ERROR) - -fun thms_of_name ctxt name = - let - val lex = Keyword.get_lexicons - val get = maps (Proof_Context.get_fact ctxt o fst) - in - Source.of_string name - |> Symbol.source - |> Token.source {do_recover=SOME false} lex Position.start - |> Token.source_proper - |> Source.source Token.stopper (Parse_Spec.xthms1 >> get) NONE - |> Source.exhaust - end - -in - -fun run_sledgehammer trivial args reconstructor named_thms id - ({pre=st, log, pos, ...}: Mirabelle.run_args) = - let - val triv_str = if trivial then "[T] " else "" - val _ = change_data id inc_sh_calls - val _ = if trivial then () else change_data id inc_sh_nontriv_calls - val (prover_name, prover) = get_prover (Proof.context_of st) args - val type_enc = AList.lookup (op =) args type_encK |> the_default "smart" - val strict = AList.lookup (op =) args strictK |> the_default "false" - val max_relevant = AList.lookup (op =) args max_relevantK |> the_default "smart" - val slice = AList.lookup (op =) args sliceK |> the_default "true" - val lam_trans = AList.lookup (op =) args lam_transK - val uncurried_aliases = AList.lookup (op =) args uncurried_aliasesK - val e_selection_heuristic = AList.lookup (op =) args e_selection_heuristicK - val term_order = AList.lookup (op =) args term_orderK - val force_sos = AList.lookup (op =) args force_sosK - |> Option.map (curry (op <>) "false") - val dir = AList.lookup (op =) args keepK - val timeout = Mirabelle.get_int_setting args (prover_timeoutK, 30) - (* always use a hard timeout, but give some slack so that the automatic - minimizer has a chance to do its magic *) - val preplay_timeout = AList.lookup (op =) args preplay_timeoutK - |> the_default preplay_timeout_default - val sh_minimizeLST = available_parameter args sh_minimizeK "minimize" - val max_new_mono_instancesLST = - available_parameter args max_new_mono_instancesK max_new_mono_instancesK - val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK - val hard_timeout = SOME (2 * timeout) - val (msg, result) = - run_sh prover_name prover type_enc strict max_relevant slice lam_trans - uncurried_aliases e_selection_heuristic term_order force_sos - hard_timeout timeout preplay_timeout sh_minimizeLST - max_new_mono_instancesLST max_mono_itersLST dir pos st - in - case result of - SH_OK (time_isa, time_prover, names) => - let - fun get_thms (name, stature) = - try (thms_of_name (Proof.context_of st)) name - |> Option.map (pair (name, stature)) - in - change_data id inc_sh_success; - if trivial then () else change_data id inc_sh_nontriv_success; - change_data id (inc_sh_lemmas (length names)); - change_data id (inc_sh_max_lems (length names)); - change_data id (inc_sh_time_isa time_isa); - change_data id (inc_sh_time_prover time_prover); - reconstructor := reconstructor_from_msg args msg; - named_thms := SOME (map_filter get_thms names); - log (sh_tag id ^ triv_str ^ "succeeded (" ^ string_of_int time_isa ^ "+" ^ - string_of_int time_prover ^ ") [" ^ prover_name ^ "]:\n" ^ msg) - end - | SH_FAIL (time_isa, time_prover) => - let - val _ = change_data id (inc_sh_time_isa time_isa) - val _ = change_data id (inc_sh_time_prover_fail time_prover) - in log (sh_tag id ^ triv_str ^ "failed: " ^ msg) end - | SH_ERROR => log (sh_tag id ^ "failed: " ^ msg) - end - -end - -fun run_minimize args reconstructor named_thms id - ({pre=st, log, ...}: Mirabelle.run_args) = - let - val ctxt = Proof.context_of st - val n0 = length (these (!named_thms)) - val (prover_name, _) = get_prover ctxt args - val type_enc = AList.lookup (op =) args type_encK |> the_default "smart" - val strict = AList.lookup (op =) args strictK |> the_default "false" - val timeout = - AList.lookup (op =) args minimize_timeoutK - |> Option.map (fst o read_int o raw_explode) (* FIXME Symbol.explode (?) *) - |> the_default 5 - val preplay_timeout = AList.lookup (op =) args preplay_timeoutK - |> the_default preplay_timeout_default - val sh_minimizeLST = available_parameter args sh_minimizeK "minimize" - val max_new_mono_instancesLST = - available_parameter args max_new_mono_instancesK max_new_mono_instancesK - val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK - val params = Sledgehammer_Isar.default_params ctxt - ([("provers", prover_name), - ("verbose", "true"), - ("type_enc", type_enc), - ("strict", strict), - ("timeout", string_of_int timeout), - ("preplay_timeout", preplay_timeout)] - |> sh_minimizeLST (*don't confuse the two minimization flags*) - |> max_new_mono_instancesLST - |> max_mono_itersLST) - val minimize = - Sledgehammer_Minimize.minimize_facts prover_name params - true 1 (Sledgehammer_Util.subgoal_count st) - val _ = log separator - val (used_facts, (preplay, message, message_tail)) = - minimize st (these (!named_thms)) - val msg = message (preplay ()) ^ message_tail - in - case used_facts of - SOME named_thms' => - (change_data id inc_min_succs; - change_data id (inc_min_ab_ratios ((100 * length named_thms') div n0)); - if length named_thms' = n0 - then log (minimize_tag id ^ "already minimal") - else (reconstructor := reconstructor_from_msg args msg; - named_thms := SOME named_thms'; - log (minimize_tag id ^ "succeeded:\n" ^ msg)) - ) - | NONE => log (minimize_tag id ^ "failed: " ^ msg) - end - -fun override_params prover type_enc timeout = - [("provers", prover), - ("max_relevant", "0"), - ("type_enc", type_enc), - ("strict", "true"), - ("slice", "false"), - ("timeout", timeout |> Time.toSeconds |> string_of_int)] - -fun run_reconstructor trivial full m name reconstructor named_thms id - ({pre=st, timeout, log, pos, ...}: Mirabelle.run_args) = - let - fun do_reconstructor named_thms ctxt = - let - val ref_of_str = - suffix ";" #> Outer_Syntax.scan Position.none #> Parse_Spec.xthm - #> fst - val thms = named_thms |> maps snd - val facts = named_thms |> map (ref_of_str o fst o fst) - val relevance_override = {add = facts, del = [], only = true} - fun my_timeout time_slice = - timeout |> Time.toReal |> curry Real.* time_slice |> Time.fromReal - fun sledge_tac time_slice prover type_enc = - Sledgehammer_Tactics.sledgehammer_as_oracle_tac ctxt - (override_params prover type_enc (my_timeout time_slice)) - relevance_override - in - if !reconstructor = "sledgehammer_tac" then - sledge_tac 0.2 ATP_Systems.z3_tptpN "mono_native" - ORELSE' sledge_tac 0.2 ATP_Systems.eN "mono_guards??" - ORELSE' sledge_tac 0.2 ATP_Systems.vampireN "mono_guards??" - ORELSE' sledge_tac 0.2 ATP_Systems.spassN "poly_tags" - ORELSE' Metis_Tactic.metis_tac [] ATP_Problem_Generate.combsN - ctxt thms - else if !reconstructor = "smt" then - SMT_Solver.smt_tac ctxt thms - else if full then - Metis_Tactic.metis_tac [ATP_Proof_Reconstruct.full_typesN] - ATP_Proof_Reconstruct.metis_default_lam_trans ctxt thms - else if String.isPrefix "metis (" (!reconstructor) then - let - val (type_encs, lam_trans) = - !reconstructor - |> Outer_Syntax.scan Position.start - |> filter Token.is_proper |> tl - |> Metis_Tactic.parse_metis_options |> fst - |>> the_default [ATP_Proof_Reconstruct.partial_typesN] - ||> the_default ATP_Proof_Reconstruct.metis_default_lam_trans - in Metis_Tactic.metis_tac type_encs lam_trans ctxt thms end - else if !reconstructor = "metis" then - Metis_Tactic.metis_tac [] ATP_Proof_Reconstruct.metis_default_lam_trans ctxt - thms - else - K all_tac - end - fun apply_reconstructor named_thms = - Mirabelle.can_apply timeout (do_reconstructor named_thms) st - - fun with_time (false, t) = "failed (" ^ string_of_int t ^ ")" - | with_time (true, t) = (change_data id (inc_reconstructor_success m); - if trivial then () - else change_data id (inc_reconstructor_nontriv_success m); - change_data id (inc_reconstructor_lemmas m (length named_thms)); - change_data id (inc_reconstructor_time m t); - change_data id (inc_reconstructor_posns m (pos, trivial)); - if name = "proof" then change_data id (inc_reconstructor_proofs m) - else (); - "succeeded (" ^ string_of_int t ^ ")") - fun timed_reconstructor named_thms = - (with_time (Mirabelle.cpu_time apply_reconstructor named_thms), true) - handle TimeLimit.TimeOut => (change_data id (inc_reconstructor_timeout m); - ("timeout", false)) - | ERROR msg => ("error: " ^ msg, false) - - val _ = log separator - val _ = change_data id (inc_reconstructor_calls m) - val _ = if trivial then () - else change_data id (inc_reconstructor_nontriv_calls m) - in - named_thms - |> timed_reconstructor - |>> log o prefix (reconstructor_tag reconstructor id) - |> snd - end - -val try_timeout = seconds 5.0 - -(* crude hack *) -val num_sledgehammer_calls = Unsynchronized.ref 0 - -fun sledgehammer_action args id (st as {pre, name, ...}: Mirabelle.run_args) = - let val goal = Thm.major_prem_of (#goal (Proof.goal pre)) in - if can Logic.dest_conjunction goal orelse can Logic.dest_equals goal - then () else - let - val max_calls = - AList.lookup (op =) args max_callsK |> the_default "10000000" - |> Int.fromString |> the - val _ = num_sledgehammer_calls := !num_sledgehammer_calls + 1; - in - if !num_sledgehammer_calls > max_calls then () - else - let - val reconstructor = Unsynchronized.ref "" - val named_thms = - Unsynchronized.ref (NONE : ((string * stature) * thm list) list option) - val minimize = AList.defined (op =) args minimizeK - val metis_ft = AList.defined (op =) args metis_ftK - val trivial = - if AList.lookup (op =) args check_trivialK |> the_default "false" - |> Bool.fromString |> the then - Try0.try0 (SOME try_timeout) ([], [], [], []) pre - handle TimeLimit.TimeOut => false - else false - fun apply_reconstructor m1 m2 = - if metis_ft - then - if not (Mirabelle.catch_result (reconstructor_tag reconstructor) false - (run_reconstructor trivial false m1 name reconstructor - (these (!named_thms))) id st) - then - (Mirabelle.catch_result (reconstructor_tag reconstructor) false - (run_reconstructor trivial true m2 name reconstructor - (these (!named_thms))) id st; ()) - else () - else - (Mirabelle.catch_result (reconstructor_tag reconstructor) false - (run_reconstructor trivial false m1 name reconstructor - (these (!named_thms))) id st; ()) - in - change_data id (set_mini minimize); - Mirabelle.catch sh_tag (run_sledgehammer trivial args reconstructor - named_thms) id st; - if is_some (!named_thms) - then - (apply_reconstructor Unminimized UnminimizedFT; - if minimize andalso not (null (these (!named_thms))) - then - (Mirabelle.catch minimize_tag - (run_minimize args reconstructor named_thms) id st; - apply_reconstructor Minimized MinimizedFT) - else ()) - else () - end - end - end - -fun invoke args = - Mirabelle.register (init, sledgehammer_action args, done) - -end diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/Tools/mirabelle_sledgehammer_filter.ML --- a/src/HOL/Mirabelle/Tools/mirabelle_sledgehammer_filter.ML Sat Apr 14 23:34:18 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,198 +0,0 @@ -(* Title: HOL/Mirabelle/Tools/mirabelle_sledgehammer_filter.ML - Author: Jasmin Blanchette, TU Munich -*) - -structure Mirabelle_Sledgehammer_Filter : MIRABELLE_ACTION = -struct - -fun get args name default_value = - case AList.lookup (op =) args name of - SOME value => the (Real.fromString value) - | NONE => default_value - -fun extract_relevance_fudge args - {local_const_multiplier, worse_irrel_freq, higher_order_irrel_weight, - abs_rel_weight, abs_irrel_weight, skolem_irrel_weight, - theory_const_rel_weight, theory_const_irrel_weight, - chained_const_irrel_weight, intro_bonus, elim_bonus, simp_bonus, - local_bonus, assum_bonus, chained_bonus, max_imperfect, max_imperfect_exp, - threshold_divisor, ridiculous_threshold} = - {local_const_multiplier = - get args "local_const_multiplier" local_const_multiplier, - worse_irrel_freq = get args "worse_irrel_freq" worse_irrel_freq, - higher_order_irrel_weight = - get args "higher_order_irrel_weight" higher_order_irrel_weight, - abs_rel_weight = get args "abs_rel_weight" abs_rel_weight, - abs_irrel_weight = get args "abs_irrel_weight" abs_irrel_weight, - skolem_irrel_weight = get args "skolem_irrel_weight" skolem_irrel_weight, - theory_const_rel_weight = - get args "theory_const_rel_weight" theory_const_rel_weight, - theory_const_irrel_weight = - get args "theory_const_irrel_weight" theory_const_irrel_weight, - chained_const_irrel_weight = - get args "chained_const_irrel_weight" chained_const_irrel_weight, - intro_bonus = get args "intro_bonus" intro_bonus, - elim_bonus = get args "elim_bonus" elim_bonus, - simp_bonus = get args "simp_bonus" simp_bonus, - local_bonus = get args "local_bonus" local_bonus, - assum_bonus = get args "assum_bonus" assum_bonus, - chained_bonus = get args "chained_bonus" chained_bonus, - max_imperfect = get args "max_imperfect" max_imperfect, - max_imperfect_exp = get args "max_imperfect_exp" max_imperfect_exp, - threshold_divisor = get args "threshold_divisor" threshold_divisor, - ridiculous_threshold = get args "ridiculous_threshold" ridiculous_threshold} - -structure Prooftab = - Table(type key = int * int val ord = prod_ord int_ord int_ord) - -val proof_table = Unsynchronized.ref (Prooftab.empty: string list list Prooftab.table) - -val num_successes = Unsynchronized.ref ([] : (int * int) list) -val num_failures = Unsynchronized.ref ([] : (int * int) list) -val num_found_proofs = Unsynchronized.ref ([] : (int * int) list) -val num_lost_proofs = Unsynchronized.ref ([] : (int * int) list) -val num_found_facts = Unsynchronized.ref ([] : (int * int) list) -val num_lost_facts = Unsynchronized.ref ([] : (int * int) list) - -fun get id c = the_default 0 (AList.lookup (op =) (!c) id) -fun add id c n = - c := (case AList.lookup (op =) (!c) id of - SOME m => AList.update (op =) (id, m + n) (!c) - | NONE => (id, n) :: !c) - -fun init proof_file _ thy = - let - fun do_line line = - case line |> space_explode ":" of - [line_num, offset, proof] => - SOME (pairself (the o Int.fromString) (line_num, offset), - proof |> space_explode " " |> filter_out (curry (op =) "")) - | _ => NONE - val proofs = File.read (Path.explode proof_file) - val proof_tab = - proofs |> space_explode "\n" - |> map_filter do_line - |> AList.coalesce (op =) - |> Prooftab.make - in proof_table := proof_tab; thy end - -fun percentage a b = if b = 0 then "N/A" else string_of_int (a * 100 div b) -fun percentage_alt a b = percentage a (a + b) - -fun done id ({log, ...} : Mirabelle.done_args) = - if get id num_successes + get id num_failures > 0 then - (log ""; - log ("Number of overall successes: " ^ - string_of_int (get id num_successes)); - log ("Number of overall failures: " ^ string_of_int (get id num_failures)); - log ("Overall success rate: " ^ - percentage_alt (get id num_successes) (get id num_failures) ^ "%"); - log ("Number of found proofs: " ^ string_of_int (get id num_found_proofs)); - log ("Number of lost proofs: " ^ string_of_int (get id num_lost_proofs)); - log ("Proof found rate: " ^ - percentage_alt (get id num_found_proofs) (get id num_lost_proofs) ^ - "%"); - log ("Number of found facts: " ^ string_of_int (get id num_found_facts)); - log ("Number of lost facts: " ^ string_of_int (get id num_lost_facts)); - log ("Fact found rate: " ^ - percentage_alt (get id num_found_facts) (get id num_lost_facts) ^ - "%")) - else - () - -val default_prover = ATP_Systems.eN (* arbitrary ATP *) - -fun with_index (i, s) = s ^ "@" ^ string_of_int i - -fun action args id ({pre, pos, log, ...} : Mirabelle.run_args) = - case (Position.line_of pos, Position.offset_of pos) of - (SOME line_num, SOME offset) => - (case Prooftab.lookup (!proof_table) (line_num, offset) of - SOME proofs => - let - val {context = ctxt, facts = chained_ths, goal} = Proof.goal pre - val prover = AList.lookup (op =) args "prover" - |> the_default default_prover - val {relevance_thresholds, max_relevant, slice, ...} = - Sledgehammer_Isar.default_params ctxt args - val default_max_relevant = - Sledgehammer_Provers.default_max_relevant_for_prover ctxt slice - prover - val is_appropriate_prop = - Sledgehammer_Provers.is_appropriate_prop_for_prover ctxt - default_prover - val is_built_in_const = - Sledgehammer_Provers.is_built_in_const_for_prover ctxt default_prover - val relevance_fudge = - extract_relevance_fudge args - (Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover) - val relevance_override = {add = [], del = [], only = false} - val subgoal = 1 - val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal subgoal - val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover - val facts = - Sledgehammer_Filter.nearly_all_facts ctxt ho_atp relevance_override - chained_ths hyp_ts concl_t - |> filter (is_appropriate_prop o prop_of o snd) - |> Sledgehammer_Filter.relevant_facts ctxt relevance_thresholds - (the_default default_max_relevant max_relevant) - is_built_in_const relevance_fudge relevance_override - chained_ths hyp_ts concl_t - |> map (fst o fst) - val (found_facts, lost_facts) = - flat proofs |> sort_distinct string_ord - |> map (fn fact => (find_index (curry (op =) fact) facts, fact)) - |> List.partition (curry (op <=) 0 o fst) - |>> sort (prod_ord int_ord string_ord) ||> map snd - val found_proofs = filter (forall (member (op =) facts)) proofs - val n = length found_proofs - val _ = - if n = 0 then - (add id num_failures 1; log "Failure") - else - (add id num_successes 1; - add id num_found_proofs n; - log ("Success (" ^ string_of_int n ^ " of " ^ - string_of_int (length proofs) ^ " proofs)")) - val _ = add id num_lost_proofs (length proofs - n) - val _ = add id num_found_facts (length found_facts) - val _ = add id num_lost_facts (length lost_facts) - val _ = - if null found_facts then - () - else - let - val found_weight = - Real.fromInt (fold (fn (n, _) => - Integer.add (n * n)) found_facts 0) - / Real.fromInt (length found_facts) - |> Math.sqrt |> Real.ceil - in - log ("Found facts (among " ^ string_of_int (length facts) ^ - ", weight " ^ string_of_int found_weight ^ "): " ^ - commas (map with_index found_facts)) - end - val _ = if null lost_facts then - () - else - log ("Lost facts (among " ^ string_of_int (length facts) ^ - "): " ^ commas lost_facts) - in () end - | NONE => log "No known proof") - | _ => () - -val proof_fileK = "proof_file" - -fun invoke args = - let - val (pf_args, other_args) = - args |> List.partition (curry (op =) proof_fileK o fst) - val proof_file = case pf_args of - [] => error "No \"proof_file\" specified" - | (_, s) :: _ => s - in Mirabelle.register (init proof_file, action other_args, done) end - -end; - -(* Workaround to keep the "mirabelle.pl" script happy *) -structure Mirabelle_Sledgehammer_filter = Mirabelle_Sledgehammer_Filter; diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/lib/Tools/mirabelle --- a/src/HOL/Mirabelle/lib/Tools/mirabelle Sat Apr 14 23:34:18 2012 +0200 +++ b/src/HOL/Mirabelle/lib/Tools/mirabelle Sat Apr 14 23:52:17 2012 +0100 @@ -8,10 +8,10 @@ PRG="$(basename "$0")" function print_action_names() { - TOOLS="$MIRABELLE_HOME/Tools/mirabelle_*.ML" - for NAME in $TOOLS + ACTIONS="$MIRABELLE_HOME/Actions/mirabelle_*.ML" + for ACTION in $ACTIONS do - echo $NAME | sed 's/.*mirabelle_\(.*\)\.ML/ \1/' + echo $ACTION | sed 's/.*mirabelle_\(.*\)\.ML/ \1/' done } diff -r 92d1c566ebbf -r 3fabf352243e src/HOL/Mirabelle/lib/scripts/mirabelle.pl --- a/src/HOL/Mirabelle/lib/scripts/mirabelle.pl Sat Apr 14 23:34:18 2012 +0200 +++ b/src/HOL/Mirabelle/lib/scripts/mirabelle.pl Sat Apr 14 23:52:17 2012 +0100 @@ -46,7 +46,7 @@ my @action_names; foreach (split(/:/, $actions)) { if (m/([^[]*)/) { - push @action_files, "\"$mirabelle_home/Tools/mirabelle_$1.ML\""; + push @action_files, "\"$mirabelle_home/Actions/mirabelle_$1.ML\""; push @action_names, $1; } }