renamed mirabelle Tools directory to Actions, to make consistent with 'usage' description;
1.1 --- a/src/HOL/IsaMakefile Sat Apr 14 23:34:18 2012 +0200
1.2 +++ b/src/HOL/IsaMakefile Sat Apr 14 23:52:17 2012 +0100
1.3 @@ -1319,16 +1319,16 @@
1.4
1.5 HOL-Mirabelle: HOL $(LOG)/HOL-Mirabelle.gz
1.6
1.7 -$(LOG)/HOL-Mirabelle.gz: $(OUT)/HOL Mirabelle/Mirabelle_Test.thy \
1.8 - Mirabelle/Mirabelle.thy Mirabelle/Tools/mirabelle.ML \
1.9 - Mirabelle/ROOT.ML Mirabelle/Tools/mirabelle_arith.ML \
1.10 - Mirabelle/Tools/mirabelle_metis.ML \
1.11 - Mirabelle/Tools/mirabelle_quickcheck.ML \
1.12 - Mirabelle/Tools/mirabelle_refute.ML \
1.13 - Mirabelle/Tools/mirabelle_sledgehammer.ML \
1.14 - Mirabelle/Tools/mirabelle_sledgehammer_filter.ML \
1.15 - ex/sledgehammer_tactics.ML Mirabelle/lib/Tools/mirabelle \
1.16 - Mirabelle/lib/scripts/mirabelle.pl Library/FrechetDeriv.thy \
1.17 +$(LOG)/HOL-Mirabelle.gz: $(OUT)/HOL Mirabelle/Mirabelle_Test.thy \
1.18 + Mirabelle/Mirabelle.thy Mirabelle/Actions/mirabelle.ML \
1.19 + Mirabelle/ROOT.ML Mirabelle/Actions/mirabelle_arith.ML \
1.20 + Mirabelle/Actions/mirabelle_metis.ML \
1.21 + Mirabelle/Actions/mirabelle_quickcheck.ML \
1.22 + Mirabelle/Actions/mirabelle_refute.ML \
1.23 + Mirabelle/Actions/mirabelle_sledgehammer.ML \
1.24 + Mirabelle/Actions/mirabelle_sledgehammer_filter.ML \
1.25 + ex/sledgehammer_tactics.ML Mirabelle/lib/Tools/mirabelle \
1.26 + Mirabelle/lib/scripts/mirabelle.pl Library/FrechetDeriv.thy \
1.27 Library/Inner_Product.thy
1.28 @$(ISABELLE_TOOL) usedir $(OUT)/HOL Mirabelle
1.29 @cd Library; $(ISABELLE_TOOL) mirabelle -q arith Inner_Product.thy # some arbitrary small test case
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2 +++ b/src/HOL/Mirabelle/Actions/mirabelle.ML Sat Apr 14 23:52:17 2012 +0100
2.3 @@ -0,0 +1,213 @@
2.4 +(* Title: HOL/Mirabelle/Actions/mirabelle.ML
2.5 + Author: Jasmin Blanchette and Sascha Boehme, TU Munich
2.6 +*)
2.7 +
2.8 +signature MIRABELLE =
2.9 +sig
2.10 + (*configuration*)
2.11 + val logfile : string Config.T
2.12 + val timeout : int Config.T
2.13 + val start_line : int Config.T
2.14 + val end_line : int Config.T
2.15 +
2.16 + (*core*)
2.17 + type init_action = int -> theory -> theory
2.18 + type done_args = {last: Toplevel.state, log: string -> unit}
2.19 + type done_action = int -> done_args -> unit
2.20 + type run_args = {pre: Proof.state, post: Toplevel.state option,
2.21 + timeout: Time.time, log: string -> unit, pos: Position.T, name: string}
2.22 + type run_action = int -> run_args -> unit
2.23 + type action = init_action * run_action * done_action
2.24 + val catch : (int -> string) -> run_action -> run_action
2.25 + val catch_result : (int -> string) -> 'a -> (int -> run_args -> 'a) ->
2.26 + int -> run_args -> 'a
2.27 + val register : action -> theory -> theory
2.28 + val step_hook : Toplevel.transition -> Toplevel.state -> Toplevel.state ->
2.29 + unit
2.30 +
2.31 + (*utility functions*)
2.32 + val can_apply : Time.time -> (Proof.context -> int -> tactic) ->
2.33 + Proof.state -> bool
2.34 + val theorems_in_proof_term : thm -> thm list
2.35 + val theorems_of_sucessful_proof : Toplevel.state option -> thm list
2.36 + val get_setting : (string * string) list -> string * string -> string
2.37 + val get_int_setting : (string * string) list -> string * int -> int
2.38 + val cpu_time : ('a -> 'b) -> 'a -> 'b * int
2.39 +end
2.40 +
2.41 +
2.42 +
2.43 +structure Mirabelle : MIRABELLE =
2.44 +struct
2.45 +
2.46 +(* Mirabelle configuration *)
2.47 +
2.48 +val logfile = Attrib.setup_config_string @{binding mirabelle_logfile} (K "")
2.49 +val timeout = Attrib.setup_config_int @{binding mirabelle_timeout} (K 30)
2.50 +val start_line = Attrib.setup_config_int @{binding mirabelle_start_line} (K 0)
2.51 +val end_line = Attrib.setup_config_int @{binding mirabelle_end_line} (K ~1)
2.52 +
2.53 +
2.54 +(* Mirabelle core *)
2.55 +
2.56 +type init_action = int -> theory -> theory
2.57 +type done_args = {last: Toplevel.state, log: string -> unit}
2.58 +type done_action = int -> done_args -> unit
2.59 +type run_args = {pre: Proof.state, post: Toplevel.state option,
2.60 + timeout: Time.time, log: string -> unit, pos: Position.T, name: string}
2.61 +type run_action = int -> run_args -> unit
2.62 +type action = init_action * run_action * done_action
2.63 +
2.64 +structure Actions = Theory_Data
2.65 +(
2.66 + type T = (int * run_action * done_action) list
2.67 + val empty = []
2.68 + val extend = I
2.69 + fun merge data = Library.merge (K true) data (* FIXME potential data loss because of (K true) *)
2.70 +)
2.71 +
2.72 +
2.73 +fun log_exn log tag id e = log (tag id ^ "exception:\n" ^ General.exnMessage e)
2.74 +
2.75 +fun catch tag f id (st as {log, ...}: run_args) = (f id st; ())
2.76 + handle exn =>
2.77 + if Exn.is_interrupt exn then reraise exn else (log_exn log tag id exn; ())
2.78 +
2.79 +fun catch_result tag d f id (st as {log, ...}: run_args) = f id st
2.80 + handle exn =>
2.81 + if Exn.is_interrupt exn then reraise exn else (log_exn log tag id exn; d)
2.82 +
2.83 +fun register (init, run, done) thy =
2.84 + let val id = length (Actions.get thy) + 1
2.85 + in
2.86 + thy
2.87 + |> init id
2.88 + |> Actions.map (cons (id, run, done))
2.89 + end
2.90 +
2.91 +local
2.92 +
2.93 +fun log thy s =
2.94 + let fun append_to n = if n = "" then K () else File.append (Path.explode n)
2.95 + in append_to (Config.get_global thy logfile) (s ^ "\n") end
2.96 + (* FIXME: with multithreading and parallel proofs enabled, we might need to
2.97 + encapsulate this inside a critical section *)
2.98 +
2.99 +fun log_sep thy = log thy "------------------"
2.100 +
2.101 +fun apply_actions thy pos name info (pre, post, time) actions =
2.102 + let
2.103 + fun apply f = f {pre=pre, post=post, timeout=time, log=log thy, pos=pos, name=name}
2.104 + fun run (id, run, _) = (apply (run id); log_sep thy)
2.105 + in (log thy info; log_sep thy; List.app run actions) end
2.106 +
2.107 +fun in_range _ _ NONE = true
2.108 + | in_range l r (SOME i) = (l <= i andalso (r < 0 orelse i <= r))
2.109 +
2.110 +fun only_within_range thy pos f x =
2.111 + let val l = Config.get_global thy start_line and r = Config.get_global thy end_line
2.112 + in if in_range l r (Position.line_of pos) then f x else () end
2.113 +
2.114 +in
2.115 +
2.116 +fun run_actions tr pre post =
2.117 + let
2.118 + val thy = Proof.theory_of pre
2.119 + val pos = Toplevel.pos_of tr
2.120 + val name = Toplevel.name_of tr
2.121 + val st = (pre, post, Time.fromSeconds (Config.get_global thy timeout))
2.122 +
2.123 + val str0 = string_of_int o the_default 0
2.124 + val loc = str0 (Position.line_of pos) ^ ":" ^ str0 (Position.offset_of pos)
2.125 + val info = "\n\nat " ^ loc ^ " (" ^ name ^ "):"
2.126 + in
2.127 + only_within_range thy pos (apply_actions thy pos name info st) (Actions.get thy)
2.128 + end
2.129 +
2.130 +fun done_actions st =
2.131 + let
2.132 + val thy = Toplevel.theory_of st
2.133 + val _ = log thy "\n\n";
2.134 + in
2.135 + thy
2.136 + |> Actions.get
2.137 + |> List.app (fn (id, _, done) => done id {last=st, log=log thy})
2.138 + end
2.139 +
2.140 +end
2.141 +
2.142 +val whitelist = ["apply", "by", "proof"]
2.143 +
2.144 +fun step_hook tr pre post =
2.145 + (* FIXME: might require wrapping into "interruptible" *)
2.146 + if can (Proof.assert_backward o Toplevel.proof_of) pre andalso
2.147 + member (op =) whitelist (Toplevel.name_of tr)
2.148 + then run_actions tr (Toplevel.proof_of pre) (SOME post)
2.149 + else if not (Toplevel.is_toplevel pre) andalso Toplevel.is_toplevel post
2.150 + then done_actions pre
2.151 + else () (* FIXME: add theory_hook here *)
2.152 +
2.153 +
2.154 +
2.155 +(* Mirabelle utility functions *)
2.156 +
2.157 +fun can_apply time tac st =
2.158 + let
2.159 + val {context = ctxt, facts, goal} = Proof.goal st
2.160 + val full_tac = HEADGOAL (Method.insert_tac facts THEN' tac ctxt)
2.161 + in
2.162 + (case try (TimeLimit.timeLimit time (Seq.pull o full_tac)) goal of
2.163 + SOME (SOME _) => true
2.164 + | _ => false)
2.165 + end
2.166 +
2.167 +local
2.168 +
2.169 +fun fold_body_thms f =
2.170 + let
2.171 + fun app n (PBody {thms, ...}) = thms |> fold (fn (i, (name, prop, body)) =>
2.172 + fn (x, seen) =>
2.173 + if Inttab.defined seen i then (x, seen)
2.174 + else
2.175 + let
2.176 + val body' = Future.join body
2.177 + val (x', seen') = app (n + (if name = "" then 0 else 1)) body'
2.178 + (x, Inttab.update (i, ()) seen)
2.179 + in (x' |> n = 0 ? f (name, prop, body'), seen') end)
2.180 + in fn bodies => fn x => #1 (fold (app 0) bodies (x, Inttab.empty)) end
2.181 +
2.182 +in
2.183 +
2.184 +fun theorems_in_proof_term thm =
2.185 + let
2.186 + val all_thms = Global_Theory.all_thms_of (Thm.theory_of_thm thm)
2.187 + fun collect (s, _, _) = if s <> "" then insert (op =) s else I
2.188 + fun member_of xs (x, y) = if member (op =) xs x then SOME y else NONE
2.189 + fun resolve_thms names = map_filter (member_of names) all_thms
2.190 + in
2.191 + resolve_thms (fold_body_thms collect [Thm.proof_body_of thm] [])
2.192 + end
2.193 +
2.194 +end
2.195 +
2.196 +fun theorems_of_sucessful_proof state =
2.197 + (case state of
2.198 + NONE => []
2.199 + | SOME st =>
2.200 + if not (Toplevel.is_proof st) then []
2.201 + else theorems_in_proof_term (#goal (Proof.goal (Toplevel.proof_of st))))
2.202 +
2.203 +fun get_setting settings (key, default) =
2.204 + the_default default (AList.lookup (op =) settings key)
2.205 +
2.206 +fun get_int_setting settings (key, default) =
2.207 + (case Option.map Int.fromString (AList.lookup (op =) settings key) of
2.208 + SOME (SOME i) => i
2.209 + | SOME NONE => error ("bad option: " ^ key)
2.210 + | NONE => default)
2.211 +
2.212 +fun cpu_time f x =
2.213 + let val ({cpu, ...}, y) = Timing.timing f x
2.214 + in (y, Time.toMilliseconds cpu) end
2.215 +
2.216 +end
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
3.2 +++ b/src/HOL/Mirabelle/Actions/mirabelle_arith.ML Sat Apr 14 23:52:17 2012 +0100
3.3 @@ -0,0 +1,21 @@
3.4 +(* Title: HOL/Mirabelle/Actions/mirabelle_arith.ML
3.5 + Author: Jasmin Blanchette and Sascha Boehme, TU Munich
3.6 +*)
3.7 +
3.8 +structure Mirabelle_Arith : MIRABELLE_ACTION =
3.9 +struct
3.10 +
3.11 +fun arith_tag id = "#" ^ string_of_int id ^ " arith: "
3.12 +
3.13 +fun init _ = I
3.14 +fun done _ _ = ()
3.15 +
3.16 +fun run id ({pre, timeout, log, ...}: Mirabelle.run_args) =
3.17 + if Mirabelle.can_apply timeout Arith_Data.arith_tac pre
3.18 + then log (arith_tag id ^ "succeeded")
3.19 + else ()
3.20 + handle TimeLimit.TimeOut => log (arith_tag id ^ "timeout")
3.21 +
3.22 +fun invoke _ = Mirabelle.register (init, Mirabelle.catch arith_tag run, done)
3.23 +
3.24 +end
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
4.2 +++ b/src/HOL/Mirabelle/Actions/mirabelle_metis.ML Sat Apr 14 23:52:17 2012 +0100
4.3 @@ -0,0 +1,35 @@
4.4 +(* Title: HOL/Mirabelle/Actions/mirabelle_metis.ML
4.5 + Author: Jasmin Blanchette and Sascha Boehme, TU Munich
4.6 +*)
4.7 +
4.8 +structure Mirabelle_Metis : MIRABELLE_ACTION =
4.9 +struct
4.10 +
4.11 +fun metis_tag id = "#" ^ string_of_int id ^ " metis: "
4.12 +
4.13 +fun init _ = I
4.14 +fun done _ _ = ()
4.15 +
4.16 +fun run id ({pre, post, timeout, log, ...}: Mirabelle.run_args) =
4.17 + let
4.18 + val thms = Mirabelle.theorems_of_sucessful_proof post
4.19 + val names = map Thm.get_name_hint thms
4.20 + val add_info = if null names then I else suffix (":\n" ^ commas names)
4.21 +
4.22 + val facts = Facts.props (Proof_Context.facts_of (Proof.context_of pre))
4.23 +
4.24 + fun metis ctxt =
4.25 + Metis_Tactic.metis_tac [] ATP_Problem_Generate.liftingN ctxt
4.26 + (thms @ facts)
4.27 + in
4.28 + (if Mirabelle.can_apply timeout metis pre then "succeeded" else "failed")
4.29 + |> prefix (metis_tag id)
4.30 + |> add_info
4.31 + |> log
4.32 + end
4.33 + handle TimeLimit.TimeOut => log (metis_tag id ^ "timeout")
4.34 + | ERROR msg => log (metis_tag id ^ "error: " ^ msg)
4.35 +
4.36 +fun invoke _ = Mirabelle.register (init, Mirabelle.catch metis_tag run, done)
4.37 +
4.38 +end
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
5.2 +++ b/src/HOL/Mirabelle/Actions/mirabelle_quickcheck.ML Sat Apr 14 23:52:17 2012 +0100
5.3 @@ -0,0 +1,28 @@
5.4 +(* Title: HOL/Mirabelle/Actions/mirabelle_quickcheck.ML
5.5 + Author: Jasmin Blanchette and Sascha Boehme, TU Munich
5.6 +*)
5.7 +
5.8 +structure Mirabelle_Quickcheck : MIRABELLE_ACTION =
5.9 +struct
5.10 +
5.11 +fun qc_tag id = "#" ^ string_of_int id ^ " quickcheck: "
5.12 +
5.13 +fun init _ = I
5.14 +fun done _ _ = ()
5.15 +
5.16 +fun run args id ({pre, timeout, log, ...}: Mirabelle.run_args) =
5.17 + let
5.18 + val has_valid_key = member (op =) ["iterations", "size", "generator"] o fst
5.19 + val quickcheck = Quickcheck.quickcheck (map (apsnd single) (filter has_valid_key args)) 1
5.20 + in
5.21 + (case TimeLimit.timeLimit timeout quickcheck pre of
5.22 + NONE => log (qc_tag id ^ "no counterexample")
5.23 + | SOME _ => log (qc_tag id ^ "counterexample found"))
5.24 + end
5.25 + handle TimeLimit.TimeOut => log (qc_tag id ^ "timeout")
5.26 + | ERROR msg => log (qc_tag id ^ "error: " ^ msg)
5.27 +
5.28 +fun invoke args =
5.29 + Mirabelle.register (init, Mirabelle.catch qc_tag (run args), done)
5.30 +
5.31 +end
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
6.2 +++ b/src/HOL/Mirabelle/Actions/mirabelle_refute.ML Sat Apr 14 23:52:17 2012 +0100
6.3 @@ -0,0 +1,39 @@
6.4 +(* Title: HOL/Mirabelle/Actions/mirabelle_refute.ML
6.5 + Author: Jasmin Blanchette and Sascha Boehme, TU Munich
6.6 +*)
6.7 +
6.8 +structure Mirabelle_Refute : MIRABELLE_ACTION =
6.9 +struct
6.10 +
6.11 +
6.12 +(* FIXME:
6.13 +fun refute_action args timeout {pre=st, ...} =
6.14 + let
6.15 + val subgoal = 1
6.16 + val thy = Proof.theory_of st
6.17 + val thm = #goal (Proof.raw_goal st)
6.18 +
6.19 + val refute = Refute.refute_goal thy args thm
6.20 + val _ = TimeLimit.timeLimit timeout refute subgoal
6.21 + in
6.22 + val writ_log = Substring.full (the (Symtab.lookup tab "writeln"))
6.23 + val warn_log = Substring.full (the (Symtab.lookup tab "warning"))
6.24 +
6.25 + val r =
6.26 + if Substring.isSubstring "model found" writ_log
6.27 + then
6.28 + if Substring.isSubstring "spurious" warn_log
6.29 + then SOME "potential counterexample"
6.30 + else SOME "real counterexample (bug?)"
6.31 + else
6.32 + if Substring.isSubstring "time limit" writ_log
6.33 + then SOME "no counterexample (timeout)"
6.34 + else if Substring.isSubstring "Search terminated" writ_log
6.35 + then SOME "no counterexample (normal termination)"
6.36 + else SOME "no counterexample (unknown)"
6.37 + in r end
6.38 +*)
6.39 +
6.40 +fun invoke args = I (*Mirabelle.register ("refute", refute_action args)*)
6.41 +
6.42 +end
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
7.2 +++ b/src/HOL/Mirabelle/Actions/mirabelle_sledgehammer.ML Sat Apr 14 23:52:17 2012 +0100
7.3 @@ -0,0 +1,746 @@
7.4 +(* Title: HOL/Mirabelle/Actions/mirabelle_sledgehammer.ML
7.5 + Author: Jasmin Blanchette and Sascha Boehme and Tobias Nipkow, TU Munich
7.6 +*)
7.7 +
7.8 +structure Mirabelle_Sledgehammer : MIRABELLE_ACTION =
7.9 +struct
7.10 +
7.11 +val proverK = "prover"
7.12 +val prover_timeoutK = "prover_timeout"
7.13 +val keepK = "keep"
7.14 +val type_encK = "type_enc"
7.15 +val strictK = "strict"
7.16 +val sliceK = "slice"
7.17 +val lam_transK = "lam_trans"
7.18 +val uncurried_aliasesK = "uncurried_aliases"
7.19 +val e_selection_heuristicK = "e_selection_heuristic"
7.20 +val term_orderK = "term_order"
7.21 +val force_sosK = "force_sos"
7.22 +val max_relevantK = "max_relevant"
7.23 +val max_callsK = "max_calls"
7.24 +val minimizeK = "minimize" (*refers to minimization attempted by Mirabelle*)
7.25 +val minimize_timeoutK = "minimize_timeout"
7.26 +val metis_ftK = "metis_ft"
7.27 +val reconstructorK = "reconstructor"
7.28 +val preplay_timeoutK = "preplay_timeout"
7.29 +val sh_minimizeK = "sh_minimize" (*refers to minimizer run within Sledgehammer*)
7.30 +val max_new_mono_instancesK = "max_new_mono_instances"
7.31 +val max_mono_itersK = "max_mono_iters"
7.32 +val check_trivialK = "check_trivial" (*false by default*)
7.33 +
7.34 +fun sh_tag id = "#" ^ string_of_int id ^ " sledgehammer: "
7.35 +fun minimize_tag id = "#" ^ string_of_int id ^ " minimize (sledgehammer): "
7.36 +fun reconstructor_tag reconstructor id =
7.37 + "#" ^ string_of_int id ^ " " ^ (!reconstructor) ^ " (sledgehammer): "
7.38 +
7.39 +val separator = "-----"
7.40 +
7.41 +val preplay_timeout_default = "4"
7.42 +(*FIXME sensible to have Mirabelle-level Sledgehammer defaults?*)
7.43 +
7.44 +(*If a key is present in args then augment a list with its pair*)
7.45 +(*This is used to avoid fixing default values at the Mirabelle level, and
7.46 + instead use the default values of the tool (Sledgehammer in this case).*)
7.47 +fun available_parameter args key label list =
7.48 + let
7.49 + val value = AList.lookup (op =) args key
7.50 + in if is_some value then (label, the value) :: list else list end
7.51 +
7.52 +
7.53 +datatype sh_data = ShData of {
7.54 + calls: int,
7.55 + success: int,
7.56 + nontriv_calls: int,
7.57 + nontriv_success: int,
7.58 + lemmas: int,
7.59 + max_lems: int,
7.60 + time_isa: int,
7.61 + time_prover: int,
7.62 + time_prover_fail: int}
7.63 +
7.64 +datatype re_data = ReData of {
7.65 + calls: int,
7.66 + success: int,
7.67 + nontriv_calls: int,
7.68 + nontriv_success: int,
7.69 + proofs: int,
7.70 + time: int,
7.71 + timeout: int,
7.72 + lemmas: int * int * int,
7.73 + posns: (Position.T * bool) list
7.74 + }
7.75 +
7.76 +datatype min_data = MinData of {
7.77 + succs: int,
7.78 + ab_ratios: int
7.79 + }
7.80 +
7.81 +fun make_sh_data
7.82 + (calls,success,nontriv_calls,nontriv_success,lemmas,max_lems,time_isa,
7.83 + time_prover,time_prover_fail) =
7.84 + ShData{calls=calls, success=success, nontriv_calls=nontriv_calls,
7.85 + nontriv_success=nontriv_success, lemmas=lemmas, max_lems=max_lems,
7.86 + time_isa=time_isa, time_prover=time_prover,
7.87 + time_prover_fail=time_prover_fail}
7.88 +
7.89 +fun make_min_data (succs, ab_ratios) =
7.90 + MinData{succs=succs, ab_ratios=ab_ratios}
7.91 +
7.92 +fun make_re_data (calls,success,nontriv_calls,nontriv_success,proofs,time,
7.93 + timeout,lemmas,posns) =
7.94 + ReData{calls=calls, success=success, nontriv_calls=nontriv_calls,
7.95 + nontriv_success=nontriv_success, proofs=proofs, time=time,
7.96 + timeout=timeout, lemmas=lemmas, posns=posns}
7.97 +
7.98 +val empty_sh_data = make_sh_data (0, 0, 0, 0, 0, 0, 0, 0, 0)
7.99 +val empty_min_data = make_min_data (0, 0)
7.100 +val empty_re_data = make_re_data (0, 0, 0, 0, 0, 0, 0, (0,0,0), [])
7.101 +
7.102 +fun tuple_of_sh_data (ShData {calls, success, nontriv_calls, nontriv_success,
7.103 + lemmas, max_lems, time_isa,
7.104 + time_prover, time_prover_fail}) = (calls, success, nontriv_calls,
7.105 + nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail)
7.106 +
7.107 +fun tuple_of_min_data (MinData {succs, ab_ratios}) = (succs, ab_ratios)
7.108 +
7.109 +fun tuple_of_re_data (ReData {calls, success, nontriv_calls, nontriv_success,
7.110 + proofs, time, timeout, lemmas, posns}) = (calls, success, nontriv_calls,
7.111 + nontriv_success, proofs, time, timeout, lemmas, posns)
7.112 +
7.113 +
7.114 +datatype reconstructor_mode =
7.115 + Unminimized | Minimized | UnminimizedFT | MinimizedFT
7.116 +
7.117 +datatype data = Data of {
7.118 + sh: sh_data,
7.119 + min: min_data,
7.120 + re_u: re_data, (* reconstructor with unminimized set of lemmas *)
7.121 + re_m: re_data, (* reconstructor with minimized set of lemmas *)
7.122 + re_uft: re_data, (* reconstructor with unminimized set of lemmas and fully-typed *)
7.123 + re_mft: re_data, (* reconstructor with minimized set of lemmas and fully-typed *)
7.124 + mini: bool (* with minimization *)
7.125 + }
7.126 +
7.127 +fun make_data (sh, min, re_u, re_m, re_uft, re_mft, mini) =
7.128 + Data {sh=sh, min=min, re_u=re_u, re_m=re_m, re_uft=re_uft, re_mft=re_mft,
7.129 + mini=mini}
7.130 +
7.131 +val empty_data = make_data (empty_sh_data, empty_min_data,
7.132 + empty_re_data, empty_re_data, empty_re_data, empty_re_data, false)
7.133 +
7.134 +fun map_sh_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
7.135 + let val sh' = make_sh_data (f (tuple_of_sh_data sh))
7.136 + in make_data (sh', min, re_u, re_m, re_uft, re_mft, mini) end
7.137 +
7.138 +fun map_min_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
7.139 + let val min' = make_min_data (f (tuple_of_min_data min))
7.140 + in make_data (sh, min', re_u, re_m, re_uft, re_mft, mini) end
7.141 +
7.142 +fun map_re_data f m (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
7.143 + let
7.144 + fun map_me g Unminimized (u, m, uft, mft) = (g u, m, uft, mft)
7.145 + | map_me g Minimized (u, m, uft, mft) = (u, g m, uft, mft)
7.146 + | map_me g UnminimizedFT (u, m, uft, mft) = (u, m, g uft, mft)
7.147 + | map_me g MinimizedFT (u, m, uft, mft) = (u, m, uft, g mft)
7.148 +
7.149 + val f' = make_re_data o f o tuple_of_re_data
7.150 +
7.151 + val (re_u', re_m', re_uft', re_mft') =
7.152 + map_me f' m (re_u, re_m, re_uft, re_mft)
7.153 + in make_data (sh, min, re_u', re_m', re_uft', re_mft', mini) end
7.154 +
7.155 +fun set_mini mini (Data {sh, min, re_u, re_m, re_uft, re_mft, ...}) =
7.156 + make_data (sh, min, re_u, re_m, re_uft, re_mft, mini)
7.157 +
7.158 +fun inc_max (n:int) (s,sos,m) = (s+n, sos + n*n, Int.max(m,n));
7.159 +
7.160 +val inc_sh_calls = map_sh_data
7.161 + (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
7.162 + => (calls + 1, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail))
7.163 +
7.164 +val inc_sh_success = map_sh_data
7.165 + (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
7.166 + => (calls, success + 1, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail))
7.167 +
7.168 +val inc_sh_nontriv_calls = map_sh_data
7.169 + (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
7.170 + => (calls, success, nontriv_calls + 1, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail))
7.171 +
7.172 +val inc_sh_nontriv_success = map_sh_data
7.173 + (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
7.174 + => (calls, success, nontriv_calls, nontriv_success + 1, lemmas,max_lems, time_isa, time_prover, time_prover_fail))
7.175 +
7.176 +fun inc_sh_lemmas n = map_sh_data
7.177 + (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
7.178 + => (calls,success,nontriv_calls, nontriv_success, lemmas+n,max_lems,time_isa,time_prover,time_prover_fail))
7.179 +
7.180 +fun inc_sh_max_lems n = map_sh_data
7.181 + (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
7.182 + => (calls,success,nontriv_calls, nontriv_success, lemmas,Int.max(max_lems,n),time_isa,time_prover,time_prover_fail))
7.183 +
7.184 +fun inc_sh_time_isa t = map_sh_data
7.185 + (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
7.186 + => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa + t,time_prover,time_prover_fail))
7.187 +
7.188 +fun inc_sh_time_prover t = map_sh_data
7.189 + (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
7.190 + => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover + t,time_prover_fail))
7.191 +
7.192 +fun inc_sh_time_prover_fail t = map_sh_data
7.193 + (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
7.194 + => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail + t))
7.195 +
7.196 +val inc_min_succs = map_min_data
7.197 + (fn (succs,ab_ratios) => (succs+1, ab_ratios))
7.198 +
7.199 +fun inc_min_ab_ratios r = map_min_data
7.200 + (fn (succs, ab_ratios) => (succs, ab_ratios+r))
7.201 +
7.202 +val inc_reconstructor_calls = map_re_data
7.203 + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
7.204 + => (calls + 1, success, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas,posns))
7.205 +
7.206 +val inc_reconstructor_success = map_re_data
7.207 + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
7.208 + => (calls, success + 1, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas,posns))
7.209 +
7.210 +val inc_reconstructor_nontriv_calls = map_re_data
7.211 + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
7.212 + => (calls, success, nontriv_calls + 1, nontriv_success, proofs, time, timeout, lemmas,posns))
7.213 +
7.214 +val inc_reconstructor_nontriv_success = map_re_data
7.215 + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
7.216 + => (calls, success, nontriv_calls, nontriv_success + 1, proofs, time, timeout, lemmas,posns))
7.217 +
7.218 +val inc_reconstructor_proofs = map_re_data
7.219 + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
7.220 + => (calls, success, nontriv_calls, nontriv_success, proofs + 1, time, timeout, lemmas,posns))
7.221 +
7.222 +fun inc_reconstructor_time m t = map_re_data
7.223 + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
7.224 + => (calls, success, nontriv_calls, nontriv_success, proofs, time + t, timeout, lemmas,posns)) m
7.225 +
7.226 +val inc_reconstructor_timeout = map_re_data
7.227 + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
7.228 + => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout + 1, lemmas,posns))
7.229 +
7.230 +fun inc_reconstructor_lemmas m n = map_re_data
7.231 + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
7.232 + => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout, inc_max n lemmas, posns)) m
7.233 +
7.234 +fun inc_reconstructor_posns m pos = map_re_data
7.235 + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
7.236 + => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas, pos::posns)) m
7.237 +
7.238 +val str0 = string_of_int o the_default 0
7.239 +
7.240 +local
7.241 +
7.242 +val str = string_of_int
7.243 +val str3 = Real.fmt (StringCvt.FIX (SOME 3))
7.244 +fun percentage a b = string_of_int (a * 100 div b)
7.245 +fun time t = Real.fromInt t / 1000.0
7.246 +fun avg_time t n =
7.247 + if n > 0 then (Real.fromInt t / 1000.0) / Real.fromInt n else 0.0
7.248 +
7.249 +fun log_sh_data log
7.250 + (calls, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail) =
7.251 + (log ("Total number of sledgehammer calls: " ^ str calls);
7.252 + log ("Number of successful sledgehammer calls: " ^ str success);
7.253 + log ("Number of sledgehammer lemmas: " ^ str lemmas);
7.254 + log ("Max number of sledgehammer lemmas: " ^ str max_lems);
7.255 + log ("Success rate: " ^ percentage success calls ^ "%");
7.256 + log ("Total number of nontrivial sledgehammer calls: " ^ str nontriv_calls);
7.257 + log ("Number of successful nontrivial sledgehammer calls: " ^ str nontriv_success);
7.258 + log ("Total time for sledgehammer calls (Isabelle): " ^ str3 (time time_isa));
7.259 + log ("Total time for successful sledgehammer calls (ATP): " ^ str3 (time time_prover));
7.260 + log ("Total time for failed sledgehammer calls (ATP): " ^ str3 (time time_prover_fail));
7.261 + log ("Average time for sledgehammer calls (Isabelle): " ^
7.262 + str3 (avg_time time_isa calls));
7.263 + log ("Average time for successful sledgehammer calls (ATP): " ^
7.264 + str3 (avg_time time_prover success));
7.265 + log ("Average time for failed sledgehammer calls (ATP): " ^
7.266 + str3 (avg_time time_prover_fail (calls - success)))
7.267 + )
7.268 +
7.269 +fun str_of_pos (pos, triv) =
7.270 + str0 (Position.line_of pos) (* ^ ":" ^ str0 (Position.offset_of pos) *) ^
7.271 + (if triv then "[T]" else "")
7.272 +
7.273 +fun log_re_data log tag sh_calls (re_calls, re_success, re_nontriv_calls,
7.274 + re_nontriv_success, re_proofs, re_time, re_timeout,
7.275 + (lemmas, lems_sos, lems_max), re_posns) =
7.276 + (log ("Total number of " ^ tag ^ "reconstructor calls: " ^ str re_calls);
7.277 + log ("Number of successful " ^ tag ^ "reconstructor calls: " ^ str re_success ^
7.278 + " (proof: " ^ str re_proofs ^ ")");
7.279 + log ("Number of " ^ tag ^ "reconstructor timeouts: " ^ str re_timeout);
7.280 + log ("Success rate: " ^ percentage re_success sh_calls ^ "%");
7.281 + log ("Total number of nontrivial " ^ tag ^ "reconstructor calls: " ^ str re_nontriv_calls);
7.282 + log ("Number of successful nontrivial " ^ tag ^ "reconstructor calls: " ^ str re_nontriv_success ^
7.283 + " (proof: " ^ str re_proofs ^ ")");
7.284 + log ("Number of successful " ^ tag ^ "reconstructor lemmas: " ^ str lemmas);
7.285 + log ("SOS of successful " ^ tag ^ "reconstructor lemmas: " ^ str lems_sos);
7.286 + log ("Max number of successful " ^ tag ^ "reconstructor lemmas: " ^ str lems_max);
7.287 + log ("Total time for successful " ^ tag ^ "reconstructor calls: " ^ str3 (time re_time));
7.288 + log ("Average time for successful " ^ tag ^ "reconstructor calls: " ^
7.289 + str3 (avg_time re_time re_success));
7.290 + if tag=""
7.291 + then log ("Proved: " ^ space_implode " " (map str_of_pos re_posns))
7.292 + else ()
7.293 + )
7.294 +
7.295 +fun log_min_data log (succs, ab_ratios) =
7.296 + (log ("Number of successful minimizations: " ^ string_of_int succs);
7.297 + log ("After/before ratios: " ^ string_of_int ab_ratios)
7.298 + )
7.299 +
7.300 +in
7.301 +
7.302 +fun log_data id log (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
7.303 + let
7.304 + val ShData {calls=sh_calls, ...} = sh
7.305 +
7.306 + fun app_if (ReData {calls, ...}) f = if calls > 0 then f () else ()
7.307 + fun log_re tag m =
7.308 + log_re_data log tag sh_calls (tuple_of_re_data m)
7.309 + fun log_reconstructor (tag1, m1) (tag2, m2) = app_if m1 (fn () =>
7.310 + (log_re tag1 m1; log ""; app_if m2 (fn () => log_re tag2 m2)))
7.311 + in
7.312 + if sh_calls > 0
7.313 + then
7.314 + (log ("\n\n\nReport #" ^ string_of_int id ^ ":\n");
7.315 + log_sh_data log (tuple_of_sh_data sh);
7.316 + log "";
7.317 + if not mini
7.318 + then log_reconstructor ("", re_u) ("fully-typed ", re_uft)
7.319 + else
7.320 + app_if re_u (fn () =>
7.321 + (log_reconstructor ("unminimized ", re_u) ("unminimized fully-typed ", re_uft);
7.322 + log "";
7.323 + app_if re_m (fn () =>
7.324 + (log_min_data log (tuple_of_min_data min); log "";
7.325 + log_reconstructor ("", re_m) ("fully-typed ", re_mft))))))
7.326 + else ()
7.327 + end
7.328 +
7.329 +end
7.330 +
7.331 +
7.332 +(* Warning: we implicitly assume single-threaded execution here! *)
7.333 +val data = Unsynchronized.ref ([] : (int * data) list)
7.334 +
7.335 +fun init id thy = (Unsynchronized.change data (cons (id, empty_data)); thy)
7.336 +fun done id ({log, ...}: Mirabelle.done_args) =
7.337 + AList.lookup (op =) (!data) id
7.338 + |> Option.map (log_data id log)
7.339 + |> K ()
7.340 +
7.341 +fun change_data id f = (Unsynchronized.change data (AList.map_entry (op =) id f); ())
7.342 +
7.343 +
7.344 +fun get_prover ctxt args =
7.345 + let
7.346 + fun default_prover_name () =
7.347 + hd (#provers (Sledgehammer_Isar.default_params ctxt []))
7.348 + handle List.Empty => error "No ATP available."
7.349 + fun get_prover name =
7.350 + (name, Sledgehammer_Run.get_minimizing_prover ctxt
7.351 + Sledgehammer_Provers.Normal name)
7.352 + in
7.353 + (case AList.lookup (op =) args proverK of
7.354 + SOME name => get_prover name
7.355 + | NONE => get_prover (default_prover_name ()))
7.356 + end
7.357 +
7.358 +type stature = ATP_Problem_Generate.stature
7.359 +
7.360 +(* hack *)
7.361 +fun reconstructor_from_msg args msg =
7.362 + (case AList.lookup (op =) args reconstructorK of
7.363 + SOME name => name
7.364 + | NONE =>
7.365 + if String.isSubstring "metis (" msg then
7.366 + msg |> Substring.full
7.367 + |> Substring.position "metis ("
7.368 + |> snd |> Substring.position ")"
7.369 + |> fst |> Substring.string
7.370 + |> suffix ")"
7.371 + else if String.isSubstring "metis" msg then
7.372 + "metis"
7.373 + else
7.374 + "smt")
7.375 +
7.376 +local
7.377 +
7.378 +datatype sh_result =
7.379 + SH_OK of int * int * (string * stature) list |
7.380 + SH_FAIL of int * int |
7.381 + SH_ERROR
7.382 +
7.383 +fun run_sh prover_name prover type_enc strict max_relevant slice lam_trans
7.384 + uncurried_aliases e_selection_heuristic term_order force_sos
7.385 + hard_timeout timeout preplay_timeout sh_minimizeLST
7.386 + max_new_mono_instancesLST max_mono_itersLST dir pos st =
7.387 + let
7.388 + val {context = ctxt, facts = chained_ths, goal} = Proof.goal st
7.389 + val i = 1
7.390 + fun set_file_name (SOME dir) =
7.391 + Config.put Sledgehammer_Provers.dest_dir dir
7.392 + #> Config.put Sledgehammer_Provers.problem_prefix
7.393 + ("prob_" ^ str0 (Position.line_of pos) ^ "__")
7.394 + #> Config.put SMT_Config.debug_files
7.395 + (dir ^ "/" ^ Name.desymbolize false (ATP_Util.timestamp ()) ^ "_"
7.396 + ^ serial_string ())
7.397 + | set_file_name NONE = I
7.398 + val st' =
7.399 + st
7.400 + |> Proof.map_context
7.401 + (set_file_name dir
7.402 + #> (Option.map (Config.put ATP_Systems.e_selection_heuristic)
7.403 + e_selection_heuristic |> the_default I)
7.404 + #> (Option.map (Config.put ATP_Systems.term_order)
7.405 + term_order |> the_default I)
7.406 + #> (Option.map (Config.put ATP_Systems.force_sos)
7.407 + force_sos |> the_default I))
7.408 + val params as {relevance_thresholds, max_relevant, slice, ...} =
7.409 + Sledgehammer_Isar.default_params ctxt
7.410 + ([("verbose", "true"),
7.411 + ("type_enc", type_enc),
7.412 + ("strict", strict),
7.413 + ("lam_trans", lam_trans |> the_default "smart"),
7.414 + ("uncurried_aliases", uncurried_aliases |> the_default "smart"),
7.415 + ("max_relevant", max_relevant),
7.416 + ("slice", slice),
7.417 + ("timeout", string_of_int timeout),
7.418 + ("preplay_timeout", preplay_timeout)]
7.419 + |> sh_minimizeLST (*don't confuse the two minimization flags*)
7.420 + |> max_new_mono_instancesLST
7.421 + |> max_mono_itersLST)
7.422 + val default_max_relevant =
7.423 + Sledgehammer_Provers.default_max_relevant_for_prover ctxt slice
7.424 + prover_name
7.425 + val is_appropriate_prop =
7.426 + Sledgehammer_Provers.is_appropriate_prop_for_prover ctxt prover_name
7.427 + val is_built_in_const =
7.428 + Sledgehammer_Provers.is_built_in_const_for_prover ctxt prover_name
7.429 + val relevance_fudge =
7.430 + Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover_name
7.431 + val relevance_override = {add = [], del = [], only = false}
7.432 + val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal i
7.433 + val time_limit =
7.434 + (case hard_timeout of
7.435 + NONE => I
7.436 + | SOME secs => TimeLimit.timeLimit (Time.fromSeconds secs))
7.437 + fun failed failure =
7.438 + ({outcome = SOME failure, used_facts = [], run_time = Time.zeroTime,
7.439 + preplay =
7.440 + K (ATP_Proof_Reconstruct.Failed_to_Play Sledgehammer_Provers.plain_metis),
7.441 + message = K "", message_tail = ""}, ~1)
7.442 + val ({outcome, used_facts, run_time, preplay, message, message_tail}
7.443 + : Sledgehammer_Provers.prover_result,
7.444 + time_isa) = time_limit (Mirabelle.cpu_time (fn () =>
7.445 + let
7.446 + val _ = if is_appropriate_prop concl_t then ()
7.447 + else raise Fail "inappropriate"
7.448 + val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover_name
7.449 + val facts =
7.450 + Sledgehammer_Filter.nearly_all_facts ctxt ho_atp relevance_override
7.451 + chained_ths hyp_ts concl_t
7.452 + |> filter (is_appropriate_prop o prop_of o snd)
7.453 + |> Sledgehammer_Filter.relevant_facts ctxt relevance_thresholds
7.454 + (the_default default_max_relevant max_relevant)
7.455 + is_built_in_const relevance_fudge relevance_override
7.456 + chained_ths hyp_ts concl_t
7.457 + val problem =
7.458 + {state = st', goal = goal, subgoal = i,
7.459 + subgoal_count = Sledgehammer_Util.subgoal_count st,
7.460 + facts = facts |> map Sledgehammer_Provers.Untranslated_Fact,
7.461 + smt_filter = NONE}
7.462 + in prover params (K (K (K ""))) problem end)) ()
7.463 + handle TimeLimit.TimeOut => failed ATP_Proof.TimedOut
7.464 + | Fail "inappropriate" => failed ATP_Proof.Inappropriate
7.465 + val time_prover = run_time |> Time.toMilliseconds
7.466 + val msg = message (preplay ()) ^ message_tail
7.467 + in
7.468 + case outcome of
7.469 + NONE => (msg, SH_OK (time_isa, time_prover, used_facts))
7.470 + | SOME _ => (msg, SH_FAIL (time_isa, time_prover))
7.471 + end
7.472 + handle ERROR msg => ("error: " ^ msg, SH_ERROR)
7.473 +
7.474 +fun thms_of_name ctxt name =
7.475 + let
7.476 + val lex = Keyword.get_lexicons
7.477 + val get = maps (Proof_Context.get_fact ctxt o fst)
7.478 + in
7.479 + Source.of_string name
7.480 + |> Symbol.source
7.481 + |> Token.source {do_recover=SOME false} lex Position.start
7.482 + |> Token.source_proper
7.483 + |> Source.source Token.stopper (Parse_Spec.xthms1 >> get) NONE
7.484 + |> Source.exhaust
7.485 + end
7.486 +
7.487 +in
7.488 +
7.489 +fun run_sledgehammer trivial args reconstructor named_thms id
7.490 + ({pre=st, log, pos, ...}: Mirabelle.run_args) =
7.491 + let
7.492 + val triv_str = if trivial then "[T] " else ""
7.493 + val _ = change_data id inc_sh_calls
7.494 + val _ = if trivial then () else change_data id inc_sh_nontriv_calls
7.495 + val (prover_name, prover) = get_prover (Proof.context_of st) args
7.496 + val type_enc = AList.lookup (op =) args type_encK |> the_default "smart"
7.497 + val strict = AList.lookup (op =) args strictK |> the_default "false"
7.498 + val max_relevant = AList.lookup (op =) args max_relevantK |> the_default "smart"
7.499 + val slice = AList.lookup (op =) args sliceK |> the_default "true"
7.500 + val lam_trans = AList.lookup (op =) args lam_transK
7.501 + val uncurried_aliases = AList.lookup (op =) args uncurried_aliasesK
7.502 + val e_selection_heuristic = AList.lookup (op =) args e_selection_heuristicK
7.503 + val term_order = AList.lookup (op =) args term_orderK
7.504 + val force_sos = AList.lookup (op =) args force_sosK
7.505 + |> Option.map (curry (op <>) "false")
7.506 + val dir = AList.lookup (op =) args keepK
7.507 + val timeout = Mirabelle.get_int_setting args (prover_timeoutK, 30)
7.508 + (* always use a hard timeout, but give some slack so that the automatic
7.509 + minimizer has a chance to do its magic *)
7.510 + val preplay_timeout = AList.lookup (op =) args preplay_timeoutK
7.511 + |> the_default preplay_timeout_default
7.512 + val sh_minimizeLST = available_parameter args sh_minimizeK "minimize"
7.513 + val max_new_mono_instancesLST =
7.514 + available_parameter args max_new_mono_instancesK max_new_mono_instancesK
7.515 + val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK
7.516 + val hard_timeout = SOME (2 * timeout)
7.517 + val (msg, result) =
7.518 + run_sh prover_name prover type_enc strict max_relevant slice lam_trans
7.519 + uncurried_aliases e_selection_heuristic term_order force_sos
7.520 + hard_timeout timeout preplay_timeout sh_minimizeLST
7.521 + max_new_mono_instancesLST max_mono_itersLST dir pos st
7.522 + in
7.523 + case result of
7.524 + SH_OK (time_isa, time_prover, names) =>
7.525 + let
7.526 + fun get_thms (name, stature) =
7.527 + try (thms_of_name (Proof.context_of st)) name
7.528 + |> Option.map (pair (name, stature))
7.529 + in
7.530 + change_data id inc_sh_success;
7.531 + if trivial then () else change_data id inc_sh_nontriv_success;
7.532 + change_data id (inc_sh_lemmas (length names));
7.533 + change_data id (inc_sh_max_lems (length names));
7.534 + change_data id (inc_sh_time_isa time_isa);
7.535 + change_data id (inc_sh_time_prover time_prover);
7.536 + reconstructor := reconstructor_from_msg args msg;
7.537 + named_thms := SOME (map_filter get_thms names);
7.538 + log (sh_tag id ^ triv_str ^ "succeeded (" ^ string_of_int time_isa ^ "+" ^
7.539 + string_of_int time_prover ^ ") [" ^ prover_name ^ "]:\n" ^ msg)
7.540 + end
7.541 + | SH_FAIL (time_isa, time_prover) =>
7.542 + let
7.543 + val _ = change_data id (inc_sh_time_isa time_isa)
7.544 + val _ = change_data id (inc_sh_time_prover_fail time_prover)
7.545 + in log (sh_tag id ^ triv_str ^ "failed: " ^ msg) end
7.546 + | SH_ERROR => log (sh_tag id ^ "failed: " ^ msg)
7.547 + end
7.548 +
7.549 +end
7.550 +
7.551 +fun run_minimize args reconstructor named_thms id
7.552 + ({pre=st, log, ...}: Mirabelle.run_args) =
7.553 + let
7.554 + val ctxt = Proof.context_of st
7.555 + val n0 = length (these (!named_thms))
7.556 + val (prover_name, _) = get_prover ctxt args
7.557 + val type_enc = AList.lookup (op =) args type_encK |> the_default "smart"
7.558 + val strict = AList.lookup (op =) args strictK |> the_default "false"
7.559 + val timeout =
7.560 + AList.lookup (op =) args minimize_timeoutK
7.561 + |> Option.map (fst o read_int o raw_explode) (* FIXME Symbol.explode (?) *)
7.562 + |> the_default 5
7.563 + val preplay_timeout = AList.lookup (op =) args preplay_timeoutK
7.564 + |> the_default preplay_timeout_default
7.565 + val sh_minimizeLST = available_parameter args sh_minimizeK "minimize"
7.566 + val max_new_mono_instancesLST =
7.567 + available_parameter args max_new_mono_instancesK max_new_mono_instancesK
7.568 + val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK
7.569 + val params = Sledgehammer_Isar.default_params ctxt
7.570 + ([("provers", prover_name),
7.571 + ("verbose", "true"),
7.572 + ("type_enc", type_enc),
7.573 + ("strict", strict),
7.574 + ("timeout", string_of_int timeout),
7.575 + ("preplay_timeout", preplay_timeout)]
7.576 + |> sh_minimizeLST (*don't confuse the two minimization flags*)
7.577 + |> max_new_mono_instancesLST
7.578 + |> max_mono_itersLST)
7.579 + val minimize =
7.580 + Sledgehammer_Minimize.minimize_facts prover_name params
7.581 + true 1 (Sledgehammer_Util.subgoal_count st)
7.582 + val _ = log separator
7.583 + val (used_facts, (preplay, message, message_tail)) =
7.584 + minimize st (these (!named_thms))
7.585 + val msg = message (preplay ()) ^ message_tail
7.586 + in
7.587 + case used_facts of
7.588 + SOME named_thms' =>
7.589 + (change_data id inc_min_succs;
7.590 + change_data id (inc_min_ab_ratios ((100 * length named_thms') div n0));
7.591 + if length named_thms' = n0
7.592 + then log (minimize_tag id ^ "already minimal")
7.593 + else (reconstructor := reconstructor_from_msg args msg;
7.594 + named_thms := SOME named_thms';
7.595 + log (minimize_tag id ^ "succeeded:\n" ^ msg))
7.596 + )
7.597 + | NONE => log (minimize_tag id ^ "failed: " ^ msg)
7.598 + end
7.599 +
7.600 +fun override_params prover type_enc timeout =
7.601 + [("provers", prover),
7.602 + ("max_relevant", "0"),
7.603 + ("type_enc", type_enc),
7.604 + ("strict", "true"),
7.605 + ("slice", "false"),
7.606 + ("timeout", timeout |> Time.toSeconds |> string_of_int)]
7.607 +
7.608 +fun run_reconstructor trivial full m name reconstructor named_thms id
7.609 + ({pre=st, timeout, log, pos, ...}: Mirabelle.run_args) =
7.610 + let
7.611 + fun do_reconstructor named_thms ctxt =
7.612 + let
7.613 + val ref_of_str =
7.614 + suffix ";" #> Outer_Syntax.scan Position.none #> Parse_Spec.xthm
7.615 + #> fst
7.616 + val thms = named_thms |> maps snd
7.617 + val facts = named_thms |> map (ref_of_str o fst o fst)
7.618 + val relevance_override = {add = facts, del = [], only = true}
7.619 + fun my_timeout time_slice =
7.620 + timeout |> Time.toReal |> curry Real.* time_slice |> Time.fromReal
7.621 + fun sledge_tac time_slice prover type_enc =
7.622 + Sledgehammer_Tactics.sledgehammer_as_oracle_tac ctxt
7.623 + (override_params prover type_enc (my_timeout time_slice))
7.624 + relevance_override
7.625 + in
7.626 + if !reconstructor = "sledgehammer_tac" then
7.627 + sledge_tac 0.2 ATP_Systems.z3_tptpN "mono_native"
7.628 + ORELSE' sledge_tac 0.2 ATP_Systems.eN "mono_guards??"
7.629 + ORELSE' sledge_tac 0.2 ATP_Systems.vampireN "mono_guards??"
7.630 + ORELSE' sledge_tac 0.2 ATP_Systems.spassN "poly_tags"
7.631 + ORELSE' Metis_Tactic.metis_tac [] ATP_Problem_Generate.combsN
7.632 + ctxt thms
7.633 + else if !reconstructor = "smt" then
7.634 + SMT_Solver.smt_tac ctxt thms
7.635 + else if full then
7.636 + Metis_Tactic.metis_tac [ATP_Proof_Reconstruct.full_typesN]
7.637 + ATP_Proof_Reconstruct.metis_default_lam_trans ctxt thms
7.638 + else if String.isPrefix "metis (" (!reconstructor) then
7.639 + let
7.640 + val (type_encs, lam_trans) =
7.641 + !reconstructor
7.642 + |> Outer_Syntax.scan Position.start
7.643 + |> filter Token.is_proper |> tl
7.644 + |> Metis_Tactic.parse_metis_options |> fst
7.645 + |>> the_default [ATP_Proof_Reconstruct.partial_typesN]
7.646 + ||> the_default ATP_Proof_Reconstruct.metis_default_lam_trans
7.647 + in Metis_Tactic.metis_tac type_encs lam_trans ctxt thms end
7.648 + else if !reconstructor = "metis" then
7.649 + Metis_Tactic.metis_tac [] ATP_Proof_Reconstruct.metis_default_lam_trans ctxt
7.650 + thms
7.651 + else
7.652 + K all_tac
7.653 + end
7.654 + fun apply_reconstructor named_thms =
7.655 + Mirabelle.can_apply timeout (do_reconstructor named_thms) st
7.656 +
7.657 + fun with_time (false, t) = "failed (" ^ string_of_int t ^ ")"
7.658 + | with_time (true, t) = (change_data id (inc_reconstructor_success m);
7.659 + if trivial then ()
7.660 + else change_data id (inc_reconstructor_nontriv_success m);
7.661 + change_data id (inc_reconstructor_lemmas m (length named_thms));
7.662 + change_data id (inc_reconstructor_time m t);
7.663 + change_data id (inc_reconstructor_posns m (pos, trivial));
7.664 + if name = "proof" then change_data id (inc_reconstructor_proofs m)
7.665 + else ();
7.666 + "succeeded (" ^ string_of_int t ^ ")")
7.667 + fun timed_reconstructor named_thms =
7.668 + (with_time (Mirabelle.cpu_time apply_reconstructor named_thms), true)
7.669 + handle TimeLimit.TimeOut => (change_data id (inc_reconstructor_timeout m);
7.670 + ("timeout", false))
7.671 + | ERROR msg => ("error: " ^ msg, false)
7.672 +
7.673 + val _ = log separator
7.674 + val _ = change_data id (inc_reconstructor_calls m)
7.675 + val _ = if trivial then ()
7.676 + else change_data id (inc_reconstructor_nontriv_calls m)
7.677 + in
7.678 + named_thms
7.679 + |> timed_reconstructor
7.680 + |>> log o prefix (reconstructor_tag reconstructor id)
7.681 + |> snd
7.682 + end
7.683 +
7.684 +val try_timeout = seconds 5.0
7.685 +
7.686 +(* crude hack *)
7.687 +val num_sledgehammer_calls = Unsynchronized.ref 0
7.688 +
7.689 +fun sledgehammer_action args id (st as {pre, name, ...}: Mirabelle.run_args) =
7.690 + let val goal = Thm.major_prem_of (#goal (Proof.goal pre)) in
7.691 + if can Logic.dest_conjunction goal orelse can Logic.dest_equals goal
7.692 + then () else
7.693 + let
7.694 + val max_calls =
7.695 + AList.lookup (op =) args max_callsK |> the_default "10000000"
7.696 + |> Int.fromString |> the
7.697 + val _ = num_sledgehammer_calls := !num_sledgehammer_calls + 1;
7.698 + in
7.699 + if !num_sledgehammer_calls > max_calls then ()
7.700 + else
7.701 + let
7.702 + val reconstructor = Unsynchronized.ref ""
7.703 + val named_thms =
7.704 + Unsynchronized.ref (NONE : ((string * stature) * thm list) list option)
7.705 + val minimize = AList.defined (op =) args minimizeK
7.706 + val metis_ft = AList.defined (op =) args metis_ftK
7.707 + val trivial =
7.708 + if AList.lookup (op =) args check_trivialK |> the_default "false"
7.709 + |> Bool.fromString |> the then
7.710 + Try0.try0 (SOME try_timeout) ([], [], [], []) pre
7.711 + handle TimeLimit.TimeOut => false
7.712 + else false
7.713 + fun apply_reconstructor m1 m2 =
7.714 + if metis_ft
7.715 + then
7.716 + if not (Mirabelle.catch_result (reconstructor_tag reconstructor) false
7.717 + (run_reconstructor trivial false m1 name reconstructor
7.718 + (these (!named_thms))) id st)
7.719 + then
7.720 + (Mirabelle.catch_result (reconstructor_tag reconstructor) false
7.721 + (run_reconstructor trivial true m2 name reconstructor
7.722 + (these (!named_thms))) id st; ())
7.723 + else ()
7.724 + else
7.725 + (Mirabelle.catch_result (reconstructor_tag reconstructor) false
7.726 + (run_reconstructor trivial false m1 name reconstructor
7.727 + (these (!named_thms))) id st; ())
7.728 + in
7.729 + change_data id (set_mini minimize);
7.730 + Mirabelle.catch sh_tag (run_sledgehammer trivial args reconstructor
7.731 + named_thms) id st;
7.732 + if is_some (!named_thms)
7.733 + then
7.734 + (apply_reconstructor Unminimized UnminimizedFT;
7.735 + if minimize andalso not (null (these (!named_thms)))
7.736 + then
7.737 + (Mirabelle.catch minimize_tag
7.738 + (run_minimize args reconstructor named_thms) id st;
7.739 + apply_reconstructor Minimized MinimizedFT)
7.740 + else ())
7.741 + else ()
7.742 + end
7.743 + end
7.744 + end
7.745 +
7.746 +fun invoke args =
7.747 + Mirabelle.register (init, sledgehammer_action args, done)
7.748 +
7.749 +end
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
8.2 +++ b/src/HOL/Mirabelle/Actions/mirabelle_sledgehammer_filter.ML Sat Apr 14 23:52:17 2012 +0100
8.3 @@ -0,0 +1,198 @@
8.4 +(* Title: HOL/Mirabelle/Actions/mirabelle_sledgehammer_filter.ML
8.5 + Author: Jasmin Blanchette, TU Munich
8.6 +*)
8.7 +
8.8 +structure Mirabelle_Sledgehammer_Filter : MIRABELLE_ACTION =
8.9 +struct
8.10 +
8.11 +fun get args name default_value =
8.12 + case AList.lookup (op =) args name of
8.13 + SOME value => the (Real.fromString value)
8.14 + | NONE => default_value
8.15 +
8.16 +fun extract_relevance_fudge args
8.17 + {local_const_multiplier, worse_irrel_freq, higher_order_irrel_weight,
8.18 + abs_rel_weight, abs_irrel_weight, skolem_irrel_weight,
8.19 + theory_const_rel_weight, theory_const_irrel_weight,
8.20 + chained_const_irrel_weight, intro_bonus, elim_bonus, simp_bonus,
8.21 + local_bonus, assum_bonus, chained_bonus, max_imperfect, max_imperfect_exp,
8.22 + threshold_divisor, ridiculous_threshold} =
8.23 + {local_const_multiplier =
8.24 + get args "local_const_multiplier" local_const_multiplier,
8.25 + worse_irrel_freq = get args "worse_irrel_freq" worse_irrel_freq,
8.26 + higher_order_irrel_weight =
8.27 + get args "higher_order_irrel_weight" higher_order_irrel_weight,
8.28 + abs_rel_weight = get args "abs_rel_weight" abs_rel_weight,
8.29 + abs_irrel_weight = get args "abs_irrel_weight" abs_irrel_weight,
8.30 + skolem_irrel_weight = get args "skolem_irrel_weight" skolem_irrel_weight,
8.31 + theory_const_rel_weight =
8.32 + get args "theory_const_rel_weight" theory_const_rel_weight,
8.33 + theory_const_irrel_weight =
8.34 + get args "theory_const_irrel_weight" theory_const_irrel_weight,
8.35 + chained_const_irrel_weight =
8.36 + get args "chained_const_irrel_weight" chained_const_irrel_weight,
8.37 + intro_bonus = get args "intro_bonus" intro_bonus,
8.38 + elim_bonus = get args "elim_bonus" elim_bonus,
8.39 + simp_bonus = get args "simp_bonus" simp_bonus,
8.40 + local_bonus = get args "local_bonus" local_bonus,
8.41 + assum_bonus = get args "assum_bonus" assum_bonus,
8.42 + chained_bonus = get args "chained_bonus" chained_bonus,
8.43 + max_imperfect = get args "max_imperfect" max_imperfect,
8.44 + max_imperfect_exp = get args "max_imperfect_exp" max_imperfect_exp,
8.45 + threshold_divisor = get args "threshold_divisor" threshold_divisor,
8.46 + ridiculous_threshold = get args "ridiculous_threshold" ridiculous_threshold}
8.47 +
8.48 +structure Prooftab =
8.49 + Table(type key = int * int val ord = prod_ord int_ord int_ord)
8.50 +
8.51 +val proof_table = Unsynchronized.ref (Prooftab.empty: string list list Prooftab.table)
8.52 +
8.53 +val num_successes = Unsynchronized.ref ([] : (int * int) list)
8.54 +val num_failures = Unsynchronized.ref ([] : (int * int) list)
8.55 +val num_found_proofs = Unsynchronized.ref ([] : (int * int) list)
8.56 +val num_lost_proofs = Unsynchronized.ref ([] : (int * int) list)
8.57 +val num_found_facts = Unsynchronized.ref ([] : (int * int) list)
8.58 +val num_lost_facts = Unsynchronized.ref ([] : (int * int) list)
8.59 +
8.60 +fun get id c = the_default 0 (AList.lookup (op =) (!c) id)
8.61 +fun add id c n =
8.62 + c := (case AList.lookup (op =) (!c) id of
8.63 + SOME m => AList.update (op =) (id, m + n) (!c)
8.64 + | NONE => (id, n) :: !c)
8.65 +
8.66 +fun init proof_file _ thy =
8.67 + let
8.68 + fun do_line line =
8.69 + case line |> space_explode ":" of
8.70 + [line_num, offset, proof] =>
8.71 + SOME (pairself (the o Int.fromString) (line_num, offset),
8.72 + proof |> space_explode " " |> filter_out (curry (op =) ""))
8.73 + | _ => NONE
8.74 + val proofs = File.read (Path.explode proof_file)
8.75 + val proof_tab =
8.76 + proofs |> space_explode "\n"
8.77 + |> map_filter do_line
8.78 + |> AList.coalesce (op =)
8.79 + |> Prooftab.make
8.80 + in proof_table := proof_tab; thy end
8.81 +
8.82 +fun percentage a b = if b = 0 then "N/A" else string_of_int (a * 100 div b)
8.83 +fun percentage_alt a b = percentage a (a + b)
8.84 +
8.85 +fun done id ({log, ...} : Mirabelle.done_args) =
8.86 + if get id num_successes + get id num_failures > 0 then
8.87 + (log "";
8.88 + log ("Number of overall successes: " ^
8.89 + string_of_int (get id num_successes));
8.90 + log ("Number of overall failures: " ^ string_of_int (get id num_failures));
8.91 + log ("Overall success rate: " ^
8.92 + percentage_alt (get id num_successes) (get id num_failures) ^ "%");
8.93 + log ("Number of found proofs: " ^ string_of_int (get id num_found_proofs));
8.94 + log ("Number of lost proofs: " ^ string_of_int (get id num_lost_proofs));
8.95 + log ("Proof found rate: " ^
8.96 + percentage_alt (get id num_found_proofs) (get id num_lost_proofs) ^
8.97 + "%");
8.98 + log ("Number of found facts: " ^ string_of_int (get id num_found_facts));
8.99 + log ("Number of lost facts: " ^ string_of_int (get id num_lost_facts));
8.100 + log ("Fact found rate: " ^
8.101 + percentage_alt (get id num_found_facts) (get id num_lost_facts) ^
8.102 + "%"))
8.103 + else
8.104 + ()
8.105 +
8.106 +val default_prover = ATP_Systems.eN (* arbitrary ATP *)
8.107 +
8.108 +fun with_index (i, s) = s ^ "@" ^ string_of_int i
8.109 +
8.110 +fun action args id ({pre, pos, log, ...} : Mirabelle.run_args) =
8.111 + case (Position.line_of pos, Position.offset_of pos) of
8.112 + (SOME line_num, SOME offset) =>
8.113 + (case Prooftab.lookup (!proof_table) (line_num, offset) of
8.114 + SOME proofs =>
8.115 + let
8.116 + val {context = ctxt, facts = chained_ths, goal} = Proof.goal pre
8.117 + val prover = AList.lookup (op =) args "prover"
8.118 + |> the_default default_prover
8.119 + val {relevance_thresholds, max_relevant, slice, ...} =
8.120 + Sledgehammer_Isar.default_params ctxt args
8.121 + val default_max_relevant =
8.122 + Sledgehammer_Provers.default_max_relevant_for_prover ctxt slice
8.123 + prover
8.124 + val is_appropriate_prop =
8.125 + Sledgehammer_Provers.is_appropriate_prop_for_prover ctxt
8.126 + default_prover
8.127 + val is_built_in_const =
8.128 + Sledgehammer_Provers.is_built_in_const_for_prover ctxt default_prover
8.129 + val relevance_fudge =
8.130 + extract_relevance_fudge args
8.131 + (Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover)
8.132 + val relevance_override = {add = [], del = [], only = false}
8.133 + val subgoal = 1
8.134 + val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal subgoal
8.135 + val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover
8.136 + val facts =
8.137 + Sledgehammer_Filter.nearly_all_facts ctxt ho_atp relevance_override
8.138 + chained_ths hyp_ts concl_t
8.139 + |> filter (is_appropriate_prop o prop_of o snd)
8.140 + |> Sledgehammer_Filter.relevant_facts ctxt relevance_thresholds
8.141 + (the_default default_max_relevant max_relevant)
8.142 + is_built_in_const relevance_fudge relevance_override
8.143 + chained_ths hyp_ts concl_t
8.144 + |> map (fst o fst)
8.145 + val (found_facts, lost_facts) =
8.146 + flat proofs |> sort_distinct string_ord
8.147 + |> map (fn fact => (find_index (curry (op =) fact) facts, fact))
8.148 + |> List.partition (curry (op <=) 0 o fst)
8.149 + |>> sort (prod_ord int_ord string_ord) ||> map snd
8.150 + val found_proofs = filter (forall (member (op =) facts)) proofs
8.151 + val n = length found_proofs
8.152 + val _ =
8.153 + if n = 0 then
8.154 + (add id num_failures 1; log "Failure")
8.155 + else
8.156 + (add id num_successes 1;
8.157 + add id num_found_proofs n;
8.158 + log ("Success (" ^ string_of_int n ^ " of " ^
8.159 + string_of_int (length proofs) ^ " proofs)"))
8.160 + val _ = add id num_lost_proofs (length proofs - n)
8.161 + val _ = add id num_found_facts (length found_facts)
8.162 + val _ = add id num_lost_facts (length lost_facts)
8.163 + val _ =
8.164 + if null found_facts then
8.165 + ()
8.166 + else
8.167 + let
8.168 + val found_weight =
8.169 + Real.fromInt (fold (fn (n, _) =>
8.170 + Integer.add (n * n)) found_facts 0)
8.171 + / Real.fromInt (length found_facts)
8.172 + |> Math.sqrt |> Real.ceil
8.173 + in
8.174 + log ("Found facts (among " ^ string_of_int (length facts) ^
8.175 + ", weight " ^ string_of_int found_weight ^ "): " ^
8.176 + commas (map with_index found_facts))
8.177 + end
8.178 + val _ = if null lost_facts then
8.179 + ()
8.180 + else
8.181 + log ("Lost facts (among " ^ string_of_int (length facts) ^
8.182 + "): " ^ commas lost_facts)
8.183 + in () end
8.184 + | NONE => log "No known proof")
8.185 + | _ => ()
8.186 +
8.187 +val proof_fileK = "proof_file"
8.188 +
8.189 +fun invoke args =
8.190 + let
8.191 + val (pf_args, other_args) =
8.192 + args |> List.partition (curry (op =) proof_fileK o fst)
8.193 + val proof_file = case pf_args of
8.194 + [] => error "No \"proof_file\" specified"
8.195 + | (_, s) :: _ => s
8.196 + in Mirabelle.register (init proof_file, action other_args, done) end
8.197 +
8.198 +end;
8.199 +
8.200 +(* Workaround to keep the "mirabelle.pl" script happy *)
8.201 +structure Mirabelle_Sledgehammer_filter = Mirabelle_Sledgehammer_Filter;
9.1 --- a/src/HOL/Mirabelle/Mirabelle.thy Sat Apr 14 23:34:18 2012 +0200
9.2 +++ b/src/HOL/Mirabelle/Mirabelle.thy Sat Apr 14 23:52:17 2012 +0100
9.3 @@ -4,7 +4,7 @@
9.4
9.5 theory Mirabelle
9.6 imports Sledgehammer
9.7 -uses "Tools/mirabelle.ML"
9.8 +uses "Actions/mirabelle.ML"
9.9 "../ex/sledgehammer_tactics.ML"
9.10 begin
9.11
10.1 --- a/src/HOL/Mirabelle/Mirabelle_Test.thy Sat Apr 14 23:34:18 2012 +0200
10.2 +++ b/src/HOL/Mirabelle/Mirabelle_Test.thy Sat Apr 14 23:52:17 2012 +0100
10.3 @@ -7,12 +7,12 @@
10.4 theory Mirabelle_Test
10.5 imports Main Mirabelle
10.6 uses
10.7 - "Tools/mirabelle_arith.ML"
10.8 - "Tools/mirabelle_metis.ML"
10.9 - "Tools/mirabelle_quickcheck.ML"
10.10 - "Tools/mirabelle_refute.ML"
10.11 - "Tools/mirabelle_sledgehammer.ML"
10.12 - "Tools/mirabelle_sledgehammer_filter.ML"
10.13 + "Actions/mirabelle_arith.ML"
10.14 + "Actions/mirabelle_metis.ML"
10.15 + "Actions/mirabelle_quickcheck.ML"
10.16 + "Actions/mirabelle_refute.ML"
10.17 + "Actions/mirabelle_sledgehammer.ML"
10.18 + "Actions/mirabelle_sledgehammer_filter.ML"
10.19 begin
10.20
10.21 text {*
11.1 --- a/src/HOL/Mirabelle/Tools/mirabelle.ML Sat Apr 14 23:34:18 2012 +0200
11.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
11.3 @@ -1,213 +0,0 @@
11.4 -(* Title: HOL/Mirabelle/Tools/mirabelle.ML
11.5 - Author: Jasmin Blanchette and Sascha Boehme, TU Munich
11.6 -*)
11.7 -
11.8 -signature MIRABELLE =
11.9 -sig
11.10 - (*configuration*)
11.11 - val logfile : string Config.T
11.12 - val timeout : int Config.T
11.13 - val start_line : int Config.T
11.14 - val end_line : int Config.T
11.15 -
11.16 - (*core*)
11.17 - type init_action = int -> theory -> theory
11.18 - type done_args = {last: Toplevel.state, log: string -> unit}
11.19 - type done_action = int -> done_args -> unit
11.20 - type run_args = {pre: Proof.state, post: Toplevel.state option,
11.21 - timeout: Time.time, log: string -> unit, pos: Position.T, name: string}
11.22 - type run_action = int -> run_args -> unit
11.23 - type action = init_action * run_action * done_action
11.24 - val catch : (int -> string) -> run_action -> run_action
11.25 - val catch_result : (int -> string) -> 'a -> (int -> run_args -> 'a) ->
11.26 - int -> run_args -> 'a
11.27 - val register : action -> theory -> theory
11.28 - val step_hook : Toplevel.transition -> Toplevel.state -> Toplevel.state ->
11.29 - unit
11.30 -
11.31 - (*utility functions*)
11.32 - val can_apply : Time.time -> (Proof.context -> int -> tactic) ->
11.33 - Proof.state -> bool
11.34 - val theorems_in_proof_term : thm -> thm list
11.35 - val theorems_of_sucessful_proof : Toplevel.state option -> thm list
11.36 - val get_setting : (string * string) list -> string * string -> string
11.37 - val get_int_setting : (string * string) list -> string * int -> int
11.38 - val cpu_time : ('a -> 'b) -> 'a -> 'b * int
11.39 -end
11.40 -
11.41 -
11.42 -
11.43 -structure Mirabelle : MIRABELLE =
11.44 -struct
11.45 -
11.46 -(* Mirabelle configuration *)
11.47 -
11.48 -val logfile = Attrib.setup_config_string @{binding mirabelle_logfile} (K "")
11.49 -val timeout = Attrib.setup_config_int @{binding mirabelle_timeout} (K 30)
11.50 -val start_line = Attrib.setup_config_int @{binding mirabelle_start_line} (K 0)
11.51 -val end_line = Attrib.setup_config_int @{binding mirabelle_end_line} (K ~1)
11.52 -
11.53 -
11.54 -(* Mirabelle core *)
11.55 -
11.56 -type init_action = int -> theory -> theory
11.57 -type done_args = {last: Toplevel.state, log: string -> unit}
11.58 -type done_action = int -> done_args -> unit
11.59 -type run_args = {pre: Proof.state, post: Toplevel.state option,
11.60 - timeout: Time.time, log: string -> unit, pos: Position.T, name: string}
11.61 -type run_action = int -> run_args -> unit
11.62 -type action = init_action * run_action * done_action
11.63 -
11.64 -structure Actions = Theory_Data
11.65 -(
11.66 - type T = (int * run_action * done_action) list
11.67 - val empty = []
11.68 - val extend = I
11.69 - fun merge data = Library.merge (K true) data (* FIXME potential data loss because of (K true) *)
11.70 -)
11.71 -
11.72 -
11.73 -fun log_exn log tag id e = log (tag id ^ "exception:\n" ^ General.exnMessage e)
11.74 -
11.75 -fun catch tag f id (st as {log, ...}: run_args) = (f id st; ())
11.76 - handle exn =>
11.77 - if Exn.is_interrupt exn then reraise exn else (log_exn log tag id exn; ())
11.78 -
11.79 -fun catch_result tag d f id (st as {log, ...}: run_args) = f id st
11.80 - handle exn =>
11.81 - if Exn.is_interrupt exn then reraise exn else (log_exn log tag id exn; d)
11.82 -
11.83 -fun register (init, run, done) thy =
11.84 - let val id = length (Actions.get thy) + 1
11.85 - in
11.86 - thy
11.87 - |> init id
11.88 - |> Actions.map (cons (id, run, done))
11.89 - end
11.90 -
11.91 -local
11.92 -
11.93 -fun log thy s =
11.94 - let fun append_to n = if n = "" then K () else File.append (Path.explode n)
11.95 - in append_to (Config.get_global thy logfile) (s ^ "\n") end
11.96 - (* FIXME: with multithreading and parallel proofs enabled, we might need to
11.97 - encapsulate this inside a critical section *)
11.98 -
11.99 -fun log_sep thy = log thy "------------------"
11.100 -
11.101 -fun apply_actions thy pos name info (pre, post, time) actions =
11.102 - let
11.103 - fun apply f = f {pre=pre, post=post, timeout=time, log=log thy, pos=pos, name=name}
11.104 - fun run (id, run, _) = (apply (run id); log_sep thy)
11.105 - in (log thy info; log_sep thy; List.app run actions) end
11.106 -
11.107 -fun in_range _ _ NONE = true
11.108 - | in_range l r (SOME i) = (l <= i andalso (r < 0 orelse i <= r))
11.109 -
11.110 -fun only_within_range thy pos f x =
11.111 - let val l = Config.get_global thy start_line and r = Config.get_global thy end_line
11.112 - in if in_range l r (Position.line_of pos) then f x else () end
11.113 -
11.114 -in
11.115 -
11.116 -fun run_actions tr pre post =
11.117 - let
11.118 - val thy = Proof.theory_of pre
11.119 - val pos = Toplevel.pos_of tr
11.120 - val name = Toplevel.name_of tr
11.121 - val st = (pre, post, Time.fromSeconds (Config.get_global thy timeout))
11.122 -
11.123 - val str0 = string_of_int o the_default 0
11.124 - val loc = str0 (Position.line_of pos) ^ ":" ^ str0 (Position.offset_of pos)
11.125 - val info = "\n\nat " ^ loc ^ " (" ^ name ^ "):"
11.126 - in
11.127 - only_within_range thy pos (apply_actions thy pos name info st) (Actions.get thy)
11.128 - end
11.129 -
11.130 -fun done_actions st =
11.131 - let
11.132 - val thy = Toplevel.theory_of st
11.133 - val _ = log thy "\n\n";
11.134 - in
11.135 - thy
11.136 - |> Actions.get
11.137 - |> List.app (fn (id, _, done) => done id {last=st, log=log thy})
11.138 - end
11.139 -
11.140 -end
11.141 -
11.142 -val whitelist = ["apply", "by", "proof"]
11.143 -
11.144 -fun step_hook tr pre post =
11.145 - (* FIXME: might require wrapping into "interruptible" *)
11.146 - if can (Proof.assert_backward o Toplevel.proof_of) pre andalso
11.147 - member (op =) whitelist (Toplevel.name_of tr)
11.148 - then run_actions tr (Toplevel.proof_of pre) (SOME post)
11.149 - else if not (Toplevel.is_toplevel pre) andalso Toplevel.is_toplevel post
11.150 - then done_actions pre
11.151 - else () (* FIXME: add theory_hook here *)
11.152 -
11.153 -
11.154 -
11.155 -(* Mirabelle utility functions *)
11.156 -
11.157 -fun can_apply time tac st =
11.158 - let
11.159 - val {context = ctxt, facts, goal} = Proof.goal st
11.160 - val full_tac = HEADGOAL (Method.insert_tac facts THEN' tac ctxt)
11.161 - in
11.162 - (case try (TimeLimit.timeLimit time (Seq.pull o full_tac)) goal of
11.163 - SOME (SOME _) => true
11.164 - | _ => false)
11.165 - end
11.166 -
11.167 -local
11.168 -
11.169 -fun fold_body_thms f =
11.170 - let
11.171 - fun app n (PBody {thms, ...}) = thms |> fold (fn (i, (name, prop, body)) =>
11.172 - fn (x, seen) =>
11.173 - if Inttab.defined seen i then (x, seen)
11.174 - else
11.175 - let
11.176 - val body' = Future.join body
11.177 - val (x', seen') = app (n + (if name = "" then 0 else 1)) body'
11.178 - (x, Inttab.update (i, ()) seen)
11.179 - in (x' |> n = 0 ? f (name, prop, body'), seen') end)
11.180 - in fn bodies => fn x => #1 (fold (app 0) bodies (x, Inttab.empty)) end
11.181 -
11.182 -in
11.183 -
11.184 -fun theorems_in_proof_term thm =
11.185 - let
11.186 - val all_thms = Global_Theory.all_thms_of (Thm.theory_of_thm thm)
11.187 - fun collect (s, _, _) = if s <> "" then insert (op =) s else I
11.188 - fun member_of xs (x, y) = if member (op =) xs x then SOME y else NONE
11.189 - fun resolve_thms names = map_filter (member_of names) all_thms
11.190 - in
11.191 - resolve_thms (fold_body_thms collect [Thm.proof_body_of thm] [])
11.192 - end
11.193 -
11.194 -end
11.195 -
11.196 -fun theorems_of_sucessful_proof state =
11.197 - (case state of
11.198 - NONE => []
11.199 - | SOME st =>
11.200 - if not (Toplevel.is_proof st) then []
11.201 - else theorems_in_proof_term (#goal (Proof.goal (Toplevel.proof_of st))))
11.202 -
11.203 -fun get_setting settings (key, default) =
11.204 - the_default default (AList.lookup (op =) settings key)
11.205 -
11.206 -fun get_int_setting settings (key, default) =
11.207 - (case Option.map Int.fromString (AList.lookup (op =) settings key) of
11.208 - SOME (SOME i) => i
11.209 - | SOME NONE => error ("bad option: " ^ key)
11.210 - | NONE => default)
11.211 -
11.212 -fun cpu_time f x =
11.213 - let val ({cpu, ...}, y) = Timing.timing f x
11.214 - in (y, Time.toMilliseconds cpu) end
11.215 -
11.216 -end
12.1 --- a/src/HOL/Mirabelle/Tools/mirabelle_arith.ML Sat Apr 14 23:34:18 2012 +0200
12.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
12.3 @@ -1,21 +0,0 @@
12.4 -(* Title: HOL/Mirabelle/Tools/mirabelle_arith.ML
12.5 - Author: Jasmin Blanchette and Sascha Boehme, TU Munich
12.6 -*)
12.7 -
12.8 -structure Mirabelle_Arith : MIRABELLE_ACTION =
12.9 -struct
12.10 -
12.11 -fun arith_tag id = "#" ^ string_of_int id ^ " arith: "
12.12 -
12.13 -fun init _ = I
12.14 -fun done _ _ = ()
12.15 -
12.16 -fun run id ({pre, timeout, log, ...}: Mirabelle.run_args) =
12.17 - if Mirabelle.can_apply timeout Arith_Data.arith_tac pre
12.18 - then log (arith_tag id ^ "succeeded")
12.19 - else ()
12.20 - handle TimeLimit.TimeOut => log (arith_tag id ^ "timeout")
12.21 -
12.22 -fun invoke _ = Mirabelle.register (init, Mirabelle.catch arith_tag run, done)
12.23 -
12.24 -end
13.1 --- a/src/HOL/Mirabelle/Tools/mirabelle_metis.ML Sat Apr 14 23:34:18 2012 +0200
13.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
13.3 @@ -1,35 +0,0 @@
13.4 -(* Title: HOL/Mirabelle/Tools/mirabelle_metis.ML
13.5 - Author: Jasmin Blanchette and Sascha Boehme, TU Munich
13.6 -*)
13.7 -
13.8 -structure Mirabelle_Metis : MIRABELLE_ACTION =
13.9 -struct
13.10 -
13.11 -fun metis_tag id = "#" ^ string_of_int id ^ " metis: "
13.12 -
13.13 -fun init _ = I
13.14 -fun done _ _ = ()
13.15 -
13.16 -fun run id ({pre, post, timeout, log, ...}: Mirabelle.run_args) =
13.17 - let
13.18 - val thms = Mirabelle.theorems_of_sucessful_proof post
13.19 - val names = map Thm.get_name_hint thms
13.20 - val add_info = if null names then I else suffix (":\n" ^ commas names)
13.21 -
13.22 - val facts = Facts.props (Proof_Context.facts_of (Proof.context_of pre))
13.23 -
13.24 - fun metis ctxt =
13.25 - Metis_Tactic.metis_tac [] ATP_Problem_Generate.liftingN ctxt
13.26 - (thms @ facts)
13.27 - in
13.28 - (if Mirabelle.can_apply timeout metis pre then "succeeded" else "failed")
13.29 - |> prefix (metis_tag id)
13.30 - |> add_info
13.31 - |> log
13.32 - end
13.33 - handle TimeLimit.TimeOut => log (metis_tag id ^ "timeout")
13.34 - | ERROR msg => log (metis_tag id ^ "error: " ^ msg)
13.35 -
13.36 -fun invoke _ = Mirabelle.register (init, Mirabelle.catch metis_tag run, done)
13.37 -
13.38 -end
14.1 --- a/src/HOL/Mirabelle/Tools/mirabelle_quickcheck.ML Sat Apr 14 23:34:18 2012 +0200
14.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
14.3 @@ -1,28 +0,0 @@
14.4 -(* Title: HOL/Mirabelle/Tools/mirabelle_quickcheck.ML
14.5 - Author: Jasmin Blanchette and Sascha Boehme, TU Munich
14.6 -*)
14.7 -
14.8 -structure Mirabelle_Quickcheck : MIRABELLE_ACTION =
14.9 -struct
14.10 -
14.11 -fun qc_tag id = "#" ^ string_of_int id ^ " quickcheck: "
14.12 -
14.13 -fun init _ = I
14.14 -fun done _ _ = ()
14.15 -
14.16 -fun run args id ({pre, timeout, log, ...}: Mirabelle.run_args) =
14.17 - let
14.18 - val has_valid_key = member (op =) ["iterations", "size", "generator"] o fst
14.19 - val quickcheck = Quickcheck.quickcheck (map (apsnd single) (filter has_valid_key args)) 1
14.20 - in
14.21 - (case TimeLimit.timeLimit timeout quickcheck pre of
14.22 - NONE => log (qc_tag id ^ "no counterexample")
14.23 - | SOME _ => log (qc_tag id ^ "counterexample found"))
14.24 - end
14.25 - handle TimeLimit.TimeOut => log (qc_tag id ^ "timeout")
14.26 - | ERROR msg => log (qc_tag id ^ "error: " ^ msg)
14.27 -
14.28 -fun invoke args =
14.29 - Mirabelle.register (init, Mirabelle.catch qc_tag (run args), done)
14.30 -
14.31 -end
15.1 --- a/src/HOL/Mirabelle/Tools/mirabelle_refute.ML Sat Apr 14 23:34:18 2012 +0200
15.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
15.3 @@ -1,39 +0,0 @@
15.4 -(* Title: HOL/Mirabelle/Tools/mirabelle_refute.ML
15.5 - Author: Jasmin Blanchette and Sascha Boehme, TU Munich
15.6 -*)
15.7 -
15.8 -structure Mirabelle_Refute : MIRABELLE_ACTION =
15.9 -struct
15.10 -
15.11 -
15.12 -(* FIXME:
15.13 -fun refute_action args timeout {pre=st, ...} =
15.14 - let
15.15 - val subgoal = 1
15.16 - val thy = Proof.theory_of st
15.17 - val thm = #goal (Proof.raw_goal st)
15.18 -
15.19 - val refute = Refute.refute_goal thy args thm
15.20 - val _ = TimeLimit.timeLimit timeout refute subgoal
15.21 - in
15.22 - val writ_log = Substring.full (the (Symtab.lookup tab "writeln"))
15.23 - val warn_log = Substring.full (the (Symtab.lookup tab "warning"))
15.24 -
15.25 - val r =
15.26 - if Substring.isSubstring "model found" writ_log
15.27 - then
15.28 - if Substring.isSubstring "spurious" warn_log
15.29 - then SOME "potential counterexample"
15.30 - else SOME "real counterexample (bug?)"
15.31 - else
15.32 - if Substring.isSubstring "time limit" writ_log
15.33 - then SOME "no counterexample (timeout)"
15.34 - else if Substring.isSubstring "Search terminated" writ_log
15.35 - then SOME "no counterexample (normal termination)"
15.36 - else SOME "no counterexample (unknown)"
15.37 - in r end
15.38 -*)
15.39 -
15.40 -fun invoke args = I (*Mirabelle.register ("refute", refute_action args)*)
15.41 -
15.42 -end
16.1 --- a/src/HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML Sat Apr 14 23:34:18 2012 +0200
16.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
16.3 @@ -1,746 +0,0 @@
16.4 -(* Title: HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML
16.5 - Author: Jasmin Blanchette and Sascha Boehme and Tobias Nipkow, TU Munich
16.6 -*)
16.7 -
16.8 -structure Mirabelle_Sledgehammer : MIRABELLE_ACTION =
16.9 -struct
16.10 -
16.11 -val proverK = "prover"
16.12 -val prover_timeoutK = "prover_timeout"
16.13 -val keepK = "keep"
16.14 -val type_encK = "type_enc"
16.15 -val strictK = "strict"
16.16 -val sliceK = "slice"
16.17 -val lam_transK = "lam_trans"
16.18 -val uncurried_aliasesK = "uncurried_aliases"
16.19 -val e_selection_heuristicK = "e_selection_heuristic"
16.20 -val term_orderK = "term_order"
16.21 -val force_sosK = "force_sos"
16.22 -val max_relevantK = "max_relevant"
16.23 -val max_callsK = "max_calls"
16.24 -val minimizeK = "minimize" (*refers to minimization attempted by Mirabelle*)
16.25 -val minimize_timeoutK = "minimize_timeout"
16.26 -val metis_ftK = "metis_ft"
16.27 -val reconstructorK = "reconstructor"
16.28 -val preplay_timeoutK = "preplay_timeout"
16.29 -val sh_minimizeK = "sh_minimize" (*refers to minimizer run within Sledgehammer*)
16.30 -val max_new_mono_instancesK = "max_new_mono_instances"
16.31 -val max_mono_itersK = "max_mono_iters"
16.32 -val check_trivialK = "check_trivial" (*false by default*)
16.33 -
16.34 -fun sh_tag id = "#" ^ string_of_int id ^ " sledgehammer: "
16.35 -fun minimize_tag id = "#" ^ string_of_int id ^ " minimize (sledgehammer): "
16.36 -fun reconstructor_tag reconstructor id =
16.37 - "#" ^ string_of_int id ^ " " ^ (!reconstructor) ^ " (sledgehammer): "
16.38 -
16.39 -val separator = "-----"
16.40 -
16.41 -val preplay_timeout_default = "4"
16.42 -(*FIXME sensible to have Mirabelle-level Sledgehammer defaults?*)
16.43 -
16.44 -(*If a key is present in args then augment a list with its pair*)
16.45 -(*This is used to avoid fixing default values at the Mirabelle level, and
16.46 - instead use the default values of the tool (Sledgehammer in this case).*)
16.47 -fun available_parameter args key label list =
16.48 - let
16.49 - val value = AList.lookup (op =) args key
16.50 - in if is_some value then (label, the value) :: list else list end
16.51 -
16.52 -
16.53 -datatype sh_data = ShData of {
16.54 - calls: int,
16.55 - success: int,
16.56 - nontriv_calls: int,
16.57 - nontriv_success: int,
16.58 - lemmas: int,
16.59 - max_lems: int,
16.60 - time_isa: int,
16.61 - time_prover: int,
16.62 - time_prover_fail: int}
16.63 -
16.64 -datatype re_data = ReData of {
16.65 - calls: int,
16.66 - success: int,
16.67 - nontriv_calls: int,
16.68 - nontriv_success: int,
16.69 - proofs: int,
16.70 - time: int,
16.71 - timeout: int,
16.72 - lemmas: int * int * int,
16.73 - posns: (Position.T * bool) list
16.74 - }
16.75 -
16.76 -datatype min_data = MinData of {
16.77 - succs: int,
16.78 - ab_ratios: int
16.79 - }
16.80 -
16.81 -fun make_sh_data
16.82 - (calls,success,nontriv_calls,nontriv_success,lemmas,max_lems,time_isa,
16.83 - time_prover,time_prover_fail) =
16.84 - ShData{calls=calls, success=success, nontriv_calls=nontriv_calls,
16.85 - nontriv_success=nontriv_success, lemmas=lemmas, max_lems=max_lems,
16.86 - time_isa=time_isa, time_prover=time_prover,
16.87 - time_prover_fail=time_prover_fail}
16.88 -
16.89 -fun make_min_data (succs, ab_ratios) =
16.90 - MinData{succs=succs, ab_ratios=ab_ratios}
16.91 -
16.92 -fun make_re_data (calls,success,nontriv_calls,nontriv_success,proofs,time,
16.93 - timeout,lemmas,posns) =
16.94 - ReData{calls=calls, success=success, nontriv_calls=nontriv_calls,
16.95 - nontriv_success=nontriv_success, proofs=proofs, time=time,
16.96 - timeout=timeout, lemmas=lemmas, posns=posns}
16.97 -
16.98 -val empty_sh_data = make_sh_data (0, 0, 0, 0, 0, 0, 0, 0, 0)
16.99 -val empty_min_data = make_min_data (0, 0)
16.100 -val empty_re_data = make_re_data (0, 0, 0, 0, 0, 0, 0, (0,0,0), [])
16.101 -
16.102 -fun tuple_of_sh_data (ShData {calls, success, nontriv_calls, nontriv_success,
16.103 - lemmas, max_lems, time_isa,
16.104 - time_prover, time_prover_fail}) = (calls, success, nontriv_calls,
16.105 - nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail)
16.106 -
16.107 -fun tuple_of_min_data (MinData {succs, ab_ratios}) = (succs, ab_ratios)
16.108 -
16.109 -fun tuple_of_re_data (ReData {calls, success, nontriv_calls, nontriv_success,
16.110 - proofs, time, timeout, lemmas, posns}) = (calls, success, nontriv_calls,
16.111 - nontriv_success, proofs, time, timeout, lemmas, posns)
16.112 -
16.113 -
16.114 -datatype reconstructor_mode =
16.115 - Unminimized | Minimized | UnminimizedFT | MinimizedFT
16.116 -
16.117 -datatype data = Data of {
16.118 - sh: sh_data,
16.119 - min: min_data,
16.120 - re_u: re_data, (* reconstructor with unminimized set of lemmas *)
16.121 - re_m: re_data, (* reconstructor with minimized set of lemmas *)
16.122 - re_uft: re_data, (* reconstructor with unminimized set of lemmas and fully-typed *)
16.123 - re_mft: re_data, (* reconstructor with minimized set of lemmas and fully-typed *)
16.124 - mini: bool (* with minimization *)
16.125 - }
16.126 -
16.127 -fun make_data (sh, min, re_u, re_m, re_uft, re_mft, mini) =
16.128 - Data {sh=sh, min=min, re_u=re_u, re_m=re_m, re_uft=re_uft, re_mft=re_mft,
16.129 - mini=mini}
16.130 -
16.131 -val empty_data = make_data (empty_sh_data, empty_min_data,
16.132 - empty_re_data, empty_re_data, empty_re_data, empty_re_data, false)
16.133 -
16.134 -fun map_sh_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
16.135 - let val sh' = make_sh_data (f (tuple_of_sh_data sh))
16.136 - in make_data (sh', min, re_u, re_m, re_uft, re_mft, mini) end
16.137 -
16.138 -fun map_min_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
16.139 - let val min' = make_min_data (f (tuple_of_min_data min))
16.140 - in make_data (sh, min', re_u, re_m, re_uft, re_mft, mini) end
16.141 -
16.142 -fun map_re_data f m (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
16.143 - let
16.144 - fun map_me g Unminimized (u, m, uft, mft) = (g u, m, uft, mft)
16.145 - | map_me g Minimized (u, m, uft, mft) = (u, g m, uft, mft)
16.146 - | map_me g UnminimizedFT (u, m, uft, mft) = (u, m, g uft, mft)
16.147 - | map_me g MinimizedFT (u, m, uft, mft) = (u, m, uft, g mft)
16.148 -
16.149 - val f' = make_re_data o f o tuple_of_re_data
16.150 -
16.151 - val (re_u', re_m', re_uft', re_mft') =
16.152 - map_me f' m (re_u, re_m, re_uft, re_mft)
16.153 - in make_data (sh, min, re_u', re_m', re_uft', re_mft', mini) end
16.154 -
16.155 -fun set_mini mini (Data {sh, min, re_u, re_m, re_uft, re_mft, ...}) =
16.156 - make_data (sh, min, re_u, re_m, re_uft, re_mft, mini)
16.157 -
16.158 -fun inc_max (n:int) (s,sos,m) = (s+n, sos + n*n, Int.max(m,n));
16.159 -
16.160 -val inc_sh_calls = map_sh_data
16.161 - (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
16.162 - => (calls + 1, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail))
16.163 -
16.164 -val inc_sh_success = map_sh_data
16.165 - (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
16.166 - => (calls, success + 1, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail))
16.167 -
16.168 -val inc_sh_nontriv_calls = map_sh_data
16.169 - (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
16.170 - => (calls, success, nontriv_calls + 1, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail))
16.171 -
16.172 -val inc_sh_nontriv_success = map_sh_data
16.173 - (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
16.174 - => (calls, success, nontriv_calls, nontriv_success + 1, lemmas,max_lems, time_isa, time_prover, time_prover_fail))
16.175 -
16.176 -fun inc_sh_lemmas n = map_sh_data
16.177 - (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
16.178 - => (calls,success,nontriv_calls, nontriv_success, lemmas+n,max_lems,time_isa,time_prover,time_prover_fail))
16.179 -
16.180 -fun inc_sh_max_lems n = map_sh_data
16.181 - (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
16.182 - => (calls,success,nontriv_calls, nontriv_success, lemmas,Int.max(max_lems,n),time_isa,time_prover,time_prover_fail))
16.183 -
16.184 -fun inc_sh_time_isa t = map_sh_data
16.185 - (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
16.186 - => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa + t,time_prover,time_prover_fail))
16.187 -
16.188 -fun inc_sh_time_prover t = map_sh_data
16.189 - (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
16.190 - => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover + t,time_prover_fail))
16.191 -
16.192 -fun inc_sh_time_prover_fail t = map_sh_data
16.193 - (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
16.194 - => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail + t))
16.195 -
16.196 -val inc_min_succs = map_min_data
16.197 - (fn (succs,ab_ratios) => (succs+1, ab_ratios))
16.198 -
16.199 -fun inc_min_ab_ratios r = map_min_data
16.200 - (fn (succs, ab_ratios) => (succs, ab_ratios+r))
16.201 -
16.202 -val inc_reconstructor_calls = map_re_data
16.203 - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
16.204 - => (calls + 1, success, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas,posns))
16.205 -
16.206 -val inc_reconstructor_success = map_re_data
16.207 - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
16.208 - => (calls, success + 1, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas,posns))
16.209 -
16.210 -val inc_reconstructor_nontriv_calls = map_re_data
16.211 - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
16.212 - => (calls, success, nontriv_calls + 1, nontriv_success, proofs, time, timeout, lemmas,posns))
16.213 -
16.214 -val inc_reconstructor_nontriv_success = map_re_data
16.215 - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
16.216 - => (calls, success, nontriv_calls, nontriv_success + 1, proofs, time, timeout, lemmas,posns))
16.217 -
16.218 -val inc_reconstructor_proofs = map_re_data
16.219 - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
16.220 - => (calls, success, nontriv_calls, nontriv_success, proofs + 1, time, timeout, lemmas,posns))
16.221 -
16.222 -fun inc_reconstructor_time m t = map_re_data
16.223 - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
16.224 - => (calls, success, nontriv_calls, nontriv_success, proofs, time + t, timeout, lemmas,posns)) m
16.225 -
16.226 -val inc_reconstructor_timeout = map_re_data
16.227 - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
16.228 - => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout + 1, lemmas,posns))
16.229 -
16.230 -fun inc_reconstructor_lemmas m n = map_re_data
16.231 - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
16.232 - => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout, inc_max n lemmas, posns)) m
16.233 -
16.234 -fun inc_reconstructor_posns m pos = map_re_data
16.235 - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
16.236 - => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas, pos::posns)) m
16.237 -
16.238 -val str0 = string_of_int o the_default 0
16.239 -
16.240 -local
16.241 -
16.242 -val str = string_of_int
16.243 -val str3 = Real.fmt (StringCvt.FIX (SOME 3))
16.244 -fun percentage a b = string_of_int (a * 100 div b)
16.245 -fun time t = Real.fromInt t / 1000.0
16.246 -fun avg_time t n =
16.247 - if n > 0 then (Real.fromInt t / 1000.0) / Real.fromInt n else 0.0
16.248 -
16.249 -fun log_sh_data log
16.250 - (calls, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail) =
16.251 - (log ("Total number of sledgehammer calls: " ^ str calls);
16.252 - log ("Number of successful sledgehammer calls: " ^ str success);
16.253 - log ("Number of sledgehammer lemmas: " ^ str lemmas);
16.254 - log ("Max number of sledgehammer lemmas: " ^ str max_lems);
16.255 - log ("Success rate: " ^ percentage success calls ^ "%");
16.256 - log ("Total number of nontrivial sledgehammer calls: " ^ str nontriv_calls);
16.257 - log ("Number of successful nontrivial sledgehammer calls: " ^ str nontriv_success);
16.258 - log ("Total time for sledgehammer calls (Isabelle): " ^ str3 (time time_isa));
16.259 - log ("Total time for successful sledgehammer calls (ATP): " ^ str3 (time time_prover));
16.260 - log ("Total time for failed sledgehammer calls (ATP): " ^ str3 (time time_prover_fail));
16.261 - log ("Average time for sledgehammer calls (Isabelle): " ^
16.262 - str3 (avg_time time_isa calls));
16.263 - log ("Average time for successful sledgehammer calls (ATP): " ^
16.264 - str3 (avg_time time_prover success));
16.265 - log ("Average time for failed sledgehammer calls (ATP): " ^
16.266 - str3 (avg_time time_prover_fail (calls - success)))
16.267 - )
16.268 -
16.269 -fun str_of_pos (pos, triv) =
16.270 - str0 (Position.line_of pos) (* ^ ":" ^ str0 (Position.offset_of pos) *) ^
16.271 - (if triv then "[T]" else "")
16.272 -
16.273 -fun log_re_data log tag sh_calls (re_calls, re_success, re_nontriv_calls,
16.274 - re_nontriv_success, re_proofs, re_time, re_timeout,
16.275 - (lemmas, lems_sos, lems_max), re_posns) =
16.276 - (log ("Total number of " ^ tag ^ "reconstructor calls: " ^ str re_calls);
16.277 - log ("Number of successful " ^ tag ^ "reconstructor calls: " ^ str re_success ^
16.278 - " (proof: " ^ str re_proofs ^ ")");
16.279 - log ("Number of " ^ tag ^ "reconstructor timeouts: " ^ str re_timeout);
16.280 - log ("Success rate: " ^ percentage re_success sh_calls ^ "%");
16.281 - log ("Total number of nontrivial " ^ tag ^ "reconstructor calls: " ^ str re_nontriv_calls);
16.282 - log ("Number of successful nontrivial " ^ tag ^ "reconstructor calls: " ^ str re_nontriv_success ^
16.283 - " (proof: " ^ str re_proofs ^ ")");
16.284 - log ("Number of successful " ^ tag ^ "reconstructor lemmas: " ^ str lemmas);
16.285 - log ("SOS of successful " ^ tag ^ "reconstructor lemmas: " ^ str lems_sos);
16.286 - log ("Max number of successful " ^ tag ^ "reconstructor lemmas: " ^ str lems_max);
16.287 - log ("Total time for successful " ^ tag ^ "reconstructor calls: " ^ str3 (time re_time));
16.288 - log ("Average time for successful " ^ tag ^ "reconstructor calls: " ^
16.289 - str3 (avg_time re_time re_success));
16.290 - if tag=""
16.291 - then log ("Proved: " ^ space_implode " " (map str_of_pos re_posns))
16.292 - else ()
16.293 - )
16.294 -
16.295 -fun log_min_data log (succs, ab_ratios) =
16.296 - (log ("Number of successful minimizations: " ^ string_of_int succs);
16.297 - log ("After/before ratios: " ^ string_of_int ab_ratios)
16.298 - )
16.299 -
16.300 -in
16.301 -
16.302 -fun log_data id log (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
16.303 - let
16.304 - val ShData {calls=sh_calls, ...} = sh
16.305 -
16.306 - fun app_if (ReData {calls, ...}) f = if calls > 0 then f () else ()
16.307 - fun log_re tag m =
16.308 - log_re_data log tag sh_calls (tuple_of_re_data m)
16.309 - fun log_reconstructor (tag1, m1) (tag2, m2) = app_if m1 (fn () =>
16.310 - (log_re tag1 m1; log ""; app_if m2 (fn () => log_re tag2 m2)))
16.311 - in
16.312 - if sh_calls > 0
16.313 - then
16.314 - (log ("\n\n\nReport #" ^ string_of_int id ^ ":\n");
16.315 - log_sh_data log (tuple_of_sh_data sh);
16.316 - log "";
16.317 - if not mini
16.318 - then log_reconstructor ("", re_u) ("fully-typed ", re_uft)
16.319 - else
16.320 - app_if re_u (fn () =>
16.321 - (log_reconstructor ("unminimized ", re_u) ("unminimized fully-typed ", re_uft);
16.322 - log "";
16.323 - app_if re_m (fn () =>
16.324 - (log_min_data log (tuple_of_min_data min); log "";
16.325 - log_reconstructor ("", re_m) ("fully-typed ", re_mft))))))
16.326 - else ()
16.327 - end
16.328 -
16.329 -end
16.330 -
16.331 -
16.332 -(* Warning: we implicitly assume single-threaded execution here! *)
16.333 -val data = Unsynchronized.ref ([] : (int * data) list)
16.334 -
16.335 -fun init id thy = (Unsynchronized.change data (cons (id, empty_data)); thy)
16.336 -fun done id ({log, ...}: Mirabelle.done_args) =
16.337 - AList.lookup (op =) (!data) id
16.338 - |> Option.map (log_data id log)
16.339 - |> K ()
16.340 -
16.341 -fun change_data id f = (Unsynchronized.change data (AList.map_entry (op =) id f); ())
16.342 -
16.343 -
16.344 -fun get_prover ctxt args =
16.345 - let
16.346 - fun default_prover_name () =
16.347 - hd (#provers (Sledgehammer_Isar.default_params ctxt []))
16.348 - handle List.Empty => error "No ATP available."
16.349 - fun get_prover name =
16.350 - (name, Sledgehammer_Run.get_minimizing_prover ctxt
16.351 - Sledgehammer_Provers.Normal name)
16.352 - in
16.353 - (case AList.lookup (op =) args proverK of
16.354 - SOME name => get_prover name
16.355 - | NONE => get_prover (default_prover_name ()))
16.356 - end
16.357 -
16.358 -type stature = ATP_Problem_Generate.stature
16.359 -
16.360 -(* hack *)
16.361 -fun reconstructor_from_msg args msg =
16.362 - (case AList.lookup (op =) args reconstructorK of
16.363 - SOME name => name
16.364 - | NONE =>
16.365 - if String.isSubstring "metis (" msg then
16.366 - msg |> Substring.full
16.367 - |> Substring.position "metis ("
16.368 - |> snd |> Substring.position ")"
16.369 - |> fst |> Substring.string
16.370 - |> suffix ")"
16.371 - else if String.isSubstring "metis" msg then
16.372 - "metis"
16.373 - else
16.374 - "smt")
16.375 -
16.376 -local
16.377 -
16.378 -datatype sh_result =
16.379 - SH_OK of int * int * (string * stature) list |
16.380 - SH_FAIL of int * int |
16.381 - SH_ERROR
16.382 -
16.383 -fun run_sh prover_name prover type_enc strict max_relevant slice lam_trans
16.384 - uncurried_aliases e_selection_heuristic term_order force_sos
16.385 - hard_timeout timeout preplay_timeout sh_minimizeLST
16.386 - max_new_mono_instancesLST max_mono_itersLST dir pos st =
16.387 - let
16.388 - val {context = ctxt, facts = chained_ths, goal} = Proof.goal st
16.389 - val i = 1
16.390 - fun set_file_name (SOME dir) =
16.391 - Config.put Sledgehammer_Provers.dest_dir dir
16.392 - #> Config.put Sledgehammer_Provers.problem_prefix
16.393 - ("prob_" ^ str0 (Position.line_of pos) ^ "__")
16.394 - #> Config.put SMT_Config.debug_files
16.395 - (dir ^ "/" ^ Name.desymbolize false (ATP_Util.timestamp ()) ^ "_"
16.396 - ^ serial_string ())
16.397 - | set_file_name NONE = I
16.398 - val st' =
16.399 - st
16.400 - |> Proof.map_context
16.401 - (set_file_name dir
16.402 - #> (Option.map (Config.put ATP_Systems.e_selection_heuristic)
16.403 - e_selection_heuristic |> the_default I)
16.404 - #> (Option.map (Config.put ATP_Systems.term_order)
16.405 - term_order |> the_default I)
16.406 - #> (Option.map (Config.put ATP_Systems.force_sos)
16.407 - force_sos |> the_default I))
16.408 - val params as {relevance_thresholds, max_relevant, slice, ...} =
16.409 - Sledgehammer_Isar.default_params ctxt
16.410 - ([("verbose", "true"),
16.411 - ("type_enc", type_enc),
16.412 - ("strict", strict),
16.413 - ("lam_trans", lam_trans |> the_default "smart"),
16.414 - ("uncurried_aliases", uncurried_aliases |> the_default "smart"),
16.415 - ("max_relevant", max_relevant),
16.416 - ("slice", slice),
16.417 - ("timeout", string_of_int timeout),
16.418 - ("preplay_timeout", preplay_timeout)]
16.419 - |> sh_minimizeLST (*don't confuse the two minimization flags*)
16.420 - |> max_new_mono_instancesLST
16.421 - |> max_mono_itersLST)
16.422 - val default_max_relevant =
16.423 - Sledgehammer_Provers.default_max_relevant_for_prover ctxt slice
16.424 - prover_name
16.425 - val is_appropriate_prop =
16.426 - Sledgehammer_Provers.is_appropriate_prop_for_prover ctxt prover_name
16.427 - val is_built_in_const =
16.428 - Sledgehammer_Provers.is_built_in_const_for_prover ctxt prover_name
16.429 - val relevance_fudge =
16.430 - Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover_name
16.431 - val relevance_override = {add = [], del = [], only = false}
16.432 - val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal i
16.433 - val time_limit =
16.434 - (case hard_timeout of
16.435 - NONE => I
16.436 - | SOME secs => TimeLimit.timeLimit (Time.fromSeconds secs))
16.437 - fun failed failure =
16.438 - ({outcome = SOME failure, used_facts = [], run_time = Time.zeroTime,
16.439 - preplay =
16.440 - K (ATP_Proof_Reconstruct.Failed_to_Play Sledgehammer_Provers.plain_metis),
16.441 - message = K "", message_tail = ""}, ~1)
16.442 - val ({outcome, used_facts, run_time, preplay, message, message_tail}
16.443 - : Sledgehammer_Provers.prover_result,
16.444 - time_isa) = time_limit (Mirabelle.cpu_time (fn () =>
16.445 - let
16.446 - val _ = if is_appropriate_prop concl_t then ()
16.447 - else raise Fail "inappropriate"
16.448 - val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover_name
16.449 - val facts =
16.450 - Sledgehammer_Filter.nearly_all_facts ctxt ho_atp relevance_override
16.451 - chained_ths hyp_ts concl_t
16.452 - |> filter (is_appropriate_prop o prop_of o snd)
16.453 - |> Sledgehammer_Filter.relevant_facts ctxt relevance_thresholds
16.454 - (the_default default_max_relevant max_relevant)
16.455 - is_built_in_const relevance_fudge relevance_override
16.456 - chained_ths hyp_ts concl_t
16.457 - val problem =
16.458 - {state = st', goal = goal, subgoal = i,
16.459 - subgoal_count = Sledgehammer_Util.subgoal_count st,
16.460 - facts = facts |> map Sledgehammer_Provers.Untranslated_Fact,
16.461 - smt_filter = NONE}
16.462 - in prover params (K (K (K ""))) problem end)) ()
16.463 - handle TimeLimit.TimeOut => failed ATP_Proof.TimedOut
16.464 - | Fail "inappropriate" => failed ATP_Proof.Inappropriate
16.465 - val time_prover = run_time |> Time.toMilliseconds
16.466 - val msg = message (preplay ()) ^ message_tail
16.467 - in
16.468 - case outcome of
16.469 - NONE => (msg, SH_OK (time_isa, time_prover, used_facts))
16.470 - | SOME _ => (msg, SH_FAIL (time_isa, time_prover))
16.471 - end
16.472 - handle ERROR msg => ("error: " ^ msg, SH_ERROR)
16.473 -
16.474 -fun thms_of_name ctxt name =
16.475 - let
16.476 - val lex = Keyword.get_lexicons
16.477 - val get = maps (Proof_Context.get_fact ctxt o fst)
16.478 - in
16.479 - Source.of_string name
16.480 - |> Symbol.source
16.481 - |> Token.source {do_recover=SOME false} lex Position.start
16.482 - |> Token.source_proper
16.483 - |> Source.source Token.stopper (Parse_Spec.xthms1 >> get) NONE
16.484 - |> Source.exhaust
16.485 - end
16.486 -
16.487 -in
16.488 -
16.489 -fun run_sledgehammer trivial args reconstructor named_thms id
16.490 - ({pre=st, log, pos, ...}: Mirabelle.run_args) =
16.491 - let
16.492 - val triv_str = if trivial then "[T] " else ""
16.493 - val _ = change_data id inc_sh_calls
16.494 - val _ = if trivial then () else change_data id inc_sh_nontriv_calls
16.495 - val (prover_name, prover) = get_prover (Proof.context_of st) args
16.496 - val type_enc = AList.lookup (op =) args type_encK |> the_default "smart"
16.497 - val strict = AList.lookup (op =) args strictK |> the_default "false"
16.498 - val max_relevant = AList.lookup (op =) args max_relevantK |> the_default "smart"
16.499 - val slice = AList.lookup (op =) args sliceK |> the_default "true"
16.500 - val lam_trans = AList.lookup (op =) args lam_transK
16.501 - val uncurried_aliases = AList.lookup (op =) args uncurried_aliasesK
16.502 - val e_selection_heuristic = AList.lookup (op =) args e_selection_heuristicK
16.503 - val term_order = AList.lookup (op =) args term_orderK
16.504 - val force_sos = AList.lookup (op =) args force_sosK
16.505 - |> Option.map (curry (op <>) "false")
16.506 - val dir = AList.lookup (op =) args keepK
16.507 - val timeout = Mirabelle.get_int_setting args (prover_timeoutK, 30)
16.508 - (* always use a hard timeout, but give some slack so that the automatic
16.509 - minimizer has a chance to do its magic *)
16.510 - val preplay_timeout = AList.lookup (op =) args preplay_timeoutK
16.511 - |> the_default preplay_timeout_default
16.512 - val sh_minimizeLST = available_parameter args sh_minimizeK "minimize"
16.513 - val max_new_mono_instancesLST =
16.514 - available_parameter args max_new_mono_instancesK max_new_mono_instancesK
16.515 - val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK
16.516 - val hard_timeout = SOME (2 * timeout)
16.517 - val (msg, result) =
16.518 - run_sh prover_name prover type_enc strict max_relevant slice lam_trans
16.519 - uncurried_aliases e_selection_heuristic term_order force_sos
16.520 - hard_timeout timeout preplay_timeout sh_minimizeLST
16.521 - max_new_mono_instancesLST max_mono_itersLST dir pos st
16.522 - in
16.523 - case result of
16.524 - SH_OK (time_isa, time_prover, names) =>
16.525 - let
16.526 - fun get_thms (name, stature) =
16.527 - try (thms_of_name (Proof.context_of st)) name
16.528 - |> Option.map (pair (name, stature))
16.529 - in
16.530 - change_data id inc_sh_success;
16.531 - if trivial then () else change_data id inc_sh_nontriv_success;
16.532 - change_data id (inc_sh_lemmas (length names));
16.533 - change_data id (inc_sh_max_lems (length names));
16.534 - change_data id (inc_sh_time_isa time_isa);
16.535 - change_data id (inc_sh_time_prover time_prover);
16.536 - reconstructor := reconstructor_from_msg args msg;
16.537 - named_thms := SOME (map_filter get_thms names);
16.538 - log (sh_tag id ^ triv_str ^ "succeeded (" ^ string_of_int time_isa ^ "+" ^
16.539 - string_of_int time_prover ^ ") [" ^ prover_name ^ "]:\n" ^ msg)
16.540 - end
16.541 - | SH_FAIL (time_isa, time_prover) =>
16.542 - let
16.543 - val _ = change_data id (inc_sh_time_isa time_isa)
16.544 - val _ = change_data id (inc_sh_time_prover_fail time_prover)
16.545 - in log (sh_tag id ^ triv_str ^ "failed: " ^ msg) end
16.546 - | SH_ERROR => log (sh_tag id ^ "failed: " ^ msg)
16.547 - end
16.548 -
16.549 -end
16.550 -
16.551 -fun run_minimize args reconstructor named_thms id
16.552 - ({pre=st, log, ...}: Mirabelle.run_args) =
16.553 - let
16.554 - val ctxt = Proof.context_of st
16.555 - val n0 = length (these (!named_thms))
16.556 - val (prover_name, _) = get_prover ctxt args
16.557 - val type_enc = AList.lookup (op =) args type_encK |> the_default "smart"
16.558 - val strict = AList.lookup (op =) args strictK |> the_default "false"
16.559 - val timeout =
16.560 - AList.lookup (op =) args minimize_timeoutK
16.561 - |> Option.map (fst o read_int o raw_explode) (* FIXME Symbol.explode (?) *)
16.562 - |> the_default 5
16.563 - val preplay_timeout = AList.lookup (op =) args preplay_timeoutK
16.564 - |> the_default preplay_timeout_default
16.565 - val sh_minimizeLST = available_parameter args sh_minimizeK "minimize"
16.566 - val max_new_mono_instancesLST =
16.567 - available_parameter args max_new_mono_instancesK max_new_mono_instancesK
16.568 - val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK
16.569 - val params = Sledgehammer_Isar.default_params ctxt
16.570 - ([("provers", prover_name),
16.571 - ("verbose", "true"),
16.572 - ("type_enc", type_enc),
16.573 - ("strict", strict),
16.574 - ("timeout", string_of_int timeout),
16.575 - ("preplay_timeout", preplay_timeout)]
16.576 - |> sh_minimizeLST (*don't confuse the two minimization flags*)
16.577 - |> max_new_mono_instancesLST
16.578 - |> max_mono_itersLST)
16.579 - val minimize =
16.580 - Sledgehammer_Minimize.minimize_facts prover_name params
16.581 - true 1 (Sledgehammer_Util.subgoal_count st)
16.582 - val _ = log separator
16.583 - val (used_facts, (preplay, message, message_tail)) =
16.584 - minimize st (these (!named_thms))
16.585 - val msg = message (preplay ()) ^ message_tail
16.586 - in
16.587 - case used_facts of
16.588 - SOME named_thms' =>
16.589 - (change_data id inc_min_succs;
16.590 - change_data id (inc_min_ab_ratios ((100 * length named_thms') div n0));
16.591 - if length named_thms' = n0
16.592 - then log (minimize_tag id ^ "already minimal")
16.593 - else (reconstructor := reconstructor_from_msg args msg;
16.594 - named_thms := SOME named_thms';
16.595 - log (minimize_tag id ^ "succeeded:\n" ^ msg))
16.596 - )
16.597 - | NONE => log (minimize_tag id ^ "failed: " ^ msg)
16.598 - end
16.599 -
16.600 -fun override_params prover type_enc timeout =
16.601 - [("provers", prover),
16.602 - ("max_relevant", "0"),
16.603 - ("type_enc", type_enc),
16.604 - ("strict", "true"),
16.605 - ("slice", "false"),
16.606 - ("timeout", timeout |> Time.toSeconds |> string_of_int)]
16.607 -
16.608 -fun run_reconstructor trivial full m name reconstructor named_thms id
16.609 - ({pre=st, timeout, log, pos, ...}: Mirabelle.run_args) =
16.610 - let
16.611 - fun do_reconstructor named_thms ctxt =
16.612 - let
16.613 - val ref_of_str =
16.614 - suffix ";" #> Outer_Syntax.scan Position.none #> Parse_Spec.xthm
16.615 - #> fst
16.616 - val thms = named_thms |> maps snd
16.617 - val facts = named_thms |> map (ref_of_str o fst o fst)
16.618 - val relevance_override = {add = facts, del = [], only = true}
16.619 - fun my_timeout time_slice =
16.620 - timeout |> Time.toReal |> curry Real.* time_slice |> Time.fromReal
16.621 - fun sledge_tac time_slice prover type_enc =
16.622 - Sledgehammer_Tactics.sledgehammer_as_oracle_tac ctxt
16.623 - (override_params prover type_enc (my_timeout time_slice))
16.624 - relevance_override
16.625 - in
16.626 - if !reconstructor = "sledgehammer_tac" then
16.627 - sledge_tac 0.2 ATP_Systems.z3_tptpN "mono_native"
16.628 - ORELSE' sledge_tac 0.2 ATP_Systems.eN "mono_guards??"
16.629 - ORELSE' sledge_tac 0.2 ATP_Systems.vampireN "mono_guards??"
16.630 - ORELSE' sledge_tac 0.2 ATP_Systems.spassN "poly_tags"
16.631 - ORELSE' Metis_Tactic.metis_tac [] ATP_Problem_Generate.combsN
16.632 - ctxt thms
16.633 - else if !reconstructor = "smt" then
16.634 - SMT_Solver.smt_tac ctxt thms
16.635 - else if full then
16.636 - Metis_Tactic.metis_tac [ATP_Proof_Reconstruct.full_typesN]
16.637 - ATP_Proof_Reconstruct.metis_default_lam_trans ctxt thms
16.638 - else if String.isPrefix "metis (" (!reconstructor) then
16.639 - let
16.640 - val (type_encs, lam_trans) =
16.641 - !reconstructor
16.642 - |> Outer_Syntax.scan Position.start
16.643 - |> filter Token.is_proper |> tl
16.644 - |> Metis_Tactic.parse_metis_options |> fst
16.645 - |>> the_default [ATP_Proof_Reconstruct.partial_typesN]
16.646 - ||> the_default ATP_Proof_Reconstruct.metis_default_lam_trans
16.647 - in Metis_Tactic.metis_tac type_encs lam_trans ctxt thms end
16.648 - else if !reconstructor = "metis" then
16.649 - Metis_Tactic.metis_tac [] ATP_Proof_Reconstruct.metis_default_lam_trans ctxt
16.650 - thms
16.651 - else
16.652 - K all_tac
16.653 - end
16.654 - fun apply_reconstructor named_thms =
16.655 - Mirabelle.can_apply timeout (do_reconstructor named_thms) st
16.656 -
16.657 - fun with_time (false, t) = "failed (" ^ string_of_int t ^ ")"
16.658 - | with_time (true, t) = (change_data id (inc_reconstructor_success m);
16.659 - if trivial then ()
16.660 - else change_data id (inc_reconstructor_nontriv_success m);
16.661 - change_data id (inc_reconstructor_lemmas m (length named_thms));
16.662 - change_data id (inc_reconstructor_time m t);
16.663 - change_data id (inc_reconstructor_posns m (pos, trivial));
16.664 - if name = "proof" then change_data id (inc_reconstructor_proofs m)
16.665 - else ();
16.666 - "succeeded (" ^ string_of_int t ^ ")")
16.667 - fun timed_reconstructor named_thms =
16.668 - (with_time (Mirabelle.cpu_time apply_reconstructor named_thms), true)
16.669 - handle TimeLimit.TimeOut => (change_data id (inc_reconstructor_timeout m);
16.670 - ("timeout", false))
16.671 - | ERROR msg => ("error: " ^ msg, false)
16.672 -
16.673 - val _ = log separator
16.674 - val _ = change_data id (inc_reconstructor_calls m)
16.675 - val _ = if trivial then ()
16.676 - else change_data id (inc_reconstructor_nontriv_calls m)
16.677 - in
16.678 - named_thms
16.679 - |> timed_reconstructor
16.680 - |>> log o prefix (reconstructor_tag reconstructor id)
16.681 - |> snd
16.682 - end
16.683 -
16.684 -val try_timeout = seconds 5.0
16.685 -
16.686 -(* crude hack *)
16.687 -val num_sledgehammer_calls = Unsynchronized.ref 0
16.688 -
16.689 -fun sledgehammer_action args id (st as {pre, name, ...}: Mirabelle.run_args) =
16.690 - let val goal = Thm.major_prem_of (#goal (Proof.goal pre)) in
16.691 - if can Logic.dest_conjunction goal orelse can Logic.dest_equals goal
16.692 - then () else
16.693 - let
16.694 - val max_calls =
16.695 - AList.lookup (op =) args max_callsK |> the_default "10000000"
16.696 - |> Int.fromString |> the
16.697 - val _ = num_sledgehammer_calls := !num_sledgehammer_calls + 1;
16.698 - in
16.699 - if !num_sledgehammer_calls > max_calls then ()
16.700 - else
16.701 - let
16.702 - val reconstructor = Unsynchronized.ref ""
16.703 - val named_thms =
16.704 - Unsynchronized.ref (NONE : ((string * stature) * thm list) list option)
16.705 - val minimize = AList.defined (op =) args minimizeK
16.706 - val metis_ft = AList.defined (op =) args metis_ftK
16.707 - val trivial =
16.708 - if AList.lookup (op =) args check_trivialK |> the_default "false"
16.709 - |> Bool.fromString |> the then
16.710 - Try0.try0 (SOME try_timeout) ([], [], [], []) pre
16.711 - handle TimeLimit.TimeOut => false
16.712 - else false
16.713 - fun apply_reconstructor m1 m2 =
16.714 - if metis_ft
16.715 - then
16.716 - if not (Mirabelle.catch_result (reconstructor_tag reconstructor) false
16.717 - (run_reconstructor trivial false m1 name reconstructor
16.718 - (these (!named_thms))) id st)
16.719 - then
16.720 - (Mirabelle.catch_result (reconstructor_tag reconstructor) false
16.721 - (run_reconstructor trivial true m2 name reconstructor
16.722 - (these (!named_thms))) id st; ())
16.723 - else ()
16.724 - else
16.725 - (Mirabelle.catch_result (reconstructor_tag reconstructor) false
16.726 - (run_reconstructor trivial false m1 name reconstructor
16.727 - (these (!named_thms))) id st; ())
16.728 - in
16.729 - change_data id (set_mini minimize);
16.730 - Mirabelle.catch sh_tag (run_sledgehammer trivial args reconstructor
16.731 - named_thms) id st;
16.732 - if is_some (!named_thms)
16.733 - then
16.734 - (apply_reconstructor Unminimized UnminimizedFT;
16.735 - if minimize andalso not (null (these (!named_thms)))
16.736 - then
16.737 - (Mirabelle.catch minimize_tag
16.738 - (run_minimize args reconstructor named_thms) id st;
16.739 - apply_reconstructor Minimized MinimizedFT)
16.740 - else ())
16.741 - else ()
16.742 - end
16.743 - end
16.744 - end
16.745 -
16.746 -fun invoke args =
16.747 - Mirabelle.register (init, sledgehammer_action args, done)
16.748 -
16.749 -end
17.1 --- a/src/HOL/Mirabelle/Tools/mirabelle_sledgehammer_filter.ML Sat Apr 14 23:34:18 2012 +0200
17.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
17.3 @@ -1,198 +0,0 @@
17.4 -(* Title: HOL/Mirabelle/Tools/mirabelle_sledgehammer_filter.ML
17.5 - Author: Jasmin Blanchette, TU Munich
17.6 -*)
17.7 -
17.8 -structure Mirabelle_Sledgehammer_Filter : MIRABELLE_ACTION =
17.9 -struct
17.10 -
17.11 -fun get args name default_value =
17.12 - case AList.lookup (op =) args name of
17.13 - SOME value => the (Real.fromString value)
17.14 - | NONE => default_value
17.15 -
17.16 -fun extract_relevance_fudge args
17.17 - {local_const_multiplier, worse_irrel_freq, higher_order_irrel_weight,
17.18 - abs_rel_weight, abs_irrel_weight, skolem_irrel_weight,
17.19 - theory_const_rel_weight, theory_const_irrel_weight,
17.20 - chained_const_irrel_weight, intro_bonus, elim_bonus, simp_bonus,
17.21 - local_bonus, assum_bonus, chained_bonus, max_imperfect, max_imperfect_exp,
17.22 - threshold_divisor, ridiculous_threshold} =
17.23 - {local_const_multiplier =
17.24 - get args "local_const_multiplier" local_const_multiplier,
17.25 - worse_irrel_freq = get args "worse_irrel_freq" worse_irrel_freq,
17.26 - higher_order_irrel_weight =
17.27 - get args "higher_order_irrel_weight" higher_order_irrel_weight,
17.28 - abs_rel_weight = get args "abs_rel_weight" abs_rel_weight,
17.29 - abs_irrel_weight = get args "abs_irrel_weight" abs_irrel_weight,
17.30 - skolem_irrel_weight = get args "skolem_irrel_weight" skolem_irrel_weight,
17.31 - theory_const_rel_weight =
17.32 - get args "theory_const_rel_weight" theory_const_rel_weight,
17.33 - theory_const_irrel_weight =
17.34 - get args "theory_const_irrel_weight" theory_const_irrel_weight,
17.35 - chained_const_irrel_weight =
17.36 - get args "chained_const_irrel_weight" chained_const_irrel_weight,
17.37 - intro_bonus = get args "intro_bonus" intro_bonus,
17.38 - elim_bonus = get args "elim_bonus" elim_bonus,
17.39 - simp_bonus = get args "simp_bonus" simp_bonus,
17.40 - local_bonus = get args "local_bonus" local_bonus,
17.41 - assum_bonus = get args "assum_bonus" assum_bonus,
17.42 - chained_bonus = get args "chained_bonus" chained_bonus,
17.43 - max_imperfect = get args "max_imperfect" max_imperfect,
17.44 - max_imperfect_exp = get args "max_imperfect_exp" max_imperfect_exp,
17.45 - threshold_divisor = get args "threshold_divisor" threshold_divisor,
17.46 - ridiculous_threshold = get args "ridiculous_threshold" ridiculous_threshold}
17.47 -
17.48 -structure Prooftab =
17.49 - Table(type key = int * int val ord = prod_ord int_ord int_ord)
17.50 -
17.51 -val proof_table = Unsynchronized.ref (Prooftab.empty: string list list Prooftab.table)
17.52 -
17.53 -val num_successes = Unsynchronized.ref ([] : (int * int) list)
17.54 -val num_failures = Unsynchronized.ref ([] : (int * int) list)
17.55 -val num_found_proofs = Unsynchronized.ref ([] : (int * int) list)
17.56 -val num_lost_proofs = Unsynchronized.ref ([] : (int * int) list)
17.57 -val num_found_facts = Unsynchronized.ref ([] : (int * int) list)
17.58 -val num_lost_facts = Unsynchronized.ref ([] : (int * int) list)
17.59 -
17.60 -fun get id c = the_default 0 (AList.lookup (op =) (!c) id)
17.61 -fun add id c n =
17.62 - c := (case AList.lookup (op =) (!c) id of
17.63 - SOME m => AList.update (op =) (id, m + n) (!c)
17.64 - | NONE => (id, n) :: !c)
17.65 -
17.66 -fun init proof_file _ thy =
17.67 - let
17.68 - fun do_line line =
17.69 - case line |> space_explode ":" of
17.70 - [line_num, offset, proof] =>
17.71 - SOME (pairself (the o Int.fromString) (line_num, offset),
17.72 - proof |> space_explode " " |> filter_out (curry (op =) ""))
17.73 - | _ => NONE
17.74 - val proofs = File.read (Path.explode proof_file)
17.75 - val proof_tab =
17.76 - proofs |> space_explode "\n"
17.77 - |> map_filter do_line
17.78 - |> AList.coalesce (op =)
17.79 - |> Prooftab.make
17.80 - in proof_table := proof_tab; thy end
17.81 -
17.82 -fun percentage a b = if b = 0 then "N/A" else string_of_int (a * 100 div b)
17.83 -fun percentage_alt a b = percentage a (a + b)
17.84 -
17.85 -fun done id ({log, ...} : Mirabelle.done_args) =
17.86 - if get id num_successes + get id num_failures > 0 then
17.87 - (log "";
17.88 - log ("Number of overall successes: " ^
17.89 - string_of_int (get id num_successes));
17.90 - log ("Number of overall failures: " ^ string_of_int (get id num_failures));
17.91 - log ("Overall success rate: " ^
17.92 - percentage_alt (get id num_successes) (get id num_failures) ^ "%");
17.93 - log ("Number of found proofs: " ^ string_of_int (get id num_found_proofs));
17.94 - log ("Number of lost proofs: " ^ string_of_int (get id num_lost_proofs));
17.95 - log ("Proof found rate: " ^
17.96 - percentage_alt (get id num_found_proofs) (get id num_lost_proofs) ^
17.97 - "%");
17.98 - log ("Number of found facts: " ^ string_of_int (get id num_found_facts));
17.99 - log ("Number of lost facts: " ^ string_of_int (get id num_lost_facts));
17.100 - log ("Fact found rate: " ^
17.101 - percentage_alt (get id num_found_facts) (get id num_lost_facts) ^
17.102 - "%"))
17.103 - else
17.104 - ()
17.105 -
17.106 -val default_prover = ATP_Systems.eN (* arbitrary ATP *)
17.107 -
17.108 -fun with_index (i, s) = s ^ "@" ^ string_of_int i
17.109 -
17.110 -fun action args id ({pre, pos, log, ...} : Mirabelle.run_args) =
17.111 - case (Position.line_of pos, Position.offset_of pos) of
17.112 - (SOME line_num, SOME offset) =>
17.113 - (case Prooftab.lookup (!proof_table) (line_num, offset) of
17.114 - SOME proofs =>
17.115 - let
17.116 - val {context = ctxt, facts = chained_ths, goal} = Proof.goal pre
17.117 - val prover = AList.lookup (op =) args "prover"
17.118 - |> the_default default_prover
17.119 - val {relevance_thresholds, max_relevant, slice, ...} =
17.120 - Sledgehammer_Isar.default_params ctxt args
17.121 - val default_max_relevant =
17.122 - Sledgehammer_Provers.default_max_relevant_for_prover ctxt slice
17.123 - prover
17.124 - val is_appropriate_prop =
17.125 - Sledgehammer_Provers.is_appropriate_prop_for_prover ctxt
17.126 - default_prover
17.127 - val is_built_in_const =
17.128 - Sledgehammer_Provers.is_built_in_const_for_prover ctxt default_prover
17.129 - val relevance_fudge =
17.130 - extract_relevance_fudge args
17.131 - (Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover)
17.132 - val relevance_override = {add = [], del = [], only = false}
17.133 - val subgoal = 1
17.134 - val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal subgoal
17.135 - val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover
17.136 - val facts =
17.137 - Sledgehammer_Filter.nearly_all_facts ctxt ho_atp relevance_override
17.138 - chained_ths hyp_ts concl_t
17.139 - |> filter (is_appropriate_prop o prop_of o snd)
17.140 - |> Sledgehammer_Filter.relevant_facts ctxt relevance_thresholds
17.141 - (the_default default_max_relevant max_relevant)
17.142 - is_built_in_const relevance_fudge relevance_override
17.143 - chained_ths hyp_ts concl_t
17.144 - |> map (fst o fst)
17.145 - val (found_facts, lost_facts) =
17.146 - flat proofs |> sort_distinct string_ord
17.147 - |> map (fn fact => (find_index (curry (op =) fact) facts, fact))
17.148 - |> List.partition (curry (op <=) 0 o fst)
17.149 - |>> sort (prod_ord int_ord string_ord) ||> map snd
17.150 - val found_proofs = filter (forall (member (op =) facts)) proofs
17.151 - val n = length found_proofs
17.152 - val _ =
17.153 - if n = 0 then
17.154 - (add id num_failures 1; log "Failure")
17.155 - else
17.156 - (add id num_successes 1;
17.157 - add id num_found_proofs n;
17.158 - log ("Success (" ^ string_of_int n ^ " of " ^
17.159 - string_of_int (length proofs) ^ " proofs)"))
17.160 - val _ = add id num_lost_proofs (length proofs - n)
17.161 - val _ = add id num_found_facts (length found_facts)
17.162 - val _ = add id num_lost_facts (length lost_facts)
17.163 - val _ =
17.164 - if null found_facts then
17.165 - ()
17.166 - else
17.167 - let
17.168 - val found_weight =
17.169 - Real.fromInt (fold (fn (n, _) =>
17.170 - Integer.add (n * n)) found_facts 0)
17.171 - / Real.fromInt (length found_facts)
17.172 - |> Math.sqrt |> Real.ceil
17.173 - in
17.174 - log ("Found facts (among " ^ string_of_int (length facts) ^
17.175 - ", weight " ^ string_of_int found_weight ^ "): " ^
17.176 - commas (map with_index found_facts))
17.177 - end
17.178 - val _ = if null lost_facts then
17.179 - ()
17.180 - else
17.181 - log ("Lost facts (among " ^ string_of_int (length facts) ^
17.182 - "): " ^ commas lost_facts)
17.183 - in () end
17.184 - | NONE => log "No known proof")
17.185 - | _ => ()
17.186 -
17.187 -val proof_fileK = "proof_file"
17.188 -
17.189 -fun invoke args =
17.190 - let
17.191 - val (pf_args, other_args) =
17.192 - args |> List.partition (curry (op =) proof_fileK o fst)
17.193 - val proof_file = case pf_args of
17.194 - [] => error "No \"proof_file\" specified"
17.195 - | (_, s) :: _ => s
17.196 - in Mirabelle.register (init proof_file, action other_args, done) end
17.197 -
17.198 -end;
17.199 -
17.200 -(* Workaround to keep the "mirabelle.pl" script happy *)
17.201 -structure Mirabelle_Sledgehammer_filter = Mirabelle_Sledgehammer_Filter;
18.1 --- a/src/HOL/Mirabelle/lib/Tools/mirabelle Sat Apr 14 23:34:18 2012 +0200
18.2 +++ b/src/HOL/Mirabelle/lib/Tools/mirabelle Sat Apr 14 23:52:17 2012 +0100
18.3 @@ -8,10 +8,10 @@
18.4 PRG="$(basename "$0")"
18.5
18.6 function print_action_names() {
18.7 - TOOLS="$MIRABELLE_HOME/Tools/mirabelle_*.ML"
18.8 - for NAME in $TOOLS
18.9 + ACTIONS="$MIRABELLE_HOME/Actions/mirabelle_*.ML"
18.10 + for ACTION in $ACTIONS
18.11 do
18.12 - echo $NAME | sed 's/.*mirabelle_\(.*\)\.ML/ \1/'
18.13 + echo $ACTION | sed 's/.*mirabelle_\(.*\)\.ML/ \1/'
18.14 done
18.15 }
18.16
19.1 --- a/src/HOL/Mirabelle/lib/scripts/mirabelle.pl Sat Apr 14 23:34:18 2012 +0200
19.2 +++ b/src/HOL/Mirabelle/lib/scripts/mirabelle.pl Sat Apr 14 23:52:17 2012 +0100
19.3 @@ -46,7 +46,7 @@
19.4 my @action_names;
19.5 foreach (split(/:/, $actions)) {
19.6 if (m/([^[]*)/) {
19.7 - push @action_files, "\"$mirabelle_home/Tools/mirabelle_$1.ML\"";
19.8 + push @action_files, "\"$mirabelle_home/Actions/mirabelle_$1.ML\"";
19.9 push @action_names, $1;
19.10 }
19.11 }