author sultana

Tue, 24 Apr 2012 13:59:29 +0100

changeset 48595 15f4309bb9eb

parent 48594 44d1f4e0a46e

child 48596 39c5a1a80acc

src/HOL/IsaMakefile file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Actions/mirabelle.ML file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Actions/mirabelle_arith.ML file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Actions/mirabelle_metis.ML file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Actions/mirabelle_quickcheck.ML file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Actions/mirabelle_refute.ML file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Actions/mirabelle_sledgehammer.ML file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Actions/mirabelle_sledgehammer_filter.ML file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Mirabelle.thy file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Mirabelle_Test.thy file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Tools/mirabelle.ML file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Tools/mirabelle_arith.ML file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Tools/mirabelle_metis.ML file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Tools/mirabelle_quickcheck.ML file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Tools/mirabelle_refute.ML file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/Tools/mirabelle_sledgehammer_filter.ML file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/lib/Tools/mirabelle file | annotate | diff | comparison | revisions

src/HOL/Mirabelle/lib/scripts/mirabelle.pl file | annotate | diff | comparison | revisions
     1.1 --- a/src/HOL/IsaMakefile	Tue Apr 24 13:55:02 2012 +0100
     1.2 +++ b/src/HOL/IsaMakefile	Tue Apr 24 13:59:29 2012 +0100
     1.3 @@ -1326,13 +1326,13 @@
     1.4  HOL-Mirabelle: HOL $(LOG)/HOL-Mirabelle.gz
     1.5  
     1.6  $(LOG)/HOL-Mirabelle.gz: $(OUT)/HOL Mirabelle/Mirabelle_Test.thy \
     1.7 -  Mirabelle/Mirabelle.thy Mirabelle/Actions/mirabelle.ML \
     1.8 -  Mirabelle/ROOT.ML Mirabelle/Actions/mirabelle_arith.ML \
     1.9 -  Mirabelle/Actions/mirabelle_metis.ML \
    1.10 -  Mirabelle/Actions/mirabelle_quickcheck.ML \
    1.11 -  Mirabelle/Actions/mirabelle_refute.ML	\
    1.12 -  Mirabelle/Actions/mirabelle_sledgehammer.ML \
    1.13 -  Mirabelle/Actions/mirabelle_sledgehammer_filter.ML \
    1.14 +  Mirabelle/Mirabelle.thy Mirabelle/Tools/mirabelle.ML \
    1.15 +  Mirabelle/ROOT.ML Mirabelle/Tools/mirabelle_arith.ML \
    1.16 +  Mirabelle/Tools/mirabelle_metis.ML \
    1.17 +  Mirabelle/Tools/mirabelle_quickcheck.ML \
    1.18 +  Mirabelle/Tools/mirabelle_refute.ML	\
    1.19 +  Mirabelle/Tools/mirabelle_sledgehammer.ML \
    1.20 +  Mirabelle/Tools/mirabelle_sledgehammer_filter.ML \
    1.21    ex/sledgehammer_tactics.ML Mirabelle/lib/Tools/mirabelle \
    1.22    Mirabelle/lib/scripts/mirabelle.pl Library/FrechetDeriv.thy \
    1.23    Library/Inner_Product.thy

     2.1 --- a/src/HOL/Mirabelle/Actions/mirabelle.ML	Tue Apr 24 13:55:02 2012 +0100
     2.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.3 @@ -1,213 +0,0 @@
     2.4 -(*  Title:      HOL/Mirabelle/Actions/mirabelle.ML
     2.5 -    Author:     Jasmin Blanchette and Sascha Boehme, TU Munich
     2.6 -*)
     2.7 -
     2.8 -signature MIRABELLE =
     2.9 -sig
    2.10 -  (*configuration*)
    2.11 -  val logfile : string Config.T
    2.12 -  val timeout : int Config.T
    2.13 -  val start_line : int Config.T
    2.14 -  val end_line : int Config.T
    2.15 -
    2.16 -  (*core*)
    2.17 -  type init_action = int -> theory -> theory
    2.18 -  type done_args = {last: Toplevel.state, log: string -> unit}
    2.19 -  type done_action = int -> done_args -> unit
    2.20 -  type run_args = {pre: Proof.state, post: Toplevel.state option,
    2.21 -    timeout: Time.time, log: string -> unit, pos: Position.T, name: string}
    2.22 -  type run_action = int -> run_args -> unit
    2.23 -  type action = init_action * run_action * done_action
    2.24 -  val catch : (int -> string) -> run_action -> run_action
    2.25 -  val catch_result : (int -> string) -> 'a -> (int -> run_args -> 'a) ->
    2.26 -    int -> run_args -> 'a
    2.27 -  val register : action -> theory -> theory
    2.28 -  val step_hook : Toplevel.transition -> Toplevel.state -> Toplevel.state ->
    2.29 -    unit
    2.30 -
    2.31 -  (*utility functions*)
    2.32 -  val can_apply : Time.time -> (Proof.context -> int -> tactic) ->
    2.33 -    Proof.state -> bool
    2.34 -  val theorems_in_proof_term : thm -> thm list
    2.35 -  val theorems_of_sucessful_proof : Toplevel.state option -> thm list
    2.36 -  val get_setting : (string * string) list -> string * string -> string
    2.37 -  val get_int_setting : (string * string) list -> string * int -> int
    2.38 -  val cpu_time : ('a -> 'b) -> 'a -> 'b * int
    2.39 -end
    2.40 -
    2.41 -
    2.42 -
    2.43 -structure Mirabelle : MIRABELLE =
    2.44 -struct
    2.45 -
    2.46 -(* Mirabelle configuration *)
    2.47 -
    2.48 -val logfile = Attrib.setup_config_string @{binding mirabelle_logfile} (K "")
    2.49 -val timeout = Attrib.setup_config_int @{binding mirabelle_timeout} (K 30)
    2.50 -val start_line = Attrib.setup_config_int @{binding mirabelle_start_line} (K 0)
    2.51 -val end_line = Attrib.setup_config_int @{binding mirabelle_end_line} (K ~1)
    2.52 -
    2.53 -
    2.54 -(* Mirabelle core *)
    2.55 -
    2.56 -type init_action = int -> theory -> theory
    2.57 -type done_args = {last: Toplevel.state, log: string -> unit}
    2.58 -type done_action = int -> done_args -> unit
    2.59 -type run_args = {pre: Proof.state, post: Toplevel.state option,
    2.60 -  timeout: Time.time, log: string -> unit, pos: Position.T, name: string}
    2.61 -type run_action = int -> run_args -> unit
    2.62 -type action = init_action * run_action * done_action
    2.63 -
    2.64 -structure Actions = Theory_Data
    2.65 -(
    2.66 -  type T = (int * run_action * done_action) list
    2.67 -  val empty = []
    2.68 -  val extend = I
    2.69 -  fun merge data = Library.merge (K true) data  (* FIXME potential data loss because of (K true) *)
    2.70 -)
    2.71 -
    2.72 -
    2.73 -fun log_exn log tag id e = log (tag id ^ "exception:\n" ^ General.exnMessage e)
    2.74 -
    2.75 -fun catch tag f id (st as {log, ...}: run_args) = (f id st; ())
    2.76 -  handle exn =>
    2.77 -    if Exn.is_interrupt exn then reraise exn else (log_exn log tag id exn; ())
    2.78 -
    2.79 -fun catch_result tag d f id (st as {log, ...}: run_args) = f id st
    2.80 -  handle exn =>
    2.81 -    if Exn.is_interrupt exn then reraise exn else (log_exn log tag id exn; d)
    2.82 -
    2.83 -fun register (init, run, done) thy =
    2.84 -  let val id = length (Actions.get thy) + 1
    2.85 -  in
    2.86 -    thy
    2.87 -    |> init id
    2.88 -    |> Actions.map (cons (id, run, done))
    2.89 -  end
    2.90 -
    2.91 -local
    2.92 -
    2.93 -fun log thy s =
    2.94 -  let fun append_to n = if n = "" then K () else File.append (Path.explode n)
    2.95 -  in append_to (Config.get_global thy logfile) (s ^ "\n") end
    2.96 -  (* FIXME: with multithreading and parallel proofs enabled, we might need to
    2.97 -     encapsulate this inside a critical section *)
    2.98 -
    2.99 -fun log_sep thy = log thy "------------------"
   2.100 -
   2.101 -fun apply_actions thy pos name info (pre, post, time) actions =
   2.102 -  let
   2.103 -    fun apply f = f {pre=pre, post=post, timeout=time, log=log thy, pos=pos, name=name}
   2.104 -    fun run (id, run, _) = (apply (run id); log_sep thy)
   2.105 -  in (log thy info; log_sep thy; List.app run actions) end
   2.106 -
   2.107 -fun in_range _ _ NONE = true
   2.108 -  | in_range l r (SOME i) = (l <= i andalso (r < 0 orelse i <= r))
   2.109 -
   2.110 -fun only_within_range thy pos f x =
   2.111 -  let val l = Config.get_global thy start_line and r = Config.get_global thy end_line
   2.112 -  in if in_range l r (Position.line_of pos) then f x else () end
   2.113 -
   2.114 -in
   2.115 -
   2.116 -fun run_actions tr pre post =
   2.117 -  let
   2.118 -    val thy = Proof.theory_of pre
   2.119 -    val pos = Toplevel.pos_of tr
   2.120 -    val name = Toplevel.name_of tr
   2.121 -    val st = (pre, post, Time.fromSeconds (Config.get_global thy timeout))
   2.122 -
   2.123 -    val str0 = string_of_int o the_default 0
   2.124 -    val loc = str0 (Position.line_of pos) ^ ":" ^ str0 (Position.offset_of pos)
   2.125 -    val info = "\n\nat " ^ loc ^ " (" ^ name ^ "):"
   2.126 -  in
   2.127 -    only_within_range thy pos (apply_actions thy pos name info st) (Actions.get thy)
   2.128 -  end
   2.129 -
   2.130 -fun done_actions st =
   2.131 -  let
   2.132 -    val thy = Toplevel.theory_of st
   2.133 -    val _ = log thy "\n\n";
   2.134 -  in
   2.135 -    thy
   2.136 -    |> Actions.get
   2.137 -    |> List.app (fn (id, _, done) => done id {last=st, log=log thy})
   2.138 -  end
   2.139 -
   2.140 -end
   2.141 -
   2.142 -val whitelist = ["apply", "by", "proof"]
   2.143 -
   2.144 -fun step_hook tr pre post =
   2.145 - (* FIXME: might require wrapping into "interruptible" *)
   2.146 -  if can (Proof.assert_backward o Toplevel.proof_of) pre andalso
   2.147 -     member (op =) whitelist (Toplevel.name_of tr)
   2.148 -  then run_actions tr (Toplevel.proof_of pre) (SOME post)
   2.149 -  else if not (Toplevel.is_toplevel pre) andalso Toplevel.is_toplevel post
   2.150 -  then done_actions pre
   2.151 -  else ()   (* FIXME: add theory_hook here *)
   2.152 -
   2.153 -
   2.154 -
   2.155 -(* Mirabelle utility functions *)
   2.156 -
   2.157 -fun can_apply time tac st =
   2.158 -  let
   2.159 -    val {context = ctxt, facts, goal} = Proof.goal st
   2.160 -    val full_tac = HEADGOAL (Method.insert_tac facts THEN' tac ctxt)
   2.161 -  in
   2.162 -    (case try (TimeLimit.timeLimit time (Seq.pull o full_tac)) goal of
   2.163 -      SOME (SOME _) => true
   2.164 -    | _ => false)
   2.165 -  end
   2.166 -
   2.167 -local
   2.168 -
   2.169 -fun fold_body_thms f =
   2.170 -  let
   2.171 -    fun app n (PBody {thms, ...}) = thms |> fold (fn (i, (name, prop, body)) =>
   2.172 -      fn (x, seen) =>
   2.173 -        if Inttab.defined seen i then (x, seen)
   2.174 -        else
   2.175 -          let
   2.176 -            val body' = Future.join body
   2.177 -            val (x', seen') = app (n + (if name = "" then 0 else 1)) body'
   2.178 -              (x, Inttab.update (i, ()) seen)
   2.179 -        in (x' |> n = 0 ? f (name, prop, body'), seen') end)
   2.180 -  in fn bodies => fn x => #1 (fold (app 0) bodies (x, Inttab.empty)) end
   2.181 -
   2.182 -in
   2.183 -
   2.184 -fun theorems_in_proof_term thm =
   2.185 -  let
   2.186 -    val all_thms = Global_Theory.all_thms_of (Thm.theory_of_thm thm)
   2.187 -    fun collect (s, _, _) = if s <> "" then insert (op =) s else I
   2.188 -    fun member_of xs (x, y) = if member (op =) xs x then SOME y else NONE
   2.189 -    fun resolve_thms names = map_filter (member_of names) all_thms
   2.190 -  in
   2.191 -    resolve_thms (fold_body_thms collect [Thm.proof_body_of thm] [])
   2.192 -  end
   2.193 -
   2.194 -end
   2.195 -
   2.196 -fun theorems_of_sucessful_proof state =
   2.197 -  (case state of
   2.198 -    NONE => []
   2.199 -  | SOME st =>
   2.200 -      if not (Toplevel.is_proof st) then []
   2.201 -      else theorems_in_proof_term (#goal (Proof.goal (Toplevel.proof_of st))))
   2.202 -
   2.203 -fun get_setting settings (key, default) =
   2.204 -  the_default default (AList.lookup (op =) settings key)
   2.205 -
   2.206 -fun get_int_setting settings (key, default) =
   2.207 -  (case Option.map Int.fromString (AList.lookup (op =) settings key) of
   2.208 -    SOME (SOME i) => i
   2.209 -  | SOME NONE => error ("bad option: " ^ key)
   2.210 -  | NONE => default)
   2.211 -
   2.212 -fun cpu_time f x =
   2.213 -  let val ({cpu, ...}, y) = Timing.timing f x
   2.214 -  in (y, Time.toMilliseconds cpu) end
   2.215 -
   2.216 -end

     3.1 --- a/src/HOL/Mirabelle/Actions/mirabelle_arith.ML	Tue Apr 24 13:55:02 2012 +0100
     3.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.3 @@ -1,21 +0,0 @@
     3.4 -(*  Title:      HOL/Mirabelle/Actions/mirabelle_arith.ML
     3.5 -    Author:     Jasmin Blanchette and Sascha Boehme, TU Munich
     3.6 -*)
     3.7 -
     3.8 -structure Mirabelle_Arith : MIRABELLE_ACTION =
     3.9 -struct
    3.10 -
    3.11 -fun arith_tag id = "#" ^ string_of_int id ^ " arith: "
    3.12 -
    3.13 -fun init _ = I
    3.14 -fun done _ _ = ()
    3.15 -
    3.16 -fun run id ({pre, timeout, log, ...}: Mirabelle.run_args) =
    3.17 -  if Mirabelle.can_apply timeout Arith_Data.arith_tac pre
    3.18 -  then log (arith_tag id ^ "succeeded")
    3.19 -  else ()
    3.20 -  handle TimeLimit.TimeOut => log (arith_tag id ^ "timeout")
    3.21 -
    3.22 -fun invoke _ = Mirabelle.register (init, Mirabelle.catch arith_tag run, done)
    3.23 -
    3.24 -end

     4.1 --- a/src/HOL/Mirabelle/Actions/mirabelle_metis.ML	Tue Apr 24 13:55:02 2012 +0100
     4.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.3 @@ -1,35 +0,0 @@
     4.4 -(*  Title:      HOL/Mirabelle/Actions/mirabelle_metis.ML
     4.5 -    Author:     Jasmin Blanchette and Sascha Boehme, TU Munich
     4.6 -*)
     4.7 -
     4.8 -structure Mirabelle_Metis : MIRABELLE_ACTION =
     4.9 -struct
    4.10 -
    4.11 -fun metis_tag id = "#" ^ string_of_int id ^ " metis: "
    4.12 -
    4.13 -fun init _ = I
    4.14 -fun done _ _ = ()
    4.15 -
    4.16 -fun run id ({pre, post, timeout, log, ...}: Mirabelle.run_args) =
    4.17 -  let
    4.18 -    val thms = Mirabelle.theorems_of_sucessful_proof post
    4.19 -    val names = map Thm.get_name_hint thms
    4.20 -    val add_info = if null names then I else suffix (":\n" ^ commas names)
    4.21 -
    4.22 -    val facts = Facts.props (Proof_Context.facts_of (Proof.context_of pre))
    4.23 -
    4.24 -    fun metis ctxt =
    4.25 -      Metis_Tactic.metis_tac [] ATP_Problem_Generate.liftingN ctxt
    4.26 -                             (thms @ facts)
    4.27 -  in
    4.28 -    (if Mirabelle.can_apply timeout metis pre then "succeeded" else "failed")
    4.29 -    |> prefix (metis_tag id)
    4.30 -    |> add_info
    4.31 -    |> log
    4.32 -  end
    4.33 -  handle TimeLimit.TimeOut => log (metis_tag id ^ "timeout")
    4.34 -       | ERROR msg => log (metis_tag id ^ "error: " ^ msg)
    4.35 -
    4.36 -fun invoke _ = Mirabelle.register (init, Mirabelle.catch metis_tag run, done)
    4.37 -
    4.38 -end

     5.1 --- a/src/HOL/Mirabelle/Actions/mirabelle_quickcheck.ML	Tue Apr 24 13:55:02 2012 +0100
     5.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.3 @@ -1,28 +0,0 @@
     5.4 -(*  Title:      HOL/Mirabelle/Actions/mirabelle_quickcheck.ML
     5.5 -    Author:     Jasmin Blanchette and Sascha Boehme, TU Munich
     5.6 -*)
     5.7 -
     5.8 -structure Mirabelle_Quickcheck : MIRABELLE_ACTION =
     5.9 -struct
    5.10 -
    5.11 -fun qc_tag id = "#" ^ string_of_int id ^ " quickcheck: "
    5.12 -
    5.13 -fun init _ = I
    5.14 -fun done _ _ = ()
    5.15 -
    5.16 -fun run args id ({pre, timeout, log, ...}: Mirabelle.run_args) =
    5.17 -  let
    5.18 -    val has_valid_key = member (op =) ["iterations", "size", "generator"] o fst
    5.19 -    val quickcheck = Quickcheck.quickcheck (map (apsnd single) (filter has_valid_key args)) 1
    5.20 -  in
    5.21 -    (case TimeLimit.timeLimit timeout quickcheck pre of
    5.22 -      NONE => log (qc_tag id ^ "no counterexample")
    5.23 -    | SOME _ => log (qc_tag id ^ "counterexample found"))
    5.24 -  end
    5.25 -  handle TimeLimit.TimeOut => log (qc_tag id ^ "timeout")
    5.26 -       | ERROR msg => log (qc_tag id ^ "error: " ^ msg)
    5.27 -
    5.28 -fun invoke args =
    5.29 -  Mirabelle.register (init, Mirabelle.catch qc_tag (run args), done)
    5.30 -
    5.31 -end

     6.1 --- a/src/HOL/Mirabelle/Actions/mirabelle_refute.ML	Tue Apr 24 13:55:02 2012 +0100
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,39 +0,0 @@
     6.4 -(*  Title:      HOL/Mirabelle/Actions/mirabelle_refute.ML
     6.5 -    Author:     Jasmin Blanchette and Sascha Boehme, TU Munich
     6.6 -*)
     6.7 -
     6.8 -structure Mirabelle_Refute : MIRABELLE_ACTION =
     6.9 -struct
    6.10 -
    6.11 -
    6.12 -(* FIXME:
    6.13 -fun refute_action args timeout {pre=st, ...} = 
    6.14 -  let
    6.15 -    val subgoal = 1
    6.16 -    val thy = Proof.theory_of st
    6.17 -    val thm = #goal (Proof.raw_goal st)
    6.18 -
    6.19 -    val refute = Refute.refute_goal thy args thm
    6.20 -    val _ = TimeLimit.timeLimit timeout refute subgoal
    6.21 -  in
    6.22 -    val writ_log = Substring.full (the (Symtab.lookup tab "writeln"))
    6.23 -    val warn_log = Substring.full (the (Symtab.lookup tab "warning"))
    6.24 -
    6.25 -    val r =
    6.26 -      if Substring.isSubstring "model found" writ_log
    6.27 -      then
    6.28 -        if Substring.isSubstring "spurious" warn_log
    6.29 -        then SOME "potential counterexample"
    6.30 -        else SOME "real counterexample (bug?)"
    6.31 -      else
    6.32 -        if Substring.isSubstring "time limit" writ_log
    6.33 -        then SOME "no counterexample (timeout)"
    6.34 -        else if Substring.isSubstring "Search terminated" writ_log
    6.35 -        then SOME "no counterexample (normal termination)"
    6.36 -        else SOME "no counterexample (unknown)"
    6.37 -  in r end
    6.38 -*)
    6.39 -
    6.40 -fun invoke args = I (*Mirabelle.register ("refute", refute_action args)*)
    6.41 -
    6.42 -end

     7.1 --- a/src/HOL/Mirabelle/Actions/mirabelle_sledgehammer.ML	Tue Apr 24 13:55:02 2012 +0100
     7.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.3 @@ -1,766 +0,0 @@
     7.4 -(*  Title:      HOL/Mirabelle/Actions/mirabelle_sledgehammer.ML
     7.5 -    Author:     Jasmin Blanchette and Sascha Boehme and Tobias Nipkow, TU Munich
     7.6 -*)
     7.7 -
     7.8 -structure Mirabelle_Sledgehammer : MIRABELLE_ACTION =
     7.9 -struct
    7.10 -
    7.11 -(*To facilitate synching the description of Mirabelle Sledgehammer parameters
    7.12 - (in ../lib/Tools/mirabelle) with the parameters actually used by this
    7.13 - interface, the former extracts PARAMETER and DESCRIPTION from code below which
    7.14 - has this pattern (provided it appears in a single line):
    7.15 -   val .*K = "PARAMETER" (*DESCRIPTION*)
    7.16 -*)
    7.17 -(*NOTE: descriptions mention parameters (particularly NAME) without a defined range.*)
    7.18 -val proverK = "prover" (*=NAME: name of the external prover to call*)
    7.19 -val prover_timeoutK = "prover_timeout" (*=TIME: timeout for invoked ATP (seconds of process time)*)
    7.20 -val keepK = "keep" (*=PATH: path where to keep temporary files created by sledgehammer*)
    7.21 -val minimizeK = "minimize" (*: enable minimization of theorem set found by sledgehammer*)
    7.22 -                           (*refers to minimization attempted by Mirabelle*)
    7.23 -val minimize_timeoutK = "minimize_timeout" (*=TIME: timeout for each minimization step (seconds of*)
    7.24 -
    7.25 -val reconstructorK = "reconstructor" (*=NAME: how to reconstruct proofs (ie. using metis/smt)*)
    7.26 -val metis_ftK = "metis_ft" (*: apply metis with fully-typed encoding to the theorems found by sledgehammer*)
    7.27 -
    7.28 -val max_relevantK = "max_relevant" (*=NUM: max. relevant clauses to use*)
    7.29 -val max_callsK = "max_calls" (*=NUM: max. no. of calls to sledgehammer*)
    7.30 -val preplay_timeoutK = "preplay_timeout" (*=TIME: timeout for finding reconstructed proof*)
    7.31 -val sh_minimizeK = "sh_minimize" (*: instruct sledgehammer to run its minimizer*)
    7.32 -
    7.33 -val check_trivialK = "check_trivial" (*: check if goals are "trivial" (false by default)*)
    7.34 -val type_encK = "type_enc" (*=STRING: type encoding scheme*)
    7.35 -val lam_transK = "lam_trans" (*=STRING: lambda translation scheme*)
    7.36 -val strictK = "strict" (*=BOOL: run in strict mode*)
    7.37 -val sliceK = "slice" (*=BOOL: allow sledgehammer-level strategy-scheduling*)
    7.38 -val uncurried_aliasesK = "uncurried_aliases" (*=SMART_BOOL: use fresh function names to alias curried applications*)
    7.39 -val e_selection_heuristicK = "e_selection_heuristic" (*: FIXME*)
    7.40 -val term_orderK = "term_order" (*: FIXME*)
    7.41 -val force_sosK = "force_sos" (*: use SOS*)
    7.42 -val max_new_mono_instancesK = "max_new_mono_instances" (*=NUM: max. new monomorphic instances*)
    7.43 -val max_mono_itersK = "max_mono_iters" (*=NUM: max. iterations of monomorphiser*)
    7.44 -
    7.45 -fun sh_tag id = "#" ^ string_of_int id ^ " sledgehammer: "
    7.46 -fun minimize_tag id = "#" ^ string_of_int id ^ " minimize (sledgehammer): "
    7.47 -fun reconstructor_tag reconstructor id =
    7.48 -  "#" ^ string_of_int id ^ " " ^ (!reconstructor) ^ " (sledgehammer): "
    7.49 -
    7.50 -val separator = "-----"
    7.51 -
    7.52 -(*FIXME sensible to have Mirabelle-level Sledgehammer defaults?*)
    7.53 -(*defaults used in this Mirabelle action*)
    7.54 -val preplay_timeout_default = "4"
    7.55 -val lam_trans_default = "smart"
    7.56 -val uncurried_aliases_default = "smart"
    7.57 -val type_enc_default = "smart"
    7.58 -val strict_default = "false"
    7.59 -val max_relevant_default = "smart"
    7.60 -val slice_default = "true"
    7.61 -val max_calls_default = "10000000"
    7.62 -val trivial_default = "false"
    7.63 -val minimize_timeout_default = 5
    7.64 -
    7.65 -(*If a key is present in args then augment a list with its pair*)
    7.66 -(*This is used to avoid fixing default values at the Mirabelle level, and
    7.67 -  instead use the default values of the tool (Sledgehammer in this case).*)
    7.68 -fun available_parameter args key label list =
    7.69 -  let
    7.70 -    val value = AList.lookup (op =) args key
    7.71 -  in if is_some value then (label, the value) :: list else list end
    7.72 -
    7.73 -
    7.74 -datatype sh_data = ShData of {
    7.75 -  calls: int,
    7.76 -  success: int,
    7.77 -  nontriv_calls: int,
    7.78 -  nontriv_success: int,
    7.79 -  lemmas: int,
    7.80 -  max_lems: int,
    7.81 -  time_isa: int,
    7.82 -  time_prover: int,
    7.83 -  time_prover_fail: int}
    7.84 -
    7.85 -datatype re_data = ReData of {
    7.86 -  calls: int,
    7.87 -  success: int,
    7.88 -  nontriv_calls: int,
    7.89 -  nontriv_success: int,
    7.90 -  proofs: int,
    7.91 -  time: int,
    7.92 -  timeout: int,
    7.93 -  lemmas: int * int * int,
    7.94 -  posns: (Position.T * bool) list
    7.95 -  }
    7.96 -
    7.97 -datatype min_data = MinData of {
    7.98 -  succs: int,
    7.99 -  ab_ratios: int
   7.100 -  }
   7.101 -
   7.102 -fun make_sh_data
   7.103 -      (calls,success,nontriv_calls,nontriv_success,lemmas,max_lems,time_isa,
   7.104 -       time_prover,time_prover_fail) =
   7.105 -  ShData{calls=calls, success=success, nontriv_calls=nontriv_calls,
   7.106 -         nontriv_success=nontriv_success, lemmas=lemmas, max_lems=max_lems,
   7.107 -         time_isa=time_isa, time_prover=time_prover,
   7.108 -         time_prover_fail=time_prover_fail}
   7.109 -
   7.110 -fun make_min_data (succs, ab_ratios) =
   7.111 -  MinData{succs=succs, ab_ratios=ab_ratios}
   7.112 -
   7.113 -fun make_re_data (calls,success,nontriv_calls,nontriv_success,proofs,time,
   7.114 -                  timeout,lemmas,posns) =
   7.115 -  ReData{calls=calls, success=success, nontriv_calls=nontriv_calls,
   7.116 -         nontriv_success=nontriv_success, proofs=proofs, time=time,
   7.117 -         timeout=timeout, lemmas=lemmas, posns=posns}
   7.118 -
   7.119 -val empty_sh_data = make_sh_data (0, 0, 0, 0, 0, 0, 0, 0, 0)
   7.120 -val empty_min_data = make_min_data (0, 0)
   7.121 -val empty_re_data = make_re_data (0, 0, 0, 0, 0, 0, 0, (0,0,0), [])
   7.122 -
   7.123 -fun tuple_of_sh_data (ShData {calls, success, nontriv_calls, nontriv_success,
   7.124 -                              lemmas, max_lems, time_isa,
   7.125 -  time_prover, time_prover_fail}) = (calls, success, nontriv_calls,
   7.126 -  nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail)
   7.127 -
   7.128 -fun tuple_of_min_data (MinData {succs, ab_ratios}) = (succs, ab_ratios)
   7.129 -
   7.130 -fun tuple_of_re_data (ReData {calls, success, nontriv_calls, nontriv_success,
   7.131 -  proofs, time, timeout, lemmas, posns}) = (calls, success, nontriv_calls,
   7.132 -  nontriv_success, proofs, time, timeout, lemmas, posns)
   7.133 -
   7.134 -
   7.135 -datatype reconstructor_mode =
   7.136 -  Unminimized | Minimized | UnminimizedFT | MinimizedFT
   7.137 -
   7.138 -datatype data = Data of {
   7.139 -  sh: sh_data,
   7.140 -  min: min_data,
   7.141 -  re_u: re_data, (* reconstructor with unminimized set of lemmas *)
   7.142 -  re_m: re_data, (* reconstructor with minimized set of lemmas *)
   7.143 -  re_uft: re_data, (* reconstructor with unminimized set of lemmas and fully-typed *)
   7.144 -  re_mft: re_data, (* reconstructor with minimized set of lemmas and fully-typed *)
   7.145 -  mini: bool   (* with minimization *)
   7.146 -  }
   7.147 -
   7.148 -fun make_data (sh, min, re_u, re_m, re_uft, re_mft, mini) =
   7.149 -  Data {sh=sh, min=min, re_u=re_u, re_m=re_m, re_uft=re_uft, re_mft=re_mft,
   7.150 -    mini=mini}
   7.151 -
   7.152 -val empty_data = make_data (empty_sh_data, empty_min_data,
   7.153 -  empty_re_data, empty_re_data, empty_re_data, empty_re_data, false)
   7.154 -
   7.155 -fun map_sh_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
   7.156 -  let val sh' = make_sh_data (f (tuple_of_sh_data sh))
   7.157 -  in make_data (sh', min, re_u, re_m, re_uft, re_mft, mini) end
   7.158 -
   7.159 -fun map_min_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
   7.160 -  let val min' = make_min_data (f (tuple_of_min_data min))
   7.161 -  in make_data (sh, min', re_u, re_m, re_uft, re_mft, mini) end
   7.162 -
   7.163 -fun map_re_data f m (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
   7.164 -  let
   7.165 -    fun map_me g Unminimized   (u, m, uft, mft) = (g u, m, uft, mft)
   7.166 -      | map_me g Minimized     (u, m, uft, mft) = (u, g m, uft, mft)
   7.167 -      | map_me g UnminimizedFT (u, m, uft, mft) = (u, m, g uft, mft)
   7.168 -      | map_me g MinimizedFT   (u, m, uft, mft) = (u, m, uft, g mft)
   7.169 -
   7.170 -    val f' = make_re_data o f o tuple_of_re_data
   7.171 -
   7.172 -    val (re_u', re_m', re_uft', re_mft') =
   7.173 -      map_me f' m (re_u, re_m, re_uft, re_mft)
   7.174 -  in make_data (sh, min, re_u', re_m', re_uft', re_mft', mini) end
   7.175 -
   7.176 -fun set_mini mini (Data {sh, min, re_u, re_m, re_uft, re_mft, ...}) =
   7.177 -  make_data (sh, min, re_u, re_m, re_uft, re_mft, mini)
   7.178 -
   7.179 -fun inc_max (n:int) (s,sos,m) = (s+n, sos + n*n, Int.max(m,n));
   7.180 -
   7.181 -val inc_sh_calls =  map_sh_data
   7.182 -  (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
   7.183 -    => (calls + 1, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail))
   7.184 -
   7.185 -val inc_sh_success = map_sh_data
   7.186 -  (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
   7.187 -    => (calls, success + 1, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail))
   7.188 -
   7.189 -val inc_sh_nontriv_calls =  map_sh_data
   7.190 -  (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
   7.191 -    => (calls, success, nontriv_calls + 1, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail))
   7.192 -
   7.193 -val inc_sh_nontriv_success = map_sh_data
   7.194 -  (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
   7.195 -    => (calls, success, nontriv_calls, nontriv_success + 1, lemmas,max_lems, time_isa, time_prover, time_prover_fail))
   7.196 -
   7.197 -fun inc_sh_lemmas n = map_sh_data
   7.198 -  (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
   7.199 -    => (calls,success,nontriv_calls, nontriv_success, lemmas+n,max_lems,time_isa,time_prover,time_prover_fail))
   7.200 -
   7.201 -fun inc_sh_max_lems n = map_sh_data
   7.202 -  (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
   7.203 -    => (calls,success,nontriv_calls, nontriv_success, lemmas,Int.max(max_lems,n),time_isa,time_prover,time_prover_fail))
   7.204 -
   7.205 -fun inc_sh_time_isa t = map_sh_data
   7.206 -  (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
   7.207 -    => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa + t,time_prover,time_prover_fail))
   7.208 -
   7.209 -fun inc_sh_time_prover t = map_sh_data
   7.210 -  (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
   7.211 -    => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover + t,time_prover_fail))
   7.212 -
   7.213 -fun inc_sh_time_prover_fail t = map_sh_data
   7.214 -  (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
   7.215 -    => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail + t))
   7.216 -
   7.217 -val inc_min_succs = map_min_data
   7.218 -  (fn (succs,ab_ratios) => (succs+1, ab_ratios))
   7.219 -
   7.220 -fun inc_min_ab_ratios r = map_min_data
   7.221 -  (fn (succs, ab_ratios) => (succs, ab_ratios+r))
   7.222 -
   7.223 -val inc_reconstructor_calls = map_re_data
   7.224 -  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   7.225 -    => (calls + 1, success, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas,posns))
   7.226 -
   7.227 -val inc_reconstructor_success = map_re_data
   7.228 -  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   7.229 -    => (calls, success + 1, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas,posns))
   7.230 -
   7.231 -val inc_reconstructor_nontriv_calls = map_re_data
   7.232 -  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   7.233 -    => (calls, success, nontriv_calls + 1, nontriv_success, proofs, time, timeout, lemmas,posns))
   7.234 -
   7.235 -val inc_reconstructor_nontriv_success = map_re_data
   7.236 -  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   7.237 -    => (calls, success, nontriv_calls, nontriv_success + 1, proofs, time, timeout, lemmas,posns))
   7.238 -
   7.239 -val inc_reconstructor_proofs = map_re_data
   7.240 -  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   7.241 -    => (calls, success, nontriv_calls, nontriv_success, proofs + 1, time, timeout, lemmas,posns))
   7.242 -
   7.243 -fun inc_reconstructor_time m t = map_re_data
   7.244 - (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   7.245 -  => (calls, success, nontriv_calls, nontriv_success, proofs, time + t, timeout, lemmas,posns)) m
   7.246 -
   7.247 -val inc_reconstructor_timeout = map_re_data
   7.248 -  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   7.249 -    => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout + 1, lemmas,posns))
   7.250 -
   7.251 -fun inc_reconstructor_lemmas m n = map_re_data
   7.252 -  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   7.253 -    => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout, inc_max n lemmas, posns)) m
   7.254 -
   7.255 -fun inc_reconstructor_posns m pos = map_re_data
   7.256 -  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   7.257 -    => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas, pos::posns)) m
   7.258 -
   7.259 -val str0 = string_of_int o the_default 0
   7.260 -
   7.261 -local
   7.262 -
   7.263 -val str = string_of_int
   7.264 -val str3 = Real.fmt (StringCvt.FIX (SOME 3))
   7.265 -fun percentage a b = string_of_int (a * 100 div b)
   7.266 -fun time t = Real.fromInt t / 1000.0
   7.267 -fun avg_time t n =
   7.268 -  if n > 0 then (Real.fromInt t / 1000.0) / Real.fromInt n else 0.0
   7.269 -
   7.270 -fun log_sh_data log
   7.271 -    (calls, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail) =
   7.272 - (log ("Total number of sledgehammer calls: " ^ str calls);
   7.273 -  log ("Number of successful sledgehammer calls: " ^ str success);
   7.274 -  log ("Number of sledgehammer lemmas: " ^ str lemmas);
   7.275 -  log ("Max number of sledgehammer lemmas: " ^ str max_lems);
   7.276 -  log ("Success rate: " ^ percentage success calls ^ "%");
   7.277 -  log ("Total number of nontrivial sledgehammer calls: " ^ str nontriv_calls);
   7.278 -  log ("Number of successful nontrivial sledgehammer calls: " ^ str nontriv_success);
   7.279 -  log ("Total time for sledgehammer calls (Isabelle): " ^ str3 (time time_isa));
   7.280 -  log ("Total time for successful sledgehammer calls (ATP): " ^ str3 (time time_prover));
   7.281 -  log ("Total time for failed sledgehammer calls (ATP): " ^ str3 (time time_prover_fail));
   7.282 -  log ("Average time for sledgehammer calls (Isabelle): " ^
   7.283 -    str3 (avg_time time_isa calls));
   7.284 -  log ("Average time for successful sledgehammer calls (ATP): " ^
   7.285 -    str3 (avg_time time_prover success));
   7.286 -  log ("Average time for failed sledgehammer calls (ATP): " ^
   7.287 -    str3 (avg_time time_prover_fail (calls - success)))
   7.288 -  )
   7.289 -
   7.290 -fun str_of_pos (pos, triv) =
   7.291 -  str0 (Position.line_of pos) (* ^ ":" ^ str0 (Position.offset_of pos) *) ^
   7.292 -  (if triv then "[T]" else "")
   7.293 -
   7.294 -fun log_re_data log tag sh_calls (re_calls, re_success, re_nontriv_calls,
   7.295 -     re_nontriv_success, re_proofs, re_time, re_timeout,
   7.296 -    (lemmas, lems_sos, lems_max), re_posns) =
   7.297 - (log ("Total number of " ^ tag ^ "reconstructor calls: " ^ str re_calls);
   7.298 -  log ("Number of successful " ^ tag ^ "reconstructor calls: " ^ str re_success ^
   7.299 -    " (proof: " ^ str re_proofs ^ ")");
   7.300 -  log ("Number of " ^ tag ^ "reconstructor timeouts: " ^ str re_timeout);
   7.301 -  log ("Success rate: " ^ percentage re_success sh_calls ^ "%");
   7.302 -  log ("Total number of nontrivial " ^ tag ^ "reconstructor calls: " ^ str re_nontriv_calls);
   7.303 -  log ("Number of successful nontrivial " ^ tag ^ "reconstructor calls: " ^ str re_nontriv_success ^
   7.304 -    " (proof: " ^ str re_proofs ^ ")");
   7.305 -  log ("Number of successful " ^ tag ^ "reconstructor lemmas: " ^ str lemmas);
   7.306 -  log ("SOS of successful " ^ tag ^ "reconstructor lemmas: " ^ str lems_sos);
   7.307 -  log ("Max number of successful " ^ tag ^ "reconstructor lemmas: " ^ str lems_max);
   7.308 -  log ("Total time for successful " ^ tag ^ "reconstructor calls: " ^ str3 (time re_time));
   7.309 -  log ("Average time for successful " ^ tag ^ "reconstructor calls: " ^
   7.310 -    str3 (avg_time re_time re_success));
   7.311 -  if tag=""
   7.312 -  then log ("Proved: " ^ space_implode " " (map str_of_pos re_posns))
   7.313 -  else ()
   7.314 - )
   7.315 -
   7.316 -fun log_min_data log (succs, ab_ratios) =
   7.317 -  (log ("Number of successful minimizations: " ^ string_of_int succs);
   7.318 -   log ("After/before ratios: " ^ string_of_int ab_ratios)
   7.319 -  )
   7.320 -
   7.321 -in
   7.322 -
   7.323 -fun log_data id log (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
   7.324 -  let
   7.325 -    val ShData {calls=sh_calls, ...} = sh
   7.326 -
   7.327 -    fun app_if (ReData {calls, ...}) f = if calls > 0 then f () else ()
   7.328 -    fun log_re tag m =
   7.329 -      log_re_data log tag sh_calls (tuple_of_re_data m)
   7.330 -    fun log_reconstructor (tag1, m1) (tag2, m2) = app_if m1 (fn () =>
   7.331 -      (log_re tag1 m1; log ""; app_if m2 (fn () => log_re tag2 m2)))
   7.332 -  in
   7.333 -    if sh_calls > 0
   7.334 -    then
   7.335 -     (log ("\n\n\nReport #" ^ string_of_int id ^ ":\n");
   7.336 -      log_sh_data log (tuple_of_sh_data sh);
   7.337 -      log "";
   7.338 -      if not mini
   7.339 -      then log_reconstructor ("", re_u) ("fully-typed ", re_uft)
   7.340 -      else
   7.341 -        app_if re_u (fn () =>
   7.342 -         (log_reconstructor ("unminimized ", re_u) ("unminimized fully-typed ", re_uft);
   7.343 -          log "";
   7.344 -          app_if re_m (fn () =>
   7.345 -            (log_min_data log (tuple_of_min_data min); log "";
   7.346 -             log_reconstructor ("", re_m) ("fully-typed ", re_mft))))))
   7.347 -    else ()
   7.348 -  end
   7.349 -
   7.350 -end
   7.351 -
   7.352 -
   7.353 -(* Warning: we implicitly assume single-threaded execution here! *)
   7.354 -val data = Unsynchronized.ref ([] : (int * data) list)
   7.355 -
   7.356 -fun init id thy = (Unsynchronized.change data (cons (id, empty_data)); thy)
   7.357 -fun done id ({log, ...}: Mirabelle.done_args) =
   7.358 -  AList.lookup (op =) (!data) id
   7.359 -  |> Option.map (log_data id log)
   7.360 -  |> K ()
   7.361 -
   7.362 -fun change_data id f = (Unsynchronized.change data (AList.map_entry (op =) id f); ())
   7.363 -
   7.364 -
   7.365 -fun get_prover ctxt args =
   7.366 -  let
   7.367 -    fun default_prover_name () =
   7.368 -      hd (#provers (Sledgehammer_Isar.default_params ctxt []))
   7.369 -      handle List.Empty => error "No ATP available."
   7.370 -    fun get_prover name =
   7.371 -      (name, Sledgehammer_Run.get_minimizing_prover ctxt
   7.372 -                Sledgehammer_Provers.Normal name)
   7.373 -  in
   7.374 -    (case AList.lookup (op =) args proverK of
   7.375 -      SOME name => get_prover name
   7.376 -    | NONE => get_prover (default_prover_name ()))
   7.377 -  end
   7.378 -
   7.379 -type stature = ATP_Problem_Generate.stature
   7.380 -
   7.381 -(* hack *)
   7.382 -fun reconstructor_from_msg args msg =
   7.383 -  (case AList.lookup (op =) args reconstructorK of
   7.384 -    SOME name => name
   7.385 -  | NONE =>
   7.386 -    if String.isSubstring "metis (" msg then
   7.387 -      msg |> Substring.full
   7.388 -          |> Substring.position "metis ("
   7.389 -          |> snd |> Substring.position ")"
   7.390 -          |> fst |> Substring.string
   7.391 -          |> suffix ")"
   7.392 -    else if String.isSubstring "metis" msg then
   7.393 -      "metis"
   7.394 -    else
   7.395 -      "smt")
   7.396 -
   7.397 -local
   7.398 -
   7.399 -datatype sh_result =
   7.400 -  SH_OK of int * int * (string * stature) list |
   7.401 -  SH_FAIL of int * int |
   7.402 -  SH_ERROR
   7.403 -
   7.404 -fun run_sh prover_name prover type_enc strict max_relevant slice lam_trans
   7.405 -        uncurried_aliases e_selection_heuristic term_order force_sos
   7.406 -        hard_timeout timeout preplay_timeout sh_minimizeLST
   7.407 -        max_new_mono_instancesLST max_mono_itersLST dir pos st =
   7.408 -  let
   7.409 -    val {context = ctxt, facts = chained_ths, goal} = Proof.goal st
   7.410 -    val i = 1
   7.411 -    fun set_file_name (SOME dir) =
   7.412 -        Config.put Sledgehammer_Provers.dest_dir dir
   7.413 -        #> Config.put Sledgehammer_Provers.problem_prefix
   7.414 -          ("prob_" ^ str0 (Position.line_of pos) ^ "__")
   7.415 -        #> Config.put SMT_Config.debug_files
   7.416 -          (dir ^ "/" ^ Name.desymbolize false (ATP_Util.timestamp ()) ^ "_"
   7.417 -          ^ serial_string ())
   7.418 -      | set_file_name NONE = I
   7.419 -    val st' =
   7.420 -      st
   7.421 -      |> Proof.map_context
   7.422 -           (set_file_name dir
   7.423 -            #> (Option.map (Config.put ATP_Systems.e_selection_heuristic)
   7.424 -                  e_selection_heuristic |> the_default I)
   7.425 -            #> (Option.map (Config.put ATP_Systems.term_order)
   7.426 -                  term_order |> the_default I)
   7.427 -            #> (Option.map (Config.put ATP_Systems.force_sos)
   7.428 -                  force_sos |> the_default I))
   7.429 -    val params as {relevance_thresholds, max_relevant, slice, ...} =
   7.430 -      Sledgehammer_Isar.default_params ctxt
   7.431 -         ([("verbose", "true"),
   7.432 -           ("type_enc", type_enc),
   7.433 -           ("strict", strict),
   7.434 -           ("lam_trans", lam_trans |> the_default lam_trans_default),
   7.435 -           ("uncurried_aliases", uncurried_aliases |> the_default uncurried_aliases_default),
   7.436 -           ("max_relevant", max_relevant),
   7.437 -           ("slice", slice),
   7.438 -           ("timeout", string_of_int timeout),
   7.439 -           ("preplay_timeout", preplay_timeout)]
   7.440 -          |> sh_minimizeLST (*don't confuse the two minimization flags*)
   7.441 -          |> max_new_mono_instancesLST
   7.442 -          |> max_mono_itersLST)
   7.443 -    val default_max_relevant =
   7.444 -      Sledgehammer_Provers.default_max_relevant_for_prover ctxt slice
   7.445 -        prover_name
   7.446 -    val is_appropriate_prop =
   7.447 -      Sledgehammer_Provers.is_appropriate_prop_for_prover ctxt prover_name
   7.448 -    val is_built_in_const =
   7.449 -      Sledgehammer_Provers.is_built_in_const_for_prover ctxt prover_name
   7.450 -    val relevance_fudge =
   7.451 -      Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover_name
   7.452 -    val relevance_override = {add = [], del = [], only = false}
   7.453 -    val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal i
   7.454 -    val time_limit =
   7.455 -      (case hard_timeout of
   7.456 -        NONE => I
   7.457 -      | SOME secs => TimeLimit.timeLimit (Time.fromSeconds secs))
   7.458 -    fun failed failure =
   7.459 -      ({outcome = SOME failure, used_facts = [], run_time = Time.zeroTime,
   7.460 -        preplay =
   7.461 -          K (ATP_Proof_Reconstruct.Failed_to_Play Sledgehammer_Provers.plain_metis),
   7.462 -        message = K "", message_tail = ""}, ~1)
   7.463 -    val ({outcome, used_facts, run_time, preplay, message, message_tail}
   7.464 -         : Sledgehammer_Provers.prover_result,
   7.465 -        time_isa) = time_limit (Mirabelle.cpu_time (fn () =>
   7.466 -      let
   7.467 -        val _ = if is_appropriate_prop concl_t then ()
   7.468 -                else raise Fail "inappropriate"
   7.469 -        val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover_name
   7.470 -        val facts =
   7.471 -          Sledgehammer_Filter.nearly_all_facts ctxt ho_atp relevance_override
   7.472 -            chained_ths hyp_ts concl_t
   7.473 -          |> filter (is_appropriate_prop o prop_of o snd)
   7.474 -          |> Sledgehammer_Filter.relevant_facts ctxt relevance_thresholds
   7.475 -                 (the_default default_max_relevant max_relevant)
   7.476 -                 is_built_in_const relevance_fudge relevance_override
   7.477 -                 chained_ths hyp_ts concl_t
   7.478 -        val problem =
   7.479 -          {state = st', goal = goal, subgoal = i,
   7.480 -           subgoal_count = Sledgehammer_Util.subgoal_count st,
   7.481 -           facts = facts |> map Sledgehammer_Provers.Untranslated_Fact}
   7.482 -      in prover params (K (K (K ""))) problem end)) ()
   7.483 -      handle TimeLimit.TimeOut => failed ATP_Proof.TimedOut
   7.484 -           | Fail "inappropriate" => failed ATP_Proof.Inappropriate
   7.485 -    val time_prover = run_time |> Time.toMilliseconds
   7.486 -    val msg = message (preplay ()) ^ message_tail
   7.487 -  in
   7.488 -    case outcome of
   7.489 -      NONE => (msg, SH_OK (time_isa, time_prover, used_facts))
   7.490 -    | SOME _ => (msg, SH_FAIL (time_isa, time_prover))
   7.491 -  end
   7.492 -  handle ERROR msg => ("error: " ^ msg, SH_ERROR)
   7.493 -
   7.494 -fun thms_of_name ctxt name =
   7.495 -  let
   7.496 -    val lex = Keyword.get_lexicons
   7.497 -    val get = maps (Proof_Context.get_fact ctxt o fst)
   7.498 -  in
   7.499 -    Source.of_string name
   7.500 -    |> Symbol.source
   7.501 -    |> Token.source {do_recover=SOME false} lex Position.start
   7.502 -    |> Token.source_proper
   7.503 -    |> Source.source Token.stopper (Parse_Spec.xthms1 >> get) NONE
   7.504 -    |> Source.exhaust
   7.505 -  end
   7.506 -
   7.507 -in
   7.508 -
   7.509 -fun run_sledgehammer trivial args reconstructor named_thms id
   7.510 -      ({pre=st, log, pos, ...}: Mirabelle.run_args) =
   7.511 -  let
   7.512 -    val triv_str = if trivial then "[T] " else ""
   7.513 -    val _ = change_data id inc_sh_calls
   7.514 -    val _ = if trivial then () else change_data id inc_sh_nontriv_calls
   7.515 -    val (prover_name, prover) = get_prover (Proof.context_of st) args
   7.516 -    val type_enc = AList.lookup (op =) args type_encK |> the_default type_enc_default
   7.517 -    val strict = AList.lookup (op =) args strictK |> the_default strict_default
   7.518 -    val max_relevant = AList.lookup (op =) args max_relevantK |> the_default max_relevant_default
   7.519 -    val slice = AList.lookup (op =) args sliceK |> the_default slice_default
   7.520 -    val lam_trans = AList.lookup (op =) args lam_transK
   7.521 -    val uncurried_aliases = AList.lookup (op =) args uncurried_aliasesK
   7.522 -    val e_selection_heuristic = AList.lookup (op =) args e_selection_heuristicK
   7.523 -    val term_order = AList.lookup (op =) args term_orderK
   7.524 -    val force_sos = AList.lookup (op =) args force_sosK
   7.525 -      |> Option.map (curry (op <>) "false")
   7.526 -    val dir = AList.lookup (op =) args keepK
   7.527 -    val timeout = Mirabelle.get_int_setting args (prover_timeoutK, 30)
   7.528 -    (* always use a hard timeout, but give some slack so that the automatic
   7.529 -       minimizer has a chance to do its magic *)
   7.530 -    val preplay_timeout = AList.lookup (op =) args preplay_timeoutK
   7.531 -      |> the_default preplay_timeout_default
   7.532 -    val sh_minimizeLST = available_parameter args sh_minimizeK "minimize"
   7.533 -    val max_new_mono_instancesLST =
   7.534 -      available_parameter args max_new_mono_instancesK max_new_mono_instancesK
   7.535 -    val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK
   7.536 -    val hard_timeout = SOME (2 * timeout)
   7.537 -    val (msg, result) =
   7.538 -      run_sh prover_name prover type_enc strict max_relevant slice lam_trans
   7.539 -        uncurried_aliases e_selection_heuristic term_order force_sos
   7.540 -        hard_timeout timeout preplay_timeout sh_minimizeLST
   7.541 -        max_new_mono_instancesLST max_mono_itersLST dir pos st
   7.542 -  in
   7.543 -    case result of
   7.544 -      SH_OK (time_isa, time_prover, names) =>
   7.545 -        let
   7.546 -          fun get_thms (name, stature) =
   7.547 -            try (thms_of_name (Proof.context_of st)) name
   7.548 -            |> Option.map (pair (name, stature))
   7.549 -        in
   7.550 -          change_data id inc_sh_success;
   7.551 -          if trivial then () else change_data id inc_sh_nontriv_success;
   7.552 -          change_data id (inc_sh_lemmas (length names));
   7.553 -          change_data id (inc_sh_max_lems (length names));
   7.554 -          change_data id (inc_sh_time_isa time_isa);
   7.555 -          change_data id (inc_sh_time_prover time_prover);
   7.556 -          reconstructor := reconstructor_from_msg args msg;
   7.557 -          named_thms := SOME (map_filter get_thms names);
   7.558 -          log (sh_tag id ^ triv_str ^ "succeeded (" ^ string_of_int time_isa ^ "+" ^
   7.559 -            string_of_int time_prover ^ ") [" ^ prover_name ^ "]:\n" ^ msg)
   7.560 -        end
   7.561 -    | SH_FAIL (time_isa, time_prover) =>
   7.562 -        let
   7.563 -          val _ = change_data id (inc_sh_time_isa time_isa)
   7.564 -          val _ = change_data id (inc_sh_time_prover_fail time_prover)
   7.565 -        in log (sh_tag id ^ triv_str ^ "failed: " ^ msg) end
   7.566 -    | SH_ERROR => log (sh_tag id ^ "failed: " ^ msg)
   7.567 -  end
   7.568 -
   7.569 -end
   7.570 -
   7.571 -fun run_minimize args reconstructor named_thms id
   7.572 -        ({pre=st, log, ...}: Mirabelle.run_args) =
   7.573 -  let
   7.574 -    val ctxt = Proof.context_of st
   7.575 -    val n0 = length (these (!named_thms))
   7.576 -    val (prover_name, _) = get_prover ctxt args
   7.577 -    val type_enc = AList.lookup (op =) args type_encK |> the_default type_enc_default
   7.578 -    val strict = AList.lookup (op =) args strictK |> the_default strict_default
   7.579 -    val timeout =
   7.580 -      AList.lookup (op =) args minimize_timeoutK
   7.581 -      |> Option.map (fst o read_int o raw_explode)  (* FIXME Symbol.explode (?) *)
   7.582 -      |> the_default minimize_timeout_default
   7.583 -    val preplay_timeout = AList.lookup (op =) args preplay_timeoutK
   7.584 -      |> the_default preplay_timeout_default
   7.585 -    val sh_minimizeLST = available_parameter args sh_minimizeK "minimize"
   7.586 -    val max_new_mono_instancesLST =
   7.587 -      available_parameter args max_new_mono_instancesK max_new_mono_instancesK
   7.588 -    val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK
   7.589 -    val params = Sledgehammer_Isar.default_params ctxt
   7.590 -     ([("provers", prover_name),
   7.591 -       ("verbose", "true"),
   7.592 -       ("type_enc", type_enc),
   7.593 -       ("strict", strict),
   7.594 -       ("timeout", string_of_int timeout),
   7.595 -       ("preplay_timeout", preplay_timeout)]
   7.596 -      |> sh_minimizeLST (*don't confuse the two minimization flags*)
   7.597 -      |> max_new_mono_instancesLST
   7.598 -      |> max_mono_itersLST)
   7.599 -    val minimize =
   7.600 -      Sledgehammer_Minimize.minimize_facts prover_name params
   7.601 -          true 1 (Sledgehammer_Util.subgoal_count st)
   7.602 -    val _ = log separator
   7.603 -    val (used_facts, (preplay, message, message_tail)) =
   7.604 -      minimize st (these (!named_thms))
   7.605 -    val msg = message (preplay ()) ^ message_tail
   7.606 -  in
   7.607 -    case used_facts of
   7.608 -      SOME named_thms' =>
   7.609 -        (change_data id inc_min_succs;
   7.610 -         change_data id (inc_min_ab_ratios ((100 * length named_thms') div n0));
   7.611 -         if length named_thms' = n0
   7.612 -         then log (minimize_tag id ^ "already minimal")
   7.613 -         else (reconstructor := reconstructor_from_msg args msg;
   7.614 -               named_thms := SOME named_thms';
   7.615 -               log (minimize_tag id ^ "succeeded:\n" ^ msg))
   7.616 -        )
   7.617 -    | NONE => log (minimize_tag id ^ "failed: " ^ msg)
   7.618 -  end
   7.619 -
   7.620 -fun override_params prover type_enc timeout =
   7.621 -  [("provers", prover),
   7.622 -   ("max_relevant", "0"),
   7.623 -   ("type_enc", type_enc),
   7.624 -   ("strict", "true"),
   7.625 -   ("slice", "false"),
   7.626 -   ("timeout", timeout |> Time.toSeconds |> string_of_int)]
   7.627 -
   7.628 -fun run_reconstructor trivial full m name reconstructor named_thms id
   7.629 -    ({pre=st, timeout, log, pos, ...}: Mirabelle.run_args) =
   7.630 -  let
   7.631 -    fun do_reconstructor named_thms ctxt =
   7.632 -      let
   7.633 -        val ref_of_str =
   7.634 -          suffix ";" #> Outer_Syntax.scan Position.none #> Parse_Spec.xthm
   7.635 -          #> fst
   7.636 -        val thms = named_thms |> maps snd
   7.637 -        val facts = named_thms |> map (ref_of_str o fst o fst)
   7.638 -        val relevance_override = {add = facts, del = [], only = true}
   7.639 -        fun my_timeout time_slice =
   7.640 -          timeout |> Time.toReal |> curry Real.* time_slice |> Time.fromReal
   7.641 -        fun sledge_tac time_slice prover type_enc =
   7.642 -          Sledgehammer_Tactics.sledgehammer_as_oracle_tac ctxt
   7.643 -            (override_params prover type_enc (my_timeout time_slice))
   7.644 -            relevance_override
   7.645 -      in
   7.646 -        if !reconstructor = "sledgehammer_tac" then
   7.647 -          sledge_tac 0.2 ATP_Systems.z3_tptpN "mono_native"
   7.648 -          ORELSE' sledge_tac 0.2 ATP_Systems.eN "mono_guards??"
   7.649 -          ORELSE' sledge_tac 0.2 ATP_Systems.vampireN "mono_guards??"
   7.650 -          ORELSE' sledge_tac 0.2 ATP_Systems.spassN "poly_tags"
   7.651 -          ORELSE' Metis_Tactic.metis_tac [] ATP_Problem_Generate.combsN
   7.652 -            ctxt thms
   7.653 -        else if !reconstructor = "smt" then
   7.654 -          SMT_Solver.smt_tac ctxt thms
   7.655 -        else if full then
   7.656 -          Metis_Tactic.metis_tac [ATP_Proof_Reconstruct.full_typesN]
   7.657 -            ATP_Proof_Reconstruct.metis_default_lam_trans ctxt thms
   7.658 -        else if String.isPrefix "metis (" (!reconstructor) then
   7.659 -          let
   7.660 -            val (type_encs, lam_trans) =
   7.661 -              !reconstructor
   7.662 -              |> Outer_Syntax.scan Position.start
   7.663 -              |> filter Token.is_proper |> tl
   7.664 -              |> Metis_Tactic.parse_metis_options |> fst
   7.665 -              |>> the_default [ATP_Proof_Reconstruct.partial_typesN]
   7.666 -              ||> the_default ATP_Proof_Reconstruct.metis_default_lam_trans
   7.667 -          in Metis_Tactic.metis_tac type_encs lam_trans ctxt thms end
   7.668 -        else if !reconstructor = "metis" then
   7.669 -          Metis_Tactic.metis_tac [] ATP_Proof_Reconstruct.metis_default_lam_trans ctxt
   7.670 -            thms
   7.671 -        else
   7.672 -          K all_tac
   7.673 -      end
   7.674 -    fun apply_reconstructor named_thms =
   7.675 -      Mirabelle.can_apply timeout (do_reconstructor named_thms) st
   7.676 -
   7.677 -    fun with_time (false, t) = "failed (" ^ string_of_int t ^ ")"
   7.678 -      | with_time (true, t) = (change_data id (inc_reconstructor_success m);
   7.679 -          if trivial then ()
   7.680 -          else change_data id (inc_reconstructor_nontriv_success m);
   7.681 -          change_data id (inc_reconstructor_lemmas m (length named_thms));
   7.682 -          change_data id (inc_reconstructor_time m t);
   7.683 -          change_data id (inc_reconstructor_posns m (pos, trivial));
   7.684 -          if name = "proof" then change_data id (inc_reconstructor_proofs m)
   7.685 -          else ();
   7.686 -          "succeeded (" ^ string_of_int t ^ ")")
   7.687 -    fun timed_reconstructor named_thms =
   7.688 -      (with_time (Mirabelle.cpu_time apply_reconstructor named_thms), true)
   7.689 -      handle TimeLimit.TimeOut => (change_data id (inc_reconstructor_timeout m);
   7.690 -               ("timeout", false))
   7.691 -           | ERROR msg => ("error: " ^ msg, false)
   7.692 -
   7.693 -    val _ = log separator
   7.694 -    val _ = change_data id (inc_reconstructor_calls m)
   7.695 -    val _ = if trivial then ()
   7.696 -            else change_data id (inc_reconstructor_nontriv_calls m)
   7.697 -  in
   7.698 -    named_thms
   7.699 -    |> timed_reconstructor
   7.700 -    |>> log o prefix (reconstructor_tag reconstructor id)
   7.701 -    |> snd
   7.702 -  end
   7.703 -
   7.704 -val try_timeout = seconds 5.0
   7.705 -
   7.706 -(* crude hack *)
   7.707 -val num_sledgehammer_calls = Unsynchronized.ref 0
   7.708 -
   7.709 -fun sledgehammer_action args id (st as {pre, name, ...}: Mirabelle.run_args) =
   7.710 -  let val goal = Thm.major_prem_of (#goal (Proof.goal pre)) in
   7.711 -    if can Logic.dest_conjunction goal orelse can Logic.dest_equals goal
   7.712 -    then () else
   7.713 -    let
   7.714 -      val max_calls =
   7.715 -        AList.lookup (op =) args max_callsK |> the_default max_calls_default
   7.716 -        |> Int.fromString |> the
   7.717 -      val _ = num_sledgehammer_calls := !num_sledgehammer_calls + 1;
   7.718 -    in
   7.719 -      if !num_sledgehammer_calls > max_calls then ()
   7.720 -      else
   7.721 -        let
   7.722 -          val reconstructor = Unsynchronized.ref ""
   7.723 -          val named_thms =
   7.724 -            Unsynchronized.ref (NONE : ((string * stature) * thm list) list option)
   7.725 -          val minimize = AList.defined (op =) args minimizeK
   7.726 -          val metis_ft = AList.defined (op =) args metis_ftK
   7.727 -          val trivial =
   7.728 -            if AList.lookup (op =) args check_trivialK |> the_default trivial_default
   7.729 -                            |> Bool.fromString |> the then
   7.730 -              Try0.try0 (SOME try_timeout) ([], [], [], []) pre
   7.731 -              handle TimeLimit.TimeOut => false
   7.732 -            else false
   7.733 -          fun apply_reconstructor m1 m2 =
   7.734 -            if metis_ft
   7.735 -            then
   7.736 -              if not (Mirabelle.catch_result (reconstructor_tag reconstructor) false
   7.737 -                  (run_reconstructor trivial false m1 name reconstructor
   7.738 -                       (these (!named_thms))) id st)
   7.739 -              then
   7.740 -                (Mirabelle.catch_result (reconstructor_tag reconstructor) false
   7.741 -                  (run_reconstructor trivial true m2 name reconstructor
   7.742 -                       (these (!named_thms))) id st; ())
   7.743 -              else ()
   7.744 -            else
   7.745 -              (Mirabelle.catch_result (reconstructor_tag reconstructor) false
   7.746 -                (run_reconstructor trivial false m1 name reconstructor
   7.747 -                     (these (!named_thms))) id st; ())
   7.748 -        in
   7.749 -          change_data id (set_mini minimize);
   7.750 -          Mirabelle.catch sh_tag (run_sledgehammer trivial args reconstructor
   7.751 -                                                   named_thms) id st;
   7.752 -          if is_some (!named_thms)
   7.753 -          then
   7.754 -           (apply_reconstructor Unminimized UnminimizedFT;
   7.755 -            if minimize andalso not (null (these (!named_thms)))
   7.756 -            then
   7.757 -             (Mirabelle.catch minimize_tag
   7.758 -                  (run_minimize args reconstructor named_thms) id st;
   7.759 -              apply_reconstructor Minimized MinimizedFT)
   7.760 -            else ())
   7.761 -          else ()
   7.762 -        end
   7.763 -    end
   7.764 -  end
   7.765 -
   7.766 -fun invoke args =
   7.767 -  Mirabelle.register (init, sledgehammer_action args, done)
   7.768 -
   7.769 -end

     8.1 --- a/src/HOL/Mirabelle/Actions/mirabelle_sledgehammer_filter.ML	Tue Apr 24 13:55:02 2012 +0100
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,198 +0,0 @@
     8.4 -(*  Title:      HOL/Mirabelle/Actions/mirabelle_sledgehammer_filter.ML
     8.5 -    Author:     Jasmin Blanchette, TU Munich
     8.6 -*)
     8.7 -
     8.8 -structure Mirabelle_Sledgehammer_Filter : MIRABELLE_ACTION =
     8.9 -struct
    8.10 -
    8.11 -fun get args name default_value =
    8.12 -  case AList.lookup (op =) args name of
    8.13 -    SOME value => the (Real.fromString value)
    8.14 -  | NONE => default_value
    8.15 -
    8.16 -fun extract_relevance_fudge args
    8.17 -      {local_const_multiplier, worse_irrel_freq, higher_order_irrel_weight,
    8.18 -       abs_rel_weight, abs_irrel_weight, skolem_irrel_weight,
    8.19 -       theory_const_rel_weight, theory_const_irrel_weight,
    8.20 -       chained_const_irrel_weight, intro_bonus, elim_bonus, simp_bonus,
    8.21 -       local_bonus, assum_bonus, chained_bonus, max_imperfect, max_imperfect_exp,
    8.22 -       threshold_divisor, ridiculous_threshold} =
    8.23 -  {local_const_multiplier =
    8.24 -       get args "local_const_multiplier" local_const_multiplier,
    8.25 -   worse_irrel_freq = get args "worse_irrel_freq" worse_irrel_freq,
    8.26 -   higher_order_irrel_weight =
    8.27 -       get args "higher_order_irrel_weight" higher_order_irrel_weight,
    8.28 -   abs_rel_weight = get args "abs_rel_weight" abs_rel_weight,
    8.29 -   abs_irrel_weight = get args "abs_irrel_weight" abs_irrel_weight,
    8.30 -   skolem_irrel_weight = get args "skolem_irrel_weight" skolem_irrel_weight,
    8.31 -   theory_const_rel_weight =
    8.32 -       get args "theory_const_rel_weight" theory_const_rel_weight,
    8.33 -   theory_const_irrel_weight =
    8.34 -       get args "theory_const_irrel_weight" theory_const_irrel_weight,
    8.35 -   chained_const_irrel_weight =
    8.36 -       get args "chained_const_irrel_weight" chained_const_irrel_weight,
    8.37 -   intro_bonus = get args "intro_bonus" intro_bonus,
    8.38 -   elim_bonus = get args "elim_bonus" elim_bonus,
    8.39 -   simp_bonus = get args "simp_bonus" simp_bonus,
    8.40 -   local_bonus = get args "local_bonus" local_bonus,
    8.41 -   assum_bonus = get args "assum_bonus" assum_bonus,
    8.42 -   chained_bonus = get args "chained_bonus" chained_bonus,
    8.43 -   max_imperfect = get args "max_imperfect" max_imperfect,
    8.44 -   max_imperfect_exp = get args "max_imperfect_exp" max_imperfect_exp,
    8.45 -   threshold_divisor = get args "threshold_divisor" threshold_divisor,
    8.46 -   ridiculous_threshold = get args "ridiculous_threshold" ridiculous_threshold}
    8.47 -
    8.48 -structure Prooftab =
    8.49 -  Table(type key = int * int val ord = prod_ord int_ord int_ord)
    8.50 -
    8.51 -val proof_table = Unsynchronized.ref (Prooftab.empty: string list list Prooftab.table)
    8.52 -
    8.53 -val num_successes = Unsynchronized.ref ([] : (int * int) list)
    8.54 -val num_failures = Unsynchronized.ref ([] : (int * int) list)
    8.55 -val num_found_proofs = Unsynchronized.ref ([] : (int * int) list)
    8.56 -val num_lost_proofs = Unsynchronized.ref ([] : (int * int) list)
    8.57 -val num_found_facts = Unsynchronized.ref ([] : (int * int) list)
    8.58 -val num_lost_facts = Unsynchronized.ref ([] : (int * int) list)
    8.59 -
    8.60 -fun get id c = the_default 0 (AList.lookup (op =) (!c) id)
    8.61 -fun add id c n =
    8.62 -  c := (case AList.lookup (op =) (!c) id of
    8.63 -          SOME m => AList.update (op =) (id, m + n) (!c)
    8.64 -        | NONE => (id, n) :: !c)
    8.65 -
    8.66 -fun init proof_file _ thy =
    8.67 -  let
    8.68 -    fun do_line line =
    8.69 -      case line |> space_explode ":" of
    8.70 -        [line_num, offset, proof] =>
    8.71 -        SOME (pairself (the o Int.fromString) (line_num, offset),
    8.72 -              proof |> space_explode " " |> filter_out (curry (op =) ""))
    8.73 -       | _ => NONE
    8.74 -    val proofs = File.read (Path.explode proof_file)
    8.75 -    val proof_tab =
    8.76 -      proofs |> space_explode "\n"
    8.77 -             |> map_filter do_line
    8.78 -             |> AList.coalesce (op =)
    8.79 -             |> Prooftab.make
    8.80 -  in proof_table := proof_tab; thy end
    8.81 -
    8.82 -fun percentage a b = if b = 0 then "N/A" else string_of_int (a * 100 div b)
    8.83 -fun percentage_alt a b = percentage a (a + b)
    8.84 -
    8.85 -fun done id ({log, ...} : Mirabelle.done_args) =
    8.86 -  if get id num_successes + get id num_failures > 0 then
    8.87 -    (log "";
    8.88 -     log ("Number of overall successes: " ^
    8.89 -          string_of_int (get id num_successes));
    8.90 -     log ("Number of overall failures: " ^ string_of_int (get id num_failures));
    8.91 -     log ("Overall success rate: " ^
    8.92 -          percentage_alt (get id num_successes) (get id num_failures) ^ "%");
    8.93 -     log ("Number of found proofs: " ^ string_of_int (get id num_found_proofs));
    8.94 -     log ("Number of lost proofs: " ^ string_of_int (get id num_lost_proofs));
    8.95 -     log ("Proof found rate: " ^
    8.96 -          percentage_alt (get id num_found_proofs) (get id num_lost_proofs) ^
    8.97 -          "%");
    8.98 -     log ("Number of found facts: " ^ string_of_int (get id num_found_facts));
    8.99 -     log ("Number of lost facts: " ^ string_of_int (get id num_lost_facts));
   8.100 -     log ("Fact found rate: " ^
   8.101 -          percentage_alt (get id num_found_facts) (get id num_lost_facts) ^
   8.102 -          "%"))
   8.103 -  else
   8.104 -    ()
   8.105 -
   8.106 -val default_prover = ATP_Systems.eN (* arbitrary ATP *)
   8.107 -
   8.108 -fun with_index (i, s) = s ^ "@" ^ string_of_int i
   8.109 -
   8.110 -fun action args id ({pre, pos, log, ...} : Mirabelle.run_args) =
   8.111 -  case (Position.line_of pos, Position.offset_of pos) of
   8.112 -    (SOME line_num, SOME offset) =>
   8.113 -    (case Prooftab.lookup (!proof_table) (line_num, offset) of
   8.114 -       SOME proofs =>
   8.115 -       let
   8.116 -         val {context = ctxt, facts = chained_ths, goal} = Proof.goal pre
   8.117 -         val prover = AList.lookup (op =) args "prover"
   8.118 -                      |> the_default default_prover
   8.119 -         val {relevance_thresholds, max_relevant, slice, ...} =
   8.120 -           Sledgehammer_Isar.default_params ctxt args
   8.121 -         val default_max_relevant =
   8.122 -           Sledgehammer_Provers.default_max_relevant_for_prover ctxt slice
   8.123 -                                                                prover
   8.124 -         val is_appropriate_prop =
   8.125 -           Sledgehammer_Provers.is_appropriate_prop_for_prover ctxt
   8.126 -               default_prover
   8.127 -         val is_built_in_const =
   8.128 -           Sledgehammer_Provers.is_built_in_const_for_prover ctxt default_prover
   8.129 -         val relevance_fudge =
   8.130 -           extract_relevance_fudge args
   8.131 -               (Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover)
   8.132 -         val relevance_override = {add = [], del = [], only = false}
   8.133 -         val subgoal = 1
   8.134 -         val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal subgoal
   8.135 -         val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover
   8.136 -         val facts =
   8.137 -          Sledgehammer_Filter.nearly_all_facts ctxt ho_atp relevance_override
   8.138 -                                               chained_ths hyp_ts concl_t
   8.139 -          |> filter (is_appropriate_prop o prop_of o snd)
   8.140 -          |> Sledgehammer_Filter.relevant_facts ctxt relevance_thresholds
   8.141 -                 (the_default default_max_relevant max_relevant)
   8.142 -                 is_built_in_const relevance_fudge relevance_override
   8.143 -                 chained_ths hyp_ts concl_t
   8.144 -           |> map (fst o fst)
   8.145 -         val (found_facts, lost_facts) =
   8.146 -           flat proofs |> sort_distinct string_ord
   8.147 -           |> map (fn fact => (find_index (curry (op =) fact) facts, fact))
   8.148 -           |> List.partition (curry (op <=) 0 o fst)
   8.149 -           |>> sort (prod_ord int_ord string_ord) ||> map snd
   8.150 -         val found_proofs = filter (forall (member (op =) facts)) proofs
   8.151 -         val n = length found_proofs
   8.152 -         val _ =
   8.153 -           if n = 0 then
   8.154 -             (add id num_failures 1; log "Failure")
   8.155 -           else
   8.156 -             (add id num_successes 1;
   8.157 -              add id num_found_proofs n;
   8.158 -              log ("Success (" ^ string_of_int n ^ " of " ^
   8.159 -                   string_of_int (length proofs) ^ " proofs)"))
   8.160 -         val _ = add id num_lost_proofs (length proofs - n)
   8.161 -         val _ = add id num_found_facts (length found_facts)
   8.162 -         val _ = add id num_lost_facts (length lost_facts)
   8.163 -         val _ =
   8.164 -           if null found_facts then
   8.165 -             ()
   8.166 -           else
   8.167 -             let
   8.168 -               val found_weight =
   8.169 -                 Real.fromInt (fold (fn (n, _) =>
   8.170 -                                        Integer.add (n * n)) found_facts 0)
   8.171 -                   / Real.fromInt (length found_facts)
   8.172 -                 |> Math.sqrt |> Real.ceil
   8.173 -             in
   8.174 -               log ("Found facts (among " ^ string_of_int (length facts) ^
   8.175 -                    ", weight " ^ string_of_int found_weight ^ "): " ^
   8.176 -                    commas (map with_index found_facts))
   8.177 -             end
   8.178 -         val _ = if null lost_facts then
   8.179 -                   ()
   8.180 -                 else
   8.181 -                   log ("Lost facts (among " ^ string_of_int (length facts) ^
   8.182 -                        "): " ^ commas lost_facts)
   8.183 -       in () end
   8.184 -     | NONE => log "No known proof")
   8.185 -  | _ => ()
   8.186 -
   8.187 -val proof_fileK = "proof_file"
   8.188 -
   8.189 -fun invoke args =
   8.190 -  let
   8.191 -    val (pf_args, other_args) =
   8.192 -      args |> List.partition (curry (op =) proof_fileK o fst)
   8.193 -    val proof_file = case pf_args of
   8.194 -                       [] => error "No \"proof_file\" specified"
   8.195 -                     | (_, s) :: _ => s
   8.196 -  in Mirabelle.register (init proof_file, action other_args, done) end
   8.197 -
   8.198 -end;
   8.199 -
   8.200 -(* Workaround to keep the "mirabelle.pl" script happy *)
   8.201 -structure Mirabelle_Sledgehammer_filter = Mirabelle_Sledgehammer_Filter;

     9.1 --- a/src/HOL/Mirabelle/Mirabelle.thy	Tue Apr 24 13:55:02 2012 +0100
     9.2 +++ b/src/HOL/Mirabelle/Mirabelle.thy	Tue Apr 24 13:59:29 2012 +0100
     9.3 @@ -4,7 +4,7 @@
     9.4  
     9.5  theory Mirabelle
     9.6  imports Sledgehammer
     9.7 -uses "Actions/mirabelle.ML"
     9.8 +uses "Tools/mirabelle.ML"
     9.9       "../ex/sledgehammer_tactics.ML"
    9.10  begin
    9.11  

    10.1 --- a/src/HOL/Mirabelle/Mirabelle_Test.thy	Tue Apr 24 13:55:02 2012 +0100
    10.2 +++ b/src/HOL/Mirabelle/Mirabelle_Test.thy	Tue Apr 24 13:59:29 2012 +0100
    10.3 @@ -7,12 +7,12 @@
    10.4  theory Mirabelle_Test
    10.5  imports Main Mirabelle
    10.6  uses
    10.7 -  "Actions/mirabelle_arith.ML"
    10.8 -  "Actions/mirabelle_metis.ML"
    10.9 -  "Actions/mirabelle_quickcheck.ML"
   10.10 -  "Actions/mirabelle_refute.ML"
   10.11 -  "Actions/mirabelle_sledgehammer.ML"
   10.12 -  "Actions/mirabelle_sledgehammer_filter.ML"
   10.13 +  "Tools/mirabelle_arith.ML"
   10.14 +  "Tools/mirabelle_metis.ML"
   10.15 +  "Tools/mirabelle_quickcheck.ML"
   10.16 +  "Tools/mirabelle_refute.ML"
   10.17 +  "Tools/mirabelle_sledgehammer.ML"
   10.18 +  "Tools/mirabelle_sledgehammer_filter.ML"
   10.19  begin
   10.20  
   10.21  text {*

    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/src/HOL/Mirabelle/Tools/mirabelle.ML	Tue Apr 24 13:59:29 2012 +0100
    11.3 @@ -0,0 +1,213 @@
    11.4 +(*  Title:      HOL/Mirabelle/Actions/mirabelle.ML
    11.5 +    Author:     Jasmin Blanchette and Sascha Boehme, TU Munich
    11.6 +*)
    11.7 +
    11.8 +signature MIRABELLE =
    11.9 +sig
   11.10 +  (*configuration*)
   11.11 +  val logfile : string Config.T
   11.12 +  val timeout : int Config.T
   11.13 +  val start_line : int Config.T
   11.14 +  val end_line : int Config.T
   11.15 +
   11.16 +  (*core*)
   11.17 +  type init_action = int -> theory -> theory
   11.18 +  type done_args = {last: Toplevel.state, log: string -> unit}
   11.19 +  type done_action = int -> done_args -> unit
   11.20 +  type run_args = {pre: Proof.state, post: Toplevel.state option,
   11.21 +    timeout: Time.time, log: string -> unit, pos: Position.T, name: string}
   11.22 +  type run_action = int -> run_args -> unit
   11.23 +  type action = init_action * run_action * done_action
   11.24 +  val catch : (int -> string) -> run_action -> run_action
   11.25 +  val catch_result : (int -> string) -> 'a -> (int -> run_args -> 'a) ->
   11.26 +    int -> run_args -> 'a
   11.27 +  val register : action -> theory -> theory
   11.28 +  val step_hook : Toplevel.transition -> Toplevel.state -> Toplevel.state ->
   11.29 +    unit
   11.30 +
   11.31 +  (*utility functions*)
   11.32 +  val can_apply : Time.time -> (Proof.context -> int -> tactic) ->
   11.33 +    Proof.state -> bool
   11.34 +  val theorems_in_proof_term : thm -> thm list
   11.35 +  val theorems_of_sucessful_proof : Toplevel.state option -> thm list
   11.36 +  val get_setting : (string * string) list -> string * string -> string
   11.37 +  val get_int_setting : (string * string) list -> string * int -> int
   11.38 +  val cpu_time : ('a -> 'b) -> 'a -> 'b * int
   11.39 +end
   11.40 +
   11.41 +
   11.42 +
   11.43 +structure Mirabelle : MIRABELLE =
   11.44 +struct
   11.45 +
   11.46 +(* Mirabelle configuration *)
   11.47 +
   11.48 +val logfile = Attrib.setup_config_string @{binding mirabelle_logfile} (K "")
   11.49 +val timeout = Attrib.setup_config_int @{binding mirabelle_timeout} (K 30)
   11.50 +val start_line = Attrib.setup_config_int @{binding mirabelle_start_line} (K 0)
   11.51 +val end_line = Attrib.setup_config_int @{binding mirabelle_end_line} (K ~1)
   11.52 +
   11.53 +
   11.54 +(* Mirabelle core *)
   11.55 +
   11.56 +type init_action = int -> theory -> theory
   11.57 +type done_args = {last: Toplevel.state, log: string -> unit}
   11.58 +type done_action = int -> done_args -> unit
   11.59 +type run_args = {pre: Proof.state, post: Toplevel.state option,
   11.60 +  timeout: Time.time, log: string -> unit, pos: Position.T, name: string}
   11.61 +type run_action = int -> run_args -> unit
   11.62 +type action = init_action * run_action * done_action
   11.63 +
   11.64 +structure Actions = Theory_Data
   11.65 +(
   11.66 +  type T = (int * run_action * done_action) list
   11.67 +  val empty = []
   11.68 +  val extend = I
   11.69 +  fun merge data = Library.merge (K true) data  (* FIXME potential data loss because of (K true) *)
   11.70 +)
   11.71 +
   11.72 +
   11.73 +fun log_exn log tag id e = log (tag id ^ "exception:\n" ^ General.exnMessage e)
   11.74 +
   11.75 +fun catch tag f id (st as {log, ...}: run_args) = (f id st; ())
   11.76 +  handle exn =>
   11.77 +    if Exn.is_interrupt exn then reraise exn else (log_exn log tag id exn; ())
   11.78 +
   11.79 +fun catch_result tag d f id (st as {log, ...}: run_args) = f id st
   11.80 +  handle exn =>
   11.81 +    if Exn.is_interrupt exn then reraise exn else (log_exn log tag id exn; d)
   11.82 +
   11.83 +fun register (init, run, done) thy =
   11.84 +  let val id = length (Actions.get thy) + 1
   11.85 +  in
   11.86 +    thy
   11.87 +    |> init id
   11.88 +    |> Actions.map (cons (id, run, done))
   11.89 +  end
   11.90 +
   11.91 +local
   11.92 +
   11.93 +fun log thy s =
   11.94 +  let fun append_to n = if n = "" then K () else File.append (Path.explode n)
   11.95 +  in append_to (Config.get_global thy logfile) (s ^ "\n") end
   11.96 +  (* FIXME: with multithreading and parallel proofs enabled, we might need to
   11.97 +     encapsulate this inside a critical section *)
   11.98 +
   11.99 +fun log_sep thy = log thy "------------------"
  11.100 +
  11.101 +fun apply_actions thy pos name info (pre, post, time) actions =
  11.102 +  let
  11.103 +    fun apply f = f {pre=pre, post=post, timeout=time, log=log thy, pos=pos, name=name}
  11.104 +    fun run (id, run, _) = (apply (run id); log_sep thy)
  11.105 +  in (log thy info; log_sep thy; List.app run actions) end
  11.106 +
  11.107 +fun in_range _ _ NONE = true
  11.108 +  | in_range l r (SOME i) = (l <= i andalso (r < 0 orelse i <= r))
  11.109 +
  11.110 +fun only_within_range thy pos f x =
  11.111 +  let val l = Config.get_global thy start_line and r = Config.get_global thy end_line
  11.112 +  in if in_range l r (Position.line_of pos) then f x else () end
  11.113 +
  11.114 +in
  11.115 +
  11.116 +fun run_actions tr pre post =
  11.117 +  let
  11.118 +    val thy = Proof.theory_of pre
  11.119 +    val pos = Toplevel.pos_of tr
  11.120 +    val name = Toplevel.name_of tr
  11.121 +    val st = (pre, post, Time.fromSeconds (Config.get_global thy timeout))
  11.122 +
  11.123 +    val str0 = string_of_int o the_default 0
  11.124 +    val loc = str0 (Position.line_of pos) ^ ":" ^ str0 (Position.offset_of pos)
  11.125 +    val info = "\n\nat " ^ loc ^ " (" ^ name ^ "):"
  11.126 +  in
  11.127 +    only_within_range thy pos (apply_actions thy pos name info st) (Actions.get thy)
  11.128 +  end
  11.129 +
  11.130 +fun done_actions st =
  11.131 +  let
  11.132 +    val thy = Toplevel.theory_of st
  11.133 +    val _ = log thy "\n\n";
  11.134 +  in
  11.135 +    thy
  11.136 +    |> Actions.get
  11.137 +    |> List.app (fn (id, _, done) => done id {last=st, log=log thy})
  11.138 +  end
  11.139 +
  11.140 +end
  11.141 +
  11.142 +val whitelist = ["apply", "by", "proof"]
  11.143 +
  11.144 +fun step_hook tr pre post =
  11.145 + (* FIXME: might require wrapping into "interruptible" *)
  11.146 +  if can (Proof.assert_backward o Toplevel.proof_of) pre andalso
  11.147 +     member (op =) whitelist (Toplevel.name_of tr)
  11.148 +  then run_actions tr (Toplevel.proof_of pre) (SOME post)
  11.149 +  else if not (Toplevel.is_toplevel pre) andalso Toplevel.is_toplevel post
  11.150 +  then done_actions pre
  11.151 +  else ()   (* FIXME: add theory_hook here *)
  11.152 +
  11.153 +
  11.154 +
  11.155 +(* Mirabelle utility functions *)
  11.156 +
  11.157 +fun can_apply time tac st =
  11.158 +  let
  11.159 +    val {context = ctxt, facts, goal} = Proof.goal st
  11.160 +    val full_tac = HEADGOAL (Method.insert_tac facts THEN' tac ctxt)
  11.161 +  in
  11.162 +    (case try (TimeLimit.timeLimit time (Seq.pull o full_tac)) goal of
  11.163 +      SOME (SOME _) => true
  11.164 +    | _ => false)
  11.165 +  end
  11.166 +
  11.167 +local
  11.168 +
  11.169 +fun fold_body_thms f =
  11.170 +  let
  11.171 +    fun app n (PBody {thms, ...}) = thms |> fold (fn (i, (name, prop, body)) =>
  11.172 +      fn (x, seen) =>
  11.173 +        if Inttab.defined seen i then (x, seen)
  11.174 +        else
  11.175 +          let
  11.176 +            val body' = Future.join body
  11.177 +            val (x', seen') = app (n + (if name = "" then 0 else 1)) body'
  11.178 +              (x, Inttab.update (i, ()) seen)
  11.179 +        in (x' |> n = 0 ? f (name, prop, body'), seen') end)
  11.180 +  in fn bodies => fn x => #1 (fold (app 0) bodies (x, Inttab.empty)) end
  11.181 +
  11.182 +in
  11.183 +
  11.184 +fun theorems_in_proof_term thm =
  11.185 +  let
  11.186 +    val all_thms = Global_Theory.all_thms_of (Thm.theory_of_thm thm)
  11.187 +    fun collect (s, _, _) = if s <> "" then insert (op =) s else I
  11.188 +    fun member_of xs (x, y) = if member (op =) xs x then SOME y else NONE
  11.189 +    fun resolve_thms names = map_filter (member_of names) all_thms
  11.190 +  in
  11.191 +    resolve_thms (fold_body_thms collect [Thm.proof_body_of thm] [])
  11.192 +  end
  11.193 +
  11.194 +end
  11.195 +
  11.196 +fun theorems_of_sucessful_proof state =
  11.197 +  (case state of
  11.198 +    NONE => []
  11.199 +  | SOME st =>
  11.200 +      if not (Toplevel.is_proof st) then []
  11.201 +      else theorems_in_proof_term (#goal (Proof.goal (Toplevel.proof_of st))))
  11.202 +
  11.203 +fun get_setting settings (key, default) =
  11.204 +  the_default default (AList.lookup (op =) settings key)
  11.205 +
  11.206 +fun get_int_setting settings (key, default) =
  11.207 +  (case Option.map Int.fromString (AList.lookup (op =) settings key) of
  11.208 +    SOME (SOME i) => i
  11.209 +  | SOME NONE => error ("bad option: " ^ key)
  11.210 +  | NONE => default)
  11.211 +
  11.212 +fun cpu_time f x =
  11.213 +  let val ({cpu, ...}, y) = Timing.timing f x
  11.214 +  in (y, Time.toMilliseconds cpu) end
  11.215 +
  11.216 +end

    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/src/HOL/Mirabelle/Tools/mirabelle_arith.ML	Tue Apr 24 13:59:29 2012 +0100
    12.3 @@ -0,0 +1,21 @@
    12.4 +(*  Title:      HOL/Mirabelle/Actions/mirabelle_arith.ML
    12.5 +    Author:     Jasmin Blanchette and Sascha Boehme, TU Munich
    12.6 +*)
    12.7 +
    12.8 +structure Mirabelle_Arith : MIRABELLE_ACTION =
    12.9 +struct
   12.10 +
   12.11 +fun arith_tag id = "#" ^ string_of_int id ^ " arith: "
   12.12 +
   12.13 +fun init _ = I
   12.14 +fun done _ _ = ()
   12.15 +
   12.16 +fun run id ({pre, timeout, log, ...}: Mirabelle.run_args) =
   12.17 +  if Mirabelle.can_apply timeout Arith_Data.arith_tac pre
   12.18 +  then log (arith_tag id ^ "succeeded")
   12.19 +  else ()
   12.20 +  handle TimeLimit.TimeOut => log (arith_tag id ^ "timeout")
   12.21 +
   12.22 +fun invoke _ = Mirabelle.register (init, Mirabelle.catch arith_tag run, done)
   12.23 +
   12.24 +end

    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/src/HOL/Mirabelle/Tools/mirabelle_metis.ML	Tue Apr 24 13:59:29 2012 +0100
    13.3 @@ -0,0 +1,35 @@
    13.4 +(*  Title:      HOL/Mirabelle/Actions/mirabelle_metis.ML
    13.5 +    Author:     Jasmin Blanchette and Sascha Boehme, TU Munich
    13.6 +*)
    13.7 +
    13.8 +structure Mirabelle_Metis : MIRABELLE_ACTION =
    13.9 +struct
   13.10 +
   13.11 +fun metis_tag id = "#" ^ string_of_int id ^ " metis: "
   13.12 +
   13.13 +fun init _ = I
   13.14 +fun done _ _ = ()
   13.15 +
   13.16 +fun run id ({pre, post, timeout, log, ...}: Mirabelle.run_args) =
   13.17 +  let
   13.18 +    val thms = Mirabelle.theorems_of_sucessful_proof post
   13.19 +    val names = map Thm.get_name_hint thms
   13.20 +    val add_info = if null names then I else suffix (":\n" ^ commas names)
   13.21 +
   13.22 +    val facts = Facts.props (Proof_Context.facts_of (Proof.context_of pre))
   13.23 +
   13.24 +    fun metis ctxt =
   13.25 +      Metis_Tactic.metis_tac [] ATP_Problem_Generate.liftingN ctxt
   13.26 +                             (thms @ facts)
   13.27 +  in
   13.28 +    (if Mirabelle.can_apply timeout metis pre then "succeeded" else "failed")
   13.29 +    |> prefix (metis_tag id)
   13.30 +    |> add_info
   13.31 +    |> log
   13.32 +  end
   13.33 +  handle TimeLimit.TimeOut => log (metis_tag id ^ "timeout")
   13.34 +       | ERROR msg => log (metis_tag id ^ "error: " ^ msg)
   13.35 +
   13.36 +fun invoke _ = Mirabelle.register (init, Mirabelle.catch metis_tag run, done)
   13.37 +
   13.38 +end

    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/src/HOL/Mirabelle/Tools/mirabelle_quickcheck.ML	Tue Apr 24 13:59:29 2012 +0100
    14.3 @@ -0,0 +1,28 @@
    14.4 +(*  Title:      HOL/Mirabelle/Actions/mirabelle_quickcheck.ML
    14.5 +    Author:     Jasmin Blanchette and Sascha Boehme, TU Munich
    14.6 +*)
    14.7 +
    14.8 +structure Mirabelle_Quickcheck : MIRABELLE_ACTION =
    14.9 +struct
   14.10 +
   14.11 +fun qc_tag id = "#" ^ string_of_int id ^ " quickcheck: "
   14.12 +
   14.13 +fun init _ = I
   14.14 +fun done _ _ = ()
   14.15 +
   14.16 +fun run args id ({pre, timeout, log, ...}: Mirabelle.run_args) =
   14.17 +  let
   14.18 +    val has_valid_key = member (op =) ["iterations", "size", "generator"] o fst
   14.19 +    val quickcheck = Quickcheck.quickcheck (map (apsnd single) (filter has_valid_key args)) 1
   14.20 +  in
   14.21 +    (case TimeLimit.timeLimit timeout quickcheck pre of
   14.22 +      NONE => log (qc_tag id ^ "no counterexample")
   14.23 +    | SOME _ => log (qc_tag id ^ "counterexample found"))
   14.24 +  end
   14.25 +  handle TimeLimit.TimeOut => log (qc_tag id ^ "timeout")
   14.26 +       | ERROR msg => log (qc_tag id ^ "error: " ^ msg)
   14.27 +
   14.28 +fun invoke args =
   14.29 +  Mirabelle.register (init, Mirabelle.catch qc_tag (run args), done)
   14.30 +
   14.31 +end

    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/src/HOL/Mirabelle/Tools/mirabelle_refute.ML	Tue Apr 24 13:59:29 2012 +0100
    15.3 @@ -0,0 +1,39 @@
    15.4 +(*  Title:      HOL/Mirabelle/Actions/mirabelle_refute.ML
    15.5 +    Author:     Jasmin Blanchette and Sascha Boehme, TU Munich
    15.6 +*)
    15.7 +
    15.8 +structure Mirabelle_Refute : MIRABELLE_ACTION =
    15.9 +struct
   15.10 +
   15.11 +
   15.12 +(* FIXME:
   15.13 +fun refute_action args timeout {pre=st, ...} = 
   15.14 +  let
   15.15 +    val subgoal = 1
   15.16 +    val thy = Proof.theory_of st
   15.17 +    val thm = #goal (Proof.raw_goal st)
   15.18 +
   15.19 +    val refute = Refute.refute_goal thy args thm
   15.20 +    val _ = TimeLimit.timeLimit timeout refute subgoal
   15.21 +  in
   15.22 +    val writ_log = Substring.full (the (Symtab.lookup tab "writeln"))
   15.23 +    val warn_log = Substring.full (the (Symtab.lookup tab "warning"))
   15.24 +
   15.25 +    val r =
   15.26 +      if Substring.isSubstring "model found" writ_log
   15.27 +      then
   15.28 +        if Substring.isSubstring "spurious" warn_log
   15.29 +        then SOME "potential counterexample"
   15.30 +        else SOME "real counterexample (bug?)"
   15.31 +      else
   15.32 +        if Substring.isSubstring "time limit" writ_log
   15.33 +        then SOME "no counterexample (timeout)"
   15.34 +        else if Substring.isSubstring "Search terminated" writ_log
   15.35 +        then SOME "no counterexample (normal termination)"
   15.36 +        else SOME "no counterexample (unknown)"
   15.37 +  in r end
   15.38 +*)
   15.39 +
   15.40 +fun invoke args = I (*Mirabelle.register ("refute", refute_action args)*)
   15.41 +
   15.42 +end

    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/src/HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML	Tue Apr 24 13:59:29 2012 +0100
    16.3 @@ -0,0 +1,766 @@
    16.4 +(*  Title:      HOL/Mirabelle/Actions/mirabelle_sledgehammer.ML
    16.5 +    Author:     Jasmin Blanchette and Sascha Boehme and Tobias Nipkow, TU Munich
    16.6 +*)
    16.7 +
    16.8 +structure Mirabelle_Sledgehammer : MIRABELLE_ACTION =
    16.9 +struct
   16.10 +
   16.11 +(*To facilitate synching the description of Mirabelle Sledgehammer parameters
   16.12 + (in ../lib/Tools/mirabelle) with the parameters actually used by this
   16.13 + interface, the former extracts PARAMETER and DESCRIPTION from code below which
   16.14 + has this pattern (provided it appears in a single line):
   16.15 +   val .*K = "PARAMETER" (*DESCRIPTION*)
   16.16 +*)
   16.17 +(*NOTE: descriptions mention parameters (particularly NAME) without a defined range.*)
   16.18 +val proverK = "prover" (*=NAME: name of the external prover to call*)
   16.19 +val prover_timeoutK = "prover_timeout" (*=TIME: timeout for invoked ATP (seconds of process time)*)
   16.20 +val keepK = "keep" (*=PATH: path where to keep temporary files created by sledgehammer*)
   16.21 +val minimizeK = "minimize" (*: enable minimization of theorem set found by sledgehammer*)
   16.22 +                           (*refers to minimization attempted by Mirabelle*)
   16.23 +val minimize_timeoutK = "minimize_timeout" (*=TIME: timeout for each minimization step (seconds of*)
   16.24 +
   16.25 +val reconstructorK = "reconstructor" (*=NAME: how to reconstruct proofs (ie. using metis/smt)*)
   16.26 +val metis_ftK = "metis_ft" (*: apply metis with fully-typed encoding to the theorems found by sledgehammer*)
   16.27 +
   16.28 +val max_relevantK = "max_relevant" (*=NUM: max. relevant clauses to use*)
   16.29 +val max_callsK = "max_calls" (*=NUM: max. no. of calls to sledgehammer*)
   16.30 +val preplay_timeoutK = "preplay_timeout" (*=TIME: timeout for finding reconstructed proof*)
   16.31 +val sh_minimizeK = "sh_minimize" (*: instruct sledgehammer to run its minimizer*)
   16.32 +
   16.33 +val check_trivialK = "check_trivial" (*: check if goals are "trivial" (false by default)*)
   16.34 +val type_encK = "type_enc" (*=STRING: type encoding scheme*)
   16.35 +val lam_transK = "lam_trans" (*=STRING: lambda translation scheme*)
   16.36 +val strictK = "strict" (*=BOOL: run in strict mode*)
   16.37 +val sliceK = "slice" (*=BOOL: allow sledgehammer-level strategy-scheduling*)
   16.38 +val uncurried_aliasesK = "uncurried_aliases" (*=SMART_BOOL: use fresh function names to alias curried applications*)
   16.39 +val e_selection_heuristicK = "e_selection_heuristic" (*: FIXME*)
   16.40 +val term_orderK = "term_order" (*: FIXME*)
   16.41 +val force_sosK = "force_sos" (*: use SOS*)
   16.42 +val max_new_mono_instancesK = "max_new_mono_instances" (*=NUM: max. new monomorphic instances*)
   16.43 +val max_mono_itersK = "max_mono_iters" (*=NUM: max. iterations of monomorphiser*)
   16.44 +
   16.45 +fun sh_tag id = "#" ^ string_of_int id ^ " sledgehammer: "
   16.46 +fun minimize_tag id = "#" ^ string_of_int id ^ " minimize (sledgehammer): "
   16.47 +fun reconstructor_tag reconstructor id =
   16.48 +  "#" ^ string_of_int id ^ " " ^ (!reconstructor) ^ " (sledgehammer): "
   16.49 +
   16.50 +val separator = "-----"
   16.51 +
   16.52 +(*FIXME sensible to have Mirabelle-level Sledgehammer defaults?*)
   16.53 +(*defaults used in this Mirabelle action*)
   16.54 +val preplay_timeout_default = "4"
   16.55 +val lam_trans_default = "smart"
   16.56 +val uncurried_aliases_default = "smart"
   16.57 +val type_enc_default = "smart"
   16.58 +val strict_default = "false"
   16.59 +val max_relevant_default = "smart"
   16.60 +val slice_default = "true"
   16.61 +val max_calls_default = "10000000"
   16.62 +val trivial_default = "false"
   16.63 +val minimize_timeout_default = 5
   16.64 +
   16.65 +(*If a key is present in args then augment a list with its pair*)
   16.66 +(*This is used to avoid fixing default values at the Mirabelle level, and
   16.67 +  instead use the default values of the tool (Sledgehammer in this case).*)
   16.68 +fun available_parameter args key label list =
   16.69 +  let
   16.70 +    val value = AList.lookup (op =) args key
   16.71 +  in if is_some value then (label, the value) :: list else list end
   16.72 +
   16.73 +
   16.74 +datatype sh_data = ShData of {
   16.75 +  calls: int,
   16.76 +  success: int,
   16.77 +  nontriv_calls: int,
   16.78 +  nontriv_success: int,
   16.79 +  lemmas: int,
   16.80 +  max_lems: int,
   16.81 +  time_isa: int,
   16.82 +  time_prover: int,
   16.83 +  time_prover_fail: int}
   16.84 +
   16.85 +datatype re_data = ReData of {
   16.86 +  calls: int,
   16.87 +  success: int,
   16.88 +  nontriv_calls: int,
   16.89 +  nontriv_success: int,
   16.90 +  proofs: int,
   16.91 +  time: int,
   16.92 +  timeout: int,
   16.93 +  lemmas: int * int * int,
   16.94 +  posns: (Position.T * bool) list
   16.95 +  }
   16.96 +
   16.97 +datatype min_data = MinData of {
   16.98 +  succs: int,
   16.99 +  ab_ratios: int
  16.100 +  }
  16.101 +
  16.102 +fun make_sh_data
  16.103 +      (calls,success,nontriv_calls,nontriv_success,lemmas,max_lems,time_isa,
  16.104 +       time_prover,time_prover_fail) =
  16.105 +  ShData{calls=calls, success=success, nontriv_calls=nontriv_calls,
  16.106 +         nontriv_success=nontriv_success, lemmas=lemmas, max_lems=max_lems,
  16.107 +         time_isa=time_isa, time_prover=time_prover,
  16.108 +         time_prover_fail=time_prover_fail}
  16.109 +
  16.110 +fun make_min_data (succs, ab_ratios) =
  16.111 +  MinData{succs=succs, ab_ratios=ab_ratios}
  16.112 +
  16.113 +fun make_re_data (calls,success,nontriv_calls,nontriv_success,proofs,time,
  16.114 +                  timeout,lemmas,posns) =
  16.115 +  ReData{calls=calls, success=success, nontriv_calls=nontriv_calls,
  16.116 +         nontriv_success=nontriv_success, proofs=proofs, time=time,
  16.117 +         timeout=timeout, lemmas=lemmas, posns=posns}
  16.118 +
  16.119 +val empty_sh_data = make_sh_data (0, 0, 0, 0, 0, 0, 0, 0, 0)
  16.120 +val empty_min_data = make_min_data (0, 0)
  16.121 +val empty_re_data = make_re_data (0, 0, 0, 0, 0, 0, 0, (0,0,0), [])
  16.122 +
  16.123 +fun tuple_of_sh_data (ShData {calls, success, nontriv_calls, nontriv_success,
  16.124 +                              lemmas, max_lems, time_isa,
  16.125 +  time_prover, time_prover_fail}) = (calls, success, nontriv_calls,
  16.126 +  nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail)
  16.127 +
  16.128 +fun tuple_of_min_data (MinData {succs, ab_ratios}) = (succs, ab_ratios)
  16.129 +
  16.130 +fun tuple_of_re_data (ReData {calls, success, nontriv_calls, nontriv_success,
  16.131 +  proofs, time, timeout, lemmas, posns}) = (calls, success, nontriv_calls,
  16.132 +  nontriv_success, proofs, time, timeout, lemmas, posns)
  16.133 +
  16.134 +
  16.135 +datatype reconstructor_mode =
  16.136 +  Unminimized | Minimized | UnminimizedFT | MinimizedFT
  16.137 +
  16.138 +datatype data = Data of {
  16.139 +  sh: sh_data,
  16.140 +  min: min_data,
  16.141 +  re_u: re_data, (* reconstructor with unminimized set of lemmas *)
  16.142 +  re_m: re_data, (* reconstructor with minimized set of lemmas *)
  16.143 +  re_uft: re_data, (* reconstructor with unminimized set of lemmas and fully-typed *)
  16.144 +  re_mft: re_data, (* reconstructor with minimized set of lemmas and fully-typed *)
  16.145 +  mini: bool   (* with minimization *)
  16.146 +  }
  16.147 +
  16.148 +fun make_data (sh, min, re_u, re_m, re_uft, re_mft, mini) =
  16.149 +  Data {sh=sh, min=min, re_u=re_u, re_m=re_m, re_uft=re_uft, re_mft=re_mft,
  16.150 +    mini=mini}
  16.151 +
  16.152 +val empty_data = make_data (empty_sh_data, empty_min_data,
  16.153 +  empty_re_data, empty_re_data, empty_re_data, empty_re_data, false)
  16.154 +
  16.155 +fun map_sh_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
  16.156 +  let val sh' = make_sh_data (f (tuple_of_sh_data sh))
  16.157 +  in make_data (sh', min, re_u, re_m, re_uft, re_mft, mini) end
  16.158 +
  16.159 +fun map_min_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
  16.160 +  let val min' = make_min_data (f (tuple_of_min_data min))
  16.161 +  in make_data (sh, min', re_u, re_m, re_uft, re_mft, mini) end
  16.162 +
  16.163 +fun map_re_data f m (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
  16.164 +  let
  16.165 +    fun map_me g Unminimized   (u, m, uft, mft) = (g u, m, uft, mft)
  16.166 +      | map_me g Minimized     (u, m, uft, mft) = (u, g m, uft, mft)
  16.167 +      | map_me g UnminimizedFT (u, m, uft, mft) = (u, m, g uft, mft)
  16.168 +      | map_me g MinimizedFT   (u, m, uft, mft) = (u, m, uft, g mft)
  16.169 +
  16.170 +    val f' = make_re_data o f o tuple_of_re_data
  16.171 +
  16.172 +    val (re_u', re_m', re_uft', re_mft') =
  16.173 +      map_me f' m (re_u, re_m, re_uft, re_mft)
  16.174 +  in make_data (sh, min, re_u', re_m', re_uft', re_mft', mini) end
  16.175 +
  16.176 +fun set_mini mini (Data {sh, min, re_u, re_m, re_uft, re_mft, ...}) =
  16.177 +  make_data (sh, min, re_u, re_m, re_uft, re_mft, mini)
  16.178 +
  16.179 +fun inc_max (n:int) (s,sos,m) = (s+n, sos + n*n, Int.max(m,n));
  16.180 +
  16.181 +val inc_sh_calls =  map_sh_data
  16.182 +  (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
  16.183 +    => (calls + 1, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail))
  16.184 +
  16.185 +val inc_sh_success = map_sh_data
  16.186 +  (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
  16.187 +    => (calls, success + 1, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail))
  16.188 +
  16.189 +val inc_sh_nontriv_calls =  map_sh_data
  16.190 +  (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
  16.191 +    => (calls, success, nontriv_calls + 1, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail))
  16.192 +
  16.193 +val inc_sh_nontriv_success = map_sh_data
  16.194 +  (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
  16.195 +    => (calls, success, nontriv_calls, nontriv_success + 1, lemmas,max_lems, time_isa, time_prover, time_prover_fail))
  16.196 +
  16.197 +fun inc_sh_lemmas n = map_sh_data
  16.198 +  (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
  16.199 +    => (calls,success,nontriv_calls, nontriv_success, lemmas+n,max_lems,time_isa,time_prover,time_prover_fail))
  16.200 +
  16.201 +fun inc_sh_max_lems n = map_sh_data
  16.202 +  (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
  16.203 +    => (calls,success,nontriv_calls, nontriv_success, lemmas,Int.max(max_lems,n),time_isa,time_prover,time_prover_fail))
  16.204 +
  16.205 +fun inc_sh_time_isa t = map_sh_data
  16.206 +  (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
  16.207 +    => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa + t,time_prover,time_prover_fail))
  16.208 +
  16.209 +fun inc_sh_time_prover t = map_sh_data
  16.210 +  (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
  16.211 +    => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover + t,time_prover_fail))
  16.212 +
  16.213 +fun inc_sh_time_prover_fail t = map_sh_data
  16.214 +  (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
  16.215 +    => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail + t))
  16.216 +
  16.217 +val inc_min_succs = map_min_data
  16.218 +  (fn (succs,ab_ratios) => (succs+1, ab_ratios))
  16.219 +
  16.220 +fun inc_min_ab_ratios r = map_min_data
  16.221 +  (fn (succs, ab_ratios) => (succs, ab_ratios+r))
  16.222 +
  16.223 +val inc_reconstructor_calls = map_re_data
  16.224 +  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
  16.225 +    => (calls + 1, success, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas,posns))
  16.226 +
  16.227 +val inc_reconstructor_success = map_re_data
  16.228 +  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
  16.229 +    => (calls, success + 1, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas,posns))
  16.230 +
  16.231 +val inc_reconstructor_nontriv_calls = map_re_data
  16.232 +  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
  16.233 +    => (calls, success, nontriv_calls + 1, nontriv_success, proofs, time, timeout, lemmas,posns))
  16.234 +
  16.235 +val inc_reconstructor_nontriv_success = map_re_data
  16.236 +  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
  16.237 +    => (calls, success, nontriv_calls, nontriv_success + 1, proofs, time, timeout, lemmas,posns))
  16.238 +
  16.239 +val inc_reconstructor_proofs = map_re_data
  16.240 +  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
  16.241 +    => (calls, success, nontriv_calls, nontriv_success, proofs + 1, time, timeout, lemmas,posns))
  16.242 +
  16.243 +fun inc_reconstructor_time m t = map_re_data
  16.244 + (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
  16.245 +  => (calls, success, nontriv_calls, nontriv_success, proofs, time + t, timeout, lemmas,posns)) m
  16.246 +
  16.247 +val inc_reconstructor_timeout = map_re_data
  16.248 +  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
  16.249 +    => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout + 1, lemmas,posns))
  16.250 +
  16.251 +fun inc_reconstructor_lemmas m n = map_re_data
  16.252 +  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
  16.253 +    => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout, inc_max n lemmas, posns)) m
  16.254 +
  16.255 +fun inc_reconstructor_posns m pos = map_re_data
  16.256 +  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
  16.257 +    => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas, pos::posns)) m
  16.258 +
  16.259 +val str0 = string_of_int o the_default 0
  16.260 +
  16.261 +local
  16.262 +
  16.263 +val str = string_of_int
  16.264 +val str3 = Real.fmt (StringCvt.FIX (SOME 3))
  16.265 +fun percentage a b = string_of_int (a * 100 div b)
  16.266 +fun time t = Real.fromInt t / 1000.0
  16.267 +fun avg_time t n =
  16.268 +  if n > 0 then (Real.fromInt t / 1000.0) / Real.fromInt n else 0.0
  16.269 +
  16.270 +fun log_sh_data log
  16.271 +    (calls, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail) =
  16.272 + (log ("Total number of sledgehammer calls: " ^ str calls);
  16.273 +  log ("Number of successful sledgehammer calls: " ^ str success);
  16.274 +  log ("Number of sledgehammer lemmas: " ^ str lemmas);
  16.275 +  log ("Max number of sledgehammer lemmas: " ^ str max_lems);
  16.276 +  log ("Success rate: " ^ percentage success calls ^ "%");
  16.277 +  log ("Total number of nontrivial sledgehammer calls: " ^ str nontriv_calls);
  16.278 +  log ("Number of successful nontrivial sledgehammer calls: " ^ str nontriv_success);
  16.279 +  log ("Total time for sledgehammer calls (Isabelle): " ^ str3 (time time_isa));
  16.280 +  log ("Total time for successful sledgehammer calls (ATP): " ^ str3 (time time_prover));
  16.281 +  log ("Total time for failed sledgehammer calls (ATP): " ^ str3 (time time_prover_fail));
  16.282 +  log ("Average time for sledgehammer calls (Isabelle): " ^
  16.283 +    str3 (avg_time time_isa calls));
  16.284 +  log ("Average time for successful sledgehammer calls (ATP): " ^
  16.285 +    str3 (avg_time time_prover success));
  16.286 +  log ("Average time for failed sledgehammer calls (ATP): " ^
  16.287 +    str3 (avg_time time_prover_fail (calls - success)))
  16.288 +  )
  16.289 +
  16.290 +fun str_of_pos (pos, triv) =
  16.291 +  str0 (Position.line_of pos) (* ^ ":" ^ str0 (Position.offset_of pos) *) ^
  16.292 +  (if triv then "[T]" else "")
  16.293 +
  16.294 +fun log_re_data log tag sh_calls (re_calls, re_success, re_nontriv_calls,
  16.295 +     re_nontriv_success, re_proofs, re_time, re_timeout,
  16.296 +    (lemmas, lems_sos, lems_max), re_posns) =
  16.297 + (log ("Total number of " ^ tag ^ "reconstructor calls: " ^ str re_calls);
  16.298 +  log ("Number of successful " ^ tag ^ "reconstructor calls: " ^ str re_success ^
  16.299 +    " (proof: " ^ str re_proofs ^ ")");
  16.300 +  log ("Number of " ^ tag ^ "reconstructor timeouts: " ^ str re_timeout);
  16.301 +  log ("Success rate: " ^ percentage re_success sh_calls ^ "%");
  16.302 +  log ("Total number of nontrivial " ^ tag ^ "reconstructor calls: " ^ str re_nontriv_calls);
  16.303 +  log ("Number of successful nontrivial " ^ tag ^ "reconstructor calls: " ^ str re_nontriv_success ^
  16.304 +    " (proof: " ^ str re_proofs ^ ")");
  16.305 +  log ("Number of successful " ^ tag ^ "reconstructor lemmas: " ^ str lemmas);
  16.306 +  log ("SOS of successful " ^ tag ^ "reconstructor lemmas: " ^ str lems_sos);
  16.307 +  log ("Max number of successful " ^ tag ^ "reconstructor lemmas: " ^ str lems_max);
  16.308 +  log ("Total time for successful " ^ tag ^ "reconstructor calls: " ^ str3 (time re_time));
  16.309 +  log ("Average time for successful " ^ tag ^ "reconstructor calls: " ^
  16.310 +    str3 (avg_time re_time re_success));
  16.311 +  if tag=""
  16.312 +  then log ("Proved: " ^ space_implode " " (map str_of_pos re_posns))
  16.313 +  else ()
  16.314 + )
  16.315 +
  16.316 +fun log_min_data log (succs, ab_ratios) =
  16.317 +  (log ("Number of successful minimizations: " ^ string_of_int succs);
  16.318 +   log ("After/before ratios: " ^ string_of_int ab_ratios)
  16.319 +  )
  16.320 +
  16.321 +in
  16.322 +
  16.323 +fun log_data id log (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
  16.324 +  let
  16.325 +    val ShData {calls=sh_calls, ...} = sh
  16.326 +
  16.327 +    fun app_if (ReData {calls, ...}) f = if calls > 0 then f () else ()
  16.328 +    fun log_re tag m =
  16.329 +      log_re_data log tag sh_calls (tuple_of_re_data m)
  16.330 +    fun log_reconstructor (tag1, m1) (tag2, m2) = app_if m1 (fn () =>
  16.331 +      (log_re tag1 m1; log ""; app_if m2 (fn () => log_re tag2 m2)))
  16.332 +  in
  16.333 +    if sh_calls > 0
  16.334 +    then
  16.335 +     (log ("\n\n\nReport #" ^ string_of_int id ^ ":\n");
  16.336 +      log_sh_data log (tuple_of_sh_data sh);
  16.337 +      log "";
  16.338 +      if not mini
  16.339 +      then log_reconstructor ("", re_u) ("fully-typed ", re_uft)
  16.340 +      else
  16.341 +        app_if re_u (fn () =>
  16.342 +         (log_reconstructor ("unminimized ", re_u) ("unminimized fully-typed ", re_uft);
  16.343 +          log "";
  16.344 +          app_if re_m (fn () =>
  16.345 +            (log_min_data log (tuple_of_min_data min); log "";
  16.346 +             log_reconstructor ("", re_m) ("fully-typed ", re_mft))))))
  16.347 +    else ()
  16.348 +  end
  16.349 +
  16.350 +end
  16.351 +
  16.352 +
  16.353 +(* Warning: we implicitly assume single-threaded execution here! *)
  16.354 +val data = Unsynchronized.ref ([] : (int * data) list)
  16.355 +
  16.356 +fun init id thy = (Unsynchronized.change data (cons (id, empty_data)); thy)
  16.357 +fun done id ({log, ...}: Mirabelle.done_args) =
  16.358 +  AList.lookup (op =) (!data) id
  16.359 +  |> Option.map (log_data id log)
  16.360 +  |> K ()
  16.361 +
  16.362 +fun change_data id f = (Unsynchronized.change data (AList.map_entry (op =) id f); ())
  16.363 +
  16.364 +
  16.365 +fun get_prover ctxt args =
  16.366 +  let
  16.367 +    fun default_prover_name () =
  16.368 +      hd (#provers (Sledgehammer_Isar.default_params ctxt []))
  16.369 +      handle List.Empty => error "No ATP available."
  16.370 +    fun get_prover name =
  16.371 +      (name, Sledgehammer_Run.get_minimizing_prover ctxt
  16.372 +                Sledgehammer_Provers.Normal name)
  16.373 +  in
  16.374 +    (case AList.lookup (op =) args proverK of
  16.375 +      SOME name => get_prover name
  16.376 +    | NONE => get_prover (default_prover_name ()))
  16.377 +  end
  16.378 +
  16.379 +type stature = ATP_Problem_Generate.stature
  16.380 +
  16.381 +(* hack *)
  16.382 +fun reconstructor_from_msg args msg =
  16.383 +  (case AList.lookup (op =) args reconstructorK of
  16.384 +    SOME name => name
  16.385 +  | NONE =>
  16.386 +    if String.isSubstring "metis (" msg then
  16.387 +      msg |> Substring.full
  16.388 +          |> Substring.position "metis ("
  16.389 +          |> snd |> Substring.position ")"
  16.390 +          |> fst |> Substring.string
  16.391 +          |> suffix ")"
  16.392 +    else if String.isSubstring "metis" msg then
  16.393 +      "metis"
  16.394 +    else
  16.395 +      "smt")
  16.396 +
  16.397 +local
  16.398 +
  16.399 +datatype sh_result =
  16.400 +  SH_OK of int * int * (string * stature) list |
  16.401 +  SH_FAIL of int * int |
  16.402 +  SH_ERROR
  16.403 +
  16.404 +fun run_sh prover_name prover type_enc strict max_relevant slice lam_trans
  16.405 +        uncurried_aliases e_selection_heuristic term_order force_sos
  16.406 +        hard_timeout timeout preplay_timeout sh_minimizeLST
  16.407 +        max_new_mono_instancesLST max_mono_itersLST dir pos st =
  16.408 +  let
  16.409 +    val {context = ctxt, facts = chained_ths, goal} = Proof.goal st
  16.410 +    val i = 1
  16.411 +    fun set_file_name (SOME dir) =
  16.412 +        Config.put Sledgehammer_Provers.dest_dir dir
  16.413 +        #> Config.put Sledgehammer_Provers.problem_prefix
  16.414 +          ("prob_" ^ str0 (Position.line_of pos) ^ "__")
  16.415 +        #> Config.put SMT_Config.debug_files
  16.416 +          (dir ^ "/" ^ Name.desymbolize false (ATP_Util.timestamp ()) ^ "_"
  16.417 +          ^ serial_string ())
  16.418 +      | set_file_name NONE = I
  16.419 +    val st' =
  16.420 +      st
  16.421 +      |> Proof.map_context
  16.422 +           (set_file_name dir
  16.423 +            #> (Option.map (Config.put ATP_Systems.e_selection_heuristic)
  16.424 +                  e_selection_heuristic |> the_default I)
  16.425 +            #> (Option.map (Config.put ATP_Systems.term_order)
  16.426 +                  term_order |> the_default I)
  16.427 +            #> (Option.map (Config.put ATP_Systems.force_sos)
  16.428 +                  force_sos |> the_default I))
  16.429 +    val params as {relevance_thresholds, max_relevant, slice, ...} =
  16.430 +      Sledgehammer_Isar.default_params ctxt
  16.431 +         ([("verbose", "true"),
  16.432 +           ("type_enc", type_enc),
  16.433 +           ("strict", strict),
  16.434 +           ("lam_trans", lam_trans |> the_default lam_trans_default),
  16.435 +           ("uncurried_aliases", uncurried_aliases |> the_default uncurried_aliases_default),
  16.436 +           ("max_relevant", max_relevant),
  16.437 +           ("slice", slice),
  16.438 +           ("timeout", string_of_int timeout),
  16.439 +           ("preplay_timeout", preplay_timeout)]
  16.440 +          |> sh_minimizeLST (*don't confuse the two minimization flags*)
  16.441 +          |> max_new_mono_instancesLST
  16.442 +          |> max_mono_itersLST)
  16.443 +    val default_max_relevant =
  16.444 +      Sledgehammer_Provers.default_max_relevant_for_prover ctxt slice
  16.445 +        prover_name
  16.446 +    val is_appropriate_prop =
  16.447 +      Sledgehammer_Provers.is_appropriate_prop_for_prover ctxt prover_name
  16.448 +    val is_built_in_const =
  16.449 +      Sledgehammer_Provers.is_built_in_const_for_prover ctxt prover_name
  16.450 +    val relevance_fudge =
  16.451 +      Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover_name
  16.452 +    val relevance_override = {add = [], del = [], only = false}
  16.453 +    val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal i
  16.454 +    val time_limit =
  16.455 +      (case hard_timeout of
  16.456 +        NONE => I
  16.457 +      | SOME secs => TimeLimit.timeLimit (Time.fromSeconds secs))
  16.458 +    fun failed failure =
  16.459 +      ({outcome = SOME failure, used_facts = [], run_time = Time.zeroTime,
  16.460 +        preplay =
  16.461 +          K (ATP_Proof_Reconstruct.Failed_to_Play Sledgehammer_Provers.plain_metis),
  16.462 +        message = K "", message_tail = ""}, ~1)
  16.463 +    val ({outcome, used_facts, run_time, preplay, message, message_tail}
  16.464 +         : Sledgehammer_Provers.prover_result,
  16.465 +        time_isa) = time_limit (Mirabelle.cpu_time (fn () =>
  16.466 +      let
  16.467 +        val _ = if is_appropriate_prop concl_t then ()
  16.468 +                else raise Fail "inappropriate"
  16.469 +        val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover_name
  16.470 +        val facts =
  16.471 +          Sledgehammer_Filter.nearly_all_facts ctxt ho_atp relevance_override
  16.472 +            chained_ths hyp_ts concl_t
  16.473 +          |> filter (is_appropriate_prop o prop_of o snd)
  16.474 +          |> Sledgehammer_Filter.relevant_facts ctxt relevance_thresholds
  16.475 +                 (the_default default_max_relevant max_relevant)
  16.476 +                 is_built_in_const relevance_fudge relevance_override
  16.477 +                 chained_ths hyp_ts concl_t
  16.478 +        val problem =
  16.479 +          {state = st', goal = goal, subgoal = i,
  16.480 +           subgoal_count = Sledgehammer_Util.subgoal_count st,
  16.481 +           facts = facts |> map Sledgehammer_Provers.Untranslated_Fact}
  16.482 +      in prover params (K (K (K ""))) problem end)) ()
  16.483 +      handle TimeLimit.TimeOut => failed ATP_Proof.TimedOut
  16.484 +           | Fail "inappropriate" => failed ATP_Proof.Inappropriate
  16.485 +    val time_prover = run_time |> Time.toMilliseconds
  16.486 +    val msg = message (preplay ()) ^ message_tail
  16.487 +  in
  16.488 +    case outcome of
  16.489 +      NONE => (msg, SH_OK (time_isa, time_prover, used_facts))
  16.490 +    | SOME _ => (msg, SH_FAIL (time_isa, time_prover))
  16.491 +  end
  16.492 +  handle ERROR msg => ("error: " ^ msg, SH_ERROR)
  16.493 +
  16.494 +fun thms_of_name ctxt name =
  16.495 +  let
  16.496 +    val lex = Keyword.get_lexicons
  16.497 +    val get = maps (Proof_Context.get_fact ctxt o fst)
  16.498 +  in
  16.499 +    Source.of_string name
  16.500 +    |> Symbol.source
  16.501 +    |> Token.source {do_recover=SOME false} lex Position.start
  16.502 +    |> Token.source_proper
  16.503 +    |> Source.source Token.stopper (Parse_Spec.xthms1 >> get) NONE
  16.504 +    |> Source.exhaust
  16.505 +  end
  16.506 +
  16.507 +in
  16.508 +
  16.509 +fun run_sledgehammer trivial args reconstructor named_thms id
  16.510 +      ({pre=st, log, pos, ...}: Mirabelle.run_args) =
  16.511 +  let
  16.512 +    val triv_str = if trivial then "[T] " else ""
  16.513 +    val _ = change_data id inc_sh_calls
  16.514 +    val _ = if trivial then () else change_data id inc_sh_nontriv_calls
  16.515 +    val (prover_name, prover) = get_prover (Proof.context_of st) args
  16.516 +    val type_enc = AList.lookup (op =) args type_encK |> the_default type_enc_default
  16.517 +    val strict = AList.lookup (op =) args strictK |> the_default strict_default
  16.518 +    val max_relevant = AList.lookup (op =) args max_relevantK |> the_default max_relevant_default
  16.519 +    val slice = AList.lookup (op =) args sliceK |> the_default slice_default
  16.520 +    val lam_trans = AList.lookup (op =) args lam_transK
  16.521 +    val uncurried_aliases = AList.lookup (op =) args uncurried_aliasesK
  16.522 +    val e_selection_heuristic = AList.lookup (op =) args e_selection_heuristicK
  16.523 +    val term_order = AList.lookup (op =) args term_orderK
  16.524 +    val force_sos = AList.lookup (op =) args force_sosK
  16.525 +      |> Option.map (curry (op <>) "false")
  16.526 +    val dir = AList.lookup (op =) args keepK
  16.527 +    val timeout = Mirabelle.get_int_setting args (prover_timeoutK, 30)
  16.528 +    (* always use a hard timeout, but give some slack so that the automatic
  16.529 +       minimizer has a chance to do its magic *)
  16.530 +    val preplay_timeout = AList.lookup (op =) args preplay_timeoutK
  16.531 +      |> the_default preplay_timeout_default
  16.532 +    val sh_minimizeLST = available_parameter args sh_minimizeK "minimize"
  16.533 +    val max_new_mono_instancesLST =
  16.534 +      available_parameter args max_new_mono_instancesK max_new_mono_instancesK
  16.535 +    val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK
  16.536 +    val hard_timeout = SOME (2 * timeout)
  16.537 +    val (msg, result) =
  16.538 +      run_sh prover_name prover type_enc strict max_relevant slice lam_trans
  16.539 +        uncurried_aliases e_selection_heuristic term_order force_sos
  16.540 +        hard_timeout timeout preplay_timeout sh_minimizeLST
  16.541 +        max_new_mono_instancesLST max_mono_itersLST dir pos st
  16.542 +  in
  16.543 +    case result of
  16.544 +      SH_OK (time_isa, time_prover, names) =>
  16.545 +        let
  16.546 +          fun get_thms (name, stature) =
  16.547 +            try (thms_of_name (Proof.context_of st)) name
  16.548 +            |> Option.map (pair (name, stature))
  16.549 +        in
  16.550 +          change_data id inc_sh_success;
  16.551 +          if trivial then () else change_data id inc_sh_nontriv_success;
  16.552 +          change_data id (inc_sh_lemmas (length names));
  16.553 +          change_data id (inc_sh_max_lems (length names));
  16.554 +          change_data id (inc_sh_time_isa time_isa);
  16.555 +          change_data id (inc_sh_time_prover time_prover);
  16.556 +          reconstructor := reconstructor_from_msg args msg;
  16.557 +          named_thms := SOME (map_filter get_thms names);
  16.558 +          log (sh_tag id ^ triv_str ^ "succeeded (" ^ string_of_int time_isa ^ "+" ^
  16.559 +            string_of_int time_prover ^ ") [" ^ prover_name ^ "]:\n" ^ msg)
  16.560 +        end
  16.561 +    | SH_FAIL (time_isa, time_prover) =>
  16.562 +        let
  16.563 +          val _ = change_data id (inc_sh_time_isa time_isa)
  16.564 +          val _ = change_data id (inc_sh_time_prover_fail time_prover)
  16.565 +        in log (sh_tag id ^ triv_str ^ "failed: " ^ msg) end
  16.566 +    | SH_ERROR => log (sh_tag id ^ "failed: " ^ msg)
  16.567 +  end
  16.568 +
  16.569 +end
  16.570 +
  16.571 +fun run_minimize args reconstructor named_thms id
  16.572 +        ({pre=st, log, ...}: Mirabelle.run_args) =
  16.573 +  let
  16.574 +    val ctxt = Proof.context_of st
  16.575 +    val n0 = length (these (!named_thms))
  16.576 +    val (prover_name, _) = get_prover ctxt args
  16.577 +    val type_enc = AList.lookup (op =) args type_encK |> the_default type_enc_default
  16.578 +    val strict = AList.lookup (op =) args strictK |> the_default strict_default
  16.579 +    val timeout =
  16.580 +      AList.lookup (op =) args minimize_timeoutK
  16.581 +      |> Option.map (fst o read_int o raw_explode)  (* FIXME Symbol.explode (?) *)
  16.582 +      |> the_default minimize_timeout_default
  16.583 +    val preplay_timeout = AList.lookup (op =) args preplay_timeoutK
  16.584 +      |> the_default preplay_timeout_default
  16.585 +    val sh_minimizeLST = available_parameter args sh_minimizeK "minimize"
  16.586 +    val max_new_mono_instancesLST =
  16.587 +      available_parameter args max_new_mono_instancesK max_new_mono_instancesK
  16.588 +    val max_mono_itersLST = available_parameter args max_mono_itersK max_mono_itersK
  16.589 +    val params = Sledgehammer_Isar.default_params ctxt
  16.590 +     ([("provers", prover_name),
  16.591 +       ("verbose", "true"),
  16.592 +       ("type_enc", type_enc),
  16.593 +       ("strict", strict),
  16.594 +       ("timeout", string_of_int timeout),
  16.595 +       ("preplay_timeout", preplay_timeout)]
  16.596 +      |> sh_minimizeLST (*don't confuse the two minimization flags*)
  16.597 +      |> max_new_mono_instancesLST
  16.598 +      |> max_mono_itersLST)
  16.599 +    val minimize =
  16.600 +      Sledgehammer_Minimize.minimize_facts prover_name params
  16.601 +          true 1 (Sledgehammer_Util.subgoal_count st)
  16.602 +    val _ = log separator
  16.603 +    val (used_facts, (preplay, message, message_tail)) =
  16.604 +      minimize st (these (!named_thms))
  16.605 +    val msg = message (preplay ()) ^ message_tail
  16.606 +  in
  16.607 +    case used_facts of
  16.608 +      SOME named_thms' =>
  16.609 +        (change_data id inc_min_succs;
  16.610 +         change_data id (inc_min_ab_ratios ((100 * length named_thms') div n0));
  16.611 +         if length named_thms' = n0
  16.612 +         then log (minimize_tag id ^ "already minimal")
  16.613 +         else (reconstructor := reconstructor_from_msg args msg;
  16.614 +               named_thms := SOME named_thms';
  16.615 +               log (minimize_tag id ^ "succeeded:\n" ^ msg))
  16.616 +        )
  16.617 +    | NONE => log (minimize_tag id ^ "failed: " ^ msg)
  16.618 +  end
  16.619 +
  16.620 +fun override_params prover type_enc timeout =
  16.621 +  [("provers", prover),
  16.622 +   ("max_relevant", "0"),
  16.623 +   ("type_enc", type_enc),
  16.624 +   ("strict", "true"),
  16.625 +   ("slice", "false"),
  16.626 +   ("timeout", timeout |> Time.toSeconds |> string_of_int)]
  16.627 +
  16.628 +fun run_reconstructor trivial full m name reconstructor named_thms id
  16.629 +    ({pre=st, timeout, log, pos, ...}: Mirabelle.run_args) =
  16.630 +  let
  16.631 +    fun do_reconstructor named_thms ctxt =
  16.632 +      let
  16.633 +        val ref_of_str =
  16.634 +          suffix ";" #> Outer_Syntax.scan Position.none #> Parse_Spec.xthm
  16.635 +          #> fst
  16.636 +        val thms = named_thms |> maps snd
  16.637 +        val facts = named_thms |> map (ref_of_str o fst o fst)
  16.638 +        val relevance_override = {add = facts, del = [], only = true}
  16.639 +        fun my_timeout time_slice =
  16.640 +          timeout |> Time.toReal |> curry Real.* time_slice |> Time.fromReal
  16.641 +        fun sledge_tac time_slice prover type_enc =
  16.642 +          Sledgehammer_Tactics.sledgehammer_as_oracle_tac ctxt
  16.643 +            (override_params prover type_enc (my_timeout time_slice))
  16.644 +            relevance_override
  16.645 +      in
  16.646 +        if !reconstructor = "sledgehammer_tac" then
  16.647 +          sledge_tac 0.2 ATP_Systems.z3_tptpN "mono_native"
  16.648 +          ORELSE' sledge_tac 0.2 ATP_Systems.eN "mono_guards??"
  16.649 +          ORELSE' sledge_tac 0.2 ATP_Systems.vampireN "mono_guards??"
  16.650 +          ORELSE' sledge_tac 0.2 ATP_Systems.spassN "poly_tags"
  16.651 +          ORELSE' Metis_Tactic.metis_tac [] ATP_Problem_Generate.combsN
  16.652 +            ctxt thms
  16.653 +        else if !reconstructor = "smt" then
  16.654 +          SMT_Solver.smt_tac ctxt thms
  16.655 +        else if full then
  16.656 +          Metis_Tactic.metis_tac [ATP_Proof_Reconstruct.full_typesN]
  16.657 +            ATP_Proof_Reconstruct.metis_default_lam_trans ctxt thms
  16.658 +        else if String.isPrefix "metis (" (!reconstructor) then
  16.659 +          let
  16.660 +            val (type_encs, lam_trans) =
  16.661 +              !reconstructor
  16.662 +              |> Outer_Syntax.scan Position.start
  16.663 +              |> filter Token.is_proper |> tl
  16.664 +              |> Metis_Tactic.parse_metis_options |> fst
  16.665 +              |>> the_default [ATP_Proof_Reconstruct.partial_typesN]
  16.666 +              ||> the_default ATP_Proof_Reconstruct.metis_default_lam_trans
  16.667 +          in Metis_Tactic.metis_tac type_encs lam_trans ctxt thms end
  16.668 +        else if !reconstructor = "metis" then
  16.669 +          Metis_Tactic.metis_tac [] ATP_Proof_Reconstruct.metis_default_lam_trans ctxt
  16.670 +            thms
  16.671 +        else
  16.672 +          K all_tac
  16.673 +      end
  16.674 +    fun apply_reconstructor named_thms =
  16.675 +      Mirabelle.can_apply timeout (do_reconstructor named_thms) st
  16.676 +
  16.677 +    fun with_time (false, t) = "failed (" ^ string_of_int t ^ ")"
  16.678 +      | with_time (true, t) = (change_data id (inc_reconstructor_success m);
  16.679 +          if trivial then ()
  16.680 +          else change_data id (inc_reconstructor_nontriv_success m);
  16.681 +          change_data id (inc_reconstructor_lemmas m (length named_thms));
  16.682 +          change_data id (inc_reconstructor_time m t);
  16.683 +          change_data id (inc_reconstructor_posns m (pos, trivial));
  16.684 +          if name = "proof" then change_data id (inc_reconstructor_proofs m)
  16.685 +          else ();
  16.686 +          "succeeded (" ^ string_of_int t ^ ")")
  16.687 +    fun timed_reconstructor named_thms =
  16.688 +      (with_time (Mirabelle.cpu_time apply_reconstructor named_thms), true)
  16.689 +      handle TimeLimit.TimeOut => (change_data id (inc_reconstructor_timeout m);
  16.690 +               ("timeout", false))
  16.691 +           | ERROR msg => ("error: " ^ msg, false)
  16.692 +
  16.693 +    val _ = log separator
  16.694 +    val _ = change_data id (inc_reconstructor_calls m)
  16.695 +    val _ = if trivial then ()
  16.696 +            else change_data id (inc_reconstructor_nontriv_calls m)
  16.697 +  in
  16.698 +    named_thms
  16.699 +    |> timed_reconstructor
  16.700 +    |>> log o prefix (reconstructor_tag reconstructor id)
  16.701 +    |> snd
  16.702 +  end
  16.703 +
  16.704 +val try_timeout = seconds 5.0
  16.705 +
  16.706 +(* crude hack *)
  16.707 +val num_sledgehammer_calls = Unsynchronized.ref 0
  16.708 +
  16.709 +fun sledgehammer_action args id (st as {pre, name, ...}: Mirabelle.run_args) =
  16.710 +  let val goal = Thm.major_prem_of (#goal (Proof.goal pre)) in
  16.711 +    if can Logic.dest_conjunction goal orelse can Logic.dest_equals goal
  16.712 +    then () else
  16.713 +    let
  16.714 +      val max_calls =
  16.715 +        AList.lookup (op =) args max_callsK |> the_default max_calls_default
  16.716 +        |> Int.fromString |> the
  16.717 +      val _ = num_sledgehammer_calls := !num_sledgehammer_calls + 1;
  16.718 +    in
  16.719 +      if !num_sledgehammer_calls > max_calls then ()
  16.720 +      else
  16.721 +        let
  16.722 +          val reconstructor = Unsynchronized.ref ""
  16.723 +          val named_thms =
  16.724 +            Unsynchronized.ref (NONE : ((string * stature) * thm list) list option)
  16.725 +          val minimize = AList.defined (op =) args minimizeK
  16.726 +          val metis_ft = AList.defined (op =) args metis_ftK
  16.727 +          val trivial =
  16.728 +            if AList.lookup (op =) args check_trivialK |> the_default trivial_default
  16.729 +                            |> Bool.fromString |> the then
  16.730 +              Try0.try0 (SOME try_timeout) ([], [], [], []) pre
  16.731 +              handle TimeLimit.TimeOut => false
  16.732 +            else false
  16.733 +          fun apply_reconstructor m1 m2 =
  16.734 +            if metis_ft
  16.735 +            then
  16.736 +              if not (Mirabelle.catch_result (reconstructor_tag reconstructor) false
  16.737 +                  (run_reconstructor trivial false m1 name reconstructor
  16.738 +                       (these (!named_thms))) id st)
  16.739 +              then
  16.740 +                (Mirabelle.catch_result (reconstructor_tag reconstructor) false
  16.741 +                  (run_reconstructor trivial true m2 name reconstructor
  16.742 +                       (these (!named_thms))) id st; ())
  16.743 +              else ()
  16.744 +            else
  16.745 +              (Mirabelle.catch_result (reconstructor_tag reconstructor) false
  16.746 +                (run_reconstructor trivial false m1 name reconstructor
  16.747 +                     (these (!named_thms))) id st; ())
  16.748 +        in
  16.749 +          change_data id (set_mini minimize);
  16.750 +          Mirabelle.catch sh_tag (run_sledgehammer trivial args reconstructor
  16.751 +                                                   named_thms) id st;
  16.752 +          if is_some (!named_thms)
  16.753 +          then
  16.754 +           (apply_reconstructor Unminimized UnminimizedFT;
  16.755 +            if minimize andalso not (null (these (!named_thms)))
  16.756 +            then
  16.757 +             (Mirabelle.catch minimize_tag
  16.758 +                  (run_minimize args reconstructor named_thms) id st;
  16.759 +              apply_reconstructor Minimized MinimizedFT)
  16.760 +            else ())
  16.761 +          else ()
  16.762 +        end
  16.763 +    end
  16.764 +  end
  16.765 +
  16.766 +fun invoke args =
  16.767 +  Mirabelle.register (init, sledgehammer_action args, done)
  16.768 +
  16.769 +end

    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/src/HOL/Mirabelle/Tools/mirabelle_sledgehammer_filter.ML	Tue Apr 24 13:59:29 2012 +0100
    17.3 @@ -0,0 +1,198 @@
    17.4 +(*  Title:      HOL/Mirabelle/Actions/mirabelle_sledgehammer_filter.ML
    17.5 +    Author:     Jasmin Blanchette, TU Munich
    17.6 +*)
    17.7 +
    17.8 +structure Mirabelle_Sledgehammer_Filter : MIRABELLE_ACTION =
    17.9 +struct
   17.10 +
   17.11 +fun get args name default_value =
   17.12 +  case AList.lookup (op =) args name of
   17.13 +    SOME value => the (Real.fromString value)
   17.14 +  | NONE => default_value
   17.15 +
   17.16 +fun extract_relevance_fudge args
   17.17 +      {local_const_multiplier, worse_irrel_freq, higher_order_irrel_weight,
   17.18 +       abs_rel_weight, abs_irrel_weight, skolem_irrel_weight,
   17.19 +       theory_const_rel_weight, theory_const_irrel_weight,
   17.20 +       chained_const_irrel_weight, intro_bonus, elim_bonus, simp_bonus,
   17.21 +       local_bonus, assum_bonus, chained_bonus, max_imperfect, max_imperfect_exp,
   17.22 +       threshold_divisor, ridiculous_threshold} =
   17.23 +  {local_const_multiplier =
   17.24 +       get args "local_const_multiplier" local_const_multiplier,
   17.25 +   worse_irrel_freq = get args "worse_irrel_freq" worse_irrel_freq,
   17.26 +   higher_order_irrel_weight =
   17.27 +       get args "higher_order_irrel_weight" higher_order_irrel_weight,
   17.28 +   abs_rel_weight = get args "abs_rel_weight" abs_rel_weight,
   17.29 +   abs_irrel_weight = get args "abs_irrel_weight" abs_irrel_weight,
   17.30 +   skolem_irrel_weight = get args "skolem_irrel_weight" skolem_irrel_weight,
   17.31 +   theory_const_rel_weight =
   17.32 +       get args "theory_const_rel_weight" theory_const_rel_weight,
   17.33 +   theory_const_irrel_weight =
   17.34 +       get args "theory_const_irrel_weight" theory_const_irrel_weight,
   17.35 +   chained_const_irrel_weight =
   17.36 +       get args "chained_const_irrel_weight" chained_const_irrel_weight,
   17.37 +   intro_bonus = get args "intro_bonus" intro_bonus,
   17.38 +   elim_bonus = get args "elim_bonus" elim_bonus,
   17.39 +   simp_bonus = get args "simp_bonus" simp_bonus,
   17.40 +   local_bonus = get args "local_bonus" local_bonus,
   17.41 +   assum_bonus = get args "assum_bonus" assum_bonus,
   17.42 +   chained_bonus = get args "chained_bonus" chained_bonus,
   17.43 +   max_imperfect = get args "max_imperfect" max_imperfect,
   17.44 +   max_imperfect_exp = get args "max_imperfect_exp" max_imperfect_exp,
   17.45 +   threshold_divisor = get args "threshold_divisor" threshold_divisor,
   17.46 +   ridiculous_threshold = get args "ridiculous_threshold" ridiculous_threshold}
   17.47 +
   17.48 +structure Prooftab =
   17.49 +  Table(type key = int * int val ord = prod_ord int_ord int_ord)
   17.50 +
   17.51 +val proof_table = Unsynchronized.ref (Prooftab.empty: string list list Prooftab.table)
   17.52 +
   17.53 +val num_successes = Unsynchronized.ref ([] : (int * int) list)
   17.54 +val num_failures = Unsynchronized.ref ([] : (int * int) list)
   17.55 +val num_found_proofs = Unsynchronized.ref ([] : (int * int) list)
   17.56 +val num_lost_proofs = Unsynchronized.ref ([] : (int * int) list)
   17.57 +val num_found_facts = Unsynchronized.ref ([] : (int * int) list)
   17.58 +val num_lost_facts = Unsynchronized.ref ([] : (int * int) list)
   17.59 +
   17.60 +fun get id c = the_default 0 (AList.lookup (op =) (!c) id)
   17.61 +fun add id c n =
   17.62 +  c := (case AList.lookup (op =) (!c) id of
   17.63 +          SOME m => AList.update (op =) (id, m + n) (!c)
   17.64 +        | NONE => (id, n) :: !c)
   17.65 +
   17.66 +fun init proof_file _ thy =
   17.67 +  let
   17.68 +    fun do_line line =
   17.69 +      case line |> space_explode ":" of
   17.70 +        [line_num, offset, proof] =>
   17.71 +        SOME (pairself (the o Int.fromString) (line_num, offset),
   17.72 +              proof |> space_explode " " |> filter_out (curry (op =) ""))
   17.73 +       | _ => NONE
   17.74 +    val proofs = File.read (Path.explode proof_file)
   17.75 +    val proof_tab =
   17.76 +      proofs |> space_explode "\n"
   17.77 +             |> map_filter do_line
   17.78 +             |> AList.coalesce (op =)
   17.79 +             |> Prooftab.make
   17.80 +  in proof_table := proof_tab; thy end
   17.81 +
   17.82 +fun percentage a b = if b = 0 then "N/A" else string_of_int (a * 100 div b)
   17.83 +fun percentage_alt a b = percentage a (a + b)
   17.84 +
   17.85 +fun done id ({log, ...} : Mirabelle.done_args) =
   17.86 +  if get id num_successes + get id num_failures > 0 then
   17.87 +    (log "";
   17.88 +     log ("Number of overall successes: " ^
   17.89 +          string_of_int (get id num_successes));
   17.90 +     log ("Number of overall failures: " ^ string_of_int (get id num_failures));
   17.91 +     log ("Overall success rate: " ^
   17.92 +          percentage_alt (get id num_successes) (get id num_failures) ^ "%");
   17.93 +     log ("Number of found proofs: " ^ string_of_int (get id num_found_proofs));
   17.94 +     log ("Number of lost proofs: " ^ string_of_int (get id num_lost_proofs));
   17.95 +     log ("Proof found rate: " ^
   17.96 +          percentage_alt (get id num_found_proofs) (get id num_lost_proofs) ^
   17.97 +          "%");
   17.98 +     log ("Number of found facts: " ^ string_of_int (get id num_found_facts));
   17.99 +     log ("Number of lost facts: " ^ string_of_int (get id num_lost_facts));
  17.100 +     log ("Fact found rate: " ^
  17.101 +          percentage_alt (get id num_found_facts) (get id num_lost_facts) ^
  17.102 +          "%"))
  17.103 +  else
  17.104 +    ()
  17.105 +
  17.106 +val default_prover = ATP_Systems.eN (* arbitrary ATP *)
  17.107 +
  17.108 +fun with_index (i, s) = s ^ "@" ^ string_of_int i
  17.109 +
  17.110 +fun action args id ({pre, pos, log, ...} : Mirabelle.run_args) =
  17.111 +  case (Position.line_of pos, Position.offset_of pos) of
  17.112 +    (SOME line_num, SOME offset) =>
  17.113 +    (case Prooftab.lookup (!proof_table) (line_num, offset) of
  17.114 +       SOME proofs =>
  17.115 +       let
  17.116 +         val {context = ctxt, facts = chained_ths, goal} = Proof.goal pre
  17.117 +         val prover = AList.lookup (op =) args "prover"
  17.118 +                      |> the_default default_prover
  17.119 +         val {relevance_thresholds, max_relevant, slice, ...} =
  17.120 +           Sledgehammer_Isar.default_params ctxt args
  17.121 +         val default_max_relevant =
  17.122 +           Sledgehammer_Provers.default_max_relevant_for_prover ctxt slice
  17.123 +                                                                prover
  17.124 +         val is_appropriate_prop =
  17.125 +           Sledgehammer_Provers.is_appropriate_prop_for_prover ctxt
  17.126 +               default_prover
  17.127 +         val is_built_in_const =
  17.128 +           Sledgehammer_Provers.is_built_in_const_for_prover ctxt default_prover
  17.129 +         val relevance_fudge =
  17.130 +           extract_relevance_fudge args
  17.131 +               (Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover)
  17.132 +         val relevance_override = {add = [], del = [], only = false}
  17.133 +         val subgoal = 1
  17.134 +         val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal subgoal
  17.135 +         val ho_atp = Sledgehammer_Provers.is_ho_atp ctxt prover
  17.136 +         val facts =
  17.137 +          Sledgehammer_Filter.nearly_all_facts ctxt ho_atp relevance_override
  17.138 +                                               chained_ths hyp_ts concl_t
  17.139 +          |> filter (is_appropriate_prop o prop_of o snd)
  17.140 +          |> Sledgehammer_Filter.relevant_facts ctxt relevance_thresholds
  17.141 +                 (the_default default_max_relevant max_relevant)
  17.142 +                 is_built_in_const relevance_fudge relevance_override
  17.143 +                 chained_ths hyp_ts concl_t
  17.144 +           |> map (fst o fst)
  17.145 +         val (found_facts, lost_facts) =
  17.146 +           flat proofs |> sort_distinct string_ord
  17.147 +           |> map (fn fact => (find_index (curry (op =) fact) facts, fact))
  17.148 +           |> List.partition (curry (op <=) 0 o fst)
  17.149 +           |>> sort (prod_ord int_ord string_ord) ||> map snd
  17.150 +         val found_proofs = filter (forall (member (op =) facts)) proofs
  17.151 +         val n = length found_proofs
  17.152 +         val _ =
  17.153 +           if n = 0 then
  17.154 +             (add id num_failures 1; log "Failure")
  17.155 +           else
  17.156 +             (add id num_successes 1;
  17.157 +              add id num_found_proofs n;
  17.158 +              log ("Success (" ^ string_of_int n ^ " of " ^
  17.159 +                   string_of_int (length proofs) ^ " proofs)"))
  17.160 +         val _ = add id num_lost_proofs (length proofs - n)
  17.161 +         val _ = add id num_found_facts (length found_facts)
  17.162 +         val _ = add id num_lost_facts (length lost_facts)
  17.163 +         val _ =
  17.164 +           if null found_facts then
  17.165 +             ()
  17.166 +           else
  17.167 +             let
  17.168 +               val found_weight =
  17.169 +                 Real.fromInt (fold (fn (n, _) =>
  17.170 +                                        Integer.add (n * n)) found_facts 0)
  17.171 +                   / Real.fromInt (length found_facts)
  17.172 +                 |> Math.sqrt |> Real.ceil
  17.173 +             in
  17.174 +               log ("Found facts (among " ^ string_of_int (length facts) ^
  17.175 +                    ", weight " ^ string_of_int found_weight ^ "): " ^
  17.176 +                    commas (map with_index found_facts))
  17.177 +             end
  17.178 +         val _ = if null lost_facts then
  17.179 +                   ()
  17.180 +                 else
  17.181 +                   log ("Lost facts (among " ^ string_of_int (length facts) ^
  17.182 +                        "): " ^ commas lost_facts)
  17.183 +       in () end
  17.184 +     | NONE => log "No known proof")
  17.185 +  | _ => ()
  17.186 +
  17.187 +val proof_fileK = "proof_file"
  17.188 +
  17.189 +fun invoke args =
  17.190 +  let
  17.191 +    val (pf_args, other_args) =
  17.192 +      args |> List.partition (curry (op =) proof_fileK o fst)
  17.193 +    val proof_file = case pf_args of
  17.194 +                       [] => error "No \"proof_file\" specified"
  17.195 +                     | (_, s) :: _ => s
  17.196 +  in Mirabelle.register (init proof_file, action other_args, done) end
  17.197 +
  17.198 +end;
  17.199 +
  17.200 +(* Workaround to keep the "mirabelle.pl" script happy *)
  17.201 +structure Mirabelle_Sledgehammer_filter = Mirabelle_Sledgehammer_Filter;

    18.1 --- a/src/HOL/Mirabelle/lib/Tools/mirabelle	Tue Apr 24 13:55:02 2012 +0100
    18.2 +++ b/src/HOL/Mirabelle/lib/Tools/mirabelle	Tue Apr 24 13:59:29 2012 +0100
    18.3 @@ -8,10 +8,10 @@
    18.4  PRG="$(basename "$0")"
    18.5  
    18.6  function print_action_names() {
    18.7 -  ACTIONS="$MIRABELLE_HOME/Actions/mirabelle_*.ML"
    18.8 -  for ACTION in $ACTIONS
    18.9 +  TOOLS="$MIRABELLE_HOME/Actions/mirabelle_*.ML"
   18.10 +  for TOOL in $TOOLS
   18.11    do
   18.12 -    echo $ACTION | perl -w -p -e 's/.*mirabelle_(.*)\.ML/    $1/'
   18.13 +    echo $TOOL | perl -w -p -e 's/.*mirabelle_(.*)\.ML/    $1/'
   18.14    done
   18.15  }
   18.16  

    19.1 --- a/src/HOL/Mirabelle/lib/scripts/mirabelle.pl	Tue Apr 24 13:55:02 2012 +0100
    19.2 +++ b/src/HOL/Mirabelle/lib/scripts/mirabelle.pl	Tue Apr 24 13:59:29 2012 +0100
    19.3 @@ -46,7 +46,7 @@
    19.4  my @action_names;
    19.5  foreach (split(/:/, $actions)) {
    19.6    if (m/([^[]*)/) {
    19.7 -    push @action_files, "\"$mirabelle_home/Actions/mirabelle_$1.ML\"";
    19.8 +    push @action_files, "\"$mirabelle_home/Tools/mirabelle_$1.ML\"";
    19.9      push @action_names, $1;
   19.10    }
   19.11  }
author	sultana
	Tue, 24 Apr 2012 13:59:29 +0100
changeset 48595	15f4309bb9eb
parent 48594	44d1f4e0a46e
child 48596	39c5a1a80acc
src/HOL/IsaMakefile		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Actions/mirabelle.ML		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Actions/mirabelle_arith.ML		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Actions/mirabelle_metis.ML		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Actions/mirabelle_quickcheck.ML		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Actions/mirabelle_refute.ML		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Actions/mirabelle_sledgehammer.ML		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Actions/mirabelle_sledgehammer_filter.ML		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Mirabelle.thy		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Mirabelle_Test.thy		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Tools/mirabelle.ML		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Tools/mirabelle_arith.ML		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Tools/mirabelle_metis.ML		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Tools/mirabelle_quickcheck.ML		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Tools/mirabelle_refute.ML		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/Tools/mirabelle_sledgehammer_filter.ML		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/lib/Tools/mirabelle		file \| annotate \| diff \| comparison \| revisions
src/HOL/Mirabelle/lib/scripts/mirabelle.pl		file \| annotate \| diff \| comparison \| revisions