src/HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML
author blanchet
Wed, 24 Aug 2011 11:17:33 +0200
changeset 45319 d9a657c44380
parent 45318 5e19eecb0e1c
child 45342 0989d8deab69
permissions -rw-r--r--
more reliable "sledgehammer_tac" reconstruction, by avoiding "insert_tac"
     1 (*  Title:      HOL/Mirabelle/Tools/mirabelle_sledgehammer.ML
     2     Author:     Jasmin Blanchette and Sascha Boehme and Tobias Nipkow, TU Munich
     3 *)
     4 
     5 structure Mirabelle_Sledgehammer : MIRABELLE_ACTION =
     6 struct
     7 
(* Keys of the options understood by this Mirabelle action.  Each key is
   looked up in the "args" association list that is passed to the action. *)
val proverK = "prover"
val prover_timeoutK = "prover_timeout"
val keepK = "keep"
val type_encK = "type_enc"
val soundK = "sound"
val slicingK = "slicing"
val lambda_translationK = "lambda_translation"
val e_weight_methodK = "e_weight_method"
val force_sosK = "force_sos"
val max_relevantK = "max_relevant"
val max_callsK = "max_calls"
val minimizeK = "minimize"
val minimize_timeoutK = "minimize_timeout"
val metis_ftK = "metis_ft"
val reconstructorK = "reconstructor"

(* Value handed to Sledgehammer as its "preplay_timeout" parameter
   (presumably seconds -- TODO confirm against Sledgehammer_Isar). *)
val preplay_timeout = "4"
    25 
    26 fun sh_tag id = "#" ^ string_of_int id ^ " sledgehammer: "
    27 fun minimize_tag id = "#" ^ string_of_int id ^ " minimize (sledgehammer): "
    28 fun reconstructor_tag reconstructor id =
    29   "#" ^ string_of_int id ^ " " ^ (!reconstructor) ^ " (sledgehammer): "
    30 
    31 val separator = "-----"
    32 
    33 
(* Accumulated statistics about the Sledgehammer calls of one action instance.
   The time_* fields are divided by 1000 before they are reported
   (presumably they hold milliseconds -- TODO confirm). *)
datatype sh_data = ShData of {
  calls: int,
  success: int,
  nontriv_calls: int,
  nontriv_success: int,
  lemmas: int,
  max_lems: int,
  time_isa: int,
  time_prover: int,
  time_prover_fail: int}

(* Accumulated statistics about the calls of one reconstructor mode.
   "lemmas" is the triple (sum, sum of squares, maximum) maintained by
   inc_max; "posns" pairs every recorded position with the "trivial" flag of
   the corresponding goal. *)
datatype re_data = ReData of {
  calls: int,
  success: int,
  nontriv_calls: int,
  nontriv_success: int,
  proofs: int,
  time: int,
  timeout: int,
  lemmas: int * int * int,
  posns: (Position.T * bool) list
  }

(* Accumulated statistics about minimization: number of successful runs and
   the sum of the after/before ratios (each computed as a percentage). *)
datatype min_data = MinData of {
  succs: int,
  ab_ratios: int
  }
    61 
    62 fun make_sh_data
    63       (calls,success,nontriv_calls,nontriv_success,lemmas,max_lems,time_isa,
    64        time_prover,time_prover_fail) =
    65   ShData{calls=calls, success=success, nontriv_calls=nontriv_calls,
    66          nontriv_success=nontriv_success, lemmas=lemmas, max_lems=max_lems,
    67          time_isa=time_isa, time_prover=time_prover,
    68          time_prover_fail=time_prover_fail}
    69 
    70 fun make_min_data (succs, ab_ratios) =
    71   MinData{succs=succs, ab_ratios=ab_ratios}
    72 
    73 fun make_re_data (calls,success,nontriv_calls,nontriv_success,proofs,time,
    74                   timeout,lemmas,posns) =
    75   ReData{calls=calls, success=success, nontriv_calls=nontriv_calls,
    76          nontriv_success=nontriv_success, proofs=proofs, time=time,
    77          timeout=timeout, lemmas=lemmas, posns=posns}
    78 
    79 val empty_sh_data = make_sh_data (0, 0, 0, 0, 0, 0, 0, 0, 0)
    80 val empty_min_data = make_min_data (0, 0)
    81 val empty_re_data = make_re_data (0, 0, 0, 0, 0, 0, 0, (0,0,0), [])
    82 
    83 fun tuple_of_sh_data (ShData {calls, success, nontriv_calls, nontriv_success,
    84                               lemmas, max_lems, time_isa,
    85   time_prover, time_prover_fail}) = (calls, success, nontriv_calls,
    86   nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail)
    87 
    88 fun tuple_of_min_data (MinData {succs, ab_ratios}) = (succs, ab_ratios)
    89 
    90 fun tuple_of_re_data (ReData {calls, success, nontriv_calls, nontriv_success,
    91   proofs, time, timeout, lemmas, posns}) = (calls, success, nontriv_calls,
    92   nontriv_success, proofs, time, timeout, lemmas, posns)
    93 
    94 
(* Selects which of the four re_data components of "data" an update
   refers to (see map_re_data). *)
datatype reconstructor_mode =
  Unminimized | Minimized | UnminimizedFT | MinimizedFT

(* All statistics collected for one action instance. *)
datatype data = Data of {
  sh: sh_data,
  min: min_data,
  re_u: re_data, (* reconstructor with unminimized set of lemmas *)
  re_m: re_data, (* reconstructor with minimized set of lemmas *)
  re_uft: re_data, (* reconstructor with unminimized set of lemmas and fully-typed *)
  re_mft: re_data, (* reconstructor with minimized set of lemmas and fully-typed *)
  mini: bool   (* with minimization *)
  }
   107 
   108 fun make_data (sh, min, re_u, re_m, re_uft, re_mft, mini) =
   109   Data {sh=sh, min=min, re_u=re_u, re_m=re_m, re_uft=re_uft, re_mft=re_mft,
   110     mini=mini}
   111 
   112 val empty_data = make_data (empty_sh_data, empty_min_data,
   113   empty_re_data, empty_re_data, empty_re_data, empty_re_data, false)
   114 
   115 fun map_sh_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
   116   let val sh' = make_sh_data (f (tuple_of_sh_data sh))
   117   in make_data (sh', min, re_u, re_m, re_uft, re_mft, mini) end
   118 
   119 fun map_min_data f (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
   120   let val min' = make_min_data (f (tuple_of_min_data min))
   121   in make_data (sh, min', re_u, re_m, re_uft, re_mft, mini) end
   122 
   123 fun map_re_data f m (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
   124   let
   125     fun map_me g Unminimized   (u, m, uft, mft) = (g u, m, uft, mft)
   126       | map_me g Minimized     (u, m, uft, mft) = (u, g m, uft, mft)
   127       | map_me g UnminimizedFT (u, m, uft, mft) = (u, m, g uft, mft)
   128       | map_me g MinimizedFT   (u, m, uft, mft) = (u, m, uft, g mft)
   129 
   130     val f' = make_re_data o f o tuple_of_re_data
   131 
   132     val (re_u', re_m', re_uft', re_mft') =
   133       map_me f' m (re_u, re_m, re_uft, re_mft)
   134   in make_data (sh, min, re_u', re_m', re_uft', re_mft', mini) end
   135 
   136 fun set_mini mini (Data {sh, min, re_u, re_m, re_uft, re_mft, ...}) =
   137   make_data (sh, min, re_u, re_m, re_uft, re_mft, mini)
   138 
   139 fun inc_max (n:int) (s,sos,m) = (s+n, sos + n*n, Int.max(m,n));
   140 
   141 val inc_sh_calls =  map_sh_data
   142   (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
   143     => (calls + 1, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail))
   144 
   145 val inc_sh_success = map_sh_data
   146   (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
   147     => (calls, success + 1, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail))
   148 
   149 val inc_sh_nontriv_calls =  map_sh_data
   150   (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
   151     => (calls, success, nontriv_calls + 1, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail))
   152 
   153 val inc_sh_nontriv_success = map_sh_data
   154   (fn (calls, success, nontriv_calls, nontriv_success, lemmas,max_lems, time_isa, time_prover, time_prover_fail)
   155     => (calls, success, nontriv_calls, nontriv_success + 1, lemmas,max_lems, time_isa, time_prover, time_prover_fail))
   156 
   157 fun inc_sh_lemmas n = map_sh_data
   158   (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
   159     => (calls,success,nontriv_calls, nontriv_success, lemmas+n,max_lems,time_isa,time_prover,time_prover_fail))
   160 
   161 fun inc_sh_max_lems n = map_sh_data
   162   (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
   163     => (calls,success,nontriv_calls, nontriv_success, lemmas,Int.max(max_lems,n),time_isa,time_prover,time_prover_fail))
   164 
   165 fun inc_sh_time_isa t = map_sh_data
   166   (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
   167     => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa + t,time_prover,time_prover_fail))
   168 
   169 fun inc_sh_time_prover t = map_sh_data
   170   (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
   171     => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover + t,time_prover_fail))
   172 
   173 fun inc_sh_time_prover_fail t = map_sh_data
   174   (fn (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail)
   175     => (calls,success,nontriv_calls, nontriv_success, lemmas,max_lems,time_isa,time_prover,time_prover_fail + t))
   176 
   177 val inc_min_succs = map_min_data
   178   (fn (succs,ab_ratios) => (succs+1, ab_ratios))
   179 
   180 fun inc_min_ab_ratios r = map_min_data
   181   (fn (succs, ab_ratios) => (succs, ab_ratios+r))
   182 
   183 val inc_reconstructor_calls = map_re_data
   184   (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   185     => (calls + 1, success, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas,posns))
   186 
   187 val inc_reconstructor_success = map_re_data
   188   (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   189     => (calls, success + 1, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas,posns))
   190 
   191 val inc_reconstructor_nontriv_calls = map_re_data
   192   (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   193     => (calls, success, nontriv_calls + 1, nontriv_success, proofs, time, timeout, lemmas,posns))
   194 
   195 val inc_reconstructor_nontriv_success = map_re_data
   196   (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   197     => (calls, success, nontriv_calls, nontriv_success + 1, proofs, time, timeout, lemmas,posns))
   198 
   199 val inc_reconstructor_proofs = map_re_data
   200   (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   201     => (calls, success, nontriv_calls, nontriv_success, proofs + 1, time, timeout, lemmas,posns))
   202 
   203 fun inc_reconstructor_time m t = map_re_data
   204  (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   205   => (calls, success, nontriv_calls, nontriv_success, proofs, time + t, timeout, lemmas,posns)) m
   206 
   207 val inc_reconstructor_timeout = map_re_data
   208   (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   209     => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout + 1, lemmas,posns))
   210 
   211 fun inc_reconstructor_lemmas m n = map_re_data
   212   (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   213     => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout, inc_max n lemmas, posns)) m
   214 
   215 fun inc_reconstructor_posns m pos = map_re_data
   216   (fn (calls,success,nontriv_calls, nontriv_success, proofs,time,timeout,lemmas,posns)
   217     => (calls, success, nontriv_calls, nontriv_success, proofs, time, timeout, lemmas, pos::posns)) m
   218 
   219 val str0 = string_of_int o the_default 0
   220 
   221 local
   222 
   223 val str = string_of_int
   224 val str3 = Real.fmt (StringCvt.FIX (SOME 3))
   225 fun percentage a b = string_of_int (a * 100 div b)
   226 fun time t = Real.fromInt t / 1000.0
   227 fun avg_time t n =
   228   if n > 0 then (Real.fromInt t / 1000.0) / Real.fromInt n else 0.0
   229 
   230 fun log_sh_data log
   231     (calls, success, nontriv_calls, nontriv_success, lemmas, max_lems, time_isa, time_prover, time_prover_fail) =
   232  (log ("Total number of sledgehammer calls: " ^ str calls);
   233   log ("Number of successful sledgehammer calls: " ^ str success);
   234   log ("Number of sledgehammer lemmas: " ^ str lemmas);
   235   log ("Max number of sledgehammer lemmas: " ^ str max_lems);
   236   log ("Success rate: " ^ percentage success calls ^ "%");
   237   log ("Total number of nontrivial sledgehammer calls: " ^ str nontriv_calls);
   238   log ("Number of successful nontrivial sledgehammer calls: " ^ str nontriv_success);
   239   log ("Total time for sledgehammer calls (Isabelle): " ^ str3 (time time_isa));
   240   log ("Total time for successful sledgehammer calls (ATP): " ^ str3 (time time_prover));
   241   log ("Total time for failed sledgehammer calls (ATP): " ^ str3 (time time_prover_fail));
   242   log ("Average time for sledgehammer calls (Isabelle): " ^
   243     str3 (avg_time time_isa calls));
   244   log ("Average time for successful sledgehammer calls (ATP): " ^
   245     str3 (avg_time time_prover success));
   246   log ("Average time for failed sledgehammer calls (ATP): " ^
   247     str3 (avg_time time_prover_fail (calls - success)))
   248   )
   249 
   250 fun str_of_pos (pos, triv) =
   251   str0 (Position.line_of pos) (* ^ ":" ^ str0 (Position.offset_of pos) *) ^
   252   (if triv then "[T]" else "")
   253 
   254 fun log_re_data log tag sh_calls (re_calls, re_success, re_nontriv_calls,
   255      re_nontriv_success, re_proofs, re_time, re_timeout,
   256     (lemmas, lems_sos, lems_max), re_posns) =
   257  (log ("Total number of " ^ tag ^ "reconstructor calls: " ^ str re_calls);
   258   log ("Number of successful " ^ tag ^ "reconstructor calls: " ^ str re_success ^
   259     " (proof: " ^ str re_proofs ^ ")");
   260   log ("Number of " ^ tag ^ "reconstructor timeouts: " ^ str re_timeout);
   261   log ("Success rate: " ^ percentage re_success sh_calls ^ "%");
   262   log ("Total number of nontrivial " ^ tag ^ "reconstructor calls: " ^ str re_nontriv_calls);
   263   log ("Number of successful nontrivial " ^ tag ^ "reconstructor calls: " ^ str re_nontriv_success ^
   264     " (proof: " ^ str re_proofs ^ ")");
   265   log ("Number of successful " ^ tag ^ "reconstructor lemmas: " ^ str lemmas);
   266   log ("SOS of successful " ^ tag ^ "reconstructor lemmas: " ^ str lems_sos);
   267   log ("Max number of successful " ^ tag ^ "reconstructor lemmas: " ^ str lems_max);
   268   log ("Total time for successful " ^ tag ^ "reconstructor calls: " ^ str3 (time re_time));
   269   log ("Average time for successful " ^ tag ^ "reconstructor calls: " ^
   270     str3 (avg_time re_time re_success));
   271   if tag=""
   272   then log ("Proved: " ^ space_implode " " (map str_of_pos re_posns))
   273   else ()
   274  )
   275 
   276 fun log_min_data log (succs, ab_ratios) =
   277   (log ("Number of successful minimizations: " ^ string_of_int succs);
   278    log ("After/before ratios: " ^ string_of_int ab_ratios)
   279   )
   280 
   281 in
   282 
   283 fun log_data id log (Data {sh, min, re_u, re_m, re_uft, re_mft, mini}) =
   284   let
   285     val ShData {calls=sh_calls, ...} = sh
   286 
   287     fun app_if (ReData {calls, ...}) f = if calls > 0 then f () else ()
   288     fun log_re tag m =
   289       log_re_data log tag sh_calls (tuple_of_re_data m)
   290     fun log_reconstructor (tag1, m1) (tag2, m2) = app_if m1 (fn () =>
   291       (log_re tag1 m1; log ""; app_if m2 (fn () => log_re tag2 m2)))
   292   in
   293     if sh_calls > 0
   294     then
   295      (log ("\n\n\nReport #" ^ string_of_int id ^ ":\n");
   296       log_sh_data log (tuple_of_sh_data sh);
   297       log "";
   298       if not mini
   299       then log_reconstructor ("", re_u) ("fully-typed ", re_uft)
   300       else
   301         app_if re_u (fn () =>
   302          (log_reconstructor ("unminimized ", re_u) ("unminimized fully-typed ", re_uft);
   303           log "";
   304           app_if re_m (fn () =>
   305             (log_min_data log (tuple_of_min_data min); log "";
   306              log_reconstructor ("", re_m) ("fully-typed ", re_mft))))))
   307     else ()
   308   end
   309 
   310 end
   311 
   312 
   313 (* Warning: we implicitly assume single-threaded execution here! *)
   314 val data = Unsynchronized.ref ([] : (int * data) list)
   315 
   316 fun init id thy = (Unsynchronized.change data (cons (id, empty_data)); thy)
   317 fun done id ({log, ...}: Mirabelle.done_args) =
   318   AList.lookup (op =) (!data) id
   319   |> Option.map (log_data id log)
   320   |> K ()
   321 
   322 fun change_data id f = (Unsynchronized.change data (AList.map_entry (op =) id f); ())
   323 
   324 
   325 fun get_prover ctxt args =
   326   let
   327     fun default_prover_name () =
   328       hd (#provers (Sledgehammer_Isar.default_params ctxt []))
   329       handle Empty => error "No ATP available."
   330     fun get_prover name =
   331       (name, Sledgehammer_Run.get_minimizing_prover ctxt
   332                 Sledgehammer_Provers.Normal name)
   333   in
   334     (case AList.lookup (op =) args proverK of
   335       SOME name => get_prover name
   336     | NONE => get_prover (default_prover_name ()))
   337   end
   338 
   339 type locality = ATP_Translate.locality
   340 
   341 (* hack *)
   342 fun reconstructor_from_msg args msg =
   343   (case AList.lookup (op =) args reconstructorK of
   344     SOME name => name
   345   | NONE =>
   346     if String.isSubstring "metis (full_types)" msg then "metis (full_types)"
   347     else if String.isSubstring "metis (no_types)" msg then "metis (no_types)"
   348     else if String.isSubstring "metis" msg then "metis"
   349     else "smt")
   350 
   351 local
   352 
(* Outcome of one Sledgehammer run as produced by run_sh:
   SH_OK (time_isa, time_prover, used_facts) when the prover reported no
   failure, SH_FAIL (time_isa, time_prover) when it reported one, and
   SH_ERROR when an ERROR was raised. *)
datatype sh_result =
  SH_OK of int * int * (string * locality) list |
  SH_FAIL of int * int |
  SH_ERROR
   357 
   358 fun run_sh prover_name prover type_enc sound max_relevant slicing
   359         lambda_translation e_weight_method force_sos hard_timeout timeout dir
   360         pos st =
   361   let
   362     val {context = ctxt, facts = chained_ths, goal} = Proof.goal st
   363     val i = 1
   364     fun set_file_name (SOME dir) =
   365         Config.put Sledgehammer_Provers.dest_dir dir
   366         #> Config.put Sledgehammer_Provers.problem_prefix
   367           ("prob_" ^ str0 (Position.line_of pos) ^ "__")
   368         #> Config.put SMT_Config.debug_files
   369           (dir ^ "/" ^ Name.desymbolize false (ATP_Util.timestamp ()) ^ "_"
   370           ^ serial_string ())
   371       | set_file_name NONE = I
   372     val st' =
   373       st |> Proof.map_context
   374                 (set_file_name dir
   375                  #> (Option.map (Config.put
   376                        Sledgehammer_Provers.atp_lambda_translation)
   377                        lambda_translation |> the_default I)
   378                  #> (Option.map (Config.put ATP_Systems.e_weight_method)
   379                        e_weight_method |> the_default I)
   380                  #> (Option.map (Config.put ATP_Systems.force_sos)
   381                        force_sos |> the_default I)
   382                  #> Config.put Sledgehammer_Provers.measure_run_time true
   383                  #> Config.put Sledgehammer_Provers.atp_sound_modulo_infiniteness false)
   384     val params as {relevance_thresholds, max_relevant, slicing, ...} =
   385       Sledgehammer_Isar.default_params ctxt
   386           [("verbose", "true"),
   387            ("type_enc", type_enc),
   388            ("sound", sound),
   389            ("preplay_timeout", preplay_timeout),
   390            ("max_relevant", max_relevant),
   391            ("slicing", slicing),
   392            ("timeout", string_of_int timeout),
   393            ("preplay_timeout", preplay_timeout)]
   394     val default_max_relevant =
   395       Sledgehammer_Provers.default_max_relevant_for_prover ctxt slicing
   396         prover_name
   397     val is_appropriate_prop =
   398       Sledgehammer_Provers.is_appropriate_prop_for_prover ctxt prover_name
   399     val is_built_in_const =
   400       Sledgehammer_Provers.is_built_in_const_for_prover ctxt prover_name
   401     val relevance_fudge =
   402       Sledgehammer_Provers.relevance_fudge_for_prover ctxt prover_name
   403     val relevance_override = {add = [], del = [], only = false}
   404     val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal ctxt goal i
   405     val time_limit =
   406       (case hard_timeout of
   407         NONE => I
   408       | SOME secs => TimeLimit.timeLimit (Time.fromSeconds secs))
   409     fun failed failure =
   410       ({outcome = SOME failure, used_facts = [], run_time_in_msecs = NONE,
   411         preplay = K (ATP_Reconstruct.Failed_to_Play ATP_Reconstruct.Metis),
   412         message = K "", message_tail = ""}, ~1)
   413     val ({outcome, used_facts, run_time_in_msecs, preplay, message,
   414           message_tail} : Sledgehammer_Provers.prover_result,
   415         time_isa) = time_limit (Mirabelle.cpu_time (fn () =>
   416       let
   417         val _ = if is_appropriate_prop concl_t then ()
   418                 else raise Fail "inappropriate"
   419         val facts =
   420           Sledgehammer_Filter.nearly_all_facts ctxt relevance_override
   421                                                chained_ths hyp_ts concl_t
   422           |> filter (is_appropriate_prop o prop_of o snd)
   423           |> Sledgehammer_Filter.relevant_facts ctxt relevance_thresholds
   424                  (the_default default_max_relevant max_relevant)
   425                  is_built_in_const relevance_fudge relevance_override
   426                  chained_ths hyp_ts concl_t
   427         val problem =
   428           {state = st', goal = goal, subgoal = i,
   429            subgoal_count = Sledgehammer_Util.subgoal_count st,
   430            facts = facts |> map Sledgehammer_Provers.Untranslated_Fact,
   431            smt_filter = NONE}
   432       in prover params (K (K "")) problem end)) ()
   433       handle TimeLimit.TimeOut => failed ATP_Proof.TimedOut
   434            | Fail "inappropriate" => failed ATP_Proof.Inappropriate
   435     val time_prover = run_time_in_msecs |> the_default ~1
   436     val msg = message (preplay ()) ^ message_tail
   437   in
   438     case outcome of
   439       NONE => (msg, SH_OK (time_isa, time_prover, used_facts))
   440     | SOME _ => (msg, SH_FAIL (time_isa, time_prover))
   441   end
   442   handle ERROR msg => ("error: " ^ msg, SH_ERROR)
   443 
   444 fun thms_of_name ctxt name =
   445   let
   446     val lex = Keyword.get_lexicons
   447     val get = maps (Proof_Context.get_fact ctxt o fst)
   448   in
   449     Source.of_string name
   450     |> Symbol.source
   451     |> Token.source {do_recover=SOME false} lex Position.start
   452     |> Token.source_proper
   453     |> Source.source Token.stopper (Parse_Spec.xthms1 >> get) NONE
   454     |> Source.exhaust
   455   end
   456 
   457 in
   458 
   459 fun run_sledgehammer trivial args reconstructor named_thms id
   460       ({pre=st, log, pos, ...}: Mirabelle.run_args) =
   461   let
   462     val triv_str = if trivial then "[T] " else ""
   463     val _ = change_data id inc_sh_calls
   464     val _ = if trivial then () else change_data id inc_sh_nontriv_calls
   465     val (prover_name, prover) = get_prover (Proof.context_of st) args
   466     val type_enc = AList.lookup (op =) args type_encK |> the_default "smart"
   467     val sound = AList.lookup (op =) args soundK |> the_default "false"
   468     val max_relevant = AList.lookup (op =) args max_relevantK |> the_default "smart"
   469     val slicing = AList.lookup (op =) args slicingK |> the_default "true"
   470     val lambda_translation = AList.lookup (op =) args lambda_translationK
   471     val e_weight_method = AList.lookup (op =) args e_weight_methodK
   472     val force_sos = AList.lookup (op =) args force_sosK
   473       |> Option.map (curry (op <>) "false")
   474     val dir = AList.lookup (op =) args keepK
   475     val timeout = Mirabelle.get_int_setting args (prover_timeoutK, 30)
   476     (* always use a hard timeout, but give some slack so that the automatic
   477        minimizer has a chance to do its magic *)
   478     val hard_timeout = SOME (2 * timeout)
   479     val (msg, result) =
   480       run_sh prover_name prover type_enc sound max_relevant slicing
   481         lambda_translation e_weight_method force_sos hard_timeout timeout dir
   482         pos st
   483   in
   484     case result of
   485       SH_OK (time_isa, time_prover, names) =>
   486         let
   487           fun get_thms (_, ATP_Translate.Chained) = NONE
   488             | get_thms (name, loc) =
   489               SOME ((name, loc), thms_of_name (Proof.context_of st) name)
   490         in
   491           change_data id inc_sh_success;
   492           if trivial then () else change_data id inc_sh_nontriv_success;
   493           change_data id (inc_sh_lemmas (length names));
   494           change_data id (inc_sh_max_lems (length names));
   495           change_data id (inc_sh_time_isa time_isa);
   496           change_data id (inc_sh_time_prover time_prover);
   497           reconstructor := reconstructor_from_msg args msg;
   498           named_thms := SOME (map_filter get_thms names);
   499           log (sh_tag id ^ triv_str ^ "succeeded (" ^ string_of_int time_isa ^ "+" ^
   500             string_of_int time_prover ^ ") [" ^ prover_name ^ "]:\n" ^ msg)
   501         end
   502     | SH_FAIL (time_isa, time_prover) =>
   503         let
   504           val _ = change_data id (inc_sh_time_isa time_isa)
   505           val _ = change_data id (inc_sh_time_prover_fail time_prover)
   506         in log (sh_tag id ^ triv_str ^ "failed: " ^ msg) end
   507     | SH_ERROR => log (sh_tag id ^ "failed: " ^ msg)
   508   end
   509 
   510 end
   511 
   512 fun run_minimize args reconstructor named_thms id
   513         ({pre=st, log, ...}: Mirabelle.run_args) =
   514   let
   515     val ctxt = Proof.context_of st
   516     val n0 = length (these (!named_thms))
   517     val (prover_name, _) = get_prover ctxt args
   518     val type_enc = AList.lookup (op =) args type_encK |> the_default "smart"
   519     val sound = AList.lookup (op =) args soundK |> the_default "false"
   520     val timeout =
   521       AList.lookup (op =) args minimize_timeoutK
   522       |> Option.map (fst o read_int o raw_explode)  (* FIXME Symbol.explode (?) *)
   523       |> the_default 5
   524     val params = Sledgehammer_Isar.default_params ctxt
   525       [("provers", prover_name),
   526        ("verbose", "true"),
   527        ("type_enc", type_enc),
   528        ("sound", sound),
   529        ("timeout", string_of_int timeout),
   530        ("preplay_timeout", preplay_timeout)]
   531     val minimize =
   532       Sledgehammer_Minimize.minimize_facts prover_name params
   533           true 1 (Sledgehammer_Util.subgoal_count st)
   534     val _ = log separator
   535     val (used_facts, (preplay, message, message_tail)) =
   536       minimize st (these (!named_thms))
   537     val msg = message (preplay ()) ^ message_tail
   538   in
   539     case used_facts of
   540       SOME named_thms' =>
   541         (change_data id inc_min_succs;
   542          change_data id (inc_min_ab_ratios ((100 * length named_thms') div n0));
   543          if length named_thms' = n0
   544          then log (minimize_tag id ^ "already minimal")
   545          else (reconstructor := reconstructor_from_msg args msg;
   546                named_thms := SOME named_thms';
   547                log (minimize_tag id ^ "succeeded:\n" ^ msg))
   548         )
   549     | NONE => log (minimize_tag id ^ "failed: " ^ msg)
   550   end
   551 
   552 
   553 fun e_override_params timeout =
   554   [("provers", "e"),
   555    ("max_relevant", "0"),
   556    ("type_enc", "poly_guards?"),
   557    ("sound", "true"),
   558    ("slicing", "false"),
   559    ("timeout", timeout |> Time.toSeconds |> string_of_int)]
   560 
   561 fun vampire_override_params timeout =
   562   [("provers", "vampire"),
   563    ("max_relevant", "0"),
   564    ("type_enc", "poly_tags"),
   565    ("sound", "true"),
   566    ("slicing", "false"),
   567    ("timeout", timeout |> Time.toSeconds |> string_of_int)]
   568 
   569 fun run_reconstructor trivial full m name reconstructor named_thms id
   570     ({pre=st, timeout, log, pos, ...}: Mirabelle.run_args) =
   571   let
   572     fun do_reconstructor named_thms ctxt =
   573       let
   574         val ref_of_str =
   575           suffix ";" #> Outer_Syntax.scan Position.none #> Parse_Spec.xthm
   576           #> fst
   577         val thms = named_thms |> maps snd
   578         val facts = named_thms |> map (ref_of_str o fst o fst)
   579         val relevance_override = {add = facts, del = [], only = true}
   580       in
   581         if !reconstructor = "sledgehammer_tac" then
   582           Sledgehammer_Tactics.sledgehammer_as_oracle_tac ctxt
   583              (e_override_params timeout) relevance_override
   584           ORELSE'
   585           Sledgehammer_Tactics.sledgehammer_as_oracle_tac ctxt
   586              (vampire_override_params timeout) relevance_override
   587         else if !reconstructor = "smt" then
   588           SMT_Solver.smt_tac ctxt thms
   589         else if full orelse !reconstructor = "metis (full_types)" then
   590           Metis_Tactics.metis_tac [Metis_Tactics.full_type_enc] ctxt thms
   591         else if !reconstructor = "metis (no_types)" then
   592           Metis_Tactics.metis_tac [Metis_Tactics.no_type_enc] ctxt thms
   593         else if !reconstructor = "metis" then
   594           Metis_Tactics.metis_tac [] ctxt thms
   595         else
   596           K all_tac
   597       end
   598     fun apply_reconstructor named_thms =
   599       Mirabelle.can_apply timeout (do_reconstructor named_thms) st
   600 
   601     fun with_time (false, t) = "failed (" ^ string_of_int t ^ ")"
   602       | with_time (true, t) = (change_data id (inc_reconstructor_success m);
   603           if trivial then ()
   604           else change_data id (inc_reconstructor_nontriv_success m);
   605           change_data id (inc_reconstructor_lemmas m (length named_thms));
   606           change_data id (inc_reconstructor_time m t);
   607           change_data id (inc_reconstructor_posns m (pos, trivial));
   608           if name = "proof" then change_data id (inc_reconstructor_proofs m)
   609           else ();
   610           "succeeded (" ^ string_of_int t ^ ")")
   611     fun timed_reconstructor named_thms =
   612       (with_time (Mirabelle.cpu_time apply_reconstructor named_thms), true)
   613       handle TimeLimit.TimeOut => (change_data id (inc_reconstructor_timeout m);
   614                ("timeout", false))
   615            | ERROR msg => ("error: " ^ msg, false)
   616 
   617     val _ = log separator
   618     val _ = change_data id (inc_reconstructor_calls m)
   619     val _ = if trivial then ()
   620             else change_data id (inc_reconstructor_nontriv_calls m)
   621   in
   622     named_thms
   623     |> timed_reconstructor
   624     |>> log o prefix (reconstructor_tag reconstructor id)
   625     |> snd
   626   end
   627 
(* Time limit given to Try_Methods.try_methods when checking whether a goal
   counts as trivial. *)
val try_timeout = seconds 5.0

(* crude hack *)
(* Global counter of goals for which the action was entered; it is compared
   against the "max_calls" option and shared by all action instances. *)
val num_sledgehammer_calls = Unsynchronized.ref 0
   632 
   633 fun sledgehammer_action args id (st as {pre, name, ...}: Mirabelle.run_args) =
   634   let val goal = Thm.major_prem_of (#goal (Proof.goal pre)) in
   635     if can Logic.dest_conjunction goal orelse can Logic.dest_equals goal
   636     then () else
   637     let
   638       val max_calls =
   639         AList.lookup (op =) args max_callsK |> the_default "10000000"
   640         |> Int.fromString |> the
   641       val _ = num_sledgehammer_calls := !num_sledgehammer_calls + 1;
   642     in
   643       if !num_sledgehammer_calls > max_calls then ()
   644       else
   645         let
   646           val reconstructor = Unsynchronized.ref ""
   647           val named_thms =
   648             Unsynchronized.ref (NONE : ((string * locality) * thm list) list option)
   649           val minimize = AList.defined (op =) args minimizeK
   650           val metis_ft = AList.defined (op =) args metis_ftK
   651           val trivial =
   652             Try_Methods.try_methods (SOME try_timeout) ([], [], [], []) pre
   653             handle TimeLimit.TimeOut => false
   654           fun apply_reconstructor m1 m2 =
   655             if metis_ft
   656             then
   657               if not (Mirabelle.catch_result (reconstructor_tag reconstructor) false
   658                   (run_reconstructor trivial false m1 name reconstructor
   659                        (these (!named_thms))) id st)
   660               then
   661                 (Mirabelle.catch_result (reconstructor_tag reconstructor) false
   662                   (run_reconstructor trivial true m2 name reconstructor
   663                        (these (!named_thms))) id st; ())
   664               else ()
   665             else
   666               (Mirabelle.catch_result (reconstructor_tag reconstructor) false
   667                 (run_reconstructor trivial false m1 name reconstructor
   668                      (these (!named_thms))) id st; ())
   669         in
   670           change_data id (set_mini minimize);
   671           Mirabelle.catch sh_tag (run_sledgehammer trivial args reconstructor
   672                                                    named_thms) id st;
   673           if is_some (!named_thms)
   674           then
   675            (apply_reconstructor Unminimized UnminimizedFT;
   676             if minimize andalso not (null (these (!named_thms)))
   677             then
   678              (Mirabelle.catch minimize_tag
   679                   (run_minimize args reconstructor named_thms) id st;
   680               apply_reconstructor Minimized MinimizedFT)
   681             else ())
   682           else ()
   683         end
   684     end
   685   end
   686 
   687 fun invoke args =
   688   Mirabelle.register (init, sledgehammer_action args, done)
   689 
   690 end