src/HOL/Tools/ATP_Manager/atp_systems.ML
author blanchet
Wed, 23 Jun 2010 11:36:03 +0200
changeset 37514 b147d01b8ebc
parent 37509 f39464d971c4
child 37546 fc2f979b9a08
permissions -rw-r--r--
if SPASS fails at finding a proof with the SOS option turned on, turn it off and try again
     1 (*  Title:      HOL/Tools/ATP_Manager/atp_systems.ML
     2     Author:     Fabian Immler, TU Muenchen
     3     Author:     Jasmin Blanchette, TU Muenchen
     4 
     5 Setup for supported ATPs.
     6 *)
     7 
     8 signature ATP_SYSTEMS =
     9 sig
    10   type prover = ATP_Manager.prover
    11 
    12   (* hooks for problem files *)
    13   val dest_dir : string Config.T
    14   val problem_prefix : string Config.T
    15   val measure_runtime : bool Config.T
    16 
    17   val refresh_systems_on_tptp : unit -> unit
    18   val default_atps_param_value : unit -> string
    19   val setup : theory -> theory
    20 end;
    21 
    22 structure ATP_Systems : ATP_SYSTEMS =
    23 struct
    24 
    25 open Sledgehammer_Util
    26 open Sledgehammer_Fact_Preprocessor
    27 open Sledgehammer_HOL_Clause
    28 open Sledgehammer_Fact_Filter
    29 open Sledgehammer_Proof_Reconstruct
    30 open Sledgehammer_TPTP_Format
    31 open ATP_Manager
    32 
    33 (** generic ATP **)
    34 
    35 (* external problem files *)
    36 
    37 val (dest_dir, dest_dir_setup) = Attrib.config_string "atp_dest_dir" (K "");
    38   (*Empty string means create files in Isabelle's temporary files directory.*)
    39 
    40 val (problem_prefix, problem_prefix_setup) =
    41   Attrib.config_string "atp_problem_prefix" (K "prob");
    42 
    43 val (measure_runtime, measure_runtime_setup) =
    44   Attrib.config_bool "atp_measure_runtime" (K false);
    45 
    46 
    47 (* prover configuration *)
    48 
    49 type prover_config =
    50   {home_var: string,
    51    executable: string,
    52    arguments: bool -> Time.time -> string,
    53    proof_delims: (string * string) list,
    54    known_failures: (failure * string) list,
    55    max_axiom_clauses: int,
    56    prefers_theory_relevant: bool};
    57 
    58 
    59 (* basic template *)
    60 
    61 val remotify = prefix "remote_"
    62 
    63 fun with_path cleanup after f path =
    64   Exn.capture f path
    65   |> tap (fn _ => cleanup path)
    66   |> Exn.release
    67   |> tap (after path)
    68 
    69 (* Splits by the first possible of a list of delimiters. *)
    70 fun extract_proof delims output =
    71   case pairself (find_first (fn s => String.isSubstring s output))
    72                 (ListPair.unzip delims) of
    73     (SOME begin_delim, SOME end_delim) =>
    74     (output |> first_field begin_delim |> the |> snd
    75             |> first_field end_delim |> the |> fst
    76             |> first_field "\n" |> the |> snd
    77      handle Option.Option => "")
    78   | _ => ""
    79 
    80 fun extract_proof_and_outcome complete res_code proof_delims known_failures
    81                               output =
    82   case map_filter (fn (failure, pattern) =>
    83                       if String.isSubstring pattern output then SOME failure
    84                       else NONE) known_failures of
    85     [] => (case extract_proof proof_delims output of
    86              "" => ("", SOME UnknownError)
    87            | proof => if res_code = 0 then (proof, NONE)
    88                       else ("", SOME UnknownError))
    89   | (failure :: _) =>
    90     ("", SOME (if failure = IncompleteUnprovable andalso complete then
    91                  Unprovable
    92                else
    93                  failure))
    94 
    95 fun string_for_failure Unprovable = "The ATP problem is unprovable."
    96   | string_for_failure IncompleteUnprovable =
    97     "The ATP cannot prove the problem."
    98   | string_for_failure TimedOut = "Timed out."
    99   | string_for_failure OutOfResources = "The ATP ran out of resources."
   100   | string_for_failure OldSpass =
   101     (* FIXME: Change the error message below to point to the Isabelle download
   102        page once the package is there (around the Isabelle2010 release). *)
   103     "Warning: Sledgehammer requires a more recent version of SPASS with \
   104     \support for the TPTP syntax. To install it, download and untar the \
   105     \package \"http://isabelle.in.tum.de/~blanchet/spass-3.7.tgz\" and add the \
   106     \\"spass-3.7\" directory's full path to \"" ^
   107     Path.implode (Path.expand (Path.appends
   108         (Path.variable "ISABELLE_HOME_USER" ::
   109          map Path.basic ["etc", "components"]))) ^
   110     "\" on a line of its own."
   111   | string_for_failure MalformedInput =
   112     "Internal Sledgehammer error: The ATP problem is malformed. Please report \
   113     \this to the Isabelle developers."
   114   | string_for_failure MalformedOutput = "Error: The ATP output is malformed."
   115   | string_for_failure UnknownError = "Error: An unknown ATP error occurred."
   116 
   117 fun shape_of_clauses _ [] = []
   118   | shape_of_clauses j ([] :: clauses) = [] :: shape_of_clauses j clauses
   119   | shape_of_clauses j ((_ :: lits) :: clauses) =
   120     let val shape = shape_of_clauses (j + 1) (lits :: clauses) in
   121       (j :: hd shape) :: tl shape
   122     end
   123 
   124 (* generic TPTP-based provers *)
   125 
   126 fun generic_tptp_prover
   127         (name, {home_var, executable, arguments, proof_delims, known_failures,
   128                 max_axiom_clauses, prefers_theory_relevant})
   129         ({debug, overlord, full_types, explicit_apply, respect_no_atp,
   130           relevance_threshold, relevance_convergence, theory_relevant,
   131           defs_relevant, isar_proof, isar_shrink_factor, ...} : params)
   132         minimize_command timeout
   133         ({subgoal, goal, relevance_override, axiom_clauses, filtered_clauses}
   134          : problem) =
   135   let
   136     (* get clauses and prepare them for writing *)
   137     val (ctxt, (_, th)) = goal;
   138     val thy = ProofContext.theory_of ctxt;
   139     val goal_clss = #1 (neg_conjecture_clauses ctxt th subgoal)
   140     val goal_cls = List.concat goal_clss
   141     val the_filtered_clauses =
   142       (case filtered_clauses of
   143         NONE => relevant_facts full_types respect_no_atp relevance_threshold
   144                     relevance_convergence defs_relevant max_axiom_clauses
   145                     (the_default prefers_theory_relevant theory_relevant)
   146                     relevance_override goal goal_cls
   147       | SOME fcls => fcls);
   148     val the_axiom_clauses = axiom_clauses |> the_default the_filtered_clauses
   149     val (internal_thm_names, clauses) =
   150       prepare_clauses full_types goal_cls the_axiom_clauses the_filtered_clauses
   151                       thy
   152 
   153     (* path to unique problem file *)
   154     val the_dest_dir = if overlord then getenv "ISABELLE_HOME_USER"
   155                        else Config.get ctxt dest_dir;
   156     val the_problem_prefix = Config.get ctxt problem_prefix;
   157     fun prob_pathname nr =
   158       let
   159         val probfile =
   160           Path.basic ((if overlord then "prob_" ^ name
   161                        else the_problem_prefix ^ serial_string ())
   162                       ^ "_" ^ string_of_int nr)
   163       in
   164         if the_dest_dir = "" then File.tmp_path probfile
   165         else if File.exists (Path.explode the_dest_dir)
   166         then Path.append (Path.explode the_dest_dir) probfile
   167         else error ("No such directory: " ^ the_dest_dir ^ ".")
   168       end;
   169 
   170     val home = getenv home_var
   171     val command = Path.explode (home ^ "/" ^ executable)
   172     (* write out problem file and call prover *)
   173     fun command_line complete probfile =
   174       let
   175         val core = File.shell_path command ^ " " ^ arguments complete timeout ^
   176                    " " ^ File.shell_path probfile
   177       in
   178         (if Config.get ctxt measure_runtime then
   179            "TIMEFORMAT='%3U'; { time " ^ core ^ " ; }"
   180          else
   181            "exec " ^ core) ^ " 2>&1" ^
   182         (if overlord then
   183            " | sed 's/,/, /g' \
   184            \| sed 's/\\([^!=<]\\)\\([=|]\\)\\([^=>]\\)/\\1 \\2 \\3/g' \
   185            \| sed 's/  / /g' | sed 's/| |/||/g' \
   186            \| sed 's/ = = =/===/g' \
   187            \| sed 's/= = /== /g'"
   188          else
   189            "")
   190       end
   191     fun split_time s =
   192       let
   193         val split = String.tokens (fn c => str c = "\n");
   194         val (output, t) = s |> split |> split_last |> apfst cat_lines;
   195         fun as_num f = f >> (fst o read_int);
   196         val num = as_num (Scan.many1 Symbol.is_ascii_digit);
   197         val digit = Scan.one Symbol.is_ascii_digit;
   198         val num3 = as_num (digit ::: digit ::: (digit >> single));
   199         val time = num --| Scan.$$ "." -- num3 >> (fn (a, b) => a * 1000 + b);
   200         val as_time = the_default 0 o Scan.read Symbol.stopper time o explode;
   201       in (output, as_time t) end;
   202     fun run_on probfile =
   203       if File.exists command then
   204         let
   205           fun do_run complete =
   206             let
   207               val command = command_line complete probfile
   208               val ((output, msecs), res_code) =
   209                 bash_output command
   210                 |>> (if overlord then
   211                        prefix ("% " ^ command ^ "\n% " ^ timestamp () ^ "\n")
   212                      else
   213                        I)
   214                 |>> (if Config.get ctxt measure_runtime then split_time
   215                      else rpair 0)
   216               val (proof, outcome) =
   217                 extract_proof_and_outcome complete res_code proof_delims
   218                                           known_failures output
   219             in (output, msecs, proof, outcome) end
   220           val (pool, conjecture_offset) =
   221             write_tptp_file (debug andalso overlord) full_types explicit_apply
   222                             probfile clauses
   223           val conjecture_shape = shape_of_clauses (conjecture_offset + 1) goal_clss
   224           val result =
   225             do_run false
   226             |> (fn (_, msecs0, _, SOME IncompleteUnprovable) =>
   227                    do_run true
   228                    |> (fn (output, msecs, proof, outcome) =>
   229                           (output, msecs0 + msecs, proof, outcome))
   230                  | result => result)
   231         in ((pool, conjecture_shape), result) end
   232       else if home = "" then
   233         error ("The environment variable " ^ quote home_var ^ " is not set.")
   234       else
   235         error ("Bad executable: " ^ Path.implode command ^ ".");
   236 
   237     (* If the problem file has not been exported, remove it; otherwise, export
   238        the proof file too. *)
   239     fun cleanup probfile =
   240       if the_dest_dir = "" then try File.rm probfile else NONE
   241     fun export probfile (_, (output, _, _, _)) =
   242       if the_dest_dir = "" then
   243         ()
   244       else
   245         File.write (Path.explode (Path.implode probfile ^ "_proof")) output
   246 
   247     val ((pool, conjecture_shape), (output, msecs, proof, outcome)) =
   248       with_path cleanup export run_on (prob_pathname subgoal)
   249 
   250     val (message, relevant_thm_names) =
   251       case outcome of
   252         NONE =>
   253         proof_text isar_proof
   254             (pool, debug, isar_shrink_factor, ctxt, conjecture_shape)
   255             (full_types, minimize_command, proof, internal_thm_names, th,
   256              subgoal)
   257       | SOME failure => (string_for_failure failure ^ "\n", [])
   258   in
   259     {outcome = outcome, message = message, pool = pool,
   260      relevant_thm_names = relevant_thm_names, atp_run_time_in_msecs = msecs,
   261      output = output, proof = proof, internal_thm_names = internal_thm_names,
   262      conjecture_shape = conjecture_shape,
   263      filtered_clauses = the_filtered_clauses}
   264   end
   265 
   266 fun tptp_prover name p = (name, generic_tptp_prover (name, p));
   267 
   268 fun to_generous_secs time = (Time.toMilliseconds time + 999) div 1000
   269 
   270 (* E prover *)
   271 
   272 val tstp_proof_delims =
   273   ("# SZS output start CNFRefutation.", "# SZS output end CNFRefutation")
   274 
   275 val e_config : prover_config =
   276   {home_var = "E_HOME",
   277    executable = "eproof",
   278    arguments = fn _ => fn timeout =>
   279      "--tstp-in --tstp-out -l5 -xAutoDev -tAutoDev --silent --cpu-limit=" ^
   280      string_of_int (to_generous_secs timeout),
   281    proof_delims = [tstp_proof_delims],
   282    known_failures =
   283      [(Unprovable, "SZS status: Satisfiable"),
   284       (Unprovable, "SZS status Satisfiable"),
   285       (TimedOut, "Failure: Resource limit exceeded (time)"),
   286       (TimedOut, "time limit exceeded"),
   287       (OutOfResources,
   288        "# Cannot determine problem status within resource limit"),
   289       (OutOfResources, "SZS status: ResourceOut"),
   290       (OutOfResources, "SZS status ResourceOut")],
   291    max_axiom_clauses = 100,
   292    prefers_theory_relevant = false}
   293 val e = tptp_prover "e" e_config
   294 
   295 
   296 (* The "-VarWeight=3" option helps the higher-order problems, probably by
   297    counteracting the presence of "hAPP". *)
   298 val spass_config : prover_config =
   299   {home_var = "SPASS_HOME",
   300    executable = "SPASS",
   301    arguments = fn complete => fn timeout =>
   302      ("-TPTP -Auto -PGiven=0 -PProblem=0 -Splits=0 -FullRed=0 -DocProof \
   303       \-VarWeight=3 -TimeLimit=" ^ string_of_int (to_generous_secs timeout))
   304      |> not complete ? prefix "-SOS=1 ",
   305    proof_delims = [("Here is a proof", "Formulae used in the proof")],
   306    known_failures =
   307      [(IncompleteUnprovable, "SPASS beiseite: Completion found"),
   308       (TimedOut, "SPASS beiseite: Ran out of time"),
   309       (OutOfResources, "SPASS beiseite: Maximal number of loops exceeded"),
   310       (MalformedInput, "Undefined symbol"),
   311       (MalformedInput, "Free Variable"),
   312       (OldSpass, "unrecognized option `-TPTP'"),
   313       (OldSpass, "Unrecognized option TPTP")],
   314    max_axiom_clauses = 40,
   315    prefers_theory_relevant = true}
   316 val spass = tptp_prover "spass" spass_config
   317 
   318 (* Vampire *)
   319 
   320 val vampire_config : prover_config =
   321   {home_var = "VAMPIRE_HOME",
   322    executable = "vampire",
   323    arguments = fn _ => fn timeout =>
   324      "--output_syntax tptp --mode casc -t " ^
   325      string_of_int (to_generous_secs timeout),
   326    proof_delims =
   327      [("=========== Refutation ==========",
   328        "======= End of refutation ======="),
   329       ("% SZS output start Refutation", "% SZS output end Refutation")],
   330    known_failures =
   331      [(Unprovable, "UNPROVABLE"),
   332       (IncompleteUnprovable, "CANNOT PROVE"),
   333       (Unprovable, "Satisfiability detected"),
   334       (OutOfResources, "Refutation not found")],
   335    max_axiom_clauses = 60,
   336    prefers_theory_relevant = false}
   337 val vampire = tptp_prover "vampire" vampire_config
   338 
   339 (* Remote prover invocation via SystemOnTPTP *)
   340 
   341 val systems = Synchronized.var "atp_systems" ([]: string list);
   342 
   343 fun get_systems () =
   344   case bash_output "\"$ISABELLE_ATP_MANAGER/SystemOnTPTP\" -w" of
   345     (answer, 0) => split_lines answer
   346   | (answer, _) =>
   347     error ("Failed to get available systems at SystemOnTPTP:\n" ^ answer)
   348 
   349 fun refresh_systems_on_tptp () =
   350   Synchronized.change systems (fn _ => get_systems ())
   351 
   352 fun get_system prefix = Synchronized.change_result systems (fn systems =>
   353   (if null systems then get_systems () else systems)
   354   |> `(find_first (String.isPrefix prefix)));
   355 
   356 fun the_system prefix =
   357   (case get_system prefix of
   358     NONE => error ("System " ^ quote prefix ^ " not available at SystemOnTPTP.")
   359   | SOME sys => sys);
   360 
   361 val remote_known_failures =
   362   [(TimedOut, "says Timeout"),
   363    (MalformedOutput, "Remote script could not extract proof")]
   364 
   365 fun remote_config atp_prefix args
   366         ({proof_delims, known_failures, max_axiom_clauses,
   367           prefers_theory_relevant, ...} : prover_config) : prover_config =
   368   {home_var = "ISABELLE_ATP_MANAGER",
   369    executable = "SystemOnTPTP",
   370    arguments = fn _ => fn timeout =>
   371      args ^ " -t " ^ string_of_int (to_generous_secs timeout) ^ " -s " ^
   372      the_system atp_prefix,
   373    proof_delims = insert (op =) tstp_proof_delims proof_delims,
   374    known_failures = remote_known_failures @ known_failures,
   375    max_axiom_clauses = max_axiom_clauses,
   376    prefers_theory_relevant = prefers_theory_relevant}
   377 
   378 fun remote_tptp_prover prover atp_prefix args config =
   379   tptp_prover (remotify (fst prover)) (remote_config atp_prefix args config)
   380 
   381 val remote_e = remote_tptp_prover e "EP---" "" e_config
   382 val remote_spass = remote_tptp_prover spass "SPASS---" "-x" spass_config
   383 val remote_vampire = remote_tptp_prover vampire "Vampire---9" "" vampire_config
   384 
   385 fun maybe_remote (name, _) ({home_var, ...} : prover_config) =
   386   name |> getenv home_var = "" ? remotify
   387 
   388 fun default_atps_param_value () =
   389   space_implode " " [maybe_remote e e_config, maybe_remote spass spass_config,
   390                      remotify (fst vampire)]
   391 
   392 val provers = [e, spass, vampire, remote_e, remote_spass, remote_vampire]
   393 val prover_setup = fold add_prover provers
   394 
   395 val setup =
   396   dest_dir_setup
   397   #> problem_prefix_setup
   398   #> measure_runtime_setup
   399   #> prover_setup
   400 
   401 end;