src/HOL/Tools/ATP/atp_systems.ML
author blanchet
Mon, 20 Jun 2011 10:41:02 +0200
changeset 44345 423cd1ecf714
parent 44344 fb2713b803e6
child 44346 f205e841402a
permissions -rw-r--r--
optimized E's time slicing, based on latest exhaustive Judgment Day results
     1 (*  Title:      HOL/Tools/ATP/atp_systems.ML
     2     Author:     Fabian Immler, TU Muenchen
     3     Author:     Jasmin Blanchette, TU Muenchen
     4 
     5 Setup for supported ATPs.
     6 *)
     7 
     8 signature ATP_SYSTEMS =
     9 sig
    10   type format = ATP_Problem.format
    11   type formula_kind = ATP_Problem.formula_kind
    12   type failure = ATP_Proof.failure
    13 
    14   type atp_config =
    15     {exec : string * string,
    16      required_execs : (string * string) list,
    17      arguments :
    18        Proof.context -> bool -> string -> Time.time
    19        -> (unit -> (string * real) list) -> string,
    20      proof_delims : (string * string) list,
    21      known_failures : (failure * string) list,
    22      conj_sym_kind : formula_kind,
    23      prem_kind : formula_kind,
    24      formats : format list,
    25      best_slices :
    26        Proof.context -> (real * (bool * (int * string list * string))) list}
    27 
    28   val e_weight_method : string Config.T
    29   val e_default_fun_weight : real Config.T
    30   val e_fun_weight_base : real Config.T
    31   val e_fun_weight_span : real Config.T
    32   val e_default_sym_offs_weight : real Config.T
    33   val e_sym_offs_weight_base : real Config.T
    34   val e_sym_offs_weight_span : real Config.T
    35   val spass_force_sos : bool Config.T
    36   val vampire_force_sos : bool Config.T
    37   val eN : string
    38   val spassN : string
    39   val vampireN : string
    40   val leo2N : string
    41   val satallaxN : string
    42   val sine_eN : string
    43   val snarkN : string
    44   val tofof_eN : string
    45   val waldmeisterN : string
    46   val z3_atpN : string
    47   val remote_prefix : string
    48   val remote_atp :
    49     string -> string -> string list -> (string * string) list
    50     -> (failure * string) list -> formula_kind -> formula_kind -> format list
    51     -> (Proof.context -> int * string list) -> string * atp_config
    52   val add_atp : string * atp_config -> theory -> theory
    53   val get_atp : theory -> string -> atp_config
    54   val supported_atps : theory -> string list
    55   val is_atp_installed : theory -> string -> bool
    56   val refresh_systems_on_tptp : unit -> unit
    57   val setup : theory -> theory
    58 end;
    59 
    60 structure ATP_Systems : ATP_SYSTEMS =
    61 struct
    62 
    63 open ATP_Problem
    64 open ATP_Proof
    65 
    66 (* ATP configuration *)
    67 
    68 type atp_config =
    69   {exec : string * string,
    70    required_execs : (string * string) list,
    71    arguments :
    72      Proof.context -> bool -> string -> Time.time
    73      -> (unit -> (string * real) list) -> string,
    74    proof_delims : (string * string) list,
    75    known_failures : (failure * string) list,
    76    conj_sym_kind : formula_kind,
    77    prem_kind : formula_kind,
    78    formats : format list,
    79    best_slices :
    80      Proof.context -> (real * (bool * (int * string list * string))) list}
    81 
    82 (* "best_slices" must be found empirically, taking a wholistic approach since
    83    the ATPs are run in parallel. The "real" components give the faction of the
    84    time available given to the slice; these should add up to 1.0. The "bool"
    85    component indicates whether the slice's strategy is complete; the "int", the
    86    preferred number of facts to pass; the "string list", the preferred type
    87    systems, which should be of the form [sound] or [unsound, sound], where
    88    "sound" is a sound type system and "unsound" is an unsound one.
    89 
    90    The last slice should be the most "normal" one, because it will get all the
    91    time available if the other slices fail early and also because it is used for
    92    remote provers and if slicing is disabled. *)
    93 
    94 val known_perl_failures =
    95   [(CantConnect, "HTTP error"),
    96    (NoPerl, "env: perl"),
    97    (NoLibwwwPerl, "Can't locate HTTP")]
    98 
    99 (* named ATPs *)
   100 
   101 val eN = "e"
   102 val spassN = "spass"
   103 val vampireN = "vampire"
   104 val z3_atpN = "z3_atp"
   105 val leo2N = "leo2"
   106 val satallaxN = "satallax"
   107 val sine_eN = "sine_e"
   108 val snarkN = "snark"
   109 val tofof_eN = "tofof_e"
   110 val waldmeisterN = "waldmeister"
   111 val remote_prefix = "remote_"
   112 
   113 structure Data = Theory_Data
   114 (
   115   type T = (atp_config * stamp) Symtab.table
   116   val empty = Symtab.empty
   117   val extend = I
   118   fun merge data : T = Symtab.merge (eq_snd op =) data
   119     handle Symtab.DUP name => error ("Duplicate ATP: " ^ quote name ^ ".")
   120 )
   121 
   122 fun to_secs time = (Time.toMilliseconds time + 999) div 1000
   123 
   124 val sosN = "sos"
   125 val no_sosN = "no_sos"
   126 
   127 
   128 (* E *)
   129 
   130 val tstp_proof_delims =
   131   [("# SZS output start CNFRefutation.", "# SZS output end CNFRefutation"),
   132    ("% SZS output start CNFRefutation", "% SZS output end CNFRefutation")]
   133 
   134 val e_smartN = "smart"
   135 val e_autoN = "auto"
   136 val e_fun_weightN = "fun_weight"
   137 val e_sym_offset_weightN = "sym_offset_weight"
   138 
   139 val e_weight_method =
   140   Attrib.setup_config_string @{binding atp_e_weight_method} (K e_smartN)
   141 (* FUDGE *)
   142 val e_default_fun_weight =
   143   Attrib.setup_config_real @{binding atp_e_default_fun_weight} (K 20.0)
   144 val e_fun_weight_base =
   145   Attrib.setup_config_real @{binding atp_e_fun_weight_base} (K 0.0)
   146 val e_fun_weight_span =
   147   Attrib.setup_config_real @{binding atp_e_fun_weight_span} (K 40.0)
   148 val e_default_sym_offs_weight =
   149   Attrib.setup_config_real @{binding atp_e_default_sym_offs_weight} (K 1.0)
   150 val e_sym_offs_weight_base =
   151   Attrib.setup_config_real @{binding atp_e_sym_offs_weight_base} (K ~20.0)
   152 val e_sym_offs_weight_span =
   153   Attrib.setup_config_real @{binding atp_e_sym_offs_weight_span} (K 60.0)
   154 
   155 fun e_weight_method_case method fw sow =
   156   if method = e_fun_weightN then fw
   157   else if method = e_sym_offset_weightN then sow
   158   else raise Fail ("unexpected" ^ quote method)
   159 
   160 fun scaled_e_weight ctxt method w =
   161   w * Config.get ctxt
   162           (e_weight_method_case method e_fun_weight_span e_sym_offs_weight_span)
   163   + Config.get ctxt
   164         (e_weight_method_case method e_fun_weight_base e_sym_offs_weight_base)
   165   |> Real.ceil |> signed_string_of_int
   166 
   167 fun e_weight_arguments ctxt method weights =
   168   if method = e_autoN then
   169     "-xAutoDev"
   170   else
   171     "--split-clauses=4 --split-reuse-defs --simul-paramod --forward-context-sr \
   172     \--destructive-er-aggressive --destructive-er --presat-simplify \
   173     \--prefer-initial-clauses -tKBO6 -winvfreqrank -c1 -Ginvfreqconjmax -F1 \
   174     \--delete-bad-limit=150000000 -WSelectMaxLComplexAvoidPosPred \
   175     \-H'(4*" ^ e_weight_method_case method "FunWeight" "SymOffsetWeight" ^
   176     "(SimulateSOS, " ^
   177     (e_weight_method_case method e_default_fun_weight e_default_sym_offs_weight
   178      |> Config.get ctxt |> Real.ceil |> signed_string_of_int) ^
   179     ",20,1.5,1.5,1" ^
   180     (weights ()
   181      |> map (fn (s, w) => "," ^ s ^ ":" ^ scaled_e_weight ctxt method w)
   182      |> implode) ^
   183     "),3*ConjectureGeneralSymbolWeight(PreferNonGoals,200,100,200,50,50,1,100,\
   184     \1.5,1.5,1),1*Clauseweight(PreferProcessed,1,1,1),1*\
   185     \FIFOWeight(PreferProcessed))'"
   186 
   187 fun is_old_e_version () = (string_ord (getenv "E_VERSION", "1.2w") = LESS)
   188 
   189 fun effective_e_weight_method ctxt =
   190   if is_old_e_version () then e_autoN else Config.get ctxt e_weight_method
   191 
   192 val e_config : atp_config =
   193   {exec = ("E_HOME", "eproof"),
   194    required_execs = [],
   195    arguments =
   196      fn ctxt => fn full_proof => fn method => fn timeout => fn weights =>
   197         "--tstp-in --tstp-out -l5 " ^ e_weight_arguments ctxt method weights ^
   198         " -tAutoDev --silent --cpu-limit=" ^ string_of_int (to_secs timeout) ^
   199         (if full_proof orelse is_old_e_version () then "" else " --trim"),
   200    proof_delims = tstp_proof_delims,
   201    known_failures =
   202      [(Unprovable, "SZS status: CounterSatisfiable"),
   203       (Unprovable, "SZS status CounterSatisfiable"),
   204       (ProofMissing, "SZS status Theorem"),
   205       (TimedOut, "Failure: Resource limit exceeded (time)"),
   206       (TimedOut, "time limit exceeded"),
   207       (OutOfResources, "# Cannot determine problem status"),
   208       (OutOfResources, "SZS status: ResourceOut"),
   209       (OutOfResources, "SZS status ResourceOut")],
   210    conj_sym_kind = Hypothesis,
   211    prem_kind = Conjecture,
   212    formats = [FOF],
   213    best_slices = fn ctxt =>
   214      let val method = effective_e_weight_method ctxt in
   215        (* FUDGE *)
   216        if method = e_smartN then
   217          [(0.333, (true, (200, ["mangled_preds"], e_sym_offset_weightN))),
   218           (0.333, (true, (1000, ["mangled_preds?"], e_fun_weightN))),
   219           (0.334, (true, (50, ["mangled_tags?"], e_sym_offset_weightN)))]
   220        else
   221          [(1.0, (true, (300, ["mangled_tags?"], method)))]
   222      end}
   223 
   224 val e = (eN, e_config)
   225 
   226 
   227 (* SPASS *)
   228 
   229 val spass_force_sos =
   230   Attrib.setup_config_bool @{binding atp_spass_force_sos} (K false)
   231 
   232 (* The "-VarWeight=3" option helps the higher-order problems, probably by
   233    counteracting the presence of "hAPP". *)
   234 val spass_config : atp_config =
   235   {exec = ("ISABELLE_ATP", "scripts/spass"),
   236    required_execs = [("SPASS_HOME", "SPASS"), ("SPASS_HOME", "tptp2dfg")],
   237    arguments = fn ctxt => fn _ => fn sos => fn timeout => fn _ =>
   238      ("-Auto -PGiven=0 -PProblem=0 -Splits=0 -FullRed=0 -DocProof \
   239       \-VarWeight=3 -TimeLimit=" ^ string_of_int (to_secs timeout))
   240      |> sos = sosN ? prefix "-SOS=1 ",
   241    proof_delims = [("Here is a proof", "Formulae used in the proof")],
   242    known_failures =
   243      known_perl_failures @
   244      [(GaveUp, "SPASS beiseite: Completion found"),
   245       (TimedOut, "SPASS beiseite: Ran out of time"),
   246       (OutOfResources, "SPASS beiseite: Maximal number of loops exceeded"),
   247       (MalformedInput, "Undefined symbol"),
   248       (MalformedInput, "Free Variable"),
   249       (SpassTooOld, "tptp2dfg"),
   250       (InternalError, "Please report this error")],
   251    conj_sym_kind = Hypothesis,
   252    prem_kind = Conjecture,
   253    formats = [FOF],
   254    best_slices = fn ctxt =>
   255      (* FUDGE *)
   256      [(0.333, (false, (150, ["mangled_preds_heavy"], sosN))),
   257       (0.333, (false, (150, ["mangled_preds?"], sosN))),
   258       (0.334, (true, (150, ["poly_preds"], no_sosN)))]
   259      |> (if Config.get ctxt spass_force_sos then hd #> apfst (K 1.0) #> single
   260          else I)}
   261 
   262 val spass = (spassN, spass_config)
   263 
   264 
   265 (* Vampire *)
   266 
   267 val vampire_force_sos =
   268   Attrib.setup_config_bool @{binding atp_vampire_force_sos} (K false)
   269 
   270 val vampire_config : atp_config =
   271   {exec = ("VAMPIRE_HOME", "vampire"),
   272    required_execs = [],
   273    arguments = fn ctxt => fn _ => fn sos => fn timeout => fn _ =>
   274      "--proof tptp --mode casc -t " ^ string_of_int (to_secs timeout) ^
   275      " --thanks \"Andrei and Krystof\" --input_file"
   276      |> sos = sosN ? prefix "--sos on ",
   277    proof_delims =
   278      [("=========== Refutation ==========",
   279        "======= End of refutation ======="),
   280       ("% SZS output start Refutation", "% SZS output end Refutation"),
   281       ("% SZS output start Proof", "% SZS output end Proof")],
   282    known_failures =
   283      [(GaveUp, "UNPROVABLE"),
   284       (GaveUp, "CANNOT PROVE"),
   285       (GaveUp, "SZS status GaveUp"),
   286       (ProofIncomplete, "predicate_definition_introduction,[]"),
   287       (TimedOut, "SZS status Timeout"),
   288       (Unprovable, "Satisfiability detected"),
   289       (Unprovable, "Termination reason: Satisfiable"),
   290       (VampireTooOld, "not a valid option"),
   291       (Interrupted, "Aborted by signal SIGINT")],
   292    conj_sym_kind = Conjecture,
   293    prem_kind = Conjecture,
   294    formats = [FOF],
   295    best_slices = fn ctxt =>
   296      (* FUDGE *)
   297      [(0.333, (false, (200, ["mangled_preds_heavy"], sosN))),
   298       (0.333, (false, (300, ["mangled_tags?"], sosN))),
   299       (0.334, (true, (400, ["poly_preds"], no_sosN)))]
   300      |> (if Config.get ctxt vampire_force_sos then hd #> apfst (K 1.0) #> single
   301          else I)}
   302 
   303 val vampire = (vampireN, vampire_config)
   304 
   305 
   306 (* Z3 with TPTP syntax *)
   307 
   308 val z3_atp_config : atp_config =
   309   {exec = ("Z3_HOME", "z3"),
   310    required_execs = [],
   311    arguments = fn _ => fn _ => fn _ => fn timeout => fn _ =>
   312      "MBQI=true -p -t:" ^ string_of_int (to_secs timeout),
   313    proof_delims = [],
   314    known_failures =
   315      [(Unprovable, "\nsat"),
   316       (GaveUp, "\nunknown"),
   317       (GaveUp, "SZS status Satisfiable"),
   318       (ProofMissing, "\nunsat"),
   319       (ProofMissing, "SZS status Unsatisfiable")],
   320    conj_sym_kind = Hypothesis,
   321    prem_kind = Hypothesis,
   322    formats = [FOF],
   323    best_slices =
   324      (* FUDGE (FIXME) *)
   325      K [(1.0, (false, (250, [], "")))]}
   326 
   327 val z3_atp = (z3_atpN, z3_atp_config)
   328 
   329 
   330 (* Remote ATP invocation via SystemOnTPTP *)
   331 
   332 val systems = Synchronized.var "atp_systems" ([] : string list)
   333 
   334 fun get_systems () =
   335   case bash_output "\"$ISABELLE_ATP/scripts/remote_atp\" -w 2>&1" of
   336     (output, 0) => split_lines output
   337   | (output, _) =>
   338     error (case extract_known_failure known_perl_failures output of
   339              SOME failure => string_for_failure failure
   340            | NONE => perhaps (try (unsuffix "\n")) output ^ ".")
   341 
   342 fun find_system name [] systems =
   343     find_first (String.isPrefix (name ^ "---")) systems
   344   | find_system name (version :: versions) systems =
   345     case find_first (String.isPrefix (name ^ "---" ^ version)) systems of
   346       NONE => find_system name versions systems
   347     | res => res
   348 
   349 fun get_system name versions =
   350   Synchronized.change_result systems
   351       (fn systems => (if null systems then get_systems () else systems)
   352                      |> `(`(find_system name versions)))
   353 
   354 fun the_system name versions =
   355   case get_system name versions of
   356     (SOME sys, _) => sys
   357   | (NONE, []) => error ("SystemOnTPTP is currently not available.")
   358   | (NONE, syss) =>
   359     error ("System " ^ quote name ^ " is not available at SystemOnTPTP.\n" ^
   360            "(Available systems: " ^ commas_quote syss ^ ".)")
   361 
   362 val max_remote_secs = 240 (* give Geoff Sutcliffe's servers a break *)
   363 
   364 fun remote_config system_name system_versions proof_delims known_failures
   365                   conj_sym_kind prem_kind formats best_slice : atp_config =
   366   {exec = ("ISABELLE_ATP", "scripts/remote_atp"),
   367    required_execs = [],
   368    arguments = fn _ => fn _ => fn _ => fn timeout => fn _ =>
   369      "-t " ^ string_of_int (Int.min (max_remote_secs, to_secs timeout))
   370      ^ " -s " ^ the_system system_name system_versions,
   371    proof_delims = union (op =) tstp_proof_delims proof_delims,
   372    known_failures = known_failures @ known_perl_failures @
   373      [(Unprovable, "says Satisfiable"),
   374       (Unprovable, "says CounterSatisfiable"),
   375       (GaveUp, "says Unknown"),
   376       (GaveUp, "says GaveUp"),
   377       (ProofMissing, "says Theorem"),
   378       (ProofMissing, "says Unsatisfiable"),
   379       (TimedOut, "says Timeout"),
   380       (Inappropriate, "says Inappropriate")],
   381    conj_sym_kind = conj_sym_kind,
   382    prem_kind = prem_kind,
   383    formats = formats,
   384    best_slices = fn ctxt =>
   385      let val (max_relevant, type_syss) = best_slice ctxt in
   386        [(1.0, (false, (max_relevant, type_syss, "")))]
   387      end}
   388 
   389 fun remotify_config system_name system_versions
   390         ({proof_delims, known_failures, conj_sym_kind, prem_kind, formats,
   391           best_slices, ...} : atp_config) : atp_config =
   392   remote_config system_name system_versions proof_delims known_failures
   393                 conj_sym_kind prem_kind formats
   394                 (best_slices #> List.last #> snd #> snd
   395                  #> (fn (max_relevant, type_syss, _) =>
   396                         (max_relevant, type_syss)))
   397 
   398 fun remote_atp name system_name system_versions proof_delims known_failures
   399         conj_sym_kind prem_kind formats best_slice =
   400   (remote_prefix ^ name,
   401    remote_config system_name system_versions proof_delims known_failures
   402                  conj_sym_kind prem_kind formats best_slice)
   403 fun remotify_atp (name, config) system_name system_versions =
   404   (remote_prefix ^ name, remotify_config system_name system_versions config)
   405 
   406 val remote_e = remotify_atp e "EP" ["1.0", "1.1", "1.2"]
   407 val remote_vampire = remotify_atp vampire "Vampire" ["0.6", "9.0", "1.0"]
   408 val remote_z3_atp = remotify_atp z3_atp "Z3" ["2.18"]
   409 val remote_leo2 =
   410   remote_atp leo2N "LEO-II" ["1.2.6"] [] [] Axiom Hypothesis [THF]
   411              (K (100, ["simple"]) (* FUDGE *))
   412 val remote_satallax =
   413   remote_atp satallaxN "Satallax" ["2.0"] [] [] Axiom Hypothesis [THF]
   414              (K (64, ["simple"]) (* FUDGE *))
   415 val remote_sine_e =
   416   remote_atp sine_eN "SInE" ["0.4"] [] (#known_failures e_config)
   417              Axiom Conjecture [FOF]
   418              (K (500, ["poly_tags_heavy!", "poly_tags_heavy"]) (* FUDGE *))
   419 val remote_snark =
   420   remote_atp snarkN "SNARK" ["20080805r029", "20080805r024"]
   421              [("refutation.", "end_refutation.")] [] Hypothesis Hypothesis
   422              [TFF, FOF] (K (100, ["simple"]) (* FUDGE *))
   423 val remote_tofof_e =
   424   remote_atp tofof_eN "ToFoF" ["0.1"] [] (#known_failures e_config)
   425              Axiom Hypothesis [TFF] (K (150, ["simple"]) (* FUDGE *))
   426 val remote_waldmeister =
   427   remote_atp waldmeisterN "Waldmeister" ["710"]
   428              [("#START OF PROOF", "Proved Goals:")]
   429              [(OutOfResources, "Too many function symbols"),
   430               (Crashed, "Unrecoverable Segmentation Fault")]
   431              Hypothesis Hypothesis [CNF_UEQ]
   432              (K (50, ["mangled_args", "mangled_tags?"]) (* FUDGE *))
   433 
   434 (* Setup *)
   435 
   436 fun add_atp (name, config) thy =
   437   Data.map (Symtab.update_new (name, (config, stamp ()))) thy
   438   handle Symtab.DUP name => error ("Duplicate ATP: " ^ quote name ^ ".")
   439 
   440 fun get_atp thy name =
   441   the (Symtab.lookup (Data.get thy) name) |> fst
   442   handle Option.Option => error ("Unknown ATP: " ^ name ^ ".")
   443 
   444 val supported_atps = Symtab.keys o Data.get
   445 
   446 fun is_atp_installed thy name =
   447   let val {exec, required_execs, ...} = get_atp thy name in
   448     forall (curry (op <>) "" o getenv o fst) (exec :: required_execs)
   449   end
   450 
   451 fun refresh_systems_on_tptp () =
   452   Synchronized.change systems (fn _ => get_systems ())
   453 
   454 val atps =
   455   [e, spass, vampire, z3_atp, remote_e, remote_vampire, remote_z3_atp,
   456    remote_leo2, remote_satallax, remote_sine_e, remote_snark, remote_tofof_e,
   457    remote_waldmeister]
   458 val setup = fold add_atp atps
   459 
   460 end;