src/HOL/Tools/ATP/atp_systems.ML
author blanchet
Wed, 25 Aug 2010 17:49:52 +0200
changeset 38983 2b6333f78a9e
parent 38979 e2d58749194b
child 39052 bf27c24ba224
permissions -rw-r--r--
make relevance filter work in term of a "max_relevant" option + use Vampire SOS;
"max_relevant" is more reliable than "max_relevant_per_iter";
also made sure that the option is monotone -- larger values should lead to more axioms -- which wasn't always the case before;
SOS for Vampire makes a difference of about 3% (i.e. 3% more proofs are found)
blanchet@38293
     1
(*  Title:      HOL/Tools/ATP/atp_systems.ML
wenzelm@28592
     2
    Author:     Fabian Immler, TU Muenchen
blanchet@36371
     3
    Author:     Jasmin Blanchette, TU Muenchen
wenzelm@28592
     4
blanchet@36376
     5
Setup for supported ATPs.
wenzelm@28592
     6
*)
wenzelm@28592
     7
blanchet@36376
     8
signature ATP_SYSTEMS =
wenzelm@28592
     9
sig
blanchet@38257
    10
  datatype failure =
blanchet@38744
    11
    Unprovable | IncompleteUnprovable | CantConnect | TimedOut |
blanchet@38744
    12
    OutOfResources | SpassTooOld | VampireTooOld | NoPerl | NoLibwwwPerl |
blanchet@38744
    13
    MalformedInput | MalformedOutput | UnknownError
blanchet@38257
    14
blanchet@38257
    15
  type prover_config =
blanchet@38338
    16
    {exec: string * string,
blanchet@38338
    17
     required_execs: (string * string) list,
blanchet@38257
    18
     arguments: bool -> Time.time -> string,
blanchet@38883
    19
     has_incomplete_mode: bool,
blanchet@38257
    20
     proof_delims: (string * string) list,
blanchet@38257
    21
     known_failures: (failure * string) list,
blanchet@38983
    22
     default_max_relevant: int,
blanchet@38812
    23
     default_theory_relevant: bool,
blanchet@38854
    24
     explicit_forall: bool,
blanchet@38854
    25
     use_conjecture_for_hypotheses: bool}
blanchet@38257
    26
blanchet@38307
    27
  val string_for_failure : failure -> string
blanchet@38307
    28
  val known_failure_in_output :
blanchet@38307
    29
    string -> (failure * string) list -> failure option
blanchet@38257
    30
  val add_prover: string * prover_config -> theory -> theory
blanchet@38257
    31
  val get_prover: theory -> string -> prover_config
blanchet@38257
    32
  val available_atps: theory -> unit
blanchet@35867
    33
  val refresh_systems_on_tptp : unit -> unit
blanchet@36371
    34
  val default_atps_param_value : unit -> string
blanchet@35867
    35
  val setup : theory -> theory
wenzelm@28592
    36
end;
wenzelm@28592
    37
blanchet@36376
    38
structure ATP_Systems : ATP_SYSTEMS =
wenzelm@28592
    39
struct
wenzelm@28596
    40
blanchet@38257
    41
(* prover configuration *)
blanchet@35826
    42
blanchet@38257
    43
datatype failure =
blanchet@38698
    44
  Unprovable | IncompleteUnprovable | CantConnect | TimedOut | OutOfResources |
blanchet@38744
    45
  SpassTooOld | VampireTooOld | NoPerl | NoLibwwwPerl | MalformedInput |
blanchet@38698
    46
  MalformedOutput | UnknownError
boehmes@32864
    47
wenzelm@32941
    48
type prover_config =
blanchet@38338
    49
  {exec: string * string,
blanchet@38338
    50
   required_execs: (string * string) list,
blanchet@37514
    51
   arguments: bool -> Time.time -> string,
blanchet@38883
    52
   has_incomplete_mode: bool,
blanchet@36370
    53
   proof_delims: (string * string) list,
blanchet@36370
    54
   known_failures: (failure * string) list,
blanchet@38983
    55
   default_max_relevant: int,
blanchet@38812
    56
   default_theory_relevant: bool,
blanchet@38854
    57
   explicit_forall: bool,
blanchet@38854
    58
   use_conjecture_for_hypotheses: bool}
boehmes@32864
    59
blanchet@38307
    60
val missing_message_tail =
blanchet@38307
    61
  " appears to be missing. You will need to install it if you want to run \
blanchet@38307
    62
  \ATPs remotely."
blanchet@38307
    63
blanchet@38307
    64
fun string_for_failure Unprovable = "The ATP problem is unprovable."
blanchet@38307
    65
  | string_for_failure IncompleteUnprovable =
blanchet@38307
    66
    "The ATP cannot prove the problem."
blanchet@38340
    67
  | string_for_failure CantConnect = "Can't connect to remote server."
blanchet@38307
    68
  | string_for_failure TimedOut = "Timed out."
blanchet@38307
    69
  | string_for_failure OutOfResources = "The ATP ran out of resources."
blanchet@38744
    70
  | string_for_failure SpassTooOld =
blanchet@38342
    71
    "Isabelle requires a more recent version of SPASS with support for the \
blanchet@38342
    72
    \TPTP syntax. To install it, download and extract the package \
blanchet@38342
    73
    \\"http://isabelle.in.tum.de/dist/contrib/spass-3.7.tar.gz\" and add the \
blanchet@38342
    74
    \\"spass-3.7\" directory's absolute path to " ^
blanchet@38307
    75
    quote (Path.implode (Path.expand (Path.appends
blanchet@38307
    76
               (Path.variable "ISABELLE_HOME_USER" ::
blanchet@38307
    77
                map Path.basic ["etc", "components"])))) ^
blanchet@38307
    78
    " on a line of its own."
blanchet@38744
    79
  | string_for_failure VampireTooOld =
blanchet@38698
    80
    "Isabelle requires a more recent version of Vampire. To install it, follow \
blanchet@38698
    81
    \the instructions from the Sledgehammer manual (\"isabelle doc\
blanchet@38698
    82
    \ sledgehammer\")."
blanchet@38307
    83
  | string_for_failure NoPerl = "Perl" ^ missing_message_tail
blanchet@38307
    84
  | string_for_failure NoLibwwwPerl =
blanchet@38307
    85
    "The Perl module \"libwww-perl\"" ^ missing_message_tail
blanchet@38307
    86
  | string_for_failure MalformedInput =
blanchet@38342
    87
    "The ATP problem is malformed. Please report this to the Isabelle \
blanchet@38342
    88
    \developers."
blanchet@38342
    89
  | string_for_failure MalformedOutput = "The ATP output is malformed."
blanchet@38342
    90
  | string_for_failure UnknownError = "An unknown ATP error occurred."
blanchet@38307
    91
blanchet@38307
    92
fun known_failure_in_output output =
blanchet@38307
    93
  find_first (fn (_, pattern) => String.isSubstring pattern output)
blanchet@38307
    94
  #> Option.map fst
blanchet@38307
    95
blanchet@38307
    96
val known_perl_failures =
blanchet@38340
    97
  [(CantConnect, "HTTP error"),
blanchet@38340
    98
   (NoPerl, "env: perl"),
blanchet@38311
    99
   (NoLibwwwPerl, "Can't locate HTTP")]
wenzelm@28596
   100
blanchet@38257
   101
(* named provers *)
wenzelm@28596
   102
blanchet@38257
   103
structure Data = Theory_Data
blanchet@38257
   104
(
blanchet@38257
   105
  type T = (prover_config * stamp) Symtab.table
blanchet@38257
   106
  val empty = Symtab.empty
blanchet@38257
   107
  val extend = I
blanchet@38257
   108
  fun merge data : T = Symtab.merge (eq_snd op =) data
blanchet@38257
   109
    handle Symtab.DUP name => error ("Duplicate ATP: " ^ quote name ^ ".")
blanchet@38257
   110
)
blanchet@36370
   111
blanchet@38257
   112
fun add_prover (name, config) thy =
blanchet@38257
   113
  Data.map (Symtab.update_new (name, (config, stamp ()))) thy
blanchet@38257
   114
  handle Symtab.DUP name => error ("Duplicate ATP: " ^ quote name ^ ".")
boehmes@32458
   115
blanchet@38257
   116
fun get_prover thy name =
blanchet@38257
   117
  the (Symtab.lookup (Data.get thy) name) |> fst
blanchet@38257
   118
  handle Option.Option => error ("Unknown ATP: " ^ name ^ ".")
blanchet@36369
   119
blanchet@38257
   120
fun available_atps thy =
blanchet@38257
   121
  priority ("Available ATPs: " ^
blanchet@38257
   122
            commas (sort_strings (Symtab.keys (Data.get thy))) ^ ".")
blanchet@36370
   123
blanchet@38257
   124
fun available_atps thy =
blanchet@38257
   125
  priority ("Available ATPs: " ^
blanchet@38257
   126
            commas (sort_strings (Symtab.keys (Data.get thy))) ^ ".")
wenzelm@28592
   127
blanchet@38976
   128
fun to_secs bonus time = (Time.toMilliseconds time + bonus + 999) div 1000
blanchet@36140
   129
wenzelm@28596
   130
(* E prover *)
wenzelm@28596
   131
blanchet@38976
   132
(* Give older versions of E an extra second, because the "eproof" script wrongly
blanchet@38976
   133
   subtracted an entire second to account for the overhead of the script
blanchet@38976
   134
   itself, which is in fact much lower. *)
blanchet@38976
   135
fun e_bonus () =
blanchet@38976
   136
  case getenv "E_VERSION" of
blanchet@38976
   137
    "" => 1000
blanchet@38976
   138
  | version =>
blanchet@38976
   139
    if exists (fn s => String.isPrefix s version) ["0.9", "1.0"] then 1000
blanchet@38976
   140
    else 0
blanchet@38976
   141
blanchet@36369
   142
val tstp_proof_delims =
blanchet@36369
   143
  ("# SZS output start CNFRefutation.", "# SZS output end CNFRefutation")
blanchet@36369
   144
blanchet@35967
   145
val e_config : prover_config =
blanchet@38338
   146
  {exec = ("E_HOME", "eproof"),
blanchet@38338
   147
   required_execs = [],
blanchet@37514
   148
   arguments = fn _ => fn timeout =>
blanchet@38930
   149
     "--tstp-in --tstp-out -l5 -xAutoDev -tAutoDev --silent \
blanchet@38976
   150
     \--cpu-limit=" ^ string_of_int (to_secs (e_bonus ()) timeout),
blanchet@38883
   151
   has_incomplete_mode = false,
blanchet@36369
   152
   proof_delims = [tstp_proof_delims],
blanchet@36265
   153
   known_failures =
blanchet@38229
   154
     [(Unprovable, "SZS status: CounterSatisfiable"),
blanchet@38229
   155
      (Unprovable, "SZS status CounterSatisfiable"),
blanchet@36370
   156
      (TimedOut, "Failure: Resource limit exceeded (time)"),
blanchet@36370
   157
      (TimedOut, "time limit exceeded"),
blanchet@36370
   158
      (OutOfResources,
blanchet@36370
   159
       "# Cannot determine problem status within resource limit"),
blanchet@36370
   160
      (OutOfResources, "SZS status: ResourceOut"),
blanchet@36370
   161
      (OutOfResources, "SZS status ResourceOut")],
blanchet@38983
   162
   default_max_relevant = 500 (* FUDGE *),
blanchet@38812
   163
   default_theory_relevant = false,
blanchet@38854
   164
   explicit_forall = false,
blanchet@38854
   165
   use_conjecture_for_hypotheses = true}
blanchet@38698
   166
blanchet@38257
   167
val e = ("e", e_config)
wenzelm@28596
   168
wenzelm@28596
   169
blanchet@36219
   170
(* The "-VarWeight=3" option helps the higher-order problems, probably by
blanchet@36219
   171
   counteracting the presence of "hAPP". *)
blanchet@37498
   172
val spass_config : prover_config =
blanchet@38338
   173
  {exec = ("ISABELLE_ATP", "scripts/spass"),
blanchet@38338
   174
   required_execs = [("SPASS_HOME", "SPASS")],
blanchet@37514
   175
   arguments = fn complete => fn timeout =>
blanchet@38199
   176
     ("-Auto -PGiven=0 -PProblem=0 -Splits=0 -FullRed=0 -DocProof \
blanchet@38976
   177
      \-VarWeight=3 -TimeLimit=" ^ string_of_int (to_secs 0 timeout))
blanchet@37514
   178
     |> not complete ? prefix "-SOS=1 ",
blanchet@38883
   179
   has_incomplete_mode = true,
blanchet@36369
   180
   proof_delims = [("Here is a proof", "Formulae used in the proof")],
blanchet@36289
   181
   known_failures =
blanchet@38307
   182
     known_perl_failures @
blanchet@37388
   183
     [(IncompleteUnprovable, "SPASS beiseite: Completion found"),
blanchet@36370
   184
      (TimedOut, "SPASS beiseite: Ran out of time"),
blanchet@36958
   185
      (OutOfResources, "SPASS beiseite: Maximal number of loops exceeded"),
blanchet@37388
   186
      (MalformedInput, "Undefined symbol"),
blanchet@37389
   187
      (MalformedInput, "Free Variable"),
blanchet@38744
   188
      (SpassTooOld, "tptp2dfg")],
blanchet@38983
   189
   default_max_relevant = 350 (* FUDGE *),
blanchet@38812
   190
   default_theory_relevant = true,
blanchet@38854
   191
   explicit_forall = true,
blanchet@38854
   192
   use_conjecture_for_hypotheses = true}
blanchet@38698
   193
blanchet@38257
   194
val spass = ("spass", spass_config)
wenzelm@28596
   195
blanchet@38698
   196
blanchet@37509
   197
(* Vampire *)
blanchet@37509
   198
blanchet@37509
   199
val vampire_config : prover_config =
blanchet@38338
   200
  {exec = ("VAMPIRE_HOME", "vampire"),
blanchet@38338
   201
   required_execs = [],
blanchet@38983
   202
   arguments = fn complete => fn timeout =>
blanchet@38983
   203
     ("--mode casc -t " ^ string_of_int (to_secs 0 timeout) ^
blanchet@38983
   204
      " --thanks Andrei --input_file")
blanchet@38983
   205
     |> not complete ? prefix "--sos on ",
blanchet@38983
   206
   has_incomplete_mode = true,
blanchet@37509
   207
   proof_delims =
blanchet@37509
   208
     [("=========== Refutation ==========",
blanchet@37509
   209
       "======= End of refutation ======="),
blanchet@38279
   210
      ("% SZS output start Refutation", "% SZS output end Refutation"),
blanchet@38279
   211
      ("% SZS output start Proof", "% SZS output end Proof")],
blanchet@37509
   212
   known_failures =
blanchet@37509
   213
     [(Unprovable, "UNPROVABLE"),
blanchet@37509
   214
      (IncompleteUnprovable, "CANNOT PROVE"),
blanchet@38338
   215
      (TimedOut, "SZS status Timeout"),
blanchet@37509
   216
      (Unprovable, "Satisfiability detected"),
blanchet@38885
   217
      (Unprovable, "Termination reason: Satisfiable"),
blanchet@38744
   218
      (VampireTooOld, "not a valid option")],
blanchet@38983
   219
   default_max_relevant = 400 (* FUDGE *),
blanchet@38812
   220
   default_theory_relevant = false,
blanchet@38854
   221
   explicit_forall = false,
blanchet@38919
   222
   use_conjecture_for_hypotheses = true}
blanchet@38698
   223
blanchet@38257
   224
val vampire = ("vampire", vampire_config)
blanchet@37509
   225
blanchet@38698
   226
blanchet@37509
   227
(* Remote prover invocation via SystemOnTPTP *)
wenzelm@28596
   228
blanchet@38307
   229
val systems = Synchronized.var "atp_systems" ([] : string list)
immler@31828
   230
immler@31828
   231
fun get_systems () =
blanchet@38307
   232
  case bash_output "\"$ISABELLE_ATP/scripts/remote_atp\" -w 2>&1" of
blanchet@36370
   233
    (answer, 0) => split_lines answer
blanchet@36370
   234
  | (answer, _) =>
blanchet@38311
   235
    error (case known_failure_in_output answer known_perl_failures of
blanchet@38311
   236
             SOME failure => string_for_failure failure
blanchet@38311
   237
           | NONE => perhaps (try (unsuffix "\n")) answer ^ ".")
immler@31828
   238
blanchet@35867
   239
fun refresh_systems_on_tptp () =
blanchet@37509
   240
  Synchronized.change systems (fn _ => get_systems ())
immler@31828
   241
blanchet@38929
   242
fun find_system name [] systems = find_first (String.isPrefix name) systems
blanchet@38929
   243
  | find_system name (version :: versions) systems =
blanchet@38929
   244
    case find_first (String.isPrefix (name ^ "---" ^ version)) systems of
blanchet@38929
   245
      NONE => find_system name versions systems
blanchet@38929
   246
    | res => res
blanchet@38929
   247
blanchet@38929
   248
fun get_system name versions =
blanchet@38812
   249
  Synchronized.change_result systems
blanchet@38812
   250
      (fn systems => (if null systems then get_systems () else systems)
blanchet@38929
   251
                     |> `(find_system name versions))
immler@31828
   252
blanchet@38929
   253
fun the_system name versions =
blanchet@38929
   254
  case get_system name versions of
blanchet@38929
   255
    NONE => error ("System " ^ quote name ^ " not available at SystemOnTPTP.")
blanchet@38929
   256
  | SOME sys => sys
wenzelm@28596
   257
blanchet@38929
   258
fun remote_config system_name system_versions proof_delims known_failures
blanchet@38983
   259
                  default_max_relevant default_theory_relevant
blanchet@38979
   260
                  use_conjecture_for_hypotheses : prover_config =
blanchet@38338
   261
  {exec = ("ISABELLE_ATP", "scripts/remote_atp"),
blanchet@38338
   262
   required_execs = [],
blanchet@37514
   263
   arguments = fn _ => fn timeout =>
blanchet@38976
   264
     " -t " ^ string_of_int (to_secs 0 timeout) ^ " -s " ^
blanchet@38929
   265
     the_system system_name system_versions,
blanchet@38883
   266
   has_incomplete_mode = false,
blanchet@36369
   267
   proof_delims = insert (op =) tstp_proof_delims proof_delims,
blanchet@38307
   268
   known_failures =
blanchet@38307
   269
     known_failures @ known_perl_failures @
blanchet@38340
   270
     [(TimedOut, "says Timeout")],
blanchet@38983
   271
   default_max_relevant = default_max_relevant,
blanchet@38812
   272
   default_theory_relevant = default_theory_relevant,
blanchet@38854
   273
   explicit_forall = true,
blanchet@38854
   274
   use_conjecture_for_hypotheses = use_conjecture_for_hypotheses}
boehmes@32864
   275
blanchet@38929
   276
fun remotify_config system_name system_versions
blanchet@38983
   277
        ({proof_delims, known_failures, default_max_relevant,
blanchet@38854
   278
          default_theory_relevant, use_conjecture_for_hypotheses, ...}
blanchet@38854
   279
         : prover_config) : prover_config =
blanchet@38929
   280
  remote_config system_name system_versions proof_delims known_failures
blanchet@38983
   281
                default_max_relevant default_theory_relevant
blanchet@38854
   282
                use_conjecture_for_hypotheses
blanchet@38257
   283
blanchet@38821
   284
val remotify_name = prefix "remote_"
blanchet@38929
   285
fun remote_prover name system_name system_versions proof_delims known_failures
blanchet@38983
   286
                  default_max_relevant default_theory_relevant
blanchet@38854
   287
                  use_conjecture_for_hypotheses =
blanchet@38821
   288
  (remotify_name name,
blanchet@38929
   289
   remote_config system_name system_versions proof_delims known_failures
blanchet@38983
   290
                 default_max_relevant default_theory_relevant
blanchet@38854
   291
                 use_conjecture_for_hypotheses)
blanchet@38929
   292
fun remotify_prover (name, config) system_name system_versions =
blanchet@38929
   293
  (remotify_name name, remotify_config system_name system_versions config)
wenzelm@28592
   294
blanchet@38929
   295
val remote_e = remotify_prover e "EP" ["1.0", "1.1", "1.2"]
blanchet@38929
   296
val remote_vampire = remotify_prover vampire "Vampire" ["9.9", "0.6", "1.0"]
blanchet@38826
   297
val remote_sine_e =
blanchet@38983
   298
  remote_prover "sine_e" "SInE" [] [] [(Unprovable, "says Unknown")]
blanchet@38983
   299
                1000 (* FUDGE *) false true
blanchet@38821
   300
val remote_snark =
blanchet@38929
   301
  remote_prover "snark" "SNARK---" [] [("refutation.", "end_refutation.")] []
blanchet@38983
   302
                350 (* FUDGE *) false true
blanchet@38698
   303
blanchet@38698
   304
(* Setup *)
blanchet@38698
   305
blanchet@38338
   306
fun is_installed ({exec, required_execs, ...} : prover_config) =
blanchet@38338
   307
  forall (curry (op <>) "" o getenv o fst) (exec :: required_execs)
blanchet@38287
   308
fun maybe_remote (name, config) =
blanchet@38821
   309
  name |> not (is_installed config) ? remotify_name
blanchet@36371
   310
blanchet@36371
   311
fun default_atps_param_value () =
blanchet@38287
   312
  space_implode " " ([maybe_remote e] @
blanchet@38287
   313
                     (if is_installed (snd spass) then [fst spass] else []) @
blanchet@38826
   314
                     [if forall (is_installed o snd) [e, spass] then
blanchet@38826
   315
                        remotify_name (fst vampire)
blanchet@38826
   316
                      else
blanchet@38826
   317
                        maybe_remote vampire,
blanchet@38826
   318
                      fst remote_sine_e])
blanchet@36371
   319
blanchet@38821
   320
val provers = [e, spass, vampire, remote_e, remote_vampire, remote_sine_e,
blanchet@38821
   321
               remote_snark]
blanchet@38257
   322
val setup = fold add_prover provers
blanchet@35867
   323
wenzelm@28592
   324
end;