wneuper/isa: doc-src/Sledgehammer/sledgehammer.tex@fd7958ebee96

     1 \documentclass[a4paper,12pt]{article}

     2 \usepackage[T1]{fontenc}

     3 \usepackage{amsmath}

     4 \usepackage{amssymb}

     5 \usepackage[english,french]{babel}

     6 \usepackage{color}

     7 \usepackage{footmisc}

     8 \usepackage{graphicx}

     9 %\usepackage{mathpazo}

    10 \usepackage{multicol}

    11 \usepackage{stmaryrd}

    12 %\usepackage[scaled=.85]{beramono}

    13 \usepackage{../../lib/texinputs/isabelle,../iman,../pdfsetup}

    15 \newcommand\download{\url{http://www21.in.tum.de/~blanchet/\#software}}

    17 \def\qty#1{\ensuremath{\left<\mathit{#1\/}\right>}}

    18 \def\qtybf#1{$\mathbf{\left<\textbf{\textit{#1\/}}\right>}$}

    20 \newcommand\const[1]{\textsf{#1}}

    22 %\oddsidemargin=4.6mm

    23 %\evensidemargin=4.6mm

    24 %\textwidth=150mm

    25 %\topmargin=4.6mm

    26 %\headheight=0mm

    27 %\headsep=0mm

    28 %\textheight=234mm

    30 \def\Colon{\mathord{:\mkern-1.5mu:}}

    31 %\def\lbrakk{\mathopen{\lbrack\mkern-3.25mu\lbrack}}

    32 %\def\rbrakk{\mathclose{\rbrack\mkern-3.255mu\rbrack}}

    33 \def\lparr{\mathopen{(\mkern-4mu\mid}}

    34 \def\rparr{\mathclose{\mid\mkern-4mu)}}

    36 \def\unk{{?}}

    37 \def\undef{(\lambda x.\; \unk)}

    38 %\def\unr{\textit{others}}

    39 \def\unr{\ldots}

    40 \def\Abs#1{\hbox{\rm{\flqq}}{\,#1\,}\hbox{\rm{\frqq}}}

    41 \def\Q{{\smash{\lower.2ex\hbox{$\scriptstyle?$}}}}

    43 \urlstyle{tt}

    45 \begin{document}

    47 %%% TYPESETTING

    48 %\renewcommand\labelitemi{$\bullet$}

    49 \renewcommand\labelitemi{\raise.065ex\hbox{\small\textbullet}}

    51 \selectlanguage{english}

    53 \title{\includegraphics[scale=0.5]{isabelle_sledgehammer} \\[4ex]

    54 Hammering Away \\[\smallskipamount]

    55 \Large A User's Guide to Sledgehammer for Isabelle/HOL}

    56 \author{\hbox{} \\

    57 Jasmin Christian Blanchette \\

    58 {\normalsize Institut f\"ur Informatik, Technische Universit\"at M\"unchen} \\[4\smallskipamount]

    59 {\normalsize with contributions from} \\[4\smallskipamount]

    60 Lawrence C. Paulson \\

    61 {\normalsize Computer Laboratory, University of Cambridge} \\

    62 \hbox{}}

    64 \maketitle

    66 \tableofcontents

    68 \setlength{\parskip}{.7em plus .2em minus .1em}

    69 \setlength{\parindent}{0pt}

    70 \setlength{\abovedisplayskip}{\parskip}

    71 \setlength{\abovedisplayshortskip}{.9\parskip}

    72 \setlength{\belowdisplayskip}{\parskip}

    73 \setlength{\belowdisplayshortskip}{.9\parskip}

    75 % General-purpose enum environment with correct spacing

    76 \newenvironment{enum}%

    77     {\begin{list}{}{%

    78         \setlength{\topsep}{.1\parskip}%

    79         \setlength{\partopsep}{.1\parskip}%

    80         \setlength{\itemsep}{\parskip}%

    81         \advance\itemsep by-\parsep}}

    82     {\end{list}}

    84 \def\pre{\begingroup\vskip0pt plus1ex\advance\leftskip by\leftmargin

    85 \advance\rightskip by\leftmargin}

    86 \def\post{\vskip0pt plus1ex\endgroup}

    88 \def\prew{\pre\advance\rightskip by-\leftmargin}

    89 \def\postw{\post}

    91 \section{Introduction}

    92 \label{introduction}

    94 Sledgehammer is a tool that applies automatic theorem provers (ATPs)

    95 and satisfiability-modulo-theories (SMT) solvers on the current goal.%

    96 \footnote{The distinction between ATPs and SMT solvers is convenient but mostly

    97 historical. The two communities are converging, with more and more ATPs

    98 supporting typical SMT features such as arithmetic and sorts, and a few SMT

    99 solvers parsing ATP syntaxes. There is also a strong technological connection

   100 between instantiation-based ATPs (such as iProver and iProver-Eq) and SMT

   101 solvers.}

   102 %

   103 The supported ATPs are E \cite{schulz-2002}, E-SInE \cite{sine}, E-ToFoF

   104 \cite{tofof}, iProver \cite{korovin-2009}, iProver-Eq

   105 \cite{korovin-sticksel-2010}, LEO-II \cite{leo2}, Satallax \cite{satallax},

   106 SNARK \cite{snark}, SPASS \cite{weidenbach-et-al-2009}, Vampire

   107 \cite{riazanov-voronkov-2002}, and Waldmeister \cite{waldmeister}. The ATPs are

   108 run either locally or remotely via the System\-On\-TPTP web service

   109 \cite{sutcliffe-2000}. In addition to the ATPs, the SMT solver Z3 \cite{z3} is

   110 used by default, and you can tell Sledgehammer to try Alt-Ergo \cite{alt-ergo},

   111 CVC3 \cite{cvc3}, and Yices \cite{yices} as well; these are run either locally

   112 or (for CVC3 and Z3) on a server at the TU M\"unchen.

   114 The problem passed to the automatic provers consists of your current goal

   115 together with a heuristic selection of hundreds of facts (theorems) from the

   116 current theory context, filtered by relevance. Because jobs are run in the

   117 background, you can continue to work on your proof by other means. Provers can

   118 be run in parallel. Any reply (which may arrive half a minute later) will appear

   119 in the Proof General response buffer.

   121 The result of a successful proof search is some source text that usually (but

   122 not always) reconstructs the proof within Isabelle. For ATPs, the reconstructed

   123 proof relies on the general-purpose \textit{metis} proof method, which

   124 integrates the Metis ATP in Isabelle/HOL with explicit inferences going through

   125 the kernel. Thus its results are correct by construction.

   127 In this manual, we will explicitly invoke the \textbf{sledgehammer} command.

   128 Sledgehammer also provides an automatic mode that can be enabled via the ``Auto

   129 Sledgehammer'' option in Proof General's ``Isabelle'' menu. In this mode,

   130 Sledgehammer is run on every newly entered theorem. The time limit for Auto

   131 Sledgehammer and other automatic tools can be set using the ``Auto Tools Time

   132 Limit'' option.

   134 \newbox\boxA

   135 \setbox\boxA=\hbox{\texttt{NOSPAM}}

   137 \newcommand\authoremail{\texttt{blan{\color{white}NOSPAM}\kern-\wd\boxA{}chette@\allowbreak

   138 in.\allowbreak tum.\allowbreak de}}

   140 To run Sledgehammer, you must make sure that the theory \textit{Sledgehammer} is

   141 imported---this is rarely a problem in practice since it is part of

   142 \textit{Main}. Examples of Sledgehammer use can be found in Isabelle's

   143 \texttt{src/HOL/Metis\_Examples} directory.

   144 Comments and bug reports concerning Sledgehammer or this manual should be

   145 directed to the author at \authoremail.

   147 \vskip2.5\smallskipamount

   149 %\textbf{Acknowledgment.} The author would like to thank Mark Summerfield for

   150 %suggesting several textual improvements.

   152 \section{Installation}

   153 \label{installation}

   155 Sledgehammer is part of Isabelle, so you do not need to install it. However, it

   156 relies on third-party automatic provers (ATPs and SMT solvers).

   158 Among the ATPs, E, LEO-II, Satallax, SPASS, and Vampire can be run locally; in

   159 addition, E, E-SInE, E-ToFoF, iProver, iProver-Eq, LEO-II, Satallax, SNARK,

   160 Vampire, and Waldmeister are available remotely via System\-On\-TPTP

   161 \cite{sutcliffe-2000}. If you want better performance, you should at least

   162 install E and SPASS locally.

   164 Among the SMT solvers, Alt-Ergo, CVC3, Yices, and Z3 can be run locally, and

   165 CVC3 and Z3 can be run remotely on a TU M\"unchen server. If you want better

   166 performance and the ability to replay proofs that rely on the \emph{smt}

   167 proof method without an Internet connection, you should at least install Z3

   168 locally.

   170 There are three main ways to install automatic provers on your machine:

   172 \begin{sloppy}

   173 \begin{enum}

   174 \item[\labelitemi] If you installed an official Isabelle package, it should

   175 already include properly set up executables for CVC3, E, SPASS, and Z3, ready to use.%

   176 \footnote{Vampire's and Yices's licenses prevent us from doing the same for

   177 these otherwise remarkable tools.}

   178 For Z3, you must additionally set the variable

   179 \texttt{Z3\_NON\_COMMERCIAL} to ``yes'' to confirm that you are a

   180 noncommercial user, either in the environment in which Isabelle is

   181 launched or in your

   182 \texttt{\$ISABELLE\_HOME\_USER/etc/settings} file.

   184 \item[\labelitemi] Alternatively, you can download the Isabelle-aware CVC3, E,

   185 SPASS, and Z3 binary packages from \download. Extract the archives, then add a

   186 line to your \texttt{\$ISABELLE\_HOME\_USER\slash etc\slash components}%

   187 \footnote{The variable \texttt{\$ISABELLE\_HOME\_USER} is set by Isabelle at

   188 startup. Its value can be retrieved by executing \texttt{isabelle}

   189 \texttt{getenv} \texttt{ISABELLE\_HOME\_USER} on the command line.}

   190 file with the absolute path to CVC3, E, SPASS, or Z3. For example, if the

   191 \texttt{components} file does not exist yet and you extracted SPASS to

   192 \texttt{/usr/local/spass-3.8ds}, create it with the single line

   194 \prew

   195 \texttt{/usr/local/spass-3.8ds}

   196 \postw

   198 in it.

   200 \item[\labelitemi] If you prefer to build E, LEO-II, Satallax, or SPASS

   201 manually, or found a Vampire executable somewhere (e.g.,

   202 \url{http://www.vprover.org/}), set the environment variable \texttt{E\_HOME},

   203 \texttt{LEO2\_HOME}, \texttt{SATALLAX\_HOME}, \texttt{SPASS\_HOME}, or

   204 \texttt{VAMPIRE\_HOME} to the directory that contains the \texttt{eproof},

   205 \texttt{leo}, \texttt{satallax}, \texttt{SPASS}, or \texttt{vampire} executable.

   206 Sledgehammer has been tested with E 1.0 to 1.4, LEO-II 1.3.4, Satallax 2.2, 2.3,

   207 and 2.4, SPASS 3.8ds, and Vampire 0.6, 1.0, and 1.8.%

   208 \footnote{Following the rewrite of Vampire, the counter for version numbers was

   209 reset to 0; hence the (new) Vampire versions 0.6, 1.0, and 1.8 are more recent

   210 than 9.0 or 11.5.}

   211 Since the ATPs' output formats are neither documented nor stable, other

   212 versions might not work well with Sledgehammer. Ideally,

   213 you should also set \texttt{E\_VERSION}, \texttt{LEO2\_VERSION},

   214 \texttt{SATALLAX\_VERSION}, \texttt{SPASS\_VERSION}, or

   215 \texttt{VAMPIRE\_VERSION} to the prover's version number (e.g., ``1.4'').

   217 Similarly, if you want to build Alt-Ergo or CVC3, or found a

   218 Yices or Z3 executable somewhere (e.g.,

   219 \url{http://yices.csl.sri.com/download.shtml} or

   220 \url{http://research.microsoft.com/en-us/um/redmond/projects/z3/download.html}),

   221 set the environment variable \texttt{CVC3\_\allowbreak SOLVER},

   222 \texttt{YICES\_SOLVER}, or \texttt{Z3\_SOLVER} to the complete path of

   223 the executable, \emph{including the file name}. Sledgehammer has been tested

   224 with Alt-Ergo 0.93, CVC3 2.2 and 2.4.1, Yices 1.0.28 and 1.0.33, and Z3 3.0,

   225 3.1, 3.2, and 4.0. Since the SMT solvers' output formats are somewhat unstable,

   226 other versions of the solvers might not work well with Sledgehammer. Ideally,

   227 also set \texttt{CVC3\_VERSION}, \texttt{YICES\_VERSION}, or

   228 \texttt{Z3\_VERSION} to the solver's version number (e.g., ``4.0'').

   229 \end{enum}

   230 \end{sloppy}

   232 To check whether E, SPASS, Vampire, and/or Z3 are successfully installed, try

   233 out the example in \S\ref{first-steps}. If the remote version of any of these

   234 provers is used (identified by the prefix ``\emph{remote\_\/}''), or if the

   235 local versions fail to solve the easy goal presented there, something must be

   236 wrong with the installation.

   238 Remote prover invocation requires Perl with the World Wide Web Library

   239 (\texttt{libwww-perl}) installed. If you must use a proxy server to access the

   240 Internet, set the \texttt{http\_proxy} environment variable to the proxy, either

   241 in the environment in which Isabelle is launched or in your

   242 \texttt{\$ISABELLE\_HOME\_USER/etc/settings} file. Here are a few

   243 examples:

   245 \prew

   246 \texttt{http\_proxy=http://proxy.example.org} \\

   247 \texttt{http\_proxy=http://proxy.example.org:8080} \\

   248 \texttt{http\_proxy=http://joeblow:pAsSwRd@proxy.example.org}

   249 \postw

   251 \section{First Steps}

   252 \label{first-steps}

   254 To illustrate Sledgehammer in context, let us start a theory file and

   255 attempt to prove a simple lemma:

   257 \prew

   258 \textbf{theory}~\textit{Scratch} \\

   259 \textbf{imports}~\textit{Main} \\

   260 \textbf{begin} \\[2\smallskipamount]

   261 %

   262 \textbf{lemma} ``$[a] = [b] \,\Longrightarrow\, a = b$'' \\

   263 \textbf{sledgehammer}

   264 \postw

   266 Instead of issuing the \textbf{sledgehammer} command, you can also find

   267 Sledgehammer in the ``Commands'' submenu of the ``Isabelle'' menu in Proof

   268 General or press the Emacs key sequence C-c C-a C-s.

   269 Either way, Sledgehammer produces the following output after a few seconds:

   271 \prew

   272 \slshape

   273 Sledgehammer: ``\textit{e\/}'' on goal \\

   274 $[a] = [b] \,\Longrightarrow\, a = b$ \\

   275 Try this: \textbf{by} (\textit{metis last\_ConsL}) (64 ms). \\[3\smallskipamount]

   276 %

   277 Sledgehammer: ``\textit{z3\/}'' on goal \\

   278 $[a] = [b] \,\Longrightarrow\, a = b$ \\

   279 Try this: \textbf{by} (\textit{metis list.inject}) (20 ms). \\[3\smallskipamount]

   280 %

   281 Sledgehammer: ``\textit{vampire\/}'' on goal \\

   282 $[a] = [b] \,\Longrightarrow\, a = b$ \\

   283 Try this: \textbf{by} (\textit{metis hd.simps}) (14 ms). \\[3\smallskipamount]

   284 %

   285 Sledgehammer: ``\textit{spass\/}'' on goal \\

   286 $[a] = [b] \,\Longrightarrow\, a = b$ \\

   287 Try this: \textbf{by} (\textit{metis list.inject}) (17 ms). \\[3\smallskipamount]

   288 %

   289 Sledgehammer: ``\textit{remote\_waldmeister\/}'' on goal \\

   290 $[a] = [b] \,\Longrightarrow\, a = b$ \\

   291 Try this: \textbf{by} (\textit{metis hd.simps}) (15 ms). \\[3\smallskipamount]

   292 %

   293 Sledgehammer: ``\textit{remote\_e\_sine\/}'' on goal \\

   294 $[a] = [b] \,\Longrightarrow\, a = b$ \\

   295 Try this: \textbf{by} (\textit{metis hd.simps}) (18 ms).

   296 \postw

   298 Sledgehammer ran E, E-SInE, SPASS, Vampire, Waldmeister, and Z3 in parallel.

   299 Depending on which provers are installed and how many processor cores are

   300 available, some of the provers might be missing or present with a

   301 \textit{remote\_} prefix. Waldmeister is run only for unit equational problems,

   302 where the goal's conclusion is a (universally quantified) equation.

   304 For each successful prover, Sledgehammer gives a one-liner \textit{metis} or

   305 \textit{smt} method call. Rough timings are shown in parentheses, indicating how

   306 fast the call is. You can click the proof to insert it into the theory text.

   308 In addition, you can ask Sledgehammer for an Isar text proof by passing the

   309 \textit{isar\_proof} option (\S\ref{output-format}):

   311 \prew

   312 \textbf{sledgehammer} [\textit{isar\_proof}]

   313 \postw

   315 When Isar proof construction is successful, it can yield proofs that are more

   316 readable and also faster than the \textit{metis} or \textit{smt} one-liners.

   317 This feature is experimental and is only available for ATPs.

   319 \section{Hints}

   320 \label{hints}

   322 This section presents a few hints that should help you get the most out of

   323 Sledgehammer. Frequently asked questions are answered in

   324 \S\ref{frequently-asked-questions}.

   326 %\newcommand\point[1]{\medskip\par{\sl\bfseries#1}\par\nopagebreak}

   327 \newcommand\point[1]{\subsection{\emph{#1}}}

   329 \point{Presimplify the goal}

   331 For best results, first simplify your problem by calling \textit{auto} or at

   332 least \textit{safe} followed by \textit{simp\_all}. The SMT solvers provide

   333 arithmetic decision procedures, but the ATPs typically do not (or if they do,

   334 Sledgehammer does not use them yet). Apart from Waldmeister, they are not

   335 especially good at heavy rewriting, but because they regard equations as

   336 undirected, they often prove theorems that require the reverse orientation of a

   337 \textit{simp} rule. Higher-order problems can be tackled, but the success rate

   338 is better for first-order problems. Hence, you may get better results if you

   339 first simplify the problem to remove higher-order features.

   341 \point{Make sure E, SPASS, Vampire, and Z3 are locally installed}

   343 Locally installed provers are faster and more reliable than those running on

   344 servers. See \S\ref{installation} for details on how to install them.

   346 \point{Familiarize yourself with the most important options}

   348 Sledgehammer's options are fully documented in \S\ref{command-syntax}. Many of

   349 the options are very specialized, but serious users of the tool should at least

   350 familiarize themselves with the following options:

   352 \begin{enum}

   353 \item[\labelitemi] \textbf{\textit{provers}} (\S\ref{mode-of-operation}) specifies

   354 the automatic provers (ATPs and SMT solvers) that should be run whenever

   355 Sledgehammer is invoked (e.g., ``\textit{provers}~= \textit{e spass

   356 remote\_vampire\/}''). For convenience, you can omit ``\textit{provers}~=''

   357 and simply write the prover names as a space-separated list (e.g., ``\textit{e

   358 spass remote\_vampire\/}'').

   360 \item[\labelitemi] \textbf{\textit{max\_facts}} (\S\ref{relevance-filter})

   361 specifies the maximum number of facts that should be passed to the provers. By

   362 default, the value is prover-dependent but varies between about 50 and 1000. If

   363 the provers time out, you can try lowering this value to, say, 25 or 50 and see

   364 if that helps.

   366 \item[\labelitemi] \textbf{\textit{isar\_proof}} (\S\ref{output-format}) specifies

   367 that Isar proofs should be generated, instead of one-liner \textit{metis} or

   368 \textit{smt} proofs. The length of the Isar proofs can be controlled by setting

   369 \textit{isar\_shrink\_factor} (\S\ref{output-format}).

   371 \item[\labelitemi] \textbf{\textit{timeout}} (\S\ref{timeouts}) controls the

   372 provers' time limit. It is set to 30 seconds, but since Sledgehammer runs

   373 asynchronously you should not hesitate to raise this limit to 60 or 120 seconds

   374 if you are the kind of user who can think clearly while ATPs are active.

   375 \end{enum}

   377 Options can be set globally using \textbf{sledgehammer\_params}

   378 (\S\ref{command-syntax}). The command also prints the list of all available

   379 options with their current value. Fact selection can be influenced by specifying

   380 ``$(\textit{add}{:}~\textit{my\_facts})$'' after the \textbf{sledgehammer} call

   381 to ensure that certain facts are included, or simply ``$(\textit{my\_facts})$''

   382 to force Sledgehammer to run only with $\textit{my\_facts}$.

   384 \section{Frequently Asked Questions}

   385 \label{frequently-asked-questions}

   387 This section answers frequently (and infrequently) asked questions about

   388 Sledgehammer. It is a good idea to skim over it now even if you do not have any

   389 questions at this stage. And if you have any further questions not listed here,

   390 send them to the author at \authoremail.

   392 \point{Which facts are passed to the automatic provers?}

   394 Sledgehammer heuristically selects a few hundred relevant lemmas from the

   395 currently loaded libraries. The component that performs this selection is

   396 called the \emph{relevance filter}.

   398 \begin{enum}

   399 \item[\labelitemi]

   400 The traditional relevance filter, called \emph{MePo}

   401 (\underline{Me}ng--\underline{Pau}lson), assigns a score to every available fact

   402 (lemma, theorem, definition, or axiom) based upon how many constants that fact

   403 shares with the conjecture. This process iterates to include facts relevant to

   404 those just accepted. The constants are weighted to give unusual ones greater

   405 significance. MePo copes best when the conjecture contains some unusual

   406 constants; if all the constants are common, it is unable to discriminate among

   407 the hundreds of facts that are picked up. The filter is also memoryless: It has

   408 no information about how many times a particular fact has been used in a proof,

   409 and it cannot learn.

   411 \item[\labelitemi]

   412 An experimental, memoryful alternative to MePo is \emph{MaSh}

   413 (\underline{Ma}chine Learner for \underline{S}ledge\underline{h}ammer). It

   414 relies on an external tool called \texttt{mash} that applies machine learning to

   415 the problem of finding relevant facts.

   417 \item[\labelitemi] The \emph{Mesh} filter combines MePo and MaSh.

   418 \end{enum}

   420 The default is either MePo or Mesh, depending on whether \texttt{mash} is

   421 installed and what class of provers the target prover belongs to

   422 (\S\ref{relevance-filter}).

   424 The number of facts included in a problem varies from prover to prover, since

   425 some provers get overwhelmed more easily than others. You can show the number of

   426 facts given using the \textit{verbose} option (\S\ref{output-format}) and the

   427 actual facts using \textit{debug} (\S\ref{output-format}).

   429 Sledgehammer is good at finding short proofs combining a handful of existing

   430 lemmas. If you are looking for longer proofs, you must typically restrict the

   431 number of facts, by setting the \textit{max\_facts} option

   432 (\S\ref{relevance-filter}) to, say, 25 or 50.

   434 You can also influence which facts are actually selected in a number of ways. If

   435 you simply want to ensure that a fact is included, you can specify it using the

   436 ``$(\textit{add}{:}~\textit{my\_facts})$'' syntax. For example:

   437 %

   438 \prew

   439 \textbf{sledgehammer} (\textit{add}: \textit{hd.simps} \textit{tl.simps})

   440 \postw

   441 %

   442 The specified facts then replace the least relevant facts that would otherwise be

   443 included; the other selected facts remain the same.

   444 If you want to direct the selection in a particular direction, you can specify

   445 the facts via \textbf{using}:

   446 %

   447 \prew

   448 \textbf{using} \textit{hd.simps} \textit{tl.simps} \\

   449 \textbf{sledgehammer}

   450 \postw

   451 %

   452 The facts are then more likely to be selected than otherwise, and if they are

   453 selected at iteration $j$ they also influence which facts are selected at

   454 iterations $j + 1$, $j + 2$, etc. To give them even more weight, try

   455 %

   456 \prew

   457 \textbf{using} \textit{hd.simps} \textit{tl.simps} \\

   458 \textbf{apply}~\textbf{--} \\

   459 \textbf{sledgehammer}

   460 \postw

   462 \point{Why does Metis fail to reconstruct the proof?}

   464 There are many reasons. If Metis runs seemingly forever, that is a sign that the

   465 proof is too difficult for it. Metis's search is complete, so it should

   466 eventually find it, but that's little consolation. There are several possible

   467 solutions:

   469 \begin{enum}

   470 \item[\labelitemi] Try the \textit{isar\_proof} option (\S\ref{output-format}) to

   471 obtain a step-by-step Isar proof where each step is justified by \textit{metis}.

   472 Since the steps are fairly small, \textit{metis} is more likely to be able to

   473 replay them.

   475 \item[\labelitemi] Try the \textit{smt} proof method instead of \textit{metis}. It

   476 is usually stronger, but you need to either have Z3 available to replay the

   477 proofs, trust the SMT solver, or use certificates. See the documentation in the

   478 \emph{SMT} theory (\texttt{\$ISABELLE\_HOME/src/HOL/SMT.thy}) for details.

   480 \item[\labelitemi] Try the \textit{blast} or \textit{auto} proof methods, passing

   481 the necessary facts via \textbf{unfolding}, \textbf{using}, \textit{intro}{:},

   482 \textit{elim}{:}, \textit{dest}{:}, or \textit{simp}{:}, as appropriate.

   483 \end{enum}

   485 In some rare cases, \textit{metis} fails fairly quickly, and you get the error

   486 message

   488 \prew

   489 \slshape

   490 One-line proof reconstruction failed.

   491 \postw

   493 This message indicates that Sledgehammer determined that the goal is provable,

   494 but the proof is, for technical reasons, beyond \textit{metis}'s power. You can

   495 then try again with the \textit{strict} option (\S\ref{problem-encoding}).

   497 If the goal is actually unprovable and you did not specify an unsound encoding

   498 using \textit{type\_enc} (\S\ref{problem-encoding}), this is a bug, and you are

   499 strongly encouraged to report this to the author at \authoremail.

   501 \point{Why are the generated Isar proofs so ugly/broken?}

   503 The current implementation of the Isar proof feature,

   504 enabled by the \textit{isar\_proof} option (\S\ref{output-format}),

   505 is highly experimental. Work on a new implementation has begun. There is a large body of

   506 research into transforming resolution proofs into natural deduction proofs (such

   507 as Isar proofs), which we hope to leverage. In the meantime, a workaround is to

   508 set the \textit{isar\_shrink\_factor} option (\S\ref{output-format}) to a larger

   509 value or to try several provers and keep the nicest-looking proof.

   511 \point{How can I tell whether a suggested proof is sound?}

   513 Earlier versions of Sledgehammer often suggested unsound proofs---either proofs

   514 of nontheorems or simply proofs that rely on type-unsound inferences. This

   515 is a thing of the past, unless you explicitly specify an unsound encoding

   516 using \textit{type\_enc} (\S\ref{problem-encoding}).

   517 %

   518 Officially, the only form of ``unsoundness'' that lurks in the sound

   519 encodings is related to missing characteristic theorems of datatypes. For

   520 example,

   522 \prew

   523 \textbf{lemma}~``$\exists \mathit{xs}.\; \mathit{xs} \neq []$'' \\

   524 \textbf{sledgehammer} ()

   525 \postw

   527 suggests an argumentless \textit{metis} call that fails. However, the conjecture

   528 does actually hold, and the \textit{metis} call can be repaired by adding

   529 \textit{list.distinct}.

   530 %

   531 We hope to address this problem in a future version of Isabelle. In the

   532 meantime, you can avoid it by passing the \textit{strict} option

   533 (\S\ref{problem-encoding}).

   535 \point{What are the \textit{full\_types}, \textit{no\_types}, and

   536 \textit{mono\_tags} arguments to Metis?}

   538 The \textit{metis}~(\textit{full\_types}) proof method

   539 and its cousin \textit{metis}~(\textit{mono\_tags}) are fully typed

   540 versions of Metis. They are somewhat slower than \textit{metis}, but the proof

   541 search is fully typed, and they also include more powerful rules such as the

   542 axiom ``$x = \const{True} \mathrel{\lor} x = \const{False}$'' for reasoning in

   543 higher-order places (e.g., in set comprehensions). The method kicks in

   544 automatically as a fallback when \textit{metis} fails, and it is sometimes

   545 generated by Sledgehammer instead of \textit{metis} if the proof obviously

   546 requires type information or if \textit{metis} failed when Sledgehammer

   547 preplayed the proof. (By default, Sledgehammer tries to run \textit{metis} with

   548 various options for up to 3 seconds each time to ensure that the generated

   549 one-line proofs actually work and to display timing information. This can be

   550 configured using the \textit{preplay\_timeout} and \textit{dont\_preplay}

   551 options (\S\ref{timeouts}).)

   552 %

   553 At the other end of the soundness spectrum, \textit{metis} (\textit{no\_types})

   554 uses no type information at all during the proof search, which is more efficient

   555 but often fails. Calls to \textit{metis} (\textit{no\_types}) are occasionally

   556 generated by Sledgehammer.

   557 %

   558 See the \textit{type\_enc} option (\S\ref{problem-encoding}) for details.

   560 Incidentally, if you ever see warnings such as

   562 \prew

   563 \slshape

   564 Metis: Falling back on ``\textit{metis} (\textit{full\_types})''.

   565 \postw

   567 for a successful \textit{metis} proof, you can advantageously pass the

   568 \textit{full\_types} option to \textit{metis} directly.

   570 \point{And what are the \textit{lifting} and \textit{hide\_lams} arguments

   571 to Metis?}

   573 Orthogonally to the encoding of types, it is important to choose an appropriate

   574 translation of $\lambda$-abstractions. Metis supports three translation schemes,

   575 in decreasing order of power: Curry combinators (the default),

   576 $\lambda$-lifting, and a ``hiding'' scheme that disables all reasoning under

   577 $\lambda$-abstractions. The more powerful schemes also give the automatic

   578 provers more rope to hang themselves. See the \textit{lam\_trans} option (\S\ref{problem-encoding}) for details.

   580 \point{Are generated proofs minimal?}

   582 Automatic provers frequently use many more facts than are necessary.

   583 Sledgehammer includes a minimization tool that takes a set of facts returned by a

   584 given prover and repeatedly calls the same prover, \textit{metis}, or

   585 \textit{smt} with subsets of those axioms in order to find a minimal set.

   586 Reducing the number of axioms typically improves Metis's speed and success rate,

   587 while also removing superfluous clutter from the proof scripts.

   589 In earlier versions of Sledgehammer, generated proofs were systematically

   590 accompanied by a suggestion to invoke the minimization tool. This step is now

   591 performed implicitly if it can be done in a reasonable amount of time (something

   592 that can be guessed from the number of facts in the original proof and the time

   593 it took to find or preplay it).

   595 In addition, some provers (e.g., Yices) do not provide proofs or sometimes

   596 produce incomplete proofs. The minimizer is then invoked to find out which facts

   597 are actually needed from the (large) set of facts that was initially given to

   598 the prover. Finally, if a prover returns a proof with lots of facts, the

   599 minimizer is invoked automatically since Metis would be unlikely to re-find the

   600 proof.

   601 %

   602 Automatic minimization can be forced or disabled using the \textit{minimize}

   603 option (\S\ref{mode-of-operation}).

   605 \point{A strange error occurred---what should I do?}

   607 Sledgehammer tries to give informative error messages. Please report any strange

   608 error to the author at \authoremail. This applies doubly if you get the message

   610 \prew

   611 \slshape

   612 The prover found a type-unsound proof involving ``\textit{foo\/}'',

   613 ``\textit{bar\/}'', and ``\textit{baz\/}'' even though a supposedly type-sound

   614 encoding was used (or, less likely, your axioms are inconsistent). You might

   615 want to report this to the Isabelle developers.

   616 \postw

   618 \point{Auto can solve it---why not Sledgehammer?}

   620 Problems can be easy for \textit{auto} and difficult for automatic provers, but

   621 the reverse is also true, so do not be discouraged if your first attempts fail.

   622 Because the system refers to all theorems known to Isabelle, it is particularly

   623 suitable when your goal has a short proof from lemmas that you do not know

   624 about.

   626 \point{Why are there so many options?}

   628 Sledgehammer's philosophy is that it should work out of the box, without user guidance.

   629 Many of the options are meant to be used mostly by the Sledgehammer developers

   630 for experimentation purposes. Of course, feel free to experiment with them if

   631 you are so inclined.

   633 \section{Command Syntax}

   634 \label{command-syntax}

   636 \subsection{Sledgehammer}

   638 Sledgehammer can be invoked at any point when there is an open goal by entering

   639 the \textbf{sledgehammer} command in the theory file. Its general syntax is as

   640 follows:

   642 \prew

   643 \textbf{sledgehammer} \qty{subcommand}$^?$ \qty{options}$^?$ \qty{facts\_override}$^?$ \qty{num}$^?$

   644 \postw

   646 For convenience, Sledgehammer is also available in the ``Commands'' submenu of

   647 the ``Isabelle'' menu in Proof General or by pressing the Emacs key sequence C-c

   648 C-a C-s. This is equivalent to entering the \textbf{sledgehammer} command with

   649 no arguments in the theory text.

   651 In the general syntax, the \qty{subcommand} may be any of the following:

   653 \begin{enum}

   654 \item[\labelitemi] \textbf{\textit{run} (the default):} Runs Sledgehammer on

   655 subgoal number \qty{num} (1 by default), with the given options and facts.

   657 \item[\labelitemi] \textbf{\textit{min}:} Attempts to minimize the facts

   658 specified in the \qty{facts\_override} argument to obtain a simpler proof

   659 involving fewer facts. The options and goal number are as for \textit{run}.

   661 \item[\labelitemi] \textbf{\textit{messages}:} Redisplays recent messages issued

   662 by Sledgehammer. This allows you to examine results that might have been lost

   663 due to Sledgehammer's asynchronous nature. The \qty{num} argument specifies a

   664 limit on the number of messages to display (10 by default).

   666 \item[\labelitemi] \textbf{\textit{supported\_provers}:} Prints the list of

   667 automatic provers supported by Sledgehammer. See \S\ref{installation} and

   668 \S\ref{mode-of-operation} for more information on how to install automatic

   669 provers.

   671 \item[\labelitemi] \textbf{\textit{running\_provers}:} Prints information about

   672 currently running automatic provers, including elapsed runtime and remaining

   673 time until timeout.

   675 \item[\labelitemi] \textbf{\textit{kill\_provers}:} Terminates all running

   676 automatic provers.

   678 \item[\labelitemi] \textbf{\textit{unlearn}:} Resets the MaSh machine learner,

   679 erasing any persistent state.

   681 \item[\labelitemi] \textbf{\textit{learn}:} Invokes the MaSh machine learner on

   682 the current theory to process all the available facts. This happens

   683 automatically at Sledgehammer invocations if the \textit{learn} option

   684 (\S\ref{relevance-filter}) is enabled.

   686 \item[\labelitemi] \textbf{\textit{relearn}:} Same as \textit{unlearn} followed

   687 by \textit{learn}.

   689 \item[\labelitemi] \textbf{\textit{running\_learners}:} Prints information about

   690 currently running machine learners, including elapsed runtime and remaining

   691 time until timeout.

   693 \item[\labelitemi] \textbf{\textit{kill\_learners}:} Terminates all running

   694 machine learners.

   696 \item[\labelitemi] \textbf{\textit{refresh\_tptp}:} Refreshes the list of remote

   697 ATPs available at System\-On\-TPTP \cite{sutcliffe-2000}.

   698 \end{enum}

   700 Sledgehammer's behavior can be influenced by various \qty{options}, which can be

   701 specified in brackets after the \textbf{sledgehammer} command. The

   702 \qty{options} are a list of key--value pairs of the form ``[$k_1 = v_1,

   703 \ldots, k_n = v_n$]''. For Boolean options, ``= \textit{true\/}'' is optional. For

   704 example:

   706 \prew

   707 \textbf{sledgehammer} [\textit{isar\_proof}, \,\textit{timeout} = 120]

   708 \postw

   710 Default values can be set using \textbf{sledgehammer\_\allowbreak params}:

   712 \prew

   713 \textbf{sledgehammer\_params} \qty{options}

   714 \postw

   716 The supported options are described in \S\ref{option-reference}.

   718 The \qty{facts\_override} argument lets you alter the set of facts that go

   719 through the relevance filter. It may be of the form ``(\qty{facts})'', where

   720 \qty{facts} is a space-separated list of Isabelle facts (theorems, local

   721 assumptions, etc.), in which case the relevance filter is bypassed and the given

   722 facts are used. It may also be of the form ``(\textit{add}:\ \qty{facts\/_{\mathrm{1}}})'',

   723 ``(\textit{del}:\ \qty{facts\/_{\mathrm{2}}})'', or ``(\textit{add}:\ \qty{facts\/_{\mathrm{1}}}\

   724 \textit{del}:\ \qty{facts\/_{\mathrm{2}}})'', where the relevance filter is instructed to

   725 proceed as usual except that it should consider \qty{facts\/_{\mathrm{1}}}

   726 highly relevant and \qty{facts\/_{\mathrm{2}}} fully irrelevant.

   728 You can instruct Sledgehammer to run automatically on newly entered theorems by

   729 enabling the ``Auto Sledgehammer'' option in Proof General's ``Isabelle'' menu.

   730 For automatic runs, only the first prover set using \textit{provers}

   731 (\S\ref{mode-of-operation}) is considered, fewer facts are passed to the prover,

   732 \textit{slice} (\S\ref{mode-of-operation}) is disabled, \textit{strict}

   733 (\S\ref{problem-encoding}) is enabled, \textit{verbose} (\S\ref{output-format})

   734 and \textit{debug} (\S\ref{output-format}) are disabled, and \textit{timeout}

   735 (\S\ref{timeouts}) is superseded by the ``Auto Tools Time Limit'' in Proof

   736 General's ``Isabelle'' menu. Sledgehammer's output is also more concise.

   738 \subsection{Metis}

   740 The \textit{metis} proof method has the syntax

   742 \prew

   743 \textbf{\textit{metis}}~(\qty{options})${}^?$~\qty{facts}${}^?$

   744 \postw

   746 where \qty{facts} is a list of arbitrary facts and \qty{options} is a

   747 comma-separated list consisting of at most one $\lambda$ translation scheme

   748 specification with the same semantics as Sledgehammer's \textit{lam\_trans}

   749 option (\S\ref{problem-encoding}) and at most one type encoding specification

   750 with the same semantics as Sledgehammer's \textit{type\_enc} option

   751 (\S\ref{problem-encoding}).

   752 %

   753 The supported $\lambda$ translation schemes are \textit{hide\_lams},

   754 \textit{lifting}, and \textit{combs} (the default).

   755 %

   756 All the untyped type encodings listed in \S\ref{problem-encoding} are supported.

   757 For convenience, the following aliases are provided:

   758 \begin{enum}

   759 \item[\labelitemi] \textbf{\textit{full\_types}:} Synonym for \textit{poly\_guards\_query}.

   760 \item[\labelitemi] \textbf{\textit{partial\_types}:} Synonym for \textit{poly\_args}.

   761 \item[\labelitemi] \textbf{\textit{no\_types}:} Synonym for \textit{erased}.

   762 \end{enum}

   764 \section{Option Reference}

   765 \label{option-reference}

   % \defl: literal opening brace placed before an option's default value.
   767 \def\defl{\{}

   % \defr: literal closing brace placed after an option's default value.
   768 \def\defr{\}}

   % \flushitem{text}: list item with an empty label whose bold text is kerned left by \leftmargin.
   770 \def\flushitem#1{\item[]\noindent\kern-\leftmargin \textbf{#1}}

   % \optrueonly{name}: heading "name [= true]"; every op* heading ends with \nopagebreak and a \parskip-spaced line break.
   771 \def\optrueonly#1{\flushitem{\textit{#1} $\bigl[$= \textit{true}$\bigr]$\enskip}\nopagebreak\\[\parskip]}

   % \optrue{name}{negname}: heading "name [= <bool>] {true}" with "(neg.: negname)" pushed right by \hfill.
   772 \def\optrue#1#2{\flushitem{\textit{#1} $\bigl[$= \qtybf{bool}$\bigr]$\enskip \defl\textit{true}\defr\hfill (neg.: \textit{#2})}\nopagebreak\\[\parskip]}

   % \opfalse{name}{negname}: same as \optrue but the displayed default is {false}.
   773 \def\opfalse#1#2{\flushitem{\textit{#1} $\bigl[$= \qtybf{bool}$\bigr]$\enskip \defl\textit{false}\defr\hfill (neg.: \textit{#2})}\nopagebreak\\[\parskip]}

   % \opsmart{name}{negname}: heading "name [= <smart_bool>] {smart}" with "(neg.: negname)" pushed right.
   774 \def\opsmart#1#2{\flushitem{\textit{#1} $\bigl[$= \qtybf{smart\_bool}$\bigr]$\enskip \defl\textit{smart}\defr\hfill (neg.: \textit{#2})}\nopagebreak\\[\parskip]}

   % \opsmartx{name}{negname}: like \opsmart, but "(neg.: negname)" goes on its own right-flushed line
   % (presumably for names too long to share a line -- confirm against usage).
   775 \def\opsmartx#1#2{\flushitem{\textit{#1} $\bigl[$= \qtybf{smart\_bool}$\bigr]$\enskip \defl\textit{smart}\defr\\\hbox{}\hfill (neg.: \textit{#2})}\nopagebreak\\[\parskip]}

   % \opnodefault{name}{kind}: heading "name = <kind>" with no default shown.
   776 \def\opnodefault#1#2{\flushitem{\textit{#1} = \qtybf{#2}} \nopagebreak\\[\parskip]}

   % \opnodefaultbrk{name}{kind}: heading "[name =] <kind>", i.e. the "name =" part is shown in brackets.
   777 \def\opnodefaultbrk#1#2{\flushitem{$\bigl[$\textit{#1} =$\bigr]$ \qtybf{#2}} \nopagebreak\\[\parskip]}

   % \opdefault{name}{kind}{default}: heading "name = <kind> {default}".
   778 \def\opdefault#1#2#3{\flushitem{\textit{#1} = \qtybf{#2}\enskip \defl\textit{#3}\defr} \nopagebreak\\[\parskip]}

   % \oparg{name}{argkind}{valkind}: heading "name <argkind> = <valkind>".
   779 \def\oparg#1#2#3{\flushitem{\textit{#1} \qtybf{#2} = \qtybf{#3}} \nopagebreak\\[\parskip]}

   % \opargbool{name}{argkind}{negname}: heading "name <argkind> [= <bool>]" with "(neg.: negname)" pushed right.
   780 \def\opargbool#1#2#3{\flushitem{\textit{#1} \qtybf{#2} $\bigl[$= \qtybf{bool}$\bigr]$\hfill (neg.: \textit{#3})}\nopagebreak\\[\parskip]}

   % \opargboolorsmart{name}{argkind}{negname}: like \opargbool but the value kind is <smart_bool>.
   781 \def\opargboolorsmart#1#2#3{\flushitem{\textit{#1} \qtybf{#2} $\bigl[$= \qtybf{smart\_bool}$\bigr]$\hfill (neg.: \textit{#3})}\nopagebreak\\[\parskip]}

   783 Sledgehammer's options are categorized as follows:\ mode of operation

   784 (\S\ref{mode-of-operation}), problem encoding (\S\ref{problem-encoding}),

   785 relevance filter (\S\ref{relevance-filter}), output format

   786 (\S\ref{output-format}), authentication (\S\ref{authentication}), and timeouts

   787 (\S\ref{timeouts}).

   789 The descriptions below refer to the following syntactic quantities:

   791 \begin{enum}

   792 \item[\labelitemi] \qtybf{string}: A string.

   793 \item[\labelitemi] \qtybf{bool\/}: \textit{true} or \textit{false}.

   794 \item[\labelitemi] \qtybf{smart\_bool\/}: \textit{true}, \textit{false}, or

   795 \textit{smart}.

   796 \item[\labelitemi] \qtybf{int\/}: An integer.

   797 %\item[\labelitemi] \qtybf{float\/}: A floating-point number (e.g., 2.5).

   798 \item[\labelitemi] \qtybf{float\_pair\/}: A pair of floating-point numbers

   799 (e.g., 0.6 0.95).

   800 \item[\labelitemi] \qtybf{smart\_int\/}: An integer or \textit{smart}.

   801 \item[\labelitemi] \qtybf{float\_or\_none\/}: A floating-point number (e.g., 60 or

   802 0.5) expressing a number of seconds, or the keyword \textit{none} ($\infty$

   803 seconds).

   804 \end{enum}

   806 Default values are indicated in curly brackets (\textrm{\{\}}). Boolean options

   807 have a negative counterpart (e.g., \textit{blocking} vs.\

   808 \textit{non\_blocking}). When setting Boolean options or their negative

   809 counterparts, ``= \textit{true\/}'' may be omitted.

   811 \subsection{Mode of Operation}

   812 \label{mode-of-operation}

   814 \begin{enum}

   815 \opnodefaultbrk{provers}{string}

   816 Specifies the automatic provers to use as a space-separated list (e.g.,

   817 ``\textit{e}~\textit{spass}~\textit{remote\_vampire\/}'').

   818 Provers can be run locally or remotely; see \S\ref{installation} for

   819 installation instructions.

   821 The following local provers are supported:

   823 \begin{enum}

   824 \item[\labelitemi] \textbf{\textit{alt\_ergo}:} Alt-Ergo is a polymorphic

   825 SMT solver developed by Bobot et al.\ \cite{alt-ergo}.

   826 It supports the TPTP polymorphic typed first-order format (TFF1) via Why3

   827 \cite{why3}. It is included for experimental purposes. To use Alt-Ergo, set the

   828 environment variable \texttt{WHY3\_HOME} to the directory that contains the

   829 \texttt{why3} executable. Sledgehammer has been tested with Alt-Ergo 0.93 and an

   830 unidentified development version of Why3.

   832 \item[\labelitemi] \textbf{\textit{cvc3}:} CVC3 is an SMT solver developed by

   833 Clark Barrett, Cesare Tinelli, and their colleagues \cite{cvc3}. To use CVC3,

   834 set the environment variable \texttt{CVC3\_SOLVER} to the complete path of the

   835 executable, including the file name, or install the prebuilt CVC3 package from

   836 \download. Sledgehammer has been tested with versions 2.2 and 2.4.1.

   838 \item[\labelitemi] \textbf{\textit{e}:} E is a first-order resolution prover

   839 developed by Stephan Schulz \cite{schulz-2002}. To use E, set the environment

   840 variable \texttt{E\_HOME} to the directory that contains the \texttt{eproof}

   841 executable and \texttt{E\_VERSION} to the version number (e.g., ``1.4''), or

   842 install the prebuilt E package from \download. Sledgehammer has been tested with

   843 versions 1.0 to 1.4.

   845 \item[\labelitemi] \textbf{\textit{leo2}:} LEO-II is an automatic

   846 higher-order prover developed by Christoph Benzm\"uller et al.\ \cite{leo2},

   847 with support for the TPTP typed higher-order syntax (THF0). To use LEO-II, set

   848 the environment variable \texttt{LEO2\_HOME} to the directory that contains the

   849 \texttt{leo} executable. Sledgehammer requires version 1.2.9 or above.

   851 \item[\labelitemi] \textbf{\textit{metis}:} Although it is much less powerful than

   852 the external provers, Metis itself can be used for proof search.

   854 \item[\labelitemi] \textbf{\textit{satallax}:} Satallax is an automatic

   855 higher-order prover developed by Chad Brown et al.\ \cite{satallax}, with

   856 support for the TPTP typed higher-order syntax (THF0). To use Satallax, set the

   857 environment variable \texttt{SATALLAX\_HOME} to the directory that contains the

   858 \texttt{satallax} executable. Sledgehammer requires version 2.2 or above.

   860 \item[\labelitemi] \textbf{\textit{smt}:} The \textit{smt} proof method with the

   861 current settings (usually:\ Z3 with proof reconstruction).

   863 \item[\labelitemi] \textbf{\textit{spass}:} SPASS is a first-order resolution

   864 prover developed by Christoph Weidenbach et al.\ \cite{weidenbach-et-al-2009}.

   865 To use SPASS, set the environment variable \texttt{SPASS\_HOME} to the directory

   866 that contains the \texttt{SPASS} executable and \texttt{SPASS\_VERSION} to the

   867 version number (e.g., ``3.8ds''), or install the prebuilt SPASS package from

   868 \download. Sledgehammer requires version 3.8ds or above.

   870 \item[\labelitemi] \textbf{\textit{vampire}:} Vampire is a first-order resolution

   871 prover developed by Andrei Voronkov and his colleagues

   872 \cite{riazanov-voronkov-2002}. To use Vampire, set the environment variable

   873 \texttt{VAMPIRE\_HOME} to the directory that contains the \texttt{vampire}

   874 executable and \texttt{VAMPIRE\_VERSION} to the version number (e.g.,

   875 ``1.8rev1435''). Sledgehammer has been tested with versions 0.6, 1.0, and 1.8.

   876 Versions strictly above 1.8 (e.g., ``1.8rev1435'') support the TPTP typed

   877 first-order format (TFF0).

   879 \item[\labelitemi] \textbf{\textit{yices}:} Yices is an SMT solver developed at

   880 SRI \cite{yices}. To use Yices, set the environment variable

   881 \texttt{YICES\_SOLVER} to the complete path of the executable, including the

   882 file name. Sledgehammer has been tested with version 1.0.28.

   884 \item[\labelitemi] \textbf{\textit{z3}:} Z3 is an SMT solver developed at

   885 Microsoft Research \cite{z3}. To use Z3, set the environment variable

   886 \texttt{Z3\_SOLVER} to the complete path of the executable, including the file

   887 name, and set \texttt{Z3\_NON\_COMMERCIAL} to ``yes'' to confirm that you are a

   888 noncommercial user. Sledgehammer has been tested with versions 3.0, 3.1, 3.2,

   889 and 4.0.

   891 \item[\labelitemi] \textbf{\textit{z3\_tptp}:} This version of Z3 pretends to be

   892 an ATP, exploiting Z3's support for the TPTP untyped and typed first-order

   893 formats (FOF and TFF0). It is included for experimental purposes. It

   894 requires version 3.0 or above. To use it, set the environment variable

   895 \texttt{Z3\_HOME} to the directory that contains the \texttt{z3}

   896 executable.

   897 \end{enum}

   899 The following remote provers are supported:

   901 \begin{enum}

   902 \item[\labelitemi] \textbf{\textit{remote\_cvc3}:} The remote version of CVC3 runs

   903 on servers at the TU M\"unchen (or wherever \texttt{REMOTE\_SMT\_URL} is set to

   904 point).

   906 \item[\labelitemi] \textbf{\textit{remote\_e}:} The remote version of E runs

   907 on Geoff Sutcliffe's Miami servers \cite{sutcliffe-2000}.

   909 \item[\labelitemi] \textbf{\textit{remote\_e\_sine}:} E-SInE is a metaprover

   910 developed by Kry\v stof Hoder \cite{sine} based on E. It runs on Geoff

   911 Sutcliffe's Miami servers.

   913 \item[\labelitemi] \textbf{\textit{remote\_e\_tofof}:} E-ToFoF is a metaprover

   914 developed by Geoff Sutcliffe \cite{tofof} based on E. This ATP supports the

   915 TPTP typed first-order format (TFF0). The

   916 remote version of E-ToFoF runs on Geoff Sutcliffe's Miami servers.

   918 \item[\labelitemi] \textbf{\textit{remote\_iprover}:} iProver is a pure

   919 instantiation-based prover developed by Konstantin Korovin \cite{korovin-2009}. The

   920 remote version of iProver runs on Geoff Sutcliffe's Miami servers

   921 \cite{sutcliffe-2000}.

   923 \item[\labelitemi] \textbf{\textit{remote\_iprover\_eq}:} iProver-Eq is an

   924 instantiation-based prover with native support for equality developed by

   925 Konstantin Korovin and Christoph Sticksel \cite{korovin-sticksel-2010}. The

   926 remote version of iProver-Eq runs on Geoff Sutcliffe's Miami servers

   927 \cite{sutcliffe-2000}.

   929 \item[\labelitemi] \textbf{\textit{remote\_leo2}:} The remote version of LEO-II

   930 runs on Geoff Sutcliffe's Miami servers \cite{sutcliffe-2000}.

   932 \item[\labelitemi] \textbf{\textit{remote\_satallax}:} The remote version of

   933 Satallax runs on Geoff Sutcliffe's Miami servers \cite{sutcliffe-2000}.

   935 \item[\labelitemi] \textbf{\textit{remote\_snark}:} SNARK is a first-order

   936 resolution prover developed by Stickel et al.\ \cite{snark}. It supports the

   937 TPTP typed first-order format (TFF0). The remote version of SNARK runs on

   938 Geoff Sutcliffe's Miami servers.

   940 \item[\labelitemi] \textbf{\textit{remote\_vampire}:} The remote version of

   941 Vampire runs on Geoff Sutcliffe's Miami servers.

   943 \item[\labelitemi] \textbf{\textit{remote\_waldmeister}:} Waldmeister is a unit

   944 equality prover developed by Hillenbrand et al.\ \cite{waldmeister}. It can be

   945 used to prove universally quantified equations using unconditional equations,

   946 corresponding to the TPTP CNF UEQ division. The remote version of Waldmeister

   947 runs on Geoff Sutcliffe's Miami servers.

   949 \item[\labelitemi] \textbf{\textit{remote\_z3}:} The remote version of Z3 runs on

   950 servers at the TU M\"unchen (or wherever \texttt{REMOTE\_SMT\_URL} is set to

   951 point).

   953 \item[\labelitemi] \textbf{\textit{remote\_z3\_tptp}:} The remote version of ``Z3

   954 with TPTP syntax'' runs on Geoff Sutcliffe's Miami servers.

   955 \end{enum}

   957 By default, Sledgehammer runs E, E-SInE, SPASS, Vampire, Z3 (or whatever

   958 the SMT module's \textit{smt\_solver} configuration option is set to), and (if

   959 appropriate) Waldmeister in parallel---either locally or remotely, depending on

   960 the number of processor cores available. For historical reasons, the default

   961 value of this option can be overridden using the option ``Sledgehammer:

   962 Provers'' in Proof General's ``Isabelle'' menu.

   964 It is generally a good idea to run several provers in parallel. Running E,

   965 SPASS, and Vampire for 5~seconds yields a similar success rate to running the

   966 most effective of these for 120~seconds \cite{boehme-nipkow-2010}.

   968 For the \textit{min} subcommand, the default prover is \textit{metis}. If

   969 several provers are set, the first one is used.

   971 \opnodefault{prover}{string}

   972 Alias for \textit{provers}.

   974 \opfalse{blocking}{non\_blocking}

   975 Specifies whether the \textbf{sledgehammer} command should operate

   976 synchronously. The asynchronous (non-blocking) mode lets the user start proving

   977 the putative theorem manually while Sledgehammer looks for a proof, but it can

   978 also be more confusing. Irrespective of the value of this option, Sledgehammer

   979 is always run synchronously for the new jEdit-based user interface or if

   980 \textit{debug} (\S\ref{output-format}) is enabled.

   982 \optrue{slice}{dont\_slice}

   983 Specifies whether the time allocated to a prover should be sliced into several

   984 segments, each of which has its own set of possibly prover-dependent options.

   985 For SPASS and Vampire, the first slice tries the fast but incomplete

   986 set-of-support (SOS) strategy, whereas the second slice runs without it. For E,

   987 up to three slices are tried, with different weighted search strategies and

   988 numbers of facts. For SMT solvers, several slices are tried with the same options

   989 each time but fewer and fewer facts. According to benchmarks with a timeout of

   990 30~seconds, slicing is a valuable optimization, and you should probably leave it

   991 enabled unless you are conducting experiments. This option is implicitly

   992 disabled for (short) automatic runs.

   994 \nopagebreak

   995 {\small See also \textit{verbose} (\S\ref{output-format}).}

   997 \opsmart{minimize}{dont\_minimize}

   998 Specifies whether the minimization tool should be invoked automatically after

   999 proof search. By default, automatic minimization takes place only if

  1000 it can be done in a reasonable amount of time (as determined by

  1001 the number of facts in the original proof and the time it took to find or

  1002 preplay it) or the proof involves an unreasonably large number of facts.

  1004 \nopagebreak

  1005 {\small See also \textit{preplay\_timeout} (\S\ref{timeouts})

  1006 and \textit{dont\_preplay} (\S\ref{timeouts}).}

  1008 \opfalse{overlord}{no\_overlord}

  1009 Specifies whether Sledgehammer should put its temporary files in

  1010 \texttt{\$ISA\-BELLE\_\allowbreak HOME\_\allowbreak USER}, which is useful for

  1011 debugging Sledgehammer but also unsafe if several instances of the tool are run

  1012 simultaneously. The files are identified by the prefix \texttt{prob\_}; you may

  1013 safely remove them after Sledgehammer has run.

  1015 \nopagebreak

  1016 {\small See also \textit{debug} (\S\ref{output-format}).}

  1017 \end{enum}

  1019 \subsection{Relevance Filter}

  1020 \label{relevance-filter}

  1022 \begin{enum}

  1023 \opdefault{fact\_filter}{string}{smart}

  1024 Specifies the relevance filter to use. The following filters are available:

  1026 \begin{enum}

  1027 \item[\labelitemi] \textbf{\textit{mepo}:}

  1028 The traditional memoryless MePo relevance filter.

  1030 \item[\labelitemi] \textbf{\textit{mash}:}

  1031 The memoryful MaSh machine learner. MaSh relies on the external program

  1032 \texttt{mash}, which can be obtained from the author at \authoremail. To install

  1033 it, set the environment variable \texttt{MASH\_HOME} to the directory that

  1034 contains the \texttt{mash} executable.

  1036 \item[\labelitemi] \textbf{\textit{mesh}:} A combination of MePo and MaSh.

  1038 \item[\labelitemi] \textbf{\textit{smart}:} Use MeSh if \texttt{mash} is

  1039 installed and the target prover is an ATP; otherwise, use MePo.

  1040 \end{enum}

  1042 \opdefault{max\_facts}{smart\_int}{smart}

  1043 Specifies the maximum number of facts that may be returned by the relevance

  1044 filter. If the option is set to \textit{smart}, it is set to a value that was

  1045 empirically found to be appropriate for the prover. Typical values range between

  1046 50 and 1000.

  1048 \opdefault{fact\_thresholds}{float\_pair}{\upshape 0.45~0.85}

  1049 Specifies the thresholds above which facts are considered relevant by the

  1050 relevance filter. The first threshold is used for the first iteration of the

  1051 relevance filter and the second threshold is used for the last iteration (if it

  1052 is reached). The effective threshold is quadratically interpolated for the other

  1053 iterations. Each threshold ranges from 0 to 1, where 0 means that all theorems

  1054 are relevant and 1 means that only theorems referring to previously seen constants are relevant.

  1056 \optrue{learn}{dont\_learn}

  1057 Specifies whether MaSh should be run automatically by Sledgehammer to learn the

  1058 available theories (and hence provide more accurate results). Learning only

  1059 takes place if \texttt{mash} is installed.

  1061 \opdefault{max\_new\_mono\_instances}{smart\_int}{smart}

  1062 Specifies the maximum number of monomorphic instances to generate beyond

  1063 \textit{max\_facts}. The higher this limit is, the more monomorphic instances

  1064 are potentially generated. Whether monomorphization takes place depends on the

  1065 type encoding used. If the option is set to \textit{smart}, it is set to a value

  1066 that was empirically found to be appropriate for the prover. For most provers,

  1067 this value is 200.

  1069 \nopagebreak

  1070 {\small See also \textit{type\_enc} (\S\ref{problem-encoding}).}

  1072 \opdefault{max\_mono\_iters}{smart\_int}{smart}

  1073 Specifies the maximum number of iterations for the monomorphization fixpoint

  1074 construction. The higher this limit is, the more monomorphic instances are

  1075 potentially generated. Whether monomorphization takes place depends on the

  1076 type encoding used. If the option is set to \textit{smart}, it is set to a value

  1077 that was empirically found to be appropriate for the prover. For most provers,

  1078 this value is 3.

  1080 \nopagebreak

  1081 {\small See also \textit{type\_enc} (\S\ref{problem-encoding}).}

  1082 \end{enum}

  1084 \subsection{Problem Encoding}

  1085 \label{problem-encoding}

  % \comb{X}: typesets a combinator name by forwarding its argument unchanged to \const.
  1087 \newcommand\comb[1]{\const{#1}}

  1089 \begin{enum}

  1090 \opdefault{lam\_trans}{string}{smart}

  1091 Specifies the $\lambda$ translation scheme to use in ATP problems. The supported

  1092 translation schemes are listed below:

  1094 \begin{enum}

  1095 \item[\labelitemi] \textbf{\textit{hide\_lams}:} Hide the $\lambda$-abstractions

  1096 by replacing them by unspecified fresh constants, effectively disabling all

  1097 reasoning under $\lambda$-abstractions.

  1099 \item[\labelitemi] \textbf{\textit{lifting}:} Introduce a new

  1100 supercombinator \const{c} for each cluster of $n$~$\lambda$-abstractions,

  1101 defined using an equation $\const{c}~x_1~\ldots~x_n = t$ ($\lambda$-lifting).

  1103 \item[\labelitemi] \textbf{\textit{combs}:} Rewrite lambdas to the Curry

  1104 combinators (\comb{I}, \comb{K}, \comb{S}, \comb{B}, \comb{C}). Combinators

  1105 enable the ATPs to synthesize $\lambda$-terms but tend to yield bulkier formulas

  1106 than $\lambda$-lifting: The translation is quadratic in the worst case, and the

  1107 equational definitions of the combinators are very prolific in the context of

  1108 resolution.

  1110 \item[\labelitemi] \textbf{\textit{combs\_and\_lifting}:} Introduce a new

  1111 supercombinator \const{c} for each cluster of $n$~$\lambda$-abstractions and characterize it both using a

  1112 lifted equation $\const{c}~x_1~\ldots~x_n = t$ and via Curry combinators.

  1114 \item[\labelitemi] \textbf{\textit{combs\_or\_lifting}:} For each cluster of

  1115 $\lambda$-abstractions, heuristically choose between $\lambda$-lifting and Curry

  1116 combinators.

  1118 \item[\labelitemi] \textbf{\textit{keep\_lams}:}

  1119 Keep the $\lambda$-abstractions in the generated problems. This is available

  1120 only with provers that support the THF0 syntax.

  1122 \item[\labelitemi] \textbf{\textit{smart}:} The actual translation scheme used

  1123 depends on the ATP and should be the most efficient scheme for that ATP.

  1124 \end{enum}

  1126 For SMT solvers, the $\lambda$ translation scheme is always \textit{lifting},

  1127 irrespective of the value of this option.

  1129 \opsmartx{uncurried\_aliases}{no\_uncurried\_aliases}

  1130 Specifies whether fresh function symbols should be generated as aliases for

  1131 applications of curried functions in ATP problems.

  1133 \opdefault{type\_enc}{string}{smart}

  1134 Specifies the type encoding to use in ATP problems. Some of the type encodings

  1135 are unsound, meaning that they can give rise to spurious proofs

  1136 (unreconstructible using \textit{metis}). The type encodings are

  1137 listed below, with an indication of their soundness in parentheses.

  1138 An asterisk (*) indicates that the encoding is slightly incomplete for

  1139 reconstruction with \textit{metis}, unless the \emph{strict} option (described

  1140 below) is enabled.

  1142 \begin{enum}

  1143 \item[\labelitemi] \textbf{\textit{erased} (unsound):} No type information is

  1144 supplied to the ATP, not even to resolve overloading. Types are simply erased.

  1146 \item[\labelitemi] \textbf{\textit{poly\_guards} (sound):} Types are encoded using

  1147 a predicate \const{g}$(\tau, t)$ that guards bound

  1148 variables. Constants are annotated with their types, supplied as extra

  1149 arguments, to resolve overloading.

  1151 \item[\labelitemi] \textbf{\textit{poly\_tags} (sound):} Each term and subterm is

  1152 tagged with its type using a function $\const{t\/}(\tau, t)$.

  1154 \item[\labelitemi] \textbf{\textit{poly\_args} (unsound):}

  1155 As with \textit{poly\_guards}, constants are annotated with their types to

  1156 resolve overloading, but otherwise no type information is encoded. This

  1157 is the default encoding used by the \textit{metis} proof method.

  1159 \item[\labelitemi]

  1160 \textbf{%

  1161 \textit{raw\_mono\_guards}, \textit{raw\_mono\_tags} (sound); \\

  1162 \textit{raw\_mono\_args} (unsound):} \\

  1163 Similar to \textit{poly\_guards}, \textit{poly\_tags}, and \textit{poly\_args},

  1164 respectively, but the problem is additionally monomorphized, meaning that type

  1165 variables are instantiated with heuristically chosen ground types.

  1166 Monomorphization can simplify reasoning but also leads to larger fact bases,

  1167 which can slow down the ATPs.

  1169 \item[\labelitemi]

  1170 \textbf{%

  1171 \textit{mono\_guards}, \textit{mono\_tags} (sound);

  1172 \textit{mono\_args} (unsound):} \\

  1173 Similar to

  1174 \textit{raw\_mono\_guards}, \textit{raw\_mono\_tags}, and

  1175 \textit{raw\_mono\_args}, respectively, but types are mangled in constant names

  1176 instead of being supplied as ground term arguments. The binary predicate

  1177 $\const{g}(\tau, t)$ becomes a unary predicate

  1178 $\const{g\_}\tau(t)$, and the binary function

  1179 $\const{t}(\tau, t)$ becomes a unary function

  1180 $\const{t\_}\tau(t)$.

  1182 \item[\labelitemi] \textbf{\textit{mono\_native} (sound):} Exploits native

  1183 first-order types if the prover supports the TFF0, TFF1, or THF0 syntax;

  1184 otherwise, falls back on \textit{mono\_guards}. The problem is monomorphized.

  1186 \item[\labelitemi] \textbf{\textit{mono\_native\_higher} (sound):} Exploits

  1187 native higher-order types if the prover supports the THF0 syntax; otherwise,

  1188 falls back on \textit{mono\_native} or \textit{mono\_guards}. The problem is

  1189 monomorphized.

  1191 \item[\labelitemi] \textbf{\textit{poly\_native} (sound):} Exploits native

  1192 first-order polymorphic types if the prover supports the TFF1 syntax; otherwise,

  1193 falls back on \textit{mono\_native}.

  1195 \item[\labelitemi]

  1196 \textbf{%

  1197 \textit{poly\_guards}?, \textit{poly\_tags}?, \textit{raw\_mono\_guards}?, \\

  1198 \textit{raw\_mono\_tags}?, \textit{mono\_guards}?, \textit{mono\_tags}?, \\

  1199 \textit{mono\_native}? (sound*):} \\

  1200 The type encodings \textit{poly\_guards}, \textit{poly\_tags},

  1201 \textit{raw\_mono\_guards}, \textit{raw\_mono\_tags}, \textit{mono\_guards},

  1202 \textit{mono\_tags}, and \textit{mono\_native} are fully typed and sound. For

  1203 each of these, Sledgehammer also provides a lighter variant identified by a

  1204 question mark (`\hbox{?}')\ that detects and erases monotonic types, notably

  1205 infinite types. (For \textit{mono\_native}, the types are not actually erased

  1206 but rather replaced by a shared uniform type of individuals.) As argument to the

  1207 \textit{metis} proof method, the question mark is replaced by a

  1208 \hbox{``\textit{\_query\/}''} suffix.

  1210 \item[\labelitemi]

  1211 \textbf{%

  1212 \textit{poly\_guards}??, \textit{poly\_tags}??, \textit{raw\_mono\_guards}??, \\

  1213 \textit{raw\_mono\_tags}??, \textit{mono\_guards}??, \textit{mono\_tags}?? \\

  1214 (sound*):} \\

  1215 Even lighter versions of the `\hbox{?}' encodings. As argument to the

  1216 \textit{metis} proof method, the `\hbox{??}' suffix is replaced by

  1217 \hbox{``\textit{\_query\_query\/}''}.

  1219 \item[\labelitemi]

  1220 \textbf{%

  1221 \textit{poly\_guards}@, \textit{poly\_tags}@, \textit{raw\_mono\_guards}@, \\

  1222 \textit{raw\_mono\_tags}@ (sound*):} \\

  1223 Alternative versions of the `\hbox{??}' encodings. As argument to the

  1224 \textit{metis} proof method, the `\hbox{@}' suffix is replaced by

  1225 \hbox{``\textit{\_at\/}''}.

  1227 \item[\labelitemi] \textbf{\textit{poly\_args}?, \textit{raw\_mono\_args}? (unsound):} \\

  1228 Lighter versions of \textit{poly\_args} and \textit{raw\_mono\_args}.

  1230 \item[\labelitemi] \textbf{\textit{smart}:} The actual encoding used depends on

  1231 the ATP and should be the most efficient sound encoding for that ATP.

  1232 \end{enum}

  1234 For SMT solvers, the type encoding is always \textit{mono\_native}, irrespective

  1235 of the value of this option.

  1237 \nopagebreak

  1238 {\small See also \textit{max\_new\_mono\_instances} (\S\ref{relevance-filter})

  1239 and \textit{max\_mono\_iters} (\S\ref{relevance-filter}).}

  1241 \opfalse{strict}{non\_strict}

  1242 Specifies whether Sledgehammer should run in its strict mode. In that mode,

  1243 sound type encodings marked with an asterisk (*) above are made complete

  1244 for reconstruction with \textit{metis}, at the cost of some clutter in the

  1245 generated problems. This option has no effect if \textit{type\_enc} is

  1246 deliberately set to an unsound encoding.

  1247 \end{enum}

  1249 \subsection{Output Format}

  1250 \label{output-format}

  1252 \begin{enum}

  1254 \opfalse{verbose}{quiet}

  1255 Specifies whether the \textbf{sledgehammer} command should explain what it does.

  1256 This option is implicitly disabled for automatic runs.

  1258 \opfalse{debug}{no\_debug}

  1259 Specifies whether Sledgehammer should display additional debugging information

  1260 beyond what \textit{verbose} already displays. Enabling \textit{debug} also

  1261 enables \textit{verbose} and \textit{blocking} (\S\ref{mode-of-operation})

  1262 behind the scenes. The \textit{debug} option is implicitly disabled for

  1263 automatic runs.

  1265 \nopagebreak

  1266 {\small See also \textit{overlord} (\S\ref{mode-of-operation}).}

  1268 \opfalse{isar\_proof}{no\_isar\_proof}

  1269 Specifies whether Isar proofs should be output in addition to one-liner

  1270 \textit{metis} proofs. Isar proof construction is still experimental and often

  1271 fails; however, Isar proofs are usually faster and sometimes more robust than

  1272 \textit{metis} proofs.

  1274 \opdefault{isar\_shrink\_factor}{int}{\upshape 1}

  1275 Specifies the granularity of the Isar proof. A value of $n$ indicates that each

  1276 Isar proof step should correspond to a group of up to $n$ consecutive proof

  1277 steps in the ATP proof.

  1278 \end{enum}

  1280 \subsection{Authentication}

  1281 \label{authentication}

  1283 \begin{enum}

  1284 \opnodefault{expect}{string}

  1285 Specifies the expected outcome, which must be one of the following:

  1287 \begin{enum}

  1288 \item[\labelitemi] \textbf{\textit{some}:} Sledgehammer found a proof.

  1289 \item[\labelitemi] \textbf{\textit{none}:} Sledgehammer found no proof.

  1290 \item[\labelitemi] \textbf{\textit{timeout}:} Sledgehammer timed out.

  1291 \item[\labelitemi] \textbf{\textit{unknown}:} Sledgehammer encountered some

  1292 problem.

  1293 \end{enum}

  1295 Sledgehammer emits an error (if \textit{blocking} is enabled) or a warning

  1296 (otherwise) if the actual outcome differs from the expected outcome. This option

  1297 is useful for regression testing.

  1299 \nopagebreak

  1300 {\small See also \textit{blocking} (\S\ref{mode-of-operation}) and

  1301 \textit{timeout} (\S\ref{timeouts}).}

  1302 \end{enum}

  1304 \subsection{Timeouts}

  1305 \label{timeouts}

  1307 \begin{enum}

  1308 \opdefault{timeout}{float\_or\_none}{\upshape 30}

  1309 Specifies the maximum number of seconds that the automatic provers should spend

  1310 searching for a proof. This excludes problem preparation and is a soft limit.

  1311 For historical reasons, the default value of this option can be overridden using

  1312 the option ``Sledgehammer: Time Limit'' in Proof General's ``Isabelle'' menu.

  1314 \opdefault{preplay\_timeout}{float\_or\_none}{\upshape 3}

  1315 Specifies the maximum number of seconds that \textit{metis} or \textit{smt}

  1316 should spend trying to ``preplay'' the found proof. If this option is set to 0,

  1317 no preplaying takes place, and no timing information is displayed next to the

  1318 suggested \textit{metis} calls.

  1320 \nopagebreak

  1321 {\small See also \textit{minimize} (\S\ref{mode-of-operation}).}

  1323 \optrueonly{dont\_preplay}

  1324 Alias for ``\textit{preplay\_timeout} = 0''.

  1326 \end{enum}

  1328 \let\em=\sl

  1329 \bibliography{../manual}{}

  1330 \bibliographystyle{abbrv}

  1332 \end{document}

author	blanchet
	Fri, 20 Jul 2012 22:19:46 +0200
changeset 49403	fd7958ebee96
parent 49402	302cf211fb3f
child 49405	4147f2bc4442
permissions	-rw-r--r--