# HG changeset patch # User nipkow # Date 904143469 -7200 # Node ID 1463e182c5331d17ddd758287f00fd6dd0b7f7cd # Parent 6ef3742b61533746e2acba829181156680814425 The HOL tutorial. diff -r 6ef3742b6153 -r 1463e182c533 doc-src/Tutorial/appendix.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc-src/Tutorial/appendix.tex Wed Aug 26 16:57:49 1998 +0200 @@ -0,0 +1,75 @@ +\appendix + +\chapter{Appendix} +\label{sec:Appendix} + +\begin{figure}[htbp] +\begin{center} +\begin{tabular}{|llll|} +\hline +\texttt{arities} & +\texttt{binder} & +\texttt{classes} & +\texttt{consts} \\ +\texttt{default} & +\texttt{defs} & +\texttt{end} & +\texttt{global} \\ +\texttt{infixl} & +\texttt{infixr} & +\texttt{instance} & +\texttt{local} \\ +\texttt{mixfix} & +\texttt{ML} & +\texttt{MLtext} & +\texttt{nonterminals} \\ +\texttt{oracle} & +\texttt{output} & +\texttt{path} & +\texttt{rules} \\ +\texttt{setup} & +\texttt{syntax} & +\texttt{translations} & +\texttt{types} \\ +\texttt{constdefs} & +\texttt{axclass} &&\\ +\hline +\end{tabular} +\end{center} +\caption{Keywords in theory files} +\label{fig:keywords} +\end{figure} + +\begin{figure}[htbp] +\begin{center} +\begin{tabular}{|lllll|} +\hline +\texttt{ALL} & +\texttt{case} & +\texttt{div} & +\texttt{dvd} & +\texttt{else} \\ +\texttt{EX} & +\texttt{if} & +\texttt{in} & +\texttt{INT} & +\texttt{Int} \\ +\texttt{LEAST} & +\texttt{let} & +\texttt{mod} & +\texttt{O} & +\texttt{o} \\ +\texttt{of} & +\texttt{op} & +\texttt{PROP} & +\texttt{SIGMA} & +\texttt{then} \\ +\texttt{Times} & +\texttt{UN} & +\texttt{Un} &&\\ +\hline +\end{tabular} +\end{center} +\caption{Reserved words in HOL} +\label{fig:ReservedWords} +\end{figure} diff -r 6ef3742b6153 -r 1463e182c533 doc-src/Tutorial/basics.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc-src/Tutorial/basics.tex Wed Aug 26 16:57:49 1998 +0200 @@ -0,0 +1,261 @@ +\chapter{Basic Concepts} + +\section{Introduction} + +This is a tutorial on how to use Isabelle/HOL as a specification and +verification system. Isabelle is a generic system for implementing logical +formalisms, and Isabelle/HOL is the specialization of Isabelle for +HOL, which abbreviates Higher-Order Logic. We introduce HOL step by step +following the equation +\[ \mbox{HOL} = \mbox{Functional Programming} + \mbox{Logic}. \] +We assume that the reader is familiar with the basic concepts of both fields. +For excellent introductions to functional programming consult the textbooks +by Bird and Wadler~\cite{Bird-Wadler} or Paulson~\cite{Paulson-ML}. Although +this tutorial initially concentrates on functional programming, do not be +misled: HOL can express most mathematical concepts, and functional +programming is just one particularly simple and ubiquitous instance. + +A tutorial is by definition incomplete. To fully exploit the power of the +system you need to consult the Isabelle Reference Manual~\cite{Isa-Ref-Man} +for details about Isabelle and the HOL chapter of the Logics +manual~\cite{Isa-Logics-Man} for details relating to HOL. Both manuals have a +comprehensive index. + +\section{Theories, proofs and interaction} +\label{sec:Basic:Theories} + +Working with Isabelle means creating two different kinds of documents: +theories and proof scripts. Roughly speaking, a \bfindex{theory} is a named +collection of types and functions, much like a module in a programming +language or a specification in a specification language. In fact, theories in +HOL can be either. Theories must reside in files with the suffix +\texttt{.thy}. The general format of a theory file \texttt{T.thy} is +\begin{ttbox} +T = B\(@1\) + \(\cdots\) + B\(@n\) + +\({<}declarations{>}\) +end +\end{ttbox} +where \texttt{B}$@1$, \dots, \texttt{B}$@n$ are the names of existing +theories that \texttt{T} is based on and ${<}declarations{>}$ stands for the +newly introduced concepts (types, functions etc). The \texttt{B}$@i$ are the +direct \textbf{parent theories}\indexbold{parent theory} of \texttt{T}. +Everything defined in the parent theories (and their parents \dots) is +automatically visible. To avoid name clashes, identifiers can be qualified by +theory names as in \texttt{T.f} and \texttt{B.f}. HOL's theory library is +available online at \verb$http://www.in.tum.de/~isabelle/library/HOL/$ and is +recommended browsing. +\begin{warn} + HOL contains a theory \ttindexbold{Main}, the union of all the basic + predefined theories like arithmetic, lists, sets, etc.\ (see the online + library). Unless you know what you are doing, always include \texttt{Main} + as a direct or indirect parent theory of all your theories. +\end{warn} + +This tutorial is concerned with introducing you to the different linguistic +constructs that can fill ${<}declarations{>}$ in the above theory template. +A complete grammar of the basic constructs is found in Appendix~A +of~\cite{Isa-Ref-Man}, for reference in times of doubt. + +The tutorial is also concerned with showing you how to prove theorems about +the concepts in a theory. This involves invoking predefined theorem proving +commands. Because Isabelle is written in the programming language +ML,\footnote{Many concepts in HOL and ML are similar. Make sure you do not + confuse the two levels.} interacting with Isabelle means calling ML +functions. Hence \bfindex{proof scripts} are sequences of calls to ML +functions that perform specific theorem proving tasks. Nevertheless, +familiarity with ML is absolutely not required. All proof scripts for theory +\texttt{T} (defined in file \texttt{T.thy}) should be contained in file +\texttt{T.ML}. Theory and proof scripts are loaded (and checked!) by calling +the ML function \ttindexbold{use_thy}: +\begin{ttbox} +use_thy "T"; +\end{ttbox} + +There are more advanced interfaces for Isabelle that hide the ML level from +you and replace function calls by menu selection. There is even a special +font with mathematical symbols. For details see the Isabelle home page. This +tutorial concentrates on the bare essentials and ignores such niceties. + +\section{Types, terms and formulae} +\label{sec:TypesTermsForms} + +Embedded in the declarations of a theory are the types, terms and formulae of +HOL. HOL is a typed logic whose type system resembles that of functional +programming languages like ML or Haskell. Thus there are +\begin{description} +\item[base types,] in particular \ttindex{bool}, the type of truth values, +and \ttindex{nat}, the type of natural numbers. +\item[type constructors,] in particular \ttindex{list}, the type of +lists, and \ttindex{set}, the type of sets. Type constructors are written +postfix, e.g.\ \texttt{(nat)list} is the type of lists whose elements are +natural numbers. Parentheses around single arguments can be dropped (as in +\texttt{nat list}), multiple arguments are separated by commas (as in +\texttt{(bool,nat)foo}). +\item[function types,] denoted by \ttindexbold{=>}. In HOL +\texttt{=>} represents {\em total} functions only. As is customary, +\texttt{$\tau@1$ => $\tau@2$ => $\tau@3$} means +\texttt{$\tau@1$ => ($\tau@2$ => $\tau@3$)}. Isabelle also supports the +notation \texttt{[$\tau@1,\dots,\tau@n$] => $\tau$} which abbreviates +\texttt{$\tau@1$ => $\cdots$ => $\tau@n$ => $\tau$}. +\item[type variables,] denoted by \texttt{'a}, \texttt{'b} etc, just like in +ML. They give rise to polymorphic types like \texttt{'a => 'a}, the type of the +identity function. +\end{description} +\begin{warn} + Types are extremely important because they prevent us from writing + nonsense. Isabelle insists that all terms and formulae must be well-typed + and will print an error message if a type mismatch is encountered. To + reduce the amount of explicit type information that needs to be provided by + the user, Isabelle infers the type of all variables automatically (this is + called \bfindex{type inference}) and keeps quiet about it. Occasionally + this may lead to misunderstandings between you and the system. If anything + strange happens, we recommend to set the flag \ttindexbold{show_types} that + tells Isabelle to display type information that is usually suppressed: + simply type +\begin{ttbox} +set show_types; +\end{ttbox} + +\noindent +at the ML-level. This can be reversed by \texttt{reset show_types;}. +\end{warn} + + +\textbf{Terms}\indexbold{term} +are formed as in functional programming by applying functions to +arguments. If \texttt{f} is a function of type \texttt{$\tau@1$ => $\tau@2$} +and \texttt{t} is a term of type $\tau@1$ then \texttt{f~t} is a term of type +$\tau@2$. HOL also supports infix functions like \texttt{+} and some basic +constructs from functional programming: +\begin{description} +\item[\texttt{if $b$ then $t@1$ else $t@2$}]\indexbold{*if} +means what you think it means and requires that +$b$ is of type \texttt{bool} and $t@1$ and $t@2$ are of the same type. +\item[\texttt{let $x$ = $t$ in $u$}]\indexbold{*let} +is equivalent to $u$ where all occurrences of $x$ have been replaced by +$t$. For example, +\texttt{let x = 0 in x+x} means \texttt{0+0}. Multiple bindings are separated +by semicolons: \texttt{let $x@1$ = $t@1$; \dots; $x@n$ = $t@n$ in $u$}. +\item[\texttt{case $e$ of $c@1$ => $e@1$ | \dots | $c@n$ => $e@n$}] +\indexbold{*case} +evaluates to $e@i$ if $e$ is of the form +$c@i$. See~\S\ref{sec:case-expressions} for details. +\end{description} + +Terms may also contain $\lambda$-abstractions. For example, $\lambda +x. x+1$ is the function that takes an argument $x$ and returns $x+1$. In +Isabelle we write \texttt{\%x.~x+1}.\index{==>@{\tt\%}|bold} +Instead of \texttt{\%x.~\%y.~\%z.~t} we can write \texttt{\%x~y~z.~t}. + +\textbf{Formulae}\indexbold{formula} +are terms of type \texttt{bool}. There are the basic +constants \ttindexbold{True} and \ttindexbold{False} and the usual logical +connectives (in decreasing order of priority): +\verb$~$\index{$HOL1@{\ttnot}|bold} (`not'), +\texttt{\&}\index{$HOL2@{\tt\&}|bold} (`and'), +\texttt{|}\index{$HOL2@{\ttor}|bold} (`or') and +\texttt{-->}\index{$HOL2@{\tt-->}|bold} (`implies'), +all of which (except the unary \verb$~$) associate to the right. In +particular \texttt{A --> B --> C} means \texttt{A --> (B --> C)} and is thus +logically equivalent with \texttt{A \& B --> C} +(which is \texttt{(A \& B) --> C}). + +Equality is available in the form of the infix function +\texttt{=} of type \texttt{'a => 'a => bool}. Thus \texttt{$t@1$ = $t@2$} is +a formula provided $t@1$ and $t@2$ are terms of the same type. In case $t@1$ +and $t@2$ are of type \texttt{bool}, \texttt{=} acts as if-and-only-if. + +The syntax for quantifiers is +\texttt{!~$x$.$\,P$}\index{$HOLQ@{\ttall}|bold} (`for all $x$') and +\texttt{?~$x$.$\,P$}\index{$HOLQ@{\tt?}|bold} (`exists $x$'). +There is even \texttt{?!~$x$.$\,P$}\index{$HOLQ@{\ttuniquex}|bold}, which +means that there exists exactly one $x$ that satisfies $P$. Instead of +\texttt{!} and \texttt{?} you may also write \texttt{ALL} and \texttt{EX}. +Nested quantifications can be abbreviated: +\texttt{!$x~y~z$.$\,P$} means \texttt{!$x$.~!$y$.~!$z$.$\,P$}. + +Despite type inference, it is sometimes necessary to attach explicit +\bfindex{type constraints} to a term. The syntax is \texttt{$t$::$\tau$} as +in \texttt{x < (y::nat)}. Note that \texttt{::} binds weakly and should +therefore be enclosed in parentheses: \texttt{x < y::nat} is ill-typed +because it is interpreted as \texttt{(x < y)::nat}. The main reason for type +constraints are overloaded functions like \texttt{+}, \texttt{*} and +\texttt{<}. (See \S\ref{sec:TypeClasses} for a full discussion of +overloading.) + +\begin{warn} +In general, HOL's concrete syntax tries to follow the conventions of +functional programming and mathematics. Below we list the main rules that you +should be familiar with to avoid certain syntactic traps. A particular +problem for novices can be the priority of operators. If you are unsure, use +more rather than fewer parentheses. In those cases where Isabelle echoes your +input, you can see which parentheses are dropped---they were superfluous. If +you are unsure how to interpret Isabelle's output because you don't know +where the (dropped) parentheses go, set (and possibly reset) the flag +\ttindexbold{show_brackets}: +\begin{ttbox} +set show_brackets; \(\dots\); reset show_brackets; +\end{ttbox} +\end{warn} + +\begin{itemize} +\item +Remember that \texttt{f t u} means \texttt{(f t) u} and not \texttt{f(t u)}! +\item +Isabelle allows infix functions like \texttt{+}. The prefix form of function +application binds more strongly than anything else and hence \texttt{f~x + y} +means \texttt{(f~x)~+~y} and not \texttt{f(x+y)}. +\item +Remember that in HOL if-and-only-if is expressed using equality. But equality +has a high priority, as befitting a relation, while if-and-only-if typically +has the lowest priority. Thus, \verb$~ ~ P = P$ means \verb$~ ~(P = P)$ and +not \verb$(~ ~P) = P$. When using \texttt{=} to mean logical equivalence, +enclose both operands in parentheses, as in \texttt{(A \& B) = (B \& A)}. +\item +Constructs with an opening but without a closing delimiter bind very weakly +and should therefore be enclosed in parentheses if they appear in subterms, as +in \texttt{f = (\%x.~x)}. This includes +\ttindex{if}, \ttindex{let}, \ttindex{case}, \verb$%$ and quantifiers. +\item +Never write \texttt{\%x.x} or \texttt{!x.x=x} because \texttt{x.x} is always +read as a single qualified identifier that refers to an item \texttt{x} in +theory \texttt{x}. Write \texttt{\%x.~x} and \texttt{!x.~x=x} instead. +\end{itemize} + +\section{Variables} +\label{sec:variables} + +Isabelle distinguishes free and bound variables just as is customary. Bound +variables are automatically renamed to avoid clashes with free variables. In +addition, Isabelle has a third kind of variable, called a \bfindex{schematic + variable} or \bfindex{unknown}, which starts with a \texttt{?}. Logically, +an unknown is a free variable. But it may be instantiated by another term +during the proof process. For example, the mathematical theorem $x = x$ is +represented in Isabelle as \texttt{?x = ?x}, which means that Isabelle can +instantiate it arbitrarily. This is in contrast to ordinary variables, which +remain fixed. The programming language Prolog calls unknowns {\em logical\/} +variables. + +Most of the time you can and should ignore unknowns and work with ordinary +variables. Just don't be surprised that after you have finished the +proof of a theorem, Isabelle will turn your free variables into unknowns: it +merely indicates that Isabelle will automatically instantiate those unknowns +suitably when the theorem is used in some other proof. +\begin{warn} + The existential quantifier \texttt{?}\index{$HOLQ@{\tt?}} needs to be + followed by a space. Otherwise \texttt{?x} is interpreted as a schematic + variable. +\end{warn} + +\section{Getting started} + +Assuming you have installed Isabelle, you start it by typing \texttt{isabelle + HOL} in a shell window.\footnote{Since you will always want to use HOL when + studying this tutorial, you can set the shell variable + \texttt{ISABELLE_LOGIC} to \texttt{HOL} once and for all and simply execute + \texttt{isabelle}.} This presents you with Isabelle's most basic ASCII +interface. In addition you need to open an editor window to create theories +(\texttt{.thy} files) and proof scripts (\texttt{.ML} files). While you are +developing a proof, we recommend to type each proof command into the ML-file +first and then enter it into Isabelle by copy-and-paste, thus ensuring that +you have a complete record of your proof. diff -r 6ef3742b6153 -r 1463e182c533 doc-src/Tutorial/extra.sty --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc-src/Tutorial/extra.sty Wed Aug 26 16:57:49 1998 +0200 @@ -0,0 +1,29 @@ +% extra.sty : Isabelle Manual extra macros for non-Springer version +% +\typeout{Document Style extra. Released 17 February 1994} + +%%Euro-style date: 20 September 1955 +\def\today{\number\day\space\ifcase\month\or +January\or February\or March\or April\or May\or June\or +July\or August\or September\or October\or November\or December\fi +\space\number\year} + +%%Borrowed from alltt.sty, but leaves % as the comment character +\def\docspecials{\do\ \do\$\do\&% + \do\#\do\^\do\^^K\do\_\do\^^A\do\~} + +%%%Put first chapter on odd page, with arabic numbering; like \cleardoublepage +\newcommand\clearfirst{\clearpage\ifodd\c@page\else + \hbox{}\newpage\if@twocolumn\hbox{}\newpage\fi\fi + \pagenumbering{arabic}} + +%%%Ruled chapter headings +\def\@rulehead#1{\hrule height1pt \vskip 14pt \Huge \bf + #1 \vskip 14pt\hrule height1pt} +\def\@makechapterhead#1{ { \parindent 0pt + \ifnum\c@secnumdepth >\m@ne \raggedleft\large\bf\@chapapp{} \thechapter \par + \vskip 20pt \fi \raggedright \@rulehead{#1} \par \nobreak \vskip 40pt } } + +\def\@makeschapterhead#1{ { \parindent 0pt \raggedright + \@rulehead{#1} \par \nobreak \vskip 40pt } } + diff -r 6ef3742b6153 -r 1463e182c533 doc-src/Tutorial/fp.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc-src/Tutorial/fp.tex Wed Aug 26 16:57:49 1998 +0200 @@ -0,0 +1,1416 @@ +\chapter{Functional Programming in HOL} + +Although on the surface this chapter is mainly concerned with how to write +functional programs in HOL and how to verify them, most of the +constructs and proof procedures introduced are general purpose and recur in +any specification or verification task. + +The dedicated functional programmer should be warned: HOL offers only what +could be called {\em total functional programming} --- all functions in HOL +must be total; lazy data structures are not directly available. On the +positive side, functions in HOL need not be computable: HOL is a +specification language that goes well beyond what can be expressed as a +program. However, for the time being we concentrate on the computable. + +\section{An introductory theory} +\label{sec:intro-theory} + +Functional programming needs datatypes and functions. Both of them can be +defined in a theory with a syntax reminiscent of languages like ML or +Haskell. As an example consider the theory in Fig.~\ref{fig:ToyList}. + +\begin{figure}[htbp] +\begin{ttbox}\makeatother +\input{ToyList/ToyList.thy}\end{ttbox} +\caption{A theory of lists} +\label{fig:ToyList} +\end{figure} + +HOL already has a predefined theory of lists called \texttt{List} --- +\texttt{ToyList} is merely a small fragment of it chosen as an example. In +contrast to what is recommended in \S\ref{sec:Basic:Theories}, +\texttt{ToyList} is not based on \texttt{Main} but on \texttt{Datatype}, a +theory that contains everything required for datatype definitions but does +not have \texttt{List} as a parent, thus avoiding ambiguities caused by +defining lists twice. + +The \ttindexbold{datatype} \texttt{list} introduces two constructors +\texttt{Nil} and \texttt{Cons}, the empty list and the operator that adds an +element to the front of a list. For example, the term \texttt{Cons True (Cons + False Nil)} is a value of type \texttt{bool~list}, namely the list with the +elements \texttt{True} and \texttt{False}. Because this notation becomes +unwieldy very quickly, the datatype declaration is annotated with an +alternative syntax: instead of \texttt{Nil} and \texttt{Cons}~$x$~$xs$ we can +write \index{#@{\tt[]}|bold}\texttt{[]} and +\texttt{$x$~\#~$xs$}\index{#@{\tt\#}|bold}. In fact, this alternative syntax +is the standard syntax. Thus the list \texttt{Cons True (Cons False Nil)} +becomes \texttt{True \# False \# []}. The annotation \ttindexbold{infixr} +means that \texttt{\#} associates to the right, i.e.\ the term \texttt{$x$ \# + $y$ \# $z$} is read as \texttt{$x$ \# ($y$ \# $z$)} and not as \texttt{($x$ + \# $y$) \# $z$}. + +\begin{warn} + Syntax annotations are a powerful but completely optional feature. You + could drop them from theory \texttt{ToyList} and go back to the identifiers + \texttt{Nil} and \texttt{Cons}. However, lists are such a central datatype + that their syntax is highly customized. We recommend that novices should + not use syntax annotations in their own theories. +\end{warn} + +Next, the functions \texttt{app} and \texttt{rev} are declared. In contrast +to ML, Isabelle insists on explicit declarations of all functions (keyword +\ttindexbold{consts}). (Apart from the declaration-before-use restriction, +the order of items in a theory file is unconstrained.) Function \texttt{app} +is annotated with concrete syntax too. Instead of the prefix syntax +\texttt{app}~$xs$~$ys$ the infix $xs$~\texttt{\at}~$ys$ becomes the preferred +form. + +Both functions are defined recursively. The equations for \texttt{app} and +\texttt{rev} hardly need comments: \texttt{app} appends two lists and +\texttt{rev} reverses a list. The keyword \texttt{primrec} indicates that +the recursion is of a particularly primitive kind where each recursive call +peels off a datatype constructor from one of the arguments (see +\S\ref{sec:datatype}). Thus the recursion always terminates, i.e.\ the +function is \bfindex{total}. + +The termination requirement is absolutely essential in HOL, a logic of total +functions. If we were to drop it, inconsistencies could quickly arise: the +``definition'' $f(n) = f(n)+1$ immediately leads to $0 = 1$ by subtracting +$f(n)$ on both sides. +% However, this is a subtle issue that we cannot discuss here further. + +\begin{warn} + As we have indicated, the desire for total functions is not a gratuitously + imposed restriction but an essential characteristic of HOL. It is only + because of totality that reasoning in HOL is comparatively easy. More + generally, the philosophy in HOL is not to allow arbitrary axioms (such as + function definitions whose totality has not been proved) because they + quickly lead to inconsistencies. Instead, fixed constructs for introducing + types and functions are offered (such as \texttt{datatype} and + \texttt{primrec}) which are guaranteed to preserve consistency. +\end{warn} + +A remark about syntax. The textual definition of a theory follows a fixed +syntax with keywords like \texttt{datatype} and \texttt{end} (see +Fig.~\ref{fig:keywords} in Appendix~\ref{sec:Appendix} for a full list). +Embedded in this syntax are the types and formulae of HOL, whose syntax is +extensible, e.g.\ by new user-defined infix operators +(see~\ref{sec:infix-syntax}). To distinguish the two levels, everything +HOL-specific should be enclosed in \texttt{"}\dots\texttt{"}. The same holds +for identifiers that happen to be keywords, as in +\begin{ttbox} +consts "end" :: 'a list => 'a +\end{ttbox} +To lessen this burden, quotation marks around types can be dropped, +provided their syntax does not go beyond what is described in +\S\ref{sec:TypesTermsForms}. Types containing further operators, e.g.\ +\texttt{*} for Cartesian products, need quotation marks. + +When Isabelle prints a syntax error message, it refers to the HOL syntax as +the \bfindex{inner syntax}. + +\section{An introductory proof} +\label{sec:intro-proof} + +Having defined \texttt{ToyList}, we load it with the ML command +\begin{ttbox} +use_thy "ToyList"; +\end{ttbox} +and are ready to prove a few simple theorems. This will illustrate not just +the basic proof commands but also the typical proof process. + +\subsubsection*{Main goal: \texttt{rev(rev xs) = xs}} + +Our goal is to show that reversing a list twice produces the original +list. Typing +\begin{ttbox} +\input{ToyList/thm}\end{ttbox} +establishes a new goal to be proved in the context of the current theory, +which is the one we just loaded. Isabelle's response is to print the current proof state: +\begin{ttbox} +{\out Level 0} +{\out rev (rev xs) = xs} +{\out 1. rev (rev xs) = xs} +\end{ttbox} +Until we have finished a proof, the proof state always looks like this: +\begin{ttbox} +{\out Level \(i\)} +{\out \(G\)} +{\out 1. \(G@1\)} +{\out \(\vdots\)} +{\out \(n\). \(G@n\)} +\end{ttbox} +where \texttt{Level}~$i$ indicates that we are $i$ steps into the proof, $G$ +is the overall goal that we are trying to prove, and the numbered lines +contain the subgoals $G@1$, \dots, $G@n$ that we need to prove to establish +$G$. At \texttt{Level 0} there is only one subgoal, which is identical with +the overall goal. Normally $G$ is constant and only serves as a reminder. +Hence we rarely show it in this tutorial. + +Let us now get back to \texttt{rev(rev xs) = xs}. Properties of recursively +defined functions are best established by induction. In this case there is +not much choice except to induct on \texttt{xs}: +\begin{ttbox} +\input{ToyList/inductxs}\end{ttbox} +This tells Isabelle to perform induction on variable \texttt{xs} in subgoal +1. The new proof state contains two subgoals, namely the base case +(\texttt{Nil}) and the induction step (\texttt{Cons}): +\begin{ttbox} +{\out 1. rev (rev []) = []} +{\out 2. !!a list. rev (rev list) = list ==> rev (rev (a # list)) = a # list} +\end{ttbox} +The induction step is an example of the general format of a subgoal: +\begin{ttbox} +{\out \(i\). !!\(x@1 \dots x@n\). {\it assumptions} ==> {\it conclusion}} +\end{ttbox}\index{==>@{\tt==>}|bold} +The prefix of bound variables \texttt{!!\(x@1 \dots x@n\)} can be ignored +most of the time, or simply treated as a list of variables local to this +subgoal. Their deeper significance is explained in \S\ref{sec:PCproofs}. The +{\it assumptions} are the local assumptions for this subgoal and {\it + conclusion} is the actual proposition to be proved. Typical proof steps +that add new assumptions are induction or case distinction. In our example +the only assumption is the induction hypothesis \texttt{rev (rev list) = + list}, where \texttt{list} is a variable name chosen by Isabelle. If there +are multiple assumptions, they are enclosed in the bracket pair +\texttt{[|}\index{==>@\ttlbr|bold} and \texttt{|]}\index{==>@\ttrbr|bold} +and separated by semicolons. + +Let us try to solve both goals automatically: +\begin{ttbox} +\input{ToyList/autotac}\end{ttbox} +This command tells Isabelle to apply a proof strategy called +\texttt{Auto_tac} to all subgoals. Essentially, \texttt{Auto_tac} tries to +`simplify' the subgoals. In our case, subgoal~1 is solved completely (thanks +to the equation \texttt{rev [] = []}) and disappears; the simplified version +of subgoal~2 becomes the new subgoal~1: +\begin{ttbox}\makeatother +{\out 1. !!a list. rev(rev list) = list ==> rev(rev list @ a # []) = a # list} +\end{ttbox} +In order to simplify this subgoal further, a lemma suggests itself. + +\subsubsection*{First lemma: \texttt{rev(xs \at~ys) = (rev ys) \at~(rev xs)}} + +We start the proof as usual: +\begin{ttbox}\makeatother +\input{ToyList/lemma1}\end{ttbox} +There are two variables that we could induct on: \texttt{xs} and +\texttt{ys}. Because \texttt{\at} is defined by recursion on +the first argument, \texttt{xs} is the correct one: +\begin{ttbox} +\input{ToyList/inductxs}\end{ttbox} +This time not even the base case is solved automatically: +\begin{ttbox}\makeatother +by(Auto_tac); +{\out 1. rev ys = rev ys @ []} +{\out 2. \dots} +\end{ttbox} +We need another lemma. + +\subsubsection*{Second lemma: \texttt{xs \at~[] = xs}} + +This time the canonical proof procedure +\begin{ttbox}\makeatother +\input{ToyList/lemma2}\input{ToyList/inductxs}\input{ToyList/autotac}\end{ttbox} +leads to the desired message \texttt{No subgoals!}: +\begin{ttbox}\makeatother +{\out Level 2} +{\out xs @ [] = xs} +{\out No subgoals!} +\end{ttbox} +Now we can give the lemma just proved a suitable name +\begin{ttbox} +\input{ToyList/qed2}\end{ttbox} +and tell Isabelle to use this lemma in all future proofs by simplification: +\begin{ttbox} +\input{ToyList/addsimps2}\end{ttbox} +Note that in the theorem \texttt{app_Nil2} the free variable \texttt{xs} has +been replaced by the unknown \texttt{?xs}, just as explained in +\S\ref{sec:variables}. + +Going back to the proof of the first lemma +\begin{ttbox}\makeatother +\input{ToyList/lemma1}\input{ToyList/inductxs}\input{ToyList/autotac}\end{ttbox} +we find that this time \texttt{Auto_tac} solves the base case, but the +induction step merely simplifies to +\begin{ttbox}\makeatother +{\out 1. !!a list.} +{\out rev (list @ ys) = rev ys @ rev list} +{\out ==> (rev ys @ rev list) @ a # [] = rev ys @ rev list @ a # []} +\end{ttbox} +Now we need to remember that \texttt{\at} associates to the right, and that +\texttt{\#} and \texttt{\at} have the same priority (namely the \texttt{65} +in the definition of \texttt{ToyList}). Thus the conclusion really is +\begin{ttbox}\makeatother +{\out ==> (rev ys @ rev list) @ (a # []) = rev ys @ (rev list @ (a # []))} +\end{ttbox} +and the missing lemma is associativity of \texttt{\at}. + +\subsubsection*{Third lemma: \texttt{(xs \at~ys) \at~zs = xs \at~(ys \at~zs)}} + +This time the canonical proof procedure +\begin{ttbox}\makeatother +\input{ToyList/lemma3}\end{ttbox} +succeeds without further ado. Again we name the lemma and add it to +the set of lemmas used during simplification: +\begin{ttbox} +\input{ToyList/qed3}\end{ttbox} +Now we can go back and prove the first lemma +\begin{ttbox}\makeatother +\input{ToyList/lemma1}\input{ToyList/inductxs}\input{ToyList/autotac}\end{ttbox} +add it to the simplification lemmas +\begin{ttbox} +\input{ToyList/qed1}\end{ttbox} +and then solve our main theorem: +\begin{ttbox}\makeatother +\input{ToyList/thm}\input{ToyList/inductxs}\input{ToyList/autotac}\end{ttbox} + +\subsubsection*{Review} + +This is the end of our toy proof. It should have familiarized you with +\begin{itemize} +\item the standard theorem proving procedure: +state a goal; proceed with proof until a new lemma is required; prove that +lemma; come back to the original goal. +\item a specific procedure that works well for functional programs: +induction followed by all-out simplification via \texttt{Auto_tac}. +\item a basic repertoire of proof commands. +\end{itemize} + + +\section{Some helpful commands} +\label{sec:commands-and-hints} + +This section discusses a few basic commands for manipulating the proof state +and can be skipped by casual readers. + +There are two kinds of commands used during a proof: the actual proof +commands and auxiliary commands for examining the proof state and controlling +the display. Proof commands are always of the form +\texttt{by(\textit{tactic});}\indexbold{tactic} where \textbf{tactic} is a +synonym for ``theorem proving function''. Typical examples are +\texttt{induct_tac} and \texttt{Auto_tac} --- the suffix \texttt{_tac} is +merely a mnemonic. Further tactics are introduced throughout the tutorial. + +%Tactics can also be modified. For example, +%\begin{ttbox} +%by(ALLGOALS Asm_simp_tac); +%\end{ttbox} +%tells Isabelle to apply \texttt{Asm_simp_tac} to all subgoals. For more on +%tactics and how to combine them see~\S\ref{sec:Tactics}. + +The most useful auxiliary commands are: +\begin{description} +\item[Printing the current state] +Type \texttt{pr();} to redisplay the current proof state, for example when it +has disappeared off the screen. +\item[Limiting the number of subgoals] +Typing \texttt{prlim $k$;} tells Isabelle to print only the first $k$ +subgoals from now on and redisplays the current proof state. This is helpful +when there are many subgoals. +\item[Undoing] Typing \texttt{undo();} undoes the effect of the last +tactic. +\item[Context switch] Every proof happens in the context of a + \bfindex{current theory}. By default, this is the last theory loaded. If + you want to prove a theorem in the context of a different theory + \texttt{T}, you need to type \texttt{context T.thy;}\index{*context|bold} + first. Of course you need to change the context again if you want to go + back to your original theory. +\item[Displaying types] We have already mentioned the flag + \ttindex{show_types} above. It can also be useful for detecting typos in + formulae early on. For example, if \texttt{show_types} is set and the goal + \texttt{rev(rev xs) = xs} is started, Isabelle prints the additional output +\begin{ttbox} +{\out Variables:} +{\out xs :: 'a list} +\end{ttbox} +which tells us that Isabelle has correctly inferred that +\texttt{xs} is a variable of list type. On the other hand, had we +made a typo as in \texttt{rev(re xs) = xs}, the response +\begin{ttbox} +Variables: + re :: 'a list => 'a list + xs :: 'a list +\end{ttbox} +would have alerted us because of the unexpected variable \texttt{re}. +\item[(Re)loading theories]\index{loading theories}\index{reloading theories} +Initially you load theory \texttt{T} by typing \ttindex{use_thy}~\texttt{"T";}, +which loads all parent theories of \texttt{T} automatically, if they are not +loaded already. If you modify \texttt{T.thy} or \texttt{T.ML}, you can +reload it by typing \texttt{use_thy~"T";} again. This time, however, only +\texttt{T} is reloaded. If some of \texttt{T}'s parents have changed as well, +type \ttindexbold{update}\texttt{();} to reload all theories that have +changed. +\end{description} +Further commands are found in the Reference Manual. + + +\section{Datatypes} +\label{sec:datatype} + +Inductive datatypes are part of almost every non-trivial application of HOL. +First we take another look at a very important example, the datatype of +lists, before we turn to datatypes in general. The section closes with a +case study. + + +\subsection{Lists} + +Lists are one of the essential datatypes in computing. Readers of this tutorial +and users of HOL need to be familiar with their basic operations. Theory +\texttt{ToyList} is only a small fragment of HOL's predefined theory +\texttt{List}\footnote{\texttt{http://www.in.tum.de/\~\relax + isabelle/library/HOL/List.html}}. +The latter contains many further operations. For example, the functions +\ttindexbold{hd} (`head') and \ttindexbold{tl} (`tail') return the first +element and the remainder of a list. (However, pattern-matching is usually +preferable to \texttt{hd} and \texttt{tl}.) +Theory \texttt{List} also contains more syntactic sugar: +\texttt{[}$x@1$\texttt{,}\dots\texttt{,}$x@n$\texttt{]} abbreviates +$x@1$\texttt{\#}\dots\texttt{\#}$x@n$\texttt{\#[]}. +In the rest of the tutorial we always use HOL's predefined lists. + + +\subsection{The general format} +\label{sec:general-datatype} + +The general HOL \texttt{datatype} definition is of the form +\[ +\mathtt{datatype}~(\alpha@1, \dots, \alpha@n) \, t ~=~ +C@1~\tau@{11}~\dots~\tau@{1k@1} ~\mid~ \dots ~\mid~ +C@m~\tau@{m1}~\dots~\tau@{mk@m} +\] +where $\alpha@i$ are type variables (the parameters), $C@i$ are distinct +constructor names and $\tau@{ij}$ are types; it is customary to capitalize +the first letter in constructor names. There are a number of +restrictions (such as the type should not be empty) detailed +elsewhere~\cite{Isa-Logics-Man}. Isabelle notifies you if you violate them. + +Laws about datatypes, such as \verb$[] ~= x#xs$ and \texttt{(x\#xs = y\#ys) = + (x=y \& xs=ys)}, are used automatically during proofs by simplification. +The same is true for the equations in primitive recursive function +definitions. + +\subsection{Primitive recursion} + +Functions on datatypes are usually defined by recursion. In fact, most of the +time they are defined by what is called \bfindex{primitive recursion}. +The keyword \texttt{primrec} is followed by a list of equations +\[ f \, x@1 \, \dots \, (C \, y@1 \, \dots \, y@k)\, \dots \, x@n = r \] +such that $C$ is a constructor of the datatype $t$ and all recursive calls of +$f$ in $r$ are of the form $f \, \dots \, y@i \, \dots$ for some $i$. Thus +Isabelle immediately sees that $f$ terminates because one (fixed!) argument +becomes smaller with every recursive call. There must be exactly one equation +for each constructor. Their order is immaterial. +A more general method for defining total recursive functions is explained in +\S\ref{sec:recdef}. + +\begin{exercise} +Given the datatype of binary trees +\begin{ttbox} +\input{Misc/tree}\end{ttbox} +define a function \texttt{mirror} that mirrors the structure of a binary tree +by swapping subtrees (recursively). Prove \texttt{mirror(mirror(t)) = t}. +\end{exercise} + +\subsection{\texttt{case}-expressions} +\label{sec:case-expressions} + +HOL also features \ttindexbold{case}-expressions for analyzing elements of a +datatype. For example, +\begin{ttbox} +case xs of [] => 0 | y#ys => y +\end{ttbox} +evaluates to \texttt{0} if \texttt{xs} is \texttt{[]} and to \texttt{y} if +\texttt{xs} is \texttt{y\#ys}. (Since the result in both branches must be of +the same type, it follows that \texttt{y::nat} and hence +\texttt{xs::(nat)list}.) + +In general, if $e$ is a term of the datatype $t$ defined in +\S\ref{sec:general-datatype} above, the corresponding +\texttt{case}-expression analyzing $e$ is +\[ +\begin{array}{rrcl} +\mbox{\tt case}~e~\mbox{\tt of} & C@1~x@{11}~\dots~x@{1k@1} & \To & e@1 \\ + \vdots \\ + \mid & C@m~x@{m1}~\dots~x@{mk@m} & \To & e@m +\end{array} +\] + +\begin{warn} +{\em All} constructors must be present, their order is fixed, and nested +patterns are not supported. Violating these restrictions results in strange +error messages. +\end{warn} +\noindent +Nested patterns can be simulated by nested \texttt{case}-expressions: instead +of +\begin{ttbox} +case xs of [] => 0 | [x] => x | x#(y#zs) => y +\end{ttbox} +write +\begin{ttbox} +case xs of [] => 0 | x#ys => (case ys of [] => x | y#zs => y) +\end{ttbox} +Note that \texttt{case}-expressions should be enclosed in parentheses to +indicate their scope. + +\subsection{Structural induction} + +Almost all the basic laws about a datatype are applied automatically during +simplification. Only induction is invoked by hand via \texttt{induct_tac}, +which works for any datatype. In some cases, induction is overkill and a case +distinction over all constructors of the datatype suffices. This is performed +by \ttindexbold{exhaust_tac}. A trivial example: +\begin{ttbox} +\input{Misc/exhaust.ML}{\out1. xs = [] ==> (case xs of [] => [] | y # ys => xs) = xs} +{\out2. !!a list. xs = a # list ==> (case xs of [] => [] | y # ys => xs) = xs} +\input{Misc/autotac.ML}\end{ttbox} +Note that this particular case distinction could have been automated +completely. See~\S\ref{sec:SimpFeatures}. + +\begin{warn} + Induction is only allowed on a free variable that should not occur among + the assumptions of the subgoal. Exhaustion works for arbitrary terms. +\end{warn} + +\subsection{Case study: boolean expressions} +\label{sec:boolex} + +The aim of this case study is twofold: it shows how to model boolean +expressions and some algorithms for manipulating them, and it demonstrates +the constructs introduced above. + +\subsubsection{How can we model boolean expressions?} + +We want to represent boolean expressions built up from variables and +constants by negation and conjunction. The following datatype serves exactly +that purpose: +\begin{ttbox} +\input{Ifexpr/boolex}\end{ttbox} +The two constants are represented by the terms \texttt{Const~True} and +\texttt{Const~False}. Variables are represented by terms of the form +\texttt{Var}~$n$, where $n$ is a natural number (type \texttt{nat}). +For example, the formula $P@0 \land \neg P@1$ is represented by the term +\texttt{And~(Var~0)~(Neg(Var~1))}. + +\subsubsection{What is the value of boolean expressions?} + +The value of a boolean expressions depends on the value of its variables. +Hence the function \texttt{value} takes an additional parameter, an {\em + environment} of type \texttt{nat~=>~bool}, which maps variables to their +values: +\begin{ttbox} +\input{Ifexpr/value}\end{ttbox} + +\subsubsection{If-expressions} + +An alternative and often more efficient (because in a certain sense +canonical) representation are so-called \textit{If-expressions\/} built up +from constants (\texttt{CIF}), variables (\texttt{VIF}) and conditionals +(\texttt{IF}): +\begin{ttbox} +\input{Ifexpr/ifex}\end{ttbox} +The evaluation if If-expressions proceeds as for \texttt{boolex}: +\begin{ttbox} +\input{Ifexpr/valif}\end{ttbox} + +\subsubsection{Transformation into and of If-expressions} + +The type \texttt{boolex} is close to the customary representation of logical +formulae, whereas \texttt{ifex} is designed for efficiency. Thus we need to +translate from \texttt{boolex} into \texttt{ifex}: +\begin{ttbox} +\input{Ifexpr/bool2if}\end{ttbox} +At last, we have something we can verify: that \texttt{bool2if} preserves the +value of its argument. +\begin{ttbox} +\input{Ifexpr/bool2if.ML}\end{ttbox} +The proof is canonical: +\begin{ttbox} +\input{Ifexpr/proof.ML}\end{ttbox} +In fact, all proofs in this case study look exactly like this. Hence we do +not show them below. + +More interesting is the transformation of If-expressions into a normal form +where the first argument of \texttt{IF} cannot be another \texttt{IF} but +must be a constant or variable. Such a normal form can be computed by +repeatedly replacing a subterm of the form \texttt{IF~(IF~b~x~y)~z~u} by +\texttt{IF b (IF x z u) (IF y z u)}, which has the same value. The following +primitive recursive functions perform this task: +\begin{ttbox} +\input{Ifexpr/normif} +\input{Ifexpr/norm}\end{ttbox} +Their interplay is a bit tricky, and we leave it to the reader to develop an +intuitive understanding. Fortunately, Isabelle can help us to verify that the +transformation preserves the value of the expression: +\begin{ttbox} +\input{Ifexpr/norm.ML}\end{ttbox} +The proof is canonical, provided we first show the following lemma (which +also helps to understand what \texttt{normif} does) and make it available +for simplification via \texttt{Addsimps}: +\begin{ttbox} +\input{Ifexpr/normif.ML}\end{ttbox} + +But how can we be sure that \texttt{norm} really produces a normal form in +the above sense? We have to prove +\begin{ttbox} +\input{Ifexpr/normal_norm.ML}\end{ttbox} +where \texttt{normal} expresses that an If-expression is in normal form: +\begin{ttbox} +\input{Ifexpr/normal}\end{ttbox} +Of course, this requires a lemma about normality of \texttt{normif} +\begin{ttbox} +\input{Ifexpr/normal_normif.ML}\end{ttbox} +that has to be made available for simplification via \texttt{Addsimps}. + +How does one come up with the required lemmas? Try to prove the main theorems +without them and study carefully what \texttt{Auto_tac} leaves unproved. This +has to provide the clue. +The necessity of universal quantification (\texttt{!t e}) in the two lemmas +is explained in \S\ref{sec:InductionHeuristics} + +\begin{exercise} + We strengthen the definition of a {\em normal\/} If-expression as follows: + the first argument of all \texttt{IF}s must be a variable. Adapt the above + development to this changed requirement. (Hint: you may need to formulate + some of the goals as implications (\texttt{-->}) rather than equalities + (\texttt{=}).) +\end{exercise} + +\section{Some basic types} + +\subsection{Natural numbers} + +The type \ttindexbold{nat} of natural numbers is predefined and behaves like +\begin{ttbox} +datatype nat = 0 | Suc nat +\end{ttbox} +In particular, there are \texttt{case}-expressions, for example +\begin{ttbox} +case n of 0 => 0 | Suc m => m +\end{ttbox} +primitive recursion, for example +\begin{ttbox} +\input{Misc/natsum}\end{ttbox} +and induction, for example +\begin{ttbox} +\input{Misc/NatSum.ML}\ttbreak +{\out sum n + sum n = n * Suc n} +{\out No subgoals!} +\end{ttbox} + +The usual arithmetic operations \ttindexbold{+}, \ttindexbold{-}, +\ttindexbold{*}, \ttindexbold{div} and \ttindexbold{mod} are predefined, as +are the relations \ttindexbold{<=} and \ttindexbold{<}. There is even a least +number operation \ttindexbold{LEAST}. For example, \texttt{(LEAST n.$\,$1 < + n) = 2} (HOL does not prove this completely automatically). + +\begin{warn} + The operations \ttindexbold{+}, \ttindexbold{-}, \ttindexbold{*} are + overloaded, i.e.\ they are available not just for natural numbers but at + other types as well (see \S\ref{sec:TypeClasses}). For example, given + the goal \texttt{x+y = y+x}, there is nothing to indicate that you are + talking about natural numbers. Hence Isabelle can only infer that + \texttt{x} and \texttt{y} are of some arbitrary type where \texttt{+} is + declared. As a consequence, you will be unable to prove the goal (although + it may take you some time to realize what has happened if + \texttt{show_types} is not set). In this particular example, you need to + include an explicit type constraint, for example \texttt{x+y = y+(x::nat)}. + If there is enough contextual information this may not be necessary: + \texttt{x+0 = x} automatically implies \texttt{x::nat}. +\end{warn} + + +\subsection{Products} + +HOL also has pairs: \texttt{($a@1$,$a@2$)} is of type \texttt{$\tau@1$ * +$\tau@2$} provided each $a@i$ is of type $\tau@i$. The components of a pair +are extracted by \texttt{fst} and \texttt{snd}: +\texttt{fst($x$,$y$) = $x$} and \texttt{snd($x$,$y$) = $y$}. Tuples +are simulated by pairs nested to the right: +\texttt{($a@1$,$a@2$,$a@3$)} and \texttt{$\tau@1$ * $\tau@2$ * $\tau@3$} +stand for \texttt{($a@1$,($a@2$,$a@3$))} and \texttt{$\tau@1$ * ($\tau@2$ * +$\tau@3$)}. Therefore \texttt{fst(snd($a@1$,$a@2$,$a@3$)) = $a@2$}. + +It is possible to use (nested) tuples as patterns in abstractions, for +example \texttt{\%(x,y,z).x+y+z} and \texttt{\%((x,y),z).x+y+z}. + +In addition to explicit $\lambda$-abstractions, tuple patterns can be used in +most variable binding constructs. Typical examples are +\begin{ttbox} +let (x,y) = f z in (y,x) + +case xs of [] => 0 | (x,y)\#zs => x+y +\end{ttbox} +Further important examples are quantifiers and sets. + +\begin{warn} +Abstraction over pairs and tuples is merely a convenient shorthand for a more +complex internal representation. Thus the internal and external form of a +term may differ, which can affect proofs. If you want to avoid this +complication, use \texttt{fst} and \texttt{snd}, i.e.\ write +\texttt{\%p.~fst p + snd p} instead of \texttt{\%(x,y).~x + y}. +See~\S\ref{} for theorem proving with tuple patterns. +\end{warn} + + +\section{Definitions} +\label{sec:Definitions} + +A definition is simply an abbreviation, i.e.\ a new name for an existing +construction. In particular, definitions cannot be recursive. Isabelle offers +definitions on the level of types and terms. Those on the type level are +called type synonyms, those on the term level are called (constant) +definitions. + + +\subsection{Type synonyms} +\indexbold{type synonyms} + +Type synonyms are similar to those found in ML. Their syntax is fairly self +explanatory: +\begin{ttbox} +\input{Misc/types}\end{ttbox}\indexbold{*types} +The synonym \texttt{alist} shows that in general the type on the right-hand +side needs to be enclosed in double quotation marks +(see the end of~\S\ref{sec:intro-theory}). + +Internally all synonyms are fully expanded. As a consequence Isabelle's +output never contains synonyms. Their main purpose is to improve the +readability of theory definitions. Synonyms can be used just like any other +type: +\begin{ttbox} +\input{Misc/consts}\end{ttbox} + +\subsection{Constant definitions} +\label{sec:ConstDefinitions} + +The above constants \texttt{nand} and \texttt{exor} are non-recursive and can +therefore be defined directly by +\begin{ttbox} +\input{Misc/defs}\end{ttbox}\indexbold{*defs} +where \texttt{defs} is a keyword and \texttt{nand_def} and \texttt{exor_def} +are arbitrary user-supplied names. +The symbol \texttt{==}\index{==>@{\tt==}|bold} is a special form of equality +that should only be used in constant definitions. +Declarations and definitions can also be merged +\begin{ttbox} +\input{Misc/constdefs}\end{ttbox}\indexbold{*constdefs} +in which case the default name of each definition is $f$\texttt{_def}, where +$f$ is the name of the defined constant. + +Note that pattern-matching is not allowed, i.e.\ each definition must be of +the form $f\,x@1\,\dots\,x@n$\texttt{~==~}$t$. + +Section~\S\ref{sec:Simplification} explains how definitions are used +in proofs. + +\begin{warn} +A common mistake when writing definitions is to introduce extra free variables +on the right-hand side as in the following fictitious definition: +\begin{ttbox} +defs prime_def "prime(p) == (m divides p) --> (m=1 | m=p)" +\end{ttbox} +Isabelle rejects this `definition' because of the extra {\tt m} on the +right-hand side, which would introduce an inconsistency. What you should have +written is +\begin{ttbox} +defs prime_def "prime(p) == !m. (m divides p) --> (m=1 | m=p)" +\end{ttbox} +\end{warn} + + + + +\chapter{More Functional Programming} + +The purpose of this chapter is to deepen the reader's understanding of the +concepts encountered so far and to introduce an advanced method for defining +recursive functions. The first two sections give a structured presentation of +theorem proving by simplification (\S\ref{sec:Simplification}) and +discuss important heuristics for induction (\S\ref{sec:InductionHeuristics}). They +can be skipped by readers less interested in proofs. They are followed by a +case study, a compiler for expressions (\S\ref{sec:ExprCompiler}). +Finally we present a very general method for defining recursive functions +that goes well beyond what \texttt{primrec} allows (\S\ref{sec:recdef}). + + +\section{Simplification} +\label{sec:Simplification} + +So far we have proved our theorems by \texttt{Auto_tac}, which +`simplifies' all subgoals. In fact, \texttt{Auto_tac} can do much more than +that, except that it did not need to so far. However, when you go beyond toy +examples, you need to understand the ingredients of \texttt{Auto_tac}. +This section covers the tactic that \texttt{Auto_tac} always applies first, +namely simplification. + +Simplification is one of the central theorem proving tools in Isabelle and +many other systems. The tool itself is called the \bfindex{simplifier}. The +purpose of this section is twofold: to introduce the many features of the +simplifier (\S\ref{sec:SimpFeatures}) and to explain a little bit how the +simplifier works (\S\ref{sec:SimpHow}). Anybody intending to use HOL should +read \S\ref{sec:SimpFeatures}, and the serious student should read +\S\ref{sec:SimpHow} as well in order to understand what happened in case +things do not simplify as expected. + + +\subsection{Using the simplifier} +\label{sec:SimpFeatures} + +In its most basic form, simplification means repeated application of +equations from left to right. For example, taking the rules for \texttt{\at} +and applying them to the term \texttt{[0,1] \at\ []} results in a sequence of +simplification steps: +\begin{ttbox}\makeatother +(0#1#[]) @ [] \(\leadsto\) 0#((1#[]) @ []) \(\leadsto\) 0#(1#([] @ [])) \(\leadsto\) 0#1#[] +\end{ttbox} +This is also known as {\em term rewriting} and the equations are referred +to as {\em rewrite rules}. This is more honest than `simplification' because +the terms do not necessarily become simpler in the process. + +\subsubsection{Simpsets} + +To facilitate simplification, each theory has an associated set of +simplification rules, known as a \bfindex{simpset}. Within a theory, +proofs by simplification refer to the associated simpset by default. +The simpset of a theory is built up as follows: starting with the union of +the simpsets of the parent theories, each occurrence of a \texttt{datatype} +or \texttt{primrec} construct augments the simpset. Explicit definitions are +not added automatically. Users can add new theorems via \texttt{Addsimps} and +delete them again later by \texttt{Delsimps}. + +You may augment a simpset not just by equations but by pretty much any +theorem. The simplifier will try to make sense of it. For example, a theorem +\verb$~$$P$ is automatically turned into \texttt{$P$ = False}. The details are +explained in \S\ref{sec:SimpHow}. + +As a rule of thumb, rewrite rules that really simplify a term (like +\texttt{xs \at\ [] = xs} and \texttt{rev(rev xs) = xs}) should be added to the +current simpset right after they have been proved. Those of a more specific +nature (e.g.\ distributivity laws, which alter the structure of terms +considerably) should only be added for specific proofs and deleted again +afterwards. Conversely, it may also happen that a generally useful rule +needs to be removed for a certain proof and is added again afterwards. The +need of frequent temporary additions or deletions may indicate a badly +designed simpset. +\begin{warn} + Simplification may not terminate, for example if both $f(x) = g(x)$ and + $g(x) = f(x)$ are in the simpset. It is the user's responsibility not to + include rules that can lead to nontermination, either on their own or in + combination with other rules. +\end{warn} + +\subsubsection{Simplification tactics} + +There are four main simplification tactics: +\begin{ttdescription} +\item[\ttindexbold{Simp_tac} $i$] simplifies the conclusion of subgoal~$i$ + using the theory's simpset. It may solve the subgoal completely if it has + become trivial. For example: +\begin{ttbox}\makeatother +{\out 1. [] @ [] = []} +by(Simp_tac 1); +{\out No subgoals!} +\end{ttbox} + +\item[\ttindexbold{Asm_simp_tac}] + is like \verb$Simp_tac$, but extracts additional rewrite rules from + the assumptions of the subgoal. For example, it solves +\begin{ttbox}\makeatother +{\out 1. xs = [] ==> xs @ xs = xs} +\end{ttbox} +which \texttt{Simp_tac} does not do. + +\item[\ttindexbold{Full_simp_tac}] is like \verb$Simp_tac$, but also + simplifies the assumptions (without using the assumptions to + simplify each other or the actual goal). + +\item[\ttindexbold{Asm_full_simp_tac}] is like \verb$Asm_simp_tac$, + but also simplifies the assumptions. In particular, assumptions can + simplify each other. For example: +\begin{ttbox}\makeatother +\out{ 1. [| xs @ zs = ys @ xs; [] @ xs = [] @ [] |] ==> ys = zs} +by(Asm_full_simp_tac 1); +{\out No subgoals!} +\end{ttbox} +The second assumption simplifies to \texttt{xs = []}, which in turn +simplifies the first assumption to \texttt{zs = ys}, thus reducing the +conclusion to \texttt{ys = ys} and hence to \texttt{True}. +(See also the paragraph on tracing below.) +\end{ttdescription} +\texttt{Asm_full_simp_tac} is the most powerful of this quartet of +tactics. In fact, \texttt{Auto_tac} starts by applying +\texttt{Asm_full_simp_tac} to all subgoals. The only reason for the existence +of the other three tactics is that sometimes one wants to limit the amount of +simplification, for example to avoid nontermination: +\begin{ttbox}\makeatother +{\out 1. ! x. f x = g (f (g x)) ==> f [] = f [] @ []} +\end{ttbox} +is solved by \texttt{Simp_tac}, but \texttt{Asm_simp_tac} and +\texttt{Asm_full_simp_tac} loop because the rewrite rule \texttt{f x = g(f(g +x))} extracted from the assumption does not terminate. Isabelle notices +certain simple forms of nontermination, but not this one. + +\subsubsection{Modifying simpsets locally} + +If a certain theorem is merely needed in one proof by simplification, the +pattern +\begin{ttbox} +Addsimps [\(rare_theorem\)]; +by(Simp_tac 1); +Delsimps [\(rare_theorem\)]; +\end{ttbox} +is awkward. Therefore there are lower-case versions of the simplification +tactics (\ttindexbold{simp_tac}, \ttindexbold{asm_simp_tac}, +\ttindexbold{full_simp_tac}, \ttindexbold{asm_full_simp_tac}) and of the +simpset modifiers (\ttindexbold{addsimps}, \ttindexbold{delsimps}) +that do not access or modify the implicit simpset but explicitly take a +simpset as an argument. For example, the above three lines become +\begin{ttbox} +by(simp_tac (simpset() addsimps [\(rare_theorem\)]) 1); +\end{ttbox} +where the result of the function call \texttt{simpset()} is the simpset of +the current theory and \texttt{addsimps} is an infix function. The implicit +simpset is read once but not modified. +This is far preferable to pairs of \texttt{Addsimps} and \texttt{Delsimps}. +Local modifications can be stacked as in +\begin{ttbox} +by(simp_tac (simpset() addsimps [\(rare_theorem\)] delsimps [\(some_thm\)]) 1); +\end{ttbox} + +\subsubsection{Rewriting with definitions} + +Constant definitions (\S\ref{sec:ConstDefinitions}) are not automatically +included in the simpset of a theory. Hence such definitions are not expanded +automatically either, just as it should be: definitions are introduced for +the purpose of abbreviating complex concepts. Of course we need to expand the +definitions initially to derive enough lemmas that characterize the concept +sufficiently for us to forget the original definition completely. For +example, given the theory +\begin{ttbox} +\input{Misc/Exor.thy}\end{ttbox} +we may want to prove \verb$exor A (~A)$. Instead of \texttt{Goal} we use +\begin{ttbox} +\input{Misc/exorgoal.ML}\end{ttbox} +which tells Isabelle to expand the definition of \texttt{exor}---the first +argument of \texttt{Goalw} can be a list of definitions---in the initial goal: +\begin{ttbox} +{\out exor A (~ A)} +{\out 1. A & ~ ~ A | ~ A & ~ A} +\end{ttbox} +In this simple example, the goal is proved by \texttt{Simp_tac}. +Of course the resulting theorem is insufficient to characterize \texttt{exor} +completely. + +In case we want to expand a definition in the middle of a proof, we can +simply add the definition locally to the simpset: +\begin{ttbox} +\input{Misc/exorproof.ML}\end{ttbox} +You should normally not add the definition permanently to the simpset +using \texttt{Addsimps} because this defeats the whole purpose of an +abbreviation. + +\begin{warn} +If you have defined $f\,x\,y$\texttt{~==~}$t$ then you can only expand +occurrences of $f$ with at least two arguments. Thus it is safer to define +$f$\texttt{~==~\%$x\,y$.}$\;t$. +\end{warn} + +\subsubsection{Simplifying \texttt{let}-expressions} + +Proving a goal containing \ttindex{let}-expressions invariably requires the +\texttt{let}-constructs to be expanded at some point. Since +\texttt{let}-\texttt{in} is just syntactic sugar for a defined constant +(called \texttt{Let}), expanding \texttt{let}-constructs means rewriting with +\texttt{Let_def}: +%context List.thy; +%Goal "(let xs = [] in xs@xs) = ys"; +\begin{ttbox}\makeatother +{\out 1. (let xs = [] in xs @ xs) = ys} +by(simp_tac (simpset() addsimps [Let_def]) 1); +{\out 1. [] = ys} +\end{ttbox} +If, in a particular context, there is no danger of a combinatorial explosion +of nested \texttt{let}s one could even add \texttt{Let_def} permanently via +\texttt{Addsimps}. + +\subsubsection{Conditional equations} + +So far all examples of rewrite rules were equations. The simplifier also +accepts {\em conditional\/} equations, for example +\begin{ttbox} +xs ~= [] ==> hd xs # tl xs = xs \hfill \((*)\) +\end{ttbox} +(which is proved by \texttt{exhaust_tac} on \texttt{xs} followed by +\texttt{Asm_full_simp_tac} twice). Assuming that this theorem together with +%\begin{ttbox}\makeatother +\texttt{(rev xs = []) = (xs = [])} +%\end{ttbox} +are part of the simpset, the subgoal +\begin{ttbox}\makeatother +{\out 1. xs ~= [] ==> hd(rev xs) # tl(rev xs) = rev xs} +\end{ttbox} +is proved by simplification: +the conditional equation $(*)$ above +can simplify \texttt{hd(rev~xs)~\#~tl(rev~xs)} to \texttt{rev xs} +because the corresponding precondition \verb$rev xs ~= []$ simplifies to +\verb$xs ~= []$, which is exactly the local assumption of the subgoal. + + +\subsubsection{Automatic case splits} + +Goals containing \ttindex{if}-expressions are usually proved by case +distinction on the condition of the \texttt{if}. For example the goal +\begin{ttbox} +{\out 1. ! xs. if xs = [] then rev xs = [] else rev xs ~= []} +\end{ttbox} +can be split into +\begin{ttbox} +{\out 1. ! xs. (xs = [] --> rev xs = []) \& (xs ~= [] --> rev xs ~= [])} +\end{ttbox} +by typing +\begin{ttbox} +\input{Misc/splitif.ML}\end{ttbox} +Because this is almost always the right proof strategy, the simplifier +performs case-splitting on \texttt{if}s automatically. Try \texttt{Simp_tac} +on the initial goal above. + +This splitting idea generalizes from \texttt{if} to \ttindex{case}: +\begin{ttbox}\makeatother +{\out 1. (case xs of [] => zs | y#ys => y#(ys@zs)) = xs@zs} +\end{ttbox} +becomes +\begin{ttbox}\makeatother +{\out 1. (xs = [] --> zs = xs @ zs) &} +{\out (! a list. xs = a # list --> a # list @ zs = xs @ zs)} +\end{ttbox} +by typing +\begin{ttbox} +\input{Misc/splitlist.ML}\end{ttbox} +In contrast to \texttt{if}-expressions, the simplifier does not split +\texttt{case}-expressions by default because this can lead to nontermination +in case of recursive datatypes. +Nevertheless the simplifier can be instructed to perform \texttt{case}-splits +by adding the appropriate rule to the simpset: +\begin{ttbox} +by(simp_tac (simpset() addsplits [split_list_case]) 1); +\end{ttbox}\indexbold{*addsplits} +solves the initial goal outright, which \texttt{Simp_tac} alone will not do. + +In general, every datatype $t$ comes with a rule +\texttt{$t$.split} that can be added to the simpset either +locally via \texttt{addsplits} (see above), or permanently via +\begin{ttbox} +Addsplits [\(t\).split]; +\end{ttbox}\indexbold{*Addsplits} +Split-rules can be removed globally via \ttindexbold{Delsplits} and locally +via \ttindexbold{delsplits} as, for example, in +\begin{ttbox} +by(simp_tac (simpset() addsimps [\(\dots\)] delsplits [split_if]) 1); +\end{ttbox} + + +\subsubsection{Permutative rewrite rules} + +A rewrite rule is {\bf permutative} if the left-hand side and right-hand side +are the same up to renaming of variables. The most common permutative rule +is commutativity: $x+y = y+x$. Another example is $(x-y)-z = (x-z)-y$. Such +rules are problematic because once they apply, they can be used forever. +The simplifier is aware of this danger and treats permutative rules +separately. For details see~\cite{Isa-Ref-Man}. + +\subsubsection{Tracing} +\indexbold{tracing the simplifier} + +Using the simplifier effectively may take a bit of experimentation. Set the +\verb$trace_simp$ flag to get a better idea of what is going on: +\begin{ttbox}\makeatother +{\out 1. rev [x] = []} +\ttbreak +set trace_simp; +by(Simp_tac 1); +\ttbreak\makeatother +{\out Applying instance of rewrite rule:} +{\out rev (?x # ?xs) == rev ?xs @ [?x]} +{\out Rewriting:} +{\out rev [x] == rev [] @ [x]} +\ttbreak +{\out Applying instance of rewrite rule:} +{\out rev [] == []} +{\out Rewriting:} +{\out rev [] == []} +\ttbreak\makeatother +{\out Applying instance of rewrite rule:} +{\out [] @ ?y == ?y} +{\out Rewriting:} +{\out [] @ [x] == [x]} +\ttbreak +{\out Applying instance of rewrite rule:} +{\out ?x # ?t = ?t == False} +{\out Rewriting:} +{\out [x] = [] == False} +\ttbreak +{\out Level 1} +{\out rev [x] = []} +{\out 1. False} +\end{ttbox} +In more complicated cases, the trace can be enormous, especially since +invocations of the simplifier are often nested (e.g.\ when solving conditions +of rewrite rules). + +\subsection{How it works} +\label{sec:SimpHow} + +\subsubsection{Higher-order patterns} + +\subsubsection{Local assumptions} + +\subsubsection{The preprocessor} + +\section{Induction heuristics} +\label{sec:InductionHeuristics} + +The purpose of this section is to illustrate some simple heuristics for +inductive proofs. The first one we have already mentioned in our initial +example: +\begin{quote} +{\em 1. Theorems about recursive functions are proved by induction.} +\end{quote} +In case the function has more than one argument +\begin{quote} +{\em 2. Do induction on argument number $i$ if the function is defined by +recursion in argument number $i$.} +\end{quote} +When we look at the proof of +\begin{ttbox}\makeatother +(xs @ ys) @ zs = xs @ (ys @ zs) +\end{ttbox} +in \S\ref{sec:intro-proof} we find (a) \texttt{\at} is recursive in the first +argument, (b) \texttt{xs} occurs only as the first argument of \texttt{\at}, +and (c) both \texttt{ys} and \texttt{zs} occur at least once as the second +argument of \texttt{\at}. Hence it is natural to perform induction on +\texttt{xs}. + +The key heuristic, and the main point of this section, is to +generalize the goal before induction. The reason is simple: if the goal is +too specific, the induction hypothesis is too weak to allow the induction +step to go through. Let us now illustrate the idea with an example. + +We define a tail-recursive version of list-reversal, +i.e.\ one that can be compiled into a loop: +\begin{ttbox} +\input{Misc/Itrev.thy}\end{ttbox} +The behaviour of \texttt{itrev} is simple: it reverses its first argument by +stacking its elements onto the second argument, and returning that second +argument when the first one becomes empty. +We need to show that \texttt{itrev} does indeed reverse its first argument +provided the second one is empty: +\begin{ttbox} +\input{Misc/itrev1.ML}\end{ttbox} +There is no choice as to the induction variable, and we immediately simplify: +\begin{ttbox} +\input{Misc/induct_auto.ML}\ttbreak\makeatother +{\out1. !!a list. itrev list [] = rev list\(\;\)==> itrev list [a] = rev list @ [a]} +\end{ttbox} +Just as predicted above, the overall goal, and hence the induction +hypothesis, is too weak to solve the induction step because of the fixed +\texttt{[]}. The corresponding heuristic: +\begin{quote} +{\em 3. Generalize goals for induction by replacing constants by variables.} +\end{quote} +Of course one cannot do this na\"{\i}vely: \texttt{itrev xs ys = rev xs} is +just not true --- the correct generalization is +\begin{ttbox}\makeatother +\input{Misc/itrev2.ML}\end{ttbox} +If \texttt{ys} is replaced by \texttt{[]}, the right-hand side simplifies to +\texttt{rev xs}, just as required. + +In this particular instance it is easy to guess the right generalization, +but in more complex situations a good deal of creativity is needed. This is +the main source of complications in inductive proofs. + +Although we now have two variables, only \texttt{xs} is suitable for +induction, and we repeat our above proof attempt. Unfortunately, we are still +not there: +\begin{ttbox}\makeatother +{\out 1. !!a list.} +{\out itrev list ys = rev list @ ys} +{\out ==> itrev list (a # ys) = rev list @ a # ys} +\end{ttbox} +The induction hypothesis is still too weak, but this time it takes no +intuition to generalize: the problem is that \texttt{ys} is fixed throughout +the subgoal, but the induction hypothesis needs to be applied with +\texttt{a \# ys} instead of \texttt{ys}. Hence we prove the theorem +for all \texttt{ys} instead of a fixed one: +\begin{ttbox}\makeatother +\input{Misc/itrev3.ML}\end{ttbox} +This time induction on \texttt{xs} followed by simplification succeeds. This +leads to another heuristic for generalization: +\begin{quote} +{\em 4. Generalize goals for induction by universally quantifying all free +variables {\em(except the induction variable itself!)}.} +\end{quote} +This prevents trivial failures like the above and does not change the +provability of the goal. Because it is not always required, and may even +complicate matters in some cases, this heuristic is often not +applied blindly. + +A final point worth mentioning is the orientation of the equation we just +proved: the more complex notion (\texttt{itrev}) is on the left-hand +side, the simpler \texttt{rev} on the right-hand side. This constitutes +another, albeit weak heuristic that is not restricted to induction: +\begin{quote} + {\em 5. The right-hand side of an equation should (in some sense) be + simpler than the left-hand side.} +\end{quote} +The heuristic is tricky to apply because it is not obvious that +\texttt{rev xs \at\ ys} is simpler than \texttt{itrev xs ys}. But see what +happens if you try to prove the symmetric equation! + + +\section{Case study: compiling expressions} +\label{sec:ExprCompiler} + +The task is to develop a compiler from a generic type of expressions (built +up from variables, constants and binary operations) to a stack machine. This +generic type of expressions is a generalization of the boolean expressions in +\S\ref{sec:boolex}. This time we do not commit ourselves to a particular +type of variables or values but make them type parameters. Neither is there +a fixed set of binary operations: instead the expression contains the +appropriate function itself. +\begin{ttbox} +\input{CodeGen/expr}\end{ttbox} +The three constructors represent constants, variables and the combination of +two subexpressions with a binary operation. + +The value of an expression w.r.t.\ an environment that maps variables to +values is easily defined: +\begin{ttbox} +\input{CodeGen/value}\end{ttbox} + +The stack machine has three instructions: load a constant value onto the +stack, load the contents of a certain address onto the stack, and apply a +binary operation to the two topmost elements of the stack, replacing them by +the result. As for \texttt{expr}, addresses and values are type parameters: +\begin{ttbox} +\input{CodeGen/instr}\end{ttbox} + +The execution of the stack machine is modelled by a function \texttt{exec} +that takes a store (modelled as a function from addresses to values, just +like the environment for evaluating expressions), a stack (modelled as a +list) of values and a list of instructions and returns the stack at the end +of the execution --- the store remains unchanged: +\begin{ttbox} +\input{CodeGen/exec}\end{ttbox} +Recall that \texttt{hd} and \texttt{tl} +return the first element and the remainder of a list. + +Because all functions are total, \texttt{hd} is defined even for the empty +list, although we do not know what the result is. Thus our model of the +machine always terminates properly, although the above definition does not +tell us much about the result in situations where \texttt{Apply} was executed +with fewer than two elements on the stack. + +The compiler is a function from expressions to a list of instructions. Its +definition is pretty much obvious: +\begin{ttbox}\makeatother +\input{CodeGen/comp}\end{ttbox} + +Now we have to prove the correctness of the compiler, i.e.\ that the +execution of a compiled expression results in the value of the expression: +\begin{ttbox} +exec s [] (comp e) = [value s e] +\end{ttbox} +This is generalized to +\begin{ttbox} +\input{CodeGen/goal2.ML}\end{ttbox} +and proved by induction on \texttt{e} followed by simplification, once we +have the following lemma about executing the concatenation of two instruction +sequences: +\begin{ttbox}\makeatother +\input{CodeGen/goal2.ML}\end{ttbox} +This requires induction on \texttt{xs} and ordinary simplification for the +base cases. In the induction step, simplification leaves us with a formula +that contains two \texttt{case}-expressions over instructions. Thus we add +automatic case splitting as well, which finishes the proof: +\begin{ttbox} +\input{CodeGen/simpsplit.ML}\end{ttbox} + +We could now go back and prove \texttt{exec s [] (comp e) = [value s e]} +merely by simplification with the generalized version we just proved. +However, this is unnecessary because the generalized version fully subsumes +its instance. + +\section{Total recursive functions} +\label{sec:recdef} +\index{*recdef|(} + + +Although many total functions have a natural primitive recursive definition, +this is not always the case. Arbitrary total recursive functions can be +defined by means of \texttt{recdef}: you can use full pattern-matching, +recursion need not involve datatypes, and termination is proved by showing +that each recursive call makes the argument smaller in a suitable (user +supplied) sense. + +\subsection{Defining recursive functions} + +Here is a simple example, the Fibonacci function: +\begin{ttbox} +consts fib :: nat => nat +recdef fib "measure(\%n. n)" + "fib 0 = 0" + "fib 1 = 1" + "fib (Suc(Suc x)) = fib x + fib (Suc x)" +\end{ttbox} +The definition of \texttt{fib} is accompanied by a \bfindex{measure function} +\texttt{\%n.$\;$n} that maps the argument of \texttt{fib} to a natural +number. The requirement is that in each equation the measure of the argument +on the left-hand side is strictly greater than the measure of the argument of +each recursive call. In the case of \texttt{fib} this is obviously true +because the measure function is the identity and \texttt{Suc(Suc~x)} is +strictly greater than both \texttt{x} and \texttt{Suc~x}. + +Slightly more interesting is the insertion of a fixed element +between any two elements of a list: +\begin{ttbox} +consts sep :: "'a * 'a list => 'a list" +recdef sep "measure (\%(a,xs). length xs)" + "sep(a, []) = []" + "sep(a, [x]) = [x]" + "sep(a, x#y#zs) = x # a # sep(a,y#zs)" +\end{ttbox} +This time the measure is the length of the list, which decreases with the +recursive call; the first component of the argument tuple is irrelevant. + +Pattern matching need not be exhaustive: +\begin{ttbox} +consts last :: 'a list => bool +recdef last "measure (\%xs. length xs)" + "last [x] = x" + "last (x#y#zs) = last (y#zs)" +\end{ttbox} + +Overlapping patterns are disambiguated by taking the order of equations into +account, just as in functional programming: +\begin{ttbox} +recdef sep "measure (\%(a,xs). length xs)" + "sep(a, x#y#zs) = x # a # sep(a,y#zs)" + "sep(a, xs) = xs" +\end{ttbox} +This defines exactly the same function \texttt{sep} as further above. + +\begin{warn} +Currently \texttt{recdef} only accepts functions with a single argument, +possibly of tuple type. +\end{warn} + +When loading a theory containing a \texttt{recdef} of a function $f$, +Isabelle proves the recursion equations and stores the result as a list of +theorems $f$.\texttt{rules}. It can be viewed by typing +\begin{ttbox} +prths \(f\).rules; +\end{ttbox} +All of the above examples are simple enough that Isabelle can determine +automatically that the measure of the argument goes down in each recursive +call. In that case $f$.\texttt{rules} contains precisely the defining +equations. + +In general, Isabelle may not be able to prove all termination conditions +automatically. For example, termination of +\begin{ttbox} +consts gcd :: "nat*nat => nat" +recdef gcd "measure ((\%(m,n).n))" + "gcd (m, n) = (if n=0 then m else gcd(n, m mod n))" +\end{ttbox} +relies on the lemma \texttt{mod_less_divisor} +\begin{ttbox} +0 < n ==> m mod n < n +\end{ttbox} +that is not part of the default simpset. As a result, Isabelle prints a +warning and \texttt{gcd.rules} contains a precondition: +\begin{ttbox} +(! m n. 0 < n --> m mod n < n) ==> gcd (m, n) = (if n=0 \dots) +\end{ttbox} +We need to instruct \texttt{recdef} to use an extended simpset to prove the +termination condition: +\begin{ttbox} +recdef gcd "measure ((\%(m,n).n))" + simpset "simpset() addsimps [mod_less_divisor]" + "gcd (m, n) = (if n=0 then m else gcd(n, m mod n))" +\end{ttbox} +This time everything works fine and \texttt{gcd.rules} contains precisely the +stated recursion equation for \texttt{gcd}. + +When defining some nontrivial total recursive function, the first attempt +will usually generate a number of termination conditions, some of which may +require new lemmas to be proved in some of the parent theories. Those lemmas +can then be added to the simpset used by \texttt{recdef} for its +proofs, as shown for \texttt{gcd}. + +Although all the above examples employ measure functions, \texttt{recdef} +allows arbitrary wellfounded relations. For example, termination of +Ackermann's function requires the lexicographic product \texttt{**}: +\begin{ttbox} +recdef ack "measure(\%m. m) ** measure(\%n. n)" + "ack(0,n) = Suc n" + "ack(Suc m,0) = ack(m, 1)" + "ack(Suc m,Suc n) = ack(m,ack(Suc m,n))" +\end{ttbox} +For details see~\cite{Isa-Logics-Man} and the examples in the library. + + +\subsection{Deriving simplification rules} + +Once we have succeeded to prove all termination conditions, we can start to +derive some theorems. In contrast to \texttt{primrec} definitions, which are +automatically added to the simpset, \texttt{recdef} rules must be included +explicitly, for example as in +\begin{ttbox} +Addsimps fib.rules; +\end{ttbox} +However, some care is necessary now, in contrast to \texttt{primrec}. +Although \texttt{gcd} is a total function, its defining equation leads to +nontermination of the simplifier, because the subterm \texttt{gcd(n, m mod + n)} on the right-hand side can again be simplified by the same equation, +and so on. The reason: the simplifier rewrites the \texttt{then} and +\texttt{else} branches of a conditional if the condition simplifies to +neither \texttt{True} nor \texttt{False}. Therefore it is recommended to +derive an alternative formulation that replaces case distinctions on the +right-hand side by conditional equations. For \texttt{gcd} it means we have +to prove +\begin{ttbox} + gcd (m, 0) = m +n ~= 0 ==> gcd (m, n) = gcd(n, m mod n) +\end{ttbox} +To avoid nontermination during those proofs, we have to resort to some low +level tactics: +\begin{ttbox} +Goal "gcd(m,0) = m"; +by(resolve_tac [trans] 1); +by(resolve_tac gcd.rules 1); +by(Simp_tac 1); +\end{ttbox} +At this point it is not necessary to understand what exactly +\texttt{resolve_tac} is doing. The main point is that the above proof works +not just for this one example but in general (except that we have to use +\texttt{Asm_simp_tac} and $f$\texttt{.rules} in general). Try the second +\texttt{gcd}-equation above! + +\subsection{Induction} + +Assuming we have added the recursion equations (or some suitable derived +equations) to the simpset, we might like to prove something about our +function. Since the function is recursive, the natural proof principle is +again induction. But this time the structural form of induction that comes +with datatypes is unlikely to work well---otherwise we could have defined the +function by \texttt{primrec}. Therefore \texttt{recdef} automatically proves +a suitable induction rule $f$\texttt{.induct} that follows the recursion +pattern of the particular function $f$. Roughly speaking, it requires you to +prove for each \texttt{recdef} equation that the property you are trying to +establish holds for the left-hand side provided it holds for all recursive +calls on the right-hand side. Applying $f$\texttt{.induct} requires its +explicit instantiation. See \S\ref{sec:explicit-inst} for details. + +\index{*recdef|)} diff -r 6ef3742b6153 -r 1463e182c533 doc-src/Tutorial/ttbox.sty --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc-src/Tutorial/ttbox.sty Wed Aug 26 16:57:49 1998 +0200 @@ -0,0 +1,20 @@ +\ProvidesPackage{ttbox}[1997/06/25] +\RequirePackage{alltt} + +%%%Boxed terminal sessions + +%Redefines \{ and \} to be in \tt font and \| to make a BACKSLASH +\def\ttbraces{\chardef\{=`\{\chardef\}=`\}\chardef\|=`\\} + +%Restores % as the comment character (especially, to suppress line breaks) +\newcommand\comments{\catcode`\%=14\relax} + +%alltt* environment: like alltt but smaller, and with \{ \} and \| as in ttbox +\newenvironment{alltt*}{\begin{alltt}\footnotesize\ttbraces}{\end{alltt}} + +%Indented alltt* environment with small point size +%NO LINE BREAKS are allowed unless \pagebreak appears at START of a line +\newenvironment{ttbox}{\begin{quote}\samepage\begin{alltt*}}% + {\end{alltt*}\end{quote}} + +\endinput diff -r 6ef3742b6153 -r 1463e182c533 doc-src/Tutorial/tutorial.bbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc-src/Tutorial/tutorial.bbl Wed Aug 26 16:57:49 1998 +0200 @@ -0,0 +1,25 @@ +\begin{thebibliography}{1} + +\bibitem{Bird-Wadler} +Richard Bird and Philip Wadler. +\newblock {\em Introduction to Functional Programming}. +\newblock Prentice-Hall, 1988. + +\bibitem{Isa-Ref-Man} +Lawrence~C. Paulson. +\newblock {\em The Isabelle Reference Manual}. +\newblock University of Cambridge, Computer Laboratory. +\newblock \verb$http://www.in.tum.de/~isabelle/dist/$. + +\bibitem{Isa-Logics-Man} +Lawrence~C. Paulson. +\newblock {\em Isabelle's Object-Logics}. +\newblock University of Cambridge, Computer Laboratory. +\newblock \verb$http://www.in.tum.de/~isabelle/dist/$. + +\bibitem{Paulson-ML} +Lawrence~C. Paulson. +\newblock {\em ML for the Working Programmer}. +\newblock Cambridge University Press, 2nd edition, 1996. + +\end{thebibliography} diff -r 6ef3742b6153 -r 1463e182c533 doc-src/Tutorial/tutorial.ind --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc-src/Tutorial/tutorial.ind Wed Aug 26 16:57:49 1998 +0200 @@ -0,0 +1,135 @@ +\begin{theindex} + + \item {\tt[]}, \bold{7} + \item {\tt\#}, \bold{7} + \item {\ttnot}, \bold{3} + \item {\tt-->}, \bold{3} + \item {\tt\&}, \bold{3} + \item {\ttor}, \bold{3} + \item {\tt?}, \bold{3}, 4 + \item {\ttall}, \bold{3} + \item {\ttuniquex}, \bold{3} + \item {\tt *}, \bold{17} + \item {\tt +}, \bold{17} + \item {\tt -}, \bold{17} + \item {\tt <}, \bold{17} + \item {\tt <=}, \bold{17} + \item \ttlbr, \bold{9} + \item \ttrbr, \bold{9} + \item {\tt==>}, \bold{9} + \item {\tt==}, \bold{18} + \item {\tt\%}, \bold{3} + \item {\tt =>}, \bold{2} + + \indexspace + + \item {\tt addsimps}, \bold{22} + \item {\tt Addsplits}, \bold{24} + \item {\tt addsplits}, \bold{24} + \item {\tt Asm_full_simp_tac}, \bold{21} + \item {\tt asm_full_simp_tac}, \bold{22} + \item {\tt Asm_simp_tac}, \bold{21} + \item {\tt asm_simp_tac}, \bold{22} + + \indexspace + + \item {\tt bool}, 2 + + \indexspace + + \item {\tt case}, \bold{3}, 4, \bold{13}, 24 + \item {\tt constdefs}, \bold{18} + \item {\tt consts}, \bold{7} + \item {\tt context}, \bold{11} + \item current theory, \bold{11} + + \indexspace + + \item {\tt datatype}, \bold{7} + \item {\tt defs}, \bold{18} + \item {\tt delsimps}, \bold{22} + \item {\tt Delsplits}, \bold{24} + \item {\tt delsplits}, \bold{24} + \item {\tt div}, \bold{17} + + \indexspace + + \item {\tt exhaust_tac}, \bold{14} + + \indexspace + + \item {\tt False}, \bold{3} + \item formula, \bold{3} + \item {\tt Full_simp_tac}, \bold{21} + \item {\tt full_simp_tac}, \bold{22} + + \indexspace + + \item {\tt hd}, \bold{12} + + \indexspace + + \item {\tt if}, \bold{3}, 4, 24 + \item {\tt infixr}, \bold{7} + \item inner syntax, \bold{8} + + \indexspace + + \item {\tt LEAST}, \bold{17} + \item {\tt let}, \bold{3}, 4, 23 + \item {\tt list}, 2 + \item loading theories, 12 + + \indexspace + + \item {\tt Main}, \bold{2} + \item measure function, \bold{29} + \item {\tt mod}, \bold{17} + + \indexspace + + \item {\tt nat}, 2, \bold{17} + + \indexspace + + \item parent theory, \bold{1} + \item primitive recursion, \bold{13} + \item proof scripts, \bold{2} + + \indexspace + + \item {\tt recdef}, 29--31 + \item reloading theories, 12 + + \indexspace + + \item schematic variable, \bold{4} + \item {\tt set}, 2 + \item {\tt show_brackets}, \bold{4} + \item {\tt show_types}, \bold{3}, 11 + \item {\tt Simp_tac}, \bold{21} + \item {\tt simp_tac}, \bold{22} + \item simplifier, \bold{20} + \item simpset, \bold{21} + + \indexspace + + \item tactic, \bold{11} + \item term, \bold{3} + \item theory, \bold{1} + \item {\tt tl}, \bold{12} + \item total, \bold{7} + \item tracing the simplifier, \bold{25} + \item {\tt True}, \bold{3} + \item type constraints, \bold{3} + \item type inference, \bold{3} + \item type synonyms, \bold{18} + \item {\tt types}, \bold{18} + + \indexspace + + \item unknown, \bold{4} + \item {\tt update}, \bold{12} + \item {\tt use_thy}, \bold{2}, 12 + +\end{theindex} diff -r 6ef3742b6153 -r 1463e182c533 doc-src/Tutorial/tutorial.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc-src/Tutorial/tutorial.tex Wed Aug 26 16:57:49 1998 +0200 @@ -0,0 +1,69 @@ +\documentclass[11pt]{report} +\usepackage{a4,latexsym} +\usepackage{graphicx} + +\makeatletter +\input{../iman.sty} +\input{extra.sty} +\makeatother +\usepackage{ttbox} +\newcommand\ttbreak{\vskip-10pt\pagebreak[0]} + +%\newtheorem{theorem}{Theorem}[section] +\newtheorem{Exercise}{Exercise}[section] +\newenvironment{exercise}{\begin{Exercise}\rm}{\end{Exercise}} +\newcommand{\ttlbr}{{\tt[|}} +\newcommand{\ttrbr}{{\tt|]}} +\newcommand{\ttnot}{\tt\~\relax} +\newcommand{\ttor}{\tt|} +\newcommand{\ttall}{\tt!} +\newcommand{\ttuniquex}{\tt?!} + +%% $Id$ +%%%STILL NEEDS MODAL, LCF +%%%\includeonly{ZF} +%%% to index derived rls: ^\([a-zA-Z0-9][a-zA-Z0-9_]*\) \\tdx{\1} +%%% to index rulenames: ^ *(\([a-zA-Z0-9][a-zA-Z0-9_]*\), \\tdx{\1} +%%% to index constants: \\tt \([a-zA-Z0-9][a-zA-Z0-9_]*\) \\cdx{\1} +%%% to deverbify: \\verb|\([^|]*\)| \\ttindex{\1} +%% run ../sedindex logics to prepare index file + +\makeindex + +\underscoreoff + +\setcounter{secnumdepth}{2} \setcounter{tocdepth}{2} %% {secnumdepth}{2}??? + +\pagestyle{headings} +%\sloppy +%\binperiod %%%treat . like a binary operator + +\begin{document} +\title{\includegraphics[scale=0.2,angle=-90]{isabelle_hol.ps} + \\ \vspace{0.5cm} The Tutorial + \\ --- DRAFT ---} +\author{Tobias Nipkow\\ +Technische Universit\"at M\"unchen \\ +Institut f\"ur Informatik \\ +\texttt{http://www.in.tum.de/\~\relax nipkow/}} +\maketitle + +\pagenumbering{roman} +\tableofcontents + +\subsubsection*{Acknowledgements} +This tutorial owes a lot to the constant discussions with and the valuable +feedback from Larry Paulson and the Isabelle group at Munich: Olaf M\"uller, +Wolfgang Naraschewski, David von Oheimb, Leonor Prensa-Nieto, Cornelia Pusch +and Markus Wenzel. Stefan Merz was also kind enough to read and comment on a +draft version. +\clearfirst + +\input{basics} +\input{fp} +\input{appendix} + +\bibliographystyle{plain} +\bibliography{base} +\input{tutorial.ind} +\end{document}