wneuper/isa: doc-src/Ref/defining.tex@55e83c32cdec (annotated)

lcp@320	1	%% $Id$
lcp@320	2	\chapter{Defining Logics} \label{Defining-Logics}
lcp@320	3	This chapter explains how to define new formal systems --- in particular,
lcp@320	4	their concrete syntax. While Isabelle can be regarded as a theorem prover
lcp@320	5	for set theory, higher-order logic or the sequent calculus, its
lcp@320	6	distinguishing feature is support for the definition of new logics.
lcp@320	7
lcp@320	8	Isabelle logics are hierarchies of theories, which are described and
wenzelm@864	9	illustrated in
lcp@320	10	\iflabelundefined{sec:defining-theories}{{\em Introduction to Isabelle}}%
lcp@320	11	{\S\ref{sec:defining-theories}}. That material, together with the theory
lcp@320	12	files provided in the examples directories, should suffice for all simple
lcp@320	13	applications. The easiest way to define a new theory is by modifying a
lcp@320	14	copy of an existing theory.
lcp@320	15
lcp@320	16	This chapter documents the meta-logic syntax, mixfix declarations and
lcp@320	17	pretty printing. The extended examples in \S\ref{sec:min_logics}
lcp@320	18	demonstrate the logical aspects of the definition of theories.
lcp@320	19
lcp@320	20
lcp@320	21	\section{Priority grammars} \label{sec:priority_grammars}
wenzelm@864	22	\index{priority grammars\|(}
lcp@320	23
lcp@320	24	A context-free grammar contains a set of {\bf nonterminal symbols}, a set of
lcp@320	25	{\bf terminal symbols} and a set of {\bf productions}\index{productions}.
lcp@320	26	Productions have the form ${A=\gamma}$, where $A$ is a nonterminal and
lcp@320	27	$\gamma$ is a string of terminals and nonterminals. One designated
lcp@320	28	nonterminal is called the {\bf start symbol}. The language defined by the
lcp@320	29	grammar consists of all strings of terminals that can be derived from the
lcp@320	30	start symbol by applying productions as rewrite rules.
lcp@320	31
lcp@320	32	The syntax of an Isabelle logic is specified by a {\bf priority
lcp@320	33	grammar}.\index{priorities} Each nonterminal is decorated by an integer
lcp@320	34	priority, as in~$A^{(p)}$. A nonterminal $A^{(p)}$ in a derivation may be
skalberg@14231	35	rewritten using a production $A^{(q)} = \gamma$ only if~$p \leq q$. Any
lcp@320	36	priority grammar can be translated into a normal context-free grammar by
lcp@320	37	introducing new nonterminals and productions.
lcp@320	38
lcp@320	39	Formally, a set of context-free productions $G$ induces a derivation
lcp@320	40	relation $\longrightarrow@G$. Let $\alpha$ and $\beta$ denote strings of
lcp@320	41	terminal or nonterminal symbols. Then
wenzelm@864	42	\[ \alpha\, A^{(p)}\, \beta ~\longrightarrow@G~ \alpha\,\gamma\,\beta \]
skalberg@14231	43	if and only if $G$ contains some production $A^{(q)}=\gamma$ for~$p \leq q$.
lcp@320	44
lcp@320	45	The following simple grammar for arithmetic expressions demonstrates how
lcp@320	46	binding power and associativity of operators can be enforced by priorities.
lcp@320	47	\begin{center}
lcp@320	48	\begin{tabular}{rclr}
lcp@320	49	$A^{(9)}$ & = & {\tt0} \\
lcp@320	50	$A^{(9)}$ & = & {\tt(} $A^{(0)}$ {\tt)} \\
lcp@320	51	$A^{(0)}$ & = & $A^{(0)}$ {\tt+} $A^{(1)}$ \\
lcp@320	52	$A^{(2)}$ & = & $A^{(3)}$ {\tt*} $A^{(2)}$ \\
lcp@320	53	$A^{(3)}$ & = & {\tt-} $A^{(3)}$
lcp@320	54	\end{tabular}
lcp@320	55	\end{center}
lcp@320	56	The choice of priorities determines that {\tt -} binds tighter than {\tt *},
lcp@320	57	which binds tighter than {\tt +}. Furthermore {\tt +} associates to the
lcp@320	58	left and {\tt *} to the right.
lcp@320	59
lcp@320	60	For clarity, grammars obey these conventions:
lcp@320	61	\begin{itemize}
lcp@320	62	\item All priorities must lie between~0 and \ttindex{max_pri}, which is
lcp@320	63	some fixed integer. Sometimes {\tt max_pri} is written as $\infty$.
lcp@320	64	\item Priority 0 on the right-hand side and priority \ttindex{max_pri} on
lcp@320	65	the left-hand side may be omitted.
lcp@320	66	\item The production $A^{(p)} = \alpha$ is written as $A = \alpha~(p)$; the
lcp@320	67	priority of the left-hand side actually appears in a column on the far
wenzelm@864	68	right.
wenzelm@864	69	\item Alternatives are separated by~$\|$.
lcp@320	70	\item Repetition is indicated by dots~(\dots) in an informal but obvious
lcp@320	71	way.
lcp@320	72	\end{itemize}
lcp@320	73
lcp@320	74	Using these conventions and assuming $\infty=9$, the grammar
lcp@320	75	takes the form
lcp@320	76	\begin{center}
lcp@320	77	\begin{tabular}{rclc}
lcp@320	78	$A$ & = & {\tt0} & \hspace*{4em} \\
lcp@320	79	& $\|$ & {\tt(} $A$ {\tt)} \\
lcp@320	80	& $\|$ & $A$ {\tt+} $A^{(1)}$ & (0) \\
lcp@320	81	& $\|$ & $A^{(3)}$ {\tt*} $A^{(2)}$ & (2) \\
lcp@320	82	& $\|$ & {\tt-} $A^{(3)}$ & (3)
lcp@320	83	\end{tabular}
lcp@320	84	\end{center}
lcp@320	85	\index{priority grammars\|)}
lcp@320	86
lcp@320	87
paulson@4597	88	\begin{figure}\small
lcp@320	89	\begin{center}
lcp@320	90	\begin{tabular}{rclc}
clasohm@711	91	$any$ &=& $prop$ ~~$\|$~~ $logic$ \\\\
wenzelm@864	92	$prop$ &=& {\tt(} $prop$ {\tt)} \\
wenzelm@864	93	&$\|$& $prop^{(4)}$ {\tt::} $type$ & (3) \\
wenzelm@864	94	&$\|$& {\tt PROP} $aprop$ \\
clasohm@711	95	&$\|$& $any^{(3)}$ {\tt ==} $any^{(2)}$ & (2) \\
clasohm@711	96	&$\|$& $any^{(3)}$ {\tt =?=} $any^{(2)}$ & (2) \\
lcp@320	97	&$\|$& $prop^{(2)}$ {\tt ==>} $prop^{(1)}$ & (1) \\
lcp@320	98	&$\|$& {\tt[\|} $prop$ {\tt;} \dots {\tt;} $prop$ {\tt\|]} {\tt==>} $prop^{(1)}$ & (1) \\
wenzelm@864	99	&$\|$& {\tt!!} $idts$ {\tt.} $prop$ & (0) \\
wenzelm@864	100	&$\|$& {\tt OFCLASS} {\tt(} $type$ {\tt,} $logic$ {\tt)} \\\\
wenzelm@4543	101	$aprop$ &=& $id$ ~~$\|$~~ $longid$ ~~$\|$~~ $var$
clasohm@711	102	~~$\|$~~ $logic^{(\infty)}$ {\tt(} $any$ {\tt,} \dots {\tt,} $any$ {\tt)} \\\\
wenzelm@864	103	$logic$ &=& {\tt(} $logic$ {\tt)} \\
wenzelm@864	104	&$\|$& $logic^{(4)}$ {\tt::} $type$ & (3) \\
wenzelm@4543	105	&$\|$& $id$ ~~$\|$~~ $longid$ ~~$\|$~~ $var$
wenzelm@864	106	~~$\|$~~ $logic^{(\infty)}$ {\tt(} $any$ {\tt,} \dots {\tt,} $any$ {\tt)} \\
berghofe@11621	107	&$\|$& {\tt \%} $pttrns$ {\tt.} $any^{(3)}$ & (3) \\
berghofe@11621	108	&$\|$& {\tt TYPE} {\tt(} $type$ {\tt)} \\\\
lcp@320	109	$idts$ &=& $idt$ ~~$\|$~~ $idt^{(1)}$ $idts$ \\\\
lcp@320	110	$idt$ &=& $id$ ~~$\|$~~ {\tt(} $idt$ {\tt)} \\
lcp@320	111	&$\|$& $id$ {\tt ::} $type$ & (0) \\\\
wenzelm@3694	112	$pttrns$ &=& $pttrn$ ~~$\|$~~ $pttrn^{(1)}$ $pttrns$ \\\\
wenzelm@3694	113	$pttrn$ &=& $idt$ \\\\
wenzelm@864	114	$type$ &=& {\tt(} $type$ {\tt)} \\
wenzelm@864	115	&$\|$& $tid$ ~~$\|$~~ $tvar$ ~~$\|$~~ $tid$ {\tt::} $sort$
wenzelm@864	116	~~$\|$~~ $tvar$ {\tt::} $sort$ \\
lcp@320	117	&$\|$& $id$ ~~$\|$~~ $type^{(\infty)}$ $id$
lcp@320	118	~~$\|$~~ {\tt(} $type$ {\tt,} \dots {\tt,} $type$ {\tt)} $id$ \\
wenzelm@4543	119	&$\|$& $longid$ ~~$\|$~~ $type^{(\infty)}$ $longid$
wenzelm@4543	120	~~$\|$~~ {\tt(} $type$ {\tt,} \dots {\tt,} $type$ {\tt)} $longid$ \\
lcp@320	121	&$\|$& $type^{(1)}$ {\tt =>} $type$ & (0) \\
wenzelm@864	122	&$\|$& {\tt[} $type$ {\tt,} \dots {\tt,} $type$ {\tt]} {\tt=>} $type$&(0) \\\\
wenzelm@4543	123	$sort$ &=& $id$ ~~$\|$~~ $longid$ ~~$\|$~~ {\tt\ttlbrace\ttrbrace} ~~$\|$~~
paulson@8136	124	{\tt\ttlbrace} $id$ ~$\|$~ $longid${\tt,}\dots{\tt,} $id$ ~$\|$~$longid$ {\tt\ttrbrace}
lcp@320	125	\end{tabular}
lcp@320	126	\index{*PROP symbol}
lcp@320	127	\index{*== symbol}\index{*=?= symbol}\index{*==> symbol}
lcp@320	128	\index{*:: symbol}\index{*=> symbol}
lcp@332	129	\index{sort constraints}
lcp@332	130	%the index command: a percent is permitted, but braces must match!
lcp@320	131	\index{%@{\tt\%} symbol}
lcp@320	132	\index{{}@{\tt\ttlbrace} symbol}\index{{}@{\tt\ttrbrace} symbol}
lcp@320	133	\index{*[ symbol}\index{*] symbol}
lcp@320	134	\index{*"!"! symbol}
lcp@320	135	\index{*"["\| symbol}
lcp@320	136	\index{*"\|"] symbol}
lcp@320	137	\end{center}
lcp@320	138	\caption{Meta-logic syntax}\label{fig:pure_gram}
lcp@320	139	\end{figure}
lcp@320	140
lcp@320	141
lcp@320	142	\section{The Pure syntax} \label{sec:basic_syntax}
lcp@320	143	\index{syntax!Pure\|(}
lcp@320	144
lcp@320	145	At the root of all object-logics lies the theory \thydx{Pure}. It
lcp@320	146	contains, among many other things, the Pure syntax. An informal account of
wenzelm@864	147	this basic syntax (types, terms and formulae) appears in
lcp@320	148	\iflabelundefined{sec:forward}{{\em Introduction to Isabelle}}%
lcp@320	149	{\S\ref{sec:forward}}. A more precise description using a priority grammar
lcp@320	150	appears in Fig.\ts\ref{fig:pure_gram}. It defines the following
lcp@320	151	nonterminals:
lcp@320	152	\begin{ttdescription}
wenzelm@864	153	\item[\ndxbold{any}] denotes any term.
wenzelm@864	154
clasohm@711	155	\item[\ndxbold{prop}] denotes terms of type {\tt prop}. These are formulae
wenzelm@864	156	of the meta-logic. Note that user constants of result type {\tt prop}
wenzelm@864	157	(i.e.\ $c :: \ldots \To prop$) should always provide concrete syntax.
wenzelm@864	158	Otherwise atomic propositions with head $c$ may be printed incorrectly.
lcp@320	159
wenzelm@864	160	\item[\ndxbold{aprop}] denotes atomic propositions.
wenzelm@864	161
wenzelm@864	162	%% FIXME huh!?
wenzelm@864	163	% These typically
wenzelm@864	164	% include the judgement forms of the object-logic; its definition
wenzelm@864	165	% introduces a meta-level predicate for each judgement form.
lcp@320	166
clasohm@711	167	\item[\ndxbold{logic}] denotes terms whose type belongs to class
wenzelm@864	168	\cldx{logic}, excluding type \tydx{prop}.
lcp@320	169
wenzelm@864	170	\item[\ndxbold{idts}] denotes a list of identifiers, possibly constrained
wenzelm@864	171	by types.
wenzelm@3694	172
wenzelm@3694	173	\item[\ndxbold{pttrn}, \ndxbold{pttrns}] denote patterns for
wenzelm@3694	174	abstraction, cases etc. Initially the same as $idt$ and $idts$,
skalberg@14231	175	these are intended to be augmented by user extensions.
lcp@320	176
lcp@320	177	\item[\ndxbold{type}] denotes types of the meta-logic.
lcp@320	178
wenzelm@864	179	\item[\ndxbold{sort}] denotes meta-level sorts.
lcp@320	180	\end{ttdescription}
lcp@320	181
lcp@320	182	\begin{warn}
lcp@320	183	In {\tt idts}, note that \verb|x::nat y| is parsed as \verb|x::(nat y)|,
lcp@320	184	treating {\tt y} like a type constructor applied to {\tt nat}. The
lcp@320	185	likely result is an error message. To avoid this interpretation, use
lcp@320	186	parentheses and write \verb|(x::nat) y|.
lcp@332	187	\index{type constraints}\index{*:: symbol}
lcp@320	188
lcp@320	189	Similarly, \verb|x::nat y::nat| is parsed as \verb|x::(nat y::nat)| and
lcp@320	190	yields an error. The correct form is \verb|(x::nat) (y::nat)|.
lcp@320	191	\end{warn}
lcp@320	192
nipkow@452	193	\begin{warn}
paulson@3485	194	Type constraints bind very weakly. For example, \verb!x<y::nat! is normally
clasohm@711	195	parsed as \verb!(x<y)::nat!, unless \verb$<$ has priority of 3 or less, in
paulson@3485	196	which case the string is likely to be ambiguous. The correct form is
nipkow@452	197	\verb!x<(y::nat)!.
nipkow@452	198	\end{warn}
lcp@320	199
nipkow@867	200	\subsection{Logical types and default syntax}\label{logical-types}
nipkow@867	201	\index{lambda calc@$\lambda$-calculus}
nipkow@867	202
nipkow@867	203	Isabelle's representation of mathematical languages is based on the
nipkow@867	204	simply typed $\lambda$-calculus. All logical types, namely those of
nipkow@867	205	class \cldx{logic}, are automatically equipped with a basic syntax of
nipkow@867	206	types, identifiers, variables, parentheses, $\lambda$-abstraction and
nipkow@867	207	application.
nipkow@867	208	\begin{warn}
nipkow@867	209	Isabelle combines the syntaxes for all types of class \cldx{logic} by
nipkow@867	210	mapping all those types to the single nonterminal $logic$. Thus all
nipkow@867	211	productions of $logic$, in particular $id$, $var$ etc., become available.
nipkow@867	212	\end{warn}
wenzelm@864	213
wenzelm@864	214
lcp@320	215	\subsection{Lexical matters}
lcp@320	216	The parser does not process input strings directly. It operates on token
lcp@320	217	lists provided by Isabelle's \bfindex{lexer}. There are two kinds of
lcp@320	218	tokens: \bfindex{delimiters} and \bfindex{name tokens}.
lcp@320	219
lcp@320	220	\index{reserved words}
lcp@320	221	Delimiters can be regarded as reserved words of the syntax. You can
lcp@320	222	add new ones when extending theories. In Fig.\ts\ref{fig:pure_gram} they
lcp@320	223	appear in typewriter font, for example {\tt ==}, {\tt =?=} and
lcp@320	224	{\tt PROP}\@.
lcp@320	225
wenzelm@864	226	Name tokens have a predefined syntax. The lexer distinguishes six disjoint
wenzelm@864	227	classes of names: \rmindex{identifiers}, \rmindex{unknowns}, type
wenzelm@864	228	identifiers\index{type identifiers}, type unknowns\index{type unknowns},
paulson@3485	229	\rmindex{numerals}, \rmindex{strings}. They are denoted by \ndxbold{id},
wenzelm@864	230	\ndxbold{var}, \ndxbold{tid}, \ndxbold{tvar}, \ndxbold{xnum}, \ndxbold{xstr},
wenzelm@864	231	respectively. Typical examples are {\tt x}, {\tt ?x7}, {\tt 'a}, {\tt ?'a3},
paulson@3485	232	{\tt \#42}, {\tt ''foo bar''}. Here is the precise syntax:
lcp@320	233	\begin{eqnarray*}
lcp@320	234	id & = & letter~quasiletter^* \\
wenzelm@4543	235	longid & = & id\mbox{\tt .}id~\dots~id \\
lcp@320	236	var & = & \mbox{\tt ?}id ~~\|~~ \mbox{\tt ?}id\mbox{\tt .}nat \\
lcp@320	237	tid & = & \mbox{\tt '}id \\
lcp@320	238	tvar & = & \mbox{\tt ?}tid ~~\|~~
wenzelm@864	239	\mbox{\tt ?}tid\mbox{\tt .}nat \\
wenzelm@5542	240	xnum & = & \mbox{\tt \#}nat ~~\|~~ \mbox{\tt \#-}nat \\
wenzelm@864	241	xstr & = & \mbox{\tt ''\rm text\tt ''} \\[1ex]
kleing@14483	242	letter & = & sletter ~~\|~~ xletter \\
lcp@320	243	digit & = & \mbox{one of {\tt 0}\dots {\tt 9}} \\
lcp@320	244	quasiletter & = & letter ~~\|~~ digit ~~\|~~ \mbox{\tt _} ~~\|~~ \mbox{\tt '} \\
kleing@14483	245	nat & = & digit^+\\[1ex]
kleing@14483	246	sletter & = & \mbox{one of {\tt a}\dots {\tt z} {\tt A}\dots {\tt Z}} \\
kleing@14483	247	xletter & = & {\tt \backslash<} ~ (sletter ~\|~ dletter ~\|~ bletter ~\|~ gletter ~\|~ cletter) ~ {\tt >}\\
kleing@14483	248	dletter & = & \mbox{one of {\tt aa}\dots {\tt zz} {\tt AA}\dots {\tt ZZ}} \\
kleing@14483	249	bletter & = & {\tt bool} ~\|~ {\tt complex} ~\|~ {\tt nat} ~\|~ {\tt rat} ~\|~ {\tt real} ~\|~ {\tt int}\\
kleing@14483	250	gletter & = & {\tt alpha} ~\|~ {\tt beta} ~\|~ {\tt gamma} ~\|~ {\tt delta} ~\|~ {\tt epsilon} ~\|~ {\tt zeta} ~\|~ {\tt eta} ~\|\\
kleing@14483	251	& & {\tt theta} ~\|~ {\tt iota} ~\|~ {\tt kappa} ~\|~ {\tt mu} ~\|~ {\tt nu} ~\|~ {\tt xi} ~\|~ {\tt pi} ~\|~ {\tt rho} ~\|\\
kleing@14483	252	& & {\tt sigma} ~\|~ {\tt tau} ~\|~ {\tt upsilon} ~\|~ {\tt phi} ~\|~ {\tt psi} ~\|~ {\tt omega} ~\|~ {\tt Gamma} ~\|\\
kleing@14483	253	& & {\tt Delta} ~\|~ {\tt Theta} ~\|~ {\tt Lambda} ~\|~ {\tt Xi} ~\|~ {\tt Pi} ~\|~ {\tt Sigma} ~\|~ {\tt Upsilon} ~\|\\
kleing@14483	254	& & {\tt Phi} ~\|~ {\tt Psi} ~\|~ {\tt Omega}\\
kleing@14483	255	cletter & = & {\tt \hat{}\, isup} ~~\|~~ {\tt \hat{}\, isub}
lcp@320	256	\end{eqnarray*}
wenzelm@4543	257	The lexer repeatedly takes the longest prefix of the input string that
wenzelm@4543	258	forms a valid token. A maximal prefix that is both a delimiter and a
wenzelm@4543	259	name is treated as a delimiter. Spaces, tabs, newlines and formfeeds
wenzelm@4543	260	are separators; they never occur within tokens, except those of class
wenzelm@4543	261	$xstr$.
wenzelm@864	262
wenzelm@864	263	\medskip
wenzelm@864	264	Delimiters need not be separated by white space. For example, if {\tt -}
wenzelm@864	265	is a delimiter but {\tt --} is not, then the string {\tt --} is treated as
wenzelm@864	266	two consecutive occurrences of the token~{\tt -}. In contrast, \ML\
wenzelm@864	267	treats {\tt --} as a single symbolic name. The consequence of Isabelle's
wenzelm@864	268	more liberal scheme is that the same string may be parsed in different ways
wenzelm@864	269	after extending the syntax: after adding {\tt --} as a delimiter, the input
wenzelm@864	270	{\tt --} is treated as a single token.
wenzelm@864	271
lcp@320	272	A \ndxbold{var} or \ndxbold{tvar} describes an unknown, which is internally
lcp@320	273	a pair of base name and index (\ML\ type \mltydx{indexname}). These
lcp@320	274	components are either separated by a dot as in {\tt ?x.1} or {\tt ?x7.3} or
lcp@320	275	run together as in {\tt ?x1}. The latter form is possible if the base name
lcp@320	276	does not end with digits. If the index is 0, it may be dropped altogether:
lcp@320	277	{\tt ?x} abbreviates both {\tt ?x0} and {\tt ?x.0}.
lcp@320	278
wenzelm@864	279	Tokens of class $xnum$ or $xstr$ are not used by the meta-logic.
wenzelm@864	280	Object-logics may provide numerals and string constants by adding appropriate
wenzelm@864	281	productions and translation functions.
lcp@320	282
wenzelm@864	283	\medskip
lcp@320	284	Although name tokens are returned from the lexer rather than the parser, it
lcp@320	285	is more logical to regard them as nonterminals. Delimiters, however, are
lcp@320	286	terminals; they are just syntactic sugar and contribute nothing to the
lcp@320	287	abstract syntax tree.
lcp@320	288
lcp@320	289
wenzelm@3108	290	\subsection{*Inspecting the syntax} \label{pg:print_syn}
lcp@320	291	\begin{ttbox}
lcp@320	292	syn_of : theory -> Syntax.syntax
wenzelm@864	293	print_syntax : theory -> unit
lcp@320	294	Syntax.print_syntax : Syntax.syntax -> unit
lcp@320	295	Syntax.print_gram : Syntax.syntax -> unit
lcp@320	296	Syntax.print_trans : Syntax.syntax -> unit
lcp@320	297	\end{ttbox}
lcp@320	298	The abstract type \mltydx{Syntax.syntax} allows manipulation of syntaxes
lcp@320	299	in \ML. You can display values of this type by calling the following
lcp@320	300	functions:
lcp@320	301	\begin{ttdescription}
lcp@320	302	\item[\ttindexbold{syn_of} {\it thy}] returns the syntax of the Isabelle
lcp@320	303	theory~{\it thy} as an \ML\ value.
lcp@320	304
paulson@8136	305	\item[\ttindexbold{print_syntax} $thy$] uses {\tt Syntax.print_syntax}
paulson@8136	306	to display the syntax part of theory $thy$.
wenzelm@864	307
lcp@320	308	\item[\ttindexbold{Syntax.print_syntax} {\it syn}] shows virtually all
lcp@320	309	information contained in the syntax {\it syn}. The displayed output can
lcp@320	310	be large. The following two functions are more selective.
lcp@320	311
lcp@320	312	\item[\ttindexbold{Syntax.print_gram} {\it syn}] shows the grammar part
wenzelm@864	313	of~{\it syn}, namely the lexicon, logical types and productions. These are
lcp@320	314	discussed below.
lcp@320	315
lcp@320	316	\item[\ttindexbold{Syntax.print_trans} {\it syn}] shows the translation
lcp@320	317	part of~{\it syn}, namely the constants, parse/print macros and
lcp@320	318	parse/print translations.
lcp@320	319	\end{ttdescription}
lcp@320	320
wenzelm@12465	321	The output of the above print functions is divided into labelled sections.
wenzelm@12465	322	The grammar is represented by {\tt lexicon}, {\tt logtypes} and {\tt prods}.
wenzelm@12465	323	The rest refers to syntactic translations and macro expansion. Here is an
lcp@320	324	explanation of the various sections.
lcp@320	325	\begin{description}
lcp@320	326	\item[{\tt lexicon}] lists the delimiters used for lexical
wenzelm@864	327	analysis.\index{delimiters}
lcp@320	328
wenzelm@864	329	\item[{\tt logtypes}] lists the types that are regarded the same as {\tt
paulson@3485	330	logic} syntactically. Thus types of object-logics (e.g.\ {\tt nat})
wenzelm@864	331	will be automatically equipped with the standard syntax of
wenzelm@864	332	$\lambda$-calculus.
lcp@320	333
lcp@320	334	\item[{\tt prods}] lists the \rmindex{productions} of the priority grammar.
lcp@320	335	The nonterminal $A^{(n)}$ is rendered in {\sc ascii} as {\tt $A$[$n$]}.
lcp@320	336	Each delimiter is quoted. Some productions are shown with {\tt =>} and
lcp@320	337	an attached string. These strings later become the heads of parse
lcp@320	338	trees; they also play a vital role when terms are printed (see
lcp@320	339	\S\ref{sec:asts}).
lcp@320	340
lcp@320	341	Productions with no strings attached are called {\bf copy
lcp@320	342	productions}\indexbold{productions!copy}. Their right-hand side must
lcp@320	343	have exactly one nonterminal symbol (or name token). The parser does
lcp@320	344	not create a new parse tree node for copy productions, but simply
lcp@320	345	returns the parse tree of the right-hand symbol.
lcp@320	346
lcp@320	347	If the right-hand side consists of a single nonterminal with no
lcp@320	348	delimiters, then the copy production is called a {\bf chain
lcp@320	349	production}. Chain productions act as abbreviations:
lcp@320	350	conceptually, they are removed from the grammar by adding new
lcp@320	351	productions. Priority information attached to chain productions is
lcp@320	352	ignored; only the dummy value $-1$ is displayed.
wenzelm@3108	353
wenzelm@3108	354	\item[\ttindex{print_modes}] lists the alternative print modes
wenzelm@3108	355	provided by this syntax (see \S\ref{sec:prmodes}).
lcp@320	356
lcp@320	357	\item[{\tt consts}, {\tt parse_rules}, {\tt print_rules}]
lcp@320	358	relate to macros (see \S\ref{sec:macros}).
lcp@320	359
lcp@320	360	\item[{\tt parse_ast_translation}, {\tt print_ast_translation}]
lcp@320	361	list sets of constants that invoke translation functions for abstract
lcp@320	362	syntax trees. Section~\ref{sec:asts} below discusses this obscure
lcp@320	363	matter.\index{constants!for translations}
lcp@320	364
paulson@4597	365	\item[{\tt parse_translation}, {\tt print_translation}] list the sets
lcp@320	366	of constants that invoke translation functions for terms (see
lcp@320	367	\S\ref{sec:tr_funs}).
lcp@320	368	\end{description}
lcp@320	369	\index{syntax!Pure\|)}
lcp@320	370
lcp@320	371
lcp@320	372	\section{Mixfix declarations} \label{sec:mixfix}
wenzelm@864	373	\index{mixfix declarations\|(}
lcp@320	374
lcp@320	375	When defining a theory, you declare new constants by giving their names,
lcp@320	376	their type, and an optional {\bf mixfix annotation}. Mixfix annotations
lcp@320	377	allow you to extend Isabelle's basic $\lambda$-calculus syntax with
lcp@320	378	readable notation. They can express any context-free priority grammar.
lcp@320	379	Isabelle syntax definitions are inspired by \OBJ~\cite{OBJ}; they are more
wenzelm@864	380	general than the priority declarations of \ML\ and Prolog.
lcp@320	381
lcp@320	382	A mixfix annotation defines a production of the priority grammar. It
lcp@320	383	describes the concrete syntax, the translation to abstract syntax, and the
lcp@320	384	pretty printing. Special case annotations provide a simple means of
wenzelm@864	385	specifying infix operators and binders.
lcp@320	386
lcp@320	387	\subsection{The general mixfix form}
lcp@320	388	Here is a detailed account of mixfix declarations. Suppose the following
wenzelm@864	389	line occurs within a {\tt consts} or {\tt syntax} section of a {\tt .thy}
wenzelm@864	390	file:
lcp@320	391	\begin{center}
lcp@320	392	{\tt $c$ ::\ "$\sigma$" ("$template$" $ps$ $p$)}
lcp@320	393	\end{center}
lcp@332	394	This constant declaration and mixfix annotation are interpreted as follows:
lcp@320	395	\begin{itemize}\index{productions}
lcp@320	396	\item The string {\tt $c$} is the name of the constant associated with the
lcp@320	397	production; unless it is a valid identifier, it must be enclosed in
lcp@320	398	quotes. If $c$ is empty (given as~{\tt ""}) then this is a copy
lcp@320	399	production.\index{productions!copy} Otherwise, parsing an instance of the
lcp@320	400	phrase $template$ generates the \AST{} {\tt ("$c$" $a@1$ $\ldots$
lcp@320	401	$a@n$)}, where $a@i$ is the \AST{} generated by parsing the $i$-th
lcp@320	402	argument.
lcp@320	403
wenzelm@864	404	\item The constant $c$, if non-empty, is declared to have type $\sigma$
wenzelm@864	405	({\tt consts} section only).
lcp@320	406
lcp@320	407	\item The string $template$ specifies the right-hand side of
lcp@320	408	the production. It has the form
wenzelm@864	409	\[ w@0 \;_\; w@1 \;_\; \ldots \;_\; w@n, \]
lcp@320	410	where each occurrence of {\tt_} denotes an argument position and
lcp@320	411	the~$w@i$ do not contain~{\tt _}. (If you want a literal~{\tt _} in
lcp@320	412	the concrete syntax, you must escape it as described below.) The $w@i$
wenzelm@864	413	may consist of \rmindex{delimiters}, spaces or
lcp@320	414	\rmindex{pretty printing} annotations (see below).
lcp@320	415
lcp@320	416	\item The type $\sigma$ specifies the production's nonterminal symbols
lcp@320	417	(or name tokens). If $template$ is of the form above then $\sigma$
lcp@320	418	must be a function type with at least~$n$ argument positions, say
lcp@320	419	$\sigma = [\tau@1, \dots, \tau@n] \To \tau$. Nonterminal symbols are
lcp@320	420	derived from the types $\tau@1$, \ldots,~$\tau@n$, $\tau$ as described
wenzelm@864	421	below. Any of these may be function types.
lcp@320	422
lcp@320	423	\item The optional list~$ps$ may contain at most $n$ integers, say {\tt
lcp@320	424	[$p@1$, $\ldots$, $p@m$]}, where $p@i$ is the minimal
lcp@320	425	priority\indexbold{priorities} required of any phrase that may appear
lcp@320	426	as the $i$-th argument. Missing priorities default to~0.
wenzelm@4543	427
wenzelm@4543	428	\item The integer $p$ is the priority of this production. If
wenzelm@4543	429	omitted, it defaults to the maximal priority. Priorities range
wenzelm@4543	430	between 0 and \ttindexbold{max_pri} (= 1000).
lcp@320	431
lcp@320	432	\end{itemize}
lcp@320	433	%
wenzelm@864	434	The resulting production is \[ A^{(p)}= w@0\, A@1^{(p@1)}\, w@1\,
wenzelm@864	435	A@2^{(p@2)}\, \dots\, A@n^{(p@n)}\, w@n \] where $A$ and the $A@i$ are the
wenzelm@864	436	nonterminals corresponding to the types $\tau$ and $\tau@i$ respectively.
wenzelm@864	437	The nonterminal symbol associated with a type $(\ldots)ty$ is {\tt logic}, if
wenzelm@864	438	this is a logical type (namely one of class {\tt logic} excluding {\tt
wenzelm@864	439	prop}). Otherwise it is $ty$ (note that only the outermost type constructor
wenzelm@864	440	is taken into account). Finally, the nonterminal of a type variable is {\tt
wenzelm@864	441	any}.
wenzelm@864	442
wenzelm@911	443	\begin{warn}
wenzelm@864	444	Theories must sometimes declare types for purely syntactic purposes ---
paulson@3485	445	merely playing the role of nonterminals. One example is \tydx{type}, the
wenzelm@864	446	built-in type of types. This is a `type of all types' in the syntactic
wenzelm@864	447	sense only. Do not declare such types under {\tt arities} as belonging to
wenzelm@864	448	class {\tt logic}\index{*logic class}, for that would make them useless as
wenzelm@864	449	separate nonterminal symbols.
wenzelm@864	450	\end{warn}
wenzelm@864	451
wenzelm@864	452	Associating nonterminals with types allows a constant's type to specify
wenzelm@864	453	syntax as well. We can declare the function~$f$ to have type $[\tau@1,
wenzelm@864	454	\ldots, \tau@n]\To \tau$ and, through a mixfix annotation, specify the layout
wenzelm@864	455	of the function's $n$ arguments. The constant's name, in this case~$f$, will
wenzelm@864	456	also serve as the label in the abstract syntax tree.
wenzelm@864	457
wenzelm@864	458	You may also declare mixfix syntax without adding constants to the theory's
wenzelm@864	459	signature, by using a {\tt syntax} section instead of {\tt consts}. Thus a
wenzelm@864	460	production need not map directly to a logical function (this typically
wenzelm@864	461	requires additional syntactic translations, see also
wenzelm@864	462	Chapter~\ref{chap:syntax}).
wenzelm@864	463
wenzelm@864	464
wenzelm@911	465	\medskip
wenzelm@911	466	As a special case of the general mixfix declaration, the form
wenzelm@864	467	\begin{center}
wenzelm@911	468	{\tt $c$ ::\ "$\sigma$" ("$template$")}
wenzelm@864	469	\end{center}
wenzelm@864	470	specifies no priorities. The resulting production puts no priority
wenzelm@864	471	constraints on any of its arguments and has maximal priority itself.
wenzelm@864	472	Omitting priorities in this manner is prone to syntactic ambiguities unless
berghofe@3098	473	the production's right-hand side is fully bracketed, as in
berghofe@3098	474	\verb|"if _ then _ else _ fi"|.
lcp@320	475
lcp@320	476	Omitting the mixfix annotation completely, as in {\tt $c$ ::\ "$\sigma$"},
lcp@320	477	is sensible only if~$c$ is an identifier. Otherwise you will be unable to
lcp@320	478	write terms involving~$c$.
lcp@320	479
lcp@320	480
lcp@320	481	\subsection{Example: arithmetic expressions}
lcp@320	482	\index{examples!of mixfix declarations}
wenzelm@864	483	This theory specification contains a {\tt syntax} section with mixfix
lcp@320	484	declarations encoding the priority grammar from
lcp@320	485	\S\ref{sec:priority_grammars}:
lcp@320	486	\begin{ttbox}
wenzelm@3108	487	ExpSyntax = Pure +
lcp@320	488	types
lcp@320	489	exp
wenzelm@864	490	syntax
clasohm@1387	491	"0" :: exp ("0" 9)
clasohm@1387	492	"+" :: [exp, exp] => exp ("_ + _" [0, 1] 0)
clasohm@1387	493	"*" :: [exp, exp] => exp ("_ * _" [3, 2] 2)
clasohm@1387	494	"-" :: exp => exp ("- _" [3] 3)
lcp@320	495	end
lcp@320	496	\end{ttbox}
wenzelm@864	497	Executing {\tt Syntax.print_gram} reveals the productions derived from the
wenzelm@864	498	above mixfix declarations (lots of additional information deleted):
lcp@320	499	\begin{ttbox}
wenzelm@3108	500	Syntax.print_gram (syn_of ExpSyntax.thy);
lcp@320	501	{\out exp = "0" => "0" (9)}
lcp@320	502	{\out exp = exp[0] "+" exp[1] => "+" (0)}
lcp@320	503	{\out exp = exp[3] "*" exp[2] => "*" (2)}
lcp@320	504	{\out exp = "-" exp[3] => "-" (3)}
lcp@320	505	\end{ttbox}
lcp@320	506
wenzelm@3108	507	Note that because {\tt exp} is not of class {\tt logic}, it has been
paulson@3485	508	retained as a separate nonterminal. This also entails that the syntax
wenzelm@3108	509	does not provide for identifiers or parenthesized expressions.
wenzelm@3108	510	Normally you would also want to add the declaration {\tt arities
wenzelm@3108	511	exp::logic} after {\tt types} and use {\tt consts} instead of {\tt
paulson@3485	512	syntax}. Try this as an exercise and study the changes in the
nipkow@867	513	grammar.
lcp@320	514
lcp@320	515	\subsection{The mixfix template}
wenzelm@864	516	Let us now take a closer look at the string $template$ appearing in mixfix
lcp@320	517	annotations. This string specifies a list of parsing and printing
lcp@320	518	directives: delimiters\index{delimiters}, arguments, spaces, blocks of
lcp@320	519	indentation and line breaks. These are encoded by the following character
lcp@320	520	sequences:
lcp@320	521	\index{pretty printing\|(}
lcp@320	522	\begin{description}
lcp@320	523	\item[~$d$~] is a delimiter, namely a non-empty sequence of characters
lcp@320	524	other than the special characters {\tt _}, {\tt(}, {\tt)} and~{\tt/}.
lcp@320	525	Each of these characters may still appear if escaped, which means preceding it with
lcp@320	526	a~{\tt '} (single quote). Thus you have to write {\tt ''} if you really
wenzelm@911	527	want a single quote. Furthermore, a~{\tt '} followed by a space separates
wenzelm@911	528	delimiters without extra white space being added for printing.
lcp@320	529
lcp@320	530	\item[~{\tt_}~] is an argument position, which stands for a nonterminal symbol
lcp@320	531	or name token.
lcp@320	532
lcp@320	533	\item[~$s$~] is a non-empty sequence of spaces for printing. This and the
lcp@320	534	following specifications do not affect parsing at all.
lcp@320	535
lcp@320	536	\item[~{\tt(}$n$~] opens a pretty printing block. The optional number $n$
lcp@320	537	specifies how much indentation to add when a line break occurs within the
lcp@320	538	block. If {\tt(} is not followed by digits, the indentation defaults
lcp@320	539	to~0.
lcp@320	540
lcp@320	541	\item[~{\tt)}~] closes a pretty printing block.
lcp@320	542
lcp@320	543	\item[~{\tt//}~] forces a line break.
lcp@320	544
lcp@320	545	\item[~{\tt/}$s$~] allows a line break. Here $s$ stands for the string of
lcp@320	546	spaces (zero or more) right after the {\tt /} character. These spaces
lcp@320	547	are printed if the break is not taken.
lcp@320	548	\end{description}
lcp@320	549	For example, the template {\tt"(_ +/ _)"} specifies an infix operator.
lcp@320	550	There are two argument positions; the delimiter~{\tt+} is preceded by a
lcp@320	551	space and followed by a space or line break; the entire phrase is a pretty
lcp@320	552	printing block. Other examples appear in Fig.\ts\ref{fig:set_trans} below.
lcp@320	553	Isabelle's pretty printer resembles the one described in
paulson@6592	554	Paulson~\cite{paulson-ml2}.
lcp@320	555
lcp@320	556	\index{pretty printing\|)}
lcp@320	557
lcp@320	558
lcp@320	559	\subsection{Infixes}
lcp@320	560	\indexbold{infixes}
lcp@320	561
wenzelm@3108	562	Infix operators associating to the left or right can be declared using
wenzelm@3108	563	{\tt infixl} or {\tt infixr}. Basically, the form {\tt $c$ ::\
wenzelm@3108	564	$\sigma$ (infixl $p$)} abbreviates the mixfix declarations
lcp@320	565	\begin{ttbox}
clasohm@1387	566	"op $c$" :: $\sigma$ ("(_ $c$/ _)" [$p$, $p+1$] $p$)
clasohm@1387	567	"op $c$" :: $\sigma$ ("op $c$")
lcp@320	568	\end{ttbox}
clasohm@1387	569	and {\tt $c$ ::\ $\sigma$ (infixr $p$)} abbreviates the mixfix declarations
lcp@320	570	\begin{ttbox}
clasohm@1387	571	"op $c$" :: $\sigma$ ("(_ $c$/ _)" [$p+1$, $p$] $p$)
clasohm@1387	572	"op $c$" :: $\sigma$ ("op $c$")
lcp@320	573	\end{ttbox}
lcp@320	574	The infix operator is declared as a constant with the prefix {\tt op}.
lcp@320	575	Thus, prefixing infixes with \sdx{op} makes them behave like ordinary
lcp@320	576	function symbols, as in \ML. Special characters occurring in~$c$ must be
lcp@320	577	escaped, as in delimiters, using a single quote.
lcp@320	578
wenzelm@3108	579	A slightly more general form of infix declarations allows constant
wenzelm@3108	580	names to be independent from their concrete syntax, namely \texttt{$c$
paulson@3485	581	::\ $\sigma$\ (infixl "$sy$" $p$)}, and likewise for \texttt{infixr}. As
wenzelm@3108	582	an example consider:
wenzelm@3108	583	\begin{ttbox}
wenzelm@3108	584	and :: [bool, bool] => bool (infixr "&" 35)
wenzelm@3108	585	\end{ttbox}
wenzelm@3108	586	The internal constant name will then be just \texttt{and}, without any
wenzelm@3108	587	\texttt{op} prefixed.
wenzelm@3108	588
lcp@320	589
lcp@320	590	\subsection{Binders}
lcp@320	591	\indexbold{binders}
lcp@320	592	\begingroup
lcp@320	593	\def\Q{{\cal Q}}
lcp@320	594	A {\bf binder} is a variable-binding construct such as a quantifier. The
lcp@320	595	constant declaration
lcp@320	596	\begin{ttbox}
clasohm@1387	597	$c$ :: $\sigma$ (binder "$\Q$" [$pb$] $p$)
lcp@320	598	\end{ttbox}
lcp@320	599	introduces a constant~$c$ of type~$\sigma$, which must have the form
lcp@320	600	$(\tau@1 \To \tau@2) \To \tau@3$. Its concrete syntax is $\Q~x.P$, where
lcp@320	601	$x$ is a bound variable of type~$\tau@1$, the body~$P$ has type $\tau@2$
paulson@3485	602	and the whole term has type~$\tau@3$. The optional integer $pb$
lcp@1060	603	specifies the body's priority, by default~$p$. Special characters
clasohm@877	604	in $\Q$ must be escaped using a single quote.
lcp@320	605
wenzelm@864	606	The declaration is expanded internally to something like
lcp@320	607	\begin{ttbox}
berghofe@3098	608	$c$\hskip3pt :: ($\tau@1$ => $\tau@2$) => $\tau@3$
berghofe@3098	609	"$\Q$" :: [idts, $\tau@2$] => $\tau@3$ ("(3$\Q$_./ _)" [0, $pb$] $p$)
lcp@320	610	\end{ttbox}
lcp@320	611	Here \ndx{idts} is the nonterminal symbol for a list of identifiers with
lcp@332	612	\index{type constraints}
lcp@320	613	optional type constraints (see Fig.\ts\ref{fig:pure_gram}). The
lcp@320	614	declaration also installs a parse translation\index{translations!parse}
lcp@320	615	for~$\Q$ and a print translation\index{translations!print} for~$c$ to
lcp@320	616	translate between the internal and external forms.
lcp@320	617
lcp@320	618	A binder of type $(\sigma \To \tau) \To \tau$ can be nested by giving a
lcp@320	619	list of variables. The external form $\Q~x@1~x@2 \ldots x@n. P$
lcp@320	620	corresponds to the internal form
lcp@320	621	\[ c(\lambda x@1. c(\lambda x@2. \ldots c(\lambda x@n. P) \ldots)). \]
lcp@320	622
lcp@320	623	\medskip
lcp@320	624	For example, let us declare the quantifier~$\forall$:\index{quantifiers}
lcp@320	625	\begin{ttbox}
clasohm@1387	626	All :: ('a => o) => o (binder "ALL " 10)
lcp@320	627	\end{ttbox}
lcp@320	628	This lets us write $\forall x.P$ as either {\tt All(\%$x$.$P$)} or {\tt ALL
lcp@320	629	$x$.$P$}. When printing, Isabelle prefers the latter form, but must fall
lcp@320	630	back on ${\tt All}(P)$ if $P$ is not an abstraction. Both $P$ and {\tt ALL
lcp@320	631	$x$.$P$} have type~$o$, the type of formulae, while the bound variable
lcp@320	632	can be polymorphic.
lcp@320	633	\endgroup
lcp@320	634
lcp@320	635	\index{mixfix declarations\|)}
lcp@320	636
wenzelm@3108	637
wenzelm@3108	638	\section{*Alternative print modes} \label{sec:prmodes}
wenzelm@3108	639	\index{print modes\|(}
wenzelm@3108	640	%
paulson@3485	641	Isabelle's pretty printer supports alternative output syntaxes. These
paulson@3485	642	may be used independently or in cooperation. The currently active
wenzelm@3108	643	print modes (with precedence from left to right) are determined by a
wenzelm@3108	644	reference variable.
wenzelm@3108	645	\begin{ttbox}\index{*print_mode}
wenzelm@3108	646	print_mode: string list ref
wenzelm@3108	647	\end{ttbox}
wenzelm@3108	648	Initially this may already contain some print mode identifiers,
wenzelm@3108	649	depending on how Isabelle has been invoked (e.g.\ by some user
paulson@3485	650	interface). So changes should be incremental --- adding or deleting
wenzelm@3108	651	modes relative to the current value.
wenzelm@3108	652
wenzelm@12465	653	Any \ML{} string is a legal print mode identifier, without any predeclaration
wenzelm@12465	654	required. The following names should be considered reserved, though:
wenzelm@12465	655	\texttt{""} (the empty string), \texttt{symbols}, \texttt{xsymbols}, and
wenzelm@12465	656	\texttt{latex}.
wenzelm@3108	657
wenzelm@3108	658	There is a separate table of mixfix productions for pretty printing
paulson@3485	659	associated with each print mode. The currently active ones are
wenzelm@3108	660	conceptually just concatenated from left to right, with the standard
wenzelm@3108	661	syntax output table always coming last as default. Thus mixfix
wenzelm@3108	662	productions of preceding modes in the list may override those of later
wenzelm@3108	663	ones. Also note that token translations are always relative to some
wenzelm@3108	664	print mode (see \S\ref{sec:tok_tr}).
wenzelm@3108	665
wenzelm@3108	666	\medskip The canonical application of print modes is optional printing
wenzelm@3108	667	of mathematical symbols from a special screen font instead of {\sc
paulson@3485	668	ascii}. Another example is to re-use Isabelle's advanced
wenzelm@3108	669	$\lambda$-term printing mechanisms to generate completely different
wenzelm@3228	670	output, say for interfacing external tools like \rmindex{model
wenzelm@3228	671	checkers} (see also \texttt{HOL/Modelcheck}).
wenzelm@3108	672
wenzelm@3108	673	\index{print modes\|)}
wenzelm@3108	674
wenzelm@3108	675
clasohm@711	676	\section{Ambiguity of parsed expressions} \label{sec:ambiguity}
clasohm@711	677	\index{ambiguity!of parsed expressions}
clasohm@711	678
clasohm@711	679	To keep the grammar small and allow common productions to be shared,
wenzelm@864	680	all logical types (except {\tt prop}) are internally represented
paulson@3485	681	by one nonterminal, namely {\tt logic}. This and omitted or too freely
clasohm@711	682	chosen priorities may lead to ways of parsing an expression that were
paulson@3485	683	not intended by the theory's maker. In most cases Isabelle is able to
wenzelm@864	684	select one of multiple parse trees that an expression has led
paulson@3485	685	to by checking which of them can be typed correctly. But this may not
clasohm@711	686	work in every case and always slows down parsing.
wenzelm@864	687	The warning and error messages that can be produced during this process are
clasohm@711	688	as follows:
clasohm@711	689
clasohm@880	690	If an ambiguity can be resolved by type inference, the following
clasohm@880	691	warning is shown to remind the user that parsing is (unnecessarily)
paulson@3485	692	slowed down. In cases where it's not easily possible to eliminate the
clasohm@880	693	ambiguity, the frequency of the warning can be controlled by changing
clasohm@883	694	the value of {\tt Syntax.ambiguity_level} which has type {\tt int
paulson@3485	695	ref}. Its default value is 1 and by increasing it one can control how
clasohm@883	696	many parse trees are necessary to generate the warning.
clasohm@711	697
clasohm@711	698	\begin{ttbox}
wenzelm@3801	699	{\out Ambiguous input "\dots"}
clasohm@711	700	{\out produces the following parse trees:}
wenzelm@3801	701	{\out \dots}
clasohm@711	702	{\out Fortunately, only one parse tree is type correct.}
wenzelm@3801	703	{\out You may still want to disambiguate your grammar or your input.}
clasohm@711	704	\end{ttbox}
clasohm@711	705
clasohm@711	706	The following message is normally caused by using the same
clasohm@711	707	syntax in two different productions:
clasohm@711	708
clasohm@711	709	\begin{ttbox}
wenzelm@3802	710	{\out Ambiguous input "..."}
clasohm@711	711	{\out produces the following parse trees:}
wenzelm@3802	712	{\out \dots}
wenzelm@3802	713	{\out More than one term is type correct:}
wenzelm@3802	714	{\out \dots}
clasohm@711	715	\end{ttbox}
clasohm@711	716
clasohm@866	717	Ambiguities occurring in syntax translation rules cannot be resolved by
clasohm@866	718	type inference because it is not necessary for these rules to be type
paulson@3485	719	correct. Therefore Isabelle always generates an error message and the
clasohm@866	720	ambiguity should be eliminated by changing the grammar or the rule.
clasohm@711	721
lcp@320	722
lcp@320	723	\section{Example: some minimal logics} \label{sec:min_logics}
lcp@320	724	\index{examples!of logic definitions}
lcp@320	725
lcp@320	726	This section presents some examples that have a simple syntax. They
lcp@320	727	demonstrate how to define new object-logics from scratch.
lcp@320	728
clasohm@711	729	First we must define how an object-logic syntax is embedded into the
wenzelm@864	730	meta-logic. Since all theorems must conform to the syntax for~\ndx{prop}
wenzelm@864	731	(see Fig.\ts\ref{fig:pure_gram}), that syntax has to be extended with the
lcp@320	732	object-level syntax. Assume that the syntax of your object-logic defines a
wenzelm@864	733	meta-type~\tydx{o} of formulae which refers to the nonterminal {\tt logic}.
wenzelm@864	734	These formulae can now appear in axioms and theorems wherever \ndx{prop} does
wenzelm@864	735	if you add the production
wenzelm@864	736	\[ prop ~=~ logic. \]
wenzelm@864	737	This is not supposed to be a copy production but an implicit coercion from
wenzelm@864	738	formulae to propositions:
lcp@320	739	\begin{ttbox}
lcp@320	740	Base = Pure +
lcp@320	741	types
lcp@320	742	o
lcp@320	743	arities
lcp@320	744	o :: logic
lcp@320	745	consts
clasohm@1387	746	Trueprop :: o => prop ("_" 5)
lcp@320	747	end
lcp@320	748	\end{ttbox}
lcp@320	749	The constant \cdx{Trueprop} (the name is arbitrary) acts as an invisible
lcp@332	750	coercion function. Assuming this definition resides in a file {\tt Base.thy},
lcp@320	751	you have to load it with the command {\tt use_thy "Base"}.
lcp@320	752
lcp@320	753	One of the simplest nontrivial logics is {\bf minimal logic} of
lcp@320	754	implication. Its definition in Isabelle needs no advanced features but
lcp@320	755	illustrates the overall mechanism nicely:
lcp@320	756	\begin{ttbox}
lcp@320	757	Hilbert = Base +
lcp@320	758	consts
clasohm@1387	759	"-->" :: [o, o] => o (infixr 10)
lcp@320	760	rules
lcp@320	761	K "P --> Q --> P"
lcp@320	762	S "(P --> Q --> R) --> (P --> Q) --> P --> R"
lcp@320	763	MP "[\| P --> Q; P \|] ==> Q"
lcp@320	764	end
lcp@320	765	\end{ttbox}
lcp@332	766	After loading this definition from the file {\tt Hilbert.thy}, you can
lcp@320	767	start to prove theorems in the logic:
lcp@320	768	\begin{ttbox}
paulson@5205	769	Goal "P --> P";
lcp@320	770	{\out Level 0}
lcp@320	771	{\out P --> P}
lcp@320	772	{\out 1. P --> P}
lcp@320	773	\ttbreak
lcp@320	774	by (resolve_tac [Hilbert.MP] 1);
lcp@320	775	{\out Level 1}
lcp@320	776	{\out P --> P}
lcp@320	777	{\out 1. ?P --> P --> P}
lcp@320	778	{\out 2. ?P}
lcp@320	779	\ttbreak
lcp@320	780	by (resolve_tac [Hilbert.MP] 1);
lcp@320	781	{\out Level 2}
lcp@320	782	{\out P --> P}
lcp@320	783	{\out 1. ?P1 --> ?P --> P --> P}
lcp@320	784	{\out 2. ?P1}
lcp@320	785	{\out 3. ?P}
lcp@320	786	\ttbreak
lcp@320	787	by (resolve_tac [Hilbert.S] 1);
lcp@320	788	{\out Level 3}
lcp@320	789	{\out P --> P}
lcp@320	790	{\out 1. P --> ?Q2 --> P}
lcp@320	791	{\out 2. P --> ?Q2}
lcp@320	792	\ttbreak
lcp@320	793	by (resolve_tac [Hilbert.K] 1);
lcp@320	794	{\out Level 4}
lcp@320	795	{\out P --> P}
lcp@320	796	{\out 1. P --> ?Q2}
lcp@320	797	\ttbreak
lcp@320	798	by (resolve_tac [Hilbert.K] 1);
lcp@320	799	{\out Level 5}
lcp@320	800	{\out P --> P}
lcp@320	801	{\out No subgoals!}
lcp@320	802	\end{ttbox}
lcp@320	803	As we can see, this Hilbert-style formulation of minimal logic is easy to
lcp@320	804	define but difficult to use. The following natural deduction formulation is
lcp@320	805	better:
lcp@320	806	\begin{ttbox}
lcp@320	807	MinI = Base +
lcp@320	808	consts
clasohm@1387	809	"-->" :: [o, o] => o (infixr 10)
lcp@320	810	rules
lcp@320	811	impI "(P ==> Q) ==> P --> Q"
lcp@320	812	impE "[\| P --> Q; P \|] ==> Q"
lcp@320	813	end
lcp@320	814	\end{ttbox}
lcp@320	815	Note, however, that although the two systems are equivalent, this fact
lcp@320	816	cannot be proved within Isabelle. Axioms {\tt S} and {\tt K} can be
lcp@320	817	derived in {\tt MinI} (exercise!), but {\tt impI} cannot be derived in {\tt
lcp@320	818	Hilbert}. The reason is that {\tt impI} is only an {\bf admissible} rule
lcp@320	819	in {\tt Hilbert}, something that can only be shown by induction over all
lcp@320	820	possible proofs in {\tt Hilbert}.
lcp@320	821
lcp@320	822	We may easily extend minimal logic with falsity:
lcp@320	823	\begin{ttbox}
lcp@320	824	MinIF = MinI +
lcp@320	825	consts
clasohm@1387	826	False :: o
lcp@320	827	rules
lcp@320	828	FalseE "False ==> P"
lcp@320	829	end
lcp@320	830	\end{ttbox}
lcp@320	831	On the other hand, we may wish to introduce conjunction only:
lcp@320	832	\begin{ttbox}
lcp@320	833	MinC = Base +
lcp@320	834	consts
clasohm@1387	835	"&" :: [o, o] => o (infixr 30)
lcp@320	836	\ttbreak
lcp@320	837	rules
lcp@320	838	conjI "[\| P; Q \|] ==> P & Q"
lcp@320	839	conjE1 "P & Q ==> P"
lcp@320	840	conjE2 "P & Q ==> Q"
lcp@320	841	end
lcp@320	842	\end{ttbox}
lcp@320	843	And if we want to have all three connectives together, we create and load a
wenzelm@3108	844	theory file consisting of a single line:
lcp@320	845	\begin{ttbox}
lcp@320	846	MinIFC = MinIF + MinC
lcp@320	847	\end{ttbox}
lcp@320	848	Now we can prove mixed theorems like
lcp@320	849	\begin{ttbox}
paulson@5205	850	Goal "P & False --> Q";
lcp@320	851	by (resolve_tac [MinI.impI] 1);
lcp@320	852	by (dresolve_tac [MinC.conjE2] 1);
lcp@320	853	by (eresolve_tac [MinIF.FalseE] 1);
lcp@320	854	\end{ttbox}
lcp@320	855	Try this as an exercise!
wenzelm@5371	856
wenzelm@5371	857
wenzelm@5371	858	%%% Local Variables:
wenzelm@5371	859	%%% mode: latex
wenzelm@5371	860	%%% TeX-master: "ref"
wenzelm@5371	861	%%% End:

author	wenzelm
	Wed, 09 Jun 2004 18:50:38 +0200
changeset 14893	55e83c32cdec
parent 14483	6eac487f9cfa
child 14948	aa6d54648b32
permissions	-rw-r--r--