wneuper/isa: doc-src/IsarRef/syntax.tex@b4dcc32310fb (annotated)

wenzelm@7046	1
wenzelm@7046	2	\chapter{Isar document syntax}
wenzelm@7046	3
wenzelm@7315	4	We give a complete reference of all basic syntactic entities underlying the
wenzelm@7315	5	Isabelle/Isar document syntax. This chapter will not introduce any actual
wenzelm@7315	6	theory and proof commands, though (cf.\ chapter~\ref{ch:pure-syntax} and
wenzelm@7315	7	later).
wenzelm@7134	8
wenzelm@7315	9	\medskip
wenzelm@7046	10
wenzelm@7315	11	In order to get started with writing well-formed Isabelle/Isar documents, the
wenzelm@7315	12	most important aspect to be noted is the difference between \emph{inner} and
wenzelm@7315	13	\emph{outer} syntax. Inner syntax is that of Isabelle types and terms of the
wenzelm@7315	14	logic, while outer syntax is that of Isabelle/Isar theories (and proofs). As
wenzelm@7315	15	a general rule, inner syntax entities may occur only as \emph{atomic entities}
wenzelm@7319	16	within outer syntax. Thus, string \texttt{"x + y"} and identifier \texttt{z}
wenzelm@7319	17	are legal term specifications, while \texttt{x + y} is not.
wenzelm@7315	18
wenzelm@7315	19	\begin{warn}
wenzelm@7315	20	Note that old-style Isabelle theories used to fake parts of the inner type
wenzelm@7315	21	syntax, with complicated rules for when quotes may be omitted. Despite the
wenzelm@7315	22	minor drawback of requiring quotes more often, Isabelle/Isar is simpler and
wenzelm@7315	23	more robust in that respect.
wenzelm@7315	24	\end{warn}
wenzelm@7315	25
wenzelm@7315	26
wenzelm@7315	27	\section{Lexical matters}\label{sec:lex-syntax}
wenzelm@7315	28
wenzelm@7315	29	The Isabelle/Isar outer syntax provides token classes as presented below.
wenzelm@7315	30	Note that some of these coincide (by full intention) with inner lexical syntax
wenzelm@7315	31	as given in \cite{isabelle-ref}. These different levels of syntax should not
wenzelm@7315	32	be confused, though.
wenzelm@7315	33
wenzelm@7315	34	\begin{matharray}{rcl}
wenzelm@7315	35	ident & = & letter~quasiletter^* \\
wenzelm@7315	36	longident & = & ident\verb,.,ident~\dots~ident \\
wenzelm@7315	37	symident & = & sym^+ \\
wenzelm@7315	38	nat & = & digit^+ \\
wenzelm@7315	39	var & = & \verb,?,ident ~|~ \verb,?,ident\verb,.,nat \\
wenzelm@7315	40	textvar & = & \verb,??,ident \\
wenzelm@7315	41	typefree & = & \verb,',ident \\
wenzelm@7315	42	typevar & = & \verb,?,typefree ~|~ \verb,?,typefree\verb,.,nat \\
wenzelm@7315	43	string & = & \verb,", ~\dots~ \verb,", \\
wenzelm@7319	44	verbatim & = & \verb,{*, ~\dots~ \verb,*}, \\
wenzelm@7319	45	\end{matharray}
wenzelm@7319	46	\begin{matharray}{rcl}
wenzelm@7315	47	letter & = & \verb,a, ~|~ \dots ~|~ \verb,z, ~|~ \verb,A, ~|~ \dots ~|~ \verb,Z, \\
wenzelm@7315	48	digit & = & \verb,0, ~|~ \dots ~|~ \verb,9, \\
wenzelm@7315	49	quasiletter & = & letter ~|~ digit ~|~ \verb,_, ~|~ \verb,', \\
wenzelm@7315	50	sym & = & \verb,!, ~|~ \verb,#, ~|~ \verb,$, ~|~ \verb,%, ~|~ \verb,&, ~|~ %$
wenzelm@7319	51	\verb,*, ~|~ \verb,+, ~|~ \verb,-, ~|~ \verb,/, ~|~ \verb,:, ~|~
wenzelm@7319	52	\verb,<, ~|~ \verb,=, ~|~ \verb,>, ~|~ \verb,?, ~|~ \mathtt{\at} ~|~ \\
wenzelm@7319	53	& & \verb,^, ~|~ \verb,_, ~|~ \verb,`, ~|~ \verb,|, ~|~ \verb,~, \\
wenzelm@7315	54	\end{matharray}
wenzelm@7315	55
wenzelm@7315	56	The syntax of \texttt{string} admits any characters, including newlines;
wenzelm@7315	57	\verb|"| and \verb|\| have to be escaped by a backslash, though. Note that
wenzelm@7315	58	ML-style control character notation is not supported. The body of
wenzelm@7315	59	\texttt{verbatim} may consist of any text not containing \verb|*}|.
wenzelm@7315	60
wenzelm@7315	61	Comments take the form \texttt{(*~\dots~*)} and may be nested, just as in ML.
wenzelm@7315	62	Note that these are \emph{source} comments only, which are stripped after
wenzelm@7315	63	lexical analysis of the input. The Isar document syntax also provides several
wenzelm@7315	64	elements of \emph{formal comments} that are actually part of the text (see
wenzelm@7315	65	\S\ref{sec:comments}, \S\ref{sec:formal-cmt-thy}, \S\ref{sec:formal-cmt-prf}).
wenzelm@7315	66
wenzelm@7046	67
wenzelm@7046	68	\section{Common syntax entities}
wenzelm@7046	69
wenzelm@7050	70	The Isar proof and theory language syntax has been carefully designed with
wenzelm@7134	71	orthogonality in mind. Subsequently, we introduce several basic syntactic
wenzelm@7134	72	entities, such as names, terms, theorem specifications, which have been
wenzelm@7134	73	factored out of the actual Isar language elements described later.
wenzelm@7134	74
wenzelm@7167	75	Note that some of the basic syntactic entities introduced below act much like
wenzelm@7167	76	tokens rather than nonterminals, in particular for the sake of error messages.
wenzelm@7134	77	E.g.\ syntax elements such as $\CONSTS$ referring to \railqtoken{name} or
wenzelm@7167	78	\railqtoken{type} would really report a missing name or type rather than any
wenzelm@7167	79	of the constituent primitive tokens such as \railtoken{ident} or
wenzelm@7167	80	\railtoken{string}.
wenzelm@7046	81
wenzelm@7050	82
wenzelm@7050	83	\subsection{Names}
wenzelm@7050	84
wenzelm@7134	85	Entity \railqtoken{name} usually refers to any name of types, constants,
wenzelm@7167	86	theorems etc.\ that are to be \emph{declared} or \emph{defined} (so qualified
wenzelm@7134	87	identifiers are excluded). Quoted strings provide an escape for
wenzelm@7134	88	non-identifier names or those ruled out by outer syntax keywords (e.g.\
wenzelm@7134	89	\verb|"let"|). Already existing objects are usually referenced by
wenzelm@7134	90	\railqtoken{nameref}.
wenzelm@7050	91
wenzelm@7141	92	\indexoutertoken{name}\indexoutertoken{parname}\indexoutertoken{nameref}
wenzelm@7046	93	\begin{rail}
wenzelm@7167	94	name: ident | symident | string
wenzelm@7046	95	;
wenzelm@7167	96	parname: '(' name ')'
wenzelm@7141	97	;
wenzelm@7167	98	nameref: name | longident
wenzelm@7046	99	;
wenzelm@7046	100	\end{rail}
wenzelm@7046	101
wenzelm@7050	102
wenzelm@7315	103	\subsection{Comments}\label{sec:comments}
wenzelm@7046	104
wenzelm@7167	105	Large chunks of plain \railqtoken{text} are usually given
wenzelm@7167	106	\railtoken{verbatim}, i.e.\ enclosed in \verb|{*|\dots\verb|*}|. For
wenzelm@7175	107	convenience, any of the smaller text units conforming to \railqtoken{nameref}
wenzelm@7175	108	are admitted as well. Almost any of the Isar commands may be annotated by
wenzelm@7175	109	some marginal \railnonterm{comment} of the form \texttt{--} \railqtoken{text}.
wenzelm@7167	110	Note that this kind of comment is actually part of the language, while source
wenzelm@7167	111	level comments \verb|(*|\dots\verb|*)| are already stripped at the lexical
wenzelm@7167	112	level. A few commands such as $\PROOFNAME$ admit additional markup with a
wenzelm@7167	113	``level of interest'': \texttt{\%} followed by an optional number $n$ (default
wenzelm@7167	114	$n = 1$) indicates that the respective part of the document becomes $n$ levels
wenzelm@7315	115	more obscure; \texttt{\%\%} means that interest drops by $\infty$ ---
wenzelm@7315	116	\emph{abandon every hope, who enter here}.
wenzelm@7050	117
wenzelm@7050	118	\indexoutertoken{text}\indexouternonterm{comment}\indexouternonterm{interest}
wenzelm@7046	119	\begin{rail}
wenzelm@7167	120	text: verbatim | nameref
wenzelm@7050	121	;
wenzelm@7167	122	comment: '--' text
wenzelm@7046	123	;
wenzelm@7167	124	interest: percent nat? | ppercent
wenzelm@7046	125	;
wenzelm@7046	126	\end{rail}
wenzelm@7046	127
wenzelm@7046	128
wenzelm@7175	129	\subsection{Type classes, sorts and arities}
wenzelm@7046	130
wenzelm@7050	131	The syntax of sorts and arities is given directly at the outer level. Note
wenzelm@7167	132	that this is in contrast to that of types and terms (see \S\ref{sec:types-terms}).
wenzelm@7050	133
wenzelm@7050	134	\indexouternonterm{sort}\indexouternonterm{arity}\indexouternonterm{simplearity}
wenzelm@7135	135	\indexouternonterm{classdecl}
wenzelm@7046	136	\begin{rail}
wenzelm@7321	137	classdecl: name ('<' (nameref + ','))?
wenzelm@7046	138	;
wenzelm@7167	139	sort: nameref | lbrace (nameref * ',') rbrace
wenzelm@7046	140	;
wenzelm@7167	141	arity: ('(' (sort + ',') ')')? sort
wenzelm@7046	142	;
wenzelm@7167	143	simplearity: ('(' (sort + ',') ')')? nameref
wenzelm@7167	144	;
wenzelm@7046	145	\end{rail}
wenzelm@7046	146
wenzelm@7046	147
wenzelm@7167	148	\subsection{Types and terms}\label{sec:types-terms}
wenzelm@7046	149
wenzelm@7167	150	The actual inner Isabelle syntax, that of types and terms of the logic, is far
wenzelm@7315	151	too flexible to be modelled explicitly at the outer theory level.
wenzelm@7167	152	Basically, any such entity has to be quoted at the outer level to turn it into
wenzelm@7175	153	a single token (the parsing and type-checking is performed later). For
wenzelm@7175	154	convenience, a slightly more liberal convention is adopted: quotes may be
wenzelm@7175	155	omitted for any type or term that is already \emph{atomic at the outer level}.
wenzelm@7175	156	E.g.\ one may write just \texttt{x} instead of \texttt{"x"}.
wenzelm@7050	157
wenzelm@7050	158	\indexoutertoken{type}\indexoutertoken{term}\indexoutertoken{prop}
wenzelm@7046	159	\begin{rail}
wenzelm@7167	160	type: nameref | typefree | typevar
wenzelm@7050	161	;
wenzelm@7167	162	term: nameref | var | textvar | nat
wenzelm@7050	163	;
wenzelm@7167	164	prop: term
wenzelm@7050	165	;
wenzelm@7046	166	\end{rail}
wenzelm@7046	167
wenzelm@7167	168	Type declarations and definitions usually refer to \railnonterm{typespec} on
wenzelm@7167	169	the left-hand side. This models basic type constructor application at the
wenzelm@7167	170	outer syntax level. Note that only plain postfix notation is available here,
wenzelm@7167	171	but no infixes.
wenzelm@7050	172
wenzelm@7050	173	\indexouternonterm{typespec}
wenzelm@7050	174	\begin{rail}
wenzelm@7167	175	typespec: (() | typefree | '(' ( typefree + ',' ) ')') name
wenzelm@7050	176	;
wenzelm@7050	177	\end{rail}
wenzelm@7050	178
wenzelm@7050	179
wenzelm@7315	180	\subsection{Term patterns}\label{sec:term-pats}
wenzelm@7050	181
wenzelm@7167	182	Assumptions and goal statements usually admit automatic binding of schematic
wenzelm@7167	183	text variables by giving (optional) patterns of the form $\IS{p@1 \dots p@n}$.
wenzelm@7167	184	There are separate versions available for \railqtoken{term}s and
wenzelm@7167	185	\railqtoken{prop}s. The latter provides a $\CONCLNAME$ part with patterns
wenzelm@7167	186	referring to the (atomic) conclusion of a rule.
wenzelm@7050	187
wenzelm@7050	188	\indexouternonterm{termpat}\indexouternonterm{proppat}
wenzelm@7050	189	\begin{rail}
wenzelm@7167	190	termpat: '(' ('is' term +) ')'
wenzelm@7050	191	;
wenzelm@7167	192	proppat: '(' (('is' prop +) | 'concl' ('is' prop +) | ('is' prop +) 'concl' ('is' prop +)) ')'
wenzelm@7050	193	;
wenzelm@7050	194	\end{rail}
wenzelm@7050	195
wenzelm@7050	196
wenzelm@7046	197	\subsection{Mixfix annotations}
wenzelm@7046	198
wenzelm@7134	199	Mixfix annotations specify concrete \emph{inner} syntax of Isabelle types and
wenzelm@7167	200	terms. Some commands such as $\TYPES$ admit infixes only, while $\CONSTS$ and
wenzelm@7167	201	$\isarkeyword{syntax}$ support the full range of general mixfixes and binders.
wenzelm@7046	202
wenzelm@7050	203	\indexouternonterm{infix}\indexouternonterm{mixfix}
wenzelm@7050	204	\begin{rail}
wenzelm@7167	205	infix: '(' ('infixl' | 'infixr') string? nat ')'
wenzelm@7167	206	;
wenzelm@7175	207	mixfix: infix | '(' string prios? nat? ')' | '(' 'binder' string prios? nat ')'
wenzelm@7050	208	;
wenzelm@7046	209
wenzelm@7175	210	prios: '[' (nat + ',') ']'
wenzelm@7050	211	;
wenzelm@7050	212	\end{rail}
wenzelm@7046	213
wenzelm@7050	214
wenzelm@7134	215	\subsection{Attributes and theorems}\label{sec:syn-att}
wenzelm@7050	216
wenzelm@7050	217	Attributes (and proof methods, see \S\ref{sec:syn-meth}) have their own
wenzelm@7315	218	``semi-inner'' syntax, in the sense that input conforming to \railnonterm{args}
wenzelm@7315	219	below is parsed by the attribute a second time. The attribute argument
wenzelm@7050	220	specifications may be any sequence of atomic entities (identifiers, strings
wenzelm@7167	221	etc.), or properly bracketed argument lists. Below \railqtoken{atom} refers
wenzelm@7175	222	to any atomic entity, including \railtoken{keyword}s conforming to
wenzelm@7175	223	\railtoken{symident}.
wenzelm@7050	224
wenzelm@7050	225	\indexoutertoken{atom}\indexouternonterm{args}\indexouternonterm{attributes}
wenzelm@7050	226	\begin{rail}
wenzelm@7167	227	atom: nameref | typefree | typevar | var | textvar | nat | keyword
wenzelm@7050	228	;
wenzelm@7167	229	arg: atom | '(' args ')' | '[' args ']' | lbrace args rbrace
wenzelm@7134	230	;
wenzelm@7167	231	args: arg *
wenzelm@7134	232	;
wenzelm@7167	233	attributes: '[' (nameref args * ',') ']'
wenzelm@7050	234	;
wenzelm@7050	235	\end{rail}
wenzelm@7050	236
wenzelm@7315	237	Theorem specifications come in several flavours: \railnonterm{axmdecl} and
wenzelm@7175	238	\railnonterm{thmdecl} usually refer to axioms, assumptions or results of goal
wenzelm@7175	239	statements, while \railnonterm{thmdef} collects lists of existing theorems.
wenzelm@7175	240	Existing theorems are given by \railnonterm{thmref} and \railnonterm{thmrefs}
wenzelm@7175	241	(the former requires an actual singleton result). Any of these theorem
wenzelm@7175	242	specifications may include lists of attributes both on the left and right hand
wenzelm@7175	243	sides; attributes are applied to any immediately preceding theorem.
wenzelm@7050	244
wenzelm@7135	245	\indexouternonterm{thmdecl}\indexouternonterm{axmdecl}
wenzelm@7135	246	\indexouternonterm{thmdef}\indexouternonterm{thmrefs}
wenzelm@7050	247	\begin{rail}
wenzelm@7167	248	axmdecl: name attributes? ':'
wenzelm@7050	249	;
wenzelm@7167	250	thmdecl: thmname ':'
wenzelm@7135	251	;
wenzelm@7167	252	thmdef: thmname '='
wenzelm@7050	253	;
wenzelm@7175	254	thmref: nameref attributes?
wenzelm@7175	255	;
wenzelm@7175	256	thmrefs: thmref +
wenzelm@7134	257	;
wenzelm@7167	258
wenzelm@7167	259	thmname: name attributes | name | attributes
wenzelm@7050	260	;
wenzelm@7050	261	\end{rail}
wenzelm@7050	262
wenzelm@7050	263
wenzelm@7050	264	\subsection{Proof methods}\label{sec:syn-meth}
wenzelm@7050	265
wenzelm@7050	266	Proof methods are either basic ones, or expressions composed of methods via
wenzelm@7175	267	``\texttt{,}'' (sequential composition), ``\texttt{|}'' (alternative choices),
wenzelm@7167	268	``\texttt{?}'' (try), ``\texttt{*}'' (repeat ${} \ge 0$ times), ``\texttt{+}''
wenzelm@7167	269	(repeat ${} > 0$ times). In practice, proof methods are usually just a
wenzelm@7175	270	comma-separated list of (\railqtoken{nameref}~\railnonterm{args}) specifications.
wenzelm@7167	271	Thus the syntax is similar to that of attributes, with plain parentheses
wenzelm@7167	272	instead of square brackets (see also \S\ref{sec:syn-att}). Note that
wenzelm@7167	273	parentheses may be dropped for single method specifications without arguments.
wenzelm@7050	274
wenzelm@7050	275	\indexouternonterm{method}
wenzelm@7050	276	\begin{rail}
wenzelm@7167	277	method: (nameref | '(' methods ')') (() | '?' | '*' | '+')
wenzelm@7134	278	;
wenzelm@7167	279	methods: (nameref args | method) + (',' | '|')
wenzelm@7050	280	;
wenzelm@7050	281	\end{rail}
wenzelm@7046	282
wenzelm@7046	283
wenzelm@7046	284	%%% Local Variables:
wenzelm@7046	285	%%% mode: latex
wenzelm@7046	286	%%% TeX-master: "isar-ref"
wenzelm@7046	287	%%% End:

author	wenzelm
	Mon, 23 Aug 1999 15:27:27 +0200
changeset 7321	b4dcc32310fb
parent 7319	3907d597cae6
child 7335	abba35b98892
permissions	-rw-r--r--