wneuper/isa: doc-src/IsarRef/Thy/Inner_Syntax.thy@8fc228f21861 (annotated)

wenzelm@28762	1	(* $Id$ *)
wenzelm@28762	2
wenzelm@28762	3	theory Inner_Syntax
wenzelm@28762	4	imports Main
wenzelm@28762	5	begin
wenzelm@28762	6
wenzelm@28762	7	chapter {* Inner syntax --- the term language *}
wenzelm@28762	8
wenzelm@28762	9	section {* Printing logical entities *}
wenzelm@28762	10
wenzelm@28762	11	subsection {* Diagnostic commands *}
wenzelm@28762	12
wenzelm@28762	13	text {*
wenzelm@28762	14	\begin{matharray}{rcl}
wenzelm@28766	15	@{command_def "typ"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28762	16	@{command_def "term"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28762	17	@{command_def "prop"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28766	18	@{command_def "thm"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28762	19	@{command_def "prf"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28762	20	@{command_def "full_prf"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28766	21	@{command_def "pr"}@{text "\<^sup>*"} & : & @{text "any \<rightarrow>"} \\
wenzelm@28762	22	\end{matharray}
wenzelm@28762	23
wenzelm@28762	24	These diagnostic commands assist interactive development by printing
wenzelm@28762	25	internal logical entities in a human-readable fashion.
wenzelm@28762	26
wenzelm@28762	27	\begin{rail}
wenzelm@28766	28	'typ' modes? type
wenzelm@28762	29	;
wenzelm@28762	30	'term' modes? term
wenzelm@28762	31	;
wenzelm@28762	32	'prop' modes? prop
wenzelm@28762	33	;
wenzelm@28766	34	'thm' modes? thmrefs
wenzelm@28762	35	;
wenzelm@28766	36	( 'prf' \| 'full\_prf' ) modes? thmrefs?
wenzelm@28762	37	;
wenzelm@28766	38	'pr' modes? nat? (',' nat)?
wenzelm@28762	39	;
wenzelm@28762	40
wenzelm@28762	41	modes: '(' (name + ) ')'
wenzelm@28762	42	;
wenzelm@28762	43	\end{rail}
wenzelm@28762	44
wenzelm@28762	45	\begin{description}
wenzelm@28762	46
wenzelm@28766	47	\item @{command "typ"}~@{text \<tau>} reads and prints types of the
wenzelm@28766	48	meta-logic according to the current theory or proof context.
wenzelm@28766	49
wenzelm@28766	50	\item @{command "term"}~@{text t} and @{command "prop"}~@{text \<phi>}
wenzelm@28766	51	read, type-check and print terms or propositions according to the
wenzelm@28766	52	current theory or proof context; the inferred type of @{text t} is
wenzelm@28766	53	output as well. Note that these commands are also useful in
wenzelm@28766	54	inspecting the current environment of term abbreviations.
wenzelm@28762	55
wenzelm@28762	56	\item @{command "thm"}~@{text "a\<^sub>1 \<dots> a\<^sub>n"} retrieves
wenzelm@28762	57	theorems from the current theory or proof context. Note that any
wenzelm@28762	58	attributes included in the theorem specifications are applied to a
wenzelm@28762	59	temporary context derived from the current theory or proof; the
wenzelm@28762	60	result is discarded, i.e.\ attributes involved in @{text "a\<^sub>1,
wenzelm@28762	61	\<dots>, a\<^sub>n"} do not have any permanent effect.
wenzelm@28762	62
wenzelm@28762	63	\item @{command "prf"} displays the (compact) proof term of the
wenzelm@28762	64	current proof state (if present), or of the given theorems. Note
wenzelm@28762	65	that this requires proof terms to be switched on for the current
wenzelm@28762	66	object logic (see the ``Proof terms'' section of the Isabelle
wenzelm@28762	67	reference manual for information on how to do this).
wenzelm@28762	68
wenzelm@28762	69	\item @{command "full_prf"} is like @{command "prf"}, but displays
wenzelm@28762	70	the full proof term, i.e.\ also displays information omitted in the
wenzelm@28762	71	compact proof term, which is denoted by ``@{text _}'' placeholders
wenzelm@28762	72	there.
wenzelm@28762	73
wenzelm@28766	74	\item @{command "pr"}~@{text "goals, prems"} prints the current
wenzelm@28766	75	proof state (if present), including the proof context, current facts
wenzelm@28766	76	and goals. The optional limit arguments affect the number of goals
wenzelm@28766	77	and premises to be displayed, which is initially 10 for both.
wenzelm@28766	78	Omitting limit values leaves the current setting unchanged.
wenzelm@28766	79
wenzelm@28762	80	\end{description}
wenzelm@28762	81
wenzelm@28762	82	All of the diagnostic commands above admit a list of @{text modes}
wenzelm@28762	83	to be specified, which is appended to the current print mode (see
wenzelm@28762	84	also \cite{isabelle-ref}). Thus the output behavior may be modified
wenzelm@28762	85	according to particular print mode features. For example, @{command
wenzelm@28762	86	"pr"}~@{text "(latex xsymbols)"} would print the current proof state
wenzelm@28762	87	with mathematical symbols and special characters represented in
wenzelm@28762	88	{\LaTeX} source, according to the Isabelle style
wenzelm@28762	89	\cite{isabelle-sys}.
wenzelm@28762	90
wenzelm@28762	91	Note that antiquotations (cf.\ \secref{sec:antiq}) provide a more
wenzelm@28762	92	systematic way to include formal items into the printed text
wenzelm@28762	93	document.
wenzelm@28762	94	*}
wenzelm@28762	95
wenzelm@28762	96
wenzelm@28763	97	subsection {* Details of printed content *}
wenzelm@28763	98
wenzelm@28763	99	text {*
wenzelm@28763	100	\begin{mldecls}
wenzelm@28763	101	@{index_ML show_types: "bool ref"} & default @{ML false} \\
wenzelm@28763	102	@{index_ML show_sorts: "bool ref"} & default @{ML false} \\
wenzelm@28763	103	@{index_ML show_consts: "bool ref"} & default @{ML false} \\
wenzelm@28763	104	@{index_ML long_names: "bool ref"} & default @{ML false} \\
wenzelm@28763	105	@{index_ML short_names: "bool ref"} & default @{ML false} \\
wenzelm@28763	106	@{index_ML unique_names: "bool ref"} & default @{ML true} \\
wenzelm@28763	107	@{index_ML show_brackets: "bool ref"} & default @{ML false} \\
wenzelm@28765	108	@{index_ML eta_contract: "bool ref"} & default @{ML true} \\
wenzelm@28763	109	@{index_ML goals_limit: "int ref"} & default @{ML 10} \\
wenzelm@28763	110	@{index_ML Proof.show_main_goal: "bool ref"} & default @{ML false} \\
wenzelm@28763	111	@{index_ML show_hyps: "bool ref"} & default @{ML false} \\
wenzelm@28763	112	@{index_ML show_tags: "bool ref"} & default @{ML false} \\
wenzelm@28765	113	@{index_ML show_question_marks: "bool ref"} & default @{ML true} \\
wenzelm@28763	114	\end{mldecls}
wenzelm@28763	115
wenzelm@28763	116	These global ML variables control the detail of information that is
wenzelm@28763	117	displayed for types, terms, theorems, goals etc.
wenzelm@28763	118
wenzelm@28765	119	In interactive sessions, the user interface usually manages these
wenzelm@28765	120	global parameters of the Isabelle process, even with some concept of
wenzelm@28765	121	persistence. Nonetheless it is occasionally useful to manipulate ML
wenzelm@28765	122	variables directly, e.g.\ using @{command "ML_val"} or @{command
wenzelm@28765	123	"ML_command"}.
wenzelm@28765	124
wenzelm@28765	125	Batch-mode logic sessions may be configured by putting appropriate
wenzelm@28765	126	ML text directly into the @{verbatim ROOT.ML} file.
wenzelm@28765	127
wenzelm@28763	128	\begin{description}
wenzelm@28763	129
wenzelm@28763	130	\item @{ML show_types} and @{ML show_sorts} control printing of type
wenzelm@28763	131	constraints for term variables, and sort constraints for type
wenzelm@28763	132	variables. By default, neither of these is shown in output. If
wenzelm@28763	133	@{ML show_sorts} is set to @{ML true}, types are always shown as
wenzelm@28763	134	well.
wenzelm@28763	135
wenzelm@28763	136	Note that displaying types and sorts may explain why a polymorphic
wenzelm@28763	137	inference rule fails to resolve with some goal, or why a rewrite
wenzelm@28763	138	rule does not apply as expected.
wenzelm@28763	139
wenzelm@28763	140	\item @{ML show_consts} controls printing of types of constants when
wenzelm@28765	141	displaying a goal state.
wenzelm@28765	142
wenzelm@28765	143	Note that the output can be enormous, because polymorphic constants
wenzelm@28765	144	often occur at several different type instances.
wenzelm@28763	145
wenzelm@28763	146	\item @{ML long_names}, @{ML short_names}, and @{ML unique_names}
wenzelm@28765	147	control the way of printing fully qualified internal names in
wenzelm@28765	148	external form. See also \secref{sec:antiq} for the document
wenzelm@28765	149	antiquotation options of the same names.
wenzelm@28763	150
wenzelm@28765	151	\item @{ML show_brackets} controls bracketing in pretty printed
wenzelm@28765	152	output. If set to @{ML true}, all sub-expressions of the pretty
wenzelm@28765	153	printing tree will be parenthesized, even if this produces malformed
wenzelm@28765	154	term syntax! This crude way of showing the internal structure of
wenzelm@28765	155	pretty printed entities may occasionally help to diagnose problems
wenzelm@28765	156	with operator priorities, for example.
wenzelm@28763	157
wenzelm@28763	158	\item @{ML eta_contract} controls @{text "\<eta>"}-contracted printing of
wenzelm@28763	159	terms.
wenzelm@28763	160
wenzelm@28763	161	The @{text \<eta>}-contraction law asserts @{prop "(\<lambda>x. f x) \<equiv> f"},
wenzelm@28763	162	provided @{text x} is not free in @{text f}. It asserts
wenzelm@28763	163	\emph{extensionality} of functions: @{prop "f \<equiv> g"} if @{prop "f x \<equiv>
wenzelm@28763	164	g x"} for all @{text x}. Higher-order unification frequently puts
wenzelm@28763	165	terms into a fully @{text \<eta>}-expanded form. For example, if @{text
wenzelm@28763	166	F} has type @{text "(\<tau> \<Rightarrow> \<tau>) \<Rightarrow> \<tau>"} then its expanded form is @{term
wenzelm@28763	167	"\<lambda>h. F (\<lambda>x. h x)"}.
wenzelm@28763	168
wenzelm@28763	169	Setting @{ML eta_contract} makes Isabelle perform @{text
wenzelm@28763	170	\<eta>}-contractions before printing, so that @{term "\<lambda>h. F (\<lambda>x. h x)"}
wenzelm@28763	171	appears simply as @{text F}.
wenzelm@28763	172
wenzelm@28763	173	Note that the distinction between a term and its @{text \<eta>}-expanded
wenzelm@28765	174	form occasionally matters. While higher-order resolution and
wenzelm@28765	175	rewriting operate modulo @{text "\<alpha>\<beta>\<eta>"}-conversion, some other tools
wenzelm@28765	176	might look at terms more discretely.
wenzelm@28763	177
wenzelm@28763	178	\item @{ML goals_limit} controls the maximum number of subgoals to
wenzelm@28765	179	be shown in goal output.
wenzelm@28763	180
wenzelm@28763	181	\item @{ML Proof.show_main_goal} controls whether the main result to
wenzelm@28763	182	be proven should be displayed. This information might be relevant
wenzelm@28765	183	for schematic goals, to inspect the current claim that has been
wenzelm@28765	184	synthesized so far.
wenzelm@28763	185
wenzelm@28763	186	\item @{ML show_hyps} controls printing of implicit hypotheses of
wenzelm@28763	187	local facts. Normally, only those hypotheses are displayed that are
wenzelm@28763	188	\emph{not} covered by the assumptions of the current context: this
wenzelm@28763	189	situation indicates a fault in some tool being used.
wenzelm@28763	190
wenzelm@28765	191	By setting @{ML show_hyps} to @{ML true}, output of \emph{all}
wenzelm@28765	192	hypotheses can be enforced, which is occasionally useful for
wenzelm@28765	193	diagnostic purposes.
wenzelm@28763	194
wenzelm@28763	195	\item @{ML show_tags} controls printing of extra annotations within
wenzelm@28765	196	theorems, such as internal position information, or the case names
wenzelm@28765	197	being attached by the attribute @{attribute case_names}.
wenzelm@28765	198
wenzelm@28765	199	Note that the @{attribute tagged} and @{attribute untagged}
wenzelm@28765	200	attributes provide low-level access to the collection of tags
wenzelm@28765	201	associated with a theorem.
wenzelm@28765	202
wenzelm@28765	203	\item @{ML show_question_marks} controls printing of question marks
wenzelm@28765	204	for schematic variables, such as @{text ?x}. Only the leading
wenzelm@28765	205	question mark is affected; the remaining text is unchanged
wenzelm@28765	206	(including proper markup for schematic variables that might be
wenzelm@28765	207	relevant for user interfaces).
wenzelm@28765	208
wenzelm@28765	209	\end{description}
wenzelm@28765	210	*}
wenzelm@28765	211
wenzelm@28765	212
wenzelm@28765	213	subsection {* Printing limits *}
wenzelm@28765	214
wenzelm@28765	215	text {*
wenzelm@28765	216	\begin{mldecls}
wenzelm@28765	217	@{index_ML Pretty.setdepth: "int -> unit"} \\
wenzelm@28765	218	@{index_ML Pretty.setmargin: "int -> unit"} \\
wenzelm@28765	219	@{index_ML print_depth: "int -> unit"} \\
wenzelm@28765	220	\end{mldecls}
wenzelm@28765	221
wenzelm@28765	222	These ML functions set limits for pretty printed text.
wenzelm@28765	223
wenzelm@28765	224	\begin{description}
wenzelm@28765	225
wenzelm@28765	226	\item @{ML Pretty.setdepth}~@{text d} tells the pretty printer to
wenzelm@28765	227	limit the printing depth to @{text d}. This affects the display of
wenzelm@28765	228	types, terms, theorems etc. The default value is 0, which permits
wenzelm@28765	229	printing to an arbitrary depth. Other useful values for @{text d}
wenzelm@28765	230	are 10 and 20.
wenzelm@28765	231
wenzelm@28765	232	\item @{ML Pretty.setmargin}~@{text m} tells the pretty printer to
wenzelm@28765	233	assume a right margin (page width) of @{text m}. The initial margin
wenzelm@28765	234	is 76, but user interfaces might adapt the margin automatically when
wenzelm@28765	235	resizing windows.
wenzelm@28765	236
wenzelm@28765	237	\item @{ML print_depth}~@{text n} limits the printing depth of the
wenzelm@28765	238	ML toplevel pretty printer; the precise effect depends on the ML
wenzelm@28765	239	compiler and run-time system. Typically @{text n} should be less
wenzelm@28765	240	than 10. Bigger values such as 100--1000 are useful for debugging.
wenzelm@28763	241
wenzelm@28763	242	\end{description}
wenzelm@28763	243	*}
wenzelm@28763	244
wenzelm@28763	245
wenzelm@28762	246	section {* Mixfix annotations *}
wenzelm@28762	247
wenzelm@28762	248	text {* Mixfix annotations specify concrete \emph{inner syntax} of
wenzelm@28767	249	Isabelle types and terms. Some commands such as @{command
wenzelm@28767	250	"typedecl"} admit infixes only, while @{command "definition"} etc.\
wenzelm@28767	251	support the full range of general mixfixes and binders. Fixed
wenzelm@28767	252	parameters in toplevel theorem statements and locale specifications
wenzelm@28767	253	also admit mixfix annotations.
wenzelm@28762	254
wenzelm@28762	255	\indexouternonterm{infix}\indexouternonterm{mixfix}\indexouternonterm{structmixfix}
wenzelm@28762	256	\begin{rail}
wenzelm@28762	257	infix: '(' ('infix' \| 'infixl' \| 'infixr') string nat ')'
wenzelm@28762	258	;
wenzelm@28762	259	mixfix: infix \| '(' string prios? nat? ')' \| '(' 'binder' string prios? nat ')'
wenzelm@28762	260	;
wenzelm@28762	261	structmixfix: mixfix \| '(' 'structure' ')'
wenzelm@28762	262	;
wenzelm@28762	263
wenzelm@28762	264	prios: '[' (nat + ',') ']'
wenzelm@28762	265	;
wenzelm@28762	266	\end{rail}
wenzelm@28762	267
wenzelm@28762	268	Here the \railtok{string} specifications refer to the actual mixfix
wenzelm@28762	269	template, which may include literal text, spacing, blocks, and
wenzelm@28762	270	arguments (denoted by ``@{text _}''); the special symbol
wenzelm@28762	271	``@{verbatim "\<index>"}'' (printed as ``@{text "\<index>"}'') represents an index
wenzelm@28762	272	argument that specifies an implicit structure reference (see also
wenzelm@28762	273	\secref{sec:locale}). Infix and binder declarations provide common
wenzelm@28762	274	abbreviations for particular mixfix declarations. So in practice,
wenzelm@28762	275	mixfix templates mostly degenerate to literal text for concrete
wenzelm@28762	276	syntax, such as ``@{verbatim "++"}'' for an infix symbol.
wenzelm@28762	277
wenzelm@28762	278	\medskip In full generality, mixfix declarations work as follows.
wenzelm@28762	279	Suppose a constant @{text "c :: \<tau>\<^sub>1 \<Rightarrow> \<dots> \<tau>\<^sub>n \<Rightarrow> \<tau>"} is
wenzelm@28762	280	annotated by @{text "(mixfix [p\<^sub>1, \<dots>, p\<^sub>n] p)"}, where @{text
wenzelm@28762	281	"mixfix"} is a string @{text "d\<^sub>0 _ d\<^sub>1 _ \<dots> _ d\<^sub>n"} consisting of
wenzelm@28762	282	delimiters that surround argument positions as indicated by
wenzelm@28762	283	underscores.
wenzelm@28762	284
wenzelm@28762	285	Altogether this determines a production for a context-free priority
wenzelm@28762	286	grammar, where for each argument @{text "i"} the syntactic category
wenzelm@28762	287	is determined by @{text "\<tau>\<^sub>i"} (with priority @{text "p\<^sub>i"}), and
wenzelm@28762	288	the result category is determined from @{text "\<tau>"} (with
wenzelm@28762	289	priority @{text "p"}). Priority specifications are optional, with
wenzelm@28762	290	default 0 for arguments and 1000 for the result.
wenzelm@28762	291
wenzelm@28762	292	Since @{text "\<tau>"} may be again a function type, the constant
wenzelm@28762	293	type scheme may have more argument positions than the mixfix
wenzelm@28762	294	pattern. Printing a nested application @{text "c t\<^sub>1 \<dots> t\<^sub>m"} for
wenzelm@28762	295	@{text "m > n"} works by attaching concrete notation only to the
wenzelm@28762	296	innermost part, essentially by printing @{text "(c t\<^sub>1 \<dots> t\<^sub>n) \<dots> t\<^sub>m"}
wenzelm@28762	297	instead. If a term has fewer arguments than specified in the mixfix
wenzelm@28762	298	template, the concrete syntax is ignored.
wenzelm@28762	299
wenzelm@28762	300	\medskip A mixfix template may also contain additional directives
wenzelm@28762	301	for pretty printing, notably spaces, blocks, and breaks. The
wenzelm@28762	302	general template format is a sequence over any of the following
wenzelm@28762	303	entities.
wenzelm@28762	304
wenzelm@28762	305	\begin{itemize}
wenzelm@28762	306
wenzelm@28762	307	\item @{text "\<^bold>d"} is a delimiter, namely a non-empty
wenzelm@28762	308	sequence of characters other than the special characters @{text "'"}
wenzelm@28762	309	(single quote), @{text "_"} (underscore), @{text "\<index>"} (index
wenzelm@28762	310	symbol), @{text "/"} (slash), @{text "("} and @{text ")"}
wenzelm@28762	311	(parentheses).
wenzelm@28762	312
wenzelm@28762	313	A single quote escapes the special meaning of these meta-characters,
wenzelm@28762	314	producing a literal version of the following character, unless that
wenzelm@28762	315	is a blank. A single quote followed by a blank separates
wenzelm@28762	316	delimiters, without affecting printing, but input tokens may have
wenzelm@28762	317	additional white space here.
wenzelm@28762	318
wenzelm@28762	319	\item @{text "_"} is an argument position, which stands for a
wenzelm@28762	320	certain syntactic category in the underlying grammar.
wenzelm@28762	321
wenzelm@28762	322	\item @{text "\<index>"} is an indexed argument position; this is
wenzelm@28762	323	the place where implicit structure arguments can be attached.
wenzelm@28762	324
wenzelm@28762	325	\item @{text "\<^bold>s"} is a non-empty sequence of spaces for
wenzelm@28762	326	printing. This and the following specifications do not affect
wenzelm@28762	327	parsing at all.
wenzelm@28762	328
wenzelm@28762	329	\item @{text "(\<^bold>n"} opens a pretty printing block. The
wenzelm@28762	330	optional number specifies how much indentation to add when a line
wenzelm@28762	331	break occurs within the block. If the parenthesis is not followed
wenzelm@28762	332	by digits, the indentation defaults to 0. A block specified via
wenzelm@28762	333	@{text "(00"} is unbreakable.
wenzelm@28762	334
wenzelm@28762	335	\item @{text ")"} closes a pretty printing block.
wenzelm@28762	336
wenzelm@28762	337	\item @{text "//"} forces a line break.
wenzelm@28762	338
wenzelm@28762	339	\item @{text "/\<^bold>s"} allows a line break. Here @{text
wenzelm@28762	340	"\<^bold>s"} stands for the string of spaces (zero or more) right
wenzelm@28762	341	after the slash. These spaces are printed if the break is
wenzelm@28762	342	\emph{not} taken.
wenzelm@28762	343
wenzelm@28762	344	\end{itemize}
wenzelm@28762	345
wenzelm@28762	346	For example, the template @{text "(_ +/ _)"} specifies an infix
wenzelm@28762	347	operator. There are two argument positions; the delimiter @{text
wenzelm@28762	348	"+"} is preceded by a space and followed by a space or line break;
wenzelm@28762	349	the entire phrase is a pretty printing block.
wenzelm@28762	350
wenzelm@28762	351	The general idea of pretty printing with blocks and breaks is also
wenzelm@28762	352	described in \cite{paulson-ml2}.
wenzelm@28762	353	*}
wenzelm@28762	354
wenzelm@28762	355
wenzelm@28766	356	section {* Explicit term notation *}
wenzelm@28762	357
wenzelm@28762	358	text {*
wenzelm@28762	359	\begin{matharray}{rcll}
wenzelm@28762	360	@{command_def "notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
wenzelm@28762	361	@{command_def "no_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
wenzelm@28762	362	\end{matharray}
wenzelm@28762	363
wenzelm@28762	364	\begin{rail}
wenzelm@28762	365	('notation' \| 'no\_notation') target? mode? (nameref structmixfix + 'and')
wenzelm@28762	366	;
wenzelm@28762	367	\end{rail}
wenzelm@28762	368
wenzelm@28762	369	\begin{description}
wenzelm@28762	370
wenzelm@28762	371	\item @{command "notation"}~@{text "c (mx)"} associates mixfix
wenzelm@28762	372	syntax with an existing constant or fixed variable. This is a
wenzelm@28762	373	robust interface to the underlying @{command "syntax"} primitive
wenzelm@28762	374	(\secref{sec:syn-trans}). Type declaration and internal syntactic
wenzelm@28762	375	representation of the given entity are retrieved from the context.
wenzelm@28762	376
wenzelm@28762	377	\item @{command "no_notation"} is similar to @{command "notation"},
wenzelm@28762	378	but removes the specified syntax annotation from the present
wenzelm@28762	379	context.
wenzelm@28762	380
wenzelm@28762	381	\end{description}
wenzelm@28762	382	*}
wenzelm@28762	383
wenzelm@28769	384	section {* The Pure syntax *}
wenzelm@28769	385
wenzelm@28769	386	subsection {* Priority grammars *}
wenzelm@28769	387
wenzelm@28769	388	text {* A context-free grammar consists of a set of \emph{terminal
wenzelm@28769	389	symbols}, a set of \emph{nonterminal symbols} and a set of
wenzelm@28769	390	\emph{productions}. Productions have the form @{text "A = \<gamma>"},
wenzelm@28769	391	where @{text A} is a nonterminal and @{text \<gamma>} is a string of
wenzelm@28769	392	terminals and nonterminals. One designated nonterminal is called
wenzelm@28769	393	the \emph{root symbol}. The language defined by the grammar
wenzelm@28769	394	consists of all strings of terminals that can be derived from the
wenzelm@28769	395	root symbol by applying productions as rewrite rules.
wenzelm@28769	396
wenzelm@28769	397	The standard Isabelle parser for inner syntax uses a \emph{priority
wenzelm@28769	398	grammar}. Each nonterminal is decorated by an integer priority:
wenzelm@28769	399	@{text "A\<^sup>(\<^sup>p\<^sup>)"}. In a derivation, @{text "A\<^sup>(\<^sup>p\<^sup>)"} may be rewritten
wenzelm@28769	400	using a production @{text "A\<^sup>(\<^sup>q\<^sup>) = \<gamma>"} only if @{text "p \<le> q"}. Any
wenzelm@28769	401	priority grammar can be translated into a normal context-free
wenzelm@28769	402	grammar by introducing new nonterminals and productions.
wenzelm@28769	403
wenzelm@28769	404	\medskip Formally, a set of context-free productions @{text G}
wenzelm@28769	405	induces a derivation relation @{text "\<longrightarrow>\<^sub>G"} as follows. Let @{text
wenzelm@28769	406	\<alpha>} and @{text \<beta>} denote strings of terminal or nonterminal symbols.
wenzelm@28769	407	Then
wenzelm@28769	408	\[
wenzelm@28769	409	@{text "\<alpha> A\<^sup>(\<^sup>p\<^sup>) \<beta> \<longrightarrow>\<^sub>G \<alpha> \<gamma> \<beta>"}
wenzelm@28769	410	\]
wenzelm@28769	411	if and only if @{text G} contains some production @{text "A\<^sup>(\<^sup>q\<^sup>) = \<gamma>"}
wenzelm@28769	412	for @{text "p \<le> q"}.
wenzelm@28769	413
wenzelm@28769	414	\medskip The following grammar for arithmetic expressions
wenzelm@28769	415	demonstrates how binding power and associativity of operators can be
wenzelm@28769	416	enforced by priorities.
wenzelm@28769	417
wenzelm@28769	418	\begin{center}
wenzelm@28769	419	\begin{tabular}{rclr}
wenzelm@28769	420	@{text "A\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "="} & @{verbatim 0} \\
wenzelm@28769	421	@{text "A\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "="} & @{verbatim "("} @{text "A\<^sup>(\<^sup>0\<^sup>)"} @{verbatim ")"} \\
wenzelm@28769	422	@{text "A\<^sup>(\<^sup>0\<^sup>)"} & @{text "="} & @{text "A\<^sup>(\<^sup>0\<^sup>)"} @{verbatim "+"} @{text "A\<^sup>(\<^sup>1\<^sup>)"} \\
wenzelm@28769	423	@{text "A\<^sup>(\<^sup>2\<^sup>)"} & @{text "="} & @{text "A\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "*"} @{text "A\<^sup>(\<^sup>2\<^sup>)"} \\
wenzelm@28769	424	@{text "A\<^sup>(\<^sup>3\<^sup>)"} & @{text "="} & @{verbatim "-"} @{text "A\<^sup>(\<^sup>3\<^sup>)"} \\
wenzelm@28769	425	\end{tabular}
wenzelm@28769	426	\end{center}
wenzelm@28769	427	The choice of priorities determines that @{verbatim "-"} binds
wenzelm@28769	428	tighter than @{verbatim "*"}, which binds tighter than @{verbatim
wenzelm@28769	429	"+"}. Furthermore @{verbatim "+"} associates to the left and
wenzelm@28769	430	@{verbatim "*"} to the right.
wenzelm@28769	431
wenzelm@28769	432	\medskip For clarity, grammars obey these conventions:
wenzelm@28769	433	\begin{itemize}
wenzelm@28769	434
wenzelm@28769	435	\item All priorities must lie between 0 and 1000.
wenzelm@28769	436
wenzelm@28769	437	\item Priority 0 on the right-hand side and priority 1000 on the
wenzelm@28769	438	left-hand side may be omitted.
wenzelm@28769	439
wenzelm@28769	440	\item The production @{text "A\<^sup>(\<^sup>p\<^sup>) = \<alpha>"} is written as @{text "A = \<alpha>
wenzelm@28769	441	(p)"}, i.e.\ the priority of the left-hand side actually appears in
wenzelm@28769	442	a column on the far right.
wenzelm@28769	443
wenzelm@28769	444	\item Alternatives are separated by @{text "\|"}.
wenzelm@28769	445
wenzelm@28769	446	\item Repetition is indicated by dots @{text "(\<dots>)"} in an informal
wenzelm@28769	447	but obvious way.
wenzelm@28769	448
wenzelm@28769	449	\end{itemize}
wenzelm@28769	450
wenzelm@28769	451	Using these conventions, the example grammar specification above
wenzelm@28769	452	takes the form:
wenzelm@28769	453	\begin{center}
wenzelm@28769	454	\begin{tabular}{rclc}
wenzelm@28769	455	@{text A} & @{text "="} & @{verbatim 0} & \qquad\qquad \\
wenzelm@28769	456	& @{text "\|"} & @{verbatim "("} @{text A} @{verbatim ")"} \\
wenzelm@28769	457	& @{text "\|"} & @{text A} @{verbatim "+"} @{text "A\<^sup>(\<^sup>1\<^sup>)"} & @{text "(0)"} \\
wenzelm@28769	458	& @{text "\|"} & @{text "A\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "*"} @{text "A\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
wenzelm@28769	459	& @{text "\|"} & @{verbatim "-"} @{text "A\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
wenzelm@28769	460	\end{tabular}
wenzelm@28769	461	\end{center}
wenzelm@28769	462	*}
wenzelm@28769	463
wenzelm@28769	464
wenzelm@28762	465	section {* Syntax and translations \label{sec:syn-trans} *}
wenzelm@28762	466
wenzelm@28762	467	text {*
wenzelm@28762	468	\begin{matharray}{rcl}
wenzelm@28762	469	@{command_def "nonterminals"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762	470	@{command_def "syntax"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762	471	@{command_def "no_syntax"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762	472	@{command_def "translations"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762	473	@{command_def "no_translations"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762	474	\end{matharray}
wenzelm@28762	475
wenzelm@28762	476	\begin{rail}
wenzelm@28762	477	'nonterminals' (name +)
wenzelm@28762	478	;
wenzelm@28762	479	('syntax' \| 'no\_syntax') mode? (constdecl +)
wenzelm@28762	480	;
wenzelm@28762	481	('translations' \| 'no\_translations') (transpat ('==' \| '=>' \| '<=' \| rightleftharpoons \| rightharpoonup \| leftharpoondown) transpat +)
wenzelm@28762	482	;
wenzelm@28762	483
wenzelm@28762	484	mode: ('(' ( name \| 'output' \| name 'output' ) ')')
wenzelm@28762	485	;
wenzelm@28762	486	transpat: ('(' nameref ')')? string
wenzelm@28762	487	;
wenzelm@28762	488	\end{rail}
wenzelm@28762	489
wenzelm@28762	490	\begin{description}
wenzelm@28762	491
wenzelm@28762	492	\item @{command "nonterminals"}~@{text c} declares a type
wenzelm@28762	493	constructor @{text c} (without arguments) to act as purely syntactic
wenzelm@28762	494	type: a nonterminal symbol of the inner syntax.
wenzelm@28762	495
wenzelm@28762	496	\item @{command "syntax"}~@{text "(mode) decls"} is similar to
wenzelm@28762	497	@{command "consts"}~@{text decls}, except that the actual logical
wenzelm@28762	498	signature extension is omitted. Thus the context-free grammar of
wenzelm@28762	499	Isabelle's inner syntax may be augmented in arbitrary ways,
wenzelm@28762	500	independently of the logic. The @{text mode} argument refers to the
wenzelm@28762	501	print mode to which the grammar rules belong; unless the @{keyword_ref
wenzelm@28762	502	"output"} indicator is given, all productions are added both to the
wenzelm@28762	503	input and output grammar.
wenzelm@28762	504
wenzelm@28762	505	\item @{command "no_syntax"}~@{text "(mode) decls"} removes grammar
wenzelm@28762	506	declarations (and translations) resulting from @{text decls}, which
wenzelm@28762	507	are interpreted in the same manner as for @{command "syntax"} above.
wenzelm@28762	508
wenzelm@28762	509	\item @{command "translations"}~@{text rules} specifies syntactic
wenzelm@28762	510	translation rules (i.e.\ macros): parse~/ print rules (@{text "\<rightleftharpoons>"}),
wenzelm@28762	511	parse rules (@{text "\<rightharpoonup>"}), or print rules (@{text "\<leftharpoondown>"}).
wenzelm@28762	512	Translation patterns may be prefixed by the syntactic category to be
wenzelm@28762	513	used for parsing; the default is @{text logic}.
wenzelm@28762	514
wenzelm@28762	515	\item @{command "no_translations"}~@{text rules} removes syntactic
wenzelm@28762	516	translation rules, which are interpreted in the same manner as for
wenzelm@28762	517	@{command "translations"} above.
wenzelm@28762	518
wenzelm@28762	519	\end{description}
wenzelm@28762	520	*}
wenzelm@28762	521
wenzelm@28762	522
wenzelm@28762	523	section {* Syntax translation functions *}
wenzelm@28762	524
wenzelm@28762	525	text {*
wenzelm@28762	526	\begin{matharray}{rcl}
wenzelm@28762	527	@{command_def "parse_ast_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762	528	@{command_def "parse_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762	529	@{command_def "print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762	530	@{command_def "typed_print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762	531	@{command_def "print_ast_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762	532	\end{matharray}
wenzelm@28762	533
wenzelm@28762	534	\begin{rail}
wenzelm@28762	535	( 'parse\_ast\_translation' \| 'parse\_translation' \| 'print\_translation' \|
wenzelm@28762	536	'typed\_print\_translation' \| 'print\_ast\_translation' ) ('(advanced)')? text
wenzelm@28762	537	;
wenzelm@28762	538	\end{rail}
wenzelm@28762	539
wenzelm@28762	540	Syntax translation functions written in ML admit almost arbitrary
wenzelm@28762	541	manipulations of Isabelle's inner syntax. Each of the above commands
wenzelm@28762	542	has a single \railqtok{text} argument that refers to an ML
wenzelm@28762	543	expression of appropriate type; these types are as follows by default:
wenzelm@28762	544
wenzelm@28762	545	%FIXME proper antiquotations
wenzelm@28762	546	\begin{ttbox}
wenzelm@28762	547	val parse_ast_translation : (string * (ast list -> ast)) list
wenzelm@28762	548	val parse_translation : (string * (term list -> term)) list
wenzelm@28762	549	val print_translation : (string * (term list -> term)) list
wenzelm@28762	550	val typed_print_translation :
wenzelm@28762	551	(string * (bool -> typ -> term list -> term)) list
wenzelm@28762	552	val print_ast_translation : (string * (ast list -> ast)) list
wenzelm@28762	553	\end{ttbox}
wenzelm@28762	554
wenzelm@28762	555	If the @{text "(advanced)"} option is given, the corresponding
wenzelm@28762	556	translation functions may depend on the current theory or proof
wenzelm@28762	557	context. This allows one to implement advanced syntax mechanisms, as
wenzelm@28762	558	translation functions may refer to specific theory declarations or
wenzelm@28762	559	auxiliary proof data.
wenzelm@28762	560
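wenzelm@28762	561	See also \cite[\S8]{isabelle-ref} for more information on the
wenzelm@28762	562	general concept of syntax transformations in Isabelle. With the @{text "(advanced)"} option, the ML expressions are expected to have the following types instead: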
wenzelm@28762	563
wenzelm@28762	564	%FIXME proper antiquotations
wenzelm@28762	565	\begin{ttbox}
wenzelm@28762	566	val parse_ast_translation:
wenzelm@28762	567	(string * (Proof.context -> ast list -> ast)) list
wenzelm@28762	568	val parse_translation:
wenzelm@28762	569	(string * (Proof.context -> term list -> term)) list
wenzelm@28762	570	val print_translation:
wenzelm@28762	571	(string * (Proof.context -> term list -> term)) list
wenzelm@28762	572	val typed_print_translation:
wenzelm@28762	573	(string * (Proof.context -> bool -> typ -> term list -> term)) list
wenzelm@28762	574	val print_ast_translation:
wenzelm@28762	575	(string * (Proof.context -> ast list -> ast)) list
wenzelm@28762	576	\end{ttbox}
wenzelm@28762	577	*}
wenzelm@28762	578
wenzelm@28762	579	end

author	wenzelm
	Thu, 13 Nov 2008 21:54:51 +0100
changeset 28769	8fc228f21861
parent 28767	f09ceb800d00
child 28770	93a372e2dc7a
permissions	-rw-r--r--