doc-src/IsarRef/Thy/Inner_Syntax.thy
author wenzelm
Thu, 01 Oct 2009 20:04:44 +0200
changeset 32833 f3716d1a2e48
parent 30397 b6212ae21656
child 35352 7425aece4ee3
permissions -rw-r--r--
explicitly Unsynchronized;
     1 theory Inner_Syntax
     2 imports Main
     3 begin
     4 
     5 chapter {* Inner syntax --- the term language \label{ch:inner-syntax} *}
     6 
     7 section {* Printing logical entities *}
     8 
     9 subsection {* Diagnostic commands *}
    10 
    11 text {*
    12   \begin{matharray}{rcl}
    13     @{command_def "typ"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    14     @{command_def "term"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    15     @{command_def "prop"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    16     @{command_def "thm"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    17     @{command_def "prf"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    18     @{command_def "full_prf"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    19     @{command_def "pr"}@{text "\<^sup>*"} & : & @{text "any \<rightarrow>"} \\
    20   \end{matharray}
    21 
    22   These diagnostic commands assist interactive development by printing
    23   internal logical entities in a human-readable fashion.
    24 
    25   \begin{rail}
    26     'typ' modes? type
    27     ;
    28     'term' modes? term
    29     ;
    30     'prop' modes? prop
    31     ;
    32     'thm' modes? thmrefs
    33     ;
    34     ( 'prf' | 'full\_prf' ) modes? thmrefs?
    35     ;
    36     'pr' modes? nat? (',' nat)?
    37     ;
    38 
    39     modes: '(' (name + ) ')'
    40     ;
    41   \end{rail}
    42 
    43   \begin{description}
    44 
    45   \item @{command "typ"}~@{text \<tau>} reads and prints types of the
    46   meta-logic according to the current theory or proof context.
    47 
    48   \item @{command "term"}~@{text t} and @{command "prop"}~@{text \<phi>}
    49   read, type-check and print terms or propositions according to the
    50   current theory or proof context; the inferred type of @{text t} is
    51   output as well.  Note that these commands are also useful in
    52   inspecting the current environment of term abbreviations.
    53 
    54   \item @{command "thm"}~@{text "a\<^sub>1 \<dots> a\<^sub>n"} retrieves
    55   theorems from the current theory or proof context.  Note that any
    56   attributes included in the theorem specifications are applied to a
    57   temporary context derived from the current theory or proof; the
    58   result is discarded, i.e.\ attributes involved in @{text "a\<^sub>1,
    59   \<dots>, a\<^sub>n"} do not have any permanent effect.
    60 
    61   \item @{command "prf"} displays the (compact) proof term of the
    62   current proof state (if present), or of the given theorems. Note
    63   that this requires proof terms to be switched on for the current
    64   object logic (see the ``Proof terms'' section of the Isabelle
    65   reference manual for information on how to do this).
    66 
    67   \item @{command "full_prf"} is like @{command "prf"}, but displays
    68   the full proof term, i.e.\ also displays information omitted in the
    69   compact proof term, which is denoted by ``@{text _}'' placeholders
    70   there.
    71 
    72   \item @{command "pr"}~@{text "goals, prems"} prints the current
    73   proof state (if present), including the proof context, current facts
    74   and goals.  The optional limit arguments affect the number of goals
    75   and premises to be displayed, which is initially 10 for both.
    76   Omitting limit values leaves the current setting unchanged.
    77 
    78   \end{description}
    79 
    80   All of the diagnostic commands above admit a list of @{text modes}
    81   to be specified, which is appended to the current print mode (see
    82   also \cite{isabelle-ref}).  Thus the output behavior may be modified
    83   according particular print mode features.  For example, @{command
    84   "pr"}~@{text "(latex xsymbols)"} would print the current proof state
    85   with mathematical symbols and special characters represented in
    86   {\LaTeX} source, according to the Isabelle style
    87   \cite{isabelle-sys}.
    88 
    89   Note that antiquotations (cf.\ \secref{sec:antiq}) provide a more
    90   systematic way to include formal items into the printed text
    91   document.
    92 *}
    93 
    94 
    95 subsection {* Details of printed content *}
    96 
    97 text {*
    98   \begin{mldecls} 
    99     @{index_ML show_types: "bool Unsynchronized.ref"} & default @{ML false} \\
   100     @{index_ML show_sorts: "bool Unsynchronized.ref"} & default @{ML false} \\
   101     @{index_ML show_consts: "bool Unsynchronized.ref"} & default @{ML false} \\
   102     @{index_ML long_names: "bool Unsynchronized.ref"} & default @{ML false} \\
   103     @{index_ML short_names: "bool Unsynchronized.ref"} & default @{ML false} \\
   104     @{index_ML unique_names: "bool Unsynchronized.ref"} & default @{ML true} \\
   105     @{index_ML show_brackets: "bool Unsynchronized.ref"} & default @{ML false} \\
   106     @{index_ML eta_contract: "bool Unsynchronized.ref"} & default @{ML true} \\
   107     @{index_ML goals_limit: "int Unsynchronized.ref"} & default @{ML 10} \\
   108     @{index_ML Proof.show_main_goal: "bool Unsynchronized.ref"} & default @{ML false} \\
   109     @{index_ML show_hyps: "bool Unsynchronized.ref"} & default @{ML false} \\
   110     @{index_ML show_tags: "bool Unsynchronized.ref"} & default @{ML false} \\
   111     @{index_ML show_question_marks: "bool Unsynchronized.ref"} & default @{ML true} \\
   112   \end{mldecls}
   113 
   114   These global ML variables control the detail of information that is
   115   displayed for types, terms, theorems, goals etc.
   116 
   117   In interactive sessions, the user interface usually manages these
   118   global parameters of the Isabelle process, even with some concept of
   119   persistence.  Nonetheless it is occasionally useful to manipulate ML
   120   variables directly, e.g.\ using @{command "ML_val"} or @{command
   121   "ML_command"}.
   122 
   123   Batch-mode logic sessions may be configured by putting appropriate
   124   ML text directly into the @{verbatim ROOT.ML} file.
   125 
   126   \begin{description}
   127 
   128   \item @{ML show_types} and @{ML show_sorts} control printing of type
   129   constraints for term variables, and sort constraints for type
   130   variables.  By default, neither of these are shown in output.  If
   131   @{ML show_sorts} is set to @{ML true}, types are always shown as
   132   well.
   133 
   134   Note that displaying types and sorts may explain why a polymorphic
   135   inference rule fails to resolve with some goal, or why a rewrite
   136   rule does not apply as expected.
   137 
   138   \item @{ML show_consts} controls printing of types of constants when
   139   displaying a goal state.
   140 
   141   Note that the output can be enormous, because polymorphic constants
   142   often occur at several different type instances.
   143 
   144   \item @{ML long_names}, @{ML short_names}, and @{ML unique_names}
   145   control the way of printing fully qualified internal names in
   146   external form.  See also \secref{sec:antiq} for the document
   147   antiquotation options of the same names.
   148 
   149   \item @{ML show_brackets} controls bracketing in pretty printed
   150   output.  If set to @{ML true}, all sub-expressions of the pretty
   151   printing tree will be parenthesized, even if this produces malformed
   152   term syntax!  This crude way of showing the internal structure of
   153   pretty printed entities may occasionally help to diagnose problems
   154   with operator priorities, for example.
   155 
   156   \item @{ML eta_contract} controls @{text "\<eta>"}-contracted printing of
   157   terms.
   158 
   159   The @{text \<eta>}-contraction law asserts @{prop "(\<lambda>x. f x) \<equiv> f"},
   160   provided @{text x} is not free in @{text f}.  It asserts
   161   \emph{extensionality} of functions: @{prop "f \<equiv> g"} if @{prop "f x \<equiv>
   162   g x"} for all @{text x}.  Higher-order unification frequently puts
   163   terms into a fully @{text \<eta>}-expanded form.  For example, if @{text
   164   F} has type @{text "(\<tau> \<Rightarrow> \<tau>) \<Rightarrow> \<tau>"} then its expanded form is @{term
   165   "\<lambda>h. F (\<lambda>x. h x)"}.
   166 
   167   Setting @{ML eta_contract} makes Isabelle perform @{text
   168   \<eta>}-contractions before printing, so that @{term "\<lambda>h. F (\<lambda>x. h x)"}
   169   appears simply as @{text F}.
   170 
   171   Note that the distinction between a term and its @{text \<eta>}-expanded
   172   form occasionally matters.  While higher-order resolution and
   173   rewriting operate modulo @{text "\<alpha>\<beta>\<eta>"}-conversion, some other tools
   174   might look at terms more discretely.
   175 
   176   \item @{ML goals_limit} controls the maximum number of subgoals to
   177   be shown in goal output.
   178 
   179   \item @{ML Proof.show_main_goal} controls whether the main result to
   180   be proven should be displayed.  This information might be relevant
   181   for schematic goals, to inspect the current claim that has been
   182   synthesized so far.
   183 
   184   \item @{ML show_hyps} controls printing of implicit hypotheses of
   185   local facts.  Normally, only those hypotheses are displayed that are
   186   \emph{not} covered by the assumptions of the current context: this
   187   situation indicates a fault in some tool being used.
   188 
   189   By setting @{ML show_hyps} to @{ML true}, output of \emph{all}
   190   hypotheses can be enforced, which is occasionally useful for
   191   diagnostic purposes.
   192 
   193   \item @{ML show_tags} controls printing of extra annotations within
   194   theorems, such as internal position information, or the case names
   195   being attached by the attribute @{attribute case_names}.
   196 
   197   Note that the @{attribute tagged} and @{attribute untagged}
   198   attributes provide low-level access to the collection of tags
   199   associated with a theorem.
   200 
   201   \item @{ML show_question_marks} controls printing of question marks
   202   for schematic variables, such as @{text ?x}.  Only the leading
   203   question mark is affected, the remaining text is unchanged
   204   (including proper markup for schematic variables that might be
   205   relevant for user interfaces).
   206 
   207   \end{description}
   208 *}
   209 
   210 
   211 subsection {* Printing limits *}
   212 
   213 text {*
   214   \begin{mldecls}
   215     @{index_ML Pretty.setdepth: "int -> unit"} \\
   216     @{index_ML Pretty.setmargin: "int -> unit"} \\
   217     @{index_ML print_depth: "int -> unit"} \\
   218   \end{mldecls}
   219 
   220   These ML functions set limits for pretty printed text.
   221 
   222   \begin{description}
   223 
   224   \item @{ML Pretty.setdepth}~@{text d} tells the pretty printer to
   225   limit the printing depth to @{text d}.  This affects the display of
   226   types, terms, theorems etc.  The default value is 0, which permits
   227   printing to an arbitrary depth.  Other useful values for @{text d}
   228   are 10 and 20.
   229 
   230   \item @{ML Pretty.setmargin}~@{text m} tells the pretty printer to
   231   assume a right margin (page width) of @{text m}.  The initial margin
   232   is 76, but user interfaces might adapt the margin automatically when
   233   resizing windows.
   234 
   235   \item @{ML print_depth}~@{text n} limits the printing depth of the
   236   ML toplevel pretty printer; the precise effect depends on the ML
   237   compiler and run-time system.  Typically @{text n} should be less
   238   than 10.  Bigger values such as 100--1000 are useful for debugging.
   239 
   240   \end{description}
   241 *}
   242 
   243 
   244 section {* Mixfix annotations *}
   245 
   246 text {* Mixfix annotations specify concrete \emph{inner syntax} of
   247   Isabelle types and terms.  Some commands such as @{command
   248   "typedecl"} admit infixes only, while @{command "definition"} etc.\
   249   support the full range of general mixfixes and binders.  Fixed
   250   parameters in toplevel theorem statements, locale specifications
   251   also admit mixfix annotations.
   252 
   253   \indexouternonterm{infix}\indexouternonterm{mixfix}\indexouternonterm{structmixfix}
   254   \begin{rail}
   255     infix: '(' ('infix' | 'infixl' | 'infixr') string nat ')'
   256     ;
   257     mixfix: infix | '(' string prios? nat? ')' | '(' 'binder' string prios? nat ')'
   258     ;
   259     structmixfix: mixfix | '(' 'structure' ')'
   260     ;
   261 
   262     prios: '[' (nat + ',') ']'
   263     ;
   264   \end{rail}
   265 
   266   Here the \railtok{string} specifications refer to the actual mixfix
   267   template, which may include literal text, spacing, blocks, and
   268   arguments (denoted by ``@{text _}''); the special symbol
   269   ``@{verbatim "\<index>"}'' (printed as ``@{text "\<index>"}'') represents an index
   270   argument that specifies an implicit structure reference (see also
   271   \secref{sec:locale}).  Infix and binder declarations provide common
   272   abbreviations for particular mixfix declarations.  So in practice,
   273   mixfix templates mostly degenerate to literal text for concrete
   274   syntax, such as ``@{verbatim "++"}'' for an infix symbol.
   275 
   276   \medskip In full generality, mixfix declarations work as follows.
   277   Suppose a constant @{text "c :: \<tau>\<^sub>1 \<Rightarrow> \<dots> \<tau>\<^sub>n \<Rightarrow> \<tau>"} is
   278   annotated by @{text "(mixfix [p\<^sub>1, \<dots>, p\<^sub>n] p)"}, where @{text
   279   "mixfix"} is a string @{text "d\<^sub>0 _ d\<^sub>1 _ \<dots> _ d\<^sub>n"} consisting of
   280   delimiters that surround argument positions as indicated by
   281   underscores.
   282 
   283   Altogether this determines a production for a context-free priority
   284   grammar, where for each argument @{text "i"} the syntactic category
   285   is determined by @{text "\<tau>\<^sub>i"} (with priority @{text "p\<^sub>i"}), and
   286   the result category is determined from @{text "\<tau>"} (with
   287   priority @{text "p"}).  Priority specifications are optional, with
   288   default 0 for arguments and 1000 for the result.
   289 
   290   Since @{text "\<tau>"} may be again a function type, the constant
   291   type scheme may have more argument positions than the mixfix
   292   pattern.  Printing a nested application @{text "c t\<^sub>1 \<dots> t\<^sub>m"} for
   293   @{text "m > n"} works by attaching concrete notation only to the
   294   innermost part, essentially by printing @{text "(c t\<^sub>1 \<dots> t\<^sub>n) \<dots> t\<^sub>m"}
   295   instead.  If a term has fewer arguments than specified in the mixfix
   296   template, the concrete syntax is ignored.
   297 
   298   \medskip A mixfix template may also contain additional directives
   299   for pretty printing, notably spaces, blocks, and breaks.  The
   300   general template format is a sequence over any of the following
   301   entities.
   302 
   303   \begin{description}
   304 
   305   \item @{text "d"} is a delimiter, namely a non-empty sequence of
   306   characters other than the following special characters:
   307 
   308   \smallskip
   309   \begin{tabular}{ll}
   310     @{verbatim "'"} & single quote \\
   311     @{verbatim "_"} & underscore \\
   312     @{text "\<index>"} & index symbol \\
   313     @{verbatim "("} & open parenthesis \\
   314     @{verbatim ")"} & close parenthesis \\
   315     @{verbatim "/"} & slash \\
   316   \end{tabular}
   317   \medskip
   318 
   319   \item @{verbatim "'"} escapes the special meaning of these
   320   meta-characters, producing a literal version of the following
   321   character, unless that is a blank.
   322 
   323   A single quote followed by a blank separates delimiters, without
   324   affecting printing, but input tokens may have additional white space
   325   here.
   326 
   327   \item @{verbatim "_"} is an argument position, which stands for a
   328   certain syntactic category in the underlying grammar.
   329 
   330   \item @{text "\<index>"} is an indexed argument position; this is the place
   331   where implicit structure arguments can be attached.
   332 
   333   \item @{text "s"} is a non-empty sequence of spaces for printing.
   334   This and the following specifications do not affect parsing at all.
   335 
   336   \item @{verbatim "("}@{text n} opens a pretty printing block.  The
   337   optional number specifies how much indentation to add when a line
   338   break occurs within the block.  If the parenthesis is not followed
   339   by digits, the indentation defaults to 0.  A block specified via
   340   @{verbatim "(00"} is unbreakable.
   341 
   342   \item @{verbatim ")"} closes a pretty printing block.
   343 
   344   \item @{verbatim "//"} forces a line break.
   345 
   346   \item @{verbatim "/"}@{text s} allows a line break.  Here @{text s}
   347   stands for the string of spaces (zero or more) right after the
   348   slash.  These spaces are printed if the break is \emph{not} taken.
   349 
   350   \end{description}
   351 
   352   For example, the template @{verbatim "(_ +/ _)"} specifies an infix
   353   operator.  There are two argument positions; the delimiter
   354   @{verbatim "+"} is preceded by a space and followed by a space or
   355   line break; the entire phrase is a pretty printing block.
   356 
   357   The general idea of pretty printing with blocks and breaks is also
   358   described in \cite{paulson-ml2}.
   359 *}
   360 
   361 
   362 section {* Explicit term notation *}
   363 
   364 text {*
   365   \begin{matharray}{rcll}
   366     @{command_def "notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   367     @{command_def "no_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   368   \end{matharray}
   369 
   370   \begin{rail}
   371     ('notation' | 'no\_notation') target? mode? \\ (nameref structmixfix + 'and')
   372     ;
   373   \end{rail}
   374 
   375   \begin{description}
   376 
   377   \item @{command "notation"}~@{text "c (mx)"} associates mixfix
   378   syntax with an existing constant or fixed variable.  This is a
   379   robust interface to the underlying @{command "syntax"} primitive
   380   (\secref{sec:syn-trans}).  Type declaration and internal syntactic
   381   representation of the given entity is retrieved from the context.
   382   
   383   \item @{command "no_notation"} is similar to @{command "notation"},
   384   but removes the specified syntax annotation from the present
   385   context.
   386 
   387   \end{description}
   388 *}
   389 
   390 
   391 section {* The Pure syntax \label{sec:pure-syntax} *}
   392 
   393 subsection {* Priority grammars \label{sec:priority-grammar} *}
   394 
   395 text {* A context-free grammar consists of a set of \emph{terminal
   396   symbols}, a set of \emph{nonterminal symbols} and a set of
   397   \emph{productions}.  Productions have the form @{text "A = \<gamma>"},
   398   where @{text A} is a nonterminal and @{text \<gamma>} is a string of
   399   terminals and nonterminals.  One designated nonterminal is called
   400   the \emph{root symbol}.  The language defined by the grammar
   401   consists of all strings of terminals that can be derived from the
   402   root symbol by applying productions as rewrite rules.
   403 
   404   The standard Isabelle parser for inner syntax uses a \emph{priority
   405   grammar}.  Each nonterminal is decorated by an integer priority:
   406   @{text "A\<^sup>(\<^sup>p\<^sup>)"}.  In a derivation, @{text "A\<^sup>(\<^sup>p\<^sup>)"} may be rewritten
   407   using a production @{text "A\<^sup>(\<^sup>q\<^sup>) = \<gamma>"} only if @{text "p \<le> q"}.  Any
   408   priority grammar can be translated into a normal context-free
   409   grammar by introducing new nonterminals and productions.
   410 
   411   \medskip Formally, a set of context free productions @{text G}
   412   induces a derivation relation @{text "\<longrightarrow>\<^sub>G"} as follows.  Let @{text
   413   \<alpha>} and @{text \<beta>} denote strings of terminal or nonterminal symbols.
   414   Then @{text "\<alpha> A\<^sup>(\<^sup>p\<^sup>) \<beta> \<longrightarrow>\<^sub>G \<alpha> \<gamma> \<beta>"} holds if and only if @{text G}
   415   contains some production @{text "A\<^sup>(\<^sup>q\<^sup>) = \<gamma>"} for @{text "p \<le> q"}.
   416 
   417   \medskip The following grammar for arithmetic expressions
   418   demonstrates how binding power and associativity of operators can be
   419   enforced by priorities.
   420 
   421   \begin{center}
   422   \begin{tabular}{rclr}
   423   @{text "A\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "="} & @{verbatim "("} @{text "A\<^sup>(\<^sup>0\<^sup>)"} @{verbatim ")"} \\
   424   @{text "A\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "="} & @{verbatim 0} \\
   425   @{text "A\<^sup>(\<^sup>0\<^sup>)"} & @{text "="} & @{text "A\<^sup>(\<^sup>0\<^sup>)"} @{verbatim "+"} @{text "A\<^sup>(\<^sup>1\<^sup>)"} \\
   426   @{text "A\<^sup>(\<^sup>2\<^sup>)"} & @{text "="} & @{text "A\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "*"} @{text "A\<^sup>(\<^sup>2\<^sup>)"} \\
   427   @{text "A\<^sup>(\<^sup>3\<^sup>)"} & @{text "="} & @{verbatim "-"} @{text "A\<^sup>(\<^sup>3\<^sup>)"} \\
   428   \end{tabular}
   429   \end{center}
   430   The choice of priorities determines that @{verbatim "-"} binds
   431   tighter than @{verbatim "*"}, which binds tighter than @{verbatim
   432   "+"}.  Furthermore @{verbatim "+"} associates to the left and
   433   @{verbatim "*"} to the right.
   434 
   435   \medskip For clarity, grammars obey these conventions:
   436   \begin{itemize}
   437 
   438   \item All priorities must lie between 0 and 1000.
   439 
   440   \item Priority 0 on the right-hand side and priority 1000 on the
   441   left-hand side may be omitted.
   442 
   443   \item The production @{text "A\<^sup>(\<^sup>p\<^sup>) = \<alpha>"} is written as @{text "A = \<alpha>
   444   (p)"}, i.e.\ the priority of the left-hand side actually appears in
   445   a column on the far right.
   446 
   447   \item Alternatives are separated by @{text "|"}.
   448 
   449   \item Repetition is indicated by dots @{text "(\<dots>)"} in an informal
   450   but obvious way.
   451 
   452   \end{itemize}
   453 
   454   Using these conventions, the example grammar specification above
   455   takes the form:
   456   \begin{center}
   457   \begin{tabular}{rclc}
   458     @{text A} & @{text "="} & @{verbatim "("} @{text A} @{verbatim ")"} \\
   459               & @{text "|"} & @{verbatim 0} & \qquad\qquad \\
   460               & @{text "|"} & @{text A} @{verbatim "+"} @{text "A\<^sup>(\<^sup>1\<^sup>)"} & @{text "(0)"} \\
   461               & @{text "|"} & @{text "A\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "*"} @{text "A\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
   462               & @{text "|"} & @{verbatim "-"} @{text "A\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
   463   \end{tabular}
   464   \end{center}
   465 *}
   466 
   467 
   468 subsection {* The Pure grammar *}
   469 
   470 text {*
   471   The priority grammar of the @{text "Pure"} theory is defined as follows:
   472 
   473   %FIXME syntax for "index" (?)
   474   %FIXME "op" versions of ==> etc. (?)
   475 
   476   \begin{center}
   477   \begin{supertabular}{rclr}
   478 
   479   @{syntax_def (inner) any} & = & @{text "prop  |  logic"} \\\\
   480 
   481   @{syntax_def (inner) prop} & = & @{verbatim "("} @{text prop} @{verbatim ")"} \\
   482     & @{text "|"} & @{text "prop\<^sup>(\<^sup>4\<^sup>)"} @{verbatim "::"} @{text type} & @{text "(3)"} \\
   483     & @{text "|"} & @{text "any\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "=?="} @{text "any\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
   484     & @{text "|"} & @{text "any\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "=="} @{text "any\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
   485     & @{text "|"} & @{text "any\<^sup>(\<^sup>3\<^sup>)"} @{text "\<equiv>"} @{text "any\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
   486     & @{text "|"} & @{text "prop\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "&&&"} @{text "prop\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
   487     & @{text "|"} & @{text "prop\<^sup>(\<^sup>2\<^sup>)"} @{verbatim "==>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
   488     & @{text "|"} & @{text "prop\<^sup>(\<^sup>2\<^sup>)"} @{text "\<Longrightarrow>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
   489     & @{text "|"} & @{verbatim "[|"} @{text prop} @{verbatim ";"} @{text "\<dots>"} @{verbatim ";"} @{text prop} @{verbatim "|]"} @{verbatim "==>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
   490     & @{text "|"} & @{text "\<lbrakk>"} @{text prop} @{verbatim ";"} @{text "\<dots>"} @{verbatim ";"} @{text prop} @{text "\<rbrakk>"} @{text "\<Longrightarrow>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
   491     & @{text "|"} & @{verbatim "!!"} @{text idts} @{verbatim "."} @{text prop} & @{text "(0)"} \\
   492     & @{text "|"} & @{text "\<And>"} @{text idts} @{verbatim "."} @{text prop} & @{text "(0)"} \\
   493     & @{text "|"} & @{verbatim OFCLASS} @{verbatim "("} @{text type} @{verbatim ","} @{text logic} @{verbatim ")"} \\
   494     & @{text "|"} & @{verbatim SORT_CONSTRAINT} @{verbatim "("} @{text type} @{verbatim ")"} \\
   495     & @{text "|"} & @{verbatim TERM} @{text logic} \\
   496     & @{text "|"} & @{verbatim PROP} @{text aprop} \\\\
   497 
   498   @{syntax_def (inner) aprop} & = & @{verbatim "("} @{text aprop} @{verbatim ")"} \\
   499     & @{text "|"} & @{text "id  |  longid  |  var  |  "}@{verbatim "_"}@{text "  |  "}@{verbatim "..."} \\
   500     & @{text "|"} & @{verbatim CONST} @{text "id  |  "}@{verbatim CONST} @{text "longid"} \\
   501     & @{text "|"} & @{text "logic\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)  any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) \<dots> any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "(999)"} \\\\
   502 
   503   @{syntax_def (inner) logic} & = & @{verbatim "("} @{text logic} @{verbatim ")"} \\
   504     & @{text "|"} & @{text "logic\<^sup>(\<^sup>4\<^sup>)"} @{verbatim "::"} @{text type} & @{text "(3)"} \\
   505     & @{text "|"} & @{text "id  |  longid  |  var  |  "}@{verbatim "_"}@{text "  |  "}@{verbatim "..."} \\
   506     & @{text "|"} & @{verbatim CONST} @{text "id  |  "}@{verbatim CONST} @{text "longid"} \\
   507     & @{text "|"} & @{text "logic\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)  any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) \<dots> any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "(999)"} \\
   508     & @{text "|"} & @{verbatim "%"} @{text pttrns} @{verbatim "."} @{text "any\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
   509     & @{text "|"} & @{text \<lambda>} @{text pttrns} @{verbatim "."} @{text "any\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
   510     & @{text "|"} & @{verbatim TYPE} @{verbatim "("} @{text type} @{verbatim ")"} \\\\
   511 
   512   @{syntax_def (inner) idt} & = & @{verbatim "("} @{text idt} @{verbatim ")"}@{text "  |  id  |  "}@{verbatim "_"} \\
   513     & @{text "|"} & @{text id} @{verbatim "::"} @{text type} & @{text "(0)"} \\
   514     & @{text "|"} & @{verbatim "_"} @{verbatim "::"} @{text type} & @{text "(0)"} \\\\
   515 
   516   @{syntax_def (inner) idts} & = & @{text "idt  |  idt\<^sup>(\<^sup>1\<^sup>) idts"} & @{text "(0)"} \\\\
   517 
   518   @{syntax_def (inner) pttrn} & = & @{text idt} \\\\
   519 
   520   @{syntax_def (inner) pttrns} & = & @{text "pttrn  |  pttrn\<^sup>(\<^sup>1\<^sup>) pttrns"} & @{text "(0)"} \\\\
   521 
   522   @{syntax_def (inner) type} & = & @{verbatim "("} @{text type} @{verbatim ")"} \\
   523     & @{text "|"} & @{text "tid  |  tvar  |  "}@{verbatim "_"} \\
   524     & @{text "|"} & @{text "tid"} @{verbatim "::"} @{text "sort  |  tvar  "}@{verbatim "::"} @{text "sort  |  "}@{verbatim "_"} @{verbatim "::"} @{text "sort"} \\
   525     & @{text "|"} & @{text "id  |  type\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) id  |  "}@{verbatim "("} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim ")"} @{text id} \\
   526     & @{text "|"} & @{text "longid  |  type\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) longid"} \\
   527     & @{text "|"} & @{verbatim "("} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim ")"} @{text longid} \\
   528     & @{text "|"} & @{text "type\<^sup>(\<^sup>1\<^sup>)"} @{verbatim "=>"} @{text type} & @{text "(0)"} \\
   529     & @{text "|"} & @{text "type\<^sup>(\<^sup>1\<^sup>)"} @{text "\<Rightarrow>"} @{text type} & @{text "(0)"} \\
   530     & @{text "|"} & @{verbatim "["} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim "]"} @{verbatim "=>"} @{text type} & @{text "(0)"} \\
   531     & @{text "|"} & @{verbatim "["} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim "]"} @{text "\<Rightarrow>"} @{text type} & @{text "(0)"} \\\\
   532 
   533   @{syntax_def (inner) sort} & = & @{text "id  |  longid  |  "}@{verbatim "{}"} \\
   534     & @{text "|"} & @{verbatim "{"} @{text "(id | longid)"} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text "(id | longid)"} @{verbatim "}"} \\
   535   \end{supertabular}
   536   \end{center}
   537 
   538   \medskip Here literal terminals are printed @{verbatim "verbatim"};
   539   see also \secref{sec:inner-lex} for further token categories of the
   540   inner syntax.  The meaning of the nonterminals defined by the above
   541   grammar is as follows:
   542 
   543   \begin{description}
   544 
   545   \item @{syntax_ref (inner) any} denotes any term.
   546 
   547   \item @{syntax_ref (inner) prop} denotes meta-level propositions,
   548   which are terms of type @{typ prop}.  The syntax of such formulae of
   549   the meta-logic is carefully distinguished from usual conventions for
   550   object-logics.  In particular, plain @{text "\<lambda>"}-term notation is
   551   \emph{not} recognized as @{syntax (inner) prop}.
   552 
   553   \item @{syntax_ref (inner) aprop} denotes atomic propositions, which
   554   are embedded into regular @{syntax (inner) prop} by means of an
   555   explicit @{verbatim PROP} token.
   556 
   557   Terms of type @{typ prop} with non-constant head, e.g.\ a plain
   558   variable, are printed in this form.  Constants that yield type @{typ
   559   prop} are expected to provide their own concrete syntax; otherwise
   560   the printed version will appear like @{syntax (inner) logic} and
   561   cannot be parsed again as @{syntax (inner) prop}.
   562 
   563   \item @{syntax_ref (inner) logic} denotes arbitrary terms of a
   564   logical type, excluding type @{typ prop}.  This is the main
   565   syntactic category of object-logic entities, covering plain @{text
   566   \<lambda>}-term notation (variables, abstraction, application), plus
   567   anything defined by the user.
   568 
   569   When specifying notation for logical entities, all logical types
   570   (excluding @{typ prop}) are \emph{collapsed} to this single category
   571   of @{syntax (inner) logic}.
   572 
   573   \item @{syntax_ref (inner) idt} denotes identifiers, possibly
   574   constrained by types.
   575 
   576   \item @{syntax_ref (inner) idts} denotes a sequence of @{syntax_ref
   577   (inner) idt}.  This is the most basic category for variables in
   578   iterated binders, such as @{text "\<lambda>"} or @{text "\<And>"}.
   579 
   580   \item @{syntax_ref (inner) pttrn} and @{syntax_ref (inner) pttrns}
   581   denote patterns for abstraction, cases bindings etc.  In Pure, these
   582   categories start as a merely copy of @{syntax (inner) idt} and
   583   @{syntax (inner) idts}, respectively.  Object-logics may add
   584   additional productions for binding forms.
   585 
   586   \item @{syntax_ref (inner) type} denotes types of the meta-logic.
   587 
   588   \item @{syntax_ref (inner) sort} denotes meta-level sorts.
   589 
   590   \end{description}
   591 
   592   Here are some further explanations of certain syntax features.
   593 
   594   \begin{itemize}
   595 
   596   \item In @{syntax (inner) idts}, note that @{text "x :: nat y"} is
   597   parsed as @{text "x :: (nat y)"}, treating @{text y} like a type
   598   constructor applied to @{text nat}.  To avoid this interpretation,
   599   write @{text "(x :: nat) y"} with explicit parentheses.
   600 
   601   \item Similarly, @{text "x :: nat y :: nat"} is parsed as @{text "x ::
   602   (nat y :: nat)"}.  The correct form is @{text "(x :: nat) (y ::
   603   nat)"}, or @{text "(x :: nat) y :: nat"} if @{text y} is last in the
   604   sequence of identifiers.
   605 
   606   \item Type constraints for terms bind very weakly.  For example,
   607   @{text "x < y :: nat"} is normally parsed as @{text "(x < y) ::
   608   nat"}, unless @{text "<"} has a very low priority, in which case the
   609   input is likely to be ambiguous.  The correct form is @{text "x < (y
   610   :: nat)"}.
   611 
   612   \item Constraints may be either written with two literal colons
   613   ``@{verbatim "::"}'' or the double-colon symbol @{verbatim "\<Colon>"},
   614   which actually looks exactly the same in some {\LaTeX} styles.
   615 
   616   \item Dummy variables (written as underscore) may occur in different
   617   roles.
   618 
   619   \begin{description}
   620 
   621   \item A type ``@{text "_"}'' or ``@{text "_ :: sort"}'' acts like an
   622   anonymous inference parameter, which is filled-in according to the
   623   most general type produced by the type-checking phase.
   624 
   625   \item A bound ``@{text "_"}'' refers to a vacuous abstraction, where
   626   the body does not refer to the binding introduced here.  As in the
   627   term @{term "\<lambda>x _. x"}, which is @{text "\<alpha>"}-equivalent to @{text
   628   "\<lambda>x y. x"}.
   629 
   630   \item A free ``@{text "_"}'' refers to an implicit outer binding.
   631   Higher definitional packages usually allow forms like @{text "f x _
   632   = x"}.
   633 
   634   \item A schematic ``@{text "_"}'' (within a term pattern, see
   635   \secref{sec:term-decls}) refers to an anonymous variable that is
   636   implicitly abstracted over its context of locally bound variables.
   637   For example, this allows pattern matching of @{text "{x. f x = g
   638   x}"} against @{text "{x. _ = _}"}, or even @{text "{_. _ = _}"} by
   639   using both bound and schematic dummies.
   640 
   641   \end{description}
   642 
   643   \item The three literal dots ``@{verbatim "..."}'' may be also
   644   written as ellipsis symbol @{verbatim "\<dots>"}.  In both cases this
   645   refers to a special schematic variable, which is bound in the
   646   context.  This special term abbreviation works nicely with
   647   calculational reasoning (\secref{sec:calculation}).
   648 
   649   \end{itemize}
   650 *}
   651 
   652 
   653 section {* Lexical matters \label{sec:inner-lex} *}
   654 
   655 text {* The inner lexical syntax vaguely resembles the outer one
   656   (\secref{sec:outer-lex}), but some details are different.  There are
   657   two main categories of inner syntax tokens:
   658 
   659   \begin{enumerate}
   660 
   661   \item \emph{delimiters} --- the literal tokens occurring in
   662   productions of the given priority grammar (cf.\
   663   \secref{sec:priority-grammar});
   664 
   665   \item \emph{named tokens} --- various categories of identifiers etc.
   666 
   667   \end{enumerate}
   668 
   669   Delimiters override named tokens and may thus render certain
   670   identifiers inaccessible.  Sometimes the logical context admits
   671   alternative ways to refer to the same entity, potentially via
   672   qualified names.
   673 
   674   \medskip The categories for named tokens are defined once and for
   675   all as follows, reusing some categories of the outer token syntax
   676   (\secref{sec:outer-lex}).
   677 
   678   \begin{center}
   679   \begin{supertabular}{rcl}
   680     @{syntax_def (inner) id} & = & @{syntax_ref ident} \\
   681     @{syntax_def (inner) longid} & = & @{syntax_ref longident} \\
   682     @{syntax_def (inner) var} & = & @{syntax_ref var} \\
   683     @{syntax_def (inner) tid} & = & @{syntax_ref typefree} \\
   684     @{syntax_def (inner) tvar} & = & @{syntax_ref typevar} \\
   685     @{syntax_def (inner) num} & = & @{syntax_ref nat}@{text "  |  "}@{verbatim "-"}@{syntax_ref nat} \\
   686     @{syntax_def (inner) float_token} & = & @{syntax_ref nat}@{verbatim "."}@{syntax_ref nat}@{text "  |  "}@{verbatim "-"}@{syntax_ref nat}@{verbatim "."}@{syntax_ref nat} \\
   687     @{syntax_def (inner) xnum} & = & @{verbatim "#"}@{syntax_ref nat}@{text "  |  "}@{verbatim "#-"}@{syntax_ref nat} \\
   688 
   689     @{syntax_def (inner) xstr} & = & @{verbatim "''"} @{text "\<dots>"} @{verbatim "''"} \\
   690   \end{supertabular}
   691   \end{center}
   692 
   693   The token categories @{syntax (inner) num}, @{syntax (inner)
   694   float_token}, @{syntax (inner) xnum}, and @{syntax (inner) xstr} are
   695   not used in Pure.  Object-logics may implement numerals and string
   696   constants by adding appropriate syntax declarations, together with
   697   some translation functions (e.g.\ see Isabelle/HOL).
   698 
   699   The derived categories @{syntax_def (inner) num_const} and
   700   @{syntax_def (inner) float_const} provide robust access to @{syntax
   701   (inner) num}, and @{syntax (inner) float_token}, respectively: the
   702   syntax tree holds a syntactic constant instead of a free variable.
   703 *}
   704 
   705 
   706 section {* Syntax and translations \label{sec:syn-trans} *}
   707 
   708 text {*
   709   \begin{matharray}{rcl}
   710     @{command_def "nonterminals"} & : & @{text "theory \<rightarrow> theory"} \\
   711     @{command_def "syntax"} & : & @{text "theory \<rightarrow> theory"} \\
   712     @{command_def "no_syntax"} & : & @{text "theory \<rightarrow> theory"} \\
   713     @{command_def "translations"} & : & @{text "theory \<rightarrow> theory"} \\
   714     @{command_def "no_translations"} & : & @{text "theory \<rightarrow> theory"} \\
   715   \end{matharray}
   716 
   717   \begin{rail}
   718     'nonterminals' (name +)
   719     ;
   720     ('syntax' | 'no\_syntax') mode? (constdecl +)
   721     ;
   722     ('translations' | 'no\_translations') (transpat ('==' | '=>' | '<=' | rightleftharpoons | rightharpoonup | leftharpoondown) transpat +)
   723     ;
   724 
   725     mode: ('(' ( name | 'output' | name 'output' ) ')')
   726     ;
   727     transpat: ('(' nameref ')')? string
   728     ;
   729   \end{rail}
   730 
   731   \begin{description}
   732   
   733   \item @{command "nonterminals"}~@{text c} declares a type
   734   constructor @{text c} (without arguments) to act as purely syntactic
   735   type: a nonterminal symbol of the inner syntax.
   736 
   737   \item @{command "syntax"}~@{text "(mode) decls"} is similar to
   738   @{command "consts"}~@{text decls}, except that the actual logical
   739   signature extension is omitted.  Thus the context free grammar of
   740   Isabelle's inner syntax may be augmented in arbitrary ways,
   741   independently of the logic.  The @{text mode} argument refers to the
   742   print mode that the grammar rules belong; unless the @{keyword_ref
   743   "output"} indicator is given, all productions are added both to the
   744   input and output grammar.
   745   
   746   \item @{command "no_syntax"}~@{text "(mode) decls"} removes grammar
   747   declarations (and translations) resulting from @{text decls}, which
   748   are interpreted in the same manner as for @{command "syntax"} above.
   749   
   750   \item @{command "translations"}~@{text rules} specifies syntactic
   751   translation rules (i.e.\ macros): parse~/ print rules (@{text "\<rightleftharpoons>"}),
   752   parse rules (@{text "\<rightharpoonup>"}), or print rules (@{text "\<leftharpoondown>"}).
   753   Translation patterns may be prefixed by the syntactic category to be
   754   used for parsing; the default is @{text logic}.
   755   
   756   \item @{command "no_translations"}~@{text rules} removes syntactic
   757   translation rules, which are interpreted in the same manner as for
   758   @{command "translations"} above.
   759 
   760   \end{description}
   761 *}
   762 
   763 
   764 section {* Syntax translation functions \label{sec:tr-funs} *}
   765 
   766 text {*
   767   \begin{matharray}{rcl}
   768     @{command_def "parse_ast_translation"} & : & @{text "theory \<rightarrow> theory"} \\
   769     @{command_def "parse_translation"} & : & @{text "theory \<rightarrow> theory"} \\
   770     @{command_def "print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
   771     @{command_def "typed_print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
   772     @{command_def "print_ast_translation"} & : & @{text "theory \<rightarrow> theory"} \\
   773   \end{matharray}
   774 
   775   \begin{rail}
   776   ( 'parse\_ast\_translation' | 'parse\_translation' | 'print\_translation' |
   777     'typed\_print\_translation' | 'print\_ast\_translation' ) ('(advanced)')? text
   778   ;
   779   \end{rail}
   780 
   781   Syntax translation functions written in ML admit almost arbitrary
   782   manipulations of Isabelle's inner syntax.  Any of the above commands
   783   have a single \railqtok{text} argument that refers to an ML
   784   expression of appropriate type, which are as follows by default:
   785 
   786 %FIXME proper antiquotations
   787 \begin{ttbox}
   788 val parse_ast_translation   : (string * (ast list -> ast)) list
   789 val parse_translation       : (string * (term list -> term)) list
   790 val print_translation       : (string * (term list -> term)) list
   791 val typed_print_translation :
   792   (string * (bool -> typ -> term list -> term)) list
   793 val print_ast_translation   : (string * (ast list -> ast)) list
   794 \end{ttbox}
   795 
   796   If the @{text "(advanced)"} option is given, the corresponding
   797   translation functions may depend on the current theory or proof
   798   context.  This allows to implement advanced syntax mechanisms, as
   799   translations functions may refer to specific theory declarations or
   800   auxiliary proof data.
   801 
   802   See also \cite{isabelle-ref} for more information on the general
   803   concept of syntax transformations in Isabelle.
   804 
   805 %FIXME proper antiquotations
   806 \begin{ttbox}
   807 val parse_ast_translation:
   808   (string * (Proof.context -> ast list -> ast)) list
   809 val parse_translation:
   810   (string * (Proof.context -> term list -> term)) list
   811 val print_translation:
   812   (string * (Proof.context -> term list -> term)) list
   813 val typed_print_translation:
   814   (string * (Proof.context -> bool -> typ -> term list -> term)) list
   815 val print_ast_translation:
   816   (string * (Proof.context -> ast list -> ast)) list
   817 \end{ttbox}
   818 *}
   819 
   820 
   821 section {* Inspecting the syntax *}
   822 
   823 text {*
   824   \begin{matharray}{rcl}
   825     @{command_def "print_syntax"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
   826   \end{matharray}
   827 
   828   \begin{description}
   829 
   830   \item @{command "print_syntax"} prints the inner syntax of the
   831   current context.  The output can be quite large; the most important
   832   sections are explained below.
   833 
   834   \begin{description}
   835 
   836   \item @{text "lexicon"} lists the delimiters of the inner token
   837   language; see \secref{sec:inner-lex}.
   838 
   839   \item @{text "prods"} lists the productions of the underlying
   840   priority grammar; see \secref{sec:priority-grammar}.
   841 
   842   The nonterminal @{text "A\<^sup>(\<^sup>p\<^sup>)"} is rendered in plain text as @{text
   843   "A[p]"}; delimiters are quoted.  Many productions have an extra
   844   @{text "\<dots> => name"}.  These names later become the heads of parse
   845   trees; they also guide the pretty printer.
   846 
   847   Productions without such parse tree names are called \emph{copy
   848   productions}.  Their right-hand side must have exactly one
   849   nonterminal symbol (or named token).  The parser does not create a
   850   new parse tree node for copy productions, but simply returns the
   851   parse tree of the right-hand symbol.
   852 
   853   If the right-hand side of a copy production consists of a single
   854   nonterminal without any delimiters, then it is called a \emph{chain
   855   production}.  Chain productions act as abbreviations: conceptually,
   856   they are removed from the grammar by adding new productions.
   857   Priority information attached to chain productions is ignored; only
   858   the dummy value @{text "-1"} is displayed.
   859 
   860   \item @{text "print modes"} lists the alternative print modes
   861   provided by this grammar; see \secref{sec:print-modes}.
   862 
   863   \item @{text "parse_rules"} and @{text "print_rules"} relate to
   864   syntax translations (macros); see \secref{sec:syn-trans}.
   865 
   866   \item @{text "parse_ast_translation"} and @{text
   867   "print_ast_translation"} list sets of constants that invoke
   868   translation functions for abstract syntax trees, which are only
   869   required in very special situations; see \secref{sec:tr-funs}.
   870 
   871   \item @{text "parse_translation"} and @{text "print_translation"}
   872   list the sets of constants that invoke regular translation
   873   functions; see \secref{sec:tr-funs}.
   874 
   875   \end{description}
   876   
   877   \end{description}
   878 *}
   879 
   880 end