doc-src/IsarRef/Thy/Inner_Syntax.thy
author wenzelm
Wed, 24 Feb 2010 20:37:01 +0100
changeset 35352 7425aece4ee3
parent 32833 f3716d1a2e48
child 35418 4c7cba1f7ce9
permissions -rw-r--r--
allow general mixfix syntax for type constructors;
     1 theory Inner_Syntax
     2 imports Main
     3 begin
     4 
     5 chapter {* Inner syntax --- the term language \label{ch:inner-syntax} *}
     6 
     7 section {* Printing logical entities *}
     8 
     9 subsection {* Diagnostic commands *}
    10 
    11 text {*
    12   \begin{matharray}{rcl}
    13     @{command_def "typ"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    14     @{command_def "term"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    15     @{command_def "prop"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    16     @{command_def "thm"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    17     @{command_def "prf"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    18     @{command_def "full_prf"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    19     @{command_def "pr"}@{text "\<^sup>*"} & : & @{text "any \<rightarrow>"} \\
    20   \end{matharray}
    21 
    22   These diagnostic commands assist interactive development by printing
    23   internal logical entities in a human-readable fashion.
    24 
    25   \begin{rail}
    26     'typ' modes? type
    27     ;
    28     'term' modes? term
    29     ;
    30     'prop' modes? prop
    31     ;
    32     'thm' modes? thmrefs
    33     ;
    34     ( 'prf' | 'full\_prf' ) modes? thmrefs?
    35     ;
    36     'pr' modes? nat? (',' nat)?
    37     ;
    38 
    39     modes: '(' (name + ) ')'
    40     ;
    41   \end{rail}
    42 
    43   \begin{description}
    44 
    45   \item @{command "typ"}~@{text \<tau>} reads and prints types of the
    46   meta-logic according to the current theory or proof context.
    47 
    48   \item @{command "term"}~@{text t} and @{command "prop"}~@{text \<phi>}
    49   read, type-check and print terms or propositions according to the
    50   current theory or proof context; the inferred type of @{text t} is
    51   output as well.  Note that these commands are also useful in
    52   inspecting the current environment of term abbreviations.
    53 
    54   \item @{command "thm"}~@{text "a\<^sub>1 \<dots> a\<^sub>n"} retrieves
    55   theorems from the current theory or proof context.  Note that any
    56   attributes included in the theorem specifications are applied to a
    57   temporary context derived from the current theory or proof; the
    58   result is discarded, i.e.\ attributes involved in @{text "a\<^sub>1,
    59   \<dots>, a\<^sub>n"} do not have any permanent effect.
    60 
    61   \item @{command "prf"} displays the (compact) proof term of the
    62   current proof state (if present), or of the given theorems. Note
    63   that this requires proof terms to be switched on for the current
    64   object logic (see the ``Proof terms'' section of the Isabelle
    65   reference manual for information on how to do this).
    66 
    67   \item @{command "full_prf"} is like @{command "prf"}, but displays
    68   the full proof term, i.e.\ also displays information omitted in the
    69   compact proof term, which is denoted by ``@{text _}'' placeholders
    70   there.
    71 
    72   \item @{command "pr"}~@{text "goals, prems"} prints the current
    73   proof state (if present), including the proof context, current facts
    74   and goals.  The optional limit arguments affect the number of goals
    75   and premises to be displayed, which is initially 10 for both.
    76   Omitting limit values leaves the current setting unchanged.
    77 
    78   \end{description}
    79 
    80   All of the diagnostic commands above admit a list of @{text modes}
    81   to be specified, which is appended to the current print mode (see
    82   also \cite{isabelle-ref}).  Thus the output behavior may be modified
    83   according particular print mode features.  For example, @{command
    84   "pr"}~@{text "(latex xsymbols)"} would print the current proof state
    85   with mathematical symbols and special characters represented in
    86   {\LaTeX} source, according to the Isabelle style
    87   \cite{isabelle-sys}.
    88 
    89   Note that antiquotations (cf.\ \secref{sec:antiq}) provide a more
    90   systematic way to include formal items into the printed text
    91   document.
    92 *}
    93 
    94 
    95 subsection {* Details of printed content *}
    96 
    97 text {*
    98   \begin{mldecls} 
    99     @{index_ML show_types: "bool Unsynchronized.ref"} & default @{ML false} \\
   100     @{index_ML show_sorts: "bool Unsynchronized.ref"} & default @{ML false} \\
   101     @{index_ML show_consts: "bool Unsynchronized.ref"} & default @{ML false} \\
   102     @{index_ML long_names: "bool Unsynchronized.ref"} & default @{ML false} \\
   103     @{index_ML short_names: "bool Unsynchronized.ref"} & default @{ML false} \\
   104     @{index_ML unique_names: "bool Unsynchronized.ref"} & default @{ML true} \\
   105     @{index_ML show_brackets: "bool Unsynchronized.ref"} & default @{ML false} \\
   106     @{index_ML eta_contract: "bool Unsynchronized.ref"} & default @{ML true} \\
   107     @{index_ML goals_limit: "int Unsynchronized.ref"} & default @{ML 10} \\
   108     @{index_ML Proof.show_main_goal: "bool Unsynchronized.ref"} & default @{ML false} \\
   109     @{index_ML show_hyps: "bool Unsynchronized.ref"} & default @{ML false} \\
   110     @{index_ML show_tags: "bool Unsynchronized.ref"} & default @{ML false} \\
   111     @{index_ML show_question_marks: "bool Unsynchronized.ref"} & default @{ML true} \\
   112   \end{mldecls}
   113 
   114   These global ML variables control the detail of information that is
   115   displayed for types, terms, theorems, goals etc.
   116 
   117   In interactive sessions, the user interface usually manages these
   118   global parameters of the Isabelle process, even with some concept of
   119   persistence.  Nonetheless it is occasionally useful to manipulate ML
   120   variables directly, e.g.\ using @{command "ML_val"} or @{command
   121   "ML_command"}.
   122 
   123   Batch-mode logic sessions may be configured by putting appropriate
   124   ML text directly into the @{verbatim ROOT.ML} file.
   125 
   126   \begin{description}
   127 
   128   \item @{ML show_types} and @{ML show_sorts} control printing of type
   129   constraints for term variables, and sort constraints for type
   130   variables.  By default, neither of these are shown in output.  If
   131   @{ML show_sorts} is set to @{ML true}, types are always shown as
   132   well.
   133 
   134   Note that displaying types and sorts may explain why a polymorphic
   135   inference rule fails to resolve with some goal, or why a rewrite
   136   rule does not apply as expected.
   137 
   138   \item @{ML show_consts} controls printing of types of constants when
   139   displaying a goal state.
   140 
   141   Note that the output can be enormous, because polymorphic constants
   142   often occur at several different type instances.
   143 
   144   \item @{ML long_names}, @{ML short_names}, and @{ML unique_names}
   145   control the way of printing fully qualified internal names in
   146   external form.  See also \secref{sec:antiq} for the document
   147   antiquotation options of the same names.
   148 
   149   \item @{ML show_brackets} controls bracketing in pretty printed
   150   output.  If set to @{ML true}, all sub-expressions of the pretty
   151   printing tree will be parenthesized, even if this produces malformed
   152   term syntax!  This crude way of showing the internal structure of
   153   pretty printed entities may occasionally help to diagnose problems
   154   with operator priorities, for example.
   155 
   156   \item @{ML eta_contract} controls @{text "\<eta>"}-contracted printing of
   157   terms.
   158 
   159   The @{text \<eta>}-contraction law asserts @{prop "(\<lambda>x. f x) \<equiv> f"},
   160   provided @{text x} is not free in @{text f}.  It asserts
   161   \emph{extensionality} of functions: @{prop "f \<equiv> g"} if @{prop "f x \<equiv>
   162   g x"} for all @{text x}.  Higher-order unification frequently puts
   163   terms into a fully @{text \<eta>}-expanded form.  For example, if @{text
   164   F} has type @{text "(\<tau> \<Rightarrow> \<tau>) \<Rightarrow> \<tau>"} then its expanded form is @{term
   165   "\<lambda>h. F (\<lambda>x. h x)"}.
   166 
   167   Setting @{ML eta_contract} makes Isabelle perform @{text
   168   \<eta>}-contractions before printing, so that @{term "\<lambda>h. F (\<lambda>x. h x)"}
   169   appears simply as @{text F}.
   170 
   171   Note that the distinction between a term and its @{text \<eta>}-expanded
   172   form occasionally matters.  While higher-order resolution and
   173   rewriting operate modulo @{text "\<alpha>\<beta>\<eta>"}-conversion, some other tools
   174   might look at terms more discretely.
   175 
   176   \item @{ML goals_limit} controls the maximum number of subgoals to
   177   be shown in goal output.
   178 
   179   \item @{ML Proof.show_main_goal} controls whether the main result to
   180   be proven should be displayed.  This information might be relevant
   181   for schematic goals, to inspect the current claim that has been
   182   synthesized so far.
   183 
   184   \item @{ML show_hyps} controls printing of implicit hypotheses of
   185   local facts.  Normally, only those hypotheses are displayed that are
   186   \emph{not} covered by the assumptions of the current context: this
   187   situation indicates a fault in some tool being used.
   188 
   189   By setting @{ML show_hyps} to @{ML true}, output of \emph{all}
   190   hypotheses can be enforced, which is occasionally useful for
   191   diagnostic purposes.
   192 
   193   \item @{ML show_tags} controls printing of extra annotations within
   194   theorems, such as internal position information, or the case names
   195   being attached by the attribute @{attribute case_names}.
   196 
   197   Note that the @{attribute tagged} and @{attribute untagged}
   198   attributes provide low-level access to the collection of tags
   199   associated with a theorem.
   200 
   201   \item @{ML show_question_marks} controls printing of question marks
   202   for schematic variables, such as @{text ?x}.  Only the leading
   203   question mark is affected, the remaining text is unchanged
   204   (including proper markup for schematic variables that might be
   205   relevant for user interfaces).
   206 
   207   \end{description}
   208 *}
   209 
   210 
   211 subsection {* Printing limits *}
   212 
   213 text {*
   214   \begin{mldecls}
   215     @{index_ML Pretty.setdepth: "int -> unit"} \\
   216     @{index_ML Pretty.setmargin: "int -> unit"} \\
   217     @{index_ML print_depth: "int -> unit"} \\
   218   \end{mldecls}
   219 
   220   These ML functions set limits for pretty printed text.
   221 
   222   \begin{description}
   223 
   224   \item @{ML Pretty.setdepth}~@{text d} tells the pretty printer to
   225   limit the printing depth to @{text d}.  This affects the display of
   226   types, terms, theorems etc.  The default value is 0, which permits
   227   printing to an arbitrary depth.  Other useful values for @{text d}
   228   are 10 and 20.
   229 
   230   \item @{ML Pretty.setmargin}~@{text m} tells the pretty printer to
   231   assume a right margin (page width) of @{text m}.  The initial margin
   232   is 76, but user interfaces might adapt the margin automatically when
   233   resizing windows.
   234 
   235   \item @{ML print_depth}~@{text n} limits the printing depth of the
   236   ML toplevel pretty printer; the precise effect depends on the ML
   237   compiler and run-time system.  Typically @{text n} should be less
   238   than 10.  Bigger values such as 100--1000 are useful for debugging.
   239 
   240   \end{description}
   241 *}
   242 
   243 
   244 section {* Mixfix annotations *}
   245 
   246 text {* Mixfix annotations specify concrete \emph{inner syntax} of
   247   Isabelle types and terms.  Locally fixed parameters in toplevel
   248   theorem statements, locale specifications etc.\ also admit mixfix
   249   annotations.
   250 
   251   \indexouternonterm{infix}\indexouternonterm{mixfix}\indexouternonterm{structmixfix}
   252   \begin{rail}
   253     infix: '(' ('infix' | 'infixl' | 'infixr') string nat ')'
   254     ;
   255     mixfix: infix | '(' string prios? nat? ')' | '(' 'binder' string prios? nat ')'
   256     ;
   257     structmixfix: mixfix | '(' 'structure' ')'
   258     ;
   259 
   260     prios: '[' (nat + ',') ']'
   261     ;
   262   \end{rail}
   263 
   264   Here the \railtok{string} specifications refer to the actual mixfix
   265   template, which may include literal text, spacing, blocks, and
   266   arguments (denoted by ``@{text _}''); the special symbol
   267   ``@{verbatim "\<index>"}'' (printed as ``@{text "\<index>"}'') represents an index
   268   argument that specifies an implicit structure reference (see also
   269   \secref{sec:locale}).  Infix and binder declarations provide common
   270   abbreviations for particular mixfix declarations.  So in practice,
   271   mixfix templates mostly degenerate to literal text for concrete
   272   syntax, such as ``@{verbatim "++"}'' for an infix symbol.
   273 
   274   \medskip In full generality, mixfix declarations work as follows.
   275   Suppose a constant @{text "c :: \<tau>\<^sub>1 \<Rightarrow> \<dots> \<tau>\<^sub>n \<Rightarrow> \<tau>"} is
   276   annotated by @{text "(mixfix [p\<^sub>1, \<dots>, p\<^sub>n] p)"}, where @{text
   277   "mixfix"} is a string @{text "d\<^sub>0 _ d\<^sub>1 _ \<dots> _ d\<^sub>n"} consisting of
   278   delimiters that surround argument positions as indicated by
   279   underscores.
   280 
   281   Altogether this determines a production for a context-free priority
   282   grammar, where for each argument @{text "i"} the syntactic category
   283   is determined by @{text "\<tau>\<^sub>i"} (with priority @{text "p\<^sub>i"}), and
   284   the result category is determined from @{text "\<tau>"} (with
   285   priority @{text "p"}).  Priority specifications are optional, with
   286   default 0 for arguments and 1000 for the result.
   287 
   288   Since @{text "\<tau>"} may be again a function type, the constant
   289   type scheme may have more argument positions than the mixfix
   290   pattern.  Printing a nested application @{text "c t\<^sub>1 \<dots> t\<^sub>m"} for
   291   @{text "m > n"} works by attaching concrete notation only to the
   292   innermost part, essentially by printing @{text "(c t\<^sub>1 \<dots> t\<^sub>n) \<dots> t\<^sub>m"}
   293   instead.  If a term has fewer arguments than specified in the mixfix
   294   template, the concrete syntax is ignored.
   295 
   296   \medskip A mixfix template may also contain additional directives
   297   for pretty printing, notably spaces, blocks, and breaks.  The
   298   general template format is a sequence over any of the following
   299   entities.
   300 
   301   \begin{description}
   302 
   303   \item @{text "d"} is a delimiter, namely a non-empty sequence of
   304   characters other than the following special characters:
   305 
   306   \smallskip
   307   \begin{tabular}{ll}
   308     @{verbatim "'"} & single quote \\
   309     @{verbatim "_"} & underscore \\
   310     @{text "\<index>"} & index symbol \\
   311     @{verbatim "("} & open parenthesis \\
   312     @{verbatim ")"} & close parenthesis \\
   313     @{verbatim "/"} & slash \\
   314   \end{tabular}
   315   \medskip
   316 
   317   \item @{verbatim "'"} escapes the special meaning of these
   318   meta-characters, producing a literal version of the following
   319   character, unless that is a blank.
   320 
   321   A single quote followed by a blank separates delimiters, without
   322   affecting printing, but input tokens may have additional white space
   323   here.
   324 
   325   \item @{verbatim "_"} is an argument position, which stands for a
   326   certain syntactic category in the underlying grammar.
   327 
   328   \item @{text "\<index>"} is an indexed argument position; this is the place
   329   where implicit structure arguments can be attached.
   330 
   331   \item @{text "s"} is a non-empty sequence of spaces for printing.
   332   This and the following specifications do not affect parsing at all.
   333 
   334   \item @{verbatim "("}@{text n} opens a pretty printing block.  The
   335   optional number specifies how much indentation to add when a line
   336   break occurs within the block.  If the parenthesis is not followed
   337   by digits, the indentation defaults to 0.  A block specified via
   338   @{verbatim "(00"} is unbreakable.
   339 
   340   \item @{verbatim ")"} closes a pretty printing block.
   341 
   342   \item @{verbatim "//"} forces a line break.
   343 
   344   \item @{verbatim "/"}@{text s} allows a line break.  Here @{text s}
   345   stands for the string of spaces (zero or more) right after the
   346   slash.  These spaces are printed if the break is \emph{not} taken.
   347 
   348   \end{description}
   349 
   350   For example, the template @{verbatim "(_ +/ _)"} specifies an infix
   351   operator.  There are two argument positions; the delimiter
   352   @{verbatim "+"} is preceded by a space and followed by a space or
   353   line break; the entire phrase is a pretty printing block.
   354 
   355   The general idea of pretty printing with blocks and breaks is also
   356   described in \cite{paulson-ml2}.
   357 *}
   358 
   359 
   360 section {* Explicit term notation *}
   361 
   362 text {*
   363   \begin{matharray}{rcll}
   364     @{command_def "notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   365     @{command_def "no_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   366   \end{matharray}
   367 
   368   \begin{rail}
   369     ('notation' | 'no\_notation') target? mode? \\ (nameref structmixfix + 'and')
   370     ;
   371   \end{rail}
   372 
   373   \begin{description}
   374 
   375   \item @{command "notation"}~@{text "c (mx)"} associates mixfix
   376   syntax with an existing constant or fixed variable.  This is a
   377   robust interface to the underlying @{command "syntax"} primitive
   378   (\secref{sec:syn-trans}).  Type declaration and internal syntactic
   379   representation of the given entity is retrieved from the context.
   380   
   381   \item @{command "no_notation"} is similar to @{command "notation"},
   382   but removes the specified syntax annotation from the present
   383   context.
   384 
   385   \end{description}
   386 *}
   387 
   388 
   389 section {* The Pure syntax \label{sec:pure-syntax} *}
   390 
   391 subsection {* Priority grammars \label{sec:priority-grammar} *}
   392 
   393 text {* A context-free grammar consists of a set of \emph{terminal
   394   symbols}, a set of \emph{nonterminal symbols} and a set of
   395   \emph{productions}.  Productions have the form @{text "A = \<gamma>"},
   396   where @{text A} is a nonterminal and @{text \<gamma>} is a string of
   397   terminals and nonterminals.  One designated nonterminal is called
   398   the \emph{root symbol}.  The language defined by the grammar
   399   consists of all strings of terminals that can be derived from the
   400   root symbol by applying productions as rewrite rules.
   401 
   402   The standard Isabelle parser for inner syntax uses a \emph{priority
   403   grammar}.  Each nonterminal is decorated by an integer priority:
   404   @{text "A\<^sup>(\<^sup>p\<^sup>)"}.  In a derivation, @{text "A\<^sup>(\<^sup>p\<^sup>)"} may be rewritten
   405   using a production @{text "A\<^sup>(\<^sup>q\<^sup>) = \<gamma>"} only if @{text "p \<le> q"}.  Any
   406   priority grammar can be translated into a normal context-free
   407   grammar by introducing new nonterminals and productions.
   408 
   409   \medskip Formally, a set of context free productions @{text G}
   410   induces a derivation relation @{text "\<longrightarrow>\<^sub>G"} as follows.  Let @{text
   411   \<alpha>} and @{text \<beta>} denote strings of terminal or nonterminal symbols.
   412   Then @{text "\<alpha> A\<^sup>(\<^sup>p\<^sup>) \<beta> \<longrightarrow>\<^sub>G \<alpha> \<gamma> \<beta>"} holds if and only if @{text G}
   413   contains some production @{text "A\<^sup>(\<^sup>q\<^sup>) = \<gamma>"} for @{text "p \<le> q"}.
   414 
   415   \medskip The following grammar for arithmetic expressions
   416   demonstrates how binding power and associativity of operators can be
   417   enforced by priorities.
   418 
   419   \begin{center}
   420   \begin{tabular}{rclr}
   421   @{text "A\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "="} & @{verbatim "("} @{text "A\<^sup>(\<^sup>0\<^sup>)"} @{verbatim ")"} \\
   422   @{text "A\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "="} & @{verbatim 0} \\
   423   @{text "A\<^sup>(\<^sup>0\<^sup>)"} & @{text "="} & @{text "A\<^sup>(\<^sup>0\<^sup>)"} @{verbatim "+"} @{text "A\<^sup>(\<^sup>1\<^sup>)"} \\
   424   @{text "A\<^sup>(\<^sup>2\<^sup>)"} & @{text "="} & @{text "A\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "*"} @{text "A\<^sup>(\<^sup>2\<^sup>)"} \\
   425   @{text "A\<^sup>(\<^sup>3\<^sup>)"} & @{text "="} & @{verbatim "-"} @{text "A\<^sup>(\<^sup>3\<^sup>)"} \\
   426   \end{tabular}
   427   \end{center}
   428   The choice of priorities determines that @{verbatim "-"} binds
   429   tighter than @{verbatim "*"}, which binds tighter than @{verbatim
   430   "+"}.  Furthermore @{verbatim "+"} associates to the left and
   431   @{verbatim "*"} to the right.
   432 
   433   \medskip For clarity, grammars obey these conventions:
   434   \begin{itemize}
   435 
   436   \item All priorities must lie between 0 and 1000.
   437 
   438   \item Priority 0 on the right-hand side and priority 1000 on the
   439   left-hand side may be omitted.
   440 
   441   \item The production @{text "A\<^sup>(\<^sup>p\<^sup>) = \<alpha>"} is written as @{text "A = \<alpha>
   442   (p)"}, i.e.\ the priority of the left-hand side actually appears in
   443   a column on the far right.
   444 
   445   \item Alternatives are separated by @{text "|"}.
   446 
   447   \item Repetition is indicated by dots @{text "(\<dots>)"} in an informal
   448   but obvious way.
   449 
   450   \end{itemize}
   451 
   452   Using these conventions, the example grammar specification above
   453   takes the form:
   454   \begin{center}
   455   \begin{tabular}{rclc}
   456     @{text A} & @{text "="} & @{verbatim "("} @{text A} @{verbatim ")"} \\
   457               & @{text "|"} & @{verbatim 0} & \qquad\qquad \\
   458               & @{text "|"} & @{text A} @{verbatim "+"} @{text "A\<^sup>(\<^sup>1\<^sup>)"} & @{text "(0)"} \\
   459               & @{text "|"} & @{text "A\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "*"} @{text "A\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
   460               & @{text "|"} & @{verbatim "-"} @{text "A\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
   461   \end{tabular}
   462   \end{center}
   463 *}
   464 
   465 
   466 subsection {* The Pure grammar *}
   467 
   468 text {*
   469   The priority grammar of the @{text "Pure"} theory is defined as follows:
   470 
   471   %FIXME syntax for "index" (?)
   472   %FIXME "op" versions of ==> etc. (?)
   473 
   474   \begin{center}
   475   \begin{supertabular}{rclr}
   476 
   477   @{syntax_def (inner) any} & = & @{text "prop  |  logic"} \\\\
   478 
   479   @{syntax_def (inner) prop} & = & @{verbatim "("} @{text prop} @{verbatim ")"} \\
   480     & @{text "|"} & @{text "prop\<^sup>(\<^sup>4\<^sup>)"} @{verbatim "::"} @{text type} & @{text "(3)"} \\
   481     & @{text "|"} & @{text "any\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "=?="} @{text "any\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
   482     & @{text "|"} & @{text "any\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "=="} @{text "any\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
   483     & @{text "|"} & @{text "any\<^sup>(\<^sup>3\<^sup>)"} @{text "\<equiv>"} @{text "any\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
   484     & @{text "|"} & @{text "prop\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "&&&"} @{text "prop\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
   485     & @{text "|"} & @{text "prop\<^sup>(\<^sup>2\<^sup>)"} @{verbatim "==>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
   486     & @{text "|"} & @{text "prop\<^sup>(\<^sup>2\<^sup>)"} @{text "\<Longrightarrow>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
   487     & @{text "|"} & @{verbatim "[|"} @{text prop} @{verbatim ";"} @{text "\<dots>"} @{verbatim ";"} @{text prop} @{verbatim "|]"} @{verbatim "==>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
   488     & @{text "|"} & @{text "\<lbrakk>"} @{text prop} @{verbatim ";"} @{text "\<dots>"} @{verbatim ";"} @{text prop} @{text "\<rbrakk>"} @{text "\<Longrightarrow>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
   489     & @{text "|"} & @{verbatim "!!"} @{text idts} @{verbatim "."} @{text prop} & @{text "(0)"} \\
   490     & @{text "|"} & @{text "\<And>"} @{text idts} @{verbatim "."} @{text prop} & @{text "(0)"} \\
   491     & @{text "|"} & @{verbatim OFCLASS} @{verbatim "("} @{text type} @{verbatim ","} @{text logic} @{verbatim ")"} \\
   492     & @{text "|"} & @{verbatim SORT_CONSTRAINT} @{verbatim "("} @{text type} @{verbatim ")"} \\
   493     & @{text "|"} & @{verbatim TERM} @{text logic} \\
   494     & @{text "|"} & @{verbatim PROP} @{text aprop} \\\\
   495 
   496   @{syntax_def (inner) aprop} & = & @{verbatim "("} @{text aprop} @{verbatim ")"} \\
   497     & @{text "|"} & @{text "id  |  longid  |  var  |  "}@{verbatim "_"}@{text "  |  "}@{verbatim "..."} \\
   498     & @{text "|"} & @{verbatim CONST} @{text "id  |  "}@{verbatim CONST} @{text "longid"} \\
   499     & @{text "|"} & @{text "logic\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)  any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) \<dots> any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "(999)"} \\\\
   500 
   501   @{syntax_def (inner) logic} & = & @{verbatim "("} @{text logic} @{verbatim ")"} \\
   502     & @{text "|"} & @{text "logic\<^sup>(\<^sup>4\<^sup>)"} @{verbatim "::"} @{text type} & @{text "(3)"} \\
   503     & @{text "|"} & @{text "id  |  longid  |  var  |  "}@{verbatim "_"}@{text "  |  "}@{verbatim "..."} \\
   504     & @{text "|"} & @{verbatim CONST} @{text "id  |  "}@{verbatim CONST} @{text "longid"} \\
   505     & @{text "|"} & @{text "logic\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)  any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) \<dots> any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "(999)"} \\
   506     & @{text "|"} & @{verbatim "%"} @{text pttrns} @{verbatim "."} @{text "any\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
   507     & @{text "|"} & @{text \<lambda>} @{text pttrns} @{verbatim "."} @{text "any\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
   508     & @{text "|"} & @{verbatim TYPE} @{verbatim "("} @{text type} @{verbatim ")"} \\\\
   509 
   510   @{syntax_def (inner) idt} & = & @{verbatim "("} @{text idt} @{verbatim ")"}@{text "  |  id  |  "}@{verbatim "_"} \\
   511     & @{text "|"} & @{text id} @{verbatim "::"} @{text type} & @{text "(0)"} \\
   512     & @{text "|"} & @{verbatim "_"} @{verbatim "::"} @{text type} & @{text "(0)"} \\\\
   513 
   514   @{syntax_def (inner) idts} & = & @{text "idt  |  idt\<^sup>(\<^sup>1\<^sup>) idts"} & @{text "(0)"} \\\\
   515 
   516   @{syntax_def (inner) pttrn} & = & @{text idt} \\\\
   517 
   518   @{syntax_def (inner) pttrns} & = & @{text "pttrn  |  pttrn\<^sup>(\<^sup>1\<^sup>) pttrns"} & @{text "(0)"} \\\\
   519 
   520   @{syntax_def (inner) type} & = & @{verbatim "("} @{text type} @{verbatim ")"} \\
   521     & @{text "|"} & @{text "tid  |  tvar  |  "}@{verbatim "_"} \\
   522     & @{text "|"} & @{text "tid"} @{verbatim "::"} @{text "sort  |  tvar  "}@{verbatim "::"} @{text "sort  |  "}@{verbatim "_"} @{verbatim "::"} @{text "sort"} \\
   523     & @{text "|"} & @{text "id  |  type\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) id  |  "}@{verbatim "("} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim ")"} @{text id} \\
   524     & @{text "|"} & @{text "longid  |  type\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) longid"} \\
   525     & @{text "|"} & @{verbatim "("} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim ")"} @{text longid} \\
   526     & @{text "|"} & @{text "type\<^sup>(\<^sup>1\<^sup>)"} @{verbatim "=>"} @{text type} & @{text "(0)"} \\
   527     & @{text "|"} & @{text "type\<^sup>(\<^sup>1\<^sup>)"} @{text "\<Rightarrow>"} @{text type} & @{text "(0)"} \\
   528     & @{text "|"} & @{verbatim "["} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim "]"} @{verbatim "=>"} @{text type} & @{text "(0)"} \\
   529     & @{text "|"} & @{verbatim "["} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim "]"} @{text "\<Rightarrow>"} @{text type} & @{text "(0)"} \\\\
   530 
   531   @{syntax_def (inner) sort} & = & @{text "id  |  longid  |  "}@{verbatim "{}"} \\
   532     & @{text "|"} & @{verbatim "{"} @{text "(id | longid)"} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text "(id | longid)"} @{verbatim "}"} \\
   533   \end{supertabular}
   534   \end{center}
   535 
   536   \medskip Here literal terminals are printed @{verbatim "verbatim"};
   537   see also \secref{sec:inner-lex} for further token categories of the
   538   inner syntax.  The meaning of the nonterminals defined by the above
   539   grammar is as follows:
   540 
   541   \begin{description}
   542 
   543   \item @{syntax_ref (inner) any} denotes any term.
   544 
   545   \item @{syntax_ref (inner) prop} denotes meta-level propositions,
   546   which are terms of type @{typ prop}.  The syntax of such formulae of
   547   the meta-logic is carefully distinguished from usual conventions for
   548   object-logics.  In particular, plain @{text "\<lambda>"}-term notation is
   549   \emph{not} recognized as @{syntax (inner) prop}.
   550 
   551   \item @{syntax_ref (inner) aprop} denotes atomic propositions, which
   552   are embedded into regular @{syntax (inner) prop} by means of an
   553   explicit @{verbatim PROP} token.
   554 
   555   Terms of type @{typ prop} with non-constant head, e.g.\ a plain
   556   variable, are printed in this form.  Constants that yield type @{typ
   557   prop} are expected to provide their own concrete syntax; otherwise
   558   the printed version will appear like @{syntax (inner) logic} and
   559   cannot be parsed again as @{syntax (inner) prop}.
   560 
   561   \item @{syntax_ref (inner) logic} denotes arbitrary terms of a
   562   logical type, excluding type @{typ prop}.  This is the main
   563   syntactic category of object-logic entities, covering plain @{text
   564   \<lambda>}-term notation (variables, abstraction, application), plus
   565   anything defined by the user.
   566 
   567   When specifying notation for logical entities, all logical types
   568   (excluding @{typ prop}) are \emph{collapsed} to this single category
   569   of @{syntax (inner) logic}.
   570 
   571   \item @{syntax_ref (inner) idt} denotes identifiers, possibly
   572   constrained by types.
   573 
   574   \item @{syntax_ref (inner) idts} denotes a sequence of @{syntax_ref
   575   (inner) idt}.  This is the most basic category for variables in
   576   iterated binders, such as @{text "\<lambda>"} or @{text "\<And>"}.
   577 
   578   \item @{syntax_ref (inner) pttrn} and @{syntax_ref (inner) pttrns}
   579   denote patterns for abstraction, cases bindings etc.  In Pure, these
   580   categories start as a merely copy of @{syntax (inner) idt} and
   581   @{syntax (inner) idts}, respectively.  Object-logics may add
   582   additional productions for binding forms.
   583 
   584   \item @{syntax_ref (inner) type} denotes types of the meta-logic.
   585 
   586   \item @{syntax_ref (inner) sort} denotes meta-level sorts.
   587 
   588   \end{description}
   589 
   590   Here are some further explanations of certain syntax features.
   591 
   592   \begin{itemize}
   593 
   594   \item In @{syntax (inner) idts}, note that @{text "x :: nat y"} is
   595   parsed as @{text "x :: (nat y)"}, treating @{text y} like a type
   596   constructor applied to @{text nat}.  To avoid this interpretation,
   597   write @{text "(x :: nat) y"} with explicit parentheses.
   598 
   599   \item Similarly, @{text "x :: nat y :: nat"} is parsed as @{text "x ::
   600   (nat y :: nat)"}.  The correct form is @{text "(x :: nat) (y ::
   601   nat)"}, or @{text "(x :: nat) y :: nat"} if @{text y} is last in the
   602   sequence of identifiers.
   603 
   604   \item Type constraints for terms bind very weakly.  For example,
   605   @{text "x < y :: nat"} is normally parsed as @{text "(x < y) ::
   606   nat"}, unless @{text "<"} has a very low priority, in which case the
   607   input is likely to be ambiguous.  The correct form is @{text "x < (y
   608   :: nat)"}.
   609 
   610   \item Constraints may be either written with two literal colons
   611   ``@{verbatim "::"}'' or the double-colon symbol @{verbatim "\<Colon>"},
   612   which actually looks exactly the same in some {\LaTeX} styles.
   613 
   614   \item Dummy variables (written as underscore) may occur in different
   615   roles.
   616 
   617   \begin{description}
   618 
   619   \item A type ``@{text "_"}'' or ``@{text "_ :: sort"}'' acts like an
   620   anonymous inference parameter, which is filled-in according to the
   621   most general type produced by the type-checking phase.
   622 
   623   \item A bound ``@{text "_"}'' refers to a vacuous abstraction, where
   624   the body does not refer to the binding introduced here.  As in the
   625   term @{term "\<lambda>x _. x"}, which is @{text "\<alpha>"}-equivalent to @{text
   626   "\<lambda>x y. x"}.
   627 
   628   \item A free ``@{text "_"}'' refers to an implicit outer binding.
   629   Higher definitional packages usually allow forms like @{text "f x _
   630   = x"}.
   631 
   632   \item A schematic ``@{text "_"}'' (within a term pattern, see
   633   \secref{sec:term-decls}) refers to an anonymous variable that is
   634   implicitly abstracted over its context of locally bound variables.
   635   For example, this allows pattern matching of @{text "{x. f x = g
   636   x}"} against @{text "{x. _ = _}"}, or even @{text "{_. _ = _}"} by
   637   using both bound and schematic dummies.
   638 
   639   \end{description}
   640 
   641   \item The three literal dots ``@{verbatim "..."}'' may be also
   642   written as ellipsis symbol @{verbatim "\<dots>"}.  In both cases this
   643   refers to a special schematic variable, which is bound in the
   644   context.  This special term abbreviation works nicely with
   645   calculational reasoning (\secref{sec:calculation}).
   646 
   647   \end{itemize}
   648 *}
   649 
   650 
   651 section {* Lexical matters \label{sec:inner-lex} *}
   652 
   653 text {* The inner lexical syntax vaguely resembles the outer one
   654   (\secref{sec:outer-lex}), but some details are different.  There are
   655   two main categories of inner syntax tokens:
   656 
   657   \begin{enumerate}
   658 
   659   \item \emph{delimiters} --- the literal tokens occurring in
   660   productions of the given priority grammar (cf.\
   661   \secref{sec:priority-grammar});
   662 
   663   \item \emph{named tokens} --- various categories of identifiers etc.
   664 
   665   \end{enumerate}
   666 
   667   Delimiters override named tokens and may thus render certain
   668   identifiers inaccessible.  Sometimes the logical context admits
   669   alternative ways to refer to the same entity, potentially via
   670   qualified names.
   671 
   672   \medskip The categories for named tokens are defined once and for
   673   all as follows, reusing some categories of the outer token syntax
   674   (\secref{sec:outer-lex}).
   675 
   676   \begin{center}
   677   \begin{supertabular}{rcl}
   678     @{syntax_def (inner) id} & = & @{syntax_ref ident} \\
   679     @{syntax_def (inner) longid} & = & @{syntax_ref longident} \\
   680     @{syntax_def (inner) var} & = & @{syntax_ref var} \\
   681     @{syntax_def (inner) tid} & = & @{syntax_ref typefree} \\
   682     @{syntax_def (inner) tvar} & = & @{syntax_ref typevar} \\
   683     @{syntax_def (inner) num} & = & @{syntax_ref nat}@{text "  |  "}@{verbatim "-"}@{syntax_ref nat} \\
   684     @{syntax_def (inner) float_token} & = & @{syntax_ref nat}@{verbatim "."}@{syntax_ref nat}@{text "  |  "}@{verbatim "-"}@{syntax_ref nat}@{verbatim "."}@{syntax_ref nat} \\
   685     @{syntax_def (inner) xnum} & = & @{verbatim "#"}@{syntax_ref nat}@{text "  |  "}@{verbatim "#-"}@{syntax_ref nat} \\
   686 
   687     @{syntax_def (inner) xstr} & = & @{verbatim "''"} @{text "\<dots>"} @{verbatim "''"} \\
   688   \end{supertabular}
   689   \end{center}
   690 
   691   The token categories @{syntax (inner) num}, @{syntax (inner)
   692   float_token}, @{syntax (inner) xnum}, and @{syntax (inner) xstr} are
   693   not used in Pure.  Object-logics may implement numerals and string
   694   constants by adding appropriate syntax declarations, together with
   695   some translation functions (e.g.\ see Isabelle/HOL).
   696 
   697   The derived categories @{syntax_def (inner) num_const} and
   698   @{syntax_def (inner) float_const} provide robust access to @{syntax
   699   (inner) num}, and @{syntax (inner) float_token}, respectively: the
   700   syntax tree holds a syntactic constant instead of a free variable.
   701 *}
   702 
   703 
   704 section {* Syntax and translations \label{sec:syn-trans} *}
   705 
   706 text {*
   707   \begin{matharray}{rcl}
   708     @{command_def "nonterminals"} & : & @{text "theory \<rightarrow> theory"} \\
   709     @{command_def "syntax"} & : & @{text "theory \<rightarrow> theory"} \\
   710     @{command_def "no_syntax"} & : & @{text "theory \<rightarrow> theory"} \\
   711     @{command_def "translations"} & : & @{text "theory \<rightarrow> theory"} \\
   712     @{command_def "no_translations"} & : & @{text "theory \<rightarrow> theory"} \\
   713   \end{matharray}
   714 
   715   \begin{rail}
   716     'nonterminals' (name +)
   717     ;
   718     ('syntax' | 'no\_syntax') mode? (constdecl +)
   719     ;
   720     ('translations' | 'no\_translations') (transpat ('==' | '=>' | '<=' | rightleftharpoons | rightharpoonup | leftharpoondown) transpat +)
   721     ;
   722 
   723     mode: ('(' ( name | 'output' | name 'output' ) ')')
   724     ;
   725     transpat: ('(' nameref ')')? string
   726     ;
   727   \end{rail}
   728 
   729   \begin{description}
   730   
   731   \item @{command "nonterminals"}~@{text c} declares a type
   732   constructor @{text c} (without arguments) to act as purely syntactic
   733   type: a nonterminal symbol of the inner syntax.
   734 
   735   \item @{command "syntax"}~@{text "(mode) decls"} is similar to
   736   @{command "consts"}~@{text decls}, except that the actual logical
   737   signature extension is omitted.  Thus the context free grammar of
   738   Isabelle's inner syntax may be augmented in arbitrary ways,
   739   independently of the logic.  The @{text mode} argument refers to the
   740   print mode that the grammar rules belong; unless the @{keyword_ref
   741   "output"} indicator is given, all productions are added both to the
   742   input and output grammar.
   743   
   744   \item @{command "no_syntax"}~@{text "(mode) decls"} removes grammar
   745   declarations (and translations) resulting from @{text decls}, which
   746   are interpreted in the same manner as for @{command "syntax"} above.
   747   
   748   \item @{command "translations"}~@{text rules} specifies syntactic
   749   translation rules (i.e.\ macros): parse~/ print rules (@{text "\<rightleftharpoons>"}),
   750   parse rules (@{text "\<rightharpoonup>"}), or print rules (@{text "\<leftharpoondown>"}).
   751   Translation patterns may be prefixed by the syntactic category to be
   752   used for parsing; the default is @{text logic}.
   753   
   754   \item @{command "no_translations"}~@{text rules} removes syntactic
   755   translation rules, which are interpreted in the same manner as for
   756   @{command "translations"} above.
   757 
   758   \end{description}
   759 *}
   760 
   761 
   762 section {* Syntax translation functions \label{sec:tr-funs} *}
   763 
   764 text {*
   765   \begin{matharray}{rcl}
   766     @{command_def "parse_ast_translation"} & : & @{text "theory \<rightarrow> theory"} \\
   767     @{command_def "parse_translation"} & : & @{text "theory \<rightarrow> theory"} \\
   768     @{command_def "print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
   769     @{command_def "typed_print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
   770     @{command_def "print_ast_translation"} & : & @{text "theory \<rightarrow> theory"} \\
   771   \end{matharray}
   772 
   773   \begin{rail}
   774   ( 'parse\_ast\_translation' | 'parse\_translation' | 'print\_translation' |
   775     'typed\_print\_translation' | 'print\_ast\_translation' ) ('(advanced)')? text
   776   ;
   777   \end{rail}
   778 
   779   Syntax translation functions written in ML admit almost arbitrary
   780   manipulations of Isabelle's inner syntax.  Any of the above commands
   781   have a single \railqtok{text} argument that refers to an ML
   782   expression of appropriate type, which are as follows by default:
   783 
   784 %FIXME proper antiquotations
   785 \begin{ttbox}
   786 val parse_ast_translation   : (string * (ast list -> ast)) list
   787 val parse_translation       : (string * (term list -> term)) list
   788 val print_translation       : (string * (term list -> term)) list
   789 val typed_print_translation :
   790   (string * (bool -> typ -> term list -> term)) list
   791 val print_ast_translation   : (string * (ast list -> ast)) list
   792 \end{ttbox}
   793 
   794   If the @{text "(advanced)"} option is given, the corresponding
   795   translation functions may depend on the current theory or proof
   796   context.  This allows to implement advanced syntax mechanisms, as
   797   translations functions may refer to specific theory declarations or
   798   auxiliary proof data.
   799 
   800   See also \cite{isabelle-ref} for more information on the general
   801   concept of syntax transformations in Isabelle.
   802 
   803 %FIXME proper antiquotations
   804 \begin{ttbox}
   805 val parse_ast_translation:
   806   (string * (Proof.context -> ast list -> ast)) list
   807 val parse_translation:
   808   (string * (Proof.context -> term list -> term)) list
   809 val print_translation:
   810   (string * (Proof.context -> term list -> term)) list
   811 val typed_print_translation:
   812   (string * (Proof.context -> bool -> typ -> term list -> term)) list
   813 val print_ast_translation:
   814   (string * (Proof.context -> ast list -> ast)) list
   815 \end{ttbox}
   816 *}
   817 
   818 
   819 section {* Inspecting the syntax *}
   820 
   821 text {*
   822   \begin{matharray}{rcl}
   823     @{command_def "print_syntax"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
   824   \end{matharray}
   825 
   826   \begin{description}
   827 
   828   \item @{command "print_syntax"} prints the inner syntax of the
   829   current context.  The output can be quite large; the most important
   830   sections are explained below.
   831 
   832   \begin{description}
   833 
   834   \item @{text "lexicon"} lists the delimiters of the inner token
   835   language; see \secref{sec:inner-lex}.
   836 
   837   \item @{text "prods"} lists the productions of the underlying
   838   priority grammar; see \secref{sec:priority-grammar}.
   839 
   840   The nonterminal @{text "A\<^sup>(\<^sup>p\<^sup>)"} is rendered in plain text as @{text
   841   "A[p]"}; delimiters are quoted.  Many productions have an extra
   842   @{text "\<dots> => name"}.  These names later become the heads of parse
   843   trees; they also guide the pretty printer.
   844 
   845   Productions without such parse tree names are called \emph{copy
   846   productions}.  Their right-hand side must have exactly one
   847   nonterminal symbol (or named token).  The parser does not create a
   848   new parse tree node for copy productions, but simply returns the
   849   parse tree of the right-hand symbol.
   850 
   851   If the right-hand side of a copy production consists of a single
   852   nonterminal without any delimiters, then it is called a \emph{chain
   853   production}.  Chain productions act as abbreviations: conceptually,
   854   they are removed from the grammar by adding new productions.
   855   Priority information attached to chain productions is ignored; only
   856   the dummy value @{text "-1"} is displayed.
   857 
   858   \item @{text "print modes"} lists the alternative print modes
   859   provided by this grammar; see \secref{sec:print-modes}.
   860 
   861   \item @{text "parse_rules"} and @{text "print_rules"} relate to
   862   syntax translations (macros); see \secref{sec:syn-trans}.
   863 
   864   \item @{text "parse_ast_translation"} and @{text
   865   "print_ast_translation"} list sets of constants that invoke
   866   translation functions for abstract syntax trees, which are only
   867   required in very special situations; see \secref{sec:tr-funs}.
   868 
   869   \item @{text "parse_translation"} and @{text "print_translation"}
   870   list the sets of constants that invoke regular translation
   871   functions; see \secref{sec:tr-funs}.
   872 
   873   \end{description}
   874   
   875   \end{description}
   876 *}
   877 
   878 end