doc-src/IsarRef/Thy/Outer_Syntax.thy
author wenzelm
Thu, 13 Nov 2008 21:59:02 +0100
changeset 28774 0e25ef17b06b
parent 28762 f5d79aeffd81
child 28775 d25fe9601dbd
permissions -rw-r--r--
more tuning of Pure grammer;
     1 (* $Id$ *)
     2 
     3 theory Outer_Syntax
     4 imports Main
     5 begin
     6 
     7 chapter {* Outer syntax *}
     8 
     9 text {*
    10   The rather generic framework of Isabelle/Isar syntax emerges from
    11   three main syntactic categories: \emph{commands} of the top-level
    12   Isar engine (covering theory and proof elements), \emph{methods} for
    13   general goal refinements (analogous to traditional ``tactics''), and
    14   \emph{attributes} for operations on facts (within a certain
    15   context).  Subsequently we give a reference of basic syntactic
    16   entities underlying Isabelle/Isar syntax in a bottom-up manner.
    17   Concrete theory and proof language elements will be introduced later
    18   on.
    19 
    20   \medskip In order to get started with writing well-formed
    21   Isabelle/Isar documents, the most important aspect to be noted is
    22   the difference of \emph{inner} versus \emph{outer} syntax.  Inner
    23   syntax is that of Isabelle types and terms of the logic, while outer
    24   syntax is that of Isabelle/Isar theory sources (specifications and
    25   proofs).  As a general rule, inner syntax entities may occur only as
    26   \emph{atomic entities} within outer syntax.  For example, the string
    27   @{verbatim "\"x + y\""} and identifier @{verbatim z} are legal term
    28   specifications within a theory, while @{verbatim "x + y"} without
    29   quotes is not.
    30 
    31   Printed theory documents usually omit quotes to gain readability
    32   (this is a matter of {\LaTeX} macro setup, say via @{verbatim
    33   "\\isabellestyle"}, see also \cite{isabelle-sys}).  Experienced
    34   users of Isabelle/Isar may easily reconstruct the lost technical
    35   information, while mere readers need not care about quotes at all.
    36 
    37   \medskip Isabelle/Isar input may contain any number of input
    38   termination characters ``@{verbatim ";"}'' (semicolon) to separate
    39   commands explicitly.  This is particularly useful in interactive
    40   shell sessions to make clear where the current command is intended
    41   to end.  Otherwise, the interpreter loop will continue to issue a
    42   secondary prompt ``@{verbatim "#"}'' until an end-of-command is
    43   clearly recognized from the input syntax, e.g.\ encounter of the
    44   next command keyword.
    45 
    46   More advanced interfaces such as Proof~General \cite{proofgeneral}
    47   do not require explicit semicolons, the amount of input text is
    48   determined automatically by inspecting the present content of the
    49   Emacs text buffer.  In the printed presentation of Isabelle/Isar
    50   documents semicolons are omitted altogether for readability.
    51 
    52   \begin{warn}
    53     Proof~General requires certain syntax classification tables in
    54     order to achieve properly synchronized interaction with the
    55     Isabelle/Isar process.  These tables need to be consistent with
    56     the Isabelle version and particular logic image to be used in a
    57     running session (common object-logics may well change the outer
    58     syntax).  The standard setup should work correctly with any of the
    59     ``official'' logic images derived from Isabelle/HOL (including
    60     HOLCF etc.).  Users of alternative logics may need to tell
    61     Proof~General explicitly, e.g.\ by giving an option @{verbatim "-k ZF"}
    62     (in conjunction with @{verbatim "-l ZF"}, to specify the default
    63     logic image).  Note that option @{verbatim "-L"} does both
    64     of this at the same time.
    65   \end{warn}
    66 *}
    67 
    68 
    69 section {* Lexical matters \label{sec:outer-lex} *}
    70 
    71 text {* The Isabelle/Isar outer syntax provides token classes as
    72   presented below; most of these coincide with the inner lexical
    73   syntax as defined in \secref{sec:inner-lex}.
    74 
    75   \begin{matharray}{rcl}
    76     @{syntax_def ident} & = & letter\,quasiletter^* \\
    77     @{syntax_def longident} & = & ident (\verb,.,ident)^+ \\
    78     @{syntax_def symident} & = & sym^+ ~|~ \verb,\,\verb,<,ident\verb,>, \\
    79     @{syntax_def nat} & = & digit^+ \\
    80     @{syntax_def var} & = & ident ~|~ \verb,?,ident ~|~ \verb,?,ident\verb,.,nat \\
    81     @{syntax_def typefree} & = & \verb,',ident \\
    82     @{syntax_def typevar} & = & typefree ~|~ \verb,?,typefree ~|~ \verb,?,typefree\verb,.,nat \\
    83     @{syntax_def string} & = & \verb,", ~\dots~ \verb,", \\
    84     @{syntax_def altstring} & = & \backquote ~\dots~ \backquote \\
    85     @{syntax_def verbatim} & = & \verb,{*, ~\dots~ \verb,*,\verb,}, \\[1ex]
    86 
    87     letter & = & latin ~|~ \verb,\,\verb,<,latin\verb,>, ~|~ \verb,\,\verb,<,latin\,latin\verb,>, ~|~ greek ~|~ \\
    88            &   & \verb,\<^isub>, ~|~ \verb,\<^isup>, \\
    89     quasiletter & = & letter ~|~ digit ~|~ \verb,_, ~|~ \verb,', \\
    90     latin & = & \verb,a, ~|~ \dots ~|~ \verb,z, ~|~ \verb,A, ~|~ \dots ~|~ \verb,Z, \\
    91     digit & = & \verb,0, ~|~ \dots ~|~ \verb,9, \\
    92     sym & = & \verb,!, ~|~ \verb,#, ~|~ \verb,$, ~|~ \verb,%, ~|~ \verb,&, ~|~
    93      \verb,*, ~|~ \verb,+, ~|~ \verb,-, ~|~ \verb,/, ~|~ \\
    94     & & \verb,<, ~|~ \verb,=, ~|~ \verb,>, ~|~ \verb,?, ~|~ \texttt{\at} ~|~
    95     \verb,^, ~|~ \verb,_, ~|~ \verb,|, ~|~ \verb,~, \\
    96     greek & = & \verb,\<alpha>, ~|~ \verb,\<beta>, ~|~ \verb,\<gamma>, ~|~ \verb,\<delta>, ~| \\
    97           &   & \verb,\<epsilon>, ~|~ \verb,\<zeta>, ~|~ \verb,\<eta>, ~|~ \verb,\<theta>, ~| \\
    98           &   & \verb,\<iota>, ~|~ \verb,\<kappa>, ~|~ \verb,\<mu>, ~|~ \verb,\<nu>, ~| \\
    99           &   & \verb,\<xi>, ~|~ \verb,\<pi>, ~|~ \verb,\<rho>, ~|~ \verb,\<sigma>, ~|~ \verb,\<tau>, ~| \\
   100           &   & \verb,\<upsilon>, ~|~ \verb,\<phi>, ~|~ \verb,\<chi>, ~|~ \verb,\<psi>, ~| \\
   101           &   & \verb,\<omega>, ~|~ \verb,\<Gamma>, ~|~ \verb,\<Delta>, ~|~ \verb,\<Theta>, ~| \\
   102           &   & \verb,\<Lambda>, ~|~ \verb,\<Xi>, ~|~ \verb,\<Pi>, ~|~ \verb,\<Sigma>, ~| \\
   103           &   & \verb,\<Upsilon>, ~|~ \verb,\<Phi>, ~|~ \verb,\<Psi>, ~|~ \verb,\<Omega>, \\
   104   \end{matharray}
   105 
   106   The syntax of @{syntax string} admits any characters, including
   107   newlines; ``@{verbatim "\""}'' (double-quote) and ``@{verbatim
   108   "\\"}'' (backslash) need to be escaped by a backslash; arbitrary
   109   character codes may be specified as ``@{verbatim "\\"}@{text ddd}'',
   110   with three decimal digits.  Alternative strings according to
   111   @{syntax altstring} are analogous, using single back-quotes instead.
   112   The body of @{syntax verbatim} may consist of any text not
   113   containing ``@{verbatim "*"}@{verbatim "}"}''; this allows
   114   convenient inclusion of quotes without further escapes.  The greek
   115   letters do \emph{not} include @{verbatim "\<lambda>"}, which is already used
   116   differently in the meta-logic.
   117 
   118   Common mathematical symbols such as @{text \<forall>} are represented in
   119   Isabelle as @{verbatim \<forall>}.  There are infinitely many Isabelle
   120   symbols like this, although proper presentation is left to front-end
   121   tools such as {\LaTeX} or Proof~General with the X-Symbol package.
   122   A list of standard Isabelle symbols that work well with these tools
   123   is given in \cite[appendix~A]{isabelle-sys}.
   124   
   125   Source comments take the form @{verbatim "(*"}~@{text
   126   "\<dots>"}~@{verbatim "*)"} and may be nested, although user-interface
   127   tools might prevent this.  Note that this form indicates source
   128   comments only, which are stripped after lexical analysis of the
   129   input.  The Isar syntax also provides proper \emph{document
   130   comments} that are considered as part of the text (see
   131   \secref{sec:comments}).
   132 *}
   133 
   134 
   135 section {* Common syntax entities *}
   136 
   137 text {*
   138   We now introduce several basic syntactic entities, such as names,
   139   terms, and theorem specifications, which are factored out of the
   140   actual Isar language elements to be described later.
   141 *}
   142 
   143 
   144 subsection {* Names *}
   145 
   146 text {*
   147   Entity \railqtok{name} usually refers to any name of types,
   148   constants, theorems etc.\ that are to be \emph{declared} or
   149   \emph{defined} (so qualified identifiers are excluded here).  Quoted
   150   strings provide an escape for non-identifier names or those ruled
   151   out by outer syntax keywords (e.g.\ quoted @{verbatim "\"let\""}).
   152   Already existing objects are usually referenced by
   153   \railqtok{nameref}.
   154 
   155   \indexoutertoken{name}\indexoutertoken{parname}\indexoutertoken{nameref}
   156   \indexoutertoken{int}
   157   \begin{rail}
   158     name: ident | symident | string | nat
   159     ;
   160     parname: '(' name ')'
   161     ;
   162     nameref: name | longident
   163     ;
   164     int: nat | '-' nat
   165     ;
   166   \end{rail}
   167 *}
   168 
   169 
   170 subsection {* Comments \label{sec:comments} *}
   171 
   172 text {*
   173   Large chunks of plain \railqtok{text} are usually given
   174   \railtok{verbatim}, i.e.\ enclosed in @{verbatim "{"}@{verbatim
   175   "*"}~@{text "\<dots>"}~@{verbatim "*"}@{verbatim "}"}.  For convenience,
   176   any of the smaller text units conforming to \railqtok{nameref} are
   177   admitted as well.  A marginal \railnonterm{comment} is of the form
   178   @{verbatim "--"} \railqtok{text}.  Any number of these may occur
   179   within Isabelle/Isar commands.
   180 
   181   \indexoutertoken{text}\indexouternonterm{comment}
   182   \begin{rail}
   183     text: verbatim | nameref
   184     ;
   185     comment: '--' text
   186     ;
   187   \end{rail}
   188 *}
   189 
   190 
   191 subsection {* Type classes, sorts and arities *}
   192 
   193 text {*
   194   Classes are specified by plain names.  Sorts have a very simple
   195   inner syntax, which is either a single class name @{text c} or a
   196   list @{text "{c\<^sub>1, \<dots>, c\<^sub>n}"} referring to the
   197   intersection of these classes.  The syntax of type arities is given
   198   directly at the outer level.
   199 
   200   \indexouternonterm{sort}\indexouternonterm{arity}
   201   \indexouternonterm{classdecl}
   202   \begin{rail}
   203     classdecl: name (('<' | subseteq) (nameref + ','))?
   204     ;
   205     sort: nameref
   206     ;
   207     arity: ('(' (sort + ',') ')')? sort
   208     ;
   209   \end{rail}
   210 *}
   211 
   212 
   213 subsection {* Types and terms \label{sec:types-terms} *}
   214 
   215 text {*
   216   The actual inner Isabelle syntax, that of types and terms of the
   217   logic, is far too sophisticated in order to be modelled explicitly
   218   at the outer theory level.  Basically, any such entity has to be
   219   quoted to turn it into a single token (the parsing and type-checking
   220   is performed internally later).  For convenience, a slightly more
   221   liberal convention is adopted: quotes may be omitted for any type or
   222   term that is already atomic at the outer level.  For example, one
   223   may just write @{verbatim x} instead of quoted @{verbatim "\"x\""}.
   224   Note that symbolic identifiers (e.g.\ @{verbatim "++"} or @{text
   225   "\<forall>"} are available as well, provided these have not been superseded
   226   by commands or other keywords already (such as @{verbatim "="} or
   227   @{verbatim "+"}).
   228 
   229   \indexoutertoken{type}\indexoutertoken{term}\indexoutertoken{prop}
   230   \begin{rail}
   231     type: nameref | typefree | typevar
   232     ;
   233     term: nameref | var
   234     ;
   235     prop: term
   236     ;
   237   \end{rail}
   238 
   239   Positional instantiations are indicated by giving a sequence of
   240   terms, or the placeholder ``@{text _}'' (underscore), which means to
   241   skip a position.
   242 
   243   \indexoutertoken{inst}\indexoutertoken{insts}
   244   \begin{rail}
   245     inst: underscore | term
   246     ;
   247     insts: (inst *)
   248     ;
   249   \end{rail}
   250 
   251   Type declarations and definitions usually refer to
   252   \railnonterm{typespec} on the left-hand side.  This models basic
   253   type constructor application at the outer syntax level.  Note that
   254   only plain postfix notation is available here, but no infixes.
   255 
   256   \indexouternonterm{typespec}
   257   \begin{rail}
   258     typespec: (() | typefree | '(' ( typefree + ',' ) ')') name
   259     ;
   260   \end{rail}
   261 *}
   262 
   263 
   264 subsection {* Term patterns and declarations \label{sec:term-decls} *}
   265 
   266 text {*
   267   Wherever explicit propositions (or term fragments) occur in a proof
   268   text, casual binding of schematic term variables may be given
   269   specified via patterns of the form ``@{text "(\<IS> p\<^sub>1 \<dots>
   270   p\<^sub>n)"}''.  This works both for \railqtok{term} and \railqtok{prop}.
   271 
   272   \indexouternonterm{termpat}\indexouternonterm{proppat}
   273   \begin{rail}
   274     termpat: '(' ('is' term +) ')'
   275     ;
   276     proppat: '(' ('is' prop +) ')'
   277     ;
   278   \end{rail}
   279 
   280   \medskip Declarations of local variables @{text "x :: \<tau>"} and
   281   logical propositions @{text "a : \<phi>"} represent different views on
   282   the same principle of introducing a local scope.  In practice, one
   283   may usually omit the typing of \railnonterm{vars} (due to
   284   type-inference), and the naming of propositions (due to implicit
   285   references of current facts).  In any case, Isar proof elements
   286   usually admit to introduce multiple such items simultaneously.
   287 
   288   \indexouternonterm{vars}\indexouternonterm{props}
   289   \begin{rail}
   290     vars: (name+) ('::' type)?
   291     ;
   292     props: thmdecl? (prop proppat? +)
   293     ;
   294   \end{rail}
   295 
   296   The treatment of multiple declarations corresponds to the
   297   complementary focus of \railnonterm{vars} versus
   298   \railnonterm{props}.  In ``@{text "x\<^sub>1 \<dots> x\<^sub>n :: \<tau>"}''
   299   the typing refers to all variables, while in @{text "a: \<phi>\<^sub>1 \<dots>
   300   \<phi>\<^sub>n"} the naming refers to all propositions collectively.
   301   Isar language elements that refer to \railnonterm{vars} or
   302   \railnonterm{props} typically admit separate typings or namings via
   303   another level of iteration, with explicit @{keyword_ref "and"}
   304   separators; e.g.\ see @{command "fix"} and @{command "assume"} in
   305   \secref{sec:proof-context}.
   306 *}
   307 
   308 
   309 subsection {* Attributes and theorems \label{sec:syn-att} *}
   310 
   311 text {* Attributes have their own ``semi-inner'' syntax, in the sense
   312   that input conforming to \railnonterm{args} below is parsed by the
   313   attribute a second time.  The attribute argument specifications may
   314   be any sequence of atomic entities (identifiers, strings etc.), or
   315   properly bracketed argument lists.  Below \railqtok{atom} refers to
   316   any atomic entity, including any \railtok{keyword} conforming to
   317   \railtok{symident}.
   318 
   319   \indexoutertoken{atom}\indexouternonterm{args}\indexouternonterm{attributes}
   320   \begin{rail}
   321     atom: nameref | typefree | typevar | var | nat | keyword
   322     ;
   323     arg: atom | '(' args ')' | '[' args ']'
   324     ;
   325     args: arg *
   326     ;
   327     attributes: '[' (nameref args * ',') ']'
   328     ;
   329   \end{rail}
   330 
   331   Theorem specifications come in several flavors:
   332   \railnonterm{axmdecl} and \railnonterm{thmdecl} usually refer to
   333   axioms, assumptions or results of goal statements, while
   334   \railnonterm{thmdef} collects lists of existing theorems.  Existing
   335   theorems are given by \railnonterm{thmref} and
   336   \railnonterm{thmrefs}, the former requires an actual singleton
   337   result.
   338 
   339   There are three forms of theorem references:
   340   \begin{enumerate}
   341   
   342   \item named facts @{text "a"},
   343 
   344   \item selections from named facts @{text "a(i)"} or @{text "a(j - k)"},
   345 
   346   \item literal fact propositions using @{syntax_ref altstring} syntax
   347   @{verbatim "`"}@{text "\<phi>"}@{verbatim "`"} (see also method
   348   @{method_ref fact}).
   349 
   350   \end{enumerate}
   351 
   352   Any kind of theorem specification may include lists of attributes
   353   both on the left and right hand sides; attributes are applied to any
   354   immediately preceding fact.  If names are omitted, the theorems are
   355   not stored within the theorem database of the theory or proof
   356   context, but any given attributes are applied nonetheless.
   357 
   358   An extra pair of brackets around attributes (like ``@{text
   359   "[[simproc a]]"}'') abbreviates a theorem reference involving an
   360   internal dummy fact, which will be ignored later on.  So only the
   361   effect of the attribute on the background context will persist.
   362   This form of in-place declarations is particularly useful with
   363   commands like @{command "declare"} and @{command "using"}.
   364 
   365   \indexouternonterm{axmdecl}\indexouternonterm{thmdecl}
   366   \indexouternonterm{thmdef}\indexouternonterm{thmref}
   367   \indexouternonterm{thmrefs}\indexouternonterm{selection}
   368   \begin{rail}
   369     axmdecl: name attributes? ':'
   370     ;
   371     thmdecl: thmbind ':'
   372     ;
   373     thmdef: thmbind '='
   374     ;
   375     thmref: (nameref selection? | altstring) attributes? | '[' attributes ']'
   376     ;
   377     thmrefs: thmref +
   378     ;
   379 
   380     thmbind: name attributes | name | attributes
   381     ;
   382     selection: '(' ((nat | nat '-' nat?) + ',') ')'
   383     ;
   384   \end{rail}
   385 *}
   386 
   387 end