doc-src/Codegen/Thy/Adaption.thy
author haftmann
Tue, 03 Mar 2009 11:00:51 +0100
changeset 30209 2f4684e2ea95
parent 28714 doc-src/IsarAdvanced/Codegen/Thy/Adaption.thy@1992553cccfe
child 30210 853abb4853cc
permissions -rw-r--r--
more canonical directory structure of manuals
     1 theory Adaption
     2 imports Setup
     3 begin
     4 
     5 setup %invisible {* Code_Target.extend_target ("\<SML>", ("SML", K I)) *}
     6 
     7 section {* Adaption to target languages \label{sec:adaption} *}
     8 
     9 subsection {* Adapting code generation *}
    10 
    11 text {*
    12   The aspects of code generation introduced so far have two aspects
    13   in common:
    14 
    15   \begin{itemize}
    16     \item They act uniformly, without reference to a specific
    17        target language.
    18     \item They are \emph{safe} in the sense that as long as you trust
    19        the code generator meta theory and implementation, you cannot
    20        produce programs that yield results which are not derivable
    21        in the logic.
    22   \end{itemize}
    23 
    24   \noindent In this section we will introduce means to \emph{adapt} the serialiser
    25   to a specific target language, i.e.~to print program fragments
    26   in a way which accommodates \qt{already existing} ingredients of
    27   a target language environment, for three reasons:
    28 
    29   \begin{itemize}
    30     \item improving readability and aesthetics of generated code
    31     \item gaining efficiency
    32     \item interface with language parts which have no direct counterpart
    33       in @{text "HOL"} (say, imperative data structures)
    34   \end{itemize}
    35 
    36   \noindent Generally, you should avoid using those features yourself
    37   \emph{at any cost}:
    38 
    39   \begin{itemize}
    40     \item The safe configuration methods act uniformly on every target language,
    41       whereas for adaption you have to treat each target language separate.
    42     \item Application is extremely tedious since there is no abstraction
    43       which would allow for a static check, making it easy to produce garbage.
    44     \item More or less subtle errors can be introduced unconsciously.
    45   \end{itemize}
    46 
    47   \noindent However, even if you ought refrain from setting up adaption
    48   yourself, already the @{text "HOL"} comes with some reasonable default
    49   adaptions (say, using target language list syntax).  There also some
    50   common adaption cases which you can setup by importing particular
    51   library theories.  In order to understand these, we provide some clues here;
    52   these however are not supposed to replace a careful study of the sources.
    53 *}
    54 
    55 subsection {* The adaption principle *}
    56 
    57 text {*
    58   The following figure illustrates what \qt{adaption} is conceptually
    59   supposed to be:
    60 
    61   \begin{figure}[here]
    62     \begin{tikzpicture}[scale = 0.5]
    63       \tikzstyle water=[color = blue, thick]
    64       \tikzstyle ice=[color = black, very thick, cap = round, join = round, fill = white]
    65       \tikzstyle process=[color = green, semithick, ->]
    66       \tikzstyle adaption=[color = red, semithick, ->]
    67       \tikzstyle target=[color = black]
    68       \foreach \x in {0, ..., 24}
    69         \draw[style=water] (\x, 0.25) sin + (0.25, 0.25) cos + (0.25, -0.25) sin
    70           + (0.25, -0.25) cos + (0.25, 0.25);
    71       \draw[style=ice] (1, 0) --
    72         (3, 6) node[above, fill=white] {logic} -- (5, 0) -- cycle;
    73       \draw[style=ice] (9, 0) --
    74         (11, 6) node[above, fill=white] {intermediate language} -- (13, 0) -- cycle;
    75       \draw[style=ice] (15, -6) --
    76         (19, 6) node[above, fill=white] {target language} -- (23, -6) -- cycle;
    77       \draw[style=process]
    78         (3.5, 3) .. controls (7, 5) .. node[fill=white] {translation} (10.5, 3);
    79       \draw[style=process]
    80         (11.5, 3) .. controls (15, 5) .. node[fill=white] (serialisation) {serialisation} (18.5, 3);
    81       \node (adaption) at (11, -2) [style=adaption] {adaption};
    82       \node at (19, 3) [rotate=90] {generated};
    83       \node at (19.5, -5) {language};
    84       \node at (19.5, -3) {library};
    85       \node (includes) at (19.5, -1) {includes};
    86       \node (reserved) at (16.5, -3) [rotate=72] {reserved}; % proper 71.57
    87       \draw[style=process]
    88         (includes) -- (serialisation);
    89       \draw[style=process]
    90         (reserved) -- (serialisation);
    91       \draw[style=adaption]
    92         (adaption) -- (serialisation);
    93       \draw[style=adaption]
    94         (adaption) -- (includes);
    95       \draw[style=adaption]
    96         (adaption) -- (reserved);
    97     \end{tikzpicture}
    98     \caption{The adaption principle}
    99     \label{fig:adaption}
   100   \end{figure}
   101 
   102   \noindent In the tame view, code generation acts as broker between
   103   @{text logic}, @{text "intermediate language"} and
   104   @{text "target language"} by means of @{text translation} and
   105   @{text serialisation};  for the latter, the serialiser has to observe
   106   the structure of the @{text language} itself plus some @{text reserved}
   107   keywords which have to be avoided for generated code.
   108   However, if you consider @{text adaption} mechanisms, the code generated
   109   by the serializer is just the tip of the iceberg:
   110 
   111   \begin{itemize}
   112     \item @{text serialisation} can be \emph{parametrised} such that
   113       logical entities are mapped to target-specific ones
   114       (e.g. target-specific list syntax,
   115         see also \secref{sec:adaption_mechanisms})
   116     \item Such parametrisations can involve references to a
   117       target-specific standard @{text library} (e.g. using
   118       the @{text Haskell} @{verbatim Maybe} type instead
   119       of the @{text HOL} @{type "option"} type);
   120       if such are used, the corresponding identifiers
   121       (in our example, @{verbatim Maybe}, @{verbatim Nothing}
   122       and @{verbatim Just}) also have to be considered @{text reserved}.
   123     \item Even more, the user can enrich the library of the
   124       target-language by providing code snippets
   125       (\qt{@{text "includes"}}) which are prepended to
   126       any generated code (see \secref{sec:include});  this typically
   127       also involves further @{text reserved} identifiers.
   128   \end{itemize}
   129 
   130   \noindent As figure \ref{fig:adaption} illustrates, all these adaption mechanisms
   131   have to act consistently;  it is at the discretion of the user
   132   to take care for this.
   133 *}
   134 
   135 subsection {* Common adaption patterns *}
   136 
   137 text {*
   138   The @{theory HOL} @{theory Main} theory already provides a code
   139   generator setup
   140   which should be suitable for most applications.  Common extensions
   141   and modifications are available by certain theories of the @{text HOL}
   142   library; beside being useful in applications, they may serve
   143   as a tutorial for customising the code generator setup (see below
   144   \secref{sec:adaption_mechanisms}).
   145 
   146   \begin{description}
   147 
   148     \item[@{theory "Code_Integer"}] represents @{text HOL} integers by big
   149        integer literals in target languages.
   150     \item[@{theory "Code_Char"}] represents @{text HOL} characters by 
   151        character literals in target languages.
   152     \item[@{theory "Code_Char_chr"}] like @{text "Code_Char"},
   153        but also offers treatment of character codes; includes
   154        @{theory "Code_Char"}.
   155     \item[@{theory "Efficient_Nat"}] \label{eff_nat} implements natural numbers by integers,
   156        which in general will result in higher efficiency; pattern
   157        matching with @{term "0\<Colon>nat"} / @{const "Suc"}
   158        is eliminated;  includes @{theory "Code_Integer"}
   159        and @{theory "Code_Index"}.
   160     \item[@{theory "Code_Index"}] provides an additional datatype
   161        @{typ index} which is mapped to target-language built-in integers.
   162        Useful for code setups which involve e.g. indexing of
   163        target-language arrays.
   164     \item[@{theory "Code_Message"}] provides an additional datatype
   165        @{typ message_string} which is isomorphic to strings;
   166        @{typ message_string}s are mapped to target-language strings.
   167        Useful for code setups which involve e.g. printing (error) messages.
   168 
   169   \end{description}
   170 
   171   \begin{warn}
   172     When importing any of these theories, they should form the last
   173     items in an import list.  Since these theories adapt the
   174     code generator setup in a non-conservative fashion,
   175     strange effects may occur otherwise.
   176   \end{warn}
   177 *}
   178 
   179 
   180 subsection {* Parametrising serialisation \label{sec:adaption_mechanisms} *}
   181 
   182 text {*
   183   Consider the following function and its corresponding
   184   SML code:
   185 *}
   186 
   187 primrec %quote in_interval :: "nat \<times> nat \<Rightarrow> nat \<Rightarrow> bool" where
   188   "in_interval (k, l) n \<longleftrightarrow> k \<le> n \<and> n \<le> l"
   189 (*<*)
   190 code_type %invisible bool
   191   (SML)
   192 code_const %invisible True and False and "op \<and>" and Not
   193   (SML and and and)
   194 (*>*)
   195 text %quote {*@{code_stmts in_interval (SML)}*}
   196 
   197 text {*
   198   \noindent Though this is correct code, it is a little bit unsatisfactory:
   199   boolean values and operators are materialised as distinguished
   200   entities with have nothing to do with the SML-built-in notion
   201   of \qt{bool}.  This results in less readable code;
   202   additionally, eager evaluation may cause programs to
   203   loop or break which would perfectly terminate when
   204   the existing SML @{verbatim "bool"} would be used.  To map
   205   the HOL @{typ bool} on SML @{verbatim "bool"}, we may use
   206   \qn{custom serialisations}:
   207 *}
   208 
   209 code_type %quotett bool
   210   (SML "bool")
   211 code_const %quotett True and False and "op \<and>"
   212   (SML "true" and "false" and "_ andalso _")
   213 
   214 text {*
   215   \noindent The @{command code_type} command takes a type constructor
   216   as arguments together with a list of custom serialisations.
   217   Each custom serialisation starts with a target language
   218   identifier followed by an expression, which during
   219   code serialisation is inserted whenever the type constructor
   220   would occur.  For constants, @{command code_const} implements
   221   the corresponding mechanism.  Each ``@{verbatim "_"}'' in
   222   a serialisation expression is treated as a placeholder
   223   for the type constructor's (the constant's) arguments.
   224 *}
   225 
   226 text %quote {*@{code_stmts in_interval (SML)}*}
   227 
   228 text {*
   229   \noindent This still is not perfect: the parentheses
   230   around the \qt{andalso} expression are superfluous.
   231   Though the serialiser
   232   by no means attempts to imitate the rich Isabelle syntax
   233   framework, it provides some common idioms, notably
   234   associative infixes with precedences which may be used here:
   235 *}
   236 
   237 code_const %quotett "op \<and>"
   238   (SML infixl 1 "andalso")
   239 
   240 text %quote {*@{code_stmts in_interval (SML)}*}
   241 
   242 text {*
   243   \noindent The attentive reader may ask how we assert that no generated
   244   code will accidentally overwrite.  For this reason the serialiser has
   245   an internal table of identifiers which have to be avoided to be used
   246   for new declarations.  Initially, this table typically contains the
   247   keywords of the target language.  It can be extended manually, thus avoiding
   248   accidental overwrites, using the @{command "code_reserved"} command:
   249 *}
   250 
   251 code_reserved %quote "\<SML>" bool true false andalso
   252 
   253 text {*
   254   \noindent Next, we try to map HOL pairs to SML pairs, using the
   255   infix ``@{verbatim "*"}'' type constructor and parentheses:
   256 *}
   257 (*<*)
   258 code_type %invisible *
   259   (SML)
   260 code_const %invisible Pair
   261   (SML)
   262 (*>*)
   263 code_type %quotett *
   264   (SML infix 2 "*")
   265 code_const %quotett Pair
   266   (SML "!((_),/ (_))")
   267 
   268 text {*
   269   \noindent The initial bang ``@{verbatim "!"}'' tells the serialiser
   270   never to put
   271   parentheses around the whole expression (they are already present),
   272   while the parentheses around argument place holders
   273   tell not to put parentheses around the arguments.
   274   The slash ``@{verbatim "/"}'' (followed by arbitrary white space)
   275   inserts a space which may be used as a break if necessary
   276   during pretty printing.
   277 
   278   These examples give a glimpse what mechanisms
   279   custom serialisations provide; however their usage
   280   requires careful thinking in order not to introduce
   281   inconsistencies -- or, in other words:
   282   custom serialisations are completely axiomatic.
   283 
   284   A further noteworthy details is that any special
   285   character in a custom serialisation may be quoted
   286   using ``@{verbatim "'"}''; thus, in
   287   ``@{verbatim "fn '_ => _"}'' the first
   288   ``@{verbatim "_"}'' is a proper underscore while the
   289   second ``@{verbatim "_"}'' is a placeholder.
   290 *}
   291 
   292 
   293 subsection {* @{text Haskell} serialisation *}
   294 
   295 text {*
   296   For convenience, the default
   297   @{text HOL} setup for @{text Haskell} maps the @{class eq} class to
   298   its counterpart in @{text Haskell}, giving custom serialisations
   299   for the class @{class eq} (by command @{command code_class}) and its operation
   300   @{const HOL.eq}
   301 *}
   302 
   303 code_class %quotett eq
   304   (Haskell "Eq")
   305 
   306 code_const %quotett "op ="
   307   (Haskell infixl 4 "==")
   308 
   309 text {*
   310   \noindent A problem now occurs whenever a type which
   311   is an instance of @{class eq} in @{text HOL} is mapped
   312   on a @{text Haskell}-built-in type which is also an instance
   313   of @{text Haskell} @{text Eq}:
   314 *}
   315 
   316 typedecl %quote bar
   317 
   318 instantiation %quote bar :: eq
   319 begin
   320 
   321 definition %quote "eq_class.eq (x\<Colon>bar) y \<longleftrightarrow> x = y"
   322 
   323 instance %quote by default (simp add: eq_bar_def)
   324 
   325 end %quote
   326 
   327 code_type %quotett bar
   328   (Haskell "Integer")
   329 
   330 text {*
   331   \noindent The code generator would produce
   332   an additional instance, which of course is rejected by the @{text Haskell}
   333   compiler.
   334   To suppress this additional instance, use
   335   @{text "code_instance"}:
   336 *}
   337 
   338 code_instance %quotett bar :: eq
   339   (Haskell -)
   340 
   341 
   342 subsection {* Enhancing the target language context \label{sec:include} *}
   343 
   344 text {*
   345   In rare cases it is necessary to \emph{enrich} the context of a
   346   target language;  this is accomplished using the @{command "code_include"}
   347   command:
   348 *}
   349 
   350 code_include %quotett Haskell "Errno"
   351 {*errno i = error ("Error number: " ++ show i)*}
   352 
   353 code_reserved %quotett Haskell Errno
   354 
   355 text {*
   356   \noindent Such named @{text include}s are then prepended to every generated code.
   357   Inspect such code in order to find out how @{command "code_include"} behaves
   358   with respect to a particular target language.
   359 *}
   360 
   361 end