doc-src/Codegen/Thy/Introduction.thy
author haftmann
Sat, 27 Nov 2010 18:51:04 +0100
changeset 40991 5288144b4358
parent 39978 3aa2bc9c5478
child 42960 9f6652122963
permissions -rw-r--r--
added evaluation section
     1 theory Introduction
     2 imports Setup
     3 begin
     4 
     5 section {* Introduction *}
     6 
     7 text {*
     8   This tutorial introduces the code generator facilities of @{text
     9   "Isabelle/HOL"}.  It allows to turn (a certain class of) HOL
    10   specifications into corresponding executable code in the programming
    11   languages @{text SML} \cite{SML}, @{text OCaml} \cite{OCaml},
    12   @{text Haskell} \cite{haskell-revised-report} and @{text Scala}
    13   \cite{scala-overview-tech-report}.
    14 
    15   To profit from this tutorial, some familiarity and experience with
    16   @{theory HOL} \cite{isa-tutorial} and its basic theories is assumed.
    17 *}
    18 
    19 
    20 subsection {* Code generation principle: shallow embedding \label{sec:principle} *}
    21 
    22 text {*
    23   The key concept for understanding Isabelle's code generation is
    24   \emph{shallow embedding}: logical entities like constants, types and
    25   classes are identified with corresponding entities in the target
    26   language.  In particular, the carrier of a generated program's
    27   semantics are \emph{equational theorems} from the logic.  If we view
    28   a generated program as an implementation of a higher-order rewrite
    29   system, then every rewrite step performed by the program can be
    30   simulated in the logic, which guarantees partial correctness
    31   \cite{Haftmann-Nipkow:2010:code}.
    32 *}
    33 
    34 
    35 subsection {* A quick start with the Isabelle/HOL toolbox \label{sec:queue_example} *}
    36 
    37 text {*
    38   In a HOL theory, the @{command_def datatype} and @{command_def
    39   definition}/@{command_def primrec}/@{command_def fun} declarations
    40   form the core of a functional programming language.  By default
    41   equational theorems stemming from those are used for generated code,
    42   therefore \qt{naive} code generation can proceed without further
    43   ado.
    44 
    45   For example, here a simple \qt{implementation} of amortised queues:
    46 *}
    47 
    48 datatype %quote 'a queue = AQueue "'a list" "'a list"
    49 
    50 definition %quote empty :: "'a queue" where
    51   "empty = AQueue [] []"
    52 
    53 primrec %quote enqueue :: "'a \<Rightarrow> 'a queue \<Rightarrow> 'a queue" where
    54   "enqueue x (AQueue xs ys) = AQueue (x # xs) ys"
    55 
    56 fun %quote dequeue :: "'a queue \<Rightarrow> 'a option \<times> 'a queue" where
    57     "dequeue (AQueue [] []) = (None, AQueue [] [])"
    58   | "dequeue (AQueue xs (y # ys)) = (Some y, AQueue xs ys)"
    59   | "dequeue (AQueue xs []) =
    60       (case rev xs of y # ys \<Rightarrow> (Some y, AQueue [] ys))" (*<*)
    61 
    62 lemma %invisible dequeue_nonempty_Nil [simp]:
    63   "xs \<noteq> [] \<Longrightarrow> dequeue (AQueue xs []) = (case rev xs of y # ys \<Rightarrow> (Some y, AQueue [] ys))"
    64   by (cases xs) (simp_all split: list.splits) (*>*)
    65 
    66 text {* \noindent Then we can generate code e.g.~for @{text SML} as follows: *}
    67 
    68 export_code %quote empty dequeue enqueue in SML
    69   module_name Example file "examples/example.ML"
    70 
    71 text {* \noindent resulting in the following code: *}
    72 
    73 text %quotetypewriter {*
    74   @{code_stmts empty enqueue dequeue (SML)}
    75 *}
    76 
    77 text {*
    78   \noindent The @{command_def export_code} command takes a
    79   space-separated list of constants for which code shall be generated;
    80   anything else needed for those is added implicitly.  Then follows a
    81   target language identifier and a freely chosen module name.  A file
    82   name denotes the destination to store the generated code.  Note that
    83   the semantics of the destination depends on the target language: for
    84   @{text SML}, @{text OCaml} and @{text Scala} it denotes a \emph{file},
    85   for @{text Haskell} it denotes a \emph{directory} where a file named as the
    86   module name (with extension @{text ".hs"}) is written:
    87 *}
    88 
    89 export_code %quote empty dequeue enqueue in Haskell
    90   module_name Example file "examples/"
    91 
    92 text {*
    93   \noindent This is the corresponding code:
    94 *}
    95 
    96 text %quotetypewriter {*
    97   @{code_stmts empty enqueue dequeue (Haskell)}
    98 *}
    99 
   100 text {*
   101   \noindent For more details about @{command export_code} see
   102   \secref{sec:further}.
   103 *}
   104 
   105 
   106 subsection {* Type classes *}
   107 
   108 text {*
   109   Code can also be generated from type classes in a Haskell-like
   110   manner.  For illustration here an example from abstract algebra:
   111 *}
   112 
   113 class %quote semigroup =
   114   fixes mult :: "'a \<Rightarrow> 'a \<Rightarrow> 'a" (infixl "\<otimes>" 70)
   115   assumes assoc: "(x \<otimes> y) \<otimes> z = x \<otimes> (y \<otimes> z)"
   116 
   117 class %quote monoid = semigroup +
   118   fixes neutral :: 'a ("\<one>")
   119   assumes neutl: "\<one> \<otimes> x = x"
   120     and neutr: "x \<otimes> \<one> = x"
   121 
   122 instantiation %quote nat :: monoid
   123 begin
   124 
   125 primrec %quote mult_nat where
   126     "0 \<otimes> n = (0\<Colon>nat)"
   127   | "Suc m \<otimes> n = n + m \<otimes> n"
   128 
   129 definition %quote neutral_nat where
   130   "\<one> = Suc 0"
   131 
   132 lemma %quote add_mult_distrib:
   133   fixes n m q :: nat
   134   shows "(n + m) \<otimes> q = n \<otimes> q + m \<otimes> q"
   135   by (induct n) simp_all
   136 
   137 instance %quote proof
   138   fix m n q :: nat
   139   show "m \<otimes> n \<otimes> q = m \<otimes> (n \<otimes> q)"
   140     by (induct m) (simp_all add: add_mult_distrib)
   141   show "\<one> \<otimes> n = n"
   142     by (simp add: neutral_nat_def)
   143   show "m \<otimes> \<one> = m"
   144     by (induct m) (simp_all add: neutral_nat_def)
   145 qed
   146 
   147 end %quote
   148 
   149 text {*
   150   \noindent We define the natural operation of the natural numbers
   151   on monoids:
   152 *}
   153 
   154 primrec %quote (in monoid) pow :: "nat \<Rightarrow> 'a \<Rightarrow> 'a" where
   155     "pow 0 a = \<one>"
   156   | "pow (Suc n) a = a \<otimes> pow n a"
   157 
   158 text {*
   159   \noindent This we use to define the discrete exponentiation
   160   function:
   161 *}
   162 
   163 definition %quote bexp :: "nat \<Rightarrow> nat" where
   164   "bexp n = pow n (Suc (Suc 0))"
   165 
   166 text {*
   167   \noindent The corresponding code in Haskell uses that language's
   168   native classes:
   169 *}
   170 
   171 text %quotetypewriter {*
   172   @{code_stmts bexp (Haskell)}
   173 *}
   174 
   175 text {*
   176   \noindent This is a convenient place to show how explicit dictionary
   177   construction manifests in generated code -- the same example in
   178   @{text SML}:
   179 *}
   180 
   181 text %quotetypewriter {*
   182   @{code_stmts bexp (SML)}
   183 *}
   184 
   185 text {*
   186   \noindent Note the parameters with trailing underscore (@{verbatim
   187   "A_"}), which are the dictionary parameters.
   188 *}
   189 
   190 
   191 subsection {* How to continue from here *}
   192 
   193 text {*
   194   What you have seen so far should be already enough in a lot of
   195   cases.  If you are content with this, you can quit reading here.
   196 
   197   Anyway, to understand situations where problems occur or to increase
   198   the scope of code generation beyond default, it is necessary to gain
   199   some understanding how the code generator actually works:
   200 
   201   \begin{itemize}
   202 
   203     \item The foundations of the code generator are described in
   204       \secref{sec:foundations}.
   205 
   206     \item In particular \secref{sec:utterly_wrong} gives hints how to
   207       debug situations where code generation does not succeed as
   208       expected.
   209 
   210     \item The scope and quality of generated code can be increased
   211       dramatically by applying refinement techniques, which are
   212       introduced in \secref{sec:refinement}.
   213 
   214     \item Inductive predicates can be turned executable using an
   215       extension of the code generator \secref{sec:inductive}.
   216 
   217     \item If you want to utilize code generation to obtain fast
   218       evaluators e.g.~for decision procedures, have a look at
   219       \secref{sec:evaluation}.
   220 
   221     \item You may want to skim over the more technical sections
   222       \secref{sec:adaptation} and \secref{sec:further}.
   223 
   224     \item For exhaustive syntax diagrams etc. you should visit the
   225       Isabelle/Isar Reference Manual \cite{isabelle-isar-ref}.
   226 
   227   \end{itemize}
   228 
   229   \bigskip
   230 
   231   \begin{center}\fbox{\fbox{\begin{minipage}{8cm}
   232 
   233     \begin{center}\textit{Happy proving, happy hacking!}\end{center}
   234 
   235   \end{minipage}}}\end{center}
   236 
   237   \begin{warn}
   238     There is also a more ancient code generator in Isabelle by Stefan
   239     Berghofer \cite{Berghofer-Nipkow:2002}.  Although its
   240     functionality is covered by the code generator presented here, it
   241     will sometimes show up as an artifact.  In case of ambiguity, we
   242     will refer to the framework described here as @{text "generic code
   243     generator"}, to the other as @{text "SML code generator"}.
   244   \end{warn}
   245 *}
   246 
   247 end