src/Pure/Syntax/syn_ext.ML
author clasohm
Fri, 22 Apr 1994 12:43:53 +0200
changeset 330 2fda15dd1e0f
parent 240 8b2a8c52242d
child 345 7007562172b1
permissions -rw-r--r--
changed the way a grammar is generated to allow the new parser to work;
also made a lot of changes in parser.ML and minor ones elsewhere
wenzelm@240
     1
(*  Title:      Pure/Syntax/syn_ext.ML
wenzelm@240
     2
    ID:         $Id$
wenzelm@240
     3
    Author:     Markus Wenzel, TU Muenchen
wenzelm@240
     4
wenzelm@240
     5
Syntax extension (internal interface).
wenzelm@240
     6
*)
wenzelm@240
     7
wenzelm@240
     8
(*Reduced view of the syntax extension module: only the two items below
  are visible through this signature.  SYN_EXT includes it.*)
signature SYN_EXT0 =
sig
  val typeT: typ            (*the type Type ("type", [])*)
  val constrainC: string    (*the constant name "_constrain"*)
end;
wenzelm@240
    13
wenzelm@240
    14
(*Full internal interface of the syntax extension module.*)
signature SYN_EXT =
sig
  include SYN_EXT0
  structure Ast: AST
  local open Ast in
    (*names of syntactic categories*)
    val logic: string
    val args: string

    (*types built from the token class names id, var, tid, tvar*)
    val idT: typ
    val varT: typ
    val tidT: typ
    val tvarT: typ

    (*name of the application constant*)
    val applC: string

    (*name of a type constructor; "logic" for anything that is not a Type*)
    val typ_to_nonterm: typ -> string

    (*symbols on the rhs of a production*)
    datatype xsymb =
        Delim of string
      | Argument of string * int
      | Space of string
      | Bg of int
      | Brk of int
      | En

    (*XProd (lhs, syms, c, p)*)
    datatype xprod = XProd of string * xsymb list * string * int

    val max_pri: int
    val chain_pri: int

    (*distinct delimiter strings occurring in the given productions*)
    val delims_of: xprod list -> string list

    (*Mfix (sy, ty, c, ps, p)*)
    datatype mfix = Mfix of string * typ * string * int list * int

    datatype syn_ext =
      SynExt of {
        roots: string list,
        xprods: xprod list,
        consts: string list,
        parse_ast_translation: (string * (ast list -> ast)) list,
        parse_rules: (ast * ast) list,
        parse_translation: (string * (term list -> term)) list,
        print_translation: (string * (term list -> term)) list,
        print_rules: (ast * ast) list,
        print_ast_translation: (string * (ast list -> ast)) list}

    (*syn_ext all_roots new_roots mfixes consts trfuns rules*)
    val syn_ext:
      string list -> string list -> mfix list -> string list ->
      (string * (ast list -> ast)) list * (string * (term list -> term)) list *
      (string * (term list -> term)) list * (string * (ast list -> ast)) list ->
      (ast * ast) list * (ast * ast) list -> syn_ext

    (*extension consisting of rules only*)
    val syn_ext_rules:
      string list -> (ast * ast) list * (ast * ast) list -> syn_ext

    (*extension consisting of roots only*)
    val syn_ext_roots: string list -> string list -> syn_ext
  end
end;
wenzelm@240
    56
wenzelm@240
    57
functor SynExtFun(structure Lexicon: LEXICON and Ast: AST): SYN_EXT =
wenzelm@240
    58
struct
wenzelm@240
    59
wenzelm@240
    60
structure Ast = Ast;
wenzelm@240
    61
open Lexicon Ast;
wenzelm@240
    62
wenzelm@240
    63
wenzelm@240
    64
(** misc definitions **)
wenzelm@240
    65
wenzelm@240
    66
(* syntactic categories *)
wenzelm@240
    67
wenzelm@240
    68
(*names of the built-in syntactic categories*)
val logic = "logic";
val logic1 = "logic1";
val args = "args";

(*the same categories (plus "type" and "fun") as nullary types*)
val logicT = Type (logic, []);
val logic1T = Type (logic1, []);
val argsT = Type (args, []);
val typeT = Type ("type", []);
val funT = Type ("fun", []);


(* terminals *)

(*id, var, tid and tvar are not defined in this file; they are in scope
  through "open Lexicon Ast" (presumably from Lexicon -- confirm)*)
val idT = Type (id, []);
val varT = Type (var, []);
val tidT = Type (tid, []);
val tvarT = Type (tvar, []);


(* constants *)

val applC = "_appl";              (*head used by the generated application production*)
val constrainC = "_constrain";    (*head used by the generated "_::_" production*)
wenzelm@240
    94
wenzelm@240
    95
wenzelm@240
    96
wenzelm@240
    97
(** datatype xprod **)
wenzelm@240
    98
wenzelm@240
    99
(*Symbols that may occur on the rhs of a production:
    Delim s:          the delimiter s
    Argument (s, p):  nonterminal s requiring priority >= p, or valued token
    Space s:          white space s, used for printing
    Bg i, Brk i, En:  blocks and breaks for pretty printing; mfix_to_xprod
                      produces Bg from "(" followed by an integer, Brk from "/"
                      (Brk ~1 for "//", otherwise the number of blanks that
                      follow) and En from ")"*)

datatype xsymb =
    Delim of string
  | Argument of string * int
  | Space of string
  | Bg of int
  | Brk of int
  | En;
wenzelm@240
   109
wenzelm@240
   110
wenzelm@240
   111
(*A single production, XProd (lhs, syms, c, p), where
    lhs   is the name of the nonterminal on the left-hand side,
    syms  is the list of symbols on the right-hand side,
    c     is the head of the parse tree,
    p     is the priority of the production*)

datatype xprod =
  XProd of string * xsymb list * string * int;

(*largest priority accepted by the range check in mfix_to_xprod*)
val max_pri = 1000;

(*dummy priority carried by chain productions*)
val chain_pri = ~1;
wenzelm@240
   121
wenzelm@240
   122
wenzelm@240
   123
(* delims_of *)
wenzelm@240
   124
wenzelm@240
   125
(*Collect the strings of all Delim symbols occurring on the right-hand
  sides of the given productions, with duplicates removed.*)
fun delims_of xprods =
  let
    (*delimiter strings of one production, in order of occurrence*)
    fun delim_strings (XProd (_, syms, _, _)) =
      mapfilter (fn Delim s => Some s | _ => None) syms;
  in
    distinct (flat (map delim_strings xprods))
  end;
wenzelm@240
   135
wenzelm@240
   136
wenzelm@240
   137
wenzelm@240
   138
(** datatype mfix **)
wenzelm@240
   139
wenzelm@240
   140
(*A mixfix declaration, Mfix (sy, ty, c, ps, p), where
    sy  is the rhs of the production, written as a symbolic string,
    ty  is the type describing the production,
    c   is the head of the parse tree,
    ps  are the priorities of the arguments in sy,
    p   is the priority of the production*)

datatype mfix =
  Mfix of string * typ * string * int list * int;
wenzelm@240
   148
wenzelm@240
   149
wenzelm@240
   150
(* typ_to_nonterm *)
wenzelm@240
   151
wenzelm@240
   152
(*Nonterminal name for a type: the constructor name of a Type, and logic
  for every other form of typ.*)
fun typ_to_nonterm ty =
  (case ty of
    Type (name, _) => name
  | _ => logic);

(*Same as typ_to_nonterm, except that the fallback is logic1.*)
fun typ_to_nonterm1 ty =
  (case ty of
    Type (name, _) => name
  | _ => logic1);
wenzelm@240
   157
wenzelm@240
   158
wenzelm@240
   159
(* mfix_to_xprod *)
wenzelm@240
   160
wenzelm@240
   161
(*Translate a mixfix declaration into a production.

  The symbolic string sy is scanned into raw symbols, the placeholder
  arguments are given nonterminals taken from the type and priorities
  taken from pris, and the result type supplies the lhs.  Afterwards all
  priorities are range-checked and the block parentheses are checked for
  balance.  Any violation is reported via err, which prints the offending
  annotation and then calls error.

  A declaration with empty const is a copy production: it must have
  exactly one argument, and if it contains no terminal at all it becomes
  a chain production (priority chain_pri throughout).*)
fun mfix_to_xprod (Mfix (sy, typ, const, pris, pri)) =
  let
    (*identify the annotation first, then abort with the specific message*)
    fun err msg =
      (writeln ("Error in mixfix annotation " ^ quote sy ^ " for " ^ quote const);
        error msg);

    (*legal priorities are 0 .. max_pri*)
    fun check_pri p =
      if p < 0 orelse p > max_pri then
        err ("precedence out of range: " ^ string_of_int p)
      else ();

    (*depth counts the blocks that are currently open*)
    fun balanced depth [] = (depth = 0)
      | balanced depth (Bg _ :: rest) = balanced (depth + 1) rest
      | balanced depth (En :: rest) =
          depth <> 0 andalso balanced (depth - 1) rest
      | balanced depth (_ :: rest) = balanced depth rest;

    fun check_blocks syms =
      if balanced 0 syms then ()
      else err "unbalanced block parentheses";


    (*characters with a special meaning inside sy*)
    fun is_meta ch = ch mem ["(", ")", "/", "_"];

    (*one character of a delimiter; "'" escapes the character after it*)
    fun scan_delim_char [] = raise LEXICAL_ERROR
      | scan_delim_char ["'"] = err "trailing escape character"
      | scan_delim_char ("'" :: ch :: rest) =
          if is_blank ch then err "illegal spaces in delimiter" else (ch, rest)
      | scan_delim_char (ch :: rest) =
          if not (is_blank ch orelse is_meta ch) then (ch, rest)
          else raise LEXICAL_ERROR;

    (*the individual alternatives of scan_symb; they are tried in the
      order in which they are combined below*)
    val scan_arg = $$ "_" >> K (Argument ("", 0));
    val scan_open = $$ "(" -- scan_int >> (Bg o #2);
    val scan_close = $$ ")" >> K En;
    val scan_slashes = $$ "/" -- $$ "/" >> K (Brk ~1);
    val scan_slash = $$ "/" -- scan_any is_blank >> (Brk o length o #2);
    val scan_blanks = scan_any1 is_blank >> (Space o implode);
    val scan_delim = repeat1 scan_delim_char >> (Delim o implode);

    val scan_symb =
      scan_arg || scan_open || scan_close || scan_slashes || scan_slash ||
      scan_blanks || scan_delim;


    (*replace the placeholder arguments by nonterminals taken from the
      argument types and by the given priorities (0 once these are used
      up); returns the new symbols together with the lhs nonterminal*)
    fun add_args [] ty [] = ([], typ_to_nonterm1 ty)
      | add_args [] _ _ = err "too many precedences"
      | add_args (Argument _ :: syms) (Type ("fun", [ty, tys])) ps =
          let
            val (p, ps') = (case ps of [] => (0, []) | q :: qs => (q, qs));
            val (syms', lhs) = add_args syms tys ps';
          in
            (Argument (typ_to_nonterm ty, p) :: syms', lhs)
          end
      | add_args (Argument _ :: _) _ _ =
          err "more arguments than in corresponding type"
      | add_args (sym :: syms) ty ps =
          let val (syms', lhs) = add_args syms ty ps
          in (sym :: syms', lhs) end;


    fun is_arg (Argument _) = true
      | is_arg _ = false;

    (*delimiters, and arguments whose name is a terminal*)
    fun is_term (Delim _) = true
      | is_term (Argument (name, _)) = is_terminal name
      | is_term _ = false;

    (*arguments of chain productions carry the dummy priority*)
    fun rem_pri (Argument (name, _)) = Argument (name, chain_pri)
      | rem_pri sym = sym;


    (*whatever scan_symb cannot consume is ignored*)
    val (raw_symbs, _) = repeat scan_symb (explode sy);
    val (symbs, lhs) = add_args raw_symbs typ pris;
    val xprod = XProd (lhs, symbs, const, pri);
  in
    seq check_pri (pris @ [pri]);
    check_blocks symbs;

    if is_terminal lhs then err ("illegal lhs: " ^ lhs)
    else if const = "" then
      (*copy production*)
      if length (filter is_arg symbs) <> 1 then
        err "copy production must have exactly one argument"
      else if exists is_term symbs then xprod
      else XProd (lhs, map rem_pri symbs, "", chain_pri)
    else xprod
  end;
wenzelm@240
   241
wenzelm@240
   242
wenzelm@240
   243
(** datatype syn_ext **)
wenzelm@240
   244
wenzelm@240
   245
(*A syntax extension as produced by syn_ext.*)
datatype syn_ext =
  SynExt of {
    (*root names*)
    roots: string list,
    (*productions*)
    xprods: xprod list,
    (*constant names*)
    consts: string list,
    (*translation functions and rules used for parsing*)
    parse_ast_translation: (string * (ast list -> ast)) list,
    parse_rules: (ast * ast) list,
    parse_translation: (string * (term list -> term)) list,
    (*translation functions and rules used for printing*)
    print_translation: (string * (term list -> term)) list,
    print_rules: (ast * ast) list,
    print_ast_translation: (string * (ast list -> ast)) list};
wenzelm@240
   256
wenzelm@240
   257
wenzelm@240
   258
(* syn_ext *)
wenzelm@240
   259
clasohm@330
   260
(*Build a syntax extension.

    all_roots  root names used only to decide which of the given
               productions get a hidden lhs (see hide_xprod); "logic" and
               "type" are always excluded
    new_roots  roots introduced by this extension; they become the roots
               field, and the automatic productions are generated for them
    mfixes     mixfix declarations supplied by the caller
    consts     constant names; the heads of all mixfix declarations that
               satisfy is_xid are added to them
    trfuns     (parse_ast_translation, parse_translation,
               print_translation, print_ast_translation)
    rules      (parse_rules, print_rules)

  For a root T the generated productions use two auxiliary nonterminals,
  named T ^ "_H" and T ^ "_A".*)
fun syn_ext all_roots new_roots mfixes consts trfuns rules =
  let
    val (parse_ast_translation, parse_translation, print_translation,
      print_ast_translation) = trfuns;
    val (parse_rules, print_rules) = rules;

    val Troots = map (apr (Type, [])) new_roots;
    val Troots' = Troots \\ [typeT, propT];

    (*append ext to the name of a type constructor*)
    fun change_name T ext =
      let val Type (name, ts) = T
      in Type (name ^ ext, ts) end;

    (*Append "_H" to the lhs of a production that has a non-empty head
      (i.e. is neither a copy nor a chain production), whose lhs is one of
      the given roots, and which contains at least one delimiter; every
      other production is returned unchanged*)
    fun hide_xprod roots (xprod as XProd (lhs, symbs, const, pri)) =
      let
        fun is_delim (Delim _) = true
          | is_delim _ = false;
      in
        if const <> "" andalso lhs mem roots andalso exists is_delim symbs then
          XProd (lhs ^ "_H", symbs, const, pri)
        else xprod
      end;

    (*Make descend production and append "_H" to rhs nonterminal*)
    fun descend_right (from, to) =
      Mfix ("_", change_name to "_H" --> from, "", [0], 0);

    (*Make descend production and append "_A" to lhs*)
    fun descend1 (from, to) =
      Mfix ("_", to --> change_name from "_A", "", [0], 0);

    (*Make parentheses productions for the 'hidden' ("_H") and the
      'automatic' ("_A") nonterminal; typeT gets a single production on
      the type itself*)
    fun parents T =
      if T = typeT then
        [Mfix ("'(_')", T --> T, "", [0], max_pri)]
      else
        [Mfix ("'(_')", change_name T "_H" --> change_name T "_H", "", [0], max_pri),
         Mfix ("'(_')", change_name T "_A" --> change_name T "_A", "", [0], max_pri)];

    (*application*)
    fun mkappl T =
      Mfix ("(1_/(1'(_')))", [funT, argsT] ---> change_name T "_A", applC,
        [max_pri, 0], max_pri);

    (*identifiers and variables*)
    fun mkid T =
      Mfix ("_", idT --> change_name T "_A", "", [], max_pri);

    fun mkvar T =
      Mfix ("_", varT --> change_name T "_A", "", [], max_pri);

    (*type constraint*)
    fun constrain T =
      Mfix ("_::_", [T, typeT] ---> change_name T "_A", constrainC,
        [max_pri, 0], max_pri - 1);

    (*copy productions leading from the auxiliary nonterminals back to T;
      logicT only gets the one for "_A"*)
    fun unhide T =
      if T <> logicT then
        [Mfix ("_", change_name T "_H" --> T, "", [0], 0),
         Mfix ("_", change_name T "_A" --> T, "", [0], 0)]
      else
        [Mfix ("_", change_name T "_A" --> T, "", [0], 0)];

    (*automatically generated declarations; the order is kept as is*)
    val mfixes' =
      flat (map parents Troots) @
      map mkappl Troots' @
      map mkid Troots' @
      map mkvar Troots' @
      map constrain Troots' @
      map (apl (logicT, descend_right)) (Troots \\ [logicT, typeT]) @
      map (apr (descend1, logic1T)) Troots' @
      flat (map unhide (Troots \\ [typeT]));

    val mfix_consts =
      distinct (filter is_xid
        (map (fn Mfix (_, _, c, _, _) => c) (mfixes @ mfixes')));

    val xprods = map mfix_to_xprod mfixes;
    val xprods' = map mfix_to_xprod mfixes';
  in
    SynExt {
      roots = new_roots,
      (*hide only productions that weren't created automatically*)
      xprods = map (hide_xprod (all_roots \\ [logic, "type"])) xprods @ xprods',
      consts = consts union mfix_consts,
      parse_ast_translation = parse_ast_translation,
      parse_rules = parse_rules,
      parse_translation = parse_translation,
      print_translation = print_translation,
      print_rules = print_rules,
      print_ast_translation = print_ast_translation}
  end;
wenzelm@240
   347
wenzelm@240
   348
wenzelm@240
   349
(* syn_ext_rules, syn_ext_roots *)
wenzelm@240
   350
clasohm@330
   351
(*Extension that contributes rules only: roots is passed to syn_ext as
  all_roots; there are no new roots, mixfix declarations, constants or
  translation functions.*)
fun syn_ext_rules roots rules =
  let val no_trfuns = ([], [], [], [])
  in syn_ext roots [] [] [] no_trfuns rules end;

(*Extension that contributes roots only: no mixfix declarations,
  constants, translation functions or rules.*)
fun syn_ext_roots all_roots new_roots =
  let
    val no_trfuns = ([], [], [], []);
    val no_rules = ([], []);
  in
    syn_ext all_roots new_roots [] [] no_trfuns no_rules
  end;
wenzelm@240
   356
wenzelm@240
   357
wenzelm@240
   358
end;
wenzelm@240
   359