src/Pure/Thy/rail.ML
author wenzelm
Sat, 30 Apr 2011 19:50:39 +0200
changeset 43375 869c3f6f2d6e
child 43377 876887b07e8d
permissions -rw-r--r--
railroad diagrams in LaTeX as document antiquotation;
(*  Title:      Pure/Thy/rail.ML
    Author:     Michael Kerscher, TU München
    Author:     Makarius

Railroad diagrams in LaTeX.
*)
wenzelm@43375
     7
wenzelm@43375
     8
(*Railroad diagrams as document antiquotation "rail": the argument text is
  tokenized, parsed into rail expressions, and rendered as LaTeX source for
  the railoutput environment.  The structure exports nothing (sig end); its
  only effect is the antiquotation registration at the end.*)
structure Rail: sig end =
struct

(** lexical syntax **)

(* datatype token *)

(*kinds of lexical tokens occurring in rail specifications*)
datatype kind = Keyword | Ident | String | EOF;

(*token: source range paired with its kind and textual content*)
datatype token = Token of Position.range * (kind * string);

(*first and second component of the token range, respectively*)
fun pos_of (Token ((pos, _), _)) = pos;
fun end_pos_of (Token ((_, pos), _)) = pos;

fun kind_of (Token (_, (k, _))) = k;
fun content_of (Token (_, (_, x))) = x;


(* diagnostics *)

(*human-readable name of a token kind, as used in error messages*)
val print_kind =
 fn Keyword => "rail keyword"
  | Ident => "identifier"
  | String => "single-quoted string"
  | EOF => "end-of-file";

(*token description: kind, quoted content (omitted for EOF), then the
  string form of the start position*)
fun print (Token ((pos, _), (k, x))) =
  (if k = EOF then print_kind k else print_kind k ^ " " ^ quote x) ^
  Position.str_of pos;

(*description of an expected keyword x (no position involved)*)
fun print_keyword x = print_kind Keyword ^ " " ^ quote x;


(* stopper *)

(*EOF token starting at pos, with empty content and no end position*)
fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
val eof = mk_eof Position.none;

fun is_eof (Token (_, (EOF, _))) = true
  | is_eof _ = false;

(*stopper for token lists: the EOF token is located at the end position of
  the last given token, or carries no position for the empty list*)
val stopper =
  Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof;


(* tokenize *)

local

(*singleton token list of kind k made from the scanned symbols ss*)
fun token k ss = [Token (Symbol_Pos.range ss, (k, Symbol_Pos.content ss))];

val scan_space = Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol);

(*keywords are exactly these single symbols*)
val scan_keyword =
  Scan.one (member (op =) ["|", "*", "+", "?", "(", ")", "\\", ";", ":"] o Symbol_Pos.symbol);

(*one lexical item: blanks yield no token, everything else yields one token;
  the projection #1 o #2 selects the symbols used as String content --
  presumably the text between the quotes, confirm against
  Symbol_Pos.scan_string_q*)
val scan_token =
  scan_space >> K [] ||
  scan_keyword >> (token Keyword o single) ||
  Lexicon.scan_id >> token Ident ||
  Symbol_Pos.scan_string_q >> (token String o #1 o #2);

in

(*token list for source text str, with symbol positions counted from pos;
  input that matches none of the scan_token alternatives is reported as
  "Rail lexical error: bad input"*)
fun tokenize pos str =
  Source.of_string str
  |> Symbol.source
  |> Symbol_Pos.source pos
  |> Source.source Symbol_Pos.stopper
      (Scan.bulk (Symbol_Pos.!!! "Rail lexical error: bad input" scan_token) >> flat) NONE
  |> Source.exhaust;

end;



(** parsing **)

(*wrap scan via Scan.!! such that its error message reads
  "Rail syntax error" followed by the position of the next token; a message
  that already carries this prefix is passed through unchanged, which avoids
  repeating the prefix for nested uses of !!!*)
fun !!! scan =
  let
    val prefix = "Rail syntax error";

    fun get_pos [] = " (past end-of-file!)"
      | get_pos (tok :: _) = Position.str_of (pos_of tok);

    fun err (toks, NONE) = prefix ^ get_pos toks
      | err (toks, SOME msg) =
          if String.isPrefix prefix msg then msg
          else prefix ^ get_pos toks ^ ": " ^ msg;
  in Scan.!! err scan end;

(*accept the keyword token with content x; otherwise fail with a message that
  names the expected keyword and the token actually found*)
fun $$$ x =
  Scan.one (fn tok => kind_of tok = Keyword andalso content_of tok = x) ||
  Scan.fail_with
    (fn [] => print_keyword x ^ " expected (past end-of-file!)"
        (*leading space separates the quoted keyword from "expected",
          as in the end-of-file case above*)
      | tok :: _ => print_keyword x ^ " expected,\nbut " ^ print tok ^ " was found");

(*non-empty list of scan items separated by keyword sep; after a separator
  the next item is mandatory (guarded by !!!)*)
fun enum1 sep scan = scan ::: Scan.repeat ($$$ sep |-- !!! scan);

(*possibly empty variant of enum1*)
fun enum sep scan = enum1 sep scan || Scan.succeed [];

(*content of the next token, provided it has the given kind*)
fun parse_token kind =
  Scan.some (fn tok => if kind_of tok = kind then SOME (content_of tok) else NONE);

val ident = parse_token Ident;
val string = parse_token String;



(** rail expressions **)

(* datatype *)

(*Cat: sequence of rails; Bar: alternatives; Plus: pair of sequences built by
  "+" and "*"; Newline: line break; Nonterminal / Terminal: named atoms.
  The int fields of Cat and Newline are vertical positions: 0 when produced
  by the parser, recomputed by vertical_range before output*)
datatype rails =
  Cat of int * rail list
and rail =
  Bar of rails list |
  Plus of rails * rails |
  Newline of int |
  Nonterminal of string |
  Terminal of string;

(*reverse the order of every sequence, recursively through Bar and Plus;
  atoms and Newline are unchanged*)
fun reverse_cat (Cat (y, rails)) = Cat (y, rev (map reverse rails))
and reverse (Bar cats) = Bar (map reverse_cat cats)
  | reverse (Plus (cat1, cat2)) = Plus (reverse_cat cat1, reverse_cat cat2)
  | reverse x = x;

(*sequence with initial vertical position 0*)
fun cat rails = Cat (0, rails);

val empty = cat [];
fun is_empty (Cat (_, [])) = true | is_empty _ = false;

fun is_newline (Newline _) = true | is_newline _ = false;

(*alternatives; a single alternative consisting of a single rail collapses
  to that rail*)
fun bar [Cat (_, [rail])] = rail
  | bar cats = Bar cats;

(*Plus node whose second sequence is stored reversed -- presumably because
  the return branch is drawn in the opposite direction; confirm against the
  rail LaTeX macros*)
fun plus cat1 cat2 = Plus (cat1, reverse_cat cat2);

(*"cat1 * cat2": with empty cat2 this is a Plus with empty first branch and
  cat1 as second branch; otherwise a choice between empty and plus cat1 cat2*)
fun star cat1 cat2 =
  if is_empty cat2 then plus empty cat1
  else bar [empty, cat [plus cat1 cat2]];

(*optional rail: choice between empty and the given rail*)
fun maybe rail = bar [empty, cat [rail]];


(* read *)

local

(*grammar, as implemented by the mutually recursive parsers below:
    body   = body1 "|" ... "|" body1         (at least one alternative)
    body0  = like body, but possibly empty
    body1  = body2 ( "*" body4e | "+" body4e )?
    body2  = body3 ... body3                 (at least one)
    body3  = body4 "?"?
    body4  = "(" body0 ")" | "\\" | ident | string
    body4e = body4?                          (as sequence of 0 or 1 rails)
  the explicit argument x is eta-expansion, required for recursive function
  definitions in ML*)
fun body x = (enum1 "|" body1 >> bar) x
and body0 x = (enum "|" body1 >> bar) x
and body1 x =
 (body2 :|-- (fn a =>
   $$$ "*" |-- !!! body4e >> (cat o single o star a) ||
   $$$ "+" |-- !!! body4e >> (cat o single o plus a) ||
   Scan.succeed a)) x
and body2 x = (Scan.repeat1 body3 >> cat) x
and body3 x = (body4 :|-- (fn a => $$$ "?" >> K (maybe a) || Scan.succeed a)) x
and body4 x =
 ($$$ "(" |-- !!! (body0 --| $$$ ")") ||
  $$$ "\\" >> K (Newline 0) ||
  ident >> Nonterminal ||
  string >> Terminal) x
and body4e x = (Scan.option body4 >> (cat o the_list)) x;

(*named rule "ident : body", or anonymous body paired with the empty name*)
val rule = ident -- ($$$ ":" |-- !!! body) || body >> pair "";

(*rules separated by ";"; positions without a rule are dropped*)
val rules = enum1 ";" (Scan.option rule) >> map_filter I;

in

(*parse rail source text str located at pos into a list of (name, rail)
  rules; tokens left over after parsing raise "Malformed rail input"*)
fun read pos str =
  (case Scan.error (Scan.finite stopper rules) (tokenize pos str) of
    (res, []) => res
  | (_, tok :: _) => error ("Malformed rail input: " ^ print tok));

end;


(* latex output *)

local

(*vertical layout: each function takes an expression and a start position y,
  and returns the expression with the int fields of Cat and Newline filled
  in, together with a resulting position y' (presumably the first row below
  the expression -- confirm against the rail LaTeX macros).

  Within a sequence the fold state is (y0, y'): y0 is the start position
  handed to the next rail, y' the maximum result so far.  A Newline is placed
  at y' + 1 and later rails of that sequence start from there*)
fun vertical_range_cat (Cat (_, rails)) y =
  let val (rails', (_, y')) =
    fold_map (fn rail => fn (y0, y') =>
      if is_newline rail then (Newline (y' + 1), (y' + 1, y' + 2))
      else
        let val (rail', y0') = vertical_range rail y0;
        in (rail', (y0, Int.max (y0', y'))) end) rails (y, y + 1)
  in (Cat (y, rails'), y') end

(*for Bar and Plus the branches are threaded through fold_map, so each branch
  starts at the position returned by the previous one; the result is at
  least y + 1*)
and vertical_range (Bar cats) y =
      let val (cats', y') = fold_map vertical_range_cat cats y
      in (Bar cats', Int.max (y + 1, y')) end
  | vertical_range (Plus (cat1, cat2)) y =
      let val ([cat1', cat2'], y') = fold_map vertical_range_cat [cat1, cat2] y;
      in (Plus (cat1', cat2'), Int.max (y + 1, y')) end
  | vertical_range (Newline _) y = (Newline (y + 2), y + 3)
  | vertical_range atom y = (atom, y + 1);


(*text wrapped into \isa{...}, after passing through Output.output*)
fun output_text s = "\\isa{" ^ Output.output s ^ "}";

(*LaTeX source for rail expressions.  The string c is inserted into the macro
  names of atoms (\rail@<c>nont, \rail@<c>term); it is "" in general and "c"
  for the second branch of Plus.  Note that outputs passes c on only for a
  singleton sequence, longer sequences are emitted with ""*)
fun output_cat c (Cat (_, rails)) = outputs c rails
and outputs c [rail] = output c rail
  | outputs _ rails = implode (map (output "") rails)
and output _ (Bar []) = ""
  | output c (Bar [cat]) = output_cat c cat
  | output _ (Bar (cat :: cats)) =
      "\\rail@bar\n" ^ output_cat "" cat ^
      implode (map (fn Cat (y, rails) =>
          "\\rail@nextbar{" ^ string_of_int y ^ "}\n" ^ outputs "" rails) cats) ^
      "\\rail@endbar\n"
  | output c (Plus (cat, Cat (y, rails))) =
      "\\rail@plus\n" ^ output_cat c cat ^
      "\\rail@nextplus{" ^ string_of_int y ^ "}\n" ^ outputs "c" rails ^
      "\\rail@endplus\n"
  | output _ (Newline y) = "\\rail@cr{" ^ string_of_int y ^ "}\n"
  | output c (Nonterminal s) = "\\rail@" ^ c ^ "nont{" ^ output_text s ^ "}[]\n"
  | output c (Terminal s) = "\\rail@" ^ c ^ "term{" ^ output_text s ^ "}[]\n";

(*single rule: layout is computed from position 0, the resulting y' becomes
  the first argument of \rail@begin*)
fun output_rule (name, rail) =
  let val (rail', y') = vertical_range rail 0 in
    "\\rail@begin{" ^ string_of_int y' ^ "}{" ^ output_text name ^ "}\n" ^
    output "" rail' ^
    "\\rail@end\n"
  end;

(*all rules inside one railoutput environment*)
fun output_rules rules =
  "\\begin{railoutput}\n" ^
  implode (map output_rule rules) ^
  "\\end{railoutput}\n";

in

(*antiquotation "rail": its argument is a name with position, which is read
  as rail specification and turned into LaTeX source*)
val _ =
  Thy_Output.antiquotation "rail" (Scan.lift (Parse.position Args.name))
    (fn _ => fn (str, pos) => output_rules (read pos str));

end;

end;
wenzelm@43375
   250