wenzelm@43375
|
1 |
(* Title: Pure/Thy/rail.ML
|
wenzelm@43375
|
2 |
Author: Michael Kerscher, TU München
|
wenzelm@43375
|
3 |
Author: Makarius
|
wenzelm@43375
|
4 |
|
wenzelm@43375
|
5 |
Railroad diagrams in LaTeX.
|
wenzelm@43375
|
6 |
*)
|
wenzelm@43375
|
7 |
|
wenzelm@43375
|
8 |
structure Rail: sig end =
|
wenzelm@43375
|
9 |
struct
|
wenzelm@43375
|
10 |
|
wenzelm@43375
|
11 |
(** lexical syntax **)
|
wenzelm@43375
|
12 |
|
wenzelm@43375
|
13 |
(* datatype token *)
|
wenzelm@43375
|
14 |
|
wenzelm@43375
|
15 |
(*lexical categories of rail tokens*)
datatype kind = Keyword | Ident | String | EOF;

(*a token pairs its source range with its kind and its textual content*)
datatype token = Token of Position.range * (kind * string);

(*first and second component of the token's range*)
fun pos_of (Token (range, _)) = #1 range;
fun end_pos_of (Token (range, _)) = #2 range;

(*kind and content stored in the token*)
fun kind_of (Token (_, info)) = #1 info;
fun content_of (Token (_, info)) = #2 info;
|
wenzelm@43375
|
24 |
|
wenzelm@43375
|
25 |
|
wenzelm@43375
|
26 |
(* diagnostics *)
|
wenzelm@43375
|
27 |
|
wenzelm@43375
|
28 |
(*human-readable name of a token kind*)
fun print_kind Keyword = "rail keyword"
  | print_kind Ident = "identifier"
  | print_kind String = "single-quoted string"
  | print_kind EOF = "end-of-file";

(*describe a token for messages: kind, quoted content (omitted for EOF),
  followed by the printed start position*)
fun print (Token ((pos, _), (EOF, _))) = print_kind EOF ^ Position.str_of pos
  | print (Token ((pos, _), (k, x))) = print_kind k ^ " " ^ quote x ^ Position.str_of pos;

(*describe a keyword that is given by its content only*)
fun print_keyword x =
  let val what = print_kind Keyword
  in what ^ " " ^ quote x end;
|
wenzelm@43375
|
39 |
|
wenzelm@43375
|
40 |
|
wenzelm@43375
|
41 |
(* stopper *)
|
wenzelm@43375
|
42 |
|
wenzelm@43375
|
43 |
(*EOF token starting at the given position, with empty content*)
fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
val eof = mk_eof Position.none;

(*recognize EOF tokens by their kind*)
fun is_eof tok = (kind_of tok = EOF);

(*stopper for token lists: the EOF token is placed at the end position of
  the last token, or has no position at all for empty input*)
val stopper =
  let
    fun final [] = eof
      | final toks = mk_eof (end_pos_of (List.last toks));
  in Scan.stopper final is_eof end;
|
wenzelm@43375
|
51 |
|
wenzelm@43375
|
52 |
|
wenzelm@43375
|
53 |
(* tokenize *)
|
wenzelm@43375
|
54 |
|
wenzelm@43375
|
55 |
local

(*single-character symbols that are scanned as rail keywords*)
val keywords = ["|", "*", "+", "?", "(", ")", "\\", ";", ":"];

(*singleton token list of the given kind, built from positioned symbols*)
fun token k ss =
  let
    val range = Symbol_Pos.range ss;
    val text = Symbol_Pos.content ss;
  in [Token (range, (k, text))] end;

val scan_space = Scan.many1 (fn sp => Symbol.is_blank (Symbol_Pos.symbol sp));

val scan_keyword = Scan.one (fn sp => member (op =) keywords (Symbol_Pos.symbol sp));

(*one lexical item; blank space yields no token at all*)
val scan_token =
  scan_space >> (fn _ => []) ||
  scan_keyword >> (fn sp => token Keyword [sp]) ||
  Lexicon.scan_id >> token Ident ||
  Symbol_Pos.scan_string_q >> (token String o #1 o #2);

in

(*turn a string into the list of rail tokens, with symbol positions
  counted from pos; unscannable input is reported as lexical error*)
fun tokenize pos str =
  let
    val scan_all =
      Scan.bulk (Symbol_Pos.!!! "Rail lexical error: bad input" scan_token) >> flat;
    val symbols = Symbol_Pos.source pos (Symbol.source (Source.of_string str));
  in Source.exhaust (Source.source Symbol_Pos.stopper scan_all NONE symbols) end;

end;
|
wenzelm@43375
|
81 |
|
wenzelm@43375
|
82 |
|
wenzelm@43375
|
83 |
|
wenzelm@43375
|
84 |
(** parsing **)
|
wenzelm@43375
|
85 |
|
wenzelm@43375
|
86 |
(*wrap a scanner via Scan.!! such that its failure message carries the
  "Rail syntax error" prefix and the position of the next token; messages
  that already start with that prefix are passed through unchanged*)
fun !!! scan =
  let
    val prefix = "Rail syntax error";

    fun where_at toks =
      (case toks of
        [] => " (past end-of-file!)"
      | first :: _ => Position.str_of (pos_of first));

    fun message (toks, opt_msg) =
      let val located = prefix ^ where_at toks in
        (case opt_msg of
          NONE => located
        | SOME msg => if String.isPrefix prefix msg then msg else located ^ ": " ^ msg)
      end;
  in Scan.!! message scan end;
|
wenzelm@43375
|
98 |
|
wenzelm@43375
|
99 |
(*scan the rail keyword with content x; otherwise fail with a message that
  names the expected keyword and, if input remains, the token that was
  found instead*)
fun $$$ x =
  Scan.one (fn tok => kind_of tok = Keyword andalso content_of tok = x) ||
  Scan.fail_with
    (fn [] => print_keyword x ^ " expected (past end-of-file!)"
        (*leading space is required: print_keyword ends with the closing quote of x*)
      | tok :: _ => print_keyword x ^ " expected,\nbut " ^ print tok ^ " was found");
|
wenzelm@43375
|
104 |
|
wenzelm@43375
|
105 |
(*non-empty enumeration of items separated by keyword sep; after a
  separator the next item is mandatory*)
fun enum1 sep scan =
  let val more = Scan.repeat ($$$ sep |-- !!! scan)
  in scan -- more >> (op ::) end;

(*possibly empty enumeration*)
fun enum sep scan = enum1 sep scan || Scan.succeed [];

(*accept one token of the given kind and return its content*)
fun parse_token kind =
  let fun select tok = if kind_of tok = kind then SOME (content_of tok) else NONE
  in Scan.some select end;

val ident = parse_token Ident;
val string = parse_token String;
|
wenzelm@43375
|
113 |
|
wenzelm@43375
|
114 |
|
wenzelm@43375
|
115 |
|
wenzelm@43375
|
116 |
(** rail expressions **)
|
wenzelm@43375
|
117 |
|
wenzelm@43375
|
118 |
(* datatype *)
|
wenzelm@43375
|
119 |
|
wenzelm@43375
|
120 |
(*rails: an integer paired with a sequence of rail items;
  rail: alternatives, a Plus of two rails, newline, or an atomic item*)
datatype rails =
  Cat of int * rail list
and rail =
  Bar of rails list |
  Plus of rails * rails |
  Newline of int |
  Nonterminal of string |
  Terminal of string;

(*reverse the order of items within every Cat, recursively; the order of
  alternatives within Bar and of the two parts of Plus is kept*)
fun reverse_cat (Cat (y, items)) = Cat (y, map reverse (rev items))
and reverse rail =
  (case rail of
    Bar cats => Bar (map reverse_cat cats)
  | Plus (cat1, cat2) => Plus (reverse_cat cat1, reverse_cat cat2)
  | _ => rail);
|
wenzelm@43375
|
133 |
|
wenzelm@43375
|
134 |
(*sequence of items, with integer component 0*)
fun cat rails = Cat (0, rails);

val empty = Cat (0, []);
fun is_empty (Cat (_, items)) = null items;

fun is_newline rail = (case rail of Newline _ => true | _ => false);

(*alternatives; a single alternative that consists of a single item
  collapses to that item*)
fun bar cats =
  (case cats of
    [Cat (_, [rail])] => rail
  | _ => Bar cats);

(*Plus of cat1 and the reversed cat2*)
fun plus cat1 cat2 = Plus (cat1, reverse_cat cat2);

(*alternative between nothing and the given item*)
fun maybe rail = bar [empty, cat [rail]];

(*with empty cat2: Plus of empty and reversed cat1; otherwise alternative
  between nothing and the Plus of cat1 and cat2*)
fun star cat1 cat2 =
  (case cat2 of
    Cat (_, []) => plus empty cat1
  | _ => bar [empty, cat [plus cat1 cat2]]);
|
wenzelm@43375
|
151 |
|
wenzelm@43375
|
152 |
|
wenzelm@43375
|
153 |
(* read *)
|
wenzelm@43375
|
154 |
|
wenzelm@43375
|
155 |
local

(*alternatives separated by "|": body requires at least one, body0 admits none*)
fun body toks = toks |> (enum1 "|" body1 >> bar)
and body0 toks = toks |> (enum "|" body1 >> bar)

(*sequence, optionally followed by "*" or "+" and an optional single item*)
and body1 toks =
  let
    fun repetition sym combine left =
      $$$ sym |-- !!! body4e >> (fn right => cat [combine left right]);
    fun suffix left =
      repetition "*" star left ||
      repetition "+" plus left ||
      Scan.succeed left;
  in (body2 :|-- suffix) toks end

(*non-empty sequence of items*)
and body2 toks = toks |> (Scan.repeat1 body3 >> cat)

(*item, optionally followed by "?"*)
and body3 toks =
  let fun optional item = $$$ "?" >> (fn _ => maybe item) || Scan.succeed item
  in (body4 :|-- optional) toks end

(*parenthesized body, newline marker, identifier, or quoted string*)
and body4 toks =
  let
    val group = $$$ "(" |-- !!! (body0 --| $$$ ")");
    val newline = $$$ "\\" >> (fn _ => Newline 0);
    val nonterminal = ident >> Nonterminal;
    val terminal = string >> Terminal;
  in (group || newline || nonterminal || terminal) toks end

(*optional item, as sequence of at most one element*)
and body4e toks = toks |> (Scan.option body4 >> (fn opt => cat (the_list opt)));

(*rule with name "ident :", or anonymous rule with empty name*)
val rule =
  let
    val named = ident -- ($$$ ":" |-- !!! body);
    val anonymous = body >> (fn rail => ("", rail));
  in named || anonymous end;

(*rules separated by ";", where individual rules may be omitted*)
val rules = enum1 ";" (Scan.option rule) >> (fn opts => map_filter I opts);

in

(*tokenize and parse rail source; input must be consumed completely,
  otherwise the first remaining token is reported*)
fun read pos str =
  let
    val toks = tokenize pos str;
    val (res, rest) = Scan.error (Scan.finite stopper rules) toks;
  in
    (case rest of
      [] => res
    | tok :: _ => error ("Malformed rail input: " ^ print tok))
  end;

end;
|
wenzelm@43375
|
184 |
|
wenzelm@43375
|
185 |
|
wenzelm@43375
|
186 |
(* latex output *)
|
wenzelm@43375
|
187 |
|
wenzelm@43375
|
188 |
local
|
wenzelm@43375
|
189 |
|
wenzelm@43375
|
190 |
(*assign integer coordinates: every Cat is tagged with the incoming y,
  every Newline with a value derived from the running maximum; the second
  result is the updated maximum*)
fun vertical_range_cat (Cat (_, items)) y =
  let
    (*state: current y for items of this sequence, and maximum so far*)
    fun step item (top, bottom) =
      if is_newline item then (Newline (bottom + 1), (bottom + 1, bottom + 2))
      else
        let val (item', bottom') = vertical_range item top
        in (item', (top, Int.max (bottom', bottom))) end;
    val (items', (_, y')) = fold_map step items (y, y + 1);
  in (Cat (y, items'), y') end

and vertical_range rail y =
  (case rail of
    Bar cats =>
      let val (cats', y') = fold_map vertical_range_cat cats y
      in (Bar cats', Int.max (y + 1, y')) end
  | Plus (cat1, cat2) =>
      let
        (*thread y through both parts, first cat1 and then cat2*)
        val (cat1', y1) = vertical_range_cat cat1 y;
        val (cat2', y2) = vertical_range_cat cat2 y1;
      in (Plus (cat1', cat2'), Int.max (y + 1, y2)) end
  | Newline _ => (Newline (y + 2), y + 3)
  | atom => (atom, y + 1));
|
wenzelm@43375
|
207 |
|
wenzelm@43375
|
208 |
|
wenzelm@43375
|
209 |
(*text in \isa{...}, passed through Output.output*)
fun output_text s =
  let val body = Output.output s
  in "\\isa{" ^ body ^ "}" end;

(*LaTeX macros for rails; c is the prefix inserted before "nont"/"term",
  which is passed on only to a single item and reset to "" otherwise*)
fun output_cat c (Cat (_, items)) = outputs c items

and outputs c items =
  (case items of
    [item] => output c item
  | _ => implode (map (output "") items))

and output c rail =
  (case rail of
    Bar [] => ""
  | Bar [single_cat] => output_cat c single_cat
  | Bar (first :: others) =>
      let
        fun next_bar (Cat (y, items)) =
          "\\rail@nextbar{" ^ string_of_int y ^ "}\n" ^ outputs "" items;
      in
        "\\rail@bar\n" ^ output_cat "" first ^
        implode (map next_bar others) ^
        "\\rail@endbar\n"
      end
  | Plus (first, Cat (y, items)) =>
      "\\rail@plus\n" ^ output_cat c first ^
      "\\rail@nextplus{" ^ string_of_int y ^ "}\n" ^ outputs "c" items ^
      "\\rail@endplus\n"
  | Newline y => "\\rail@cr{" ^ string_of_int y ^ "}\n"
  | Nonterminal s => "\\rail@" ^ c ^ "nont{" ^ output_text s ^ "}[]\n"
  | Terminal s => "\\rail@" ^ c ^ "term{" ^ output_text s ^ "}[]\n");
|
wenzelm@43375
|
228 |
|
wenzelm@43375
|
229 |
(*one rule between \rail@begin and \rail@end; the first argument of
  \rail@begin is the final value returned by vertical_range from 0*)
fun output_rule (name, rail) =
  let
    val (rail', y_end) = vertical_range rail 0;
    val header = "\\rail@begin{" ^ string_of_int y_end ^ "}{" ^ output_text name ^ "}\n";
  in header ^ output "" rail' ^ "\\rail@end\n" end;

(*all rules within a railoutput environment*)
fun output_rules rules =
  implode ("\\begin{railoutput}\n" :: map output_rule rules @ ["\\end{railoutput}\n"]);
|
wenzelm@43375
|
240 |
|
wenzelm@43375
|
241 |
in
|
wenzelm@43375
|
242 |
|
wenzelm@43375
|
243 |
(*antiquotation "rail": its argument is a name with position, which is
  read as rail source and turned into LaTeX output*)
val _ =
  let fun rail_latex _ (str, pos) = output_rules (read pos str) in
    Thy_Output.antiquotation "rail" (Scan.lift (Parse.position Args.name)) rail_latex
  end;
|
wenzelm@43375
|
246 |
|
wenzelm@43375
|
247 |
end;
|
wenzelm@43375
|
248 |
|
wenzelm@43375
|
249 |
end;
|
wenzelm@43375
|
250 |
|