1 (* Title: HOL/SPARK/Tools/fdl_lexer.ML |
|
2 Author: Stefan Berghofer |
|
3 Copyright: secunet Security Networks AG |
|
4 |
|
5 Lexical analyzer for fdl files. |
|
6 *) |
|
7 |
|
signature FDL_LEXER =
sig
  (* token classes produced by the lexer *)
  datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF
  (* a token: class, content, and position of its first character *)
  datatype T = Token of kind * string * Position.T;
  type chars
  type banner
  type date
  type time
  val tokenize: (chars -> 'a * chars) -> (chars -> T * chars) ->
    Position.T -> string -> 'a * T list
  val position_of: T -> Position.T
  val pos_of: T -> string
  val is_eof: T -> bool
  val stopper: T Scan.stopper
  val kind_of: T -> kind
  val content_of: T -> string
  val unparse: T -> string
  val is_proper: T -> bool
  val is_digit: string -> bool
  val is_char_eof: string * 'a -> bool
  val c_comment: chars -> T * chars
  val curly_comment: chars -> T * chars
  val percent_comment: chars -> T * chars
  val vcg_header: chars -> (banner * (date * time) option) * chars
  (*NOTE(review): siv_header yields the banner, the CREATED and SIMPLIFIED
    date/time stamps, and the two trailing header lines -- confirm against
    actual *.siv files*)
  val siv_header: chars ->
    (banner * (date * time) * (date * time) * (string * string)) * chars
  val explode_pos: string -> Position.T -> chars
  val leq_token: T * T -> bool
  val lexicon: Scan.lexicon
  val identifier: chars -> chars * chars
  val long_identifier: chars -> chars * chars
  val banner: chars -> (string * string * string) * chars
  val date: chars -> (string * string * string) * chars;
  val whitespace: chars -> chars * chars
  val whitespace1: chars -> chars * chars
  val keyword: string -> chars -> chars * chars
  val number: chars -> chars * chars
  val any: chars -> (string * Position.T) * chars
  val any_line: chars -> string * chars;
  val !!! : string -> (chars -> 'a) -> chars -> 'a
  val $$$ : string -> chars -> chars * chars
  val scan: (chars -> 'a * chars) -> (chars -> T * chars) -> chars -> ('a * T list) * chars;
  val char_stopper: (string * Position.T) Scan.stopper
  val make_token: kind -> chars -> T
end;
|
54 |
|
55 structure Fdl_Lexer(**): FDL_LEXER(**) = |
|
56 struct |
|
57 |
|
58 (** tokens **) |
|
59 |
|
(* token classes produced by the lexer *)
datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF;

(* a token: class, content, and position of its first character *)
datatype T = Token of kind * string * Position.T;
|
64 |
|
(* Build a token of the given class from a list of positioned characters;
   the token inherits the position of the first character
   (Position.none if the list is empty). *)
fun make_token kind chars =
  let
    val content = implode (map fst chars);
    val pos = (case chars of [] => Position.none | (_, p) :: _ => p);
  in Token (kind, content, pos) end;
|
67 |
|
(* Accessors and basic predicates on tokens. *)

fun kind_of (Token (kind, _, _)) = kind;

fun content_of (Token (_, text, _)) = text;

fun position_of (Token (_, _, pos)) = pos;

(* comment tokens are skipped by the parser *)
fun is_proper tok = kind_of tok <> Comment;

(* traceability tokens print back in their surface syntax "For ...:" *)
fun unparse tok =
  (case tok of
    Token (Traceability, text, _) => "For " ^ text ^ ":"
  | Token (_, text, _) => text);

fun pos_of tok = Position.here (position_of tok);

fun is_eof tok = kind_of tok = EOF;

fun mk_eof pos = Token (EOF, "", pos);
val eof = mk_eof Position.none;

(* the synthetic EOF token inherits the position of the last real token *)
val stopper =
  Scan.stopper
    (fn [] => eof
      | toks => mk_eof (position_of (List.last toks)))
    is_eof;

(* compare tokens by length of their content *)
fun leq_token (tok, tok') = size (content_of tok) <= size (content_of tok');
|
92 |
|
93 |
|
94 (** split up a string into a list of characters (with positions) **) |
|
95 |
|
(* input characters paired with their position *)
type chars = (string * Position.T) list;

(* the end-of-input pseudo-character carries an empty string *)
fun is_char_eof ("", _) = true
  | is_char_eof _ = false;

val char_stopper = Scan.stopper (K ("", Position.none)) is_char_eof;

(* project a positioned character to its symbol *)
fun symbol (x : string, _ : Position.T) = x;

(* explode string s into positioned characters, advancing from pos
   symbol by symbol *)
fun explode_pos s pos = fst (fold_map (fn x => fn pos =>
  ((x, pos), Position.symbol x pos)) (raw_explode s) pos);
|
108 |
|
109 |
|
110 (** scanners **) |
|
111 |
|
(* scan any single character before end-of-input *)
val any = Scan.one (not o Scan.is_stopper char_stopper);

(* scan exactly the given list of symbols *)
fun prfx [] = Scan.succeed []
  | prfx (x :: xs) = Scan.one (equal x o symbol) ::: prfx xs;

(* scan a literal string *)
val $$$ = prfx o raw_explode;
|
118 |
|
(* keywords and symbolic tokens of the FDL language *)
val lexicon = Scan.make_lexicon (map raw_explode
  ["rule_family",
   "For",
   ":",
   "[",
   "]",
   "(",
   ")",
   ",",
   "&",
   ";",
   "=",
   ".",
   "..",
   "requires",
   "may_be_replaced_by",
   "may_be_deduced",
   "may_be_deduced_from",
   "are_interchangeable",
   "if",
   "end",
   "function",
   "procedure",
   "type",
   "var",
   "const",
   "array",
   "record",
   ":=",
   "of",
   "**",
   "*",
   "/",
   "div",
   "mod",
   "+",
   "-",
   "<>",
   "<",
   ">",
   "<=",
   ">=",
   "<->",
   "->",
   "not",
   "and",
   "or",
   "for_some",
   "for_all",
   "***",
   "!!!",
   "element",
   "update",
   "pending"]);
|
173 |
|
(* scan the longest lexicon entry and succeed only if it is exactly s *)
fun keyword s = Scan.literal lexicon :|--
  (fn xs => if map symbol xs = raw_explode s then Scan.succeed xs else Scan.fail);
|
176 |
|
(* character classification on one-symbol strings *)
fun is_digit x = "0" <= x andalso x <= "9";
fun is_alpha x = "a" <= x andalso x <= "z" orelse "A" <= x andalso x <= "Z";
val is_underscore = equal "_";
val is_tilde = equal "~";
val is_newline = equal "\n";
val is_tab = equal "\t";
val is_space = equal " ";
val is_whitespace = is_space orf is_tab orf is_newline;
(* horizontal whitespace only (no newline) *)
val is_whitespace' = is_space orf is_tab;
|
186 |
|
(* non-empty digit sequence *)
val number = Scan.many1 (is_digit o symbol);

(* identifier: a letter, then letters/digits/underscores, optionally
   followed by a single "~" *)
val identifier =
  Scan.one (is_alpha o symbol) :::
  Scan.many
    ((is_alpha orf is_digit orf is_underscore) o symbol) @@@
  Scan.optional (Scan.one (is_tilde o symbol) >> single) [];

(* at least two identifiers separated by dots *)
val long_identifier =
  identifier @@@ Scan.repeats1 ($$$ "." @@@ identifier);

val whitespace = Scan.many (is_whitespace o symbol);
val whitespace1 = Scan.many1 (is_whitespace o symbol);
(* horizontal whitespace only *)
val whitespace' = Scan.many (is_whitespace' o symbol);
val newline = Scan.one (is_newline o symbol);
|
202 |
|
(* Render at most n symbols of cs for use in error messages: trailing
   whitespace is dropped, remaining whitespace is normalized to single
   blanks, and " ..." marks a truncated tail. *)
fun beginning n cs =
  let
    val trimmed = drop_suffix is_whitespace cs;
    val ellipsis = if length trimmed > n then " ..." else "";
    val shown =
      take n trimmed
      |> drop_suffix is_whitespace
      |> map (fn c => if is_whitespace c then " " else c)
      |> implode;
  in shown ^ ellipsis end;
|
213 |
|
(* cut: turn failure of scan into a hard error, reporting the message
   text, the current position, and a snippet of the remaining input *)
fun !!! text scan =
  let
    fun get_pos [] = " (end-of-input)"
      | get_pos ((_, pos) :: _) = Position.here pos;

    fun err (syms, msg) = fn () =>
      text ^ get_pos syms ^ " at " ^ beginning 10 (map symbol syms) ^
      (case msg of NONE => "" | SOME m => "\n" ^ m ());
  in Scan.!! err scan end;
|
223 |
|
(* all characters up to (excluding) the next newline or end-of-input *)
val any_line' =
  Scan.many (not o (Scan.is_stopper char_stopper orf (is_newline o symbol)));

(* one whole line, with leading horizontal whitespace and the
   terminating newline stripped *)
val any_line = whitespace' |-- any_line' --|
  newline >> (implode o map symbol);

(* comment delimited by a and b, e.g. gen_comment "/*" "*/" *)
fun gen_comment a b = $$$ a |-- !!! "unclosed comment"
  (Scan.repeat (Scan.unless ($$$ b) any) --| $$$ b) >> make_token Comment;
|
232 |
|
(* C-style comment "/* ... */".
   The former eta-expanded redefinition carrying commented-out @{print}
   debugging was redundant (it merely re-applied gen_comment) and has
   been removed. *)
val c_comment = gen_comment "/*" "*/";
|
(* comment in curly braces *)
val curly_comment = gen_comment "{" "}";

(* comment from "%" to end of line *)
val percent_comment = $$$ "%" |-- any_line' >> make_token Comment;
|
245 |
|
(* scan exactly n characters satisfying p *)
fun repeatn 0 _ = Scan.succeed []
  | repeatn n p = Scan.one p ::: repeatn (n-1) p;
|
248 |
|
249 |
|
250 (** header of *.vcg and *.siv files (see simplifier/load_provenance.pro) **) |
|
251 |
|
(* three free-form header lines *)
type banner = string * string * string;
(* day, month, year *)
type date = string * string * string;
(* hour, minute, second, optional 2-digit fraction *)
type time = string * string * string * string option;

val asterisks = Scan.repeat1 (Scan.one (equal "*" o symbol));

(* scan exactly n alphabetic resp. numeric characters *)
fun alphan n = repeatn n (is_alpha o symbol) >> (implode o map symbol);
fun digitn n = repeatn n (is_digit o symbol) >> (implode o map symbol);

(* e.g. "15:12:59" or "15:12:59.73" *)
val time =
  digitn 2 --| $$$ ":" -- digitn 2 --| $$$ ":" -- digitn 2 --
  Scan.option ($$$ "." |-- digitn 2) >>
  (fn (((hr, mi), s), ms) => (hr, mi, s, ms));

(* e.g. "29-NOV-2010" *)
val date =
  digitn 2 --| $$$ "-" -- alphan 3 --| $$$ "-" -- digitn 4 >>
  (fn ((d, m), y) => (d, m, y));
|
269 |
|
(* banner: a line of asterisks, then three arbitrary newline-terminated
   lines, closed by a matching line of asterisks *)
val banner =
  whitespace' |-- asterisks --| whitespace' --| newline :|-- (fn xs =>
    (any_line -- any_line -- any_line >>
      (fn ((l1, l2), l3) => (l1, l2, l3))) --|
    whitespace' --| prfx (map symbol xs) --| whitespace' --| newline);
|
276 |
|
(* header of *.vcg files: banner, optionally followed by
   "DATE : <date>" and "TIME : <time>" stamps *)
val vcg_header = banner -- Scan.option (whitespace |--
  $$$ "DATE :" |-- whitespace |-- date --| whitespace --|
  Scan.option ($$$ "TIME :" -- whitespace) -- time);
|
280 |
|
(* Header of *.siv files: banner, "CREATED" and "SIMPLIFIED" date/time
   stamps, then (after a blank line) the next two lines.
   The former duplicate of this definition, wrapped in a let with
   commented-out @{print} debugging, was redundant and has been
   removed. *)
val siv_header = banner --| whitespace --
  ($$$ "CREATED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
  whitespace --
  ($$$ "SIMPLIFIED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
  newline --| newline -- (any_line -- any_line) >>
  (fn (((b, c), s), ls) => (b, c, s, ls));
|
305 |
|
306 (** the main tokenizer **) |
|
307 |
|
(* Main scanner: parse the file header with the given header scanner,
   then repeatedly scan comments, traceability clauses ("For ...:"),
   keywords, identifiers (preferring the longer match via Scan.max),
   and numbers, skipping whitespace in between.
   The former duplicate of this definition, instrumented with
   commented-out @{print} debugging, was redundant and has been
   removed. *)
fun scan header comment =
  !!! "bad header" header --| whitespace --
  Scan.repeat (Scan.unless (Scan.one is_char_eof)
    (!!! "bad input"
       (   comment
        || (keyword "For" -- whitespace1) |--
             Scan.repeat1 (Scan.unless (keyword ":") any) --|
             keyword ":" >> make_token Traceability
        || Scan.max leq_token
             (Scan.literal lexicon >> make_token Keyword)
             (   long_identifier >> make_token Long_Ident
              || identifier >> make_token Ident)
        || number >> make_token Number) --|
     whitespace));
|
343 |
|
(* Tokenize string s, whose first character is at position pos, using
   the given header and comment scanners; returns the header scanner's
   result paired with the token list.  Scan errors are raised as
   exceptions via Scan.error.
   The former duplicate of this definition, instrumented with
   commented-out @{print} debugging, was redundant and has been
   removed. *)
fun tokenize header comment pos s =
  fst (Scan.finite char_stopper
    (Scan.error (scan header comment)) (explode_pos s pos));
|
361 |
|
362 end; |
|