defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
1 (* Title: HOL/SPARK/Tools/fdl_lexer.ML
2 Author: Stefan Berghofer
3 Copyright: secunet Security Networks AG
5 Lexical analyzer for fdl files.
15 datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF
16 val tokenize: (chars -> 'a * chars) -> (chars -> T * chars) ->
17 Position.T -> string -> 'a * T list
18 val position_of: T -> Position.T
19 val pos_of: T -> string
21 val stopper: T Scan.stopper
22 val kind_of: T -> kind
23 val content_of: T -> string
24 val unparse: T -> string
25 val is_proper: T -> bool
26 val is_digit: string -> bool
27 val c_comment: chars -> T * chars
28 val curly_comment: chars -> T * chars
29 val percent_comment: chars -> T * chars
30 val vcg_header: chars -> (banner * (date * time) option) * chars
31 val siv_header: chars ->
32 (banner * (date * time) * (date * time) * (string * string)) * chars
35 structure Fdl_Lexer: FDL_LEXER =
40 datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF;
42 datatype T = Token of kind * string * Position.T;
44 fun make_token k xs = Token (k, implode (map fst xs),
45 case xs of [] => Position.none | (_, p) :: _ => p);
47 fun kind_of (Token (k, _, _)) = k;
49 fun is_proper (Token (Comment, _, _)) = false
52 fun content_of (Token (_, s, _)) = s;
54 fun unparse (Token (Traceability, s, _)) = "For " ^ s ^ ":"
55 | unparse (Token (_, s, _)) = s;
57 fun position_of (Token (_, _, pos)) = pos;
59 val pos_of = Position.str_of o position_of;
61 fun is_eof (Token (EOF, _, _)) = true
64 fun mk_eof pos = Token (EOF, "", pos);
65 val eof = mk_eof Position.none;
68 Scan.stopper (fn [] => eof | toks => mk_eof (position_of (List.last toks))) is_eof;
70 fun leq_token (Token (_, s, _), Token (_, s', _)) = size s <= size s';
73 (** split up a string into a list of characters (with positions) **)
75 type chars = (string * Position.T) list;
77 fun is_char_eof ("", _) = true
78 | is_char_eof _ = false;
80 val char_stopper = Scan.stopper (K ("", Position.none)) is_char_eof;
82 fun symbol (x : string, _ : Position.T) = x;
84 fun explode_pos s pos = fst (fold_map (fn x => fn pos =>
85 ((x, pos), Position.advance x pos)) (raw_explode s) pos);
90 val any = Scan.one (not o Scan.is_stopper char_stopper);
92 fun prfx [] = Scan.succeed []
93 | prfx (x :: xs) = Scan.one (equal x o symbol) ::: prfx xs;
95 val $$$ = prfx o raw_explode;
97 val lexicon = Scan.make_lexicon (map raw_explode
113 "may_be_replaced_by",
115 "may_be_deduced_from",
116 "are_interchangeable",
153 fun keyword s = Scan.literal lexicon :|--
154 (fn xs => if map symbol xs = raw_explode s then Scan.succeed xs else Scan.fail);
156 fun is_digit x = "0" <= x andalso x <= "9";
157 fun is_alpha x = "a" <= x andalso x <= "z" orelse "A" <= x andalso x <= "Z";
158 val is_underscore = equal "_";
159 val is_tilde = equal "~";
160 val is_newline = equal "\n";
161 val is_tab = equal "\t";
162 val is_space = equal " ";
163 val is_whitespace = is_space orf is_tab orf is_newline;
164 val is_whitespace' = is_space orf is_tab;
166 val number = Scan.many1 (is_digit o symbol);
169 Scan.one (is_alpha o symbol) :::
171 ((is_alpha orf is_digit orf is_underscore) o symbol) @@@
172 Scan.optional (Scan.one (is_tilde o symbol) >> single) [];
174 val long_identifier =
175 identifier @@@ (Scan.repeat1 ($$$ "." @@@ identifier) >> flat);
177 val whitespace = Scan.many (is_whitespace o symbol);
178 val whitespace' = Scan.many (is_whitespace' o symbol);
179 val newline = Scan.one (is_newline o symbol);
183 val drop_blanks = #1 o take_suffix is_whitespace;
184 val all_cs = drop_blanks cs;
185 val dots = if length all_cs > n then " ..." else "";
187 (drop_blanks (take n all_cs)
188 |> map (fn c => if is_whitespace c then " " else c)
194 fun get_pos [] = " (past end-of-text!)"
195 | get_pos ((_, pos) :: _) = Position.str_of pos;
197 fun err (syms, msg) = fn () =>
198 text ^ get_pos syms ^ " at " ^ beginning 10 (map symbol syms) ^
199 (case msg of NONE => "" | SOME m => "\n" ^ m ());
200 in Scan.!! err scan end;
203 Scan.many (not o (Scan.is_stopper char_stopper orf (is_newline o symbol)));
205 val any_line = whitespace' |-- any_line' --|
206 newline >> (implode o map symbol);
208 fun gen_comment a b = $$$ a |-- !!! "missing end of comment"
209 (Scan.repeat (Scan.unless ($$$ b) any) --| $$$ b) >> make_token Comment;
211 val c_comment = gen_comment "/*" "*/";
212 val curly_comment = gen_comment "{" "}";
214 val percent_comment = $$$ "%" |-- any_line' >> make_token Comment;
216 fun repeatn 0 _ = Scan.succeed []
217 | repeatn n p = Scan.one p ::: repeatn (n-1) p;
220 (** header of *.vcg and *.siv files (see simplifier/load_provenance.pro) **)
222 type banner = string * string * string;
223 type date = string * string * string;
224 type time = string * string * string * string option;
226 val asterisks = Scan.repeat1 (Scan.one (equal "*" o symbol));
228 fun alphan n = repeatn n (is_alpha o symbol) >> (implode o map symbol);
229 fun digitn n = repeatn n (is_digit o symbol) >> (implode o map symbol);
232 digitn 2 --| $$$ ":" -- digitn 2 --| $$$ ":" -- digitn 2 --
233 Scan.option ($$$ "." |-- digitn 2) >>
234 (fn (((hr, mi), s), ms) => (hr, mi, s, ms));
237 digitn 2 --| $$$ "-" -- alphan 3 --| $$$ "-" -- digitn 4 >>
238 (fn ((d, m), y) => (d, m, y));
241 whitespace' |-- asterisks --| whitespace' --| newline :|-- (fn xs =>
242 (any_line -- any_line -- any_line >>
243 (fn ((l1, l2), l3) => (l1, l2, l3))) --|
244 whitespace' --| prfx (map symbol xs) --| whitespace' --| newline);
246 val vcg_header = banner -- Scan.option (whitespace |--
247 $$$ "DATE :" |-- whitespace |-- date --| whitespace --|
248 Scan.option ($$$ "TIME :" -- whitespace) -- time);
250 val siv_header = banner --| whitespace --
251 ($$$ "CREATED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
253 ($$$ "SIMPLIFIED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
254 newline --| newline -- (any_line -- any_line) >>
255 (fn (((b, c), s), ls) => (b, c, s, ls));
258 (** the main tokenizer **)
260 fun scan header comment =
261 !!! "bad header" header --| whitespace --
262 Scan.repeat (Scan.unless (Scan.one is_char_eof)
265 || (keyword "For" -- whitespace) |--
266 Scan.repeat1 (Scan.unless (keyword ":") any) --|
267 keyword ":" >> make_token Traceability
268 || Scan.max leq_token
269 (Scan.literal lexicon >> make_token Keyword)
270 ( long_identifier >> make_token Long_Ident
271 || identifier >> make_token Ident)
272 || number >> make_token Number) --|
275 fun tokenize header comment pos s =
276 fst (Scan.finite char_stopper
277 (Scan.error (scan header comment)) (explode_pos s pos));