defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
1 (* Title: Pure/ML/ml_lex.ML
4 Lexical syntax for SML.
10 Keyword | Ident | LongIdent | TypeVar | Word | Int | Real | Char | String |
11 Space | Comment | Error of string | EOF
13 val stopper: token Scan.stopper
14 val is_regular: token -> bool
15 val is_improper: token -> bool
16 val set_range: Position.range -> token -> token
17 val pos_of: token -> Position.T
18 val end_pos_of: token -> Position.T
19 val kind_of: token -> token_kind
20 val content_of: token -> string
21 val check_content_of: token -> string
22 val flatten: token list -> string
23 val report_token: token -> unit
24 val keywords: string list
25 val source: (Symbol.symbol, 'a) Source.source ->
26 (token, (Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source)
27 Source.source) Source.source
28 val tokenize: string -> token list
29 val read: Position.T -> Symbol_Pos.text -> token Antiquote.antiquote list
32 structure ML_Lex: ML_LEX =
40 Keyword | Ident | LongIdent | TypeVar | Word | Int | Real | Char | String |
41 Space | Comment | Error of string | EOF;
43 datatype token = Token of Position.range * (token_kind * string);
48 fun set_range range (Token (_, x)) = Token (range, x);
50 fun pos_of (Token ((pos, _), _)) = pos;
51 fun end_pos_of (Token ((_, pos), _)) = pos;
56 fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
57 val eof = mk_eof Position.none;
59 fun is_eof (Token (_, (EOF, _))) = true
63 Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof;
68 fun kind_of (Token (_, (k, _))) = k;
70 fun content_of (Token (_, (_, x))) = x;
71 fun token_leq (tok, tok') = content_of tok <= content_of tok';
73 fun is_regular (Token (_, (Error _, _))) = false
74 | is_regular (Token (_, (EOF, _))) = false
75 | is_regular _ = true;
77 fun is_improper (Token (_, (Space, _))) = true
78 | is_improper (Token (_, (Comment, _))) = true
79 | is_improper _ = false;
83 Token (_, (Keyword, ":>")) =>
84 warning ("Opaque signature matching (:>) fails to work with ML pretty printing --\n\
85 \prefer non-opaque matching (:) possibly with abstype" ^
86 Position.str_of (pos_of tok))
89 fun check_content_of tok =
91 Error msg => error msg
92 | _ => content_of tok);
94 fun flatten_content (tok :: (toks as tok' :: _)) =
95 Symbol.escape (check_content_of tok) ::
96 (if is_improper tok orelse is_improper tok' then flatten_content toks
97 else Symbol.space :: flatten_content toks)
98 | flatten_content toks = map (Symbol.escape o check_content_of) toks;
100 val flatten = implode o flatten_content;
107 val token_kind_markup =
108 fn Keyword => Markup.ML_keyword
109 | Ident => Markup.ML_ident
110 | LongIdent => Markup.ML_ident
111 | TypeVar => Markup.ML_tvar
112 | Word => Markup.ML_numeral
113 | Int => Markup.ML_numeral
114 | Real => Markup.ML_numeral
115 | Char => Markup.ML_char
116 | String => Markup.ML_string
117 | Space => Markup.empty
118 | Comment => Markup.ML_comment
119 | Error _ => Markup.ML_malformed
120 | EOF => Markup.empty;
122 fun token_markup kind x =
123 if kind = Keyword andalso exists_string (not o Symbol.is_ascii_letter) x
124 then Markup.ML_delimiter
125 else token_kind_markup kind;
129 fun report_token (Token ((pos, _), (kind, x))) = Position.report pos (token_markup kind x);
137 open Basic_Symbol_Pos;
139 fun !!! msg = Symbol_Pos.!!! (fn () => "SML lexical error: " ^ msg);
144 val scan_blank = Scan.one (Symbol.is_ascii_blank o Symbol_Pos.symbol);
145 val scan_blanks1 = Scan.repeat1 scan_blank;
150 val keywords = ["#", "(", ")", ",", "->", "...", ":", ":>", ";", "=",
151 "=>", "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as",
152 "case", "datatype", "do", "else", "end", "eqtype", "exception", "fn",
153 "fun", "functor", "handle", "if", "in", "include", "infix", "infixr",
154 "let", "local", "nonfix", "of", "op", "open", "orelse", "raise", "rec",
155 "sharing", "sig", "signature", "struct", "structure", "then", "type",
156 "val", "where", "while", "with", "withtype"];
158 val lex = Scan.make_lexicon (map raw_explode keywords);
159 fun scan_keyword x = Scan.literal lex x;
168 ((Symbol.is_ascii_letter orf Symbol.is_ascii_digit orf Symbol.is_ascii_quasi) o
171 val scan_alphanumeric =
172 Scan.one (Symbol.is_ascii_letter o Symbol_Pos.symbol) -- scan_letdigs >> op ::;
175 Scan.many1 (member (op =) (raw_explode "!#$%&*+-/:<=>?@\\^`|~") o Symbol_Pos.symbol);
179 val scan_ident = scan_alphanumeric || scan_symbolic;
182 (Scan.repeat1 (scan_alphanumeric @@@ $$$ ".") >> flat) @@@ (scan_ident || $$$ "=");
184 val scan_typevar = $$$ "'" @@@ scan_letdigs;
193 val scan_dec = Scan.many1 (Symbol.is_ascii_digit o Symbol_Pos.symbol);
194 val scan_hex = Scan.many1 (Symbol.is_ascii_hex o Symbol_Pos.symbol);
195 val scan_sign = Scan.optional ($$$ "~") [];
196 val scan_decint = scan_sign @@@ scan_dec;
201 $$$ "0" @@@ $$$ "w" @@@ $$$ "x" @@@ scan_hex ||
202 $$$ "0" @@@ $$$ "w" @@@ scan_dec;
204 val scan_int = scan_sign @@@ ($$$ "0" @@@ $$$ "x" @@@ scan_hex || scan_dec);
206 val scan_exp = ($$$ "E" || $$$ "e") @@@ scan_decint;
209 scan_decint @@@ $$$ "." @@@ scan_dec @@@ Scan.optional scan_exp [] ||
210 scan_decint @@@ scan_exp;
215 (* chars and strings *)
220 Scan.one (member (op =) (raw_explode "\"\\abtnvfr") o Symbol_Pos.symbol) >> single ||
221 $$$ "^" @@@ (Scan.one (fn (s, _) => ord "@" <= ord s andalso ord s <= ord "_") >> single) ||
222 Scan.one (Symbol.is_ascii_digit o Symbol_Pos.symbol) --
223 Scan.one (Symbol.is_ascii_digit o Symbol_Pos.symbol) --
224 Scan.one (Symbol.is_ascii_digit o Symbol_Pos.symbol) >> (fn ((a, b), c) => [a, b, c]);
227 Scan.one (fn (s, _) => Symbol.is_regular s andalso s <> "\"" andalso s <> "\\" andalso
228 (not (Symbol.is_char s) orelse Symbol.is_printable s)) >> single ||
229 $$$ "\\" @@@ !!! "bad escape character in string" scan_escape;
231 val scan_gap = $$$ "\\" @@@ scan_blanks1 @@@ $$$ "\\";
232 val scan_gaps = Scan.repeat scan_gap >> flat;
237 $$$ "#" @@@ $$$ "\"" @@@ scan_gaps @@@ scan_str @@@ scan_gaps @@@ $$$ "\"";
240 $$$ "\"" @@@ !!! "missing quote at end of string"
241 ((Scan.repeat (scan_gap || scan_str) >> flat) @@@ $$$ "\"");
250 fun token k ss = Token (Symbol_Pos.range ss, (k, Symbol_Pos.content ss));
253 (scan_char >> token Char ||
254 scan_string >> token String ||
255 scan_blanks1 >> token Space ||
256 Symbol_Pos.scan_comment !!! >> token Comment ||
258 (scan_keyword >> token Keyword)
259 (scan_word >> token Word ||
260 scan_real >> token Real ||
261 scan_int >> token Int ||
262 scan_longident >> token LongIdent ||
263 scan_ident >> token Ident ||
264 scan_typevar >> token TypeVar));
266 val scan_antiq = Antiquote.scan || scan_ml >> Antiquote.Text;
269 Scan.many (((not o Symbol.is_blank) andf Symbol.is_regular) o Symbol_Pos.symbol)
270 >> (fn cs => [token (Error msg) cs]);
275 Symbol_Pos.source (Position.line 1) src
276 |> Source.source Symbol_Pos.stopper (Scan.bulk (!!! "bad input" scan_ml)) (SOME (false, recover));
278 val tokenize = Source.of_string #> Symbol.source #> source #> Source.exhaust;
282 val _ = Position.report pos Markup.ML_source;
283 val syms = Symbol_Pos.explode (txt, pos);
288 val pos1 = List.last syms |-> Position.advance;
289 val pos2 = Position.advance Symbol.space pos1;
290 in [Antiquote.Text (Token (Position.range pos1 pos2, (Space, Symbol.space)))] end;
293 |> Source.source Symbol_Pos.stopper (Scan.bulk (!!! "bad input" scan_antiq))
294 (SOME (false, fn msg => recover msg >> map Antiquote.Text))
296 |> tap (List.app (Antiquote.report report_token))
297 |> tap Antiquote.check_nesting
298 |> tap (List.app (fn Antiquote.Text tok => (check_content_of tok; warn tok) | _ => ())))
300 cat_error msg ("The error(s) above occurred in ML source" ^ Position.str_of pos);
301 in input @ termination end;