1.1 --- a/src/Pure/ML/ml_lex.ML Thu Aug 07 13:45:09 2008 +0200
1.2 +++ b/src/Pure/ML/ml_lex.ML Thu Aug 07 13:45:11 2008 +0200
1.3 @@ -19,7 +19,8 @@
1.4 val val_of: token -> string
1.5 val keywords: string list
1.6 val source: (Symbol.symbol, 'a) Source.source ->
1.7 - (token, int * (Symbol.symbol, 'a) Source.source) Source.source
1.8 + (token, (SymbolPos.T, Position.T * (Symbol.symbol, 'a) Source.source)
1.9 + Source.source) Source.source
1.10 end;
1.11
1.12 structure ML_Lex: ML_LEX =
1.13 @@ -33,19 +34,36 @@
1.14 Keyword | Ident | LongIdent | TypeVar | Word | Int | Real | Char | String |
1.15 Space | Comment | Error of string | EOF;
1.16
1.17 -datatype token = Token of int * (token_kind * string);
1.18 +datatype token = Token of Position.range * (token_kind * string);
1.19 +
1.20 +
1.21 +(* position *)
1.22 +
1.23 +fun position_of (Token ((pos, _), _)) = pos;
1.24 +fun end_position_of (Token ((_, pos), _)) = pos;
1.25 +
1.26 +val pos_of = Position.str_of o position_of;
1.27
1.28
1.29 (* control tokens *)
1.30
1.31 -val eof = Token (0, (EOF, ""));
1.32 +fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
1.33 +val eof = mk_eof Position.none;
1.34
1.35 fun is_eof (Token (_, (EOF, _))) = true
1.36 | is_eof _ = false;
1.37
1.38 -val stopper = Scan.stopper (K eof) is_eof;
1.39 +val stopper =
1.40 + Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof;
1.41
1.42
1.43 +(* token content *)
1.44 +
1.45 +fun val_of (Token (_, (_, x))) = x;
1.46 +fun token_leq (tok, tok') = val_of tok <= val_of tok';
1.47 +
1.48 +fun kind_of (Token (_, (k, _))) = k;
1.49 +
1.50 fun is_regular (Token (_, (Error _, _))) = false
1.51 | is_regular (Token (_, (EOF, _))) = false
1.52 | is_regular _ = true;
1.53 @@ -55,36 +73,18 @@
1.54 | is_improper _ = false;
1.55
1.56
1.57 -(* token content *)
1.58 -
1.59 -fun pos_of (Token (n, _)) = " (line " ^ string_of_int n ^ ")";
1.60 -
1.61 -fun kind_of (Token (_, (k, _))) = k;
1.62 -fun val_of (Token (_, (_, x))) = x;
1.63 -
1.64 -fun token_leq (Token (_, (_, x)), Token (_, (_, x'))) = x <= x';
1.65 -
1.66 -
1.67
1.68 (** scanners **)
1.69
1.70 -(* diagnostics *)
1.71 +open BasicSymbolPos;
1.72
1.73 -fun lex_err msg ((n, cs), _) = "SML lexical error (line " ^ string_of_int n ^ "): " ^ msg cs;
1.74 -fun !!! msg scan = Scan.!! (lex_err (K msg)) scan;
1.75 +fun !!! msg = SymbolPos.!!! ("SML lexical error: " ^ msg);
1.76
1.77
1.78 -(* line numbering *)
1.79 +(* blanks *)
1.80
1.81 -fun incr_line scan = Scan.depend (fn (n: int) => scan >> pair (n + 1));
1.82 -val keep_line = Scan.lift;
1.83 -
1.84 -val scan_blank =
1.85 - incr_line ($$ "\n") ||
1.86 - keep_line (Scan.one Symbol.is_ascii_blank);
1.87 -
1.88 -val scan_blanks = Scan.repeat scan_blank >> implode;
1.89 -val scan_blanks1 = Scan.repeat1 scan_blank >> implode;
1.90 +val scan_blank = Scan.one (Symbol.is_ascii_blank o symbol);
1.91 +val scan_blanks1 = Scan.repeat1 scan_blank;
1.92
1.93
1.94 (* keywords *)
1.95 @@ -97,7 +97,8 @@
1.96 "sharing", "sig", "signature", "struct", "structure", "then", "type",
1.97 "val", "where", "while", "with", "withtype"];
1.98
1.99 -val scan_keyword = Scan.literal (Scan.make_lexicon (map explode keywords)) >> implode;
1.100 +val lex = Scan.make_lexicon (map explode keywords);
1.101 +fun scan_keyword x = Scan.literal lex x;
1.102
1.103
1.104 (* identifiers *)
1.105 @@ -105,20 +106,20 @@
1.106 local
1.107
1.108 val scan_letdigs =
1.109 - Scan.many (Symbol.is_ascii_letter orf Symbol.is_ascii_digit orf Symbol.is_ascii_quasi) >> implode;
1.110 + Scan.many ((Symbol.is_ascii_letter orf Symbol.is_ascii_digit orf Symbol.is_ascii_quasi) o symbol);
1.111
1.112 -val scan_alphanumeric = Scan.one Symbol.is_ascii_letter ^^ scan_letdigs;
1.113 +val scan_alphanumeric = Scan.one (Symbol.is_ascii_letter o symbol) -- scan_letdigs >> op ::;
1.114
1.115 -val scan_symbolic = Scan.many1 (member (op =) (explode "!#$%&*+-/:<=>?@\\^`|~")) >> implode;
1.116 +val scan_symbolic = Scan.many1 (member (op =) (explode "!#$%&*+-/:<=>?@\\^`|~") o symbol);
1.117
1.118 in
1.119
1.120 val scan_ident = scan_alphanumeric || scan_symbolic;
1.121
1.122 val scan_longident =
1.123 - (Scan.repeat1 (scan_alphanumeric ^^ $$ ".") >> implode) ^^ (scan_ident || $$ "=");
1.124 + (Scan.repeat1 (scan_alphanumeric @@@ $$$ ".") >> flat) @@@ (scan_ident || $$$ "=");
1.125
1.126 -val scan_typevar = $$ "'" ^^ scan_letdigs;
1.127 +val scan_typevar = $$$ "'" @@@ scan_letdigs;
1.128
1.129 end;
1.130
1.131 @@ -127,22 +128,24 @@
1.132
1.133 local
1.134
1.135 -val scan_dec = Scan.many1 Symbol.is_ascii_digit >> implode;
1.136 -val scan_hex = Scan.many1 Symbol.is_ascii_hex >> implode;
1.137 -val scan_sign = Scan.optional ($$ "~") "";
1.138 -val scan_decint = scan_sign ^^ scan_dec;
1.139 +val scan_dec = Scan.many1 (Symbol.is_ascii_digit o symbol);
1.140 +val scan_hex = Scan.many1 (Symbol.is_ascii_hex o symbol);
1.141 +val scan_sign = Scan.optional ($$$ "~") [];
1.142 +val scan_decint = scan_sign @@@ scan_dec;
1.143
1.144 in
1.145
1.146 -val scan_word = Scan.this_string "0wx" ^^ scan_hex || Scan.this_string "0w" ^^ scan_dec;
1.147 +val scan_word =
1.148 + $$$ "0" @@@ $$$ "w" @@@ $$$ "x" @@@ scan_hex ||
1.149 + $$$ "0" @@@ $$$ "w" @@@ scan_dec;
1.150
1.151 -val scan_int = scan_sign ^^ (Scan.this_string "0x" ^^ scan_hex || scan_dec);
1.152 +val scan_int = scan_sign @@@ ($$$ "0" @@@ $$$ "x" @@@ scan_hex || scan_dec);
1.153
1.154 -val scan_exp = ($$ "E" || $$ "e") ^^ scan_decint;
1.155 +val scan_exp = ($$$ "E" || $$$ "e") @@@ scan_decint;
1.156
1.157 val scan_real =
1.158 - scan_decint ^^ $$ "." ^^ scan_dec ^^ Scan.optional scan_exp "" ||
1.159 - scan_decint ^^ scan_exp;
1.160 + scan_decint @@@ $$$ "." @@@ scan_dec @@@ Scan.optional scan_exp [] ||
1.161 + scan_decint @@@ scan_exp;
1.162
1.163 end;
1.164
1.165 @@ -152,90 +155,62 @@
1.166 local
1.167
1.168 val scan_escape =
1.169 - Scan.one (member (op =) (explode "\"\\abtnvfr")) ||
1.170 - $$ "^" ^^ Scan.one (fn s => ord "@" <= ord s andalso ord s <= ord "_") ||
1.171 - Scan.one Symbol.is_ascii_digit ^^
1.172 - Scan.one Symbol.is_ascii_digit ^^
1.173 - Scan.one Symbol.is_ascii_digit;
1.174 + Scan.one (member (op =) (explode "\"\\abtnvfr") o symbol) >> single ||
1.175 + $$$ "^" @@@ (Scan.one (fn (s, _) => ord "@" <= ord s andalso ord s <= ord "_") >> single) ||
1.176 + Scan.one (Symbol.is_ascii_digit o symbol) --
1.177 + Scan.one (Symbol.is_ascii_digit o symbol) --
1.178 + Scan.one (Symbol.is_ascii_digit o symbol) >> (fn ((a, b), c) => [a, b, c]);
1.179
1.180 val scan_str =
1.181 - keep_line (Scan.one (fn s => Symbol.is_printable s andalso s <> "\"" andalso s <> "\\")) ||
1.182 - keep_line ($$ "\\") ^^ !!! "bad escape character in string" (keep_line scan_escape);
1.183 + Scan.one (fn (s, _) => Symbol.is_printable s andalso s <> "\"" andalso s <> "\\") >> single ||
1.184 + $$$ "\\" @@@ !!! "bad escape character in string" scan_escape;
1.185
1.186 -val scan_gap = keep_line ($$ "\\") ^^ scan_blanks1 ^^ keep_line ($$ "\\");
1.187 -val scan_gaps = Scan.repeat scan_gap >> implode;
1.188 +val scan_gap = $$$ "\\" @@@ scan_blanks1 @@@ $$$ "\\";
1.189 +val scan_gaps = Scan.repeat scan_gap >> flat;
1.190
1.191 in
1.192
1.193 val scan_char =
1.194 - keep_line ($$ "#" ^^ $$ "\"") ^^ scan_gaps ^^ scan_str ^^ scan_gaps ^^ keep_line ($$ "\"");
1.195 + $$$ "#" @@@ $$$ "\"" @@@ scan_gaps @@@ scan_str @@@ scan_gaps @@@ $$$ "\"";
1.196
1.197 val scan_string =
1.198 - keep_line ($$ "\"") ^^
1.199 - !!! "missing quote at end of string"
1.200 - ((Scan.repeat (scan_gap || scan_str) >> implode) ^^ keep_line ($$ "\""));
1.201 + $$$ "\"" @@@ !!! "missing quote at end of string"
1.202 + ((Scan.repeat (scan_gap || scan_str) >> flat) @@@ $$$ "\"");
1.203
1.204 end;
1.205
1.206
1.207 -(* scan nested comments *)
1.208 -
1.209 -local
1.210 -
1.211 -val scan_cmt =
1.212 - Scan.lift scan_blank ||
1.213 - Scan.depend (fn (d: int) => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
1.214 - Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
1.215 - Scan.lift (keep_line ($$ "*" --| Scan.ahead (~$$ ")"))) ||
1.216 - Scan.lift (keep_line (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)));
1.217 -
1.218 -in
1.219 -
1.220 -val scan_comment =
1.221 - keep_line ($$ "(" ^^ $$ "*") ^^
1.222 - !!! "missing end of comment"
1.223 - (Scan.pass 0 (Scan.repeat scan_cmt >> implode) ^^ keep_line ($$ "*" ^^ $$ ")"));
1.224 -
1.225 -end;
1.226 -
1.227 -
1.228 -(* scan token *)
1.229 -
1.230 -val scan =
1.231 - let
1.232 - val scanner = Scan.state :|-- (fn n =>
1.233 - let
1.234 - fun token k x = Token (n, (k, x));
1.235 - in
1.236 - scan_char >> token Char ||
1.237 - scan_string >> token String ||
1.238 - scan_blanks1 >> token Space ||
1.239 - scan_comment >> token Comment ||
1.240 - keep_line (Scan.max token_leq
1.241 - (scan_keyword >> token Keyword)
1.242 - (scan_word >> token Word ||
1.243 - scan_real >> token Real ||
1.244 - scan_int >> token Int ||
1.245 - scan_longident >> token LongIdent ||
1.246 - scan_ident >> token Ident ||
1.247 - scan_typevar >> token TypeVar))
1.248 - end);
1.249 - in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end;
1.250 -
1.251 -
1.252 (* token source *)
1.253
1.254 local
1.255
1.256 -val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular;
1.257 +fun token k s =
1.258 + let val (x, range) = SymbolPos.implode s
1.259 + in Token (range, (k, x)) end;
1.260
1.261 -fun recover msg = Scan.state :|-- (fn n =>
1.262 - keep_line (Scan.many is_junk) >> (fn cs => [Token (n, (Error msg, implode cs))]));
1.263 +val scan = !!! "bad input"
1.264 + (scan_char >> token Char ||
1.265 + scan_string >> token String ||
1.266 + scan_blanks1 >> token Space ||
1.267 + SymbolPos.scan_comment !!! >> token Comment ||
1.268 + Scan.max token_leq
1.269 + (scan_keyword >> token Keyword)
1.270 + (scan_word >> token Word ||
1.271 + scan_real >> token Real ||
1.272 + scan_int >> token Int ||
1.273 + scan_longident >> token LongIdent ||
1.274 + scan_ident >> token Ident ||
1.275 + scan_typevar >> token TypeVar));
1.276 +
1.277 +fun recover msg =
1.278 + Scan.many (((not o Symbol.is_blank) andf Symbol.is_regular) o symbol)
1.279 + >> (fn cs => [token (Error msg) cs]);
1.280
1.281 in
1.282
1.283 fun source src =
1.284 - Source.source' 1 Symbol.stopper (Scan.bulk scan) (SOME (false, recover)) src;
1.285 + SymbolPos.source (Position.line 1) src
1.286 + |> Source.source SymbolPos.stopper (Scan.bulk scan) (SOME (false, recover));
1.287
1.288 end;
1.289