improved position handling due to SymbolPos.T;
authorwenzelm
Thu, 07 Aug 2008 13:45:11 +0200
changeset 27772b933c997eab3
parent 27771 98499933a50f
child 27773 a52166b228b9
improved position handling due to SymbolPos.T;
SymbolPos.scan_comment;
tuned;
src/Pure/ML/ml_lex.ML
     1.1 --- a/src/Pure/ML/ml_lex.ML	Thu Aug 07 13:45:09 2008 +0200
     1.2 +++ b/src/Pure/ML/ml_lex.ML	Thu Aug 07 13:45:11 2008 +0200
     1.3 @@ -19,7 +19,8 @@
     1.4    val val_of: token -> string
     1.5    val keywords: string list
     1.6    val source: (Symbol.symbol, 'a) Source.source ->
     1.7 -    (token, int * (Symbol.symbol, 'a) Source.source) Source.source
     1.8 +    (token, (SymbolPos.T, Position.T * (Symbol.symbol, 'a) Source.source)
     1.9 +      Source.source) Source.source
    1.10  end;
    1.11  
    1.12  structure ML_Lex: ML_LEX =
    1.13 @@ -33,19 +34,36 @@
    1.14    Keyword | Ident | LongIdent | TypeVar | Word | Int | Real | Char | String |
    1.15    Space | Comment | Error of string | EOF;
    1.16  
    1.17 -datatype token = Token of int * (token_kind * string);
    1.18 +datatype token = Token of Position.range * (token_kind * string);
    1.19 +
    1.20 +
    1.21 +(* position *)
    1.22 +
    1.23 +fun position_of (Token ((pos, _), _)) = pos;
    1.24 +fun end_position_of (Token ((_, pos), _)) = pos;
    1.25 +
    1.26 +val pos_of = Position.str_of o position_of;
    1.27  
    1.28  
    1.29  (* control tokens *)
    1.30  
    1.31 -val eof = Token (0, (EOF, ""));
    1.32 +fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
    1.33 +val eof = mk_eof Position.none;
    1.34  
    1.35  fun is_eof (Token (_, (EOF, _))) = true
    1.36    | is_eof _ = false;
    1.37  
    1.38 -val stopper = Scan.stopper (K eof) is_eof;
    1.39 +val stopper =
    1.40 +  Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof;
    1.41  
    1.42  
    1.43 +(* token content *)
    1.44 +
    1.45 +fun val_of (Token (_, (_, x))) = x;
    1.46 +fun token_leq (tok, tok') = val_of tok <= val_of tok';
    1.47 +
    1.48 +fun kind_of (Token (_, (k, _))) = k;
    1.49 +
    1.50  fun is_regular (Token (_, (Error _, _))) = false
    1.51    | is_regular (Token (_, (EOF, _))) = false
    1.52    | is_regular _ = true;
    1.53 @@ -55,36 +73,18 @@
    1.54    | is_improper _ = false;
    1.55  
    1.56  
    1.57 -(* token content *)
    1.58 -
    1.59 -fun pos_of (Token (n, _)) = " (line " ^ string_of_int n ^ ")";
    1.60 -
    1.61 -fun kind_of (Token (_, (k, _))) = k;
    1.62 -fun val_of (Token (_, (_, x))) = x;
    1.63 -
    1.64 -fun token_leq (Token (_, (_, x)), Token (_, (_, x'))) = x <= x';
    1.65 -
    1.66 -
    1.67  
    1.68  (** scanners **)
    1.69  
    1.70 -(* diagnostics *)
    1.71 +open BasicSymbolPos;
    1.72  
    1.73 -fun lex_err msg ((n, cs), _) = "SML lexical error (line " ^ string_of_int n ^ "): " ^ msg cs;
    1.74 -fun !!! msg scan = Scan.!! (lex_err (K msg)) scan;
    1.75 +fun !!! msg = SymbolPos.!!! ("SML lexical error: " ^ msg);
    1.76  
    1.77  
    1.78 -(* line numbering *)
    1.79 +(* blanks *)
    1.80  
    1.81 -fun incr_line scan = Scan.depend (fn (n: int) => scan >> pair (n + 1));
    1.82 -val keep_line = Scan.lift;
    1.83 -
    1.84 -val scan_blank =
    1.85 -  incr_line ($$ "\n") ||
    1.86 -  keep_line (Scan.one Symbol.is_ascii_blank);
    1.87 -
    1.88 -val scan_blanks = Scan.repeat scan_blank >> implode;
    1.89 -val scan_blanks1 = Scan.repeat1 scan_blank >> implode;
    1.90 +val scan_blank = Scan.one (Symbol.is_ascii_blank o symbol);
    1.91 +val scan_blanks1 = Scan.repeat1 scan_blank;
    1.92  
    1.93  
    1.94  (* keywords *)
    1.95 @@ -97,7 +97,8 @@
    1.96    "sharing", "sig", "signature", "struct", "structure", "then", "type",
    1.97    "val", "where", "while", "with", "withtype"];
    1.98  
    1.99 -val scan_keyword = Scan.literal (Scan.make_lexicon (map explode keywords)) >> implode;
   1.100 +val lex = Scan.make_lexicon (map explode keywords);
   1.101 +fun scan_keyword x = Scan.literal lex x;
   1.102  
   1.103  
   1.104  (* identifiers *)
   1.105 @@ -105,20 +106,20 @@
   1.106  local
   1.107  
   1.108  val scan_letdigs =
   1.109 -  Scan.many (Symbol.is_ascii_letter orf Symbol.is_ascii_digit orf Symbol.is_ascii_quasi) >> implode;
   1.110 +  Scan.many ((Symbol.is_ascii_letter orf Symbol.is_ascii_digit orf Symbol.is_ascii_quasi) o symbol);
   1.111  
   1.112 -val scan_alphanumeric = Scan.one Symbol.is_ascii_letter ^^ scan_letdigs;
   1.113 +val scan_alphanumeric = Scan.one (Symbol.is_ascii_letter o symbol) -- scan_letdigs >> op ::;
   1.114  
   1.115 -val scan_symbolic = Scan.many1 (member (op =) (explode "!#$%&*+-/:<=>?@\\^`|~")) >> implode;
   1.116 +val scan_symbolic = Scan.many1 (member (op =) (explode "!#$%&*+-/:<=>?@\\^`|~") o symbol);
   1.117  
   1.118  in
   1.119  
   1.120  val scan_ident = scan_alphanumeric || scan_symbolic;
   1.121  
   1.122  val scan_longident =
   1.123 -  (Scan.repeat1 (scan_alphanumeric ^^ $$ ".") >> implode) ^^ (scan_ident || $$ "=");
   1.124 +  (Scan.repeat1 (scan_alphanumeric @@@ $$$ ".") >> flat) @@@ (scan_ident || $$$ "=");
   1.125  
   1.126 -val scan_typevar = $$ "'" ^^ scan_letdigs;
   1.127 +val scan_typevar = $$$ "'" @@@ scan_letdigs;
   1.128  
   1.129  end;
   1.130  
   1.131 @@ -127,22 +128,24 @@
   1.132  
   1.133  local
   1.134  
   1.135 -val scan_dec = Scan.many1 Symbol.is_ascii_digit >> implode;
   1.136 -val scan_hex = Scan.many1 Symbol.is_ascii_hex >> implode;
   1.137 -val scan_sign = Scan.optional ($$ "~") "";
   1.138 -val scan_decint = scan_sign ^^ scan_dec;
   1.139 +val scan_dec = Scan.many1 (Symbol.is_ascii_digit o symbol);
   1.140 +val scan_hex = Scan.many1 (Symbol.is_ascii_hex o symbol);
   1.141 +val scan_sign = Scan.optional ($$$ "~") [];
   1.142 +val scan_decint = scan_sign @@@ scan_dec;
   1.143  
   1.144  in
   1.145  
   1.146 -val scan_word = Scan.this_string "0wx" ^^ scan_hex || Scan.this_string "0w" ^^ scan_dec;
   1.147 +val scan_word =
   1.148 +  $$$ "0" @@@ $$$ "w" @@@ $$$ "x" @@@ scan_hex ||
   1.149 +  $$$ "0" @@@ $$$ "w" @@@ scan_dec;
   1.150  
   1.151 -val scan_int = scan_sign ^^ (Scan.this_string "0x" ^^ scan_hex || scan_dec);
   1.152 +val scan_int = scan_sign @@@ ($$$ "0" @@@ $$$ "x" @@@ scan_hex || scan_dec);
   1.153  
   1.154 -val scan_exp = ($$ "E" || $$ "e") ^^ scan_decint;
   1.155 +val scan_exp = ($$$ "E" || $$$ "e") @@@ scan_decint;
   1.156  
   1.157  val scan_real =
   1.158 -  scan_decint ^^ $$ "." ^^ scan_dec ^^ Scan.optional scan_exp "" ||
   1.159 -  scan_decint ^^ scan_exp;
   1.160 +  scan_decint @@@ $$$ "." @@@ scan_dec @@@ Scan.optional scan_exp [] ||
   1.161 +  scan_decint @@@ scan_exp;
   1.162  
   1.163  end;
   1.164  
   1.165 @@ -152,90 +155,62 @@
   1.166  local
   1.167  
   1.168  val scan_escape =
   1.169 -  Scan.one (member (op =) (explode "\"\\abtnvfr")) ||
   1.170 -  $$ "^" ^^ Scan.one (fn s => ord "@" <= ord s andalso ord s <= ord "_") ||
   1.171 -  Scan.one Symbol.is_ascii_digit ^^
   1.172 -    Scan.one Symbol.is_ascii_digit ^^
   1.173 -    Scan.one Symbol.is_ascii_digit;
   1.174 +  Scan.one (member (op =) (explode "\"\\abtnvfr") o symbol) >> single ||
   1.175 +  $$$ "^" @@@ (Scan.one (fn (s, _) => ord "@" <= ord s andalso ord s <= ord "_") >> single) ||
   1.176 +  Scan.one (Symbol.is_ascii_digit o symbol) --
   1.177 +    Scan.one (Symbol.is_ascii_digit o symbol) --
   1.178 +    Scan.one (Symbol.is_ascii_digit o symbol) >> (fn ((a, b), c) => [a, b, c]);
   1.179  
   1.180  val scan_str =
   1.181 -  keep_line (Scan.one (fn s => Symbol.is_printable s andalso s <> "\"" andalso s <> "\\")) ||
   1.182 -  keep_line ($$ "\\") ^^ !!! "bad escape character in string" (keep_line scan_escape);
   1.183 +  Scan.one (fn (s, _) => Symbol.is_printable s andalso s <> "\"" andalso s <> "\\") >> single ||
   1.184 +  $$$ "\\" @@@ !!! "bad escape character in string" scan_escape;
   1.185  
   1.186 -val scan_gap = keep_line ($$ "\\") ^^ scan_blanks1 ^^ keep_line ($$ "\\");
   1.187 -val scan_gaps = Scan.repeat scan_gap >> implode;
   1.188 +val scan_gap = $$$ "\\" @@@ scan_blanks1 @@@ $$$ "\\";
   1.189 +val scan_gaps = Scan.repeat scan_gap >> flat;
   1.190  
   1.191  in
   1.192  
   1.193  val scan_char =
   1.194 -  keep_line ($$ "#" ^^ $$ "\"") ^^ scan_gaps ^^ scan_str ^^ scan_gaps ^^ keep_line ($$ "\"");
   1.195 +  $$$ "#" @@@ $$$ "\"" @@@ scan_gaps @@@ scan_str @@@ scan_gaps @@@ $$$ "\"";
   1.196  
   1.197  val scan_string =
   1.198 -  keep_line ($$ "\"") ^^
   1.199 -  !!! "missing quote at end of string"
   1.200 -    ((Scan.repeat (scan_gap || scan_str) >> implode) ^^ keep_line ($$ "\""));
   1.201 +  $$$ "\"" @@@ !!! "missing quote at end of string"
   1.202 +    ((Scan.repeat (scan_gap || scan_str) >> flat) @@@ $$$ "\"");
   1.203  
   1.204  end;
   1.205  
   1.206  
   1.207 -(* scan nested comments *)
   1.208 -
   1.209 -local
   1.210 -
   1.211 -val scan_cmt =
   1.212 -  Scan.lift scan_blank ||
   1.213 -  Scan.depend (fn (d: int) => keep_line ($$ "(" ^^ $$ "*") >> pair (d + 1)) ||
   1.214 -  Scan.depend (fn 0 => Scan.fail | d => keep_line ($$ "*" ^^ $$ ")") >> pair (d - 1)) ||
   1.215 -  Scan.lift (keep_line ($$ "*" --| Scan.ahead (~$$ ")"))) ||
   1.216 -  Scan.lift (keep_line (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)));
   1.217 -
   1.218 -in
   1.219 -
   1.220 -val scan_comment =
   1.221 -  keep_line ($$ "(" ^^ $$ "*") ^^
   1.222 -  !!! "missing end of comment"
   1.223 -    (Scan.pass 0 (Scan.repeat scan_cmt >> implode) ^^ keep_line ($$ "*" ^^ $$ ")"));
   1.224 -
   1.225 -end;
   1.226 -
   1.227 -
   1.228 -(* scan token *)
   1.229 -
   1.230 -val scan =
   1.231 -  let
   1.232 -    val scanner = Scan.state :|-- (fn n =>
   1.233 -      let
   1.234 -        fun token k x = Token (n, (k, x));
   1.235 -      in
   1.236 -        scan_char >> token Char ||
   1.237 -        scan_string >> token String ||
   1.238 -        scan_blanks1 >> token Space ||
   1.239 -        scan_comment >> token Comment ||
   1.240 -        keep_line (Scan.max token_leq
   1.241 -          (scan_keyword >> token Keyword)
   1.242 -          (scan_word >> token Word ||
   1.243 -            scan_real >> token Real ||
   1.244 -            scan_int >> token Int ||
   1.245 -            scan_longident >> token LongIdent ||
   1.246 -            scan_ident >> token Ident ||
   1.247 -            scan_typevar >> token TypeVar))
   1.248 -      end);
   1.249 -  in !! (lex_err (fn cs => "bad input " ^ quote (Symbol.beginning 10 cs))) scanner end;
   1.250 -
   1.251 -
   1.252  (* token source *)
   1.253  
   1.254  local
   1.255  
   1.256 -val is_junk = (not o Symbol.is_blank) andf Symbol.is_regular;
   1.257 +fun token k s =
   1.258 +  let val (x, range) = SymbolPos.implode s
   1.259 +  in Token (range, (k, x)) end;
   1.260  
   1.261 -fun recover msg = Scan.state :|-- (fn n =>
   1.262 -  keep_line (Scan.many is_junk) >> (fn cs => [Token (n, (Error msg, implode cs))]));
   1.263 +val scan = !!! "bad input"
   1.264 + (scan_char >> token Char ||
   1.265 +  scan_string >> token String ||
   1.266 +  scan_blanks1 >> token Space ||
   1.267 +  SymbolPos.scan_comment !!! >> token Comment ||
   1.268 +  Scan.max token_leq
   1.269 +   (scan_keyword >> token Keyword)
   1.270 +   (scan_word >> token Word ||
   1.271 +    scan_real >> token Real ||
   1.272 +    scan_int >> token Int ||
   1.273 +    scan_longident >> token LongIdent ||
   1.274 +    scan_ident >> token Ident ||
   1.275 +    scan_typevar >> token TypeVar));
   1.276 +
   1.277 +fun recover msg =
   1.278 +  Scan.many (((not o Symbol.is_blank) andf Symbol.is_regular) o symbol)
   1.279 +  >> (fn cs => [token (Error msg) cs]);
   1.280  
   1.281  in
   1.282  
   1.283  fun source src =
   1.284 -  Source.source' 1 Symbol.stopper (Scan.bulk scan) (SOME (false, recover)) src;
   1.285 +  SymbolPos.source (Position.line 1) src
   1.286 +  |> Source.source SymbolPos.stopper (Scan.bulk scan) (SOME (false, recover));
   1.287  
   1.288  end;
   1.289