1.1 --- a/src/Pure/Syntax/parser.ML Fri Jul 02 20:44:17 2010 +0200
1.2 +++ b/src/Pure/Syntax/parser.ML Fri Jul 02 21:41:06 2010 +0200
1.3 @@ -23,19 +23,16 @@
1.4 structure Parser: PARSER =
1.5 struct
1.6
1.7 -open Lexicon Syn_Ext;
1.8 -
1.9 -
1.10 (** datatype gram **)
1.11
1.12 type nt_tag = int; (*production for the NTs are stored in an array
1.13 so we can identify NTs by their index*)
1.14
1.15 -datatype symb = Terminal of token
1.16 +datatype symb = Terminal of Lexicon.token
1.17 | Nonterminal of nt_tag * int; (*(tag, precedence)*)
1.18
1.19 -type nt_gram = ((nt_tag list * token list) *
1.20 - (token option * (symb list * string * int) list) list);
1.21 +type nt_gram = ((nt_tag list * Lexicon.token list) *
1.22 + (Lexicon.token option * (symb list * string * int) list) list);
1.23 (*(([dependent_nts], [start_tokens]),
1.24 [(start_token, [(rhs, name, prio)])])*)
1.25 (*depent_nts is a list of all NTs whose lookahead
1.26 @@ -53,8 +50,8 @@
1.27 lambda productions are stored as normal productions
1.28 and also as an entry in "lambdas"*)
1.29
1.30 -val UnknownStart = eof; (*productions for which no starting token is
1.31 - known yet are associated with this token*)
1.32 +val UnknownStart = Lexicon.eof; (*productions for which no starting token is
1.33 + known yet are associated with this token*)
1.34
1.35 (* get all NTs that are connected with a list of NTs
1.36 (used for expanding chain list)*)
1.37 @@ -125,7 +122,7 @@
1.38 (*get all known starting tokens for a nonterminal*)
1.39 fun starts_for_nt nt = snd (fst (Array.sub (prods, nt)));
1.40
1.41 - val token_union = uncurry (union matching_tokens);
1.42 + val token_union = uncurry (union Lexicon.matching_tokens);
1.43
1.44 (*update prods, lookaheads, and lambdas according to new lambda NTs*)
1.45 val (added_starts', lambdas') =
1.46 @@ -158,7 +155,7 @@
1.47 val nt_dependencies' = union (op =) nts nt_dependencies;
1.48
1.49 (*associate production with new starting tokens*)
1.50 - fun copy ([]: token option list) nt_prods = nt_prods
1.51 + fun copy ([]: Lexicon.token option list) nt_prods = nt_prods
1.52 | copy (tk :: tks) nt_prods =
1.53 let val old_prods = these (AList.lookup (op =) nt_prods tk);
1.54
1.55 @@ -259,7 +256,7 @@
1.56 let
1.57 val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt);
1.58
1.59 - val new_tks = subtract matching_tokens old_tks start_tks;
1.60 + val new_tks = subtract Lexicon.matching_tokens old_tks start_tks;
1.61
1.62 (*store new production*)
1.63 fun store [] prods is_new =
1.64 @@ -278,7 +275,7 @@
1.65 else (new_prod :: tk_prods, true);
1.66
1.67 val prods' = prods
1.68 - |> is_new' ? AList.update (op =) (tk: token option, tk_prods');
1.69 + |> is_new' ? AList.update (op =) (tk: Lexicon.token option, tk_prods');
1.70 in store tks prods' (is_new orelse is_new') end;
1.71
1.72 val (nt_prods', prod_count', changed) =
1.73 @@ -329,10 +326,10 @@
1.74 andalso member (op =) new_tks (the tk);
1.75
1.76 (*associate production with new starting tokens*)
1.77 - fun copy ([]: token list) nt_prods = nt_prods
1.78 + fun copy ([]: Lexicon.token list) nt_prods = nt_prods
1.79 | copy (tk :: tks) nt_prods =
1.80 let
1.81 - val tk_prods = (these o AList.lookup (op =) nt_prods) (SOME tk);
1.82 + val tk_prods = these (AList.lookup (op =) nt_prods (SOME tk));
1.83
1.84 val tk_prods' =
1.85 if not lambda then p :: tk_prods
1.86 @@ -359,7 +356,7 @@
1.87 val (lookahead as (old_nts, old_tks), nt_prods) =
1.88 Array.sub (prods, nt);
1.89
1.90 - val tk_prods = (these o AList.lookup (op =) nt_prods) key;
1.91 + val tk_prods = these (AList.lookup (op =) nt_prods key);
1.92
1.93 (*associate productions with new lookahead tokens*)
1.94 val (tk_prods', nt_prods') =
1.95 @@ -370,7 +367,7 @@
1.96 |> (key = SOME UnknownStart) ? AList.update (op =) (key, tk_prods')
1.97
1.98 val added_tks =
1.99 - subtract matching_tokens old_tks new_tks;
1.100 + subtract Lexicon.matching_tokens old_tks new_tks;
1.101 in if null added_tks then
1.102 (Array.update (prods, nt, (lookahead, nt_prods'));
1.103 process_nts nts added)
1.104 @@ -381,7 +378,7 @@
1.105 end;
1.106
1.107 val ((dependent, _), _) = Array.sub (prods, changed_nt);
1.108 - in add_starts (starts @ (process_nts dependent [])) end;
1.109 + in add_starts (starts @ process_nts dependent []) end;
1.110 in add_starts added_starts' end;
1.111 in add_prods prods chains' lambdas' prod_count ps end;
1.112
1.113 @@ -394,8 +391,8 @@
1.114
1.115 val nt_name = the o Inttab.lookup (Inttab.make (map swap (Symtab.dest tags)));
1.116
1.117 - fun pretty_symb (Terminal (Token (Literal, s, _))) = Pretty.quote (Pretty.str s)
1.118 - | pretty_symb (Terminal tok) = Pretty.str (str_of_token tok)
1.119 + fun pretty_symb (Terminal (Lexicon.Token (Lexicon.Literal, s, _))) = Pretty.quote (Pretty.str s)
1.120 + | pretty_symb (Terminal tok) = Pretty.str (Lexicon.str_of_token tok)
1.121 | pretty_symb (Nonterminal (tag, p)) =
1.122 Pretty.str (nt_name tag ^ "[" ^ signed_string_of_int p ^ "]");
1.123
1.124 @@ -422,7 +419,6 @@
1.125
1.126 (** Operations on gramars **)
1.127
1.128 -(*The mother of all grammars*)
1.129 val empty_gram = Gram {nt_count = 0, prod_count = 0,
1.130 tags = Symtab.empty, chains = [], lambdas = [],
1.131 prods = Array.array (0, (([], []), []))};
1.132 @@ -454,12 +450,13 @@
1.133 delimiters and predefined terms are stored as terminals,
1.134 nonterminals are converted to integer tags*)
1.135 fun symb_of [] nt_count tags result = (nt_count, tags, rev result)
1.136 - | symb_of ((Delim s) :: ss) nt_count tags result =
1.137 - symb_of ss nt_count tags (Terminal (Token (Literal, s, Position.no_range)) :: result)
1.138 - | symb_of ((Argument (s, p)) :: ss) nt_count tags result =
1.139 + | symb_of ((Syn_Ext.Delim s) :: ss) nt_count tags result =
1.140 + symb_of ss nt_count tags
1.141 + (Terminal (Lexicon.Token (Lexicon.Literal, s, Position.no_range)) :: result)
1.142 + | symb_of ((Syn_Ext.Argument (s, p)) :: ss) nt_count tags result =
1.143 let
1.144 val (nt_count', tags', new_symb) =
1.145 - case predef_term s of
1.146 + case Lexicon.predef_term s of
1.147 NONE =>
1.148 let val (nt_count', tags', s_tag) = get_tag nt_count tags s;
1.149 in (nt_count', tags', Nonterminal (s_tag, p)) end
1.150 @@ -471,7 +468,7 @@
1.151 (*Convert list of productions by invoking symb_of for each of them*)
1.152 fun prod_of [] nt_count prod_count tags result =
1.153 (nt_count, prod_count, tags, result)
1.154 - | prod_of ((XProd (lhs, xsymbs, const, pri)) :: ps)
1.155 + | prod_of ((Syn_Ext.XProd (lhs, xsymbs, const, pri)) :: ps)
1.156 nt_count prod_count tags result =
1.157 let val (nt_count', tags', lhs_tag) = get_tag nt_count tags lhs;
1.158
1.159 @@ -604,7 +601,7 @@
1.160
1.161 datatype parsetree =
1.162 Node of string * parsetree list |
1.163 - Tip of token;
1.164 + Tip of Lexicon.token;
1.165
1.166 type state =
1.167 nt_tag * int * (*identification and production precedence*)
1.168 @@ -675,7 +672,7 @@
1.169
1.170
1.171 fun movedot_term (A, j, ts, Terminal a :: sa, id, i) c =
1.172 - if valued_token c then
1.173 + if Lexicon.valued_token c then
1.174 (A, j, ts @ [Tip c], sa, id, i)
1.175 else (A, j, ts, sa, id, i);
1.176
1.177 @@ -695,105 +692,105 @@
1.178 (*get all productions of a NT and NTs chained to it which can
1.179 be started by specified token*)
1.180 fun prods_for prods chains include_none tk nts =
1.181 -let
1.182 - fun token_assoc (list, key) =
1.183 - let fun assoc [] result = result
1.184 - | assoc ((keyi, pi) :: pairs) result =
1.185 - if is_some keyi andalso matching_tokens (the keyi, key)
1.186 - orelse include_none andalso is_none keyi then
1.187 - assoc pairs (pi @ result)
1.188 - else assoc pairs result;
1.189 - in assoc list [] end;
1.190 + let
1.191 + fun token_assoc (list, key) =
1.192 + let fun assoc [] result = result
1.193 + | assoc ((keyi, pi) :: pairs) result =
1.194 + if is_some keyi andalso Lexicon.matching_tokens (the keyi, key)
1.195 + orelse include_none andalso is_none keyi then
1.196 + assoc pairs (pi @ result)
1.197 + else assoc pairs result;
1.198 + in assoc list [] end;
1.199
1.200 - fun get_prods [] result = result
1.201 - | get_prods (nt :: nts) result =
1.202 - let val nt_prods = snd (Array.sub (prods, nt));
1.203 - in get_prods nts ((token_assoc (nt_prods, tk)) @ result) end;
1.204 -in get_prods (connected_with chains nts nts) [] end;
1.205 + fun get_prods [] result = result
1.206 + | get_prods (nt :: nts) result =
1.207 + let val nt_prods = snd (Array.sub (prods, nt));
1.208 + in get_prods nts ((token_assoc (nt_prods, tk)) @ result) end;
1.209 + in get_prods (connected_with chains nts nts) [] end;
1.210
1.211
1.212 fun PROCESSS warned prods chains Estate i c states =
1.213 -let
1.214 -fun all_prods_for nt = prods_for prods chains true c [nt];
1.215 + let
1.216 + fun all_prods_for nt = prods_for prods chains true c [nt];
1.217
1.218 -fun processS used [] (Si, Sii) = (Si, Sii)
1.219 - | processS used (S :: States) (Si, Sii) =
1.220 - (case S of
1.221 - (_, _, _, Nonterminal (nt, minPrec) :: _, _, _) =>
1.222 - let (*predictor operation*)
1.223 - val (used', new_states) =
1.224 - (case AList.lookup (op =) used nt of
1.225 - SOME (usedPrec, l) => (*nonterminal has been processed*)
1.226 - if usedPrec <= minPrec then
1.227 - (*wanted precedence has been processed*)
1.228 - (used, movedot_lambda S l)
1.229 - else (*wanted precedence hasn't been parsed yet*)
1.230 - let
1.231 - val tk_prods = all_prods_for nt;
1.232 + fun processS used [] (Si, Sii) = (Si, Sii)
1.233 + | processS used (S :: States) (Si, Sii) =
1.234 + (case S of
1.235 + (_, _, _, Nonterminal (nt, minPrec) :: _, _, _) =>
1.236 + let (*predictor operation*)
1.237 + val (used', new_states) =
1.238 + (case AList.lookup (op =) used nt of
1.239 + SOME (usedPrec, l) => (*nonterminal has been processed*)
1.240 + if usedPrec <= minPrec then
1.241 + (*wanted precedence has been processed*)
1.242 + (used, movedot_lambda S l)
1.243 + else (*wanted precedence hasn't been parsed yet*)
1.244 + let
1.245 + val tk_prods = all_prods_for nt;
1.246
1.247 - val States' = mkStates i minPrec nt
1.248 - (getRHS' minPrec usedPrec tk_prods);
1.249 - in (update_prec (nt, minPrec) used,
1.250 - movedot_lambda S l @ States')
1.251 - end
1.252 + val States' = mkStates i minPrec nt
1.253 + (getRHS' minPrec usedPrec tk_prods);
1.254 + in (update_prec (nt, minPrec) used,
1.255 + movedot_lambda S l @ States')
1.256 + end
1.257
1.258 - | NONE => (*nonterminal is parsed for the first time*)
1.259 - let val tk_prods = all_prods_for nt;
1.260 - val States' = mkStates i minPrec nt
1.261 - (getRHS minPrec tk_prods);
1.262 - in ((nt, (minPrec, [])) :: used, States') end);
1.263 + | NONE => (*nonterminal is parsed for the first time*)
1.264 + let val tk_prods = all_prods_for nt;
1.265 + val States' = mkStates i minPrec nt
1.266 + (getRHS minPrec tk_prods);
1.267 + in ((nt, (minPrec, [])) :: used, States') end);
1.268
1.269 - val dummy =
1.270 - if not (!warned) andalso
1.271 - length (new_states @ States) > (!branching_level) then
1.272 - (warning "Currently parsed expression could be extremely ambiguous.";
1.273 - warned := true)
1.274 - else ();
1.275 - in
1.276 - processS used' (new_states @ States) (S :: Si, Sii)
1.277 - end
1.278 - | (_, _, _, Terminal a :: _, _, _) => (*scanner operation*)
1.279 - processS used States
1.280 - (S :: Si,
1.281 - if matching_tokens (a, c) then movedot_term S c :: Sii else Sii)
1.282 - | (A, prec, ts, [], id, j) => (*completer operation*)
1.283 - let val tt = if id = "" then ts else [Node (id, ts)] in
1.284 - if j = i then (*lambda production?*)
1.285 - let
1.286 - val (used', O) = update_trees used (A, (tt, prec));
1.287 + val dummy =
1.288 + if not (!warned) andalso
1.289 + length (new_states @ States) > (!branching_level) then
1.290 + (warning "Currently parsed expression could be extremely ambiguous.";
1.291 + warned := true)
1.292 + else ();
1.293 in
1.294 - case O of
1.295 - NONE =>
1.296 - let val Slist = getS A prec Si;
1.297 - val States' = map (movedot_nonterm tt) Slist;
1.298 - in processS used' (States' @ States) (S :: Si, Sii) end
1.299 - | SOME n =>
1.300 - if n >= prec then processS used' States (S :: Si, Sii)
1.301 - else
1.302 - let val Slist = getS' A prec n Si;
1.303 - val States' = map (movedot_nonterm tt) Slist;
1.304 - in processS used' (States' @ States) (S :: Si, Sii) end
1.305 + processS used' (new_states @ States) (S :: Si, Sii)
1.306 end
1.307 - else
1.308 - let val Slist = getStates Estate i j A prec
1.309 - in processS used (map (movedot_nonterm tt) Slist @ States)
1.310 - (S :: Si, Sii)
1.311 - end
1.312 - end)
1.313 -in processS [] states ([], []) end;
1.314 + | (_, _, _, Terminal a :: _, _, _) => (*scanner operation*)
1.315 + processS used States
1.316 + (S :: Si,
1.317 + if Lexicon.matching_tokens (a, c) then movedot_term S c :: Sii else Sii)
1.318 + | (A, prec, ts, [], id, j) => (*completer operation*)
1.319 + let val tt = if id = "" then ts else [Node (id, ts)] in
1.320 + if j = i then (*lambda production?*)
1.321 + let
1.322 + val (used', O) = update_trees used (A, (tt, prec));
1.323 + in
1.324 + case O of
1.325 + NONE =>
1.326 + let val Slist = getS A prec Si;
1.327 + val States' = map (movedot_nonterm tt) Slist;
1.328 + in processS used' (States' @ States) (S :: Si, Sii) end
1.329 + | SOME n =>
1.330 + if n >= prec then processS used' States (S :: Si, Sii)
1.331 + else
1.332 + let val Slist = getS' A prec n Si;
1.333 + val States' = map (movedot_nonterm tt) Slist;
1.334 + in processS used' (States' @ States) (S :: Si, Sii) end
1.335 + end
1.336 + else
1.337 + let val Slist = getStates Estate i j A prec
1.338 + in processS used (map (movedot_nonterm tt) Slist @ States)
1.339 + (S :: Si, Sii)
1.340 + end
1.341 + end)
1.342 + in processS [] states ([], []) end;
1.343
1.344
1.345 fun produce warned prods tags chains stateset i indata prev_token =
1.346 (case Array.sub (stateset, i) of
1.347 [] =>
1.348 let
1.349 - val toks = if is_eof prev_token then indata else prev_token :: indata;
1.350 - val pos = Position.str_of (pos_of_token prev_token);
1.351 + val toks = if Lexicon.is_eof prev_token then indata else prev_token :: indata;
1.352 + val pos = Position.str_of (Lexicon.pos_of_token prev_token);
1.353 in
1.354 if null toks then error ("Inner syntax error: unexpected end of input" ^ pos)
1.355 else error (Pretty.string_of (Pretty.block
1.356 (Pretty.str ("Inner syntax error" ^ pos ^ " at \"") ::
1.357 - Pretty.breaks (map (Pretty.str o str_of_token) (#1 (split_last toks))) @
1.358 + Pretty.breaks (map (Pretty.str o Lexicon.str_of_token) (#1 (split_last toks))) @
1.359 [Pretty.str "\""])))
1.360 end
1.361 | s =>
1.362 @@ -807,21 +804,20 @@
1.363 end));
1.364
1.365
1.366 -fun get_trees l = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE)
1.367 - l;
1.368 +fun get_trees l = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) l;
1.369
1.370 fun earley prods tags chains startsymbol indata =
1.371 let
1.372 - val start_tag = case Symtab.lookup tags startsymbol of
1.373 - SOME tag => tag
1.374 - | NONE => error ("parse: Unknown startsymbol " ^
1.375 - quote startsymbol);
1.376 - val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal eof], "", 0)];
1.377 + val start_tag =
1.378 + (case Symtab.lookup tags startsymbol of
1.379 + SOME tag => tag
1.380 + | NONE => error ("parse: Unknown startsymbol " ^ quote startsymbol));
1.381 + val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal Lexicon.eof], "", 0)];
1.382 val s = length indata + 1;
1.383 val Estate = Array.array (s, []);
1.384 in
1.385 Array.update (Estate, 0, S0);
1.386 - get_trees (produce (Unsynchronized.ref false) prods tags chains Estate 0 indata eof)
1.387 + get_trees (produce (Unsynchronized.ref false) prods tags chains Estate 0 indata Lexicon.eof)
1.388 end;
1.389
1.390
1.391 @@ -830,9 +826,9 @@
1.392 val end_pos =
1.393 (case try List.last toks of
1.394 NONE => Position.none
1.395 - | SOME (Token (_, _, (_, end_pos))) => end_pos);
1.396 + | SOME (Lexicon.Token (_, _, (_, end_pos))) => end_pos);
1.397 val r =
1.398 - (case earley prods tags chains start (toks @ [mk_eof end_pos]) of
1.399 + (case earley prods tags chains start (toks @ [Lexicon.mk_eof end_pos]) of
1.400 [] => sys_error "parse: no parse trees"
1.401 | pts => pts);
1.402 in r end;
1.403 @@ -842,7 +838,8 @@
1.404 let
1.405 fun freeze a = map_range (curry Array.sub a) (Array.length a);
1.406 val prods = maps snd (maps snd (freeze (#prods gram)));
1.407 - fun guess (SOME ([Nonterminal (_, k), Terminal (Token (Literal, s, _)), Nonterminal (_, l)], _, j)) =
1.408 + fun guess (SOME ([Nonterminal (_, k),
1.409 + Terminal (Lexicon.Token (Lexicon.Literal, s, _)), Nonterminal (_, l)], _, j)) =
1.410 if k = j andalso l = j + 1 then SOME (s, true, false, j)
1.411 else if k = j + 1 then if l = j then SOME (s, false, true, j)
1.412 else if l = j + 1 then SOME (s, false, false, j)