do not open auxiliary ML structures;
authorwenzelm
Fri, 02 Jul 2010 21:41:06 +0200
changeset 37699ece640e48a6c
parent 37698 ad5489a6be48
child 37700 d123b1e08856
do not open auxiliary ML structures;
misc tuning;
src/Pure/Syntax/parser.ML
     1.1 --- a/src/Pure/Syntax/parser.ML	Fri Jul 02 20:44:17 2010 +0200
     1.2 +++ b/src/Pure/Syntax/parser.ML	Fri Jul 02 21:41:06 2010 +0200
     1.3 @@ -23,19 +23,16 @@
     1.4  structure Parser: PARSER =
     1.5  struct
     1.6  
     1.7 -open Lexicon Syn_Ext;
     1.8 -
     1.9 -
    1.10  (** datatype gram **)
    1.11  
    1.12  type nt_tag = int;              (*production for the NTs are stored in an array
    1.13                                    so we can identify NTs by their index*)
    1.14  
    1.15 -datatype symb = Terminal of token
    1.16 +datatype symb = Terminal of Lexicon.token
    1.17                | Nonterminal of nt_tag * int;              (*(tag, precedence)*)
    1.18  
    1.19 -type nt_gram = ((nt_tag list * token list) *
    1.20 -                (token option * (symb list * string * int) list) list);
    1.21 +type nt_gram = ((nt_tag list * Lexicon.token list) *
    1.22 +                (Lexicon.token option * (symb list * string * int) list) list);
    1.23                                       (*(([dependent_nts], [start_tokens]),
    1.24                                          [(start_token, [(rhs, name, prio)])])*)
    1.25                                (*depent_nts is a list of all NTs whose lookahead
    1.26 @@ -53,8 +50,8 @@
    1.27                           lambda productions are stored as normal productions
    1.28                           and also as an entry in "lambdas"*)
    1.29  
    1.30 -val UnknownStart = eof;       (*productions for which no starting token is
    1.31 -                                known yet are associated with this token*)
    1.32 +val UnknownStart = Lexicon.eof;       (*productions for which no starting token is
    1.33 +                                        known yet are associated with this token*)
    1.34  
    1.35  (* get all NTs that are connected with a list of NTs
    1.36     (used for expanding chain list)*)
    1.37 @@ -125,7 +122,7 @@
    1.38        (*get all known starting tokens for a nonterminal*)
    1.39        fun starts_for_nt nt = snd (fst (Array.sub (prods, nt)));
    1.40  
    1.41 -      val token_union = uncurry (union matching_tokens);
    1.42 +      val token_union = uncurry (union Lexicon.matching_tokens);
    1.43  
    1.44        (*update prods, lookaheads, and lambdas according to new lambda NTs*)
    1.45        val (added_starts', lambdas') =
    1.46 @@ -158,7 +155,7 @@
    1.47                          val nt_dependencies' = union (op =) nts nt_dependencies;
    1.48  
    1.49                          (*associate production with new starting tokens*)
    1.50 -                        fun copy ([]: token option list) nt_prods = nt_prods
    1.51 +                        fun copy ([]: Lexicon.token option list) nt_prods = nt_prods
    1.52                            | copy (tk :: tks) nt_prods =
    1.53                              let val old_prods = these (AList.lookup (op =) nt_prods tk);
    1.54  
    1.55 @@ -259,7 +256,7 @@
    1.56                let
    1.57                  val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt);
    1.58  
    1.59 -                val new_tks = subtract matching_tokens old_tks start_tks;
    1.60 +                val new_tks = subtract Lexicon.matching_tokens old_tks start_tks;
    1.61  
    1.62                  (*store new production*)
    1.63                  fun store [] prods is_new =
    1.64 @@ -278,7 +275,7 @@
    1.65                            else (new_prod :: tk_prods, true);
    1.66  
    1.67                          val prods' = prods
    1.68 -                          |> is_new' ? AList.update (op =) (tk: token option, tk_prods');
    1.69 +                          |> is_new' ? AList.update (op =) (tk: Lexicon.token option, tk_prods');
    1.70                      in store tks prods' (is_new orelse is_new') end;
    1.71  
    1.72                  val (nt_prods', prod_count', changed) =
    1.73 @@ -329,10 +326,10 @@
    1.74                                     andalso member (op =) new_tks (the tk);
    1.75  
    1.76                        (*associate production with new starting tokens*)
    1.77 -                      fun copy ([]: token list) nt_prods = nt_prods
    1.78 +                      fun copy ([]: Lexicon.token list) nt_prods = nt_prods
    1.79                          | copy (tk :: tks) nt_prods =
    1.80                            let
    1.81 -                            val tk_prods = (these o AList.lookup (op =) nt_prods) (SOME tk);
    1.82 +                            val tk_prods = these (AList.lookup (op =) nt_prods (SOME tk));
    1.83  
    1.84                              val tk_prods' =
    1.85                                if not lambda then p :: tk_prods
    1.86 @@ -359,7 +356,7 @@
    1.87                        val (lookahead as (old_nts, old_tks), nt_prods) =
    1.88                          Array.sub (prods, nt);
    1.89  
    1.90 -                      val tk_prods = (these o AList.lookup (op =) nt_prods) key;
    1.91 +                      val tk_prods = these (AList.lookup (op =) nt_prods key);
    1.92  
    1.93                        (*associate productions with new lookahead tokens*)
    1.94                        val (tk_prods', nt_prods') =
    1.95 @@ -370,7 +367,7 @@
    1.96                          |> (key = SOME UnknownStart) ? AList.update (op =) (key, tk_prods')
    1.97  
    1.98                        val added_tks =
    1.99 -                        subtract matching_tokens old_tks new_tks;
   1.100 +                        subtract Lexicon.matching_tokens old_tks new_tks;
   1.101                      in if null added_tks then
   1.102                           (Array.update (prods, nt, (lookahead, nt_prods'));
   1.103                            process_nts nts added)
   1.104 @@ -381,7 +378,7 @@
   1.105                      end;
   1.106  
   1.107                  val ((dependent, _), _) = Array.sub (prods, changed_nt);
   1.108 -              in add_starts (starts @ (process_nts dependent [])) end;
   1.109 +              in add_starts (starts @ process_nts dependent []) end;
   1.110          in add_starts added_starts' end;
   1.111    in add_prods prods chains' lambdas' prod_count ps end;
   1.112  
   1.113 @@ -394,8 +391,8 @@
   1.114  
   1.115      val nt_name = the o Inttab.lookup (Inttab.make (map swap (Symtab.dest tags)));
   1.116  
   1.117 -    fun pretty_symb (Terminal (Token (Literal, s, _))) = Pretty.quote (Pretty.str s)
   1.118 -      | pretty_symb (Terminal tok) = Pretty.str (str_of_token tok)
   1.119 +    fun pretty_symb (Terminal (Lexicon.Token (Lexicon.Literal, s, _))) = Pretty.quote (Pretty.str s)
   1.120 +      | pretty_symb (Terminal tok) = Pretty.str (Lexicon.str_of_token tok)
   1.121        | pretty_symb (Nonterminal (tag, p)) =
   1.122            Pretty.str (nt_name tag ^ "[" ^ signed_string_of_int p ^ "]");
   1.123  
   1.124 @@ -422,7 +419,6 @@
   1.125  
   1.126  (** Operations on gramars **)
   1.127  
   1.128 -(*The mother of all grammars*)
   1.129  val empty_gram = Gram {nt_count = 0, prod_count = 0,
   1.130                         tags = Symtab.empty, chains = [], lambdas = [],
   1.131                         prods = Array.array (0, (([], []), []))};
   1.132 @@ -454,12 +450,13 @@
   1.133        delimiters and predefined terms are stored as terminals,
   1.134        nonterminals are converted to integer tags*)
   1.135      fun symb_of [] nt_count tags result = (nt_count, tags, rev result)
   1.136 -      | symb_of ((Delim s) :: ss) nt_count tags result =
   1.137 -          symb_of ss nt_count tags (Terminal (Token (Literal, s, Position.no_range)) :: result)
   1.138 -      | symb_of ((Argument (s, p)) :: ss) nt_count tags result =
   1.139 +      | symb_of ((Syn_Ext.Delim s) :: ss) nt_count tags result =
   1.140 +          symb_of ss nt_count tags
   1.141 +            (Terminal (Lexicon.Token (Lexicon.Literal, s, Position.no_range)) :: result)
   1.142 +      | symb_of ((Syn_Ext.Argument (s, p)) :: ss) nt_count tags result =
   1.143            let
   1.144              val (nt_count', tags', new_symb) =
   1.145 -              case predef_term s of
   1.146 +              case Lexicon.predef_term s of
   1.147                  NONE =>
   1.148                    let val (nt_count', tags', s_tag) = get_tag nt_count tags s;
   1.149                    in (nt_count', tags', Nonterminal (s_tag, p)) end
   1.150 @@ -471,7 +468,7 @@
   1.151      (*Convert list of productions by invoking symb_of for each of them*)
   1.152      fun prod_of [] nt_count prod_count tags result =
   1.153            (nt_count, prod_count, tags, result)
   1.154 -      | prod_of ((XProd (lhs, xsymbs, const, pri)) :: ps)
   1.155 +      | prod_of ((Syn_Ext.XProd (lhs, xsymbs, const, pri)) :: ps)
   1.156                  nt_count prod_count tags result =
   1.157          let val (nt_count', tags', lhs_tag) = get_tag nt_count tags lhs;
   1.158  
   1.159 @@ -604,7 +601,7 @@
   1.160  
   1.161  datatype parsetree =
   1.162    Node of string * parsetree list |
   1.163 -  Tip of token;
   1.164 +  Tip of Lexicon.token;
   1.165  
   1.166  type state =
   1.167    nt_tag * int *                (*identification and production precedence*)
   1.168 @@ -675,7 +672,7 @@
   1.169  
   1.170  
   1.171  fun movedot_term (A, j, ts, Terminal a :: sa, id, i) c =
   1.172 -  if valued_token c then
   1.173 +  if Lexicon.valued_token c then
   1.174      (A, j, ts @ [Tip c], sa, id, i)
   1.175    else (A, j, ts, sa, id, i);
   1.176  
   1.177 @@ -695,105 +692,105 @@
   1.178  (*get all productions of a NT and NTs chained to it which can
   1.179    be started by specified token*)
   1.180  fun prods_for prods chains include_none tk nts =
   1.181 -let
   1.182 -    fun token_assoc (list, key) =
   1.183 -      let fun assoc [] result = result
   1.184 -            | assoc ((keyi, pi) :: pairs) result =
   1.185 -                if is_some keyi andalso matching_tokens (the keyi, key)
   1.186 -                   orelse include_none andalso is_none keyi then
   1.187 -                  assoc pairs (pi @ result)
   1.188 -                else assoc pairs result;
   1.189 -      in assoc list [] end;
   1.190 +  let
   1.191 +      fun token_assoc (list, key) =
   1.192 +        let fun assoc [] result = result
   1.193 +              | assoc ((keyi, pi) :: pairs) result =
   1.194 +                  if is_some keyi andalso Lexicon.matching_tokens (the keyi, key)
   1.195 +                     orelse include_none andalso is_none keyi then
   1.196 +                    assoc pairs (pi @ result)
   1.197 +                  else assoc pairs result;
   1.198 +        in assoc list [] end;
   1.199  
   1.200 -    fun get_prods [] result = result
   1.201 -      | get_prods (nt :: nts) result =
   1.202 -        let val nt_prods = snd (Array.sub (prods, nt));
   1.203 -        in get_prods nts ((token_assoc (nt_prods, tk)) @ result) end;
   1.204 -in get_prods (connected_with chains nts nts) [] end;
   1.205 +      fun get_prods [] result = result
   1.206 +        | get_prods (nt :: nts) result =
   1.207 +          let val nt_prods = snd (Array.sub (prods, nt));
   1.208 +          in get_prods nts ((token_assoc (nt_prods, tk)) @ result) end;
   1.209 +  in get_prods (connected_with chains nts nts) [] end;
   1.210  
   1.211  
   1.212  fun PROCESSS warned prods chains Estate i c states =
   1.213 -let
   1.214 -fun all_prods_for nt = prods_for prods chains true c [nt];
   1.215 +  let
   1.216 +    fun all_prods_for nt = prods_for prods chains true c [nt];
   1.217  
   1.218 -fun processS used [] (Si, Sii) = (Si, Sii)
   1.219 -  | processS used (S :: States) (Si, Sii) =
   1.220 -      (case S of
   1.221 -        (_, _, _, Nonterminal (nt, minPrec) :: _, _, _) =>
   1.222 -          let                                       (*predictor operation*)
   1.223 -            val (used', new_states) =
   1.224 -              (case AList.lookup (op =) used nt of
   1.225 -                SOME (usedPrec, l) =>       (*nonterminal has been processed*)
   1.226 -                  if usedPrec <= minPrec then
   1.227 -                                      (*wanted precedence has been processed*)
   1.228 -                    (used, movedot_lambda S l)
   1.229 -                  else            (*wanted precedence hasn't been parsed yet*)
   1.230 -                    let
   1.231 -                      val tk_prods = all_prods_for nt;
   1.232 +    fun processS used [] (Si, Sii) = (Si, Sii)
   1.233 +      | processS used (S :: States) (Si, Sii) =
   1.234 +          (case S of
   1.235 +            (_, _, _, Nonterminal (nt, minPrec) :: _, _, _) =>
   1.236 +              let                                       (*predictor operation*)
   1.237 +                val (used', new_states) =
   1.238 +                  (case AList.lookup (op =) used nt of
   1.239 +                    SOME (usedPrec, l) =>       (*nonterminal has been processed*)
   1.240 +                      if usedPrec <= minPrec then
   1.241 +                                          (*wanted precedence has been processed*)
   1.242 +                        (used, movedot_lambda S l)
   1.243 +                      else            (*wanted precedence hasn't been parsed yet*)
   1.244 +                        let
   1.245 +                          val tk_prods = all_prods_for nt;
   1.246  
   1.247 -                      val States' = mkStates i minPrec nt
   1.248 -                                      (getRHS' minPrec usedPrec tk_prods);
   1.249 -                    in (update_prec (nt, minPrec) used,
   1.250 -                        movedot_lambda S l @ States')
   1.251 -                    end
   1.252 +                          val States' = mkStates i minPrec nt
   1.253 +                                          (getRHS' minPrec usedPrec tk_prods);
   1.254 +                        in (update_prec (nt, minPrec) used,
   1.255 +                            movedot_lambda S l @ States')
   1.256 +                        end
   1.257  
   1.258 -              | NONE =>           (*nonterminal is parsed for the first time*)
   1.259 -                  let val tk_prods = all_prods_for nt;
   1.260 -                      val States' = mkStates i minPrec nt
   1.261 -                                      (getRHS minPrec tk_prods);
   1.262 -                  in ((nt, (minPrec, [])) :: used, States') end);
   1.263 +                  | NONE =>           (*nonterminal is parsed for the first time*)
   1.264 +                      let val tk_prods = all_prods_for nt;
   1.265 +                          val States' = mkStates i minPrec nt
   1.266 +                                          (getRHS minPrec tk_prods);
   1.267 +                      in ((nt, (minPrec, [])) :: used, States') end);
   1.268  
   1.269 -            val dummy =
   1.270 -              if not (!warned) andalso
   1.271 -                 length (new_states @ States) > (!branching_level) then
   1.272 -                (warning "Currently parsed expression could be extremely ambiguous.";
   1.273 -                 warned := true)
   1.274 -              else ();
   1.275 -          in
   1.276 -            processS used' (new_states @ States) (S :: Si, Sii)
   1.277 -          end
   1.278 -      | (_, _, _, Terminal a :: _, _, _) =>               (*scanner operation*)
   1.279 -          processS used States
   1.280 -            (S :: Si,
   1.281 -              if matching_tokens (a, c) then movedot_term S c :: Sii else Sii)
   1.282 -      | (A, prec, ts, [], id, j) =>                   (*completer operation*)
   1.283 -          let val tt = if id = "" then ts else [Node (id, ts)] in
   1.284 -            if j = i then                             (*lambda production?*)
   1.285 -              let
   1.286 -                val (used', O) = update_trees used (A, (tt, prec));
   1.287 +                val dummy =
   1.288 +                  if not (!warned) andalso
   1.289 +                     length (new_states @ States) > (!branching_level) then
   1.290 +                    (warning "Currently parsed expression could be extremely ambiguous.";
   1.291 +                     warned := true)
   1.292 +                  else ();
   1.293                in
   1.294 -                case O of
   1.295 -                  NONE =>
   1.296 -                    let val Slist = getS A prec Si;
   1.297 -                        val States' = map (movedot_nonterm tt) Slist;
   1.298 -                    in processS used' (States' @ States) (S :: Si, Sii) end
   1.299 -                | SOME n =>
   1.300 -                    if n >= prec then processS used' States (S :: Si, Sii)
   1.301 -                    else
   1.302 -                      let val Slist = getS' A prec n Si;
   1.303 -                          val States' = map (movedot_nonterm tt) Slist;
   1.304 -                      in processS used' (States' @ States) (S :: Si, Sii) end
   1.305 +                processS used' (new_states @ States) (S :: Si, Sii)
   1.306                end
   1.307 -            else
   1.308 -              let val Slist = getStates Estate i j A prec
   1.309 -              in processS used (map (movedot_nonterm tt) Slist @ States)
   1.310 -                          (S :: Si, Sii)
   1.311 -              end
   1.312 -          end)
   1.313 -in processS [] states ([], []) end;
   1.314 +          | (_, _, _, Terminal a :: _, _, _) =>               (*scanner operation*)
   1.315 +              processS used States
   1.316 +                (S :: Si,
   1.317 +                  if Lexicon.matching_tokens (a, c) then movedot_term S c :: Sii else Sii)
   1.318 +          | (A, prec, ts, [], id, j) =>                   (*completer operation*)
   1.319 +              let val tt = if id = "" then ts else [Node (id, ts)] in
   1.320 +                if j = i then                             (*lambda production?*)
   1.321 +                  let
   1.322 +                    val (used', O) = update_trees used (A, (tt, prec));
   1.323 +                  in
   1.324 +                    case O of
   1.325 +                      NONE =>
   1.326 +                        let val Slist = getS A prec Si;
   1.327 +                            val States' = map (movedot_nonterm tt) Slist;
   1.328 +                        in processS used' (States' @ States) (S :: Si, Sii) end
   1.329 +                    | SOME n =>
   1.330 +                        if n >= prec then processS used' States (S :: Si, Sii)
   1.331 +                        else
   1.332 +                          let val Slist = getS' A prec n Si;
   1.333 +                              val States' = map (movedot_nonterm tt) Slist;
   1.334 +                          in processS used' (States' @ States) (S :: Si, Sii) end
   1.335 +                  end
   1.336 +                else
   1.337 +                  let val Slist = getStates Estate i j A prec
   1.338 +                  in processS used (map (movedot_nonterm tt) Slist @ States)
   1.339 +                              (S :: Si, Sii)
   1.340 +                  end
   1.341 +              end)
   1.342 +  in processS [] states ([], []) end;
   1.343  
   1.344  
   1.345  fun produce warned prods tags chains stateset i indata prev_token =
   1.346    (case Array.sub (stateset, i) of
   1.347      [] =>
   1.348        let
   1.349 -        val toks = if is_eof prev_token then indata else prev_token :: indata;
   1.350 -        val pos = Position.str_of (pos_of_token prev_token);
   1.351 +        val toks = if Lexicon.is_eof prev_token then indata else prev_token :: indata;
   1.352 +        val pos = Position.str_of (Lexicon.pos_of_token prev_token);
   1.353        in
   1.354          if null toks then error ("Inner syntax error: unexpected end of input" ^ pos)
   1.355          else error (Pretty.string_of (Pretty.block
   1.356            (Pretty.str ("Inner syntax error" ^ pos ^ " at \"") ::
   1.357 -            Pretty.breaks (map (Pretty.str o str_of_token) (#1 (split_last toks))) @
   1.358 +            Pretty.breaks (map (Pretty.str o Lexicon.str_of_token) (#1 (split_last toks))) @
   1.359              [Pretty.str "\""])))
   1.360        end
   1.361    | s =>
   1.362 @@ -807,21 +804,20 @@
   1.363         end));
   1.364  
   1.365  
   1.366 -fun get_trees l = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE)
   1.367 -                            l;
   1.368 +fun get_trees l = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) l;
   1.369  
   1.370  fun earley prods tags chains startsymbol indata =
   1.371    let
   1.372 -    val start_tag = case Symtab.lookup tags startsymbol of
   1.373 -                       SOME tag => tag
   1.374 -                     | NONE   => error ("parse: Unknown startsymbol " ^
   1.375 -                                        quote startsymbol);
   1.376 -    val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal eof], "", 0)];
   1.377 +    val start_tag =
   1.378 +      (case Symtab.lookup tags startsymbol of
   1.379 +        SOME tag => tag
   1.380 +      | NONE => error ("parse: Unknown startsymbol " ^ quote startsymbol));
   1.381 +    val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal Lexicon.eof], "", 0)];
   1.382      val s = length indata + 1;
   1.383      val Estate = Array.array (s, []);
   1.384    in
   1.385      Array.update (Estate, 0, S0);
   1.386 -    get_trees (produce (Unsynchronized.ref false) prods tags chains Estate 0 indata eof)
   1.387 +    get_trees (produce (Unsynchronized.ref false) prods tags chains Estate 0 indata Lexicon.eof)
   1.388    end;
   1.389  
   1.390  
   1.391 @@ -830,9 +826,9 @@
   1.392      val end_pos =
   1.393        (case try List.last toks of
   1.394          NONE => Position.none
   1.395 -      | SOME (Token (_, _, (_, end_pos))) => end_pos);
   1.396 +      | SOME (Lexicon.Token (_, _, (_, end_pos))) => end_pos);
   1.397      val r =
   1.398 -      (case earley prods tags chains start (toks @ [mk_eof end_pos]) of
   1.399 +      (case earley prods tags chains start (toks @ [Lexicon.mk_eof end_pos]) of
   1.400          [] => sys_error "parse: no parse trees"
   1.401        | pts => pts);
   1.402    in r end;
   1.403 @@ -842,7 +838,8 @@
   1.404    let
   1.405      fun freeze a = map_range (curry Array.sub a) (Array.length a);
   1.406      val prods = maps snd (maps snd (freeze (#prods gram)));
   1.407 -    fun guess (SOME ([Nonterminal (_, k), Terminal (Token (Literal, s, _)), Nonterminal (_, l)], _, j)) =
   1.408 +    fun guess (SOME ([Nonterminal (_, k),
   1.409 +            Terminal (Lexicon.Token (Lexicon.Literal, s, _)), Nonterminal (_, l)], _, j)) =
   1.410            if k = j andalso l = j + 1 then SOME (s, true, false, j)
   1.411            else if k = j + 1 then if l = j then SOME (s, false, true, j)
   1.412              else if l = j + 1 then SOME (s, false, false, j)