wneuper/isa: src/Pure/Syntax/parser.ML@33b9b5da3e6f (annotated)

wenzelm@18	1	(* Title: Pure/Syntax/parser.ML
wenzelm@46	2	ID: $Id$
clasohm@345	3	Author: Sonia Mahjoub, Markus Wenzel and Carsten Clasohm, TU Muenchen
wenzelm@18	4
wenzelm@552	5	Isabelle's main parser (used for terms and types).
wenzelm@18	6	*)
wenzelm@18	7
wenzelm@18	8	signature PARSER =
wenzelm@18	9	sig
wenzelm@237	10	structure Lexicon: LEXICON
wenzelm@237	11	structure SynExt: SYN_EXT
wenzelm@237	12	local open Lexicon SynExt SynExt.Ast in
wenzelm@18	13	type gram
wenzelm@18	14	val empty_gram: gram
clasohm@624	15	val extend_gram: gram -> string list -> xprod list -> gram
wenzelm@237	16	val merge_grams: gram -> gram -> gram
wenzelm@237	17	val pretty_gram: gram -> Pretty.T list
wenzelm@237	18	datatype parsetree =
wenzelm@237	19	Node of string * parsetree list \|
wenzelm@237	20	Tip of token
clasohm@330	21	val parse: gram -> string -> token list -> parsetree list
wenzelm@18	22	end
wenzelm@18	23	end;
wenzelm@18	24
wenzelm@237	25	functor ParserFun(structure Symtab: SYMTAB and Lexicon: LEXICON
wenzelm@552	26	and SynExt: SYN_EXT): PARSER =
wenzelm@18	27	struct
wenzelm@18	28
wenzelm@237	29	structure Pretty = SynExt.Ast.Pretty;
wenzelm@237	30	structure Lexicon = Lexicon;
wenzelm@237	31	structure SynExt = SynExt;
wenzelm@237	32	open Lexicon SynExt;
wenzelm@18	33
wenzelm@18	34
wenzelm@18	35	( datatype gram )
wenzelm@18	36
wenzelm@237	37	datatype symb =
wenzelm@237	38	Terminal of token \|
wenzelm@237	39	Nonterminal of string * int;
wenzelm@18	40
clasohm@330	41	datatype refsymb = Term of token \| Nonterm of rhss_ref * int
clasohm@330	42	(reference to production list instead of name)
clasohm@330	43	and gram = Gram of (string * (symb list * string * int)) list *
clasohm@330	44	(string * rhss_ref) list
clasohm@330	45	withtype rhss_ref = (token option * (refsymb list * string * int) list) list ref
clasohm@330	46	(lookahead table: token and productions)
wenzelm@18	47
clasohm@330	48	(* convert productions to reference grammar with lookaheads and eliminate
clasohm@330	49	chain productions *)
clasohm@373	50	fun mk_gram prods =
clasohm@395	51	let (*get reference on list of all possible rhss for nonterminal lhs
clasohm@330	52	(if it doesn't exist a new one is created and added to the nonterminal
clasohm@330	53	list)*)
clasohm@330	54	fun get_rhss ref_prods lhs =
clasohm@330	55	case assoc (ref_prods, lhs) of
clasohm@330	56	None =>
clasohm@330	57	let val l = ref [(None, [])]
clasohm@330	58	in (l, (lhs, l) :: ref_prods) end
clasohm@330	59	\| Some l => (l, ref_prods);
clasohm@330	60
clasohm@330	61	(convert symb list to refsymb list)
clasohm@330	62	fun mk_refsymbs ref_prods [] rs = (rs, ref_prods)
clasohm@330	63	\| mk_refsymbs ref_prods (Terminal tk :: symbs) rs =
clasohm@330	64	mk_refsymbs ref_prods symbs (rs @ [Term tk])
clasohm@330	65	\| mk_refsymbs ref_prods (Nonterminal (name, prec) :: symbs) rs =
clasohm@330	66	let val (rhss, ref_prods') = get_rhss ref_prods name
clasohm@330	67	in mk_refsymbs ref_prods' symbs (rs @ [Nonterm (rhss, prec)])
clasohm@330	68	end;
clasohm@330	69
clasohm@330	70	(convert prod list to (string rhss_ref) list
clasohm@330	71	without computing lookaheads*)
clasohm@330	72	fun mk_ref_gram [] ref_prods = ref_prods
clasohm@330	73	\| mk_ref_gram ((lhs, (rhs, name, prec)) :: ps) ref_prods =
clasohm@330	74	let val (rhs', ref_prods') = get_rhss ref_prods lhs;
clasohm@330	75	val (dummy, rhss) = hd (!rhs');
clasohm@330	76	val (ref_symbs, ref_prods'') = mk_refsymbs ref_prods' rhs [];
clasohm@330	77	in rhs' := [(dummy, (ref_symbs, name, prec) :: rhss)];
clasohm@330	78	mk_ref_gram ps ref_prods''
clasohm@330	79	end;
clasohm@330	80
clasohm@330	81	(eliminate chain productions)
clasohm@330	82	fun elim_chain ref_gram =
clasohm@330	83	let (*make a list of pairs representing chain productions and delete
clasohm@330	84	these productions*)
clasohm@330	85	fun list_chain [] = []
clasohm@330	86	\| list_chain ((_, rhss_ref) :: ps) =
clasohm@330	87	let fun lists [] new_rhss chains = (new_rhss, chains)
clasohm@330	88	\| lists (([Nonterm (id2, ~1)], _, ~1) :: rs)
clasohm@330	89	new_rhss chains =
clasohm@330	90	lists rs new_rhss ((rhss_ref, id2) :: chains)
clasohm@330	91	\| lists (rhs :: rs) new_rhss chains =
clasohm@330	92	lists rs (rhs :: new_rhss) chains;
clasohm@330	93
clasohm@330	94	val (dummy, rhss) = hd (!rhss_ref);
clasohm@330	95
clasohm@330	96	val (new_rhss, chains) = lists rhss [] [];
clasohm@330	97	in rhss_ref := [(dummy, new_rhss)];
clasohm@330	98	chains @ (list_chain ps)
clasohm@330	99	end;
clasohm@330	100
clasohm@345	101	(*convert a list of pairs to an association list
clasohm@345	102	by using the first element as the key*)
clasohm@345	103	fun mk_assoc pairs =
clasohm@330	104	let fun doit [] result = result
clasohm@330	105	\| doit ((id1, id2) :: ps) result =
clasohm@330	106	doit ps
clasohm@330	107	(overwrite (result, (id1, id2 :: (assocs result id1))));
clasohm@345	108	in doit pairs [] end;
clasohm@330	109
clasohm@330	110	(replace reference by list of rhss in chain pairs)
clasohm@345	111	fun deref (id1, ids) =
clasohm@345	112	let fun deref1 [] = []
clasohm@345	113	\| deref1 (id :: ids) =
clasohm@330	114	let val (_, rhss) = hd (!id);
clasohm@345	115	in rhss @ (deref1 ids) end;
clasohm@345	116	in (id1, deref1 ids) end;
clasohm@330	117
clasohm@330	118	val chain_pairs =
clasohm@345	119	map deref (transitive_closure (mk_assoc (list_chain ref_gram)));
clasohm@330	120
clasohm@330	121	(add new rhss to productions)
clasohm@345	122	fun elim (rhss_ref, rhss) =
clasohm@330	123	let val (dummy, old_rhss) = hd (!rhss_ref);
clasohm@330	124	in rhss_ref := [(dummy, old_rhss @ rhss)] end;
clasohm@345	125	in map elim chain_pairs;
clasohm@330	126	ref_gram
clasohm@330	127	end;
clasohm@330	128
clasohm@330	129	val ref_gram = elim_chain (mk_ref_gram prods []);
clasohm@330	130
clasohm@330	131	(*make a list of all lambda NTs
clasohm@330	132	(i.e. nonterminals that can produce lambda*)
clasohm@330	133	val lambdas =
clasohm@330	134	let fun lambda [] result = result
clasohm@330	135	\| lambda ((_, rhss_ref) :: nts) result =
clasohm@330	136	if rhss_ref mem result then
clasohm@330	137	lambda nts result
clasohm@330	138	else
clasohm@345	139	let (*list all NTs that can be produced by a rhs
clasohm@345	140	containing only lambda NTs*)
clasohm@345	141	fun only_lambdas [] result = result
clasohm@345	142	\| only_lambdas ((_, rhss_ref) :: ps) result =
clasohm@330	143	let fun only (symbs, _, _) =
clasohm@330	144	forall (fn (Nonterm (id, _)) => id mem result
clasohm@330	145	\| (Term _) => false) symbs;
clasohm@330	146
clasohm@330	147	val (_, rhss) = hd (!rhss_ref);
clasohm@330	148	in if not (rhss_ref mem result) andalso
clasohm@330	149	exists only rhss then
clasohm@345	150	only_lambdas ref_gram (rhss_ref :: result)
clasohm@330	151	else
clasohm@345	152	only_lambdas ps result
clasohm@330	153	end;
clasohm@330	154
clasohm@330	155	val (_, rhss) = hd (!rhss_ref);
clasohm@330	156	in if exists (fn (symbs, _, _) => null symbs) rhss
clasohm@330	157	then lambda nts
clasohm@345	158	(only_lambdas ref_gram (rhss_ref :: result))
clasohm@330	159	else lambda nts result
clasohm@330	160	end;
clasohm@330	161	in lambda ref_gram [] end;
clasohm@330	162
clasohm@330	163	(*list all nonterminals on which the lookahead depends (due to lambda
clasohm@330	164	NTs this can be more than one)
clasohm@330	165	and report if there is a terminal at the 'start'*)
clasohm@330	166	fun rhss_start [] skipped = (None, skipped)
clasohm@330	167	\| rhss_start (Term tk :: _) skipped = (Some tk, skipped)
clasohm@330	168	\| rhss_start (Nonterm (rhss_ref, _) :: rest) skipped =
clasohm@330	169	if rhss_ref mem lambdas then
clasohm@330	170	rhss_start rest (rhss_ref ins skipped)
clasohm@330	171	else
clasohm@330	172	(None, rhss_ref ins skipped);
clasohm@330	173
clasohm@330	174	(list all terminals that can start the given rhss)
clasohm@330	175	fun look_rhss starts rhss_ref =
clasohm@377	176	let fun look [] _ result = result
clasohm@377	177	\| look ((symbs, _, _) :: todos) done result =
clasohm@330	178	let val (start_token, skipped) = rhss_start symbs [];
clasohm@330	179
clasohm@330	180	(*process all nonterminals on which the lookahead
clasohm@330	181	depends and build the new todo and done lists for
clasohm@330	182	the look function*)
clasohm@377	183	fun look2 [] todos result =
clasohm@377	184	look todos (done union skipped) result
clasohm@377	185	\| look2 (rhss_ref :: ls) todos result =
clasohm@377	186	if rhss_ref mem done then look2 ls todos result
clasohm@330	187	else case assoc (starts, rhss_ref) of
clasohm@377	188	Some tks => look2 ls todos (tks union result)
clasohm@377	189	\| None =>
clasohm@377	190	let val (_, rhss) = hd (!rhss_ref);
clasohm@377	191	in look2 ls (rhss @ todos) result end;
clasohm@330	192	in case start_token of
clasohm@377	193	Some tk => look2 skipped todos (start_token ins result)
clasohm@377	194	\| None => look2 skipped todos result
clasohm@330	195	end;
clasohm@330	196
clasohm@330	197	val (_, rhss) = hd (!rhss_ref);
clasohm@377	198	in look rhss [rhss_ref] [] end;
clasohm@330	199
clasohm@330	200	(*make a table that contains all possible starting terminals
clasohm@330	201	for each nonterminal*)
clasohm@330	202	fun mk_starts [] starts = starts
clasohm@330	203	\| mk_starts ((_, rhss_ref) :: ps) starts =
clasohm@330	204	mk_starts ps ((rhss_ref, look_rhss starts rhss_ref) :: starts);
clasohm@330	205
clasohm@330	206	val starts = mk_starts ref_gram [];
clasohm@330	207
clasohm@330	208	(add list of allowed starting tokens to productions)
clasohm@330	209	fun mk_lookahead (_, rhss_ref) =
clasohm@377	210	let (*compares two values of type token option
clasohm@377	211	(used for speed reasons)*)
clasohm@377	212	fun matching_opt_tks (Some tk1, Some tk2) =
clasohm@377	213	matching_tokens (tk1, tk2)
clasohm@377	214	\| matching_opt_tks _ = false;
clasohm@377	215
clasohm@377	216	(*add item to lookahead list (a list containing pairs of token and
clasohm@377	217	rhss that can be started with it)*)
clasohm@345	218	fun add_start new_rhs tokens table =
clasohm@345	219	let fun add [] [] = []
clasohm@345	220	\| add (tk :: tks) [] =
clasohm@345	221	(tk, [new_rhs]) :: (add tks [])
clasohm@345	222	\| add tokens ((tk, rhss) :: ss) =
clasohm@377	223	if gen_mem matching_opt_tks (tk, tokens) then
clasohm@345	224	(tk, new_rhs :: rhss) :: (add (tokens \ tk) ss)
clasohm@330	225	else
clasohm@345	226	(tk, rhss) :: (add tokens ss);
clasohm@345	227	in add tokens table end;
clasohm@330	228
clasohm@330	229	(combine all lookaheads of a list of nonterminals)
clasohm@345	230	fun combine_starts rhss_refs =
clasohm@377	231	foldr (gen_union matching_opt_tks)
clasohm@345	232	((map (fn rhss_ref => let val Some tks = assoc (starts, rhss_ref)
clasohm@345	233	in tks end) rhss_refs), []);
clasohm@330	234
clasohm@330	235	(get lookahead for a rhs and update lhs' lookahead list)
clasohm@330	236	fun look_rhss [] table = table
clasohm@330	237	\| look_rhss ((rhs as (symbs, id, prec)) :: rs) table =
clasohm@330	238	let val (start_token, skipped) = rhss_start symbs [];
clasohm@330	239	val starts = case start_token of
clasohm@377	240	Some tk => gen_ins matching_opt_tks
clasohm@377	241	(Some tk, combine_starts skipped)
clasohm@345	242	\| None => if skipped subset lambdas then
clasohm@330	243	[None]
clasohm@330	244	else
clasohm@330	245	combine_starts skipped;
clasohm@330	246	in look_rhss rs (add_start rhs starts table) end;
clasohm@330	247
clasohm@330	248	val (_, rhss) = hd (!rhss_ref);
clasohm@330	249	in rhss_ref := look_rhss rhss [] end;
clasohm@330	250
clasohm@330	251	in map mk_lookahead ref_gram;
clasohm@330	252	Gram (prods, ref_gram)
clasohm@330	253	end;
wenzelm@18	254
wenzelm@18	255
wenzelm@237	256	(* empty, extend, merge grams *)
wenzelm@18	257
wenzelm@237	258	val empty_gram = mk_gram [];
wenzelm@18	259
clasohm@624	260	fun extend_gram (gram1 as Gram (prods1, _)) roots xprods2 =
wenzelm@18	261	let
clasohm@624	262	fun simplify preserve s =
clasohm@624	263	if preserve then
clasohm@624	264	(if s mem (roots \\ ["type", "prop", "any"]) then "logic" else s)
clasohm@624	265	else (if s = "prop" then "@prop_H" else
clasohm@624	266	(if s mem (roots \\ ["type", "prop", "any"]) then
clasohm@624	267	"@logic_H"
clasohm@624	268	else s));
clasohm@624	269
clasohm@624	270	fun not_delim (Delim _) = false
clasohm@624	271	\| not_delim _ = true
clasohm@624	272
clasohm@624	273	fun symb_of _ (Delim s) = Some (Terminal (Token s))
clasohm@624	274	\| symb_of preserve (Argument (s, p)) =
wenzelm@18	275	(case predef_term s of
clasohm@624	276	None => Some (Nonterminal (simplify preserve s, p))
wenzelm@237	277	\| Some tk => Some (Terminal tk))
clasohm@624	278	\| symb_of _ _ = None;
wenzelm@18	279
wenzelm@237	280	fun prod_of (XProd (lhs, xsymbs, const, pri)) =
clasohm@624	281	let val copy_prod = (lhs = "prop" andalso forall not_delim xsymbs andalso
clasohm@624	282	const <> "");
clasohm@624	283
clasohm@624	284	val preserve = copy_prod
clasohm@624	285	orelse pri = chain_pri andalso lhs = "logic"
clasohm@624	286	orelse lhs mem ["@prop_H", "@logic_H", "any"];
clasohm@624	287
clasohm@624	288	val lhs' = if copy_prod then "@prop_H" else
clasohm@624	289	if lhs = "logic" andalso pri = chain_pri
clasohm@624	290	then "@logic_H"
clasohm@624	291	else if lhs mem ("logic1" :: (roots \\ ["type", "prop"]))
clasohm@624	292	then "logic"
clasohm@624	293	else lhs;
clasohm@624	294	in (lhs', (mapfilter (symb_of preserve) xsymbs, const, pri))
clasohm@624	295	end;
wenzelm@18	296
wenzelm@237	297	val prods2 = distinct (map prod_of xprods2);
wenzelm@18	298	in
wenzelm@237	299	if prods2 subset prods1 then gram1
clasohm@395	300	else (writeln "Building new grammar...";
clasohm@395	301	mk_gram (extend_list prods1 prods2))
wenzelm@18	302	end;
wenzelm@18	303
wenzelm@237	304	fun merge_grams (gram1 as Gram (prods1, _)) (gram2 as Gram (prods2, _)) =
wenzelm@237	305	if prods2 subset prods1 then gram1
wenzelm@237	306	else if prods1 subset prods2 then gram2
clasohm@395	307	else (writeln "Building new grammar...";
clasohm@395	308	mk_gram (merge_lists prods1 prods2));
wenzelm@18	309
wenzelm@18	310
wenzelm@237	311	(* pretty_gram *)
wenzelm@18	312
wenzelm@237	313	fun pretty_gram (Gram (prods, _)) =
wenzelm@237	314	let
wenzelm@237	315	fun pretty_name name = [Pretty.str (name ^ " =")];
wenzelm@18	316
wenzelm@237	317	fun pretty_symb (Terminal (Token s)) = Pretty.str (quote s)
wenzelm@237	318	\| pretty_symb (Terminal tok) = Pretty.str (str_of_token tok)
wenzelm@237	319	\| pretty_symb (Nonterminal (s, p)) =
wenzelm@237	320	Pretty.str (s ^ "[" ^ string_of_int p ^ "]");
wenzelm@18	321
wenzelm@237	322	fun pretty_const "" = []
wenzelm@237	323	\| pretty_const c = [Pretty.str ("=> " ^ quote c)];
wenzelm@237	324
wenzelm@237	325	fun pretty_pri p = [Pretty.str ("(" ^ string_of_int p ^ ")")];
wenzelm@237	326
wenzelm@237	327	fun pretty_prod (name, (symbs, const, pri)) =
wenzelm@237	328	Pretty.block (Pretty.breaks (pretty_name name @
wenzelm@237	329	map pretty_symb symbs @ pretty_const const @ pretty_pri pri));
wenzelm@18	330	in
wenzelm@237	331	map pretty_prod prods
wenzelm@18	332	end;
wenzelm@18	333
wenzelm@18	334
wenzelm@18	335
wenzelm@18	336	( parse )
wenzelm@18	337
wenzelm@237	338	datatype parsetree =
wenzelm@237	339	Node of string * parsetree list \|
wenzelm@237	340	Tip of token;
wenzelm@237	341
wenzelm@18	342	type state =
clasohm@330	343	rhss_ref * int (lhs: identification and production precedence)
clasohm@330	344	* parsetree list (already parsed nonterminals on rhs)
clasohm@330	345	* refsymb list (rest of rhs)
clasohm@330	346	* string (name of production)
clasohm@330	347	* int; (index for previous state list)
wenzelm@18	348
wenzelm@18	349	type earleystate = state list Array.array;
wenzelm@18	350
wenzelm@18	351
clasohm@330	352	(Get all rhss with precedence >= minPrec)
clasohm@330	353	fun getRHS minPrec = filter (fn (_, _, prec:int) => prec >= minPrec);
wenzelm@18	354
clasohm@330	355	(Get all rhss with precedence >= minPrec and < maxPrec)
clasohm@330	356	fun getRHS' minPrec maxPrec =
clasohm@330	357	filter (fn (_, _, prec:int) => prec >= minPrec andalso prec < maxPrec);
wenzelm@237	358
clasohm@330	359	(Make states using a list of rhss)
clasohm@330	360	fun mkStates i minPrec lhsID rhss =
clasohm@330	361	let fun mkState (rhs, id, prodPrec) = (lhsID, prodPrec, [], rhs, id, i);
clasohm@330	362	in map mkState rhss end;
clasohm@330	363
clasohm@330	364	(*Add parse tree to list and eliminate duplicates
clasohm@330	365	saving the maximum precedence*)
clasohm@330	366	fun conc (t, prec:int) [] = (None, [(t, prec)])
clasohm@330	367	\| conc (t, prec) ((t', prec') :: ts) =
clasohm@330	368	if t = t' then
clasohm@330	369	(Some prec', if prec' >= prec then (t', prec') :: ts
clasohm@330	370	else (t, prec) :: ts)
clasohm@330	371	else
clasohm@330	372	let val (n, ts') = conc (t, prec) ts
clasohm@330	373	in (n, (t', prec') :: ts') end;
wenzelm@18	374
clasohm@330	375	(Update entry in used)
wenzelm@237	376	fun update_tree ((B, (i, ts)) :: used) (A, t) =
wenzelm@237	377	if A = B then
wenzelm@237	378	let val (n, ts') = conc t ts
wenzelm@237	379	in ((A, (i, ts')) :: used, n) end
wenzelm@237	380	else
wenzelm@237	381	let val (used', n) = update_tree used (A, t)
wenzelm@237	382	in ((B, (i, ts)) :: used', n) end;
wenzelm@18	383
clasohm@330	384	(Replace entry in used)
clasohm@330	385	fun update_index (A, prec) used =
clasohm@330	386	let fun update((hd as (B, (_, ts))) :: used, used') =
clasohm@330	387	if A = B
clasohm@330	388	then used' @ ((A, (prec, ts)) :: used)
clasohm@330	389	else update (used, hd :: used')
clasohm@330	390	in update (used, []) end;
wenzelm@18	391
clasohm@330	392	fun getS A maxPrec Si =
wenzelm@237	393	filter
clasohm@330	394	(fn (_, _, _, Nonterm (B, prec) :: _, _, _)
clasohm@330	395	=> A = B andalso prec <= maxPrec
wenzelm@237	396	\| _ => false) Si;
wenzelm@18	397
clasohm@330	398	fun getS' A maxPrec minPrec Si =
wenzelm@237	399	filter
clasohm@330	400	(fn (_, _, _, Nonterm (B, prec) :: _, _, _)
clasohm@330	401	=> A = B andalso prec > minPrec andalso prec <= maxPrec
wenzelm@237	402	\| _ => false) Si;
wenzelm@18	403
clasohm@330	404	fun getStates Estate i ii A maxPrec =
wenzelm@237	405	filter
clasohm@330	406	(fn (_, _, _, Nonterm (B, prec) :: _, _, _)
clasohm@330	407	=> A = B andalso prec <= maxPrec
wenzelm@237	408	\| _ => false)
wenzelm@237	409	(Array.sub (Estate, ii));
wenzelm@18	410
wenzelm@18	411
clasohm@330	412	fun movedot_term (A, j, ts, Term a :: sa, id, i) c =
wenzelm@237	413	if valued_token c then
wenzelm@237	414	(A, j, (ts @ [Tip c]), sa, id, i)
wenzelm@237	415	else (A, j, ts, sa, id, i);
wenzelm@18	416
clasohm@330	417	fun movedot_nonterm ts (A, j, tss, Nonterm _ :: sa, id, i) =
wenzelm@237	418	(A, j, tss @ ts, sa, id, i);
wenzelm@18	419
wenzelm@237	420	fun movedot_lambda _ [] = []
clasohm@330	421	\| movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ((t, ki) :: ts) =
wenzelm@237	422	if k <= ki then
wenzelm@237	423	(B, j, tss @ t, sa, id, i) ::
clasohm@330	424	movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ts
clasohm@330	425	else movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ts;
wenzelm@18	426
wenzelm@18	427
wenzelm@18	428
clasohm@330	429	fun PROCESSS Estate i c states =
wenzelm@18	430	let
clasohm@330	431	fun get_lookahead rhss_ref = token_assoc (!rhss_ref, c);
clasohm@330	432
wenzelm@237	433	fun processS used [] (Si, Sii) = (Si, Sii)
wenzelm@237	434	\| processS used (S :: States) (Si, Sii) =
wenzelm@237	435	(case S of
clasohm@330	436	(_, _, _, Nonterm (rhss_ref, minPrec) :: _, _, _) =>
clasohm@330	437	let (predictor operation)
clasohm@330	438	val (used_new, States_new) =
clasohm@330	439	(case assoc (used, rhss_ref) of
clasohm@330	440	Some (usedPrec, l) => (nonterminal has been processed)
clasohm@330	441	if usedPrec <= minPrec then
clasohm@330	442	(wanted precedence has been processed)
clasohm@330	443	(used, movedot_lambda S l)
clasohm@330	444	else (wanted precedence hasn't been parsed yet)
clasohm@330	445	let val rhss = get_lookahead rhss_ref;
clasohm@330	446	val States' = mkStates i minPrec rhss_ref
clasohm@330	447	(getRHS' minPrec usedPrec rhss);
clasohm@330	448	in (update_index (rhss_ref, minPrec) used,
clasohm@330	449	movedot_lambda S l @ States')
wenzelm@237	450	end
wenzelm@18	451
clasohm@330	452	\| None => (nonterminal is parsed for the first time)
clasohm@330	453	let val rhss = get_lookahead rhss_ref;
clasohm@330	454	val States' = mkStates i minPrec rhss_ref
clasohm@330	455	(getRHS minPrec rhss);
clasohm@330	456	in ((rhss_ref, (minPrec, [])) :: used, States') end)
wenzelm@237	457	in
clasohm@330	458	processS used_new (States_new @ States) (S :: Si, Sii)
wenzelm@237	459	end
clasohm@330	460	\| (_, _, _, Term a :: _, _, _) => (scanner operation)
wenzelm@237	461	processS used States
wenzelm@237	462	(S :: Si,
wenzelm@237	463	if matching_tokens (a, c) then movedot_term S c :: Sii else Sii)
wenzelm@18	464
clasohm@330	465	\| (A, prec, ts, [], id, j) => (completer operation)
wenzelm@237	466	let
wenzelm@237	467	val tt = if id = "" then ts else [Node (id, ts)]
wenzelm@237	468	in
clasohm@330	469	if j = i then (lambda production?)
wenzelm@237	470	let
clasohm@330	471	val (used', O) = update_tree used (A, (tt, prec));
wenzelm@237	472	in
wenzelm@237	473	(case O of
wenzelm@237	474	None =>
wenzelm@237	475	let
clasohm@330	476	val Slist = getS A prec Si;
wenzelm@237	477	val States' = map (movedot_nonterm tt) Slist;
wenzelm@237	478	in
clasohm@330	479	processS used' (States' @ States) (S :: Si, Sii)
wenzelm@237	480	end
wenzelm@237	481	\| Some n =>
clasohm@330	482	if n >= prec then
wenzelm@237	483	processS used' States (S :: Si, Sii)
wenzelm@237	484	else
wenzelm@237	485	let
clasohm@330	486	val Slist = getS' A prec n Si;
wenzelm@237	487	val States' = map (movedot_nonterm tt) Slist;
wenzelm@18	488	in
clasohm@330	489	processS used' (States' @ States) (S :: Si, Sii)
wenzelm@237	490	end)
clasohm@330	491	end
wenzelm@237	492	else
wenzelm@237	493	processS used
clasohm@330	494	(map (movedot_nonterm tt) (getStates Estate i j A prec) @ States)
wenzelm@237	495	(S :: Si, Sii)
wenzelm@237	496	end)
wenzelm@18	497	in
wenzelm@237	498	processS [] states ([], [])
wenzelm@18	499	end;
wenzelm@18	500
wenzelm@18	501
wenzelm@18	502
clasohm@362	503	fun syntax_error toks allowed =
clasohm@362	504	error
clasohm@362	505	((if toks = [] then
clasohm@362	506	"error: unexpected end of input\n"
clasohm@362	507	else
clasohm@367	508	"Syntax error at: " ^ quote (space_implode " " (map str_of_token
clasohm@367	509	((rev o tl o rev) toks)))
clasohm@362	510	^ "\n")
clasohm@362	511	^ "Expected tokens: "
clasohm@362	512	^ space_implode ", " (map (quote o str_of_token) allowed));
wenzelm@18	513
clasohm@362	514	fun produce stateset i indata prev_token =
clasohm@362	515	(the argument prev_token is only used for error messages)
wenzelm@237	516	(case Array.sub (stateset, i) of
clasohm@373	517	[] => let (*similar to token_assoc but does not automatically
clasohm@373	518	include 'None' key*)
clasohm@373	519	fun token_assoc2 (list, key) =
clasohm@373	520	let fun assoc [] = []
clasohm@373	521	\| assoc ((keyi, xi) :: pairs) =
clasohm@373	522	if is_some keyi andalso
clasohm@373	523	matching_tokens (the keyi, key) then
clasohm@373	524	(assoc pairs) @ xi
clasohm@373	525	else assoc pairs;
clasohm@373	526	in assoc list end;
clasohm@373	527
clasohm@373	528	(test if tk is a lookahead for a given minimum precedence)
clasohm@373	529	fun reduction minPrec tk _ (Term _ :: _, _, prec:int) =
clasohm@373	530	if prec >= minPrec then true
clasohm@373	531	else false
clasohm@373	532	\| reduction minPrec tk checked
clasohm@373	533	(Nonterm (rhss_ref, NTprec)::_,_, prec) =
clasohm@373	534	if prec >= minPrec andalso not (rhss_ref mem checked) then
clasohm@373	535	exists (reduction NTprec tk (rhss_ref :: checked))
clasohm@373	536	(token_assoc2 (!rhss_ref, tk))
clasohm@373	537	else false;
clasohm@373	538
clasohm@373	539	(*compute a list of allowed starting tokens
clasohm@373	540	for a list of nonterminals considering precedence*)
clasohm@362	541	fun get_starts [] = []
clasohm@372	542	\| get_starts ((rhss_ref, minPrec:int) :: refs) =
clasohm@362	543	let fun get [] = []
clasohm@373	544	\| get ((Some tk, prods) :: rhss) =
clasohm@373	545	if exists (reduction minPrec tk [rhss_ref]) prods
clasohm@373	546	then tk :: (get rhss)
clasohm@372	547	else get rhss
clasohm@362	548	\| get ((None, _) :: rhss) =
clasohm@362	549	get rhss;
clasohm@362	550	in (get (!rhss_ref)) union (get_starts refs) end;
clasohm@362	551
clasohm@372	552	val NTs = map (fn (_, _, _, Nonterm (a, prec) :: _, _, _) =>
clasohm@372	553	(a, prec))
clasohm@362	554	(filter (fn (_, _, _, Nonterm _ :: _, _, _) => true
clasohm@362	555	\| _ => false) (Array.sub (stateset, i-1)));
clasohm@367	556	val allowed = distinct (get_starts NTs @
clasohm@362	557	(map (fn (_, _, _, Term a :: _, _, _) => a)
clasohm@362	558	(filter (fn (_, _, _, Term _ :: _, _, _) => true
clasohm@367	559	\| _ => false) (Array.sub (stateset, i-1)))));
clasohm@362	560	in syntax_error (if prev_token = EndToken then indata
clasohm@362	561	else prev_token :: indata) allowed
clasohm@362	562	end
wenzelm@237	563	\| s =>
wenzelm@237	564	(case indata of
wenzelm@237	565	[] => Array.sub (stateset, i)
wenzelm@237	566	\| c :: cs =>
wenzelm@237	567	let
clasohm@330	568	val (si, sii) = PROCESSS stateset i c s;
wenzelm@237	569	in
wenzelm@237	570	Array.update (stateset, i, si);
wenzelm@237	571	Array.update (stateset, i + 1, sii);
clasohm@362	572	produce stateset (i + 1) cs c
wenzelm@237	573	end));
wenzelm@18	574
wenzelm@18	575
wenzelm@237	576	val get_trees = mapfilter (fn (_, _, [pt], _, _, _) => Some pt \| _ => None);
wenzelm@237	577
wenzelm@18	578
wenzelm@18	579	fun earley grammar startsymbol indata =
wenzelm@237	580	let
clasohm@624	581	val startsymbol' = case startsymbol of
clasohm@624	582	"logic" => "@logic_H"
clasohm@624	583	\| "prop" => "@prop_H"
clasohm@624	584	\| other => other;
clasohm@330	585	val rhss_ref = case assoc (grammar, startsymbol) of
clasohm@330	586	Some r => r
clasohm@624	587	\| None => error ("parse: Unknown startsymbol " ^
clasohm@624	588	quote startsymbol);
clasohm@330	589	val S0 = [(ref [], 0, [], [Nonterm (rhss_ref, 0), Term EndToken], "", 0)];
clasohm@330	590	val s = length indata + 1;
wenzelm@237	591	val Estate = Array.array (s, []);
wenzelm@237	592	in
wenzelm@237	593	Array.update (Estate, 0, S0);
wenzelm@237	594	let
clasohm@362	595	val l = produce Estate 0 indata EndToken(dummy);
clasohm@624	596
wenzelm@237	597	val p_trees = get_trees l;
wenzelm@237	598	in p_trees end
wenzelm@237	599	end;
wenzelm@18	600
wenzelm@18	601
wenzelm@237	602	fun parse (Gram (_, prod_tab)) start toks =
clasohm@624	603	let val r =
wenzelm@237	604	(case earley prod_tab start toks of
wenzelm@237	605	[] => sys_error "parse: no parse trees"
clasohm@330	606	\| pts => pts);
clasohm@624	607	in r end
wenzelm@18	608
wenzelm@18	609	end;

author	clasohm
	Tue, 04 Oct 1994 13:02:16 +0100
changeset 624	33b9b5da3e6f
parent 552	fc92fac8b0de
child 682	c36f49c76d22
permissions	-rw-r--r--