berghofe@11523
|
1 |
(* Title: Pure/General/scan.ML
|
berghofe@11523
|
2 |
ID: $Id$
|
berghofe@11523
|
3 |
Author: Markus Wenzel and Tobias Nipkow, TU Muenchen
|
wenzelm@8806
|
4 |
License: GPL (GNU GENERAL PUBLIC LICENSE)
|
wenzelm@6116
|
5 |
|
wenzelm@6116
|
6 |
Generic scanners (for potentially infinite input).
|
wenzelm@6116
|
7 |
*)
|
wenzelm@6116
|
8 |
|
wenzelm@6116
|
9 |
(*sequencing and concatenation combinators: highest precedence of the scanner operators*)
infix 5 -- :-- |-- --| ^^;
|
wenzelm@6116
|
10 |
(*result transformation: binds weaker than sequencing, stronger than alternative*)
infix 3 >>;
|
wenzelm@6116
|
11 |
(*alternative: lowest precedence of the scanner operators*)
infix 0 ||;
|
wenzelm@6116
|
12 |
|
wenzelm@6116
|
13 |
(*basic scanner combinators, intended for unqualified use*)
signature BASIC_SCAN =
sig
  (*error msg handler: a FAIL of the scanner becomes an ABORT with the produced message*)
  val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b

  (*apply function to the result of a scanner*)
  val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c

  (*alternative: second scanner is tried if the first one fails*)
  val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b

  (*sequential pairing*)
  val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e

  (*dependent pairing: second scanner is computed from the first result*)
  val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e

  (*sequential composition, forget fst*)
  val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e

  (*sequential composition, forget snd*)
  val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e

  (*sequential composition, concatenating string results*)
  val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c

  (*one element literal*)
  val $$ : ''a -> ''a list -> ''a * ''a list

  (*literal list*)
  val list: ''a list -> ''a list -> ''a list * ''a list
end;
|
wenzelm@6116
|
36 |
|
wenzelm@6116
|
37 |
(*full scanner interface: basic combinators, generic scanners, state based
  scanners, exception handling, finite and source-draining scans, lexicons*)
signature SCAN =
sig
  include BASIC_SCAN

  (*generic scanners*)
  val fail: 'a -> 'b
  val fail_with: ('a -> string) -> 'a -> 'b
  val succeed: 'a -> 'b -> 'a * 'b
  val one: ('a -> bool) -> 'a list -> 'a * 'a list
  val any: ('a -> bool) -> 'a list -> 'a list * 'a list
  val any1: ('a -> bool) -> 'a list -> 'a list * 'a list
  val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
  val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
  val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
  val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
  val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
  val first: ('a -> 'b) list -> 'a -> 'b

  (*state based scanners*)
  val state: 'a * 'b -> 'a * ('a * 'b)
  val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
  val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
  val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e

  (*exception handling*)
  val try: ('a -> 'b) -> 'a -> 'b
  val force: ('a -> 'b) -> 'a -> 'b
  val prompt: string -> ('a -> 'b) -> 'a -> 'b

  (*finite scans, delimited by a stopper element*)
  val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list))
    -> 'b * 'a list -> 'c * ('d * 'a list)
  val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
  val read: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
  val catch: ('a -> 'b) -> 'a -> 'b
  val error: ('a -> 'b) -> 'a -> 'b

  (*infinite scans, draining a source*)
  val source': string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
    'b * ('b -> bool) -> ('d * 'b list -> 'e list * ('d * 'b list)) ->
    ('d * 'b list -> 'e list * ('d * 'b list)) option -> 'd * 'a -> 'e list * ('d * 'c)
  val source: string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
    'b * ('b -> bool) -> ('b list -> 'd list * 'b list) ->
    ('b list -> 'd list * 'b list) option -> 'a -> 'd list * 'c
  val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a

  (*lexicons of literals, given as lists of single-character strings*)
  type lexicon
  val dest_lexicon: lexicon -> string list
  val make_lexicon: string list list -> lexicon
  val empty_lexicon: lexicon
  val extend_lexicon: lexicon -> string list list -> lexicon
  val merge_lexicons: lexicon -> lexicon -> lexicon
  val is_literal: lexicon -> string list -> bool
  val literal: lexicon -> string list -> string list * string list
end;
|
wenzelm@6116
|
84 |
|
wenzelm@6116
|
85 |
structure Scan: SCAN =
|
wenzelm@6116
|
86 |
struct
|
wenzelm@6116
|
87 |
|
wenzelm@6116
|
88 |
|
wenzelm@6116
|
89 |
(** scanners **)
|
wenzelm@6116
|
90 |
|
berghofe@11523
|
91 |
(*raised by scanners that run out of input; the optional string is the prompt
  to be used when requesting further input*)
exception MORE of string option; (*need more input (prompt)*)
|
berghofe@11523
|
92 |
(*raised by a scanner that does not accept the input; caught by the
  alternative combinator, which then tries its second scanner*)
exception FAIL of string option; (*try alternatives (reason of failure)*)
|
berghofe@11523
|
93 |
(*raised with an error message when scanning cannot continue; not caught by
  the alternative combinator*)
exception ABORT of string; (*dead end*)
|
wenzelm@6116
|
94 |
|
wenzelm@6116
|
95 |
|
wenzelm@6116
|
96 |
(* scanner combinators *)
|
wenzelm@6116
|
97 |
|
wenzelm@14677
|
98 |
(*dependent pairing: run sc1, then the scanner obtained by applying sc2 to
  its result; both results are returned as a pair*)
fun (sc1 :-- sc2) toks =
  (case sc1 toks of
    (x, rest) =>
      (case sc2 x rest of
        (y, rest') => ((x, y), rest')));
|
kleing@14078
|
104 |
|
wenzelm@14677
|
105 |
(*sequential pairing: run sc1, then sc2 on the remaining input, and pair
  the two results*)
fun (sc1 -- sc2) toks =
  let val (x, rest) = sc1 toks
  in (fn (y, rest') => ((x, y), rest')) (sc2 rest) end;
|
kleing@14078
|
111 |
|
wenzelm@14677
|
112 |
(*application: run sc and transform its result with f; the remaining input
  is passed through unchanged*)
fun (sc >> f) toks =
  (case sc toks of (x, rest) => (f x, rest));
|
kleing@14078
|
116 |
|
wenzelm@14677
|
117 |
(*forget snd: run sc1 then sc2, keeping only the result of sc1*)
fun (sc1 --| sc2) toks =
  (case sc1 toks of
    (x, rest) =>
      (case sc2 rest of
        (_, rest') => (x, rest')));
|
kleing@14078
|
123 |
|
wenzelm@14677
|
124 |
(*forget fst: run sc1 for its effect on the input only, then return
  whatever sc2 yields on the remainder*)
fun (sc1 |-- sc2) toks =
  (case sc1 toks of (_, rest) => sc2 rest);
|
kleing@14078
|
128 |
|
wenzelm@14677
|
129 |
(*concatenation: run sc1 then sc2 and join their string results*)
fun (sc1 ^^ sc2) toks =
  (case sc1 toks of
    (s1, rest) =>
      (case sc2 rest of
        (s2, rest') => (s1 ^ s2, rest')));
|
berghofe@14108
|
135 |
|
wenzelm@14677
|
136 |
(*alternative: try scan1; if it raises FAIL, run scan2 on the same input.
  Other exceptions of scan1 are not intercepted.*)
fun (scan1 || scan2) xs =
  let fun otherwise () = scan2 xs
  in scan1 xs handle FAIL _ => otherwise () end;
|
wenzelm@6116
|
138 |
|
wenzelm@6116
|
139 |
|
wenzelm@6116
|
140 |
(* generic scanners *)
|
wenzelm@6116
|
141 |
|
wenzelm@6116
|
142 |
(*scanner that never accepts: raises FAIL without a message*)
val fail = fn _ => raise FAIL None;
|
wenzelm@6116
|
143 |
(*scanner that never accepts: raises FAIL with the message that msg_of
  computes from the input*)
fun fail_with msg_of xs =
  let val msg = msg_of xs
  in raise FAIL (Some msg) end;
|
wenzelm@6116
|
144 |
(*scanner that always accepts: yields y and leaves the input untouched*)
fun succeed y = fn xs => (y, xs);
|
wenzelm@6116
|
145 |
|
wenzelm@6116
|
146 |
(*scan a single element satisfying pred; raises MORE on empty input and
  FAIL if the first element is rejected*)
fun one pred toks =
  (case toks of
    [] => raise MORE None
  | x :: xs => if pred x then (x, xs) else raise FAIL None);
|
wenzelm@6116
|
149 |
|
wenzelm@6116
|
150 |
(*scan a single element equal to a; raises MORE on empty input and FAIL
  if the first element differs*)
fun $$ a toks =
  (case toks of
    [] => raise MORE None
  | x :: xs => if a = x then (x, xs) else raise FAIL None);
|
wenzelm@6116
|
153 |
|
wenzelm@14726
|
154 |
(*scan the literal list ys as a prefix of the input; yields ys together
  with the input after that prefix.  Raises MORE if the input ends before
  ys is complete, FAIL on the first mismatch.*)
fun list ys xs =
  let
    fun strip (lit, inp) =
      (case (lit, inp) of
        ([], _) => inp
      | (_ :: _, []) => raise MORE None
      | (y :: lit', x :: inp') =>
          if y = x then strip (lit', inp') else raise FAIL None);
  in (ys, strip (ys, xs)) end;
|
wenzelm@14726
|
161 |
|
wenzelm@6116
|
162 |
(*scan the longest prefix of elements satisfying pred (possibly empty);
  yields that prefix and the input starting at the first rejected element.
  Raises MORE if the input is exhausted before an element is rejected,
  including on empty input.

  Accepted elements are collected in an accumulator (in reverse) so that
  the stack does not grow with the length of the prefix.*)
fun any pred toks =
  let
    fun collect _ [] = raise MORE None
      | collect acc (rest as x :: xs) =
          if pred x then collect (x :: acc) xs
          else (rev acc, rest);
  in collect [] toks end;
|
wenzelm@6116
|
166 |
|
kleing@14078
|
167 |
(*like any, but requires at least one element satisfying p*)
fun any1 p toks =
  (case one p toks of
    (x, rest) =>
      (case any p rest of
        (xs, rest') => (x :: xs, rest')));
|
wenzelm@6116
|
172 |
|
kleing@14078
|
173 |
(*run scan; if it raises FAIL, yield the default def on the unchanged input*)
fun optional scan def xs = scan xs handle FAIL _ => (def, xs);
|
kleing@14078
|
174 |
(*run scan and wrap its result in Some; if it raises FAIL, yield None on
  the unchanged input*)
fun option scan xs =
  (case scan xs of (y, xs') => (Some y, xs')) handle FAIL _ => (None, xs);
|
wenzelm@6116
|
175 |
|
berghofe@13795
|
176 |
(*apply scan repeatedly until it raises FAIL; yields the results in order
  of scanning together with the input left at the point of failure*)
fun repeat scan =
  let
    fun attempt xs = Some (scan xs) handle FAIL _ => None;

    (*results are accumulated in reverse*)
    fun collect acc xs =
      (case attempt xs of
        Some (y, xs') => collect (y :: acc) xs'
      | None => (rev acc, xs));
  in collect [] end;
|
berghofe@13795
|
180 |
|
kleing@14078
|
181 |
(*like repeat, but scan must succeed at least once*)
fun repeat1 scan toks =
  (case scan toks of
    (x, rest) =>
      (case repeat scan rest of
        (xs, rest') => (x :: xs, rest')));
|
wenzelm@6116
|
186 |
|
wenzelm@6116
|
187 |
(*run both scanners on the same input and choose between their results:
  if only one succeeds its result is taken; if both succeed, the result of
  scan1 is taken when leq (tok2, tok1) holds, otherwise that of scan2.
  Raises FAIL if both fail.*)
fun max leq scan1 scan2 xs =
  let
    val res1 = option scan1 xs;
    val res2 = option scan2 xs;
  in
    (case (res1, res2) of
      ((Some tok1, xs1'), (Some tok2, xs2')) =>
        if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2')
    | ((Some tok1, xs1'), (None, _)) => (tok1, xs1')
    | ((None, _), (Some tok2, xs2')) => (tok2, xs2')
    | ((None, _), (None, _)) => raise FAIL None)  (*loses FAIL msg!*)
  end;
|
wenzelm@6116
|
194 |
|
wenzelm@6116
|
195 |
(*lookahead: yield the result of scan, but leave the input unconsumed*)
fun ahead scan xs =
  let val (y, _) = scan xs
  in (y, xs) end;
|
wenzelm@6116
|
196 |
|
wenzelm@6116
|
197 |
(*run scan only if test would not accept the input: test is tried as a
  lookahead; if it succeeds the whole scanner raises FAIL, otherwise scan
  is applied to the original input*)
fun unless test scan xs =
  (case ahead (option test) xs of
    (None, _) => scan xs
  | (Some _, _) => fail xs);
|
wenzelm@6116
|
199 |
|
wenzelm@6116
|
200 |
(*alternative over a list of scanners, tried from left to right; the empty
  list gives the failing scanner*)
fun first scans =
  (case scans of
    [] => fail
  | scan :: rest => scan || first rest);
|
wenzelm@6116
|
202 |
|
wenzelm@6116
|
203 |
|
wenzelm@6116
|
204 |
(* state based scanners *)
|
wenzelm@6116
|
205 |
|
wenzelm@9122
|
206 |
(*yield the current state as result, leaving state and input unchanged*)
fun state (inp as (st, _)) = (st, inp);
|
wenzelm@9122
|
207 |
|
wenzelm@6116
|
208 |
(*turn a scanner parameterized by the state into a state based scanner:
  scan st xs yields the new state paired with the result, and the new
  state is threaded into the output*)
fun depend scan (st, xs) =
  (case scan st xs of ((st', y), xs') => (y, (st', xs')));
|
wenzelm@6116
|
211 |
|
wenzelm@6116
|
212 |
(*turn a plain scanner into a state based one that passes the state
  through unchanged*)
fun lift scan (st, xs) =
  (case scan xs of (y, xs') => (y, (st, xs')));
|
wenzelm@6116
|
215 |
|
wenzelm@6116
|
216 |
(*run a state based scanner from the initial state st as a plain scanner;
  the final state is discarded*)
fun pass st scan xs =
  (case scan (st, xs) of (y, (_, xs')) => (y, xs'));
|
wenzelm@6116
|
219 |
|
wenzelm@6116
|
220 |
|
wenzelm@6116
|
221 |
(* exception handling *)
|
wenzelm@6116
|
222 |
|
wenzelm@6116
|
223 |
(*error msg handler: a FAIL raised by scan is turned into ABORT, with the
  message computed by err from the input and the optional failure reason*)
fun !! err scan xs =
  let fun abort msg = raise ABORT (err (xs, msg))
  in scan xs handle FAIL msg => abort msg end;
|
wenzelm@6116
|
224 |
(*turn both MORE and ABORT raised by scan into a plain FAIL*)
fun try scan xs =
  let fun failed () = raise FAIL None
  in scan xs handle MORE _ => failed () | ABORT _ => failed () end;
|
wenzelm@6116
|
225 |
(*turn MORE raised by scan into a plain FAIL*)
fun force scan = fn xs => scan xs handle MORE _ => raise FAIL None;
|
wenzelm@6116
|
226 |
(*attach the prompt str to a MORE raised by scan, unless that MORE
  already carries a prompt*)
fun prompt str scan xs =
  let val with_prompt = MORE (Some str)
  in scan xs handle MORE None => raise with_prompt end;
|
wenzelm@6116
|
227 |
(*turn ABORT raised by scan into FAIL carrying the same message*)
fun catch scan xs =
  let fun demote msg = raise FAIL (Some msg)
  in scan xs handle ABORT msg => demote msg end;
|
wenzelm@6116
|
228 |
(*pass the message of an ABORT raised by scan to Library.error*)
fun error scan = fn xs => scan xs handle ABORT msg => Library.error msg;
|
wenzelm@6116
|
229 |
|
wenzelm@6116
|
230 |
|
wenzelm@6116
|
231 |
(* finite scans *)
|
wenzelm@6116
|
232 |
|
wenzelm@6116
|
233 |
(*run a state based scanner on finite input: the stopper element is
  appended to the input before scanning and removed from the end of the
  remaining input afterwards; MORE raised by scan becomes FAIL (via force).
  Raises ABORT if the input already contains a stopper, or if the stopper
  is no longer the last element of what scan leaves behind.*)
fun finite' (stopper, is_stopper) scan (state, input) =
  let
    fun lost () = raise ABORT "Scanner bug: lost stopper of finite scan!";

    (*strip the trailing stopper from the remaining input*)
    fun stop lst =
      if null lst then lost ()
      else
        (case split_last lst of
          (xs, x) => if is_stopper x then ((), xs) else lost ());

    fun run () =
      let val stopped = rev_append (rev input) [stopper]
      in (force scan --| lift stop) (state, stopped) end;
  in
    if not (exists is_stopper input) then run ()
    else raise ABORT "Stopper may not occur in input of finite scan!"
  end;
|
wenzelm@6116
|
246 |
|
wenzelm@6116
|
247 |
(*stateless version of finite': scan is lifted over the unit state*)
fun finite stopper scan xs =
  (case finite' stopper (lift scan) ((), xs) of
    (y, ((), xs')) => (y, xs'));
|
wenzelm@6116
|
250 |
|
wenzelm@6116
|
251 |
(*scan finite input completely: yields Some result only if scan succeeds
  and consumes all of xs, otherwise None; an ABORT is reported through
  Library.error (via error)*)
fun read stopper scan xs =
  let val (result, rest) = error (finite stopper (option scan)) xs
  in if null rest then result else None end;
|
wenzelm@6116
|
255 |
|
wenzelm@6116
|
256 |
|
wenzelm@6116
|
257 |
(* infinite scans -- draining state-based source *)
|
wenzelm@6116
|
258 |
|
wenzelm@6116
|
259 |
(*run scan on (state, xs); whenever it raises MORE, fetch further input
  from src via get -- using the prompt carried by MORE, or def_prmpt if
  there is none -- append it to xs and try again.  If get delivers no
  further input, the scan is finished as a finite one (finite') on what
  has been accumulated, and src is returned as it was before that get.*)
fun drain def_prmpt get stopper scan ((state, xs), src) =
  let
    fun refill prmpt =
      (case get (if_none prmpt def_prmpt) src of
        ([], _) => (finite' stopper scan (state, xs), src)
      | (more, src') => drain def_prmpt get stopper scan ((state, xs @ more), src'));
  in (scan (state, xs), src) handle MORE prmpt => refill prmpt end;
|
wenzelm@6116
|
264 |
|
wenzelm@6116
|
265 |
(*state based scan of one portion of a source: an initial chunk is fetched
  with get and scanner is drained over it (see drain); input left over is
  handed back to the source with unget.

  Without a recovery scanner, an ABORT of scanner is reported through
  Library.error (via error).  With recovery scanner r, a failure of
  scanner -- including ABORT, which catch turns into FAIL -- is reported
  via error_msg, and r (guarded so that it is not run when a stopper
  comes next) is drained on the same input instead.*)
fun source' def_prmpt get unget stopper scanner opt_recover (state, src) =
  let
    val drain_with = drain def_prmpt get stopper;

    fun drain_loop recover inp =
      drain_with (catch scanner) inp handle FAIL msg =>
        (error_msg (if_none msg "Syntax error."); drain_with recover inp);

    (*empty chunk: nothing to scan, state unchanged*)
    fun scan_chunk ([], s) = (([], (state, [])), s)
      | scan_chunk (xs, s) =
          (case opt_recover of
            None => drain_with (error scanner) ((state, xs), s)
          | Some r =>
              drain_loop (unless (lift (one (#2 stopper))) r) ((state, xs), s));

    val ((ys, (state', xs')), src') = scan_chunk (get def_prmpt src);
  in (ys, (state', unget (xs', src'))) end;
|
wenzelm@6116
|
279 |
|
wenzelm@6116
|
280 |
(*stateless version of source': scanner and optional recovery scanner are
  lifted over the unit state*)
fun source def_prmpt get unget stopper scan opt_recover src =
  let
    val scanner' = lift scan;
    val recover' = apsome lift opt_recover;
  in
    (case source' def_prmpt get unget stopper scanner' recover' ((), src) of
      (ys, ((), src')) => (ys, src'))
  end;
|
wenzelm@6116
|
284 |
|
wenzelm@6116
|
285 |
(*wrap the result of scan into a singleton list*)
fun single scan xs =
  (case scan xs of (y, xs') => ([y], xs'));
|
wenzelm@6116
|
286 |
(*scan one item, then as many further items as possible; for the further
  items MORE and ABORT count as ordinary failure (via try)*)
fun bulk scan xs =
  let
    val (y, xs') = scan xs;
    val (ys, xs'') = repeat (try scan) xs';
  in (y :: ys, xs'') end;
|
wenzelm@6116
|
287 |
|
wenzelm@6116
|
288 |
|
wenzelm@6116
|
289 |
|
wenzelm@6116
|
290 |
(** datatype lexicon **)
|
wenzelm@6116
|
291 |
|
wenzelm@6116
|
292 |
(*search tree over literals given as lists of single-character strings:
  Branch (d, a, lt, eq, gt) holds the character d, the literal a that ends
  at this node (no_literal if none does), and the subtrees for an input
  character less than, equal to, or greater than d*)
datatype lexicon =
  Empty
| Branch of string * string list * lexicon * lexicon * lexicon;
|
wenzelm@6116
|
295 |
|
wenzelm@6116
|
296 |
(*marker stored in a Branch at which no literal ends*)
val no_literal = [];
|
wenzelm@6116
|
297 |
|
wenzelm@6116
|
298 |
|
wenzelm@6116
|
299 |
(* dest_lexicon *)
|
wenzelm@6116
|
300 |
|
wenzelm@7025
|
301 |
(*list all literals stored in a lexicon (as character lists), visiting each
  node in the order: lt subtree, literal of the node itself, eq subtree,
  gt subtree*)
fun dest_lex lex =
  (case lex of
    Empty => []
  | Branch (_, lit, lt, eq, gt) =>
      let val here = if null lit then [] else [lit]
      in dest_lex lt @ here @ dest_lex eq @ dest_lex gt end);
|
wenzelm@7025
|
306 |
|
wenzelm@7025
|
307 |
(*list all literals stored in a lexicon as strings*)
fun dest_lexicon lex = map implode (dest_lex lex);
|
wenzelm@6116
|
308 |
|
wenzelm@6116
|
309 |
|
wenzelm@6116
|
310 |
(* empty, extend, make, merge lexicons *)
|
wenzelm@6116
|
311 |
|
wenzelm@6116
|
312 |
(*the lexicon containing no literals*)
val empty_lexicon = Empty;
|
wenzelm@6116
|
313 |
|
berghofe@11523
|
314 |
(*add literals (as character lists) to a lexicon; literals already
  present are filtered out beforehand, and an empty character list adds
  nothing*)
fun extend_lexicon lexicon [] = lexicon
  | extend_lexicon lexicon chrss =
      let
        (*insert the single literal chrs*)
        fun insert (lex, chrs) =
          let
            (*descend along the remaining characters chs; the node reached
              by the last character records the complete literal chrs*)
            fun add node chs =
              (case (node, chs) of
                (_, []) => node
              | (Empty, [c]) => Branch (c, chrs, Empty, Empty, Empty)
              | (Empty, c :: cs) => Branch (c, no_literal, Empty, add Empty cs, Empty)
              | (Branch (d, a, lt, eq, gt), c :: cs) =>
                  (case String.compare (c, d) of
                    LESS => Branch (d, a, add lt chs, eq, gt)
                  | EQUAL => Branch (d, if null cs then chrs else a, lt, add eq cs, gt)
                  | GREATER => Branch (d, a, lt, eq, add gt chs)));
          in add lex chrs end;

        val fresh = chrss \\ dest_lex lexicon;
      in foldl insert (lexicon, fresh) end;
|
wenzelm@6116
|
331 |
|
wenzelm@6116
|
332 |
(*build a lexicon from a list of literals (as character lists)*)
fun make_lexicon chrss = extend_lexicon empty_lexicon chrss;
|
wenzelm@6116
|
333 |
|
wenzelm@6116
|
334 |
(*merge two lexicons: if the literals of one are included in the other,
  the larger lexicon is returned as it is; otherwise lex1 is extended by
  the literals of lex2*)
fun merge_lexicons lex1 lex2 =
  let
    val lits1 = dest_lex lex1;
    val lits2 = dest_lex lex2;
    fun covers (big, small) = small subset big;
  in
    if covers (lits1, lits2) then lex1
    else if covers (lits2, lits1) then lex2
    else extend_lexicon lex1 lits2
  end;
|
wenzelm@6116
|
343 |
|
wenzelm@6116
|
344 |
|
wenzelm@14686
|
345 |
(* is_literal *)
|
wenzelm@14686
|
346 |
|
wenzelm@14686
|
347 |
(*check whether the character list chs is exactly one of the literals of
  the lexicon; the empty list never is*)
fun is_literal lex chs =
  (case (lex, chs) of
    (Branch (d, a, lt, eq, gt), c :: cs) =>
      (case String.compare (c, d) of
        LESS => is_literal lt chs
      | GREATER => is_literal gt chs
      | EQUAL =>
          (*last character: a literal must end at this node*)
          if null cs andalso a <> no_literal then true
          else is_literal eq cs)
  | _ => false);
|
wenzelm@14686
|
354 |
|
wenzelm@14686
|
355 |
|
wenzelm@6116
|
356 |
(* scan literal *)
|
wenzelm@6116
|
357 |
|
wenzelm@6116
|
358 |
(*scan the longest literal of the lexicon that is a prefix of chrs; yields
  the literal (as character list) and the input after it.  Raises FAIL if
  no literal matches, and MORE if the input ends while further lexicon
  entries could still apply.*)
fun literal lex chrs =
  let
    (*best: longest match found so far, with the input following it*)
    fun walk (Empty, best, _) = best
      | walk (Branch _, _, []) = raise MORE None
      | walk (Branch (d, a, lt, eq, gt), best, chs as c :: cs) =
          (case String.compare (c, d) of
            EQUAL =>
              let val best' = if a = no_literal then best else Some (a, cs)
              in walk (eq, best', cs) end
          | LESS => walk (lt, best, chs)
          | GREATER => walk (gt, best, chs));
  in
    (case walk (lex, None, chrs) of
      Some found => found
    | None => raise FAIL None)
  end;
|
wenzelm@6116
|
372 |
|
wenzelm@6116
|
373 |
|
wenzelm@6116
|
374 |
end;
|
wenzelm@6116
|
375 |
|
wenzelm@6116
|
376 |
|
wenzelm@6116
|
377 |
(*view of Scan restricted to the BASIC_SCAN combinators*)
structure BasicScan: BASIC_SCAN = Scan;
|
wenzelm@6116
|
378 |
(*make the basic combinators available unqualified*)
open BasicScan;
|