1 (* Title: Pure/General/scan.ML
2 Author: Markus Wenzel and Tobias Nipkow, TU Muenchen
4 Generic scanners (for potentially infinite input).
7 infix 5 -- :-- :|-- |-- --| ^^;
12 signature BASIC_SCAN =
15 val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b
17 val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
19 val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
20 (*sequential pairing*)
21 val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
23 val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
25 val :|-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> 'd * 'e
26 val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
27 val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
29 val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
30 val ::: : ('a -> 'b * 'c) * ('c -> 'b list * 'd) -> 'a -> 'b list * 'd
31 val @@@ : ('a -> 'b list * 'c) * ('c -> 'b list * 'd) -> 'a -> 'b list * 'd
32 (*one element literal*)
33 val $$ : string -> string list -> string * string list
34 val ~$$ : string -> string list -> string * string list
40 val prompt: string -> ('a -> 'b) -> 'a -> 'b
41 val error: ('a -> 'b) -> 'a -> 'b
42 val catch: ('a -> 'b) -> 'a -> 'b (*exception Fail*)
44 val fail_with: ('a -> string) -> 'a -> 'b
45 val succeed: 'a -> 'b -> 'a * 'b
46 val some: ('a -> 'b option) -> 'a list -> 'b * 'a list
47 val one: ('a -> bool) -> 'a list -> 'a * 'a list
48 val this: string list -> string list -> string list * string list
49 val this_string: string -> string list -> string * string list
50 val many: ('a -> bool) -> 'a list -> 'a list * 'a list
51 val many1: ('a -> bool) -> 'a list -> 'a list * 'a list
52 val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
53 val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
54 val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
55 val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
56 val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
57 val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
58 val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
59 val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
60 val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
61 val first: ('a -> 'b) list -> 'a -> 'b
62 val state: 'a * 'b -> 'a * ('a * 'b)
63 val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
64 val peek: ('a -> 'b -> 'c * 'd) -> 'a * 'b -> 'c * ('a * 'd)
65 val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
66 val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
67 val unlift: (unit * 'a -> 'b * ('c * 'd)) -> 'a -> 'b * 'd
68 val trace: ('a list -> 'b * 'c list) -> 'a list -> ('b * 'a list) * 'c list
70 val stopper: ('a list -> 'a) -> ('a -> bool) -> 'a stopper
71 val is_stopper: 'a stopper -> 'a -> bool
72 val finite': 'a stopper -> ('b * 'a list -> 'c * ('d * 'a list))
73 -> 'b * 'a list -> 'c * ('d * 'a list)
74 val finite: 'a stopper -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
75 val read: 'a stopper -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
76 val drain: string -> (string -> 'a -> 'b list * 'a) -> 'b stopper ->
77 ('c * 'b list -> 'd * ('e * 'b list)) -> ('c * 'b list) * 'a -> ('d * ('e * 'b list)) * 'a
79 val is_literal: lexicon -> string list -> bool
80 val literal: lexicon -> (string * 'a) list -> (string * 'a) list * (string * 'a) list
81 val empty_lexicon: lexicon
82 val extend_lexicon: string list -> lexicon -> lexicon
83 val make_lexicon: string list list -> lexicon
84 val dest_lexicon: lexicon -> string list
85 val merge_lexicons: lexicon * lexicon -> lexicon
88 structure Scan: SCAN =
96 exception MORE of string option; (*need more input (prompt)*)
97 exception FAIL of string option; (*try alternatives (reason of failure)*)
98 exception ABORT of string; (*dead end*)
100 fun !! err scan xs = scan xs handle FAIL msg => raise ABORT (err (xs, msg));
101 fun permissive scan xs = scan xs handle MORE _ => raise FAIL NONE | ABORT _ => raise FAIL NONE;
102 fun strict scan xs = scan xs handle MORE _ => raise FAIL NONE;
103 fun prompt str scan xs = scan xs handle MORE NONE => raise MORE (SOME str);
104 fun error scan xs = scan xs handle ABORT msg => Library.error msg;
106 fun catch scan xs = scan xs
107 handle ABORT msg => raise Fail msg
108 | FAIL msg => raise Fail (the_default "Syntax error." msg);
111 (* scanner combinators *)
113 fun (scan >> f) xs = scan xs |>> f;
115 fun (scan1 || scan2) xs = scan1 xs handle FAIL _ => scan2 xs;
117 fun (scan1 :-- scan2) xs =
119 val (x, ys) = scan1 xs;
120 val (y, zs) = scan2 x ys;
123 fun (scan1 -- scan2) = scan1 :-- (fn _ => scan2);
124 fun (scan1 :|-- scan2) = scan1 :-- scan2 >> #2;
125 fun (scan1 |-- scan2) = scan1 -- scan2 >> #2;
126 fun (scan1 --| scan2) = scan1 -- scan2 >> #1;
127 fun (scan1 ^^ scan2) = scan1 -- scan2 >> op ^;
128 fun (scan1 ::: scan2) = scan1 -- scan2 >> op ::;
129 fun (scan1 @@@ scan2) = scan1 -- scan2 >> op @;
132 (* generic scanners *)
134 fun fail _ = raise FAIL NONE;
135 fun fail_with msg_of xs = raise FAIL (SOME (msg_of xs));
136 fun succeed y xs = (y, xs);
138 fun some _ [] = raise MORE NONE
140 (case f x of SOME y => (y, xs) | _ => raise FAIL NONE);
142 fun one _ [] = raise MORE NONE
143 | one pred (x :: xs) =
144 if pred x then (x, xs) else raise FAIL NONE;
146 fun $$ a = one (fn s: string => s = a);
147 fun ~$$ a = one (fn s: string => s <> a);
151 fun drop_prefix [] xs = xs
152 | drop_prefix (_ :: _) [] = raise MORE NONE
153 | drop_prefix (y :: ys) (x :: xs) =
154 if (y: string) = x then drop_prefix ys xs else raise FAIL NONE;
155 in (ys, drop_prefix ys xs) end;
157 fun this_string s = this (explode s) >> K s; (*primitive string -- no symbols here!*)
159 fun many _ [] = raise MORE NONE
160 | many pred (lst as x :: xs) =
161 if pred x then apfst (cons x) (many pred xs)
164 fun many1 pred = one pred ::: many pred;
166 fun optional scan def = scan || succeed def;
167 fun option scan = (scan >> SOME) || succeed NONE;
172 (case (SOME (scan xs) handle FAIL _ => NONE) of
174 | SOME (y, xs') => rep (y :: ys) xs');
177 fun repeat1 scan = scan ::: repeat scan;
179 fun single scan = scan >> (fn x => [x]);
180 fun bulk scan = scan -- repeat (permissive scan) >> (op ::);
182 fun max leq scan1 scan2 xs =
183 (case (option scan1 xs, option scan2 xs) of
184 ((NONE, _), (NONE, _)) => raise FAIL NONE (*looses FAIL msg!*)
185 | ((SOME tok1, xs'), (NONE, _)) => (tok1, xs')
186 | ((NONE, _), (SOME tok2, xs')) => (tok2, xs')
187 | ((SOME tok1, xs1'), (SOME tok2, xs2')) =>
188 if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2'));
190 fun ahead scan xs = (fst (scan xs), xs);
192 fun unless test scan =
193 ahead (option test) :-- (fn NONE => scan | _ => fail) >> #2;
196 | first (scan :: scans) = scan || first scans;
199 (* state based scanners *)
201 fun state (st, xs) = (st, (st, xs));
203 fun depend scan (st, xs) =
204 let val ((st', y), xs') = scan st xs
205 in (y, (st', xs')) end;
207 fun peek scan = depend (fn st => scan st >> pair st);
209 fun pass st scan xs =
210 let val (y, (_, xs')) = scan (st, xs)
213 fun lift scan (st, xs) =
214 let val (y, xs') = scan xs
215 in (y, (st, xs')) end;
217 fun unlift scan = pass () scan;
223 let val (y, xs') = scan xs
224 in ((y, take (length xs - length xs') xs), xs') end;
229 datatype 'a stopper = Stopper of ('a list -> 'a) * ('a -> bool);
231 fun stopper mk_stopper is_stopper = Stopper (mk_stopper, is_stopper);
232 fun is_stopper (Stopper (_, is_stopper)) = is_stopper;
237 fun finite' (Stopper (mk_stopper, is_stopper)) scan (state, input) =
239 fun lost () = raise ABORT "Bad scanner: lost stopper of finite scan!";
241 fun stop [] = lost ()
243 let val (xs, x) = split_last lst
244 in if is_stopper x then ((), xs) else lost () end;
246 if exists is_stopper input then
247 raise ABORT "Stopper may not occur in input of finite scan!"
248 else (strict scan --| lift stop) (state, input @ [mk_stopper input])
251 fun finite stopper scan = unlift (finite' stopper (lift scan));
253 fun read stopper scan xs =
254 (case error (finite stopper (option scan)) xs of
255 (y as SOME _, []) => y
259 (* infinite scans -- draining state-based source *)
261 fun drain def_prompt get stopper scan ((state, xs), src) =
262 (scan (state, xs), src) handle MORE prompt =>
263 (case get (the_default def_prompt prompt) src of
264 ([], _) => (finite' stopper scan (state, xs), src)
265 | (xs', src') => drain def_prompt get stopper scan ((state, xs @ xs'), src'));
269 (** datatype lexicon -- position tree **)
271 datatype lexicon = Lexicon of (bool * lexicon) Symtab.table;
273 val empty_lexicon = Lexicon Symtab.empty;
275 fun is_literal _ [] = false
276 | is_literal (Lexicon tab) (c :: cs) =
277 (case Symtab.lookup tab c of
278 SOME (tip, lex) => tip andalso null cs orelse is_literal lex cs
282 (* scan longest match *)
284 fun literal lexicon =
286 fun finish (SOME (res, rest)) = (rev res, rest)
287 | finish NONE = raise FAIL NONE;
288 fun scan _ res (Lexicon tab) [] = if Symtab.is_empty tab then finish res else raise MORE NONE
289 | scan path res (Lexicon tab) (c :: cs) =
290 (case Symtab.lookup tab (fst c) of
292 let val path' = c :: path
293 in scan path' (if tip then SOME (path', cs) else res) lex cs end
294 | NONE => finish res);
295 in scan [] NONE lexicon end;
300 fun extend_lexicon chrs lexicon =
303 | ext (c :: cs) (Lexicon tab) =
304 (case Symtab.lookup tab c of
305 SOME (tip, lex) => Lexicon (Symtab.update (c, (tip orelse null cs, ext cs lex)) tab)
306 | NONE => Lexicon (Symtab.update (c, (null cs, ext cs empty_lexicon)) tab));
307 in if is_literal lexicon chrs then lexicon else ext chrs lexicon end;
309 fun make_lexicon chrss = fold extend_lexicon chrss empty_lexicon;
314 fun dest path (Lexicon tab) = Symtab.fold (fn (d, (tip, lex)) =>
316 val path' = d :: path;
317 val content = dest path' lex;
318 in append (if tip then rev path' :: content else content) end) tab [];
320 val dest_lexicon = map implode o dest [];
321 fun merge_lexicons (lex1, lex2) = fold extend_lexicon (dest [] lex2) lex1;
325 structure Basic_Scan: BASIC_SCAN = Scan;