refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
1 (* Title: Pure/General/scan.ML
2 Author: Markus Wenzel and Tobias Nipkow, TU Muenchen
4 Generic scanners (for potentially infinite input).
7 infix 5 -- :-- :|-- |-- --| ^^;
12 signature BASIC_SCAN =
14 type message = unit -> string
16 val !! : ('a * message option -> message) -> ('a -> 'b) -> 'a -> 'b
18 val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
20 val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
21 (*sequential pairing*)
22 val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
24 val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
26 val :|-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> 'd * 'e
27 val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
28 val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
30 val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
31 val ::: : ('a -> 'b * 'c) * ('c -> 'b list * 'd) -> 'a -> 'b list * 'd
32 val @@@ : ('a -> 'b list * 'c) * ('c -> 'b list * 'd) -> 'a -> 'b list * 'd
33 (*one element literal*)
34 val $$ : string -> string list -> string * string list
35 val ~$$ : string -> string list -> string * string list
41 val prompt: string -> ('a -> 'b) -> 'a -> 'b
42 val permissive: ('a -> 'b) -> 'a -> 'b
43 val error: ('a -> 'b) -> 'a -> 'b
44 val catch: ('a -> 'b) -> 'a -> 'b (*exception Fail*)
46 val fail_with: ('a -> message) -> 'a -> 'b
47 val succeed: 'a -> 'b -> 'a * 'b
48 val some: ('a -> 'b option) -> 'a list -> 'b * 'a list
49 val one: ('a -> bool) -> 'a list -> 'a * 'a list
50 val this: string list -> string list -> string list * string list
51 val this_string: string -> string list -> string * string list
52 val many: ('a -> bool) -> 'a list -> 'a list * 'a list
53 val many1: ('a -> bool) -> 'a list -> 'a list * 'a list
54 val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
55 val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
56 val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
57 val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
58 val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
59 val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
60 val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
61 val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
62 val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
63 val first: ('a -> 'b) list -> 'a -> 'b
64 val state: 'a * 'b -> 'a * ('a * 'b)
65 val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
66 val peek: ('a -> 'b -> 'c * 'd) -> 'a * 'b -> 'c * ('a * 'd)
67 val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
68 val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
69 val unlift: (unit * 'a -> 'b * ('c * 'd)) -> 'a -> 'b * 'd
70 val trace: ('a list -> 'b * 'c list) -> 'a list -> ('b * 'a list) * 'c list
72 val stopper: ('a list -> 'a) -> ('a -> bool) -> 'a stopper
73 val is_stopper: 'a stopper -> 'a -> bool
74 val finite': 'a stopper -> ('b * 'a list -> 'c * ('d * 'a list))
75 -> 'b * 'a list -> 'c * ('d * 'a list)
76 val finite: 'a stopper -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
77 val read: 'a stopper -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
78 val drain: string -> (string -> 'a -> 'b list * 'a) -> 'b stopper ->
79 ('c * 'b list -> 'd * ('e * 'b list)) -> ('c * 'b list) * 'a -> ('d * ('e * 'b list)) * 'a
81 val is_literal: lexicon -> string list -> bool
82 val literal: lexicon -> (string * 'a) list -> (string * 'a) list * (string * 'a) list
83 val empty_lexicon: lexicon
84 val extend_lexicon: string list -> lexicon -> lexicon
85 val make_lexicon: string list list -> lexicon
86 val dest_lexicon: lexicon -> string list
87 val merge_lexicons: lexicon * lexicon -> lexicon
90 structure Scan: SCAN =
(*delayed message text: a function that produces the string when applied to ()*)
98 type message = unit -> string;
(*exceptions used for control flow between the scanners below*)
100 exception MORE of string option; (*need more input (prompt)*)
101 exception FAIL of message option; (*try alternatives (reason of failure)*)
102 exception ABORT of message; (*dead end*)
(*turn a FAIL of scan into ABORT; the ABORT message is built by err from the
  original input xs and the optional failure reason*)
fun !! err scan xs =
  scan xs
    handle FAIL reason => raise ABORT (err (xs, reason));
(*both MORE and ABORT of scan are downgraded to a plain FAIL without message*)
fun permissive scan xs =
  let fun give_up () = raise FAIL NONE in
    scan xs
      handle MORE _ => give_up ()
        | ABORT _ => give_up ()
  end;

(*only MORE of scan is downgraded to a plain FAIL without message*)
fun strict scan xs =
  scan xs
    handle MORE _ => raise FAIL NONE;
(*attach prompt str to a MORE request that does not carry a prompt yet;
  MORE with an existing prompt passes through unchanged*)
fun prompt str scan xs =
  scan xs
    handle MORE NONE => raise MORE (SOME str);
(*report ABORT of scan via Library.error, forcing the delayed message*)
fun error scan xs =
  scan xs
    handle ABORT msg =>
      let val text = msg ()
      in Library.error text end;
(*convert ABORT and FAIL of scan into exception Fail with the forced message text;
  a FAIL without reason is reported as "Syntax error"*)
fun catch scan xs =
  let
    fun text_of NONE = "Syntax error"
      | text_of (SOME msg) = msg ();
  in
    scan xs
      handle ABORT msg => raise Fail (msg ())
        | FAIL reason => raise Fail (text_of reason)
  end;
115 (* scanner combinators *)
(*apply f to the result of scan; the remaining input is passed on unchanged*)
fun (scan >> f) xs =
  let val (y, rest) = scan xs
  in (f y, rest) end;
(*alternative: if scan1 raises FAIL, scan2 is tried on the same input xs*)
fun (scan1 || scan2) xs =
  scan1 xs
    handle FAIL _ => scan2 xs;
(*dependent sequencing: scan1 runs on xs; scan2 is then applied to the result x
  of scan1 and to the input ys left over by scan1*)
121 fun (scan1 :-- scan2) xs =
123 val (x, ys) = scan1 xs;
124 val (y, zs) = scan2 x ys;
(*sequential pairing, expressed via dependent sequencing with the first result ignored*)
fun (scan1 -- scan2) = scan1 :-- (fn _ => scan2);

(*dependent sequencing that keeps only the second result*)
fun (scan1 :|-- scan2) = (scan1 :-- scan2) >> (fn (_, y) => y);

(*sequencing that keeps only the second / only the first result*)
fun (scan1 |-- scan2) = (scan1 -- scan2) >> (fn (_, y) => y);
fun (scan1 --| scan2) = (scan1 -- scan2) >> (fn (x, _) => x);

(*sequencing with results combined by string concatenation, cons, and append*)
fun (scan1 ^^ scan2) = (scan1 -- scan2) >> (fn (a, b) => a ^ b);
fun (scan1 ::: scan2) = (scan1 -- scan2) >> (fn (x, xs) => x :: xs);
fun (scan1 @@@ scan2) = (scan1 -- scan2) >> (fn (xs, ys) => xs @ ys);
136 (* generic scanners *)
(*scanner that always raises FAIL, without message*)
fun fail _ = raise FAIL NONE;

(*scanner that always raises FAIL, with the message computed from the input by msg_of*)
fun fail_with msg_of xs =
  let val msg = msg_of xs
  in raise FAIL (SOME msg) end;
(*scanner that always succeeds with result y and leaves the input untouched*)
fun succeed y = fn xs => (y, xs);
(*scan one element via a partial function: empty input raises MORE;
  when f yields SOME y the result is (y, xs), any other outcome raises FAIL*)
142 fun some _ [] = raise MORE NONE
144 (case f x of SOME y => (y, xs) | _ => raise FAIL NONE);
(*scan one element satisfying pred: MORE on empty input, FAIL if the head
  element does not satisfy pred*)
fun one pred input =
  (case input of
    [] => raise MORE NONE
  | x :: rest => if pred x then (x, rest) else raise FAIL NONE);
(*scan one string element equal to a*)
fun $$ a = one (fn (s: string) => a = s);

(*scan one string element different from a*)
fun ~$$ a = one (fn (s: string) => not (s = a));
(*drop_prefix removes the strings of its first argument from the front of its second:
  MORE if the input ends before the prefix does, FAIL at the first mismatch*)
155 fun drop_prefix [] xs = xs
156 | drop_prefix (_ :: _) [] = raise MORE NONE
157 | drop_prefix (y :: ys) (x :: xs) =
158 if (y: string) = x then drop_prefix ys xs else raise FAIL NONE;
(*result: the expected prefix ys itself, paired with the input after that prefix*)
159 in (ys, drop_prefix ys xs) end;
(*scan the raw characters of s in sequence and return s itself as result*)
(*primitive string -- no symbols here!*)
fun this_string s =
  let val chars = raw_explode s
  in this chars >> K s end;
(*scan elements satisfying pred: empty input raises MORE (more input may follow);
  an element x that satisfies pred is consed onto the result of the recursive
  call on the remaining input (via apfst)*)
163 fun many _ [] = raise MORE NONE
164 | many pred (lst as x :: xs) =
165 if pred x then apfst (cons x) (many pred xs)
(*one element satisfying pred, followed by many more*)
fun many1 pred = (one pred) ::: (many pred);

(*scan with fallback: if scan raises FAIL, succeed with def on the same input*)
fun optional scan def = scan || (succeed def);

(*scan as option: SOME of the result, or NONE on the same input if scan raises FAIL*)
fun option scan = (scan >> (fn y => SOME y)) || (succeed NONE);
(*one step of the repetition: a FAIL of scan is mapped to NONE; on success the
  result y is pushed onto the accumulator ys and rep continues on the rest xs'*)
176 (case (SOME (scan xs) handle FAIL _ => NONE) of
178 | SOME (y, xs') => rep (y :: ys) xs');
(*one mandatory scan, followed by repeat scan*)
fun repeat1 scan = scan ::: (repeat scan);

(*wrap the result of scan into a singleton list*)
fun single scan = scan >> (fn x => x :: []);

(*one mandatory scan, followed by repetitions of its permissive version*)
fun bulk scan = (scan -- repeat (permissive scan)) >> (fn (x, xs) => x :: xs);
(*run both scanners on xs and pick a result: if only one succeeds, take it;
  if both succeed, take the result of scan1 when leq (tok2, tok1) holds,
  otherwise that of scan2; if both fail, raise FAIL*)
fun max leq scan1 scan2 xs =
  let
    val (res1, rest1) = option scan1 xs;
    val (res2, rest2) = option scan2 xs;
  in
    (case (res1, res2) of
      (NONE, NONE) => raise FAIL NONE  (*loses FAIL msg!*)
    | (SOME tok1, NONE) => (tok1, rest1)
    | (NONE, SOME tok2) => (tok2, rest2)
    | (SOME tok1, SOME tok2) =>
        if leq (tok2, tok1) then (tok1, rest1) else (tok2, rest2))
  end;
(*look-ahead: return the result of scan, but hand back the original input xs*)
fun ahead scan xs =
  let val (y, _) = scan xs
  in (y, xs) end;
(*negative look-ahead: run scan only if test does not succeed on the input;
  if test succeeds, fail*)
fun unless test scan =
  (ahead (option test) :-- (fn NONE => scan | SOME _ => fail)) >> (fn (_, y) => y);
(*non-empty list of scanners: try the head scanner, with the remaining ones as alternative*)
200 | first (scan :: scans) = scan || first scans;
203 (* state based scanners *)
(*return the current state as result, leaving state and input unchanged*)
fun state (input as (st, _)) = (st, input);
(*state-dependent scanner: scan receives the current state st and yields an
  updated state together with its result; the new state is threaded on*)
fun depend scan (st, xs) =
  (case scan st xs of
    ((st', y), rest) => (y, (st', rest)));
(*like depend, but scan only reads the state: the state st is paired back
  with the result unchanged*)
fun peek scan = depend (fn st => (scan st) >> (pair st));
(*run the state-based scan on xs with initial state st; the final state is
  ignored by the pattern below*)
213 fun pass st scan xs =
214 let val (y, (_, xs')) = scan (st, xs)
(*turn a plain scanner into a state-based one that passes the state st through unchanged*)
fun lift scan (st, xs) =
  (case scan xs of
    (y, rest) => (y, (st, rest)));
(*run a state-based scanner with the trivial state ()*)
fun unlift scan xs = pass () scan xs;
(*pair the result y of scan with part of the original input xs, selected by
  take with the difference between the lengths of the input and of the rest xs'
  -- presumably the prefix that scan consumed*)
227 let val (y, xs') = scan xs
228 in ((y, take (length xs - length xs') xs), xs') end;
(*a stopper consists of a function producing a stopper element from a list,
  and a predicate recognizing stopper elements*)
datatype 'a stopper = Stopper of ('a list -> 'a) * ('a -> bool);

(*constructor function and projection of the recognizer predicate*)
fun stopper mk recognize = Stopper (mk, recognize);
fun is_stopper (Stopper (_, recognize)) = recognize;
(*finite scan on state-based input: the input is extended by a stopper element
  (mk_stopper input), scanned strictly (MORE becomes FAIL), and afterwards the
  stopper is removed again by stop*)
241 fun finite' (Stopper (mk_stopper, is_stopper)) scan (state, input) =
(*internal error: the stopper element is no longer where it is expected*)
243 fun lost () = raise ABORT (fn () => "Bad scanner: lost stopper of finite scan!");
245 fun stop [] = lost ()
(*split_last presumably separates the final element x, which must be the stopper*)
247 let val (xs, x) = split_last lst
248 in if is_stopper x then ((), xs) else lost () end;
(*a stopper element must not be part of the given input*)
250 if exists is_stopper input then
251 raise ABORT (fn () => "Stopper may not occur in input of finite scan!")
252 else (strict scan --| lift stop) (state, input @ [mk_stopper input])
(*finite scan for plain scanners: lift scan to the state-based form expected
  by finite', then drop the dummy state again*)
fun finite stopper scan =
  let val lifted = lift scan
  in unlift (finite' stopper lifted) end;
(*apply scan as a finite, optional scan to xs, with ABORT reported via error;
  a SOME result is returned as it is when no input remains*)
257 fun read stopper scan xs =
258 (case error (finite stopper (option scan)) xs of
259 (y as SOME _, []) => y
263 (* infinite scans -- draining state-based source *)
(*run scan on the buffered input xs; whenever it raises MORE, fetch further
  input from src via get (using the requested prompt, or def_prompt) and retry
  on the extended buffer; if get delivers nothing, finish with a finite scan
  of the current buffer*)
fun drain def_prompt get stopper scan ((state, xs), src) =
  let
    fun refill prompt =
      (case get (the_default def_prompt prompt) src of
        ([], _) => (finite' stopper scan (state, xs), src)
      | (more, src') => drain def_prompt get stopper scan ((state, xs @ more), src'));
  in
    (scan (state, xs), src)
      handle MORE prompt => refill prompt
  end;
273 (** datatype lexicon -- position tree **)
(*each table entry maps a symbol to a flag (called tip below) and the
  sub-lexicon of possible continuations*)
275 datatype lexicon = Lexicon of (bool * lexicon) Symtab.table;
(*lexicon with an empty table*)
277 val empty_lexicon = Lexicon Symtab.empty;
(*membership test: the empty symbol list is never a literal; otherwise follow
  the symbols through the nested tables -- the list is accepted if its last
  symbol hits an entry marked as tip*)
279 fun is_literal _ [] = false
280 | is_literal (Lexicon tab) (c :: cs) =
281 (case Symtab.lookup tab c of
282 SOME (tip, lex) => tip andalso null cs orelse is_literal lex cs
286 (* scan longest match *)
(*scan a literal of the lexicon from a list of pairs whose first component is
  the symbol; res holds the latest accepted match, path the elements read so
  far in reverse order*)
288 fun literal lexicon =
(*deliver the latest accepted match in original order, or FAIL if there is none*)
290 fun finish (SOME (res, rest)) = (rev res, rest)
291 | finish NONE = raise FAIL NONE;
(*end of input: finish if the current table has no continuations, otherwise ask for MORE*)
292 fun scan _ res (Lexicon tab) [] = if Symtab.is_empty tab then finish res else raise MORE NONE
293 | scan path res (Lexicon tab) (c :: cs) =
294 (case Symtab.lookup tab (fst c) of
(*extend the path; a tip entry makes the extended path the new accepted match*)
296 let val path' = c :: path
297 in scan path' (if tip then SOME (path', cs) else res) lex cs end
(*no continuation for this symbol: fall back to the latest accepted match*)
298 | NONE => finish res);
299 in scan [] NONE lexicon end;
(*add the symbol list chrs to lexicon; the lexicon is returned unchanged if
  chrs is already a literal*)
304 fun extend_lexicon chrs lexicon =
307 | ext (c :: cs) (Lexicon tab) =
308 (case Symtab.lookup tab c of
(*existing entry: keep its tip flag, or set it if chrs ends here; extend its sub-lexicon*)
309 SOME (tip, lex) => Lexicon (Symtab.update (c, (tip orelse null cs, ext cs lex)) tab)
(*new entry: tip flag set iff chrs ends here; sub-lexicon built from the empty one*)
310 | NONE => Lexicon (Symtab.update (c, (null cs, ext cs empty_lexicon)) tab));
311 in if is_literal lexicon chrs then lexicon else ext chrs lexicon end;
(*build a lexicon by extending the empty one with each symbol list of chrss*)
fun make_lexicon chrss =
  let val build = fold extend_lexicon chrss
  in build empty_lexicon end;
(*collect the symbol lists stored below path: path is kept in reverse order;
  at each entry marked as tip the reversed path' is added to the content
  gathered from its sub-lexicon*)
318 fun dest path (Lexicon tab) = Symtab.fold (fn (d, (tip, lex)) =>
320 val path' = d :: path;
321 val content = dest path' lex;
322 in append (if tip then rev path' :: content else content) end) tab [];
(*content of a lexicon, with each symbol list imploded into a single string*)
fun dest_lexicon lexicon = map implode (dest [] lexicon);

(*extend lex1 by all symbol lists contained in lex2*)
fun merge_lexicons (lex1, lex2) =
  let val entries = dest [] lex2
  in fold extend_lexicon entries lex1 end;
(*view of structure Scan restricted to signature BASIC_SCAN*)
329 structure Basic_Scan: BASIC_SCAN = Scan;