src/Pure/General/scan.ML
author wenzelm
Mon, 31 May 2010 21:06:57 +0200
changeset 37216 3165bc303f66
parent 33956 e9afca2118d4
child 39201 c7a66b584147
permissions -rw-r--r--
modernized some structure names, keeping a few legacy aliases;
     1 (*  Title:      Pure/General/scan.ML
     2     Author:     Markus Wenzel and Tobias Nipkow, TU Muenchen
     3 
     4 Generic scanners (for potentially infinite input).
     5 *)
     6 
     7 infix 5 -- :-- :|-- |-- --| ^^;
     8 infixr 5 ::: @@@;
     9 infix 3 >>;
    10 infixr 0 ||;
    11 
    12 signature BASIC_SCAN =
    13 sig
    14   (*error msg handler*)
    15   val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b
    16   (*apply function*)
    17   val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
    18   (*alternative*)
    19   val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
    20   (*sequential pairing*)
    21   val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
    22   (*dependent pairing*)
    23   val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
    24   (*projections*)
    25   val :|-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> 'd * 'e
    26   val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
    27   val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
    28   (*concatenation*)
    29   val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
    30   val ::: : ('a -> 'b * 'c) * ('c -> 'b list * 'd) -> 'a -> 'b list * 'd
    31   val @@@ : ('a -> 'b list * 'c) * ('c -> 'b list * 'd) -> 'a -> 'b list * 'd
    32   (*one element literal*)
    33   val $$ : string -> string list -> string * string list
    34   val ~$$ : string -> string list -> string * string list
    35 end;
    36 
    37 signature SCAN =
    38 sig
    39   include BASIC_SCAN
    40   val prompt: string -> ('a -> 'b) -> 'a -> 'b
    41   val error: ('a -> 'b) -> 'a -> 'b
    42   val catch: ('a -> 'b) -> 'a -> 'b    (*exception Fail*)
    43   val fail: 'a -> 'b
    44   val fail_with: ('a -> string) -> 'a -> 'b
    45   val succeed: 'a -> 'b -> 'a * 'b
    46   val some: ('a -> 'b option) -> 'a list -> 'b * 'a list
    47   val one: ('a -> bool) -> 'a list -> 'a * 'a list
    48   val this: string list -> string list -> string list * string list
    49   val this_string: string -> string list -> string * string list
    50   val many: ('a -> bool) -> 'a list -> 'a list * 'a list
    51   val many1: ('a -> bool) -> 'a list -> 'a list * 'a list
    52   val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
    53   val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
    54   val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    55   val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    56   val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    57   val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    58   val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
    59   val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
    60   val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
    61   val first: ('a -> 'b) list -> 'a -> 'b
    62   val state: 'a * 'b -> 'a * ('a * 'b)
    63   val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
    64   val peek: ('a -> 'b -> 'c * 'd) -> 'a * 'b -> 'c * ('a * 'd)
    65   val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
    66   val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
    67   val unlift: (unit * 'a -> 'b * ('c * 'd)) -> 'a -> 'b * 'd
    68   val trace: ('a list -> 'b * 'c list) -> 'a list -> ('b * 'a list) * 'c list
    69   type 'a stopper
    70   val stopper: ('a list -> 'a) -> ('a -> bool) -> 'a stopper
    71   val is_stopper: 'a stopper -> 'a -> bool
    72   val finite': 'a stopper -> ('b * 'a list -> 'c * ('d * 'a list))
    73     -> 'b * 'a list -> 'c * ('d * 'a list)
    74   val finite: 'a stopper -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
    75   val read: 'a stopper -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
    76   val drain: string -> (string -> 'a -> 'b list * 'a) -> 'b stopper ->
    77     ('c * 'b list -> 'd * ('e * 'b list)) -> ('c * 'b list) * 'a -> ('d * ('e * 'b list)) * 'a
    78   type lexicon
    79   val is_literal: lexicon -> string list -> bool
    80   val literal: lexicon -> (string * 'a) list -> (string * 'a) list * (string * 'a) list
    81   val empty_lexicon: lexicon
    82   val extend_lexicon: string list -> lexicon -> lexicon
    83   val make_lexicon: string list list -> lexicon
    84   val dest_lexicon: lexicon -> string list
    85   val merge_lexicons: lexicon * lexicon -> lexicon
    86 end;
    87 
    88 structure Scan: SCAN =
    89 struct
    90 
    91 
    92 (** scanners **)
    93 
    94 (* exceptions *)
    95 
    96 exception MORE of string option;        (*need more input (prompt)*)
    97 exception FAIL of string option;        (*try alternatives (reason of failure)*)
    98 exception ABORT of string;              (*dead end*)
    99 
   100 fun !! err scan xs = scan xs handle FAIL msg => raise ABORT (err (xs, msg));
   101 fun permissive scan xs = scan xs handle MORE _ => raise FAIL NONE | ABORT _ => raise FAIL NONE;
   102 fun strict scan xs = scan xs handle MORE _ => raise FAIL NONE;
   103 fun prompt str scan xs = scan xs handle MORE NONE => raise MORE (SOME str);
   104 fun error scan xs = scan xs handle ABORT msg => Library.error msg;
   105 
   106 fun catch scan xs = scan xs
   107   handle ABORT msg => raise Fail msg
   108     | FAIL msg => raise Fail (the_default "Syntax error." msg);
   109 
   110 
   111 (* scanner combinators *)
   112 
   113 fun (scan >> f) xs = scan xs |>> f;
   114 
   115 fun (scan1 || scan2) xs = scan1 xs handle FAIL _ => scan2 xs;
   116 
   117 fun (scan1 :-- scan2) xs =
   118   let
   119     val (x, ys) = scan1 xs;
   120     val (y, zs) = scan2 x ys;
   121   in ((x, y), zs) end;
   122 
   123 fun (scan1 -- scan2) = scan1 :-- (fn _ => scan2);
   124 fun (scan1 :|-- scan2) = scan1 :-- scan2 >> #2;
   125 fun (scan1 |-- scan2) = scan1 -- scan2 >> #2;
   126 fun (scan1 --| scan2) = scan1 -- scan2 >> #1;
   127 fun (scan1 ^^ scan2) = scan1 -- scan2 >> op ^;
   128 fun (scan1 ::: scan2) = scan1 -- scan2 >> op ::;
   129 fun (scan1 @@@ scan2) = scan1 -- scan2 >> op @;
   130 
   131 
   132 (* generic scanners *)
   133 
   134 fun fail _ = raise FAIL NONE;
   135 fun fail_with msg_of xs = raise FAIL (SOME (msg_of xs));
   136 fun succeed y xs = (y, xs);
   137 
   138 fun some _ [] = raise MORE NONE
   139   | some f (x :: xs) =
   140       (case f x of SOME y => (y, xs) | _ => raise FAIL NONE);
   141 
   142 fun one _ [] = raise MORE NONE
   143   | one pred (x :: xs) =
   144       if pred x then (x, xs) else raise FAIL NONE;
   145 
   146 fun $$ a = one (fn s: string => s = a);
   147 fun ~$$ a = one (fn s: string => s <> a);
   148 
   149 fun this ys xs =
   150   let
   151     fun drop_prefix [] xs = xs
   152       | drop_prefix (_ :: _) [] = raise MORE NONE
   153       | drop_prefix (y :: ys) (x :: xs) =
   154           if (y: string) = x then drop_prefix ys xs else raise FAIL NONE;
   155   in (ys, drop_prefix ys xs) end;
   156 
   157 fun this_string s = this (explode s) >> K s;  (*primitive string -- no symbols here!*)
   158 
   159 fun many _ [] = raise MORE NONE
   160   | many pred (lst as x :: xs) =
   161       if pred x then apfst (cons x) (many pred xs)
   162       else ([], lst);
   163 
   164 fun many1 pred = one pred ::: many pred;
   165 
   166 fun optional scan def = scan || succeed def;
   167 fun option scan = (scan >> SOME) || succeed NONE;
   168 
   169 fun repeat scan =
   170   let
   171     fun rep ys xs =
   172       (case (SOME (scan xs) handle FAIL _ => NONE) of
   173         NONE => (rev ys, xs)
   174       | SOME (y, xs') => rep (y :: ys) xs');
   175   in rep [] end;
   176 
   177 fun repeat1 scan = scan ::: repeat scan;
   178 
   179 fun single scan = scan >> (fn x => [x]);
   180 fun bulk scan = scan -- repeat (permissive scan) >> (op ::);
   181 
   182 fun max leq scan1 scan2 xs =
   183   (case (option scan1 xs, option scan2 xs) of
   184     ((NONE, _), (NONE, _)) => raise FAIL NONE           (*looses FAIL msg!*)
   185   | ((SOME tok1, xs'), (NONE, _)) => (tok1, xs')
   186   | ((NONE, _), (SOME tok2, xs')) => (tok2, xs')
   187   | ((SOME tok1, xs1'), (SOME tok2, xs2')) =>
   188       if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2'));
   189 
   190 fun ahead scan xs = (fst (scan xs), xs);
   191 
   192 fun unless test scan =
   193   ahead (option test) :-- (fn NONE => scan | _ => fail) >> #2;
   194 
   195 fun first [] = fail
   196   | first (scan :: scans) = scan || first scans;
   197 
   198 
   199 (* state based scanners *)
   200 
   201 fun state (st, xs) = (st, (st, xs));
   202 
   203 fun depend scan (st, xs) =
   204   let val ((st', y), xs') = scan st xs
   205   in (y, (st', xs')) end;
   206 
   207 fun peek scan = depend (fn st => scan st >> pair st);
   208 
   209 fun pass st scan xs =
   210   let val (y, (_, xs')) = scan (st, xs)
   211   in (y, xs') end;
   212 
   213 fun lift scan (st, xs) =
   214   let val (y, xs') = scan xs
   215   in (y, (st, xs')) end;
   216 
   217 fun unlift scan = pass () scan;
   218 
   219 
   220 (* trace input *)
   221 
   222 fun trace scan xs =
   223   let val (y, xs') = scan xs
   224   in ((y, take (length xs - length xs') xs), xs') end;
   225 
   226 
   227 (* stopper *)
   228 
   229 datatype 'a stopper = Stopper of ('a list -> 'a) * ('a -> bool);
   230 
   231 fun stopper mk_stopper is_stopper = Stopper (mk_stopper, is_stopper);
   232 fun is_stopper (Stopper (_, is_stopper)) = is_stopper;
   233 
   234 
   235 (* finite scans *)
   236 
   237 fun finite' (Stopper (mk_stopper, is_stopper)) scan (state, input) =
   238   let
   239     fun lost () = raise ABORT "Bad scanner: lost stopper of finite scan!";
   240 
   241     fun stop [] = lost ()
   242       | stop lst =
   243           let val (xs, x) = split_last lst
   244           in if is_stopper x then ((), xs) else lost () end;
   245   in
   246     if exists is_stopper input then
   247       raise ABORT "Stopper may not occur in input of finite scan!"
   248     else (strict scan --| lift stop) (state, input @ [mk_stopper input])
   249   end;
   250 
   251 fun finite stopper scan = unlift (finite' stopper (lift scan));
   252 
   253 fun read stopper scan xs =
   254   (case error (finite stopper (option scan)) xs of
   255     (y as SOME _, []) => y
   256   | _ => NONE);
   257 
   258 
   259 (* infinite scans -- draining state-based source *)
   260 
   261 fun drain def_prompt get stopper scan ((state, xs), src) =
   262   (scan (state, xs), src) handle MORE prompt =>
   263     (case get (the_default def_prompt prompt) src of
   264       ([], _) => (finite' stopper scan (state, xs), src)
   265     | (xs', src') => drain def_prompt get stopper scan ((state, xs @ xs'), src'));
   266 
   267 
   268 
   269 (** datatype lexicon -- position tree **)
   270 
   271 datatype lexicon = Lexicon of (bool * lexicon) Symtab.table;
   272 
   273 val empty_lexicon = Lexicon Symtab.empty;
   274 
   275 fun is_literal _ [] = false
   276   | is_literal (Lexicon tab) (c :: cs) =
   277       (case Symtab.lookup tab c of
   278         SOME (tip, lex) => tip andalso null cs orelse is_literal lex cs
   279       | NONE => false);
   280 
   281 
   282 (* scan longest match *)
   283 
   284 fun literal lexicon =
   285   let
   286     fun finish (SOME (res, rest)) = (rev res, rest)
   287       | finish NONE = raise FAIL NONE;
   288     fun scan _ res (Lexicon tab) [] = if Symtab.is_empty tab then finish res else raise MORE NONE
   289       | scan path res (Lexicon tab) (c :: cs) =
   290           (case Symtab.lookup tab (fst c) of
   291             SOME (tip, lex) =>
   292               let val path' = c :: path
   293               in scan path' (if tip then SOME (path', cs) else res) lex cs end
   294           | NONE => finish res);
   295   in scan [] NONE lexicon end;
   296 
   297 
   298 (* build lexicons *)
   299 
   300 fun extend_lexicon chrs lexicon =
   301   let
   302     fun ext [] lex = lex
   303       | ext (c :: cs) (Lexicon tab) =
   304           (case Symtab.lookup tab c of
   305             SOME (tip, lex) => Lexicon (Symtab.update (c, (tip orelse null cs, ext cs lex)) tab)
   306           | NONE => Lexicon (Symtab.update (c, (null cs, ext cs empty_lexicon)) tab));
   307   in if is_literal lexicon chrs then lexicon else ext chrs lexicon end;
   308 
   309 fun make_lexicon chrss = fold extend_lexicon chrss empty_lexicon;
   310 
   311 
   312 (* merge lexicons *)
   313 
   314 fun dest path (Lexicon tab) = Symtab.fold (fn (d, (tip, lex)) =>
   315   let
   316     val path' = d :: path;
   317     val content = dest path' lex;
   318   in append (if tip then rev path' :: content else content) end) tab [];
   319 
   320 val dest_lexicon = map implode o dest [];
   321 fun merge_lexicons (lex1, lex2) = fold extend_lexicon (dest [] lex2) lex1;
   322 
   323 end;
   324 
   325 structure Basic_Scan: BASIC_SCAN = Scan;
   326 open Basic_Scan;