src/Pure/General/scan.ML
author wenzelm
Thu, 09 Aug 2012 14:37:43 +0200
changeset 49758 a72f8ffecf31
parent 44818 9b00f09f7721
child 56446 8284c0d5bf52
permissions -rw-r--r--
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
     1 (*  Title:      Pure/General/scan.ML
     2     Author:     Markus Wenzel and Tobias Nipkow, TU Muenchen
     3 
     4 Generic scanners (for potentially infinite input).
     5 *)
     6 
     7 infix 5 -- :-- :|-- |-- --| ^^;
     8 infixr 5 ::: @@@;
     9 infix 3 >>;
    10 infixr 0 ||;
    11 
    12 signature BASIC_SCAN =
    13 sig
    14   type message = unit -> string
    15   (*error msg handler*)
    16   val !! : ('a * message option -> message) -> ('a -> 'b) -> 'a -> 'b
    17   (*apply function*)
    18   val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
    19   (*alternative*)
    20   val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
    21   (*sequential pairing*)
    22   val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
    23   (*dependent pairing*)
    24   val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
    25   (*projections*)
    26   val :|-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> 'd * 'e
    27   val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
    28   val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
    29   (*concatenation*)
    30   val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
    31   val ::: : ('a -> 'b * 'c) * ('c -> 'b list * 'd) -> 'a -> 'b list * 'd
    32   val @@@ : ('a -> 'b list * 'c) * ('c -> 'b list * 'd) -> 'a -> 'b list * 'd
    33   (*one element literal*)
    34   val $$ : string -> string list -> string * string list
    35   val ~$$ : string -> string list -> string * string list
    36 end;
    37 
    38 signature SCAN =
    39 sig
    40   include BASIC_SCAN
    41   val prompt: string -> ('a -> 'b) -> 'a -> 'b
    42   val permissive: ('a -> 'b) -> 'a -> 'b
    43   val error: ('a -> 'b) -> 'a -> 'b
    44   val catch: ('a -> 'b) -> 'a -> 'b    (*exception Fail*)
    45   val fail: 'a -> 'b
    46   val fail_with: ('a -> message) -> 'a -> 'b
    47   val succeed: 'a -> 'b -> 'a * 'b
    48   val some: ('a -> 'b option) -> 'a list -> 'b * 'a list
    49   val one: ('a -> bool) -> 'a list -> 'a * 'a list
    50   val this: string list -> string list -> string list * string list
    51   val this_string: string -> string list -> string * string list
    52   val many: ('a -> bool) -> 'a list -> 'a list * 'a list
    53   val many1: ('a -> bool) -> 'a list -> 'a list * 'a list
    54   val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
    55   val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
    56   val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    57   val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    58   val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    59   val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    60   val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
    61   val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
    62   val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
    63   val first: ('a -> 'b) list -> 'a -> 'b
    64   val state: 'a * 'b -> 'a * ('a * 'b)
    65   val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
    66   val peek: ('a -> 'b -> 'c * 'd) -> 'a * 'b -> 'c * ('a * 'd)
    67   val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
    68   val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
    69   val unlift: (unit * 'a -> 'b * ('c * 'd)) -> 'a -> 'b * 'd
    70   val trace: ('a list -> 'b * 'c list) -> 'a list -> ('b * 'a list) * 'c list
    71   type 'a stopper
    72   val stopper: ('a list -> 'a) -> ('a -> bool) -> 'a stopper
    73   val is_stopper: 'a stopper -> 'a -> bool
    74   val finite': 'a stopper -> ('b * 'a list -> 'c * ('d * 'a list))
    75     -> 'b * 'a list -> 'c * ('d * 'a list)
    76   val finite: 'a stopper -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
    77   val read: 'a stopper -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
    78   val drain: string -> (string -> 'a -> 'b list * 'a) -> 'b stopper ->
    79     ('c * 'b list -> 'd * ('e * 'b list)) -> ('c * 'b list) * 'a -> ('d * ('e * 'b list)) * 'a
    80   type lexicon
    81   val is_literal: lexicon -> string list -> bool
    82   val literal: lexicon -> (string * 'a) list -> (string * 'a) list * (string * 'a) list
    83   val empty_lexicon: lexicon
    84   val extend_lexicon: string list -> lexicon -> lexicon
    85   val make_lexicon: string list list -> lexicon
    86   val dest_lexicon: lexicon -> string list
    87   val merge_lexicons: lexicon * lexicon -> lexicon
    88 end;
    89 
    90 structure Scan: SCAN =
    91 struct
    92 
    93 
    94 (** scanners **)
    95 
    96 (* exceptions *)
    97 
    98 type message = unit -> string;
    99 
   100 exception MORE of string option;        (*need more input (prompt)*)
   101 exception FAIL of message option;       (*try alternatives (reason of failure)*)
   102 exception ABORT of message;             (*dead end*)
   103 
   104 fun !! err scan xs = scan xs handle FAIL msg => raise ABORT (err (xs, msg));
   105 fun permissive scan xs = scan xs handle MORE _ => raise FAIL NONE | ABORT _ => raise FAIL NONE;
   106 fun strict scan xs = scan xs handle MORE _ => raise FAIL NONE;
   107 fun prompt str scan xs = scan xs handle MORE NONE => raise MORE (SOME str);
   108 fun error scan xs = scan xs handle ABORT msg => Library.error (msg ());
   109 
   110 fun catch scan xs = scan xs
   111   handle ABORT msg => raise Fail (msg ())
   112     | FAIL msg => raise Fail (case msg of NONE => "Syntax error" | SOME m => m ());
   113 
   114 
   115 (* scanner combinators *)
   116 
   117 fun (scan >> f) xs = scan xs |>> f;
   118 
   119 fun (scan1 || scan2) xs = scan1 xs handle FAIL _ => scan2 xs;
   120 
   121 fun (scan1 :-- scan2) xs =
   122   let
   123     val (x, ys) = scan1 xs;
   124     val (y, zs) = scan2 x ys;
   125   in ((x, y), zs) end;
   126 
   127 fun (scan1 -- scan2) = scan1 :-- (fn _ => scan2);
   128 fun (scan1 :|-- scan2) = scan1 :-- scan2 >> #2;
   129 fun (scan1 |-- scan2) = scan1 -- scan2 >> #2;
   130 fun (scan1 --| scan2) = scan1 -- scan2 >> #1;
   131 fun (scan1 ^^ scan2) = scan1 -- scan2 >> op ^;
   132 fun (scan1 ::: scan2) = scan1 -- scan2 >> op ::;
   133 fun (scan1 @@@ scan2) = scan1 -- scan2 >> op @;
   134 
   135 
   136 (* generic scanners *)
   137 
   138 fun fail _ = raise FAIL NONE;
   139 fun fail_with msg_of xs = raise FAIL (SOME (msg_of xs));
   140 fun succeed y xs = (y, xs);
   141 
   142 fun some _ [] = raise MORE NONE
   143   | some f (x :: xs) =
   144       (case f x of SOME y => (y, xs) | _ => raise FAIL NONE);
   145 
   146 fun one _ [] = raise MORE NONE
   147   | one pred (x :: xs) =
   148       if pred x then (x, xs) else raise FAIL NONE;
   149 
   150 fun $$ a = one (fn s: string => s = a);
   151 fun ~$$ a = one (fn s: string => s <> a);
   152 
   153 fun this ys xs =
   154   let
   155     fun drop_prefix [] xs = xs
   156       | drop_prefix (_ :: _) [] = raise MORE NONE
   157       | drop_prefix (y :: ys) (x :: xs) =
   158           if (y: string) = x then drop_prefix ys xs else raise FAIL NONE;
   159   in (ys, drop_prefix ys xs) end;
   160 
   161 fun this_string s = this (raw_explode s) >> K s;  (*primitive string -- no symbols here!*)
   162 
   163 fun many _ [] = raise MORE NONE
   164   | many pred (lst as x :: xs) =
   165       if pred x then apfst (cons x) (many pred xs)
   166       else ([], lst);
   167 
   168 fun many1 pred = one pred ::: many pred;
   169 
   170 fun optional scan def = scan || succeed def;
   171 fun option scan = (scan >> SOME) || succeed NONE;
   172 
   173 fun repeat scan =
   174   let
   175     fun rep ys xs =
   176       (case (SOME (scan xs) handle FAIL _ => NONE) of
   177         NONE => (rev ys, xs)
   178       | SOME (y, xs') => rep (y :: ys) xs');
   179   in rep [] end;
   180 
   181 fun repeat1 scan = scan ::: repeat scan;
   182 
   183 fun single scan = scan >> (fn x => [x]);
   184 fun bulk scan = scan -- repeat (permissive scan) >> (op ::);
   185 
   186 fun max leq scan1 scan2 xs =
   187   (case (option scan1 xs, option scan2 xs) of
   188     ((NONE, _), (NONE, _)) => raise FAIL NONE           (*looses FAIL msg!*)
   189   | ((SOME tok1, xs'), (NONE, _)) => (tok1, xs')
   190   | ((NONE, _), (SOME tok2, xs')) => (tok2, xs')
   191   | ((SOME tok1, xs1'), (SOME tok2, xs2')) =>
   192       if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2'));
   193 
   194 fun ahead scan xs = (fst (scan xs), xs);
   195 
   196 fun unless test scan =
   197   ahead (option test) :-- (fn NONE => scan | _ => fail) >> #2;
   198 
   199 fun first [] = fail
   200   | first (scan :: scans) = scan || first scans;
   201 
   202 
   203 (* state based scanners *)
   204 
   205 fun state (st, xs) = (st, (st, xs));
   206 
   207 fun depend scan (st, xs) =
   208   let val ((st', y), xs') = scan st xs
   209   in (y, (st', xs')) end;
   210 
   211 fun peek scan = depend (fn st => scan st >> pair st);
   212 
   213 fun pass st scan xs =
   214   let val (y, (_, xs')) = scan (st, xs)
   215   in (y, xs') end;
   216 
   217 fun lift scan (st, xs) =
   218   let val (y, xs') = scan xs
   219   in (y, (st, xs')) end;
   220 
   221 fun unlift scan = pass () scan;
   222 
   223 
   224 (* trace input *)
   225 
   226 fun trace scan xs =
   227   let val (y, xs') = scan xs
   228   in ((y, take (length xs - length xs') xs), xs') end;
   229 
   230 
   231 (* stopper *)
   232 
   233 datatype 'a stopper = Stopper of ('a list -> 'a) * ('a -> bool);
   234 
   235 fun stopper mk_stopper is_stopper = Stopper (mk_stopper, is_stopper);
   236 fun is_stopper (Stopper (_, is_stopper)) = is_stopper;
   237 
   238 
   239 (* finite scans *)
   240 
   241 fun finite' (Stopper (mk_stopper, is_stopper)) scan (state, input) =
   242   let
   243     fun lost () = raise ABORT (fn () => "Bad scanner: lost stopper of finite scan!");
   244 
   245     fun stop [] = lost ()
   246       | stop lst =
   247           let val (xs, x) = split_last lst
   248           in if is_stopper x then ((), xs) else lost () end;
   249   in
   250     if exists is_stopper input then
   251       raise ABORT (fn () => "Stopper may not occur in input of finite scan!")
   252     else (strict scan --| lift stop) (state, input @ [mk_stopper input])
   253   end;
   254 
   255 fun finite stopper scan = unlift (finite' stopper (lift scan));
   256 
   257 fun read stopper scan xs =
   258   (case error (finite stopper (option scan)) xs of
   259     (y as SOME _, []) => y
   260   | _ => NONE);
   261 
   262 
   263 (* infinite scans -- draining state-based source *)
   264 
   265 fun drain def_prompt get stopper scan ((state, xs), src) =
   266   (scan (state, xs), src) handle MORE prompt =>
   267     (case get (the_default def_prompt prompt) src of
   268       ([], _) => (finite' stopper scan (state, xs), src)
   269     | (xs', src') => drain def_prompt get stopper scan ((state, xs @ xs'), src'));
   270 
   271 
   272 
   273 (** datatype lexicon -- position tree **)
   274 
   275 datatype lexicon = Lexicon of (bool * lexicon) Symtab.table;
   276 
   277 val empty_lexicon = Lexicon Symtab.empty;
   278 
   279 fun is_literal _ [] = false
   280   | is_literal (Lexicon tab) (c :: cs) =
   281       (case Symtab.lookup tab c of
   282         SOME (tip, lex) => tip andalso null cs orelse is_literal lex cs
   283       | NONE => false);
   284 
   285 
   286 (* scan longest match *)
   287 
   288 fun literal lexicon =
   289   let
   290     fun finish (SOME (res, rest)) = (rev res, rest)
   291       | finish NONE = raise FAIL NONE;
   292     fun scan _ res (Lexicon tab) [] = if Symtab.is_empty tab then finish res else raise MORE NONE
   293       | scan path res (Lexicon tab) (c :: cs) =
   294           (case Symtab.lookup tab (fst c) of
   295             SOME (tip, lex) =>
   296               let val path' = c :: path
   297               in scan path' (if tip then SOME (path', cs) else res) lex cs end
   298           | NONE => finish res);
   299   in scan [] NONE lexicon end;
   300 
   301 
   302 (* build lexicons *)
   303 
   304 fun extend_lexicon chrs lexicon =
   305   let
   306     fun ext [] lex = lex
   307       | ext (c :: cs) (Lexicon tab) =
   308           (case Symtab.lookup tab c of
   309             SOME (tip, lex) => Lexicon (Symtab.update (c, (tip orelse null cs, ext cs lex)) tab)
   310           | NONE => Lexicon (Symtab.update (c, (null cs, ext cs empty_lexicon)) tab));
   311   in if is_literal lexicon chrs then lexicon else ext chrs lexicon end;
   312 
   313 fun make_lexicon chrss = fold extend_lexicon chrss empty_lexicon;
   314 
   315 
   316 (* merge lexicons *)
   317 
   318 fun dest path (Lexicon tab) = Symtab.fold (fn (d, (tip, lex)) =>
   319   let
   320     val path' = d :: path;
   321     val content = dest path' lex;
   322   in append (if tip then rev path' :: content else content) end) tab [];
   323 
   324 val dest_lexicon = map implode o dest [];
   325 fun merge_lexicons (lex1, lex2) = fold extend_lexicon (dest [] lex2) lex1;
   326 
   327 end;
   328 
   329 structure Basic_Scan: BASIC_SCAN = Scan;
   330 open Basic_Scan;