src/Pure/General/scan.ML
author berghofe
Fri, 31 Aug 2001 16:17:52 +0200
changeset 11523 9a658fe20107
parent 10746 01e2d857fb78
child 13795 cfa3441c5238
permissions -rw-r--r--
Tuned function extend_lexicon.
berghofe@11523
     1
(*  Title:      Pure/General/scan.ML
berghofe@11523
     2
    ID:         $Id$
berghofe@11523
     3
    Author:     Markus Wenzel and Tobias Nipkow, TU Muenchen
wenzelm@8806
     4
    License:    GPL (GNU GENERAL PUBLIC LICENSE)
wenzelm@6116
     5
wenzelm@6116
     6
Generic scanners (for potentially infinite input).
wenzelm@6116
     7
*)
wenzelm@6116
     8
wenzelm@6116
     9
infix 5 -- :-- |-- --| ^^;
wenzelm@6116
    10
infix 3 >>;
wenzelm@6116
    11
infix 0 ||;
wenzelm@6116
    12
wenzelm@6116
    13
signature BASIC_SCAN =
sig
  (*turn FAIL of the given scanner into ABORT; the message is computed from
    the input and the optional failure reason*)
  val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b
  (*apply a function to the result of a scanner*)
  val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
  (*alternative: second scanner is tried if the first one raises FAIL*)
  val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
  (*sequential pairing*)
  val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
  (*dependent pairing: second scanner receives the result of the first*)
  val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
  (*sequential composition, keeping only the second result*)
  val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
  (*sequential composition, keeping only the first result*)
  val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
  (*sequential composition, concatenating both string results*)
  val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
  (*scan one element equal to the given one*)
  val $$ : ''a -> ''a list -> ''a * ''a list
end;
wenzelm@6116
    25
wenzelm@6116
    26
signature SCAN =
sig
  include BASIC_SCAN

  (*generic scanners*)
  val fail: 'a -> 'b
  val fail_with: ('a -> string) -> 'a -> 'b
  val succeed: 'a -> 'b -> 'a * 'b
  val one: ('a -> bool) -> 'a list -> 'a * 'a list
  val any: ('a -> bool) -> 'a list -> 'a list * 'a list
  val any1: ('a -> bool) -> 'a list -> 'a list * 'a list
  val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
  val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
  val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
  val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
  val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
  val first: ('a -> 'b) list -> 'a -> 'b

  (*state based scanners: input is a pair of state and remaining items*)
  val state: 'a * 'b -> 'a * ('a * 'b)
  val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
  val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
  val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e

  (*conversion between the scanner exceptions MORE, FAIL, ABORT*)
  val try: ('a -> 'b) -> 'a -> 'b
  val force: ('a -> 'b) -> 'a -> 'b
  val prompt: string -> ('a -> 'b) -> 'a -> 'b

  (*finite scans: input is terminated by a stopper element*)
  val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list))
    -> 'b * 'a list -> 'c * ('d * 'a list)
  val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
  val read: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b option

  (*handling of ABORT*)
  val catch: ('a -> 'b) -> 'a -> 'b
  val error: ('a -> 'b) -> 'a -> 'b

  (*draining a source: arguments are default prompt, get, unget, stopper,
    scanner, optional recovery scanner*)
  val source': string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
    'b * ('b -> bool) -> ('d * 'b list -> 'e list * ('d * 'b list)) ->
    ('d * 'b list -> 'e list * ('d * 'b list)) option -> 'd * 'a -> 'e list * ('d * 'c)
  val source: string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
    'b * ('b -> bool) -> ('b list -> 'd list * 'b list) ->
    ('b list -> 'd list * 'b list) option -> 'a -> 'd list * 'c
  val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a

  (*lexicons: sets of literals, each given as a list of symbols*)
  type lexicon
  val dest_lexicon: lexicon -> string list
  val make_lexicon: string list list -> lexicon
  val empty_lexicon: lexicon
  val extend_lexicon: lexicon -> string list list -> lexicon
  val merge_lexicons: lexicon -> lexicon -> lexicon
  val literal: lexicon -> string list -> string list * string list
end;
wenzelm@6116
    72
wenzelm@6116
    73
structure Scan: SCAN =
wenzelm@6116
    74
struct
wenzelm@6116
    75
wenzelm@6116
    76
wenzelm@6116
    77
(** scanners **)
wenzelm@6116
    78
berghofe@11523
    79
(*scanners communicate via these three exceptions*)
exception MORE of string option;        (*input exhausted, more is needed (optional prompt)*)
exception FAIL of string option;        (*scanner not applicable, try alternatives (optional reason)*)
exception ABORT of string;              (*dead end (error message)*)
wenzelm@6116
    82
wenzelm@6116
    83
wenzelm@6116
    84
(* scanner combinators *)
wenzelm@6116
    85
wenzelm@6116
    86
(*map f over the result of scan; the remaining input is passed through*)
fun (scan >> f) xs =
  let val (x, rest) = scan xs
  in (f x, rest) end;

(*alternative: if scan1 raises FAIL (its reason is dropped), scan2 is run
  on the very same input; other exceptions pass through*)
fun (scan1 || scan2) xs =
  (scan1 xs) handle FAIL _ => scan2 xs;
wenzelm@6116
    89
wenzelm@6116
    90
(*dependent pairing: scan2 is chosen according to the result of scan1 and
  run on the input left over by scan1; both results are paired*)
fun (scan1 :-- scan2) xs =
  (case scan1 xs of
    (x, ys) =>
      (case scan2 x ys of
        (y, zs) => ((x, y), zs)));
wenzelm@6116
    96
wenzelm@6116
    97
(*plain sequential pairing: scan2 does not depend on the result of scan1*)
fun (scan1 -- scan2) xs = (scan1 :-- (fn _ => scan2)) xs;

(*sequence, keeping only the second result*)
fun (scan1 |-- scan2) xs = (scan1 -- scan2 >> (fn (_, y) => y)) xs;

(*sequence, keeping only the first result*)
fun (scan1 --| scan2) xs = (scan1 -- scan2 >> (fn (x, _) => x)) xs;

(*sequence of two string scanners, concatenating their results*)
fun (scan1 ^^ scan2) xs = (scan1 -- scan2 >> (fn (s1, s2) => s1 ^ s2)) xs;
wenzelm@6116
   101
wenzelm@6116
   102
wenzelm@6116
   103
(* generic scanners *)
wenzelm@6116
   104
wenzelm@6116
   105
(*always fail, without a reason*)
fun fail _ = raise FAIL None;

(*always fail, with a reason computed from the input*)
fun fail_with msg_of xs =
  let val msg = msg_of xs
  in raise FAIL (Some msg) end;

(*always succeed with result y, consuming nothing*)
fun succeed y = fn xs => (y, xs);
wenzelm@6116
   108
wenzelm@6116
   109
(*scan a single item satisfying pred; empty input requests MORE,
  a non-matching item means FAIL*)
fun one pred xs =
  (case xs of
    [] => raise MORE None
  | y :: ys => if pred y then (y, ys) else raise FAIL None);
wenzelm@6116
   112
wenzelm@6116
   113
(*scan a single item equal to a; empty input requests MORE,
  a different item means FAIL*)
fun $$ a xs =
  (case xs of
    [] => raise MORE None
  | y :: ys => if a = y then (y, ys) else raise FAIL None);
wenzelm@6116
   116
wenzelm@6116
   117
(*scan the longest prefix of items satisfying pred (possibly empty);
  running out of input before a non-matching item is seen requests MORE*)
fun any pred xs =
  (case xs of
    [] => raise MORE None
  | y :: ys =>
      if pred y then
        let val (zs, rest) = any pred ys
        in (y :: zs, rest) end
      else ([], xs));
wenzelm@6116
   121
wenzelm@6116
   122
(*like any, but at least one item satisfying pred is required*)
fun any1 pred xs =
  let
    val (y, ys) = one pred xs;
    val (zs, rest) = any pred ys;
  in (y :: zs, rest) end;
wenzelm@6116
   123
wenzelm@6116
   124
(*run scan; if it raises FAIL, yield the default result and consume nothing*)
fun optional scan def xs =
  scan xs handle FAIL _ => (def, xs);

(*run scan, wrapping success as Some and FAIL as None*)
fun option scan xs = optional (scan >> Some) None xs;
wenzelm@6116
   126
wenzelm@6116
   127
(*apply scan as often as possible, collecting the results in order; stops
  (without consuming) at the first FAIL, other exceptions pass through.
  Implemented as an accumulating loop, so the stack does not grow with the
  number of scanned items and no handler stays installed across iterations.*)
fun repeat scan =
  let
    fun rep ys xs =
      (case (Some (scan xs) handle FAIL _ => None) of
        None => (rev ys, xs)
      | Some (y, xs') => rep (y :: ys) xs');
  in rep [] end;

(*like repeat, but the first application of scan must succeed*)
fun repeat1 scan = scan -- repeat scan >> op ::;
wenzelm@6116
   129
wenzelm@6116
   130
(*run both scanners on the same input and keep the greater result wrt. leq:
  the result of scan1 wins if leq (tok2, tok1) holds, otherwise that of
  scan2; if only one scanner succeeds its result is taken*)
fun max leq scan1 scan2 xs =
  let
    val res1 = option scan1 xs;
    val res2 = option scan2 xs;
  in
    (case (res1, res2) of
      ((Some tok1, xs1'), (Some tok2, xs2')) =>
        if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2')
    | ((Some tok1, xs'), (None, _)) => (tok1, xs')
    | ((None, _), (Some tok2, xs')) => (tok2, xs')
    | ((None, _), (None, _)) => raise FAIL None)          (*loses FAIL msg!*)
  end;
wenzelm@6116
   137
wenzelm@6116
   138
(*look ahead: yield the result of scan, but leave the input unconsumed*)
fun ahead scan xs =
  let val (y, _) = scan xs
  in (y, xs) end;

(*run scan only if test raises FAIL on the same input; if test succeeds
  the whole scanner fails*)
fun unless test scan xs =
  (case option test xs of
    (None, _) => scan xs
  | (Some _, _) => fail xs);
wenzelm@6116
   142
wenzelm@6116
   143
(*try the scanners from left to right, taking the first one that does not
  raise FAIL; an empty list of scanners fails*)
fun first scans xs =
  (case scans of
    [] => fail xs
  | scan :: rest => (scan || first rest) xs);
wenzelm@6116
   145
wenzelm@6116
   146
wenzelm@6116
   147
(* state based scanners *)
wenzelm@6116
   148
wenzelm@9122
   149
(*yield the current state as result, leaving state and input unchanged*)
fun state (inp as (st, _)) = (st, inp);
wenzelm@9122
   150
wenzelm@6116
   151
(*run a scanner that depends on the current state and yields the updated
  state together with its result*)
fun depend scan (st, xs) =
  (case scan st xs of
    ((st', y), xs') => (y, (st', xs')));
wenzelm@6116
   154
wenzelm@6116
   155
(*turn a plain scanner into a state based one that passes the state
  through unchanged*)
fun lift scan (st, xs) =
  (case scan xs of
    (y, xs') => (y, (st, xs')));
wenzelm@6116
   158
wenzelm@6116
   159
(*run a state based scanner from initial state st, discarding the final state*)
fun pass st scan xs =
  (case scan (st, xs) of
    (y, (_, xs')) => (y, xs'));
wenzelm@6116
   162
wenzelm@6116
   163
wenzelm@6116
   164
(* exception handling *)
wenzelm@6116
   165
wenzelm@6116
   166
(*FAIL becomes ABORT; the message is computed from the input and the
  optional failure reason*)
fun !! err scan xs =
  scan xs
    handle FAIL msg => raise ABORT (err (xs, msg));

(*both MORE and ABORT become a plain FAIL*)
fun try scan xs =
  let fun give_up () = raise FAIL None in
    scan xs
      handle MORE _ => give_up ()
        | ABORT _ => give_up ()
  end;

(*MORE becomes a plain FAIL*)
fun force scan xs =
  scan xs
    handle MORE _ => raise FAIL None;

(*attach prompt str to a MORE request that does not carry one yet*)
fun prompt str scan xs =
  scan xs
    handle MORE None => raise MORE (Some str);

(*ABORT becomes FAIL carrying the abort message as reason*)
fun catch scan xs =
  scan xs
    handle ABORT msg => raise FAIL (Some msg);

(*ABORT is reported via Library.error*)
fun error scan xs =
  scan xs
    handle ABORT msg => Library.error msg;
wenzelm@6116
   172
wenzelm@6116
   173
wenzelm@6116
   174
(* finite scans *)
wenzelm@6116
   175
wenzelm@6116
   176
(*state based scan over finite input: the stopper item is appended to the
  input, the scanner is run with MORE forced into FAIL, and the stopper is
  stripped from the remaining input again.  The input itself must not
  contain a stopper.*)
fun finite' (stopper, is_stopper) scan (st, inp) =
  let
    fun lost () = raise ABORT "Scanner bug: lost stopper of finite scan!";

    (*remove the trailing stopper from the remaining input*)
    fun stop rest =
      if null rest then lost ()
      else
        (case split_last rest of
          (front, last) => if is_stopper last then ((), front) else lost ());

    val _ =
      if exists is_stopper inp
      then raise ABORT "Stopper may not occur in input of finite scan!"
      else ();
  in (force scan --| lift stop) (st, inp @ [stopper]) end;
wenzelm@6116
   189
wenzelm@6116
   190
(*stateless version of finite', using the unit state*)
fun finite stopper scan xs =
  (case finite' stopper (lift scan) ((), xs) of
    (y, ((), xs')) => (y, xs'));
wenzelm@6116
   193
wenzelm@6116
   194
(*scan the complete finite input: yields Some result only if the scanner
  succeeds and no input is left over; ABORT is reported as an error*)
fun read stopper scan xs =
  let val outcome = error (finite stopper (option scan)) xs in
    (case outcome of
      (Some y, []) => Some y
    | _ => None)
  end;
wenzelm@6116
   198
wenzelm@6116
   199
wenzelm@6116
   200
(* infinite scans -- draining state-based source *)
wenzelm@6116
   201
wenzelm@6116
   202
(*run scan on the buffered input; whenever it requests MORE, further input
  is fetched via get (using the requested prompt, or def_prmpt if there is
  none) and appended to the buffer.  If get delivers nothing, the scan is
  finished as a finite one over the present buffer.*)
fun drain def_prmpt get stopper scan =
  let
    fun loop ((st, xs), src) =
      (scan (st, xs), src) handle MORE prmpt =>
        let val (more, src') = get (if_none prmpt def_prmpt) src in
          if null more then (finite' stopper scan (st, xs), src)
          else loop ((st, xs @ more), src')
        end;
  in loop end;
wenzelm@6116
   207
wenzelm@6116
   208
(*state based scan of a source: get an initial chunk of input, drain the
  source as required by the scanner, and unget the unconsumed input.
  Without a recovery scanner ABORT is reported as an error; with a recovery
  scanner a failure is printed via error_msg and the input is scanned again
  with the recovery scanner, guarded such that it does not apply when the
  input starts with a stopper.*)
fun source' def_prmpt get unget stopper scanner opt_recover (st, src) =
  let
    val drain_with = drain def_prmpt get stopper;

    fun drain_loop recover inp =
      drain_with (catch scanner) inp handle FAIL msg =>
        (error_msg (if_none msg "Syntax error."); drain_with recover inp);

    val (chunk, src0) = get def_prmpt src;

    val ((ys, (st', xs')), src') =
      if null chunk then (([], (st, [])), src0)
      else
        (case opt_recover of
          None => drain_with (error scanner) ((st, chunk), src0)
        | Some r =>
            drain_loop (unless (lift (one (#2 stopper))) r) ((st, chunk), src0));
  in (ys, (st', unget (xs', src'))) end;
wenzelm@6116
   222
wenzelm@6116
   223
(*stateless version of source', using the unit state*)
fun source def_prmpt get unget stopper scan opt_recover src =
  (case
    source' def_prmpt get unget stopper (lift scan) (apsome lift opt_recover) ((), src)
   of (ys, ((), src')) => (ys, src'));
wenzelm@6116
   227
wenzelm@6116
   228
(*scan exactly one item, delivered as singleton list*)
fun single scan xs =
  let val (y, rest) = scan xs
  in ([y], rest) end;

(*scan one item, then as many further ones as possible; for the further
  items MORE and ABORT merely end the repetition (via try)*)
fun bulk scan xs =
  (scan -- repeat (try scan) >> (fn (y, ys) => y :: ys)) xs;
wenzelm@6116
   230
wenzelm@6116
   231
wenzelm@6116
   232
wenzelm@6116
   233
(** datatype lexicon **)
wenzelm@6116
   234
wenzelm@6116
   235
(*ternary search tree over symbols: Branch (d, a, lt, eq, gt) holds the
  symbol d, the complete literal a ending at this node (no_literal if there
  is none), the subtrees for smaller and greater symbols at the same
  position (lt, gt), and the subtree for the next position (eq)*)
datatype lexicon =
    Empty
  | Branch of string * string list * lexicon * lexicon * lexicon;

(*marks a node where no literal ends*)
val no_literal = [];
wenzelm@6116
   240
wenzelm@6116
   241
wenzelm@6116
   242
(* dest_lexicon *)
wenzelm@6116
   243
wenzelm@7025
   244
(*all literals of a lexicon as symbol lists, by in-order traversal*)
fun dest_lex Empty = []
  | dest_lex (Branch (_, lit, lt, eq, gt)) =
      let val here = if null lit then [] else [lit]
      in dest_lex lt @ here @ dest_lex eq @ dest_lex gt end;

(*all literals of a lexicon as strings*)
fun dest_lexicon lex = map implode (dest_lex lex);
wenzelm@6116
   251
wenzelm@6116
   252
wenzelm@6116
   253
(* empty, extend, make, merge lexicons *)
wenzelm@6116
   254
wenzelm@6116
   255
val empty_lexicon = Empty;

(*add literals (given as symbol lists) to a lexicon; literals already
  present are filtered out beforehand, empty literals leave it unchanged*)
fun extend_lexicon lexicon chrss =
  if null chrss then lexicon
  else
    let
      fun insert (lex, chrs) =
        let
          (*descend along the remaining symbols; the node of the last symbol
            receives the complete literal chrs*)
          fun add tree [] = tree
            | add Empty [c] = Branch (c, chrs, Empty, Empty, Empty)
            | add Empty (c :: cs) = Branch (c, no_literal, Empty, add Empty cs, Empty)
            | add (Branch (d, a, lt, eq, gt)) (chs as c :: cs) =
                if c < d then Branch (d, a, add lt chs, eq, gt)
                else if c > d then Branch (d, a, lt, eq, add gt chs)
                else Branch (d, if null cs then chrs else a, lt, add eq cs, gt);
        in add lex chrs end;

      val new_chrss = chrss \\ dest_lex lexicon;
    in foldl insert (lexicon, new_chrss) end;
wenzelm@6116
   273
wenzelm@6116
   274
(*build a lexicon from scratch*)
fun make_lexicon chrss = extend_lexicon empty_lexicon chrss;

(*merge two lexicons; if one of them already covers the literals of the
  other it is returned as is, otherwise lex1 is extended by the literals
  of lex2*)
fun merge_lexicons lex1 lex2 =
  let
    val lits1 = dest_lex lex1;
    val lits2 = dest_lex lex2;
  in
    if lits2 subset lits1 then lex1
    else
      (case lits1 subset lits2 of
        true => lex2
      | false => extend_lexicon lex1 lits2)
  end;
wenzelm@6116
   285
wenzelm@6116
   286
wenzelm@6116
   287
(* scan literal *)
wenzelm@6116
   288
wenzelm@6116
   289
(*scan the longest literal of the lexicon that is a prefix of the input;
  yields the literal and the remaining input.  FAIL if no literal matches;
  MORE if the input ends while the lexicon could still continue.*)
fun literal lex chrs =
  let
    (*best: longest match found so far, together with the input after it*)
    fun walk tree best rest =
      (case (tree, rest) of
        (Empty, _) => best
      | (Branch _, []) => raise MORE None
      | (Branch (d, a, lt, eq, gt), c :: cs) =>
          if c < d then walk lt best rest
          else if c > d then walk gt best rest
          else walk eq (if a = no_literal then best else Some (a, cs)) cs);
  in
    (case walk lex None chrs of
      Some found => found
    | None => raise FAIL None)
  end;
wenzelm@6116
   302
wenzelm@6116
   303
wenzelm@6116
   304
end;
wenzelm@6116
   305
wenzelm@6116
   306
wenzelm@6116
   307
structure BasicScan: BASIC_SCAN = Scan;
wenzelm@6116
   308
open BasicScan;