src/Pure/General/scan.ML
author wenzelm
Mon, 26 Apr 2004 14:58:29 +0200
changeset 14677 33a37f091dc5
parent 14108 eaf3c75f2c8e
child 14686 708c613370ab
permissions -rw-r--r--
tuned presentation;
berghofe@11523
     1
(*  Title:      Pure/General/scan.ML
berghofe@11523
     2
    ID:         $Id$
berghofe@11523
     3
    Author:     Markus Wenzel and Tobias Nipkow, TU Muenchen
wenzelm@8806
     4
    License:    GPL (GNU GENERAL PUBLIC LICENSE)
wenzelm@6116
     5
wenzelm@6116
     6
Generic scanners (for potentially infinite input).
wenzelm@6116
     7
*)
wenzelm@6116
     8
wenzelm@6116
     9
infix 5 -- :-- |-- --| ^^;
wenzelm@6116
    10
infix 3 >>;
wenzelm@6116
    11
infix 0 ||;
wenzelm@6116
    12
wenzelm@6116
    13
signature BASIC_SCAN =
wenzelm@6116
    14
sig
wenzelm@14677
    15
  (*error msg handler*)
wenzelm@6116
    16
  val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b
wenzelm@14677
    17
  (*apply function*)
wenzelm@6116
    18
  val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
wenzelm@14677
    19
  (*alternative*)
wenzelm@6116
    20
  val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
wenzelm@14677
    21
  (*sequential pairing*)
wenzelm@6116
    22
  val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
wenzelm@14677
    23
  (*dependent pairing*)
wenzelm@6116
    24
  val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
wenzelm@14677
    25
  (*forget fst*)
wenzelm@6116
    26
  val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
wenzelm@14677
    27
  (*forget snd*)
wenzelm@6116
    28
  val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
wenzelm@14677
    29
  (*concatenation*)
wenzelm@6116
    30
  val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
wenzelm@14677
    31
  (*one element literal*)
wenzelm@6116
    32
  val $$ : ''a -> ''a list -> ''a * ''a list
wenzelm@6116
    33
end;
wenzelm@6116
    34
wenzelm@6116
    35
signature SCAN =
wenzelm@6116
    36
sig
wenzelm@6116
    37
  include BASIC_SCAN
wenzelm@6116
    38
  val fail: 'a -> 'b
wenzelm@6116
    39
  val fail_with: ('a -> string) -> 'a -> 'b
wenzelm@6116
    40
  val succeed: 'a -> 'b -> 'a * 'b
wenzelm@6116
    41
  val one: ('a -> bool) -> 'a list -> 'a * 'a list
wenzelm@6116
    42
  val any: ('a -> bool) -> 'a list -> 'a list * 'a list
wenzelm@6116
    43
  val any1: ('a -> bool) -> 'a list -> 'a list * 'a list
wenzelm@6116
    44
  val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
wenzelm@6116
    45
  val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
wenzelm@6116
    46
  val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
wenzelm@6116
    47
  val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
wenzelm@6116
    48
  val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
wenzelm@6116
    49
  val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
wenzelm@6116
    50
  val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
wenzelm@6116
    51
  val first: ('a -> 'b) list -> 'a -> 'b
wenzelm@14677
    52
  val state: 'a * 'b -> 'a * ('a * 'b)
wenzelm@6116
    53
  val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
wenzelm@6116
    54
  val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
wenzelm@6116
    55
  val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
wenzelm@6116
    56
  val try: ('a -> 'b) -> 'a -> 'b
wenzelm@6116
    57
  val force: ('a -> 'b) -> 'a -> 'b
wenzelm@6116
    58
  val prompt: string -> ('a -> 'b) -> 'a -> 'b
wenzelm@6116
    59
  val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list))
wenzelm@6116
    60
    -> 'b * 'a list -> 'c * ('d * 'a list)
wenzelm@6116
    61
  val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
wenzelm@6116
    62
  val read: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
wenzelm@6116
    63
  val catch: ('a -> 'b) -> 'a -> 'b
wenzelm@6116
    64
  val error: ('a -> 'b) -> 'a -> 'b
wenzelm@6116
    65
  val source': string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
wenzelm@6116
    66
    'b * ('b -> bool) -> ('d * 'b list -> 'e list * ('d * 'b list)) ->
wenzelm@10746
    67
    ('d * 'b list -> 'e list * ('d * 'b list)) option -> 'd * 'a -> 'e list * ('d * 'c)
wenzelm@6116
    68
  val source: string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
wenzelm@6116
    69
    'b * ('b -> bool) -> ('b list -> 'd list * 'b list) ->
wenzelm@10746
    70
    ('b list -> 'd list * 'b list) option -> 'a -> 'd list * 'c
wenzelm@6116
    71
  val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
wenzelm@6116
    72
  val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
wenzelm@6116
    73
  type lexicon
wenzelm@7025
    74
  val dest_lexicon: lexicon -> string list
wenzelm@6116
    75
  val make_lexicon: string list list -> lexicon
wenzelm@6116
    76
  val empty_lexicon: lexicon
wenzelm@6116
    77
  val extend_lexicon: lexicon -> string list list -> lexicon
wenzelm@6116
    78
  val merge_lexicons: lexicon -> lexicon -> lexicon
wenzelm@6116
    79
  val literal: lexicon -> string list -> string list * string list
wenzelm@6116
    80
end;
wenzelm@6116
    81
wenzelm@6116
    82
structure Scan: SCAN =
wenzelm@6116
    83
struct
wenzelm@6116
    84
wenzelm@6116
    85
wenzelm@6116
    86
(** scanners **)
wenzelm@6116
    87
berghofe@11523
    88
exception MORE of string option;        (*need more input; optionally carries the prompt to use*)
berghofe@11523
    89
exception FAIL of string option;        (*try alternatives; optionally carries the reason of failure*)
berghofe@11523
    90
exception ABORT of string;              (*dead end; carries the error message*)
wenzelm@6116
    91
wenzelm@6116
    92
wenzelm@6116
    93
(* scanner combinators *)
wenzelm@6116
    94
wenzelm@14677
    95
(*dependent pairing: run sc1, then run sc2 applied to the result of sc1 on
  the remaining input; both results are returned as a pair*)
fun (sc1 :-- sc2) toks =
  (case sc1 toks of
    (x, rest) =>
      (case sc2 x rest of
        (y, rest') => ((x, y), rest')));
kleing@14078
   101
wenzelm@14677
   102
(*sequential pairing: run sc1, then sc2 on the remaining input;
  both results are returned as a pair*)
fun (sc1 -- sc2) toks =
  (case sc1 toks of
    (x, rest) =>
      (case sc2 rest of
        (y, rest') => ((x, y), rest')));
kleing@14078
   108
wenzelm@14677
   109
(*application: run sc and apply f to its result; the remaining input is
  passed through unchanged*)
fun (sc >> f) toks =
  (case sc toks of
    (x, rest) => (f x, rest));
kleing@14078
   113
wenzelm@14677
   114
(*forget snd: run sc1 then sc2, keeping only the result of sc1*)
fun (sc1 --| sc2) toks =
  (case sc1 toks of
    (x, rest) =>
      (case sc2 rest of
        (_, rest') => (x, rest')));
kleing@14078
   120
wenzelm@14677
   121
(*forget fst: run sc1 then sc2, keeping only the result of sc2*)
fun (sc1 |-- sc2) toks =
  (case sc1 toks of
    (_, rest) => sc2 rest);
kleing@14078
   125
wenzelm@14677
   126
(*concatenation: run sc1 then sc2 and join their string results*)
fun (sc1 ^^ sc2) toks =
  (case sc1 toks of
    (s1, rest) =>
      (case sc2 rest of
        (s2, rest') => (s1 ^ s2, rest')));
berghofe@14108
   132
wenzelm@14677
   133
(*alternative: try the first scanner; if it raises FAIL, run the second
  one on the same input*)
fun (first_choice || second_choice) input =
  first_choice input
    handle FAIL _ => second_choice input;
wenzelm@6116
   135
wenzelm@6116
   136
wenzelm@6116
   137
(* generic scanners *)
wenzelm@6116
   138
wenzelm@6116
   139
(*scanner that always fails, without a message*)
fun fail _ = raise FAIL None;
wenzelm@6116
   140
(*scanner that always fails; the failure message is computed from the input*)
fun fail_with msg_of input =
  let val reason = msg_of input
  in raise FAIL (Some reason) end;
wenzelm@6116
   141
(*scanner that always succeeds with result y, consuming no input*)
fun succeed y = fn input => (y, input);
wenzelm@6116
   142
wenzelm@6116
   143
(*scan one element satisfying pred; raises MORE on empty input and
  FAIL if the first element does not satisfy pred*)
fun one pred input =
  (case input of
    [] => raise MORE None
  | x :: rest => if pred x then (x, rest) else raise FAIL None);
wenzelm@6116
   146
wenzelm@6116
   147
(*one element literal: scan exactly the element a; raises MORE on empty
  input and FAIL if the first element differs from a*)
fun $$ a input = one (fn x => a = x) input;
wenzelm@6116
   150
wenzelm@6116
   151
(*scan the longest prefix of elements satisfying pred (possibly empty).
  The prefix must be followed by an element that fails pred: if the input
  runs out first, MORE is raised, since further matching elements might
  still arrive.

  Matching elements are collected in an accumulator and reversed at the
  end, so the loop is tail-recursive and stack usage does not grow with
  the length of the prefix (same technique as in repeat).*)
fun any pred input =
  let
    fun collect _ [] = raise MORE None
      | collect acc (lst as x :: xs) =
          if pred x then collect (x :: acc) xs
          else (rev acc, lst);
  in collect [] input end;
wenzelm@6116
   155
kleing@14078
   156
(*like any, but requires at least one element satisfying p*)
fun any1 p toks = (one p -- any p >> (op ::)) toks;
wenzelm@6116
   161
kleing@14078
   162
(*run scan; if it raises FAIL, yield the default def without consuming input*)
fun optional scan def input =
  scan input
    handle FAIL _ => (def, input);
kleing@14078
   163
(*run scan and wrap its result in Some; if it raises FAIL, yield None
  without consuming input*)
fun option scan input =
  let val (y, rest) = scan input
  in (Some y, rest) end
  handle FAIL _ => (None, input);
wenzelm@6116
   164
berghofe@13795
   165
(*apply scan repeatedly until it raises FAIL; returns the results in order
  of occurrence together with the input at the point of failure.
  The handler only wraps a single scan step, which keeps the loop
  tail-recursive.*)
fun repeat scan =
  let
    fun attempt input = Some (scan input) handle FAIL _ => None;
    fun loop acc input =
      (case attempt input of
        Some (y, rest) => loop (y :: acc) rest
      | None => (rev acc, input));
  in loop [] end;
berghofe@13795
   169
kleing@14078
   170
(*like repeat, but scan must succeed at least once*)
fun repeat1 scan toks = (scan -- repeat scan >> (op ::)) toks;
wenzelm@6116
   175
wenzelm@6116
   176
(*run both scanners on the same input and pick a result: if only one
  succeeds, its result is taken; if both succeed, the result of scan1 is
  taken when leq (tok2, tok1) holds, otherwise that of scan2; if both
  fail, FAIL is raised*)
fun max leq scan1 scan2 xs =
  let
    val (res1, rest1) = option scan1 xs;
    val (res2, rest2) = option scan2 xs;
  in
    (case (res1, res2) of
      (None, None) => raise FAIL None           (*loses FAIL msg!*)
    | (Some tok1, None) => (tok1, rest1)
    | (None, Some tok2) => (tok2, rest2)
    | (Some tok1, Some tok2) =>
        if leq (tok2, tok1) then (tok1, rest1) else (tok2, rest2))
  end;
wenzelm@6116
   183
wenzelm@6116
   184
(*lookahead: run scan, keep its result, but return the original input*)
fun ahead scan input =
  let val (y, _) = scan input
  in (y, input) end;
wenzelm@6116
   185
wenzelm@6116
   186
(*run scan only if test fails on the current input (checked by lookahead,
  so test consumes nothing); if test succeeds, fail*)
fun unless test scan input =
  (case ahead (option test) input of
    (None, _) => scan input
  | _ => fail input);
wenzelm@6116
   188
wenzelm@6116
   189
(*chain a list of scanners with ||, in list order; the empty list gives
  the scanner that always fails*)
fun first scans =
  (case scans of
    [] => fail
  | scan :: rest => scan || first rest);
wenzelm@6116
   191
wenzelm@6116
   192
wenzelm@6116
   193
(* state based scanners *)
wenzelm@6116
   194
wenzelm@9122
   195
(*yield the current state as result, leaving state and input unchanged*)
fun state (whole as (st, _)) = (st, whole);
wenzelm@9122
   196
wenzelm@6116
   197
(*turn a scanner that takes the state as an argument and returns a new
  state paired with its result into a scanner on (state, input) pairs*)
fun depend scan (st, xs) =
  (case scan st xs of
    ((st', y), rest) => (y, (st', rest)));
wenzelm@6116
   200
wenzelm@6116
   201
(*turn a plain scanner into a scanner on (state, input) pairs that passes
  the state through unchanged*)
fun lift scan (st, xs) =
  (case scan xs of
    (y, rest) => (y, (st, rest)));
wenzelm@6116
   204
wenzelm@6116
   205
(*run a state-based scanner with initial state st as a plain scanner;
  the final state is discarded*)
fun pass st scan xs =
  (case scan (st, xs) of
    (y, (_, rest)) => (y, rest));
wenzelm@6116
   208
wenzelm@6116
   209
wenzelm@6116
   210
(* exception handling *)
wenzelm@6116
   211
wenzelm@6116
   212
(*error msg handler: turn a FAIL of the scanner into an ABORT whose message
  is computed by mk_msg from the input and the optional failure reason*)
fun !! mk_msg scanner input =
  scanner input
    handle FAIL reason => raise ABORT (mk_msg (input, reason));
wenzelm@6116
   213
(*turn both MORE and ABORT of the scanner into a plain FAIL without message*)
fun try scanner input =
  scanner input handle
    MORE _ => raise FAIL None
  | ABORT _ => raise FAIL None;
wenzelm@6116
   214
(*turn MORE of the scanner into a plain FAIL without message*)
fun force scanner input =
  scanner input
    handle MORE _ => raise FAIL None;
wenzelm@6116
   215
(*attach the prompt str to a MORE that does not carry a prompt yet;
  a MORE that already has a prompt is passed on unchanged*)
fun prompt str scanner input =
  scanner input
    handle MORE None => raise MORE (Some str);
wenzelm@6116
   216
(*turn ABORT of the scanner into a FAIL carrying the same message*)
fun catch scanner input =
  scanner input
    handle ABORT text => raise FAIL (Some text);
wenzelm@6116
   217
(*hand the message of an ABORT of the scanner to Library.error*)
fun error scanner input =
  scanner input
    handle ABORT text => Library.error text;
wenzelm@6116
   218
wenzelm@6116
   219
wenzelm@6116
   220
(* finite scans *)
wenzelm@6116
   221
wenzelm@6116
   222
(*state-based finite scan: append the stopper element to the input, run
  scan with MORE turned into FAIL (via force), and strip the stopper from
  the end of the remaining input afterwards.  ABORT is raised if the input
  already contains a stopper, or if the stopper is no longer the last
  element of the remaining input.*)
fun finite' (stopper, is_stopper) scan (state, input) =
  let
    fun lost () = raise ABORT "Scanner bug: lost stopper of finite scan!";

    (*remove the trailing stopper from the remaining input*)
    fun stop remaining =
      if null remaining then lost ()
      else
        let val (front, last_item) = split_last remaining in
          if is_stopper last_item then ((), front) else lost ()
        end;

    val _ =
      if exists is_stopper input
      then raise ABORT "Stopper may not occur in input of finite scan!"
      else ();

    (*same list as input followed by the stopper, built via rev_append*)
    val stopped_input = rev_append (rev input) [stopper];
  in (force scan --| lift stop) (state, stopped_input) end;
wenzelm@6116
   235
wenzelm@6116
   236
(*finite scan for plain scanners: finite' with unit as dummy state*)
fun finite stopper scan xs =
  (case finite' stopper (lift scan) ((), xs) of
    (y, ((), rest)) => (y, rest));
wenzelm@6116
   239
wenzelm@6116
   240
(*scan the complete finite input: yields Some result only if scan succeeds
  and consumes all of the input, None otherwise; an ABORT is reported via
  error*)
fun read stopper scan xs =
  let val (result, rest) = error (finite stopper (option scan)) xs
  in if null rest then result else None end;
wenzelm@6116
   244
wenzelm@6116
   245
wenzelm@6116
   246
(* infinite scans -- draining state-based source *)
wenzelm@6116
   247
wenzelm@6116
   248
(*run scan on (state, xs); whenever it raises MORE, fetch further input from
  src via get, using the prompt carried by MORE or else def_prmpt.  If get
  delivers nothing, scan is run as a finite scan over what is buffered;
  otherwise the new input is appended and the scan is retried.*)
fun drain def_prmpt get stopper scan ((state, xs), src) =
  let
    fun refill prmpt =
      (case get (if_none prmpt def_prmpt) src of
        ([], _) => (finite' stopper scan (state, xs), src)
      | (more, src') => drain def_prmpt get stopper scan ((state, xs @ more), src'));
  in
    (scan (state, xs), src)
      handle MORE prmpt => refill prmpt
  end;
wenzelm@6116
   253
wenzelm@6116
   254
(*state-based scanning of a source: fetch initial input via get and drain
  the source with scanner.  Without a recovery scanner, an ABORT is reported
  via error.  With a recovery scanner r, a failure of scanner is reported
  via error_msg and the same input is scanned again with r, guarded so that
  it does not apply when the next element is a stopper.  Unconsumed input
  is handed back to the source via unget.*)
fun source' def_prmpt get unget stopper scanner opt_recover (state, src) =
  let
    val (_, is_stopper) = stopper;
    val drain_with = drain def_prmpt get stopper;

    fun with_recovery recover inp =
      drain_with (catch scanner) inp
        handle FAIL msg =>
          (error_msg (if_none msg "Syntax error."); drain_with recover inp);

    val (first_input, src0) = get def_prmpt src;

    val ((ys, (state', rest)), src') =
      (case first_input of
        [] => (([], (state, [])), src0)
      | _ =>
          (case opt_recover of
            None => drain_with (error scanner) ((state, first_input), src0)
          | Some r =>
              with_recovery (unless (lift (one is_stopper)) r) ((state, first_input), src0)));
  in (ys, (state', unget (rest, src'))) end;
wenzelm@6116
   268
wenzelm@6116
   269
(*scanning of a source with plain scanners: source' with unit as dummy
  state, lifting both the scanner and the optional recovery scanner*)
fun source def_prmpt get unget stopper scan opt_recover src =
  let
    val lifted_recover = apsome lift opt_recover;
    val (ys, ((), src')) =
      source' def_prmpt get unget stopper (lift scan) lifted_recover ((), src);
  in (ys, src') end;
wenzelm@6116
   273
wenzelm@6116
   274
(*run scan once and return its result as a singleton list*)
fun single scan input =
  let val (y, rest) = scan input
  in ([y], rest) end;
wenzelm@6116
   275
(*run scan once, then keep applying it for as long as it succeeds; in the
  repetition MORE and ABORT count as failure (via try)*)
fun bulk scan input =
  let
    val (y, rest) = scan input;
    val (ys, rest') = repeat (try scan) rest;
  in (y :: ys, rest') end;
wenzelm@6116
   276
wenzelm@6116
   277
wenzelm@6116
   278
wenzelm@6116
   279
(** datatype lexicon **)
wenzelm@6116
   280
wenzelm@6116
   281
(*ternary search tree over symbols.  In Branch (d, a, lt, eq, gt):
    d   symbol at this node
    a   literal (as symbol list) that ends at this node, or no_literal
    lt  subtree for symbols less than d
    eq  subtree for the continuation after d
    gt  subtree for symbols greater than d*)
datatype lexicon =
  Empty
| Branch of string * string list * lexicon * lexicon * lexicon;
wenzelm@6116
   284
wenzelm@6116
   285
(*marks a Branch node at which no literal ends*)
val no_literal = [];
wenzelm@6116
   286
wenzelm@6116
   287
wenzelm@6116
   288
(* dest_lexicon *)
wenzelm@6116
   289
wenzelm@7025
   290
(*list all literals of a lexicon as symbol lists: for each node the lt
  subtree comes first, then the literal ending at the node (if any), then
  the eq and gt subtrees*)
fun dest_lex Empty = []
  | dest_lex (Branch (_, cs, lt, eq, gt)) =
      let val here = if null cs then [] else [cs]
      in dest_lex lt @ here @ dest_lex eq @ dest_lex gt end;
wenzelm@7025
   295
wenzelm@7025
   296
(*list all literals of a lexicon as strings*)
fun dest_lexicon lex = map implode (dest_lex lex);
wenzelm@6116
   297
wenzelm@6116
   298
wenzelm@6116
   299
(* empty, extend, make, merge lexicons *)
wenzelm@6116
   300
wenzelm@6116
   301
(*the lexicon without any literals*)
val empty_lexicon = Empty;
wenzelm@6116
   302
berghofe@11523
   303
(*add literals (given as symbol lists) to a lexicon; literals that are
  already present are filtered out beforehand, and an empty list of new
  literals returns the lexicon unchanged without inspecting it*)
fun extend_lexicon lexicon [] = lexicon
  | extend_lexicon lexicon chrss =
      let
        (*insert a single literal syms into tree*)
        fun insert (tree, syms) =
          let
            fun descend node [] = node
              | descend Empty [c] =
                  Branch (c, syms, Empty, Empty, Empty)
              | descend Empty (c :: cs) =
                  Branch (c, no_literal, Empty, descend Empty cs, Empty)
              | descend (Branch (d, a, lt, eq, gt)) (chs as c :: cs) =
                  if c < d then Branch (d, a, descend lt chs, eq, gt)
                  else if c > d then Branch (d, a, lt, eq, descend gt chs)
                  (*matching symbol: the literal ends here iff no symbols are left*)
                  else Branch (d, if null cs then syms else a, lt, descend eq cs, gt);
          in descend tree syms end;

        val fresh = chrss \\ dest_lex lexicon;
      in foldl insert (lexicon, fresh) end;
wenzelm@6116
   319
wenzelm@6116
   320
(*build a lexicon from literals given as symbol lists*)
fun make_lexicon chrss = extend_lexicon empty_lexicon chrss;
wenzelm@6116
   321
wenzelm@6116
   322
(*merge two lexicons: if the literals of one are contained in the other,
  the larger lexicon is returned as is; otherwise lex1 is extended by the
  literals of lex2*)
fun merge_lexicons lex1 lex2 =
  let
    val lits1 = dest_lex lex1;
    val lits2 = dest_lex lex2;
  in
    if lits2 subset lits1 then lex1
    else if not (lits1 subset lits2) then extend_lexicon lex1 lits2
    else lex2
  end;
wenzelm@6116
   331
wenzelm@6116
   332
wenzelm@6116
   333
(* scan literal *)
wenzelm@6116
   334
wenzelm@6116
   335
(*scan the longest literal of the lexicon at the start of the input;
  returns the literal as symbol list together with the input following it.
  Raises FAIL if no literal matches, and MORE if the input ends while the
  lexicon could still match further symbols.*)
fun literal lex chrs =
  let
    (*best: last literal matched so far, paired with the input after it*)
    fun walk Empty best _ = best
      | walk (Branch _) _ [] = raise MORE None
      | walk (Branch (d, a, lt, eq, gt)) best (input as c :: rest) =
          if c < d then walk lt best input
          else if c > d then walk gt best input
          else walk eq (if a = no_literal then best else Some (a, rest)) rest;
  in
    (case walk lex None chrs of
      Some found => found
    | None => raise FAIL None)
  end;
wenzelm@6116
   348
wenzelm@6116
   349
wenzelm@6116
   350
end;
wenzelm@6116
   351
wenzelm@6116
   352
wenzelm@6116
   353
(*view of Scan restricted to the basic combinators of BASIC_SCAN*)
structure BasicScan: BASIC_SCAN = Scan;
wenzelm@6116
   354
(*make the basic combinators available unqualified*)
open BasicScan;