src/Pure/General/scan.ML
author wenzelm
Sat, 23 Jul 2011 16:37:17 +0200
changeset 44818 9b00f09f7721
parent 40875 becf5d5187cc
child 49758 a72f8ffecf31
permissions -rw-r--r--
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
berghofe@11523
     1
(*  Title:      Pure/General/scan.ML
berghofe@11523
     2
    Author:     Markus Wenzel and Tobias Nipkow, TU Muenchen
wenzelm@6116
     3
wenzelm@6116
     4
Generic scanners (for potentially infinite input).
wenzelm@6116
     5
*)
wenzelm@6116
     6
wenzelm@24025
     7
(*sequencing combinators: left-associative, priority 5*)
infix 5 -- :-- :|-- |-- --| ^^;
wenzelm@25999
     8
(*list-building combinators: right-associative, priority 5*)
infixr 5 ::: @@@;
wenzelm@6116
     9
(*result mapping: left-associative, priority 3 (binds weaker than sequencing)*)
infix 3 >>;
wenzelm@23699
    10
(*alternative: right-associative, priority 0 (weakest binding)*)
infixr 0 ||;
wenzelm@6116
    11
wenzelm@6116
    12
(*basic scanner combinators; structure Basic_Scan with this signature is
  opened at the end of the file*)
signature BASIC_SCAN =
sig
  (*failure message, evaluated on demand*)
  type message = unit -> string

  (*error msg handler: failure of the scanner becomes an abort, with a
    message built from the input and the optional failure reason*)
  val !! : ('a * message option -> message) -> ('a -> 'b) -> 'a -> 'b

  (*apply function to the scanned result*)
  val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c

  (*alternative: try second scanner if the first one fails*)
  val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b

  (*sequential pairing*)
  val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e

  (*dependent pairing: second scanner receives the first result*)
  val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e

  (*projections: sequential scanning that keeps only one of the results*)
  val :|-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> 'd * 'e
  val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
  val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e

  (*concatenation of string results and of list results*)
  val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
  val ::: : ('a -> 'b * 'c) * ('c -> 'b list * 'd) -> 'a -> 'b list * 'd
  val @@@ : ('a -> 'b list * 'c) * ('c -> 'b list * 'd) -> 'a -> 'b list * 'd

  (*one element literal: $$ accepts exactly the given string, ~$$ any other*)
  val $$ : string -> string list -> string * string list
  val ~$$ : string -> string list -> string * string list
end;
wenzelm@6116
    37
wenzelm@6116
    38
(*full interface of structure Scan*)
signature SCAN =
sig
  include BASIC_SCAN

  (*exception handling*)
  val prompt: string -> ('a -> 'b) -> 'a -> 'b
  val error: ('a -> 'b) -> 'a -> 'b
  val catch: ('a -> 'b) -> 'a -> 'b    (*exception Fail*)

  (*generic scanners*)
  val fail: 'a -> 'b
  val fail_with: ('a -> message) -> 'a -> 'b
  val succeed: 'a -> 'b -> 'a * 'b
  val some: ('a -> 'b option) -> 'a list -> 'b * 'a list
  val one: ('a -> bool) -> 'a list -> 'a * 'a list
  val this: string list -> string list -> string list * string list
  val this_string: string -> string list -> string * string list
  val many: ('a -> bool) -> 'a list -> 'a list * 'a list
  val many1: ('a -> bool) -> 'a list -> 'a list * 'a list
  val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
  val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
  val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
  val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
  val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
  val first: ('a -> 'b) list -> 'a -> 'b

  (*state based scanners*)
  val state: 'a * 'b -> 'a * ('a * 'b)
  val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
  val peek: ('a -> 'b -> 'c * 'd) -> 'a * 'b -> 'c * ('a * 'd)
  val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
  val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
  val unlift: (unit * 'a -> 'b * ('c * 'd)) -> 'a -> 'b * 'd

  (*trace input*)
  val trace: ('a list -> 'b * 'c list) -> 'a list -> ('b * 'a list) * 'c list

  (*stopper*)
  type 'a stopper
  val stopper: ('a list -> 'a) -> ('a -> bool) -> 'a stopper
  val is_stopper: 'a stopper -> 'a -> bool

  (*finite scans*)
  val finite': 'a stopper -> ('b * 'a list -> 'c * ('d * 'a list))
    -> 'b * 'a list -> 'c * ('d * 'a list)
  val finite: 'a stopper -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
  val read: 'a stopper -> ('a list -> 'b * 'a list) -> 'a list -> 'b option

  (*infinite scans -- draining state-based source*)
  val drain: string -> (string -> 'a -> 'b list * 'a) -> 'b stopper ->
    ('c * 'b list -> 'd * ('e * 'b list)) -> ('c * 'b list) * 'a -> ('d * ('e * 'b list)) * 'a

  (*lexicon*)
  type lexicon
  val is_literal: lexicon -> string list -> bool
  val literal: lexicon -> (string * 'a) list -> (string * 'a) list * (string * 'a) list
  val empty_lexicon: lexicon
  val extend_lexicon: string list -> lexicon -> lexicon
  val make_lexicon: string list list -> lexicon
  val dest_lexicon: lexicon -> string list
  val merge_lexicons: lexicon * lexicon -> lexicon
end;
wenzelm@6116
    88
wenzelm@6116
    89
structure Scan: SCAN =
wenzelm@6116
    90
struct
wenzelm@6116
    91
wenzelm@6116
    92
wenzelm@6116
    93
(** scanners **)
wenzelm@6116
    94
wenzelm@23699
    95
(* exceptions *)
wenzelm@23699
    96
wenzelm@44818
    97
(*message text, produced only when actually demanded*)
type message = unit -> string;

(*need more input (prompt)*)
exception MORE of string option;
(*try alternatives (reason of failure)*)
exception FAIL of message option;
(*dead end*)
exception ABORT of message;
wenzelm@6116
   102
wenzelm@23699
   103
(*cut: FAIL of scan is turned into ABORT; the abort message is obtained by
  applying err to the original input and the optional failure reason*)
fun !! err scan input =
  scan input
    handle FAIL reason => raise ABORT (err (input, reason));
wenzelm@23699
   104
(*both MORE and ABORT of scan are reported as plain FAIL without reason*)
fun permissive scan input =
  scan input handle
    MORE _ => raise FAIL NONE
  | ABORT _ => raise FAIL NONE;
wenzelm@23699
   105
(*MORE of scan is reported as plain FAIL without reason*)
fun strict scan input =
  scan input
    handle MORE _ => raise FAIL NONE;
wenzelm@23699
   106
(*attach prompt str to a MORE request that does not carry one yet;
  MORE with an existing prompt passes through unchanged*)
fun prompt str scan input =
  scan input
    handle MORE NONE => raise MORE (SOME str);
wenzelm@44818
   107
(*ABORT of scan is passed to Library.error, forcing the message text*)
fun error scan input =
  scan input
    handle ABORT msg => Library.error (msg ());
wenzelm@23699
   108
wenzelm@23699
   109
(*ABORT and FAIL of scan are converted to exception Fail with the forced
  message text; FAIL without reason yields "Syntax error"*)
fun catch scan input =
  scan input handle
    ABORT msg => raise Fail (msg ())
  | FAIL NONE => raise Fail "Syntax error"
  | FAIL (SOME msg) => raise Fail (msg ());
wenzelm@23699
   112
wenzelm@6116
   113
wenzelm@6116
   114
(* scanner combinators *)
wenzelm@6116
   115
wenzelm@19306
   116
(*apply f to the result of scan, keeping the remaining input*)
fun (scan >> f) xs =
  let val (x, rest) = scan xs
  in (f x, rest) end;
wenzelm@19306
   117
wenzelm@19306
   118
(*alternative: if scan1 raises FAIL, scan2 is run on the same input*)
fun (scan1 || scan2) input =
  scan1 input
    handle FAIL _ => scan2 input;
wenzelm@19306
   119
wenzelm@19306
   120
(*dependent pairing: scan2 is parameterized by the result of scan1 and
  continues on the input left over by scan1*)
fun (scan1 :-- scan2) input =
  (case scan1 input of
    (x, rest1) =>
      (case scan2 x rest1 of
        (y, rest2) => ((x, y), rest2)));
kleing@14078
   125
wenzelm@19306
   126
(*sequential pairing: run scan1, then scan2 on the rest, pair the results*)
fun (scan1 -- scan2) xs =
  let
    val (x, ys) = scan1 xs;
    val (y, zs) = scan2 ys;
  in ((x, y), zs) end;
wenzelm@24025
   127
(*dependent sequencing that keeps only the result of the second scanner*)
fun (scan1 :|-- scan2) xs =
  let
    val (x, ys) = scan1 xs;
    val (y, zs) = scan2 x ys;
  in (y, zs) end;
wenzelm@19306
   128
(*sequencing that discards the result of scan1*)
fun (scan1 |-- scan2) xs =
  let
    val (_, ys) = scan1 xs;
    val (y, zs) = scan2 ys;
  in (y, zs) end;
wenzelm@19306
   129
(*sequencing that discards the result of scan2*)
fun (scan1 --| scan2) xs =
  let
    val (x, ys) = scan1 xs;
    val (_, zs) = scan2 ys;
  in (x, zs) end;
wenzelm@19306
   130
(*sequencing of two string scanners, concatenating their results*)
fun (scan1 ^^ scan2) xs =
  let
    val (a, ys) = scan1 xs;
    val (b, zs) = scan2 ys;
  in (a ^ b, zs) end;
wenzelm@25999
   131
(*sequencing that conses the result of scan1 onto the list from scan2*)
fun (scan1 ::: scan2) xs =
  let
    val (y, ys) = scan1 xs;
    val (rest, zs) = scan2 ys;
  in (y :: rest, zs) end;
wenzelm@25999
   132
(*sequencing of two list scanners, appending their results*)
fun (scan1 @@@ scan2) xs =
  let
    val (front, ys) = scan1 xs;
    val (back, zs) = scan2 ys;
  in (front @ back, zs) end;
wenzelm@6116
   133
wenzelm@6116
   134
wenzelm@6116
   135
(* generic scanners *)
wenzelm@6116
   136
skalberg@15531
   137
(*scanner that fails on any input, without reason*)
fun fail (_: 'a) : 'b = raise FAIL NONE;
skalberg@15531
   138
(*scanner that fails on any input; the reason is the (lazy) message that
  msg_of yields for this input*)
fun fail_with msg_of input =
  let val reason = msg_of input
  in raise FAIL (SOME reason) end;
wenzelm@6116
   139
(*scanner that yields the given result without consuming any input*)
fun succeed result input = (result, input);
wenzelm@6116
   140
wenzelm@15664
   141
(*scan one element that f maps to SOME result; empty input requests MORE,
  a rejected element raises FAIL*)
fun some f input =
  (case input of
    [] => raise MORE NONE
  | x :: rest =>
      (case f x of
        SOME y => (y, rest)
      | NONE => raise FAIL NONE));
wenzelm@15664
   144
skalberg@15531
   145
(*scan one element satisfying pred; empty input requests MORE, a rejected
  element raises FAIL*)
fun one pred input =
  (case input of
    [] => raise MORE NONE
  | x :: rest => if pred x then (x, rest) else raise FAIL NONE);
wenzelm@6116
   148
wenzelm@19306
   149
(*scan one element equal to the given string*)
fun $$ (a: string) = one (fn s => s = a);
wenzelm@19306
   150
(*scan one element different from the given string*)
fun ~$$ (a: string) = one (fn s => s <> a);
wenzelm@6116
   151
wenzelm@14833
   152
(*scan exactly the sequence ys from the front of xs: input ending before
  the sequence is complete requests MORE, a mismatch raises FAIL*)
fun this ys xs =
  let
    fun strip (pattern, input) =
      (case (pattern, input) of
        ([], rest) => rest
      | (_ :: _, []) => raise MORE NONE
      | (p :: ps, (c: string) :: cs) =>
          if p = c then strip (ps, cs) else raise FAIL NONE);
  in (ys, strip (ys, xs)) end;
wenzelm@14726
   159
wenzelm@40875
   160
(*scan the raw characters of s, yielding s itself
  (primitive string -- no symbols here!)*)
fun this_string s = this (raw_explode s) >> (fn _ => s);
wenzelm@14907
   161
wenzelm@21858
   162
(*scan the longest prefix of elements satisfying pred; reaching the end of
  the input requests MORE, since further elements might still belong to it*)
fun many pred input =
  (case input of
    [] => raise MORE NONE
  | x :: rest =>
      if pred x then
        let val (ys, remaining) = many pred rest
        in (x :: ys, remaining) end
      else ([], input));
wenzelm@6116
   166
wenzelm@25999
   167
(*like many, but requires at least one element satisfying pred*)
fun many1 pred input =
  let
    val (x, rest) = one pred input;
    val (xs, rest') = many pred rest;
  in (x :: xs, rest') end;
wenzelm@6116
   168
wenzelm@15664
   169
(*result of scan, or default def with unchanged input if scan fails*)
fun optional scan def input =
  scan input
    handle FAIL _ => (def, input);
wenzelm@15664
   170
(*SOME result of scan, or NONE with unchanged input if scan fails*)
fun option scan input =
  (case scan input of (x, rest) => (SOME x, rest))
    handle FAIL _ => (NONE, input);
wenzelm@6116
   171
berghofe@13795
   172
(*apply scan repeatedly until it fails, collecting the results in order;
  the handler covers only the single scan step, not the iteration*)
fun repeat scan =
  let
    fun attempt input = SOME (scan input) handle FAIL _ => NONE;
    fun collect acc input =
      (case attempt input of
        SOME (y, rest) => collect (y :: acc) rest
      | NONE => (rev acc, input));
  in collect [] end;
berghofe@13795
   179
wenzelm@25999
   180
(*like repeat, but the first application of scan must succeed*)
fun repeat1 scan input =
  let
    val (x, rest) = scan input;
    val (xs, rest') = repeat scan rest;
  in (x :: xs, rest') end;
wenzelm@6116
   181
wenzelm@23699
   182
(*wrap the result of scan into a singleton list*)
fun single scan input =
  let val (x, rest) = scan input
  in ([x], rest) end;
wenzelm@23699
   183
(*one strict application of scan, followed by as many permissive ones as
  possible (MORE and ABORT of the latter just end the repetition)*)
fun bulk scan = scan ::: repeat (permissive scan);
wenzelm@23699
   184
wenzelm@6116
   185
(*run both scanners on the same input and keep the greater result wrt. leq;
  if only one succeeds its result is used, if both fail the scan fails*)
fun max leq scan1 scan2 xs =
  let
    val (res1, rest1) = option scan1 xs;
    val (res2, rest2) = option scan2 xs;
  in
    (case (res1, res2) of
      (NONE, NONE) => raise FAIL NONE           (*loses FAIL msg!*)
    | (SOME tok1, NONE) => (tok1, rest1)
    | (NONE, SOME tok2) => (tok2, rest2)
    | (SOME tok1, SOME tok2) =>
        if leq (tok2, tok1) then (tok1, rest1) else (tok2, rest2))
  end;
wenzelm@6116
   192
wenzelm@6116
   193
(*look ahead: result of scan, paired with the original unconsumed input*)
fun ahead scan xs =
  let val (y, _) = scan xs
  in (y, xs) end;
wenzelm@6116
   194
wenzelm@6116
   195
(*apply scan only if test does not succeed on the same input (checked by
  look-ahead); otherwise fail*)
fun unless test scan input =
  (case ahead (option test) input of
    (NONE, _) => scan input
  | _ => fail input);
wenzelm@6116
   197
wenzelm@6116
   198
(*try the scanners in order, using the first one that does not raise FAIL;
  the empty list of scanners fails*)
fun first scans input =
  (case scans of
    [] => fail input
  | scan :: others => (scan input handle FAIL _ => first others input));
wenzelm@6116
   200
wenzelm@6116
   201
wenzelm@6116
   202
(* state based scanners *)
wenzelm@6116
   203
wenzelm@9122
   204
(*yield the current state as result, leaving state and input unchanged*)
fun state (arg as (st, _)) = (st, arg);
wenzelm@9122
   205
wenzelm@6116
   206
(*state-dependent scanner: scan receives the current state and yields the
  new state together with its result*)
fun depend scan (st, xs) =
  (case scan st xs of
    ((st', y), xs') => (y, (st', xs')));
wenzelm@6116
   209
wenzelm@15664
   210
(*state-dependent scanner that reads the state but leaves it unchanged*)
fun peek scan (st, xs) =
  let val (y, xs') = scan st xs
  in (y, (st, xs')) end;
wenzelm@15664
   211
wenzelm@15664
   212
(*run a state-based scanner from initial state st, dropping the final state*)
fun pass st scan xs =
  (case scan (st, xs) of
    (y, (_, xs')) => (y, xs'));
wenzelm@15664
   215
wenzelm@6116
   216
(*turn a plain scanner into a state-based one that passes the state through*)
fun lift scan (st, xs) =
  (case scan xs of
    (y, xs') => (y, (st, xs')));
wenzelm@6116
   219
wenzelm@15664
   220
(*run a state-based scanner with unit as initial state, dropping the final state*)
fun unlift scan xs =
  let val (y, (_, xs')) = scan ((), xs)
  in (y, xs') end;
wenzelm@15664
   221
wenzelm@15664
   222
wenzelm@15664
   223
(* trace input *)
wenzelm@15664
   224
wenzelm@23699
   225
(*pair the result of scan with the prefix of the input that it consumed,
  determined from the difference in length of input and rest*)
fun trace scan input =
  let
    val (result, rest) = scan input;
    val consumed = length input - length rest;
  in ((result, take consumed input), rest) end;
wenzelm@6116
   228
wenzelm@6116
   229
wenzelm@27731
   230
(* stopper *)
wenzelm@27731
   231
wenzelm@27731
   232
(*a stopper consists of a function producing the stopper element from the
  input list, and a predicate recognizing stopper elements*)
datatype 'a stopper =
  Stopper of ('a list -> 'a) * ('a -> bool);

fun stopper make recognize = Stopper (make, recognize);
fun is_stopper (Stopper (_, recognize)) = recognize;
wenzelm@27731
   236
wenzelm@27731
   237
wenzelm@6116
   238
(* finite scans *)
wenzelm@6116
   239
wenzelm@27731
   240
(*state-based scan of finite input: the input is terminated by a stopper
  element, scanned strictly (MORE becomes FAIL), and the stopper is removed
  again from the end of the remaining input; aborts if the input already
  contains a stopper or if the scanner has consumed the stopper*)
fun finite' (Stopper (mk_stopper, is_stopper)) scan (state, input) =
  let
    fun lost () = raise ABORT (fn () => "Bad scanner: lost stopper of finite scan!");

    fun stop lst =
      (case lst of
        [] => lost ()
      | _ =>
          (case split_last lst of
            (front, final) => if is_stopper final then ((), front) else lost ()));

    val _ =
      if exists is_stopper input then
        raise ABORT (fn () => "Stopper may not occur in input of finite scan!")
      else ();
    val terminated = input @ [mk_stopper input];
  in (strict scan --| lift stop) (state, terminated) end;
wenzelm@6116
   253
wenzelm@15664
   254
(*finite scan for a plain scanner, via finite' on unit state*)
fun finite stopper scan = pass () (finite' stopper (lift scan));
wenzelm@6116
   255
wenzelm@6116
   256
(*scan the finite input xs completely: SOME result if scan succeeds and
  consumes everything, NONE otherwise; aborts are turned into errors*)
fun read stopper scan xs =
  let val (result, rest) = error (finite stopper (option scan)) xs in
    (case rest of
      [] => result
    | _ => NONE)
  end;
wenzelm@6116
   260
wenzelm@6116
   261
wenzelm@6116
   262
(* infinite scans -- draining state-based source *)
wenzelm@6116
   263
wenzelm@23699
   264
(*run a state-based scanner, fetching further input from src via get
  whenever MORE is raised (with the requested prompt or def_prompt); if get
  delivers nothing, the scan is finished as finite scan on what is there*)
fun drain def_prompt get stopper scan ((state, xs), src) =
  let
    val input = (state, xs);
    fun refill prompt =
      (case get (the_default def_prompt prompt) src of
        ([], _) => (finite' stopper scan input, src)
      | (more, src') => drain def_prompt get stopper scan ((state, xs @ more), src'));
  in (scan input, src) handle MORE prompt => refill prompt end;
wenzelm@6116
   269
wenzelm@6116
   270
wenzelm@6116
   271
wenzelm@27782
   272
(** datatype lexicon -- position tree **)
wenzelm@6116
   273
wenzelm@27782
   274
(*each table entry maps a symbol to a flag telling whether a literal ends
  at this position, and to the sub-lexicon of possible continuations*)
datatype lexicon =
  Lexicon of (bool * lexicon) Symtab.table;

val empty_lexicon = Lexicon Symtab.empty;
wenzelm@6116
   277
wenzelm@27782
   278
(*check whether the given symbol list is a literal of the lexicon;
  the empty list never is*)
fun is_literal (Lexicon tab) chs =
  (case chs of
    [] => false
  | c :: cs =>
      (case Symtab.lookup tab c of
        NONE => false
      | SOME (tip, lex) =>
          if tip andalso null cs then true else is_literal lex cs));
wenzelm@6116
   283
wenzelm@6116
   284
wenzelm@27782
   285
(* scan longest match *)
wenzelm@27782
   286
wenzelm@27782
   287
(*scan the longest literal of the lexicon from input elements, which are
  looked up by their first component; the best match seen so far is
  remembered while walking down the lexicon; input that ends while longer
  literals are still possible requests MORE*)
fun literal lexicon =
  let
    fun finish best =
      (case best of
        SOME (rev_path, rest) => (rev rev_path, rest)
      | NONE => raise FAIL NONE);

    fun scan path best (Lexicon tab) input =
      (case input of
        [] => if Symtab.is_empty tab then finish best else raise MORE NONE
      | c :: cs =>
          (case Symtab.lookup tab (fst c) of
            NONE => finish best
          | SOME (tip, lex) =>
              let
                val path' = c :: path;
                val best' = if tip then SOME (path', cs) else best;
              in scan path' best' lex cs end));
  in scan [] NONE lexicon end;
wenzelm@7025
   299
wenzelm@6116
   300
wenzelm@27765
   301
(* build lexicons *)
wenzelm@6116
   302
wenzelm@27765
   303
(*add the literal chrs to the lexicon; the lexicon is returned unchanged
  if the literal is already present*)
fun extend_lexicon chrs lexicon =
  let
    fun ext chs (lex as Lexicon tab) =
      (case chs of
        [] => lex
      | c :: cs =>
          let
            (*missing entry: no literal ends here yet, no continuations*)
            val (tip, sub) = the_default (false, empty_lexicon) (Symtab.lookup tab c);
          in Lexicon (Symtab.update (c, (tip orelse null cs, ext cs sub)) tab) end);
  in if is_literal lexicon chrs then lexicon else ext chrs lexicon end;
wenzelm@27765
   311
wenzelm@27765
   312
(*lexicon consisting of the given literals*)
fun make_lexicon chrss = empty_lexicon |> fold extend_lexicon chrss;
wenzelm@27765
   313
wenzelm@27765
   314
wenzelm@27765
   315
(* merge lexicons *)
wenzelm@27765
   316
wenzelm@27782
   317
(*list the literals of a lexicon as symbol lists; path is the reversed
  sequence of symbols leading to this sub-lexicon*)
fun dest path (Lexicon tab) =
  let
    fun add (d, (tip, lex)) acc =
      let
        val path' = d :: path;
        val below = dest path' lex;
        val here = if tip then rev path' :: below else below;
      in here @ acc end;
  in Symtab.fold add tab [] end;
wenzelm@6116
   322
wenzelm@27782
   323
(*literals of a lexicon as strings*)
fun dest_lexicon lexicon = map implode (dest [] lexicon);
wenzelm@27782
   324
(*extend lex1 by all literals of lex2*)
fun merge_lexicons (lex1, lex2) = lex1 |> fold extend_lexicon (dest [] lex2);
wenzelm@6116
   325
wenzelm@6116
   326
end;
wenzelm@6116
   327
wenzelm@37216
   328
(*view of structure Scan restricted to the basic combinators of BASIC_SCAN*)
structure Basic_Scan: BASIC_SCAN = Scan;
wenzelm@37216
   329
(*make the basic combinators available unqualified*)
open Basic_Scan;