src/Pure/General/scan.scala
author wenzelm
Thu, 07 Jul 2011 14:10:50 +0200
changeset 44570 58bb7ca5c7a2
parent 44569 5130dfe1b7be
child 46123 6de58d947e57
permissions -rw-r--r--
explicit indication of type Symbol.Symbol;
wenzelm@31648
     1
/*  Title:      Pure/General/scan.scala
wenzelm@31648
     2
    Author:     Makarius
wenzelm@31648
     3
wenzelm@40769
     4
Efficient scanning of keywords and tokens.
wenzelm@31648
     5
*/
wenzelm@31648
     6
wenzelm@31648
     7
package isabelle
wenzelm@31648
     8
wenzelm@34187
     9
wenzelm@36035
    10
import scala.collection.generic.Addable
wenzelm@36717
    11
import scala.collection.IndexedSeq
wenzelm@34187
    12
import scala.collection.immutable.PagedSeq
wenzelm@34187
    13
import scala.util.parsing.input.{OffsetPosition, Position => InputPosition, Reader}
wenzelm@31648
    14
import scala.util.parsing.combinator.RegexParsers
wenzelm@31648
    15
wenzelm@34187
    16
import java.io.{File, InputStream, BufferedInputStream, FileInputStream}
wenzelm@34187
    17
wenzelm@31648
    18
wenzelm@31648
    19
object Scan
wenzelm@31648
    20
{
wenzelm@44285
    21
  /** context of partial scans **/
wenzelm@44285
    22
wenzelm@44285
    23
  sealed abstract class Context
wenzelm@44285
    24
  case object Finished extends Context
wenzelm@44285
    25
  case class Quoted(quote: String) extends Context
wenzelm@44285
    26
  case object Verbatim extends Context
wenzelm@44285
    27
  case class Comment(depth: Int) extends Context
wenzelm@44285
    28
wenzelm@44285
    29
wenzelm@44285
    30
wenzelm@31649
    31
  /** Lexicon -- position tree **/
wenzelm@31648
    32
wenzelm@31649
    33
  object Lexicon
wenzelm@31649
    34
  {
wenzelm@43631
    35
    protected case class Tree(val branches: Map[Char, (String, Tree)])
wenzelm@31649
    36
    private val empty_tree = Tree(Map())
wenzelm@31648
    37
wenzelm@31649
    38
    val empty: Lexicon = new Lexicon
wenzelm@31774
    39
    def apply(elems: String*): Lexicon = empty ++ elems
wenzelm@31648
    40
  }
wenzelm@31648
    41
wenzelm@36035
    42
  class Lexicon extends Addable[String, Lexicon] with RegexParsers
wenzelm@31648
    43
  {
wenzelm@31649
    44
    /* representation */
wenzelm@31649
    45
wenzelm@31649
    46
    import Lexicon.Tree
wenzelm@31776
    47
    protected val main_tree: Tree = Lexicon.empty_tree
wenzelm@31649
    48
wenzelm@31649
    49
wenzelm@31650
    50
    /* auxiliary operations */
wenzelm@31650
    51
wenzelm@31650
    52
    private def content(tree: Tree, result: List[String]): List[String] =
wenzelm@31650
    53
      (result /: tree.branches.toList) ((res, entry) =>
wenzelm@31650
    54
        entry match { case (_, (s, tr)) =>
wenzelm@31650
    55
          if (s.isEmpty) content(tr, res) else content(tr, s :: res) })
wenzelm@31650
    56
wenzelm@31650
    57
    private def lookup(str: CharSequence): Option[(Boolean, Tree)] =
wenzelm@31650
    58
    {
wenzelm@31650
    59
      val len = str.length
wenzelm@31650
    60
      def look(tree: Tree, tip: Boolean, i: Int): Option[(Boolean, Tree)] =
wenzelm@31650
    61
      {
wenzelm@31650
    62
        if (i < len) {
wenzelm@31650
    63
          tree.branches.get(str.charAt(i)) match {
wenzelm@31650
    64
            case Some((s, tr)) => look(tr, !s.isEmpty, i + 1)
wenzelm@31650
    65
            case None => None
wenzelm@31650
    66
          }
wenzelm@31650
    67
        } else Some(tip, tree)
wenzelm@31650
    68
      }
wenzelm@31650
    69
      look(main_tree, false, 0)
wenzelm@31650
    70
    }
wenzelm@31650
    71
wenzelm@31650
    72
    def completions(str: CharSequence): List[String] =
wenzelm@31776
    73
      lookup(str) match {
wenzelm@31650
    74
        case Some((true, tree)) => content(tree, List(str.toString))
wenzelm@31650
    75
        case Some((false, tree)) => content(tree, Nil)
wenzelm@31650
    76
        case None => Nil
wenzelm@31776
    77
      }
wenzelm@31650
    78
wenzelm@31650
    79
wenzelm@36035
    80
    /* pseudo Set methods */
wenzelm@31649
    81
wenzelm@36036
    82
    def iterator: Iterator[String] = content(main_tree, Nil).sortWith(_ <= _).iterator
wenzelm@31649
    83
wenzelm@36035
    84
    override def toString: String = iterator.mkString("Lexicon(", ", ", ")")
wenzelm@31649
    85
wenzelm@36035
    86
    def empty: Lexicon = Lexicon.empty
wenzelm@36035
    87
    def isEmpty: Boolean = main_tree.branches.isEmpty
wenzelm@31649
    88
wenzelm@31774
    89
    def contains(elem: String): Boolean =
wenzelm@31774
    90
      lookup(elem) match {
wenzelm@31650
    91
        case Some((tip, _)) => tip
wenzelm@31650
    92
        case _ => false
wenzelm@31650
    93
      }
wenzelm@31649
    94
wenzelm@36035
    95
wenzelm@36035
    96
    /* Addable methods */
wenzelm@36035
    97
wenzelm@36035
    98
    def repr = this
wenzelm@36035
    99
wenzelm@31774
   100
    def + (elem: String): Lexicon =
wenzelm@31774
   101
      if (contains(elem)) this
wenzelm@31774
   102
      else {
wenzelm@31774
   103
        val len = elem.length
wenzelm@31774
   104
        def extend(tree: Tree, i: Int): Tree =
wenzelm@31774
   105
          if (i < len) {
wenzelm@31774
   106
            val c = elem.charAt(i)
wenzelm@31774
   107
            val end = (i + 1 == len)
wenzelm@31774
   108
            tree.branches.get(c) match {
wenzelm@31774
   109
              case Some((s, tr)) =>
wenzelm@31774
   110
                Tree(tree.branches +
wenzelm@31774
   111
                  (c -> (if (end) elem else s, extend(tr, i + 1))))
wenzelm@31774
   112
              case None =>
wenzelm@31774
   113
                Tree(tree.branches +
wenzelm@31774
   114
                  (c -> (if (end) elem else "", extend(Lexicon.empty_tree, i + 1))))
wenzelm@31774
   115
            }
wenzelm@34144
   116
          }
wenzelm@34144
   117
          else tree
wenzelm@31774
   118
        val old = this
wenzelm@31774
   119
        new Lexicon { override val main_tree = extend(old.main_tree, 0) }
wenzelm@31648
   120
      }
wenzelm@31774
   121
wenzelm@31649
   122
wenzelm@34306
   123
wenzelm@34146
   124
    /** RegexParsers methods **/
wenzelm@31649
   125
wenzelm@31649
   126
    override val whiteSpace = "".r
wenzelm@31649
   127
wenzelm@34146
   128
wenzelm@44285
   129
    /* optional termination */
wenzelm@44285
   130
wenzelm@44285
   131
    def opt_term[T](p: => Parser[T]): Parser[Option[T]] =
wenzelm@44285
   132
      p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None)
wenzelm@44285
   133
wenzelm@44285
   134
wenzelm@34146
   135
    /* keywords from lexicon */
wenzelm@34146
   136
wenzelm@34149
   137
    def keyword: Parser[String] = new Parser[String]
wenzelm@34144
   138
    {
wenzelm@31649
   139
      def apply(in: Input) =
wenzelm@31649
   140
      {
wenzelm@31649
   141
        val source = in.source
wenzelm@31649
   142
        val offset = in.offset
wenzelm@31649
   143
        val len = source.length - offset
wenzelm@31649
   144
wenzelm@34149
   145
        def scan(tree: Tree, result: String, i: Int): String =
wenzelm@31649
   146
        {
wenzelm@31649
   147
          if (i < len) {
wenzelm@31649
   148
            tree.branches.get(source.charAt(offset + i)) match {
wenzelm@34149
   149
              case Some((s, tr)) => scan(tr, if (s.isEmpty) result else s, i + 1)
wenzelm@34144
   150
              case None => result
wenzelm@31649
   151
            }
wenzelm@34144
   152
          }
wenzelm@34144
   153
          else result
wenzelm@31649
   154
        }
wenzelm@34149
   155
        val result = scan(main_tree, "", 0)
wenzelm@34149
   156
        if (result.isEmpty) Failure("keyword expected", in)
wenzelm@34149
   157
        else Success(result, in.drop(result.length))
wenzelm@31649
   158
      }
wenzelm@31649
   159
    }.named("keyword")
wenzelm@31649
   160
wenzelm@34146
   161
wenzelm@34164
   162
    /* symbol range */
wenzelm@34146
   163
wenzelm@44570
   164
    def symbol_range(pred: Symbol.Symbol => Boolean, min_count: Int, max_count: Int): Parser[String] =
wenzelm@34149
   165
      new Parser[String]
wenzelm@34144
   166
      {
wenzelm@34146
   167
        def apply(in: Input) =
wenzelm@34146
   168
        {
wenzelm@34146
   169
          val start = in.offset
wenzelm@34146
   170
          val end = in.source.length
wenzelm@34146
   171
          val matcher = new Symbol.Matcher(in.source)
wenzelm@34146
   172
wenzelm@34146
   173
          var i = start
wenzelm@34146
   174
          var count = 0
wenzelm@34146
   175
          var finished = false
wenzelm@34146
   176
          while (!finished) {
wenzelm@34146
   177
            if (i < end && count < max_count) {
wenzelm@34146
   178
              val n = matcher(i, end)
wenzelm@34146
   179
              val sym = in.source.subSequence(i, i + n).toString
wenzelm@34146
   180
              if (pred(sym)) { i += n; count += 1 }
wenzelm@34146
   181
              else finished = true
wenzelm@34146
   182
            }
wenzelm@34146
   183
            else finished = true
wenzelm@34146
   184
          }
wenzelm@34149
   185
          if (count < min_count) Failure("bad input", in)
wenzelm@34149
   186
          else Success(in.source.subSequence(start, i).toString, in.drop(i - start))
wenzelm@34146
   187
        }
wenzelm@34164
   188
      }.named("symbol_range")
wenzelm@34146
   189
wenzelm@44570
   190
    def one(pred: Symbol.Symbol => Boolean): Parser[String] =
wenzelm@44570
   191
      symbol_range(pred, 1, 1)
wenzelm@44570
   192
wenzelm@44570
   193
    def many(pred: Symbol.Symbol => Boolean): Parser[String] =
wenzelm@44570
   194
      symbol_range(pred, 0, Integer.MAX_VALUE)
wenzelm@44570
   195
wenzelm@44570
   196
    def many1(pred: Symbol.Symbol => Boolean): Parser[String] =
wenzelm@44570
   197
      symbol_range(pred, 1, Integer.MAX_VALUE)
wenzelm@34149
   198
wenzelm@34149
   199
wenzelm@34152
   200
    /* quoted strings */
wenzelm@34149
   201
wenzelm@44570
   202
    private def quoted_body(quote: Symbol.Symbol): Parser[String] =
wenzelm@44285
   203
    {
wenzelm@44285
   204
      rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" |
wenzelm@44285
   205
        (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ^^ (_.mkString)
wenzelm@44285
   206
    }
wenzelm@44285
   207
wenzelm@44570
   208
    private def quoted(quote: Symbol.Symbol): Parser[String] =
wenzelm@34152
   209
    {
wenzelm@44285
   210
      quote ~ quoted_body(quote) ~ quote ^^ { case x ~ y ~ z => x + y + z }
wenzelm@34152
   211
    }.named("quoted")
wenzelm@34149
   212
wenzelm@44570
   213
    def quoted_content(quote: Symbol.Symbol, source: String): String =
wenzelm@34149
   214
    {
wenzelm@34149
   215
      require(parseAll(quoted(quote), source).successful)
wenzelm@34189
   216
      val body = source.substring(1, source.length - 1)
wenzelm@34189
   217
      if (body.exists(_ == '\\')) {
wenzelm@34189
   218
        val content =
wenzelm@40769
   219
          rep(many1(sym => sym != quote && sym != "\\") |
wenzelm@34189
   220
              "\\" ~> (quote | "\\" | """\d\d\d""".r ^^ { case x => x.toInt.toChar.toString }))
wenzelm@34189
   221
        parseAll(content ^^ (_.mkString), body).get
wenzelm@34189
   222
      }
wenzelm@34189
   223
      else body
wenzelm@34149
   224
    }
wenzelm@34149
   225
wenzelm@44570
   226
    def quoted_context(quote: Symbol.Symbol, ctxt: Context): Parser[(String, Context)] =
wenzelm@44285
   227
    {
wenzelm@44285
   228
      ctxt match {
wenzelm@44285
   229
        case Finished =>
wenzelm@44285
   230
          quote ~ quoted_body(quote) ~ opt_term(quote) ^^
wenzelm@44285
   231
            { case x ~ y ~ Some(z) => (x + y + z, Finished)
wenzelm@44285
   232
              case x ~ y ~ None => (x + y, Quoted(quote)) }
wenzelm@44285
   233
        case Quoted(q) if q == quote =>
wenzelm@44285
   234
          quoted_body(quote) ~ opt_term(quote) ^^
wenzelm@44285
   235
            { case x ~ Some(y) => (x + y, Finished)
wenzelm@44285
   236
              case x ~ None => (x, ctxt) }
wenzelm@44285
   237
        case _ => failure("")
wenzelm@44285
   238
      }
wenzelm@44285
   239
    }.named("quoted_context")
wenzelm@44285
   240
wenzelm@34149
   241
wenzelm@34152
   242
    /* verbatim text */
wenzelm@34152
   243
wenzelm@44285
   244
    private def verbatim_body: Parser[String] =
wenzelm@44285
   245
      rep(many1(sym => sym != "*") | """\*(?!\})""".r) ^^ (_.mkString)
wenzelm@44285
   246
wenzelm@34152
   247
    private def verbatim: Parser[String] =
wenzelm@34152
   248
    {
wenzelm@44285
   249
      "{*" ~ verbatim_body ~ "*}" ^^ { case x ~ y ~ z => x + y + z }
wenzelm@34152
   250
    }.named("verbatim")
wenzelm@34149
   251
wenzelm@34149
   252
    def verbatim_content(source: String): String =
wenzelm@34149
   253
    {
wenzelm@34149
   254
      require(parseAll(verbatim, source).successful)
wenzelm@34149
   255
      source.substring(2, source.length - 2)
wenzelm@34149
   256
    }
wenzelm@34149
   257
wenzelm@44285
   258
    def verbatim_context(ctxt: Context): Parser[(String, Context)] =
wenzelm@44285
   259
    {
wenzelm@44285
   260
      ctxt match {
wenzelm@44285
   261
        case Finished =>
wenzelm@44285
   262
          "{*" ~ verbatim_body ~ opt_term("*}") ^^
wenzelm@44285
   263
            { case x ~ y ~ Some(z) => (x + y + z, Finished)
wenzelm@44285
   264
              case x ~ y ~ None => (x + y, Verbatim) }
wenzelm@44285
   265
        case Verbatim =>
wenzelm@44285
   266
          verbatim_body ~ opt_term("*}") ^^
wenzelm@44285
   267
            { case x ~ Some(y) => (x + y, Finished)
wenzelm@44285
   268
              case x ~ None => (x, Verbatim) }
wenzelm@44285
   269
        case _ => failure("")
wenzelm@44285
   270
      }
wenzelm@44285
   271
    }.named("verbatim_context")
wenzelm@44285
   272
wenzelm@34149
   273
wenzelm@34152
   274
    /* nested comments */
wenzelm@34152
   275
wenzelm@44286
   276
    private def comment_depth(depth: Int): Parser[(String, Int)] = new Parser[(String, Int)]
wenzelm@34152
   277
    {
wenzelm@44286
   278
      require(depth >= 0)
wenzelm@44286
   279
wenzelm@34152
   280
      val comment_text =
wenzelm@43312
   281
        rep1(many1(sym => sym != "*" && sym != "(") | """\*(?!\))|\((?!\*)""".r)
wenzelm@34152
   282
wenzelm@34152
   283
      def apply(in: Input) =
wenzelm@34152
   284
      {
wenzelm@34152
   285
        var rest = in
wenzelm@43312
   286
        def try_parse[A](p: Parser[A]): Boolean =
wenzelm@34152
   287
        {
wenzelm@43312
   288
          parse(p ^^^ (), rest) match {
wenzelm@34152
   289
            case Success(_, next) => { rest = next; true }
wenzelm@34152
   290
            case _ => false
wenzelm@34152
   291
          }
wenzelm@34152
   292
        }
wenzelm@44286
   293
        var d = depth
wenzelm@34152
   294
        var finished = false
wenzelm@34152
   295
        while (!finished) {
wenzelm@44286
   296
          if (try_parse("(*")) d += 1
wenzelm@44286
   297
          else if (d > 0 && try_parse("*)")) d -= 1
wenzelm@44286
   298
          else if (d == 0 || !try_parse(comment_text)) finished = true
wenzelm@34152
   299
        }
wenzelm@44286
   300
        if (in.offset < rest.offset)
wenzelm@44286
   301
          Success((in.source.subSequence(in.offset, rest.offset).toString, d), rest)
wenzelm@34152
   302
        else Failure("comment expected", in)
wenzelm@34152
   303
      }
wenzelm@44286
   304
    }.named("comment_depth")
wenzelm@44286
   305
wenzelm@44286
   306
    def comment: Parser[String] =
wenzelm@44286
   307
      comment_depth(0) ^? { case (x, d) if d == 0 => x }
wenzelm@44286
   308
wenzelm@44286
   309
    def comment_context(ctxt: Context): Parser[(String, Context)] =
wenzelm@44286
   310
    {
wenzelm@44286
   311
      val depth =
wenzelm@44286
   312
        ctxt match {
wenzelm@44286
   313
          case Finished => 0
wenzelm@44286
   314
          case Comment(d) => d
wenzelm@44286
   315
          case _ => -1
wenzelm@44286
   316
        }
wenzelm@44286
   317
      if (depth >= 0)
wenzelm@44286
   318
        comment_depth(depth) ^^
wenzelm@44286
   319
          { case (x, 0) => (x, Finished)
wenzelm@44286
   320
            case (x, d) => (x, Comment(d)) }
wenzelm@44286
   321
      else failure("")
wenzelm@44286
   322
    }
wenzelm@34152
   323
wenzelm@34152
   324
    def comment_content(source: String): String =
wenzelm@34152
   325
    {
wenzelm@34152
   326
      require(parseAll(comment, source).successful)
wenzelm@34152
   327
      source.substring(2, source.length - 2)
wenzelm@34152
   328
    }
wenzelm@34152
   329
wenzelm@34152
   330
wenzelm@34149
   331
    /* outer syntax tokens */
wenzelm@34149
   332
wenzelm@44285
   333
    private def delimited_token: Parser[Token] =
wenzelm@44285
   334
    {
wenzelm@44285
   335
      val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
wenzelm@44285
   336
      val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
wenzelm@44285
   337
      val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x))
wenzelm@44285
   338
      val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))
wenzelm@44285
   339
wenzelm@44285
   340
      string | (alt_string | (verb | cmt))
wenzelm@44285
   341
    }
wenzelm@44285
   342
wenzelm@44569
   343
    private def other_token(is_command: String => Boolean)
wenzelm@44285
   344
      : Parser[Token] =
wenzelm@34149
   345
    {
wenzelm@44569
   346
      val id = one(Symbol.is_letter) ~ many(Symbol.is_letdig) ^^ { case x ~ y => x + y }
wenzelm@44569
   347
      val nat = many1(Symbol.is_digit)
wenzelm@40536
   348
      val natdot = nat ~ "." ~ nat ^^ { case x ~ y ~ z => x + y + z }
wenzelm@34149
   349
      val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x }
wenzelm@34149
   350
wenzelm@34149
   351
      val ident = id ~ rep("." ~> id) ^^
wenzelm@36966
   352
        { case x ~ Nil => Token(Token.Kind.IDENT, x)
wenzelm@36966
   353
          case x ~ ys => Token(Token.Kind.LONG_IDENT, (x :: ys).mkString(".")) }
wenzelm@34149
   354
wenzelm@36966
   355
      val var_ = "?" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.VAR, x + y) }
wenzelm@36966
   356
      val type_ident = "'" ~ id ^^ { case x ~ y => Token(Token.Kind.TYPE_IDENT, x + y) }
wenzelm@36966
   357
      val type_var = "?'" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.TYPE_VAR, x + y) }
wenzelm@36966
   358
      val nat_ = nat ^^ (x => Token(Token.Kind.NAT, x))
wenzelm@40536
   359
      val float =
wenzelm@40536
   360
        ("-" ~ natdot ^^ { case x ~ y => x + y } | natdot) ^^ (x => Token(Token.Kind.FLOAT, x))
wenzelm@34149
   361
wenzelm@34149
   362
      val sym_ident =
wenzelm@44569
   363
        (many1(Symbol.is_symbolic_char) | one(sym => Symbol.is_symbolic(sym))) ^^
wenzelm@36966
   364
        (x => Token(Token.Kind.SYM_IDENT, x))
wenzelm@34149
   365
wenzelm@44569
   366
      val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))
wenzelm@34149
   367
wenzelm@44285
   368
      // FIXME check
wenzelm@44569
   369
      val junk = many(sym => !(Symbol.is_blank(sym)))
wenzelm@44569
   370
      val junk1 = many1(sym => !(Symbol.is_blank(sym)))
wenzelm@34149
   371
wenzelm@34272
   372
      val bad_delimiter =
wenzelm@38649
   373
        ("\"" | "`" | "{*" | "(*") ~ junk ^^ { case x ~ y => Token(Token.Kind.UNPARSED, x + y) }
wenzelm@44285
   374
      val bad = junk1 ^^ (x => Token(Token.Kind.UNPARSED, x))
wenzelm@34149
   375
wenzelm@44285
   376
      val command_keyword =
wenzelm@44285
   377
        keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
wenzelm@34149
   378
wenzelm@44285
   379
      space | (bad_delimiter |
wenzelm@44285
   380
        (((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
wenzelm@44285
   381
          command_keyword) | bad))
wenzelm@44285
   382
    }
wenzelm@34149
   383
wenzelm@44569
   384
    def token(is_command: String => Boolean): Parser[Token] =
wenzelm@44569
   385
      delimited_token | other_token(is_command)
wenzelm@44285
   386
wenzelm@44569
   387
    def token_context(is_command: String => Boolean, ctxt: Context): Parser[(Token, Context)] =
wenzelm@44285
   388
    {
wenzelm@44285
   389
      val string =
wenzelm@44285
   390
        quoted_context("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) }
wenzelm@44285
   391
      val alt_string =
wenzelm@44285
   392
        quoted_context("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) }
wenzelm@44286
   393
      val verb = verbatim_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) }
wenzelm@44286
   394
      val cmt = comment_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.COMMENT, x), c) }
wenzelm@44569
   395
      val other = other_token(is_command) ^^ { case x => (x, Finished) }
wenzelm@44285
   396
wenzelm@44286
   397
      string | (alt_string | (verb | (cmt | other)))
wenzelm@34149
   398
    }
wenzelm@31648
   399
  }
wenzelm@34187
   400
wenzelm@34187
   401
wenzelm@34187
   402
wenzelm@34187
   403
  /** read file without decoding -- enables efficient length operation **/
wenzelm@34187
   404
wenzelm@36717
   405
  private class Restricted_Seq(seq: IndexedSeq[Char], start: Int, end: Int)
wenzelm@34187
   406
    extends CharSequence
wenzelm@34187
   407
  {
wenzelm@34187
   408
    def charAt(i: Int): Char =
wenzelm@34187
   409
      if (0 <= i && i < length) seq(start + i)
wenzelm@34187
   410
      else throw new IndexOutOfBoundsException
wenzelm@34187
   411
wenzelm@34187
   412
    def length: Int = end - start  // avoid potentially expensive seq.length
wenzelm@34187
   413
wenzelm@34187
   414
    def subSequence(i: Int, j: Int): CharSequence =
wenzelm@34187
   415
      if (0 <= i && i <= j && j <= length) new Restricted_Seq(seq, start + i, start + j)
wenzelm@34187
   416
      else throw new IndexOutOfBoundsException
wenzelm@34187
   417
wenzelm@34187
   418
    override def toString: String =
wenzelm@34187
   419
    {
wenzelm@34187
   420
      val buf = new StringBuilder(length)
wenzelm@34187
   421
      for (offset <- start until end) buf.append(seq(offset))
wenzelm@34187
   422
      buf.toString
wenzelm@34187
   423
    }
wenzelm@34187
   424
  }
wenzelm@34187
   425
wenzelm@34187
   426
  abstract class Byte_Reader extends Reader[Char] { def close: Unit }
wenzelm@34187
   427
wenzelm@34187
   428
  def byte_reader(file: File): Byte_Reader =
wenzelm@34187
   429
  {
wenzelm@34187
   430
    val stream = new BufferedInputStream(new FileInputStream(file))
wenzelm@34187
   431
    val seq = new PagedSeq(
wenzelm@34187
   432
      (buf: Array[Char], offset: Int, length: Int) =>
wenzelm@34187
   433
        {
wenzelm@34187
   434
          var i = 0
wenzelm@34187
   435
          var c = 0
wenzelm@34187
   436
          var eof = false
wenzelm@34187
   437
          while (!eof && i < length) {
wenzelm@34187
   438
            c = stream.read
wenzelm@34187
   439
            if (c == -1) eof = true
wenzelm@34187
   440
            else { buf(offset + i) = c.toChar; i += 1 }
wenzelm@34187
   441
          }
wenzelm@34187
   442
          if (i > 0) i else -1
wenzelm@34187
   443
        })
wenzelm@34187
   444
    val restricted_seq = new Restricted_Seq(seq, 0, file.length.toInt)
wenzelm@34187
   445
wenzelm@34187
   446
    class Paged_Reader(override val offset: Int) extends Byte_Reader
wenzelm@34187
   447
    {
wenzelm@34187
   448
      override lazy val source: CharSequence = restricted_seq
wenzelm@34187
   449
      def first: Char = if (seq.isDefinedAt(offset)) seq(offset) else '\032'
wenzelm@34187
   450
      def rest: Paged_Reader = if (seq.isDefinedAt(offset)) new Paged_Reader(offset + 1) else this
wenzelm@34187
   451
      def pos: InputPosition = new OffsetPosition(source, offset)
wenzelm@34187
   452
      def atEnd: Boolean = !seq.isDefinedAt(offset)
wenzelm@34187
   453
      override def drop(n: Int): Paged_Reader = new Paged_Reader(offset + n)
wenzelm@34187
   454
      def close { stream.close }
wenzelm@34187
   455
    }
wenzelm@34187
   456
    new Paged_Reader(0)
wenzelm@34187
   457
  }
wenzelm@31648
   458
}