src/Pure/General/scan.scala
author wenzelm
Thu, 07 Jul 2011 13:48:30 +0200
changeset 44569 5130dfe1b7be
parent 44294 a26e514c92b2
child 44570 58bb7ca5c7a2
permissions -rw-r--r--
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
tuned implicit build/init messages;
wenzelm@31648
     1
/*  Title:      Pure/General/scan.scala
wenzelm@31648
     2
    Author:     Makarius
wenzelm@31648
     3
wenzelm@40769
     4
Efficient scanning of keywords and tokens.
wenzelm@31648
     5
*/
wenzelm@31648
     6
wenzelm@31648
     7
package isabelle
wenzelm@31648
     8
wenzelm@34187
     9
wenzelm@36035
    10
import scala.collection.generic.Addable
wenzelm@36717
    11
import scala.collection.IndexedSeq
wenzelm@34187
    12
import scala.collection.immutable.PagedSeq
wenzelm@34187
    13
import scala.util.parsing.input.{OffsetPosition, Position => InputPosition, Reader}
wenzelm@31648
    14
import scala.util.parsing.combinator.RegexParsers
wenzelm@31648
    15
wenzelm@34187
    16
import java.io.{File, InputStream, BufferedInputStream, FileInputStream}
wenzelm@34187
    17
wenzelm@31648
    18
wenzelm@31648
    19
object Scan
wenzelm@31648
    20
{
wenzelm@44285
    21
  /** context of partial scans **/
wenzelm@44285
    22
wenzelm@44285
    23
  sealed abstract class Context
wenzelm@44285
    24
  case object Finished extends Context
wenzelm@44285
    25
  case class Quoted(quote: String) extends Context
wenzelm@44285
    26
  case object Verbatim extends Context
wenzelm@44285
    27
  case class Comment(depth: Int) extends Context
wenzelm@44285
    28
wenzelm@44285
    29
wenzelm@44285
    30
wenzelm@31649
    31
  /** Lexicon -- position tree **/
wenzelm@31648
    32
wenzelm@31649
    33
  object Lexicon
wenzelm@31649
    34
  {
wenzelm@43631
    35
    protected case class Tree(val branches: Map[Char, (String, Tree)])
wenzelm@31649
    36
    private val empty_tree = Tree(Map())
wenzelm@31648
    37
wenzelm@31649
    38
    val empty: Lexicon = new Lexicon
wenzelm@31774
    39
    def apply(elems: String*): Lexicon = empty ++ elems
wenzelm@31648
    40
  }
wenzelm@31648
    41
wenzelm@36035
    42
  class Lexicon extends Addable[String, Lexicon] with RegexParsers
wenzelm@31648
    43
  {
wenzelm@31649
    44
    /* representation */
wenzelm@31649
    45
wenzelm@31649
    46
    import Lexicon.Tree
wenzelm@31776
    47
    protected val main_tree: Tree = Lexicon.empty_tree
wenzelm@31649
    48
wenzelm@31649
    49
wenzelm@31650
    50
    /* auxiliary operations */
wenzelm@31650
    51
wenzelm@31650
    52
    private def content(tree: Tree, result: List[String]): List[String] =
wenzelm@31650
    53
      (result /: tree.branches.toList) ((res, entry) =>
wenzelm@31650
    54
        entry match { case (_, (s, tr)) =>
wenzelm@31650
    55
          if (s.isEmpty) content(tr, res) else content(tr, s :: res) })
wenzelm@31650
    56
wenzelm@31650
    57
    private def lookup(str: CharSequence): Option[(Boolean, Tree)] =
wenzelm@31650
    58
    {
wenzelm@31650
    59
      val len = str.length
wenzelm@31650
    60
      def look(tree: Tree, tip: Boolean, i: Int): Option[(Boolean, Tree)] =
wenzelm@31650
    61
      {
wenzelm@31650
    62
        if (i < len) {
wenzelm@31650
    63
          tree.branches.get(str.charAt(i)) match {
wenzelm@31650
    64
            case Some((s, tr)) => look(tr, !s.isEmpty, i + 1)
wenzelm@31650
    65
            case None => None
wenzelm@31650
    66
          }
wenzelm@31650
    67
        } else Some(tip, tree)
wenzelm@31650
    68
      }
wenzelm@31650
    69
      look(main_tree, false, 0)
wenzelm@31650
    70
    }
wenzelm@31650
    71
wenzelm@31650
    72
    def completions(str: CharSequence): List[String] =
wenzelm@31776
    73
      lookup(str) match {
wenzelm@31650
    74
        case Some((true, tree)) => content(tree, List(str.toString))
wenzelm@31650
    75
        case Some((false, tree)) => content(tree, Nil)
wenzelm@31650
    76
        case None => Nil
wenzelm@31776
    77
      }
wenzelm@31650
    78
wenzelm@31650
    79
wenzelm@36035
    80
    /* pseudo Set methods */
wenzelm@31649
    81
wenzelm@36036
    82
    def iterator: Iterator[String] = content(main_tree, Nil).sortWith(_ <= _).iterator
wenzelm@31649
    83
wenzelm@36035
    84
    override def toString: String = iterator.mkString("Lexicon(", ", ", ")")
wenzelm@31649
    85
wenzelm@36035
    86
    def empty: Lexicon = Lexicon.empty
wenzelm@36035
    87
    def isEmpty: Boolean = main_tree.branches.isEmpty
wenzelm@31649
    88
wenzelm@31774
    89
    def contains(elem: String): Boolean =
wenzelm@31774
    90
      lookup(elem) match {
wenzelm@31650
    91
        case Some((tip, _)) => tip
wenzelm@31650
    92
        case _ => false
wenzelm@31650
    93
      }
wenzelm@31649
    94
wenzelm@36035
    95
wenzelm@36035
    96
    /* Addable methods */
wenzelm@36035
    97
wenzelm@36035
    98
    def repr = this
wenzelm@36035
    99
wenzelm@31774
   100
    def + (elem: String): Lexicon =
wenzelm@31774
   101
      if (contains(elem)) this
wenzelm@31774
   102
      else {
wenzelm@31774
   103
        val len = elem.length
wenzelm@31774
   104
        def extend(tree: Tree, i: Int): Tree =
wenzelm@31774
   105
          if (i < len) {
wenzelm@31774
   106
            val c = elem.charAt(i)
wenzelm@31774
   107
            val end = (i + 1 == len)
wenzelm@31774
   108
            tree.branches.get(c) match {
wenzelm@31774
   109
              case Some((s, tr)) =>
wenzelm@31774
   110
                Tree(tree.branches +
wenzelm@31774
   111
                  (c -> (if (end) elem else s, extend(tr, i + 1))))
wenzelm@31774
   112
              case None =>
wenzelm@31774
   113
                Tree(tree.branches +
wenzelm@31774
   114
                  (c -> (if (end) elem else "", extend(Lexicon.empty_tree, i + 1))))
wenzelm@31774
   115
            }
wenzelm@34144
   116
          }
wenzelm@34144
   117
          else tree
wenzelm@31774
   118
        val old = this
wenzelm@31774
   119
        new Lexicon { override val main_tree = extend(old.main_tree, 0) }
wenzelm@31648
   120
      }
wenzelm@31774
   121
wenzelm@31649
   122
wenzelm@34306
   123
wenzelm@34146
   124
    /** RegexParsers methods **/
wenzelm@31649
   125
wenzelm@31649
   126
    override val whiteSpace = "".r
wenzelm@31649
   127
wenzelm@34146
   128
wenzelm@44285
   129
    /* optional termination */
wenzelm@44285
   130
wenzelm@44285
   131
    def opt_term[T](p: => Parser[T]): Parser[Option[T]] =
wenzelm@44285
   132
      p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None)
wenzelm@44285
   133
wenzelm@44285
   134
wenzelm@34146
   135
    /* keywords from lexicon */
wenzelm@34146
   136
wenzelm@34149
   137
    def keyword: Parser[String] = new Parser[String]
wenzelm@34144
   138
    {
wenzelm@31649
   139
      def apply(in: Input) =
wenzelm@31649
   140
      {
wenzelm@31649
   141
        val source = in.source
wenzelm@31649
   142
        val offset = in.offset
wenzelm@31649
   143
        val len = source.length - offset
wenzelm@31649
   144
wenzelm@34149
   145
        def scan(tree: Tree, result: String, i: Int): String =
wenzelm@31649
   146
        {
wenzelm@31649
   147
          if (i < len) {
wenzelm@31649
   148
            tree.branches.get(source.charAt(offset + i)) match {
wenzelm@34149
   149
              case Some((s, tr)) => scan(tr, if (s.isEmpty) result else s, i + 1)
wenzelm@34144
   150
              case None => result
wenzelm@31649
   151
            }
wenzelm@34144
   152
          }
wenzelm@34144
   153
          else result
wenzelm@31649
   154
        }
wenzelm@34149
   155
        val result = scan(main_tree, "", 0)
wenzelm@34149
   156
        if (result.isEmpty) Failure("keyword expected", in)
wenzelm@34149
   157
        else Success(result, in.drop(result.length))
wenzelm@31649
   158
      }
wenzelm@31649
   159
    }.named("keyword")
wenzelm@31649
   160
wenzelm@34146
   161
wenzelm@34164
   162
    /* symbol range */
wenzelm@34146
   163
wenzelm@34164
   164
    def symbol_range(pred: String => Boolean, min_count: Int, max_count: Int): Parser[String] =
wenzelm@34149
   165
      new Parser[String]
wenzelm@34144
   166
      {
wenzelm@34146
   167
        def apply(in: Input) =
wenzelm@34146
   168
        {
wenzelm@34146
   169
          val start = in.offset
wenzelm@34146
   170
          val end = in.source.length
wenzelm@34146
   171
          val matcher = new Symbol.Matcher(in.source)
wenzelm@34146
   172
wenzelm@34146
   173
          var i = start
wenzelm@34146
   174
          var count = 0
wenzelm@34146
   175
          var finished = false
wenzelm@34146
   176
          while (!finished) {
wenzelm@34146
   177
            if (i < end && count < max_count) {
wenzelm@34146
   178
              val n = matcher(i, end)
wenzelm@34146
   179
              val sym = in.source.subSequence(i, i + n).toString
wenzelm@34146
   180
              if (pred(sym)) { i += n; count += 1 }
wenzelm@34146
   181
              else finished = true
wenzelm@34146
   182
            }
wenzelm@34146
   183
            else finished = true
wenzelm@34146
   184
          }
wenzelm@34149
   185
          if (count < min_count) Failure("bad input", in)
wenzelm@34149
   186
          else Success(in.source.subSequence(start, i).toString, in.drop(i - start))
wenzelm@34146
   187
        }
wenzelm@34164
   188
      }.named("symbol_range")
wenzelm@34146
   189
wenzelm@34164
   190
    def one(pred: String => Boolean): Parser[String] = symbol_range(pred, 1, 1)
wenzelm@34164
   191
    def many(pred: String => Boolean): Parser[String] = symbol_range(pred, 0, Integer.MAX_VALUE)
wenzelm@34164
   192
    def many1(pred: String => Boolean): Parser[String] = symbol_range(pred, 1, Integer.MAX_VALUE)
wenzelm@34149
   193
wenzelm@34149
   194
wenzelm@34152
   195
    /* quoted strings */
wenzelm@34149
   196
wenzelm@44285
   197
    private def quoted_body(quote: String): Parser[String] =
wenzelm@44285
   198
    {
wenzelm@44285
   199
      rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" |
wenzelm@44285
   200
        (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ^^ (_.mkString)
wenzelm@44285
   201
    }
wenzelm@44285
   202
wenzelm@34152
   203
    private def quoted(quote: String): Parser[String] =
wenzelm@34152
   204
    {
wenzelm@44285
   205
      quote ~ quoted_body(quote) ~ quote ^^ { case x ~ y ~ z => x + y + z }
wenzelm@34152
   206
    }.named("quoted")
wenzelm@34149
   207
wenzelm@34149
   208
    def quoted_content(quote: String, source: String): String =
wenzelm@34149
   209
    {
wenzelm@34149
   210
      require(parseAll(quoted(quote), source).successful)
wenzelm@34189
   211
      val body = source.substring(1, source.length - 1)
wenzelm@34189
   212
      if (body.exists(_ == '\\')) {
wenzelm@34189
   213
        val content =
wenzelm@40769
   214
          rep(many1(sym => sym != quote && sym != "\\") |
wenzelm@34189
   215
              "\\" ~> (quote | "\\" | """\d\d\d""".r ^^ { case x => x.toInt.toChar.toString }))
wenzelm@34189
   216
        parseAll(content ^^ (_.mkString), body).get
wenzelm@34189
   217
      }
wenzelm@34189
   218
      else body
wenzelm@34149
   219
    }
wenzelm@34149
   220
wenzelm@44285
   221
    def quoted_context(quote: String, ctxt: Context): Parser[(String, Context)] =
wenzelm@44285
   222
    {
wenzelm@44285
   223
      ctxt match {
wenzelm@44285
   224
        case Finished =>
wenzelm@44285
   225
          quote ~ quoted_body(quote) ~ opt_term(quote) ^^
wenzelm@44285
   226
            { case x ~ y ~ Some(z) => (x + y + z, Finished)
wenzelm@44285
   227
              case x ~ y ~ None => (x + y, Quoted(quote)) }
wenzelm@44285
   228
        case Quoted(q) if q == quote =>
wenzelm@44285
   229
          quoted_body(quote) ~ opt_term(quote) ^^
wenzelm@44285
   230
            { case x ~ Some(y) => (x + y, Finished)
wenzelm@44285
   231
              case x ~ None => (x, ctxt) }
wenzelm@44285
   232
        case _ => failure("")
wenzelm@44285
   233
      }
wenzelm@44285
   234
    }.named("quoted_context")
wenzelm@44285
   235
wenzelm@34149
   236
wenzelm@34152
   237
    /* verbatim text */
wenzelm@34152
   238
wenzelm@44285
   239
    private def verbatim_body: Parser[String] =
wenzelm@44285
   240
      rep(many1(sym => sym != "*") | """\*(?!\})""".r) ^^ (_.mkString)
wenzelm@44285
   241
wenzelm@34152
   242
    private def verbatim: Parser[String] =
wenzelm@34152
   243
    {
wenzelm@44285
   244
      "{*" ~ verbatim_body ~ "*}" ^^ { case x ~ y ~ z => x + y + z }
wenzelm@34152
   245
    }.named("verbatim")
wenzelm@34149
   246
wenzelm@34149
   247
    def verbatim_content(source: String): String =
wenzelm@34149
   248
    {
wenzelm@34149
   249
      require(parseAll(verbatim, source).successful)
wenzelm@34149
   250
      source.substring(2, source.length - 2)
wenzelm@34149
   251
    }
wenzelm@34149
   252
wenzelm@44285
   253
    def verbatim_context(ctxt: Context): Parser[(String, Context)] =
wenzelm@44285
   254
    {
wenzelm@44285
   255
      ctxt match {
wenzelm@44285
   256
        case Finished =>
wenzelm@44285
   257
          "{*" ~ verbatim_body ~ opt_term("*}") ^^
wenzelm@44285
   258
            { case x ~ y ~ Some(z) => (x + y + z, Finished)
wenzelm@44285
   259
              case x ~ y ~ None => (x + y, Verbatim) }
wenzelm@44285
   260
        case Verbatim =>
wenzelm@44285
   261
          verbatim_body ~ opt_term("*}") ^^
wenzelm@44285
   262
            { case x ~ Some(y) => (x + y, Finished)
wenzelm@44285
   263
              case x ~ None => (x, Verbatim) }
wenzelm@44285
   264
        case _ => failure("")
wenzelm@44285
   265
      }
wenzelm@44285
   266
    }.named("verbatim_context")
wenzelm@44285
   267
wenzelm@34149
   268
wenzelm@34152
   269
    /* nested comments */
wenzelm@34152
   270
wenzelm@44286
   271
    private def comment_depth(depth: Int): Parser[(String, Int)] = new Parser[(String, Int)]
wenzelm@34152
   272
    {
wenzelm@44286
   273
      require(depth >= 0)
wenzelm@44286
   274
wenzelm@34152
   275
      val comment_text =
wenzelm@43312
   276
        rep1(many1(sym => sym != "*" && sym != "(") | """\*(?!\))|\((?!\*)""".r)
wenzelm@34152
   277
wenzelm@34152
   278
      def apply(in: Input) =
wenzelm@34152
   279
      {
wenzelm@34152
   280
        var rest = in
wenzelm@43312
   281
        def try_parse[A](p: Parser[A]): Boolean =
wenzelm@34152
   282
        {
wenzelm@43312
   283
          parse(p ^^^ (), rest) match {
wenzelm@34152
   284
            case Success(_, next) => { rest = next; true }
wenzelm@34152
   285
            case _ => false
wenzelm@34152
   286
          }
wenzelm@34152
   287
        }
wenzelm@44286
   288
        var d = depth
wenzelm@34152
   289
        var finished = false
wenzelm@34152
   290
        while (!finished) {
wenzelm@44286
   291
          if (try_parse("(*")) d += 1
wenzelm@44286
   292
          else if (d > 0 && try_parse("*)")) d -= 1
wenzelm@44286
   293
          else if (d == 0 || !try_parse(comment_text)) finished = true
wenzelm@34152
   294
        }
wenzelm@44286
   295
        if (in.offset < rest.offset)
wenzelm@44286
   296
          Success((in.source.subSequence(in.offset, rest.offset).toString, d), rest)
wenzelm@34152
   297
        else Failure("comment expected", in)
wenzelm@34152
   298
      }
wenzelm@44286
   299
    }.named("comment_depth")
wenzelm@44286
   300
wenzelm@44286
   301
    def comment: Parser[String] =
wenzelm@44286
   302
      comment_depth(0) ^? { case (x, d) if d == 0 => x }
wenzelm@44286
   303
wenzelm@44286
   304
    def comment_context(ctxt: Context): Parser[(String, Context)] =
wenzelm@44286
   305
    {
wenzelm@44286
   306
      val depth =
wenzelm@44286
   307
        ctxt match {
wenzelm@44286
   308
          case Finished => 0
wenzelm@44286
   309
          case Comment(d) => d
wenzelm@44286
   310
          case _ => -1
wenzelm@44286
   311
        }
wenzelm@44286
   312
      if (depth >= 0)
wenzelm@44286
   313
        comment_depth(depth) ^^
wenzelm@44286
   314
          { case (x, 0) => (x, Finished)
wenzelm@44286
   315
            case (x, d) => (x, Comment(d)) }
wenzelm@44286
   316
      else failure("")
wenzelm@44286
   317
    }
wenzelm@34152
   318
wenzelm@34152
   319
    def comment_content(source: String): String =
wenzelm@34152
   320
    {
wenzelm@34152
   321
      require(parseAll(comment, source).successful)
wenzelm@34152
   322
      source.substring(2, source.length - 2)
wenzelm@34152
   323
    }
wenzelm@34152
   324
wenzelm@34152
   325
wenzelm@34149
   326
    /* outer syntax tokens */
wenzelm@34149
   327
wenzelm@44285
   328
    private def delimited_token: Parser[Token] =
wenzelm@44285
   329
    {
wenzelm@44285
   330
      val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
wenzelm@44285
   331
      val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
wenzelm@44285
   332
      val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x))
wenzelm@44285
   333
      val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))
wenzelm@44285
   334
wenzelm@44285
   335
      string | (alt_string | (verb | cmt))
wenzelm@44285
   336
    }
wenzelm@44285
   337
wenzelm@44569
   338
    private def other_token(is_command: String => Boolean)
wenzelm@44285
   339
      : Parser[Token] =
wenzelm@34149
   340
    {
wenzelm@44569
   341
      val id = one(Symbol.is_letter) ~ many(Symbol.is_letdig) ^^ { case x ~ y => x + y }
wenzelm@44569
   342
      val nat = many1(Symbol.is_digit)
wenzelm@40536
   343
      val natdot = nat ~ "." ~ nat ^^ { case x ~ y ~ z => x + y + z }
wenzelm@34149
   344
      val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x }
wenzelm@34149
   345
wenzelm@34149
   346
      val ident = id ~ rep("." ~> id) ^^
wenzelm@36966
   347
        { case x ~ Nil => Token(Token.Kind.IDENT, x)
wenzelm@36966
   348
          case x ~ ys => Token(Token.Kind.LONG_IDENT, (x :: ys).mkString(".")) }
wenzelm@34149
   349
wenzelm@36966
   350
      val var_ = "?" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.VAR, x + y) }
wenzelm@36966
   351
      val type_ident = "'" ~ id ^^ { case x ~ y => Token(Token.Kind.TYPE_IDENT, x + y) }
wenzelm@36966
   352
      val type_var = "?'" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.TYPE_VAR, x + y) }
wenzelm@36966
   353
      val nat_ = nat ^^ (x => Token(Token.Kind.NAT, x))
wenzelm@40536
   354
      val float =
wenzelm@40536
   355
        ("-" ~ natdot ^^ { case x ~ y => x + y } | natdot) ^^ (x => Token(Token.Kind.FLOAT, x))
wenzelm@34149
   356
wenzelm@34149
   357
      val sym_ident =
wenzelm@44569
   358
        (many1(Symbol.is_symbolic_char) | one(sym => Symbol.is_symbolic(sym))) ^^
wenzelm@36966
   359
        (x => Token(Token.Kind.SYM_IDENT, x))
wenzelm@34149
   360
wenzelm@44569
   361
      val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))
wenzelm@34149
   362
wenzelm@44285
   363
      // FIXME check
wenzelm@44569
   364
      val junk = many(sym => !(Symbol.is_blank(sym)))
wenzelm@44569
   365
      val junk1 = many1(sym => !(Symbol.is_blank(sym)))
wenzelm@34149
   366
wenzelm@34272
   367
      val bad_delimiter =
wenzelm@38649
   368
        ("\"" | "`" | "{*" | "(*") ~ junk ^^ { case x ~ y => Token(Token.Kind.UNPARSED, x + y) }
wenzelm@44285
   369
      val bad = junk1 ^^ (x => Token(Token.Kind.UNPARSED, x))
wenzelm@34149
   370
wenzelm@44285
   371
      val command_keyword =
wenzelm@44285
   372
        keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
wenzelm@34149
   373
wenzelm@44285
   374
      space | (bad_delimiter |
wenzelm@44285
   375
        (((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
wenzelm@44285
   376
          command_keyword) | bad))
wenzelm@44285
   377
    }
wenzelm@34149
   378
wenzelm@44569
   379
    def token(is_command: String => Boolean): Parser[Token] =
wenzelm@44569
   380
      delimited_token | other_token(is_command)
wenzelm@44285
   381
wenzelm@44569
   382
    def token_context(is_command: String => Boolean, ctxt: Context): Parser[(Token, Context)] =
wenzelm@44285
   383
    {
wenzelm@44285
   384
      val string =
wenzelm@44285
   385
        quoted_context("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) }
wenzelm@44285
   386
      val alt_string =
wenzelm@44285
   387
        quoted_context("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) }
wenzelm@44286
   388
      val verb = verbatim_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) }
wenzelm@44286
   389
      val cmt = comment_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.COMMENT, x), c) }
wenzelm@44569
   390
      val other = other_token(is_command) ^^ { case x => (x, Finished) }
wenzelm@44285
   391
wenzelm@44286
   392
      string | (alt_string | (verb | (cmt | other)))
wenzelm@34149
   393
    }
wenzelm@31648
   394
  }
wenzelm@34187
   395
wenzelm@34187
   396
wenzelm@34187
   397
wenzelm@34187
   398
  /** read file without decoding -- enables efficient length operation **/
wenzelm@34187
   399
wenzelm@36717
   400
  private class Restricted_Seq(seq: IndexedSeq[Char], start: Int, end: Int)
wenzelm@34187
   401
    extends CharSequence
wenzelm@34187
   402
  {
wenzelm@34187
   403
    def charAt(i: Int): Char =
wenzelm@34187
   404
      if (0 <= i && i < length) seq(start + i)
wenzelm@34187
   405
      else throw new IndexOutOfBoundsException
wenzelm@34187
   406
wenzelm@34187
   407
    def length: Int = end - start  // avoid potentially expensive seq.length
wenzelm@34187
   408
wenzelm@34187
   409
    def subSequence(i: Int, j: Int): CharSequence =
wenzelm@34187
   410
      if (0 <= i && i <= j && j <= length) new Restricted_Seq(seq, start + i, start + j)
wenzelm@34187
   411
      else throw new IndexOutOfBoundsException
wenzelm@34187
   412
wenzelm@34187
   413
    override def toString: String =
wenzelm@34187
   414
    {
wenzelm@34187
   415
      val buf = new StringBuilder(length)
wenzelm@34187
   416
      for (offset <- start until end) buf.append(seq(offset))
wenzelm@34187
   417
      buf.toString
wenzelm@34187
   418
    }
wenzelm@34187
   419
  }
wenzelm@34187
   420
wenzelm@34187
   421
  abstract class Byte_Reader extends Reader[Char] { def close: Unit }
wenzelm@34187
   422
wenzelm@34187
   423
  def byte_reader(file: File): Byte_Reader =
wenzelm@34187
   424
  {
wenzelm@34187
   425
    val stream = new BufferedInputStream(new FileInputStream(file))
wenzelm@34187
   426
    val seq = new PagedSeq(
wenzelm@34187
   427
      (buf: Array[Char], offset: Int, length: Int) =>
wenzelm@34187
   428
        {
wenzelm@34187
   429
          var i = 0
wenzelm@34187
   430
          var c = 0
wenzelm@34187
   431
          var eof = false
wenzelm@34187
   432
          while (!eof && i < length) {
wenzelm@34187
   433
            c = stream.read
wenzelm@34187
   434
            if (c == -1) eof = true
wenzelm@34187
   435
            else { buf(offset + i) = c.toChar; i += 1 }
wenzelm@34187
   436
          }
wenzelm@34187
   437
          if (i > 0) i else -1
wenzelm@34187
   438
        })
wenzelm@34187
   439
    val restricted_seq = new Restricted_Seq(seq, 0, file.length.toInt)
wenzelm@34187
   440
wenzelm@34187
   441
    class Paged_Reader(override val offset: Int) extends Byte_Reader
wenzelm@34187
   442
    {
wenzelm@34187
   443
      override lazy val source: CharSequence = restricted_seq
wenzelm@34187
   444
      def first: Char = if (seq.isDefinedAt(offset)) seq(offset) else '\032'
wenzelm@34187
   445
      def rest: Paged_Reader = if (seq.isDefinedAt(offset)) new Paged_Reader(offset + 1) else this
wenzelm@34187
   446
      def pos: InputPosition = new OffsetPosition(source, offset)
wenzelm@34187
   447
      def atEnd: Boolean = !seq.isDefinedAt(offset)
wenzelm@34187
   448
      override def drop(n: Int): Paged_Reader = new Paged_Reader(offset + n)
wenzelm@34187
   449
      def close { stream.close }
wenzelm@34187
   450
    }
wenzelm@34187
   451
    new Paged_Reader(0)
wenzelm@34187
   452
  }
wenzelm@31648
   453
}