src/Pure/General/scan.scala
changeset 34187 7b659c1561f1
parent 34164 0a0a19153626
child 34189 10c5871ec684
     1.1 --- a/src/Pure/General/scan.scala	Sun Dec 27 21:30:54 2009 +0100
     1.2 +++ b/src/Pure/General/scan.scala	Sun Dec 27 21:33:35 2009 +0100
     1.3 @@ -6,8 +6,13 @@
     1.4  
     1.5  package isabelle
     1.6  
     1.7 +
     1.8 +import scala.collection.immutable.PagedSeq
     1.9 +import scala.util.parsing.input.{OffsetPosition, Position => InputPosition, Reader}
    1.10  import scala.util.parsing.combinator.RegexParsers
    1.11  
    1.12 +import java.io.{File, InputStream, BufferedInputStream, FileInputStream}
    1.13 +
    1.14  
    1.15  object Scan
    1.16  {
    1.17 @@ -182,7 +187,7 @@
    1.18      def quoted_content(quote: String, source: String): String =
    1.19      {
    1.20        require(parseAll(quoted(quote), source).successful)
    1.21 -      source.substring(1, source.length - 1)  // FIXME proper escapes, recode utf8 (!?)
    1.22 +      source.substring(1, source.length - 1)  // FIXME proper escapes
    1.23      }
    1.24  
    1.25  
    1.26 @@ -284,4 +289,62 @@
    1.27        bad_input
    1.28      }
    1.29    }
    1.30 +
    1.31 +
    1.32 +
    1.33 +  /** read file without decoding -- enables efficient length operation **/
    1.34 +
    1.35 +  private class Restricted_Seq(seq: RandomAccessSeq[Char], start: Int, end: Int)
    1.36 +    extends CharSequence
    1.37 +  {
    1.38 +    def charAt(i: Int): Char =
    1.39 +      if (0 <= i && i < length) seq(start + i)
    1.40 +      else throw new IndexOutOfBoundsException
    1.41 +
    1.42 +    def length: Int = end - start  // avoid potentially expensive seq.length
    1.43 +
    1.44 +    def subSequence(i: Int, j: Int): CharSequence =
    1.45 +      if (0 <= i && i <= j && j <= length) new Restricted_Seq(seq, start + i, start + j)
    1.46 +      else throw new IndexOutOfBoundsException
    1.47 +
    1.48 +    override def toString: String =
    1.49 +    {
    1.50 +      val buf = new StringBuilder(length)
    1.51 +      for (offset <- start until end) buf.append(seq(offset))
    1.52 +      buf.toString
    1.53 +    }
    1.54 +  }
    1.55 +
    1.56 +  abstract class Byte_Reader extends Reader[Char] { def close: Unit }
    1.57 +
    1.58 +  def byte_reader(file: File): Byte_Reader =
    1.59 +  {
    1.60 +    val stream = new BufferedInputStream(new FileInputStream(file))
    1.61 +    val seq = new PagedSeq(
    1.62 +      (buf: Array[Char], offset: Int, length: Int) =>
    1.63 +        {
    1.64 +          var i = 0
    1.65 +          var c = 0
    1.66 +          var eof = false
    1.67 +          while (!eof && i < length) {
    1.68 +            c = stream.read
    1.69 +            if (c == -1) eof = true
    1.70 +            else { buf(offset + i) = c.toChar; i += 1 }
    1.71 +          }
    1.72 +          if (i > 0) i else -1
    1.73 +        })
    1.74 +    val restricted_seq = new Restricted_Seq(seq, 0, file.length.toInt)
    1.75 +
    1.76 +    class Paged_Reader(override val offset: Int) extends Byte_Reader
    1.77 +    {
    1.78 +      override lazy val source: CharSequence = restricted_seq
    1.79 +      def first: Char = if (seq.isDefinedAt(offset)) seq(offset) else '\032'
    1.80 +      def rest: Paged_Reader = if (seq.isDefinedAt(offset)) new Paged_Reader(offset + 1) else this
    1.81 +      def pos: InputPosition = new OffsetPosition(source, offset)
    1.82 +      def atEnd: Boolean = !seq.isDefinedAt(offset)
    1.83 +      override def drop(n: Int): Paged_Reader = new Paged_Reader(offset + n)
    1.84 +      def close { stream.close }
    1.85 +    }
    1.86 +    new Paged_Reader(0)
    1.87 +  }
    1.88  }