clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
authorwenzelm
Mon, 11 Jan 2010 21:37:48 +0100
changeset 34319f879b649ac4c
parent 34318 c47a2228fead
child 34320 c1509b9d624f
clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
src/Pure/General/scan.scala
src/Pure/General/symbol.scala
     1.1 --- a/src/Pure/General/scan.scala	Mon Jan 11 20:50:03 2010 +0100
     1.2 +++ b/src/Pure/General/scan.scala	Mon Jan 11 21:37:48 2010 +0100
     1.3 @@ -179,7 +179,7 @@
     1.4      private def quoted(quote: String): Parser[String] =
     1.5      {
     1.6        quote ~
     1.7 -        rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_closed(sym)) |
     1.8 +        rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_wellformed(sym)) |
     1.9            "\\" + quote | "\\\\" |
    1.10            (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~
    1.11        quote ^^ { case x ~ ys ~ z => x + ys.mkString + z }
    1.12 @@ -191,7 +191,7 @@
    1.13        val body = source.substring(1, source.length - 1)
    1.14        if (body.exists(_ == '\\')) {
    1.15          val content =
    1.16 -          rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_closed(sym)) |
    1.17 +          rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_wellformed(sym)) |
    1.18                "\\" ~> (quote | "\\" | """\d\d\d""".r ^^ { case x => x.toInt.toChar.toString }))
    1.19          parseAll(content ^^ (_.mkString), body).get
    1.20        }
    1.21 @@ -203,7 +203,7 @@
    1.22  
    1.23      private def verbatim: Parser[String] =
    1.24      {
    1.25 -      "{*" ~ rep(many1(sym => sym != "*" && Symbol.is_closed(sym)) | """\*(?!\})""".r) ~ "*}" ^^
    1.26 +      "{*" ~ rep(many1(sym => sym != "*" && Symbol.is_wellformed(sym)) | """\*(?!\})""".r) ~ "*}" ^^
    1.27          { case x ~ ys ~ z => x + ys.mkString + z }
    1.28      }.named("verbatim")
    1.29  
    1.30 @@ -219,7 +219,7 @@
    1.31      def comment: Parser[String] = new Parser[String]
    1.32      {
    1.33        val comment_text =
    1.34 -        rep(many1(sym => sym != "*" && sym != "(" && Symbol.is_closed(sym)) |
    1.35 +        rep(many1(sym => sym != "*" && sym != "(" && Symbol.is_wellformed(sym)) |
    1.36            """\*(?!\))|\((?!\*)""".r)
    1.37        val comment_open = "(*" ~ comment_text ^^^ ()
    1.38        val comment_close = comment_text ~ "*)" ^^^ ()
    1.39 @@ -276,7 +276,7 @@
    1.40  
    1.41        val sym_ident =
    1.42          (many1(symbols.is_symbolic_char) |
    1.43 -          one(sym => symbols.is_symbolic(sym) & Symbol.is_closed(sym))) ^^
    1.44 +          one(sym => symbols.is_symbolic(sym) & Symbol.is_wellformed(sym))) ^^
    1.45          (x => Token(Token_Kind.SYM_IDENT, x))
    1.46  
    1.47        val space = many1(symbols.is_blank) ^^ (x => Token(Token_Kind.SPACE, x))
     2.1 --- a/src/Pure/General/symbol.scala	Mon Jan 11 20:50:03 2010 +0100
     2.2 +++ b/src/Pure/General/symbol.scala	Mon Jan 11 21:37:48 2010 +0100
     2.3 @@ -23,6 +23,8 @@
     2.4        \^? [A-Za-z][A-Za-z0-9_']* |
     2.5        \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
     2.6  
     2.7 +  // FIXME cover bad surrogates!?
     2.8 +  // FIXME check wrt. ML version
     2.9    private val bad_symbol = new Regex("(?xs) (?!" + symbol + ")" +
    2.10      """ \\ < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
    2.11  
    2.12 @@ -32,14 +34,10 @@
    2.13  
    2.14    /* basic matching */
    2.15  
    2.16 -  def is_closed(c: Char): Boolean =
    2.17 -    !(c == '\\' || Character.isHighSurrogate(c))
    2.18 +  def is_plain(c: Char): Boolean = !(c == '\\' || '\ud800' <= c && c <= '\udfff')
    2.19  
    2.20 -  def is_closed(s: CharSequence): Boolean =
    2.21 -  {
    2.22 -    if (s.length == 1) is_closed(s.charAt(0))
    2.23 -    else !bad_symbol.pattern.matcher(s).matches
    2.24 -  }
    2.25 +  def is_wellformed(s: CharSequence): Boolean =
    2.26 +    s.length == 1 && is_plain(s.charAt(0)) || !bad_symbol.pattern.matcher(s).matches
    2.27  
    2.28    class Matcher(text: CharSequence)
    2.29    {
    2.30 @@ -47,7 +45,7 @@
    2.31      def apply(start: Int, end: Int): Int =
    2.32      {
    2.33        require(0 <= start && start < end && end <= text.length)
    2.34 -      if (is_closed(text.charAt(start))) 1
    2.35 +      if (is_plain(text.charAt(start))) 1
    2.36        else {
    2.37          matcher.region(start, end).lookingAt
    2.38          matcher.group.length
    2.39 @@ -177,7 +175,7 @@
    2.40          }
    2.41        }
    2.42        decl.split("\\s+").toList match {
    2.43 -        case sym :: props if sym.length > 1 && is_closed(sym) => (sym, read_props(props))
    2.44 +        case sym :: props if sym.length > 1 && is_wellformed(sym) => (sym, read_props(props))
    2.45          case _ => err()
    2.46        }
    2.47      }