clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
1.1 --- a/src/Pure/General/scan.scala Mon Jan 11 20:50:03 2010 +0100
1.2 +++ b/src/Pure/General/scan.scala Mon Jan 11 21:37:48 2010 +0100
1.3 @@ -179,7 +179,7 @@
1.4 private def quoted(quote: String): Parser[String] =
1.5 {
1.6 quote ~
1.7 - rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_closed(sym)) |
1.8 + rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_wellformed(sym)) |
1.9 "\\" + quote | "\\\\" |
1.10 (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~
1.11 quote ^^ { case x ~ ys ~ z => x + ys.mkString + z }
1.12 @@ -191,7 +191,7 @@
1.13 val body = source.substring(1, source.length - 1)
1.14 if (body.exists(_ == '\\')) {
1.15 val content =
1.16 - rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_closed(sym)) |
1.17 + rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_wellformed(sym)) |
1.18 "\\" ~> (quote | "\\" | """\d\d\d""".r ^^ { case x => x.toInt.toChar.toString }))
1.19 parseAll(content ^^ (_.mkString), body).get
1.20 }
1.21 @@ -203,7 +203,7 @@
1.22
1.23 private def verbatim: Parser[String] =
1.24 {
1.25 - "{*" ~ rep(many1(sym => sym != "*" && Symbol.is_closed(sym)) | """\*(?!\})""".r) ~ "*}" ^^
1.26 + "{*" ~ rep(many1(sym => sym != "*" && Symbol.is_wellformed(sym)) | """\*(?!\})""".r) ~ "*}" ^^
1.27 { case x ~ ys ~ z => x + ys.mkString + z }
1.28 }.named("verbatim")
1.29
1.30 @@ -219,7 +219,7 @@
1.31 def comment: Parser[String] = new Parser[String]
1.32 {
1.33 val comment_text =
1.34 - rep(many1(sym => sym != "*" && sym != "(" && Symbol.is_closed(sym)) |
1.35 + rep(many1(sym => sym != "*" && sym != "(" && Symbol.is_wellformed(sym)) |
1.36 """\*(?!\))|\((?!\*)""".r)
1.37 val comment_open = "(*" ~ comment_text ^^^ ()
1.38 val comment_close = comment_text ~ "*)" ^^^ ()
1.39 @@ -276,7 +276,7 @@
1.40
1.41 val sym_ident =
1.42 (many1(symbols.is_symbolic_char) |
1.43 - one(sym => symbols.is_symbolic(sym) & Symbol.is_closed(sym))) ^^
1.44 + one(sym => symbols.is_symbolic(sym) & Symbol.is_wellformed(sym))) ^^
1.45 (x => Token(Token_Kind.SYM_IDENT, x))
1.46
1.47 val space = many1(symbols.is_blank) ^^ (x => Token(Token_Kind.SPACE, x))
2.1 --- a/src/Pure/General/symbol.scala Mon Jan 11 20:50:03 2010 +0100
2.2 +++ b/src/Pure/General/symbol.scala Mon Jan 11 21:37:48 2010 +0100
2.3 @@ -23,6 +23,8 @@
2.4 \^? [A-Za-z][A-Za-z0-9_']* |
2.5 \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
2.6
2.7 + // FIXME cover bad surrogates!?
2.8 + // FIXME check wrt. ML version
2.9 private val bad_symbol = new Regex("(?xs) (?!" + symbol + ")" +
2.10 """ \\ < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
2.11
2.12 @@ -32,14 +34,10 @@
2.13
2.14 /* basic matching */
2.15
2.16 - def is_closed(c: Char): Boolean =
2.17 - !(c == '\\' || Character.isHighSurrogate(c))
2.18 + def is_plain(c: Char): Boolean = !(c == '\\' || '\ud800' <= c && c <= '\udfff')
2.19
2.20 - def is_closed(s: CharSequence): Boolean =
2.21 - {
2.22 - if (s.length == 1) is_closed(s.charAt(0))
2.23 - else !bad_symbol.pattern.matcher(s).matches
2.24 - }
2.25 + def is_wellformed(s: CharSequence): Boolean =
2.26 + s.length == 1 && is_plain(s.charAt(0)) || !bad_symbol.pattern.matcher(s).matches
2.27
2.28 class Matcher(text: CharSequence)
2.29 {
2.30 @@ -47,7 +45,7 @@
2.31 def apply(start: Int, end: Int): Int =
2.32 {
2.33 require(0 <= start && start < end && end <= text.length)
2.34 - if (is_closed(text.charAt(start))) 1
2.35 + if (is_plain(text.charAt(start))) 1
2.36 else {
2.37 matcher.region(start, end).lookingAt
2.38 matcher.group.length
2.39 @@ -177,7 +175,7 @@
2.40 }
2.41 }
2.42 decl.split("\\s+").toList match {
2.43 - case sym :: props if sym.length > 1 && is_closed(sym) => (sym, read_props(props))
2.44 + case sym :: props if sym.length > 1 && is_wellformed(sym) => (sym, read_props(props))
2.45 case _ => err()
2.46 }
2.47 }