1.1 --- a/src/Pure/General/symbol.scala Tue Jun 09 20:18:21 2009 +0200
1.2 +++ b/src/Pure/General/symbol.scala Tue Jun 09 20:29:23 2009 +0200
1.3 @@ -6,37 +6,49 @@
1.4
1.5 package isabelle
1.6
1.7 -import java.util.regex.Pattern
1.8 import java.io.File
1.9 +
1.10 import scala.io.Source
1.11 -import scala.collection.jcl.HashMap
1.12 +import scala.collection.jcl
1.13 +import scala.util.matching.Regex
1.14
1.15
1.16 -object Symbol {
1.17 +object Symbol
1.18 +{
1.19
1.20 /** Symbol regexps **/
1.21
1.22 - private def compile(s: String) =
1.23 - Pattern.compile(s, Pattern.COMMENTS | Pattern.DOTALL)
1.24 + private val plain = new Regex("""(?xs)
1.25 + [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)
1.26
1.27 - private val plain_pattern = compile(""" [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)
1.28 -
1.29 - private val symbol_pattern = compile(""" \\ \\? < (?:
1.30 + private val symbol = new Regex("""(?xs)
1.31 + \\ \\? < (?:
1.32 \^? [A-Za-z][A-Za-z0-9_']* |
1.33 \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
1.34
1.35 - private val bad_symbol_pattern = compile("(?!" + symbol_pattern + ")" +
1.36 + private val bad_symbol = new Regex("(?xs) (?!" + symbol + ")" +
1.37 """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
1.38
1.39 // total pattern
1.40 - val pattern = compile(plain_pattern + "|" + symbol_pattern + "|" + bad_symbol_pattern + "| .")
1.41 + val regex = new Regex(plain + "|" + symbol + "|" + bad_symbol + "| .")
1.42
1.43 + // prefix of another symbol
1.44 + def is_open(s: String): Boolean =
1.45 + {
1.46 + val len = s.length
1.47 + len == 1 && Character.isHighSurrogate(s(0)) ||
1.48 + s == "\\" ||
1.49 + s == "\\<" ||
1.50 + len > 2 && s(len - 1) != '>'
1.51 + }
1.52
1.53
1.54 /** Recoding **/
1.55
1.56 - private class Recoder(list: List[(String, String)]) {
1.57 - private val (min, max) = {
1.58 + private class Recoder(list: List[(String, String)])
1.59 + {
1.60 + private val (min, max) =
1.61 + {
1.62 var min = '\uffff'
1.63 var max = '\u0000'
1.64 for ((x, _) <- list) {
1.65 @@ -46,14 +58,16 @@
1.66 }
1.67 (min, max)
1.68 }
1.69 - private val table = {
1.70 - val table = new HashMap[String, String]
1.71 + private val table =
1.72 + {
1.73 + val table = new jcl.HashMap[String, String] // reasonably efficient?
1.74 for ((x, y) <- list) table + (x -> y)
1.75 table
1.76 }
1.77 - def recode(text: String) = {
1.78 + def recode(text: String): String =
1.79 + {
1.80 val len = text.length
1.81 - val matcher = pattern.matcher(text)
1.82 + val matcher = regex.pattern.matcher(text)
1.83 val result = new StringBuilder(len)
1.84 var i = 0
1.85 while (i < len) {
1.86 @@ -62,10 +76,7 @@
1.87 matcher.region(i, len)
1.88 matcher.lookingAt
1.89 val x = matcher.group
1.90 - table.get(x) match {
1.91 - case Some(y) => result.append(y)
1.92 - case None => result.append(x)
1.93 - }
1.94 + result.append(table.get(x) getOrElse x)
1.95 i = matcher.end
1.96 }
1.97 else { result.append(c); i += 1 }
1.98 @@ -80,75 +91,56 @@
1.99
1.100 class Interpretation(symbol_decls: Iterator[String])
1.101 {
1.102 - private val symbols = new HashMap[String, HashMap[String, String]]
1.103 - private var decoder: Recoder = null
1.104 - private var encoder: Recoder = null
1.105 + /* read symbols */
1.106 +
1.107 + private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
1.108 + private val key = new Regex("""(?xs) (.+): """)
1.109 +
1.110 + private def read_decl(decl: String): (String, Map[String, String]) =
1.111 + {
1.112 + def err() = error("Bad symbol declaration: " + decl)
1.113 +
1.114 + def read_props(props: List[String]): Map[String, String] =
1.115 + {
1.116 + props match {
1.117 + case Nil => Map()
1.118 + case _ :: Nil => err()
1.119 + case key(x) :: y :: rest => read_props(rest) + (x -> y)
1.120 + case _ => err()
1.121 + }
1.122 + }
1.123 + decl.split("\\s+").toList match {
1.124 + case Nil => err()
1.125 + case sym :: props => (sym, read_props(props))
1.126 + }
1.127 + }
1.128 +
1.129 + private val symbols: List[(String, Map[String, String])] =
1.130 + for (decl <- symbol_decls.toList if !empty.pattern.matcher(decl).matches)
1.131 + yield read_decl(decl)
1.132 +
1.133 +
1.134 + /* main recoder methods */
1.135 +
1.136 + private val (decoder, encoder) =
1.137 + {
1.138 + val mapping =
1.139 + for {
1.140 + (sym, props) <- symbols
1.141 + val code =
1.142 + try { Integer.decode(props("code")).intValue }
1.143 + catch {
1.144 + case _: NoSuchElementException => error("Missing code for symbol " + sym)
1.145 + case _: NumberFormatException => error("Bad code for symbol " + sym)
1.146 + }
1.147 + val ch = new String(Character.toChars(code))
1.148 + } yield (sym, ch)
1.149 + (new Recoder(mapping ++ (for ((x, y) <- mapping) yield ("\\" + x, y))),
1.150 + new Recoder(for ((x, y) <- mapping) yield (y, x)))
1.151 + }
1.152
1.153 def decode(text: String) = decoder.recode(text)
1.154 def encode(text: String) = encoder.recode(text)
1.155
1.156 -
1.157 - /* read symbols */
1.158 -
1.159 - private val empty_pattern = compile(""" ^\s* (?: \#.* )? $ """)
1.160 - private val blank_pattern = compile(""" \s+ """)
1.161 - private val key_pattern = compile(""" (.+): """)
1.162 -
1.163 - private def read_decl(decl: String) = {
1.164 - def err() = error("Bad symbol declaration: " + decl)
1.165 -
1.166 - def read_props(props: List[String], tab: HashMap[String, String])
1.167 - {
1.168 - props match {
1.169 - case Nil => ()
1.170 - case _ :: Nil => err()
1.171 - case key :: value :: rest => {
1.172 - val key_matcher = key_pattern.matcher(key)
1.173 - if (key_matcher.matches) {
1.174 - tab + (key_matcher.group(1) -> value)
1.175 - read_props(rest, tab)
1.176 - }
1.177 - else err ()
1.178 - }
1.179 - }
1.180 - }
1.181 -
1.182 - if (!empty_pattern.matcher(decl).matches) {
1.183 - blank_pattern.split(decl).toList match {
1.184 - case Nil => err()
1.185 - case symbol :: props => {
1.186 - val tab = new HashMap[String, String]
1.187 - read_props(props, tab)
1.188 - symbols + (symbol -> tab)
1.189 - }
1.190 - }
1.191 - }
1.192 - }
1.193 -
1.194 -
1.195 - /* init tables */
1.196 -
1.197 - private def get_code(entry: (String, HashMap[String, String])) = {
1.198 - val (symbol, props) = entry
1.199 - val code =
1.200 - try { Integer.decode(props("code")).intValue }
1.201 - catch {
1.202 - case _: NoSuchElementException => error("Missing code for symbol " + symbol)
1.203 - case _: NumberFormatException => error("Bad code for symbol " + symbol)
1.204 - }
1.205 - (symbol, new String(Character.toChars(code)))
1.206 - }
1.207 -
1.208 - private def init_recoders() = {
1.209 - val list = symbols.elements.toList.map(get_code)
1.210 - decoder = new Recoder(list ++ (for ((x, y) <- list) yield ("\\" + x, y)))
1.211 - encoder = new Recoder(for ((x, y) <- list) yield (y, x))
1.212 - }
1.213 -
1.214 -
1.215 - /* constructor */
1.216 -
1.217 - symbol_decls.foreach(read_decl)
1.218 - init_recoders()
1.219 }
1.220 }