1 /* Title: Pure/General/symbol.scala
4 Detecting and recoding Isabelle symbols.
11 import scala.io.Source
12 import scala.collection.jcl
13 import scala.util.matching.Regex
19 /** Symbol regexps **/
21 private val plain = new Regex("""(?xs)
22 [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)
24 private val symbol = new Regex("""(?xs)
26 \^? [A-Za-z][A-Za-z0-9_']* |
27 \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
29 private val bad_symbol = new Regex("(?xs) (?!" + symbol + ")" +
30 """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
33 val regex = new Regex(plain + "|" + symbol + "|" + bad_symbol + "| .")
35 // prefix of another symbol
36 def is_open(s: String): Boolean =
39 len == 1 && Character.isHighSurrogate(s(0)) ||
42 len > 2 && s(len - 1) != '>'
48 private class Recoder(list: List[(String, String)])
50 private val (min, max) =
54 for ((x, _) <- list) {
63 val table = new jcl.HashMap[String, String] // reasonably efficient?
64 for ((x, y) <- list) table + (x -> y)
67 def recode(text: String): String =
70 val matcher = regex.pattern.matcher(text)
71 val result = new StringBuilder(len)
75 if (min <= c && c <= max) {
76 matcher.region(i, len)
79 result.append(table.get(x) getOrElse x)
82 else { result.append(c); i += 1 }
90 /** Symbol interpretation **/
92 class Interpretation(symbol_decls: Iterator[String])
96 private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
97 private val key = new Regex("""(?xs) (.+): """)
99 private def read_decl(decl: String): (String, Map[String, String]) =
101 def err() = error("Bad symbol declaration: " + decl)
103 def read_props(props: List[String]): Map[String, String] =
107 case _ :: Nil => err()
108 case key(x) :: y :: rest => read_props(rest) + (x -> y)
112 decl.split("\\s+").toList match {
114 case sym :: props => (sym, read_props(props))
118 private val symbols: List[(String, Map[String, String])] =
119 for (decl <- symbol_decls.toList if !empty.pattern.matcher(decl).matches)
120 yield read_decl(decl)
123 /* main recoder methods */
125 private val (decoder, encoder) =
129 (sym, props) <- symbols
131 try { Integer.decode(props("code")).intValue }
133 case _: NoSuchElementException => error("Missing code for symbol " + sym)
134 case _: NumberFormatException => error("Bad code for symbol " + sym)
136 val ch = new String(Character.toChars(code))
138 (new Recoder(mapping ++ (for ((x, y) <- mapping) yield ("\\" + x, y))),
139 new Recoder(for ((x, y) <- mapping) yield (y, x)))
142 def decode(text: String) = decoder.recode(text)
143 def encode(text: String) = encoder.recode(text)