1 /* Title: Pure/General/symbol.scala
4 Detecting and recoding Isabelle symbols.
10 import scala.collection.{jcl, mutable}
11 import scala.util.matching.Regex
18 private val plain = new Regex("""(?xs)
19 [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)
21 private val symbol = new Regex("""(?xs)
23 \^? [A-Za-z][A-Za-z0-9_']* |
24 \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
26 private val bad_symbol = new Regex("(?xs) (?!" + symbol + ")" +
27 """ \\ < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
30 val regex = new Regex(plain + "|" + symbol + "|" + bad_symbol + "| .")
32 // prefix of another symbol
33 def is_open(s: CharSequence): Boolean =
36 len == 1 && Character.isHighSurrogate(s.charAt(0)) ||
39 len > 2 && s.charAt(len - 1) != '>'
45 private def could_open(c: Char): Boolean =
46 c == '\\' || Character.isHighSurrogate(c)
48 def elements(text: CharSequence) = new Iterator[String] {
49 private val matcher = regex.pattern.matcher(text)
51 def hasNext = i < text.length
54 if (could_open(text.charAt(i))) {
55 matcher.region(i, text.length).lookingAt
59 val s = text.subSequence(i, i + len)
66 /* decoding offsets */
68 class Index(text: CharSequence)
70 case class Entry(chr: Int, sym: Int)
71 val index: Array[Entry] =
73 val matcher = regex.pattern.matcher(text)
74 val buf = new mutable.ArrayBuffer[Entry]
77 while (chr < text.length) {
79 if (could_open(text.charAt(chr))) {
80 matcher.region(chr, text.length).lookingAt
86 if (len > 1) buf += Entry(chr, sym)
90 def decode(sym: Int): Int =
92 val end = index.length
93 def bisect(a: Int, b: Int): Int =
97 if (sym < index(c).sym) bisect(a, c)
98 else if (c + 1 == end || sym < index(c + 1).sym) c
103 val i = bisect(0, end)
105 else index(i).chr + sym - index(i).sym
112 private class Recoder(list: List[(String, String)])
114 private val (min, max) =
118 for ((x, _) <- list) {
127 val table = new jcl.HashMap[String, String] // reasonably efficient?
128 for ((x, y) <- list) table + (x -> y)
131 def recode(text: String): String =
133 val len = text.length
134 val matcher = regex.pattern.matcher(text)
135 val result = new StringBuilder(len)
139 if (min <= c && c <= max) {
140 matcher.region(i, len).lookingAt
141 val x = matcher.group
142 result.append(table.get(x) getOrElse x)
145 else { result.append(c); i += 1 }
153 /** Symbol interpretation **/
155 class Interpretation(symbol_decls: Iterator[String])
159 private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
160 private val key = new Regex("""(?xs) (.+): """)
162 private def read_decl(decl: String): (String, Map[String, String]) =
164 def err() = error("Bad symbol declaration: " + decl)
166 def read_props(props: List[String]): Map[String, String] =
170 case _ :: Nil => err()
171 case key(x) :: y :: rest => read_props(rest) + (x -> y)
175 decl.split("\\s+").toList match {
177 case sym :: props => (sym, read_props(props))
181 private val symbols: List[(String, Map[String, String])] =
182 for (decl <- symbol_decls.toList if !empty.pattern.matcher(decl).matches)
183 yield read_decl(decl)
186 /* misc properties */
188 val names: Map[String, String] = {
189 val name = new Regex("""\\<([A-Za-z][A-Za-z0-9_']*)>""")
190 Map((for ((sym @ name(a), _) <- symbols) yield (sym -> a)): _*)
193 val abbrevs: Map[String, String] = Map((
194 for ((sym, props) <- symbols if props.isDefinedAt("abbrev"))
195 yield (sym -> props("abbrev"))): _*)
198 /* main recoder methods */
200 private val (decoder, encoder) =
204 (sym, props) <- symbols
206 try { Integer.decode(props("code")).intValue }
208 case _: NoSuchElementException => error("Missing code for symbol " + sym)
209 case _: NumberFormatException => error("Bad code for symbol " + sym)
211 val ch = new String(Character.toChars(code))
213 (new Recoder(mapping),
214 new Recoder(mapping map { case (x, y) => (y, x) }))
217 def decode(text: String) = decoder.recode(text)
218 def encode(text: String) = encoder.recode(text)