1 /* Title: Pure/General/symbol.scala
4 Detecting and recoding Isabelle symbols.
10 import scala.collection.mutable
11 import scala.util.matching.Regex
24 private val static_spaces = space * 4000
26 def spaces(k: Int): String =
29 if (k < static_spaces.length) static_spaces.substring(0, k)
34 /* ASCII characters */
36 def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
37 def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9'
38 def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\''
40 def is_ascii_letdig(c: Char): Boolean =
41 is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)
43 def is_ascii_identifier(s: String): Boolean =
44 s.length > 0 && is_ascii_letter(s(0)) && s.substring(1).forall(is_ascii_letdig)
49 private val plain = new Regex("""(?xs)
50 [^\r\\\ud800-\udfff\ufffd] | [\ud800-\udbff][\udc00-\udfff] """)
52 private val physical_newline = new Regex("""(?xs) \n | \r\n | \r """)
54 private val symbol = new Regex("""(?xs)
56 \^? [A-Za-z][A-Za-z0-9_']* |
57 \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
59 private val malformed_symbol = new Regex("(?xs) (?!" + symbol + ")" +
60 """ [\ud800-\udbff\ufffd] | \\<\^? """)
63 new Regex(plain + "|" + physical_newline + "|" + symbol + "|" + malformed_symbol + "| .")
68 def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || '\ud800' <= c && c <= '\udfff')
70 def is_physical_newline(s: Symbol): Boolean =
71 s == "\n" || s == "\r" || s == "\r\n"
73 def is_malformed(s: Symbol): Boolean =
74 !(s.length == 1 && is_plain(s(0))) && malformed_symbol.pattern.matcher(s).matches
76 class Matcher(text: CharSequence)
78 private val matcher = regex_total.pattern.matcher(text)
79 def apply(start: Int, end: Int): Int =
81 require(0 <= start && start < end && end <= text.length)
82 if (is_plain(text.charAt(start))) 1
84 matcher.region(start, end).lookingAt
93 private val char_symbols: Array[Symbol] =
94 (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray
96 def iterator(text: CharSequence): Iterator[Symbol] =
99 private val matcher = new Matcher(text)
101 def hasNext = i < text.length
104 val n = matcher(i, text.length)
108 val c = text.charAt(i)
109 if (c < char_symbols.length) char_symbols(c)
110 else text.subSequence(i, i + n).toString
112 else text.subSequence(i, i + n).toString
119 /* decoding offsets */
121 class Index(text: CharSequence)
123 case class Entry(chr: Int, sym: Int)
124 val index: Array[Entry] =
126 val matcher = new Matcher(text)
127 val buf = new mutable.ArrayBuffer[Entry]
130 while (chr < text.length) {
131 val n = matcher(chr, text.length)
134 if (n > 1) buf += Entry(chr, sym)
138 def decode(sym1: Int): Int =
141 val end = index.length
142 def bisect(a: Int, b: Int): Int =
146 if (sym < index(c).sym) bisect(a, c)
147 else if (c + 1 == end || sym < index(c + 1).sym) c
148 else bisect(c + 1, b)
152 val i = bisect(0, end)
154 else index(i).chr + sym - index(i).sym
156 def decode(range: Text.Range): Text.Range = range.map(decode(_))
162 private class Recoder(list: List[(String, String)])
164 private val (min, max) =
168 for ((x, _) <- list) {
177 var tab = Map[String, String]()
178 for ((x, y) <- list) {
180 case None => tab += (x -> y)
182 error("Duplicate mapping of \"" + x + "\" to \"" + y + "\" vs. \"" + z + "\"")
187 def recode(text: String): String =
189 val len = text.length
190 val matcher = regex_total.pattern.matcher(text)
191 val result = new StringBuilder(len)
195 if (min <= c && c <= max) {
196 matcher.region(i, len).lookingAt
197 val x = matcher.group
198 result.append(table.get(x) getOrElse x)
201 else { result.append(c); i += 1 }
209 /** symbol interpretation **/
211 private lazy val symbols =
213 Isabelle_System.try_read(Path.split(Isabelle_System.getenv_strict("ISABELLE_SYMBOLS"))))
215 private class Interpretation(symbols_spec: String)
219 private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
220 private val key = new Regex("""(?xs) (.+): """)
222 private def read_decl(decl: String): (Symbol, Map[String, String]) =
224 def err() = error("Bad symbol declaration: " + decl)
226 def read_props(props: List[String]): Map[String, String] =
230 case _ :: Nil => err()
231 case key(x) :: y :: rest => read_props(rest) + (x -> y)
235 decl.split("\\s+").toList match {
236 case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props))
241 private val symbols: List[(Symbol, Map[String, String])] =
243 for (decl <- split_lines(symbols_spec) if !empty.pattern.matcher(decl).matches)
244 yield read_decl(decl)): _*) toList
247 /* misc properties */
249 val names: Map[Symbol, String] =
251 val name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
252 Map((for ((sym @ name(a), _) <- symbols) yield (sym -> a)): _*)
255 val abbrevs: Map[Symbol, String] =
257 for ((sym, props) <- symbols if props.isDefinedAt("abbrev"))
258 yield (sym -> props("abbrev"))): _*)
263 private val (decoder, encoder) =
267 (sym, props) <- symbols
269 try { Integer.decode(props("code")).intValue }
271 case _: NoSuchElementException => error("Missing code for symbol " + sym)
272 case _: NumberFormatException => error("Bad code for symbol " + sym)
274 val ch = new String(Character.toChars(code))
276 if (code < 128) error("Illegal ASCII code for symbol " + sym)
279 (new Recoder(mapping),
280 new Recoder(mapping map { case (x, y) => (y, x) }))
283 def decode(text: String): String = decoder.recode(text)
284 def encode(text: String): String = encoder.recode(text)
286 private def recode_set(elems: String*): Set[String] =
288 val content = elems.toList
289 Set((content ::: content.map(decode)): _*)
292 private def recode_map[A](elems: (String, A)*): Map[String, A] =
294 val content = elems.toList
295 Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*)
301 val fonts: Map[Symbol, String] =
303 for ((sym, props) <- symbols if props.isDefinedAt("font"))
304 yield (sym -> props("font"))): _*)
306 val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList
307 val font_index: Map[String, Int] = Map((font_names zip (0 until font_names.length).toList): _*)
312 val letters = recode_set(
313 "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
314 "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
315 "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
316 "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
318 "\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>",
319 "\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>",
320 "\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>",
321 "\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>",
322 "\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>",
323 "\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>",
324 "\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>",
325 "\\<x>", "\\<y>", "\\<z>",
327 "\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>",
328 "\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>",
329 "\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>",
330 "\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>",
331 "\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>",
332 "\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>",
333 "\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>",
334 "\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>",
335 "\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>",
337 "\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>",
338 "\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>",
339 "\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>",
340 "\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>",
341 "\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>",
342 "\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>",
343 "\\<Psi>", "\\<Omega>",
345 "\\<^isub>", "\\<^isup>")
348 recode_set(space, "\t", "\n", "\u000B", "\f", "\r", "\\<spacespace>", "\\<^newline>")
351 Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
354 /* control symbols */
356 val ctrl_decoded: Set[Symbol] =
357 Set((for ((sym, _) <- symbols if sym.startsWith("\\<^")) yield decode(sym)): _*)
359 val subscript_decoded = Set(decode("\\<^sub>"), decode("\\<^isub>"))
360 val superscript_decoded = Set(decode("\\<^sup>"), decode("\\<^isup>"))
362 val bold_decoded = decode("\\<^bold>")
363 val bsub_decoded = decode("\\<^bsub>")
364 val esub_decoded = decode("\\<^esub>")
365 val bsup_decoded = decode("\\<^bsup>")
366 val esup_decoded = decode("\\<^esup>")
372 def names: Map[Symbol, String] = symbols.names
373 def abbrevs: Map[Symbol, String] = symbols.abbrevs
375 def decode(text: String): String = symbols.decode(text)
376 def encode(text: String): String = symbols.encode(text)
378 def fonts: Map[Symbol, String] = symbols.fonts
379 def font_names: List[String] = symbols.font_names
380 def font_index: Map[String, Int] = symbols.font_index
381 def lookup_font(sym: Symbol): Option[Int] = symbols.fonts.get(sym).map(font_index(_))
386 def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym)
387 def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9'
388 def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'"
389 def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym)
390 def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym)
391 def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym)
392 def is_symbolic(sym: Symbol): Boolean =
393 sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^")
396 /* control symbols */
398 def is_ctrl(sym: Symbol): Boolean =
399 sym.startsWith("\\<^") || symbols.ctrl_decoded.contains(sym)
401 def is_controllable(sym: Symbol): Boolean =
402 !is_blank(sym) && !is_ctrl(sym) && !is_malformed(sym)
404 def is_subscript_decoded(sym: Symbol): Boolean = symbols.subscript_decoded.contains(sym)
405 def is_superscript_decoded(sym: Symbol): Boolean = symbols.superscript_decoded.contains(sym)
406 def is_bold_decoded(sym: Symbol): Boolean = sym == symbols.bold_decoded
407 def is_bsub_decoded(sym: Symbol): Boolean = sym == symbols.bsub_decoded
408 def is_esub_decoded(sym: Symbol): Boolean = sym == symbols.esub_decoded
409 def is_bsup_decoded(sym: Symbol): Boolean = sym == symbols.bsup_decoded
410 def is_esup_decoded(sym: Symbol): Boolean = sym == symbols.esup_decoded