simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
tuned implicit build/init messages;
1 /* Title: Pure/General/symbol.scala
4 Detecting and recoding Isabelle symbols.
10 import scala.collection.mutable
11 import scala.util.matching.Regex
21 private val static_spaces = space * 4000
23 def spaces(k: Int): String =
26 if (k < static_spaces.length) static_spaces.substring(0, k)
31 /* ASCII characters */
33 def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
34 def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9'
35 def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\''
37 def is_ascii_letdig(c: Char): Boolean =
38 is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)
40 def is_ascii_identifier(s: String): Boolean =
41 s.length > 0 && is_ascii_letter(s(0)) && s.substring(1).forall(is_ascii_letdig)
46 private val plain = new Regex("""(?xs)
47 [^\r\\\ud800-\udfff\ufffd] | [\ud800-\udbff][\udc00-\udfff] """)
49 private val physical_newline = new Regex("""(?xs) \n | \r\n | \r """)
51 private val symbol = new Regex("""(?xs)
53 \^? [A-Za-z][A-Za-z0-9_']* |
54 \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
56 private val malformed_symbol = new Regex("(?xs) (?!" + symbol + ")" +
57 """ [\ud800-\udbff\ufffd] | \\<\^? """)
60 new Regex(plain + "|" + physical_newline + "|" + symbol + "|" + malformed_symbol + "| .")
65 def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || '\ud800' <= c && c <= '\udfff')
67 def is_physical_newline(s: String): Boolean =
68 s == "\n" || s == "\r" || s == "\r\n"
70 def is_malformed(s: String): Boolean =
71 !(s.length == 1 && is_plain(s(0))) && malformed_symbol.pattern.matcher(s).matches
73 class Matcher(text: CharSequence)
75 private val matcher = regex_total.pattern.matcher(text)
76 def apply(start: Int, end: Int): Int =
78 require(0 <= start && start < end && end <= text.length)
79 if (is_plain(text.charAt(start))) 1
81 matcher.region(start, end).lookingAt
90 private val char_symbols: Array[String] =
91 (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray
93 def iterator(text: CharSequence): Iterator[String] =
96 private val matcher = new Matcher(text)
98 def hasNext = i < text.length
101 val n = matcher(i, text.length)
105 val c = text.charAt(i)
106 if (c < char_symbols.length) char_symbols(c)
107 else text.subSequence(i, i + n).toString
109 else text.subSequence(i, i + n).toString
116 /* decoding offsets */
118 class Index(text: CharSequence)
120 case class Entry(chr: Int, sym: Int)
121 val index: Array[Entry] =
123 val matcher = new Matcher(text)
124 val buf = new mutable.ArrayBuffer[Entry]
127 while (chr < text.length) {
128 val n = matcher(chr, text.length)
131 if (n > 1) buf += Entry(chr, sym)
135 def decode(sym1: Int): Int =
138 val end = index.length
139 def bisect(a: Int, b: Int): Int =
143 if (sym < index(c).sym) bisect(a, c)
144 else if (c + 1 == end || sym < index(c + 1).sym) c
145 else bisect(c + 1, b)
149 val i = bisect(0, end)
151 else index(i).chr + sym - index(i).sym
153 def decode(range: Text.Range): Text.Range = range.map(decode(_))
159 private class Recoder(list: List[(String, String)])
161 private val (min, max) =
165 for ((x, _) <- list) {
174 var tab = Map[String, String]()
175 for ((x, y) <- list) {
177 case None => tab += (x -> y)
179 error("Duplicate mapping of \"" + x + "\" to \"" + y + "\" vs. \"" + z + "\"")
184 def recode(text: String): String =
186 val len = text.length
187 val matcher = regex_total.pattern.matcher(text)
188 val result = new StringBuilder(len)
192 if (min <= c && c <= max) {
193 matcher.region(i, len).lookingAt
194 val x = matcher.group
195 result.append(table.get(x) getOrElse x)
198 else { result.append(c); i += 1 }
206 /** symbol interpretation **/
208 private lazy val symbols =
210 Isabelle_System.try_read(Path.split(Isabelle_System.getenv_strict("ISABELLE_SYMBOLS"))))
212 private class Interpretation(symbols_spec: String)
216 private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
217 private val key = new Regex("""(?xs) (.+): """)
219 private def read_decl(decl: String): (String, Map[String, String]) =
221 def err() = error("Bad symbol declaration: " + decl)
223 def read_props(props: List[String]): Map[String, String] =
227 case _ :: Nil => err()
228 case key(x) :: y :: rest => read_props(rest) + (x -> y)
232 decl.split("\\s+").toList match {
233 case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props))
238 private val symbols: List[(String, Map[String, String])] =
240 for (decl <- split_lines(symbols_spec) if !empty.pattern.matcher(decl).matches)
241 yield read_decl(decl)): _*) toList
244 /* misc properties */
246 val names: Map[String, String] =
248 val name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
249 Map((for ((sym @ name(a), _) <- symbols) yield (sym -> a)): _*)
252 val abbrevs: Map[String, String] =
254 for ((sym, props) <- symbols if props.isDefinedAt("abbrev"))
255 yield (sym -> props("abbrev"))): _*)
260 private val (decoder, encoder) =
264 (sym, props) <- symbols
266 try { Integer.decode(props("code")).intValue }
268 case _: NoSuchElementException => error("Missing code for symbol " + sym)
269 case _: NumberFormatException => error("Bad code for symbol " + sym)
271 val ch = new String(Character.toChars(code))
273 if (code < 128) error("Illegal ASCII code for symbol " + sym)
276 (new Recoder(mapping),
277 new Recoder(mapping map { case (x, y) => (y, x) }))
280 def decode(text: String): String = decoder.recode(text)
281 def encode(text: String): String = encoder.recode(text)
283 private def recode_set(elems: String*): Set[String] =
285 val content = elems.toList
286 Set((content ::: content.map(decode)): _*)
289 private def recode_map[A](elems: (String, A)*): Map[String, A] =
291 val content = elems.toList
292 Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*)
298 val fonts: Map[String, String] =
300 for ((sym, props) <- symbols if props.isDefinedAt("font"))
301 yield (sym -> props("font"))): _*)
303 val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList
304 val font_index: Map[String, Int] = Map((font_names zip (0 until font_names.length).toList): _*)
309 val letters = recode_set(
310 "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
311 "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
312 "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
313 "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
315 "\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>",
316 "\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>",
317 "\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>",
318 "\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>",
319 "\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>",
320 "\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>",
321 "\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>",
322 "\\<x>", "\\<y>", "\\<z>",
324 "\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>",
325 "\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>",
326 "\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>",
327 "\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>",
328 "\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>",
329 "\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>",
330 "\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>",
331 "\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>",
332 "\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>",
334 "\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>",
335 "\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>",
336 "\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>",
337 "\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>",
338 "\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>",
339 "\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>",
340 "\\<Psi>", "\\<Omega>",
342 "\\<^isub>", "\\<^isup>")
345 recode_set(space, "\t", "\n", "\u000B", "\f", "\r", "\\<spacespace>", "\\<^newline>")
348 Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
351 /* control symbols */
353 val ctrl_decoded: Set[String] =
354 Set((for ((sym, _) <- symbols if sym.startsWith("\\<^")) yield decode(sym)): _*)
356 val subscript_decoded = Set(decode("\\<^sub>"), decode("\\<^isub>"))
357 val superscript_decoded = Set(decode("\\<^sup>"), decode("\\<^isup>"))
359 val bold_decoded = decode("\\<^bold>")
360 val bsub_decoded = decode("\\<^bsub>")
361 val esub_decoded = decode("\\<^esub>")
362 val bsup_decoded = decode("\\<^bsup>")
363 val esup_decoded = decode("\\<^esup>")
369 def names: Map[String, String] = symbols.names
370 def abbrevs: Map[String, String] = symbols.abbrevs
372 def decode(text: String): String = symbols.decode(text)
373 def encode(text: String): String = symbols.encode(text)
375 def fonts: Map[String, String] = symbols.fonts
376 def font_names: List[String] = symbols.font_names
377 def font_index: Map[String, Int] = symbols.font_index
378 def lookup_font(sym: String): Option[Int] = symbols.fonts.get(sym).map(font_index(_))
383 def is_letter(sym: String): Boolean = symbols.letters.contains(sym)
384 def is_digit(sym: String): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9'
385 def is_quasi(sym: String): Boolean = sym == "_" || sym == "'"
386 def is_letdig(sym: String): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym)
387 def is_blank(sym: String): Boolean = symbols.blanks.contains(sym)
388 def is_symbolic_char(sym: String): Boolean = symbols.sym_chars.contains(sym)
389 def is_symbolic(sym: String): Boolean =
390 sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^")
393 /* control symbols */
395 def is_ctrl(sym: String): Boolean =
396 sym.startsWith("\\<^") || symbols.ctrl_decoded.contains(sym)
398 def is_controllable(sym: String): Boolean =
399 !is_blank(sym) && !is_ctrl(sym) && !is_malformed(sym)
401 def is_subscript_decoded(sym: String): Boolean = symbols.subscript_decoded.contains(sym)
402 def is_superscript_decoded(sym: String): Boolean = symbols.superscript_decoded.contains(sym)
403 def is_bold_decoded(sym: String): Boolean = sym == symbols.bold_decoded
404 def is_bsub_decoded(sym: String): Boolean = sym == symbols.bsub_decoded
405 def is_esub_decoded(sym: String): Boolean = sym == symbols.esub_decoded
406 def is_bsup_decoded(sym: String): Boolean = sym == symbols.bsup_decoded
407 def is_esup_decoded(sym: String): Boolean = sym == symbols.esup_decoded