1 /* Title: Pure/General/symbol.scala
4 Detecting and recoding Isabelle symbols.
10 import scala.collection.mutable
11 import scala.util.matching.Regex
21 private val static_spaces = space * 4000
23 def spaces(k: Int): String =
26 if (k < static_spaces.length) static_spaces.substring(0, k)
31 /* ASCII characters */
33 def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
34 def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9'
35 def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\''
37 def is_ascii_letdig(c: Char): Boolean =
38 is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)
40 def is_ascii_identifier(s: String): Boolean =
41 s.length > 0 && is_ascii_letter(s(0)) && s.substring(1).forall(is_ascii_letdig)
46 private val plain = new Regex("""(?xs)
47 [^\r\\\ud800-\udfff\ufffd] | [\ud800-\udbff][\udc00-\udfff] """)
49 private val physical_newline = new Regex("""(?xs) \n | \r\n | \r """)
51 private val symbol = new Regex("""(?xs)
53 \^? [A-Za-z][A-Za-z0-9_']* |
54 \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
56 private val malformed_symbol = new Regex("(?xs) (?!" + symbol + ")" +
57 """ [\ud800-\udbff\ufffd] | \\<\^? """)
60 new Regex(plain + "|" + physical_newline + "|" + symbol + "|" + malformed_symbol + "| .")
65 def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || '\ud800' <= c && c <= '\udfff')
67 def is_physical_newline(s: CharSequence): Boolean =
68 "\n".contentEquals(s) || "\r".contentEquals(s) || "\r\n".contentEquals(s)
70 def is_malformed(s: CharSequence): Boolean =
71 !(s.length == 1 && is_plain(s.charAt(0))) && malformed_symbol.pattern.matcher(s).matches
73 class Matcher(text: CharSequence)
75 private val matcher = regex_total.pattern.matcher(text)
76 def apply(start: Int, end: Int): Int =
78 require(0 <= start && start < end && end <= text.length)
79 if (is_plain(text.charAt(start))) 1
81 matcher.region(start, end).lookingAt
90 def iterator(text: CharSequence) = new Iterator[CharSequence]
92 private val matcher = new Matcher(text)
94 def hasNext = i < text.length
97 val n = matcher(i, text.length)
98 val s = text.subSequence(i, i + n)
105 /* decoding offsets */
107 class Index(text: CharSequence)
109 case class Entry(chr: Int, sym: Int)
110 val index: Array[Entry] =
112 val matcher = new Matcher(text)
113 val buf = new mutable.ArrayBuffer[Entry]
116 while (chr < text.length) {
117 val n = matcher(chr, text.length)
120 if (n > 1) buf += Entry(chr, sym)
124 def decode(sym1: Int): Int =
127 val end = index.length
128 def bisect(a: Int, b: Int): Int =
132 if (sym < index(c).sym) bisect(a, c)
133 else if (c + 1 == end || sym < index(c + 1).sym) c
134 else bisect(c + 1, b)
138 val i = bisect(0, end)
140 else index(i).chr + sym - index(i).sym
142 def decode(range: Text.Range): Text.Range = range.map(decode(_))
148 private class Recoder(list: List[(String, String)])
150 private val (min, max) =
154 for ((x, _) <- list) {
163 var tab = Map[String, String]()
164 for ((x, y) <- list) {
166 case None => tab += (x -> y)
168 error("Duplicate mapping of \"" + x + "\" to \"" + y + "\" vs. \"" + z + "\"")
173 def recode(text: String): String =
175 val len = text.length
176 val matcher = regex_total.pattern.matcher(text)
177 val result = new StringBuilder(len)
181 if (min <= c && c <= max) {
182 matcher.region(i, len).lookingAt
183 val x = matcher.group
184 result.append(table.get(x) getOrElse x)
187 else { result.append(c); i += 1 }
195 /** Symbol interpretation **/
197 class Interpretation(symbol_decls: List[String])
201 private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
202 private val key = new Regex("""(?xs) (.+): """)
204 private def read_decl(decl: String): (String, Map[String, String]) =
206 def err() = error("Bad symbol declaration: " + decl)
208 def read_props(props: List[String]): Map[String, String] =
212 case _ :: Nil => err()
213 case key(x) :: y :: rest => read_props(rest) + (x -> y)
217 decl.split("\\s+").toList match {
218 case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props))
223 private val symbols: List[(String, Map[String, String])] =
225 for (decl <- symbol_decls if !empty.pattern.matcher(decl).matches)
226 yield read_decl(decl)): _*) toList
229 /* misc properties */
231 val names: Map[String, String] =
233 val name = new Regex("""\\<([A-Za-z][A-Za-z0-9_']*)>""")
234 Map((for ((sym @ name(a), _) <- symbols) yield (sym -> a)): _*)
237 val abbrevs: Map[String, String] =
239 for ((sym, props) <- symbols if props.isDefinedAt("abbrev"))
240 yield (sym -> props("abbrev"))): _*)
243 /* main recoder methods */
245 private val (decoder, encoder) =
249 (sym, props) <- symbols
251 try { Integer.decode(props("code")).intValue }
253 case _: NoSuchElementException => error("Missing code for symbol " + sym)
254 case _: NumberFormatException => error("Bad code for symbol " + sym)
256 val ch = new String(Character.toChars(code))
258 if (code < 128) error("Illegal ASCII code for symbol " + sym)
261 (new Recoder(mapping),
262 new Recoder(mapping map { case (x, y) => (y, x) }))
265 def decode(text: String): String = decoder.recode(text)
266 def encode(text: String): String = encoder.recode(text)
271 private object Decode_Set
273 def apply(elems: String*): Set[String] =
275 val content = elems.toList
276 Set((content ::: content.map(decode)): _*)
280 private val letters = Decode_Set(
281 "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
282 "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
283 "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
284 "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
286 "\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>",
287 "\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>",
288 "\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>",
289 "\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>",
290 "\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>",
291 "\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>",
292 "\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>",
293 "\\<x>", "\\<y>", "\\<z>",
295 "\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>",
296 "\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>",
297 "\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>",
298 "\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>",
299 "\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>",
300 "\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>",
301 "\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>",
302 "\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>",
303 "\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>",
305 "\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>",
306 "\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>",
307 "\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>",
308 "\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>",
309 "\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>",
310 "\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>",
311 "\\<Psi>", "\\<Omega>",
313 "\\<^isub>", "\\<^isup>")
316 Decode_Set(space, "\t", "\n", "\u000B", "\f", "\r", "\\<spacespace>", "\\<^newline>")
318 private val sym_chars =
319 Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
321 def is_letter(sym: String): Boolean = letters.contains(sym)
322 def is_digit(sym: String): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9'
323 def is_quasi(sym: String): Boolean = sym == "_" || sym == "'"
324 def is_letdig(sym: String): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym)
325 def is_blank(sym: String): Boolean = blanks.contains(sym)
326 def is_symbolic_char(sym: String): Boolean = sym_chars.contains(sym)
327 def is_symbolic(sym: String): Boolean =
328 sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^")
331 /* special control symbols */
333 def is_controllable(sym: String): Boolean =
334 !is_blank(sym) && !sym.startsWith("\\<^") && !is_malformed(sym)
336 private val subscript_decoded = Set(decode("\\<^sub>"), decode("\\<^isub>"))
337 private val superscript_decoded = Set(decode("\\<^sup>"), decode("\\<^isup>"))
338 val bold_decoded = decode("\\<^bold>")
340 def is_subscript_decoded(sym: String): Boolean = subscript_decoded.contains(sym)
341 def is_superscript_decoded(sym: String): Boolean = superscript_decoded.contains(sym)
342 def is_bold_decoded(sym: String): Boolean = sym == bold_decoded