wenzelm@27901
|
1 |
/* Title: Pure/General/symbol.scala
|
wenzelm@27901
|
2 |
Author: Makarius
|
wenzelm@27901
|
3 |
|
wenzelm@27924
|
4 |
Detecting and recoding Isabelle symbols.
|
wenzelm@27901
|
5 |
*/
|
wenzelm@27901
|
6 |
|
wenzelm@27901
|
7 |
package isabelle
|
wenzelm@27901
|
8 |
|
wenzelm@27918
|
9 |
import scala.io.Source
|
wenzelm@36035
|
10 |
import scala.collection.mutable
|
wenzelm@31537
|
11 |
import scala.util.matching.Regex
|
wenzelm@27901
|
12 |
|
wenzelm@27901
|
13 |
|
wenzelm@31537
|
14 |
object Symbol
|
wenzelm@31537
|
15 |
{
|
wenzelm@36772
|
16 |
/* spaces */
|
wenzelm@36772
|
17 |
|
wenzelm@36850
|
18 |
val spc = ' '
val space = " "

/* pre-built run of 4000 blanks; shorter requests are served as a prefix of it */
private val static_spaces = space * 4000

/* string of exactly k blanks; k must be non-negative (checked via require) */
def spaces(k: Int): String =
{
  require(k >= 0)
  if (k >= static_spaces.length) space * k
  else static_spaces.substring(0, k)
}
|
wenzelm@36772
|
29 |
|
wenzelm@36772
|
30 |
|
wenzelm@44292
|
31 |
/* ASCII characters */
|
wenzelm@44292
|
32 |
|
wenzelm@44292
|
33 |
/* ASCII letter: a-z or A-Z */
def is_ascii_letter(c: Char): Boolean =
  ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
|
wenzelm@44292
|
34 |
/* ASCII decimal digit: 0-9 */
def is_ascii_digit(c: Char): Boolean = c >= '0' && c <= '9'
|
wenzelm@44292
|
35 |
/* "quasi letter": underscore or single quote */
def is_ascii_quasi(c: Char): Boolean =
  c match {
    case '_' | '\'' => true
    case _ => false
  }
|
wenzelm@44292
|
36 |
|
wenzelm@44292
|
37 |
/* character admitted inside an ASCII identifier: letter, digit, underscore or single quote */
def is_ascii_letdig(c: Char): Boolean =
  is_ascii_quasi(c) || is_ascii_digit(c) || is_ascii_letter(c)
|
wenzelm@44292
|
39 |
|
wenzelm@44292
|
40 |
/* non-empty string that starts with an ASCII letter and continues with letdig characters only */
def is_ascii_identifier(s: String): Boolean =
  s.nonEmpty && is_ascii_letter(s.charAt(0)) && s.drop(1).forall(is_ascii_letdig)
|
wenzelm@44292
|
42 |
|
wenzelm@44292
|
43 |
|
wenzelm@34007
|
44 |
/* Symbol regexps */
|
wenzelm@27901
|
45 |
|
wenzelm@31537
|
46 |
/* one char other than CR, backslash, surrogate or U+FFFD -- or a well-formed surrogate pair */
private val plain = """(?xs)
  [^\r\\\ud800-\udfff\ufffd] | [\ud800-\udbff][\udc00-\udfff] """.r

/* LF, CR LF, or lone CR */
private val physical_newline = """(?xs) \n | \r\n | \r """.r

/* "\<name>" / "\<^name>" with identifier-like name, or "\<^raw:...>" whose content
   is drawn from printable ASCII or U+0100..U+FFFF, excluding '.' and '>' */
private val symbol = """(?xs)
  \\ < (?:
  \^? [A-Za-z][A-Za-z0-9_']* |
  \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""".r

/* unpaired high surrogate or U+FFFD (not at the start of a proper symbol), or a bare "\<" / "\<^";
   NB: the negative lookahead binds to the first alternative only */
private val malformed_symbol =
  ("(?xs) (?!" + symbol + ")" + """ [\ud800-\udbff\ufffd] | \\<\^? """).r

/* total scanner: alternatives are tried in this order, the final "." accepts any remaining char */
val regex_total =
  (plain + "|" + physical_newline + "|" + symbol + "|" + malformed_symbol + "| .").r
|
wenzelm@27918
|
61 |
|
wenzelm@34146
|
62 |
|
wenzelm@34146
|
63 |
/* basic matching */
|
wenzelm@34146
|
64 |
|
wenzelm@37563
|
65 |
/* char that forms a symbol on its own: not CR, not backslash, not a surrogate
   (U+FFFD passes this test, although the "plain" regexp excludes it) */
def is_plain(c: Char): Boolean =
  c != '\r' && c != '\\' && (c < '\ud800' || c > '\udfff')
|
wenzelm@34146
|
66 |
|
wenzelm@39203
|
67 |
/* s is exactly one of LF, CR, CR LF */
def is_physical_newline(s: CharSequence): Boolean =
  List("\n", "\r", "\r\n").exists(nl => nl.contentEquals(s))
|
wenzelm@39203
|
69 |
|
wenzelm@40769
|
70 |
/* s as a whole matches the malformed-symbol regexp; a single plain char is never malformed */
def is_malformed(s: CharSequence): Boolean =
{
  val single_plain = s.length == 1 && is_plain(s.charAt(0))
  !single_plain && malformed_symbol.pattern.matcher(s).matches
}
|
wenzelm@34146
|
72 |
|
wenzelm@34146
|
73 |
/* determines the length (in chars) of the symbol at a given position of text */
class Matcher(text: CharSequence)
{
  private val matcher = regex_total.pattern.matcher(text)

  /* number of chars of the symbol that starts at offset start, scanning no further than end;
     requires 0 <= start < end <= text.length */
  def apply(start: Int, end: Int): Int =
  {
    require(0 <= start && start < end && end <= text.length)
    if (!is_plain(text.charAt(start))) {
      // regex_total ends with a catch-all alternative, so a non-empty region always yields a match
      matcher.region(start, end).lookingAt
      matcher.group.length
    }
    else 1  // fast path: plain chars bypass the regexp
  }
}
|
wenzelm@27901
|
86 |
|
wenzelm@27901
|
87 |
|
wenzelm@44360
|
88 |
/* efficient iterators */
|
wenzelm@31939
|
89 |
|
wenzelm@44360
|
90 |
/* iterate over the symbols of text, each one as a sub-sequence of text;
   next() on an exhausted iterator throws NoSuchElementException */
def iterator(text: CharSequence): Iterator[CharSequence] =
  new Iterator[CharSequence]
  {
    private val matcher = new Matcher(text)
    private var i = 0  // char offset of the next symbol

    def hasNext: Boolean = i < text.length

    def next(): CharSequence =
    {
      // without this guard, the require inside Matcher.apply would fail
      // with IllegalArgumentException once the text is consumed
      if (!hasNext) throw new NoSuchElementException("next on exhausted symbol iterator")
      val n = matcher(i, text.length)
      val s = text.subSequence(i, i + n)
      i += n
      s
    }
  }
|
wenzelm@44360
|
104 |
|
wenzelm@44360
|
105 |
/* shared one-char strings for the codes 0..127 */
private val char_symbols: Array[String] =
  Array.tabulate(128)(code => code.toChar.toString)
|
wenzelm@44360
|
107 |
|
wenzelm@44360
|
108 |
/* turn a symbol into a String; single chars below 128 reuse the entries of char_symbols */
private def make_string(sym: CharSequence): String =
  if (sym.length == 0) ""
  else if (sym.length == 1 && sym.charAt(0) < char_symbols.length) char_symbols(sym.charAt(0))
  else sym.toString
|
wenzelm@44360
|
117 |
|
wenzelm@44360
|
118 |
/* like iterator, but each symbol is delivered as a String */
def iterator_string(text: CharSequence): Iterator[String] =
  for (sym <- iterator(text)) yield make_string(sym)
|
wenzelm@34007
|
120 |
|
wenzelm@34007
|
121 |
|
wenzelm@34007
|
122 |
/* decoding offsets */
|
wenzelm@34007
|
123 |
|
wenzelm@34007
|
124 |
/* translation of symbol offsets into char offsets of text */
class Index(text: CharSequence)
{
  case class Entry(chr: Int, sym: Int)

  /* one entry per symbol that occupies more than one char:
     the char offset and the symbol count right after that symbol */
  val index: Array[Entry] =
  {
    val matcher = new Matcher(text)
    val entries = new mutable.ArrayBuffer[Entry]
    var chr_pos = 0
    var sym_count = 0
    while (chr_pos < text.length) {
      val width = matcher(chr_pos, text.length)
      chr_pos += width
      sym_count += 1
      if (width > 1) entries += Entry(chr_pos, sym_count)
    }
    entries.toArray
  }

  /* char offset for symbol offset sym1 (shifted down by one before the lookup,
     presumably because incoming symbol offsets are 1-based) */
  def decode(sym1: Int): Int =
  {
    val sym = sym1 - 1
    val end = index.length

    // binary search for the last entry whose symbol count does not exceed sym; -1 if there is none
    @scala.annotation.tailrec
    def bisect(a: Int, b: Int): Int =
      if (a >= b) -1
      else {
        val c = (a + b) / 2
        if (sym < index(c).sym) bisect(a, c)
        else if (c + 1 == end || sym < index(c + 1).sym) c
        else bisect(c + 1, b)
      }

    bisect(0, end) match {
      case i if i < 0 => sym  // no multi-char symbol before this position
      case i => index(i).chr + sym - index(i).sym
    }
  }

  /* decode both ends of a range */
  def decode(range: Text.Range): Text.Range = range.map(decode(_))
}
|
wenzelm@31939
|
161 |
|
wenzelm@31939
|
162 |
|
wenzelm@34007
|
163 |
/* recoding text */
|
wenzelm@27924
|
164 |
|
wenzelm@31537
|
165 |
/* replaces each symbol of a text that occurs as left-hand side in list by its right-hand side */
private class Recoder(list: List[(String, String)])
{
  /* smallest and largest first char over all left-hand sides: cheap pre-check in recode */
  private val (min, max) =
    list.foldLeft(('\uffff', '\u0000')) {
      case ((lo, hi), (x, _)) =>
        val c = x(0)
        (if (c < lo) c else lo, if (c > hi) c else hi)
    }

  /* lookup table; a left-hand side that occurs twice is an error */
  private val table =
    list.foldLeft(Map[String, String]()) {
      case (tab, (x, y)) =>
        tab.get(x) match {
          case Some(z) =>
            error("Duplicate mapping of \"" + x + "\" to \"" + y + "\" vs. \"" + z + "\"")
          case None => tab + (x -> y)
        }
    }

  def recode(text: String): String =
  {
    val len = text.length
    val matcher = regex_total.pattern.matcher(text)
    val result = new StringBuilder(len)
    var i = 0
    while (i < len) {
      val c = text(i)
      // chars outside [min, max] cannot start a table entry: copy verbatim
      if (c < min || max < c) { result.append(c); i += 1 }
      else {
        matcher.region(i, len).lookingAt
        val x = matcher.group
        result.append(table.getOrElse(x, x))
        i = matcher.end
      }
    }
    result.toString
  }
}
|
wenzelm@27918
|
209 |
|
wenzelm@27918
|
210 |
|
wenzelm@27937
|
211 |
|
wenzelm@27937
|
212 |
/** Symbol interpretation **/
|
wenzelm@27937
|
213 |
|
wenzelm@34146
|
214 |
class Interpretation(symbol_decls: List[String])
|
wenzelm@29569
|
215 |
{
|
wenzelm@31537
|
216 |
/* read symbols */
|
wenzelm@31537
|
217 |
|
wenzelm@31537
|
218 |
/* line consisting of white space only, optionally followed by a "#" comment */
private val empty = """(?xs) ^\s* (?: \#.* )? $ """.r
/* property key: some non-empty text followed by a final colon */
private val key = """(?xs) (.+): """.r
|
wenzelm@31537
|
220 |
|
wenzelm@31537
|
221 |
/* parse one declaration line: a symbol followed by "key: value" word pairs;
   anything else is reported via error */
private def read_decl(decl: String): (String, Map[String, String]) =
{
  def err() = error("Bad symbol declaration: " + decl)

  // words alternate between "key:" and value; for a repeated key the first occurrence wins
  def read_props(props: List[String]): Map[String, String] =
    props match {
      case Nil => Map()
      case key(x) :: y :: rest => read_props(rest) + (x -> y)
      case _ => err()  // dangling single word, or a word that is not a key
    }

  val words = decl.split("\\s+").toList
  words match {
    case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props))
    case _ => err()
  }
}
|
wenzelm@31537
|
239 |
|
wenzelm@31537
|
240 |
/* all declared symbols with their properties; empty or comment lines are skipped,
   and passing through a Map keeps only the last declaration of each symbol */
private val symbols: List[(String, Map[String, String])] =
  symbol_decls
    .filterNot(decl => empty.pattern.matcher(decl).matches)
    .map(read_decl)
    .toMap
    .toList
|
wenzelm@31537
|
244 |
|
wenzelm@31537
|
245 |
|
wenzelm@31651
|
246 |
/* misc properties */
|
wenzelm@31651
|
247 |
|
wenzelm@34143
|
248 |
/* symbol -> bare name, i.e. the part between "\<" (or "\<^") and ">";
   symbols that do not have this shape are left out */
val names: Map[String, String] =
{
  val name = """\\<\^?([A-Za-z][A-Za-z0-9_']*)>""".r
  symbols.collect({ case (sym @ name(a), _) => (sym -> a) }).toMap
}
|
wenzelm@31651
|
253 |
|
wenzelm@44359
|
254 |
/* symbol -> value of its "abbrev" property, for the symbols that declare one */
val abbrevs: Map[String, String] =
  symbols.collect({
    case (sym, props) if props.isDefinedAt("abbrev") => (sym -> props("abbrev"))
  }).toMap
|
wenzelm@44359
|
258 |
|
wenzelm@44359
|
259 |
|
wenzelm@44361
|
260 |
/* recoding */
|
wenzelm@31537
|
261 |
|
wenzelm@31537
|
262 |
/* decoder: symbol -> Unicode string of its "code" property; encoder: the inverse direction.
   Every symbol needs a code that is a valid Unicode code point and not below 128. */
private val (decoder, encoder) =
{
  val mapping =
    for ((sym, props) <- symbols) yield {
      val code =
        try { Integer.decode(props("code")).intValue }
        catch {
          case _: NoSuchElementException => error("Missing code for symbol " + sym)
          case _: NumberFormatException => error("Bad code for symbol " + sym)
        }
      // validate before Character.toChars, which throws IllegalArgumentException
      // for negative values and values above 0x10FFFF
      if (!Character.isValidCodePoint(code)) error("Bad code for symbol " + sym)
      else if (code < 128) error("Illegal ASCII code for symbol " + sym)
      else (sym, new String(Character.toChars(code)))
    }
  (new Recoder(mapping),
    new Recoder(mapping map { case (x, y) => (y, x) }))
}
|
wenzelm@27918
|
281 |
|
wenzelm@34104
|
282 |
/* recode text via the decoder: declared symbols become the Unicode strings given by their codes */
def decode(text: String): String = decoder.recode(text)
|
wenzelm@34104
|
283 |
/* recode text via the encoder: Unicode strings of declared codes become the corresponding symbols */
def encode(text: String): String = encoder.recode(text)
|
wenzelm@34143
|
284 |
|
wenzelm@44361
|
285 |
/* set of the given elements together with their decoded forms */
private def recode_set(elems: String*): Set[String] =
{
  val given = elems.toList
  (given ::: given.map(decode)).toSet
}
|
wenzelm@44361
|
290 |
|
wenzelm@44361
|
291 |
/* map of the given entries, plus the same entries under their decoded keys */
private def recode_map[A](elems: (String, A)*): Map[String, A] =
{
  val given = elems.toList
  val decoded = given.map({ case (sym, a) => (decode(sym), a) })
  (given ::: decoded).toMap
}
|
wenzelm@44361
|
296 |
|
wenzelm@44361
|
297 |
|
wenzelm@44361
|
298 |
/* user fonts */
|
wenzelm@44361
|
299 |
|
wenzelm@44361
|
300 |
/* symbol (plain and decoded form) -> value of its "font" property, where declared */
val fonts: Map[String, String] =
{
  val declared =
    symbols.collect({
      case (sym, props) if props.isDefinedAt("font") => (sym -> props("font"))
    })
  recode_map(declared: _*)
}
|
wenzelm@44361
|
304 |
|
wenzelm@44361
|
305 |
/* the distinct font names that occur in fonts */
val font_names: List[String] =
  fonts.toList.map({ case (_, font) => font }).toSet.toList
|
wenzelm@44361
|
306 |
/* font name -> its position within font_names */
val font_index: Map[String, Int] = font_names.zipWithIndex.toMap
|
wenzelm@44361
|
307 |
|
wenzelm@44361
|
308 |
/* index (within font_names) of the font declared for sym, if any */
def lookup_font(sym: String): Option[Int] =
  for (font <- fonts.get(sym)) yield font_index(font)
|
wenzelm@44361
|
309 |
|
wenzelm@34143
|
310 |
|
wenzelm@34143
|
311 |
/* classification */
|
wenzelm@34143
|
312 |
|
wenzelm@44361
|
313 |
/* symbols that count as letters, in plain and decoded form */
private val letters =
{
  // "A".."Z", "a".."z"
  val ascii = (('A' to 'Z').toList ::: ('a' to 'z').toList).map(_.toString)
  // "\<A>".."\<Z>", "\<a>".."\<z>"
  val wrapped = ascii.map(a => "\\<" + a + ">")
  // "\<AA>".."\<ZZ>", "\<aa>".."\<zz>"
  val doubled = ascii.map(a => "\\<" + a + a + ">")
  // Greek letters (note that "\<lambda>" is not part of this list)
  val greek = List(
    "\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>",
    "\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>",
    "\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>",
    "\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>",
    "\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>",
    "\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>",
    "\\<Psi>", "\\<Omega>")
  val sub_sup = List("\\<^isub>", "\\<^isup>")

  recode_set((ascii ::: wrapped ::: doubled ::: greek ::: sub_sup): _*)
}
|
wenzelm@34143
|
347 |
|
wenzelm@34147
|
348 |
/* symbols that count as white space, in plain and decoded form */
private val blanks =
{
  val ascii_blanks = List(space, "\t", "\n", "\u000B", "\f", "\r")
  val symbol_blanks = List("\\<spacespace>", "\\<^newline>")
  recode_set((ascii_blanks ::: symbol_blanks): _*)
}
|
wenzelm@34147
|
350 |
|
wenzelm@34147
|
351 |
/* ASCII characters that count as symbolic, each as a one-char string */
private val sym_chars =
  "!#$%&*+-/<=>?@^_|~".toList.map(_.toString).toSet
|
wenzelm@34143
|
353 |
|
wenzelm@34143
|
354 |
/* sym is a member of the letters set */
def is_letter(sym: String): Boolean = letters(sym)
|
wenzelm@34147
|
355 |
/* sym is a single ASCII digit */
def is_digit(sym: String): Boolean =
  sym.length == 1 && sym.charAt(0) >= '0' && sym.charAt(0) <= '9'
|
wenzelm@34143
|
356 |
/* sym is an underscore or a single quote */
def is_quasi(sym: String): Boolean =
  sym match {
    case "_" | "'" => true
    case _ => false
  }
|
wenzelm@34147
|
357 |
/* sym is a letter, a digit, or a quasi letter */
def is_letdig(sym: String): Boolean =
  is_quasi(sym) || is_digit(sym) || is_letter(sym)
|
wenzelm@34143
|
358 |
/* sym is a member of the blanks set */
def is_blank(sym: String): Boolean = blanks(sym)
|
wenzelm@34147
|
359 |
/* sym is one of the symbolic ASCII characters */
def is_symbolic_char(sym: String): Boolean = sym_chars(sym)
|
wenzelm@40769
|
360 |
/* sym has the shape "\<...>" but does not start with "\<^" */
def is_symbolic(sym: String): Boolean =
{
  val bracketed = sym.startsWith("\\<") && sym.endsWith(">")
  bracketed && !sym.startsWith("\\<^")
}
|
wenzelm@44330
|
362 |
|
wenzelm@44330
|
363 |
|
wenzelm@44359
|
364 |
/* control symbols */
|
wenzelm@44359
|
365 |
|
wenzelm@44359
|
366 |
/* decoded forms of all declared symbols that start with "\<^" */
private val ctrl_decoded: Set[String] =
  symbols.collect({ case (sym, _) if sym.startsWith("\\<^") => decode(sym) }).toSet
|
wenzelm@44359
|
368 |
|
wenzelm@44359
|
369 |
/* sym starts with "\<^", or is the decoded form of a declared symbol that does */
def is_ctrl(sym: String): Boolean =
  ctrl_decoded(sym) || sym.startsWith("\\<^")
|
wenzelm@44330
|
371 |
|
wenzelm@44314
|
372 |
/* sym is neither blank, nor a control symbol, nor malformed */
def is_controllable(sym: String): Boolean =
  !(is_blank(sym) || is_ctrl(sym) || is_malformed(sym))
|
wenzelm@44330
|
374 |
|
wenzelm@44330
|
375 |
/* decoded forms of the subscript, superscript and bold control symbols */
private val subscript_decoded = List("\\<^sub>", "\\<^isub>").map(decode).toSet
private val superscript_decoded = List("\\<^sup>", "\\<^isup>").map(decode).toSet
val bold_decoded = decode("\\<^bold>")
|
wenzelm@44330
|
378 |
|
wenzelm@44330
|
379 |
/* sym is the decoded form of "\<^sub>" or "\<^isub>" */
def is_subscript_decoded(sym: String): Boolean = subscript_decoded(sym)
|
wenzelm@44330
|
380 |
/* sym is the decoded form of "\<^sup>" or "\<^isup>" */
def is_superscript_decoded(sym: String): Boolean = superscript_decoded(sym)
|
wenzelm@44330
|
381 |
/* sym is the decoded form of "\<^bold>" */
def is_bold_decoded(sym: String): Boolean = bold_decoded == sym
|
wenzelm@27918
|
382 |
}
|
wenzelm@27901
|
383 |
}
|