1 /* THIS IS A PARTIAL COPY OF Isabelle2016-1 ...
2 Title: Pure/General/symbol.scala
4 ... WITH ADDITIONS AFTER "/---..---\"
6 Detecting and recoding Isabelle symbols.
9 package isac.gui.mawen.syntax.isabelle
11 import scala.collection.mutable
12 import scala.util.matching.Regex
13 import scala.annotation.tailrec
20 // counting Isabelle symbols, starting from 1
21 type Offset = Text.Offset
22 type Range = Text.Range
29 private val static_spaces = space * 4000
31 def spaces(n: Int): String =
34 if (n < static_spaces.length) static_spaces.substring(0, n)
39 /* ASCII characters */
41 def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
43 def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9'
45 def is_ascii_hex(c: Char): Boolean =
46 '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f'
48 def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\''
50 def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c)
52 def is_ascii_letdig(c: Char): Boolean =
53 is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)
55 def is_ascii_identifier(s: String): Boolean =
56 s.length > 0 && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig)
58 def ascii(c: Char): Symbol =
61 Console.printf("Non-ASCII character: " + XLibrary.quote(c.toString));
62 "Non-ASCII character: " + XLibrary.quote(c.toString)
64 else char_symbols(c.toInt)
70 private val symbol_total = new Regex("""(?xs)
71 [\ud800-\udbff][\udc00-\udfff] | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""")
73 private def is_plain(c: Char): Boolean =
74 !(c == '\r' || c == '\\' || Character.isHighSurrogate(c))
76 def is_malformed(s: Symbol): Boolean =
80 Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd'
84 !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2))
85 case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
88 def is_newline(s: Symbol): Boolean =
89 s == "\n" || s == "\r" || s == "\r\n"
91 class Matcher(text: CharSequence)
93 private val matcher = symbol_total.pattern.matcher(text)
94 def apply(start: Int, end: Int): Int =
96 require(0 <= start && start < end && end <= text.length)
97 if (is_plain(text.charAt(start))) 1
99 matcher.region(start, end).lookingAt
108 private val char_symbols: Array[Symbol] =
109 (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray
111 def iterator(text: CharSequence): Iterator[Symbol] =
114 private val matcher = new Matcher(text)
116 def hasNext = i < text.length
119 val n = matcher(i, text.length)
123 val c = text.charAt(i)
124 if (c < char_symbols.length) char_symbols(c)
125 else text.subSequence(i, i + n).toString
127 else text.subSequence(i, i + n).toString
133 def explode(text: CharSequence): List[Symbol] = iterator(text).toList
135 def advance_line_column(pos: (Int, Int), text: CharSequence): (Int, Int) =
137 var (line, column) = pos
138 for (sym <- iterator(text)) {
139 if (is_newline(sym)) { line += 1; column = 1 }
146 /* decoding offsets */
150 private sealed case class Entry(chr: Int, sym: Int)
152 val empty: Index = new Index(Nil)
154 def apply(text: CharSequence): Index =
156 val matcher = new Matcher(text)
157 val buf = new mutable.ListBuffer[Entry]
160 while (chr < text.length) {
161 val n = matcher(chr, text.length)
164 if (n > 1) buf += Entry(chr, sym)
166 if (buf.isEmpty) empty else new Index(buf.toList)
170 final class Index private(entries: List[Index.Entry])
172 private val hash: Int = entries.hashCode
173 private val index: Array[Index.Entry] = entries.toArray
175 def decode(symbol_offset: Offset): Text.Offset =
177 val sym = symbol_offset - 1
178 val end = index.length
179 @tailrec def bisect(a: Int, b: Int): Int =
183 if (sym < index(c).sym) bisect(a, c)
184 else if (c + 1 == end || sym < index(c + 1).sym) c
185 else bisect(c + 1, b)
189 val i = bisect(0, end)
191 else index(i).chr + sym - index(i).sym
193 def decode(symbol_range: Range): Text.Range = symbol_range.map(decode(_))
195 override def hashCode: Int = hash
196 override def equals(that: Any): Boolean =
198 case other: Index => index.sameElements(other.index)
203 //\\----------------- FROM HERE ON TOO ADVANCED ----------------------------//
204 ////----------------- ADDITIONS TO Isabelle2016-1 FOR ISAC -----------------\\
207 def is_digit(sym: Char): Boolean = '0' <= sym && sym <= '9'