1 /* THIS IS A PARTIAL COPY OF Isabelle2016-1 ...
2 Title: Pure/General/symbol.scala
4 ... WITH ADDITIONS AFTER "/---..---\"
6 Detecting and recoding Isabelle symbols.
9 package isac.gui.mawen.syntax.isabelle
11 import scala.collection.mutable
12 import scala.util.matching.Regex
13 import scala.annotation.tailrec
20 // counting Isabelle symbols, starting from 1
21 type Offset = Text.Offset
22 type Range = Text.Range
29 private val static_spaces = space * 4000
31 def spaces(n: Int): String =
34 if (n < static_spaces.length) static_spaces.substring(0, n)
39 /* ASCII characters */
41 def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
43 def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9'
45 def is_ascii_hex(c: Char): Boolean =
46 '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f'
48 def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\''
50 def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c)
52 def is_ascii_letdig(c: Char): Boolean =
53 is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)
55 def is_ascii_identifier(s: String): Boolean =
56 s.length > 0 && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig)
58 def ascii(c: Char): Symbol =
60 if (c > 127) error("Non-ASCII character: " + XLibrary.quote(c.toString))
61 else char_symbols(c.toInt)
67 private val symbol_total = new Regex("""(?xs)
68 [\ud800-\udbff][\udc00-\udfff] | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""")
70 private def is_plain(c: Char): Boolean =
71 !(c == '\r' || c == '\\' || Character.isHighSurrogate(c))
73 def is_malformed(s: Symbol): Boolean =
77 Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd'
81 !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2))
82 case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
85 def is_newline(s: Symbol): Boolean =
86 s == "\n" || s == "\r" || s == "\r\n"
88 class Matcher(text: CharSequence)
90 private val matcher = symbol_total.pattern.matcher(text)
91 def apply(start: Int, end: Int): Int =
93 require(0 <= start && start < end && end <= text.length)
94 if (is_plain(text.charAt(start))) 1
96 matcher.region(start, end).lookingAt
105 private val char_symbols: Array[Symbol] =
106 (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray
108 def iterator(text: CharSequence): Iterator[Symbol] =
111 private val matcher = new Matcher(text)
113 def hasNext = i < text.length
116 val n = matcher(i, text.length)
120 val c = text.charAt(i)
121 if (c < char_symbols.length) char_symbols(c)
122 else text.subSequence(i, i + n).toString
124 else text.subSequence(i, i + n).toString
130 def explode(text: CharSequence): List[Symbol] = iterator(text).toList
132 def advance_line_column(pos: (Int, Int), text: CharSequence): (Int, Int) =
134 var (line, column) = pos
135 for (sym <- iterator(text)) {
136 if (is_newline(sym)) { line += 1; column = 1 }
143 /* decoding offsets */
147 private sealed case class Entry(chr: Int, sym: Int)
149 val empty: Index = new Index(Nil)
151 def apply(text: CharSequence): Index =
153 val matcher = new Matcher(text)
154 val buf = new mutable.ListBuffer[Entry]
157 while (chr < text.length) {
158 val n = matcher(chr, text.length)
161 if (n > 1) buf += Entry(chr, sym)
163 if (buf.isEmpty) empty else new Index(buf.toList)
167 final class Index private(entries: List[Index.Entry])
169 private val hash: Int = entries.hashCode
170 private val index: Array[Index.Entry] = entries.toArray
172 def decode(symbol_offset: Offset): Text.Offset =
174 val sym = symbol_offset - 1
175 val end = index.length
176 @tailrec def bisect(a: Int, b: Int): Int =
180 if (sym < index(c).sym) bisect(a, c)
181 else if (c + 1 == end || sym < index(c + 1).sym) c
182 else bisect(c + 1, b)
186 val i = bisect(0, end)
188 else index(i).chr + sym - index(i).sym
190 def decode(symbol_range: Range): Text.Range = symbol_range.map(decode(_))
192 override def hashCode: Int = hash
193 override def equals(that: Any): Boolean =
195 case other: Index => index.sameElements(other.index)
200 //\\----------------- FROM HERE ON TOO ADVANCED ----------------------------//
201 ////----------------- ADDITIONS TO Isabelle2016-1 FOR ISAC -----------------\\
204 def is_digit(sym: Char): Boolean = '0' <= sym && sym <= '9'