wneuper@5051
|
1 |
/* THIS IS A PARTIAL COPY OF Isabelle2016-1 ...
|
wneuper@5050
|
2 |
Title: Pure/General/symbol.scala
|
wneuper@5050
|
3 |
Author: Makarius
|
wneuper@5050
|
4 |
... WITH ADDITIONS AFTER "/---..---\"
|
wneuper@5050
|
5 |
|
wneuper@5050
|
6 |
Detecting and recoding Isabelle symbols.
|
wneuper@5050
|
7 |
*/
|
wneuper@5050
|
8 |
|
wneuper@5050
|
9 |
package isac.gui.mawen.syntax.isabelle
|
wneuper@5050
|
10 |
|
wneuper@5050
|
11 |
import scala.collection.mutable
|
wneuper@5050
|
12 |
import scala.util.matching.Regex
|
wneuper@5050
|
13 |
import scala.annotation.tailrec
|
wneuper@5050
|
14 |
|
wneuper@5050
|
15 |
|
wneuper@5055
|
16 |
object XSymbol
|
wneuper@5050
|
17 |
{
|
wneuper@5050
|
18 |
type Symbol = String
|
wneuper@5050
|
19 |
|
wneuper@5050
|
20 |
// counting Isabelle symbols, starting from 1
|
wneuper@5050
|
21 |
type Offset = Text.Offset
|
wneuper@5050
|
22 |
type Range = Text.Range
|
wneuper@5050
|
23 |
|
wneuper@5050
|
24 |
|
wneuper@5050
|
25 |
/* spaces */
|
wneuper@5050
|
26 |
|
wneuper@5050
|
27 |
val space = " "
|
wneuper@5050
|
28 |
|
wneuper@5050
|
29 |
private val static_spaces = space * 4000
|
wneuper@5050
|
30 |
|
wneuper@5050
|
31 |
def spaces(n: Int): String =
|
wneuper@5050
|
32 |
{
|
wneuper@5050
|
33 |
require(n >= 0)
|
wneuper@5050
|
34 |
if (n < static_spaces.length) static_spaces.substring(0, n)
|
wneuper@5050
|
35 |
else space * n
|
wneuper@5050
|
36 |
}
|
wneuper@5050
|
37 |
|
wneuper@5050
|
38 |
|
wneuper@5050
|
39 |
/* ASCII characters */
|
wneuper@5050
|
40 |
|
wneuper@5050
|
41 |
def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
|
wneuper@5050
|
42 |
|
wneuper@5050
|
43 |
def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9'
|
wneuper@5050
|
44 |
|
wneuper@5050
|
45 |
def is_ascii_hex(c: Char): Boolean =
|
wneuper@5050
|
46 |
'0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f'
|
wneuper@5050
|
47 |
|
wneuper@5050
|
48 |
def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\''
|
wneuper@5050
|
49 |
|
wneuper@5050
|
50 |
def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c)
|
wneuper@5050
|
51 |
|
wneuper@5050
|
52 |
def is_ascii_letdig(c: Char): Boolean =
|
wneuper@5050
|
53 |
is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)
|
wneuper@5050
|
54 |
|
wneuper@5050
|
55 |
def is_ascii_identifier(s: String): Boolean =
|
wneuper@5050
|
56 |
s.length > 0 && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig)
|
wneuper@5050
|
57 |
|
wneuper@5050
|
58 |
def ascii(c: Char): Symbol =
|
wneuper@5050
|
59 |
{
|
wneuper@5232
|
60 |
if (c > 127) {
|
wneuper@5232
|
61 |
Console.printf("Non-ASCII character: " + XLibrary.quote(c.toString));
|
wneuper@5232
|
62 |
"Non-ASCII character: " + XLibrary.quote(c.toString)
|
wneuper@5232
|
63 |
}
|
wneuper@5050
|
64 |
else char_symbols(c.toInt)
|
wneuper@5050
|
65 |
}
|
wneuper@5050
|
66 |
|
wneuper@5050
|
67 |
|
wneuper@5050
|
68 |
/* symbol matching */
|
wneuper@5050
|
69 |
|
wneuper@5050
|
70 |
private val symbol_total = new Regex("""(?xs)
|
wneuper@5050
|
71 |
[\ud800-\udbff][\udc00-\udfff] | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""")
|
wneuper@5050
|
72 |
|
wneuper@5050
|
73 |
private def is_plain(c: Char): Boolean =
|
wneuper@5050
|
74 |
!(c == '\r' || c == '\\' || Character.isHighSurrogate(c))
|
wneuper@5050
|
75 |
|
wneuper@5050
|
76 |
def is_malformed(s: Symbol): Boolean =
|
wneuper@5050
|
77 |
s.length match {
|
wneuper@5050
|
78 |
case 1 =>
|
wneuper@5050
|
79 |
val c = s(0)
|
wneuper@5050
|
80 |
Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd'
|
wneuper@5050
|
81 |
case 2 =>
|
wneuper@5050
|
82 |
val c1 = s(0)
|
wneuper@5050
|
83 |
val c2 = s(1)
|
wneuper@5050
|
84 |
!(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2))
|
wneuper@5050
|
85 |
case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
|
wneuper@5050
|
86 |
}
|
wneuper@5050
|
87 |
|
wneuper@5050
|
88 |
def is_newline(s: Symbol): Boolean =
|
wneuper@5050
|
89 |
s == "\n" || s == "\r" || s == "\r\n"
|
wneuper@5050
|
90 |
|
wneuper@5050
|
91 |
class Matcher(text: CharSequence)
|
wneuper@5050
|
92 |
{
|
wneuper@5050
|
93 |
private val matcher = symbol_total.pattern.matcher(text)
|
wneuper@5050
|
94 |
def apply(start: Int, end: Int): Int =
|
wneuper@5050
|
95 |
{
|
wneuper@5050
|
96 |
require(0 <= start && start < end && end <= text.length)
|
wneuper@5050
|
97 |
if (is_plain(text.charAt(start))) 1
|
wneuper@5050
|
98 |
else {
|
wneuper@5050
|
99 |
matcher.region(start, end).lookingAt
|
wneuper@5050
|
100 |
matcher.group.length
|
wneuper@5050
|
101 |
}
|
wneuper@5050
|
102 |
}
|
wneuper@5050
|
103 |
}
|
wneuper@5050
|
104 |
|
wneuper@5050
|
105 |
|
wneuper@5050
|
106 |
/* iterator */
|
wneuper@5050
|
107 |
|
wneuper@5050
|
108 |
private val char_symbols: Array[Symbol] =
|
wneuper@5050
|
109 |
(0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray
|
wneuper@5050
|
110 |
|
wneuper@5050
|
111 |
def iterator(text: CharSequence): Iterator[Symbol] =
|
wneuper@5050
|
112 |
new Iterator[Symbol]
|
wneuper@5050
|
113 |
{
|
wneuper@5050
|
114 |
private val matcher = new Matcher(text)
|
wneuper@5050
|
115 |
private var i = 0
|
wneuper@5050
|
116 |
def hasNext = i < text.length
|
wneuper@5050
|
117 |
def next =
|
wneuper@5050
|
118 |
{
|
wneuper@5050
|
119 |
val n = matcher(i, text.length)
|
wneuper@5050
|
120 |
val s =
|
wneuper@5050
|
121 |
if (n == 0) ""
|
wneuper@5050
|
122 |
else if (n == 1) {
|
wneuper@5050
|
123 |
val c = text.charAt(i)
|
wneuper@5050
|
124 |
if (c < char_symbols.length) char_symbols(c)
|
wneuper@5050
|
125 |
else text.subSequence(i, i + n).toString
|
wneuper@5050
|
126 |
}
|
wneuper@5050
|
127 |
else text.subSequence(i, i + n).toString
|
wneuper@5050
|
128 |
i += n
|
wneuper@5050
|
129 |
s
|
wneuper@5050
|
130 |
}
|
wneuper@5050
|
131 |
}
|
wneuper@5050
|
132 |
|
wneuper@5050
|
133 |
def explode(text: CharSequence): List[Symbol] = iterator(text).toList
|
wneuper@5050
|
134 |
|
wneuper@5050
|
135 |
def advance_line_column(pos: (Int, Int), text: CharSequence): (Int, Int) =
|
wneuper@5050
|
136 |
{
|
wneuper@5050
|
137 |
var (line, column) = pos
|
wneuper@5050
|
138 |
for (sym <- iterator(text)) {
|
wneuper@5050
|
139 |
if (is_newline(sym)) { line += 1; column = 1 }
|
wneuper@5050
|
140 |
else column += 1
|
wneuper@5050
|
141 |
}
|
wneuper@5050
|
142 |
(line, column)
|
wneuper@5050
|
143 |
}
|
wneuper@5050
|
144 |
|
wneuper@5050
|
145 |
|
wneuper@5050
|
146 |
/* decoding offsets */
|
wneuper@5050
|
147 |
|
wneuper@5050
|
148 |
object Index
|
wneuper@5050
|
149 |
{
|
wneuper@5050
|
150 |
private sealed case class Entry(chr: Int, sym: Int)
|
wneuper@5050
|
151 |
|
wneuper@5050
|
152 |
val empty: Index = new Index(Nil)
|
wneuper@5050
|
153 |
|
wneuper@5050
|
154 |
def apply(text: CharSequence): Index =
|
wneuper@5050
|
155 |
{
|
wneuper@5050
|
156 |
val matcher = new Matcher(text)
|
wneuper@5050
|
157 |
val buf = new mutable.ListBuffer[Entry]
|
wneuper@5050
|
158 |
var chr = 0
|
wneuper@5050
|
159 |
var sym = 0
|
wneuper@5050
|
160 |
while (chr < text.length) {
|
wneuper@5050
|
161 |
val n = matcher(chr, text.length)
|
wneuper@5050
|
162 |
chr += n
|
wneuper@5050
|
163 |
sym += 1
|
wneuper@5050
|
164 |
if (n > 1) buf += Entry(chr, sym)
|
wneuper@5050
|
165 |
}
|
wneuper@5050
|
166 |
if (buf.isEmpty) empty else new Index(buf.toList)
|
wneuper@5050
|
167 |
}
|
wneuper@5050
|
168 |
}
|
wneuper@5050
|
169 |
|
wneuper@5050
|
170 |
final class Index private(entries: List[Index.Entry])
|
wneuper@5050
|
171 |
{
|
wneuper@5050
|
172 |
private val hash: Int = entries.hashCode
|
wneuper@5050
|
173 |
private val index: Array[Index.Entry] = entries.toArray
|
wneuper@5050
|
174 |
|
wneuper@5050
|
175 |
def decode(symbol_offset: Offset): Text.Offset =
|
wneuper@5050
|
176 |
{
|
wneuper@5050
|
177 |
val sym = symbol_offset - 1
|
wneuper@5050
|
178 |
val end = index.length
|
wneuper@5050
|
179 |
@tailrec def bisect(a: Int, b: Int): Int =
|
wneuper@5050
|
180 |
{
|
wneuper@5050
|
181 |
if (a < b) {
|
wneuper@5050
|
182 |
val c = (a + b) / 2
|
wneuper@5050
|
183 |
if (sym < index(c).sym) bisect(a, c)
|
wneuper@5050
|
184 |
else if (c + 1 == end || sym < index(c + 1).sym) c
|
wneuper@5050
|
185 |
else bisect(c + 1, b)
|
wneuper@5050
|
186 |
}
|
wneuper@5050
|
187 |
else -1
|
wneuper@5050
|
188 |
}
|
wneuper@5050
|
189 |
val i = bisect(0, end)
|
wneuper@5050
|
190 |
if (i < 0) sym
|
wneuper@5050
|
191 |
else index(i).chr + sym - index(i).sym
|
wneuper@5050
|
192 |
}
|
wneuper@5050
|
193 |
def decode(symbol_range: Range): Text.Range = symbol_range.map(decode(_))
|
wneuper@5050
|
194 |
|
wneuper@5050
|
195 |
override def hashCode: Int = hash
|
wneuper@5050
|
196 |
override def equals(that: Any): Boolean =
|
wneuper@5050
|
197 |
that match {
|
wneuper@5050
|
198 |
case other: Index => index.sameElements(other.index)
|
wneuper@5050
|
199 |
case _ => false
|
wneuper@5050
|
200 |
}
|
wneuper@5050
|
201 |
}
|
wneuper@5050
|
202 |
|
wneuper@5050
|
203 |
//\\----------------- FROM HERE ON TOO ADVANCED ----------------------------//
|
wneuper@5050
|
204 |
////----------------- ADDITIONS TO Isabelle2016-1 FOR ISAC -----------------\\
|
wneuper@5050
|
205 |
|
wneuper@5050
|
206 |
// Symbol
|
wneuper@5050
|
207 |
def is_digit(sym: Char): Boolean = '0' <= sym && sym <= '9'
|
wneuper@5050
|
208 |
|
wneuper@5050
|
209 |
}
|