wenzelm@27901
|
1 |
/* Title: Pure/General/symbol.scala
|
wenzelm@27901
|
2 |
ID: $Id$
|
wenzelm@27901
|
3 |
Author: Makarius
|
wenzelm@27901
|
4 |
|
wenzelm@27924
|
5 |
Detecting and recoding Isabelle symbols.
|
wenzelm@27901
|
6 |
*/
|
wenzelm@27901
|
7 |
|
wenzelm@27901
|
8 |
package isabelle
|
wenzelm@27901
|
9 |
|
wenzelm@27937
|
10 |
import java.util.regex.Pattern
|
wenzelm@27918
|
11 |
import java.io.File
|
wenzelm@27918
|
12 |
import scala.io.Source
|
wenzelm@27923
|
13 |
import scala.collection.jcl.HashMap
|
wenzelm@27901
|
14 |
|
wenzelm@27901
|
15 |
|
wenzelm@27901
|
16 |
object Symbol {
|
wenzelm@27901
|
17 |
|
wenzelm@27924
|
18 |
/** Symbol regexps **/
|
wenzelm@27901
|
19 |
|
wenzelm@27901
|
20 |
// Compile a regular expression with the COMMENTS and DOTALL flags set.
private def compile(s: String) = {
  val flags = Pattern.COMMENTS | Pattern.DOTALL
  Pattern.compile(s, flags)
}
|
wenzelm@27901
|
22 |
|
wenzelm@27937
|
23 |
// One plain character: anything except a backslash or an isolated surrogate,
// or a complete high/low surrogate pair.
private val plain_pattern =
  compile(""" [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)

// A well-formed symbol: one or two backslashes, then <name>, <^name>
// or <^raw:...>.
private val symbol_pattern = compile(""" \\ \\? < (?:
  \^? [A-Za-z][A-Za-z0-9_']* |
  \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")

// Text that opens like a symbol but is not a well-formed one (negative
// lookahead on symbol_pattern); it extends up to whitespace, a quote,
// backquote, backslash or one of the (* *) {* *} delimiters.
private val bad_symbol_pattern = compile("(?!" + symbol_pattern.toString + ")" +
  """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")

// total pattern: the alternatives above, with any single character as the
// last resort, so a match is found at every position of a text
val pattern =
  compile(
    List(plain_pattern.toString, symbol_pattern.toString, bad_symbol_pattern.toString, " .")
      .mkString("|"))
|
wenzelm@27918
|
34 |
|
wenzelm@27901
|
35 |
|
wenzelm@27901
|
36 |
|
wenzelm@27937
|
37 |
/** Recoding **/
|
wenzelm@27924
|
38 |
|
wenzelm@27927
|
39 |
// Replaces tokens of a text according to a list of (from, to) pairs.
// Tokens are delimited by the total `pattern` of this object.
private class Recoder(list: List[(String, String)]) {
  // Smallest and largest first character over all `from` strings. recode
  // only runs the matcher on characters inside this range.
  private val (min, max) =
    list.foldLeft(('\uffff', '\u0000')) { (bounds, entry) =>
      val (lo, hi) = bounds
      val first = entry._1(0)
      (if (first < lo) first else lo, if (first > hi) first else hi)
    }

  // Lookup table built from the pairs, inserted in list order.
  private val table = {
    val mapping = new HashMap[String, String]
    list.foreach(entry => mapping + (entry._1 -> entry._2))
    mapping
  }

  // Returns `text` with every token that has a table entry replaced by its
  // mapped value; all other text is copied unchanged.
  def recode(text: String) = {
    val len = text.length
    val matcher = pattern.matcher(text)
    val result = new StringBuilder(len)
    var pos = 0
    while (pos < len) {
      val ch = text(pos)
      if (ch < min || max < ch) {
        // cannot start any table key: copy the character as it is
        result.append(ch)
        pos += 1
      }
      else {
        // take the token starting at the current position
        matcher.region(pos, len)
        matcher.lookingAt
        val token = matcher.group
        val replacement: String = table.getOrElse(token, token)
        result.append(replacement)
        pos = matcher.end
      }
    }
    result.toString
  }
}
|
wenzelm@27918
|
77 |
|
wenzelm@27918
|
78 |
|
wenzelm@27937
|
79 |
|
wenzelm@27937
|
80 |
/** Symbol interpretation **/
|
wenzelm@27937
|
81 |
|
wenzelm@27923
|
82 |
// Symbol interpretation: reads symbol specifications from the etc/symbols
// files and offers decode/encode between the symbol notation and the
// characters given by each symbol's "code" property.
class Interpretation {

  // symbol name -> its properties, filled in by read_line
  private val symbols = new HashMap[String, HashMap[String, String]]

  // Replace symbols in `text` by the characters of their codes.
  def decode(text: String): String = decoder.recode(text)

  // Replace characters in `text` by the symbols they are the code of.
  def encode(text: String): String = encoder.recode(text)


  /* read symbols */

  private val empty_pattern = compile(""" ^\s* (?: \#.* )? $ """)
  private val blank_pattern = compile(""" \s+ """)
  private val key_pattern = compile(""" (.+): """)

  // Parse one line of a symbols file and record it in `symbols`.
  // Blank lines and lines holding only a '#' comment are ignored. Any other
  // line must be a symbol name followed by "key: value" pairs, separated by
  // whitespace; otherwise an error is raised.
  private def read_line(line: String): Unit = {
    def err() = error("Bad symbol specification (line " + line + ")")

    def read_props(props: List[String], tab: HashMap[String, String]): Unit = {
      props match {
        case Nil => ()
        case _ :: Nil => err()  // key without a value
        case key :: value :: rest => {
          val key_matcher = key_pattern.matcher(key)
          if (key_matcher.matches) {
            tab + (key_matcher.group(1) -> value)
            read_props(rest, tab)
          }
          else err()
        }
      }
    }

    if (!empty_pattern.matcher(line).matches) {
      blank_pattern.split(line).toList match {
        // A line starting with blanks yields an empty first field: reject it
        // here, since an empty symbol name cannot be used as a recoding key.
        case symbol :: props if symbol.length > 0 => {
          val tab = new HashMap[String, String]
          read_props(props, tab)
          symbols + (symbol -> tab)
        }
        case _ => err()
      }
    }
  }

  // Read all symbol specifications from the file at `path`, if it is readable;
  // an unreadable or missing file is silently skipped.
  // NOTE(review): presumably platform_path expands the $ISABELLE_... prefix
  // into a platform file name -- confirm against IsabelleSystem.
  // NOTE(review): the Source opened here is never explicitly closed -- check
  // whether the targeted scala.io.Source offers a way to release it.
  private def read_symbols(path: String) = {
    val file = new File(IsabelleSystem.platform_path(path))
    if (file.canRead) {
      for (line <- Source.fromFile(file).getLines) read_line(line)
    }
  }


  /* init tables */

  // Turn a symbol entry into (symbol, string of the character for its code).
  // Raises an error if the "code" property is missing, is not a number, or
  // is not a valid Unicode code point.
  private def get_code(entry: (String, HashMap[String, String])) = {
    val (symbol, props) = entry
    val code =
      try { Integer.decode(props("code")).intValue }
      catch {
        case _: NoSuchElementException => error("Missing code for symbol " + symbol)
        case _: NumberFormatException => error("Bad code for symbol " + symbol)
      }
    // Character.toChars rejects values outside the Unicode range with a bare
    // IllegalArgumentException: report them like any other bad code.
    if (!Character.isValidCodePoint(code)) error("Bad code for symbol " + symbol)
    (symbol, new String(Character.toChars(code)))
  }


  /* constructor */

  read_symbols("$ISABELLE_HOME/etc/symbols")
  read_symbols("$ISABELLE_HOME_USER/etc/symbols")

  // (symbol, character string) pairs for everything read above
  private val codes: List[(String, String)] = symbols.elements.toList.map(get_code)

  // decoding also accepts each symbol with one additional leading backslash
  private val decoder: Recoder =
    new Recoder(codes ::: (for ((x, y) <- codes) yield ("\\" + x, y)))
  private val encoder: Recoder =
    new Recoder(for ((x, y) <- codes) yield (y, x))
}
|
wenzelm@27918
|
162 |
|
wenzelm@27901
|
163 |
}
|