62 |
65 |
63 /* basic matching */ |
66 /* basic matching */ |
64 |
67 |
65 def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || '\ud800' <= c && c <= '\udfff') |
68 def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || '\ud800' <= c && c <= '\udfff') |
66 |
69 |
67 def is_physical_newline(s: String): Boolean = |
70 def is_physical_newline(s: Symbol): Boolean = |
68 s == "\n" || s == "\r" || s == "\r\n" |
71 s == "\n" || s == "\r" || s == "\r\n" |
69 |
72 |
70 def is_malformed(s: String): Boolean = |
73 def is_malformed(s: Symbol): Boolean = |
71 !(s.length == 1 && is_plain(s(0))) && malformed_symbol.pattern.matcher(s).matches |
74 !(s.length == 1 && is_plain(s(0))) && malformed_symbol.pattern.matcher(s).matches |
72 |
75 |
73 class Matcher(text: CharSequence) |
76 class Matcher(text: CharSequence) |
74 { |
77 { |
75 private val matcher = regex_total.pattern.matcher(text) |
78 private val matcher = regex_total.pattern.matcher(text) |
233 case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props)) |
236 case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props)) |
234 case _ => err() |
237 case _ => err() |
235 } |
238 } |
236 } |
239 } |
237 |
240 |
238 private val symbols: List[(String, Map[String, String])] = |
241 private val symbols: List[(Symbol, Map[String, String])] = |
239 Map(( |
242 Map(( |
240 for (decl <- split_lines(symbols_spec) if !empty.pattern.matcher(decl).matches) |
243 for (decl <- split_lines(symbols_spec) if !empty.pattern.matcher(decl).matches) |
241 yield read_decl(decl)): _*) toList |
244 yield read_decl(decl)): _*) toList |
242 |
245 |
243 |
246 |
244 /* misc properties */ |
247 /* misc properties */ |
245 |
248 |
246 val names: Map[String, String] = |
249 val names: Map[Symbol, String] = |
247 { |
250 { |
248 val name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""") |
251 val name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""") |
249 Map((for ((sym @ name(a), _) <- symbols) yield (sym -> a)): _*) |
252 Map((for ((sym @ name(a), _) <- symbols) yield (sym -> a)): _*) |
250 } |
253 } |
251 |
254 |
252 val abbrevs: Map[String, String] = |
255 val abbrevs: Map[Symbol, String] = |
253 Map(( |
256 Map(( |
254 for ((sym, props) <- symbols if props.isDefinedAt("abbrev")) |
257 for ((sym, props) <- symbols if props.isDefinedAt("abbrev")) |
255 yield (sym -> props("abbrev"))): _*) |
258 yield (sym -> props("abbrev"))): _*) |
256 |
259 |
257 |
260 |
348 Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~") |
351 Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~") |
349 |
352 |
350 |
353 |
351 /* control symbols */ |
354 /* control symbols */ |
352 |
355 |
353 val ctrl_decoded: Set[String] = |
356 val ctrl_decoded: Set[Symbol] = |
354 Set((for ((sym, _) <- symbols if sym.startsWith("\\<^")) yield decode(sym)): _*) |
357 Set((for ((sym, _) <- symbols if sym.startsWith("\\<^")) yield decode(sym)): _*) |
355 |
358 |
356 val subscript_decoded = Set(decode("\\<^sub>"), decode("\\<^isub>")) |
359 val subscript_decoded = Set(decode("\\<^sub>"), decode("\\<^isub>")) |
357 val superscript_decoded = Set(decode("\\<^sup>"), decode("\\<^isup>")) |
360 val superscript_decoded = Set(decode("\\<^sup>"), decode("\\<^isup>")) |
358 |
361 |
364 } |
367 } |
365 |
368 |
366 |
369 |
367 /* tables */ |
370 /* tables */ |
368 |
371 |
369 def names: Map[String, String] = symbols.names |
372 def names: Map[Symbol, String] = symbols.names |
370 def abbrevs: Map[String, String] = symbols.abbrevs |
373 def abbrevs: Map[Symbol, String] = symbols.abbrevs |
371 |
374 |
372 def decode(text: String): String = symbols.decode(text) |
375 def decode(text: String): String = symbols.decode(text) |
373 def encode(text: String): String = symbols.encode(text) |
376 def encode(text: String): String = symbols.encode(text) |
374 |
377 |
375 def fonts: Map[String, String] = symbols.fonts |
378 def fonts: Map[Symbol, String] = symbols.fonts |
376 def font_names: List[String] = symbols.font_names |
379 def font_names: List[String] = symbols.font_names |
377 def font_index: Map[String, Int] = symbols.font_index |
380 def font_index: Map[String, Int] = symbols.font_index |
378 def lookup_font(sym: String): Option[Int] = symbols.fonts.get(sym).map(font_index(_)) |
381 def lookup_font(sym: Symbol): Option[Int] = symbols.fonts.get(sym).map(font_index(_)) |
379 |
382 |
380 |
383 |
381 /* classification */ |
384 /* classification */ |
382 |
385 |
383 def is_letter(sym: String): Boolean = symbols.letters.contains(sym) |
386 def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym) |
384 def is_digit(sym: String): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9' |
387 def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9' |
385 def is_quasi(sym: String): Boolean = sym == "_" || sym == "'" |
388 def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'" |
386 def is_letdig(sym: String): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym) |
389 def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym) |
387 def is_blank(sym: String): Boolean = symbols.blanks.contains(sym) |
390 def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym) |
388 def is_symbolic_char(sym: String): Boolean = symbols.sym_chars.contains(sym) |
391 def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym) |
389 def is_symbolic(sym: String): Boolean = |
392 def is_symbolic(sym: Symbol): Boolean = |
390 sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^") |
393 sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^") |
391 |
394 |
392 |
395 |
393 /* control symbols */ |
396 /* control symbols */ |
394 |
397 |
395 def is_ctrl(sym: String): Boolean = |
398 def is_ctrl(sym: Symbol): Boolean = |
396 sym.startsWith("\\<^") || symbols.ctrl_decoded.contains(sym) |
399 sym.startsWith("\\<^") || symbols.ctrl_decoded.contains(sym) |
397 |
400 |
398 def is_controllable(sym: String): Boolean = |
401 def is_controllable(sym: Symbol): Boolean = |
399 !is_blank(sym) && !is_ctrl(sym) && !is_malformed(sym) |
402 !is_blank(sym) && !is_ctrl(sym) && !is_malformed(sym) |
400 |
403 |
401 def is_subscript_decoded(sym: String): Boolean = symbols.subscript_decoded.contains(sym) |
404 def is_subscript_decoded(sym: Symbol): Boolean = symbols.subscript_decoded.contains(sym) |
402 def is_superscript_decoded(sym: String): Boolean = symbols.superscript_decoded.contains(sym) |
405 def is_superscript_decoded(sym: Symbol): Boolean = symbols.superscript_decoded.contains(sym) |
403 def is_bold_decoded(sym: String): Boolean = sym == symbols.bold_decoded |
406 def is_bold_decoded(sym: Symbol): Boolean = sym == symbols.bold_decoded |
404 def is_bsub_decoded(sym: String): Boolean = sym == symbols.bsub_decoded |
407 def is_bsub_decoded(sym: Symbol): Boolean = sym == symbols.bsub_decoded |
405 def is_esub_decoded(sym: String): Boolean = sym == symbols.esub_decoded |
408 def is_esub_decoded(sym: Symbol): Boolean = sym == symbols.esub_decoded |
406 def is_bsup_decoded(sym: String): Boolean = sym == symbols.bsup_decoded |
409 def is_bsup_decoded(sym: Symbol): Boolean = sym == symbols.bsup_decoded |
407 def is_esup_decoded(sym: String): Boolean = sym == symbols.esup_decoded |
410 def is_esup_decoded(sym: Symbol): Boolean = sym == symbols.esup_decoded |
408 } |
411 } |