src/Pure/General/symbol.scala
author wenzelm
Sat, 17 Mar 2012 17:44:29 +0100
changeset 47868 395b7277ed76
parent 45873 aa34d2d049ce
child 48002 fb5764df8a9c
permissions -rw-r--r--
misc tuning to accommodate scala-2.10.0-M2;
wenzelm@27901
     1
/*  Title:      Pure/General/symbol.scala
wenzelm@27901
     2
    Author:     Makarius
wenzelm@27901
     3
wenzelm@27924
     4
Detecting and recoding Isabelle symbols.
wenzelm@27901
     5
*/
wenzelm@27901
     6
wenzelm@27901
     7
package isabelle
wenzelm@27901
     8
wenzelm@27918
     9
import scala.io.Source
wenzelm@36035
    10
import scala.collection.mutable
wenzelm@31537
    11
import scala.util.matching.Regex
wenzelm@27901
    12
wenzelm@27901
    13
wenzelm@31537
    14
object Symbol
wenzelm@31537
    15
{
wenzelm@44570
    16
  // A symbol is represented as a plain String; the alias only names that role in signatures.
  type Symbol = String
wenzelm@44570
    17
wenzelm@44570
    18
wenzelm@36772
    19
  /* spaces */
wenzelm@36772
    20
wenzelm@36850
    21
  // Single blank, as a character and as a string.
  val spc = ' '
  val space = " "

  // Pre-built run of blanks from which short results are cut.
  private val static_spaces = space * 4000

  /**
   * Produce a string consisting of k blanks.
   *
   * Requests shorter than the pre-built run are served as a substring of it;
   * longer ones are built on demand.  Negative k is rejected by `require`.
   */
  def spaces(k: Int): String =
  {
    require(k >= 0)
    if (k >= static_spaces.length) space * k
    else static_spaces.substring(0, k)
  }
wenzelm@36772
    32
wenzelm@36772
    33
wenzelm@44292
    34
  /* ASCII characters */
wenzelm@44292
    35
wenzelm@44292
    36
  /** True for the ASCII letters A-Z and a-z. */
  def is_ascii_letter(c: Char): Boolean =
    (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
wenzelm@44292
    37
  /** True for the ASCII digits 0-9. */
  def is_ascii_digit(c: Char): Boolean = c >= '0' && c <= '9'
wenzelm@44292
    38
  /** True for the two "quasi-letter" characters: underscore and single quote. */
  def is_ascii_quasi(c: Char): Boolean =
    c match {
      case '_' | '\'' => true
      case _ => false
    }
wenzelm@44292
    39
wenzelm@44292
    40
  /** True if c is an ASCII letter, an ASCII digit, or a quasi-letter (underscore, quote). */
  def is_ascii_letdig(c: Char): Boolean =
    !(!is_ascii_letter(c) && !is_ascii_digit(c) && !is_ascii_quasi(c))
wenzelm@44292
    42
wenzelm@44292
    43
  /**
   * True if s is non-empty, starts with an ASCII letter, and continues with
   * ASCII letters, digits, underscores or quotes only.
   */
  def is_ascii_identifier(s: String): Boolean =
    s.nonEmpty && is_ascii_letter(s.charAt(0)) && s.drop(1).forall(is_ascii_letdig)
wenzelm@44292
    45
wenzelm@44292
    46
wenzelm@34007
    47
  /* Symbol regexps */
wenzelm@27901
    48
wenzelm@31537
    49
  // One unit of plain text: a single char other than CR, backslash, a surrogate
  // or U+FFFD, or else a complete surrogate pair.
  private val plain = """(?xs)
      [^\r\\\ud800-\udfff\ufffd] | [\ud800-\udbff][\udc00-\udfff] """.r

  // LF, CR-LF or CR, each taken as one unit.
  private val physical_newline = """(?xs) \n | \r\n | \r """.r

  // Well-formed symbol notation: \<ident>, \<^ident> or \<^raw:...>.
  private val symbol = """(?xs)
      \\ < (?:
      \^? [A-Za-z][A-Za-z0-9_']* |
      \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""".r

  // Leftovers that are explicitly classified as malformed: a high surrogate or
  // U+FFFD (guarded by a negative lookahead on `symbol`), or a dangling symbol opener.
  private val malformed_symbol =
    ("(?xs) (?!" + symbol.toString + ")" +
      """ [\ud800-\udbff\ufffd] | \\<\^? """).r

  // Total tokenizer: the alternatives above in priority order, with a final
  // catch-all for any single char, so a match always exists on non-empty input.
  val regex_total =
    new Regex(List(plain, physical_newline, symbol, malformed_symbol).mkString("|") + "| .")
wenzelm@27918
    64
wenzelm@34146
    65
wenzelm@34146
    66
  /* basic matching */
wenzelm@34146
    67
wenzelm@37563
    68
  /**
   * Fast single-char test: c is neither carriage return, nor backslash,
   * nor any surrogate code unit.
   */
  def is_plain(c: Char): Boolean =
    c != '\r' && c != '\\' && (c < '\ud800' || c > '\udfff')
wenzelm@34146
    69
wenzelm@44570
    70
  /** True if s is exactly one of the newline forms LF, CR or CR-LF. */
  def is_physical_newline(s: Symbol): Boolean =
    s match {
      case "\n" | "\r" | "\r\n" => true
      case _ => false
    }
wenzelm@39203
    72
wenzelm@44570
    73
  /**
   * True if s, taken as a whole, matches the malformed-symbol pattern
   * (an isolated high surrogate, the replacement character U+FFFD, or a
   * dangling symbol opener).
   *
   * A single plain char skips the regex as a shortcut.  The replacement
   * character is excluded from that shortcut: `is_plain` accepts it, yet the
   * malformed-symbol pattern lists it explicitly, so it must reach the regex
   * to be reported as malformed.
   */
  def is_malformed(s: Symbol): Boolean =
  {
    val plain_shortcut = s.length == 1 && is_plain(s(0)) && s(0) != '\ufffd'
    !plain_shortcut && malformed_symbol.pattern.matcher(s).matches
  }
wenzelm@34146
    75
wenzelm@34146
    76
  /**
   * Reusable symbol matcher over a fixed text.
   *
   * `apply(start, end)` yields the length in chars of the symbol that begins
   * at `start`, looking no further than `end`.
   */
  class Matcher(text: CharSequence)
  {
    private val matcher = regex_total.pattern.matcher(text)

    def apply(start: Int, end: Int): Int =
    {
      require(0 <= start && start < end && end <= text.length)
      if (!is_plain(text.charAt(start))) {
        // slow path: let the total regex decide how far the symbol extends
        matcher.region(start, end).lookingAt
        matcher.end - matcher.start
      }
      else 1  // fast path: a plain char is a symbol of its own
    }
  }
wenzelm@27901
    89
wenzelm@27901
    90
wenzelm@44569
    91
  /* iterator */
wenzelm@31939
    92
wenzelm@44570
    93
  // Shared one-char strings for the first 256 code units, indexed by char code.
  private val char_symbols: Array[Symbol] =
    Array.tabulate(256)(code => code.toChar.toString)
wenzelm@44550
    95
wenzelm@44570
    96
  /**
   * Iterate over the symbols of text, in order.
   *
   * Single chars below 256 are served from the shared `char_symbols` table
   * instead of allocating a fresh string each time.
   */
  def iterator(text: CharSequence): Iterator[Symbol] =
    new Iterator[Symbol]
    {
      private val matcher = new Matcher(text)
      private var offset = 0
      def hasNext = offset < text.length
      def next =
      {
        val len = matcher(offset, text.length)
        val sym =
          len match {
            case 0 => ""
            case 1 =>
              val c = text.charAt(offset)
              if (c >= char_symbols.length) text.subSequence(offset, offset + len).toString
              else char_symbols(c)
            case _ => text.subSequence(offset, offset + len).toString
          }
        offset += len
        sym
      }
    }
wenzelm@44360
   117
wenzelm@45820
   118
  /** All symbols of text as a list, in the order produced by `iterator`. */
  def explode(text: CharSequence): List[Symbol] = iterator(text).toList
wenzelm@45820
   119
wenzelm@34007
   120
wenzelm@34007
   121
  /* decoding offsets */
wenzelm@34007
   122
wenzelm@34007
   123
  /**
   * Translation of symbol offsets into char offsets for a fixed text.
   *
   * `index` holds one entry per symbol that is longer than one char: `chr` is
   * the char offset just after that symbol and `sym` the number of symbols up
   * to and including it.  Between entries, symbols and chars advance in step.
   */
  class Index(text: CharSequence)
  {
    sealed case class Entry(chr: Int, sym: Int)

    val index: Array[Entry] =
    {
      val matcher = new Matcher(text)
      val entries = new mutable.ArrayBuffer[Entry]
      var char_offset = 0
      var sym_count = 0
      while (char_offset < text.length) {
        val len = matcher(char_offset, text.length)
        char_offset += len
        sym_count += 1
        if (len > 1) entries += Entry(char_offset, sym_count)
      }
      entries.toArray
    }

    /**
     * Char offset for the symbol offset sym1.  The argument is decremented
     * before lookup, so it appears to be 1-based -- confirm against callers.
     */
    def decode(sym1: Int): Int =
    {
      val sym = sym1 - 1
      val end = index.length

      // binary search for the last entry whose symbol count does not exceed sym
      var lo = 0
      var hi = end
      var found = -1
      var searching = true
      while (searching && lo < hi) {
        val mid = (lo + hi) / 2
        if (sym < index(mid).sym) hi = mid
        else if (mid + 1 == end || sym < index(mid + 1).sym) { found = mid; searching = false }
        else lo = mid + 1
      }

      if (found < 0) sym
      else index(found).chr + sym - index(found).sym
    }

    /** Decode a range by mapping `decode` over it via the range's own `map`. */
    def decode(range: Text.Range): Text.Range = range.map(offset => decode(offset))
  }
wenzelm@31939
   160
wenzelm@31939
   161
wenzelm@34007
   162
  /* recoding text */
wenzelm@27924
   163
wenzelm@31537
   164
  /**
   * Text recoder driven by a list of (source, target) replacements.
   *
   * Sources must be non-empty and pairwise distinct; a duplicate source is
   * reported via `error`.
   */
  private class Recoder(list: List[(String, String)])
  {
    // Smallest and largest first char over all sources: chars outside this
    // range cannot start a replaceable item and are copied verbatim.
    private val (min, max) =
      list.foldLeft(('\uffff', '\u0000')) {
        case ((lo, hi), (x, _)) =>
          val c = x(0)
          (if (c < lo) c else lo, if (c > hi) c else hi)
      }

    // Replacement table, rejecting a second mapping for the same source.
    private val table =
      list.foldLeft(Map[String, String]()) {
        case (tab, (x, y)) =>
          tab.get(x) match {
            case Some(z) =>
              error("Duplicate mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z))
            case None => tab + (x -> y)
          }
      }

    /**
     * Recode text: each symbol (as delimited by `regex_total`) that occurs in
     * the table is replaced by its target; everything else is kept unchanged.
     */
    def recode(text: String): String =
    {
      val len = text.length
      val matcher = regex_total.pattern.matcher(text)
      val result = new StringBuilder(len)
      var pos = 0
      while (pos < len) {
        val c = text(pos)
        if (c < min || max < c) { result.append(c); pos += 1 }
        else {
          matcher.region(pos, len).lookingAt
          val item = matcher.group
          result.append(table.getOrElse(item, item))
          pos = matcher.end
        }
      }
      result.toString
    }
  }
wenzelm@27918
   208
wenzelm@27918
   209
wenzelm@27937
   210
wenzelm@44569
   211
  /** symbol interpretation **/
wenzelm@27937
   212
wenzelm@44569
   213
  // Interpretation of the symbol specification, created on first use from the
  // files named by the ISABELLE_SYMBOLS setting.
  private lazy val symbols =
  {
    val spec_path = Path.split(Isabelle_System.getenv_strict("ISABELLE_SYMBOLS"))
    new Interpretation(Isabelle_System.try_read(spec_path))
  }
wenzelm@44569
   216
wenzelm@44569
   217
  private class Interpretation(symbols_spec: String)
wenzelm@29569
   218
  {
wenzelm@31537
   219
    /* read symbols */
wenzelm@31537
   220
wenzelm@31537
   221
    // A line that is blank or holds only a #-comment.
    private val empty = """(?xs) ^\s* (?: \#.* )? $ """.r
    // A property key: some text followed by a colon; the text is captured.
    private val key = """(?xs) (.+): """.r
wenzelm@31537
   223
wenzelm@44570
   224
    /**
     * Parse one declaration line: a symbol followed by whitespace-separated
     * "key: value" pairs.  The symbol must be longer than one char and not
     * malformed; any violation is reported via `error` with the whole line.
     */
    private def read_decl(decl: String): (Symbol, Map[String, String]) =
    {
      def err() = error("Bad symbol declaration: " + decl)

      // Consume properties pairwise; folding from the right lets an earlier
      // occurrence of a key override a later one.
      def read_props(props: List[String]): Map[String, String] =
        props.grouped(2).toList.foldRight(Map[String, String]()) {
          case (key(x) :: y :: Nil, result) => result + (x -> y)
          case _ => err()
        }

      val fields = decl.split("\\s+").toList
      fields match {
        case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props))
        case _ => err()
      }
    }
wenzelm@31537
   242
wenzelm@44570
   243
    // All declared symbols with their properties.  Going through a Map means
    // a later declaration of the same symbol replaces an earlier one.
    private val symbols: List[(Symbol, Map[String, String])] =
    {
      val decls = split_lines(symbols_spec).filterNot(line => empty.pattern.matcher(line).matches)
      Map(decls.map(read_decl): _*).toList
    }
wenzelm@31537
   247
wenzelm@31537
   248
wenzelm@31651
   249
    /* misc properties */
wenzelm@31651
   250
wenzelm@44570
   251
    // Bare name of each declared symbol of the form \<name> or \<^name>;
    // symbols of any other shape are left out.
    val names: Map[Symbol, String] =
    {
      val name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
      symbols.collect({ case (sym @ name(a), _) => sym -> a }).toMap
    }
wenzelm@31651
   256
wenzelm@44570
   257
    // Value of the "abbrev" property, for those symbols that declare one.
    val abbrevs: Map[Symbol, String] =
      symbols.collect({
        case (sym, props) if props.isDefinedAt("abbrev") => sym -> props("abbrev")
      }).toMap
wenzelm@44359
   261
wenzelm@44359
   262
wenzelm@44361
   263
    /* recoding */
wenzelm@31537
   264
wenzelm@31537
   265
    // Recoders between symbol notation and the character given by each
    // symbol's "code" property: `decoder` maps symbol to character, `encoder`
    // is the inverse.  Every declared symbol must carry a numeric code that is
    // a valid Unicode code point outside the ASCII range.
    private val (decoder, encoder) =
    {
      val mapping =
        for ((sym, props) <- symbols) yield {
          val code =
            try { Integer.decode(props("code")).intValue }
            catch {
              case _: NoSuchElementException => error("Missing code for symbol " + sym)
              case _: NumberFormatException => error("Bad code for symbol " + sym)
            }
          // Validate before Character.toChars, which would otherwise throw a
          // bare IllegalArgumentException that does not name the symbol.
          if (!Character.isValidCodePoint(code)) error("Bad code for symbol " + sym)
          else if (code < 128) error("Illegal ASCII code for symbol " + sym)
          else (sym, new String(Character.toChars(code)))
        }
      (new Recoder(mapping),
       new Recoder(mapping map { case (x, y) => (y, x) }))
    }
wenzelm@27918
   284
wenzelm@34104
   285
    /** Recode text with `decoder`: declared symbols become their coded characters. */
    def decode(text: String): String = decoder.recode(text)
wenzelm@34104
   286
    /** Recode text with `encoder`: coded characters become their symbol notation. */
    def encode(text: String): String = encoder.recode(text)
wenzelm@34143
   287
wenzelm@44361
   288
    // Set holding every given element together with its decoded form.
    private def recode_set(elems: String*): Set[String] =
    {
      val content = elems.toList
      (content ++ content.map(decode)).toSet
    }
wenzelm@44361
   293
wenzelm@44361
   294
    // Map holding every given entry under its key and under the decoded key;
    // decoded entries are added last and therefore win on a clash.
    private def recode_map[A](elems: (String, A)*): Map[String, A] =
    {
      val content = elems.toList
      val decoded = content.map({ case (sym, a) => (decode(sym), a) })
      (content ++ decoded).toMap
    }
wenzelm@44361
   299
wenzelm@44361
   300
wenzelm@44361
   301
    /* user fonts */
wenzelm@44361
   302
wenzelm@44570
   303
    // Value of the "font" property for symbols that declare one, reachable
    // through both the symbol and its decoded form.
    val fonts: Map[Symbol, String] =
      recode_map(symbols.collect({
        case (sym, props) if props.isDefinedAt("font") => sym -> props("font")
      }): _*)
wenzelm@44361
   307
wenzelm@44361
   308
    // The distinct font names in use (duplicates removed by going through a Set).
    val font_names: List[String] = fonts.toList.map(_._2).toSet.toList
wenzelm@44361
   309
    // Position of each font name within `font_names`.
    val font_index: Map[String, Int] = font_names.zipWithIndex.toMap
wenzelm@44361
   310
wenzelm@34143
   311
wenzelm@34143
   312
    /* classification */
wenzelm@34143
   313
wenzelm@44569
   314
    // Symbols that count as letters (plus their decoded forms): ASCII letters,
    // the single- and double-letter symbol families built from them, a fixed
    // list of Greek letters, and the two sub/superscript controls.  The regular
    // families are generated in the same order as a hand-written enumeration.
    val letters =
    {
      val ascii = (('A' to 'Z').toList ::: ('a' to 'z').toList).map(_.toString)
      val single = ascii.map(a => "\\<" + a + ">")
      val double = ascii.map(a => "\\<" + a + a + ">")
      val greek =
        List(
          "\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>",
          "\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>",
          "\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>",
          "\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>",
          "\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>",
          "\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>",
          "\\<Psi>", "\\<Omega>")
      val controls = List("\\<^isub>", "\\<^isup>")
      recode_set((ascii ::: single ::: double ::: greek ::: controls): _*)
    }
wenzelm@34143
   348
wenzelm@44569
   349
    // Symbols that count as blank (plus their decoded forms).
    val blanks =
    {
      val ascii_blanks = List(space, "\t", "\n", "\u000B", "\f", "\r")
      val symbol_blanks = List("\\<spacespace>", "\\<^newline>")
      recode_set((ascii_blanks ::: symbol_blanks): _*)
    }
wenzelm@34147
   351
wenzelm@44569
   352
    // ASCII characters that count as symbolic.
    val sym_chars: Set[String] =
      List(
        "!", "#", "$", "%", "&", "*", "+", "-", "/",
        "<", "=", ">", "?", "@", "^", "_", "|", "~").toSet
wenzelm@34143
   354
wenzelm@45873
   355
    // Declared symbols accepted by `raw_symbolic`, plus their decoded forms.
    val symbolic = recode_set(symbols.map(_._1).filter(raw_symbolic): _*)
wenzelm@45873
   356
wenzelm@44330
   357
wenzelm@44359
   358
    /* control symbols */
wenzelm@44359
   359
wenzelm@44570
   360
    // Decoded forms of all declared control symbols (those starting with \<^).
    val ctrl_decoded: Set[Symbol] =
      symbols.collect({ case (sym, _) if sym.startsWith("\\<^") => decode(sym) }).toSet
wenzelm@44359
   362
wenzelm@45109
   363
    // Decoded forms of individual control symbols, computed once.
    val sub_decoded: Symbol = decode("\\<^sub>")
    val sup_decoded: Symbol = decode("\\<^sup>")
    val isub_decoded: Symbol = decode("\\<^isub>")
    val isup_decoded: Symbol = decode("\\<^isup>")
    val bsub_decoded: Symbol = decode("\\<^bsub>")
    val esub_decoded: Symbol = decode("\\<^esub>")
    val bsup_decoded: Symbol = decode("\\<^bsup>")
    val esup_decoded: Symbol = decode("\\<^esup>")
    val bold_decoded: Symbol = decode("\\<^bold>")
wenzelm@27918
   372
  }
wenzelm@44569
   373
wenzelm@44569
   374
wenzelm@44569
   375
  /* tables */
wenzelm@44569
   376
wenzelm@44570
   377
  /** Bare names of declared symbols, taken from the lazily created interpretation. */
  def names: Map[Symbol, String] = symbols.names
wenzelm@44570
   378
  /** Abbreviations of those declared symbols that carry an "abbrev" property. */
  def abbrevs: Map[Symbol, String] = symbols.abbrevs
wenzelm@44569
   379
wenzelm@44569
   380
  /** Replace declared symbols in text by their coded characters. */
  def decode(text: String): String = symbols.decode(text)
wenzelm@44569
   381
  /** Replace coded characters in text by their symbol notation. */
  def encode(text: String): String = symbols.encode(text)
wenzelm@44569
   382
wenzelm@44570
   383
  /** Font name for symbols with a "font" property, keyed by symbol and by its decoded form. */
  def fonts: Map[Symbol, String] = symbols.fonts
wenzelm@44569
   384
  /** The distinct font names used by declared symbols. */
  def font_names: List[String] = symbols.font_names
wenzelm@44569
   385
  /** Position of each font name within `font_names`. */
  def font_index: Map[String, Int] = symbols.font_index
wenzelm@44570
   386
  /** Index of the font declared for sym, if it has one. */
  def lookup_font(sym: Symbol): Option[Int] =
    for (font <- symbols.fonts.get(sym)) yield font_index(font)
wenzelm@44569
   387
wenzelm@44569
   388
wenzelm@44569
   389
  /* classification */
wenzelm@44569
   390
wenzelm@44570
   391
  /** True if sym is in the interpretation's set of letters (plain or decoded form). */
  def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym)
wenzelm@44570
   392
  /** True if sym is a single ASCII digit. */
  def is_digit(sym: Symbol): Boolean = sym.length == 1 && is_ascii_digit(sym(0))
wenzelm@44570
   393
  /** True if sym is an underscore or a single quote. */
  def is_quasi(sym: Symbol): Boolean =
    sym match {
      case "_" | "'" => true
      case _ => false
    }
wenzelm@44570
   394
  /** True if sym is a letter, a digit, or a quasi-letter (underscore, quote). */
  def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym)
wenzelm@44570
   395
  /** True if sym is in the interpretation's set of blanks (plain or decoded form). */
  def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym)
wenzelm@45873
   396
wenzelm@44570
   397
  /** True if sym is one of the ASCII characters listed as symbolic. */
  def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym)
wenzelm@45873
   398
  /** True if sym has non-control symbol notation, or is a known symbolic symbol (also decoded). */
  def is_symbolic(sym: Symbol): Boolean = raw_symbolic(sym) || symbols.symbolic.contains(sym)
wenzelm@45873
   399
wenzelm@45873
   400
  // Purely syntactic test: symbol notation \<...> that is not a control symbol \<^...>.
  private def raw_symbolic(sym: Symbol): Boolean =
  {
    val bracketed = sym.startsWith("\\<") && sym.endsWith(">")
    bracketed && !sym.startsWith("\\<^")
  }
wenzelm@44569
   402
wenzelm@44569
   403
wenzelm@45873
   404
wenzelm@45873
   405
wenzelm@44569
   406
  /* control symbols */
wenzelm@44569
   407
wenzelm@44570
   408
  /** True if sym starts with the control prefix \<^ or is the decoded form of a control symbol. */
  def is_ctrl(sym: Symbol): Boolean =
    if (sym.startsWith("\\<^")) true
    else symbols.ctrl_decoded.contains(sym)
wenzelm@44569
   410
wenzelm@44570
   411
  /** True if sym is neither blank, nor a control symbol, nor malformed. */
  def is_controllable(sym: Symbol): Boolean =
    !(is_blank(sym) || is_ctrl(sym) || is_malformed(sym))
wenzelm@44569
   413
wenzelm@45109
   414
  /** Decoded form of the control symbol `\<^sub>`. */
  def sub_decoded: Symbol = symbols.sub_decoded
wenzelm@45109
   415
  /** Decoded form of the control symbol `\<^sup>`. */
  def sup_decoded: Symbol = symbols.sup_decoded
wenzelm@45109
   416
  /** Decoded form of the control symbol `\<^isub>`. */
  def isub_decoded: Symbol = symbols.isub_decoded
wenzelm@45109
   417
  /** Decoded form of the control symbol `\<^isup>`. */
  def isup_decoded: Symbol = symbols.isup_decoded
wenzelm@45109
   418
  /** Decoded form of the control symbol `\<^bsub>`. */
  def bsub_decoded: Symbol = symbols.bsub_decoded
wenzelm@45109
   419
  /** Decoded form of the control symbol `\<^esub>`. */
  def esub_decoded: Symbol = symbols.esub_decoded
wenzelm@45109
   420
  /** Decoded form of the control symbol `\<^bsup>`. */
  def bsup_decoded: Symbol = symbols.bsup_decoded
wenzelm@45109
   421
  /** Decoded form of the control symbol `\<^esup>`. */
  def esup_decoded: Symbol = symbols.esup_decoded
wenzelm@45109
   422
  /** Decoded form of the control symbol `\<^bold>`. */
  def bold_decoded: Symbol = symbols.bold_decoded
wenzelm@27901
   423
}