src/Pure/General/symbol.scala
author wenzelm
Tue, 09 Jun 2009 20:40:19 +0200
changeset 31538 2c0b67a0e5e7
parent 31537 0466cb17064f
child 31548 5f1f0a20af4d
permissions -rw-r--r--
tuned;
wenzelm@27901
     1
/*  Title:      Pure/General/symbol.scala
wenzelm@27901
     2
    Author:     Makarius
wenzelm@27901
     3
wenzelm@27924
     4
Detecting and recoding Isabelle symbols.
wenzelm@27901
     5
*/
wenzelm@27901
     6
wenzelm@27901
     7
package isabelle
wenzelm@27901
     8
wenzelm@27918
     9
import scala.io.Source
wenzelm@31537
    10
import scala.collection.jcl
wenzelm@31537
    11
import scala.util.matching.Regex
wenzelm@27901
    12
wenzelm@27901
    13
wenzelm@31537
    14
object Symbol
wenzelm@31537
    15
{
wenzelm@27901
    16
wenzelm@27924
    17
  /** Symbol regexps **/
wenzelm@27901
    18
wenzelm@31537
    19
  private val plain = new Regex("""(?xs)
wenzelm@31537
    20
    [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)
wenzelm@27901
    21
wenzelm@31537
    22
  private val symbol = new Regex("""(?xs)
wenzelm@31537
    23
      \\ \\? < (?:
wenzelm@27924
    24
      \^? [A-Za-z][A-Za-z0-9_']* |
wenzelm@27924
    25
      \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
wenzelm@27923
    26
wenzelm@31537
    27
  private val bad_symbol = new Regex("(?xs) (?!" + symbol + ")" +
wenzelm@27924
    28
    """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
wenzelm@27923
    29
wenzelm@27939
    30
  // total pattern
wenzelm@31537
    31
  val regex = new Regex(plain + "|" + symbol + "|" + bad_symbol + "| .")
wenzelm@27918
    32
wenzelm@31537
    33
  // prefix of another symbol
wenzelm@31537
    34
  def is_open(s: String): Boolean =
wenzelm@31537
    35
  {
wenzelm@31537
    36
    val len = s.length
wenzelm@31537
    37
    len == 1 && Character.isHighSurrogate(s(0)) ||
wenzelm@31537
    38
    s == "\\" ||
wenzelm@31537
    39
    s == "\\<" ||
wenzelm@31537
    40
    len > 2 && s(len - 1) != '>'
wenzelm@31537
    41
  }
wenzelm@27901
    42
wenzelm@27901
    43
wenzelm@27937
    44
  /** Recoding **/
wenzelm@27924
    45
wenzelm@31537
    46
  private class Recoder(list: List[(String, String)])
wenzelm@31537
    47
  {
wenzelm@31537
    48
    private val (min, max) =
wenzelm@31537
    49
    {
wenzelm@27937
    50
      var min = '\uffff'
wenzelm@27937
    51
      var max = '\u0000'
wenzelm@27937
    52
      for ((x, _) <- list) {
wenzelm@27937
    53
        val c = x(0)
wenzelm@27937
    54
        if (c < min) min = c
wenzelm@27937
    55
        if (c > max) max = c
wenzelm@27937
    56
      }
wenzelm@27937
    57
      (min, max)
wenzelm@27937
    58
    }
wenzelm@31537
    59
    private val table =
wenzelm@31537
    60
    {
wenzelm@31537
    61
      val table = new jcl.HashMap[String, String]   // reasonably efficient?
wenzelm@27937
    62
      for ((x, y) <- list) table + (x -> y)
wenzelm@27927
    63
      table
wenzelm@27927
    64
    }
wenzelm@31537
    65
    def recode(text: String): String =
wenzelm@31537
    66
    {
wenzelm@27937
    67
      val len = text.length
wenzelm@31537
    68
      val matcher = regex.pattern.matcher(text)
wenzelm@27937
    69
      val result = new StringBuilder(len)
wenzelm@27937
    70
      var i = 0
wenzelm@27937
    71
      while (i < len) {
wenzelm@27937
    72
        val c = text(i)
wenzelm@27937
    73
        if (min <= c && c <= max) {
wenzelm@27939
    74
          matcher.region(i, len)
wenzelm@27939
    75
          matcher.lookingAt
wenzelm@27938
    76
          val x = matcher.group
wenzelm@31537
    77
          result.append(table.get(x) getOrElse x)
wenzelm@27937
    78
          i = matcher.end
wenzelm@27937
    79
        }
wenzelm@27937
    80
        else { result.append(c); i += 1 }
wenzelm@27937
    81
      }
wenzelm@27937
    82
      result.toString
wenzelm@27924
    83
    }
wenzelm@27924
    84
  }
wenzelm@27918
    85
wenzelm@27918
    86
wenzelm@27937
    87
wenzelm@27937
    88
  /** Symbol interpretation **/
wenzelm@27937
    89
wenzelm@29569
    90
  class Interpretation(symbol_decls: Iterator[String])
wenzelm@29569
    91
  {
wenzelm@31537
    92
    /* read symbols */
wenzelm@31537
    93
wenzelm@31537
    94
    private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
wenzelm@31537
    95
    private val key = new Regex("""(?xs) (.+): """)
wenzelm@31537
    96
wenzelm@31537
    97
    private def read_decl(decl: String): (String, Map[String, String]) =
wenzelm@31537
    98
    {
wenzelm@31537
    99
      def err() = error("Bad symbol declaration: " + decl)
wenzelm@31537
   100
wenzelm@31537
   101
      def read_props(props: List[String]): Map[String, String] =
wenzelm@31537
   102
      {
wenzelm@31537
   103
        props match {
wenzelm@31537
   104
          case Nil => Map()
wenzelm@31537
   105
          case _ :: Nil => err()
wenzelm@31537
   106
          case key(x) :: y :: rest => read_props(rest) + (x -> y)
wenzelm@31537
   107
          case _ => err()
wenzelm@31537
   108
        }
wenzelm@31537
   109
      }
wenzelm@31537
   110
      decl.split("\\s+").toList match {
wenzelm@31537
   111
        case Nil => err()
wenzelm@31537
   112
        case sym :: props => (sym, read_props(props))
wenzelm@31537
   113
      }
wenzelm@31537
   114
    }
wenzelm@31537
   115
wenzelm@31537
   116
    private val symbols: List[(String, Map[String, String])] =
wenzelm@31537
   117
      for (decl <- symbol_decls.toList if !empty.pattern.matcher(decl).matches)
wenzelm@31537
   118
        yield read_decl(decl)
wenzelm@31537
   119
wenzelm@31537
   120
wenzelm@31537
   121
    /* main recoder methods */
wenzelm@31537
   122
wenzelm@31537
   123
    private val (decoder, encoder) =
wenzelm@31537
   124
    {
wenzelm@31537
   125
      val mapping =
wenzelm@31537
   126
        for {
wenzelm@31537
   127
          (sym, props) <- symbols
wenzelm@31537
   128
          val code =
wenzelm@31537
   129
            try { Integer.decode(props("code")).intValue }
wenzelm@31537
   130
            catch {
wenzelm@31537
   131
              case _: NoSuchElementException => error("Missing code for symbol " + sym)
wenzelm@31537
   132
              case _: NumberFormatException => error("Bad code for symbol " + sym)
wenzelm@31537
   133
            }
wenzelm@31537
   134
          val ch = new String(Character.toChars(code))
wenzelm@31537
   135
        } yield (sym, ch)
wenzelm@31537
   136
      (new Recoder(mapping ++ (for ((x, y) <- mapping) yield ("\\" + x, y))),
wenzelm@31537
   137
       new Recoder(for ((x, y) <- mapping) yield (y, x)))
wenzelm@31537
   138
    }
wenzelm@27918
   139
wenzelm@27924
   140
    def decode(text: String) = decoder.recode(text)
wenzelm@27924
   141
    def encode(text: String) = encoder.recode(text)
wenzelm@27923
   142
wenzelm@27918
   143
  }
wenzelm@27901
   144
}