src/Pure/General/symbol.scala
author wenzelm
Tue, 09 Jun 2009 20:29:23 +0200
changeset 31537 0466cb17064f
parent 29569 f3f529b5d8fb
child 31538 2c0b67a0e5e7
permissions -rw-r--r--
more native Scala style;
added is_open;
misc tuning;
     1 /*  Title:      Pure/General/symbol.scala
     2     Author:     Makarius
     3 
     4 Detecting and recoding Isabelle symbols.
     5 */
     6 
     7 package isabelle
     8 
     9 import java.io.File
    10 
    11 import scala.io.Source
    12 import scala.collection.jcl
    13 import scala.util.matching.Regex
    14 
    15 
    16 object Symbol
    17 {
    18 
    19   /** Symbol regexps **/
    20 
    21   private val plain = new Regex("""(?xs)
    22     [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)
    23 
    24   private val symbol = new Regex("""(?xs)
    25       \\ \\? < (?:
    26       \^? [A-Za-z][A-Za-z0-9_']* |
    27       \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
    28 
    29   private val bad_symbol = new Regex("(?xs) (?!" + symbol + ")" +
    30     """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
    31 
    32   // total pattern
    33   val regex = new Regex(plain + "|" + symbol + "|" + bad_symbol + "| .")
    34 
    35   // prefix of another symbol
    36   def is_open(s: String): Boolean =
    37   {
    38     val len = s.length
    39     len == 1 && Character.isHighSurrogate(s(0)) ||
    40     s == "\\" ||
    41     s == "\\<" ||
    42     len > 2 && s(len - 1) != '>'
    43   }
    44 
    45 
    46   /** Recoding **/
    47 
    48   private class Recoder(list: List[(String, String)])
    49   {
    50     private val (min, max) =
    51     {
    52       var min = '\uffff'
    53       var max = '\u0000'
    54       for ((x, _) <- list) {
    55         val c = x(0)
    56         if (c < min) min = c
    57         if (c > max) max = c
    58       }
    59       (min, max)
    60     }
    61     private val table =
    62     {
    63       val table = new jcl.HashMap[String, String]   // reasonably efficient?
    64       for ((x, y) <- list) table + (x -> y)
    65       table
    66     }
    67     def recode(text: String): String =
    68     {
    69       val len = text.length
    70       val matcher = regex.pattern.matcher(text)
    71       val result = new StringBuilder(len)
    72       var i = 0
    73       while (i < len) {
    74         val c = text(i)
    75         if (min <= c && c <= max) {
    76           matcher.region(i, len)
    77           matcher.lookingAt
    78           val x = matcher.group
    79           result.append(table.get(x) getOrElse x)
    80           i = matcher.end
    81         }
    82         else { result.append(c); i += 1 }
    83       }
    84       result.toString
    85     }
    86   }
    87 
    88 
    89 
    90   /** Symbol interpretation **/
    91 
    92   class Interpretation(symbol_decls: Iterator[String])
    93   {
    94     /* read symbols */
    95 
    96     private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
    97     private val key = new Regex("""(?xs) (.+): """)
    98 
    99     private def read_decl(decl: String): (String, Map[String, String]) =
   100     {
   101       def err() = error("Bad symbol declaration: " + decl)
   102 
   103       def read_props(props: List[String]): Map[String, String] =
   104       {
   105         props match {
   106           case Nil => Map()
   107           case _ :: Nil => err()
   108           case key(x) :: y :: rest => read_props(rest) + (x -> y)
   109           case _ => err()
   110         }
   111       }
   112       decl.split("\\s+").toList match {
   113         case Nil => err()
   114         case sym :: props => (sym, read_props(props))
   115       }
   116     }
   117 
   118     private val symbols: List[(String, Map[String, String])] =
   119       for (decl <- symbol_decls.toList if !empty.pattern.matcher(decl).matches)
   120         yield read_decl(decl)
   121 
   122 
   123     /* main recoder methods */
   124 
   125     private val (decoder, encoder) =
   126     {
   127       val mapping =
   128         for {
   129           (sym, props) <- symbols
   130           val code =
   131             try { Integer.decode(props("code")).intValue }
   132             catch {
   133               case _: NoSuchElementException => error("Missing code for symbol " + sym)
   134               case _: NumberFormatException => error("Bad code for symbol " + sym)
   135             }
   136           val ch = new String(Character.toChars(code))
   137         } yield (sym, ch)
   138       (new Recoder(mapping ++ (for ((x, y) <- mapping) yield ("\\" + x, y))),
   139        new Recoder(for ((x, y) <- mapping) yield (y, x)))
   140     }
   141 
   142     def decode(text: String) = decoder.recode(text)
   143     def encode(text: String) = encoder.recode(text)
   144 
   145   }
   146 }