more native Scala style;
authorwenzelm
Tue, 09 Jun 2009 20:29:23 +0200
changeset 315370466cb17064f
parent 31536 73adb1fa8553
child 31538 2c0b67a0e5e7
more native Scala style;
added is_open;
misc tuning;
src/Pure/General/symbol.scala
     1.1 --- a/src/Pure/General/symbol.scala	Tue Jun 09 20:18:21 2009 +0200
     1.2 +++ b/src/Pure/General/symbol.scala	Tue Jun 09 20:29:23 2009 +0200
     1.3 @@ -6,37 +6,49 @@
     1.4  
     1.5  package isabelle
     1.6  
     1.7 -import java.util.regex.Pattern
     1.8  import java.io.File
     1.9 +
    1.10  import scala.io.Source
    1.11 -import scala.collection.jcl.HashMap
    1.12 +import scala.collection.jcl
    1.13 +import scala.util.matching.Regex
    1.14  
    1.15  
    1.16 -object Symbol {
    1.17 +object Symbol
    1.18 +{
    1.19  
    1.20    /** Symbol regexps **/
    1.21  
    1.22 -  private def compile(s: String) =
    1.23 -    Pattern.compile(s, Pattern.COMMENTS | Pattern.DOTALL)
    1.24 +  private val plain = new Regex("""(?xs)
    1.25 +    [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)
    1.26  
    1.27 -  private val plain_pattern = compile(""" [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)
    1.28 -
    1.29 -  private val symbol_pattern = compile(""" \\ \\? < (?:
    1.30 +  private val symbol = new Regex("""(?xs)
    1.31 +      \\ \\? < (?:
    1.32        \^? [A-Za-z][A-Za-z0-9_']* |
    1.33        \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
    1.34  
    1.35 -  private val bad_symbol_pattern = compile("(?!" + symbol_pattern + ")" +
    1.36 +  private val bad_symbol = new Regex("(?xs) (?!" + symbol + ")" +
    1.37      """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
    1.38  
    1.39    // total pattern
    1.40 -  val pattern = compile(plain_pattern + "|" + symbol_pattern + "|" + bad_symbol_pattern + "| .")
    1.41 +  val regex = new Regex(plain + "|" + symbol + "|" + bad_symbol + "| .")
    1.42  
    1.43 +  // prefix of another symbol
    1.44 +  def is_open(s: String): Boolean =
    1.45 +  {
    1.46 +    val len = s.length
    1.47 +    len == 1 && Character.isHighSurrogate(s(0)) ||
    1.48 +    s == "\\" ||
    1.49 +    s == "\\<" ||
    1.50 +    len > 2 && s(len - 1) != '>'
    1.51 +  }
    1.52  
    1.53  
    1.54    /** Recoding **/
    1.55  
    1.56 -  private class Recoder(list: List[(String, String)]) {
    1.57 -    private val (min, max) = {
    1.58 +  private class Recoder(list: List[(String, String)])
    1.59 +  {
    1.60 +    private val (min, max) =
    1.61 +    {
    1.62        var min = '\uffff'
    1.63        var max = '\u0000'
    1.64        for ((x, _) <- list) {
    1.65 @@ -46,14 +58,16 @@
    1.66        }
    1.67        (min, max)
    1.68      }
    1.69 -    private val table = {
    1.70 -      val table = new HashMap[String, String]
    1.71 +    private val table =
    1.72 +    {
    1.73 +      val table = new jcl.HashMap[String, String]   // reasonably efficient?
    1.74        for ((x, y) <- list) table + (x -> y)
    1.75        table
    1.76      }
    1.77 -    def recode(text: String) = {
    1.78 +    def recode(text: String): String =
    1.79 +    {
    1.80        val len = text.length
    1.81 -      val matcher = pattern.matcher(text)
    1.82 +      val matcher = regex.pattern.matcher(text)
    1.83        val result = new StringBuilder(len)
    1.84        var i = 0
    1.85        while (i < len) {
    1.86 @@ -62,10 +76,7 @@
    1.87            matcher.region(i, len)
    1.88            matcher.lookingAt
    1.89            val x = matcher.group
    1.90 -          table.get(x) match {
    1.91 -            case Some(y) => result.append(y)
    1.92 -            case None => result.append(x)
    1.93 -          }
    1.94 +          result.append(table.get(x) getOrElse x)
    1.95            i = matcher.end
    1.96          }
    1.97          else { result.append(c); i += 1 }
    1.98 @@ -80,75 +91,56 @@
    1.99  
   1.100    class Interpretation(symbol_decls: Iterator[String])
   1.101    {
   1.102 -    private val symbols = new HashMap[String, HashMap[String, String]]
   1.103 -    private var decoder: Recoder = null
   1.104 -    private var encoder: Recoder = null
   1.105 +    /* read symbols */
   1.106 +
   1.107 +    private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
   1.108 +    private val key = new Regex("""(?xs) (.+): """)
   1.109 +
   1.110 +    private def read_decl(decl: String): (String, Map[String, String]) =
   1.111 +    {
   1.112 +      def err() = error("Bad symbol declaration: " + decl)
   1.113 +
   1.114 +      def read_props(props: List[String]): Map[String, String] =
   1.115 +      {
   1.116 +        props match {
   1.117 +          case Nil => Map()
   1.118 +          case _ :: Nil => err()
   1.119 +          case key(x) :: y :: rest => read_props(rest) + (x -> y)
   1.120 +          case _ => err()
   1.121 +        }
   1.122 +      }
   1.123 +      decl.split("\\s+").toList match {
   1.124 +        case Nil => err()
   1.125 +        case sym :: props => (sym, read_props(props))
   1.126 +      }
   1.127 +    }
   1.128 +
   1.129 +    private val symbols: List[(String, Map[String, String])] =
   1.130 +      for (decl <- symbol_decls.toList if !empty.pattern.matcher(decl).matches)
   1.131 +        yield read_decl(decl)
   1.132 +
   1.133 +
   1.134 +    /* main recoder methods */
   1.135 +
   1.136 +    private val (decoder, encoder) =
   1.137 +    {
   1.138 +      val mapping =
   1.139 +        for {
   1.140 +          (sym, props) <- symbols
   1.141 +          val code =
   1.142 +            try { Integer.decode(props("code")).intValue }
   1.143 +            catch {
   1.144 +              case _: NoSuchElementException => error("Missing code for symbol " + sym)
   1.145 +              case _: NumberFormatException => error("Bad code for symbol " + sym)
   1.146 +            }
   1.147 +          val ch = new String(Character.toChars(code))
   1.148 +        } yield (sym, ch)
   1.149 +      (new Recoder(mapping ++ (for ((x, y) <- mapping) yield ("\\" + x, y))),
   1.150 +       new Recoder(for ((x, y) <- mapping) yield (y, x)))
   1.151 +    }
   1.152  
   1.153      def decode(text: String) = decoder.recode(text)
   1.154      def encode(text: String) = encoder.recode(text)
   1.155  
   1.156 -
   1.157 -    /* read symbols */
   1.158 -
   1.159 -    private val empty_pattern = compile(""" ^\s* (?: \#.* )? $ """)
   1.160 -    private val blank_pattern = compile(""" \s+ """)
   1.161 -    private val key_pattern = compile(""" (.+): """)
   1.162 -
   1.163 -    private def read_decl(decl: String) = {
   1.164 -      def err() = error("Bad symbol declaration: " + decl)
   1.165 -
   1.166 -      def read_props(props: List[String], tab: HashMap[String, String])
   1.167 -      {
   1.168 -        props match {
   1.169 -          case Nil => ()
   1.170 -          case _ :: Nil => err()
   1.171 -          case key :: value :: rest => {
   1.172 -            val key_matcher = key_pattern.matcher(key)
   1.173 -            if (key_matcher.matches) {
   1.174 -              tab + (key_matcher.group(1) -> value)
   1.175 -              read_props(rest, tab)
   1.176 -            }
   1.177 -            else err ()
   1.178 -          }
   1.179 -        }
   1.180 -      }
   1.181 -
   1.182 -      if (!empty_pattern.matcher(decl).matches) {
   1.183 -        blank_pattern.split(decl).toList match {
   1.184 -          case Nil => err()
   1.185 -          case symbol :: props => {
   1.186 -            val tab = new HashMap[String, String]
   1.187 -            read_props(props, tab)
   1.188 -            symbols + (symbol -> tab)
   1.189 -          }
   1.190 -        }
   1.191 -      }
   1.192 -    }
   1.193 -
   1.194 -
   1.195 -    /* init tables */
   1.196 -
   1.197 -    private def get_code(entry: (String, HashMap[String, String])) = {
   1.198 -      val (symbol, props) = entry
   1.199 -      val code =
   1.200 -        try { Integer.decode(props("code")).intValue }
   1.201 -        catch {
   1.202 -          case _: NoSuchElementException => error("Missing code for symbol " + symbol)
   1.203 -          case _: NumberFormatException => error("Bad code for symbol " + symbol)
   1.204 -        }
   1.205 -      (symbol, new String(Character.toChars(code)))
   1.206 -    }
   1.207 -
   1.208 -    private def init_recoders() = {
   1.209 -      val list = symbols.elements.toList.map(get_code)
   1.210 -      decoder = new Recoder(list ++ (for ((x, y) <- list) yield ("\\" + x, y)))
   1.211 -      encoder = new Recoder(for ((x, y) <- list) yield (y, x))
   1.212 -    }
   1.213 -
   1.214 -
   1.215 -    /* constructor */
   1.216 -
   1.217 -    symbol_decls.foreach(read_decl)
   1.218 -    init_recoders()
   1.219    }
   1.220  }