use scala.collection.jcl.HashMap, which seems to be more efficient;
authorwenzelm
Sat, 16 Aug 2008 21:23:03 +0200
changeset 279237ebe9d38743a
parent 27922 ddf74e16ab01
child 27924 8dd8b564faf5
use scala.collection.jcl.HashMap, which seems to be more efficient;
char_pattern: proper matching of surrogate unicode characters, those outside the Basic Multilingual Plane;
class Interpretation: misc reorganization, more serious preparation of patterns and tables;
src/Pure/General/symbol.scala
     1.1 --- a/src/Pure/General/symbol.scala	Sat Aug 16 21:23:01 2008 +0200
     1.2 +++ b/src/Pure/General/symbol.scala	Sat Aug 16 21:23:03 2008 +0200
     1.3 @@ -10,18 +10,21 @@
     1.4  import java.util.regex.Pattern
     1.5  import java.io.File
     1.6  import scala.io.Source
     1.7 -import scala.collection.mutable.HashMap
     1.8 +import scala.collection.jcl.HashMap
     1.9  
    1.10  
    1.11  object Symbol {
    1.12  
    1.13 -  /* Regexp utils */
    1.14 +  /** Regexp utils **/
    1.15  
    1.16    private def compile(s: String) =
    1.17      Pattern.compile(s, Pattern.COMMENTS | Pattern.DOTALL)
    1.18  
    1.19  
    1.20 -  /* Symbol regexps */
    1.21 +
    1.22 +  /** Symbol regexps **/
    1.23 +
    1.24 +  private val char_src = """ [^\ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """
    1.25  
    1.26    private val symbol_src = """ \\ \\? < (?:
    1.27        \^? [A-Za-z][A-Za-z0-9_']* |
    1.28 @@ -30,31 +33,58 @@
    1.29    private val bad_symbol_src = "(?!" + symbol_src + ")" +
    1.30      """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*"""
    1.31  
    1.32 +  val char_pattern = compile(char_src)
    1.33    val symbol_pattern = compile(symbol_src)
    1.34    val bad_symbol_pattern = compile(bad_symbol_src)
    1.35 -  val pattern = compile(symbol_src + "|" + bad_symbol_src + "| .")
    1.36 +  val pattern = compile(char_pattern + "|" + symbol_src + "|" + bad_symbol_src)
    1.37  
    1.38  
    1.39 -  /* Symbol interpretation tables */
    1.40  
    1.41 -  private val empty_pattern = compile(""" ^\s* (?: \#.* )? $ """)
    1.42 -  private val blank_pattern = compile(""" \s+ """)
    1.43 -  private val key_pattern = compile(""" (.+): """)
    1.44 +  /** Symbol interpretation **/
    1.45  
    1.46 -  class Interpretation extends HashMap[String, List[(String, String)]] {
    1.47 +  class Interpretation {
    1.48  
    1.49 -    class BadEntry(val line: String) extends Exception
    1.50 +    class BadSymbol(val msg: String) extends Exception
    1.51  
    1.52 -    def read_line(line: String) = {
    1.53 -      def err() = throw new BadEntry ("Bad symbol specification: " + line)
    1.54 +    var symbols = new HashMap[String, HashMap[String, String]]
    1.55  
    1.56 -      def read_pairs(props: List[String]): List[(String, String)] = {
    1.57 +    var decode_pattern: Pattern = null
    1.58 +    var decode_table = new HashMap[String, String]
    1.59 +
    1.60 +    var encode_pattern: Pattern = null
    1.61 +    var encode_table = new HashMap[String, String]
    1.62 +
    1.63 +
    1.64 +    /* recode */
    1.65 +
    1.66 +    private def recode(pattern: Pattern, table: HashMap[String, String], text: String) = {
    1.67 +      // FIXME
    1.68 +      text
    1.69 +    }
    1.70 +
    1.71 +    def decode(text: String) = recode(decode_pattern, decode_table, text)
    1.72 +    def encode(text: String) = recode(encode_pattern, encode_table, text)
    1.73 +
    1.74 +
    1.75 +    /* read symbols */
    1.76 +
    1.77 +    private val empty_pattern = compile(""" ^\s* (?: \#.* )? $ """)
    1.78 +    private val blank_pattern = compile(""" \s+ """)
    1.79 +    private val key_pattern = compile(""" (.+): """)
    1.80 +
    1.81 +    private def read_line(line: String) = {
    1.82 +      def err() = throw new BadSymbol(line)
    1.83 +
    1.84 +      def read_props(props: List[String], tab: HashMap[String, String]): Unit = {
    1.85          props match {
    1.86 -          case Nil => Nil
    1.87 +          case Nil => ()
    1.88            case _ :: Nil => err()
    1.89            case key :: value :: rest => {
    1.90              val key_matcher = key_pattern.matcher(key)
    1.91 -            if (key_matcher.matches) { (key_matcher.group(1) -> value) :: read_pairs(rest) }
    1.92 +            if (key_matcher.matches) {
    1.93 +              tab + (key_matcher.group(1) -> value)
    1.94 +              read_props(rest, tab)
    1.95 +            }
    1.96              else err ()
    1.97            }
    1.98          }
    1.99 @@ -63,17 +93,61 @@
   1.100        if (!empty_pattern.matcher(line).matches) {
   1.101          blank_pattern.split(line).toList match {
   1.102            case Nil => err()
   1.103 -          case symbol :: props => this + (symbol -> read_pairs(props))
   1.104 +          case symbol :: props => {
   1.105 +            val tab = new HashMap[String, String]
   1.106 +            read_props(props, tab)
   1.107 +            symbols + (symbol -> tab)
   1.108 +          }
   1.109          }
   1.110        }
   1.111      }
   1.112  
   1.113 -    for (base <- List(IsabelleSystem.ISABELLE_HOME, IsabelleSystem.ISABELLE_HOME_USER)) {
   1.114 +    private def read_symbols(base: String) = {
   1.115        val file = new File(base + File.separator + "etc" + File.separator + "symbols")
   1.116        if (file.canRead) {
   1.117          for (line <- Source.fromFile(file).getLines) read_line(line)
   1.118        }
   1.119      }
   1.120 +
   1.121 +
   1.122 +    /* init tables */
   1.123 +
   1.124 +    private def init_tables() = {
   1.125 +      val decode_pat = new StringBuilder
   1.126 +      val encode_pat = new StringBuilder
   1.127 +
   1.128 +      val syms = symbols.elements
   1.129 +      for ((symbol, props) <- syms) {
   1.130 +        val code = {
   1.131 +          try { Integer.decode(props("code")) }
   1.132 +          catch {
   1.133 +            case e: NoSuchElementException => throw new BadSymbol(symbol)
   1.134 +            case e: NumberFormatException => throw new BadSymbol(symbol)
   1.135 +          }
   1.136 +        }
   1.137 +        val code_str = new String(Character.toChars(code.intValue))
   1.138 +
   1.139 +        decode_pat.append(Pattern.quote(symbol))
   1.140 +        encode_pat.append(Pattern.quote(code_str))
   1.141 +        if (syms.hasNext) {
   1.142 +          decode_pat.append("|")
   1.143 +          encode_pat.append("|")
   1.144 +        }
   1.145 +
   1.146 +        decode_table + (symbol -> code_str)
   1.147 +        encode_table + (code_str -> symbol)
   1.148 +      }
   1.149 +
   1.150 +      decode_pattern = compile(decode_pat.toString)
   1.151 +      encode_pattern = compile(encode_pat.toString)
   1.152 +    }
   1.153 +
   1.154 +
   1.155 +    /* constructor */
   1.156 +
   1.157 +    read_symbols(IsabelleSystem.ISABELLE_HOME)
   1.158 +    read_symbols(IsabelleSystem.ISABELLE_HOME_USER)
   1.159 +    init_tables()
   1.160    }
   1.161  
   1.162  }