src/Pure/General/symbol.scala
author wenzelm
Sat, 16 Aug 2008 23:51:09 +0200
changeset 27927 eb624bb54bc6
parent 27926 308be7332e25
child 27928 b1d0d1351ed9
permissions -rw-r--r--
tuned Recoder;
wenzelm@27901
     1
/*  Title:      Pure/General/symbol.scala
wenzelm@27901
     2
    ID:         $Id$
wenzelm@27901
     3
    Author:     Makarius
wenzelm@27901
     4
wenzelm@27924
     5
Detecting and recoding Isabelle symbols.
wenzelm@27901
     6
*/
wenzelm@27901
     7
wenzelm@27901
     8
package isabelle
wenzelm@27901
     9
wenzelm@27924
    10
import java.util.regex.{Pattern, Matcher}
wenzelm@27918
    11
import java.io.File
wenzelm@27918
    12
import scala.io.Source
wenzelm@27923
    13
import scala.collection.jcl.HashMap
wenzelm@27901
    14
wenzelm@27901
    15
wenzelm@27901
    16
object Symbol {
wenzelm@27901
    17
wenzelm@27924
    18
  /** Symbol regexps **/
wenzelm@27901
    19
wenzelm@27901
    20
  private def compile(s: String) =
wenzelm@27901
    21
    Pattern.compile(s, Pattern.COMMENTS | Pattern.DOTALL)
wenzelm@27901
    22
wenzelm@27924
    23
  val char_pattern = compile(""" [^\ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)
wenzelm@27918
    24
wenzelm@27924
    25
  val symbol_pattern = compile(""" \\ \\? < (?:
wenzelm@27924
    26
      \^? [A-Za-z][A-Za-z0-9_']* |
wenzelm@27924
    27
      \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
wenzelm@27923
    28
wenzelm@27924
    29
  val bad_symbol_pattern = compile("(?!" + symbol_pattern + ")" +
wenzelm@27924
    30
    """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
wenzelm@27923
    31
wenzelm@27924
    32
  val pattern = compile(char_pattern + "|" + symbol_pattern + "|" + bad_symbol_pattern)
wenzelm@27918
    33
wenzelm@27901
    34
wenzelm@27901
    35
wenzelm@27927
    36
  /** Symbol interpretation **/
wenzelm@27924
    37
wenzelm@27927
    38
  private class Recoder(list: List[(String, String)]) {
wenzelm@27924
    39
wenzelm@27927
    40
    private val pattern = compile((for ((x, _) <- list) yield Pattern.quote(x)).mkString("|"))
wenzelm@27927
    41
    private val table = {
wenzelm@27927
    42
      val table = new HashMap[String, String]
wenzelm@27927
    43
      for ((x, y) <- list) table + (x -> Matcher.quoteReplacement(y))
wenzelm@27927
    44
      table
wenzelm@27927
    45
    }
wenzelm@27924
    46
    def recode(text: String) = {
wenzelm@27924
    47
      val output = new StringBuffer(text.length)
wenzelm@27924
    48
      val matcher = pattern.matcher(text)
wenzelm@27924
    49
      while(matcher.find) matcher.appendReplacement(output, table(matcher.group))
wenzelm@27924
    50
      matcher.appendTail(output)
wenzelm@27924
    51
      output.toString
wenzelm@27924
    52
    }
wenzelm@27924
    53
wenzelm@27924
    54
  }
wenzelm@27918
    55
wenzelm@27918
    56
wenzelm@27923
    57
  class Interpretation {
wenzelm@27918
    58
wenzelm@27923
    59
    class BadSymbol(val msg: String) extends Exception
wenzelm@27918
    60
wenzelm@27924
    61
    private var symbols = new HashMap[String, HashMap[String, String]]
wenzelm@27926
    62
    private var decoder: Recoder = null
wenzelm@27926
    63
    private var encoder: Recoder = null
wenzelm@27918
    64
wenzelm@27924
    65
    def decode(text: String) = decoder.recode(text)
wenzelm@27924
    66
    def encode(text: String) = encoder.recode(text)
wenzelm@27923
    67
wenzelm@27923
    68
wenzelm@27923
    69
    /* read symbols */
wenzelm@27923
    70
wenzelm@27923
    71
    private val empty_pattern = compile(""" ^\s* (?: \#.* )? $ """)
wenzelm@27923
    72
    private val blank_pattern = compile(""" \s+ """)
wenzelm@27923
    73
    private val key_pattern = compile(""" (.+): """)
wenzelm@27923
    74
wenzelm@27923
    75
    private def read_line(line: String) = {
wenzelm@27923
    76
      def err() = throw new BadSymbol(line)
wenzelm@27923
    77
wenzelm@27923
    78
      def read_props(props: List[String], tab: HashMap[String, String]): Unit = {
wenzelm@27918
    79
        props match {
wenzelm@27923
    80
          case Nil => ()
wenzelm@27918
    81
          case _ :: Nil => err()
wenzelm@27918
    82
          case key :: value :: rest => {
wenzelm@27918
    83
            val key_matcher = key_pattern.matcher(key)
wenzelm@27923
    84
            if (key_matcher.matches) {
wenzelm@27923
    85
              tab + (key_matcher.group(1) -> value)
wenzelm@27923
    86
              read_props(rest, tab)
wenzelm@27923
    87
            }
wenzelm@27918
    88
            else err ()
wenzelm@27918
    89
          }
wenzelm@27918
    90
        }
wenzelm@27918
    91
      }
wenzelm@27918
    92
wenzelm@27918
    93
      if (!empty_pattern.matcher(line).matches) {
wenzelm@27918
    94
        blank_pattern.split(line).toList match {
wenzelm@27918
    95
          case Nil => err()
wenzelm@27923
    96
          case symbol :: props => {
wenzelm@27923
    97
            val tab = new HashMap[String, String]
wenzelm@27923
    98
            read_props(props, tab)
wenzelm@27923
    99
            symbols + (symbol -> tab)
wenzelm@27923
   100
          }
wenzelm@27918
   101
        }
wenzelm@27918
   102
      }
wenzelm@27918
   103
    }
wenzelm@27918
   104
wenzelm@27923
   105
    private def read_symbols(base: String) = {
wenzelm@27918
   106
      val file = new File(base + File.separator + "etc" + File.separator + "symbols")
wenzelm@27918
   107
      if (file.canRead) {
wenzelm@27918
   108
        for (line <- Source.fromFile(file).getLines) read_line(line)
wenzelm@27918
   109
      }
wenzelm@27918
   110
    }
wenzelm@27923
   111
wenzelm@27923
   112
wenzelm@27923
   113
    /* init tables */
wenzelm@27923
   114
wenzelm@27924
   115
    private def get_code(entry: (String, HashMap[String, String])) = {
wenzelm@27924
   116
      val (symbol, props) = entry
wenzelm@27924
   117
      val code =
wenzelm@27924
   118
        try { Integer.decode(props("code")).intValue }
wenzelm@27924
   119
        catch {
wenzelm@27924
   120
          case e: NoSuchElementException => throw new BadSymbol(symbol)
wenzelm@27924
   121
          case e: NumberFormatException => throw new BadSymbol(symbol)
wenzelm@27924
   122
        }
wenzelm@27924
   123
      (symbol, new String(Character.toChars(code)))
wenzelm@27924
   124
    }
wenzelm@27923
   125
wenzelm@27924
   126
    private def init_recoders() = {
wenzelm@27924
   127
      val list = symbols.elements.toList.map(get_code)
wenzelm@27924
   128
      decoder = new Recoder(list)
wenzelm@27924
   129
      encoder = new Recoder(list.map((p: (String, String)) => (p._2, p._1)))
wenzelm@27923
   130
    }
wenzelm@27923
   131
wenzelm@27923
   132
wenzelm@27923
   133
    /* constructor */
wenzelm@27923
   134
wenzelm@27923
   135
    read_symbols(IsabelleSystem.ISABELLE_HOME)
wenzelm@27923
   136
    read_symbols(IsabelleSystem.ISABELLE_HOME_USER)
wenzelm@27924
   137
    init_recoders()
wenzelm@27918
   138
  }
wenzelm@27918
   139
wenzelm@27901
   140
}