src/Pure/General/symbol.scala
changeset 44570 58bb7ca5c7a2
parent 44569 5130dfe1b7be
child 44596 3749d1e6dde9
equal deleted inserted replaced
44569:5130dfe1b7be 44570:58bb7ca5c7a2
    11 import scala.util.matching.Regex
    11 import scala.util.matching.Regex
    12 
    12 
    13 
    13 
    14 object Symbol
    14 object Symbol
    15 {
    15 {
       
    16   type Symbol = String
       
    17 
       
    18 
    16   /* spaces */
    19   /* spaces */
    17 
    20 
    18   val spc = ' '
    21   val spc = ' '
    19   val space = " "
    22   val space = " "
    20 
    23 
    62 
    65 
    63   /* basic matching */
    66   /* basic matching */
    64 
    67 
    65   def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || '\ud800' <= c && c <= '\udfff')
    68   def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || '\ud800' <= c && c <= '\udfff')
    66 
    69 
    67   def is_physical_newline(s: String): Boolean =
    70   def is_physical_newline(s: Symbol): Boolean =
    68     s == "\n" || s == "\r" || s == "\r\n"
    71     s == "\n" || s == "\r" || s == "\r\n"
    69 
    72 
    70   def is_malformed(s: String): Boolean =
    73   def is_malformed(s: Symbol): Boolean =
    71     !(s.length == 1 && is_plain(s(0))) && malformed_symbol.pattern.matcher(s).matches
    74     !(s.length == 1 && is_plain(s(0))) && malformed_symbol.pattern.matcher(s).matches
    72 
    75 
    73   class Matcher(text: CharSequence)
    76   class Matcher(text: CharSequence)
    74   {
    77   {
    75     private val matcher = regex_total.pattern.matcher(text)
    78     private val matcher = regex_total.pattern.matcher(text)
    85   }
    88   }
    86 
    89 
    87 
    90 
    88   /* iterator */
    91   /* iterator */
    89 
    92 
    90   private val char_symbols: Array[String] =
    93   private val char_symbols: Array[Symbol] =
    91     (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray
    94     (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray
    92 
    95 
    93   def iterator(text: CharSequence): Iterator[String] =
    96   def iterator(text: CharSequence): Iterator[Symbol] =
    94     new Iterator[String]
    97     new Iterator[Symbol]
    95     {
    98     {
    96       private val matcher = new Matcher(text)
    99       private val matcher = new Matcher(text)
    97       private var i = 0
   100       private var i = 0
    98       def hasNext = i < text.length
   101       def hasNext = i < text.length
    99       def next =
   102       def next =
   214     /* read symbols */
   217     /* read symbols */
   215 
   218 
   216     private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
   219     private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
   217     private val key = new Regex("""(?xs) (.+): """)
   220     private val key = new Regex("""(?xs) (.+): """)
   218 
   221 
   219     private def read_decl(decl: String): (String, Map[String, String]) =
   222     private def read_decl(decl: String): (Symbol, Map[String, String]) =
   220     {
   223     {
   221       def err() = error("Bad symbol declaration: " + decl)
   224       def err() = error("Bad symbol declaration: " + decl)
   222 
   225 
   223       def read_props(props: List[String]): Map[String, String] =
   226       def read_props(props: List[String]): Map[String, String] =
   224       {
   227       {
   233         case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props))
   236         case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props))
   234         case _ => err()
   237         case _ => err()
   235       }
   238       }
   236     }
   239     }
   237 
   240 
   238     private val symbols: List[(String, Map[String, String])] =
   241     private val symbols: List[(Symbol, Map[String, String])] =
   239       Map((
   242       Map((
   240         for (decl <- split_lines(symbols_spec) if !empty.pattern.matcher(decl).matches)
   243         for (decl <- split_lines(symbols_spec) if !empty.pattern.matcher(decl).matches)
   241           yield read_decl(decl)): _*) toList
   244           yield read_decl(decl)): _*) toList
   242 
   245 
   243 
   246 
   244     /* misc properties */
   247     /* misc properties */
   245 
   248 
   246     val names: Map[String, String] =
   249     val names: Map[Symbol, String] =
   247     {
   250     {
   248       val name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
   251       val name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
   249       Map((for ((sym @ name(a), _) <- symbols) yield (sym -> a)): _*)
   252       Map((for ((sym @ name(a), _) <- symbols) yield (sym -> a)): _*)
   250     }
   253     }
   251 
   254 
   252     val abbrevs: Map[String, String] =
   255     val abbrevs: Map[Symbol, String] =
   253       Map((
   256       Map((
   254         for ((sym, props) <- symbols if props.isDefinedAt("abbrev"))
   257         for ((sym, props) <- symbols if props.isDefinedAt("abbrev"))
   255           yield (sym -> props("abbrev"))): _*)
   258           yield (sym -> props("abbrev"))): _*)
   256 
   259 
   257 
   260 
   293     }
   296     }
   294 
   297 
   295 
   298 
   296     /* user fonts */
   299     /* user fonts */
   297 
   300 
   298     val fonts: Map[String, String] =
   301     val fonts: Map[Symbol, String] =
   299       recode_map((
   302       recode_map((
   300         for ((sym, props) <- symbols if props.isDefinedAt("font"))
   303         for ((sym, props) <- symbols if props.isDefinedAt("font"))
   301           yield (sym -> props("font"))): _*)
   304           yield (sym -> props("font"))): _*)
   302 
   305 
   303     val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList
   306     val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList
   348       Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
   351       Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
   349 
   352 
   350 
   353 
   351     /* control symbols */
   354     /* control symbols */
   352 
   355 
   353     val ctrl_decoded: Set[String] =
   356     val ctrl_decoded: Set[Symbol] =
   354       Set((for ((sym, _) <- symbols if sym.startsWith("\\<^")) yield decode(sym)): _*)
   357       Set((for ((sym, _) <- symbols if sym.startsWith("\\<^")) yield decode(sym)): _*)
   355 
   358 
   356     val subscript_decoded = Set(decode("\\<^sub>"), decode("\\<^isub>"))
   359     val subscript_decoded = Set(decode("\\<^sub>"), decode("\\<^isub>"))
   357     val superscript_decoded = Set(decode("\\<^sup>"), decode("\\<^isup>"))
   360     val superscript_decoded = Set(decode("\\<^sup>"), decode("\\<^isup>"))
   358 
   361 
   364   }
   367   }
   365 
   368 
   366 
   369 
   367   /* tables */
   370   /* tables */
   368 
   371 
   369   def names: Map[String, String] = symbols.names
   372   def names: Map[Symbol, String] = symbols.names
   370   def abbrevs: Map[String, String] = symbols.abbrevs
   373   def abbrevs: Map[Symbol, String] = symbols.abbrevs
   371 
   374 
   372   def decode(text: String): String = symbols.decode(text)
   375   def decode(text: String): String = symbols.decode(text)
   373   def encode(text: String): String = symbols.encode(text)
   376   def encode(text: String): String = symbols.encode(text)
   374 
   377 
   375   def fonts: Map[String, String] = symbols.fonts
   378   def fonts: Map[Symbol, String] = symbols.fonts
   376   def font_names: List[String] = symbols.font_names
   379   def font_names: List[String] = symbols.font_names
   377   def font_index: Map[String, Int] = symbols.font_index
   380   def font_index: Map[String, Int] = symbols.font_index
   378   def lookup_font(sym: String): Option[Int] = symbols.fonts.get(sym).map(font_index(_))
   381   def lookup_font(sym: Symbol): Option[Int] = symbols.fonts.get(sym).map(font_index(_))
   379 
   382 
   380 
   383 
   381   /* classification */
   384   /* classification */
   382 
   385 
   383   def is_letter(sym: String): Boolean = symbols.letters.contains(sym)
   386   def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym)
   384   def is_digit(sym: String): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9'
   387   def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9'
   385   def is_quasi(sym: String): Boolean = sym == "_" || sym == "'"
   388   def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'"
   386   def is_letdig(sym: String): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym)
   389   def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym)
   387   def is_blank(sym: String): Boolean = symbols.blanks.contains(sym)
   390   def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym)
   388   def is_symbolic_char(sym: String): Boolean = symbols.sym_chars.contains(sym)
   391   def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym)
   389   def is_symbolic(sym: String): Boolean =
   392   def is_symbolic(sym: Symbol): Boolean =
   390     sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^")
   393     sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^")
   391 
   394 
   392 
   395 
   393   /* control symbols */
   396   /* control symbols */
   394 
   397 
   395   def is_ctrl(sym: String): Boolean =
   398   def is_ctrl(sym: Symbol): Boolean =
   396     sym.startsWith("\\<^") || symbols.ctrl_decoded.contains(sym)
   399     sym.startsWith("\\<^") || symbols.ctrl_decoded.contains(sym)
   397 
   400 
   398   def is_controllable(sym: String): Boolean =
   401   def is_controllable(sym: Symbol): Boolean =
   399     !is_blank(sym) && !is_ctrl(sym) && !is_malformed(sym)
   402     !is_blank(sym) && !is_ctrl(sym) && !is_malformed(sym)
   400 
   403 
   401   def is_subscript_decoded(sym: String): Boolean = symbols.subscript_decoded.contains(sym)
   404   def is_subscript_decoded(sym: Symbol): Boolean = symbols.subscript_decoded.contains(sym)
   402   def is_superscript_decoded(sym: String): Boolean = symbols.superscript_decoded.contains(sym)
   405   def is_superscript_decoded(sym: Symbol): Boolean = symbols.superscript_decoded.contains(sym)
   403   def is_bold_decoded(sym: String): Boolean = sym == symbols.bold_decoded
   406   def is_bold_decoded(sym: Symbol): Boolean = sym == symbols.bold_decoded
   404   def is_bsub_decoded(sym: String): Boolean = sym == symbols.bsub_decoded
   407   def is_bsub_decoded(sym: Symbol): Boolean = sym == symbols.bsub_decoded
   405   def is_esub_decoded(sym: String): Boolean = sym == symbols.esub_decoded
   408   def is_esub_decoded(sym: Symbol): Boolean = sym == symbols.esub_decoded
   406   def is_bsup_decoded(sym: String): Boolean = sym == symbols.bsup_decoded
   409   def is_bsup_decoded(sym: Symbol): Boolean = sym == symbols.bsup_decoded
   407   def is_esup_decoded(sym: String): Boolean = sym == symbols.esup_decoded
   410   def is_esup_decoded(sym: Symbol): Boolean = sym == symbols.esup_decoded
   408 }
   411 }