isac-java/src/java/isac/gui/mawen/syntax/isabelle/symbol.scala
author Walther Neuper <wneuper@ist.tugraz.at>
Wed, 28 Nov 2018 08:59:29 +0100
changeset 5232 34f18fdc3103
parent 5055 4ed34bf8f938
child 5239 b4e3883d7b66
permissions -rw-r--r--
update Scala 2.10 to 2.12.3
     1 /*  THIS IS A PARTIAL COPY OF Isabelle2016-1 ...
     2     Title:      Pure/General/symbol.scala
     3     Author:     Makarius
     4     ... WITH ADDITIONS AFTER "/---..---\"
     5 
     6 Detecting and recoding Isabelle symbols.
     7 */
     8 
     9 package isac.gui.mawen.syntax.isabelle
    10 
    11 import scala.collection.mutable
    12 import scala.util.matching.Regex
    13 import scala.annotation.tailrec
    14 
    15 
    16 object XSymbol
    17 {
    18   type Symbol = String
    19 
    20   // counting Isabelle symbols, starting from 1
    21   type Offset = Text.Offset
    22   type Range = Text.Range
    23 
    24 
    25   /* spaces */
    26 
    27   val space = " "
    28 
    29   private val static_spaces = space * 4000
    30 
    31   def spaces(n: Int): String =
    32   {
    33     require(n >= 0)
    34     if (n < static_spaces.length) static_spaces.substring(0, n)
    35     else space * n
    36   }
    37 
    38 
    39   /* ASCII characters */
    40 
    41   def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
    42 
    43   def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9'
    44 
    45   def is_ascii_hex(c: Char): Boolean =
    46     '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f'
    47 
    48   def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\''
    49 
    50   def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c)
    51 
    52   def is_ascii_letdig(c: Char): Boolean =
    53     is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)
    54 
    55   def is_ascii_identifier(s: String): Boolean =
    56     s.length > 0 && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig)
    57 
    58   def ascii(c: Char): Symbol =
    59   {
    60     if (c > 127) {
    61       Console.printf("Non-ASCII character: " + XLibrary.quote(c.toString));
    62       "Non-ASCII character: " + XLibrary.quote(c.toString)
    63     }
    64     else char_symbols(c.toInt)
    65   }
    66 
    67 
    68   /* symbol matching */
    69 
    70   private val symbol_total = new Regex("""(?xs)
    71     [\ud800-\udbff][\udc00-\udfff] | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""")
    72 
    73   private def is_plain(c: Char): Boolean =
    74     !(c == '\r' || c == '\\' || Character.isHighSurrogate(c))
    75 
    76   def is_malformed(s: Symbol): Boolean =
    77     s.length match {
    78       case 1 =>
    79         val c = s(0)
    80         Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd'
    81       case 2 =>
    82         val c1 = s(0)
    83         val c2 = s(1)
    84         !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2))
    85       case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
    86     }
    87 
    88   def is_newline(s: Symbol): Boolean =
    89     s == "\n" || s == "\r" || s == "\r\n"
    90 
    91   class Matcher(text: CharSequence)
    92   {
    93     private val matcher = symbol_total.pattern.matcher(text)
    94     def apply(start: Int, end: Int): Int =
    95     {
    96       require(0 <= start && start < end && end <= text.length)
    97       if (is_plain(text.charAt(start))) 1
    98       else {
    99         matcher.region(start, end).lookingAt
   100         matcher.group.length
   101       }
   102     }
   103   }
   104 
   105 
   106   /* iterator */
   107 
   108   private val char_symbols: Array[Symbol] =
   109     (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray
   110 
   111   def iterator(text: CharSequence): Iterator[Symbol] =
   112     new Iterator[Symbol]
   113     {
   114       private val matcher = new Matcher(text)
   115       private var i = 0
   116       def hasNext = i < text.length
   117       def next =
   118       {
   119         val n = matcher(i, text.length)
   120         val s =
   121           if (n == 0) ""
   122           else if (n == 1) {
   123             val c = text.charAt(i)
   124             if (c < char_symbols.length) char_symbols(c)
   125             else text.subSequence(i, i + n).toString
   126           }
   127           else text.subSequence(i, i + n).toString
   128         i += n
   129         s
   130       }
   131     }
   132 
   133   def explode(text: CharSequence): List[Symbol] = iterator(text).toList
   134 
   135   def advance_line_column(pos: (Int, Int), text: CharSequence): (Int, Int) =
   136   {
   137     var (line, column) = pos
   138     for (sym <- iterator(text)) {
   139       if (is_newline(sym)) { line += 1; column = 1 }
   140       else column += 1
   141     }
   142     (line, column)
   143   }
   144 
   145 
   146   /* decoding offsets */
   147 
   148   object Index
   149   {
   150     private sealed case class Entry(chr: Int, sym: Int)
   151 
   152     val empty: Index = new Index(Nil)
   153 
   154     def apply(text: CharSequence): Index =
   155     {
   156       val matcher = new Matcher(text)
   157       val buf = new mutable.ListBuffer[Entry]
   158       var chr = 0
   159       var sym = 0
   160       while (chr < text.length) {
   161         val n = matcher(chr, text.length)
   162         chr += n
   163         sym += 1
   164         if (n > 1) buf += Entry(chr, sym)
   165       }
   166       if (buf.isEmpty) empty else new Index(buf.toList)
   167     }
   168   }
   169 
   170   final class Index private(entries: List[Index.Entry])
   171   {
   172     private val hash: Int = entries.hashCode
   173     private val index: Array[Index.Entry] = entries.toArray
   174 
   175     def decode(symbol_offset: Offset): Text.Offset =
   176     {
   177       val sym = symbol_offset - 1
   178       val end = index.length
   179       @tailrec def bisect(a: Int, b: Int): Int =
   180       {
   181         if (a < b) {
   182           val c = (a + b) / 2
   183           if (sym < index(c).sym) bisect(a, c)
   184           else if (c + 1 == end || sym < index(c + 1).sym) c
   185           else bisect(c + 1, b)
   186         }
   187         else -1
   188       }
   189       val i = bisect(0, end)
   190       if (i < 0) sym
   191       else index(i).chr + sym - index(i).sym
   192     }
   193     def decode(symbol_range: Range): Text.Range = symbol_range.map(decode(_))
   194 
   195     override def hashCode: Int = hash
   196     override def equals(that: Any): Boolean =
   197       that match {
   198         case other: Index => index.sameElements(other.index)
   199         case _ => false
   200       }
   201   }
   202 
   203 //\\----------------- FROM HERE ON TOO ADVANCED ----------------------------//
   204 ////----------------- ADDITIONS TO Isabelle2016-1 FOR ISAC -----------------\\
   205   
   206   //                Symbol
   207   def is_digit(sym: Char): Boolean = '0' <= sym && sym <= '9'
   208   
   209 }