isac-java/src/java/isac/gui/mawen/syntax/isabelle/symbol.scala
author Walther Neuper <walther.neuper@jku.at>
Fri, 26 Mar 2021 10:45:05 +0100
changeset 5239 b4e3883d7b66
parent 5232 34f18fdc3103
permissions -rw-r--r--
reset mathematics-engine to Isabelle2015

note: for this version libisabelle was available,
which connects front-end (Java) and back-end (Isabelle/ML)
     1 /*  THIS IS A PARTIAL COPY OF Isabelle2016-1 ...
     2     Title:      Pure/General/symbol.scala
     3     Author:     Makarius
     4     ... WITH ADDITIONS AFTER "/---..---\"
     5 
     6 Detecting and recoding Isabelle symbols.
     7 */
     8 
     9 package isac.gui.mawen.syntax.isabelle
    10 
    11 import scala.collection.mutable
    12 import scala.util.matching.Regex
    13 import scala.annotation.tailrec
    14 
    15 
    16 object XSymbol
    17 {
    18   type Symbol = String
    19 
    20   // counting Isabelle symbols, starting from 1
    21   type Offset = Text.Offset
    22   type Range = Text.Range
    23 
    24 
    25   /* spaces */
    26 
    27   val space = " "
    28 
    29   private val static_spaces = space * 4000
    30 
    31   def spaces(n: Int): String =
    32   {
    33     require(n >= 0)
    34     if (n < static_spaces.length) static_spaces.substring(0, n)
    35     else space * n
    36   }
    37 
    38 
    39   /* ASCII characters */
    40 
    41   def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
    42 
    43   def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9'
    44 
    45   def is_ascii_hex(c: Char): Boolean =
    46     '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f'
    47 
    48   def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\''
    49 
    50   def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c)
    51 
    52   def is_ascii_letdig(c: Char): Boolean =
    53     is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)
    54 
    55   def is_ascii_identifier(s: String): Boolean =
    56     s.length > 0 && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig)
    57 
    58   def ascii(c: Char): Symbol =
    59   {
    60     if (c > 127) error("Non-ASCII character: " + XLibrary.quote(c.toString))
    61     else char_symbols(c.toInt)
    62   }
    63 
    64 
    65   /* symbol matching */
    66 
    67   private val symbol_total = new Regex("""(?xs)
    68     [\ud800-\udbff][\udc00-\udfff] | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""")
    69 
    70   private def is_plain(c: Char): Boolean =
    71     !(c == '\r' || c == '\\' || Character.isHighSurrogate(c))
    72 
    73   def is_malformed(s: Symbol): Boolean =
    74     s.length match {
    75       case 1 =>
    76         val c = s(0)
    77         Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd'
    78       case 2 =>
    79         val c1 = s(0)
    80         val c2 = s(1)
    81         !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2))
    82       case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
    83     }
    84 
    85   def is_newline(s: Symbol): Boolean =
    86     s == "\n" || s == "\r" || s == "\r\n"
    87 
    88   class Matcher(text: CharSequence)
    89   {
    90     private val matcher = symbol_total.pattern.matcher(text)
    91     def apply(start: Int, end: Int): Int =
    92     {
    93       require(0 <= start && start < end && end <= text.length)
    94       if (is_plain(text.charAt(start))) 1
    95       else {
    96         matcher.region(start, end).lookingAt
    97         matcher.group.length
    98       }
    99     }
   100   }
   101 
   102 
   103   /* iterator */
   104 
   105   private val char_symbols: Array[Symbol] =
   106     (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray
   107 
   108   def iterator(text: CharSequence): Iterator[Symbol] =
   109     new Iterator[Symbol]
   110     {
   111       private val matcher = new Matcher(text)
   112       private var i = 0
   113       def hasNext = i < text.length
   114       def next =
   115       {
   116         val n = matcher(i, text.length)
   117         val s =
   118           if (n == 0) ""
   119           else if (n == 1) {
   120             val c = text.charAt(i)
   121             if (c < char_symbols.length) char_symbols(c)
   122             else text.subSequence(i, i + n).toString
   123           }
   124           else text.subSequence(i, i + n).toString
   125         i += n
   126         s
   127       }
   128     }
   129 
   130   def explode(text: CharSequence): List[Symbol] = iterator(text).toList
   131 
   132   def advance_line_column(pos: (Int, Int), text: CharSequence): (Int, Int) =
   133   {
   134     var (line, column) = pos
   135     for (sym <- iterator(text)) {
   136       if (is_newline(sym)) { line += 1; column = 1 }
   137       else column += 1
   138     }
   139     (line, column)
   140   }
   141 
   142 
   143   /* decoding offsets */
   144 
   145   object Index
   146   {
   147     private sealed case class Entry(chr: Int, sym: Int)
   148 
   149     val empty: Index = new Index(Nil)
   150 
   151     def apply(text: CharSequence): Index =
   152     {
   153       val matcher = new Matcher(text)
   154       val buf = new mutable.ListBuffer[Entry]
   155       var chr = 0
   156       var sym = 0
   157       while (chr < text.length) {
   158         val n = matcher(chr, text.length)
   159         chr += n
   160         sym += 1
   161         if (n > 1) buf += Entry(chr, sym)
   162       }
   163       if (buf.isEmpty) empty else new Index(buf.toList)
   164     }
   165   }
   166 
   167   final class Index private(entries: List[Index.Entry])
   168   {
   169     private val hash: Int = entries.hashCode
   170     private val index: Array[Index.Entry] = entries.toArray
   171 
   172     def decode(symbol_offset: Offset): Text.Offset =
   173     {
   174       val sym = symbol_offset - 1
   175       val end = index.length
   176       @tailrec def bisect(a: Int, b: Int): Int =
   177       {
   178         if (a < b) {
   179           val c = (a + b) / 2
   180           if (sym < index(c).sym) bisect(a, c)
   181           else if (c + 1 == end || sym < index(c + 1).sym) c
   182           else bisect(c + 1, b)
   183         }
   184         else -1
   185       }
   186       val i = bisect(0, end)
   187       if (i < 0) sym
   188       else index(i).chr + sym - index(i).sym
   189     }
   190     def decode(symbol_range: Range): Text.Range = symbol_range.map(decode(_))
   191 
   192     override def hashCode: Int = hash
   193     override def equals(that: Any): Boolean =
   194       that match {
   195         case other: Index => index.sameElements(other.index)
   196         case _ => false
   197       }
   198   }
   199 
   200 //\\----------------- FROM HERE ON TOO ADVANCED ----------------------------//
   201 ////----------------- ADDITIONS TO Isabelle2016-1 FOR ISAC -----------------\\
   202   
   203   //                Symbol
   204   def is_digit(sym: Char): Boolean = '0' <= sym && sym <= '9'
   205   
   206 }