isac-java/src/java/isac/gui/mawen/syntax/isabelle/symbol.scala
author Walther Neuper <walther.neuper@jku.at>
Fri, 26 Mar 2021 10:45:05 +0100
changeset 5239 b4e3883d7b66
parent 5232 34f18fdc3103
permissions -rw-r--r--
reset mathematics-engine to Isabelle2015

note: for this version libisabelle was available,
which connects front-end (Java) and back-end (Isabelle/ML)
wneuper@5051
     1
/*  THIS IS A PARTIAL COPY OF Isabelle2016-1 ...
wneuper@5050
     2
    Title:      Pure/General/symbol.scala
wneuper@5050
     3
    Author:     Makarius
wneuper@5050
     4
    ... WITH ADDITIONS AFTER "/---..---\"
wneuper@5050
     5
wneuper@5050
     6
Detecting and recoding Isabelle symbols.
wneuper@5050
     7
*/
wneuper@5050
     8
wneuper@5050
     9
package isac.gui.mawen.syntax.isabelle
wneuper@5050
    10
wneuper@5050
    11
import scala.collection.mutable
wneuper@5050
    12
import scala.util.matching.Regex
wneuper@5050
    13
import scala.annotation.tailrec
wneuper@5050
    14
wneuper@5050
    15
wneuper@5055
    16
object XSymbol
wneuper@5050
    17
{
wneuper@5050
    18
  type Symbol = String
wneuper@5050
    19
wneuper@5050
    20
  // counting Isabelle symbols, starting from 1
wneuper@5050
    21
  type Offset = Text.Offset
wneuper@5050
    22
  type Range = Text.Range
wneuper@5050
    23
wneuper@5050
    24
wneuper@5050
    25
  /* spaces */
wneuper@5050
    26
wneuper@5050
    27
  val space = " "
wneuper@5050
    28
wneuper@5050
    29
  private val static_spaces = space * 4000
wneuper@5050
    30
wneuper@5050
    31
  def spaces(n: Int): String =
wneuper@5050
    32
  {
wneuper@5050
    33
    require(n >= 0)
wneuper@5050
    34
    if (n < static_spaces.length) static_spaces.substring(0, n)
wneuper@5050
    35
    else space * n
wneuper@5050
    36
  }
wneuper@5050
    37
wneuper@5050
    38
wneuper@5050
    39
  /* ASCII characters */
wneuper@5050
    40
wneuper@5050
    41
  def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
wneuper@5050
    42
wneuper@5050
    43
  def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9'
wneuper@5050
    44
wneuper@5050
    45
  def is_ascii_hex(c: Char): Boolean =
wneuper@5050
    46
    '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f'
wneuper@5050
    47
wneuper@5050
    48
  def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\''
wneuper@5050
    49
wneuper@5050
    50
  def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c)
wneuper@5050
    51
wneuper@5050
    52
  def is_ascii_letdig(c: Char): Boolean =
wneuper@5050
    53
    is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)
wneuper@5050
    54
wneuper@5050
    55
  def is_ascii_identifier(s: String): Boolean =
wneuper@5050
    56
    s.length > 0 && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig)
wneuper@5050
    57
wneuper@5050
    58
  def ascii(c: Char): Symbol =
wneuper@5050
    59
  {
walther@5239
    60
    if (c > 127) error("Non-ASCII character: " + XLibrary.quote(c.toString))
wneuper@5050
    61
    else char_symbols(c.toInt)
wneuper@5050
    62
  }
wneuper@5050
    63
wneuper@5050
    64
wneuper@5050
    65
  /* symbol matching */
wneuper@5050
    66
wneuper@5050
    67
  private val symbol_total = new Regex("""(?xs)
wneuper@5050
    68
    [\ud800-\udbff][\udc00-\udfff] | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""")
wneuper@5050
    69
wneuper@5050
    70
  private def is_plain(c: Char): Boolean =
wneuper@5050
    71
    !(c == '\r' || c == '\\' || Character.isHighSurrogate(c))
wneuper@5050
    72
wneuper@5050
    73
  def is_malformed(s: Symbol): Boolean =
wneuper@5050
    74
    s.length match {
wneuper@5050
    75
      case 1 =>
wneuper@5050
    76
        val c = s(0)
wneuper@5050
    77
        Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd'
wneuper@5050
    78
      case 2 =>
wneuper@5050
    79
        val c1 = s(0)
wneuper@5050
    80
        val c2 = s(1)
wneuper@5050
    81
        !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2))
wneuper@5050
    82
      case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
wneuper@5050
    83
    }
wneuper@5050
    84
wneuper@5050
    85
  def is_newline(s: Symbol): Boolean =
wneuper@5050
    86
    s == "\n" || s == "\r" || s == "\r\n"
wneuper@5050
    87
wneuper@5050
    88
  class Matcher(text: CharSequence)
wneuper@5050
    89
  {
wneuper@5050
    90
    private val matcher = symbol_total.pattern.matcher(text)
wneuper@5050
    91
    def apply(start: Int, end: Int): Int =
wneuper@5050
    92
    {
wneuper@5050
    93
      require(0 <= start && start < end && end <= text.length)
wneuper@5050
    94
      if (is_plain(text.charAt(start))) 1
wneuper@5050
    95
      else {
wneuper@5050
    96
        matcher.region(start, end).lookingAt
wneuper@5050
    97
        matcher.group.length
wneuper@5050
    98
      }
wneuper@5050
    99
    }
wneuper@5050
   100
  }
wneuper@5050
   101
wneuper@5050
   102
wneuper@5050
   103
  /* iterator */
wneuper@5050
   104
wneuper@5050
   105
  private val char_symbols: Array[Symbol] =
wneuper@5050
   106
    (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray
wneuper@5050
   107
wneuper@5050
   108
  def iterator(text: CharSequence): Iterator[Symbol] =
wneuper@5050
   109
    new Iterator[Symbol]
wneuper@5050
   110
    {
wneuper@5050
   111
      private val matcher = new Matcher(text)
wneuper@5050
   112
      private var i = 0
wneuper@5050
   113
      def hasNext = i < text.length
wneuper@5050
   114
      def next =
wneuper@5050
   115
      {
wneuper@5050
   116
        val n = matcher(i, text.length)
wneuper@5050
   117
        val s =
wneuper@5050
   118
          if (n == 0) ""
wneuper@5050
   119
          else if (n == 1) {
wneuper@5050
   120
            val c = text.charAt(i)
wneuper@5050
   121
            if (c < char_symbols.length) char_symbols(c)
wneuper@5050
   122
            else text.subSequence(i, i + n).toString
wneuper@5050
   123
          }
wneuper@5050
   124
          else text.subSequence(i, i + n).toString
wneuper@5050
   125
        i += n
wneuper@5050
   126
        s
wneuper@5050
   127
      }
wneuper@5050
   128
    }
wneuper@5050
   129
wneuper@5050
   130
  def explode(text: CharSequence): List[Symbol] = iterator(text).toList
wneuper@5050
   131
wneuper@5050
   132
  def advance_line_column(pos: (Int, Int), text: CharSequence): (Int, Int) =
wneuper@5050
   133
  {
wneuper@5050
   134
    var (line, column) = pos
wneuper@5050
   135
    for (sym <- iterator(text)) {
wneuper@5050
   136
      if (is_newline(sym)) { line += 1; column = 1 }
wneuper@5050
   137
      else column += 1
wneuper@5050
   138
    }
wneuper@5050
   139
    (line, column)
wneuper@5050
   140
  }
wneuper@5050
   141
wneuper@5050
   142
wneuper@5050
   143
  /* decoding offsets */
wneuper@5050
   144
wneuper@5050
   145
  object Index
wneuper@5050
   146
  {
wneuper@5050
   147
    private sealed case class Entry(chr: Int, sym: Int)
wneuper@5050
   148
wneuper@5050
   149
    val empty: Index = new Index(Nil)
wneuper@5050
   150
wneuper@5050
   151
    def apply(text: CharSequence): Index =
wneuper@5050
   152
    {
wneuper@5050
   153
      val matcher = new Matcher(text)
wneuper@5050
   154
      val buf = new mutable.ListBuffer[Entry]
wneuper@5050
   155
      var chr = 0
wneuper@5050
   156
      var sym = 0
wneuper@5050
   157
      while (chr < text.length) {
wneuper@5050
   158
        val n = matcher(chr, text.length)
wneuper@5050
   159
        chr += n
wneuper@5050
   160
        sym += 1
wneuper@5050
   161
        if (n > 1) buf += Entry(chr, sym)
wneuper@5050
   162
      }
wneuper@5050
   163
      if (buf.isEmpty) empty else new Index(buf.toList)
wneuper@5050
   164
    }
wneuper@5050
   165
  }
wneuper@5050
   166
wneuper@5050
   167
  final class Index private(entries: List[Index.Entry])
wneuper@5050
   168
  {
wneuper@5050
   169
    private val hash: Int = entries.hashCode
wneuper@5050
   170
    private val index: Array[Index.Entry] = entries.toArray
wneuper@5050
   171
wneuper@5050
   172
    def decode(symbol_offset: Offset): Text.Offset =
wneuper@5050
   173
    {
wneuper@5050
   174
      val sym = symbol_offset - 1
wneuper@5050
   175
      val end = index.length
wneuper@5050
   176
      @tailrec def bisect(a: Int, b: Int): Int =
wneuper@5050
   177
      {
wneuper@5050
   178
        if (a < b) {
wneuper@5050
   179
          val c = (a + b) / 2
wneuper@5050
   180
          if (sym < index(c).sym) bisect(a, c)
wneuper@5050
   181
          else if (c + 1 == end || sym < index(c + 1).sym) c
wneuper@5050
   182
          else bisect(c + 1, b)
wneuper@5050
   183
        }
wneuper@5050
   184
        else -1
wneuper@5050
   185
      }
wneuper@5050
   186
      val i = bisect(0, end)
wneuper@5050
   187
      if (i < 0) sym
wneuper@5050
   188
      else index(i).chr + sym - index(i).sym
wneuper@5050
   189
    }
wneuper@5050
   190
    def decode(symbol_range: Range): Text.Range = symbol_range.map(decode(_))
wneuper@5050
   191
wneuper@5050
   192
    override def hashCode: Int = hash
wneuper@5050
   193
    override def equals(that: Any): Boolean =
wneuper@5050
   194
      that match {
wneuper@5050
   195
        case other: Index => index.sameElements(other.index)
wneuper@5050
   196
        case _ => false
wneuper@5050
   197
      }
wneuper@5050
   198
  }
wneuper@5050
   199
wneuper@5050
   200
//\\----------------- FROM HERE ON TOO ADVANCED ----------------------------//
wneuper@5050
   201
////----------------- ADDITIONS TO Isabelle2016-1 FOR ISAC -----------------\\
wneuper@5050
   202
  
wneuper@5050
   203
  //                Symbol
wneuper@5050
   204
  def is_digit(sym: Char): Boolean = '0' <= sym && sym <= '9'
wneuper@5050
   205
  
wneuper@5050
   206
}