some support for partial scans with explicit context;
author wenzelm
Thu, 16 Jun 2011 17:25:16 +0200
changeset 44285 0206466ee473
parent 44281 957617fe0765
child 44286 81517eed8a78
some support for partial scans with explicit context;
clarified junk vs. junk1;
src/Pure/General/scan.scala
src/Pure/Isar/outer_syntax.scala
     1.1 --- a/src/Pure/General/scan.scala	Thu Jun 16 11:59:29 2011 +0200
     1.2 +++ b/src/Pure/General/scan.scala	Thu Jun 16 17:25:16 2011 +0200
     1.3 @@ -18,6 +18,16 @@
     1.4  
     1.5  object Scan
     1.6  {
     1.7 +  /** context of partial scans **/
     1.8 +
     1.9 +  sealed abstract class Context
    1.10 +  case object Finished extends Context
    1.11 +  case class Quoted(quote: String) extends Context
    1.12 +  case object Verbatim extends Context
    1.13 +  case class Comment(depth: Int) extends Context
    1.14 +
    1.15 +
    1.16 +
    1.17    /** Lexicon -- position tree **/
    1.18  
    1.19    object Lexicon
    1.20 @@ -116,6 +126,12 @@
    1.21      override val whiteSpace = "".r
    1.22  
    1.23  
    1.24 +    /* optional termination */
    1.25 +
    1.26 +    def opt_term[T](p: => Parser[T]): Parser[Option[T]] =
    1.27 +      p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None)
    1.28 +
    1.29 +
    1.30      /* keywords from lexicon */
    1.31  
    1.32      def keyword: Parser[String] = new Parser[String]
    1.33 @@ -178,12 +194,15 @@
    1.34  
    1.35      /* quoted strings */
    1.36  
    1.37 +    private def quoted_body(quote: String): Parser[String] =
    1.38 +    {
    1.39 +      rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" |
    1.40 +        (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ^^ (_.mkString)
    1.41 +    }
    1.42 +
    1.43      private def quoted(quote: String): Parser[String] =
    1.44      {
    1.45 -      quote ~
    1.46 -        rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" |
    1.47 -          (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~
    1.48 -      quote ^^ { case x ~ ys ~ z => x + ys.mkString + z }
    1.49 +      quote ~ quoted_body(quote) ~ quote ^^ { case x ~ y ~ z => x + y + z }
    1.50      }.named("quoted")
    1.51  
    1.52      def quoted_content(quote: String, source: String): String =
    1.53 @@ -199,13 +218,30 @@
    1.54        else body
    1.55      }
    1.56  
    1.57 +    def quoted_context(quote: String, ctxt: Context): Parser[(String, Context)] =
    1.58 +    {
    1.59 +      ctxt match {
    1.60 +        case Finished =>
    1.61 +          quote ~ quoted_body(quote) ~ opt_term(quote) ^^
    1.62 +            { case x ~ y ~ Some(z) => (x + y + z, Finished)
    1.63 +              case x ~ y ~ None => (x + y, Quoted(quote)) }
    1.64 +        case Quoted(q) if q == quote =>
    1.65 +          quoted_body(quote) ~ opt_term(quote) ^^
    1.66 +            { case x ~ Some(y) => (x + y, Finished)
    1.67 +              case x ~ None => (x, ctxt) }
    1.68 +        case _ => failure("")
    1.69 +      }
    1.70 +    }.named("quoted_context")
    1.71 +
    1.72  
    1.73      /* verbatim text */
    1.74  
    1.75 +    private def verbatim_body: Parser[String] =
    1.76 +      rep(many1(sym => sym != "*") | """\*(?!\})""".r) ^^ (_.mkString)
    1.77 +
    1.78      private def verbatim: Parser[String] =
    1.79      {
    1.80 -      "{*" ~ rep(many1(sym => sym != "*") | """\*(?!\})""".r) ~ "*}" ^^
    1.81 -        { case x ~ ys ~ z => x + ys.mkString + z }
    1.82 +      "{*" ~ verbatim_body ~ "*}" ^^ { case x ~ y ~ z => x + y + z }
    1.83      }.named("verbatim")
    1.84  
    1.85      def verbatim_content(source: String): String =
    1.86 @@ -214,6 +250,21 @@
    1.87        source.substring(2, source.length - 2)
    1.88      }
    1.89  
    1.90 +    def verbatim_context(ctxt: Context): Parser[(String, Context)] =
    1.91 +    {
    1.92 +      ctxt match {
    1.93 +        case Finished =>
    1.94 +          "{*" ~ verbatim_body ~ opt_term("*}") ^^
    1.95 +            { case x ~ y ~ Some(z) => (x + y + z, Finished)
    1.96 +              case x ~ y ~ None => (x + y, Verbatim) }
    1.97 +        case Verbatim =>
    1.98 +          verbatim_body ~ opt_term("*}") ^^
    1.99 +            { case x ~ Some(y) => (x + y, Finished)
   1.100 +              case x ~ None => (x, Verbatim) }
   1.101 +        case _ => failure("")
   1.102 +      }
   1.103 +    }.named("verbatim_context")
   1.104 +
   1.105  
   1.106      /* nested comments */
   1.107  
   1.108 @@ -254,7 +305,18 @@
   1.109  
   1.110      /* outer syntax tokens */
   1.111  
   1.112 -    def token(symbols: Symbol.Interpretation, is_command: String => Boolean): Parser[Token] =
   1.113 +    private def delimited_token: Parser[Token] =
   1.114 +    {
   1.115 +      val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
   1.116 +      val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
   1.117 +      val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x))
   1.118 +      val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))
   1.119 +
   1.120 +      string | (alt_string | (verb | cmt))
   1.121 +    }
   1.122 +
   1.123 +    private def other_token(symbols: Symbol.Interpretation, is_command: String => Boolean)
   1.124 +      : Parser[Token] =
   1.125      {
   1.126        val id = one(symbols.is_letter) ~ many(symbols.is_letdig) ^^ { case x ~ y => x + y }
   1.127        val nat = many1(symbols.is_digit)
   1.128 @@ -278,23 +340,43 @@
   1.129  
   1.130        val space = many1(symbols.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))
   1.131  
   1.132 -      val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
   1.133 -      val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
   1.134 +      // FIXME check
   1.135 +      val junk = many(sym => !(symbols.is_blank(sym)))
   1.136 +      val junk1 = many1(sym => !(symbols.is_blank(sym)))
   1.137  
   1.138 -      val junk = many1(sym => !(symbols.is_blank(sym)))
   1.139        val bad_delimiter =
   1.140          ("\"" | "`" | "{*" | "(*") ~ junk ^^ { case x ~ y => Token(Token.Kind.UNPARSED, x + y) }
   1.141 -      val bad = junk ^^ (x => Token(Token.Kind.UNPARSED, x))
   1.142 +      val bad = junk1 ^^ (x => Token(Token.Kind.UNPARSED, x))
   1.143  
   1.144 +      val command_keyword =
   1.145 +        keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
   1.146  
   1.147 -      /* tokens */
   1.148 +      space | (bad_delimiter |
   1.149 +        (((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
   1.150 +          command_keyword) | bad))
   1.151 +    }
   1.152  
   1.153 -      (space | (string | (alt_string | (verbatim ^^ (x => Token(Token.Kind.VERBATIM, x)) |
   1.154 -        comment ^^ (x => Token(Token.Kind.COMMENT, x)))))) |
   1.155 -      bad_delimiter |
   1.156 -      ((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
   1.157 -        keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))) |
   1.158 -      bad
   1.159 +    def token(symbols: Symbol.Interpretation, is_command: String => Boolean): Parser[Token] =
   1.160 +      delimited_token | other_token(symbols, is_command)
   1.161 +
   1.162 +    def token_context(symbols: Symbol.Interpretation, is_command: String => Boolean, ctxt: Context)
   1.163 +      : Parser[(Token, Context)] =
   1.164 +    {
   1.165 +      val string =
   1.166 +        quoted_context("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) }
   1.167 +      val alt_string =
   1.168 +        quoted_context("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) }
   1.169 +      val verb =
   1.170 +        verbatim_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) }
   1.171 +      // FIXME comment_context
   1.172 +      val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))
   1.173 +      val other: Parser[(Token, Context)] =
   1.174 +        ctxt match {
   1.175 +          case Finished => (cmt | other_token(symbols, is_command)) ^^ { case x => (x, Finished) }
   1.176 +          case _ => failure("")
   1.177 +        }
   1.178 +
   1.179 +      string | (alt_string | (verb | other))
   1.180      }
   1.181    }
   1.182  
     2.1 --- a/src/Pure/Isar/outer_syntax.scala	Thu Jun 16 11:59:29 2011 +0200
     2.2 +++ b/src/Pure/Isar/outer_syntax.scala	Thu Jun 16 17:25:16 2011 +0200
     2.3 @@ -8,6 +8,7 @@
     2.4  
     2.5  
     2.6  import scala.util.parsing.input.{Reader, CharSequenceReader}
     2.7 +import scala.collection.mutable
     2.8  
     2.9  
    2.10  class Outer_Syntax(symbols: Symbol.Interpretation)
    2.11 @@ -73,4 +74,21 @@
    2.12  
    2.13    def scan(input: CharSequence): List[Token] =
    2.14      scan(new CharSequenceReader(input))
    2.15 +
    2.16 +  def scan_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) =
    2.17 +  {
    2.18 +    import lexicon._
    2.19 +
    2.20 +    var in: Reader[Char] = new CharSequenceReader(input)
    2.21 +    val toks = new mutable.ListBuffer[Token]
    2.22 +    var ctxt = context
    2.23 +    while (!in.atEnd) {
    2.24 +      parse(token_context(symbols, is_command, ctxt), in) match {
    2.25 +        case Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
    2.26 +        case NoSuccess(_, rest) =>
    2.27 +          error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
    2.28 +      }
    2.29 +    }
    2.30 +    (toks.toList, ctxt)
    2.31 +  }
    2.32  }