1.1 --- a/src/Pure/General/scan.scala Thu Jun 16 11:59:29 2011 +0200
1.2 +++ b/src/Pure/General/scan.scala Thu Jun 16 17:25:16 2011 +0200
1.3 @@ -18,6 +18,16 @@
1.4
1.5 object Scan
1.6 {
1.7 + /** context of partial scans **/
1.8 +
1.9 + sealed abstract class Context
1.10 + case object Finished extends Context
1.11 + case class Quoted(quote: String) extends Context
1.12 + case object Verbatim extends Context
1.13 + case class Comment(depth: Int) extends Context
1.14 +
1.15 +
1.16 +
1.17 /** Lexicon -- position tree **/
1.18
1.19 object Lexicon
1.20 @@ -116,6 +126,12 @@
1.21 override val whiteSpace = "".r
1.22
1.23
1.24 + /* optional termination */
1.25 +
1.26 + def opt_term[T](p: => Parser[T]): Parser[Option[T]] =
1.27 + p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None)
1.28 +
1.29 +
1.30 /* keywords from lexicon */
1.31
1.32 def keyword: Parser[String] = new Parser[String]
1.33 @@ -178,12 +194,15 @@
1.34
1.35 /* quoted strings */
1.36
1.37 + private def quoted_body(quote: String): Parser[String] =
1.38 + {
1.39 + rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" |
1.40 + (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ^^ (_.mkString)
1.41 + }
1.42 +
1.43 private def quoted(quote: String): Parser[String] =
1.44 {
1.45 - quote ~
1.46 - rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" |
1.47 - (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~
1.48 - quote ^^ { case x ~ ys ~ z => x + ys.mkString + z }
1.49 + quote ~ quoted_body(quote) ~ quote ^^ { case x ~ y ~ z => x + y + z }
1.50 }.named("quoted")
1.51
1.52 def quoted_content(quote: String, source: String): String =
1.53 @@ -199,13 +218,30 @@
1.54 else body
1.55 }
1.56
1.57 + def quoted_context(quote: String, ctxt: Context): Parser[(String, Context)] =
1.58 + {
1.59 + ctxt match {
1.60 + case Finished =>
1.61 + quote ~ quoted_body(quote) ~ opt_term(quote) ^^
1.62 + { case x ~ y ~ Some(z) => (x + y + z, Finished)
1.63 + case x ~ y ~ None => (x + y, Quoted(quote)) }
1.64 + case Quoted(q) if q == quote =>
1.65 + quoted_body(quote) ~ opt_term(quote) ^^
1.66 + { case x ~ Some(y) => (x + y, Finished)
1.67 + case x ~ None => (x, ctxt) }
1.68 + case _ => failure("")
1.69 + }
1.70 + }.named("quoted_context")
1.71 +
1.72
1.73 /* verbatim text */
1.74
1.75 + private def verbatim_body: Parser[String] =
1.76 + rep(many1(sym => sym != "*") | """\*(?!\})""".r) ^^ (_.mkString)
1.77 +
1.78 private def verbatim: Parser[String] =
1.79 {
1.80 - "{*" ~ rep(many1(sym => sym != "*") | """\*(?!\})""".r) ~ "*}" ^^
1.81 - { case x ~ ys ~ z => x + ys.mkString + z }
1.82 + "{*" ~ verbatim_body ~ "*}" ^^ { case x ~ y ~ z => x + y + z }
1.83 }.named("verbatim")
1.84
1.85 def verbatim_content(source: String): String =
1.86 @@ -214,6 +250,21 @@
1.87 source.substring(2, source.length - 2)
1.88 }
1.89
1.90 + def verbatim_context(ctxt: Context): Parser[(String, Context)] =
1.91 + {
1.92 + ctxt match {
1.93 + case Finished =>
1.94 + "{*" ~ verbatim_body ~ opt_term("*}") ^^
1.95 + { case x ~ y ~ Some(z) => (x + y + z, Finished)
1.96 + case x ~ y ~ None => (x + y, Verbatim) }
1.97 + case Verbatim =>
1.98 + verbatim_body ~ opt_term("*}") ^^
1.99 + { case x ~ Some(y) => (x + y, Finished)
1.100 + case x ~ None => (x, Verbatim) }
1.101 + case _ => failure("")
1.102 + }
1.103 + }.named("verbatim_context")
1.104 +
1.105
1.106 /* nested comments */
1.107
1.108 @@ -254,7 +305,18 @@
1.109
1.110 /* outer syntax tokens */
1.111
1.112 - def token(symbols: Symbol.Interpretation, is_command: String => Boolean): Parser[Token] =
1.113 + private def delimited_token: Parser[Token] =
1.114 + {
1.115 + val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
1.116 + val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
1.117 + val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x))
1.118 + val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))
1.119 +
1.120 + string | (alt_string | (verb | cmt))
1.121 + }
1.122 +
1.123 + private def other_token(symbols: Symbol.Interpretation, is_command: String => Boolean)
1.124 + : Parser[Token] =
1.125 {
1.126 val id = one(symbols.is_letter) ~ many(symbols.is_letdig) ^^ { case x ~ y => x + y }
1.127 val nat = many1(symbols.is_digit)
1.128 @@ -278,23 +340,43 @@
1.129
1.130 val space = many1(symbols.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))
1.131
1.132 - val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
1.133 - val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
1.134 + // FIXME check
1.135 + val junk = many(sym => !(symbols.is_blank(sym)))
1.136 + val junk1 = many1(sym => !(symbols.is_blank(sym)))
1.137
1.138 - val junk = many1(sym => !(symbols.is_blank(sym)))
1.139 val bad_delimiter =
1.140 ("\"" | "`" | "{*" | "(*") ~ junk ^^ { case x ~ y => Token(Token.Kind.UNPARSED, x + y) }
1.141 - val bad = junk ^^ (x => Token(Token.Kind.UNPARSED, x))
1.142 + val bad = junk1 ^^ (x => Token(Token.Kind.UNPARSED, x))
1.143
1.144 + val command_keyword =
1.145 + keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
1.146
1.147 - /* tokens */
1.148 + space | (bad_delimiter |
1.149 + (((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
1.150 + command_keyword) | bad))
1.151 + }
1.152
1.153 - (space | (string | (alt_string | (verbatim ^^ (x => Token(Token.Kind.VERBATIM, x)) |
1.154 - comment ^^ (x => Token(Token.Kind.COMMENT, x)))))) |
1.155 - bad_delimiter |
1.156 - ((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
1.157 - keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))) |
1.158 - bad
1.159 + def token(symbols: Symbol.Interpretation, is_command: String => Boolean): Parser[Token] =
1.160 + delimited_token | other_token(symbols, is_command)
1.161 +
1.162 + def token_context(symbols: Symbol.Interpretation, is_command: String => Boolean, ctxt: Context)
1.163 + : Parser[(Token, Context)] =
1.164 + {
1.165 + val string =
1.166 + quoted_context("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) }
1.167 + val alt_string =
1.168 + quoted_context("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) }
1.169 + val verb =
1.170 + verbatim_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) }
1.171 + // FIXME comment_context
1.172 + val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))
1.173 + val other: Parser[(Token, Context)] =
1.174 + ctxt match {
1.175 + case Finished => (cmt | other_token(symbols, is_command)) ^^ { case x => (x, Finished) }
1.176 + case _ => failure("")
1.177 + }
1.178 +
1.179 + string | (alt_string | (verb | other))
1.180 }
1.181 }
1.182
2.1 --- a/src/Pure/Isar/outer_syntax.scala Thu Jun 16 11:59:29 2011 +0200
2.2 +++ b/src/Pure/Isar/outer_syntax.scala Thu Jun 16 17:25:16 2011 +0200
2.3 @@ -8,6 +8,7 @@
2.4
2.5
2.6 import scala.util.parsing.input.{Reader, CharSequenceReader}
2.7 +import scala.collection.mutable
2.8
2.9
2.10 class Outer_Syntax(symbols: Symbol.Interpretation)
2.11 @@ -73,4 +74,21 @@
2.12
2.13 def scan(input: CharSequence): List[Token] =
2.14 scan(new CharSequenceReader(input))
2.15 +
2.16 + def scan_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) =
2.17 + {
2.18 + import lexicon._
2.19 +
2.20 + var in: Reader[Char] = new CharSequenceReader(input)
2.21 + val toks = new mutable.ListBuffer[Token]
2.22 + var ctxt = context
2.23 + while (!in.atEnd) {
2.24 + parse(token_context(symbols, is_command, ctxt), in) match {
2.25 + case Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
2.26 + case NoSuccess(_, rest) =>
2.27 + error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
2.28 + }
2.29 + }
2.30 + (toks.toList, ctxt)
2.31 + }
2.32 }