src/Tools/isac/BridgeJEdit/fdl_lexer.ML
changeset 60536 5038589d3033
parent 60535 d5c670beaba4
child 60537 f0305aeb010b
     1.1 --- a/src/Tools/isac/BridgeJEdit/fdl_lexer.ML	Tue Aug 23 18:05:08 2022 +0200
     1.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.3 @@ -1,362 +0,0 @@
     1.4 -(*  Title:      HOL/SPARK/Tools/fdl_lexer.ML
     1.5 -    Author:     Stefan Berghofer
     1.6 -    Copyright:  secunet Security Networks AG
     1.7 -
     1.8 -Lexical analyzer for fdl files.
     1.9 -*)
    1.10 -
    1.11 -signature FDL_LEXER =
    1.12 -sig
    1.13 -  datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF
    1.14 -  datatype T = Token of kind * string * Position.T;
    1.15 -  type chars
    1.16 -  type banner
    1.17 -  type date
    1.18 -  type time
    1.19 -  val tokenize: (chars -> 'a * chars) -> (chars -> T * chars) ->
    1.20 -    Position.T -> string -> 'a * T list
    1.21 -  val position_of: T -> Position.T
    1.22 -  val pos_of: T -> string
    1.23 -  val is_eof: T -> bool
    1.24 -  val stopper: T Scan.stopper
    1.25 -  val kind_of: T -> kind
    1.26 -  val content_of: T -> string
    1.27 -  val unparse: T -> string
    1.28 -  val is_proper: T -> bool
    1.29 -  val is_digit: string -> bool
    1.30 -  val is_char_eof: string * 'a -> bool
    1.31 -  val c_comment: chars -> T * chars
    1.32 -  val curly_comment: chars -> T * chars
    1.33 -  val percent_comment: chars -> T * chars
    1.34 -  val vcg_header: chars -> (banner * (date * time) option) * chars
    1.35 -  val siv_header: chars ->
    1.36 -    (banner * (date * time) * (date * time) * (string * string)) * chars (*.. NOT [], but ..
    1.37 -    (^^^^^^^^^^^^^^^ header ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^)   ^^^^^ ..trailing line=13..117 *)
    1.38 -  val explode_pos: string -> Position.T -> chars
    1.39 -  val leq_token: T * T -> bool
    1.40 -  val lexicon: Scan.lexicon
    1.41 -  val identifier: chars -> chars * chars
    1.42 -  val long_identifier: chars -> chars * chars
    1.43 -  val banner: chars -> (string * string * string) * chars
    1.44 -  val date: chars -> (string * string * string) * chars;
    1.45 -  val whitespace: chars -> chars * chars
    1.46 -  val whitespace1: chars -> chars * chars
    1.47 -  val keyword: string -> chars -> chars * chars
    1.48 -  val number: chars -> chars * chars
    1.49 -  val any: chars -> (string * Position.T) * chars
    1.50 -  val any_line: chars -> string * chars;
    1.51 -  val !!! : string -> (chars -> 'a) -> chars -> 'a
    1.52 -  val $$$ : string -> chars -> chars * chars
    1.53 -  val scan: (chars -> 'a * chars) -> (chars -> T * chars) -> chars -> ('a * T list) * chars;
    1.54 -  val char_stopper: (string * Position.T) Scan.stopper
    1.55 -  val make_token: kind -> chars -> T
    1.56 -end;
    1.57 -
    1.58 -structure Fdl_Lexer(**): FDL_LEXER(**) =
    1.59 -struct
    1.60 -
    1.61 -(** tokens **)
    1.62 -
    1.63 -datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF;
    1.64 -
    1.65 -datatype T = Token of kind * string * Position.T;
    1.66 -(*for Formalise.T     ^^^^ NOT required?*)
    1.67 -
    1.68 -fun make_token k xs = Token (k, implode (map fst xs),
    1.69 -  case xs of [] => Position.none | (_, p) :: _ => p);
    1.70 -
    1.71 -fun kind_of (Token (k, _, _)) = k;
    1.72 -
    1.73 -fun is_proper (Token (Comment, _, _)) = false
    1.74 -  | is_proper _ = true;
    1.75 -
    1.76 -fun content_of (Token (_, s, _)) = s;
    1.77 -
    1.78 -fun unparse (Token (Traceability, s, _)) = "For " ^ s ^ ":"
    1.79 -  | unparse (Token (_, s, _)) = s;
    1.80 -
    1.81 -fun position_of (Token (_, _, pos)) = pos;
    1.82 -
    1.83 -val pos_of = Position.here o position_of;
    1.84 -
    1.85 -fun is_eof (Token (EOF, _, _)) = true
    1.86 -  | is_eof _ = false;
    1.87 -
    1.88 -fun mk_eof pos = Token (EOF, "", pos);
    1.89 -val eof = mk_eof Position.none;
    1.90 -
    1.91 -val stopper =
    1.92 -  Scan.stopper (fn [] => eof | toks => mk_eof (position_of (List.last toks))) is_eof;
    1.93 -
    1.94 -fun leq_token (Token (_, s, _), Token (_, s', _)) = size s <= size s';
    1.95 -
    1.96 -
    1.97 -(** split up a string into a list of characters (with positions) **)
    1.98 -
    1.99 -type chars = (string * Position.T) list;
   1.100 -
   1.101 -fun is_char_eof ("", _) = true
   1.102 -  | is_char_eof _ = false;
   1.103 -
   1.104 -val char_stopper = Scan.stopper (K ("", Position.none)) is_char_eof;
   1.105 -
   1.106 -fun symbol (x : string, _ : Position.T) = x;
   1.107 -
   1.108 -(* convert string s to chars ("", pos) *)
   1.109 -fun explode_pos s pos = fst (fold_map (fn x => fn pos =>
   1.110 -  ((x, pos), Position.symbol x pos)) (raw_explode s) pos);
   1.111 -
   1.112 -
   1.113 -(** scanners **)
   1.114 -
   1.115 -val any = Scan.one (not o Scan.is_stopper char_stopper);
   1.116 -
   1.117 -fun prfx [] = Scan.succeed []
   1.118 -  | prfx (x :: xs) = Scan.one (equal x o symbol) ::: prfx xs;
   1.119 -
   1.120 -val $$$ = prfx o raw_explode;
   1.121 -
   1.122 -val lexicon = Scan.make_lexicon (map raw_explode
   1.123 -  ["rule_family",
   1.124 -   "For",
   1.125 -   ":",
   1.126 -   "[",
   1.127 -   "]",
   1.128 -   "(",
   1.129 -   ")",
   1.130 -   ",",
   1.131 -   "&",
   1.132 -   ";",
   1.133 -   "=",
   1.134 -   ".",
   1.135 -   "..",
   1.136 -   "requires",
   1.137 -   "may_be_replaced_by",
   1.138 -   "may_be_deduced",
   1.139 -   "may_be_deduced_from",
   1.140 -   "are_interchangeable",
   1.141 -   "if",
   1.142 -   "end",
   1.143 -   "function",
   1.144 -   "procedure",
   1.145 -   "type",
   1.146 -   "var",
   1.147 -   "const",
   1.148 -   "array",
   1.149 -   "record",
   1.150 -   ":=",
   1.151 -   "of",
   1.152 -   "**",
   1.153 -   "*",
   1.154 -   "/",
   1.155 -   "div",
   1.156 -   "mod",
   1.157 -   "+",
   1.158 -   "-",
   1.159 -   "<>",
   1.160 -   "<",
   1.161 -   ">",
   1.162 -   "<=",
   1.163 -   ">=",
   1.164 -   "<->",
   1.165 -   "->",
   1.166 -   "not",
   1.167 -   "and",
   1.168 -   "or",
   1.169 -   "for_some",
   1.170 -   "for_all",
   1.171 -   "***",
   1.172 -   "!!!",
   1.173 -   "element",
   1.174 -   "update",
   1.175 -   "pending"]);
   1.176 -
   1.177 -fun keyword s = Scan.literal lexicon :|--
   1.178 -  (fn xs => if map symbol xs = raw_explode s then Scan.succeed xs else Scan.fail);
   1.179 -
   1.180 -fun is_digit x = "0" <= x andalso x <= "9";
   1.181 -fun is_alpha x = "a" <= x andalso x <= "z" orelse "A" <= x andalso x <= "Z";
   1.182 -val is_underscore = equal "_";
   1.183 -val is_tilde = equal "~";
   1.184 -val is_newline = equal "\n";
   1.185 -val is_tab = equal "\t";
   1.186 -val is_space = equal " ";
   1.187 -val is_whitespace = is_space orf is_tab orf is_newline;
   1.188 -val is_whitespace' = is_space orf is_tab;
   1.189 -
   1.190 -val number = Scan.many1 (is_digit o symbol);
   1.191 -
   1.192 -val identifier =
   1.193 -  Scan.one (is_alpha o symbol) :::
   1.194 -  Scan.many
   1.195 -    ((is_alpha orf is_digit orf is_underscore) o symbol) @@@
   1.196 -   Scan.optional (Scan.one (is_tilde o symbol) >> single) [];
   1.197 -
   1.198 -val long_identifier =
   1.199 -  identifier @@@ Scan.repeats1 ($$$ "." @@@ identifier);
   1.200 -
   1.201 -val whitespace = Scan.many (is_whitespace o symbol);
   1.202 -val whitespace1 = Scan.many1 (is_whitespace o symbol);
   1.203 -val whitespace' = Scan.many (is_whitespace' o symbol);
   1.204 -val newline = Scan.one (is_newline o symbol);
   1.205 -
   1.206 -fun beginning n cs =
   1.207 -  let
   1.208 -    val drop_blanks = drop_suffix is_whitespace;
   1.209 -    val all_cs = drop_blanks cs;
   1.210 -    val dots = if length all_cs > n then " ..." else "";
   1.211 -  in
   1.212 -    (drop_blanks (take n all_cs)
   1.213 -      |> map (fn c => if is_whitespace c then " " else c)
   1.214 -      |> implode) ^ dots
   1.215 -  end;
   1.216 -
   1.217 -fun !!! text scan =
   1.218 -  let
   1.219 -    fun get_pos [] = " (end-of-input)"
   1.220 -      | get_pos ((_, pos) :: _) = Position.here pos;
   1.221 -
   1.222 -    fun err (syms, msg) = fn () =>
   1.223 -      text ^ get_pos syms ^ " at " ^ beginning 10 (map symbol syms) ^
   1.224 -        (case msg of NONE => "" | SOME m => "\n" ^ m ());
   1.225 -  in Scan.!! err scan end;
   1.226 -
   1.227 -val any_line' =
   1.228 -  Scan.many (not o (Scan.is_stopper char_stopper orf (is_newline o symbol)));
   1.229 -
   1.230 -val any_line = whitespace' |-- any_line' --|
   1.231 -  newline >> (implode o map symbol);
   1.232 -
   1.233 -fun gen_comment a b = $$$ a |-- !!! "unclosed comment"
   1.234 -  (Scan.repeat (Scan.unless ($$$ b) any) --| $$$ b) >> make_token Comment;
   1.235 -
   1.236 -val c_comment = gen_comment "/*" "*/";
   1.237 -fun c_comment chars = 
   1.238 -  let
   1.239 -(** )val _ = @{print} {a = "//--- ###I Fdl_Lexer.c_comment called with", 
   1.240 -      b_lenght_chars = length chars, chars = chars};( **)
   1.241 -    val xxx as (redex, toks) = (gen_comment "/*" "*/") chars
   1.242 -(** )val _ = @{print} {z = "\\--- ###I Fdl_Lexer.c_comment \<rightarrow>",
   1.243 -      b_lenght_chars = length toks, res = xxx};( **)
   1.244 -  in xxx end;
   1.245 -val curly_comment = gen_comment "{" "}";
   1.246 -
   1.247 -val percent_comment = $$$ "%" |-- any_line' >> make_token Comment;
   1.248 -
   1.249 -fun repeatn 0 _ = Scan.succeed []
   1.250 -  | repeatn n p = Scan.one p ::: repeatn (n-1) p;
   1.251 -
   1.252 -
   1.253 -(** header of *.vcg and *.siv files (see simplifier/load_provenance.pro) **)
   1.254 -
   1.255 -type banner = string * string * string;
   1.256 -type date = string * string * string;
   1.257 -type time = string * string * string * string option;
   1.258 -
   1.259 -val asterisks = Scan.repeat1 (Scan.one (equal "*" o symbol));
   1.260 -
   1.261 -fun alphan n = repeatn n (is_alpha o symbol) >> (implode o map symbol);
   1.262 -fun digitn n = repeatn n (is_digit o symbol) >> (implode o map symbol);
   1.263 -
   1.264 -val time =
   1.265 -  digitn 2 --| $$$ ":" -- digitn 2 --| $$$ ":" -- digitn 2 --
   1.266 -  Scan.option ($$$ "." |-- digitn 2) >>
   1.267 -    (fn (((hr, mi), s), ms) => (hr, mi, s, ms));
   1.268 -
   1.269 -val date =
   1.270 -  digitn 2 --| $$$ "-" -- alphan 3 --| $$$ "-" -- digitn 4 >>
   1.271 -    (fn ((d, m), y) => (d, m, y));
   1.272 -
   1.273 -val banner = 
   1.274 -  whitespace' |-- asterisks --| whitespace' --| newline :|-- (fn xs =>
   1.275 -    (any_line -- any_line -- any_line >>
   1.276 -(*WN:^^^^^^^^    ^^^^^^^^    ^^^^^^^^   3 lines ending with \n *)
   1.277 -       (fn ((l1, l2), l3) => (l1, l2, l3))) --|
   1.278 -    whitespace' --| prfx (map symbol xs) --| whitespace' --| newline);
   1.279 -
   1.280 -val vcg_header = banner -- Scan.option (whitespace |--
   1.281 -  $$$ "DATE :" |-- whitespace |-- date --| whitespace --|
   1.282 -  Scan.option ($$$ "TIME :" -- whitespace) -- time);
   1.283 -
   1.284 -val siv_header = banner --| whitespace --
   1.285 -  ($$$ "CREATED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
   1.286 -  whitespace --
   1.287 -  ($$$ "SIMPLIFIED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
   1.288 -  newline --| newline -- (any_line -- any_line) >>
   1.289 -    (fn (((b, c), s), ls) => (b, c, s, ls));
   1.290 -
   1.291 -(*WN: print ---vvvvv*)
   1.292 -fun siv_header chars =
   1.293 -  let
   1.294 -(** )val _ = @{print} {a = "//--- ###I Fdl_Lexer.siv_header called with", 
   1.295 -      b_lenght_chars = length chars, chars = chars};( **)
   1.296 -    val xxx as (redex, toks) = 
   1.297 -      (
   1.298 -        banner --| whitespace --
   1.299 -        ($$$ "CREATED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
   1.300 -        whitespace --
   1.301 -        ($$$ "SIMPLIFIED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
   1.302 -        newline --| newline -- (any_line -- any_line) >>
   1.303 -          (fn (((b, c), s), ls) => (b, c, s, ls))
   1.304 -      ) chars;
   1.305 -(** )val _ = @{print} {z = "\\--- ###I Fdl_Lexer.siv_header \<rightarrow>",
   1.306 -      b_lenght_chars = length toks, res = xxx};( **)
   1.307 -  in xxx end;
   1.308 -
   1.309 -(** the main tokenizer **)
   1.310 -
   1.311 -fun scan header comment =
   1.312 -  !!! "bad header" header --| whitespace --
   1.313 -  Scan.repeat (Scan.unless (Scan.one is_char_eof)
   1.314 -    (!!! "bad input"
   1.315 -       (   comment
   1.316 -        || (keyword "For" -- whitespace1) |--
   1.317 -              Scan.repeat1 (Scan.unless (keyword ":") any) --|
   1.318 -              keyword ":" >> make_token Traceability
   1.319 -        || Scan.max leq_token
   1.320 -             (Scan.literal lexicon >> make_token Keyword)
   1.321 -             (   long_identifier >> make_token Long_Ident
   1.322 -              || identifier >> make_token Ident)
   1.323 -        || number >> make_token Number) --|
   1.324 -     whitespace) );
   1.325 -fun scan header comment chars =
   1.326 -  let
   1.327 -    (** )val _ = @{print} {a = "//--- ###I Fdl_Lexer.scan called with", 
   1.328 -      b_lenght_chars = length chars, chars = chars};( **)
   1.329 -    val xxx as (redex, toks) = 
   1.330 -      (!!! "bad header" header --| whitespace --
   1.331 -      Scan.repeat (Scan.unless (Scan.one is_char_eof)
   1.332 -        (!!! "bad input"
   1.333 -           (   comment
   1.334 -            || (keyword "For" -- whitespace1) |--
   1.335 -                  Scan.repeat1 (Scan.unless (keyword ":") any) --|
   1.336 -                  keyword ":" >> make_token Traceability             
   1.337 -            || Scan.max leq_token
   1.338 -                 (Scan.literal lexicon >> make_token Keyword)
   1.339 -                 (   long_identifier >> make_token Long_Ident
   1.340 -                  || identifier >> make_token Ident)
   1.341 -            || number >> make_token Number) --|
   1.342 -         whitespace))) chars
   1.343 -    (** )val _ = @{print} {z = "\\--- ###I Fdl_Lexer.scan \<rightarrow>",
   1.344 -      b_lenght_chars = length toks, res = xxx};( **)
   1.345 -  in xxx end;
   1.346 -
   1.347 -fun tokenize header comment pos s =
   1.348 -  fst (Scan.finite char_stopper
   1.349 -    (Scan.error (scan header comment)) (explode_pos s pos));
   1.350 -
   1.351 -fun tokenize header(*= Fdl_Lexer.siv_header*) comment pos s =
   1.352 -((** )@{print} {a = "//--- ###I Fdl_Lexer.tokenize called with", header = header,
   1.353 -    comment = comment, pos = pos, s = s};( **)
   1.354 -  let
   1.355 -(** )val _ = @{print} {a = "###I Fdl_Lexer.tokenize: explode_pos", res = explode_pos s pos};( **)
   1.356 -(*convert string s to chars ("", pos) is ---vvvvvvvvvvv                  ^^^^^^^^^^^*)
   1.357 -    val xxx as (redex, toks) =
   1.358 -      fst (Scan.finite char_stopper
   1.359 -        (Scan.error (scan header comment)) (explode_pos s pos))
   1.360 -(** )val _ = @{print} {a = "\\--- ###I Fdl_Lexer.tokenize \<rightarrow>",
   1.361 -      b_lenght_chars = length toks, res = xxx};( **)
   1.362 -  in xxx end
   1.363 -);
   1.364 -
   1.365 -end;