1.1 --- a/src/Tools/isac/BridgeJEdit/fdl_lexer.ML Tue Aug 23 18:05:08 2022 +0200
1.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
1.3 @@ -1,362 +0,0 @@
1.4 -(* Title: HOL/SPARK/Tools/fdl_lexer.ML
1.5 - Author: Stefan Berghofer
1.6 - Copyright: secunet Security Networks AG
1.7 -
1.8 -Lexical analyzer for fdl files.
1.9 -*)
1.10 -
1.11 -signature FDL_LEXER =
1.12 -sig
1.13 - datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF
1.14 - datatype T = Token of kind * string * Position.T;
1.15 - type chars
1.16 - type banner
1.17 - type date
1.18 - type time
1.19 - val tokenize: (chars -> 'a * chars) -> (chars -> T * chars) ->
1.20 - Position.T -> string -> 'a * T list
1.21 - val position_of: T -> Position.T
1.22 - val pos_of: T -> string
1.23 - val is_eof: T -> bool
1.24 - val stopper: T Scan.stopper
1.25 - val kind_of: T -> kind
1.26 - val content_of: T -> string
1.27 - val unparse: T -> string
1.28 - val is_proper: T -> bool
1.29 - val is_digit: string -> bool
1.30 - val is_char_eof: string * 'a -> bool
1.31 - val c_comment: chars -> T * chars
1.32 - val curly_comment: chars -> T * chars
1.33 - val percent_comment: chars -> T * chars
1.34 - val vcg_header: chars -> (banner * (date * time) option) * chars
1.35 - val siv_header: chars ->
1.36 - (banner * (date * time) * (date * time) * (string * string)) * chars (*.. NOT [], but ..
1.37 - (^^^^^^^^^^^^^^^ header ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^) ^^^^^ ..trailing line=13..117 *)
1.38 - val explode_pos: string -> Position.T -> chars
1.39 - val leq_token: T * T -> bool
1.40 - val lexicon: Scan.lexicon
1.41 - val identifier: chars -> chars * chars
1.42 - val long_identifier: chars -> chars * chars
1.43 - val banner: chars -> (string * string * string) * chars
1.44 - val date: chars -> (string * string * string) * chars;
1.45 - val whitespace: chars -> chars * chars
1.46 - val whitespace1: chars -> chars * chars
1.47 - val keyword: string -> chars -> chars * chars
1.48 - val number: chars -> chars * chars
1.49 - val any: chars -> (string * Position.T) * chars
1.50 - val any_line: chars -> string * chars;
1.51 - val !!! : string -> (chars -> 'a) -> chars -> 'a
1.52 - val $$$ : string -> chars -> chars * chars
1.53 - val scan: (chars -> 'a * chars) -> (chars -> T * chars) -> chars -> ('a * T list) * chars;
1.54 - val char_stopper: (string * Position.T) Scan.stopper
1.55 - val make_token: kind -> chars -> T
1.56 -end;
1.57 -
1.58 -structure Fdl_Lexer(**): FDL_LEXER(**) =
1.59 -struct
1.60 -
1.61 -(** tokens **)
1.62 -
1.63 -datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF;
1.64 -
1.65 -datatype T = Token of kind * string * Position.T;
1.66 -(*for Formalise.T ^^^^ NOT required?*)
1.67 -
1.68 -fun make_token k xs = Token (k, implode (map fst xs),
1.69 - case xs of [] => Position.none | (_, p) :: _ => p);
1.70 -
1.71 -fun kind_of (Token (k, _, _)) = k;
1.72 -
1.73 -fun is_proper (Token (Comment, _, _)) = false
1.74 - | is_proper _ = true;
1.75 -
1.76 -fun content_of (Token (_, s, _)) = s;
1.77 -
1.78 -fun unparse (Token (Traceability, s, _)) = "For " ^ s ^ ":"
1.79 - | unparse (Token (_, s, _)) = s;
1.80 -
1.81 -fun position_of (Token (_, _, pos)) = pos;
1.82 -
1.83 -val pos_of = Position.here o position_of;
1.84 -
1.85 -fun is_eof (Token (EOF, _, _)) = true
1.86 - | is_eof _ = false;
1.87 -
1.88 -fun mk_eof pos = Token (EOF, "", pos);
1.89 -val eof = mk_eof Position.none;
1.90 -
1.91 -val stopper =
1.92 - Scan.stopper (fn [] => eof | toks => mk_eof (position_of (List.last toks))) is_eof;
1.93 -
1.94 -fun leq_token (Token (_, s, _), Token (_, s', _)) = size s <= size s';
1.95 -
1.96 -
1.97 -(** split up a string into a list of characters (with positions) **)
1.98 -
1.99 -type chars = (string * Position.T) list;
1.100 -
1.101 -fun is_char_eof ("", _) = true
1.102 - | is_char_eof _ = false;
1.103 -
1.104 -val char_stopper = Scan.stopper (K ("", Position.none)) is_char_eof;
1.105 -
1.106 -fun symbol (x : string, _ : Position.T) = x;
1.107 -
1.108 -(* convert string s to chars ("", pos) *)
1.109 -fun explode_pos s pos = fst (fold_map (fn x => fn pos =>
1.110 - ((x, pos), Position.symbol x pos)) (raw_explode s) pos);
1.111 -
1.112 -
1.113 -(** scanners **)
1.114 -
1.115 -val any = Scan.one (not o Scan.is_stopper char_stopper);
1.116 -
1.117 -fun prfx [] = Scan.succeed []
1.118 - | prfx (x :: xs) = Scan.one (equal x o symbol) ::: prfx xs;
1.119 -
1.120 -val $$$ = prfx o raw_explode;
1.121 -
1.122 -val lexicon = Scan.make_lexicon (map raw_explode
1.123 - ["rule_family",
1.124 - "For",
1.125 - ":",
1.126 - "[",
1.127 - "]",
1.128 - "(",
1.129 - ")",
1.130 - ",",
1.131 - "&",
1.132 - ";",
1.133 - "=",
1.134 - ".",
1.135 - "..",
1.136 - "requires",
1.137 - "may_be_replaced_by",
1.138 - "may_be_deduced",
1.139 - "may_be_deduced_from",
1.140 - "are_interchangeable",
1.141 - "if",
1.142 - "end",
1.143 - "function",
1.144 - "procedure",
1.145 - "type",
1.146 - "var",
1.147 - "const",
1.148 - "array",
1.149 - "record",
1.150 - ":=",
1.151 - "of",
1.152 - "**",
1.153 - "*",
1.154 - "/",
1.155 - "div",
1.156 - "mod",
1.157 - "+",
1.158 - "-",
1.159 - "<>",
1.160 - "<",
1.161 - ">",
1.162 - "<=",
1.163 - ">=",
1.164 - "<->",
1.165 - "->",
1.166 - "not",
1.167 - "and",
1.168 - "or",
1.169 - "for_some",
1.170 - "for_all",
1.171 - "***",
1.172 - "!!!",
1.173 - "element",
1.174 - "update",
1.175 - "pending"]);
1.176 -
1.177 -fun keyword s = Scan.literal lexicon :|--
1.178 - (fn xs => if map symbol xs = raw_explode s then Scan.succeed xs else Scan.fail);
1.179 -
1.180 -fun is_digit x = "0" <= x andalso x <= "9";
1.181 -fun is_alpha x = "a" <= x andalso x <= "z" orelse "A" <= x andalso x <= "Z";
1.182 -val is_underscore = equal "_";
1.183 -val is_tilde = equal "~";
1.184 -val is_newline = equal "\n";
1.185 -val is_tab = equal "\t";
1.186 -val is_space = equal " ";
1.187 -val is_whitespace = is_space orf is_tab orf is_newline;
1.188 -val is_whitespace' = is_space orf is_tab;
1.189 -
1.190 -val number = Scan.many1 (is_digit o symbol);
1.191 -
1.192 -val identifier =
1.193 - Scan.one (is_alpha o symbol) :::
1.194 - Scan.many
1.195 - ((is_alpha orf is_digit orf is_underscore) o symbol) @@@
1.196 - Scan.optional (Scan.one (is_tilde o symbol) >> single) [];
1.197 -
1.198 -val long_identifier =
1.199 - identifier @@@ Scan.repeats1 ($$$ "." @@@ identifier);
1.200 -
1.201 -val whitespace = Scan.many (is_whitespace o symbol);
1.202 -val whitespace1 = Scan.many1 (is_whitespace o symbol);
1.203 -val whitespace' = Scan.many (is_whitespace' o symbol);
1.204 -val newline = Scan.one (is_newline o symbol);
1.205 -
1.206 -fun beginning n cs =
1.207 - let
1.208 - val drop_blanks = drop_suffix is_whitespace;
1.209 - val all_cs = drop_blanks cs;
1.210 - val dots = if length all_cs > n then " ..." else "";
1.211 - in
1.212 - (drop_blanks (take n all_cs)
1.213 - |> map (fn c => if is_whitespace c then " " else c)
1.214 - |> implode) ^ dots
1.215 - end;
1.216 -
1.217 -fun !!! text scan =
1.218 - let
1.219 - fun get_pos [] = " (end-of-input)"
1.220 - | get_pos ((_, pos) :: _) = Position.here pos;
1.221 -
1.222 - fun err (syms, msg) = fn () =>
1.223 - text ^ get_pos syms ^ " at " ^ beginning 10 (map symbol syms) ^
1.224 - (case msg of NONE => "" | SOME m => "\n" ^ m ());
1.225 - in Scan.!! err scan end;
1.226 -
1.227 -val any_line' =
1.228 - Scan.many (not o (Scan.is_stopper char_stopper orf (is_newline o symbol)));
1.229 -
1.230 -val any_line = whitespace' |-- any_line' --|
1.231 - newline >> (implode o map symbol);
1.232 -
1.233 -fun gen_comment a b = $$$ a |-- !!! "unclosed comment"
1.234 - (Scan.repeat (Scan.unless ($$$ b) any) --| $$$ b) >> make_token Comment;
1.235 -
1.236 -val c_comment = gen_comment "/*" "*/";
1.237 -fun c_comment chars =
1.238 - let
1.239 -(** )val _ = @{print} {a = "//--- ###I Fdl_Lexer.c_comment called with",
1.240 - b_lenght_chars = length chars, chars = chars};( **)
1.241 - val xxx as (redex, toks) = (gen_comment "/*" "*/") chars
1.242 -(** )val _ = @{print} {z = "\\--- ###I Fdl_Lexer.c_comment \<rightarrow>",
1.243 - b_lenght_chars = length toks, res = xxx};( **)
1.244 - in xxx end;
1.245 -val curly_comment = gen_comment "{" "}";
1.246 -
1.247 -val percent_comment = $$$ "%" |-- any_line' >> make_token Comment;
1.248 -
1.249 -fun repeatn 0 _ = Scan.succeed []
1.250 - | repeatn n p = Scan.one p ::: repeatn (n-1) p;
1.251 -
1.252 -
1.253 -(** header of *.vcg and *.siv files (see simplifier/load_provenance.pro) **)
1.254 -
1.255 -type banner = string * string * string;
1.256 -type date = string * string * string;
1.257 -type time = string * string * string * string option;
1.258 -
1.259 -val asterisks = Scan.repeat1 (Scan.one (equal "*" o symbol));
1.260 -
1.261 -fun alphan n = repeatn n (is_alpha o symbol) >> (implode o map symbol);
1.262 -fun digitn n = repeatn n (is_digit o symbol) >> (implode o map symbol);
1.263 -
1.264 -val time =
1.265 - digitn 2 --| $$$ ":" -- digitn 2 --| $$$ ":" -- digitn 2 --
1.266 - Scan.option ($$$ "." |-- digitn 2) >>
1.267 - (fn (((hr, mi), s), ms) => (hr, mi, s, ms));
1.268 -
1.269 -val date =
1.270 - digitn 2 --| $$$ "-" -- alphan 3 --| $$$ "-" -- digitn 4 >>
1.271 - (fn ((d, m), y) => (d, m, y));
1.272 -
1.273 -val banner =
1.274 - whitespace' |-- asterisks --| whitespace' --| newline :|-- (fn xs =>
1.275 - (any_line -- any_line -- any_line >>
1.276 -(*WN:^^^^^^^^ ^^^^^^^^ ^^^^^^^^ 3 lines ending with \n *)
1.277 - (fn ((l1, l2), l3) => (l1, l2, l3))) --|
1.278 - whitespace' --| prfx (map symbol xs) --| whitespace' --| newline);
1.279 -
1.280 -val vcg_header = banner -- Scan.option (whitespace |--
1.281 - $$$ "DATE :" |-- whitespace |-- date --| whitespace --|
1.282 - Scan.option ($$$ "TIME :" -- whitespace) -- time);
1.283 -
1.284 -val siv_header = banner --| whitespace --
1.285 - ($$$ "CREATED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
1.286 - whitespace --
1.287 - ($$$ "SIMPLIFIED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
1.288 - newline --| newline -- (any_line -- any_line) >>
1.289 - (fn (((b, c), s), ls) => (b, c, s, ls));
1.290 -
1.291 -(*WN: print ---vvvvv*)
1.292 -fun siv_header chars =
1.293 - let
1.294 -(** )val _ = @{print} {a = "//--- ###I Fdl_Lexer.siv_header called with",
1.295 - b_lenght_chars = length chars, chars = chars};( **)
1.296 - val xxx as (redex, toks) =
1.297 - (
1.298 - banner --| whitespace --
1.299 - ($$$ "CREATED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
1.300 - whitespace --
1.301 - ($$$ "SIMPLIFIED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|
1.302 - newline --| newline -- (any_line -- any_line) >>
1.303 - (fn (((b, c), s), ls) => (b, c, s, ls))
1.304 - ) chars;
1.305 -(** )val _ = @{print} {z = "\\--- ###I Fdl_Lexer.siv_header \<rightarrow>",
1.306 - b_lenght_chars = length toks, res = xxx};( **)
1.307 - in xxx end;
1.308 -
1.309 -(** the main tokenizer **)
1.310 -
1.311 -fun scan header comment =
1.312 - !!! "bad header" header --| whitespace --
1.313 - Scan.repeat (Scan.unless (Scan.one is_char_eof)
1.314 - (!!! "bad input"
1.315 - ( comment
1.316 - || (keyword "For" -- whitespace1) |--
1.317 - Scan.repeat1 (Scan.unless (keyword ":") any) --|
1.318 - keyword ":" >> make_token Traceability
1.319 - || Scan.max leq_token
1.320 - (Scan.literal lexicon >> make_token Keyword)
1.321 - ( long_identifier >> make_token Long_Ident
1.322 - || identifier >> make_token Ident)
1.323 - || number >> make_token Number) --|
1.324 - whitespace) );
1.325 -fun scan header comment chars =
1.326 - let
1.327 - (** )val _ = @{print} {a = "//--- ###I Fdl_Lexer.scan called with",
1.328 - b_lenght_chars = length chars, chars = chars};( **)
1.329 - val xxx as (redex, toks) =
1.330 - (!!! "bad header" header --| whitespace --
1.331 - Scan.repeat (Scan.unless (Scan.one is_char_eof)
1.332 - (!!! "bad input"
1.333 - ( comment
1.334 - || (keyword "For" -- whitespace1) |--
1.335 - Scan.repeat1 (Scan.unless (keyword ":") any) --|
1.336 - keyword ":" >> make_token Traceability
1.337 - || Scan.max leq_token
1.338 - (Scan.literal lexicon >> make_token Keyword)
1.339 - ( long_identifier >> make_token Long_Ident
1.340 - || identifier >> make_token Ident)
1.341 - || number >> make_token Number) --|
1.342 - whitespace))) chars
1.343 - (** )val _ = @{print} {z = "\\--- ###I Fdl_Lexer.scan \<rightarrow>",
1.344 - b_lenght_chars = length toks, res = xxx};( **)
1.345 - in xxx end;
1.346 -
1.347 -fun tokenize header comment pos s =
1.348 - fst (Scan.finite char_stopper
1.349 - (Scan.error (scan header comment)) (explode_pos s pos));
1.350 -
1.351 -fun tokenize header(*= Fdl_Lexer.siv_header*) comment pos s =
1.352 -((** )@{print} {a = "//--- ###I Fdl_Lexer.tokenize called with", header = header,
1.353 - comment = comment, pos = pos, s = s};( **)
1.354 - let
1.355 -(** )val _ = @{print} {a = "###I Fdl_Lexer.tokenize: explode_pos", res = explode_pos s pos};( **)
1.356 -(*convert string s to chars ("", pos) is ---vvvvvvvvvvv ^^^^^^^^^^^*)
1.357 - val xxx as (redex, toks) =
1.358 - fst (Scan.finite char_stopper
1.359 - (Scan.error (scan header comment)) (explode_pos s pos))
1.360 -(** )val _ = @{print} {a = "\\--- ###I Fdl_Lexer.tokenize \<rightarrow>",
1.361 - b_lenght_chars = length toks, res = xxx};( **)
1.362 - in xxx end
1.363 -);
1.364 -
1.365 -end;