src/Tools/isac/BridgeJEdit/fdl_lexer.ML
changeset 60536 5038589d3033
parent 60404 716f399db0a5
equal deleted inserted replaced
60535:d5c670beaba4 60536:5038589d3033
     1 (*  Title:      HOL/SPARK/Tools/fdl_lexer.ML
       
     2     Author:     Stefan Berghofer
       
     3     Copyright:  secunet Security Networks AG
       
     4 
       
     5 Lexical analyzer for fdl files.
       
     6 *)
       
     7 
       
       (* Interface of the FDL lexer: token type, character-level scanners, and
          parsers for the headers of *.vcg / *.siv files emitted by the SPARK
          toolset.  `chars` is the lexer's input type: characters paired with
          their source positions. *)
     8 signature FDL_LEXER =

     9 sig

    10   datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF

    11   datatype T = Token of kind * string * Position.T;

    12   type chars

    13   type banner

    14   type date

    15   type time

       (* tokenize header comment pos s: scan string s (starting at pos) into a
          header value and a token list, using the given header and comment scanners *)
    16   val tokenize: (chars -> 'a * chars) -> (chars -> T * chars) ->

    17     Position.T -> string -> 'a * T list

    18   val position_of: T -> Position.T

    19   val pos_of: T -> string

    20   val is_eof: T -> bool

    21   val stopper: T Scan.stopper

    22   val kind_of: T -> kind

    23   val content_of: T -> string

    24   val unparse: T -> string

    25   val is_proper: T -> bool

    26   val is_digit: string -> bool

    27   val is_char_eof: string * 'a -> bool

    28   val c_comment: chars -> T * chars

    29   val curly_comment: chars -> T * chars

    30   val percent_comment: chars -> T * chars

    31   val vcg_header: chars -> (banner * (date * time) option) * chars

    32   val siv_header: chars ->

    33     (banner * (date * time) * (date * time) * (string * string)) * chars (*.. NOT [], but ..

    34     (^^^^^^^^^^^^^^^ header ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^)   ^^^^^ ..trailing line=13..117 *)

    35   val explode_pos: string -> Position.T -> chars

    36   val leq_token: T * T -> bool

    37   val lexicon: Scan.lexicon

    38   val identifier: chars -> chars * chars

    39   val long_identifier: chars -> chars * chars

    40   val banner: chars -> (string * string * string) * chars

    41   val date: chars -> (string * string * string) * chars;

    42   val whitespace: chars -> chars * chars

    43   val whitespace1: chars -> chars * chars

    44   val keyword: string -> chars -> chars * chars

    45   val number: chars -> chars * chars

    46   val any: chars -> (string * Position.T) * chars

    47   val any_line: chars -> string * chars;

    48   val !!! : string -> (chars -> 'a) -> chars -> 'a

    49   val $$$ : string -> chars -> chars * chars

    50   val scan: (chars -> 'a * chars) -> (chars -> T * chars) -> chars -> ('a * T list) * chars;

    51   val char_stopper: (string * Position.T) Scan.stopper

    52   val make_token: kind -> chars -> T

    53 end;
       
    54 
       
    55 structure Fdl_Lexer(**): FDL_LEXER(**) =

    56 struct

    57 

    58 (** tokens **)

    59 

    60 datatype kind = Keyword | Ident | Long_Ident | Traceability | Number | Comment | EOF;

    61 

    62 datatype T = Token of kind * string * Position.T;

    63 (*for Formalise.T     ^^^^ NOT required?*)

    64 

       (* build a token of the given kind from scanned chars; the token's position is
          that of the first char, or Position.none for the empty scan *)
    65 fun make_token k xs = Token (k, implode (map fst xs),

    66   case xs of [] => Position.none | (_, p) :: _ => p);

    67 

    68 fun kind_of (Token (k, _, _)) = k;

    69 

       (* proper tokens are all tokens except comments *)
    70 fun is_proper (Token (Comment, _, _)) = false

    71   | is_proper _ = true;

    72 

    73 fun content_of (Token (_, s, _)) = s;

    74 

       (* re-print a token; traceability tokens regain their "For ...:" wrapper *)
    75 fun unparse (Token (Traceability, s, _)) = "For " ^ s ^ ":"

    76   | unparse (Token (_, s, _)) = s;

    77 

    78 fun position_of (Token (_, _, pos)) = pos;

    79 

    80 val pos_of = Position.here o position_of;

    81 

    82 fun is_eof (Token (EOF, _, _)) = true

    83   | is_eof _ = false;

    84 

    85 fun mk_eof pos = Token (EOF, "", pos);

    86 val eof = mk_eof Position.none;

    87 

       (* stopper for token-level scanning: pad with an EOF token positioned at
          the last token seen *)
    88 val stopper =

    89   Scan.stopper (fn [] => eof | toks => mk_eof (position_of (List.last toks))) is_eof;

    90 

       (* order tokens by content length; used with Scan.max for longest-match *)
    91 fun leq_token (Token (_, s, _), Token (_, s', _)) = size s <= size s';

    92 

    93 
       
    94 (** split up a string into a list of characters (with positions) **)

    95 

    96 type chars = (string * Position.T) list;

    97 

       (* the pseudo-char ("", _) marks end of input, cf. char_stopper below *)
    98 fun is_char_eof ("", _) = true

    99   | is_char_eof _ = false;

   100 

   101 val char_stopper = Scan.stopper (K ("", Position.none)) is_char_eof;

   102 

       (* project the symbol out of a positioned char *)
   103 fun symbol (x : string, _ : Position.T) = x;

   104 

       (* explode string s into (symbol, position) pairs, advancing from pos
          symbol by symbol *)
   105 fun explode_pos s pos = fst (fold_map (fn x => fn pos =>

   106   ((x, pos), Position.symbol x pos)) (raw_explode s) pos);

   107 

   108 

   109 (** scanners **)

   110 

       (* accept any single char that is not the end-of-input marker *)
   111 val any = Scan.one (not o Scan.is_stopper char_stopper);

   112 

       (* scan an exact sequence of symbols, returning the positioned chars *)
   113 fun prfx [] = Scan.succeed []

   114   | prfx (x :: xs) = Scan.one (equal x o symbol) ::: prfx xs;

   115 

   116 val $$$ = prfx o raw_explode;
       
   118 
       
       (* keywords and symbolic operators of FDL; scanned via Scan.literal and
          disambiguated against identifiers by longest match (Scan.max leq_token) *)
   119 val lexicon = Scan.make_lexicon (map raw_explode

   120   ["rule_family",

   121    "For",

   122    ":",

   123    "[",

   124    "]",

   125    "(",

   126    ")",

   127    ",",

   128    "&",

   129    ";",

   130    "=",

   131    ".",

   132    "..",

   133    "requires",

   134    "may_be_replaced_by",

   135    "may_be_deduced",

   136    "may_be_deduced_from",

   137    "are_interchangeable",

   138    "if",

   139    "end",

   140    "function",

   141    "procedure",

   142    "type",

   143    "var",

   144    "const",

   145    "array",

   146    "record",

   147    ":=",

   148    "of",

   149    "**",

   150    "*",

   151    "/",

   152    "div",

   153    "mod",

   154    "+",

   155    "-",

   156    "<>",

   157    "<",

   158    ">",

   159    "<=",

   160    ">=",

   161    "<->",

   162    "->",

   163    "not",

   164    "and",

   165    "or",

   166    "for_some",

   167    "for_all",

   168    "***",

   169    "!!!",

   170    "element",

   171    "update",

   172    "pending"]);

   173 

       (* scan exactly the keyword s: take a lexicon literal, then check it is s *)
   174 fun keyword s = Scan.literal lexicon :|--

   175   (fn xs => if map symbol xs = raw_explode s then Scan.succeed xs else Scan.fail);
       
   176 
       
       (* single-symbol character classes *)
   177 fun is_digit x = "0" <= x andalso x <= "9";

   178 fun is_alpha x = "a" <= x andalso x <= "z" orelse "A" <= x andalso x <= "Z";

   179 val is_underscore = equal "_";

   180 val is_tilde = equal "~";

   181 val is_newline = equal "\n";

   182 val is_tab = equal "\t";

   183 val is_space = equal " ";

   184 val is_whitespace = is_space orf is_tab orf is_newline;

       (* whitespace' excludes newline: intra-line whitespace only *)
   185 val is_whitespace' = is_space orf is_tab;

   186 

   187 val number = Scan.many1 (is_digit o symbol);

   188 

       (* a letter, then letters/digits/underscores, optionally one trailing "~" *)
   189 val identifier =

   190   Scan.one (is_alpha o symbol) :::

   191   Scan.many

   192     ((is_alpha orf is_digit orf is_underscore) o symbol) @@@

   193    Scan.optional (Scan.one (is_tilde o symbol) >> single) [];

   194 

       (* identifiers joined by ".", at least two components *)
   195 val long_identifier =

   196   identifier @@@ Scan.repeats1 ($$$ "." @@@ identifier);

   197 

   198 val whitespace = Scan.many (is_whitespace o symbol);

   199 val whitespace1 = Scan.many1 (is_whitespace o symbol);

   200 val whitespace' = Scan.many (is_whitespace' o symbol);

   201 val newline = Scan.one (is_newline o symbol);
       
   202 
       
       (* render at most n symbols of cs for an error message, collapsing
          whitespace and appending " ..." if input continues beyond n *)
   203 fun beginning n cs =

   204   let

   205     val drop_blanks = drop_suffix is_whitespace;

   206     val all_cs = drop_blanks cs;

   207     val dots = if length all_cs > n then " ..." else "";

   208   in

   209     (drop_blanks (take n all_cs)

   210       |> map (fn c => if is_whitespace c then " " else c)

   211       |> implode) ^ dots

   212   end;

   213 

       (* wrap a scanner so that failure raises an error prefixed with text,
          the failing position, and a snippet of the remaining input *)
   214 fun !!! text scan =

   215   let

   216     fun get_pos [] = " (end-of-input)"

   217       | get_pos ((_, pos) :: _) = Position.here pos;

   218 

   219     fun err (syms, msg) = fn () =>

   220       text ^ get_pos syms ^ " at " ^ beginning 10 (map symbol syms) ^

   221         (case msg of NONE => "" | SOME m => "\n" ^ m ());

   222   in Scan.!! err scan end;
       
   223 
       
       (* rest of the current line: chars up to (excluding) newline or EOF *)
   224 val any_line' =

   225   Scan.many (not o (Scan.is_stopper char_stopper orf (is_newline o symbol)));

   226 

       (* one full line as a string: skip leading blanks, consume the newline *)
   227 val any_line = whitespace' |-- any_line' --|

   228   newline >> (implode o map symbol);

   229 

       (* comment delimited by literals a..b, yielding a Comment token *)
   230 fun gen_comment a b = $$$ a |-- !!! "unclosed comment"

   231   (Scan.repeat (Scan.unless ($$$ b) any) --| $$$ b) >> make_token Comment;

   232 

   233 val c_comment = gen_comment "/*" "*/";

       (* NOTE(review): the fun below shadows the val above on purpose — it is the
          same scanner wrapped with (currently disabled) @{print} tracing *)
   234 fun c_comment chars = 

   235   let

   236 (** )val _ = @{print} {a = "//--- ###I Fdl_Lexer.c_comment called with", 

   237       b_lenght_chars = length chars, chars = chars};( **)

   238     val xxx as (redex, toks) = (gen_comment "/*" "*/") chars

   239 (** )val _ = @{print} {z = "\\--- ###I Fdl_Lexer.c_comment \<rightarrow>",

   240       b_lenght_chars = length toks, res = xxx};( **)

   241   in xxx end;

   242 val curly_comment = gen_comment "{" "}";

   243 

       (* "%" comment runs to end of line (newline itself not consumed) *)
   244 val percent_comment = $$$ "%" |-- any_line' >> make_token Comment;

   245 

       (* scan exactly n chars satisfying p *)
   246 fun repeatn 0 _ = Scan.succeed []

   247   | repeatn n p = Scan.one p ::: repeatn (n-1) p;
       
   248 
       
   249 
       
   250 (** header of *.vcg and *.siv files (see simplifier/load_provenance.pro) **)

   251 

   252 type banner = string * string * string;

       (* date = (day, month, year); time = (hour, minute, second, optional centiseconds) *)
   253 type date = string * string * string;

   254 type time = string * string * string * string option;

   255 

   256 val asterisks = Scan.repeat1 (Scan.one (equal "*" o symbol));

   257 

       (* exactly n letters / digits, as a string *)
   258 fun alphan n = repeatn n (is_alpha o symbol) >> (implode o map symbol);

   259 fun digitn n = repeatn n (is_digit o symbol) >> (implode o map symbol);

   260 

       (* hh:mm:ss[.cc] *)
   261 val time =

   262   digitn 2 --| $$$ ":" -- digitn 2 --| $$$ ":" -- digitn 2 --

   263   Scan.option ($$$ "." |-- digitn 2) >>

   264     (fn (((hr, mi), s), ms) => (hr, mi, s, ms));

   265 

       (* dd-MMM-yyyy, e.g. 29-NOV-2005 *)
   266 val date =

   267   digitn 2 --| $$$ "-" -- alphan 3 --| $$$ "-" -- digitn 4 >>

   268     (fn ((d, m), y) => (d, m, y));

   269 

       (* three text lines framed above and below by a line of asterisks
          (the closing line must repeat the opening asterisks exactly) *)
   270 val banner = 

   271   whitespace' |-- asterisks --| whitespace' --| newline :|-- (fn xs =>

   272     (any_line -- any_line -- any_line >>

   273 (*WN:^^^^^^^^    ^^^^^^^^    ^^^^^^^^   3 lines ending with \n *)

   274        (fn ((l1, l2), l3) => (l1, l2, l3))) --|

   275     whitespace' --| prfx (map symbol xs) --| whitespace' --| newline);

   276 

       (* *.vcg header: banner plus optional "DATE : ... [TIME : ...]" stamp *)
   277 val vcg_header = banner -- Scan.option (whitespace |--

   278   $$$ "DATE :" |-- whitespace |-- date --| whitespace --|

   279   Scan.option ($$$ "TIME :" -- whitespace) -- time);

   280 

       (* *.siv header: banner, CREATED and SIMPLIFIED stamps, then two free lines *)
   281 val siv_header = banner --| whitespace --

   282   ($$$ "CREATED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|

   283   whitespace --

   284   ($$$ "SIMPLIFIED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|

   285   newline --| newline -- (any_line -- any_line) >>

   286     (fn (((b, c), s), ls) => (b, c, s, ls));
       
   287 
       
   288 (*WN: print ---vvvvv*)

       (* NOTE(review): deliberately shadows the production siv_header above with a
          verbatim copy of its scanner, wrapped in (currently disabled) @{print}
          tracing; behavior is identical while the prints stay commented out *)
   289 fun siv_header chars =

   290   let

   291 (** )val _ = @{print} {a = "//--- ###I Fdl_Lexer.siv_header called with", 

   292       b_lenght_chars = length chars, chars = chars};( **)

   293     val xxx as (redex, toks) = 

   294       (

   295         banner --| whitespace --

   296         ($$$ "CREATED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|

   297         whitespace --

   298         ($$$ "SIMPLIFIED" |-- whitespace |-- (date --| $$$ "," --| whitespace -- time)) --|

   299         newline --| newline -- (any_line -- any_line) >>

   300           (fn (((b, c), s), ls) => (b, c, s, ls))

   301       ) chars;

   302 (** )val _ = @{print} {z = "\\--- ###I Fdl_Lexer.siv_header \<rightarrow>",

   303       b_lenght_chars = length toks, res = xxx};( **)

   304   in xxx end;
       
   305 
       
   306 (** the main tokenizer **)

   307 

       (* scan: parse the file header with `header`, then repeat until EOF:
          a comment (via `comment`), a traceability line "For ...:", a keyword vs.
          (long) identifier decided by longest match, or a number; whitespace is
          skipped between tokens *)
   308 fun scan header comment =

   309   !!! "bad header" header --| whitespace --

   310   Scan.repeat (Scan.unless (Scan.one is_char_eof)

   311     (!!! "bad input"

   312        (   comment

   313         || (keyword "For" -- whitespace1) |--

   314               Scan.repeat1 (Scan.unless (keyword ":") any) --|

   315               keyword ":" >> make_token Traceability

   316         || Scan.max leq_token

   317              (Scan.literal lexicon >> make_token Keyword)

   318              (   long_identifier >> make_token Long_Ident

   319               || identifier >> make_token Ident)

   320         || number >> make_token Number) --|

   321      whitespace) );

       (* NOTE(review): deliberately shadows the scan above with a verbatim copy
          wrapped in (currently disabled) @{print} tracing; behavior is identical *)
   322 fun scan header comment chars =

   323   let

   324     (** )val _ = @{print} {a = "//--- ###I Fdl_Lexer.scan called with", 

   325       b_lenght_chars = length chars, chars = chars};( **)

   326     val xxx as (redex, toks) = 

   327       (!!! "bad header" header --| whitespace --

   328       Scan.repeat (Scan.unless (Scan.one is_char_eof)

   329         (!!! "bad input"

   330            (   comment

   331             || (keyword "For" -- whitespace1) |--

   332                   Scan.repeat1 (Scan.unless (keyword ":") any) --|

   333                   keyword ":" >> make_token Traceability             

   334             || Scan.max leq_token

   335                  (Scan.literal lexicon >> make_token Keyword)

   336                  (   long_identifier >> make_token Long_Ident

   337                   || identifier >> make_token Ident)

   338             || number >> make_token Number) --|

   339          whitespace))) chars

   340     (** )val _ = @{print} {z = "\\--- ###I Fdl_Lexer.scan \<rightarrow>",

   341       b_lenght_chars = length toks, res = xxx};( **)

   342   in xxx end;
       
   343 
       
       (* tokenize: explode s at pos into positioned chars and run the full
          scanner under Scan.finite/Scan.error, returning (header value, tokens) *)
   344 fun tokenize header comment pos s =

   345   fst (Scan.finite char_stopper

   346     (Scan.error (scan header comment)) (explode_pos s pos));

   347 

       (* NOTE(review): deliberately shadows the tokenize above with a verbatim copy
          wrapped in (currently disabled) @{print} tracing; behavior is identical *)
   348 fun tokenize header(*= Fdl_Lexer.siv_header*) comment pos s =

   349 ((** )@{print} {a = "//--- ###I Fdl_Lexer.tokenize called with", header = header,

   350     comment = comment, pos = pos, s = s};( **)

   351   let

   352 (** )val _ = @{print} {a = "###I Fdl_Lexer.tokenize: explode_pos", res = explode_pos s pos};( **)

   353 (*convert string s to chars ("", pos) is ---vvvvvvvvvvv                  ^^^^^^^^^^^*)

   354     val xxx as (redex, toks) =

   355       fst (Scan.finite char_stopper

   356         (Scan.error (scan header comment)) (explode_pos s pos))

   357 (** )val _ = @{print} {a = "\\--- ###I Fdl_Lexer.tokenize \<rightarrow>",

   358       b_lenght_chars = length toks, res = xxx};( **)

   359   in xxx end

   360 );

   361 

   362 end;